xref: /illumos-gate/usr/src/uts/common/io/igc/igc_ring.c (revision 6bbbd4428a21d3f4d1d329851dbc599121cb5d31)
1*6bbbd442SRobert Mustacchi /*
2*6bbbd442SRobert Mustacchi  * This file and its contents are supplied under the terms of the
3*6bbbd442SRobert Mustacchi  * Common Development and Distribution License ("CDDL"), version 1.0.
4*6bbbd442SRobert Mustacchi  * You may only use this file in accordance with the terms of version
5*6bbbd442SRobert Mustacchi  * 1.0 of the CDDL.
6*6bbbd442SRobert Mustacchi  *
7*6bbbd442SRobert Mustacchi  * A full copy of the text of the CDDL should have accompanied this
8*6bbbd442SRobert Mustacchi  * source.  A copy of the CDDL is also available via the Internet at
9*6bbbd442SRobert Mustacchi  * http://www.illumos.org/license/CDDL.
10*6bbbd442SRobert Mustacchi  */
11*6bbbd442SRobert Mustacchi 
12*6bbbd442SRobert Mustacchi /*
13*6bbbd442SRobert Mustacchi  * Copyright 2024 Oxide Computer Company
14*6bbbd442SRobert Mustacchi  */
15*6bbbd442SRobert Mustacchi 
16*6bbbd442SRobert Mustacchi /*
17*6bbbd442SRobert Mustacchi  * igc ring related functions. This is where the bulk of our I/O occurs.
18*6bbbd442SRobert Mustacchi  */
19*6bbbd442SRobert Mustacchi 
20*6bbbd442SRobert Mustacchi #include <sys/stddef.h>
21*6bbbd442SRobert Mustacchi #include <sys/strsubr.h>
22*6bbbd442SRobert Mustacchi #include <sys/strsun.h>
23*6bbbd442SRobert Mustacchi #include <sys/sysmacros.h>
24*6bbbd442SRobert Mustacchi #include <sys/sdt.h>
25*6bbbd442SRobert Mustacchi 
26*6bbbd442SRobert Mustacchi #include "igc.h"
27*6bbbd442SRobert Mustacchi 
28*6bbbd442SRobert Mustacchi /*
29*6bbbd442SRobert Mustacchi  * Structure used to consolidate TX information about a given packet.
30*6bbbd442SRobert Mustacchi  */
typedef struct igc_tx_state {
	/* TX buffers consumed while mapping the current packet. */
	list_t itx_bufs;
	/* Parsed Ethernet/IP/ULP header information from MAC for the packet. */
	mac_ether_offload_info_t itx_meoi;
	/*
	 * NOTE(review): the TX path is outside this chunk; the following are
	 * presumably the MAC-requested checksum flags, LSO MSS, and LSO flags
	 * for the packet — confirm against the tx entry point.
	 */
	uint32_t itx_cksum;
	uint32_t itx_mss;
	uint32_t itx_lso;
	/* Buffer currently being filled and the bytes remaining in it. */
	igc_tx_buffer_t *itx_cur_buf;
	size_t itx_buf_rem;
	/* mblk chain to be freed (presumably after the packet is queued). */
	mblk_t *itx_free_mp;
	/* Count of hardware descriptors associated with this packet. */
	uint32_t itx_ndescs;
} igc_tx_state_t;
42*6bbbd442SRobert Mustacchi 
/*
 * DMA attributes that are used for descriptor rings.
 */
static const ddi_dma_attr_t igc_desc_dma_attr = {
	.dma_attr_version = DMA_ATTR_V0,
	/*
	 * DMA descriptor rings can show up anywhere in the address space. The
	 * card supports a 64-bit address for this.
	 */
	.dma_attr_addr_lo = 0,
	.dma_attr_addr_hi = UINT64_MAX,
	/*
	 * The I210 datasheet says that the ring descriptor length can support
	 * at most 32K entries that are each 16 bytes long. Hence the following
	 * max.
	 */
	.dma_attr_count_max = 0x80000,
	/*
	 * The I210 datasheet, which is the closest we have for the I225,
	 * requires 128 byte alignment for rings. Note, igb and e1000g default
	 * to a 4KiB alignment here.
	 */
	.dma_attr_align = 0x80,
	/*
	 * Borrowed from igb(4D).
	 */
	.dma_attr_burstsizes = 0xfff,
	/*
	 * We set the minimum and maximum based upon what the RDLEN/TDLEN
	 * register will actually support.
	 */
	.dma_attr_minxfer = 0x80,
	.dma_attr_maxxfer = 0x80000,
	/*
	 * The ring must be physically contiguous, indicated by the sgllen
	 * value of 1, which means that this doesn't have any boundary crossing
	 * constraints.
	 */
	.dma_attr_seg = UINT64_MAX,
	.dma_attr_sgllen = 1,
	/*
	 * For descriptor rings, hardware asks for the size in 128 byte chunks,
	 * so we set that here again.
	 */
	.dma_attr_granular = 0x80,
	/* No special DMA flags are required. */
	.dma_attr_flags = 0
};
90*6bbbd442SRobert Mustacchi 
91*6bbbd442SRobert Mustacchi /*
92*6bbbd442SRobert Mustacchi  * DMA attributes that cover pre-allocated data buffers. Note, RX buffers are
93*6bbbd442SRobert Mustacchi  * slightly more constrained than TX buffers because the RX buffer addr[0] can
94*6bbbd442SRobert Mustacchi  * sometimes be used as a no snoop enable bit. Therefore we purposefully avoid
95*6bbbd442SRobert Mustacchi  * that in our allocations here to allow for use of that in the future if
96*6bbbd442SRobert Mustacchi  * desired.
97*6bbbd442SRobert Mustacchi  */
static const ddi_dma_attr_t igc_data_dma_attr = {
	.dma_attr_version = DMA_ATTR_V0,
	/*
	 * Packet data can go anywhere in memory.
	 */
	.dma_attr_addr_lo = 0,
	.dma_attr_addr_hi = UINT64_MAX,
	/*
	 * The maximum size of an RX packet is 127 KiB in the SRRCTL register.
	 * For TX, the maximum value is a 16-bit quantity because that's the
	 * tx descriptor's size. So we cap it at this value.
	 */
	.dma_attr_count_max = UINT16_MAX,
	/*
	 * The hardware strictly requires only 2 byte alignment in RX
	 * descriptors in case no snoop is enabled and no such constraints in
	 * TX. We end up increasing this to a request for 16 byte alignment so
	 * that we can guarantee the IP header alignment and offsetting needs to
	 * happen on all rx descriptors.
	 */
	.dma_attr_align = 0x10,
	/*
	 * We're not constrained here at least via PCIe, so we use the wider
	 * setting here. Similarly to the ring descriptors we just set the
	 * granularity widely.
	 */
	.dma_attr_minxfer = 0x1,
	.dma_attr_maxxfer = UINT32_MAX,
	.dma_attr_seg = UINT64_MAX,
	/*
	 * The hardware allows for arbitrary chaining of descriptors; however,
	 * we want to move to a world where we are allocating page sized buffers
	 * at most and therefore constrain the number of cookies for these
	 * buffers. Transmit caps the buffer allocation size at the page size,
	 * but receive does not today. We set the granularity to 1 to reflect
	 * the device's flexibility.
	 */
	.dma_attr_sgllen = 1,
	.dma_attr_granular = 1,
	/* No special DMA flags are required. */
	.dma_attr_flags = 0
};
139*6bbbd442SRobert Mustacchi 
140*6bbbd442SRobert Mustacchi /*
141*6bbbd442SRobert Mustacchi  * These are the DMA attributes we use when performing DMA TX binding for an
142*6bbbd442SRobert Mustacchi  * mblk_t.
143*6bbbd442SRobert Mustacchi  */
static const ddi_dma_attr_t igc_tx_dma_attr = {
	.dma_attr_version = DMA_ATTR_V0,
	/*
	 * Packet data can go anywhere in memory.
	 */
	.dma_attr_addr_lo = 0,
	.dma_attr_addr_hi = UINT64_MAX,
	/*
	 * For TX, the maximum value is a 16-bit quantity because that's the
	 * tx descriptor's size.
	 */
	.dma_attr_count_max = UINT16_MAX,
	/*
	 * TX data can go anywhere, but we ask for 16 byte alignment just to
	 * keep things somewhat aligned in the system.
	 */
	.dma_attr_align = 0x10,
	/*
	 * We're not constrained here at least via PCIe, so we use the wider
	 * setting here. Similarly to the ring descriptors we just set the
	 * granularity widely.
	 */
	.dma_attr_minxfer = 0x1,
	.dma_attr_maxxfer = UINT32_MAX,
	.dma_attr_seg = UINT64_MAX,
	/*
	 * We size our transmit cookies so that the maximum sized LSO packet can
	 * go through here.
	 */
	.dma_attr_sgllen = IGC_MAX_TX_COOKIES,
	.dma_attr_granular = 1,
	/* No special DMA flags are required. */
	.dma_attr_flags = 0

};
178*6bbbd442SRobert Mustacchi 
179*6bbbd442SRobert Mustacchi /*
180*6bbbd442SRobert Mustacchi  * All of these wrappers are so we only have one place to tack into FMA
181*6bbbd442SRobert Mustacchi  * register accesses in the future.
182*6bbbd442SRobert Mustacchi  */
183*6bbbd442SRobert Mustacchi static void
igc_dma_acc_attr(igc_t * igc,ddi_device_acc_attr_t * accp)184*6bbbd442SRobert Mustacchi igc_dma_acc_attr(igc_t *igc, ddi_device_acc_attr_t *accp)
185*6bbbd442SRobert Mustacchi {
186*6bbbd442SRobert Mustacchi 	bzero(accp, sizeof (ddi_device_acc_attr_t));
187*6bbbd442SRobert Mustacchi 
188*6bbbd442SRobert Mustacchi 	accp->devacc_attr_version = DDI_DEVICE_ATTR_V1;
189*6bbbd442SRobert Mustacchi 	accp->devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
190*6bbbd442SRobert Mustacchi 	accp->devacc_attr_dataorder = DDI_STRICTORDER_ACC;
191*6bbbd442SRobert Mustacchi 	accp->devacc_attr_access = DDI_DEFAULT_ACC;
192*6bbbd442SRobert Mustacchi }
193*6bbbd442SRobert Mustacchi 
194*6bbbd442SRobert Mustacchi static void
igc_dma_desc_attr(igc_t * igc,ddi_dma_attr_t * attrp)195*6bbbd442SRobert Mustacchi igc_dma_desc_attr(igc_t *igc, ddi_dma_attr_t *attrp)
196*6bbbd442SRobert Mustacchi {
197*6bbbd442SRobert Mustacchi 	bcopy(&igc_desc_dma_attr, attrp, sizeof (ddi_dma_attr_t));
198*6bbbd442SRobert Mustacchi }
199*6bbbd442SRobert Mustacchi 
200*6bbbd442SRobert Mustacchi static void
igc_dma_data_attr(igc_t * igc,ddi_dma_attr_t * attrp)201*6bbbd442SRobert Mustacchi igc_dma_data_attr(igc_t *igc, ddi_dma_attr_t *attrp)
202*6bbbd442SRobert Mustacchi {
203*6bbbd442SRobert Mustacchi 	bcopy(&igc_data_dma_attr, attrp, sizeof (ddi_dma_attr_t));
204*6bbbd442SRobert Mustacchi }
205*6bbbd442SRobert Mustacchi 
206*6bbbd442SRobert Mustacchi static void
igc_dma_tx_attr(igc_t * igc,ddi_dma_attr_t * attrp)207*6bbbd442SRobert Mustacchi igc_dma_tx_attr(igc_t *igc, ddi_dma_attr_t *attrp)
208*6bbbd442SRobert Mustacchi {
209*6bbbd442SRobert Mustacchi 	bcopy(&igc_tx_dma_attr, attrp, sizeof (ddi_dma_attr_t));
210*6bbbd442SRobert Mustacchi }
211*6bbbd442SRobert Mustacchi 
/*
 * Tear down an igc DMA buffer in the reverse order of allocation: unbind the
 * handle, free the DMA memory, and finally free the handle itself. A non-zero
 * idb_size is our indicator that the handle is currently bound. Safe to call
 * on a partially-constructed buffer (used as the unwind path in
 * igc_dma_alloc()).
 */
static void
igc_dma_free(igc_dma_buffer_t *idb)
{
	/* Proxy for DMA handle bound */
	if (idb->idb_size != 0) {
		(void) ddi_dma_unbind_handle(idb->idb_hdl);
		idb->idb_size = 0;
	}

	if (idb->idb_acc != NULL) {
		ddi_dma_mem_free(&idb->idb_acc);
		idb->idb_acc = NULL;
		idb->idb_va = NULL;
		idb->idb_alloc_len = 0;
	}

	if (idb->idb_hdl != NULL) {
		ddi_dma_free_handle(&idb->idb_hdl);
		idb->idb_hdl = NULL;
	}

	/* Everything should now be back to its zeroed state. */
	ASSERT0(idb->idb_size);
	ASSERT0(idb->idb_alloc_len);
	ASSERT3P(idb->idb_acc, ==, NULL);
	ASSERT3P(idb->idb_hdl, ==, NULL);
	ASSERT3P(idb->idb_va, ==, NULL);
}
239*6bbbd442SRobert Mustacchi 
/*
 * Allocate, zero, and bind a DMA buffer of the requested size using the given
 * attributes. idb_size is only set after a successful bind; igc_dma_free()
 * keys off of that to know whether the handle must be unbound. On any failure
 * we unwind whatever was allocated and return false.
 */
static bool
igc_dma_alloc(igc_t *igc, igc_dma_buffer_t *idb, ddi_dma_attr_t *attrp,
    size_t size)
{
	int ret;
	ddi_device_acc_attr_t acc;
	uint_t flags = DDI_DMA_STREAMING;

	bzero(idb, sizeof (igc_dma_buffer_t));
	ret = ddi_dma_alloc_handle(igc->igc_dip, attrp, DDI_DMA_DONTWAIT, NULL,
	    &idb->idb_hdl);
	if (ret != DDI_SUCCESS) {
		dev_err(igc->igc_dip, CE_WARN, "!failed to allocate DMA "
		    "handle: %d", ret);
		return (false);
	}

	igc_dma_acc_attr(igc, &acc);
	ret = ddi_dma_mem_alloc(idb->idb_hdl, size, &acc, flags,
	    DDI_DMA_DONTWAIT, NULL, &idb->idb_va, &idb->idb_alloc_len,
	    &idb->idb_acc);
	if (ret != DDI_SUCCESS) {
		dev_err(igc->igc_dip, CE_WARN, "!failed to allocate %lu bytes "
		    "of DMA memory: %d", size, ret);
		igc_dma_free(idb);
		return (false);
	}

	/* The allocation may have been rounded up; zero all of it. */
	bzero(idb->idb_va, idb->idb_alloc_len);
	ret = ddi_dma_addr_bind_handle(idb->idb_hdl, NULL, idb->idb_va,
	    idb->idb_alloc_len, DDI_DMA_RDWR | flags, DDI_DMA_DONTWAIT, NULL,
	    NULL, NULL);
	if (ret != DDI_SUCCESS) {
		dev_err(igc->igc_dip, CE_WARN, "!failed to bind %lu bytes of "
		    "DMA memory: %d", idb->idb_alloc_len, ret);
		igc_dma_free(idb);
		return (false);
	}

	idb->idb_size = size;
	return (true);
}
282*6bbbd442SRobert Mustacchi 
/*
 * desballoc(9F) free routine for RX buffers. This runs both when the stack
 * frees a packet we loaned it and when we ourselves free the pre-created
 * mblk during teardown; irb_loaned distinguishes the two cases.
 */
static void
igc_rx_recycle(caddr_t arg)
{
	igc_rx_buffer_t *buf = (igc_rx_buffer_t *)arg;
	igc_rx_ring_t *ring = buf->irb_ring;
	caddr_t mblk_va;
	size_t mblk_len;

	/*
	 * The mblk is free regardless of what happens next, so make sure we
	 * clean up.
	 */
	buf->irb_mp = NULL;

	/*
	 * The mblk_t is pre-created ahead of binding. If loaned is not set then
	 * this simply means we're tearing down this as part of tearing down the
	 * device as opposed to getting it from the rest of the stack and
	 * therefore there's nothing else to do.
	 */
	if (!buf->irb_loaned) {
		return;
	}

	/*
	 * Ensure we mark this buffer as no longer loaned and then insert it
	 * onto the free list.
	 */
	buf->irb_loaned = false;

	/*
	 * Create a new mblk and insert it on the free list. Note desballoc()
	 * may fail, leaving irb_mp NULL; the rest of the driver must tolerate
	 * buffers without an mblk (see the comment in igc_rx_bufs_alloc()).
	 */
	mblk_va = buf->irb_dma.idb_va + IGC_RX_BUF_IP_ALIGN;
	mblk_len = buf->irb_dma.idb_size - IGC_RX_BUF_IP_ALIGN;
	buf->irb_mp = desballoc((uchar_t *)mblk_va, mblk_len, 0,
	    &buf->irb_free_rtn);

	mutex_enter(&ring->irr_free_lock);
	ring->irr_free_list[ring->irr_nfree] = buf;
	ring->irr_nfree++;
#ifdef	DEBUG
	igc_t *igc = ring->irr_igc;
	ASSERT3U(ring->irr_nfree, <=, igc->igc_rx_nfree);
#endif
	/* Wake anyone waiting in the free CV for a buffer to come back. */
	cv_signal(&ring->irr_free_cv);
	mutex_exit(&ring->irr_free_lock);
}
331*6bbbd442SRobert Mustacchi 
332*6bbbd442SRobert Mustacchi static void
igc_rx_bufs_free(igc_t * igc,igc_rx_ring_t * ring)333*6bbbd442SRobert Mustacchi igc_rx_bufs_free(igc_t *igc, igc_rx_ring_t *ring)
334*6bbbd442SRobert Mustacchi {
335*6bbbd442SRobert Mustacchi 	for (uint32_t i = 0; i < igc->igc_rx_nbuf; i++) {
336*6bbbd442SRobert Mustacchi 		igc_rx_buffer_t *buf = &ring->irr_arena[i];
337*6bbbd442SRobert Mustacchi 
338*6bbbd442SRobert Mustacchi 		ASSERT3U(buf->irb_loaned, ==, false);
339*6bbbd442SRobert Mustacchi 		freemsg(buf->irb_mp);
340*6bbbd442SRobert Mustacchi 		buf->irb_mp = NULL;
341*6bbbd442SRobert Mustacchi 		igc_dma_free(&buf->irb_dma);
342*6bbbd442SRobert Mustacchi 	}
343*6bbbd442SRobert Mustacchi }
344*6bbbd442SRobert Mustacchi 
/*
 * Allocate the DMA memory and a pre-created mblk for each RX buffer in the
 * ring's arena. The first igc_rx_ndesc buffers populate the work list (one
 * per descriptor); all remaining buffers seed the free list. Returns false
 * if DMA allocation fails; the caller is responsible for cleanup.
 */
static bool
igc_rx_bufs_alloc(igc_t *igc, igc_rx_ring_t *ring)
{
	for (uint32_t i = 0; i < igc->igc_rx_nbuf; i++) {
		igc_rx_buffer_t *buf = &ring->irr_arena[i];
		ddi_dma_attr_t attr;
		caddr_t mblk_va;
		size_t mblk_len;

		buf->irb_ring = ring;
		igc_dma_data_attr(igc, &attr);
		if (!igc_dma_alloc(igc, &buf->irb_dma, &attr,
		    igc->igc_rx_buf_size)) {
			dev_err(igc->igc_dip, CE_WARN, "!failed to allocate RX "
			    "ring %u buffer %u", ring->irr_idx, i);
			return (false);
		}

		/* igc_rx_recycle() runs when the mblk is freed. */
		buf->irb_free_rtn.free_func = igc_rx_recycle;
		buf->irb_free_rtn.free_arg = (caddr_t)buf;

		/*
		 * We ignore whether or not this was successful because we have
		 * to handle the case that we will have buffers without mblk's
		 * due to loaning and related.
		 */
		mblk_va = buf->irb_dma.idb_va + IGC_RX_BUF_IP_ALIGN;
		mblk_len = buf->irb_dma.idb_size - IGC_RX_BUF_IP_ALIGN;
		buf->irb_mp = desballoc((uchar_t *)mblk_va, mblk_len, 0,
		    &buf->irb_free_rtn);

		if (i < igc->igc_rx_ndesc) {
			ring->irr_work_list[i] = buf;
		} else {
			ring->irr_free_list[ring->irr_nfree] = buf;
			ring->irr_nfree++;
		}
	}

	return (true);
}
386*6bbbd442SRobert Mustacchi 
/*
 * Free all RX ring data: the buffer arenas, the free and work lists, and the
 * descriptor ring DMA memory. Each piece is guarded by a NULL check so this
 * is safe to call on partially-constructed state; it doubles as the cleanup
 * path for igc_rx_data_alloc().
 */
void
igc_rx_data_free(igc_t *igc)
{
	for (uint32_t i = 0; i < igc->igc_nrx_rings; i++) {
		igc_rx_ring_t *ring = &igc->igc_rx_rings[i];

		if (ring->irr_arena != NULL) {
			igc_rx_bufs_free(igc, ring);
			kmem_free(ring->irr_arena, sizeof (igc_rx_buffer_t) *
			    igc->igc_rx_nbuf);
			ring->irr_arena = NULL;
		}

		if (ring->irr_free_list != NULL) {
			kmem_free(ring->irr_free_list, igc->igc_rx_nfree *
			    sizeof (igc_rx_buffer_t *));
			ring->irr_free_list = NULL;
		}

		if (ring->irr_work_list != NULL) {
			kmem_free(ring->irr_work_list, igc->igc_rx_ndesc *
			    sizeof (igc_rx_buffer_t *));
			ring->irr_work_list = NULL;
		}

		if (ring->irr_ring != NULL) {
			igc_dma_free(&ring->irr_desc_dma);
			ring->irr_ring = NULL;
			ring->irr_next = 0;
		}
	}
}
419*6bbbd442SRobert Mustacchi 
/*
 * Allocate all per-ring receive data: the descriptor ring DMA memory, the
 * work and free lists, the buffer arena, and the RX buffers themselves. On
 * any failure, everything allocated so far is torn down via
 * igc_rx_data_free() and false is returned.
 */
bool
igc_rx_data_alloc(igc_t *igc)
{
	for (uint32_t i = 0; i < igc->igc_nrx_rings; i++) {
		igc_rx_ring_t *ring = &igc->igc_rx_rings[i];
		ddi_dma_attr_t desc_attr;
		size_t desc_len;

		igc_dma_desc_attr(igc, &desc_attr);
		desc_len = sizeof (union igc_adv_rx_desc) *
		    igc->igc_rx_ndesc;
		if (!igc_dma_alloc(igc, &ring->irr_desc_dma, &desc_attr,
		    desc_len)) {
			dev_err(igc->igc_dip, CE_WARN, "!failed to allocate "
			    "RX descriptor ring %u", i);
			goto cleanup;
		}
		/* The descriptor ring aliases the DMA buffer's VA. */
		ring->irr_ring = (void *)ring->irr_desc_dma.idb_va;

		ring->irr_work_list = kmem_zalloc(sizeof (igc_rx_buffer_t *) *
		    igc->igc_rx_ndesc, KM_NOSLEEP);
		if (ring->irr_work_list == NULL) {
			dev_err(igc->igc_dip, CE_WARN, "!failed to allocate "
			    "RX descriptor ring %u rx work list", i);
			goto cleanup;
		}

		ring->irr_free_list = kmem_zalloc(sizeof (igc_rx_buffer_t *) *
		    igc->igc_rx_nfree, KM_NOSLEEP);
		if (ring->irr_free_list == NULL) {
			dev_err(igc->igc_dip, CE_WARN, "!failed to allocate "
			    "RX descriptor ring %u rx free list", i);
			goto cleanup;
		}


		ring->irr_arena = kmem_zalloc(sizeof (igc_rx_buffer_t) *
		    igc->igc_rx_nbuf, KM_NOSLEEP);
		if (ring->irr_arena == NULL) {
			dev_err(igc->igc_dip, CE_WARN, "!failed to allocate "
			    "RX descriptor ring %u rx buf arena", i);
			goto cleanup;
		}

		if (!igc_rx_bufs_alloc(igc, ring)) {
			goto cleanup;
		}
	}

	return (true);

cleanup:
	igc_rx_data_free(igc);
	return (false);
}
475*6bbbd442SRobert Mustacchi 
476*6bbbd442SRobert Mustacchi /*
477*6bbbd442SRobert Mustacchi  * Write / update a descriptor ring entry. This had been implemented in a few
478*6bbbd442SRobert Mustacchi  * places, so this was intended as a consolidation of those.
479*6bbbd442SRobert Mustacchi  */
480*6bbbd442SRobert Mustacchi static inline void
igc_rx_ring_desc_write(igc_rx_ring_t * ring,uint32_t idx)481*6bbbd442SRobert Mustacchi igc_rx_ring_desc_write(igc_rx_ring_t *ring, uint32_t idx)
482*6bbbd442SRobert Mustacchi {
483*6bbbd442SRobert Mustacchi 	const ddi_dma_cookie_t *cookie;
484*6bbbd442SRobert Mustacchi 	uint64_t addr;
485*6bbbd442SRobert Mustacchi 	igc_dma_buffer_t *irb = &ring->irr_work_list[idx]->irb_dma;
486*6bbbd442SRobert Mustacchi 
487*6bbbd442SRobert Mustacchi 	cookie = ddi_dma_cookie_one(irb->idb_hdl);
488*6bbbd442SRobert Mustacchi 	addr = cookie->dmac_laddress + IGC_RX_BUF_IP_ALIGN;
489*6bbbd442SRobert Mustacchi 	ring->irr_ring[idx].read.pkt_addr = LE_64(addr);
490*6bbbd442SRobert Mustacchi 	ring->irr_ring[idx].read.hdr_addr = LE_64(0);
491*6bbbd442SRobert Mustacchi }
492*6bbbd442SRobert Mustacchi 
493*6bbbd442SRobert Mustacchi /*
494*6bbbd442SRobert Mustacchi  * Fully initialize a receive ring. This involves:
495*6bbbd442SRobert Mustacchi  *
496*6bbbd442SRobert Mustacchi  *  - Doing an initial programming and sync of the descriptor ring
497*6bbbd442SRobert Mustacchi  *  - Programming the base and length registers
498*6bbbd442SRobert Mustacchi  *  - Programming the ring's buffer size and descriptor type
499*6bbbd442SRobert Mustacchi  *  - Programming the queue's receive control register
500*6bbbd442SRobert Mustacchi  */
static void
igc_rx_ring_hw_init(igc_t *igc, igc_rx_ring_t *ring)
{
	uint32_t val, high, low;
	const ddi_dma_cookie_t *desc;

	/*
	 * Seed every descriptor with its buffer's DMA address and make sure
	 * the device sees it before we program the ring registers.
	 */
	for (uint32_t i = 0; i < igc->igc_rx_ndesc; i++) {
		igc_rx_ring_desc_write(ring, i);
	}
	IGC_DMA_SYNC(&ring->irr_desc_dma, DDI_DMA_SYNC_FORDEV);

	/*
	 * Program the ring's address.
	 */
	desc = ddi_dma_cookie_one(ring->irr_desc_dma.idb_hdl);
	high = (uint32_t)(desc->dmac_laddress >> 32);
	low = (uint32_t)desc->dmac_laddress;
	igc_write32(igc, IGC_RDBAH(ring->irr_idx), high);
	igc_write32(igc, IGC_RDBAL(ring->irr_idx), low);

	/*
	 * Program the ring length (in bytes).
	 */
	val = igc->igc_rx_ndesc * sizeof (union igc_adv_rx_desc);
	igc_write32(igc, IGC_RDLEN(ring->irr_idx), val);

	/*
	 * Program the descriptor type and buffer length.
	 */
	val = (igc->igc_rx_buf_size >> IGC_SRRCTL_BSIZEPKT_SHIFT) |
	    IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;
	igc_write32(igc, IGC_SRRCTL(ring->irr_idx), val);

	/*
	 * Program the ring control register itself. Note, we crib the threshold
	 * values directly from igb and didn't think much harder than that.
	 */
	val = igc_read32(igc, IGC_RXDCTL(ring->irr_idx));
	val &= IGC_RXDCTL_PRESERVE;
	val |= IGC_RXDCTL_QUEUE_ENABLE;
	val = IGC_RXDCTL_SET_PTHRESH(val, 16);
	val = IGC_RXDCTL_SET_HTHRESH(val, 8);
	val = IGC_RXDCTL_SET_WTHRESH(val, 1);
	igc_write32(igc, IGC_RXDCTL(ring->irr_idx), val);
}
546*6bbbd442SRobert Mustacchi 
547*6bbbd442SRobert Mustacchi void
igc_rx_hw_init(igc_t * igc)548*6bbbd442SRobert Mustacchi igc_rx_hw_init(igc_t *igc)
549*6bbbd442SRobert Mustacchi {
550*6bbbd442SRobert Mustacchi 	uint32_t rctl, rxcsum;
551*6bbbd442SRobert Mustacchi 
552*6bbbd442SRobert Mustacchi 	/*
553*6bbbd442SRobert Mustacchi 	 * Start by setting up the receive control register.
554*6bbbd442SRobert Mustacchi 	 *
555*6bbbd442SRobert Mustacchi 	 * We clear out any bits in the multicast shift portion. This'll leave
556*6bbbd442SRobert Mustacchi 	 * it so [47:36] of the address are used as part of the look up. We also
557*6bbbd442SRobert Mustacchi 	 * don't want to receive bad packets, so make sure that's cleared out.
558*6bbbd442SRobert Mustacchi 	 * In addition, we clear out loopback mode.
559*6bbbd442SRobert Mustacchi 	 */
560*6bbbd442SRobert Mustacchi 	rctl = igc_read32(igc, IGC_RCTL);
561*6bbbd442SRobert Mustacchi 	rctl &= ~(3 << IGC_RCTL_MO_SHIFT);
562*6bbbd442SRobert Mustacchi 	rctl &= ~IGC_RCTL_SBP;
563*6bbbd442SRobert Mustacchi 	rctl &= ~(IGC_RCTL_LBM_MAC | IGC_RCTL_LBM_TCVR);
564*6bbbd442SRobert Mustacchi 
565*6bbbd442SRobert Mustacchi 	/*
566*6bbbd442SRobert Mustacchi 	 * Set things up such that we're enabled, we receive broadcast packets,
567*6bbbd442SRobert Mustacchi 	 * and we allow for large packets. We leave the rx descriptor threshold
568*6bbbd442SRobert Mustacchi 	 * at 2048 bytes and make sure to always strip the Ethernet CRC as mac
569*6bbbd442SRobert Mustacchi 	 * doesn't want it.
570*6bbbd442SRobert Mustacchi 	 */
571*6bbbd442SRobert Mustacchi 	rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_LPE |
572*6bbbd442SRobert Mustacchi 	    IGC_RCTL_RDMTS_HALF | IGC_RCTL_SECRC;
573*6bbbd442SRobert Mustacchi 
574*6bbbd442SRobert Mustacchi 	/*
575*6bbbd442SRobert Mustacchi 	 * Set the multicast filter based on hardware.
576*6bbbd442SRobert Mustacchi 	 */
577*6bbbd442SRobert Mustacchi 	rctl |= igc->igc_hw.mac.mc_filter_type << IGC_RCTL_MO_SHIFT;
578*6bbbd442SRobert Mustacchi 
579*6bbbd442SRobert Mustacchi 	/*
580*6bbbd442SRobert Mustacchi 	 * Make sure each ring is set up and its registers are programmed.
581*6bbbd442SRobert Mustacchi 	 */
582*6bbbd442SRobert Mustacchi 	for (uint32_t i = 0; i < igc->igc_nrx_rings; i++) {
583*6bbbd442SRobert Mustacchi 		igc_rx_ring_hw_init(igc, &igc->igc_rx_rings[i]);
584*6bbbd442SRobert Mustacchi 	}
585*6bbbd442SRobert Mustacchi 
586*6bbbd442SRobert Mustacchi 	/*
587*6bbbd442SRobert Mustacchi 	 * As we always set LPE (large packet enable) in the receive control
588*6bbbd442SRobert Mustacchi 	 * register, we must go through and explicitly update the maximum frame
589*6bbbd442SRobert Mustacchi 	 * size.
590*6bbbd442SRobert Mustacchi 	 */
591*6bbbd442SRobert Mustacchi 	igc_write32(igc, IGC_RLPML, igc->igc_max_frame);
592*6bbbd442SRobert Mustacchi 
593*6bbbd442SRobert Mustacchi 	/*
594*6bbbd442SRobert Mustacchi 	 * Explicitly enable IPv4 and TCP checksums. We leave PCSD set to zero
595*6bbbd442SRobert Mustacchi 	 * for the moment as we're not enabling RSS, which is what would be
596*6bbbd442SRobert Mustacchi 	 * required to get that. After this is where we would set up the VMDq
597*6bbbd442SRobert Mustacchi 	 * mode and RSS if we supported multiple RX rings.
598*6bbbd442SRobert Mustacchi 	 */
599*6bbbd442SRobert Mustacchi 	rxcsum = IGC_RXCSUM_IPOFL | IGC_RXCSUM_TUOFL;
600*6bbbd442SRobert Mustacchi 	igc_write32(igc, IGC_RXCSUM, rxcsum);
601*6bbbd442SRobert Mustacchi 
602*6bbbd442SRobert Mustacchi 	/*
603*6bbbd442SRobert Mustacchi 	 * Enable the receive unit finally
604*6bbbd442SRobert Mustacchi 	 */
605*6bbbd442SRobert Mustacchi 	igc_write32(igc, IGC_RCTL, rctl);
606*6bbbd442SRobert Mustacchi 
607*6bbbd442SRobert Mustacchi 	/*
608*6bbbd442SRobert Mustacchi 	 * Only after the receive unit is initialized can we actually set up the
609*6bbbd442SRobert Mustacchi 	 * ring head and tail pointers.
610*6bbbd442SRobert Mustacchi 	 */
611*6bbbd442SRobert Mustacchi 	for (uint32_t i = 0; i < igc->igc_nrx_rings; i++) {
612*6bbbd442SRobert Mustacchi 		igc_write32(igc, IGC_RDH(igc->igc_rx_rings[i].irr_idx), 0);
613*6bbbd442SRobert Mustacchi 		igc_write32(igc, IGC_RDT(igc->igc_rx_rings[i].irr_idx),
614*6bbbd442SRobert Mustacchi 		    igc->igc_rx_ndesc - 1);
615*6bbbd442SRobert Mustacchi 	}
616*6bbbd442SRobert Mustacchi }
617*6bbbd442SRobert Mustacchi 
618*6bbbd442SRobert Mustacchi static inline uint32_t
igc_next_desc(uint32_t cur,uint32_t count,uint32_t size)619*6bbbd442SRobert Mustacchi igc_next_desc(uint32_t cur, uint32_t count, uint32_t size)
620*6bbbd442SRobert Mustacchi {
621*6bbbd442SRobert Mustacchi 	uint32_t out;
622*6bbbd442SRobert Mustacchi 
623*6bbbd442SRobert Mustacchi 	if (cur + count < size) {
624*6bbbd442SRobert Mustacchi 		out = cur + count;
625*6bbbd442SRobert Mustacchi 	} else {
626*6bbbd442SRobert Mustacchi 		out = cur + count - size;
627*6bbbd442SRobert Mustacchi 	}
628*6bbbd442SRobert Mustacchi 
629*6bbbd442SRobert Mustacchi 	return (out);
630*6bbbd442SRobert Mustacchi }
631*6bbbd442SRobert Mustacchi 
632*6bbbd442SRobert Mustacchi static inline uint32_t
igc_prev_desc(uint32_t cur,uint32_t count,uint32_t size)633*6bbbd442SRobert Mustacchi igc_prev_desc(uint32_t cur, uint32_t count, uint32_t size)
634*6bbbd442SRobert Mustacchi {
635*6bbbd442SRobert Mustacchi 	uint32_t out;
636*6bbbd442SRobert Mustacchi 
637*6bbbd442SRobert Mustacchi 	if (cur >= count) {
638*6bbbd442SRobert Mustacchi 		out = cur - count;
639*6bbbd442SRobert Mustacchi 	} else {
640*6bbbd442SRobert Mustacchi 		out = cur - count + size;
641*6bbbd442SRobert Mustacchi 	}
642*6bbbd442SRobert Mustacchi 
643*6bbbd442SRobert Mustacchi 	return (out);
644*6bbbd442SRobert Mustacchi }
645*6bbbd442SRobert Mustacchi 
646*6bbbd442SRobert Mustacchi 
647*6bbbd442SRobert Mustacchi static mblk_t *
igc_rx_copy(igc_rx_ring_t * ring,uint32_t idx,uint32_t len)648*6bbbd442SRobert Mustacchi igc_rx_copy(igc_rx_ring_t *ring, uint32_t idx, uint32_t len)
649*6bbbd442SRobert Mustacchi {
650*6bbbd442SRobert Mustacchi 	const igc_rx_buffer_t *buf = ring->irr_work_list[idx];
651*6bbbd442SRobert Mustacchi 	mblk_t *mp;
652*6bbbd442SRobert Mustacchi 
653*6bbbd442SRobert Mustacchi 	IGC_DMA_SYNC(&buf->irb_dma, DDI_DMA_SYNC_FORKERNEL);
654*6bbbd442SRobert Mustacchi 	mp = allocb(len + IGC_RX_BUF_IP_ALIGN, 0);
655*6bbbd442SRobert Mustacchi 	if (mp == NULL) {
656*6bbbd442SRobert Mustacchi 		ring->irr_stat.irs_copy_nomem.value.ui64++;
657*6bbbd442SRobert Mustacchi 		return (NULL);
658*6bbbd442SRobert Mustacchi 	}
659*6bbbd442SRobert Mustacchi 
660*6bbbd442SRobert Mustacchi 	mp->b_rptr += IGC_RX_BUF_IP_ALIGN;
661*6bbbd442SRobert Mustacchi 	bcopy(buf->irb_dma.idb_va + IGC_RX_BUF_IP_ALIGN, mp->b_rptr, len);
662*6bbbd442SRobert Mustacchi 	mp->b_wptr = mp->b_rptr + len;
663*6bbbd442SRobert Mustacchi 	ring->irr_stat.irs_ncopy.value.ui64++;
664*6bbbd442SRobert Mustacchi 	return (mp);
665*6bbbd442SRobert Mustacchi }
666*6bbbd442SRobert Mustacchi 
667*6bbbd442SRobert Mustacchi static mblk_t *
igc_rx_bind(igc_rx_ring_t * ring,uint32_t idx,uint32_t len)668*6bbbd442SRobert Mustacchi igc_rx_bind(igc_rx_ring_t *ring, uint32_t idx, uint32_t len)
669*6bbbd442SRobert Mustacchi {
670*6bbbd442SRobert Mustacchi 	igc_rx_buffer_t *buf = ring->irr_work_list[idx];
671*6bbbd442SRobert Mustacchi 	igc_rx_buffer_t *sub;
672*6bbbd442SRobert Mustacchi 
673*6bbbd442SRobert Mustacchi 	ASSERT(MUTEX_HELD(&ring->irr_lock));
674*6bbbd442SRobert Mustacchi 
675*6bbbd442SRobert Mustacchi 	/*
676*6bbbd442SRobert Mustacchi 	 * If there are no free buffers, we can't bind. Try to grab this now so
677*6bbbd442SRobert Mustacchi 	 * we can minimize free list contention.
678*6bbbd442SRobert Mustacchi 	 */
679*6bbbd442SRobert Mustacchi 	mutex_enter(&ring->irr_free_lock);
680*6bbbd442SRobert Mustacchi 	if (ring->irr_nfree == 0) {
681*6bbbd442SRobert Mustacchi 		ring->irr_stat.irs_bind_nobuf.value.ui64++;
682*6bbbd442SRobert Mustacchi 		mutex_exit(&ring->irr_free_lock);
683*6bbbd442SRobert Mustacchi 		return (NULL);
684*6bbbd442SRobert Mustacchi 	}
685*6bbbd442SRobert Mustacchi 	ring->irr_nfree--;
686*6bbbd442SRobert Mustacchi 	sub = ring->irr_free_list[ring->irr_nfree];
687*6bbbd442SRobert Mustacchi 	mutex_exit(&ring->irr_free_lock);
688*6bbbd442SRobert Mustacchi 
689*6bbbd442SRobert Mustacchi 	/*
690*6bbbd442SRobert Mustacchi 	 * Check if we have an mblk_t here. If not, we'll need to allocate one
691*6bbbd442SRobert Mustacchi 	 * again. If that fails, we'll fail this and fall back to copy, though
692*6bbbd442SRobert Mustacchi 	 * the odds of that working are small.
693*6bbbd442SRobert Mustacchi 	 */
694*6bbbd442SRobert Mustacchi 	if (buf->irb_mp == NULL) {
695*6bbbd442SRobert Mustacchi 		caddr_t mblk_va = buf->irb_dma.idb_va + IGC_RX_BUF_IP_ALIGN;
696*6bbbd442SRobert Mustacchi 		size_t mblk_len = buf->irb_dma.idb_size - IGC_RX_BUF_IP_ALIGN;
697*6bbbd442SRobert Mustacchi 		buf->irb_mp = desballoc((uchar_t *)mblk_va, mblk_len, 0,
698*6bbbd442SRobert Mustacchi 		    &buf->irb_free_rtn);
699*6bbbd442SRobert Mustacchi 		if (buf->irb_mp == NULL) {
700*6bbbd442SRobert Mustacchi 			ring->irr_stat.irs_bind_nomp.value.ui64++;
701*6bbbd442SRobert Mustacchi 			mutex_enter(&ring->irr_free_lock);
702*6bbbd442SRobert Mustacchi 			ring->irr_free_list[ring->irr_nfree] = sub;
703*6bbbd442SRobert Mustacchi 			ring->irr_nfree++;
704*6bbbd442SRobert Mustacchi 			mutex_exit(&ring->irr_free_lock);
705*6bbbd442SRobert Mustacchi 			return (NULL);
706*6bbbd442SRobert Mustacchi 		}
707*6bbbd442SRobert Mustacchi 	}
708*6bbbd442SRobert Mustacchi 	buf->irb_mp->b_wptr = buf->irb_mp->b_rptr + len;
709*6bbbd442SRobert Mustacchi 	IGC_DMA_SYNC(&buf->irb_dma, DDI_DMA_SYNC_FORKERNEL);
710*6bbbd442SRobert Mustacchi 
711*6bbbd442SRobert Mustacchi 	/*
712*6bbbd442SRobert Mustacchi 	 * Swap an entry on the free list to replace this on the work list.
713*6bbbd442SRobert Mustacchi 	 */
714*6bbbd442SRobert Mustacchi 	ring->irr_work_list[idx] = sub;
715*6bbbd442SRobert Mustacchi 	ring->irr_stat.irs_nbind.value.ui64++;
716*6bbbd442SRobert Mustacchi 
717*6bbbd442SRobert Mustacchi 	/*
718*6bbbd442SRobert Mustacchi 	 * Update the buffer to make sure that we indicate it's been loaned for
719*6bbbd442SRobert Mustacchi 	 * future recycling.
720*6bbbd442SRobert Mustacchi 	 */
721*6bbbd442SRobert Mustacchi 	buf->irb_loaned = true;
722*6bbbd442SRobert Mustacchi 
723*6bbbd442SRobert Mustacchi 	return (buf->irb_mp);
724*6bbbd442SRobert Mustacchi }
725*6bbbd442SRobert Mustacchi 
726*6bbbd442SRobert Mustacchi /*
727*6bbbd442SRobert Mustacchi  * Go through the status bits defined in hardware to see if we can set checksum
728*6bbbd442SRobert Mustacchi  * information.
729*6bbbd442SRobert Mustacchi  */
730*6bbbd442SRobert Mustacchi static void
igc_rx_hcksum(igc_rx_ring_t * ring,mblk_t * mp,uint32_t status)731*6bbbd442SRobert Mustacchi igc_rx_hcksum(igc_rx_ring_t *ring, mblk_t *mp, uint32_t status)
732*6bbbd442SRobert Mustacchi {
733*6bbbd442SRobert Mustacchi 	uint32_t cksum = 0;
734*6bbbd442SRobert Mustacchi 	const uint32_t l4_valid = IGC_RXD_STAT_TCPCS | IGC_RXD_STAT_UDPCS;
735*6bbbd442SRobert Mustacchi 	const uint32_t l4_invalid = IGC_RXDEXT_STATERR_L4E;
736*6bbbd442SRobert Mustacchi 
737*6bbbd442SRobert Mustacchi 	if ((status & IGC_RXD_STAT_IXSM) != 0) {
738*6bbbd442SRobert Mustacchi 		ring->irr_stat.irs_ixsm.value.ui64++;
739*6bbbd442SRobert Mustacchi 		return;
740*6bbbd442SRobert Mustacchi 	}
741*6bbbd442SRobert Mustacchi 
742*6bbbd442SRobert Mustacchi 	if ((status & l4_invalid) != 0) {
743*6bbbd442SRobert Mustacchi 		ring->irr_stat.irs_l4cksum_err.value.ui64++;
744*6bbbd442SRobert Mustacchi 	} else if ((status & l4_valid) != 0) {
745*6bbbd442SRobert Mustacchi 		cksum |= HCK_FULLCKSUM_OK;
746*6bbbd442SRobert Mustacchi 	}
747*6bbbd442SRobert Mustacchi 
748*6bbbd442SRobert Mustacchi 	if ((status & IGC_RXDEXT_STATERR_IPE) != 0) {
749*6bbbd442SRobert Mustacchi 		ring->irr_stat.irs_l3cksum_err.value.ui64++;
750*6bbbd442SRobert Mustacchi 	} else if ((status & IGC_RXD_STAT_IPCS) != 0) {
751*6bbbd442SRobert Mustacchi 		cksum |= HCK_IPV4_HDRCKSUM_OK;
752*6bbbd442SRobert Mustacchi 	}
753*6bbbd442SRobert Mustacchi 
754*6bbbd442SRobert Mustacchi 	if (cksum != 0) {
755*6bbbd442SRobert Mustacchi 		ring->irr_stat.irs_hcksum_hit.value.ui64++;
756*6bbbd442SRobert Mustacchi 		mac_hcksum_set(mp, 0, 0, 0, 0, cksum);
757*6bbbd442SRobert Mustacchi 	} else {
758*6bbbd442SRobert Mustacchi 		ring->irr_stat.irs_hcksum_miss.value.ui64++;
759*6bbbd442SRobert Mustacchi 	}
760*6bbbd442SRobert Mustacchi }
761*6bbbd442SRobert Mustacchi 
762*6bbbd442SRobert Mustacchi mblk_t *
igc_ring_rx(igc_rx_ring_t * ring,int poll_bytes)763*6bbbd442SRobert Mustacchi igc_ring_rx(igc_rx_ring_t *ring, int poll_bytes)
764*6bbbd442SRobert Mustacchi {
765*6bbbd442SRobert Mustacchi 	union igc_adv_rx_desc *cur_desc;
766*6bbbd442SRobert Mustacchi 	uint32_t cur_status, cur_head;
767*6bbbd442SRobert Mustacchi 	uint64_t rx_bytes = 0, rx_frames = 0;
768*6bbbd442SRobert Mustacchi 	igc_t *igc = ring->irr_igc;
769*6bbbd442SRobert Mustacchi 	mblk_t *mp_head = NULL, **mp_tail = NULL;
770*6bbbd442SRobert Mustacchi 
771*6bbbd442SRobert Mustacchi 	ASSERT(MUTEX_HELD(&ring->irr_lock));
772*6bbbd442SRobert Mustacchi 	IGC_DMA_SYNC(&ring->irr_desc_dma, DDI_DMA_SYNC_FORKERNEL);
773*6bbbd442SRobert Mustacchi 
774*6bbbd442SRobert Mustacchi 	/*
775*6bbbd442SRobert Mustacchi 	 * Set up the invariants that we will maintain for the loop and then set
776*6bbbd442SRobert Mustacchi 	 * up our mblk queue.
777*6bbbd442SRobert Mustacchi 	 */
778*6bbbd442SRobert Mustacchi 	cur_head = ring->irr_next;
779*6bbbd442SRobert Mustacchi 	cur_desc = &ring->irr_ring[cur_head];
780*6bbbd442SRobert Mustacchi 	cur_status = LE_32(cur_desc->wb.upper.status_error);
781*6bbbd442SRobert Mustacchi 	mp_head = NULL;
782*6bbbd442SRobert Mustacchi 	mp_tail = &mp_head;
783*6bbbd442SRobert Mustacchi 
784*6bbbd442SRobert Mustacchi 	while ((cur_status & IGC_RXD_STAT_DD) != 0) {
785*6bbbd442SRobert Mustacchi 		uint16_t cur_length = 0;
786*6bbbd442SRobert Mustacchi 		mblk_t *mp;
787*6bbbd442SRobert Mustacchi 
788*6bbbd442SRobert Mustacchi 		/*
789*6bbbd442SRobert Mustacchi 		 * Check that we have no errors on this packet. This packet
790*6bbbd442SRobert Mustacchi 		 * should also have EOP set because we only use a single
791*6bbbd442SRobert Mustacchi 		 * descriptor today. We primarily just check for the RXE error.
792*6bbbd442SRobert Mustacchi 		 * Most other error types were dropped in the extended format.
793*6bbbd442SRobert Mustacchi 		 */
794*6bbbd442SRobert Mustacchi 		if ((cur_status & IGC_RXDEXT_STATERR_RXE) != 0 ||
795*6bbbd442SRobert Mustacchi 		    (cur_status & IGC_RXD_STAT_EOP) == 0) {
796*6bbbd442SRobert Mustacchi 			ring->irr_stat.irs_desc_error.value.ui64++;
797*6bbbd442SRobert Mustacchi 			goto discard;
798*6bbbd442SRobert Mustacchi 		}
799*6bbbd442SRobert Mustacchi 
800*6bbbd442SRobert Mustacchi 
801*6bbbd442SRobert Mustacchi 		/*
802*6bbbd442SRobert Mustacchi 		 * We don't bump rx_frames here, because we do that at the end,
803*6bbbd442SRobert Mustacchi 		 * even if we've discarded frames so we can know to write the
804*6bbbd442SRobert Mustacchi 		 * tail register.
805*6bbbd442SRobert Mustacchi 		 */
806*6bbbd442SRobert Mustacchi 		cur_length = LE_16(cur_desc->wb.upper.length);
807*6bbbd442SRobert Mustacchi 		rx_bytes += cur_length;
808*6bbbd442SRobert Mustacchi 
809*6bbbd442SRobert Mustacchi 		mp = NULL;
810*6bbbd442SRobert Mustacchi 		if (cur_length > igc->igc_rx_bind_thresh) {
811*6bbbd442SRobert Mustacchi 			mp = igc_rx_bind(ring, cur_head, cur_length);
812*6bbbd442SRobert Mustacchi 		}
813*6bbbd442SRobert Mustacchi 
814*6bbbd442SRobert Mustacchi 		if (mp == NULL) {
815*6bbbd442SRobert Mustacchi 			mp = igc_rx_copy(ring, cur_head, cur_length);
816*6bbbd442SRobert Mustacchi 		}
817*6bbbd442SRobert Mustacchi 
818*6bbbd442SRobert Mustacchi 		if (mp != NULL) {
819*6bbbd442SRobert Mustacchi 			igc_rx_hcksum(ring, mp, cur_status);
820*6bbbd442SRobert Mustacchi 			*mp_tail = mp;
821*6bbbd442SRobert Mustacchi 			mp_tail = &mp->b_next;
822*6bbbd442SRobert Mustacchi 		}
823*6bbbd442SRobert Mustacchi 
824*6bbbd442SRobert Mustacchi discard:
825*6bbbd442SRobert Mustacchi 		/*
826*6bbbd442SRobert Mustacchi 		 * Prepare the frame for use again. Note, we can't assume that
827*6bbbd442SRobert Mustacchi 		 * the memory in the buffer is valid.
828*6bbbd442SRobert Mustacchi 		 */
829*6bbbd442SRobert Mustacchi 		igc_rx_ring_desc_write(ring, cur_head);
830*6bbbd442SRobert Mustacchi 
831*6bbbd442SRobert Mustacchi 		/*
832*6bbbd442SRobert Mustacchi 		 * Go through and update the values that our loop is using now.
833*6bbbd442SRobert Mustacchi 		 */
834*6bbbd442SRobert Mustacchi 		cur_head = igc_next_desc(cur_head, 1, igc->igc_rx_ndesc);
835*6bbbd442SRobert Mustacchi 		cur_desc = &ring->irr_ring[cur_head];
836*6bbbd442SRobert Mustacchi 		cur_status = LE_32(cur_desc->wb.upper.status_error);
837*6bbbd442SRobert Mustacchi 
838*6bbbd442SRobert Mustacchi 		/*
839*6bbbd442SRobert Mustacchi 		 * If we're polling, we need to check against the number of
840*6bbbd442SRobert Mustacchi 		 * received bytes. If we're in interrupt mode, we have a maximum
841*6bbbd442SRobert Mustacchi 		 * number of frames we're allowed to check.
842*6bbbd442SRobert Mustacchi 		 */
843*6bbbd442SRobert Mustacchi 		rx_frames++;
844*6bbbd442SRobert Mustacchi 		if (poll_bytes != IGC_RX_POLL_INTR &&
845*6bbbd442SRobert Mustacchi 		    (cur_length + rx_bytes) > poll_bytes) {
846*6bbbd442SRobert Mustacchi 			break;
847*6bbbd442SRobert Mustacchi 		} else if (poll_bytes == IGC_RX_POLL_INTR &&
848*6bbbd442SRobert Mustacchi 		    rx_frames >= igc->igc_rx_intr_nframes) {
849*6bbbd442SRobert Mustacchi 			break;
850*6bbbd442SRobert Mustacchi 		}
851*6bbbd442SRobert Mustacchi 	}
852*6bbbd442SRobert Mustacchi 
853*6bbbd442SRobert Mustacchi 	/*
854*6bbbd442SRobert Mustacchi 	 * Go ahead and re-arm the ring and update our stats along the way as
855*6bbbd442SRobert Mustacchi 	 * long as we received at least one frame. Because we modified the
856*6bbbd442SRobert Mustacchi 	 * descriptor ring as part of resetting frames, we must resync.
857*6bbbd442SRobert Mustacchi 	 */
858*6bbbd442SRobert Mustacchi 	if (rx_frames != 0) {
859*6bbbd442SRobert Mustacchi 		uint32_t tail;
860*6bbbd442SRobert Mustacchi 
861*6bbbd442SRobert Mustacchi 		IGC_DMA_SYNC(&ring->irr_desc_dma, DDI_DMA_SYNC_FORDEV);
862*6bbbd442SRobert Mustacchi 		ring->irr_next = cur_head;
863*6bbbd442SRobert Mustacchi 		tail = igc_prev_desc(cur_head, 1, igc->igc_rx_ndesc);
864*6bbbd442SRobert Mustacchi 		igc_write32(igc, IGC_RDT(ring->irr_idx), tail);
865*6bbbd442SRobert Mustacchi 
866*6bbbd442SRobert Mustacchi 		ring->irr_stat.irs_rbytes.value.ui64 += rx_bytes;
867*6bbbd442SRobert Mustacchi 		ring->irr_stat.irs_ipackets.value.ui64 += rx_frames;
868*6bbbd442SRobert Mustacchi 	}
869*6bbbd442SRobert Mustacchi 
870*6bbbd442SRobert Mustacchi #ifdef	DEBUG
871*6bbbd442SRobert Mustacchi 	if (rx_frames == 0) {
872*6bbbd442SRobert Mustacchi 		ASSERT0(rx_bytes);
873*6bbbd442SRobert Mustacchi 	}
874*6bbbd442SRobert Mustacchi #endif
875*6bbbd442SRobert Mustacchi 
876*6bbbd442SRobert Mustacchi 	return (mp_head);
877*6bbbd442SRobert Mustacchi }
878*6bbbd442SRobert Mustacchi 
879*6bbbd442SRobert Mustacchi /*
880*6bbbd442SRobert Mustacchi  * This is called from the stop entry point after the hardware has been reset.
881*6bbbd442SRobert Mustacchi  * After the hardware has been reset, the other possible consumer of rx buffers
882*6bbbd442SRobert Mustacchi  * are those that have been loaned up the stack. As such, we need to wait on
883*6bbbd442SRobert Mustacchi  * each free list until the number of free entries have gotten back to the
884*6bbbd442SRobert Mustacchi  * expected number.
885*6bbbd442SRobert Mustacchi  */
886*6bbbd442SRobert Mustacchi void
igc_rx_drain(igc_t * igc)887*6bbbd442SRobert Mustacchi igc_rx_drain(igc_t *igc)
888*6bbbd442SRobert Mustacchi {
889*6bbbd442SRobert Mustacchi 	for (uint32_t i = 0; i < igc->igc_nrx_rings; i++) {
890*6bbbd442SRobert Mustacchi 		igc_rx_ring_t *ring = &igc->igc_rx_rings[i];
891*6bbbd442SRobert Mustacchi 
892*6bbbd442SRobert Mustacchi 		mutex_enter(&ring->irr_free_lock);
893*6bbbd442SRobert Mustacchi 		while (ring->irr_nfree < igc->igc_rx_nfree) {
894*6bbbd442SRobert Mustacchi 			cv_wait(&ring->irr_free_cv, &ring->irr_free_lock);
895*6bbbd442SRobert Mustacchi 		}
896*6bbbd442SRobert Mustacchi 		mutex_exit(&ring->irr_free_lock);
897*6bbbd442SRobert Mustacchi 	}
898*6bbbd442SRobert Mustacchi }
899*6bbbd442SRobert Mustacchi 
900*6bbbd442SRobert Mustacchi static void
igc_tx_bufs_free(igc_t * igc,igc_tx_ring_t * ring)901*6bbbd442SRobert Mustacchi igc_tx_bufs_free(igc_t *igc, igc_tx_ring_t *ring)
902*6bbbd442SRobert Mustacchi {
903*6bbbd442SRobert Mustacchi 	for (uint32_t i = 0; i < igc->igc_tx_nbuf; i++) {
904*6bbbd442SRobert Mustacchi 		igc_tx_buffer_t *buf = &ring->itr_arena[i];
905*6bbbd442SRobert Mustacchi 
906*6bbbd442SRobert Mustacchi 		/*
907*6bbbd442SRobert Mustacchi 		 * While we try to clean up the ring reasonably well, if for
908*6bbbd442SRobert Mustacchi 		 * some reason we insert descriptors that the device doesn't
909*6bbbd442SRobert Mustacchi 		 * like, then parts of the ring may not end up cleaned up. In
910*6bbbd442SRobert Mustacchi 		 * such cases we'll need to free the mblk here ourselves and
911*6bbbd442SRobert Mustacchi 		 * clean up any binding.
912*6bbbd442SRobert Mustacchi 		 */
913*6bbbd442SRobert Mustacchi 		if (buf->itb_bind) {
914*6bbbd442SRobert Mustacchi 			buf->itb_bind = false;
915*6bbbd442SRobert Mustacchi 			(void) ddi_dma_unbind_handle(buf->itb_bind_hdl);
916*6bbbd442SRobert Mustacchi 		}
917*6bbbd442SRobert Mustacchi 		freemsgchain(buf->itb_mp);
918*6bbbd442SRobert Mustacchi 		igc_dma_free(&buf->itb_dma);
919*6bbbd442SRobert Mustacchi 		if (buf->itb_bind_hdl != NULL) {
920*6bbbd442SRobert Mustacchi 			ddi_dma_free_handle(&buf->itb_bind_hdl);
921*6bbbd442SRobert Mustacchi 		}
922*6bbbd442SRobert Mustacchi 	}
923*6bbbd442SRobert Mustacchi }
924*6bbbd442SRobert Mustacchi 
925*6bbbd442SRobert Mustacchi static bool
igc_tx_bufs_alloc(igc_t * igc,igc_tx_ring_t * ring)926*6bbbd442SRobert Mustacchi igc_tx_bufs_alloc(igc_t *igc, igc_tx_ring_t *ring)
927*6bbbd442SRobert Mustacchi {
928*6bbbd442SRobert Mustacchi 	for (uint32_t i = 0; i < igc->igc_tx_nbuf; i++) {
929*6bbbd442SRobert Mustacchi 		igc_tx_buffer_t *buf = &ring->itr_arena[i];
930*6bbbd442SRobert Mustacchi 		ddi_dma_attr_t attr;
931*6bbbd442SRobert Mustacchi 		int ret;
932*6bbbd442SRobert Mustacchi 
933*6bbbd442SRobert Mustacchi 		igc_dma_data_attr(igc, &attr);
934*6bbbd442SRobert Mustacchi 		if (!igc_dma_alloc(igc, &buf->itb_dma, &attr,
935*6bbbd442SRobert Mustacchi 		    igc->igc_tx_buf_size)) {
936*6bbbd442SRobert Mustacchi 			dev_err(igc->igc_dip, CE_WARN, "!failed to allocate TX "
937*6bbbd442SRobert Mustacchi 			    "ring %u buffer %u", ring->itr_idx, i);
938*6bbbd442SRobert Mustacchi 			return (false);
939*6bbbd442SRobert Mustacchi 		}
940*6bbbd442SRobert Mustacchi 
941*6bbbd442SRobert Mustacchi 		igc_dma_tx_attr(igc, &attr);
942*6bbbd442SRobert Mustacchi 		if ((ret = ddi_dma_alloc_handle(igc->igc_dip, &attr,
943*6bbbd442SRobert Mustacchi 		    DDI_DMA_DONTWAIT, NULL, &buf->itb_bind_hdl)) !=
944*6bbbd442SRobert Mustacchi 		    DDI_SUCCESS) {
945*6bbbd442SRobert Mustacchi 			dev_err(igc->igc_dip, CE_WARN, "!failed to allocate TX "
946*6bbbd442SRobert Mustacchi 			    "ring %u TX DMA handle %u: %d", ring->itr_idx, i,
947*6bbbd442SRobert Mustacchi 			    ret);
948*6bbbd442SRobert Mustacchi 			return (false);
949*6bbbd442SRobert Mustacchi 		}
950*6bbbd442SRobert Mustacchi 
951*6bbbd442SRobert Mustacchi 		list_insert_tail(&ring->itr_free_list, buf);
952*6bbbd442SRobert Mustacchi 	}
953*6bbbd442SRobert Mustacchi 
954*6bbbd442SRobert Mustacchi 	return (true);
955*6bbbd442SRobert Mustacchi }
956*6bbbd442SRobert Mustacchi 
957*6bbbd442SRobert Mustacchi void
igc_tx_data_free(igc_t * igc)958*6bbbd442SRobert Mustacchi igc_tx_data_free(igc_t *igc)
959*6bbbd442SRobert Mustacchi {
960*6bbbd442SRobert Mustacchi 	for (uint32_t i = 0; i < igc->igc_ntx_rings; i++) {
961*6bbbd442SRobert Mustacchi 		igc_tx_ring_t *ring = &igc->igc_tx_rings[i];
962*6bbbd442SRobert Mustacchi 
963*6bbbd442SRobert Mustacchi 		/*
964*6bbbd442SRobert Mustacchi 		 * Empty the free list before we destroy the list to avoid
965*6bbbd442SRobert Mustacchi 		 * blowing an assertion.
966*6bbbd442SRobert Mustacchi 		 */
967*6bbbd442SRobert Mustacchi 		while (list_remove_head(&ring->itr_free_list) != NULL)
968*6bbbd442SRobert Mustacchi 			;
969*6bbbd442SRobert Mustacchi 
970*6bbbd442SRobert Mustacchi 		if (ring->itr_arena != NULL) {
971*6bbbd442SRobert Mustacchi 			igc_tx_bufs_free(igc, ring);
972*6bbbd442SRobert Mustacchi 			kmem_free(ring->itr_arena, sizeof (igc_tx_buffer_t) *
973*6bbbd442SRobert Mustacchi 			    igc->igc_tx_nbuf);
974*6bbbd442SRobert Mustacchi 			ring->itr_arena = NULL;
975*6bbbd442SRobert Mustacchi 		}
976*6bbbd442SRobert Mustacchi 
977*6bbbd442SRobert Mustacchi 		list_destroy(&ring->itr_free_list);
978*6bbbd442SRobert Mustacchi 
979*6bbbd442SRobert Mustacchi 		if (ring->itr_work_list != NULL) {
980*6bbbd442SRobert Mustacchi 			kmem_free(ring->itr_work_list, igc->igc_tx_ndesc *
981*6bbbd442SRobert Mustacchi 			    sizeof (igc_tx_buffer_t *));
982*6bbbd442SRobert Mustacchi 			ring->itr_work_list = NULL;
983*6bbbd442SRobert Mustacchi 		}
984*6bbbd442SRobert Mustacchi 
985*6bbbd442SRobert Mustacchi 		if (ring->itr_ring != NULL) {
986*6bbbd442SRobert Mustacchi 			igc_dma_free(&ring->itr_desc_dma);
987*6bbbd442SRobert Mustacchi 			ring->itr_ring = NULL;
988*6bbbd442SRobert Mustacchi 			ring->itr_ring_head = 0;
989*6bbbd442SRobert Mustacchi 			ring->itr_ring_tail = 0;
990*6bbbd442SRobert Mustacchi 			ring->itr_ring_free = 0;
991*6bbbd442SRobert Mustacchi 		}
992*6bbbd442SRobert Mustacchi 	}
993*6bbbd442SRobert Mustacchi }
994*6bbbd442SRobert Mustacchi 
995*6bbbd442SRobert Mustacchi bool
igc_tx_data_alloc(igc_t * igc)996*6bbbd442SRobert Mustacchi igc_tx_data_alloc(igc_t *igc)
997*6bbbd442SRobert Mustacchi {
998*6bbbd442SRobert Mustacchi 	for (uint32_t i = 0; i < igc->igc_ntx_rings; i++) {
999*6bbbd442SRobert Mustacchi 		igc_tx_ring_t *ring = &igc->igc_tx_rings[i];
1000*6bbbd442SRobert Mustacchi 		ddi_dma_attr_t desc_attr;
1001*6bbbd442SRobert Mustacchi 		size_t desc_len;
1002*6bbbd442SRobert Mustacchi 
1003*6bbbd442SRobert Mustacchi 		igc_dma_desc_attr(igc, &desc_attr);
1004*6bbbd442SRobert Mustacchi 		desc_len = sizeof (union igc_adv_tx_desc) *
1005*6bbbd442SRobert Mustacchi 		    igc->igc_tx_ndesc;
1006*6bbbd442SRobert Mustacchi 		if (!igc_dma_alloc(igc, &ring->itr_desc_dma, &desc_attr,
1007*6bbbd442SRobert Mustacchi 		    desc_len)) {
1008*6bbbd442SRobert Mustacchi 			dev_err(igc->igc_dip, CE_WARN, "!failed to allocate "
1009*6bbbd442SRobert Mustacchi 			    "TX descriptor ring %u", i);
1010*6bbbd442SRobert Mustacchi 			goto cleanup;
1011*6bbbd442SRobert Mustacchi 		}
1012*6bbbd442SRobert Mustacchi 		ring->itr_ring = (void *)ring->itr_desc_dma.idb_va;
1013*6bbbd442SRobert Mustacchi 
1014*6bbbd442SRobert Mustacchi 		ring->itr_work_list = kmem_zalloc(sizeof (igc_tx_buffer_t *) *
1015*6bbbd442SRobert Mustacchi 		    igc->igc_tx_ndesc, KM_NOSLEEP);
1016*6bbbd442SRobert Mustacchi 		if (ring->itr_work_list == NULL) {
1017*6bbbd442SRobert Mustacchi 			dev_err(igc->igc_dip, CE_WARN, "!failed to allocate "
1018*6bbbd442SRobert Mustacchi 			    "TX descriptor ring %u tx work list", i);
1019*6bbbd442SRobert Mustacchi 			goto cleanup;
1020*6bbbd442SRobert Mustacchi 		}
1021*6bbbd442SRobert Mustacchi 
1022*6bbbd442SRobert Mustacchi 		list_create(&ring->itr_free_list, sizeof (igc_tx_buffer_t),
1023*6bbbd442SRobert Mustacchi 		    offsetof(igc_tx_buffer_t, itb_node));
1024*6bbbd442SRobert Mustacchi 
1025*6bbbd442SRobert Mustacchi 		ring->itr_arena = kmem_zalloc(sizeof (igc_tx_buffer_t) *
1026*6bbbd442SRobert Mustacchi 		    igc->igc_tx_nbuf, KM_NOSLEEP);
1027*6bbbd442SRobert Mustacchi 		if (ring->itr_arena == NULL) {
1028*6bbbd442SRobert Mustacchi 			dev_err(igc->igc_dip, CE_WARN, "!failed to allocate "
1029*6bbbd442SRobert Mustacchi 			    "TX descriptor ring %u tx buf arena", i);
1030*6bbbd442SRobert Mustacchi 			goto cleanup;
1031*6bbbd442SRobert Mustacchi 		}
1032*6bbbd442SRobert Mustacchi 
1033*6bbbd442SRobert Mustacchi 		if (!igc_tx_bufs_alloc(igc, ring)) {
1034*6bbbd442SRobert Mustacchi 			goto cleanup;
1035*6bbbd442SRobert Mustacchi 		}
1036*6bbbd442SRobert Mustacchi 	}
1037*6bbbd442SRobert Mustacchi 
1038*6bbbd442SRobert Mustacchi 	return (true);
1039*6bbbd442SRobert Mustacchi 
1040*6bbbd442SRobert Mustacchi cleanup:
1041*6bbbd442SRobert Mustacchi 	igc_tx_data_free(igc);
1042*6bbbd442SRobert Mustacchi 	return (false);
1043*6bbbd442SRobert Mustacchi }
1044*6bbbd442SRobert Mustacchi 
1045*6bbbd442SRobert Mustacchi static void
igc_tx_ring_hw_init(igc_t * igc,igc_tx_ring_t * ring)1046*6bbbd442SRobert Mustacchi igc_tx_ring_hw_init(igc_t *igc, igc_tx_ring_t *ring)
1047*6bbbd442SRobert Mustacchi {
1048*6bbbd442SRobert Mustacchi 	uint32_t val, high, low;
1049*6bbbd442SRobert Mustacchi 	const ddi_dma_cookie_t *desc;
1050*6bbbd442SRobert Mustacchi 
1051*6bbbd442SRobert Mustacchi 	/*
1052*6bbbd442SRobert Mustacchi 	 * Program the ring's address.
1053*6bbbd442SRobert Mustacchi 	 */
1054*6bbbd442SRobert Mustacchi 	desc = ddi_dma_cookie_one(ring->itr_desc_dma.idb_hdl);
1055*6bbbd442SRobert Mustacchi 	high = (uint32_t)(desc->dmac_laddress >> 32);
1056*6bbbd442SRobert Mustacchi 	low = (uint32_t)desc->dmac_laddress;
1057*6bbbd442SRobert Mustacchi 	igc_write32(igc, IGC_TDBAH(ring->itr_idx), high);
1058*6bbbd442SRobert Mustacchi 	igc_write32(igc, IGC_TDBAL(ring->itr_idx), low);
1059*6bbbd442SRobert Mustacchi 
1060*6bbbd442SRobert Mustacchi 	/*
1061*6bbbd442SRobert Mustacchi 	 * Program the ring length.
1062*6bbbd442SRobert Mustacchi 	 */
1063*6bbbd442SRobert Mustacchi 	val = igc->igc_tx_ndesc * sizeof (union igc_adv_tx_desc);
1064*6bbbd442SRobert Mustacchi 	igc_write32(igc, IGC_TDLEN(ring->itr_idx), val);
1065*6bbbd442SRobert Mustacchi 
1066*6bbbd442SRobert Mustacchi 	/*
1067*6bbbd442SRobert Mustacchi 	 * Initialize the head and tail pointers that are in use. We can do this
1068*6bbbd442SRobert Mustacchi 	 * for TX unlike RX because we don't want the device to transmit
1069*6bbbd442SRobert Mustacchi 	 * anything.
1070*6bbbd442SRobert Mustacchi 	 */
1071*6bbbd442SRobert Mustacchi 	igc_write32(igc, IGC_TDH(ring->itr_idx), 0);
1072*6bbbd442SRobert Mustacchi 	igc_write32(igc, IGC_TDT(ring->itr_idx), 0);
1073*6bbbd442SRobert Mustacchi 	ring->itr_ring_head = 0;
1074*6bbbd442SRobert Mustacchi 	ring->itr_ring_tail = 0;
1075*6bbbd442SRobert Mustacchi 	ring->itr_ring_free = igc->igc_tx_ndesc;
1076*6bbbd442SRobert Mustacchi 
1077*6bbbd442SRobert Mustacchi 	/*
1078*6bbbd442SRobert Mustacchi 	 * Ensure that a tx queue is disabled prior to taking any action. We do
1079*6bbbd442SRobert Mustacchi 	 * a subsequent read just in case relaxed ordering is enabled. We are
1080*6bbbd442SRobert Mustacchi 	 * required to set the various thresholds for when prefetch should
1081*6bbbd442SRobert Mustacchi 	 * occur, how many valid descriptors it waits before prefetch, and then
1082*6bbbd442SRobert Mustacchi 	 * what the write back granularity is. Picking these numbers is a bit
1083*6bbbd442SRobert Mustacchi 	 * weird.
1084*6bbbd442SRobert Mustacchi 	 *
1085*6bbbd442SRobert Mustacchi 	 * igb historically didn't modify these values. e1000g varied based on
1086*6bbbd442SRobert Mustacchi 	 * the hardware type and has done any number of different things here.
1087*6bbbd442SRobert Mustacchi 	 * The generic datasheet recommendation in the I210 is to set WTHRESH to
1088*6bbbd442SRobert Mustacchi 	 * 1 and leave everything else at zero. Drivers in other systems vary
1089*6bbbd442SRobert Mustacchi 	 * their settings.
1090*6bbbd442SRobert Mustacchi 	 *
1091*6bbbd442SRobert Mustacchi 	 * Right now we end up basically just following the datasheet and also
1092*6bbbd442SRobert Mustacchi 	 * rely on the ITR that we set. This can probably be improved upon at
1093*6bbbd442SRobert Mustacchi 	 * some point.
1094*6bbbd442SRobert Mustacchi 	 */
1095*6bbbd442SRobert Mustacchi 	igc_write32(igc, IGC_TXDCTL(0), 0);
1096*6bbbd442SRobert Mustacchi 	(void) igc_read32(igc, IGC_STATUS);
1097*6bbbd442SRobert Mustacchi 	val = 0;
1098*6bbbd442SRobert Mustacchi 	val = IGC_TXDCTL_SET_PTHRESH(val, 0);
1099*6bbbd442SRobert Mustacchi 	val = IGC_TXDCTL_SET_HTHRESH(val, 0);
1100*6bbbd442SRobert Mustacchi 	val = IGC_TXDCTL_SET_WTHRESH(val, 1);
1101*6bbbd442SRobert Mustacchi 	val |= IGC_TXDCTL_QUEUE_ENABLE;
1102*6bbbd442SRobert Mustacchi 	igc_write32(igc, IGC_TXDCTL(0), val);
1103*6bbbd442SRobert Mustacchi }
1104*6bbbd442SRobert Mustacchi 
1105*6bbbd442SRobert Mustacchi void
igc_tx_hw_init(igc_t * igc)1106*6bbbd442SRobert Mustacchi igc_tx_hw_init(igc_t *igc)
1107*6bbbd442SRobert Mustacchi {
1108*6bbbd442SRobert Mustacchi 	uint32_t val;
1109*6bbbd442SRobert Mustacchi 
1110*6bbbd442SRobert Mustacchi 	for (uint32_t i = 0; i < igc->igc_ntx_rings; i++) {
1111*6bbbd442SRobert Mustacchi 		igc_tx_ring_hw_init(igc, &igc->igc_tx_rings[i]);
1112*6bbbd442SRobert Mustacchi 	}
1113*6bbbd442SRobert Mustacchi 
1114*6bbbd442SRobert Mustacchi 	val = igc_read32(igc, IGC_TCTL);
1115*6bbbd442SRobert Mustacchi 	val &= ~IGC_TCTL_CT;
1116*6bbbd442SRobert Mustacchi 	val |= IGC_TCTL_PSP | IGC_TCTL_RTLC | IGC_TCTL_EN |
1117*6bbbd442SRobert Mustacchi 	    (IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT);
1118*6bbbd442SRobert Mustacchi 	igc_write32(igc, IGC_TCTL, val);
1119*6bbbd442SRobert Mustacchi }
1120*6bbbd442SRobert Mustacchi 
1121*6bbbd442SRobert Mustacchi static void
igc_tx_buf_reset(igc_tx_buffer_t * buf)1122*6bbbd442SRobert Mustacchi igc_tx_buf_reset(igc_tx_buffer_t *buf)
1123*6bbbd442SRobert Mustacchi {
1124*6bbbd442SRobert Mustacchi 	buf->itb_mp = NULL;
1125*6bbbd442SRobert Mustacchi 	buf->itb_len = 0;
1126*6bbbd442SRobert Mustacchi 	buf->itb_last_desc = 0;
1127*6bbbd442SRobert Mustacchi 	buf->itb_first = false;
1128*6bbbd442SRobert Mustacchi 	if (buf->itb_bind) {
1129*6bbbd442SRobert Mustacchi 		(void) ddi_dma_unbind_handle(buf->itb_bind_hdl);
1130*6bbbd442SRobert Mustacchi 	}
1131*6bbbd442SRobert Mustacchi 	buf->itb_bind = false;
1132*6bbbd442SRobert Mustacchi }
1133*6bbbd442SRobert Mustacchi 
/*
 * When we are recycling packets, we need to sync the ring and then walk from
 * what we last processed up to what is in the tail or the first entry that is
 * not done. It is not clear that the I225 hardware has the separate write back
 * feature that igb does, so instead we have to look for the packet being noted
 * as done in the descriptor.
 *
 * On return, completed descriptors have been zeroed, their tx buffers moved
 * back to the ring's free list, and any associated mblk chains freed. If MAC
 * was previously blocked and enough descriptors became free, MAC is notified.
 */
void
igc_tx_recycle(igc_t *igc, igc_tx_ring_t *ring)
{
	uint32_t head, tail, ndesc = 0;
	list_t to_free;
	mblk_t *mp = NULL;
	bool notify = false;

	/*
	 * Snapshot the current head and tail before we do more processing. The
	 * driver bumps the tail when transmitting and bumps the head only here,
	 * so we know that anything in the region of [head, tail) is safe for us
	 * to touch (if the hardware is done) while anything in the region of
	 * [tail, head) is not.
	 *
	 * The itr_recycle flag ensures only one thread walks the ring at a
	 * time, which is what lets us drop the lock for the walk itself.
	 */
	mutex_enter(&ring->itr_lock);
	if (ring->itr_recycle) {
		mutex_exit(&ring->itr_lock);
		return;
	}
	ring->itr_recycle = true;
	head = ring->itr_ring_head;
	tail = ring->itr_ring_tail;
	mutex_exit(&ring->itr_lock);

	list_create(&to_free, sizeof (igc_tx_buffer_t),
	    offsetof(igc_tx_buffer_t, itb_node));

	/* Pick up the hardware's descriptor write-backs before reading. */
	IGC_DMA_SYNC(&ring->itr_desc_dma, DDI_DMA_SYNC_FORKERNEL);

	/*
	 * We need to walk the transmit descriptors to see what we can free.
	 * Here is where we need to deal with the wrinkle the theory statement
	 * discusses (see 'TX Data Path Design' in igc.c). We look at the head
	 * of the ring and see what item has the tail that we expect to be done
	 * and use that to determine if we are done with the entire packet. If
	 * we're done with the entire packet, then we walk the rest of the
	 * descriptors and will proceed.
	 */
	while (head != tail) {
		uint32_t status, last_desc, next_desc;
		igc_tx_buffer_t *check_buf = ring->itr_work_list[head];

		/* The head entry must be a packet's first buffer. */
		ASSERT3P(check_buf, !=, NULL);
		ASSERT3U(check_buf->itb_first, ==, true);

		/*
		 * Only the packet's final descriptor has RS set, so the DD
		 * status there covers the entire packet.
		 */
		last_desc = check_buf->itb_last_desc;
		status = LE_32(ring->itr_ring[last_desc].wb.status);
		if ((status & IGC_TXD_STAT_DD) == 0) {
			break;
		}

		/*
		 * We need to clean up this packet. This involves walking each
		 * descriptor, resetting it, finding each tx buffer, and mblk,
		 * and cleaning that up. A descriptor may or may not have a tx
		 * buffer associated with it.
		 */
		next_desc = igc_next_desc(last_desc, 1, igc->igc_tx_ndesc);
		for (uint32_t desc = head; desc != next_desc;
		    desc = igc_next_desc(desc, 1, igc->igc_tx_ndesc)) {
			igc_tx_buffer_t *buf;
			bzero(&ring->itr_ring[desc],
			    sizeof (union igc_adv_tx_desc));
			ndesc++;
			buf = ring->itr_work_list[desc];
			if (buf == NULL)
				continue;
			ring->itr_work_list[desc] = NULL;

			/* Chain up mblks so they can be freed off-lock. */
			if (buf->itb_mp != NULL) {
				buf->itb_mp->b_next = mp;
				mp = buf->itb_mp;
			}
			igc_tx_buf_reset(buf);
			list_insert_tail(&to_free, buf);
		}

		head = next_desc;
	}

	/*
	 * Publish the new head, return the reclaimed buffers, and decide
	 * whether MAC should be told that the ring can accept more work.
	 */
	mutex_enter(&ring->itr_lock);
	ring->itr_ring_head = head;
	ring->itr_ring_free += ndesc;
	list_move_tail(&ring->itr_free_list, &to_free);
	if (ring->itr_mac_blocked && ring->itr_ring_free >
	    igc->igc_tx_notify_thresh) {
		ring->itr_mac_blocked = false;
		notify = true;
	}
	ring->itr_recycle = false;
	mutex_exit(&ring->itr_lock);

	/* Call back into MAC and free mblks without holding the ring lock. */
	if (notify) {
		mac_tx_ring_update(igc->igc_mac_hdl, ring->itr_rh);
	}

	freemsgchain(mp);
	list_destroy(&to_free);
}
1241*6bbbd442SRobert Mustacchi 
1242*6bbbd442SRobert Mustacchi static igc_tx_buffer_t *
igc_tx_buffer_alloc(igc_tx_ring_t * ring)1243*6bbbd442SRobert Mustacchi igc_tx_buffer_alloc(igc_tx_ring_t *ring)
1244*6bbbd442SRobert Mustacchi {
1245*6bbbd442SRobert Mustacchi 	igc_tx_buffer_t *buf;
1246*6bbbd442SRobert Mustacchi 	mutex_enter(&ring->itr_lock);
1247*6bbbd442SRobert Mustacchi 	buf = list_remove_head(&ring->itr_free_list);
1248*6bbbd442SRobert Mustacchi 	if (buf == NULL) {
1249*6bbbd442SRobert Mustacchi 		ring->itr_stat.its_no_tx_bufs.value.ui64++;
1250*6bbbd442SRobert Mustacchi 	}
1251*6bbbd442SRobert Mustacchi 	mutex_exit(&ring->itr_lock);
1252*6bbbd442SRobert Mustacchi 
1253*6bbbd442SRobert Mustacchi 	return (buf);
1254*6bbbd442SRobert Mustacchi }
1255*6bbbd442SRobert Mustacchi 
/*
 * Utilize a new tx buffer to perform a DMA binding for this mblk. Returns
 * false if no buffer is available or the bind fails, in which case the caller
 * is expected to fall back to copying.
 */
static bool
igc_tx_ring_bind(igc_tx_ring_t *ring, mblk_t *mp, igc_tx_state_t *tx)
{
	size_t len = MBLKL(mp);
	igc_tx_buffer_t *buf;
	int ret;
	uint_t ncookie;

	buf = igc_tx_buffer_alloc(ring);
	if (buf == NULL) {
		return (false);
	}

	ret = ddi_dma_addr_bind_handle(buf->itb_bind_hdl, NULL,
	    (void *)mp->b_rptr, len, DDI_DMA_WRITE | DDI_DMA_STREAMING,
	    DDI_DMA_DONTWAIT, NULL, NULL, &ncookie);
	if (ret != DDI_DMA_MAPPED) {
		/*
		 * Binding failed. Give this buffer back.
		 */
		ring->itr_stat.its_tx_bind_fail.value.ui64++;
		mutex_enter(&ring->itr_lock);
		list_insert_tail(&ring->itr_free_list, buf);
		mutex_exit(&ring->itr_lock);
		return (false);
	}

	/*
	 * Now that this is successful, we append it to the list and update our
	 * tracking structure. We don't do this earlier so the existing copy
	 * buffer (if any) stays usable as the fallback path. Each DMA cookie
	 * consumes one data descriptor, and zeroing itx_buf_rem forces any
	 * subsequent copy to allocate a fresh buffer rather than appending to
	 * this bound one.
	 */
	buf->itb_len = len;
	buf->itb_bind = true;
	tx->itx_ndescs += ncookie;
	tx->itx_buf_rem = 0;
	tx->itx_cur_buf = buf;
	list_insert_tail(&tx->itx_bufs, tx->itx_cur_buf);
	ring->itr_stat.its_tx_bind.value.ui64++;
	return (true);
}
1300*6bbbd442SRobert Mustacchi 
/*
 * Copy the current mblk into a series of one or more tx buffers depending on
 * what's available. Returns false if a tx buffer cannot be allocated part-way
 * through; buffers already appended to tx->itx_bufs remain there for the
 * caller to clean up.
 */
static bool
igc_tx_ring_copy(igc_tx_ring_t *ring, mblk_t *mp, igc_tx_state_t *tx)
{
	size_t len = MBLKL(mp);		/* bytes of this mblk left to copy */
	size_t off = 0;			/* offset of next byte in the mblk */

	while (len > 0) {
		const void *src;
		void *dest;
		size_t to_copy;

		/*
		 * If the current buffer is used for binding, then we must get a
		 * new one. If it is used for copying, we can keep going until
		 * it is full.
		 */
		if (tx->itx_cur_buf != NULL && (tx->itx_cur_buf->itb_bind ||
		    tx->itx_buf_rem == 0)) {
			tx->itx_cur_buf = NULL;
			tx->itx_buf_rem = 0;
		}

		if (tx->itx_cur_buf == NULL) {
			tx->itx_cur_buf = igc_tx_buffer_alloc(ring);
			if (tx->itx_cur_buf == NULL) {
				return (false);
			}
			list_insert_tail(&tx->itx_bufs, tx->itx_cur_buf);
			tx->itx_buf_rem = tx->itx_cur_buf->itb_dma.idb_size;
			/*
			 * Each DMA buffer used for TX only requires a single
			 * cookie. So note that descriptor requirement here and
			 * flag this tx buffer as being used for copying.
			 */
			tx->itx_ndescs++;
			tx->itx_cur_buf->itb_bind = false;
		}

		/*
		 * Copy as much of the remaining mblk data as fits in the
		 * current buffer, appending after any bytes already copied
		 * into it (itb_len) from earlier mblks of this packet.
		 */
		to_copy = MIN(len, tx->itx_buf_rem);
		src = mp->b_rptr + off;
		dest = tx->itx_cur_buf->itb_dma.idb_va +
		    tx->itx_cur_buf->itb_len;
		bcopy(src, dest, to_copy);

		tx->itx_buf_rem -= to_copy;
		tx->itx_cur_buf->itb_len += to_copy;
		len -= to_copy;
		off += to_copy;
	}

	ring->itr_stat.its_tx_copy.value.ui64++;
	return (true);
}
1358*6bbbd442SRobert Mustacchi 
1359*6bbbd442SRobert Mustacchi /*
1360*6bbbd442SRobert Mustacchi  * We only need to load a context descriptor if what we're loading has changed.
1361*6bbbd442SRobert Mustacchi  * This checks if it has and if so, updates the fields that have changed. Note,
1362*6bbbd442SRobert Mustacchi  * a packet that doesn't require offloads won't end up taking us through this
1363*6bbbd442SRobert Mustacchi  * path.
1364*6bbbd442SRobert Mustacchi  */
1365*6bbbd442SRobert Mustacchi static bool
igc_tx_ring_context_changed(igc_tx_ring_t * ring,igc_tx_state_t * tx)1366*6bbbd442SRobert Mustacchi igc_tx_ring_context_changed(igc_tx_ring_t *ring, igc_tx_state_t *tx)
1367*6bbbd442SRobert Mustacchi {
1368*6bbbd442SRobert Mustacchi 	bool change = false;
1369*6bbbd442SRobert Mustacchi 	igc_tx_context_data_t *data = &ring->itr_tx_ctx;
1370*6bbbd442SRobert Mustacchi 
1371*6bbbd442SRobert Mustacchi 	if (data->itc_l2hlen != tx->itx_meoi.meoi_l2hlen) {
1372*6bbbd442SRobert Mustacchi 		change = true;
1373*6bbbd442SRobert Mustacchi 		data->itc_l2hlen = tx->itx_meoi.meoi_l2hlen;
1374*6bbbd442SRobert Mustacchi 	}
1375*6bbbd442SRobert Mustacchi 
1376*6bbbd442SRobert Mustacchi 	if (data->itc_l3hlen != tx->itx_meoi.meoi_l3hlen) {
1377*6bbbd442SRobert Mustacchi 		change = true;
1378*6bbbd442SRobert Mustacchi 		data->itc_l3hlen = tx->itx_meoi.meoi_l3hlen;
1379*6bbbd442SRobert Mustacchi 	}
1380*6bbbd442SRobert Mustacchi 
1381*6bbbd442SRobert Mustacchi 	if (data->itc_l3proto != tx->itx_meoi.meoi_l3proto) {
1382*6bbbd442SRobert Mustacchi 		change = true;
1383*6bbbd442SRobert Mustacchi 		data->itc_l3proto = tx->itx_meoi.meoi_l3proto;
1384*6bbbd442SRobert Mustacchi 	}
1385*6bbbd442SRobert Mustacchi 
1386*6bbbd442SRobert Mustacchi 	if (data->itc_l4proto != tx->itx_meoi.meoi_l4proto) {
1387*6bbbd442SRobert Mustacchi 		change = true;
1388*6bbbd442SRobert Mustacchi 		data->itc_l4proto = tx->itx_meoi.meoi_l4proto;
1389*6bbbd442SRobert Mustacchi 	}
1390*6bbbd442SRobert Mustacchi 
1391*6bbbd442SRobert Mustacchi 	if (data->itc_l4hlen != tx->itx_meoi.meoi_l4hlen) {
1392*6bbbd442SRobert Mustacchi 		change = true;
1393*6bbbd442SRobert Mustacchi 		data->itc_l4hlen = tx->itx_meoi.meoi_l4hlen;
1394*6bbbd442SRobert Mustacchi 	}
1395*6bbbd442SRobert Mustacchi 
1396*6bbbd442SRobert Mustacchi 	if (data->itc_mss != tx->itx_mss) {
1397*6bbbd442SRobert Mustacchi 		change = true;
1398*6bbbd442SRobert Mustacchi 		data->itc_mss = tx->itx_mss;
1399*6bbbd442SRobert Mustacchi 	}
1400*6bbbd442SRobert Mustacchi 
1401*6bbbd442SRobert Mustacchi 	if (data->itc_cksum != tx->itx_cksum) {
1402*6bbbd442SRobert Mustacchi 		change = true;
1403*6bbbd442SRobert Mustacchi 		data->itc_cksum = tx->itx_cksum;
1404*6bbbd442SRobert Mustacchi 	}
1405*6bbbd442SRobert Mustacchi 
1406*6bbbd442SRobert Mustacchi 	if (data->itc_lso != tx->itx_lso) {
1407*6bbbd442SRobert Mustacchi 		change = true;
1408*6bbbd442SRobert Mustacchi 		data->itc_lso = tx->itx_lso;
1409*6bbbd442SRobert Mustacchi 	}
1410*6bbbd442SRobert Mustacchi 
1411*6bbbd442SRobert Mustacchi 	return (change);
1412*6bbbd442SRobert Mustacchi }
1413*6bbbd442SRobert Mustacchi 
/*
 * Fill out common descriptor information. First and last descriptor information
 * is handled after this. One data descriptor is written per DMA cookie of the
 * buffer (bound buffers may have several; copy buffers have one), and the
 * ring tail is advanced past each descriptor written.
 */
static void
igc_tx_ring_write_buf_descs(igc_t *igc, igc_tx_ring_t *ring,
    igc_tx_buffer_t *buf)
{
	/* A bound buffer uses its bind handle; a copy buffer its own DMA. */
	ddi_dma_handle_t hdl = buf->itb_bind ? buf->itb_bind_hdl :
	    buf->itb_dma.idb_hdl;
	uint_t nc = ddi_dma_ncookies(hdl);
	size_t rem_len = buf->itb_len;

	ASSERT(MUTEX_HELD(&ring->itr_lock));
	ASSERT3U(rem_len, !=, 0);

	/* Note: the tail advance happens in the for-clause on each pass. */
	for (uint_t i = 0; i < nc; i++, ring->itr_ring_tail =
	    igc_next_desc(ring->itr_ring_tail, 1, igc->igc_tx_ndesc)) {
		const ddi_dma_cookie_t *c = ddi_dma_cookie_get(hdl, i);
		union igc_adv_tx_desc *desc;
		uint32_t type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
		    IGC_ADVTXD_DCMD_IFCS;
		uint32_t desc_len = MIN(rem_len, c->dmac_size);

		/* Quick sanity check on max data descriptor */
		ASSERT3U(desc_len, <, 0x10000);
		ASSERT3U(desc_len, >, 0x0);
		type |= desc_len;
		rem_len -= desc_len;
		desc = &ring->itr_ring[ring->itr_ring_tail];
		desc->read.buffer_addr = LE_64(c->dmac_laddress);
		desc->read.cmd_type_len = LE_32(type);
		desc->read.olinfo_status = LE_32(0);

		/*
		 * Save the transmit buffer in the first descriptor entry that
		 * we use for this.
		 */
		if (i == 0) {
			ring->itr_work_list[ring->itr_ring_tail] = buf;
		}
	}
}
1457*6bbbd442SRobert Mustacchi 
/*
 * We have created our chain of tx buffers that have been copied and bound. Now
 * insert them into place and insert a context descriptor if it will be
 * required. Unlike igb, we don't juggle multiple hardware context slots;
 * we cache the last context we programmed on the ring and only write a new
 * context descriptor when the offload settings have changed.
 *
 * Returns false (without consuming anything) if the ring lacks sufficient
 * free descriptors even after a recycle attempt.
 */
static bool
igc_tx_ring_write_descs(igc_t *igc, igc_tx_ring_t *ring, mblk_t *mp,
    igc_tx_state_t *tx)
{
	bool do_ctx = false;
	igc_tx_buffer_t *buf;
	uint32_t ctx_desc, first_desc, last_desc, flags, status;

	/*
	 * If either checksumming or LSO is set, we may need a context
	 * descriptor. We assume we will and then if not will adjust that.
	 */
	if (tx->itx_cksum != 0 || tx->itx_lso != 0) {
		do_ctx = true;
		tx->itx_ndescs++;
	}

	mutex_enter(&ring->itr_lock);
	if (tx->itx_ndescs + igc->igc_tx_gap > ring->itr_ring_free) {
		/*
		 * Attempt to recycle descriptors before we give up.
		 */
		mutex_exit(&ring->itr_lock);
		igc_tx_recycle(igc, ring);
		mutex_enter(&ring->itr_lock);
		if (tx->itx_ndescs + igc->igc_tx_gap > ring->itr_ring_free) {
			mutex_exit(&ring->itr_lock);
			return (false);
		}
	}

	/*
	 * Now see if the context descriptor has changed, if required. If not,
	 * then we can reduce the number of descriptors required. We want to do
	 * this after we've checked for descriptors because this will mutate the
	 * next tx descriptor we have to load.
	 */
	if (do_ctx && !igc_tx_ring_context_changed(ring, tx)) {
		do_ctx = false;
		tx->itx_ndescs--;
	}

	ring->itr_ring_free -= tx->itx_ndescs;
	ctx_desc = ring->itr_ring_tail;
	if (do_ctx) {
		struct igc_adv_tx_context_desc *ctx;
		uint32_t len = tx->itx_meoi.meoi_l3hlen |
		    (tx->itx_meoi.meoi_l2hlen << IGC_ADVTXD_MACLEN_SHIFT);
		uint32_t tucmd = IGC_ADVTXD_DCMD_DEXT | IGC_ADVTXD_DTYP_CTXT;
		uint32_t l4idx = 0;

		/* Tell the hardware the L3 protocol for header checksum/LSO. */
		if ((tx->itx_lso & HW_LSO) != 0 ||
		    (tx->itx_cksum & HCK_IPV4_HDRCKSUM) != 0) {
			if (tx->itx_meoi.meoi_l3proto == ETHERTYPE_IP) {
				tucmd |= IGC_ADVTXD_TUCMD_IPV4;
			} else {
				ASSERT3U(tx->itx_meoi.meoi_l3proto, ==,
				    ETHERTYPE_IPV6);
				tucmd |= IGC_ADVTXD_TUCMD_IPV6;
			}
		}

		/* Likewise indicate the L4 protocol where one is relevant. */
		if ((tx->itx_lso & HW_LSO) != 0 ||
		    (tx->itx_cksum & HCK_PARTIALCKSUM) != 0) {
			if (tx->itx_meoi.meoi_l4proto == IPPROTO_TCP) {
				tucmd |= IGC_ADVTXD_TUCMD_L4T_TCP;
			} else if (tx->itx_meoi.meoi_l4proto == IPPROTO_UDP) {
				tucmd |= IGC_ADVTXD_TUCMD_L4T_UDP;
			}
		}

		/*
		 * The L4LEN and MSS fields are only required if we're
		 * performing TSO. The index is always zero regardless because
		 * the I225 only has one context per queue.
		 */
		if ((tx->itx_lso & HW_LSO) != 0) {
			l4idx |= tx->itx_meoi.meoi_l4hlen <<
			    IGC_ADVTXD_L4LEN_SHIFT;
			l4idx |= tx->itx_mss << IGC_ADVTXD_MSS_SHIFT;
		}

		ctx = (void *)&ring->itr_ring[ctx_desc];
		ctx->vlan_macip_lens = LE_32(len);
		ctx->launch_time = 0;
		ctx->type_tucmd_mlhl = LE_32(tucmd);
		ctx->mss_l4len_idx = LE_32(l4idx);
		ring->itr_ring_tail = igc_next_desc(ring->itr_ring_tail, 1,
		    igc->igc_tx_ndesc);
		DTRACE_PROBE4(igc__context__desc, igc_t *, igc, igc_tx_ring_t *,
		    ring, igc_tx_state_t *, tx,
		    struct igc_adv_tx_context_desc *, ctx);
	}

	first_desc = ring->itr_ring_tail;

	/* Write the data descriptors for every buffer of this packet. */
	while ((buf = list_remove_head(&tx->itx_bufs)) != NULL) {
		igc_tx_ring_write_buf_descs(igc, ring, buf);
	}

	/*
	 * The last descriptor must have end of packet set and is the entry that
	 * we ask for status on. That is, we don't actually ask for the status
	 * of each transmit buffer, only the final one so we can more easily
	 * collect everything including the context descriptor if present.
	 */
	last_desc = igc_prev_desc(ring->itr_ring_tail, 1, igc->igc_tx_ndesc);
	flags = IGC_ADVTXD_DCMD_EOP | IGC_ADVTXD_DCMD_RS;
	ring->itr_ring[last_desc].read.cmd_type_len |= LE_32(flags);

	/*
	 * We must now go back and set settings on the first data descriptor to
	 * indicate what checksumming and offload features we require. Note, we
	 * keep the IDX field as zero because there is only one context field
	 * per queue in the I225.
	 *
	 * We also save the mblk_t on the first tx buffer in the set which
	 * should always be saved with the first descriptor we use, which may
	 * include the context descriptor. Because this descriptor tracks when
	 * the entire packet is sent and we won't collect it until we're done
	 * with the entire packet, it's okay to leave this on the start.
	 */
	flags = 0;
	status = 0;
	if ((tx->itx_cksum & HCK_IPV4_HDRCKSUM) != 0) {
		status |= IGC_TXD_POPTS_IXSM << 8;
	}

	if ((tx->itx_cksum & HCK_PARTIALCKSUM) != 0) {
		status |= IGC_TXD_POPTS_TXSM << 8;
	}

	if ((tx->itx_lso & HW_LSO) != 0) {
		/* For TSO, PAYLEN covers only the L4 payload, not headers. */
		size_t payload = tx->itx_meoi.meoi_len -
		    tx->itx_meoi.meoi_l2hlen - tx->itx_meoi.meoi_l3hlen -
		    tx->itx_meoi.meoi_l4hlen;
		flags |= IGC_ADVTXD_DCMD_TSE;
		status |= payload << IGC_ADVTXD_PAYLEN_SHIFT;
	} else {
		status |= tx->itx_meoi.meoi_len << IGC_ADVTXD_PAYLEN_SHIFT;
	}

	ring->itr_ring[first_desc].read.cmd_type_len |= LE_32(flags);
	ring->itr_ring[first_desc].read.olinfo_status |= LE_32(status);
	ring->itr_work_list[first_desc]->itb_mp = mp;
	ring->itr_work_list[first_desc]->itb_first = true;
	ring->itr_work_list[first_desc]->itb_last_desc = last_desc;

	/*
	 * If we have a context descriptor, we must adjust the first work list
	 * item to point to the context descriptor. See 'TX Data Path Design' in
	 * the theory statement for more information.
	 */
	if (do_ctx) {
		ring->itr_work_list[ctx_desc] = ring->itr_work_list[first_desc];
		ring->itr_work_list[first_desc] = NULL;
	}

	ring->itr_stat.its_obytes.value.ui64 += tx->itx_meoi.meoi_len;
	ring->itr_stat.its_opackets.value.ui64++;

	/* Flush the descriptors to the device before moving the tail. */
	IGC_DMA_SYNC(&ring->itr_desc_dma, DDI_DMA_SYNC_FORDEV);
	igc_write32(igc, IGC_TDT(ring->itr_idx), ring->itr_ring_tail);
	mutex_exit(&ring->itr_lock);
	return (true);
}
1630*6bbbd442SRobert Mustacchi 
/*
 * Ring TX entry point handed to MAC for this TX ring. Transmit the single
 * packet chain mp (fragments linked via b_cont; b_next is unused here).
 * Returning NULL means we consumed the packet (either queued it to hardware
 * or dropped it); returning mp means the ring is too full right now, and we
 * have set itr_mac_blocked so the ring knows MAC is waiting on us.
 */
mblk_t *
igc_ring_tx(void *arg, mblk_t *mp)
{
	igc_tx_ring_t *ring = arg;
	igc_t *igc = ring->itr_igc;
	igc_tx_state_t tx = { 0 };

	/* MAC hands us one packet at a time on this path. */
	ASSERT3P(mp->b_next, ==, NULL);

	/*
	 * Parse the packet's L2/L3/L4 header layout up front; everything
	 * downstream (checksum, LSO, descriptor sizing) relies on it. If we
	 * can't parse the packet, drop it rather than hand it back.
	 */
	if (mac_ether_offload_info(mp, &tx.itx_meoi) != 0) {
		freemsg(mp);
		ring->itr_stat.its_bad_meo.value.ui64++;
		return (NULL);
	}

	/* Capture the checksum and LSO offloads MAC requested for this mp. */
	mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &tx.itx_cksum);
	mac_lso_get(mp, &tx.itx_mss, &tx.itx_lso);

	/*
	 * Note, we don't really care that the following check of the number of
	 * free descriptors may race with other threads due to a lack of the
	 * lock. It is an opportunistic attempt to reclaim descriptors before
	 * we take the lock; the authoritative check happens below.
	 */
	if (ring->itr_ring_free < igc->igc_tx_recycle_thresh) {
		igc_tx_recycle(igc, ring);
	}

	mutex_enter(&ring->itr_lock);
	if (ring->itr_ring_free < igc->igc_tx_notify_thresh) {
		/*
		 * Even after the recycle attempt we're below the notify
		 * threshold. Record that MAC is blocked on us and return the
		 * packet so it can be retried later.
		 */
		ring->itr_stat.its_ring_full.value.ui64++;
		ring->itr_mac_blocked = true;
		mutex_exit(&ring->itr_lock);
		return (mp);
	}
	mutex_exit(&ring->itr_lock);

	/*
	 * If we end up some day supporting lso and it was requested, then we
	 * need to check that the header and the payload are all in one
	 * contiguous block. If they're not then we'll need to force a copy into
	 * the descriptor for the headers.
	 */

	/*
	 * This list tracks the various tx buffers that we've allocated and will
	 * use.
	 */
	list_create(&tx.itx_bufs, sizeof (igc_tx_buffer_t),
	    offsetof(igc_tx_buffer_t, itb_node));

	for (mblk_t *cur_mp = mp; cur_mp != NULL; cur_mp = cur_mp->b_cont) {
		size_t len = MBLKL(cur_mp);

		/* Zero-length fragments contribute nothing; skip them. */
		if (len == 0) {
			continue;
		}

		/*
		 * Fragments above the bind threshold are worth DMA-binding
		 * directly; if binding fails we fall through to copying.
		 */
		if (len > igc->igc_tx_bind_thresh &&
		    igc_tx_ring_bind(ring, cur_mp, &tx)) {
			continue;
		}

		if (!igc_tx_ring_copy(ring, cur_mp, &tx))
			goto tx_failure;
	}

	if (!igc_tx_ring_write_descs(igc, ring, mp, &tx)) {
		goto tx_failure;
	}

	/*
	 * Success: the ring now owns mp and the tx buffers; only the tracking
	 * list itself needs tearing down.
	 */
	list_destroy(&tx.itx_bufs);
	return (NULL);

tx_failure:
	/*
	 * We ran out of descriptors or tx buffers. Clean up and give the mblk
	 * back to MAC so it can be retried.
	 */
	for (igc_tx_buffer_t *buf = list_head(&tx.itx_bufs); buf != NULL;
	    buf = list_next(&tx.itx_bufs, buf)) {
		igc_tx_buf_reset(buf);
	}

	/*
	 * Return the reset buffers to the ring's free list and note that MAC
	 * is blocked, both under the ring lock.
	 */
	mutex_enter(&ring->itr_lock);
	list_move_tail(&ring->itr_free_list, &tx.itx_bufs);
	ring->itr_mac_blocked = true;
	mutex_exit(&ring->itr_lock);
	list_destroy(&tx.itx_bufs);

	return (mp);
}
1721