/*
 * This file is provided under a CDDLv1 license. When using or
 * redistributing this file, you may do so under this license.
 * In redistributing this file this license must be included
 * and no other modification of this header file is permitted.
 *
 * CDDL LICENSE SUMMARY
 *
 * Copyright(c) 1999 - 2009 Intel Corporation. All rights reserved.
 *
 * The contents of this file are subject to the terms of Version
 * 1.0 of the Common Development and Distribution License (the "License").
 *
 * You should have received a copy of the License with this software.
 * You can obtain a copy of the License at
 * http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 */

/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * **********************************************************************
 *                                                                      *
 * Module Name:                                                         *
 *     e1000g_tx.c                                                      *
 *                                                                      *
 * Abstract:                                                            *
 *     This file contains the routines that handle transmit; they       *
 *     program the hardware to send the data pointed to by the          *
 *     packet out onto the physical medium.                             *
 *                                                                      *
 * **********************************************************************
 */

#include "e1000g_sw.h"
#include "e1000g_debug.h"

static boolean_t e1000g_send(struct e1000g *, mblk_t *);
static int e1000g_tx_copy(e1000g_tx_ring_t *,
    p_tx_sw_packet_t, mblk_t *, boolean_t);
static int e1000g_tx_bind(e1000g_tx_ring_t *,
    p_tx_sw_packet_t, mblk_t *);
static boolean_t e1000g_retrieve_context(mblk_t *, context_data_t *, size_t);
static boolean_t e1000g_check_context(e1000g_tx_ring_t *, context_data_t *);
static int e1000g_fill_tx_ring(e1000g_tx_ring_t *, LIST_DESCRIBER *,
    context_data_t *);
static void e1000g_fill_context_descriptor(context_data_t *,
    struct e1000_context_desc *);
static int e1000g_fill_tx_desc(e1000g_tx_ring_t *,
    p_tx_sw_packet_t, uint64_t, size_t);
static uint32_t e1000g_fill_82544_desc(uint64_t Address, size_t Length,
    p_desc_array_t desc_array);
static int e1000g_tx_workaround_PCIX_82544(p_tx_sw_packet_t, uint64_t, size_t);
static int e1000g_tx_workaround_jumbo_82544(p_tx_sw_packet_t, uint64_t, size_t);
static void e1000g_82547_timeout(void *);
static void e1000g_82547_tx_move_tail(e1000g_tx_ring_t *);
static void e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *);

#ifndef E1000G_DEBUG
#pragma inline(e1000g_tx_copy)
#pragma inline(e1000g_tx_bind)
#pragma inline(e1000g_retrieve_context)
#pragma inline(e1000g_check_context)
#pragma inline(e1000g_fill_tx_ring)
#pragma inline(e1000g_fill_context_descriptor)
#pragma inline(e1000g_fill_tx_desc)
#pragma inline(e1000g_fill_82544_desc)
#pragma inline(e1000g_tx_workaround_PCIX_82544)
#pragma inline(e1000g_tx_workaround_jumbo_82544)
#pragma inline(e1000g_free_tx_swpkt)
#endif

/*
 * e1000g_free_tx_swpkt - free up the tx sw packet
 *
 * Unbind the previously bound DMA handle for a given
 * transmit sw packet, and reset the sw packet data.
 */
void
e1000g_free_tx_swpkt(register p_tx_sw_packet_t packet)
{
	switch (packet->data_transfer_type) {
	case USE_BCOPY:
		packet->tx_buf->len = 0;
		break;
#ifdef __sparc
	case USE_DVMA:
		dvma_unload(packet->tx_dma_handle, 0, -1);
		break;
#endif
	case USE_DMA:
		(void) ddi_dma_unbind_handle(packet->tx_dma_handle);
		break;
	default:
		break;
	}

	/*
	 * The mblk has been stripped off the sw packet
	 * and will be freed in a triggered soft intr.
	 */
	ASSERT(packet->mp == NULL);

	packet->data_transfer_type = USE_NONE;
	packet->num_mblk_frag = 0;
	packet->num_desc = 0;
}

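/*
 * e1000g_m_tx - GLDv3 mc_tx entry point
 *
 * Walks the mblk chain and hands each message to e1000g_send(). Any
 * messages that cannot be sent (for example, when sw packets or
 * descriptors run out) are returned to the MAC layer, which treats a
 * non-NULL return as back-pressure and retries after mac_tx_update()
 * is called from the transmit interrupt path. If the device is
 * suspended, stopped, or the link is down, the whole chain is freed
 * and NULL is returned.
 */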
mblk_t *
e1000g_m_tx(void *arg, mblk_t *mp)
{
	struct e1000g *Adapter = (struct e1000g *)arg;
	mblk_t *next;

	rw_enter(&Adapter->chip_lock, RW_READER);

	if ((Adapter->e1000g_state & E1000G_SUSPENDED) ||
	    !(Adapter->e1000g_state & E1000G_STARTED) ||
	    (Adapter->link_state != LINK_STATE_UP)) {
		freemsgchain(mp);
		mp = NULL;
	}

	while (mp != NULL) {
		next = mp->b_next;
		mp->b_next = NULL;

		if (!e1000g_send(Adapter, mp)) {
			mp->b_next = next;
			break;
		}

		mp = next;
	}

	rw_exit(&Adapter->chip_lock);
	return (mp);
}

/*
 * e1000g_send - send packets onto the wire
 *
 * Called from e1000g_m_tx with an mblk ready to send. This
 * routine sets up the transmit descriptors and sends the data to
 * the wire. It also pushes the just-transmitted packet to
 * the used tx sw packet list.
 */
static boolean_t
e1000g_send(struct e1000g *Adapter, mblk_t *mp)
{
	p_tx_sw_packet_t packet;
	LIST_DESCRIBER pending_list;
	size_t len;
	size_t msg_size;
	uint32_t frag_count;
	int desc_count;
	uint32_t desc_total;
	uint32_t bcopy_thresh;
	uint32_t hdr_frag_len;
	boolean_t tx_undersize_flag;
	mblk_t *nmp;
	mblk_t *tmp;
	mblk_t *new_mp;
	mblk_t *pre_mp;
	mblk_t *next_mp;
	e1000g_tx_ring_t *tx_ring;
	context_data_t cur_context;

	tx_ring = Adapter->tx_ring;
	bcopy_thresh = Adapter->tx_bcopy_thresh;

	/* Get the total size and number of fragments of the message */
	tx_undersize_flag = B_FALSE;
	frag_count = 0;
	msg_size = 0;
	for (nmp = mp; nmp; nmp = nmp->b_cont) {
		frag_count++;
		msg_size += MBLKL(nmp);
	}

	/* Retrieve and compute information for the context descriptor */
	if (!e1000g_retrieve_context(mp, &cur_context, msg_size)) {
		freemsg(mp);
		return (B_TRUE);
	}

	/*
	 * Make sure the packet is less than the allowed size
	 */
	if (!cur_context.lso_flag &&
	    (msg_size > Adapter->max_frame_size - ETHERFCSL)) {
		/*
		 * For an oversized packet, we'll just drop it.
		 * So we return B_TRUE here.
		 */
		E1000G_DEBUGLOG_1(Adapter, E1000G_WARN_LEVEL,
		    "Tx packet out of bound. length = %d \n", msg_size);
		E1000G_STAT(tx_ring->stat_over_size);
		freemsg(mp);
		return (B_TRUE);
	}

	/*
	 * Check and reclaim tx descriptors.
	 * This low-water-mark check should be done all the time, as the
	 * transmit interrupt delay can deliver transmit interrupts a
	 * little late, which may cause problems reaping Tx descriptors:
	 * we may run short of them before getting any transmit interrupt.
	 */
	if (tx_ring->tbd_avail < DEFAULT_TX_NO_RESOURCE) {
		(void) e1000g_recycle(tx_ring);
		E1000G_DEBUG_STAT(tx_ring->stat_recycle);

		if (tx_ring->tbd_avail < DEFAULT_TX_NO_RESOURCE) {
			E1000G_DEBUG_STAT(tx_ring->stat_lack_desc);
			goto tx_no_resource;
		}
	}

	/*
	 * If the message size is less than the minimum ethernet packet size,
	 * we'll use bcopy to send it, and pad it to 60 bytes later.
	 */
	if (msg_size < ETHERMIN) {
		E1000G_DEBUG_STAT(tx_ring->stat_under_size);
		tx_undersize_flag = B_TRUE;
	}

	/* Initialize variables */
	desc_count = 1;	/* The initial value should be greater than 0 */
	desc_total = 0;
	new_mp = NULL;
	QUEUE_INIT_LIST(&pending_list);

	/* Process each mblk fragment and fill tx descriptors */
	/*
	 * The software should guarantee that the LSO packet header
	 * (MAC+IP+TCP) fits within one descriptor. Here we reallocate
	 * and refill the header if its physical memory is non-contiguous.
	 */
	if (cur_context.lso_flag) {
		/* find the last fragment of the header */
		len = MBLKL(mp);
		ASSERT(len > 0);
		next_mp = mp;
		pre_mp = NULL;
		while (len < cur_context.hdr_len) {
			pre_mp = next_mp;
			next_mp = next_mp->b_cont;
			len += MBLKL(next_mp);
		}
		/*
		 * If the header ends exactly on an mblk boundary (so the
		 * header and the payload are in different mblks), we simply
		 * force the header to be copied into the pre-allocated
		 * page-aligned buffer via the threshold adjustment below.
		 */
		if (len == cur_context.hdr_len)
			goto adjust_threshold;

		hdr_frag_len = cur_context.hdr_len - (len - MBLKL(next_mp));
		/*
		 * There are three cases in which we need to reallocate an
		 * mblk for the last header fragment:
		 *
		 * 1. the header is in multiple mblks and the last fragment
		 * shares the same mblk with the payload
		 *
		 * 2. the header is in a single mblk shared with the payload
		 * and the header is physically non-contiguous
		 *
		 * 3. there is a 4 KB boundary within the header plus the
		 * 64 bytes following the end of the header. This case may
		 * cause a TCP data corruption issue.
		 *
		 * The workaround for case #2 and case #3 is:
		 * Assuming standard Ethernet/IP/TCP headers of 54 bytes,
		 * the buffer (containing the headers) must not start in the
		 * 118 bytes immediately preceding a 4 KB boundary. For
		 * example, 128-byte alignment for this buffer could be used
		 * to fulfill this condition.
		 */
		if ((next_mp != mp) ||
		    (P2NPHASE((uintptr_t)next_mp->b_rptr,
		    E1000_LSO_FIRST_DESC_ALIGNMENT_BOUNDARY_4K)
		    < E1000_LSO_FIRST_DESC_ALIGNMENT)) {
			E1000G_DEBUG_STAT(tx_ring->stat_lso_header_fail);
			/*
			 * Reallocate the mblk for the last header fragment;
			 * it is expected to be bcopied into the
			 * pre-allocated page-aligned buffer.
			 */
			new_mp = allocb(hdr_frag_len, NULL);
			if (!new_mp)
				return (B_FALSE);
			bcopy(next_mp->b_rptr, new_mp->b_rptr, hdr_frag_len);
			/* link the new header fragment with the other parts */
			new_mp->b_wptr = new_mp->b_rptr + hdr_frag_len;
			new_mp->b_cont = next_mp;
			if (pre_mp)
				pre_mp->b_cont = new_mp;
			else
				mp = new_mp;
			next_mp->b_rptr += hdr_frag_len;
			frag_count++;
		}
adjust_threshold:
		/*
		 * Adjust the bcopy threshold to guarantee
		 * that the header uses the bcopy path.
		 */
		if (bcopy_thresh < cur_context.hdr_len)
			bcopy_thresh = cur_context.hdr_len;
	}

	packet = NULL;
	nmp = mp;
	while (nmp) {
		tmp = nmp->b_cont;

		len = MBLKL(nmp);
		/* Check zero-length mblks */
		if (len == 0) {
			E1000G_DEBUG_STAT(tx_ring->stat_empty_frags);
			/*
			 * If no packet buffer has been used yet, or we just
			 * completed processing a buffer, then skip the empty
			 * mblk fragment.
			 * Otherwise, there's still a pending buffer that
			 * needs to be processed (tx_copy).
			 */
			if (desc_count > 0) {
				nmp = tmp;
				continue;
			}
		}

		/*
		 * Get a new TxSwPacket to process mblk buffers.
		 */
		if (desc_count > 0) {
			mutex_enter(&tx_ring->freelist_lock);
			packet = (p_tx_sw_packet_t)
			    QUEUE_POP_HEAD(&tx_ring->free_list);
			mutex_exit(&tx_ring->freelist_lock);

			if (packet == NULL) {
				E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
				    "No Tx SwPacket available\n");
				E1000G_STAT(tx_ring->stat_no_swpkt);
				goto tx_send_failed;
			}
			QUEUE_PUSH_TAIL(&pending_list, &packet->Link);
		}

		ASSERT(packet);
		/*
		 * If the size of the fragment is less than the tx_bcopy_thresh
		 * we'll use bcopy; otherwise, we'll use DMA binding.
		 */
		if ((len <= bcopy_thresh) || tx_undersize_flag) {
			desc_count =
			    e1000g_tx_copy(tx_ring, packet, nmp,
			    tx_undersize_flag);
			E1000G_DEBUG_STAT(tx_ring->stat_copy);
		} else {
			desc_count =
			    e1000g_tx_bind(tx_ring, packet, nmp);
			E1000G_DEBUG_STAT(tx_ring->stat_bind);
		}

		if (desc_count > 0)
			desc_total += desc_count;
		else if (desc_count < 0)
			goto tx_send_failed;

		nmp = tmp;
	}

	/* Assign the message to the last sw packet */
	ASSERT(packet);
	ASSERT(packet->mp == NULL);
	packet->mp = mp;

	/* Try to recycle the tx descriptors again */
	if (tx_ring->tbd_avail < (desc_total + 3)) {
		E1000G_DEBUG_STAT(tx_ring->stat_recycle_retry);
		(void) e1000g_recycle(tx_ring);
	}

	mutex_enter(&tx_ring->tx_lock);

	/*
	 * If the number of available tx descriptors is not enough for
	 * transmit (one redundant descriptor and one hw checksum context
	 * descriptor are included), then return failure.
	 */
	if (tx_ring->tbd_avail < (desc_total + 3)) {
		E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
		    "Not enough Tx descriptors\n");
		E1000G_STAT(tx_ring->stat_no_desc);
		mutex_exit(&tx_ring->tx_lock);
		goto tx_send_failed;
	}

	desc_count = e1000g_fill_tx_ring(tx_ring, &pending_list, &cur_context);

	mutex_exit(&tx_ring->tx_lock);

	ASSERT(desc_count > 0);

	/* Send successful */
	return (B_TRUE);

tx_send_failed:
	/* Restore mp to its original state */
	if (new_mp) {
		if (pre_mp) {
			pre_mp->b_cont = next_mp;
		}
		new_mp->b_cont = NULL;
		freemsg(new_mp);

		next_mp->b_rptr -= hdr_frag_len;
	}

	/*
	 * Enable Transmit interrupts, so that the interrupt routine can
	 * call mac_tx_update() when transmit descriptors become available.
	 */
	tx_ring->resched_timestamp = ddi_get_lbolt();
	tx_ring->resched_needed = B_TRUE;
	if (!Adapter->tx_intr_enable)
		e1000g_mask_tx_interrupt(Adapter);

	/* Free pending TxSwPackets */
	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&pending_list);
	while (packet) {
		packet->mp = NULL;
		e1000g_free_tx_swpkt(packet);
		packet = (p_tx_sw_packet_t)
		    QUEUE_GET_NEXT(&pending_list, &packet->Link);
	}

	/* Return pending TxSwPackets to the "Free" list */
	mutex_enter(&tx_ring->freelist_lock);
	QUEUE_APPEND(&tx_ring->free_list, &pending_list);
	mutex_exit(&tx_ring->freelist_lock);

	E1000G_STAT(tx_ring->stat_send_fail);

	/* Message will be scheduled for re-transmit */
	return (B_FALSE);

tx_no_resource:
	/*
	 * Enable Transmit interrupts, so that the interrupt routine can
	 * call mac_tx_update() when transmit descriptors become available.
	 */
	tx_ring->resched_timestamp = ddi_get_lbolt();
	tx_ring->resched_needed = B_TRUE;
	if (!Adapter->tx_intr_enable)
		e1000g_mask_tx_interrupt(Adapter);

	/* Message will be scheduled for re-transmit */
	return (B_FALSE);
}

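/*
 * e1000g_retrieve_context - examine the headers and fill in context data
 *
 * Pulls the hardware checksum and LSO metadata off the mblk
 * (mac_hcksum_get()/mac_lso_get()), determines the ethernet header size
 * (VLAN-tagged or not), and, for LSO packets, computes the total
 * MAC+IP+TCP header length and the TCP payload length. For the common
 * untagged Ethernet/IPv4/TCP case the header length works out to
 * 14 + 20 + 20 = 54 bytes. Returns B_FALSE for an invalid LSO packet,
 * in which case the caller drops it.
 */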
static boolean_t
e1000g_retrieve_context(mblk_t *mp, context_data_t *cur_context,
    size_t msg_size)
{
	uintptr_t ip_start;
	uintptr_t tcp_start;
	mblk_t *nmp;
	uint32_t lsoflags;
	uint32_t mss;

	bzero(cur_context, sizeof (context_data_t));

	/* first check the LSO information */
	mac_lso_get(mp, &mss, &lsoflags);

	/* retrieve checksum info */
	mac_hcksum_get(mp, &cur_context->cksum_start,
	    &cur_context->cksum_stuff, NULL, NULL, &cur_context->cksum_flags);
	/* retrieve ethernet header size */
	if (((struct ether_vlan_header *)(uintptr_t)mp->b_rptr)->ether_tpid ==
	    htons(ETHERTYPE_VLAN))
		cur_context->ether_header_size =
		    sizeof (struct ether_vlan_header);
	else
		cur_context->ether_header_size =
		    sizeof (struct ether_header);

	if (lsoflags & HW_LSO) {
		ASSERT(mss != 0);

		/* the invalid packet will be freed by the caller */
		if (mss == 0 ||
		    !((cur_context->cksum_flags & HCK_PARTIALCKSUM) &&
		    (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM))) {
			return (B_FALSE);
		}
		cur_context->mss = (uint16_t)mss;
		cur_context->lso_flag = B_TRUE;

		/*
		 * Some fields are cleared for the hardware to fill
		 * in. We don't assume the Ethernet header, IP header and
		 * TCP header are always in the same mblk fragment,
		 * but we do assume each header is always within one
		 * mblk fragment and that the Ethernet header is in the
		 * first mblk fragment.
		 */
		nmp = mp;
		ip_start = (uintptr_t)(nmp->b_rptr)
		    + cur_context->ether_header_size;
		if (ip_start >= (uintptr_t)(nmp->b_wptr)) {
			ip_start = (uintptr_t)nmp->b_cont->b_rptr
			    + (ip_start - (uintptr_t)(nmp->b_wptr));
			nmp = nmp->b_cont;
		}
		tcp_start = ip_start +
		    IPH_HDR_LENGTH((ipha_t *)ip_start);
		if (tcp_start >= (uintptr_t)(nmp->b_wptr)) {
			tcp_start = (uintptr_t)nmp->b_cont->b_rptr
			    + (tcp_start - (uintptr_t)(nmp->b_wptr));
			nmp = nmp->b_cont;
		}
		cur_context->hdr_len = cur_context->ether_header_size
		    + IPH_HDR_LENGTH((ipha_t *)ip_start)
		    + TCP_HDR_LENGTH((tcph_t *)tcp_start);
		((ipha_t *)ip_start)->ipha_length = 0;
		((ipha_t *)ip_start)->ipha_hdr_checksum = 0;
		/* calculate the TCP packet payload length */
		cur_context->pay_len = msg_size - cur_context->hdr_len;
	}
	return (B_TRUE);
}

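/*
 * e1000g_check_context - decide whether a context descriptor reload is needed
 *
 * Compares the offload parameters of the current packet against the
 * context most recently programmed into the hardware
 * (tx_ring->pre_context). A new context descriptor is only emitted when
 * something relevant has changed, except on 82546EB parts with the LSO
 * premature-writeback erratum, where the context is reloaded for every
 * offloaded packet.
 */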
static boolean_t
e1000g_check_context(e1000g_tx_ring_t *tx_ring, context_data_t *cur_context)
{
	boolean_t context_reload;
	context_data_t *pre_context;
	struct e1000g *Adapter;

	context_reload = B_FALSE;
	pre_context = &tx_ring->pre_context;
	Adapter = tx_ring->adapter;

	/*
	 * The following code determines whether the context descriptor
	 * needs to be reloaded. The conditions are ordered by how likely
	 * they are to change.
	 */
	/*
	 * Workaround for 82546EB: the context descriptor must be reloaded
	 * per LSO/hw_cksum packet if LSO is enabled.
	 */
	if (Adapter->lso_premature_issue &&
	    Adapter->lso_enable &&
	    (cur_context->cksum_flags != 0)) {

		context_reload = B_TRUE;
	} else if (cur_context->lso_flag) {
		if ((cur_context->lso_flag != pre_context->lso_flag) ||
		    (cur_context->cksum_flags != pre_context->cksum_flags) ||
		    (cur_context->pay_len != pre_context->pay_len) ||
		    (cur_context->mss != pre_context->mss) ||
		    (cur_context->hdr_len != pre_context->hdr_len) ||
		    (cur_context->cksum_stuff != pre_context->cksum_stuff) ||
		    (cur_context->cksum_start != pre_context->cksum_start) ||
		    (cur_context->ether_header_size !=
		    pre_context->ether_header_size)) {

			context_reload = B_TRUE;
		}
	} else if (cur_context->cksum_flags != 0) {
		if ((cur_context->lso_flag != pre_context->lso_flag) ||
		    (cur_context->cksum_flags != pre_context->cksum_flags) ||
		    (cur_context->cksum_stuff != pre_context->cksum_stuff) ||
		    (cur_context->cksum_start != pre_context->cksum_start) ||
		    (cur_context->ether_header_size !=
		    pre_context->ether_header_size)) {

			context_reload = B_TRUE;
		}
	}

	return (context_reload);
}

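/*
 * e1000g_fill_tx_ring - copy the prepared descriptors into the hardware ring
 *
 * Runs under tx_lock. Writes an optional context descriptor followed by
 * the data descriptors for every TxSwPacket on the pending list, applies
 * the 82546EB PCI-X buffer-overrun and LSO premature-writeback errata
 * workarounds, syncs the descriptor DMA buffer, and finally advances the
 * hardware tail pointer (TDT). Returns the number of descriptors consumed.
 */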
static int
e1000g_fill_tx_ring(e1000g_tx_ring_t *tx_ring, LIST_DESCRIBER *pending_list,
    context_data_t *cur_context)
{
	struct e1000g *Adapter;
	struct e1000_hw *hw;
	p_tx_sw_packet_t first_packet;
	p_tx_sw_packet_t packet;
	p_tx_sw_packet_t previous_packet;
	boolean_t context_reload;
	struct e1000_tx_desc *first_data_desc;
	struct e1000_tx_desc *next_desc;
	struct e1000_tx_desc *descriptor;
	int desc_count;
	boolean_t buff_overrun_flag;
	int i;

	Adapter = tx_ring->adapter;
	hw = &Adapter->shared;

	desc_count = 0;
	first_packet = NULL;
	first_data_desc = NULL;
	descriptor = NULL;
	packet = NULL;
	buff_overrun_flag = B_FALSE;

	next_desc = tx_ring->tbd_next;

	/* Context descriptor reload check */
	context_reload = e1000g_check_context(tx_ring, cur_context);

	if (context_reload) {
		first_packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list);

		descriptor = next_desc;

		e1000g_fill_context_descriptor(cur_context,
		    (struct e1000_context_desc *)descriptor);

		/* Check the wrap-around case */
		if (descriptor == tx_ring->tbd_last)
			next_desc = tx_ring->tbd_first;
		else
			next_desc++;

		desc_count++;
	}

	first_data_desc = next_desc;

	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list);
	while (packet) {
		ASSERT(packet->num_desc);

		for (i = 0; i < packet->num_desc; i++) {
			ASSERT(tx_ring->tbd_avail > 0);

			descriptor = next_desc;
			descriptor->buffer_addr =
			    packet->desc[i].address;
			descriptor->lower.data =
			    packet->desc[i].length;

			/* Zero out status */
			descriptor->upper.data = 0;

			descriptor->lower.data |=
			    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
			/* must set RS on every outgoing descriptor */
			descriptor->lower.data |=
			    E1000_TXD_CMD_RS;

			if (cur_context->lso_flag)
				descriptor->lower.data |= E1000_TXD_CMD_TSE;

			/* Check the wrap-around case */
			if (descriptor == tx_ring->tbd_last)
				next_desc = tx_ring->tbd_first;
			else
				next_desc++;

			desc_count++;

			/*
			 * Workaround for 82546EB errata 33: hang in PCI-X
			 * systems due to a 2k buffer overrun during transmit
			 * operation. The workaround applies to all the Intel
			 * PCI-X chips.
			 */
			if (hw->bus.type == e1000_bus_type_pcix &&
			    descriptor == first_data_desc &&
			    ((descriptor->lower.data & E1000G_TBD_LENGTH_MASK)
			    > E1000_TX_BUFFER_OEVRRUN_THRESHOLD)) {
				/* modify the first descriptor */
				descriptor->lower.data &=
				    ~E1000G_TBD_LENGTH_MASK;
				descriptor->lower.flags.length =
				    E1000_TX_BUFFER_OEVRRUN_THRESHOLD;

				/* insert a new descriptor */
				ASSERT(tx_ring->tbd_avail > 0);
				next_desc->buffer_addr =
				    packet->desc[0].address +
				    E1000_TX_BUFFER_OEVRRUN_THRESHOLD;
				next_desc->lower.data =
				    packet->desc[0].length -
				    E1000_TX_BUFFER_OEVRRUN_THRESHOLD;

				/* Zero out status */
				next_desc->upper.data = 0;

				next_desc->lower.data |=
				    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
				/* must set RS on every outgoing descriptor */
				next_desc->lower.data |=
				    E1000_TXD_CMD_RS;

				if (cur_context->lso_flag)
					next_desc->lower.data |=
					    E1000_TXD_CMD_TSE;

				descriptor = next_desc;

				/* Check the wrap-around case */
				if (next_desc == tx_ring->tbd_last)
					next_desc = tx_ring->tbd_first;
				else
					next_desc++;

				desc_count++;
				buff_overrun_flag = B_TRUE;
			}
		}

		if (buff_overrun_flag) {
			packet->num_desc++;
			buff_overrun_flag = B_FALSE;
		}

		if (first_packet != NULL) {
			/*
			 * Count the checksum context descriptor for
			 * the first SwPacket.
			 */
			first_packet->num_desc++;
			first_packet = NULL;
		}

		packet->tickstamp = ddi_get_lbolt64();

		previous_packet = packet;
		packet = (p_tx_sw_packet_t)
		    QUEUE_GET_NEXT(pending_list, &packet->Link);
	}

	/*
	 * Workaround for 82546EB errata 21: LSO Premature Descriptor
	 * Write Back
	 */
	if (Adapter->lso_premature_issue && cur_context->lso_flag &&
	    ((descriptor->lower.data & E1000G_TBD_LENGTH_MASK) > 8)) {
		/* modify the previous descriptor */
		descriptor->lower.data -= 4;

		/* insert a new descriptor */
		ASSERT(tx_ring->tbd_avail > 0);
		/* the lower 20 bits of lower.data are the length field */
		next_desc->buffer_addr =
		    descriptor->buffer_addr +
		    (descriptor->lower.data & E1000G_TBD_LENGTH_MASK);
		next_desc->lower.data = 4;

		/* Zero out status */
		next_desc->upper.data = 0;
		/* It must be part of an LSO packet */
		next_desc->lower.data |=
		    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D |
		    E1000_TXD_CMD_RS | E1000_TXD_CMD_TSE;

		descriptor = next_desc;

		/* Check the wrap-around case */
		if (descriptor == tx_ring->tbd_last)
			next_desc = tx_ring->tbd_first;
		else
			next_desc++;

		desc_count++;
		/* update the number of descriptors */
		previous_packet->num_desc++;
	}

	ASSERT(descriptor);

	if (cur_context->cksum_flags) {
		if (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM)
			((struct e1000_data_desc *)first_data_desc)->
			    upper.fields.popts |= E1000_TXD_POPTS_IXSM;
		if (cur_context->cksum_flags & HCK_PARTIALCKSUM)
			((struct e1000_data_desc *)first_data_desc)->
			    upper.fields.popts |= E1000_TXD_POPTS_TXSM;
	}

	/*
	 * The last descriptor of the packet needs End Of Packet (EOP) and
	 * Report Status (RS) set.
	 */
	if (Adapter->tx_intr_delay) {
		descriptor->lower.data |= E1000_TXD_CMD_IDE |
		    E1000_TXD_CMD_EOP;
	} else {
		descriptor->lower.data |= E1000_TXD_CMD_EOP;
	}

	/* Set the append Ethernet CRC (IFCS) bit */
	if (cur_context->lso_flag) {
		first_data_desc->lower.data |= E1000_TXD_CMD_IFCS;
	} else {
		descriptor->lower.data |= E1000_TXD_CMD_IFCS;
	}

	/*
	 * Sync the Tx descriptors DMA buffer
	 */
	(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
	    0, 0, DDI_DMA_SYNC_FORDEV);

	tx_ring->tbd_next = next_desc;

	/*
	 * Advance the Transmit Descriptor Tail (TDT); this tells the
	 * hardware that this frame is available to transmit.
	 */
	if (hw->mac.type == e1000_82547)
		e1000g_82547_tx_move_tail(tx_ring);
	else
		E1000_WRITE_REG(hw, E1000_TDT(0),
		    (uint32_t)(next_desc - tx_ring->tbd_first));

	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
		Adapter->e1000g_state |= E1000G_ERROR;
	}

	/* Put the pending SwPackets onto the "Used" list */
	mutex_enter(&tx_ring->usedlist_lock);
	QUEUE_APPEND(&tx_ring->used_list, pending_list);
	tx_ring->tbd_avail -= desc_count;
	mutex_exit(&tx_ring->usedlist_lock);

	/* update LSO related data */
	if (context_reload)
		tx_ring->pre_context = *cur_context;

	return (desc_count);
}

/*
 * e1000g_tx_setup - setup tx data structures
 *
 * This routine initializes all of the transmit related
 * structures. This includes the Transmit descriptors and
 * the tx_sw_packet structures.
 */
void
e1000g_tx_setup(struct e1000g *Adapter)
{
	struct e1000_hw *hw;
	p_tx_sw_packet_t packet;
	uint32_t i;
	uint32_t buf_high;
	uint32_t buf_low;
	uint32_t reg_tipg;
	uint32_t reg_tctl;
	int size;
	e1000g_tx_ring_t *tx_ring;

	hw = &Adapter->shared;
	tx_ring = Adapter->tx_ring;

	/* init the lists */
	/*
	 * We don't need to protect the lists with usedlist_lock and
	 * freelist_lock here, because they are already protected by
	 * chip_lock.
	 */
	QUEUE_INIT_LIST(&tx_ring->used_list);
	QUEUE_INIT_LIST(&tx_ring->free_list);

	/* Go through and set up each SW_Packet */
	packet = tx_ring->packet_area;
	for (i = 0; i < Adapter->tx_freelist_num; i++, packet++) {
		/* Initialize this tx_sw_packet area */
		e1000g_free_tx_swpkt(packet);
		/* Add this tx_sw_packet to the free list */
		QUEUE_PUSH_TAIL(&tx_ring->free_list,
		    &packet->Link);
	}

	/* Setup TX descriptor pointers */
	tx_ring->tbd_next = tx_ring->tbd_first;
	tx_ring->tbd_oldest = tx_ring->tbd_first;

	/*
	 * Setup Hardware TX Registers
	 */
	/* Setup the Transmit Control Register (TCTL). */
	reg_tctl = E1000_READ_REG(hw, E1000_TCTL);
	reg_tctl |= E1000_TCTL_PSP | E1000_TCTL_EN |
	    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT) |
	    (E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT) |
	    E1000_TCTL_RTLC;

	/* Enable the MULR bit */
	if (hw->bus.type == e1000_bus_type_pci_express)
		reg_tctl |= E1000_TCTL_MULR;

	E1000_WRITE_REG(hw, E1000_TCTL, reg_tctl);

	/* Setup HW Base and Length of Tx descriptor area */
	size = (Adapter->tx_desc_num * sizeof (struct e1000_tx_desc));
	E1000_WRITE_REG(hw, E1000_TDLEN(0), size);
	size = E1000_READ_REG(hw, E1000_TDLEN(0));

	buf_low = (uint32_t)tx_ring->tbd_dma_addr;
	buf_high = (uint32_t)(tx_ring->tbd_dma_addr >> 32);

	/*
	 * Write the highest location first and work backward to the lowest.
	 * This is necessary for some adapter types to
	 * prevent write combining from occurring.
	 */
	E1000_WRITE_REG(hw, E1000_TDBAH(0), buf_high);
	E1000_WRITE_REG(hw, E1000_TDBAL(0), buf_low);

	/* Setup our HW Tx Head & Tail descriptor pointers */
	E1000_WRITE_REG(hw, E1000_TDH(0), 0);
	E1000_WRITE_REG(hw, E1000_TDT(0), 0);

	/* Set the default values for the Tx Inter Packet Gap timer */
	if ((hw->mac.type == e1000_82542) &&
	    ((hw->revision_id == E1000_REVISION_2) ||
	    (hw->revision_id == E1000_REVISION_3))) {
		reg_tipg = DEFAULT_82542_TIPG_IPGT;
		reg_tipg |=
		    DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		reg_tipg |=
		    DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
	} else if (hw->mac.type == e1000_80003es2lan) {
		reg_tipg = DEFAULT_82543_TIPG_IPGR1;
		reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
		    E1000_TIPG_IPGR2_SHIFT;
	} else {
		if (hw->phy.media_type == e1000_media_type_fiber)
			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
		else
			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
		reg_tipg |=
		    DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		reg_tipg |=
		    DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
	}
	E1000_WRITE_REG(hw, E1000_TIPG, reg_tipg);

	/* Setup Transmit Interrupt Delay Value */
	E1000_WRITE_REG(hw, E1000_TIDV, Adapter->tx_intr_delay);
	E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
	    "E1000_TIDV: 0x%x\n", Adapter->tx_intr_delay);

	if (hw->mac.type >= e1000_82540) {
		E1000_WRITE_REG(&Adapter->shared, E1000_TADV,
		    Adapter->tx_intr_abs_delay);
		E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
		    "E1000_TADV: 0x%x\n", Adapter->tx_intr_abs_delay);
	}

	tx_ring->tbd_avail = Adapter->tx_desc_num;

	/* Initialize stored context information */
	bzero(&(tx_ring->pre_context), sizeof (context_data_t));
}

/*
 * e1000g_recycle - recycle the tx descriptors and tx sw packets
 */
int
e1000g_recycle(e1000g_tx_ring_t *tx_ring)
{
	struct e1000g *Adapter;
	LIST_DESCRIBER pending_list;
	p_tx_sw_packet_t packet;
	mblk_t *mp;
	mblk_t *nmp;
	struct e1000_tx_desc *descriptor;
	int desc_count;
	int64_t delta;

	/*
	 * This function examines each TxSwPacket in the 'used' queue.
	 * If the e1000g is done with it, the associated resources (Tx
	 * descriptors) are "freed" and the TxSwPacket is returned to
	 * the 'free' queue.
	 */
	Adapter = tx_ring->adapter;
	delta = 0;

	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&tx_ring->used_list);
	if (packet == NULL) {
		Adapter->stall_flag = B_FALSE;
		return (0);
	}

	desc_count = 0;
	QUEUE_INIT_LIST(&pending_list);

	/* Sync the Tx descriptor DMA buffer */
	(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
	    0, 0, DDI_DMA_SYNC_FORKERNEL);
	if (e1000g_check_dma_handle(
	    tx_ring->tbd_dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
		Adapter->e1000g_state |= E1000G_ERROR;
		return (0);
	}

	/*
	 * While there are still TxSwPackets in the used queue, check them
	 */
	mutex_enter(&tx_ring->usedlist_lock);
	while ((packet =
	    (p_tx_sw_packet_t)QUEUE_GET_HEAD(&tx_ring->used_list)) != NULL) {

		/*
		 * Get hold of the next descriptor that the e1000g will
		 * report status back to (this will be the last descriptor
		 * of a given sw packet). We only want to free the
		 * sw packet (and its resources) if the e1000g is done
		 * with ALL of the descriptors. If the e1000g is done
		 * with the last one then it is done with all of them.
		 */
		ASSERT(packet->num_desc);
		descriptor = tx_ring->tbd_oldest + (packet->num_desc - 1);

		/* Check for wrap case */
		if (descriptor > tx_ring->tbd_last)
			descriptor -= Adapter->tx_desc_num;

		/*
		 * If the descriptor done bit is set, free the TxSwPacket
		 * and associated resources
		 */
		if (descriptor->upper.fields.status & E1000_TXD_STAT_DD) {
			QUEUE_POP_HEAD(&tx_ring->used_list);
			QUEUE_PUSH_TAIL(&pending_list, &packet->Link);

			if (descriptor == tx_ring->tbd_last)
				tx_ring->tbd_oldest =
				    tx_ring->tbd_first;
			else
				tx_ring->tbd_oldest =
				    descriptor + 1;

			desc_count += packet->num_desc;
		} else {
			/*
			 * We found a sw packet that the e1000g is not done
			 * with, so there is no reason to check the rest
			 * of the queue.
			 */
			delta = ddi_get_lbolt64() - packet->tickstamp;
			break;
		}
	}

	tx_ring->tbd_avail += desc_count;
	Adapter->tx_pkt_cnt += desc_count;

	mutex_exit(&tx_ring->usedlist_lock);

	if (desc_count == 0) {
		E1000G_DEBUG_STAT(tx_ring->stat_recycle_none);
		/*
		 * If the packet hasn't been sent out within the stall
		 * threshold and the transmitter is not paused by flow
		 * control, the transmitter is considered to be stalled.
		 */
		if ((delta > Adapter->stall_threshold) &&
		    !(E1000_READ_REG(&Adapter->shared,
		    E1000_STATUS) & E1000_STATUS_TXOFF)) {
			Adapter->stall_flag = B_TRUE;
		}
		return (0);
	}

	Adapter->stall_flag = B_FALSE;

	mp = NULL;
	nmp = NULL;
	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&pending_list);
	ASSERT(packet != NULL);
	while (packet != NULL) {
		if (packet->mp != NULL) {
			ASSERT(packet->mp->b_next == NULL);
			/* Assemble the message chain */
			if (mp == NULL) {
				mp = packet->mp;
				nmp = packet->mp;
			} else {
				nmp->b_next = packet->mp;
				nmp = packet->mp;
			}
			/* Disconnect the message from the sw packet */
			packet->mp = NULL;
		}

		/* Free the TxSwPackets */
		e1000g_free_tx_swpkt(packet);

		packet = (p_tx_sw_packet_t)
		    QUEUE_GET_NEXT(&pending_list, &packet->Link);
	}

	/* Return the TxSwPackets back to the FreeList */
	mutex_enter(&tx_ring->freelist_lock);
	QUEUE_APPEND(&tx_ring->free_list, &pending_list);
	mutex_exit(&tx_ring->freelist_lock);

	if (mp != NULL)
		freemsgchain(mp);

	return (desc_count);
}

/*
 * 82544 Coexistence issue workaround:
 * There are 2 issues.
 * 1. If a 32 bit split completion happens from P64H2 and another
 * agent drives a 64 bit request/split completion after ONLY
 * 1 idle clock (BRCM/Emulex/Adaptec fiber channel cards), then the
 * 82544 has a problem where, in order to clock all the data in, it
 * looks at the REQ64# signal, and since that signal has changed so
 * fast (i.e. a 1 idle clock turnaround), it will fail to clock all
 * the data in. Data coming from certain ending addresses has exposure
 * to this issue.
 *
 * To detect this issue, the following equation can be used:
 * SIZE[3:0] + ADDR[2:0] = SUM[3:0].
 * If SUM[3:0] is between 1 and 4, we will have this issue.
 *
 * ROOT CAUSE:
 * The erratum involves the 82544 PCIX elasticity FIFO implementations as
 * 64-bit FIFOs and the flushing of the final partial-bytes corresponding
 * to the end of a requested read burst. Under a specific burst condition
 * of ending-data alignment and 32-byte split-completions, the final
 * byte(s) of split-completion data require an extra clock cycle to flush
 * into 64-bit FIFO orientation. An incorrect logic dependency on the
 * REQ64# signal occurring during this clock cycle may cause the
 * residual byte(s) to be lost, thereby rendering the internal DMA client
 * forever awaiting the final byte(s) for an outbound data-fetch. The
 * erratum is confirmed to *only* occur if certain subsequent external
 * 64-bit PCIX bus transactions occur immediately (minimum possible bus
 * turn-around) following the odd-aligned 32-bit split-completion
 * containing the final byte(s). Intel has confirmed that this has been
 * seen only with chipset/bridges which have the capability to provide
 * 32-bit split-completion data, and in the presence of newer PCIX bus
 * agents which fully-optimize the inter-transaction turn-around (zero
 * additional initiator latency when pre-granted bus ownership).
 *
 * This issue does not exist in PCI bus mode, when any agent is operating
 * in 32 bit only mode, or on chipsets that do not do 32 bit split
 * completions for 64 bit read requests (Serverworks chipsets). P64H2 does
 * 32 bit split completions for any read request that has bit 2 set to 1
 * for the requested address and a read request size of more than 8 bytes.
 *
 * 2. Another issue is related to the 82544 driving DACs under a similar
 * scenario (32 bit split completion followed by a 64 bit transaction with
 * only 1 cycle turnaround). This issue is still being root caused. We
 * think that both of these issues can be avoided if the following
 * workaround is implemented. It seems the DAC issue is related to ending
 * addresses being 0x9, 0xA, 0xB, 0xC and hence ending up at odd boundaries
 * in the elasticity FIFO which does not get flushed due to the REQ64#
 * dependency. We will only know the full story after it has been
 * simulated successfully by the HW team.
 *
 * WORKAROUND:
 * Make sure we do not have ending address as 1,2,3,4(Hang) or 9,a,b,c(DAC)
 */
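/*
 * Worked example of the SUM[3:0] check above (a sketch, using the same
 * arithmetic as e1000g_fill_82544_desc() below): for a buffer at address
 * 0xF004 with length 0x10, SUM = ((0xF004 & 0x7) + (0x10 & 0xF)) & 0xF =
 * (4 + 0) & 0xF = 4, which falls in the unsafe 1-4 range. The buffer is
 * therefore split into a 0xC-byte descriptor at 0xF004 and a 4-byte
 * descriptor at 0xF010. The 4-byte tail is acceptable because, per the
 * discussion above, requests of 8 bytes or less do not receive 32-bit
 * split completions.
 */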
static uint32_t
e1000g_fill_82544_desc(uint64_t address,
    size_t length, p_desc_array_t desc_array)
{
	/*
	 * Since the issue is sensitive to both length and address,
	 * let us first check the address...
	 */
	uint32_t safe_terminator;

	if (length <= 4) {
		desc_array->descriptor[0].address = address;
		desc_array->descriptor[0].length = (uint32_t)length;
		desc_array->elements = 1;
		return (desc_array->elements);
	}
	safe_terminator =
	    (uint32_t)((((uint32_t)address & 0x7) +
	    (length & 0xF)) & 0xF);
	/*
	 * If the terminator does not fall in the unsafe ranges 0x1-0x4
	 * or 0x9-0xC, a single descriptor is safe; return it.
	 */
	if (safe_terminator == 0 ||
	    (safe_terminator > 4 && safe_terminator < 9) ||
	    (safe_terminator > 0xC && safe_terminator <= 0xF)) {
		desc_array->descriptor[0].address = address;
		desc_array->descriptor[0].length = (uint32_t)length;
		desc_array->elements = 1;
		return (desc_array->elements);
	}

	desc_array->descriptor[0].address = address;
	desc_array->descriptor[0].length = length - 4;
	desc_array->descriptor[1].address = address + (length - 4);
	desc_array->descriptor[1].length = 4;
	desc_array->elements = 2;
	return (desc_array->elements);
}

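/*
 * e1000g_tx_copy - bcopy an mblk fragment into the sw packet's tx buffer
 *
 * Accumulates consecutive small fragments into the pre-allocated DMA
 * buffer. Once no more fragments can (or should) be accumulated, the
 * buffer is padded to the minimum frame size if necessary, synced for
 * the device, and handed to e1000g_fill_tx_desc(). Returns the number
 * of descriptors used, 0 if the fragment was only accumulated, or -1
 * on failure.
 */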
static int
e1000g_tx_copy(e1000g_tx_ring_t *tx_ring, p_tx_sw_packet_t packet,
    mblk_t *mp, boolean_t tx_undersize_flag)
{
	size_t len;
	size_t len1;
	dma_buffer_t *tx_buf;
	mblk_t *nmp;
	boolean_t finished;
	int desc_count;

	desc_count = 0;
	tx_buf = packet->tx_buf;
	len = MBLKL(mp);

	ASSERT((tx_buf->len + len) <= tx_buf->size);

	if (len > 0) {
		bcopy(mp->b_rptr,
		    tx_buf->address + tx_buf->len,
		    len);
		tx_buf->len += len;

		packet->num_mblk_frag++;
	}

	nmp = mp->b_cont;
	if (nmp == NULL) {
		finished = B_TRUE;
	} else {
		len1 = MBLKL(nmp);
		if ((tx_buf->len + len1) > tx_buf->size)
			finished = B_TRUE;
		else if (tx_undersize_flag)
			finished = B_FALSE;
		else if (len1 > tx_ring->adapter->tx_bcopy_thresh)
			finished = B_TRUE;
		else
			finished = B_FALSE;
	}

	if (finished) {
		E1000G_DEBUG_STAT_COND(tx_ring->stat_multi_copy,
		    (tx_buf->len > len));

		/*
		 * If the packet is smaller than 64 bytes, which is the
		 * minimum ethernet packet size including the CRC, pad
		 * the packet to make it at least 60 bytes. The hardware
		 * will add the 4-byte CRC.
		 */
		if (tx_undersize_flag) {
			ASSERT(tx_buf->len < ETHERMIN);

			bzero(tx_buf->address + tx_buf->len,
			    ETHERMIN - tx_buf->len);
			tx_buf->len = ETHERMIN;
		}

#ifdef __sparc
		if (packet->dma_type == USE_DVMA)
			dvma_sync(tx_buf->dma_handle, 0, DDI_DMA_SYNC_FORDEV);
		else
			(void) ddi_dma_sync(tx_buf->dma_handle, 0,
			    tx_buf->len, DDI_DMA_SYNC_FORDEV);
#else
		(void) ddi_dma_sync(tx_buf->dma_handle, 0,
		    tx_buf->len, DDI_DMA_SYNC_FORDEV);
#endif

		packet->data_transfer_type = USE_BCOPY;

		desc_count = e1000g_fill_tx_desc(tx_ring,
		    packet,
		    tx_buf->dma_address,
		    tx_buf->len);

		if (desc_count <= 0)
			return (-1);
	}

	return (desc_count);
}

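/*
 * e1000g_tx_bind - bind an mblk fragment to a DMA handle
 *
 * Maps the fragment for DMA (dvma_kaddr_load() on sparc, otherwise
 * ddi_dma_addr_bind_handle()) and fills one tx descriptor per resulting
 * DMA cookie. Returns the total number of descriptors used, or -1 if
 * the bind fails.
 */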
static int
e1000g_tx_bind(e1000g_tx_ring_t *tx_ring, p_tx_sw_packet_t packet, mblk_t *mp)
{
	int j;
	int mystat;
	size_t len;
	ddi_dma_cookie_t dma_cookie;
	uint_t ncookies;
	int desc_count;
	uint32_t desc_total;

	desc_total = 0;
	len = MBLKL(mp);

	/*
	 * ddi_dma_addr_bind_handle() allocates DMA resources for a
	 * memory object such that a device can perform DMA to or from
	 * the object. DMA resources are allocated considering the
	 * device's DMA attributes as expressed by ddi_dma_attr(9S)
	 * (see ddi_dma_alloc_handle(9F)).
	 *
	 * ddi_dma_addr_bind_handle() fills in the first DMA cookie
	 * pointed to by cookiep with the appropriate address, length,
	 * and bus type. *ccountp is set to the number of DMA cookies
	 * representing this DMA object. Subsequent DMA cookies must be
	 * retrieved by calling ddi_dma_nextcookie(9F) the number of
	 * times specified by *ccountp - 1.
	 */
	switch (packet->dma_type) {
#ifdef __sparc
	case USE_DVMA:
		dvma_kaddr_load(packet->tx_dma_handle,
		    (caddr_t)mp->b_rptr, len, 0, &dma_cookie);

		dvma_sync(packet->tx_dma_handle, 0,
		    DDI_DMA_SYNC_FORDEV);

		ncookies = 1;
		packet->data_transfer_type = USE_DVMA;
		break;
#endif
	case USE_DMA:
		if ((mystat = ddi_dma_addr_bind_handle(
		    packet->tx_dma_handle, NULL,
		    (caddr_t)mp->b_rptr, len,
		    DDI_DMA_WRITE | DDI_DMA_STREAMING,
		    DDI_DMA_DONTWAIT, 0, &dma_cookie,
		    &ncookies)) != DDI_DMA_MAPPED) {

			e1000g_log(tx_ring->adapter, CE_WARN,
			    "Couldn't bind mblk buffer to Tx DMA handle: "
			    "return: %X, Pkt: %X\n",
			    mystat, packet);
			return (-1);
		}

		/*
		 * An implicit ddi_dma_sync() is done when
		 * ddi_dma_addr_bind_handle() is called, so we
		 * don't need to explicitly call ddi_dma_sync()
		 * here any more.
		 */
		ASSERT(ncookies);
		E1000G_DEBUG_STAT_COND(tx_ring->stat_multi_cookie,
		    (ncookies > 1));

		/*
		 * The data_transfer_type value must be set after the handle
		 * has been bound, for it will be used in e1000g_free_tx_swpkt()
		 * to decide whether we need to unbind the handle.
		 */
		packet->data_transfer_type = USE_DMA;
		break;
	default:
		ASSERT(B_FALSE);
		break;
	}

	packet->num_mblk_frag++;

	/*
	 * Each address could span multiple cookies;
	 * each cookie gets one descriptor.
	 */
	for (j = ncookies; j != 0; j--) {

		desc_count = e1000g_fill_tx_desc(tx_ring,
		    packet,
		    dma_cookie.dmac_laddress,
		    dma_cookie.dmac_size);

		if (desc_count <= 0)
			return (-1);

		desc_total += desc_count;

		/*
		 * ddi_dma_nextcookie() retrieves subsequent DMA
		 * cookies for a DMA object.
		 * ddi_dma_nextcookie() fills in the
		 * ddi_dma_cookie(9S) structure pointed to by
		 * cookiep. The ddi_dma_cookie(9S) structure
		 * must be allocated prior to calling
		 * ddi_dma_nextcookie(). The DMA cookie count
		 * returned by ddi_dma_buf_bind_handle(9F),
		 * ddi_dma_addr_bind_handle(9F), or
		 * ddi_dma_getwin(9F) indicates the number of DMA
		 * cookies a DMA object consists of. If the
		 * resulting cookie count, N, is larger than 1,
		 * ddi_dma_nextcookie() must be called N-1 times
		 * to retrieve all DMA cookies.
		 */
		if (j > 1) {
			ddi_dma_nextcookie(packet->tx_dma_handle,
			    &dma_cookie);
		}
	}

	return (desc_total);
}

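/*
 * e1000g_fill_context_descriptor - program a checksum/TSO context descriptor
 *
 * Translates the context data gathered by e1000g_retrieve_context() into
 * the IP/TCP checksum offsets and, for LSO, the MSS, header length and
 * payload length fields of a hardware context descriptor.
 */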
static void
e1000g_fill_context_descriptor(context_data_t *cur_context,
    struct e1000_context_desc *context_desc)
{
	if (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM) {
		context_desc->lower_setup.ip_fields.ipcss =
		    cur_context->ether_header_size;
		context_desc->lower_setup.ip_fields.ipcso =
		    cur_context->ether_header_size +
		    offsetof(struct ip, ip_sum);
		context_desc->lower_setup.ip_fields.ipcse =
		    cur_context->ether_header_size +
		    cur_context->cksum_start - 1;
	} else
		context_desc->lower_setup.ip_config = 0;

	if (cur_context->cksum_flags & HCK_PARTIALCKSUM) {
		/*
		 * Packets of a given protocol have the following
		 * stuff and start offsets:
		 * | Protocol   | Stuff  | Start  | Checksum
		 * |            | Offset | Offset | Enable
		 * | IPv4 + TCP | 0x24   | 0x14   | Yes
		 * | IPv4 + UDP | 0x1A   | 0x14   | Yes
		 * | IPv6 + TCP | 0x20   | 0x10   | No
		 * | IPv6 + UDP | 0x14   | 0x10   | No
		 */
		context_desc->upper_setup.tcp_fields.tucss =
		    cur_context->cksum_start + cur_context->ether_header_size;
		context_desc->upper_setup.tcp_fields.tucso =
		    cur_context->cksum_stuff + cur_context->ether_header_size;
		context_desc->upper_setup.tcp_fields.tucse = 0;
	} else
		context_desc->upper_setup.tcp_config = 0;

	if (cur_context->lso_flag) {
		context_desc->tcp_seg_setup.fields.mss = cur_context->mss;
		context_desc->tcp_seg_setup.fields.hdr_len =
		    cur_context->hdr_len;
		/*
		 * Workaround for 82546EB errata 23: status writeback
		 * reporting (RS) should not be set on context or
		 * null descriptors.
		 */
		context_desc->cmd_and_length = E1000_TXD_CMD_DEXT
		    | E1000_TXD_CMD_TSE | E1000_TXD_CMD_IP | E1000_TXD_CMD_TCP
		    | E1000_TXD_DTYP_C | cur_context->pay_len;
	} else {
		context_desc->cmd_and_length = E1000_TXD_CMD_DEXT
		    | E1000_TXD_DTYP_C;
		/*
		 * Zero out the options for TCP Segmentation Offload
		 */
		context_desc->tcp_seg_setup.data = 0;
	}
}

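/*
 * e1000g_fill_tx_desc - record one buffer address/length pair for a packet
 *
 * Appends a sw descriptor to the TxSwPacket, diverting to the 82544
 * PCI-X and jumbo-fragment workarounds when they apply. Returns the
 * number of sw descriptors added.
 */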
static int
e1000g_fill_tx_desc(e1000g_tx_ring_t *tx_ring,
    p_tx_sw_packet_t packet, uint64_t address, size_t size)
{
	struct e1000_hw *hw = &tx_ring->adapter->shared;
	p_sw_desc_t desc;

	if (hw->mac.type == e1000_82544) {
		if (hw->bus.type == e1000_bus_type_pcix)
			return (e1000g_tx_workaround_PCIX_82544(packet,
			    address, size));

		if (size > JUMBO_FRAG_LENGTH)
			return (e1000g_tx_workaround_jumbo_82544(packet,
			    address, size));
	}

	ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);

	desc = &packet->desc[packet->num_desc];
	desc->address = address;
	desc->length = (uint32_t)size;

	packet->num_desc++;

	return (1);
}

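/*
 * e1000g_tx_workaround_PCIX_82544 - split buffers for the 82544 PCI-X errata
 *
 * Breaks the buffer into MAX_TX_BUF_SIZE pieces and runs each piece
 * through e1000g_fill_82544_desc() so that no descriptor ends on an
 * unsafe address (see the coexistence-issue discussion above).
 */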
static int
e1000g_tx_workaround_PCIX_82544(p_tx_sw_packet_t packet,
    uint64_t address, size_t size)
{
	p_sw_desc_t desc;
	int desc_count;
	long size_left;
	size_t len;
	uint32_t counter;
	uint32_t array_elements;
	desc_array_t desc_array;

	/*
	 * Coexistence workaround for Cordova (82544): RP: 07/04/03
	 *
	 * RP: ERRATA: Workaround ISSUE: 8kb_buffer_Lockup
	 * CONTROLLER: Cordova
	 * Break each buffer into 8kb pieces until the
	 * remainder is < 8kb.
	 */
	size_left = size;
	desc_count = 0;

	while (size_left > 0) {
		if (size_left > MAX_TX_BUF_SIZE)
			len = MAX_TX_BUF_SIZE;
		else
			len = size_left;

		array_elements = e1000g_fill_82544_desc(address,
		    len, &desc_array);

		for (counter = 0; counter < array_elements; counter++) {
			ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);
			/*
			 * Put in the buffer address
			 */
			desc = &packet->desc[packet->num_desc];

			desc->address =
			    desc_array.descriptor[counter].address;
			desc->length =
			    desc_array.descriptor[counter].length;

			packet->num_desc++;
			desc_count++;
		} /* for */

		/*
		 * Update the buffer address and length
		 */
		address += MAX_TX_BUF_SIZE;
		size_left -= MAX_TX_BUF_SIZE;
	} /* while */

	return (desc_count);
}

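/*
 * e1000g_tx_workaround_jumbo_82544 - split oversized fragments for 82544
 *
 * Breaks a fragment larger than JUMBO_FRAG_LENGTH into pieces no bigger
 * than JUMBO_FRAG_LENGTH, one sw descriptor per piece.
 */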
static int
e1000g_tx_workaround_jumbo_82544(p_tx_sw_packet_t packet,
    uint64_t address, size_t size)
{
	p_sw_desc_t desc;
	int desc_count;
	long size_left;
	uint32_t offset;

	/*
	 * Workaround for Jumbo Frames on Cordova
	 * PSD 06/01/2001
	 */
	size_left = size;
	desc_count = 0;
	offset = 0;
	while (size_left > 0) {
		ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);

		desc = &packet->desc[packet->num_desc];

		desc->address = address + offset;

		if (size_left > JUMBO_FRAG_LENGTH)
			desc->length = JUMBO_FRAG_LENGTH;
		else
			desc->length = (uint32_t)size_left;

		packet->num_desc++;
		desc_count++;

		offset += desc->length;
		size_left -= JUMBO_FRAG_LENGTH;
	}

	return (desc_count);
}

#pragma inline(e1000g_82547_tx_move_tail_work)

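/*
 * e1000g_82547_tx_move_tail_work - 82547 Tx FIFO workaround
 *
 * On 82547 parts the tail pointer may only be advanced when the on-chip
 * Tx FIFO has room for the frame (e1000_fifo_workaround_82547()). This
 * routine walks the descriptors between the hardware tail and the
 * software tail frame by frame; if a frame does not fit while in
 * half-duplex, a 10 ms timeout is armed to retry later instead of
 * advancing the tail.
 */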
static void
e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *tx_ring)
{
	struct e1000_hw *hw;
	uint16_t hw_tdt;
	uint16_t sw_tdt;
	struct e1000_tx_desc *tx_desc;
	uint16_t length = 0;
	boolean_t eop = B_FALSE;
	struct e1000g *Adapter;

	Adapter = tx_ring->adapter;
	hw = &Adapter->shared;

	hw_tdt = E1000_READ_REG(hw, E1000_TDT(0));
	sw_tdt = tx_ring->tbd_next - tx_ring->tbd_first;

	while (hw_tdt != sw_tdt) {
		tx_desc = &(tx_ring->tbd_first[hw_tdt]);
		length += tx_desc->lower.flags.length;
		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
		if (++hw_tdt == Adapter->tx_desc_num)
			hw_tdt = 0;

		if (eop) {
			if ((Adapter->link_duplex == HALF_DUPLEX) &&
			    (e1000_fifo_workaround_82547(hw, length)
			    != E1000_SUCCESS)) {
				if (tx_ring->timer_enable_82547) {
					ASSERT(tx_ring->timer_id_82547 == 0);
					tx_ring->timer_id_82547 =
					    timeout(e1000g_82547_timeout,
					    (void *)tx_ring,
					    drv_usectohz(10000));
				}
				return;

			} else {
				E1000_WRITE_REG(hw, E1000_TDT(0), hw_tdt);
				e1000_update_tx_fifo_head_82547(hw, length);
				length = 0;
			}
		}
	}
}

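/*
 * e1000g_82547_timeout - timeout(9F) callback for the 82547 FIFO workaround
 *
 * Clears the pending timer id and retries moving the tail under tx_lock.
 */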
static void
e1000g_82547_timeout(void *arg)
{
	e1000g_tx_ring_t *tx_ring;

	tx_ring = (e1000g_tx_ring_t *)arg;

	mutex_enter(&tx_ring->tx_lock);

	tx_ring->timer_id_82547 = 0;
	e1000g_82547_tx_move_tail_work(tx_ring);

	mutex_exit(&tx_ring->tx_lock);
}

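/*
 * e1000g_82547_tx_move_tail - cancel any pending retry timer and move tail
 *
 * Called with tx_lock held. Any outstanding timeout is cancelled first
 * (dropping and re-taking tx_lock around untimeout() to avoid deadlock
 * with the timeout handler), then the tail is advanced.
 */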
static void
e1000g_82547_tx_move_tail(e1000g_tx_ring_t *tx_ring)
{
	timeout_id_t tid;

	ASSERT(MUTEX_HELD(&tx_ring->tx_lock));

	tid = tx_ring->timer_id_82547;
	tx_ring->timer_id_82547 = 0;
	if (tid != 0) {
		tx_ring->timer_enable_82547 = B_FALSE;
		mutex_exit(&tx_ring->tx_lock);

		(void) untimeout(tid);

		mutex_enter(&tx_ring->tx_lock);
	}
	tx_ring->timer_enable_82547 = B_TRUE;
	e1000g_82547_tx_move_tail_work(tx_ring);
}