/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include "bge_impl.h"


/*
 * The transmit-side code uses an allocation process which is similar
 * to some theme park roller-coaster rides, where riders sit in cars
 * that can go individually, but work better in a train.
 *
 * 1)	RESERVE a place - this doesn't refer to any specific car or
 *	seat, just that you will get a ride.  The attempt to RESERVE a
 *	place can fail if all spaces in all cars are already committed.
 *
 * 2)	Prepare yourself; this may take an arbitrary (but not unbounded)
 *	time, and you can back out at this stage, in which case you must
 *	give up (RENOUNCE) your place.
 *
 * 3)	CLAIM your space - a specific car (the next sequentially
 *	numbered one) is allocated at this stage, and is guaranteed
 *	to be part of the next train to depart.  Once you've done
 *	this, you can't back out, nor wait for any external event
 *	or resource.
 *
 * 4)	Occupy your car - when all CLAIMED cars are OCCUPIED, they
 *	all depart together as a single train!
 *
 * 5)	At the end of the ride, you climb out of the car and RENOUNCE
 *	your right to it, so that it can be recycled for another rider.
 *
 * For each rider, these have to occur in this order, but the riders
 * don't have to stay in the same order at each stage.  In particular,
 * they may overtake each other between RESERVING a place and CLAIMING
 * it, or between CLAIMING and OCCUPYING a space.
 *
 * Once a car is CLAIMED, the train currently being assembled can't go
 * without that car (this guarantees that the cars in a single train
 * make up a consecutively-numbered set).  Therefore, when any train
 * leaves, we know there can't be any riders in transit between CLAIMING
 * and OCCUPYING their cars.  There can be some who have RESERVED but
 * not yet CLAIMED their places.  That's OK, though, because they'll go
 * into the next train.
 */
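
/*
 * Loosely, in the copy-only transmit path implemented below, the
 * stages above map onto these primitives (a reader's sketch, not an
 * exhaustive description of the protocol):
 *
 *	RESERVE		bge_atomic_reserve(&srp->tx_free, 1)
 *	RENOUNCE	bge_atomic_renounce(&srp->tx_free, n)
 *	CLAIM		bge_atomic_next(&srp->txpkt_next, ...) and
 *			taking the next sequential tx_next slot
 *	OCCUPY		marking the packet ready (pktp->tx_ready) and
 *			filling its send buffer descriptor
 *	"train departs"	bge_mbx_put() telling the chip the new
 *			producer index
 */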

#define	BGE_DBG		BGE_DBG_SEND	/* debug flag for this code	*/

/*
 * ========== Send-side recycle routines ==========
 */

/*
 * Recycle all the completed buffers in the specified send ring up to
 * (but not including) the consumer index in the status block.
 *
 * This function must advance (srp->tc_next) AND adjust (srp->tx_free)
 * to account for the packets it has recycled.
 *
 * This is a trivial version that just does that and nothing more, but
 * it suffices while there's only one method for sending messages (by
 * copying) and that method doesn't need any special per-buffer action
 * for recycling.
 */
static boolean_t bge_recycle_ring(bge_t *bgep, send_ring_t *srp);
#pragma	inline(bge_recycle_ring)

static boolean_t
bge_recycle_ring(bge_t *bgep, send_ring_t *srp)
{
	sw_sbd_t *ssbdp;
	bge_queue_item_t *buf_item;
	bge_queue_item_t *buf_item_head;
	bge_queue_item_t *buf_item_tail;
	bge_queue_t *txbuf_queue;
	uint64_t slot;
	uint64_t n;

	ASSERT(mutex_owned(srp->tc_lock));

	/*
	 * We're about to release one or more places :-)
	 * These ASSERTions check that our invariants still hold:
	 *	there must always be at least one free place
	 *	at this point, there must be at least one place NOT free
	 *	we're not about to free more places than were claimed!
	 */
	ASSERT(srp->tx_free <= srp->desc.nslots);

	buf_item_head = buf_item_tail = NULL;
	for (n = 0, slot = srp->tc_next; slot != *srp->cons_index_p;
	    slot = NEXT(slot, srp->desc.nslots)) {
		ssbdp = &srp->sw_sbds[slot];
		ASSERT(ssbdp->pbuf != NULL);
		buf_item = ssbdp->pbuf;
		if (buf_item_head == NULL)
			buf_item_head = buf_item_tail = buf_item;
		else {
			buf_item_tail->next = buf_item;
			buf_item_tail = buf_item;
		}
		ssbdp->pbuf = NULL;
		n++;
	}
	if (n == 0)
		return (B_FALSE);

	/*
	 * Reset the watchdog count: to 0 if all buffers are
	 * now free, or to 1 if some are still outstanding.
	 * Note: non-synchronised access here means we may get
	 * the "wrong" answer, but only in a harmless fashion
	 * (i.e. we deactivate the watchdog because all buffers
	 * are apparently free, even though another thread may
	 * have claimed one before we leave here; in this case
	 * the watchdog will restart on the next send() call).
	 */
	bgep->watchdog = (slot == srp->tx_next) ? 0 : 1;

	/*
	 * Update recycle index and free tx BD number
	 */
	srp->tc_next = slot;
	ASSERT(srp->tx_free + n <= srp->desc.nslots);
	bge_atomic_renounce(&srp->tx_free, n);

	/*
	 * Return tx buffers to buffer push queue
	 */
	txbuf_queue = srp->txbuf_push_queue;
	mutex_enter(txbuf_queue->lock);
	buf_item_tail->next = txbuf_queue->head;
	txbuf_queue->head = buf_item_head;
	txbuf_queue->count += n;
	mutex_exit(txbuf_queue->lock);

	/*
	 * Check if we need to exchange the tx buffer push and pop queues
	 */
	if ((srp->txbuf_pop_queue->count < srp->tx_buffers_low) &&
	    (srp->txbuf_pop_queue->count < txbuf_queue->count)) {
		srp->txbuf_push_queue = srp->txbuf_pop_queue;
		srp->txbuf_pop_queue = txbuf_queue;
	}

	if (srp->tx_flow != 0 || bgep->tx_resched_needed)
		ddi_trigger_softintr(bgep->drain_id);

	return (B_TRUE);
}

/*
 * Recycle all returned slots in all rings.
 *
 * To give priority to low-numbered rings, whenever we have recycled any
 * slots in any ring except 0, we restart scanning again from ring 0.
 * Thus, for example, if rings 0, 3, and 10 are carrying traffic, the
 * pattern of recycles might go 0, 3, 10, 3, 0, 10, 0:
 *
 *	0	found some - recycle them
 *	1..2		none found
 *	3	found some - recycle them and restart scan
 *	0..9		none found
 *	10	found some - recycle them and restart scan
 *	0..2		none found
 *	3	found some more - recycle them and restart scan
 *	0	found some more - recycle them
 *	0..9		none found
 *	10	found some more - recycle them and restart scan
 *	0	found some more - recycle them
 *	1..15		none found
 *
 * The routine returns only when a complete scan has been performed
 * without finding any slots to recycle.
 *
 * Note: the expression (BGE_SEND_RINGS_USED > 1) yields a compile-time
 * constant and allows the compiler to optimise away the outer do-loop
 * if only one send ring is being used.
 */
boolean_t bge_recycle(bge_t *bgep, bge_status_t *bsp);
#pragma	no_inline(bge_recycle)

boolean_t
bge_recycle(bge_t *bgep, bge_status_t *bsp)
{
	send_ring_t *srp;
	uint64_t ring;
	uint64_t tx_rings = bgep->chipid.tx_rings;
	boolean_t tx_done = B_FALSE;

restart:
	ring = 0;
	srp = &bgep->send[ring];
	do {
		/*
		 * For each ring, (srp->cons_index_p) points to the
		 * proper index within the status block (which has
		 * already been sync'd by the caller).
		 */
		ASSERT(srp->cons_index_p == SEND_INDEX_P(bsp, ring));

		if (*srp->cons_index_p == srp->tc_next)
			continue;	/* no slots to recycle	*/
		if (mutex_tryenter(srp->tc_lock) == 0)
			continue;	/* already in process	*/
		tx_done |= bge_recycle_ring(bgep, srp);
		mutex_exit(srp->tc_lock);

		/*
		 * Restart from ring 0, if we're not on ring 0 already.
		 * The H/W selects send BDs strictly by priority, and BDs
		 * on the higher-priority ring are always serviced first,
		 * so the driver should stay consistent with the H/W and
		 * give lower-numbered rings higher priority.
		 */
		if (tx_rings > 1 && ring > 0)
			goto restart;

		/*
		 * Loop over all rings (if there *are* multiple rings)
		 */
	} while (++srp, ++ring < tx_rings);

	return (tx_done);
}


/*
 * ========== Send-side transmit routines ==========
 */
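
/*
 * Byte offsets of the checksum field within the TCP and UDP headers,
 * respectively; used by bge_pseudo_cksum() below to locate the field
 * that is seeded with the pseudo-header checksum.
 */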
#define	TCP_CKSUM_OFFSET	16
#define	UDP_CKSUM_OFFSET	6

static void
bge_pseudo_cksum(uint8_t *buf)
{
	uint32_t cksum;
	uint16_t iphl;
	uint16_t proto;

	/*
	 * Point it to the ip header.
	 */
	buf += sizeof (struct ether_header);

	/*
	 * Calculate the pseudo-header checksum.
	 */
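	/*
	 * IPv4 header fields referenced below (offsets within the IP
	 * header): the low nibble of buf[0] is the header length in
	 * 32-bit words, buf[2..3] is the total length (so total minus
	 * header length gives the TCP/UDP length), buf[9] is the
	 * protocol, and buf[12..19] are the source and destination
	 * addresses.
	 */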
	iphl = 4 * (buf[0] & 0xF);
	cksum = (((uint16_t)buf[2])<<8) + buf[3] - iphl;
	cksum += proto = buf[9];
	cksum += (((uint16_t)buf[12])<<8) + buf[13];
	cksum += (((uint16_t)buf[14])<<8) + buf[15];
	cksum += (((uint16_t)buf[16])<<8) + buf[17];
	cksum += (((uint16_t)buf[18])<<8) + buf[19];
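	/*
	 * Fold the 32-bit sum into 16 bits; the second fold absorbs
	 * any carry produced by the first.
	 */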
	cksum = (cksum>>16) + (cksum & 0xFFFF);
	cksum = (cksum>>16) + (cksum & 0xFFFF);

	/*
	 * Point it to the TCP/UDP header, and
	 * update the checksum field.
	 */
	buf += iphl + ((proto == IPPROTO_TCP) ?
	    TCP_CKSUM_OFFSET : UDP_CKSUM_OFFSET);

	/*
	 * There is a real possibility that this pointer cast is a
	 * problem.  It should be fixed when we know the code better.
	 * E_BAD_PTR_CAST_ALIGN is added to make it temporarily clean.
	 */
	*(uint16_t *)buf = htons((uint16_t)cksum);
}

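/*
 * Take a tx buffer from the per-ring free lists.  Buffers are popped
 * from txbuf_pop_queue, while recycled buffers are pushed onto
 * txbuf_push_queue (see bge_recycle_ring() above); keeping the two
 * queues separate, and swapping them only when the pop queue runs low,
 * helps limit lock contention between the send and recycle paths.  If
 * both queues are empty, we try to grow the tx buffer array (up to
 * tx_array_max).
 */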
static bge_queue_item_t *
bge_get_txbuf(bge_t *bgep, send_ring_t *srp)
{
	bge_queue_item_t *txbuf_item;
	bge_queue_t *txbuf_queue;

	txbuf_queue = srp->txbuf_pop_queue;
	mutex_enter(txbuf_queue->lock);
	if (txbuf_queue->count == 0) {
		mutex_exit(txbuf_queue->lock);
		txbuf_queue = srp->txbuf_push_queue;
		mutex_enter(txbuf_queue->lock);
		if (txbuf_queue->count == 0) {
			mutex_exit(txbuf_queue->lock);
			/* Try to allocate more tx buffers */
			if (srp->tx_array < srp->tx_array_max) {
				mutex_enter(srp->tx_lock);
				txbuf_item = bge_alloc_txbuf_array(bgep, srp);
				mutex_exit(srp->tx_lock);
			} else
				txbuf_item = NULL;
			return (txbuf_item);
		}
	}
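	/*
	 * Whichever queue we ended up on (pop, or push as a fallback)
	 * is non-empty and we still hold its lock: take a buffer from
	 * its head.
	 */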
	txbuf_item = txbuf_queue->head;
	txbuf_queue->head = (bge_queue_item_t *)txbuf_item->next;
	txbuf_queue->count--;
	mutex_exit(txbuf_queue->lock);
	txbuf_item->next = NULL;

	return (txbuf_item);
}

/*
 * Send a message by copying it into a preallocated (and premapped) buffer
 */
static void bge_send_copy(bge_t *bgep, sw_txbuf_t *txbuf, mblk_t *mp);
#pragma	inline(bge_send_copy)

static void
bge_send_copy(bge_t *bgep, sw_txbuf_t *txbuf, mblk_t *mp)
{
	mblk_t *bp;
	uint32_t mblen;
	char *pbuf;

	txbuf->copy_len = 0;
	pbuf = DMA_VPTR(txbuf->buf);
	for (bp = mp; bp != NULL; bp = bp->b_cont) {
		if ((mblen = MBLKL(bp)) == 0)
			continue;
		ASSERT(txbuf->copy_len + mblen <=
		    bgep->chipid.snd_buff_size);
		bcopy(bp->b_rptr, pbuf, mblen);
		pbuf += mblen;
		txbuf->copy_len += mblen;
	}
}

/*
 * Fill the Tx buffer descriptors and trigger the h/w transmission
 */
static void
bge_send_serial(bge_t *bgep, send_ring_t *srp)
{
	send_pkt_t *pktp;
	uint64_t txfill_next;
	uint32_t count;
	uint32_t tx_next;
	sw_sbd_t *ssbdp;
	bge_status_t *bsp;
	bge_sbd_t *hw_sbd_p;
	bge_queue_item_t *txbuf_item;
	sw_txbuf_t *txbuf;

	/*
	 * Try to hold the tx lock:
	 * if we are in interrupt context, use mutex_enter() to
	 * ensure a quick response for tx in interrupt context;
	 * otherwise, use mutex_tryenter() to serialize this h/w tx
	 * BD filling and transmission-triggering task.
	 */
	if (servicing_interrupt() != 0)
		mutex_enter(srp->tx_lock);
	else if (mutex_tryenter(srp->tx_lock) == 0)
		return;		/* already in process	*/

	bsp = DMA_VPTR(bgep->status_block);
	txfill_next = srp->txfill_next;
	tx_next = srp->tx_next;
start_tx:
	for (count = 0; count < bgep->param_drain_max; ++count) {
		pktp = &srp->pktp[txfill_next];
		if (!pktp->tx_ready) {
			if (count == 0)
				srp->tx_block++;
			break;
		}

		/*
		 * If there are not enough BDs, try to recycle more
		 */
		if (srp->tx_free <= 1)
			(void) bge_recycle(bgep, bsp);

		/*
		 * Reserve the required BDs: 1 is enough
		 */
		if (!bge_atomic_reserve(&srp->tx_free, 1)) {
			srp->tx_nobd++;
			break;
		}

		/*
		 * Fill the tx BD
		 */

		/*
		 * Go straight to claiming our already-reserved places
		 * on the train!
		 */
		ASSERT(pktp->txbuf_item != NULL);
		txbuf_item = pktp->txbuf_item;
		pktp->txbuf_item = NULL;
		pktp->tx_ready = B_FALSE;

		txbuf = txbuf_item->item;
		ASSERT(txbuf->copy_len != 0);
		(void) ddi_dma_sync(txbuf->buf.dma_hdl, 0,
		    txbuf->copy_len, DDI_DMA_SYNC_FORDEV);

		ssbdp = &srp->sw_sbds[tx_next];
		ASSERT(ssbdp->pbuf == NULL);
		ssbdp->pbuf = txbuf_item;

		/*
		 * Set up the hardware send buffer descriptor
		 */
		hw_sbd_p = DMA_VPTR(ssbdp->desc);
		hw_sbd_p->flags = 0;
		hw_sbd_p->host_buf_addr = txbuf->buf.cookie.dmac_laddress;
		hw_sbd_p->len = txbuf->copy_len;
		if (pktp->vlan_tci != 0) {
			hw_sbd_p->vlan_tci = pktp->vlan_tci;
			hw_sbd_p->host_buf_addr += VLAN_TAGSZ;
			hw_sbd_p->flags |= SBD_FLAG_VLAN_TAG;
		}
		if (pktp->pflags & HCK_IPV4_HDRCKSUM)
			hw_sbd_p->flags |= SBD_FLAG_IP_CKSUM;
		if (pktp->pflags & HCK_FULLCKSUM)
			hw_sbd_p->flags |= SBD_FLAG_TCP_UDP_CKSUM;
		hw_sbd_p->flags |= SBD_FLAG_PACKET_END;

		txfill_next = NEXT(txfill_next, BGE_SEND_BUF_MAX);
		tx_next = NEXT(tx_next, srp->desc.nslots);
	}

	/*
	 * Trigger h/w to start transmission.
	 */
	if (count != 0) {
		bge_atomic_sub64(&srp->tx_flow, count);
		srp->txfill_next = txfill_next;

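		/*
		 * If the range of descriptors just filled wrapped past
		 * the end of the ring, sync the tail portion (from the
		 * old tx_next to the end of the ring) first, then fall
		 * through to sync the remainder starting at slot 0.
		 */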
		if (srp->tx_next > tx_next) {
			(void) ddi_dma_sync(ssbdp->desc.dma_hdl, 0,
			    (srp->desc.nslots - srp->tx_next) *
			    sizeof (bge_sbd_t),
			    DDI_DMA_SYNC_FORDEV);
			count -= srp->desc.nslots - srp->tx_next;
			ssbdp = &srp->sw_sbds[0];
		}
		(void) ddi_dma_sync(ssbdp->desc.dma_hdl, 0,
		    count*sizeof (bge_sbd_t), DDI_DMA_SYNC_FORDEV);
		bge_mbx_put(bgep, srp->chip_mbx_reg, tx_next);
		srp->tx_next = tx_next;
		atomic_or_32(&bgep->watchdog, 1);

		if (srp->tx_flow != 0 && srp->tx_free > 1)
			goto start_tx;
	}

	mutex_exit(srp->tx_lock);
}

mblk_t *
bge_ring_tx(void *arg, mblk_t *mp)
{
	send_ring_t *srp = arg;
	bge_t *bgep = srp->bgep;
	struct ether_vlan_header *ehp;
	bge_queue_item_t *txbuf_item;
	sw_txbuf_t *txbuf;
	send_pkt_t *pktp;
	uint64_t pkt_slot;
	uint16_t vlan_tci;
	uint32_t pflags;
	char *pbuf;

	ASSERT(mp->b_next == NULL);

	/*
	 * Get a s/w tx buffer first
	 */
	txbuf_item = bge_get_txbuf(bgep, srp);
	if (txbuf_item == NULL) {
		/* no tx buffer available */
		srp->tx_nobuf++;
		bgep->tx_resched_needed = B_TRUE;
		bge_send_serial(bgep, srp);
		return (mp);
	}

	/*
	 * Copy all mp fragments to the pkt buffer
	 */
	txbuf = txbuf_item->item;
	bge_send_copy(bgep, txbuf, mp);

	/*
	 * Determine if the packet is VLAN tagged.
	 */
	ASSERT(txbuf->copy_len >= sizeof (struct ether_header));
	pbuf = DMA_VPTR(txbuf->buf);

	ehp = (void *)pbuf;
	if (ehp->ether_tpid == htons(ETHERTYPE_VLAN)) {
		/* Strip the vlan tag */
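		/*
		 * Shift the destination and source MAC addresses up by
		 * VLAN_TAGSZ, overwriting the TPID/TCI; the untagged
		 * frame now starts VLAN_TAGSZ bytes into the buffer,
		 * which is why bge_send_serial() adds VLAN_TAGSZ to the
		 * buffer address when vlan_tci is non-zero.
		 */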
		vlan_tci = ntohs(ehp->ether_tci);
		pbuf = memmove(pbuf + VLAN_TAGSZ, pbuf, 2 * ETHERADDRL);
		txbuf->copy_len -= VLAN_TAGSZ;
	} else
		vlan_tci = 0;

	/*
	 * Retrieve checksum offloading info.
	 */
	mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &pflags);

	/*
	 * Calculate pseudo checksum if needed.
	 */
	if ((pflags & HCK_FULLCKSUM) &&
	    (bgep->chipid.flags & CHIP_FLAG_PARTIAL_CSUM))
		bge_pseudo_cksum((uint8_t *)pbuf);

	/*
	 * Packet buffer is ready to send: get and fill pkt info
	 */
	pkt_slot = bge_atomic_next(&srp->txpkt_next, BGE_SEND_BUF_MAX);
	pktp = &srp->pktp[pkt_slot];
	ASSERT(pktp->txbuf_item == NULL);
	pktp->txbuf_item = txbuf_item;
	pktp->vlan_tci = vlan_tci;
	pktp->pflags = pflags;
	atomic_inc_64(&srp->tx_flow);
	ASSERT(pktp->tx_ready == B_FALSE);
	pktp->tx_ready = B_TRUE;

	/*
	 * Fill the h/w BD and trigger the h/w to start transmission
	 */
	bge_send_serial(bgep, srp);

	srp->pushed_bytes += MBLKL(mp);

	/*
	 * We've copied the contents, so the message can be freed right away
	 */
	freemsg(mp);
	return (NULL);
}

static mblk_t *
bge_send(bge_t *bgep, mblk_t *mp)
{
	send_ring_t *ring;

	ring = &bgep->send[0];	/* ring 0 */

	return (bge_ring_tx(ring, mp));
}

uint_t
bge_send_drain(caddr_t arg)
{
	uint_t ring = 0;	/* use ring 0 */
	bge_t *bgep;
	send_ring_t *srp;

	bgep = (void *)arg;
	BGE_TRACE(("bge_send_drain($%p)", (void *)bgep));

	srp = &bgep->send[ring];
	bge_send_serial(bgep, srp);

	if (bgep->tx_resched_needed &&
	    (srp->tx_flow < srp->tx_buffers_low) &&
	    (bgep->bge_mac_state == BGE_MAC_STARTED)) {
		mac_tx_update(bgep->mh);
		bgep->tx_resched_needed = B_FALSE;
		bgep->tx_resched++;
	}

	return (DDI_INTR_CLAIMED);
}

/*
 * bge_m_tx() - send a chain of packets
 */
mblk_t *
bge_m_tx(void *arg, mblk_t *mp)
{
	bge_t *bgep = arg;		/* private device info	*/
	mblk_t *next;

	BGE_TRACE(("bge_m_tx($%p, $%p)", arg, (void *)mp));

	ASSERT(mp != NULL);
	ASSERT(bgep->bge_mac_state == BGE_MAC_STARTED);

	rw_enter(bgep->errlock, RW_READER);
	if ((bgep->bge_chip_state != BGE_CHIP_RUNNING) ||
	    !(bgep->param_link_up)) {
		BGE_DEBUG(("bge_m_tx: chip not running or link down"));
		freemsgchain(mp);
		mp = NULL;
	}

	while (mp != NULL) {
		next = mp->b_next;
		mp->b_next = NULL;

		if ((mp = bge_send(bgep, mp)) != NULL) {
			mp->b_next = next;
			break;
		}

		mp = next;
	}
	rw_exit(bgep->errlock);

	return (mp);
}