/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2007-2009 Myricom, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#ifndef lint
static const char __idstring[] =
	"@(#)$Id: myri10ge.c,v 1.186 2009-06-29 13:47:22 gallatin Exp $";
#endif

#define	MXGEFW_NDIS
#include "myri10ge_var.h"
#include "rss_eth_z8e.h"
#include "rss_ethp_z8e.h"
#include "mcp_gen_header.h"

#define	MYRI10GE_MAX_ETHER_MTU 9014

#define	MYRI10GE_ETH_STOPPED 0
#define	MYRI10GE_ETH_STOPPING 1
#define	MYRI10GE_ETH_STARTING 2
#define	MYRI10GE_ETH_RUNNING 3
#define	MYRI10GE_ETH_OPEN_FAILED 4
#define	MYRI10GE_ETH_SUSPENDED_RUNNING 5

static int myri10ge_small_bytes = 510;
static int myri10ge_intr_coal_delay = 125;
static int myri10ge_flow_control = 1;
#if #cpu(i386) || defined __i386 || defined i386 ||	\
	defined __i386__ || #cpu(x86_64) || defined __x86_64__
static int myri10ge_nvidia_ecrc_enable = 1;
#endif
static int myri10ge_mtu_override = 0;
static int myri10ge_tx_copylen = 512;
static int myri10ge_deassert_wait = 1;
static int myri10ge_verbose = 0;
static int myri10ge_watchdog_reset = 0;
static int myri10ge_use_msix = 1;
static int myri10ge_max_slices = -1;
static int myri10ge_use_msi = 1;
int myri10ge_force_firmware = 0;
static boolean_t myri10ge_use_lso = B_TRUE;
static int myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
static int myri10ge_tx_hash = 1;
static int myri10ge_lro = 0;
static int myri10ge_lro_cnt = 8;
int myri10ge_lro_max_aggr = 2;
static int myri10ge_lso_copy = 0;
static mblk_t *myri10ge_send_wrapper(void *arg, mblk_t *mp);
int myri10ge_tx_handles_initial = 128;

static kmutex_t myri10ge_param_lock;
static void* myri10ge_db_lastfree;

static int myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
static int myri10ge_quiesce(dev_info_t *dip);

DDI_DEFINE_STREAM_OPS(myri10ge_ops, nulldev, nulldev, myri10ge_attach,
    myri10ge_detach, nodev, NULL, D_MP, NULL, myri10ge_quiesce);


static struct modldrv modldrv = {
	&mod_driverops,
	"Myricom 10G driver (10GbE)",
	&myri10ge_ops,
};


static struct modlinkage modlinkage = {
	MODREV_1,
	{&modldrv, NULL},
};

unsigned char myri10ge_broadcastaddr[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

static ddi_dma_attr_t myri10ge_misc_dma_attr = {
	DMA_ATTR_V0,			/* version number. */
	(uint64_t)0,			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
	(uint64_t)4096,			/* alignment */
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
	(uint64_t)0x7fffffff,		/* maximum segment size */
	1,				/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};

/*
 * The Myri10GE NIC has the following constraints on receive buffers:
 * 1) Buffers which cross a 4KB boundary must be aligned to 4KB
 * 2) Buffers which are not aligned to 4KB must not cross a 4KB boundary
 */
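
/*
 * Illustration (hypothetical addresses): a 2KB buffer at physical
 * offset 0x800 sits entirely inside one 4KB page and is legal, while
 * the same buffer at 0xc00 would cross the boundary at 0x1000 without
 * being 4KB aligned, violating rule 2.  The attribute structures below
 * encode these rules through dma_attr_align and dma_attr_seg.
 */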

static ddi_dma_attr_t myri10ge_rx_jumbo_dma_attr = {
	DMA_ATTR_V0,			/* version number. */
	(uint64_t)0,			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
	(uint64_t)4096,			/* alignment */
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
	UINT64_MAX,			/* maximum segment size */
	1,				/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};

static ddi_dma_attr_t myri10ge_rx_std_dma_attr = {
	DMA_ATTR_V0,			/* version number. */
	(uint64_t)0,			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
#if defined sparc64 || defined __sparcv9
	(uint64_t)4096,			/* alignment */
#else
	(uint64_t)0x80,			/* alignment */
#endif
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
#if defined sparc64 || defined __sparcv9
	UINT64_MAX,			/* maximum segment size */
#else
	(uint64_t)0xfff,		/* maximum segment size */
#endif
	1,				/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};

static ddi_dma_attr_t myri10ge_tx_dma_attr = {
	DMA_ATTR_V0,			/* version number. */
	(uint64_t)0,			/* low address */
	(uint64_t)0xffffffffffffffffULL, /* high address */
	(uint64_t)0x7ffffff,		/* address counter max */
	(uint64_t)1,			/* alignment */
	(uint_t)0x7f,			/* burstsizes for 32b and 64b xfers */
	(uint32_t)0x1,			/* minimum transfer size */
	(uint64_t)0x7fffffff,		/* maximum transfer size */
	UINT64_MAX,			/* maximum segment size */
	INT32_MAX,			/* scatter/gather list length */
	1,				/* granularity */
	0				/* attribute flags */
};

#if defined sparc64 || defined __sparcv9
#define	WC 0
#else
#define	WC 1
#endif

struct ddi_device_acc_attr myri10ge_dev_access_attr = {
	DDI_DEVICE_ATTR_V0,	/* version */
	DDI_NEVERSWAP_ACC,	/* endian flags */
#if WC
	DDI_MERGING_OK_ACC	/* data order */
#else
	DDI_STRICTORDER_ACC
#endif
};
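
/*
 * Note: with WC == 1 (x86), the NIC windows are mapped with
 * DDI_MERGING_OK_ACC so the CPU may write-combine the 32- and 64-byte
 * PIO bursts emitted by myri10ge_pio_copy() below; on SPARC (WC == 0)
 * the mapping falls back to strictly ordered stores.
 */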

static void myri10ge_watchdog(void *arg);

#ifdef MYRICOM_PRIV
int myri10ge_mtu = MYRI10GE_MAX_ETHER_MTU + MXGEFW_PAD + VLAN_TAGSZ;
#else
int myri10ge_mtu = ETHERMAX + MXGEFW_PAD + VLAN_TAGSZ;
#endif
int myri10ge_bigbufs_initial = 1024;
int myri10ge_bigbufs_max = 4096;


caddr_t
myri10ge_dma_alloc(dev_info_t *dip, size_t len,
    ddi_dma_attr_t *attr, ddi_device_acc_attr_t *accattr,
    uint_t alloc_flags, int bind_flags, struct myri10ge_dma_stuff *dma,
    int warn, int (*wait)(caddr_t))
{
	caddr_t kaddr;
	size_t real_length;
	ddi_dma_cookie_t cookie;
	uint_t count;
	int err;

	err = ddi_dma_alloc_handle(dip, attr, wait,
	    NULL, &dma->handle);
	if (err != DDI_SUCCESS) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: ddi_dma_alloc_handle failed\n");
		goto abort_with_nothing;
	}

	err = ddi_dma_mem_alloc(dma->handle, len, accattr, alloc_flags,
	    wait, NULL, &kaddr, &real_length,
	    &dma->acc_handle);
	if (err != DDI_SUCCESS) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: ddi_dma_mem_alloc failed\n");
		goto abort_with_handle;
	}

	err = ddi_dma_addr_bind_handle(dma->handle, NULL, kaddr, len,
	    bind_flags, wait, NULL, &cookie, &count);

	if (err != DDI_SUCCESS) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: ddi_dma_addr_bind_handle failed\n");
		goto abort_with_mem;
	}

	if (count != 1) {
		if (warn)
			cmn_err(CE_WARN,
			    "myri10ge: got too many dma segments ");
		goto abort_with_bind;
	}
	dma->low = htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress));
	dma->high = htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress));
	return (kaddr);

abort_with_bind:
	(void) ddi_dma_unbind_handle(dma->handle);

abort_with_mem:
	ddi_dma_mem_free(&dma->acc_handle);

abort_with_handle:
	ddi_dma_free_handle(&dma->handle);
abort_with_nothing:
	if (warn) {
		cmn_err(CE_WARN, "myri10ge: myri10ge_dma_alloc failed.\n ");
		cmn_err(CE_WARN, "args: dip=%p len=0x%lx ddi_dma_attr=%p\n",
		    (void*) dip, len, (void*) attr);
		cmn_err(CE_WARN,
		    "args: ddi_device_acc_attr=%p alloc_flags=0x%x\n",
		    (void*) accattr, alloc_flags);
		cmn_err(CE_WARN, "args: bind_flags=0x%x dmastuff=%p",
		    bind_flags, (void*) dma);
	}
	return (NULL);

}

void
myri10ge_dma_free(struct myri10ge_dma_stuff *dma)
{
	(void) ddi_dma_unbind_handle(dma->handle);
	ddi_dma_mem_free(&dma->acc_handle);
	ddi_dma_free_handle(&dma->handle);
}

static inline void
myri10ge_pio_copy32(void *to, uint32_t *from32, size_t size)
{
	register volatile uint32_t *to32;
	size_t i;

	to32 = (volatile uint32_t *) to;
	for (i = (size / 4); i; i--) {
		*to32 = *from32;
		to32++;
		from32++;
	}
}

#if defined(_LP64)
static inline void
myri10ge_pio_copy64(void *to, uint64_t *from64, size_t size)
{
	register volatile uint64_t *to64;
	size_t i;

	to64 = (volatile uint64_t *) to;
	for (i = (size / 8); i; i--) {
		*to64 = *from64;
		to64++;
		from64++;
	}
}
#endif

/*
 * This routine copies memory from the host to the NIC.
 * The "size" argument must always be a multiple of
 * the size of long (4 or 8 bytes), and to/from must also
 * be naturally aligned.
 */
static inline void
myri10ge_pio_copy(void *to, void *from, size_t size)
{
#if !defined(_LP64)
	ASSERT((size % 4) == 0);
	myri10ge_pio_copy32(to, (uint32_t *)from, size);
#else
	ASSERT((size % 8) == 0);
	myri10ge_pio_copy64(to, (uint64_t *)from, size);
#endif
}
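
/*
 * Typical usage (see the firmware handoff and dummy-rdma paths below):
 * a 64-byte, 8-byte-aligned command block is pushed into NIC SRAM in
 * one call, e.g. myri10ge_pio_copy((char *)submit, buf, 64), followed
 * by mb() so the burst reaches the LANai before we poll for a reply.
 */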


/*
 * Due to various bugs in Solaris (especially bug 6186772 where the
 * TCP/UDP checksum is calculated incorrectly on mblk chains with more
 * than two elements), and the design bug where hardware checksums are
 * ignored on mblk chains with more than 2 elements, we need to
 * allocate a private pool of physically contiguous receive buffers.
 */

static void
myri10ge_jpool_init(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;

	bzero(jpool, sizeof (*jpool));
	mutex_init(&jpool->mtx, NULL, MUTEX_DRIVER,
	    ss->mgp->icookie);
	jpool->head = NULL;
}

static void
myri10ge_jpool_fini(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;

	if (jpool->head != NULL) {
		cmn_err(CE_WARN,
		    "%s: BUG! myri10ge_jpool_fini called on non-empty pool\n",
		    ss->mgp->name);
	}
	mutex_destroy(&jpool->mtx);
}


/*
 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic. We re-write the first segment's low
 * DMA address to mark it valid only after we write the entire chunk
 * in a burst
 */
static inline void
myri10ge_submit_8rx(mcp_kreq_ether_recv_t *dst, mcp_kreq_ether_recv_t *src)
{
	src->addr_low |= BE_32(1);
	myri10ge_pio_copy(dst, src, 4 * sizeof (*src));
	mb();
	myri10ge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
	mb();
	src->addr_low &= ~(BE_32(1));
	dst->addr_low = src->addr_low;
	mb();
}
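
/*
 * Ordering sketch: receive buffers are at least 128-byte aligned, so
 * bit 0 of addr_low is normally clear; while it is set the NIC
 * evidently treats the first descriptor as not yet valid, letting the
 * two 32-byte bursts land in any order.  Only the final rewrite of
 * dst->addr_low with the bit cleared hands all 8 descriptors to the
 * mcp at once.
 */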

static void
myri10ge_pull_jpool(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *jtail, *j, *jfree;
	volatile uintptr_t *putp;
	uintptr_t put;
	int i;

	/* find tail */
	jtail = NULL;
	if (jpool->head != NULL) {
		j = jpool->head;
		while (j->next != NULL)
			j = j->next;
		jtail = j;
	}

	/*
	 * iterate over all per-CPU caches, and add contents into
	 * jpool
	 */
	for (i = 0; i < MYRI10GE_MAX_CPUS; i++) {
		/* take per-CPU free list */
		putp = (void *)&jpool->cpu[i & MYRI10GE_MAX_CPU_MASK].head;
		if (*putp == NULL)
			continue;
		put = atomic_swap_ulong(putp, 0);
		jfree = (struct myri10ge_jpool_entry *)put;

		/* append to pool */
		if (jtail == NULL) {
			jpool->head = jfree;
		} else {
			jtail->next = jfree;
		}
		j = jfree;
		while (j->next != NULL)
			j = j->next;
		jtail = j;
	}
}

/*
 * Transfers buffers from the free pool to the nic
 * Must be called holding the jpool mutex.
 */

static inline void
myri10ge_restock_jumbos(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j;
	myri10ge_rx_ring_t *rx;
	int i, idx, limit;

	rx = &ss->rx_big;
	limit = ss->j_rx_cnt + (rx->mask + 1);

	for (i = rx->cnt; i != limit; i++) {
		idx = i & (rx->mask);
		j = jpool->head;
		if (j == NULL) {
			myri10ge_pull_jpool(ss);
			j = jpool->head;
			if (j == NULL) {
				break;
			}
		}
		jpool->head = j->next;
		rx->info[idx].j = j;
		rx->shadow[idx].addr_low = j->dma.low;
		rx->shadow[idx].addr_high = j->dma.high;
		/* copy 4 descriptors (32-bytes) to the mcp at a time */
		if ((idx & 7) == 7) {
			myri10ge_submit_8rx(&rx->lanai[idx - 7],
			    &rx->shadow[idx - 7]);
		}
	}
	rx->cnt = i;
}

/*
 * Transfer buffers from the nic to the free pool.
 * Should be called holding the jpool mutex
 */

static inline void
myri10ge_unstock_jumbos(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j;
	myri10ge_rx_ring_t *rx;
	int i;

	mutex_enter(&jpool->mtx);
	rx = &ss->rx_big;

	for (i = 0; i < rx->mask + 1; i++) {
		j = rx->info[i].j;
		rx->info[i].j = NULL;
		if (j == NULL)
			continue;
		j->next = jpool->head;
		jpool->head = j;
	}
	mutex_exit(&jpool->mtx);

}


/*
 * Free routine which is called when the mblk allocated via
 * esballoc() is freed. Here we return the jumbo buffer
 * to the free pool, and possibly pass some jumbo buffers
 * to the nic
 */

static void
myri10ge_jfree_rtn(void *arg)
{
	struct myri10ge_jpool_entry *j = (struct myri10ge_jpool_entry *)arg;
	struct myri10ge_jpool_stuff *jpool;
	volatile uintptr_t *putp;
	uintptr_t old, new;

	jpool = &j->ss->jpool;

	/* prepend buffer locklessly to per-CPU freelist */
	putp = (void *)&jpool->cpu[CPU->cpu_seqid & MYRI10GE_MAX_CPU_MASK].head;
	new = (uintptr_t)j;
	do {
		old = *putp;
		j->next = (void *)old;
	} while (atomic_cas_ulong(putp, old, new) != old);
}
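
/*
 * This is the classic lock-free LIFO push: producers only ever CAS new
 * heads onto a per-CPU list, and the single consumer detaches an
 * entire list at once via atomic_swap_ulong() in myri10ge_pull_jpool(),
 * so the ABA hazard of a concurrent lock-free pop never arises here.
 */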

static void
myri10ge_remove_jbuf(struct myri10ge_jpool_entry *j)
{
	(void) ddi_dma_unbind_handle(j->dma_handle);
	ddi_dma_mem_free(&j->acc_handle);
	ddi_dma_free_handle(&j->dma_handle);
	kmem_free(j, sizeof (*j));
}


/*
 * Allocates one physically contiguous descriptor
 * and adds it to the jumbo buffer pool.
 */

static int
myri10ge_add_jbuf(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_entry *j;
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	ddi_dma_attr_t *rx_dma_attr;
	size_t real_length;
	ddi_dma_cookie_t cookie;
	uint_t count;
	int err;

	if (myri10ge_mtu < 2048)
		rx_dma_attr = &myri10ge_rx_std_dma_attr;
	else
		rx_dma_attr = &myri10ge_rx_jumbo_dma_attr;

again:
	j = (struct myri10ge_jpool_entry *)
	    kmem_alloc(sizeof (*j), KM_SLEEP);
	err = ddi_dma_alloc_handle(ss->mgp->dip, rx_dma_attr,
	    DDI_DMA_DONTWAIT, NULL, &j->dma_handle);
	if (err != DDI_SUCCESS)
		goto abort_with_j;

	err = ddi_dma_mem_alloc(j->dma_handle, myri10ge_mtu,
	    &myri10ge_dev_access_attr, DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    NULL, &j->buf, &real_length, &j->acc_handle);
	if (err != DDI_SUCCESS)
		goto abort_with_handle;

	err = ddi_dma_addr_bind_handle(j->dma_handle, NULL, j->buf,
	    real_length, DDI_DMA_READ|DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    NULL, &cookie, &count);
	if (err != DDI_SUCCESS)
		goto abort_with_mem;

	/*
	 * Make certain std MTU buffers do not cross a 4KB boundary:
	 *
	 * Setting dma_attr_align=4096 will do this, but the system
	 * will only allocate 1 RX buffer per 4KB page, rather than 2.
	 * Setting dma_attr_granular=4096 *seems* to work around this,
	 * but I'm paranoid about future systems no longer honoring
	 * this, so fall back to the safe, but memory wasting way if a
	 * buffer crosses a 4KB boundary.
	 */

	if (rx_dma_attr == &myri10ge_rx_std_dma_attr &&
	    rx_dma_attr->dma_attr_align != 4096) {
		uint32_t start, end;

		start = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress);
		end = start + myri10ge_mtu;
		if (((end >> 12) != (start >> 12)) && (start & 4095U)) {
			printf("std buffer crossed a 4KB boundary!\n");
			myri10ge_remove_jbuf(j);
			rx_dma_attr->dma_attr_align = 4096;
			rx_dma_attr->dma_attr_seg = UINT64_MAX;
			goto again;
		}
	}

	j->dma.low =
	    htonl(MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress));
	j->dma.high =
	    htonl(MYRI10GE_HIGHPART_TO_U32(cookie.dmac_laddress));
	j->ss = ss;


	j->free_func.free_func = myri10ge_jfree_rtn;
	j->free_func.free_arg = (char *)j;
	mutex_enter(&jpool->mtx);
	j->next = jpool->head;
	jpool->head = j;
	jpool->num_alloc++;
	mutex_exit(&jpool->mtx);
	return (0);

abort_with_mem:
	ddi_dma_mem_free(&j->acc_handle);

abort_with_handle:
	ddi_dma_free_handle(&j->dma_handle);

abort_with_j:
	kmem_free(j, sizeof (*j));

	/*
	 * If an allocation failed, perhaps it failed because it could
	 * not satisfy the granularity requirement.  Disable that, and
	 * try again.
	 */
	if (rx_dma_attr == &myri10ge_rx_std_dma_attr &&
	    rx_dma_attr->dma_attr_align != 4096) {
		cmn_err(CE_NOTE,
		    "!alloc failed, reverting to gran=1\n");
		rx_dma_attr->dma_attr_align = 4096;
		rx_dma_attr->dma_attr_seg = UINT64_MAX;
		goto again;
	}
	return (err);
}

static int
myri10ge_jfree_cnt(struct myri10ge_jpool_stuff *jpool)
{
	int i;
	struct myri10ge_jpool_entry *j;

	mutex_enter(&jpool->mtx);
	j = jpool->head;
	i = 0;
	while (j != NULL) {
		i++;
		j = j->next;
	}
	mutex_exit(&jpool->mtx);
	return (i);
}

static int
myri10ge_add_jbufs(struct myri10ge_slice_state *ss, int num, int total)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	int allocated = 0;
	int err;
	int needed;

	/*
	 * if total is set, the user wants "num" jbufs in the pool,
	 * otherwise the user wants "num" additional jbufs
	 * added to the pool
	 */
	if (total && jpool->num_alloc) {
		allocated = myri10ge_jfree_cnt(jpool);
		needed = num - allocated;
	} else {
		needed = num;
	}

	while (needed > 0) {
		needed--;
		err = myri10ge_add_jbuf(ss);
		if (err == 0) {
			allocated++;
		}
	}
	return (allocated);
}

static void
myri10ge_remove_jbufs(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j;

	mutex_enter(&jpool->mtx);
	myri10ge_pull_jpool(ss);
	while (jpool->head != NULL) {
		jpool->num_alloc--;
		j = jpool->head;
		jpool->head = j->next;
		myri10ge_remove_jbuf(j);
	}
	mutex_exit(&jpool->mtx);
}

static void
myri10ge_carve_up_jbufs_into_small_ring(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j = NULL;
	caddr_t ptr;
	uint32_t dma_low, dma_high;
	int idx, len;
	unsigned int alloc_size;

	dma_low = dma_high = len = 0;
	alloc_size = myri10ge_small_bytes + MXGEFW_PAD;
	ptr = NULL;
	for (idx = 0; idx < ss->rx_small.mask + 1; idx++) {
		/* Allocate a jumbo frame and carve it into small frames */
		if (len < alloc_size) {
			mutex_enter(&jpool->mtx);
			/* remove jumbo from freelist */
			j = jpool->head;
			jpool->head = j->next;
			/* place it onto small list */
			j->next = ss->small_jpool;
			ss->small_jpool = j;
			mutex_exit(&jpool->mtx);
			len = myri10ge_mtu;
			dma_low = ntohl(j->dma.low);
			dma_high = ntohl(j->dma.high);
			ptr = j->buf;
		}
		ss->rx_small.info[idx].ptr = ptr;
		ss->rx_small.shadow[idx].addr_low = htonl(dma_low);
		ss->rx_small.shadow[idx].addr_high = htonl(dma_high);
		len -= alloc_size;
		ptr += alloc_size;
		dma_low += alloc_size;
	}
}

/*
 * Return the jumbo bufs we carved up for small to the jumbo pool
 */

static void
myri10ge_release_small_jbufs(struct myri10ge_slice_state *ss)
{
	struct myri10ge_jpool_stuff *jpool = &ss->jpool;
	struct myri10ge_jpool_entry *j = NULL;

	mutex_enter(&jpool->mtx);
	while (ss->small_jpool != NULL) {
		j = ss->small_jpool;
		ss->small_jpool = j->next;
		j->next = jpool->head;
		jpool->head = j;
	}
	mutex_exit(&jpool->mtx);
	ss->jbufs_for_smalls = 0;
}

static int
myri10ge_add_tx_handle(struct myri10ge_slice_state *ss)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	struct myri10ge_priv *mgp = ss->mgp;
	struct myri10ge_tx_dma_handle *handle;
	int err;

	handle = kmem_zalloc(sizeof (*handle), KM_SLEEP);
	err = ddi_dma_alloc_handle(mgp->dip,
	    &myri10ge_tx_dma_attr,
	    DDI_DMA_SLEEP, NULL,
	    &handle->h);
	if (err) {
		static int limit = 0;
		if (limit == 0)
			cmn_err(CE_WARN, "%s: Failed to alloc tx dma handle\n",
			    mgp->name);
		limit++;
		kmem_free(handle, sizeof (*handle));
		return (err);
	}
	mutex_enter(&tx->handle_lock);
	MYRI10GE_SLICE_STAT_INC(tx_handles_alloced);
	handle->next = tx->free_tx_handles;
	tx->free_tx_handles = handle;
	mutex_exit(&tx->handle_lock);
	return (DDI_SUCCESS);
}

static void
myri10ge_remove_tx_handles(struct myri10ge_slice_state *ss)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	struct myri10ge_tx_dma_handle *handle;
	mutex_enter(&tx->handle_lock);

	handle = tx->free_tx_handles;
	while (handle != NULL) {
		tx->free_tx_handles = handle->next;
		ddi_dma_free_handle(&handle->h);
		kmem_free(handle, sizeof (*handle));
		handle = tx->free_tx_handles;
		MYRI10GE_SLICE_STAT_DEC(tx_handles_alloced);
	}
	mutex_exit(&tx->handle_lock);
	if (MYRI10GE_SLICE_STAT(tx_handles_alloced) != 0) {
		cmn_err(CE_WARN, "%s: %d tx dma handles allocated at close\n",
		    ss->mgp->name,
		    (int)MYRI10GE_SLICE_STAT(tx_handles_alloced));
	}
}

static void
myri10ge_free_tx_handles(myri10ge_tx_ring_t *tx,
    struct myri10ge_tx_dma_handle_head *list)
{
	mutex_enter(&tx->handle_lock);
	list->tail->next = tx->free_tx_handles;
	tx->free_tx_handles = list->head;
	mutex_exit(&tx->handle_lock);
}

static void
myri10ge_free_tx_handle_slist(myri10ge_tx_ring_t *tx,
    struct myri10ge_tx_dma_handle *handle)
{
	struct myri10ge_tx_dma_handle_head list;

	if (handle == NULL)
		return;
	list.head = handle;
	list.tail = handle;
	while (handle != NULL) {
		list.tail = handle;
		handle = handle->next;
	}
	myri10ge_free_tx_handles(tx, &list);
}

static int
myri10ge_alloc_tx_handles(struct myri10ge_slice_state *ss, int count,
    struct myri10ge_tx_dma_handle **ret)
{
	myri10ge_tx_ring_t *tx = &ss->tx;
	struct myri10ge_tx_dma_handle *handle;
	int err, i;

	mutex_enter(&tx->handle_lock);
	for (i = 0; i < count; i++) {
		handle = tx->free_tx_handles;
		while (handle == NULL) {
			mutex_exit(&tx->handle_lock);
			err = myri10ge_add_tx_handle(ss);
			if (err != DDI_SUCCESS) {
				goto abort_with_handles;
			}
			mutex_enter(&tx->handle_lock);
			handle = tx->free_tx_handles;
		}
		tx->free_tx_handles = handle->next;
		handle->next = *ret;
		*ret = handle;
	}
	mutex_exit(&tx->handle_lock);
	return (DDI_SUCCESS);

abort_with_handles:
	myri10ge_free_tx_handle_slist(tx, *ret);
	return (err);
}


/*
 * Frees DMA resources associated with the send ring
 */
static void
myri10ge_unprepare_tx_ring(struct myri10ge_slice_state *ss)
{
	myri10ge_tx_ring_t *tx;
	struct myri10ge_tx_dma_handle_head handles;
	size_t bytes;
	int idx;

	tx = &ss->tx;
	handles.head = NULL;
	handles.tail = NULL;
	for (idx = 0; idx < ss->tx.mask + 1; idx++) {
		if (tx->info[idx].m) {
			(void) ddi_dma_unbind_handle(tx->info[idx].handle->h);
			handles.head = tx->info[idx].handle;
			if (handles.tail == NULL)
				handles.tail = tx->info[idx].handle;
			freeb(tx->info[idx].m);
			tx->info[idx].m = 0;
			tx->info[idx].handle = 0;
		}
		tx->cp[idx].va = NULL;
		myri10ge_dma_free(&tx->cp[idx].dma);
	}
	bytes = sizeof (*tx->cp) * (tx->mask + 1);
	kmem_free(tx->cp, bytes);
	tx->cp = NULL;
	if (handles.head != NULL)
		myri10ge_free_tx_handles(tx, &handles);
	myri10ge_remove_tx_handles(ss);
}

/*
 * Allocates DMA handles associated with the send ring
 */
static inline int
myri10ge_prepare_tx_ring(struct myri10ge_slice_state *ss)
{
	struct myri10ge_tx_dma_handle *handles;
	int h;
	size_t bytes;

	bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1);
	ss->tx.cp = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->tx.cp == NULL) {
		cmn_err(CE_WARN,
		    "%s: Failed to allocate tx copyblock storage\n",
		    ss->mgp->name);
		return (DDI_FAILURE);
	}


	/* allocate the TX copyblocks */
	for (h = 0; h < ss->tx.mask + 1; h++) {
		ss->tx.cp[h].va = myri10ge_dma_alloc(ss->mgp->dip,
		    4096, &myri10ge_rx_jumbo_dma_attr,
		    &myri10ge_dev_access_attr, DDI_DMA_STREAMING,
		    DDI_DMA_WRITE|DDI_DMA_STREAMING, &ss->tx.cp[h].dma, 1,
		    DDI_DMA_DONTWAIT);
		if (ss->tx.cp[h].va == NULL) {
			cmn_err(CE_WARN, "%s: Failed to allocate tx "
			    "copyblock %d\n", ss->mgp->name, h);
			goto abort_with_copyblocks;
		}
	}
	/* pre-allocate transmit handles */
	handles = NULL;
	(void) myri10ge_alloc_tx_handles(ss, myri10ge_tx_handles_initial,
	    &handles);
	if (handles != NULL)
		myri10ge_free_tx_handle_slist(&ss->tx, handles);

	return (DDI_SUCCESS);

abort_with_copyblocks:
	while (h > 0) {
		h--;
		myri10ge_dma_free(&ss->tx.cp[h].dma);
	}

	bytes = sizeof (*ss->tx.cp) * (ss->tx.mask + 1);
	kmem_free(ss->tx.cp, bytes);
	ss->tx.cp = NULL;
	return (DDI_FAILURE);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PT:ddd mmm xx xx:xx:xx xx\0
 * PV:ddd mmm xx xx:xx:xx xx\0
 */
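/*
 * For example, a (made-up) string set might look like:
 *   SN=404261\0MAC=00:60:dd:47:ab:cd\0PC=M3F-PCIXE-2\0\0
 * The parser below only requires the MAC= field; SN= and PC= are
 * merely remembered via mgp->sn_str and mgp->pc_str.
 */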
static int
myri10ge_read_mac_addr(struct myri10ge_priv *mgp)
{
#define	MYRI10GE_NEXT_STRING(p) while (ptr < limit && *ptr++)
#define	myri10ge_digit(c) (((c) >= '0' && (c) <= '9') ? ((c) - '0') :	\
		(((c) >= 'A' && (c) <= 'F') ? (10 + (c) - 'A') :	\
		(((c) >= 'a' && (c) <= 'f') ? (10 + (c) - 'a') : -1)))

	char *ptr, *limit;
	int i, hv, lv;

	ptr = mgp->eeprom_strings;
	limit = mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE;

	while (*ptr != '\0' && ptr < limit) {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 4;
			if (myri10ge_verbose)
				printf("%s: mac address = %s\n", mgp->name,
				    ptr);
			mgp->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				if ((ptr + 2) > limit)
					goto abort;

				if (*(ptr+1) == ':') {
					hv = 0;
					lv = myri10ge_digit(*ptr); ptr++;
				} else {
					hv = myri10ge_digit(*ptr); ptr++;
					lv = myri10ge_digit(*ptr); ptr++;
				}
				mgp->mac_addr[i] = (hv << 4) | lv;
				ptr++;
			}
		}
		if (memcmp((const void *)ptr, "SN=", 3) == 0) {
			ptr += 3;
			mgp->sn_str = (char *)ptr;
		}
		if (memcmp((const void *)ptr, "PC=", 3) == 0) {
			ptr += 3;
			mgp->pc_str = (char *)ptr;
		}
		MYRI10GE_NEXT_STRING(ptr);
	}

	return (0);

abort:
	cmn_err(CE_WARN, "%s: failed to parse eeprom_strings", mgp->name);
	return (ENXIO);
}


/*
 * Determine the register set containing the PCI resource we
 * want to map: the memory-mappable part of the interface. We do
 * this by scanning the DDI "reg" property of the interface,
 * which is an array of mx_ddi_reg_set structures.
 */
static int
myri10ge_reg_set(dev_info_t *dip, int *reg_set, int *span,
    unsigned long *busno, unsigned long *devno,
    unsigned long *funcno)
{

#define	REGISTER_NUMBER(ip)	(ip[0] >> 0 & 0xff)
#define	FUNCTION_NUMBER(ip)	(ip[0] >> 8 & 0x07)
#define	DEVICE_NUMBER(ip)	(ip[0] >> 11 & 0x1f)
#define	BUS_NUMBER(ip)		(ip[0] >> 16 & 0xff)
#define	ADDRESS_SPACE(ip)	(ip[0] >> 24 & 0x03)
#define	PCI_ADDR_HIGH(ip)	(ip[1])
#define	PCI_ADDR_LOW(ip)	(ip[2])
#define	PCI_SPAN_HIGH(ip)	(ip[3])
#define	PCI_SPAN_LOW(ip)	(ip[4])

#define	MX_DDI_REG_SET_32_BIT_MEMORY_SPACE 2
#define	MX_DDI_REG_SET_64_BIT_MEMORY_SPACE 3
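
/*
 * These decode the standard 5-cell PCI "reg" entries defined by the
 * IEEE 1275 PCI binding: cell 0 packs the address-space type, bus,
 * device, function and register number (split out by the macros
 * above), cells 1-2 hold the 64-bit PCI address, and cells 3-4 the
 * span of the mapping.
 */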

	int *data, i, *rs;
	uint32_t nelementsp;

#ifdef MYRI10GE_REGSET_VERBOSE
	char *address_space_name[] = { "Configuration Space",
	    "I/O Space",
	    "32-bit Memory Space",
	    "64-bit Memory Space"
	};
#endif

	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "reg", &data, &nelementsp) != DDI_SUCCESS) {
		printf("Could not determine register set.\n");
		return (ENXIO);
	}

#ifdef MYRI10GE_REGSET_VERBOSE
	printf("There are %d register sets.\n", nelementsp / 5);
#endif
	if (!nelementsp) {
		printf("Didn't find any \"reg\" properties.\n");
		ddi_prop_free(data);
		return (ENODEV);
	}

	/* Scan for the register number. */
	rs = &data[0];
	*busno = BUS_NUMBER(rs);
	*devno = DEVICE_NUMBER(rs);
	*funcno = FUNCTION_NUMBER(rs);

#ifdef MYRI10GE_REGSET_VERBOSE
	printf("*** Scanning for register number.\n");
#endif
	for (i = 0; i < nelementsp / 5; i++) {
		rs = &data[5 * i];
#ifdef MYRI10GE_REGSET_VERBOSE
		printf("Examining register set %d:\n", i);
		printf("  Register number = %d.\n", REGISTER_NUMBER(rs));
		printf("  Function number = %d.\n", FUNCTION_NUMBER(rs));
		printf("  Device number = %d.\n", DEVICE_NUMBER(rs));
		printf("  Bus number = %d.\n", BUS_NUMBER(rs));
		printf("  Address space = %d (%s ).\n", ADDRESS_SPACE(rs),
		    address_space_name[ADDRESS_SPACE(rs)]);
		printf("  pci address 0x%08x %08x\n", PCI_ADDR_HIGH(rs),
		    PCI_ADDR_LOW(rs));
		printf("  pci span 0x%08x %08x\n", PCI_SPAN_HIGH(rs),
		    PCI_SPAN_LOW(rs));
#endif
		/* We are looking for a memory property. */

		if (ADDRESS_SPACE(rs) == MX_DDI_REG_SET_64_BIT_MEMORY_SPACE ||
		    ADDRESS_SPACE(rs) == MX_DDI_REG_SET_32_BIT_MEMORY_SPACE) {
			*reg_set = i;

#ifdef MYRI10GE_REGSET_VERBOSE
			printf("%s uses register set %d.\n",
			    address_space_name[ADDRESS_SPACE(rs)], *reg_set);
#endif

			*span = (PCI_SPAN_LOW(rs));
#ifdef MYRI10GE_REGSET_VERBOSE
			printf("Board span is 0x%x\n", *span);
#endif
			break;
		}
	}

	ddi_prop_free(data);

	/* If no match, fail. */
	if (i >= nelementsp / 5) {
		return (EIO);
	}

	return (0);
}


static int
myri10ge_load_firmware_from_zlib(struct myri10ge_priv *mgp, uint32_t *limit)
{
	void *inflate_buffer;
	int rv, status;
	size_t sram_size = mgp->sram_size - MYRI10GE_EEPROM_STRINGS_SIZE;
	size_t destlen;
	mcp_gen_header_t *hdr;
	unsigned hdr_offset, i;


	*limit = 0; /* -Wuninitialized */
	status = 0;

	inflate_buffer = kmem_zalloc(sram_size, KM_NOSLEEP);
	if (!inflate_buffer) {
		cmn_err(CE_WARN,
		    "%s: Could not allocate buffer to inflate mcp\n",
		    mgp->name);
		return (ENOMEM);
	}

	destlen = sram_size;
	rv = z_uncompress(inflate_buffer, &destlen, mgp->eth_z8e,
	    mgp->eth_z8e_length);

	if (rv != Z_OK) {
		cmn_err(CE_WARN, "%s: Could not inflate mcp: %s\n",
		    mgp->name, z_strerror(rv));
		status = ENXIO;
		goto abort;
	}

	*limit = (uint32_t)destlen;

	hdr_offset = htonl(*(uint32_t *)(void *)((char *)inflate_buffer +
	    MCP_HEADER_PTR_OFFSET));
	hdr = (void *)((char *)inflate_buffer + hdr_offset);
	if (ntohl(hdr->mcp_type) != MCP_TYPE_ETH) {
		cmn_err(CE_WARN, "%s: Bad firmware type: 0x%x\n", mgp->name,
		    ntohl(hdr->mcp_type));
		status = EIO;
		goto abort;
	}

	/* save firmware version for kstat */
	(void) strncpy(mgp->fw_version, hdr->version, sizeof (mgp->fw_version));
	if (myri10ge_verbose)
		printf("%s: firmware id: %s\n", mgp->name, hdr->version);

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < *limit; i += 256) {
		myri10ge_pio_copy((char *)mgp->sram + MYRI10GE_FW_OFFSET + i,
		    (char *)inflate_buffer + i,
		    min(256U, (unsigned)(*limit - i)));
		mb();
		(void) *(int *)(void *)mgp->sram;
		mb();
	}

abort:
	kmem_free(inflate_buffer, sram_size);

	return (status);

}


int
myri10ge_send_cmd(struct myri10ge_priv *mgp, uint32_t cmd,
    myri10ge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof (*buf) + 8];
	volatile mcp_cmd_response_t *response = mgp->cmd;
	volatile char *cmd_addr =
	    (volatile char *)mgp->sram + MXGEFW_ETH_CMD;
	int sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
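	/*
	 * i.e. round the stack buffer up to the next multiple of 8;
	 * a hypothetical buf_bytes at 0x...0b yields buf at 0x...10.
	 */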

	buf->data0 = htonl(data->data0);
	buf->data1 = htonl(data->data1);
	buf->data2 = htonl(data->data2);
	buf->cmd = htonl(cmd);
	buf->response_addr.low = mgp->cmd_dma.low;
	buf->response_addr.high = mgp->cmd_dma.high;
	mutex_enter(&mgp->cmd_lock);
	response->result = 0xffffffff;
	mb();

	myri10ge_pio_copy((void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		mb();
		if (response->result != 0xffffffff) {
			if (response->result == 0) {
				data->data0 = ntohl(response->data);
				mutex_exit(&mgp->cmd_lock);
				return (0);
			} else if (ntohl(response->result)
			    == MXGEFW_CMD_UNKNOWN) {
				mutex_exit(&mgp->cmd_lock);
				return (ENOSYS);
			} else if (ntohl(response->result)
			    == MXGEFW_CMD_ERROR_UNALIGNED) {
				mutex_exit(&mgp->cmd_lock);
				return (E2BIG);
			} else {
				cmn_err(CE_WARN,
				    "%s: command %d failed, result = %d\n",
				    mgp->name, cmd, ntohl(response->result));
				mutex_exit(&mgp->cmd_lock);
				return (ENXIO);
			}
		}
		drv_usecwait(1000);
	}
	mutex_exit(&mgp->cmd_lock);
	cmn_err(CE_WARN, "%s: command %d timed out, result = %d\n",
	    mgp->name, cmd, ntohl(response->result));
	return (EAGAIN);
}

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
myri10ge_dummy_rdma(struct myri10ge_priv *mgp, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)mgp->cmd;
	*confirm = 0;
	mb();

	/*
	 * send an rdma command to the PCIe engine, and wait for the
	 * response in the confirmation address. The firmware should
	 * write a -1 there to indicate it is alive and well
	 */

	buf[0] = mgp->cmd_dma.high;		/* confirm addr MSW */
	buf[1] = mgp->cmd_dma.low;		/* confirm addr LSW */
	buf[2] = htonl(0xffffffff);		/* confirm data */
	buf[3] = htonl(mgp->cmd_dma.high);	/* dummy addr MSW */
	buf[4] = htonl(mgp->cmd_dma.low);	/* dummy addr LSW */
	buf[5] = htonl(enable);			/* enable? */


	submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_DUMMY_RDMA);

	myri10ge_pio_copy((char *)submit, buf, 64);
	mb();
	drv_usecwait(1000);
	mb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		drv_usecwait(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		cmn_err(CE_WARN, "%s: dummy rdma %s failed (%p = 0x%x)",
		    mgp->name,
		    (enable ? "enable" : "disable"), (void*) confirm, *confirm);
	}
}

static int
myri10ge_load_firmware(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	status = myri10ge_load_firmware_from_zlib(mgp, &size);
	if (status) {
		cmn_err(CE_WARN, "%s: firmware loading failed\n", mgp->name);
		return (status);
	}

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)mgp->cmd;
	*confirm = 0;
	mb();

	/*
	 * send a reload command to the bootstrap MCP, and wait for the
	 * response in the confirmation address. The firmware should
	 * write a -1 there to indicate it is alive and well
	 */

	buf[0] = mgp->cmd_dma.high;	/* confirm addr MSW */
	buf[1] = mgp->cmd_dma.low;	/* confirm addr LSW */
	buf[2] = htonl(0xffffffff);	/* confirm data */

	/*
	 * FIX: All newest firmware should un-protect the bottom of
	 * the sram before handoff. However, the very first interfaces
	 * do not. Therefore the handoff copy must skip the first 8 bytes
	 */
	buf[3] = htonl(MYRI10GE_FW_OFFSET + 8); /* where the code starts */
	buf[4] = htonl(size - 8);	/* length of code */
	buf[5] = htonl(8);		/* where to copy to */
	buf[6] = htonl(0);		/* where to jump to */

	submit = (volatile char *)(mgp->sram + MXGEFW_BOOT_HANDOFF);

	myri10ge_pio_copy((char *)submit, buf, 64);
	mb();
	drv_usecwait(1000);
	mb();
	i = 0;
	while (*confirm != 0xffffffff && i < 1000) {
		drv_usecwait(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		cmn_err(CE_WARN, "%s: handoff failed (%p = 0x%x)",
		    mgp->name, (void *) confirm, *confirm);

		return (ENXIO);
	}
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_GET_RX_RING_SIZE\n",
		    mgp->name);
		return (ENXIO);
	}

	mgp->max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
	myri10ge_dummy_rdma(mgp, 1);
	return (0);
}

static int
myri10ge_m_unicst(void *arg, const uint8_t *addr)
{
	struct myri10ge_priv *mgp = arg;
	myri10ge_cmd_t cmd;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
	    | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = myri10ge_send_cmd(mgp, MXGEFW_SET_MAC_ADDRESS, &cmd);
	if (status == 0 && (addr != mgp->mac_addr))
		(void) memcpy(mgp->mac_addr, addr, sizeof (mgp->mac_addr));

	return (status);
}

static int
myri10ge_change_pause(struct myri10ge_priv *mgp, int pause)
{
	myri10ge_cmd_t cmd;
	int status;

	if (pause)
		status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_FLOW_CONTROL,
		    &cmd);
	else
		status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_FLOW_CONTROL,
		    &cmd);

	if (status) {
		cmn_err(CE_WARN, "%s: Failed to set flow control mode\n",
		    mgp->name);
		return (ENXIO);
	}
	mgp->pause = pause;
	return (0);
}

static void
myri10ge_change_promisc(struct myri10ge_priv *mgp, int promisc)
{
	myri10ge_cmd_t cmd;
	int status;

	if (promisc)
		status = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_PROMISC, &cmd);
	else
		status = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_PROMISC, &cmd);

	if (status) {
		cmn_err(CE_WARN, "%s: Failed to set promisc mode\n",
		    mgp->name);
	}
}

static int
myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
{
	myri10ge_cmd_t cmd;
	int status;
	uint32_t len;
	void *dmabench;
	struct myri10ge_dma_stuff dmabench_dma;
	char *test = " ";

	/*
	 * Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return are the number of transfers completed.
	 * The lower 16 bits are the time in 0.5us ticks that the
	 * transfers took to complete.
	 */
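
	/*
	 * In other words, with transfers = data0 >> 16 and
	 * ticks = data0 & 0xffff, the bandwidth computed below is
	 *   (transfers * len bytes) / (ticks * 0.5 us)
	 *   = 2 * transfers * len / ticks bytes/us, i.e. roughly MB/s,
	 * which is exactly the expression used for mgp->read_dma etc.
	 */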

	len = mgp->tx_boundary;

	dmabench = myri10ge_dma_alloc(mgp->dip, len,
	    &myri10ge_rx_jumbo_dma_attr, &myri10ge_dev_access_attr,
	    DDI_DMA_STREAMING, DDI_DMA_RDWR|DDI_DMA_STREAMING,
	    &dmabench_dma, 1, DDI_DMA_DONTWAIT);
	mgp->read_dma = mgp->write_dma = mgp->read_write_dma = 0;
	if (dmabench == NULL) {
		cmn_err(CE_WARN, "%s dma benchmark aborted\n", mgp->name);
		return (ENOMEM);
	}

	cmd.data0 = ntohl(dmabench_dma.low);
	cmd.data1 = ntohl(dmabench_dma.high);
	cmd.data2 = len * 0x10000;
	status = myri10ge_send_cmd(mgp, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	mgp->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);

	cmd.data0 = ntohl(dmabench_dma.low);
	cmd.data1 = ntohl(dmabench_dma.high);
	cmd.data2 = len * 0x1;
	status = myri10ge_send_cmd(mgp, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	mgp->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);

	cmd.data0 = ntohl(dmabench_dma.low);
	cmd.data1 = ntohl(dmabench_dma.high);
	cmd.data2 = len * 0x10001;
	status = myri10ge_send_cmd(mgp, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	mgp->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
	    (cmd.data0 & 0xffff);


abort:
	myri10ge_dma_free(&dmabench_dma);
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		cmn_err(CE_WARN, "%s %s dma benchmark failed\n", mgp->name,
		    test);
	return (status);
}

static int
myri10ge_reset(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	struct myri10ge_nic_stat *ethstat;
	struct myri10ge_slice_state *ss;
	int i, status;
	size_t bytes;

	/* send a reset command to the card to see if it is alive */
	(void) memset(&cmd, 0, sizeof (cmd));
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
		return (ENXIO);
	}

	/* Now exchange information about interrupts */

	bytes = mgp->max_intr_slots * sizeof (*mgp->ss[0].rx_done.entry);
	cmd.data0 = (uint32_t)bytes;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via myri10ge_probe_slices(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (mgp->num_slices > 1) {

		/* ask the maximum number of slices it supports */
		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
		    &cmd);
		if (status != 0) {
			cmn_err(CE_WARN,
			    "%s: failed to get number of slices\n",
			    mgp->name);
			return (status);
		}

		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */

		cmd.data0 = mgp->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE |
		    MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
		    &cmd);
		if (status != 0) {
			cmn_err(CE_WARN,
			    "%s: failed to set number of slices\n",
			    mgp->name);
			return (status);
		}
	}
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		cmd.data0 = ntohl(ss->rx_done.dma.low);
		cmd.data1 = ntohl(ss->rx_done.dma.high);
		cmd.data2 = i;
		status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_DMA,
		    &cmd);
	}

	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		ss->irq_claim = (volatile unsigned int *)
		    (void *)(mgp->sram + cmd.data0 + 8 * i);
	}

	if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
		status |= myri10ge_send_cmd(mgp,
		    MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd);
		mgp->irq_deassert = (uint32_t *)(void *)(mgp->sram + cmd.data0);
	}

	status |= myri10ge_send_cmd(mgp,
	    MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
	mgp->intr_coal_delay_ptr = (uint32_t *)(void *)(mgp->sram + cmd.data0);

	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed set interrupt parameters\n",
		    mgp->name);
		return (status);
	}

	*mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay);
	(void) myri10ge_dma_test(mgp, MXGEFW_DMA_TEST);

	/* reset mcp/driver shared state back to 0 */

	for (i = 0; i < mgp->num_slices; i++) {
		ss = &mgp->ss[i];
		bytes = mgp->max_intr_slots *
		    sizeof (*mgp->ss[0].rx_done.entry);
		(void) memset(ss->rx_done.entry, 0, bytes);
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->rx_token = 0;
		ss->tx.watchdog_done = 0;
		ss->tx.watchdog_req = 0;
		ss->tx.active = 0;
		ss->tx.activate = 0;
	}
	mgp->watchdog_rx_pause = 0;
	if (mgp->ksp_stat != NULL) {
		ethstat = (struct myri10ge_nic_stat *)mgp->ksp_stat->ks_data;
		ethstat->link_changes.value.ul = 0;
	}
	status = myri10ge_m_unicst(mgp, mgp->mac_addr);
	myri10ge_change_promisc(mgp, 0);
	(void) myri10ge_change_pause(mgp, mgp->pause);
	return (status);
}

static int
myri10ge_init_toeplitz(struct myri10ge_priv *mgp)
{
	myri10ge_cmd_t cmd;
	int i, b, s, t, j;
	int status;
	uint32_t k[8];
	uint32_t tmp;
	uint8_t *key;

	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RSS_KEY_OFFSET,
	    &cmd);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed to get rss key\n",
		    mgp->name);
		return (EIO);
	}
	myri10ge_pio_copy32(mgp->rss_key,
	    (uint32_t *)(void*)((char *)mgp->sram + cmd.data0),
	    sizeof (mgp->rss_key));

	mgp->toeplitz_hash_table = kmem_alloc(sizeof (uint32_t) * 12 * 256,
	    KM_SLEEP);
	key = (uint8_t *)mgp->rss_key;
	t = 0;
	for (b = 0; b < 12; b++) {
		for (s = 0; s < 8; s++) {
			/* Bits: b*8+s, ..., b*8+s+31 */
			k[s] = 0;
			for (j = 0; j < 32; j++) {
				int bit = b*8+s+j;
				bit = 0x1 & (key[bit / 8] >> (7 -(bit & 0x7)));
				k[s] |= bit << (31 - j);
			}
		}

		for (i = 0; i <= 0xff; i++) {
			tmp = 0;
			if (i & (1 << 7)) { tmp ^= k[0]; }
			if (i & (1 << 6)) { tmp ^= k[1]; }
			if (i & (1 << 5)) { tmp ^= k[2]; }
			if (i & (1 << 4)) { tmp ^= k[3]; }
			if (i & (1 << 3)) { tmp ^= k[4]; }
			if (i & (1 << 2)) { tmp ^= k[5]; }
			if (i & (1 << 1)) { tmp ^= k[6]; }
			if (i & (1 << 0)) { tmp ^= k[7]; }
			mgp->toeplitz_hash_table[t++] = tmp;
		}
	}
	return (0);
}
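
/*
 * The table built above is the usual byte-at-a-time formulation of the
 * Toeplitz hash: entry [b][v] is the XOR of the 32-bit key windows
 * beginning at bits b*8 .. b*8+7, one for each set bit of the input
 * byte v.  Hashing the 12-byte (daddr, saddr, dport, sport) tuple then
 * costs 12 table lookups and XORs, as done in
 * myri10ge_toeplitz_send_hash() below.
 */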

static inline struct myri10ge_slice_state *
myri10ge_toeplitz_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
{
	struct tcphdr *hdr;
	uint32_t saddr, daddr;
	uint32_t hash, slice;
	uint32_t *table = mgp->toeplitz_hash_table;
	uint16_t src, dst;

	/*
	 * Note hashing order is reversed from how it is done
	 * in the NIC, so as to generate the same hash value
	 * for the connection to try to keep connections CPU local
	 */

	/* hash on IPv4 src/dst address */
	saddr = ntohl(ip->ip_src.s_addr);
	daddr = ntohl(ip->ip_dst.s_addr);
	hash = table[(256 * 0) + ((daddr >> 24) & 0xff)];
	hash ^= table[(256 * 1) + ((daddr >> 16) & 0xff)];
	hash ^= table[(256 * 2) + ((daddr >> 8) & 0xff)];
	hash ^= table[(256 * 3) + ((daddr) & 0xff)];
	hash ^= table[(256 * 4) + ((saddr >> 24) & 0xff)];
	hash ^= table[(256 * 5) + ((saddr >> 16) & 0xff)];
	hash ^= table[(256 * 6) + ((saddr >> 8) & 0xff)];
	hash ^= table[(256 * 7) + ((saddr) & 0xff)];
	/* hash on TCP port, if required */
	if ((myri10ge_rss_hash & MXGEFW_RSS_HASH_TYPE_TCP_IPV4) &&
	    ip->ip_p == IPPROTO_TCP) {
		hdr = (struct tcphdr *)(void *)
		    (((uint8_t *)ip) + (ip->ip_hl << 2));
		src = ntohs(hdr->th_sport);
		dst = ntohs(hdr->th_dport);

		hash ^= table[(256 * 8) + ((dst >> 8) & 0xff)];
		hash ^= table[(256 * 9) + ((dst) & 0xff)];
		hash ^= table[(256 * 10) + ((src >> 8) & 0xff)];
		hash ^= table[(256 * 11) + ((src) & 0xff)];
	}
	slice = (mgp->num_slices - 1) & hash;
	return (&mgp->ss[slice]);

}

static inline struct myri10ge_slice_state *
myri10ge_simple_send_hash(struct myri10ge_priv *mgp, struct ip *ip)
{
	struct tcphdr *hdr;
	uint32_t slice, hash_val;


	if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) {
		return (&mgp->ss[0]);
	}
	hdr = (struct tcphdr *)(void *)(((uint8_t *)ip) + (ip->ip_hl << 2));

	/*
	 * Use the low byte of the *destination* port for
	 * MXGEFW_RSS_HASH_TYPE_SRC_PORT, so as to match the NIC's hashing
	 */
	hash_val = ntohs(hdr->th_dport) & 0xff;
	if (myri10ge_rss_hash == MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT)
		hash_val += ntohs(hdr->th_sport) & 0xff;

	slice = (mgp->num_slices - 1) & hash_val;
	return (&mgp->ss[slice]);
}

static inline struct myri10ge_slice_state *
myri10ge_send_hash(struct myri10ge_priv *mgp, mblk_t *mp)
{
	unsigned int slice = 0;
	struct ether_header *eh;
	struct ether_vlan_header *vh;
	struct ip *ip;
	int ehl, ihl;

	if (mgp->num_slices == 1)
		return (&mgp->ss[0]);

	if (myri10ge_tx_hash == 0) {
		slice = CPU->cpu_id & (mgp->num_slices - 1);
		return (&mgp->ss[slice]);
	}

	/*
	 * ensure it is a TCP or UDP over IPv4 packet, and that the
	 * headers are in the 1st mblk.  Otherwise, punt
	 */
	ehl = sizeof (*eh);
	ihl = sizeof (*ip);
	if ((MBLKL(mp)) < (ehl + ihl + 8))
		return (&mgp->ss[0]);
	eh = (struct ether_header *)(void *)mp->b_rptr;
	ip = (struct ip *)(void *)(eh + 1);
	if (eh->ether_type != BE_16(ETHERTYPE_IP)) {
		if (eh->ether_type != BE_16(ETHERTYPE_VLAN))
			return (&mgp->ss[0]);
		vh = (struct ether_vlan_header *)(void *)mp->b_rptr;
		if (vh->ether_type != BE_16(ETHERTYPE_IP))
			return (&mgp->ss[0]);
		ehl += 4;
		ip = (struct ip *)(void *)(vh + 1);
	}
	ihl = ip->ip_hl << 2;
	if (MBLKL(mp) < (ehl + ihl + 8))
		return (&mgp->ss[0]);
	switch (myri10ge_rss_hash) {
	case MXGEFW_RSS_HASH_TYPE_IPV4:
		/* fallthru */
	case MXGEFW_RSS_HASH_TYPE_TCP_IPV4:
		/* fallthru */
	case (MXGEFW_RSS_HASH_TYPE_IPV4|MXGEFW_RSS_HASH_TYPE_TCP_IPV4):
		return (myri10ge_toeplitz_send_hash(mgp, ip));
	case MXGEFW_RSS_HASH_TYPE_SRC_PORT:
		/* fallthru */
	case MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT:
		return (myri10ge_simple_send_hash(mgp, ip));
	default:
		break;
	}
	return (&mgp->ss[0]);
}
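
/*
 * Note that all three policies above select a slice by masking with
 * (num_slices - 1), which only spreads load correctly when num_slices
 * is a power of two; the slice probing code is presumably expected to
 * guarantee that.
 */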

static int
myri10ge_setup_slice(struct myri10ge_slice_state *ss)
{
	struct myri10ge_priv *mgp = ss->mgp;
	myri10ge_cmd_t cmd;
	int tx_ring_size, rx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int slice, status;
	int allocated, idx;
	size_t bytes;

	slice = ss - mgp->ss;
	cmd.data0 = slice;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	tx_ring_size = cmd.data0;
	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0)
		return (status);
	rx_ring_size = cmd.data0;

	tx_ring_entries = tx_ring_size / sizeof (struct mcp_kreq_ether_send);
	rx_ring_entries = rx_ring_size / sizeof (struct mcp_dma_addr);
	ss->tx.mask = tx_ring_entries - 1;
	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;

	/* get the lanai pointers to the send and receive rings */

	cmd.data0 = slice;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
	ss->tx.lanai = (mcp_kreq_ether_send_t *)(void *)(mgp->sram + cmd.data0);
	if (mgp->num_slices > 1) {
		ss->tx.go = (char *)mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice;
		ss->tx.stop = (char *)mgp->sram + MXGEFW_ETH_SEND_STOP +
		    64 * slice;
	} else {
		ss->tx.go = NULL;
		ss->tx.stop = NULL;
	}

	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	ss->rx_small.lanai = (mcp_kreq_ether_recv_t *)
	    (void *)(mgp->sram + cmd.data0);

	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	ss->rx_big.lanai = (mcp_kreq_ether_recv_t *)(void *)
	    (mgp->sram + cmd.data0);

	if (status != 0) {
		cmn_err(CE_WARN,
		    "%s: failed to get ring sizes or locations\n", mgp->name);
		return (status);
	}

	status = ENOMEM;
	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	ss->rx_small.shadow = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_small.shadow == NULL)
		goto abort;
	(void) memset(ss->rx_small.shadow, 0, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	ss->rx_big.shadow = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_big.shadow == NULL)
		goto abort_with_rx_small_shadow;
	(void) memset(ss->rx_big.shadow, 0, bytes);

	/* allocate the host info rings */

	bytes = tx_ring_entries * sizeof (*ss->tx.info);
	ss->tx.info = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->tx.info == NULL)
		goto abort_with_rx_big_shadow;
	(void) memset(ss->tx.info, 0, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	ss->rx_small.info = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_small.info == NULL)
		goto abort_with_tx_info;
	(void) memset(ss->rx_small.info, 0, bytes);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	ss->rx_big.info = kmem_zalloc(bytes, KM_SLEEP);
	if (ss->rx_big.info == NULL)
		goto abort_with_rx_small_info;
	(void) memset(ss->rx_big.info, 0, bytes);

	ss->tx.stall = ss->tx.sched = 0;
	ss->tx.stall_early = ss->tx.stall_late = 0;

	ss->jbufs_for_smalls = 1 + (1 + ss->rx_small.mask) /
	    (myri10ge_mtu / (myri10ge_small_bytes + MXGEFW_PAD));

	allocated = myri10ge_add_jbufs(ss,
	    myri10ge_bigbufs_initial + ss->jbufs_for_smalls, 1);
	if (allocated < ss->jbufs_for_smalls + myri10ge_bigbufs_initial) {
		cmn_err(CE_WARN,
		    "%s: Could not allocate enough receive buffers (%d/%d)\n",
		    mgp->name, allocated,
		    myri10ge_bigbufs_initial + ss->jbufs_for_smalls);
		goto abort_with_jumbos;
	}

	myri10ge_carve_up_jbufs_into_small_ring(ss);
	ss->j_rx_cnt = 0;

	mutex_enter(&ss->jpool.mtx);
	if (allocated < rx_ring_entries)
		ss->jpool.low_water = allocated / 4;
	else
		ss->jpool.low_water = rx_ring_entries / 2;

	/*
	 * invalidate the big receive ring in case we do not
	 * allocate sufficient jumbos to fill it
	 */
	(void) memset(ss->rx_big.shadow, 1,
	    (ss->rx_big.mask + 1) * sizeof (ss->rx_big.shadow[0]));
	for (idx = 7; idx <= ss->rx_big.mask; idx += 8) {
		myri10ge_submit_8rx(&ss->rx_big.lanai[idx - 7],
		    &ss->rx_big.shadow[idx - 7]);
		mb();
	}


	myri10ge_restock_jumbos(ss);

	for (idx = 7; idx <= ss->rx_small.mask; idx += 8) {
		myri10ge_submit_8rx(&ss->rx_small.lanai[idx - 7],
		    &ss->rx_small.shadow[idx - 7]);
		mb();
	}
	ss->rx_small.cnt = ss->rx_small.mask + 1;

	mutex_exit(&ss->jpool.mtx);

	status = myri10ge_prepare_tx_ring(ss);

	if (status != 0)
		goto abort_with_small_jbufs;

	cmd.data0 = ntohl(ss->fw_stats_dma.low);
	cmd.data1 = ntohl(ss->fw_stats_dma.high);
	cmd.data2 = sizeof (mcp_irq_data_t);
1967 cmd.data2 |= (slice << 16);
1968 bzero(ss->fw_stats, sizeof (*ss->fw_stats));
1969 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
1970 if (status == ENOSYS) {
1971 cmd.data0 = ntohl(ss->fw_stats_dma.low) +
1972 offsetof(mcp_irq_data_t, send_done_count);
1973 cmd.data1 = ntohl(ss->fw_stats_dma.high);
1974 status = myri10ge_send_cmd(mgp,
1975 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, &cmd);
1976 }
1977 if (status) {
1978 cmn_err(CE_WARN, "%s: Couldn't set stats DMA\n", mgp->name);
1979 goto abort_with_tx;
1980 }
1981
1982 return (0);
1983
1984 abort_with_tx:
1985 myri10ge_unprepare_tx_ring(ss);
1986
1987 abort_with_small_jbufs:
1988 myri10ge_release_small_jbufs(ss);
1989
1990 abort_with_jumbos:
1991 if (allocated != 0) {
1992 mutex_enter(&ss->jpool.mtx);
1993 ss->jpool.low_water = 0;
1994 mutex_exit(&ss->jpool.mtx);
1995 myri10ge_unstock_jumbos(ss);
1996 myri10ge_remove_jbufs(ss);
1997 }
1998
1999 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
2000 kmem_free(ss->rx_big.info, bytes);
2001
2002 abort_with_rx_small_info:
2003 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
2004 kmem_free(ss->rx_small.info, bytes);
2005
2006 abort_with_tx_info:
2007 bytes = tx_ring_entries * sizeof (*ss->tx.info);
2008 kmem_free(ss->tx.info, bytes);
2009
2010 abort_with_rx_big_shadow:
2011 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
2012 kmem_free(ss->rx_big.shadow, bytes);
2013
2014 abort_with_rx_small_shadow:
2015 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
2016 kmem_free(ss->rx_small.shadow, bytes);
2017 abort:
2018 return (status);
2019
2020 }
2021
2022 static void
2023 myri10ge_teardown_slice(struct myri10ge_slice_state *ss)
2024 {
2025 int tx_ring_entries, rx_ring_entries;
2026 size_t bytes;
2027
2028 /* ignore slices that have not been fully set up */
2029 if (ss->tx.cp == NULL)
2030 return;
2031 /* Free the TX copy buffers */
2032 myri10ge_unprepare_tx_ring(ss);
2033
2034 /* stop passing returned buffers to firmware */
2035
2036 mutex_enter(&ss->jpool.mtx);
2037 ss->jpool.low_water = 0;
2038 mutex_exit(&ss->jpool.mtx);
2039 myri10ge_release_small_jbufs(ss);
2040
2041 /* Release the free jumbo frame pool */
2042 myri10ge_unstock_jumbos(ss);
2043 myri10ge_remove_jbufs(ss);
2044
2045 rx_ring_entries = ss->rx_big.mask + 1;
2046 tx_ring_entries = ss->tx.mask + 1;
2047
2048 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
2049 kmem_free(ss->rx_big.info, bytes);
2050
2051 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
2052 kmem_free(ss->rx_small.info, bytes);
2053
2054 bytes = tx_ring_entries * sizeof (*ss->tx.info);
2055 kmem_free(ss->tx.info, bytes);
2056
2057 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
2058 kmem_free(ss->rx_big.shadow, bytes);
2059
2060 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
2061 kmem_free(ss->rx_small.shadow, bytes);
2062
2063 }
2064 static int
2065 myri10ge_start_locked(struct myri10ge_priv *mgp)
2066 {
2067 myri10ge_cmd_t cmd;
2068 int status, big_pow2, i;
2069 volatile uint8_t *itable;
2070
2071 status = DDI_SUCCESS;
2072 /* Allocate DMA resources and receive buffers */
2073
2074 status = myri10ge_reset(mgp);
2075 if (status != 0) {
2076 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
2077 return (DDI_FAILURE);
2078 }
2079
2080 if (mgp->num_slices > 1) {
2081 cmd.data0 = mgp->num_slices;
2082 cmd.data1 = 1; /* use MSI-X */
2083 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
2084 &cmd);
2085 if (status != 0) {
2086 cmn_err(CE_WARN,
2087 "%s: failed to set number of slices\n",
2088 mgp->name);
2089 goto abort_with_nothing;
2090 }
2091 /* setup the indirection table */
2092 cmd.data0 = mgp->num_slices;
2093 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
2094 &cmd);
2095
2096 status |= myri10ge_send_cmd(mgp,
2097 MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd);
2098 if (status != 0) {
2099 cmn_err(CE_WARN,
2100 "%s: failed to setup rss tables\n", mgp->name);
2101 }
2102
2103 /* just enable an identity mapping */
2104 itable = mgp->sram + cmd.data0;
2105 for (i = 0; i < mgp->num_slices; i++)
2106 itable[i] = (uint8_t)i;
2107
2108 if (myri10ge_rss_hash & MYRI10GE_TOEPLITZ_HASH) {
2109 status = myri10ge_init_toeplitz(mgp);
2110 if (status != 0) {
2111 cmn_err(CE_WARN, "%s: failed to setup "
2112 "toeplitz tx hash table", mgp->name);
2113 goto abort_with_nothing;
2114 }
2115 }
2116 cmd.data0 = 1;
2117 cmd.data1 = myri10ge_rss_hash;
2118 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_ENABLE,
2119 &cmd);
2120 if (status != 0) {
2121 cmn_err(CE_WARN,
2122 "%s: failed to enable slices\n", mgp->name);
2123 goto abort_with_toeplitz;
2124 }
2125 }
2126
2127 for (i = 0; i < mgp->num_slices; i++) {
2128 status = myri10ge_setup_slice(&mgp->ss[i]);
2129 if (status != 0)
2130 goto abort_with_slices;
2131 }
2132
2133 /*
2134 * Tell the MCP how many buffers he has, and to
2135 * bring the ethernet interface up
2136 *
2137 * Firmware needs the big buff size as a power of 2. Lie and
2138 * tell him the buffer is larger, because we only use 1
2139 * buffer/pkt, and the mtu will prevent overruns
2140 */
2141 big_pow2 = myri10ge_mtu + MXGEFW_PAD;
2142 while ((big_pow2 & (big_pow2 - 1)) != 0)
2143 big_pow2++;
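/*
 * e.g. assuming MXGEFW_PAD is 2, an MTU of 9014 gives 9016, which
 * this loop rounds up to the next power of 2, 16384;
 * (x & (x - 1)) == 0 is the standard test for x being a power of 2
 */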
2144
2145 /* now give firmware buffers sizes, and MTU */
2146 cmd.data0 = myri10ge_mtu;
2147 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_MTU, &cmd);
2148 cmd.data0 = myri10ge_small_bytes;
2149 status |=
2150 myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd);
2151 cmd.data0 = big_pow2;
2152 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
2153 if (status) {
2154 cmn_err(CE_WARN, "%s: Couldn't set buffer sizes\n", mgp->name);
2155 goto abort_with_slices;
2156 }
2157
2158
2159 cmd.data0 = 1;
2160 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_TSO_MODE, &cmd);
2161 if (status) {
2162 cmn_err(CE_WARN, "%s: unable to setup TSO (%d)\n",
2163 mgp->name, status);
2164 } else {
2165 mgp->features |= MYRI10GE_TSO;
2166 }
2167
2168 mgp->link_state = -1;
2169 mgp->rdma_tags_available = 15;
2170 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd);
2171 if (status) {
2172 cmn_err(CE_WARN, "%s: unable to start ethernet\n", mgp->name);
2173 goto abort_with_slices;
2174 }
2175 mgp->running = MYRI10GE_ETH_RUNNING;
2176 return (DDI_SUCCESS);
2177
2178 abort_with_slices:
2179 for (i = 0; i < mgp->num_slices; i++)
2180 myri10ge_teardown_slice(&mgp->ss[i]);
2181
2182 mgp->running = MYRI10GE_ETH_STOPPED;
2183
2184 abort_with_toeplitz:
2185 if (mgp->toeplitz_hash_table != NULL) {
2186 kmem_free(mgp->toeplitz_hash_table,
2187 sizeof (uint32_t) * 12 * 256);
2188 mgp->toeplitz_hash_table = NULL;
2189 }
2190
2191 abort_with_nothing:
2192 return (DDI_FAILURE);
2193 }
2194
2195 static void
2196 myri10ge_stop_locked(struct myri10ge_priv *mgp)
2197 {
2198 int status, old_down_cnt;
2199 myri10ge_cmd_t cmd;
2200 int wait_time = 10;
2201 int i, polling;
2202
2203 old_down_cnt = mgp->down_cnt;
2204 mb();
2205 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
2206 if (status) {
2207 cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name);
2208 }
2209
2210 while (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2211 delay(1 * drv_usectohz(1000000));
2212 wait_time--;
2213 if (wait_time == 0)
2214 break;
2215 }
2216 again:
2217 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2218 cmn_err(CE_WARN, "%s: didn't get down irq\n", mgp->name);
2219 for (i = 0; i < mgp->num_slices; i++) {
2220 /*
2221 * take and release the rx lock to ensure
2222 * that no interrupt thread is blocked
2223 * elsewhere in the stack, preventing
2224 * completion
2225 */
2226
2227 mutex_enter(&mgp->ss[i].rx_lock);
2228 printf("%s: slice %d rx irq idle\n",
2229 mgp->name, i);
2230 mutex_exit(&mgp->ss[i].rx_lock);
2231
2232 /* verify that the poll handler is inactive */
2233 mutex_enter(&mgp->ss->poll_lock);
2234 polling = mgp->ss->rx_polling;
2235 mutex_exit(&mgp->ss->poll_lock);
2236 if (polling) {
2237 printf("%s: slice %d is polling\n",
2238 mgp->name, i);
2239 delay(1 * drv_usectohz(1000000));
2240 goto again;
2241 }
2242 }
2243 delay(1 * drv_usectohz(1000000));
2244 if (old_down_cnt == *((volatile int *)&mgp->down_cnt)) {
2245 cmn_err(CE_WARN, "%s: Never got down irq\n", mgp->name);
2246 }
2247 }
2248
2249 for (i = 0; i < mgp->num_slices; i++)
2250 myri10ge_teardown_slice(&mgp->ss[i]);
2251
2252 if (mgp->toeplitz_hash_table != NULL) {
2253 kmem_free(mgp->toeplitz_hash_table,
2254 sizeof (uint32_t) * 12 * 256);
2255 mgp->toeplitz_hash_table = NULL;
2256 }
2257 mgp->running = MYRI10GE_ETH_STOPPED;
2258 }
2259
2260 static int
2261 myri10ge_m_start(void *arg)
2262 {
2263 struct myri10ge_priv *mgp = arg;
2264 int status;
2265
2266 mutex_enter(&mgp->intrlock);
2267
2268 if (mgp->running != MYRI10GE_ETH_STOPPED) {
2269 mutex_exit(&mgp->intrlock);
2270 return (DDI_FAILURE);
2271 }
2272 status = myri10ge_start_locked(mgp);
2273 mutex_exit(&mgp->intrlock);
2274
2275 if (status != DDI_SUCCESS)
2276 return (status);
2277
2278 /* start the watchdog timer */
2279 mgp->timer_id = timeout(myri10ge_watchdog, mgp,
2280 mgp->timer_ticks);
2281 return (DDI_SUCCESS);
2282
2283 }
2284
2285 static void
2286 myri10ge_m_stop(void *arg)
2287 {
2288 struct myri10ge_priv *mgp = arg;
2289
2290 mutex_enter(&mgp->intrlock);
2291 /* if the device is not running, give up */
2292 if (mgp->running != MYRI10GE_ETH_RUNNING) {
2293 mutex_exit(&mgp->intrlock);
2294 return;
2295 }
2296
2297 mgp->running = MYRI10GE_ETH_STOPPING;
2298 mutex_exit(&mgp->intrlock);
2299 (void) untimeout(mgp->timer_id);
2300 mutex_enter(&mgp->intrlock);
2301 myri10ge_stop_locked(mgp);
2302 mutex_exit(&mgp->intrlock);
2303
2304 }
2305
2306 static inline void
2307 myri10ge_rx_csum(mblk_t *mp, struct myri10ge_rx_ring_stats *s, uint32_t csum)
2308 {
2309 struct ether_header *eh;
2310 struct ip *ip;
2311 struct ip6_hdr *ip6;
2312 uint32_t start, stuff, end, partial, hdrlen;
2313
2314
2315 csum = ntohs((uint16_t)csum);
2316 eh = (struct ether_header *)(void *)mp->b_rptr;
2317 hdrlen = sizeof (*eh);
2318 if (eh->ether_dhost.ether_addr_octet[0] & 1) {
2319 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet,
2320 myri10ge_broadcastaddr, sizeof (eh->ether_dhost))))
2321 s->brdcstrcv++;
2322 else
2323 s->multircv++;
2324 }
2325
2326 if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) {
2327 /*
2328 * fix the checksum by subtracting the 4 VLAN-tag bytes that
2329 * follow what the firmware thought was the end of the ether hdr.
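 * In one's-complement arithmetic, adding the complement of a value
 * (plus the end-around carry) subtracts it; the two fold steps
 * below reduce the 32-bit accumulator back to 16 bits.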
2330 */
2331 partial = *(uint32_t *)
2332 (void *)(mp->b_rptr + ETHERNET_HEADER_SIZE);
2333 csum += ~partial;
2334 csum += (csum < ~partial);
2335 csum = (csum >> 16) + (csum & 0xFFFF);
2336 csum = (csum >> 16) + (csum & 0xFFFF);
2337 hdrlen += VLAN_TAGSZ;
2338 }
2339
2340 if (eh->ether_type == BE_16(ETHERTYPE_IP)) {
2341 ip = (struct ip *)(void *)(mp->b_rptr + hdrlen);
2342 start = ip->ip_hl << 2;
2343
2344 if (ip->ip_p == IPPROTO_TCP)
2345 stuff = start + offsetof(struct tcphdr, th_sum);
2346 else if (ip->ip_p == IPPROTO_UDP)
2347 stuff = start + offsetof(struct udphdr, uh_sum);
2348 else
2349 return;
2350 end = ntohs(ip->ip_len);
2351 } else if (eh->ether_type == BE_16(ETHERTYPE_IPV6)) {
2352 ip6 = (struct ip6_hdr *)(void *)(mp->b_rptr + hdrlen);
2353 start = sizeof (*ip6);
2354 if (ip6->ip6_nxt == IPPROTO_TCP) {
2355 stuff = start + offsetof(struct tcphdr, th_sum);
2356 } else if (ip6->ip6_nxt == IPPROTO_UDP)
2357 stuff = start + offsetof(struct udphdr, uh_sum);
2358 else
2359 return;
2360 end = start + ntohs(ip6->ip6_plen);
2361 /*
2362 * IPv6 headers do not contain a checksum, and hence
2363 * do not checksum to zero, so they don't "fall out"
2364 * of the partial checksum calculation like IPv4
2365 * headers do. We need to fix the partial checksum by
2366 * subtracting the checksum of the IPv6 header.
2367 */
2368
2369 partial = myri10ge_csum_generic((uint16_t *)ip6, sizeof (*ip6));
2370 csum += ~partial;
2371 csum += (csum < ~partial);
2372 csum = (csum >> 16) + (csum & 0xFFFF);
2373 csum = (csum >> 16) + (csum & 0xFFFF);
2374 } else {
2375 return;
2376 }
2377
2378 if (MBLKL(mp) > hdrlen + end) {
2379 /* padded frame, so hw csum may be invalid */
2380 return;
2381 }
2382
2383 mac_hcksum_set(mp, start, stuff, end, csum, HCK_PARTIALCKSUM);
2384 }
2385
2386 static mblk_t *
2387 myri10ge_rx_done_small(struct myri10ge_slice_state *ss, uint32_t len,
2388 uint32_t csum)
2389 {
2390 mblk_t *mp;
2391 myri10ge_rx_ring_t *rx;
2392 int idx;
2393
2394 rx = &ss->rx_small;
2395 idx = rx->cnt & rx->mask;
2396 ss->rx_small.cnt++;
2397
2398 /* allocate a new buffer to pass up the stack */
2399 mp = allocb(len + MXGEFW_PAD, 0);
2400 if (mp == NULL) {
2401 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_small_nobuf);
2402 goto abort;
2403 }
2404 bcopy(ss->rx_small.info[idx].ptr,
2405 (caddr_t)mp->b_wptr, len + MXGEFW_PAD);
2406 mp->b_wptr += len + MXGEFW_PAD;
2407 mp->b_rptr += MXGEFW_PAD;
2408
2409 ss->rx_stats.ibytes += len;
2410 ss->rx_stats.ipackets += 1;
2411 myri10ge_rx_csum(mp, &ss->rx_stats, csum);
2412
2413 abort:
2414 if ((idx & 7) == 7) {
2415 myri10ge_submit_8rx(&rx->lanai[idx - 7],
2416 &rx->shadow[idx - 7]);
2417 }
2418
2419 return (mp);
2420 }
2421
2422
2423 static mblk_t *
2424 myri10ge_rx_done_big(struct myri10ge_slice_state *ss, uint32_t len,
2425 uint32_t csum)
2426 {
2427 struct myri10ge_jpool_stuff *jpool;
2428 struct myri10ge_jpool_entry *j;
2429 mblk_t *mp;
2430 int idx, num_owned_by_mcp;
2431
2432 jpool = &ss->jpool;
2433 idx = ss->j_rx_cnt & ss->rx_big.mask;
2434 j = ss->rx_big.info[idx].j;
2435
2436 if (j == NULL) {
2437 printf("%s: null j at idx=%d, rx_big.cnt = %d, j_rx_cnt=%d\n",
2438 ss->mgp->name, idx, ss->rx_big.cnt, ss->j_rx_cnt);
2439 return (NULL);
2440 }
2441
2442
2443 ss->rx_big.info[idx].j = NULL;
2444 ss->j_rx_cnt++;
2445
2446
2447 /*
2448 * Check to see if we are low on rx buffers.
2449 * Note that we must leave at least 8 free so there are
2450 * enough to free in a single 64-byte write.
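 * (eight descriptors make exactly one 64-byte write, assuming the
 * 8-byte mcp_kreq_ether_recv_t that myri10ge_submit_8rx() copies
 * out)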
2451 */
2452 num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt;
2453 if (num_owned_by_mcp < jpool->low_water) {
2454 mutex_enter(&jpool->mtx);
2455 myri10ge_restock_jumbos(ss);
2456 mutex_exit(&jpool->mtx);
2457 num_owned_by_mcp = ss->rx_big.cnt - ss->j_rx_cnt;
2458 /* if we are still low, then we have to copy */
2459 if (num_owned_by_mcp < 16) {
2460 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_copy);
2461 /* allocate a new buffer to pass up the stack */
2462 mp = allocb(len + MXGEFW_PAD, 0);
2463 if (mp == NULL) {
2464 goto abort;
2465 }
2466 bcopy(j->buf,
2467 (caddr_t)mp->b_wptr, len + MXGEFW_PAD);
2468 myri10ge_jfree_rtn(j);
2469 /* push buffer back to NIC */
2470 mutex_enter(&jpool->mtx);
2471 myri10ge_restock_jumbos(ss);
2472 mutex_exit(&jpool->mtx);
2473 goto set_len;
2474 }
2475 }
2476
2477 /* loan our buffer to the stack */
2478 mp = desballoc((unsigned char *)j->buf, myri10ge_mtu, 0, &j->free_func);
2479 if (mp == NULL) {
2480 goto abort;
2481 }
2482
2483 set_len:
2484 mp->b_rptr += MXGEFW_PAD;
2485 mp->b_wptr = ((unsigned char *) mp->b_rptr + len);
2486
2487 ss->rx_stats.ibytes += len;
2488 ss->rx_stats.ipackets += 1;
2489 myri10ge_rx_csum(mp, &ss->rx_stats, csum);
2490
2491 return (mp);
2492
2493 abort:
2494 myri10ge_jfree_rtn(j);
2495 MYRI10GE_ATOMIC_SLICE_STAT_INC(rx_big_nobuf);
2496 return (NULL);
2497 }
2498
2499 /*
2500 * Free all transmit buffers up until the specified index
2501 */
2502 static inline void
2503 myri10ge_tx_done(struct myri10ge_slice_state *ss, uint32_t mcp_index)
2504 {
2505 myri10ge_tx_ring_t *tx;
2506 struct myri10ge_tx_dma_handle_head handles;
2507 int idx;
2508 int limit = 0;
2509
2510 tx = &ss->tx;
2511 handles.head = NULL;
2512 handles.tail = NULL;
2513 while (tx->pkt_done != (int)mcp_index) {
2514 idx = tx->done & tx->mask;
2515
2516 /*
2517 * the mblk and DMA handle are attached only to the first
2518 * slot used by each buffer in the packet
2519 */
2520
2521 if (tx->info[idx].m) {
2522 (void) ddi_dma_unbind_handle(tx->info[idx].handle->h);
2523 tx->info[idx].handle->next = handles.head;
2524 handles.head = tx->info[idx].handle;
2525 if (handles.tail == NULL)
2526 handles.tail = tx->info[idx].handle;
2527 freeb(tx->info[idx].m);
2528 tx->info[idx].m = 0;
2529 tx->info[idx].handle = 0;
2530 }
2531 if (tx->info[idx].ostat.opackets != 0) {
2532 tx->stats.multixmt += tx->info[idx].ostat.multixmt;
2533 tx->stats.brdcstxmt += tx->info[idx].ostat.brdcstxmt;
2534 tx->stats.obytes += tx->info[idx].ostat.obytes;
2535 tx->stats.opackets += tx->info[idx].ostat.opackets;
2536 tx->info[idx].stat.un.all = 0;
2537 tx->pkt_done++;
2538 }
2539
2540 tx->done++;
2541 /*
2542 * if we stalled the queue, wake it, but wait until
2543 * we have at least 1/2 our slots free.
2544 */
2545 if ((tx->req - tx->done) < (tx->mask >> 1) &&
2546 tx->stall != tx->sched) {
2547 mutex_enter(&ss->tx.lock);
2548 tx->sched = tx->stall;
2549 mutex_exit(&ss->tx.lock);
2550 mac_tx_ring_update(ss->mgp->mh, tx->rh);
2551 }
2552
2553 /* limit potential for livelock */
2554 if (unlikely(++limit > 2 * tx->mask))
2555 break;
2556 }
2557 if (tx->req == tx->done && tx->stop != NULL) {
2558 /*
2559 * The NIC has sent all pending requests; allow it
2560 * to stop polling this queue
2561 */
2562 mutex_enter(&tx->lock);
2563 if (tx->req == tx->done && tx->active) {
2564 *(int *)(void *)tx->stop = 1;
2565 tx->active = 0;
2566 mb();
2567 }
2568 mutex_exit(&tx->lock);
2569 }
2570 if (handles.head != NULL)
2571 myri10ge_free_tx_handles(tx, &handles);
2572 }
2573
2574 static void
2575 myri10ge_mbl_init(struct myri10ge_mblk_list *mbl)
2576 {
2577 mbl->head = NULL;
2578 mbl->tail = &mbl->head;
2579 mbl->cnt = 0;
2580 }
2581
2582 /*ARGSUSED*/
2583 void
2584 myri10ge_mbl_append(struct myri10ge_slice_state *ss,
2585 struct myri10ge_mblk_list *mbl, mblk_t *mp)
2586 {
2587 *(mbl->tail) = mp;
2588 mbl->tail = &mp->b_next;
2589 mp->b_next = NULL;
2590 mbl->cnt++;
2591 }
2592
2593
2594 static inline void
2595 myri10ge_clean_rx_done(struct myri10ge_slice_state *ss,
2596 struct myri10ge_mblk_list *mbl, int limit, boolean_t *stop)
2597 {
2598 myri10ge_rx_done_t *rx_done = &ss->rx_done;
2599 struct myri10ge_priv *mgp = ss->mgp;
2600 mblk_t *mp;
2601 struct lro_entry *lro;
2602 uint16_t length;
2603 uint16_t checksum;
2604
2605
2606 while (rx_done->entry[rx_done->idx].length != 0) {
2607 if (unlikely (*stop)) {
2608 break;
2609 }
2610 length = ntohs(rx_done->entry[rx_done->idx].length);
2611 length &= (~MXGEFW_RSS_HASH_MASK);
2612
2613 /* limit potential for livelock */
2614 limit -= length;
2615 if (unlikely(limit < 0))
2616 break;
2617
2618 rx_done->entry[rx_done->idx].length = 0;
2619 checksum = ntohs(rx_done->entry[rx_done->idx].checksum);
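/*
 * frames up to myri10ge_small_bytes come from the small-buffer
 * ring and are always copied; larger frames normally loan the
 * jumbo buffer to the stack via desballoc()
 */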
2620 if (length <= myri10ge_small_bytes)
2621 mp = myri10ge_rx_done_small(ss, length, checksum);
2622 else
2623 mp = myri10ge_rx_done_big(ss, length, checksum);
2624 if (mp != NULL) {
2625 if (!myri10ge_lro ||
2626 0 != myri10ge_lro_rx(ss, mp, checksum, mbl))
2627 myri10ge_mbl_append(ss, mbl, mp);
2628 }
2629 rx_done->cnt++;
2630 rx_done->idx = rx_done->cnt & (mgp->max_intr_slots - 1);
2631 }
2632 while (ss->lro_active != NULL) {
2633 lro = ss->lro_active;
2634 ss->lro_active = lro->next;
2635 myri10ge_lro_flush(ss, lro, mbl);
2636 }
2637 }
2638
2639 static void
2640 myri10ge_intr_rx(struct myri10ge_slice_state *ss)
2641 {
2642 uint64_t gen;
2643 struct myri10ge_mblk_list mbl;
2644
2645 myri10ge_mbl_init(&mbl);
2646 if (mutex_tryenter(&ss->rx_lock) == 0)
2647 return;
2648 gen = ss->rx_gen_num;
2649 myri10ge_clean_rx_done(ss, &mbl, MYRI10GE_POLL_NULL,
2650 &ss->rx_polling);
2651 if (mbl.head != NULL)
2652 mac_rx_ring(ss->mgp->mh, ss->rx_rh, mbl.head, gen);
2653 mutex_exit(&ss->rx_lock);
2654
2655 }
2656
2657 static mblk_t *
2658 myri10ge_poll_rx(void *arg, int bytes)
2659 {
2660 struct myri10ge_slice_state *ss = arg;
2661 struct myri10ge_mblk_list mbl;
2662 boolean_t dummy = B_FALSE;
2663
2664 if (bytes == 0)
2665 return (NULL);
2666
2667 myri10ge_mbl_init(&mbl);
2668 mutex_enter(&ss->rx_lock);
2669 if (ss->rx_polling)
2670 myri10ge_clean_rx_done(ss, &mbl, bytes, &dummy);
2671 else
2672 printf("%d: poll_rx: token=%d, polling=%d\n", (int)(ss -
2673 ss->mgp->ss), ss->rx_token, ss->rx_polling);
2674 mutex_exit(&ss->rx_lock);
2675 return (mbl.head);
2676 }
2677
2678 /*ARGSUSED*/
2679 static uint_t
2680 myri10ge_intr(caddr_t arg0, caddr_t arg1)
2681 {
2682 struct myri10ge_slice_state *ss =
2683 (struct myri10ge_slice_state *)(void *)arg0;
2684 struct myri10ge_priv *mgp = ss->mgp;
2685 mcp_irq_data_t *stats = ss->fw_stats;
2686 myri10ge_tx_ring_t *tx = &ss->tx;
2687 uint32_t send_done_count;
2688 uint8_t valid;
2689
2690
2691 /* make sure the DMA has finished */
2692 if (!stats->valid) {
2693 return (DDI_INTR_UNCLAIMED);
2694 }
2695 valid = stats->valid;
2696
2697 /* low bit indicates receives are present */
2698 if (valid & 1)
2699 myri10ge_intr_rx(ss);
2700
2701 if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
2702 /* lower legacy IRQ */
2703 *mgp->irq_deassert = 0;
2704 if (!myri10ge_deassert_wait)
2705 /* don't wait for conf. that irq is low */
2706 stats->valid = 0;
2707 mb();
2708 } else {
2709 /* no need to wait for conf. that irq is low */
2710 stats->valid = 0;
2711 }
2712
2713 do {
2714 /* check for transmit completes and receives */
2715 send_done_count = ntohl(stats->send_done_count);
2716 if (send_done_count != tx->pkt_done)
2717 myri10ge_tx_done(ss, (int)send_done_count);
2718 } while (*((volatile uint8_t *) &stats->valid));
2719
2720 if (stats->stats_updated) {
2721 if (mgp->link_state != stats->link_up || stats->link_down) {
2722 mgp->link_state = stats->link_up;
2723 if (stats->link_down) {
2724 mgp->down_cnt += stats->link_down;
2725 mgp->link_state = 0;
2726 }
2727 if (mgp->link_state) {
2728 if (myri10ge_verbose)
2729 printf("%s: link up\n", mgp->name);
2730 mac_link_update(mgp->mh, LINK_STATE_UP);
2731 } else {
2732 if (myri10ge_verbose)
2733 printf("%s: link down\n", mgp->name);
2734 mac_link_update(mgp->mh, LINK_STATE_DOWN);
2735 }
2736 MYRI10GE_NIC_STAT_INC(link_changes);
2737 }
2738 if (mgp->rdma_tags_available !=
2739 ntohl(ss->fw_stats->rdma_tags_available)) {
2740 mgp->rdma_tags_available =
2741 ntohl(ss->fw_stats->rdma_tags_available);
2742 cmn_err(CE_NOTE, "%s: RDMA timed out! "
2743 "%d tags left\n", mgp->name,
2744 mgp->rdma_tags_available);
2745 }
2746 }
2747
2748 mb();
2749 /* check to see if we have an rx token to pass back */
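/*
 * if the ring is being polled, hold the token; it is handed back
 * to the NIC in myri10ge_rx_ring_intr_enable() when the mac layer
 * switches the ring back to interrupt mode
 */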
2750 if (valid & 0x1) {
2751 mutex_enter(&ss->poll_lock);
2752 if (ss->rx_polling) {
2753 ss->rx_token = 1;
2754 } else {
2755 *ss->irq_claim = BE_32(3);
2756 ss->rx_token = 0;
2757 }
2758 mutex_exit(&ss->poll_lock);
2759 }
2760 *(ss->irq_claim + 1) = BE_32(3);
2761 return (DDI_INTR_CLAIMED);
2762 }
2763
2764 /*
2765 * Add or remove a multicast address. This is called with our
2766 * macinfo's lock held by GLD, so we do not need to worry about
2767 * our own locking here.
2768 */
2769 static int
2770 myri10ge_m_multicst(void *arg, boolean_t add, const uint8_t *multicastaddr)
2771 {
2772 myri10ge_cmd_t cmd;
2773 struct myri10ge_priv *mgp = arg;
2774 int status, join_leave;
2775
2776 if (add)
2777 join_leave = MXGEFW_JOIN_MULTICAST_GROUP;
2778 else
2779 join_leave = MXGEFW_LEAVE_MULTICAST_GROUP;
2780 (void) memcpy(&cmd.data0, multicastaddr, 4);
2781 (void) memcpy(&cmd.data1, multicastaddr + 4, 2);
2782 cmd.data0 = htonl(cmd.data0);
2783 cmd.data1 = htonl(cmd.data1);
2784 status = myri10ge_send_cmd(mgp, join_leave, &cmd);
2785 if (status == 0)
2786 return (0);
2787
2788 cmn_err(CE_WARN, "%s: failed to set multicast address\n",
2789 mgp->name);
2790 return (status);
2791 }
2792
2793
2794 static int
2795 myri10ge_m_promisc(void *arg, boolean_t on)
2796 {
2797 struct myri10ge_priv *mgp = arg;
2798
2799 myri10ge_change_promisc(mgp, on);
2800 return (0);
2801 }
2802
2803 /*
2804 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
2805 * backwards one at a time and handle ring wraps
2806 */
2807
2808 static inline void
2809 myri10ge_submit_req_backwards(myri10ge_tx_ring_t *tx,
2810 mcp_kreq_ether_send_t *src, int cnt)
2811 {
2812 int idx, starting_slot;
2813 starting_slot = tx->req;
2814 while (cnt > 1) {
2815 cnt--;
2816 idx = (starting_slot + cnt) & tx->mask;
2817 myri10ge_pio_copy(&tx->lanai[idx],
2818 &src[cnt], sizeof (*src));
2819 mb();
2820 }
2821 }
2822
2823 /*
2824 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
2825 * at most 32 bytes at a time, so as to avoid involving the software
2826 * pio handler in the nic. We re-write the first segment's flags
2827 * to mark them valid only after writing the entire chain
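 * (publishing the first descriptor's flags last means the NIC
 * should never observe a partially-written chain)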
2828 */
2829
2830 static inline void
2831 myri10ge_submit_req(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
2832 int cnt)
2833 {
2834 int idx, i;
2835 uint32_t *src_ints, *dst_ints;
2836 mcp_kreq_ether_send_t *srcp, *dstp, *dst;
2837 uint8_t last_flags;
2838
2839 idx = tx->req & tx->mask;
2840
2841 last_flags = src->flags;
2842 src->flags = 0;
2843 mb();
2844 dst = dstp = &tx->lanai[idx];
2845 srcp = src;
2846
2847 if ((idx + cnt) < tx->mask) {
2848 for (i = 0; i < (cnt - 1); i += 2) {
2849 myri10ge_pio_copy(dstp, srcp, 2 * sizeof (*src));
2850 mb(); /* force write every 32 bytes */
2851 srcp += 2;
2852 dstp += 2;
2853 }
2854 } else {
2855 /*
2856 * submit all but the first request, and ensure
2857 * that it is submitted below
2858 */
2859 myri10ge_submit_req_backwards(tx, src, cnt);
2860 i = 0;
2861 }
2862 if (i < cnt) {
2863 /* submit the first request */
2864 myri10ge_pio_copy(dstp, srcp, sizeof (*src));
2865 mb(); /* barrier before setting valid flag */
2866 }
2867
2868 /* re-write the last 32-bits with the valid flags */
2869 src->flags |= last_flags;
2870 src_ints = (uint32_t *)src;
2871 src_ints += 3;
2872 dst_ints = (uint32_t *)dst;
2873 dst_ints += 3;
2874 *dst_ints = *src_ints;
2875 tx->req += cnt;
2876 mb();
2877 /* notify NIC to poll this tx ring */
2878 if (!tx->active && tx->go != NULL) {
2879 *(int *)(void *)tx->go = 1;
2880 tx->active = 1;
2881 tx->activate++;
2882 mb();
2883 }
2884 }
2885
2886 /* ARGSUSED */
2887 static inline void
2888 myri10ge_lso_info_get(mblk_t *mp, uint32_t *mss, uint32_t *flags)
2889 {
2890 uint32_t lso_flag;
2891 mac_lso_get(mp, mss, &lso_flag);
2892 (*flags) |= lso_flag;
2893 }
2894
2895
2896 /* like pullupmsg, except preserve hcksum/LSO attributes */
2897 static int
2898 myri10ge_pullup(struct myri10ge_slice_state *ss, mblk_t *mp)
2899 {
2900 uint32_t start, stuff, tx_offload_flags, mss;
2901 int ok;
2902
2903 mss = 0;
2904 mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags);
2905 myri10ge_lso_info_get(mp, &mss, &tx_offload_flags);
2906
2907 ok = pullupmsg(mp, -1);
2908 if (!ok) {
2909 printf("pullupmsg failed");
2910 return (DDI_FAILURE);
2911 }
2912 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_pullup);
2913 mac_hcksum_set(mp, start, stuff, NULL, NULL, tx_offload_flags);
2914 if (tx_offload_flags & HW_LSO)
2915 DB_LSOMSS(mp) = (uint16_t)mss;
2916 lso_info_set(mp, mss, tx_offload_flags);
2917 return (DDI_SUCCESS);
2918 }
2919
2920 static inline void
2921 myri10ge_tx_stat(struct myri10ge_tx_pkt_stats *s, struct ether_header *eh,
2922 int opackets, int obytes)
2923 {
2924 s->un.all = 0;
2925 if (eh->ether_dhost.ether_addr_octet[0] & 1) {
2926 if (0 == (bcmp(eh->ether_dhost.ether_addr_octet,
2927 myri10ge_broadcastaddr, sizeof (eh->ether_dhost))))
2928 s->un.s.brdcstxmt = 1;
2929 else
2930 s->un.s.multixmt = 1;
2931 }
2932 s->un.s.opackets = (uint16_t)opackets;
2933 s->un.s.obytes = obytes;
2934 }
2935
2936 static int
2937 myri10ge_tx_copy(struct myri10ge_slice_state *ss, mblk_t *mp,
2938 mcp_kreq_ether_send_t *req)
2939 {
2940 myri10ge_tx_ring_t *tx = &ss->tx;
2941 caddr_t ptr;
2942 struct myri10ge_tx_copybuf *cp;
2943 mblk_t *bp;
2944 int idx, mblen, avail;
2945 uint16_t len;
2946
2947 mutex_enter(&tx->lock);
2948 avail = tx->mask - (tx->req - tx->done);
2949 if (avail <= 1) {
2950 mutex_exit(&tx->lock);
2951 return (EBUSY);
2952 }
2953 idx = tx->req & tx->mask;
2954 cp = &tx->cp[idx];
2955 ptr = cp->va;
2956 for (len = 0, bp = mp; bp != NULL; bp = bp->b_cont) {
2957 mblen = MBLKL(bp);
2958 bcopy(bp->b_rptr, ptr, mblen);
2959 ptr += mblen;
2960 len += mblen;
2961 }
2962 /* ensure runts are padded to 60 bytes */
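/*
 * (Ethernet's 64-byte minimum frame includes the 4-byte FCS,
 * which the hardware appends, hence 60 bytes here)
 */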
2963 if (len < 60) {
2964 bzero(ptr, 64 - len);
2965 len = 60;
2966 }
2967 req->addr_low = cp->dma.low;
2968 req->addr_high = cp->dma.high;
2969 req->length = htons(len);
2970 req->pad = 0;
2971 req->rdma_count = 1;
2972 myri10ge_tx_stat(&tx->info[idx].stat,
2973 (struct ether_header *)(void *)cp->va, 1, len);
2974 (void) ddi_dma_sync(cp->dma.handle, 0, len, DDI_DMA_SYNC_FORDEV);
2975 myri10ge_submit_req(&ss->tx, req, 1);
2976 mutex_exit(&tx->lock);
2977 freemsg(mp);
2978 return (DDI_SUCCESS);
2979 }
2980
2981
2982 static void
2983 myri10ge_send_locked(myri10ge_tx_ring_t *tx, mcp_kreq_ether_send_t *req_list,
2984 struct myri10ge_tx_buffer_state *tx_info,
2985 int count)
2986 {
2987 int i, idx;
2988
2989 idx = 0; /* gcc -Wuninitialized */
2990 /* store unmapping and bp info for tx irq handler */
2991 for (i = 0; i < count; i++) {
2992 idx = (tx->req + i) & tx->mask;
2993 tx->info[idx].m = tx_info[i].m;
2994 tx->info[idx].handle = tx_info[i].handle;
2995 }
2996 tx->info[idx].stat.un.all = tx_info[0].stat.un.all;
2997
2998 /* submit the frame to the nic */
2999 myri10ge_submit_req(tx, req_list, count);
3000
3001
3002 }
3003
3004
3005
3006 static void
3007 myri10ge_copydata(mblk_t *mp, int off, int len, caddr_t buf)
3008 {
3009 mblk_t *bp;
3010 int seglen;
3011 uint_t count;
3012
3013 bp = mp;
3014
3015 while (off > 0) {
3016 seglen = MBLKL(bp);
3017 if (off < seglen)
3018 break;
3019 off -= seglen;
3020 bp = bp->b_cont;
3021 }
3022 while (len > 0) {
3023 seglen = MBLKL(bp);
3024 count = min(seglen - off, len);
3025 bcopy(bp->b_rptr + off, buf, count);
3026 len -= count;
3027 buf += count;
3028 off = 0;
3029 bp = bp->b_cont;
3030 }
3031 }
3032
3033 static int
3034 myri10ge_ether_parse_header(mblk_t *mp)
3035 {
3036 struct ether_header eh_copy;
3037 struct ether_header *eh;
3038 int eth_hdr_len, seglen;
3039
3040 seglen = MBLKL(mp);
3041 eth_hdr_len = sizeof (*eh);
3042 if (seglen < eth_hdr_len) {
3043 myri10ge_copydata(mp, 0, eth_hdr_len, (caddr_t)&eh_copy);
3044 eh = &eh_copy;
3045 } else {
3046 eh = (struct ether_header *)(void *)mp->b_rptr;
3047 }
3048 if (eh->ether_type == BE_16(ETHERTYPE_VLAN)) {
3049 eth_hdr_len += 4;
3050 }
3051
3052 return (eth_hdr_len);
3053 }
3054
3055 static int
3056 myri10ge_lso_parse_header(mblk_t *mp, int off)
3057 {
3058 char buf[128];
3059 int seglen, sum_off;
3060 struct ip *ip;
3061 struct tcphdr *tcp;
3062
3063 seglen = MBLKL(mp);
3064 if (seglen < off + sizeof (*ip)) {
3065 myri10ge_copydata(mp, off, sizeof (*ip), buf);
3066 ip = (struct ip *)(void *)buf;
3067 } else {
3068 ip = (struct ip *)(void *)(mp->b_rptr + off);
3069 }
3070 if (seglen < off + (ip->ip_hl << 2) + sizeof (*tcp)) {
3071 myri10ge_copydata(mp, off,
3072 (ip->ip_hl << 2) + sizeof (*tcp), buf);
3073 ip = (struct ip *)(void *)buf;
3074 }
3075 tcp = (struct tcphdr *)(void *)((char *)ip + (ip->ip_hl << 2));
3076
3077 /*
3078 * NIC expects ip_sum to be zero. Recent changes to
3079 * OpenSolaris leave the correct ip checksum there, rather
3080 * than the required zero, so we need to zero it. Otherwise,
3081 * the NIC will produce bad checksums when sending LSO packets.
3082 */
3083 if (ip->ip_sum != 0) {
3084 if (((char *)ip) != buf) {
3085 /* ip points into mblk, so just zero it */
3086 ip->ip_sum = 0;
3087 } else {
3088 /*
3089 * ip points into a copy, so walk the chain
3090 * to find the ip_csum, then zero it
3091 */
3092 sum_off = off + _PTRDIFF(&ip->ip_sum, buf);
3093 while (sum_off > (int)(MBLKL(mp) - 1)) {
3094 sum_off -= MBLKL(mp);
3095 mp = mp->b_cont;
3096 }
3097 mp->b_rptr[sum_off] = 0;
3098 sum_off++;
3099 while (sum_off > MBLKL(mp) - 1) {
3100 sum_off -= MBLKL(mp);
3101 mp = mp->b_cont;
3102 }
3103 mp->b_rptr[sum_off] = 0;
3104 }
3105 }
3106 return (off + ((ip->ip_hl + tcp->th_off) << 2));
3107 }
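/*
 * e.g. with no IP or TCP options (ip_hl == 5, th_off == 5) this
 * returns off + ((5 + 5) << 2) == off + 40; for an untagged
 * Ethernet frame (off == 14) that is the usual 54-byte LSO header
 */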
3108
3109 static int
3110 myri10ge_tx_tso_copy(struct myri10ge_slice_state *ss, mblk_t *mp,
3111 mcp_kreq_ether_send_t *req_list, int hdr_size, int pkt_size,
3112 uint16_t mss, uint8_t cksum_offset)
3113 {
3114 myri10ge_tx_ring_t *tx = &ss->tx;
3115 struct myri10ge_priv *mgp = ss->mgp;
3116 mblk_t *bp;
3117 mcp_kreq_ether_send_t *req;
3118 struct myri10ge_tx_copybuf *cp;
3119 caddr_t rptr, ptr;
3120 int mblen, count, cum_len, mss_resid, tx_req, pkt_size_tmp;
3121 int resid, avail, idx, hdr_size_tmp, tx_boundary;
3122 int rdma_count;
3123 uint32_t seglen, len, boundary, low, high_swapped;
3124 uint16_t pseudo_hdr_offset = htons(mss);
3125 uint8_t flags;
3126
3127 tx_boundary = mgp->tx_boundary;
3128 hdr_size_tmp = hdr_size;
3129 resid = tx_boundary;
3130 count = 1;
3131 mutex_enter(&tx->lock);
3132
3133 /* check to see if the slots are really there */
3134 avail = tx->mask - (tx->req - tx->done);
3135 if (unlikely(avail <= MYRI10GE_MAX_SEND_DESC_TSO)) {
3136 atomic_add_32(&tx->stall, 1);
3137 mutex_exit(&tx->lock);
3138 return (EBUSY);
3139 }
3140
3141 /* copy */
3142 cum_len = -hdr_size;
3143 count = 0;
3144 req = req_list;
3145 idx = tx->mask & tx->req;
3146 cp = &tx->cp[idx];
3147 low = ntohl(cp->dma.low);
3148 ptr = cp->va;
3149 cp->len = 0;
3150 if (mss) {
3151 int payload = pkt_size - hdr_size;
3152 uint16_t opackets = (payload / mss) + ((payload % mss) != 0);
3153 tx->info[idx].ostat.opackets = opackets;
3154 tx->info[idx].ostat.obytes = (opackets - 1) * hdr_size
3155 + pkt_size;
3156 }
3157 hdr_size_tmp = hdr_size;
3158 mss_resid = mss;
3159 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST);
3160 tx_req = tx->req;
3161 for (bp = mp; bp != NULL; bp = bp->b_cont) {
3162 mblen = MBLKL(bp);
3163 rptr = (caddr_t)bp->b_rptr;
3164 len = min(hdr_size_tmp, mblen);
3165 if (len) {
3166 bcopy(rptr, ptr, len);
3167 rptr += len;
3168 ptr += len;
3169 resid -= len;
3170 mblen -= len;
3171 hdr_size_tmp -= len;
3172 cp->len += len;
3173 if (hdr_size_tmp)
3174 continue;
3175 if (resid < mss) {
3176 tx_req++;
3177 idx = tx->mask & tx_req;
3178 cp = &tx->cp[idx];
3179 low = ntohl(cp->dma.low);
3180 ptr = cp->va;
3181 resid = tx_boundary;
3182 }
3183 }
3184 while (mblen) {
3185 len = min(mss_resid, mblen);
3186 bcopy(rptr, ptr, len);
3187 mss_resid -= len;
3188 resid -= len;
3189 mblen -= len;
3190 rptr += len;
3191 ptr += len;
3192 cp->len += len;
3193 if (mss_resid == 0) {
3194 mss_resid = mss;
3195 if (resid < mss) {
3196 tx_req++;
3197 idx = tx->mask & tx_req;
3198 cp = &tx->cp[idx];
3199 cp->len = 0;
3200 low = ntohl(cp->dma.low);
3201 ptr = cp->va;
3202 resid = tx_boundary;
3203 }
3204 }
3205 }
3206 }
3207
3208 req = req_list;
3209 pkt_size_tmp = pkt_size;
3210 count = 0;
3211 rdma_count = 0;
3212 tx_req = tx->req;
3213 while (pkt_size_tmp) {
3214 idx = tx->mask & tx_req;
3215 cp = &tx->cp[idx];
3216 high_swapped = cp->dma.high;
3217 low = ntohl(cp->dma.low);
3218 len = cp->len;
3219 if (len == 0) {
3220 printf("len=0! pkt_size_tmp=%d, pkt_size=%d\n",
3221 pkt_size_tmp, pkt_size);
3222 for (bp = mp; bp != NULL; bp = bp->b_cont) {
3223 mblen = MBLKL(bp);
3224 printf("mblen:%d\n", mblen);
3225 }
3226 pkt_size_tmp = pkt_size;
3227 tx_req = tx->req;
3228 while (pkt_size_tmp > 0) {
3229 idx = tx->mask & tx_req;
3230 cp = &tx->cp[idx];
3231 printf("cp->len = %d\n", cp->len);
3232 pkt_size_tmp -= cp->len;
3233 tx_req++;
3234 }
3235 printf("dropped\n");
3236 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3237 goto done;
3238 }
3239 pkt_size_tmp -= len;
3240 while (len) {
3241 while (len) {
3242 uint8_t flags_next;
3243 int cum_len_next;
3244
3245 boundary = (low + mgp->tx_boundary) &
3246 ~(mgp->tx_boundary - 1);
3247 seglen = boundary - low;
3248 if (seglen > len)
3249 seglen = len;
3250
3251 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
3252 cum_len_next = cum_len + seglen;
3253 (req-rdma_count)->rdma_count = rdma_count + 1;
3254 if (likely(cum_len >= 0)) {
3255 /* payload */
3256 int next_is_first, chop;
3257
3258 chop = (cum_len_next > mss);
3259 cum_len_next = cum_len_next % mss;
3260 next_is_first = (cum_len_next == 0);
3261 flags |= chop *
3262 MXGEFW_FLAGS_TSO_CHOP;
3263 flags_next |= next_is_first *
3264 MXGEFW_FLAGS_FIRST;
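/*
 * branchless form of:
 *   if (chop || next_is_first)
 *           rdma_count = -1;
 *   if (chop && !next_is_first)
 *           rdma_count = 0;
 * (for 0/1 values, -(x) is either 0 or all ones)
 */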
3265 rdma_count |= -(chop | next_is_first);
3266 rdma_count += chop & !next_is_first;
3267 } else if (likely(cum_len_next >= 0)) {
3268 /* header ends */
3269 int small;
3270
3271 rdma_count = -1;
3272 cum_len_next = 0;
3273 seglen = -cum_len;
3274 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
3275 flags_next = MXGEFW_FLAGS_TSO_PLD |
3276 MXGEFW_FLAGS_FIRST |
3277 (small * MXGEFW_FLAGS_SMALL);
3278 }
3279 req->addr_high = high_swapped;
3280 req->addr_low = htonl(low);
3281 req->pseudo_hdr_offset = pseudo_hdr_offset;
3282 req->pad = 0; /* complete solid 16-byte block */
3283 req->rdma_count = 1;
3284 req->cksum_offset = cksum_offset;
3285 req->length = htons(seglen);
3286 req->flags = flags | ((cum_len & 1) *
3287 MXGEFW_FLAGS_ALIGN_ODD);
3288 if (cksum_offset > seglen)
3289 cksum_offset -= seglen;
3290 else
3291 cksum_offset = 0;
3292 low += seglen;
3293 len -= seglen;
3294 cum_len = cum_len_next;
3295 req++;
3296 req->flags = 0;
3297 flags = flags_next;
3298 count++;
3299 rdma_count++;
3300 }
3301 }
3302 tx_req++;
3303 }
3304 (req-rdma_count)->rdma_count = (uint8_t)rdma_count;
3305 do {
3306 req--;
3307 req->flags |= MXGEFW_FLAGS_TSO_LAST;
3308 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP |
3309 MXGEFW_FLAGS_FIRST)));
3310
3311 myri10ge_submit_req(tx, req_list, count);
3312 done:
3313 mutex_exit(&tx->lock);
3314 freemsg(mp);
3315 return (DDI_SUCCESS);
3316 }
3317
3318 /*
3319 * Try to send the chain of buffers described by the mp. We must not
3320 * encapsulate more than eth->tx.req - eth->tx.done, or
3321 * MXGEFW_MAX_SEND_DESC, whichever is less.
3322 */
3323
3324 static int
3325 myri10ge_send(struct myri10ge_slice_state *ss, mblk_t *mp,
3326 mcp_kreq_ether_send_t *req_list, struct myri10ge_tx_buffer_state *tx_info)
3327 {
3328 struct myri10ge_priv *mgp = ss->mgp;
3329 myri10ge_tx_ring_t *tx = &ss->tx;
3330 mcp_kreq_ether_send_t *req;
3331 struct myri10ge_tx_dma_handle *handles, *dma_handle = NULL;
3332 mblk_t *bp;
3333 ddi_dma_cookie_t cookie;
3334 int err, rv, count, avail, mblen, try_pullup, i, max_segs, maclen,
3335 rdma_count, cum_len, lso_hdr_size;
3336 uint32_t start, stuff, tx_offload_flags;
3337 uint32_t seglen, len, mss, boundary, low, high_swapped;
3338 uint_t ncookies;
3339 uint16_t pseudo_hdr_offset;
3340 uint8_t flags, cksum_offset, odd_flag;
3341 int pkt_size;
3342 int lso_copy = myri10ge_lso_copy;
3343 try_pullup = 1;
3344
3345 again:
3346 /* Setup checksum offloading, if needed */
3347 mac_hcksum_get(mp, &start, &stuff, NULL, NULL, &tx_offload_flags);
3348 myri10ge_lso_info_get(mp, &mss, &tx_offload_flags);
3349 if (tx_offload_flags & HW_LSO) {
3350 max_segs = MYRI10GE_MAX_SEND_DESC_TSO;
3351 if ((tx_offload_flags & HCK_PARTIALCKSUM) == 0) {
3352 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_lsobadflags);
3353 freemsg(mp);
3354 return (DDI_SUCCESS);
3355 }
3356 } else {
3357 max_segs = MXGEFW_MAX_SEND_DESC;
3358 mss = 0;
3359 }
3360 req = req_list;
3361 cksum_offset = 0;
3362 pseudo_hdr_offset = 0;
3363
3364 /* leave an extra slot to keep the ring from wrapping */
3365 avail = tx->mask - (tx->req - tx->done);
3366
3367 /*
3368 * If we have > MXGEFW_MAX_SEND_DESC, then any over-length
3369 * message will need to be pulled up in order to fit.
3370 * Otherwise, we are low on transmit descriptors; it is
3371 * probably better to stall and try again rather than pull up
3372 * a message to fit.
3373 */
3374
3375 if (avail < max_segs) {
3376 err = EBUSY;
3377 atomic_add_32(&tx->stall_early, 1);
3378 goto stall;
3379 }
3380
3381 /* find out how long the frame is and how many segments it is */
3382 count = 0;
3383 odd_flag = 0;
3384 pkt_size = 0;
3385 flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST);
3386 for (bp = mp; bp != NULL; bp = bp->b_cont) {
3387 dblk_t *dbp;
3388 mblen = MBLKL(bp);
3389 if (mblen == 0) {
3390 /*
3391 * we can't simply skip over 0-length mblks
3392 * because the hardware can't deal with them,
3393 * and we could leak them.
3394 */
3395 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_zero_len);
3396 err = EIO;
3397 goto pullup;
3398 }
3399 /*
3400 * There's no advantage to copying most gesballoc
3401 * attached blocks, so disable lso copy in that case
3402 */
3403 if (mss && lso_copy == 1 && ((dbp = bp->b_datap) != NULL)) {
3404 if ((void *)dbp->db_lastfree != myri10ge_db_lastfree) {
3405 lso_copy = 0;
3406 }
3407 }
3408 pkt_size += mblen;
3409 count++;
3410 }
3411
3412 /* Try to pull up excessively long chains */
3413 if (count >= max_segs) {
3414 err = myri10ge_pullup(ss, mp);
3415 if (likely(err == DDI_SUCCESS)) {
3416 count = 1;
3417 } else {
3418 if (count < MYRI10GE_MAX_SEND_DESC_TSO) {
3419 /*
3420 * just let the h/w send it; it will be
3421 * inefficient, but it's better than dropping
3422 */
3423 max_segs = MYRI10GE_MAX_SEND_DESC_TSO;
3424 } else {
3425 /* drop it */
3426 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3427 freemsg(mp);
3428 return (0);
3429 }
3430 }
3431 }
3432
3433 cum_len = 0;
3434 maclen = myri10ge_ether_parse_header(mp);
3435
3436 if (tx_offload_flags & HCK_PARTIALCKSUM) {
3437
3438 cksum_offset = start + maclen;
3439 pseudo_hdr_offset = htons(stuff + maclen);
3440 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
3441 flags |= MXGEFW_FLAGS_CKSUM;
3442 }
3443
3444 lso_hdr_size = 0; /* -Wuninitialized */
3445 if (mss) { /* LSO */
3446 /* this removes any CKSUM flag from before */
3447 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST);
3448 /*
3449 * parse the headers and set cum_len to a negative
3450 * value to reflect the offset of the TCP payload
3451 */
3452 lso_hdr_size = myri10ge_lso_parse_header(mp, maclen);
3453 cum_len = -lso_hdr_size;
3454 if ((mss < mgp->tx_boundary) && lso_copy) {
3455 err = myri10ge_tx_tso_copy(ss, mp, req_list,
3456 lso_hdr_size, pkt_size, mss, cksum_offset);
3457 return (err);
3458 }
3459
3460 /*
3461 * for TSO, pseudo_hdr_offset holds mss. The firmware
3462 * figures out where to put the checksum by parsing
3463 * the header.
3464 */
3465
3466 pseudo_hdr_offset = htons(mss);
3467 } else if (pkt_size <= MXGEFW_SEND_SMALL_SIZE) {
3468 flags |= MXGEFW_FLAGS_SMALL;
3469 if (pkt_size < myri10ge_tx_copylen) {
3470 req->cksum_offset = cksum_offset;
3471 req->pseudo_hdr_offset = pseudo_hdr_offset;
3472 req->flags = flags;
3473 err = myri10ge_tx_copy(ss, mp, req);
3474 return (err);
3475 }
3476 cum_len = 0;
3477 }
3478
3479 /* pull one DMA handle for each bp from our freelist */
3480 handles = NULL;
3481 err = myri10ge_alloc_tx_handles(ss, count, &handles);
3482 if (err != DDI_SUCCESS) {
3483 err = DDI_FAILURE;
3484 goto stall;
3485 }
3486 count = 0;
3487 rdma_count = 0;
3488 for (bp = mp; bp != NULL; bp = bp->b_cont) {
3489 mblen = MBLKL(bp);
3490 dma_handle = handles;
3491 handles = handles->next;
3492
3493 rv = ddi_dma_addr_bind_handle(dma_handle->h, NULL,
3494 (caddr_t)bp->b_rptr, mblen,
3495 DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL,
3496 &cookie, &ncookies);
3497 if (unlikely(rv != DDI_DMA_MAPPED)) {
3498 err = EIO;
3499 try_pullup = 0;
3500 dma_handle->next = handles;
3501 handles = dma_handle;
3502 goto abort_with_handles;
3503 }
3504
3505 /* reserve the slot */
3506 tx_info[count].m = bp;
3507 tx_info[count].handle = dma_handle;
3508
3509 for (; ; ) {
3510 low = MYRI10GE_LOWPART_TO_U32(cookie.dmac_laddress);
3511 high_swapped =
3512 htonl(MYRI10GE_HIGHPART_TO_U32(
3513 cookie.dmac_laddress));
3514 len = (uint32_t)cookie.dmac_size;
3515 while (len) {
3516 uint8_t flags_next;
3517 int cum_len_next;
3518
3519 boundary = (low + mgp->tx_boundary) &
3520 ~(mgp->tx_boundary - 1);
3521 seglen = boundary - low;
3522 if (seglen > len)
3523 seglen = len;
3524
3525 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
3526 cum_len_next = cum_len + seglen;
3527 if (mss) {
3528 (req-rdma_count)->rdma_count =
3529 rdma_count + 1;
3530 if (likely(cum_len >= 0)) {
3531 /* payload */
3532 int next_is_first, chop;
3533
3534 chop = (cum_len_next > mss);
3535 cum_len_next =
3536 cum_len_next % mss;
3537 next_is_first =
3538 (cum_len_next == 0);
3539 flags |= chop *
3540 MXGEFW_FLAGS_TSO_CHOP;
3541 flags_next |= next_is_first *
3542 MXGEFW_FLAGS_FIRST;
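/*
 * same branchless rdma_count reset as in
 * myri10ge_tx_tso_copy(): forced to -1 on a chop or
 * frame boundary, back to 0 when chopped mid-frame
 */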
3543 rdma_count |=
3544 -(chop | next_is_first);
3545 rdma_count +=
3546 chop & !next_is_first;
3547 } else if (likely(cum_len_next >= 0)) {
3548 /* header ends */
3549 int small;
3550
3551 rdma_count = -1;
3552 cum_len_next = 0;
3553 seglen = -cum_len;
3554 small = (mss <=
3555 MXGEFW_SEND_SMALL_SIZE);
3556 flags_next =
3557 MXGEFW_FLAGS_TSO_PLD
3558 | MXGEFW_FLAGS_FIRST
3559 | (small *
3560 MXGEFW_FLAGS_SMALL);
3561 }
3562 }
3563 req->addr_high = high_swapped;
3564 req->addr_low = htonl(low);
3565 req->pseudo_hdr_offset = pseudo_hdr_offset;
3566 req->pad = 0; /* complete solid 16-byte block */
3567 req->rdma_count = 1;
3568 req->cksum_offset = cksum_offset;
3569 req->length = htons(seglen);
3570 req->flags = flags | ((cum_len & 1) * odd_flag);
3571 if (cksum_offset > seglen)
3572 cksum_offset -= seglen;
3573 else
3574 cksum_offset = 0;
3575 low += seglen;
3576 len -= seglen;
3577 cum_len = cum_len_next;
3578 count++;
3579 rdma_count++;
3580 /* make sure all the segments will fit */
3581 if (unlikely(count >= max_segs)) {
3582 MYRI10GE_ATOMIC_SLICE_STAT_INC(
3583 xmit_lowbuf);
3584 /* may try a pullup */
3585 err = EBUSY;
3586 if (try_pullup)
3587 try_pullup = 2;
3588 goto abort_with_handles;
3589 }
3590 req++;
3591 req->flags = 0;
3592 flags = flags_next;
3593 tx_info[count].m = 0;
3594 }
3595 ncookies--;
3596 if (ncookies == 0)
3597 break;
3598 ddi_dma_nextcookie(dma_handle->h, &cookie);
3599 }
3600 }
3601 (req-rdma_count)->rdma_count = (uint8_t)rdma_count;
3602
3603 if (mss) {
3604 do {
3605 req--;
3606 req->flags |= MXGEFW_FLAGS_TSO_LAST;
3607 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP |
3608 MXGEFW_FLAGS_FIRST)));
3609 }
3610
3611 /* calculate tx stats */
3612 if (mss) {
3613 uint16_t opackets;
3614 int payload;
3615
3616 payload = pkt_size - lso_hdr_size;
3617 opackets = (payload / mss) + ((payload % mss) != 0);
3618 tx_info[0].stat.un.all = 0;
3619 tx_info[0].ostat.opackets = opackets;
3620 tx_info[0].ostat.obytes = (opackets - 1) * lso_hdr_size
3621 + pkt_size;
3622 } else {
3623 myri10ge_tx_stat(&tx_info[0].stat,
3624 (struct ether_header *)(void *)mp->b_rptr, 1, pkt_size);
3625 }
3626 mutex_enter(&tx->lock);
3627
3628 /* check to see if the slots are really there */
3629 avail = tx->mask - (tx->req - tx->done);
3630 if (unlikely(avail <= count)) {
3631 mutex_exit(&tx->lock);
3632 err = 0;
3633 goto late_stall;
3634 }
3635
3636 myri10ge_send_locked(tx, req_list, tx_info, count);
3637 mutex_exit(&tx->lock);
3638 return (DDI_SUCCESS);
3639
3640 late_stall:
3641 try_pullup = 0;
3642 atomic_add_32(&tx->stall_late, 1);
3643
3644 abort_with_handles:
3645 /* unbind and free handles from previous mblks */
3646 for (i = 0; i < count; i++) {
3647 bp = tx_info[i].m;
3648 tx_info[i].m = 0;
3649 if (bp) {
3650 dma_handle = tx_info[i].handle;
3651 (void) ddi_dma_unbind_handle(dma_handle->h);
3652 dma_handle->next = handles;
3653 handles = dma_handle;
3654 tx_info[i].handle = NULL;
3655 tx_info[i].m = NULL;
3656 }
3657 }
3658 myri10ge_free_tx_handle_slist(tx, handles);
3659 pullup:
3660 if (try_pullup) {
3661 err = myri10ge_pullup(ss, mp);
3662 if (err != DDI_SUCCESS && try_pullup == 2) {
3663 /* drop */
3664 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3665 freemsg(mp);
3666 return (0);
3667 }
3668 try_pullup = 0;
3669 goto again;
3670 }
3671
3672 stall:
3673 if (err != 0) {
3674 if (err == EBUSY) {
3675 atomic_add_32(&tx->stall, 1);
3676 } else {
3677 MYRI10GE_ATOMIC_SLICE_STAT_INC(xmit_err);
3678 }
3679 }
3680 return (err);
3681 }
3682
3683 static mblk_t *
3684 myri10ge_send_wrapper(void *arg, mblk_t *mp)
3685 {
3686 struct myri10ge_slice_state *ss = arg;
3687 int err = 0;
3688 mcp_kreq_ether_send_t *req_list;
3689 #if defined(__i386)
3690 /*
3691 * We need about 2.5KB of scratch space to handle transmits.
3692 * i86pc has only 8KB of kernel stack space, so we malloc the
3693 * scratch space there rather than keeping it on the stack.
3694 */
3695 size_t req_size, tx_info_size;
3696 struct myri10ge_tx_buffer_state *tx_info;
3697 caddr_t req_bytes;
3698
3699 req_size = sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4)
3700 + 8;
3701 req_bytes = kmem_alloc(req_size, KM_SLEEP);
3702 tx_info_size = sizeof (*tx_info) * (MYRI10GE_MAX_SEND_DESC_TSO + 1);
3703 tx_info = kmem_alloc(tx_info_size, KM_SLEEP);
3704 #else
3705 char req_bytes[sizeof (*req_list) * (MYRI10GE_MAX_SEND_DESC_TSO + 4)
3706 + 8];
3707 struct myri10ge_tx_buffer_state tx_info[MYRI10GE_MAX_SEND_DESC_TSO + 1];
3708 #endif
3709
3710 /* ensure req_list entries are aligned to 8 bytes */
3711 req_list = (struct mcp_kreq_ether_send *)
3712 (((unsigned long)req_bytes + 7UL) & ~7UL);
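/*
 * e.g. if req_bytes ends in ...5, adding 7 gives ...c and masking
 * off the low three bits yields ...8, the next 8-byte boundary;
 * the "+ 8" slack in the allocation above covers this shift
 */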
3713
3714 err = myri10ge_send(ss, mp, req_list, tx_info);
3715
3716 #if defined(__i386)
3717 kmem_free(tx_info, tx_info_size);
3718 kmem_free(req_bytes, req_size);
3719 #endif
3720 if (err)
3721 return (mp);
3722 else
3723 return (NULL);
3724 }
3725
3726 static int
3727 myri10ge_addmac(void *arg, const uint8_t *mac_addr)
3728 {
3729 struct myri10ge_priv *mgp = arg;
3730 int err;
3731
3732 if (mac_addr == NULL)
3733 return (EINVAL);
3734
3735 mutex_enter(&mgp->intrlock);
3736 if (mgp->macaddr_cnt) {
3737 mutex_exit(&mgp->intrlock);
3738 return (ENOSPC);
3739 }
3740 err = myri10ge_m_unicst(mgp, mac_addr);
3741 if (!err)
3742 mgp->macaddr_cnt++;
3743
3744 mutex_exit(&mgp->intrlock);
3745 if (err)
3746 return (err);
3747
3748 bcopy(mac_addr, mgp->mac_addr, sizeof (mgp->mac_addr));
3749 return (0);
3750 }
3751
3752 /*ARGSUSED*/
3753 static int
3754 myri10ge_remmac(void *arg, const uint8_t *mac_addr)
3755 {
3756 struct myri10ge_priv *mgp = arg;
3757
3758 mutex_enter(&mgp->intrlock);
3759 mgp->macaddr_cnt--;
3760 mutex_exit(&mgp->intrlock);
3761
3762 return (0);
3763 }
3764
3765 /*ARGSUSED*/
3766 static void
3767 myri10ge_fill_group(void *arg, mac_ring_type_t rtype, const int index,
3768 mac_group_info_t *infop, mac_group_handle_t gh)
3769 {
3770 struct myri10ge_priv *mgp = arg;
3771
3772 if (rtype != MAC_RING_TYPE_RX)
3773 return;
3774
3775 infop->mgi_driver = (mac_group_driver_t)mgp;
3776 infop->mgi_start = NULL;
3777 infop->mgi_stop = NULL;
3778 infop->mgi_addmac = myri10ge_addmac;
3779 infop->mgi_remmac = myri10ge_remmac;
3780 infop->mgi_count = mgp->num_slices;
3781 }
3782
3783 static int
3784 myri10ge_ring_start(mac_ring_driver_t rh, uint64_t mr_gen_num)
3785 {
3786 struct myri10ge_slice_state *ss;
3787
3788 ss = (struct myri10ge_slice_state *)rh;
3789 mutex_enter(&ss->rx_lock);
3790 ss->rx_gen_num = mr_gen_num;
3791 mutex_exit(&ss->rx_lock);
3792 return (0);
3793 }
3794
3795 /*
3796 * Retrieve a value for one of the statistics for a particular rx ring
3797 */
3798 int
3799 myri10ge_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val)
3800 {
3801 struct myri10ge_slice_state *ss;
3802
3803 ss = (struct myri10ge_slice_state *)rh;
3804 switch (stat) {
3805 case MAC_STAT_RBYTES:
3806 *val = ss->rx_stats.ibytes;
3807 break;
3808
3809 case MAC_STAT_IPACKETS:
3810 *val = ss->rx_stats.ipackets;
3811 break;
3812
3813 default:
3814 *val = 0;
3815 return (ENOTSUP);
3816 }
3817
3818 return (0);
3819 }
3820
3821 /*
3822 * Retrieve a value for one of the statistics for a particular tx ring
3823 */
3824 int
3825 myri10ge_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val)
3826 {
3827 struct myri10ge_slice_state *ss;
3828
3829 ss = (struct myri10ge_slice_state *)rh;
3830 switch (stat) {
3831 case MAC_STAT_OBYTES:
3832 *val = ss->tx.stats.obytes;
3833 break;
3834
3835 case MAC_STAT_OPACKETS:
3836 *val = ss->tx.stats.opackets;
3837 break;
3838
3839 default:
3840 *val = 0;
3841 return (ENOTSUP);
3842 }
3843
3844 return (0);
3845 }
3846
3847 static int
3848 myri10ge_rx_ring_intr_disable(mac_intr_handle_t intrh)
3849 {
3850 struct myri10ge_slice_state *ss;
3851
3852 ss = (struct myri10ge_slice_state *)intrh;
3853 mutex_enter(&ss->poll_lock);
3854 ss->rx_polling = B_TRUE;
3855 mutex_exit(&ss->poll_lock);
3856 return (0);
3857 }
3858
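/*
 * Note on the poll/interrupt handshake: the disable entry point above
 * parks the ring in polled mode (rx_polling set); the enable entry
 * point below clears it and, if an interrupt was claimed while
 * polling (rx_token set), writes the claim register so the firmware
 * resumes raising interrupts for this slice.
 */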
3859 static int
3860 myri10ge_rx_ring_intr_enable(mac_intr_handle_t intrh)
3861 {
3862 struct myri10ge_slice_state *ss;
3863
3864 ss = (struct myri10ge_slice_state *)intrh;
3865 mutex_enter(&ss->poll_lock);
3866 ss->rx_polling = B_FALSE;
3867 if (ss->rx_token) {
3868 *ss->irq_claim = BE_32(3);
3869 ss->rx_token = 0;
3870 }
3871 mutex_exit(&ss->poll_lock);
3872 return (0);
3873 }
3874
3875 /*ARGSUSED*/
3876 static void
3877 myri10ge_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index,
3878 const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh)
3879 {
3880 struct myri10ge_priv *mgp = arg;
3881 struct myri10ge_slice_state *ss;
3882 mac_intr_t *mintr = &infop->mri_intr;
3883
3884 ASSERT((unsigned int)ring_index < mgp->num_slices);
3885
3886 ss = &mgp->ss[ring_index];
3887 switch (rtype) {
3888 case MAC_RING_TYPE_RX:
3889 ss->rx_rh = rh;
3890 infop->mri_driver = (mac_ring_driver_t)ss;
3891 infop->mri_start = myri10ge_ring_start;
3892 infop->mri_stop = NULL;
3893 infop->mri_poll = myri10ge_poll_rx;
3894 infop->mri_stat = myri10ge_rx_ring_stat;
3895 mintr->mi_handle = (mac_intr_handle_t)ss;
3896 mintr->mi_enable = myri10ge_rx_ring_intr_enable;
3897 mintr->mi_disable = myri10ge_rx_ring_intr_disable;
3898 break;
3899 case MAC_RING_TYPE_TX:
3900 ss->tx.rh = rh;
3901 infop->mri_driver = (mac_ring_driver_t)ss;
3902 infop->mri_start = NULL;
3903 infop->mri_stop = NULL;
3904 infop->mri_tx = myri10ge_send_wrapper;
3905 infop->mri_stat = myri10ge_tx_ring_stat;
3906 break;
3907 default:
3908 break;
3909 }
3910 }
3911
3912 static void
3913 myri10ge_nic_stat_destroy(struct myri10ge_priv *mgp)
3914 {
3915 if (mgp->ksp_stat == NULL)
3916 return;
3917
3918 kstat_delete(mgp->ksp_stat);
3919 mgp->ksp_stat = NULL;
3920 }
3921
3922 static void
3923 myri10ge_slice_stat_destroy(struct myri10ge_slice_state *ss)
3924 {
3925 if (ss->ksp_stat == NULL)
3926 return;
3927
3928 kstat_delete(ss->ksp_stat);
3929 ss->ksp_stat = NULL;
3930 }
3931
3932 static void
3933 myri10ge_info_destroy(struct myri10ge_priv *mgp)
3934 {
3935 if (mgp->ksp_info == NULL)
3936 return;
3937
3938 kstat_delete(mgp->ksp_info);
3939 mgp->ksp_info = NULL;
3940 }
3941
3942 static int
3943 myri10ge_nic_stat_kstat_update(kstat_t *ksp, int rw)
3944 {
3945 struct myri10ge_nic_stat *ethstat;
3946 struct myri10ge_priv *mgp;
3947 mcp_irq_data_t *fw_stats;
3948
3949
3950 if (rw == KSTAT_WRITE)
3951 return (EACCES);
3952
3953 ethstat = (struct myri10ge_nic_stat *)ksp->ks_data;
3954 mgp = (struct myri10ge_priv *)ksp->ks_private;
3955 fw_stats = mgp->ss[0].fw_stats;
3956
3957 ethstat->dma_read_bw_MBs.value.ul = mgp->read_dma;
3958 ethstat->dma_write_bw_MBs.value.ul = mgp->write_dma;
3959 ethstat->dma_read_write_bw_MBs.value.ul = mgp->read_write_dma;
3960 if (myri10ge_tx_dma_attr.dma_attr_flags & DDI_DMA_FORCE_PHYSICAL)
3961 ethstat->dma_force_physical.value.ul = 1;
3962 else
3963 ethstat->dma_force_physical.value.ul = 0;
3964 ethstat->lanes.value.ul = mgp->pcie_link_width;
3965 ethstat->dropped_bad_crc32.value.ul =
3966 ntohl(fw_stats->dropped_bad_crc32);
3967 ethstat->dropped_bad_phy.value.ul =
3968 ntohl(fw_stats->dropped_bad_phy);
3969 ethstat->dropped_link_error_or_filtered.value.ul =
3970 ntohl(fw_stats->dropped_link_error_or_filtered);
3971 ethstat->dropped_link_overflow.value.ul =
3972 ntohl(fw_stats->dropped_link_overflow);
3973 ethstat->dropped_multicast_filtered.value.ul =
3974 ntohl(fw_stats->dropped_multicast_filtered);
3975 ethstat->dropped_no_big_buffer.value.ul =
3976 ntohl(fw_stats->dropped_no_big_buffer);
3977 ethstat->dropped_no_small_buffer.value.ul =
3978 ntohl(fw_stats->dropped_no_small_buffer);
3979 ethstat->dropped_overrun.value.ul =
3980 ntohl(fw_stats->dropped_overrun);
3981 ethstat->dropped_pause.value.ul =
3982 ntohl(fw_stats->dropped_pause);
3983 ethstat->dropped_runt.value.ul =
3984 ntohl(fw_stats->dropped_runt);
3985 ethstat->link_up.value.ul =
3986 ntohl(fw_stats->link_up);
3987 ethstat->dropped_unicast_filtered.value.ul =
3988 ntohl(fw_stats->dropped_unicast_filtered);
3989 return (0);
3990 }
3991
3992 static int
3993 myri10ge_slice_stat_kstat_update(kstat_t *ksp, int rw)
3994 {
3995 struct myri10ge_slice_stat *ethstat;
3996 struct myri10ge_slice_state *ss;
3997
3998 if (rw == KSTAT_WRITE)
3999 return (EACCES);
4000
4001 ethstat = (struct myri10ge_slice_stat *)ksp->ks_data;
4002 ss = (struct myri10ge_slice_state *)ksp->ks_private;
4003
4004 ethstat->rx_big.value.ul = ss->j_rx_cnt;
4005 ethstat->rx_bigbuf_firmware.value.ul = ss->rx_big.cnt - ss->j_rx_cnt;
4006 ethstat->rx_bigbuf_pool.value.ul =
4007 ss->jpool.num_alloc - ss->jbufs_for_smalls;
4008 ethstat->rx_bigbuf_smalls.value.ul = ss->jbufs_for_smalls;
4009 ethstat->rx_small.value.ul = ss->rx_small.cnt -
4010 (ss->rx_small.mask + 1);
4011 ethstat->tx_done.value.ul = ss->tx.done;
4012 ethstat->tx_req.value.ul = ss->tx.req;
4013 ethstat->tx_activate.value.ul = ss->tx.activate;
4014 ethstat->xmit_sched.value.ul = ss->tx.sched;
4015 ethstat->xmit_stall.value.ul = ss->tx.stall;
4016 ethstat->xmit_stall_early.value.ul = ss->tx.stall_early;
4017 ethstat->xmit_stall_late.value.ul = ss->tx.stall_late;
4018 ethstat->xmit_err.value.ul = MYRI10GE_SLICE_STAT(xmit_err);
4019 return (0);
4020 }
4021
4022 static int
4023 myri10ge_info_kstat_update(kstat_t *ksp, int rw)
4024 {
4025 struct myri10ge_info *info;
4026 struct myri10ge_priv *mgp;
4027
4028
4029 if (rw == KSTAT_WRITE)
4030 return (EACCES);
4031
4032 info = (struct myri10ge_info *)ksp->ks_data;
4033 mgp = (struct myri10ge_priv *)ksp->ks_private;
4034 kstat_named_setstr(&info->driver_version, MYRI10GE_VERSION_STR);
4035 kstat_named_setstr(&info->firmware_version, mgp->fw_version);
4036 kstat_named_setstr(&info->firmware_name, mgp->fw_name);
4037 kstat_named_setstr(&info->interrupt_type, mgp->intr_type);
4038 kstat_named_setstr(&info->product_code, mgp->pc_str);
4039 kstat_named_setstr(&info->serial_number, mgp->sn_str);
4040 return (0);
4041 }
4042
4043 static struct myri10ge_info myri10ge_info_template = {
4044 { "driver_version", KSTAT_DATA_STRING },
4045 { "firmware_version", KSTAT_DATA_STRING },
4046 { "firmware_name", KSTAT_DATA_STRING },
4047 { "interrupt_type", KSTAT_DATA_STRING },
4048 { "product_code", KSTAT_DATA_STRING },
4049 { "serial_number", KSTAT_DATA_STRING },
4050 };
4051 static kmutex_t myri10ge_info_template_lock;
4052
4053
4054 static int
4055 myri10ge_info_init(struct myri10ge_priv *mgp)
4056 {
4057 struct kstat *ksp;
4058
4059 ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip),
4060 "myri10ge_info", "net", KSTAT_TYPE_NAMED,
4061 sizeof (myri10ge_info_template) /
4062 sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
4063 if (ksp == NULL) {
4064 cmn_err(CE_WARN,
4065 "%s: myri10ge_info_init: kstat_create failed", mgp->name);
4066 return (DDI_FAILURE);
4067 }
4068 mgp->ksp_info = ksp;
4069 ksp->ks_update = myri10ge_info_kstat_update;
4070 ksp->ks_private = (void *) mgp;
4071 ksp->ks_data = &myri10ge_info_template;
4072 ksp->ks_lock = &myri10ge_info_template_lock;
4073 if (MYRI10GE_VERSION_STR != NULL)
4074 ksp->ks_data_size += strlen(MYRI10GE_VERSION_STR) + 1;
4075 if (mgp->fw_version != NULL)
4076 ksp->ks_data_size += strlen(mgp->fw_version) + 1;
4077 ksp->ks_data_size += strlen(mgp->fw_name) + 1;
4078 ksp->ks_data_size += strlen(mgp->intr_type) + 1;
4079 if (mgp->pc_str != NULL)
4080 ksp->ks_data_size += strlen(mgp->pc_str) + 1;
4081 if (mgp->sn_str != NULL)
4082 ksp->ks_data_size += strlen(mgp->sn_str) + 1;
4083
4084 kstat_install(ksp);
4085 return (DDI_SUCCESS);
4086 }
4087
4088
4089 static int
4090 myri10ge_nic_stat_init(struct myri10ge_priv *mgp)
4091 {
4092 struct kstat *ksp;
4093 struct myri10ge_nic_stat *ethstat;
4094
4095 ksp = kstat_create("myri10ge", ddi_get_instance(mgp->dip),
4096 "myri10ge_nic_stats", "net", KSTAT_TYPE_NAMED,
4097 sizeof (*ethstat) / sizeof (kstat_named_t), 0);
4098 if (ksp == NULL) {
4099 cmn_err(CE_WARN,
4100 "%s: myri10ge_stat_init: kstat_create failed", mgp->name);
4101 return (DDI_FAILURE);
4102 }
4103 mgp->ksp_stat = ksp;
4104 ethstat = (struct myri10ge_nic_stat *)(ksp->ks_data);
4105
4106 kstat_named_init(&ethstat->dma_read_bw_MBs,
4107 "dma_read_bw_MBs", KSTAT_DATA_ULONG);
4108 kstat_named_init(&ethstat->dma_write_bw_MBs,
4109 "dma_write_bw_MBs", KSTAT_DATA_ULONG);
4110 kstat_named_init(&ethstat->dma_read_write_bw_MBs,
4111 "dma_read_write_bw_MBs", KSTAT_DATA_ULONG);
4112 kstat_named_init(&ethstat->dma_force_physical,
4113 "dma_force_physical", KSTAT_DATA_ULONG);
4114 kstat_named_init(&ethstat->lanes,
4115 "lanes", KSTAT_DATA_ULONG);
4116 kstat_named_init(&ethstat->dropped_bad_crc32,
4117 "dropped_bad_crc32", KSTAT_DATA_ULONG);
4118 kstat_named_init(&ethstat->dropped_bad_phy,
4119 "dropped_bad_phy", KSTAT_DATA_ULONG);
4120 kstat_named_init(&ethstat->dropped_link_error_or_filtered,
4121 "dropped_link_error_or_filtered", KSTAT_DATA_ULONG);
4122 kstat_named_init(&ethstat->dropped_link_overflow,
4123 "dropped_link_overflow", KSTAT_DATA_ULONG);
4124 kstat_named_init(&ethstat->dropped_multicast_filtered,
4125 "dropped_multicast_filtered", KSTAT_DATA_ULONG);
4126 kstat_named_init(&ethstat->dropped_no_big_buffer,
4127 "dropped_no_big_buffer", KSTAT_DATA_ULONG);
4128 kstat_named_init(&ethstat->dropped_no_small_buffer,
4129 "dropped_no_small_buffer", KSTAT_DATA_ULONG);
4130 kstat_named_init(&ethstat->dropped_overrun,
4131 "dropped_overrun", KSTAT_DATA_ULONG);
4132 kstat_named_init(&ethstat->dropped_pause,
4133 "dropped_pause", KSTAT_DATA_ULONG);
4134 kstat_named_init(&ethstat->dropped_runt,
4135 "dropped_runt", KSTAT_DATA_ULONG);
4136 kstat_named_init(&ethstat->dropped_unicast_filtered,
4137 "dropped_unicast_filtered", KSTAT_DATA_ULONG);
4140 kstat_named_init(&ethstat->link_up, "link_up", KSTAT_DATA_ULONG);
4141 kstat_named_init(&ethstat->link_changes, "link_changes",
4142 KSTAT_DATA_ULONG);
4143 ksp->ks_update = myri10ge_nic_stat_kstat_update;
4144 ksp->ks_private = (void *) mgp;
4145 kstat_install(ksp);
4146 return (DDI_SUCCESS);
4147 }
4148
4149 static int
4150 myri10ge_slice_stat_init(struct myri10ge_slice_state *ss)
4151 {
4152 struct myri10ge_priv *mgp = ss->mgp;
4153 struct kstat *ksp;
4154 struct myri10ge_slice_stat *ethstat;
4155 int instance;
4156
4157 /*
4158 * fake an instance so that the same slice numbers from
4159 * different instances do not collide
4160 */
4161 instance = (ddi_get_instance(mgp->dip) * 1000) + (int)(ss - mgp->ss);
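/*
 * e.g. driver instance 2, slice 3 yields kstat instance 2003; no
 * collision with instance 1 (1000-1999) or instance 3 (3000-3999)
 * is possible as long as num_slices stays below 1000.
 */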
4162 ksp = kstat_create("myri10ge", instance,
4163 "myri10ge_slice_stats", "net", KSTAT_TYPE_NAMED,
4164 sizeof (*ethstat) / sizeof (kstat_named_t), 0);
4165 if (ksp == NULL) {
4166 cmn_err(CE_WARN,
4167 "%s: myri10ge_stat_init: kstat_create failed", mgp->name);
4168 return (DDI_FAILURE);
4169 }
4170 ss->ksp_stat = ksp;
4171 ethstat = (struct myri10ge_slice_stat *)(ksp->ks_data);
4172 kstat_named_init(&ethstat->lro_bad_csum, "lro_bad_csum",
4173 KSTAT_DATA_ULONG);
4174 kstat_named_init(&ethstat->lro_flushed, "lro_flushed",
4175 KSTAT_DATA_ULONG);
4176 kstat_named_init(&ethstat->lro_queued, "lro_queued",
4177 KSTAT_DATA_ULONG);
4178 kstat_named_init(&ethstat->rx_bigbuf_firmware, "rx_bigbuf_firmware",
4179 KSTAT_DATA_ULONG);
4180 kstat_named_init(&ethstat->rx_bigbuf_pool, "rx_bigbuf_pool",
4181 KSTAT_DATA_ULONG);
4182 kstat_named_init(&ethstat->rx_bigbuf_smalls, "rx_bigbuf_smalls",
4183 KSTAT_DATA_ULONG);
4184 kstat_named_init(&ethstat->rx_copy, "rx_copy",
4185 KSTAT_DATA_ULONG);
4186 kstat_named_init(&ethstat->rx_big_nobuf, "rx_big_nobuf",
4187 KSTAT_DATA_ULONG);
4188 kstat_named_init(&ethstat->rx_small_nobuf, "rx_small_nobuf",
4189 KSTAT_DATA_ULONG);
4190 kstat_named_init(&ethstat->xmit_zero_len, "xmit_zero_len",
4191 KSTAT_DATA_ULONG);
4192 kstat_named_init(&ethstat->xmit_pullup, "xmit_pullup",
4193 KSTAT_DATA_ULONG);
4194 kstat_named_init(&ethstat->xmit_pullup_first, "xmit_pullup_first",
4195 KSTAT_DATA_ULONG);
4196 kstat_named_init(&ethstat->xmit_lowbuf, "xmit_lowbuf",
4197 KSTAT_DATA_ULONG);
4198 kstat_named_init(&ethstat->xmit_lsobadflags, "xmit_lsobadflags",
4199 KSTAT_DATA_ULONG);
4200 kstat_named_init(&ethstat->xmit_sched, "xmit_sched",
4201 KSTAT_DATA_ULONG);
4202 kstat_named_init(&ethstat->xmit_stall, "xmit_stall",
4203 KSTAT_DATA_ULONG);
4204 kstat_named_init(&ethstat->xmit_stall_early, "xmit_stall_early",
4205 KSTAT_DATA_ULONG);
4206 kstat_named_init(&ethstat->xmit_stall_late, "xmit_stall_late",
4207 KSTAT_DATA_ULONG);
4208 kstat_named_init(&ethstat->xmit_err, "xmit_err",
4209 KSTAT_DATA_ULONG);
4210 kstat_named_init(&ethstat->tx_req, "tx_req",
4211 KSTAT_DATA_ULONG);
4212 kstat_named_init(&ethstat->tx_activate, "tx_activate",
4213 KSTAT_DATA_ULONG);
4214 kstat_named_init(&ethstat->tx_done, "tx_done",
4215 KSTAT_DATA_ULONG);
4216 kstat_named_init(&ethstat->tx_handles_alloced, "tx_handles_alloced",
4217 KSTAT_DATA_ULONG);
4218 kstat_named_init(&ethstat->rx_big, "rx_big",
4219 KSTAT_DATA_ULONG);
4220 kstat_named_init(&ethstat->rx_small, "rx_small",
4221 KSTAT_DATA_ULONG);
4222 ksp->ks_update = myri10ge_slice_stat_kstat_update;
4223 ksp->ks_private = (void *) ss;
4224 kstat_install(ksp);
4225 return (DDI_SUCCESS);
4226 }
4227
4228
4229
4230 #if #cpu(i386) || defined __i386 || defined i386 || \
4231 defined __i386__ || #cpu(x86_64) || defined __x86_64__
4232
4233 #include <vm/hat.h>
4234 #include <sys/ddi_isa.h>
4235 void *device_arena_alloc(size_t size, int vm_flag);
4236 void device_arena_free(void *vaddr, size_t size);
4237
4238 static void
4239 myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp)
4240 {
4241 dev_info_t *parent_dip;
4242 ddi_acc_handle_t handle;
4243 unsigned long bus_number, dev_number, func_number;
4244 unsigned long cfg_pa, paddr, base, pgoffset;
4245 char *cvaddr, *ptr;
4246 uint32_t *ptr32;
4247 int retval = DDI_FAILURE;
4248 int dontcare;
4249 uint16_t read_vid, read_did, vendor_id, device_id;
4250
4251 if (!myri10ge_nvidia_ecrc_enable)
4252 return;
4253
4254 parent_dip = ddi_get_parent(mgp->dip);
4255 if (parent_dip == NULL) {
4256 cmn_err(CE_WARN, "%s: I'm an orphan?", mgp->name);
4257 return;
4258 }
4259
4260 if (pci_config_setup(parent_dip, &handle) != DDI_SUCCESS) {
4261 cmn_err(CE_WARN,
4262 "%s: Could not access my parent's registers", mgp->name);
4263 return;
4264 }
4265
4266 vendor_id = pci_config_get16(handle, PCI_CONF_VENID);
4267 device_id = pci_config_get16(handle, PCI_CONF_DEVID);
4268 pci_config_teardown(&handle);
4269
4270 if (myri10ge_verbose) {
4271 unsigned long bus_number, dev_number, func_number;
4272 int reg_set, span;
4273 (void) myri10ge_reg_set(parent_dip, &reg_set, &span,
4274 &bus_number, &dev_number, &func_number);
4275 printf("%s: parent at %ld:%ld:%ld\n",
4276 mgp->name, bus_number, dev_number,
4277 func_number);
4278 }
4279
4280 if (vendor_id != 0x10de)
4281 return;
4282
4283 if (device_id != 0x005d /* CK804 */ &&
4284 (device_id < 0x374 || device_id > 0x378) /* MCP55 */) {
4285 return;
4286 }
4287 (void) myri10ge_reg_set(parent_dip, &dontcare, &dontcare,
4288 &bus_number, &dev_number, &func_number);
4289
4290 for (cfg_pa = 0xf0000000UL;
4291 retval != DDI_SUCCESS && cfg_pa >= 0xe0000000UL;
4292 cfg_pa -= 0x10000000UL) {
4293 /* find the config space address for the nvidia bridge */
4294 paddr = (cfg_pa + bus_number * 0x00100000UL +
4295 (dev_number * 8 + func_number) * 0x00001000UL);
4296
4297 base = paddr & (~MMU_PAGEOFFSET);
4298 pgoffset = paddr & MMU_PAGEOFFSET;
4299
4300 /* map it into the kernel */
4301 cvaddr = device_arena_alloc(ptob(1), VM_NOSLEEP);
4302 if (cvaddr == NULL) {
4303 cmn_err(CE_WARN, "%s: failed to map nf4: cvaddr\n", mgp->name);
4304 return;
4305 }
4306 hat_devload(kas.a_hat, cvaddr, mmu_ptob(1),
4307 i_ddi_paddr_to_pfn(base),
4308 PROT_WRITE|HAT_STRICTORDER, HAT_LOAD_LOCK);
4309
4310 ptr = cvaddr + pgoffset;
4311 read_vid = *(uint16_t *)(void *)(ptr + PCI_CONF_VENID);
4312 read_did = *(uint16_t *)(void *)(ptr + PCI_CONF_DEVID);
4313 if (vendor_id == read_vid && device_id == read_did) {
4314 ptr32 = (uint32_t *)(void *)(ptr + 0x178);
4315 if (myri10ge_verbose)
4316 printf("%s: Enabling ECRC on upstream "
4317 "Nvidia bridge (0x%x:0x%x) "
4318 "at %ld:%ld:%ld\n", mgp->name,
4319 read_vid, read_did, bus_number,
4320 dev_number, func_number);
4321 *ptr32 |= 0x40;
4322 retval = DDI_SUCCESS;
4323 }
4324 hat_unload(kas.a_hat, cvaddr, ptob(1), HAT_UNLOAD_UNLOCK);
4325 device_arena_free(cvaddr, ptob(1));
4326 }
4327 }
4328
4329 #else
4330 /*ARGSUSED*/
4331 static void
4332 myri10ge_enable_nvidia_ecrc(struct myri10ge_priv *mgp)
4333 {
4334 }
4335 #endif /* i386 */
4336
4337
4338 /*
4339 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
4340 * when the PCI-E Completion packets are aligned on an 8-byte
4341 * boundary. Some PCI-E chip sets always align Completion packets; on
4342 * the ones that do not, the alignment can be enforced by enabling
4343 * ECRC generation (if supported).
4344 *
4345 * When PCI-E Completion packets are not aligned, it is actually more
4346 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
4347 *
4348 * If the driver can neither enable ECRC nor verify that it has
4349 * already been enabled, then it must use a firmware image which works
4350 * around unaligned completion packets (ethp_z8e.dat), and it should
4351 * also ensure that it never gives the device a Read-DMA which is
4352 * larger than 2KB by setting the tx.boundary to 2KB. If ECRC is
4353 * enabled, then the driver should use the aligned (eth_z8e.dat)
4354 * firmware image, and set tx.boundary to 4KB.
4355 */
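/*
 * Summary of the resulting configuration (a sketch of what
 * myri10ge_select_firmware() below ends up doing):
 *
 *	PCI-E completions	firmware	tx_boundary
 *	aligned (or forced)	rss_eth_z8e	4096
 *	unaligned		rss_ethp_z8e	2048
 */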
4356
4357
4358 static int
4359 myri10ge_firmware_probe(struct myri10ge_priv *mgp)
4360 {
4361 int status;
4362
4363 mgp->tx_boundary = 4096;
4364 /*
4365 * Verify the max read request size was set to 4KB
4366 * before trying the test with 4KB.
4367 */
4368 if (mgp->max_read_request_4k == 0)
4369 mgp->tx_boundary = 2048;
4370 /*
4371 * load the optimized firmware which assumes aligned PCIe
4372 * completions in order to see if it works on this host.
4373 */
4374
4375 mgp->fw_name = "rss_eth_z8e";
4376 mgp->eth_z8e = (unsigned char *)rss_eth_z8e;
4377 mgp->eth_z8e_length = rss_eth_z8e_length;
4378
4379 status = myri10ge_load_firmware(mgp);
4380 if (status != 0) {
4381 return (status);
4382 }
4383 /*
4384 * Enable ECRC if possible
4385 */
4386 myri10ge_enable_nvidia_ecrc(mgp);
4387
4388 /*
4389 * Run a DMA test which watches for unaligned completions and
4390 * aborts on the first one seen.
4391 */
4392 status = myri10ge_dma_test(mgp, MXGEFW_CMD_UNALIGNED_TEST);
4393 if (status == 0)
4394 return (0); /* keep the aligned firmware */
4395
4396 if (status != E2BIG)
4397 cmn_err(CE_WARN, "%s: DMA test failed: %d\n",
4398 mgp->name, status);
4399 if (status == ENOSYS)
4400 cmn_err(CE_WARN, "%s: Falling back to ethp! "
4401 "Please install up to date fw\n", mgp->name);
4402 return (status);
4403 }
4404
4405 static int
4406 myri10ge_select_firmware(struct myri10ge_priv *mgp)
4407 {
4408 int aligned;
4409
4410 aligned = 0;
4411
4412 if (myri10ge_force_firmware == 1) {
4413 if (myri10ge_verbose)
4414 printf("%s: Assuming aligned completions (forced)\n",
4415 mgp->name);
4416 aligned = 1;
4417 goto done;
4418 }
4419
4420 if (myri10ge_force_firmware == 2) {
4421 if (myri10ge_verbose)
4422 printf("%s: Assuming unaligned completions (forced)\n",
4423 mgp->name);
4424 aligned = 0;
4425 goto done;
4426 }
4427
4428 /* If the width is less than 8, we may use the aligned firmware */
4429 if (mgp->pcie_link_width != 0 && mgp->pcie_link_width < 8) {
4430 cmn_err(CE_WARN, "!%s: PCIe link running at x%d\n",
4431 mgp->name, mgp->pcie_link_width);
4432 aligned = 1;
4433 goto done;
4434 }
4435
4436 if (0 == myri10ge_firmware_probe(mgp))
4437 return (0); /* keep optimized firmware */
4438
4439 done:
4440 if (aligned) {
4441 mgp->fw_name = "rss_eth_z8e";
4442 mgp->eth_z8e = (unsigned char *)rss_eth_z8e;
4443 mgp->eth_z8e_length = rss_eth_z8e_length;
4444 mgp->tx_boundary = 4096;
4445 } else {
4446 mgp->fw_name = "rss_ethp_z8e";
4447 mgp->eth_z8e = (unsigned char *)rss_ethp_z8e;
4448 mgp->eth_z8e_length = rss_ethp_z8e_length;
4449 mgp->tx_boundary = 2048;
4450 }
4451
4452 return (myri10ge_load_firmware(mgp));
4453 }
4454
4455 static int
4456 myri10ge_add_intrs(struct myri10ge_priv *mgp, int add_handler)
4457 {
4458 dev_info_t *devinfo = mgp->dip;
4459 int count, avail, actual, intr_types;
4460 int x, y, rc, inum = 0;
4461
4462
4463 rc = ddi_intr_get_supported_types(devinfo, &intr_types);
4464 if (rc != DDI_SUCCESS) {
4465 cmn_err(CE_WARN,
4466 "!%s: ddi_intr_get_nintrs() failure, rc = %d\n", mgp->name,
4467 rc);
4468 return (DDI_FAILURE);
4469 }
4470
4471 if (!myri10ge_use_msi)
4472 intr_types &= ~DDI_INTR_TYPE_MSI;
4473 if (!myri10ge_use_msix)
4474 intr_types &= ~DDI_INTR_TYPE_MSIX;
4475
4476 if (intr_types & DDI_INTR_TYPE_MSIX) {
4477 mgp->ddi_intr_type = DDI_INTR_TYPE_MSIX;
4478 mgp->intr_type = "MSI-X";
4479 } else if (intr_types & DDI_INTR_TYPE_MSI) {
4480 mgp->ddi_intr_type = DDI_INTR_TYPE_MSI;
4481 mgp->intr_type = "MSI";
4482 } else {
4483 mgp->ddi_intr_type = DDI_INTR_TYPE_FIXED;
4484 mgp->intr_type = "Legacy";
4485 }
4486 /* Get number of interrupts */
4487 rc = ddi_intr_get_nintrs(devinfo, mgp->ddi_intr_type, &count);
4488 if ((rc != DDI_SUCCESS) || (count == 0)) {
4489 cmn_err(CE_WARN, "%s: ddi_intr_get_nintrs() failure, rc: %d, "
4490 "count: %d", mgp->name, rc, count);
4491
4492 return (DDI_FAILURE);
4493 }
4494
4495 /* Get number of available interrupts */
4496 rc = ddi_intr_get_navail(devinfo, mgp->ddi_intr_type, &avail);
4497 if ((rc != DDI_SUCCESS) || (avail == 0)) {
4498 cmn_err(CE_WARN, "%s: ddi_intr_get_navail() failure, "
4499 "rc: %d, avail: %d\n", mgp->name, rc, avail);
4500 return (DDI_FAILURE);
4501 }
4502 if (avail < count) {
4503 cmn_err(CE_NOTE,
4504 "!%s: nintrs() returned %d, navail returned %d",
4505 mgp->name, count, avail);
4506 count = avail;
4507 }
4508
4509 if (count < mgp->num_slices)
4510 return (DDI_FAILURE);
4511
4512 if (count > mgp->num_slices)
4513 count = mgp->num_slices;
4514
4515 /* Allocate memory for the interrupt handles */
4516 mgp->intr_size = count * sizeof (ddi_intr_handle_t);
4517 mgp->htable = kmem_alloc(mgp->intr_size, KM_SLEEP);
4518
4519 rc = ddi_intr_alloc(devinfo, mgp->htable, mgp->ddi_intr_type, inum,
4520 count, &actual, DDI_INTR_ALLOC_NORMAL);
4521
4522 if ((rc != DDI_SUCCESS) || (actual == 0)) {
4523 cmn_err(CE_WARN, "%s: ddi_intr_alloc() failed: %d",
4524 mgp->name, rc);
4525
4526 kmem_free(mgp->htable, mgp->intr_size);
4527 mgp->htable = NULL;
4528 return (DDI_FAILURE);
4529 }
4530
4531 if ((actual < count) && myri10ge_verbose) {
4532 cmn_err(CE_NOTE, "%s: got %d/%d slices",
4533 mgp->name, actual, count);
4534 }
4535
4536 mgp->intr_cnt = actual;
4537
4538 /*
4539 * Get priority for first irq, assume remaining are all the same
4540 */
4541 if (ddi_intr_get_pri(mgp->htable[0], &mgp->intr_pri)
4542 != DDI_SUCCESS) {
4543 cmn_err(CE_WARN, "%s: ddi_intr_get_pri() failed", mgp->name);
4544
4545 /* Free already allocated intr */
4546 for (y = 0; y < actual; y++) {
4547 (void) ddi_intr_free(mgp->htable[y]);
4548 }
4549
4550 kmem_free(mgp->htable, mgp->intr_size);
4551 mgp->htable = NULL;
4552 return (DDI_FAILURE);
4553 }
4554
4555 mgp->icookie = (void *)(uintptr_t)mgp->intr_pri;
4556
4557 if (!add_handler)
4558 return (DDI_SUCCESS);
4559
4560 /* Call ddi_intr_add_handler() */
4561 for (x = 0; x < actual; x++) {
4562 if (ddi_intr_add_handler(mgp->htable[x], myri10ge_intr,
4563 (caddr_t)&mgp->ss[x], NULL) != DDI_SUCCESS) {
4564 cmn_err(CE_WARN, "%s: ddi_intr_add_handler() failed",
4565 mgp->name);
4566
4567 /* Free already allocated intr */
4568 for (y = 0; y < actual; y++) {
4569 (void) ddi_intr_free(mgp->htable[y]);
4570 }
4571
4572 kmem_free(mgp->htable, mgp->intr_size);
4573 mgp->htable = NULL;
4574 return (DDI_FAILURE);
4575 }
4576 }
4577
4578 (void) ddi_intr_get_cap(mgp->htable[0], &mgp->intr_cap);
4579 if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) {
4580 /* Call ddi_intr_block_enable() for MSI */
4581 (void) ddi_intr_block_enable(mgp->htable, mgp->intr_cnt);
4582 } else {
4583 /* Call ddi_intr_enable() for MSI non block enable */
4584 for (x = 0; x < mgp->intr_cnt; x++) {
4585 (void) ddi_intr_enable(mgp->htable[x]);
4586 }
4587 }
4588
4589 return (DDI_SUCCESS);
4590 }
4591
4592 static void
4593 myri10ge_rem_intrs(struct myri10ge_priv *mgp, int handler_installed)
4594 {
4595 int x, err;
4596
4597 /* Disable all interrupts */
4598 if (handler_installed) {
4599 if (mgp->intr_cap & DDI_INTR_FLAG_BLOCK) {
4600 /* Call ddi_intr_block_disable() */
4601 (void) ddi_intr_block_disable(mgp->htable,
4602 mgp->intr_cnt);
4603 } else {
4604 for (x = 0; x < mgp->intr_cnt; x++) {
4605 (void) ddi_intr_disable(mgp->htable[x]);
4606 }
4607 }
4608 }
4609
4610 for (x = 0; x < mgp->intr_cnt; x++) {
4611 if (handler_installed) {
4612 /* Call ddi_intr_remove_handler() */
4613 err = ddi_intr_remove_handler(mgp->htable[x]);
4614 if (err != DDI_SUCCESS) {
4615 cmn_err(CE_WARN,
4616 "%s: ddi_intr_remove_handler for"
4617 "vec %d returned %d\n", mgp->name,
4618 x, err);
4619 }
4620 }
4621 err = ddi_intr_free(mgp->htable[x]);
4622 if (err != DDI_SUCCESS) {
4623 cmn_err(CE_WARN,
4624 "%s: ddi_intr_free for vec %d returned %d\n",
4625 mgp->name, x, err);
4626 }
4627 }
4628 kmem_free(mgp->htable, mgp->intr_size);
4629 mgp->htable = NULL;
4630 }
4631
4632 static void
4633 myri10ge_test_physical(dev_info_t *dip)
4634 {
4635 ddi_dma_handle_t handle;
4636 struct myri10ge_dma_stuff dma;
4637 void *addr;
4638 int err;
4639
4640 /* test #1, sufficient for older sparc systems */
4641 myri10ge_tx_dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
4642 err = ddi_dma_alloc_handle(dip, &myri10ge_tx_dma_attr,
4643 DDI_DMA_DONTWAIT, NULL, &handle);
4644 if (err == DDI_DMA_BADATTR)
4645 goto fail;
4646 ddi_dma_free_handle(&handle);
4647
4648 /* test #2, required on Olympus where the bind is what fails */
4649 addr = myri10ge_dma_alloc(dip, 128, &myri10ge_tx_dma_attr,
4650 &myri10ge_dev_access_attr, DDI_DMA_STREAMING,
4651 DDI_DMA_WRITE|DDI_DMA_STREAMING, &dma, 0, DDI_DMA_DONTWAIT);
4652 if (addr == NULL)
4653 goto fail;
4654 myri10ge_dma_free(&dma);
4655 return;
4656
4657 fail:
4658 if (myri10ge_verbose)
4659 printf("myri10ge%d: DDI_DMA_FORCE_PHYSICAL failed, "
4660 "using IOMMU\n", ddi_get_instance(dip));
4661
4662 myri10ge_tx_dma_attr.dma_attr_flags &= ~DDI_DMA_FORCE_PHYSICAL;
4663 }
4664
4665 static void
4666 myri10ge_get_props(dev_info_t *dip)
4667 {
4668
4669 myri10ge_flow_control = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4670 "myri10ge_flow_control", myri10ge_flow_control);
4671
4672 myri10ge_intr_coal_delay = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4673 "myri10ge_intr_coal_delay", myri10ge_intr_coal_delay);
4674
4675 #if #cpu(i386) || defined __i386 || defined i386 || \
4676 defined __i386__ || #cpu(x86_64) || defined __x86_64__
4677 myri10ge_nvidia_ecrc_enable = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4678 "myri10ge_nvidia_ecrc_enable", 1);
4679 #endif
4680
4681
4682 myri10ge_use_msi = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4683 "myri10ge_use_msi", myri10ge_use_msi);
4684
4685 myri10ge_deassert_wait = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4686 "myri10ge_deassert_wait", myri10ge_deassert_wait);
4687
4688 myri10ge_verbose = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4689 "myri10ge_verbose", myri10ge_verbose);
4690
4691 myri10ge_tx_copylen = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4692 "myri10ge_tx_copylen", myri10ge_tx_copylen);
4693
4694 if (myri10ge_tx_copylen < 60) {
4695 cmn_err(CE_WARN,
4696 "myri10ge_tx_copylen must be >= 60 bytes\n");
4697 myri10ge_tx_copylen = 60;
4698 }
4699
4700 myri10ge_mtu_override = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4701 "myri10ge_mtu_override", myri10ge_mtu_override);
4702
4703 if (myri10ge_mtu_override >= 1500 && myri10ge_mtu_override <= 9000)
4704 myri10ge_mtu = myri10ge_mtu_override +
4705 sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ;
4706 else if (myri10ge_mtu_override != 0) {
4707 cmn_err(CE_WARN,
4708 "myri10ge_mtu_override must be between 1500 and "
4709 "9000 bytes\n");
4710 }
4711
4712 myri10ge_bigbufs_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4713 "myri10ge_bigbufs_initial", myri10ge_bigbufs_initial);
4714 myri10ge_bigbufs_max = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4715 "myri10ge_bigbufs_max", myri10ge_bigbufs_max);
4716
4717 myri10ge_watchdog_reset = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4718 "myri10ge_watchdog_reset", myri10ge_watchdog_reset);
4719
4720 if (myri10ge_bigbufs_initial < 128) {
4721 cmn_err(CE_WARN,
4722 "myri10ge_bigbufs_initial be at least 128\n");
4723 myri10ge_bigbufs_initial = 128;
4724 }
4725 if (myri10ge_bigbufs_max < 128) {
4726 cmn_err(CE_WARN,
4727 "myri10ge_bigbufs_max be at least 128\n");
4728 myri10ge_bigbufs_max = 128;
4729 }
4730
4731 if (myri10ge_bigbufs_max < myri10ge_bigbufs_initial) {
4732 cmn_err(CE_WARN,
4733 "myri10ge_bigbufs_max must be >= "
4734 "myri10ge_bigbufs_initial\n");
4735 myri10ge_bigbufs_max = myri10ge_bigbufs_initial;
4736 }
4737
4738 myri10ge_force_firmware = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4739 "myri10ge_force_firmware", myri10ge_force_firmware);
4740
4741 myri10ge_max_slices = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4742 "myri10ge_max_slices", myri10ge_max_slices);
4743
4744 myri10ge_use_msix = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4745 "myri10ge_use_msix", myri10ge_use_msix);
4746
4747 myri10ge_rss_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4748 "myri10ge_rss_hash", myri10ge_rss_hash);
4749
4750 if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX ||
4751 myri10ge_rss_hash < MXGEFW_RSS_HASH_TYPE_IPV4) {
4752 cmn_err(CE_WARN, "myri10ge: Illegal rssh hash type %d\n",
4753 myri10ge_rss_hash);
4754 myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4755 }
4756 myri10ge_lro = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4757 "myri10ge_lro", myri10ge_lro);
4758 myri10ge_lro_cnt = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4759 "myri10ge_lro_cnt", myri10ge_lro_cnt);
4760 myri10ge_lro_max_aggr = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4761 "myri10ge_lro_max_aggr", myri10ge_lro_max_aggr);
4762 myri10ge_tx_hash = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4763 "myri10ge_tx_hash", myri10ge_tx_hash);
4764 myri10ge_use_lso = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4765 "myri10ge_use_lso", myri10ge_use_lso);
4766 myri10ge_lso_copy = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4767 "myri10ge_lso_copy", myri10ge_lso_copy);
4768 myri10ge_tx_handles_initial = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4769 "myri10ge_tx_handles_initial", myri10ge_tx_handles_initial);
4770 myri10ge_small_bytes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4771 "myri10ge_small_bytes", myri10ge_small_bytes);
4772 if ((myri10ge_small_bytes + MXGEFW_PAD) & (128 - 1)) {
4773 cmn_err(CE_WARN, "myri10ge: myri10ge_small_bytes (%d) + "
4774 "MXGEFW_PAD must fall on a 128-byte boundary\n",
4775 myri10ge_small_bytes);
4776 myri10ge_small_bytes += 128;
4777 myri10ge_small_bytes &= ~(128 -1);
4778 myri10ge_small_bytes -= MXGEFW_PAD;
4779 cmn_err(CE_WARN, "rounded up to %d\n",
4780 myri10ge_small_bytes);
4781
4783 }
4784 }
4785
4786 #ifndef PCI_EXP_LNKSTA
4787 #define PCI_EXP_LNKSTA 18
4788 #endif
4789
4790 static int
4791 myri10ge_find_cap(ddi_acc_handle_t handle, uint8_t *capptr, uint8_t capid)
4792 {
4793 uint16_t status;
4794 uint8_t ptr;
4795
4796 /* check to see if we have capabilities */
4797 status = pci_config_get16(handle, PCI_CONF_STAT);
4798 if (!(status & PCI_STAT_CAP)) {
4799 cmn_err(CE_WARN, "PCI_STAT_CAP not found\n");
4800 return (ENXIO);
4801 }
4802
4803 ptr = pci_config_get8(handle, PCI_CONF_CAP_PTR);
4804
4805 /* Walk the capabilities list, looking for the requested capability */
4806 while (ptr != PCI_CAP_NEXT_PTR_NULL) {
4807 if (pci_config_get8(handle, ptr + PCI_CAP_ID) == capid)
4808 break;
4809 ptr = pci_config_get8(handle, ptr + PCI_CAP_NEXT_PTR);
4810 }
4811 if (ptr < 64) {
4812 cmn_err(CE_WARN, "Bad capability offset %d\n", ptr);
4813 return (ENXIO);
4814 }
4815 *capptr = ptr;
4816 return (0);
4817 }
4818
4819 static int
4820 myri10ge_set_max_readreq(ddi_acc_handle_t handle)
4821 {
4822 int err;
4823 uint16_t val;
4824 uint8_t ptr;
4825
4826 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E);
4827 if (err != 0) {
4828 cmn_err(CE_WARN, "could not find PCIe cap\n");
4829 return (ENXIO);
4830 }
4831
4832 /* set max read req to 4096 */
4833 val = pci_config_get16(handle, ptr + PCIE_DEVCTL);
4834 val = (val & ~PCIE_DEVCTL_MAX_READ_REQ_MASK) |
4835 PCIE_DEVCTL_MAX_READ_REQ_4096;
4836 pci_config_put16(handle, ptr + PCIE_DEVCTL, val);
4837 val = pci_config_get16(handle, ptr + PCIE_DEVCTL);
4838 if ((val & (PCIE_DEVCTL_MAX_READ_REQ_4096)) !=
4839 PCIE_DEVCTL_MAX_READ_REQ_4096) {
4840 cmn_err(CE_WARN, "could not set max read req (%x)\n", val);
4841 return (EINVAL);
4842 }
4843 return (0);
4844 }
4845
4846 static int
4847 myri10ge_read_pcie_link_width(ddi_acc_handle_t handle, int *link)
4848 {
4849 int err;
4850 uint16_t val;
4851 uint8_t ptr;
4852
4853 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_PCI_E);
4854 if (err != 0) {
4855 cmn_err(CE_WARN, "could not set max read req\n");
4856 return (ENXIO);
4857 }
4858
4859 /* read link width */
4860 val = pci_config_get16(handle, ptr + PCIE_LINKSTS);
4861 val &= PCIE_LINKSTS_NEG_WIDTH_MASK;
4862 *link = (val >> 4);
4863 return (0);
4864 }
4865
4866 static int
4867 myri10ge_reset_nic(struct myri10ge_priv *mgp)
4868 {
4869 ddi_acc_handle_t handle = mgp->cfg_hdl;
4870 uint32_t reboot;
4871 uint16_t cmd;
4872 int err;
4873
4874 cmd = pci_config_get16(handle, PCI_CONF_COMM);
4875 if ((cmd & PCI_COMM_ME) == 0) {
4876 /*
4877 * Bus master DMA disabled? Check to see if the card
4878 * rebooted due to a parity error. For now, just report
4879 * it.
4880 */
4881
4882 /* enter read32 mode */
4883 pci_config_put8(handle, mgp->vso + 0x10, 0x3);
4884 /* read REBOOT_STATUS (0xfffffff0) */
4885 pci_config_put32(handle, mgp->vso + 0x18, 0xfffffff0);
4886 reboot = pci_config_get32(handle, mgp->vso + 0x14);
4887 cmn_err(CE_WARN, "%s NIC rebooted 0x%x\n", mgp->name, reboot);
4888 return (0);
4889 }
4890 if (!myri10ge_watchdog_reset) {
4891 cmn_err(CE_WARN, "%s: not resetting\n", mgp->name);
4892 return (1);
4893 }
4894
4895 myri10ge_stop_locked(mgp);
4896 err = myri10ge_start_locked(mgp);
4897 if (err == DDI_FAILURE) {
4898 return (0);
4899 }
4900 mac_tx_update(mgp->mh);
4901 return (1);
4902 }
4903
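/*
 * A ring counts as stalled (per the test below) when transmits have
 * been scheduled since the last recorded stall (sched != stall), the
 * firmware made no completion progress over the last watchdog
 * interval (done == watchdog_done), and requests are still
 * outstanding (watchdog_req != watchdog_done).
 */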
4904 static inline int
4905 myri10ge_ring_stalled(myri10ge_tx_ring_t *tx)
4906 {
4907 if (tx->sched != tx->stall &&
4908 tx->done == tx->watchdog_done &&
4909 tx->watchdog_req != tx->watchdog_done)
4910 return (1);
4911 return (0);
4912 }
4913
4914 static void
4915 myri10ge_watchdog(void *arg)
4916 {
4917 struct myri10ge_priv *mgp;
4918 struct myri10ge_slice_state *ss;
4919 myri10ge_tx_ring_t *tx;
4920 int nic_ok = 1;
4921 int slices_stalled, rx_pause, i;
4922 int add_rx;
4923
4924 mgp = arg;
4925 mutex_enter(&mgp->intrlock);
4926 if (mgp->running != MYRI10GE_ETH_RUNNING) {
4927 cmn_err(CE_WARN,
4928 "%s not running, not rearming watchdog (%d)\n",
4929 mgp->name, mgp->running);
4930 mutex_exit(&mgp->intrlock);
4931 return;
4932 }
4933
4934 rx_pause = ntohl(mgp->ss[0].fw_stats->dropped_pause);
4935
4936 /*
4937 * make sure nic is stalled before we reset the nic, so as to
4938 * ensure we don't rip the transmit data structures out from
4939 * under a pending transmit
4940 */
4941
4942 for (slices_stalled = 0, i = 0; i < mgp->num_slices; i++) {
4943 tx = &mgp->ss[i].tx;
4944 slices_stalled = myri10ge_ring_stalled(tx);
4945 if (slices_stalled)
4946 break;
4947 }
4948
4949 if (slices_stalled) {
4950 if (mgp->watchdog_rx_pause == rx_pause) {
4951 cmn_err(CE_WARN,
4952 "%s slice %d stalled:(%d, %d, %d, %d, %d %d %d\n)",
4953 mgp->name, i, tx->sched, tx->stall,
4954 tx->done, tx->watchdog_done, tx->req, tx->pkt_done,
4955 (int)ntohl(mgp->ss[i].fw_stats->send_done_count));
4956 nic_ok = myri10ge_reset_nic(mgp);
4957 } else {
4958 cmn_err(CE_WARN,
4959 "%s Flow controlled, check link partner\n",
4960 mgp->name);
4961 }
4962 }
4963
4964 if (!nic_ok) {
4965 cmn_err(CE_WARN,
4966 "%s Nic dead, not rearming watchdog\n", mgp->name);
4967 mutex_exit(&mgp->intrlock);
4968 return;
4969 }
4970 for (i = 0; i < mgp->num_slices; i++) {
4971 ss = &mgp->ss[i];
4972 tx = &ss->tx;
4973 tx->watchdog_done = tx->done;
4974 tx->watchdog_req = tx->req;
4975 if (ss->watchdog_rx_copy != MYRI10GE_SLICE_STAT(rx_copy)) {
4976 ss->watchdog_rx_copy = MYRI10GE_SLICE_STAT(rx_copy);
4977 add_rx =
4978 min(ss->jpool.num_alloc,
4979 myri10ge_bigbufs_max -
4980 (ss->jpool.num_alloc -
4981 ss->jbufs_for_smalls));
4982 if (add_rx != 0) {
4983 (void) myri10ge_add_jbufs(ss, add_rx, 0);
4984 /* now feed them to the firmware */
4985 mutex_enter(&ss->jpool.mtx);
4986 myri10ge_restock_jumbos(ss);
4987 mutex_exit(&ss->jpool.mtx);
4988 }
4989 }
4990 }
4991 mgp->watchdog_rx_pause = rx_pause;
4992
4993 mgp->timer_id = timeout(myri10ge_watchdog, mgp,
4994 mgp->timer_ticks);
4995 mutex_exit(&mgp->intrlock);
4996 }
4997
4998 /*ARGSUSED*/
4999 static int
5000 myri10ge_get_coalesce(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)
5001
5002 {
5003 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5004 (void) mi_mpprintf(mp, "%d", mgp->intr_coal_delay);
5005 return (0);
5006 }
5007
5008 /*ARGSUSED*/
5009 static int
5010 myri10ge_set_coalesce(queue_t *q, mblk_t *mp, char *value,
5011 caddr_t cp, cred_t *credp)
5012
5013 {
5014 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5015 char *end;
5016 size_t new_value;
5017
5018 new_value = mi_strtol(value, &end, 10);
5019 if (end == value)
5020 return (EINVAL);
5021
5022 mutex_enter(&myri10ge_param_lock);
5023 mgp->intr_coal_delay = (int)new_value;
5024 *mgp->intr_coal_delay_ptr = htonl(mgp->intr_coal_delay);
5025 mutex_exit(&myri10ge_param_lock);
5026 return (0);
5027 }
5028
5029 /*ARGSUSED*/
5030 static int
5031 myri10ge_get_pauseparam(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)
5032
5033 {
5034 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5035 (void) mi_mpprintf(mp, "%d", mgp->pause);
5036 return (0);
5037 }
5038
5039 /*ARGSUSED*/
5040 static int
5041 myri10ge_set_pauseparam(queue_t *q, mblk_t *mp, char *value,
5042 caddr_t cp, cred_t *credp)
5043
5044 {
5045 struct myri10ge_priv *mgp = (struct myri10ge_priv *)(void *)cp;
5046 char *end;
5047 size_t new_value;
5048 int err = 0;
5049
5050 new_value = mi_strtol(value, &end, 10);
5051 if (end == value)
5052 return (EINVAL);
5053 if (new_value != 0)
5054 new_value = 1;
5055
5056 mutex_enter(&myri10ge_param_lock);
5057 if (new_value != mgp->pause)
5058 err = myri10ge_change_pause(mgp, new_value);
5059 mutex_exit(&myri10ge_param_lock);
5060 return (err);
5061 }
5062
5063 /*ARGSUSED*/
5064 static int
5065 myri10ge_get_int(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *credp)
5066
5067 {
5068 (void) mi_mpprintf(mp, "%d", *(int *)(void *)cp);
5069 return (0);
5070 }
5071
5072 /*ARGSUSED*/
5073 static int
5074 myri10ge_set_int(queue_t *q, mblk_t *mp, char *value,
5075 caddr_t cp, cred_t *credp)
5076
5077 {
5078 char *end;
5079 size_t new_value;
5080
5081 new_value = mi_strtol(value, &end, 10);
5082 if (end == value)
5083 return (EINVAL);
5084 *(int *)(void *)cp = new_value;
5085
5086 return (0);
5087 }
5088
5089 static void
5090 myri10ge_ndd_init(struct myri10ge_priv *mgp)
5091 {
5092 mgp->nd_head = NULL;
5093
5094 (void) nd_load(&mgp->nd_head, "myri10ge_intr_coal_delay",
5095 myri10ge_get_coalesce, myri10ge_set_coalesce, (caddr_t)mgp);
5096 (void) nd_load(&mgp->nd_head, "myri10ge_flow_control",
5097 myri10ge_get_pauseparam, myri10ge_set_pauseparam, (caddr_t)mgp);
5098 (void) nd_load(&mgp->nd_head, "myri10ge_verbose",
5099 myri10ge_get_int, myri10ge_set_int, (caddr_t)&myri10ge_verbose);
5100 (void) nd_load(&mgp->nd_head, "myri10ge_deassert_wait",
5101 myri10ge_get_int, myri10ge_set_int,
5102 (caddr_t)&myri10ge_deassert_wait);
5103 (void) nd_load(&mgp->nd_head, "myri10ge_bigbufs_max",
5104 myri10ge_get_int, myri10ge_set_int,
5105 (caddr_t)&myri10ge_bigbufs_max);
5106 (void) nd_load(&mgp->nd_head, "myri10ge_lro",
5107 myri10ge_get_int, myri10ge_set_int,
5108 (caddr_t)&myri10ge_lro);
5109 (void) nd_load(&mgp->nd_head, "myri10ge_lro_max_aggr",
5110 myri10ge_get_int, myri10ge_set_int,
5111 (caddr_t)&myri10ge_lro_max_aggr);
5112 (void) nd_load(&mgp->nd_head, "myri10ge_tx_hash",
5113 myri10ge_get_int, myri10ge_set_int,
5114 (caddr_t)&myri10ge_tx_hash);
5115 (void) nd_load(&mgp->nd_head, "myri10ge_lso_copy",
5116 myri10ge_get_int, myri10ge_set_int,
5117 (caddr_t)&myri10ge_lso_copy);
5118 }
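/*
 * These parameters can then be read and tuned at run time with
 * ndd(1M); an illustrative session (device node name assumed):
 *
 *	# ndd /dev/myri10ge0 myri10ge_intr_coal_delay
 *	# ndd -set /dev/myri10ge0 myri10ge_intr_coal_delay 50
 */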
5119
5120 static void
5121 myri10ge_ndd_fini(struct myri10ge_priv *mgp)
5122 {
5123 nd_free(&mgp->nd_head);
5124 }
5125
5126 static void
5127 myri10ge_m_ioctl(void *arg, queue_t *wq, mblk_t *mp)
5128 {
5129 struct iocblk *iocp;
5130 struct myri10ge_priv *mgp = arg;
5131 int cmd, ok, err;
5132
5133 iocp = (struct iocblk *)(void *)mp->b_rptr;
5134 cmd = iocp->ioc_cmd;
5135
5136 ok = 0;
5137 err = 0;
5138
5139 switch (cmd) {
5140 case ND_GET:
5141 case ND_SET:
5142 ok = nd_getset(wq, mgp->nd_head, mp);
5143 break;
5144 default:
5145 break;
5146 }
5147 if (!ok)
5148 err = EINVAL;
5149 else
5150 err = iocp->ioc_error;
5151
5152 if (!err)
5153 miocack(wq, mp, iocp->ioc_count, err);
5154 else
5155 miocnak(wq, mp, 0, err);
5156 }
5157
5158 static struct myri10ge_priv *mgp_list;
5159
5160 struct myri10ge_priv *
5161 myri10ge_get_instance(uint_t unit)
5162 {
5163 struct myri10ge_priv *mgp;
5164
5165 mutex_enter(&myri10ge_param_lock);
5166 for (mgp = mgp_list; mgp != NULL; mgp = mgp->next) {
5167 if (unit == ddi_get_instance(mgp->dip)) {
5168 mgp->refcnt++;
5169 break;
5170 }
5171 }
5172 mutex_exit(&myri10ge_param_lock);
5173 return (mgp);
5174 }
5175
5176 void
5177 myri10ge_put_instance(struct myri10ge_priv *mgp)
5178 {
5179 mutex_enter(&myri10ge_param_lock);
5180 mgp->refcnt--;
5181 mutex_exit(&myri10ge_param_lock);
5182 }
5183
5184 static boolean_t
5185 myri10ge_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
5186 {
5187 struct myri10ge_priv *mgp = arg;
5188 uint32_t *cap_hcksum;
5189 mac_capab_lso_t *cap_lso;
5190 mac_capab_rings_t *cap_rings;
5191
5192 switch (cap) {
5193 case MAC_CAPAB_HCKSUM:
5194 cap_hcksum = cap_data;
5195 *cap_hcksum = HCKSUM_INET_PARTIAL;
5196 break;
5197 case MAC_CAPAB_RINGS:
5198 cap_rings = cap_data;
5199 switch (cap_rings->mr_type) {
5200 case MAC_RING_TYPE_RX:
5201 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
5202 cap_rings->mr_rnum = mgp->num_slices;
5203 cap_rings->mr_gnum = 1;
5204 cap_rings->mr_rget = myri10ge_fill_ring;
5205 cap_rings->mr_gget = myri10ge_fill_group;
5206 break;
5207 case MAC_RING_TYPE_TX:
5208 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
5209 cap_rings->mr_rnum = mgp->num_slices;
5210 cap_rings->mr_gnum = 0;
5211 cap_rings->mr_rget = myri10ge_fill_ring;
5212 cap_rings->mr_gget = NULL;
5213 break;
5214 default:
5215 return (B_FALSE);
5216 }
5217 break;
5218 case MAC_CAPAB_LSO:
5219 cap_lso = cap_data;
5220 if (!myri10ge_use_lso)
5221 return (B_FALSE);
5222 if (!(mgp->features & MYRI10GE_TSO))
5223 return (B_FALSE);
5224 cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
5225 cap_lso->lso_basic_tcp_ipv4.lso_max = (uint16_t)-1;
5226 break;
5227
5228 default:
5229 return (B_FALSE);
5230 }
5231 return (B_TRUE);
5232 }
5233
5234
5235 static int
5236 myri10ge_m_stat(void *arg, uint_t stat, uint64_t *val)
5237 {
5238 struct myri10ge_priv *mgp = arg;
5239 struct myri10ge_rx_ring_stats *rstat;
5240 struct myri10ge_tx_ring_stats *tstat;
5241 mcp_irq_data_t *fw_stats = mgp->ss[0].fw_stats;
5242 struct myri10ge_slice_state *ss;
5243 uint64_t tmp = 0;
5244 int i;
5245
5246 switch (stat) {
5247 case MAC_STAT_IFSPEED:
5248 *val = 10ull * 1000ull * 1000000ull;
5249 break;
5250
5251 case MAC_STAT_MULTIRCV:
5252 for (i = 0; i < mgp->num_slices; i++) {
5253 rstat = &mgp->ss[i].rx_stats;
5254 tmp += rstat->multircv;
5255 }
5256 *val = tmp;
5257 break;
5258
5259 case MAC_STAT_BRDCSTRCV:
5260 for (i = 0; i < mgp->num_slices; i++) {
5261 rstat = &mgp->ss[i].rx_stats;
5262 tmp += rstat->brdcstrcv;
5263 }
5264 *val = tmp;
5265 break;
5266
5267 case MAC_STAT_MULTIXMT:
5268 for (i = 0; i < mgp->num_slices; i++) {
5269 tstat = &mgp->ss[i].tx.stats;
5270 tmp += tstat->multixmt;
5271 }
5272 *val = tmp;
5273 break;
5274
5275 case MAC_STAT_BRDCSTXMT:
5276 for (i = 0; i < mgp->num_slices; i++) {
5277 tstat = &mgp->ss[i].tx.stats;
5278 tmp += tstat->brdcstxmt;
5279 }
5280 *val = tmp;
5281 break;
5282
5283 case MAC_STAT_NORCVBUF:
5284 tmp = ntohl(fw_stats->dropped_no_big_buffer);
5285 tmp += ntohl(fw_stats->dropped_no_small_buffer);
5286 tmp += ntohl(fw_stats->dropped_link_overflow);
5287 for (i = 0; i < mgp->num_slices; i++) {
5288 ss = &mgp->ss[i];
5289 tmp += MYRI10GE_SLICE_STAT(rx_big_nobuf);
5290 tmp += MYRI10GE_SLICE_STAT(rx_small_nobuf);
5291 }
5292 *val = tmp;
5293 break;
5294
5295 case MAC_STAT_IERRORS:
5296 tmp += ntohl(fw_stats->dropped_bad_crc32);
5297 tmp += ntohl(fw_stats->dropped_bad_phy);
5298 tmp += ntohl(fw_stats->dropped_runt);
5299 tmp += ntohl(fw_stats->dropped_overrun);
5300 *val = tmp;
5301 break;
5302
5303 case MAC_STAT_OERRORS:
5304 for (i = 0; i < mgp->num_slices; i++) {
5305 ss = &mgp->ss[i];
5306 tmp += MYRI10GE_SLICE_STAT(xmit_lsobadflags);
5307 tmp += MYRI10GE_SLICE_STAT(xmit_err);
5308 }
5309 *val = tmp;
5310 break;
5311
5312 case MAC_STAT_RBYTES:
5313 for (i = 0; i < mgp->num_slices; i++) {
5314 rstat = &mgp->ss[i].rx_stats;
5315 tmp += rstat->ibytes;
5316 }
5317 *val = tmp;
5318 break;
5319
5320 case MAC_STAT_IPACKETS:
5321 for (i = 0; i < mgp->num_slices; i++) {
5322 rstat = &mgp->ss[i].rx_stats;
5323 tmp += rstat->ipackets;
5324 }
5325 *val = tmp;
5326 break;
5327
5328 case MAC_STAT_OBYTES:
5329 for (i = 0; i < mgp->num_slices; i++) {
5330 tstat = &mgp->ss[i].tx.stats;
5331 tmp += tstat->obytes;
5332 }
5333 *val = tmp;
5334 break;
5335
5336 case MAC_STAT_OPACKETS:
5337 for (i = 0; i < mgp->num_slices; i++) {
5338 tstat = &mgp->ss[i].tx.stats;
5339 tmp += tstat->opackets;
5340 }
5341 *val = tmp;
5342 break;
5343
5344 case ETHER_STAT_TOOLONG_ERRORS:
5345 *val = ntohl(fw_stats->dropped_overrun);
5346 break;
5347
5348 #ifdef SOLARIS_S11
5349 case ETHER_STAT_TOOSHORT_ERRORS:
5350 *val = ntohl(fw_stats->dropped_runt);
5351 break;
5352 #endif
5353
5354 case ETHER_STAT_LINK_PAUSE:
5355 *val = mgp->pause;
5356 break;
5357
5358 case ETHER_STAT_LINK_AUTONEG:
5359 *val = 1;
5360 break;
5361
5362 case ETHER_STAT_LINK_DUPLEX:
5363 *val = LINK_DUPLEX_FULL;
5364 break;
5365
5366 default:
5367 return (ENOTSUP);
5368 }
5369
5370 return (0);
5371 }
5372
5373 static mac_callbacks_t myri10ge_m_callbacks = {
5374 (MC_IOCTL | MC_GETCAPAB),
5375 myri10ge_m_stat,
5376 myri10ge_m_start,
5377 myri10ge_m_stop,
5378 myri10ge_m_promisc,
5379 myri10ge_m_multicst,
5380 NULL,
5381 NULL,
5382 NULL,
5383 myri10ge_m_ioctl,
5384 myri10ge_m_getcapab
5385 };
5386
5387
5388 static int
5389 myri10ge_probe_slices(struct myri10ge_priv *mgp)
5390 {
5391 myri10ge_cmd_t cmd;
5392 int status;
5393
5394 mgp->num_slices = 1;
5395
5396 /* hit the board with a reset to ensure it is alive */
5397 (void) memset(&cmd, 0, sizeof (cmd));
5398 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd);
5399 if (status != 0) {
5400 cmn_err(CE_WARN, "%s: failed reset\n", mgp->name);
5401 return (ENXIO);
5402 }
5403
5404 if (myri10ge_use_msix == 0)
5405 return (0);
5406
5407 /* tell it the size of the interrupt queues */
5408 cmd.data0 = mgp->max_intr_slots * sizeof (struct mcp_slot);
5409 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
5410 if (status != 0) {
5411 cmn_err(CE_WARN, "%s: failed MXGEFW_CMD_SET_INTRQ_SIZE\n",
5412 mgp->name);
5413 return (ENXIO);
5414 }
5415
5416 /* ask the firmware for the maximum number of slices it supports */
5417 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
5418 &cmd);
5419 if (status != 0)
5420 return (0);
5421
5422 mgp->num_slices = cmd.data0;
5423
5424 /*
5425 * if the admin did not specify a limit to how many
5426 * slices we should use, cap it automatically to the
5427 * number of CPUs currently online
5428 */
5429 if (myri10ge_max_slices == -1)
5430 myri10ge_max_slices = ncpus;
5431
5432 if (mgp->num_slices > myri10ge_max_slices)
5433 mgp->num_slices = myri10ge_max_slices;
5434
5435
5436 /*
5437 * Now try to allocate as many MSI-X vectors as we have
5438 * slices. We give up on MSI-X if we can only get a single
5439 * vector.
5440 */
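/*
 * For example (illustrative counts): with 8 slices but only 5
 * available vectors, the first myri10ge_add_intrs() call fails
 * (5 < 8), num_slices is halved to 4, and the next pass allocates
 * 4 vectors and succeeds.
 */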
5441 while (mgp->num_slices > 1) {
5442 /* make sure it is a power of two */
5443 while (mgp->num_slices & (mgp->num_slices - 1))
5444 mgp->num_slices--;
5445 if (mgp->num_slices == 1)
5446 return (0);
5447
5448 status = myri10ge_add_intrs(mgp, 0);
5449 if (status == 0) {
5450 myri10ge_rem_intrs(mgp, 0);
5451 if (mgp->intr_cnt == mgp->num_slices) {
5452 if (myri10ge_verbose)
5453 printf("Got %d slices!\n",
5454 mgp->num_slices);
5455 return (0);
5456 }
5457 mgp->num_slices = mgp->intr_cnt;
5458 } else {
5459 mgp->num_slices = mgp->num_slices / 2;
5460 }
5461 }
5462
5463 if (myri10ge_verbose)
5464 printf("Got %d slices\n", mgp->num_slices);
5465 return (0);
5466 }
5467
5468 static void
5469 myri10ge_lro_free(struct myri10ge_slice_state *ss)
5470 {
5471 struct lro_entry *lro;
5472
5473 while (ss->lro_free != NULL) {
5474 lro = ss->lro_free;
5475 ss->lro_free = lro->next;
5476 kmem_free(lro, sizeof (*lro));
5477 }
5478 }
5479
5480 static void
5481 myri10ge_lro_alloc(struct myri10ge_slice_state *ss)
5482 {
5483 struct lro_entry *lro;
5484 int idx;
5485
5486 ss->lro_free = NULL;
5487 ss->lro_active = NULL;
5488
5489 for (idx = 0; idx < myri10ge_lro_cnt; idx++) {
5490 lro = kmem_zalloc(sizeof (*lro), KM_SLEEP);
5491 if (lro == NULL)
5492 continue;
5493 lro->next = ss->lro_free;
5494 ss->lro_free = lro;
5495 }
5496 }
5497
5498 static void
5499 myri10ge_free_slices(struct myri10ge_priv *mgp)
5500 {
5501 struct myri10ge_slice_state *ss;
5502 size_t bytes;
5503 int i;
5504
5505 if (mgp->ss == NULL)
5506 return;
5507
5508 for (i = 0; i < mgp->num_slices; i++) {
5509 ss = &mgp->ss[i];
5510 if (ss->rx_done.entry == NULL)
5511 continue;
5512 myri10ge_dma_free(&ss->rx_done.dma);
5513 ss->rx_done.entry = NULL;
5514 if (ss->fw_stats == NULL)
5515 continue;
5516 myri10ge_dma_free(&ss->fw_stats_dma);
5517 ss->fw_stats = NULL;
5518 mutex_destroy(&ss->rx_lock);
5519 mutex_destroy(&ss->tx.lock);
5520 mutex_destroy(&ss->tx.handle_lock);
5521 mutex_destroy(&ss->poll_lock);
5522 myri10ge_jpool_fini(ss);
5523 myri10ge_slice_stat_destroy(ss);
5524 myri10ge_lro_free(ss);
5525 }
5526 bytes = sizeof (*mgp->ss) * mgp->num_slices;
5527 kmem_free(mgp->ss, bytes);
5528 mgp->ss = NULL;
5529 }
5530
5531
5532 static int
5533 myri10ge_alloc_slices(struct myri10ge_priv *mgp)
5534 {
5535 struct myri10ge_slice_state *ss;
5536 size_t bytes;
5537 int i;
5538
5539 bytes = sizeof (*mgp->ss) * mgp->num_slices;
5540 mgp->ss = kmem_zalloc(bytes, KM_SLEEP);
5541 if (mgp->ss == NULL)
5542 return (ENOMEM);
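/*
 * Note: the NULL check above is purely defensive; kmem_zalloc()
 * with KM_SLEEP sleeps until memory is available rather than failing.
 */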
5543 for (i = 0; i < mgp->num_slices; i++) {
5544 ss = &mgp->ss[i];
5545
5546 ss->mgp = mgp;
5547
5548 /* allocate the per-slice firmware stats */
5549 bytes = sizeof (*ss->fw_stats);
5550 ss->fw_stats = (mcp_irq_data_t *)(void *)
5551 myri10ge_dma_alloc(mgp->dip, bytes,
5552 &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
5553 DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT,
5554 &ss->fw_stats_dma, 1, DDI_DMA_DONTWAIT);
5555 if (ss->fw_stats == NULL)
5556 goto abort;
5557 (void) memset(ss->fw_stats, 0, bytes);
5558
5559 /* allocate rx done ring */
5560 bytes = mgp->max_intr_slots *
5561 sizeof (*ss->rx_done.entry);
5562 ss->rx_done.entry = (mcp_slot_t *)(void *)
5563 myri10ge_dma_alloc(mgp->dip, bytes,
5564 &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
5565 DDI_DMA_CONSISTENT, DDI_DMA_READ|DDI_DMA_CONSISTENT,
5566 &ss->rx_done.dma, 1, DDI_DMA_DONTWAIT);
5567 if (ss->rx_done.entry == NULL) {
5568 goto abort;
5569 }
5570 (void) memset(ss->rx_done.entry, 0, bytes);
5571 mutex_init(&ss->rx_lock, NULL, MUTEX_DEFAULT, mgp->icookie);
5572 mutex_init(&ss->tx.lock, NULL, MUTEX_DEFAULT, NULL);
5573 mutex_init(&ss->tx.handle_lock, NULL, MUTEX_DEFAULT, NULL);
5574 mutex_init(&ss->poll_lock, NULL, MUTEX_DEFAULT, NULL);
5575 myri10ge_jpool_init(ss);
5576 (void) myri10ge_slice_stat_init(ss);
5577 myri10ge_lro_alloc(ss);
5578 }
5579
5580 return (0);
5581
5582 abort:
5583 myri10ge_free_slices(mgp);
5584 return (ENOMEM);
5585 }
5586
5587 static int
5588 myri10ge_save_msi_state(struct myri10ge_priv *mgp,
5589 ddi_acc_handle_t handle)
5590 {
5591 uint8_t ptr;
5592 int err;
5593
5594 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI);
5595 if (err != 0) {
5596 cmn_err(CE_WARN, "%s: could not find MSI cap\n",
5597 mgp->name);
5598 return (DDI_FAILURE);
5599 }
5600 mgp->pci_saved_state.msi_ctrl =
5601 pci_config_get16(handle, ptr + PCI_MSI_CTRL);
5602 mgp->pci_saved_state.msi_addr_low =
5603 pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET);
5604 mgp->pci_saved_state.msi_addr_high =
5605 pci_config_get32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4);
5606 mgp->pci_saved_state.msi_data_32 =
5607 pci_config_get16(handle, ptr + PCI_MSI_32BIT_DATA);
5608 mgp->pci_saved_state.msi_data_64 =
5609 pci_config_get16(handle, ptr + PCI_MSI_64BIT_DATA);
5610 return (DDI_SUCCESS);
5611 }
5612
5613 static int
5614 myri10ge_restore_msi_state(struct myri10ge_priv *mgp,
5615 ddi_acc_handle_t handle)
5616 {
5617 uint8_t ptr;
5618 int err;
5619
5620 err = myri10ge_find_cap(handle, &ptr, PCI_CAP_ID_MSI);
5621 if (err != 0) {
5622 cmn_err(CE_WARN, "%s: could not find MSI cap\n",
5623 mgp->name);
5624 return (DDI_FAILURE);
5625 }
5626
5627 pci_config_put16(handle, ptr + PCI_MSI_CTRL,
5628 mgp->pci_saved_state.msi_ctrl);
5629 pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET,
5630 mgp->pci_saved_state.msi_addr_low);
5631 pci_config_put32(handle, ptr + PCI_MSI_ADDR_OFFSET + 4,
5632 mgp->pci_saved_state.msi_addr_high);
5633 pci_config_put16(handle, ptr + PCI_MSI_32BIT_DATA,
5634 mgp->pci_saved_state.msi_data_32);
5635 pci_config_put16(handle, ptr + PCI_MSI_64BIT_DATA,
5636 mgp->pci_saved_state.msi_data_64);
5637
5638 return (DDI_SUCCESS);
5639 }
5640
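/*
 * The 64-byte (non-extended) config header is saved as sixteen 32-bit
 * words and restored in reverse order, so the command register at
 * offset 4 is rewritten only after the BARs have been restored; this
 * should keep memory decoding disabled while the BARs still hold
 * post-reset values.
 */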
static int
myri10ge_save_pci_state(struct myri10ge_priv *mgp)
{
	ddi_acc_handle_t handle = mgp->cfg_hdl;
	int i;
	int err = DDI_SUCCESS;

	/* Save the non-extended PCI config space 32-bits at a time */
	for (i = 0; i < 16; i++)
		mgp->pci_saved_state.base[i] =
		    pci_config_get32(handle, i*4);

	/* now save MSI interrupt state, if needed */
	if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI)
		err = myri10ge_save_msi_state(mgp, handle);

	return (err);
}

static int
myri10ge_restore_pci_state(struct myri10ge_priv *mgp)
{
	ddi_acc_handle_t handle = mgp->cfg_hdl;
	int i;
	int err = DDI_SUCCESS;

	/* Restore the non-extended PCI config space 32-bits at a time */
	for (i = 15; i >= 0; i--)
		pci_config_put32(handle, i*4, mgp->pci_saved_state.base[i]);

	/* now restore MSI interrupt state, if needed */
	if (mgp->ddi_intr_type == DDI_INTR_TYPE_MSI)
		err = myri10ge_restore_msi_state(mgp, handle);

	if (mgp->max_read_request_4k)
		(void) myri10ge_set_max_readreq(handle);
	return (err);
}

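/*
 * DDI_SUSPEND support: stop the data path and cancel the watchdog if
 * the interface is running, then snapshot PCI config space.  The
 * SUSPENDED_RUNNING state records that myri10ge_resume() must restart
 * the data path.
 */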
static int
myri10ge_suspend(dev_info_t *dip)
{
	struct myri10ge_priv *mgp = ddi_get_driver_private(dip);
	int status;

	if (mgp == NULL) {
		cmn_err(CE_WARN, "null dip in myri10ge_suspend\n");
		return (DDI_FAILURE);
	}
	if (mgp->dip != dip) {
		cmn_err(CE_WARN, "bad dip in myri10ge_suspend\n");
		return (DDI_FAILURE);
	}
	mutex_enter(&mgp->intrlock);
	if (mgp->running == MYRI10GE_ETH_RUNNING) {
		mgp->running = MYRI10GE_ETH_STOPPING;
		mutex_exit(&mgp->intrlock);
		(void) untimeout(mgp->timer_id);
		mutex_enter(&mgp->intrlock);
		myri10ge_stop_locked(mgp);
		mgp->running = MYRI10GE_ETH_SUSPENDED_RUNNING;
	}
	status = myri10ge_save_pci_state(mgp);
	mutex_exit(&mgp->intrlock);
	return (status);
}

static int
myri10ge_resume(dev_info_t *dip)
{
	struct myri10ge_priv *mgp = ddi_get_driver_private(dip);
	int status = DDI_SUCCESS;

	if (mgp == NULL) {
		cmn_err(CE_WARN, "null dip in myri10ge_resume\n");
		return (DDI_FAILURE);
	}
	if (mgp->dip != dip) {
		cmn_err(CE_WARN, "bad dip in myri10ge_resume\n");
		return (DDI_FAILURE);
	}

	mutex_enter(&mgp->intrlock);
	status = myri10ge_restore_pci_state(mgp);
	if (status == DDI_SUCCESS &&
	    mgp->running == MYRI10GE_ETH_SUSPENDED_RUNNING) {
		status = myri10ge_start_locked(mgp);
	}
	mutex_exit(&mgp->intrlock);
	if (status != DDI_SUCCESS)
		return (status);

	/* start the watchdog timer */
	mgp->timer_id = timeout(myri10ge_watchdog, mgp,
	    mgp->timer_ticks);
	return (DDI_SUCCESS);
}

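/*
 * DDI_ATTACH: enable PCI access, map the NIC's SRAM, read the EEPROM
 * strings and MAC address, select and load firmware, probe and
 * allocate slices, add interrupts, and finally register with GLDv3 via
 * mac_register().  The abort_with_* labels unwind in reverse order.
 */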
static int
myri10ge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	struct myri10ge_priv *mgp;
	mac_register_t *macp, *omacp;
	ddi_acc_handle_t handle;
	uint32_t csr, hdr_offset;
	int status, span, link_width, max_read_request_4k;
	unsigned long bus_number, dev_number, func_number;
	size_t bytes;
	offset_t ss_offset;
	uint8_t vso;

	if (cmd == DDI_RESUME) {
		return (myri10ge_resume(dip));
	}

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);
	if (pci_config_setup(dip, &handle) != DDI_SUCCESS)
		return (DDI_FAILURE);

	/* enable bus master and memory space access */
	csr = pci_config_get32(handle, PCI_CONF_COMM);
	pci_config_put32(handle, PCI_CONF_COMM,
	    (csr | PCI_COMM_ME | PCI_COMM_MAE));
	status = myri10ge_read_pcie_link_width(handle, &link_width);
	if (status != 0) {
		cmn_err(CE_WARN, "could not read link width!\n");
		link_width = 0;
	}
	max_read_request_4k = !myri10ge_set_max_readreq(handle);
	status = myri10ge_find_cap(handle, &vso, PCI_CAP_ID_VS);
	if (status != 0)
		goto abort_with_cfg_hdl;
	if ((omacp = mac_alloc(MAC_VERSION)) == NULL)
		goto abort_with_cfg_hdl;
	/*
	 * XXXX Hack: mac_register_t grows in newer kernels.  To be
	 * able to write newer fields, such as m_margin, without
	 * writing outside allocated memory, we allocate our own macp
	 * and pass that to mac_register()
	 */
	macp = kmem_zalloc(sizeof (*macp) * 8, KM_SLEEP);
	macp->m_version = omacp->m_version;

	if ((mgp = (struct myri10ge_priv *)
	    kmem_zalloc(sizeof (*mgp), KM_SLEEP)) == NULL) {
		goto abort_with_macinfo;
	}
	ddi_set_driver_private(dip, mgp);

	/* setup device name for log messages */
	(void) sprintf(mgp->name, "myri10ge%d", ddi_get_instance(dip));

	mutex_enter(&myri10ge_param_lock);
	myri10ge_get_props(dip);
	mgp->intr_coal_delay = myri10ge_intr_coal_delay;
	mgp->pause = myri10ge_flow_control;
	mutex_exit(&myri10ge_param_lock);

	mgp->max_read_request_4k = max_read_request_4k;
	mgp->pcie_link_width = link_width;
	mgp->running = MYRI10GE_ETH_STOPPED;
	mgp->vso = vso;
	mgp->dip = dip;
	mgp->cfg_hdl = handle;

	mgp->timer_ticks = 5 * drv_usectohz(1000000); /* 5 seconds */
	myri10ge_test_physical(dip);

	/* allocate command page */
	bytes = sizeof (*mgp->cmd);
	mgp->cmd = (mcp_cmd_response_t *)
	    (void *)myri10ge_dma_alloc(dip, bytes,
	    &myri10ge_misc_dma_attr, &myri10ge_dev_access_attr,
	    DDI_DMA_CONSISTENT, DDI_DMA_RDWR|DDI_DMA_CONSISTENT,
	    &mgp->cmd_dma, 1, DDI_DMA_DONTWAIT);
	if (mgp->cmd == NULL)
		goto abort_with_mgp;

	(void) myri10ge_reg_set(dip, &mgp->reg_set, &span, &bus_number,
	    &dev_number, &func_number);
	if (myri10ge_verbose)
		printf("%s at %ld:%ld:%ld attaching\n", mgp->name,
		    bus_number, dev_number, func_number);
	status = ddi_regs_map_setup(dip, mgp->reg_set, (caddr_t *)&mgp->sram,
	    (offset_t)0, (offset_t)span, &myri10ge_dev_access_attr,
	    &mgp->io_handle);
	if (status != DDI_SUCCESS) {
		cmn_err(CE_WARN, "%s: couldn't map memory space", mgp->name);
		printf("%s: reg_set = %d, span = %d, status = %d",
		    mgp->name, mgp->reg_set, span, status);
		goto abort_with_mgp;
	}

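	/*
	 * The firmware's generation header lives in NIC SRAM; its
	 * string_specs field holds the offset of the EEPROM ID strings,
	 * which is also recorded below as mgp->sram_size.  Copy the
	 * strings out and make sure they are NUL-terminated.
	 */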
	hdr_offset = *(uint32_t *)(void*)(mgp->sram + MCP_HEADER_PTR_OFFSET);
	hdr_offset = ntohl(hdr_offset) & 0xffffc;
	ss_offset = hdr_offset +
	    offsetof(struct mcp_gen_header, string_specs);
	mgp->sram_size = ntohl(*(uint32_t *)(void*)(mgp->sram + ss_offset));
	myri10ge_pio_copy32(mgp->eeprom_strings,
	    (uint32_t *)(void*)((char *)mgp->sram + mgp->sram_size),
	    MYRI10GE_EEPROM_STRINGS_SIZE);
	(void) memset(mgp->eeprom_strings +
	    MYRI10GE_EEPROM_STRINGS_SIZE - 2, 0, 2);

	status = myri10ge_read_mac_addr(mgp);
	if (status) {
		goto abort_with_mapped;
	}

	status = myri10ge_select_firmware(mgp);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed to load firmware\n", mgp->name);
		goto abort_with_mapped;
	}

	status = myri10ge_probe_slices(mgp);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed to probe slices\n", mgp->name);
		goto abort_with_dummy_rdma;
	}

	status = myri10ge_alloc_slices(mgp);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: failed to alloc slices\n", mgp->name);
		goto abort_with_dummy_rdma;
	}

	/* add the interrupt handler */
	status = myri10ge_add_intrs(mgp, 1);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: Failed to add interrupt\n",
		    mgp->name);
		goto abort_with_slices;
	}

	/* now that we have an iblock_cookie, init the mutexes */
	mutex_init(&mgp->cmd_lock, NULL, MUTEX_DRIVER, mgp->icookie);
	mutex_init(&mgp->intrlock, NULL, MUTEX_DRIVER, mgp->icookie);

	status = myri10ge_nic_stat_init(mgp);
	if (status != DDI_SUCCESS)
		goto abort_with_interrupts;
	status = myri10ge_info_init(mgp);
	if (status != DDI_SUCCESS)
		goto abort_with_stats;

	/*
	 * Initialize GLD state
	 */
	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = mgp;
	macp->m_dip = dip;
	macp->m_src_addr = mgp->mac_addr;
	macp->m_callbacks = &myri10ge_m_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = myri10ge_mtu -
	    (sizeof (struct ether_header) + MXGEFW_PAD + VLAN_TAGSZ);
#ifdef SOLARIS_S11
	macp->m_margin = VLAN_TAGSZ;
#endif
	macp->m_v12n = MAC_VIRT_LEVEL1;
	status = mac_register(macp, &mgp->mh);
	if (status != 0) {
		cmn_err(CE_WARN, "%s: mac_register failed with %d\n",
		    mgp->name, status);
		goto abort_with_info;
	}
	myri10ge_ndd_init(mgp);
	if (myri10ge_verbose)
		printf("%s: %s, tx bndry %d, fw %s\n", mgp->name,
		    mgp->intr_type, mgp->tx_boundary, mgp->fw_name);
	mutex_enter(&myri10ge_param_lock);
	mgp->next = mgp_list;
	mgp_list = mgp;
	mutex_exit(&myri10ge_param_lock);
	kmem_free(macp, sizeof (*macp) * 8);
	mac_free(omacp);
	return (DDI_SUCCESS);

abort_with_info:
	myri10ge_info_destroy(mgp);

abort_with_stats:
	myri10ge_nic_stat_destroy(mgp);

abort_with_interrupts:
	mutex_destroy(&mgp->cmd_lock);
	mutex_destroy(&mgp->intrlock);
	myri10ge_rem_intrs(mgp, 1);

abort_with_slices:
	myri10ge_free_slices(mgp);

abort_with_dummy_rdma:
	myri10ge_dummy_rdma(mgp, 0);

abort_with_mapped:
	ddi_regs_map_free(&mgp->io_handle);

	myri10ge_dma_free(&mgp->cmd_dma);

abort_with_mgp:
	kmem_free(mgp, sizeof (*mgp));

abort_with_macinfo:
	kmem_free(macp, sizeof (*macp) * 8);
	mac_free(omacp);

abort_with_cfg_hdl:
	pci_config_teardown(&handle);
	return (DDI_FAILURE);
}

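/*
 * DDI_DETACH: refuse to detach while loaned-up receive buffers or
 * external references remain, since tearing down the jumbo pool would
 * free memory that upper layers may still hold.
 */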
static int
myri10ge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	struct myri10ge_priv *mgp, *tmp;
	int status, i, jbufs_alloced;

	if (cmd == DDI_SUSPEND) {
		status = myri10ge_suspend(dip);
		return (status);
	}

	if (cmd != DDI_DETACH) {
		return (DDI_FAILURE);
	}
	/* Get the driver private (struct myri10ge_priv) structure */
	mgp = ddi_get_driver_private(dip);

	mutex_enter(&mgp->intrlock);
	jbufs_alloced = 0;
	for (i = 0; i < mgp->num_slices; i++) {
		myri10ge_remove_jbufs(&mgp->ss[i]);
		jbufs_alloced += mgp->ss[i].jpool.num_alloc;
	}
	mutex_exit(&mgp->intrlock);
	if (jbufs_alloced != 0) {
		cmn_err(CE_NOTE, "%s: %d loaned rx buffers remain\n",
		    mgp->name, jbufs_alloced);
		return (DDI_FAILURE);
	}

	mutex_enter(&myri10ge_param_lock);
	if (mgp->refcnt != 0) {
		mutex_exit(&myri10ge_param_lock);
		cmn_err(CE_NOTE, "%s: %d external refs remain\n",
		    mgp->name, mgp->refcnt);
		return (DDI_FAILURE);
	}
	mutex_exit(&myri10ge_param_lock);

	status = mac_unregister(mgp->mh);
	if (status != DDI_SUCCESS)
		return (status);

	myri10ge_ndd_fini(mgp);
	myri10ge_dummy_rdma(mgp, 0);
	myri10ge_nic_stat_destroy(mgp);
	myri10ge_info_destroy(mgp);

	mutex_destroy(&mgp->cmd_lock);
	mutex_destroy(&mgp->intrlock);

	myri10ge_rem_intrs(mgp, 1);

	myri10ge_free_slices(mgp);
	ddi_regs_map_free(&mgp->io_handle);
	myri10ge_dma_free(&mgp->cmd_dma);
	pci_config_teardown(&mgp->cfg_hdl);

	mutex_enter(&myri10ge_param_lock);
	if (mgp_list == mgp) {
		mgp_list = mgp->next;
	} else {
		tmp = mgp_list;
		while (tmp->next != mgp && tmp->next != NULL)
			tmp = tmp->next;
		if (tmp->next != NULL)
			tmp->next = tmp->next->next;
	}
	kmem_free(mgp, sizeof (*mgp));
	mutex_exit(&myri10ge_param_lock);
	return (DDI_SUCCESS);
}


/*
 * Helper for the quiesce entry point: interrupt threads are not being
 * scheduled, so we must poll for the confirmation DMA to arrive in
 * the firmware stats block for slice 0.  We essentially run the guts
 * of the interrupt handler, cherry-picking just the confirmation that
 * the NIC is quiesced (stats->link_down).
 */
static int
myri10ge_poll_down(struct myri10ge_priv *mgp)
{
	struct myri10ge_slice_state *ss = mgp->ss;
	mcp_irq_data_t *stats = ss->fw_stats;
	int valid;
	int found_down = 0;

	/* check for a pending IRQ */
	if (!*((volatile uint8_t *)&stats->valid))
		return (0);
	valid = stats->valid;

	/*
	 * Make sure to tell the NIC to lower a legacy IRQ, else
	 * it may have corrupt state after restarting
	 */
	if (mgp->ddi_intr_type == DDI_INTR_TYPE_FIXED) {
		/* lower legacy IRQ */
		*mgp->irq_deassert = 0;
		mb();
		/* wait for irq conf DMA */
		while (*((volatile uint8_t *)&stats->valid))
			;
	}
	if (stats->stats_updated && stats->link_down)
		found_down = 1;

	if (valid & 0x1)
		*ss->irq_claim = BE_32(3);
	*(ss->irq_claim + 1) = BE_32(3);

	return (found_down);
}

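/*
 * Fast reboot/quiesce entry point: interrupts are unavailable, so
 * bring the link down with a firmware command and poll for the
 * link_down confirmation, giving up after roughly two seconds
 * (20 tries, 100ms apart).
 */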
static int
myri10ge_quiesce(dev_info_t *dip)
{
	struct myri10ge_priv *mgp;
	myri10ge_cmd_t cmd;
	int status, down, i;

	mgp = ddi_get_driver_private(dip);
	if (mgp == NULL)
		return (DDI_FAILURE);

	/* if the device was unplumbed, it is guaranteed to be quiescent */
	if (mgp->running == MYRI10GE_ETH_STOPPED)
		return (DDI_SUCCESS);

	/* send a down CMD to quiesce the NIC */
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
	if (status) {
		cmn_err(CE_WARN, "%s: Couldn't bring down link\n", mgp->name);
		return (DDI_FAILURE);
	}

	for (i = 0; i < 20; i++) {
		down = myri10ge_poll_down(mgp);
		if (down)
			break;
		delay(drv_usectohz(100000));
		mb();
	}
	if (down)
		return (DDI_SUCCESS);
	return (DDI_FAILURE);
}

/*
 * Distinguish between allocb'ed blocks and esballoc'ed attached
 * storage by recording the db_lastfree pointer that ordinary
 * allocb'ed mblks carry.
 */
static void
myri10ge_find_lastfree(void)
{
	mblk_t *mp = allocb(1024, 0);
	dblk_t *dbp;

	if (mp == NULL) {
		cmn_err(CE_WARN, "myri10ge_find_lastfree failed\n");
		return;
	}
	dbp = mp->b_datap;
	myri10ge_db_lastfree = (void *)dbp->db_lastfree;
}

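/*
 * Loadable module entry points.
 */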
int
_init(void)
{
	int i;

	if (myri10ge_verbose)
		cmn_err(CE_NOTE,
		    "Myricom 10G driver (10GbE) version %s loading\n",
		    MYRI10GE_VERSION_STR);
	myri10ge_find_lastfree();
	mac_init_ops(&myri10ge_ops, "myri10ge");
	mutex_init(&myri10ge_param_lock, NULL, MUTEX_DEFAULT, NULL);
	if ((i = mod_install(&modlinkage)) != 0) {
		cmn_err(CE_WARN, "mod_install returned %d\n", i);
		mac_fini_ops(&myri10ge_ops);
		mutex_destroy(&myri10ge_param_lock);
	}
	return (i);
}

int
_fini(void)
{
	int i;

	i = mod_remove(&modlinkage);
	if (i != 0) {
		return (i);
	}
	mac_fini_ops(&myri10ge_ops);
	mutex_destroy(&myri10ge_param_lock);
	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}


/*
 * This file uses MyriGE driver indentation.
 *
 * Local Variables:
 * c-file-style:"sun"
 * tab-width:8
 * End:
 */