xref: /plan9-contrib/sys/src/9/pc/etherm10g.c (revision b39189fd423aed869c5cf5189bc504918cff969b)
1 /*
2  * myricom 10g-pcie-8a 10 Gb ethernet driver
3  * © 2007 erik quanstrom, coraid
4  *
5  * the card is big endian.
6  * we use uvlong rather than uintptr to hold addresses so that
7  * we don't get "warning: stupid shift" on 32-bit architectures.
8  */
9 #include "u.h"
10 #include "../port/lib.h"
11 #include "mem.h"
12 #include "dat.h"
13 #include "fns.h"
14 #include "io.h"
15 #include "../port/error.h"
16 #include "../port/netif.h"
17 
18 #include "../pc/etherif.h"
19 
20 #ifndef KiB
21 #define KiB		1024u			/* Kibi 0x0000000000000400 */
22 #define MiB		1048576u		/* Mebi 0x0000000000100000 */
23 #endif /* KiB */
24 
/*
 * debugging print, enabled at run time by the `debug' flag.
 * wrapped in do/while so the macro is a single statement: an
 * `else' written after a dprint call then binds to the caller's
 * `if' instead of to the hidden `if(debug)'.
 */
#define	dprint(...)	do{ if(debug) print(__VA_ARGS__); }while(0)
#define	pcicapdbg(...)				/* compiled out */
#define malign(n)	mallocalign((n), 4*KiB, 0, 0)	/* 4KiB-aligned allocation */
28 
29 #include "etherm10g2k.i"
30 #include "etherm10g4k.i"
31 
32 static int 	debug		= 0;
33 static char	Etimeout[]	= "timeout";
34 
35 enum {
36 	Epromsz	= 256,
37 	Maxslots= 1024,
38 	Align	= 4096,
39 	Maxmtu	= 9000,
40 	Noconf	= 0xffffffff,
41 
42 	Fwoffset= 1*MiB,
43 	Cmdoff	= 0xf80000,	/* command port offset */
44 	Fwsubmt	= 0xfc0000,	/* firmware submission command port offset */
45 	Rdmaoff	= 0xfc01c0,	/* rdma command port offset */
46 };
47 
/*
 * firmware command codes.  the values are consecutive; the
 * explicit numbers at the head of each group are only anchors.
 */
enum {
	CZero		= 0,
	Creset,
	Cversion,

	/* issue these before Cetherup */
	CSintrqdma	= 3,
	CSbigsz,		/* in bytes bigsize = 2^n */
	CSsmallsz,

	CGsendoff	= 6,
	CGsmallrxoff,
	CGbigrxoff,
	CGirqackoff,
	CGirqdeassoff,
	CGsendrgsz,
	CGrxrgsz,

	CSintrqsz	= 13,	/* 2^n */
	Cetherup,		/* above parameters + mtu/mac addr must be set first. */
	Cetherdn,

	/* below may be issued live */
	CSmtu		= 16,
	CGcoaloff,		/* in µs */
	CSstatsrate,		/* in µs */
	CSstatsdma,

	Cpromisc	= 20,
	Cnopromisc,
	CSmac,

	Cenablefc	= 23,
	Cdisablefc,

	Cdmatest	= 25,	/* address in d[0-1], d[2]=length */

	Cenableallmc	= 26,
	Cdisableallmc,

	CSjoinmc	= 28,
	CSleavemc,
	Cleaveallmc,

	CSstatsdma2	= 31,	/* adds (unused) multicast stats */
};
92 
93 typedef union {
94 	uint	i[2];
95 	uchar	c[8];
96 } Cmd;
97 
98 typedef ulong Slot;
99 typedef struct {
100 	ushort	cksum;
101 	ushort	len;
102 } Slotparts;
103 
/* bits for Send.flags */
enum {
	SFsmall	= 1<<0,
	SFfirst	= 1<<1,
	SFalign	= 1<<2,
	SFnotso	= 1<<4,
};
110 
111 typedef struct {
112 	ulong	high;
113 	ulong	low;
114 	ushort	hdroff;
115 	ushort	len;
116 	uchar	pad;
117 	uchar	nrdma;
118 	uchar	chkoff;
119 	uchar	flags;
120 } Send;
121 
122 typedef struct {
123 	QLock;
124 	Send	*lanai;		/* tx ring (cksum+len in lanai memory) */
125 	Send	*host;		/* tx ring (data in our memory) */
126 	Block	**bring;
127 //	uchar	*wcfifo;	/* what the heck is a w/c fifo? */
128 	int	size;		/* of buffers in the z8's memory */
129 	ulong	segsz;
130 	uint	n;		/* rxslots */
131 	uint	m;		/* mask; rxslots must be a power of two */
132 	uint	i;		/* number of segments (not frames) queued */
133 	uint	cnt;		/* number of segments sent by the card */
134 
135 	ulong	npkt;
136 	vlong	nbytes;
137 } Tx;
138 
139 typedef struct {
140 	Lock;
141 	Block	*head;
142 	uint	size;		/* buffer size of each block */
143 	uint	n;		/* n free buffers */
144 	uint	cnt;
145 } Bpool;
146 
147 static Bpool	smpool 	= { .size = 128, };
148 static Bpool	bgpool	= { .size = Maxmtu, };
149 
150 typedef struct {
151 	Bpool	*pool;		/* free buffers */
152 	ulong	*lanai;		/* rx ring; we have no permanent host shadow */
153 	Block	**host;		/* called "info" in myricom driver */
154 //	uchar	*wcfifo;	/* cmd submission fifo */
155 	uint	m;
156 	uint	n;		/* rxslots */
157 	uint	i;
158 	uint	cnt;		/* number of buffers allocated (lifetime) */
159 	uint	allocfail;
160 } Rx;
161 
162 /* dma mapped.  unix network byte order. */
163 typedef struct {
164 	uchar	txcnt[4];
165 	uchar	linkstat[4];
166 	uchar	dlink[4];
167 	uchar	derror[4];
168 	uchar	drunt[4];
169 	uchar	doverrun[4];
170 	uchar	dnosm[4];
171 	uchar	dnobg[4];
172 	uchar	nrdma[4];
173 	uchar	txstopped;
174 	uchar	down;
175 	uchar	updated;
176 	uchar	valid;
177 } Stats;
178 
/* values of Ctlr.state */
enum {
	Detached	= 0,
	Attached	= 1,
	Runed		= 2,
};
184 
185 typedef struct {
186 	Slot 	*entry;
187 	uvlong	busaddr;
188 	uint	m;
189 	uint	n;
190 	uint	i;
191 } Done;
192 
193 typedef struct Ctlr Ctlr;
194 typedef struct Ctlr {
195 	QLock;
196 	int	state;
197 	int	kprocs;
198 	uvlong	port;
199 	Pcidev*	pcidev;
200 	Ctlr*	next;
201 	int	active;
202 	int	id;		/* do we need this? */
203 
204 	uchar	ra[Eaddrlen];
205 
206 	int	ramsz;
207 	uchar	*ram;
208 
209 	ulong	*irqack;
210 	ulong	*irqdeass;
211 	ulong	*coal;
212 
213 	char	eprom[Epromsz];
214 	ulong	serial;		/* unit serial number */
215 
216 	QLock	cmdl;
217 	Cmd	*cmd;		/* address of command return */
218 	uvlong	cprt;		/* bus address of command */
219 
220 	uvlong	boot;		/* boot address */
221 
222 	Done	done;
223 	Tx	tx;
224 	Rx	sm;
225 	Rx	bg;
226 	Stats	*stats;
227 	uvlong	statsprt;
228 
229 	Rendez	rxrendez;
230 	Rendez	txrendez;
231 
232 	int	msi;
233 	ulong	linkstat;
234 	ulong	nrdma;
235 } Ctlr;
236 
237 static Ctlr 	*ctlrs;
238 
/* pci capability ids, as matched by pcicap */
enum {
	PciCapPMG	= 0x01,		/* power management */
	PciCapAGP	= 0x02,
	PciCapVPD	= 0x03,		/* vital product data */
	PciCapSID	= 0x04,		/* slot id */
	PciCapMSI	= 0x05,
	PciCapCHS	= 0x06,		/* compact pci hot swap */
	PciCapPCIX	= 0x07,
	PciCapHTC	= 0x08,		/* hypertransport irq conf */
	PciCapVND	= 0x09,		/* vendor specific information */
	PciCapHSW	= 0x0c,		/* hot swap */
	PciCapPCIe	= 0x10,
	PciCapMSIX	= 0x11,
};
253 
/* pcie extended capability ids, as matched by pciecap */
enum {
	PcieAERC	= 1,
	PcieVC		= 2,
	PcieSNC		= 3,
	PciePBC		= 4,
};
260 
/* offset within the AERC capability */
enum {
	AercCCR		= 0x18,		/* control register */
};
264 
/* offsets within the pcie capability, and a field mask */
enum {
	PcieCTL		= 8,
	PcieLCR		= 12,
	PcieMRD		= 7<<12,	/* maximum read size */
};
270 
271 static int
272 pcicap(Pcidev *p, int cap)
273 {
274 	int i, c, off;
275 
276 	pcicapdbg("pcicap: %x:%d\n", p->vid, p->did);
277 	off = 0x34;			/* 0x14 for cardbus */
278 	for(i = 48; i--; ){
279 		pcicapdbg("\t" "loop %x\n", off);
280 		off = pcicfgr8(p, off);
281 		pcicapdbg("\t" "pcicfgr8 %x\n", off);
282 		if(off < 0x40)
283 			break;
284 		off &= ~3;
285 		c = pcicfgr8(p, off);
286 		pcicapdbg("\t" "pcicfgr8 %x\n", c);
287 		if(c == 0xff)
288 			break;
289 		if(c == cap)
290 			return off;
291 		off++;
292 	}
293 	return 0;
294 }
295 
296 /*
297  * this function doesn't work because pcicgr32 doesn't have access
298  * to the pcie extended configuration space.
299  */
300 static int
301 pciecap(Pcidev *p, int cap)
302 {
303 	uint off, i;
304 
305 	off = 0x100;
306 	while(((i = pcicfgr32(p, off)) & 0xffff) != cap){
307 		off = i >> 20;
308 		print("m10g: pciecap offset = %ud",  off);
309 		if(off < 0x100 || off >= 4*KiB - 1)
310 			return 0;
311 	}
312 	print("m10g: pciecap found = %ud",  off);
313 	return off;
314 }
315 
316 static int
317 setpcie(Pcidev *p)
318 {
319 	int off;
320 
321 	/* set 4k writes */
322 	off = pcicap(p, PciCapPCIe);
323 	if(off < 64)
324 		return -1;
325 	off += PcieCTL;
326 	pcicfgw16(p, off, (pcicfgr16(p, off) & ~PcieMRD) | 5<<12);
327 	return 0;
328 }
329 
330 static int
331 whichfw(Pcidev *p)
332 {
333 	char *s;
334 	int i, off, lanes, ecrc;
335 	ulong cap;
336 
337 	/* check the number of configured lanes. */
338 	off = pcicap(p, PciCapPCIe);
339 	if(off < 64)
340 		return -1;
341 	off += PcieLCR;
342 	cap = pcicfgr16(p, off);
343 	lanes = (cap>>4) & 0x3f;
344 
345 	/* check AERC register.  we need it on.  */
346 	off = pciecap(p, PcieAERC);
347 	print("; offset %d returned\n", off);
348 	cap = 0;
349 	if(off != 0){
350 		off += AercCCR;
351 		cap = pcicfgr32(p, off);
352 		print("m10g: %lud cap\n", cap);
353 	}
354 	ecrc = (cap>>4) & 0xf;
355 	/* if we don't like the aerc, kick it here. */
356 
357 	print("m10g: %d lanes; ecrc=%d; ", lanes, ecrc);
358 	if(s = getconf("myriforce")){
359 		i = atoi(s);
360 		if(i != 4*KiB || i != 2*KiB)
361 			i = 2*KiB;
362 		print("fw = %d [forced]\n", i);
363 		return i;
364 	}
365 	if(lanes <= 4)
366 		print("fw = 4096 [lanes]\n");
367 	else if(ecrc & 10)
368 		print("fw = 4096 [ecrc set]\n");
369 	else
370 		print("fw = 4096 [default]\n");
371 	return 4*KiB;
372 }
373 
374 static int
375 parseeprom(Ctlr *c)
376 {
377 	int i, j, k, l, bits;
378 	char *s;
379 
380 	dprint("m10g eprom:\n");
381 	s = c->eprom;
382 	bits = 3;
383 	for(i = 0; s[i] && i < Epromsz; i++){
384 		l = strlen(s+i);
385 		dprint("\t%s\n", s+i);
386 		if(strncmp(s+i, "MAC=", 4) == 0 && l == 4+12+5){
387 			bits ^= 1;
388 			j = i + 4;
389 			for(k = 0; k < 6; k++)
390 				c->ra[k] = strtoul(s+j+3*k, 0, 16);
391 		}else if(strncmp(s+i, "SN=", 3) == 0){
392 			bits ^= 2;
393 			c->serial = atoi(s+i+3);
394 		}
395 		i += l;
396 	}
397 	if(bits)
398 		return -1;
399 	return 0;
400 }
401 
402 static ushort
403 pbit16(ushort i)
404 {
405 	ushort j;
406 	uchar *p;
407 
408 	p = (uchar*)&j;
409 	p[1] = i;
410 	p[0] = i>>8;
411 	return j;
412 }
413 
414 static ushort
415 gbit16(uchar i[2])
416 {
417 	ushort j;
418 
419 	j  = i[1];
420 	j |= i[0]<<8;
421 	return j;
422 }
423 
424 static ulong
425 pbit32(ulong i)
426 {
427 	ulong j;
428 	uchar *p;
429 
430 	p = (uchar*)&j;
431 	p[3] = i;
432 	p[2] = i>>8;
433 	p[1] = i>>16;
434 	p[0] = i>>24;
435 	return j;
436 }
437 
438 static ulong
439 gbit32(uchar i[4])
440 {
441 	ulong j;
442 
443 	j  = i[3];
444 	j |= i[2]<<8;
445 	j |= i[1]<<16;
446 	j |= i[0]<<24;
447 	return j;
448 }
449 
450 static void
451 prepcmd(ulong *cmd, int i)
452 {
453 	while(i-- > 0)
454 		cmd[i] = pbit32(cmd[i]);
455 }
456 
457 /*
458  * the command looks like this (int 32bit integers)
459  * cmd type
460  * addr (low)
461  * addr (high)
462  * pad (used for dma testing)
463  * response (high)
464  * response (low)
465  * 40 byte = 5 int pad.
466  */
467 
468 ulong
469 cmd(Ctlr *c, int type, uvlong data)
470 {
471 	ulong buf[16], i;
472 	Cmd *cmd;
473 
474 	qlock(&c->cmdl);
475 	cmd = c->cmd;
476 	cmd->i[1] = Noconf;
477 	memset(buf, 0, sizeof buf);
478 	buf[0] = type;
479 	buf[1] = data;
480 	buf[2] = data >> 32;
481 	buf[4] = c->cprt >> 32;
482 	buf[5] = c->cprt;
483 	prepcmd(buf, 6);
484 	coherence();
485 	memmove(c->ram + Cmdoff, buf, sizeof buf);
486 
487 	if(waserror())
488 		nexterror();
489 	for(i = 0; i < 15; i++){
490 		if(cmd->i[1] != Noconf){
491 			poperror();
492 			i = gbit32(cmd->c);
493 			qunlock(&c->cmdl);
494 			if(cmd->i[1] != 0)
495 				dprint("[%lux]", i);
496 			return i;
497 		}
498 		tsleep(&up->sleep, return0, 0, 1);
499 	}
500 	qunlock(&c->cmdl);
501 	iprint("m10g: cmd timeout [%ux %ux] cmd=%d\n",
502 		cmd->i[0], cmd->i[1], type);
503 	error(Etimeout);
504 	return ~0;			/* silence! */
505 }
506 
507 ulong
508 maccmd(Ctlr *c, int type, uchar *m)
509 {
510 	ulong buf[16], i;
511 	Cmd *cmd;
512 
513 	qlock(&c->cmdl);
514 	cmd = c->cmd;
515 	cmd->i[1] = Noconf;
516 	memset(buf, 0, sizeof buf);
517 	buf[0] = type;
518 	buf[1] = m[0]<<24 | m[1]<<16 | m[2]<<8 | m[3];
519 	buf[2] = m[4]<< 8 | m[5];
520 	buf[4] = c->cprt >> 32;
521 	buf[5] = c->cprt;
522 	prepcmd(buf, 6);
523 	coherence();
524 	memmove(c->ram + Cmdoff, buf, sizeof buf);
525 
526 	if(waserror())
527 		nexterror();
528 	for(i = 0; i < 15; i++){
529 		if(cmd->i[1] != Noconf){
530 			poperror();
531 			i = gbit32(cmd->c);
532 			qunlock(&c->cmdl);
533 			if(cmd->i[1] != 0)
534 				dprint("[%lux]", i);
535 			return i;
536 		}
537 		tsleep(&up->sleep, return0, 0, 1);
538 	}
539 	qunlock(&c->cmdl);
540 	iprint("m10g: maccmd timeout [%ux %ux] cmd=%d\n",
541 		cmd->i[0], cmd->i[1], type);
542 	error(Etimeout);
543 	return ~0;			/* silence! */
544 }
545 
/*
 * multipliers applied to the length by dmatestcmd.
 * remove this garbage after testing
 */
enum {
	DMAread		= 1<<16,
	DMAwrite	= 1<<0,
};
551 
552 ulong
553 dmatestcmd(Ctlr *c, int type, uvlong addr, int len)
554 {
555 	ulong buf[16], i;
556 
557 	memset(buf, 0, sizeof buf);
558 	memset(c->cmd, Noconf, sizeof *c->cmd);
559 	buf[0] = Cdmatest;
560 	buf[1] = addr;
561 	buf[2] = addr >> 32;
562 	buf[3] = len * type;
563 	buf[4] = c->cprt >> 32;
564 	buf[5] = c->cprt;
565 	prepcmd(buf, 6);
566 	coherence();
567 	memmove(c->ram + Cmdoff, buf, sizeof buf);
568 
569 	if(waserror())
570 		nexterror();
571 	for(i = 0; i < 15; i++){
572 		if(c->cmd->i[1] != Noconf){
573 			i = gbit32(c->cmd->c);
574 			if(i == 0)
575 				error(Eio);
576 			poperror();
577 			return i;
578 		}
579 		tsleep(&up->sleep, return0, 0, 5);
580 	}
581 	error(Etimeout);
582 	return ~0;			/* silence! */
583 }
584 
585 ulong
586 rdmacmd(Ctlr *c, int on)
587 {
588 	ulong buf[16], i;
589 
590 	memset(buf, 0, sizeof buf);
591 	c->cmd->i[0] = 0;
592 	coherence();
593 	buf[0] = c->cprt >> 32;
594 	buf[1] = c->cprt;
595 	buf[2] = Noconf;
596 	buf[3] = c->cprt >> 32;
597 	buf[4] = c->cprt;
598 	buf[5] = on;
599 	prepcmd(buf, 6);
600 	memmove(c->ram + Rdmaoff, buf, sizeof buf);
601 
602 	if(waserror())
603 		nexterror();
604 	for(i = 0; i < 20; i++){
605 		if(c->cmd->i[0] == Noconf){
606 			poperror();
607 			return gbit32(c->cmd->c);
608 		}
609 		tsleep(&up->sleep, return0, 0, 1);
610 	}
611 	error(Etimeout);
612 	iprint("m10g: rdmacmd timeout\n");
613 	return ~0;			/* silence! */
614 }
615 
616 static int
617 loadfw(Ctlr *c, int *align)
618 {
619 	ulong *f, *s, sz;
620 	int i;
621 
622 	if((*align = whichfw(c->pcidev)) == 4*KiB){
623 		f = (ulong*)fw4k;
624 		sz = sizeof fw4k;
625 	}else{
626 		f = (ulong*)fw2k;
627 		sz = sizeof fw2k;
628 	}
629 
630 	s = (ulong*)(c->ram + Fwoffset);
631 	for(i = 0; i < sz / 4; i++)
632 		s[i] = f[i];
633 	return sz & ~3;
634 }
635 
636 static int
637 bootfw(Ctlr *c)
638 {
639 	int i, sz, align;
640 	ulong buf[16];
641 	Cmd* cmd;
642 
643 	if((sz = loadfw(c, &align)) == 0)
644 		return 0;
645 	dprint("bootfw %d bytes ... ", sz);
646 	cmd = c->cmd;
647 
648 	memset(buf, 0, sizeof buf);
649 	c->cmd->i[0] = 0;
650 	coherence();
651 	buf[0] = c->cprt >> 32;	/* upper dma target address */
652 	buf[1] = c->cprt;	/* lower */
653 	buf[2] = Noconf;	/* writeback */
654 	buf[3] = Fwoffset + 8,
655 	buf[4] = sz - 8;
656 	buf[5] = 8;
657 	buf[6] = 0;
658 	prepcmd(buf, 7);
659 	coherence();
660 	memmove(c->ram + Fwsubmt, buf, sizeof buf);
661 
662 	for(i = 0; i < 20; i++){
663 		if(cmd->i[0] == Noconf)
664 			break;
665 		delay(1);
666 	}
667 	dprint("[%lux %lux]", gbit32(cmd->c), gbit32(cmd->c+4));
668 	if(i == 20){
669 		print("m10g: cannot load fw\n");
670 		return -1;
671 	}
672 	dprint("\n");
673 	c->tx.segsz = align;
674 	return 0;
675 }
676 
677 static int
678 kickthebaby(Pcidev *p, Ctlr *c)
679 {
680 	/* don't kick the baby! */
681 	ulong code;
682 
683 	pcicfgw8(p,  0x10 + c->boot, 0x3);
684 	pcicfgw32(p, 0x18 + c->boot, 0xfffffff0);
685 	code = pcicfgr32(p, 0x14 + c->boot);
686 
687 	dprint("reboot status = %lux\n", code);
688 	if(code != 0xfffffff0)
689 		return -1;
690 	return 0;
691 }
692 
693 typedef struct {
694 	uchar	len[4];
695 	uchar	type[4];
696 	char	version[128];
697 	uchar	globals[4];
698 	uchar	ramsz[4];
699 	uchar	specs[4];
700 	uchar	specssz[4];
701 } Fwhdr;
702 
/* firmware type codes: four ascii characters, first character in the top byte */
enum {
	Tmx	= 'M'<<24 | 'X'<<16 | ' '<<8 | ' ',
	Tpcie	= 'p'<<24 | 'c'<<16 | 'i'<<8 | 'e',
	Teth	= 'E'<<24 | 'T'<<16 | 'H'<<8 | ' ',
	Tmcp0	= 'M'<<24 | 'C'<<16 | 'P'<<8 | '0',
};
709 
710 static char *
711 fwtype(ulong type)
712 {
713 	switch(type){
714 	case Tmx:
715 		return "mx";
716 	case Tpcie:
717 		return "PCIe";
718 	case Teth:
719 		return "eth";
720 	case Tmcp0:
721 		return "mcp0";
722 	}
723 	return "*GOK*";
724 }
725 
726 static int
727 chkfw(Ctlr *c)
728 {
729 	ulong off, type;
730 	Fwhdr *h;
731 
732 	off = gbit32(c->ram+0x3c);
733 	dprint("firmware %lux\n", off);
734 	if((off&3) || off + sizeof *h > c->ramsz){
735 		print("!m10g: bad firmware %lux\n", off);
736 		return -1;
737 	}
738 	h = (Fwhdr*)(c->ram + off);
739 	type = gbit32(h->type);
740 	dprint("\t" "type	%s\n", fwtype(type));
741 	dprint("\t" "vers	%s\n", h->version);
742 	dprint("\t" "ramsz	%lux\n", gbit32(h->ramsz));
743 	if(type != Teth){
744 		print("!m10g: bad card type %s\n", fwtype(type));
745 		return -1;
746 	}
747 
748 	return bootfw(c) || rdmacmd(c, 0);
749 }
750 
751 static int
752 reset(Ether *e, Ctlr *c)
753 {
754 	ulong i, sz;
755 
756 	if(waserror()){
757 		print("m10g: reset error\n");
758 		nexterror();
759 		return -1;
760 	}
761 
762 	chkfw(c);
763 	cmd(c, Creset, 0);
764 
765 	cmd(c, CSintrqsz, c->done.n * sizeof *c->done.entry);
766 	cmd(c, CSintrqdma, c->done.busaddr);
767 	c->irqack =   (ulong*)(c->ram + cmd(c, CGirqackoff, 0));
768 	/* required only if we're not doing msi? */
769 	c->irqdeass = (ulong*)(c->ram + cmd(c, CGirqdeassoff, 0));
770 	/* this is the driver default, why fiddle with this? */
771 	c->coal = (ulong*)(c->ram + cmd(c, CGcoaloff, 0));
772 	*c->coal = pbit32(25);
773 
774 	dprint("dma stats:\n");
775 	rdmacmd(c, 1);
776 	sz = c->tx.segsz;
777 	i = dmatestcmd(c, DMAread, c->done.busaddr, sz);
778 	print("m10g: read %lud MB/s;", ((i>>16)*sz*2) / (i&0xffff));
779 	i = dmatestcmd(c, DMAwrite, c->done.busaddr, sz);
780 	print(" write %lud MB/s;", ((i>>16)*sz*2) / (i&0xffff));
781 	i = dmatestcmd(c, DMAwrite|DMAread, c->done.busaddr, sz);
782 	print(" r/w %lud MB/s\n", ((i>>16)*sz*2*2) / (i&0xffff));
783 	memset(c->done.entry, 0, c->done.n * sizeof *c->done.entry);
784 
785 	maccmd(c, CSmac, c->ra);
786 //	cmd(c, Cnopromisc, 0);
787 	cmd(c, Cenablefc, 0);
788 	e->maxmtu = Maxmtu;
789 	cmd(c, CSmtu, e->maxmtu);
790 	dprint("CSmtu %d...\n", e->maxmtu);
791 
792 	poperror();
793 	return 0;
794 }
795 
796 static void
797 ctlrfree(Ctlr *c)
798 {
799 	/* free up all the Block*s, too */
800 	free(c->tx.host);
801 	free(c->sm.host);
802 	free(c->bg.host);
803 	free(c->cmd);
804 	free(c->done.entry);
805 	free(c->stats);
806 	free(c);
807 }
808 
809 static int
810 setmem(Pcidev *p, Ctlr *c)
811 {
812 	ulong i;
813 	uvlong raddr;
814 	Done *d;
815 	void *mem;
816 
817 	c->tx.segsz = 2048;
818 	c->ramsz = 2*MiB - (2*48*KiB + 32*KiB) - 0x100;
819 	if(c->ramsz > p->mem[0].size)
820 		return -1;
821 
822 	raddr = p->mem[0].bar & ~0x0F;
823 	mem = vmap(raddr, p->mem[0].size);
824 	if(mem == nil){
825 		print("m10g: can't map %8.8lux\n", p->mem[0].bar);
826 		return -1;
827 	}
828 	dprint("%llux <- vmap(mem[0].size = %ux)\n", raddr, p->mem[0].size);
829 	c->port = raddr;
830 	c->ram = mem;
831 	c->cmd = malign(sizeof *c->cmd);
832 	c->cprt = PCIWADDR(c->cmd);
833 
834 	d = &c->done;
835 	d->n = Maxslots;
836 	d->m = d->n - 1;
837 	i = d->n * sizeof *d->entry;
838 	d->entry = malign(i);
839 	memset(d->entry, 0, i);
840 	d->busaddr = PCIWADDR(d->entry);
841 
842 	c->stats = malign(sizeof *c->stats);
843 	memset(c->stats, 0, sizeof *c->stats);
844 	c->statsprt = PCIWADDR(c->stats);
845 
846 	memmove(c->eprom, c->ram + c->ramsz - Epromsz, Epromsz-2);
847 	return setpcie(p) || parseeprom(c);
848 }
849 
850 static Rx*
851 whichrx(Ctlr *c, int sz)
852 {
853 	if(sz <= smpool.size)
854 		return &c->sm;
855 	return &c->bg;
856 }
857 
858 static Block*
859 balloc(Rx* rx)
860 {
861 	Block *bp;
862 
863 	ilock(rx->pool);
864 	if((bp = rx->pool->head) != nil){
865 		rx->pool->head = bp->next;
866 		bp->next = nil;
867 		_xinc(&bp->ref);	/* prevent bp from being freed */
868 		rx->pool->n--;
869 	}
870 	iunlock(rx->pool);
871 	return bp;
872 }
873 
874 static void
875 rbfree(Block *b, Bpool *p)
876 {
877 	b->rp = b->wp = (uchar*)PGROUND((uintptr)b->base);
878  	b->flag &= ~(Bipck | Budpck | Btcpck | Bpktck);
879 
880 	ilock(p);
881 	b->next = p->head;
882 	p->head = b;
883 	p->n++;
884 	p->cnt++;
885 	iunlock(p);
886 }
887 
888 static void
889 smbfree(Block *b)
890 {
891 	rbfree(b, &smpool);
892 }
893 
894 static void
895 bgbfree(Block *b)
896 {
897 	rbfree(b, &bgpool);
898 }
899 
900 static void
901 replenish(Rx *rx)
902 {
903 	ulong buf[16], i, idx, e;
904 	Bpool *p;
905 	Block *b;
906 
907 	p = rx->pool;
908 	if(p->n < 8)
909 		return;
910 	memset(buf, 0, sizeof buf);
911 	e = (rx->i - rx->cnt) & ~7;
912 	e += rx->n;
913 	while(p->n >= 8 && e){
914 		idx = rx->cnt & rx->m;
915 		for(i = 0; i < 8; i++){
916 			b = balloc(rx);
917 			buf[i*2]   = pbit32((uvlong)PCIWADDR(b->wp) >> 32);
918 			buf[i*2+1] = pbit32(PCIWADDR(b->wp));
919 			rx->host[idx+i] = b;
920 			assert(b);
921 		}
922 		memmove(rx->lanai + 2*idx, buf, sizeof buf);
923 		coherence();
924 		rx->cnt += 8;
925 		e -= 8;
926 	}
927 	if(e && p->n > 7+1)
928 		print("m10g: should panic? pool->n = %d\n", p->n);
929 }
930 
931 /*
932  * future:
933  * if (c->mtrr >= 0) {
934  *	c->tx.wcfifo = c->ram+0x200000;
935  *	c->sm.wcfifo = c->ram+0x300000;
936  *	c->bg.wcfifo = c->ram+0x340000;
937  * }
938  */
939 
/*
 * smallest power of two >= j; 1 for j <= 1.
 * the result is capped at 1<<30: for larger j the old loop
 * shifted 1 past the sign bit of an int.
 */
static int
nextpow(int j)
{
	int p;

	for(p = 1; p < j; p <<= 1)
		if(p >= 1<<30)
			break;
	return p;
}
949 
950 static void*
951 emalign(int sz)
952 {
953 	void *v;
954 
955 	v = malign(sz);
956 	if(v == nil)
957 		error(Enomem);
958 	memset(v, 0, sz);
959 	return v;
960 }
961 
962 static void
963 open0(Ether *e, Ctlr *c)
964 {
965 	Block *b;
966 	int i, sz, entries;
967 
968 	entries = cmd(c, CGsendrgsz, 0) / sizeof *c->tx.lanai;
969 	c->tx.lanai = (Send*)(c->ram + cmd(c, CGsendoff, 0));
970 	c->tx.host  = emalign(entries * sizeof *c->tx.host);
971 	c->tx.bring = emalign(entries * sizeof *c->tx.bring);
972 	c->tx.n = entries;
973 	c->tx.m = entries-1;
974 
975 	entries = cmd(c, CGrxrgsz, 0)/8;
976 	c->sm.pool = &smpool;
977 	cmd(c, CSsmallsz, c->sm.pool->size);
978 	c->sm.lanai = (ulong*)(c->ram + cmd(c, CGsmallrxoff, 0));
979 	c->sm.n = entries;
980 	c->sm.m = entries-1;
981 	c->sm.host = emalign(entries * sizeof *c->sm.host);
982 
983 	c->bg.pool = &bgpool;
984 	c->bg.pool->size = nextpow(2 + e->maxmtu);  /* 2-byte alignment pad */
985 	cmd(c, CSbigsz, c->bg.pool->size);
986 	c->bg.lanai = (ulong*)(c->ram + cmd(c, CGbigrxoff, 0));
987 	c->bg.n = entries;
988 	c->bg.m = entries-1;
989 	c->bg.host = emalign(entries * sizeof *c->bg.host);
990 
991 	sz = c->sm.pool->size + BY2PG;
992 	for(i = 0; i < c->sm.n; i++){
993 		if((b = allocb(sz)) == 0)
994 			break;
995 		b->free = smbfree;
996 		freeb(b);
997 	}
998 	sz = c->bg.pool->size + BY2PG;
999 	for(i = 0; i < c->bg.n; i++){
1000 		if((b = allocb(sz)) == 0)
1001 			break;
1002 		b->free = bgbfree;
1003 		freeb(b);
1004 	}
1005 
1006 	cmd(c, CSstatsdma, c->statsprt);
1007 	c->linkstat = ~0;
1008 	c->nrdma = 15;
1009 
1010 	cmd(c, Cetherup, 0);
1011 }
1012 
1013 static Block*
1014 nextblock(Ctlr *c)
1015 {
1016 	uint i;
1017 	ushort l, k;
1018 	Block *b;
1019 	Done *d;
1020 	Rx *rx;
1021 	Slot *s;
1022 	Slotparts *sp;
1023 
1024 	d = &c->done;
1025 	s = d->entry;
1026 	i = d->i & d->m;
1027 	sp = (Slotparts *)(s + i);
1028 	l = sp->len;
1029 	if(l == 0)
1030 		return 0;
1031 	k = sp->cksum;
1032 	s[i] = 0;
1033 	d->i++;
1034 	l = gbit16((uchar*)&l);
1035 //dprint("nextb: i=%d l=%d\n", d->i, l);
1036 	rx = whichrx(c, l);
1037 	if(rx->i >= rx->cnt){
1038 		iprint("m10g: overrun\n");
1039 		return 0;
1040 	}
1041 	i = rx->i & rx->m;
1042 	b = rx->host[i];
1043 	rx->host[i] = 0;
1044 	if(b == 0){
1045 		iprint("m10g: error rx to no block.  memory is hosed.\n");
1046 		return 0;
1047 	}
1048 	rx->i++;
1049 
1050 	b->flag |= Bipck|Btcpck|Budpck;
1051 	b->checksum = k;
1052 	b->rp += 2;
1053 	b->wp += 2+l;
1054 	b->lim = b->wp;			/* lie like a dog. */
1055 	return b;
1056 }
1057 
1058 static int
1059 rxcansleep(void *v)
1060 {
1061 	Ctlr *c;
1062 	Slot *s;
1063 	Slotparts *sp;
1064 	Done *d;
1065 
1066 	c = v;
1067 	d = &c->done;
1068 	s = c->done.entry;
1069 	sp = (Slotparts *)(s + (d->i & d->m));
1070 	if(sp->len != 0)
1071 		return -1;
1072 	c->irqack[0] = pbit32(3);
1073 	return 0;
1074 }
1075 
1076 static void
1077 m10rx(void *v)
1078 {
1079 	Ether *e;
1080 	Ctlr *c;
1081 	Block *b;
1082 
1083 	e = v;
1084 	c = e->ctlr;
1085 	for(;;){
1086 		replenish(&c->sm);
1087 		replenish(&c->bg);
1088 		sleep(&c->rxrendez, rxcansleep, c);
1089 		while(b = nextblock(c))
1090 			etheriq(e, b, 1);
1091 	}
1092 }
1093 
1094 static void
1095 txcleanup(Tx *tx, ulong n)
1096 {
1097 	Block *b;
1098 	uint j, l, m;
1099 
1100 	if(tx->npkt == n)
1101 		return;
1102 	l = 0;
1103 	m = tx->m;
1104 	/*
1105 	 * if tx->cnt == tx->i, yet tx->npkt == n-1, we just
1106 	 * caught ourselves and myricom card updating.
1107 	 */
1108 	for(;; tx->cnt++){
1109 		j = tx->cnt & tx->m;
1110 		if(b = tx->bring[j]){
1111 			tx->bring[j] = 0;
1112 			tx->nbytes += BLEN(b);
1113 			freeb(b);
1114 			if(++tx->npkt == n)
1115 				return;
1116 		}
1117 		if(tx->cnt == tx->i)
1118 			return;
1119 		if(l++ == m){
1120 			iprint("m10g: tx ovrun: %lud %lud\n", n, tx->npkt);
1121 			return;
1122 		}
1123 	}
1124 }
1125 
1126 static int
1127 txcansleep(void *v)
1128 {
1129 	Ctlr *c;
1130 
1131 	c = v;
1132 	if(c->tx.cnt != c->tx.i && c->tx.npkt != gbit32(c->stats->txcnt))
1133 		return -1;
1134 	return 0;
1135 }
1136 
1137 static void
1138 txproc(void *v)
1139 {
1140 	Ether *e;
1141 	Ctlr *c;
1142 	Tx *tx;
1143 
1144 	e = v;
1145 	c = e->ctlr;
1146 	tx = &c->tx;
1147 	for(;;){
1148  		sleep(&c->txrendez, txcansleep, c);
1149 		txcleanup(tx, gbit32(c->stats->txcnt));
1150 	}
1151 }
1152 
1153 static void
1154 submittx(Tx *tx, int n)
1155 {
1156 	Send *l, *h;
1157 	int i0, i, m;
1158 
1159 	m = tx->m;
1160 	i0 = tx->i & m;
1161 	l = tx->lanai;
1162 	h = tx->host;
1163 	for(i = n-1; i >= 0; i--)
1164 		memmove(l+(i + i0 & m), h+(i + i0 & m), sizeof *h);
1165 	tx->i += n;
1166 //	coherence();
1167 }
1168 
1169 static int
1170 nsegments(Block *b, int segsz)
1171 {
1172 	uintptr bus, end, slen, len;
1173 	int i;
1174 
1175 	bus = PCIWADDR(b->rp);
1176 	i = 0;
1177 	for(len = BLEN(b); len; len -= slen){
1178 		end = bus + segsz & ~(segsz-1);
1179 		slen = end - bus;
1180 		if(slen > len)
1181 			slen = len;
1182 		bus += slen;
1183 		i++;
1184 	}
1185 	return i;
1186 }
1187 
1188 static void
1189 m10gtransmit(Ether *e)
1190 {
1191 	ushort slen;
1192 	ulong i, cnt, rdma, nseg, count, end, bus, len, segsz;
1193 	uchar flags;
1194 	Block *b;
1195 	Ctlr *c;
1196 	Send *s, *s0, *s0m8;
1197 	Tx *tx;
1198 
1199 	c = e->ctlr;
1200 	tx = &c->tx;
1201 	segsz = tx->segsz;
1202 
1203 	qlock(tx);
1204 	count = 0;
1205 	s = tx->host + (tx->i & tx->m);
1206 	cnt = tx->cnt;
1207 	s0 =   tx->host + (cnt & tx->m);
1208 	s0m8 = tx->host + ((cnt - 8) & tx->m);
1209 	i = tx->i;
1210 	for(; s >= s0 || s < s0m8; i += nseg){
1211 		if((b = qget(e->oq)) == nil)
1212 			break;
1213 		flags = SFfirst|SFnotso;
1214 		if((len = BLEN(b)) < 1520)
1215 			flags |= SFsmall;
1216 		rdma = nseg = nsegments(b, segsz);
1217 		bus = PCIWADDR(b->rp);
1218 		for(; len; len -= slen){
1219 			end = (bus + segsz) & ~(segsz-1);
1220 			slen = end - bus;
1221 			if(slen > len)
1222 				slen = len;
1223 			s->low = pbit32(bus);
1224 			s->len = pbit16(slen);
1225 			s->nrdma = rdma;
1226 			s->flags = flags;
1227 
1228 			bus += slen;
1229 			if(++s ==  tx->host + tx->n)
1230 				s = tx->host;
1231 			count++;
1232 			flags &= ~SFfirst;
1233 			rdma = 1;
1234 		}
1235 		tx->bring[(i + nseg - 1) & tx->m] = b;
1236 		if(1 || count > 0){
1237 			submittx(tx, count);
1238 			count = 0;
1239 			cnt = tx->cnt;
1240 			s0 =   tx->host + (cnt & tx->m);
1241 			s0m8 = tx->host + ((cnt - 8) & tx->m);
1242 		}
1243 	}
1244 	qunlock(tx);
1245 }
1246 
1247 static void
1248 checkstats(Ether *e, Ctlr *c, Stats *s)
1249 {
1250 	ulong i;
1251 
1252 	if(s->updated == 0)
1253 		return;
1254 
1255 	i = gbit32(s->linkstat);
1256 	if(c->linkstat != i){
1257 		e->link = i;
1258 		if(c->linkstat = i)
1259 			dprint("m10g: link up\n");
1260 		else
1261 			dprint("m10g: link down\n");
1262 	}
1263 	i = gbit32(s->nrdma);
1264 	if(i != c->nrdma){
1265 		dprint("m10g: rdma timeout %ld\n", i);
1266 		c->nrdma = i;
1267 	}
1268 }
1269 
1270 static void
1271 waitintx(Ctlr *c)
1272 {
1273 	int i;
1274 
1275 	for(i = 0; i < 1024*1024; i++){
1276 		if(c->stats->valid == 0)
1277 			break;
1278 		coherence();
1279 	}
1280 }
1281 
1282 static void
1283 m10ginterrupt(Ureg *, void *v)
1284 {
1285 	Ether *e;
1286 	Ctlr *c;
1287 
1288 	e = v;
1289 	c = e->ctlr;
1290 
1291 	if(c->state != Runed || c->stats->valid == 0)	/* not ready for us? */
1292 		return;
1293 
1294 	if(c->stats->valid & 1)
1295 		wakeup(&c->rxrendez);
1296 	if(gbit32(c->stats->txcnt) != c->tx.npkt)
1297 		wakeup(&c->txrendez);
1298 	if(c->msi == 0)
1299 		*c->irqdeass = 0;
1300 	else
1301 		c->stats->valid = 0;
1302 	waitintx(c);
1303 	checkstats(e, c, c->stats);
1304 	c->irqack[1] = pbit32(3);
1305 }
1306 
1307 static void
1308 m10gattach(Ether *e)
1309 {
1310 	Ctlr *c;
1311 	char name[12];
1312 
1313 	dprint("m10gattach\n");
1314 
1315 	qlock(e->ctlr);
1316 	c = e->ctlr;
1317 	if(c->state != Detached){
1318 		qunlock(c);
1319 		return;
1320 	}
1321 	if(waserror()){
1322 		c->state = Detached;
1323 		qunlock(c);
1324 		nexterror();
1325 	}
1326 	reset(e, c);
1327 	c->state = Attached;
1328 	open0(e, c);
1329 	if(c->kprocs == 0){
1330 		c->kprocs++;
1331 		snprint(name, sizeof name, "#l%drxproc", e->ctlrno);
1332 		kproc(name, m10rx, e);
1333 		snprint(name, sizeof name, "#l%dtxproc", e->ctlrno);
1334 		kproc(name, txproc, e);
1335 	}
1336 	c->state = Runed;
1337 	qunlock(c);
1338 	poperror();
1339 }
1340 
1341 static int
1342 m10gdetach(Ctlr *c)
1343 {
1344 	dprint("m10gdetach\n");
1345 //	reset(e->ctlr);
1346 	vunmap(c->ram, c->pcidev->mem[0].size);
1347 	ctlrfree(c);
1348 	return -1;
1349 }
1350 
1351 static int
1352 lstcount(Block *b)
1353 {
1354 	int i;
1355 
1356 	i = 0;
1357 	for(; b; b = b->next)
1358 		i++;
1359 	return i;
1360 }
1361 
1362 static long
1363 m10gifstat(Ether *e, void *v, long n, ulong off)
1364 {
1365 	int l;
1366 	char *p;
1367 	Ctlr *c;
1368 	Stats s;
1369 
1370 	c = e->ctlr;
1371 	p = malloc(READSTR+1);
1372 	l = 0;
1373 	/* no point in locking this because this is done via dma. */
1374 	memmove(&s, c->stats, sizeof s);
1375 
1376 	// l +=
1377 	snprint(p+l, READSTR,
1378 		"txcnt = %lud\n"  "linkstat = %lud\n" 	"dlink = %lud\n"
1379 		"derror = %lud\n" "drunt = %lud\n" 	"doverrun = %lud\n"
1380 		"dnosm = %lud\n"  "dnobg = %lud\n"	"nrdma = %lud\n"
1381 		"txstopped = %ud\n" "down = %ud\n" 	"updated = %ud\n"
1382 		"valid = %ud\n\n"
1383 		"tx pkt = %lud\n"  "tx bytes = %lld\n"
1384 		"tx cnt = %ud\n"  "tx n = %ud\n"	"tx i = %ud\n"
1385 		"sm cnt = %ud\n"  "sm i = %ud\n"	"sm n = %ud\n"
1386 		"sm lst = %ud\n"
1387 		"bg cnt = %ud\n"  "bg i = %ud\n"	"bg n = %ud\n"
1388 		"bg lst = %ud\n"
1389 		"segsz = %lud\n"   "coal = %lud\n",
1390 		gbit32(s.txcnt),  gbit32(s.linkstat),	gbit32(s.dlink),
1391 		gbit32(s.derror), gbit32(s.drunt),	gbit32(s.doverrun),
1392 		gbit32(s.dnosm),  gbit32(s.dnobg),	gbit32(s.nrdma),
1393 		s.txstopped,  s.down, s.updated, s.valid,
1394 		c->tx.npkt, c->tx.nbytes,
1395 		c->tx.cnt, c->tx.n, c->tx.i,
1396 		c->sm.cnt, c->sm.i, c->sm.pool->n, lstcount(c->sm.pool->head),
1397 		c->bg.cnt, c->bg.i, c->bg.pool->n, lstcount(c->bg.pool->head),
1398 		c->tx.segsz, gbit32((uchar*)c->coal));
1399 
1400 	n = readstr(off, v, n, p);
1401 	free(p);
1402 	return n;
1403 }
1404 
1405 //static void
1406 //summary(Ether *e)
1407 //{
1408 //	char *buf;
1409 //	int n, i, j;
1410 //
1411 //	if(e == 0)
1412 //		return;
1413 //	buf = malloc(n=250);
1414 //	if(buf == 0)
1415 //		return;
1416 //
1417 //	snprint(buf, n, "oq\n");
1418 //	qsummary(e->oq, buf+3, n-3-1);
1419 //	iprint("%s", buf);
1420 //
1421 //	if(e->f) for(i = 0; e->f[i]; i++){
1422 //		j = snprint(buf, n, "f%d %d\n", i, e->f[i]->type);
1423 //		qsummary(e->f[i]->in, buf+j, n-j-1);
1424 //		print("%s", buf);
1425 //	}
1426 //
1427 //	free(buf);
1428 //}
1429 
1430 static void
1431 rxring(Ctlr *c)
1432 {
1433 	Done *d;
1434 	Slot *s;
1435 	Slotparts *sp;
1436 	int i;
1437 
1438 	d = &c->done;
1439 	s = d->entry;
1440 	for(i = 0; i < d->n; i++) {
1441 		sp = (Slotparts *)(s + i);
1442 		if(sp->len)
1443 			iprint("s[%d] = %d\n", i, sp->len);
1444 	}
1445 }
1446 
/* indices of the ctl messages understood by m10gctl */
enum {
	CMdebug		= 0,
	CMcoal		= 1,
	CMwakeup	= 2,
	CMtxwakeup	= 3,
	CMqsummary	= 4,
	CMrxring	= 5,
};
1455 
1456 static Cmdtab ctab[] = {
1457 	CMdebug,	"debug",	2,
1458 	CMcoal,		"coal",		2,
1459 	CMwakeup,	"wakeup",	1,
1460 	CMtxwakeup,	"txwakeup",	1,
1461 //	CMqsummary,	"q",		1,
1462 	CMrxring,	"rxring",	1,
1463 };
1464 
1465 static long
1466 m10gctl(Ether *e, void *v, long n)
1467 {
1468 	int i;
1469 	Cmdbuf *c;
1470 	Cmdtab *t;
1471 
1472 	dprint("m10gctl\n");
1473 	if(e->ctlr == nil)
1474 		error(Enonexist);
1475 
1476 	c = parsecmd(v, n);
1477 	if(waserror()){
1478 		free(c);
1479 		nexterror();
1480 	}
1481 	t = lookupcmd(c, ctab, nelem(ctab));
1482 	switch(t->index){
1483 	case CMdebug:
1484 		debug = (strcmp(c->f[1], "on") == 0);
1485 		break;
1486 	case CMcoal:
1487 		i = atoi(c->f[1]);
1488 		if(i < 0 || i > 1000)
1489 			error(Ebadarg);
1490 		*((Ctlr*)e->ctlr)->coal = pbit32(i);
1491 		break;
1492 	case CMwakeup:
1493 		wakeup(&((Ctlr*)e->ctlr)->rxrendez); /* you're kidding, right? */
1494 		break;
1495 	case CMtxwakeup:
1496 		wakeup(&((Ctlr*)e->ctlr)->txrendez); /* you're kidding, right? */
1497 		break;
1498 //	case CMqsummary:
1499 //		summary(e);
1500 //		break;
1501 	case CMrxring:
1502 		rxring(e->ctlr);
1503 		break;
1504 	default:
1505 		error(Ebadarg);
1506 	}
1507 	free(c);
1508 	poperror();
1509 	return n;
1510 }
1511 
1512 static void
1513 m10gshutdown(Ether *e)
1514 {
1515 	dprint("m10gshutdown\n");
1516 	m10gdetach(e->ctlr);
1517 }
1518 
1519 static void
1520 m10gpromiscuous(void *v, int on)
1521 {
1522 	Ether *e;
1523 	int i;
1524 
1525 	dprint("m10gpromiscuous\n");
1526 	e = v;
1527 	if(on)
1528 		i = Cpromisc;
1529 	else
1530 		i = Cnopromisc;
1531 	cmd(e->ctlr, i, 0);
1532 }
1533 
1534 static int	mcctab[]  = { CSleavemc, CSjoinmc };
1535 static char	*mcntab[] = { "leave", "join" };
1536 
1537 static void
1538 m10gmulticast(void *v, uchar *ea, int on)
1539 {
1540 	Ether *e;
1541 	int i;
1542 
1543 	dprint("m10gmulticast\n");
1544 	e = v;
1545 	if((i = maccmd(e->ctlr, mcctab[on], ea)) != 0)
1546 		print("m10g: can't %s %E: %d\n", mcntab[on], ea, i);
1547 }
1548 
1549 static void
1550 m10gpci(void)
1551 {
1552 	Pcidev *p;
1553 	Ctlr *t, *c;
1554 
1555 	t = 0;
1556 	for(p = 0; p = pcimatch(p, Vmyricom, 0); ){
1557 		switch(p->did){
1558 		case 0x8:		/* 8a */
1559 			break;
1560 		case 0x9:		/* 8a with msi-x fw */
1561 		case 0xa:		/* 8b */
1562 		case 0xb:		/* 8b2 */
1563 		case 0xc:		/* 2-8b2 */
1564 			/* untested */
1565 			break;
1566 		default:
1567 			print("etherm10g: unknown myricom did %#ux\n", p->did);
1568 			continue;
1569 		}
1570 		c = malloc(sizeof *c);
1571 		if(c == nil)
1572 			continue;
1573 		c->pcidev = p;
1574 		c->id = p->did<<16 | p->vid;
1575 		c->boot = pcicap(p, PciCapVND);
1576 //		kickthebaby(p, c);
1577 		pcisetbme(p);
1578 		if(setmem(p, c) == -1){
1579 			print("m10g: setmem failed\n");
1580 			free(c);
1581 			/* cleanup */
1582 			continue;
1583 		}
1584 		if(t)
1585 			t->next = c;
1586 		else
1587 			ctlrs = c;
1588 		t = c;
1589 	}
1590 }
1591 
1592 static int
1593 m10gpnp(Ether *e)
1594 {
1595 	Ctlr *c;
1596 
1597 	if(ctlrs == nil)
1598 		m10gpci();
1599 
1600 	for(c = ctlrs; c != nil; c = c->next)
1601 		if(c->active)
1602 			continue;
1603 		else if(e->port == 0 || e->port == c->port)
1604 			break;
1605 	if(c == nil)
1606 		return -1;
1607 	c->active = 1;
1608 
1609 	e->ctlr = c;
1610 	e->port = c->port;
1611 	e->irq = c->pcidev->intl;
1612 	e->tbdf = c->pcidev->tbdf;
1613 	e->mbps = 10000;
1614 	memmove(e->ea, c->ra, Eaddrlen);
1615 
1616 	e->attach = m10gattach;
1617 	e->detach = m10gshutdown;
1618 	e->transmit = m10gtransmit;
1619 	e->interrupt = m10ginterrupt;
1620 	e->ifstat = m10gifstat;
1621 	e->ctl = m10gctl;
1622 //	e->power = m10gpower;
1623 	e->shutdown = m10gshutdown;
1624 
1625 	e->arg = e;
1626 	e->promiscuous = m10gpromiscuous;
1627 	e->multicast = m10gmulticast;
1628 
1629 	return 0;
1630 }
1631 
1632 void
1633 etherm10glink(void)
1634 {
1635 	addethercard("m10g", m10gpnp);
1636 }
1637