xref: /plan9-contrib/sys/src/9/pc/ether82598.c (revision b94bb474148e9d24a82a427863d9c9eb4c20f4ae)
1 /*
2  * intel 10GB ethernet pci-express driver
3  * copyright © 2007, coraid, inc.
4  */
5 #include "u.h"
6 #include "../port/lib.h"
7 #include "mem.h"
8 #include "dat.h"
9 #include "fns.h"
10 #include "io.h"
11 #include "../port/error.h"
12 #include "../port/netif.h"
13 #include "etherif.h"
14 
15 /*
16  * // comments note conflicts with 82563-style drivers,
17  * and the registers are all different.
18  */
19 
/*
 * Register offsets, expressed as indices into the u32int register
 * window (byte offset / 4) so they can be used directly as c->reg[X].
 */
enum {
	/* general */
	Ctrl		= 0x00000/4,	/* Device Control */
	Status		= 0x00008/4,	/* Device Status */
	Ctrlext		= 0x00018/4,	/* Extended Device Control */
	Esdp		= 0x00020/4,	/* extended sdp control */
	Esodp		= 0x00028/4,	/* extended od sdp control */
	Ledctl		= 0x00200/4,	/* led control */
	Tcptimer	= 0x0004c/4,	/* tcp timer */
	Ecc		= 0x110b0/4,	/* errata ecc control magic */

	/* nvm */
	Eec		= 0x10010/4,	/* eeprom/flash control */
	Eerd		= 0x10014/4,	/* eeprom read */
	Fla		= 0x1001c/4,	/* flash access */
	Flop		= 0x1013c/4,	/* flash opcode */
	Grc		= 0x10200/4,	/* general rx control */

	/* interrupt */
	Icr		= 0x00800/4,	/* interrupt cause read */
	Ics		= 0x00808/4,	/* " set */
	Ims		= 0x00880/4,	/* " mask read/set */
	Imc		= 0x00888/4,	/* " mask clear */
	Iac		= 0x00810/4,	/* " auto clear */
	Iam		= 0x00890/4,	/* " auto mask enable */
	Itr		= 0x00820/4,	/* " throttling rate (0-19) */
	Ivar		= 0x00900/4,	/* " vector allocation regs. */
	/* msi interrupt */
	Msixt		= 0x0000/4,	/* msix table (bar3) */
	Msipba		= 0x2000/4,	/* msix pending bit array (bar3) */
	Pbacl		= 0x11068/4,	/* pba clear */
	Gpie		= 0x00898/4,	/* general purpose int enable */

	/* flow control */
	Pfctop		= 0x03008/4,	/* priority flow ctl type opcode */
	Fcttv		= 0x03200/4,	/* " transmit timer value (0-3) */
	Fcrtl		= 0x03220/4,	/* " rx threshold low (0-7) +8n */
	Fcrth		= 0x03260/4,	/* " rx threshold high (0-7) +8n */
	Rcrtv		= 0x032a0/4,	/* " refresh value threshold */
	Tfcs		= 0x0ce00/4,	/* " tx status */

	/* rx dma */
	Rbal		= 0x01000/4,	/* rx desc base low (0-63) +0x40n */
	Rbah		= 0x01004/4,	/* " high */
	Rdlen		= 0x01008/4,	/* " length */
	Rdh		= 0x01010/4,	/* " head */
	Rdt		= 0x01018/4,	/* " tail */
	Rxdctl		= 0x01028/4,	/* " control */

	Srrctl		= 0x02100/4,	/* split and replication rx ctl. */
	Dcarxctl	= 0x02200/4,	/* rx dca control */
	Rdrxctl		= 0x02f00/4,	/* rx dma control */
	Rxpbsize	= 0x03c00/4,	/* rx packet buffer size */
	Rxctl		= 0x03000/4,	/* rx control */
	Dropen		= 0x03d04/4,	/* drop enable control */

	/* rx */
	Rxcsum		= 0x05000/4,	/* rx checksum control */
	/* was 0x04008, which lies among the statistics counters */
	Rfctl		= 0x05008/4,	/* rx filter control */
	Mta		= 0x05200/4,	/* multicast table array (0-127) */
	Ral		= 0x05400/4,	/* rx address low */
	Rah		= 0x05404/4,	/* rx address high; entries are Ral/Rah pairs */
	Psrtype		= 0x05480/4,	/* packet split rx type. */
	Vfta		= 0x0a000/4,	/* vlan filter table array. */
	Fctrl		= 0x05080/4,	/* filter control */
	Vlnctrl		= 0x05088/4,	/* vlan control */
	Msctctrl	= 0x05090/4,	/* multicast control */
	Mrqc		= 0x05818/4,	/* multiple rx queues cmd */
	Vmdctl		= 0x0581c/4,	/* vmdq control */
	Imir		= 0x05a80/4,	/* immediate irq rx (0-7) */
	Imirext		= 0x05aa0/4,	/* immediate irq rx ext */
	Imirvp		= 0x05ac0/4,	/* immediate irq vlan priority */
	Reta		= 0x05c00/4,	/* redirection table */
	Rssrk		= 0x05c80/4,	/* rss random key */

	/* tx */
	Tdbal		= 0x06000/4,	/* tx desc base low +0x40n */
	Tdbah		= 0x06004/4,	/* " high */
	Tdlen		= 0x06008/4,	/* " len */
	Tdh		= 0x06010/4,	/* " head */
	Tdt		= 0x06018/4,	/* " tail */
	Txdctl		= 0x06028/4,	/* " control */
	Tdwbal		= 0x06038/4,	/* " write-back address low */
	Tdwbah		= 0x0603c/4,	/* " write-back address high */

	Dtxctl		= 0x07e00/4,	/* tx dma control */
	Tdcatxctrl	= 0x07200/4,	/* tx dca register (0-15) */
	Tipg		= 0x0cb00/4,	/* tx inter-packet gap */
	Txpbsize	= 0x0cc00/4,	/* tx packet-buffer size (0-15) */

	/* mac */
	Hlreg0		= 0x04240/4,	/* highlander control reg 0 */
	Hlreg1		= 0x04244/4,	/* highlander control reg 1 (ro) */
	Msca		= 0x0425c/4,	/* mdi signal cmd & addr */
	Msrwd		= 0x04260/4,	/* mdi single rw data */
	Mhadd		= 0x04268/4,	/* mac addr high & max frame */
	Pcss1		= 0x04288/4,	/* xgxs status 1 */
	Pcss2		= 0x0428c/4,	/* xgxs status 2 */
	Xpcss		= 0x04290/4,	/* 10gb-x pcs status */
	Serdesc		= 0x04298/4,	/* serdes control */
	Macs		= 0x0429c/4,	/* fifo control & report */
	Autoc		= 0x042a0/4,	/* autodetect control & status */
	Links		= 0x042a4/4,	/* link status */
	Autoc2		= 0x042a8/4,	/* autodetect control 2 */
};
125 
/*
 * Bit masks and field shifts for individual registers,
 * grouped by the register they apply to.
 */
enum {
	/* Ctrl */
	Rst	= 1<<26,	/* full nic reset */

	/* Txdctl */
	Ten	= 1<<25,	/* tx queue enable */

	/* Fctrl */
	Bam	= 1<<10,	/* broadcast accept mode */
	Upe	= 1<<9,		/* unicast promiscuous */
	Mpe	= 1<<8,		/* multicast promiscuous */

	/* Rxdctl (the three thresholds are shift counts, not masks) */
	Pthresh	= 0,		/* prefetch threshold shift in bits */
	Hthresh	= 8,		/* host buffer minimum threshold " */
	Wthresh	= 16,		/* writeback threshold " */
	Renable	= 1<<25,	/* rx queue enable */

	/* Rxctl */
	Rxen	= 1<<0,
	Dmbyps	= 1<<1,

	/* Rdrxctl */
	Rdmt½	= 0,
	Rdmt¼	= 1,
	Rdmt⅛	= 2,

	/* Rxcsum */
	Ippcse	= 1<<12,	/* ip payload checksum enable */

	/* Eerd */
	EEstart	= 1<<0,		/* Start Read */
	EEdone	= 1<<1,		/* Read done */

	/* interrupts */
	Irx0	= 1<<0,		/* driver defined */
	Itx0	= 1<<1,		/* driver defined */
	Lsc	= 1<<20,	/* link status change */

	/* Links */
	Lnkup	= 1<<30,
	Lnkspd	= 1<<29,

	/* Hlreg0 */
	Jumboen	= 1<<2,
};
172 
173 typedef struct {
174 	uint	reg;
175 	char	*name;
176 } Stat;
177 
178 Stat stattab[] = {
179 	0x4000,	"crc error",
180 	0x4004,	"illegal byte",
181 	0x4008,	"short packet",
182 	0x3fa0,	"missed pkt0",
183 	0x4034,	"mac local flt",
184 	0x4038,	"mac rmt flt",
185 	0x4040,	"rx length err",
186 	0x3f60,	"xon tx",
187 	0xcf60,	"xon rx",
188 	0x3f68,	"xoff tx",
189 	0xcf68,	"xoff rx",
190 	0x405c,	"rx 040",
191 	0x4060,	"rx 07f",
192 	0x4064,	"rx 100",
193 	0x4068,	"rx 200",
194 	0x406c,	"rx 3ff",
195 	0x4070,	"rx big",
196 	0x4074,	"rx ok",
197 	0x4078,	"rx bcast",
198 	0x3fc0,	"rx no buf0",
199 	0x40a4,	"rx runt",
200 	0x40a8,	"rx frag",
201 	0x40ac,	"rx ovrsz",
202 	0x40b0,	"rx jab",
203 	0x40d0,	"rx pkt",
204 
205 	0x40d4,	"tx pkt",
206 	0x40d8,	"tx 040",
207 	0x40dc,	"tx 07f",
208 	0x40e0,	"tx 100",
209 	0x40e4,	"tx 200",
210 	0x40e8,	"tx 3ff",
211 	0x40ec,	"tx big",
212 	0x40f4,	"tx bcast",
213 	0x4120,	"xsum err",
214 };
215 
/* receive descriptor status bits (Rd.status) */
enum {
	Rdd	= 1<<0,	/* descriptor done */
	Reop	= 1<<1,	/* end of packet */
	Ixsm	= 1<<2,	/* ignore checksum */
	Vp	= 1<<3,	/* 802.1q packet matched vet */
	Tcpcs	= 1<<4,	/* tcp checksum calculated */
	/* NOTE(review): formerly commented "layer 2"; the name suggests layer 4 checksum — confirm */
	L4cs	= 1<<5,
	Ipcs	= 1<<6,	/* ip checksum calculated */
	Pif	= 1<<7,	/* past exact filter (sic) */
};
227 
228 typedef struct {
229 	u32int	addr[2];
230 	ushort	length;
231 	ushort	cksum;
232 	uchar	status;
233 	uchar	errors;
234 	ushort	vlan;
235 } Rd;
236 
/* transmit descriptor bits */
enum {
	/* Td cmd */
	Teop	= 1<<0,
	Ifcs	= 1<<1,
	Ic	= 1<<2,
	Rs	= 1<<3,

	/* Td status */
	Tdd	= 1<<0,		/* tested by cleanup() to reclaim sent blocks */
};
247 
248 typedef struct {
249 	u32int	addr[2];
250 	ushort	length;
251 	uchar	cso;
252 	uchar	cmd;
253 	uchar	status;
254 	uchar	css;
255 	ushort	vlan;
256 } Td;
257 
/* Ctlr.flag bits */
enum {
	Factive		= 1<<0,		/* controller claimed by pnp() */
	Fstarted	= 1<<1,
};
262 
263 typedef struct {
264 	Pcidev	*p;
265 	Ether	*edev;
266 	u32int	*reg;
267 	u32int	*reg3;
268 	uchar	flag;
269 	int	nrd;
270 	int	ntd;
271 	int	nrb;
272 	int	rbsz;
273 	Lock	slock;
274 	Lock	alock;
275 	QLock	tlock;
276 	Rendez	lrendez;
277 	Rendez	trendez;
278 	Rendez	rrendez;
279 	uint	im;
280 	uint	lim;
281 	uint	rim;
282 	uint	tim;
283 	Lock	imlock;
284 	char	*alloc;
285 
286 	Rd	*rdba;
287 	Block	**rb;
288 	uint	rdt;
289 	uint	rdfree;
290 
291 	Td	*tdba;
292 	uint	tdh;
293 	uint	tdt;
294 	Block	**tb;
295 
296 	uchar	ra[Eaddrlen];
297 	uchar	mta[128];
298 	ulong	stats[nelem(stattab)];
299 	uint	speeds[3];
300 } Ctlr;
301 
/* tweakable parameters */
enum {
	Rbsz	= 12*1024,	/* rx buffer size; also reported as maxmtu */
	Nrd	= 256,		/* rx descriptors; ring indices are masked with Nrd-1 */
	Ntd	= 64,		/* tx descriptors; ring indices are masked with Ntd-1 */
	Nrb	= 256,		/* attach() allocates 2*Nrb rx buffers */
};
309 
310 static	Ctlr	*ctlrtab[4];
311 static	int	nctlr;
312 static	Lock	rblock;
313 static	Block	*rbpool;
314 
315 static void
316 readstats(Ctlr *c)
317 {
318 	int i;
319 
320 	lock(&c->slock);
321 	for(i = 0; i < nelem(c->stats); i++)
322 		c->stats[i] += c->reg[stattab[i].reg >> 2];
323 	unlock(&c->slock);
324 }
325 
/*
 * link speed in mb/s, indexed the way lproc() computes it:
 * 0 no link, 1 link up with Lnkspd clear, 2 link up with Lnkspd set.
 */
static int speedtab[] = { 0, 1000, 10000 };
331 
332 static long
333 ifstat(Ether *e, void *a, long n, ulong offset)
334 {
335 	uint i, *t;
336 	char *s, *p, *q;
337 	Ctlr *c;
338 
339 	c = e->ctlr;
340 	p = s = malloc(READSTR);
341 	if(p == nil)
342 		error(Enomem);
343 	q = p + READSTR;
344 
345 	readstats(c);
346 	for(i = 0; i < nelem(stattab); i++)
347 		if(c->stats[i] > 0)
348 			p = seprint(p, q, "%.10s  %uld\n", stattab[i].name,					c->stats[i]);
349 	t = c->speeds;
350 	p = seprint(p, q, "speeds: 0:%d 1000:%d 10000:%d\n", t[0], t[1], t[2]);
351 	p = seprint(p, q, "mtu: min:%d max:%d\n", e->minmtu, e->maxmtu);
352 	seprint(p, q, "rdfree %d rdh %d rdt %d\n", c->rdfree, c->reg[Rdt],
353 		c->reg[Rdh]);
354 	n = readstr(offset, a, n, s);
355 	free(s);
356 
357 	return n;
358 }
359 
360 static void
361 im(Ctlr *c, int i)
362 {
363 	ilock(&c->imlock);
364 	c->im |= i;
365 	c->reg[Ims] = c->im;
366 	iunlock(&c->imlock);
367 }
368 
369 static int
370 lim(void *v)
371 {
372 	return ((Ctlr*)v)->lim != 0;
373 }
374 
375 static void
376 lproc(void *v)
377 {
378 	int r, i;
379 	Ctlr *c;
380 	Ether *e;
381 
382 	e = v;
383 	c = e->ctlr;
384 	for (;;) {
385 		r = c->reg[Links];
386 		e->link = (r & Lnkup) != 0;
387 		i = 0;
388 		if(e->link)
389 			i = 1 + ((r & Lnkspd) != 0);
390 		c->speeds[i]++;
391 		e->mbps = speedtab[i];
392 		c->lim = 0;
393 		im(c, Lsc);
394 		sleep(&c->lrendez, lim, c);
395 		c->lim = 0;
396 	}
397 }
398 
399 static long
400 ctl(Ether *, void *, long)
401 {
402 	error(Ebadarg);
403 	return -1;
404 }
405 
406 static Block*
407 rballoc(void)
408 {
409 	Block *bp;
410 
411 	ilock(&rblock);
412 	if((bp = rbpool) != nil){
413 		rbpool = bp->next;
414 		bp->next = 0;
415 		_xinc(&bp->ref);	/* prevent bp from being freed */
416 	}
417 	iunlock(&rblock);
418 	return bp;
419 }
420 
421 void
422 rbfree(Block *b)
423 {
424 	b->rp = b->wp = (uchar*)PGROUND((uintptr)b->base);
425  	b->flag &= ~(Bipck | Budpck | Btcpck | Bpktck);
426 	ilock(&rblock);
427 	b->next = rbpool;
428 	rbpool = b;
429 	iunlock(&rblock);
430 }
431 
/* successor of ring index x; m is ring size - 1 (size a power of two) */
#define Next(x, m)	((1+(x)) & (m))
433 
434 static int
435 cleanup(Ctlr *c, int tdh)
436 {
437 	Block *b;
438 	uint m, n;
439 
440 	m = c->ntd - 1;
441 	while(c->tdba[n = Next(tdh, m)].status & Tdd){
442 		tdh = n;
443 		b = c->tb[tdh];
444 		c->tb[tdh] = 0;
445 		freeb(b);
446 		c->tdba[tdh].status = 0;
447 	}
448 	return tdh;
449 }
450 
451 void
452 transmit(Ether *e)
453 {
454 	uint i, m, tdt, tdh;
455 	Ctlr *c;
456 	Block *b;
457 	Td *t;
458 
459 	c = e->ctlr;
460 	if(!canqlock(&c->tlock)){
461 		im(c, Itx0);
462 		return;
463 	}
464 	tdh = c->tdh = cleanup(c, c->tdh);
465 	tdt = c->tdt;
466 	m = c->ntd - 1;
467 	for(i = 0; i < 8; i++){
468 		if(Next(tdt, m) == tdh){
469 			im(c, Itx0);
470 			break;
471 		}
472 		if(!(b = qget(e->oq)))
473 			break;
474 		t = c->tdba + tdt;
475 		t->addr[0] = PCIWADDR(b->rp);
476 		t->length = BLEN(b);
477 		t->cmd = Rs | Ifcs | Teop;
478 		c->tb[tdt] = b;
479 		tdt = Next(tdt, m);
480 	}
481 	if(i){
482 		c->tdt = tdt;
483 		c->reg[Tdt] = tdt;
484 	}
485 	qunlock(&c->tlock);
486 }
487 
488 static int
489 tim(void *c)
490 {
491 	return ((Ctlr*)c)->tim != 0;
492 }
493 
494 static void
495 tproc(void *v)
496 {
497 	Ctlr *c;
498 	Ether *e;
499 
500 	e = v;
501 	c = e->ctlr;
502 	for (;;) {
503 		sleep(&c->trendez, tim, c);	/* transmit kicks us */
504 		c->tim = 0;
505 		transmit(e);
506 	}
507 }
508 
509 static void
510 rxinit(Ctlr *c)
511 {
512 	int i;
513 	Block *b;
514 
515 	c->reg[Rxctl] &= ~Rxen;
516 	for(i = 0; i < c->nrd; i++){
517 		b = c->rb[i];
518 		c->rb[i] = 0;
519 		if(b)
520 			freeb(b);
521 	}
522 	c->rdfree = 0;
523 
524 	c->reg[Fctrl] |= Bam;
525 	c->reg[Rxcsum] |= Ipcs;
526 	c->reg[Srrctl] = (c->rbsz + 1023)/1024;
527 	c->reg[Mhadd] = c->rbsz << 16;
528 	c->reg[Hlreg0] |= Jumboen;
529 
530 	c->reg[Rbal] = PCIWADDR(c->rdba);
531 	c->reg[Rbah] = 0;
532 	c->reg[Rdlen] = c->nrd*sizeof(Rd);
533 	c->reg[Rdh] = 0;
534 	c->reg[Rdt] = c->rdt = 0;
535 
536 	c->reg[Rdrxctl] = Rdmt¼;
537 	c->reg[Rxdctl] = 8<<Wthresh | 8<<Pthresh | 4<<Hthresh | Renable;
538 	c->reg[Rxctl] |= Rxen | Dmbyps;
539 }
540 
541 static void
542 replenish(Ctlr *c, uint rdh)
543 {
544 	int rdt, m, i;
545 	Block *b;
546 	Rd *r;
547 
548 	m = c->nrd - 1;
549 	i = 0;
550 	for(rdt = c->rdt; Next(rdt, m) != rdh; rdt = Next(rdt, m)){
551 		r = c->rdba + rdt;
552 		if(!(b = rballoc())){
553 			print("82598: no buffers\n");
554 			break;
555 		}
556 		c->rb[rdt] = b;
557 		r->addr[0] = PCIWADDR(b->rp);
558 		r->status = 0;
559 		c->rdfree++;
560 		i++;
561 	}
562 	if(i)
563 		c->reg[Rdt] = c->rdt = rdt;
564 }
565 
566 static int
567 rim(void *v)
568 {
569 	return ((Ctlr*)v)->rim != 0;
570 }
571 
572 static uchar zeroea[Eaddrlen];
573 
574 void
575 rproc(void *v)
576 {
577 	uint m, rdh;
578 	Block *b;
579 	Ctlr *c;
580 	Ether *e;
581 	Rd *r;
582 
583 	e = v;
584 	c = e->ctlr;
585 	m = c->nrd - 1;
586 	for (rdh = 0; ; rdh = Next(rdh, m)) {
587 		replenish(c, rdh);
588 		im(c, Irx0);
589 		sleep(&c->rrendez, rim, c);
590 		do {
591 			c->rim = 0;
592 			if(c->nrd - c->rdfree >= 16)
593 				replenish(c, rdh);
594 			r = c->rdba + rdh;
595 		} while(!(r->status & Rdd));
596 		b = c->rb[rdh];
597 		c->rb[rdh] = 0;
598 		b->wp += r->length;
599 		b->lim = b->wp;		/* lie like a dog */
600 		if(!(r->status & Ixsm)){
601 			if(r->status & Ipcs)
602 				b->flag |= Bipck;
603 			if(r->status & Tcpcs)
604 				b->flag |= Btcpck | Budpck;
605 			b->checksum = r->cksum;
606 		}
607 //		r->status = 0;
608 		etheriq(e, b, 1);
609 		c->rdfree--;
610 	}
611 }
612 
613 static void
614 promiscuous(void *a, int on)
615 {
616 	Ctlr *c;
617 	Ether *e;
618 
619 	e = a;
620 	c = e->ctlr;
621 	if(on)
622 		c->reg[Fctrl] |= Upe | Mpe;
623 	else
624 		c->reg[Fctrl] &= ~(Upe | Mpe);
625 }
626 
627 static void
628 multicast(void *a, uchar *ea, int on)
629 {
630 	int b, i;
631 	Ctlr *c;
632 	Ether *e;
633 
634 	e = a;
635 	c = e->ctlr;
636 
637 	/*
638 	 * multiple ether addresses can hash to the same filter bit,
639 	 * so it's never safe to clear a filter bit.
640 	 * if we want to clear filter bits, we need to keep track of
641 	 * all the multicast addresses in use, clear all the filter bits,
642 	 * then set the ones corresponding to in-use addresses.
643 	 */
644 	i = ea[5] >> 1;
645 	b = (ea[5]&1)<<4 | ea[4]>>4;
646 	b = 1 << b;
647 	if(on)
648 		c->mta[i] |= b;
649 //	else
650 //		c->mta[i] &= ~b;
651 	c->reg[Mta+i] = c->mta[i];
652 }
653 
654 static int
655 detach(Ctlr *c)
656 {
657 	int i;
658 
659 	c->reg[Imc] = ~0;
660 	c->reg[Ctrl] |= Rst;
661 	for(i = 0; i < 100; i++){
662 		delay(1);
663 		if((c->reg[Ctrl] & Rst) == 0)
664 			break;
665 	}
666 	if (i >= 100)
667 		return -1;
668 	/* errata */
669 	delay(50);
670 	c->reg[Ecc] &= ~(1<<21 | 1<<18 | 1<<9 | 1<<6);
671 
672 	/* not cleared by reset; kill it manually. */
673 	for(i = 1; i < 16; i++)
674 		c->reg[Rah] &= ~(1 << 31);
675 	for(i = 0; i < 128; i++)
676 		c->reg[Mta + i] = 0;
677 	for(i = 1; i < 640; i++)
678 		c->reg[Vfta + i] = 0;
679 	return 0;
680 }
681 
682 static void
683 shutdown(Ether *e)
684 {
685 	detach(e->ctlr);
686 }
687 
688 /* ≤ 20ms */
689 static ushort
690 eeread(Ctlr *c, int i)
691 {
692 	c->reg[Eerd] = EEstart | i<<2;
693 	while((c->reg[Eerd] & EEdone) == 0)
694 		;
695 	return c->reg[Eerd] >> 16;
696 }
697 
698 static int
699 eeload(Ctlr *c)
700 {
701 	ushort u, v, p, l, i, j;
702 
703 	if((eeread(c, 0) & 0xc0) != 0x40)
704 		return -1;
705 	u = 0;
706 	for(i = 0; i < 0x40; i++)
707 		u +=  eeread(c, i);
708 	for(i = 3; i < 0xf; i++){
709 		p = eeread(c, i);
710 		l = eeread(c, p++);
711 		if((int)p + l + 1 > 0xffff)
712 			continue;
713 		for(j = p; j < p + l; j++)
714 			u += eeread(c, j);
715 	}
716 	if(u != 0xbaba)
717 		return -1;
718 	if(c->reg[Status] & (1<<3))
719 		u = eeread(c, 10);
720 	else
721 		u = eeread(c, 9);
722 	u++;
723 	for(i = 0; i < Eaddrlen;){
724 		v = eeread(c, u + i/2);
725 		c->ra[i++] = v;
726 		c->ra[i++] = v>>8;
727 	}
728 	c->ra[5] += (c->reg[Status] & 0xc) >> 2;
729 	return 0;
730 }
731 
732 static int
733 reset(Ctlr *c)
734 {
735 	int i;
736 	uchar *p;
737 
738 	if(detach(c)){
739 		print("82598: reset timeout\n");
740 		return -1;
741 	}
742 	if(eeload(c)){
743 		print("82598: eeprom failure\n");
744 		return -1;
745 	}
746 	p = c->ra;
747 	c->reg[Ral] = p[3]<<24 | p[2]<<16 | p[1]<<8 | p[0];
748 	c->reg[Rah] = p[5]<<8 | p[4] | 1<<31;
749 
750 	readstats(c);
751 	for(i = 0; i<nelem(c->stats); i++)
752 		c->stats[i] = 0;
753 
754 	c->reg[Ctrlext] |= 1 << 16;
755 	/* make some guesses for flow control */
756 	c->reg[Fcrtl] = 0x10000 | 1<<31;
757 	c->reg[Fcrth] = 0x40000 | 1<<31;
758 	c->reg[Rcrtv] = 0x6000;
759 
760 	/* configure interrupt mapping (don't ask) */
761 	c->reg[Ivar+0] =     0 | 1<<7;
762 	c->reg[Ivar+64/4] =  1 | 1<<7;
763 //	c->reg[Ivar+97/4] = (2 | 1<<7) << (8*(97%4));
764 
765 	/* interrupt throttling goes here. */
766 	for(i = Itr; i < Itr + 20; i++)
767 		c->reg[i] = 128;		/* ¼µs intervals */
768 	c->reg[Itr + Itx0] = 256;
769 	return 0;
770 }
771 
772 static void
773 txinit(Ctlr *c)
774 {
775 	Block *b;
776 	int i;
777 
778 	c->reg[Txdctl] = 16<<Wthresh | 16<<Pthresh;
779 	for(i = 0; i < c->ntd; i++){
780 		b = c->tb[i];
781 		c->tb[i] = 0;
782 		if(b)
783 			freeb(b);
784 	}
785 	memset(c->tdba, 0, c->ntd * sizeof(Td));
786 	c->reg[Tdbal] = PCIWADDR(c->tdba);
787 	c->reg[Tdbah] = 0;
788 	c->reg[Tdlen] = c->ntd*sizeof(Td);
789 	c->reg[Tdh] = 0;
790 	c->reg[Tdt] = 0;
791 	c->tdh = c->ntd - 1;
792 	c->tdt = 0;
793 	c->reg[Txdctl] |= Ten;
794 }
795 
796 static void
797 attach(Ether *e)
798 {
799 	Block *b;
800 	Ctlr *c;
801 	int t;
802 	char buf[KNAMELEN];
803 
804 	c = e->ctlr;
805 	c->edev = e;			/* point back to Ether* */
806 	lock(&c->alock);
807 	if(c->alloc){
808 		unlock(&c->alock);
809 		return;
810 	}
811 
812 	c->nrd = Nrd;
813 	c->ntd = Ntd;
814 	t  = c->nrd * sizeof *c->rdba + 255;
815 	t += c->ntd * sizeof *c->tdba + 255;
816 	t += (c->ntd + c->nrd) * sizeof(Block*);
817 	c->alloc = malloc(t);
818 	unlock(&c->alock);
819 	if(c->alloc == nil)
820 		error(Enomem);
821 
822 	c->rdba = (Rd*)ROUNDUP((uintptr)c->alloc, 256);
823 	c->tdba = (Td*)ROUNDUP((uintptr)(c->rdba + c->nrd), 256);
824 	c->rb = (Block**)(c->tdba + c->ntd);
825 	c->tb = (Block**)(c->rb + c->nrd);
826 
827 	if(waserror()){
828 		while(b = rballoc()){
829 			b->free = 0;
830 			freeb(b);
831 		}
832 		free(c->alloc);
833 		c->alloc = nil;
834 		nexterror();
835 	}
836 	for(c->nrb = 0; c->nrb < 2*Nrb; c->nrb++){
837 		if(!(b = allocb(c->rbsz+BY2PG)))
838 			error(Enomem);
839 		b->free = rbfree;
840 		freeb(b);
841 	}
842 	poperror();
843 
844 	rxinit(c);
845 	txinit(c);
846 
847 	snprint(buf, sizeof buf, "#l%dl", e->ctlrno);
848 	kproc(buf, lproc, e);
849 	snprint(buf, sizeof buf, "#l%dr", e->ctlrno);
850 	kproc(buf, rproc, e);
851 	snprint(buf, sizeof buf, "#l%dt", e->ctlrno);
852 	kproc(buf, tproc, e);
853 }
854 
855 static void
856 interrupt(Ureg*, void *v)
857 {
858 	int icr, im;
859 	Ctlr *c;
860 	Ether *e;
861 
862 	e = v;
863 	c = e->ctlr;
864 	ilock(&c->imlock);
865 	c->reg[Imc] = ~0;
866 	im = c->im;
867 	while((icr = c->reg[Icr] & c->im) != 0){
868 		if(icr & Lsc){
869 			im &= ~Lsc;
870 			c->lim = icr & Lsc;
871 			wakeup(&c->lrendez);
872 		}
873 		if(icr & Irx0){
874 			im &= ~Irx0;
875 			c->rim = icr & Irx0;
876 			wakeup(&c->rrendez);
877 		}
878 		if(icr & Itx0){
879 			im &= ~Itx0;
880 			c->tim = icr & Itx0;
881 			wakeup(&c->trendez);
882 		}
883 	}
884 	c->reg[Ims] = c->im = im;
885 	iunlock(&c->imlock);
886 }
887 
888 static void
889 scan(void)
890 {
891 	ulong io, io3;
892 	void *mem, *mem3;
893 	Ctlr *c;
894 	Pcidev *p;
895 
896 	p = 0;
897 	while(p = pcimatch(p, 0x8086, 0)){
898 		switch(p->did){
899 		case 0x10c6:		/* 82598 af dual port */
900 		case 0x10c7:		/* 82598 af single port */
901 		case 0x10b6:		/* 82598 backplane */
902 		case 0x10dd:		/* 82598 at cx4 */
903 		case 0x10ec:		/* 82598 at cx4 dual port */
904 			break;
905 		default:
906 			continue;
907 		}
908 		if(nctlr == nelem(ctlrtab)){
909 			print("i82598: too many controllers\n");
910 			return;
911 		}
912 		io = p->mem[0].bar & ~0xf;
913 		mem = vmap(io, p->mem[0].size);
914 		if(mem == nil){
915 			print("i82598: can't map %#p\n", p->mem[0].bar);
916 			continue;
917 		}
918 		io3 = p->mem[3].bar & ~0xf;
919 		mem3 = vmap(io3, p->mem[3].size);
920 		if(mem3 == nil){
921 			print("i82598: can't map %#p\n", p->mem[3].bar);
922 			vunmap(mem, p->mem[0].size);
923 			continue;
924 		}
925 		c = malloc(sizeof *c);
926 		if(c == nil) {
927 			vunmap(mem, p->mem[0].size);
928 			vunmap(mem3, p->mem[3].size);
929 			error(Enomem);
930 		}
931 		c->p = p;
932 		c->reg = (u32int*)mem;
933 		c->reg3 = (u32int*)mem3;
934 		c->rbsz = Rbsz;
935 		if(reset(c)){
936 			print("i82598: can't reset\n");
937 			free(c);
938 			vunmap(mem, p->mem[0].size);
939 			vunmap(mem3, p->mem[3].size);
940 			continue;
941 		}
942 		pcisetbme(p);
943 		ctlrtab[nctlr++] = c;
944 	}
945 }
946 
947 static int
948 pnp(Ether *e)
949 {
950 	int i;
951 	Ctlr *c = nil;
952 
953 	if(nctlr == 0)
954 		scan();
955 	for(i = 0; i < nctlr; i++){
956 		c = ctlrtab[i];
957 		if(c == nil || c->flag & Factive)
958 			continue;
959 		if(e->port == 0 || e->port == (ulong)c->reg)
960 			break;
961 	}
962 	if (i >= nctlr)
963 		return -1;
964 	c->flag |= Factive;
965 	e->ctlr = c;
966 	e->port = (uintptr)c->reg;
967 	e->irq = c->p->intl;
968 	e->tbdf = c->p->tbdf;
969 	e->mbps = 10000;
970 	e->maxmtu = c->rbsz;
971 	memmove(e->ea, c->ra, Eaddrlen);
972 	e->arg = e;
973 	e->attach = attach;
974 	e->ctl = ctl;
975 	e->ifstat = ifstat;
976 	e->interrupt = interrupt;
977 	e->multicast = multicast;
978 	e->promiscuous = promiscuous;
979 	e->shutdown = shutdown;
980 	e->transmit = transmit;
981 
982 	return 0;
983 }
984 
985 void
986 ether82598link(void)
987 {
988 	addethercard("i82598", pnp);
989 }
990