xref: /plan9-contrib/sys/src/9/pc/ether82598.c (revision 25fc69938fdecc61cd09e795cbe2d2f72f1082b1)
1 /*
2  * intel 10gbe pci-express driver
3  * copyright © 2007, coraid, inc.
4  */
5 #include "u.h"
6 #include "../port/lib.h"
7 #include "mem.h"
8 #include "dat.h"
9 #include "fns.h"
10 #include "io.h"
11 #include "../port/error.h"
12 #include "../port/netif.h"
13 #include "etherif.h"
14 
15 /*
16  * // comments note conflicts with 82563-style drivers,
17  * and the registers are all different.
18  */
19 
/*
 * register offsets.  each is written as byte offset/4 so that it can
 * index the u32int register arrays directly, e.g. c->reg[Ctrl].
 * the entries marked (bar3) are offsets into the bar 3 mapping.
 * comments of the form (0-n) note that the register is an array.
 */
20 enum {
21 	/* general */
22 	Ctrl		= 0x00000/4,	/* Device Control */
23 	Status		= 0x00008/4,	/* Device Status */
24 	Ctrlext		= 0x00018/4,	/* Extended Device Control */
25 	Esdp		= 0x00020/4,	/* extended sdp control */
26 	Esodp		= 0x00028/4,	/* extended od sdp control */
27 	Ledctl		= 0x00200/4,	/* led control */
28 	Tcptimer	= 0x0004c/4,	/* tcp timer */
29 	Ecc		= 0x110b0/4,	/* errata ecc control magic */
30 
31 	/* nvm */
32 	Eec		= 0x10010/4,	/* eeprom/flash control */
33 	Eerd		= 0x10014/4,	/* eeprom read */
34 	Fla		= 0x1001c/4,	/* flash access */
35 	Flop		= 0x1013c/4,	/* flash opcode */
36 	Grc		= 0x10200/4,	/* general rx control */
37 
38 	/* interrupt */
39 	Icr		= 0x00800/4,	/* interrupt cause read */
40 	Ics		= 0x00808/4,	/* " set */
41 	Ims		= 0x00880/4,	/* " mask read/set */
42 	Imc		= 0x00888/4,	/* " mask clear */
43 	Iac		= 0x00810/4,	/* " auto clear */
44 	Iam		= 0x00890/4,	/* " auto mask enable */
45 	Itr		= 0x00820/4,	/* " throttling rate (0-19) */
46 	Ivar		= 0x00900/4,	/* " vector allocation regs. */
47 	/* msi interrupt */
48 	Msixt		= 0x0000/4,	/* msix table (bar3) */
49 	Msipba		= 0x2000/4,	/* msix pending bit array (bar3) */
50 	Pbacl		= 0x11068/4,	/* pba clear */
51 	Gpie		= 0x00898/4,	/* general purpose int enable */
52 
53 	/* flow control */
54 	Pfctop		= 0x03008/4,	/* priority flow ctl type opcode */
55 	Fcttv		= 0x03200/4,	/* " transmit timer value (0-3) */
56 	Fcrtl		= 0x03220/4,	/* " rx threshold low (0-7) +8n */
57 	Fcrth		= 0x03260/4,	/* " rx threshold high (0-7) +8n */
58 	Rcrtv		= 0x032a0/4,	/* " refresh value threshold */
59 	Tfcs		= 0x0ce00/4,	/* " tx status */
60 
61 	/* rx dma */
62 	Rbal		= 0x01000/4,	/* rx desc base low (0-63) +0x40n */
63 	Rbah		= 0x01004/4,	/* " high */
64 	Rdlen		= 0x01008/4,	/* " length */
65 	Rdh		= 0x01010/4,	/* " head */
66 	Rdt		= 0x01018/4,	/* " tail */
67 	Rxdctl		= 0x01028/4,	/* " control */
68 
69 	Srrctl		= 0x02100/4,	/* split and replication rx ctl. */
70 	Dcarxctl	= 0x02200/4,	/* rx dca control */
71 	Rdrxctl		= 0x02f00/4,	/* rx dma control */
72 	Rxpbsize	= 0x03c00/4,	/* rx packet buffer size */
73 	Rxctl		= 0x03000/4,	/* rx control */
74 	Dropen		= 0x03d04/4,	/* drop enable control */
75 
76 	/* rx */
77 	Rxcsum		= 0x05000/4,	/* rx checksum control */
78 	Rfctl		= 0x04008/4,	/* rx filter control */
79 	Mta		= 0x05200/4,	/* multicast table array (0-127) */
80 	Ral		= 0x05400/4,	/* rx address low */
81 	Rah		= 0x05404/4,
82 	Psrtype		= 0x05480/4,	/* packet split rx type. */
83 	Vfta		= 0x0a000/4,	/* vlan filter table array. */
84 	Fctrl		= 0x05080/4,	/* filter control */
85 	Vlnctrl		= 0x05088/4,	/* vlan control */
86 	Msctctrl	= 0x05090/4,	/* multicast control */
87 	Mrqc		= 0x05818/4,	/* multiple rx queues cmd */
88 	Vmdctl		= 0x0581c/4,	/* vmdq control */
89 	Imir		= 0x05a80/4,	/* immediate irq rx (0-7) */
90 	Imirext		= 0x05aa0/4,	/* immediate irq rx ext */
91 	Imirvp		= 0x05ac0/4,	/* immediate irq vlan priority */
92 	Reta		= 0x05c00/4,	/* redirection table */
93 	Rssrk		= 0x05c80/4,	/* rss random key */
94 
95 	/* tx */
96 	Tdbal		= 0x06000/4,	/* tx desc base low +0x40n */
97 	Tdbah		= 0x06004/4,	/* " high */
98 	Tdlen		= 0x06008/4,	/* " len */
99 	Tdh		= 0x06010/4,	/* " head */
100 	Tdt		= 0x06018/4,	/* " tail */
101 	Txdctl		= 0x06028/4,	/* " control */
102 	Tdwbal		= 0x06038/4,	/* " write-back address low */
103 	Tdwbah		= 0x0603c/4,
104 
105 	Dtxctl		= 0x07e00/4,	/* tx dma control */
106 	Tdcatxctrl	= 0x07200/4,	/* tx dca register (0-15) */
107 	Tipg		= 0x0cb00/4,	/* tx inter-packet gap */
108 	Txpbsize	= 0x0cc00/4,	/* tx packet-buffer size (0-15) */
109 
110 	/* mac */
111 	Hlreg0		= 0x04240/4,	/* highlander control reg 0 */
112 	Hlreg1		= 0x04244/4,	/* highlander control reg 1 (ro) */
113 	Msca		= 0x0425c/4,	/* mdi signal cmd & addr */
114 	Msrwd		= 0x04260/4,	/* mdi single rw data */
115 	Mhadd		= 0x04268/4,	/* mac addr high & max frame */
116 	Pcss1		= 0x04288/4,	/* xgxs status 1 */
117 	Pcss2		= 0x0428c/4,
118 	Xpcss		= 0x04290/4,	/* 10gb-x pcs status */
119 	Serdesc		= 0x04298/4,	/* serdes control */
120 	Macs		= 0x0429c/4,	/* fifo control & report */
121 	Autoc		= 0x042a0/4,	/* autodetect control & status */
122 	Links		= 0x042a4/4,	/* link status */
123 	Autoc2		= 0x042a8/4,
124 };
125 
/*
 * bit definitions and field shifts for the registers above,
 * grouped by the register they apply to.
 */
126 enum {
127 	/* Ctrl */
128 	Rst		= 1<<26,	/* full nic reset */
129 
130 	/* Txdctl */
131 	Ten		= 1<<25,
132 
133 	/* Fctrl */
134 	Bam		= 1<<10,	/* broadcast accept mode */
135 	Upe 		= 1<<9,		/* unicast promiscuous */
136 	Mpe 		= 1<<8,		/* multicast promiscuous */
137 
138 	/* Rxdctl */
	/* the three thresholds are shift counts, not masks; Txdctl reuses them */
139 	Pthresh		= 0,		/* prefetch threshold shift in bits */
140 	Hthresh		= 8,		/* host buffer minimum threshold " */
141 	Wthresh		= 16,		/* writeback threshold */
142 	Renable		= 1<<25,
143 
144 	/* Rxctl */
145 	Rxen		= 1<<0,
146 	Dmbyps		= 1<<1,
147 
148 	/* Rdrxctl */
149 	Rdmt½		= 0,
150 	Rdmt¼		= 1,
151 	Rdmt⅛		= 2,
152 
153 	/* Rxcsum */
154 	Ippcse		= 1<<12,	/* ip payload checksum enable */
155 
156 	/* Eerd */
157 	EEstart		= 1<<0,		/* Start Read */
158 	EEdone		= 1<<1,		/* Read done */
159 
160 	/* interrupts */
161 	Irx0		= 1<<0,		/* driver defined */
162 	Itx0		= 1<<1,		/* driver defined */
163 	Lsc		= 1<<20,	/* link status change */
164 
165 	/* Links */
166 	Lnkup	= 1<<30,
167 	Lnkspd	= 1<<29,
168 
169 	/* Hlreg0 */
170 	Jumboen	= 1<<2,
171 };
172 
/* one hardware statistics counter: where it lives and what to call it */
173 typedef struct {
174 	uint	reg;		/* byte offset of the counter register */
175 	char	*name;		/* label printed by ifstat() */
176 } Stat;
177 
/*
 * statistics counters accumulated by readstats() and reported by ifstat().
 * unlike the register enum, reg here is a byte offset; readstats()
 * shifts it right by 2 before indexing the register array.
 * the table length also sizes Ctlr.stats.
 */
178 Stat stattab[] = {
179 	0x4000,	"crc error",
180 	0x4004,	"illegal byte",
181 	0x4008,	"short packet",
182 	0x3fa0,	"missed pkt0",
183 	0x4034,	"mac local flt",
184 	0x4038,	"mac rmt flt",
185 	0x4040,	"rx length err",
186 	0x3f60,	"xon tx",
187 	0xcf60,	"xon rx",
188 	0x3f68,	"xoff tx",
189 	0xcf68,	"xoff rx",
190 	0x405c,	"rx 040",
191 	0x4060,	"rx 07f",
192 	0x4064,	"rx 100",
193 	0x4068,	"rx 200",
194 	0x406c,	"rx 3ff",
195 	0x4070,	"rx big",
196 	0x4074,	"rx ok",
197 	0x4078,	"rx bcast",
198 	0x3fc0,	"rx no buf0",
199 	0x40a4,	"rx runt",
200 	0x40a8,	"rx frag",
201 	0x40ac,	"rx ovrsz",
202 	0x40b0,	"rx jab",
203 	0x40d0,	"rx pkt",
204 
205 	0x40d4,	"tx pkt",
206 	0x40d8,	"tx 040",
207 	0x40dc,	"tx 07f",
208 	0x40e0,	"tx 100",
209 	0x40e4,	"tx 200",
210 	0x40e8,	"tx 3ff",
211 	0x40ec,	"tx big",
212 	0x40f4,	"tx bcast",
213 	0x4120,	"xsum err",
214 };
215 
216 /* status */
/* bits of Rd.status; rproc() tests Rdd, Ixsm, Ipcs and Tcpcs */
217 enum {
218 	Pif	= 1<<7,	/* past exact filter (sic) */
219 	Ipcs	= 1<<6,	/* ip checksum calculated */
220 	L4cs	= 1<<5,	/* layer 2 (sic; the name suggests a layer 4 checksum bit — TODO confirm) */
221 	Tcpcs	= 1<<4,	/* tcp checksum calculated */
222 	Vp	= 1<<3,	/* 802.1q packet matched vet */
223 	Ixsm	= 1<<2,	/* ignore checksum */
224 	Reop	= 1<<1,	/* end of packet */
225 	Rdd	= 1<<0,	/* descriptor done */
226 };
227 
/*
 * receive descriptor as laid out in the rx ring (Ctlr.rdba).
 * replenish() fills in addr[0] and clears status;
 * rproc() reads status, length and cksum once Rdd is set.
 */
228 typedef struct {
229 	u32int	addr[2];	/* buffer address; only addr[0] is written by this driver */
230 	ushort	length;		/* bytes received; added to the block's wp */
231 	ushort	cksum;		/* copied to Block.checksum */
232 	uchar	status;		/* Rdd, Reop, Ixsm, ... */
233 	uchar	errors;
234 	ushort	vlan;
235 } Rd;
236 
/* bits of the Td.cmd and Td.status bytes */
237 enum {
238 	/* Td cmd */
	/* transmit() sets Rs | Ifcs | Teop on every descriptor */
239 	Rs	= 1<<3,
240 	Ic	= 1<<2,
241 	Ifcs	= 1<<1,
242 	Teop	= 1<<0,
243 
244 	/* Td status */
	/* cleanup() reclaims descriptors whose status has Tdd set */
245 	Tdd	= 1<<0,
246 };
247 
/*
 * transmit descriptor as laid out in the tx ring (Ctlr.tdba).
 * transmit() fills in addr[0], length and cmd;
 * cleanup() polls and clears status.
 */
248 typedef struct {
249 	u32int	addr[2];	/* buffer address; only addr[0] is written by this driver */
250 	ushort	length;		/* BLEN of the block being sent */
251 	uchar	cso;
252 	uchar	cmd;		/* Rs, Ic, Ifcs, Teop */
253 	uchar	status;		/* Tdd */
254 	uchar	css;
255 	ushort	vlan;
256 } Td;
257 
/* bits of Ctlr.flag */
258 enum {
259 	Factive		= 1<<0,		/* controller has been claimed by pnp() */
260 	Fstarted	= 1<<1,		/* not referenced in the code visible here */
261 };
262 
263 typedef struct {
264 	Pcidev	*p;
265 	Ether	*edev;
266 	u32int	*reg;
267 	u32int	*reg3;
268 	uchar	flag;
269 	int	nrd;
270 	int	ntd;
271 	int	nrb;
272 	int	rbsz;
273 	QLock	slock;
274 	QLock	alock;
275 	QLock	tlock;
276 	Rendez	lrendez;
277 	Rendez	trendez;
278 	Rendez	rrendez;
279 	uint	im;
280 	uint	lim;
281 	uint	rim;
282 	uint	tim;
283 	Lock	imlock;
284 	char	*alloc;
285 
286 	Rd	*rdba;
287 	Block	**rb;
288 	uint	rdt;
289 	uint	rdfree;
290 
291 	Td	*tdba;
292 	uint	tdh;
293 	uint	tdt;
294 	Block	**tb;
295 
296 	uchar	ra[Eaddrlen];
297 	uchar	mta[128];
298 	ulong	stats[nelem(stattab)];
299 	uint	speeds[3];
300 } Ctlr;
301 
302 /* tweakable parameters */
/*
 * compile-time sizing.  Nrd and Ntd are used as ring sizes with
 * (size - 1) as an index mask, so they need to stay powers of 2.
 */
303 enum {
304 	Rbsz	= 12*1024,	/* receive buffer size in bytes */
305 	Nrd	= 256,		/* rx descriptors per ring */
306 	Ntd	= 256,		/* tx descriptors per ring */
307 	Nrb	= 256,		/* attach() allocates 2*Nrb receive buffers */
308 };
309 
/* controllers found by scan(), in discovery order */
310 static	Ctlr	*ctlrtab[4];
311 static	int	nctlr;		/* number of valid entries in ctlrtab */
/* free list of receive blocks, linked through Block.next */
312 static	Lock	rblock;		/* guards rbpool */
313 static	Block	*rbpool;
314 
315 static void
316 readstats(Ctlr *c)
317 {
318 	int i;
319 
320 	qlock(&c->slock);
321 	for(i = 0; i < nelem(c->stats); i++)
322 		c->stats[i] += c->reg[stattab[i].reg >> 2];
323 	qunlock(&c->slock);
324 }
325 
/*
 * value stored in Ether.mbps for each link state computed by lproc():
 * index 0 when the link is down, 1 or 2 when up depending on Lnkspd.
 */
326 static int speedtab[] = {
327 	0,
328 	1000,
329 	10000,
330 };
331 
332 static long
333 ifstat(Ether *e, void *a, long n, ulong offset)
334 {
335 	uint i, *t;
336 	char *s, *p, *q;
337 	Ctlr *c;
338 
339 	c = e->ctlr;
340 	p = s = malloc(READSTR);
341 	q = p + READSTR;
342 
343 	readstats(c);
344 	for(i = 0; i < nelem(stattab); i++)
345 		if(c->stats[i] > 0)
346 			p = seprint(p, q, "%.10s  %uld\n", stattab[i].name,					c->stats[i]);
347 	t = c->speeds;
348 	p = seprint(p, q, "speeds: 0:%d 1000:%d 10000:%d\n", t[0], t[1], t[2]);
349 	seprint(p, q, "rdfree %d rdh %d rdt %d\n", c->rdfree, c->reg[Rdt],
350 		c->reg[Rdh]);
351 	n = readstr(offset, a, n, s);
352 	free(s);
353 
354 	return n;
355 }
356 
357 static void
358 im(Ctlr *c, int i)
359 {
360 	ilock(&c->imlock);
361 	c->im |= i;
362 	c->reg[Ims] = c->im;
363 	iunlock(&c->imlock);
364 }
365 
366 static int
367 lim(void *v)
368 {
369 	return ((Ctlr*)v)->lim != 0;
370 }
371 
372 static void
373 lproc(void *v)
374 {
375 	int r, i;
376 	Ctlr *c;
377 	Ether *e;
378 
379 	e = v;
380 	c = e->ctlr;
381 	for (;;) {
382 		r = c->reg[Links];
383 		e->link = (r & Lnkup) != 0;
384 		i = 0;
385 		if(e->link)
386 			i = 1 + ((r & Lnkspd) != 0);
387 		c->speeds[i]++;
388 		e->mbps = speedtab[i];
389 		c->lim = 0;
390 		im(c, Lsc);
391 		sleep(&c->lrendez, lim, c);
392 		c->lim = 0;
393 	}
394 }
395 
396 static long
397 ctl(Ether *, void *, long)
398 {
399 	error(Ebadarg);
400 	return -1;
401 }
402 
403 static Block*
404 rballoc(void)
405 {
406 	Block *bp;
407 
408 	ilock(&rblock);
409 	if((bp = rbpool) != nil){
410 		rbpool = bp->next;
411 		bp->next = 0;
412 		_xinc(&bp->ref);	/* prevent bp from being freed */
413 	}
414 	iunlock(&rblock);
415 	return bp;
416 }
417 
418 void
419 rbfree(Block *b)
420 {
421 	b->rp = b->wp = (uchar*)PGROUND((uintptr)b->base);
422  	b->flag &= ~(Bipck | Budpck | Btcpck | Bpktck);
423 	ilock(&rblock);
424 	b->next = rbpool;
425 	rbpool = b;
426 	iunlock(&rblock);
427 }
428 
/* successor of ring index x, where m is the ring size - 1 (size must be a power of 2) */
429 #define Next(x, m)	(((x)+1) & (m))
430 
431 static int
432 cleanup(Ctlr *c, int tdh)
433 {
434 	Block *b;
435 	uint m, n;
436 
437 	m = c->ntd - 1;
438 	while(c->tdba[n = Next(tdh, m)].status & Tdd){
439 		tdh = n;
440 		b = c->tb[tdh];
441 		c->tb[tdh] = 0;
442 		freeb(b);
443 		c->tdba[tdh].status = 0;
444 	}
445 	return tdh;
446 }
447 
448 void
449 transmit(Ether *e)
450 {
451 	uint i, m, tdt, tdh;
452 	Ctlr *c;
453 	Block *b;
454 	Td *t;
455 
456 	c = e->ctlr;
457 //	qlock(&c->tlock);
458 	if(!canqlock(&c->tlock)){
459 		im(c, Itx0);
460 		return;
461 	}
462 	tdh = c->tdh = cleanup(c, c->tdh);
463 	tdt = c->tdt;
464 	m = c->ntd - 1;
465 	for(i = 0; i < 8; i++){
466 		if(Next(tdt, m) == tdh){
467 			im(c, Itx0);
468 			break;
469 		}
470 		if(!(b = qget(e->oq)))
471 			break;
472 		t = c->tdba + tdt;
473 		t->addr[0] = PCIWADDR(b->rp);
474 		t->length = BLEN(b);
475 		t->cmd = Rs | Ifcs | Teop;
476 		c->tb[tdt] = b;
477 		tdt = Next(tdt, m);
478 	}
479 	if(i){
480 		c->tdt = tdt;
481 		c->reg[Tdt] = tdt;
482 	}
483 	qunlock(&c->tlock);
484 }
485 
486 static int
487 tim(void *c)
488 {
489 	return ((Ctlr*)c)->tim != 0;
490 }
491 
492 static void
493 tproc(void *v)
494 {
495 	Ctlr *c;
496 	Ether *e;
497 
498 	e = v;
499 	c = e->ctlr;
500 	for (;;) {
501 		sleep(&c->trendez, tim, c);	/* transmit kicks us */
502 		c->tim = 0;
503 		transmit(e);
504 	}
505 }
506 
507 static void
508 rxinit(Ctlr *c)
509 {
510 	int i;
511 	Block *b;
512 
513 	c->reg[Rxctl] &= ~Rxen;
514 	for(i = 0; i < c->nrd; i++){
515 		b = c->rb[i];
516 		c->rb[i] = 0;
517 		if(b)
518 			freeb(b);
519 	}
520 	c->rdfree = 0;
521 
522 	c->reg[Fctrl] |= Bam;
523 	c->reg[Rxcsum] |= Ipcs;
524 	c->reg[Srrctl] = (c->rbsz + 1023)/1024;
525 	c->reg[Mhadd] = c->rbsz << 16;
526 	c->reg[Hlreg0] |= Jumboen;
527 
528 	c->reg[Rbal] = PCIWADDR(c->rdba);
529 	c->reg[Rbah] = 0;
530 	c->reg[Rdlen] = c->nrd*sizeof(Rd);
531 	c->reg[Rdh] = 0;
532 	c->reg[Rdt] = c->rdt = 0;
533 
534 	c->reg[Rdrxctl] = Rdmt¼;
535 	c->reg[Rxdctl] = 8<<Wthresh | 8<<Pthresh | 4<<Hthresh | Renable;
536 	c->reg[Rxctl] |= Rxen | Dmbyps;
537 }
538 
539 static void
540 replenish(Ctlr *c, uint rdh)
541 {
542 	int rdt, m, i;
543 	Block *b;
544 	Rd *r;
545 
546 	m = c->nrd - 1;
547 	i = 0;
548 	for(rdt = c->rdt; Next(rdt, m) != rdh; rdt = Next(rdt, m)){
549 		r = c->rdba + rdt;
550 		if(!(b = rballoc())){
551 			print("82598: no buffers\n");
552 			break;
553 		}
554 		c->rb[rdt] = b;
555 		r->addr[0] = PCIWADDR(b->rp);
556 		r->status = 0;
557 		c->rdfree++;
558 		i++;
559 	}
560 	if(i)
561 		c->reg[Rdt] = c->rdt = rdt;
562 }
563 
564 static int
565 rim(void *v)
566 {
567 	return ((Ctlr*)v)->rim != 0;
568 }
569 
/* all-zero ethernet address; not referenced by the code visible in this file */
570 static uchar zeroea[Eaddrlen];
571 
572 void
573 rproc(void *v)
574 {
575 	uint m, rdh;
576 	Block *b;
577 	Ctlr *c;
578 	Ether *e;
579 	Rd *r;
580 
581 	e = v;
582 	c = e->ctlr;
583 	m = c->nrd - 1;
584 	rdh = 0;
585 loop:
586 	replenish(c, rdh);
587 	im(c, Irx0);
588 	sleep(&c->rrendez, rim, c);
589 loop1:
590 	c->rim = 0;
591 	if(c->nrd - c->rdfree >= 16)
592 		replenish(c, rdh);
593 	r = c->rdba + rdh;
594 	if(!(r->status & Rdd))
595 		goto loop;		/* UGH */
596 	b = c->rb[rdh];
597 	c->rb[rdh] = 0;
598 	b->wp += r->length;
599 	b->lim = b->wp;		/* lie like a dog */
600 	if(!(r->status & Ixsm)){
601 		if(r->status & Ipcs)
602 			b->flag |= Bipck;
603 		if(r->status & Tcpcs)
604 			b->flag |= Btcpck | Budpck;
605 		b->checksum = r->cksum;
606 	}
607 //	r->status = 0;
608 	etheriq(e, b, 1);
609 	c->rdfree--;
610 	rdh = Next(rdh, m);
611 	goto loop1;			/* UGH */
612 }
613 
614 static void
615 promiscuous(void *a, int on)
616 {
617 	Ctlr *c;
618 	Ether *e;
619 
620 	e = a;
621 	c = e->ctlr;
622 	if(on)
623 		c->reg[Fctrl] |= Upe | Mpe;
624 	else
625 		c->reg[Fctrl] &= ~(Upe | Mpe);
626 }
627 
628 static void
629 multicast(void *a, uchar *ea, int on)
630 {
631 	int b, i;
632 	Ctlr *c;
633 	Ether *e;
634 
635 	e = a;
636 	c = e->ctlr;
637 
638 	/*
639 	 * multiple ether addresses can hash to the same filter bit,
640 	 * so it's never safe to clear a filter bit.
641 	 * if we want to clear filter bits, we need to keep track of
642 	 * all the multicast addresses in use, clear all the filter bits,
643 	 * then set the ones corresponding to in-use addresses.
644 	 */
645 	i = ea[5] >> 1;
646 	b = (ea[5]&1)<<4 | ea[4]>>4;
647 	b = 1 << b;
648 	if(on)
649 		c->mta[i] |= b;
650 //	else
651 //		c->mta[i] &= ~b;
652 	c->reg[Mta+i] = c->mta[i];
653 }
654 
655 static int
656 detach(Ctlr *c)
657 {
658 	int i;
659 
660 	c->reg[Imc] = ~0;
661 	c->reg[Ctrl] |= Rst;
662 	for(i = 0; i < 100; i++){
663 		delay(1);
664 		if((c->reg[Ctrl] & Rst) == 0)
665 			break;
666 	}
667 	if (i >= 100)
668 		return -1;
669 	/* errata */
670 	delay(50);
671 	c->reg[Ecc] &= ~(1<<21 | 1<<18 | 1<<9 | 1<<6);
672 
673 	/* not cleared by reset; kill it manually. */
674 	for(i = 1; i < 16; i++)
675 		c->reg[Rah] &= ~(1 << 31);
676 	for(i = 0; i < 128; i++)
677 		c->reg[Mta + i] = 0;
678 	for(i = 1; i < 640; i++)
679 		c->reg[Vfta + i] = 0;
680 	return 0;
681 }
682 
683 static void
684 shutdown(Ether *e)
685 {
686 	detach(e->ctlr);
687 }
688 
689 /* ≤ 20ms */
690 static ushort
691 eeread(Ctlr *c, int i)
692 {
693 	c->reg[Eerd] = EEstart | i<<2;
694 	while((c->reg[Eerd] & EEdone) == 0)
695 		;
696 	return c->reg[Eerd] >> 16;
697 }
698 
699 static int
700 eeload(Ctlr *c)
701 {
702 	ushort u, v, p, l, i, j;
703 
704 	if((eeread(c, 0) & 0xc0) != 0x40)
705 		return -1;
706 	u = 0;
707 	for(i = 0; i < 0x40; i++)
708 		u +=  eeread(c, i);
709 	for(i = 3; i < 0xf; i++){
710 		p = eeread(c, i);
711 		l = eeread(c, p++);
712 		if((int)p + l + 1 > 0xffff)
713 			continue;
714 		for(j = p; j < p + l; j++)
715 			u += eeread(c, j);
716 	}
717 	if(u != 0xbaba)
718 		return -1;
719 	if(c->reg[Status] & (1<<3))
720 		u = eeread(c, 10);
721 	else
722 		u = eeread(c, 9);
723 	u++;
724 	for(i = 0; i < Eaddrlen;){
725 		v = eeread(c, u + i/2);
726 		c->ra[i++] = v;
727 		c->ra[i++] = v>>8;
728 	}
729 	c->ra[5] += (c->reg[Status] & 0xc) >> 2;
730 	return 0;
731 }
732 
733 static int
734 reset(Ctlr *c)
735 {
736 	int i;
737 	uchar *p;
738 
739 	if(detach(c)){
740 		print("82598: reset timeout\n");
741 		return -1;
742 	}
743 	if(eeload(c)){
744 		print("82598: eeprom failure\n");
745 		return -1;
746 	}
747 	p = c->ra;
748 	c->reg[Ral] = p[3]<<24 | p[2]<<16 | p[1]<<8 | p[0];
749 	c->reg[Rah] = p[5]<<8 | p[4] | 1<<31;
750 
751 	readstats(c);
752 	for(i = 0; i<nelem(c->stats); i++)
753 		c->stats[i] = 0;
754 
755 	c->reg[Ctrlext] |= 1 << 16;
756 	/* make some guesses for flow control */
757 	c->reg[Fcrtl] = 0x10000 | 1<<31;
758 	c->reg[Fcrth] = 0x40000 | 1<<31;
759 	c->reg[Rcrtv] = 0x6000;
760 
761 	/* configure interrupt mapping (don't ask) */
762 	c->reg[Ivar+0] =     0 | 1<<7;
763 	c->reg[Ivar+64/4] =  1 | 1<<7;
764 //	c->reg[Ivar+97/4] = (2 | 1<<7) << (8*(97%4));
765 
766 	/* interrupt throttling goes here. */
767 	for(i = Itr; i < Itr + 20; i++)
768 		c->reg[i] = 128;		/* ¼µs intervals */
769 	c->reg[Itr + Itx0] = 256;
770 	return 0;
771 }
772 
773 static void
774 txinit(Ctlr *c)
775 {
776 	Block *b;
777 	int i;
778 
779 	c->reg[Txdctl] = 16<<Wthresh | 16<<Pthresh;
780 	for(i = 0; i < c->ntd; i++){
781 		b = c->tb[i];
782 		c->tb[i] = 0;
783 		if(b)
784 			freeb(b);
785 	}
786 	memset(c->tdba, 0, c->ntd * sizeof(Td));
787 	c->reg[Tdbal] = PCIWADDR(c->tdba);
788 	c->reg[Tdbah] = 0;
789 	c->reg[Tdlen] = c->ntd*sizeof(Td);
790 	c->reg[Tdh] = 0;
791 	c->reg[Tdt] = 0;
792 	c->tdh = c->ntd - 1;
793 	c->tdt = 0;
794 	c->reg[Txdctl] |= Ten;
795 }
796 
/*
 * Ether attach hook.  on the first call for a controller: allocate
 * the descriptor rings and block arrays in one chunk, fill the
 * shared receive buffer pool, initialize rx and tx, and start the
 * link, receive and transmit kprocs.  later calls return at once
 * because c->alloc is already set.
 * raises Enomem if the ring allocation or a buffer allocation fails.
 */
797 static void
798 attach(Ether *e)
799 {
800 	Block *b;
801 	Ctlr *c;
802 	int t;
803 	char buf[KNAMELEN];
804 
805 	c = e->ctlr;
806 	c->edev = e;			/* point back to Ether* */
807 	qlock(&c->alock);
808 	if(c->alloc){
809 		qunlock(&c->alock);
810 		return;
811 	}
812 
813 	c->nrd = Nrd;
814 	c->ntd = Ntd;
	/* both rings, 255 bytes of slack each for 256-byte alignment, then the two Block* arrays */
815 	t  = c->nrd * sizeof *c->rdba + 255;
816 	t += c->ntd * sizeof *c->tdba + 255;
817 	t += (c->ntd + c->nrd) * sizeof(Block*);
818 	c->alloc = malloc(t);
819 	qunlock(&c->alock);
820 	if(c->alloc == nil)
821 		error(Enomem);
822 
	/* carve the chunk up: rx ring, tx ring, rx block array, tx block array */
823 	c->rdba = (Rd*)ROUNDUP((uintptr)c->alloc, 256);
824 	c->tdba = (Td*)ROUNDUP((uintptr)(c->rdba + c->nrd), 256);
825 	c->rb = (Block**)(c->tdba + c->ntd);
826 	c->tb = (Block**)(c->rb + c->nrd);
827 
828 	if(waserror()){
		/* undo: drain the pool, clearing the free hook so that freeb does not call rbfree again */
829 		while(b = rballoc()){
830 			b->free = 0;
831 			freeb(b);
832 		}
833 		free(c->alloc);
834 		c->alloc = nil;
835 		nexterror();
836 	}
	/* each new block goes onto the pool via freeb, which calls its free hook, rbfree */
837 	for(c->nrb = 0; c->nrb < 2*Nrb; c->nrb++){
838 		if(!(b = allocb(c->rbsz+BY2PG)))
839 			error(Enomem);
840 		b->free = rbfree;
841 		freeb(b);
842 	}
843 	poperror();
844 
845 	rxinit(c);
846 	txinit(c);
847 
	/* one kproc each for link, receive and transmit, named #l<ctlrno> plus l, r or t */
848 	sprint(buf, "#l%dl", e->ctlrno);
849 	kproc(buf, lproc, e);
850 	sprint(buf, "#l%dr", e->ctlrno);
851 	kproc(buf, rproc, e);
852 	sprint(buf, "#l%dt", e->ctlrno);
853 	kproc(buf, tproc, e);
854 }
855 
856 static void
857 interrupt(Ureg*, void *v)
858 {
859 	int icr, im;
860 	Ctlr *c;
861 	Ether *e;
862 
863 	e = v;
864 	c = e->ctlr;
865 	ilock(&c->imlock);
866 	c->reg[Imc] = ~0;
867 	im = c->im;
868 	while((icr = c->reg[Icr] & c->im) != 0){
869 		if(icr & Lsc){
870 			im &= ~Lsc;
871 			c->lim = icr & Lsc;
872 			wakeup(&c->lrendez);
873 		}
874 		if(icr & Irx0){
875 			im &= ~Irx0;
876 			c->rim = icr & Irx0;
877 			wakeup(&c->rrendez);
878 		}
879 		if(icr & Itx0){
880 			im &= ~Itx0;
881 			c->tim = icr & Itx0;
882 			wakeup(&c->trendez);
883 		}
884 	}
885 	c->reg[Ims] = c->im = im;
886 	iunlock(&c->imlock);
887 }
888 
889 static void
890 scan(void)
891 {
892 	ulong io, io3;
893 	void *mem, *mem3;
894 	Ctlr *c;
895 	Pcidev *p;
896 
897 	p = 0;
898 	while(p = pcimatch(p, 0x8086, 0)){
899 		switch(p->did){
900 		case 0x10c6:		/* 82598 af dual port */
901 		case 0x10c7:		/* 82598 af single port */
902 		case 0x10b6:		/* 82598 backplane */
903 		case 0x10dd:		/* 82598 at cx4 */
904 			break;
905 		default:
906 			continue;
907 		}
908 		if(nctlr == nelem(ctlrtab)){
909 			print("i82598: too many controllers\n");
910 			return;
911 		}
912 		io = p->mem[0].bar & ~0xf;
913 		mem = vmap(io, p->mem[0].size);
914 		if(mem == nil){
915 			print("i82598: can't map %#p\n", p->mem[0].bar);
916 			continue;
917 		}
918 		io3 = p->mem[3].bar & ~0xf;
919 		mem3 = vmap(io3, p->mem[3].size);
920 		if(mem3 == nil){
921 			print("i82598: can't map %#p\n", p->mem[3].bar);
922 			vunmap(mem, p->mem[0].size);
923 			continue;
924 		}
925 		c = malloc(sizeof *c);
926 		c->p = p;
927 		c->reg = (u32int*)mem;
928 		c->reg3 = (u32int*)mem3;
929 		c->rbsz = Rbsz;
930 		if(reset(c)){
931 			print("i82598: can't reset\n");
932 			free(c);
933 			vunmap(mem, p->mem[0].size);
934 			vunmap(mem3, p->mem[3].size);
935 			continue;
936 		}
937 		pcisetbme(p);
938 		ctlrtab[nctlr++] = c;
939 	}
940 }
941 
942 static int
943 pnp(Ether *e)
944 {
945 	int i;
946 	Ctlr *c = nil;
947 
948 	if(nctlr == 0)
949 		scan();
950 	for(i = 0; i < nctlr; i++){
951 		c = ctlrtab[i];
952 		if(c == nil || c->flag & Factive)
953 			continue;
954 		if(e->port == 0 || e->port == (ulong)c->reg)
955 			break;
956 	}
957 	if (i >= nctlr)
958 		return -1;
959 	c->flag |= Factive;
960 	e->ctlr = c;
961 	e->port = (uintptr)c->reg;
962 	e->irq = c->p->intl;
963 	e->tbdf = c->p->tbdf;
964 	e->mbps = 10000;
965 	e->maxmtu = c->rbsz;
966 	memmove(e->ea, c->ra, Eaddrlen);
967 	e->arg = e;
968 	e->attach = attach;
969 	e->ctl = ctl;
970 	e->ifstat = ifstat;
971 	e->interrupt = interrupt;
972 	e->multicast = multicast;
973 	e->promiscuous = promiscuous;
974 	e->shutdown = shutdown;
975 	e->transmit = transmit;
976 
977 	return 0;
978 }
979 
980 void
981 ether82598link(void)
982 {
983 	addethercard("i82598", pnp);
984 }
985