1 /*
2 * intel pci-express 10Gb ethernet driver for 8259[89]
3 * copyright © 2007, coraid, inc.
4 * depessimised and made to work on the 82599 at bell labs, 2013.
5 *
6 * 82599 requests should ideally not cross a 4KB (page) boundary.
7 */
8 #include "u.h"
9 #include "../port/lib.h"
10 #include "mem.h"
11 #include "dat.h"
12 #include "fns.h"
13 #include "io.h"
14 #include "../port/error.h"
15 #include "../port/netif.h"
16 #include "etherif.h"
17
/*
 * advance ring index x by one and wrap it with mask m.
 * callers in this file pass m = ring size - 1, so the ring size
 * must be a power of 2 for the mask to act as a modulus.
 */
#define NEXTPOW2(x, m)	(((x)+1) & (m))
19
/* buffer size, descriptor alignment and ring/pool sizes */
enum {
	Rbsz	= ETHERMAXTU+32, /* +slop is for vlan headers, crcs, etc. */
	Descalign= 128,		/* 599 manual needs 128-byte alignment */

	/* tunable parameters */
	Goslow	= 0,		/* flag: go slow by throttling intrs, etc. */
	/* were 256, 1024 & 64, but 30, 47 and 1 are ample. */
	Nrd	= 64,		/* rx descriptors; multiple of 8, power of 2 for NEXTPOW2 */
	Nrb	= 128,		/* attach() puts 2*Nrb receive buffers in the pool */
	Ntd	= 32,		/* tx descriptors; multiple of 8, power of 2 for NEXTPOW2 */
};
31
/*
 * device registers.  each value is a byte offset divided by 4,
 * i.e. an index into the u32int register array Ctlr.reg.
 */
enum {
	/* general */
	Ctrl		= 0x00000/4,	/* Device Control */
	Status		= 0x00008/4,	/* Device Status */
	Ctrlext		= 0x00018/4,	/* Extended Device Control */
	Esdp		= 0x00020/4,	/* extended sdp control */
	Esodp		= 0x00028/4,	/* extended od sdp control (i2cctl on 599) */
	Ledctl		= 0x00200/4,	/* led control */
	Tcptimer	= 0x0004c/4,	/* tcp timer */
	Ecc		= 0x110b0/4,	/* errata ecc control magic (pcie intr cause on 599) */

	/* nvm */
	Eec		= 0x10010/4,	/* eeprom/flash control */
	Eerd		= 0x10014/4,	/* eeprom read */
	Fla		= 0x1001c/4,	/* flash access */
	Flop		= 0x1013c/4,	/* flash opcode */
	Grc		= 0x10200/4,	/* general rx control */

	/* interrupt */
	Icr		= 0x00800/4,	/* interrupt cause read */
	Ics		= 0x00808/4,	/* " set */
	Ims		= 0x00880/4,	/* " mask read/set (actually enable) */
	Imc		= 0x00888/4,	/* " mask clear */
	Iac		= 0x00810/4,	/* " auto clear */
	Iam		= 0x00890/4,	/* " auto mask enable */
	Itr		= 0x00820/4,	/* " throttling rate regs (0-19) */
	Ivar		= 0x00900/4,	/* " vector allocation regs. */
	/* msi interrupt */
	Msixt		= 0x0000/4,	/* msix table (bar3) */
	Msipba		= 0x2000/4,	/* msix pending bit array (bar3) */
	Pbacl		= 0x11068/4,	/* pba clear */
	Gpie		= 0x00898/4,	/* general purpose int enable */

	/* flow control */
	Pfctop		= 0x03008/4,	/* priority flow ctl type opcode */
	Fcttv		= 0x03200/4,	/* " transmit timer value (0-3) */
	Fcrtl		= 0x03220/4,	/* " rx threshold low (0-7) +8n */
	Fcrth		= 0x03260/4,	/* " rx threshold high (0-7) +8n */
	Rcrtv		= 0x032a0/4,	/* " refresh value threshold */
	Tfcs		= 0x0ce00/4,	/* " tx status */

	/* rx dma */
	Rbal		= 0x01000/4,	/* rx desc base low (0-63) +0x40n */
	Rbah		= 0x01004/4,	/* " high */
	Rdlen		= 0x01008/4,	/* " length */
	Rdh		= 0x01010/4,	/* " head */
	Rdt		= 0x01018/4,	/* " tail */
	Rxdctl		= 0x01028/4,	/* " control */

	Srrctl		= 0x02100/4,	/* split & replication rx ctl. array */
	Dcarxctl	= 0x02200/4,	/* rx dca control */
	Rdrxctl		= 0x02f00/4,	/* rx dma control */
	Rxpbsize	= 0x03c00/4,	/* rx packet buffer size */
	Rxctl		= 0x03000/4,	/* rx control */
	Dropen		= 0x03d04/4,	/* drop enable control (598 only) */

	/* rx */
	Rxcsum		= 0x05000/4,	/* rx checksum control */
	Rfctl		= 0x05008/4,	/* rx filter control */
	Mta		= 0x05200/4,	/* multicast table array (0-127) */
	Ral98		= 0x05400/4,	/* rx address low (598) */
	Rah98		= 0x05404/4,
	Ral99		= 0x0a200/4,	/* rx address low array (599) */
	Rah99		= 0x0a204/4,
	Psrtype		= 0x05480/4,	/* packet split rx type. */
	Vfta		= 0x0a000/4,	/* vlan filter table array. */
	Fctrl		= 0x05080/4,	/* filter control */
	Vlnctrl		= 0x05088/4,	/* vlan control */
	Msctctrl	= 0x05090/4,	/* multicast control */
	Mrqc		= 0x05818/4,	/* multiple rx queues cmd */
	Vmdctl		= 0x0581c/4,	/* vmdq control (598 only) */
	Imir		= 0x05a80/4,	/* immediate irq rx (0-7) (598 only) */
	Imirext		= 0x05aa0/4,	/* immediate irq rx ext (598 only) */
	Imirvp		= 0x05ac0/4,	/* immediate irq vlan priority (598 only) */
	Reta		= 0x05c00/4,	/* redirection table */
	Rssrk		= 0x05c80/4,	/* rss random key */

	/* tx */
	Tdbal		= 0x06000/4,	/* tx desc base low +0x40n array */
	Tdbah		= 0x06004/4,	/* " high */
	Tdlen		= 0x06008/4,	/* " len */
	Tdh		= 0x06010/4,	/* " head */
	Tdt		= 0x06018/4,	/* " tail */
	Txdctl		= 0x06028/4,	/* " control */
	Tdwbal		= 0x06038/4,	/* " write-back address low */
	Tdwbah		= 0x0603c/4,

	Dtxctl98	= 0x07e00/4,	/* tx dma control (598 only) */
	Dtxctl99	= 0x04a80/4,	/* tx dma control (599 only) */
	Tdcatxctrl98	= 0x07200/4,	/* tx dca register (0-15) (598 only) */
	Tdcatxctrl99	= 0x0600c/4,	/* tx dca register (0-127) (599 only) */
	Tipg		= 0x0cb00/4,	/* tx inter-packet gap (598 only) */
	Txpbsize	= 0x0cc00/4,	/* tx packet-buffer size (0-15) */

	/* mac */
	Hlreg0		= 0x04240/4,	/* highlander control reg 0 */
	Hlreg1		= 0x04244/4,	/* highlander control reg 1 (ro) */
	Msca		= 0x0425c/4,	/* mdi signal cmd & addr */
	Msrwd		= 0x04260/4,	/* mdi single rw data */
	Mhadd		= 0x04268/4,	/* mac addr high & max frame */
	Pcss1		= 0x04288/4,	/* xgxs status 1 */
	Pcss2		= 0x0428c/4,
	Xpcss		= 0x04290/4,	/* 10gb-x pcs status */
	Serdesc		= 0x04298/4,	/* serdes control */
	Macs		= 0x0429c/4,	/* fifo control & report */
	Autoc		= 0x042a0/4,	/* autodetect control & status */
	Links		= 0x042a4/4,	/* link status */
	Links2		= 0x04324/4,	/* 599 only */
	Autoc2		= 0x042a8/4,
};
142
/* Ctlr.flag bits, then bit and field definitions grouped by register */
enum {
	Factive		= 1<<0,		/* Ctlr.flag: controller claimed by pnp() */
	Enable		= 1<<31,	/* top bit, used with Rah*, Fcrtl and Fcrth */

	/* Ctrl */
	Rst		= 1<<26,	/* full nic reset */

	/* Txdctl */
	Ten		= 1<<25,

	/* Dtxctl99 */
	Te		= 1<<0,		/* dma tx enable */

	/* Fctrl */
	Bam		= 1<<10,	/* broadcast accept mode */
	Upe		= 1<<9,		/* unicast promiscuous */
	Mpe		= 1<<8,		/* multicast promiscuous */

	/* Rxdctl */
	Pthresh		= 0,		/* prefetch threshold shift in bits */
	Hthresh		= 8,		/* host buffer minimum threshold " */
	Wthresh		= 16,		/* writeback threshold */
	Renable		= 1<<25,

	/* Rxctl */
	Rxen		= 1<<0,
	Dmbyps		= 1<<1,		/* descr. monitor bypass (598 only) */

	/* Rdrxctl */
	Rdmt½		= 0,		/* 598 */
	Rdmt¼		= 1,		/* 598 */
	Rdmt⅛		= 2,		/* 598 */
	Crcstrip	= 1<<1,		/* 599 */
	Rscfrstsize	= 037<<17,	/* 599; should be zero */

	/* Rxcsum */
	Ippcse		= 1<<12,	/* ip payload checksum enable */

	/* Eerd */
	EEstart		= 1<<0,		/* Start Read */
	EEdone		= 1<<1,		/* Read done */

	/* interrupts */
	Irx0		= 1<<0,		/* driver defined */
	Itx0		= 1<<1,		/* driver defined */
	Lsc		= 1<<20,	/* link status change */

	/* Links */
	Lnkup		= 1<<30,
	Lnkspd		= 1<<29,

	/* Hlreg0 */
	Txcrcen		= 1<<0,		/* add crc during xmit */
	Rxcrcstrip	= 1<<1,		/* strip crc during recv */
	Jumboen		= 1<<2,
	Txpaden		= 1<<10,	/* pad short frames during xmit */

	/* Autoc */
	Flu		= 1<<0,		/* force link up */
	Lmsshift	= 13,		/* link mode select shift */
	Lmsmask		= 7,
};
205
typedef struct Ctlr Ctlr;
typedef struct Rd Rd;
typedef struct Td Td;

/* one hardware statistics counter: where it lives and what ifstat calls it */
typedef struct {
	uint	reg;		/* byte offset of the register; readstats() shifts it right by 2 */
	char	*name;		/* label printed by ifstat() */
} Stat;
214
/*
 * statistics registers accumulated by readstats() and reported by
 * ifstat().  offsets are in bytes, unlike the register enum above.
 * Ctlr.stats has one slot per entry, in the same order.
 */
Stat stattab[] = {
	0x4000,	"crc error",
	0x4004,	"illegal byte",
	0x4008,	"short packet",
	0x3fa0,	"missed pkt0",
	0x4034,	"mac local flt",
	0x4038,	"mac rmt flt",
	0x4040,	"rx length err",
	0x3f60,	"xon tx",
	0xcf60,	"xon rx",
	0x3f68,	"xoff tx",
	0xcf68,	"xoff rx",
	0x405c,	"rx 040",
	0x4060,	"rx 07f",
	0x4064,	"rx 100",
	0x4068,	"rx 200",
	0x406c,	"rx 3ff",
	0x4070,	"rx big",
	0x4074,	"rx ok",
	0x4078,	"rx bcast",
	0x3fc0,	"rx no buf0",
	0x40a4,	"rx runt",
	0x40a8,	"rx frag",
	0x40ac,	"rx ovrsz",
	0x40b0,	"rx jab",
	0x40d0,	"rx pkt",

	0x40d4,	"tx pkt",
	0x40d8,	"tx 040",
	0x40dc,	"tx 07f",
	0x40e0,	"tx 100",
	0x40e4,	"tx 200",
	0x40e8,	"tx 3ff",
	0x40ec,	"tx big",
	0x40f4,	"tx bcast",
	0x4120,	"xsum err",
};
252
/* Rd.status bits; this driver only tests Rdd (in rproc) */
enum {
	Pif	= 1<<7,	/* past exact filter (sic) */
	Ipcs	= 1<<6,	/* ip checksum calculated */
	L4cs	= 1<<5,	/* layer 2 */
	Tcpcs	= 1<<4,	/* tcp checksum calculated */
	Vp	= 1<<3,	/* 802.1q packet matched vet */
	Ixsm	= 1<<2,	/* ignore checksum */
	Reop	= 1<<1,	/* end of packet */
	Rdd	= 1<<0,	/* descriptor done */
};
264
/*
 * replenish() stores the buffer's PCIWADDR in addr[0] and zeroes status;
 * rproc() waits for Rdd in status and then trusts length.
 */
struct Rd {			/* Receive Descriptor */
	u32int	addr[2];
	ushort	length;
	ushort	cksum;
	uchar	status;
	uchar	errors;
	ushort	vlan;
};
273
/* Td.cmd and Td.status bits */
enum {
	/* Td cmd */
	Rs	= 1<<3,		/* report status */
	Ic	= 1<<2,		/* insert checksum */
	Ifcs	= 1<<1,		/* insert FCS (ethernet crc) */
	Teop	= 1<<0,		/* end of packet */

	/* Td status */
	Tdd	= 1<<0,		/* descriptor done */
};
284
/*
 * transmit() fills in addr[0], length and cmd;
 * cleanup() watches status for Tdd and then zeroes it.
 */
struct Td {			/* Transmit Descriptor */
	u32int	addr[2];
	ushort	length;
	uchar	cso;
	uchar	cmd;
	uchar	status;
	uchar	css;
	ushort	vlan;
};
294
295 struct Ctlr {
296 Pcidev *p;
297 Ether *edev;
298 int type;
299
300 /* virtual */
301 u32int *reg;
302 u32int *msix; /* unused */
303
304 /* physical */
305 u32int *physreg;
306 u32int *physmsix; /* unused */
307
308 uchar flag;
309 int nrd;
310 int ntd;
311 int nrb; /* # bufs this Ctlr has in the pool */
312 uint rbsz;
313 int procsrunning;
314 int attached;
315
316 Watermark wmrb;
317 Watermark wmrd;
318 Watermark wmtd;
319
320 QLock slock;
321 QLock alock; /* attach lock */
322 QLock tlock;
323 Rendez lrendez;
324 Rendez trendez;
325 Rendez rrendez;
326
327 uint im; /* interrupt mask */
328 uint lim;
329 uint rim;
330 uint tim;
331 Lock imlock;
332
333 Rd* rdba; /* receive descriptor base address */
334 Block** rb; /* receive buffers */
335 int rdt; /* receive descriptor tail */
336 int rdfree; /* rx descriptors awaiting packets */
337
338 Td* tdba; /* transmit descriptor base address */
339 int tdh; /* transmit descriptor head */
340 int tdt; /* transmit descriptor tail */
341 Block** tb; /* transmit buffers */
342
343 uchar ra[Eaddrlen]; /* receive address */
344 uchar mta[128]; /* multicast table array */
345 ulong stats[nelem(stattab)];
346 uint speeds[3];
347 };
348
/* values of Ctlr.type, chosen by scan() from the pci device id */
enum {
	I82598 = 1,
	I82599,
};
353
static Ctlr *ctlrtab[4];	/* controllers found by scan() */
static int nctlr;		/* number of entries in use in ctlrtab */
static Lock rblock;		/* guards rbpool and nrbfull */
static Block *rbpool;		/* free receive Blocks, linked through ->next */
static int nrbfull;	/* # of rcv Blocks with data awaiting processing */
359
360 static void
readstats(Ctlr * ctlr)361 readstats(Ctlr *ctlr)
362 {
363 int i;
364
365 qlock(&ctlr->slock);
366 for(i = 0; i < nelem(ctlr->stats); i++)
367 ctlr->stats[i] += ctlr->reg[stattab[i].reg >> 2];
368 qunlock(&ctlr->slock);
369 }
370
/* mbps for each link state computed by lproc(): 0 = down, 1 and 2 = up */
static int speedtab[] = {
	0,
	1000,
	10000,
};
376
377 static long
ifstat(Ether * edev,void * a,long n,ulong offset)378 ifstat(Ether *edev, void *a, long n, ulong offset)
379 {
380 uint i, *t;
381 char *s, *p, *e;
382 Ctlr *ctlr;
383
384 ctlr = edev->ctlr;
385 p = s = malloc(READSTR);
386 if(p == nil)
387 error(Enomem);
388 e = p + READSTR;
389
390 readstats(ctlr);
391 for(i = 0; i < nelem(stattab); i++)
392 if(ctlr->stats[i] > 0)
393 p = seprint(p, e, "%.10s %uld\n", stattab[i].name,
394 ctlr->stats[i]);
395 t = ctlr->speeds;
396 p = seprint(p, e, "speeds: 0:%d 1000:%d 10000:%d\n", t[0], t[1], t[2]);
397 p = seprint(p, e, "mtu: min:%d max:%d\n", edev->minmtu, edev->maxmtu);
398 p = seprint(p, e, "rdfree %d rdh %d rdt %d\n", ctlr->rdfree, ctlr->reg[Rdt],
399 ctlr->reg[Rdh]);
400 p = seprintmark(p, e, &ctlr->wmrb);
401 p = seprintmark(p, e, &ctlr->wmrd);
402 p = seprintmark(p, e, &ctlr->wmtd);
403 USED(p);
404 n = readstr(offset, a, n, s);
405 free(s);
406
407 return n;
408 }
409
410 static void
ienable(Ctlr * ctlr,int i)411 ienable(Ctlr *ctlr, int i)
412 {
413 ilock(&ctlr->imlock);
414 ctlr->im |= i;
415 ctlr->reg[Ims] = ctlr->im;
416 iunlock(&ctlr->imlock);
417 }
418
419 static int
lim(void * v)420 lim(void *v)
421 {
422 return ((Ctlr*)v)->lim != 0;
423 }
424
425 static void
lproc(void * v)426 lproc(void *v)
427 {
428 int r, i;
429 Ctlr *ctlr;
430 Ether *e;
431
432 e = v;
433 ctlr = e->ctlr;
434 for (;;) {
435 r = ctlr->reg[Links];
436 e->link = (r & Lnkup) != 0;
437 i = 0;
438 if(e->link)
439 i = 1 + ((r & Lnkspd) != 0);
440 ctlr->speeds[i]++;
441 e->mbps = speedtab[i];
442 ctlr->lim = 0;
443 ienable(ctlr, Lsc);
444 sleep(&ctlr->lrendez, lim, ctlr);
445 ctlr->lim = 0;
446 }
447 }
448
/*
 * control-message handler: no commands are supported, so every
 * request raises Ebadarg.  the return only satisfies the signature.
 */
static long
ctl(Ether *, void *, long)
{
	error(Ebadarg);
	return -1;
}
455
456 static Block*
rballoc(void)457 rballoc(void)
458 {
459 Block *bp;
460
461 ilock(&rblock);
462 if((bp = rbpool) != nil){
463 rbpool = bp->next;
464 bp->next = 0;
465 _xinc(&bp->ref); /* prevent bp from being freed */
466 }
467 iunlock(&rblock);
468 return bp;
469 }
470
471 void
rbfree(Block * b)472 rbfree(Block *b)
473 {
474 b->rp = b->wp = (uchar*)PGROUND((uintptr)b->base);
475 b->flag &= ~(Bipck | Budpck | Btcpck | Bpktck);
476 ilock(&rblock);
477 b->next = rbpool;
478 rbpool = b;
479 nrbfull--;
480 iunlock(&rblock);
481 }
482
483 static int
cleanup(Ctlr * ctlr,int tdh)484 cleanup(Ctlr *ctlr, int tdh)
485 {
486 Block *b;
487 uint m, n;
488
489 m = ctlr->ntd - 1;
490 while(ctlr->tdba[n = NEXTPOW2(tdh, m)].status & Tdd){
491 tdh = n;
492 b = ctlr->tb[tdh];
493 ctlr->tb[tdh] = 0;
494 if (b)
495 freeb(b);
496 ctlr->tdba[tdh].status = 0;
497 }
498 return tdh;
499 }
500
/*
 * move packets from e->oq onto the transmit ring until the queue is
 * empty or the ring is full.  if tlock is already held, only enable
 * the transmit interrupt and return; interrupt() then wakes tproc,
 * which calls transmit() again.
 */
void
transmit(Ether *e)
{
	uint i, m, tdt, tdh;
	Ctlr *ctlr;
	Block *b;
	Td *t;

	ctlr = e->ctlr;
	if(!canqlock(&ctlr->tlock)){
		ienable(ctlr, Itx0);
		return;
	}
	/* first reclaim descriptors the hardware has completed */
	tdh = ctlr->tdh = cleanup(ctlr, ctlr->tdh);
	tdt = ctlr->tdt;
	m = ctlr->ntd - 1;
	for(i = 0; ; i++){
		if(NEXTPOW2(tdt, m) == tdh){	/* ring full? */
			ienable(ctlr, Itx0);
			break;
		}
		if((b = qget(e->oq)) == nil)
			break;
		assert(ctlr->tdba != nil);
		t = ctlr->tdba + tdt;
		t->addr[0] = PCIWADDR(b->rp);
		t->length = BLEN(b);
		t->cmd = Ifcs | Teop;
		if (!Goslow)
			t->cmd |= Rs;
		/* remember the Block so cleanup() can free it once sent */
		ctlr->tb[tdt] = b;
		/* note size of queue of tds awaiting transmission */
		notemark(&ctlr->wmtd, (tdt + Ntd - tdh) % Ntd);
		tdt = NEXTPOW2(tdt, m);
	}
	if(i) {
		/* descriptors must be written before the tail register moves */
		coherence();
		ctlr->reg[Tdt] = ctlr->tdt = tdt;  /* make new Tds active */
		coherence();
		ienable(ctlr, Itx0);
	}
	qunlock(&ctlr->tlock);
}
544
545 static int
tim(void * c)546 tim(void *c)
547 {
548 return ((Ctlr*)c)->tim != 0;
549 }
550
551 static void
tproc(void * v)552 tproc(void *v)
553 {
554 Ctlr *ctlr;
555 Ether *e;
556
557 e = v;
558 ctlr = e->ctlr;
559 for (;;) {
560 sleep(&ctlr->trendez, tim, ctlr); /* xmit interrupt kicks us */
561 ctlr->tim = 0;
562 transmit(e);
563 }
564 }
565
/*
 * (re)initialise the receive side: disable reception, free any Blocks
 * still attached to the ring, program filtering, buffer size and the
 * descriptor ring registers, then re-enable the queue and the receiver.
 */
static void
rxinit(Ctlr *ctlr)
{
	int i, is598, autoc;
	ulong until;
	Block *b;

	ctlr->reg[Rxctl] &= ~Rxen;
	ctlr->reg[Rxdctl] = 0;
	/* drop Blocks left on the ring from an earlier attach */
	for(i = 0; i < ctlr->nrd; i++){
		b = ctlr->rb[i];
		ctlr->rb[i] = 0;
		if(b)
			freeb(b);
	}
	ctlr->rdfree = 0;

	coherence();
	/* accept broadcasts; no unicast or multicast promiscuity */
	ctlr->reg[Fctrl] |= Bam;
	ctlr->reg[Fctrl] &= ~(Upe | Mpe);

	/* intel gets some csums wrong (e.g., errata 44) */
	ctlr->reg[Rxcsum] &= ~Ippcse;
	ctlr->reg[Hlreg0] &= ~Jumboen;		/* jumbos are a bad idea */
	ctlr->reg[Hlreg0] |= Txcrcen | Rxcrcstrip | Txpaden;
	/* buffer size rounded up to whole KB */
	ctlr->reg[Srrctl] = (ctlr->rbsz + 1024 - 1) / 1024;
	ctlr->reg[Mhadd] = ctlr->rbsz << 16;

	ctlr->reg[Rbal] = PCIWADDR(ctlr->rdba);
	ctlr->reg[Rbah] = 0;
	ctlr->reg[Rdlen] = ctlr->nrd*sizeof(Rd); /* must be multiple of 128 */
	ctlr->reg[Rdh] = 0;
	ctlr->reg[Rdt] = ctlr->rdt = 0;
	coherence();

	is598 = (ctlr->type == I82598);
	if (is598)
		ctlr->reg[Rdrxctl] = Rdmt¼;
	else {
		ctlr->reg[Rdrxctl] |= Crcstrip;
		ctlr->reg[Rdrxctl] &= ~Rscfrstsize;
	}
	if (Goslow && is598)
		ctlr->reg[Rxdctl] = 8<<Wthresh | 8<<Pthresh | 4<<Hthresh | Renable;
	else
		ctlr->reg[Rxdctl] = Renable;
	coherence();

	/*
	 * don't wait forever like an idiot (and hang the system),
	 * maybe it's disconnected.
	 */
	until = TK2MS(MACHP(0)->ticks) + 250;
	while (!(ctlr->reg[Rxdctl] & Renable) && TK2MS(MACHP(0)->ticks) < until)
		;
	if(!(ctlr->reg[Rxdctl] & Renable))
		print("#l%d: Renable didn't come on, might be disconnected\n",
			ctlr->edev->ctlrno);

	ctlr->reg[Rxctl] |= Rxen | (is598? Dmbyps: 0);

	if (is598){
		autoc = ctlr->reg[Autoc];
		/* what is this rubbish and why do we care? */
		print("#l%d: autoc %#ux; lms %d (3 is 10g sfp)\n",
			ctlr->edev->ctlrno, autoc, (autoc>>Lmsshift) & Lmsmask);
		ctlr->reg[Autoc] |= Flu;
		coherence();
		delay(50);
	}
}
637
638 static void
replenish(Ctlr * ctlr,uint rdh)639 replenish(Ctlr *ctlr, uint rdh)
640 {
641 int rdt, m, i;
642 Block *b;
643 Rd *r;
644
645 m = ctlr->nrd - 1;
646 i = 0;
647 for(rdt = ctlr->rdt; NEXTPOW2(rdt, m) != rdh; rdt = NEXTPOW2(rdt, m)){
648 r = ctlr->rdba + rdt;
649 if((b = rballoc()) == nil){
650 print("#l%d: no buffers\n", ctlr->edev->ctlrno);
651 break;
652 }
653 ctlr->rb[rdt] = b;
654 r->addr[0] = PCIWADDR(b->rp);
655 r->status = 0;
656 ctlr->rdfree++;
657 i++;
658 }
659 if(i) {
660 coherence();
661 ctlr->reg[Rdt] = ctlr->rdt = rdt; /* hand back recycled rdescs */
662 coherence();
663 }
664 }
665
666 static int
rim(void * v)667 rim(void *v)
668 {
669 return ((Ctlr*)v)->rim != 0;
670 }
671
672 void
rproc(void * v)673 rproc(void *v)
674 {
675 int passed;
676 uint m, rdh;
677 Block *bp;
678 Ctlr *ctlr;
679 Ether *e;
680 Rd *r;
681
682 e = v;
683 ctlr = e->ctlr;
684 m = ctlr->nrd - 1;
685 for (rdh = 0; ; ) {
686 replenish(ctlr, rdh);
687 ienable(ctlr, Irx0);
688 sleep(&ctlr->rrendez, rim, ctlr);
689 passed = 0;
690 for (;;) {
691 ctlr->rim = 0;
692 r = ctlr->rdba + rdh;
693 if(!(r->status & Rdd))
694 break; /* wait for pkts to arrive */
695 bp = ctlr->rb[rdh];
696 ctlr->rb[rdh] = 0;
697 if (r->length > ETHERMAXTU)
698 print("#l%d: got jumbo of %d bytes\n",
699 e->ctlrno, r->length);
700 bp->wp += r->length;
701 bp->lim = bp->wp; /* lie like a dog */
702 // r->status = 0;
703
704 ilock(&rblock);
705 nrbfull++;
706 iunlock(&rblock);
707 notemark(&ctlr->wmrb, nrbfull);
708 etheriq(e, bp, 1);
709
710 passed++;
711 ctlr->rdfree--;
712 rdh = NEXTPOW2(rdh, m);
713 if (ctlr->rdfree <= ctlr->nrd - 16)
714 replenish(ctlr, rdh);
715 }
716 /* note how many rds had full buffers */
717 notemark(&ctlr->wmrd, passed);
718 }
719 }
720
721 static void
promiscuous(void * a,int on)722 promiscuous(void *a, int on)
723 {
724 Ctlr *ctlr;
725 Ether *e;
726
727 e = a;
728 ctlr = e->ctlr;
729 if(on)
730 ctlr->reg[Fctrl] |= Upe | Mpe;
731 else
732 ctlr->reg[Fctrl] &= ~(Upe | Mpe);
733 }
734
735 static void
multicast(void * a,uchar * ea,int on)736 multicast(void *a, uchar *ea, int on)
737 {
738 int b, i;
739 Ctlr *ctlr;
740 Ether *e;
741
742 e = a;
743 ctlr = e->ctlr;
744
745 /*
746 * multiple ether addresses can hash to the same filter bit,
747 * so it's never safe to clear a filter bit.
748 * if we want to clear filter bits, we need to keep track of
749 * all the multicast addresses in use, clear all the filter bits,
750 * then set the ones corresponding to in-use addresses.
751 */
752 i = ea[5] >> 1;
753 b = (ea[5]&1)<<4 | ea[4]>>4;
754 b = 1 << b;
755 if(on)
756 ctlr->mta[i] |= b;
757 // else
758 // ctlr->mta[i] &= ~b;
759 ctlr->reg[Mta+i] = ctlr->mta[i];
760 }
761
762 static void
freemem(Ctlr * ctlr)763 freemem(Ctlr *ctlr)
764 {
765 Block *b;
766
767 while(b = rballoc()){
768 b->free = 0;
769 freeb(b);
770 }
771 free(ctlr->rdba);
772 ctlr->rdba = nil;
773 free(ctlr->tdba);
774 ctlr->tdba = nil;
775 free(ctlr->rb);
776 ctlr->rb = nil;
777 free(ctlr->tb);
778 ctlr->tb = nil;
779 }
780
781 static int
detach(Ctlr * ctlr)782 detach(Ctlr *ctlr)
783 {
784 int i, is598;
785
786 ctlr->reg[Imc] = ~0;
787 ctlr->reg[Ctrl] |= Rst;
788 for(i = 0; i < 100; i++){
789 delay(1);
790 if((ctlr->reg[Ctrl] & Rst) == 0)
791 break;
792 }
793 if (i >= 100)
794 return -1;
795 is598 = (ctlr->type == I82598);
796 if (is598) { /* errata */
797 delay(50);
798 ctlr->reg[Ecc] &= ~(1<<21 | 1<<18 | 1<<9 | 1<<6);
799 }
800
801 /* not cleared by reset; kill it manually. */
802 for(i = 1; i < 16; i++)
803 ctlr->reg[is598? Rah98: Rah99] &= ~Enable;
804 for(i = 0; i < 128; i++)
805 ctlr->reg[Mta + i] = 0;
806 for(i = 1; i < (is598? 640: 128); i++)
807 ctlr->reg[Vfta + i] = 0;
808
809 // freemem(ctlr); // TODO
810 ctlr->attached = 0;
811 return 0;
812 }
813
814 static void
shutdown(Ether * e)815 shutdown(Ether *e)
816 {
817 detach(e->ctlr);
818 // freemem(e->ctlr);
819 }
820
821 /* ≤ 20ms */
822 static ushort
eeread(Ctlr * ctlr,int i)823 eeread(Ctlr *ctlr, int i)
824 {
825 ctlr->reg[Eerd] = EEstart | i<<2;
826 while((ctlr->reg[Eerd] & EEdone) == 0)
827 ;
828 return ctlr->reg[Eerd] >> 16;
829 }
830
/*
 * validate the eeprom and load the station address into ctlr->ra.
 * returns 0 on success, -1 if the signature bits in word 0 or the
 * checksum are wrong.
 */
static int
eeload(Ctlr *ctlr)
{
	ushort u, v, p, l, i, j;

	/* bits 6-7 of word 0 must read 01 */
	if((eeread(ctlr, 0) & 0xc0) != 0x40)
		return -1;
	/* checksum: 16-bit wrapping sum of words 0..0x3f ... */
	u = 0;
	for(i = 0; i < 0x40; i++)
		u +=  eeread(ctlr, i);
	/*
	 * ... plus the sections that words 3..0xe point at: the word at
	 * the pointer is a length l, followed by l words to add.
	 */
	for(i = 3; i < 0xf; i++){
		p = eeread(ctlr, i);
		l = eeread(ctlr, p++);
		/* skip sections that would run past the 16-bit address space */
		if((int)p + l + 1 > 0xffff)
			continue;
		for(j = p; j < p + l; j++)
			u += eeread(ctlr, j);
	}
	if(u != 0xbaba)
		return -1;
	/* Status bit 3 chooses which pointer word (10 or 9) to follow */
	if(ctlr->reg[Status] & (1<<3))
		u = eeread(ctlr, 10);
	else
		u = eeread(ctlr, 9);
	u++;
	/* three words of address, low byte of each word first */
	for(i = 0; i < Eaddrlen;){
		v = eeread(ctlr, u + i/2);
		ctlr->ra[i++] = v;
		ctlr->ra[i++] = v>>8;
	}
	/* NOTE(review): adds Status bits 2-3 to the last byte; presumably a per-port offset -- confirm */
	ctlr->ra[5] += (ctlr->reg[Status] & 0xc) >> 2;
	return 0;
}
864
/*
 * bring the controller to a known state: reset it via detach(), load
 * and program the station address, zero the statistics, and set up
 * flow control, interrupt vector mapping and interrupt throttling.
 * returns 0 on success, -1 if the reset or the eeprom load fails.
 */
static int
reset(Ctlr *ctlr)
{
	int i, is598;
	uchar *p;

	if(detach(ctlr)){
		print("82598: reset timeout\n");
		return -1;
	}
	if(eeload(ctlr)){
		print("82598: eeprom failure\n");
		return -1;
	}
	/* install the station address as receive address 0 */
	p = ctlr->ra;
	is598 = (ctlr->type == I82598);
	ctlr->reg[is598? Ral98: Ral99] = p[3]<<24 | p[2]<<16 | p[1]<<8 | p[0];
	ctlr->reg[is598? Rah98: Rah99] = p[5]<<8 | p[4] | Enable;

	/* read the counters once, then discard what was accumulated */
	readstats(ctlr);
	for(i = 0; i<nelem(ctlr->stats); i++)
		ctlr->stats[i] = 0;

	ctlr->reg[Ctrlext] |= 1 << 16;	/* required by errata (spec change 4) */
	if (Goslow) {
		/* make some guesses for flow control */
		ctlr->reg[Fcrtl] = 0x10000 | Enable;
		ctlr->reg[Fcrth] = 0x40000 | Enable;
		ctlr->reg[Rcrtv] = 0x6000;
	} else
		ctlr->reg[Fcrtl] = ctlr->reg[Fcrth] = ctlr->reg[Rcrtv] = 0;

	/* configure interrupt mapping (don't ask) */
	ctlr->reg[Ivar+0] =	0 | 1<<7;
	ctlr->reg[Ivar+64/4] =	1 | 1<<7;
//	ctlr->reg[Ivar+97/4] =	(2 | 1<<7) << (8*(97%4));

	if (Goslow) {
		/* interrupt throttling goes here. */
		for(i = Itr; i < Itr + 20; i++)
			ctlr->reg[i] = 128;		/* ¼µs intervals */
		ctlr->reg[Itr + Itx0] = 256;
	} else {				/* don't throttle */
		for(i = Itr; i < Itr + 20; i++)
			ctlr->reg[i] = 0;		/* ¼µs intervals */
		ctlr->reg[Itr + Itx0] = 0;
	}
	return 0;
}
914
/*
 * (re)initialise the transmit side: free Blocks left on the ring,
 * zero the descriptors, program the ring registers and enable the
 * queue.  the final wait for Ten has no timeout.
 */
static void
txinit(Ctlr *ctlr)
{
	Block *b;
	int i;

	if (Goslow)
		ctlr->reg[Txdctl] = 16<<Wthresh | 16<<Pthresh;
	else
		ctlr->reg[Txdctl] = 0;
	if (ctlr->type == I82599)
		ctlr->reg[Dtxctl99] = 0;
	coherence();
	/* drop Blocks left on the ring from an earlier attach */
	for(i = 0; i < ctlr->ntd; i++){
		b = ctlr->tb[i];
		ctlr->tb[i] = 0;
		if(b)
			freeb(b);
	}

	assert(ctlr->tdba != nil);
	memset(ctlr->tdba, 0, ctlr->ntd * sizeof(Td));
	ctlr->reg[Tdbal] = PCIWADDR(ctlr->tdba);
	ctlr->reg[Tdbah] = 0;
	ctlr->reg[Tdlen] = ctlr->ntd*sizeof(Td); /* must be multiple of 128 */
	ctlr->reg[Tdh] = 0;
	/* cleanup() inspects the slot after tdh, so start one before slot 0 */
	ctlr->tdh = ctlr->ntd - 1;
	ctlr->reg[Tdt] = ctlr->tdt = 0;
	coherence();
	if (ctlr->type == I82599)
		ctlr->reg[Dtxctl99] |= Te;
	coherence();
	ctlr->reg[Txdctl] |= Ten;
	coherence();
	while (!(ctlr->reg[Txdctl] & Ten))
		;
}
952
/*
 * Ether attach hook.  on first use allocate the descriptor rings, the
 * Block arrays and the receive-buffer pool; then, unless already
 * attached, initialise rx and tx, start the three kprocs (once only)
 * and set up the watermarks.  on error the controller is reset, the
 * memory is freed and the error is passed on.
 */
static void
attach(Ether *e)
{
	Block *b;
	Ctlr *ctlr;
	char buf[KNAMELEN];

	ctlr = e->ctlr;
	ctlr->edev = e;			/* point back to Ether* */
	qlock(&ctlr->alock);
	if(waserror()){
		reset(ctlr);
		freemem(ctlr);
		qunlock(&ctlr->alock);
		nexterror();
	}
	if(ctlr->rdba == nil) {
		ctlr->nrd = Nrd;
		ctlr->ntd = Ntd;
		ctlr->rdba = mallocalign(ctlr->nrd * sizeof *ctlr->rdba,
			Descalign, 0, 0);
		ctlr->tdba = mallocalign(ctlr->ntd * sizeof *ctlr->tdba,
			Descalign, 0, 0);
		ctlr->rb = malloc(ctlr->nrd * sizeof(Block *));
		ctlr->tb = malloc(ctlr->ntd * sizeof(Block *));
		if (ctlr->rdba == nil || ctlr->tdba == nil ||
		    ctlr->rb == nil || ctlr->tb == nil)
			error(Enomem);

		/*
		 * build the pool: freeing a Block whose free hook is
		 * rbfree pushes it onto rbpool instead of releasing it.
		 */
		for(ctlr->nrb = 0; ctlr->nrb < 2*Nrb; ctlr->nrb++){
			b = allocb(ctlr->rbsz + BY2PG);	/* see rbfree() */
			if(b == nil)
				error(Enomem);
			b->free = rbfree;
			freeb(b);
		}
	}
	if (!ctlr->attached) {
		rxinit(ctlr);
		txinit(ctlr);
		/* rbfree decremented nrbfull while the pool was built */
		nrbfull = 0;
		if (!ctlr->procsrunning) {
			snprint(buf, sizeof buf, "#l%dl", e->ctlrno);
			kproc(buf, lproc, e);
			snprint(buf, sizeof buf, "#l%dr", e->ctlrno);
			kproc(buf, rproc, e);
			snprint(buf, sizeof buf, "#l%dt", e->ctlrno);
			kproc(buf, tproc, e);
			ctlr->procsrunning = 1;
		}
		initmark(&ctlr->wmrb, Nrb, "rcv bufs unprocessed");
		initmark(&ctlr->wmrd, Nrd-1, "rcv descrs processed at once");
		initmark(&ctlr->wmtd, Ntd-1, "xmit descr queue len");
		ctlr->attached = 1;
	}
	qunlock(&ctlr->alock);
	poperror();
}
1011
/*
 * interrupt handler: mask everything, then for each enabled cause that
 * is pending, flag and wake the matching kproc and drop that cause
 * from the mask.  the kproc re-enables its cause with ienable() when
 * it is ready for another interrupt.
 */
static void
interrupt(Ureg*, void *v)
{
	int icr, im;
	Ctlr *ctlr;
	Ether *e;

	e = v;
	ctlr = e->ctlr;
	ilock(&ctlr->imlock);
	ctlr->reg[Imc] = ~0;			/* disable all intrs */
	im = ctlr->im;
	/* ctlr->im is unchanged inside the loop; only the local im shrinks */
	while((icr = ctlr->reg[Icr] & ctlr->im) != 0){
		if(icr & Irx0){
			im &= ~Irx0;
			ctlr->rim = Irx0;
			wakeup(&ctlr->rrendez);
		}
		if(icr & Itx0){
			im &= ~Itx0;
			ctlr->tim = Itx0;
			wakeup(&ctlr->trendez);
		}
		if(icr & Lsc){
			im &= ~Lsc;
			ctlr->lim = Lsc;
			wakeup(&ctlr->lrendez);
		}
	}
	ctlr->reg[Ims] = ctlr->im = im; /* enable only intrs we didn't service */
	iunlock(&ctlr->imlock);
}
1044
/*
 * find every supported intel controller on the pci bus, map its
 * register and msi-x bars, allocate and reset a Ctlr for it and
 * record it in ctlrtab.  devices that cannot be mapped or reset are
 * skipped; scanning stops once ctlrtab is full.
 */
static void
scan(void)
{
	int pciregs, pcimsix, type;
	ulong io, iomsi;
	void *mem, *memmsi;
	Ctlr *ctlr;
	Pcidev *p;

	p = 0;
	while(p = pcimatch(p, Vintel, 0)){
		/* the device id decides the chip type and the msi-x bar */
		switch(p->did){
		case 0x10b6:		/* 82598 backplane */
		case 0x10c6:		/* 82598 af dual port */
		case 0x10c7:		/* 82598 af single port */
		case 0x10dd:		/* 82598 at cx4 */
		case 0x10ec:		/* 82598 at cx4 dual port */
			pcimsix = 3;
			type = I82598;
			break;
		case 0x10f7:		/* 82599 kx/kx4 */
		case 0x10f8:		/* 82599 kx/kx4/kx */
		case 0x10f9:		/* 82599 cx4 */
		case 0x10fb:		/* 82599 sfi/sfp+ */
		case 0x10fc:		/* 82599 xaui/bx4 */
		case 0x1557:		/* 82599 single-port sfi */
			pcimsix = 4;
			type = I82599;
			break;
		default:
			continue;
		}
		pciregs = 0;
		if(nctlr >= nelem(ctlrtab)){
			print("i82598: too many controllers\n");
			return;
		}

		io = p->mem[pciregs].bar & ~0xf;
		mem = vmap(io, p->mem[pciregs].size);
		if(mem == nil){
			print("i82598: can't map regs %#p\n",
				p->mem[pciregs].bar);
			continue;
		}

		iomsi = p->mem[pcimsix].bar & ~0xf;
		memmsi = vmap(iomsi, p->mem[pcimsix].size);
		if(memmsi == nil){
			print("i82598: can't map msi-x regs %#p\n",
				p->mem[pcimsix].bar);
			vunmap(mem, p->mem[pciregs].size);
			continue;
		}

		ctlr = malloc(sizeof *ctlr);
		if(ctlr == nil) {
			vunmap(mem, p->mem[pciregs].size);
			vunmap(memmsi, p->mem[pcimsix].size);
			error(Enomem);
		}
		ctlr->p = p;
		ctlr->type = type;
		ctlr->physreg = (u32int*)io;
		ctlr->physmsix = (u32int*)iomsi;
		ctlr->reg = (u32int*)mem;
		ctlr->msix = (u32int*)memmsi;	/* unused */
		ctlr->rbsz = Rbsz;
		if(reset(ctlr)){
			print("i82598: can't reset\n");
			free(ctlr);
			vunmap(mem, p->mem[pciregs].size);
			vunmap(memmsi, p->mem[pcimsix].size);
			continue;
		}
		pcisetbme(p);
		ctlrtab[nctlr++] = ctlr;
	}
}
1124
1125 static int
pnp(Ether * e)1126 pnp(Ether *e)
1127 {
1128 int i;
1129 Ctlr *ctlr;
1130
1131 if(nctlr == 0)
1132 scan();
1133 ctlr = nil;
1134 for(i = 0; i < nctlr; i++){
1135 ctlr = ctlrtab[i];
1136 if(ctlr == nil || ctlr->flag & Factive)
1137 continue;
1138 if(e->port == 0 || e->port == (ulong)ctlr->reg)
1139 break;
1140 }
1141 if (i >= nctlr)
1142 return -1;
1143 ctlr->flag |= Factive;
1144 e->ctlr = ctlr;
1145 e->port = (uintptr)ctlr->physreg;
1146 e->irq = ctlr->p->intl;
1147 e->tbdf = ctlr->p->tbdf;
1148 e->mbps = 10000;
1149 e->maxmtu = ETHERMAXTU;
1150 memmove(e->ea, ctlr->ra, Eaddrlen);
1151
1152 e->arg = e;
1153 e->attach = attach;
1154 e->detach = shutdown;
1155 e->transmit = transmit;
1156 e->interrupt = interrupt;
1157 e->ifstat = ifstat;
1158 e->shutdown = shutdown;
1159 e->ctl = ctl;
1160 e->multicast = multicast;
1161 e->promiscuous = promiscuous;
1162
1163 return 0;
1164 }
1165
/* register pnp() as the probe routine under both card names */
void
ether82598link(void)
{
	addethercard("i82598", pnp);
	addethercard("i10gbe", pnp);
}
1172