1 /*
2 * myricom 10g-pcie-8a 10 Gb ethernet driver
3 * © 2007 erik quanstrom, coraid
4 *
5 * the card is big endian.
6 * we use uvlong rather than uintptr to hold addresses so that
7 * we don't get "warning: stupid shift" on 32-bit architectures.
8 *
9 * appears to have massively-bloated buffers.
10 */
11 #include "u.h"
12 #include "../port/lib.h"
13 #include "mem.h"
14 #include "dat.h"
15 #include "fns.h"
16 #include "io.h"
17 #include "../port/error.h"
18 #include "../port/netif.h"
19
20 #include "../pc/etherif.h"
21
#ifndef KiB
#define KiB		1024u			/* Kibi 0x0000000000000400 */
#define MiB		1048576u		/* Mebi 0x0000000000100000 */
#endif /* KiB */

/*
 * debugging print, active only when the file-scope `debug' flag is set.
 * wrapped in do/while(0) so that the macro is a single statement and
 * cannot capture the `else' of an enclosing unbraced if/else.
 */
#define dprint(...)	do{ if(debug) print(__VA_ARGS__); }while(0)
#define pcicapdbg(...)
/* allocate n bytes aligned on a 4KiB boundary */
#define malign(n)	mallocalign((n), 4*KiB, 0, 0)
30
31 #include "etherm10g2k.h"
32 #include "etherm10g4k.h"
33
static int 	debug		= 0;		/* gates dprint(); toggled by the "debug" ctl message */
static char	Etimeout[]	= "timeout";	/* error string raised when the card does not answer */

/* sizes, limits and offsets into the card's mapped memory (c->ram) */
enum {
	Epromsz	= 256,
	Maxslots= 1024,		/* rcv descriptors; wasteful: only 9 needed */
	Align	= 4096,
	Maxmtu	= 9000,		/* jumbos; bad idea */
	Noconf	= 0xffffffff,	/* "no answer yet" marker written into the response buffer */

	Fwoffset= 1*MiB,	/* where loadfw() copies the firmware image */
	Cmdoff	= 0xf80000,	/* command port offset */
	Fwsubmt	= 0xfc0000,	/* firmware submission command port offset */
	Rdmaoff	= 0xfc01c0,	/* rdma command port offset */
};
49
/*
 * firmware command codes; these are the `type' argument of cmd() and
 * maccmd().  the numeric values follow from declaration order, so
 * entries must not be reordered.
 * NOTE(review): by naming, CS* presumably set and CG* get a value --
 * confirm against the firmware documentation.
 */
enum {
	CZero,
	Creset,
	Cversion,

	CSintrqdma,	/* issue these before Cetherup */
	CSbigsz,	/* in bytes bigsize = 2^n */
	CSsmallsz,

	CGsendoff,
	CGsmallrxoff,
	CGbigrxoff,
	CGirqackoff,
	CGirqdeassoff,
	CGsendrgsz,
	CGrxrgsz,

	CSintrqsz,	/* 2^n */
	Cetherup,	/* above parameters + mtu/mac addr must be set first. */
	Cetherdn,

	CSmtu,		/* below may be issued live */
	CGcoaloff,	/* in µs */
	CSstatsrate,	/* in µs */
	CSstatsdma,

	Cpromisc,
	Cnopromisc,
	CSmac,

	Cenablefc,
	Cdisablefc,

	Cdmatest,	/* address in d[0-1], d[2]=length */

	Cenableallmc,
	Cdisableallmc,

	CSjoinmc,
	CSleavemc,
	Cleaveallmc,

	CSstatsdma2,	/* adds (unused) multicast stats */
};
94
/*
 * 8-byte command response buffer the card writes back into host memory.
 * cmd() reads the returned value from c[0..3] (big endian) and treats
 * i[1] as the completion/status word (Noconf while still pending).
 */
typedef union {
	uint	i[2];
	uchar	c[8];
} Cmd;
99
/* one entry of the receive completion ("done") ring */
typedef ulong Slot;
/* the same entry viewed as its two 16-bit halves; len == 0 means empty */
typedef struct {
	ushort	cksum;
	ushort	len;	/* stored big endian; decoded with gbit16 in nextblock() */
} Slotparts;
105
/* bits for Send.flags; m10gtransmit() uses SFfirst, SFnotso and SFsmall */
enum {
	SFsmall	= 1,	/* set by m10gtransmit for frames shorter than 1520 bytes */
	SFfirst	= 2,	/* set on the first segment of a frame only */
	SFalign	= 4,
	SFnotso	= 16,
};
112
/*
 * transmit descriptor, one per segment.  multi-byte fields are stored
 * big endian (see pbit32/pbit16 in m10gtransmit).
 */
typedef struct {
	ulong	high;	/* never written by m10gtransmit; stays zero from emalign */
	ulong	low;	/* bus address of the segment */
	ushort	hdroff;
	ushort	len;	/* segment length in bytes */
	uchar	pad;
	uchar	nrdma;	/* segment count on the first descriptor, 1 on the rest */
	uchar	chkoff;
	uchar	flags;	/* SF* bits */
} Send;
123
/* transmit side state; the embedded QLock serialises m10gtransmit() */
typedef struct {
	QLock;
	Send	*lanai;		/* tx ring (cksum+len in lanai memory) */
	Send	*host;		/* tx ring (data in our memory) */
	Block	**bring;	/* Block to free once the segment at that index is sent */
//	uchar	*wcfifo;	/* what the heck is a w/c fifo? */
	int	size;		/* of buffers in the z8's memory */
	ulong	segsz;		/* segment alignment; set from the firmware choice in bootfw() */
	uint	n;		/* number of ring entries (set in open0 from CGsendrgsz) */
	uint	m;		/* mask (n-1); the ring size must be a power of two */
	uint	i;		/* number of segments (not frames) queued */
	uint	cnt;		/* number of segments sent by the card */

	ulong	npkt;		/* frames reclaimed by txcleanup() */
	vlong	nbytes;		/* bytes reclaimed by txcleanup() */
} Tx;
140
/* free list of receive Blocks; the embedded Lock guards head, n and cnt */
typedef struct {
	Lock;
	Block	*head;		/* singly linked through Block.next */
	uint	size;		/* buffer size of each block */
	uint	n;		/* n free buffers */
	uint	cnt;		/* lifetime count of blocks returned via rbfree() */
} Bpool;

/* file-scope pools, shared by every controller (see open0) */
static Bpool	smpool 	= { .size = 128, };
static Bpool	bgpool	= { .size = Maxmtu, };	/* size is overwritten in open0() */
151
/* one receive ring (the controller has a small and a big one) */
typedef struct {
	Bpool	*pool;		/* free buffers */
	ulong	*lanai;		/* rx ring; we have no permanent host shadow */
	Block	**host;		/* called "info" in myricom driver */
//	uchar	*wcfifo;	/* cmd submission fifo */
	uint	m;		/* index mask (n-1) */
	uint	n;		/* rxslots */
	uint	i;		/* buffers consumed by nextblock() */
	uint	cnt;		/* number of buffers allocated (lifetime) */
	uint	allocfail;
} Rx;
163
/*
 * dma mapped.  unix network byte order.
 * the 4-byte fields are decoded with gbit32() wherever they are read.
 */
typedef struct {
	uchar	txcnt[4];	/* compared against Tx.npkt to find completed sends */
	uchar	linkstat[4];
	uchar	dlink[4];
	uchar	derror[4];
	uchar	drunt[4];
	uchar	doverrun[4];
	uchar	dnosm[4];
	uchar	dnobg[4];
	uchar	nrdma[4];
	uchar	txstopped;
	uchar	down;
	uchar	updated;	/* checkstats() ignores the block while this is 0 */
	uchar	valid;		/* nonzero when the interrupt is ours; bit 0 wakes the rx proc */
} Stats;
180
/* values of Ctlr.state; advanced by m10gattach() */
enum {
	Detached,
	Attached,	/* reset() succeeded, rings not yet opened */
	Runed,		/* fully up; m10ginterrupt() ignores interrupts before this */
};
186
/* receive completion ring, written by the card via dma */
typedef struct {
	Slot 	*entry;		/* n entries, allocated in setmem() */
	uvlong	busaddr;	/* bus address of entry, given to the card by CSintrqdma */
	uint	m;		/* index mask (n-1) */
	uint	n;		/* number of entries (Maxslots) */
	uint	i;		/* next entry to examine */
} Done;
194
/* per-card state; the embedded QLock serialises attach */
typedef struct Ctlr Ctlr;
typedef struct Ctlr {
	QLock;
	int	state;		/* Detached, Attached or Runed */
	int	kprocs;		/* nonzero once the rx/tx kprocs have been started */
	uvlong	port;		/* physical address of the mapped bar */
	Pcidev*	pcidev;
	Ctlr*	next;		/* next controller on the ctlrs list */
	int	active;		/* claimed by m10gpnp() */
	int	id;		/* do we need this? */

	uchar	ra[Eaddrlen];	/* mac address parsed from the eprom */

	int	ramsz;
	uchar	*ram;		/* kernel mapping of the card's memory */

	ulong	*irqack;	/* the next three point into ram; set in reset() */
	ulong	*irqdeass;
	ulong	*coal;

	char	eprom[Epromsz];
	ulong	serial;		/* unit serial number */

	QLock	cmdl;		/* serialises cmd() and maccmd() */
	Cmd	*cmd;		/* address of command return */
	uvlong	cprt;		/* bus address of command */

	uvlong	boot;		/* boot address */

	Done	done;
	Tx	tx;
	Rx	sm;
	Rx	bg;
	Stats	*stats;
	uvlong	statsprt;	/* bus address of stats */

	Rendez	rxrendez;	/* m10rx sleeps here */
	Rendez	txrendez;	/* txproc sleeps here */

	int	msi;
	ulong	linkstat;	/* last link state seen by checkstats() */
	ulong	nrdma;		/* last rdma counter seen by checkstats() */
} Ctlr;

/* list of controllers found by m10gpci() */
static Ctlr 	*ctlrs;
240
/* extended capability ids searched for by pciecap() */
enum {
	PcieAERC = 1,
	PcieVC,
	PcieSNC,
	PciePBC,
};

/* offsets relative to the AERC capability */
enum {
	AercCCR	= 0x18,		/* control register */
};

/* offsets relative to the PCIe capability found by pcicap(p, PciCapPCIe) */
enum {
	PcieCTL	= 8,
	PcieLCR	= 12,
	PcieMRD	= 0x7000,	/* maximum read size */
};
257
258 /*
259 * this function doesn't work because pcicgr32 doesn't have access
260 * to the pcie extended configuration space.
261 */
262 static int
pciecap(Pcidev * p,int cap)263 pciecap(Pcidev *p, int cap)
264 {
265 uint off, i;
266
267 off = 0x100;
268 while(((i = pcicfgr32(p, off)) & 0xffff) != cap){
269 off = i >> 20;
270 print("m10g: pciecap offset = %ud", off);
271 if(off < 0x100 || off >= 4*KiB - 1)
272 return 0;
273 }
274 print("m10g: pciecap found = %ud", off);
275 return off;
276 }
277
278 static int
setpcie(Pcidev * p)279 setpcie(Pcidev *p)
280 {
281 int off;
282
283 /* set 4k writes */
284 off = pcicap(p, PciCapPCIe);
285 if(off < 64)
286 return -1;
287 off += PcieCTL;
288 pcicfgw16(p, off, (pcicfgr16(p, off) & ~PcieMRD) | 5<<12);
289 return 0;
290 }
291
292 static int
whichfw(Pcidev * p)293 whichfw(Pcidev *p)
294 {
295 char *s;
296 int i, off, lanes, ecrc;
297 ulong cap;
298
299 /* check the number of configured lanes. */
300 off = pcicap(p, PciCapPCIe);
301 if(off < 64)
302 return -1;
303 off += PcieLCR;
304 cap = pcicfgr16(p, off);
305 lanes = (cap>>4) & 0x3f;
306
307 /* check AERC register. we need it on. */
308 off = pciecap(p, PcieAERC);
309 print("; offset %d returned\n", off);
310 cap = 0;
311 if(off != 0){
312 off += AercCCR;
313 cap = pcicfgr32(p, off);
314 print("m10g: %lud cap\n", cap);
315 }
316 ecrc = (cap>>4) & 0xf;
317 /* if we don't like the aerc, kick it here. */
318
319 print("m10g: %d lanes; ecrc=%d; ", lanes, ecrc);
320 if(s = getconf("myriforce")){
321 i = atoi(s);
322 if(i != 4*KiB || i != 2*KiB)
323 i = 2*KiB;
324 print("fw = %d [forced]\n", i);
325 return i;
326 }
327 if(lanes <= 4)
328 print("fw = 4096 [lanes]\n");
329 else if(ecrc & 10)
330 print("fw = 4096 [ecrc set]\n");
331 else
332 print("fw = 4096 [default]\n");
333 return 4*KiB;
334 }
335
336 static int
parseeprom(Ctlr * c)337 parseeprom(Ctlr *c)
338 {
339 int i, j, k, l, bits;
340 char *s;
341
342 dprint("m10g eprom:\n");
343 s = c->eprom;
344 bits = 3;
345 for(i = 0; s[i] && i < Epromsz; i++){
346 l = strlen(s+i);
347 dprint("\t%s\n", s+i);
348 if(strncmp(s+i, "MAC=", 4) == 0 && l == 4+12+5){
349 bits ^= 1;
350 j = i + 4;
351 for(k = 0; k < 6; k++)
352 c->ra[k] = strtoul(s+j+3*k, 0, 16);
353 }else if(strncmp(s+i, "SN=", 3) == 0){
354 bits ^= 2;
355 c->serial = atoi(s+i+3);
356 }
357 i += l;
358 }
359 if(bits)
360 return -1;
361 return 0;
362 }
363
364 static ushort
pbit16(ushort i)365 pbit16(ushort i)
366 {
367 ushort j;
368 uchar *p;
369
370 p = (uchar*)&j;
371 p[1] = i;
372 p[0] = i>>8;
373 return j;
374 }
375
376 static ushort
gbit16(uchar i[2])377 gbit16(uchar i[2])
378 {
379 ushort j;
380
381 j = i[1];
382 j |= i[0]<<8;
383 return j;
384 }
385
386 static ulong
pbit32(ulong i)387 pbit32(ulong i)
388 {
389 ulong j;
390 uchar *p;
391
392 p = (uchar*)&j;
393 p[3] = i;
394 p[2] = i>>8;
395 p[1] = i>>16;
396 p[0] = i>>24;
397 return j;
398 }
399
400 static ulong
gbit32(uchar i[4])401 gbit32(uchar i[4])
402 {
403 ulong j;
404
405 j = i[3];
406 j |= i[2]<<8;
407 j |= i[1]<<16;
408 j |= i[0]<<24;
409 return j;
410 }
411
412 static void
prepcmd(ulong * cmd,int i)413 prepcmd(ulong *cmd, int i)
414 {
415 while(i-- > 0)
416 cmd[i] = pbit32(cmd[i]);
417 }
418
419 /*
420 * the command looks like this (int 32bit integers)
421 * cmd type
422 * addr (low)
423 * addr (high)
424 * pad (used for dma testing)
425 * response (high)
426 * response (low)
427 * 40 byte = 5 int pad.
428 */
429
430 ulong
cmd(Ctlr * c,int type,uvlong data)431 cmd(Ctlr *c, int type, uvlong data)
432 {
433 ulong buf[16], i;
434 Cmd *cmd;
435
436 qlock(&c->cmdl);
437 cmd = c->cmd;
438 cmd->i[1] = Noconf;
439 memset(buf, 0, sizeof buf);
440 buf[0] = type;
441 buf[1] = data;
442 buf[2] = data >> 32;
443 buf[4] = c->cprt >> 32;
444 buf[5] = c->cprt;
445 prepcmd(buf, 6);
446 coherence();
447 memmove(c->ram + Cmdoff, buf, sizeof buf);
448
449 if(waserror()){
450 qunlock(&c->cmdl);
451 nexterror();
452 }
453 for(i = 0; i < 15; i++){
454 if(cmd->i[1] != Noconf){
455 poperror();
456 i = gbit32(cmd->c);
457 qunlock(&c->cmdl);
458 if(cmd->i[1] != 0)
459 dprint("[%lux]", i);
460 return i; /* normal return */
461 }
462 tsleep(&up->sleep, return0, 0, 1);
463 }
464 iprint("m10g: cmd timeout [%ux %ux] cmd=%d\n",
465 cmd->i[0], cmd->i[1], type);
466 error(Etimeout);
467 return ~0; /* silence! */
468 }
469
470 ulong
maccmd(Ctlr * c,int type,uchar * m)471 maccmd(Ctlr *c, int type, uchar *m)
472 {
473 ulong buf[16], i;
474 Cmd *cmd;
475
476 qlock(&c->cmdl);
477 cmd = c->cmd;
478 cmd->i[1] = Noconf;
479 memset(buf, 0, sizeof buf);
480 buf[0] = type;
481 buf[1] = m[0]<<24 | m[1]<<16 | m[2]<<8 | m[3];
482 buf[2] = m[4]<< 8 | m[5];
483 buf[4] = c->cprt >> 32;
484 buf[5] = c->cprt;
485 prepcmd(buf, 6);
486 coherence();
487 memmove(c->ram + Cmdoff, buf, sizeof buf);
488
489 if(waserror()){
490 qunlock(&c->cmdl);
491 nexterror();
492 }
493 for(i = 0; i < 15; i++){
494 if(cmd->i[1] != Noconf){
495 poperror();
496 i = gbit32(cmd->c);
497 qunlock(&c->cmdl);
498 if(cmd->i[1] != 0)
499 dprint("[%lux]", i);
500 return i; /* normal return */
501 }
502 tsleep(&up->sleep, return0, 0, 1);
503 }
504 iprint("m10g: maccmd timeout [%ux %ux] cmd=%d\n",
505 cmd->i[0], cmd->i[1], type);
506 error(Etimeout);
507 return ~0; /* silence! */
508 }
509
/*
 * remove this garbage after testing
 * `type' multipliers for dmatestcmd(): the length is multiplied by one
 * of these (or their sum), which places it in the upper and/or lower
 * 16 bits of the request word.
 */
enum {
	DMAread	= 0x10000,
	DMAwrite= 0x1,
};
515
516 ulong
dmatestcmd(Ctlr * c,int type,uvlong addr,int len)517 dmatestcmd(Ctlr *c, int type, uvlong addr, int len)
518 {
519 ulong buf[16], i;
520
521 memset(buf, 0, sizeof buf);
522 memset(c->cmd, Noconf, sizeof *c->cmd);
523 buf[0] = Cdmatest;
524 buf[1] = addr;
525 buf[2] = addr >> 32;
526 buf[3] = len * type;
527 buf[4] = c->cprt >> 32;
528 buf[5] = c->cprt;
529 prepcmd(buf, 6);
530 coherence();
531 memmove(c->ram + Cmdoff, buf, sizeof buf);
532
533 for(i = 0; i < 15; i++){
534 if(c->cmd->i[1] != Noconf){
535 i = gbit32(c->cmd->c);
536 if(i == 0)
537 error(Eio);
538 return i; /* normal return */
539 }
540 tsleep(&up->sleep, return0, 0, 5);
541 }
542 error(Etimeout);
543 return ~0; /* silence! */
544 }
545
546 ulong
rdmacmd(Ctlr * c,int on)547 rdmacmd(Ctlr *c, int on)
548 {
549 ulong buf[16], i;
550
551 memset(buf, 0, sizeof buf);
552 c->cmd->i[0] = 0;
553 coherence();
554 buf[0] = c->cprt >> 32;
555 buf[1] = c->cprt;
556 buf[2] = Noconf;
557 buf[3] = c->cprt >> 32;
558 buf[4] = c->cprt;
559 buf[5] = on;
560 prepcmd(buf, 6);
561 memmove(c->ram + Rdmaoff, buf, sizeof buf);
562
563 for(i = 0; i < 20; i++){
564 if(c->cmd->i[0] == Noconf)
565 return gbit32(c->cmd->c); /* normal return */
566 tsleep(&up->sleep, return0, 0, 1);
567 }
568 iprint("m10g: rdmacmd timeout\n");
569 error(Etimeout);
570 return ~0; /* silence! */
571 }
572
573 static int
loadfw(Ctlr * c,int * align)574 loadfw(Ctlr *c, int *align)
575 {
576 ulong *f, *s, sz;
577 int i;
578
579 if((*align = whichfw(c->pcidev)) == 4*KiB){
580 f = (ulong*)fw4k;
581 sz = sizeof fw4k;
582 }else{
583 f = (ulong*)fw2k;
584 sz = sizeof fw2k;
585 }
586
587 s = (ulong*)(c->ram + Fwoffset);
588 for(i = 0; i < sz / 4; i++)
589 s[i] = f[i];
590 return sz & ~3;
591 }
592
593 static int
bootfw(Ctlr * c)594 bootfw(Ctlr *c)
595 {
596 int i, sz, align;
597 ulong buf[16];
598 Cmd* cmd;
599
600 if((sz = loadfw(c, &align)) == 0)
601 return 0;
602 dprint("bootfw %d bytes ... ", sz);
603 cmd = c->cmd;
604
605 memset(buf, 0, sizeof buf);
606 c->cmd->i[0] = 0;
607 coherence();
608 buf[0] = c->cprt >> 32; /* upper dma target address */
609 buf[1] = c->cprt; /* lower */
610 buf[2] = Noconf; /* writeback */
611 buf[3] = Fwoffset + 8,
612 buf[4] = sz - 8;
613 buf[5] = 8;
614 buf[6] = 0;
615 prepcmd(buf, 7);
616 coherence();
617 memmove(c->ram + Fwsubmt, buf, sizeof buf);
618
619 for(i = 0; i < 20; i++){
620 if(cmd->i[0] == Noconf)
621 break;
622 delay(1);
623 }
624 dprint("[%lux %lux]", gbit32(cmd->c), gbit32(cmd->c+4));
625 if(i == 20){
626 print("m10g: cannot load fw\n");
627 return -1;
628 }
629 dprint("\n");
630 c->tx.segsz = align;
631 return 0;
632 }
633
634 static int
kickthebaby(Pcidev * p,Ctlr * c)635 kickthebaby(Pcidev *p, Ctlr *c)
636 {
637 /* don't kick the baby! */
638 ulong code;
639
640 pcicfgw8(p, 0x10 + c->boot, 0x3);
641 pcicfgw32(p, 0x18 + c->boot, 0xfffffff0);
642 code = pcicfgr32(p, 0x14 + c->boot);
643
644 dprint("reboot status = %lux\n", code);
645 if(code != 0xfffffff0)
646 return -1;
647 return 0;
648 }
649
/*
 * firmware header as found in card memory; chkfw() locates it via the
 * offset stored at ram+0x3c.  the 4-byte fields are read with gbit32().
 */
typedef struct {
	uchar	len[4];
	uchar	type[4];	/* one of the T* codes below */
	char	version[128];
	uchar	globals[4];
	uchar	ramsz[4];
	uchar	specs[4];
	uchar	specssz[4];
} Fwhdr;
659
/* Fwhdr.type codes: four ascii characters packed big endian */
enum {
	Tmx	= 0x4d582020,	/* "MX  " */
	Tpcie	= 0x70636965,	/* "pcie" */
	Teth	= 0x45544820,	/* "ETH "; the only type chkfw() accepts */
	Tmcp0	= 0x4d435030,	/* "MCP0" */
};
666
667 static char *
fwtype(ulong type)668 fwtype(ulong type)
669 {
670 switch(type){
671 case Tmx:
672 return "mx";
673 case Tpcie:
674 return "PCIe";
675 case Teth:
676 return "eth";
677 case Tmcp0:
678 return "mcp0";
679 }
680 return "*GOK*";
681 }
682
683 static int
chkfw(Ctlr * c)684 chkfw(Ctlr *c)
685 {
686 ulong off, type;
687 Fwhdr *h;
688
689 off = gbit32(c->ram+0x3c);
690 dprint("firmware %lux\n", off);
691 if((off&3) || off + sizeof *h > c->ramsz){
692 print("!m10g: bad firmware %lux\n", off);
693 return -1;
694 }
695 h = (Fwhdr*)(c->ram + off);
696 type = gbit32(h->type);
697 dprint("\t" "type %s\n", fwtype(type));
698 dprint("\t" "vers %s\n", h->version);
699 dprint("\t" "ramsz %lux\n", gbit32(h->ramsz));
700 if(type != Teth){
701 print("!m10g: bad card type %s\n", fwtype(type));
702 return -1;
703 }
704
705 return bootfw(c) || rdmacmd(c, 0);
706 }
707
708 static int
reset(Ether * e,Ctlr * c)709 reset(Ether *e, Ctlr *c)
710 {
711 ulong i, sz;
712
713 if(waserror()){
714 print("m10g: reset error\n");
715 nexterror();
716 return -1;
717 }
718
719 chkfw(c);
720 cmd(c, Creset, 0);
721
722 cmd(c, CSintrqsz, c->done.n * sizeof *c->done.entry);
723 cmd(c, CSintrqdma, c->done.busaddr);
724 c->irqack = (ulong*)(c->ram + cmd(c, CGirqackoff, 0));
725 /* required only if we're not doing msi? */
726 c->irqdeass = (ulong*)(c->ram + cmd(c, CGirqdeassoff, 0));
727 /* this is the driver default, why fiddle with this? */
728 c->coal = (ulong*)(c->ram + cmd(c, CGcoaloff, 0));
729 *c->coal = pbit32(25);
730
731 dprint("dma stats:\n");
732 rdmacmd(c, 1);
733 sz = c->tx.segsz;
734 i = dmatestcmd(c, DMAread, c->done.busaddr, sz);
735 print("m10g: read %lud MB/s;", ((i>>16)*sz*2) / (i&0xffff));
736 i = dmatestcmd(c, DMAwrite, c->done.busaddr, sz);
737 print(" write %lud MB/s;", ((i>>16)*sz*2) / (i&0xffff));
738 i = dmatestcmd(c, DMAwrite|DMAread, c->done.busaddr, sz);
739 print(" r/w %lud MB/s\n", ((i>>16)*sz*2*2) / (i&0xffff));
740 memset(c->done.entry, 0, c->done.n * sizeof *c->done.entry);
741
742 maccmd(c, CSmac, c->ra);
743 // cmd(c, Cnopromisc, 0);
744 cmd(c, Cenablefc, 0);
745 e->maxmtu = Maxmtu;
746 cmd(c, CSmtu, e->maxmtu);
747 dprint("CSmtu %d...\n", e->maxmtu);
748
749 poperror();
750 return 0;
751 }
752
753 static void
ctlrfree(Ctlr * c)754 ctlrfree(Ctlr *c)
755 {
756 /* free up all the Block*s, too */
757 free(c->tx.host);
758 free(c->sm.host);
759 free(c->bg.host);
760 free(c->cmd);
761 free(c->done.entry);
762 free(c->stats);
763 free(c);
764 }
765
766 static int
setmem(Pcidev * p,Ctlr * c)767 setmem(Pcidev *p, Ctlr *c)
768 {
769 ulong i;
770 uvlong raddr;
771 Done *d;
772 void *mem;
773
774 c->tx.segsz = 2048;
775 c->ramsz = 2*MiB - (2*48*KiB + 32*KiB) - 0x100;
776 if(c->ramsz > p->mem[0].size)
777 return -1;
778
779 raddr = p->mem[0].bar & ~0x0F;
780 mem = vmap(raddr, p->mem[0].size);
781 if(mem == nil){
782 print("m10g: can't map %8.8lux\n", p->mem[0].bar);
783 return -1;
784 }
785 dprint("%llux <- vmap(mem[0].size = %ux)\n", raddr, p->mem[0].size);
786 c->port = raddr;
787 c->ram = mem;
788 c->cmd = malign(sizeof *c->cmd);
789 c->cprt = PCIWADDR(c->cmd);
790
791 d = &c->done;
792 d->n = Maxslots;
793 d->m = d->n - 1;
794 i = d->n * sizeof *d->entry;
795 d->entry = malign(i);
796 memset(d->entry, 0, i);
797 d->busaddr = PCIWADDR(d->entry);
798
799 c->stats = malign(sizeof *c->stats);
800 memset(c->stats, 0, sizeof *c->stats);
801 c->statsprt = PCIWADDR(c->stats);
802
803 memmove(c->eprom, c->ram + c->ramsz - Epromsz, Epromsz-2);
804 return setpcie(p) || parseeprom(c);
805 }
806
807 static Rx*
whichrx(Ctlr * c,int sz)808 whichrx(Ctlr *c, int sz)
809 {
810 if(sz <= smpool.size)
811 return &c->sm;
812 return &c->bg;
813 }
814
815 static Block*
balloc(Rx * rx)816 balloc(Rx* rx)
817 {
818 Block *bp;
819
820 ilock(rx->pool);
821 if((bp = rx->pool->head) != nil){
822 rx->pool->head = bp->next;
823 bp->next = nil;
824 ainc(&bp->ref); /* prevent bp from being freed */
825 rx->pool->n--;
826 }
827 iunlock(rx->pool);
828 return bp;
829 }
830
831 static void
rbfree(Block * b,Bpool * p)832 rbfree(Block *b, Bpool *p)
833 {
834 b->rp = b->wp = (uchar*)PGROUND((uintptr)b->base);
835 b->flag &= ~(Bipck | Budpck | Btcpck | Bpktck);
836
837 ilock(p);
838 b->next = p->head;
839 p->head = b;
840 p->n++;
841 p->cnt++;
842 iunlock(p);
843 }
844
845 static void
smbfree(Block * b)846 smbfree(Block *b)
847 {
848 rbfree(b, &smpool);
849 }
850
851 static void
bgbfree(Block * b)852 bgbfree(Block *b)
853 {
854 rbfree(b, &bgpool);
855 }
856
857 static void
replenish(Rx * rx)858 replenish(Rx *rx)
859 {
860 ulong buf[16], i, idx, e;
861 Bpool *p;
862 Block *b;
863
864 p = rx->pool;
865 if(p->n < 8)
866 return;
867 memset(buf, 0, sizeof buf);
868 e = (rx->i - rx->cnt) & ~7;
869 e += rx->n;
870 while(p->n >= 8 && e){
871 idx = rx->cnt & rx->m;
872 for(i = 0; i < 8; i++){
873 b = balloc(rx);
874 buf[i*2] = pbit32((uvlong)PCIWADDR(b->wp) >> 32);
875 buf[i*2+1] = pbit32(PCIWADDR(b->wp));
876 rx->host[idx+i] = b;
877 assert(b);
878 }
879 memmove(rx->lanai + 2*idx, buf, sizeof buf);
880 coherence();
881 rx->cnt += 8;
882 e -= 8;
883 }
884 if(e && p->n > 7+1)
885 print("m10g: should panic? pool->n = %d\n", p->n);
886 }
887
888 /*
889 * future:
890 * if (c->mtrr >= 0) {
891 * c->tx.wcfifo = c->ram+0x200000;
892 * c->sm.wcfifo = c->ram+0x300000;
893 * c->bg.wcfifo = c->ram+0x340000;
894 * }
895 */
896
/* smallest power of two that is >= j; 1 for any j <= 1 */
static int
nextpow(int j)
{
	int p;

	p = 1;
	while(p < j)
		p <<= 1;
	return p;
}
906
907 static void*
emalign(int sz)908 emalign(int sz)
909 {
910 void *v;
911
912 v = malign(sz);
913 if(v == nil)
914 error(Enomem);
915 memset(v, 0, sz);
916 return v;
917 }
918
919 static void
open0(Ether * e,Ctlr * c)920 open0(Ether *e, Ctlr *c)
921 {
922 Block *b;
923 int i, sz, entries;
924
925 entries = cmd(c, CGsendrgsz, 0) / sizeof *c->tx.lanai;
926 c->tx.lanai = (Send*)(c->ram + cmd(c, CGsendoff, 0));
927 c->tx.host = emalign(entries * sizeof *c->tx.host);
928 c->tx.bring = emalign(entries * sizeof *c->tx.bring);
929 c->tx.n = entries;
930 c->tx.m = entries-1;
931
932 entries = cmd(c, CGrxrgsz, 0)/8;
933 c->sm.pool = &smpool;
934 cmd(c, CSsmallsz, c->sm.pool->size);
935 c->sm.lanai = (ulong*)(c->ram + cmd(c, CGsmallrxoff, 0));
936 c->sm.n = entries;
937 c->sm.m = entries-1;
938 c->sm.host = emalign(entries * sizeof *c->sm.host);
939
940 c->bg.pool = &bgpool;
941 c->bg.pool->size = nextpow(2 + e->maxmtu); /* 2-byte alignment pad */
942 cmd(c, CSbigsz, c->bg.pool->size);
943 c->bg.lanai = (ulong*)(c->ram + cmd(c, CGbigrxoff, 0));
944 c->bg.n = entries;
945 c->bg.m = entries-1;
946 c->bg.host = emalign(entries * sizeof *c->bg.host);
947
948 sz = c->sm.pool->size + BY2PG;
949 for(i = 0; i < c->sm.n; i++){
950 if((b = allocb(sz)) == 0)
951 break;
952 b->free = smbfree;
953 freeb(b);
954 }
955 sz = c->bg.pool->size + BY2PG;
956 for(i = 0; i < c->bg.n; i++){
957 if((b = allocb(sz)) == 0)
958 break;
959 b->free = bgbfree;
960 freeb(b);
961 }
962
963 cmd(c, CSstatsdma, c->statsprt);
964 c->linkstat = ~0;
965 c->nrdma = 15;
966
967 cmd(c, Cetherup, 0);
968 }
969
/*
 * take the next received frame off the completion ring.
 * returns the Block holding the frame, or 0 when the next ring entry
 * is empty (len == 0), on overrun, or if the ring slot has no block.
 * note that the completion entry is consumed (zeroed, d->i advanced)
 * before the overrun and missing-block checks, so those frames are
 * dropped.
 */
static Block*
nextblock(Ctlr *c)
{
	uint i;
	ushort l, k;
	Block *b;
	Done *d;
	Rx *rx;
	Slot *s;
	Slotparts *sp;

	d = &c->done;
	s = d->entry;
	i = d->i & d->m;
	sp = (Slotparts *)(s + i);
	l = sp->len;		/* still big endian here; only tested against 0 */
	if(l == 0)
		return 0;
	k = sp->cksum;
	s[i] = 0;		/* mark the entry free for the card */
	d->i++;
	l = gbit16((uchar*)&l);	/* now in host byte order */
//dprint("nextb: i=%d l=%d\n", d->i, l);
	rx = whichrx(c, l);	/* the length decides which ring the buffer came from */
	if(rx->i >= rx->cnt){
		iprint("m10g: overrun\n");
		return 0;
	}
	i = rx->i & rx->m;
	b = rx->host[i];
	rx->host[i] = 0;
	if(b == 0){
		iprint("m10g: error rx to no block.  memory is hosed.\n");
		return 0;
	}
	rx->i++;

	b->flag |= Bipck|Btcpck|Budpck;
	b->checksum = k;
	b->rp += 2;		/* skip the 2-byte alignment pad (see open0) */
	b->wp += 2+l;
	b->lim = b->wp;		/* lie like a dog. */
	return b;
}
1014
1015 static int
rxcansleep(void * v)1016 rxcansleep(void *v)
1017 {
1018 Ctlr *c;
1019 Slot *s;
1020 Slotparts *sp;
1021 Done *d;
1022
1023 c = v;
1024 d = &c->done;
1025 s = c->done.entry;
1026 sp = (Slotparts *)(s + (d->i & d->m));
1027 if(sp->len != 0)
1028 return -1;
1029 c->irqack[0] = pbit32(3);
1030 return 0;
1031 }
1032
1033 static void
m10rx(void * v)1034 m10rx(void *v)
1035 {
1036 Ether *e;
1037 Ctlr *c;
1038 Block *b;
1039
1040 e = v;
1041 c = e->ctlr;
1042 for(;;){
1043 replenish(&c->sm);
1044 replenish(&c->bg);
1045 sleep(&c->rxrendez, rxcansleep, c);
1046 while(b = nextblock(c))
1047 etheriq(e, b, 1);
1048 }
1049 }
1050
/*
 * reclaim transmitted Blocks.  n is the card's count of frames sent
 * (Stats.txcnt); walk the ring from tx->cnt, freeing the Block parked
 * at the last segment of each frame, until tx->npkt catches up with n,
 * the walk reaches tx->i, or a whole ring has been scanned.
 */
static void
txcleanup(Tx *tx, ulong n)
{
	Block *b;
	uint j, l, m;

	if(tx->npkt == n)
		return;
	l = 0;			/* slots scanned so far */
	m = tx->m;
	/*
	 * if tx->cnt == tx->i, yet tx->npkt == n-1, we just
	 * caught ourselves and myricom card updating.
	 */
	for(;; tx->cnt++){
		j = tx->cnt & tx->m;
		if(b = tx->bring[j]){
			tx->bring[j] = 0;
			tx->nbytes += BLEN(b);
			freeb(b);
			if(++tx->npkt == n)
				return;
		}
		if(tx->cnt == tx->i)
			return;
		if(l++ == m){
			iprint("m10g: tx ovrun: %lud %lud\n", n, tx->npkt);
			return;
		}
	}
}
1082
1083 static int
txcansleep(void * v)1084 txcansleep(void *v)
1085 {
1086 Ctlr *c;
1087
1088 c = v;
1089 if(c->tx.cnt != c->tx.i && c->tx.npkt != gbit32(c->stats->txcnt))
1090 return -1;
1091 return 0;
1092 }
1093
1094 static void
txproc(void * v)1095 txproc(void *v)
1096 {
1097 Ether *e;
1098 Ctlr *c;
1099 Tx *tx;
1100
1101 e = v;
1102 c = e->ctlr;
1103 tx = &c->tx;
1104 for(;;){
1105 sleep(&c->txrendez, txcansleep, c);
1106 txcleanup(tx, gbit32(c->stats->txcnt));
1107 }
1108 }
1109
1110 static void
submittx(Tx * tx,int n)1111 submittx(Tx *tx, int n)
1112 {
1113 Send *l, *h;
1114 int i0, i, m;
1115
1116 m = tx->m;
1117 i0 = tx->i & m;
1118 l = tx->lanai;
1119 h = tx->host;
1120 for(i = n-1; i >= 0; i--)
1121 memmove(l+(i + i0 & m), h+(i + i0 & m), sizeof *h);
1122 tx->i += n;
1123 // coherence();
1124 }
1125
1126 static int
nsegments(Block * b,int segsz)1127 nsegments(Block *b, int segsz)
1128 {
1129 uintptr bus, end, slen, len;
1130 int i;
1131
1132 bus = PCIWADDR(b->rp);
1133 i = 0;
1134 for(len = BLEN(b); len; len -= slen){
1135 end = bus + segsz & ~(segsz-1);
1136 slen = end - bus;
1137 if(slen > len)
1138 slen = len;
1139 bus += slen;
1140 i++;
1141 }
1142 return i;
1143 }
1144
/*
 * Ether transmit hook: drain e->oq into the transmit ring.
 * each Block is cut into segments that do not cross a segsz boundary;
 * one Send descriptor is built per segment in the host ring and the
 * frame's descriptors are then copied to the card by submittx().
 * the Block is parked in tx->bring at the index of its last segment
 * so txcleanup() can free it later.  runs under the Tx qlock.
 */
static void
m10gtransmit(Ether *e)
{
	ushort slen;
	ulong i, cnt, rdma, nseg, count, end, bus, len, segsz;
	uchar flags;
	Block *b;
	Ctlr *c;
	Send *s, *s0, *s0m8;
	Tx *tx;

	c = e->ctlr;
	tx = &c->tx;
	segsz = tx->segsz;

	qlock(tx);
	count = 0;
	s = tx->host + (tx->i & tx->m);		/* next descriptor to fill */
	cnt = tx->cnt;
	s0 = tx->host + (cnt & tx->m);		/* oldest descriptor not yet reclaimed */
	s0m8 = tx->host + ((cnt - 8) & tx->m);	/* 8 slots before s0 */
	i = tx->i;
	/* loop while s is outside the window [s0m8, s0) */
	for(; s >= s0 || s < s0m8; i += nseg){
		if((b = qget(e->oq)) == nil)
			break;
		flags = SFfirst|SFnotso;
		if((len = BLEN(b)) < 1520)
			flags |= SFsmall;
		rdma = nseg = nsegments(b, segsz);
		bus = PCIWADDR(b->rp);
		for(; len; len -= slen){
			end = (bus + segsz) & ~(segsz-1);
			slen = end - bus;
			if(slen > len)
				slen = len;
			s->low = pbit32(bus);
			s->len = pbit16(slen);
			s->nrdma = rdma;
			s->flags = flags;

			bus += slen;
			if(++s == tx->host + tx->n)
				s = tx->host;		/* wrap around the ring */
			count++;
			flags &= ~SFfirst;		/* only the first segment carries SFfirst */
			rdma = 1;
		}
		tx->bring[(i + nseg - 1) & tx->m] = b;
		/* the `1 ||' makes this unconditional: submit after every frame */
		if(1 || count > 0){
			submittx(tx, count);
			count = 0;
			cnt = tx->cnt;
			s0 = tx->host + (cnt & tx->m);
			s0m8 = tx->host + ((cnt - 8) & tx->m);
		}
	}
	qunlock(tx);
}
1203
1204 static void
checkstats(Ether * e,Ctlr * c,Stats * s)1205 checkstats(Ether *e, Ctlr *c, Stats *s)
1206 {
1207 ulong i;
1208
1209 if(s->updated == 0)
1210 return;
1211
1212 i = gbit32(s->linkstat);
1213 if(c->linkstat != i){
1214 e->link = i;
1215 if(c->linkstat = i)
1216 dprint("m10g: link up\n");
1217 else
1218 dprint("m10g: link down\n");
1219 }
1220 i = gbit32(s->nrdma);
1221 if(i != c->nrdma){
1222 dprint("m10g: rdma timeout %ld\n", i);
1223 c->nrdma = i;
1224 }
1225 }
1226
1227 static void
waitintx(Ctlr * c)1228 waitintx(Ctlr *c)
1229 {
1230 int i;
1231
1232 for(i = 0; i < 1024*1024; i++){
1233 if(c->stats->valid == 0)
1234 break;
1235 coherence();
1236 }
1237 }
1238
/*
 * interrupt handler.  ignores the interrupt unless the controller is
 * fully up (Runed) and the card has marked the stats block valid.
 * wakes the rx and/or tx kproc, quiets the interrupt, waits for the
 * card to clear stats->valid, updates link state and acknowledges.
 */
static void
m10ginterrupt(Ureg *, void *v)
{
	Ether *e;
	Ctlr *c;

	e = v;
	c = e->ctlr;

	if(c->state != Runed || c->stats->valid == 0)	/* not ready for us? */
		return;

	if(c->stats->valid & 1)
		wakeup(&c->rxrendez);
	if(gbit32(c->stats->txcnt) != c->tx.npkt)
		wakeup(&c->txrendez);
	if(c->msi == 0)
		*c->irqdeass = 0;	/* legacy interrupt: write the deassert register */
	else
		c->stats->valid = 0;
	waitintx(c);
	checkstats(e, c, c->stats);
	c->irqack[1] = pbit32(3);
}
1263
1264 static void
m10gattach(Ether * e)1265 m10gattach(Ether *e)
1266 {
1267 Ctlr *c;
1268 char name[12];
1269
1270 dprint("m10gattach\n");
1271
1272 qlock(e->ctlr);
1273 c = e->ctlr;
1274 if(c->state != Detached){
1275 qunlock(c);
1276 return;
1277 }
1278 if(waserror()){
1279 c->state = Detached;
1280 qunlock(c);
1281 nexterror();
1282 }
1283 reset(e, c);
1284 c->state = Attached;
1285 open0(e, c);
1286 if(c->kprocs == 0){
1287 c->kprocs++;
1288 snprint(name, sizeof name, "#l%drxproc", e->ctlrno);
1289 kproc(name, m10rx, e);
1290 snprint(name, sizeof name, "#l%dtxproc", e->ctlrno);
1291 kproc(name, txproc, e);
1292 }
1293 c->state = Runed;
1294 qunlock(c);
1295 poperror();
1296 }
1297
1298 static int
m10gdetach(Ctlr * c)1299 m10gdetach(Ctlr *c)
1300 {
1301 dprint("m10gdetach\n");
1302 // reset(e->ctlr);
1303 vunmap(c->ram, c->pcidev->mem[0].size);
1304 ctlrfree(c); /* this is a bad idea: don't free c */
1305 return -1;
1306 }
1307
1308 static int
lstcount(Block * b)1309 lstcount(Block *b)
1310 {
1311 int i;
1312
1313 i = 0;
1314 for(; b; b = b->next)
1315 i++;
1316 return i;
1317 }
1318
/*
 * Ether ifstat hook: format the card's statistics block and the
 * driver's own ring counters as text and copy the part selected by
 * off and n into v via readstr().  returns the number of bytes copied.
 * raises Enomem if the scratch buffer cannot be allocated.
 */
static long
m10gifstat(Ether *e, void *v, long n, ulong off)
{
	char *p;
	Ctlr *c;
	Stats s;

	c = e->ctlr;
	p = malloc(READSTR+1);
	if(p == nil)
		error(Enomem);
	/* no point in locking this because this is done via dma. */
	memmove(&s, c->stats, sizeof s);

	/* note: the "sm n"/"bg n" lines print the pool's free count, not the ring size */
	snprint(p, READSTR,
		"txcnt = %lud\n"	"linkstat = %lud\n" 	"dlink = %lud\n"
		"derror = %lud\n"	"drunt = %lud\n" 	"doverrun = %lud\n"
		"dnosm = %lud\n"	"dnobg = %lud\n"	"nrdma = %lud\n"
		"txstopped = %ud\n"	"down = %ud\n" 		"updated = %ud\n"
		"valid = %ud\n\n"
		"tx pkt = %lud\n"	"tx bytes = %lld\n"
		"tx cnt = %ud\n"	"tx n = %ud\n"		"tx i = %ud\n"
		"sm cnt = %ud\n"	"sm i = %ud\n"		"sm n = %ud\n"
		"sm lst = %ud\n"
		"bg cnt = %ud\n"	"bg i = %ud\n"		"bg n = %ud\n"
		"bg lst = %ud\n"
		"segsz = %lud\n"	"coal = %lud\n",
		gbit32(s.txcnt),  gbit32(s.linkstat),	gbit32(s.dlink),
		gbit32(s.derror), gbit32(s.drunt),	gbit32(s.doverrun),
		gbit32(s.dnosm),  gbit32(s.dnobg),	gbit32(s.nrdma),
		s.txstopped,  s.down, s.updated, s.valid,
		c->tx.npkt, c->tx.nbytes,
		c->tx.cnt, c->tx.n, c->tx.i,
		c->sm.cnt, c->sm.i, c->sm.pool->n, lstcount(c->sm.pool->head),
		c->bg.cnt, c->bg.i, c->bg.pool->n, lstcount(c->bg.pool->head),
		c->tx.segsz, gbit32((uchar*)c->coal));

	n = readstr(off, v, n, p);
	free(p);
	return n;
}
1360
1361 //static void
1362 //summary(Ether *e)
1363 //{
1364 // char *buf;
1365 // int n, i, j;
1366 //
1367 // if(e == 0)
1368 // return;
1369 // buf = malloc(n=250);
1370 // if(buf == 0)
1371 // return;
1372 //
1373 // snprint(buf, n, "oq\n");
1374 // qsummary(e->oq, buf+3, n-3-1);
1375 // iprint("%s", buf);
1376 //
1377 // if(e->f) for(i = 0; e->f[i]; i++){
1378 // j = snprint(buf, n, "f%d %d\n", i, e->f[i]->type);
1379 // qsummary(e->f[i]->in, buf+j, n-j-1);
1380 // print("%s", buf);
1381 // }
1382 //
1383 // free(buf);
1384 //}
1385
1386 static void
rxring(Ctlr * c)1387 rxring(Ctlr *c)
1388 {
1389 Done *d;
1390 Slot *s;
1391 Slotparts *sp;
1392 int i;
1393
1394 d = &c->done;
1395 s = d->entry;
1396 for(i = 0; i < d->n; i++) {
1397 sp = (Slotparts *)(s + i);
1398 if(sp->len)
1399 iprint("s[%d] = %d\n", i, sp->len);
1400 }
1401 }
1402
/* indices of the ctl messages understood by m10gctl() */
enum {
	CMdebug,
	CMcoal,
	CMwakeup,
	CMtxwakeup,
	CMqsummary,	/* handler is commented out; not present in ctab */
	CMrxring,
};

/* ctl message table: index, keyword, number of fields including the keyword */
static Cmdtab ctab[] = {
	CMdebug,	"debug",	2,
	CMcoal,		"coal",		2,
	CMwakeup,	"wakeup",	1,
	CMtxwakeup,	"txwakeup",	1,
//	CMqsummary,	"q",		1,
	CMrxring,	"rxring",	1,
};
1420
1421 static long
m10gctl(Ether * e,void * v,long n)1422 m10gctl(Ether *e, void *v, long n)
1423 {
1424 int i;
1425 Cmdbuf *c;
1426 Cmdtab *t;
1427
1428 dprint("m10gctl\n");
1429 if(e->ctlr == nil)
1430 error(Enonexist);
1431
1432 c = parsecmd(v, n);
1433 if(waserror()){
1434 free(c);
1435 nexterror();
1436 }
1437 t = lookupcmd(c, ctab, nelem(ctab));
1438 switch(t->index){
1439 case CMdebug:
1440 debug = (strcmp(c->f[1], "on") == 0);
1441 break;
1442 case CMcoal:
1443 i = atoi(c->f[1]);
1444 if(i < 0 || i > 1000)
1445 error(Ebadarg);
1446 *((Ctlr*)e->ctlr)->coal = pbit32(i);
1447 break;
1448 case CMwakeup:
1449 wakeup(&((Ctlr*)e->ctlr)->rxrendez); /* you're kidding, right? */
1450 break;
1451 case CMtxwakeup:
1452 wakeup(&((Ctlr*)e->ctlr)->txrendez); /* you're kidding, right? */
1453 break;
1454 // case CMqsummary:
1455 // summary(e);
1456 // break;
1457 case CMrxring:
1458 rxring(e->ctlr);
1459 break;
1460 default:
1461 error(Ebadarg);
1462 }
1463 free(c);
1464 poperror();
1465 return n;
1466 }
1467
1468 static void
m10gshutdown(Ether * e)1469 m10gshutdown(Ether *e)
1470 {
1471 dprint("m10gshutdown\n");
1472 m10gdetach(e->ctlr);
1473 }
1474
1475 static void
m10gpromiscuous(void * v,int on)1476 m10gpromiscuous(void *v, int on)
1477 {
1478 Ether *e;
1479 int i;
1480
1481 dprint("m10gpromiscuous\n");
1482 e = v;
1483 if(on)
1484 i = Cpromisc;
1485 else
1486 i = Cnopromisc;
1487 cmd(e->ctlr, i, 0);
1488 }
1489
1490 static int mcctab[] = { CSleavemc, CSjoinmc };
1491 static char *mcntab[] = { "leave", "join" };
1492
1493 static void
m10gmulticast(void * v,uchar * ea,int on)1494 m10gmulticast(void *v, uchar *ea, int on)
1495 {
1496 Ether *e;
1497 int i;
1498
1499 dprint("m10gmulticast\n");
1500 e = v;
1501 if((i = maccmd(e->ctlr, mcctab[on], ea)) != 0)
1502 print("m10g: can't %s %E: %d\n", mcntab[on], ea, i);
1503 }
1504
1505 static void
m10gpci(void)1506 m10gpci(void)
1507 {
1508 Pcidev *p;
1509 Ctlr *t, *c;
1510
1511 t = 0;
1512 for(p = 0; p = pcimatch(p, Vmyricom, 0); ){
1513 switch(p->did){
1514 case 0x8: /* 8a */
1515 break;
1516 case 0x9: /* 8a with msi-x fw */
1517 case 0xa: /* 8b */
1518 case 0xb: /* 8b2 */
1519 case 0xc: /* 2-8b2 */
1520 /* untested */
1521 break;
1522 default:
1523 print("etherm10g: unknown myricom did %#ux\n", p->did);
1524 continue;
1525 }
1526 c = malloc(sizeof *c);
1527 if(c == nil)
1528 break;
1529 c->pcidev = p;
1530 c->id = p->did<<16 | p->vid;
1531 c->boot = pcicap(p, PciCapVND);
1532 // kickthebaby(p, c);
1533 pcisetbme(p);
1534 if(setmem(p, c) == -1){
1535 print("m10g: setmem failed\n");
1536 free(c);
1537 /* cleanup */
1538 continue;
1539 }
1540 if(t)
1541 t->next = c;
1542 else
1543 ctlrs = c;
1544 t = c;
1545 }
1546 }
1547
1548 static int
m10gpnp(Ether * e)1549 m10gpnp(Ether *e)
1550 {
1551 Ctlr *c;
1552
1553 if(ctlrs == nil)
1554 m10gpci();
1555
1556 for(c = ctlrs; c != nil; c = c->next)
1557 if(c->active)
1558 continue;
1559 else if(e->port == 0 || e->port == c->port)
1560 break;
1561 if(c == nil)
1562 return -1;
1563 c->active = 1;
1564
1565 e->ctlr = c;
1566 e->port = c->port;
1567 e->irq = c->pcidev->intl;
1568 e->tbdf = c->pcidev->tbdf;
1569 e->mbps = 10000;
1570 memmove(e->ea, c->ra, Eaddrlen);
1571
1572 e->attach = m10gattach;
1573 e->detach = m10gshutdown;
1574 e->transmit = m10gtransmit;
1575 e->interrupt = m10ginterrupt;
1576 e->ifstat = m10gifstat;
1577 e->ctl = m10gctl;
1578 e->shutdown = m10gshutdown;
1579
1580 e->arg = e;
1581 e->promiscuous = m10gpromiscuous;
1582 e->multicast = m10gmulticast;
1583 return 0;
1584 }
1585
1586 void
etherm10glink(void)1587 etherm10glink(void)
1588 {
1589 addethercard("m10g", m10gpnp);
1590 }
1591