1 /* 2 * myricom 10g-pcie-8a 10 Gb ethernet driver 3 * © 2007 erik quanstrom, coraid 4 * 5 * the card is big endian. 6 * we use uvlong rather than uintptr to hold addresses so that 7 * we don't get "warning: stupid shift" on 32-bit architectures. 8 */ 9 #include "u.h" 10 #include "../port/lib.h" 11 #include "mem.h" 12 #include "dat.h" 13 #include "fns.h" 14 #include "io.h" 15 #include "../port/error.h" 16 #include "../port/netif.h" 17 18 #include "../pc/etherif.h" 19 20 #ifndef KiB 21 #define KiB 1024u /* Kibi 0x0000000000000400 */ 22 #define MiB 1048576u /* Mebi 0x0000000000100000 */ 23 #endif /* KiB */ 24 25 #define dprint(...) if(debug) print(__VA_ARGS__) 26 #define pcicapdbg(...) 27 #define malign(n) mallocalign((n), 4*KiB, 0, 0) 28 29 #include "etherm10g2k.i" 30 #include "etherm10g4k.i" 31 32 static int debug = 0; 33 static char Etimeout[] = "timeout"; 34 35 enum { 36 Epromsz = 256, 37 Maxslots= 1024, 38 Align = 4096, 39 Maxmtu = 9000, 40 Noconf = 0xffffffff, 41 42 Fwoffset= 1*MiB, 43 Cmdoff = 0xf80000, /* command port offset */ 44 Fwsubmt = 0xfc0000, /* firmware submission command port offset */ 45 Rdmaoff = 0xfc01c0, /* rdma command port offset */ 46 }; 47 48 enum { 49 CZero, 50 Creset, 51 Cversion, 52 53 CSintrqdma, /* issue these before Cetherup */ 54 CSbigsz, /* in bytes bigsize = 2^n */ 55 CSsmallsz, 56 57 CGsendoff, 58 CGsmallrxoff, 59 CGbigrxoff, 60 CGirqackoff, 61 CGirqdeassoff, 62 CGsendrgsz, 63 CGrxrgsz, 64 65 CSintrqsz, /* 2^n */ 66 Cetherup, /* above parameters + mtu/mac addr must be set first. */ 67 Cetherdn, 68 69 CSmtu, /* below may be issued live */ 70 CGcoaloff, /* in µs */ 71 CSstatsrate, /* in µs */ 72 CSstatsdma, 73 74 Cpromisc, 75 Cnopromisc, 76 CSmac, 77 78 Cenablefc, 79 Cdisablefc, 80 81 Cdmatest, /* address in d[0-1], d[2]=length */ 82 83 Cenableallmc, 84 Cdisableallmc, 85 86 CSjoinmc, 87 CSleavemc, 88 Cleaveallmc, 89 90 CSstatsdma2, /* adds (unused) multicast stats */ 91 }; 92 93 typedef union { 94 uint i[2]; 95 uchar c[8]; 96 } Cmd; 97 98 typedef ulong Slot; 99 typedef struct { 100 ushort cksum; 101 ushort len; 102 } Slotparts; 103 104 enum { 105 SFsmall = 1, 106 SFfirst = 2, 107 SFalign = 4, 108 SFnotso = 16, 109 }; 110 111 typedef struct { 112 ulong high; 113 ulong low; 114 ushort hdroff; 115 ushort len; 116 uchar pad; 117 uchar nrdma; 118 uchar chkoff; 119 uchar flags; 120 } Send; 121 122 typedef struct { 123 QLock; 124 Send *lanai; /* tx ring (cksum+len in lanai memory) */ 125 Send *host; /* tx ring (data in our memory) */ 126 Block **bring; 127 // uchar *wcfifo; /* what the heck is a w/c fifo? */ 128 int size; /* of buffers in the z8's memory */ 129 ulong segsz; 130 uint n; /* rxslots */ 131 uint m; /* mask; rxslots must be a power of two */ 132 uint i; /* number of segments (not frames) queued */ 133 uint cnt; /* number of segments sent by the card */ 134 135 ulong npkt; 136 vlong nbytes; 137 } Tx; 138 139 typedef struct { 140 Lock; 141 Block *head; 142 uint size; /* buffer size of each block */ 143 uint n; /* n free buffers */ 144 uint cnt; 145 } Bpool; 146 147 static Bpool smpool = { .size = 128, }; 148 static Bpool bgpool = { .size = Maxmtu, }; 149 150 typedef struct { 151 Bpool *pool; /* free buffers */ 152 ulong *lanai; /* rx ring; we have no permanent host shadow */ 153 Block **host; /* called "info" in myricom driver */ 154 // uchar *wcfifo; /* cmd submission fifo */ 155 uint m; 156 uint n; /* rxslots */ 157 uint i; 158 uint cnt; /* number of buffers allocated (lifetime) */ 159 uint allocfail; 160 } Rx; 161 162 /* dma mapped. unix network byte order. */ 163 typedef struct { 164 uchar txcnt[4]; 165 uchar linkstat[4]; 166 uchar dlink[4]; 167 uchar derror[4]; 168 uchar drunt[4]; 169 uchar doverrun[4]; 170 uchar dnosm[4]; 171 uchar dnobg[4]; 172 uchar nrdma[4]; 173 uchar txstopped; 174 uchar down; 175 uchar updated; 176 uchar valid; 177 } Stats; 178 179 enum { 180 Detached, 181 Attached, 182 Runed, 183 }; 184 185 typedef struct { 186 Slot *entry; 187 uvlong busaddr; 188 uint m; 189 uint n; 190 uint i; 191 } Done; 192 193 typedef struct Ctlr Ctlr; 194 typedef struct Ctlr { 195 QLock; 196 int state; 197 int kprocs; 198 uvlong port; 199 Pcidev* pcidev; 200 Ctlr* next; 201 int active; 202 int id; /* do we need this? */ 203 204 uchar ra[Eaddrlen]; 205 206 int ramsz; 207 uchar *ram; 208 209 ulong *irqack; 210 ulong *irqdeass; 211 ulong *coal; 212 213 char eprom[Epromsz]; 214 ulong serial; /* unit serial number */ 215 216 QLock cmdl; 217 Cmd *cmd; /* address of command return */ 218 uvlong cprt; /* bus address of command */ 219 220 uvlong boot; /* boot address */ 221 222 Done done; 223 Tx tx; 224 Rx sm; 225 Rx bg; 226 Stats *stats; 227 uvlong statsprt; 228 229 Rendez rxrendez; 230 Rendez txrendez; 231 232 int msi; 233 ulong linkstat; 234 ulong nrdma; 235 } Ctlr; 236 237 static Ctlr *ctlrs; 238 239 enum { 240 PciCapPMG = 0x01, /* power management */ 241 PciCapAGP = 0x02, 242 PciCapVPD = 0x03, /* vital product data */ 243 PciCapSID = 0x04, /* slot id */ 244 PciCapMSI = 0x05, 245 PciCapCHS = 0x06, /* compact pci hot swap */ 246 PciCapPCIX = 0x07, 247 PciCapHTC = 0x08, /* hypertransport irq conf */ 248 PciCapVND = 0x09, /* vendor specific information */ 249 PciCapHSW = 0x0C, /* hot swap */ 250 PciCapPCIe = 0x10, 251 PciCapMSIX = 0x11, 252 }; 253 254 enum { 255 PcieAERC = 1, 256 PcieVC, 257 PcieSNC, 258 PciePBC, 259 }; 260 261 enum { 262 AercCCR = 0x18, /* control register */ 263 }; 264 265 enum { 266 PcieCTL = 8, 267 PcieLCR = 12, 268 PcieMRD = 0x7000, /* maximum read size */ 269 }; 270 271 static int 272 pcicap(Pcidev *p, int cap) 273 { 274 int i, c, off; 275 276 pcicapdbg("pcicap: %x:%d\n", p->vid, p->did); 277 off = 0x34; /* 0x14 for cardbus */ 278 for(i = 48; i--; ){ 279 pcicapdbg("\t" "loop %x\n", off); 280 off = pcicfgr8(p, off); 281 pcicapdbg("\t" "pcicfgr8 %x\n", off); 282 if(off < 0x40) 283 break; 284 off &= ~3; 285 c = pcicfgr8(p, off); 286 pcicapdbg("\t" "pcicfgr8 %x\n", c); 287 if(c == 0xff) 288 break; 289 if(c == cap) 290 return off; 291 off++; 292 } 293 return 0; 294 } 295 296 /* 297 * this function doesn't work because pcicgr32 doesn't have access 298 * to the pcie extended configuration space. 299 */ 300 static int 301 pciecap(Pcidev *p, int cap) 302 { 303 uint off, i; 304 305 off = 0x100; 306 while(((i = pcicfgr32(p, off)) & 0xffff) != cap){ 307 off = i >> 20; 308 print("m10g: pciecap offset = %ud", off); 309 if(off < 0x100 || off >= 4*KiB - 1) 310 return 0; 311 } 312 print("m10g: pciecap found = %ud", off); 313 return off; 314 } 315 316 static int 317 setpcie(Pcidev *p) 318 { 319 int off; 320 321 /* set 4k writes */ 322 off = pcicap(p, PciCapPCIe); 323 if(off < 64) 324 return -1; 325 off += PcieCTL; 326 pcicfgw16(p, off, (pcicfgr16(p, off) & ~PcieMRD) | 5<<12); 327 return 0; 328 } 329 330 static int 331 whichfw(Pcidev *p) 332 { 333 char *s; 334 int i, off, lanes, ecrc; 335 ulong cap; 336 337 /* check the number of configured lanes. */ 338 off = pcicap(p, PciCapPCIe); 339 if(off < 64) 340 return -1; 341 off += PcieLCR; 342 cap = pcicfgr16(p, off); 343 lanes = (cap>>4) & 0x3f; 344 345 /* check AERC register. we need it on. */ 346 off = pciecap(p, PcieAERC); 347 print("; offset %d returned\n", off); 348 cap = 0; 349 if(off != 0){ 350 off += AercCCR; 351 cap = pcicfgr32(p, off); 352 print("m10g: %lud cap\n", cap); 353 } 354 ecrc = (cap>>4) & 0xf; 355 /* if we don't like the aerc, kick it here. */ 356 357 print("m10g: %d lanes; ecrc=%d; ", lanes, ecrc); 358 if(s = getconf("myriforce")){ 359 i = atoi(s); 360 if(i != 4*KiB || i != 2*KiB) 361 i = 2*KiB; 362 print("fw = %d [forced]\n", i); 363 return i; 364 } 365 if(lanes <= 4) 366 print("fw = 4096 [lanes]\n"); 367 else if(ecrc & 10) 368 print("fw = 4096 [ecrc set]\n"); 369 else 370 print("fw = 4096 [default]\n"); 371 return 4*KiB; 372 } 373 374 static int 375 parseeprom(Ctlr *c) 376 { 377 int i, j, k, l, bits; 378 char *s; 379 380 dprint("m10g eprom:\n"); 381 s = c->eprom; 382 bits = 3; 383 for(i = 0; s[i] && i < Epromsz; i++){ 384 l = strlen(s+i); 385 dprint("\t%s\n", s+i); 386 if(strncmp(s+i, "MAC=", 4) == 0 && l == 4+12+5){ 387 bits ^= 1; 388 j = i + 4; 389 for(k = 0; k < 6; k++) 390 c->ra[k] = strtoul(s+j+3*k, 0, 16); 391 }else if(strncmp(s+i, "SN=", 3) == 0){ 392 bits ^= 2; 393 c->serial = atoi(s+i+3); 394 } 395 i += l; 396 } 397 if(bits) 398 return -1; 399 return 0; 400 } 401 402 static ushort 403 pbit16(ushort i) 404 { 405 ushort j; 406 uchar *p; 407 408 p = (uchar*)&j; 409 p[1] = i; 410 p[0] = i>>8; 411 return j; 412 } 413 414 static ushort 415 gbit16(uchar i[2]) 416 { 417 ushort j; 418 419 j = i[1]; 420 j |= i[0]<<8; 421 return j; 422 } 423 424 static ulong 425 pbit32(ulong i) 426 { 427 ulong j; 428 uchar *p; 429 430 p = (uchar*)&j; 431 p[3] = i; 432 p[2] = i>>8; 433 p[1] = i>>16; 434 p[0] = i>>24; 435 return j; 436 } 437 438 static ulong 439 gbit32(uchar i[4]) 440 { 441 ulong j; 442 443 j = i[3]; 444 j |= i[2]<<8; 445 j |= i[1]<<16; 446 j |= i[0]<<24; 447 return j; 448 } 449 450 static void 451 prepcmd(ulong *cmd, int i) 452 { 453 while(i-- > 0) 454 cmd[i] = pbit32(cmd[i]); 455 } 456 457 /* 458 * the command looks like this (int 32bit integers) 459 * cmd type 460 * addr (low) 461 * addr (high) 462 * pad (used for dma testing) 463 * response (high) 464 * response (low) 465 * 40 byte = 5 int pad. 466 */ 467 468 ulong 469 cmd(Ctlr *c, int type, uvlong data) 470 { 471 ulong buf[16], i; 472 Cmd *cmd; 473 474 qlock(&c->cmdl); 475 cmd = c->cmd; 476 cmd->i[1] = Noconf; 477 memset(buf, 0, sizeof buf); 478 buf[0] = type; 479 buf[1] = data; 480 buf[2] = data >> 32; 481 buf[4] = c->cprt >> 32; 482 buf[5] = c->cprt; 483 prepcmd(buf, 6); 484 coherence(); 485 memmove(c->ram + Cmdoff, buf, sizeof buf); 486 487 if(waserror()) 488 nexterror(); 489 for(i = 0; i < 15; i++){ 490 if(cmd->i[1] != Noconf){ 491 poperror(); 492 i = gbit32(cmd->c); 493 qunlock(&c->cmdl); 494 if(cmd->i[1] != 0) 495 dprint("[%lux]", i); 496 return i; 497 } 498 tsleep(&up->sleep, return0, 0, 1); 499 } 500 qunlock(&c->cmdl); 501 iprint("m10g: cmd timeout [%ux %ux] cmd=%d\n", 502 cmd->i[0], cmd->i[1], type); 503 error(Etimeout); 504 return ~0; /* silence! */ 505 } 506 507 ulong 508 maccmd(Ctlr *c, int type, uchar *m) 509 { 510 ulong buf[16], i; 511 Cmd *cmd; 512 513 qlock(&c->cmdl); 514 cmd = c->cmd; 515 cmd->i[1] = Noconf; 516 memset(buf, 0, sizeof buf); 517 buf[0] = type; 518 buf[1] = m[0]<<24 | m[1]<<16 | m[2]<<8 | m[3]; 519 buf[2] = m[4]<< 8 | m[5]; 520 buf[4] = c->cprt >> 32; 521 buf[5] = c->cprt; 522 prepcmd(buf, 6); 523 coherence(); 524 memmove(c->ram + Cmdoff, buf, sizeof buf); 525 526 if(waserror()) 527 nexterror(); 528 for(i = 0; i < 15; i++){ 529 if(cmd->i[1] != Noconf){ 530 poperror(); 531 i = gbit32(cmd->c); 532 qunlock(&c->cmdl); 533 if(cmd->i[1] != 0) 534 dprint("[%lux]", i); 535 return i; 536 } 537 tsleep(&up->sleep, return0, 0, 1); 538 } 539 qunlock(&c->cmdl); 540 iprint("m10g: maccmd timeout [%ux %ux] cmd=%d\n", 541 cmd->i[0], cmd->i[1], type); 542 error(Etimeout); 543 return ~0; /* silence! */ 544 } 545 546 /* remove this garbage after testing */ 547 enum { 548 DMAread = 0x10000, 549 DMAwrite= 0x1, 550 }; 551 552 ulong 553 dmatestcmd(Ctlr *c, int type, uvlong addr, int len) 554 { 555 ulong buf[16], i; 556 557 memset(buf, 0, sizeof buf); 558 memset(c->cmd, Noconf, sizeof *c->cmd); 559 buf[0] = Cdmatest; 560 buf[1] = addr; 561 buf[2] = addr >> 32; 562 buf[3] = len * type; 563 buf[4] = c->cprt >> 32; 564 buf[5] = c->cprt; 565 prepcmd(buf, 6); 566 coherence(); 567 memmove(c->ram + Cmdoff, buf, sizeof buf); 568 569 if(waserror()) 570 nexterror(); 571 for(i = 0; i < 15; i++){ 572 if(c->cmd->i[1] != Noconf){ 573 i = gbit32(c->cmd->c); 574 if(i == 0) 575 error(Eio); 576 poperror(); 577 return i; 578 } 579 tsleep(&up->sleep, return0, 0, 5); 580 } 581 error(Etimeout); 582 return ~0; /* silence! */ 583 } 584 585 ulong 586 rdmacmd(Ctlr *c, int on) 587 { 588 ulong buf[16], i; 589 590 memset(buf, 0, sizeof buf); 591 c->cmd->i[0] = 0; 592 coherence(); 593 buf[0] = c->cprt >> 32; 594 buf[1] = c->cprt; 595 buf[2] = Noconf; 596 buf[3] = c->cprt >> 32; 597 buf[4] = c->cprt; 598 buf[5] = on; 599 prepcmd(buf, 6); 600 memmove(c->ram + Rdmaoff, buf, sizeof buf); 601 602 if(waserror()) 603 nexterror(); 604 for(i = 0; i < 20; i++){ 605 if(c->cmd->i[0] == Noconf){ 606 poperror(); 607 return gbit32(c->cmd->c); 608 } 609 tsleep(&up->sleep, return0, 0, 1); 610 } 611 error(Etimeout); 612 iprint("m10g: rdmacmd timeout\n"); 613 return ~0; /* silence! */ 614 } 615 616 static int 617 loadfw(Ctlr *c, int *align) 618 { 619 ulong *f, *s, sz; 620 int i; 621 622 if((*align = whichfw(c->pcidev)) == 4*KiB){ 623 f = (ulong*)fw4k; 624 sz = sizeof fw4k; 625 }else{ 626 f = (ulong*)fw2k; 627 sz = sizeof fw2k; 628 } 629 630 s = (ulong*)(c->ram + Fwoffset); 631 for(i = 0; i < sz / 4; i++) 632 s[i] = f[i]; 633 return sz & ~3; 634 } 635 636 static int 637 bootfw(Ctlr *c) 638 { 639 int i, sz, align; 640 ulong buf[16]; 641 Cmd* cmd; 642 643 if((sz = loadfw(c, &align)) == 0) 644 return 0; 645 dprint("bootfw %d bytes ... ", sz); 646 cmd = c->cmd; 647 648 memset(buf, 0, sizeof buf); 649 c->cmd->i[0] = 0; 650 coherence(); 651 buf[0] = c->cprt >> 32; /* upper dma target address */ 652 buf[1] = c->cprt; /* lower */ 653 buf[2] = Noconf; /* writeback */ 654 buf[3] = Fwoffset + 8, 655 buf[4] = sz - 8; 656 buf[5] = 8; 657 buf[6] = 0; 658 prepcmd(buf, 7); 659 coherence(); 660 memmove(c->ram + Fwsubmt, buf, sizeof buf); 661 662 for(i = 0; i < 20; i++){ 663 if(cmd->i[0] == Noconf) 664 break; 665 delay(1); 666 } 667 dprint("[%lux %lux]", gbit32(cmd->c), gbit32(cmd->c+4)); 668 if(i == 20){ 669 print("m10g: cannot load fw\n"); 670 return -1; 671 } 672 dprint("\n"); 673 c->tx.segsz = align; 674 return 0; 675 } 676 677 static int 678 kickthebaby(Pcidev *p, Ctlr *c) 679 { 680 /* don't kick the baby! */ 681 ulong code; 682 683 pcicfgw8(p, 0x10 + c->boot, 0x3); 684 pcicfgw32(p, 0x18 + c->boot, 0xfffffff0); 685 code = pcicfgr32(p, 0x14 + c->boot); 686 687 dprint("reboot status = %lux\n", code); 688 if(code != 0xfffffff0) 689 return -1; 690 return 0; 691 } 692 693 typedef struct { 694 uchar len[4]; 695 uchar type[4]; 696 char version[128]; 697 uchar globals[4]; 698 uchar ramsz[4]; 699 uchar specs[4]; 700 uchar specssz[4]; 701 } Fwhdr; 702 703 enum { 704 Tmx = 0x4d582020, 705 Tpcie = 0x70636965, 706 Teth = 0x45544820, 707 Tmcp0 = 0x4d435030, 708 }; 709 710 static char * 711 fwtype(ulong type) 712 { 713 switch(type){ 714 case Tmx: 715 return "mx"; 716 case Tpcie: 717 return "PCIe"; 718 case Teth: 719 return "eth"; 720 case Tmcp0: 721 return "mcp0"; 722 } 723 return "*GOK*"; 724 } 725 726 static int 727 chkfw(Ctlr *c) 728 { 729 ulong off, type; 730 Fwhdr *h; 731 732 off = gbit32(c->ram+0x3c); 733 dprint("firmware %lux\n", off); 734 if((off&3) || off + sizeof *h > c->ramsz){ 735 print("!m10g: bad firmware %lux\n", off); 736 return -1; 737 } 738 h = (Fwhdr*)(c->ram + off); 739 type = gbit32(h->type); 740 dprint("\t" "type %s\n", fwtype(type)); 741 dprint("\t" "vers %s\n", h->version); 742 dprint("\t" "ramsz %lux\n", gbit32(h->ramsz)); 743 if(type != Teth){ 744 print("!m10g: bad card type %s\n", fwtype(type)); 745 return -1; 746 } 747 748 return bootfw(c) || rdmacmd(c, 0); 749 } 750 751 static int 752 reset(Ether *e, Ctlr *c) 753 { 754 ulong i, sz; 755 756 if(waserror()){ 757 print("m10g: reset error\n"); 758 nexterror(); 759 return -1; 760 } 761 762 chkfw(c); 763 cmd(c, Creset, 0); 764 765 cmd(c, CSintrqsz, c->done.n * sizeof *c->done.entry); 766 cmd(c, CSintrqdma, c->done.busaddr); 767 c->irqack = (ulong*)(c->ram + cmd(c, CGirqackoff, 0)); 768 /* required only if we're not doing msi? */ 769 c->irqdeass = (ulong*)(c->ram + cmd(c, CGirqdeassoff, 0)); 770 /* this is the driver default, why fiddle with this? */ 771 c->coal = (ulong*)(c->ram + cmd(c, CGcoaloff, 0)); 772 *c->coal = pbit32(25); 773 774 dprint("dma stats:\n"); 775 rdmacmd(c, 1); 776 sz = c->tx.segsz; 777 i = dmatestcmd(c, DMAread, c->done.busaddr, sz); 778 print("m10g: read %lud MB/s;", ((i>>16)*sz*2) / (i&0xffff)); 779 i = dmatestcmd(c, DMAwrite, c->done.busaddr, sz); 780 print(" write %lud MB/s;", ((i>>16)*sz*2) / (i&0xffff)); 781 i = dmatestcmd(c, DMAwrite|DMAread, c->done.busaddr, sz); 782 print(" r/w %lud MB/s\n", ((i>>16)*sz*2*2) / (i&0xffff)); 783 memset(c->done.entry, 0, c->done.n * sizeof *c->done.entry); 784 785 maccmd(c, CSmac, c->ra); 786 // cmd(c, Cnopromisc, 0); 787 cmd(c, Cenablefc, 0); 788 e->maxmtu = Maxmtu; 789 cmd(c, CSmtu, e->maxmtu); 790 dprint("CSmtu %d...\n", e->maxmtu); 791 792 poperror(); 793 return 0; 794 } 795 796 static void 797 ctlrfree(Ctlr *c) 798 { 799 /* free up all the Block*s, too */ 800 free(c->tx.host); 801 free(c->sm.host); 802 free(c->bg.host); 803 free(c->cmd); 804 free(c->done.entry); 805 free(c->stats); 806 free(c); 807 } 808 809 static int 810 setmem(Pcidev *p, Ctlr *c) 811 { 812 ulong i; 813 uvlong raddr; 814 Done *d; 815 void *mem; 816 817 c->tx.segsz = 2048; 818 c->ramsz = 2*MiB - (2*48*KiB + 32*KiB) - 0x100; 819 if(c->ramsz > p->mem[0].size) 820 return -1; 821 822 raddr = p->mem[0].bar & ~0x0F; 823 mem = vmap(raddr, p->mem[0].size); 824 if(mem == nil){ 825 print("m10g: can't map %8.8lux\n", p->mem[0].bar); 826 return -1; 827 } 828 dprint("%llux <- vmap(mem[0].size = %ux)\n", raddr, p->mem[0].size); 829 c->port = raddr; 830 c->ram = mem; 831 c->cmd = malign(sizeof *c->cmd); 832 c->cprt = PCIWADDR(c->cmd); 833 834 d = &c->done; 835 d->n = Maxslots; 836 d->m = d->n - 1; 837 i = d->n * sizeof *d->entry; 838 d->entry = malign(i); 839 memset(d->entry, 0, i); 840 d->busaddr = PCIWADDR(d->entry); 841 842 c->stats = malign(sizeof *c->stats); 843 memset(c->stats, 0, sizeof *c->stats); 844 c->statsprt = PCIWADDR(c->stats); 845 846 memmove(c->eprom, c->ram + c->ramsz - Epromsz, Epromsz-2); 847 return setpcie(p) || parseeprom(c); 848 } 849 850 static Rx* 851 whichrx(Ctlr *c, int sz) 852 { 853 if(sz <= smpool.size) 854 return &c->sm; 855 return &c->bg; 856 } 857 858 static Block* 859 balloc(Rx* rx) 860 { 861 Block *bp; 862 863 ilock(rx->pool); 864 if((bp = rx->pool->head) != nil){ 865 rx->pool->head = bp->next; 866 bp->next = nil; 867 _xinc(&bp->ref); /* prevent bp from being freed */ 868 rx->pool->n--; 869 } 870 iunlock(rx->pool); 871 return bp; 872 } 873 874 static void 875 rbfree(Block *b, Bpool *p) 876 { 877 b->rp = b->wp = (uchar*)PGROUND((uintptr)b->base); 878 b->flag &= ~(Bipck | Budpck | Btcpck | Bpktck); 879 880 ilock(p); 881 b->next = p->head; 882 p->head = b; 883 p->n++; 884 p->cnt++; 885 iunlock(p); 886 } 887 888 static void 889 smbfree(Block *b) 890 { 891 rbfree(b, &smpool); 892 } 893 894 static void 895 bgbfree(Block *b) 896 { 897 rbfree(b, &bgpool); 898 } 899 900 static void 901 replenish(Rx *rx) 902 { 903 ulong buf[16], i, idx, e; 904 Bpool *p; 905 Block *b; 906 907 p = rx->pool; 908 if(p->n < 8) 909 return; 910 memset(buf, 0, sizeof buf); 911 e = (rx->i - rx->cnt) & ~7; 912 e += rx->n; 913 while(p->n >= 8 && e){ 914 idx = rx->cnt & rx->m; 915 for(i = 0; i < 8; i++){ 916 b = balloc(rx); 917 buf[i*2] = pbit32((uvlong)PCIWADDR(b->wp) >> 32); 918 buf[i*2+1] = pbit32(PCIWADDR(b->wp)); 919 rx->host[idx+i] = b; 920 assert(b); 921 } 922 memmove(rx->lanai + 2*idx, buf, sizeof buf); 923 coherence(); 924 rx->cnt += 8; 925 e -= 8; 926 } 927 if(e && p->n > 7+1) 928 print("m10g: should panic? pool->n = %d\n", p->n); 929 } 930 931 /* 932 * future: 933 * if (c->mtrr >= 0) { 934 * c->tx.wcfifo = c->ram+0x200000; 935 * c->sm.wcfifo = c->ram+0x300000; 936 * c->bg.wcfifo = c->ram+0x340000; 937 * } 938 */ 939 940 static int 941 nextpow(int j) 942 { 943 int i; 944 945 for(i = 0; j > (1 << i); i++) 946 ; 947 return 1 << i; 948 } 949 950 static void* 951 emalign(int sz) 952 { 953 void *v; 954 955 v = malign(sz); 956 if(v == nil) 957 error(Enomem); 958 memset(v, 0, sz); 959 return v; 960 } 961 962 static void 963 open0(Ether *e, Ctlr *c) 964 { 965 Block *b; 966 int i, sz, entries; 967 968 entries = cmd(c, CGsendrgsz, 0) / sizeof *c->tx.lanai; 969 c->tx.lanai = (Send*)(c->ram + cmd(c, CGsendoff, 0)); 970 c->tx.host = emalign(entries * sizeof *c->tx.host); 971 c->tx.bring = emalign(entries * sizeof *c->tx.bring); 972 c->tx.n = entries; 973 c->tx.m = entries-1; 974 975 entries = cmd(c, CGrxrgsz, 0)/8; 976 c->sm.pool = &smpool; 977 cmd(c, CSsmallsz, c->sm.pool->size); 978 c->sm.lanai = (ulong*)(c->ram + cmd(c, CGsmallrxoff, 0)); 979 c->sm.n = entries; 980 c->sm.m = entries-1; 981 c->sm.host = emalign(entries * sizeof *c->sm.host); 982 983 c->bg.pool = &bgpool; 984 c->bg.pool->size = nextpow(2 + e->maxmtu); /* 2-byte alignment pad */ 985 cmd(c, CSbigsz, c->bg.pool->size); 986 c->bg.lanai = (ulong*)(c->ram + cmd(c, CGbigrxoff, 0)); 987 c->bg.n = entries; 988 c->bg.m = entries-1; 989 c->bg.host = emalign(entries * sizeof *c->bg.host); 990 991 sz = c->sm.pool->size + BY2PG; 992 for(i = 0; i < c->sm.n; i++){ 993 if((b = allocb(sz)) == 0) 994 break; 995 b->free = smbfree; 996 freeb(b); 997 } 998 sz = c->bg.pool->size + BY2PG; 999 for(i = 0; i < c->bg.n; i++){ 1000 if((b = allocb(sz)) == 0) 1001 break; 1002 b->free = bgbfree; 1003 freeb(b); 1004 } 1005 1006 cmd(c, CSstatsdma, c->statsprt); 1007 c->linkstat = ~0; 1008 c->nrdma = 15; 1009 1010 cmd(c, Cetherup, 0); 1011 } 1012 1013 static Block* 1014 nextblock(Ctlr *c) 1015 { 1016 uint i; 1017 ushort l, k; 1018 Block *b; 1019 Done *d; 1020 Rx *rx; 1021 Slot *s; 1022 Slotparts *sp; 1023 1024 d = &c->done; 1025 s = d->entry; 1026 i = d->i & d->m; 1027 sp = (Slotparts *)(s + i); 1028 l = sp->len; 1029 if(l == 0) 1030 return 0; 1031 k = sp->cksum; 1032 s[i] = 0; 1033 d->i++; 1034 l = gbit16((uchar*)&l); 1035 //dprint("nextb: i=%d l=%d\n", d->i, l); 1036 rx = whichrx(c, l); 1037 if(rx->i >= rx->cnt){ 1038 iprint("m10g: overrun\n"); 1039 return 0; 1040 } 1041 i = rx->i & rx->m; 1042 b = rx->host[i]; 1043 rx->host[i] = 0; 1044 if(b == 0){ 1045 iprint("m10g: error rx to no block. memory is hosed.\n"); 1046 return 0; 1047 } 1048 rx->i++; 1049 1050 b->flag |= Bipck|Btcpck|Budpck; 1051 b->checksum = k; 1052 b->rp += 2; 1053 b->wp += 2+l; 1054 b->lim = b->wp; /* lie like a dog. */ 1055 return b; 1056 } 1057 1058 static int 1059 rxcansleep(void *v) 1060 { 1061 Ctlr *c; 1062 Slot *s; 1063 Slotparts *sp; 1064 Done *d; 1065 1066 c = v; 1067 d = &c->done; 1068 s = c->done.entry; 1069 sp = (Slotparts *)(s + (d->i & d->m)); 1070 if(sp->len != 0) 1071 return -1; 1072 c->irqack[0] = pbit32(3); 1073 return 0; 1074 } 1075 1076 static void 1077 m10rx(void *v) 1078 { 1079 Ether *e; 1080 Ctlr *c; 1081 Block *b; 1082 1083 e = v; 1084 c = e->ctlr; 1085 for(;;){ 1086 replenish(&c->sm); 1087 replenish(&c->bg); 1088 sleep(&c->rxrendez, rxcansleep, c); 1089 while(b = nextblock(c)) 1090 etheriq(e, b, 1); 1091 } 1092 } 1093 1094 static void 1095 txcleanup(Tx *tx, ulong n) 1096 { 1097 Block *b; 1098 uint j, l, m; 1099 1100 if(tx->npkt == n) 1101 return; 1102 l = 0; 1103 m = tx->m; 1104 /* 1105 * if tx->cnt == tx->i, yet tx->npkt == n-1, we just 1106 * caught ourselves and myricom card updating. 1107 */ 1108 for(;; tx->cnt++){ 1109 j = tx->cnt & tx->m; 1110 if(b = tx->bring[j]){ 1111 tx->bring[j] = 0; 1112 tx->nbytes += BLEN(b); 1113 freeb(b); 1114 if(++tx->npkt == n) 1115 return; 1116 } 1117 if(tx->cnt == tx->i) 1118 return; 1119 if(l++ == m){ 1120 iprint("m10g: tx ovrun: %lud %lud\n", n, tx->npkt); 1121 return; 1122 } 1123 } 1124 } 1125 1126 static int 1127 txcansleep(void *v) 1128 { 1129 Ctlr *c; 1130 1131 c = v; 1132 if(c->tx.cnt != c->tx.i && c->tx.npkt != gbit32(c->stats->txcnt)) 1133 return -1; 1134 return 0; 1135 } 1136 1137 static void 1138 txproc(void *v) 1139 { 1140 Ether *e; 1141 Ctlr *c; 1142 Tx *tx; 1143 1144 e = v; 1145 c = e->ctlr; 1146 tx = &c->tx; 1147 for(;;){ 1148 sleep(&c->txrendez, txcansleep, c); 1149 txcleanup(tx, gbit32(c->stats->txcnt)); 1150 } 1151 } 1152 1153 static void 1154 submittx(Tx *tx, int n) 1155 { 1156 Send *l, *h; 1157 int i0, i, m; 1158 1159 m = tx->m; 1160 i0 = tx->i & m; 1161 l = tx->lanai; 1162 h = tx->host; 1163 for(i = n-1; i >= 0; i--) 1164 memmove(l+(i + i0 & m), h+(i + i0 & m), sizeof *h); 1165 tx->i += n; 1166 // coherence(); 1167 } 1168 1169 static int 1170 nsegments(Block *b, int segsz) 1171 { 1172 uintptr bus, end, slen, len; 1173 int i; 1174 1175 bus = PCIWADDR(b->rp); 1176 i = 0; 1177 for(len = BLEN(b); len; len -= slen){ 1178 end = bus + segsz & ~(segsz-1); 1179 slen = end - bus; 1180 if(slen > len) 1181 slen = len; 1182 bus += slen; 1183 i++; 1184 } 1185 return i; 1186 } 1187 1188 static void 1189 m10gtransmit(Ether *e) 1190 { 1191 ushort slen; 1192 ulong i, cnt, rdma, nseg, count, end, bus, len, segsz; 1193 uchar flags; 1194 Block *b; 1195 Ctlr *c; 1196 Send *s, *s0, *s0m8; 1197 Tx *tx; 1198 1199 c = e->ctlr; 1200 tx = &c->tx; 1201 segsz = tx->segsz; 1202 1203 qlock(tx); 1204 count = 0; 1205 s = tx->host + (tx->i & tx->m); 1206 cnt = tx->cnt; 1207 s0 = tx->host + (cnt & tx->m); 1208 s0m8 = tx->host + ((cnt - 8) & tx->m); 1209 i = tx->i; 1210 for(; s >= s0 || s < s0m8; i += nseg){ 1211 if((b = qget(e->oq)) == nil) 1212 break; 1213 flags = SFfirst|SFnotso; 1214 if((len = BLEN(b)) < 1520) 1215 flags |= SFsmall; 1216 rdma = nseg = nsegments(b, segsz); 1217 bus = PCIWADDR(b->rp); 1218 for(; len; len -= slen){ 1219 end = (bus + segsz) & ~(segsz-1); 1220 slen = end - bus; 1221 if(slen > len) 1222 slen = len; 1223 s->low = pbit32(bus); 1224 s->len = pbit16(slen); 1225 s->nrdma = rdma; 1226 s->flags = flags; 1227 1228 bus += slen; 1229 if(++s == tx->host + tx->n) 1230 s = tx->host; 1231 count++; 1232 flags &= ~SFfirst; 1233 rdma = 1; 1234 } 1235 tx->bring[(i + nseg - 1) & tx->m] = b; 1236 if(1 || count > 0){ 1237 submittx(tx, count); 1238 count = 0; 1239 cnt = tx->cnt; 1240 s0 = tx->host + (cnt & tx->m); 1241 s0m8 = tx->host + ((cnt - 8) & tx->m); 1242 } 1243 } 1244 qunlock(tx); 1245 } 1246 1247 static void 1248 checkstats(Ether *e, Ctlr *c, Stats *s) 1249 { 1250 ulong i; 1251 1252 if(s->updated == 0) 1253 return; 1254 1255 i = gbit32(s->linkstat); 1256 if(c->linkstat != i){ 1257 e->link = i; 1258 if(c->linkstat = i) 1259 dprint("m10g: link up\n"); 1260 else 1261 dprint("m10g: link down\n"); 1262 } 1263 i = gbit32(s->nrdma); 1264 if(i != c->nrdma){ 1265 dprint("m10g: rdma timeout %ld\n", i); 1266 c->nrdma = i; 1267 } 1268 } 1269 1270 static void 1271 waitintx(Ctlr *c) 1272 { 1273 int i; 1274 1275 for(i = 0; i < 1024*1024; i++){ 1276 if(c->stats->valid == 0) 1277 break; 1278 coherence(); 1279 } 1280 } 1281 1282 static void 1283 m10ginterrupt(Ureg *, void *v) 1284 { 1285 Ether *e; 1286 Ctlr *c; 1287 1288 e = v; 1289 c = e->ctlr; 1290 1291 if(c->state != Runed || c->stats->valid == 0) /* not ready for us? */ 1292 return; 1293 1294 if(c->stats->valid & 1) 1295 wakeup(&c->rxrendez); 1296 if(gbit32(c->stats->txcnt) != c->tx.npkt) 1297 wakeup(&c->txrendez); 1298 if(c->msi == 0) 1299 *c->irqdeass = 0; 1300 else 1301 c->stats->valid = 0; 1302 waitintx(c); 1303 checkstats(e, c, c->stats); 1304 c->irqack[1] = pbit32(3); 1305 } 1306 1307 static void 1308 m10gattach(Ether *e) 1309 { 1310 Ctlr *c; 1311 char name[12]; 1312 1313 dprint("m10gattach\n"); 1314 1315 qlock(e->ctlr); 1316 c = e->ctlr; 1317 if(c->state != Detached){ 1318 qunlock(c); 1319 return; 1320 } 1321 if(waserror()){ 1322 c->state = Detached; 1323 qunlock(c); 1324 nexterror(); 1325 } 1326 reset(e, c); 1327 c->state = Attached; 1328 open0(e, c); 1329 if(c->kprocs == 0){ 1330 c->kprocs++; 1331 snprint(name, sizeof name, "#l%drxproc", e->ctlrno); 1332 kproc(name, m10rx, e); 1333 snprint(name, sizeof name, "#l%dtxproc", e->ctlrno); 1334 kproc(name, txproc, e); 1335 } 1336 c->state = Runed; 1337 qunlock(c); 1338 poperror(); 1339 } 1340 1341 static int 1342 m10gdetach(Ctlr *c) 1343 { 1344 dprint("m10gdetach\n"); 1345 // reset(e->ctlr); 1346 vunmap(c->ram, c->pcidev->mem[0].size); 1347 ctlrfree(c); 1348 return -1; 1349 } 1350 1351 static int 1352 lstcount(Block *b) 1353 { 1354 int i; 1355 1356 i = 0; 1357 for(; b; b = b->next) 1358 i++; 1359 return i; 1360 } 1361 1362 static long 1363 m10gifstat(Ether *e, void *v, long n, ulong off) 1364 { 1365 int l; 1366 char *p; 1367 Ctlr *c; 1368 Stats s; 1369 1370 c = e->ctlr; 1371 p = malloc(READSTR+1); 1372 l = 0; 1373 /* no point in locking this because this is done via dma. */ 1374 memmove(&s, c->stats, sizeof s); 1375 1376 // l += 1377 snprint(p+l, READSTR, 1378 "txcnt = %lud\n" "linkstat = %lud\n" "dlink = %lud\n" 1379 "derror = %lud\n" "drunt = %lud\n" "doverrun = %lud\n" 1380 "dnosm = %lud\n" "dnobg = %lud\n" "nrdma = %lud\n" 1381 "txstopped = %ud\n" "down = %ud\n" "updated = %ud\n" 1382 "valid = %ud\n\n" 1383 "tx pkt = %lud\n" "tx bytes = %lld\n" 1384 "tx cnt = %ud\n" "tx n = %ud\n" "tx i = %ud\n" 1385 "sm cnt = %ud\n" "sm i = %ud\n" "sm n = %ud\n" 1386 "sm lst = %ud\n" 1387 "bg cnt = %ud\n" "bg i = %ud\n" "bg n = %ud\n" 1388 "bg lst = %ud\n" 1389 "segsz = %lud\n" "coal = %lud\n", 1390 gbit32(s.txcnt), gbit32(s.linkstat), gbit32(s.dlink), 1391 gbit32(s.derror), gbit32(s.drunt), gbit32(s.doverrun), 1392 gbit32(s.dnosm), gbit32(s.dnobg), gbit32(s.nrdma), 1393 s.txstopped, s.down, s.updated, s.valid, 1394 c->tx.npkt, c->tx.nbytes, 1395 c->tx.cnt, c->tx.n, c->tx.i, 1396 c->sm.cnt, c->sm.i, c->sm.pool->n, lstcount(c->sm.pool->head), 1397 c->bg.cnt, c->bg.i, c->bg.pool->n, lstcount(c->bg.pool->head), 1398 c->tx.segsz, gbit32((uchar*)c->coal)); 1399 1400 n = readstr(off, v, n, p); 1401 free(p); 1402 return n; 1403 } 1404 1405 //static void 1406 //summary(Ether *e) 1407 //{ 1408 // char *buf; 1409 // int n, i, j; 1410 // 1411 // if(e == 0) 1412 // return; 1413 // buf = malloc(n=250); 1414 // if(buf == 0) 1415 // return; 1416 // 1417 // snprint(buf, n, "oq\n"); 1418 // qsummary(e->oq, buf+3, n-3-1); 1419 // iprint("%s", buf); 1420 // 1421 // if(e->f) for(i = 0; e->f[i]; i++){ 1422 // j = snprint(buf, n, "f%d %d\n", i, e->f[i]->type); 1423 // qsummary(e->f[i]->in, buf+j, n-j-1); 1424 // print("%s", buf); 1425 // } 1426 // 1427 // free(buf); 1428 //} 1429 1430 static void 1431 rxring(Ctlr *c) 1432 { 1433 Done *d; 1434 Slot *s; 1435 Slotparts *sp; 1436 int i; 1437 1438 d = &c->done; 1439 s = d->entry; 1440 for(i = 0; i < d->n; i++) { 1441 sp = (Slotparts *)(s + i); 1442 if(sp->len) 1443 iprint("s[%d] = %d\n", i, sp->len); 1444 } 1445 } 1446 1447 enum { 1448 CMdebug, 1449 CMcoal, 1450 CMwakeup, 1451 CMtxwakeup, 1452 CMqsummary, 1453 CMrxring, 1454 }; 1455 1456 static Cmdtab ctab[] = { 1457 CMdebug, "debug", 2, 1458 CMcoal, "coal", 2, 1459 CMwakeup, "wakeup", 1, 1460 CMtxwakeup, "txwakeup", 1, 1461 // CMqsummary, "q", 1, 1462 CMrxring, "rxring", 1, 1463 }; 1464 1465 static long 1466 m10gctl(Ether *e, void *v, long n) 1467 { 1468 int i; 1469 Cmdbuf *c; 1470 Cmdtab *t; 1471 1472 dprint("m10gctl\n"); 1473 if(e->ctlr == nil) 1474 error(Enonexist); 1475 1476 c = parsecmd(v, n); 1477 if(waserror()){ 1478 free(c); 1479 nexterror(); 1480 } 1481 t = lookupcmd(c, ctab, nelem(ctab)); 1482 switch(t->index){ 1483 case CMdebug: 1484 debug = (strcmp(c->f[1], "on") == 0); 1485 break; 1486 case CMcoal: 1487 i = atoi(c->f[1]); 1488 if(i < 0 || i > 1000) 1489 error(Ebadarg); 1490 *((Ctlr*)e->ctlr)->coal = pbit32(i); 1491 break; 1492 case CMwakeup: 1493 wakeup(&((Ctlr*)e->ctlr)->rxrendez); /* you're kidding, right? */ 1494 break; 1495 case CMtxwakeup: 1496 wakeup(&((Ctlr*)e->ctlr)->txrendez); /* you're kidding, right? */ 1497 break; 1498 // case CMqsummary: 1499 // summary(e); 1500 // break; 1501 case CMrxring: 1502 rxring(e->ctlr); 1503 break; 1504 default: 1505 error(Ebadarg); 1506 } 1507 free(c); 1508 poperror(); 1509 return n; 1510 } 1511 1512 static void 1513 m10gshutdown(Ether *e) 1514 { 1515 dprint("m10gshutdown\n"); 1516 m10gdetach(e->ctlr); 1517 } 1518 1519 static void 1520 m10gpromiscuous(void *v, int on) 1521 { 1522 Ether *e; 1523 int i; 1524 1525 dprint("m10gpromiscuous\n"); 1526 e = v; 1527 if(on) 1528 i = Cpromisc; 1529 else 1530 i = Cnopromisc; 1531 cmd(e->ctlr, i, 0); 1532 } 1533 1534 static int mcctab[] = { CSleavemc, CSjoinmc }; 1535 static char *mcntab[] = { "leave", "join" }; 1536 1537 static void 1538 m10gmulticast(void *v, uchar *ea, int on) 1539 { 1540 Ether *e; 1541 int i; 1542 1543 dprint("m10gmulticast\n"); 1544 e = v; 1545 if((i = maccmd(e->ctlr, mcctab[on], ea)) != 0) 1546 print("m10g: can't %s %E: %d\n", mcntab[on], ea, i); 1547 } 1548 1549 static void 1550 m10gpci(void) 1551 { 1552 Pcidev *p; 1553 Ctlr *t, *c; 1554 1555 t = 0; 1556 for(p = 0; p = pcimatch(p, Vmyricom, 0); ){ 1557 switch(p->did){ 1558 case 0x8: /* 8a */ 1559 break; 1560 case 0x9: /* 8a with msi-x fw */ 1561 case 0xa: /* 8b */ 1562 case 0xb: /* 8b2 */ 1563 case 0xc: /* 2-8b2 */ 1564 /* untested */ 1565 break; 1566 default: 1567 print("etherm10g: unknown myricom did %#ux\n", p->did); 1568 continue; 1569 } 1570 c = malloc(sizeof *c); 1571 if(c == nil) 1572 continue; 1573 c->pcidev = p; 1574 c->id = p->did<<16 | p->vid; 1575 c->boot = pcicap(p, PciCapVND); 1576 // kickthebaby(p, c); 1577 pcisetbme(p); 1578 if(setmem(p, c) == -1){ 1579 print("m10g: setmem failed\n"); 1580 free(c); 1581 /* cleanup */ 1582 continue; 1583 } 1584 if(t) 1585 t->next = c; 1586 else 1587 ctlrs = c; 1588 t = c; 1589 } 1590 } 1591 1592 static int 1593 m10gpnp(Ether *e) 1594 { 1595 Ctlr *c; 1596 1597 if(ctlrs == nil) 1598 m10gpci(); 1599 1600 for(c = ctlrs; c != nil; c = c->next) 1601 if(c->active) 1602 continue; 1603 else if(e->port == 0 || e->port == c->port) 1604 break; 1605 if(c == nil) 1606 return -1; 1607 c->active = 1; 1608 1609 e->ctlr = c; 1610 e->port = c->port; 1611 e->irq = c->pcidev->intl; 1612 e->tbdf = c->pcidev->tbdf; 1613 e->mbps = 10000; 1614 memmove(e->ea, c->ra, Eaddrlen); 1615 1616 e->attach = m10gattach; 1617 e->detach = m10gshutdown; 1618 e->transmit = m10gtransmit; 1619 e->interrupt = m10ginterrupt; 1620 e->ifstat = m10gifstat; 1621 e->ctl = m10gctl; 1622 // e->power = m10gpower; 1623 e->shutdown = m10gshutdown; 1624 1625 e->arg = e; 1626 e->promiscuous = m10gpromiscuous; 1627 e->multicast = m10gmulticast; 1628 1629 return 0; 1630 } 1631 1632 void 1633 etherm10glink(void) 1634 { 1635 addethercard("m10g", m10gpnp); 1636 } 1637