1 /* 2 * myricom 10 Gb ethernet driver 3 * © 2007 erik quanstrom, coraid 4 * 5 * the card is big endian. 6 * we use uvlong rather than uintptr to hold addresses so that 7 * we don't get "warning: stupid shift" on 32-bit architectures. 8 */ 9 #include "u.h" 10 #include "../port/lib.h" 11 #include "mem.h" 12 #include "dat.h" 13 #include "fns.h" 14 #include "io.h" 15 #include "../port/error.h" 16 #include "../port/netif.h" 17 18 #include "../pc/etherif.h" 19 20 #ifndef KiB 21 #define KiB 1024u /* Kibi 0x0000000000000400 */ 22 #define MiB 1048576u /* Mebi 0x0000000000100000 */ 23 #endif /* KiB */ 24 25 #define dprint(...) if(debug) print(__VA_ARGS__) 26 #define pcicapdbg(...) 27 #define malign(n) mallocalign((n), 4*KiB, 0, 0) 28 29 #include "etherm10g2k.i" 30 #include "etherm10g4k.i" 31 32 static int debug = 0; 33 static char Etimeout[] = "timeout"; 34 35 enum { 36 Epromsz = 256, 37 Maxslots= 1024, 38 Align = 4096, 39 Maxmtu = 9000, 40 Noconf = 0xffffffff, 41 42 Fwoffset= 1*MiB, 43 Cmdoff = 0xf80000, /* command port offset */ 44 Fwsubmt = 0xfc0000, /* firmware submission command port offset */ 45 Rdmaoff = 0xfc01c0, /* rdma command port offset */ 46 }; 47 48 enum { 49 CZero, 50 Creset, 51 Cversion, 52 53 CSintrqdma, /* issue these before Cetherup */ 54 CSbigsz, /* in bytes bigsize = 2^n */ 55 CSsmallsz, 56 57 CGsendoff, 58 CGsmallrxoff, 59 CGbigrxoff, 60 CGirqackoff, 61 CGirqdeassoff, 62 CGsendrgsz, 63 CGrxrgsz, 64 65 CSintrqsz, /* 2^n */ 66 Cetherup, /* above parameters + mtu/mac addr must be set first. */ 67 Cetherdn, 68 69 CSmtu, /* below may be issued live */ 70 CGcoaloff, /* in µs */ 71 CSstatsrate, /* in µs */ 72 CSstatsdma, 73 74 Cpromisc, 75 Cnopromisc, 76 CSmac, 77 78 Cenablefc, 79 Cdisablefc, 80 81 Cdmatest, /* address in d[0-1], d[2]=length */ 82 83 Cenableallmc, 84 Cdisableallmc, 85 86 CSjoinmc, 87 CSleavemc, 88 Cleaveallmc, 89 90 CSstatsdma2, /* adds (unused) multicast stats */ 91 }; 92 93 typedef union { 94 uint i[2]; 95 uchar c[8]; 96 } Cmd; 97 98 typedef ulong Slot; 99 typedef struct { 100 ushort cksum; 101 ushort len; 102 } Slotparts; 103 104 enum { 105 SFsmall = 1, 106 SFfirst = 2, 107 SFalign = 4, 108 SFnotso = 16, 109 }; 110 111 typedef struct { 112 ulong high; 113 ulong low; 114 ushort hdroff; 115 ushort len; 116 uchar pad; 117 uchar nrdma; 118 uchar chkoff; 119 uchar flags; 120 } Send; 121 122 typedef struct { 123 QLock; 124 Send *lanai; /* tx ring (cksum+len in lanai memory) */ 125 Send *host; /* tx ring (data in our memory) */ 126 Block **bring; 127 // uchar *wcfifo; /* what the heck is a w/c fifo? */ 128 int size; /* of buffers in the z8's memory */ 129 ulong segsz; 130 uint n; /* rxslots */ 131 uint m; /* mask; rxslots must be a power of two */ 132 uint i; /* number of segments (not frames) queued */ 133 uint cnt; /* number of segments sent by the card */ 134 135 ulong npkt; 136 vlong nbytes; 137 } Tx; 138 139 typedef struct { 140 Lock; 141 Block *head; 142 uint size; /* buffer size of each block */ 143 uint n; /* n free buffers */ 144 uint cnt; 145 } Bpool; 146 147 static Bpool smpool = { .size = 128, }; 148 static Bpool bgpool = { .size = Maxmtu, }; 149 150 typedef struct { 151 Bpool *pool; /* free buffers */ 152 ulong *lanai; /* rx ring; we have no permanent host shadow */ 153 Block **host; /* called "info" in myricom driver */ 154 // uchar *wcfifo; /* cmd submission fifo */ 155 uint m; 156 uint n; /* rxslots */ 157 uint i; 158 uint cnt; /* number of buffers allocated (lifetime) */ 159 uint allocfail; 160 } Rx; 161 162 /* dma mapped. unix network byte order. */ 163 typedef struct { 164 uchar txcnt[4]; 165 uchar linkstat[4]; 166 uchar dlink[4]; 167 uchar derror[4]; 168 uchar drunt[4]; 169 uchar doverrun[4]; 170 uchar dnosm[4]; 171 uchar dnobg[4]; 172 uchar nrdma[4]; 173 uchar txstopped; 174 uchar down; 175 uchar updated; 176 uchar valid; 177 } Stats; 178 179 enum { 180 Detached, 181 Attached, 182 Runed, 183 }; 184 185 typedef struct { 186 Slot *entry; 187 uvlong busaddr; 188 uint m; 189 uint n; 190 uint i; 191 } Done; 192 193 typedef struct Ctlr Ctlr; 194 typedef struct Ctlr { 195 QLock; 196 int state; 197 int kprocs; 198 uvlong port; 199 Pcidev* pcidev; 200 Ctlr* next; 201 int active; 202 int id; /* do we need this? */ 203 204 uchar ra[Eaddrlen]; 205 206 int ramsz; 207 uchar *ram; 208 209 ulong *irqack; 210 ulong *irqdeass; 211 ulong *coal; 212 213 char eprom[Epromsz]; 214 ulong serial; /* unit serial number */ 215 216 QLock cmdl; 217 Cmd *cmd; /* address of command return */ 218 uvlong cprt; /* bus address of command */ 219 220 uvlong boot; /* boot address */ 221 222 Done done; 223 Tx tx; 224 Rx sm; 225 Rx bg; 226 Stats *stats; 227 uvlong statsprt; 228 229 Rendez rxrendez; 230 Rendez txrendez; 231 232 int msi; 233 ulong linkstat; 234 ulong nrdma; 235 } Ctlr; 236 237 static Ctlr *ctlrs; 238 239 enum { 240 PciCapPMG = 0x01, /* power management */ 241 PciCapAGP = 0x02, 242 PciCapVPD = 0x03, /* vital product data */ 243 PciCapSID = 0x04, /* slot id */ 244 PciCapMSI = 0x05, 245 PciCapCHS = 0x06, /* compact pci hot swap */ 246 PciCapPCIX = 0x07, 247 PciCapHTC = 0x08, /* hypertransport irq conf */ 248 PciCapVND = 0x09, /* vendor specific information */ 249 PciCapHSW = 0x0C, /* hot swap */ 250 PciCapPCIe = 0x10, 251 PciCapMSIX = 0x11, 252 }; 253 254 enum { 255 PcieAERC = 1, 256 PcieVC, 257 PcieSNC, 258 PciePBC, 259 }; 260 261 enum { 262 AercCCR = 0x18, /* control register */ 263 }; 264 265 enum { 266 PcieCTL = 8, 267 PcieLCR = 12, 268 PcieMRD = 0x7000, /* maximum read size */ 269 }; 270 271 static int 272 pcicap(Pcidev *p, int cap) 273 { 274 int i, c, off; 275 276 pcicapdbg("pcicap: %x:%d\n", p->vid, p->did); 277 off = 0x34; /* 0x14 for cardbus */ 278 for(i = 48; i--; ){ 279 pcicapdbg("\t" "loop %x\n", off); 280 off = pcicfgr8(p, off); 281 pcicapdbg("\t" "pcicfgr8 %x\n", off); 282 if(off < 0x40) 283 break; 284 off &= ~3; 285 c = pcicfgr8(p, off); 286 pcicapdbg("\t" "pcicfgr8 %x\n", c); 287 if(c == 0xff) 288 break; 289 if(c == cap) 290 return off; 291 off++; 292 } 293 return 0; 294 } 295 296 /* 297 * this function doesn't work because pcicgr32 doesn't have access 298 * to the pcie extended configuration space. 299 */ 300 static int 301 pciecap(Pcidev *p, int cap) 302 { 303 uint off, i; 304 305 off = 0x100; 306 while(((i = pcicfgr32(p, off)) & 0xffff) != cap){ 307 off = i >> 20; 308 print("m10g: pciecap offset = %ud", off); 309 if(off < 0x100 || off >= 4*KiB - 1) 310 return 0; 311 } 312 print("m10g: pciecap found = %ud", off); 313 return off; 314 } 315 316 static int 317 setpcie(Pcidev *p) 318 { 319 int off; 320 321 /* set 4k writes */ 322 off = pcicap(p, PciCapPCIe); 323 if(off < 64) 324 return -1; 325 off += PcieCTL; 326 pcicfgw16(p, off, (pcicfgr16(p, off) & ~PcieMRD) | 5<<12); 327 return 0; 328 } 329 330 static int 331 whichfw(Pcidev *p) 332 { 333 char *s; 334 int i, off, lanes, ecrc; 335 ulong cap; 336 337 /* check the number of configured lanes. */ 338 off = pcicap(p, PciCapPCIe); 339 if(off < 64) 340 return -1; 341 off += PcieLCR; 342 cap = pcicfgr16(p, off); 343 lanes = (cap>>4) & 0x3f; 344 345 /* check AERC register. we need it on. */ 346 off = pciecap(p, PcieAERC); 347 print("; offset %d returned\n", off); 348 cap = 0; 349 if(off != 0){ 350 off += AercCCR; 351 cap = pcicfgr32(p, off); 352 print("m10g: %lud cap\n", cap); 353 } 354 ecrc = (cap>>4) & 0xf; 355 /* if we don't like the aerc, kick it here. */ 356 357 print("m10g: %d lanes; ecrc=%d; ", lanes, ecrc); 358 if(s = getconf("myriforce")){ 359 i = atoi(s); 360 if(i != 4*KiB || i != 2*KiB) 361 i = 2*KiB; 362 print("fw = %d [forced]\n", i); 363 return i; 364 } 365 if(lanes <= 4){ 366 print("fw = 4096 [lanes]\n"); 367 return 4*KiB; 368 } 369 if(ecrc & 10){ 370 print("fw = 4096 [ecrc set]\n"); 371 return 4*KiB; 372 } 373 print("fw = 4096 [default]\n"); 374 return 4*KiB; 375 } 376 377 static int 378 parseeprom(Ctlr *c) 379 { 380 int i, j, k, l, bits; 381 char *s; 382 383 dprint("m10g eprom:\n"); 384 s = c->eprom; 385 bits = 3; 386 for(i = 0; s[i] && i < Epromsz; i++){ 387 l = strlen(s+i); 388 dprint("\t%s\n", s+i); 389 if(strncmp(s+i, "MAC=", 4) == 0 && l == 4+12+5){ 390 bits ^= 1; 391 j = i + 4; 392 for(k = 0; k < 6; k++) 393 c->ra[k] = strtoul(s+j+3*k, 0, 16); 394 }else if(strncmp(s+i, "SN=", 3) == 0){ 395 bits ^= 2; 396 c->serial = atoi(s+i+3); 397 } 398 i += l; 399 } 400 if(bits) 401 return -1; 402 return 0; 403 } 404 405 static ushort 406 pbit16(ushort i) 407 { 408 ushort j; 409 uchar *p; 410 411 p = (uchar*)&j; 412 p[1] = i; 413 p[0] = i>>8; 414 return j; 415 } 416 417 static ushort 418 gbit16(uchar i[2]) 419 { 420 ushort j; 421 422 j = i[1]; 423 j |= i[0]<<8; 424 return j; 425 } 426 427 static ulong 428 pbit32(ulong i) 429 { 430 ulong j; 431 uchar *p; 432 433 p = (uchar*)&j; 434 p[3] = i; 435 p[2] = i>>8; 436 p[1] = i>>16; 437 p[0] = i>>24; 438 return j; 439 } 440 441 static ulong 442 gbit32(uchar i[4]) 443 { 444 ulong j; 445 446 j = i[3]; 447 j |= i[2]<<8; 448 j |= i[1]<<16; 449 j |= i[0]<<24; 450 return j; 451 } 452 453 static void 454 prepcmd(ulong *cmd, int i) 455 { 456 while(i-- > 0) 457 cmd[i] = pbit32(cmd[i]); 458 } 459 460 /* 461 * the command looks like this (int 32bit integers) 462 * cmd type 463 * addr (low) 464 * addr (high) 465 * pad (used for dma testing) 466 * response (high) 467 * response (low) 468 * 40 byte = 5 int pad. 469 */ 470 471 ulong 472 cmd(Ctlr *c, int type, uvlong data) 473 { 474 ulong buf[16], i; 475 Cmd *cmd; 476 477 qlock(&c->cmdl); 478 cmd = c->cmd; 479 cmd->i[1] = Noconf; 480 memset(buf, 0, sizeof buf); 481 buf[0] = type; 482 buf[1] = data; 483 buf[2] = data >> 32; 484 buf[4] = c->cprt >> 32; 485 buf[5] = c->cprt; 486 prepcmd(buf, 6); 487 coherence(); 488 memmove(c->ram + Cmdoff, buf, sizeof buf); 489 490 if(waserror()) 491 nexterror(); 492 for(i = 0; i < 15; i++){ 493 if(cmd->i[1] != Noconf){ 494 poperror(); 495 i = gbit32(cmd->c); 496 qunlock(&c->cmdl); 497 if(cmd->i[1] != 0) 498 dprint("[%lux]", i); 499 return i; 500 } 501 tsleep(&up->sleep, return0, 0, 1); 502 } 503 qunlock(&c->cmdl); 504 iprint("m10g: cmd timeout [%ux %ux] cmd=%d\n", 505 cmd->i[0], cmd->i[1], type); 506 error(Etimeout); 507 return ~0; /* silence! */ 508 } 509 510 ulong 511 maccmd(Ctlr *c, int type, uchar *m) 512 { 513 ulong buf[16], i; 514 Cmd *cmd; 515 516 qlock(&c->cmdl); 517 cmd = c->cmd; 518 cmd->i[1] = Noconf; 519 memset(buf, 0, sizeof buf); 520 buf[0] = type; 521 buf[1] = m[0]<<24 | m[1]<<16 | m[2]<<8 | m[3]; 522 buf[2] = m[4]<< 8 | m[5]; 523 buf[4] = c->cprt >> 32; 524 buf[5] = c->cprt; 525 prepcmd(buf, 6); 526 coherence(); 527 memmove(c->ram + Cmdoff, buf, sizeof buf); 528 529 if(waserror()) 530 nexterror(); 531 for(i = 0; i < 15; i++){ 532 if(cmd->i[1] != Noconf){ 533 poperror(); 534 i = gbit32(cmd->c); 535 qunlock(&c->cmdl); 536 if(cmd->i[1] != 0) 537 dprint("[%lux]", i); 538 return i; 539 } 540 tsleep(&up->sleep, return0, 0, 1); 541 } 542 qunlock(&c->cmdl); 543 iprint("m10g: maccmd timeout [%ux %ux] cmd=%d\n", 544 cmd->i[0], cmd->i[1], type); 545 error(Etimeout); 546 return ~0; /* silence! */ 547 } 548 549 /* remove this garbage after testing */ 550 enum { 551 DMAread = 0x10000, 552 DMAwrite= 0x1, 553 }; 554 555 ulong 556 dmatestcmd(Ctlr *c, int type, uvlong addr, int len) 557 { 558 ulong buf[16], i; 559 560 memset(buf, 0, sizeof buf); 561 memset(c->cmd, Noconf, sizeof *c->cmd); 562 buf[0] = Cdmatest; 563 buf[1] = addr; 564 buf[2] = addr >> 32; 565 buf[3] = len * type; 566 buf[4] = c->cprt >> 32; 567 buf[5] = c->cprt; 568 prepcmd(buf, 6); 569 coherence(); 570 memmove(c->ram + Cmdoff, buf, sizeof buf); 571 572 if(waserror()) 573 nexterror(); 574 for(i = 0; i < 15; i++){ 575 if(c->cmd->i[1] != Noconf){ 576 i = gbit32(c->cmd->c); 577 if(i == 0) 578 error(Eio); 579 poperror(); 580 return i; 581 } 582 tsleep(&up->sleep, return0, 0, 5); 583 } 584 error(Etimeout); 585 return ~0; /* silence! */ 586 } 587 588 ulong 589 rdmacmd(Ctlr *c, int on) 590 { 591 ulong buf[16], i; 592 593 memset(buf, 0, sizeof buf); 594 c->cmd->i[0] = 0; 595 coherence(); 596 buf[0] = c->cprt >> 32; 597 buf[1] = c->cprt; 598 buf[2] = Noconf; 599 buf[3] = c->cprt >> 32; 600 buf[4] = c->cprt; 601 buf[5] = on; 602 prepcmd(buf, 6); 603 memmove(c->ram + Rdmaoff, buf, sizeof buf); 604 605 if(waserror()) 606 nexterror(); 607 for(i = 0; i < 20; i++){ 608 if(c->cmd->i[0] == Noconf){ 609 poperror(); 610 return gbit32(c->cmd->c); 611 } 612 tsleep(&up->sleep, return0, 0, 1); 613 } 614 error(Etimeout); 615 iprint("m10g: rdmacmd timeout\n"); 616 return ~0; /* silence! */ 617 } 618 619 static int 620 loadfw(Ctlr *c, int *align) 621 { 622 ulong *f, *s, sz; 623 int i; 624 625 if((*align = whichfw(c->pcidev)) == 4*KiB){ 626 f = (ulong*)fw4k; 627 sz = sizeof fw4k; 628 }else{ 629 f = (ulong*)fw2k; 630 sz = sizeof fw2k; 631 } 632 633 s = (ulong*)(c->ram + Fwoffset); 634 for(i = 0; i < sz / 4; i++) 635 s[i] = f[i]; 636 return sz & ~3; 637 } 638 639 static int 640 bootfw(Ctlr *c) 641 { 642 int i, sz, align; 643 ulong buf[16]; 644 Cmd* cmd; 645 646 if((sz = loadfw(c, &align)) == 0) 647 return 0; 648 dprint("bootfw %d bytes ... ", sz); 649 cmd = c->cmd; 650 651 memset(buf, 0, sizeof buf); 652 c->cmd->i[0] = 0; 653 coherence(); 654 buf[0] = c->cprt >> 32; /* upper dma target address */ 655 buf[1] = c->cprt; /* lower */ 656 buf[2] = Noconf; /* writeback */ 657 buf[3] = Fwoffset + 8, 658 buf[4] = sz - 8; 659 buf[5] = 8; 660 buf[6] = 0; 661 prepcmd(buf, 7); 662 coherence(); 663 memmove(c->ram + Fwsubmt, buf, sizeof buf); 664 665 for(i = 0; i < 20; i++){ 666 if(cmd->i[0] == Noconf) 667 break; 668 delay(1); 669 } 670 dprint("[%lux %lux]", gbit32(cmd->c), gbit32(cmd->c+4)); 671 if(i == 20){ 672 print("m10g: cannot load fw\n"); 673 return -1; 674 } 675 dprint("\n"); 676 c->tx.segsz = align; 677 return 0; 678 } 679 680 static int 681 kickthebaby(Pcidev *p, Ctlr *c) 682 { 683 /* don't kick the baby! */ 684 ulong code; 685 686 pcicfgw8(p, 0x10 + c->boot, 0x3); 687 pcicfgw32(p, 0x18 + c->boot, 0xfffffff0); 688 code = pcicfgr32(p, 0x14 + c->boot); 689 690 dprint("reboot status = %lux\n", code); 691 if(code != 0xfffffff0) 692 return -1; 693 return 0; 694 } 695 696 typedef struct { 697 uchar len[4]; 698 uchar type[4]; 699 char version[128]; 700 uchar globals[4]; 701 uchar ramsz[4]; 702 uchar specs[4]; 703 uchar specssz[4]; 704 } Fwhdr; 705 706 enum { 707 Tmx = 0x4d582020, 708 Tpcie = 0x70636965, 709 Teth = 0x45544820, 710 Tmcp0 = 0x4d435030, 711 }; 712 713 static char * 714 fwtype(ulong type) 715 { 716 switch(type){ 717 case Tmx: 718 return "mx"; 719 case Tpcie: 720 return "PCIe"; 721 case Teth: 722 return "eth"; 723 case Tmcp0: 724 return "mcp0"; 725 } 726 return "*GOK*"; 727 } 728 729 static int 730 chkfw(Ctlr *c) 731 { 732 ulong off, type; 733 Fwhdr *h; 734 735 off = gbit32(c->ram+0x3c); 736 dprint("firmware %lux\n", off); 737 if((off&3) || off + sizeof *h > c->ramsz){ 738 print("!m10g: bad firmware %lux\n", off); 739 return -1; 740 } 741 h = (Fwhdr*)(c->ram + off); 742 type = gbit32(h->type); 743 dprint("\t" "type %s\n", fwtype(type)); 744 dprint("\t" "vers %s\n", h->version); 745 dprint("\t" "ramsz %lux\n", gbit32(h->ramsz)); 746 if(type != Teth){ 747 print("!m10g: bad card type %s\n", fwtype(type)); 748 return -1; 749 } 750 751 return bootfw(c) || rdmacmd(c, 0); 752 } 753 754 static int 755 reset(Ether *e, Ctlr *c) 756 { 757 ulong i, sz; 758 759 if(waserror()){ 760 print("m10g: reset error\n"); 761 nexterror(); 762 return -1; 763 } 764 765 chkfw(c); 766 cmd(c, Creset, 0); 767 768 cmd(c, CSintrqsz, c->done.n * sizeof *c->done.entry); 769 cmd(c, CSintrqdma, c->done.busaddr); 770 c->irqack = (ulong*)(c->ram + cmd(c, CGirqackoff, 0)); 771 /* required only if we're not doing msi? */ 772 c->irqdeass = (ulong*)(c->ram + cmd(c, CGirqdeassoff, 0)); 773 /* this is the driver default, why fiddle with this? */ 774 c->coal = (ulong*)(c->ram + cmd(c, CGcoaloff, 0)); 775 *c->coal = pbit32(25); 776 777 dprint("dma stats:\n"); 778 rdmacmd(c, 1); 779 sz = c->tx.segsz; 780 i = dmatestcmd(c, DMAread, c->done.busaddr, sz); 781 print("m10g: read %lud MB/s;", ((i>>16)*sz*2) / (i&0xffff)); 782 i = dmatestcmd(c, DMAwrite, c->done.busaddr, sz); 783 print(" write %lud MB/s;", ((i>>16)*sz*2) / (i&0xffff)); 784 i = dmatestcmd(c, DMAwrite|DMAread, c->done.busaddr, sz); 785 print(" r/w %lud MB/s\n", ((i>>16)*sz*2*2) / (i&0xffff)); 786 memset(c->done.entry, 0, c->done.n * sizeof *c->done.entry); 787 788 maccmd(c, CSmac, c->ra); 789 // cmd(c, Cnopromisc, 0); 790 cmd(c, Cenablefc, 0); 791 e->maxmtu = Maxmtu; 792 cmd(c, CSmtu, e->maxmtu); 793 dprint("CSmtu %d...\n", e->maxmtu); 794 795 poperror(); 796 return 0; 797 } 798 799 static void 800 ctlrfree(Ctlr *c) 801 { 802 /* free up all the Block*s, too */ 803 free(c->tx.host); 804 free(c->sm.host); 805 free(c->bg.host); 806 free(c->cmd); 807 free(c->done.entry); 808 free(c->stats); 809 free(c); 810 } 811 812 static int 813 setmem(Pcidev *p, Ctlr *c) 814 { 815 ulong i; 816 uvlong raddr; 817 Done *d; 818 void *mem; 819 820 c->tx.segsz = 2048; 821 c->ramsz = 2*MiB - (2*48*KiB + 32*KiB) - 0x100; 822 if(c->ramsz > p->mem[0].size) 823 return -1; 824 825 raddr = p->mem[0].bar & ~0x0F; 826 mem = vmap(raddr, p->mem[0].size); 827 if(mem == nil){ 828 print("m10g: can't map %8.8lux\n", p->mem[0].bar); 829 return -1; 830 } 831 dprint("%llux <- vmap(mem[0].size = %ux)\n", raddr, p->mem[0].size); 832 c->port = raddr; 833 c->ram = mem; 834 c->cmd = malign(sizeof *c->cmd); 835 c->cprt = PCIWADDR(c->cmd); 836 837 d = &c->done; 838 d->n = Maxslots; 839 d->m = d->n - 1; 840 i = d->n * sizeof *d->entry; 841 d->entry = malign(i); 842 memset(d->entry, 0, i); 843 d->busaddr = PCIWADDR(d->entry); 844 845 c->stats = malign(sizeof *c->stats); 846 memset(c->stats, 0, sizeof *c->stats); 847 c->statsprt = PCIWADDR(c->stats); 848 849 memmove(c->eprom, c->ram + c->ramsz - Epromsz, Epromsz-2); 850 return setpcie(p) || parseeprom(c); 851 } 852 853 static Rx* 854 whichrx(Ctlr *c, int sz) 855 { 856 if(sz <= smpool.size) 857 return &c->sm; 858 return &c->bg; 859 } 860 861 static Block* 862 balloc(Rx* rx) 863 { 864 Block *bp; 865 866 ilock(rx->pool); 867 if((bp = rx->pool->head) != nil){ 868 rx->pool->head = bp->next; 869 bp->next = nil; 870 _xinc(&bp->ref); /* prevent bp from being freed */ 871 rx->pool->n--; 872 } 873 iunlock(rx->pool); 874 return bp; 875 } 876 877 static void 878 rbfree(Block *b, Bpool *p) 879 { 880 b->rp = b->wp = (uchar*)PGROUND((uintptr)b->base); 881 b->flag &= ~(Bipck | Budpck | Btcpck | Bpktck); 882 883 ilock(p); 884 b->next = p->head; 885 p->head = b; 886 p->n++; 887 p->cnt++; 888 iunlock(p); 889 } 890 891 static void 892 smbfree(Block *b) 893 { 894 rbfree(b, &smpool); 895 } 896 897 static void 898 bgbfree(Block *b) 899 { 900 rbfree(b, &bgpool); 901 } 902 903 static void 904 replenish(Rx *rx) 905 { 906 ulong buf[16], i, idx, e; 907 Bpool *p; 908 Block *b; 909 910 p = rx->pool; 911 if(p->n < 8) 912 return; 913 memset(buf, 0, sizeof buf); 914 e = (rx->i - rx->cnt) & ~7; 915 e += rx->n; 916 while(p->n >= 8 && e){ 917 idx = rx->cnt & rx->m; 918 for(i = 0; i < 8; i++){ 919 b = balloc(rx); 920 buf[i*2] = pbit32((uvlong)PCIWADDR(b->wp) >> 32); 921 buf[i*2+1] = pbit32(PCIWADDR(b->wp)); 922 rx->host[idx+i] = b; 923 assert(b); 924 } 925 memmove(rx->lanai + 2*idx, buf, sizeof buf); 926 coherence(); 927 rx->cnt += 8; 928 e -= 8; 929 } 930 if(e && p->n > 7+1) 931 print("m10g: should panic? pool->n = %d\n", p->n); 932 } 933 934 /* 935 * future: 936 * if (c->mtrr >= 0) { 937 * c->tx.wcfifo = c->ram+0x200000; 938 * c->sm.wcfifo = c->ram+0x300000; 939 * c->bg.wcfifo = c->ram+0x340000; 940 * } 941 */ 942 943 static int 944 nextpow(int j) 945 { 946 int i; 947 948 for(i = 0; j > (1 << i); i++) 949 ; 950 return 1 << i; 951 } 952 953 static void* 954 emalign(int sz) 955 { 956 void *v; 957 958 v = malign(sz); 959 if(v == nil) 960 error(Enomem); 961 memset(v, 0, sz); 962 return v; 963 } 964 965 static void 966 open0(Ether *e, Ctlr *c) 967 { 968 Block *b; 969 int i, sz, entries; 970 971 entries = cmd(c, CGsendrgsz, 0) / sizeof *c->tx.lanai; 972 c->tx.lanai = (Send*)(c->ram + cmd(c, CGsendoff, 0)); 973 c->tx.host = emalign(entries * sizeof *c->tx.host); 974 c->tx.bring = emalign(entries * sizeof *c->tx.bring); 975 c->tx.n = entries; 976 c->tx.m = entries-1; 977 978 entries = cmd(c, CGrxrgsz, 0)/8; 979 c->sm.pool = &smpool; 980 cmd(c, CSsmallsz, c->sm.pool->size); 981 c->sm.lanai = (ulong*)(c->ram + cmd(c, CGsmallrxoff, 0)); 982 c->sm.n = entries; 983 c->sm.m = entries-1; 984 c->sm.host = emalign(entries * sizeof *c->sm.host); 985 986 c->bg.pool = &bgpool; 987 c->bg.pool->size = nextpow(2 + e->maxmtu); /* 2-byte alignment pad */ 988 cmd(c, CSbigsz, c->bg.pool->size); 989 c->bg.lanai = (ulong*)(c->ram + cmd(c, CGbigrxoff, 0)); 990 c->bg.n = entries; 991 c->bg.m = entries-1; 992 c->bg.host = emalign(entries * sizeof *c->bg.host); 993 994 sz = c->sm.pool->size + BY2PG; 995 for(i = 0; i < c->sm.n; i++){ 996 if((b = allocb(sz)) == 0) 997 break; 998 b->free = smbfree; 999 freeb(b); 1000 } 1001 sz = c->bg.pool->size + BY2PG; 1002 for(i = 0; i < c->bg.n; i++){ 1003 if((b = allocb(sz)) == 0) 1004 break; 1005 b->free = bgbfree; 1006 freeb(b); 1007 } 1008 1009 cmd(c, CSstatsdma, c->statsprt); 1010 c->linkstat = ~0; 1011 c->nrdma = 15; 1012 1013 cmd(c, Cetherup, 0); 1014 } 1015 1016 static Block* 1017 nextblock(Ctlr *c) 1018 { 1019 uint i; 1020 ushort l, k; 1021 Block *b; 1022 Done *d; 1023 Rx *rx; 1024 Slot *s; 1025 Slotparts *sp; 1026 1027 d = &c->done; 1028 s = d->entry; 1029 i = d->i & d->m; 1030 sp = (Slotparts *)(s + i); 1031 l = sp->len; 1032 if(l == 0) 1033 return 0; 1034 k = sp->cksum; 1035 s[i] = 0; 1036 d->i++; 1037 l = gbit16((uchar*)&l); 1038 //dprint("nextb: i=%d l=%d\n", d->i, l); 1039 rx = whichrx(c, l); 1040 if(rx->i >= rx->cnt){ 1041 iprint("m10g: overrun\n"); 1042 return 0; 1043 } 1044 i = rx->i & rx->m; 1045 b = rx->host[i]; 1046 rx->host[i] = 0; 1047 if(b == 0){ 1048 iprint("m10g: error rx to no block. memory is hosed.\n"); 1049 return 0; 1050 } 1051 rx->i++; 1052 1053 b->flag |= Bipck|Btcpck|Budpck; 1054 b->checksum = k; 1055 b->rp += 2; 1056 b->wp += 2+l; 1057 b->lim = b->wp; /* lie like a dog. */ 1058 return b; 1059 } 1060 1061 static int 1062 rxcansleep(void *v) 1063 { 1064 Ctlr *c; 1065 Slot *s; 1066 Slotparts *sp; 1067 Done *d; 1068 1069 c = v; 1070 d = &c->done; 1071 s = c->done.entry; 1072 sp = (Slotparts *)(s + (d->i & d->m)); 1073 if(sp->len != 0) 1074 return -1; 1075 c->irqack[0] = pbit32(3); 1076 return 0; 1077 } 1078 1079 static void 1080 m10rx(void *v) 1081 { 1082 Ether *e; 1083 Ctlr *c; 1084 Block *b; 1085 1086 e = v; 1087 c = e->ctlr; 1088 for(;;){ 1089 replenish(&c->sm); 1090 replenish(&c->bg); 1091 sleep(&c->rxrendez, rxcansleep, c); 1092 while(b = nextblock(c)) 1093 etheriq(e, b, 1); 1094 } 1095 } 1096 1097 static void 1098 txcleanup(Tx *tx, ulong n) 1099 { 1100 Block *b; 1101 uint j, l, m; 1102 1103 if(tx->npkt == n) 1104 return; 1105 l = 0; 1106 m = tx->m; 1107 /* 1108 * if tx->cnt == tx->i, yet tx->npkt == n-1, we just 1109 * caught ourselves and myricom card updating. 1110 */ 1111 for(;; tx->cnt++){ 1112 j = tx->cnt & tx->m; 1113 if(b = tx->bring[j]){ 1114 tx->bring[j] = 0; 1115 tx->nbytes += BLEN(b); 1116 freeb(b); 1117 if(++tx->npkt == n) 1118 return; 1119 } 1120 if(tx->cnt == tx->i) 1121 return; 1122 if(l++ == m){ 1123 iprint("m10g: tx ovrun: %lud %lud\n", n, tx->npkt); 1124 return; 1125 } 1126 } 1127 } 1128 1129 static int 1130 txcansleep(void *v) 1131 { 1132 Ctlr *c; 1133 1134 c = v; 1135 if(c->tx.cnt != c->tx.i && c->tx.npkt != gbit32(c->stats->txcnt)) 1136 return -1; 1137 return 0; 1138 } 1139 1140 static void 1141 txproc(void *v) 1142 { 1143 Ether *e; 1144 Ctlr *c; 1145 Tx *tx; 1146 1147 e = v; 1148 c = e->ctlr; 1149 tx = &c->tx; 1150 for(;;){ 1151 sleep(&c->txrendez, txcansleep, c); 1152 txcleanup(tx, gbit32(c->stats->txcnt)); 1153 } 1154 } 1155 1156 static void 1157 submittx(Tx *tx, int n) 1158 { 1159 Send *l, *h; 1160 int i0, i, m; 1161 1162 m = tx->m; 1163 i0 = tx->i & m; 1164 l = tx->lanai; 1165 h = tx->host; 1166 for(i = n-1; i >= 0; i--) 1167 memmove(l+(i + i0 & m), h+(i + i0 & m), sizeof *h); 1168 tx->i += n; 1169 // coherence(); 1170 } 1171 1172 static int 1173 nsegments(Block *b, int segsz) 1174 { 1175 uintptr bus, end, slen, len; 1176 int i; 1177 1178 bus = PCIWADDR(b->rp); 1179 i = 0; 1180 for(len = BLEN(b); len; len -= slen){ 1181 end = bus + segsz & ~(segsz-1); 1182 slen = end - bus; 1183 if(slen > len) 1184 slen = len; 1185 bus += slen; 1186 i++; 1187 } 1188 return i; 1189 } 1190 1191 static void 1192 m10gtransmit(Ether *e) 1193 { 1194 ushort slen; 1195 ulong i, cnt, rdma, nseg, count, end, bus, len, segsz; 1196 uchar flags; 1197 Block *b; 1198 Ctlr *c; 1199 Send *s, *s0, *s0m8; 1200 Tx *tx; 1201 1202 c = e->ctlr; 1203 tx = &c->tx; 1204 segsz = tx->segsz; 1205 1206 qlock(tx); 1207 count = 0; 1208 s = tx->host + (tx->i & tx->m); 1209 cnt = tx->cnt; 1210 s0 = tx->host + (cnt & tx->m); 1211 s0m8 = tx->host + ((cnt - 8) & tx->m); 1212 i = tx->i; 1213 for(; s >= s0 || s < s0m8; i += nseg){ 1214 if((b = qget(e->oq)) == nil) 1215 break; 1216 flags = SFfirst|SFnotso; 1217 if((len = BLEN(b)) < 1520) 1218 flags |= SFsmall; 1219 rdma = nseg = nsegments(b, segsz); 1220 bus = PCIWADDR(b->rp); 1221 for(; len; len -= slen){ 1222 end = (bus + segsz) & ~(segsz-1); 1223 slen = end - bus; 1224 if(slen > len) 1225 slen = len; 1226 s->low = pbit32(bus); 1227 s->len = pbit16(slen); 1228 s->nrdma = rdma; 1229 s->flags = flags; 1230 1231 bus += slen; 1232 if(++s == tx->host + tx->n) 1233 s = tx->host; 1234 count++; 1235 flags &= ~SFfirst; 1236 rdma = 1; 1237 } 1238 tx->bring[(i + nseg - 1) & tx->m] = b; 1239 if(1 || count > 0){ 1240 submittx(tx, count); 1241 count = 0; 1242 cnt = tx->cnt; 1243 s0 = tx->host + (cnt & tx->m); 1244 s0m8 = tx->host + ((cnt - 8) & tx->m); 1245 } 1246 } 1247 qunlock(tx); 1248 } 1249 1250 static void 1251 checkstats(Ether *e, Ctlr *c, Stats *s) 1252 { 1253 ulong i; 1254 1255 if(s->updated == 0) 1256 return; 1257 1258 i = gbit32(s->linkstat); 1259 if(c->linkstat != i){ 1260 e->link = i; 1261 if(c->linkstat = i) 1262 dprint("m10g: link up\n"); 1263 else 1264 dprint("m10g: link down\n"); 1265 } 1266 i = gbit32(s->nrdma); 1267 if(i != c->nrdma){ 1268 dprint("m10g: rdma timeout %ld\n", i); 1269 c->nrdma = i; 1270 } 1271 } 1272 1273 static void 1274 waitintx(Ctlr *c) 1275 { 1276 int i; 1277 1278 for(i = 0; i < 1024*1024; i++){ 1279 if(c->stats->valid == 0) 1280 break; 1281 coherence(); 1282 } 1283 } 1284 1285 static void 1286 m10ginterrupt(Ureg *, void *v) 1287 { 1288 Ether *e; 1289 Ctlr *c; 1290 1291 e = v; 1292 c = e->ctlr; 1293 1294 if(c->state != Runed || c->stats->valid == 0) /* not ready for us? */ 1295 return; 1296 1297 if(c->stats->valid & 1) 1298 wakeup(&c->rxrendez); 1299 if(gbit32(c->stats->txcnt) != c->tx.npkt) 1300 wakeup(&c->txrendez); 1301 if(c->msi == 0) 1302 *c->irqdeass = 0; 1303 else 1304 c->stats->valid = 0; 1305 waitintx(c); 1306 checkstats(e, c, c->stats); 1307 c->irqack[1] = pbit32(3); 1308 } 1309 1310 static void 1311 m10gattach(Ether *e) 1312 { 1313 Ctlr *c; 1314 char name[12]; 1315 1316 dprint("m10gattach\n"); 1317 1318 qlock(e->ctlr); 1319 c = e->ctlr; 1320 if(c->state != Detached){ 1321 qunlock(c); 1322 return; 1323 } 1324 if(waserror()){ 1325 c->state = Detached; 1326 qunlock(c); 1327 nexterror(); 1328 } 1329 reset(e, c); 1330 c->state = Attached; 1331 open0(e, c); 1332 if(c->kprocs == 0){ 1333 c->kprocs++; 1334 snprint(name, sizeof name, "#l%drxproc", e->ctlrno); 1335 kproc(name, m10rx, e); 1336 snprint(name, sizeof name, "#l%dtxproc", e->ctlrno); 1337 kproc(name, txproc, e); 1338 } 1339 c->state = Runed; 1340 qunlock(c); 1341 poperror(); 1342 } 1343 1344 static int 1345 m10gdetach(Ctlr *c) 1346 { 1347 dprint("m10gdetach\n"); 1348 // reset(e->ctlr); 1349 vunmap(c->ram, c->pcidev->mem[0].size); 1350 ctlrfree(c); 1351 return -1; 1352 } 1353 1354 static int 1355 lstcount(Block *b) 1356 { 1357 int i; 1358 1359 i = 0; 1360 for(; b; b = b->next) 1361 i++; 1362 return i; 1363 } 1364 1365 static long 1366 m10gifstat(Ether *e, void *v, long n, ulong off) 1367 { 1368 int l; 1369 char *p; 1370 Ctlr *c; 1371 Stats s; 1372 1373 c = e->ctlr; 1374 p = malloc(READSTR+1); 1375 l = 0; 1376 /* no point in locking this because this is done via dma. */ 1377 memmove(&s, c->stats, sizeof s); 1378 1379 // l += 1380 snprint(p+l, READSTR, 1381 "txcnt = %lud\n" "linkstat = %lud\n" "dlink = %lud\n" 1382 "derror = %lud\n" "drunt = %lud\n" "doverrun = %lud\n" 1383 "dnosm = %lud\n" "dnobg = %lud\n" "nrdma = %lud\n" 1384 "txstopped = %ud\n" "down = %ud\n" "updated = %ud\n" 1385 "valid = %ud\n\n" 1386 "tx pkt = %lud\n" "tx bytes = %lld\n" 1387 "tx cnt = %ud\n" "tx n = %ud\n" "tx i = %ud\n" 1388 "sm cnt = %ud\n" "sm i = %ud\n" "sm n = %ud\n" 1389 "sm lst = %ud\n" 1390 "bg cnt = %ud\n" "bg i = %ud\n" "bg n = %ud\n" 1391 "bg lst = %ud\n" 1392 "segsz = %lud\n" "coal = %lud\n", 1393 gbit32(s.txcnt), gbit32(s.linkstat), gbit32(s.dlink), 1394 gbit32(s.derror), gbit32(s.drunt), gbit32(s.doverrun), 1395 gbit32(s.dnosm), gbit32(s.dnobg), gbit32(s.nrdma), 1396 s.txstopped, s.down, s.updated, s.valid, 1397 c->tx.npkt, c->tx.nbytes, 1398 c->tx.cnt, c->tx.n, c->tx.i, 1399 c->sm.cnt, c->sm.i, c->sm.pool->n, lstcount(c->sm.pool->head), 1400 c->bg.cnt, c->bg.i, c->bg.pool->n, lstcount(c->bg.pool->head), 1401 c->tx.segsz, gbit32((uchar*)c->coal)); 1402 1403 n = readstr(off, v, n, p); 1404 free(p); 1405 return n; 1406 } 1407 1408 //static void 1409 //summary(Ether *e) 1410 //{ 1411 // char *buf; 1412 // int n, i, j; 1413 // 1414 // if(e == 0) 1415 // return; 1416 // buf = malloc(n=250); 1417 // if(buf == 0) 1418 // return; 1419 // 1420 // snprint(buf, n, "oq\n"); 1421 // qsummary(e->oq, buf+3, n-3-1); 1422 // iprint("%s", buf); 1423 // 1424 // if(e->f) for(i = 0; e->f[i]; i++){ 1425 // j = snprint(buf, n, "f%d %d\n", i, e->f[i]->type); 1426 // qsummary(e->f[i]->in, buf+j, n-j-1); 1427 // print("%s", buf); 1428 // } 1429 // 1430 // free(buf); 1431 //} 1432 1433 static void 1434 rxring(Ctlr *c) 1435 { 1436 Done *d; 1437 Slot *s; 1438 Slotparts *sp; 1439 int i; 1440 1441 d = &c->done; 1442 s = d->entry; 1443 for(i = 0; i < d->n; i++) { 1444 sp = (Slotparts *)(s + i); 1445 if(sp->len) 1446 iprint("s[%d] = %d\n", i, sp->len); 1447 } 1448 } 1449 1450 enum { 1451 CMdebug, 1452 CMcoal, 1453 CMwakeup, 1454 CMtxwakeup, 1455 CMqsummary, 1456 CMrxring, 1457 }; 1458 1459 static Cmdtab ctab[] = { 1460 CMdebug, "debug", 2, 1461 CMcoal, "coal", 2, 1462 CMwakeup, "wakeup", 1, 1463 CMtxwakeup, "txwakeup", 1, 1464 // CMqsummary, "q", 1, 1465 CMrxring, "rxring", 1, 1466 }; 1467 1468 static long 1469 m10gctl(Ether *e, void *v, long n) 1470 { 1471 int i; 1472 Cmdbuf *c; 1473 Cmdtab *t; 1474 1475 dprint("m10gctl\n"); 1476 if(e->ctlr == nil) 1477 error(Enonexist); 1478 1479 c = parsecmd(v, n); 1480 if(waserror()){ 1481 free(c); 1482 nexterror(); 1483 } 1484 t = lookupcmd(c, ctab, nelem(ctab)); 1485 switch(t->index){ 1486 case CMdebug: 1487 debug = (strcmp(c->f[1], "on") == 0); 1488 break; 1489 case CMcoal: 1490 i = atoi(c->f[1]); 1491 if(i < 0 || i > 1000) 1492 error(Ebadarg); 1493 *((Ctlr*)e->ctlr)->coal = pbit32(i); 1494 break; 1495 case CMwakeup: 1496 wakeup(&((Ctlr*)e->ctlr)->rxrendez); /* you're kidding, right? */ 1497 break; 1498 case CMtxwakeup: 1499 wakeup(&((Ctlr*)e->ctlr)->txrendez); /* you're kidding, right? */ 1500 break; 1501 // case CMqsummary: 1502 // summary(e); 1503 // break; 1504 case CMrxring: 1505 rxring(e->ctlr); 1506 break; 1507 default: 1508 error(Ebadarg); 1509 } 1510 free(c); 1511 poperror(); 1512 return n; 1513 } 1514 1515 static void 1516 m10gshutdown(Ether *e) 1517 { 1518 dprint("m10gshutdown\n"); 1519 m10gdetach(e->ctlr); 1520 } 1521 1522 static void 1523 m10gpromiscuous(void *v, int on) 1524 { 1525 Ether *e; 1526 int i; 1527 1528 dprint("m10gpromiscuous\n"); 1529 e = v; 1530 if(on) 1531 i = Cpromisc; 1532 else 1533 i = Cnopromisc; 1534 cmd(e->ctlr, i, 0); 1535 } 1536 1537 static int mcctab[] = { CSleavemc, CSjoinmc }; 1538 static char *mcntab[] = { "leave", "join" }; 1539 1540 static void 1541 m10gmulticast(void *v, uchar *ea, int on) 1542 { 1543 Ether *e; 1544 int i; 1545 1546 dprint("m10gmulticast\n"); 1547 e = v; 1548 if((i = maccmd(e->ctlr, mcctab[on], ea)) != 0) 1549 print("m10g: can't %s %E: %d\n", mcntab[on], ea, i); 1550 } 1551 1552 static void 1553 m10gpci(void) 1554 { 1555 Pcidev *p; 1556 Ctlr *t, *c; 1557 1558 t = 0; 1559 for(p = 0; p = pcimatch(p, 0x14c1, 0x0008); ){ 1560 c = malloc(sizeof *c); 1561 if(c == nil) 1562 continue; 1563 c->pcidev = p; 1564 c->id = p->did<<16 | p->vid; 1565 c->boot = pcicap(p, PciCapVND); 1566 // kickthebaby(p, c); 1567 pcisetbme(p); 1568 if(setmem(p, c) == -1){ 1569 print("m10g: setmem failed\n"); 1570 free(c); 1571 /* cleanup */ 1572 continue; 1573 } 1574 if(t) 1575 t->next = c; 1576 else 1577 ctlrs = c; 1578 t = c; 1579 } 1580 } 1581 1582 static int 1583 m10gpnp(Ether *e) 1584 { 1585 Ctlr *c; 1586 1587 if(ctlrs == nil) 1588 m10gpci(); 1589 1590 for(c = ctlrs; c != nil; c = c->next) 1591 if(c->active) 1592 continue; 1593 else if(e->port == 0 || e->port == c->port) 1594 break; 1595 if(c == nil) 1596 return -1; 1597 c->active = 1; 1598 1599 e->ctlr = c; 1600 e->port = c->port; 1601 e->irq = c->pcidev->intl; 1602 e->tbdf = c->pcidev->tbdf; 1603 e->mbps = 10000; 1604 memmove(e->ea, c->ra, Eaddrlen); 1605 1606 e->attach = m10gattach; 1607 e->detach = m10gshutdown; 1608 e->transmit = m10gtransmit; 1609 e->interrupt = m10ginterrupt; 1610 e->ifstat = m10gifstat; 1611 e->ctl = m10gctl; 1612 // e->power = m10gpower; 1613 e->shutdown = m10gshutdown; 1614 1615 e->arg = e; 1616 e->promiscuous = m10gpromiscuous; 1617 e->multicast = m10gmulticast; 1618 1619 return 0; 1620 } 1621 1622 void 1623 etherm10glink(void) 1624 { 1625 addethercard("m10g", m10gpnp); 1626 } 1627