1 /* 2 * IPv4 Ethernet bridge 3 */ 4 #include "u.h" 5 #include "../port/lib.h" 6 #include "mem.h" 7 #include "dat.h" 8 #include "fns.h" 9 #include "../ip/ip.h" 10 #include "../port/netif.h" 11 #include "../port/error.h" 12 13 typedef struct Bridge Bridge; 14 typedef struct Port Port; 15 typedef struct Centry Centry; 16 typedef struct Iphdr Iphdr; 17 typedef struct Tcphdr Tcphdr; 18 19 enum 20 { 21 Qtopdir= 1, /* top level directory */ 22 23 Qbridgedir, /* bridge* directory */ 24 Qbctl, 25 Qstats, 26 Qcache, 27 Qlog, 28 29 Qportdir, /* directory for a protocol */ 30 Qpctl, 31 Qlocal, 32 Qstatus, 33 34 MaxQ, 35 36 Maxbridge= 4, 37 Maxport= 128, // power of 2 38 CacheHash= 257, // prime 39 CacheLook= 5, // how many cache entries to examine 40 CacheSize= (CacheHash+CacheLook-1), 41 CacheTimeout= 5*60, // timeout for cache entry in seconds 42 43 TcpMssMax = 1300, // max desirable Tcp MSS value 44 TunnelMtu = 1400, 45 }; 46 47 static Dirtab bridgedirtab[]={ 48 "ctl", {Qbctl}, 0, 0666, 49 "stats", {Qstats}, 0, 0444, 50 "cache", {Qcache}, 0, 0444, 51 "log", {Qlog}, 0, 0666, 52 }; 53 54 static Dirtab portdirtab[]={ 55 "ctl", {Qpctl}, 0, 0666, 56 "local", {Qlocal}, 0, 0444, 57 "status", {Qstatus}, 0, 0444, 58 }; 59 60 enum { 61 Logcache= (1<<0), 62 Logmcast= (1<<1), 63 }; 64 65 // types of interfaces 66 enum 67 { 68 Tether, 69 Ttun, 70 }; 71 72 static Logflag logflags[] = 73 { 74 { "cache", Logcache, }, 75 { "multicast", Logmcast, }, 76 { nil, 0, }, 77 }; 78 79 static Dirtab *dirtab[MaxQ]; 80 81 #define TYPE(x) (((ulong)(x).path) & 0xff) 82 #define PORT(x) ((((ulong)(x).path) >> 8)&(Maxport-1)) 83 #define QID(x, y) (((x)<<8) | (y)) 84 85 struct Centry 86 { 87 uchar d[Eaddrlen]; 88 int port; 89 long expire; // entry expires this many seconds after bootime 90 long src; 91 long dst; 92 }; 93 94 struct Bridge 95 { 96 QLock; 97 int nport; 98 Port *port[Maxport]; 99 Centry cache[CacheSize]; 100 ulong hit; 101 ulong miss; 102 ulong copy; 103 long delay0; // constant microsecond delay per packet 104 long delayn; // microsecond delay per byte 105 int tcpmss; // modify tcpmss value 106 107 Log; 108 }; 109 110 struct Port 111 { 112 int id; 113 Bridge *bridge; 114 int ref; 115 int closed; 116 117 Chan *data[2]; // channel to data 118 119 Proc *readp; // read proc 120 121 // the following uniquely identifies the port 122 int type; 123 char name[KNAMELEN]; 124 125 // owner hash - avoids bind/unbind races 126 ulong ownhash; 127 128 // various stats 129 int in; // number of packets read 130 int inmulti; // multicast or broadcast 131 int inunknown; // unknown address 132 int out; // number of packets read 133 int outmulti; // multicast or broadcast 134 int outunknown; // unknown address 135 int outfrag; // fragmented the packet 136 int nentry; // number of cache entries for this port 137 }; 138 139 enum { 140 IP_TCPPROTO = 6, 141 EOLOPT = 0, 142 NOOPOPT = 1, 143 MSSOPT = 2, 144 MSS_LENGTH = 4, /* Mean segment size */ 145 SYN = 0x02, /* Pkt. is synchronise */ 146 IPHDR = 20, /* sizeof(Iphdr) */ 147 }; 148 149 struct Iphdr 150 { 151 uchar vihl; /* Version and header length */ 152 uchar tos; /* Type of service */ 153 uchar length[2]; /* packet length */ 154 uchar id[2]; /* ip->identification */ 155 uchar frag[2]; /* Fragment information */ 156 uchar ttl; /* Time to live */ 157 uchar proto; /* Protocol */ 158 uchar cksum[2]; /* Header checksum */ 159 uchar src[4]; /* IP source */ 160 uchar dst[4]; /* IP destination */ 161 }; 162 163 struct Tcphdr 164 { 165 uchar sport[2]; 166 uchar dport[2]; 167 uchar seq[4]; 168 uchar ack[4]; 169 uchar flag[2]; 170 uchar win[2]; 171 uchar cksum[2]; 172 uchar urg[2]; 173 }; 174 175 static Bridge bridgetab[Maxbridge]; 176 177 static int m2p[] = { 178 [OREAD] 4, 179 [OWRITE] 2, 180 [ORDWR] 6 181 }; 182 183 static int bridgegen(Chan *c, char*, Dirtab*, int, int s, Dir *dp); 184 static void portbind(Bridge *b, int argc, char *argv[]); 185 static void portunbind(Bridge *b, int argc, char *argv[]); 186 static void etherread(void *a); 187 static char *cachedump(Bridge *b); 188 static void portfree(Port *port); 189 static void cacheflushport(Bridge *b, int port); 190 static void etherwrite(Port *port, Block *bp); 191 192 extern ulong parseip(uchar*, char*); 193 extern ushort ipcsum(uchar *addr); 194 195 static void 196 bridgeinit(void) 197 { 198 int i; 199 Dirtab *dt; 200 201 // setup dirtab with non directory entries 202 for(i=0; i<nelem(bridgedirtab); i++) { 203 dt = bridgedirtab + i; 204 dirtab[TYPE(dt->qid)] = dt; 205 } 206 for(i=0; i<nelem(portdirtab); i++) { 207 dt = portdirtab + i; 208 dirtab[TYPE(dt->qid)] = dt; 209 } 210 } 211 212 static Chan* 213 bridgeattach(char* spec) 214 { 215 Chan *c; 216 int dev; 217 218 dev = atoi(spec); 219 if(dev<0 || dev >= Maxbridge) 220 error("bad specification"); 221 222 c = devattach('B', spec); 223 mkqid(&c->qid, QID(0, Qtopdir), 0, QTDIR); 224 c->dev = dev; 225 return c; 226 } 227 228 static Walkqid* 229 bridgewalk(Chan *c, Chan *nc, char **name, int nname) 230 { 231 return devwalk(c, nc, name, nname, (Dirtab*)0, 0, bridgegen); 232 } 233 234 static int 235 bridgestat(Chan* c, uchar* db, int n) 236 { 237 return devstat(c, db, n, (Dirtab *)0, 0L, bridgegen); 238 } 239 240 static Chan* 241 bridgeopen(Chan* c, int omode) 242 { 243 int perm; 244 Bridge *b; 245 246 omode &= 3; 247 perm = m2p[omode]; 248 USED(perm); 249 250 b = bridgetab + c->dev; 251 USED(b); 252 253 switch(TYPE(c->qid)) { 254 default: 255 break; 256 case Qlog: 257 logopen(b); 258 break; 259 case Qcache: 260 c->aux = cachedump(b); 261 break; 262 } 263 c->mode = openmode(omode); 264 c->flag |= COPEN; 265 c->offset = 0; 266 return c; 267 } 268 269 static void 270 bridgeclose(Chan* c) 271 { 272 Bridge *b = bridgetab + c->dev; 273 274 switch(TYPE(c->qid)) { 275 case Qcache: 276 if(c->flag & COPEN) 277 free(c->aux); 278 break; 279 case Qlog: 280 if(c->flag & COPEN) 281 logclose(b); 282 break; 283 } 284 } 285 286 static long 287 bridgeread(Chan *c, void *a, long n, vlong off) 288 { 289 char buf[256]; 290 Bridge *b = bridgetab + c->dev; 291 Port *port; 292 int i, ingood, outgood; 293 294 USED(off); 295 switch(TYPE(c->qid)) { 296 default: 297 error(Eperm); 298 case Qtopdir: 299 case Qbridgedir: 300 case Qportdir: 301 return devdirread(c, a, n, 0, 0, bridgegen); 302 case Qlog: 303 return logread(b, a, off, n); 304 case Qstatus: 305 qlock(b); 306 port = b->port[PORT(c->qid)]; 307 if(port == 0) 308 strcpy(buf, "unbound\n"); 309 else { 310 i = 0; 311 switch(port->type) { 312 default: 313 panic("bridgeread: unknown port type: %d", 314 port->type); 315 case Tether: 316 i += snprint(buf+i, sizeof(buf)-i, "ether %s: ", port->name); 317 break; 318 case Ttun: 319 i += snprint(buf+i, sizeof(buf)-i, "tunnel %s: ", port->name); 320 break; 321 } 322 ingood = port->in - port->inmulti - port->inunknown; 323 outgood = port->out - port->outmulti - port->outunknown; 324 i += snprint(buf+i, sizeof(buf)-i, 325 "in=%d(%d:%d:%d) out=%d(%d:%d:%d:%d)\n", 326 port->in, ingood, port->inmulti, port->inunknown, 327 port->out, outgood, port->outmulti, 328 port->outunknown, port->outfrag); 329 USED(i); 330 } 331 n = readstr(off, a, n, buf); 332 qunlock(b); 333 return n; 334 case Qbctl: 335 snprint(buf, sizeof(buf), "%s tcpmss\ndelay %ld %ld\n", 336 b->tcpmss ? "set" : "clear", b->delay0, b->delayn); 337 n = readstr(off, a, n, buf); 338 return n; 339 case Qcache: 340 n = readstr(off, a, n, c->aux); 341 return n; 342 case Qstats: 343 snprint(buf, sizeof(buf), "hit=%uld miss=%uld copy=%uld\n", 344 b->hit, b->miss, b->copy); 345 n = readstr(off, a, n, buf); 346 return n; 347 } 348 } 349 350 static void 351 bridgeoption(Bridge *b, char *option, int value) 352 { 353 if(strcmp(option, "tcpmss") == 0) 354 b->tcpmss = value; 355 else 356 error("unknown bridge option"); 357 } 358 359 360 static long 361 bridgewrite(Chan *c, void *a, long n, vlong off) 362 { 363 Bridge *b = bridgetab + c->dev; 364 Cmdbuf *cb; 365 char *arg0, *p; 366 367 USED(off); 368 switch(TYPE(c->qid)) { 369 default: 370 error(Eperm); 371 case Qbctl: 372 cb = parsecmd(a, n); 373 qlock(b); 374 if(waserror()) { 375 qunlock(b); 376 free(cb); 377 nexterror(); 378 } 379 if(cb->nf == 0) 380 error("short write"); 381 arg0 = cb->f[0]; 382 if(strcmp(arg0, "bind") == 0) { 383 portbind(b, cb->nf-1, cb->f+1); 384 } else if(strcmp(arg0, "unbind") == 0) { 385 portunbind(b, cb->nf-1, cb->f+1); 386 } else if(strcmp(arg0, "cacheflush") == 0) { 387 log(b, Logcache, "cache flush\n"); 388 memset(b->cache, 0, CacheSize*sizeof(Centry)); 389 } else if(strcmp(arg0, "set") == 0) { 390 if(cb->nf != 2) 391 error("usage: set option"); 392 bridgeoption(b, cb->f[1], 1); 393 } else if(strcmp(arg0, "clear") == 0) { 394 if(cb->nf != 2) 395 error("usage: clear option"); 396 bridgeoption(b, cb->f[1], 0); 397 } else if(strcmp(arg0, "delay") == 0) { 398 if(cb->nf != 3) 399 error("usage: delay delay0 delayn"); 400 b->delay0 = strtol(cb->f[1], nil, 10); 401 b->delayn = strtol(cb->f[2], nil, 10); 402 } else 403 error("unknown control request"); 404 poperror(); 405 qunlock(b); 406 free(cb); 407 return n; 408 case Qlog: 409 cb = parsecmd(a, n); 410 p = logctl(b, cb->nf, cb->f, logflags); 411 free(cb); 412 if(p != nil) 413 error(p); 414 return n; 415 } 416 } 417 418 static int 419 bridgegen(Chan *c, char *, Dirtab*, int, int s, Dir *dp) 420 { 421 Bridge *b = bridgetab + c->dev; 422 int type = TYPE(c->qid); 423 Dirtab *dt; 424 Qid qid; 425 426 if(s == DEVDOTDOT){ 427 switch(TYPE(c->qid)){ 428 case Qtopdir: 429 case Qbridgedir: 430 snprint(up->genbuf, sizeof(up->genbuf), "#B%ld", c->dev); 431 mkqid(&qid, Qtopdir, 0, QTDIR); 432 devdir(c, qid, up->genbuf, 0, eve, 0555, dp); 433 break; 434 case Qportdir: 435 snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev); 436 mkqid(&qid, Qbridgedir, 0, QTDIR); 437 devdir(c, qid, up->genbuf, 0, eve, 0555, dp); 438 break; 439 default: 440 panic("bridgewalk %llux", c->qid.path); 441 } 442 return 1; 443 } 444 445 switch(type) { 446 default: 447 /* non-directory entries end up here */ 448 if(c->qid.type & QTDIR) 449 panic("bridgegen: unexpected directory"); 450 if(s != 0) 451 return -1; 452 dt = dirtab[TYPE(c->qid)]; 453 if(dt == nil) 454 panic("bridgegen: unknown type: %lud", TYPE(c->qid)); 455 devdir(c, c->qid, dt->name, dt->length, eve, dt->perm, dp); 456 return 1; 457 case Qtopdir: 458 if(s != 0) 459 return -1; 460 snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev); 461 mkqid(&qid, QID(0, Qbridgedir), 0, QTDIR); 462 devdir(c, qid, up->genbuf, 0, eve, 0555, dp); 463 return 1; 464 case Qbridgedir: 465 if(s<nelem(bridgedirtab)) { 466 dt = bridgedirtab+s; 467 devdir(c, dt->qid, dt->name, dt->length, eve, dt->perm, dp); 468 return 1; 469 } 470 s -= nelem(bridgedirtab); 471 if(s >= b->nport) 472 return -1; 473 mkqid(&qid, QID(s, Qportdir), 0, QTDIR); 474 snprint(up->genbuf, sizeof(up->genbuf), "%d", s); 475 devdir(c, qid, up->genbuf, 0, eve, 0555, dp); 476 return 1; 477 case Qportdir: 478 if(s>=nelem(portdirtab)) 479 return -1; 480 dt = portdirtab+s; 481 mkqid(&qid, QID(PORT(c->qid),TYPE(dt->qid)), 0, QTFILE); 482 devdir(c, qid, dt->name, dt->length, eve, dt->perm, dp); 483 return 1; 484 } 485 } 486 487 // parse mac address; also in netif.c 488 static int 489 parseaddr(uchar *to, char *from, int alen) 490 { 491 char nip[4]; 492 char *p; 493 int i; 494 495 p = from; 496 for(i = 0; i < alen; i++){ 497 if(*p == 0) 498 return -1; 499 nip[0] = *p++; 500 if(*p == 0) 501 return -1; 502 nip[1] = *p++; 503 nip[2] = 0; 504 to[i] = strtoul(nip, 0, 16); 505 if(*p == ':') 506 p++; 507 } 508 return 0; 509 } 510 511 // assumes b is locked 512 static void 513 portbind(Bridge *b, int argc, char *argv[]) 514 { 515 Port *port; 516 Chan *ctl; 517 int type = 0, i, n; 518 ulong ownhash; 519 char *dev, *dev2 = nil, *p; 520 char buf[100], name[KNAMELEN], path[8*KNAMELEN]; 521 static char usage[] = "usage: bind ether|tunnel name ownhash dev [dev2]"; 522 523 memset(name, 0, KNAMELEN); 524 if(argc < 4) 525 error(usage); 526 if(strcmp(argv[0], "ether") == 0) { 527 if(argc != 4) 528 error(usage); 529 type = Tether; 530 strncpy(name, argv[1], KNAMELEN); 531 name[KNAMELEN-1] = 0; 532 // parseaddr(addr, argv[1], Eaddrlen); 533 } else if(strcmp(argv[0], "tunnel") == 0) { 534 if(argc != 5) 535 error(usage); 536 type = Ttun; 537 strncpy(name, argv[1], KNAMELEN); 538 name[KNAMELEN-1] = 0; 539 // parseip(addr, argv[1]); 540 dev2 = argv[4]; 541 } else 542 error(usage); 543 ownhash = atoi(argv[2]); 544 dev = argv[3]; 545 for(i=0; i<b->nport; i++) { 546 port = b->port[i]; 547 if(port != nil && port->type == type && 548 memcmp(port->name, name, KNAMELEN) == 0) 549 error("port in use"); 550 } 551 for(i=0; i<Maxport; i++) 552 if(b->port[i] == nil) 553 break; 554 if(i == Maxport) 555 error("no more ports"); 556 port = smalloc(sizeof(Port)); 557 port->ref = 1; 558 port->id = i; 559 port->ownhash = ownhash; 560 561 if(waserror()) { 562 portfree(port); 563 nexterror(); 564 } 565 port->type = type; 566 memmove(port->name, name, KNAMELEN); 567 switch(port->type) { 568 default: 569 panic("portbind: unknown port type: %d", type); 570 case Tether: 571 snprint(path, sizeof(path), "%s/clone", dev); 572 ctl = namec(path, Aopen, ORDWR, 0); 573 if(waserror()) { 574 cclose(ctl); 575 nexterror(); 576 } 577 // check addr? 578 579 // get directory name 580 n = devtab[ctl->type]->read(ctl, buf, sizeof(buf), 0); 581 buf[n] = 0; 582 for(p = buf; *p == ' '; p++) 583 ; 584 snprint(path, sizeof(path), "%s/%lud/data", dev, strtoul(p, 0, 0)); 585 586 // setup connection to be promiscuous 587 snprint(buf, sizeof(buf), "connect -1"); 588 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0); 589 snprint(buf, sizeof(buf), "promiscuous"); 590 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0); 591 snprint(buf, sizeof(buf), "bridge"); 592 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0); 593 594 // open data port 595 port->data[0] = namec(path, Aopen, ORDWR, 0); 596 // dup it 597 incref(port->data[0]); 598 port->data[1] = port->data[0]; 599 600 poperror(); 601 cclose(ctl); 602 603 break; 604 case Ttun: 605 port->data[0] = namec(dev, Aopen, OREAD, 0); 606 port->data[1] = namec(dev2, Aopen, OWRITE, 0); 607 break; 608 } 609 610 poperror(); 611 612 /* committed to binding port */ 613 b->port[port->id] = port; 614 port->bridge = b; 615 if(b->nport <= port->id) 616 b->nport = port->id+1; 617 618 // assumes kproc always succeeds 619 kproc("etherread", etherread, port); // poperror must be next 620 port->ref++; 621 } 622 623 // assumes b is locked 624 static void 625 portunbind(Bridge *b, int argc, char *argv[]) 626 { 627 int type = 0, i; 628 char name[KNAMELEN]; 629 ulong ownhash; 630 Port *port = nil; 631 static char usage[] = "usage: unbind ether|tunnel addr [ownhash]"; 632 633 memset(name, 0, KNAMELEN); 634 if(argc < 2 || argc > 3) 635 error(usage); 636 if(strcmp(argv[0], "ether") == 0) { 637 type = Tether; 638 strncpy(name, argv[1], KNAMELEN); 639 name[KNAMELEN-1] = 0; 640 // parseaddr(addr, argv[1], Eaddrlen); 641 } else if(strcmp(argv[0], "tunnel") == 0) { 642 type = Ttun; 643 strncpy(name, argv[1], KNAMELEN); 644 name[KNAMELEN-1] = 0; 645 // parseip(addr, argv[1]); 646 } else 647 error(usage); 648 if(argc == 3) 649 ownhash = atoi(argv[2]); 650 else 651 ownhash = 0; 652 for(i=0; i<b->nport; i++) { 653 port = b->port[i]; 654 if(port != nil && port->type == type && 655 memcmp(port->name, name, KNAMELEN) == 0) 656 break; 657 } 658 if(i == b->nport) 659 error("port not found"); 660 if(ownhash != 0 && port->ownhash != 0 && ownhash != port->ownhash) 661 error("bad owner hash"); 662 663 port->closed = 1; 664 b->port[i] = nil; // port is now unbound 665 cacheflushport(b, i); 666 667 // try and stop reader 668 if(port->readp) 669 postnote(port->readp, 1, "unbind", 0); 670 portfree(port); 671 } 672 673 // assumes b is locked 674 static Centry * 675 cachelookup(Bridge *b, uchar d[Eaddrlen]) 676 { 677 int i; 678 uint h; 679 Centry *p; 680 long sec; 681 682 // dont cache multicast or broadcast 683 if(d[0] & 1) 684 return 0; 685 686 h = 0; 687 for(i=0; i<Eaddrlen; i++) { 688 h *= 7; 689 h += d[i]; 690 } 691 h %= CacheHash; 692 p = b->cache + h; 693 sec = TK2SEC(m->ticks); 694 for(i=0; i<CacheLook; i++,p++) { 695 if(memcmp(d, p->d, Eaddrlen) == 0) { 696 p->dst++; 697 if(sec >= p->expire) { 698 log(b, Logcache, "expired cache entry: %E %d\n", 699 d, p->port); 700 return nil; 701 } 702 p->expire = sec + CacheTimeout; 703 return p; 704 } 705 } 706 log(b, Logcache, "cache miss: %E\n", d); 707 return nil; 708 } 709 710 // assumes b is locked 711 static void 712 cacheupdate(Bridge *b, uchar d[Eaddrlen], int port) 713 { 714 int i; 715 uint h; 716 Centry *p, *pp; 717 long sec; 718 719 // dont cache multicast or broadcast 720 if(d[0] & 1) { 721 log(b, Logcache, "bad source address: %E\n", d); 722 return; 723 } 724 725 h = 0; 726 for(i=0; i<Eaddrlen; i++) { 727 h *= 7; 728 h += d[i]; 729 } 730 h %= CacheHash; 731 p = b->cache + h; 732 pp = p; 733 sec = p->expire; 734 735 // look for oldest entry 736 for(i=0; i<CacheLook; i++,p++) { 737 if(memcmp(p->d, d, Eaddrlen) == 0) { 738 p->expire = TK2SEC(m->ticks) + CacheTimeout; 739 if(p->port != port) { 740 log(b, Logcache, "NIC changed port %d->%d: %E\n", 741 p->port, port, d); 742 p->port = port; 743 } 744 p->src++; 745 return; 746 } 747 if(p->expire < sec) { 748 sec = p->expire; 749 pp = p; 750 } 751 } 752 if(pp->expire != 0) 753 log(b, Logcache, "bumping from cache: %E %d\n", pp->d, pp->port); 754 pp->expire = TK2SEC(m->ticks) + CacheTimeout; 755 memmove(pp->d, d, Eaddrlen); 756 pp->port = port; 757 pp->src = 1; 758 pp->dst = 0; 759 log(b, Logcache, "adding to cache: %E %d\n", pp->d, pp->port); 760 } 761 762 // assumes b is locked 763 static void 764 cacheflushport(Bridge *b, int port) 765 { 766 Centry *ce; 767 int i; 768 769 ce = b->cache; 770 for(i=0; i<CacheSize; i++,ce++) { 771 if(ce->port != port) 772 continue; 773 memset(ce, 0, sizeof(Centry)); 774 } 775 } 776 777 static char * 778 cachedump(Bridge *b) 779 { 780 int i, n; 781 long sec, off; 782 char *buf, *p, *ep; 783 Centry *ce; 784 char c; 785 786 qlock(b); 787 if(waserror()) { 788 qunlock(b); 789 nexterror(); 790 } 791 sec = TK2SEC(m->ticks); 792 n = 0; 793 for(i=0; i<CacheSize; i++) 794 if(b->cache[i].expire != 0) 795 n++; 796 797 n *= 51; // change if print format is changed 798 n += 10; // some slop at the end 799 buf = malloc(n); 800 p = buf; 801 ep = buf + n; 802 ce = b->cache; 803 off = seconds() - sec; 804 for(i=0; i<CacheSize; i++,ce++) { 805 if(ce->expire == 0) 806 continue; 807 c = (sec < ce->expire)?'v':'e'; 808 p += snprint(p, ep-p, "%E %2d %10ld %10ld %10ld %c\n", ce->d, 809 ce->port, ce->src, ce->dst, ce->expire+off, c); 810 } 811 *p = 0; 812 poperror(); 813 qunlock(b); 814 815 return buf; 816 } 817 818 819 820 // assumes b is locked 821 static void 822 ethermultiwrite(Bridge *b, Block *bp, Port *port) 823 { 824 Port *oport; 825 Block *bp2; 826 Etherpkt *ep; 827 int i, mcast; 828 829 if(waserror()) { 830 if(bp) 831 freeb(bp); 832 nexterror(); 833 } 834 835 ep = (Etherpkt*)bp->rp; 836 mcast = ep->d[0] & 1; /* multicast bit of ethernet address */ 837 838 oport = nil; 839 for(i=0; i<b->nport; i++) { 840 if(i == port->id || b->port[i] == nil) 841 continue; 842 /* 843 * we need to forward multicast packets for ipv6, 844 * so always do it. 845 */ 846 if(mcast) 847 b->port[i]->outmulti++; 848 else 849 b->port[i]->outunknown++; 850 851 // delay one so that the last write does not copy 852 if(oport != nil) { 853 b->copy++; 854 bp2 = copyblock(bp, blocklen(bp)); 855 if(!waserror()) { 856 etherwrite(oport, bp2); 857 poperror(); 858 } 859 } 860 oport = b->port[i]; 861 } 862 863 // last write free block 864 if(oport) { 865 bp2 = bp; bp = nil; USED(bp); 866 if(!waserror()) { 867 etherwrite(oport, bp2); 868 poperror(); 869 } 870 } else 871 freeb(bp); 872 873 poperror(); 874 } 875 876 static void 877 tcpmsshack(Etherpkt *epkt, int n) 878 { 879 int hl, optlen; 880 Iphdr *iphdr; 881 Tcphdr *tcphdr; 882 ulong mss, cksum; 883 uchar *optr; 884 885 /* ignore non-ipv4 packets */ 886 if(nhgets(epkt->type) != ETIP4) 887 return; 888 iphdr = (Iphdr*)(epkt->data); 889 n -= ETHERHDRSIZE; 890 if(n < IPHDR) 891 return; 892 893 /* ignore bad packets */ 894 if(iphdr->vihl != (IP_VER4|IP_HLEN4)) { 895 hl = (iphdr->vihl&0xF)<<2; 896 if((iphdr->vihl&0xF0) != IP_VER4 || hl < (IP_HLEN4<<2)) 897 return; 898 } else 899 hl = IP_HLEN4<<2; 900 901 /* ignore non-tcp packets */ 902 if(iphdr->proto != IP_TCPPROTO) 903 return; 904 n -= hl; 905 if(n < sizeof(Tcphdr)) 906 return; 907 tcphdr = (Tcphdr*)((uchar*)(iphdr) + hl); 908 // MSS can only appear in SYN packet 909 if(!(tcphdr->flag[1] & SYN)) 910 return; 911 hl = (tcphdr->flag[0] & 0xf0)>>2; 912 if(n < hl) 913 return; 914 915 // check for MSS option 916 optr = (uchar*)tcphdr + sizeof(Tcphdr); 917 n = hl - sizeof(Tcphdr); 918 for(;;) { 919 if(n <= 0 || *optr == EOLOPT) 920 return; 921 if(*optr == NOOPOPT) { 922 n--; 923 optr++; 924 continue; 925 } 926 optlen = optr[1]; 927 if(optlen < 2 || optlen > n) 928 return; 929 if(*optr == MSSOPT && optlen == MSS_LENGTH) 930 break; 931 n -= optlen; 932 optr += optlen; 933 } 934 935 mss = nhgets(optr+2); 936 if(mss <= TcpMssMax) 937 return; 938 // fit checksum 939 cksum = nhgets(tcphdr->cksum); 940 if(optr-(uchar*)tcphdr & 1) { 941 print("tcpmsshack: odd alignment!\n"); 942 // odd alignments are a pain 943 cksum += nhgets(optr+1); 944 cksum -= (optr[1]<<8)|(TcpMssMax>>8); 945 cksum += (cksum>>16); 946 cksum &= 0xffff; 947 cksum += nhgets(optr+3); 948 cksum -= ((TcpMssMax&0xff)<<8)|optr[4]; 949 cksum += (cksum>>16); 950 } else { 951 cksum += mss; 952 cksum -= TcpMssMax; 953 cksum += (cksum>>16); 954 } 955 hnputs(tcphdr->cksum, cksum); 956 hnputs(optr+2, TcpMssMax); 957 } 958 959 /* 960 * process to read from the ethernet 961 */ 962 static void 963 etherread(void *a) 964 { 965 Port *port = a; 966 Bridge *b = port->bridge; 967 Block *bp, *bp2; 968 Etherpkt *ep; 969 Centry *ce; 970 long md; 971 972 qlock(b); 973 port->readp = up; /* hide identity under a rock for unbind */ 974 975 while(!port->closed){ 976 // release lock to read - error means it is time to quit 977 qunlock(b); 978 if(waserror()) { 979 print("etherread read error: %s\n", up->errstr); 980 qlock(b); 981 break; 982 } 983 if(0) 984 print("devbridge: etherread: reading\n"); 985 bp = devtab[port->data[0]->type]->bread(port->data[0], 986 ETHERMAXTU, 0); 987 if(0) 988 print("devbridge: etherread: blocklen = %d\n", 989 blocklen(bp)); 990 poperror(); 991 qlock(b); 992 if(bp == nil || port->closed) 993 break; 994 if(waserror()) { 995 // print("etherread bridge error\n"); 996 if(bp) 997 freeb(bp); 998 continue; 999 } 1000 if(blocklen(bp) < ETHERMINTU) 1001 error("short packet"); 1002 port->in++; 1003 1004 ep = (Etherpkt*)bp->rp; 1005 cacheupdate(b, ep->s, port->id); 1006 if(b->tcpmss) 1007 tcpmsshack(ep, BLEN(bp)); 1008 1009 /* 1010 * delay packets to simulate a slow link 1011 */ 1012 if(b->delay0 || b->delayn){ 1013 md = b->delay0 + b->delayn * BLEN(bp); 1014 if(md > 0) 1015 microdelay(md); 1016 } 1017 1018 if(ep->d[0] & 1) { 1019 log(b, Logmcast, "multicast: port=%d src=%E dst=%E type=%#.4ux\n", 1020 port->id, ep->s, ep->d, ep->type[0]<<8|ep->type[1]); 1021 port->inmulti++; 1022 bp2 = bp; bp = nil; 1023 ethermultiwrite(b, bp2, port); 1024 } else { 1025 ce = cachelookup(b, ep->d); 1026 if(ce == nil) { 1027 b->miss++; 1028 port->inunknown++; 1029 bp2 = bp; bp = nil; 1030 ethermultiwrite(b, bp2, port); 1031 }else if(ce->port != port->id){ 1032 b->hit++; 1033 bp2 = bp; bp = nil; 1034 etherwrite(b->port[ce->port], bp2); 1035 } 1036 } 1037 1038 poperror(); 1039 if(bp) 1040 freeb(bp); 1041 } 1042 // print("etherread: trying to exit\n"); 1043 port->readp = nil; 1044 portfree(port); 1045 qunlock(b); 1046 pexit("hangup", 1); 1047 } 1048 1049 static int 1050 fragment(Etherpkt *epkt, int n) 1051 { 1052 Iphdr *iphdr; 1053 1054 if(n <= TunnelMtu) 1055 return 0; 1056 1057 /* ignore non-ipv4 packets */ 1058 if(nhgets(epkt->type) != ETIP4) 1059 return 0; 1060 iphdr = (Iphdr*)(epkt->data); 1061 n -= ETHERHDRSIZE; 1062 /* 1063 * ignore: IP runt packets, bad packets (I don't handle IP 1064 * options for the moment), packets with don't-fragment set, 1065 * and short blocks. 1066 */ 1067 if(n < IPHDR || iphdr->vihl != (IP_VER4|IP_HLEN4) || 1068 iphdr->frag[0] & (IP_DF>>8) || nhgets(iphdr->length) > n) 1069 return 0; 1070 1071 return 1; 1072 } 1073 1074 1075 static void 1076 etherwrite(Port *port, Block *bp) 1077 { 1078 Iphdr *eh, *feh; 1079 Etherpkt *epkt; 1080 int n, lid, len, seglen, chunk, dlen, blklen, offset, mf; 1081 Block *xp, *nb; 1082 ushort fragoff, frag; 1083 1084 port->out++; 1085 epkt = (Etherpkt*)bp->rp; 1086 n = blocklen(bp); 1087 if(port->type != Ttun || !fragment(epkt, n)) { 1088 devtab[port->data[1]->type]->bwrite(port->data[1], bp, 0); 1089 return; 1090 } 1091 port->outfrag++; 1092 if(waserror()){ 1093 freeblist(bp); 1094 nexterror(); 1095 } 1096 1097 seglen = (TunnelMtu - ETHERHDRSIZE - IPHDR) & ~7; 1098 eh = (Iphdr*)(epkt->data); 1099 len = nhgets(eh->length); 1100 frag = nhgets(eh->frag); 1101 mf = frag & IP_MF; 1102 frag <<= 3; 1103 dlen = len - IPHDR; 1104 xp = bp; 1105 lid = nhgets(eh->id); 1106 offset = ETHERHDRSIZE+IPHDR; 1107 while(xp != nil && offset && offset >= BLEN(xp)) { 1108 offset -= BLEN(xp); 1109 xp = xp->next; 1110 } 1111 xp->rp += offset; 1112 1113 if(0) 1114 print("seglen=%d, dlen=%d, mf=%x, frag=%d\n", 1115 seglen, dlen, mf, frag); 1116 for(fragoff = 0; fragoff < dlen; fragoff += seglen) { 1117 nb = allocb(ETHERHDRSIZE+IPHDR+seglen); 1118 1119 feh = (Iphdr*)(nb->wp+ETHERHDRSIZE); 1120 1121 memmove(nb->wp, epkt, ETHERHDRSIZE+IPHDR); 1122 nb->wp += ETHERHDRSIZE+IPHDR; 1123 1124 if((fragoff + seglen) >= dlen) { 1125 seglen = dlen - fragoff; 1126 hnputs(feh->frag, (frag+fragoff)>>3 | mf); 1127 } 1128 else 1129 hnputs(feh->frag, (frag+fragoff>>3) | IP_MF); 1130 1131 hnputs(feh->length, seglen + IPHDR); 1132 hnputs(feh->id, lid); 1133 1134 /* Copy up the data area */ 1135 chunk = seglen; 1136 while(chunk) { 1137 blklen = chunk; 1138 if(BLEN(xp) < chunk) 1139 blklen = BLEN(xp); 1140 memmove(nb->wp, xp->rp, blklen); 1141 nb->wp += blklen; 1142 xp->rp += blklen; 1143 chunk -= blklen; 1144 if(xp->rp == xp->wp) 1145 xp = xp->next; 1146 } 1147 1148 feh->cksum[0] = 0; 1149 feh->cksum[1] = 0; 1150 hnputs(feh->cksum, ipcsum(&feh->vihl)); 1151 1152 /* don't generate small packets */ 1153 if(BLEN(nb) < ETHERMINTU) 1154 nb->wp = nb->rp + ETHERMINTU; 1155 devtab[port->data[1]->type]->bwrite(port->data[1], nb, 0); 1156 } 1157 poperror(); 1158 freeblist(bp); 1159 } 1160 1161 // hold b lock 1162 static void 1163 portfree(Port *port) 1164 { 1165 port->ref--; 1166 if(port->ref < 0) 1167 panic("portfree: bad ref"); 1168 if(port->ref > 0) 1169 return; 1170 1171 if(port->data[0]) 1172 cclose(port->data[0]); 1173 if(port->data[1]) 1174 cclose(port->data[1]); 1175 memset(port, 0, sizeof(Port)); 1176 free(port); 1177 } 1178 1179 Dev bridgedevtab = { 1180 'B', 1181 "bridge", 1182 1183 devreset, 1184 bridgeinit, 1185 devshutdown, 1186 bridgeattach, 1187 bridgewalk, 1188 bridgestat, 1189 bridgeopen, 1190 devcreate, 1191 bridgeclose, 1192 bridgeread, 1193 devbread, 1194 bridgewrite, 1195 devbwrite, 1196 devremove, 1197 devwstat, 1198 }; 1199