1 /* 2 * IPv4 Ethernet bridge 3 */ 4 #include "u.h" 5 #include "../port/lib.h" 6 #include "mem.h" 7 #include "dat.h" 8 #include "fns.h" 9 #include "../ip/ip.h" 10 #include "../port/netif.h" 11 #include "../port/error.h" 12 13 typedef struct Bridge Bridge; 14 typedef struct Port Port; 15 typedef struct Centry Centry; 16 typedef struct Iphdr Iphdr; 17 typedef struct Tcphdr Tcphdr; 18 19 enum 20 { 21 Qtopdir= 1, /* top level directory */ 22 23 Qbridgedir, /* bridge* directory */ 24 Qbctl, 25 Qstats, 26 Qcache, 27 Qlog, 28 29 Qportdir, /* directory for a protocol */ 30 Qpctl, 31 Qlocal, 32 Qstatus, 33 34 MaxQ, 35 36 Maxbridge= 4, 37 Maxport= 128, // power of 2 38 CacheHash= 257, // prime 39 CacheLook= 5, // how many cache entries to examine 40 CacheSize= (CacheHash+CacheLook-1), 41 CacheTimeout= 5*60, // timeout for cache entry in seconds 42 43 TcpMssMax = 1300, // max desirable Tcp MSS value 44 TunnelMtu = 1400, 45 }; 46 47 static Dirtab bridgedirtab[]={ 48 "ctl", {Qbctl}, 0, 0666, 49 "stats", {Qstats}, 0, 0444, 50 "cache", {Qcache}, 0, 0444, 51 "log", {Qlog}, 0, 0666, 52 }; 53 54 static Dirtab portdirtab[]={ 55 "ctl", {Qpctl}, 0, 0666, 56 "local", {Qlocal}, 0, 0444, 57 "status", {Qstatus}, 0, 0444, 58 }; 59 60 enum { 61 Logcache= (1<<0), 62 Logmcast= (1<<1), 63 }; 64 65 // types of interfaces 66 enum 67 { 68 Tether, 69 Ttun, 70 }; 71 72 static Logflag logflags[] = 73 { 74 { "cache", Logcache, }, 75 { "multicast", Logmcast, }, 76 { nil, 0, }, 77 }; 78 79 static Dirtab *dirtab[MaxQ]; 80 81 #define TYPE(x) (((ulong)(x).path) & 0xff) 82 #define PORT(x) ((((ulong)(x).path) >> 8)&(Maxport-1)) 83 #define QID(x, y) (((x)<<8) | (y)) 84 85 struct Centry 86 { 87 uchar d[Eaddrlen]; 88 int port; 89 long expire; // entry expires this many seconds after bootime 90 long src; 91 long dst; 92 }; 93 94 struct Bridge 95 { 96 QLock; 97 int nport; 98 Port *port[Maxport]; 99 Centry cache[CacheSize]; 100 ulong hit; 101 ulong miss; 102 ulong copy; 103 long delay0; // constant microsecond delay per packet 104 long delayn; // microsecond delay per byte 105 int tcpmss; // modify tcpmss value 106 107 Log; 108 }; 109 110 struct Port 111 { 112 int id; 113 Bridge *bridge; 114 int ref; 115 int closed; 116 117 Chan *data[2]; // channel to data 118 119 Proc *readp; // read proc 120 121 // the following uniquely identifies the port 122 int type; 123 char name[KNAMELEN]; 124 125 // owner hash - avoids bind/unbind races 126 ulong ownhash; 127 128 // various stats 129 int in; // number of packets read 130 int inmulti; // multicast or broadcast 131 int inunknown; // unknown address 132 int out; // number of packets read 133 int outmulti; // multicast or broadcast 134 int outunknown; // unknown address 135 int outfrag; // fragmented the packet 136 int nentry; // number of cache entries for this port 137 }; 138 139 enum { 140 IP_TCPPROTO = 6, 141 EOLOPT = 0, 142 NOOPOPT = 1, 143 MSSOPT = 2, 144 MSS_LENGTH = 4, /* Mean segment size */ 145 SYN = 0x02, /* Pkt. is synchronise */ 146 IPHDR = 20, /* sizeof(Iphdr) */ 147 }; 148 149 struct Iphdr 150 { 151 uchar vihl; /* Version and header length */ 152 uchar tos; /* Type of service */ 153 uchar length[2]; /* packet length */ 154 uchar id[2]; /* ip->identification */ 155 uchar frag[2]; /* Fragment information */ 156 uchar ttl; /* Time to live */ 157 uchar proto; /* Protocol */ 158 uchar cksum[2]; /* Header checksum */ 159 uchar src[4]; /* IP source */ 160 uchar dst[4]; /* IP destination */ 161 }; 162 163 struct Tcphdr 164 { 165 uchar sport[2]; 166 uchar dport[2]; 167 uchar seq[4]; 168 uchar ack[4]; 169 uchar flag[2]; 170 uchar win[2]; 171 uchar cksum[2]; 172 uchar urg[2]; 173 }; 174 175 static Bridge bridgetab[Maxbridge]; 176 177 static int m2p[] = { 178 [OREAD] 4, 179 [OWRITE] 2, 180 [ORDWR] 6 181 }; 182 183 static int bridgegen(Chan *c, char*, Dirtab*, int, int s, Dir *dp); 184 static void portbind(Bridge *b, int argc, char *argv[]); 185 static void portunbind(Bridge *b, int argc, char *argv[]); 186 static void etherread(void *a); 187 static char *cachedump(Bridge *b); 188 static void portfree(Port *port); 189 static void cacheflushport(Bridge *b, int port); 190 static void etherwrite(Port *port, Block *bp); 191 192 static void 193 bridgeinit(void) 194 { 195 int i; 196 Dirtab *dt; 197 198 // setup dirtab with non directory entries 199 for(i=0; i<nelem(bridgedirtab); i++) { 200 dt = bridgedirtab + i; 201 dirtab[TYPE(dt->qid)] = dt; 202 } 203 for(i=0; i<nelem(portdirtab); i++) { 204 dt = portdirtab + i; 205 dirtab[TYPE(dt->qid)] = dt; 206 } 207 } 208 209 static Chan* 210 bridgeattach(char* spec) 211 { 212 Chan *c; 213 int dev; 214 215 dev = atoi(spec); 216 if(dev<0 || dev >= Maxbridge) 217 error("bad specification"); 218 219 c = devattach('B', spec); 220 mkqid(&c->qid, QID(0, Qtopdir), 0, QTDIR); 221 c->dev = dev; 222 return c; 223 } 224 225 static Walkqid* 226 bridgewalk(Chan *c, Chan *nc, char **name, int nname) 227 { 228 return devwalk(c, nc, name, nname, (Dirtab*)0, 0, bridgegen); 229 } 230 231 static int 232 bridgestat(Chan* c, uchar* db, int n) 233 { 234 return devstat(c, db, n, (Dirtab *)0, 0L, bridgegen); 235 } 236 237 static Chan* 238 bridgeopen(Chan* c, int omode) 239 { 240 int perm; 241 Bridge *b; 242 243 omode &= 3; 244 perm = m2p[omode]; 245 USED(perm); 246 247 b = bridgetab + c->dev; 248 USED(b); 249 250 switch(TYPE(c->qid)) { 251 default: 252 break; 253 case Qlog: 254 logopen(b); 255 break; 256 case Qcache: 257 c->aux = cachedump(b); 258 break; 259 } 260 c->mode = openmode(omode); 261 c->flag |= COPEN; 262 c->offset = 0; 263 return c; 264 } 265 266 static void 267 bridgeclose(Chan* c) 268 { 269 Bridge *b = bridgetab + c->dev; 270 271 switch(TYPE(c->qid)) { 272 case Qcache: 273 if(c->flag & COPEN) 274 free(c->aux); 275 break; 276 case Qlog: 277 if(c->flag & COPEN) 278 logclose(b); 279 break; 280 } 281 } 282 283 static long 284 bridgeread(Chan *c, void *a, long n, vlong off) 285 { 286 char buf[256]; 287 Bridge *b = bridgetab + c->dev; 288 Port *port; 289 int i, ingood, outgood; 290 291 USED(off); 292 switch(TYPE(c->qid)) { 293 default: 294 error(Eperm); 295 case Qtopdir: 296 case Qbridgedir: 297 case Qportdir: 298 return devdirread(c, a, n, 0, 0, bridgegen); 299 case Qlog: 300 return logread(b, a, off, n); 301 case Qstatus: 302 qlock(b); 303 port = b->port[PORT(c->qid)]; 304 if(port == 0) 305 strcpy(buf, "unbound\n"); 306 else { 307 i = 0; 308 switch(port->type) { 309 default: 310 panic("bridgeread: unknown port type: %d", 311 port->type); 312 case Tether: 313 i += snprint(buf+i, sizeof(buf)-i, "ether %s: ", port->name); 314 break; 315 case Ttun: 316 i += snprint(buf+i, sizeof(buf)-i, "tunnel %s: ", port->name); 317 break; 318 } 319 ingood = port->in - port->inmulti - port->inunknown; 320 outgood = port->out - port->outmulti - port->outunknown; 321 i += snprint(buf+i, sizeof(buf)-i, 322 "in=%d(%d:%d:%d) out=%d(%d:%d:%d:%d)\n", 323 port->in, ingood, port->inmulti, port->inunknown, 324 port->out, outgood, port->outmulti, 325 port->outunknown, port->outfrag); 326 USED(i); 327 } 328 n = readstr(off, a, n, buf); 329 qunlock(b); 330 return n; 331 case Qbctl: 332 snprint(buf, sizeof(buf), "%s tcpmss\ndelay %ld %ld\n", 333 b->tcpmss ? "set" : "clear", b->delay0, b->delayn); 334 n = readstr(off, a, n, buf); 335 return n; 336 case Qcache: 337 n = readstr(off, a, n, c->aux); 338 return n; 339 case Qstats: 340 snprint(buf, sizeof(buf), "hit=%uld miss=%uld copy=%uld\n", 341 b->hit, b->miss, b->copy); 342 n = readstr(off, a, n, buf); 343 return n; 344 } 345 } 346 347 static void 348 bridgeoption(Bridge *b, char *option, int value) 349 { 350 if(strcmp(option, "tcpmss") == 0) 351 b->tcpmss = value; 352 else 353 error("unknown bridge option"); 354 } 355 356 357 static long 358 bridgewrite(Chan *c, void *a, long n, vlong off) 359 { 360 Bridge *b = bridgetab + c->dev; 361 Cmdbuf *cb; 362 char *arg0, *p; 363 364 USED(off); 365 switch(TYPE(c->qid)) { 366 default: 367 error(Eperm); 368 case Qbctl: 369 cb = parsecmd(a, n); 370 qlock(b); 371 if(waserror()) { 372 qunlock(b); 373 free(cb); 374 nexterror(); 375 } 376 if(cb->nf == 0) 377 error("short write"); 378 arg0 = cb->f[0]; 379 if(strcmp(arg0, "bind") == 0) { 380 portbind(b, cb->nf-1, cb->f+1); 381 } else if(strcmp(arg0, "unbind") == 0) { 382 portunbind(b, cb->nf-1, cb->f+1); 383 } else if(strcmp(arg0, "cacheflush") == 0) { 384 log(b, Logcache, "cache flush\n"); 385 memset(b->cache, 0, CacheSize*sizeof(Centry)); 386 } else if(strcmp(arg0, "set") == 0) { 387 if(cb->nf != 2) 388 error("usage: set option"); 389 bridgeoption(b, cb->f[1], 1); 390 } else if(strcmp(arg0, "clear") == 0) { 391 if(cb->nf != 2) 392 error("usage: clear option"); 393 bridgeoption(b, cb->f[1], 0); 394 } else if(strcmp(arg0, "delay") == 0) { 395 if(cb->nf != 3) 396 error("usage: delay delay0 delayn"); 397 b->delay0 = strtol(cb->f[1], nil, 10); 398 b->delayn = strtol(cb->f[2], nil, 10); 399 } else 400 error("unknown control request"); 401 poperror(); 402 qunlock(b); 403 free(cb); 404 return n; 405 case Qlog: 406 cb = parsecmd(a, n); 407 p = logctl(b, cb->nf, cb->f, logflags); 408 free(cb); 409 if(p != nil) 410 error(p); 411 return n; 412 } 413 } 414 415 static int 416 bridgegen(Chan *c, char *, Dirtab*, int, int s, Dir *dp) 417 { 418 Bridge *b = bridgetab + c->dev; 419 int type = TYPE(c->qid); 420 Dirtab *dt; 421 Qid qid; 422 423 if(s == DEVDOTDOT){ 424 switch(TYPE(c->qid)){ 425 case Qtopdir: 426 case Qbridgedir: 427 snprint(up->genbuf, sizeof(up->genbuf), "#B%ld", c->dev); 428 mkqid(&qid, Qtopdir, 0, QTDIR); 429 devdir(c, qid, up->genbuf, 0, eve, 0555, dp); 430 break; 431 case Qportdir: 432 snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev); 433 mkqid(&qid, Qbridgedir, 0, QTDIR); 434 devdir(c, qid, up->genbuf, 0, eve, 0555, dp); 435 break; 436 default: 437 panic("bridgewalk %llux", c->qid.path); 438 } 439 return 1; 440 } 441 442 switch(type) { 443 default: 444 /* non-directory entries end up here */ 445 if(c->qid.type & QTDIR) 446 panic("bridgegen: unexpected directory"); 447 if(s != 0) 448 return -1; 449 dt = dirtab[TYPE(c->qid)]; 450 if(dt == nil) 451 panic("bridgegen: unknown type: %lud", TYPE(c->qid)); 452 devdir(c, c->qid, dt->name, dt->length, eve, dt->perm, dp); 453 return 1; 454 case Qtopdir: 455 if(s != 0) 456 return -1; 457 snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev); 458 mkqid(&qid, QID(0, Qbridgedir), 0, QTDIR); 459 devdir(c, qid, up->genbuf, 0, eve, 0555, dp); 460 return 1; 461 case Qbridgedir: 462 if(s<nelem(bridgedirtab)) { 463 dt = bridgedirtab+s; 464 devdir(c, dt->qid, dt->name, dt->length, eve, dt->perm, dp); 465 return 1; 466 } 467 s -= nelem(bridgedirtab); 468 if(s >= b->nport) 469 return -1; 470 mkqid(&qid, QID(s, Qportdir), 0, QTDIR); 471 snprint(up->genbuf, sizeof(up->genbuf), "%d", s); 472 devdir(c, qid, up->genbuf, 0, eve, 0555, dp); 473 return 1; 474 case Qportdir: 475 if(s>=nelem(portdirtab)) 476 return -1; 477 dt = portdirtab+s; 478 mkqid(&qid, QID(PORT(c->qid),TYPE(dt->qid)), 0, QTFILE); 479 devdir(c, qid, dt->name, dt->length, eve, dt->perm, dp); 480 return 1; 481 } 482 } 483 484 // parse mac address; also in netif.c 485 static int 486 parseaddr(uchar *to, char *from, int alen) 487 { 488 char nip[4]; 489 char *p; 490 int i; 491 492 p = from; 493 for(i = 0; i < alen; i++){ 494 if(*p == 0) 495 return -1; 496 nip[0] = *p++; 497 if(*p == 0) 498 return -1; 499 nip[1] = *p++; 500 nip[2] = 0; 501 to[i] = strtoul(nip, 0, 16); 502 if(*p == ':') 503 p++; 504 } 505 return 0; 506 } 507 508 // assumes b is locked 509 static void 510 portbind(Bridge *b, int argc, char *argv[]) 511 { 512 Port *port; 513 Chan *ctl; 514 int type = 0, i, n; 515 ulong ownhash; 516 char *dev, *dev2 = nil, *p; 517 char buf[100], name[KNAMELEN], path[8*KNAMELEN]; 518 static char usage[] = "usage: bind ether|tunnel name ownhash dev [dev2]"; 519 520 memset(name, 0, KNAMELEN); 521 if(argc < 4) 522 error(usage); 523 if(strcmp(argv[0], "ether") == 0) { 524 if(argc != 4) 525 error(usage); 526 type = Tether; 527 strncpy(name, argv[1], KNAMELEN); 528 name[KNAMELEN-1] = 0; 529 // parseaddr(addr, argv[1], Eaddrlen); 530 } else if(strcmp(argv[0], "tunnel") == 0) { 531 if(argc != 5) 532 error(usage); 533 type = Ttun; 534 strncpy(name, argv[1], KNAMELEN); 535 name[KNAMELEN-1] = 0; 536 // parseip(addr, argv[1]); 537 dev2 = argv[4]; 538 } else 539 error(usage); 540 ownhash = atoi(argv[2]); 541 dev = argv[3]; 542 for(i=0; i<b->nport; i++) { 543 port = b->port[i]; 544 if(port != nil && port->type == type && 545 memcmp(port->name, name, KNAMELEN) == 0) 546 error("port in use"); 547 } 548 for(i=0; i<Maxport; i++) 549 if(b->port[i] == nil) 550 break; 551 if(i == Maxport) 552 error("no more ports"); 553 port = smalloc(sizeof(Port)); 554 port->ref = 1; 555 port->id = i; 556 port->ownhash = ownhash; 557 558 if(waserror()) { 559 portfree(port); 560 nexterror(); 561 } 562 port->type = type; 563 memmove(port->name, name, KNAMELEN); 564 switch(port->type) { 565 default: 566 panic("portbind: unknown port type: %d", type); 567 case Tether: 568 snprint(path, sizeof(path), "%s/clone", dev); 569 ctl = namec(path, Aopen, ORDWR, 0); 570 if(waserror()) { 571 cclose(ctl); 572 nexterror(); 573 } 574 // check addr? 575 576 // get directory name 577 n = devtab[ctl->type]->read(ctl, buf, sizeof(buf), 0); 578 buf[n] = 0; 579 for(p = buf; *p == ' '; p++) 580 ; 581 snprint(path, sizeof(path), "%s/%lud/data", dev, strtoul(p, 0, 0)); 582 583 // setup connection to be promiscuous 584 snprint(buf, sizeof(buf), "connect -1"); 585 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0); 586 snprint(buf, sizeof(buf), "promiscuous"); 587 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0); 588 snprint(buf, sizeof(buf), "bridge"); 589 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0); 590 591 // open data port 592 port->data[0] = namec(path, Aopen, ORDWR, 0); 593 // dup it 594 incref(port->data[0]); 595 port->data[1] = port->data[0]; 596 597 poperror(); 598 cclose(ctl); 599 600 break; 601 case Ttun: 602 port->data[0] = namec(dev, Aopen, OREAD, 0); 603 port->data[1] = namec(dev2, Aopen, OWRITE, 0); 604 break; 605 } 606 607 poperror(); 608 609 /* committed to binding port */ 610 b->port[port->id] = port; 611 port->bridge = b; 612 if(b->nport <= port->id) 613 b->nport = port->id+1; 614 615 // assumes kproc always succeeds 616 kproc("etherread", etherread, port); // poperror must be next 617 port->ref++; 618 } 619 620 // assumes b is locked 621 static void 622 portunbind(Bridge *b, int argc, char *argv[]) 623 { 624 int type = 0, i; 625 char name[KNAMELEN]; 626 ulong ownhash; 627 Port *port = nil; 628 static char usage[] = "usage: unbind ether|tunnel addr [ownhash]"; 629 630 memset(name, 0, KNAMELEN); 631 if(argc < 2 || argc > 3) 632 error(usage); 633 if(strcmp(argv[0], "ether") == 0) { 634 type = Tether; 635 strncpy(name, argv[1], KNAMELEN); 636 name[KNAMELEN-1] = 0; 637 // parseaddr(addr, argv[1], Eaddrlen); 638 } else if(strcmp(argv[0], "tunnel") == 0) { 639 type = Ttun; 640 strncpy(name, argv[1], KNAMELEN); 641 name[KNAMELEN-1] = 0; 642 // parseip(addr, argv[1]); 643 } else 644 error(usage); 645 if(argc == 3) 646 ownhash = atoi(argv[2]); 647 else 648 ownhash = 0; 649 for(i=0; i<b->nport; i++) { 650 port = b->port[i]; 651 if(port != nil && port->type == type && 652 memcmp(port->name, name, KNAMELEN) == 0) 653 break; 654 } 655 if(i == b->nport) 656 error("port not found"); 657 if(ownhash != 0 && port->ownhash != 0 && ownhash != port->ownhash) 658 error("bad owner hash"); 659 660 port->closed = 1; 661 b->port[i] = nil; // port is now unbound 662 cacheflushport(b, i); 663 664 // try and stop reader 665 if(port->readp) 666 postnote(port->readp, 1, "unbind", 0); 667 portfree(port); 668 } 669 670 // assumes b is locked 671 static Centry * 672 cachelookup(Bridge *b, uchar d[Eaddrlen]) 673 { 674 int i; 675 uint h; 676 Centry *p; 677 long sec; 678 679 // dont cache multicast or broadcast 680 if(d[0] & 1) 681 return 0; 682 683 h = 0; 684 for(i=0; i<Eaddrlen; i++) { 685 h *= 7; 686 h += d[i]; 687 } 688 h %= CacheHash; 689 p = b->cache + h; 690 sec = TK2SEC(m->ticks); 691 for(i=0; i<CacheLook; i++,p++) { 692 if(memcmp(d, p->d, Eaddrlen) == 0) { 693 p->dst++; 694 if(sec >= p->expire) { 695 log(b, Logcache, "expired cache entry: %E %d\n", 696 d, p->port); 697 return nil; 698 } 699 p->expire = sec + CacheTimeout; 700 return p; 701 } 702 } 703 log(b, Logcache, "cache miss: %E\n", d); 704 return nil; 705 } 706 707 // assumes b is locked 708 static void 709 cacheupdate(Bridge *b, uchar d[Eaddrlen], int port) 710 { 711 int i; 712 uint h; 713 Centry *p, *pp; 714 long sec; 715 716 // dont cache multicast or broadcast 717 if(d[0] & 1) { 718 log(b, Logcache, "bad source address: %E\n", d); 719 return; 720 } 721 722 h = 0; 723 for(i=0; i<Eaddrlen; i++) { 724 h *= 7; 725 h += d[i]; 726 } 727 h %= CacheHash; 728 p = b->cache + h; 729 pp = p; 730 sec = p->expire; 731 732 // look for oldest entry 733 for(i=0; i<CacheLook; i++,p++) { 734 if(memcmp(p->d, d, Eaddrlen) == 0) { 735 p->expire = TK2SEC(m->ticks) + CacheTimeout; 736 if(p->port != port) { 737 log(b, Logcache, "NIC changed port %d->%d: %E\n", 738 p->port, port, d); 739 p->port = port; 740 } 741 p->src++; 742 return; 743 } 744 if(p->expire < sec) { 745 sec = p->expire; 746 pp = p; 747 } 748 } 749 if(pp->expire != 0) 750 log(b, Logcache, "bumping from cache: %E %d\n", pp->d, pp->port); 751 pp->expire = TK2SEC(m->ticks) + CacheTimeout; 752 memmove(pp->d, d, Eaddrlen); 753 pp->port = port; 754 pp->src = 1; 755 pp->dst = 0; 756 log(b, Logcache, "adding to cache: %E %d\n", pp->d, pp->port); 757 } 758 759 // assumes b is locked 760 static void 761 cacheflushport(Bridge *b, int port) 762 { 763 Centry *ce; 764 int i; 765 766 ce = b->cache; 767 for(i=0; i<CacheSize; i++,ce++) { 768 if(ce->port != port) 769 continue; 770 memset(ce, 0, sizeof(Centry)); 771 } 772 } 773 774 static char * 775 cachedump(Bridge *b) 776 { 777 int i, n; 778 long sec, off; 779 char *buf, *p, *ep; 780 Centry *ce; 781 char c; 782 783 qlock(b); 784 if(waserror()) { 785 qunlock(b); 786 nexterror(); 787 } 788 sec = TK2SEC(m->ticks); 789 n = 0; 790 for(i=0; i<CacheSize; i++) 791 if(b->cache[i].expire != 0) 792 n++; 793 794 n *= 51; // change if print format is changed 795 n += 10; // some slop at the end 796 buf = malloc(n); 797 if(buf == nil) 798 error(Enomem); 799 p = buf; 800 ep = buf + n; 801 ce = b->cache; 802 off = seconds() - sec; 803 for(i=0; i<CacheSize; i++,ce++) { 804 if(ce->expire == 0) 805 continue; 806 c = (sec < ce->expire)?'v':'e'; 807 p += snprint(p, ep-p, "%E %2d %10ld %10ld %10ld %c\n", ce->d, 808 ce->port, ce->src, ce->dst, ce->expire+off, c); 809 } 810 *p = 0; 811 poperror(); 812 qunlock(b); 813 814 return buf; 815 } 816 817 818 819 // assumes b is locked 820 static void 821 ethermultiwrite(Bridge *b, Block *bp, Port *port) 822 { 823 Port *oport; 824 Block *bp2; 825 Etherpkt *ep; 826 int i, mcast; 827 828 if(waserror()) { 829 if(bp) 830 freeb(bp); 831 nexterror(); 832 } 833 834 ep = (Etherpkt*)bp->rp; 835 mcast = ep->d[0] & 1; /* multicast bit of ethernet address */ 836 837 oport = nil; 838 for(i=0; i<b->nport; i++) { 839 if(i == port->id || b->port[i] == nil) 840 continue; 841 /* 842 * we need to forward multicast packets for ipv6, 843 * so always do it. 844 */ 845 if(mcast) 846 b->port[i]->outmulti++; 847 else 848 b->port[i]->outunknown++; 849 850 // delay one so that the last write does not copy 851 if(oport != nil) { 852 b->copy++; 853 bp2 = copyblock(bp, blocklen(bp)); 854 if(!waserror()) { 855 etherwrite(oport, bp2); 856 poperror(); 857 } 858 } 859 oport = b->port[i]; 860 } 861 862 // last write free block 863 if(oport) { 864 bp2 = bp; bp = nil; USED(bp); 865 if(!waserror()) { 866 etherwrite(oport, bp2); 867 poperror(); 868 } 869 } else 870 freeb(bp); 871 872 poperror(); 873 } 874 875 static void 876 tcpmsshack(Etherpkt *epkt, int n) 877 { 878 int hl, optlen; 879 Iphdr *iphdr; 880 Tcphdr *tcphdr; 881 ulong mss, cksum; 882 uchar *optr; 883 884 /* ignore non-ipv4 packets */ 885 if(nhgets(epkt->type) != ETIP4) 886 return; 887 iphdr = (Iphdr*)(epkt->data); 888 n -= ETHERHDRSIZE; 889 if(n < IPHDR) 890 return; 891 892 /* ignore bad packets */ 893 if(iphdr->vihl != (IP_VER4|IP_HLEN4)) { 894 hl = (iphdr->vihl&0xF)<<2; 895 if((iphdr->vihl&0xF0) != IP_VER4 || hl < (IP_HLEN4<<2)) 896 return; 897 } else 898 hl = IP_HLEN4<<2; 899 900 /* ignore non-tcp packets */ 901 if(iphdr->proto != IP_TCPPROTO) 902 return; 903 n -= hl; 904 if(n < sizeof(Tcphdr)) 905 return; 906 tcphdr = (Tcphdr*)((uchar*)(iphdr) + hl); 907 // MSS can only appear in SYN packet 908 if(!(tcphdr->flag[1] & SYN)) 909 return; 910 hl = (tcphdr->flag[0] & 0xf0)>>2; 911 if(n < hl) 912 return; 913 914 // check for MSS option 915 optr = (uchar*)tcphdr + sizeof(Tcphdr); 916 n = hl - sizeof(Tcphdr); 917 for(;;) { 918 if(n <= 0 || *optr == EOLOPT) 919 return; 920 if(*optr == NOOPOPT) { 921 n--; 922 optr++; 923 continue; 924 } 925 optlen = optr[1]; 926 if(optlen < 2 || optlen > n) 927 return; 928 if(*optr == MSSOPT && optlen == MSS_LENGTH) 929 break; 930 n -= optlen; 931 optr += optlen; 932 } 933 934 mss = nhgets(optr+2); 935 if(mss <= TcpMssMax) 936 return; 937 // fit checksum 938 cksum = nhgets(tcphdr->cksum); 939 if(optr-(uchar*)tcphdr & 1) { 940 print("tcpmsshack: odd alignment!\n"); 941 // odd alignments are a pain 942 cksum += nhgets(optr+1); 943 cksum -= (optr[1]<<8)|(TcpMssMax>>8); 944 cksum += (cksum>>16); 945 cksum &= 0xffff; 946 cksum += nhgets(optr+3); 947 cksum -= ((TcpMssMax&0xff)<<8)|optr[4]; 948 cksum += (cksum>>16); 949 } else { 950 cksum += mss; 951 cksum -= TcpMssMax; 952 cksum += (cksum>>16); 953 } 954 hnputs(tcphdr->cksum, cksum); 955 hnputs(optr+2, TcpMssMax); 956 } 957 958 /* 959 * process to read from the ethernet 960 */ 961 static void 962 etherread(void *a) 963 { 964 Port *port = a; 965 Bridge *b = port->bridge; 966 Block *bp, *bp2; 967 Etherpkt *ep; 968 Centry *ce; 969 long md; 970 971 qlock(b); 972 port->readp = up; /* hide identity under a rock for unbind */ 973 974 while(!port->closed){ 975 // release lock to read - error means it is time to quit 976 qunlock(b); 977 if(waserror()) { 978 print("etherread read error: %s\n", up->errstr); 979 qlock(b); 980 break; 981 } 982 if(0) 983 print("devbridge: etherread: reading\n"); 984 bp = devtab[port->data[0]->type]->bread(port->data[0], 985 ETHERMAXTU, 0); 986 if(0) 987 print("devbridge: etherread: blocklen = %d\n", 988 blocklen(bp)); 989 poperror(); 990 qlock(b); 991 if(bp == nil || port->closed) 992 break; 993 if(waserror()) { 994 // print("etherread bridge error\n"); 995 if(bp) 996 freeb(bp); 997 continue; 998 } 999 if(blocklen(bp) < ETHERMINTU) 1000 error("short packet"); 1001 port->in++; 1002 1003 ep = (Etherpkt*)bp->rp; 1004 cacheupdate(b, ep->s, port->id); 1005 if(b->tcpmss) 1006 tcpmsshack(ep, BLEN(bp)); 1007 1008 /* 1009 * delay packets to simulate a slow link 1010 */ 1011 if(b->delay0 || b->delayn){ 1012 md = b->delay0 + b->delayn * BLEN(bp); 1013 if(md > 0) 1014 microdelay(md); 1015 } 1016 1017 if(ep->d[0] & 1) { 1018 log(b, Logmcast, "multicast: port=%d src=%E dst=%E type=%#.4ux\n", 1019 port->id, ep->s, ep->d, ep->type[0]<<8|ep->type[1]); 1020 port->inmulti++; 1021 bp2 = bp; bp = nil; 1022 ethermultiwrite(b, bp2, port); 1023 } else { 1024 ce = cachelookup(b, ep->d); 1025 if(ce == nil) { 1026 b->miss++; 1027 port->inunknown++; 1028 bp2 = bp; bp = nil; 1029 ethermultiwrite(b, bp2, port); 1030 }else if(ce->port != port->id){ 1031 b->hit++; 1032 bp2 = bp; bp = nil; 1033 etherwrite(b->port[ce->port], bp2); 1034 } 1035 } 1036 1037 poperror(); 1038 if(bp) 1039 freeb(bp); 1040 } 1041 // print("etherread: trying to exit\n"); 1042 port->readp = nil; 1043 portfree(port); 1044 qunlock(b); 1045 pexit("hangup", 1); 1046 } 1047 1048 static int 1049 fragment(Etherpkt *epkt, int n) 1050 { 1051 Iphdr *iphdr; 1052 1053 if(n <= TunnelMtu) 1054 return 0; 1055 1056 /* ignore non-ipv4 packets */ 1057 if(nhgets(epkt->type) != ETIP4) 1058 return 0; 1059 iphdr = (Iphdr*)(epkt->data); 1060 n -= ETHERHDRSIZE; 1061 /* 1062 * ignore: IP runt packets, bad packets (I don't handle IP 1063 * options for the moment), packets with don't-fragment set, 1064 * and short blocks. 1065 */ 1066 if(n < IPHDR || iphdr->vihl != (IP_VER4|IP_HLEN4) || 1067 iphdr->frag[0] & (IP_DF>>8) || nhgets(iphdr->length) > n) 1068 return 0; 1069 1070 return 1; 1071 } 1072 1073 1074 static void 1075 etherwrite(Port *port, Block *bp) 1076 { 1077 Iphdr *eh, *feh; 1078 Etherpkt *epkt; 1079 int n, lid, len, seglen, chunk, dlen, blklen, offset, mf; 1080 Block *xp, *nb; 1081 ushort fragoff, frag; 1082 1083 port->out++; 1084 epkt = (Etherpkt*)bp->rp; 1085 n = blocklen(bp); 1086 if(port->type != Ttun || !fragment(epkt, n)) { 1087 devtab[port->data[1]->type]->bwrite(port->data[1], bp, 0); 1088 return; 1089 } 1090 port->outfrag++; 1091 if(waserror()){ 1092 freeblist(bp); 1093 nexterror(); 1094 } 1095 1096 seglen = (TunnelMtu - ETHERHDRSIZE - IPHDR) & ~7; 1097 eh = (Iphdr*)(epkt->data); 1098 len = nhgets(eh->length); 1099 frag = nhgets(eh->frag); 1100 mf = frag & IP_MF; 1101 frag <<= 3; 1102 dlen = len - IPHDR; 1103 xp = bp; 1104 lid = nhgets(eh->id); 1105 offset = ETHERHDRSIZE+IPHDR; 1106 while(xp != nil && offset && offset >= BLEN(xp)) { 1107 offset -= BLEN(xp); 1108 xp = xp->next; 1109 } 1110 xp->rp += offset; 1111 1112 if(0) 1113 print("seglen=%d, dlen=%d, mf=%x, frag=%d\n", 1114 seglen, dlen, mf, frag); 1115 for(fragoff = 0; fragoff < dlen; fragoff += seglen) { 1116 nb = allocb(ETHERHDRSIZE+IPHDR+seglen); 1117 1118 feh = (Iphdr*)(nb->wp+ETHERHDRSIZE); 1119 1120 memmove(nb->wp, epkt, ETHERHDRSIZE+IPHDR); 1121 nb->wp += ETHERHDRSIZE+IPHDR; 1122 1123 if((fragoff + seglen) >= dlen) { 1124 seglen = dlen - fragoff; 1125 hnputs(feh->frag, (frag+fragoff)>>3 | mf); 1126 } 1127 else 1128 hnputs(feh->frag, (frag+fragoff>>3) | IP_MF); 1129 1130 hnputs(feh->length, seglen + IPHDR); 1131 hnputs(feh->id, lid); 1132 1133 /* Copy up the data area */ 1134 chunk = seglen; 1135 while(chunk) { 1136 blklen = chunk; 1137 if(BLEN(xp) < chunk) 1138 blklen = BLEN(xp); 1139 memmove(nb->wp, xp->rp, blklen); 1140 nb->wp += blklen; 1141 xp->rp += blklen; 1142 chunk -= blklen; 1143 if(xp->rp == xp->wp) 1144 xp = xp->next; 1145 } 1146 1147 feh->cksum[0] = 0; 1148 feh->cksum[1] = 0; 1149 hnputs(feh->cksum, ipcsum(&feh->vihl)); 1150 1151 /* don't generate small packets */ 1152 if(BLEN(nb) < ETHERMINTU) 1153 nb->wp = nb->rp + ETHERMINTU; 1154 devtab[port->data[1]->type]->bwrite(port->data[1], nb, 0); 1155 } 1156 poperror(); 1157 freeblist(bp); 1158 } 1159 1160 // hold b lock 1161 static void 1162 portfree(Port *port) 1163 { 1164 port->ref--; 1165 if(port->ref < 0) 1166 panic("portfree: bad ref"); 1167 if(port->ref > 0) 1168 return; 1169 1170 if(port->data[0]) 1171 cclose(port->data[0]); 1172 if(port->data[1]) 1173 cclose(port->data[1]); 1174 memset(port, 0, sizeof(Port)); 1175 free(port); 1176 } 1177 1178 Dev bridgedevtab = { 1179 'B', 1180 "bridge", 1181 1182 devreset, 1183 bridgeinit, 1184 devshutdown, 1185 bridgeattach, 1186 bridgewalk, 1187 bridgestat, 1188 bridgeopen, 1189 devcreate, 1190 bridgeclose, 1191 bridgeread, 1192 devbread, 1193 bridgewrite, 1194 devbwrite, 1195 devremove, 1196 devwstat, 1197 }; 1198