1 /*
2 * IPv4 Ethernet bridge
3 */
4 #include "u.h"
5 #include "../port/lib.h"
6 #include "mem.h"
7 #include "dat.h"
8 #include "fns.h"
9 #include "../ip/ip.h"
10 #include "../port/netif.h"
11 #include "../port/error.h"
12
/* short names for the structures defined further down in this file */
typedef struct Bridge Bridge;
typedef struct Centry Centry;
typedef struct Iphdr Iphdr;
typedef struct Port Port;
typedef struct Tcphdr Tcphdr;
18
enum
{
	/* file types, kept in the low byte of a qid path (see TYPE) */
	Qtopdir		= 1,	/* top level directory */

	Qbridgedir,		/* bridge* directory */
	Qbctl,
	Qstats,
	Qcache,
	Qlog,

	Qportdir,		/* directory for a protocol */
	Qpctl,
	Qlocal,
	Qstatus,

	MaxQ,

	/* table sizes and tunables */
	Maxbridge	= 4,
	Maxport		= 128,	/* power of 2 */
	CacheHash	= 257,	/* prime */
	CacheLook	= 5,	/* how many cache entries to examine */
	CacheSize	= (CacheHash+CacheLook-1),
	CacheTimeout	= 5*60,	/* timeout for cache entry in seconds */

	TcpMssMax	= 1300,	/* max desirable Tcp MSS value */
	TunnelMtu	= 1400,
};
46
47 static Dirtab bridgedirtab[]={
48 "ctl", {Qbctl}, 0, 0666,
49 "stats", {Qstats}, 0, 0444,
50 "cache", {Qcache}, 0, 0444,
51 "log", {Qlog}, 0, 0666,
52 };
53
54 static Dirtab portdirtab[]={
55 "ctl", {Qpctl}, 0, 0666,
56 "local", {Qlocal}, 0, 0444,
57 "status", {Qstatus}, 0, 0444,
58 };
59
/* event classes passed to log(); named in logflags */
enum
{
	Logcache	= 1<<0,
	Logmcast	= 1<<1,
};
64
/* kinds of interface a port can be bound to (Port.type) */
enum
{
	Tether,		/* ethernet */
	Ttun,		/* tunnel */
};
71
72 static Logflag logflags[] =
73 {
74 { "cache", Logcache, },
75 { "multicast", Logmcast, },
76 { nil, 0, },
77 };
78
79 static Dirtab *dirtab[MaxQ];
80
/*
 * qid path layout: the low 8 bits hold the file type,
 * the bits above them hold the port number.
 */
#define QID(x, y)	(((x)<<8) | (y))
#define TYPE(x)		(((ulong)(x).path) & 0xff)
#define PORT(x)		((((ulong)(x).path) >> 8)&(Maxport-1))
84
85 struct Centry
86 {
87 uchar d[Eaddrlen];
88 int port;
89 long expire; // entry expires this many seconds after bootime
90 long src;
91 long dst;
92 };
93
94 struct Bridge
95 {
96 QLock;
97 int nport;
98 Port *port[Maxport];
99 Centry cache[CacheSize];
100 ulong hit;
101 ulong miss;
102 ulong copy;
103 long delay0; // constant microsecond delay per packet
104 long delayn; // microsecond delay per byte
105 int tcpmss; // modify tcpmss value
106
107 Log;
108 };
109
110 struct Port
111 {
112 int id;
113 Bridge *bridge;
114 int ref;
115 int closed;
116
117 Chan *data[2]; // channel to data
118
119 Proc *readp; // read proc
120
121 // the following uniquely identifies the port
122 int type;
123 char name[KNAMELEN];
124
125 // owner hash - avoids bind/unbind races
126 ulong ownhash;
127
128 // various stats
129 int in; // number of packets read
130 int inmulti; // multicast or broadcast
131 int inunknown; // unknown address
132 int out; // number of packets read
133 int outmulti; // multicast or broadcast
134 int outunknown; // unknown address
135 int outfrag; // fragmented the packet
136 int nentry; // number of cache entries for this port
137 };
138
/* IP and TCP constants used by tcpmsshack and the fragmenter */
enum
{
	IP_TCPPROTO	= 6,	/* value of Iphdr.proto for TCP */
	EOLOPT		= 0,	/* TCP option kind: end of option list */
	NOOPOPT		= 1,	/* TCP option kind: no operation */
	MSSOPT		= 2,	/* TCP option kind: maximum segment size */
	MSS_LENGTH	= 4,	/* length of the maximum segment size option */
	SYN		= 0x02,	/* Pkt. is synchronise */
	IPHDR		= 20,	/* sizeof(Iphdr) */
};
148
149 struct Iphdr
150 {
151 uchar vihl; /* Version and header length */
152 uchar tos; /* Type of service */
153 uchar length[2]; /* packet length */
154 uchar id[2]; /* ip->identification */
155 uchar frag[2]; /* Fragment information */
156 uchar ttl; /* Time to live */
157 uchar proto; /* Protocol */
158 uchar cksum[2]; /* Header checksum */
159 uchar src[4]; /* IP source */
160 uchar dst[4]; /* IP destination */
161 };
162
163 struct Tcphdr
164 {
165 uchar sport[2];
166 uchar dport[2];
167 uchar seq[4];
168 uchar ack[4];
169 uchar flag[2];
170 uchar win[2];
171 uchar cksum[2];
172 uchar urg[2];
173 };
174
175 static Bridge bridgetab[Maxbridge];
176
177 static int m2p[] = {
178 [OREAD] 4,
179 [OWRITE] 2,
180 [ORDWR] 6
181 };
182
183 static int bridgegen(Chan *c, char*, Dirtab*, int, int s, Dir *dp);
184 static void portbind(Bridge *b, int argc, char *argv[]);
185 static void portunbind(Bridge *b, int argc, char *argv[]);
186 static void etherread(void *a);
187 static char *cachedump(Bridge *b);
188 static void portfree(Port *port);
189 static void cacheflushport(Bridge *b, int port);
190 static void etherwrite(Port *port, Block *bp);
191
192 static void
bridgeinit(void)193 bridgeinit(void)
194 {
195 int i;
196 Dirtab *dt;
197
198 // setup dirtab with non directory entries
199 for(i=0; i<nelem(bridgedirtab); i++) {
200 dt = bridgedirtab + i;
201 dirtab[TYPE(dt->qid)] = dt;
202 }
203 for(i=0; i<nelem(portdirtab); i++) {
204 dt = portdirtab + i;
205 dirtab[TYPE(dt->qid)] = dt;
206 }
207 }
208
209 static Chan*
bridgeattach(char * spec)210 bridgeattach(char* spec)
211 {
212 Chan *c;
213 int dev;
214
215 dev = atoi(spec);
216 if(dev<0 || dev >= Maxbridge)
217 error("bad specification");
218
219 c = devattach('B', spec);
220 mkqid(&c->qid, QID(0, Qtopdir), 0, QTDIR);
221 c->dev = dev;
222 return c;
223 }
224
225 static Walkqid*
bridgewalk(Chan * c,Chan * nc,char ** name,int nname)226 bridgewalk(Chan *c, Chan *nc, char **name, int nname)
227 {
228 return devwalk(c, nc, name, nname, (Dirtab*)0, 0, bridgegen);
229 }
230
231 static int
bridgestat(Chan * c,uchar * db,int n)232 bridgestat(Chan* c, uchar* db, int n)
233 {
234 return devstat(c, db, n, (Dirtab *)0, 0L, bridgegen);
235 }
236
237 static Chan*
bridgeopen(Chan * c,int omode)238 bridgeopen(Chan* c, int omode)
239 {
240 int perm;
241 Bridge *b;
242
243 omode &= 3;
244 perm = m2p[omode];
245 USED(perm);
246
247 b = bridgetab + c->dev;
248 USED(b);
249
250 switch(TYPE(c->qid)) {
251 default:
252 break;
253 case Qlog:
254 logopen(b);
255 break;
256 case Qcache:
257 c->aux = cachedump(b);
258 break;
259 }
260 c->mode = openmode(omode);
261 c->flag |= COPEN;
262 c->offset = 0;
263 return c;
264 }
265
266 static void
bridgeclose(Chan * c)267 bridgeclose(Chan* c)
268 {
269 Bridge *b = bridgetab + c->dev;
270
271 switch(TYPE(c->qid)) {
272 case Qcache:
273 if(c->flag & COPEN)
274 free(c->aux);
275 break;
276 case Qlog:
277 if(c->flag & COPEN)
278 logclose(b);
279 break;
280 }
281 }
282
283 static long
bridgeread(Chan * c,void * a,long n,vlong off)284 bridgeread(Chan *c, void *a, long n, vlong off)
285 {
286 char buf[256];
287 Bridge *b = bridgetab + c->dev;
288 Port *port;
289 int i, ingood, outgood;
290
291 USED(off);
292 switch(TYPE(c->qid)) {
293 default:
294 error(Eperm);
295 case Qtopdir:
296 case Qbridgedir:
297 case Qportdir:
298 return devdirread(c, a, n, 0, 0, bridgegen);
299 case Qlog:
300 return logread(b, a, off, n);
301 case Qstatus:
302 qlock(b);
303 port = b->port[PORT(c->qid)];
304 if(port == 0)
305 strcpy(buf, "unbound\n");
306 else {
307 i = 0;
308 switch(port->type) {
309 default:
310 panic("bridgeread: unknown port type: %d",
311 port->type);
312 case Tether:
313 i += snprint(buf+i, sizeof(buf)-i, "ether %s: ", port->name);
314 break;
315 case Ttun:
316 i += snprint(buf+i, sizeof(buf)-i, "tunnel %s: ", port->name);
317 break;
318 }
319 ingood = port->in - port->inmulti - port->inunknown;
320 outgood = port->out - port->outmulti - port->outunknown;
321 i += snprint(buf+i, sizeof(buf)-i,
322 "in=%d(%d:%d:%d) out=%d(%d:%d:%d:%d)\n",
323 port->in, ingood, port->inmulti, port->inunknown,
324 port->out, outgood, port->outmulti,
325 port->outunknown, port->outfrag);
326 USED(i);
327 }
328 n = readstr(off, a, n, buf);
329 qunlock(b);
330 return n;
331 case Qbctl:
332 snprint(buf, sizeof(buf), "%s tcpmss\ndelay %ld %ld\n",
333 b->tcpmss ? "set" : "clear", b->delay0, b->delayn);
334 n = readstr(off, a, n, buf);
335 return n;
336 case Qcache:
337 n = readstr(off, a, n, c->aux);
338 return n;
339 case Qstats:
340 snprint(buf, sizeof(buf), "hit=%uld miss=%uld copy=%uld\n",
341 b->hit, b->miss, b->copy);
342 n = readstr(off, a, n, buf);
343 return n;
344 }
345 }
346
347 static void
bridgeoption(Bridge * b,char * option,int value)348 bridgeoption(Bridge *b, char *option, int value)
349 {
350 if(strcmp(option, "tcpmss") == 0)
351 b->tcpmss = value;
352 else
353 error("unknown bridge option");
354 }
355
356
357 static long
bridgewrite(Chan * c,void * a,long n,vlong off)358 bridgewrite(Chan *c, void *a, long n, vlong off)
359 {
360 Bridge *b = bridgetab + c->dev;
361 Cmdbuf *cb;
362 char *arg0, *p;
363
364 USED(off);
365 switch(TYPE(c->qid)) {
366 default:
367 error(Eperm);
368 case Qbctl:
369 cb = parsecmd(a, n);
370 qlock(b);
371 if(waserror()) {
372 qunlock(b);
373 free(cb);
374 nexterror();
375 }
376 if(cb->nf == 0)
377 error("short write");
378 arg0 = cb->f[0];
379 if(strcmp(arg0, "bind") == 0) {
380 portbind(b, cb->nf-1, cb->f+1);
381 } else if(strcmp(arg0, "unbind") == 0) {
382 portunbind(b, cb->nf-1, cb->f+1);
383 } else if(strcmp(arg0, "cacheflush") == 0) {
384 log(b, Logcache, "cache flush\n");
385 memset(b->cache, 0, CacheSize*sizeof(Centry));
386 } else if(strcmp(arg0, "set") == 0) {
387 if(cb->nf != 2)
388 error("usage: set option");
389 bridgeoption(b, cb->f[1], 1);
390 } else if(strcmp(arg0, "clear") == 0) {
391 if(cb->nf != 2)
392 error("usage: clear option");
393 bridgeoption(b, cb->f[1], 0);
394 } else if(strcmp(arg0, "delay") == 0) {
395 if(cb->nf != 3)
396 error("usage: delay delay0 delayn");
397 b->delay0 = strtol(cb->f[1], nil, 10);
398 b->delayn = strtol(cb->f[2], nil, 10);
399 } else
400 error("unknown control request");
401 poperror();
402 qunlock(b);
403 free(cb);
404 return n;
405 case Qlog:
406 cb = parsecmd(a, n);
407 p = logctl(b, cb->nf, cb->f, logflags);
408 free(cb);
409 if(p != nil)
410 error(p);
411 return n;
412 }
413 }
414
415 static int
bridgegen(Chan * c,char *,Dirtab *,int,int s,Dir * dp)416 bridgegen(Chan *c, char *, Dirtab*, int, int s, Dir *dp)
417 {
418 Bridge *b = bridgetab + c->dev;
419 int type = TYPE(c->qid);
420 Dirtab *dt;
421 Qid qid;
422
423 if(s == DEVDOTDOT){
424 switch(TYPE(c->qid)){
425 case Qtopdir:
426 case Qbridgedir:
427 snprint(up->genbuf, sizeof(up->genbuf), "#B%ld", c->dev);
428 mkqid(&qid, Qtopdir, 0, QTDIR);
429 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
430 break;
431 case Qportdir:
432 snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev);
433 mkqid(&qid, Qbridgedir, 0, QTDIR);
434 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
435 break;
436 default:
437 panic("bridgewalk %llux", c->qid.path);
438 }
439 return 1;
440 }
441
442 switch(type) {
443 default:
444 /* non-directory entries end up here */
445 if(c->qid.type & QTDIR)
446 panic("bridgegen: unexpected directory");
447 if(s != 0)
448 return -1;
449 dt = dirtab[TYPE(c->qid)];
450 if(dt == nil)
451 panic("bridgegen: unknown type: %lud", TYPE(c->qid));
452 devdir(c, c->qid, dt->name, dt->length, eve, dt->perm, dp);
453 return 1;
454 case Qtopdir:
455 if(s != 0)
456 return -1;
457 snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev);
458 mkqid(&qid, QID(0, Qbridgedir), 0, QTDIR);
459 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
460 return 1;
461 case Qbridgedir:
462 if(s<nelem(bridgedirtab)) {
463 dt = bridgedirtab+s;
464 devdir(c, dt->qid, dt->name, dt->length, eve, dt->perm, dp);
465 return 1;
466 }
467 s -= nelem(bridgedirtab);
468 if(s >= b->nport)
469 return -1;
470 mkqid(&qid, QID(s, Qportdir), 0, QTDIR);
471 snprint(up->genbuf, sizeof(up->genbuf), "%d", s);
472 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
473 return 1;
474 case Qportdir:
475 if(s>=nelem(portdirtab))
476 return -1;
477 dt = portdirtab+s;
478 mkqid(&qid, QID(PORT(c->qid),TYPE(dt->qid)), 0, QTFILE);
479 devdir(c, qid, dt->name, dt->length, eve, dt->perm, dp);
480 return 1;
481 }
482 }
483
484 // parse mac address; also in netif.c
485 static int
parseaddr(uchar * to,char * from,int alen)486 parseaddr(uchar *to, char *from, int alen)
487 {
488 char nip[4];
489 char *p;
490 int i;
491
492 p = from;
493 for(i = 0; i < alen; i++){
494 if(*p == 0)
495 return -1;
496 nip[0] = *p++;
497 if(*p == 0)
498 return -1;
499 nip[1] = *p++;
500 nip[2] = 0;
501 to[i] = strtoul(nip, 0, 16);
502 if(*p == ':')
503 p++;
504 }
505 return 0;
506 }
507
508 // assumes b is locked
509 static void
portbind(Bridge * b,int argc,char * argv[])510 portbind(Bridge *b, int argc, char *argv[])
511 {
512 Port *port;
513 Chan *ctl;
514 int type = 0, i, n;
515 ulong ownhash;
516 char *dev, *dev2 = nil, *p;
517 char buf[100], name[KNAMELEN], path[8*KNAMELEN];
518 static char usage[] = "usage: bind ether|tunnel name ownhash dev [dev2]";
519
520 memset(name, 0, KNAMELEN);
521 if(argc < 4)
522 error(usage);
523 if(strcmp(argv[0], "ether") == 0) {
524 if(argc != 4)
525 error(usage);
526 type = Tether;
527 strncpy(name, argv[1], KNAMELEN);
528 name[KNAMELEN-1] = 0;
529 // parseaddr(addr, argv[1], Eaddrlen);
530 } else if(strcmp(argv[0], "tunnel") == 0) {
531 if(argc != 5)
532 error(usage);
533 type = Ttun;
534 strncpy(name, argv[1], KNAMELEN);
535 name[KNAMELEN-1] = 0;
536 // parseip(addr, argv[1]);
537 dev2 = argv[4];
538 } else
539 error(usage);
540 ownhash = atoi(argv[2]);
541 dev = argv[3];
542 for(i=0; i<b->nport; i++) {
543 port = b->port[i];
544 if(port != nil && port->type == type &&
545 memcmp(port->name, name, KNAMELEN) == 0)
546 error("port in use");
547 }
548 for(i=0; i<Maxport; i++)
549 if(b->port[i] == nil)
550 break;
551 if(i == Maxport)
552 error("no more ports");
553 port = smalloc(sizeof(Port));
554 port->ref = 1;
555 port->id = i;
556 port->ownhash = ownhash;
557
558 if(waserror()) {
559 portfree(port);
560 nexterror();
561 }
562 port->type = type;
563 memmove(port->name, name, KNAMELEN);
564 switch(port->type) {
565 default:
566 panic("portbind: unknown port type: %d", type);
567 case Tether:
568 snprint(path, sizeof(path), "%s/clone", dev);
569 ctl = namec(path, Aopen, ORDWR, 0);
570 if(waserror()) {
571 cclose(ctl);
572 nexterror();
573 }
574 // check addr?
575
576 // get directory name
577 n = devtab[ctl->type]->read(ctl, buf, sizeof(buf), 0);
578 buf[n] = 0;
579 for(p = buf; *p == ' '; p++)
580 ;
581 snprint(path, sizeof(path), "%s/%lud/data", dev, strtoul(p, 0, 0));
582
583 // setup connection to be promiscuous
584 snprint(buf, sizeof(buf), "connect -1");
585 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
586 snprint(buf, sizeof(buf), "promiscuous");
587 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
588 snprint(buf, sizeof(buf), "bridge");
589 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
590
591 // open data port
592 port->data[0] = namec(path, Aopen, ORDWR, 0);
593 // dup it
594 incref(port->data[0]);
595 port->data[1] = port->data[0];
596
597 poperror();
598 cclose(ctl);
599
600 break;
601 case Ttun:
602 port->data[0] = namec(dev, Aopen, OREAD, 0);
603 port->data[1] = namec(dev2, Aopen, OWRITE, 0);
604 break;
605 }
606
607 poperror();
608
609 /* committed to binding port */
610 b->port[port->id] = port;
611 port->bridge = b;
612 if(b->nport <= port->id)
613 b->nport = port->id+1;
614
615 // assumes kproc always succeeds
616 kproc("etherread", etherread, port); // poperror must be next
617 port->ref++;
618 }
619
620 // assumes b is locked
621 static void
portunbind(Bridge * b,int argc,char * argv[])622 portunbind(Bridge *b, int argc, char *argv[])
623 {
624 int type = 0, i;
625 char name[KNAMELEN];
626 ulong ownhash;
627 Port *port = nil;
628 static char usage[] = "usage: unbind ether|tunnel addr [ownhash]";
629
630 memset(name, 0, KNAMELEN);
631 if(argc < 2 || argc > 3)
632 error(usage);
633 if(strcmp(argv[0], "ether") == 0) {
634 type = Tether;
635 strncpy(name, argv[1], KNAMELEN);
636 name[KNAMELEN-1] = 0;
637 // parseaddr(addr, argv[1], Eaddrlen);
638 } else if(strcmp(argv[0], "tunnel") == 0) {
639 type = Ttun;
640 strncpy(name, argv[1], KNAMELEN);
641 name[KNAMELEN-1] = 0;
642 // parseip(addr, argv[1]);
643 } else
644 error(usage);
645 if(argc == 3)
646 ownhash = atoi(argv[2]);
647 else
648 ownhash = 0;
649 for(i=0; i<b->nport; i++) {
650 port = b->port[i];
651 if(port != nil && port->type == type &&
652 memcmp(port->name, name, KNAMELEN) == 0)
653 break;
654 }
655 if(i == b->nport)
656 error("port not found");
657 if(ownhash != 0 && port->ownhash != 0 && ownhash != port->ownhash)
658 error("bad owner hash");
659
660 port->closed = 1;
661 b->port[i] = nil; // port is now unbound
662 cacheflushport(b, i);
663
664 // try and stop reader
665 if(port->readp)
666 postnote(port->readp, 1, "unbind", 0);
667 portfree(port);
668 }
669
670 // assumes b is locked
671 static Centry *
cachelookup(Bridge * b,uchar d[Eaddrlen])672 cachelookup(Bridge *b, uchar d[Eaddrlen])
673 {
674 int i;
675 uint h;
676 Centry *p;
677 long sec;
678
679 // dont cache multicast or broadcast
680 if(d[0] & 1)
681 return 0;
682
683 h = 0;
684 for(i=0; i<Eaddrlen; i++) {
685 h *= 7;
686 h += d[i];
687 }
688 h %= CacheHash;
689 p = b->cache + h;
690 sec = TK2SEC(m->ticks);
691 for(i=0; i<CacheLook; i++,p++) {
692 if(memcmp(d, p->d, Eaddrlen) == 0) {
693 p->dst++;
694 if(sec >= p->expire) {
695 log(b, Logcache, "expired cache entry: %E %d\n",
696 d, p->port);
697 return nil;
698 }
699 p->expire = sec + CacheTimeout;
700 return p;
701 }
702 }
703 log(b, Logcache, "cache miss: %E\n", d);
704 return nil;
705 }
706
707 // assumes b is locked
708 static void
cacheupdate(Bridge * b,uchar d[Eaddrlen],int port)709 cacheupdate(Bridge *b, uchar d[Eaddrlen], int port)
710 {
711 int i;
712 uint h;
713 Centry *p, *pp;
714 long sec;
715
716 // dont cache multicast or broadcast
717 if(d[0] & 1) {
718 log(b, Logcache, "bad source address: %E\n", d);
719 return;
720 }
721
722 h = 0;
723 for(i=0; i<Eaddrlen; i++) {
724 h *= 7;
725 h += d[i];
726 }
727 h %= CacheHash;
728 p = b->cache + h;
729 pp = p;
730 sec = p->expire;
731
732 // look for oldest entry
733 for(i=0; i<CacheLook; i++,p++) {
734 if(memcmp(p->d, d, Eaddrlen) == 0) {
735 p->expire = TK2SEC(m->ticks) + CacheTimeout;
736 if(p->port != port) {
737 log(b, Logcache, "NIC changed port %d->%d: %E\n",
738 p->port, port, d);
739 p->port = port;
740 }
741 p->src++;
742 return;
743 }
744 if(p->expire < sec) {
745 sec = p->expire;
746 pp = p;
747 }
748 }
749 if(pp->expire != 0)
750 log(b, Logcache, "bumping from cache: %E %d\n", pp->d, pp->port);
751 pp->expire = TK2SEC(m->ticks) + CacheTimeout;
752 memmove(pp->d, d, Eaddrlen);
753 pp->port = port;
754 pp->src = 1;
755 pp->dst = 0;
756 log(b, Logcache, "adding to cache: %E %d\n", pp->d, pp->port);
757 }
758
759 // assumes b is locked
760 static void
cacheflushport(Bridge * b,int port)761 cacheflushport(Bridge *b, int port)
762 {
763 Centry *ce;
764 int i;
765
766 ce = b->cache;
767 for(i=0; i<CacheSize; i++,ce++) {
768 if(ce->port != port)
769 continue;
770 memset(ce, 0, sizeof(Centry));
771 }
772 }
773
774 static char *
cachedump(Bridge * b)775 cachedump(Bridge *b)
776 {
777 int i, n;
778 long sec, off;
779 char *buf, *p, *ep;
780 Centry *ce;
781 char c;
782
783 qlock(b);
784 if(waserror()) {
785 qunlock(b);
786 nexterror();
787 }
788 sec = TK2SEC(m->ticks);
789 n = 0;
790 for(i=0; i<CacheSize; i++)
791 if(b->cache[i].expire != 0)
792 n++;
793
794 n *= 51; // change if print format is changed
795 n += 10; // some slop at the end
796 buf = malloc(n);
797 if(buf == nil)
798 error(Enomem);
799 p = buf;
800 ep = buf + n;
801 ce = b->cache;
802 off = seconds() - sec;
803 for(i=0; i<CacheSize; i++,ce++) {
804 if(ce->expire == 0)
805 continue;
806 c = (sec < ce->expire)?'v':'e';
807 p += snprint(p, ep-p, "%E %2d %10ld %10ld %10ld %c\n", ce->d,
808 ce->port, ce->src, ce->dst, ce->expire+off, c);
809 }
810 *p = 0;
811 poperror();
812 qunlock(b);
813
814 return buf;
815 }
816
817
818
819 // assumes b is locked
820 static void
ethermultiwrite(Bridge * b,Block * bp,Port * port)821 ethermultiwrite(Bridge *b, Block *bp, Port *port)
822 {
823 Port *oport;
824 Block *bp2;
825 Etherpkt *ep;
826 int i, mcast;
827
828 if(waserror()) {
829 if(bp)
830 freeb(bp);
831 nexterror();
832 }
833
834 ep = (Etherpkt*)bp->rp;
835 mcast = ep->d[0] & 1; /* multicast bit of ethernet address */
836
837 oport = nil;
838 for(i=0; i<b->nport; i++) {
839 if(i == port->id || b->port[i] == nil)
840 continue;
841 /*
842 * we need to forward multicast packets for ipv6,
843 * so always do it.
844 */
845 if(mcast)
846 b->port[i]->outmulti++;
847 else
848 b->port[i]->outunknown++;
849
850 // delay one so that the last write does not copy
851 if(oport != nil) {
852 b->copy++;
853 bp2 = copyblock(bp, blocklen(bp));
854 if(!waserror()) {
855 etherwrite(oport, bp2);
856 poperror();
857 }
858 }
859 oport = b->port[i];
860 }
861
862 // last write free block
863 if(oport) {
864 bp2 = bp; bp = nil; USED(bp);
865 if(!waserror()) {
866 etherwrite(oport, bp2);
867 poperror();
868 }
869 } else
870 freeb(bp);
871
872 poperror();
873 }
874
875 static void
tcpmsshack(Etherpkt * epkt,int n)876 tcpmsshack(Etherpkt *epkt, int n)
877 {
878 int hl, optlen;
879 Iphdr *iphdr;
880 Tcphdr *tcphdr;
881 ulong mss, cksum;
882 uchar *optr;
883
884 /* ignore non-ipv4 packets */
885 if(nhgets(epkt->type) != ETIP4)
886 return;
887 iphdr = (Iphdr*)(epkt->data);
888 n -= ETHERHDRSIZE;
889 if(n < IPHDR)
890 return;
891
892 /* ignore bad packets */
893 if(iphdr->vihl != (IP_VER4|IP_HLEN4)) {
894 hl = (iphdr->vihl&0xF)<<2;
895 if((iphdr->vihl&0xF0) != IP_VER4 || hl < (IP_HLEN4<<2))
896 return;
897 } else
898 hl = IP_HLEN4<<2;
899
900 /* ignore non-tcp packets */
901 if(iphdr->proto != IP_TCPPROTO)
902 return;
903 n -= hl;
904 if(n < sizeof(Tcphdr))
905 return;
906 tcphdr = (Tcphdr*)((uchar*)(iphdr) + hl);
907 // MSS can only appear in SYN packet
908 if(!(tcphdr->flag[1] & SYN))
909 return;
910 hl = (tcphdr->flag[0] & 0xf0)>>2;
911 if(n < hl)
912 return;
913
914 // check for MSS option
915 optr = (uchar*)tcphdr + sizeof(Tcphdr);
916 n = hl - sizeof(Tcphdr);
917 for(;;) {
918 if(n <= 0 || *optr == EOLOPT)
919 return;
920 if(*optr == NOOPOPT) {
921 n--;
922 optr++;
923 continue;
924 }
925 optlen = optr[1];
926 if(optlen < 2 || optlen > n)
927 return;
928 if(*optr == MSSOPT && optlen == MSS_LENGTH)
929 break;
930 n -= optlen;
931 optr += optlen;
932 }
933
934 mss = nhgets(optr+2);
935 if(mss <= TcpMssMax)
936 return;
937 // fit checksum
938 cksum = nhgets(tcphdr->cksum);
939 if(optr-(uchar*)tcphdr & 1) {
940 print("tcpmsshack: odd alignment!\n");
941 // odd alignments are a pain
942 cksum += nhgets(optr+1);
943 cksum -= (optr[1]<<8)|(TcpMssMax>>8);
944 cksum += (cksum>>16);
945 cksum &= 0xffff;
946 cksum += nhgets(optr+3);
947 cksum -= ((TcpMssMax&0xff)<<8)|optr[4];
948 cksum += (cksum>>16);
949 } else {
950 cksum += mss;
951 cksum -= TcpMssMax;
952 cksum += (cksum>>16);
953 }
954 hnputs(tcphdr->cksum, cksum);
955 hnputs(optr+2, TcpMssMax);
956 }
957
958 /*
959 * process to read from the ethernet
960 */
961 static void
etherread(void * a)962 etherread(void *a)
963 {
964 Port *port = a;
965 Bridge *b = port->bridge;
966 Block *bp, *bp2;
967 Etherpkt *ep;
968 Centry *ce;
969 long md;
970
971 qlock(b);
972 port->readp = up; /* hide identity under a rock for unbind */
973
974 while(!port->closed){
975 // release lock to read - error means it is time to quit
976 qunlock(b);
977 if(waserror()) {
978 print("etherread read error: %s\n", up->errstr);
979 qlock(b);
980 break;
981 }
982 if(0)
983 print("devbridge: etherread: reading\n");
984 bp = devtab[port->data[0]->type]->bread(port->data[0],
985 ETHERMAXTU, 0);
986 if(0)
987 print("devbridge: etherread: blocklen = %d\n",
988 blocklen(bp));
989 poperror();
990 qlock(b);
991 if(bp == nil || port->closed)
992 break;
993 if(waserror()) {
994 // print("etherread bridge error\n");
995 if(bp)
996 freeb(bp);
997 continue;
998 }
999 if(blocklen(bp) < ETHERMINTU)
1000 error("short packet");
1001 port->in++;
1002
1003 ep = (Etherpkt*)bp->rp;
1004 cacheupdate(b, ep->s, port->id);
1005 if(b->tcpmss)
1006 tcpmsshack(ep, BLEN(bp));
1007
1008 /*
1009 * delay packets to simulate a slow link
1010 */
1011 if(b->delay0 || b->delayn){
1012 md = b->delay0 + b->delayn * BLEN(bp);
1013 if(md > 0)
1014 microdelay(md);
1015 }
1016
1017 if(ep->d[0] & 1) {
1018 log(b, Logmcast, "multicast: port=%d src=%E dst=%E type=%#.4ux\n",
1019 port->id, ep->s, ep->d, ep->type[0]<<8|ep->type[1]);
1020 port->inmulti++;
1021 bp2 = bp; bp = nil;
1022 ethermultiwrite(b, bp2, port);
1023 } else {
1024 ce = cachelookup(b, ep->d);
1025 if(ce == nil) {
1026 b->miss++;
1027 port->inunknown++;
1028 bp2 = bp; bp = nil;
1029 ethermultiwrite(b, bp2, port);
1030 }else if(ce->port != port->id){
1031 b->hit++;
1032 bp2 = bp; bp = nil;
1033 etherwrite(b->port[ce->port], bp2);
1034 }
1035 }
1036
1037 poperror();
1038 if(bp)
1039 freeb(bp);
1040 }
1041 // print("etherread: trying to exit\n");
1042 port->readp = nil;
1043 portfree(port);
1044 qunlock(b);
1045 pexit("hangup", 1);
1046 }
1047
1048 static int
fragment(Etherpkt * epkt,int n)1049 fragment(Etherpkt *epkt, int n)
1050 {
1051 Iphdr *iphdr;
1052
1053 if(n <= TunnelMtu)
1054 return 0;
1055
1056 /* ignore non-ipv4 packets */
1057 if(nhgets(epkt->type) != ETIP4)
1058 return 0;
1059 iphdr = (Iphdr*)(epkt->data);
1060 n -= ETHERHDRSIZE;
1061 /*
1062 * ignore: IP runt packets, bad packets (I don't handle IP
1063 * options for the moment), packets with don't-fragment set,
1064 * and short blocks.
1065 */
1066 if(n < IPHDR || iphdr->vihl != (IP_VER4|IP_HLEN4) ||
1067 iphdr->frag[0] & (IP_DF>>8) || nhgets(iphdr->length) > n)
1068 return 0;
1069
1070 return 1;
1071 }
1072
1073
1074 static void
etherwrite(Port * port,Block * bp)1075 etherwrite(Port *port, Block *bp)
1076 {
1077 Iphdr *eh, *feh;
1078 Etherpkt *epkt;
1079 int n, lid, len, seglen, chunk, dlen, blklen, offset, mf;
1080 Block *xp, *nb;
1081 ushort fragoff, frag;
1082
1083 port->out++;
1084 epkt = (Etherpkt*)bp->rp;
1085 n = blocklen(bp);
1086 if(port->type != Ttun || !fragment(epkt, n)) {
1087 devtab[port->data[1]->type]->bwrite(port->data[1], bp, 0);
1088 return;
1089 }
1090 port->outfrag++;
1091 if(waserror()){
1092 freeblist(bp);
1093 nexterror();
1094 }
1095
1096 seglen = (TunnelMtu - ETHERHDRSIZE - IPHDR) & ~7;
1097 eh = (Iphdr*)(epkt->data);
1098 len = nhgets(eh->length);
1099 frag = nhgets(eh->frag);
1100 mf = frag & IP_MF;
1101 frag <<= 3;
1102 dlen = len - IPHDR;
1103 xp = bp;
1104 lid = nhgets(eh->id);
1105 offset = ETHERHDRSIZE+IPHDR;
1106 while(xp != nil && offset && offset >= BLEN(xp)) {
1107 offset -= BLEN(xp);
1108 xp = xp->next;
1109 }
1110 xp->rp += offset;
1111
1112 if(0)
1113 print("seglen=%d, dlen=%d, mf=%x, frag=%d\n",
1114 seglen, dlen, mf, frag);
1115 for(fragoff = 0; fragoff < dlen; fragoff += seglen) {
1116 nb = allocb(ETHERHDRSIZE+IPHDR+seglen);
1117
1118 feh = (Iphdr*)(nb->wp+ETHERHDRSIZE);
1119
1120 memmove(nb->wp, epkt, ETHERHDRSIZE+IPHDR);
1121 nb->wp += ETHERHDRSIZE+IPHDR;
1122
1123 if((fragoff + seglen) >= dlen) {
1124 seglen = dlen - fragoff;
1125 hnputs(feh->frag, (frag+fragoff)>>3 | mf);
1126 }
1127 else
1128 hnputs(feh->frag, (frag+fragoff>>3) | IP_MF);
1129
1130 hnputs(feh->length, seglen + IPHDR);
1131 hnputs(feh->id, lid);
1132
1133 /* Copy up the data area */
1134 chunk = seglen;
1135 while(chunk) {
1136 blklen = chunk;
1137 if(BLEN(xp) < chunk)
1138 blklen = BLEN(xp);
1139 memmove(nb->wp, xp->rp, blklen);
1140 nb->wp += blklen;
1141 xp->rp += blklen;
1142 chunk -= blklen;
1143 if(xp->rp == xp->wp)
1144 xp = xp->next;
1145 }
1146
1147 feh->cksum[0] = 0;
1148 feh->cksum[1] = 0;
1149 hnputs(feh->cksum, ipcsum(&feh->vihl));
1150
1151 /* don't generate small packets */
1152 if(BLEN(nb) < ETHERMINTU)
1153 nb->wp = nb->rp + ETHERMINTU;
1154 devtab[port->data[1]->type]->bwrite(port->data[1], nb, 0);
1155 }
1156 poperror();
1157 freeblist(bp);
1158 }
1159
1160 // hold b lock
1161 static void
portfree(Port * port)1162 portfree(Port *port)
1163 {
1164 port->ref--;
1165 if(port->ref < 0)
1166 panic("portfree: bad ref");
1167 if(port->ref > 0)
1168 return;
1169
1170 if(port->data[0])
1171 cclose(port->data[0]);
1172 if(port->data[1])
1173 cclose(port->data[1]);
1174 memset(port, 0, sizeof(Port));
1175 free(port);
1176 }
1177
1178 Dev bridgedevtab = {
1179 'B',
1180 "bridge",
1181
1182 devreset,
1183 bridgeinit,
1184 devshutdown,
1185 bridgeattach,
1186 bridgewalk,
1187 bridgestat,
1188 bridgeopen,
1189 devcreate,
1190 bridgeclose,
1191 bridgeread,
1192 devbread,
1193 bridgewrite,
1194 devbwrite,
1195 devremove,
1196 devwstat,
1197 };
1198