1 /*
2 * IPv4 Ethernet bridge
3 */
4 #include "u.h"
5 #include "../port/lib.h"
6 #include "mem.h"
7 #include "dat.h"
8 #include "fns.h"
9 #include "../ip/ip.h"
10 #include "../port/netif.h"
11 #include "../port/error.h"
12
/* Short type names for the structures defined further down. */
typedef struct Iphdr	Iphdr;
typedef struct Tcphdr	Tcphdr;
typedef struct Centry	Centry;
typedef struct Port	Port;
typedef struct Bridge	Bridge;
18
19 enum
20 {
21 Qtopdir= 1, /* top level directory */
22
23 Qbridgedir, /* bridge* directory */
24 Qbctl,
25 Qstats,
26 Qcache,
27 Qlog,
28
29 Qportdir, /* directory for a protocol */
30 Qpctl,
31 Qlocal,
32 Qstatus,
33
34 MaxQ,
35
36 Maxbridge= 4,
37 Maxport= 128, // power of 2
38 CacheHash= 257, // prime
39 CacheLook= 5, // how many cache entries to examine
40 CacheSize= (CacheHash+CacheLook-1),
41 CacheTimeout= 5*60, // timeout for cache entry in seconds
42 MaxMTU= IP_MAX, // allow for jumbo frames and large UDP
43
44 TcpMssMax = 1300, // max desirable Tcp MSS value
45 TunnelMtu = 1400,
46 };
47
48 static Dirtab bridgedirtab[]={
49 "ctl", {Qbctl}, 0, 0666,
50 "stats", {Qstats}, 0, 0444,
51 "cache", {Qcache}, 0, 0444,
52 "log", {Qlog}, 0, 0666,
53 };
54
55 static Dirtab portdirtab[]={
56 "ctl", {Qpctl}, 0, 0666,
57 "local", {Qlocal}, 0, 0444,
58 "status", {Qstatus}, 0, 0444,
59 };
60
/* Bits selecting which events are written to the log file. */
enum {
	Logcache	= 0x01,		/* address cache activity */
	Logmcast	= 0x02,		/* multicast/broadcast packets */
};
65
/* kinds of interface a port can be bound to */
enum
{
	Tether	= 0,	/* "ether" in bind/unbind requests */
	Ttun	= 1,	/* "tunnel" in bind/unbind requests */
};
72
73 static Logflag logflags[] =
74 {
75 { "cache", Logcache, },
76 { "multicast", Logmcast, },
77 { nil, 0, },
78 };
79
80 static Dirtab *dirtab[MaxQ];
81
/*
 * Qid path layout: low byte is the file type (a Q* value),
 * the bits above it hold the port number.
 */
#define QID(x, y)	((y) | ((x)<<8))
#define TYPE(x)		(0xff & (ulong)(x).path)
#define PORT(x)		(((ulong)(x).path >> 8) & (Maxport-1))
85
86 struct Centry
87 {
88 uchar d[Eaddrlen];
89 int port;
90 long expire; // entry expires this many seconds after bootime
91 long src;
92 long dst;
93 };
94
95 struct Bridge
96 {
97 QLock;
98 int nport;
99 Port *port[Maxport];
100 Centry cache[CacheSize];
101 ulong hit;
102 ulong miss;
103 ulong copy;
104 long delay0; // constant microsecond delay per packet
105 long delayn; // microsecond delay per byte
106 int tcpmss; // modify tcpmss value
107
108 Log;
109 };
110
111 struct Port
112 {
113 Ref;
114 int id;
115 Bridge *bridge;
116 int closed;
117
118 Chan *data[2]; // channel to data
119
120 Proc *readp; // read proc
121
122 // the following uniquely identifies the port
123 int type;
124 char name[KNAMELEN];
125
126 // owner hash - avoids bind/unbind races
127 ulong ownhash;
128
129 // various stats
130 int in; // number of packets read
131 int inmulti; // multicast or broadcast
132 int inunknown; // unknown address
133 int out; // number of packets read
134 int outmulti; // multicast or broadcast
135 int outunknown; // unknown address
136 int outfrag; // fragmented the packet
137 int nentry; // number of cache entries for this port
138 };
139
/* IP and TCP constants used by the MSS and fragmentation code. */
enum {
	IP_TCPPROTO	= 6,
	EOLOPT		= 0,	/* TCP option kind: end of option list */
	NOOPOPT		= 1,	/* TCP option kind: no-operation */
	MSSOPT		= 2,	/* TCP option kind: maximum segment size */
	MSS_LENGTH	= 4,	/* length of the MSS option */
	SYN		= 0x02,	/* Pkt. is synchronise */
	IPHDR		= 20,	/* sizeof(Iphdr) */
};
149
150 struct Iphdr
151 {
152 uchar vihl; /* Version and header length */
153 uchar tos; /* Type of service */
154 uchar length[2]; /* packet length */
155 uchar id[2]; /* ip->identification */
156 uchar frag[2]; /* Fragment information */
157 uchar ttl; /* Time to live */
158 uchar proto; /* Protocol */
159 uchar cksum[2]; /* Header checksum */
160 uchar src[4]; /* IP source */
161 uchar dst[4]; /* IP destination */
162 };
163
164 struct Tcphdr
165 {
166 uchar sport[2];
167 uchar dport[2];
168 uchar seq[4];
169 uchar ack[4];
170 uchar flag[2];
171 uchar win[2];
172 uchar cksum[2];
173 uchar urg[2];
174 };
175
176 static Bridge bridgetab[Maxbridge];
177
178 static int m2p[] = {
179 [OREAD] 4,
180 [OWRITE] 2,
181 [ORDWR] 6
182 };
183
184 static int bridgegen(Chan *c, char*, Dirtab*, int, int s, Dir *dp);
185 static void portbind(Bridge *b, int argc, char *argv[]);
186 static void portunbind(Bridge *b, int argc, char *argv[]);
187 static void etherread(void *a);
188 static char *cachedump(Bridge *b);
189 static void portfree(Port *port);
190 static void cacheflushport(Bridge *b, int port);
191 static void etherwrite(Port *port, Block *bp);
192
193 static void
bridgeinit(void)194 bridgeinit(void)
195 {
196 int i;
197 Dirtab *dt;
198
199 // setup dirtab with non directory entries
200 for(i=0; i<nelem(bridgedirtab); i++) {
201 dt = bridgedirtab + i;
202 dirtab[TYPE(dt->qid)] = dt;
203 }
204 for(i=0; i<nelem(portdirtab); i++) {
205 dt = portdirtab + i;
206 dirtab[TYPE(dt->qid)] = dt;
207 }
208 }
209
210 static Chan*
bridgeattach(char * spec)211 bridgeattach(char* spec)
212 {
213 Chan *c;
214 int dev;
215
216 dev = atoi(spec);
217 if(dev<0 || dev >= Maxbridge)
218 error("bad specification");
219
220 c = devattach('B', spec);
221 mkqid(&c->qid, QID(0, Qtopdir), 0, QTDIR);
222 c->dev = dev;
223 return c;
224 }
225
226 static Walkqid*
bridgewalk(Chan * c,Chan * nc,char ** name,int nname)227 bridgewalk(Chan *c, Chan *nc, char **name, int nname)
228 {
229 return devwalk(c, nc, name, nname, (Dirtab*)0, 0, bridgegen);
230 }
231
232 static int
bridgestat(Chan * c,uchar * db,int n)233 bridgestat(Chan* c, uchar* db, int n)
234 {
235 return devstat(c, db, n, (Dirtab *)0, 0L, bridgegen);
236 }
237
238 static Chan*
bridgeopen(Chan * c,int omode)239 bridgeopen(Chan* c, int omode)
240 {
241 int perm;
242 Bridge *b;
243
244 omode &= 3;
245 perm = m2p[omode];
246 USED(perm);
247
248 b = bridgetab + c->dev;
249 USED(b);
250
251 switch(TYPE(c->qid)) {
252 default:
253 break;
254 case Qlog:
255 logopen(b);
256 break;
257 case Qcache:
258 c->aux = cachedump(b);
259 break;
260 }
261 c->mode = openmode(omode);
262 c->flag |= COPEN;
263 c->offset = 0;
264 return c;
265 }
266
267 static void
bridgeclose(Chan * c)268 bridgeclose(Chan* c)
269 {
270 Bridge *b = bridgetab + c->dev;
271
272 switch(TYPE(c->qid)) {
273 case Qcache:
274 if(c->flag & COPEN)
275 free(c->aux);
276 break;
277 case Qlog:
278 if(c->flag & COPEN)
279 logclose(b);
280 break;
281 }
282 }
283
284 static long
bridgeread(Chan * c,void * a,long n,vlong off)285 bridgeread(Chan *c, void *a, long n, vlong off)
286 {
287 char buf[256];
288 Bridge *b = bridgetab + c->dev;
289 Port *port;
290 int i, ingood, outgood;
291
292 USED(off);
293 switch(TYPE(c->qid)) {
294 default:
295 error(Egreg);
296 case Qtopdir:
297 case Qbridgedir:
298 case Qportdir:
299 return devdirread(c, a, n, 0, 0, bridgegen);
300 case Qlog:
301 return logread(b, a, off, n);
302 case Qlocal:
303 return 0; /* TO DO */
304 case Qstatus:
305 qlock(b);
306 if(waserror()){
307 qunlock(b);
308 nexterror();
309 }
310 port = b->port[PORT(c->qid)];
311 if(port == 0)
312 strcpy(buf, "unbound\n");
313 else {
314 i = 0;
315 switch(port->type) {
316 default:
317 panic("bridgeread: unknown port type: %d",
318 port->type);
319 case Tether:
320 i += snprint(buf+i, sizeof(buf)-i, "ether %s: ", port->name);
321 break;
322 case Ttun:
323 i += snprint(buf+i, sizeof(buf)-i, "tunnel %s: ", port->name);
324 break;
325 }
326 ingood = port->in - port->inmulti - port->inunknown;
327 outgood = port->out - port->outmulti - port->outunknown;
328 snprint(buf+i, sizeof(buf)-i,
329 "in=%d(%d:%d:%d) out=%d(%d:%d:%d:%d)\n",
330 port->in, ingood, port->inmulti, port->inunknown,
331 port->out, outgood, port->outmulti,
332 port->outunknown, port->outfrag);
333 }
334 poperror();
335 qunlock(b);
336 return readstr(off, a, n, buf);
337 case Qbctl:
338 snprint(buf, sizeof(buf), "%s tcpmss\ndelay %ld %ld\n",
339 b->tcpmss ? "set" : "clear", b->delay0, b->delayn);
340 n = readstr(off, a, n, buf);
341 return n;
342 case Qcache:
343 n = readstr(off, a, n, c->aux);
344 return n;
345 case Qstats:
346 snprint(buf, sizeof(buf), "hit=%uld miss=%uld copy=%uld\n",
347 b->hit, b->miss, b->copy);
348 n = readstr(off, a, n, buf);
349 return n;
350 }
351 }
352
353 static void
bridgeoption(Bridge * b,char * option,int value)354 bridgeoption(Bridge *b, char *option, int value)
355 {
356 if(strcmp(option, "tcpmss") == 0)
357 b->tcpmss = value;
358 else
359 error("unknown bridge option");
360 }
361
362
363 static long
bridgewrite(Chan * c,void * a,long n,vlong off)364 bridgewrite(Chan *c, void *a, long n, vlong off)
365 {
366 Bridge *b = bridgetab + c->dev;
367 Cmdbuf *cb;
368 char *arg0, *p;
369
370 USED(off);
371 switch(TYPE(c->qid)) {
372 default:
373 error(Eperm);
374 case Qbctl:
375 cb = parsecmd(a, n);
376 qlock(b);
377 if(waserror()) {
378 qunlock(b);
379 free(cb);
380 nexterror();
381 }
382 if(cb->nf == 0)
383 error("short write");
384 arg0 = cb->f[0];
385 if(strcmp(arg0, "bind") == 0) {
386 portbind(b, cb->nf-1, cb->f+1);
387 } else if(strcmp(arg0, "unbind") == 0) {
388 portunbind(b, cb->nf-1, cb->f+1);
389 } else if(strcmp(arg0, "cacheflush") == 0) {
390 log(b, Logcache, "cache flush\n");
391 memset(b->cache, 0, CacheSize*sizeof(Centry));
392 } else if(strcmp(arg0, "set") == 0) {
393 if(cb->nf != 2)
394 error("usage: set option");
395 bridgeoption(b, cb->f[1], 1);
396 } else if(strcmp(arg0, "clear") == 0) {
397 if(cb->nf != 2)
398 error("usage: clear option");
399 bridgeoption(b, cb->f[1], 0);
400 } else if(strcmp(arg0, "delay") == 0) {
401 if(cb->nf != 3)
402 error("usage: delay delay0 delayn");
403 b->delay0 = strtol(cb->f[1], nil, 10);
404 b->delayn = strtol(cb->f[2], nil, 10);
405 } else
406 error("unknown control request");
407 poperror();
408 qunlock(b);
409 free(cb);
410 return n;
411 case Qlog:
412 cb = parsecmd(a, n);
413 p = logctl(b, cb->nf, cb->f, logflags);
414 free(cb);
415 if(p != nil)
416 error(p);
417 return n;
418 }
419 }
420
421 static int
bridgegen(Chan * c,char *,Dirtab *,int,int s,Dir * dp)422 bridgegen(Chan *c, char *, Dirtab*, int, int s, Dir *dp)
423 {
424 Bridge *b = bridgetab + c->dev;
425 int type = TYPE(c->qid);
426 Dirtab *dt;
427 Qid qid;
428
429 if(s == DEVDOTDOT){
430 switch(TYPE(c->qid)){
431 case Qtopdir:
432 case Qbridgedir:
433 snprint(up->genbuf, sizeof(up->genbuf), "#B%ld", c->dev);
434 mkqid(&qid, Qtopdir, 0, QTDIR);
435 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
436 break;
437 case Qportdir:
438 snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev);
439 mkqid(&qid, Qbridgedir, 0, QTDIR);
440 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
441 break;
442 default:
443 panic("bridgewalk %llux", c->qid.path);
444 }
445 return 1;
446 }
447
448 switch(type) {
449 default:
450 /* non-directory entries end up here */
451 if(c->qid.type & QTDIR)
452 panic("bridgegen: unexpected directory");
453 if(s != 0)
454 return -1;
455 dt = dirtab[TYPE(c->qid)];
456 if(dt == nil)
457 panic("bridgegen: unknown type: %lud", TYPE(c->qid));
458 devdir(c, c->qid, dt->name, dt->length, eve, dt->perm, dp);
459 return 1;
460 case Qtopdir:
461 if(s != 0)
462 return -1;
463 snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev);
464 mkqid(&qid, QID(0, Qbridgedir), 0, QTDIR);
465 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
466 return 1;
467 case Qbridgedir:
468 if(s<nelem(bridgedirtab)) {
469 dt = bridgedirtab+s;
470 devdir(c, dt->qid, dt->name, dt->length, eve, dt->perm, dp);
471 return 1;
472 }
473 s -= nelem(bridgedirtab);
474 if(s >= b->nport)
475 return -1;
476 mkqid(&qid, QID(s, Qportdir), 0, QTDIR);
477 snprint(up->genbuf, sizeof(up->genbuf), "%d", s);
478 devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
479 return 1;
480 case Qportdir:
481 if(s>=nelem(portdirtab))
482 return -1;
483 dt = portdirtab+s;
484 mkqid(&qid, QID(PORT(c->qid),TYPE(dt->qid)), 0, QTFILE);
485 devdir(c, qid, dt->name, dt->length, eve, dt->perm, dp);
486 return 1;
487 }
488 }
489
490 // parse mac address; also in netif.c
491 static int
parseaddr(uchar * to,char * from,int alen)492 parseaddr(uchar *to, char *from, int alen)
493 {
494 char nip[4];
495 char *p;
496 int i;
497
498 p = from;
499 for(i = 0; i < alen; i++){
500 if(*p == 0)
501 return -1;
502 nip[0] = *p++;
503 if(*p == 0)
504 return -1;
505 nip[1] = *p++;
506 nip[2] = 0;
507 to[i] = strtoul(nip, 0, 16);
508 if(*p == ':')
509 p++;
510 }
511 return 0;
512 }
513
514 // assumes b is locked
515 static void
portbind(Bridge * b,int argc,char * argv[])516 portbind(Bridge *b, int argc, char *argv[])
517 {
518 Port *port;
519 Chan *ctl;
520 int type = 0, i, n;
521 ulong ownhash;
522 char *dev, *dev2 = nil;
523 char buf[100], name[KNAMELEN], path[8*KNAMELEN];
524 static char usage[] = "usage: bind ether|tunnel name ownhash dev [dev2]";
525
526 memset(name, 0, KNAMELEN);
527 if(argc < 4)
528 error(usage);
529 if(strcmp(argv[0], "ether") == 0) {
530 if(argc != 4)
531 error(usage);
532 type = Tether;
533 strncpy(name, argv[1], KNAMELEN);
534 name[KNAMELEN-1] = 0;
535 // parseaddr(addr, argv[1], Eaddrlen);
536 } else if(strcmp(argv[0], "tunnel") == 0) {
537 if(argc != 5)
538 error(usage);
539 type = Ttun;
540 strncpy(name, argv[1], KNAMELEN);
541 name[KNAMELEN-1] = 0;
542 // parseip(addr, argv[1]);
543 dev2 = argv[4];
544 } else
545 error(usage);
546 ownhash = atoi(argv[2]);
547 dev = argv[3];
548 for(i=0; i<b->nport; i++) {
549 port = b->port[i];
550 if(port != nil && port->type == type &&
551 memcmp(port->name, name, KNAMELEN) == 0)
552 error("port in use");
553 }
554 for(i=0; i<Maxport; i++)
555 if(b->port[i] == nil)
556 break;
557 if(i == Maxport)
558 error("no more ports");
559 port = smalloc(sizeof(Port));
560 port->ref = 1;
561 port->id = i;
562 port->ownhash = ownhash;
563
564 if(waserror()) {
565 portfree(port);
566 nexterror();
567 }
568 port->type = type;
569 memmove(port->name, name, KNAMELEN);
570 switch(port->type) {
571 default:
572 panic("portbind: unknown port type: %d", type);
573 case Tether:
574 snprint(path, sizeof(path), "%s/clone", dev);
575 ctl = namec(path, Aopen, ORDWR, 0);
576 if(waserror()) {
577 cclose(ctl);
578 nexterror();
579 }
580 // check addr?
581
582 // get directory name
583 n = devtab[ctl->type]->read(ctl, buf, sizeof(buf)-1, 0);
584 buf[n] = 0;
585 snprint(path, sizeof(path), "%s/%lud/data", dev, strtoul(buf, 0, 0));
586
587 // setup connection to be promiscuous
588 snprint(buf, sizeof(buf), "connect -1");
589 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
590 snprint(buf, sizeof(buf), "promiscuous");
591 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
592 snprint(buf, sizeof(buf), "bridge");
593 devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
594
595 // open data port
596 port->data[0] = namec(path, Aopen, ORDWR, 0);
597 // dup it
598 incref(port->data[0]);
599 port->data[1] = port->data[0];
600
601 poperror();
602 cclose(ctl);
603
604 break;
605 case Ttun:
606 port->data[0] = namec(dev, Aopen, OREAD, 0);
607 port->data[1] = namec(dev2, Aopen, OWRITE, 0);
608 break;
609 }
610
611 poperror();
612
613 /* committed to binding port */
614 b->port[port->id] = port;
615 port->bridge = b;
616 if(b->nport <= port->id)
617 b->nport = port->id+1;
618
619 // assumes kproc always succeeds
620 incref(port);
621 snprint(buf, sizeof(buf), "bridge:%s", dev);
622 kproc(buf, etherread, port);
623 }
624
625 // assumes b is locked
626 static void
portunbind(Bridge * b,int argc,char * argv[])627 portunbind(Bridge *b, int argc, char *argv[])
628 {
629 int type = 0, i;
630 char name[KNAMELEN];
631 ulong ownhash;
632 Port *port = nil;
633 static char usage[] = "usage: unbind ether|tunnel addr [ownhash]";
634
635 memset(name, 0, KNAMELEN);
636 if(argc < 2 || argc > 3)
637 error(usage);
638 if(strcmp(argv[0], "ether") == 0) {
639 type = Tether;
640 strncpy(name, argv[1], KNAMELEN);
641 name[KNAMELEN-1] = 0;
642 // parseaddr(addr, argv[1], Eaddrlen);
643 } else if(strcmp(argv[0], "tunnel") == 0) {
644 type = Ttun;
645 strncpy(name, argv[1], KNAMELEN);
646 name[KNAMELEN-1] = 0;
647 // parseip(addr, argv[1]);
648 } else
649 error(usage);
650 if(argc == 3)
651 ownhash = atoi(argv[2]);
652 else
653 ownhash = 0;
654 for(i=0; i<b->nport; i++) {
655 port = b->port[i];
656 if(port != nil && port->type == type &&
657 memcmp(port->name, name, KNAMELEN) == 0)
658 break;
659 }
660 if(i == b->nport)
661 error("port not found");
662 if(ownhash != 0 && port->ownhash != 0 && ownhash != port->ownhash)
663 error("bad owner hash");
664
665 port->closed = 1;
666 b->port[i] = nil; // port is now unbound
667 cacheflushport(b, i);
668
669 // try and stop reader
670 if(port->readp)
671 postnote(port->readp, 1, "unbind", 0);
672 portfree(port);
673 }
674
675 // assumes b is locked
676 static Centry *
cachelookup(Bridge * b,uchar d[Eaddrlen])677 cachelookup(Bridge *b, uchar d[Eaddrlen])
678 {
679 int i;
680 uint h;
681 Centry *p;
682 long sec;
683
684 // dont cache multicast or broadcast
685 if(d[0] & 1)
686 return 0;
687
688 h = 0;
689 for(i=0; i<Eaddrlen; i++) {
690 h *= 7;
691 h += d[i];
692 }
693 h %= CacheHash;
694 p = b->cache + h;
695 sec = TK2SEC(m->ticks);
696 for(i=0; i<CacheLook; i++,p++) {
697 if(memcmp(d, p->d, Eaddrlen) == 0) {
698 p->dst++;
699 if(sec >= p->expire) {
700 log(b, Logcache, "expired cache entry: %E %d\n",
701 d, p->port);
702 return nil;
703 }
704 p->expire = sec + CacheTimeout;
705 return p;
706 }
707 }
708 log(b, Logcache, "cache miss: %E\n", d);
709 return nil;
710 }
711
712 // assumes b is locked
713 static void
cacheupdate(Bridge * b,uchar d[Eaddrlen],int port)714 cacheupdate(Bridge *b, uchar d[Eaddrlen], int port)
715 {
716 int i;
717 uint h;
718 Centry *p, *pp;
719 long sec;
720
721 // dont cache multicast or broadcast
722 if(d[0] & 1) {
723 log(b, Logcache, "bad source address: %E\n", d);
724 return;
725 }
726
727 h = 0;
728 for(i=0; i<Eaddrlen; i++) {
729 h *= 7;
730 h += d[i];
731 }
732 h %= CacheHash;
733 p = b->cache + h;
734 pp = p;
735 sec = p->expire;
736
737 // look for oldest entry
738 for(i=0; i<CacheLook; i++,p++) {
739 if(memcmp(p->d, d, Eaddrlen) == 0) {
740 p->expire = TK2SEC(m->ticks) + CacheTimeout;
741 if(p->port != port) {
742 log(b, Logcache, "NIC changed port %d->%d: %E\n",
743 p->port, port, d);
744 p->port = port;
745 }
746 p->src++;
747 return;
748 }
749 if(p->expire < sec) {
750 sec = p->expire;
751 pp = p;
752 }
753 }
754 if(pp->expire != 0)
755 log(b, Logcache, "bumping from cache: %E %d\n", pp->d, pp->port);
756 pp->expire = TK2SEC(m->ticks) + CacheTimeout;
757 memmove(pp->d, d, Eaddrlen);
758 pp->port = port;
759 pp->src = 1;
760 pp->dst = 0;
761 log(b, Logcache, "adding to cache: %E %d\n", pp->d, pp->port);
762 }
763
764 // assumes b is locked
765 static void
cacheflushport(Bridge * b,int port)766 cacheflushport(Bridge *b, int port)
767 {
768 Centry *ce;
769 int i;
770
771 ce = b->cache;
772 for(i=0; i<CacheSize; i++,ce++) {
773 if(ce->port != port)
774 continue;
775 memset(ce, 0, sizeof(Centry));
776 }
777 }
778
779 static char *
cachedump(Bridge * b)780 cachedump(Bridge *b)
781 {
782 int i, n;
783 long sec, off;
784 char *buf, *p, *ep;
785 Centry *ce;
786 char c;
787
788 qlock(b);
789 if(waserror()) {
790 qunlock(b);
791 nexterror();
792 }
793 sec = TK2SEC(m->ticks);
794 n = 0;
795 for(i=0; i<CacheSize; i++)
796 if(b->cache[i].expire != 0)
797 n++;
798
799 n *= 51; // change if print format is changed
800 n += 10; // some slop at the end
801 buf = malloc(n);
802 if(buf == nil)
803 error(Enomem);
804 p = buf;
805 ep = buf + n;
806 ce = b->cache;
807 off = seconds() - sec;
808 for(i=0; i<CacheSize; i++,ce++) {
809 if(ce->expire == 0)
810 continue;
811 c = (sec < ce->expire)?'v':'e';
812 p += snprint(p, ep-p, "%E %2d %10ld %10ld %10ld %c\n", ce->d,
813 ce->port, ce->src, ce->dst, ce->expire+off, c);
814 }
815 *p = 0;
816 poperror();
817 qunlock(b);
818
819 return buf;
820 }
821
822
823
824 // assumes b is locked, no error return
825 static void
ethermultiwrite(Bridge * b,Block * bp,Port * port)826 ethermultiwrite(Bridge *b, Block *bp, Port *port)
827 {
828 Port *oport;
829 Etherpkt *ep;
830 int i, mcast;
831
832 ep = (Etherpkt*)bp->rp;
833 mcast = ep->d[0] & 1; /* multicast bit of ethernet address */
834
835 oport = nil;
836 for(i=0; i<b->nport; i++) {
837 if(i == port->id || b->port[i] == nil)
838 continue;
839 /*
840 * we need to forward multicast packets for ipv6,
841 * so always do it.
842 */
843 if(mcast)
844 b->port[i]->outmulti++;
845 else
846 b->port[i]->outunknown++;
847
848 // delay one so that the last write does not copy
849 if(oport != nil) {
850 b->copy++;
851 etherwrite(oport, copyblock(bp, blocklen(bp)));
852 }
853 oport = b->port[i];
854 }
855
856 // last write free block
857 if(oport)
858 etherwrite(oport, bp);
859 else
860 freeb(bp);
861 }
862
863 static void
tcpmsshack(Etherpkt * epkt,int n)864 tcpmsshack(Etherpkt *epkt, int n)
865 {
866 int hl, optlen;
867 Iphdr *iphdr;
868 Tcphdr *tcphdr;
869 ulong mss, cksum;
870 uchar *optr;
871
872 /* ignore non-ipv4 packets */
873 if(nhgets(epkt->type) != ETIP4)
874 return;
875 iphdr = (Iphdr*)(epkt->data);
876 n -= ETHERHDRSIZE;
877 if(n < IPHDR)
878 return;
879
880 /* ignore bad packets */
881 if(iphdr->vihl != (IP_VER4|IP_HLEN4)) {
882 hl = (iphdr->vihl&0xF)<<2;
883 if((iphdr->vihl&0xF0) != IP_VER4 || hl < (IP_HLEN4<<2))
884 return;
885 } else
886 hl = IP_HLEN4<<2;
887
888 /* ignore non-tcp packets */
889 if(iphdr->proto != IP_TCPPROTO)
890 return;
891 n -= hl;
892 if(n < sizeof(Tcphdr))
893 return;
894 tcphdr = (Tcphdr*)((uchar*)(iphdr) + hl);
895 // MSS can only appear in SYN packet
896 if(!(tcphdr->flag[1] & SYN))
897 return;
898 hl = (tcphdr->flag[0] & 0xf0)>>2;
899 if(n < hl)
900 return;
901
902 // check for MSS option
903 optr = (uchar*)tcphdr + sizeof(Tcphdr);
904 n = hl - sizeof(Tcphdr);
905 for(;;) {
906 if(n <= 0 || *optr == EOLOPT)
907 return;
908 if(*optr == NOOPOPT) {
909 n--;
910 optr++;
911 continue;
912 }
913 optlen = optr[1];
914 if(optlen < 2 || optlen > n)
915 return;
916 if(*optr == MSSOPT && optlen == MSS_LENGTH)
917 break;
918 n -= optlen;
919 optr += optlen;
920 }
921
922 mss = nhgets(optr+2);
923 if(mss <= TcpMssMax)
924 return;
925 // fit checksum
926 cksum = nhgets(tcphdr->cksum);
927 if(optr-(uchar*)tcphdr & 1) {
928 print("tcpmsshack: odd alignment!\n");
929 // odd alignments are a pain
930 cksum += nhgets(optr+1);
931 cksum -= (optr[1]<<8)|(TcpMssMax>>8);
932 cksum += (cksum>>16);
933 cksum &= 0xffff;
934 cksum += nhgets(optr+3);
935 cksum -= ((TcpMssMax&0xff)<<8)|optr[4];
936 cksum += (cksum>>16);
937 } else {
938 cksum += mss;
939 cksum -= TcpMssMax;
940 cksum += (cksum>>16);
941 }
942 hnputs(tcphdr->cksum, cksum);
943 hnputs(optr+2, TcpMssMax);
944 }
945
946 /*
947 * process to read from the ethernet
948 */
949 static void
etherread(void * a)950 etherread(void *a)
951 {
952 Port *port = a;
953 Bridge *b = port->bridge;
954 Block *bp;
955 Etherpkt *ep;
956 Centry *ce;
957 long md, n;
958
959 qlock(b);
960 port->readp = up; /* hide identity under a rock for unbind */
961
962 while(!port->closed){
963 // release lock to read - error means it is time to quit
964 qunlock(b);
965 if(waserror()) {
966 print("etherread read error: %s\n", up->errstr);
967 qlock(b);
968 break;
969 }
970 bp = devtab[port->data[0]->type]->bread(port->data[0], MaxMTU, 0);
971 poperror();
972 qlock(b);
973 if(bp == nil)
974 break;
975 n = blocklen(bp);
976 if(port->closed || n < ETHERMINTU){
977 freeb(bp);
978 continue;
979 }
980 if(waserror()) {
981 // print("etherread bridge error\n");
982 freeb(bp);
983 continue;
984 }
985 port->in++;
986
987 ep = (Etherpkt*)bp->rp;
988 cacheupdate(b, ep->s, port->id);
989 if(b->tcpmss)
990 tcpmsshack(ep, n);
991
992 /*
993 * delay packets to simulate a slow link
994 */
995 if(b->delay0 != 0 || b->delayn != 0){
996 md = b->delay0 + b->delayn * n;
997 if(md > 0)
998 microdelay(md);
999 }
1000
1001 poperror(); /* must now dispose of bp */
1002
1003 if(ep->d[0] & 1) {
1004 log(b, Logmcast, "multicast: port=%d src=%E dst=%E type=%#.4ux\n",
1005 port->id, ep->s, ep->d, ep->type[0]<<8|ep->type[1]);
1006 port->inmulti++;
1007 ethermultiwrite(b, bp, port);
1008 } else {
1009 ce = cachelookup(b, ep->d);
1010 if(ce == nil) {
1011 b->miss++;
1012 port->inunknown++;
1013 ethermultiwrite(b, bp, port);
1014 }else if(ce->port != port->id){
1015 b->hit++;
1016 etherwrite(b->port[ce->port], bp);
1017 }else
1018 freeb(bp);
1019 }
1020 }
1021 // print("etherread: trying to exit\n");
1022 port->readp = nil;
1023 portfree(port);
1024 qunlock(b);
1025 pexit("hangup", 1);
1026 }
1027
1028 static int
fragment(Etherpkt * epkt,int n)1029 fragment(Etherpkt *epkt, int n)
1030 {
1031 Iphdr *iphdr;
1032
1033 if(n <= TunnelMtu)
1034 return 0;
1035
1036 /* ignore non-ipv4 packets */
1037 if(nhgets(epkt->type) != ETIP4)
1038 return 0;
1039 iphdr = (Iphdr*)(epkt->data);
1040 n -= ETHERHDRSIZE;
1041 /*
1042 * ignore: IP runt packets, bad packets (I don't handle IP
1043 * options for the moment), packets with don't-fragment set,
1044 * and short blocks.
1045 */
1046 if(n < IPHDR || iphdr->vihl != (IP_VER4|IP_HLEN4) ||
1047 iphdr->frag[0] & (IP_DF>>8) || nhgets(iphdr->length) > n)
1048 return 0;
1049
1050 return 1;
1051 }
1052
1053 static void
etherwrite(Port * port,Block * bp)1054 etherwrite(Port *port, Block *bp)
1055 {
1056 Iphdr *eh, *feh;
1057 Etherpkt *epkt;
1058 int n, lid, len, seglen, chunk, dlen, blklen, offset, mf;
1059 Block *xp, *nb;
1060 ushort fragoff, frag;
1061
1062 port->out++;
1063 epkt = (Etherpkt*)bp->rp;
1064 n = blocklen(bp);
1065 if(port->type != Ttun || !fragment(epkt, n)) {
1066 if(!waserror()){
1067 devtab[port->data[1]->type]->bwrite(port->data[1], bp, 0);
1068 poperror();
1069 }
1070 return;
1071 }
1072 port->outfrag++;
1073 if(waserror()){
1074 freeblist(bp);
1075 return;
1076 }
1077
1078 seglen = (TunnelMtu - ETHERHDRSIZE - IPHDR) & ~7;
1079 eh = (Iphdr*)(epkt->data);
1080 len = nhgets(eh->length);
1081 frag = nhgets(eh->frag);
1082 mf = frag & IP_MF;
1083 frag <<= 3;
1084 dlen = len - IPHDR;
1085 xp = bp;
1086 lid = nhgets(eh->id);
1087 offset = ETHERHDRSIZE+IPHDR;
1088 while(xp != nil && offset && offset >= BLEN(xp)) {
1089 offset -= BLEN(xp);
1090 xp = xp->next;
1091 }
1092 xp->rp += offset;
1093
1094 if(0)
1095 print("seglen=%d, dlen=%d, mf=%x, frag=%d\n",
1096 seglen, dlen, mf, frag);
1097 for(fragoff = 0; fragoff < dlen; fragoff += seglen) {
1098 nb = allocb(ETHERHDRSIZE+IPHDR+seglen);
1099
1100 feh = (Iphdr*)(nb->wp+ETHERHDRSIZE);
1101
1102 memmove(nb->wp, epkt, ETHERHDRSIZE+IPHDR);
1103 nb->wp += ETHERHDRSIZE+IPHDR;
1104
1105 if((fragoff + seglen) >= dlen) {
1106 seglen = dlen - fragoff;
1107 hnputs(feh->frag, (frag+fragoff)>>3 | mf);
1108 }
1109 else
1110 hnputs(feh->frag, (frag+fragoff>>3) | IP_MF);
1111
1112 hnputs(feh->length, seglen + IPHDR);
1113 hnputs(feh->id, lid);
1114
1115 /* Copy up the data area */
1116 chunk = seglen;
1117 while(chunk) {
1118 blklen = chunk;
1119 if(BLEN(xp) < chunk)
1120 blklen = BLEN(xp);
1121 memmove(nb->wp, xp->rp, blklen);
1122 nb->wp += blklen;
1123 xp->rp += blklen;
1124 chunk -= blklen;
1125 if(xp->rp == xp->wp)
1126 xp = xp->next;
1127 }
1128
1129 feh->cksum[0] = 0;
1130 feh->cksum[1] = 0;
1131 hnputs(feh->cksum, ipcsum(&feh->vihl));
1132
1133 /* don't generate small packets */
1134 if(BLEN(nb) < ETHERMINTU)
1135 nb->wp = nb->rp + ETHERMINTU;
1136 devtab[port->data[1]->type]->bwrite(port->data[1], nb, 0);
1137 }
1138 poperror();
1139 freeblist(bp);
1140 }
1141
1142 // hold b lock
1143 static void
portfree(Port * port)1144 portfree(Port *port)
1145 {
1146 if(decref(port) != 0)
1147 return;
1148
1149 if(port->data[0])
1150 cclose(port->data[0]);
1151 if(port->data[1])
1152 cclose(port->data[1]);
1153 memset(port, 0, sizeof(Port));
1154 free(port);
1155 }
1156
1157 Dev bridgedevtab = {
1158 'B',
1159 "bridge",
1160
1161 devreset,
1162 bridgeinit,
1163 devshutdown,
1164 bridgeattach,
1165 bridgewalk,
1166 bridgestat,
1167 bridgeopen,
1168 devcreate,
1169 bridgeclose,
1170 bridgeread,
1171 devbread,
1172 bridgewrite,
1173 devbwrite,
1174 devremove,
1175 devwstat,
1176 };
1177