xref: /plan9-contrib/sys/src/9/port/devbridge.c (revision a129eb931c3e454c10d1dd5b175b8baa61325928)
1 /*
2  * IPv4 Ethernet bridge
3  */
4 #include "u.h"
5 #include "../port/lib.h"
6 #include "mem.h"
7 #include "dat.h"
8 #include "fns.h"
9 #include "../ip/ip.h"
10 #include "../port/netif.h"
11 #include "../port/error.h"
12 
/* short names for the structures declared further down in this file */
typedef struct Bridge	Bridge;
typedef struct Centry	Centry;
typedef struct Iphdr	Iphdr;
typedef struct Port	Port;
typedef struct Tcphdr	Tcphdr;
18 
19 enum
20 {
21 	Qtopdir=	1,		/* top level directory */
22 
23 	Qbridgedir,			/* bridge* directory */
24 	Qbctl,
25 	Qstats,
26 	Qcache,
27 	Qlog,
28 
29 	Qportdir,			/* directory for a protocol */
30 	Qpctl,
31 	Qlocal,
32 	Qstatus,
33 
34 	MaxQ,
35 
36 	Maxbridge=	4,
37 	Maxport=	128,		// power of 2
38 	CacheHash=	257,		// prime
39 	CacheLook=	5,		// how many cache entries to examine
40 	CacheSize=	(CacheHash+CacheLook-1),
41 	CacheTimeout=	5*60,		// timeout for cache entry in seconds
42 	MaxMTU=	IP_MAX,	// allow for jumbo frames and large UDP
43 
44 	TcpMssMax = 1300,		// max desirable Tcp MSS value
45 	TunnelMtu = 1400,
46 };
47 
48 static Dirtab bridgedirtab[]={
49 	"ctl",		{Qbctl},	0,	0666,
50 	"stats",	{Qstats},	0,	0444,
51 	"cache",	{Qcache},	0,	0444,
52 	"log",		{Qlog},		0,	0666,
53 };
54 
55 static Dirtab portdirtab[]={
56 	"ctl",		{Qpctl},	0,	0666,
57 	"local",	{Qlocal},	0,	0444,
58 	"status",	{Qstatus},	0,	0444,
59 };
60 
/* log classes; each is a distinct bit so they can be combined in a mask */
enum {
	Logcache = 0x01,	/* address cache activity */
	Logmcast = 0x02,	/* multicast/broadcast packets */
};
65 
/* kinds of interface a port can be bound to (Port.type) */
enum
{
	Tether = 0,	/* bound with "bind ether ..." */
	Ttun = 1,	/* bound with "bind tunnel ..." */
};
72 
73 static Logflag logflags[] =
74 {
75 	{ "cache",	Logcache, },
76 	{ "multicast",	Logmcast, },
77 	{ nil,		0, },
78 };
79 
80 static Dirtab	*dirtab[MaxQ];
81 
/*
 * Qid path layout: low 8 bits hold the file type (a Q* value),
 * the bits above hold the port number.
 */
#define TYPE(q)		(((ulong)(q).path) & 0xff)
#define PORT(q)		((((ulong)(q).path) >> 8) & (Maxport-1))
#define QID(p, t)	(((p)<<8) | (t))
85 
86 struct Centry
87 {
88 	uchar	d[Eaddrlen];
89 	int	port;
90 	long	expire;		// entry expires this many seconds after bootime
91 	long	src;
92 	long	dst;
93 };
94 
95 struct Bridge
96 {
97 	QLock;
98 	int	nport;
99 	Port	*port[Maxport];
100 	Centry	cache[CacheSize];
101 	ulong	hit;
102 	ulong	miss;
103 	ulong	copy;
104 	long	delay0;		// constant microsecond delay per packet
105 	long	delayn;		// microsecond delay per byte
106 	int	tcpmss;		// modify tcpmss value
107 
108 	Log;
109 };
110 
111 struct Port
112 {
113 	Ref;
114 	int	id;
115 	Bridge	*bridge;
116 	int	closed;
117 
118 	Chan	*data[2];	// channel to data
119 
120 	Proc	*readp;		// read proc
121 
122 	// the following uniquely identifies the port
123 	int	type;
124 	char	name[KNAMELEN];
125 
126 	// owner hash - avoids bind/unbind races
127 	ulong	ownhash;
128 
129 	// various stats
130 	int	in;		// number of packets read
131 	int	inmulti;	// multicast or broadcast
132 	int	inunknown;	// unknown address
133 	int	out;		// number of packets read
134 	int	outmulti;	// multicast or broadcast
135 	int	outunknown;	// unknown address
136 	int	outfrag;	// fragmented the packet
137 	int	nentry;		// number of cache entries for this port
138 };
139 
/* protocol constants used when rewriting TCP options and fragmenting */
enum {
	IP_TCPPROTO	= 6,		/* IP protocol number of TCP */
	EOLOPT		= 0,		/* TCP option kind: end of option list */
	NOOPOPT		= 1,		/* TCP option kind: no-op padding */
	MSSOPT		= 2,		/* TCP option kind: maximum segment size */
	MSS_LENGTH	= 4,		/* length in bytes of the MSS option */
	SYN		= 0x02,		/* TCP flag: packet is a synchronise */
	IPHDR		= 20,		/* sizeof(Iphdr) */
};
149 
150 struct Iphdr
151 {
152 	uchar	vihl;		/* Version and header length */
153 	uchar	tos;		/* Type of service */
154 	uchar	length[2];	/* packet length */
155 	uchar	id[2];		/* ip->identification */
156 	uchar	frag[2];	/* Fragment information */
157 	uchar	ttl;		/* Time to live */
158 	uchar	proto;		/* Protocol */
159 	uchar	cksum[2];	/* Header checksum */
160 	uchar	src[4];		/* IP source */
161 	uchar	dst[4];		/* IP destination */
162 };
163 
164 struct Tcphdr
165 {
166 	uchar	sport[2];
167 	uchar	dport[2];
168 	uchar	seq[4];
169 	uchar	ack[4];
170 	uchar	flag[2];
171 	uchar	win[2];
172 	uchar	cksum[2];
173 	uchar	urg[2];
174 };
175 
176 static Bridge bridgetab[Maxbridge];
177 
178 static int m2p[] = {
179 	[OREAD]		4,
180 	[OWRITE]	2,
181 	[ORDWR]		6
182 };
183 
184 static int	bridgegen(Chan *c, char*, Dirtab*, int, int s, Dir *dp);
185 static void	portbind(Bridge *b, int argc, char *argv[]);
186 static void	portunbind(Bridge *b, int argc, char *argv[]);
187 static void	etherread(void *a);
188 static char	*cachedump(Bridge *b);
189 static void	portfree(Port *port);
190 static void	cacheflushport(Bridge *b, int port);
191 static void	etherwrite(Port *port, Block *bp);
192 
193 static void
bridgeinit(void)194 bridgeinit(void)
195 {
196 	int i;
197 	Dirtab *dt;
198 
199 	// setup dirtab with non directory entries
200 	for(i=0; i<nelem(bridgedirtab); i++) {
201 		dt = bridgedirtab + i;
202 		dirtab[TYPE(dt->qid)] = dt;
203 	}
204 	for(i=0; i<nelem(portdirtab); i++) {
205 		dt = portdirtab + i;
206 		dirtab[TYPE(dt->qid)] = dt;
207 	}
208 }
209 
210 static Chan*
bridgeattach(char * spec)211 bridgeattach(char* spec)
212 {
213 	Chan *c;
214 	int dev;
215 
216 	dev = atoi(spec);
217 	if(dev<0 || dev >= Maxbridge)
218 		error("bad specification");
219 
220 	c = devattach('B', spec);
221 	mkqid(&c->qid, QID(0, Qtopdir), 0, QTDIR);
222 	c->dev = dev;
223 	return c;
224 }
225 
226 static Walkqid*
bridgewalk(Chan * c,Chan * nc,char ** name,int nname)227 bridgewalk(Chan *c, Chan *nc, char **name, int nname)
228 {
229 	return devwalk(c, nc, name, nname, (Dirtab*)0, 0, bridgegen);
230 }
231 
232 static int
bridgestat(Chan * c,uchar * db,int n)233 bridgestat(Chan* c, uchar* db, int n)
234 {
235 	return devstat(c, db, n, (Dirtab *)0, 0L, bridgegen);
236 }
237 
238 static Chan*
bridgeopen(Chan * c,int omode)239 bridgeopen(Chan* c, int omode)
240 {
241 	int perm;
242 	Bridge *b;
243 
244 	omode &= 3;
245 	perm = m2p[omode];
246 	USED(perm);
247 
248 	b = bridgetab + c->dev;
249 	USED(b);
250 
251 	switch(TYPE(c->qid)) {
252 	default:
253 		break;
254 	case Qlog:
255 		logopen(b);
256 		break;
257 	case Qcache:
258 		c->aux = cachedump(b);
259 		break;
260 	}
261 	c->mode = openmode(omode);
262 	c->flag |= COPEN;
263 	c->offset = 0;
264 	return c;
265 }
266 
267 static void
bridgeclose(Chan * c)268 bridgeclose(Chan* c)
269 {
270 	Bridge *b  = bridgetab + c->dev;
271 
272 	switch(TYPE(c->qid)) {
273 	case Qcache:
274 		if(c->flag & COPEN)
275 			free(c->aux);
276 		break;
277 	case Qlog:
278 		if(c->flag & COPEN)
279 			logclose(b);
280 		break;
281 	}
282 }
283 
284 static long
bridgeread(Chan * c,void * a,long n,vlong off)285 bridgeread(Chan *c, void *a, long n, vlong off)
286 {
287 	char buf[256];
288 	Bridge *b = bridgetab + c->dev;
289 	Port *port;
290 	int i, ingood, outgood;
291 
292 	USED(off);
293 	switch(TYPE(c->qid)) {
294 	default:
295 		error(Egreg);
296 	case Qtopdir:
297 	case Qbridgedir:
298 	case Qportdir:
299 		return devdirread(c, a, n, 0, 0, bridgegen);
300 	case Qlog:
301 		return logread(b, a, off, n);
302 	case Qlocal:
303 		return 0;	/* TO DO */
304 	case Qstatus:
305 		qlock(b);
306 		if(waserror()){
307 			qunlock(b);
308 			nexterror();
309 		}
310 		port = b->port[PORT(c->qid)];
311 		if(port == 0)
312 			strcpy(buf, "unbound\n");
313 		else {
314 			i = 0;
315 			switch(port->type) {
316 			default:
317 				panic("bridgeread: unknown port type: %d",
318 					port->type);
319 			case Tether:
320 				i += snprint(buf+i, sizeof(buf)-i, "ether %s: ", port->name);
321 				break;
322 			case Ttun:
323 				i += snprint(buf+i, sizeof(buf)-i, "tunnel %s: ", port->name);
324 				break;
325 			}
326 			ingood = port->in - port->inmulti - port->inunknown;
327 			outgood = port->out - port->outmulti - port->outunknown;
328 			snprint(buf+i, sizeof(buf)-i,
329 				"in=%d(%d:%d:%d) out=%d(%d:%d:%d:%d)\n",
330 				port->in, ingood, port->inmulti, port->inunknown,
331 				port->out, outgood, port->outmulti,
332 				port->outunknown, port->outfrag);
333 		}
334 		poperror();
335 		qunlock(b);
336 		return readstr(off, a, n, buf);
337 	case Qbctl:
338 		snprint(buf, sizeof(buf), "%s tcpmss\ndelay %ld %ld\n",
339 			b->tcpmss ? "set" : "clear", b->delay0, b->delayn);
340 		n = readstr(off, a, n, buf);
341 		return n;
342 	case Qcache:
343 		n = readstr(off, a, n, c->aux);
344 		return n;
345 	case Qstats:
346 		snprint(buf, sizeof(buf), "hit=%uld miss=%uld copy=%uld\n",
347 			b->hit, b->miss, b->copy);
348 		n = readstr(off, a, n, buf);
349 		return n;
350 	}
351 }
352 
353 static void
bridgeoption(Bridge * b,char * option,int value)354 bridgeoption(Bridge *b, char *option, int value)
355 {
356 	if(strcmp(option, "tcpmss") == 0)
357 		b->tcpmss = value;
358 	else
359 		error("unknown bridge option");
360 }
361 
362 
363 static long
bridgewrite(Chan * c,void * a,long n,vlong off)364 bridgewrite(Chan *c, void *a, long n, vlong off)
365 {
366 	Bridge *b = bridgetab + c->dev;
367 	Cmdbuf *cb;
368 	char *arg0, *p;
369 
370 	USED(off);
371 	switch(TYPE(c->qid)) {
372 	default:
373 		error(Eperm);
374 	case Qbctl:
375 		cb = parsecmd(a, n);
376 		qlock(b);
377 		if(waserror()) {
378 			qunlock(b);
379 			free(cb);
380 			nexterror();
381 		}
382 		if(cb->nf == 0)
383 			error("short write");
384 		arg0 = cb->f[0];
385 		if(strcmp(arg0, "bind") == 0) {
386 			portbind(b, cb->nf-1, cb->f+1);
387 		} else if(strcmp(arg0, "unbind") == 0) {
388 			portunbind(b, cb->nf-1, cb->f+1);
389 		} else if(strcmp(arg0, "cacheflush") == 0) {
390 			log(b, Logcache, "cache flush\n");
391 			memset(b->cache, 0, CacheSize*sizeof(Centry));
392 		} else if(strcmp(arg0, "set") == 0) {
393 			if(cb->nf != 2)
394 				error("usage: set option");
395 			bridgeoption(b, cb->f[1], 1);
396 		} else if(strcmp(arg0, "clear") == 0) {
397 			if(cb->nf != 2)
398 				error("usage: clear option");
399 			bridgeoption(b, cb->f[1], 0);
400 		} else if(strcmp(arg0, "delay") == 0) {
401 			if(cb->nf != 3)
402 				error("usage: delay delay0 delayn");
403 			b->delay0 = strtol(cb->f[1], nil, 10);
404 			b->delayn = strtol(cb->f[2], nil, 10);
405 		} else
406 			error("unknown control request");
407 		poperror();
408 		qunlock(b);
409 		free(cb);
410 		return n;
411 	case Qlog:
412 		cb = parsecmd(a, n);
413 		p = logctl(b, cb->nf, cb->f, logflags);
414 		free(cb);
415 		if(p != nil)
416 			error(p);
417 		return n;
418 	}
419 }
420 
421 static int
bridgegen(Chan * c,char *,Dirtab *,int,int s,Dir * dp)422 bridgegen(Chan *c, char *, Dirtab*, int, int s, Dir *dp)
423 {
424 	Bridge *b = bridgetab + c->dev;
425 	int type = TYPE(c->qid);
426 	Dirtab *dt;
427 	Qid qid;
428 
429 	if(s  == DEVDOTDOT){
430 		switch(TYPE(c->qid)){
431 		case Qtopdir:
432 		case Qbridgedir:
433 			snprint(up->genbuf, sizeof(up->genbuf), "#B%ld", c->dev);
434 			mkqid(&qid, Qtopdir, 0, QTDIR);
435 			devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
436 			break;
437 		case Qportdir:
438 			snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev);
439 			mkqid(&qid, Qbridgedir, 0, QTDIR);
440 			devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
441 			break;
442 		default:
443 			panic("bridgewalk %llux", c->qid.path);
444 		}
445 		return 1;
446 	}
447 
448 	switch(type) {
449 	default:
450 		/* non-directory entries end up here */
451 		if(c->qid.type & QTDIR)
452 			panic("bridgegen: unexpected directory");
453 		if(s != 0)
454 			return -1;
455 		dt = dirtab[TYPE(c->qid)];
456 		if(dt == nil)
457 			panic("bridgegen: unknown type: %lud", TYPE(c->qid));
458 		devdir(c, c->qid, dt->name, dt->length, eve, dt->perm, dp);
459 		return 1;
460 	case Qtopdir:
461 		if(s != 0)
462 			return -1;
463 		snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev);
464 		mkqid(&qid, QID(0, Qbridgedir), 0, QTDIR);
465 		devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
466 		return 1;
467 	case Qbridgedir:
468 		if(s<nelem(bridgedirtab)) {
469 			dt = bridgedirtab+s;
470 			devdir(c, dt->qid, dt->name, dt->length, eve, dt->perm, dp);
471 			return 1;
472 		}
473 		s -= nelem(bridgedirtab);
474 		if(s >= b->nport)
475 			return -1;
476 		mkqid(&qid, QID(s, Qportdir), 0, QTDIR);
477 		snprint(up->genbuf, sizeof(up->genbuf), "%d", s);
478 		devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
479 		return 1;
480 	case Qportdir:
481 		if(s>=nelem(portdirtab))
482 			return -1;
483 		dt = portdirtab+s;
484 		mkqid(&qid, QID(PORT(c->qid),TYPE(dt->qid)), 0, QTFILE);
485 		devdir(c, qid, dt->name, dt->length, eve, dt->perm, dp);
486 		return 1;
487 	}
488 }
489 
490 // parse mac address; also in netif.c
491 static int
parseaddr(uchar * to,char * from,int alen)492 parseaddr(uchar *to, char *from, int alen)
493 {
494 	char nip[4];
495 	char *p;
496 	int i;
497 
498 	p = from;
499 	for(i = 0; i < alen; i++){
500 		if(*p == 0)
501 			return -1;
502 		nip[0] = *p++;
503 		if(*p == 0)
504 			return -1;
505 		nip[1] = *p++;
506 		nip[2] = 0;
507 		to[i] = strtoul(nip, 0, 16);
508 		if(*p == ':')
509 			p++;
510 	}
511 	return 0;
512 }
513 
514 // assumes b is locked
515 static void
portbind(Bridge * b,int argc,char * argv[])516 portbind(Bridge *b, int argc, char *argv[])
517 {
518 	Port *port;
519 	Chan *ctl;
520 	int type = 0, i, n;
521 	ulong ownhash;
522 	char *dev, *dev2 = nil;
523 	char buf[100], name[KNAMELEN], path[8*KNAMELEN];
524 	static char usage[] = "usage: bind ether|tunnel name ownhash dev [dev2]";
525 
526 	memset(name, 0, KNAMELEN);
527 	if(argc < 4)
528 		error(usage);
529 	if(strcmp(argv[0], "ether") == 0) {
530 		if(argc != 4)
531 			error(usage);
532 		type = Tether;
533 		strncpy(name, argv[1], KNAMELEN);
534 		name[KNAMELEN-1] = 0;
535 //		parseaddr(addr, argv[1], Eaddrlen);
536 	} else if(strcmp(argv[0], "tunnel") == 0) {
537 		if(argc != 5)
538 			error(usage);
539 		type = Ttun;
540 		strncpy(name, argv[1], KNAMELEN);
541 		name[KNAMELEN-1] = 0;
542 //		parseip(addr, argv[1]);
543 		dev2 = argv[4];
544 	} else
545 		error(usage);
546 	ownhash = atoi(argv[2]);
547 	dev = argv[3];
548 	for(i=0; i<b->nport; i++) {
549 		port = b->port[i];
550 		if(port != nil && port->type == type &&
551 		    memcmp(port->name, name, KNAMELEN) == 0)
552 			error("port in use");
553 	}
554 	for(i=0; i<Maxport; i++)
555 		if(b->port[i] == nil)
556 			break;
557 	if(i == Maxport)
558 		error("no more ports");
559 	port = smalloc(sizeof(Port));
560 	port->ref = 1;
561 	port->id = i;
562 	port->ownhash = ownhash;
563 
564 	if(waserror()) {
565 		portfree(port);
566 		nexterror();
567 	}
568 	port->type = type;
569 	memmove(port->name, name, KNAMELEN);
570 	switch(port->type) {
571 	default:
572 		panic("portbind: unknown port type: %d", type);
573 	case Tether:
574 		snprint(path, sizeof(path), "%s/clone", dev);
575 		ctl = namec(path, Aopen, ORDWR, 0);
576 		if(waserror()) {
577 			cclose(ctl);
578 			nexterror();
579 		}
580 		// check addr?
581 
582 		// get directory name
583 		n = devtab[ctl->type]->read(ctl, buf, sizeof(buf)-1, 0);
584 		buf[n] = 0;
585 		snprint(path, sizeof(path), "%s/%lud/data", dev, strtoul(buf, 0, 0));
586 
587 		// setup connection to be promiscuous
588 		snprint(buf, sizeof(buf), "connect -1");
589 		devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
590 		snprint(buf, sizeof(buf), "promiscuous");
591 		devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
592 		snprint(buf, sizeof(buf), "bridge");
593 		devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
594 
595 		// open data port
596 		port->data[0] = namec(path, Aopen, ORDWR, 0);
597 		// dup it
598 		incref(port->data[0]);
599 		port->data[1] = port->data[0];
600 
601 		poperror();
602 		cclose(ctl);
603 
604 		break;
605 	case Ttun:
606 		port->data[0] = namec(dev, Aopen, OREAD, 0);
607 		port->data[1] = namec(dev2, Aopen, OWRITE, 0);
608 		break;
609 	}
610 
611 	poperror();
612 
613 	/* committed to binding port */
614 	b->port[port->id] = port;
615 	port->bridge = b;
616 	if(b->nport <= port->id)
617 		b->nport = port->id+1;
618 
619 	// assumes kproc always succeeds
620 	incref(port);
621 	snprint(buf, sizeof(buf), "bridge:%s", dev);
622 	kproc(buf, etherread, port);
623 }
624 
625 // assumes b is locked
626 static void
portunbind(Bridge * b,int argc,char * argv[])627 portunbind(Bridge *b, int argc, char *argv[])
628 {
629 	int type = 0, i;
630 	char name[KNAMELEN];
631 	ulong ownhash;
632 	Port *port = nil;
633 	static char usage[] = "usage: unbind ether|tunnel addr [ownhash]";
634 
635 	memset(name, 0, KNAMELEN);
636 	if(argc < 2 || argc > 3)
637 		error(usage);
638 	if(strcmp(argv[0], "ether") == 0) {
639 		type = Tether;
640 		strncpy(name, argv[1], KNAMELEN);
641 		name[KNAMELEN-1] = 0;
642 //		parseaddr(addr, argv[1], Eaddrlen);
643 	} else if(strcmp(argv[0], "tunnel") == 0) {
644 		type = Ttun;
645 		strncpy(name, argv[1], KNAMELEN);
646 		name[KNAMELEN-1] = 0;
647 //		parseip(addr, argv[1]);
648 	} else
649 		error(usage);
650 	if(argc == 3)
651 		ownhash = atoi(argv[2]);
652 	else
653 		ownhash = 0;
654 	for(i=0; i<b->nport; i++) {
655 		port = b->port[i];
656 		if(port != nil && port->type == type &&
657 		    memcmp(port->name, name, KNAMELEN) == 0)
658 			break;
659 	}
660 	if(i == b->nport)
661 		error("port not found");
662 	if(ownhash != 0 && port->ownhash != 0 && ownhash != port->ownhash)
663 		error("bad owner hash");
664 
665 	port->closed = 1;
666 	b->port[i] = nil;	// port is now unbound
667 	cacheflushport(b, i);
668 
669 	// try and stop reader
670 	if(port->readp)
671 		postnote(port->readp, 1, "unbind", 0);
672 	portfree(port);
673 }
674 
675 // assumes b is locked
676 static Centry *
cachelookup(Bridge * b,uchar d[Eaddrlen])677 cachelookup(Bridge *b, uchar d[Eaddrlen])
678 {
679 	int i;
680 	uint h;
681 	Centry *p;
682 	long sec;
683 
684 	// dont cache multicast or broadcast
685 	if(d[0] & 1)
686 		return 0;
687 
688 	h = 0;
689 	for(i=0; i<Eaddrlen; i++) {
690 		h *= 7;
691 		h += d[i];
692 	}
693 	h %= CacheHash;
694 	p = b->cache + h;
695 	sec = TK2SEC(m->ticks);
696 	for(i=0; i<CacheLook; i++,p++) {
697 		if(memcmp(d, p->d, Eaddrlen) == 0) {
698 			p->dst++;
699 			if(sec >= p->expire) {
700 				log(b, Logcache, "expired cache entry: %E %d\n",
701 					d, p->port);
702 				return nil;
703 			}
704 			p->expire = sec + CacheTimeout;
705 			return p;
706 		}
707 	}
708 	log(b, Logcache, "cache miss: %E\n", d);
709 	return nil;
710 }
711 
712 // assumes b is locked
713 static void
cacheupdate(Bridge * b,uchar d[Eaddrlen],int port)714 cacheupdate(Bridge *b, uchar d[Eaddrlen], int port)
715 {
716 	int i;
717 	uint h;
718 	Centry *p, *pp;
719 	long sec;
720 
721 	// dont cache multicast or broadcast
722 	if(d[0] & 1) {
723 		log(b, Logcache, "bad source address: %E\n", d);
724 		return;
725 	}
726 
727 	h = 0;
728 	for(i=0; i<Eaddrlen; i++) {
729 		h *= 7;
730 		h += d[i];
731 	}
732 	h %= CacheHash;
733 	p = b->cache + h;
734 	pp = p;
735 	sec = p->expire;
736 
737 	// look for oldest entry
738 	for(i=0; i<CacheLook; i++,p++) {
739 		if(memcmp(p->d, d, Eaddrlen) == 0) {
740 			p->expire = TK2SEC(m->ticks) + CacheTimeout;
741 			if(p->port != port) {
742 				log(b, Logcache, "NIC changed port %d->%d: %E\n",
743 					p->port, port, d);
744 				p->port = port;
745 			}
746 			p->src++;
747 			return;
748 		}
749 		if(p->expire < sec) {
750 			sec = p->expire;
751 			pp = p;
752 		}
753 	}
754 	if(pp->expire != 0)
755 		log(b, Logcache, "bumping from cache: %E %d\n", pp->d, pp->port);
756 	pp->expire = TK2SEC(m->ticks) + CacheTimeout;
757 	memmove(pp->d, d, Eaddrlen);
758 	pp->port = port;
759 	pp->src = 1;
760 	pp->dst = 0;
761 	log(b, Logcache, "adding to cache: %E %d\n", pp->d, pp->port);
762 }
763 
764 // assumes b is locked
765 static void
cacheflushport(Bridge * b,int port)766 cacheflushport(Bridge *b, int port)
767 {
768 	Centry *ce;
769 	int i;
770 
771 	ce = b->cache;
772 	for(i=0; i<CacheSize; i++,ce++) {
773 		if(ce->port != port)
774 			continue;
775 		memset(ce, 0, sizeof(Centry));
776 	}
777 }
778 
779 static char *
cachedump(Bridge * b)780 cachedump(Bridge *b)
781 {
782 	int i, n;
783 	long sec, off;
784 	char *buf, *p, *ep;
785 	Centry *ce;
786 	char c;
787 
788 	qlock(b);
789 	if(waserror()) {
790 		qunlock(b);
791 		nexterror();
792 	}
793 	sec = TK2SEC(m->ticks);
794 	n = 0;
795 	for(i=0; i<CacheSize; i++)
796 		if(b->cache[i].expire != 0)
797 			n++;
798 
799 	n *= 51;	// change if print format is changed
800 	n += 10;	// some slop at the end
801 	buf = malloc(n);
802 	if(buf == nil)
803 		error(Enomem);
804 	p = buf;
805 	ep = buf + n;
806 	ce = b->cache;
807 	off = seconds() - sec;
808 	for(i=0; i<CacheSize; i++,ce++) {
809 		if(ce->expire == 0)
810 			continue;
811 		c = (sec < ce->expire)?'v':'e';
812 		p += snprint(p, ep-p, "%E %2d %10ld %10ld %10ld %c\n", ce->d,
813 			ce->port, ce->src, ce->dst, ce->expire+off, c);
814 	}
815 	*p = 0;
816 	poperror();
817 	qunlock(b);
818 
819 	return buf;
820 }
821 
822 
823 
824 // assumes b is locked, no error return
825 static void
ethermultiwrite(Bridge * b,Block * bp,Port * port)826 ethermultiwrite(Bridge *b, Block *bp, Port *port)
827 {
828 	Port *oport;
829 	Etherpkt *ep;
830 	int i, mcast;
831 
832 	ep = (Etherpkt*)bp->rp;
833 	mcast = ep->d[0] & 1;		/* multicast bit of ethernet address */
834 
835 	oport = nil;
836 	for(i=0; i<b->nport; i++) {
837 		if(i == port->id || b->port[i] == nil)
838 			continue;
839 		/*
840 		 * we need to forward multicast packets for ipv6,
841 		 * so always do it.
842 		 */
843 		if(mcast)
844 			b->port[i]->outmulti++;
845 		else
846 			b->port[i]->outunknown++;
847 
848 		// delay one so that the last write does not copy
849 		if(oport != nil) {
850 			b->copy++;
851 			etherwrite(oport, copyblock(bp, blocklen(bp)));
852 		}
853 		oport = b->port[i];
854 	}
855 
856 	// last write free block
857 	if(oport)
858 		etherwrite(oport, bp);
859 	else
860 		freeb(bp);
861 }
862 
863 static void
tcpmsshack(Etherpkt * epkt,int n)864 tcpmsshack(Etherpkt *epkt, int n)
865 {
866 	int hl, optlen;
867 	Iphdr *iphdr;
868 	Tcphdr *tcphdr;
869 	ulong mss, cksum;
870 	uchar *optr;
871 
872 	/* ignore non-ipv4 packets */
873 	if(nhgets(epkt->type) != ETIP4)
874 		return;
875 	iphdr = (Iphdr*)(epkt->data);
876 	n -= ETHERHDRSIZE;
877 	if(n < IPHDR)
878 		return;
879 
880 	/* ignore bad packets */
881 	if(iphdr->vihl != (IP_VER4|IP_HLEN4)) {
882 		hl = (iphdr->vihl&0xF)<<2;
883 		if((iphdr->vihl&0xF0) != IP_VER4 || hl < (IP_HLEN4<<2))
884 			return;
885 	} else
886 		hl = IP_HLEN4<<2;
887 
888 	/* ignore non-tcp packets */
889 	if(iphdr->proto != IP_TCPPROTO)
890 		return;
891 	n -= hl;
892 	if(n < sizeof(Tcphdr))
893 		return;
894 	tcphdr = (Tcphdr*)((uchar*)(iphdr) + hl);
895 	// MSS can only appear in SYN packet
896 	if(!(tcphdr->flag[1] & SYN))
897 		return;
898 	hl = (tcphdr->flag[0] & 0xf0)>>2;
899 	if(n < hl)
900 		return;
901 
902 	// check for MSS option
903 	optr = (uchar*)tcphdr + sizeof(Tcphdr);
904 	n = hl - sizeof(Tcphdr);
905 	for(;;) {
906 		if(n <= 0 || *optr == EOLOPT)
907 			return;
908 		if(*optr == NOOPOPT) {
909 			n--;
910 			optr++;
911 			continue;
912 		}
913 		optlen = optr[1];
914 		if(optlen < 2 || optlen > n)
915 			return;
916 		if(*optr == MSSOPT && optlen == MSS_LENGTH)
917 			break;
918 		n -= optlen;
919 		optr += optlen;
920 	}
921 
922 	mss = nhgets(optr+2);
923 	if(mss <= TcpMssMax)
924 		return;
925 	// fit checksum
926 	cksum = nhgets(tcphdr->cksum);
927 	if(optr-(uchar*)tcphdr & 1) {
928 print("tcpmsshack: odd alignment!\n");
929 		// odd alignments are a pain
930 		cksum += nhgets(optr+1);
931 		cksum -= (optr[1]<<8)|(TcpMssMax>>8);
932 		cksum += (cksum>>16);
933 		cksum &= 0xffff;
934 		cksum += nhgets(optr+3);
935 		cksum -= ((TcpMssMax&0xff)<<8)|optr[4];
936 		cksum += (cksum>>16);
937 	} else {
938 		cksum += mss;
939 		cksum -= TcpMssMax;
940 		cksum += (cksum>>16);
941 	}
942 	hnputs(tcphdr->cksum, cksum);
943 	hnputs(optr+2, TcpMssMax);
944 }
945 
946 /*
947  *  process to read from the ethernet
948  */
949 static void
etherread(void * a)950 etherread(void *a)
951 {
952 	Port *port = a;
953 	Bridge *b = port->bridge;
954 	Block *bp;
955 	Etherpkt *ep;
956 	Centry *ce;
957 	long md, n;
958 
959 	qlock(b);
960 	port->readp = up;	/* hide identity under a rock for unbind */
961 
962 	while(!port->closed){
963 		// release lock to read - error means it is time to quit
964 		qunlock(b);
965 		if(waserror()) {
966 			print("etherread read error: %s\n", up->errstr);
967 			qlock(b);
968 			break;
969 		}
970 		bp = devtab[port->data[0]->type]->bread(port->data[0], MaxMTU, 0);
971 		poperror();
972 		qlock(b);
973 		if(bp == nil)
974 			break;
975 		n = blocklen(bp);
976 		if(port->closed || n < ETHERMINTU){
977 			freeb(bp);
978 			continue;
979 		}
980 		if(waserror()) {
981 //			print("etherread bridge error\n");
982 			freeb(bp);
983 			continue;
984 		}
985 		port->in++;
986 
987 		ep = (Etherpkt*)bp->rp;
988 		cacheupdate(b, ep->s, port->id);
989 		if(b->tcpmss)
990 			tcpmsshack(ep, n);
991 
992 		/*
993 		 * delay packets to simulate a slow link
994 		 */
995 		if(b->delay0 != 0 || b->delayn != 0){
996 			md = b->delay0 + b->delayn * n;
997 			if(md > 0)
998 				microdelay(md);
999 		}
1000 
1001 		poperror();	/* must now dispose of bp */
1002 
1003 		if(ep->d[0] & 1) {
1004 			log(b, Logmcast, "multicast: port=%d src=%E dst=%E type=%#.4ux\n",
1005 				port->id, ep->s, ep->d, ep->type[0]<<8|ep->type[1]);
1006 			port->inmulti++;
1007 			ethermultiwrite(b, bp, port);
1008 		} else {
1009 			ce = cachelookup(b, ep->d);
1010 			if(ce == nil) {
1011 				b->miss++;
1012 				port->inunknown++;
1013 				ethermultiwrite(b, bp, port);
1014 			}else if(ce->port != port->id){
1015 				b->hit++;
1016 				etherwrite(b->port[ce->port], bp);
1017 			}else
1018 				freeb(bp);
1019 		}
1020 	}
1021 //	print("etherread: trying to exit\n");
1022 	port->readp = nil;
1023 	portfree(port);
1024 	qunlock(b);
1025 	pexit("hangup", 1);
1026 }
1027 
1028 static int
fragment(Etherpkt * epkt,int n)1029 fragment(Etherpkt *epkt, int n)
1030 {
1031 	Iphdr *iphdr;
1032 
1033 	if(n <= TunnelMtu)
1034 		return 0;
1035 
1036 	/* ignore non-ipv4 packets */
1037 	if(nhgets(epkt->type) != ETIP4)
1038 		return 0;
1039 	iphdr = (Iphdr*)(epkt->data);
1040 	n -= ETHERHDRSIZE;
1041 	/*
1042 	 * ignore: IP runt packets, bad packets (I don't handle IP
1043 	 * options for the moment), packets with don't-fragment set,
1044 	 * and short blocks.
1045 	 */
1046 	if(n < IPHDR || iphdr->vihl != (IP_VER4|IP_HLEN4) ||
1047 	    iphdr->frag[0] & (IP_DF>>8) || nhgets(iphdr->length) > n)
1048 		return 0;
1049 
1050 	return 1;
1051 }
1052 
1053 static void
etherwrite(Port * port,Block * bp)1054 etherwrite(Port *port, Block *bp)
1055 {
1056 	Iphdr *eh, *feh;
1057 	Etherpkt *epkt;
1058 	int n, lid, len, seglen, chunk, dlen, blklen, offset, mf;
1059 	Block *xp, *nb;
1060 	ushort fragoff, frag;
1061 
1062 	port->out++;
1063 	epkt = (Etherpkt*)bp->rp;
1064 	n = blocklen(bp);
1065 	if(port->type != Ttun || !fragment(epkt, n)) {
1066 		if(!waserror()){
1067 			devtab[port->data[1]->type]->bwrite(port->data[1], bp, 0);
1068 			poperror();
1069 		}
1070 		return;
1071 	}
1072 	port->outfrag++;
1073 	if(waserror()){
1074 		freeblist(bp);
1075 		return;
1076 	}
1077 
1078 	seglen = (TunnelMtu - ETHERHDRSIZE - IPHDR) & ~7;
1079 	eh = (Iphdr*)(epkt->data);
1080 	len = nhgets(eh->length);
1081 	frag = nhgets(eh->frag);
1082 	mf = frag & IP_MF;
1083 	frag <<= 3;
1084 	dlen = len - IPHDR;
1085 	xp = bp;
1086 	lid = nhgets(eh->id);
1087 	offset = ETHERHDRSIZE+IPHDR;
1088 	while(xp != nil && offset && offset >= BLEN(xp)) {
1089 		offset -= BLEN(xp);
1090 		xp = xp->next;
1091 	}
1092 	xp->rp += offset;
1093 
1094 	if(0)
1095 		print("seglen=%d, dlen=%d, mf=%x, frag=%d\n",
1096 			seglen, dlen, mf, frag);
1097 	for(fragoff = 0; fragoff < dlen; fragoff += seglen) {
1098 		nb = allocb(ETHERHDRSIZE+IPHDR+seglen);
1099 
1100 		feh = (Iphdr*)(nb->wp+ETHERHDRSIZE);
1101 
1102 		memmove(nb->wp, epkt, ETHERHDRSIZE+IPHDR);
1103 		nb->wp += ETHERHDRSIZE+IPHDR;
1104 
1105 		if((fragoff + seglen) >= dlen) {
1106 			seglen = dlen - fragoff;
1107 			hnputs(feh->frag, (frag+fragoff)>>3 | mf);
1108 		}
1109 		else
1110 			hnputs(feh->frag, (frag+fragoff>>3) | IP_MF);
1111 
1112 		hnputs(feh->length, seglen + IPHDR);
1113 		hnputs(feh->id, lid);
1114 
1115 		/* Copy up the data area */
1116 		chunk = seglen;
1117 		while(chunk) {
1118 			blklen = chunk;
1119 			if(BLEN(xp) < chunk)
1120 				blklen = BLEN(xp);
1121 			memmove(nb->wp, xp->rp, blklen);
1122 			nb->wp += blklen;
1123 			xp->rp += blklen;
1124 			chunk -= blklen;
1125 			if(xp->rp == xp->wp)
1126 				xp = xp->next;
1127 		}
1128 
1129 		feh->cksum[0] = 0;
1130 		feh->cksum[1] = 0;
1131 		hnputs(feh->cksum, ipcsum(&feh->vihl));
1132 
1133 		/* don't generate small packets */
1134 		if(BLEN(nb) < ETHERMINTU)
1135 			nb->wp = nb->rp + ETHERMINTU;
1136 		devtab[port->data[1]->type]->bwrite(port->data[1], nb, 0);
1137 	}
1138 	poperror();
1139 	freeblist(bp);
1140 }
1141 
1142 // hold b lock
1143 static void
portfree(Port * port)1144 portfree(Port *port)
1145 {
1146 	if(decref(port) != 0)
1147 		return;
1148 
1149 	if(port->data[0])
1150 		cclose(port->data[0]);
1151 	if(port->data[1])
1152 		cclose(port->data[1]);
1153 	memset(port, 0, sizeof(Port));
1154 	free(port);
1155 }
1156 
1157 Dev bridgedevtab = {
1158 	'B',
1159 	"bridge",
1160 
1161 	devreset,
1162 	bridgeinit,
1163 	devshutdown,
1164 	bridgeattach,
1165 	bridgewalk,
1166 	bridgestat,
1167 	bridgeopen,
1168 	devcreate,
1169 	bridgeclose,
1170 	bridgeread,
1171 	devbread,
1172 	bridgewrite,
1173 	devbwrite,
1174 	devremove,
1175 	devwstat,
1176 };
1177