xref: /plan9/sys/src/9/port/devbridge.c (revision aa72973a2891ccbd3fb042462446761159389e19)
1 /*
2  * IPv4 Ethernet bridge
3  */
4 #include "u.h"
5 #include "../port/lib.h"
6 #include "mem.h"
7 #include "dat.h"
8 #include "fns.h"
9 #include "../ip/ip.h"
10 #include "../port/netif.h"
11 #include "../port/error.h"
12 
/* short names for the structures defined later in this file */
typedef struct Bridge	Bridge;
typedef struct Centry	Centry;
typedef struct Iphdr	Iphdr;
typedef struct Port	Port;
typedef struct Tcphdr	Tcphdr;
18 
/* qid types; TYPE() extracts one of these from the low byte of a qid path */
enum
{
	Qtopdir = 1,		/* top level directory */

	Qbridgedir,		/* bridge* directory */
	Qbctl,
	Qstats,
	Qcache,
	Qlog,

	Qportdir,		/* directory for one port */
	Qpctl,
	Qlocal,
	Qstatus,

	MaxQ,
};

/* table sizes and cache tuning */
enum
{
	Maxbridge	= 4,
	Maxport		= 128,	/* power of 2: Maxport-1 is used as a mask by PORT() */
	CacheHash	= 257,	/* prime */
	CacheLook	= 5,	/* how many cache entries to examine */
	CacheSize	= CacheHash+CacheLook-1,
	CacheTimeout	= 5*60,	/* timeout for cache entry in seconds */
};

/* limits applied to forwarded traffic */
enum
{
	TcpMssMax	= 1300,	/* max desirable Tcp MSS value */
	TunnelMtu	= 1400,	/* larger packets are fragmented on tunnel ports */
};
46 
47 static Dirtab bridgedirtab[]={
48 	"ctl",		{Qbctl},	0,	0666,
49 	"stats",	{Qstats},	0,	0444,
50 	"cache",	{Qcache},	0,	0444,
51 	"log",		{Qlog},		0,	0666,
52 };
53 
54 static Dirtab portdirtab[]={
55 	"ctl",		{Qpctl},	0,	0666,
56 	"local",	{Qlocal},	0,	0444,
57 	"status",	{Qstatus},	0,	0444,
58 };
59 
/* event classes passed to log() */
enum
{
	Logcache	= 1<<0,	/* address cache activity */
	Logmcast	= 1<<1,	/* multicast/broadcast packets */
};

/* kinds of interface a port can be bound to */
enum
{
	Tether,			/* "bind ether ..." */
	Ttun,			/* "bind tunnel ..." */
};
71 
72 static Logflag logflags[] =
73 {
74 	{ "cache",	Logcache, },
75 	{ "multicast",	Logmcast, },
76 	{ nil,		0, },
77 };
78 
79 static Dirtab	*dirtab[MaxQ];
80 
81 #define TYPE(x) 	(((ulong)(x).path) & 0xff)
82 #define PORT(x) 	((((ulong)(x).path) >> 8)&(Maxport-1))
83 #define QID(x, y) 	(((x)<<8) | (y))
84 
85 struct Centry
86 {
87 	uchar	d[Eaddrlen];
88 	int	port;
89 	long	expire;		// entry expires this many seconds after bootime
90 	long	src;
91 	long	dst;
92 };
93 
94 struct Bridge
95 {
96 	QLock;
97 	int	nport;
98 	Port	*port[Maxport];
99 	Centry	cache[CacheSize];
100 	ulong	hit;
101 	ulong	miss;
102 	ulong	copy;
103 	long	delay0;		// constant microsecond delay per packet
104 	long	delayn;		// microsecond delay per byte
105 	int	tcpmss;		// modify tcpmss value
106 
107 	Log;
108 };
109 
110 struct Port
111 {
112 	int	id;
113 	Bridge	*bridge;
114 	int	ref;
115 	int	closed;
116 
117 	Chan	*data[2];	// channel to data
118 
119 	Proc	*readp;		// read proc
120 
121 	// the following uniquely identifies the port
122 	int	type;
123 	char	name[KNAMELEN];
124 
125 	// owner hash - avoids bind/unbind races
126 	ulong	ownhash;
127 
128 	// various stats
129 	int	in;		// number of packets read
130 	int	inmulti;	// multicast or broadcast
131 	int	inunknown;	// unknown address
132 	int	out;		// number of packets read
133 	int	outmulti;	// multicast or broadcast
134 	int	outunknown;	// unknown address
135 	int	outfrag;	// fragmented the packet
136 	int	nentry;		// number of cache entries for this port
137 };
138 
/* IP and TCP constants used by tcpmsshack and the fragmenter */
enum
{
	IP_TCPPROTO	= 6,	/* value of Iphdr.proto for TCP */
	EOLOPT		= 0,	/* TCP option kind: end of option list */
	NOOPOPT		= 1,	/* TCP option kind: one byte of padding */
	MSSOPT		= 2,	/* TCP option kind: maximum segment size */
	MSS_LENGTH	= 4,	/* total length of the MSS option */
	SYN		= 0x02,	/* Pkt. is synchronise */
	IPHDR		= 20,	/* sizeof(Iphdr) */
};
148 
149 struct Iphdr
150 {
151 	uchar	vihl;		/* Version and header length */
152 	uchar	tos;		/* Type of service */
153 	uchar	length[2];	/* packet length */
154 	uchar	id[2];		/* ip->identification */
155 	uchar	frag[2];	/* Fragment information */
156 	uchar	ttl;		/* Time to live */
157 	uchar	proto;		/* Protocol */
158 	uchar	cksum[2];	/* Header checksum */
159 	uchar	src[4];		/* IP source */
160 	uchar	dst[4];		/* IP destination */
161 };
162 
163 struct Tcphdr
164 {
165 	uchar	sport[2];
166 	uchar	dport[2];
167 	uchar	seq[4];
168 	uchar	ack[4];
169 	uchar	flag[2];
170 	uchar	win[2];
171 	uchar	cksum[2];
172 	uchar	urg[2];
173 };
174 
175 static Bridge bridgetab[Maxbridge];
176 
177 static int m2p[] = {
178 	[OREAD]		4,
179 	[OWRITE]	2,
180 	[ORDWR]		6
181 };
182 
183 static int	bridgegen(Chan *c, char*, Dirtab*, int, int s, Dir *dp);
184 static void	portbind(Bridge *b, int argc, char *argv[]);
185 static void	portunbind(Bridge *b, int argc, char *argv[]);
186 static void	etherread(void *a);
187 static char	*cachedump(Bridge *b);
188 static void	portfree(Port *port);
189 static void	cacheflushport(Bridge *b, int port);
190 static void	etherwrite(Port *port, Block *bp);
191 
192 static void
bridgeinit(void)193 bridgeinit(void)
194 {
195 	int i;
196 	Dirtab *dt;
197 
198 	// setup dirtab with non directory entries
199 	for(i=0; i<nelem(bridgedirtab); i++) {
200 		dt = bridgedirtab + i;
201 		dirtab[TYPE(dt->qid)] = dt;
202 	}
203 	for(i=0; i<nelem(portdirtab); i++) {
204 		dt = portdirtab + i;
205 		dirtab[TYPE(dt->qid)] = dt;
206 	}
207 }
208 
209 static Chan*
bridgeattach(char * spec)210 bridgeattach(char* spec)
211 {
212 	Chan *c;
213 	int dev;
214 
215 	dev = atoi(spec);
216 	if(dev<0 || dev >= Maxbridge)
217 		error("bad specification");
218 
219 	c = devattach('B', spec);
220 	mkqid(&c->qid, QID(0, Qtopdir), 0, QTDIR);
221 	c->dev = dev;
222 	return c;
223 }
224 
225 static Walkqid*
bridgewalk(Chan * c,Chan * nc,char ** name,int nname)226 bridgewalk(Chan *c, Chan *nc, char **name, int nname)
227 {
228 	return devwalk(c, nc, name, nname, (Dirtab*)0, 0, bridgegen);
229 }
230 
231 static int
bridgestat(Chan * c,uchar * db,int n)232 bridgestat(Chan* c, uchar* db, int n)
233 {
234 	return devstat(c, db, n, (Dirtab *)0, 0L, bridgegen);
235 }
236 
237 static Chan*
bridgeopen(Chan * c,int omode)238 bridgeopen(Chan* c, int omode)
239 {
240 	int perm;
241 	Bridge *b;
242 
243 	omode &= 3;
244 	perm = m2p[omode];
245 	USED(perm);
246 
247 	b = bridgetab + c->dev;
248 	USED(b);
249 
250 	switch(TYPE(c->qid)) {
251 	default:
252 		break;
253 	case Qlog:
254 		logopen(b);
255 		break;
256 	case Qcache:
257 		c->aux = cachedump(b);
258 		break;
259 	}
260 	c->mode = openmode(omode);
261 	c->flag |= COPEN;
262 	c->offset = 0;
263 	return c;
264 }
265 
266 static void
bridgeclose(Chan * c)267 bridgeclose(Chan* c)
268 {
269 	Bridge *b  = bridgetab + c->dev;
270 
271 	switch(TYPE(c->qid)) {
272 	case Qcache:
273 		if(c->flag & COPEN)
274 			free(c->aux);
275 		break;
276 	case Qlog:
277 		if(c->flag & COPEN)
278 			logclose(b);
279 		break;
280 	}
281 }
282 
283 static long
bridgeread(Chan * c,void * a,long n,vlong off)284 bridgeread(Chan *c, void *a, long n, vlong off)
285 {
286 	char buf[256];
287 	Bridge *b = bridgetab + c->dev;
288 	Port *port;
289 	int i, ingood, outgood;
290 
291 	USED(off);
292 	switch(TYPE(c->qid)) {
293 	default:
294 		error(Eperm);
295 	case Qtopdir:
296 	case Qbridgedir:
297 	case Qportdir:
298 		return devdirread(c, a, n, 0, 0, bridgegen);
299 	case Qlog:
300 		return logread(b, a, off, n);
301 	case Qstatus:
302 		qlock(b);
303 		port = b->port[PORT(c->qid)];
304 		if(port == 0)
305 			strcpy(buf, "unbound\n");
306 		else {
307 			i = 0;
308 			switch(port->type) {
309 			default:
310 				panic("bridgeread: unknown port type: %d",
311 					port->type);
312 			case Tether:
313 				i += snprint(buf+i, sizeof(buf)-i, "ether %s: ", port->name);
314 				break;
315 			case Ttun:
316 				i += snprint(buf+i, sizeof(buf)-i, "tunnel %s: ", port->name);
317 				break;
318 			}
319 			ingood = port->in - port->inmulti - port->inunknown;
320 			outgood = port->out - port->outmulti - port->outunknown;
321 			i += snprint(buf+i, sizeof(buf)-i,
322 				"in=%d(%d:%d:%d) out=%d(%d:%d:%d:%d)\n",
323 				port->in, ingood, port->inmulti, port->inunknown,
324 				port->out, outgood, port->outmulti,
325 				port->outunknown, port->outfrag);
326 			USED(i);
327 		}
328 		n = readstr(off, a, n, buf);
329 		qunlock(b);
330 		return n;
331 	case Qbctl:
332 		snprint(buf, sizeof(buf), "%s tcpmss\ndelay %ld %ld\n",
333 			b->tcpmss ? "set" : "clear", b->delay0, b->delayn);
334 		n = readstr(off, a, n, buf);
335 		return n;
336 	case Qcache:
337 		n = readstr(off, a, n, c->aux);
338 		return n;
339 	case Qstats:
340 		snprint(buf, sizeof(buf), "hit=%uld miss=%uld copy=%uld\n",
341 			b->hit, b->miss, b->copy);
342 		n = readstr(off, a, n, buf);
343 		return n;
344 	}
345 }
346 
347 static void
bridgeoption(Bridge * b,char * option,int value)348 bridgeoption(Bridge *b, char *option, int value)
349 {
350 	if(strcmp(option, "tcpmss") == 0)
351 		b->tcpmss = value;
352 	else
353 		error("unknown bridge option");
354 }
355 
356 
357 static long
bridgewrite(Chan * c,void * a,long n,vlong off)358 bridgewrite(Chan *c, void *a, long n, vlong off)
359 {
360 	Bridge *b = bridgetab + c->dev;
361 	Cmdbuf *cb;
362 	char *arg0, *p;
363 
364 	USED(off);
365 	switch(TYPE(c->qid)) {
366 	default:
367 		error(Eperm);
368 	case Qbctl:
369 		cb = parsecmd(a, n);
370 		qlock(b);
371 		if(waserror()) {
372 			qunlock(b);
373 			free(cb);
374 			nexterror();
375 		}
376 		if(cb->nf == 0)
377 			error("short write");
378 		arg0 = cb->f[0];
379 		if(strcmp(arg0, "bind") == 0) {
380 			portbind(b, cb->nf-1, cb->f+1);
381 		} else if(strcmp(arg0, "unbind") == 0) {
382 			portunbind(b, cb->nf-1, cb->f+1);
383 		} else if(strcmp(arg0, "cacheflush") == 0) {
384 			log(b, Logcache, "cache flush\n");
385 			memset(b->cache, 0, CacheSize*sizeof(Centry));
386 		} else if(strcmp(arg0, "set") == 0) {
387 			if(cb->nf != 2)
388 				error("usage: set option");
389 			bridgeoption(b, cb->f[1], 1);
390 		} else if(strcmp(arg0, "clear") == 0) {
391 			if(cb->nf != 2)
392 				error("usage: clear option");
393 			bridgeoption(b, cb->f[1], 0);
394 		} else if(strcmp(arg0, "delay") == 0) {
395 			if(cb->nf != 3)
396 				error("usage: delay delay0 delayn");
397 			b->delay0 = strtol(cb->f[1], nil, 10);
398 			b->delayn = strtol(cb->f[2], nil, 10);
399 		} else
400 			error("unknown control request");
401 		poperror();
402 		qunlock(b);
403 		free(cb);
404 		return n;
405 	case Qlog:
406 		cb = parsecmd(a, n);
407 		p = logctl(b, cb->nf, cb->f, logflags);
408 		free(cb);
409 		if(p != nil)
410 			error(p);
411 		return n;
412 	}
413 }
414 
415 static int
bridgegen(Chan * c,char *,Dirtab *,int,int s,Dir * dp)416 bridgegen(Chan *c, char *, Dirtab*, int, int s, Dir *dp)
417 {
418 	Bridge *b = bridgetab + c->dev;
419 	int type = TYPE(c->qid);
420 	Dirtab *dt;
421 	Qid qid;
422 
423 	if(s  == DEVDOTDOT){
424 		switch(TYPE(c->qid)){
425 		case Qtopdir:
426 		case Qbridgedir:
427 			snprint(up->genbuf, sizeof(up->genbuf), "#B%ld", c->dev);
428 			mkqid(&qid, Qtopdir, 0, QTDIR);
429 			devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
430 			break;
431 		case Qportdir:
432 			snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev);
433 			mkqid(&qid, Qbridgedir, 0, QTDIR);
434 			devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
435 			break;
436 		default:
437 			panic("bridgewalk %llux", c->qid.path);
438 		}
439 		return 1;
440 	}
441 
442 	switch(type) {
443 	default:
444 		/* non-directory entries end up here */
445 		if(c->qid.type & QTDIR)
446 			panic("bridgegen: unexpected directory");
447 		if(s != 0)
448 			return -1;
449 		dt = dirtab[TYPE(c->qid)];
450 		if(dt == nil)
451 			panic("bridgegen: unknown type: %lud", TYPE(c->qid));
452 		devdir(c, c->qid, dt->name, dt->length, eve, dt->perm, dp);
453 		return 1;
454 	case Qtopdir:
455 		if(s != 0)
456 			return -1;
457 		snprint(up->genbuf, sizeof(up->genbuf), "bridge%ld", c->dev);
458 		mkqid(&qid, QID(0, Qbridgedir), 0, QTDIR);
459 		devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
460 		return 1;
461 	case Qbridgedir:
462 		if(s<nelem(bridgedirtab)) {
463 			dt = bridgedirtab+s;
464 			devdir(c, dt->qid, dt->name, dt->length, eve, dt->perm, dp);
465 			return 1;
466 		}
467 		s -= nelem(bridgedirtab);
468 		if(s >= b->nport)
469 			return -1;
470 		mkqid(&qid, QID(s, Qportdir), 0, QTDIR);
471 		snprint(up->genbuf, sizeof(up->genbuf), "%d", s);
472 		devdir(c, qid, up->genbuf, 0, eve, 0555, dp);
473 		return 1;
474 	case Qportdir:
475 		if(s>=nelem(portdirtab))
476 			return -1;
477 		dt = portdirtab+s;
478 		mkqid(&qid, QID(PORT(c->qid),TYPE(dt->qid)), 0, QTFILE);
479 		devdir(c, qid, dt->name, dt->length, eve, dt->perm, dp);
480 		return 1;
481 	}
482 }
483 
/*
 *  parse mac address; also in netif.c
 *  reads alen bytes, each written as two characters taken as hex,
 *  optionally followed by ':'.  returns 0, or -1 if the string
 *  ends before alen bytes have been read.
 */
static int
parseaddr(uchar *to, char *from, int alen)
{
	char hex[4];
	char *s;
	int k;

	s = from;
	for(k = 0; k < alen; k++){
		if(s[0] == 0 || s[1] == 0)
			return -1;
		hex[0] = s[0];
		hex[1] = s[1];
		hex[2] = 0;
		s += 2;
		to[k] = strtoul(hex, 0, 16);
		if(*s == ':')
			s++;
	}
	return 0;
}
507 
508 // assumes b is locked
509 static void
portbind(Bridge * b,int argc,char * argv[])510 portbind(Bridge *b, int argc, char *argv[])
511 {
512 	Port *port;
513 	Chan *ctl;
514 	int type = 0, i, n;
515 	ulong ownhash;
516 	char *dev, *dev2 = nil, *p;
517 	char buf[100], name[KNAMELEN], path[8*KNAMELEN];
518 	static char usage[] = "usage: bind ether|tunnel name ownhash dev [dev2]";
519 
520 	memset(name, 0, KNAMELEN);
521 	if(argc < 4)
522 		error(usage);
523 	if(strcmp(argv[0], "ether") == 0) {
524 		if(argc != 4)
525 			error(usage);
526 		type = Tether;
527 		strncpy(name, argv[1], KNAMELEN);
528 		name[KNAMELEN-1] = 0;
529 //		parseaddr(addr, argv[1], Eaddrlen);
530 	} else if(strcmp(argv[0], "tunnel") == 0) {
531 		if(argc != 5)
532 			error(usage);
533 		type = Ttun;
534 		strncpy(name, argv[1], KNAMELEN);
535 		name[KNAMELEN-1] = 0;
536 //		parseip(addr, argv[1]);
537 		dev2 = argv[4];
538 	} else
539 		error(usage);
540 	ownhash = atoi(argv[2]);
541 	dev = argv[3];
542 	for(i=0; i<b->nport; i++) {
543 		port = b->port[i];
544 		if(port != nil && port->type == type &&
545 		    memcmp(port->name, name, KNAMELEN) == 0)
546 			error("port in use");
547 	}
548 	for(i=0; i<Maxport; i++)
549 		if(b->port[i] == nil)
550 			break;
551 	if(i == Maxport)
552 		error("no more ports");
553 	port = smalloc(sizeof(Port));
554 	port->ref = 1;
555 	port->id = i;
556 	port->ownhash = ownhash;
557 
558 	if(waserror()) {
559 		portfree(port);
560 		nexterror();
561 	}
562 	port->type = type;
563 	memmove(port->name, name, KNAMELEN);
564 	switch(port->type) {
565 	default:
566 		panic("portbind: unknown port type: %d", type);
567 	case Tether:
568 		snprint(path, sizeof(path), "%s/clone", dev);
569 		ctl = namec(path, Aopen, ORDWR, 0);
570 		if(waserror()) {
571 			cclose(ctl);
572 			nexterror();
573 		}
574 		// check addr?
575 
576 		// get directory name
577 		n = devtab[ctl->type]->read(ctl, buf, sizeof(buf), 0);
578 		buf[n] = 0;
579 		for(p = buf; *p == ' '; p++)
580 			;
581 		snprint(path, sizeof(path), "%s/%lud/data", dev, strtoul(p, 0, 0));
582 
583 		// setup connection to be promiscuous
584 		snprint(buf, sizeof(buf), "connect -1");
585 		devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
586 		snprint(buf, sizeof(buf), "promiscuous");
587 		devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
588 		snprint(buf, sizeof(buf), "bridge");
589 		devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
590 
591 		// open data port
592 		port->data[0] = namec(path, Aopen, ORDWR, 0);
593 		// dup it
594 		incref(port->data[0]);
595 		port->data[1] = port->data[0];
596 
597 		poperror();
598 		cclose(ctl);
599 
600 		break;
601 	case Ttun:
602 		port->data[0] = namec(dev, Aopen, OREAD, 0);
603 		port->data[1] = namec(dev2, Aopen, OWRITE, 0);
604 		break;
605 	}
606 
607 	poperror();
608 
609 	/* committed to binding port */
610 	b->port[port->id] = port;
611 	port->bridge = b;
612 	if(b->nport <= port->id)
613 		b->nport = port->id+1;
614 
615 	// assumes kproc always succeeds
616 	kproc("etherread", etherread, port);	// poperror must be next
617 	port->ref++;
618 }
619 
620 // assumes b is locked
621 static void
portunbind(Bridge * b,int argc,char * argv[])622 portunbind(Bridge *b, int argc, char *argv[])
623 {
624 	int type = 0, i;
625 	char name[KNAMELEN];
626 	ulong ownhash;
627 	Port *port = nil;
628 	static char usage[] = "usage: unbind ether|tunnel addr [ownhash]";
629 
630 	memset(name, 0, KNAMELEN);
631 	if(argc < 2 || argc > 3)
632 		error(usage);
633 	if(strcmp(argv[0], "ether") == 0) {
634 		type = Tether;
635 		strncpy(name, argv[1], KNAMELEN);
636 		name[KNAMELEN-1] = 0;
637 //		parseaddr(addr, argv[1], Eaddrlen);
638 	} else if(strcmp(argv[0], "tunnel") == 0) {
639 		type = Ttun;
640 		strncpy(name, argv[1], KNAMELEN);
641 		name[KNAMELEN-1] = 0;
642 //		parseip(addr, argv[1]);
643 	} else
644 		error(usage);
645 	if(argc == 3)
646 		ownhash = atoi(argv[2]);
647 	else
648 		ownhash = 0;
649 	for(i=0; i<b->nport; i++) {
650 		port = b->port[i];
651 		if(port != nil && port->type == type &&
652 		    memcmp(port->name, name, KNAMELEN) == 0)
653 			break;
654 	}
655 	if(i == b->nport)
656 		error("port not found");
657 	if(ownhash != 0 && port->ownhash != 0 && ownhash != port->ownhash)
658 		error("bad owner hash");
659 
660 	port->closed = 1;
661 	b->port[i] = nil;	// port is now unbound
662 	cacheflushport(b, i);
663 
664 	// try and stop reader
665 	if(port->readp)
666 		postnote(port->readp, 1, "unbind", 0);
667 	portfree(port);
668 }
669 
670 // assumes b is locked
671 static Centry *
cachelookup(Bridge * b,uchar d[Eaddrlen])672 cachelookup(Bridge *b, uchar d[Eaddrlen])
673 {
674 	int i;
675 	uint h;
676 	Centry *p;
677 	long sec;
678 
679 	// dont cache multicast or broadcast
680 	if(d[0] & 1)
681 		return 0;
682 
683 	h = 0;
684 	for(i=0; i<Eaddrlen; i++) {
685 		h *= 7;
686 		h += d[i];
687 	}
688 	h %= CacheHash;
689 	p = b->cache + h;
690 	sec = TK2SEC(m->ticks);
691 	for(i=0; i<CacheLook; i++,p++) {
692 		if(memcmp(d, p->d, Eaddrlen) == 0) {
693 			p->dst++;
694 			if(sec >= p->expire) {
695 				log(b, Logcache, "expired cache entry: %E %d\n",
696 					d, p->port);
697 				return nil;
698 			}
699 			p->expire = sec + CacheTimeout;
700 			return p;
701 		}
702 	}
703 	log(b, Logcache, "cache miss: %E\n", d);
704 	return nil;
705 }
706 
707 // assumes b is locked
708 static void
cacheupdate(Bridge * b,uchar d[Eaddrlen],int port)709 cacheupdate(Bridge *b, uchar d[Eaddrlen], int port)
710 {
711 	int i;
712 	uint h;
713 	Centry *p, *pp;
714 	long sec;
715 
716 	// dont cache multicast or broadcast
717 	if(d[0] & 1) {
718 		log(b, Logcache, "bad source address: %E\n", d);
719 		return;
720 	}
721 
722 	h = 0;
723 	for(i=0; i<Eaddrlen; i++) {
724 		h *= 7;
725 		h += d[i];
726 	}
727 	h %= CacheHash;
728 	p = b->cache + h;
729 	pp = p;
730 	sec = p->expire;
731 
732 	// look for oldest entry
733 	for(i=0; i<CacheLook; i++,p++) {
734 		if(memcmp(p->d, d, Eaddrlen) == 0) {
735 			p->expire = TK2SEC(m->ticks) + CacheTimeout;
736 			if(p->port != port) {
737 				log(b, Logcache, "NIC changed port %d->%d: %E\n",
738 					p->port, port, d);
739 				p->port = port;
740 			}
741 			p->src++;
742 			return;
743 		}
744 		if(p->expire < sec) {
745 			sec = p->expire;
746 			pp = p;
747 		}
748 	}
749 	if(pp->expire != 0)
750 		log(b, Logcache, "bumping from cache: %E %d\n", pp->d, pp->port);
751 	pp->expire = TK2SEC(m->ticks) + CacheTimeout;
752 	memmove(pp->d, d, Eaddrlen);
753 	pp->port = port;
754 	pp->src = 1;
755 	pp->dst = 0;
756 	log(b, Logcache, "adding to cache: %E %d\n", pp->d, pp->port);
757 }
758 
759 // assumes b is locked
760 static void
cacheflushport(Bridge * b,int port)761 cacheflushport(Bridge *b, int port)
762 {
763 	Centry *ce;
764 	int i;
765 
766 	ce = b->cache;
767 	for(i=0; i<CacheSize; i++,ce++) {
768 		if(ce->port != port)
769 			continue;
770 		memset(ce, 0, sizeof(Centry));
771 	}
772 }
773 
774 static char *
cachedump(Bridge * b)775 cachedump(Bridge *b)
776 {
777 	int i, n;
778 	long sec, off;
779 	char *buf, *p, *ep;
780 	Centry *ce;
781 	char c;
782 
783 	qlock(b);
784 	if(waserror()) {
785 		qunlock(b);
786 		nexterror();
787 	}
788 	sec = TK2SEC(m->ticks);
789 	n = 0;
790 	for(i=0; i<CacheSize; i++)
791 		if(b->cache[i].expire != 0)
792 			n++;
793 
794 	n *= 51;	// change if print format is changed
795 	n += 10;	// some slop at the end
796 	buf = malloc(n);
797 	if(buf == nil)
798 		error(Enomem);
799 	p = buf;
800 	ep = buf + n;
801 	ce = b->cache;
802 	off = seconds() - sec;
803 	for(i=0; i<CacheSize; i++,ce++) {
804 		if(ce->expire == 0)
805 			continue;
806 		c = (sec < ce->expire)?'v':'e';
807 		p += snprint(p, ep-p, "%E %2d %10ld %10ld %10ld %c\n", ce->d,
808 			ce->port, ce->src, ce->dst, ce->expire+off, c);
809 	}
810 	*p = 0;
811 	poperror();
812 	qunlock(b);
813 
814 	return buf;
815 }
816 
817 
818 
819 // assumes b is locked
820 static void
ethermultiwrite(Bridge * b,Block * bp,Port * port)821 ethermultiwrite(Bridge *b, Block *bp, Port *port)
822 {
823 	Port *oport;
824 	Block *bp2;
825 	Etherpkt *ep;
826 	int i, mcast;
827 
828 	if(waserror()) {
829 		if(bp)
830 			freeb(bp);
831 		nexterror();
832 	}
833 
834 	ep = (Etherpkt*)bp->rp;
835 	mcast = ep->d[0] & 1;		/* multicast bit of ethernet address */
836 
837 	oport = nil;
838 	for(i=0; i<b->nport; i++) {
839 		if(i == port->id || b->port[i] == nil)
840 			continue;
841 		/*
842 		 * we need to forward multicast packets for ipv6,
843 		 * so always do it.
844 		 */
845 		if(mcast)
846 			b->port[i]->outmulti++;
847 		else
848 			b->port[i]->outunknown++;
849 
850 		// delay one so that the last write does not copy
851 		if(oport != nil) {
852 			b->copy++;
853 			bp2 = copyblock(bp, blocklen(bp));
854 			if(!waserror()) {
855 				etherwrite(oport, bp2);
856 				poperror();
857 			}
858 		}
859 		oport = b->port[i];
860 	}
861 
862 	// last write free block
863 	if(oport) {
864 		bp2 = bp; bp = nil; USED(bp);
865 		if(!waserror()) {
866 			etherwrite(oport, bp2);
867 			poperror();
868 		}
869 	} else
870 		freeb(bp);
871 
872 	poperror();
873 }
874 
875 static void
tcpmsshack(Etherpkt * epkt,int n)876 tcpmsshack(Etherpkt *epkt, int n)
877 {
878 	int hl, optlen;
879 	Iphdr *iphdr;
880 	Tcphdr *tcphdr;
881 	ulong mss, cksum;
882 	uchar *optr;
883 
884 	/* ignore non-ipv4 packets */
885 	if(nhgets(epkt->type) != ETIP4)
886 		return;
887 	iphdr = (Iphdr*)(epkt->data);
888 	n -= ETHERHDRSIZE;
889 	if(n < IPHDR)
890 		return;
891 
892 	/* ignore bad packets */
893 	if(iphdr->vihl != (IP_VER4|IP_HLEN4)) {
894 		hl = (iphdr->vihl&0xF)<<2;
895 		if((iphdr->vihl&0xF0) != IP_VER4 || hl < (IP_HLEN4<<2))
896 			return;
897 	} else
898 		hl = IP_HLEN4<<2;
899 
900 	/* ignore non-tcp packets */
901 	if(iphdr->proto != IP_TCPPROTO)
902 		return;
903 	n -= hl;
904 	if(n < sizeof(Tcphdr))
905 		return;
906 	tcphdr = (Tcphdr*)((uchar*)(iphdr) + hl);
907 	// MSS can only appear in SYN packet
908 	if(!(tcphdr->flag[1] & SYN))
909 		return;
910 	hl = (tcphdr->flag[0] & 0xf0)>>2;
911 	if(n < hl)
912 		return;
913 
914 	// check for MSS option
915 	optr = (uchar*)tcphdr + sizeof(Tcphdr);
916 	n = hl - sizeof(Tcphdr);
917 	for(;;) {
918 		if(n <= 0 || *optr == EOLOPT)
919 			return;
920 		if(*optr == NOOPOPT) {
921 			n--;
922 			optr++;
923 			continue;
924 		}
925 		optlen = optr[1];
926 		if(optlen < 2 || optlen > n)
927 			return;
928 		if(*optr == MSSOPT && optlen == MSS_LENGTH)
929 			break;
930 		n -= optlen;
931 		optr += optlen;
932 	}
933 
934 	mss = nhgets(optr+2);
935 	if(mss <= TcpMssMax)
936 		return;
937 	// fit checksum
938 	cksum = nhgets(tcphdr->cksum);
939 	if(optr-(uchar*)tcphdr & 1) {
940 print("tcpmsshack: odd alignment!\n");
941 		// odd alignments are a pain
942 		cksum += nhgets(optr+1);
943 		cksum -= (optr[1]<<8)|(TcpMssMax>>8);
944 		cksum += (cksum>>16);
945 		cksum &= 0xffff;
946 		cksum += nhgets(optr+3);
947 		cksum -= ((TcpMssMax&0xff)<<8)|optr[4];
948 		cksum += (cksum>>16);
949 	} else {
950 		cksum += mss;
951 		cksum -= TcpMssMax;
952 		cksum += (cksum>>16);
953 	}
954 	hnputs(tcphdr->cksum, cksum);
955 	hnputs(optr+2, TcpMssMax);
956 }
957 
958 /*
959  *  process to read from the ethernet
960  */
961 static void
etherread(void * a)962 etherread(void *a)
963 {
964 	Port *port = a;
965 	Bridge *b = port->bridge;
966 	Block *bp, *bp2;
967 	Etherpkt *ep;
968 	Centry *ce;
969 	long md;
970 
971 	qlock(b);
972 	port->readp = up;	/* hide identity under a rock for unbind */
973 
974 	while(!port->closed){
975 		// release lock to read - error means it is time to quit
976 		qunlock(b);
977 		if(waserror()) {
978 			print("etherread read error: %s\n", up->errstr);
979 			qlock(b);
980 			break;
981 		}
982 		if(0)
983 			print("devbridge: etherread: reading\n");
984 		bp = devtab[port->data[0]->type]->bread(port->data[0],
985 			ETHERMAXTU, 0);
986 		if(0)
987 			print("devbridge: etherread: blocklen = %d\n",
988 				blocklen(bp));
989 		poperror();
990 		qlock(b);
991 		if(bp == nil || port->closed)
992 			break;
993 		if(waserror()) {
994 //			print("etherread bridge error\n");
995 			if(bp)
996 				freeb(bp);
997 			continue;
998 		}
999 		if(blocklen(bp) < ETHERMINTU)
1000 			error("short packet");
1001 		port->in++;
1002 
1003 		ep = (Etherpkt*)bp->rp;
1004 		cacheupdate(b, ep->s, port->id);
1005 		if(b->tcpmss)
1006 			tcpmsshack(ep, BLEN(bp));
1007 
1008 		/*
1009 		 * delay packets to simulate a slow link
1010 		 */
1011 		if(b->delay0 || b->delayn){
1012 			md = b->delay0 + b->delayn * BLEN(bp);
1013 			if(md > 0)
1014 				microdelay(md);
1015 		}
1016 
1017 		if(ep->d[0] & 1) {
1018 			log(b, Logmcast, "multicast: port=%d src=%E dst=%E type=%#.4ux\n",
1019 				port->id, ep->s, ep->d, ep->type[0]<<8|ep->type[1]);
1020 			port->inmulti++;
1021 			bp2 = bp; bp = nil;
1022 			ethermultiwrite(b, bp2, port);
1023 		} else {
1024 			ce = cachelookup(b, ep->d);
1025 			if(ce == nil) {
1026 				b->miss++;
1027 				port->inunknown++;
1028 				bp2 = bp; bp = nil;
1029 				ethermultiwrite(b, bp2, port);
1030 			}else if(ce->port != port->id){
1031 				b->hit++;
1032 				bp2 = bp; bp = nil;
1033 				etherwrite(b->port[ce->port], bp2);
1034 			}
1035 		}
1036 
1037 		poperror();
1038 		if(bp)
1039 			freeb(bp);
1040 	}
1041 //	print("etherread: trying to exit\n");
1042 	port->readp = nil;
1043 	portfree(port);
1044 	qunlock(b);
1045 	pexit("hangup", 1);
1046 }
1047 
1048 static int
fragment(Etherpkt * epkt,int n)1049 fragment(Etherpkt *epkt, int n)
1050 {
1051 	Iphdr *iphdr;
1052 
1053 	if(n <= TunnelMtu)
1054 		return 0;
1055 
1056 	/* ignore non-ipv4 packets */
1057 	if(nhgets(epkt->type) != ETIP4)
1058 		return 0;
1059 	iphdr = (Iphdr*)(epkt->data);
1060 	n -= ETHERHDRSIZE;
1061 	/*
1062 	 * ignore: IP runt packets, bad packets (I don't handle IP
1063 	 * options for the moment), packets with don't-fragment set,
1064 	 * and short blocks.
1065 	 */
1066 	if(n < IPHDR || iphdr->vihl != (IP_VER4|IP_HLEN4) ||
1067 	    iphdr->frag[0] & (IP_DF>>8) || nhgets(iphdr->length) > n)
1068 		return 0;
1069 
1070 	return 1;
1071 }
1072 
1073 
1074 static void
etherwrite(Port * port,Block * bp)1075 etherwrite(Port *port, Block *bp)
1076 {
1077 	Iphdr *eh, *feh;
1078 	Etherpkt *epkt;
1079 	int n, lid, len, seglen, chunk, dlen, blklen, offset, mf;
1080 	Block *xp, *nb;
1081 	ushort fragoff, frag;
1082 
1083 	port->out++;
1084 	epkt = (Etherpkt*)bp->rp;
1085 	n = blocklen(bp);
1086 	if(port->type != Ttun || !fragment(epkt, n)) {
1087 		devtab[port->data[1]->type]->bwrite(port->data[1], bp, 0);
1088 		return;
1089 	}
1090 	port->outfrag++;
1091 	if(waserror()){
1092 		freeblist(bp);
1093 		nexterror();
1094 	}
1095 
1096 	seglen = (TunnelMtu - ETHERHDRSIZE - IPHDR) & ~7;
1097 	eh = (Iphdr*)(epkt->data);
1098 	len = nhgets(eh->length);
1099 	frag = nhgets(eh->frag);
1100 	mf = frag & IP_MF;
1101 	frag <<= 3;
1102 	dlen = len - IPHDR;
1103 	xp = bp;
1104 	lid = nhgets(eh->id);
1105 	offset = ETHERHDRSIZE+IPHDR;
1106 	while(xp != nil && offset && offset >= BLEN(xp)) {
1107 		offset -= BLEN(xp);
1108 		xp = xp->next;
1109 	}
1110 	xp->rp += offset;
1111 
1112 	if(0)
1113 		print("seglen=%d, dlen=%d, mf=%x, frag=%d\n",
1114 			seglen, dlen, mf, frag);
1115 	for(fragoff = 0; fragoff < dlen; fragoff += seglen) {
1116 		nb = allocb(ETHERHDRSIZE+IPHDR+seglen);
1117 
1118 		feh = (Iphdr*)(nb->wp+ETHERHDRSIZE);
1119 
1120 		memmove(nb->wp, epkt, ETHERHDRSIZE+IPHDR);
1121 		nb->wp += ETHERHDRSIZE+IPHDR;
1122 
1123 		if((fragoff + seglen) >= dlen) {
1124 			seglen = dlen - fragoff;
1125 			hnputs(feh->frag, (frag+fragoff)>>3 | mf);
1126 		}
1127 		else
1128 			hnputs(feh->frag, (frag+fragoff>>3) | IP_MF);
1129 
1130 		hnputs(feh->length, seglen + IPHDR);
1131 		hnputs(feh->id, lid);
1132 
1133 		/* Copy up the data area */
1134 		chunk = seglen;
1135 		while(chunk) {
1136 			blklen = chunk;
1137 			if(BLEN(xp) < chunk)
1138 				blklen = BLEN(xp);
1139 			memmove(nb->wp, xp->rp, blklen);
1140 			nb->wp += blklen;
1141 			xp->rp += blklen;
1142 			chunk -= blklen;
1143 			if(xp->rp == xp->wp)
1144 				xp = xp->next;
1145 		}
1146 
1147 		feh->cksum[0] = 0;
1148 		feh->cksum[1] = 0;
1149 		hnputs(feh->cksum, ipcsum(&feh->vihl));
1150 
1151 		/* don't generate small packets */
1152 		if(BLEN(nb) < ETHERMINTU)
1153 			nb->wp = nb->rp + ETHERMINTU;
1154 		devtab[port->data[1]->type]->bwrite(port->data[1], nb, 0);
1155 	}
1156 	poperror();
1157 	freeblist(bp);
1158 }
1159 
1160 // hold b lock
1161 static void
portfree(Port * port)1162 portfree(Port *port)
1163 {
1164 	port->ref--;
1165 	if(port->ref < 0)
1166 		panic("portfree: bad ref");
1167 	if(port->ref > 0)
1168 		return;
1169 
1170 	if(port->data[0])
1171 		cclose(port->data[0]);
1172 	if(port->data[1])
1173 		cclose(port->data[1]);
1174 	memset(port, 0, sizeof(Port));
1175 	free(port);
1176 }
1177 
1178 Dev bridgedevtab = {
1179 	'B',
1180 	"bridge",
1181 
1182 	devreset,
1183 	bridgeinit,
1184 	devshutdown,
1185 	bridgeattach,
1186 	bridgewalk,
1187 	bridgestat,
1188 	bridgeopen,
1189 	devcreate,
1190 	bridgeclose,
1191 	bridgeread,
1192 	devbread,
1193 	bridgewrite,
1194 	devbwrite,
1195 	devremove,
1196 	devwstat,
1197 };
1198