xref: /plan9-contrib/sys/src/9k/386/ethervirtio.c (revision c7eea38903abce59fd96ffa8676318e1b67f68d9)
1 /*
2  * virtio ethernet driver implementing the legacy interface:
3  * http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
4  */
5 #include "u.h"
6 #include "../port/lib.h"
7 #include "mem.h"
8 #include "dat.h"
9 #include "fns.h"
10 #include "io.h"
11 #include "../port/error.h"
12 #include "../port/netif.h"
13 #include "etherif.h"
14 
15 typedef struct Vring Vring;
16 typedef struct Vdesc Vdesc;
17 typedef struct Vused Vused;
18 typedef struct Vheader Vheader;
19 typedef struct Vqueue Vqueue;
20 typedef struct Ctlr Ctlr;
21 
/*
 * Driver-wide constants: device status bits, legacy register offsets
 * (added to Ctlr.port), feature bits, ring/descriptor flags, on-the-wire
 * structure sizes, queue indices and control-queue command codes.
 */
enum {
	/* §2.1 Device Status Field */
	Sacknowledge = 1,
	Sdriver = 2,
	Sdriverok = 4,
	Sfeatureok = 8,
	Sfailed = 128,

	/* §4.1.4.8 Legacy Interfaces: A Note on PCI Device Layout */
	/* byte offsets from the controller's I/O port base */
	Qdevfeat = 0,
	Qdrvfeat = 4,
	Qaddr = 8,
	Qsize = 12,
	Qselect = 14,
	Qnotify = 16,
	Qstatus = 18,
	Qisr = 19,
	Qmac = 20,
	Qnetstatus = 26,

	/* flags in Qnetstatus */
	Nlinkup = (1<<0),
	Nannounce = (1<<1),

	/* feature bits */
	Fmac = (1<<5),		/* reset() reads the MAC from Qmac when set */
	Fstatus = (1<<16),	/* ifstat() reports Qnetstatus when set */
	Fctrlvq = (1<<17),	/* with Fctrlrx: enables multicast/promiscuous hooks */
	Fctrlrx = (1<<18),

	/* vring used flags */
	Unonotify = 1,		/* vqnotify() skips the Qnotify write when set */
	/* vring avail flags */
	Rnointerrupt = 1,

	/* descriptor flags */
	Dnext = 1,
	Dwrite = 2,
	Dindirect = 4,

	/* struct sizes */
	/* sizes used for ring layout arithmetic instead of sizeof */
	VringSize = 4,
	VdescSize = 16,
	VusedSize = 8,
	VheaderSize = 10,

	/* §4.1.5.1.4.1 says pages are 4096 bytes
	 * for the purposes of the driver.
	 */
	VBY2PG	= 4096,
#define VPGROUND(s)	ROUNDUP(s, VBY2PG)

	/* indices into Ctlr.queue, also the value written to Qselect/Qnotify */
	Vrxq	= 0,
	Vtxq	= 1,
	Vctlq	= 2,

	/* class/cmd for Vctlq */
	CtrlRx	= 0x00,
		CmdPromisc	= 0x00,
		CmdAllmulti	= 0x01,
	CtrlMac	= 0x01,
		CmdMacTableSet	= 0x00,
	CtrlVlan= 0x02,
		CmdVlanAdd	= 0x00,
		CmdVlanDel	= 0x01,
};
88 
/*
 * Common header of the available and the used ring
 * (Vqueue.avail and Vqueue.used both point at one of these);
 * the ring entries follow it in memory.
 */
struct Vring
{
	u16int	flags;	/* Rnointerrupt (avail) or Unonotify (used) */
	u16int	idx;	/* free-running index, masked with qmask when used as a slot */
};
94 
/*
 * One entry of the descriptor table: a buffer handed to the device.
 */
struct Vdesc
{
	u64int	addr;	/* physical address of the buffer (set with PADDR) */
	u32int	len;	/* buffer length in bytes */
	u16int	flags;	/* Dnext, Dwrite, Dindirect */
	u16int	next;	/* index of the following descriptor when Dnext is set */
};
102 
/*
 * One entry of the used ring, read by the driver when retiring buffers.
 */
struct Vused
{
	u32int	id;	/* head descriptor index of the completed chain */
	u32int	len;	/* byte count reported by the device; rxproc subtracts VheaderSize */
};
108 
/*
 * Per-packet header that precedes the frame in every rx/tx descriptor
 * chain (VheaderSize bytes). This driver allocates it but never reads
 * or writes the individual fields.
 * NOTE(review): presumably mirrors the spec's virtio_net_hdr — confirm
 * field meanings against the spec before using them.
 */
struct Vheader
{
	u8int	flags;
	u8int	segtype;
	u16int	hlen;
	u16int	seglen;
	u16int	csumstart;
	u16int	csumend;
};
118 
/* §2.4 Virtqueues */
/*
 * Driver-side state of one virtqueue. The pointers below all point
 * into a single allocation laid out by initqueue().
 */
struct Vqueue
{
	Rendez;			/* kprocs sleep here; interrupt() wakes them */

	uint	qsize;		/* number of descriptors */
	uint	qmask;		/* qsize - 1, for wrapping ring indices */

	Vdesc	*desc;		/* descriptor table; also the base of the allocation */

	Vring	*avail;		/* available ring header */
	u16int	*availent;	/* available ring entries (descriptor heads) */
	u16int	*availevent;	/* word after the available entries; unused by this driver */

	Vring	*used;		/* used ring header */
	Vused	*usedent;	/* used ring entries */
	u16int	*usedevent;	/* word after the used entries; unused by this driver */
	u16int	lastused;	/* next used->idx value the driver has yet to process */

	uint	nintr;		/* wakeups issued by interrupt(), reported by ifstat() */
	uint	nnote;		/* notifications sent by vqnotify(), reported by ifstat() */
};
141 
/*
 * Per-device state; instances are created by pciprobe() and chained
 * through next.
 */
struct Ctlr {
	Lock;			/* guards attached in attach() */

	QLock	ctllock;	/* serialises commands on the control queue */

	int	attached;	/* set once attach() has started the kprocs */

	int	port;		/* I/O port base of the legacy register block */
	Pcidev	*pcidev;
	Ctlr	*next;		/* next probed controller */
	int	active;		/* claimed by an Ether in reset() */
	int	id;		/* (did<<16)|vid */
	int	typ;		/* type value matched in pciprobe() */
	ulong	feat;		/* feature bits offered by the device (Qdevfeat) */
	int	nqueue;		/* number of initialised entries in queue[] */

	/* virtioether has 3 queues: rx, tx and ctl */
	Vqueue	queue[3];
};
161 
/* list of controllers found by pciprobe(); filled lazily by reset() */
static Ctlr *ctlrhead;
163 
164 static int
vhasroom(void * v)165 vhasroom(void *v)
166 {
167 	Vqueue *q = v;
168 	return q->lastused != q->used->idx;
169 }
170 
171 static void
vqnotify(Ctlr * ctlr,int x)172 vqnotify(Ctlr *ctlr, int x)
173 {
174 	Vqueue *q;
175 
176 	coherence();
177 	q = &ctlr->queue[x];
178 	if(q->used->flags & Unonotify)
179 		return;
180 	q->nnote++;
181 	outs(ctlr->port+Qnotify, x);
182 }
183 
/*
 * Transmit kproc: takes blocks from edev->oq and hands them to the
 * device on the tx virtqueue. v is the Ether.
 *
 * The descriptor table is used as qsize/2 fixed pairs: descriptor 2*i
 * points at a header buffer shared by all pairs and chains to
 * descriptor 2*i+1, which points at the packet data. blocks[i]
 * remembers the Block in flight on pair i so it can be freed once the
 * device reports the chain as used.
 */
static void
txproc(void *v)
{
	Vheader *header;
	Block **blocks;
	Ether *edev;
	Ctlr *ctlr;
	Vqueue *q;
	Vused *u;
	Block *b;
	int i, j;

	edev = v;
	ctlr = edev->ctlr;
	q = &ctlr->queue[Vtxq];

	header = smalloc(VheaderSize);
	blocks = smalloc(sizeof(Block*) * (q->qsize/2));

	/* build the static part of every header+data descriptor pair */
	for(i = 0; i < q->qsize/2; i++){
		j = i << 1;
		q->desc[j].addr = PADDR(header);
		q->desc[j].len = VheaderSize;
		q->desc[j].next = j | 1;
		q->desc[j].flags = Dnext;

		/*
		 * avail ring has qsize entries but only qsize/2 chains:
		 * both halves name the same heads, so entry k always
		 * refers to pair k mod qsize/2.
		 */
		q->availent[i] = q->availent[i + q->qsize/2] = j;

		j |= 1;
		q->desc[j].next = 0;
		q->desc[j].flags = 0;
	}

	q->avail->flags &= ~Rnointerrupt;

	/* re-arm the error label after every error so the loop below keeps running */
	while(waserror())
		;

	while((b = qbread(edev->oq, 1000000)) != nil){
		for(;;){
			/* retire completed packets */
			while((i = q->lastused) != q->used->idx){
				u = &q->usedent[i & q->qmask];
				/* head descriptor index -> pair index */
				i = (u->id & q->qmask) >> 1;
				if(blocks[i] == nil)
					break;
				freeb(blocks[i]);
				blocks[i] = nil;
				q->lastused++;
			}

			/* have free slot? */
			i = q->avail->idx & (q->qmask >> 1);
			if(blocks[i] == nil)
				break;

			/* ring full, wait and retry */
			if(!vhasroom(q))
				sleep(q, vhasroom, q);
		}

		/* slot is free, fill in descriptor */
		blocks[i] = b;
		j = (i << 1) | 1;
		q->desc[j].addr = PADDR(b->rp);
		q->desc[j].len = BLEN(b);
		/* descriptor must be written before the index that publishes it */
		coherence();
		q->avail->idx++;
		vqnotify(ctlr, Vtxq);
	}

	/* qbread returned nil */
	pexit("ether out queue closed", 1);
}
257 
/*
 * Receive kproc: keeps the rx virtqueue stocked with empty Blocks and
 * passes filled ones to etheriq(). v is the Ether. Never returns.
 *
 * As in txproc, the descriptor table is used as qsize/2 fixed pairs:
 * descriptor 2*i points at a header buffer shared by all pairs and
 * chains to descriptor 2*i+1, which points at the Block's data area.
 * Both descriptors are marked Dwrite. blocks[i] is the Block currently
 * posted on pair i.
 */
static void
rxproc(void *v)
{
	Vheader *header;
	Block **blocks;
	Ether *edev;
	Ctlr *ctlr;
	Vqueue *q;
	Vused *u;
	Block *b;
	int i, j;

	edev = v;
	ctlr = edev->ctlr;
	q = &ctlr->queue[Vrxq];

	header = smalloc(VheaderSize);
	blocks = smalloc(sizeof(Block*) * (q->qsize/2));

	/* build the static part of every header+data descriptor pair */
	for(i = 0; i < q->qsize/2; i++){
		j = i << 1;
		q->desc[j].addr = PADDR(header);
		q->desc[j].len = VheaderSize;
		q->desc[j].next = j | 1;
		q->desc[j].flags = Dwrite|Dnext;

		/* both halves of the avail ring name the same qsize/2 heads */
		q->availent[i] = q->availent[i + q->qsize/2] = j;

		j |= 1;
		q->desc[j].next = 0;
		q->desc[j].flags = Dwrite;
	}

	q->avail->flags &= ~Rnointerrupt;

	/* re-arm the error label after every error so the loop below keeps running */
	while(waserror())
		;

	for(;;){
		/* replenish receive ring */
		do {
			i = q->avail->idx & (q->qmask >> 1);
			/* pair still posted: ring is fully stocked */
			if(blocks[i] != nil)
				break;
			/* out of blocks: carry on with what is posted */
			if((b = iallocb(ETHERMAXTU)) == nil)
				break;
			blocks[i] = b;
			j = (i << 1) | 1;
			q->desc[j].addr = PADDR(b->rp);
			q->desc[j].len = BALLOC(b);
			/* descriptor must be written before the index that publishes it */
			coherence();
			q->avail->idx++;
		} while(q->avail->idx != q->used->idx);
		vqnotify(ctlr, Vrxq);

		/* wait for any packets to complete */
		if(!vhasroom(q))
			sleep(q, vhasroom, q);

		/* retire completed packets */
		while((i = q->lastused) != q->used->idx) {
			u = &q->usedent[i & q->qmask];
			/* head descriptor index -> pair index */
			i = (u->id & q->qmask) >> 1;
			if((b = blocks[i]) == nil)
				break;

			blocks[i] = nil;

			/* u->len counts the header descriptor too; the frame is the rest */
			b->wp = b->rp + u->len - VheaderSize;
			etheriq(edev, b, 1);
			q->lastused++;
		}
	}
}
332 
/*
 * Send one command on the control virtqueue and wait for the device
 * to complete it.
 *
 * class, cmd: command identifiers (CtrlRx/CmdPromisc etc.).
 * data, ndata: command payload, read by the device.
 *
 * Returns -1 if the control queue has fewer than 3 descriptors
 * (including when it was never set up), otherwise the ack byte
 * written back by the device; a non-zero ack is also logged.
 *
 * Commands are serialised by ctlr->ctllock. The chain always uses
 * descriptors 0..2: header {class, cmd}, payload, device-writable ack.
 * NOTE(review): hdr, ack and the callers' data live on the kernel
 * stack and are handed to the device via PADDR — assumes that is
 * valid for stack addresses on this port.
 */
static int
vctlcmd(Ether *edev, uchar class, uchar cmd, uchar *data, int ndata)
{
	uchar hdr[2], ack[1];
	Ctlr *ctlr;
	Vqueue *q;
	Vdesc *d;
	int i;

	ctlr = edev->ctlr;
	q = &ctlr->queue[Vctlq];
	if(q->qsize < 3)
		return -1;

	qlock(&ctlr->ctllock);
	/* on any error, re-arm and run the command sequence again */
	while(waserror())
		;

	/* preset to a non-zero pattern; printed below if the device leaves it non-zero */
	ack[0] = 0x55;
	hdr[0] = class;
	hdr[1] = cmd;

	d = &q->desc[0];
	d->addr = PADDR(hdr);
	d->len = sizeof(hdr);
	d->next = 1;
	d->flags = Dnext;
	d++;
	d->addr = PADDR(data);
	d->len = ndata;
	d->next = 2;
	d->flags = Dnext;
	d++;
	d->addr = PADDR(ack);
	d->len = sizeof(ack);
	d->next = 0;
	d->flags = Dwrite;

	i = q->avail->idx & q->qmask;
	q->availent[i] = 0;
	coherence();

	/* interrupts for this queue are enabled only while a command is pending */
	q->avail->flags &= ~Rnointerrupt;
	q->avail->idx++;
	vqnotify(ctlr, Vctlq);
	while(!vhasroom(q))
		sleep(q, vhasroom, q);
	q->lastused = q->used->idx;
	q->avail->flags |= Rnointerrupt;

	qunlock(&ctlr->ctllock);
	poperror();

	if(ack[0] != 0)
		print("#l%d: vctlcmd: %ux.%ux -> %ux\n", edev->ctlrno, class, cmd, ack[0]);

	return ack[0];
}
391 
392 static void
interrupt(Ureg *,void * arg)393 interrupt(Ureg*, void* arg)
394 {
395 	Ether *edev;
396 	Ctlr *ctlr;
397 	Vqueue *q;
398 	int i;
399 
400 	edev = arg;
401 	ctlr = edev->ctlr;
402 	if(inb(ctlr->port+Qisr) & 1){
403 		for(i = 0; i < ctlr->nqueue; i++){
404 			q = &ctlr->queue[i];
405 			if(vhasroom(q)){
406 				q->nintr++;
407 				wakeup(q);
408 			}
409 		}
410 	}
411 }
412 
413 static void
attach(Ether * edev)414 attach(Ether* edev)
415 {
416 	char name[KNAMELEN];
417 	Ctlr* ctlr;
418 
419 	ctlr = edev->ctlr;
420 	lock(ctlr);
421 	if(ctlr->attached){
422 		unlock(ctlr);
423 		return;
424 	}
425 	ctlr->attached = 1;
426 	unlock(ctlr);
427 
428 	/* ready to go */
429 	outb(ctlr->port+Qstatus, inb(ctlr->port+Qstatus) | Sdriverok);
430 
431 	/* start kprocs */
432 	snprint(name, sizeof name, "#l%drx", edev->ctlrno);
433 	kproc(name, rxproc, edev);
434 	snprint(name, sizeof name, "#l%dtx", edev->ctlrno);
435 	kproc(name, txproc, edev);
436 }
437 
438 static long
ifstat(Ether * edev,void * a,long n,ulong offset)439 ifstat(Ether *edev, void *a, long n, ulong offset)
440 {
441 	int i, l;
442 	char *p;
443 	Ctlr *ctlr;
444 	Vqueue *q;
445 
446 	ctlr = edev->ctlr;
447 
448 	p = smalloc(READSTR);
449 
450 	l = snprint(p, READSTR, "devfeat %4.4luX\n", ctlr->feat);
451 	l += snprint(p+l, READSTR-l, "drvfeat %4.4luX\n", inl(ctlr->port+Qdrvfeat));
452 	l += snprint(p+l, READSTR-l, "devstatus %uX\n", inb(ctlr->port+Qstatus));
453 	if(ctlr->feat & Fstatus)
454 		l += snprint(p+l, READSTR-l, "netstatus %uX\n",  inb(ctlr->port+Qnetstatus));
455 
456 	for(i = 0; i < ctlr->nqueue; i++){
457 		q = &ctlr->queue[i];
458 		l += snprint(p+l, READSTR-l,
459 			"vq%d %#p size %d avail->idx %d used->idx %d lastused %hud nintr %ud nnote %ud\n",
460 			i, q, q->qsize, q->avail->idx, q->used->idx, q->lastused, q->nintr, q->nnote);
461 	}
462 
463 	n = readstr(offset, a, n, p);
464 	free(p);
465 
466 	return n;
467 }
468 
469 static void
shutdown(Ether * edev)470 shutdown(Ether* edev)
471 {
472 	Ctlr *ctlr = edev->ctlr;
473 	outb(ctlr->port+Qstatus, 0);
474 	pciclrbme(ctlr->pcidev);
475 }
476 
477 static void
promiscuous(void * arg,int on)478 promiscuous(void *arg, int on)
479 {
480 	Ether *edev = arg;
481 	uchar b[1];
482 
483 	b[0] = on != 0;
484 	vctlcmd(edev, CtrlRx, CmdPromisc, b, sizeof(b));
485 }
486 
487 static void
multicast(void * arg,uchar *,int)488 multicast(void *arg, uchar*, int)
489 {
490 	Ether *edev = arg;
491 	uchar b[1];
492 
493 	b[0] = edev->nmaddr > 0;
494 	vctlcmd(edev, CtrlRx, CmdAllmulti, b, sizeof(b));
495 }
496 
497 /* §2.4.2 Legacy Interfaces: A Note on Virtqueue Layout */
498 static ulong
queuesize(ulong size)499 queuesize(ulong size)
500 {
501 	return VPGROUND(VdescSize*size + sizeof(u16int)*(3+size))
502 		+ VPGROUND(sizeof(u16int)*3 + VusedSize*size);
503 }
504 
505 static int
initqueue(Vqueue * q,int size)506 initqueue(Vqueue *q, int size)
507 {
508 	uchar *p;
509 
510 	/* §2.4: Queue Size value is always a power of 2 and <= 32768 */
511 	assert(!(size & (size - 1)) && size <= 32768);
512 
513 	p = mallocalign(queuesize(size), VBY2PG, 0, 0);
514 	if(p == nil){
515 		print("ethervirtio: no memory for Vqueue\n");
516 		free(p);
517 		return -1;
518 	}
519 
520 	q->desc = (void*)p;
521 	p += VdescSize*size;
522 	q->avail = (void*)p;
523 	p += VringSize;
524 	q->availent = (void*)p;
525 	p += sizeof(u16int)*size;
526 	q->availevent = (void*)p;
527 	p += sizeof(u16int);
528 
529 	p = (uchar*)VPGROUND((uintptr)p);
530 	q->used = (void*)p;
531 	p += VringSize;
532 	q->usedent = (void*)p;
533 	p += VusedSize*size;
534 	q->usedevent = (void*)p;
535 
536 	q->qsize = size;
537 	q->qmask = q->qsize - 1;
538 
539 	q->lastused = q->avail->idx = q->used->idx = 0;
540 
541 	q->avail->flags |= Rnointerrupt;
542 
543 	return 0;
544 }
545 
546 static Ctlr*
pciprobe(int typ)547 pciprobe(int typ)
548 {
549 	Ctlr *c, *h, *t;
550 	Pcidev *p;
551 	int n, i;
552 
553 	h = t = nil;
554 
555 	/* §4.1.2 PCI Device Discovery */
556 	for(p = nil; p = pcimatch(p, 0x1AF4, 0);){
557 		/* the two possible DIDs for virtio-net */
558 		if(p->did != 0x1000 && p->did != 0x1041)
559 			continue;
560 		/*
561 		 * non-transitional devices will have a revision > 0,
562 		 * these are handled by ethervirtio10 driver.
563 		 */
564 		if(p->rid != 0)
565 			continue;
566 		/* first membar needs to be I/O */
567 		if((p->mem[0].bar & 1) == 0)
568 			continue;
569 		/* non-transitional device will have typ+0x40 */
570 		if(pcicfgr16(p, 0x2E) != typ)
571 			continue;
572 		if((c = mallocz(sizeof(Ctlr), 1)) == nil){
573 			print("ethervirtio: no memory for Ctlr\n");
574 			break;
575 		}
576 		c->port = p->mem[0].bar & ~3;
577 		if(ioalloc(c->port, p->mem[0].size, 0, "ethervirtio") < 0){
578 			print("ethervirtio: port %ux in use\n", c->port);
579 			free(c);
580 			continue;
581 		}
582 
583 		c->typ = typ;
584 		c->pcidev = p;
585 		c->id = (p->did<<16)|p->vid;
586 
587 		/* §3.1.2 Legacy Device Initialization */
588 		outb(c->port+Qstatus, 0);
589 		while(inb(c->port+Qstatus) != 0)
590 			delay(1);
591 		outb(c->port+Qstatus, Sacknowledge|Sdriver);
592 
593 		/* negotiate feature bits */
594 		c->feat = inl(c->port+Qdevfeat);
595 		outl(c->port+Qdrvfeat, c->feat & (Fmac|Fstatus|Fctrlvq|Fctrlrx));
596 
597 		/* §4.1.5.1.4 Virtqueue Configuration */
598 		for(i=0; i<nelem(c->queue); i++){
599 			outs(c->port+Qselect, i);
600 			n = ins(c->port+Qsize);
601 			if(n == 0 || (n & (n-1)) != 0){
602 				if(i < 2)
603 					print("ethervirtio: queue %d has invalid size %d\n", i, n);
604 				break;
605 			}
606 			if(initqueue(&c->queue[i], n) < 0)
607 				break;
608 			coherence();
609 			outl(c->port+Qaddr, PADDR(c->queue[i].desc)/VBY2PG);
610 		}
611 		if(i < 2){
612 			print("ethervirtio: no queues\n");
613 			free(c);
614 			continue;
615 		}
616 		c->nqueue = i;
617 
618 		if(h == nil)
619 			h = c;
620 		else
621 			t->next = c;
622 		t = c;
623 	}
624 
625 	return h;
626 }
627 
628 
629 static int
reset(Ether * edev)630 reset(Ether* edev)
631 {
632 	static uchar zeros[Eaddrlen];
633 	Ctlr *ctlr;
634 	int i;
635 
636 	if(ctlrhead == nil)
637 		ctlrhead = pciprobe(1);
638 
639 	for(ctlr = ctlrhead; ctlr != nil; ctlr = ctlr->next){
640 		if(ctlr->active)
641 			continue;
642 		if(edev->port == 0 || edev->port == ctlr->port){
643 			ctlr->active = 1;
644 			break;
645 		}
646 	}
647 
648 	if(ctlr == nil)
649 		return -1;
650 
651 	edev->ctlr = ctlr;
652 	edev->port = ctlr->port;
653 	edev->irq = ctlr->pcidev->intl;
654 	edev->tbdf = ctlr->pcidev->tbdf;
655 	edev->mbps = 1000;
656 	edev->link = 1;
657 
658 	if((ctlr->feat & Fmac) != 0 && memcmp(edev->ea, zeros, Eaddrlen) == 0){
659 		for(i = 0; i < Eaddrlen; i++)
660 			edev->ea[i] = inb(ctlr->port+Qmac+i);
661 	} else {
662 		for(i = 0; i < Eaddrlen; i++)
663 			outb(ctlr->port+Qmac+i, edev->ea[i]);
664 	}
665 
666 	edev->arg = edev;
667 
668 	edev->attach = attach;
669 	edev->shutdown = shutdown;
670 	edev->ifstat = ifstat;
671 
672 	if((ctlr->feat & (Fctrlvq|Fctrlrx)) == (Fctrlvq|Fctrlrx)){
673 		edev->multicast = multicast;
674 		edev->promiscuous = promiscuous;
675 	}
676 
677 	pcisetbme(ctlr->pcidev);
678 	intrenable(edev->irq, interrupt, edev, edev->tbdf, edev->name);
679 
680 	return 0;
681 }
682 
/*
 * Driver entry point: registers reset() as the probe routine for
 * ether cards of type "virtio".
 */
void
ethervirtiolink(void)
{
	addethercard("virtio", reset);
}
688 
689