/*
 * virtio 1.0 ethernet driver
 * http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
 *
 * In contrast to ethervirtio.c, this driver handles the non-legacy
 * interface for virtio ethernet which uses mmio for all register accesses
 * and requires an elaborate pci capability structure dance to get working.
 *
 * It is kind of pointless as it is most likely slower than
 * port i/o (harder to emulate on the pc platform).
 *
 * The reason why this driver is needed is that vultr set the
 * disable-legacy=on option in the -device parameter for qemu
 * on their hypervisor.
 */
16 #include "u.h"
17 #include "../port/lib.h"
18 #include "mem.h"
19 #include "dat.h"
20 #include "fns.h"
21 #include "io.h"
22 #include "../port/error.h"
23 #include "../port/netif.h"
24 #include "etherif.h"
25
/* memory-mapped register blocks */
typedef struct Vconfig	Vconfig;
typedef struct Vnetcfg	Vnetcfg;

/* virtqueue pieces */
typedef struct Vring	Vring;
typedef struct Vdesc	Vdesc;
typedef struct Vused	Vused;
typedef struct Vheader	Vheader;
typedef struct Vqueue	Vqueue;

/* per-device driver state */
typedef struct Ctlr	Ctlr;
36
enum {
	/* §2.1 Device Status Field: bits written to Vconfig.status */
	Sacknowledge	= 1<<0,
	Sdriver		= 1<<1,
	Sdriverok	= 1<<2,
	Sfeaturesok	= 1<<3,
	Sfailed		= 1<<7,

	/* net status flags (historically "Qnetstatus"; presumably Vnetcfg.status) */
	Nlinkup		= 1<<0,
	Nannounce	= 1<<1,

	/* bits in feature word 0 (Ctlr.feat[0]) */
	Fmac		= 1<<5,
	Fstatus		= 1<<16,
	Fctrlvq		= 1<<17,
	Fctrlrx		= 1<<18,

	/* bits in feature word 1 (Ctlr.feat[1]); feature 32 is bit 0 of that word */
	Fversion1	= 1<<0,

	/* flags in the used ring header */
	Unonotify	= 1<<0,
	/* flags in the avail ring header */
	Rnointerrupt	= 1<<0,

	/* Vdesc.flags */
	Dnext		= 1<<0,
	Dwrite		= 1<<1,
	Dindirect	= 1<<2,

	/* sizes in bytes of the structures shared with the device */
	VringSize	= 4,
	VdescSize	= 16,
	VusedSize	= 8,
	VheaderSize	= 12,

	/* indices into Ctlr.queue[] */
	Vrxq		= 0,
	Vtxq		= 1,
	Vctlq		= 2,

	/* control queue command classes, each followed by its commands */
	CtrlRx		= 0,
	 CmdPromisc	= 0,
	 CmdAllmulti	= 1,
	CtrlMac		= 1,
	 CmdMacTableSet	= 0,
	CtrlVlan	= 2,
	 CmdVlanAdd	= 0,
	 CmdVlanDel	= 1,
};
88
89 struct Vconfig {
90 u32int devfeatsel;
91 u32int devfeat;
92 u32int drvfeatsel;
93 u32int drvfeat;
94
95 u16int msixcfg;
96 u16int nqueues;
97
98 u8int status;
99 u8int cfggen;
100 u16int queuesel;
101
102 u16int queuesize;
103 u16int queuemsixvect;
104
105 u16int queueenable;
106 u16int queuenotifyoff;
107
108 u64int queuedesc;
109 u64int queueavail;
110 u64int queueused;
111 };
112
113 struct Vnetcfg
114 {
115 u16int mac0;
116 u16int mac1;
117 u16int mac2;
118 u16int status;
119 u16int maxqueuepairs;
120 u16int mtu;
121 };
122
123 struct Vring
124 {
125 u16int flags;
126 u16int idx;
127 };
128
129 struct Vdesc
130 {
131 u64int addr;
132 u32int len;
133 u16int flags;
134 u16int next;
135 };
136
137 struct Vused
138 {
139 u32int id;
140 u32int len;
141 };
142
143 struct Vheader
144 {
145 u8int flags;
146 u8int segtype;
147 u16int hlen;
148 u16int seglen;
149 u16int csumstart;
150 u16int csumend;
151 };
152
153 struct Vqueue
154 {
155 Rendez;
156
157 uint qsize;
158 uint qmask;
159
160 Vdesc *desc;
161
162 Vring *avail;
163 u16int *availent;
164 u16int *availevent;
165
166 Vring *used;
167 Vused *usedent;
168 u16int *usedevent;
169 u16int lastused;
170
171 uint nintr;
172 uint nnote;
173
174 /* notify register */
175 void *notify;
176 };
177
178 struct Ctlr {
179 Lock;
180
181 QLock ctllock;
182
183 int attached;
184
185 /* registers */
186 Vconfig *cfg;
187 Vnetcfg *dev;
188 u8int *isr;
189 u8int *notify;
190 u32int notifyoffmult;
191
192 uvlong port;
193 Pcidev *pcidev;
194 Ctlr *next;
195 int active;
196 ulong feat[2];
197 int nqueue;
198
199 /* virtioether has 3 queues: rx, tx and ctl */
200 Vqueue queue[3];
201 };
202
203 static Ctlr *ctlrhead;
204
205 static int
vhasroom(void * v)206 vhasroom(void *v)
207 {
208 Vqueue *q = v;
209 return q->lastused != q->used->idx;
210 }
211
212 static void
vqnotify(Ctlr * ctlr,int x)213 vqnotify(Ctlr *ctlr, int x)
214 {
215 Vqueue *q;
216
217 coherence();
218 q = &ctlr->queue[x];
219 if(q->used->flags & Unonotify)
220 return;
221 q->nnote++;
222 *((u16int*)q->notify) = x;
223 }
224
225 static void
txproc(void * v)226 txproc(void *v)
227 {
228 Vheader *header;
229 Block **blocks;
230 Ether *edev;
231 Ctlr *ctlr;
232 Vqueue *q;
233 Vused *u;
234 Block *b;
235 int i, j;
236
237 edev = v;
238 ctlr = edev->ctlr;
239 q = &ctlr->queue[Vtxq];
240
241 header = smalloc(VheaderSize);
242 blocks = smalloc(sizeof(Block*) * (q->qsize/2));
243
244 for(i = 0; i < q->qsize/2; i++){
245 j = i << 1;
246 q->desc[j].addr = PADDR(header);
247 q->desc[j].len = VheaderSize;
248 q->desc[j].next = j | 1;
249 q->desc[j].flags = Dnext;
250
251 q->availent[i] = q->availent[i + q->qsize/2] = j;
252
253 j |= 1;
254 q->desc[j].next = 0;
255 q->desc[j].flags = 0;
256 }
257
258 q->avail->flags &= ~Rnointerrupt;
259
260 while(waserror())
261 ;
262
263 while((b = qbread(edev->oq, 1000000)) != nil){
264 for(;;){
265 /* retire completed packets */
266 while((i = q->lastused) != q->used->idx){
267 u = &q->usedent[i & q->qmask];
268 i = (u->id & q->qmask) >> 1;
269 if(blocks[i] == nil)
270 break;
271 freeb(blocks[i]);
272 blocks[i] = nil;
273 q->lastused++;
274 }
275
276 /* have free slot? */
277 i = q->avail->idx & (q->qmask >> 1);
278 if(blocks[i] == nil)
279 break;
280
281 /* ring full, wait and retry */
282 if(!vhasroom(q))
283 sleep(q, vhasroom, q);
284 }
285
286 /* slot is free, fill in descriptor */
287 blocks[i] = b;
288 j = (i << 1) | 1;
289 q->desc[j].addr = PADDR(b->rp);
290 q->desc[j].len = BLEN(b);
291 coherence();
292 q->avail->idx++;
293 vqnotify(ctlr, Vtxq);
294 }
295
296 pexit("ether out queue closed", 1);
297 }
298
299 static void
rxproc(void * v)300 rxproc(void *v)
301 {
302 Vheader *header;
303 Block **blocks;
304 Ether *edev;
305 Ctlr *ctlr;
306 Vqueue *q;
307 Vused *u;
308 Block *b;
309 int i, j;
310
311 edev = v;
312 ctlr = edev->ctlr;
313 q = &ctlr->queue[Vrxq];
314
315 header = smalloc(VheaderSize);
316 blocks = smalloc(sizeof(Block*) * (q->qsize/2));
317
318 for(i = 0; i < q->qsize/2; i++){
319 j = i << 1;
320 q->desc[j].addr = PADDR(header);
321 q->desc[j].len = VheaderSize;
322 q->desc[j].next = j | 1;
323 q->desc[j].flags = Dwrite|Dnext;
324
325 q->availent[i] = q->availent[i + q->qsize/2] = j;
326
327 j |= 1;
328 q->desc[j].next = 0;
329 q->desc[j].flags = Dwrite;
330 }
331
332 q->avail->flags &= ~Rnointerrupt;
333
334 while(waserror())
335 ;
336
337 for(;;){
338 /* replenish receive ring */
339 do {
340 i = q->avail->idx & (q->qmask >> 1);
341 if(blocks[i] != nil)
342 break;
343 if((b = iallocb(ETHERMAXTU)) == nil)
344 break;
345 blocks[i] = b;
346 j = (i << 1) | 1;
347 q->desc[j].addr = PADDR(b->rp);
348 q->desc[j].len = BALLOC(b);
349 coherence();
350 q->avail->idx++;
351 } while(q->avail->idx != q->used->idx);
352 vqnotify(ctlr, Vrxq);
353
354 /* wait for any packets to complete */
355 if(!vhasroom(q))
356 sleep(q, vhasroom, q);
357
358 /* retire completed packets */
359 while((i = q->lastused) != q->used->idx) {
360 u = &q->usedent[i & q->qmask];
361 i = (u->id & q->qmask) >> 1;
362 if((b = blocks[i]) == nil)
363 break;
364
365 blocks[i] = nil;
366 b->wp = b->rp + u->len - VheaderSize;
367 etheriq(edev, b, 1);
368 q->lastused++;
369 }
370 }
371 }
372
373 static int
vctlcmd(Ether * edev,uchar class,uchar cmd,uchar * data,int ndata)374 vctlcmd(Ether *edev, uchar class, uchar cmd, uchar *data, int ndata)
375 {
376 uchar hdr[2], ack[1];
377 Ctlr *ctlr;
378 Vqueue *q;
379 Vdesc *d;
380 int i;
381
382 ctlr = edev->ctlr;
383 q = &ctlr->queue[Vctlq];
384 if(q->qsize < 3)
385 return -1;
386
387 qlock(&ctlr->ctllock);
388 while(waserror())
389 ;
390
391 ack[0] = 0x55;
392 hdr[0] = class;
393 hdr[1] = cmd;
394
395 d = &q->desc[0];
396 d->addr = PADDR(hdr);
397 d->len = sizeof(hdr);
398 d->next = 1;
399 d->flags = Dnext;
400 d++;
401 d->addr = PADDR(data);
402 d->len = ndata;
403 d->next = 2;
404 d->flags = Dnext;
405 d++;
406 d->addr = PADDR(ack);
407 d->len = sizeof(ack);
408 d->next = 0;
409 d->flags = Dwrite;
410
411 i = q->avail->idx & q->qmask;
412 q->availent[i] = 0;
413 coherence();
414
415 q->avail->flags &= ~Rnointerrupt;
416 q->avail->idx++;
417 vqnotify(ctlr, Vctlq);
418 while(!vhasroom(q))
419 sleep(q, vhasroom, q);
420 q->lastused = q->used->idx;
421 q->avail->flags |= Rnointerrupt;
422
423 qunlock(&ctlr->ctllock);
424 poperror();
425
426 if(ack[0] != 0)
427 print("#l%d: vctlcmd: %ux.%ux -> %ux\n", edev->ctlrno, class, cmd, ack[0]);
428
429 return ack[0];
430 }
431
432 static void
interrupt(Ureg *,void * arg)433 interrupt(Ureg*, void* arg)
434 {
435 Ether *edev;
436 Ctlr *ctlr;
437 Vqueue *q;
438 int i;
439
440 edev = arg;
441 ctlr = edev->ctlr;
442 if(*ctlr->isr & 1){
443 for(i = 0; i < ctlr->nqueue; i++){
444 q = &ctlr->queue[i];
445 if(vhasroom(q)){
446 q->nintr++;
447 wakeup(q);
448 }
449 }
450 }
451 }
452
453 static void
attach(Ether * edev)454 attach(Ether* edev)
455 {
456 char name[KNAMELEN];
457 Ctlr* ctlr;
458 int i;
459
460 ctlr = edev->ctlr;
461 ilock(ctlr);
462 if(ctlr->attached){
463 iunlock(ctlr);
464 return;
465 }
466 ctlr->attached = 1;
467
468 /* enable the queues */
469 for(i = 0; i < ctlr->nqueue; i++){
470 ctlr->cfg->queuesel = i;
471 ctlr->cfg->queueenable = 1;
472 }
473
474 /* driver is ready */
475 ctlr->cfg->status |= Sdriverok;
476
477 iunlock(ctlr);
478
479 /* start kprocs */
480 snprint(name, sizeof name, "#l%drx", edev->ctlrno);
481 kproc(name, rxproc, edev);
482 snprint(name, sizeof name, "#l%dtx", edev->ctlrno);
483 kproc(name, txproc, edev);
484 }
485
486 static long
ifstat(Ether * edev,void * a,long n,ulong offset)487 ifstat(Ether *edev, void *a, long n, ulong offset)
488 {
489 int i, l;
490 char *p;
491 Ctlr *ctlr;
492 Vqueue *q;
493
494 ctlr = edev->ctlr;
495
496 p = smalloc(READSTR);
497
498 l = snprint(p, READSTR, "devfeat %32.32luX %32.32luX\n", ctlr->feat[1], ctlr->feat[0]);
499 l += snprint(p+l, READSTR-l, "devstatus %8.8uX\n", ctlr->cfg->status);
500
501 for(i = 0; i < ctlr->nqueue; i++){
502 q = &ctlr->queue[i];
503 l += snprint(p+l, READSTR-l,
504 "vq%d %#p size %d avail->idx %d used->idx %d lastused %hud nintr %ud nnote %ud\n",
505 i, q, q->qsize, q->avail->idx, q->used->idx, q->lastused, q->nintr, q->nnote);
506 }
507
508 n = readstr(offset, a, n, p);
509 free(p);
510
511 return n;
512 }
513
514 static void
shutdown(Ether * edev)515 shutdown(Ether* edev)
516 {
517 Ctlr *ctlr = edev->ctlr;
518
519 coherence();
520 ctlr->cfg->status = 0;
521 coherence();
522
523 pciclrbme(ctlr->pcidev);
524 }
525
526 static void
promiscuous(void * arg,int on)527 promiscuous(void *arg, int on)
528 {
529 Ether *edev = arg;
530 uchar b[1];
531
532 b[0] = on != 0;
533 vctlcmd(edev, CtrlRx, CmdPromisc, b, sizeof(b));
534 }
535
536 static void
multicast(void * arg,uchar *,int)537 multicast(void *arg, uchar*, int)
538 {
539 Ether *edev = arg;
540 uchar b[1];
541
542 b[0] = edev->nmaddr > 0;
543 vctlcmd(edev, CtrlRx, CmdAllmulti, b, sizeof(b));
544 }
545
546 static int
initqueue(Vqueue * q,int size)547 initqueue(Vqueue *q, int size)
548 {
549 uchar *p;
550
551 q->desc = mallocalign(VdescSize*size, 16, 0, 0);
552 if(q->desc == nil)
553 return -1;
554 p = mallocalign(VringSize + 2*size + 2, 2, 0, 0);
555 if(p == nil){
556 FreeDesc:
557 free(q->desc);
558 q->desc = nil;
559 return -1;
560 }
561 q->avail = (void*)p;
562 p += VringSize;
563 q->availent = (void*)p;
564 p += sizeof(u16int)*size;
565 q->availevent = (void*)p;
566 p = mallocalign(VringSize + VusedSize*size + 2, 4, 0, 0);
567 if(p == nil){
568 free(q->avail);
569 q->avail = nil;
570 goto FreeDesc;
571 }
572 q->used = (void*)p;
573 p += VringSize;
574 q->usedent = (void*)p;
575 p += VusedSize*size;
576 q->usedevent = (void*)p;
577
578 q->qsize = size;
579 q->qmask = q->qsize - 1;
580
581 q->lastused = q->avail->idx = q->used->idx = 0;
582
583 q->avail->flags |= Rnointerrupt;
584
585 return 0;
586 }
587
588 static int
matchvirtiocfgcap(Pcidev * p,int cap,int off,int typ)589 matchvirtiocfgcap(Pcidev *p, int cap, int off, int typ)
590 {
591 int bar;
592
593 if(cap != 9 || pcicfgr8(p, off+3) != typ)
594 return 1;
595
596 /* skip invalid or non memory bars */
597 bar = pcicfgr8(p, off+4);
598 if(bar < 0 || bar >= nelem(p->mem)
599 || p->mem[bar].size == 0
600 || (p->mem[bar].bar & 3) != 0)
601 return 1;
602
603 return 0;
604 }
605
606 static int
virtiocap(Pcidev * p,int typ)607 virtiocap(Pcidev *p, int typ)
608 {
609 return pcienumcaps(p, matchvirtiocfgcap, typ);
610 }
611
612 static void*
virtiomapregs(Pcidev * p,int cap,int size)613 virtiomapregs(Pcidev *p, int cap, int size)
614 {
615 int bar, len;
616 uvlong addr;
617
618 if(cap < 0)
619 return nil;
620 bar = pcicfgr8(p, cap+4) % nelem(p->mem);
621 addr = pcicfgr32(p, cap+8);
622 len = pcicfgr32(p, cap+12);
623 if(size <= 0)
624 size = len;
625 else if(len < size)
626 return nil;
627 if(addr+len > p->mem[bar].size)
628 return nil;
629 addr += p->mem[bar].bar & ~0xFULL;
630 return vmap(addr, size);
631 }
632
633 static Ctlr*
pciprobe(void)634 pciprobe(void)
635 {
636 Ctlr *c, *h, *t;
637 Pcidev *p;
638 Vconfig *cfg;
639 int bar, cap, n, i;
640
641 h = t = nil;
642
643 /* §4.1.2 PCI Device Discovery */
644 for(p = nil; p = pcimatch(p, 0x1AF4, 0x1041);){
645 /* non-transitional devices will have a revision > 0 */
646 if(p->rid == 0)
647 continue;
648 if((cap = virtiocap(p, 1)) < 0)
649 continue;
650 bar = pcicfgr8(p, cap+4) % nelem(p->mem);
651 cfg = virtiomapregs(p, cap, sizeof(Vconfig));
652 if(cfg == nil)
653 continue;
654 if((c = mallocz(sizeof(Ctlr), 1)) == nil){
655 print("ethervirtio: no memory for Ctlr\n");
656 break;
657 }
658 c->cfg = cfg;
659 c->pcidev = p;
660 c->port = p->mem[bar].bar & ~0xFULL;
661
662 c->dev = virtiomapregs(p, virtiocap(p, 4), sizeof(Vnetcfg));
663 if(c->dev == nil)
664 goto Baddev;
665 c->isr = virtiomapregs(p, virtiocap(p, 3), 0);
666 if(c->isr == nil)
667 goto Baddev;
668 cap = virtiocap(p, 2);
669 c->notify = virtiomapregs(p, cap, 0);
670 if(c->notify == nil)
671 goto Baddev;
672 c->notifyoffmult = pcicfgr32(p, cap+16);
673
674 /* device reset */
675 coherence();
676 cfg->status = 0;
677 while(cfg->status != 0)
678 delay(1);
679 cfg->status = Sacknowledge|Sdriver;
680
681 /* negotiate feature bits */
682 cfg->devfeatsel = 1;
683 c->feat[1] = cfg->devfeat;
684
685 cfg->devfeatsel = 0;
686 c->feat[0] = cfg->devfeat;
687
688 cfg->drvfeatsel = 1;
689 cfg->drvfeat = c->feat[1] & Fversion1;
690
691 cfg->drvfeatsel = 0;
692 cfg->drvfeat = c->feat[0] & (Fmac|Fctrlvq|Fctrlrx);
693
694 cfg->status |= Sfeaturesok;
695
696 for(i=0; i<nelem(c->queue); i++){
697 cfg->queuesel = i;
698 n = cfg->queuesize;
699 if(n == 0 || (n & (n-1)) != 0){
700 if(i < 2)
701 print("ethervirtio: queue %d has invalid size %d\n", i, n);
702 break;
703 }
704 if(initqueue(&c->queue[i], n) < 0)
705 break;
706 c->queue[i].notify = c->notify + c->notifyoffmult * cfg->queuenotifyoff;
707 coherence();
708 cfg->queuedesc = PADDR(c->queue[i].desc);
709 cfg->queueavail = PADDR(c->queue[i].avail);
710 cfg->queueused = PADDR(c->queue[i].used);
711 }
712 if(i < 2){
713 print("ethervirtio: no queues\n");
714 Baddev:
715 /* TODO, vunmap */
716 free(c);
717 continue;
718 }
719 c->nqueue = i;
720
721 if(h == nil)
722 h = c;
723 else
724 t->next = c;
725 t = c;
726 }
727
728 return h;
729 }
730
731
732 static int
reset(Ether * edev)733 reset(Ether* edev)
734 {
735 static uchar zeros[Eaddrlen];
736 Ctlr *ctlr;
737 int i;
738
739 if(ctlrhead == nil)
740 ctlrhead = pciprobe();
741
742 for(ctlr = ctlrhead; ctlr != nil; ctlr = ctlr->next){
743 if(ctlr->active)
744 continue;
745 if(edev->port == 0 || edev->port == ctlr->port){
746 ctlr->active = 1;
747 break;
748 }
749 }
750
751 if(ctlr == nil)
752 return -1;
753
754 edev->ctlr = ctlr;
755 edev->port = ctlr->port;
756 edev->irq = ctlr->pcidev->intl;
757 edev->tbdf = ctlr->pcidev->tbdf;
758 edev->mbps = 1000;
759 edev->link = 1;
760
761 if((ctlr->feat[0] & Fmac) != 0 && memcmp(edev->ea, zeros, Eaddrlen) == 0){
762 for(i = 0; i < Eaddrlen; i++)
763 edev->ea[i] = ((uchar*)ctlr->dev)[i];
764 } else {
765 for(i = 0; i < Eaddrlen; i++)
766 ((uchar*)ctlr->dev)[i] = edev->ea[i];
767 }
768
769 edev->arg = edev;
770
771 edev->attach = attach;
772 edev->shutdown = shutdown;
773 edev->ifstat = ifstat;
774
775 if((ctlr->feat[0] & (Fctrlvq|Fctrlrx)) == (Fctrlvq|Fctrlrx)){
776 edev->multicast = multicast;
777 edev->promiscuous = promiscuous;
778 }
779
780 pcisetbme(ctlr->pcidev);
781 intrenable(edev->irq, interrupt, edev, edev->tbdf, edev->name);
782
783 return 0;
784 }
785
786 void
ethervirtio10link(void)787 ethervirtio10link(void)
788 {
789 addethercard("virtio10", reset);
790 }
791