1 /*
2 * virtio ethernet driver implementing the legacy interface:
3 * http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
4 */
5 #include "u.h"
6 #include "../port/lib.h"
7 #include "mem.h"
8 #include "dat.h"
9 #include "fns.h"
10 #include "io.h"
11 #include "../port/error.h"
12 #include "../port/netif.h"
13 #include "etherif.h"
14
/* forward declarations for the structures this driver defines */
typedef struct Ctlr Ctlr;
typedef struct Vdesc Vdesc;
typedef struct Vheader Vheader;
typedef struct Vqueue Vqueue;
typedef struct Vring Vring;
typedef struct Vused Vused;
21
/*
 * Constants for the legacy virtio-net interface: status bits,
 * I/O port register offsets, feature bits, ring and descriptor
 * flags, on-the-wire structure sizes and control queue commands.
 */
enum {
	/* §2.1 Device Status Field */
	Sacknowledge	= 1<<0,
	Sdriver		= 1<<1,
	Sdriverok	= 1<<2,
	Sfeatureok	= 1<<3,
	Sfailed		= 1<<7,

	/* §4.1.4.8 Legacy Interfaces: A Note on PCI Device Layout */
	Qdevfeat	= 0x00,
	Qdrvfeat	= 0x04,
	Qaddr		= 0x08,
	Qsize		= 0x0C,
	Qselect		= 0x0E,
	Qnotify		= 0x10,
	Qstatus		= 0x12,
	Qisr		= 0x13,
	Qmac		= 0x14,
	Qnetstatus	= 0x1A,

	/* flags in Qnetstatus */
	Nlinkup		= 1,
	Nannounce	= 2,

	/* feature bits */
	Fmac		= 1<<5,
	Fstatus		= 1<<16,
	Fctrlvq		= 1<<17,
	Fctrlrx		= 1<<18,

	/* vring used flags */
	Unonotify	= 1,
	/* vring avail flags */
	Rnointerrupt	= 1,

	/* descriptor flags */
	Dnext		= 1<<0,
	Dwrite		= 1<<1,
	Dindirect	= 1<<2,

	/* struct sizes, in bytes, as laid out in ring memory */
	VringSize	= 4,
	VdescSize	= 16,
	VusedSize	= 8,
	VheaderSize	= 10,

	/*
	 * §4.1.5.1.4.1 says pages are 4096 bytes
	 * for the purposes of the driver.
	 */
	VBY2PG		= 4096,
#define VPGROUND(s)	ROUNDUP(s, VBY2PG)

	/* virtqueue indices */
	Vrxq		= 0,
	Vtxq		= 1,
	Vctlq		= 2,

	/* class/cmd for Vctlq */
	CtrlRx		= 0,
		CmdPromisc	= 0,
		CmdAllmulti	= 1,
	CtrlMac		= 1,
		CmdMacTableSet	= 0,
	CtrlVlan	= 2,
		CmdVlanAdd	= 0,
		CmdVlanDel	= 1,
};
88
89 struct Vring
90 {
91 u16int flags;
92 u16int idx;
93 };
94
95 struct Vdesc
96 {
97 u64int addr;
98 u32int len;
99 u16int flags;
100 u16int next;
101 };
102
103 struct Vused
104 {
105 u32int id;
106 u32int len;
107 };
108
109 struct Vheader
110 {
111 u8int flags;
112 u8int segtype;
113 u16int hlen;
114 u16int seglen;
115 u16int csumstart;
116 u16int csumend;
117 };
118
119 /* §2.4 Virtqueues */
120 struct Vqueue
121 {
122 Rendez;
123
124 uint qsize;
125 uint qmask;
126
127 Vdesc *desc;
128
129 Vring *avail;
130 u16int *availent;
131 u16int *availevent;
132
133 Vring *used;
134 Vused *usedent;
135 u16int *usedevent;
136 u16int lastused;
137
138 uint nintr;
139 uint nnote;
140 };
141
142 struct Ctlr {
143 Lock;
144
145 QLock ctllock;
146
147 int attached;
148
149 int port;
150 Pcidev *pcidev;
151 Ctlr *next;
152 int active;
153 int id;
154 int typ;
155 ulong feat;
156 int nqueue;
157
158 /* virtioether has 3 queues: rx, tx and ctl */
159 Vqueue queue[3];
160 };
161
162 static Ctlr *ctlrhead;
163
164 static int
vhasroom(void * v)165 vhasroom(void *v)
166 {
167 Vqueue *q = v;
168 return q->lastused != q->used->idx;
169 }
170
171 static void
vqnotify(Ctlr * ctlr,int x)172 vqnotify(Ctlr *ctlr, int x)
173 {
174 Vqueue *q;
175
176 coherence();
177 q = &ctlr->queue[x];
178 if(q->used->flags & Unonotify)
179 return;
180 q->nnote++;
181 outs(ctlr->port+Qnotify, x);
182 }
183
184 static void
txproc(void * v)185 txproc(void *v)
186 {
187 Vheader *header;
188 Block **blocks;
189 Ether *edev;
190 Ctlr *ctlr;
191 Vqueue *q;
192 Vused *u;
193 Block *b;
194 int i, j;
195
196 edev = v;
197 ctlr = edev->ctlr;
198 q = &ctlr->queue[Vtxq];
199
200 header = smalloc(VheaderSize);
201 blocks = smalloc(sizeof(Block*) * (q->qsize/2));
202
203 for(i = 0; i < q->qsize/2; i++){
204 j = i << 1;
205 q->desc[j].addr = PADDR(header);
206 q->desc[j].len = VheaderSize;
207 q->desc[j].next = j | 1;
208 q->desc[j].flags = Dnext;
209
210 q->availent[i] = q->availent[i + q->qsize/2] = j;
211
212 j |= 1;
213 q->desc[j].next = 0;
214 q->desc[j].flags = 0;
215 }
216
217 q->avail->flags &= ~Rnointerrupt;
218
219 while(waserror())
220 ;
221
222 while((b = qbread(edev->oq, 1000000)) != nil){
223 for(;;){
224 /* retire completed packets */
225 while((i = q->lastused) != q->used->idx){
226 u = &q->usedent[i & q->qmask];
227 i = (u->id & q->qmask) >> 1;
228 if(blocks[i] == nil)
229 break;
230 freeb(blocks[i]);
231 blocks[i] = nil;
232 q->lastused++;
233 }
234
235 /* have free slot? */
236 i = q->avail->idx & (q->qmask >> 1);
237 if(blocks[i] == nil)
238 break;
239
240 /* ring full, wait and retry */
241 if(!vhasroom(q))
242 sleep(q, vhasroom, q);
243 }
244
245 /* slot is free, fill in descriptor */
246 blocks[i] = b;
247 j = (i << 1) | 1;
248 q->desc[j].addr = PADDR(b->rp);
249 q->desc[j].len = BLEN(b);
250 coherence();
251 q->avail->idx++;
252 vqnotify(ctlr, Vtxq);
253 }
254
255 pexit("ether out queue closed", 1);
256 }
257
258 static void
rxproc(void * v)259 rxproc(void *v)
260 {
261 Vheader *header;
262 Block **blocks;
263 Ether *edev;
264 Ctlr *ctlr;
265 Vqueue *q;
266 Vused *u;
267 Block *b;
268 int i, j;
269
270 edev = v;
271 ctlr = edev->ctlr;
272 q = &ctlr->queue[Vrxq];
273
274 header = smalloc(VheaderSize);
275 blocks = smalloc(sizeof(Block*) * (q->qsize/2));
276
277 for(i = 0; i < q->qsize/2; i++){
278 j = i << 1;
279 q->desc[j].addr = PADDR(header);
280 q->desc[j].len = VheaderSize;
281 q->desc[j].next = j | 1;
282 q->desc[j].flags = Dwrite|Dnext;
283
284 q->availent[i] = q->availent[i + q->qsize/2] = j;
285
286 j |= 1;
287 q->desc[j].next = 0;
288 q->desc[j].flags = Dwrite;
289 }
290
291 q->avail->flags &= ~Rnointerrupt;
292
293 while(waserror())
294 ;
295
296 for(;;){
297 /* replenish receive ring */
298 do {
299 i = q->avail->idx & (q->qmask >> 1);
300 if(blocks[i] != nil)
301 break;
302 if((b = iallocb(ETHERMAXTU)) == nil)
303 break;
304 blocks[i] = b;
305 j = (i << 1) | 1;
306 q->desc[j].addr = PADDR(b->rp);
307 q->desc[j].len = BALLOC(b);
308 coherence();
309 q->avail->idx++;
310 } while(q->avail->idx != q->used->idx);
311 vqnotify(ctlr, Vrxq);
312
313 /* wait for any packets to complete */
314 if(!vhasroom(q))
315 sleep(q, vhasroom, q);
316
317 /* retire completed packets */
318 while((i = q->lastused) != q->used->idx) {
319 u = &q->usedent[i & q->qmask];
320 i = (u->id & q->qmask) >> 1;
321 if((b = blocks[i]) == nil)
322 break;
323
324 blocks[i] = nil;
325
326 b->wp = b->rp + u->len - VheaderSize;
327 etheriq(edev, b, 1);
328 q->lastused++;
329 }
330 }
331 }
332
333 static int
vctlcmd(Ether * edev,uchar class,uchar cmd,uchar * data,int ndata)334 vctlcmd(Ether *edev, uchar class, uchar cmd, uchar *data, int ndata)
335 {
336 uchar hdr[2], ack[1];
337 Ctlr *ctlr;
338 Vqueue *q;
339 Vdesc *d;
340 int i;
341
342 ctlr = edev->ctlr;
343 q = &ctlr->queue[Vctlq];
344 if(q->qsize < 3)
345 return -1;
346
347 qlock(&ctlr->ctllock);
348 while(waserror())
349 ;
350
351 ack[0] = 0x55;
352 hdr[0] = class;
353 hdr[1] = cmd;
354
355 d = &q->desc[0];
356 d->addr = PADDR(hdr);
357 d->len = sizeof(hdr);
358 d->next = 1;
359 d->flags = Dnext;
360 d++;
361 d->addr = PADDR(data);
362 d->len = ndata;
363 d->next = 2;
364 d->flags = Dnext;
365 d++;
366 d->addr = PADDR(ack);
367 d->len = sizeof(ack);
368 d->next = 0;
369 d->flags = Dwrite;
370
371 i = q->avail->idx & q->qmask;
372 q->availent[i] = 0;
373 coherence();
374
375 q->avail->flags &= ~Rnointerrupt;
376 q->avail->idx++;
377 vqnotify(ctlr, Vctlq);
378 while(!vhasroom(q))
379 sleep(q, vhasroom, q);
380 q->lastused = q->used->idx;
381 q->avail->flags |= Rnointerrupt;
382
383 qunlock(&ctlr->ctllock);
384 poperror();
385
386 if(ack[0] != 0)
387 print("#l%d: vctlcmd: %ux.%ux -> %ux\n", edev->ctlrno, class, cmd, ack[0]);
388
389 return ack[0];
390 }
391
392 static void
interrupt(Ureg *,void * arg)393 interrupt(Ureg*, void* arg)
394 {
395 Ether *edev;
396 Ctlr *ctlr;
397 Vqueue *q;
398 int i;
399
400 edev = arg;
401 ctlr = edev->ctlr;
402 if(inb(ctlr->port+Qisr) & 1){
403 for(i = 0; i < ctlr->nqueue; i++){
404 q = &ctlr->queue[i];
405 if(vhasroom(q)){
406 q->nintr++;
407 wakeup(q);
408 }
409 }
410 }
411 }
412
413 static void
attach(Ether * edev)414 attach(Ether* edev)
415 {
416 char name[KNAMELEN];
417 Ctlr* ctlr;
418
419 ctlr = edev->ctlr;
420 lock(ctlr);
421 if(ctlr->attached){
422 unlock(ctlr);
423 return;
424 }
425 ctlr->attached = 1;
426 unlock(ctlr);
427
428 /* ready to go */
429 outb(ctlr->port+Qstatus, inb(ctlr->port+Qstatus) | Sdriverok);
430
431 /* start kprocs */
432 snprint(name, sizeof name, "#l%drx", edev->ctlrno);
433 kproc(name, rxproc, edev);
434 snprint(name, sizeof name, "#l%dtx", edev->ctlrno);
435 kproc(name, txproc, edev);
436 }
437
438 static long
ifstat(Ether * edev,void * a,long n,ulong offset)439 ifstat(Ether *edev, void *a, long n, ulong offset)
440 {
441 int i, l;
442 char *p;
443 Ctlr *ctlr;
444 Vqueue *q;
445
446 ctlr = edev->ctlr;
447
448 p = smalloc(READSTR);
449
450 l = snprint(p, READSTR, "devfeat %4.4luX\n", ctlr->feat);
451 l += snprint(p+l, READSTR-l, "drvfeat %4.4luX\n", inl(ctlr->port+Qdrvfeat));
452 l += snprint(p+l, READSTR-l, "devstatus %uX\n", inb(ctlr->port+Qstatus));
453 if(ctlr->feat & Fstatus)
454 l += snprint(p+l, READSTR-l, "netstatus %uX\n", inb(ctlr->port+Qnetstatus));
455
456 for(i = 0; i < ctlr->nqueue; i++){
457 q = &ctlr->queue[i];
458 l += snprint(p+l, READSTR-l,
459 "vq%d %#p size %d avail->idx %d used->idx %d lastused %hud nintr %ud nnote %ud\n",
460 i, q, q->qsize, q->avail->idx, q->used->idx, q->lastused, q->nintr, q->nnote);
461 }
462
463 n = readstr(offset, a, n, p);
464 free(p);
465
466 return n;
467 }
468
469 static void
shutdown(Ether * edev)470 shutdown(Ether* edev)
471 {
472 Ctlr *ctlr = edev->ctlr;
473 outb(ctlr->port+Qstatus, 0);
474 pciclrbme(ctlr->pcidev);
475 }
476
477 static void
promiscuous(void * arg,int on)478 promiscuous(void *arg, int on)
479 {
480 Ether *edev = arg;
481 uchar b[1];
482
483 b[0] = on != 0;
484 vctlcmd(edev, CtrlRx, CmdPromisc, b, sizeof(b));
485 }
486
487 static void
multicast(void * arg,uchar *,int)488 multicast(void *arg, uchar*, int)
489 {
490 Ether *edev = arg;
491 uchar b[1];
492
493 b[0] = edev->nmaddr > 0;
494 vctlcmd(edev, CtrlRx, CmdAllmulti, b, sizeof(b));
495 }
496
497 /* §2.4.2 Legacy Interfaces: A Note on Virtqueue Layout */
498 static ulong
queuesize(ulong size)499 queuesize(ulong size)
500 {
501 return VPGROUND(VdescSize*size + sizeof(u16int)*(3+size))
502 + VPGROUND(sizeof(u16int)*3 + VusedSize*size);
503 }
504
505 static int
initqueue(Vqueue * q,int size)506 initqueue(Vqueue *q, int size)
507 {
508 uchar *p;
509
510 /* §2.4: Queue Size value is always a power of 2 and <= 32768 */
511 assert(!(size & (size - 1)) && size <= 32768);
512
513 p = mallocalign(queuesize(size), VBY2PG, 0, 0);
514 if(p == nil){
515 print("ethervirtio: no memory for Vqueue\n");
516 free(p);
517 return -1;
518 }
519
520 q->desc = (void*)p;
521 p += VdescSize*size;
522 q->avail = (void*)p;
523 p += VringSize;
524 q->availent = (void*)p;
525 p += sizeof(u16int)*size;
526 q->availevent = (void*)p;
527 p += sizeof(u16int);
528
529 p = (uchar*)VPGROUND((uintptr)p);
530 q->used = (void*)p;
531 p += VringSize;
532 q->usedent = (void*)p;
533 p += VusedSize*size;
534 q->usedevent = (void*)p;
535
536 q->qsize = size;
537 q->qmask = q->qsize - 1;
538
539 q->lastused = q->avail->idx = q->used->idx = 0;
540
541 q->avail->flags |= Rnointerrupt;
542
543 return 0;
544 }
545
546 static Ctlr*
pciprobe(int typ)547 pciprobe(int typ)
548 {
549 Ctlr *c, *h, *t;
550 Pcidev *p;
551 int n, i;
552
553 h = t = nil;
554
555 /* §4.1.2 PCI Device Discovery */
556 for(p = nil; p = pcimatch(p, 0x1AF4, 0);){
557 /* the two possible DIDs for virtio-net */
558 if(p->did != 0x1000 && p->did != 0x1041)
559 continue;
560 /*
561 * non-transitional devices will have a revision > 0,
562 * these are handled by ethervirtio10 driver.
563 */
564 if(p->rid != 0)
565 continue;
566 /* first membar needs to be I/O */
567 if((p->mem[0].bar & 1) == 0)
568 continue;
569 /* non-transitional device will have typ+0x40 */
570 if(pcicfgr16(p, 0x2E) != typ)
571 continue;
572 if((c = mallocz(sizeof(Ctlr), 1)) == nil){
573 print("ethervirtio: no memory for Ctlr\n");
574 break;
575 }
576 c->port = p->mem[0].bar & ~3;
577 if(ioalloc(c->port, p->mem[0].size, 0, "ethervirtio") < 0){
578 print("ethervirtio: port %ux in use\n", c->port);
579 free(c);
580 continue;
581 }
582
583 c->typ = typ;
584 c->pcidev = p;
585 c->id = (p->did<<16)|p->vid;
586
587 /* §3.1.2 Legacy Device Initialization */
588 outb(c->port+Qstatus, 0);
589 while(inb(c->port+Qstatus) != 0)
590 delay(1);
591 outb(c->port+Qstatus, Sacknowledge|Sdriver);
592
593 /* negotiate feature bits */
594 c->feat = inl(c->port+Qdevfeat);
595 outl(c->port+Qdrvfeat, c->feat & (Fmac|Fstatus|Fctrlvq|Fctrlrx));
596
597 /* §4.1.5.1.4 Virtqueue Configuration */
598 for(i=0; i<nelem(c->queue); i++){
599 outs(c->port+Qselect, i);
600 n = ins(c->port+Qsize);
601 if(n == 0 || (n & (n-1)) != 0){
602 if(i < 2)
603 print("ethervirtio: queue %d has invalid size %d\n", i, n);
604 break;
605 }
606 if(initqueue(&c->queue[i], n) < 0)
607 break;
608 coherence();
609 outl(c->port+Qaddr, PADDR(c->queue[i].desc)/VBY2PG);
610 }
611 if(i < 2){
612 print("ethervirtio: no queues\n");
613 free(c);
614 continue;
615 }
616 c->nqueue = i;
617
618 if(h == nil)
619 h = c;
620 else
621 t->next = c;
622 t = c;
623 }
624
625 return h;
626 }
627
628
629 static int
reset(Ether * edev)630 reset(Ether* edev)
631 {
632 static uchar zeros[Eaddrlen];
633 Ctlr *ctlr;
634 int i;
635
636 if(ctlrhead == nil)
637 ctlrhead = pciprobe(1);
638
639 for(ctlr = ctlrhead; ctlr != nil; ctlr = ctlr->next){
640 if(ctlr->active)
641 continue;
642 if(edev->port == 0 || edev->port == ctlr->port){
643 ctlr->active = 1;
644 break;
645 }
646 }
647
648 if(ctlr == nil)
649 return -1;
650
651 edev->ctlr = ctlr;
652 edev->port = ctlr->port;
653 edev->irq = ctlr->pcidev->intl;
654 edev->tbdf = ctlr->pcidev->tbdf;
655 edev->mbps = 1000;
656 edev->link = 1;
657
658 if((ctlr->feat & Fmac) != 0 && memcmp(edev->ea, zeros, Eaddrlen) == 0){
659 for(i = 0; i < Eaddrlen; i++)
660 edev->ea[i] = inb(ctlr->port+Qmac+i);
661 } else {
662 for(i = 0; i < Eaddrlen; i++)
663 outb(ctlr->port+Qmac+i, edev->ea[i]);
664 }
665
666 edev->arg = edev;
667
668 edev->attach = attach;
669 edev->shutdown = shutdown;
670 edev->ifstat = ifstat;
671
672 if((ctlr->feat & (Fctrlvq|Fctrlrx)) == (Fctrlvq|Fctrlrx)){
673 edev->multicast = multicast;
674 edev->promiscuous = promiscuous;
675 }
676
677 pcisetbme(ctlr->pcidev);
678 intrenable(edev->irq, interrupt, edev, edev->tbdf, edev->name);
679
680 return 0;
681 }
682
683 void
ethervirtiolink(void)684 ethervirtiolink(void)
685 {
686 addethercard("virtio", reset);
687 }
688
689