1 /* $NetBSD: viomb.c,v 1.17 2023/03/25 11:04:34 mlelstv Exp $ */
2
3 /*
4 * Copyright (c) 2010 Minoura Makoto.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: viomb.c,v 1.17 2023/03/25 11:04:34 mlelstv Exp $");
30
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/bus.h>
35 #include <sys/condvar.h>
36 #include <sys/device.h>
37 #include <sys/kthread.h>
38 #include <sys/mutex.h>
39 #include <sys/sysctl.h>
40 #include <uvm/uvm_page.h>
41 #include <sys/module.h>
42
43 #include <dev/pci/virtioreg.h>
44 #include <dev/pci/virtiovar.h>
45
46 #include "ioconf.h"
47
48 /* Configuration registers */
49 #define VIRTIO_BALLOON_CONFIG_NUM_PAGES 0 /* 32bit */
50 #define VIRTIO_BALLOON_CONFIG_ACTUAL 4 /* 32bit */
51
52 /* Feature bits */
53 #define VIRTIO_BALLOON_F_MUST_TELL_HOST (1<<0)
54 #define VIRTIO_BALLOON_F_STATS_VQ (1<<1)
55
56 #define VIRTIO_BALLOON_FLAG_BITS \
57 VIRTIO_COMMON_FLAG_BITS \
58 "b\x01" "STATS_VQ\0" \
59 "b\x00" "MUST_TELL_HOST\0"
60
61 #define PGS_PER_REQ (256) /* 1MB, 4KB/page */
62 #define VQ_INFLATE 0
63 #define VQ_DEFLATE 1
64
65
66 CTASSERT((PAGE_SIZE) == (VIRTIO_PAGE_SIZE)); /* XXX */
67
68 struct balloon_req {
69 bus_dmamap_t bl_dmamap;
70 struct pglist bl_pglist;
71 int bl_nentries;
72 uint32_t bl_pages[PGS_PER_REQ];
73 };
74
75 struct viomb_softc {
76 device_t sc_dev;
77
78 struct virtio_softc *sc_virtio;
79 struct virtqueue sc_vq[2];
80
81 unsigned int sc_npages;
82 unsigned int sc_actual;
83 int sc_inflight;
84 struct balloon_req sc_req;
85 struct pglist sc_balloon_pages;
86
87 int sc_inflate_done;
88 int sc_deflate_done;
89
90 kcondvar_t sc_wait;
91 kmutex_t sc_waitlock;
92 };
93
94 static int balloon_initialized = 0; /* multiple balloon is not allowed */
95
96 static int viomb_match(device_t, cfdata_t, void *);
97 static void viomb_attach(device_t, device_t, void *);
98 static void viomb_read_config(struct viomb_softc *);
99 static int viomb_config_change(struct virtio_softc *);
100 static int inflate(struct viomb_softc *);
101 static int inflateq_done(struct virtqueue *);
102 static int inflate_done(struct viomb_softc *);
103 static int deflate(struct viomb_softc *);
104 static int deflateq_done(struct virtqueue *);
105 static int deflate_done(struct viomb_softc *);
106 static void viomb_thread(void *);
107
108 CFATTACH_DECL_NEW(viomb, sizeof(struct viomb_softc),
109 viomb_match, viomb_attach, NULL, NULL);
110
111 static int
viomb_match(device_t parent,cfdata_t match,void * aux)112 viomb_match(device_t parent, cfdata_t match, void *aux)
113 {
114 struct virtio_attach_args *va = aux;
115
116 if (va->sc_childdevid == VIRTIO_DEVICE_ID_BALLOON)
117 return 1;
118
119 return 0;
120 }
121
122 static void
viomb_attach(device_t parent,device_t self,void * aux)123 viomb_attach(device_t parent, device_t self, void *aux)
124 {
125 struct viomb_softc *sc = device_private(self);
126 struct virtio_softc *vsc = device_private(parent);
127 const struct sysctlnode *node;
128 uint64_t features;
129
130 if (virtio_child(vsc) != NULL) {
131 aprint_normal(": child already attached for %s; "
132 "something wrong...\n", device_xname(parent));
133 return;
134 }
135
136 if (balloon_initialized++) {
137 aprint_normal(": balloon already exists; something wrong...\n");
138 return;
139 }
140
141 /* fail on non-4K page size archs */
142 if (VIRTIO_PAGE_SIZE != PAGE_SIZE){
143 aprint_normal("non-4K page size arch found, needs %d, got %d\n",
144 VIRTIO_PAGE_SIZE, PAGE_SIZE);
145 return;
146 }
147
148 sc->sc_dev = self;
149 sc->sc_virtio = vsc;
150
151 virtio_child_attach_start(vsc, self, IPL_VM,
152 VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_FLAG_BITS);
153
154 features = virtio_features(vsc);
155 if (features == 0)
156 goto err_none;
157
158 viomb_read_config(sc);
159 sc->sc_inflight = 0;
160 TAILQ_INIT(&sc->sc_balloon_pages);
161
162 sc->sc_inflate_done = sc->sc_deflate_done = 0;
163 mutex_init(&sc->sc_waitlock, MUTEX_DEFAULT, IPL_VM); /* spin */
164 cv_init(&sc->sc_wait, "balloon");
165
166 virtio_init_vq_vqdone(vsc, &sc->sc_vq[VQ_INFLATE], VQ_INFLATE,
167 inflateq_done);
168 virtio_init_vq_vqdone(vsc, &sc->sc_vq[VQ_DEFLATE], VQ_DEFLATE,
169 deflateq_done);
170
171 if (virtio_alloc_vq(vsc, &sc->sc_vq[VQ_INFLATE],
172 sizeof(uint32_t)*PGS_PER_REQ, 1,
173 "inflate") != 0)
174 goto err_mutex;
175 if (virtio_alloc_vq(vsc, &sc->sc_vq[VQ_DEFLATE],
176 sizeof(uint32_t)*PGS_PER_REQ, 1,
177 "deflate") != 0)
178 goto err_vq0;
179
180 if (bus_dmamap_create(virtio_dmat(vsc), sizeof(uint32_t)*PGS_PER_REQ,
181 1, sizeof(uint32_t)*PGS_PER_REQ, 0,
182 BUS_DMA_NOWAIT, &sc->sc_req.bl_dmamap)) {
183 aprint_error_dev(sc->sc_dev, "dmamap creation failed.\n");
184 goto err_vq;
185 }
186 if (bus_dmamap_load(virtio_dmat(vsc), sc->sc_req.bl_dmamap,
187 &sc->sc_req.bl_pages[0],
188 sizeof(uint32_t) * PGS_PER_REQ,
189 NULL, BUS_DMA_NOWAIT)) {
190 aprint_error_dev(sc->sc_dev, "dmamap load failed.\n");
191 goto err_dmamap;
192 }
193
194 if (virtio_child_attach_finish(vsc, sc->sc_vq, __arraycount(sc->sc_vq),
195 viomb_config_change, VIRTIO_F_INTR_MPSAFE) != 0)
196 goto err_out;
197
198 if (kthread_create(PRI_IDLE, KTHREAD_MPSAFE, NULL,
199 viomb_thread, sc, NULL, "viomb")) {
200 aprint_error_dev(sc->sc_dev, "cannot create kthread.\n");
201 goto err_out;
202 }
203
204 sysctl_createv(NULL, 0, NULL, &node, 0, CTLTYPE_NODE,
205 "viomb", SYSCTL_DESCR("VirtIO Balloon status"),
206 NULL, 0, NULL, 0,
207 CTL_HW, CTL_CREATE, CTL_EOL);
208 sysctl_createv(NULL, 0, NULL, NULL, 0, CTLTYPE_INT,
209 "npages", SYSCTL_DESCR("VirtIO Balloon npages value"),
210 NULL, 0, &sc->sc_npages, 0,
211 CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL);
212 sysctl_createv(NULL, 0, NULL, NULL, 0, CTLTYPE_INT,
213 "actual", SYSCTL_DESCR("VirtIO Balloon actual value"),
214 NULL, 0, &sc->sc_actual, 0,
215 CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL);
216 return;
217
218 err_out:
219 err_dmamap:
220 bus_dmamap_destroy(virtio_dmat(vsc), sc->sc_req.bl_dmamap);
221 err_vq:
222 virtio_free_vq(vsc, &sc->sc_vq[VQ_DEFLATE]);
223 err_vq0:
224 virtio_free_vq(vsc, &sc->sc_vq[VQ_INFLATE]);
225 err_mutex:
226 cv_destroy(&sc->sc_wait);
227 mutex_destroy(&sc->sc_waitlock);
228 err_none:
229 virtio_child_attach_failed(vsc);
230 return;
231 }
232
233 static void
viomb_read_config(struct viomb_softc * sc)234 viomb_read_config(struct viomb_softc *sc)
235 {
236 /* these values are explicitly specified as little-endian */
237 sc->sc_npages = virtio_read_device_config_le_4(sc->sc_virtio,
238 VIRTIO_BALLOON_CONFIG_NUM_PAGES);
239
240 sc->sc_actual = virtio_read_device_config_le_4(sc->sc_virtio,
241 VIRTIO_BALLOON_CONFIG_ACTUAL);
242 }
243
244 /*
245 * Config change callback: wakeup the kthread.
246 */
247 static int
viomb_config_change(struct virtio_softc * vsc)248 viomb_config_change(struct virtio_softc *vsc)
249 {
250 struct viomb_softc *sc = device_private(virtio_child(vsc));
251 unsigned int old;
252
253 old = sc->sc_npages;
254 viomb_read_config(sc);
255 mutex_enter(&sc->sc_waitlock);
256 cv_signal(&sc->sc_wait);
257 mutex_exit(&sc->sc_waitlock);
258 if (sc->sc_npages > old)
259 printf("%s: inflating balloon from %u to %u.\n",
260 device_xname(sc->sc_dev), old, sc->sc_npages);
261 else if (sc->sc_npages < old)
262 printf("%s: deflating balloon from %u to %u.\n",
263 device_xname(sc->sc_dev), old, sc->sc_npages);
264
265 return 1;
266 }
267
268 /*
269 * Inflate: consume some amount of physical memory.
270 */
271 static int
inflate(struct viomb_softc * sc)272 inflate(struct viomb_softc *sc)
273 {
274 struct virtio_softc *vsc = sc->sc_virtio;
275 int i, slot;
276 uint64_t nvpages, nhpages;
277 struct balloon_req *b;
278 struct vm_page *p;
279 struct virtqueue *vq = &sc->sc_vq[VQ_INFLATE];
280
281 if (sc->sc_inflight)
282 return 0;
283 nvpages = sc->sc_npages - sc->sc_actual;
284 if (nvpages > PGS_PER_REQ)
285 nvpages = PGS_PER_REQ;
286 nhpages = nvpages * VIRTIO_PAGE_SIZE / PAGE_SIZE;
287
288 b = &sc->sc_req;
289 if (uvm_pglistalloc(nhpages*PAGE_SIZE, 0, UINT32_MAX*(paddr_t)PAGE_SIZE,
290 0, 0, &b->bl_pglist, nhpages, 0)) {
291 printf("%s: %" PRIu64 " pages of physical memory "
292 "could not be allocated, retrying...\n",
293 device_xname(sc->sc_dev), nhpages);
294 return 1; /* sleep longer */
295 }
296
297 b->bl_nentries = nvpages;
298 i = 0;
299 TAILQ_FOREACH(p, &b->bl_pglist, pageq.queue) {
300 b->bl_pages[i++] =
301 htole32(VM_PAGE_TO_PHYS(p) / VIRTIO_PAGE_SIZE);
302 }
303 KASSERT(i == nvpages);
304
305 if (virtio_enqueue_prep(vsc, vq, &slot) != 0) {
306 printf("%s: inflate enqueue failed.\n",
307 device_xname(sc->sc_dev));
308 uvm_pglistfree(&b->bl_pglist);
309 return 0;
310 }
311 if (virtio_enqueue_reserve(vsc, vq, slot, 1)) {
312 printf("%s: inflate enqueue failed.\n",
313 device_xname(sc->sc_dev));
314 uvm_pglistfree(&b->bl_pglist);
315 return 0;
316 }
317 bus_dmamap_sync(virtio_dmat(vsc), b->bl_dmamap, 0,
318 sizeof(uint32_t)*nvpages, BUS_DMASYNC_PREWRITE);
319 virtio_enqueue(vsc, vq, slot, b->bl_dmamap, true);
320 virtio_enqueue_commit(vsc, vq, slot, true);
321 sc->sc_inflight += nvpages;
322
323 return 0;
324 }
325
326 static int
inflateq_done(struct virtqueue * vq)327 inflateq_done(struct virtqueue *vq)
328 {
329 struct virtio_softc *vsc = vq->vq_owner;
330 struct viomb_softc *sc = device_private(virtio_child(vsc));
331
332 mutex_enter(&sc->sc_waitlock);
333 sc->sc_inflate_done = 1;
334 cv_signal(&sc->sc_wait);
335 mutex_exit(&sc->sc_waitlock);
336
337 return 1;
338 }
339
340 static int
inflate_done(struct viomb_softc * sc)341 inflate_done(struct viomb_softc *sc)
342 {
343 struct virtio_softc *vsc = sc->sc_virtio;
344 struct virtqueue *vq = &sc->sc_vq[VQ_INFLATE];
345 struct balloon_req *b;
346 int r, slot;
347 uint64_t nvpages;
348 struct vm_page *p;
349
350 r = virtio_dequeue(vsc, vq, &slot, NULL);
351 if (r != 0) {
352 printf("%s: inflate dequeue failed, errno %d.\n",
353 device_xname(sc->sc_dev), r);
354 return 1;
355 }
356 virtio_dequeue_commit(vsc, vq, slot);
357
358 b = &sc->sc_req;
359 nvpages = b->bl_nentries;
360 bus_dmamap_sync(virtio_dmat(vsc), b->bl_dmamap,
361 0,
362 sizeof(uint32_t)*nvpages,
363 BUS_DMASYNC_POSTWRITE);
364 while (!TAILQ_EMPTY(&b->bl_pglist)) {
365 p = TAILQ_FIRST(&b->bl_pglist);
366 TAILQ_REMOVE(&b->bl_pglist, p, pageq.queue);
367 TAILQ_INSERT_TAIL(&sc->sc_balloon_pages, p, pageq.queue);
368 }
369
370 sc->sc_inflight -= nvpages;
371 virtio_write_device_config_le_4(vsc,
372 VIRTIO_BALLOON_CONFIG_ACTUAL,
373 sc->sc_actual + nvpages);
374 viomb_read_config(sc);
375
376 return 1;
377 }
378
379 /*
380 * Deflate: free previously allocated memory.
381 */
382 static int
deflate(struct viomb_softc * sc)383 deflate(struct viomb_softc *sc)
384 {
385 struct virtio_softc *vsc = sc->sc_virtio;
386 int i, slot;
387 uint64_t nvpages, nhpages;
388 struct balloon_req *b;
389 struct vm_page *p;
390 struct virtqueue *vq = &sc->sc_vq[VQ_DEFLATE];
391
392 nvpages = (sc->sc_actual + sc->sc_inflight) - sc->sc_npages;
393 if (nvpages > PGS_PER_REQ)
394 nvpages = PGS_PER_REQ;
395 nhpages = nvpages * VIRTIO_PAGE_SIZE / PAGE_SIZE;
396
397 b = &sc->sc_req;
398
399 b->bl_nentries = nvpages;
400 TAILQ_INIT(&b->bl_pglist);
401 for (i = 0; i < nhpages; i++) {
402 p = TAILQ_FIRST(&sc->sc_balloon_pages);
403 if (p == NULL)
404 break;
405 TAILQ_REMOVE(&sc->sc_balloon_pages, p, pageq.queue);
406 TAILQ_INSERT_TAIL(&b->bl_pglist, p, pageq.queue);
407 b->bl_pages[i] =
408 htole32(VM_PAGE_TO_PHYS(p) / VIRTIO_PAGE_SIZE);
409 }
410
411 if (virtio_enqueue_prep(vsc, vq, &slot) != 0) {
412 printf("%s: deflate enqueue failed.\n",
413 device_xname(sc->sc_dev));
414 TAILQ_FOREACH_REVERSE(p, &b->bl_pglist, pglist, pageq.queue) {
415 TAILQ_REMOVE(&b->bl_pglist, p, pageq.queue);
416 TAILQ_INSERT_HEAD(&sc->sc_balloon_pages, p,
417 pageq.queue);
418 }
419 return 0;
420 }
421 if (virtio_enqueue_reserve(vsc, vq, slot, 1) != 0) {
422 printf("%s: deflate enqueue failed.\n",
423 device_xname(sc->sc_dev));
424 TAILQ_FOREACH_REVERSE(p, &b->bl_pglist, pglist, pageq.queue) {
425 TAILQ_REMOVE(&b->bl_pglist, p, pageq.queue);
426 TAILQ_INSERT_HEAD(&sc->sc_balloon_pages, p,
427 pageq.queue);
428 }
429 return 0;
430 }
431 bus_dmamap_sync(virtio_dmat(vsc), b->bl_dmamap, 0,
432 sizeof(uint32_t)*nvpages, BUS_DMASYNC_PREWRITE);
433 virtio_enqueue(vsc, vq, slot, b->bl_dmamap, true);
434 virtio_enqueue_commit(vsc, vq, slot, true);
435 sc->sc_inflight -= nvpages;
436
437 if (!(virtio_features(vsc) & VIRTIO_BALLOON_F_MUST_TELL_HOST))
438 uvm_pglistfree(&b->bl_pglist);
439
440 return 0;
441 }
442
443 static int
deflateq_done(struct virtqueue * vq)444 deflateq_done(struct virtqueue *vq)
445 {
446 struct virtio_softc *vsc = vq->vq_owner;
447 struct viomb_softc *sc = device_private(virtio_child(vsc));
448
449 mutex_enter(&sc->sc_waitlock);
450 sc->sc_deflate_done = 1;
451 cv_signal(&sc->sc_wait);
452 mutex_exit(&sc->sc_waitlock);
453
454 return 1;
455 }
456
457 static int
deflate_done(struct viomb_softc * sc)458 deflate_done(struct viomb_softc *sc)
459 {
460 struct virtio_softc *vsc = sc->sc_virtio;
461 struct virtqueue *vq = &sc->sc_vq[VQ_DEFLATE];
462 struct balloon_req *b;
463 int r, slot;
464 uint64_t nvpages;
465
466 r = virtio_dequeue(vsc, vq, &slot, NULL);
467 if (r != 0) {
468 printf("%s: deflate dequeue failed, errno %d\n",
469 device_xname(sc->sc_dev), r);
470 return 1;
471 }
472 virtio_dequeue_commit(vsc, vq, slot);
473
474 b = &sc->sc_req;
475 nvpages = b->bl_nentries;
476 bus_dmamap_sync(virtio_dmat(vsc), b->bl_dmamap,
477 0,
478 sizeof(uint32_t)*nvpages,
479 BUS_DMASYNC_POSTWRITE);
480
481 if (virtio_features(vsc) & VIRTIO_BALLOON_F_MUST_TELL_HOST)
482 uvm_pglistfree(&b->bl_pglist);
483
484 sc->sc_inflight += nvpages;
485 virtio_write_device_config_le_4(vsc,
486 VIRTIO_BALLOON_CONFIG_ACTUAL,
487 sc->sc_actual - nvpages);
488 viomb_read_config(sc);
489
490 return 1;
491 }
492
493 /*
494 * Kthread: sleeps, eventually inflate and deflate.
495 */
496 static void
viomb_thread(void * arg)497 viomb_thread(void *arg)
498 {
499 struct viomb_softc *sc = arg;
500 int sleeptime, r;
501
502 for ( ; ; ) {
503 sleeptime = 30000;
504 if (sc->sc_npages > sc->sc_actual + sc->sc_inflight) {
505 if (sc->sc_inflight == 0) {
506 r = inflate(sc);
507 if (r != 0)
508 sleeptime = 10000;
509 else
510 sleeptime = 100;
511 } else
512 sleeptime = 20;
513 } else if (sc->sc_npages < sc->sc_actual + sc->sc_inflight) {
514 if (sc->sc_inflight == 0)
515 r = deflate(sc);
516 sleeptime = 100;
517 }
518
519 again:
520 mutex_enter(&sc->sc_waitlock);
521 if (sc->sc_inflate_done) {
522 sc->sc_inflate_done = 0;
523 mutex_exit(&sc->sc_waitlock);
524 inflate_done(sc);
525 goto again;
526 }
527 if (sc->sc_deflate_done) {
528 sc->sc_deflate_done = 0;
529 mutex_exit(&sc->sc_waitlock);
530 deflate_done(sc);
531 goto again;
532 }
533 cv_timedwait(&sc->sc_wait, &sc->sc_waitlock,
534 mstohz(sleeptime));
535 mutex_exit(&sc->sc_waitlock);
536 }
537 }
538
539 MODULE(MODULE_CLASS_DRIVER, viomb, "virtio");
540
541 #ifdef _MODULE
542 #include "ioconf.c"
543 #endif
544
545 static int
viomb_modcmd(modcmd_t cmd,void * opaque)546 viomb_modcmd(modcmd_t cmd, void *opaque)
547 {
548 int error = 0;
549
550 #ifdef _MODULE
551 switch (cmd) {
552 case MODULE_CMD_INIT:
553 error = config_init_component(cfdriver_ioconf_viomb,
554 cfattach_ioconf_viomb, cfdata_ioconf_viomb);
555 break;
556 case MODULE_CMD_FINI:
557 error = config_fini_component(cfdriver_ioconf_viomb,
558 cfattach_ioconf_viomb, cfdata_ioconf_viomb);
559 break;
560 default:
561 error = ENOTTY;
562 break;
563 }
564 #endif
565
566 return error;
567 }
568