xref: /netbsd-src/sys/dev/pci/viomb.c (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 /*	$NetBSD: viomb.c,v 1.12 2021/01/20 19:46:48 reinoud Exp $	*/
2 
3 /*
4  * Copyright (c) 2010 Minoura Makoto.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: viomb.c,v 1.12 2021/01/20 19:46:48 reinoud Exp $");
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/bus.h>
35 #include <sys/condvar.h>
36 #include <sys/device.h>
37 #include <sys/kthread.h>
38 #include <sys/mutex.h>
39 #include <sys/sysctl.h>
40 #include <uvm/uvm_page.h>
41 #include <sys/module.h>
42 
43 #include <dev/pci/virtioreg.h>
44 #include <dev/pci/virtiovar.h>
45 
46 #include "ioconf.h"
47 
/*
 * Configuration registers: byte offsets into the device configuration
 * space, read and written with the virtio *_device_config_le_4() accessors.
 */
#define VIRTIO_BALLOON_CONFIG_NUM_PAGES	0 /* 32bit */
#define VIRTIO_BALLOON_CONFIG_ACTUAL	4 /* 32bit */

/* Feature bits */
#define VIRTIO_BALLOON_F_MUST_TELL_HOST (1<<0)
#define VIRTIO_BALLOON_F_STATS_VQ	(1<<1)

/*
 * Names for the feature bits above, appended to the common virtio flag
 * names and handed to virtio_child_attach_start().
 * NOTE(review): presumably an snprintb(3)-style bit description -- confirm.
 */
#define VIRTIO_BALLOON_FLAG_BITS \
	VIRTIO_COMMON_FLAG_BITS \
	"\x02""STATS_VQ" \
	"\x01""MUST_TELL_HOST"

/* Maximum number of page entries carried by a single balloon request. */
#define PGS_PER_REQ		(256) /* 1MB, 4KB/page */
/* Indices into viomb_softc.sc_vq[]. */
#define VQ_INFLATE	0
#define VQ_DEFLATE	1


/*
 * The driver converts between host pages and virtio pages with
 * VIRTIO_PAGE_SIZE / PAGE_SIZE, so refuse to build if the sizes differ.
 */
CTASSERT((PAGE_SIZE) == (VIRTIO_PAGE_SIZE)); /* XXX */
67 
/*
 * One inflate or deflate request.  The softc embeds a single instance, so
 * only one request is prepared at a time.
 */
struct balloon_req {
	/* DMA map created and loaded over bl_pages[] at attach time */
	bus_dmamap_t			bl_dmamap;
	/* pages belonging to this request (allocated, or taken from balloon) */
	struct pglist			bl_pglist;
	/* number of page entries in this request */
	int				bl_nentries;
	/* little-endian page frame numbers (physical address / page size) */
	uint32_t			bl_pages[PGS_PER_REQ];
};
74 
/* Per-device driver state. */
struct viomb_softc {
	device_t		sc_dev;		/* this device */

	struct virtio_softc	*sc_virtio;	/* parent virtio device */
	struct virtqueue	sc_vq[2];	/* VQ_INFLATE, VQ_DEFLATE */

	/* last value read from the NUM_PAGES config register */
	unsigned int		sc_npages;
	/* last value read from the ACTUAL config register */
	unsigned int		sc_actual;
	/*
	 * Pages in a request that has been queued but not yet completed:
	 * raised by inflate() and lowered by deflate(), then undone by the
	 * matching *_done() routine.
	 */
	int			sc_inflight;
	struct balloon_req	sc_req;		/* the single request buffer */
	/* pages handed over to the host by completed inflate requests */
	struct pglist		sc_balloon_pages;

	/* set by the virtqueue callbacks, consumed by viomb_thread() */
	int			sc_inflate_done;
	int			sc_deflate_done;

	/* wakes viomb_thread(); sc_waitlock protects the *_done flags */
	kcondvar_t		sc_wait;
	kmutex_t		sc_waitlock;
};
93 
/* Non-zero once any instance has tried to attach. */
static int	balloon_initialized = 0; /* multiple balloon is not allowed */

/* autoconf entry points */
static int	viomb_match(device_t, cfdata_t, void *);
static void	viomb_attach(device_t, device_t, void *);
/* device configuration space handling */
static void	viomb_read_config(struct viomb_softc *);
static int	viomb_config_change(struct virtio_softc *);
/* inflate path: submit, virtqueue callback, completion */
static int	inflate(struct viomb_softc *);
static int	inflateq_done(struct virtqueue *);
static int	inflate_done(struct viomb_softc *);
/* deflate path: submit, virtqueue callback, completion */
static int	deflate(struct viomb_softc *);
static int	deflateq_done(struct virtqueue *);
static int	deflate_done(struct viomb_softc *);
/* worker thread body */
static void	viomb_thread(void *);

/* Attachment glue: match and attach only, the last two hooks are NULL. */
CFATTACH_DECL_NEW(viomb, sizeof(struct viomb_softc),
    viomb_match, viomb_attach, NULL, NULL);
110 
111 static int
112 viomb_match(device_t parent, cfdata_t match, void *aux)
113 {
114 	struct virtio_attach_args *va = aux;
115 
116 	if (va->sc_childdevid == VIRTIO_DEVICE_ID_BALLOON)
117 		return 1;
118 
119 	return 0;
120 }
121 
122 static void
123 viomb_attach(device_t parent, device_t self, void *aux)
124 {
125 	struct viomb_softc *sc = device_private(self);
126 	struct virtio_softc *vsc = device_private(parent);
127 	const struct sysctlnode *node;
128 	uint64_t features;
129 
130 	if (virtio_child(vsc) != NULL) {
131 		aprint_normal(": child already attached for %s; "
132 			      "something wrong...\n", device_xname(parent));
133 		return;
134 	}
135 
136 	if (balloon_initialized++) {
137 		aprint_normal(": balloon already exists; something wrong...\n");
138 		return;
139 	}
140 
141 	/* fail on non-4K page size archs */
142 	if (VIRTIO_PAGE_SIZE != PAGE_SIZE){
143 		aprint_normal("non-4K page size arch found, needs %d, got %d\n",
144 		    VIRTIO_PAGE_SIZE, PAGE_SIZE);
145 		return;
146 	}
147 
148 	sc->sc_dev = self;
149 	sc->sc_virtio = vsc;
150 
151 	virtio_child_attach_start(vsc, self, IPL_VM, sc->sc_vq,
152 	    viomb_config_change, virtio_vq_intr, 0,
153 	    VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_FLAG_BITS);
154 
155 	features = virtio_features(vsc);
156 	if (features == 0)
157 		goto err_none;
158 
159 	viomb_read_config(sc);
160 	sc->sc_inflight = 0;
161 	TAILQ_INIT(&sc->sc_balloon_pages);
162 
163 	sc->sc_inflate_done = sc->sc_deflate_done = 0;
164 	mutex_init(&sc->sc_waitlock, MUTEX_DEFAULT, IPL_VM); /* spin */
165 	cv_init(&sc->sc_wait, "balloon");
166 
167 	if (virtio_alloc_vq(vsc, &sc->sc_vq[VQ_INFLATE], 0,
168 			     sizeof(uint32_t)*PGS_PER_REQ, 1,
169 			     "inflate") != 0)
170 		goto err_mutex;
171 	if (virtio_alloc_vq(vsc, &sc->sc_vq[VQ_DEFLATE], 1,
172 			     sizeof(uint32_t)*PGS_PER_REQ, 1,
173 			     "deflate") != 0)
174 		goto err_vq0;
175 
176 	sc->sc_vq[VQ_INFLATE].vq_done = inflateq_done;
177 	sc->sc_vq[VQ_DEFLATE].vq_done = deflateq_done;
178 
179 	if (bus_dmamap_create(virtio_dmat(vsc), sizeof(uint32_t)*PGS_PER_REQ,
180 			      1, sizeof(uint32_t)*PGS_PER_REQ, 0,
181 			      BUS_DMA_NOWAIT, &sc->sc_req.bl_dmamap)) {
182 		aprint_error_dev(sc->sc_dev, "dmamap creation failed.\n");
183 		goto err_vq;
184 	}
185 	if (bus_dmamap_load(virtio_dmat(vsc), sc->sc_req.bl_dmamap,
186 			    &sc->sc_req.bl_pages[0],
187 			    sizeof(uint32_t) * PGS_PER_REQ,
188 			    NULL, BUS_DMA_NOWAIT)) {
189 		aprint_error_dev(sc->sc_dev, "dmamap load failed.\n");
190 		goto err_dmamap;
191 	}
192 
193 	if (virtio_child_attach_finish(vsc) != 0)
194 		goto err_out;
195 
196 	if (kthread_create(PRI_IDLE, KTHREAD_MPSAFE, NULL,
197 			   viomb_thread, sc, NULL, "viomb")) {
198 		aprint_error_dev(sc->sc_dev, "cannot create kthread.\n");
199 		goto err_out;
200 	}
201 
202 	sysctl_createv(NULL, 0, NULL, &node, 0, CTLTYPE_NODE,
203 		       "viomb", SYSCTL_DESCR("VirtIO Balloon status"),
204 		       NULL, 0, NULL, 0,
205 		       CTL_HW, CTL_CREATE, CTL_EOL);
206 	sysctl_createv(NULL, 0, NULL, NULL, 0, CTLTYPE_INT,
207 		       "npages", SYSCTL_DESCR("VirtIO Balloon npages value"),
208 		       NULL, 0, &sc->sc_npages, 0,
209 		       CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL);
210 	sysctl_createv(NULL, 0, NULL, NULL, 0, CTLTYPE_INT,
211 		       "actual", SYSCTL_DESCR("VirtIO Balloon actual value"),
212 		       NULL, 0, &sc->sc_actual, 0,
213 		       CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL);
214 	return;
215 
216 err_out:
217 err_dmamap:
218 	bus_dmamap_destroy(virtio_dmat(vsc), sc->sc_req.bl_dmamap);
219 err_vq:
220 	virtio_free_vq(vsc, &sc->sc_vq[VQ_DEFLATE]);
221 err_vq0:
222 	virtio_free_vq(vsc, &sc->sc_vq[VQ_INFLATE]);
223 err_mutex:
224 	cv_destroy(&sc->sc_wait);
225 	mutex_destroy(&sc->sc_waitlock);
226 err_none:
227 	virtio_child_attach_failed(vsc);
228 	return;
229 }
230 
231 static void
232 viomb_read_config(struct viomb_softc *sc)
233 {
234 	/* these values are explicitly specified as little-endian */
235 	sc->sc_npages = virtio_read_device_config_le_4(sc->sc_virtio,
236 		  VIRTIO_BALLOON_CONFIG_NUM_PAGES);
237 
238 	sc->sc_actual = virtio_read_device_config_le_4(sc->sc_virtio,
239 		  VIRTIO_BALLOON_CONFIG_ACTUAL);
240 }
241 
242 /*
243  * Config change callback: wakeup the kthread.
244  */
245 static int
246 viomb_config_change(struct virtio_softc *vsc)
247 {
248 	struct viomb_softc *sc = device_private(virtio_child(vsc));
249 	unsigned int old;
250 
251 	old = sc->sc_npages;
252 	viomb_read_config(sc);
253 	mutex_enter(&sc->sc_waitlock);
254 	cv_signal(&sc->sc_wait);
255 	mutex_exit(&sc->sc_waitlock);
256 	if (sc->sc_npages > old)
257 		printf("%s: inflating balloon from %u to %u.\n",
258 		       device_xname(sc->sc_dev), old, sc->sc_npages);
259 	else if  (sc->sc_npages < old)
260 		printf("%s: deflating balloon from %u to %u.\n",
261 		       device_xname(sc->sc_dev), old, sc->sc_npages);
262 
263 	return 1;
264 }
265 
266 /*
267  * Inflate: consume some amount of physical memory.
268  */
269 static int
270 inflate(struct viomb_softc *sc)
271 {
272 	struct virtio_softc *vsc = sc->sc_virtio;
273 	int i, slot;
274 	uint64_t nvpages, nhpages;
275 	struct balloon_req *b;
276 	struct vm_page *p;
277 	struct virtqueue *vq = &sc->sc_vq[VQ_INFLATE];
278 
279 	if (sc->sc_inflight)
280 		return 0;
281 	nvpages = sc->sc_npages - sc->sc_actual;
282 	if (nvpages > PGS_PER_REQ)
283 		nvpages = PGS_PER_REQ;
284 	nhpages = nvpages * VIRTIO_PAGE_SIZE / PAGE_SIZE;
285 
286 	b = &sc->sc_req;
287 	if (uvm_pglistalloc(nhpages*PAGE_SIZE, 0, UINT32_MAX*PAGE_SIZE,
288 			    0, 0, &b->bl_pglist, nhpages, 1)) {
289 		printf("%s: %" PRIu64 " pages of physical memory "
290 		       "could not be allocated, retrying...\n",
291 		       device_xname(sc->sc_dev), nhpages);
292 		return 1;	/* sleep longer */
293 	}
294 
295 	b->bl_nentries = nvpages;
296 	i = 0;
297 	TAILQ_FOREACH(p, &b->bl_pglist, pageq.queue) {
298 		b->bl_pages[i++] =
299 			htole32(VM_PAGE_TO_PHYS(p) / VIRTIO_PAGE_SIZE);
300 	}
301 	KASSERT(i == nvpages);
302 
303 	if (virtio_enqueue_prep(vsc, vq, &slot) != 0) {
304 		printf("%s: inflate enqueue failed.\n",
305 		       device_xname(sc->sc_dev));
306 		uvm_pglistfree(&b->bl_pglist);
307 		return 0;
308 	}
309 	if (virtio_enqueue_reserve(vsc, vq, slot, 1)) {
310 		printf("%s: inflate enqueue failed.\n",
311 		       device_xname(sc->sc_dev));
312 		uvm_pglistfree(&b->bl_pglist);
313 		return 0;
314 	}
315 	bus_dmamap_sync(virtio_dmat(vsc), b->bl_dmamap, 0,
316 	    sizeof(uint32_t)*nvpages, BUS_DMASYNC_PREWRITE);
317 	virtio_enqueue(vsc, vq, slot, b->bl_dmamap, true);
318 	virtio_enqueue_commit(vsc, vq, slot, true);
319 	sc->sc_inflight += nvpages;
320 
321 	return 0;
322 }
323 
324 static int
325 inflateq_done(struct virtqueue *vq)
326 {
327 	struct virtio_softc *vsc = vq->vq_owner;
328 	struct viomb_softc *sc = device_private(virtio_child(vsc));
329 
330 	mutex_enter(&sc->sc_waitlock);
331 	sc->sc_inflate_done = 1;
332 	cv_signal(&sc->sc_wait);
333 	mutex_exit(&sc->sc_waitlock);
334 
335 	return 1;
336 }
337 
338 static int
339 inflate_done(struct viomb_softc *sc)
340 {
341 	struct virtio_softc *vsc = sc->sc_virtio;
342 	struct virtqueue *vq = &sc->sc_vq[VQ_INFLATE];
343 	struct balloon_req *b;
344 	int r, slot;
345 	uint64_t nvpages;
346 	struct vm_page *p;
347 
348 	r = virtio_dequeue(vsc, vq, &slot, NULL);
349 	if (r != 0) {
350 		printf("%s: inflate dequeue failed, errno %d.\n",
351 		       device_xname(sc->sc_dev), r);
352 		return 1;
353 	}
354 	virtio_dequeue_commit(vsc, vq, slot);
355 
356 	b = &sc->sc_req;
357 	nvpages = b->bl_nentries;
358 	bus_dmamap_sync(virtio_dmat(vsc), b->bl_dmamap,
359 			0,
360 			sizeof(uint32_t)*nvpages,
361 			BUS_DMASYNC_POSTWRITE);
362 	while (!TAILQ_EMPTY(&b->bl_pglist)) {
363 		p = TAILQ_FIRST(&b->bl_pglist);
364 		TAILQ_REMOVE(&b->bl_pglist, p, pageq.queue);
365 		TAILQ_INSERT_TAIL(&sc->sc_balloon_pages, p, pageq.queue);
366 	}
367 
368 	sc->sc_inflight -= nvpages;
369 	virtio_write_device_config_le_4(vsc,
370 		     VIRTIO_BALLOON_CONFIG_ACTUAL,
371 		     sc->sc_actual + nvpages);
372 	viomb_read_config(sc);
373 
374 	return 1;
375 }
376 
377 /*
378  * Deflate: free previously allocated memory.
379  */
380 static int
381 deflate(struct viomb_softc *sc)
382 {
383 	struct virtio_softc *vsc = sc->sc_virtio;
384 	int i, slot;
385 	uint64_t nvpages, nhpages;
386 	struct balloon_req *b;
387 	struct vm_page *p;
388 	struct virtqueue *vq = &sc->sc_vq[VQ_DEFLATE];
389 
390 	nvpages = (sc->sc_actual + sc->sc_inflight) - sc->sc_npages;
391 	if (nvpages > PGS_PER_REQ)
392 		nvpages = PGS_PER_REQ;
393 	nhpages = nvpages * VIRTIO_PAGE_SIZE / PAGE_SIZE;
394 
395 	b = &sc->sc_req;
396 
397 	b->bl_nentries = nvpages;
398 	TAILQ_INIT(&b->bl_pglist);
399 	for (i = 0; i < nhpages; i++) {
400 		p = TAILQ_FIRST(&sc->sc_balloon_pages);
401 		if (p == NULL)
402 			break;
403 		TAILQ_REMOVE(&sc->sc_balloon_pages, p, pageq.queue);
404 		TAILQ_INSERT_TAIL(&b->bl_pglist, p, pageq.queue);
405 		b->bl_pages[i] =
406 			htole32(VM_PAGE_TO_PHYS(p) / VIRTIO_PAGE_SIZE);
407 	}
408 
409 	if (virtio_enqueue_prep(vsc, vq, &slot) != 0) {
410 		printf("%s: deflate enqueue failed.\n",
411 		       device_xname(sc->sc_dev));
412 		TAILQ_FOREACH_REVERSE(p, &b->bl_pglist, pglist, pageq.queue) {
413 			TAILQ_REMOVE(&b->bl_pglist, p, pageq.queue);
414 			TAILQ_INSERT_HEAD(&sc->sc_balloon_pages, p,
415 			    pageq.queue);
416 		}
417 		return 0;
418 	}
419 	if (virtio_enqueue_reserve(vsc, vq, slot, 1) != 0) {
420 		printf("%s: deflate enqueue failed.\n",
421 		       device_xname(sc->sc_dev));
422 		TAILQ_FOREACH_REVERSE(p, &b->bl_pglist, pglist, pageq.queue) {
423 			TAILQ_REMOVE(&b->bl_pglist, p, pageq.queue);
424 			TAILQ_INSERT_HEAD(&sc->sc_balloon_pages, p,
425 			    pageq.queue);
426 		}
427 		return 0;
428 	}
429 	bus_dmamap_sync(virtio_dmat(vsc), b->bl_dmamap, 0,
430 	    sizeof(uint32_t)*nvpages, BUS_DMASYNC_PREWRITE);
431 	virtio_enqueue(vsc, vq, slot, b->bl_dmamap, true);
432 	virtio_enqueue_commit(vsc, vq, slot, true);
433 	sc->sc_inflight -= nvpages;
434 
435 	if (!(virtio_features(vsc) & VIRTIO_BALLOON_F_MUST_TELL_HOST))
436 		uvm_pglistfree(&b->bl_pglist);
437 
438 	return 0;
439 }
440 
441 static int
442 deflateq_done(struct virtqueue *vq)
443 {
444 	struct virtio_softc *vsc = vq->vq_owner;
445 	struct viomb_softc *sc = device_private(virtio_child(vsc));
446 
447 	mutex_enter(&sc->sc_waitlock);
448 	sc->sc_deflate_done = 1;
449 	cv_signal(&sc->sc_wait);
450 	mutex_exit(&sc->sc_waitlock);
451 
452 	return 1;
453 }
454 
455 static int
456 deflate_done(struct viomb_softc *sc)
457 {
458 	struct virtio_softc *vsc = sc->sc_virtio;
459 	struct virtqueue *vq = &sc->sc_vq[VQ_DEFLATE];
460 	struct balloon_req *b;
461 	int r, slot;
462 	uint64_t nvpages;
463 
464 	r = virtio_dequeue(vsc, vq, &slot, NULL);
465 	if (r != 0) {
466 		printf("%s: deflate dequeue failed, errno %d\n",
467 		       device_xname(sc->sc_dev), r);
468 		return 1;
469 	}
470 	virtio_dequeue_commit(vsc, vq, slot);
471 
472 	b = &sc->sc_req;
473 	nvpages = b->bl_nentries;
474 	bus_dmamap_sync(virtio_dmat(vsc), b->bl_dmamap,
475 			0,
476 			sizeof(uint32_t)*nvpages,
477 			BUS_DMASYNC_POSTWRITE);
478 
479 	if (virtio_features(vsc) & VIRTIO_BALLOON_F_MUST_TELL_HOST)
480 		uvm_pglistfree(&b->bl_pglist);
481 
482 	sc->sc_inflight += nvpages;
483 	virtio_write_device_config_le_4(vsc,
484 		     VIRTIO_BALLOON_CONFIG_ACTUAL,
485 		     sc->sc_actual - nvpages);
486 	viomb_read_config(sc);
487 
488 	return 1;
489 }
490 
491 /*
492  * Kthread: sleeps, eventually inflate and deflate.
493  */
494 static void
495 viomb_thread(void *arg)
496 {
497 	struct viomb_softc *sc = arg;
498 	int sleeptime, r;
499 
500 	for ( ; ; ) {
501 		sleeptime = 30000;
502 		if (sc->sc_npages > sc->sc_actual + sc->sc_inflight) {
503 			if (sc->sc_inflight == 0) {
504 				r = inflate(sc);
505 				if (r != 0)
506 					sleeptime = 10000;
507 				else
508 					sleeptime = 1000;
509 			} else
510 				sleeptime = 100;
511 		} else if (sc->sc_npages < sc->sc_actual + sc->sc_inflight) {
512 			if (sc->sc_inflight == 0)
513 				r = deflate(sc);
514 			sleeptime = 100;
515 		}
516 
517 	again:
518 		mutex_enter(&sc->sc_waitlock);
519 		if (sc->sc_inflate_done) {
520 			sc->sc_inflate_done = 0;
521 			mutex_exit(&sc->sc_waitlock);
522 			inflate_done(sc);
523 			goto again;
524 		}
525 		if (sc->sc_deflate_done) {
526 			sc->sc_deflate_done = 0;
527 			mutex_exit(&sc->sc_waitlock);
528 			deflate_done(sc);
529 			goto again;
530 		}
531 		cv_timedwait(&sc->sc_wait, &sc->sc_waitlock,
532 			     mstohz(sleeptime));
533 		mutex_exit(&sc->sc_waitlock);
534 	}
535 }
536 
/*
 * Kernel module declaration for the driver.
 * NOTE(review): the "virtio" string is presumably the list of modules this
 * one requires -- confirm against module(9).
 */
MODULE(MODULE_CLASS_DRIVER, viomb, "virtio");

/*
 * When built as a loadable module, pull in the generated autoconf tables.
 * NOTE(review): presumably the source of the *_ioconf_viomb symbols used by
 * viomb_modcmd() -- confirm.
 */
#ifdef _MODULE
#include "ioconf.c"
#endif
542 
543 static int
544 viomb_modcmd(modcmd_t cmd, void *opaque)
545 {
546 	int error = 0;
547 
548 #ifdef _MODULE
549 	switch (cmd) {
550 	case MODULE_CMD_INIT:
551 		error = config_init_component(cfdriver_ioconf_viomb,
552 		    cfattach_ioconf_viomb, cfdata_ioconf_viomb);
553 		break;
554 	case MODULE_CMD_FINI:
555 		error = config_fini_component(cfdriver_ioconf_viomb,
556 		    cfattach_ioconf_viomb, cfdata_ioconf_viomb);
557 		break;
558 	default:
559 		error = ENOTTY;
560 		break;
561 	}
562 #endif
563 
564 	return error;
565 }
566