xref: /netbsd-src/sys/dev/pci/viomb.c (revision c38e7cc395b1472a774ff828e46123de44c628e9)
1 /*	$NetBSD: viomb.c,v 1.8 2017/03/25 18:02:06 jdolecek Exp $	*/
2 
3 /*
4  * Copyright (c) 2010 Minoura Makoto.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: viomb.c,v 1.8 2017/03/25 18:02:06 jdolecek Exp $");
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/bus.h>
35 #include <sys/condvar.h>
36 #include <sys/device.h>
37 #include <sys/kthread.h>
38 #include <sys/mutex.h>
39 #include <sys/sysctl.h>
40 #include <uvm/uvm_page.h>
41 #include <sys/module.h>
42 
43 #include <dev/pci/pcidevs.h>
44 #include <dev/pci/pcireg.h>
45 #include <dev/pci/pcivar.h>
46 
47 #include <dev/pci/virtioreg.h>
48 #include <dev/pci/virtiovar.h>
49 
50 #include "ioconf.h"
51 
52 /* Configuration registers */
53 #define VIRTIO_BALLOON_CONFIG_NUM_PAGES	0 /* 32bit */
54 #define VIRTIO_BALLOON_CONFIG_ACTUAL	4 /* 32bit */
55 
56 /* Feature bits */
57 #define VIRTIO_BALLOON_F_MUST_TELL_HOST (1<<0)
58 #define VIRTIO_BALLOON_F_STATS_VQ	(1<<1)
59 
60 #define VIRTIO_BALLOON_FLAG_BITS \
61 	VIRTIO_COMMON_FLAG_BITS \
62 	"\x02""STATS_VQ" \
63 	"\x01""MUST_TELL_HOST"
64 
65 #define PGS_PER_REQ		(256) /* 1MB, 4KB/page */
66 
67 CTASSERT((PAGE_SIZE) == (VIRTIO_PAGE_SIZE)); /* XXX */
68 
69 struct balloon_req {
70 	bus_dmamap_t			bl_dmamap;
71 	struct pglist			bl_pglist;
72 	int				bl_nentries;
73 	uint32_t			bl_pages[PGS_PER_REQ];
74 };
75 
76 struct viomb_softc {
77 	device_t		sc_dev;
78 
79 	struct virtio_softc	*sc_virtio;
80 	struct virtqueue	sc_vq[2];
81 
82 	unsigned int		sc_npages;
83 	unsigned int		sc_actual;
84 	int			sc_inflight;
85 	struct balloon_req	sc_req;
86 	struct pglist		sc_balloon_pages;
87 
88 	int			sc_inflate_done;
89 	int			sc_deflate_done;
90 
91 	kcondvar_t		sc_wait;
92 	kmutex_t		sc_waitlock;
93 };
94 
95 static int	balloon_initialized = 0; /* multiple balloon is not allowed */
96 
97 static int	viomb_match(device_t, cfdata_t, void *);
98 static void	viomb_attach(device_t, device_t, void *);
99 static void	viomb_read_config(struct viomb_softc *);
100 static int	viomb_config_change(struct virtio_softc *);
101 static int	inflate(struct viomb_softc *);
102 static int	inflateq_done(struct virtqueue *);
103 static int	inflate_done(struct viomb_softc *);
104 static int	deflate(struct viomb_softc *);
105 static int	deflateq_done(struct virtqueue *);
106 static int	deflate_done(struct viomb_softc *);
107 static void	viomb_thread(void *);
108 
109 CFATTACH_DECL_NEW(viomb, sizeof(struct viomb_softc),
110     viomb_match, viomb_attach, NULL, NULL);
111 
112 static int
113 viomb_match(device_t parent, cfdata_t match, void *aux)
114 {
115 	struct virtio_attach_args *va = aux;
116 
117 	if (va->sc_childdevid == PCI_PRODUCT_VIRTIO_BALLOON)
118 		return 1;
119 
120 	return 0;
121 }
122 
123 static void
124 viomb_attach(device_t parent, device_t self, void *aux)
125 {
126 	struct viomb_softc *sc = device_private(self);
127 	struct virtio_softc *vsc = device_private(parent);
128 	const struct sysctlnode *node;
129 
130 	if (virtio_child(vsc) != NULL) {
131 		aprint_normal(": child already attached for %s; "
132 			      "something wrong...\n", device_xname(parent));
133 		return;
134 	}
135 
136 	if (balloon_initialized++) {
137 		aprint_normal(": balloon already exists; something wrong...\n");
138 		goto err_none;
139 	}
140 
141 	sc->sc_dev = self;
142 	sc->sc_virtio = vsc;
143 
144 	if ((virtio_alloc_vq(vsc, &sc->sc_vq[0], 0,
145 			     sizeof(uint32_t)*PGS_PER_REQ, 1,
146 			     "inflate") != 0) ||
147 	    (virtio_alloc_vq(vsc, &sc->sc_vq[1], 1,
148 			     sizeof(uint32_t)*PGS_PER_REQ, 1,
149 			     "deflate") != 0)) {
150 		goto err_none;
151 	}
152 	sc->sc_vq[0].vq_done = inflateq_done;
153 	sc->sc_vq[1].vq_done = deflateq_done;
154 
155 	viomb_read_config(sc);
156 	sc->sc_inflight = 0;
157 	TAILQ_INIT(&sc->sc_balloon_pages);
158 
159 	if (bus_dmamap_create(virtio_dmat(vsc), sizeof(uint32_t)*PGS_PER_REQ,
160 			      1, sizeof(uint32_t)*PGS_PER_REQ, 0,
161 			      BUS_DMA_NOWAIT, &sc->sc_req.bl_dmamap)) {
162 		aprint_error_dev(sc->sc_dev, "dmamap creation failed.\n");
163 		goto err_vq;
164 	}
165 	if (bus_dmamap_load(virtio_dmat(vsc), sc->sc_req.bl_dmamap,
166 			    &sc->sc_req.bl_pages[0],
167 			    sizeof(uint32_t) * PGS_PER_REQ,
168 			    NULL, BUS_DMA_NOWAIT)) {
169 		aprint_error_dev(sc->sc_dev, "dmamap load failed.\n");
170 		goto err_dmamap;
171 	}
172 
173 	sc->sc_inflate_done = sc->sc_deflate_done = 0;
174 	mutex_init(&sc->sc_waitlock, MUTEX_DEFAULT, IPL_VM); /* spin */
175 	cv_init(&sc->sc_wait, "balloon");
176 
177 	virtio_child_attach_start(vsc, self, IPL_VM, sc->sc_vq,
178 	    viomb_config_change, virtio_vq_intr, 0,
179 	    0, VIRTIO_BALLOON_FLAG_BITS);
180 
181 	if (virtio_child_attach_finish(vsc) != 0)
182 		goto err_mutex;
183 
184 	if (kthread_create(PRI_IDLE, KTHREAD_MPSAFE, NULL,
185 			   viomb_thread, sc, NULL, "viomb")) {
186 		aprint_error_dev(sc->sc_dev, "cannot create kthread.\n");
187 		goto err_mutex;
188 	}
189 
190 	sysctl_createv(NULL, 0, NULL, &node, 0, CTLTYPE_NODE,
191 		       "viomb", SYSCTL_DESCR("VirtIO Balloon status"),
192 		       NULL, 0, NULL, 0,
193 		       CTL_HW, CTL_CREATE, CTL_EOL);
194 	sysctl_createv(NULL, 0, NULL, NULL, 0, CTLTYPE_INT,
195 		       "npages", SYSCTL_DESCR("VirtIO Balloon npages value"),
196 		       NULL, 0, &sc->sc_npages, 0,
197 		       CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL);
198 	sysctl_createv(NULL, 0, NULL, NULL, 0, CTLTYPE_INT,
199 		       "actual", SYSCTL_DESCR("VirtIO Balloon actual value"),
200 		       NULL, 0, &sc->sc_actual, 0,
201 		       CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL);
202 	return;
203 
204 err_mutex:
205 	cv_destroy(&sc->sc_wait);
206 	mutex_destroy(&sc->sc_waitlock);
207 err_dmamap:
208 	bus_dmamap_destroy(virtio_dmat(vsc), sc->sc_req.bl_dmamap);
209 err_vq:
210 	virtio_free_vq(vsc, &sc->sc_vq[1]);
211 	virtio_free_vq(vsc, &sc->sc_vq[0]);
212 err_none:
213 	virtio_child_attach_failed(vsc);
214 	return;
215 }
216 
217 static void
218 viomb_read_config(struct viomb_softc *sc)
219 {
220 	unsigned int reg;
221 
222 	/* these values are explicitly specified as little-endian */
223 	reg = virtio_read_device_config_4(sc->sc_virtio,
224 					  VIRTIO_BALLOON_CONFIG_NUM_PAGES);
225 	sc->sc_npages = le32toh(reg);
226 
227 	reg = virtio_read_device_config_4(sc->sc_virtio,
228 					  VIRTIO_BALLOON_CONFIG_ACTUAL);
229 	sc->sc_actual = le32toh(reg);
230 }
231 
232 /*
233  * Config change callback: wakeup the kthread.
234  */
235 static int
236 viomb_config_change(struct virtio_softc *vsc)
237 {
238 	struct viomb_softc *sc = device_private(virtio_child(vsc));
239 	unsigned int old;
240 
241 	old = sc->sc_npages;
242 	viomb_read_config(sc);
243 	mutex_enter(&sc->sc_waitlock);
244 	cv_signal(&sc->sc_wait);
245 	mutex_exit(&sc->sc_waitlock);
246 	if (sc->sc_npages > old)
247 		printf("%s: inflating balloon from %u to %u.\n",
248 		       device_xname(sc->sc_dev), old, sc->sc_npages);
249 	else if  (sc->sc_npages < old)
250 		printf("%s: deflating balloon from %u to %u.\n",
251 		       device_xname(sc->sc_dev), old, sc->sc_npages);
252 
253 	return 1;
254 }
255 
/*
 * Inflate: consume some amount of physical memory.
 * Allocates up to PGS_PER_REQ pages, records their frame numbers in
 * the shared request buffer, and queues them on the inflate virtqueue.
 * Returns 1 when allocation failed (caller should retry later), else 0.
 */
static int
inflate(struct viomb_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	int i, slot;
	uint64_t nvpages, nhpages;
	struct balloon_req *b;
	struct vm_page *p;
	struct virtqueue *vq = &sc->sc_vq[0];	/* inflate queue */

	/* Only one request may be in flight at a time. */
	if (sc->sc_inflight)
		return 0;
	/* Pages still owed to the host, capped at one request's worth. */
	nvpages = sc->sc_npages - sc->sc_actual;
	if (nvpages > PGS_PER_REQ)
		nvpages = PGS_PER_REQ;
	/* virtio pages -> host pages; the CTASSERT makes these equal. */
	nhpages = nvpages * VIRTIO_PAGE_SIZE / PAGE_SIZE;

	b = &sc->sc_req;
	/*
	 * NOTE(review): UINT32_MAX*PAGE_SIZE may overflow a 32-bit
	 * paddr_t — confirm behavior on 32-bit platforms.
	 */
	if (uvm_pglistalloc(nhpages*PAGE_SIZE, 0, UINT32_MAX*PAGE_SIZE,
			    0, 0, &b->bl_pglist, nhpages, 1)) {
		printf("%s: %" PRIu64 " pages of physical memory "
		       "could not be allocated, retrying...\n",
		       device_xname(sc->sc_dev), nhpages);
		return 1;	/* sleep longer */
	}

	/* Record each page's frame number in VIRTIO_PAGE_SIZE units. */
	b->bl_nentries = nvpages;
	i = 0;
	TAILQ_FOREACH(p, &b->bl_pglist, pageq.queue) {
		b->bl_pages[i++] = p->phys_addr / VIRTIO_PAGE_SIZE;
	}
	/* Holds because nhpages == nvpages (PAGE_SIZE == VIRTIO_PAGE_SIZE). */
	KASSERT(i == nvpages);

	if (virtio_enqueue_prep(vsc, vq, &slot) != 0) {
		printf("%s: inflate enqueue failed.\n",
		       device_xname(sc->sc_dev));
		uvm_pglistfree(&b->bl_pglist);
		return 0;
	}
	if (virtio_enqueue_reserve(vsc, vq, slot, 1)) {
		printf("%s: inflate enqueue failed.\n",
		       device_xname(sc->sc_dev));
		uvm_pglistfree(&b->bl_pglist);
		return 0;
	}
	/* Push the page-number array to the device and kick the queue. */
	bus_dmamap_sync(virtio_dmat(vsc), b->bl_dmamap, 0,
	    sizeof(uint32_t)*nvpages, BUS_DMASYNC_PREWRITE);
	virtio_enqueue(vsc, vq, slot, b->bl_dmamap, true);
	virtio_enqueue_commit(vsc, vq, slot, true);
	/* Completion (inflateq_done) will account these into sc_actual. */
	sc->sc_inflight += nvpages;

	return 0;
}
312 
313 static int
314 inflateq_done(struct virtqueue *vq)
315 {
316 	struct virtio_softc *vsc = vq->vq_owner;
317 	struct viomb_softc *sc = device_private(virtio_child(vsc));
318 
319 	mutex_enter(&sc->sc_waitlock);
320 	sc->sc_inflate_done = 1;
321 	cv_signal(&sc->sc_wait);
322 	mutex_exit(&sc->sc_waitlock);
323 
324 	return 1;
325 }
326 
327 static int
328 inflate_done(struct viomb_softc *sc)
329 {
330 	struct virtio_softc *vsc = sc->sc_virtio;
331 	struct virtqueue *vq = &sc->sc_vq[0];
332 	struct balloon_req *b;
333 	int r, slot;
334 	uint64_t nvpages;
335 	struct vm_page *p;
336 
337 	r = virtio_dequeue(vsc, vq, &slot, NULL);
338 	if (r != 0) {
339 		printf("%s: inflate dequeue failed, errno %d.\n",
340 		       device_xname(sc->sc_dev), r);
341 		return 1;
342 	}
343 	virtio_dequeue_commit(vsc, vq, slot);
344 
345 	b = &sc->sc_req;
346 	nvpages = b->bl_nentries;
347 	bus_dmamap_sync(virtio_dmat(vsc), b->bl_dmamap,
348 			offsetof(struct balloon_req, bl_pages),
349 			sizeof(uint32_t)*nvpages,
350 			BUS_DMASYNC_POSTWRITE);
351 	while (!TAILQ_EMPTY(&b->bl_pglist)) {
352 		p = TAILQ_FIRST(&b->bl_pglist);
353 		TAILQ_REMOVE(&b->bl_pglist, p, pageq.queue);
354 		TAILQ_INSERT_TAIL(&sc->sc_balloon_pages, p, pageq.queue);
355 	}
356 
357 	sc->sc_inflight -= nvpages;
358 	virtio_write_device_config_4(vsc,
359 				     VIRTIO_BALLOON_CONFIG_ACTUAL,
360 				     sc->sc_actual + nvpages);
361 	viomb_read_config(sc);
362 
363 	return 1;
364 }
365 
/*
 * Deflate: free previously allocated memory.
 * Pulls pages off the balloon list, queues their frame numbers on the
 * deflate virtqueue, and (unless the host demands MUST_TELL_HOST)
 * frees them immediately.  Returns 0.
 */
static int
deflate(struct viomb_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	int i, slot;
	uint64_t nvpages, nhpages;
	struct balloon_req *b;
	struct vm_page *p;
	struct virtqueue *vq = &sc->sc_vq[1];	/* deflate queue */

	/* Pages to give back to the guest, capped at one request. */
	nvpages = (sc->sc_actual + sc->sc_inflight) - sc->sc_npages;
	if (nvpages > PGS_PER_REQ)
		nvpages = PGS_PER_REQ;
	nhpages = nvpages * VIRTIO_PAGE_SIZE / PAGE_SIZE;

	b = &sc->sc_req;

	/*
	 * NOTE(review): bl_nentries is set before the loop, which may
	 * break early if the balloon list runs short; in that case
	 * stale bl_pages[] entries past the break would be enqueued.
	 * The sc_actual accounting should prevent a short list, but
	 * confirm.
	 */
	b->bl_nentries = nvpages;
	TAILQ_INIT(&b->bl_pglist);
	for (i = 0; i < nhpages; i++) {
		p = TAILQ_FIRST(&sc->sc_balloon_pages);
		if (p == NULL)
			break;
		TAILQ_REMOVE(&sc->sc_balloon_pages, p, pageq.queue);
		TAILQ_INSERT_TAIL(&b->bl_pglist, p, pageq.queue);
		b->bl_pages[i] = p->phys_addr / VIRTIO_PAGE_SIZE;
	}

	if (virtio_enqueue_prep(vsc, vq, &slot) != 0) {
		printf("%s: deflate enqueue failed.\n",
		       device_xname(sc->sc_dev));
		/* Put the pages back where they came from, in order. */
		TAILQ_FOREACH_REVERSE(p, &b->bl_pglist, pglist, pageq.queue) {
			TAILQ_REMOVE(&b->bl_pglist, p, pageq.queue);
			TAILQ_INSERT_HEAD(&sc->sc_balloon_pages, p,
			    pageq.queue);
		}
		return 0;
	}
	if (virtio_enqueue_reserve(vsc, vq, slot, 1) != 0) {
		printf("%s: deflate enqueue failed.\n",
		       device_xname(sc->sc_dev));
		/* Same rollback as above. */
		TAILQ_FOREACH_REVERSE(p, &b->bl_pglist, pglist, pageq.queue) {
			TAILQ_REMOVE(&b->bl_pglist, p, pageq.queue);
			TAILQ_INSERT_HEAD(&sc->sc_balloon_pages, p,
			    pageq.queue);
		}
		return 0;
	}
	/* Hand the page-number array to the device and kick the queue. */
	bus_dmamap_sync(virtio_dmat(vsc), b->bl_dmamap, 0,
	    sizeof(uint32_t)*nvpages, BUS_DMASYNC_PREWRITE);
	virtio_enqueue(vsc, vq, slot, b->bl_dmamap, true);
	virtio_enqueue_commit(vsc, vq, slot, true);
	/* Negative pending: deflate_done() adds nvpages back. */
	sc->sc_inflight -= nvpages;

	/* Without MUST_TELL_HOST the pages may be reused right away. */
	if (!(virtio_features(vsc) & VIRTIO_BALLOON_F_MUST_TELL_HOST))
		uvm_pglistfree(&b->bl_pglist);

	return 0;
}
428 
429 static int
430 deflateq_done(struct virtqueue *vq)
431 {
432 	struct virtio_softc *vsc = vq->vq_owner;
433 	struct viomb_softc *sc = device_private(virtio_child(vsc));
434 
435 	mutex_enter(&sc->sc_waitlock);
436 	sc->sc_deflate_done = 1;
437 	cv_signal(&sc->sc_wait);
438 	mutex_exit(&sc->sc_waitlock);
439 
440 	return 1;
441 }
442 
443 static int
444 deflate_done(struct viomb_softc *sc)
445 {
446 	struct virtio_softc *vsc = sc->sc_virtio;
447 	struct virtqueue *vq = &sc->sc_vq[1];
448 	struct balloon_req *b;
449 	int r, slot;
450 	uint64_t nvpages;
451 
452 	r = virtio_dequeue(vsc, vq, &slot, NULL);
453 	if (r != 0) {
454 		printf("%s: deflate dequeue failed, errno %d\n",
455 		       device_xname(sc->sc_dev), r);
456 		return 1;
457 	}
458 	virtio_dequeue_commit(vsc, vq, slot);
459 
460 	b = &sc->sc_req;
461 	nvpages = b->bl_nentries;
462 	bus_dmamap_sync(virtio_dmat(vsc), b->bl_dmamap,
463 			offsetof(struct balloon_req, bl_pages),
464 			sizeof(uint32_t)*nvpages,
465 			BUS_DMASYNC_POSTWRITE);
466 
467 	if (virtio_features(vsc) & VIRTIO_BALLOON_F_MUST_TELL_HOST)
468 		uvm_pglistfree(&b->bl_pglist);
469 
470 	sc->sc_inflight += nvpages;
471 	virtio_write_device_config_4(vsc,
472 				     VIRTIO_BALLOON_CONFIG_ACTUAL,
473 				     sc->sc_actual - nvpages);
474 	viomb_read_config(sc);
475 
476 	return 1;
477 }
478 
479 /*
480  * Kthread: sleeps, eventually inflate and deflate.
481  */
482 static void
483 viomb_thread(void *arg)
484 {
485 	struct viomb_softc *sc = arg;
486 	int sleeptime, r;
487 
488 	for ( ; ; ) {
489 		sleeptime = 30000;
490 		if (sc->sc_npages > sc->sc_actual + sc->sc_inflight) {
491 			if (sc->sc_inflight == 0) {
492 				r = inflate(sc);
493 				if (r != 0)
494 					sleeptime = 10000;
495 				else
496 					sleeptime = 1000;
497 			} else
498 				sleeptime = 100;
499 		} else if (sc->sc_npages < sc->sc_actual + sc->sc_inflight) {
500 			if (sc->sc_inflight == 0)
501 				r = deflate(sc);
502 			sleeptime = 100;
503 		}
504 
505 	again:
506 		mutex_enter(&sc->sc_waitlock);
507 		if (sc->sc_inflate_done) {
508 			sc->sc_inflate_done = 0;
509 			mutex_exit(&sc->sc_waitlock);
510 			inflate_done(sc);
511 			goto again;
512 		}
513 		if (sc->sc_deflate_done) {
514 			sc->sc_deflate_done = 0;
515 			mutex_exit(&sc->sc_waitlock);
516 			deflate_done(sc);
517 			goto again;
518 		}
519 		cv_timedwait(&sc->sc_wait, &sc->sc_waitlock,
520 			     mstohz(sleeptime));
521 		mutex_exit(&sc->sc_waitlock);
522 	}
523 }
524 
525 MODULE(MODULE_CLASS_DRIVER, viomb, "virtio");
526 
527 #ifdef _MODULE
528 #include "ioconf.c"
529 #endif
530 
531 static int
532 viomb_modcmd(modcmd_t cmd, void *opaque)
533 {
534 	int error = 0;
535 
536 #ifdef _MODULE
537 	switch (cmd) {
538 	case MODULE_CMD_INIT:
539 		error = config_init_component(cfdriver_ioconf_viomb,
540 		    cfattach_ioconf_viomb, cfdata_ioconf_viomb);
541 		break;
542 	case MODULE_CMD_FINI:
543 		error = config_fini_component(cfdriver_ioconf_viomb,
544 		    cfattach_ioconf_viomb, cfdata_ioconf_viomb);
545 		break;
546 	default:
547 		error = ENOTTY;
548 		break;
549 	}
550 #endif
551 
552 	return error;
553 }
554