xref: /netbsd-src/sys/dev/pci/viomb.c (revision b83ebeba7f767758d2778bb0f9d7a76534253621)
1 /*	$NetBSD: viomb.c,v 1.6 2016/07/07 06:55:41 msaitoh Exp $	*/
2 
3 /*
4  * Copyright (c) 2010 Minoura Makoto.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: viomb.c,v 1.6 2016/07/07 06:55:41 msaitoh Exp $");
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/bus.h>
35 #include <sys/condvar.h>
36 #include <sys/device.h>
37 #include <sys/kthread.h>
38 #include <sys/mutex.h>
39 #include <sys/sysctl.h>
40 #include <uvm/uvm_page.h>
41 
42 #include <dev/pci/pcidevs.h>
43 #include <dev/pci/pcireg.h>
44 #include <dev/pci/pcivar.h>
45 
46 #include <dev/pci/virtioreg.h>
47 #include <dev/pci/virtiovar.h>
48 
/*
 * Configuration registers (device config space, little-endian):
 * NUM_PAGES is the balloon size the host asks for; ACTUAL is the
 * number of pages the guest has actually given up (written back by
 * this driver in inflate_done()/deflate_done()).
 */
/* Configuration registers */
#define VIRTIO_BALLOON_CONFIG_NUM_PAGES	0 /* 32bit */
#define VIRTIO_BALLOON_CONFIG_ACTUAL	4 /* 32bit */

/* Feature bits */
#define VIRTIO_BALLOON_F_MUST_TELL_HOST (1<<0)	/* pages may be reused only
						 * after the host acks */
#define VIRTIO_BALLOON_F_STATS_VQ	(1<<1)	/* stats virtqueue (unused
						 * by this driver) */

/* snprintb(3) format string for the negotiated feature word */
#define VIRTIO_BALLOON_FLAG_BITS \
	VIRTIO_COMMON_FLAG_BITS \
	"\x02""STATS_VQ" \
	"\x01""MUST_TELL_HOST"

/* max page-frame numbers carried per request */
#define PGS_PER_REQ		(256) /* 1MB, 4KB/page */

/* the code below assumes host and guest page sizes agree */
CTASSERT((PAGE_SIZE) == (VIRTIO_PAGE_SIZE)); /* XXX */

/*
 * One in-flight inflate or deflate request: the pages involved and the
 * DMA-mapped array of their page-frame numbers handed to the host.
 */
struct balloon_req {
	bus_dmamap_t			bl_dmamap;	/* maps bl_pages[] */
	struct pglist			bl_pglist;	/* pages in this request */
	int				bl_nentries;	/* valid bl_pages entries */
	uint32_t			bl_pages[PGS_PER_REQ];	/* PFNs for the host */
};
72 
/* Per-device driver state. */
struct viomb_softc {
	device_t		sc_dev;

	struct virtio_softc	*sc_virtio;	/* parent virtio bus */
	struct virtqueue	sc_vq[2];	/* [0] inflate, [1] deflate */

	unsigned int		sc_npages;	/* host-requested balloon size */
	unsigned int		sc_actual;	/* pages currently surrendered */
	int			sc_inflight;	/* pages in an in-flight request */
	struct balloon_req	sc_req;		/* single reusable request */
	struct pglist		sc_balloon_pages; /* pages held by the balloon */

	/* completion flags, set in vq interrupt, consumed by the kthread */
	int			sc_inflate_done;
	int			sc_deflate_done;

	kcondvar_t		sc_wait;	/* kthread sleeps here */
	kmutex_t		sc_waitlock;	/* protects the flags + cv */
};
91 
static int	balloon_initialized = 0; /* multiple balloon is not allowed */

/* autoconf glue */
static int	viomb_match(device_t, cfdata_t, void *);
static void	viomb_attach(device_t, device_t, void *);
/* device config space access */
static void	viomb_read_config(struct viomb_softc *);
static int	viomb_config_change(struct virtio_softc *);
/* inflate path: hand pages to the host */
static int	inflate(struct viomb_softc *);
static int	inflateq_done(struct virtqueue *);
static int	inflate_done(struct viomb_softc *);
/* deflate path: reclaim pages from the host */
static int	deflate(struct viomb_softc *);
static int	deflateq_done(struct virtqueue *);
static int	deflate_done(struct viomb_softc *);
/* worker kthread doing the actual ballooning */
static void	viomb_thread(void *);

CFATTACH_DECL_NEW(viomb, sizeof(struct viomb_softc),
    viomb_match, viomb_attach, NULL, NULL);
108 
109 static int
110 viomb_match(device_t parent, cfdata_t match, void *aux)
111 {
112 	struct virtio_softc *vsc = aux;
113 
114 	if (vsc->sc_childdevid == PCI_PRODUCT_VIRTIO_BALLOON)
115 		return 1;
116 
117 	return 0;
118 }
119 
120 static void
121 viomb_attach(device_t parent, device_t self, void *aux)
122 {
123 	struct viomb_softc *sc = device_private(self);
124 	struct virtio_softc *vsc = device_private(parent);
125 	const struct sysctlnode *node;
126 	uint32_t features;
127 	char buf[256];
128 
129 	if (vsc->sc_child != NULL) {
130 		aprint_normal(": child already attached for %s; "
131 			      "something wrong...\n", device_xname(parent));
132 		return;
133 	}
134 	if (balloon_initialized++) {
135 		aprint_normal(": balloon already exists; something wrong...\n");
136 		goto err_none;
137 	}
138 
139 	sc->sc_dev = self;
140 	sc->sc_virtio = vsc;
141 
142 	vsc->sc_child = self;
143 	vsc->sc_ipl = IPL_VM;
144 	vsc->sc_vqs = &sc->sc_vq[0];
145 	vsc->sc_nvqs = 2;
146 	vsc->sc_config_change = viomb_config_change;
147 	vsc->sc_intrhand = virtio_vq_intr;
148 	vsc->sc_flags = 0;
149 
150 	features = virtio_negotiate_features(vsc,
151 	    VIRTIO_CONFIG_DEVICE_FEATURES);
152 
153 	snprintb(buf, sizeof(buf), VIRTIO_BALLOON_FLAG_BITS, features);
154 	aprint_normal(": Features: %s\n", buf);
155 	if ((virtio_alloc_vq(vsc, &sc->sc_vq[0], 0,
156 			     sizeof(uint32_t)*PGS_PER_REQ, 1,
157 			     "inflate") != 0) ||
158 	    (virtio_alloc_vq(vsc, &sc->sc_vq[1], 1,
159 			     sizeof(uint32_t)*PGS_PER_REQ, 1,
160 			     "deflate") != 0)) {
161 		goto err_none;
162 	}
163 	sc->sc_vq[0].vq_done = inflateq_done;
164 	sc->sc_vq[1].vq_done = deflateq_done;
165 
166 	viomb_read_config(sc);
167 	sc->sc_inflight = 0;
168 	TAILQ_INIT(&sc->sc_balloon_pages);
169 
170 	if (bus_dmamap_create(vsc->sc_dmat, sizeof(uint32_t)*PGS_PER_REQ,
171 			      1, sizeof(uint32_t)*PGS_PER_REQ, 0,
172 			      BUS_DMA_NOWAIT, &sc->sc_req.bl_dmamap)) {
173 		aprint_error_dev(sc->sc_dev, "dmamap creation failed.\n");
174 		goto err_vq;
175 	}
176 	if (bus_dmamap_load(vsc->sc_dmat, sc->sc_req.bl_dmamap,
177 			    &sc->sc_req.bl_pages[0],
178 			    sizeof(uint32_t) * PGS_PER_REQ,
179 			    NULL, BUS_DMA_NOWAIT)) {
180 		aprint_error_dev(sc->sc_dev, "dmamap load failed.\n");
181 		goto err_dmamap;
182 	}
183 
184 	sc->sc_inflate_done = sc->sc_deflate_done = 0;
185 	mutex_init(&sc->sc_waitlock, MUTEX_DEFAULT, IPL_VM); /* spin */
186 	cv_init(&sc->sc_wait, "balloon");
187 
188 	if (kthread_create(PRI_IDLE, KTHREAD_MPSAFE, NULL,
189 			   viomb_thread, sc, NULL, "viomb")) {
190 		aprint_error_dev(sc->sc_dev, "cannot create kthread.\n");
191 		goto err_mutex;
192 	}
193 
194 	sysctl_createv(NULL, 0, NULL, &node, 0, CTLTYPE_NODE,
195 		       "viomb", SYSCTL_DESCR("VirtIO Balloon status"),
196 		       NULL, 0, NULL, 0,
197 		       CTL_HW, CTL_CREATE, CTL_EOL);
198 	sysctl_createv(NULL, 0, NULL, NULL, 0, CTLTYPE_INT,
199 		       "npages", SYSCTL_DESCR("VirtIO Balloon npages value"),
200 		       NULL, 0, &sc->sc_npages, 0,
201 		       CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL);
202 	sysctl_createv(NULL, 0, NULL, NULL, 0, CTLTYPE_INT,
203 		       "actual", SYSCTL_DESCR("VirtIO Balloon actual value"),
204 		       NULL, 0, &sc->sc_actual, 0,
205 		       CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL);
206 	return;
207 
208 err_mutex:
209 	cv_destroy(&sc->sc_wait);
210 	mutex_destroy(&sc->sc_waitlock);
211 err_dmamap:
212 	bus_dmamap_destroy(vsc->sc_dmat, sc->sc_req.bl_dmamap);
213 err_vq:
214 	virtio_free_vq(vsc, &sc->sc_vq[1]);
215 	virtio_free_vq(vsc, &sc->sc_vq[0]);
216 err_none:
217 	vsc->sc_child = (void*)1;
218 	return;
219 }
220 
221 static void
222 viomb_read_config(struct viomb_softc *sc)
223 {
224 	unsigned int reg;
225 
226 	/* these values are explicitly specified as little-endian */
227 	reg = virtio_read_device_config_4(sc->sc_virtio,
228 					  VIRTIO_BALLOON_CONFIG_NUM_PAGES);
229 	sc->sc_npages = le32toh(reg);
230 
231 	reg = virtio_read_device_config_4(sc->sc_virtio,
232 					  VIRTIO_BALLOON_CONFIG_ACTUAL);
233 	sc->sc_actual = le32toh(reg);
234 }
235 
236 /*
237  * Config change callback: wakeup the kthread.
238  */
239 static int
240 viomb_config_change(struct virtio_softc *vsc)
241 {
242 	struct viomb_softc *sc = device_private(vsc->sc_child);
243 	unsigned int old;
244 
245 	old = sc->sc_npages;
246 	viomb_read_config(sc);
247 	mutex_enter(&sc->sc_waitlock);
248 	cv_signal(&sc->sc_wait);
249 	mutex_exit(&sc->sc_waitlock);
250 	if (sc->sc_npages > old)
251 		printf("%s: inflating balloon from %u to %u.\n",
252 		       device_xname(sc->sc_dev), old, sc->sc_npages);
253 	else if  (sc->sc_npages < old)
254 		printf("%s: deflating balloon from %u to %u.\n",
255 		       device_xname(sc->sc_dev), old, sc->sc_npages);
256 
257 	return 1;
258 }
259 
260 /*
261  * Inflate: consume some amount of physical memory.
262  */
static int
inflate(struct viomb_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	int i, slot;
	uint64_t nvpages, nhpages;
	struct balloon_req *b;
	struct vm_page *p;
	struct virtqueue *vq = &sc->sc_vq[0];

	/* one request at a time; sc_req is a single shared buffer */
	if (sc->sc_inflight)
		return 0;
	/* how many more pages the host wants, capped per request */
	nvpages = sc->sc_npages - sc->sc_actual;
	if (nvpages > PGS_PER_REQ)
		nvpages = PGS_PER_REQ;
	/* host (virtio) pages -> guest hardware pages; equal by CTASSERT */
	nhpages = nvpages * VIRTIO_PAGE_SIZE / PAGE_SIZE;

	b = &sc->sc_req;
	/* allocate physical pages anywhere addressable by a 32-bit PFN */
	if (uvm_pglistalloc(nhpages*PAGE_SIZE, 0, UINT32_MAX*PAGE_SIZE,
			    0, 0, &b->bl_pglist, nhpages, 1)) {
		printf("%s: %" PRIu64 " pages of physical memory "
		       "could not be allocated, retrying...\n",
		       device_xname(sc->sc_dev), nhpages);
		return 1;	/* sleep longer */
	}

	/* fill the PFN array the host will read via DMA */
	b->bl_nentries = nvpages;
	i = 0;
	TAILQ_FOREACH(p, &b->bl_pglist, pageq.queue) {
		b->bl_pages[i++] = p->phys_addr / VIRTIO_PAGE_SIZE;
	}
	KASSERT(i == nvpages);

	if (virtio_enqueue_prep(vsc, vq, &slot) != 0) {
		printf("%s: inflate enqueue failed.\n",
		       device_xname(sc->sc_dev));
		uvm_pglistfree(&b->bl_pglist);
		return 0;
	}
	/*
	 * NOTE(review): on reserve failure the slot is not explicitly
	 * aborted here — presumably virtio_enqueue_reserve() releases
	 * it itself on failure (as other virtio drivers assume); verify
	 * against virtio.c.
	 */
	if (virtio_enqueue_reserve(vsc, vq, slot, 1)) {
		printf("%s: inflate enqueue failed.\n",
		       device_xname(sc->sc_dev));
		uvm_pglistfree(&b->bl_pglist);
		return 0;
	}
	/* sync offset is relative to bl_pages[0], where the map was loaded */
	bus_dmamap_sync(vsc->sc_dmat, b->bl_dmamap, 0,
	    sizeof(uint32_t)*nvpages, BUS_DMASYNC_PREWRITE);
	virtio_enqueue(vsc, vq, slot, b->bl_dmamap, true);
	virtio_enqueue_commit(vsc, vq, slot, true);
	sc->sc_inflight += nvpages;

	return 0;
}
316 
317 static int
318 inflateq_done(struct virtqueue *vq)
319 {
320 	struct virtio_softc *vsc = vq->vq_owner;
321 	struct viomb_softc *sc = device_private(vsc->sc_child);
322 
323 	mutex_enter(&sc->sc_waitlock);
324 	sc->sc_inflate_done = 1;
325 	cv_signal(&sc->sc_wait);
326 	mutex_exit(&sc->sc_waitlock);
327 
328 	return 1;
329 }
330 
331 static int
332 inflate_done(struct viomb_softc *sc)
333 {
334 	struct virtio_softc *vsc = sc->sc_virtio;
335 	struct virtqueue *vq = &sc->sc_vq[0];
336 	struct balloon_req *b;
337 	int r, slot;
338 	uint64_t nvpages;
339 	struct vm_page *p;
340 
341 	r = virtio_dequeue(vsc, vq, &slot, NULL);
342 	if (r != 0) {
343 		printf("%s: inflate dequeue failed, errno %d.\n",
344 		       device_xname(sc->sc_dev), r);
345 		return 1;
346 	}
347 	virtio_dequeue_commit(vsc, vq, slot);
348 
349 	b = &sc->sc_req;
350 	nvpages = b->bl_nentries;
351 	bus_dmamap_sync(vsc->sc_dmat, b->bl_dmamap,
352 			offsetof(struct balloon_req, bl_pages),
353 			sizeof(uint32_t)*nvpages,
354 			BUS_DMASYNC_POSTWRITE);
355 	while (!TAILQ_EMPTY(&b->bl_pglist)) {
356 		p = TAILQ_FIRST(&b->bl_pglist);
357 		TAILQ_REMOVE(&b->bl_pglist, p, pageq.queue);
358 		TAILQ_INSERT_TAIL(&sc->sc_balloon_pages, p, pageq.queue);
359 	}
360 
361 	sc->sc_inflight -= nvpages;
362 	virtio_write_device_config_4(vsc,
363 				     VIRTIO_BALLOON_CONFIG_ACTUAL,
364 				     sc->sc_actual + nvpages);
365 	viomb_read_config(sc);
366 
367 	return 1;
368 }
369 
370 /*
371  * Deflate: free previously allocated memory.
372  */
373 static int
374 deflate(struct viomb_softc *sc)
375 {
376 	struct virtio_softc *vsc = sc->sc_virtio;
377 	int i, slot;
378 	uint64_t nvpages, nhpages;
379 	struct balloon_req *b;
380 	struct vm_page *p;
381 	struct virtqueue *vq = &sc->sc_vq[1];
382 
383 	nvpages = (sc->sc_actual + sc->sc_inflight) - sc->sc_npages;
384 	if (nvpages > PGS_PER_REQ)
385 		nvpages = PGS_PER_REQ;
386 	nhpages = nvpages * VIRTIO_PAGE_SIZE / PAGE_SIZE;
387 
388 	b = &sc->sc_req;
389 
390 	b->bl_nentries = nvpages;
391 	TAILQ_INIT(&b->bl_pglist);
392 	for (i = 0; i < nhpages; i++) {
393 		p = TAILQ_FIRST(&sc->sc_balloon_pages);
394 		if (p == NULL)
395 			break;
396 		TAILQ_REMOVE(&sc->sc_balloon_pages, p, pageq.queue);
397 		TAILQ_INSERT_TAIL(&b->bl_pglist, p, pageq.queue);
398 		b->bl_pages[i] = p->phys_addr / VIRTIO_PAGE_SIZE;
399 	}
400 
401 	if (virtio_enqueue_prep(vsc, vq, &slot) != 0) {
402 		printf("%s: deflate enqueue failed.\n",
403 		       device_xname(sc->sc_dev));
404 		TAILQ_FOREACH_REVERSE(p, &b->bl_pglist, pglist, pageq.queue) {
405 			TAILQ_REMOVE(&b->bl_pglist, p, pageq.queue);
406 			TAILQ_INSERT_HEAD(&sc->sc_balloon_pages, p,
407 			    pageq.queue);
408 		}
409 		return 0;
410 	}
411 	if (virtio_enqueue_reserve(vsc, vq, slot, 1) != 0) {
412 		printf("%s: deflate enqueue failed.\n",
413 		       device_xname(sc->sc_dev));
414 		TAILQ_FOREACH_REVERSE(p, &b->bl_pglist, pglist, pageq.queue) {
415 			TAILQ_REMOVE(&b->bl_pglist, p, pageq.queue);
416 			TAILQ_INSERT_HEAD(&sc->sc_balloon_pages, p,
417 			    pageq.queue);
418 		}
419 		return 0;
420 	}
421 	bus_dmamap_sync(vsc->sc_dmat, b->bl_dmamap, 0,
422 	    sizeof(uint32_t)*nvpages, BUS_DMASYNC_PREWRITE);
423 	virtio_enqueue(vsc, vq, slot, b->bl_dmamap, true);
424 	virtio_enqueue_commit(vsc, vq, slot, true);
425 	sc->sc_inflight -= nvpages;
426 
427 	if (!(vsc->sc_features & VIRTIO_BALLOON_F_MUST_TELL_HOST))
428 		uvm_pglistfree(&b->bl_pglist);
429 
430 	return 0;
431 }
432 
433 static int
434 deflateq_done(struct virtqueue *vq)
435 {
436 	struct virtio_softc *vsc = vq->vq_owner;
437 	struct viomb_softc *sc = device_private(vsc->sc_child);
438 
439 	mutex_enter(&sc->sc_waitlock);
440 	sc->sc_deflate_done = 1;
441 	cv_signal(&sc->sc_wait);
442 	mutex_exit(&sc->sc_waitlock);
443 
444 	return 1;
445 }
446 
447 static int
448 deflate_done(struct viomb_softc *sc)
449 {
450 	struct virtio_softc *vsc = sc->sc_virtio;
451 	struct virtqueue *vq = &sc->sc_vq[1];
452 	struct balloon_req *b;
453 	int r, slot;
454 	uint64_t nvpages;
455 
456 	r = virtio_dequeue(vsc, vq, &slot, NULL);
457 	if (r != 0) {
458 		printf("%s: deflate dequeue failed, errno %d\n",
459 		       device_xname(sc->sc_dev), r);
460 		return 1;
461 	}
462 	virtio_dequeue_commit(vsc, vq, slot);
463 
464 	b = &sc->sc_req;
465 	nvpages = b->bl_nentries;
466 	bus_dmamap_sync(vsc->sc_dmat, b->bl_dmamap,
467 			offsetof(struct balloon_req, bl_pages),
468 			sizeof(uint32_t)*nvpages,
469 			BUS_DMASYNC_POSTWRITE);
470 
471 	if (vsc->sc_features & VIRTIO_BALLOON_F_MUST_TELL_HOST)
472 		uvm_pglistfree(&b->bl_pglist);
473 
474 	sc->sc_inflight += nvpages;
475 	virtio_write_device_config_4(vsc,
476 				     VIRTIO_BALLOON_CONFIG_ACTUAL,
477 				     sc->sc_actual - nvpages);
478 	viomb_read_config(sc);
479 
480 	return 1;
481 }
482 
483 /*
484  * Kthread: sleeps, eventually inflate and deflate.
485  */
486 static void
487 viomb_thread(void *arg)
488 {
489 	struct viomb_softc *sc = arg;
490 	int sleeptime, r;
491 
492 	for ( ; ; ) {
493 		sleeptime = 30000;
494 		if (sc->sc_npages > sc->sc_actual + sc->sc_inflight) {
495 			if (sc->sc_inflight == 0) {
496 				r = inflate(sc);
497 				if (r != 0)
498 					sleeptime = 10000;
499 				else
500 					sleeptime = 1000;
501 			} else
502 				sleeptime = 100;
503 		} else if (sc->sc_npages < sc->sc_actual + sc->sc_inflight) {
504 			if (sc->sc_inflight == 0)
505 				r = deflate(sc);
506 			sleeptime = 100;
507 		}
508 
509 	again:
510 		mutex_enter(&sc->sc_waitlock);
511 		if (sc->sc_inflate_done) {
512 			sc->sc_inflate_done = 0;
513 			mutex_exit(&sc->sc_waitlock);
514 			inflate_done(sc);
515 			goto again;
516 		}
517 		if (sc->sc_deflate_done) {
518 			sc->sc_deflate_done = 0;
519 			mutex_exit(&sc->sc_waitlock);
520 			deflate_done(sc);
521 			goto again;
522 		}
523 		cv_timedwait(&sc->sc_wait, &sc->sc_waitlock,
524 			     mstohz(sleeptime));
525 		mutex_exit(&sc->sc_waitlock);
526 	}
527 }
528