1 /* $NetBSD: viomb.c,v 1.2 2013/10/17 21:06:15 christos Exp $ */ 2 3 /* 4 * Copyright (c) 2010 Minoura Makoto. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: viomb.c,v 1.2 2013/10/17 21:06:15 christos Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/condvar.h>
#include <sys/device.h>
#include <sys/kthread.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <uvm/uvm_page.h>

#include <dev/pci/pcidevs.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <dev/pci/virtioreg.h>
#include <dev/pci/virtiovar.h>

/* Configuration registers (offsets into virtio device config space) */
#define VIRTIO_BALLOON_CONFIG_NUM_PAGES	0 /* 32bit; target size set by host */
#define VIRTIO_BALLOON_CONFIG_ACTUAL	4 /* 32bit; current size, written by us */

/* Feature bits */
#define VIRTIO_BALLOON_F_MUST_TELL_HOST (1<<0) /* host must ack before pages are reused */
#define VIRTIO_BALLOON_F_STATS_VQ	(1<<1) /* statistics virtqueue (unused here) */

/* Maximum page frame numbers carried per request. */
#define PGS_PER_REQ		(256) /* 1MB, 4KB/page */

/* This driver assumes the VM page size equals the balloon page size. */
CTASSERT((PAGE_SIZE) == (VIRTIO_PAGE_SIZE)); /* XXX */

/*
 * One in-flight balloon request.  bl_pages[] is the DMA-visible array of
 * guest page frame numbers handed to the host; bl_pglist tracks the
 * corresponding struct vm_page entries.  Note the dmamap is loaded over
 * bl_pages[] only, not the whole struct.
 */
struct balloon_req {
	bus_dmamap_t			bl_dmamap;	/* map loaded over bl_pages[] */
	struct pglist			bl_pglist;	/* pages owned by this request */
	int				bl_nentries;	/* valid entries in bl_pages[] */
	uint32_t			bl_pages[PGS_PER_REQ]; /* page frame numbers */
};

struct viomb_softc {
	device_t		sc_dev;

	struct virtio_softc	*sc_virtio;	/* parent virtio device */
	struct virtqueue	sc_vq[2];	/* [0] = inflate, [1] = deflate */

	unsigned int		sc_npages;	/* host-requested balloon size (pages) */
	unsigned int		sc_actual;	/* current balloon size (pages) */
	int			sc_inflight;	/* pages in flight; >0 inflating, <0 deflating */
	struct balloon_req	sc_req;		/* the single, reusable request */
	struct pglist		sc_balloon_pages; /* pages currently in the balloon */

	/* completion flags set from interrupt context, consumed by kthread */
	int			sc_inflate_done;
	int			sc_deflate_done;

	kcondvar_t		sc_wait;	/* kthread sleeps here */
	kmutex_t		sc_waitlock;	/* protects sc_wait and done flags */
};

static int	balloon_initialized = 0; /* multiple balloon is not allowed */

static int	viomb_match(device_t, cfdata_t, void *);
static void	viomb_attach(device_t, device_t, void *);
static void	viomb_read_config(struct viomb_softc *);
static int	viomb_config_change(struct virtio_softc *);
static int	inflate(struct viomb_softc *);
static int	inflateq_done(struct virtqueue *);
static int	inflate_done(struct viomb_softc *);
static int	deflate(struct viomb_softc *);
static int	deflateq_done(struct virtqueue *);
static int deflate_done(struct viomb_softc *); 99 static void viomb_thread(void *); 100 101 CFATTACH_DECL_NEW(viomb, sizeof(struct viomb_softc), 102 viomb_match, viomb_attach, NULL, NULL); 103 104 static int 105 viomb_match(device_t parent, cfdata_t match, void *aux) 106 { 107 struct virtio_softc *vsc = aux; 108 109 if (vsc->sc_childdevid == PCI_PRODUCT_VIRTIO_BALLOON) 110 return 1; 111 112 return 0; 113 } 114 115 static void 116 viomb_attach(device_t parent, device_t self, void *aux) 117 { 118 struct viomb_softc *sc = device_private(self); 119 struct virtio_softc *vsc = device_private(parent); 120 const struct sysctlnode *node; 121 122 if (vsc->sc_child != NULL) { 123 aprint_normal(": child already attached for %s; " 124 "something wrong...\n", 125 device_xname(parent)); 126 return; 127 } 128 if (balloon_initialized++) { 129 aprint_normal(": balloon already exists; something wrong...\n"); 130 goto err_none; 131 } 132 aprint_normal("\n"); 133 134 sc->sc_dev = self; 135 sc->sc_virtio = vsc; 136 137 vsc->sc_child = self; 138 vsc->sc_ipl = IPL_VM; 139 vsc->sc_vqs = &sc->sc_vq[0]; 140 vsc->sc_nvqs = 2; 141 vsc->sc_config_change = viomb_config_change; 142 vsc->sc_intrhand = virtio_vq_intr; 143 144 virtio_negotiate_features(vsc, 145 VIRTIO_CONFIG_DEVICE_FEATURES); 146 if ((virtio_alloc_vq(vsc, &sc->sc_vq[0], 0, 147 sizeof(uint32_t)*PGS_PER_REQ, 1, 148 "inflate") != 0) || 149 (virtio_alloc_vq(vsc, &sc->sc_vq[1], 1, 150 sizeof(uint32_t)*PGS_PER_REQ, 1, 151 "deflate") != 0)) { 152 goto err_none; 153 } 154 sc->sc_vq[0].vq_done = inflateq_done; 155 sc->sc_vq[1].vq_done = deflateq_done; 156 157 viomb_read_config(sc); 158 sc->sc_inflight = 0; 159 TAILQ_INIT(&sc->sc_balloon_pages); 160 161 if (bus_dmamap_create(vsc->sc_dmat, sizeof(uint32_t)*PGS_PER_REQ, 162 1, sizeof(uint32_t)*PGS_PER_REQ, 0, 163 BUS_DMA_NOWAIT, &sc->sc_req.bl_dmamap)) { 164 aprint_error_dev(sc->sc_dev, "dmamap creation failed.\n"); 165 goto err_vq; 166 } 167 if (bus_dmamap_load(vsc->sc_dmat, 
sc->sc_req.bl_dmamap, 168 &sc->sc_req.bl_pages[0], 169 sizeof(uint32_t) * PGS_PER_REQ, 170 NULL, BUS_DMA_NOWAIT)) { 171 aprint_error_dev(sc->sc_dev, "dmamap load failed.\n"); 172 goto err_dmamap; 173 } 174 175 sc->sc_inflate_done = sc->sc_deflate_done = 0; 176 mutex_init(&sc->sc_waitlock, MUTEX_DEFAULT, IPL_VM); /* spin */ 177 cv_init(&sc->sc_wait, "balloon"); 178 179 if (kthread_create(PRI_IDLE, KTHREAD_MPSAFE, NULL, 180 viomb_thread, sc, NULL, "viomb")) { 181 aprint_error_dev(sc->sc_dev, "cannot create kthread.\n"); 182 goto err_mutex; 183 } 184 185 sysctl_createv(NULL, 0, NULL, &node, 0, CTLTYPE_NODE, 186 "viomb", SYSCTL_DESCR("VirtIO Balloon status"), 187 NULL, 0, NULL, 0, 188 CTL_HW, CTL_CREATE, CTL_EOL); 189 sysctl_createv(NULL, 0, NULL, NULL, 0, CTLTYPE_INT, 190 "npages", SYSCTL_DESCR("VirtIO Balloon npages value"), 191 NULL, 0, &sc->sc_npages, 0, 192 CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL); 193 sysctl_createv(NULL, 0, NULL, NULL, 0, CTLTYPE_INT, 194 "actual", SYSCTL_DESCR("VirtIO Balloon actual value"), 195 NULL, 0, &sc->sc_actual, 0, 196 CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL); 197 return; 198 199 err_mutex: 200 cv_destroy(&sc->sc_wait); 201 mutex_destroy(&sc->sc_waitlock); 202 err_dmamap: 203 bus_dmamap_destroy(vsc->sc_dmat, sc->sc_req.bl_dmamap); 204 err_vq: 205 virtio_free_vq(vsc, &sc->sc_vq[1]); 206 virtio_free_vq(vsc, &sc->sc_vq[0]); 207 err_none: 208 vsc->sc_child = (void*)1; 209 return; 210 } 211 212 static void 213 viomb_read_config(struct viomb_softc *sc) 214 { 215 unsigned int reg; 216 217 /* these values are explicitly specified as little-endian */ 218 reg = virtio_read_device_config_4(sc->sc_virtio, 219 VIRTIO_BALLOON_CONFIG_NUM_PAGES); 220 sc->sc_npages = le32toh(reg); 221 222 reg = virtio_read_device_config_4(sc->sc_virtio, 223 VIRTIO_BALLOON_CONFIG_ACTUAL); 224 sc->sc_actual = le32toh(reg); 225 } 226 227 /* 228 * Config change callback: wakeup the kthread. 
229 */ 230 static int 231 viomb_config_change(struct virtio_softc *vsc) 232 { 233 struct viomb_softc *sc = device_private(vsc->sc_child); 234 unsigned int old; 235 236 old = sc->sc_npages; 237 viomb_read_config(sc); 238 mutex_enter(&sc->sc_waitlock); 239 cv_signal(&sc->sc_wait); 240 mutex_exit(&sc->sc_waitlock); 241 if (sc->sc_npages > old) 242 printf("%s: inflating balloon from %u to %u.\n", 243 device_xname(sc->sc_dev), old, sc->sc_npages); 244 else if (sc->sc_npages < old) 245 printf("%s: deflating balloon from %u to %u.\n", 246 device_xname(sc->sc_dev), old, sc->sc_npages); 247 248 return 1; 249 } 250 251 /* 252 * Inflate: consume some amount of physical memory. 253 */ 254 static int 255 inflate(struct viomb_softc *sc) 256 { 257 struct virtio_softc *vsc = sc->sc_virtio; 258 int i, slot; 259 uint64_t nvpages, nhpages; 260 struct balloon_req *b; 261 struct vm_page *p; 262 struct virtqueue *vq = &sc->sc_vq[0]; 263 264 if (sc->sc_inflight) 265 return 0; 266 nvpages = sc->sc_npages - sc->sc_actual; 267 if (nvpages > PGS_PER_REQ) 268 nvpages = PGS_PER_REQ; 269 nhpages = nvpages * VIRTIO_PAGE_SIZE / PAGE_SIZE; 270 271 b = &sc->sc_req; 272 if (uvm_pglistalloc(nhpages*PAGE_SIZE, 0, UINT32_MAX*PAGE_SIZE, 273 0, 0, &b->bl_pglist, nhpages, 1)) { 274 printf("%s: %" PRIu64 " pages of physical memory " 275 "could not be allocated, retrying...\n", 276 device_xname(sc->sc_dev), nhpages); 277 return 1; /* sleep longer */ 278 } 279 280 b->bl_nentries = nvpages; 281 i = 0; 282 TAILQ_FOREACH(p, &b->bl_pglist, pageq.queue) { 283 b->bl_pages[i++] = p->phys_addr / VIRTIO_PAGE_SIZE; 284 } 285 KASSERT(i == nvpages); 286 287 if (virtio_enqueue_prep(vsc, vq, &slot) != 0) { 288 printf("%s: inflate enqueue failed.\n", 289 device_xname(sc->sc_dev)); 290 uvm_pglistfree(&b->bl_pglist); 291 return 0; 292 } 293 if (virtio_enqueue_reserve(vsc, vq, slot, 1)) { 294 printf("%s: inflate enqueue failed.\n", 295 device_xname(sc->sc_dev)); 296 uvm_pglistfree(&b->bl_pglist); 297 return 0; 298 } 299 
bus_dmamap_sync(vsc->sc_dmat, b->bl_dmamap, 0, sizeof(uint32_t)*nvpages, 300 BUS_DMASYNC_PREWRITE); 301 virtio_enqueue(vsc, vq, slot, b->bl_dmamap, true); 302 virtio_enqueue_commit(vsc, vq, slot, true); 303 sc->sc_inflight += nvpages; 304 305 return 0; 306 } 307 308 static int 309 inflateq_done(struct virtqueue *vq) 310 { 311 struct virtio_softc *vsc = vq->vq_owner; 312 struct viomb_softc *sc = device_private(vsc->sc_child); 313 314 mutex_enter(&sc->sc_waitlock); 315 sc->sc_inflate_done = 1; 316 cv_signal(&sc->sc_wait); 317 mutex_exit(&sc->sc_waitlock); 318 319 return 1; 320 } 321 322 static int 323 inflate_done(struct viomb_softc *sc) 324 { 325 struct virtio_softc *vsc = sc->sc_virtio; 326 struct virtqueue *vq = &sc->sc_vq[0]; 327 struct balloon_req *b; 328 int r, slot; 329 uint64_t nvpages; 330 struct vm_page *p; 331 332 r = virtio_dequeue(vsc, vq, &slot, NULL); 333 if (r != 0) { 334 printf("%s: inflate dequeue failed, errno %d.\n", 335 device_xname(sc->sc_dev), r); 336 return 1; 337 } 338 virtio_dequeue_commit(vsc, vq, slot); 339 340 b = &sc->sc_req; 341 nvpages = b->bl_nentries; 342 bus_dmamap_sync(vsc->sc_dmat, b->bl_dmamap, 343 offsetof(struct balloon_req, bl_pages), 344 sizeof(uint32_t)*nvpages, 345 BUS_DMASYNC_POSTWRITE); 346 while (!TAILQ_EMPTY(&b->bl_pglist)) { 347 p = TAILQ_FIRST(&b->bl_pglist); 348 TAILQ_REMOVE(&b->bl_pglist, p, pageq.queue); 349 TAILQ_INSERT_TAIL(&sc->sc_balloon_pages, p, pageq.queue); 350 } 351 352 sc->sc_inflight -= nvpages; 353 virtio_write_device_config_4(vsc, 354 VIRTIO_BALLOON_CONFIG_ACTUAL, 355 sc->sc_actual + nvpages); 356 viomb_read_config(sc); 357 358 return 1; 359 } 360 361 /* 362 * Deflate: free previously allocated memory. 
363 */ 364 static int 365 deflate(struct viomb_softc *sc) 366 { 367 struct virtio_softc *vsc = sc->sc_virtio; 368 int i, slot; 369 uint64_t nvpages, nhpages; 370 struct balloon_req *b; 371 struct vm_page *p; 372 struct virtqueue *vq = &sc->sc_vq[1]; 373 374 nvpages = (sc->sc_actual + sc->sc_inflight) - sc->sc_npages; 375 if (nvpages > PGS_PER_REQ) 376 nvpages = PGS_PER_REQ; 377 nhpages = nvpages * VIRTIO_PAGE_SIZE / PAGE_SIZE; 378 379 b = &sc->sc_req; 380 381 b->bl_nentries = nvpages; 382 TAILQ_INIT(&b->bl_pglist); 383 for (i = 0; i < nhpages; i++) { 384 p = TAILQ_FIRST(&sc->sc_balloon_pages); 385 TAILQ_REMOVE(&sc->sc_balloon_pages, p, pageq.queue); 386 TAILQ_INSERT_TAIL(&b->bl_pglist, p, pageq.queue); 387 b->bl_pages[i] = p->phys_addr / VIRTIO_PAGE_SIZE; 388 } 389 390 if (virtio_enqueue_prep(vsc, vq, &slot) != 0) { 391 printf("%s: deflate enqueue failed.\n", 392 device_xname(sc->sc_dev)); 393 TAILQ_FOREACH_REVERSE(p, &b->bl_pglist, pglist, pageq.queue) { 394 TAILQ_REMOVE(&b->bl_pglist, p, pageq.queue); 395 TAILQ_INSERT_HEAD(&sc->sc_balloon_pages, p, pageq.queue); 396 } 397 return 0; 398 } 399 if (virtio_enqueue_reserve(vsc, vq, slot, 1) != 0) { 400 printf("%s: deflate enqueue failed.\n", 401 device_xname(sc->sc_dev)); 402 TAILQ_FOREACH_REVERSE(p, &b->bl_pglist, pglist, pageq.queue) { 403 TAILQ_REMOVE(&b->bl_pglist, p, pageq.queue); 404 TAILQ_INSERT_HEAD(&sc->sc_balloon_pages, p, pageq.queue); 405 } 406 return 0; 407 } 408 bus_dmamap_sync(vsc->sc_dmat, b->bl_dmamap, 0, sizeof(uint32_t)*nvpages, 409 BUS_DMASYNC_PREWRITE); 410 virtio_enqueue(vsc, vq, slot, b->bl_dmamap, true); 411 virtio_enqueue_commit(vsc, vq, slot, true); 412 sc->sc_inflight -= nvpages; 413 414 if (!(vsc->sc_features & VIRTIO_BALLOON_F_MUST_TELL_HOST)) 415 uvm_pglistfree(&b->bl_pglist); 416 417 return 0; 418 } 419 420 static int 421 deflateq_done(struct virtqueue *vq) 422 { 423 struct virtio_softc *vsc = vq->vq_owner; 424 struct viomb_softc *sc = device_private(vsc->sc_child); 425 426 
mutex_enter(&sc->sc_waitlock); 427 sc->sc_deflate_done = 1; 428 cv_signal(&sc->sc_wait); 429 mutex_exit(&sc->sc_waitlock); 430 431 return 1; 432 } 433 434 static int 435 deflate_done(struct viomb_softc *sc) 436 { 437 struct virtio_softc *vsc = sc->sc_virtio; 438 struct virtqueue *vq = &sc->sc_vq[1]; 439 struct balloon_req *b; 440 int r, slot; 441 uint64_t nvpages; 442 443 r = virtio_dequeue(vsc, vq, &slot, NULL); 444 if (r != 0) { 445 printf("%s: deflate dequeue failed, errno %d\n", 446 device_xname(sc->sc_dev), r); 447 return 1; 448 } 449 virtio_dequeue_commit(vsc, vq, slot); 450 451 b = &sc->sc_req; 452 nvpages = b->bl_nentries; 453 bus_dmamap_sync(vsc->sc_dmat, b->bl_dmamap, 454 offsetof(struct balloon_req, bl_pages), 455 sizeof(uint32_t)*nvpages, 456 BUS_DMASYNC_POSTWRITE); 457 458 if (vsc->sc_features & VIRTIO_BALLOON_F_MUST_TELL_HOST) 459 uvm_pglistfree(&b->bl_pglist); 460 461 sc->sc_inflight += nvpages; 462 virtio_write_device_config_4(vsc, 463 VIRTIO_BALLOON_CONFIG_ACTUAL, 464 sc->sc_actual - nvpages); 465 viomb_read_config(sc); 466 467 return 1; 468 } 469 470 /* 471 * Kthread: sleeps, eventually inflate and deflate. 
 */
static void
viomb_thread(void *arg)
{
	struct viomb_softc *sc = arg;
	int sleeptime, r;

	for ( ; ; ) {
		/* default: nothing to do, just poll occasionally */
		sleeptime = 30000;
		if (sc->sc_npages > sc->sc_actual + sc->sc_inflight) {
			/* balloon too small: inflate, one request at a time */
			if (sc->sc_inflight == 0) {
				r = inflate(sc);
				if (r != 0)
					sleeptime = 10000; /* alloc failed; back off */
				else
					sleeptime = 1000;
			} else
				sleeptime = 100; /* request outstanding; recheck soon */
		} else if (sc->sc_npages < sc->sc_actual + sc->sc_inflight) {
			/* balloon too big: deflate */
			if (sc->sc_inflight == 0)
				r = deflate(sc);
			sleeptime = 100;
		}

	again:
		mutex_enter(&sc->sc_waitlock);
		/*
		 * The done flags are set from interrupt context; clear
		 * each under the lock, finish the request unlocked, and
		 * re-check in case another completed meanwhile.
		 */
		if (sc->sc_inflate_done) {
			sc->sc_inflate_done = 0;
			mutex_exit(&sc->sc_waitlock);
			inflate_done(sc);
			goto again;
		}
		if (sc->sc_deflate_done) {
			sc->sc_deflate_done = 0;
			mutex_exit(&sc->sc_waitlock);
			deflate_done(sc);
			goto again;
		}
		/* sleep until timeout, config change, or completion irq */
		cv_timedwait(&sc->sc_wait, &sc->sc_waitlock,
			     mstohz(sleeptime));
		mutex_exit(&sc->sc_waitlock);
	}
}