1 /* $NetBSD: xengnt.c,v 1.41 2023/02/25 00:35:52 riastradh Exp $ */ 2 3 /* 4 * Copyright (c) 2006 Manuel Bouyer. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: xengnt.c,v 1.41 2023/02/25 00:35:52 riastradh Exp $");

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/queue.h>
#include <sys/extent.h>
#include <sys/kernel.h>
#include <sys/mutex.h>
#include <uvm/uvm.h>

#include <xen/hypervisor.h>
#include <xen/xen.h>
#include <xen/granttables.h>

#include "opt_xen.h"

/* #define XENDEBUG */
#ifdef XENDEBUG
#define DPRINTF(x) printf x
#else
#define DPRINTF(x)
#endif

/* External tools reserve first few grant table entries. */
#define NR_RESERVED_ENTRIES 8

/*
 * Grant table version actually negotiated with the hypervisor:
 * 1 or 2 once xengnt_init() has run, 0 before that.
 */
int gnt_v = 0;
#define GNT_ISV1 (gnt_v == 1)
#define GNT_ISV2 (gnt_v == 2)
/* Current number of frames making up the grant table */
int gnt_nr_grant_frames;
/* Maximum number of frames that can make up the grant table */
int gnt_max_grant_frames;

/*
 * Free-list of grant entries, managed as a stack:
 * gnt_entries[0 .. last_gnt_entry-1] hold free grant references,
 * slots at and above last_gnt_entry hold XENGNT_NO_ENTRY.
 */
grant_ref_t *gnt_entries;
/* stack pointer: index one past the last free entry */
int last_gnt_entry;
/* empty entry in the list */
#define XENGNT_NO_ENTRY 0xffffffff

/*
 * Entries per grant-table page; the v1 and v2 entry structures differ
 * in size, so the count depends on the negotiated version (gnt_v).
 */
#define NR_GRANT_ENTRIES_PER_PAGE_V1 (PAGE_SIZE / sizeof(grant_entry_v1_t))
#define NR_GRANT_ENTRIES_PER_PAGE_V2 (PAGE_SIZE / sizeof(grant_entry_v2_t))
#define NR_GRANT_ENTRIES_PER_PAGE \
    ((gnt_v == 1) ? NR_GRANT_ENTRIES_PER_PAGE_V1 : NR_GRANT_ENTRIES_PER_PAGE_V2)
#define NR_GRANT_STATUS_PER_PAGE (PAGE_SIZE / sizeof(grant_status_t))

/*
 * VM address of the grant table; a union so the same mapping can be
 * viewed as v1 entries, v2 entries, or raw memory.
 */
union {
	grant_entry_v1_t *gntt_v1;
	grant_entry_v2_t *gntt_v2;
	void *gntt;
} grant_table;

/* Number of grant status frames (v2 only)*/
int gnt_status_frames;

/* VM address of the v2 status array (one grant_status_t per entry) */
grant_status_t *grant_status;
/* Protects the free-list and the table-growing/remapping operations */
kmutex_t grant_lock;

static grant_ref_t xengnt_get_entry(void);
static void xengnt_free_entry(grant_ref_t);
static int xengnt_more_entries(void);
static int xengnt_map_status(void);
static bool xengnt_finish_init(void);

/*
 * One-time initialization: negotiate the grant-table version with the
 * hypervisor (try v2, fall back to v1), query the maximum table size,
 * reserve VA space for the table (and, for v2, the status array),
 * allocate the free-entry stack, and map the frames via
 * xengnt_finish_init().  Panics on unrecoverable failure.
 */
void
xengnt_init(void)
{
	struct gnttab_query_size query;
	int rc;
	int nr_grant_entries;
	int i;

	/* first try to see which version we support */
	struct gnttab_set_version gntversion;
	gnt_v = gntversion.version = 2;
	rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gntversion, 1);
	if (rc < 0 || gntversion.version != 2) {
		aprint_debug("GNTTABOP_set_version 2 failed (%d), "
		    "fall back to version 1\n", rc);
		gnt_v = 1;
	}

	query.dom = DOMID_SELF;
	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
	if ((rc < 0) || (query.status != GNTST_okay))
		gnt_max_grant_frames = 4; /* Legacy max number of frames */
	else
		gnt_max_grant_frames = query.max_nr_frames;

	/*
	 * Always allocate max number of grant frames, never expand in runtime
	 */
	gnt_nr_grant_frames = gnt_max_grant_frames;

	nr_grant_entries =
	    gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE;

	/* VA-only reservation; machine frames are entered later */
	grant_table.gntt = (void *)uvm_km_alloc(kernel_map,
	    gnt_max_grant_frames * PAGE_SIZE, 0, UVM_KMF_VAONLY);
	if (grant_table.gntt == NULL)
		panic("xengnt_init() table no VM space");

	/*
	 * One extra slot so gnt_entries[last_gnt_entry] is always a valid
	 * index even when the stack is completely full (see KASSERTs below).
	 */
	gnt_entries = kmem_alloc((nr_grant_entries + 1) * sizeof(grant_ref_t),
	    KM_SLEEP);
	for (i = 0; i <= nr_grant_entries; i++)
		gnt_entries[i] = XENGNT_NO_ENTRY;

	if (GNT_ISV2) {
		gnt_status_frames =
		    round_page(nr_grant_entries * sizeof(grant_status_t)) / PAGE_SIZE;
		grant_status = (void *)uvm_km_alloc(kernel_map,
		    gnt_status_frames * PAGE_SIZE, 0, UVM_KMF_VAONLY);
		if (grant_status == NULL)
			panic("xengnt_init() status no VM space");
	}

	mutex_init(&grant_lock, MUTEX_DEFAULT, IPL_VM);

	xengnt_finish_init();
}

/*
 * Resume grant table state after a suspend/migration: re-assert the
 * previously negotiated version with the hypervisor and rebuild the
 * frame mappings.  Panics if the hypervisor no longer honours the
 * version we committed to at boot.
 */
bool
xengnt_resume(void)
{
	int rc;

	struct gnttab_set_version gntversion;
	KASSERT(gnt_v == 1 || gnt_v == 2);
	gntversion.version = gnt_v;
	rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gntversion, 1);

	if (GNT_ISV2) {
		if (rc < 0 || gntversion.version != 2) {
			panic("GNTTABOP_set_version 2 failed %d", rc);
		}
	} else {
		/*
		 * For v1, a hypervisor that doesn't implement
		 * set_version at all (rc < 0) is acceptable; only a
		 * successful call reporting a different version is fatal.
		 */
		if (rc == 0 && gntversion.version != 1) {
			panic("GNTTABOP_set_version 1 failed");
		}
	}

	return xengnt_finish_init();
}

/*
 * Shared tail of init and resume: reset the free-entry stack, then
 * grow the table frame-by-frame back to its previous size (which also
 * repopulates the free list), and re-map the v2 status frames.
 */
static bool
xengnt_finish_init(void)
{
	int previous_nr_grant_frames = gnt_nr_grant_frames;

	last_gnt_entry = 0;
	gnt_nr_grant_frames = 0;

	mutex_enter(&grant_lock);
	while (gnt_nr_grant_frames < previous_nr_grant_frames) {
		if (xengnt_more_entries() != 0)
			panic("xengnt_resume: can't restore grant frames");
	}
	if (GNT_ISV2)
		xengnt_map_status();
	mutex_exit(&grant_lock);
	return true;
}

/*
 * Suspend grant table state: invalidate all entries on the free-list
 * and tear down the VA => machine mappings of the table (and v2 status)
 * frames, so that xengnt_resume() can rebuild them from scratch.
 */
bool
xengnt_suspend(void) {

	int i;

	mutex_enter(&grant_lock);
	KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);

	for (i = 0; i < last_gnt_entry; i++) {
		/* invalidate all grant entries (necessary for resume) */
		gnt_entries[i] = XENGNT_NO_ENTRY;
	}

	/* Remove virtual => machine mapping for grant table */
	pmap_kremove((vaddr_t)grant_table.gntt, gnt_nr_grant_frames * PAGE_SIZE);

	if (GNT_ISV2) {
		/* Remove virtual => machine mapping for status table */
		pmap_kremove((vaddr_t)grant_status,
		    gnt_status_frames * PAGE_SIZE);
	}

	pmap_update(pmap_kernel());
	mutex_exit(&grant_lock);
	return true;
}

/*
 * Get status frames and enter them into the VA space (v2 only).
 * On XENPV the hypervisor hands us the frame list; otherwise we
 * allocate pages ourselves and insert them into the physmap.
 * Returns 0 on success, ENOMEM on (recoverable) failure.
 * Called with grant_lock held.
 */
static int
xengnt_map_status(void)
{
	uint64_t *pages;
	size_t sz;
	KASSERT(mutex_owned(&grant_lock));
	KASSERT(GNT_ISV2);

	/* KM_NOSLEEP: called with grant_lock (IPL_VM) held */
	sz = gnt_status_frames * sizeof(*pages);
	pages = kmem_alloc(sz, KM_NOSLEEP);
	if (pages == NULL)
		return ENOMEM;

#ifdef XENPV
	gnttab_get_status_frames_t getstatus;
	int err;

	getstatus.dom = DOMID_SELF;
	getstatus.nr_frames = gnt_status_frames;
	set_xen_guest_handle(getstatus.frame_list, pages);

	/*
	 * get the status frames, and return the list of their virtual
	 * addresses in 'pages'
	 */
	if ((err = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
	    &getstatus, 1)) != 0)
		panic("%s: get_status_frames failed: %d", __func__, err);
	if (getstatus.status != GNTST_okay) {
		aprint_error("%s: get_status_frames returned %d\n",
		    __func__, getstatus.status);
		kmem_free(pages, sz);
		return ENOMEM;
	}
#else /* XENPV */
	for (int i = 0; i < gnt_status_frames; i++) {
		struct vm_page *pg;
		struct xen_add_to_physmap xmap;

		/*
		 * NOTE(review): uvm_pagealloc() result is not checked for
		 * NULL before use; presumably USERESERVE makes failure
		 * unlikely here — confirm whether a check is warranted.
		 */
		pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_USERESERVE|UVM_PGA_ZERO);
		pages[i] = atop(uvm_vm_page_to_phys(pg));

		xmap.domid = DOMID_SELF;
		xmap.space = XENMAPSPACE_grant_table;
		/* XENMAPIDX_grant_table_status selects status frames */
		xmap.idx = i | XENMAPIDX_grant_table_status;
		xmap.gpfn = pages[i];

		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xmap) < 0)
			panic("%s: Unable to add grant tables\n", __func__);
	}
#endif /* XENPV */
	/*
	 * map between status_table addresses and the machine addresses of
	 * the status table frames
	 */
	for (int i = 0; i < gnt_status_frames; i++) {
		pmap_kenter_ma(((vaddr_t)grant_status) + i * PAGE_SIZE,
		    ((paddr_t)pages[i]) << PAGE_SHIFT,
		    VM_PROT_WRITE, 0);
	}
	pmap_update(pmap_kernel());

	kmem_free(pages, sz);
	return 0;
}

/*
 * Add another page to the grant table: obtain/allocate one more frame,
 * enter it into the kernel VA window, and push the frame's entry
 * indices onto the free-list (skipping the tool-reserved entries).
 * Returns 0 on success, ENOMEM on failure.
 * Called with grant_lock held.
 */
static int
xengnt_more_entries(void)
{
	gnttab_setup_table_t setup;
	u_long *pages;
	int nframes_new = gnt_nr_grant_frames + 1;
	int i, start_gnt;
	size_t sz;
	KASSERT(mutex_owned(&grant_lock));

	if (gnt_nr_grant_frames == gnt_max_grant_frames)
		return ENOMEM;

	sz = nframes_new * sizeof(*pages);
	pages = kmem_alloc(sz, KM_NOSLEEP);
	if (pages == NULL)
		return ENOMEM;

	if (xen_feature(XENFEAT_auto_translated_physmap)) {
		/*
		 * Note: Although we allocate space for the entire
		 * table, in this mode we only update one entry at a
		 * time.
		 */
		struct vm_page *pg;
		struct xen_add_to_physmap xmap;

		/*
		 * NOTE(review): as in xengnt_map_status(), pg is used
		 * without a NULL check — confirm intentional.
		 */
		pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_USERESERVE|UVM_PGA_ZERO);
		pages[gnt_nr_grant_frames] = atop(uvm_vm_page_to_phys(pg));

		xmap.domid = DOMID_SELF;
		xmap.space = XENMAPSPACE_grant_table;
		xmap.idx = gnt_nr_grant_frames;
		xmap.gpfn = pages[gnt_nr_grant_frames];

		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xmap) < 0)
			panic("%s: Unable to add grant frames\n", __func__);

	} else {
		setup.dom = DOMID_SELF;
		setup.nr_frames = nframes_new;
		set_xen_guest_handle(setup.frame_list, pages);

		/*
		 * setup the grant table, made of nframes_new frames
		 * and return the list of their virtual addresses
		 * in 'pages'
		 */
		if (HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0)
			panic("%s: setup table failed", __func__);
		if (setup.status != GNTST_okay) {
			aprint_error("%s: setup table returned %d\n",
			    __func__, setup.status);
			kmem_free(pages, sz);
			return ENOMEM;
		}
	}

	DPRINTF(("xengnt_more_entries: map 0x%lx -> %p\n",
	    pages[gnt_nr_grant_frames],
	    (char *)grant_table + gnt_nr_grant_frames * PAGE_SIZE));

	/*
	 * map between grant_table addresses and the machine addresses of
	 * the grant table frames
	 */
	pmap_kenter_ma(((vaddr_t)grant_table.gntt) + gnt_nr_grant_frames * PAGE_SIZE,
	    ((paddr_t)pages[gnt_nr_grant_frames]) << PAGE_SHIFT,
	    VM_PROT_WRITE, 0);
	pmap_update(pmap_kernel());

	/*
	 * add the grant entries associated to the last grant table frame
	 * and mark them as free. Prevent using the first grants (from 0 to 8)
	 * since they are used by the tools.
	 */
	start_gnt = (gnt_nr_grant_frames * NR_GRANT_ENTRIES_PER_PAGE) <
	    (NR_RESERVED_ENTRIES + 1) ?
	    (NR_RESERVED_ENTRIES + 1) :
	    (gnt_nr_grant_frames * NR_GRANT_ENTRIES_PER_PAGE);
	for (i = start_gnt;
	    i < nframes_new * NR_GRANT_ENTRIES_PER_PAGE;
	    i++) {
		KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
		gnt_entries[last_gnt_entry] = i;
		last_gnt_entry++;
	}
	gnt_nr_grant_frames = nframes_new;
	kmem_free(pages, sz);
	return 0;
}

/*
 * Returns a reference to the first free entry in grant table, popping
 * it from the free-list stack; XENGNT_NO_ENTRY if the table is
 * exhausted (reported at most once per 5 seconds).
 * Called with grant_lock held.
 */
static grant_ref_t
xengnt_get_entry(void)
{
	grant_ref_t entry;
	static struct timeval xengnt_nonmemtime;
	static const struct timeval xengnt_nonmemintvl = {5,0};

	KASSERT(mutex_owned(&grant_lock));

	if (__predict_false(last_gnt_entry == 0)) {
		/* rate-limit the complaint to one per xengnt_nonmemintvl */
		if (ratecheck(&xengnt_nonmemtime, &xengnt_nonmemintvl))
			printf("xengnt_get_entry: out of grant "
			    "table entries\n");
		return XENGNT_NO_ENTRY;
	}
	KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
	last_gnt_entry--;
	entry = gnt_entries[last_gnt_entry];
	/* poison the vacated slot so the stack invariant holds */
	gnt_entries[last_gnt_entry] = XENGNT_NO_ENTRY;
	KASSERT(entry != XENGNT_NO_ENTRY && entry > NR_RESERVED_ENTRIES);
	KASSERT(last_gnt_entry >= 0);
	KASSERT(last_gnt_entry <= gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE);
	return entry;
}

/*
 * Mark the grant table entry as free, pushing it back on the
 * free-list stack.  Takes and releases grant_lock itself.
 */
static void
xengnt_free_entry(grant_ref_t entry)
{
	mutex_enter(&grant_lock);
	KASSERT(entry > NR_RESERVED_ENTRIES);
	KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
	KASSERT(last_gnt_entry >= 0);
	KASSERT(last_gnt_entry <= gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE);
	gnt_entries[last_gnt_entry] = entry;
	last_gnt_entry++;
	mutex_exit(&grant_lock);
}

/*
 * Grant domain 'dom' access to the machine page 'ma' (read-only if
 * 'ro' is nonzero).  On success stores the grant reference in
 * *entryp and returns 0; returns ENOMEM if no entry is available.
 */
int
xengnt_grant_access(domid_t dom, paddr_t ma, int ro, grant_ref_t *entryp)
{
	mutex_enter(&grant_lock);

	*entryp = xengnt_get_entry();
	if (__predict_false(*entryp == XENGNT_NO_ENTRY)) {
		mutex_exit(&grant_lock);
		return ENOMEM;
	}

	if (GNT_ISV2) {
		grant_table.gntt_v2[*entryp].full_page.frame = ma >> PAGE_SHIFT;
		grant_table.gntt_v2[*entryp].hdr.domid = dom;
		/*
		 * ensure that the above values reach global visibility
		 * before permitting frame's access (done when we set flags)
		 */
		xen_wmb();
		grant_table.gntt_v2[*entryp].hdr.flags =
		    GTF_permit_access | (ro ? GTF_readonly : 0);
	} else {
		grant_table.gntt_v1[*entryp].frame = ma >> PAGE_SHIFT;
		grant_table.gntt_v1[*entryp].domid = dom;
		/*
		 * ensure that the above values reach global visibility
		 * before permitting frame's access (done when we set flags)
		 */
		xen_wmb();
		grant_table.gntt_v1[*entryp].flags =
		    GTF_permit_access | (ro ? GTF_readonly : 0);
	}
	mutex_exit(&grant_lock);
	return 0;
}

/*
 * Atomic 16-bit compare-and-exchange on *ptr (x86 LOCK CMPXCHGW):
 * if *ptr == val, store newval; either way return the value that
 * was in *ptr.
 */
static inline uint16_t
xen_atomic_cmpxchg16(volatile uint16_t *ptr, uint16_t val, uint16_t newval)
{
	unsigned long result;

	__asm volatile(__LOCK_PREFIX
	    "cmpxchgw %w1,%2"
	    :"=a" (result)
	    :"q"(newval), "m" (*ptr), "0" (val)
	    :"memory");

	return result;
}

/*
 * Revoke access previously granted through 'entry' and return the
 * entry to the free-list.  For v1, panics if the remote end is still
 * using the page; for v2 it only warns (but still frees the entry).
 */
void
xengnt_revoke_access(grant_ref_t entry)
{
	if (GNT_ISV2) {
		grant_table.gntt_v2[entry].hdr.flags = 0;
		xen_mb(); /* Concurrent access by hypervisor */

		if (__predict_false(
		    (grant_status[entry] & (GTF_reading|GTF_writing)) != 0)) {
			printf("xengnt_revoke_access(%u): still in use\n",
			    entry);
		} else {

			/*
			 * The read of grant_status needs to have acquire
			 * semantics.
			 * Reads already have that on x86, so need only protect
			 * against compiler reordering. May need full barrier
			 * on other architectures.
			 */
			__insn_barrier();
		}
	} else {
		uint16_t flags, nflags;

		/*
		 * v1: clear the flags word atomically, retrying until
		 * the cmpxchg observes an unchanged value; bail out if
		 * the hypervisor reports the frame still mapped.
		 */
		nflags = grant_table.gntt_v1[entry].flags;

		do {
			if ((flags = nflags) & (GTF_reading|GTF_writing))
				panic("xengnt_revoke_access: still in use");
			nflags = xen_atomic_cmpxchg16(
			    &grant_table.gntt_v1[entry].flags, flags, 0);
		} while (nflags != flags);

	}
	xengnt_free_entry(entry);
}

/*
 * Return nonzero if the grant entry is currently being read or
 * written by the remote domain, 0 if it is idle.
 */
int
xengnt_status(grant_ref_t entry)
{
	if (GNT_ISV2)
		return grant_status[entry] & (GTF_reading|GTF_writing);
	else
		return (grant_table.gntt_v1[entry].flags & (GTF_reading|GTF_writing));
}