/* $NetBSD: xengnt.c,v 1.38 2020/05/13 16:13:14 jdolecek Exp $ */

/*
 * Copyright (c) 2006 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * Management of the Xen version-2 grant table: the shared memory pages
 * through which this domain grants other domains (typically dom0 or a
 * driver domain) access to its frames.  This file allocates the table,
 * maps/unmaps it across suspend/resume, and hands out/free grant
 * references for the rest of the kernel.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: xengnt.c,v 1.38 2020/05/13 16:13:14 jdolecek Exp $");

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/queue.h>
#include <sys/extent.h>
#include <sys/kernel.h>
#include <sys/mutex.h>
#include <uvm/uvm.h>

#include <xen/hypervisor.h>
#include <xen/xen.h>
#include <xen/granttables.h>

#include "opt_xen.h"

/* #define XENDEBUG */
#ifdef XENDEBUG
#define DPRINTF(x) printf x
#else
#define DPRINTF(x)
#endif

/* External tools reserve first few grant table entries. */
#define NR_RESERVED_ENTRIES 8

/* Current number of frames making up the grant table */
int gnt_nr_grant_frames;
/* Maximum number of frames that can make up the grant table */
int gnt_max_grant_frames;

/*
 * Free-list of grant references, managed as a stack:
 * gnt_entries[0 .. last_gnt_entry-1] are free references,
 * gnt_entries[last_gnt_entry] is always XENGNT_NO_ENTRY (sentinel).
 */
/* table of free grant entries */
grant_ref_t *gnt_entries;
/* last free entry */
int last_gnt_entry;
/* empty entry in the list */
#define XENGNT_NO_ENTRY 0xffffffff

/* Grant entries / status words that fit in one page. */
#define NR_GRANT_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(grant_entry_v2_t))
#define NR_GRANT_STATUS_PER_PAGE (PAGE_SIZE / sizeof(grant_status_t))

/* VM address of the grant table (shared with the hypervisor) */
grant_entry_v2_t *grant_table;
/* Number of grant status frames */
int gnt_status_frames;

/* VM address of the v2 status table (one grant_status_t per entry) */
grant_status_t *grant_status;
/* Protects the free-list and table growth; IPL_VM (see mutex_init below). */
kmutex_t grant_lock;

static grant_ref_t xengnt_get_entry(void);
static void xengnt_free_entry(grant_ref_t);
static int xengnt_more_entries(void);
static int xengnt_map_status(void);

/*
 * One-time initialization: query the hypervisor for the maximum table
 * size, reserve VA space for the grant and status tables, build the
 * free-list, then populate everything via xengnt_resume().
 * Panics on failure (called early at boot).
 */
void
xengnt_init(void)
{
	struct gnttab_query_size query;
	int rc;
	int nr_grant_entries;
	int i;

	query.dom = DOMID_SELF;
	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
	if ((rc < 0) || (query.status != GNTST_okay))
		gnt_max_grant_frames = 4; /* Legacy max number of frames */
	else
		gnt_max_grant_frames = query.max_nr_frames;

	/*
	 * Always allocate max number of grant frames, never expand in runtime
	 */
	gnt_nr_grant_frames = gnt_max_grant_frames;

	nr_grant_entries =
	    gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE;

	/* VA only: the frames are entered later with pmap_kenter_ma(). */
	grant_table = (void *)uvm_km_alloc(kernel_map,
	    gnt_max_grant_frames * PAGE_SIZE, 0, UVM_KMF_VAONLY);
	if (grant_table == NULL)
		panic("xengnt_init() table no VM space");

	/* "+ 1" for the XENGNT_NO_ENTRY sentinel slot at the top. */
	gnt_entries = kmem_alloc((nr_grant_entries + 1) * sizeof(grant_ref_t),
	    KM_SLEEP);
	for (i = 0; i <= nr_grant_entries; i++)
		gnt_entries[i] = XENGNT_NO_ENTRY;

	gnt_status_frames =
	    round_page(nr_grant_entries * sizeof(grant_status_t)) / PAGE_SIZE;
	grant_status = (void *)uvm_km_alloc(kernel_map,
	    gnt_status_frames * PAGE_SIZE, 0, UVM_KMF_VAONLY);
	if (grant_status == NULL)
		panic("xengnt_init() status no VM space");

	mutex_init(&grant_lock, MUTEX_DEFAULT, IPL_VM);

	xengnt_resume();
}

/*
 * Resume grant table state
 *
 * Switches the hypervisor to grant-table v2, then re-adds every frame
 * that existed before suspend (xengnt_more_entries() also rebuilds the
 * free-list entries for frames added for the first time) and remaps the
 * status frames.  Panics if the pre-suspend table cannot be restored.
 * Always returns true.
 */
bool
xengnt_resume(void)
{
	int rc;
	int previous_nr_grant_frames = gnt_nr_grant_frames;

	struct gnttab_set_version gntversion;
	gntversion.version = 2;
	rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gntversion, 1);
	if (rc < 0 || gntversion.version != 2)
		panic("GNTTABOP_set_version 2 failed %d", rc);

	/* Rebuild from scratch: the hypervisor state was lost at suspend. */
	last_gnt_entry = 0;
	gnt_nr_grant_frames = 0;

	mutex_enter(&grant_lock);
	while (gnt_nr_grant_frames < previous_nr_grant_frames) {
		if (xengnt_more_entries() != 0)
			panic("xengnt_resume: can't restore grant frames");
	}
	xengnt_map_status();
	mutex_exit(&grant_lock);
	return true;
}

/*
 * Suspend grant table state
 *
 * Invalidates the free-list and tears down the VA => MA mappings of both
 * tables; the machine frames themselves are reclaimed by the hypervisor
 * across the suspend.  Always returns true.
 */
bool
xengnt_suspend(void) {

	int i;

	mutex_enter(&grant_lock);
	KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);

	for (i = 0; i < last_gnt_entry; i++) {
		/* invalidate all grant entries (necessary for resume) */
		gnt_entries[i] = XENGNT_NO_ENTRY;
	}

	/* Remove virtual => machine mapping for grant table */
	pmap_kremove((vaddr_t)grant_table, gnt_nr_grant_frames * PAGE_SIZE);

	/* Remove virtual => machine mapping for status table */
	pmap_kremove((vaddr_t)grant_status, gnt_status_frames * PAGE_SIZE);

	pmap_update(pmap_kernel());
	mutex_exit(&grant_lock);
	return true;
}

/*
 * Get status frames and enter them into the VA space.
 *
 * XENPV: ask the hypervisor for the frame list (GNTTABOP_get_status_frames).
 * Non-XENPV (PVH/HVM): allocate guest pages and hand them to the hypervisor
 * via XENMEM_add_to_physmap with XENMAPIDX_grant_table_status.
 * Returns 0 on success, ENOMEM on failure.  Caller holds grant_lock.
 */
static int
xengnt_map_status(void)
{
	uint64_t *pages;
	size_t sz;
	KASSERT(mutex_owned(&grant_lock));

	sz = gnt_status_frames * sizeof(*pages);
	pages = kmem_alloc(sz, KM_NOSLEEP);
	if (pages == NULL)
		return ENOMEM;

#ifdef XENPV
	gnttab_get_status_frames_t getstatus;
	int err;

	getstatus.dom = DOMID_SELF;
	getstatus.nr_frames = gnt_status_frames;
	set_xen_guest_handle(getstatus.frame_list, pages);

	/*
	 * get the status frames, and return the list of their virtual
	 * addresses in 'pages'
	 */
	if ((err = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
	    &getstatus, 1)) != 0)
		panic("%s: get_status_frames failed: %d", __func__, err);
	if (getstatus.status != GNTST_okay) {
		aprint_error("%s: get_status_frames returned %d\n",
		    __func__, getstatus.status);
		kmem_free(pages, sz);
		return ENOMEM;
	}
#else /* XENPV */
	for (int i = 0; i < gnt_status_frames; i++) {
		struct vm_page *pg;
		struct xen_add_to_physmap xmap;

		/*
		 * NOTE(review): uvm_pagealloc() may return NULL and pg is
		 * dereferenced unconditionally below — confirm whether
		 * UVM_PGA_USERESERVE makes failure impossible here.
		 */
		pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_USERESERVE|UVM_PGA_ZERO);
		pages[i] = atop(uvm_vm_page_to_phys(pg));

		xmap.domid = DOMID_SELF;
		xmap.space = XENMAPSPACE_grant_table;
		/* high bit selects the status part of the grant table */
		xmap.idx = i | XENMAPIDX_grant_table_status;
		xmap.gpfn = pages[i];

		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xmap) < 0)
			panic("%s: Unable to add grant tables\n", __func__);
	}
#endif /* XENPV */
	/*
	 * map between status_table addresses and the machine addresses of
	 * the status table frames
	 */
	for (int i = 0; i < gnt_status_frames; i++) {
		pmap_kenter_ma(((vaddr_t)grant_status) + i * PAGE_SIZE,
		    ((paddr_t)pages[i]) << PAGE_SHIFT,
		    VM_PROT_WRITE, 0);
	}
	pmap_update(pmap_kernel());

	kmem_free(pages, sz);
	return 0;
}

/*
 * Add another page to the grant table
 * Returns 0 on success, ENOMEM on failure
 *
 * Grows the table by exactly one frame (auto-translated guests add one
 * page via XENMEM_add_to_physmap; PV guests re-issue GNTTABOP_setup_table
 * for the new total), maps the new frame at the end of grant_table, and
 * pushes the new frame's references onto the free-list — skipping
 * references 0..NR_RESERVED_ENTRIES, which belong to external tools.
 * Caller holds grant_lock.
 */
static int
xengnt_more_entries(void)
{
	gnttab_setup_table_t setup;
	u_long *pages;
	int nframes_new = gnt_nr_grant_frames + 1;
	int i, start_gnt;
	size_t sz;
	KASSERT(mutex_owned(&grant_lock));

	if (gnt_nr_grant_frames == gnt_max_grant_frames)
		return ENOMEM;

	sz = nframes_new * sizeof(*pages);
	pages = kmem_alloc(sz, KM_NOSLEEP);
	if (pages == NULL)
		return ENOMEM;

	if (xen_feature(XENFEAT_auto_translated_physmap)) {
		/*
		 * Note: Although we allocate space for the entire
		 * table, in this mode we only update one entry at a
		 * time.
		 */
		struct vm_page *pg;
		struct xen_add_to_physmap xmap;

		/* NOTE(review): pg not checked for NULL — see xengnt_map_status. */
		pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_USERESERVE|UVM_PGA_ZERO);
		pages[gnt_nr_grant_frames] = atop(uvm_vm_page_to_phys(pg));

		xmap.domid = DOMID_SELF;
		xmap.space = XENMAPSPACE_grant_table;
		xmap.idx = gnt_nr_grant_frames;
		xmap.gpfn = pages[gnt_nr_grant_frames];

		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xmap) < 0)
			panic("%s: Unable to add grant frames\n", __func__);

	} else {
		setup.dom = DOMID_SELF;
		setup.nr_frames = nframes_new;
		set_xen_guest_handle(setup.frame_list, pages);

		/*
		 * setup the grant table, made of nframes_new frames
		 * and return the list of their virtual addresses
		 * in 'pages'
		 */
		if (HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0)
			panic("%s: setup table failed", __func__);
		if (setup.status != GNTST_okay) {
			aprint_error("%s: setup table returned %d\n",
			    __func__, setup.status);
			kmem_free(pages, sz);
			return ENOMEM;
		}
	}

	DPRINTF(("xengnt_more_entries: map 0x%lx -> %p\n",
	    pages[gnt_nr_grant_frames],
	    (char *)grant_table + gnt_nr_grant_frames * PAGE_SIZE));

	/*
	 * map between grant_table addresses and the machine addresses of
	 * the grant table frames
	 */
	pmap_kenter_ma(((vaddr_t)grant_table) + gnt_nr_grant_frames * PAGE_SIZE,
	    ((paddr_t)pages[gnt_nr_grant_frames]) << PAGE_SHIFT,
	    VM_PROT_WRITE, 0);
	pmap_update(pmap_kernel());

	/*
	 * add the grant entries associated to the last grant table frame
	 * and mark them as free. Prevent using the first grants (from 0 to 8)
	 * since they are used by the tools.
	 */
	start_gnt = (gnt_nr_grant_frames * NR_GRANT_ENTRIES_PER_PAGE) <
	    (NR_RESERVED_ENTRIES + 1) ?
	    (NR_RESERVED_ENTRIES + 1) :
	    (gnt_nr_grant_frames * NR_GRANT_ENTRIES_PER_PAGE);
	for (i = start_gnt;
	    i < nframes_new * NR_GRANT_ENTRIES_PER_PAGE;
	    i++) {
		KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
		gnt_entries[last_gnt_entry] = i;
		last_gnt_entry++;
	}
	gnt_nr_grant_frames = nframes_new;
	kmem_free(pages, sz);
	return 0;
}

/*
 * Returns a reference to the first free entry in grant table
 *
 * Pops a reference off the free-list stack, or XENGNT_NO_ENTRY if the
 * list is empty (a rate-limited message is printed in that case).
 * Caller holds grant_lock.
 */
static grant_ref_t
xengnt_get_entry(void)
{
	grant_ref_t entry;
	static struct timeval xengnt_nonmemtime;
	static const struct timeval xengnt_nonmemintvl = {5,0};

	KASSERT(mutex_owned(&grant_lock));

	if (__predict_false(last_gnt_entry == 0)) {
		/* exhausted: table already at gnt_max_grant_frames */
		if (ratecheck(&xengnt_nonmemtime, &xengnt_nonmemintvl))
			printf("xengnt_get_entry: out of grant "
			    "table entries\n");
		return XENGNT_NO_ENTRY;
	}
	KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
	last_gnt_entry--;
	entry = gnt_entries[last_gnt_entry];
	gnt_entries[last_gnt_entry] = XENGNT_NO_ENTRY;
	KASSERT(entry != XENGNT_NO_ENTRY && entry > NR_RESERVED_ENTRIES);
	KASSERT(last_gnt_entry >= 0);
	KASSERT(last_gnt_entry <= gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE);
	return entry;
}

/*
 * Mark the grant table entry as free
 *
 * Pushes the reference back on the free-list stack.  Takes grant_lock
 * itself (unlike xengnt_get_entry(), which expects it held).
 */
static void
xengnt_free_entry(grant_ref_t entry)
{
	mutex_enter(&grant_lock);
	KASSERT(entry > NR_RESERVED_ENTRIES);
	KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
	KASSERT(last_gnt_entry >= 0);
	KASSERT(last_gnt_entry <= gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE);
	gnt_entries[last_gnt_entry] = entry;
	last_gnt_entry++;
	mutex_exit(&grant_lock);
}

/*
 * Grant domain 'dom' access (read-only if 'ro' != 0) to the machine
 * frame containing machine address 'ma'.  On success returns 0 and
 * stores the allocated grant reference in *entryp; returns ENOMEM when
 * no grant reference is available.
 */
int
xengnt_grant_access(domid_t dom, paddr_t ma, int ro, grant_ref_t *entryp)
{
	mutex_enter(&grant_lock);

	*entryp = xengnt_get_entry();
	if (__predict_false(*entryp == XENGNT_NO_ENTRY)) {
		mutex_exit(&grant_lock);
		return ENOMEM;
	}

	grant_table[*entryp].full_page.frame = ma >> PAGE_SHIFT;
	grant_table[*entryp].hdr.domid = dom;
	/*
	 * ensure that the above values reach global visibility
	 * before permitting frame's access (done when we set flags)
	 *
	 * NOTE(review): the comment describes store-store ordering but
	 * xen_rmb() is a read barrier; presumably sufficient on x86 where
	 * stores are not reordered with stores — confirm for other ports.
	 */
	xen_rmb();
	grant_table[*entryp].hdr.flags =
	    GTF_permit_access | (ro ? GTF_readonly : 0);
	mutex_exit(&grant_lock);
	return 0;
}

/*
 * Revoke the access granted through 'entry' and release the reference.
 * If the remote domain still has the frame mapped (status shows
 * GTF_reading/GTF_writing) a diagnostic is printed, but the entry is
 * freed regardless.
 */
void
xengnt_revoke_access(grant_ref_t entry)
{
	grant_table[entry].hdr.flags = 0;
	xen_mb(); /* Concurrent access by hypervisor */

	if (__predict_false((grant_status[entry] & (GTF_reading|GTF_writing))
	    != 0))
		printf("xengnt_revoke_access(%u): still in use\n",
		    entry);
	else {

		/*
		 * The read of grant_status needs to have acquire semantics.
		 * Reads already have that on x86, so need only protect
		 * against compiler reordering. May need full barrier
		 * on other architectures.
		 */
		__insn_barrier();
	}
	xengnt_free_entry(entry);
}

/*
 * Return the in-use bits (GTF_reading|GTF_writing) of grant 'entry',
 * i.e. non-zero while the remote domain still has the frame mapped.
 */
int
xengnt_status(grant_ref_t entry)
{
	return grant_status[entry] & (GTF_reading|GTF_writing);
}