xref: /illumos-gate/usr/src/uts/intel/io/vmm/vmm_vm.c (revision b9b43e848e8a93ea5f612b51b05c8bb8bf612ee2)
17c8c0b82SPatrick Mooney /*
27c8c0b82SPatrick Mooney  * This file and its contents are supplied under the terms of the
37c8c0b82SPatrick Mooney  * Common Development and Distribution License ("CDDL"), version 1.0.
47c8c0b82SPatrick Mooney  * You may only use this file in accordance with the terms of version
57c8c0b82SPatrick Mooney  * 1.0 of the CDDL.
67c8c0b82SPatrick Mooney  *
77c8c0b82SPatrick Mooney  * A full copy of the text of the CDDL should have accompanied this
87c8c0b82SPatrick Mooney  * source.  A copy of the CDDL is also available via the Internet at
97c8c0b82SPatrick Mooney  * http://www.illumos.org/license/CDDL.
107c8c0b82SPatrick Mooney  */
117c8c0b82SPatrick Mooney /* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */
127c8c0b82SPatrick Mooney 
137c8c0b82SPatrick Mooney /*
147c8c0b82SPatrick Mooney  * Copyright 2019 Joyent, Inc.
153a0fa64cSPatrick Mooney  * Copyright 2023 Oxide Computer Company
167c8c0b82SPatrick Mooney  * Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
177c8c0b82SPatrick Mooney  */
187c8c0b82SPatrick Mooney 
197c8c0b82SPatrick Mooney #include <sys/param.h>
207c8c0b82SPatrick Mooney #include <sys/kmem.h>
217c8c0b82SPatrick Mooney #include <sys/thread.h>
227c8c0b82SPatrick Mooney #include <sys/list.h>
237c8c0b82SPatrick Mooney #include <sys/mman.h>
247c8c0b82SPatrick Mooney #include <sys/types.h>
257c8c0b82SPatrick Mooney #include <sys/ddi.h>
267c8c0b82SPatrick Mooney #include <sys/sysmacros.h>
277c8c0b82SPatrick Mooney #include <sys/machsystm.h>
287c8c0b82SPatrick Mooney #include <sys/vmsystm.h>
297c8c0b82SPatrick Mooney #include <sys/x86_archext.h>
307c8c0b82SPatrick Mooney #include <vm/as.h>
317c8c0b82SPatrick Mooney #include <vm/hat_i86.h>
327c8c0b82SPatrick Mooney #include <vm/seg_vn.h>
337c8c0b82SPatrick Mooney #include <vm/seg_kmem.h>
347c8c0b82SPatrick Mooney 
357c8c0b82SPatrick Mooney #include <sys/vmm_vm.h>
367c8c0b82SPatrick Mooney #include <sys/seg_vmm.h>
377c8c0b82SPatrick Mooney #include <sys/vmm_kernel.h>
387c8c0b82SPatrick Mooney #include <sys/vmm_reservoir.h>
397c8c0b82SPatrick Mooney #include <sys/vmm_gpt.h>
407c8c0b82SPatrick Mooney 
417c8c0b82SPatrick Mooney 
427c8c0b82SPatrick Mooney /*
437c8c0b82SPatrick Mooney  * VMM Virtual Memory
447c8c0b82SPatrick Mooney  *
457c8c0b82SPatrick Mooney  * History
467c8c0b82SPatrick Mooney  *
477c8c0b82SPatrick Mooney  * When bhyve was ported to illumos, one significant hole was handling guest
487c8c0b82SPatrick Mooney  * memory and memory accesses.  In the original Pluribus port, bhyve itself
497c8c0b82SPatrick Mooney  * manually handled the EPT structures for guest memory.  The updated sources
507c8c0b82SPatrick Mooney  * (from FreeBSD 11) took a different approach, using the native FreeBSD VM
517c8c0b82SPatrick Mooney  * system for memory allocations and management of the EPT structures.  Keeping
527c8c0b82SPatrick Mooney  * source differences to a minimum was a priority, so illumos-bhyve implemented
537c8c0b82SPatrick Mooney  * a makeshift "VM shim" which exposed the bare minimum of those interfaces to
547c8c0b82SPatrick Mooney  * boot and run guests.
557c8c0b82SPatrick Mooney  *
567c8c0b82SPatrick Mooney  * While the VM shim was successful in getting illumos-bhyve to a functional
577c8c0b82SPatrick Mooney  * state on Intel (and later AMD) gear, the FreeBSD-specific nature of the
587c8c0b82SPatrick Mooney  * compatibility interfaces made it awkward to use.  As source differences with
597c8c0b82SPatrick Mooney  * the upstream kernel code became less of a concern, and upcoming features
607c8c0b82SPatrick Mooney  * (such as live migration) would demand more of those VM interfaces, it became
617c8c0b82SPatrick Mooney  * clear that an overhaul was prudent.
627c8c0b82SPatrick Mooney  *
637c8c0b82SPatrick Mooney  * Design
647c8c0b82SPatrick Mooney  *
657c8c0b82SPatrick Mooney  * The new VM system for bhyve retains a number of the same concepts as what it
667c8c0b82SPatrick Mooney  * replaces:
677c8c0b82SPatrick Mooney  *
687c8c0b82SPatrick Mooney  * - `vmspace_t` is the top-level entity for a guest memory space
697c8c0b82SPatrick Mooney  * - `vm_object_t` represents a memory object which can be mapped into a vmspace
707c8c0b82SPatrick Mooney  * - `vm_page_t` represents a page hold within a given vmspace, providing access
717c8c0b82SPatrick Mooney  *   to the underlying memory page
727c8c0b82SPatrick Mooney  *
737c8c0b82SPatrick Mooney  * Unlike the old code, where most of the involved structures were exposed via
747c8c0b82SPatrick Mooney  * public definitions, this replacement VM interface keeps all involved
757c8c0b82SPatrick Mooney  * structures opaque to consumers.  Furthermore, there is a clear delineation
767c8c0b82SPatrick Mooney  * between infrequent administrative operations (such as mapping/unmapping
777c8c0b82SPatrick Mooney  * regions) and common data-path operations (attempting a page hold at a given
787c8c0b82SPatrick Mooney  * guest-physical address).  Those administrative operations are performed
797c8c0b82SPatrick Mooney  * directly against the vmspace, whereas the data-path operations are performed
807c8c0b82SPatrick Mooney  * through a `vm_client_t` handle.  That VM client abstraction is meant to
817c8c0b82SPatrick Mooney  * reduce contention and overhead for frequent access operations and provide
827c8c0b82SPatrick Mooney  * debugging insight into how different subcomponents are accessing the vmspace.
837c8c0b82SPatrick Mooney  * A VM client is allocated for each vCPU, each viona ring (via the vmm_drv
847c8c0b82SPatrick Mooney  * interface) and each VMM userspace segment mapping.
857c8c0b82SPatrick Mooney  *
867c8c0b82SPatrick Mooney  * Exclusion
877c8c0b82SPatrick Mooney  *
887c8c0b82SPatrick Mooney  * Making changes to the vmspace (such as mapping or unmapping regions) requires
897c8c0b82SPatrick Mooney  * other accessors be excluded while the change is underway to prevent them from
907c8c0b82SPatrick Mooney  * observing invalid intermediate states.  A simple approach could use a mutex
917c8c0b82SPatrick Mooney  * or rwlock to achieve this, but that risks contention when the rate of access
927c8c0b82SPatrick Mooney  * to the vmspace is high.
937c8c0b82SPatrick Mooney  *
947c8c0b82SPatrick Mooney  * Since vmspace changes (map/unmap) are rare, we can instead do the exclusion
957c8c0b82SPatrick Mooney  * on a per-vm_client_t basis.  While this raises the cost for vmspace changes,
967c8c0b82SPatrick Mooney  * it means that the much more common page accesses through the vm_client can
977c8c0b82SPatrick Mooney  * normally proceed unimpeded and independently.
987c8c0b82SPatrick Mooney  *
997c8c0b82SPatrick Mooney  * When a change to the vmspace is required, the caller will put the vmspace in
1007c8c0b82SPatrick Mooney  * a 'hold' state, iterating over all associated vm_client instances, waiting
1017c8c0b82SPatrick Mooney  * for them to complete any in-flight lookup (indicated by VCS_ACTIVE) before
1027c8c0b82SPatrick Mooney  * setting VCS_HOLD in their state flag fields.  With VCS_HOLD set, any call on
1037c8c0b82SPatrick Mooney  * the vm_client which would access the vmspace state (vmc_hold or vmc_fault)
1047c8c0b82SPatrick Mooney  * will block until the hold condition is cleared.  Once the hold is asserted
1057c8c0b82SPatrick Mooney  * for all clients, the vmspace change can proceed with confidence.  Upon
1067c8c0b82SPatrick Mooney  * completion of that operation, VCS_HOLD is cleared from the clients, and they
1077c8c0b82SPatrick Mooney  * are released to resume vmspace accesses.
1087c8c0b82SPatrick Mooney  *
1097c8c0b82SPatrick Mooney  * vCPU Consumers
1107c8c0b82SPatrick Mooney  *
1117c8c0b82SPatrick Mooney  * Access to the vmspace for vCPUs running in guest context is different from
1127c8c0b82SPatrick Mooney  * emulation-related vm_client activity: vCPUs rely solely on the contents of the
1137c8c0b82SPatrick Mooney  * page tables.  Furthermore, the existing VCS_HOLD mechanism used to exclude
1147c8c0b82SPatrick Mooney  * client access is not feasible when entering guest context, since interrupts
1157c8c0b82SPatrick Mooney  * are disabled, making it impossible to block entry.  This is not a concern as
1167c8c0b82SPatrick Mooney  * long as vmspace modifications never place the page tables in invalid states
1177c8c0b82SPatrick Mooney  * (either intermediate, or final).  The vm_client hold mechanism does provide
1187c8c0b82SPatrick Mooney  * the means to IPI vCPU consumers which will trigger a notification once they
1197c8c0b82SPatrick Mooney  * report their exit from guest context.  This can be used to ensure that page
1207c8c0b82SPatrick Mooney  * table modifications are made visible to those vCPUs within a certain
1217c8c0b82SPatrick Mooney  * time frame.
1227c8c0b82SPatrick Mooney  */
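
/*
 * A rough sketch of the exclusion protocol described above, as followed by the
 * administrative paths later in this file (vmspace_unmap(),
 * vmspace_set_tracking(), vmspace_bits_operate()):
 *
 *	vmspace_hold_enter(vms);	(assert VCS_HOLD in every client)
 *	... mutate mappings and/or nested page table entries ...
 *	vms->vms_pt_gen++;		(advance the table generation)
 *	vmspace_clients_invalidate(vms, gpa, len);
 *	vmspace_hold_exit(vms, true);	(release clients; IPI on-CPU vCPUs)
 */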
1237c8c0b82SPatrick Mooney 
1247c8c0b82SPatrick Mooney typedef struct vmspace_mapping {
1257c8c0b82SPatrick Mooney 	list_node_t	vmsm_node;
1267c8c0b82SPatrick Mooney 	vm_object_t	*vmsm_object;	/* object backing this mapping */
1277c8c0b82SPatrick Mooney 	uintptr_t	vmsm_addr;	/* start addr in vmspace for mapping */
1287c8c0b82SPatrick Mooney 	size_t		vmsm_len;	/* length (in bytes) of mapping */
1297c8c0b82SPatrick Mooney 	off_t		vmsm_offset;	/* byte offset into object */
1307c8c0b82SPatrick Mooney 	uint_t		vmsm_prot;
1317c8c0b82SPatrick Mooney } vmspace_mapping_t;
1327c8c0b82SPatrick Mooney 
1337c8c0b82SPatrick Mooney #define	VMSM_OFFSET(vmsm, addr)	(			\
1347c8c0b82SPatrick Mooney 	    (vmsm)->vmsm_offset +			\
1357c8c0b82SPatrick Mooney 	    ((addr) - (uintptr_t)(vmsm)->vmsm_addr))
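
/*
 * Worked example (arbitrary values): for a mapping with vmsm_addr = 0x10000000
 * and vmsm_offset = 0x2000, VMSM_OFFSET(vmsm, 0x10003000) evaluates to
 * 0x2000 + (0x10003000 - 0x10000000) = 0x5000, the byte offset into the
 * backing vm_object for that guest-physical address.
 */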
1367c8c0b82SPatrick Mooney 
1377c8c0b82SPatrick Mooney typedef enum vm_client_state {
1387c8c0b82SPatrick Mooney 	VCS_IDLE	= 0,
1397c8c0b82SPatrick Mooney 	/* currently accessing vmspace for client operation (hold or fault) */
1407c8c0b82SPatrick Mooney 	VCS_ACTIVE	= (1 << 0),
1417c8c0b82SPatrick Mooney 	/* client hold requested/asserted */
1427c8c0b82SPatrick Mooney 	VCS_HOLD	= (1 << 1),
1437c8c0b82SPatrick Mooney 	/* vCPU is accessing page tables in guest context */
1447c8c0b82SPatrick Mooney 	VCS_ON_CPU	= (1 << 2),
1457c8c0b82SPatrick Mooney 	/* client has been orphaned (no more access to vmspace) */
1467c8c0b82SPatrick Mooney 	VCS_ORPHANED	= (1 << 3),
1477c8c0b82SPatrick Mooney 	/* client undergoing destroy operation */
1487c8c0b82SPatrick Mooney 	VCS_DESTROY	= (1 << 4),
1497c8c0b82SPatrick Mooney } vm_client_state_t;
1507c8c0b82SPatrick Mooney 
1517c8c0b82SPatrick Mooney struct vmspace {
1527c8c0b82SPatrick Mooney 	kmutex_t	vms_lock;
1537c8c0b82SPatrick Mooney 	kcondvar_t	vms_cv;
1547c8c0b82SPatrick Mooney 	bool		vms_held;
1557c8c0b82SPatrick Mooney 	uintptr_t	vms_size;	/* immutable after creation */
1567c8c0b82SPatrick Mooney 
1577c8c0b82SPatrick Mooney 	/* (nested) page table state */
1587c8c0b82SPatrick Mooney 	vmm_gpt_t	*vms_gpt;
1597c8c0b82SPatrick Mooney 	uint64_t	vms_pt_gen;
1607c8c0b82SPatrick Mooney 	uint64_t	vms_pages_mapped;
1617c8c0b82SPatrick Mooney 	bool		vms_track_dirty;
1627c8c0b82SPatrick Mooney 
1637c8c0b82SPatrick Mooney 	list_t		vms_maplist;
1647c8c0b82SPatrick Mooney 	list_t		vms_clients;
1657c8c0b82SPatrick Mooney };
1667c8c0b82SPatrick Mooney 
1677c8c0b82SPatrick Mooney struct vm_client {
1687c8c0b82SPatrick Mooney 	vmspace_t	*vmc_space;
1697c8c0b82SPatrick Mooney 	list_node_t	vmc_node;
1707c8c0b82SPatrick Mooney 
1717c8c0b82SPatrick Mooney 	kmutex_t	vmc_lock;
1727c8c0b82SPatrick Mooney 	kcondvar_t	vmc_cv;
1737c8c0b82SPatrick Mooney 	vm_client_state_t vmc_state;
1747c8c0b82SPatrick Mooney 	int		vmc_cpu_active;
1757c8c0b82SPatrick Mooney 	uint64_t	vmc_cpu_gen;
1767c8c0b82SPatrick Mooney 	bool		vmc_track_dirty;
1777c8c0b82SPatrick Mooney 	vmc_inval_cb_t	vmc_inval_func;
1787c8c0b82SPatrick Mooney 	void		*vmc_inval_data;
1797c8c0b82SPatrick Mooney 
1807c8c0b82SPatrick Mooney 	list_t		vmc_held_pages;
1817c8c0b82SPatrick Mooney };
1827c8c0b82SPatrick Mooney 
1837c8c0b82SPatrick Mooney typedef enum vm_object_type {
1847c8c0b82SPatrick Mooney 	VMOT_NONE,
1857c8c0b82SPatrick Mooney 	VMOT_MEM,
1867c8c0b82SPatrick Mooney 	VMOT_MMIO,
1877c8c0b82SPatrick Mooney } vm_object_type_t;
1887c8c0b82SPatrick Mooney 
1897c8c0b82SPatrick Mooney struct vm_object {
1907c8c0b82SPatrick Mooney 	uint_t		vmo_refcnt;	/* manipulated with atomic ops */
1917c8c0b82SPatrick Mooney 
1927c8c0b82SPatrick Mooney 	/* Fields below are fixed at creation time */
1937c8c0b82SPatrick Mooney 	vm_object_type_t vmo_type;
1947c8c0b82SPatrick Mooney 	size_t		vmo_size;
1957c8c0b82SPatrick Mooney 	void		*vmo_data;
1967c8c0b82SPatrick Mooney 	uint8_t		vmo_attr;
1977c8c0b82SPatrick Mooney };
1987c8c0b82SPatrick Mooney 
199f2357d97SPatrick Mooney /* Convenience consolidation of all flag(s) for validity checking */
200f2357d97SPatrick Mooney #define	VPF_ALL		(VPF_DEFER_DIRTY)
201f2357d97SPatrick Mooney 
2027c8c0b82SPatrick Mooney struct vm_page {
2037c8c0b82SPatrick Mooney 	vm_client_t	*vmp_client;
2047c8c0b82SPatrick Mooney 	list_node_t	vmp_node;
2057c8c0b82SPatrick Mooney 	vm_page_t	*vmp_chain;
2067c8c0b82SPatrick Mooney 	uintptr_t	vmp_gpa;
2077c8c0b82SPatrick Mooney 	pfn_t		vmp_pfn;
2087c8c0b82SPatrick Mooney 	uint64_t	*vmp_ptep;
2097c8c0b82SPatrick Mooney 	vm_object_t	*vmp_obj_ref;
210f2357d97SPatrick Mooney 	uint8_t		vmp_prot;
211f2357d97SPatrick Mooney 	uint8_t		vmp_flags;
2127c8c0b82SPatrick Mooney };
2137c8c0b82SPatrick Mooney 
2147c8c0b82SPatrick Mooney static vmspace_mapping_t *vm_mapping_find(vmspace_t *, uintptr_t, size_t);
2157c8c0b82SPatrick Mooney static void vmspace_hold_enter(vmspace_t *);
2167c8c0b82SPatrick Mooney static void vmspace_hold_exit(vmspace_t *, bool);
217*b9b43e84SPatrick Mooney static void vmspace_clients_invalidate(vmspace_t *, uintptr_t, size_t);
218*b9b43e84SPatrick Mooney static int vmspace_ensure_mapped(vmspace_t *, uintptr_t, int, pfn_t *,
219*b9b43e84SPatrick Mooney     uint64_t *);
2207c8c0b82SPatrick Mooney static void vmc_space_hold(vm_client_t *);
2217c8c0b82SPatrick Mooney static void vmc_space_release(vm_client_t *, bool);
2227c8c0b82SPatrick Mooney static void vmc_space_invalidate(vm_client_t *, uintptr_t, size_t, uint64_t);
2237c8c0b82SPatrick Mooney static void vmc_space_unmap(vm_client_t *, uintptr_t, size_t, vm_object_t *);
2247c8c0b82SPatrick Mooney static vm_client_t *vmc_space_orphan(vm_client_t *, vmspace_t *);
2257c8c0b82SPatrick Mooney 
2267c8c0b82SPatrick Mooney 
2277c8c0b82SPatrick Mooney /*
2287c8c0b82SPatrick Mooney  * Create a new vmspace with a maximum address of `end`.
2297c8c0b82SPatrick Mooney  */
2307c8c0b82SPatrick Mooney vmspace_t *
2317c8c0b82SPatrick Mooney vmspace_alloc(size_t end, vmm_pte_ops_t *pte_ops, bool track_dirty)
2327c8c0b82SPatrick Mooney {
2337c8c0b82SPatrick Mooney 	vmspace_t *vms;
2347c8c0b82SPatrick Mooney 	const uintptr_t size = end + 1;
2357c8c0b82SPatrick Mooney 
2367c8c0b82SPatrick Mooney 	/*
2377c8c0b82SPatrick Mooney 	 * This whole mess is built on the assumption that a 64-bit address
2387c8c0b82SPatrick Mooney 	 * space is available to work with for the various pagetable tricks.
2397c8c0b82SPatrick Mooney 	 */
2407c8c0b82SPatrick Mooney 	VERIFY(size > 0 && (size & PAGEOFFSET) == 0 &&
2417c8c0b82SPatrick Mooney 	    size <= (uintptr_t)USERLIMIT);
2427c8c0b82SPatrick Mooney 
2437c8c0b82SPatrick Mooney 	vms = kmem_zalloc(sizeof (*vms), KM_SLEEP);
2447c8c0b82SPatrick Mooney 	vms->vms_size = size;
2457c8c0b82SPatrick Mooney 	list_create(&vms->vms_maplist, sizeof (vmspace_mapping_t),
2467c8c0b82SPatrick Mooney 	    offsetof(vmspace_mapping_t, vmsm_node));
2477c8c0b82SPatrick Mooney 	list_create(&vms->vms_clients, sizeof (vm_client_t),
2487c8c0b82SPatrick Mooney 	    offsetof(vm_client_t, vmc_node));
2497c8c0b82SPatrick Mooney 
2507c8c0b82SPatrick Mooney 	vms->vms_gpt = vmm_gpt_alloc(pte_ops);
2517c8c0b82SPatrick Mooney 	vms->vms_pt_gen = 1;
2527c8c0b82SPatrick Mooney 	vms->vms_track_dirty = track_dirty;
2537c8c0b82SPatrick Mooney 
2547c8c0b82SPatrick Mooney 	return (vms);
2557c8c0b82SPatrick Mooney }
2567c8c0b82SPatrick Mooney 
2577c8c0b82SPatrick Mooney /*
2587c8c0b82SPatrick Mooney  * Destroy a vmspace.  All regions in the space must be unmapped.  Any remaining
2597c8c0b82SPatrick Mooney  * clients will be orphaned.
2607c8c0b82SPatrick Mooney  */
2617c8c0b82SPatrick Mooney void
2627c8c0b82SPatrick Mooney vmspace_destroy(vmspace_t *vms)
2637c8c0b82SPatrick Mooney {
2647c8c0b82SPatrick Mooney 	mutex_enter(&vms->vms_lock);
2657c8c0b82SPatrick Mooney 	VERIFY(list_is_empty(&vms->vms_maplist));
2667c8c0b82SPatrick Mooney 
2677c8c0b82SPatrick Mooney 	if (!list_is_empty(&vms->vms_clients)) {
2687c8c0b82SPatrick Mooney 		vm_client_t *vmc = list_head(&vms->vms_clients);
2697c8c0b82SPatrick Mooney 		while (vmc != NULL) {
2707c8c0b82SPatrick Mooney 			vmc = vmc_space_orphan(vmc, vms);
2717c8c0b82SPatrick Mooney 		}
2727c8c0b82SPatrick Mooney 		/*
2737c8c0b82SPatrick Mooney 		 * Wait for any clients which were in the process of destroying
2747c8c0b82SPatrick Mooney 		 * themselves to disappear.
2757c8c0b82SPatrick Mooney 		 */
2767c8c0b82SPatrick Mooney 		while (!list_is_empty(&vms->vms_clients)) {
2777c8c0b82SPatrick Mooney 			cv_wait(&vms->vms_cv, &vms->vms_lock);
2787c8c0b82SPatrick Mooney 		}
2797c8c0b82SPatrick Mooney 	}
2807c8c0b82SPatrick Mooney 	VERIFY(list_is_empty(&vms->vms_clients));
2817c8c0b82SPatrick Mooney 
2827c8c0b82SPatrick Mooney 	vmm_gpt_free(vms->vms_gpt);
2837c8c0b82SPatrick Mooney 	mutex_exit(&vms->vms_lock);
2847c8c0b82SPatrick Mooney 
2857c8c0b82SPatrick Mooney 	mutex_destroy(&vms->vms_lock);
2867c8c0b82SPatrick Mooney 	cv_destroy(&vms->vms_cv);
2877c8c0b82SPatrick Mooney 	list_destroy(&vms->vms_maplist);
2887c8c0b82SPatrick Mooney 	list_destroy(&vms->vms_clients);
2897c8c0b82SPatrick Mooney 
2907c8c0b82SPatrick Mooney 	kmem_free(vms, sizeof (*vms));
2917c8c0b82SPatrick Mooney }
2927c8c0b82SPatrick Mooney 
2937c8c0b82SPatrick Mooney /*
2947c8c0b82SPatrick Mooney  * Retrieve the count of resident (mapped into the page tables) pages.
2957c8c0b82SPatrick Mooney  */
2967c8c0b82SPatrick Mooney uint64_t
2977c8c0b82SPatrick Mooney vmspace_resident_count(vmspace_t *vms)
2987c8c0b82SPatrick Mooney {
2997c8c0b82SPatrick Mooney 	return (vms->vms_pages_mapped);
3007c8c0b82SPatrick Mooney }
3017c8c0b82SPatrick Mooney 
3027c8c0b82SPatrick Mooney /*
303*b9b43e84SPatrick Mooney  * Perform an operation on the status (accessed/dirty) bits held in the page
304*b9b43e84SPatrick Mooney  * tables of this vmspace.
3057c8c0b82SPatrick Mooney  *
306*b9b43e84SPatrick Mooney  * Such manipulations race against both hardware writes (from running vCPUs) and
307*b9b43e84SPatrick Mooney  * emulated accesses reflected from userspace.  Safe functionality depends on
308*b9b43e84SPatrick Mooney  * the VM instance being read-locked to prevent vmspace_map/vmspace_unmap
3097c8c0b82SPatrick Mooney  * operations from changing the page tables during the walk.
3107c8c0b82SPatrick Mooney  */
311*b9b43e84SPatrick Mooney void
312*b9b43e84SPatrick Mooney vmspace_bits_operate(vmspace_t *vms, uint64_t gpa, size_t len,
313*b9b43e84SPatrick Mooney     vmspace_bit_oper_t oper, uint8_t *bitmap)
314*b9b43e84SPatrick Mooney {
315*b9b43e84SPatrick Mooney 	const bool bit_input = (oper & VBO_FLAG_BITMAP_IN) != 0;
316*b9b43e84SPatrick Mooney 	const bool bit_output = (oper & VBO_FLAG_BITMAP_OUT) != 0;
317*b9b43e84SPatrick Mooney 	const vmspace_bit_oper_t oper_only =
318*b9b43e84SPatrick Mooney 	    oper & ~(VBO_FLAG_BITMAP_IN | VBO_FLAG_BITMAP_OUT);
319*b9b43e84SPatrick Mooney 	vmm_gpt_t *gpt = vms->vms_gpt;
320*b9b43e84SPatrick Mooney 
321*b9b43e84SPatrick Mooney 	/*
322*b9b43e84SPatrick Mooney 	 * The bitmap cannot be NULL if the requested operation involves reading
323*b9b43e84SPatrick Mooney 	 * from or writing to it.
324*b9b43e84SPatrick Mooney 	 */
325*b9b43e84SPatrick Mooney 	ASSERT(bitmap != NULL || (!bit_input && !bit_output));
326*b9b43e84SPatrick Mooney 
3277c8c0b82SPatrick Mooney 	for (size_t offset = 0; offset < len; offset += PAGESIZE) {
328*b9b43e84SPatrick Mooney 		const uint64_t pfn_offset = offset >> PAGESHIFT;
329*b9b43e84SPatrick Mooney 		const size_t bit_offset = pfn_offset / 8;
330*b9b43e84SPatrick Mooney 		const uint8_t bit_mask = 1 << (pfn_offset % 8);
331*b9b43e84SPatrick Mooney 
332*b9b43e84SPatrick Mooney 		if (bit_input && (bitmap[bit_offset] & bit_mask) == 0) {
333*b9b43e84SPatrick Mooney 			continue;
334*b9b43e84SPatrick Mooney 		}
335*b9b43e84SPatrick Mooney 
336*b9b43e84SPatrick Mooney 		bool value = false;
337*b9b43e84SPatrick Mooney 		uint64_t *entry = vmm_gpt_lookup(gpt, gpa + offset);
338*b9b43e84SPatrick Mooney 		if (entry == NULL) {
339*b9b43e84SPatrick Mooney 			if (bit_output) {
340*b9b43e84SPatrick Mooney 				bitmap[bit_offset] &= ~bit_mask;
341*b9b43e84SPatrick Mooney 			}
342*b9b43e84SPatrick Mooney 			continue;
343*b9b43e84SPatrick Mooney 		}
344*b9b43e84SPatrick Mooney 
345*b9b43e84SPatrick Mooney 		switch (oper_only) {
346*b9b43e84SPatrick Mooney 		case VBO_GET_DIRTY:
347*b9b43e84SPatrick Mooney 			value = vmm_gpt_query(gpt, entry, VGQ_DIRTY);
348*b9b43e84SPatrick Mooney 			break;
349*b9b43e84SPatrick Mooney 		case VBO_SET_DIRTY: {
350*b9b43e84SPatrick Mooney 			uint_t prot = 0;
351*b9b43e84SPatrick Mooney 			bool present_writable = false;
352*b9b43e84SPatrick Mooney 			pfn_t pfn;
353*b9b43e84SPatrick Mooney 
354*b9b43e84SPatrick Mooney 			/*
355*b9b43e84SPatrick Mooney 			 * To avoid blindly setting the dirty bit on otherwise
356*b9b43e84SPatrick Mooney 			 * empty PTEs, we must first check if the entry for the
357*b9b43e84SPatrick Mooney 			 * address in question has been populated.
358*b9b43e84SPatrick Mooney 			 *
359*b9b43e84SPatrick Mooney 			 * Only if the page is marked both Present and Writable
360*b9b43e84SPatrick Mooney 			 * will we permit the dirty bit to be set.
361*b9b43e84SPatrick Mooney 			 */
362*b9b43e84SPatrick Mooney 			if (!vmm_gpt_is_mapped(gpt, entry, &pfn, &prot)) {
363*b9b43e84SPatrick Mooney 				int err = vmspace_ensure_mapped(vms, gpa,
364*b9b43e84SPatrick Mooney 				    PROT_WRITE, &pfn, entry);
365*b9b43e84SPatrick Mooney 				if (err == 0) {
366*b9b43e84SPatrick Mooney 					present_writable = true;
367*b9b43e84SPatrick Mooney 				}
368*b9b43e84SPatrick Mooney 			} else if ((prot & PROT_WRITE) != 0) {
369*b9b43e84SPatrick Mooney 				present_writable = true;
370*b9b43e84SPatrick Mooney 			}
371*b9b43e84SPatrick Mooney 
372*b9b43e84SPatrick Mooney 			if (present_writable) {
373*b9b43e84SPatrick Mooney 				value = !vmm_gpt_reset_dirty(gpt, entry, true);
374*b9b43e84SPatrick Mooney 			}
375*b9b43e84SPatrick Mooney 			break;
376*b9b43e84SPatrick Mooney 		}
377*b9b43e84SPatrick Mooney 		case VBO_RESET_DIRTY:
378*b9b43e84SPatrick Mooney 			/*
379*b9b43e84SPatrick Mooney 			 * Although it may seem at first glance that resetting the
380*b9b43e84SPatrick Mooney 			 * dirty bit requires the same care as setting it, the
381*b9b43e84SPatrick Mooney 			 * constraints make for a simpler task.
382*b9b43e84SPatrick Mooney 			 *
383*b9b43e84SPatrick Mooney 			 * Any PTEs with the dirty bit set will have already
384*b9b43e84SPatrick Mooney 			 * been properly populated.
385*b9b43e84SPatrick Mooney 			 */
386*b9b43e84SPatrick Mooney 			value = vmm_gpt_reset_dirty(gpt, entry, false);
387*b9b43e84SPatrick Mooney 			break;
388*b9b43e84SPatrick Mooney 		default:
389*b9b43e84SPatrick Mooney 			panic("unrecognized operator: %d", oper_only);
390*b9b43e84SPatrick Mooney 			break;
391*b9b43e84SPatrick Mooney 		}
392*b9b43e84SPatrick Mooney 		if (bit_output) {
393*b9b43e84SPatrick Mooney 			if (value) {
394*b9b43e84SPatrick Mooney 				bitmap[bit_offset] |= bit_mask;
395*b9b43e84SPatrick Mooney 			} else {
396*b9b43e84SPatrick Mooney 				bitmap[bit_offset] &= ~bit_mask;
397*b9b43e84SPatrick Mooney 			}
398*b9b43e84SPatrick Mooney 		}
3997c8c0b82SPatrick Mooney 	}
4007c8c0b82SPatrick Mooney 
4017c8c0b82SPatrick Mooney 	/*
402*b9b43e84SPatrick Mooney 	 * Invalidate the address range potentially affected by the changes to
403*b9b43e84SPatrick Mooney 	 * page table bits, issuing shoot-downs to any clients which might have
404*b9b43e84SPatrick Mooney 	 * it cached.
4057c8c0b82SPatrick Mooney 	 */
4067c8c0b82SPatrick Mooney 	vmspace_hold_enter(vms);
4077c8c0b82SPatrick Mooney 	vms->vms_pt_gen++;
408*b9b43e84SPatrick Mooney 	vmspace_clients_invalidate(vms, gpa, len);
409*b9b43e84SPatrick Mooney 	vmspace_hold_exit(vms, true);
410*b9b43e84SPatrick Mooney }
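
/*
 * The bitmap used by vmspace_bits_operate() carries one bit per page of the
 * [gpa, gpa + len) range, packed LSB-first within each byte.  As a sketch, a
 * caller holding the VM read-lock could harvest dirty state for a region into
 * a `bitmap` buffer of at least (len / PAGESIZE / 8) bytes (rounded up) with:
 *
 *	vmspace_bits_operate(vms, gpa, len,
 *	    VBO_GET_DIRTY | VBO_FLAG_BITMAP_OUT, bitmap);
 *
 * and later clear the accumulated state with VBO_RESET_DIRTY once the dirtied
 * pages have been processed.
 */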
411*b9b43e84SPatrick Mooney 
412*b9b43e84SPatrick Mooney /*
413*b9b43e84SPatrick Mooney  * Is dirty-page-tracking enabled for the vmspace?
414*b9b43e84SPatrick Mooney  */
415*b9b43e84SPatrick Mooney bool
416*b9b43e84SPatrick Mooney vmspace_get_tracking(vmspace_t *vms)
417*b9b43e84SPatrick Mooney {
418*b9b43e84SPatrick Mooney 	mutex_enter(&vms->vms_lock);
419*b9b43e84SPatrick Mooney 	const bool val = vms->vms_track_dirty;
420*b9b43e84SPatrick Mooney 	mutex_exit(&vms->vms_lock);
421*b9b43e84SPatrick Mooney 	return (val);
422*b9b43e84SPatrick Mooney }
423*b9b43e84SPatrick Mooney 
424*b9b43e84SPatrick Mooney /*
425*b9b43e84SPatrick Mooney  * Set the state (enabled/disabled) of dirty-page-tracking for the vmspace.
426*b9b43e84SPatrick Mooney  */
427*b9b43e84SPatrick Mooney int
428*b9b43e84SPatrick Mooney vmspace_set_tracking(vmspace_t *vms, bool enable_dirty_tracking)
429*b9b43e84SPatrick Mooney {
430*b9b43e84SPatrick Mooney 	if (enable_dirty_tracking && !vmm_gpt_can_track_dirty(vms->vms_gpt)) {
431*b9b43e84SPatrick Mooney 		/* Do not allow this to be set if it is not supported */
432*b9b43e84SPatrick Mooney 		return (ENOTSUP);
433*b9b43e84SPatrick Mooney 	}
434*b9b43e84SPatrick Mooney 
435*b9b43e84SPatrick Mooney 	vmspace_hold_enter(vms);
436*b9b43e84SPatrick Mooney 	if (vms->vms_track_dirty == enable_dirty_tracking) {
437*b9b43e84SPatrick Mooney 		/* No further effort required if state already matches */
438*b9b43e84SPatrick Mooney 		vmspace_hold_exit(vms, false);
439*b9b43e84SPatrick Mooney 		return (0);
440*b9b43e84SPatrick Mooney 	}
441*b9b43e84SPatrick Mooney 
442*b9b43e84SPatrick Mooney 	vms->vms_track_dirty = enable_dirty_tracking;
443*b9b43e84SPatrick Mooney 
444*b9b43e84SPatrick Mooney 	/* Configure all existing clients for new tracking behavior */
4457c8c0b82SPatrick Mooney 	for (vm_client_t *vmc = list_head(&vms->vms_clients);
4467c8c0b82SPatrick Mooney 	    vmc != NULL;
4477c8c0b82SPatrick Mooney 	    vmc = list_next(&vms->vms_clients, vmc)) {
448*b9b43e84SPatrick Mooney 		mutex_enter(&vmc->vmc_lock);
449*b9b43e84SPatrick Mooney 		vmc->vmc_track_dirty = enable_dirty_tracking;
450*b9b43e84SPatrick Mooney 		mutex_exit(&vmc->vmc_lock);
4517c8c0b82SPatrick Mooney 	}
4524ac713daSLuqman Aden 
453*b9b43e84SPatrick Mooney 	/*
454*b9b43e84SPatrick Mooney 	 * Notify all clients of what is considered an invalidation of the
455*b9b43e84SPatrick Mooney 	 * entire vmspace.
456*b9b43e84SPatrick Mooney 	 */
457*b9b43e84SPatrick Mooney 	vms->vms_pt_gen++;
458*b9b43e84SPatrick Mooney 	vmspace_clients_invalidate(vms, 0, vms->vms_size);
459*b9b43e84SPatrick Mooney 
460*b9b43e84SPatrick Mooney 	vmspace_hold_exit(vms, true);
4614ac713daSLuqman Aden 	return (0);
4627c8c0b82SPatrick Mooney }
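
/*
 * As an example, a consumer preparing for dirty-page harvesting would first
 * enable tracking, checking for lack of hardware/GPT support:
 *
 *	if (vmspace_set_tracking(vms, true) == ENOTSUP) {
 *		... dirty tracking unavailable; fall back to other means ...
 *	}
 */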
4637c8c0b82SPatrick Mooney 
4647c8c0b82SPatrick Mooney static pfn_t
4657c8c0b82SPatrick Mooney vm_object_pager_reservoir(vm_object_t *vmo, uintptr_t off)
4667c8c0b82SPatrick Mooney {
4677c8c0b82SPatrick Mooney 	vmmr_region_t *region;
4687c8c0b82SPatrick Mooney 	pfn_t pfn;
4697c8c0b82SPatrick Mooney 
4707c8c0b82SPatrick Mooney 	ASSERT3U(vmo->vmo_type, ==, VMOT_MEM);
4717c8c0b82SPatrick Mooney 
4727c8c0b82SPatrick Mooney 	region = vmo->vmo_data;
4737c8c0b82SPatrick Mooney 	pfn = vmmr_region_pfn_at(region, off);
4747c8c0b82SPatrick Mooney 
4757c8c0b82SPatrick Mooney 	return (pfn);
4767c8c0b82SPatrick Mooney }
4777c8c0b82SPatrick Mooney 
4787c8c0b82SPatrick Mooney static pfn_t
4797c8c0b82SPatrick Mooney vm_object_pager_mmio(vm_object_t *vmo, uintptr_t off)
4807c8c0b82SPatrick Mooney {
4817c8c0b82SPatrick Mooney 	pfn_t pfn;
4827c8c0b82SPatrick Mooney 
4837c8c0b82SPatrick Mooney 	ASSERT3U(vmo->vmo_type, ==, VMOT_MMIO);
4847c8c0b82SPatrick Mooney 	ASSERT3P(vmo->vmo_data, !=, NULL);
4857c8c0b82SPatrick Mooney 	ASSERT3U(off, <, vmo->vmo_size);
4867c8c0b82SPatrick Mooney 
4877c8c0b82SPatrick Mooney 	pfn = ((uintptr_t)vmo->vmo_data + off) >> PAGESHIFT;
4887c8c0b82SPatrick Mooney 
4897c8c0b82SPatrick Mooney 	return (pfn);
4907c8c0b82SPatrick Mooney }
4917c8c0b82SPatrick Mooney 
4927c8c0b82SPatrick Mooney /*
4937c8c0b82SPatrick Mooney  * Allocate a VM object backed by VMM reservoir memory.
4947c8c0b82SPatrick Mooney  */
4957c8c0b82SPatrick Mooney vm_object_t *
4967c8c0b82SPatrick Mooney vm_object_mem_allocate(size_t size, bool transient)
4977c8c0b82SPatrick Mooney {
4987c8c0b82SPatrick Mooney 	int err;
4997c8c0b82SPatrick Mooney 	vmmr_region_t *region = NULL;
5007c8c0b82SPatrick Mooney 	vm_object_t *vmo;
5017c8c0b82SPatrick Mooney 
5027c8c0b82SPatrick Mooney 	ASSERT3U(size, !=, 0);
5037c8c0b82SPatrick Mooney 	ASSERT3U(size & PAGEOFFSET, ==, 0);
5047c8c0b82SPatrick Mooney 
5057c8c0b82SPatrick Mooney 	err = vmmr_alloc(size, transient, &region);
5067c8c0b82SPatrick Mooney 	if (err != 0) {
5077c8c0b82SPatrick Mooney 		return (NULL);
5087c8c0b82SPatrick Mooney 	}
5097c8c0b82SPatrick Mooney 
5107c8c0b82SPatrick Mooney 	vmo = kmem_alloc(sizeof (*vmo), KM_SLEEP);
5117c8c0b82SPatrick Mooney 
5127c8c0b82SPatrick Mooney 	/* For now, these are to stay fixed after allocation */
5137c8c0b82SPatrick Mooney 	vmo->vmo_type = VMOT_MEM;
5147c8c0b82SPatrick Mooney 	vmo->vmo_size = size;
5157c8c0b82SPatrick Mooney 	vmo->vmo_attr = MTRR_TYPE_WB;
5167c8c0b82SPatrick Mooney 	vmo->vmo_data = region;
5177c8c0b82SPatrick Mooney 	vmo->vmo_refcnt = 1;
5187c8c0b82SPatrick Mooney 
5197c8c0b82SPatrick Mooney 	return (vmo);
5207c8c0b82SPatrick Mooney }
5217c8c0b82SPatrick Mooney 
5227c8c0b82SPatrick Mooney static vm_object_t *
5237c8c0b82SPatrick Mooney vm_object_mmio_allocate(size_t size, uintptr_t hpa)
5247c8c0b82SPatrick Mooney {
5257c8c0b82SPatrick Mooney 	vm_object_t *vmo;
5267c8c0b82SPatrick Mooney 
5277c8c0b82SPatrick Mooney 	ASSERT3U(size, !=, 0);
5287c8c0b82SPatrick Mooney 	ASSERT3U(size & PAGEOFFSET, ==, 0);
5297c8c0b82SPatrick Mooney 	ASSERT3U(hpa & PAGEOFFSET, ==, 0);
5307c8c0b82SPatrick Mooney 
5317c8c0b82SPatrick Mooney 	vmo = kmem_alloc(sizeof (*vmo), KM_SLEEP);
5327c8c0b82SPatrick Mooney 
5337c8c0b82SPatrick Mooney 	/* For now, these are to stay fixed after allocation */
5347c8c0b82SPatrick Mooney 	vmo->vmo_type = VMOT_MMIO;
5357c8c0b82SPatrick Mooney 	vmo->vmo_size = size;
5367c8c0b82SPatrick Mooney 	vmo->vmo_attr = MTRR_TYPE_UC;
5377c8c0b82SPatrick Mooney 	vmo->vmo_data = (void *)hpa;
5387c8c0b82SPatrick Mooney 	vmo->vmo_refcnt = 1;
5397c8c0b82SPatrick Mooney 
5407c8c0b82SPatrick Mooney 	return (vmo);
5417c8c0b82SPatrick Mooney }
5427c8c0b82SPatrick Mooney 
5437c8c0b82SPatrick Mooney /*
5447c8c0b82SPatrick Mooney  * Allocate a VM object backed by an existing range of physical memory.
5457c8c0b82SPatrick Mooney  */
5467c8c0b82SPatrick Mooney vm_object_t *
5477c8c0b82SPatrick Mooney vmm_mmio_alloc(vmspace_t *vmspace, uintptr_t gpa, size_t len, uintptr_t hpa)
5487c8c0b82SPatrick Mooney {
5497c8c0b82SPatrick Mooney 	int error;
5507c8c0b82SPatrick Mooney 	vm_object_t *obj;
5517c8c0b82SPatrick Mooney 
5527c8c0b82SPatrick Mooney 	obj = vm_object_mmio_allocate(len, hpa);
5537c8c0b82SPatrick Mooney 	if (obj != NULL) {
5547c8c0b82SPatrick Mooney 		error = vmspace_map(vmspace, obj, 0, gpa, len,
5557c8c0b82SPatrick Mooney 		    PROT_READ | PROT_WRITE);
5567c8c0b82SPatrick Mooney 		if (error != 0) {
5577c8c0b82SPatrick Mooney 			vm_object_release(obj);
5587c8c0b82SPatrick Mooney 			obj = NULL;
5597c8c0b82SPatrick Mooney 		}
5607c8c0b82SPatrick Mooney 	}
5617c8c0b82SPatrick Mooney 
5627c8c0b82SPatrick Mooney 	return (obj);
5637c8c0b82SPatrick Mooney }
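
/*
 * A sketch of how a device passthrough path might expose a page-aligned
 * host-physical range (`hpa`, `len`) to the guest at `gpa`:
 *
 *	vm_object_t *obj = vmm_mmio_alloc(vms, gpa, len, hpa);
 *	if (obj == NULL)
 *		return (ENOMEM);
 *
 * The mapping consumes the object's initial reference; vm_mapping_remove()
 * drops it when the region is later unmapped via vmspace_unmap(), so a caller
 * needing the object beyond that point must take its own vm_object_reference()
 * hold.
 */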
5647c8c0b82SPatrick Mooney 
5657c8c0b82SPatrick Mooney /*
5667c8c0b82SPatrick Mooney  * Release a vm_object reference
5677c8c0b82SPatrick Mooney  */
5687c8c0b82SPatrick Mooney void
5697c8c0b82SPatrick Mooney vm_object_release(vm_object_t *vmo)
5707c8c0b82SPatrick Mooney {
5717c8c0b82SPatrick Mooney 	ASSERT(vmo != NULL);
5727c8c0b82SPatrick Mooney 
5737c8c0b82SPatrick Mooney 	uint_t ref = atomic_dec_uint_nv(&vmo->vmo_refcnt);
5747c8c0b82SPatrick Mooney 	/* underflow would be a deadly serious mistake */
5757c8c0b82SPatrick Mooney 	VERIFY3U(ref, !=, UINT_MAX);
5767c8c0b82SPatrick Mooney 	if (ref != 0) {
5777c8c0b82SPatrick Mooney 		return;
5787c8c0b82SPatrick Mooney 	}
5797c8c0b82SPatrick Mooney 
5807c8c0b82SPatrick Mooney 	switch (vmo->vmo_type) {
5817c8c0b82SPatrick Mooney 	case VMOT_MEM:
5827c8c0b82SPatrick Mooney 		vmmr_free((vmmr_region_t *)vmo->vmo_data);
5837c8c0b82SPatrick Mooney 		break;
5847c8c0b82SPatrick Mooney 	case VMOT_MMIO:
5857c8c0b82SPatrick Mooney 		break;
5867c8c0b82SPatrick Mooney 	default:
5877c8c0b82SPatrick Mooney 		panic("unexpected object type %u", vmo->vmo_type);
5887c8c0b82SPatrick Mooney 		break;
5897c8c0b82SPatrick Mooney 	}
5907c8c0b82SPatrick Mooney 
5917c8c0b82SPatrick Mooney 	vmo->vmo_data = NULL;
5927c8c0b82SPatrick Mooney 	vmo->vmo_size = 0;
5937c8c0b82SPatrick Mooney 	kmem_free(vmo, sizeof (*vmo));
5947c8c0b82SPatrick Mooney }
5957c8c0b82SPatrick Mooney 
5967c8c0b82SPatrick Mooney /*
5977c8c0b82SPatrick Mooney  * Increase refcount for vm_object reference
5987c8c0b82SPatrick Mooney  */
5997c8c0b82SPatrick Mooney void
6007c8c0b82SPatrick Mooney vm_object_reference(vm_object_t *vmo)
6017c8c0b82SPatrick Mooney {
6027c8c0b82SPatrick Mooney 	ASSERT(vmo != NULL);
6037c8c0b82SPatrick Mooney 
6047c8c0b82SPatrick Mooney 	uint_t ref = atomic_inc_uint_nv(&vmo->vmo_refcnt);
6057c8c0b82SPatrick Mooney 	/* overflow would be a deadly serious mistake */
6067c8c0b82SPatrick Mooney 	VERIFY3U(ref, !=, 0);
6077c8c0b82SPatrick Mooney }
6087c8c0b82SPatrick Mooney 
6097c8c0b82SPatrick Mooney /*
6107c8c0b82SPatrick Mooney  * Get the host-physical PFN for a given offset into a vm_object.
6117c8c0b82SPatrick Mooney  *
6127c8c0b82SPatrick Mooney  * The provided `off` must be within the allocated size of the vm_object.
6137c8c0b82SPatrick Mooney  */
6147c8c0b82SPatrick Mooney pfn_t
6157c8c0b82SPatrick Mooney vm_object_pfn(vm_object_t *vmo, uintptr_t off)
6167c8c0b82SPatrick Mooney {
6177c8c0b82SPatrick Mooney 	const uintptr_t aligned_off = off & PAGEMASK;
6187c8c0b82SPatrick Mooney 
6197c8c0b82SPatrick Mooney 	switch (vmo->vmo_type) {
6207c8c0b82SPatrick Mooney 	case VMOT_MEM:
6217c8c0b82SPatrick Mooney 		return (vm_object_pager_reservoir(vmo, aligned_off));
6227c8c0b82SPatrick Mooney 	case VMOT_MMIO:
6237c8c0b82SPatrick Mooney 		return (vm_object_pager_mmio(vmo, aligned_off));
6247c8c0b82SPatrick Mooney 	case VMOT_NONE:
6257c8c0b82SPatrick Mooney 		break;
6267c8c0b82SPatrick Mooney 	}
6277c8c0b82SPatrick Mooney 	panic("unexpected object type %u", vmo->vmo_type);
6287c8c0b82SPatrick Mooney }
6297c8c0b82SPatrick Mooney 
6307c8c0b82SPatrick Mooney static vmspace_mapping_t *
6317c8c0b82SPatrick Mooney vm_mapping_find(vmspace_t *vms, uintptr_t addr, size_t size)
6327c8c0b82SPatrick Mooney {
6337c8c0b82SPatrick Mooney 	vmspace_mapping_t *vmsm;
6347c8c0b82SPatrick Mooney 	list_t *ml = &vms->vms_maplist;
6357c8c0b82SPatrick Mooney 	const uintptr_t range_end = addr + size;
6367c8c0b82SPatrick Mooney 
6377c8c0b82SPatrick Mooney 	ASSERT3U(addr, <=, range_end);
6387c8c0b82SPatrick Mooney 
6397c8c0b82SPatrick Mooney 	if (addr >= vms->vms_size) {
6407c8c0b82SPatrick Mooney 		return (NULL);
6417c8c0b82SPatrick Mooney 	}
6427c8c0b82SPatrick Mooney 	for (vmsm = list_head(ml); vmsm != NULL; vmsm = list_next(ml, vmsm)) {
6437c8c0b82SPatrick Mooney 		const uintptr_t seg_end = vmsm->vmsm_addr + vmsm->vmsm_len;
6447c8c0b82SPatrick Mooney 
6457c8c0b82SPatrick Mooney 		if (addr >= vmsm->vmsm_addr && addr < seg_end) {
6467c8c0b82SPatrick Mooney 			if (range_end <= seg_end) {
6477c8c0b82SPatrick Mooney 				return (vmsm);
6487c8c0b82SPatrick Mooney 			} else {
6497c8c0b82SPatrick Mooney 				return (NULL);
6507c8c0b82SPatrick Mooney 			}
6517c8c0b82SPatrick Mooney 		}
6527c8c0b82SPatrick Mooney 	}
6537c8c0b82SPatrick Mooney 	return (NULL);
6547c8c0b82SPatrick Mooney }
6557c8c0b82SPatrick Mooney 
6567c8c0b82SPatrick Mooney /*
6577c8c0b82SPatrick Mooney  * Check to see if any mappings reside within [addr, addr + size) span in the
6577c8c0b82SPatrick Mooney  * Check to see if any mappings reside within the [addr, addr + size) span in
6587c8c0b82SPatrick Mooney  * the vmspace, returning true if that span is indeed empty.
6607c8c0b82SPatrick Mooney static bool
6617c8c0b82SPatrick Mooney vm_mapping_gap(vmspace_t *vms, uintptr_t addr, size_t size)
6627c8c0b82SPatrick Mooney {
6637c8c0b82SPatrick Mooney 	vmspace_mapping_t *vmsm;
6647c8c0b82SPatrick Mooney 	list_t *ml = &vms->vms_maplist;
6657c8c0b82SPatrick Mooney 	const uintptr_t range_end = addr + size - 1;
6667c8c0b82SPatrick Mooney 
6677c8c0b82SPatrick Mooney 	ASSERT(MUTEX_HELD(&vms->vms_lock));
6687c8c0b82SPatrick Mooney 	ASSERT(size > 0);
6697c8c0b82SPatrick Mooney 
6707c8c0b82SPatrick Mooney 	for (vmsm = list_head(ml); vmsm != NULL; vmsm = list_next(ml, vmsm)) {
6717c8c0b82SPatrick Mooney 		const uintptr_t seg_end = vmsm->vmsm_addr + vmsm->vmsm_len - 1;
6727c8c0b82SPatrick Mooney 
6737c8c0b82SPatrick Mooney 		/*
6747c8c0b82SPatrick Mooney 		 * The two ranges do not overlap if the start of either of
6757c8c0b82SPatrick Mooney 		 * them is after the end of the other.
6767c8c0b82SPatrick Mooney 		 */
6777c8c0b82SPatrick Mooney 		if (vmsm->vmsm_addr > range_end || addr > seg_end)
6787c8c0b82SPatrick Mooney 			continue;
6797c8c0b82SPatrick Mooney 		return (false);
6807c8c0b82SPatrick Mooney 	}
6817c8c0b82SPatrick Mooney 	return (true);
6827c8c0b82SPatrick Mooney }
6837c8c0b82SPatrick Mooney 
6847c8c0b82SPatrick Mooney static void
6857c8c0b82SPatrick Mooney vm_mapping_remove(vmspace_t *vms, vmspace_mapping_t *vmsm)
6867c8c0b82SPatrick Mooney {
6877c8c0b82SPatrick Mooney 	list_t *ml = &vms->vms_maplist;
6887c8c0b82SPatrick Mooney 
6897c8c0b82SPatrick Mooney 	ASSERT(MUTEX_HELD(&vms->vms_lock));
6907c8c0b82SPatrick Mooney 	ASSERT(vms->vms_held);
6917c8c0b82SPatrick Mooney 
6927c8c0b82SPatrick Mooney 	list_remove(ml, vmsm);
6937c8c0b82SPatrick Mooney 	vm_object_release(vmsm->vmsm_object);
6947c8c0b82SPatrick Mooney 	kmem_free(vmsm, sizeof (*vmsm));
6957c8c0b82SPatrick Mooney }
6967c8c0b82SPatrick Mooney 
6977c8c0b82SPatrick Mooney /*
6987c8c0b82SPatrick Mooney  * Enter a hold state on the vmspace.  This ensures that all VM clients
6997c8c0b82SPatrick Mooney  * associated with the vmspace are excluded from establishing new page holds,
7007c8c0b82SPatrick Mooney  * or from taking any other actions which would require accessing vmspace state
7017c8c0b82SPatrick Mooney  * that is subject to change.
7027c8c0b82SPatrick Mooney  *
7037c8c0b82SPatrick Mooney  * Returns with vmspace_t`vms_lock held.
7047c8c0b82SPatrick Mooney  */
7057c8c0b82SPatrick Mooney static void
7067c8c0b82SPatrick Mooney vmspace_hold_enter(vmspace_t *vms)
7077c8c0b82SPatrick Mooney {
7087c8c0b82SPatrick Mooney 	mutex_enter(&vms->vms_lock);
7097c8c0b82SPatrick Mooney 	VERIFY(!vms->vms_held);
7107c8c0b82SPatrick Mooney 
7117c8c0b82SPatrick Mooney 	vm_client_t *vmc = list_head(&vms->vms_clients);
7127c8c0b82SPatrick Mooney 	for (; vmc != NULL; vmc = list_next(&vms->vms_clients, vmc)) {
7137c8c0b82SPatrick Mooney 		vmc_space_hold(vmc);
7147c8c0b82SPatrick Mooney 	}
7157c8c0b82SPatrick Mooney 	vms->vms_held = true;
7167c8c0b82SPatrick Mooney }
7177c8c0b82SPatrick Mooney 
7187c8c0b82SPatrick Mooney /*
7197c8c0b82SPatrick Mooney  * Exit a hold state on the vmspace.  This releases all VM clients associated
7207c8c0b82SPatrick Mooney  * with the vmspace, allowing them to establish new page holds and partake in
7217c8c0b82SPatrick Mooney  * other actions which require the changed vmspace state.  If `kick_on_cpu` is
7227c8c0b82SPatrick Mooney  * true, then any CPUs actively using the page tables will be IPIed, and the
7237c8c0b82SPatrick Mooney  * call will block until they have acknowledged being ready to use the latest
7247c8c0b82SPatrick Mooney  * state of the tables.
7257c8c0b82SPatrick Mooney  *
7267c8c0b82SPatrick Mooney  * Requires vmspace_t`vms_lock be held, which is released as part of the call.
7277c8c0b82SPatrick Mooney  */
7287c8c0b82SPatrick Mooney static void
7297c8c0b82SPatrick Mooney vmspace_hold_exit(vmspace_t *vms, bool kick_on_cpu)
7307c8c0b82SPatrick Mooney {
7317c8c0b82SPatrick Mooney 	ASSERT(MUTEX_HELD(&vms->vms_lock));
7327c8c0b82SPatrick Mooney 	VERIFY(vms->vms_held);
7337c8c0b82SPatrick Mooney 
7347c8c0b82SPatrick Mooney 	vm_client_t *vmc = list_head(&vms->vms_clients);
7357c8c0b82SPatrick Mooney 	for (; vmc != NULL; vmc = list_next(&vms->vms_clients, vmc)) {
7367c8c0b82SPatrick Mooney 		vmc_space_release(vmc, kick_on_cpu);
7377c8c0b82SPatrick Mooney 	}
7387c8c0b82SPatrick Mooney 	vms->vms_held = false;
7397c8c0b82SPatrick Mooney 	mutex_exit(&vms->vms_lock);
7407c8c0b82SPatrick Mooney }
7417c8c0b82SPatrick Mooney 
742*b9b43e84SPatrick Mooney static void
743*b9b43e84SPatrick Mooney vmspace_clients_invalidate(vmspace_t *vms, uintptr_t gpa, size_t len)
744*b9b43e84SPatrick Mooney {
745*b9b43e84SPatrick Mooney 	ASSERT(MUTEX_HELD(&vms->vms_lock));
746*b9b43e84SPatrick Mooney 	VERIFY(vms->vms_held);
747*b9b43e84SPatrick Mooney 
748*b9b43e84SPatrick Mooney 	for (vm_client_t *vmc = list_head(&vms->vms_clients);
749*b9b43e84SPatrick Mooney 	    vmc != NULL;
750*b9b43e84SPatrick Mooney 	    vmc = list_next(&vms->vms_clients, vmc)) {
751*b9b43e84SPatrick Mooney 		vmc_space_invalidate(vmc, gpa, len, vms->vms_pt_gen);
752*b9b43e84SPatrick Mooney 	}
753*b9b43e84SPatrick Mooney }
754*b9b43e84SPatrick Mooney 
7557c8c0b82SPatrick Mooney /*
7567c8c0b82SPatrick Mooney  * Attempt to map a vm_object span into the vmspace.
7577c8c0b82SPatrick Mooney  *
7587c8c0b82SPatrick Mooney  * Requirements:
7597c8c0b82SPatrick Mooney  * - `obj_off`, `addr`, and `len` must be page-aligned
7607c8c0b82SPatrick Mooney  * - `obj_off` cannot be greater than the allocated size of the object
7617c8c0b82SPatrick Mooney  * - [`obj_off`, `obj_off` + `len`) span cannot extend beyond the allocated
7627c8c0b82SPatrick Mooney  *   size of the object
7637c8c0b82SPatrick Mooney  * - [`addr`, `addr` + `len`) span cannot reside beyond the maximum address
7647c8c0b82SPatrick Mooney  *   of the vmspace
7657c8c0b82SPatrick Mooney  */
7667c8c0b82SPatrick Mooney int
7677c8c0b82SPatrick Mooney vmspace_map(vmspace_t *vms, vm_object_t *vmo, uintptr_t obj_off, uintptr_t addr,
7687c8c0b82SPatrick Mooney     size_t len, uint8_t prot)
7697c8c0b82SPatrick Mooney {
7707c8c0b82SPatrick Mooney 	vmspace_mapping_t *vmsm;
7717c8c0b82SPatrick Mooney 	int res = 0;
7727c8c0b82SPatrick Mooney 
7737c8c0b82SPatrick Mooney 	if (len == 0 || (addr + len) < addr ||
7747c8c0b82SPatrick Mooney 	    obj_off >= (obj_off + len) || vmo->vmo_size < (obj_off + len)) {
7757c8c0b82SPatrick Mooney 		return (EINVAL);
7767c8c0b82SPatrick Mooney 	}
7777c8c0b82SPatrick Mooney 	if ((addr + len) >= vms->vms_size) {
7787c8c0b82SPatrick Mooney 		return (ENOMEM);
7797c8c0b82SPatrick Mooney 	}
7807c8c0b82SPatrick Mooney 
7817c8c0b82SPatrick Mooney 	vmsm = kmem_alloc(sizeof (*vmsm), KM_SLEEP);
7827c8c0b82SPatrick Mooney 
7837c8c0b82SPatrick Mooney 	vmspace_hold_enter(vms);
7847c8c0b82SPatrick Mooney 	if (!vm_mapping_gap(vms, addr, len)) {
7857c8c0b82SPatrick Mooney 		kmem_free(vmsm, sizeof (*vmsm));
7867c8c0b82SPatrick Mooney 		res = ENOMEM;
7877c8c0b82SPatrick Mooney 	} else {
7887c8c0b82SPatrick Mooney 		vmsm->vmsm_object = vmo;
7897c8c0b82SPatrick Mooney 		vmsm->vmsm_addr = addr;
7907c8c0b82SPatrick Mooney 		vmsm->vmsm_len = len;
7917c8c0b82SPatrick Mooney 		vmsm->vmsm_offset = (off_t)obj_off;
7927c8c0b82SPatrick Mooney 		vmsm->vmsm_prot = prot;
7937c8c0b82SPatrick Mooney 		list_insert_tail(&vms->vms_maplist, vmsm);
7947c8c0b82SPatrick Mooney 
7957c8c0b82SPatrick Mooney 		/*
7967c8c0b82SPatrick Mooney 		 * Make sure the GPT has tables ready for leaf entries across
7977c8c0b82SPatrick Mooney 		 * the entire new mapping.
7987c8c0b82SPatrick Mooney 		 */
7993a0fa64cSPatrick Mooney 		vmm_gpt_populate_region(vms->vms_gpt, addr, len);
8007c8c0b82SPatrick Mooney 	}
8017c8c0b82SPatrick Mooney 	vmspace_hold_exit(vms, false);
8027c8c0b82SPatrick Mooney 	return (res);
8037c8c0b82SPatrick Mooney }
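
/*
 * Combined with vm_object_mem_allocate() and vmspace_populate(), a guest-RAM
 * segment setup might look like the following sketch (error handling omitted):
 *
 *	vm_object_t *vmo = vm_object_mem_allocate(len, false);
 *	VERIFY0(vmspace_map(vms, vmo, 0, gpa, len, PROT_READ | PROT_WRITE));
 *	VERIFY0(vmspace_populate(vms, gpa, len));
 *
 * The map call itself only records the mapping and readies the intermediate
 * GPT tables; leaf PTEs are filled lazily on first access unless the region is
 * pre-populated as above.
 */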
8047c8c0b82SPatrick Mooney 
8057c8c0b82SPatrick Mooney /*
8067c8c0b82SPatrick Mooney  * Unmap a region of the vmspace.
8077c8c0b82SPatrick Mooney  *
8087c8c0b82SPatrick Mooney  * Presently the [start, end) span must equal a region previously mapped by a
8097c8c0b82SPatrick Mooney  * call to vmspace_map().
8107c8c0b82SPatrick Mooney  */
8117c8c0b82SPatrick Mooney int
8123a0fa64cSPatrick Mooney vmspace_unmap(vmspace_t *vms, uintptr_t addr, uintptr_t len)
8137c8c0b82SPatrick Mooney {
8143a0fa64cSPatrick Mooney 	const uintptr_t end = addr + len;
8157c8c0b82SPatrick Mooney 	vmspace_mapping_t *vmsm;
8167c8c0b82SPatrick Mooney 	vm_client_t *vmc;
8177c8c0b82SPatrick Mooney 	uint64_t gen = 0;
8187c8c0b82SPatrick Mooney 
8193a0fa64cSPatrick Mooney 	ASSERT3U(addr, <, end);
8207c8c0b82SPatrick Mooney 
8217c8c0b82SPatrick Mooney 	vmspace_hold_enter(vms);
8227c8c0b82SPatrick Mooney 	/* expect to match existing mapping exactly */
8233a0fa64cSPatrick Mooney 	if ((vmsm = vm_mapping_find(vms, addr, len)) == NULL ||
8243a0fa64cSPatrick Mooney 	    vmsm->vmsm_addr != addr || vmsm->vmsm_len != len) {
8257c8c0b82SPatrick Mooney 		vmspace_hold_exit(vms, false);
8267c8c0b82SPatrick Mooney 		return (ENOENT);
8277c8c0b82SPatrick Mooney 	}
8287c8c0b82SPatrick Mooney 
8297c8c0b82SPatrick Mooney 	/* Prepare clients (and their held pages) for the unmap. */
8307c8c0b82SPatrick Mooney 	for (vmc = list_head(&vms->vms_clients); vmc != NULL;
8317c8c0b82SPatrick Mooney 	    vmc = list_next(&vms->vms_clients, vmc)) {
8323a0fa64cSPatrick Mooney 		vmc_space_unmap(vmc, addr, len, vmsm->vmsm_object);
8337c8c0b82SPatrick Mooney 	}
8347c8c0b82SPatrick Mooney 
8357c8c0b82SPatrick Mooney 	/* Clear all PTEs for region */
8363a0fa64cSPatrick Mooney 	if (vmm_gpt_unmap_region(vms->vms_gpt, addr, len) != 0) {
8377c8c0b82SPatrick Mooney 		vms->vms_pt_gen++;
8387c8c0b82SPatrick Mooney 		gen = vms->vms_pt_gen;
8397c8c0b82SPatrick Mooney 	}
8407c8c0b82SPatrick Mooney 	/* ... and the intermediate (directory) PTEs as well */
8413a0fa64cSPatrick Mooney 	vmm_gpt_vacate_region(vms->vms_gpt, addr, len);
8427c8c0b82SPatrick Mooney 
8437c8c0b82SPatrick Mooney 	/*
8447c8c0b82SPatrick Mooney 	 * If pages were actually unmapped from the GPT, provide clients with
8457c8c0b82SPatrick Mooney 	 * an invalidation notice.
8467c8c0b82SPatrick Mooney 	 */
8477c8c0b82SPatrick Mooney 	if (gen != 0) {
848*b9b43e84SPatrick Mooney 		vmspace_clients_invalidate(vms, addr, len);
8497c8c0b82SPatrick Mooney 	}
8507c8c0b82SPatrick Mooney 
8517c8c0b82SPatrick Mooney 	vm_mapping_remove(vms, vmsm);
8527c8c0b82SPatrick Mooney 	vmspace_hold_exit(vms, true);
8537c8c0b82SPatrick Mooney 	return (0);
8547c8c0b82SPatrick Mooney }
8557c8c0b82SPatrick Mooney 
856*b9b43e84SPatrick Mooney /*
857*b9b43e84SPatrick Mooney  * For a given GPA in the vmspace, ensure that the backing page (if any) is
858*b9b43e84SPatrick Mooney  * properly mapped as present in the provided PTE.
859*b9b43e84SPatrick Mooney  */
860*b9b43e84SPatrick Mooney static int
861*b9b43e84SPatrick Mooney vmspace_ensure_mapped(vmspace_t *vms, uintptr_t gpa, int req_prot, pfn_t *pfnp,
862*b9b43e84SPatrick Mooney     uint64_t *leaf_pte)
863*b9b43e84SPatrick Mooney {
864*b9b43e84SPatrick Mooney 	vmspace_mapping_t *vmsm;
865*b9b43e84SPatrick Mooney 	vm_object_t *vmo;
866*b9b43e84SPatrick Mooney 	pfn_t pfn;
867*b9b43e84SPatrick Mooney 
868*b9b43e84SPatrick Mooney 	ASSERT(pfnp != NULL);
869*b9b43e84SPatrick Mooney 	ASSERT(leaf_pte != NULL);
870*b9b43e84SPatrick Mooney 
871*b9b43e84SPatrick Mooney 	vmsm = vm_mapping_find(vms, gpa, PAGESIZE);
872*b9b43e84SPatrick Mooney 	if (vmsm == NULL) {
873*b9b43e84SPatrick Mooney 		return (FC_NOMAP);
874*b9b43e84SPatrick Mooney 	}
875*b9b43e84SPatrick Mooney 	if ((req_prot & vmsm->vmsm_prot) != req_prot) {
876*b9b43e84SPatrick Mooney 		return (FC_PROT);
877*b9b43e84SPatrick Mooney 	}
878*b9b43e84SPatrick Mooney 
879*b9b43e84SPatrick Mooney 	vmo = vmsm->vmsm_object;
880*b9b43e84SPatrick Mooney 	pfn = vm_object_pfn(vmo, VMSM_OFFSET(vmsm, gpa));
881*b9b43e84SPatrick Mooney 	VERIFY(pfn != PFN_INVALID);
882*b9b43e84SPatrick Mooney 
883*b9b43e84SPatrick Mooney 	if (vmm_gpt_map_at(vms->vms_gpt, leaf_pte, pfn, vmsm->vmsm_prot,
884*b9b43e84SPatrick Mooney 	    vmo->vmo_attr)) {
885*b9b43e84SPatrick Mooney 		atomic_inc_64(&vms->vms_pages_mapped);
886*b9b43e84SPatrick Mooney 	}
887*b9b43e84SPatrick Mooney 
888*b9b43e84SPatrick Mooney 	*pfnp = pfn;
889*b9b43e84SPatrick Mooney 	return (0);
890*b9b43e84SPatrick Mooney }
891*b9b43e84SPatrick Mooney 
892*b9b43e84SPatrick Mooney /*
893*b9b43e84SPatrick Mooney  * Look up the PTE for a given GPA in the vmspace, populating it with
894*b9b43e84SPatrick Mooney  * appropriate contents (pfn, protection, etc.) if it is empty but backed by a
895*b9b43e84SPatrick Mooney  * valid mapping.
896*b9b43e84SPatrick Mooney  */
8977c8c0b82SPatrick Mooney static int
8987c8c0b82SPatrick Mooney vmspace_lookup_map(vmspace_t *vms, uintptr_t gpa, int req_prot, pfn_t *pfnp,
8997c8c0b82SPatrick Mooney     uint64_t **ptepp)
9007c8c0b82SPatrick Mooney {
9017c8c0b82SPatrick Mooney 	vmm_gpt_t *gpt = vms->vms_gpt;
9027c8c0b82SPatrick Mooney 	uint64_t *entries[MAX_GPT_LEVEL], *leaf;
9037c8c0b82SPatrick Mooney 	pfn_t pfn = PFN_INVALID;
9047c8c0b82SPatrick Mooney 	uint_t prot;
9057c8c0b82SPatrick Mooney 
9067c8c0b82SPatrick Mooney 	ASSERT0(gpa & PAGEOFFSET);
9077c8c0b82SPatrick Mooney 	ASSERT((req_prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) != PROT_NONE);
9087c8c0b82SPatrick Mooney 
9097c8c0b82SPatrick Mooney 	vmm_gpt_walk(gpt, gpa, entries, MAX_GPT_LEVEL);
9107c8c0b82SPatrick Mooney 	leaf = entries[LEVEL1];
9117c8c0b82SPatrick Mooney 	if (leaf == NULL) {
9127c8c0b82SPatrick Mooney 		/*
9137c8c0b82SPatrick Mooney 		 * Since we populated the intermediate tables for any regions
9147c8c0b82SPatrick Mooney 		 * mapped in the GPT, an empty leaf entry indicates there is no
9157c8c0b82SPatrick Mooney 		 * mapping, populated or not, at this GPA.
9167c8c0b82SPatrick Mooney 		 */
9177c8c0b82SPatrick Mooney 		return (FC_NOMAP);
9187c8c0b82SPatrick Mooney 	}
9197c8c0b82SPatrick Mooney 
9207c8c0b82SPatrick Mooney 	if (vmm_gpt_is_mapped(gpt, leaf, &pfn, &prot)) {
9217c8c0b82SPatrick Mooney 		if ((req_prot & prot) != req_prot) {
9227c8c0b82SPatrick Mooney 			return (FC_PROT);
9237c8c0b82SPatrick Mooney 		}
9247c8c0b82SPatrick Mooney 	} else {
925*b9b43e84SPatrick Mooney 		int err = vmspace_ensure_mapped(vms, gpa, req_prot, &pfn, leaf);
926*b9b43e84SPatrick Mooney 		if (err != 0) {
927*b9b43e84SPatrick Mooney 			return (err);
9287c8c0b82SPatrick Mooney 		}
9297c8c0b82SPatrick Mooney 	}
9307c8c0b82SPatrick Mooney 
9317c8c0b82SPatrick Mooney 	ASSERT(pfn != PFN_INVALID && leaf != NULL);
9327c8c0b82SPatrick Mooney 	if (pfnp != NULL) {
9337c8c0b82SPatrick Mooney 		*pfnp = pfn;
9347c8c0b82SPatrick Mooney 	}
9357c8c0b82SPatrick Mooney 	if (ptepp != NULL) {
9367c8c0b82SPatrick Mooney 		*ptepp = leaf;
9377c8c0b82SPatrick Mooney 	}
9387c8c0b82SPatrick Mooney 	return (0);
9397c8c0b82SPatrick Mooney }
9407c8c0b82SPatrick Mooney 
9417c8c0b82SPatrick Mooney /*
9427c8c0b82SPatrick Mooney  * Populate (make resident in the page tables) a region of the vmspace.
9437c8c0b82SPatrick Mooney  *
9447c8c0b82SPatrick Mooney  * Presently the [start, end) span must equal a region previously mapped by a
9457c8c0b82SPatrick Mooney  * call to vmspace_map().
9467c8c0b82SPatrick Mooney  */
9477c8c0b82SPatrick Mooney int
9483a0fa64cSPatrick Mooney vmspace_populate(vmspace_t *vms, uintptr_t addr, uintptr_t len)
9497c8c0b82SPatrick Mooney {
9507c8c0b82SPatrick Mooney 	vmspace_mapping_t *vmsm;
9517c8c0b82SPatrick Mooney 	mutex_enter(&vms->vms_lock);
9527c8c0b82SPatrick Mooney 
9537c8c0b82SPatrick Mooney 	/* For the time being, only exact-match mappings are expected */
9543a0fa64cSPatrick Mooney 	if ((vmsm = vm_mapping_find(vms, addr, len)) == NULL) {
9557c8c0b82SPatrick Mooney 		mutex_exit(&vms->vms_lock);
9567c8c0b82SPatrick Mooney 		return (FC_NOMAP);
9577c8c0b82SPatrick Mooney 	}
9587c8c0b82SPatrick Mooney 
9597c8c0b82SPatrick Mooney 	vm_object_t *vmo = vmsm->vmsm_object;
9607c8c0b82SPatrick Mooney 	const int prot = vmsm->vmsm_prot;
9617c8c0b82SPatrick Mooney 	const uint8_t attr = vmo->vmo_attr;
9627c8c0b82SPatrick Mooney 	size_t populated = 0;
9633a0fa64cSPatrick Mooney 	const size_t end = addr + len;
9643a0fa64cSPatrick Mooney 	for (uintptr_t gpa = addr & PAGEMASK; gpa < end; gpa += PAGESIZE) {
9657c8c0b82SPatrick Mooney 		const pfn_t pfn = vm_object_pfn(vmo, VMSM_OFFSET(vmsm, gpa));
9667c8c0b82SPatrick Mooney 		VERIFY(pfn != PFN_INVALID);
9677c8c0b82SPatrick Mooney 
9687c8c0b82SPatrick Mooney 		if (vmm_gpt_map(vms->vms_gpt, gpa, pfn, prot, attr)) {
9697c8c0b82SPatrick Mooney 			populated++;
9707c8c0b82SPatrick Mooney 		}
9717c8c0b82SPatrick Mooney 	}
9727c8c0b82SPatrick Mooney 	atomic_add_64(&vms->vms_pages_mapped, populated);
9737c8c0b82SPatrick Mooney 
9747c8c0b82SPatrick Mooney 	mutex_exit(&vms->vms_lock);
9757c8c0b82SPatrick Mooney 	return (0);
9767c8c0b82SPatrick Mooney }
9777c8c0b82SPatrick Mooney 
9787c8c0b82SPatrick Mooney /*
9797c8c0b82SPatrick Mooney  * Allocate a client from a given vmspace.
9807c8c0b82SPatrick Mooney  */
9817c8c0b82SPatrick Mooney vm_client_t *
9827c8c0b82SPatrick Mooney vmspace_client_alloc(vmspace_t *vms)
9837c8c0b82SPatrick Mooney {
9847c8c0b82SPatrick Mooney 	vm_client_t *vmc;
9857c8c0b82SPatrick Mooney 
9867c8c0b82SPatrick Mooney 	vmc = kmem_zalloc(sizeof (vm_client_t), KM_SLEEP);
9877c8c0b82SPatrick Mooney 	vmc->vmc_space = vms;
9887c8c0b82SPatrick Mooney 	mutex_init(&vmc->vmc_lock, NULL, MUTEX_DRIVER, NULL);
9897c8c0b82SPatrick Mooney 	cv_init(&vmc->vmc_cv, NULL, CV_DRIVER, NULL);
9907c8c0b82SPatrick Mooney 	vmc->vmc_state = VCS_IDLE;
9917c8c0b82SPatrick Mooney 	vmc->vmc_cpu_active = -1;
9927c8c0b82SPatrick Mooney 	list_create(&vmc->vmc_held_pages, sizeof (vm_page_t),
9937c8c0b82SPatrick Mooney 	    offsetof(vm_page_t, vmp_node));
9947c8c0b82SPatrick Mooney 	vmc->vmc_track_dirty = vms->vms_track_dirty;
9957c8c0b82SPatrick Mooney 
9967c8c0b82SPatrick Mooney 	mutex_enter(&vms->vms_lock);
9977c8c0b82SPatrick Mooney 	list_insert_tail(&vms->vms_clients, vmc);
9987c8c0b82SPatrick Mooney 	mutex_exit(&vms->vms_lock);
9997c8c0b82SPatrick Mooney 
10007c8c0b82SPatrick Mooney 	return (vmc);
10017c8c0b82SPatrick Mooney }
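
/*
 * Clients allocated here are the data-path handle described at the top of the
 * file.  A rough sketch of a consumer accessing guest memory through one,
 * assuming the vmc_hold()/vmp_release() interfaces defined later in this file
 * carry the shapes suggested here:
 *
 *	vm_client_t *vmc = vmspace_client_alloc(vms);
 *	vm_page_t *vmp = vmc_hold(vmc, gpa, PROT_READ | PROT_WRITE);
 *	if (vmp != NULL) {
 *		... access the backing page contents ...
 *		vmp_release(vmp);
 *	}
 */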
10027c8c0b82SPatrick Mooney 
10037c8c0b82SPatrick Mooney /*
10047c8c0b82SPatrick Mooney  * Get the nested page table root pointer (EPTP/NCR3) value.
10057c8c0b82SPatrick Mooney  */
10067c8c0b82SPatrick Mooney uint64_t
10077c8c0b82SPatrick Mooney vmspace_table_root(vmspace_t *vms)
10087c8c0b82SPatrick Mooney {
10094ac713daSLuqman Aden 	return (vmm_gpt_get_pmtp(vms->vms_gpt, vms->vms_track_dirty));
10107c8c0b82SPatrick Mooney }
10117c8c0b82SPatrick Mooney 
10127c8c0b82SPatrick Mooney /*
10137c8c0b82SPatrick Mooney  * Get the current generation number of the nested page table.
10147c8c0b82SPatrick Mooney  */
10157c8c0b82SPatrick Mooney uint64_t
10167c8c0b82SPatrick Mooney vmspace_table_gen(vmspace_t *vms)
10177c8c0b82SPatrick Mooney {
10187c8c0b82SPatrick Mooney 	return (vms->vms_pt_gen);
10197c8c0b82SPatrick Mooney }
10207c8c0b82SPatrick Mooney 
10217c8c0b82SPatrick Mooney /*
10227c8c0b82SPatrick Mooney  * Mark a vm_client as active.  This will block if/while the client is held by
10237c8c0b82SPatrick Mooney  * the vmspace.  On success, it returns with vm_client_t`vmc_lock held.  It will
10247c8c0b82SPatrick Mooney  * fail if the vm_client has been orphaned.
10257c8c0b82SPatrick Mooney  */
10267c8c0b82SPatrick Mooney static int
10277c8c0b82SPatrick Mooney vmc_activate(vm_client_t *vmc)
10287c8c0b82SPatrick Mooney {
10297c8c0b82SPatrick Mooney 	mutex_enter(&vmc->vmc_lock);
10307c8c0b82SPatrick Mooney 	VERIFY0(vmc->vmc_state & VCS_ACTIVE);
10317c8c0b82SPatrick Mooney 	if ((vmc->vmc_state & VCS_ORPHANED) != 0) {
10327c8c0b82SPatrick Mooney 		mutex_exit(&vmc->vmc_lock);
10337c8c0b82SPatrick Mooney 		return (ENXIO);
10347c8c0b82SPatrick Mooney 	}
10357c8c0b82SPatrick Mooney 	while ((vmc->vmc_state & VCS_HOLD) != 0) {
10367c8c0b82SPatrick Mooney 		cv_wait(&vmc->vmc_cv, &vmc->vmc_lock);
10377c8c0b82SPatrick Mooney 	}
10387c8c0b82SPatrick Mooney 	vmc->vmc_state |= VCS_ACTIVE;
10397c8c0b82SPatrick Mooney 	return (0);
10407c8c0b82SPatrick Mooney }
10417c8c0b82SPatrick Mooney 
10427c8c0b82SPatrick Mooney /*
10437c8c0b82SPatrick Mooney  * Mark a vm_client as no longer active.  It must be called with
10447c8c0b82SPatrick Mooney  * vm_client_t`vmc_lock already held, and will return with it released.
10457c8c0b82SPatrick Mooney  */
10467c8c0b82SPatrick Mooney static void
10477c8c0b82SPatrick Mooney vmc_deactivate(vm_client_t *vmc)
10487c8c0b82SPatrick Mooney {
10497c8c0b82SPatrick Mooney 	ASSERT(MUTEX_HELD(&vmc->vmc_lock));
10507c8c0b82SPatrick Mooney 	VERIFY(vmc->vmc_state & VCS_ACTIVE);
10517c8c0b82SPatrick Mooney 
10527c8c0b82SPatrick Mooney 	vmc->vmc_state ^= VCS_ACTIVE;
10537c8c0b82SPatrick Mooney 	if ((vmc->vmc_state & VCS_HOLD) != 0) {
10547c8c0b82SPatrick Mooney 		cv_broadcast(&vmc->vmc_cv);
10557c8c0b82SPatrick Mooney 	}
10567c8c0b82SPatrick Mooney 	mutex_exit(&vmc->vmc_lock);
10577c8c0b82SPatrick Mooney }
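
/*
 * Illustrative sketch (not part of the driver): consumers inside this file
 * bracket vmspace accesses with vmc_activate()/vmc_deactivate(), bailing out
 * if the client has been orphaned.  vmc_fault() and vmc_set_inval_cb() below
 * follow exactly this pattern; the helper name here is hypothetical.
 */
static int
vmc_with_activation_sketch(vm_client_t *vmc)
{
	int err = vmc_activate(vmc);

	if (err != 0) {
		/* Client was orphaned from its vmspace */
		return (err);
	}

	/* ... operate on the vmspace while vmc_lock is held ... */

	vmc_deactivate(vmc);
	return (0);
}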
10587c8c0b82SPatrick Mooney 
10597c8c0b82SPatrick Mooney /*
10607c8c0b82SPatrick Mooney  * Indicate that a CPU will be utilizing the nested page tables through this VM
10617c8c0b82SPatrick Mooney  * client.  Interrupts (and/or the GIF) are expected to be disabled when calling
10627c8c0b82SPatrick Mooney  * this function.  Returns the generation number of the nested page table (to be
10637c8c0b82SPatrick Mooney  * used for TLB invalidations).
10647c8c0b82SPatrick Mooney  */
10657c8c0b82SPatrick Mooney uint64_t
10667c8c0b82SPatrick Mooney vmc_table_enter(vm_client_t *vmc)
10677c8c0b82SPatrick Mooney {
10687c8c0b82SPatrick Mooney 	vmspace_t *vms = vmc->vmc_space;
10697c8c0b82SPatrick Mooney 	uint64_t gen;
10707c8c0b82SPatrick Mooney 
10717c8c0b82SPatrick Mooney 	ASSERT0(vmc->vmc_state & (VCS_ACTIVE | VCS_ON_CPU));
10727c8c0b82SPatrick Mooney 	ASSERT3S(vmc->vmc_cpu_active, ==, -1);
10737c8c0b82SPatrick Mooney 
10747c8c0b82SPatrick Mooney 	/*
10757c8c0b82SPatrick Mooney 	 * Since the NPT activation occurs with interrupts disabled, this must
10767c8c0b82SPatrick Mooney 	 * be done without taking vmc_lock like normal.
10777c8c0b82SPatrick Mooney 	 */
10787c8c0b82SPatrick Mooney 	gen = vms->vms_pt_gen;
10797c8c0b82SPatrick Mooney 	vmc->vmc_cpu_active = CPU->cpu_id;
10807c8c0b82SPatrick Mooney 	vmc->vmc_cpu_gen = gen;
10817c8c0b82SPatrick Mooney 	atomic_or_uint(&vmc->vmc_state, VCS_ON_CPU);
10827c8c0b82SPatrick Mooney 
10837c8c0b82SPatrick Mooney 	return (gen);
10847c8c0b82SPatrick Mooney }
10857c8c0b82SPatrick Mooney 
10867c8c0b82SPatrick Mooney /*
10877c8c0b82SPatrick Mooney  * Indicate that this VM client is no longer (directly) using the underlying
10887c8c0b82SPatrick Mooney  * page tables.  Interrupts (and/or the GIF) must be enabled prior to calling
10897c8c0b82SPatrick Mooney  * this function.
10907c8c0b82SPatrick Mooney  */
10917c8c0b82SPatrick Mooney void
10927c8c0b82SPatrick Mooney vmc_table_exit(vm_client_t *vmc)
10937c8c0b82SPatrick Mooney {
10947c8c0b82SPatrick Mooney 	mutex_enter(&vmc->vmc_lock);
10957c8c0b82SPatrick Mooney 
10967c8c0b82SPatrick Mooney 	ASSERT(vmc->vmc_state & VCS_ON_CPU);
10977c8c0b82SPatrick Mooney 	vmc->vmc_state ^= VCS_ON_CPU;
10987c8c0b82SPatrick Mooney 	vmc->vmc_cpu_active = -1;
10997c8c0b82SPatrick Mooney 	if ((vmc->vmc_state & VCS_HOLD) != 0) {
11007c8c0b82SPatrick Mooney 		cv_broadcast(&vmc->vmc_cv);
11017c8c0b82SPatrick Mooney 	}
11027c8c0b82SPatrick Mooney 
11037c8c0b82SPatrick Mooney 	mutex_exit(&vmc->vmc_lock);
11047c8c0b82SPatrick Mooney }
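
/*
 * Illustrative sketch (not part of the driver): a vCPU loop is expected to
 * wrap guest execution with vmc_table_enter()/vmc_table_exit().  The
 * generation number returned by vmc_table_enter() lets the caller decide
 * whether a TLB (EPT/NPT) flush is needed before VM entry.  Names other than
 * the vmc_* calls are hypothetical, and interrupt masking and the actual
 * VMRUN/VMLAUNCH machinery are elided to comments.
 */
static void
vcpu_run_once_sketch(vm_client_t *vmc, uint64_t *last_gen)
{
	/* Interrupts (and/or the GIF) are disabled by the caller here */
	const uint64_t gen = vmc_table_enter(vmc);

	if (gen != *last_gen) {
		/* ... invalidate guest TLB entries for this vmspace ... */
		*last_gen = gen;
	}

	/* ... enter guest context ... */

	/* Interrupts are re-enabled after guest exit */
	vmc_table_exit(vmc);
}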
11057c8c0b82SPatrick Mooney 
11067c8c0b82SPatrick Mooney static void
11077c8c0b82SPatrick Mooney vmc_space_hold(vm_client_t *vmc)
11087c8c0b82SPatrick Mooney {
11097c8c0b82SPatrick Mooney 	mutex_enter(&vmc->vmc_lock);
11107c8c0b82SPatrick Mooney 	VERIFY0(vmc->vmc_state & VCS_HOLD);
11117c8c0b82SPatrick Mooney 
11127c8c0b82SPatrick Mooney 	/*
11137c8c0b82SPatrick Mooney 	 * Because vmc_table_enter() alters vmc_state from a context where
11147c8c0b82SPatrick Mooney 	 * interrupts are disabled, it cannot pay heed to vmc_lock, so setting
11157c8c0b82SPatrick Mooney 	 * VCS_HOLD must be done atomically here.
11167c8c0b82SPatrick Mooney 	 */
11177c8c0b82SPatrick Mooney 	atomic_or_uint(&vmc->vmc_state, VCS_HOLD);
11187c8c0b82SPatrick Mooney 
11197c8c0b82SPatrick Mooney 	/* Wait for client to go inactive */
11207c8c0b82SPatrick Mooney 	while ((vmc->vmc_state & VCS_ACTIVE) != 0) {
11217c8c0b82SPatrick Mooney 		cv_wait(&vmc->vmc_cv, &vmc->vmc_lock);
11227c8c0b82SPatrick Mooney 	}
11237c8c0b82SPatrick Mooney 	mutex_exit(&vmc->vmc_lock);
11247c8c0b82SPatrick Mooney }
11257c8c0b82SPatrick Mooney 
11267c8c0b82SPatrick Mooney static void
11277c8c0b82SPatrick Mooney vmc_space_release(vm_client_t *vmc, bool kick_on_cpu)
11287c8c0b82SPatrick Mooney {
11297c8c0b82SPatrick Mooney 	mutex_enter(&vmc->vmc_lock);
11307c8c0b82SPatrick Mooney 	VERIFY(vmc->vmc_state & VCS_HOLD);
11317c8c0b82SPatrick Mooney 
11327c8c0b82SPatrick Mooney 	if (kick_on_cpu && (vmc->vmc_state & VCS_ON_CPU) != 0) {
11337c8c0b82SPatrick Mooney 		poke_cpu(vmc->vmc_cpu_active);
11347c8c0b82SPatrick Mooney 
11357c8c0b82SPatrick Mooney 		while ((vmc->vmc_state & VCS_ON_CPU) != 0) {
11367c8c0b82SPatrick Mooney 			cv_wait(&vmc->vmc_cv, &vmc->vmc_lock);
11377c8c0b82SPatrick Mooney 		}
11387c8c0b82SPatrick Mooney 	}
11397c8c0b82SPatrick Mooney 
11407c8c0b82SPatrick Mooney 	/*
11417c8c0b82SPatrick Mooney 	 * Because vmc_table_enter() alters vmc_state from a context where
11427c8c0b82SPatrick Mooney 	 * interrupts are disabled, it cannot pay heed to vmc_lock, so clearing
11437c8c0b82SPatrick Mooney 	 * VCS_HOLD must be done atomically here.
11447c8c0b82SPatrick Mooney 	 */
11457c8c0b82SPatrick Mooney 	atomic_and_uint(&vmc->vmc_state, ~VCS_HOLD);
11467c8c0b82SPatrick Mooney 	cv_broadcast(&vmc->vmc_cv);
11477c8c0b82SPatrick Mooney 	mutex_exit(&vmc->vmc_lock);
11487c8c0b82SPatrick Mooney }
11497c8c0b82SPatrick Mooney 
11507c8c0b82SPatrick Mooney static void
11517c8c0b82SPatrick Mooney vmc_space_invalidate(vm_client_t *vmc, uintptr_t addr, size_t size,
11527c8c0b82SPatrick Mooney     uint64_t gen)
11537c8c0b82SPatrick Mooney {
11547c8c0b82SPatrick Mooney 	mutex_enter(&vmc->vmc_lock);
11557c8c0b82SPatrick Mooney 	VERIFY(vmc->vmc_state & VCS_HOLD);
11567c8c0b82SPatrick Mooney 	if ((vmc->vmc_state & VCS_ON_CPU) != 0) {
11577c8c0b82SPatrick Mooney 		/*
11587c8c0b82SPatrick Mooney 		 * Wait for clients using an old generation of the page tables
11597c8c0b82SPatrick Mooney 		 * to exit guest context, where they subsequently flush the TLB
11607c8c0b82SPatrick Mooney 		 * for the new generation.
11617c8c0b82SPatrick Mooney 		 */
11627c8c0b82SPatrick Mooney 		if (vmc->vmc_cpu_gen < gen) {
11637c8c0b82SPatrick Mooney 			poke_cpu(vmc->vmc_cpu_active);
11647c8c0b82SPatrick Mooney 
11657c8c0b82SPatrick Mooney 			while ((vmc->vmc_state & VCS_ON_CPU) != 0) {
11667c8c0b82SPatrick Mooney 				cv_wait(&vmc->vmc_cv, &vmc->vmc_lock);
11677c8c0b82SPatrick Mooney 			}
11687c8c0b82SPatrick Mooney 		}
11697c8c0b82SPatrick Mooney 	}
11707c8c0b82SPatrick Mooney 	if (vmc->vmc_inval_func != NULL) {
11717c8c0b82SPatrick Mooney 		vmc_inval_cb_t func = vmc->vmc_inval_func;
11727c8c0b82SPatrick Mooney 		void *data = vmc->vmc_inval_data;
11737c8c0b82SPatrick Mooney 
11747c8c0b82SPatrick Mooney 		/*
11757c8c0b82SPatrick Mooney 		 * Perform the actual invalidation call outside vmc_lock to
11767c8c0b82SPatrick Mooney 		 * avoid lock ordering issues in the consumer.  Since the client
11777c8c0b82SPatrick Mooney 		 * is under VCS_HOLD, this is safe.
11787c8c0b82SPatrick Mooney 		 */
11797c8c0b82SPatrick Mooney 		mutex_exit(&vmc->vmc_lock);
11807c8c0b82SPatrick Mooney 		func(data, addr, size);
11817c8c0b82SPatrick Mooney 		mutex_enter(&vmc->vmc_lock);
11827c8c0b82SPatrick Mooney 	}
11837c8c0b82SPatrick Mooney 	mutex_exit(&vmc->vmc_lock);
11847c8c0b82SPatrick Mooney }
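
/*
 * Illustrative sketch (not part of the driver): one possible way the
 * vmspace-side map/unmap paths (earlier in this file) can drive the
 * per-client hold protocol: each client is held, the per-client update is
 * applied, and the hold is released.  This is a simplification of the real
 * sequencing; the helper name is hypothetical and it assumes vms_lock is
 * held by the caller, as vmc_space_orphan() verifies for list traversal.
 */
static void
vmspace_clients_invalidate_sketch(vmspace_t *vms, uintptr_t addr, size_t size,
    uint64_t gen)
{
	for (vm_client_t *vmc = list_head(&vms->vms_clients); vmc != NULL;
	    vmc = list_next(&vms->vms_clients, vmc)) {
		vmc_space_hold(vmc);
		vmc_space_invalidate(vmc, addr, size, gen);
		vmc_space_release(vmc, false);
	}
}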
11857c8c0b82SPatrick Mooney 
11867c8c0b82SPatrick Mooney static void
11877c8c0b82SPatrick Mooney vmc_space_unmap(vm_client_t *vmc, uintptr_t addr, size_t size,
11887c8c0b82SPatrick Mooney     vm_object_t *vmo)
11897c8c0b82SPatrick Mooney {
11907c8c0b82SPatrick Mooney 	mutex_enter(&vmc->vmc_lock);
11917c8c0b82SPatrick Mooney 	VERIFY(vmc->vmc_state & VCS_HOLD);
11927c8c0b82SPatrick Mooney 
11937c8c0b82SPatrick Mooney 	/*
11947c8c0b82SPatrick Mooney 	 * With the current vCPU exclusion invariants in place, we do not expect
11957c8c0b82SPatrick Mooney 	 * a vCPU to be in guest context during an unmap.
11967c8c0b82SPatrick Mooney 	 */
11977c8c0b82SPatrick Mooney 	VERIFY0(vmc->vmc_state & VCS_ON_CPU);
11987c8c0b82SPatrick Mooney 
11997c8c0b82SPatrick Mooney 	/*
12007c8c0b82SPatrick Mooney 	 * Any holds against the unmapped region need to establish their own
12017c8c0b82SPatrick Mooney 	 * reference to the underlying object to avoid a potential
12027c8c0b82SPatrick Mooney 	 * use-after-free.
12037c8c0b82SPatrick Mooney 	 */
12047c8c0b82SPatrick Mooney 	for (vm_page_t *vmp = list_head(&vmc->vmc_held_pages);
12057c8c0b82SPatrick Mooney 	    vmp != NULL;
12067c8c0b82SPatrick Mooney 	    vmp = list_next(&vmc->vmc_held_pages, vmp)) {
12077c8c0b82SPatrick Mooney 		if (vmp->vmp_gpa < addr ||
12087c8c0b82SPatrick Mooney 		    vmp->vmp_gpa >= (addr + size)) {
12097c8c0b82SPatrick Mooney 			/* Hold outside region in question */
12107c8c0b82SPatrick Mooney 			continue;
12117c8c0b82SPatrick Mooney 		}
12127c8c0b82SPatrick Mooney 		if (vmp->vmp_obj_ref == NULL) {
12137c8c0b82SPatrick Mooney 			vm_object_reference(vmo);
12147c8c0b82SPatrick Mooney 			vmp->vmp_obj_ref = vmo;
12157c8c0b82SPatrick Mooney 			/* For an unmapped region, PTE is now meaningless */
12167c8c0b82SPatrick Mooney 			vmp->vmp_ptep = NULL;
12177c8c0b82SPatrick Mooney 		} else {
12187c8c0b82SPatrick Mooney 			/*
12197c8c0b82SPatrick Mooney 			 * Object could have gone through cycle of
12207c8c0b82SPatrick Mooney 			 * unmap-map-unmap before the hold was released.
12217c8c0b82SPatrick Mooney 			 */
12227c8c0b82SPatrick Mooney 			VERIFY3P(vmp->vmp_ptep, ==, NULL);
12237c8c0b82SPatrick Mooney 		}
12247c8c0b82SPatrick Mooney 	}
12257c8c0b82SPatrick Mooney 	mutex_exit(&vmc->vmc_lock);
12267c8c0b82SPatrick Mooney }
12277c8c0b82SPatrick Mooney 
12287c8c0b82SPatrick Mooney static vm_client_t *
12297c8c0b82SPatrick Mooney vmc_space_orphan(vm_client_t *vmc, vmspace_t *vms)
12307c8c0b82SPatrick Mooney {
12317c8c0b82SPatrick Mooney 	vm_client_t *next;
12327c8c0b82SPatrick Mooney 
12337c8c0b82SPatrick Mooney 	ASSERT(MUTEX_HELD(&vms->vms_lock));
12347c8c0b82SPatrick Mooney 
12357c8c0b82SPatrick Mooney 	mutex_enter(&vmc->vmc_lock);
12367c8c0b82SPatrick Mooney 	VERIFY3P(vmc->vmc_space, ==, vms);
12377c8c0b82SPatrick Mooney 	VERIFY0(vmc->vmc_state & VCS_ORPHANED);
12387c8c0b82SPatrick Mooney 	if (vmc->vmc_state & VCS_DESTROY) {
12397c8c0b82SPatrick Mooney 		/*
12407c8c0b82SPatrick Mooney 		 * This vm_client is currently undergoing destruction, so it
12417c8c0b82SPatrick Mooney 		 * does not need to be orphaned.  Let it proceed with its own
12427c8c0b82SPatrick Mooney 		 * clean-up task.
12437c8c0b82SPatrick Mooney 		 */
12447c8c0b82SPatrick Mooney 		next = list_next(&vms->vms_clients, vmc);
12457c8c0b82SPatrick Mooney 	} else {
12467c8c0b82SPatrick Mooney 		/*
12477c8c0b82SPatrick Mooney 		 * Clients are only orphaned when the containing vmspace is
12487c8c0b82SPatrick Mooney 		 * being torn down.  All mappings from the vmspace should
12497c8c0b82SPatrick Mooney 		 * already be gone, meaning any remaining held pages should have
12507c8c0b82SPatrick Mooney 		 * direct references to the object.
12517c8c0b82SPatrick Mooney 		 */
12527c8c0b82SPatrick Mooney 		for (vm_page_t *vmp = list_head(&vmc->vmc_held_pages);
12537c8c0b82SPatrick Mooney 		    vmp != NULL;
12547c8c0b82SPatrick Mooney 		    vmp = list_next(&vmc->vmc_held_pages, vmp)) {
12557c8c0b82SPatrick Mooney 			ASSERT3P(vmp->vmp_ptep, ==, NULL);
12567c8c0b82SPatrick Mooney 			ASSERT3P(vmp->vmp_obj_ref, !=, NULL);
12577c8c0b82SPatrick Mooney 		}
12587c8c0b82SPatrick Mooney 
12597c8c0b82SPatrick Mooney 		/*
12607c8c0b82SPatrick Mooney 		 * After this point, the client will be orphaned, unable to
12617c8c0b82SPatrick Mooney 		 * establish new page holds (or access any vmspace-related
12627c8c0b82SPatrick Mooney 		 * resources) and is in charge of cleaning up after itself.
12637c8c0b82SPatrick Mooney 		 */
12647c8c0b82SPatrick Mooney 		vmc->vmc_state |= VCS_ORPHANED;
12657c8c0b82SPatrick Mooney 		next = list_next(&vms->vms_clients, vmc);
12667c8c0b82SPatrick Mooney 		list_remove(&vms->vms_clients, vmc);
12677c8c0b82SPatrick Mooney 		vmc->vmc_space = NULL;
12687c8c0b82SPatrick Mooney 	}
12697c8c0b82SPatrick Mooney 	mutex_exit(&vmc->vmc_lock);
12707c8c0b82SPatrick Mooney 	return (next);
12717c8c0b82SPatrick Mooney }
12727c8c0b82SPatrick Mooney 
12737c8c0b82SPatrick Mooney /*
12747c8c0b82SPatrick Mooney  * Attempt to hold a page at `gpa` inside the referenced vmspace.
12757c8c0b82SPatrick Mooney  */
12767c8c0b82SPatrick Mooney vm_page_t *
1277f2357d97SPatrick Mooney vmc_hold_ext(vm_client_t *vmc, uintptr_t gpa, int prot, int flags)
12787c8c0b82SPatrick Mooney {
12797c8c0b82SPatrick Mooney 	vmspace_t *vms = vmc->vmc_space;
12807c8c0b82SPatrick Mooney 	vm_page_t *vmp;
12817c8c0b82SPatrick Mooney 	pfn_t pfn = PFN_INVALID;
12827c8c0b82SPatrick Mooney 	uint64_t *ptep = NULL;
12837c8c0b82SPatrick Mooney 
12847c8c0b82SPatrick Mooney 	ASSERT0(gpa & PAGEOFFSET);
12857c8c0b82SPatrick Mooney 	ASSERT((prot & (PROT_READ | PROT_WRITE)) != PROT_NONE);
1286f2357d97SPatrick Mooney 	ASSERT0(prot & ~PROT_ALL);
1287f2357d97SPatrick Mooney 	ASSERT0(flags & ~VPF_ALL);
12887c8c0b82SPatrick Mooney 
12897c8c0b82SPatrick Mooney 	vmp = kmem_alloc(sizeof (*vmp), KM_SLEEP);
12907c8c0b82SPatrick Mooney 	if (vmc_activate(vmc) != 0) {
12917c8c0b82SPatrick Mooney 		kmem_free(vmp, sizeof (*vmp));
12927c8c0b82SPatrick Mooney 		return (NULL);
12937c8c0b82SPatrick Mooney 	}
12947c8c0b82SPatrick Mooney 
12957c8c0b82SPatrick Mooney 	if (vmspace_lookup_map(vms, gpa, prot, &pfn, &ptep) != 0) {
12967c8c0b82SPatrick Mooney 		vmc_deactivate(vmc);
12977c8c0b82SPatrick Mooney 		kmem_free(vmp, sizeof (*vmp));
12987c8c0b82SPatrick Mooney 		return (NULL);
12997c8c0b82SPatrick Mooney 	}
13007c8c0b82SPatrick Mooney 	ASSERT(pfn != PFN_INVALID && ptep != NULL);
13017c8c0b82SPatrick Mooney 
13027c8c0b82SPatrick Mooney 	vmp->vmp_client = vmc;
13037c8c0b82SPatrick Mooney 	vmp->vmp_chain = NULL;
13047c8c0b82SPatrick Mooney 	vmp->vmp_gpa = gpa;
13057c8c0b82SPatrick Mooney 	vmp->vmp_pfn = pfn;
13067c8c0b82SPatrick Mooney 	vmp->vmp_ptep = ptep;
13077c8c0b82SPatrick Mooney 	vmp->vmp_obj_ref = NULL;
1308f2357d97SPatrick Mooney 	vmp->vmp_prot = (uint8_t)prot;
1309f2357d97SPatrick Mooney 	vmp->vmp_flags = (uint8_t)flags;
13107c8c0b82SPatrick Mooney 	list_insert_tail(&vmc->vmc_held_pages, vmp);
13117c8c0b82SPatrick Mooney 	vmc_deactivate(vmc);
13127c8c0b82SPatrick Mooney 
13137c8c0b82SPatrick Mooney 	return (vmp);
13147c8c0b82SPatrick Mooney }
13157c8c0b82SPatrick Mooney 
1316f2357d97SPatrick Mooney /*
1317f2357d97SPatrick Mooney  * Attempt to hold a page at `gpa` inside the referenced vmspace.
1318f2357d97SPatrick Mooney  */
1319f2357d97SPatrick Mooney vm_page_t *
1320f2357d97SPatrick Mooney vmc_hold(vm_client_t *vmc, uintptr_t gpa, int prot)
1321f2357d97SPatrick Mooney {
1322f2357d97SPatrick Mooney 	return (vmc_hold_ext(vmc, gpa, prot, VPF_DEFAULT));
1323f2357d97SPatrick Mooney }
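
/*
 * Illustrative sketch (not part of the driver): holding a guest page for
 * write access, copying data into it through the kpm mapping returned by
 * vmp_get_writable(), and releasing the hold.  The helper name and the
 * caller-supplied buffer are hypothetical; `gpa` must be page-aligned, as
 * vmc_hold_ext() asserts.
 */
static int
vmc_write_page_sketch(vm_client_t *vmc, uintptr_t gpa, const void *src,
    size_t len)
{
	vm_page_t *vmp;

	ASSERT3U(len, <=, PAGESIZE);

	vmp = vmc_hold(vmc, gpa, PROT_WRITE);
	if (vmp == NULL) {
		return (EFAULT);
	}
	bcopy(src, vmp_get_writable(vmp), len);
	(void) vmp_release(vmp);
	return (0);
}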
1324f2357d97SPatrick Mooney 
13257c8c0b82SPatrick Mooney int
13267c8c0b82SPatrick Mooney vmc_fault(vm_client_t *vmc, uintptr_t gpa, int prot)
13277c8c0b82SPatrick Mooney {
13287c8c0b82SPatrick Mooney 	vmspace_t *vms = vmc->vmc_space;
13297c8c0b82SPatrick Mooney 	int err;
13307c8c0b82SPatrick Mooney 
13317c8c0b82SPatrick Mooney 	err = vmc_activate(vmc);
13327c8c0b82SPatrick Mooney 	if (err == 0) {
13337c8c0b82SPatrick Mooney 		err = vmspace_lookup_map(vms, gpa & PAGEMASK, prot, NULL, NULL);
13347c8c0b82SPatrick Mooney 		vmc_deactivate(vmc);
13357c8c0b82SPatrick Mooney 	}
13367c8c0b82SPatrick Mooney 
13377c8c0b82SPatrick Mooney 	return (err);
13387c8c0b82SPatrick Mooney }
13397c8c0b82SPatrick Mooney 
13407c8c0b82SPatrick Mooney /*
13417c8c0b82SPatrick Mooney  * Allocate an additional vm_client_t, based on an existing one.  Only the
13427c8c0b82SPatrick Mooney  * association with the vmspace is cloned, not existing holds or any
13437c8c0b82SPatrick Mooney  * configured invalidation function.
13447c8c0b82SPatrick Mooney  */
13457c8c0b82SPatrick Mooney vm_client_t *
13467c8c0b82SPatrick Mooney vmc_clone(vm_client_t *vmc)
13477c8c0b82SPatrick Mooney {
13487c8c0b82SPatrick Mooney 	vmspace_t *vms = vmc->vmc_space;
13497c8c0b82SPatrick Mooney 
13507c8c0b82SPatrick Mooney 	return (vmspace_client_alloc(vms));
13517c8c0b82SPatrick Mooney }
13527c8c0b82SPatrick Mooney 
13537c8c0b82SPatrick Mooney /*
13547c8c0b82SPatrick Mooney  * Register a function (and associated data pointer) to be called when an
13557c8c0b82SPatrick Mooney  * address range in the vmspace is invalidated.
13567c8c0b82SPatrick Mooney  */
13577c8c0b82SPatrick Mooney int
13587c8c0b82SPatrick Mooney vmc_set_inval_cb(vm_client_t *vmc, vmc_inval_cb_t func, void *data)
13597c8c0b82SPatrick Mooney {
13607c8c0b82SPatrick Mooney 	int err;
13617c8c0b82SPatrick Mooney 
13627c8c0b82SPatrick Mooney 	err = vmc_activate(vmc);
13637c8c0b82SPatrick Mooney 	if (err == 0) {
13647c8c0b82SPatrick Mooney 		vmc->vmc_inval_func = func;
13657c8c0b82SPatrick Mooney 		vmc->vmc_inval_data = data;
13667c8c0b82SPatrick Mooney 		vmc_deactivate(vmc);
13677c8c0b82SPatrick Mooney 	}
13687c8c0b82SPatrick Mooney 
13697c8c0b82SPatrick Mooney 	return (err);
13707c8c0b82SPatrick Mooney }
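
/*
 * Illustrative sketch (not part of the driver): registering an invalidation
 * callback on a client.  The callback signature is inferred from the call
 * site in vmc_space_invalidate() above (data pointer, GPA, length); the
 * callback body and both helper names are hypothetical.
 */
static void
sketch_inval_cb(void *data, uintptr_t gpa, size_t size)
{
	/* e.g. flush any cached translations covering [gpa, gpa + size) */
}

static void
sketch_register_inval(vm_client_t *vmc, void *consumer_data)
{
	if (vmc_set_inval_cb(vmc, sketch_inval_cb, consumer_data) != 0) {
		/* Client has already been orphaned from its vmspace */
	}
}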
13717c8c0b82SPatrick Mooney 
13727c8c0b82SPatrick Mooney /*
13737c8c0b82SPatrick Mooney  * Destroy a vm_client_t instance.
13747c8c0b82SPatrick Mooney  *
13757c8c0b82SPatrick Mooney  * No pages held through this vm_client_t may be outstanding when performing a
13767c8c0b82SPatrick Mooney  * vmc_destroy().  For vCPU clients, the client must not be on-CPU (that is, a
13777c8c0b82SPatrick Mooney  * call to vmc_table_exit() must already have been made).
13787c8c0b82SPatrick Mooney  */
13797c8c0b82SPatrick Mooney void
13807c8c0b82SPatrick Mooney vmc_destroy(vm_client_t *vmc)
13817c8c0b82SPatrick Mooney {
13827c8c0b82SPatrick Mooney 	mutex_enter(&vmc->vmc_lock);
13837c8c0b82SPatrick Mooney 
13847c8c0b82SPatrick Mooney 	VERIFY(list_is_empty(&vmc->vmc_held_pages));
13857c8c0b82SPatrick Mooney 	VERIFY0(vmc->vmc_state & (VCS_ACTIVE | VCS_ON_CPU));
13867c8c0b82SPatrick Mooney 
13877c8c0b82SPatrick Mooney 	if ((vmc->vmc_state & VCS_ORPHANED) == 0) {
13887c8c0b82SPatrick Mooney 		vmspace_t *vms;
13897c8c0b82SPatrick Mooney 
13907c8c0b82SPatrick Mooney 		/*
13917c8c0b82SPatrick Mooney 		 * Deassociation with the parent vmspace must be done carefully:
13927c8c0b82SPatrick Mooney 		 * The vmspace could attempt to orphan this vm_client while we
13937c8c0b82SPatrick Mooney 		 * release vmc_lock in order to take vms_lock (the required
13947c8c0b82SPatrick Mooney 		 * order).  The client is marked to indicate that destruction is
13957c8c0b82SPatrick Mooney 		 * under way.  Doing so prevents any racing orphan operation
13967c8c0b82SPatrick Mooney 		 * from applying to this client, allowing us to deassociate from
13977c8c0b82SPatrick Mooney 		 * the vmspace safely.
13987c8c0b82SPatrick Mooney 		 */
13997c8c0b82SPatrick Mooney 		vmc->vmc_state |= VCS_DESTROY;
14007c8c0b82SPatrick Mooney 		vms = vmc->vmc_space;
14017c8c0b82SPatrick Mooney 		mutex_exit(&vmc->vmc_lock);
14027c8c0b82SPatrick Mooney 
14037c8c0b82SPatrick Mooney 		mutex_enter(&vms->vms_lock);
14047c8c0b82SPatrick Mooney 		mutex_enter(&vmc->vmc_lock);
14057c8c0b82SPatrick Mooney 		list_remove(&vms->vms_clients, vmc);
14067c8c0b82SPatrick Mooney 		/*
14077c8c0b82SPatrick Mooney 		 * If the vmspace began its own destruction operation while we
14087c8c0b82SPatrick Mooney 		 * were navigating the locks, be sure to notify it about this
14097c8c0b82SPatrick Mooney 		 * vm_client being deassociated.
14107c8c0b82SPatrick Mooney 		 */
14117c8c0b82SPatrick Mooney 		cv_signal(&vms->vms_cv);
14127c8c0b82SPatrick Mooney 		mutex_exit(&vmc->vmc_lock);
14137c8c0b82SPatrick Mooney 		mutex_exit(&vms->vms_lock);
14147c8c0b82SPatrick Mooney 	} else {
14157c8c0b82SPatrick Mooney 		VERIFY3P(vmc->vmc_space, ==, NULL);
14167c8c0b82SPatrick Mooney 		mutex_exit(&vmc->vmc_lock);
14177c8c0b82SPatrick Mooney 	}
14187c8c0b82SPatrick Mooney 
14197c8c0b82SPatrick Mooney 	mutex_destroy(&vmc->vmc_lock);
14207c8c0b82SPatrick Mooney 	cv_destroy(&vmc->vmc_cv);
14217c8c0b82SPatrick Mooney 	list_destroy(&vmc->vmc_held_pages);
14227c8c0b82SPatrick Mooney 
14237c8c0b82SPatrick Mooney 	kmem_free(vmc, sizeof (*vmc));
14247c8c0b82SPatrick Mooney }
14257c8c0b82SPatrick Mooney 
14267c8c0b82SPatrick Mooney static __inline void *
14277c8c0b82SPatrick Mooney vmp_ptr(const vm_page_t *vmp)
14287c8c0b82SPatrick Mooney {
14297c8c0b82SPatrick Mooney 	ASSERT3U(vmp->vmp_pfn, !=, PFN_INVALID);
14307c8c0b82SPatrick Mooney 
14317c8c0b82SPatrick Mooney 	const uintptr_t paddr = (vmp->vmp_pfn << PAGESHIFT);
14327c8c0b82SPatrick Mooney 	return ((void *)((uintptr_t)kpm_vbase + paddr));
14337c8c0b82SPatrick Mooney }
14347c8c0b82SPatrick Mooney 
14357c8c0b82SPatrick Mooney /*
14367c8c0b82SPatrick Mooney  * Get a readable kernel-virtual pointer for a held page.
14377c8c0b82SPatrick Mooney  *
14387c8c0b82SPatrick Mooney  * Only legal to call if PROT_READ was specified in `prot` for the vmc_hold()
14397c8c0b82SPatrick Mooney  * call to acquire this page reference.
14407c8c0b82SPatrick Mooney  */
14417c8c0b82SPatrick Mooney const void *
14427c8c0b82SPatrick Mooney vmp_get_readable(const vm_page_t *vmp)
14437c8c0b82SPatrick Mooney {
14447c8c0b82SPatrick Mooney 	ASSERT(vmp->vmp_prot & PROT_READ);
14457c8c0b82SPatrick Mooney 
14467c8c0b82SPatrick Mooney 	return (vmp_ptr(vmp));
14477c8c0b82SPatrick Mooney }
14487c8c0b82SPatrick Mooney 
14497c8c0b82SPatrick Mooney /*
14507c8c0b82SPatrick Mooney  * Get a writable kernel-virtual pointer for a held page.
14517c8c0b82SPatrick Mooney  *
14527c8c0b82SPatrick Mooney  * Only legal to call if PROT_WRITE was specified in `prot` for the vmc_hold()
14537c8c0b82SPatrick Mooney  * call to acquire this page reference.
14547c8c0b82SPatrick Mooney  */
14557c8c0b82SPatrick Mooney void *
14567c8c0b82SPatrick Mooney vmp_get_writable(const vm_page_t *vmp)
14577c8c0b82SPatrick Mooney {
14587c8c0b82SPatrick Mooney 	ASSERT(vmp->vmp_prot & PROT_WRITE);
14597c8c0b82SPatrick Mooney 
14607c8c0b82SPatrick Mooney 	return (vmp_ptr(vmp));
14617c8c0b82SPatrick Mooney }
14627c8c0b82SPatrick Mooney 
14637c8c0b82SPatrick Mooney /*
14647c8c0b82SPatrick Mooney  * Get the host-physical PFN for a held page.
14657c8c0b82SPatrick Mooney  */
14667c8c0b82SPatrick Mooney pfn_t
14677c8c0b82SPatrick Mooney vmp_get_pfn(const vm_page_t *vmp)
14687c8c0b82SPatrick Mooney {
14697c8c0b82SPatrick Mooney 	return (vmp->vmp_pfn);
14707c8c0b82SPatrick Mooney }
14717c8c0b82SPatrick Mooney 
14727c8c0b82SPatrick Mooney /*
1473f2357d97SPatrick Mooney  * If this page was deferring dirty-marking in the corresponding vmspace page
1474f2357d97SPatrick Mooney  * tables, clear such a state so it is considered dirty from now on.
1475f2357d97SPatrick Mooney  */
1476f2357d97SPatrick Mooney void
1477f2357d97SPatrick Mooney vmp_mark_dirty(vm_page_t *vmp)
1478f2357d97SPatrick Mooney {
1479f2357d97SPatrick Mooney 	ASSERT((vmp->vmp_prot & PROT_WRITE) != 0);
1480f2357d97SPatrick Mooney 
1481f2357d97SPatrick Mooney 	atomic_and_8(&vmp->vmp_flags, ~VPF_DEFER_DIRTY);
1482f2357d97SPatrick Mooney }
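
/*
 * Illustrative sketch (not part of the driver): a consumer which may or may
 * not write to a held page can request the hold with VPF_DEFER_DIRTY so the
 * backing PTE is not unconditionally marked dirty on release, and then call
 * vmp_mark_dirty() only once a write actually occurs.  The helper name is
 * hypothetical; VPF_DEFER_DIRTY is assumed to be a valid bit within VPF_ALL.
 */
static void
vmc_maybe_write_sketch(vm_client_t *vmc, uintptr_t gpa, bool do_write,
    uint8_t value)
{
	vm_page_t *vmp;

	vmp = vmc_hold_ext(vmc, gpa, PROT_READ | PROT_WRITE, VPF_DEFER_DIRTY);
	if (vmp == NULL) {
		return;
	}
	if (do_write) {
		*(uint8_t *)vmp_get_writable(vmp) = value;
		/* The page is now considered dirty when released */
		vmp_mark_dirty(vmp);
	}
	(void) vmp_release(vmp);
}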
1483f2357d97SPatrick Mooney 
1484f2357d97SPatrick Mooney /*
14857c8c0b82SPatrick Mooney  * Store a pointer to `to_chain` in the page-chaining slot of `vmp`.
14867c8c0b82SPatrick Mooney  */
14877c8c0b82SPatrick Mooney void
14887c8c0b82SPatrick Mooney vmp_chain(vm_page_t *vmp, vm_page_t *to_chain)
14897c8c0b82SPatrick Mooney {
14907c8c0b82SPatrick Mooney 	ASSERT3P(vmp->vmp_chain, ==, NULL);
14917c8c0b82SPatrick Mooney 
14927c8c0b82SPatrick Mooney 	vmp->vmp_chain = to_chain;
14937c8c0b82SPatrick Mooney }
14947c8c0b82SPatrick Mooney 
14957c8c0b82SPatrick Mooney /*
14967c8c0b82SPatrick Mooney  * Retrieve the pointer from the page-chaining in `vmp`.
14977c8c0b82SPatrick Mooney  */
14987c8c0b82SPatrick Mooney vm_page_t *
14997c8c0b82SPatrick Mooney vmp_next(const vm_page_t *vmp)
15007c8c0b82SPatrick Mooney {
15017c8c0b82SPatrick Mooney 	return (vmp->vmp_chain);
15027c8c0b82SPatrick Mooney }
15037c8c0b82SPatrick Mooney 
15047c8c0b82SPatrick Mooney static __inline bool
15057c8c0b82SPatrick Mooney vmp_release_inner(vm_page_t *vmp, vm_client_t *vmc)
15067c8c0b82SPatrick Mooney {
15077c8c0b82SPatrick Mooney 	ASSERT(MUTEX_HELD(&vmc->vmc_lock));
15087c8c0b82SPatrick Mooney 
15097c8c0b82SPatrick Mooney 	bool was_unmapped = false;
15107c8c0b82SPatrick Mooney 
15117c8c0b82SPatrick Mooney 	list_remove(&vmc->vmc_held_pages, vmp);
15127c8c0b82SPatrick Mooney 	if (vmp->vmp_obj_ref != NULL) {
15137c8c0b82SPatrick Mooney 		ASSERT3P(vmp->vmp_ptep, ==, NULL);
15147c8c0b82SPatrick Mooney 
15157c8c0b82SPatrick Mooney 		vm_object_release(vmp->vmp_obj_ref);
15167c8c0b82SPatrick Mooney 		was_unmapped = true;
15177c8c0b82SPatrick Mooney 	} else {
15187c8c0b82SPatrick Mooney 		ASSERT3P(vmp->vmp_ptep, !=, NULL);
15197c8c0b82SPatrick Mooney 
1520f2357d97SPatrick Mooney 		/*
1521f2357d97SPatrick Mooney 		 * Track appropriate (accessed/dirty) bits for the guest-virtual
1522f2357d97SPatrick Mooney 		 * address corresponding to this page, if it is from the vmspace
1523f2357d97SPatrick Mooney 		 * rather than a direct reference to an underlying object.
1524f2357d97SPatrick Mooney 		 *
1525f2357d97SPatrick Mooney 		 * The protection and/or configured flags may obviate the need
1526f2357d97SPatrick Mooney 		 * for such an update.
1527f2357d97SPatrick Mooney 		 */
1528f2357d97SPatrick Mooney 		if ((vmp->vmp_prot & PROT_WRITE) != 0 &&
1529f2357d97SPatrick Mooney 		    (vmp->vmp_flags & VPF_DEFER_DIRTY) == 0 &&
1530f2357d97SPatrick Mooney 		    vmc->vmc_track_dirty) {
15317c8c0b82SPatrick Mooney 			vmm_gpt_t *gpt = vmc->vmc_space->vms_gpt;
1532e0994bd2SPatrick Mooney 			(void) vmm_gpt_reset_dirty(gpt, vmp->vmp_ptep, true);
15337c8c0b82SPatrick Mooney 		}
15347c8c0b82SPatrick Mooney 	}
15357c8c0b82SPatrick Mooney 	kmem_free(vmp, sizeof (*vmp));
15367c8c0b82SPatrick Mooney 	return (was_unmapped);
15377c8c0b82SPatrick Mooney }
15387c8c0b82SPatrick Mooney 
15397c8c0b82SPatrick Mooney /*
15407c8c0b82SPatrick Mooney  * Release a held page.  Returns true if the page resided on a region which
15417c8c0b82SPatrick Mooney  * was subsequently unmapped.
15427c8c0b82SPatrick Mooney  */
15437c8c0b82SPatrick Mooney bool
15447c8c0b82SPatrick Mooney vmp_release(vm_page_t *vmp)
15457c8c0b82SPatrick Mooney {
15467c8c0b82SPatrick Mooney 	vm_client_t *vmc = vmp->vmp_client;
15477c8c0b82SPatrick Mooney 
15487c8c0b82SPatrick Mooney 	VERIFY(vmc != NULL);
15497c8c0b82SPatrick Mooney 
15507c8c0b82SPatrick Mooney 	mutex_enter(&vmc->vmc_lock);
15517c8c0b82SPatrick Mooney 	const bool was_unmapped = vmp_release_inner(vmp, vmc);
15527c8c0b82SPatrick Mooney 	mutex_exit(&vmc->vmc_lock);
15537c8c0b82SPatrick Mooney 	return (was_unmapped);
15547c8c0b82SPatrick Mooney }
15557c8c0b82SPatrick Mooney 
15567c8c0b82SPatrick Mooney /*
15577c8c0b82SPatrick Mooney  * Release a chain of pages which were associated via vmp_chain() (setting
15587c8c0b82SPatrick Mooney  * page-chaining pointer).  Returns true if any pages resided upon a region
15597c8c0b82SPatrick Mooney  * which was subsequently unmapped.
15607c8c0b82SPatrick Mooney  *
15617c8c0b82SPatrick Mooney  * All of those pages must have been held through the same vm_client_t.
15627c8c0b82SPatrick Mooney  */
15637c8c0b82SPatrick Mooney bool
15647c8c0b82SPatrick Mooney vmp_release_chain(vm_page_t *vmp)
15657c8c0b82SPatrick Mooney {
15667c8c0b82SPatrick Mooney 	vm_client_t *vmc = vmp->vmp_client;
15677c8c0b82SPatrick Mooney 	bool any_unmapped = false;
15687c8c0b82SPatrick Mooney 
15697c8c0b82SPatrick Mooney 	ASSERT(vmp != NULL);
15707c8c0b82SPatrick Mooney 
15717c8c0b82SPatrick Mooney 	mutex_enter(&vmc->vmc_lock);
15727c8c0b82SPatrick Mooney 	while (vmp != NULL) {
15737c8c0b82SPatrick Mooney 		vm_page_t *next = vmp->vmp_chain;
15747c8c0b82SPatrick Mooney 
15757c8c0b82SPatrick Mooney 		/* We expect all pages in chain to be from same client */
15767c8c0b82SPatrick Mooney 		ASSERT3P(vmp->vmp_client, ==, vmc);
15777c8c0b82SPatrick Mooney 
15787c8c0b82SPatrick Mooney 		if (vmp_release_inner(vmp, vmc)) {
15797c8c0b82SPatrick Mooney 			any_unmapped = true;
15807c8c0b82SPatrick Mooney 		}
15817c8c0b82SPatrick Mooney 		vmp = next;
15827c8c0b82SPatrick Mooney 	}
15837c8c0b82SPatrick Mooney 	mutex_exit(&vmc->vmc_lock);
15847c8c0b82SPatrick Mooney 	return (any_unmapped);
15857c8c0b82SPatrick Mooney }
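
/*
 * Illustrative sketch (not part of the driver): holding a run of consecutive
 * guest pages, linking them with vmp_chain(), and releasing the whole chain
 * with vmp_release_chain().  The helper name is hypothetical and error
 * handling is simplified: on a failed hold, the partial chain is released
 * before bailing out.
 */
static vm_page_t *
vmc_hold_run_sketch(vm_client_t *vmc, uintptr_t gpa, uint_t npages, int prot)
{
	vm_page_t *head = NULL, *tail = NULL;

	for (uint_t i = 0; i < npages; i++) {
		vm_page_t *vmp = vmc_hold(vmc, gpa + (uintptr_t)i * PAGESIZE,
		    prot);

		if (vmp == NULL) {
			if (head != NULL) {
				(void) vmp_release_chain(head);
			}
			return (NULL);
		}
		if (head == NULL) {
			head = vmp;
		} else {
			vmp_chain(tail, vmp);
		}
		tail = vmp;
	}
	return (head);
}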
15867c8c0b82SPatrick Mooney 
15877c8c0b82SPatrick Mooney 
15887c8c0b82SPatrick Mooney int
15897c8c0b82SPatrick Mooney vm_segmap_obj(struct vm *vm, int segid, off_t segoff, off_t len,
15907c8c0b82SPatrick Mooney     struct as *as, caddr_t *addrp, uint_t prot, uint_t maxprot, uint_t flags)
15917c8c0b82SPatrick Mooney {
15927c8c0b82SPatrick Mooney 	vm_object_t *vmo;
15937c8c0b82SPatrick Mooney 	int err;
15947c8c0b82SPatrick Mooney 
15957c8c0b82SPatrick Mooney 	if (segoff < 0 || len <= 0 ||
15967c8c0b82SPatrick Mooney 	    (segoff & PAGEOFFSET) != 0 || (len & PAGEOFFSET) != 0) {
15977c8c0b82SPatrick Mooney 		return (EINVAL);
15987c8c0b82SPatrick Mooney 	}
15997c8c0b82SPatrick Mooney 	if ((prot & PROT_USER) == 0) {
16007c8c0b82SPatrick Mooney 		return (ENOTSUP);
16017c8c0b82SPatrick Mooney 	}
16027c8c0b82SPatrick Mooney 	err = vm_get_memseg(vm, segid, NULL, NULL, &vmo);
16037c8c0b82SPatrick Mooney 	if (err != 0) {
16047c8c0b82SPatrick Mooney 		return (err);
16057c8c0b82SPatrick Mooney 	}
16067c8c0b82SPatrick Mooney 
16077c8c0b82SPatrick Mooney 	VERIFY(segoff >= 0);
16087c8c0b82SPatrick Mooney 	VERIFY(len <= vmo->vmo_size);
16097c8c0b82SPatrick Mooney 	VERIFY((len + segoff) <= vmo->vmo_size);
16107c8c0b82SPatrick Mooney 
16117c8c0b82SPatrick Mooney 	if (vmo->vmo_type != VMOT_MEM) {
16127c8c0b82SPatrick Mooney 		/* Only support memory objects for now */
16137c8c0b82SPatrick Mooney 		return (ENOTSUP);
16147c8c0b82SPatrick Mooney 	}
16157c8c0b82SPatrick Mooney 
16167c8c0b82SPatrick Mooney 	as_rangelock(as);
16177c8c0b82SPatrick Mooney 
16187c8c0b82SPatrick Mooney 	err = choose_addr(as, addrp, (size_t)len, 0, ADDR_VACALIGN, flags);
16197c8c0b82SPatrick Mooney 	if (err == 0) {
16207c8c0b82SPatrick Mooney 		segvmm_crargs_t svma;
16217c8c0b82SPatrick Mooney 
16227c8c0b82SPatrick Mooney 		svma.prot = prot;
16237c8c0b82SPatrick Mooney 		svma.offset = segoff;
16247c8c0b82SPatrick Mooney 		svma.vmo = vmo;
16257c8c0b82SPatrick Mooney 		svma.vmc = NULL;
16267c8c0b82SPatrick Mooney 
16277c8c0b82SPatrick Mooney 		err = as_map(as, *addrp, (size_t)len, segvmm_create, &svma);
16287c8c0b82SPatrick Mooney 	}
16297c8c0b82SPatrick Mooney 
16307c8c0b82SPatrick Mooney 	as_rangeunlock(as);
16317c8c0b82SPatrick Mooney 	return (err);
16327c8c0b82SPatrick Mooney }
16337c8c0b82SPatrick Mooney 
16347c8c0b82SPatrick Mooney int
16357c8c0b82SPatrick Mooney vm_segmap_space(struct vm *vm, off_t off, struct as *as, caddr_t *addrp,
16367c8c0b82SPatrick Mooney     off_t len, uint_t prot, uint_t maxprot, uint_t flags)
16377c8c0b82SPatrick Mooney {
16387c8c0b82SPatrick Mooney 
16397c8c0b82SPatrick Mooney 	const uintptr_t gpa = (uintptr_t)off;
16407c8c0b82SPatrick Mooney 	const size_t size = (uintptr_t)len;
16417c8c0b82SPatrick Mooney 	int err;
16427c8c0b82SPatrick Mooney 
16437c8c0b82SPatrick Mooney 	if (off < 0 || len <= 0 ||
16447c8c0b82SPatrick Mooney 	    (gpa & PAGEOFFSET) != 0 || (size & PAGEOFFSET) != 0) {
16457c8c0b82SPatrick Mooney 		return (EINVAL);
16467c8c0b82SPatrick Mooney 	}
16477c8c0b82SPatrick Mooney 	if ((prot & PROT_USER) == 0) {
16487c8c0b82SPatrick Mooney 		return (ENOTSUP);
16497c8c0b82SPatrick Mooney 	}
16507c8c0b82SPatrick Mooney 
16517c8c0b82SPatrick Mooney 	as_rangelock(as);
16527c8c0b82SPatrick Mooney 
16537c8c0b82SPatrick Mooney 	err = choose_addr(as, addrp, size, off, ADDR_VACALIGN, flags);
16547c8c0b82SPatrick Mooney 	if (err == 0) {
16557c8c0b82SPatrick Mooney 		segvmm_crargs_t svma;
16567c8c0b82SPatrick Mooney 
16577c8c0b82SPatrick Mooney 		svma.prot = prot;
16587c8c0b82SPatrick Mooney 		svma.offset = gpa;
16597c8c0b82SPatrick Mooney 		svma.vmo = NULL;
16607c8c0b82SPatrick Mooney 		svma.vmc = vmspace_client_alloc(vm_get_vmspace(vm));
16617c8c0b82SPatrick Mooney 
16627c8c0b82SPatrick Mooney 		err = as_map(as, *addrp, len, segvmm_create, &svma);
16637c8c0b82SPatrick Mooney 	}
16647c8c0b82SPatrick Mooney 
16657c8c0b82SPatrick Mooney 	as_rangeunlock(as);
16667c8c0b82SPatrick Mooney 	return (err);
16677c8c0b82SPatrick Mooney }
1668