/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */


#include <sys/types.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/devops.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/cred.h>
#include <sys/policy.h>
#include <sys/errno.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>
#include <vm/page.h>
#include <sys/fs/swapnode.h>
#include <sys/sysmacros.h>
#include <sys/fcntl.h>
#include <sys/vmsystm.h>
#include <sys/physmem.h>
#include <sys/vfs_opreg.h>

static dev_info_t *physmem_dip = NULL;
/*
 * Linked list element hanging off physmem_proc_hash below, which holds all
 * the information for a given segment which has been set up for this process.
 * This is a simple linked list as we are assuming that for a given process
 * the setup ioctl will only be called a handful of times. If this assumption
 * changes in the future, a quicker-to-traverse data structure should be used.
 */
struct physmem_hash {
	struct physmem_hash *ph_next;
	uint64_t ph_base_pa;
	caddr_t ph_base_va;
	size_t ph_seg_len;
	struct vnode *ph_vnode;
};

/*
 * Hash of all of the processes which have set up mappings with the driver,
 * with pointers to per-process data.
 */
struct physmem_proc_hash {
	struct proc *pph_proc;
	struct physmem_hash *pph_hash;
	struct physmem_proc_hash *pph_next;
};


/* Needs to be a power of two for simple hash algorithm */
#define	PPH_SIZE	8
struct physmem_proc_hash *pph[PPH_SIZE];

/*
 * Lock which protects the pph hash above. To add an element (either a new
 * process or a new segment) the WRITE lock must be held. To traverse the
 * list, only a READ lock is needed.
 */
krwlock_t pph_rwlock;

#define	PHYSMEM_HASH(procp) ((int)((((uintptr_t)procp) >> 8) & (PPH_SIZE - 1)))
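
/*
 * Illustrative note (added commentary, not from the original source):
 * PHYSMEM_HASH shifts the proc_t pointer right by 8 bits before masking,
 * presumably so that the low-order bits shared by kmem-cache-allocated proc
 * structures do not collapse every process into the same bucket. For
 * example, a procp of 0x30001400 hashes to bucket (0x300014 & 7) == 4.
 */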

/*
 * Need to keep a reference count of how many processes have the driver
 * open to prevent it from disappearing.
 */
uint64_t physmem_vnodecnt;
kmutex_t physmem_mutex;	/* protects physmem_vnodecnt */

static int physmem_getpage(struct vnode *vp, offset_t off, size_t len,
    uint_t *protp, page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
    enum seg_rw rw, struct cred *cr, caller_context_t *ct);

static int physmem_addmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
    struct cred *cred, caller_context_t *ct);

static int physmem_delmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
    struct cred *cred, caller_context_t *ct);

static void physmem_inactive(vnode_t *vp, cred_t *crp, caller_context_t *ct);

const fs_operation_def_t physmem_vnodeops_template[] = {
	VOPNAME_GETPAGE,	{ .vop_getpage = physmem_getpage },
	VOPNAME_ADDMAP,		{ .vop_addmap = physmem_addmap },
	VOPNAME_DELMAP,		{ .vop_delmap = physmem_delmap },
	VOPNAME_INACTIVE,	{ .vop_inactive = physmem_inactive },
	NULL,			NULL
};
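
/*
 * Added commentary: this template is turned into a vnodeops_t by
 * vn_make_ops() in physmem_setup_vnops() below, and the resulting ops vector
 * is installed with vn_setops() on each vnode created by the PHYSMEM_SETUP
 * ioctl, so faults on a physmem segment resolve through physmem_getpage().
 */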

vnodeops_t *physmem_vnodeops = NULL;

/*
 * Removes the current process from the hash if the process has no more
 * physmem segments active.
 */
void
physmem_remove_hash_proc()
{
	int index;
	struct physmem_proc_hash **walker;
	struct physmem_proc_hash *victim = NULL;

	index = PHYSMEM_HASH(curproc);
	rw_enter(&pph_rwlock, RW_WRITER);
	walker = &pph[index];
	while (*walker != NULL) {
		if ((*walker)->pph_proc == curproc &&
		    (*walker)->pph_hash == NULL) {
			victim = *walker;
			*walker = victim->pph_next;
			break;
		}
		walker = &((*walker)->pph_next);
	}
	rw_exit(&pph_rwlock);
	if (victim != NULL)
		kmem_free(victim, sizeof (struct physmem_proc_hash));
}

/*
 * Add a new entry to the hash for the given process to cache the
 * address ranges that it is working on. If this is the first hash
 * item to be added for this process, we will create the head pointer
 * for this process.
 * Returns 0 on success, ERANGE when the physical address is already in the
 * hash.
 */
int
physmem_add_hash(struct physmem_hash *php)
{
	int index;
	struct physmem_proc_hash *iterator;
	struct physmem_proc_hash *newp = NULL;
	struct physmem_hash *temp;
	int ret = 0;

	index = PHYSMEM_HASH(curproc);

insert:
	rw_enter(&pph_rwlock, RW_WRITER);
	iterator = pph[index];
	while (iterator != NULL) {
		if (iterator->pph_proc == curproc) {
			/*
			 * check to make sure a single process does not try to
			 * map the same region twice.
			 */
			for (temp = iterator->pph_hash; temp != NULL;
			    temp = temp->ph_next) {
				if ((php->ph_base_pa >= temp->ph_base_pa &&
				    php->ph_base_pa < temp->ph_base_pa +
				    temp->ph_seg_len) ||
				    (temp->ph_base_pa >= php->ph_base_pa &&
				    temp->ph_base_pa < php->ph_base_pa +
				    php->ph_seg_len)) {
					ret = ERANGE;
					break;
				}
			}
			if (ret == 0) {
				php->ph_next = iterator->pph_hash;
				iterator->pph_hash = php;
			}
			rw_exit(&pph_rwlock);
			/* Need to check for two threads in sync */
			if (newp != NULL)
				kmem_free(newp, sizeof (*newp));
			return (ret);
		}
		iterator = iterator->pph_next;
	}

	if (newp != NULL) {
		newp->pph_proc = curproc;
		newp->pph_next = pph[index];
		newp->pph_hash = php;
		php->ph_next = NULL;
		pph[index] = newp;
		rw_exit(&pph_rwlock);
		return (0);
	}

	rw_exit(&pph_rwlock);
	/* Dropped the lock so we could use KM_SLEEP */
	newp = kmem_zalloc(sizeof (struct physmem_proc_hash), KM_SLEEP);
	goto insert;
}

/*
 * Will return the pointer to the physmem_hash struct if the setup routine
 * has previously been called for this memory.
 * Returns NULL on failure.
 */
struct physmem_hash *
physmem_get_hash(uint64_t req_paddr, size_t len, proc_t *procp)
{
	int index;
	struct physmem_proc_hash *proc_hp;
	struct physmem_hash *php;

	ASSERT(rw_lock_held(&pph_rwlock));

	index = PHYSMEM_HASH(procp);
	proc_hp = pph[index];
	while (proc_hp != NULL) {
		if (proc_hp->pph_proc == procp) {
			php = proc_hp->pph_hash;
			while (php != NULL) {
				if ((req_paddr >= php->ph_base_pa) &&
				    (req_paddr + len <=
				    php->ph_base_pa + php->ph_seg_len)) {
					return (php);
				}
				php = php->ph_next;
			}
		}
		proc_hp = proc_hp->pph_next;
	}
	return (NULL);
}

int
physmem_validate_cookie(uint64_t p_cookie)
{
	int index;
	struct physmem_proc_hash *proc_hp;
	struct physmem_hash *php;

	ASSERT(rw_lock_held(&pph_rwlock));

	index = PHYSMEM_HASH(curproc);
	proc_hp = pph[index];
	while (proc_hp != NULL) {
		if (proc_hp->pph_proc == curproc) {
			php = proc_hp->pph_hash;
			while (php != NULL) {
				if ((uint64_t)(uintptr_t)php == p_cookie) {
					return (1);
				}
				php = php->ph_next;
			}
		}
		proc_hp = proc_hp->pph_next;
	}
	return (0);
}

/*
 * Remove the given vnode from the pph hash. If it exists in the hash the
 * process still has to be around as the vnode is obviously still around and
 * since it's a physmem vnode, it must be in the hash.
 * If it is not in the hash that must mean that the setup ioctl failed.
 * Return 0 in this instance, 1 if it is in the hash.
 */
int
physmem_remove_vnode_hash(vnode_t *vp)
{
	int index;
	struct physmem_proc_hash *proc_hp;
	struct physmem_hash **phpp;
	struct physmem_hash *victim;

	index = PHYSMEM_HASH(curproc);
	/* synchronize with the map routine */
	rw_enter(&pph_rwlock, RW_WRITER);
	proc_hp = pph[index];
	while (proc_hp != NULL) {
		if (proc_hp->pph_proc == curproc) {
			phpp = &proc_hp->pph_hash;
			while (*phpp != NULL) {
				if ((*phpp)->ph_vnode == vp) {
					victim = *phpp;
					*phpp = victim->ph_next;

					rw_exit(&pph_rwlock);
					kmem_free(victim, sizeof (*victim));
					return (1);
				}
				phpp = &(*phpp)->ph_next;
			}
		}
		proc_hp = proc_hp->pph_next;
	}
	rw_exit(&pph_rwlock);

	/* not found */
	return (0);
}

int
physmem_setup_vnops()
{
	int error;
	char *name = "physmem";
	if (physmem_vnodeops != NULL)
		cmn_err(CE_PANIC, "physmem vnodeops already set\n");
	error = vn_make_ops(name, physmem_vnodeops_template, &physmem_vnodeops);
	if (error != 0) {
		cmn_err(CE_WARN, "physmem_setup_vnops: bad vnode ops template");
	}
	return (error);
}

/*
 * The guts of the PHYSMEM_SETUP ioctl.
 * Create a segment in the address space with the specified parameters.
 * If pspp->user_va is NULL, as_gap will be used to find an appropriate VA.
 * We do not do bounds checking on the requested physical addresses; if they
 * do not exist in the system, they will not be mappable.
 * Returns 0 on success with the following error codes on failure:
 *	ENOMEM - The VA range requested was already mapped if pspp->user_va is
 *		non-NULL or the system was unable to find enough VA space for
 *		the desired length if user_va was NULL.
 *	EINVAL - The requested PA, VA, or length was not PAGESIZE aligned.
 */
int
physmem_setup_addrs(struct physmem_setup_param *pspp)
{
	struct as *as = curproc->p_as;
	struct segvn_crargs vn_a;
	int ret = 0;
	uint64_t base_pa;
	size_t len;
	caddr_t uvaddr;
	struct vnode *vp;
	struct physmem_hash *php;

	ASSERT(pspp != NULL);
	base_pa = pspp->req_paddr;
	len = pspp->len;
	uvaddr = (caddr_t)(uintptr_t)pspp->user_va;

	/* Sanity checking */
	if (!IS_P2ALIGNED(base_pa, PAGESIZE))
		return (EINVAL);
	if (!IS_P2ALIGNED(len, PAGESIZE))
		return (EINVAL);
	if (uvaddr != NULL && !IS_P2ALIGNED(uvaddr, PAGESIZE))
		return (EINVAL);

	php = kmem_zalloc(sizeof (struct physmem_hash), KM_SLEEP);

	/* Need to bump vnode count so that the driver can not be unloaded */
	mutex_enter(&physmem_mutex);
	physmem_vnodecnt++;
	mutex_exit(&physmem_mutex);

	vp = vn_alloc(KM_SLEEP);
	ASSERT(vp != NULL);	/* SLEEP can't return NULL */
	vn_setops(vp, physmem_vnodeops);

	php->ph_vnode = vp;

	vn_a.vp = vp;
	vn_a.offset = (u_offset_t)base_pa;
	vn_a.type = MAP_SHARED;
	vn_a.prot = PROT_ALL;
	vn_a.maxprot = PROT_ALL;
	vn_a.flags = 0;
	vn_a.cred = NULL;
	vn_a.amp = NULL;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	as_rangelock(as);
	if (uvaddr != NULL) {
		if (as_gap(as, len, &uvaddr, &len, AH_LO, NULL) == -1) {
			ret = ENOMEM;
fail:
			as_rangeunlock(as);
			vn_free(vp);
			kmem_free(php, sizeof (*php));
			mutex_enter(&physmem_mutex);
			physmem_vnodecnt--;
			mutex_exit(&physmem_mutex);
			return (ret);
		}
	} else {
		/* We pick the address for the user */
		map_addr(&uvaddr, len, 0, 1, 0);
		if (uvaddr == NULL) {
			ret = ENOMEM;
			goto fail;
		}
	}
	ret = as_map(as, uvaddr, len, segvn_create, &vn_a);

	if (ret == 0) {
		as_rangeunlock(as);
		php->ph_base_pa = base_pa;
		php->ph_base_va = uvaddr;
		php->ph_seg_len = len;
		pspp->user_va = (uint64_t)(uintptr_t)uvaddr;
		pspp->cookie = (uint64_t)(uintptr_t)php;
		ret = physmem_add_hash(php);
		if (ret == 0)
			return (0);

		/* Note that the call to as_unmap will free the vnode */
		(void) as_unmap(as, uvaddr, len);
		kmem_free(php, sizeof (*php));
		return (ret);
	}

	goto fail;
	/*NOTREACHED*/
}

/*
 * The guts of the PHYSMEM_MAP ioctl.
 * Map the given PA to the appropriate VA if PHYSMEM_SETUP ioctl has already
 * been called for this PA range.
 * Returns 0 on success with the following error codes on failure:
 *	EPERM - The requested page is long term locked, and thus repeated
 *		requests to allocate this page will likely fail.
 *	EAGAIN - The requested page could not be allocated, but it is believed
 *		that future attempts could succeed.
 *	ENOMEM - There was not enough free memory in the system to safely
 *		map the requested page.
 *	EINVAL - The requested paddr was not PAGESIZE aligned or the
 *		PHYSMEM_SETUP ioctl was not called for this page.
 *	ENOENT - The requested page was inside the kernel cage, and the
 *		PHYSMEM_CAGE flag was not set.
 *	EBUSY - The requested page is retired and the PHYSMEM_RETIRE flag
 *		was not set.
 */
static int
physmem_map_addrs(struct physmem_map_param *pmpp)
{
	caddr_t uvaddr;
	page_t *pp;
	uint64_t req_paddr;
	struct vnode *vp;
	int ret = 0;
	struct physmem_hash *php;
	uint_t flags = 0;

	ASSERT(pmpp != NULL);
	req_paddr = pmpp->req_paddr;

	if (!IS_P2ALIGNED(req_paddr, PAGESIZE))
		return (EINVAL);
	/* Find the vnode for this map request */
	rw_enter(&pph_rwlock, RW_READER);
	php = physmem_get_hash(req_paddr, PAGESIZE, curproc);
	if (php == NULL) {
		rw_exit(&pph_rwlock);
		return (EINVAL);
	}
	vp = php->ph_vnode;
	uvaddr = php->ph_base_va + (req_paddr - php->ph_base_pa);
	rw_exit(&pph_rwlock);

	pp = page_numtopp_nolock(btop((size_t)req_paddr));
	if (pp == NULL) {
		pmpp->ret_va = NULL;
		return (EPERM);
	}

	/*
	 * Check to see if the page is already mapped correctly. This can
	 * happen when we failed to capture a page previously and it was
	 * captured asynchronously for us. Return success in this case.
	 */
	if (pp->p_vnode == vp) {
		ASSERT(pp->p_offset == (u_offset_t)req_paddr);
		pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr;
		return (0);
	}

	/*
	 * physmem should be responsible for checking for cage
	 * and prom pages.
	 */
	if (pmpp->flags & PHYSMEM_CAGE)
		flags = CAPTURE_GET_CAGE;
	if (pmpp->flags & PHYSMEM_RETIRED)
		flags |= CAPTURE_GET_RETIRED;

	ret = page_trycapture(pp, 0, flags | CAPTURE_PHYSMEM, curproc);

	if (ret != 0) {
		pmpp->ret_va = NULL;
		return (ret);
	} else {
		pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr;
		return (0);
	}
}
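
/*
 * Added commentary: the capture request issued above via page_trycapture()
 * is completed by map_page_proc() (the page-capture callback registered in
 * physmem_attach()), which hashes the captured page onto the physmem vnode;
 * the page is then actually mapped into the address space when the process
 * faults on ret_va and segvn calls physmem_getpage().
 */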

/*
 * Map the given page into the process's address space if possible.
 * We actually only hash the page in on the correct vnode as the page
 * will be mapped via segvn_pagefault.
 * returns 0 on success
 * returns 1 if there is no need to map this page anymore (process exited)
 * returns -1 if we failed to map the page.
 */
int
map_page_proc(page_t *pp, void *arg, uint_t flags)
{
	struct vnode *vp;
	proc_t *procp = (proc_t *)arg;
	int ret;
	u_offset_t paddr = (u_offset_t)ptob(pp->p_pagenum);
	struct physmem_hash *php;

	ASSERT(pp != NULL);

	/*
	 * Check against availrmem to make sure that we're not low on memory.
	 * We check again here as ASYNC requests do not do this check
	 * elsewhere. We return 1 as we don't want the page to have the
	 * PR_CAPTURE bit set or be on the page capture hash.
	 */
	if (swapfs_minfree > availrmem + 1) {
		page_free(pp, 1);
		return (1);
	}

	/*
	 * If this is an asynchronous request for the current process,
	 * we can not map the page as it's possible that we are also in the
	 * process of unmapping the page which could result in a deadlock
	 * with the as lock.
	 */
	if ((flags & CAPTURE_ASYNC) && (curproc == procp)) {
		page_free(pp, 1);
		return (-1);
	}

	/* only return zeroed out pages */
	pagezero(pp, 0, PAGESIZE);

	rw_enter(&pph_rwlock, RW_READER);
	php = physmem_get_hash(paddr, PAGESIZE, procp);
	if (php == NULL) {
		rw_exit(&pph_rwlock);
		/*
		 * Free the page as there is no longer a valid outstanding
		 * request for this page.
		 */
		page_free(pp, 1);
		return (1);
	}

	vp = php->ph_vnode;

	/*
	 * We need to protect against a possible deadlock here where we own
	 * the vnode page hash mutex and want to acquire it again, as there
	 * are locations in the code where we unlock a page while holding
	 * the mutex which can lead to the page being captured and eventually
	 * ending up here.
	 */
	if (mutex_owned(page_vnode_mutex(vp))) {
		rw_exit(&pph_rwlock);
		page_free(pp, 1);
		return (-1);
	}

	ret = page_hashin(pp, vp, paddr, NULL);
	rw_exit(&pph_rwlock);
	if (ret == 0) {
		page_free(pp, 1);
		return (-1);
	}

	page_downgrade(pp);

	mutex_enter(&freemem_lock);
	availrmem--;
	mutex_exit(&freemem_lock);

	return (0);
}

/*
 * The guts of the PHYSMEM_DESTROY ioctl.
 * The cookie passed in will provide all of the information needed to
 * free up the address space and physical memory associated with the
 * corresponding PHYSMEM_SETUP ioctl.
 * Returns 0 on success with the following error codes on failure:
 *	EINVAL - The cookie supplied is not valid.
 */
int
physmem_destroy_addrs(uint64_t p_cookie)
{
	struct as *as = curproc->p_as;
	size_t len;
	caddr_t uvaddr;

	rw_enter(&pph_rwlock, RW_READER);
	if (physmem_validate_cookie(p_cookie) == 0) {
		rw_exit(&pph_rwlock);
		return (EINVAL);
	}

	len = ((struct physmem_hash *)(uintptr_t)p_cookie)->ph_seg_len;
	uvaddr = ((struct physmem_hash *)(uintptr_t)p_cookie)->ph_base_va;
	rw_exit(&pph_rwlock);

	(void) as_unmap(as, uvaddr, len);

	return (0);
}

/*
 * If the page has been hashed into the physmem vnode, then just look it up
 * and return it via pl, otherwise return ENOMEM as the map ioctl has not
 * succeeded on the given page.
 */
/*ARGSUSED*/
static int
physmem_getpage(struct vnode *vp, offset_t off, size_t len, uint_t *protp,
    page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
    struct cred *cr, caller_context_t *ct)
{
	page_t *pp;

	ASSERT(len == PAGESIZE);
	ASSERT(AS_READ_HELD(seg->s_as, &seg->s_as->a_lock));

	/*
	 * If the page is in the hash, then we successfully claimed this
	 * page earlier, so return it to the caller.
	 */
	pp = page_lookup(vp, off, SE_SHARED);
	if (pp != NULL) {
		pl[0] = pp;
		pl[1] = NULL;
		*protp = PROT_ALL;
		return (0);
	}
	return (ENOMEM);
}

/*
 * We can not allow a process mapping /dev/physmem pages to fork as there can
 * only be a single mapping to a /dev/physmem page at a given time. Thus, the
 * return of EINVAL when we are not working on our own address space.
 * Otherwise we return zero as this function is required for normal operation.
 */
/*ARGSUSED*/
static int
physmem_addmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
    struct cred *cred, caller_context_t *ct)
{
	if (curproc->p_as != as) {
		return (EINVAL);
	}
	return (0);
}

/* Will always get called for removing a whole segment. */
/*ARGSUSED*/
static int
physmem_delmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
    struct cred *cred, caller_context_t *ct)
{
	/*
	 * Release our hold on the vnode so that the final VN_RELE will
	 * call physmem_inactive to clean things up.
	 */
	VN_RELE(vp);

	return (0);
}

/*
 * Clean up all the pages belonging to this vnode and then free it.
 */
/*ARGSUSED*/
static void
physmem_inactive(vnode_t *vp, cred_t *crp, caller_context_t *ct)
{
	page_t *pp;

	/*
	 * Remove the vnode from the hash now, to prevent asynchronous
	 * attempts to map into this vnode. This avoids a deadlock
	 * where two threads try to get into this logic at the same
	 * time and try to map the pages they are destroying into the
	 * other's address space.
	 * If it's not in the hash, just free it.
	 */
	if (physmem_remove_vnode_hash(vp) == 0) {
		ASSERT(vp->v_pages == NULL);
		vn_free(vp);
		physmem_remove_hash_proc();
		mutex_enter(&physmem_mutex);
		physmem_vnodecnt--;
		mutex_exit(&physmem_mutex);
		return;
	}

	/*
	 * At this point in time, no other logic can be adding or removing
	 * pages from the vnode, otherwise the v_pages list could be
	 * inaccurate.
	 */

	while ((pp = vp->v_pages) != NULL) {
		page_t *rpp;
		if (page_tryupgrade(pp)) {
			/*
			 * set lckcnt for page_destroy to do availrmem
			 * accounting
			 */
			pp->p_lckcnt = 1;
			page_destroy(pp, 0);
		} else {
			/* failure to lock should be transient */
			rpp = page_lookup(vp, ptob(pp->p_pagenum), SE_SHARED);
			if (rpp != pp) {
				page_unlock(rpp);
				continue;
			}
			page_unlock(pp);
		}
	}
	vn_free(vp);
	physmem_remove_hash_proc();
	mutex_enter(&physmem_mutex);
	physmem_vnodecnt--;
	mutex_exit(&physmem_mutex);
}

/*ARGSUSED*/
static int
physmem_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
	int ret;

	switch (cmd) {
	case PHYSMEM_SETUP:
		{
			struct physmem_setup_param psp;
			if (ddi_copyin((void *)arg, &psp,
			    sizeof (struct physmem_setup_param), 0))
				return (EFAULT);
			ret = physmem_setup_addrs(&psp);
			if (ddi_copyout(&psp, (void *)arg, sizeof (psp), 0))
				return (EFAULT);
		}
		break;
	case PHYSMEM_MAP:
		{
			struct physmem_map_param pmp;
			if (ddi_copyin((void *)arg, &pmp,
			    sizeof (struct physmem_map_param), 0))
				return (EFAULT);
			ret = physmem_map_addrs(&pmp);
			if (ddi_copyout(&pmp, (void *)arg, sizeof (pmp), 0))
				return (EFAULT);
		}
		break;
	case PHYSMEM_DESTROY:
		{
			uint64_t cookie;
			if (ddi_copyin((void *)arg, &cookie,
			    sizeof (uint64_t), 0))
				return (EFAULT);
			ret = physmem_destroy_addrs(cookie);
		}
		break;
	default:
		return (ENOTSUP);
	}
	return (ret);
}
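
/*
 * Added usage sketch (not part of the original source): a privileged
 * user-level consumer of this driver is expected to drive the three ioctls
 * in the order SETUP, MAP, DESTROY. The device path and the idea of faulting
 * on the returned VA are assumptions for illustration only.
 *
 *	int fd = open("/dev/physmem", O_RDWR);
 *	struct physmem_setup_param psp = { 0 };
 *	psp.req_paddr = pa;		(page-aligned physical address)
 *	psp.len = PAGESIZE;
 *	psp.user_va = 0;		(let the driver pick the VA)
 *	ioctl(fd, PHYSMEM_SETUP, &psp);	(fills in user_va and cookie)
 *
 *	struct physmem_map_param pmp = { 0 };
 *	pmp.req_paddr = pa;
 *	ioctl(fd, PHYSMEM_MAP, &pmp);	(fills in ret_va; touch it to fault
 *					the page in via physmem_getpage)
 *
 *	ioctl(fd, PHYSMEM_DESTROY, &psp.cookie);
 */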

/*ARGSUSED*/
static int
physmem_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	int ret;
	static int msg_printed = 0;

	if ((flag & (FWRITE | FREAD)) != (FWRITE | FREAD)) {
		return (EINVAL);
	}

	/* need to make sure we have the right privileges */
	if ((ret = secpolicy_resource(credp)) != 0)
		return (ret);
	if ((ret = secpolicy_lock_memory(credp)) != 0)
		return (ret);

	if (msg_printed == 0) {
		cmn_err(CE_NOTE, "!driver has been opened. This driver may "
		    "take out long term locks on pages which may impact "
		    "dynamic reconfiguration events");
		msg_printed = 1;
	}

	return (0);
}

/*ARGSUSED*/
static int
physmem_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
	return (0);
}

/*ARGSUSED*/
static int
physmem_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd,
    void *arg, void **resultp)
{
	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*resultp = physmem_dip;
		return (DDI_SUCCESS);

	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(ulong_t)getminor((dev_t)arg);
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}
}

static int
physmem_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int i;

	if (cmd == DDI_RESUME) {
		return (DDI_SUCCESS);
	}

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR,
	    ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
		return (DDI_FAILURE);

	physmem_dip = dip;

	/* Initialize driver specific data */
	if (physmem_setup_vnops()) {
		ddi_remove_minor_node(dip, ddi_get_name(dip));
		return (DDI_FAILURE);
	}

	for (i = 0; i < PPH_SIZE; i++)
		pph[i] = NULL;

	page_capture_register_callback(PC_PHYSMEM, 10000,
	    map_page_proc);

	return (DDI_SUCCESS);
}

static int
physmem_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int ret = DDI_SUCCESS;

	if (cmd == DDI_SUSPEND) {
		return (DDI_SUCCESS);
	}

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	ASSERT(physmem_dip == dip);

	mutex_enter(&physmem_mutex);
	if (physmem_vnodecnt == 0) {
		if (physmem_vnodeops != NULL) {
			vn_freevnodeops(physmem_vnodeops);
			physmem_vnodeops = NULL;
			page_capture_unregister_callback(PC_PHYSMEM);
		}
	} else {
		ret = EBUSY;
	}
	mutex_exit(&physmem_mutex);
	if (ret == DDI_SUCCESS)
		ddi_remove_minor_node(dip, ddi_get_name(dip));
	return (ret);
}

static struct cb_ops physmem_cb_ops = {
	physmem_open,	/* open */
	physmem_close,	/* close */
	nodev,		/* strategy */
	nodev,		/* print */
	nodev,		/* dump */
	nodev,		/* read */
	nodev,		/* write */
	physmem_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* chpoll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* cb_str */
	D_NEW | D_MP | D_DEVMAP,
	CB_REV,
	NULL,
	NULL
};

static struct dev_ops physmem_ops = {
	DEVO_REV,
	0,
	physmem_getinfo,
	nulldev,
	nulldev,
	physmem_attach,
	physmem_detach,
	nodev,
	&physmem_cb_ops,
	NULL,
	NULL,
	ddi_quiesce_not_needed,		/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"physmem driver",
	&physmem_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}