13253Smec /* 23253Smec * CDDL HEADER START 33253Smec * 43253Smec * The contents of this file are subject to the terms of the 53253Smec * Common Development and Distribution License (the "License"). 63253Smec * You may not use this file except in compliance with the License. 73253Smec * 83253Smec * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 93253Smec * or http://www.opensolaris.org/os/licensing. 103253Smec * See the License for the specific language governing permissions 113253Smec * and limitations under the License. 123253Smec * 133253Smec * When distributing Covered Code, include this CDDL HEADER in each 143253Smec * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 153253Smec * If applicable, add the following below this CDDL HEADER, with the 163253Smec * fields enclosed by brackets "[]" replaced with your own identifying 173253Smec * information: Portions Copyright [yyyy] [name of copyright owner] 183253Smec * 193253Smec * CDDL HEADER END 203253Smec */ 213253Smec /* 223616Smec * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 233253Smec * Use is subject to license terms. 
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/devops.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/cred.h>
#include <sys/policy.h>
#include <sys/errno.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>
#include <vm/page.h>
#include <sys/fs/swapnode.h>
#include <sys/sysmacros.h>
#include <sys/fcntl.h>
#include <sys/vmsystm.h>
#include <sys/physmem.h>
#include <sys/vfs_opreg.h>

/* devinfo node for this pseudo driver; set in physmem_attach() */
static dev_info_t *physmem_dip = NULL;

/*
 * Linked list element hanging off physmem_proc_hash below, which holds all
 * the information for a given segment which has been setup for this process.
 * This is a simple linked list as we are assuming that for a given process
 * the setup ioctl will only be called a handful of times.  If this assumption
 * changes in the future, a quicker to traverse data structure should be used.
 */
struct physmem_hash {
	struct physmem_hash *ph_next;	/* next segment for this process */
	uint64_t ph_base_pa;		/* physical base of the segment */
	caddr_t ph_base_va;		/* user virtual base of the segment */
	size_t ph_seg_len;		/* segment length in bytes */
	struct vnode *ph_vnode;		/* vnode backing this segment */
};

/*
 * Hash of all of the processes which have setup mappings with the driver with
 * pointers to per process data.
 */
struct physmem_proc_hash {
	struct proc *pph_proc;		/* process owning these segments */
	struct physmem_hash *pph_hash;	/* list of this process's segments */
	struct physmem_proc_hash *pph_next;	/* hash-bucket chain link */
};


/* Needs to be a power of two for simple hash algorithm */
#define	PPH_SIZE	8
struct physmem_proc_hash *pph[PPH_SIZE];

/*
 * Lock which protects the pph hash above.  To add an element (either a new
 * process or a new segment) the WRITE lock must be held.  To traverse the
 * list, only a READ lock is needed.
 */
krwlock_t pph_rwlock;

/* Bucket index for a proc pointer; PPH_SIZE is a power of two (see above). */
#define	PHYSMEM_HASH(procp) ((int)((((uintptr_t)procp) >> 8) & (PPH_SIZE - 1)))

/*
 * Need to keep a reference count of how many processes have the driver
 * open to prevent it from disappearing.
 */
uint64_t physmem_vnodecnt;
kmutex_t physmem_mutex;	/* protects physmem_vnodecnt */

/* vnode operations implemented by this driver (see template below) */
static int physmem_getpage(struct vnode *vp, offset_t off, size_t len,
    uint_t *protp, page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
    enum seg_rw rw, struct cred *cr, caller_context_t *ct);

static int physmem_addmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
    struct cred *cred, caller_context_t *ct);

static int physmem_delmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
    struct cred *cred, caller_context_t *ct);

static void physmem_inactive(vnode_t *vp, cred_t *crp, caller_context_t *ct);

const fs_operation_def_t physmem_vnodeops_template[] = {
	VOPNAME_GETPAGE, { .vop_getpage = physmem_getpage },
1133898Srsb VOPNAME_ADDMAP, { .vop_addmap = physmem_addmap }, 1143898Srsb VOPNAME_DELMAP, { .vop_delmap = physmem_delmap }, 1153898Srsb VOPNAME_INACTIVE, { .vop_inactive = physmem_inactive }, 1163898Srsb NULL, NULL 1173253Smec }; 1183253Smec 1193253Smec vnodeops_t *physmem_vnodeops = NULL; 1203253Smec 1213253Smec /* 1223253Smec * Removes the current process from the hash if the process has no more 1233253Smec * physmem segments active. 1243253Smec */ 1253253Smec void 1263253Smec physmem_remove_hash_proc() 1273253Smec { 1283253Smec int index; 1293253Smec struct physmem_proc_hash **walker; 1303253Smec struct physmem_proc_hash *victim = NULL; 1313253Smec 1323253Smec index = PHYSMEM_HASH(curproc); 1333253Smec rw_enter(&pph_rwlock, RW_WRITER); 1343253Smec walker = &pph[index]; 1353253Smec while (*walker != NULL) { 1363253Smec if ((*walker)->pph_proc == curproc && 1373253Smec (*walker)->pph_hash == NULL) { 1383253Smec victim = *walker; 1393253Smec *walker = victim->pph_next; 1403253Smec break; 1413253Smec } 1423253Smec walker = &((*walker)->pph_next); 1433253Smec } 1443253Smec rw_exit(&pph_rwlock); 1453253Smec if (victim != NULL) 1463253Smec kmem_free(victim, sizeof (struct physmem_proc_hash)); 1473253Smec } 1483253Smec 1493253Smec /* 1503253Smec * Add a new entry to the hash for the given process to cache the 1513253Smec * address ranges that it is working on. If this is the first hash 1523253Smec * item to be added for this process, we will create the head pointer 1533253Smec * for this process. 1543253Smec * Returns 0 on success, ERANGE when the physical address is already in the 1553616Smec * hash. 
 */
int
physmem_add_hash(struct physmem_hash *php)
{
	int index;
	struct physmem_proc_hash *iterator;
	struct physmem_proc_hash *newp = NULL;
	struct physmem_hash *temp;
	int ret = 0;

	index = PHYSMEM_HASH(curproc);

insert:
	rw_enter(&pph_rwlock, RW_WRITER);
	iterator = pph[index];
	while (iterator != NULL) {
		if (iterator->pph_proc == curproc) {
			/*
			 * check to make sure a single process does not try to
			 * map the same region twice.  The two range tests
			 * together detect any overlap of [base_pa, +seg_len).
			 */
			for (temp = iterator->pph_hash; temp != NULL;
			    temp = temp->ph_next) {
				if ((php->ph_base_pa >= temp->ph_base_pa &&
				    php->ph_base_pa < temp->ph_base_pa +
				    temp->ph_seg_len) ||
				    (temp->ph_base_pa >= php->ph_base_pa &&
				    temp->ph_base_pa < php->ph_base_pa +
				    php->ph_seg_len)) {
					ret = ERANGE;
					break;
				}
			}
			if (ret == 0) {
				php->ph_next = iterator->pph_hash;
				iterator->pph_hash = php;
			}
			rw_exit(&pph_rwlock);
			/*
			 * Need to check for two threads in sync: another
			 * thread may have created this process's entry while
			 * we had dropped the lock to allocate newp below.
			 */
			if (newp != NULL)
				kmem_free(newp, sizeof (*newp));
			return (ret);
		}
		iterator = iterator->pph_next;
	}

	/* No entry for this process yet; use the one allocated below. */
	if (newp != NULL) {
		newp->pph_proc = curproc;
		newp->pph_next = pph[index];
		newp->pph_hash = php;
		php->ph_next = NULL;
		pph[index] = newp;
		rw_exit(&pph_rwlock);
		return (0);
	}

	rw_exit(&pph_rwlock);
	/* Dropped the lock so we could use KM_SLEEP, then retry the insert */
	newp = kmem_zalloc(sizeof (struct physmem_proc_hash), KM_SLEEP);
	goto insert;
}

/*
 * Will return the pointer to the physmem_hash struct if the setup routine
 * has previously been called for this memory.
 * Returns NULL on failure.
 * Caller must hold pph_rwlock (asserted below).
 */
struct physmem_hash *
physmem_get_hash(uint64_t req_paddr, size_t len, proc_t *procp)
{
	int index;
	struct physmem_proc_hash *proc_hp;
	struct physmem_hash *php;

	ASSERT(rw_lock_held(&pph_rwlock));

	index = PHYSMEM_HASH(procp);
	proc_hp = pph[index];
	while (proc_hp != NULL) {
		if (proc_hp->pph_proc == procp) {
			php = proc_hp->pph_hash;
			while (php != NULL) {
				/* [req_paddr, +len) fully inside segment? */
				if ((req_paddr >= php->ph_base_pa) &&
				    (req_paddr + len <=
				    php->ph_base_pa + php->ph_seg_len)) {
					return (php);
				}
				php = php->ph_next;
			}
		}
		proc_hp = proc_hp->pph_next;
	}
	return (NULL);
}

/*
 * Returns 1 if p_cookie is the address of one of the current process's
 * physmem_hash entries, 0 otherwise.  Caller must hold pph_rwlock.
 */
int
physmem_validate_cookie(uint64_t p_cookie)
{
	int index;
	struct physmem_proc_hash *proc_hp;
	struct physmem_hash *php;

	ASSERT(rw_lock_held(&pph_rwlock));

	index = PHYSMEM_HASH(curproc);
	proc_hp = pph[index];
	while (proc_hp != NULL) {
		if (proc_hp->pph_proc == curproc) {
			php = proc_hp->pph_hash;
			while (php != NULL) {
				if ((uint64_t)(uintptr_t)php == p_cookie) {
					return (1);
				}
				php = php->ph_next;
			}
		}
		proc_hp = proc_hp->pph_next;
	}
	return (0);
}

/*
 * Remove the given vnode from the pph hash.
If it exists in the hash the 2793253Smec * process still has to be around as the vnode is obviously still around and 2803253Smec * since it's a physmem vnode, it must be in the hash. 2813253Smec * If it is not in the hash that must mean that the setup ioctl failed. 2823253Smec * Return 0 in this instance, 1 if it is in the hash. 2833253Smec */ 2843253Smec int 2853253Smec physmem_remove_vnode_hash(vnode_t *vp) 2863253Smec { 2873253Smec int index; 2883253Smec struct physmem_proc_hash *proc_hp; 2893253Smec struct physmem_hash **phpp; 2903253Smec struct physmem_hash *victim; 2913253Smec 2923253Smec index = PHYSMEM_HASH(curproc); 2933253Smec /* synchronize with the map routine */ 2943253Smec rw_enter(&pph_rwlock, RW_WRITER); 2953253Smec proc_hp = pph[index]; 2963253Smec while (proc_hp != NULL) { 2973253Smec if (proc_hp->pph_proc == curproc) { 2983253Smec phpp = &proc_hp->pph_hash; 2993253Smec while (*phpp != NULL) { 3003253Smec if ((*phpp)->ph_vnode == vp) { 3013253Smec victim = *phpp; 3023253Smec *phpp = victim->ph_next; 3033253Smec 3043253Smec rw_exit(&pph_rwlock); 3053253Smec kmem_free(victim, sizeof (*victim)); 3063253Smec return (1); 3073253Smec } 3083253Smec phpp = &(*phpp)->ph_next; 3093253Smec } 3103253Smec } 3113253Smec proc_hp = proc_hp->pph_next; 3123253Smec } 3133253Smec rw_exit(&pph_rwlock); 3143253Smec 3153253Smec /* not found */ 3163253Smec return (0); 3173253Smec } 3183253Smec 3193253Smec int 3203253Smec physmem_setup_vnops() 3213253Smec { 3223253Smec int error; 3233253Smec char *name = "physmem"; 3243253Smec if (physmem_vnodeops != NULL) 3253253Smec cmn_err(CE_PANIC, "physmem vnodeops already set\n"); 3263253Smec error = vn_make_ops(name, physmem_vnodeops_template, &physmem_vnodeops); 3273253Smec if (error != 0) { 3283253Smec cmn_err(CE_WARN, "physmem_setup_vnops: bad vnode ops template"); 3293253Smec } 3303253Smec return (error); 3313253Smec } 3323253Smec 3333253Smec /* 3343253Smec * The guts of the PHYSMEM_SETUP ioctl. 
 * Create a segment in the address space with the specified parameters.
 * If pspp->user_va is NULL, as_gap will be used to find an appropriate VA.
 * We do not do bounds checking on the requested physical addresses, if they
 * do not exist in the system, they will not be mappable.
 * Returns 0 on success with the following error codes on failure:
 *	ENOMEM - The VA range requested was already mapped if pspp->user_va is
 *		non-NULL or the system was unable to find enough VA space for
 *		the desired length if user_va was NULL.
 *	EINVAL - The requested PA, VA, or length was not PAGESIZE aligned.
 */
int
physmem_setup_addrs(struct physmem_setup_param *pspp)
{
	struct as *as = curproc->p_as;
	struct segvn_crargs vn_a;
	int ret = 0;
	uint64_t base_pa;
	size_t len;
	caddr_t uvaddr;
	struct vnode *vp;
	struct physmem_hash *php;

	ASSERT(pspp != NULL);
	base_pa = pspp->req_paddr;
	len = pspp->len;
	uvaddr = (caddr_t)(uintptr_t)pspp->user_va;

	/* Sanity checking */
	if (!IS_P2ALIGNED(base_pa, PAGESIZE))
		return (EINVAL);
	if (!IS_P2ALIGNED(len, PAGESIZE))
		return (EINVAL);
	if (uvaddr != NULL && !IS_P2ALIGNED(uvaddr, PAGESIZE))
		return (EINVAL);

	php = kmem_zalloc(sizeof (struct physmem_hash), KM_SLEEP);

	/* Need to bump vnode count so that the driver can not be unloaded */
	mutex_enter(&physmem_mutex);
	physmem_vnodecnt++;
	mutex_exit(&physmem_mutex);

	vp = vn_alloc(KM_SLEEP);
	ASSERT(vp != NULL);	/* SLEEP can't return NULL */
	vn_setops(vp, physmem_vnodeops);

	php->ph_vnode = vp;

	/* segvn creation arguments; the PA doubles as the vnode offset */
	vn_a.vp = vp;
	vn_a.offset = (u_offset_t)base_pa;
	vn_a.type = MAP_SHARED;
	vn_a.prot = PROT_ALL;
	vn_a.maxprot = PROT_ALL;
	vn_a.flags = 0;
	vn_a.cred = NULL;
	vn_a.amp = NULL;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	as_rangelock(as);
	if (uvaddr != NULL) {
		/* caller picked the VA; verify the range is unmapped */
		if (as_gap(as, len, &uvaddr, &len, AH_LO, NULL) == -1) {
			ret = ENOMEM;
			/*
			 * Common failure path: undo the allocations and the
			 * vnode-count bump done above.
			 */
fail:
			as_rangeunlock(as);
			vn_free(vp);
			kmem_free(php, sizeof (*php));
			mutex_enter(&physmem_mutex);
			physmem_vnodecnt--;
			mutex_exit(&physmem_mutex);
			return (ret);
		}
	} else {
		/* We pick the address for the user */
		map_addr(&uvaddr, len, 0, 1, 0);
		if (uvaddr == NULL) {
			ret = ENOMEM;
			goto fail;
		}
	}
	ret = as_map(as, uvaddr, len, segvn_create, &vn_a);

	if (ret == 0) {
		as_rangeunlock(as);
		php->ph_base_pa = base_pa;
		php->ph_base_va = uvaddr;
		php->ph_seg_len = len;
		pspp->user_va = (uint64_t)(uintptr_t)uvaddr;
		pspp->cookie = (uint64_t)(uintptr_t)php;
		ret = physmem_add_hash(php);
		if (ret == 0)
			return (0);

		/* Note that the call to as_unmap will free the vnode */
		(void) as_unmap(as, uvaddr, len);
		kmem_free(php, sizeof (*php));
		return (ret);
	}

	/* as_map failed; take the common cleanup path with its error code */
	goto fail;
	/*NOTREACHED*/
}

/*
 * The guts of the PHYSMEM_MAP ioctl.
 * Map the given PA to the appropriate VA if PHYSMEM_SETUP ioctl has already
 * been called for this PA range.
 * Returns 0 on success with the following error codes on failure:
 *	EPERM - The requested page is long term locked, and thus repeated
 *		requests to allocate this page will likely fail.
 *	EAGAIN - The requested page could not be allocated, but it is believed
 *		that future attempts could succeed.
 *	ENOMEM - There was not enough free memory in the system to safely
 *		map the requested page.
 *	EINVAL - The requested paddr was not PAGESIZE aligned or the
 *		PHYSMEM_SETUP ioctl was not called for this page.
 *	ENOENT - The requested page was inside the kernel cage, and the
 *		PHYSMEM_CAGE flag was not set.
 *	EBUSY - The requested page is retired and the PHYSMEM_RETIRE flag
 *		was not set.
 */
static int
physmem_map_addrs(struct physmem_map_param *pmpp)
{
	caddr_t uvaddr;
	page_t *pp;
	uint64_t req_paddr;
	struct vnode *vp;
	int ret = 0;
	struct physmem_hash *php;
	uint_t flags = 0;

	ASSERT(pmpp != NULL);
	req_paddr = pmpp->req_paddr;

	if (!IS_P2ALIGNED(req_paddr, PAGESIZE))
		return (EINVAL);
	/* Find the vnode for this map request */
	rw_enter(&pph_rwlock, RW_READER);
	php = physmem_get_hash(req_paddr, PAGESIZE, curproc);
	if (php == NULL) {
		rw_exit(&pph_rwlock);
		return (EINVAL);
	}
	vp = php->ph_vnode;
	uvaddr = php->ph_base_va + (req_paddr - php->ph_base_pa);
	rw_exit(&pph_rwlock);

	pp = page_numtopp_nolock(btop((size_t)req_paddr));
	if (pp == NULL) {
		pmpp->ret_va = NULL;
		return (EPERM);
	}

	/*
	 * Check to see if page already mapped correctly.  This can happen
	 * when we failed to capture a page previously and it was captured
	 * asynchronously for us.  Return success in this case.
	 */
	if (pp->p_vnode == vp) {
		ASSERT(pp->p_offset == (u_offset_t)req_paddr);
		pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr;
		return (0);
	}

	/*
	 * physmem should be responsible for checking for cage
	 * and prom pages.
	 */
	if (pmpp->flags & PHYSMEM_CAGE)
		flags = CAPTURE_GET_CAGE;
	if (pmpp->flags & PHYSMEM_RETIRED)
		flags |= CAPTURE_GET_RETIRED;

	/*
	 * On success the page-capture framework will invoke our registered
	 * callback (map_page_proc) to hash the page onto our vnode.
	 */
	ret = page_trycapture(pp, 0, flags | CAPTURE_PHYSMEM, curproc);

	if (ret != 0) {
		pmpp->ret_va = NULL;
		return (ret);
	} else {
		pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr;
		return (0);
	}
}

/*
 * Map the given page into the process's address space if possible.
 * We actually only hash the page in on the correct vnode as the page
 * will be mapped via segvn_pagefault.
 * returns 0 on success
 * returns 1 if there is no need to map this page anymore (process exited)
 * returns -1 if we failed to map the page.
 */
int
map_page_proc(page_t *pp, void *arg, uint_t flags)
{
	struct vnode *vp;
	proc_t *procp = (proc_t *)arg;
	int ret;
	u_offset_t paddr = (u_offset_t)ptob(pp->p_pagenum);
	struct physmem_hash *php;

	ASSERT(pp != NULL);

	/*
	 * Check against availrmem to make sure that we're not low on memory.
	 * We check again here as ASYNC requests do not do this check elsewhere.
	 * We return 1 as we don't want the page to have the PR_CAPTURE bit
	 * set or be on the page capture hash.
	 */
	if (swapfs_minfree > availrmem + 1) {
		page_free(pp, 1);
		return (1);
	}

	/*
	 * If this is an asynchronous request for the current process,
	 * we can not map the page as it's possible that we are also in the
	 * process of unmapping the page which could result in a deadlock
	 * with the as lock.
	 */
	if ((flags & CAPTURE_ASYNC) && (curproc == procp)) {
		page_free(pp, 1);
		return (-1);
	}

	/* only return zeroed out pages */
	pagezero(pp, 0, PAGESIZE);

	rw_enter(&pph_rwlock, RW_READER);
	php = physmem_get_hash(paddr, PAGESIZE, procp);
	if (php == NULL) {
		rw_exit(&pph_rwlock);
		/*
		 * Free the page as there is no longer a valid outstanding
		 * request for this page.
		 */
		page_free(pp, 1);
		return (1);
	}

	vp = php->ph_vnode;

	/*
	 * We need to protect against a possible deadlock here where we own
	 * the vnode page hash mutex and want to acquire it again as there
	 * are locations in the code, where we unlock a page while holding
	 * the mutex which can lead to the page being captured and eventually
	 * end up here.
	 */
	if (mutex_owned(page_vnode_mutex(vp))) {
		rw_exit(&pph_rwlock);
		page_free(pp, 1);
		return (-1);
	}

	ret = page_hashin(pp, vp, paddr, NULL);
	rw_exit(&pph_rwlock);
	if (ret == 0) {
		page_free(pp, 1);
		return (-1);
	}

	/* keep a shared lock; segvn_pagefault will find the page by lookup */
	page_downgrade(pp);

	mutex_enter(&freemem_lock);
	availrmem--;
	mutex_exit(&freemem_lock);

	return (0);
}

/*
 * The guts of the PHYSMEM_DESTROY ioctl.
 * The cookie passed in will provide all of the information needed to
 * free up the address space and physical memory associated with the
 * corresponding PHYSMEM_SETUP ioctl.
 * Returns 0 on success with the following error codes on failure:
 *	EINVAL - The cookie supplied is not valid.
 */
int
physmem_destroy_addrs(uint64_t p_cookie)
{
	struct as *as = curproc->p_as;
	size_t len;
	caddr_t uvaddr;

	rw_enter(&pph_rwlock, RW_READER);
	if (physmem_validate_cookie(p_cookie) == 0) {
		rw_exit(&pph_rwlock);
		return (EINVAL);
	}

	/* the cookie is the address of the physmem_hash entry itself */
	len = ((struct physmem_hash *)(uintptr_t)p_cookie)->ph_seg_len;
	uvaddr = ((struct physmem_hash *)(uintptr_t)p_cookie)->ph_base_va;
	rw_exit(&pph_rwlock);

	/* unmapping triggers delmap/inactive which tear down the hash entry */
	(void) as_unmap(as, uvaddr, len);

	return (0);
}

/*
 * If the page has been hashed into the physmem vnode, then just look it up
 * and return it via pl, otherwise return ENOMEM as the map ioctl has not
 * succeeded on the given page.
6413253Smec */ 6423253Smec /*ARGSUSED*/ 6433253Smec static int 6443253Smec physmem_getpage(struct vnode *vp, offset_t off, size_t len, uint_t *protp, 6453253Smec page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw, 646*5331Samw struct cred *cr, caller_context_t *ct) 6473253Smec { 6483253Smec page_t *pp; 6493253Smec 6503253Smec ASSERT(len == PAGESIZE); 6513253Smec ASSERT(AS_READ_HELD(seg->s_as, &seg->s_as->a_lock)); 6523253Smec 6533253Smec /* 6543253Smec * If the page is in the hash, then we successfully claimed this 6553253Smec * page earlier, so return it to the caller. 6563253Smec */ 6573253Smec pp = page_lookup(vp, off, SE_SHARED); 6583253Smec if (pp != NULL) { 6593253Smec pl[0] = pp; 6603253Smec pl[1] = NULL; 6613253Smec *protp = PROT_ALL; 6623253Smec return (0); 6633253Smec } 6643253Smec return (ENOMEM); 6653253Smec } 6663253Smec 6673253Smec /* 6683253Smec * We can not allow a process mapping /dev/physmem pages to fork as there can 6693253Smec * only be a single mapping to a /dev/physmem page at a given time. Thus, the 6703253Smec * return of EINVAL when we are not working on our own address space. 6713253Smec * Otherwise we return zero as this function is required for normal operation. 6723253Smec */ 6733253Smec /*ARGSUSED*/ 6743253Smec static int 6753253Smec physmem_addmap(struct vnode *vp, offset_t off, struct as *as, 6763253Smec caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, 677*5331Samw struct cred *cred, caller_context_t *ct) 6783253Smec { 6793253Smec if (curproc->p_as != as) { 6803253Smec return (EINVAL); 6813253Smec } 6823253Smec return (0); 6833253Smec } 6843253Smec 6853253Smec /* Will always get called for removing a whole segment. 
 */
/*ARGSUSED*/
static int
physmem_delmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
    struct cred *cred, caller_context_t *ct)
{
	/*
	 * Release our hold on the vnode so that the final VN_RELE will
	 * call physmem_inactive to clean things up.
	 */
	VN_RELE(vp);

	return (0);
}

/*
 * Clean up all the pages belonging to this vnode and then free it.
 */
/*ARGSUSED*/
static void
physmem_inactive(vnode_t *vp, cred_t *crp, caller_context_t *ct)
{
	page_t *pp;

	/*
	 * Remove the vnode from the hash now, to prevent asynchronous
	 * attempts to map into this vnode.  This avoids a deadlock
	 * where two threads try to get into this logic at the same
	 * time and try to map the pages they are destroying into the
	 * other's address space.
	 * If it's not in the hash, just free it.
	 */
	if (physmem_remove_vnode_hash(vp) == 0) {
		ASSERT(vp->v_pages == NULL);
		vn_free(vp);
		physmem_remove_hash_proc();
		mutex_enter(&physmem_mutex);
		physmem_vnodecnt--;
		mutex_exit(&physmem_mutex);
		return;
	}

	/*
	 * At this point in time, no other logic can be adding or removing
	 * pages from the vnode, otherwise the v_pages list could be inaccurate.
	 */

	while ((pp = vp->v_pages) != NULL) {
		page_t *rpp;
		if (page_tryupgrade(pp)) {
			/*
			 * set lckcnt for page_destroy to do availrmem
			 * accounting
			 */
			pp->p_lckcnt = 1;
			page_destroy(pp, 0);
		} else {
			/* failure to lock should be transient */
			rpp = page_lookup(vp, ptob(pp->p_pagenum), SE_SHARED);
			if (rpp != pp) {
				page_unlock(rpp);
				continue;
			}
			/* drop the share lock and retry the upgrade */
			page_unlock(pp);
		}
	}
	vn_free(vp);
	physmem_remove_hash_proc();
	mutex_enter(&physmem_mutex);
	physmem_vnodecnt--;
	mutex_exit(&physmem_mutex);
}

/*
 * ioctl entry point: dispatch to the setup/map/destroy helpers, copying
 * the parameter block in and (where applicable) back out.
 */
/*ARGSUSED*/
static int
physmem_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
	int ret;

	switch (cmd) {
	case PHYSMEM_SETUP:
		{
			struct physmem_setup_param psp;
			if (ddi_copyin((void *)arg, &psp,
			    sizeof (struct physmem_setup_param), 0))
				return (EFAULT);
			ret = physmem_setup_addrs(&psp);
			/* copy out even on failure: user_va/cookie results */
			if (ddi_copyout(&psp, (void *)arg, sizeof (psp), 0))
				return (EFAULT);
		}
		break;
	case PHYSMEM_MAP:
		{
			struct physmem_map_param pmp;
			if (ddi_copyin((void *)arg, &pmp,
			    sizeof (struct physmem_map_param), 0))
				return (EFAULT);
			ret = physmem_map_addrs(&pmp);
			if (ddi_copyout(&pmp, (void *)arg, sizeof (pmp), 0))
				return (EFAULT);
		}
		break;
	case PHYSMEM_DESTROY:
		{
			uint64_t cookie;
			if (ddi_copyin((void *)arg, &cookie,
			    sizeof (uint64_t), 0))
				return (EFAULT);
			ret = physmem_destroy_addrs(cookie);
		}
		break;
	default:
		return (ENOTSUP);
	}
	return (ret);
}

/*
 * open(9E): require read/write access and resource + lock-memory
 * privileges; warn once that this driver can pin pages long-term.
 */
/*ARGSUSED*/
static int
physmem_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	int ret;
	static int msg_printed = 0;

	if ((flag & (FWRITE | FREAD)) != (FWRITE | FREAD)) {
		return (EINVAL);
	}

	/* need to make sure we have the right privileges */
	if ((ret = secpolicy_resource(credp)) != 0)
		return (ret);
	if ((ret = secpolicy_lock_memory(credp)) != 0)
		return (ret);

	if (msg_printed == 0) {
		cmn_err(CE_NOTE, "!driver has been opened. This driver may "
		    "take out long term locks on pages which may impact "
		    "dynamic reconfiguration events");
		msg_printed = 1;
	}

	return (0);
}

/*ARGSUSED*/
static int
physmem_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
	return (0);
}

/* getinfo(9E): standard devt-to-devinfo / devt-to-instance queries. */
/*ARGSUSED*/
static int
physmem_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd,
    void *arg, void **resultp)
{
	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*resultp = physmem_dip;
		return (DDI_SUCCESS);

	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(ulong_t)getminor((dev_t)arg);
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}
}

/*
 * attach(9E): create the minor node, build the vnode ops, clear the proc
 * hash, and register the page-capture callback.
 */
static int
physmem_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int i;

	if (cmd == DDI_RESUME) {
		return (DDI_SUCCESS);
	}

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR,
	    ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
		return (DDI_FAILURE);

	physmem_dip = dip;

	/* Initialize driver specific data */
	if (physmem_setup_vnops()) {
		ddi_remove_minor_node(dip, ddi_get_name(dip));
		return (DDI_FAILURE);
	}

	for (i = 0; i < PPH_SIZE; i++)
		pph[i] = NULL;

	page_capture_register_callback(PC_PHYSMEM, 10000,
	    map_page_proc);

	return (DDI_SUCCESS);
}

/*
 * detach(9E): only succeeds when no process still holds a physmem vnode;
 * tears down the vnode ops and unregisters the page-capture callback.
 */
static int
physmem_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int ret = DDI_SUCCESS;

	if (cmd == DDI_SUSPEND) {
		return (DDI_SUCCESS);
	}

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	ASSERT(physmem_dip == dip);

	mutex_enter(&physmem_mutex);
	if (physmem_vnodecnt == 0) {
		if (physmem_vnodeops != NULL) {
			vn_freevnodeops(physmem_vnodeops);
			physmem_vnodeops = NULL;
			page_capture_unregister_callback(PC_PHYSMEM);
		}
	} else {
		/*
		 * NOTE(review): EBUSY (nonzero) is returned here where the
		 * framework expects DDI_FAILURE; it still reads as failure
		 * since only DDI_SUCCESS (0) allows the detach -- confirm
		 * against the DDI before changing.
		 */
		ret = EBUSY;
	}
	mutex_exit(&physmem_mutex);
	if (ret == DDI_SUCCESS)
		ddi_remove_minor_node(dip, ddi_get_name(dip));
	return (ret);
}

static struct cb_ops physmem_cb_ops = {
	physmem_open,	/* open */
	physmem_close,	/* close */
	nodev,		/* strategy */
	nodev,		/* print */
	nodev,		/* dump */
	nodev,		/* read */
	nodev,		/* write */
	physmem_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* chpoll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* cb_str */
	D_NEW | D_MP | D_DEVMAP,
	CB_REV,
	NULL,
	NULL
};

static struct dev_ops physmem_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	physmem_getinfo,	/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	physmem_attach,		/* devo_attach */
	physmem_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&physmem_cb_ops,	/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	NULL			/* devo_power */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"physmem driver %I%",
	&physmem_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

/* Loadable-module entry points. */
int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}