/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/devops.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/cred.h>
#include <sys/policy.h>
#include <sys/errno.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>
#include <vm/page.h>
#include <sys/fs/swapnode.h>
#include <sys/sysmacros.h>
#include <sys/fcntl.h>
#include <sys/vmsystm.h>
#include <sys/physmem.h>

static dev_info_t *physmem_dip = NULL;

/*
 * Linked list element hanging off physmem_proc_hash below, which holds all
 * the information for a given segment which has been setup for this process.
 * This is a simple linked list as we are assuming that for a given process
 * the setup ioctl will only be called a handful of times.  If this assumption
 * changes in the future, a quicker to traverse data structure should be used.
 */
struct physmem_hash {
	struct physmem_hash *ph_next;
	uint64_t ph_base_pa;
	caddr_t ph_base_va;
	size_t ph_seg_len;
	struct vnode *ph_vnode;
};

/*
 * Hash of all of the processes which have setup mappings with the driver with
 * pointers to per process data.
 */
struct physmem_proc_hash {
	struct proc *pph_proc;
	struct physmem_hash *pph_hash;
	struct physmem_proc_hash *pph_next;
};


/* Needs to be a power of two for simple hash algorithm */
#define	PPH_SIZE	8
struct physmem_proc_hash *pph[PPH_SIZE];

/*
 * Lock which protects the pph hash above.  To add an element (either a new
 * process or a new segment) the WRITE lock must be held.  To traverse the
 * list, only a READ lock is needed.
 */
krwlock_t pph_rwlock;

#define	PHYSMEM_HASH(procp) ((int)((((uintptr_t)procp) >> 8) & (PPH_SIZE - 1)))

/*
 * Need to keep a reference count of how many processes have the driver
 * open to prevent it from disappearing.
 */
uint64_t physmem_vnodecnt;
kmutex_t physmem_mutex;		/* protects physmem_vnodecnt */

static int physmem_getpage(struct vnode *vp, offset_t off, size_t len,
    uint_t *protp, page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
    enum seg_rw rw, struct cred *cr);

static int physmem_addmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
    struct cred *cred);

static int physmem_delmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
    struct cred *cred);

static void physmem_inactive(vnode_t *vp, cred_t *crp);

const fs_operation_def_t physmem_vnodeops_template[] = {
	VOPNAME_GETPAGE, physmem_getpage,
	VOPNAME_ADDMAP, (fs_generic_func_p) physmem_addmap,
	VOPNAME_DELMAP, physmem_delmap,
	VOPNAME_INACTIVE, (fs_generic_func_p) physmem_inactive,
	NULL, NULL
};

vnodeops_t *physmem_vnodeops = NULL;

/*
 * Removes the current process from the hash if the process has no more
 * physmem segments active.
 */
void
physmem_remove_hash_proc()
{
	int index;
	struct physmem_proc_hash **walker;
	struct physmem_proc_hash *victim = NULL;

	index = PHYSMEM_HASH(curproc);
	rw_enter(&pph_rwlock, RW_WRITER);
	walker = &pph[index];
	while (*walker != NULL) {
		if ((*walker)->pph_proc == curproc &&
		    (*walker)->pph_hash == NULL) {
			victim = *walker;
			*walker = victim->pph_next;
			break;
		}
		walker = &((*walker)->pph_next);
	}
	rw_exit(&pph_rwlock);
	if (victim != NULL)
		kmem_free(victim, sizeof (struct physmem_proc_hash));
}

/*
 * Add a new entry to the hash for the given process to cache the
 * address ranges that it is working on.  If this is the first hash
 * item to be added for this process, we will create the head pointer
 * for this process.
 * Returns 0 on success, ERANGE when the physical address is already in the
 * hash.
 */
int
physmem_add_hash(struct physmem_hash *php)
{
	int index;
	struct physmem_proc_hash *iterator;
	struct physmem_proc_hash *newp = NULL;
	struct physmem_hash *temp;
	int ret = 0;

	index = PHYSMEM_HASH(curproc);

insert:
	rw_enter(&pph_rwlock, RW_WRITER);
	iterator = pph[index];
	while (iterator != NULL) {
		if (iterator->pph_proc == curproc) {
			/*
			 * check to make sure a single process does not try to
			 * map the same region twice.
			 */
			for (temp = iterator->pph_hash; temp != NULL;
			    temp = temp->ph_next) {
				if ((php->ph_base_pa >= temp->ph_base_pa &&
				    php->ph_base_pa < temp->ph_base_pa +
				    temp->ph_seg_len) ||
				    (temp->ph_base_pa >= php->ph_base_pa &&
				    temp->ph_base_pa < php->ph_base_pa +
				    php->ph_seg_len)) {
					ret = ERANGE;
					break;
				}
			}
			if (ret == 0) {
				php->ph_next = iterator->pph_hash;
				iterator->pph_hash = php;
			}
			rw_exit(&pph_rwlock);
			/* Need to check for two threads in sync */
			if (newp != NULL)
				kmem_free(newp, sizeof (*newp));
			return (ret);
		}
		iterator = iterator->pph_next;
	}

	if (newp != NULL) {
		newp->pph_proc = curproc;
		newp->pph_next = pph[index];
		newp->pph_hash = php;
		php->ph_next = NULL;
		pph[index] = newp;
		rw_exit(&pph_rwlock);
		return (0);
	}

	rw_exit(&pph_rwlock);
	/* Dropped the lock so we could use KM_SLEEP */
	newp = kmem_zalloc(sizeof (struct physmem_proc_hash), KM_SLEEP);
	goto insert;
}

/*
 * Will return the pointer to the physmem_hash struct if the setup routine
 * has previously been called for this memory.
 * Returns NULL on failure.
 */
struct physmem_hash *
physmem_get_hash(uint64_t req_paddr, size_t len, proc_t *procp)
{
	int index;
	struct physmem_proc_hash *proc_hp;
	struct physmem_hash *php;

	ASSERT(rw_lock_held(&pph_rwlock));

	index = PHYSMEM_HASH(procp);
	proc_hp = pph[index];
	while (proc_hp != NULL) {
		if (proc_hp->pph_proc == procp) {
			php = proc_hp->pph_hash;
			while (php != NULL) {
				if ((req_paddr >= php->ph_base_pa) &&
				    (req_paddr + len <=
				    php->ph_base_pa + php->ph_seg_len)) {
					return (php);
				}
				php = php->ph_next;
			}
		}
		proc_hp = proc_hp->pph_next;
	}
	return (NULL);
}

int
physmem_validate_cookie(uint64_t p_cookie)
{
	int index;
	struct physmem_proc_hash *proc_hp;
	struct physmem_hash *php;

	ASSERT(rw_lock_held(&pph_rwlock));

	index = PHYSMEM_HASH(curproc);
	proc_hp = pph[index];
	while (proc_hp != NULL) {
		if (proc_hp->pph_proc == curproc) {
			php = proc_hp->pph_hash;
			while (php != NULL) {
				if ((uint64_t)(uintptr_t)php == p_cookie) {
					return (1);
				}
				php = php->ph_next;
			}
		}
		proc_hp = proc_hp->pph_next;
	}
	return (0);
}

/*
 * Remove the given vnode from the pph hash.  If it exists in the hash the
 * process still has to be around as the vnode is obviously still around and
 * since it's a physmem vnode, it must be in the hash.
 * If it is not in the hash that must mean that the setup ioctl failed.
 * Return 0 in this instance, 1 if it is in the hash.
 */
int
physmem_remove_vnode_hash(vnode_t *vp)
{
	int index;
	struct physmem_proc_hash *proc_hp;
	struct physmem_hash **phpp;
	struct physmem_hash *victim;

	index = PHYSMEM_HASH(curproc);
	/* synchronize with the map routine */
	rw_enter(&pph_rwlock, RW_WRITER);
	proc_hp = pph[index];
	while (proc_hp != NULL) {
		if (proc_hp->pph_proc == curproc) {
			phpp = &proc_hp->pph_hash;
			while (*phpp != NULL) {
				if ((*phpp)->ph_vnode == vp) {
					victim = *phpp;
					*phpp = victim->ph_next;

					rw_exit(&pph_rwlock);
					kmem_free(victim, sizeof (*victim));
					return (1);
				}
				phpp = &(*phpp)->ph_next;
			}
		}
		proc_hp = proc_hp->pph_next;
	}
	rw_exit(&pph_rwlock);

	/* not found */
	return (0);
}

int
physmem_setup_vnops()
{
	int error;
	char *name = "physmem";
	if (physmem_vnodeops != NULL)
		cmn_err(CE_PANIC, "physmem vnodeops already set\n");
	error = vn_make_ops(name, physmem_vnodeops_template, &physmem_vnodeops);
	if (error != 0) {
		cmn_err(CE_WARN, "physmem_setup_vnops: bad vnode ops template");
	}
	return (error);
}

/*
 * The guts of the PHYSMEM_SETUP ioctl.
 * Create a segment in the address space with the specified parameters.
 * If pspp->user_va is NULL, map_addr will be used to find an appropriate VA.
 * We do not do bounds checking on the requested physical addresses; if they
 * do not exist in the system, they will not be mappable.
 * Returns 0 on success with the following error codes on failure:
 *	ENOMEM - The VA range requested was already mapped if pspp->user_va is
 *		non-NULL, or the system was unable to find enough VA space for
 *		the desired length if user_va was NULL.
 *	EINVAL - The requested PA, VA, or length was not PAGESIZE aligned.
 */
int
physmem_setup_addrs(struct physmem_setup_param *pspp)
{
	struct as *as = curproc->p_as;
	struct segvn_crargs vn_a;
	int ret = 0;
	uint64_t base_pa;
	size_t len;
	caddr_t uvaddr;
	struct vnode *vp;
	struct physmem_hash *php;

	ASSERT(pspp != NULL);
	base_pa = pspp->req_paddr;
	len = pspp->len;
	uvaddr = (caddr_t)(uintptr_t)pspp->user_va;

	/* Sanity checking */
	if (!IS_P2ALIGNED(base_pa, PAGESIZE))
		return (EINVAL);
	if (!IS_P2ALIGNED(len, PAGESIZE))
		return (EINVAL);
	if (uvaddr != NULL && !IS_P2ALIGNED(uvaddr, PAGESIZE))
		return (EINVAL);

	php = kmem_zalloc(sizeof (struct physmem_hash), KM_SLEEP);

	/* Need to bump vnode count so that the driver can not be unloaded */
	mutex_enter(&physmem_mutex);
	physmem_vnodecnt++;
	mutex_exit(&physmem_mutex);

	vp = vn_alloc(KM_SLEEP);
	ASSERT(vp != NULL);	/* SLEEP can't return NULL */
	vn_setops(vp, physmem_vnodeops);

	php->ph_vnode = vp;

	vn_a.vp = vp;
	vn_a.offset = (u_offset_t)base_pa;
	vn_a.type = MAP_SHARED;
	vn_a.prot = PROT_ALL;
	vn_a.maxprot = PROT_ALL;
	vn_a.flags = 0;
	vn_a.cred = NULL;
	vn_a.amp = NULL;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	as_rangelock(as);
	if (uvaddr != NULL) {
		if (as_gap(as, len, &uvaddr, &len, AH_LO, NULL) == -1) {
			ret = ENOMEM;
fail:
			as_rangeunlock(as);
			vn_free(vp);
			kmem_free(php, sizeof (*php));
			mutex_enter(&physmem_mutex);
			physmem_vnodecnt--;
			mutex_exit(&physmem_mutex);
			return (ret);
		}
	} else {
		/* We pick the address for the user */
		map_addr(&uvaddr, len, 0, 1, 0);
		if (uvaddr == NULL) {
			ret = ENOMEM;
			goto fail;
		}
	}
	ret = as_map(as, uvaddr, len, segvn_create, &vn_a);

	if (ret == 0) {
		as_rangeunlock(as);
		php->ph_base_pa = base_pa;
		php->ph_base_va = uvaddr;
		php->ph_seg_len = len;
		pspp->user_va = (uint64_t)(uintptr_t)uvaddr;
		pspp->cookie = (uint64_t)(uintptr_t)php;
		ret = physmem_add_hash(php);
		if (ret == 0)
			return (0);

		/* Note that the call to as_unmap will free the vnode */
		(void) as_unmap(as, uvaddr, len);
		kmem_free(php, sizeof (*php));
		return (ret);
	}

	goto fail;
	/*NOTREACHED*/
}
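
/*
 * An illustrative, user-level sketch (not part of this driver) of how the
 * PHYSMEM_SETUP ioctl above is expected to be driven.  The ioctl command and
 * struct physmem_setup_param come from <sys/physmem.h>; the physical address
 * and length below are made-up, page-aligned example values, and error
 * handling is omitted.  On success the driver fills in psp.user_va with the
 * VA of the new segment and psp.cookie with the handle that PHYSMEM_DESTROY
 * expects later.
 *
 *	int fd = open("/dev/physmem", O_RDWR);
 *	struct physmem_setup_param psp;
 *
 *	psp.req_paddr = 0x10000000;
 *	psp.len = 0x2000;
 *	psp.user_va = 0;
 *	(void) ioctl(fd, PHYSMEM_SETUP, &psp);
 */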

/*
 * The guts of the PHYSMEM_MAP ioctl.
 * Map the given PA to the appropriate VA if the PHYSMEM_SETUP ioctl has
 * already been called for this PA range.
 * Returns 0 on success with the following error codes on failure:
 *	EPERM - The requested page is long term locked, and thus repeated
 *		requests to allocate this page will likely fail.
 *	EAGAIN - The requested page could not be allocated, but it is believed
 *		that future attempts could succeed.
 *	ENOMEM - There was not enough free memory in the system to safely
 *		map the requested page.
 *	EINVAL - The requested paddr was not PAGESIZE aligned or the
 *		PHYSMEM_SETUP ioctl was not called for this page.
 *	ENOENT - The requested page was inside the kernel cage, and the
 *		PHYSMEM_CAGE flag was not set.
 *	EBUSY - The requested page is retired and the PHYSMEM_RETIRED flag
 *		was not set.
 */
static int
physmem_map_addrs(struct physmem_map_param *pmpp)
{
	caddr_t uvaddr;
	page_t *pp;
	uint64_t req_paddr;
	struct vnode *vp;
	int ret = 0;
	struct physmem_hash *php;
	uint_t flags = 0;

	ASSERT(pmpp != NULL);
	req_paddr = pmpp->req_paddr;

	if (!IS_P2ALIGNED(req_paddr, PAGESIZE))
		return (EINVAL);
	/* Find the vnode for this map request */
	rw_enter(&pph_rwlock, RW_READER);
	php = physmem_get_hash(req_paddr, PAGESIZE, curproc);
	if (php == NULL) {
		rw_exit(&pph_rwlock);
		return (EINVAL);
	}
	vp = php->ph_vnode;
	uvaddr = php->ph_base_va + (req_paddr - php->ph_base_pa);
	rw_exit(&pph_rwlock);

	pp = page_numtopp_nolock(btop((size_t)req_paddr));
	if (pp == NULL) {
		pmpp->ret_va = NULL;
		return (EPERM);
	}

	/*
	 * Check to see if page already mapped correctly.  This can happen
	 * when we failed to capture a page previously and it was captured
	 * asynchronously for us.  Return success in this case.
	 */
	if (pp->p_vnode == vp) {
		ASSERT(pp->p_offset == (u_offset_t)req_paddr);
		pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr;
		return (0);
	}

	/*
	 * physmem should be responsible for checking for cage
	 * and prom pages.
	 */
	if (pmpp->flags & PHYSMEM_CAGE)
		flags = CAPTURE_GET_CAGE;
	if (pmpp->flags & PHYSMEM_RETIRED)
		flags |= CAPTURE_GET_RETIRED;

	ret = page_trycapture(pp, 0, flags | CAPTURE_PHYSMEM, curproc);

	if (ret != 0) {
		pmpp->ret_va = NULL;
		return (ret);
	} else {
		pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr;
		return (0);
	}
}
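
/*
 * Continuing the user-level sketch from above (illustrative only, not part
 * of the driver): once PHYSMEM_SETUP has succeeded, each page in the range
 * is claimed individually with PHYSMEM_MAP before it is used.  The struct
 * physmem_map_param and the PHYSMEM_CAGE/PHYSMEM_RETIRED flags come from
 * <sys/physmem.h>; on success pmp.ret_va holds the VA within the segment set
 * up above at which the page can be read and written.
 *
 *	struct physmem_map_param pmp;
 *
 *	pmp.req_paddr = psp.req_paddr;
 *	pmp.flags = 0;
 *	if (ioctl(fd, PHYSMEM_MAP, &pmp) == 0) {
 *		char *p = (char *)(uintptr_t)pmp.ret_va;
 *		p[0] = 0;
 *	}
 */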

/*
 * Map the given page into the process's address space if possible.
 * We actually only hash the page in on the correct vnode as the page
 * will be mapped via segvn_pagefault.
 * returns 0 on success
 * returns 1 if there is no need to map this page anymore (process exited)
 * returns -1 if we failed to map the page.
 */
int
map_page_proc(page_t *pp, void *arg, uint_t flags)
{
	struct vnode *vp;
	proc_t *procp = (proc_t *)arg;
	int ret;
	u_offset_t paddr = (u_offset_t)ptob(pp->p_pagenum);
	struct physmem_hash *php;

	ASSERT(pp != NULL);

	/*
	 * Check against availrmem to make sure that we're not low on memory.
	 * We check again here as ASYNC requests do not do this check elsewhere.
	 * We return 1 as we don't want the page to have the PR_CAPTURE bit
	 * set or be on the page capture hash.
	 */
	if (swapfs_minfree > availrmem + 1) {
		page_free(pp, 1);
		return (1);
	}

	/*
	 * If this is an asynchronous request for the current process,
	 * we can not map the page as it's possible that we are also in the
	 * process of unmapping the page which could result in a deadlock
	 * with the as lock.
	 */
	if ((flags & CAPTURE_ASYNC) && (curproc == procp)) {
		page_free(pp, 1);
		return (-1);
	}

	/* only return zeroed out pages */
	pagezero(pp, 0, PAGESIZE);

	rw_enter(&pph_rwlock, RW_READER);
	php = physmem_get_hash(paddr, PAGESIZE, procp);
	if (php == NULL) {
		rw_exit(&pph_rwlock);
		/*
		 * Free the page as there is no longer a valid outstanding
		 * request for this page.
		 */
		page_free(pp, 1);
		return (1);
	}

	vp = php->ph_vnode;

	/*
	 * We need to protect against a possible deadlock here where we own
	 * the vnode page hash mutex and want to acquire it again as there
	 * are locations in the code, where we unlock a page while holding
	 * the mutex which can lead to the page being captured and eventually
	 * end up here.
	 */
	if (mutex_owned(page_vnode_mutex(vp))) {
		rw_exit(&pph_rwlock);
		page_free(pp, 1);
		return (-1);
	}

	ret = page_hashin(pp, vp, paddr, NULL);
	rw_exit(&pph_rwlock);
	if (ret == 0) {
		page_free(pp, 1);
		return (-1);
	}

	page_downgrade(pp);

	mutex_enter(&freemem_lock);
	availrmem--;
	mutex_exit(&freemem_lock);

	return (0);
}

/*
 * The guts of the PHYSMEM_DESTROY ioctl.
 * The cookie passed in will provide all of the information needed to
 * free up the address space and physical memory associated with the
 * corresponding PHYSMEM_SETUP ioctl.
 * Returns 0 on success with the following error codes on failure:
 *	EINVAL - The cookie supplied is not valid.
 */
int
physmem_destroy_addrs(uint64_t p_cookie)
{
	struct as *as = curproc->p_as;
	size_t len;
	caddr_t uvaddr;

	rw_enter(&pph_rwlock, RW_READER);
	if (physmem_validate_cookie(p_cookie) == 0) {
		rw_exit(&pph_rwlock);
		return (EINVAL);
	}

	len = ((struct physmem_hash *)(uintptr_t)p_cookie)->ph_seg_len;
	uvaddr = ((struct physmem_hash *)(uintptr_t)p_cookie)->ph_base_va;
	rw_exit(&pph_rwlock);

	(void) as_unmap(as, uvaddr, len);

	return (0);
}
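
/*
 * Last piece of the user-level sketch (illustrative only): when the caller
 * is finished with the memory, the cookie returned by PHYSMEM_SETUP is
 * handed to PHYSMEM_DESTROY, which unmaps the segment set up above and
 * releases its pages.
 *
 *	uint64_t cookie = psp.cookie;
 *
 *	(void) ioctl(fd, PHYSMEM_DESTROY, &cookie);
 *	(void) close(fd);
 */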

/*
 * If the page has been hashed into the physmem vnode, then just look it up
 * and return it via pl, otherwise return ENOMEM as the map ioctl has not
 * succeeded on the given page.
 */
/*ARGSUSED*/
static int
physmem_getpage(struct vnode *vp, offset_t off, size_t len, uint_t *protp,
    page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
    struct cred *cr)
{
	page_t *pp;

	ASSERT(len == PAGESIZE);
	ASSERT(AS_READ_HELD(seg->s_as, &seg->s_as->a_lock));

	/*
	 * If the page is in the hash, then we successfully claimed this
	 * page earlier, so return it to the caller.
	 */
	pp = page_lookup(vp, off, SE_SHARED);
	if (pp != NULL) {
		pl[0] = pp;
		pl[1] = NULL;
		*protp = PROT_ALL;
		return (0);
	}
	return (ENOMEM);
}

/*
 * We can not allow a process mapping /dev/physmem pages to fork as there can
 * only be a single mapping to a /dev/physmem page at a given time.  Thus we
 * return EINVAL when we are not working on our own address space.
 * Otherwise we return zero as this function is required for normal operation.
 */
/*ARGSUSED*/
static int
physmem_addmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
    struct cred *cred)
{
	if (curproc->p_as != as) {
		return (EINVAL);
	}
	return (0);
}

/* Will always get called for removing a whole segment. */
/*ARGSUSED*/
static int
physmem_delmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
    struct cred *cred)
{
	/*
	 * Release our hold on the vnode so that the final VN_RELE will
	 * call physmem_inactive to clean things up.
	 */
	VN_RELE(vp);

	return (0);
}

/*
 * Clean up all the pages belonging to this vnode and then free it.
 */
/*ARGSUSED*/
static void
physmem_inactive(vnode_t *vp, cred_t *crp)
{
	page_t *pp;

	/*
	 * Remove the vnode from the hash now, to prevent asynchronous
	 * attempts to map into this vnode.  This avoids a deadlock
	 * where two threads try to get into this logic at the same
	 * time and try to map the pages they are destroying into the
	 * other's address space.
	 * If it's not in the hash, just free it.
	 */
	if (physmem_remove_vnode_hash(vp) == 0) {
		ASSERT(vp->v_pages == NULL);
		vn_free(vp);
		physmem_remove_hash_proc();
		mutex_enter(&physmem_mutex);
		physmem_vnodecnt--;
		mutex_exit(&physmem_mutex);
		return;
	}

	/*
	 * At this point in time, no other logic can be adding or removing
	 * pages from the vnode, otherwise the v_pages list could be inaccurate.
	 */

	while ((pp = vp->v_pages) != NULL) {
		page_t *rpp;
		if (page_tryupgrade(pp)) {
			/*
			 * set lckcnt for page_destroy to do availrmem
			 * accounting
			 */
			pp->p_lckcnt = 1;
			page_destroy(pp, 0);
		} else {
			/* failure to lock should be transient */
			rpp = page_lookup(vp, ptob(pp->p_pagenum), SE_SHARED);
			if (rpp != pp) {
				page_unlock(rpp);
				continue;
			}
			page_unlock(pp);
		}
	}
	vn_free(vp);
	physmem_remove_hash_proc();
	mutex_enter(&physmem_mutex);
	physmem_vnodecnt--;
	mutex_exit(&physmem_mutex);
}

/*ARGSUSED*/
static int
physmem_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
	int ret;

	switch (cmd) {
	case PHYSMEM_SETUP:
		{
			struct physmem_setup_param psp;
			if (ddi_copyin((void *)arg, &psp,
			    sizeof (struct physmem_setup_param), 0))
				return (EFAULT);
			ret = physmem_setup_addrs(&psp);
			if (ddi_copyout(&psp, (void *)arg, sizeof (psp), 0))
				return (EFAULT);
		}
		break;
	case PHYSMEM_MAP:
		{
			struct physmem_map_param pmp;
			if (ddi_copyin((void *)arg, &pmp,
			    sizeof (struct physmem_map_param), 0))
				return (EFAULT);
			ret = physmem_map_addrs(&pmp);
			if (ddi_copyout(&pmp, (void *)arg, sizeof (pmp), 0))
				return (EFAULT);
		}
		break;
	case PHYSMEM_DESTROY:
		{
			uint64_t cookie;
			if (ddi_copyin((void *)arg, &cookie,
			    sizeof (uint64_t), 0))
				return (EFAULT);
			ret = physmem_destroy_addrs(cookie);
		}
		break;
	default:
		return (ENOTSUP);
	}
	return (ret);
}

/*ARGSUSED*/
static int
physmem_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	int ret;
	static int msg_printed = 0;

	if ((flag & (FWRITE | FREAD)) != (FWRITE | FREAD)) {
		return (EINVAL);
	}

	/* need to make sure we have the right privileges */
	if ((ret = secpolicy_resource(credp)) != 0)
		return (ret);
	if ((ret = secpolicy_lock_memory(credp)) != 0)
		return (ret);

	if (msg_printed == 0) {
		cmn_err(CE_NOTE, "!driver has been opened. This driver may "
		    "take out long term locks on pages which may impact "
		    "dynamic reconfiguration events");
		msg_printed = 1;
	}

	return (0);
}

/*ARGSUSED*/
static int
physmem_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
	return (0);
}

/*ARGSUSED*/
static int
physmem_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd,
    void *arg, void **resultp)
{
	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*resultp = physmem_dip;
		return (DDI_SUCCESS);

	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(ulong_t)getminor((dev_t)arg);
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}
}

static int
physmem_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int i;

	if (cmd == DDI_RESUME) {
		return (DDI_SUCCESS);
	}

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR,
	    ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
		return (DDI_FAILURE);

	physmem_dip = dip;

	/* Initialize driver specific data */
	if (physmem_setup_vnops()) {
		ddi_remove_minor_node(dip, ddi_get_name(dip));
		return (DDI_FAILURE);
	}

	for (i = 0; i < PPH_SIZE; i++)
		pph[i] = NULL;

	page_capture_register_callback(PC_PHYSMEM, 10000,
	    map_page_proc);

	return (DDI_SUCCESS);
}

static int
physmem_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int ret = DDI_SUCCESS;

	if (cmd == DDI_SUSPEND) {
		return (DDI_SUCCESS);
	}

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	ASSERT(physmem_dip == dip);

	mutex_enter(&physmem_mutex);
	if (physmem_vnodecnt == 0) {
		if (physmem_vnodeops != NULL) {
			vn_freevnodeops(physmem_vnodeops);
			physmem_vnodeops = NULL;
			page_capture_unregister_callback(PC_PHYSMEM);
		}
	} else {
		ret = EBUSY;
	}
	mutex_exit(&physmem_mutex);
	if (ret == DDI_SUCCESS)
		ddi_remove_minor_node(dip, ddi_get_name(dip));
	return (ret);
}

static struct cb_ops physmem_cb_ops = {
	physmem_open,	/* open */
	physmem_close,	/* close */
	nodev,		/* strategy */
	nodev,		/* print */
	nodev,		/* dump */
	nodev,		/* read */
	nodev,		/* write */
	physmem_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* chpoll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* cb_str */
	D_NEW | D_MP | D_DEVMAP,
	CB_REV,
	NULL,
	NULL
};

static struct dev_ops physmem_ops = {
	DEVO_REV,
	0,
	physmem_getinfo,
	nulldev,
	nulldev,
	physmem_attach,
	physmem_detach,
	nodev,
	&physmem_cb_ops,
	NULL,
	NULL
};

static struct modldrv modldrv = {
	&mod_driverops,
	"physmem driver %I%",
	&physmem_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}