/*	$NetBSD: uvm_bio.c,v 1.128 2023/04/09 09:00:56 riastradh Exp $	*/

/*
 * Copyright (c) 1998 Chuck Silvers.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * uvm_bio.c: buffered i/o object mapping cache
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_bio.c,v 1.128 2023/04/09 09:00:56 riastradh Exp $");

#include "opt_uvmhist.h"
#include "opt_ubc.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/bitops.h>		/* for ilog2() */

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>

#ifdef PMAP_DIRECT
#  define UBC_USE_PMAP_DIRECT
#endif
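
/*
 * Note (descriptive only): PMAP_DIRECT is defined by pmaps that can access
 * physical pages directly, without a per-page kernel mapping.  When it is
 * available, the ubc_uiomove_direct()/ubc_zerorange_direct() paths below can
 * copy via uvm_direct_process() instead of mapping pages into a UBC window
 * and taking faults on it; the "ubc_direct" toggle below selects that path.
 */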

/*
 * local functions
 */

static int	ubc_fault(struct uvm_faultinfo *, vaddr_t, struct vm_page **,
		    int, int, vm_prot_t, int);
static struct ubc_map *ubc_find_mapping(struct uvm_object *, voff_t);
static int	ubchash_stats(struct hashstat_sysctl *hs, bool fill);
#ifdef UBC_USE_PMAP_DIRECT
static int __noinline ubc_uiomove_direct(struct uvm_object *, struct uio *, vsize_t,
		    int, int);
static void __noinline ubc_zerorange_direct(struct uvm_object *, off_t, size_t, int);

/* XXX disabled by default until the kinks are worked out. */
bool ubc_direct = false;
#endif

/*
 * local data structures
 */

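/*
 * Each ubc_map describes one window of ubc_winsize bytes of kernel VA.
 * UBC_HASH hashes an (object, offset) pair to the chain used to look up
 * an existing window, UBC_QUEUE selects the inactive queue an idle window
 * is recycled through, and UBC_UMAP_ADDR recovers the kernel virtual
 * address of a window from its ubc_map pointer.
 */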
#define UBC_HASH(uobj, offset) \
	(((((u_long)(uobj)) >> 8) + (((u_long)(offset)) >> PAGE_SHIFT)) & \
	    ubc_object.hashmask)

#define UBC_QUEUE(offset) \
	(&ubc_object.inactive[(((u_long)(offset)) >> ubc_winshift) & \
	    (UBC_NQUEUES - 1)])

#define UBC_UMAP_ADDR(u) \
	(vaddr_t)(ubc_object.kva + (((u) - ubc_object.umap) << ubc_winshift))


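/*
 * ubc_map flags: UMAP_PAGES_LOCKED means the window's pages were busied up
 * front by a UBC_FAULTBUSY allocation and entered with pmap_kenter_pa();
 * UMAP_MAPPING_CACHED means a kernel pmap mapping from an earlier use of
 * the window may still be present and can be reused.
 */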
#define UMAP_PAGES_LOCKED	0x0001
#define UMAP_MAPPING_CACHED	0x0002

struct ubc_map {
	struct uvm_object *	uobj;		/* mapped object */
	voff_t			offset;		/* offset into uobj */
	voff_t			writeoff;	/* write offset */
	vsize_t			writelen;	/* write len */
	int			refcount;	/* refcount on mapping */
	int			flags;		/* extra state */
	int			advice;

	LIST_ENTRY(ubc_map)	hash;		/* hash table */
	TAILQ_ENTRY(ubc_map)	inactive;	/* inactive queue */
	LIST_ENTRY(ubc_map)	list;		/* per-object list */
};

TAILQ_HEAD(ubc_inactive_head, ubc_map);
static struct ubc_object {
	struct uvm_object uobj;		/* glue for uvm_map() */
	char *kva;			/* where ubc_object is mapped */
	struct ubc_map *umap;		/* array of ubc_map's */

	LIST_HEAD(, ubc_map) *hash;	/* hashtable for cached ubc_map's */
	u_long hashmask;		/* mask for hashtable */

	struct ubc_inactive_head *inactive;
					/* inactive queues for ubc_map's */
} ubc_object;

const struct uvm_pagerops ubc_pager = {
	.pgo_fault = ubc_fault,
	/* ... rest are NULL */
};

/* Use a window size at least as large as the largest page size supported by the architecture. */
#define UBC_MAX_WINSHIFT	\
    ((1 << UBC_WINSHIFT) > MAX_PAGE_SIZE ? UBC_WINSHIFT : ilog2(MAX_PAGE_SIZE))

int ubc_nwins = UBC_NWINS;
const int ubc_winshift = UBC_MAX_WINSHIFT;
const int ubc_winsize = 1 << UBC_MAX_WINSHIFT;
#if defined(PMAP_PREFER)
int ubc_nqueues;
#define UBC_NQUEUES ubc_nqueues
#else
#define UBC_NQUEUES 1
#endif
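
/*
 * Note (descriptive only): with PMAP_PREFER the number of inactive queues is
 * scaled in ubc_init() below so that a recycled window's kernel VA stays
 * cache-compatible (in the PMAP_PREFER sense) with the file offsets mapped
 * at it; without PMAP_PREFER a single queue is sufficient.
 */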

#if defined(UBC_STATS)

#define	UBC_EVCNT_DEFINE(name) \
struct evcnt ubc_evcnt_##name = \
EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "ubc", #name); \
EVCNT_ATTACH_STATIC(ubc_evcnt_##name);
#define	UBC_EVCNT_INCR(name) ubc_evcnt_##name.ev_count++

#else /* defined(UBC_STATS) */

#define	UBC_EVCNT_DEFINE(name)	/* nothing */
#define	UBC_EVCNT_INCR(name)	/* nothing */

#endif /* defined(UBC_STATS) */

UBC_EVCNT_DEFINE(wincachehit)
UBC_EVCNT_DEFINE(wincachemiss)
UBC_EVCNT_DEFINE(faultbusy)

/*
 * ubc_init
 *
 * init pager private data structures.
 */

void
ubc_init(void)
{
	/*
	 * Make sure ubc_winshift is sane.
	 */
	KASSERT(ubc_winshift >= PAGE_SHIFT);

	/*
	 * init ubc_object.
	 * alloc and init ubc_map's.
	 * init inactive queues.
	 * alloc and init hashtable.
	 * map in ubc_object.
	 */

	uvm_obj_init(&ubc_object.uobj, &ubc_pager, true, UVM_OBJ_KERN);

	ubc_object.umap = kmem_zalloc(ubc_nwins * sizeof(struct ubc_map),
	    KM_SLEEP);
	if (ubc_object.umap == NULL)
		panic("ubc_init: failed to allocate ubc_map");

	vaddr_t va = (vaddr_t)1L;
#ifdef PMAP_PREFER
	PMAP_PREFER(0, &va, 0, 0);	/* kernel is never topdown */
	ubc_nqueues = va >> ubc_winshift;
	if (ubc_nqueues == 0) {
		ubc_nqueues = 1;
	}
#endif
	ubc_object.inactive = kmem_alloc(UBC_NQUEUES *
	    sizeof(struct ubc_inactive_head), KM_SLEEP);
	for (int i = 0; i < UBC_NQUEUES; i++) {
		TAILQ_INIT(&ubc_object.inactive[i]);
	}
	for (int i = 0; i < ubc_nwins; i++) {
		struct ubc_map *umap;
		umap = &ubc_object.umap[i];
		TAILQ_INSERT_TAIL(&ubc_object.inactive[i & (UBC_NQUEUES - 1)],
		    umap, inactive);
	}

	ubc_object.hash = hashinit(ubc_nwins, HASH_LIST, true,
	    &ubc_object.hashmask);
	for (int i = 0; i <= ubc_object.hashmask; i++) {
		LIST_INIT(&ubc_object.hash[i]);
	}

	if (uvm_map(kernel_map, (vaddr_t *)&ubc_object.kva,
	    ubc_nwins << ubc_winshift, &ubc_object.uobj, 0, (vsize_t)va,
	    UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE,
	    UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) != 0) {
		panic("ubc_init: failed to map ubc_object");
	}

	hashstat_register("ubchash", ubchash_stats);
}

void
ubchist_init(void)
{

	UVMHIST_INIT(ubchist, 300);
}

/*
 * ubc_fault_page: helper of ubc_fault to handle a single page.
 *
 * => Caller has UVM object locked.
 * => Caller will perform pmap_update().
 */

static inline int
ubc_fault_page(const struct uvm_faultinfo *ufi, const struct ubc_map *umap,
    struct vm_page *pg, vm_prot_t prot, vm_prot_t access_type, vaddr_t va)
{
	vm_prot_t mask;
	int error;
	bool rdonly;

	KASSERT(rw_write_held(pg->uobject->vmobjlock));

	KASSERT((pg->flags & PG_FAKE) == 0);
	if (pg->flags & PG_RELEASED) {
		uvm_pagefree(pg);
		return 0;
	}
	if (pg->loan_count != 0) {

		/*
		 * Avoid unneeded loan break, if possible.
		 */

		if ((access_type & VM_PROT_WRITE) == 0) {
			prot &= ~VM_PROT_WRITE;
		}
		if (prot & VM_PROT_WRITE) {
			struct vm_page *newpg;

			newpg = uvm_loanbreak(pg);
			if (newpg == NULL) {
				uvm_page_unbusy(&pg, 1);
				return ENOMEM;
			}
			pg = newpg;
		}
	}

	/*
	 * Note that a page whose backing store is partially allocated
	 * is marked as PG_RDONLY.
	 *
	 * It is the responsibility of ubc_alloc()'s caller to allocate
	 * the backing blocks before writing to the window.
	 */

	KASSERT((pg->flags & PG_RDONLY) == 0 ||
	    (access_type & VM_PROT_WRITE) == 0 ||
	    pg->offset < umap->writeoff ||
	    pg->offset + PAGE_SIZE > umap->writeoff + umap->writelen);

	rdonly = uvm_pagereadonly_p(pg);
	mask = rdonly ? ~VM_PROT_WRITE : VM_PROT_ALL;

	error = pmap_enter(ufi->orig_map->pmap, va, VM_PAGE_TO_PHYS(pg),
	    prot & mask, PMAP_CANFAIL | (access_type & mask));

	uvm_pagelock(pg);
	uvm_pageactivate(pg);
	uvm_pagewakeup(pg);
	uvm_pageunlock(pg);
	pg->flags &= ~PG_BUSY;
	UVM_PAGE_OWN(pg, NULL);

	return error;
}

/*
 * ubc_fault: fault routine for ubc mapping
 */

static int
ubc_fault(struct uvm_faultinfo *ufi, vaddr_t ign1, struct vm_page **ign2,
    int ign3, int ign4, vm_prot_t access_type, int flags)
{
	struct uvm_object *uobj;
	struct ubc_map *umap;
	vaddr_t va, eva, ubc_offset, slot_offset;
	struct vm_page *pgs[howmany(ubc_winsize, MIN_PAGE_SIZE)];
	int i, error, npages;
	vm_prot_t prot;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist);

	/*
	 * no need to try with PGO_LOCKED...
	 * we don't need to have the map locked since we know that
	 * no one will mess with it until our reference is released.
	 */

	if (flags & PGO_LOCKED) {
		uvmfault_unlockall(ufi, NULL, &ubc_object.uobj);
		flags &= ~PGO_LOCKED;
	}

	va = ufi->orig_rvaddr;
	ubc_offset = va - (vaddr_t)ubc_object.kva;
	umap = &ubc_object.umap[ubc_offset >> ubc_winshift];
	KASSERT(umap->refcount != 0);
	KASSERT((umap->flags & UMAP_PAGES_LOCKED) == 0);
	slot_offset = ubc_offset & (ubc_winsize - 1);

	/*
	 * some platforms cannot write to individual bytes atomically, so
	 * software has to do read/modify/write of larger quantities instead.
	 * this means that the access_type for "write" operations
	 * can be VM_PROT_READ, which confuses us mightily.
	 *
	 * deal with this by resetting access_type based on the info
	 * that ubc_alloc() stores for us.
	 */

	access_type = umap->writelen ? VM_PROT_WRITE : VM_PROT_READ;
	UVMHIST_LOG(ubchist, "va %#jx ubc_offset %#jx access_type %jd",
	    va, ubc_offset, access_type, 0);

	if ((access_type & VM_PROT_WRITE) != 0) {
#ifndef PRIxOFF		/* XXX */
#define PRIxOFF "jx"	/* XXX */
#endif			/* XXX */
		KASSERTMSG((trunc_page(umap->writeoff) <= slot_offset),
		    "out of range write: slot=%#"PRIxVSIZE" off=%#"PRIxOFF,
		    slot_offset, (intmax_t)umap->writeoff);
		KASSERTMSG((slot_offset < umap->writeoff + umap->writelen),
		    "out of range write: slot=%#"PRIxVADDR
		    " off=%#"PRIxOFF" len=%#"PRIxVSIZE,
		    slot_offset, (intmax_t)umap->writeoff, umap->writelen);
	}

	/* no umap locking needed since we have a ref on the umap */
	uobj = umap->uobj;

	if ((access_type & VM_PROT_WRITE) == 0) {
		npages = (ubc_winsize - slot_offset) >> PAGE_SHIFT;
	} else {
		npages = (round_page(umap->offset + umap->writeoff +
		    umap->writelen) - (umap->offset + slot_offset))
		    >> PAGE_SHIFT;
		flags |= PGO_PASTEOF;
	}

again:
	memset(pgs, 0, sizeof (pgs));
	rw_enter(uobj->vmobjlock, RW_WRITER);

	UVMHIST_LOG(ubchist, "slot_offset %#jx writeoff %#jx writelen %#jx ",
	    slot_offset, umap->writeoff, umap->writelen, 0);
	UVMHIST_LOG(ubchist, "getpages uobj %#jx offset %#jx npages %jd",
	    (uintptr_t)uobj, umap->offset + slot_offset, npages, 0);

	error = (*uobj->pgops->pgo_get)(uobj, umap->offset + slot_offset, pgs,
	    &npages, 0, access_type, umap->advice, flags | PGO_NOBLOCKALLOC |
	    PGO_NOTIMESTAMP);
	UVMHIST_LOG(ubchist, "getpages error %jd npages %jd", error, npages, 0,
	    0);

	if (error == EAGAIN) {
		kpause("ubc_fault", false, hz >> 2, NULL);
		goto again;
	}
	if (error) {
		return error;
	}

	/*
	 * For virtually-indexed, virtually-tagged caches we should avoid
	 * creating writable mappings when we do not absolutely need them,
	 * since the "compatible alias" trick does not work on such caches.
	 * Otherwise, we can always map the pages writable.
	 */

#ifdef PMAP_CACHE_VIVT
	prot = VM_PROT_READ | access_type;
#else
	prot = VM_PROT_READ | VM_PROT_WRITE;
#endif

	va = ufi->orig_rvaddr;
	eva = ufi->orig_rvaddr + (npages << PAGE_SHIFT);

	UVMHIST_LOG(ubchist, "va %#jx eva %#jx", va, eva, 0, 0);

	/*
	 * Note: normally all returned pages would have the same UVM object.
	 * However, layered file systems (and e.g. tmpfs) may return pages
	 * which belong to an underlying UVM object.  In such a case, the
	 * lock is shared amongst the objects.
	 */
	rw_enter(uobj->vmobjlock, RW_WRITER);
	for (i = 0; va < eva; i++, va += PAGE_SIZE) {
		struct vm_page *pg;

		UVMHIST_LOG(ubchist, "pgs[%jd] = %#jx", i, (uintptr_t)pgs[i],
		    0, 0);
		pg = pgs[i];

		if (pg == NULL || pg == PGO_DONTCARE) {
			continue;
		}
		KASSERT(uobj->vmobjlock == pg->uobject->vmobjlock);
		error = ubc_fault_page(ufi, umap, pg, prot, access_type, va);
		if (error) {
			/*
			 * Flush (there might be pages entered), drop the lock,
			 * and perform uvm_wait().  Note: page will re-fault.
			 */
			pmap_update(ufi->orig_map->pmap);
			rw_exit(uobj->vmobjlock);
			uvm_wait("ubc_fault");
			rw_enter(uobj->vmobjlock, RW_WRITER);
		}
	}
	/* Must make VA visible before the unlock. */
	pmap_update(ufi->orig_map->pmap);
	rw_exit(uobj->vmobjlock);

	return 0;
}

/*
 * local functions
 */

static struct ubc_map *
ubc_find_mapping(struct uvm_object *uobj, voff_t offset)
{
	struct ubc_map *umap;

	LIST_FOREACH(umap, &ubc_object.hash[UBC_HASH(uobj, offset)], hash) {
		if (umap->uobj == uobj && umap->offset == offset) {
			return umap;
		}
	}
	return NULL;
}


/*
 * ubc interface functions
 */

/*
 * ubc_alloc: allocate a file mapping window
 */

static void * __noinline
ubc_alloc(struct uvm_object *uobj, voff_t offset, vsize_t *lenp, int advice,
    int flags, struct vm_page **pgs, int *npagesp)
{
	vaddr_t slot_offset, va;
	struct ubc_map *umap;
	voff_t umap_offset;
	int error;
	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(ubchist, "uobj %#jx offset %#jx len %#jx",
	    (uintptr_t)uobj, offset, *lenp, 0);

	KASSERT(*lenp > 0);
	umap_offset = (offset & ~((voff_t)ubc_winsize - 1));
	slot_offset = (vaddr_t)(offset & ((voff_t)ubc_winsize - 1));
	*lenp = MIN(*lenp, ubc_winsize - slot_offset);
	KASSERT(*lenp > 0);

	rw_enter(ubc_object.uobj.vmobjlock, RW_WRITER);
again:
	/*
	 * The UVM object is already referenced.
	 * Lock order: UBC object -> ubc_map::uobj.
	 */
	umap = ubc_find_mapping(uobj, umap_offset);
	if (umap == NULL) {
		struct uvm_object *oobj;

		UBC_EVCNT_INCR(wincachemiss);
		umap = TAILQ_FIRST(UBC_QUEUE(offset));
		if (umap == NULL) {
			rw_exit(ubc_object.uobj.vmobjlock);
			kpause("ubc_alloc", false, hz >> 2, NULL);
			rw_enter(ubc_object.uobj.vmobjlock, RW_WRITER);
			goto again;
		}

		va = UBC_UMAP_ADDR(umap);
		oobj = umap->uobj;

		/*
		 * Remove from old hash (if any), add to new hash.
		 */

		if (oobj != NULL) {
			/*
			 * Mapping must be removed before the list entry,
			 * since there is a race with ubc_purge().
			 */
			if (umap->flags & UMAP_MAPPING_CACHED) {
				umap->flags &= ~UMAP_MAPPING_CACHED;
				rw_enter(oobj->vmobjlock, RW_WRITER);
				pmap_remove(pmap_kernel(), va,
				    va + ubc_winsize);
				pmap_update(pmap_kernel());
				rw_exit(oobj->vmobjlock);
			}
			LIST_REMOVE(umap, hash);
			LIST_REMOVE(umap, list);
		} else {
			KASSERT((umap->flags & UMAP_MAPPING_CACHED) == 0);
		}
		umap->uobj = uobj;
		umap->offset = umap_offset;
		LIST_INSERT_HEAD(&ubc_object.hash[UBC_HASH(uobj, umap_offset)],
		    umap, hash);
		LIST_INSERT_HEAD(&uobj->uo_ubc, umap, list);
	} else {
		UBC_EVCNT_INCR(wincachehit);
		va = UBC_UMAP_ADDR(umap);
	}

	if (umap->refcount == 0) {
		TAILQ_REMOVE(UBC_QUEUE(offset), umap, inactive);
	}

	if (flags & UBC_WRITE) {
		KASSERTMSG(umap->writeoff == 0,
		    "ubc_alloc: concurrent writes to uobj %p", uobj);
		KASSERTMSG(umap->writelen == 0,
		    "ubc_alloc: concurrent writes to uobj %p", uobj);
		umap->writeoff = slot_offset;
		umap->writelen = *lenp;
	}

	umap->refcount++;
	umap->advice = advice;
	rw_exit(ubc_object.uobj.vmobjlock);
	UVMHIST_LOG(ubchist, "umap %#jx refs %jd va %#jx flags %#jx",
	    (uintptr_t)umap, umap->refcount, (uintptr_t)va, flags);

	if (flags & UBC_FAULTBUSY) {
		int npages = (*lenp + (offset & (PAGE_SIZE - 1)) +
		    PAGE_SIZE - 1) >> PAGE_SHIFT;
		int gpflags =
		    PGO_SYNCIO|PGO_OVERWRITE|PGO_PASTEOF|PGO_NOBLOCKALLOC|
		    PGO_NOTIMESTAMP;
		int i;
		KDASSERT(flags & UBC_WRITE);
		KASSERT(npages <= *npagesp);
		KASSERT(umap->refcount == 1);

		UBC_EVCNT_INCR(faultbusy);
again_faultbusy:
		rw_enter(uobj->vmobjlock, RW_WRITER);
		if (umap->flags & UMAP_MAPPING_CACHED) {
			umap->flags &= ~UMAP_MAPPING_CACHED;
			pmap_remove(pmap_kernel(), va, va + ubc_winsize);
		}
		memset(pgs, 0, *npagesp * sizeof(pgs[0]));

		error = (*uobj->pgops->pgo_get)(uobj, trunc_page(offset), pgs,
		    &npages, 0, VM_PROT_READ | VM_PROT_WRITE, advice, gpflags);
		UVMHIST_LOG(ubchist, "faultbusy getpages %jd", error, 0, 0, 0);
		if (error) {
			/*
			 * Flush: the mapping above might have been removed.
			 */
			pmap_update(pmap_kernel());
			goto out;
		}
		for (i = 0; i < npages; i++) {
			struct vm_page *pg = pgs[i];

			KASSERT(pg->uobject == uobj);
			if (pg->loan_count != 0) {
				rw_enter(uobj->vmobjlock, RW_WRITER);
				if (pg->loan_count != 0) {
					pg = uvm_loanbreak(pg);
				}
				if (pg == NULL) {
					pmap_kremove(va, ubc_winsize);
					pmap_update(pmap_kernel());
					uvm_page_unbusy(pgs, npages);
					rw_exit(uobj->vmobjlock);
					uvm_wait("ubc_alloc");
					goto again_faultbusy;
				}
				rw_exit(uobj->vmobjlock);
				pgs[i] = pg;
			}
			pmap_kenter_pa(
			    va + trunc_page(slot_offset) + (i << PAGE_SHIFT),
			    VM_PAGE_TO_PHYS(pg),
			    VM_PROT_READ | VM_PROT_WRITE, 0);
		}
		pmap_update(pmap_kernel());
		umap->flags |= UMAP_PAGES_LOCKED;
		*npagesp = npages;
	} else {
		KASSERT((umap->flags & UMAP_PAGES_LOCKED) == 0);
	}

out:
	return (void *)(va + slot_offset);
}

/*
 * ubc_release: free a file mapping window.
 */

static void __noinline
ubc_release(void *va, int flags, struct vm_page **pgs, int npages)
{
	struct ubc_map *umap;
	struct uvm_object *uobj;
	vaddr_t umapva;
	bool unmapped;
	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(ubchist, "va %#jx", (uintptr_t)va, 0, 0, 0);

	umap = &ubc_object.umap[((char *)va - ubc_object.kva) >> ubc_winshift];
	umapva = UBC_UMAP_ADDR(umap);
	uobj = umap->uobj;
	KASSERT(uobj != NULL);

	if (umap->flags & UMAP_PAGES_LOCKED) {
		const voff_t endoff = umap->writeoff + umap->writelen;
		const voff_t zerolen = round_page(endoff) - endoff;

		KASSERT(npages == (round_page(endoff) -
		    trunc_page(umap->writeoff)) >> PAGE_SHIFT);
		KASSERT((umap->flags & UMAP_MAPPING_CACHED) == 0);
		if (zerolen) {
			memset((char *)umapva + endoff, 0, zerolen);
		}
		umap->flags &= ~UMAP_PAGES_LOCKED;
		rw_enter(uobj->vmobjlock, RW_WRITER);
		for (u_int i = 0; i < npages; i++) {
			struct vm_page *pg = pgs[i];
#ifdef DIAGNOSTIC
			paddr_t pa;
			bool rv;
			rv = pmap_extract(pmap_kernel(), umapva +
			    umap->writeoff + (i << PAGE_SHIFT), &pa);
			KASSERT(rv);
			KASSERT(PHYS_TO_VM_PAGE(pa) == pg);
#endif
			pg->flags &= ~PG_FAKE;
			KASSERTMSG(uvm_pagegetdirty(pg) ==
			    UVM_PAGE_STATUS_DIRTY,
			    "page %p not dirty", pg);
			KASSERT(pg->loan_count == 0);
			if (uvmpdpol_pageactivate_p(pg)) {
				uvm_pagelock(pg);
				uvm_pageactivate(pg);
				uvm_pageunlock(pg);
			}
		}
		pmap_kremove(umapva, ubc_winsize);
		pmap_update(pmap_kernel());
		uvm_page_unbusy(pgs, npages);
		rw_exit(uobj->vmobjlock);
		unmapped = true;
	} else {
		unmapped = false;
	}

	rw_enter(ubc_object.uobj.vmobjlock, RW_WRITER);
	umap->writeoff = 0;
	umap->writelen = 0;
	umap->refcount--;
	if (umap->refcount == 0) {
		if (flags & UBC_UNMAP) {
			/*
			 * Invalidate any cached mappings if requested.
			 * This is typically used to avoid leaving
			 * incompatible cache aliases around indefinitely.
			 */
			rw_enter(uobj->vmobjlock, RW_WRITER);
			pmap_remove(pmap_kernel(), umapva,
			    umapva + ubc_winsize);
			pmap_update(pmap_kernel());
			rw_exit(uobj->vmobjlock);

			umap->flags &= ~UMAP_MAPPING_CACHED;
			LIST_REMOVE(umap, hash);
			LIST_REMOVE(umap, list);
			umap->uobj = NULL;
			TAILQ_INSERT_HEAD(UBC_QUEUE(umap->offset), umap,
			    inactive);
		} else {
			if (!unmapped) {
				umap->flags |= UMAP_MAPPING_CACHED;
			}
			TAILQ_INSERT_TAIL(UBC_QUEUE(umap->offset), umap,
			    inactive);
		}
	}
	UVMHIST_LOG(ubchist, "umap %#jx refs %jd", (uintptr_t)umap,
	    umap->refcount, 0, 0);
	rw_exit(ubc_object.uobj.vmobjlock);
}

/*
 * ubc_uiomove: move data to/from an object.
 */

int
ubc_uiomove(struct uvm_object *uobj, struct uio *uio, vsize_t todo, int advice,
    int flags)
{
	const bool overwrite = (flags & UBC_FAULTBUSY) != 0;
	struct vm_page *pgs[howmany(ubc_winsize, MIN_PAGE_SIZE)];
	voff_t off;
	int error, npages;

	KASSERT(todo <= uio->uio_resid);
	KASSERT(((flags & UBC_WRITE) != 0 && uio->uio_rw == UIO_WRITE) ||
	    ((flags & UBC_READ) != 0 && uio->uio_rw == UIO_READ));

#ifdef UBC_USE_PMAP_DIRECT
	/*
	 * During direct access, pages need to be held busy to prevent them
	 * changing identity, so if we read or write an object into a mapped
	 * view of the same object we could deadlock while faulting.
	 *
	 * Avoid the problem by disallowing direct access if the object
	 * might be visible somewhere via mmap().
	 *
	 * XXX concurrent reads cause thundering herd issues with PG_BUSY.
	 * In the future enable by default for writes or if ncpu<=2, and
	 * make the toggle override that.
	 */
	if ((ubc_direct && (flags & UBC_ISMAPPED) == 0) ||
	    (flags & UBC_FAULTBUSY) != 0) {
		return ubc_uiomove_direct(uobj, uio, todo, advice, flags);
	}
#endif

	off = uio->uio_offset;
	error = 0;
	while (todo > 0) {
		vsize_t bytelen = todo;
		void *win;

		npages = __arraycount(pgs);
		win = ubc_alloc(uobj, off, &bytelen, advice, flags, pgs,
		    &npages);
		if (error == 0) {
			error = uiomove(win, bytelen, uio);
		}
		if (error != 0 && overwrite) {
			/*
			 * if we haven't initialized the pages yet,
			 * do it now.  it's safe to use memset here
			 * because we just mapped the pages above.
			 */
			memset(win, 0, bytelen);
		}
		ubc_release(win, flags, pgs, npages);
		off += bytelen;
		todo -= bytelen;
		if (error != 0 && (flags & UBC_PARTIALOK) != 0) {
			break;
		}
	}

	return error;
}
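
/*
 * Illustrative only (not code from this file): a file system read or write
 * path typically wraps each chunk of a transfer in ubc_uiomove(), e.g.
 * assuming a vnode "vp" whose v_size covers the range and a caller-prepared
 * "uio"; the advice and flag values shown are examples, not requirements:
 *
 *	vsize_t bytelen = MIN(uio->uio_resid, vp->v_size - uio->uio_offset);
 *	error = ubc_uiomove(&vp->v_uobj, uio, bytelen, UVM_ADV_SEQUENTIAL,
 *	    UBC_READ | UBC_PARTIALOK);
 */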

/*
 * ubc_zerorange: set a range of bytes in an object to zero.
 */

void
ubc_zerorange(struct uvm_object *uobj, off_t off, size_t len, int flags)
{
	struct vm_page *pgs[howmany(ubc_winsize, MIN_PAGE_SIZE)];
	int npages;

#ifdef UBC_USE_PMAP_DIRECT
	if (ubc_direct || (flags & UBC_FAULTBUSY) != 0) {
		ubc_zerorange_direct(uobj, off, len, flags);
		return;
	}
#endif

	/*
	 * XXXUBC invent kzero() and use it
	 */

	while (len) {
		void *win;
		vsize_t bytelen = len;

		npages = __arraycount(pgs);
		win = ubc_alloc(uobj, off, &bytelen, UVM_ADV_NORMAL, UBC_WRITE,
		    pgs, &npages);
		memset(win, 0, bytelen);
		ubc_release(win, flags, pgs, npages);

		off += bytelen;
		len -= bytelen;
	}
}
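
/*
 * Illustrative only (not code from this file): a typical caller zeroes a
 * newly allocated range after extending a file, roughly:
 *
 *	ubc_zerorange(&vp->v_uobj, vp->v_size, newsize - vp->v_size, ubc_flags);
 *
 * where "ubc_flags" stands for whatever window flags the caller uses
 * (for example UBC_UNMAP on platforms with cache aliasing concerns).
 */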

#ifdef UBC_USE_PMAP_DIRECT
/* Copy data using direct map */

/*
 * ubc_alloc_direct: allocate a file mapping window using direct map
 */
static int __noinline
ubc_alloc_direct(struct uvm_object *uobj, voff_t offset, vsize_t *lenp,
    int advice, int flags, struct vm_page **pgs, int *npages)
{
	voff_t pgoff;
	int error;
	int gpflags = flags | PGO_NOTIMESTAMP | PGO_SYNCIO;
	int access_type = VM_PROT_READ;
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist);

	if (flags & UBC_WRITE) {
		if (flags & UBC_FAULTBUSY)
			gpflags |= PGO_OVERWRITE | PGO_NOBLOCKALLOC;
#if 0
		KASSERT(!UVM_OBJ_NEEDS_WRITEFAULT(uobj));
#endif

		/*
		 * Tell genfs_getpages() we already have the journal lock,
		 * allow allocation past current EOF.
		 */
		gpflags |= PGO_JOURNALLOCKED | PGO_PASTEOF;
		access_type |= VM_PROT_WRITE;
	} else {
		/* Don't need the empty blocks allocated, PG_RDONLY is okay */
		gpflags |= PGO_NOBLOCKALLOC;
	}

	pgoff = (offset & PAGE_MASK);
	*lenp = MIN(*lenp, ubc_winsize - pgoff);

again:
	*npages = (*lenp + pgoff + PAGE_SIZE - 1) >> PAGE_SHIFT;
	KASSERT((*npages * PAGE_SIZE) <= ubc_winsize);
	KASSERT(*lenp + pgoff <= ubc_winsize);
	memset(pgs, 0, *npages * sizeof(pgs[0]));

	rw_enter(uobj->vmobjlock, RW_WRITER);
	error = (*uobj->pgops->pgo_get)(uobj, trunc_page(offset), pgs,
	    npages, 0, access_type, advice, gpflags);
	UVMHIST_LOG(ubchist, "alloc_direct getpages %jd", error, 0, 0, 0);
	if (error) {
		if (error == EAGAIN) {
			kpause("ubc_alloc_directg", false, hz >> 2, NULL);
			goto again;
		}
		return error;
	}

	rw_enter(uobj->vmobjlock, RW_WRITER);
	for (int i = 0; i < *npages; i++) {
		struct vm_page *pg = pgs[i];

		KASSERT(pg != NULL);
		KASSERT(pg != PGO_DONTCARE);
		KASSERT((pg->flags & PG_FAKE) == 0 || (gpflags & PGO_OVERWRITE));
		KASSERT(pg->uobject->vmobjlock == uobj->vmobjlock);

		/* Avoid breaking loan if possible, only do it on write */
		if ((flags & UBC_WRITE) && pg->loan_count != 0) {
			pg = uvm_loanbreak(pg);
			if (pg == NULL) {
				uvm_page_unbusy(pgs, *npages);
				rw_exit(uobj->vmobjlock);
				uvm_wait("ubc_alloc_directl");
				goto again;
			}
			pgs[i] = pg;
		}

		/* Page must be writable by now */
		KASSERT((pg->flags & PG_RDONLY) == 0 || (flags & UBC_WRITE) == 0);

		/*
		 * XXX For aobj pages.  No managed mapping - mark the page
		 * dirty.
		 */
		if ((flags & UBC_WRITE) != 0) {
			uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
		}
	}
	rw_exit(uobj->vmobjlock);

	return 0;
}

static void __noinline
ubc_direct_release(struct uvm_object *uobj,
	int flags, struct vm_page **pgs, int npages)
{
	rw_enter(uobj->vmobjlock, RW_WRITER);
	for (int i = 0; i < npages; i++) {
		struct vm_page *pg = pgs[i];

		pg->flags &= ~PG_BUSY;
		UVM_PAGE_OWN(pg, NULL);
		if (pg->flags & PG_RELEASED) {
			pg->flags &= ~PG_RELEASED;
			uvm_pagefree(pg);
			continue;
		}

		if (uvm_pagewanted_p(pg) || uvmpdpol_pageactivate_p(pg)) {
			uvm_pagelock(pg);
			uvm_pageactivate(pg);
			uvm_pagewakeup(pg);
			uvm_pageunlock(pg);
		}

		/* The page was changed: it is no longer fake nor clean. */
		if (flags & UBC_WRITE) {
			KASSERTMSG(uvm_pagegetdirty(pg) ==
			    UVM_PAGE_STATUS_DIRTY,
			    "page %p not dirty", pg);
			pg->flags &= ~PG_FAKE;
		}
	}
	rw_exit(uobj->vmobjlock);
}

static int
ubc_uiomove_process(void *win, size_t len, void *arg)
{
	struct uio *uio = (struct uio *)arg;

	return uiomove(win, len, uio);
}

static int
ubc_zerorange_process(void *win, size_t len, void *arg)
{
	memset(win, 0, len);
	return 0;
}

static int __noinline
ubc_uiomove_direct(struct uvm_object *uobj, struct uio *uio, vsize_t todo, int advice,
    int flags)
{
	const bool overwrite = (flags & UBC_FAULTBUSY) != 0;
	voff_t off;
	int error, npages;
	struct vm_page *pgs[howmany(ubc_winsize, MIN_PAGE_SIZE)];

	KASSERT(todo <= uio->uio_resid);
	KASSERT(((flags & UBC_WRITE) != 0 && uio->uio_rw == UIO_WRITE) ||
	    ((flags & UBC_READ) != 0 && uio->uio_rw == UIO_READ));

	off = uio->uio_offset;
	error = 0;
	while (todo > 0) {
		vsize_t bytelen = todo;

		error = ubc_alloc_direct(uobj, off, &bytelen, advice, flags,
		    pgs, &npages);
		if (error != 0) {
			/* can't do anything, failed to get the pages */
			break;
		}

		if (error == 0) {
			error = uvm_direct_process(pgs, npages, off, bytelen,
			    ubc_uiomove_process, uio);
		}

		if (overwrite) {
			voff_t endoff;

			/*
			 * if we haven't initialized the pages yet due to an
			 * error above, do it now.
			 */
			if (error != 0) {
				(void) uvm_direct_process(pgs, npages, off,
				    bytelen, ubc_zerorange_process, NULL);
			}

			off += bytelen;
			todo -= bytelen;
			endoff = off & (PAGE_SIZE - 1);

			/*
			 * zero out the remaining portion of the final page
			 * (if any).
			 */
			if (todo == 0 && endoff != 0) {
				vsize_t zlen = PAGE_SIZE - endoff;
				(void) uvm_direct_process(pgs + npages - 1, 1,
				    off, zlen, ubc_zerorange_process, NULL);
			}
		} else {
			off += bytelen;
			todo -= bytelen;
		}

		ubc_direct_release(uobj, flags, pgs, npages);

		if (error != 0 && ISSET(flags, UBC_PARTIALOK)) {
			break;
		}
	}

	return error;
}

static void __noinline
ubc_zerorange_direct(struct uvm_object *uobj, off_t off, size_t todo, int flags)
{
	int error, npages;
	struct vm_page *pgs[howmany(ubc_winsize, MIN_PAGE_SIZE)];

	flags |= UBC_WRITE;

	error = 0;
	while (todo > 0) {
		vsize_t bytelen = todo;

		error = ubc_alloc_direct(uobj, off, &bytelen, UVM_ADV_NORMAL,
		    flags, pgs, &npages);
		if (error != 0) {
			/* can't do anything, failed to get the pages */
			break;
		}

		error = uvm_direct_process(pgs, npages, off, bytelen,
		    ubc_zerorange_process, NULL);

		ubc_direct_release(uobj, flags, pgs, npages);

		off += bytelen;
		todo -= bytelen;
	}
}

#endif /* UBC_USE_PMAP_DIRECT */

/*
 * ubc_purge: disassociate ubc_map structures from an empty uvm_object.
 */

void
ubc_purge(struct uvm_object *uobj)
{
	struct ubc_map *umap;
	vaddr_t va;

	KASSERT(uobj->uo_npages == 0);

	/*
	 * Safe to check without lock held, as ubc_alloc() removes
	 * the mapping and list entry in the correct order.
	 */
	if (__predict_true(LIST_EMPTY(&uobj->uo_ubc))) {
		return;
	}
	rw_enter(ubc_object.uobj.vmobjlock, RW_WRITER);
	while ((umap = LIST_FIRST(&uobj->uo_ubc)) != NULL) {
		KASSERT(umap->refcount == 0);
		for (va = 0; va < ubc_winsize; va += PAGE_SIZE) {
			KASSERT(!pmap_extract(pmap_kernel(),
			    va + UBC_UMAP_ADDR(umap), NULL));
		}
		LIST_REMOVE(umap, list);
		LIST_REMOVE(umap, hash);
		umap->flags &= ~UMAP_MAPPING_CACHED;
		umap->uobj = NULL;
	}
	rw_exit(ubc_object.uobj.vmobjlock);
}

static int
ubchash_stats(struct hashstat_sysctl *hs, bool fill)
{
	struct ubc_map *umap;
	uint64_t chain;

	strlcpy(hs->hash_name, "ubchash", sizeof(hs->hash_name));
	strlcpy(hs->hash_desc, "ubc object hash", sizeof(hs->hash_desc));
	if (!fill)
		return 0;

	hs->hash_size = ubc_object.hashmask + 1;

	for (size_t i = 0; i < hs->hash_size; i++) {
		chain = 0;
		rw_enter(ubc_object.uobj.vmobjlock, RW_READER);
		LIST_FOREACH(umap, &ubc_object.hash[i], hash) {
			chain++;
		}
		rw_exit(ubc_object.uobj.vmobjlock);
		if (chain > 0) {
			hs->hash_used++;
			hs->hash_items += chain;
			if (chain > hs->hash_maxchain)
				hs->hash_maxchain = chain;
		}
		preempt_point();
	}

	return 0;
}