145748Smckusick /*
263379Sbostic * Copyright (c) 1991, 1993
363379Sbostic * The Regents of the University of California. All rights reserved.
445748Smckusick *
547663Smckusick * This code is derived from software contributed to Berkeley by
647663Smckusick * The Mach Operating System project at Carnegie-Mellon University.
747663Smckusick *
847663Smckusick * %sccs.include.redist.c%
947663Smckusick *
10*69384Smckusick * @(#)vm_fault.c 8.6 (Berkeley) 05/11/95
1147663Smckusick *
1247663Smckusick *
1347663Smckusick * Copyright (c) 1987, 1990 Carnegie-Mellon University.
1447663Smckusick * All rights reserved.
1547663Smckusick *
1647592Smckusick * Authors: Avadis Tevanian, Jr., Michael Wayne Young
1747592Smckusick *
1847592Smckusick * Permission to use, copy, modify and distribute this software and
1947592Smckusick * its documentation is hereby granted, provided that both the copyright
2047592Smckusick * notice and this permission notice appear in all copies of the
2147592Smckusick * software, derivative works or modified versions, and any portions
2247592Smckusick * thereof, and that both notices appear in supporting documentation.
2347592Smckusick *
2447592Smckusick * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
2547592Smckusick * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
2647592Smckusick * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
2747592Smckusick *
2847663Smckusick * Carnegie Mellon requests users of this software to return to
2945748Smckusick *
3047663Smckusick * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
3147663Smckusick * School of Computer Science
3247663Smckusick * Carnegie Mellon University
3347663Smckusick * Pittsburgh PA 15213-3890
3447662Smckusick *
3547663Smckusick * any improvements or extensions that they make and grant Carnegie the
3647663Smckusick * rights to redistribute these changes.
3745748Smckusick */
3845748Smckusick
3945748Smckusick /*
4045748Smckusick * Page fault handling module.
4145748Smckusick */
4245748Smckusick
4353348Sbostic #include <sys/param.h>
4453348Sbostic #include <sys/systm.h>
4545748Smckusick
4653348Sbostic #include <vm/vm.h>
4753348Sbostic #include <vm/vm_page.h>
4853348Sbostic #include <vm/vm_pageout.h>
4948386Skarels
5045748Smckusick /*
5145748Smckusick * vm_fault:
5245748Smckusick *
5345748Smckusick * Handle a page fault occurring at the given address,
5445748Smckusick * requiring the given permissions, in the map specified.
5545748Smckusick * If successful, the page is inserted into the
5645748Smckusick * associated physical map.
5745748Smckusick *
5845748Smckusick * NOTE: the given address should be truncated to the
5945748Smckusick * proper page address.
6045748Smckusick *
6145748Smckusick * KERN_SUCCESS is returned if the page fault is handled; otherwise,
6245748Smckusick * a standard error specifying why the fault is fatal is returned.
6345748Smckusick *
6445748Smckusick *
6545748Smckusick * The map in question must be referenced, and remains so.
6645748Smckusick * Caller may hold no locks.
6745748Smckusick */
6853348Sbostic int
6945748Smckusick vm_fault(map, vaddr, fault_type, change_wiring)
7045748Smckusick vm_map_t map;
7145748Smckusick vm_offset_t vaddr;
7245748Smckusick vm_prot_t fault_type;
7345748Smckusick boolean_t change_wiring;
7445748Smckusick {
7545748Smckusick vm_object_t first_object;
7645748Smckusick vm_offset_t first_offset;
7745748Smckusick vm_map_entry_t entry;
7845748Smckusick register vm_object_t object;
7945748Smckusick register vm_offset_t offset;
8045748Smckusick register vm_page_t m;
8145748Smckusick vm_page_t first_m;
8245748Smckusick vm_prot_t prot;
8345748Smckusick int result;
8445748Smckusick boolean_t wired;
8545748Smckusick boolean_t su;
8645748Smckusick boolean_t lookup_still_valid;
8745748Smckusick boolean_t page_exists;
8845748Smckusick vm_page_t old_m;
8945748Smckusick vm_object_t next_object;
9045748Smckusick
9165684Shibler cnt.v_faults++; /* needs lock XXX */
9245748Smckusick /*
9345748Smckusick * Recovery actions
9445748Smckusick */
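/* FREE_PAGE: wake up anyone sleeping on the busy page, then free it. */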
9545748Smckusick #define FREE_PAGE(m) { \
9645748Smckusick PAGE_WAKEUP(m); \
9745748Smckusick vm_page_lock_queues(); \
9845748Smckusick vm_page_free(m); \
9945748Smckusick vm_page_unlock_queues(); \
10045748Smckusick }
10145748Smckusick
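/* RELEASE_PAGE: wake up waiters and put the page back on the active queue. */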
10245748Smckusick #define RELEASE_PAGE(m) { \
10345748Smckusick PAGE_WAKEUP(m); \
10445748Smckusick vm_page_lock_queues(); \
10545748Smckusick vm_page_activate(m); \
10645748Smckusick vm_page_unlock_queues(); \
10745748Smckusick }
10845748Smckusick
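/* UNLOCK_MAP: release the map lookup if we still hold it. */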
10945748Smckusick #define UNLOCK_MAP { \
11045748Smckusick if (lookup_still_valid) { \
11145748Smckusick vm_map_lookup_done(map, entry); \
11245748Smckusick lookup_still_valid = FALSE; \
11345748Smckusick } \
11445748Smckusick }
11545748Smckusick
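/*
 * UNLOCK_THINGS: back out of the current object (and of the placeholder
 * page in the first object, if we descended past it), then release the
 * map lookup.
 */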
11645748Smckusick #define UNLOCK_THINGS { \
11745748Smckusick object->paging_in_progress--; \
11845748Smckusick vm_object_unlock(object); \
11945748Smckusick if (object != first_object) { \
12045748Smckusick vm_object_lock(first_object); \
12145748Smckusick FREE_PAGE(first_m); \
12245748Smckusick first_object->paging_in_progress--; \
12345748Smckusick vm_object_unlock(first_object); \
12445748Smckusick } \
12545748Smckusick UNLOCK_MAP; \
12645748Smckusick }
12745748Smckusick
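/* UNLOCK_AND_DEALLOCATE: as above, but also drop our reference to first_object. */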
12845748Smckusick #define UNLOCK_AND_DEALLOCATE { \
12945748Smckusick UNLOCK_THINGS; \
13045748Smckusick vm_object_deallocate(first_object); \
13145748Smckusick }
13245748Smckusick
13345748Smckusick RetryFault: ;
13445748Smckusick
13545748Smckusick /*
13645748Smckusick * Find the backing store object and offset into
13745748Smckusick * it to begin the search.
13845748Smckusick */
13945748Smckusick
14045748Smckusick if ((result = vm_map_lookup(&map, vaddr, fault_type, &entry,
14145748Smckusick &first_object, &first_offset,
14245748Smckusick &prot, &wired, &su)) != KERN_SUCCESS) {
14345748Smckusick return(result);
14445748Smckusick }
14545748Smckusick lookup_still_valid = TRUE;
14645748Smckusick
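	/*
	 * For a wired entry, fault in the page with all of the
	 * permissions the entry allows, so that later accesses
	 * through the wired mapping cannot fault.
	 */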
14745748Smckusick if (wired)
14845748Smckusick fault_type = prot;
14945748Smckusick
15048386Skarels first_m = NULL;
15145748Smckusick
15245748Smckusick /*
15345748Smckusick * Make a reference to this object to
15445748Smckusick * prevent its disposal while we are messing with
15545748Smckusick * it. Once we have the reference, the map is free
15645748Smckusick * to be diddled. Since objects reference their
15745748Smckusick * shadows (and copies), they will stay around as well.
15845748Smckusick */
15945748Smckusick
16045748Smckusick vm_object_lock(first_object);
16145748Smckusick
16245748Smckusick first_object->ref_count++;
16345748Smckusick first_object->paging_in_progress++;
16445748Smckusick
16545748Smckusick /*
16645748Smckusick * INVARIANTS (through entire routine):
16745748Smckusick *
16845748Smckusick * 1) At all times, we must either have the object
16945748Smckusick * lock or a busy page in some object to prevent
17045748Smckusick * some other thread from trying to bring in
17145748Smckusick * the same page.
17245748Smckusick *
17345748Smckusick * Note that we cannot hold any locks during the
17445748Smckusick * pager access or when waiting for memory, so
17545748Smckusick * we use a busy page then.
17645748Smckusick *
17745748Smckusick * Note also that we aren't as concerned about
17845748Smckusick * more than one thread attempting to pager_data_unlock
17945748Smckusick * the same page at once, so we don't hold the page
18045748Smckusick * as busy then, but do record the highest unlock
18145748Smckusick * value so far. [Unlock requests may also be delivered
18245748Smckusick * out of order.]
18345748Smckusick *
18445748Smckusick * 2) Once we have a busy page, we must remove it from
18545748Smckusick * the pageout queues, so that the pageout daemon
18645748Smckusick * will not grab it away.
18745748Smckusick *
18845748Smckusick * 3) To prevent another thread from racing us down the
18945748Smckusick * shadow chain and entering a new page in the top
19045748Smckusick * object before we do, we must keep a busy page in
19145748Smckusick * the top object while following the shadow chain.
19245748Smckusick *
19345748Smckusick * 4) We must increment paging_in_progress on any object
19445748Smckusick * for which we have a busy page, to prevent
19545748Smckusick * vm_object_collapse from removing the busy page
19645748Smckusick * without our noticing.
19745748Smckusick */
19845748Smckusick
19945748Smckusick /*
20045748Smckusick * Search for the page at object/offset.
20145748Smckusick */
20245748Smckusick
20345748Smckusick object = first_object;
20445748Smckusick offset = first_offset;
20545748Smckusick
20645748Smckusick /*
20745748Smckusick * See whether this page is resident
20845748Smckusick */
20945748Smckusick
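	/*
	 * This loop walks the shadow chain starting at first_object:
	 * find the page resident, page it in, or, if no object in the
	 * chain has it, zero-fill it in the top-level object.  We leave
	 * the loop holding the object lock and a busy page.
	 */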
21045748Smckusick while (TRUE) {
21145748Smckusick m = vm_page_lookup(object, offset);
21248386Skarels if (m != NULL) {
21345748Smckusick /*
21445748Smckusick * If the page is being brought in,
21545748Smckusick * wait for it and then retry.
21645748Smckusick */
21756382Smckusick if (m->flags & PG_BUSY) {
21845748Smckusick #ifdef DOTHREADS
21945748Smckusick int wait_result;
22045748Smckusick
22145748Smckusick PAGE_ASSERT_WAIT(m, !change_wiring);
22245748Smckusick UNLOCK_THINGS;
22345748Smckusick thread_block();
22445748Smckusick wait_result = current_thread()->wait_result;
22545748Smckusick vm_object_deallocate(first_object);
22645748Smckusick if (wait_result != THREAD_AWAKENED)
22745748Smckusick return(KERN_SUCCESS);
22845748Smckusick goto RetryFault;
22945748Smckusick #else
23045748Smckusick PAGE_ASSERT_WAIT(m, !change_wiring);
23145748Smckusick UNLOCK_THINGS;
23265684Shibler cnt.v_intrans++;
23345748Smckusick thread_block();
23445748Smckusick vm_object_deallocate(first_object);
23545748Smckusick goto RetryFault;
23645748Smckusick #endif
23745748Smckusick }
23845748Smckusick
23945748Smckusick /*
24045748Smckusick * Remove the page from the pageout daemon's
24145748Smckusick * reach while we play with it.
24245748Smckusick */
24345748Smckusick
24445748Smckusick vm_page_lock_queues();
24556382Smckusick if (m->flags & PG_INACTIVE) {
24665231Smckusick TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
24756382Smckusick m->flags &= ~PG_INACTIVE;
24850911Smckusick cnt.v_inactive_count--;
24950911Smckusick cnt.v_reactivated++;
25045748Smckusick }
25145748Smckusick
25256382Smckusick if (m->flags & PG_ACTIVE) {
25365231Smckusick TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
25456382Smckusick m->flags &= ~PG_ACTIVE;
25550911Smckusick cnt.v_active_count--;
25645748Smckusick }
25745748Smckusick vm_page_unlock_queues();
25845748Smckusick
25945748Smckusick /*
26045748Smckusick * Mark page busy for other threads.
26145748Smckusick */
26256382Smckusick m->flags |= PG_BUSY;
26345748Smckusick break;
26445748Smckusick }
26545748Smckusick
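		/*
		 * Page is not resident.  Allocate a placeholder page if
		 * the pager might supply it (and we are not merely
		 * unwiring), or if this is the top-level object, where
		 * the page may ultimately be zero-filled.
		 */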
26648386Skarels if (((object->pager != NULL) &&
26745748Smckusick (!change_wiring || wired))
26845748Smckusick || (object == first_object)) {
26945748Smckusick
27045748Smckusick /*
27145748Smckusick * Allocate a new page for this object/offset
27245748Smckusick * pair.
27345748Smckusick */
27445748Smckusick
27545748Smckusick m = vm_page_alloc(object, offset);
27645748Smckusick
27748386Skarels if (m == NULL) {
27845748Smckusick UNLOCK_AND_DEALLOCATE;
27945748Smckusick VM_WAIT;
28045748Smckusick goto RetryFault;
28145748Smckusick }
28245748Smckusick }
28345748Smckusick
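		/*
		 * If the object is backed by a pager, ask the pager for
		 * the page (skipped when we are merely unwiring).
		 */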
28456323Shibler if (object->pager != NULL && (!change_wiring || wired)) {
28545748Smckusick int rv;
28645748Smckusick
28745748Smckusick /*
28845748Smckusick * Now that we have a busy page, we can
28945748Smckusick * release the object lock.
29045748Smckusick */
29145748Smckusick vm_object_unlock(object);
29245748Smckusick
29345748Smckusick /*
29445748Smckusick * Call the pager to retrieve the data, if any,
29545748Smckusick * after releasing the lock on the map.
29645748Smckusick */
29745748Smckusick UNLOCK_MAP;
29865684Shibler cnt.v_pageins++;
29956323Shibler rv = vm_pager_get(object->pager, m, TRUE);
30045748Smckusick
30156323Shibler /*
30256323Shibler * Reacquire the object lock to preserve our
30356323Shibler * invariant.
30456323Shibler */
30556323Shibler vm_object_lock(object);
30656323Shibler
30756323Shibler /*
30856323Shibler * Found the page.
30956323Shibler * Leave it busy while we play with it.
31056323Shibler */
31145748Smckusick if (rv == VM_PAGER_OK) {
31245748Smckusick /*
31345748Smckusick * Relookup in case pager changed page.
31445748Smckusick * Pager is responsible for disposition
31545748Smckusick * of old page if moved.
31645748Smckusick */
31745748Smckusick m = vm_page_lookup(object, offset);
31845748Smckusick
31965684Shibler cnt.v_pgpgin++;
32056382Smckusick m->flags &= ~PG_FAKE;
32156382Smckusick m->flags |= PG_CLEAN;
32245748Smckusick pmap_clear_modify(VM_PAGE_TO_PHYS(m));
32345748Smckusick break;
32445748Smckusick }
32545748Smckusick
32645748Smckusick /*
32756323Shibler * IO error or page outside the range of the pager:
32856323Shibler * clean up and return an error.
32945748Smckusick */
33056323Shibler if (rv == VM_PAGER_ERROR || rv == VM_PAGER_BAD) {
33145748Smckusick FREE_PAGE(m);
33245748Smckusick UNLOCK_AND_DEALLOCATE;
33345748Smckusick return(KERN_PROTECTION_FAILURE); /* XXX */
33445748Smckusick }
33556323Shibler /*
33656323Shibler * rv == VM_PAGER_FAIL:
33756323Shibler *
33856323Shibler * Page does not exist at this object/offset.
33956323Shibler * Free the bogus page (waking up anyone waiting
34056323Shibler * for it) and continue on to the next object.
34156323Shibler *
34256323Shibler * If this is the top-level object, we must
34356323Shibler * leave the busy page to prevent another
34456323Shibler * thread from rushing past us, and inserting
34556323Shibler * the page in that object at the same time
34656323Shibler * that we are.
34756323Shibler */
34845748Smckusick if (object != first_object) {
34945748Smckusick FREE_PAGE(m);
35053941Shibler /* note that `m' is not used after this */
35145748Smckusick }
35245748Smckusick }
35345748Smckusick
35445748Smckusick /*
35545748Smckusick * We get here if the object has no pager (or unwiring)
35645748Smckusick * or the pager doesn't have the page.
35745748Smckusick */
35845748Smckusick if (object == first_object)
35945748Smckusick first_m = m;
36045748Smckusick
36145748Smckusick /*
36245748Smckusick * Move on to the next object. Lock the next
36345748Smckusick * object before unlocking the current one.
36445748Smckusick */
36545748Smckusick
36645748Smckusick offset += object->shadow_offset;
36745748Smckusick next_object = object->shadow;
36848386Skarels if (next_object == NULL) {
36945748Smckusick /*
37045748Smckusick * If there's no object left, fill the page
37145748Smckusick * in the top object with zeros.
37245748Smckusick */
37345748Smckusick if (object != first_object) {
37445748Smckusick object->paging_in_progress--;
37545748Smckusick vm_object_unlock(object);
37645748Smckusick
37745748Smckusick object = first_object;
37845748Smckusick offset = first_offset;
37945748Smckusick m = first_m;
38045748Smckusick vm_object_lock(object);
38145748Smckusick }
38248386Skarels first_m = NULL;
38345748Smckusick
38445748Smckusick vm_page_zero_fill(m);
38550911Smckusick cnt.v_zfod++;
38656920Shibler m->flags &= ~PG_FAKE;
38745748Smckusick break;
38845748Smckusick }
38945748Smckusick else {
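			/*
			 * Carry paging_in_progress down to the shadow
			 * object; the first object keeps its count (and
			 * its busy page) to hold our place at the top.
			 */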
39045748Smckusick vm_object_lock(next_object);
39145748Smckusick if (object != first_object)
39245748Smckusick object->paging_in_progress--;
39345748Smckusick vm_object_unlock(object);
39445748Smckusick object = next_object;
39545748Smckusick object->paging_in_progress++;
39645748Smckusick }
39745748Smckusick }
39845748Smckusick
39956920Shibler if ((m->flags & (PG_ACTIVE | PG_INACTIVE | PG_BUSY)) != PG_BUSY)
40056920Shibler panic("vm_fault: active, inactive or !busy after main loop");
40145748Smckusick
40245748Smckusick /*
40345748Smckusick * PAGE HAS BEEN FOUND.
40445748Smckusick * [Loop invariant still holds -- the object lock
40545748Smckusick * is held.]
40645748Smckusick */
40745748Smckusick
40845748Smckusick old_m = m; /* save page that would be copied */
40945748Smckusick
41045748Smckusick /*
41145748Smckusick * If the page is being written, but isn't
41245748Smckusick * already owned by the top-level object,
41345748Smckusick * we have to copy it into a new page owned
41445748Smckusick * by the top-level object.
41545748Smckusick */
41645748Smckusick
41745748Smckusick if (object != first_object) {
41845748Smckusick /*
41945748Smckusick * We only really need to copy if we
42045748Smckusick * want to write it.
42145748Smckusick */
42245748Smckusick
42345748Smckusick if (fault_type & VM_PROT_WRITE) {
42445748Smckusick
42545748Smckusick /*
42645748Smckusick * If we try to collapse first_object at this
42745748Smckusick * point, we may deadlock when we try to get
42845748Smckusick * the lock on an intermediate object (since we
42945748Smckusick * have the bottom object locked). We can't
43045748Smckusick * unlock the bottom object, because the page
43145748Smckusick * we found may move (by collapse) if we do.
43245748Smckusick *
43345748Smckusick * Instead, we first copy the page. Then, when
43445748Smckusick * we have no more use for the bottom object,
43545748Smckusick * we unlock it and try to collapse.
43645748Smckusick *
43745748Smckusick * Note that we copy the page even if we didn't
43845748Smckusick * need to... that's the breaks.
43945748Smckusick */
44045748Smckusick
44145748Smckusick /*
44245748Smckusick * We already have an empty page in
44345748Smckusick * first_object - use it.
44445748Smckusick */
44545748Smckusick
44645748Smckusick vm_page_copy(m, first_m);
44756920Shibler first_m->flags &= ~PG_FAKE;
44845748Smckusick
44945748Smckusick /*
45045748Smckusick * If another map is truly sharing this
45145748Smckusick * page with us, we have to flush all
45245748Smckusick * uses of the original page, since we
45345748Smckusick * can't distinguish those which want the
45445748Smckusick * original from those which need the
45545748Smckusick * new copy.
45649288Shibler *
45749288Shibler * XXX If we know that only one map has
45849288Shibler * access to this page, then we could
45949288Shibler * avoid the pmap_page_protect() call.
46045748Smckusick */
46145748Smckusick
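			/*
			 * We are done with the original page: shuffle it
			 * onto the inactive queue so it can be reclaimed
			 * once its mappings are removed below.
			 */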
46245748Smckusick vm_page_lock_queues();
46352904Smckusick vm_page_activate(m);
46464692Shibler vm_page_deactivate(m);
465*69384Smckusick vm_page_unlock_queues();
466*69384Smckusick /*
467*69384Smckusick * XXX gag! The page protect has been moved out
468*69384Smckusick * of the page queue lock section to avoid a deadlock
469*69384Smckusick * in the hp300-style (recursive) pmap module.
470*69384Smckusick * If you were on an MP, p_p_protect might result
471*69384Smckusick * in a vm_map_pageable(..., TRUE) for the associated
472*69384Smckusick * page table page. This would call vm_fault_unwire
473*69384Smckusick * which would try to lock the page queues.
474*69384Smckusick * Moving the call out is safe here because the
475*69384Smckusick * object is still locked and that will prevent
476*69384Smckusick * the pageout daemon from messing with this page
477*69384Smckusick * on the inactive list. (It would move it back to
478*69384Smckusick * the active list if it were referenced but
479*69384Smckusick * v_p_deallocate clears the ref bit).
480*69384Smckusick */
48149288Shibler pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
48245748Smckusick
48345748Smckusick /*
48445748Smckusick * We no longer need the old page or object.
48545748Smckusick */
48645748Smckusick PAGE_WAKEUP(m);
48745748Smckusick object->paging_in_progress--;
48845748Smckusick vm_object_unlock(object);
48945748Smckusick
49045748Smckusick /*
49145748Smckusick * Only use the new page below...
49245748Smckusick */
49345748Smckusick
49450911Smckusick cnt.v_cow_faults++;
49545748Smckusick m = first_m;
49645748Smckusick object = first_object;
49745748Smckusick offset = first_offset;
49845748Smckusick
49945748Smckusick /*
50045748Smckusick * Now that we've gotten the copy out of the
50145748Smckusick * way, let's try to collapse the top object.
50245748Smckusick */
50345748Smckusick vm_object_lock(object);
50445748Smckusick /*
50545748Smckusick * But we have to play ugly games with
50645748Smckusick * paging_in_progress to do that...
50745748Smckusick */
50845748Smckusick object->paging_in_progress--;
50945748Smckusick vm_object_collapse(object);
51045748Smckusick object->paging_in_progress++;
51145748Smckusick }
51245748Smckusick else {
51365684Shibler prot &= ~VM_PROT_WRITE;
51456382Smckusick m->flags |= PG_COPYONWRITE;
51545748Smckusick }
51645748Smckusick }
51745748Smckusick
51856920Shibler if (m->flags & (PG_ACTIVE|PG_INACTIVE))
51945748Smckusick panic("vm_fault: active or inactive before copy object handling");
52045748Smckusick
52145748Smckusick /*
52245748Smckusick * If the page is being written, but hasn't been
52345748Smckusick * copied to the copy-object, we have to copy it there.
52445748Smckusick */
52545748Smckusick RetryCopy:
52648386Skarels if (first_object->copy != NULL) {
52748386Skarels vm_object_t copy_object = first_object->copy;
52848386Skarels vm_offset_t copy_offset;
52948386Skarels vm_page_t copy_m;
53045748Smckusick
53145748Smckusick /*
53245748Smckusick * We only need to copy if we want to write it.
53345748Smckusick */
53445748Smckusick if ((fault_type & VM_PROT_WRITE) == 0) {
53545748Smckusick prot &= ~VM_PROT_WRITE;
53656382Smckusick m->flags |= PG_COPYONWRITE;
53745748Smckusick }
53845748Smckusick else {
53945748Smckusick /*
54045748Smckusick * Try to get the lock on the copy_object.
54145748Smckusick */
54245748Smckusick if (!vm_object_lock_try(copy_object)) {
54345748Smckusick vm_object_unlock(object);
54445748Smckusick /* should spin a bit here... */
54545748Smckusick vm_object_lock(object);
54645748Smckusick goto RetryCopy;
54745748Smckusick }
54845748Smckusick
54945748Smckusick /*
55045748Smckusick * Make another reference to the copy-object,
55145748Smckusick * to keep it from disappearing during the
55245748Smckusick * copy.
55345748Smckusick */
55445748Smckusick copy_object->ref_count++;
55545748Smckusick
55645748Smckusick /*
55745748Smckusick * Does the page exist in the copy?
55845748Smckusick */
55945748Smckusick copy_offset = first_offset
56045748Smckusick - copy_object->shadow_offset;
56145748Smckusick copy_m = vm_page_lookup(copy_object, copy_offset);
56248386Skarels if (page_exists = (copy_m != NULL)) {
56356382Smckusick if (copy_m->flags & PG_BUSY) {
56445748Smckusick #ifdef DOTHREADS
56545748Smckusick int wait_result;
56645748Smckusick
56745748Smckusick /*
56845748Smckusick * If the page is being brought
56945748Smckusick * in, wait for it and then retry.
57045748Smckusick */
57145748Smckusick PAGE_ASSERT_WAIT(copy_m, !change_wiring);
57245748Smckusick RELEASE_PAGE(m);
57345748Smckusick copy_object->ref_count--;
57445748Smckusick vm_object_unlock(copy_object);
57545748Smckusick UNLOCK_THINGS;
57645748Smckusick thread_block();
57745748Smckusick wait_result = current_thread()->wait_result;
57845748Smckusick vm_object_deallocate(first_object);
57945748Smckusick if (wait_result != THREAD_AWAKENED)
58045748Smckusick return(KERN_SUCCESS);
58145748Smckusick goto RetryFault;
58245748Smckusick #else
58345748Smckusick /*
58445748Smckusick * If the page is being brought
58545748Smckusick * in, wait for it and then retry.
58645748Smckusick */
58745748Smckusick PAGE_ASSERT_WAIT(copy_m, !change_wiring);
58845748Smckusick RELEASE_PAGE(m);
58945748Smckusick copy_object->ref_count--;
59045748Smckusick vm_object_unlock(copy_object);
59145748Smckusick UNLOCK_THINGS;
59245748Smckusick thread_block();
59345748Smckusick vm_object_deallocate(first_object);
59445748Smckusick goto RetryFault;
59545748Smckusick #endif
59645748Smckusick }
59745748Smckusick }
59845748Smckusick
59945748Smckusick /*
60045748Smckusick * If the page is not in memory (in the object)
60145748Smckusick * and the object has a pager, we have to check
60245748Smckusick * if the pager has the data in secondary
60345748Smckusick * storage.
60445748Smckusick */
60545748Smckusick if (!page_exists) {
60645748Smckusick
60745748Smckusick /*
60845748Smckusick * If we don't allocate a (blank) page
60945748Smckusick * here... another thread could try
61045748Smckusick * to page it in, allocate a page, and
61145748Smckusick * then block on the busy page in its
61245748Smckusick * shadow (first_object). Then we'd
61345748Smckusick * trip over the busy page after we
61445748Smckusick * found that the copy_object's pager
61545748Smckusick * doesn't have the page...
61645748Smckusick */
61745748Smckusick copy_m = vm_page_alloc(copy_object,
61845748Smckusick copy_offset);
61948386Skarels if (copy_m == NULL) {
62045748Smckusick /*
62145748Smckusick * Wait for a page, then retry.
62245748Smckusick */
62345748Smckusick RELEASE_PAGE(m);
62445748Smckusick copy_object->ref_count--;
62545748Smckusick vm_object_unlock(copy_object);
62645748Smckusick UNLOCK_AND_DEALLOCATE;
62745748Smckusick VM_WAIT;
62845748Smckusick goto RetryFault;
62945748Smckusick }
63045748Smckusick
63148386Skarels if (copy_object->pager != NULL) {
63245748Smckusick vm_object_unlock(object);
63345748Smckusick vm_object_unlock(copy_object);
63445748Smckusick UNLOCK_MAP;
63545748Smckusick
63645748Smckusick page_exists = vm_pager_has_page(
63745748Smckusick copy_object->pager,
63845748Smckusick (copy_offset + copy_object->paging_offset));
63945748Smckusick
64045748Smckusick vm_object_lock(copy_object);
64145748Smckusick
64245748Smckusick /*
64345748Smckusick * Since the map is unlocked, someone
64445748Smckusick * else could have copied this object
64545748Smckusick * and put a different copy_object
64645748Smckusick * between the two. Or, the last
64745748Smckusick * reference to the copy-object (other
64845748Smckusick * than the one we have) may have
64945748Smckusick * disappeared - if that has happened,
65045748Smckusick * we don't need to make the copy.
65145748Smckusick */
65245748Smckusick if (copy_object->shadow != object ||
65345748Smckusick copy_object->ref_count == 1) {
65445748Smckusick /*
65545748Smckusick * Gaah... start over!
65645748Smckusick */
65745748Smckusick FREE_PAGE(copy_m);
65845748Smckusick vm_object_unlock(copy_object);
65945748Smckusick vm_object_deallocate(copy_object);
66045748Smckusick /* may block */
66145748Smckusick vm_object_lock(object);
66245748Smckusick goto RetryCopy;
66345748Smckusick }
66445748Smckusick vm_object_lock(object);
66545748Smckusick
66645748Smckusick if (page_exists) {
66745748Smckusick /*
66845748Smckusick * We didn't need the page
66945748Smckusick */
67045748Smckusick FREE_PAGE(copy_m);
67145748Smckusick }
67245748Smckusick }
67345748Smckusick }
67445748Smckusick if (!page_exists) {
67545748Smckusick /*
67645748Smckusick * Must copy page into copy-object.
67745748Smckusick */
67845748Smckusick vm_page_copy(m, copy_m);
67956920Shibler copy_m->flags &= ~PG_FAKE;
68045748Smckusick
68145748Smckusick /*
68245748Smckusick * Things to remember:
68345748Smckusick * 1. The copied page must be marked 'dirty'
68445748Smckusick * so it will be paged out to the copy
68545748Smckusick * object.
68645748Smckusick * 2. If the old page was in use by any users
68745748Smckusick * of the copy-object, it must be removed
68845748Smckusick * from all pmaps. (We can't know which
68945748Smckusick * pmaps use it.)
69045748Smckusick */
69145748Smckusick vm_page_lock_queues();
69249288Shibler pmap_page_protect(VM_PAGE_TO_PHYS(old_m),
69349288Shibler VM_PROT_NONE);
69456382Smckusick copy_m->flags &= ~PG_CLEAN;
69545748Smckusick vm_page_activate(copy_m); /* XXX */
69645748Smckusick vm_page_unlock_queues();
69745748Smckusick
69845748Smckusick PAGE_WAKEUP(copy_m);
69945748Smckusick }
70045748Smckusick /*
70145748Smckusick * The reference count on copy_object must be
70245748Smckusick * at least 2: one for our extra reference,
70345748Smckusick * and at least one from the outside world
70445748Smckusick * (we checked that when we last locked
70545748Smckusick * copy_object).
70645748Smckusick */
70745748Smckusick copy_object->ref_count--;
70845748Smckusick vm_object_unlock(copy_object);
70956382Smckusick m->flags &= ~PG_COPYONWRITE;
71045748Smckusick }
71145748Smckusick }
71245748Smckusick
71356382Smckusick if (m->flags & (PG_ACTIVE | PG_INACTIVE))
71445748Smckusick panic("vm_fault: active or inactive before retrying lookup");
71545748Smckusick
71645748Smckusick /*
71745748Smckusick * We must verify that the maps have not changed
71845748Smckusick * since our last lookup.
71945748Smckusick */
72045748Smckusick
72145748Smckusick if (!lookup_still_valid) {
72245748Smckusick vm_object_t retry_object;
72345748Smckusick vm_offset_t retry_offset;
72445748Smckusick vm_prot_t retry_prot;
72545748Smckusick
72645748Smckusick /*
72745748Smckusick * Since map entries may be pageable, make sure we can
72845748Smckusick * take a page fault on them.
72945748Smckusick */
73045748Smckusick vm_object_unlock(object);
73145748Smckusick
73245748Smckusick /*
73345748Smckusick * To avoid trying to write_lock the map while another
73445748Smckusick * thread has it read_locked (in vm_map_pageable), we
73545748Smckusick * do not try for write permission. If the page is
73645748Smckusick * still writable, we will get write permission. If it
73745748Smckusick * is not, or has been marked needs_copy, we enter the
73845748Smckusick * mapping without write permission, and will merely
73945748Smckusick * take another fault.
74045748Smckusick */
74145748Smckusick result = vm_map_lookup(&map, vaddr,
74245748Smckusick fault_type & ~VM_PROT_WRITE, &entry,
74345748Smckusick &retry_object, &retry_offset, &retry_prot,
74445748Smckusick &wired, &su);
74545748Smckusick
74645748Smckusick vm_object_lock(object);
74745748Smckusick
74845748Smckusick /*
74945748Smckusick * If we don't need the page any longer, put it on the
75045748Smckusick * active list (the easiest thing to do here). If no
75145748Smckusick * one needs it, pageout will grab it eventually.
75245748Smckusick */
75345748Smckusick
75445748Smckusick if (result != KERN_SUCCESS) {
75545748Smckusick RELEASE_PAGE(m);
75645748Smckusick UNLOCK_AND_DEALLOCATE;
75745748Smckusick return(result);
75845748Smckusick }
75945748Smckusick
76045748Smckusick lookup_still_valid = TRUE;
76145748Smckusick
76245748Smckusick if ((retry_object != first_object) ||
76345748Smckusick (retry_offset != first_offset)) {
76445748Smckusick RELEASE_PAGE(m);
76545748Smckusick UNLOCK_AND_DEALLOCATE;
76645748Smckusick goto RetryFault;
76745748Smckusick }
76845748Smckusick
76945748Smckusick /*
77045748Smckusick * Check whether the protection has changed or the object
77145748Smckusick * has been copied while we left the map unlocked.
77245748Smckusick * Changing from read to write permission is OK - we leave
77345748Smckusick * the page write-protected, and catch the write fault.
77445748Smckusick * Changing from write to read permission means that we
77545748Smckusick * can't mark the page write-enabled after all.
77645748Smckusick */
77745748Smckusick prot &= retry_prot;
77856382Smckusick if (m->flags & PG_COPYONWRITE)
77945748Smckusick prot &= ~VM_PROT_WRITE;
78045748Smckusick }
78145748Smckusick
78245748Smckusick /*
78345748Smckusick * (the various bits we're fiddling with here are locked by
78445748Smckusick * the object's lock)
78545748Smckusick */
78645748Smckusick
78745748Smckusick /* XXX This distorts the meaning of the copy_on_write bit */
78845748Smckusick
78945748Smckusick if (prot & VM_PROT_WRITE)
79056382Smckusick m->flags &= ~PG_COPYONWRITE;
79145748Smckusick
79245748Smckusick /*
79345748Smckusick * It's critically important that a wired-down page be faulted
79445748Smckusick * only once in each map for which it is wired.
79545748Smckusick */
79645748Smckusick
79756382Smckusick if (m->flags & (PG_ACTIVE | PG_INACTIVE))
79845748Smckusick panic("vm_fault: active or inactive before pmap_enter");
79945748Smckusick
80045748Smckusick vm_object_unlock(object);
80145748Smckusick
80245748Smckusick /*
80345748Smckusick * Put this page into the physical map.
80445748Smckusick * We had to do the unlock above because pmap_enter
80545748Smckusick * may cause other faults. We don't put the
80645748Smckusick * page back on the active queue until later so
80745748Smckusick * that the page-out daemon won't find us (yet).
80845748Smckusick */
80945748Smckusick
81056920Shibler pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired);
81145748Smckusick
81245748Smckusick /*
81345748Smckusick * If the page is not wired down, then put it where the
81445748Smckusick * pageout daemon can find it.
81545748Smckusick */
81645748Smckusick vm_object_lock(object);
81745748Smckusick vm_page_lock_queues();
81845748Smckusick if (change_wiring) {
81945748Smckusick if (wired)
82045748Smckusick vm_page_wire(m);
82145748Smckusick else
82245748Smckusick vm_page_unwire(m);
82345748Smckusick }
82445748Smckusick else
82545748Smckusick vm_page_activate(m);
82645748Smckusick vm_page_unlock_queues();
82745748Smckusick
82845748Smckusick /*
82945748Smckusick * Unlock everything, and return
83045748Smckusick */
83145748Smckusick
83245748Smckusick PAGE_WAKEUP(m);
83345748Smckusick UNLOCK_AND_DEALLOCATE;
83445748Smckusick
83545748Smckusick return(KERN_SUCCESS);
83645748Smckusick
83745748Smckusick }
83845748Smckusick
83945748Smckusick /*
84045748Smckusick * vm_fault_wire:
84145748Smckusick *
84245748Smckusick * Wire down a range of virtual addresses in a map.
84345748Smckusick */
84458597Shibler int
84558597Shibler vm_fault_wire(map, start, end)
84645748Smckusick vm_map_t map;
84745748Smckusick vm_offset_t start, end;
84845748Smckusick {
84945748Smckusick register vm_offset_t va;
85045748Smckusick register pmap_t pmap;
85158597Shibler int rv;
85245748Smckusick
85345748Smckusick pmap = vm_map_pmap(map);
85445748Smckusick
85545748Smckusick /*
85645748Smckusick * Inform the physical mapping system that the
85745748Smckusick * range of addresses may not fault, so that
85845748Smckusick * page tables and such can be locked down as well.
85945748Smckusick */
86045748Smckusick
86145748Smckusick pmap_pageable(pmap, start, end, FALSE);
86245748Smckusick
86345748Smckusick /*
86445748Smckusick * We simulate a fault to get the page and enter it
86545748Smckusick * in the physical map.
86645748Smckusick */
86745748Smckusick
86845748Smckusick for (va = start; va < end; va += PAGE_SIZE) {
86958597Shibler rv = vm_fault(map, va, VM_PROT_NONE, TRUE);
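		/*
		 * If any page cannot be wired, undo the wiring done so
		 * far and return the error.
		 */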
87058597Shibler if (rv) {
87158597Shibler if (va != start)
87258597Shibler vm_fault_unwire(map, start, va);
87358597Shibler return(rv);
87458597Shibler }
87545748Smckusick }
87658597Shibler return(KERN_SUCCESS);
87745748Smckusick }
87845748Smckusick
87945748Smckusick
88045748Smckusick /*
88145748Smckusick * vm_fault_unwire:
88245748Smckusick *
88345748Smckusick * Unwire a range of virtual addresses in a map.
88445748Smckusick */
88568164Scgd void
88668164Scgd vm_fault_unwire(map, start, end)
88745748Smckusick vm_map_t map;
88845748Smckusick vm_offset_t start, end;
88945748Smckusick {
89045748Smckusick
89145748Smckusick register vm_offset_t va, pa;
89245748Smckusick register pmap_t pmap;
89345748Smckusick
89445748Smckusick pmap = vm_map_pmap(map);
89545748Smckusick
89645748Smckusick /*
89745748Smckusick * Since the pages are wired down, we must be able to
89845748Smckusick * get their mappings from the physical map system.
89945748Smckusick */
90045748Smckusick
90145748Smckusick vm_page_lock_queues();
90245748Smckusick
90345748Smckusick for (va = start; va < end; va += PAGE_SIZE) {
90445748Smckusick pa = pmap_extract(pmap, va);
90545748Smckusick if (pa == (vm_offset_t) 0) {
90645748Smckusick panic("unwire: page not in pmap");
90745748Smckusick }
90845748Smckusick pmap_change_wiring(pmap, va, FALSE);
90945748Smckusick vm_page_unwire(PHYS_TO_VM_PAGE(pa));
91045748Smckusick }
91145748Smckusick vm_page_unlock_queues();
91245748Smckusick
91345748Smckusick /*
91445748Smckusick * Inform the physical mapping system that the range
91545748Smckusick * of addresses may fault, so that page tables and
91645748Smckusick * such may be unwired themselves.
91745748Smckusick */
91845748Smckusick
91945748Smckusick pmap_pageable(pmap, start, end, TRUE);
92045748Smckusick
92145748Smckusick }
92245748Smckusick
92345748Smckusick /*
92445748Smckusick * Routine:
92545748Smckusick * vm_fault_copy_entry
92645748Smckusick * Function:
92745748Smckusick * Copy all of the pages from a wired-down map entry to another.
92845748Smckusick *
92945748Smckusick * In/out conditions:
93045748Smckusick * The source and destination maps must be locked for write.
93145748Smckusick * The source map entry must be wired down (or be a sharing map
93245748Smckusick * entry corresponding to a main map entry that is wired down).
93345748Smckusick */
93445748Smckusick
93568164Scgd void
93668164Scgd vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry)
93745748Smckusick vm_map_t dst_map;
93845748Smckusick vm_map_t src_map;
93945748Smckusick vm_map_entry_t dst_entry;
94045748Smckusick vm_map_entry_t src_entry;
94145748Smckusick {
94245748Smckusick
94345748Smckusick vm_object_t dst_object;
94445748Smckusick vm_object_t src_object;
94545748Smckusick vm_offset_t dst_offset;
94645748Smckusick vm_offset_t src_offset;
94745748Smckusick vm_prot_t prot;
94845748Smckusick vm_offset_t vaddr;
94945748Smckusick vm_page_t dst_m;
95045748Smckusick vm_page_t src_m;
95145748Smckusick
95245748Smckusick #ifdef lint
95345748Smckusick src_map++;
95460345Storek #endif
95545748Smckusick
95645748Smckusick src_object = src_entry->object.vm_object;
95745748Smckusick src_offset = src_entry->offset;
95845748Smckusick
95945748Smckusick /*
96045748Smckusick * Create the top-level object for the destination entry.
96145748Smckusick * (Doesn't actually shadow anything - we copy the pages
96245748Smckusick * directly.)
96345748Smckusick */
96445748Smckusick dst_object = vm_object_allocate(
96545748Smckusick (vm_size_t) (dst_entry->end - dst_entry->start));
96645748Smckusick
96745748Smckusick dst_entry->object.vm_object = dst_object;
96845748Smckusick dst_entry->offset = 0;
96945748Smckusick
97045748Smckusick prot = dst_entry->max_protection;
97145748Smckusick
97245748Smckusick /*
97345748Smckusick * Loop through all of the pages in the entry's range, copying
97445748Smckusick * each one from the source object (it should be there) to the
97545748Smckusick * destination object.
97645748Smckusick */
97745748Smckusick for (vaddr = dst_entry->start, dst_offset = 0;
97845748Smckusick vaddr < dst_entry->end;
97945748Smckusick vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) {
98045748Smckusick
98145748Smckusick /*
98245748Smckusick * Allocate a page in the destination object
98345748Smckusick */
98445748Smckusick vm_object_lock(dst_object);
98545748Smckusick do {
98645748Smckusick dst_m = vm_page_alloc(dst_object, dst_offset);
98748386Skarels if (dst_m == NULL) {
98845748Smckusick vm_object_unlock(dst_object);
98945748Smckusick VM_WAIT;
99045748Smckusick vm_object_lock(dst_object);
99145748Smckusick }
99248386Skarels } while (dst_m == NULL);
99345748Smckusick
99445748Smckusick /*
99545748Smckusick * Find the page in the source object, and copy it in.
99645748Smckusick * (Because the source is wired down, the page will be
99745748Smckusick * in memory.)
99845748Smckusick */
99945748Smckusick vm_object_lock(src_object);
100045748Smckusick src_m = vm_page_lookup(src_object, dst_offset + src_offset);
100148386Skarels if (src_m == NULL)
100245748Smckusick panic("vm_fault_copy_wired: page missing");
100345748Smckusick
100445748Smckusick vm_page_copy(src_m, dst_m);
100545748Smckusick
100645748Smckusick /*
100745748Smckusick * Enter it in the pmap...
100845748Smckusick */
100945748Smckusick vm_object_unlock(src_object);
101045748Smckusick vm_object_unlock(dst_object);
101145748Smckusick
101245748Smckusick pmap_enter(dst_map->pmap, vaddr, VM_PAGE_TO_PHYS(dst_m),
101345748Smckusick prot, FALSE);
101445748Smckusick
101545748Smckusick /*
101645748Smckusick * Mark it no longer busy, and put it on the active list.
101745748Smckusick */
101845748Smckusick vm_object_lock(dst_object);
101945748Smckusick vm_page_lock_queues();
102045748Smckusick vm_page_activate(dst_m);
102145748Smckusick vm_page_unlock_queues();
102245748Smckusick PAGE_WAKEUP(dst_m);
102345748Smckusick vm_object_unlock(dst_object);
102445748Smckusick }
102545748Smckusick
102645748Smckusick }