xref: /csrg-svn/sys/vm/vm_fault.c (revision 65231)
145748Smckusick /*
263379Sbostic  * Copyright (c) 1991, 1993
363379Sbostic  *	The Regents of the University of California.  All rights reserved.
445748Smckusick  *
547663Smckusick  * This code is derived from software contributed to Berkeley by
647663Smckusick  * The Mach Operating System project at Carnegie-Mellon University.
747663Smckusick  *
847663Smckusick  * %sccs.include.redist.c%
947663Smckusick  *
10*65231Smckusick  *	@(#)vm_fault.c	8.3 (Berkeley) 12/30/93
1147663Smckusick  *
1247663Smckusick  *
1347663Smckusick  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
1447663Smckusick  * All rights reserved.
1547663Smckusick  *
1647592Smckusick  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
1747592Smckusick  *
1847592Smckusick  * Permission to use, copy, modify and distribute this software and
1947592Smckusick  * its documentation is hereby granted, provided that both the copyright
2047592Smckusick  * notice and this permission notice appear in all copies of the
2147592Smckusick  * software, derivative works or modified versions, and any portions
2247592Smckusick  * thereof, and that both notices appear in supporting documentation.
2347592Smckusick  *
2447592Smckusick  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
2547592Smckusick  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
2647592Smckusick  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
2747592Smckusick  *
2847663Smckusick  * Carnegie Mellon requests users of this software to return to
2945748Smckusick  *
3047663Smckusick  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
3147663Smckusick  *  School of Computer Science
3247663Smckusick  *  Carnegie Mellon University
3347663Smckusick  *  Pittsburgh PA 15213-3890
3447662Smckusick  *
3547663Smckusick  * any improvements or extensions that they make and grant Carnegie the
3647663Smckusick  * rights to redistribute these changes.
3745748Smckusick  */
3845748Smckusick 
3945748Smckusick /*
4045748Smckusick  *	Page fault handling module.
4145748Smckusick  */
4245748Smckusick 
4353348Sbostic #include <sys/param.h>
4453348Sbostic #include <sys/systm.h>
4545748Smckusick 
4653348Sbostic #include <vm/vm.h>
4753348Sbostic #include <vm/vm_page.h>
4853348Sbostic #include <vm/vm_pageout.h>
4948386Skarels 
5045748Smckusick /*
5145748Smckusick  *	vm_fault:
5245748Smckusick  *
5345748Smckusick  *	Handle a page fault occurring at the given address,
5445748Smckusick  *	requiring the given permissions, in the map specified.
5545748Smckusick  *	If successful, the page is inserted into the
5645748Smckusick  *	associated physical map.
5745748Smckusick  *
5845748Smckusick  *	NOTE: the given address should be truncated to the
5945748Smckusick  *	proper page address.
6045748Smckusick  *
6145748Smckusick  *	KERN_SUCCESS is returned if the page fault is handled; otherwise,
6245748Smckusick  *	a standard error specifying why the fault is fatal is returned.
6345748Smckusick  *
6445748Smckusick  *
6545748Smckusick  *	The map in question must be referenced, and remains so.
6645748Smckusick  *	Caller may hold no locks.
6745748Smckusick  */
6853348Sbostic int
6945748Smckusick vm_fault(map, vaddr, fault_type, change_wiring)
7045748Smckusick 	vm_map_t	map;
7145748Smckusick 	vm_offset_t	vaddr;
7245748Smckusick 	vm_prot_t	fault_type;
7345748Smckusick 	boolean_t	change_wiring;
7445748Smckusick {
7545748Smckusick 	vm_object_t		first_object;
7645748Smckusick 	vm_offset_t		first_offset;
7745748Smckusick 	vm_map_entry_t		entry;
7845748Smckusick 	register vm_object_t	object;
7945748Smckusick 	register vm_offset_t	offset;
8045748Smckusick 	register vm_page_t	m;
8145748Smckusick 	vm_page_t		first_m;
8245748Smckusick 	vm_prot_t		prot;
8345748Smckusick 	int			result;
8445748Smckusick 	boolean_t		wired;
8545748Smckusick 	boolean_t		su;
8645748Smckusick 	boolean_t		lookup_still_valid;
8745748Smckusick 	boolean_t		page_exists;
8845748Smckusick 	vm_page_t		old_m;
8945748Smckusick 	vm_object_t		next_object;
9045748Smckusick 
9150911Smckusick 	cnt.v_vm_faults++;		/* needs lock XXX */
9245748Smckusick /*
9345748Smckusick  *	Recovery actions
9445748Smckusick  */
9545748Smckusick #define	FREE_PAGE(m)	{				\
9645748Smckusick 	PAGE_WAKEUP(m);					\
9745748Smckusick 	vm_page_lock_queues();				\
9845748Smckusick 	vm_page_free(m);				\
9945748Smckusick 	vm_page_unlock_queues();			\
10045748Smckusick }
10145748Smckusick 
10245748Smckusick #define	RELEASE_PAGE(m)	{				\
10345748Smckusick 	PAGE_WAKEUP(m);					\
10445748Smckusick 	vm_page_lock_queues();				\
10545748Smckusick 	vm_page_activate(m);				\
10645748Smckusick 	vm_page_unlock_queues();			\
10745748Smckusick }
10845748Smckusick 
10945748Smckusick #define	UNLOCK_MAP	{				\
11045748Smckusick 	if (lookup_still_valid) {			\
11145748Smckusick 		vm_map_lookup_done(map, entry);		\
11245748Smckusick 		lookup_still_valid = FALSE;		\
11345748Smckusick 	}						\
11445748Smckusick }
11545748Smckusick 
11645748Smckusick #define	UNLOCK_THINGS	{				\
11745748Smckusick 	object->paging_in_progress--;			\
11845748Smckusick 	vm_object_unlock(object);			\
11945748Smckusick 	if (object != first_object) {			\
12045748Smckusick 		vm_object_lock(first_object);		\
12145748Smckusick 		FREE_PAGE(first_m);			\
12245748Smckusick 		first_object->paging_in_progress--;	\
12345748Smckusick 		vm_object_unlock(first_object);		\
12445748Smckusick 	}						\
12545748Smckusick 	UNLOCK_MAP;					\
12645748Smckusick }
12745748Smckusick 
12845748Smckusick #define	UNLOCK_AND_DEALLOCATE	{			\
12945748Smckusick 	UNLOCK_THINGS;					\
13045748Smckusick 	vm_object_deallocate(first_object);		\
13145748Smckusick }
13245748Smckusick 
13345748Smckusick     RetryFault: ;
13445748Smckusick 
13545748Smckusick 	/*
13645748Smckusick 	 *	Find the backing store object and offset into
13745748Smckusick 	 *	it to begin the search.
13845748Smckusick 	 */
13945748Smckusick 
14045748Smckusick 	if ((result = vm_map_lookup(&map, vaddr, fault_type, &entry,
14145748Smckusick 			&first_object, &first_offset,
14245748Smckusick 			&prot, &wired, &su)) != KERN_SUCCESS) {
14345748Smckusick 		return(result);
14445748Smckusick 	}
14545748Smckusick 	lookup_still_valid = TRUE;
14645748Smckusick 
14745748Smckusick 	if (wired)
14845748Smckusick 		fault_type = prot;
14945748Smckusick 
15048386Skarels 	first_m = NULL;
15145748Smckusick 
15245748Smckusick    	/*
15345748Smckusick 	 *	Make a reference to this object to
15445748Smckusick 	 *	prevent its disposal while we are messing with
15545748Smckusick 	 *	it.  Once we have the reference, the map is free
15645748Smckusick 	 *	to be diddled.  Since objects reference their
15745748Smckusick 	 *	shadows (and copies), they will stay around as well.
15845748Smckusick 	 */
15945748Smckusick 
16045748Smckusick 	vm_object_lock(first_object);
16145748Smckusick 
16245748Smckusick 	first_object->ref_count++;
16345748Smckusick 	first_object->paging_in_progress++;
16445748Smckusick 
16545748Smckusick 	/*
16645748Smckusick 	 *	INVARIANTS (through entire routine):
16745748Smckusick 	 *
16845748Smckusick 	 *	1)	At all times, we must either have the object
16945748Smckusick 	 *		lock or a busy page in some object to prevent
17045748Smckusick 	 *		some other thread from trying to bring in
17145748Smckusick 	 *		the same page.
17245748Smckusick 	 *
17345748Smckusick 	 *		Note that we cannot hold any locks during the
17445748Smckusick 	 *		pager access or when waiting for memory, so
17545748Smckusick 	 *		we use a busy page then.
17645748Smckusick 	 *
17745748Smckusick 	 *		Note also that we aren't as concerned about
17845748Smckusick 	 *		more than one thread attempting to pager_data_unlock
17945748Smckusick 	 *		the same page at once, so we don't hold the page
18045748Smckusick 	 *		as busy then, but do record the highest unlock
18145748Smckusick 	 *		value so far.  [Unlock requests may also be delivered
18245748Smckusick 	 *		out of order.]
18345748Smckusick 	 *
18445748Smckusick 	 *	2)	Once we have a busy page, we must remove it from
18545748Smckusick 	 *		the pageout queues, so that the pageout daemon
18645748Smckusick 	 *		will not grab it away.
18745748Smckusick 	 *
18845748Smckusick 	 *	3)	To prevent another thread from racing us down the
18945748Smckusick 	 *		shadow chain and entering a new page in the top
19045748Smckusick 	 *		object before we do, we must keep a busy page in
19145748Smckusick 	 *		the top object while following the shadow chain.
19245748Smckusick 	 *
19345748Smckusick 	 *	4)	We must increment paging_in_progress on any object
19445748Smckusick 	 *		for which we have a busy page, to prevent
19545748Smckusick 	 *		vm_object_collapse from removing the busy page
19645748Smckusick 	 *		without our noticing.
19745748Smckusick 	 */
19845748Smckusick 
19945748Smckusick 	/*
20045748Smckusick 	 *	Search for the page at object/offset.
20145748Smckusick 	 */
20245748Smckusick 
20345748Smckusick 	object = first_object;
20445748Smckusick 	offset = first_offset;
20545748Smckusick 
20645748Smckusick 	/*
20745748Smckusick 	 *	See whether this page is resident
20845748Smckusick 	 */
20945748Smckusick 
21045748Smckusick 	while (TRUE) {
21145748Smckusick 		m = vm_page_lookup(object, offset);
21248386Skarels 		if (m != NULL) {
21345748Smckusick 			/*
21445748Smckusick 			 *	If the page is being brought in,
21545748Smckusick 			 *	wait for it and then retry.
21645748Smckusick 			 */
21756382Smckusick 			if (m->flags & PG_BUSY) {
21845748Smckusick #ifdef DOTHREADS
21945748Smckusick 				int	wait_result;
22045748Smckusick 
22145748Smckusick 				PAGE_ASSERT_WAIT(m, !change_wiring);
22245748Smckusick 				UNLOCK_THINGS;
22345748Smckusick 				thread_block();
22445748Smckusick 				wait_result = current_thread()->wait_result;
22545748Smckusick 				vm_object_deallocate(first_object);
22645748Smckusick 				if (wait_result != THREAD_AWAKENED)
22745748Smckusick 					return(KERN_SUCCESS);
22845748Smckusick 				goto RetryFault;
22945748Smckusick #else
23045748Smckusick 				PAGE_ASSERT_WAIT(m, !change_wiring);
23145748Smckusick 				UNLOCK_THINGS;
23245748Smckusick 				thread_block();
23345748Smckusick 				vm_object_deallocate(first_object);
23445748Smckusick 				goto RetryFault;
23545748Smckusick #endif
23645748Smckusick 			}
23745748Smckusick 
23845748Smckusick 			/*
23945748Smckusick 			 *	Remove the page from the pageout daemon's
24045748Smckusick 			 *	reach while we play with it.
24145748Smckusick 			 */
24245748Smckusick 
24345748Smckusick 			vm_page_lock_queues();
24456382Smckusick 			if (m->flags & PG_INACTIVE) {
245*65231Smckusick 				TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
24656382Smckusick 				m->flags &= ~PG_INACTIVE;
24750911Smckusick 				cnt.v_inactive_count--;
24850911Smckusick 				cnt.v_reactivated++;
24945748Smckusick 			}
25045748Smckusick 
25156382Smckusick 			if (m->flags & PG_ACTIVE) {
252*65231Smckusick 				TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
25356382Smckusick 				m->flags &= ~PG_ACTIVE;
25450911Smckusick 				cnt.v_active_count--;
25545748Smckusick 			}
25645748Smckusick 			vm_page_unlock_queues();
25745748Smckusick 
25845748Smckusick 			/*
25945748Smckusick 			 *	Mark page busy for other threads.
26045748Smckusick 			 */
26156382Smckusick 			m->flags |= PG_BUSY;
26245748Smckusick 			break;
26345748Smckusick 		}
26445748Smckusick 
26548386Skarels 		if (((object->pager != NULL) &&
26645748Smckusick 				(!change_wiring || wired))
26745748Smckusick 		    || (object == first_object)) {
26845748Smckusick 
26945748Smckusick 			/*
27045748Smckusick 			 *	Allocate a new page for this object/offset
27145748Smckusick 			 *	pair.
27245748Smckusick 			 */
27345748Smckusick 
27445748Smckusick 			m = vm_page_alloc(object, offset);
27545748Smckusick 
27648386Skarels 			if (m == NULL) {
27745748Smckusick 				UNLOCK_AND_DEALLOCATE;
27845748Smckusick 				VM_WAIT;
27945748Smckusick 				goto RetryFault;
28045748Smckusick 			}
28145748Smckusick 		}
28245748Smckusick 
28356323Shibler 		if (object->pager != NULL && (!change_wiring || wired)) {
28445748Smckusick 			int rv;
28545748Smckusick 
28645748Smckusick 			/*
28745748Smckusick 			 *	Now that we have a busy page, we can
28845748Smckusick 			 *	release the object lock.
28945748Smckusick 			 */
29045748Smckusick 			vm_object_unlock(object);
29145748Smckusick 
29245748Smckusick 			/*
29345748Smckusick 			 *	Call the pager to retrieve the data, if any,
29445748Smckusick 			 *	after releasing the lock on the map.
29545748Smckusick 			 */
29645748Smckusick 			UNLOCK_MAP;
29756323Shibler 			rv = vm_pager_get(object->pager, m, TRUE);
29845748Smckusick 
29956323Shibler 			/*
30056323Shibler 			 *	Reacquire the object lock to preserve our
30156323Shibler 			 *	invariant.
30256323Shibler 			 */
30356323Shibler 			vm_object_lock(object);
30456323Shibler 
30556323Shibler 			/*
30656323Shibler 			 *	Found the page.
30756323Shibler 			 *	Leave it busy while we play with it.
30856323Shibler 			 */
30945748Smckusick 			if (rv == VM_PAGER_OK) {
31045748Smckusick 				/*
31145748Smckusick 				 *	Relookup in case pager changed page.
31245748Smckusick 				 *	Pager is responsible for disposition
31345748Smckusick 				 *	of old page if moved.
31445748Smckusick 				 */
31545748Smckusick 				m = vm_page_lookup(object, offset);
31645748Smckusick 
31750911Smckusick 				cnt.v_pageins++;
31856382Smckusick 				m->flags &= ~PG_FAKE;
31956382Smckusick 				m->flags |= PG_CLEAN;
32045748Smckusick 				pmap_clear_modify(VM_PAGE_TO_PHYS(m));
32145748Smckusick 				break;
32245748Smckusick 			}
32345748Smckusick 
32445748Smckusick 			/*
32556323Shibler 			 * IO error or page outside the range of the pager:
32656323Shibler 			 * cleanup and return an error.
32745748Smckusick 			 */
32856323Shibler 			if (rv == VM_PAGER_ERROR || rv == VM_PAGER_BAD) {
32945748Smckusick 				FREE_PAGE(m);
33045748Smckusick 				UNLOCK_AND_DEALLOCATE;
33145748Smckusick 				return(KERN_PROTECTION_FAILURE); /* XXX */
33245748Smckusick 			}
33356323Shibler 			/*
33456323Shibler 			 * rv == VM_PAGER_FAIL:
33556323Shibler 			 *
33656323Shibler 			 * Page does not exist at this object/offset.
33756323Shibler 			 * Free the bogus page (waking up anyone waiting
33856323Shibler 			 * for it) and continue on to the next object.
33956323Shibler 			 *
34056323Shibler 			 * If this is the top-level object, we must
34156323Shibler 			 * leave the busy page to prevent another
34256323Shibler 			 * thread from rushing past us, and inserting
34356323Shibler 			 * the page in that object at the same time
34456323Shibler 			 * that we are.
34556323Shibler 			 */
34645748Smckusick 			if (object != first_object) {
34745748Smckusick 				FREE_PAGE(m);
34853941Shibler 				/* note that `m' is not used after this */
34945748Smckusick 			}
35045748Smckusick 		}
35145748Smckusick 
35245748Smckusick 		/*
35345748Smckusick 		 * We get here if the object has no pager (or unwiring)
35445748Smckusick 		 * or the pager doesn't have the page.
35545748Smckusick 		 */
35645748Smckusick 		if (object == first_object)
35745748Smckusick 			first_m = m;
35845748Smckusick 
35945748Smckusick 		/*
36045748Smckusick 		 *	Move on to the next object.  Lock the next
36145748Smckusick 		 *	object before unlocking the current one.
36245748Smckusick 		 */
36345748Smckusick 
36445748Smckusick 		offset += object->shadow_offset;
36545748Smckusick 		next_object = object->shadow;
36648386Skarels 		if (next_object == NULL) {
36745748Smckusick 			/*
36845748Smckusick 			 *	If there's no object left, fill the page
36945748Smckusick 			 *	in the top object with zeros.
37045748Smckusick 			 */
37145748Smckusick 			if (object != first_object) {
37245748Smckusick 				object->paging_in_progress--;
37345748Smckusick 				vm_object_unlock(object);
37445748Smckusick 
37545748Smckusick 				object = first_object;
37645748Smckusick 				offset = first_offset;
37745748Smckusick 				m = first_m;
37845748Smckusick 				vm_object_lock(object);
37945748Smckusick 			}
38048386Skarels 			first_m = NULL;
38145748Smckusick 
38245748Smckusick 			vm_page_zero_fill(m);
38350911Smckusick 			cnt.v_zfod++;
38456920Shibler 			m->flags &= ~PG_FAKE;
38545748Smckusick 			break;
38645748Smckusick 		}
38745748Smckusick 		else {
38845748Smckusick 			vm_object_lock(next_object);
38945748Smckusick 			if (object != first_object)
39045748Smckusick 				object->paging_in_progress--;
39145748Smckusick 			vm_object_unlock(object);
39245748Smckusick 			object = next_object;
39345748Smckusick 			object->paging_in_progress++;
39445748Smckusick 		}
39545748Smckusick 	}
39645748Smckusick 
39756920Shibler 	if ((m->flags & (PG_ACTIVE | PG_INACTIVE | PG_BUSY)) != PG_BUSY)
39856920Shibler 		panic("vm_fault: active, inactive or !busy after main loop");
39945748Smckusick 
40045748Smckusick 	/*
40145748Smckusick 	 *	PAGE HAS BEEN FOUND.
40245748Smckusick 	 *	[Loop invariant still holds -- the object lock
40345748Smckusick 	 *	is held.]
40445748Smckusick 	 */
40545748Smckusick 
40645748Smckusick 	old_m = m;	/* save page that would be copied */
40745748Smckusick 
40845748Smckusick 	/*
40945748Smckusick 	 *	If the page is being written, but isn't
41045748Smckusick 	 *	already owned by the top-level object,
41145748Smckusick 	 *	we have to copy it into a new page owned
41245748Smckusick 	 *	by the top-level object.
41345748Smckusick 	 */
41445748Smckusick 
41545748Smckusick 	if (object != first_object) {
41645748Smckusick 	    	/*
41745748Smckusick 		 *	We only really need to copy if we
41845748Smckusick 		 *	want to write it.
41945748Smckusick 		 */
42045748Smckusick 
42145748Smckusick 	    	if (fault_type & VM_PROT_WRITE) {
42245748Smckusick 
42345748Smckusick 			/*
42445748Smckusick 			 *	If we try to collapse first_object at this
42545748Smckusick 			 *	point, we may deadlock when we try to get
42645748Smckusick 			 *	the lock on an intermediate object (since we
42745748Smckusick 			 *	have the bottom object locked).  We can't
42845748Smckusick 			 *	unlock the bottom object, because the page
42945748Smckusick 			 *	we found may move (by collapse) if we do.
43045748Smckusick 			 *
43145748Smckusick 			 *	Instead, we first copy the page.  Then, when
43245748Smckusick 			 *	we have no more use for the bottom object,
43345748Smckusick 			 *	we unlock it and try to collapse.
43445748Smckusick 			 *
43545748Smckusick 			 *	Note that we copy the page even if we didn't
43645748Smckusick 			 *	need to... that's the breaks.
43745748Smckusick 			 */
43845748Smckusick 
43945748Smckusick 		    	/*
44045748Smckusick 			 *	We already have an empty page in
44145748Smckusick 			 *	first_object - use it.
44245748Smckusick 			 */
44345748Smckusick 
44445748Smckusick 			vm_page_copy(m, first_m);
44556920Shibler 			first_m->flags &= ~PG_FAKE;
44645748Smckusick 
44745748Smckusick 			/*
44845748Smckusick 			 *	If another map is truly sharing this
44945748Smckusick 			 *	page with us, we have to flush all
45045748Smckusick 			 *	uses of the original page, since we
45145748Smckusick 			 *	can't distinguish those which want the
45245748Smckusick 			 *	original from those which need the
45345748Smckusick 			 *	new copy.
45449288Shibler 			 *
45549288Shibler 			 *	XXX If we know that only one map has
45649288Shibler 			 *	access to this page, then we could
45749288Shibler 			 *	avoid the pmap_page_protect() call.
45845748Smckusick 			 */
45945748Smckusick 
46045748Smckusick 			vm_page_lock_queues();
46152904Smckusick 			vm_page_activate(m);
46264692Shibler 			vm_page_deactivate(m);
46349288Shibler 			pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
46445748Smckusick 			vm_page_unlock_queues();
46545748Smckusick 
46645748Smckusick 			/*
46745748Smckusick 			 *	We no longer need the old page or object.
46845748Smckusick 			 */
46945748Smckusick 			PAGE_WAKEUP(m);
47045748Smckusick 			object->paging_in_progress--;
47145748Smckusick 			vm_object_unlock(object);
47245748Smckusick 
47345748Smckusick 			/*
47445748Smckusick 			 *	Only use the new page below...
47545748Smckusick 			 */
47645748Smckusick 
47750911Smckusick 			cnt.v_cow_faults++;
47845748Smckusick 			m = first_m;
47945748Smckusick 			object = first_object;
48045748Smckusick 			offset = first_offset;
48145748Smckusick 
48245748Smckusick 			/*
48345748Smckusick 			 *	Now that we've gotten the copy out of the
48445748Smckusick 			 *	way, let's try to collapse the top object.
48545748Smckusick 			 */
48645748Smckusick 			vm_object_lock(object);
48745748Smckusick 			/*
48845748Smckusick 			 *	But we have to play ugly games with
48945748Smckusick 			 *	paging_in_progress to do that...
49045748Smckusick 			 */
49145748Smckusick 			object->paging_in_progress--;
49245748Smckusick 			vm_object_collapse(object);
49345748Smckusick 			object->paging_in_progress++;
49445748Smckusick 		}
49545748Smckusick 		else {
49645748Smckusick 		    	prot &= (~VM_PROT_WRITE);
49756382Smckusick 			m->flags |= PG_COPYONWRITE;
49845748Smckusick 		}
49945748Smckusick 	}
50045748Smckusick 
50156920Shibler 	if (m->flags & (PG_ACTIVE|PG_INACTIVE))
50245748Smckusick 		panic("vm_fault: active or inactive before copy object handling");
50345748Smckusick 
50445748Smckusick 	/*
50545748Smckusick 	 *	If the page is being written, but hasn't been
50645748Smckusick 	 *	copied to the copy-object, we have to copy it there.
50745748Smckusick 	 */
50845748Smckusick     RetryCopy:
50948386Skarels 	if (first_object->copy != NULL) {
51048386Skarels 		vm_object_t copy_object = first_object->copy;
51148386Skarels 		vm_offset_t copy_offset;
51248386Skarels 		vm_page_t copy_m;
51345748Smckusick 
51445748Smckusick 		/*
51545748Smckusick 		 *	We only need to copy if we want to write it.
51645748Smckusick 		 */
51745748Smckusick 		if ((fault_type & VM_PROT_WRITE) == 0) {
51845748Smckusick 			prot &= ~VM_PROT_WRITE;
51956382Smckusick 			m->flags |= PG_COPYONWRITE;
52045748Smckusick 		}
52145748Smckusick 		else {
52245748Smckusick 			/*
52345748Smckusick 			 *	Try to get the lock on the copy_object.
52445748Smckusick 			 */
52545748Smckusick 			if (!vm_object_lock_try(copy_object)) {
52645748Smckusick 				vm_object_unlock(object);
52745748Smckusick 				/* should spin a bit here... */
52845748Smckusick 				vm_object_lock(object);
52945748Smckusick 				goto RetryCopy;
53045748Smckusick 			}
53145748Smckusick 
53245748Smckusick 			/*
53345748Smckusick 			 *	Make another reference to the copy-object,
53445748Smckusick 			 *	to keep it from disappearing during the
53545748Smckusick 			 *	copy.
53645748Smckusick 			 */
53745748Smckusick 			copy_object->ref_count++;
53845748Smckusick 
53945748Smckusick 			/*
54045748Smckusick 			 *	Does the page exist in the copy?
54145748Smckusick 			 */
54245748Smckusick 			copy_offset = first_offset
54345748Smckusick 				- copy_object->shadow_offset;
54445748Smckusick 			copy_m = vm_page_lookup(copy_object, copy_offset);
54548386Skarels 			if (page_exists = (copy_m != NULL)) {
54656382Smckusick 				if (copy_m->flags & PG_BUSY) {
54745748Smckusick #ifdef DOTHREADS
54845748Smckusick 					int	wait_result;
54945748Smckusick 
55045748Smckusick 					/*
55145748Smckusick 					 *	If the page is being brought
55245748Smckusick 					 *	in, wait for it and then retry.
55345748Smckusick 					 */
55445748Smckusick 					PAGE_ASSERT_WAIT(copy_m, !change_wiring);
55545748Smckusick 					RELEASE_PAGE(m);
55645748Smckusick 					copy_object->ref_count--;
55745748Smckusick 					vm_object_unlock(copy_object);
55845748Smckusick 					UNLOCK_THINGS;
55945748Smckusick 					thread_block();
56045748Smckusick 					wait_result = current_thread()->wait_result;
56145748Smckusick 					vm_object_deallocate(first_object);
56245748Smckusick 					if (wait_result != THREAD_AWAKENED)
56345748Smckusick 						return(KERN_SUCCESS);
56445748Smckusick 					goto RetryFault;
56545748Smckusick #else
56645748Smckusick 					/*
56745748Smckusick 					 *	If the page is being brought
56845748Smckusick 					 *	in, wait for it and then retry.
56945748Smckusick 					 */
57045748Smckusick 					PAGE_ASSERT_WAIT(copy_m, !change_wiring);
57145748Smckusick 					RELEASE_PAGE(m);
57245748Smckusick 					copy_object->ref_count--;
57345748Smckusick 					vm_object_unlock(copy_object);
57445748Smckusick 					UNLOCK_THINGS;
57545748Smckusick 					thread_block();
57645748Smckusick 					vm_object_deallocate(first_object);
57745748Smckusick 					goto RetryFault;
57845748Smckusick #endif
57945748Smckusick 				}
58045748Smckusick 			}
58145748Smckusick 
58245748Smckusick 			/*
58345748Smckusick 			 *	If the page is not in memory (in the object)
58445748Smckusick 			 *	and the object has a pager, we have to check
58545748Smckusick 			 *	if the pager has the data in secondary
58645748Smckusick 			 *	storage.
58745748Smckusick 			 */
58845748Smckusick 			if (!page_exists) {
58945748Smckusick 
59045748Smckusick 				/*
59145748Smckusick 				 *	If we don't allocate a (blank) page
59245748Smckusick 				 *	here... another thread could try
59345748Smckusick 				 *	to page it in, allocate a page, and
59445748Smckusick 				 *	then block on the busy page in its
59545748Smckusick 				 *	shadow (first_object).  Then we'd
59645748Smckusick 				 *	trip over the busy page after we
59745748Smckusick 				 *	found that the copy_object's pager
59845748Smckusick 				 *	doesn't have the page...
59945748Smckusick 				 */
60045748Smckusick 				copy_m = vm_page_alloc(copy_object,
60145748Smckusick 								copy_offset);
60248386Skarels 				if (copy_m == NULL) {
60345748Smckusick 					/*
60445748Smckusick 					 *	Wait for a page, then retry.
60545748Smckusick 					 */
60645748Smckusick 					RELEASE_PAGE(m);
60745748Smckusick 					copy_object->ref_count--;
60845748Smckusick 					vm_object_unlock(copy_object);
60945748Smckusick 					UNLOCK_AND_DEALLOCATE;
61045748Smckusick 					VM_WAIT;
61145748Smckusick 					goto RetryFault;
61245748Smckusick 				}
61345748Smckusick 
61448386Skarels 			 	if (copy_object->pager != NULL) {
61545748Smckusick 					vm_object_unlock(object);
61645748Smckusick 					vm_object_unlock(copy_object);
61745748Smckusick 					UNLOCK_MAP;
61845748Smckusick 
61945748Smckusick 					page_exists = vm_pager_has_page(
62045748Smckusick 							copy_object->pager,
62145748Smckusick 							(copy_offset + copy_object->paging_offset));
62245748Smckusick 
62345748Smckusick 					vm_object_lock(copy_object);
62445748Smckusick 
62545748Smckusick 					/*
62645748Smckusick 					 * Since the map is unlocked, someone
62745748Smckusick 					 * else could have copied this object
62845748Smckusick 					 * and put a different copy_object
62945748Smckusick 					 * between the two.  Or, the last
63045748Smckusick 					 * reference to the copy-object (other
63145748Smckusick 					 * than the one we have) may have
63245748Smckusick 					 * disappeared - if that has happened,
63345748Smckusick 					 * we don't need to make the copy.
63445748Smckusick 					 */
63545748Smckusick 					if (copy_object->shadow != object ||
63645748Smckusick 					    copy_object->ref_count == 1) {
63745748Smckusick 						/*
63845748Smckusick 						 *	Gaah... start over!
63945748Smckusick 						 */
64045748Smckusick 						FREE_PAGE(copy_m);
64145748Smckusick 						vm_object_unlock(copy_object);
64245748Smckusick 						vm_object_deallocate(copy_object);
64345748Smckusick 							/* may block */
64445748Smckusick 						vm_object_lock(object);
64545748Smckusick 						goto RetryCopy;
64645748Smckusick 					}
64745748Smckusick 					vm_object_lock(object);
64845748Smckusick 
64945748Smckusick 					if (page_exists) {
65045748Smckusick 						/*
65145748Smckusick 						 *	We didn't need the page
65245748Smckusick 						 */
65345748Smckusick 						FREE_PAGE(copy_m);
65445748Smckusick 					}
65545748Smckusick 				}
65645748Smckusick 			}
65745748Smckusick 			if (!page_exists) {
65845748Smckusick 				/*
65945748Smckusick 				 *	Must copy page into copy-object.
66045748Smckusick 				 */
66145748Smckusick 				vm_page_copy(m, copy_m);
66256920Shibler 				copy_m->flags &= ~PG_FAKE;
66345748Smckusick 
66445748Smckusick 				/*
66545748Smckusick 				 * Things to remember:
66645748Smckusick 				 * 1. The copied page must be marked 'dirty'
66745748Smckusick 				 *    so it will be paged out to the copy
66845748Smckusick 				 *    object.
66945748Smckusick 				 * 2. If the old page was in use by any users
67045748Smckusick 				 *    of the copy-object, it must be removed
67145748Smckusick 				 *    from all pmaps.  (We can't know which
67245748Smckusick 				 *    pmaps use it.)
67345748Smckusick 				 */
67445748Smckusick 				vm_page_lock_queues();
67549288Shibler 				pmap_page_protect(VM_PAGE_TO_PHYS(old_m),
67649288Shibler 						  VM_PROT_NONE);
67756382Smckusick 				copy_m->flags &= ~PG_CLEAN;
67845748Smckusick 				vm_page_activate(copy_m);	/* XXX */
67945748Smckusick 				vm_page_unlock_queues();
68045748Smckusick 
68145748Smckusick 				PAGE_WAKEUP(copy_m);
68245748Smckusick 			}
68345748Smckusick 			/*
68445748Smckusick 			 *	The reference count on copy_object must be
68545748Smckusick 			 *	at least 2: one for our extra reference,
68645748Smckusick 			 *	and at least one from the outside world
68745748Smckusick 			 *	(we checked that when we last locked
68845748Smckusick 			 *	copy_object).
68945748Smckusick 			 */
69045748Smckusick 			copy_object->ref_count--;
69145748Smckusick 			vm_object_unlock(copy_object);
69256382Smckusick 			m->flags &= ~PG_COPYONWRITE;
69345748Smckusick 		}
69445748Smckusick 	}
69545748Smckusick 
69656382Smckusick 	if (m->flags & (PG_ACTIVE | PG_INACTIVE))
69745748Smckusick 		panic("vm_fault: active or inactive before retrying lookup");
69845748Smckusick 
69945748Smckusick 	/*
70045748Smckusick 	 *	We must verify that the maps have not changed
70145748Smckusick 	 *	since our last lookup.
70245748Smckusick 	 */
70345748Smckusick 
70445748Smckusick 	if (!lookup_still_valid) {
70545748Smckusick 		vm_object_t	retry_object;
70645748Smckusick 		vm_offset_t	retry_offset;
70745748Smckusick 		vm_prot_t	retry_prot;
70845748Smckusick 
70945748Smckusick 		/*
71045748Smckusick 		 *	Since map entries may be pageable, make sure we can
71145748Smckusick 		 *	take a page fault on them.
71245748Smckusick 		 */
71345748Smckusick 		vm_object_unlock(object);
71445748Smckusick 
71545748Smckusick 		/*
71645748Smckusick 		 *	To avoid trying to write_lock the map while another
71745748Smckusick 		 *	thread has it read_locked (in vm_map_pageable), we
71845748Smckusick 		 *	do not try for write permission.  If the page is
71945748Smckusick 		 *	still writable, we will get write permission.  If it
72045748Smckusick 		 *	is not, or has been marked needs_copy, we enter the
72145748Smckusick 		 *	mapping without write permission, and will merely
72245748Smckusick 		 *	take another fault.
72345748Smckusick 		 */
72445748Smckusick 		result = vm_map_lookup(&map, vaddr,
72545748Smckusick 				fault_type & ~VM_PROT_WRITE, &entry,
72645748Smckusick 				&retry_object, &retry_offset, &retry_prot,
72745748Smckusick 				&wired, &su);
72845748Smckusick 
72945748Smckusick 		vm_object_lock(object);
73045748Smckusick 
73145748Smckusick 		/*
73245748Smckusick 		 *	If we don't need the page any longer, put it on the
73345748Smckusick 		 *	active list (the easiest thing to do here).  If no
73445748Smckusick 		 *	one needs it, pageout will grab it eventually.
73545748Smckusick 		 */
73645748Smckusick 
73745748Smckusick 		if (result != KERN_SUCCESS) {
73845748Smckusick 			RELEASE_PAGE(m);
73945748Smckusick 			UNLOCK_AND_DEALLOCATE;
74045748Smckusick 			return(result);
74145748Smckusick 		}
74245748Smckusick 
74345748Smckusick 		lookup_still_valid = TRUE;
74445748Smckusick 
74545748Smckusick 		if ((retry_object != first_object) ||
74645748Smckusick 				(retry_offset != first_offset)) {
74745748Smckusick 			RELEASE_PAGE(m);
74845748Smckusick 			UNLOCK_AND_DEALLOCATE;
74945748Smckusick 			goto RetryFault;
75045748Smckusick 		}
75145748Smckusick 
75245748Smckusick 		/*
75345748Smckusick 		 *	Check whether the protection has changed or the object
75445748Smckusick 		 *	has been copied while we left the map unlocked.
75545748Smckusick 		 *	Changing from read to write permission is OK - we leave
75645748Smckusick 		 *	the page write-protected, and catch the write fault.
75745748Smckusick 		 *	Changing from write to read permission means that we
75845748Smckusick 		 *	can't mark the page write-enabled after all.
75945748Smckusick 		 */
76045748Smckusick 		prot &= retry_prot;
76156382Smckusick 		if (m->flags & PG_COPYONWRITE)
76245748Smckusick 			prot &= ~VM_PROT_WRITE;
76345748Smckusick 	}
76445748Smckusick 
76545748Smckusick 	/*
76645748Smckusick 	 * (the various bits we're fiddling with here are locked by
76745748Smckusick 	 * the object's lock)
76845748Smckusick 	 */
76945748Smckusick 
77045748Smckusick 	/* XXX This distorts the meaning of the copy_on_write bit */
77145748Smckusick 
77245748Smckusick 	if (prot & VM_PROT_WRITE)
77356382Smckusick 		m->flags &= ~PG_COPYONWRITE;
77445748Smckusick 
77545748Smckusick 	/*
77645748Smckusick 	 *	It's critically important that a wired-down page be faulted
77745748Smckusick 	 *	only once in each map for which it is wired.
77845748Smckusick 	 */
77945748Smckusick 
78056382Smckusick 	if (m->flags & (PG_ACTIVE | PG_INACTIVE))
78145748Smckusick 		panic("vm_fault: active or inactive before pmap_enter");
78245748Smckusick 
78345748Smckusick 	vm_object_unlock(object);
78445748Smckusick 
78545748Smckusick 	/*
78645748Smckusick 	 *	Put this page into the physical map.
78745748Smckusick 	 *	We had to do the unlock above because pmap_enter
78845748Smckusick 	 *	may cause other faults.   We don't put the
78945748Smckusick 	 *	page back on the active queue until later so
79045748Smckusick 	 *	that the page-out daemon won't find us (yet).
79145748Smckusick 	 */
79245748Smckusick 
79356920Shibler 	pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired);
79445748Smckusick 
79545748Smckusick 	/*
79645748Smckusick 	 *	If the page is not wired down, then put it where the
79745748Smckusick 	 *	pageout daemon can find it.
79845748Smckusick 	 */
79945748Smckusick 	vm_object_lock(object);
80045748Smckusick 	vm_page_lock_queues();
80145748Smckusick 	if (change_wiring) {
80245748Smckusick 		if (wired)
80345748Smckusick 			vm_page_wire(m);
80445748Smckusick 		else
80545748Smckusick 			vm_page_unwire(m);
80645748Smckusick 	}
80745748Smckusick 	else
80845748Smckusick 		vm_page_activate(m);
80945748Smckusick 	vm_page_unlock_queues();
81045748Smckusick 
81145748Smckusick 	/*
81245748Smckusick 	 *	Unlock everything, and return
81345748Smckusick 	 */
81445748Smckusick 
81545748Smckusick 	PAGE_WAKEUP(m);
81645748Smckusick 	UNLOCK_AND_DEALLOCATE;
81745748Smckusick 
81845748Smckusick 	return(KERN_SUCCESS);
81945748Smckusick 
82045748Smckusick }
82145748Smckusick 
82245748Smckusick /*
82345748Smckusick  *	vm_fault_wire:
82445748Smckusick  *
82545748Smckusick  *	Wire down a range of virtual addresses in a map.
82645748Smckusick  */
82758597Shibler int
82858597Shibler vm_fault_wire(map, start, end)
82945748Smckusick 	vm_map_t	map;
83045748Smckusick 	vm_offset_t	start, end;
83145748Smckusick {
83245748Smckusick 	register vm_offset_t	va;
83345748Smckusick 	register pmap_t		pmap;
83458597Shibler 	int			rv;
83545748Smckusick 
83645748Smckusick 	pmap = vm_map_pmap(map);
83745748Smckusick 
83845748Smckusick 	/*
83945748Smckusick 	 *	Inform the physical mapping system that the
84045748Smckusick 	 *	range of addresses may not fault, so that
84145748Smckusick 	 *	page tables and such can be locked down as well.
84245748Smckusick 	 */
84345748Smckusick 
84445748Smckusick 	pmap_pageable(pmap, start, end, FALSE);
84545748Smckusick 
84645748Smckusick 	/*
84745748Smckusick 	 *	We simulate a fault to get the page and enter it
84845748Smckusick 	 *	in the physical map.
84945748Smckusick 	 */
85045748Smckusick 
85145748Smckusick 	for (va = start; va < end; va += PAGE_SIZE) {
85258597Shibler 		rv = vm_fault(map, va, VM_PROT_NONE, TRUE);
85358597Shibler 		if (rv) {
85458597Shibler 			if (va != start)
85558597Shibler 				vm_fault_unwire(map, start, va);
85658597Shibler 			return(rv);
85758597Shibler 		}
85845748Smckusick 	}
85958597Shibler 	return(KERN_SUCCESS);
86045748Smckusick }
86145748Smckusick 
86245748Smckusick 
86345748Smckusick /*
86445748Smckusick  *	vm_fault_unwire:
86545748Smckusick  *
86645748Smckusick  *	Unwire a range of virtual addresses in a map.
86745748Smckusick  */
86845748Smckusick void vm_fault_unwire(map, start, end)
86945748Smckusick 	vm_map_t	map;
87045748Smckusick 	vm_offset_t	start, end;
87145748Smckusick {
87245748Smckusick 
87345748Smckusick 	register vm_offset_t	va, pa;
87445748Smckusick 	register pmap_t		pmap;
87545748Smckusick 
87645748Smckusick 	pmap = vm_map_pmap(map);
87745748Smckusick 
87845748Smckusick 	/*
87945748Smckusick 	 *	Since the pages are wired down, we must be able to
88045748Smckusick 	 *	get their mappings from the physical map system.
88145748Smckusick 	 */
88245748Smckusick 
88345748Smckusick 	vm_page_lock_queues();
88445748Smckusick 
88545748Smckusick 	for (va = start; va < end; va += PAGE_SIZE) {
88645748Smckusick 		pa = pmap_extract(pmap, va);
88745748Smckusick 		if (pa == (vm_offset_t) 0) {
88845748Smckusick 			panic("unwire: page not in pmap");
88945748Smckusick 		}
89045748Smckusick 		pmap_change_wiring(pmap, va, FALSE);
89145748Smckusick 		vm_page_unwire(PHYS_TO_VM_PAGE(pa));
89245748Smckusick 	}
89345748Smckusick 	vm_page_unlock_queues();
89445748Smckusick 
89545748Smckusick 	/*
89645748Smckusick 	 *	Inform the physical mapping system that the range
89745748Smckusick 	 *	of addresses may fault, so that page tables and
89845748Smckusick 	 *	such may be unwired themselves.
89945748Smckusick 	 */
90045748Smckusick 
90145748Smckusick 	pmap_pageable(pmap, start, end, TRUE);
90245748Smckusick 
90345748Smckusick }
90445748Smckusick 
90545748Smckusick /*
90645748Smckusick  *	Routine:
90745748Smckusick  *		vm_fault_copy_entry
90845748Smckusick  *	Function:
90945748Smckusick  *		Copy all of the pages from a wired-down map entry to another.
91045748Smckusick  *
91145748Smckusick  *	In/out conditions:
91245748Smckusick  *		The source and destination maps must be locked for write.
91345748Smckusick  *		The source map entry must be wired down (or be a sharing map
91445748Smckusick  *		entry corresponding to a main map entry that is wired down).
91545748Smckusick  */
91645748Smckusick 
91745748Smckusick void vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry)
91845748Smckusick 	vm_map_t	dst_map;
91945748Smckusick 	vm_map_t	src_map;
92045748Smckusick 	vm_map_entry_t	dst_entry;
92145748Smckusick 	vm_map_entry_t	src_entry;
92245748Smckusick {
92345748Smckusick 
92445748Smckusick 	vm_object_t	dst_object;
92545748Smckusick 	vm_object_t	src_object;
92645748Smckusick 	vm_offset_t	dst_offset;
92745748Smckusick 	vm_offset_t	src_offset;
92845748Smckusick 	vm_prot_t	prot;
92945748Smckusick 	vm_offset_t	vaddr;
93045748Smckusick 	vm_page_t	dst_m;
93145748Smckusick 	vm_page_t	src_m;
93245748Smckusick 
93345748Smckusick #ifdef	lint
93445748Smckusick 	src_map++;
93560345Storek #endif
93645748Smckusick 
93745748Smckusick 	src_object = src_entry->object.vm_object;
93845748Smckusick 	src_offset = src_entry->offset;
93945748Smckusick 
94045748Smckusick 	/*
94145748Smckusick 	 *	Create the top-level object for the destination entry.
94245748Smckusick 	 *	(Doesn't actually shadow anything - we copy the pages
94345748Smckusick 	 *	directly.)
94445748Smckusick 	 */
94545748Smckusick 	dst_object = vm_object_allocate(
94645748Smckusick 			(vm_size_t) (dst_entry->end - dst_entry->start));
94745748Smckusick 
94845748Smckusick 	dst_entry->object.vm_object = dst_object;
94945748Smckusick 	dst_entry->offset = 0;
95045748Smckusick 
95145748Smckusick 	prot  = dst_entry->max_protection;
95245748Smckusick 
95345748Smckusick 	/*
95445748Smckusick 	 *	Loop through all of the pages in the entry's range, copying
95545748Smckusick 	 *	each one from the source object (it should be there) to the
95645748Smckusick 	 *	destination object.
95745748Smckusick 	 */
95845748Smckusick 	for (vaddr = dst_entry->start, dst_offset = 0;
95945748Smckusick 	     vaddr < dst_entry->end;
96045748Smckusick 	     vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) {
96145748Smckusick 
96245748Smckusick 		/*
96345748Smckusick 		 *	Allocate a page in the destination object
96445748Smckusick 		 */
96545748Smckusick 		vm_object_lock(dst_object);
96645748Smckusick 		do {
96745748Smckusick 			dst_m = vm_page_alloc(dst_object, dst_offset);
96848386Skarels 			if (dst_m == NULL) {
96945748Smckusick 				vm_object_unlock(dst_object);
97045748Smckusick 				VM_WAIT;
97145748Smckusick 				vm_object_lock(dst_object);
97245748Smckusick 			}
97348386Skarels 		} while (dst_m == NULL);
97445748Smckusick 
97545748Smckusick 		/*
97645748Smckusick 		 *	Find the page in the source object, and copy it in.
97745748Smckusick 		 *	(Because the source is wired down, the page will be
97845748Smckusick 		 *	in memory.)
97945748Smckusick 		 */
98045748Smckusick 		vm_object_lock(src_object);
98145748Smckusick 		src_m = vm_page_lookup(src_object, dst_offset + src_offset);
98248386Skarels 		if (src_m == NULL)
98345748Smckusick 			panic("vm_fault_copy_wired: page missing");
98445748Smckusick 
98545748Smckusick 		vm_page_copy(src_m, dst_m);
98645748Smckusick 
98745748Smckusick 		/*
98845748Smckusick 		 *	Enter it in the pmap...
98945748Smckusick 		 */
99045748Smckusick 		vm_object_unlock(src_object);
99145748Smckusick 		vm_object_unlock(dst_object);
99245748Smckusick 
99345748Smckusick 		pmap_enter(dst_map->pmap, vaddr, VM_PAGE_TO_PHYS(dst_m),
99445748Smckusick 				prot, FALSE);
99545748Smckusick 
99645748Smckusick 		/*
99745748Smckusick 		 *	Mark it no longer busy, and put it on the active list.
99845748Smckusick 		 */
99945748Smckusick 		vm_object_lock(dst_object);
100045748Smckusick 		vm_page_lock_queues();
100145748Smckusick 		vm_page_activate(dst_m);
100245748Smckusick 		vm_page_unlock_queues();
100345748Smckusick 		PAGE_WAKEUP(dst_m);
100445748Smckusick 		vm_object_unlock(dst_object);
100545748Smckusick 	}
100645748Smckusick 
100745748Smckusick }
1008