xref: /netbsd-src/sys/arch/alpha/alpha/pmap.c (revision 179b12252ecaf3553d9c2b7458ce62b6a2203d0c)
1 /* $NetBSD: pmap.c,v 1.252 2009/11/26 00:19:11 matt Exp $ */
2 
3 /*-
4  * Copyright (c) 1998, 1999, 2000, 2001, 2007, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center and by Chris G. Demetriou.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * This code is derived from software contributed to Berkeley by
38  * the Systems Programming Group of the University of Utah Computer
39  * Science Department.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  * 3. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)pmap.c	8.6 (Berkeley) 5/27/94
66  */
67 
68 /*
69  * DEC Alpha physical map management code.
70  *
71  * History:
72  *
73  *	This pmap started life as a Motorola 68851/68030 pmap,
74  *	written by Mike Hibler at the University of Utah.
75  *
76  *	It was modified for the DEC Alpha by Chris Demetriou
77  *	at Carnegie Mellon University.
78  *
79  *	Support for non-contiguous physical memory was added by
80  *	Jason R. Thorpe of the Numerical Aerospace Simulation
81  *	Facility, NASA Ames Research Center and Chris Demetriou.
82  *
83  *	Page table management and a major cleanup were undertaken
84  *	by Jason R. Thorpe, with lots of help from Ross Harvey of
85  *	Avalon Computer Systems and from Chris Demetriou.
86  *
87  *	Support for the new UVM pmap interface was written by
88  *	Jason R. Thorpe.
89  *
90  *	Support for ASNs was written by Jason R. Thorpe, again
91  *	with help from Chris Demetriou and Ross Harvey.
92  *
93  *	The locking protocol was written by Jason R. Thorpe,
94  *	using Chuck Cranor's i386 pmap for UVM as a model.
95  *
96  *	TLB shootdown code was written by Jason R. Thorpe.
97  *
98  *	Multiprocessor modifications by Andrew Doran.
99  *
100  * Notes:
101  *
102  *	All page table access is done via K0SEG.  The one exception
103  *	to this is for kernel mappings.  Since all kernel page
104  *	tables are pre-allocated, we can use the Virtual Page Table
105  *	to access PTEs that map K1SEG addresses.
106  *
107  *	Kernel page table pages are statically allocated in
108  *	pmap_bootstrap(), and are never freed.  In the future,
109  *	support for dynamically adding additional kernel page
110  *	table pages may be added.  User page table pages are
111  *	dynamically allocated and freed.
112  *
113  * Bugs/misfeatures:
114  *
115  *	- Some things could be optimized.
116  */
117 
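/*
 * Illustrative sketch (not part of the original source): because all
 * kernel page tables are pre-allocated, the PTE for a K1SEG address can
 * be fetched through the recursively-mapped Virtual Page Table instead
 * of a full three-level table walk.  The non-DEBUG variant of
 * PMAP_KERNEL_PTE() later in this file does exactly this.
 */
#if 0
	pt_entry_t *pte = &VPT[VPT_INDEX(va)];	/* va is a K1SEG address */
#endif
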
118 /*
119  *	Manages physical address maps.
120  *
121  *	Since the information managed by this module is
122  *	also stored by the logical address mapping module,
123  *	this module may throw away valid virtual-to-physical
124  *	mappings at almost any time.  However, invalidations
125  *	of virtual-to-physical mappings must be done as
126  *	requested.
127  *
128  *	In order to cope with hardware architectures which
129  *	make virtual-to-physical map invalidates expensive,
130  *	this module may delay invalidation or protection-reduction
131  *	operations until such time as they are actually
132  *	necessary.  This module is given full information as
133  *	to which processors are currently using which maps,
134  *	and to when physical maps must be made correct.
135  */
136 
137 #include "opt_lockdebug.h"
138 #include "opt_sysv.h"
139 #include "opt_multiprocessor.h"
140 
141 #include <sys/cdefs.h>			/* RCS ID & Copyright macro defns */
142 
143 __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.252 2009/11/26 00:19:11 matt Exp $");
144 
145 #include <sys/param.h>
146 #include <sys/systm.h>
147 #include <sys/kernel.h>
148 #include <sys/proc.h>
149 #include <sys/malloc.h>
150 #include <sys/pool.h>
151 #include <sys/buf.h>
152 #include <sys/shm.h>
153 #include <sys/atomic.h>
154 #include <sys/cpu.h>
155 
156 #include <uvm/uvm.h>
157 
158 #if defined(_PMAP_MAY_USE_PROM_CONSOLE) || defined(MULTIPROCESSOR)
159 #include <machine/rpb.h>
160 #endif
161 
162 #ifdef DEBUG
163 #define	PDB_FOLLOW	0x0001
164 #define	PDB_INIT	0x0002
165 #define	PDB_ENTER	0x0004
166 #define	PDB_REMOVE	0x0008
167 #define	PDB_CREATE	0x0010
168 #define	PDB_PTPAGE	0x0020
169 #define	PDB_ASN		0x0040
170 #define	PDB_BITS	0x0080
171 #define	PDB_COLLECT	0x0100
172 #define	PDB_PROTECT	0x0200
173 #define	PDB_BOOTSTRAP	0x1000
174 #define	PDB_PARANOIA	0x2000
175 #define	PDB_WIRING	0x4000
176 #define	PDB_PVDUMP	0x8000
177 
178 int debugmap = 0;
179 int pmapdebug = PDB_PARANOIA;
180 #endif
181 
182 /*
183  * Given a map and a machine independent protection code,
184  * convert to an alpha protection code.
185  */
186 #define pte_prot(m, p)	(protection_codes[m == pmap_kernel() ? 0 : 1][p])
187 static int	protection_codes[2][8];
188 
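/*
 * Illustrative sketch (an assumption, not the actual alpha_protection_init()
 * defined later in this file): one plausible way the protection_codes[][]
 * table could be populated.  Kernel entries always carry PG_ASM; readable
 * or executable protections add the read-enable bits, writable protections
 * add the write-enable bits.  The example_* name is a placeholder.
 */
#if 0
static void
example_protection_init(void)
{
	int prot;

	for (prot = 0; prot < 8; prot++) {
		protection_codes[0][prot] = PG_ASM;	/* kernel */
		protection_codes[1][prot] = 0;		/* user */

		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
			protection_codes[0][prot] |= PG_KRE;
			protection_codes[1][prot] |= PG_KRE | PG_URE;
		}
		if (prot & VM_PROT_WRITE) {
			protection_codes[0][prot] |= PG_KWE;
			protection_codes[1][prot] |= PG_KWE | PG_UWE;
		}
	}
}
#endif
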
189 /*
190  * kernel_lev1map:
191  *
192  *	Kernel level 1 page table.  This maps all kernel level 2
193  *	page table pages, and is used as a template for all user
194  *	pmap level 1 page tables.  When a new user level 1 page
195  *	table is allocated, all kernel_lev1map PTEs for kernel
196  *	addresses are copied to the new map.
197  *
198  *	The kernel also has an initial set of kernel level 2 page
199  *	table pages.  These map the kernel level 3 page table pages.
200  *	As kernel level 3 page table pages are added, more level 2
201  *	page table pages may be added to map them.  These pages are
202  *	never freed.
203  *
204  *	Finally, the kernel also has an initial set of kernel level
205  *	3 page table pages.  These map pages in K1SEG.  More level
206  *	3 page table pages may be added at run-time if additional
207  *	K1SEG address space is required.  These pages are never freed.
208  *
209  * NOTE: When mappings are inserted into the kernel pmap, all
210  * level 2 and level 3 page table pages must already be allocated
211  * and mapped into the parent page table.
212  */
213 pt_entry_t	*kernel_lev1map;
214 
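/*
 * Illustrative sketch (an assumption, not the actual code): how a newly
 * allocated user level 1 page table inherits the kernel portion of
 * kernel_lev1map, as described above.  The real work is done by the L1 PT
 * pool cache constructor (pmap_l1pt_ctor()); the example_* function name
 * and "new_l1pt" are placeholders.
 */
#if 0
static void
example_init_user_lev1map(pt_entry_t *new_l1pt)
{
	u_long i;

	/* The user portion starts out empty. */
	for (i = 0; i < l1pte_index(VM_MIN_KERNEL_ADDRESS); i++)
		new_l1pt[i] = 0;

	/* The kernel portion is copied from kernel_lev1map. */
	for (i = l1pte_index(VM_MIN_KERNEL_ADDRESS);
	     i <= l1pte_index(VM_MAX_KERNEL_ADDRESS); i++)
		new_l1pt[i] = kernel_lev1map[i];
}
#endif
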
215 /*
216  * Virtual Page Table.
217  */
218 static pt_entry_t *VPT;
219 
220 static struct pmap	kernel_pmap_store
221 	[(PMAP_SIZEOF(ALPHA_MAXPROCS) + sizeof(struct pmap) - 1)
222 		/ sizeof(struct pmap)];
223 struct pmap *const kernel_pmap_ptr = kernel_pmap_store;
224 
225 paddr_t    	avail_start;	/* PA of first available physical page */
226 paddr_t		avail_end;	/* PA of last available physical page */
227 static vaddr_t	virtual_end;	/* VA of last avail page (end of kernel AS) */
228 
229 static bool pmap_initialized;	/* Has pmap_init completed? */
230 
231 u_long		pmap_pages_stolen;	/* instrumentation */
232 
233 /*
234  * This variable contains the number of CPU IDs we need to allocate
235  * space for when allocating the pmap structure.  It is used to
236  * size a per-CPU array of ASN and ASN Generation number.
237  */
238 static u_long 	pmap_ncpuids;
239 
240 #ifndef PMAP_PV_LOWAT
241 #define	PMAP_PV_LOWAT	16
242 #endif
243 int		pmap_pv_lowat = PMAP_PV_LOWAT;
244 
245 /*
246  * List of all pmaps, used to update them when e.g. additional kernel
247  * page tables are allocated.  This list is kept LRU-ordered by
248  * pmap_activate().
249  */
250 static TAILQ_HEAD(, pmap) pmap_all_pmaps;
251 
252 /*
253  * The pools from which pmap structures and sub-structures are allocated.
254  */
255 static struct pool_cache pmap_pmap_cache;
256 static struct pool_cache pmap_l1pt_cache;
257 static struct pool_cache pmap_pv_cache;
258 
259 /*
260  * Address Space Numbers.
261  *
262  * On many implementations of the Alpha architecture, the TLB entries and
263  * I-cache blocks are tagged with a unique number within an implementation-
264  * specified range.  When a process context becomes active, the ASN is used
265  * to match TLB entries; if a TLB entry for a particular VA does not match
266  * the current ASN, it is ignored (one could think of the processor as
267  * having a collection of <max ASN> separate TLBs).  This allows operating
268  * system software to skip the TLB flush that would otherwise be necessary
269  * at context switch time.
270  *
271  * Alpha PTEs have a bit in them (PG_ASM - Address Space Match) that
272  * causes TLB entries to match any ASN.  The PALcode also provides
273  * a TBI (Translation Buffer Invalidate) operation that flushes all
274  * TLB entries that _do not_ have PG_ASM.  We use this bit for kernel
275  * mappings, so that invalidation of all user mappings does not invalidate
276  * kernel mappings (which are consistent across all processes).
277  *
278  * pmap_next_asn always indicates the next ASN to use.  When
279  * pmap_next_asn exceeds pmap_max_asn, we start a new ASN generation.
280  *
281  * When a new ASN generation is created, the per-process (i.e. non-PG_ASM)
282  * TLB entries and the I-cache are flushed, the generation number is bumped,
283  * and pmap_next_asn is changed to indicate the first non-reserved ASN.
284  *
285  * We reserve ASN #0 for pmaps that use the global kernel_lev1map.  This
286  * prevents the following scenario:
287  *
288  *	* New ASN generation starts, and process A is given ASN #0.
289  *
290  *	* A new process B (and thus new pmap) is created.  The ASN,
291  *	  for lack of a better value, is initialized to 0.
292  *
293  *	* Process B runs.  It is now using the TLB entries tagged
294  *	  by process A.  *poof*
295  *
296  * In the scenario above, in addition to the processor using incorrect
297  * TLB entries, the PALcode might use incorrect information to service a
298  * TLB miss.  (The PALcode uses the recursively mapped Virtual Page Table
299  * to locate the PTE for a faulting address, and tagged TLB entries exist
300  * for the Virtual Page Table addresses in order to speed up this procedure,
301  * as well.)
302  *
303  * By reserving an ASN for kernel_lev1map users, we are guaranteeing that
304  * new pmaps will initially run with no TLB entries for user addresses
305  * or VPT mappings that map user page tables.  Since kernel_lev1map only
306  * contains mappings for kernel addresses, and since those mappings
307  * are always made with PG_ASM, sharing an ASN for kernel_lev1map users is
308  * safe (since PG_ASM mappings match any ASN).
309  *
310  * On processors that do not support ASNs, the PALcode invalidates
311  * the TLB and I-cache automatically on swpctx.  We still go
312  * through the motions of assigning an ASN (really, just refreshing
313  * the ASN generation in this particular case) to keep the logic sane
314  * in other parts of the code.
315  */
316 static u_int	pmap_max_asn;		/* max ASN supported by the system */
317 					/* next ASN and cur ASN generation */
318 static struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS];
319 
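/*
 * Illustrative sketch (an assumption, not the actual pmap_asn_alloc()
 * defined later in this file): the allocation policy described above.
 * A pmap whose per-CPU generation is stale gets the next free ASN; when
 * the ASNs are exhausted, the non-ASM TLB entries and the I-cache are
 * flushed and a new generation begins at ASN 1 (ASN 0 stays reserved for
 * kernel_lev1map users).  The example_* name is a placeholder.
 */
#if 0
static void
example_asn_alloc(pmap_t pmap, long cpu_id)
{
	struct pmap_asn_info *pma = &pmap->pm_asni[cpu_id];
	struct pmap_asn_info *cpma = &pmap_asn_info[cpu_id];

	if (pma->pma_asn != PMAP_ASN_RESERVED &&
	    pma->pma_asngen == cpma->pma_asngen)
		return;				/* ASN is still valid */

	if (cpma->pma_asn > pmap_max_asn) {
		ALPHA_TBIAP();			/* flush non-ASM TLB entries */
		alpha_pal_imb();		/* ...and the I-cache */
		cpma->pma_asngen++;
		cpma->pma_asn = 1;		/* ASN 0 is reserved */
	}
	pma->pma_asn = cpma->pma_asn++;
	pma->pma_asngen = cpma->pma_asngen;
}
#endif
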
320 /*
321  * Locking:
322  *
323  *	READ/WRITE LOCKS
324  *	----------------
325  *
326  *	* pmap_main_lock - This lock is used to prevent deadlock and/or
327  *	  provide mutex access to the pmap module.  Most operations lock
328  *	  the pmap first, then PV lists as needed.  However, some operations,
329  *	  such as pmap_page_protect(), lock the PV lists before locking
330  *	  the pmaps.  To prevent deadlock, we require a mutex lock on the
331  *	  pmap module if locking in the PV->pmap direction.  This is
332  *	  implemented by acquiring a (shared) read lock on pmap_main_lock
333  *	  if locking pmap->PV and a (exclusive) write lock if locking in
334  *	  the PV->pmap direction.  Since only one thread can hold a write
335  *	  lock at a time, this provides the mutex.
336  *
337  *	MUTEXES
338  *	-------
339  *
340  *	* pm_lock (per-pmap) - This lock protects all of the members
341  *	  of the pmap structure itself.  This lock will be asserted
342  *	  in pmap_activate() and pmap_deactivate() from a critical
343  *	  section of mi_switch(), and must never sleep.  Note that
344  *	  in the case of the kernel pmap, interrupts which cause
345  *	  memory allocation *must* be blocked while this lock is
346  *	  asserted.
347  *
348  *	* pvh_lock (global hash) - These locks protect the PV lists
349  *	  for managed pages.
350  *
351  *	* pmap_all_pmaps_lock - This lock protects the global list of
352  *	  all pmaps.  Note that a pm_lock must never be held while this
353  *	  lock is held.
354  *
355  *	* pmap_growkernel_lock - This lock protects pmap_growkernel()
356  *	  and the virtual_end variable.
357  *
358  *	  There is a lock ordering constraint for pmap_growkernel_lock.
359  *	  pmap_growkernel() acquires the locks in the following order:
360  *
361  *		pmap_growkernel_lock (write) -> pmap_all_pmaps_lock ->
362  *		    pmap->pm_lock
363  *
364  *	  We need to ensure consistency between user pmaps and the
365  *	  kernel_lev1map.  For this reason, pmap_growkernel_lock must
366  *	  be held to prevent kernel_lev1map from changing while pmaps
367  *	  are being added to / removed from the global pmaps list.
368  *
369  *	Address space number management (global ASN counters and per-pmap
370  *	ASN state) is not locked; it uses arrays of values indexed
371  *	per-processor.
372  *
373  *	All internal functions which operate on a pmap are called
374  *	with the pmap already locked by the caller (which will be
375  *	an interface function).
376  */
377 static krwlock_t pmap_main_lock;
378 static kmutex_t pmap_all_pmaps_lock;
379 static krwlock_t pmap_growkernel_lock;
380 
381 #define	PMAP_MAP_TO_HEAD_LOCK()		rw_enter(&pmap_main_lock, RW_READER)
382 #define	PMAP_MAP_TO_HEAD_UNLOCK()	rw_exit(&pmap_main_lock)
383 #define	PMAP_HEAD_TO_MAP_LOCK()		rw_enter(&pmap_main_lock, RW_WRITER)
384 #define	PMAP_HEAD_TO_MAP_UNLOCK()	rw_exit(&pmap_main_lock)
385 
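/*
 * Illustrative sketch (an assumption, not code taken from this file): the
 * two locking directions described above.  The pmap->PV direction takes
 * pmap_main_lock shared, so many such operations can run in parallel; the
 * PV->pmap direction takes it exclusive, which keeps the two directions
 * from deadlocking against each other.
 */
#if 0
	/* pmap -> PV direction, e.g. pmap_remove(): */
	PMAP_MAP_TO_HEAD_LOCK();
	PMAP_LOCK(pmap);
	/* ... modify PTEs, locking PV lists as needed ... */
	PMAP_UNLOCK(pmap);
	PMAP_MAP_TO_HEAD_UNLOCK();

	/* PV -> pmap direction, e.g. pmap_page_protect(): */
	PMAP_HEAD_TO_MAP_LOCK();
	/* ... lock the page's PV list, then each pv->pv_pmap ... */
	PMAP_HEAD_TO_MAP_UNLOCK();
#endif
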
386 struct {
387 	kmutex_t lock;
388 } __aligned(64) static pmap_pvh_locks[64] __aligned(64);
389 
390 static inline kmutex_t *
391 pmap_pvh_lock(struct vm_page *pg)
392 {
393 
394 	/* Cut bits 11-6 out of page address and use directly as offset. */
395 	return (kmutex_t *)((uintptr_t)&pmap_pvh_locks +
396 	    ((uintptr_t)pg & (63 << 6)));
397 }
398 
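/*
 * Typical use of the PV hash locks above (illustrative; it mirrors the
 * pattern in pmap_page_protect() and pmap_enter() later in this file).
 */
#if 0
	kmutex_t *lock = pmap_pvh_lock(pg);

	mutex_enter(lock);
	/* ... walk or modify pg->mdpage.pvh_list / pvh_attrs ... */
	mutex_exit(lock);
#endif
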
399 #if defined(MULTIPROCESSOR)
400 /*
401  * TLB Shootdown:
402  *
403  * When a mapping is changed in a pmap, the TLB entry corresponding to
404  * the virtual address must be invalidated on all processors.  In order
405  * to accomplish this on systems with multiple processors, messages are
406  * sent from the processor which performs the mapping change to all
407  * processors on which the pmap is active.  For the other processors,
408  * the pmap's ASN generation for that processor is invalidated, so that
409  * the next time the pmap is activated on that processor, a new ASN
410  * will be allocated (which implicitly invalidates all TLB entries).
411  *
412  * Note, we can use the pool allocator to allocate job entries
413  * since pool pages are mapped with K0SEG, not with the TLB.
414  */
415 struct pmap_tlb_shootdown_job {
416 	TAILQ_ENTRY(pmap_tlb_shootdown_job) pj_list;
417 	vaddr_t pj_va;			/* virtual address */
418 	pmap_t pj_pmap;			/* the pmap which maps the address */
419 	pt_entry_t pj_pte;		/* the PTE bits */
420 };
421 
422 static struct pmap_tlb_shootdown_q {
423 	TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_head;	/* queue 16b */
424 	kmutex_t pq_lock;		/* spin lock on queue 16b */
425 	int pq_pte;			/* aggregate PTE bits 4b */
426 	int pq_count;			/* number of pending requests 4b */
427 	int pq_tbia;			/* pending global flush 4b */
428 	uint8_t pq_pad[64-16-16-4-4-4];	/* pad to 64 bytes */
429 } pmap_tlb_shootdown_q[ALPHA_MAXPROCS] __aligned(CACHE_LINE_SIZE);
430 
431 /* If we have more pending jobs than this, we just nail the whole TLB. */
432 #define	PMAP_TLB_SHOOTDOWN_MAXJOBS	6
433 
434 static struct pool_cache pmap_tlb_shootdown_job_cache;
435 #endif /* MULTIPROCESSOR */
436 
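/*
 * Illustrative shootdown sequence (mirrors the usage in pmap_protect()
 * and pmap_page_protect() below): jobs are queued per-VA while the pmap
 * is locked, and the remote processors are kicked once at the end.  The
 * variable names here are placeholders.
 */
#if 0
	PMAP_TLB_SHOOTDOWN_CPUSET_DECL

	PMAP_LOCK(pmap);
	PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id);
	PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0);
	PMAP_UNLOCK(pmap);

	PMAP_TLB_SHOOTNOW();
#endif
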
437 /*
438  * Internal routines
439  */
440 static void	alpha_protection_init(void);
441 static bool	pmap_remove_mapping(pmap_t, vaddr_t, pt_entry_t *, bool, long);
442 static void	pmap_changebit(struct vm_page *, pt_entry_t, pt_entry_t, long);
443 
444 /*
445  * PT page management functions.
446  */
447 static int	pmap_lev1map_create(pmap_t, long);
448 static void	pmap_lev1map_destroy(pmap_t, long);
449 static int	pmap_ptpage_alloc(pmap_t, pt_entry_t *, int);
450 static void	pmap_ptpage_free(pmap_t, pt_entry_t *);
451 static void	pmap_l3pt_delref(pmap_t, vaddr_t, pt_entry_t *, long);
452 static void	pmap_l2pt_delref(pmap_t, pt_entry_t *, pt_entry_t *, long);
453 static void	pmap_l1pt_delref(pmap_t, pt_entry_t *, long);
454 
455 static void	*pmap_l1pt_alloc(struct pool *, int);
456 static void	pmap_l1pt_free(struct pool *, void *);
457 
458 static struct pool_allocator pmap_l1pt_allocator = {
459 	pmap_l1pt_alloc, pmap_l1pt_free, 0,
460 };
461 
462 static int	pmap_l1pt_ctor(void *, void *, int);
463 
464 /*
465  * PV table management functions.
466  */
467 static int	pmap_pv_enter(pmap_t, struct vm_page *, vaddr_t, pt_entry_t *,
468 			      bool);
469 static void	pmap_pv_remove(pmap_t, struct vm_page *, vaddr_t, bool);
470 static void	*pmap_pv_page_alloc(struct pool *, int);
471 static void	pmap_pv_page_free(struct pool *, void *);
472 
473 static struct pool_allocator pmap_pv_page_allocator = {
474 	pmap_pv_page_alloc, pmap_pv_page_free, 0,
475 };
476 
477 #ifdef DEBUG
478 void	pmap_pv_dump(paddr_t);
479 #endif
480 
481 #define	pmap_pv_alloc()		pool_cache_get(&pmap_pv_cache, PR_NOWAIT)
482 #define	pmap_pv_free(pv)	pool_cache_put(&pmap_pv_cache, (pv))
483 
484 /*
485  * ASN management functions.
486  */
487 static void	pmap_asn_alloc(pmap_t, long);
488 
489 /*
490  * Misc. functions.
491  */
492 static bool	pmap_physpage_alloc(int, paddr_t *);
493 static void	pmap_physpage_free(paddr_t);
494 static int	pmap_physpage_addref(void *);
495 static int	pmap_physpage_delref(void *);
496 
497 /*
498  * PMAP_ISACTIVE{,_TEST}:
499  *
500  *	Check to see if a pmap is active on the current processor.
501  */
502 #define	PMAP_ISACTIVE_TEST(pm, cpu_id)					\
503 	(((pm)->pm_cpus & (1UL << (cpu_id))) != 0)
504 
505 #if defined(DEBUG) && !defined(MULTIPROCESSOR)
506 #define	PMAP_ISACTIVE(pm, cpu_id)					\
507 ({									\
508 	/*								\
509 	 * XXX This test is not MP-safe.				\
510 	 */								\
511 	int isactive_ = PMAP_ISACTIVE_TEST(pm, cpu_id);			\
512 									\
513 	if ((curlwp->l_flag & LW_IDLE) != 0 &&				\
514 	    curproc->p_vmspace != NULL &&				\
515 	   ((curproc->p_sflag & PS_WEXIT) == 0) &&			\
516 	   (isactive_ ^ ((pm) == curproc->p_vmspace->vm_map.pmap)))	\
517 		panic("PMAP_ISACTIVE");					\
518 	(isactive_);							\
519 })
520 #else
521 #define	PMAP_ISACTIVE(pm, cpu_id)	PMAP_ISACTIVE_TEST(pm, cpu_id)
522 #endif /* DEBUG && !MULTIPROCESSOR */
523 
524 /*
525  * PMAP_ACTIVATE_ASN_SANITY:
526  *
527  *	DEBUG sanity checks for ASNs within PMAP_ACTIVATE.
528  */
529 #ifdef DEBUG
530 #define	PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id)				\
531 do {									\
532 	struct pmap_asn_info *__pma = &(pmap)->pm_asni[(cpu_id)];	\
533 	struct pmap_asn_info *__cpma = &pmap_asn_info[(cpu_id)];	\
534 									\
535 	if ((pmap)->pm_lev1map == kernel_lev1map) {			\
536 		/*							\
537 		 * This pmap implementation also ensures that pmaps	\
538 		 * referencing kernel_lev1map use a reserved ASN	\
539 		 * to prevent the PALcode from servicing a TLB		\
540 		 * miss with the wrong PTE.				\
541 		 */							\
542 		if (__pma->pma_asn != PMAP_ASN_RESERVED) {		\
543 			printf("kernel_lev1map with non-reserved ASN "	\
544 			    "(line %d)\n", __LINE__);			\
545 			panic("PMAP_ACTIVATE_ASN_SANITY");		\
546 		}							\
547 	} else {							\
548 		if (__pma->pma_asngen != __cpma->pma_asngen) {		\
549 			/*						\
550 			 * ASN generation number isn't valid!		\
551 			 */						\
552 			printf("pmap asngen %lu, current %lu "		\
553 			    "(line %d)\n",				\
554 			    __pma->pma_asngen,				\
555 			    __cpma->pma_asngen,				\
556 			    __LINE__);					\
557 			panic("PMAP_ACTIVATE_ASN_SANITY");		\
558 		}							\
559 		if (__pma->pma_asn == PMAP_ASN_RESERVED) {		\
560 			/*						\
561 			 * DANGER WILL ROBINSON!  We're going to	\
562 			 * pollute the VPT TLB entries!			\
563 			 */						\
564 			printf("Using reserved ASN! (line %d)\n",	\
565 			    __LINE__);					\
566 			panic("PMAP_ACTIVATE_ASN_SANITY");		\
567 		}							\
568 	}								\
569 } while (/*CONSTCOND*/0)
570 #else
571 #define	PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id)	/* nothing */
572 #endif
573 
574 /*
575  * PMAP_ACTIVATE:
576  *
577  *	This is essentially the guts of pmap_activate(), without
578  *	ASN allocation.  This is used by pmap_activate(),
579  *	pmap_lev1map_create(), and pmap_lev1map_destroy().
580  *
581  *	This is called only when it is known that a pmap is "active"
582  *	on the current processor; the ASN must already be valid.
583  */
584 #define	PMAP_ACTIVATE(pmap, l, cpu_id)					\
585 do {									\
586 	struct pcb *pcb = lwp_getpcb(l);				\
587 	PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id);				\
588 									\
589 	pcb->pcb_hw.apcb_ptbr =				\
590 	    ALPHA_K0SEG_TO_PHYS((vaddr_t)(pmap)->pm_lev1map) >> PGSHIFT; \
591 	pcb->pcb_hw.apcb_asn = (pmap)->pm_asni[(cpu_id)].pma_asn;	\
592 									\
593 	if ((l) == curlwp) {						\
594 		/*							\
595 		 * Page table base register has changed; switch to	\
596 		 * our own context again so that it will take effect.	\
597 		 */							\
598 		(void) alpha_pal_swpctx((u_long)l->l_md.md_pcbpaddr);	\
599 	}								\
600 } while (/*CONSTCOND*/0)
601 
602 /*
603  * PMAP_SET_NEEDISYNC:
604  *
605  *	Mark that a user pmap needs an I-stream synch on its
606  *	way back out to userspace.
607  */
608 #define	PMAP_SET_NEEDISYNC(pmap)	(pmap)->pm_needisync = ~0UL
609 
610 /*
611  * PMAP_SYNC_ISTREAM:
612  *
613  *	Synchronize the I-stream for the specified pmap.  For user
614  *	pmaps, this is deferred until a process using the pmap returns
615  *	to userspace.
616  */
617 #if defined(MULTIPROCESSOR)
618 #define	PMAP_SYNC_ISTREAM_KERNEL()					\
619 do {									\
620 	alpha_pal_imb();						\
621 	alpha_broadcast_ipi(ALPHA_IPI_IMB);				\
622 } while (/*CONSTCOND*/0)
623 
624 #define	PMAP_SYNC_ISTREAM_USER(pmap)					\
625 do {									\
626 	alpha_multicast_ipi((pmap)->pm_cpus, ALPHA_IPI_AST);		\
627 	/* for curcpu, will happen in userret() */			\
628 } while (/*CONSTCOND*/0)
629 #else
630 #define	PMAP_SYNC_ISTREAM_KERNEL()	alpha_pal_imb()
631 #define	PMAP_SYNC_ISTREAM_USER(pmap)	/* will happen in userret() */
632 #endif /* MULTIPROCESSOR */
633 
634 #define	PMAP_SYNC_ISTREAM(pmap)						\
635 do {									\
636 	if ((pmap) == pmap_kernel())					\
637 		PMAP_SYNC_ISTREAM_KERNEL();				\
638 	else								\
639 		PMAP_SYNC_ISTREAM_USER(pmap);				\
640 } while (/*CONSTCOND*/0)
641 
642 /*
643  * PMAP_INVALIDATE_ASN:
644  *
645  *	Invalidate the specified pmap's ASN, so as to force allocation
646  *	of a new one the next time pmap_asn_alloc() is called.
647  *
648  *	NOTE: THIS MUST ONLY BE CALLED IF AT LEAST ONE OF THE FOLLOWING
649  *	CONDITIONS IS TRUE:
650  *
651  *		(1) The pmap references the global kernel_lev1map.
652  *
653  *		(2) The pmap is not active on the current processor.
654  */
655 #define	PMAP_INVALIDATE_ASN(pmap, cpu_id)				\
656 do {									\
657 	(pmap)->pm_asni[(cpu_id)].pma_asn = PMAP_ASN_RESERVED;		\
658 } while (/*CONSTCOND*/0)
659 
660 /*
661  * PMAP_INVALIDATE_TLB:
662  *
663  *	Invalidate the TLB entry for the pmap/va pair.
664  */
665 #define	PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id)		\
666 do {									\
667 	if ((hadasm) || (isactive)) {					\
668 		/*							\
669 		 * Simply invalidating the TLB entry and I-cache	\
670 		 * works in this case.					\
671 		 */							\
672 		ALPHA_TBIS((va));					\
673 	} else if ((pmap)->pm_asni[(cpu_id)].pma_asngen ==		\
674 		   pmap_asn_info[(cpu_id)].pma_asngen) {		\
675 		/*							\
676 		 * We can't directly invalidate the TLB entry		\
677 		 * in this case, so we have to force allocation		\
678 		 * of a new ASN the next time this pmap becomes		\
679 		 * active.						\
680 		 */							\
681 		PMAP_INVALIDATE_ASN((pmap), (cpu_id));			\
682 	}								\
683 		/*							\
684 		 * Nothing to do in this case; the next time the	\
685 		 * pmap becomes active on this processor, a new		\
686 		 * ASN will be allocated anyway.			\
687 		 */							\
688 } while (/*CONSTCOND*/0)
689 
690 /*
691  * PMAP_KERNEL_PTE:
692  *
693  *	Get a kernel PTE.
694  *
695  *	If debugging, do a table walk.  If not debugging, just use
696  *	the Virtual Page Table, since all kernel page tables are
697  *	pre-allocated and mapped in.
698  */
699 #ifdef DEBUG
700 #define	PMAP_KERNEL_PTE(va)						\
701 ({									\
702 	pt_entry_t *l1pte_, *l2pte_;					\
703 									\
704 	l1pte_ = pmap_l1pte(pmap_kernel(), va);				\
705 	if (pmap_pte_v(l1pte_) == 0) {					\
706 		printf("kernel level 1 PTE not valid, va 0x%lx "	\
707 		    "(line %d)\n", (va), __LINE__);			\
708 		panic("PMAP_KERNEL_PTE");				\
709 	}								\
710 	l2pte_ = pmap_l2pte(pmap_kernel(), va, l1pte_);			\
711 	if (pmap_pte_v(l2pte_) == 0) {					\
712 		printf("kernel level 2 PTE not valid, va 0x%lx "	\
713 		    "(line %d)\n", (va), __LINE__);			\
714 		panic("PMAP_KERNEL_PTE");				\
715 	}								\
716 	pmap_l3pte(pmap_kernel(), va, l2pte_);				\
717 })
718 #else
719 #define	PMAP_KERNEL_PTE(va)	(&VPT[VPT_INDEX((va))])
720 #endif
721 
722 /*
723  * PMAP_SET_PTE:
724  *
725  *	Set a PTE to a specified value.
726  */
727 #define	PMAP_SET_PTE(ptep, val)	*(ptep) = (val)
728 
729 /*
730  * PMAP_STAT_{INCR,DECR}:
731  *
732  *	Increment or decrement a pmap statistic.
733  */
734 #define	PMAP_STAT_INCR(s, v)	atomic_add_long((unsigned long *)(&(s)), (v))
735 #define	PMAP_STAT_DECR(s, v)	atomic_add_long((unsigned long *)(&(s)), -(v))
736 
737 /*
738  * pmap_bootstrap:
739  *
740  *	Bootstrap the system to run with virtual memory.
741  *
742  *	Note: no locking is necessary in this function.
743  */
744 void
745 pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids)
746 {
747 	vsize_t lev2mapsize, lev3mapsize;
748 	pt_entry_t *lev2map, *lev3map;
749 	pt_entry_t pte;
750 	vsize_t bufsz;
751 	struct pcb *pcb;
752 	int i;
753 
754 #ifdef DEBUG
755 	if (pmapdebug & (PDB_FOLLOW|PDB_BOOTSTRAP))
756 		printf("pmap_bootstrap(0x%lx, %u)\n", ptaddr, maxasn);
757 #endif
758 
759 	/*
760 	 * Compute the number of pages kmem_map will have.
761 	 */
762 	kmeminit_nkmempages();
763 
764 	/*
765 	 * Figure out how many initial PTE's are necessary to map the
766 	 * kernel.  We also reserve space for kmem_alloc_pageable()
767 	 * for vm_fork().
768 	 */
769 
770 	/* Get size of buffer cache and set an upper limit */
771 	bufsz = buf_memcalc();
772 	buf_setvalimit(bufsz);
773 
774 	lev3mapsize =
775 		(VM_PHYS_SIZE + (ubc_nwins << ubc_winshift) +
776 		 bufsz + 16 * NCARGS + pager_map_size) / PAGE_SIZE +
777 		(maxproc * UPAGES) + nkmempages;
778 
779 #ifdef SYSVSHM
780 	lev3mapsize += shminfo.shmall;
781 #endif
782 	lev3mapsize = roundup(lev3mapsize, NPTEPG);
783 
784 	/*
785 	 * Initialize `FYI' variables.  Note we're relying on
786 	 * the fact that BSEARCH sorts the vm_physmem[] array
787 	 * for us.
788 	 */
789 	avail_start = ptoa(vm_physmem[0].start);
790 	avail_end = ptoa(vm_physmem[vm_nphysseg - 1].end);
791 	virtual_end = VM_MIN_KERNEL_ADDRESS + lev3mapsize * PAGE_SIZE;
792 
793 #if 0
794 	printf("avail_start = 0x%lx\n", avail_start);
795 	printf("avail_end = 0x%lx\n", avail_end);
796 	printf("virtual_end = 0x%lx\n", virtual_end);
797 #endif
798 
799 	/*
800 	 * Allocate a level 1 PTE table for the kernel.
801 	 * This is always one page long.
802 	 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL.
803 	 */
804 	kernel_lev1map = (pt_entry_t *)
805 	    uvm_pageboot_alloc(sizeof(pt_entry_t) * NPTEPG);
806 
807 	/*
808 	 * Allocate a level 2 PTE table for the kernel.
809 	 * These must map all of the level3 PTEs.
810 	 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL.
811 	 */
812 	lev2mapsize = roundup(howmany(lev3mapsize, NPTEPG), NPTEPG);
813 	lev2map = (pt_entry_t *)
814 	    uvm_pageboot_alloc(sizeof(pt_entry_t) * lev2mapsize);
815 
816 	/*
817 	 * Allocate a level 3 PTE table for the kernel.
818 	 * Contains lev3mapsize PTEs.
819 	 */
820 	lev3map = (pt_entry_t *)
821 	    uvm_pageboot_alloc(sizeof(pt_entry_t) * lev3mapsize);
822 
823 	/*
824 	 * Set up level 1 page table
825 	 */
826 
827 	/* Map all of the level 2 pte pages */
828 	for (i = 0; i < howmany(lev2mapsize, NPTEPG); i++) {
829 		pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev2map) +
830 		    (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT;
831 		pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
832 		kernel_lev1map[l1pte_index(VM_MIN_KERNEL_ADDRESS +
833 		    (i*PAGE_SIZE*NPTEPG*NPTEPG))] = pte;
834 	}
835 
836 	/* Map the virtual page table */
837 	pte = (ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT)
838 	    << PG_SHIFT;
839 	pte |= PG_V | PG_KRE | PG_KWE; /* NOTE NO ASM */
840 	kernel_lev1map[l1pte_index(VPTBASE)] = pte;
841 	VPT = (pt_entry_t *)VPTBASE;
842 
843 #ifdef _PMAP_MAY_USE_PROM_CONSOLE
844     {
845 	extern pt_entry_t prom_pte;			/* XXX */
846 	extern int prom_mapped;				/* XXX */
847 
848 	if (pmap_uses_prom_console()) {
849 		/*
850 		 * XXX Save old PTE so we can remap the PROM, if
851 		 * XXX necessary.
852 		 */
853 		prom_pte = *(pt_entry_t *)ptaddr & ~PG_ASM;
854 	}
855 	prom_mapped = 0;
856 
857 	/*
858 	 * Actually, this code lies.  The prom is still mapped, and will
859 	 * remain so until the context switch after alpha_init() returns.
860 	 */
861     }
862 #endif
863 
864 	/*
865 	 * Set up level 2 page table.
866 	 */
867 	/* Map all of the level 3 pte pages */
868 	for (i = 0; i < howmany(lev3mapsize, NPTEPG); i++) {
869 		pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev3map) +
870 		    (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT;
871 		pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
872 		lev2map[l2pte_index(VM_MIN_KERNEL_ADDRESS+
873 		    (i*PAGE_SIZE*NPTEPG))] = pte;
874 	}
875 
876 	/* Initialize the pmap_growkernel_lock. */
877 	rw_init(&pmap_growkernel_lock);
878 
879 	/*
880 	 * Set up level three page table (lev3map)
881 	 */
882 	/* Nothing to do; it's already zero'd */
883 
884 	/*
885 	 * Initialize the pmap pools and list.
886 	 */
887 	pmap_ncpuids = ncpuids;
888 	pool_cache_bootstrap(&pmap_pmap_cache, PMAP_SIZEOF(pmap_ncpuids), 0,
889 	    0, 0, "pmap", NULL, IPL_NONE, NULL, NULL, NULL);
890 	pool_cache_bootstrap(&pmap_l1pt_cache, PAGE_SIZE, 0, 0, 0, "pmapl1pt",
891 	    &pmap_l1pt_allocator, IPL_NONE, pmap_l1pt_ctor, NULL, NULL);
892 	pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0,
893 	    PR_LARGECACHE, "pmappv", &pmap_pv_page_allocator, IPL_NONE, NULL,
894 	    NULL, NULL);
895 
896 	TAILQ_INIT(&pmap_all_pmaps);
897 
898 	/*
899 	 * Initialize the ASN logic.
900 	 */
901 	pmap_max_asn = maxasn;
902 	for (i = 0; i < ALPHA_MAXPROCS; i++) {
903 		pmap_asn_info[i].pma_asn = 1;
904 		pmap_asn_info[i].pma_asngen = 0;
905 	}
906 
907 	/*
908 	 * Initialize the locks.
909 	 */
910 	rw_init(&pmap_main_lock);
911 	mutex_init(&pmap_all_pmaps_lock, MUTEX_DEFAULT, IPL_NONE);
912 	for (i = 0; i < __arraycount(pmap_pvh_locks); i++) {
913 		mutex_init(&pmap_pvh_locks[i].lock, MUTEX_DEFAULT, IPL_NONE);
914 	}
915 
916 	/*
917 	 * Initialize kernel pmap.  Note that all kernel mappings
918 	 * have PG_ASM set, so the ASN doesn't really matter for
919 	 * the kernel pmap.  Also, since the kernel pmap always
920 	 * references kernel_lev1map, it always has an invalid ASN
921 	 * generation.
922 	 */
923 	memset(pmap_kernel(), 0, sizeof(struct pmap));
924 	pmap_kernel()->pm_lev1map = kernel_lev1map;
925 	pmap_kernel()->pm_count = 1;
926 	for (i = 0; i < ALPHA_MAXPROCS; i++) {
927 		pmap_kernel()->pm_asni[i].pma_asn = PMAP_ASN_RESERVED;
928 		pmap_kernel()->pm_asni[i].pma_asngen =
929 		    pmap_asn_info[i].pma_asngen;
930 	}
931 	mutex_init(&pmap_kernel()->pm_lock, MUTEX_DEFAULT, IPL_NONE);
932 	TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap_kernel(), pm_list);
933 
934 #if defined(MULTIPROCESSOR)
935 	/*
936 	 * Initialize the TLB shootdown queues.
937 	 */
938 	pool_cache_bootstrap(&pmap_tlb_shootdown_job_cache,
939 	    sizeof(struct pmap_tlb_shootdown_job), CACHE_LINE_SIZE,
940 	     0, PR_LARGECACHE, "pmaptlb", NULL, IPL_VM, NULL, NULL, NULL);
941 	for (i = 0; i < ALPHA_MAXPROCS; i++) {
942 		TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_head);
943 		mutex_init(&pmap_tlb_shootdown_q[i].pq_lock, MUTEX_DEFAULT,
944 		    IPL_SCHED);
945 	}
946 #endif
947 
948 	/*
949 	 * Set up lwp0's PCB such that the ptbr points to the right place
950 	 * and has the kernel pmap's (really unused) ASN.
951 	 */
952 	pcb = lwp_getpcb(&lwp0);
953 	pcb->pcb_hw.apcb_ptbr =
954 	    ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT;
955 	pcb->pcb_hw.apcb_asn = pmap_kernel()->pm_asni[cpu_number()].pma_asn;
956 
957 	/*
958 	 * Mark the kernel pmap `active' on this processor.
959 	 */
960 	atomic_or_ulong(&pmap_kernel()->pm_cpus,
961 	    (1UL << cpu_number()));
962 }
963 
964 #ifdef _PMAP_MAY_USE_PROM_CONSOLE
965 int
966 pmap_uses_prom_console(void)
967 {
968 
969 	return (cputype == ST_DEC_21000);
970 }
971 #endif /* _PMAP_MAY_USE_PROM_CONSOLE */
972 
973 /*
974  * pmap_virtual_space:		[ INTERFACE ]
975  *
976  *	Define the initial bounds of the kernel virtual address space.
977  */
978 void
979 pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp)
980 {
981 
982 	*vstartp = VM_MIN_KERNEL_ADDRESS;	/* kernel is in K0SEG */
983 	*vendp = VM_MAX_KERNEL_ADDRESS;		/* we use pmap_growkernel */
984 }
985 
986 /*
987  * pmap_steal_memory:		[ INTERFACE ]
988  *
989  *	Bootstrap memory allocator (alternative to vm_bootstrap_steal_memory()).
990  *	This function allows for early dynamic memory allocation until the
991  *	virtual memory system has been bootstrapped.  After that point, either
992  *	kmem_alloc or malloc should be used.  This function works by stealing
993  *	pages from the (to be) managed page pool, then implicitly mapping the
994  *	pages (by using their k0seg addresses) and zeroing them.
995  *
996  *	It may be used once the physical memory segments have been pre-loaded
997  *	into the vm_physmem[] array.  Early memory allocation MUST use this
998  *	interface!  This cannot be used after vm_page_startup(), and will
999  *	generate a panic if tried.
1000  *
1001  *	Note that this memory will never be freed, and in essence it is wired
1002  *	down.
1003  *
1004  *	We must adjust *vstartp and/or *vendp iff we use address space
1005  *	from the kernel virtual address range defined by pmap_virtual_space().
1006  *
1007  *	Note: no locking is necessary in this function.
1008  */
1009 vaddr_t
1010 pmap_steal_memory(vsize_t size, vaddr_t *vstartp, vaddr_t *vendp)
1011 {
1012 	int bank, npgs, x;
1013 	vaddr_t va;
1014 	paddr_t pa;
1015 
1016 	size = round_page(size);
1017 	npgs = atop(size);
1018 
1019 #if 0
1020 	printf("PSM: size 0x%lx (npgs 0x%x)\n", size, npgs);
1021 #endif
1022 
1023 	for (bank = 0; bank < vm_nphysseg; bank++) {
1024 		if (uvm.page_init_done == true)
1025 			panic("pmap_steal_memory: called _after_ bootstrap");
1026 
1027 #if 0
1028 		printf("     bank %d: avail_start 0x%lx, start 0x%lx, "
1029 		    "avail_end 0x%lx\n", bank, vm_physmem[bank].avail_start,
1030 		    vm_physmem[bank].start, vm_physmem[bank].avail_end);
1031 #endif
1032 
1033 		if (vm_physmem[bank].avail_start != vm_physmem[bank].start ||
1034 		    vm_physmem[bank].avail_start >= vm_physmem[bank].avail_end)
1035 			continue;
1036 
1037 #if 0
1038 		printf("             avail_end - avail_start = 0x%lx\n",
1039 		    vm_physmem[bank].avail_end - vm_physmem[bank].avail_start);
1040 #endif
1041 
1042 		if ((vm_physmem[bank].avail_end - vm_physmem[bank].avail_start)
1043 		    < npgs)
1044 			continue;
1045 
1046 		/*
1047 		 * There are enough pages here; steal them!
1048 		 */
1049 		pa = ptoa(vm_physmem[bank].avail_start);
1050 		vm_physmem[bank].avail_start += npgs;
1051 		vm_physmem[bank].start += npgs;
1052 
1053 		/*
1054 		 * Have we used up this segment?
1055 		 */
1056 		if (vm_physmem[bank].avail_start == vm_physmem[bank].end) {
1057 			if (vm_nphysseg == 1)
1058 				panic("pmap_steal_memory: out of memory!");
1059 
1060 			/* Remove this segment from the list. */
1061 			vm_nphysseg--;
1062 			for (x = bank; x < vm_nphysseg; x++) {
1063 				/* structure copy */
1064 				vm_physmem[x] = vm_physmem[x + 1];
1065 			}
1066 		}
1067 
1068 		va = ALPHA_PHYS_TO_K0SEG(pa);
1069 		memset((void *)va, 0, size);
1070 		pmap_pages_stolen += npgs;
1071 		return (va);
1072 	}
1073 
1074 	/*
1075 	 * If we got here, there was no memory left.
1076 	 */
1077 	panic("pmap_steal_memory: no memory to steal");
1078 }
1079 
1080 /*
1081  * pmap_init:			[ INTERFACE ]
1082  *
1083  *	Initialize the pmap module.  Called by vm_init(), to initialize any
1084  *	structures that the pmap system needs to map virtual memory.
1085  *
1086  *	Note: no locking is necessary in this function.
1087  */
1088 void
1089 pmap_init(void)
1090 {
1091 
1092 #ifdef DEBUG
1093         if (pmapdebug & PDB_FOLLOW)
1094                 printf("pmap_init()\n");
1095 #endif
1096 
1097 	/* initialize protection array */
1098 	alpha_protection_init();
1099 
1100 	/*
1101 	 * Set a low water mark on the pv_entry pool, so that we are
1102 	 * more likely to have these around even in extreme memory
1103 	 * starvation.
1104 	 */
1105 	pool_cache_setlowat(&pmap_pv_cache, pmap_pv_lowat);
1106 
1107 	/*
1108 	 * Now it is safe to enable pv entry recording.
1109 	 */
1110 	pmap_initialized = true;
1111 
1112 #if 0
1113 	for (bank = 0; bank < vm_nphysseg; bank++) {
1114 		printf("bank %d\n", bank);
1115 		printf("\tstart = 0x%x\n", ptoa(vm_physmem[bank].start));
1116 		printf("\tend = 0x%x\n", ptoa(vm_physmem[bank].end));
1117 		printf("\tavail_start = 0x%x\n",
1118 		    ptoa(vm_physmem[bank].avail_start));
1119 		printf("\tavail_end = 0x%x\n",
1120 		    ptoa(vm_physmem[bank].avail_end));
1121 	}
1122 #endif
1123 }
1124 
1125 /*
1126  * pmap_create:			[ INTERFACE ]
1127  *
1128  *	Create and return a physical map.
1129  *
1130  *	Note: no locking is necessary in this function.
1131  */
1132 pmap_t
1133 pmap_create(void)
1134 {
1135 	pmap_t pmap;
1136 	int i;
1137 
1138 #ifdef DEBUG
1139 	if (pmapdebug & (PDB_FOLLOW|PDB_CREATE))
1140 		printf("pmap_create()\n");
1141 #endif
1142 
1143 	pmap = pool_cache_get(&pmap_pmap_cache, PR_WAITOK);
1144 	memset(pmap, 0, sizeof(*pmap));
1145 
1146 	/*
1147 	 * Defer allocation of a new level 1 page table until
1148 	 * the first new mapping is entered; just take a reference
1149 	 * to the kernel's kernel_lev1map.
1150 	 */
1151 	pmap->pm_lev1map = kernel_lev1map;
1152 
1153 	pmap->pm_count = 1;
1154 	for (i = 0; i < pmap_ncpuids; i++) {
1155 		pmap->pm_asni[i].pma_asn = PMAP_ASN_RESERVED;
1156 		/* XXX Locking? */
1157 		pmap->pm_asni[i].pma_asngen = pmap_asn_info[i].pma_asngen;
1158 	}
1159 	mutex_init(&pmap->pm_lock, MUTEX_DEFAULT, IPL_NONE);
1160 
1161  try_again:
1162 	rw_enter(&pmap_growkernel_lock, RW_READER);
1163 
1164 	if (pmap_lev1map_create(pmap, cpu_number()) != 0) {
1165 		rw_exit(&pmap_growkernel_lock);
1166 		(void) kpause("pmap_create", false, hz >> 2, NULL);
1167 		goto try_again;
1168 	}
1169 
1170 	mutex_enter(&pmap_all_pmaps_lock);
1171 	TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap, pm_list);
1172 	mutex_exit(&pmap_all_pmaps_lock);
1173 
1174 	rw_exit(&pmap_growkernel_lock);
1175 
1176 	return (pmap);
1177 }
1178 
1179 /*
1180  * pmap_destroy:		[ INTERFACE ]
1181  *
1182  *	Drop the reference count on the specified pmap, releasing
1183  *	all resources if the reference count drops to zero.
1184  */
1185 void
1186 pmap_destroy(pmap_t pmap)
1187 {
1188 
1189 #ifdef DEBUG
1190 	if (pmapdebug & PDB_FOLLOW)
1191 		printf("pmap_destroy(%p)\n", pmap);
1192 #endif
1193 
1194 	if (atomic_dec_uint_nv(&pmap->pm_count) > 0)
1195 		return;
1196 
1197 	rw_enter(&pmap_growkernel_lock, RW_READER);
1198 
1199 	/*
1200 	 * Remove it from the global list of all pmaps.
1201 	 */
1202 	mutex_enter(&pmap_all_pmaps_lock);
1203 	TAILQ_REMOVE(&pmap_all_pmaps, pmap, pm_list);
1204 	mutex_exit(&pmap_all_pmaps_lock);
1205 
1206 	pmap_lev1map_destroy(pmap, cpu_number());
1207 
1208 	rw_exit(&pmap_growkernel_lock);
1209 
1210 	/*
1211 	 * Since the pmap is supposed to contain no valid
1212 	 * mappings at this point, we should always see
1213 	 * kernel_lev1map here.
1214 	 */
1215 	KASSERT(pmap->pm_lev1map == kernel_lev1map);
1216 
1217 	mutex_destroy(&pmap->pm_lock);
1218 	pool_cache_put(&pmap_pmap_cache, pmap);
1219 }
1220 
1221 /*
1222  * pmap_reference:		[ INTERFACE ]
1223  *
1224  *	Add a reference to the specified pmap.
1225  */
1226 void
1227 pmap_reference(pmap_t pmap)
1228 {
1229 
1230 #ifdef DEBUG
1231 	if (pmapdebug & PDB_FOLLOW)
1232 		printf("pmap_reference(%p)\n", pmap);
1233 #endif
1234 
1235 	atomic_inc_uint(&pmap->pm_count);
1236 }
1237 
1238 /*
1239  * pmap_remove:			[ INTERFACE ]
1240  *
1241  *	Remove the given range of addresses from the specified map.
1242  *
1243  *	It is assumed that the start and end are properly
1244  *	rounded to the page size.
1245  */
1246 void
1247 pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva)
1248 {
1249 	pt_entry_t *l1pte, *l2pte, *l3pte;
1250 	pt_entry_t *saved_l1pte, *saved_l2pte, *saved_l3pte;
1251 	vaddr_t l1eva, l2eva, vptva;
1252 	bool needisync = false;
1253 	long cpu_id = cpu_number();
1254 
1255 #ifdef DEBUG
1256 	if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
1257 		printf("pmap_remove(%p, %lx, %lx)\n", pmap, sva, eva);
1258 #endif
1259 
1260 	/*
1261 	 * If this is the kernel pmap, we can use a faster method
1262 	 * for accessing the PTEs (since the PT pages are always
1263 	 * resident).
1264 	 *
1265 	 * Note that this routine should NEVER be called from an
1266 	 * interrupt context; pmap_kremove() is used for that.
1267 	 */
1268 	if (pmap == pmap_kernel()) {
1269 		PMAP_MAP_TO_HEAD_LOCK();
1270 		PMAP_LOCK(pmap);
1271 
1272 		while (sva < eva) {
1273 			l3pte = PMAP_KERNEL_PTE(sva);
1274 			if (pmap_pte_v(l3pte)) {
1275 #ifdef DIAGNOSTIC
1276 				if (uvm_pageismanaged(pmap_pte_pa(l3pte)) &&
1277 				    pmap_pte_pv(l3pte) == 0)
1278 					panic("pmap_remove: managed page "
1279 					    "without PG_PVLIST for 0x%lx",
1280 					    sva);
1281 #endif
1282 				needisync |= pmap_remove_mapping(pmap, sva,
1283 				    l3pte, true, cpu_id);
1284 			}
1285 			sva += PAGE_SIZE;
1286 		}
1287 
1288 		PMAP_UNLOCK(pmap);
1289 		PMAP_MAP_TO_HEAD_UNLOCK();
1290 
1291 		if (needisync)
1292 			PMAP_SYNC_ISTREAM_KERNEL();
1293 		return;
1294 	}
1295 
1296 #ifdef DIAGNOSTIC
1297 	if (sva > VM_MAXUSER_ADDRESS || eva > VM_MAXUSER_ADDRESS)
1298 		panic("pmap_remove: (0x%lx - 0x%lx) user pmap, kernel "
1299 		    "address range", sva, eva);
1300 #endif
1301 
1302 	PMAP_MAP_TO_HEAD_LOCK();
1303 	PMAP_LOCK(pmap);
1304 
1305 	/*
1306 	 * If we're already referencing the kernel_lev1map, there
1307 	 * is no work for us to do.
1308 	 */
1309 	if (pmap->pm_lev1map == kernel_lev1map)
1310 		goto out;
1311 
1312 	saved_l1pte = l1pte = pmap_l1pte(pmap, sva);
1313 
1314 	/*
1315 	 * Add a reference to the L1 table so it won't get
1316 	 * removed from under us.
1317 	 */
1318 	pmap_physpage_addref(saved_l1pte);
1319 
1320 	for (; sva < eva; sva = l1eva, l1pte++) {
1321 		l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE;
1322 		if (pmap_pte_v(l1pte)) {
1323 			saved_l2pte = l2pte = pmap_l2pte(pmap, sva, l1pte);
1324 
1325 			/*
1326 			 * Add a reference to the L2 table so it won't
1327 			 * get removed from under us.
1328 			 */
1329 			pmap_physpage_addref(saved_l2pte);
1330 
1331 			for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) {
1332 				l2eva =
1333 				    alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE;
1334 				if (pmap_pte_v(l2pte)) {
1335 					saved_l3pte = l3pte =
1336 					    pmap_l3pte(pmap, sva, l2pte);
1337 
1338 					/*
1339 					 * Add a reference to the L3 table so
1340 					 * it won't get removed from under us.
1341 					 */
1342 					pmap_physpage_addref(saved_l3pte);
1343 
1344 					/*
1345 					 * Remember this sva; if the L3 table
1346 					 * gets removed, we need to invalidate
1347 					 * the VPT TLB entry for it.
1348 					 */
1349 					vptva = sva;
1350 
1351 					for (; sva < l2eva && sva < eva;
1352 					     sva += PAGE_SIZE, l3pte++) {
1353 						if (!pmap_pte_v(l3pte)) {
1354 							continue;
1355 						}
1356 						needisync |=
1357 						    pmap_remove_mapping(
1358 							pmap, sva,
1359 							l3pte, true,
1360 							cpu_id);
1361 					}
1362 
1363 					/*
1364 					 * Remove the reference to the L3
1365 					 * table that we added above.  This
1366 					 * may free the L3 table.
1367 					 */
1368 					pmap_l3pt_delref(pmap, vptva,
1369 					    saved_l3pte, cpu_id);
1370 				}
1371 			}
1372 
1373 			/*
1374 			 * Remove the reference to the L2 table that we
1375 			 * added above.  This may free the L2 table.
1376 			 */
1377 			pmap_l2pt_delref(pmap, l1pte, saved_l2pte, cpu_id);
1378 		}
1379 	}
1380 
1381 	/*
1382 	 * Remove the reference to the L1 table that we added above.
1383 	 * This may free the L1 table.
1384 	 */
1385 	pmap_l1pt_delref(pmap, saved_l1pte, cpu_id);
1386 
1387 	if (needisync)
1388 		PMAP_SYNC_ISTREAM_USER(pmap);
1389 
1390  out:
1391 	PMAP_UNLOCK(pmap);
1392 	PMAP_MAP_TO_HEAD_UNLOCK();
1393 }
1394 
1395 /*
1396  * pmap_page_protect:		[ INTERFACE ]
1397  *
1398  *	Lower the permission for all mappings to a given page to
1399  *	the permissions specified.
1400  */
1401 void
1402 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
1403 {
1404 	pmap_t pmap;
1405 	pv_entry_t pv, nextpv;
1406 	bool needkisync = false;
1407 	long cpu_id = cpu_number();
1408 	kmutex_t *lock;
1409 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1410 #ifdef DEBUG
1411 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
1412 
1413 
1414 	if ((pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) ||
1415 	    (prot == VM_PROT_NONE && (pmapdebug & PDB_REMOVE)))
1416 		printf("pmap_page_protect(%p, %x)\n", pg, prot);
1417 #endif
1418 
1419 	switch (prot) {
1420 	case VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE:
1421 	case VM_PROT_READ|VM_PROT_WRITE:
1422 		return;
1423 
1424 	/* copy_on_write */
1425 	case VM_PROT_READ|VM_PROT_EXECUTE:
1426 	case VM_PROT_READ:
1427 		PMAP_HEAD_TO_MAP_LOCK();
1428 		lock = pmap_pvh_lock(pg);
1429 		mutex_enter(lock);
1430 		for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) {
1431 			PMAP_LOCK(pv->pv_pmap);
1432 			if (*pv->pv_pte & (PG_KWE | PG_UWE)) {
1433 				*pv->pv_pte &= ~(PG_KWE | PG_UWE);
1434 				PMAP_INVALIDATE_TLB(pv->pv_pmap, pv->pv_va,
1435 				    pmap_pte_asm(pv->pv_pte),
1436 				    PMAP_ISACTIVE(pv->pv_pmap, cpu_id), cpu_id);
1437 				PMAP_TLB_SHOOTDOWN(pv->pv_pmap, pv->pv_va,
1438 				    pmap_pte_asm(pv->pv_pte));
1439 			}
1440 			PMAP_UNLOCK(pv->pv_pmap);
1441 		}
1442 		mutex_exit(lock);
1443 		PMAP_HEAD_TO_MAP_UNLOCK();
1444 		PMAP_TLB_SHOOTNOW();
1445 		return;
1446 
1447 	/* remove_all */
1448 	default:
1449 		break;
1450 	}
1451 
1452 	PMAP_HEAD_TO_MAP_LOCK();
1453 	lock = pmap_pvh_lock(pg);
1454 	mutex_enter(lock);
1455 	for (pv = pg->mdpage.pvh_list; pv != NULL; pv = nextpv) {
1456 		nextpv = pv->pv_next;
1457 		pmap = pv->pv_pmap;
1458 
1459 		PMAP_LOCK(pmap);
1460 #ifdef DEBUG
1461 		if (pmap_pte_v(pmap_l2pte(pv->pv_pmap, pv->pv_va, NULL)) == 0 ||
1462 		    pmap_pte_pa(pv->pv_pte) != pa)
1463 			panic("pmap_page_protect: bad mapping");
1464 #endif
1465 		if (pmap_remove_mapping(pmap, pv->pv_va, pv->pv_pte,
1466 		    false, cpu_id) == true) {
1467 			if (pmap == pmap_kernel())
1468 				needkisync |= true;
1469 			else
1470 				PMAP_SYNC_ISTREAM_USER(pmap);
1471 		}
1472 		PMAP_UNLOCK(pmap);
1473 	}
1474 
1475 	if (needkisync)
1476 		PMAP_SYNC_ISTREAM_KERNEL();
1477 
1478 	mutex_exit(lock);
1479 	PMAP_HEAD_TO_MAP_UNLOCK();
1480 }
1481 
1482 /*
1483  * pmap_protect:		[ INTERFACE ]
1484  *
1485  *	Set the physical protection on the specified range of this map
1486  *	as requested.
1487  */
1488 void
1489 pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
1490 {
1491 	pt_entry_t *l1pte, *l2pte, *l3pte, bits;
1492 	bool isactive;
1493 	bool hadasm;
1494 	vaddr_t l1eva, l2eva;
1495 	long cpu_id = cpu_number();
1496 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1497 
1498 #ifdef DEBUG
1499 	if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT))
1500 		printf("pmap_protect(%p, %lx, %lx, %x)\n",
1501 		    pmap, sva, eva, prot);
1502 #endif
1503 
1504 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1505 		pmap_remove(pmap, sva, eva);
1506 		return;
1507 	}
1508 
1509 	PMAP_LOCK(pmap);
1510 
1511 	bits = pte_prot(pmap, prot);
1512 	isactive = PMAP_ISACTIVE(pmap, cpu_id);
1513 
1514 	l1pte = pmap_l1pte(pmap, sva);
1515 	for (; sva < eva; sva = l1eva, l1pte++) {
1516 		l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE;
1517 		if (pmap_pte_v(l1pte)) {
1518 			l2pte = pmap_l2pte(pmap, sva, l1pte);
1519 			for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) {
1520 				l2eva =
1521 				    alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE;
1522 				if (pmap_pte_v(l2pte)) {
1523 					l3pte = pmap_l3pte(pmap, sva, l2pte);
1524 					for (; sva < l2eva && sva < eva;
1525 					     sva += PAGE_SIZE, l3pte++) {
1526 						if (pmap_pte_v(l3pte) &&
1527 						    pmap_pte_prot_chg(l3pte,
1528 						    bits)) {
1529 							hadasm =
1530 							   (pmap_pte_asm(l3pte)
1531 							    != 0);
1532 							pmap_pte_set_prot(l3pte,
1533 							   bits);
1534 							PMAP_INVALIDATE_TLB(
1535 							   pmap, sva, hadasm,
1536 							   isactive, cpu_id);
1537 							PMAP_TLB_SHOOTDOWN(
1538 							   pmap, sva,
1539 							   hadasm ? PG_ASM : 0);
1540 						}
1541 					}
1542 				}
1543 			}
1544 		}
1545 	}
1546 
1547 	PMAP_TLB_SHOOTNOW();
1548 
1549 	if (prot & VM_PROT_EXECUTE)
1550 		PMAP_SYNC_ISTREAM(pmap);
1551 
1552 	PMAP_UNLOCK(pmap);
1553 }
1554 
1555 /*
1556  * pmap_enter:			[ INTERFACE ]
1557  *
1558  *	Insert the given physical page (p) at
1559  *	the specified virtual address (v) in the
1560  *	target physical map with the protection requested.
1561  *
1562  *	If specified, the page will be wired down, meaning
1563  *	that the related pte can not be reclaimed.
1564  *
1565  *	Note:  This is the only routine which MAY NOT lazy-evaluate
1566  *	or lose information.  That is, this routine must actually
1567  *	insert this page into the given map NOW.
1568  */
1569 int
1570 pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
1571 {
1572 	struct vm_page *pg;			/* if != NULL, managed page */
1573 	pt_entry_t *pte, npte, opte;
1574 	paddr_t opa;
1575 	bool tflush = true;
1576 	bool hadasm = false;	/* XXX gcc -Wuninitialized */
1577 	bool needisync = false;
1578 	bool setisync = false;
1579 	bool isactive;
1580 	bool wired;
1581 	long cpu_id = cpu_number();
1582 	int error = 0;
1583 	kmutex_t *lock;
1584 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1585 
1586 #ifdef DEBUG
1587 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1588 		printf("pmap_enter(%p, %lx, %lx, %x, %x)\n",
1589 		       pmap, va, pa, prot, flags);
1590 #endif
1591 	pg = PHYS_TO_VM_PAGE(pa);
1592 	isactive = PMAP_ISACTIVE(pmap, cpu_id);
1593 	wired = (flags & PMAP_WIRED) != 0;
1594 
1595 	/*
1596 	 * Determine what we need to do about the I-stream.  If
1597 	 * VM_PROT_EXECUTE is set, we mark a user pmap as needing
1598 	 * an I-sync on the way back out to userspace.  We always
1599 	 * need an immediate I-sync for the kernel pmap.
1600 	 */
1601 	if (prot & VM_PROT_EXECUTE) {
1602 		if (pmap == pmap_kernel())
1603 			needisync = true;
1604 		else {
1605 			setisync = true;
1606 			needisync = (pmap->pm_cpus != 0);
1607 		}
1608 	}
1609 
1610 	PMAP_MAP_TO_HEAD_LOCK();
1611 	PMAP_LOCK(pmap);
1612 
1613 	if (pmap == pmap_kernel()) {
1614 #ifdef DIAGNOSTIC
1615 		/*
1616 		 * Sanity check the virtual address.
1617 		 */
1618 		if (va < VM_MIN_KERNEL_ADDRESS)
1619 			panic("pmap_enter: kernel pmap, invalid va 0x%lx", va);
1620 #endif
1621 		pte = PMAP_KERNEL_PTE(va);
1622 	} else {
1623 		pt_entry_t *l1pte, *l2pte;
1624 
1625 #ifdef DIAGNOSTIC
1626 		/*
1627 		 * Sanity check the virtual address.
1628 		 */
1629 		if (va >= VM_MAXUSER_ADDRESS)
1630 			panic("pmap_enter: user pmap, invalid va 0x%lx", va);
1631 #endif
1632 
1633 		KASSERT(pmap->pm_lev1map != kernel_lev1map);
1634 
1635 		/*
1636 		 * Check to see if the level 1 PTE is valid, and
1637 		 * allocate a new level 2 page table page if it's not.
1638 		 * A reference will be added to the level 2 table when
1639 		 * the level 3 table is created.
1640 		 */
1641 		l1pte = pmap_l1pte(pmap, va);
1642 		if (pmap_pte_v(l1pte) == 0) {
1643 			pmap_physpage_addref(l1pte);
1644 			error = pmap_ptpage_alloc(pmap, l1pte, PGU_L2PT);
1645 			if (error) {
1646 				pmap_l1pt_delref(pmap, l1pte, cpu_id);
1647 				if (flags & PMAP_CANFAIL)
1648 					goto out;
1649 				panic("pmap_enter: unable to create L2 PT "
1650 				    "page");
1651 			}
1652 #ifdef DEBUG
1653 			if (pmapdebug & PDB_PTPAGE)
1654 				printf("pmap_enter: new level 2 table at "
1655 				    "0x%lx\n", pmap_pte_pa(l1pte));
1656 #endif
1657 		}
1658 
1659 		/*
1660 		 * Check to see if the level 2 PTE is valid, and
1661 		 * allocate a new level 3 page table page if it's not.
1662 		 * A reference will be added to the level 3 table when
1663 		 * the mapping is validated.
1664 		 */
1665 		l2pte = pmap_l2pte(pmap, va, l1pte);
1666 		if (pmap_pte_v(l2pte) == 0) {
1667 			pmap_physpage_addref(l2pte);
1668 			error = pmap_ptpage_alloc(pmap, l2pte, PGU_L3PT);
1669 			if (error) {
1670 				pmap_l2pt_delref(pmap, l1pte, l2pte, cpu_id);
1671 				if (flags & PMAP_CANFAIL)
1672 					goto out;
1673 				panic("pmap_enter: unable to create L3 PT "
1674 				    "page");
1675 			}
1676 #ifdef DEBUG
1677 			if (pmapdebug & PDB_PTPAGE)
1678 				printf("pmap_enter: new level 3 table at "
1679 				    "0x%lx\n", pmap_pte_pa(l2pte));
1680 #endif
1681 		}
1682 
1683 		/*
1684 		 * Get the PTE that will map the page.
1685 		 */
1686 		pte = pmap_l3pte(pmap, va, l2pte);
1687 	}
1688 
1689 	/* Remember the old PTE; it is used for the TBI check later. */
1690 	opte = *pte;
1691 
1692 	/*
1693 	 * Check to see if the old mapping is valid.  If not, validate the
1694 	 * new one immediately.
1695 	 */
1696 	if (pmap_pte_v(pte) == 0) {
1697 		/*
1698 		 * No need to invalidate the TLB in this case; an invalid
1699 		 * mapping won't be in the TLB, and a previously valid
1700 		 * mapping would have been flushed when it was invalidated.
1701 		 */
1702 		tflush = false;
1703 
1704 		/*
1705 		 * No need to synchronize the I-stream, either, for basically
1706 		 * the same reason.
1707 		 */
1708 		setisync = needisync = false;
1709 
1710 		if (pmap != pmap_kernel()) {
1711 			/*
1712 			 * New mappings gain a reference on the level 3
1713 			 * table.
1714 			 */
1715 			pmap_physpage_addref(pte);
1716 		}
1717 		goto validate_enterpv;
1718 	}
1719 
1720 	opa = pmap_pte_pa(pte);
1721 	hadasm = (pmap_pte_asm(pte) != 0);
1722 
1723 	if (opa == pa) {
1724 		/*
1725 		 * Mapping has not changed; must be a protection or
1726 		 * wiring change.
1727 		 */
1728 		if (pmap_pte_w_chg(pte, wired ? PG_WIRED : 0)) {
1729 #ifdef DEBUG
1730 			if (pmapdebug & PDB_ENTER)
1731 				printf("pmap_enter: wiring change -> %d\n",
1732 				    wired);
1733 #endif
1734 			/*
1735 			 * Adjust the wiring count.
1736 			 */
1737 			if (wired)
1738 				PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1);
1739 			else
1740 				PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1741 		}
1742 
1743 		/*
1744 		 * Set the PTE.
1745 		 */
1746 		goto validate;
1747 	}
1748 
1749 	/*
1750 	 * The mapping has changed.  We need to invalidate the
1751 	 * old mapping before creating the new one.
1752 	 */
1753 #ifdef DEBUG
1754 	if (pmapdebug & PDB_ENTER)
1755 		printf("pmap_enter: removing old mapping 0x%lx\n", va);
1756 #endif
1757 	if (pmap != pmap_kernel()) {
1758 		/*
1759 		 * Gain an extra reference on the level 3 table.
1760 		 * pmap_remove_mapping() will delete a reference,
1761 		 * and we don't want the table to be erroneously
1762 		 * freed.
1763 		 */
1764 		pmap_physpage_addref(pte);
1765 	}
1766 	needisync |= pmap_remove_mapping(pmap, va, pte, true, cpu_id);
1767 
1768  validate_enterpv:
1769 	/*
1770 	 * Enter the mapping into the pv_table if appropriate.
1771 	 */
1772 	if (pg != NULL) {
1773 		error = pmap_pv_enter(pmap, pg, va, pte, true);
1774 		if (error) {
1775 			pmap_l3pt_delref(pmap, va, pte, cpu_id);
1776 			if (flags & PMAP_CANFAIL)
1777 				goto out;
1778 			panic("pmap_enter: unable to enter mapping in PV "
1779 			    "table");
1780 		}
1781 	}
1782 
1783 	/*
1784 	 * Increment counters.
1785 	 */
1786 	PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1);
1787 	if (wired)
1788 		PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1);
1789 
1790  validate:
1791 	/*
1792 	 * Build the new PTE.
1793 	 */
1794 	npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap, prot) | PG_V;
1795 	if (pg != NULL) {
1796 		int attrs;
1797 
1798 #ifdef DIAGNOSTIC
1799 		if ((flags & VM_PROT_ALL) & ~prot)
1800 			panic("pmap_enter: access type exceeds prot");
1801 #endif
1802 		lock = pmap_pvh_lock(pg);
1803 		mutex_enter(lock);
1804 		if (flags & VM_PROT_WRITE)
1805 			pg->mdpage.pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED);
1806 		else if (flags & VM_PROT_ALL)
1807 			pg->mdpage.pvh_attrs |= PGA_REFERENCED;
1808 		attrs = pg->mdpage.pvh_attrs;
1809 		mutex_exit(lock);
1810 
1811 		/*
1812 		 * Set up referenced/modified emulation for new mapping.
1813 		 */
1814 		if ((attrs & PGA_REFERENCED) == 0)
1815 			npte |= PG_FOR | PG_FOW | PG_FOE;
1816 		else if ((attrs & PGA_MODIFIED) == 0)
1817 			npte |= PG_FOW;
1818 
1819 		/*
1820 		 * Mapping was entered on PV list.
1821 		 */
1822 		npte |= PG_PVLIST;
1823 	}
1824 	if (wired)
1825 		npte |= PG_WIRED;
1826 #ifdef DEBUG
1827 	if (pmapdebug & PDB_ENTER)
1828 		printf("pmap_enter: new pte = 0x%lx\n", npte);
1829 #endif
1830 
1831 	/*
1832 	 * If the PALcode portion of the new PTE is the same as the
1833 	 * old PTE, no TBI is necessary.
1834 	 */
1835 	if (PG_PALCODE(opte) == PG_PALCODE(npte))
1836 		tflush = false;
1837 
1838 	/*
1839 	 * Set the new PTE.
1840 	 */
1841 	PMAP_SET_PTE(pte, npte);
1842 
1843 	/*
1844 	 * Invalidate the TLB entry for this VA and any appropriate
1845 	 * caches.
1846 	 */
1847 	if (tflush) {
1848 		PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id);
1849 		PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0);
1850 		PMAP_TLB_SHOOTNOW();
1851 	}
1852 	if (setisync)
1853 		PMAP_SET_NEEDISYNC(pmap);
1854 	if (needisync)
1855 		PMAP_SYNC_ISTREAM(pmap);
1856 
1857 out:
1858 	PMAP_UNLOCK(pmap);
1859 	PMAP_MAP_TO_HEAD_UNLOCK();
1860 
1861 	return error;
1862 }
1863 
1864 /*
1865  * pmap_kenter_pa:		[ INTERFACE ]
1866  *
1867  *	Enter a va -> pa mapping into the kernel pmap without any
1868  *	physical->virtual tracking.
1869  *
1870  *	Note: no locking is necessary in this function.
1871  */
1872 void
1873 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
1874 {
1875 	pt_entry_t *pte, npte;
1876 	long cpu_id = cpu_number();
1877 	bool needisync = false;
1878 	pmap_t pmap = pmap_kernel();
1879 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1880 
1881 #ifdef DEBUG
1882 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1883 		printf("pmap_kenter_pa(%lx, %lx, %x)\n",
1884 		    va, pa, prot);
1885 #endif
1886 
1887 #ifdef DIAGNOSTIC
1888 	/*
1889 	 * Sanity check the virtual address.
1890 	 */
1891 	if (va < VM_MIN_KERNEL_ADDRESS)
1892 		panic("pmap_kenter_pa: kernel pmap, invalid va 0x%lx", va);
1893 #endif
1894 
1895 	pte = PMAP_KERNEL_PTE(va);
1896 
1897 	if (pmap_pte_v(pte) == 0)
1898 		PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1);
1899 	if (pmap_pte_w(pte) == 0)
1900 		PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1901 
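	/*
	 * If either the new mapping or the one being replaced is
	 * executable, the I-stream is synchronized below.
	 */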
1902 	if ((prot & VM_PROT_EXECUTE) != 0 || pmap_pte_exec(pte))
1903 		needisync = true;
1904 
1905 	/*
1906 	 * Build the new PTE.
1907 	 */
1908 	npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap_kernel(), prot) |
1909 	    PG_V | PG_WIRED;
1910 
1911 	/*
1912 	 * Set the new PTE.
1913 	 */
1914 	PMAP_SET_PTE(pte, npte);
1915 #if defined(MULTIPROCESSOR)
1916 	alpha_mb();		/* XXX alpha_wmb()? */
1917 #endif
1918 
1919 	/*
1920 	 * Invalidate the TLB entry for this VA and any appropriate
1921 	 * caches.
1922 	 */
1923 	PMAP_INVALIDATE_TLB(pmap, va, true, true, cpu_id);
1924 	PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM);
1925 	PMAP_TLB_SHOOTNOW();
1926 
1927 	if (needisync)
1928 		PMAP_SYNC_ISTREAM_KERNEL();
1929 }
1930 
1931 /*
1932  * pmap_kremove:		[ INTERFACE ]
1933  *
1934  *	Remove a mapping entered with pmap_kenter_pa() starting at va,
1935  *	for size bytes (assumed to be page rounded).
1936  */
1937 void
1938 pmap_kremove(vaddr_t va, vsize_t size)
1939 {
1940 	pt_entry_t *pte;
1941 	bool needisync = false;
1942 	long cpu_id = cpu_number();
1943 	pmap_t pmap = pmap_kernel();
1944 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1945 
1946 #ifdef DEBUG
1947 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1948 		printf("pmap_kremove(%lx, %lx)\n",
1949 		    va, size);
1950 #endif
1951 
1952 #ifdef DIAGNOSTIC
1953 	if (va < VM_MIN_KERNEL_ADDRESS)
1954 		panic("pmap_kremove: user address");
1955 #endif
1956 
1957 	for (; size != 0; size -= PAGE_SIZE, va += PAGE_SIZE) {
1958 		pte = PMAP_KERNEL_PTE(va);
1959 		if (pmap_pte_v(pte)) {
1960 #ifdef DIAGNOSTIC
1961 			if (pmap_pte_pv(pte))
1962 				panic("pmap_kremove: PG_PVLIST mapping for "
1963 				    "0x%lx", va);
1964 #endif
1965 			if (pmap_pte_exec(pte))
1966 				needisync = true;
1967 
1968 			/* Zap the mapping. */
1969 			PMAP_SET_PTE(pte, PG_NV);
1970 #if defined(MULTIPROCESSOR)
1971 			alpha_mb();		/* XXX alpha_wmb()? */
1972 #endif
1973 			PMAP_INVALIDATE_TLB(pmap, va, true, true, cpu_id);
1974 			PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM);
1975 
1976 			/* Update stats. */
1977 			PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1);
1978 			PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1979 		}
1980 	}
1981 
1982 	PMAP_TLB_SHOOTNOW();
1983 
1984 	if (needisync)
1985 		PMAP_SYNC_ISTREAM_KERNEL();
1986 }
1987 
1988 /*
1989  * pmap_unwire:			[ INTERFACE ]
1990  *
1991  *	Clear the wired attribute for a map/virtual-address pair.
1992  *
1993  *	The mapping must already exist in the pmap.
1994  */
1995 void
1996 pmap_unwire(pmap_t pmap, vaddr_t va)
1997 {
1998 	pt_entry_t *pte;
1999 
2000 #ifdef DEBUG
2001 	if (pmapdebug & PDB_FOLLOW)
2002 		printf("pmap_unwire(%p, %lx)\n", pmap, va);
2003 #endif
2004 
2005 	PMAP_LOCK(pmap);
2006 
2007 	pte = pmap_l3pte(pmap, va, NULL);
2008 #ifdef DIAGNOSTIC
2009 	if (pte == NULL || pmap_pte_v(pte) == 0)
2010 		panic("pmap_unwire");
2011 #endif
2012 
2013 	/*
2014 	 * If wiring actually changed (always?) clear the wire bit and
2015 	 * update the wire count.  Note that wiring is not a hardware
2016 	 * characteristic so there is no need to invalidate the TLB.
2017 	 */
2018 	if (pmap_pte_w_chg(pte, 0)) {
2019 		pmap_pte_set_w(pte, false);
2020 		PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
2021 	}
2022 #ifdef DIAGNOSTIC
2023 	else {
2024 		printf("pmap_unwire: wiring for pmap %p va 0x%lx "
2025 		    "didn't change!\n", pmap, va);
2026 	}
2027 #endif
2028 
2029 	PMAP_UNLOCK(pmap);
2030 }
2031 
2032 /*
2033  * pmap_extract:		[ INTERFACE ]
2034  *
2035  *	Extract the physical address associated with the given
2036  *	pmap/virtual address pair.
2037  */
2038 bool
2039 pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *pap)
2040 {
2041 	pt_entry_t *l1pte, *l2pte, *l3pte;
2042 	paddr_t pa;
2043 
2044 #ifdef DEBUG
2045 	if (pmapdebug & PDB_FOLLOW)
2046 		printf("pmap_extract(%p, %lx) -> ", pmap, va);
2047 #endif
2048 
2049 	/*
2050 	 * Take a faster path for the kernel pmap.  Avoids locking,
2051 	 * handles K0SEG.
2052 	 */
2053 	if (pmap == pmap_kernel()) {
2054 		pa = vtophys(va);
2055 		if (pap != NULL)
2056 			*pap = pa;
2057 #ifdef DEBUG
2058 		if (pmapdebug & PDB_FOLLOW)
2059 			printf("0x%lx (kernel vtophys)\n", pa);
2060 #endif
2061 		return (pa != 0);	/* XXX */
2062 	}
2063 
2064 	PMAP_LOCK(pmap);
2065 
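	/* Walk the three-level page table; fail if any level is invalid. */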
2066 	l1pte = pmap_l1pte(pmap, va);
2067 	if (pmap_pte_v(l1pte) == 0)
2068 		goto out;
2069 
2070 	l2pte = pmap_l2pte(pmap, va, l1pte);
2071 	if (pmap_pte_v(l2pte) == 0)
2072 		goto out;
2073 
2074 	l3pte = pmap_l3pte(pmap, va, l2pte);
2075 	if (pmap_pte_v(l3pte) == 0)
2076 		goto out;
2077 
2078 	pa = pmap_pte_pa(l3pte) | (va & PGOFSET);
2079 	PMAP_UNLOCK(pmap);
2080 	if (pap != NULL)
2081 		*pap = pa;
2082 #ifdef DEBUG
2083 	if (pmapdebug & PDB_FOLLOW)
2084 		printf("0x%lx\n", pa);
2085 #endif
2086 	return (true);
2087 
2088  out:
2089 	PMAP_UNLOCK(pmap);
2090 #ifdef DEBUG
2091 	if (pmapdebug & PDB_FOLLOW)
2092 		printf("failed\n");
2093 #endif
2094 	return (false);
2095 }
2096 
2097 /*
2098  * pmap_copy:			[ INTERFACE ]
2099  *
2100  *	Copy the mapping range specified by src_addr/len
2101  *	from the source map to the range dst_addr/len
2102  *	in the destination map.
2103  *
2104  *	This routine is only advisory and need not do anything.
2105  */
2106 /* call deleted in <machine/pmap.h> */
2107 
2108 /*
2109  * pmap_update:			[ INTERFACE ]
2110  *
2111  *	Require that all active physical maps contain no
2112  *	incorrect entries NOW, by processing any deferred
2113  *	pmap operations.
2114  */
2115 /* call deleted in <machine/pmap.h> */
2116 
2117 /*
2118  * pmap_activate:		[ INTERFACE ]
2119  *
2120  *	Activate the pmap used by the specified process.  This includes
2121  *	reloading the MMU context if it is the current process, and marking
2122  *	the pmap in use by the processor.
2123  */
2124 void
2125 pmap_activate(struct lwp *l)
2126 {
2127 	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
2128 	long cpu_id = cpu_number();
2129 
2130 #ifdef DEBUG
2131 	if (pmapdebug & PDB_FOLLOW)
2132 		printf("pmap_activate(%p)\n", l);
2133 #endif
2134 
2135 	/* Mark the pmap in use by this processor. */
2136 	atomic_or_ulong(&pmap->pm_cpus, (1UL << cpu_id));
2137 
2138 	/* Allocate an ASN. */
2139 	pmap_asn_alloc(pmap, cpu_id);
2140 
2141 	PMAP_ACTIVATE(pmap, l, cpu_id);
2142 }
2143 
2144 /*
2145  * pmap_deactivate:		[ INTERFACE ]
2146  *
2147  *	Mark that the pmap used by the specified process is no longer
2148  *	in use by the processor.
2149  *
2150  *	The comment above pmap_activate() wrt. locking applies here,
2151  *	as well.  Note that we use only a single `atomic' operation,
2152  *	so no locking is necessary.
2153  */
2154 void
2155 pmap_deactivate(struct lwp *l)
2156 {
2157 	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
2158 
2159 #ifdef DEBUG
2160 	if (pmapdebug & PDB_FOLLOW)
2161 		printf("pmap_deactivate(%p)\n", l);
2162 #endif
2163 
2164 	/*
2165 	 * Mark the pmap no longer in use by this processor.
2166 	 */
2167 	atomic_and_ulong(&pmap->pm_cpus, ~(1UL << cpu_number()));
2168 }
2169 
2170 /*
2171  * pmap_zero_page:		[ INTERFACE ]
2172  *
2173  *	Zero the specified (machine independent) page by mapping the page
2174  *	into virtual memory and clearing its contents, one machine dependent
2175  *	page at a time.
2176  *
2177  *	Note: no locking is necessary in this function.
2178  */
2179 void
2180 pmap_zero_page(paddr_t phys)
2181 {
2182 	u_long *p0, *p1, *pend;
2183 
2184 #ifdef DEBUG
2185 	if (pmapdebug & PDB_FOLLOW)
2186 		printf("pmap_zero_page(%lx)\n", phys);
2187 #endif
2188 
2189 	p0 = (u_long *)ALPHA_PHYS_TO_K0SEG(phys);
2190 	p1 = NULL;
2191 	pend = (u_long *)((u_long)p0 + PAGE_SIZE);
2192 
2193 	/*
2194 	 * Unroll the loop a bit, doing 16 quadwords per iteration.
2195 	 * Do only 8 back-to-back stores, and alternate registers.
2196 	 */
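	/* $31 is the Alpha zero register, so each stq below stores zero. */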
2197 	do {
2198 		__asm volatile(
2199 		"# BEGIN loop body\n"
2200 		"	addq	%2, (8 * 8), %1		\n"
2201 		"	stq	$31, (0 * 8)(%0)	\n"
2202 		"	stq	$31, (1 * 8)(%0)	\n"
2203 		"	stq	$31, (2 * 8)(%0)	\n"
2204 		"	stq	$31, (3 * 8)(%0)	\n"
2205 		"	stq	$31, (4 * 8)(%0)	\n"
2206 		"	stq	$31, (5 * 8)(%0)	\n"
2207 		"	stq	$31, (6 * 8)(%0)	\n"
2208 		"	stq	$31, (7 * 8)(%0)	\n"
2209 		"					\n"
2210 		"	addq	%3, (8 * 8), %0		\n"
2211 		"	stq	$31, (0 * 8)(%1)	\n"
2212 		"	stq	$31, (1 * 8)(%1)	\n"
2213 		"	stq	$31, (2 * 8)(%1)	\n"
2214 		"	stq	$31, (3 * 8)(%1)	\n"
2215 		"	stq	$31, (4 * 8)(%1)	\n"
2216 		"	stq	$31, (5 * 8)(%1)	\n"
2217 		"	stq	$31, (6 * 8)(%1)	\n"
2218 		"	stq	$31, (7 * 8)(%1)	\n"
2219 		"	# END loop body"
2220 		: "=r" (p0), "=r" (p1)
2221 		: "0" (p0), "1" (p1)
2222 		: "memory");
2223 	} while (p0 < pend);
2224 }
2225 
2226 /*
2227  * pmap_copy_page:		[ INTERFACE ]
2228  *
2229  *	Copy the specified (machine independent) page by mapping the page
2230  *	into virtual memory and using memcpy to copy the page, one machine
2231  *	dependent page at a time.
2232  *
2233  *	Note: no locking is necessary in this function.
2234  */
2235 void
2236 pmap_copy_page(paddr_t src, paddr_t dst)
2237 {
2238 	const void *s;
2239 	void *d;
2240 
2241 #ifdef DEBUG
2242 	if (pmapdebug & PDB_FOLLOW)
2243 		printf("pmap_copy_page(%lx, %lx)\n", src, dst);
2244 #endif
2245 	s = (const void *)ALPHA_PHYS_TO_K0SEG(src);
2246 	d = (void *)ALPHA_PHYS_TO_K0SEG(dst);
2247 	memcpy(d, s, PAGE_SIZE);
2248 }
2249 
2250 /*
2251  * pmap_pageidlezero:		[ INTERFACE ]
2252  *
2253  *	Page zero'er for the idle loop.  Returns true if the
2254  *	page was zero'd, false if we aborted for some reason.
2255  */
2256 bool
2257 pmap_pageidlezero(paddr_t pa)
2258 {
2259 	u_long *ptr;
2260 	int i, cnt = PAGE_SIZE / sizeof(u_long);
2261 
2262 	for (i = 0, ptr = (u_long *) ALPHA_PHYS_TO_K0SEG(pa); i < cnt; i++) {
2263 		if (sched_curcpu_runnable_p()) {
2264 			/*
2265 			 * An LWP has become ready.  Abort now,
2266 			 * so we don't keep it waiting while we
2267 			 * finish zeroing the page.
2268 			 */
2269 			return (false);
2270 		}
2271 		*ptr++ = 0;
2272 	}
2273 
2274 	return (true);
2275 }
2276 
2277 /*
2278  * pmap_clear_modify:		[ INTERFACE ]
2279  *
2280  *	Clear the modify bits on the specified physical page.
2281  */
2282 bool
2283 pmap_clear_modify(struct vm_page *pg)
2284 {
2285 	bool rv = false;
2286 	long cpu_id = cpu_number();
2287 	kmutex_t *lock;
2288 
2289 #ifdef DEBUG
2290 	if (pmapdebug & PDB_FOLLOW)
2291 		printf("pmap_clear_modify(%p)\n", pg);
2292 #endif
2293 
2294 	PMAP_HEAD_TO_MAP_LOCK();
2295 	lock = pmap_pvh_lock(pg);
2296 	mutex_enter(lock);
2297 
2298 	if (pg->mdpage.pvh_attrs & PGA_MODIFIED) {
2299 		rv = true;
2300 		pmap_changebit(pg, PG_FOW, ~0, cpu_id);
2301 		pg->mdpage.pvh_attrs &= ~PGA_MODIFIED;
2302 	}
2303 
2304 	mutex_exit(lock);
2305 	PMAP_HEAD_TO_MAP_UNLOCK();
2306 
2307 	return (rv);
2308 }
2309 
2310 /*
2311  * pmap_clear_reference:	[ INTERFACE ]
2312  *
2313  *	Clear the reference bit on the specified physical page.
2314  */
2315 bool
2316 pmap_clear_reference(struct vm_page *pg)
2317 {
2318 	bool rv = false;
2319 	long cpu_id = cpu_number();
2320 	kmutex_t *lock;
2321 
2322 #ifdef DEBUG
2323 	if (pmapdebug & PDB_FOLLOW)
2324 		printf("pmap_clear_reference(%p)\n", pg);
2325 #endif
2326 
2327 	PMAP_HEAD_TO_MAP_LOCK();
2328 	lock = pmap_pvh_lock(pg);
2329 	mutex_enter(lock);
2330 
2331 	if (pg->mdpage.pvh_attrs & PGA_REFERENCED) {
2332 		rv = true;
2333 		pmap_changebit(pg, PG_FOR | PG_FOW | PG_FOE, ~0, cpu_id);
2334 		pg->mdpage.pvh_attrs &= ~PGA_REFERENCED;
2335 	}
2336 
2337 	mutex_exit(lock);
2338 	PMAP_HEAD_TO_MAP_UNLOCK();
2339 
2340 	return (rv);
2341 }
2342 
2343 /*
2344  * pmap_is_referenced:		[ INTERFACE ]
2345  *
2346  *	Return whether or not the specified physical page is referenced
2347  *	by any physical maps.
2348  */
2349 /* See <machine/pmap.h> */
2350 
2351 /*
2352  * pmap_is_modified:		[ INTERFACE ]
2353  *
2354  *	Return whether or not the specified physical page is modified
2355  *	by any physical maps.
2356  */
2357 /* See <machine/pmap.h> */
2358 
2359 /*
2360  * pmap_phys_address:		[ INTERFACE ]
2361  *
2362  *	Return the physical address corresponding to the specified
2363  *	cookie.  Used by the device pager to decode a device driver's
2364  *	mmap entry point return value.
2365  *
2366  *	Note: no locking is necessary in this function.
2367  */
2368 paddr_t
2369 pmap_phys_address(paddr_t ppn)
2370 {
2371 
2372 	return (alpha_ptob(ppn));
2373 }
2374 
2375 /*
2376  * Miscellaneous support routines follow
2377  */
2378 
2379 /*
2380  * alpha_protection_init:
2381  *
2382  *	Initialize Alpha protection code array.
2383  *
2384  *	Note: no locking is necessary in this function.
2385  */
2386 static void
2387 alpha_protection_init(void)
2388 {
2389 	int prot, *kp, *up;
2390 
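	/*
	 * protection_codes[0] gets PG_ASM plus the kernel access bits;
	 * protection_codes[1] additionally gets the user access bits.
	 */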
2391 	kp = protection_codes[0];
2392 	up = protection_codes[1];
2393 
2394 	for (prot = 0; prot < 8; prot++) {
2395 		kp[prot] = PG_ASM;
2396 		up[prot] = 0;
2397 
2398 		if (prot & VM_PROT_READ) {
2399 			kp[prot] |= PG_KRE;
2400 			up[prot] |= PG_KRE | PG_URE;
2401 		}
2402 		if (prot & VM_PROT_WRITE) {
2403 			kp[prot] |= PG_KWE;
2404 			up[prot] |= PG_KWE | PG_UWE;
2405 		}
2406 		if (prot & VM_PROT_EXECUTE) {
2407 			kp[prot] |= PG_EXEC | PG_KRE;
2408 			up[prot] |= PG_EXEC | PG_KRE | PG_URE;
2409 		} else {
2410 			kp[prot] |= PG_FOE;
2411 			up[prot] |= PG_FOE;
2412 		}
2413 	}
2414 }
2415 
2416 /*
2417  * pmap_remove_mapping:
2418  *
2419  *	Invalidate a single page denoted by pmap/va.
2420  *
2421  *	If (pte != NULL), it is the already computed PTE for the page.
2422  *
2423  *	Note: locking in this function is complicated by the fact
2424  *	that we can be called when the PV list is already locked.
2425  *	(pmap_page_protect()).  In this case, the caller must be
2426  *	careful to get the next PV entry while we remove this entry
2427  *	from beneath it.  We assume that the pmap itself is already
2428  *	locked; dolock applies only to the PV list.
2429  *
2430  *	Returns true or false, indicating if an I-stream sync needs
2431  *	to be initiated (for this CPU or for other CPUs).
2432  */
2433 static bool
2434 pmap_remove_mapping(pmap_t pmap, vaddr_t va, pt_entry_t *pte,
2435     bool dolock, long cpu_id)
2436 {
2437 	paddr_t pa;
2438 	struct vm_page *pg;		/* if != NULL, page is managed */
2439 	bool onpv;
2440 	bool hadasm;
2441 	bool isactive;
2442 	bool needisync = false;
2443 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
2444 
2445 #ifdef DEBUG
2446 	if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
2447 		printf("pmap_remove_mapping(%p, %lx, %p, %d, %ld)\n",
2448 		       pmap, va, pte, dolock, cpu_id);
2449 #endif
2450 
2451 	/*
2452 	 * PTE not provided, compute it from pmap and va.
2453 	 */
2454 	if (pte == NULL) {
2455 		pte = pmap_l3pte(pmap, va, NULL);
2456 		if (pmap_pte_v(pte) == 0)
2457 			return (false);
2458 	}
2459 
2460 	pa = pmap_pte_pa(pte);
2461 	onpv = (pmap_pte_pv(pte) != 0);
2462 	hadasm = (pmap_pte_asm(pte) != 0);
2463 	isactive = PMAP_ISACTIVE(pmap, cpu_id);
2464 
2465 	/*
2466 	 * Determine what we need to do about the I-stream.  If
2467 	 * PG_EXEC was set, we mark a user pmap as needing an
2468 	 * I-sync on the way out to userspace.  We always need
2469 	 * an immediate I-sync for the kernel pmap.
2470 	 */
2471 	if (pmap_pte_exec(pte)) {
2472 		if (pmap == pmap_kernel())
2473 			needisync = true;
2474 		else {
2475 			PMAP_SET_NEEDISYNC(pmap);
2476 			needisync = (pmap->pm_cpus != 0);
2477 		}
2478 	}
2479 
2480 	/*
2481 	 * Update statistics
2482 	 */
2483 	if (pmap_pte_w(pte))
2484 		PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
2485 	PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1);
2486 
2487 	/*
2488 	 * Invalidate the PTE after saving the reference/modify info.
2489 	 */
2490 #ifdef DEBUG
2491 	if (pmapdebug & PDB_REMOVE)
2492 		printf("remove: invalidating pte at %p\n", pte);
2493 #endif
2494 	PMAP_SET_PTE(pte, PG_NV);
2495 
2496 	PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id);
2497 	PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0);
2498 	PMAP_TLB_SHOOTNOW();
2499 
2500 	/*
2501 	 * If we're removing a user mapping, check to see if we
2502 	 * can free page table pages.
2503 	 */
2504 	if (pmap != pmap_kernel()) {
2505 		/*
2506 		 * Delete the reference on the level 3 table.  It will
2507 		 * delete references on the level 2 and 1 tables as
2508 		 * appropriate.
2509 		 */
2510 		pmap_l3pt_delref(pmap, va, pte, cpu_id);
2511 	}
2512 
2513 	/*
2514 	 * If the mapping wasn't entered on the PV list, we're all done.
2515 	 */
2516 	if (onpv == false)
2517 		return (needisync);
2518 
2519 	/*
2520 	 * Remove it from the PV table.
2521 	 */
2522 	pg = PHYS_TO_VM_PAGE(pa);
2523 	KASSERT(pg != NULL);
2524 	pmap_pv_remove(pmap, pg, va, dolock);
2525 
2526 	return (needisync);
2527 }
2528 
2529 /*
2530  * pmap_changebit:
2531  *
2532  *	Set or clear the specified PTE bits for all mappings on the
2533  *	specified page.
2534  *
2535  *	Note: we assume that the pv_head is already locked, and that
2536  *	the caller has acquired a PV->pmap mutex so that we can lock
2537  *	the pmaps as we encounter them.
2538  */
2539 static void
2540 pmap_changebit(struct vm_page *pg, u_long set, u_long mask, long cpu_id)
2541 {
2542 	pv_entry_t pv;
2543 	pt_entry_t *pte, npte;
2544 	vaddr_t va;
2545 	bool hadasm, isactive;
2546 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
2547 
2548 #ifdef DEBUG
2549 	if (pmapdebug & PDB_BITS)
2550 		printf("pmap_changebit(%p, 0x%lx, 0x%lx)\n",
2551 		    pg, set, mask);
2552 #endif
2553 
2554 	/*
2555 	 * Loop over all current mappings, setting/clearing as appropriate.
2556 	 */
2557 	for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) {
2558 		va = pv->pv_va;
2559 
2560 		PMAP_LOCK(pv->pv_pmap);
2561 
2562 		pte = pv->pv_pte;
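		/* OR in 'set', then clear bits absent from 'mask'. */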
2563 		npte = (*pte | set) & mask;
2564 		if (*pte != npte) {
2565 			hadasm = (pmap_pte_asm(pte) != 0);
2566 			isactive = PMAP_ISACTIVE(pv->pv_pmap, cpu_id);
2567 			PMAP_SET_PTE(pte, npte);
2568 			PMAP_INVALIDATE_TLB(pv->pv_pmap, va, hadasm, isactive,
2569 			    cpu_id);
2570 			PMAP_TLB_SHOOTDOWN(pv->pv_pmap, va,
2571 			    hadasm ? PG_ASM : 0);
2572 		}
2573 		PMAP_UNLOCK(pv->pv_pmap);
2574 	}
2575 
2576 	PMAP_TLB_SHOOTNOW();
2577 }
2578 
2579 /*
2580  * pmap_emulate_reference:
2581  *
2582  *	Emulate reference and/or modified bit hits.
2583  *	Return 1 if this was an execute fault on a non-exec mapping,
2584  *	otherwise return 0.
2585  */
2586 int
2587 pmap_emulate_reference(struct lwp *l, vaddr_t v, int user, int type)
2588 {
2589 	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
2590 	pt_entry_t faultoff, *pte;
2591 	struct vm_page *pg;
2592 	paddr_t pa;
2593 	bool didlock = false;
2594 	bool exec = false;
2595 	long cpu_id = cpu_number();
2596 	kmutex_t *lock;
2597 
2598 #ifdef DEBUG
2599 	if (pmapdebug & PDB_FOLLOW)
2600 		printf("pmap_emulate_reference: %p, 0x%lx, %d, %d\n",
2601 		    l, v, user, type);
2602 #endif
2603 
2604 	/*
2605 	 * Convert process and virtual address to physical address.
2606 	 */
2607 	if (v >= VM_MIN_KERNEL_ADDRESS) {
2608 		if (user)
2609 			panic("pmap_emulate_reference: user ref to kernel");
2610 		/*
2611 		 * No need to lock here; kernel PT pages never go away.
2612 		 */
2613 		pte = PMAP_KERNEL_PTE(v);
2614 	} else {
2615 #ifdef DIAGNOSTIC
2616 		if (l == NULL)
2617 			panic("pmap_emulate_reference: bad proc");
2618 		if (l->l_proc->p_vmspace == NULL)
2619 			panic("pmap_emulate_reference: bad p_vmspace");
2620 #endif
2621 		PMAP_LOCK(pmap);
2622 		didlock = true;
2623 		pte = pmap_l3pte(pmap, v, NULL);
2624 		/*
2625 		 * We'll unlock below where we're done with the PTE.
2626 		 */
2627 	}
2628 	exec = pmap_pte_exec(pte);
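	/*
	 * An execute fault on a mapping without PG_EXEC is not an
	 * emulation case; report it to the caller (see above).
	 */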
2629 	if (!exec && type == ALPHA_MMCSR_FOE) {
2630 		if (didlock)
2631 			PMAP_UNLOCK(pmap);
2632 		return (1);
2633 	}
2634 #ifdef DEBUG
2635 	if (pmapdebug & PDB_FOLLOW) {
2636 		printf("\tpte = %p, ", pte);
2637 		printf("*pte = 0x%lx\n", *pte);
2638 	}
2639 #endif
2640 #ifdef DEBUG				/* These checks are more expensive */
2641 	if (!pmap_pte_v(pte))
2642 		panic("pmap_emulate_reference: invalid pte");
2643 	if (type == ALPHA_MMCSR_FOW) {
2644 		if (!(*pte & (user ? PG_UWE : PG_UWE | PG_KWE)))
2645 			panic("pmap_emulate_reference: write but unwritable");
2646 		if (!(*pte & PG_FOW))
2647 			panic("pmap_emulate_reference: write but not FOW");
2648 	} else {
2649 		if (!(*pte & (user ? PG_URE : PG_URE | PG_KRE)))
2650 			panic("pmap_emulate_reference: !write but unreadable");
2651 		if (!(*pte & (PG_FOR | PG_FOE)))
2652 			panic("pmap_emulate_reference: !write but not FOR|FOE");
2653 	}
2654 	/* Other diagnostics? */
2655 #endif
2656 	pa = pmap_pte_pa(pte);
2657 
2658 	/*
2659 	 * We're now done with the PTE.  If it was a user pmap, unlock
2660 	 * it now.
2661 	 */
2662 	if (didlock)
2663 		PMAP_UNLOCK(pmap);
2664 
2665 #ifdef DEBUG
2666 	if (pmapdebug & PDB_FOLLOW)
2667 		printf("\tpa = 0x%lx\n", pa);
2668 #endif
2669 #ifdef DIAGNOSTIC
2670 	if (!uvm_pageismanaged(pa))
2671 		panic("pmap_emulate_reference(%p, 0x%lx, %d, %d): "
2672 		      "pa 0x%lx not managed", l, v, user, type, pa);
2673 #endif
2674 
2675 	/*
2676 	 * Twiddle the appropriate bits to reflect the reference
2677 	 * and/or modification.
2678 	 *
2679 	 * The rules:
2680 	 * 	(1) always mark page as used, and
2681 	 *	(2) if it was a write fault, mark page as modified.
2682 	 */
2683 	pg = PHYS_TO_VM_PAGE(pa);
2684 
2685 	PMAP_HEAD_TO_MAP_LOCK();
2686 	lock = pmap_pvh_lock(pg);
2687 	mutex_enter(lock);
2688 
2689 	if (type == ALPHA_MMCSR_FOW) {
2690 		pg->mdpage.pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED);
2691 		faultoff = PG_FOR | PG_FOW;
2692 	} else {
2693 		pg->mdpage.pvh_attrs |= PGA_REFERENCED;
2694 		faultoff = PG_FOR;
2695 		if (exec) {
2696 			faultoff |= PG_FOE;
2697 		}
2698 	}
2699 	pmap_changebit(pg, 0, ~faultoff, cpu_id);
2700 
2701 	mutex_exit(lock);
2702 	PMAP_HEAD_TO_MAP_UNLOCK();
2703 	return (0);
2704 }
2705 
2706 #ifdef DEBUG
2707 /*
2708  * pmap_pv_dump:
2709  *
2710  *	Dump the physical->virtual data for the specified page.
2711  */
2712 void
2713 pmap_pv_dump(paddr_t pa)
2714 {
2715 	struct vm_page *pg;
2716 	pv_entry_t pv;
2717 	kmutex_t *lock;
2718 
2719 	pg = PHYS_TO_VM_PAGE(pa);
2720 
2721 	lock = pmap_pvh_lock(pg);
2722 	mutex_enter(lock);
2723 
2724 	printf("pa 0x%lx (attrs = 0x%x):\n", pa, pg->mdpage.pvh_attrs);
2725 	for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next)
2726 		printf("     pmap %p, va 0x%lx\n",
2727 		    pv->pv_pmap, pv->pv_va);
2728 	printf("\n");
2729 
2730 	mutex_exit(lock);
2731 }
2732 #endif
2733 
2734 /*
2735  * vtophys:
2736  *
2737  *	Return the physical address corresponding to the K0SEG or
2738  *	K1SEG address provided.
2739  *
2740  *	Note: no locking is necessary in this function.
2741  */
2742 paddr_t
2743 vtophys(vaddr_t vaddr)
2744 {
2745 	pt_entry_t *pte;
2746 	paddr_t paddr = 0;
2747 
2748 	if (vaddr < ALPHA_K0SEG_BASE)
2749 		printf("vtophys: invalid vaddr 0x%lx", vaddr);
2750 	else if (vaddr <= ALPHA_K0SEG_END)
2751 		paddr = ALPHA_K0SEG_TO_PHYS(vaddr);
2752 	else {
2753 		pte = PMAP_KERNEL_PTE(vaddr);
2754 		if (pmap_pte_v(pte))
2755 			paddr = pmap_pte_pa(pte) | (vaddr & PGOFSET);
2756 	}
2757 
2758 #if 0
2759 	printf("vtophys(0x%lx) -> 0x%lx\n", vaddr, paddr);
2760 #endif
2761 
2762 	return (paddr);
2763 }
2764 
2765 /******************** pv_entry management ********************/
2766 
2767 /*
2768  * pmap_pv_enter:
2769  *
2770  *	Add a physical->virtual entry to the pv_table.
2771  */
2772 static int
2773 pmap_pv_enter(pmap_t pmap, struct vm_page *pg, vaddr_t va, pt_entry_t *pte,
2774     bool dolock)
2775 {
2776 	pv_entry_t newpv;
2777 	kmutex_t *lock;
2778 
2779 	/*
2780 	 * Allocate and fill in the new pv_entry.
2781 	 */
2782 	newpv = pmap_pv_alloc();
2783 	if (newpv == NULL)
2784 		return ENOMEM;
2785 	newpv->pv_va = va;
2786 	newpv->pv_pmap = pmap;
2787 	newpv->pv_pte = pte;
2788 
2789 	if (dolock) {
2790 		lock = pmap_pvh_lock(pg);
2791 		mutex_enter(lock);
2792 	}
2793 
2794 #ifdef DEBUG
2795     {
2796 	pv_entry_t pv;
2797 	/*
2798 	 * Make sure the entry doesn't already exist.
2799 	 */
2800 	for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) {
2801 		if (pmap == pv->pv_pmap && va == pv->pv_va) {
2802 			printf("pmap = %p, va = 0x%lx\n", pmap, va);
2803 			panic("pmap_pv_enter: already in pv table");
2804 		}
2805 	}
2806     }
2807 #endif
2808 
2809 	/*
2810 	 * ...and put it in the list.
2811 	 */
2812 	newpv->pv_next = pg->mdpage.pvh_list;
2813 	pg->mdpage.pvh_list = newpv;
2814 
2815 	if (dolock) {
2816 		mutex_exit(lock);
2817 	}
2818 
2819 	return 0;
2820 }
2821 
2822 /*
2823  * pmap_pv_remove:
2824  *
2825  *	Remove a physical->virtual entry from the pv_table.
2826  */
2827 static void
2828 pmap_pv_remove(pmap_t pmap, struct vm_page *pg, vaddr_t va, bool dolock)
2829 {
2830 	pv_entry_t pv, *pvp;
2831 	kmutex_t *lock;
2832 
2833 	if (dolock) {
2834 		lock = pmap_pvh_lock(pg);
2835 		mutex_enter(lock);
2836 	} else {
2837 		lock = NULL; /* XXX stupid gcc */
2838 	}
2839 
2840 	/*
2841 	 * Find the entry to remove.
2842 	 */
2843 	for (pvp = &pg->mdpage.pvh_list, pv = *pvp;
2844 	     pv != NULL; pvp = &pv->pv_next, pv = *pvp)
2845 		if (pmap == pv->pv_pmap && va == pv->pv_va)
2846 			break;
2847 
2848 #ifdef DEBUG
2849 	if (pv == NULL)
2850 		panic("pmap_pv_remove: not in pv table");
2851 #endif
2852 
2853 	*pvp = pv->pv_next;
2854 
2855 	if (dolock) {
2856 		mutex_exit(lock);
2857 	}
2858 
2859 	pmap_pv_free(pv);
2860 }
2861 
2862 /*
2863  * pmap_pv_page_alloc:
2864  *
2865  *	Allocate a page for the pv_entry pool.
2866  */
2867 static void *
2868 pmap_pv_page_alloc(struct pool *pp, int flags)
2869 {
2870 	paddr_t pg;
2871 
2872 	if (pmap_physpage_alloc(PGU_PVENT, &pg))
2873 		return ((void *)ALPHA_PHYS_TO_K0SEG(pg));
2874 	return (NULL);
2875 }
2876 
2877 /*
2878  * pmap_pv_page_free:
2879  *
2880  *	Free a pv_entry pool page.
2881  */
2882 static void
2883 pmap_pv_page_free(struct pool *pp, void *v)
2884 {
2885 
2886 	pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t)v));
2887 }
2888 
2889 /******************** misc. functions ********************/
2890 
2891 /*
2892  * pmap_physpage_alloc:
2893  *
2894  *	Allocate a single page from the VM system and return the
2895  *	physical address for that page.
2896  */
2897 static bool
2898 pmap_physpage_alloc(int usage, paddr_t *pap)
2899 {
2900 	struct vm_page *pg;
2901 	paddr_t pa;
2902 
2903 	/*
2904 	 * Don't ask for a zero'd page in the L1PT case -- we will
2905 	 * properly initialize it in the constructor.
2906 	 */
2907 
2908 	pg = uvm_pagealloc(NULL, 0, NULL, usage == PGU_L1PT ?
2909 	    UVM_PGA_USERESERVE : UVM_PGA_USERESERVE|UVM_PGA_ZERO);
2910 	if (pg != NULL) {
2911 		pa = VM_PAGE_TO_PHYS(pg);
2912 #ifdef DEBUG
2913 		if (pg->mdpage.pvh_refcnt != 0) {
2914 			printf("pmap_physpage_alloc: page 0x%lx has "
2915 			    "%d references\n", pa, pg->mdpage.pvh_refcnt);
2916 			panic("pmap_physpage_alloc");
2917 		}
2918 #endif
2919 		*pap = pa;
2920 		return (true);
2921 	}
2922 	return (false);
2923 }
2924 
2925 /*
2926  * pmap_physpage_free:
2927  *
2928  *	Free the single page table page at the specified physical address.
2929  */
2930 static void
2931 pmap_physpage_free(paddr_t pa)
2932 {
2933 	struct vm_page *pg;
2934 
2935 	if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL)
2936 		panic("pmap_physpage_free: bogus physical page address");
2937 
2938 #ifdef DEBUG
2939 	if (pg->mdpage.pvh_refcnt != 0)
2940 		panic("pmap_physpage_free: page still has references");
2941 #endif
2942 
2943 	uvm_pagefree(pg);
2944 }
2945 
2946 /*
2947  * pmap_physpage_addref:
2948  *
2949  *	Add a reference to the specified special use page.
2950  */
2951 static int
2952 pmap_physpage_addref(void *kva)
2953 {
2954 	struct vm_page *pg;
2955 	paddr_t pa;
2956 
2957 	pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva));
2958 	pg = PHYS_TO_VM_PAGE(pa);
2959 
2960 	KASSERT((int)pg->mdpage.pvh_refcnt >= 0);
2961 
2962 	return atomic_inc_uint_nv(&pg->mdpage.pvh_refcnt);
2963 }
2964 
2965 /*
2966  * pmap_physpage_delref:
2967  *
2968  *	Delete a reference to the specified special use page.
2969  */
2970 static int
2971 pmap_physpage_delref(void *kva)
2972 {
2973 	struct vm_page *pg;
2974 	paddr_t pa;
2975 
2976 	pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva));
2977 	pg = PHYS_TO_VM_PAGE(pa);
2978 
2979 	KASSERT((int)pg->mdpage.pvh_refcnt > 0);
2980 
2981 	return atomic_dec_uint_nv(&pg->mdpage.pvh_refcnt);
2982 }
2983 
2984 /******************** page table page management ********************/
2985 
2986 /*
2987  * pmap_growkernel:		[ INTERFACE ]
2988  *
2989  *	Grow the kernel address space.  This is a hint from the
2990  *	upper layer to pre-allocate more kernel PT pages.
2991  */
2992 vaddr_t
2993 pmap_growkernel(vaddr_t maxkvaddr)
2994 {
2995 	struct pmap *kpm = pmap_kernel(), *pm;
2996 	paddr_t ptaddr;
2997 	pt_entry_t *l1pte, *l2pte, pte;
2998 	vaddr_t va;
2999 	int l1idx;
3000 
3001 	rw_enter(&pmap_growkernel_lock, RW_WRITER);
3002 
3003 	if (maxkvaddr <= virtual_end)
3004 		goto out;		/* we are OK */
3005 
3006 	va = virtual_end;
3007 
3008 	while (va < maxkvaddr) {
3009 		/*
3010 		 * If there is no valid L1 PTE (i.e. no L2 PT page),
3011 		 * allocate a new L2 PT page and insert it into the
3012 		 * L1 map.
3013 		 */
3014 		l1pte = pmap_l1pte(kpm, va);
3015 		if (pmap_pte_v(l1pte) == 0) {
3016 			/*
3017 			 * XXX PGU_NORMAL?  It's not a "traditional" PT page.
3018 			 */
3019 			if (uvm.page_init_done == false) {
3020 				/*
3021 				 * We're growing the kernel pmap early (from
3022 				 * uvm_pageboot_alloc()).  This case must
3023 				 * be handled a little differently.
3024 				 */
3025 				ptaddr = ALPHA_K0SEG_TO_PHYS(
3026 				    pmap_steal_memory(PAGE_SIZE, NULL, NULL));
3027 			} else if (pmap_physpage_alloc(PGU_NORMAL,
3028 				   &ptaddr) == false)
3029 				goto die;
3030 			pte = (atop(ptaddr) << PG_SHIFT) |
3031 			    PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
3032 			*l1pte = pte;
3033 
3034 			l1idx = l1pte_index(va);
3035 
3036 			/* Update all the user pmaps. */
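			/*
			 * Their level 1 tables contain copies of the kernel
			 * L1 PTEs (see pmap_l1pt_ctor()), so each one must
			 * pick up the new entry.
			 */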
3037 			mutex_enter(&pmap_all_pmaps_lock);
3038 			for (pm = TAILQ_FIRST(&pmap_all_pmaps);
3039 			     pm != NULL; pm = TAILQ_NEXT(pm, pm_list)) {
3040 				/* Skip the kernel pmap. */
3041 				if (pm == pmap_kernel())
3042 					continue;
3043 
3044 				PMAP_LOCK(pm);
3045 				if (pm->pm_lev1map == kernel_lev1map) {
3046 					PMAP_UNLOCK(pm);
3047 					continue;
3048 				}
3049 				pm->pm_lev1map[l1idx] = pte;
3050 				PMAP_UNLOCK(pm);
3051 			}
3052 			mutex_exit(&pmap_all_pmaps_lock);
3053 		}
3054 
3055 		/*
3056 		 * Have an L2 PT page now, add the L3 PT page.
3057 		 */
3058 		l2pte = pmap_l2pte(kpm, va, l1pte);
3059 		KASSERT(pmap_pte_v(l2pte) == 0);
3060 		if (uvm.page_init_done == false) {
3061 			/*
3062 			 * See above.
3063 			 */
3064 			ptaddr = ALPHA_K0SEG_TO_PHYS(
3065 			    pmap_steal_memory(PAGE_SIZE, NULL, NULL));
3066 		} else if (pmap_physpage_alloc(PGU_NORMAL, &ptaddr) == false)
3067 			goto die;
3068 		*l2pte = (atop(ptaddr) << PG_SHIFT) |
3069 		    PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
3070 		va += ALPHA_L2SEG_SIZE;
3071 	}
3072 
3073 	/* Invalidate the L1 PT cache. */
3074 	pool_cache_invalidate(&pmap_l1pt_cache);
3075 
3076 	virtual_end = va;
3077 
3078  out:
3079 	rw_exit(&pmap_growkernel_lock);
3080 
3081 	return (virtual_end);
3082 
3083  die:
3084 	panic("pmap_growkernel: out of memory");
3085 }
3086 
3087 /*
3088  * pmap_lev1map_create:
3089  *
3090  *	Create a new level 1 page table for the specified pmap.
3091  *
3092  *	Note: growkernel must already be held and the pmap either
3093  *	already locked or unreferenced globally.
3094  */
3095 static int
3096 pmap_lev1map_create(pmap_t pmap, long cpu_id)
3097 {
3098 	pt_entry_t *l1pt;
3099 
3100 	KASSERT(pmap != pmap_kernel());
3101 
3102 	KASSERT(pmap->pm_lev1map == kernel_lev1map);
3103 	KASSERT(pmap->pm_asni[cpu_id].pma_asn == PMAP_ASN_RESERVED);
3104 
3105 	/* Don't sleep -- we're called with locks held. */
3106 	l1pt = pool_cache_get(&pmap_l1pt_cache, PR_NOWAIT);
3107 	if (l1pt == NULL)
3108 		return (ENOMEM);
3109 
3110 	pmap->pm_lev1map = l1pt;
3111 	return (0);
3112 }
3113 
3114 /*
3115  * pmap_lev1map_destroy:
3116  *
3117  *	Destroy the level 1 page table for the specified pmap.
3118  *
3119  *	Note: growkernel must be held and the pmap must already be
3120  *	locked or not globally referenced.
3121  */
3122 static void
3123 pmap_lev1map_destroy(pmap_t pmap, long cpu_id)
3124 {
3125 	pt_entry_t *l1pt = pmap->pm_lev1map;
3126 
3127 	KASSERT(pmap != pmap_kernel());
3128 
3129 	/*
3130 	 * Go back to referencing the global kernel_lev1map.
3131 	 */
3132 	pmap->pm_lev1map = kernel_lev1map;
3133 
3134 	/*
3135 	 * Free the old level 1 page table page.
3136 	 */
3137 	pool_cache_put(&pmap_l1pt_cache, l1pt);
3138 }
3139 
3140 /*
3141  * pmap_l1pt_ctor:
3142  *
3143  *	Pool cache constructor for L1 PT pages.
3144  *
3145  *	Note: The growkernel lock is held across allocations
3146  *	from our pool_cache, so we don't need to acquire it
3147  *	ourselves.
3148  */
3149 static int
3150 pmap_l1pt_ctor(void *arg, void *object, int flags)
3151 {
3152 	pt_entry_t *l1pt = object, pte;
3153 	int i;
3154 
3155 	/*
3156 	 * Initialize the new level 1 table by zeroing the
3157 	 * user portion and copying the kernel mappings into
3158 	 * the kernel portion.
3159 	 */
3160 	for (i = 0; i < l1pte_index(VM_MIN_KERNEL_ADDRESS); i++)
3161 		l1pt[i] = 0;
3162 
3163 	for (i = l1pte_index(VM_MIN_KERNEL_ADDRESS);
3164 	     i <= l1pte_index(VM_MAX_KERNEL_ADDRESS); i++)
3165 		l1pt[i] = kernel_lev1map[i];
3166 
3167 	/*
3168 	 * Now, map the new virtual page table.  NOTE: NO ASM!
3169 	 */
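	/* The VPT entry is per-address-space, so it must not use PG_ASM. */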
3170 	pte = ((ALPHA_K0SEG_TO_PHYS((vaddr_t) l1pt) >> PGSHIFT) << PG_SHIFT) |
3171 	    PG_V | PG_KRE | PG_KWE;
3172 	l1pt[l1pte_index(VPTBASE)] = pte;
3173 
3174 	return (0);
3175 }
3176 
3177 /*
3178  * pmap_l1pt_alloc:
3179  *
3180  *	Page allocator for L1 PT pages.
3181  */
3182 static void *
3183 pmap_l1pt_alloc(struct pool *pp, int flags)
3184 {
3185 	paddr_t ptpa;
3186 
3187 	/*
3188 	 * Attempt to allocate a free page.
3189 	 */
3190 	if (pmap_physpage_alloc(PGU_L1PT, &ptpa) == false)
3191 		return (NULL);
3192 
3193 	return ((void *) ALPHA_PHYS_TO_K0SEG(ptpa));
3194 }
3195 
3196 /*
3197  * pmap_l1pt_free:
3198  *
3199  *	Page freer for L1 PT pages.
3200  */
3201 static void
3202 pmap_l1pt_free(struct pool *pp, void *v)
3203 {
3204 
3205 	pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t) v));
3206 }
3207 
3208 /*
3209  * pmap_ptpage_alloc:
3210  *
3211  *	Allocate a level 2 or level 3 page table page, and
3212  *	initialize the PTE that references it.
3213  *
3214  *	Note: the pmap must already be locked.
3215  */
3216 static int
3217 pmap_ptpage_alloc(pmap_t pmap, pt_entry_t *pte, int usage)
3218 {
3219 	paddr_t ptpa;
3220 
3221 	/*
3222 	 * Allocate the page table page.
3223 	 */
3224 	if (pmap_physpage_alloc(usage, &ptpa) == false)
3225 		return (ENOMEM);
3226 
3227 	/*
3228 	 * Initialize the referencing PTE.
3229 	 */
3230 	PMAP_SET_PTE(pte, ((ptpa >> PGSHIFT) << PG_SHIFT) |
3231 	    PG_V | PG_KRE | PG_KWE | PG_WIRED |
3232 	    (pmap == pmap_kernel() ? PG_ASM : 0));
3233 
3234 	return (0);
3235 }
3236 
3237 /*
3238  * pmap_ptpage_free:
3239  *
3240  *	Free the level 2 or level 3 page table page referenced
3241  *	by the provided PTE.
3242  *
3243  *	Note: the pmap must already be locked.
3244  */
3245 static void
3246 pmap_ptpage_free(pmap_t pmap, pt_entry_t *pte)
3247 {
3248 	paddr_t ptpa;
3249 
3250 	/*
3251 	 * Extract the physical address of the page from the PTE
3252 	 * and clear the entry.
3253 	 */
3254 	ptpa = pmap_pte_pa(pte);
3255 	PMAP_SET_PTE(pte, PG_NV);
3256 
3257 #ifdef DEBUG
3258 	pmap_zero_page(ptpa);
3259 #endif
3260 	pmap_physpage_free(ptpa);
3261 }
3262 
3263 /*
3264  * pmap_l3pt_delref:
3265  *
3266  *	Delete a reference on a level 3 PT page.  If the reference drops
3267  *	to zero, free it.
3268  *
3269  *	Note: the pmap must already be locked.
3270  */
3271 static void
3272 pmap_l3pt_delref(pmap_t pmap, vaddr_t va, pt_entry_t *l3pte, long cpu_id)
3273 {
3274 	pt_entry_t *l1pte, *l2pte;
3275 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
3276 
3277 	l1pte = pmap_l1pte(pmap, va);
3278 	l2pte = pmap_l2pte(pmap, va, l1pte);
3279 
3280 #ifdef DIAGNOSTIC
3281 	if (pmap == pmap_kernel())
3282 		panic("pmap_l3pt_delref: kernel pmap");
3283 #endif
3284 
3285 	if (pmap_physpage_delref(l3pte) == 0) {
3286 		/*
3287 		 * No more mappings; we can free the level 3 table.
3288 		 */
3289 #ifdef DEBUG
3290 		if (pmapdebug & PDB_PTPAGE)
3291 			printf("pmap_l3pt_delref: freeing level 3 table at "
3292 			    "0x%lx\n", pmap_pte_pa(l2pte));
3293 #endif
3294 		pmap_ptpage_free(pmap, l2pte);
3295 
3296 		/*
3297 		 * We've freed a level 3 table, so we must
3298 		 * invalidate the TLB entry for that PT page
3299 		 * in the Virtual Page Table VA range, because
3300 		 * otherwise the PALcode will service a TLB
3301 		 * miss using the stale VPT TLB entry it entered
3302 		 * behind our back to shortcut to the VA's PTE.
3303 		 */
3304 		PMAP_INVALIDATE_TLB(pmap,
3305 		    (vaddr_t)(&VPT[VPT_INDEX(va)]), false,
3306 		    PMAP_ISACTIVE(pmap, cpu_id), cpu_id);
3307 		PMAP_TLB_SHOOTDOWN(pmap,
3308 		    (vaddr_t)(&VPT[VPT_INDEX(va)]), 0);
3309 		PMAP_TLB_SHOOTNOW();
3310 
3311 		/*
3312 		 * We've freed a level 3 table, so delete the reference
3313 		 * on the level 2 table.
3314 		 */
3315 		pmap_l2pt_delref(pmap, l1pte, l2pte, cpu_id);
3316 	}
3317 }
3318 
3319 /*
3320  * pmap_l2pt_delref:
3321  *
3322  *	Delete a reference on a level 2 PT page.  If the reference drops
3323  *	to zero, free it.
3324  *
3325  *	Note: the pmap must already be locked.
3326  */
3327 static void
3328 pmap_l2pt_delref(pmap_t pmap, pt_entry_t *l1pte, pt_entry_t *l2pte,
3329     long cpu_id)
3330 {
3331 
3332 #ifdef DIAGNOSTIC
3333 	if (pmap == pmap_kernel())
3334 		panic("pmap_l2pt_delref: kernel pmap");
3335 #endif
3336 
3337 	if (pmap_physpage_delref(l2pte) == 0) {
3338 		/*
3339 		 * No more mappings in this segment; we can free the
3340 		 * level 2 table.
3341 		 */
3342 #ifdef DEBUG
3343 		if (pmapdebug & PDB_PTPAGE)
3344 			printf("pmap_l2pt_delref: freeing level 2 table at "
3345 			    "0x%lx\n", pmap_pte_pa(l1pte));
3346 #endif
3347 		pmap_ptpage_free(pmap, l1pte);
3348 
3349 		/*
3350 		 * We've freed a level 2 table, so delete the reference
3351 		 * on the level 1 table.
3352 		 */
3353 		pmap_l1pt_delref(pmap, l1pte, cpu_id);
3354 	}
3355 }
3356 
3357 /*
3358  * pmap_l1pt_delref:
3359  *
3360  *	Delete a reference on a level 1 PT page.  If the reference drops
3361  *	to zero, free it.
3362  *
3363  *	Note: the pmap must already be locked.
3364  */
3365 static void
3366 pmap_l1pt_delref(pmap_t pmap, pt_entry_t *l1pte, long cpu_id)
3367 {
3368 
3369 #ifdef DIAGNOSTIC
3370 	if (pmap == pmap_kernel())
3371 		panic("pmap_l1pt_delref: kernel pmap");
3372 #endif
3373 
3374 	(void)pmap_physpage_delref(l1pte);
3375 }
3376 
3377 /******************** Address Space Number management ********************/
3378 
3379 /*
3380  * pmap_asn_alloc:
3381  *
3382  *	Allocate and assign an ASN to the specified pmap.
3383  *
3384  *	Note: the pmap must already be locked.  This may be called from
3385  *	an interprocessor interrupt, and in that case, the sender of
3386  *	the IPI has the pmap lock.
3387  */
3388 static void
3389 pmap_asn_alloc(pmap_t pmap, long cpu_id)
3390 {
3391 	struct pmap_asn_info *pma = &pmap->pm_asni[cpu_id];
3392 	struct pmap_asn_info *cpma = &pmap_asn_info[cpu_id];
3393 
3394 #ifdef DEBUG
3395 	if (pmapdebug & (PDB_FOLLOW|PDB_ASN))
3396 		printf("pmap_asn_alloc(%p)\n", pmap);
3397 #endif
3398 
3399 	/*
3400 	 * If the pmap is still using the global kernel_lev1map, there
3401 	 * is no need to assign an ASN at this time, because only
3402 	 * kernel mappings exist in that map, and all kernel mappings
3403 	 * have PG_ASM set.  If the pmap eventually gets its own
3404 	 * lev1map, an ASN will be allocated at that time.
3405 	 *
3406 	 * Only the kernel pmap will reference kernel_lev1map.  Do the
3407 	 * same old fixups, but note that we no longer need the pmap
3408 	 * to be locked if we're in this mode, since pm_lev1map will
3409 	 * never change.
3410 	 *
3411 	 */
3412 	if (pmap->pm_lev1map == kernel_lev1map) {
3413 #ifdef DEBUG
3414 		if (pmapdebug & PDB_ASN)
3415 			printf("pmap_asn_alloc: still references "
3416 			    "kernel_lev1map\n");
3417 #endif
3418 #if defined(MULTIPROCESSOR)
3419 		/*
3420 		 * In a multiprocessor system, it's possible to
3421 		 * get here without having PMAP_ASN_RESERVED in
3422 		 * pmap->pm_asni[cpu_id].pma_asn; see pmap_lev1map_destroy().
3423 		 *
3424 		 * So, what we do here is simply assign the reserved
3425 		 * ASN for kernel_lev1map users and let things
3426 		 * continue on.  We do, however, let uniprocessor
3427 		 * configurations continue to make their assertion.
3428 		 */
3429 		pma->pma_asn = PMAP_ASN_RESERVED;
3430 #else
3431 		KASSERT(pma->pma_asn == PMAP_ASN_RESERVED);
3432 #endif /* MULTIPROCESSOR */
3433 		return;
3434 	}
3435 
3436 	/*
3437 	 * On processors which do not implement ASNs, the swpctx PALcode
3438 	 * operation will automatically invalidate the TLB and I-cache,
3439 	 * so we don't need to do that here.
3440 	 */
3441 	if (pmap_max_asn == 0) {
3442 		/*
3443 		 * Refresh the pmap's generation number, to
3444 		 * simplify logic elsewhere.
3445 		 */
3446 		pma->pma_asngen = cpma->pma_asngen;
3447 #ifdef DEBUG
3448 		if (pmapdebug & PDB_ASN)
3449 			printf("pmap_asn_alloc: no ASNs, using asngen %lu\n",
3450 			    pma->pma_asngen);
3451 #endif
3452 		return;
3453 	}
3454 
3455 	/*
3456 	 * Hopefully, we can continue using the one we have...
3457 	 */
3458 	if (pma->pma_asn != PMAP_ASN_RESERVED &&
3459 	    pma->pma_asngen == cpma->pma_asngen) {
3460 		/*
3461 		 * ASN is still in the current generation; keep on using it.
3462 		 */
3463 #ifdef DEBUG
3464 		if (pmapdebug & PDB_ASN)
3465 			printf("pmap_asn_alloc: same generation, keeping %u\n",
3466 			    pma->pma_asn);
3467 #endif
3468 		return;
3469 	}
3470 
3471 	/*
3472 	 * Need to assign a new ASN.  Grab the next one, incrementing
3473 	 * the generation number if we have to.
3474 	 */
3475 	if (cpma->pma_asn > pmap_max_asn) {
3476 		/*
3477 		 * Invalidate all non-PG_ASM TLB entries and the
3478 		 * I-cache, and bump the generation number.
3479 		 */
3480 		ALPHA_TBIAP();
3481 		alpha_pal_imb();
3482 
3483 		cpma->pma_asn = 1;
3484 		cpma->pma_asngen++;
3485 #ifdef DIAGNOSTIC
3486 		if (cpma->pma_asngen == 0) {
3487 			/*
3488 			 * The generation number has wrapped.  We could
3489 			 * handle this scenario by traversing all of
3490 			 * the pmaps, and invalidating the generation
3491 			 * number on those which are not currently
3492 			 * in use by this processor.
3493 			 *
3494 			 * However... considering that we're using
3495 			 * an unsigned 64-bit integer for generation
3496 			 * numbers, on non-ASN CPUs, we won't wrap
3497 			 * for approx. 585 million years, or 75 billion
3498 			 * years on a 128-ASN CPU (assuming 1000 switch
3499 			 * operations per second).
3500 			 *
3501 			 * So, we don't bother.
3502 			 */
3503 			panic("pmap_asn_alloc: too much uptime");
3504 		}
3505 #endif
3506 #ifdef DEBUG
3507 		if (pmapdebug & PDB_ASN)
3508 			printf("pmap_asn_alloc: generation bumped to %lu\n",
3509 			    cpma->pma_asngen);
3510 #endif
3511 	}
3512 
3513 	/*
3514 	 * Assign the new ASN and validate the generation number.
3515 	 */
3516 	pma->pma_asn = cpma->pma_asn++;
3517 	pma->pma_asngen = cpma->pma_asngen;
3518 
3519 #ifdef DEBUG
3520 	if (pmapdebug & PDB_ASN)
3521 		printf("pmap_asn_alloc: assigning %u to pmap %p\n",
3522 		    pma->pma_asn, pmap);
3523 #endif
3524 
3525 	/*
3526 	 * Have a new ASN, so there's no need to sync the I-stream
3527 	 * on the way back out to userspace.
3528 	 */
3529 	atomic_and_ulong(&pmap->pm_needisync, ~(1UL << cpu_id));
3530 }
3531 
3532 #if defined(MULTIPROCESSOR)
3533 /******************** TLB shootdown code ********************/
3534 
3535 /*
3536  * pmap_tlb_shootdown:
3537  *
3538  *	Cause the TLB entry for pmap/va to be shot down.
3539  *
3540  *	NOTE: The pmap must be locked here.
3541  */
3542 void
3543 pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, u_long *cpumaskp)
3544 {
3545 	struct pmap_tlb_shootdown_q *pq;
3546 	struct pmap_tlb_shootdown_job *pj;
3547 	struct cpu_info *ci, *self = curcpu();
3548 	u_long cpumask;
3549 	CPU_INFO_ITERATOR cii;
3550 
3551 	KASSERT((pmap == pmap_kernel()) || mutex_owned(&pmap->pm_lock));
3552 
3553 	cpumask = 0;
3554 
3555 	for (CPU_INFO_FOREACH(cii, ci)) {
3556 		if (ci == self)
3557 			continue;
3558 
3559 		/*
3560 		 * The pmap must be locked (unless it's the kernel
3561 		 * pmap, in which case it is okay for it to be
3562 		 * unlocked), which prevents it from becoming
3563 		 * active on any additional processors.  This makes
3564 		 * it safe to check for activeness.  If it's not
3565 		 * active on the processor in question, then just
3566 		 * mark it as needing a new ASN the next time it
3567 		 * does, saving the IPI.  We always have to send
3568 		 * the IPI for the kernel pmap.
3569 		 *
3570 		 * Note if it's marked active now, and it becomes
3571 		 * inactive by the time the processor receives
3572 		 * the IPI, that's okay, because it does the right
3573 		 * thing with it later.
3574 		 */
3575 		if (pmap != pmap_kernel() &&
3576 		    PMAP_ISACTIVE(pmap, ci->ci_cpuid) == 0) {
3577 			PMAP_INVALIDATE_ASN(pmap, ci->ci_cpuid);
3578 			continue;
3579 		}
3580 
3581 		cpumask |= 1UL << ci->ci_cpuid;
3582 
3583 		pq = &pmap_tlb_shootdown_q[ci->ci_cpuid];
3584 		mutex_spin_enter(&pq->pq_lock);
3585 
3586 		/*
3587 		 * Allocate a job.
3588 		 */
3589 		if (pq->pq_count < PMAP_TLB_SHOOTDOWN_MAXJOBS) {
3590 			pj = pool_cache_get(&pmap_tlb_shootdown_job_cache,
3591 			    PR_NOWAIT);
3592 		} else {
3593 			pj = NULL;
3594 		}
3595 
3596 		/*
3597 		 * If a global flush is already pending, we
3598 		 * don't really have to do anything else.
3599 		 */
3600 		pq->pq_pte |= pte;
3601 		if (pq->pq_tbia) {
3602 			mutex_spin_exit(&pq->pq_lock);
3603 			if (pj != NULL) {
3604 				pool_cache_put(&pmap_tlb_shootdown_job_cache,
3605 				    pj);
3606 			}
3607 			continue;
3608 		}
3609 		if (pj == NULL) {
3610 			/*
3611 			 * Couldn't allocate a job entry.  Just
3612 			 * tell the processor to kill everything.
3613 			 */
3614 			pq->pq_tbia = 1;
3615 		} else {
3616 			pj->pj_pmap = pmap;
3617 			pj->pj_va = va;
3618 			pj->pj_pte = pte;
3619 			pq->pq_count++;
3620 			TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list);
3621 		}
3622 		mutex_spin_exit(&pq->pq_lock);
3623 	}
3624 
3625 	*cpumaskp |= cpumask;
3626 }
3627 
3628 /*
3629  * pmap_tlb_shootnow:
3630  *
3631  *	Process the TLB shootdowns that we have been accumulating
3632  *	for the specified processor set.
3633  */
3634 void
3635 pmap_tlb_shootnow(u_long cpumask)
3636 {
3637 
3638 	alpha_multicast_ipi(cpumask, ALPHA_IPI_SHOOTDOWN);
3639 }
3640 
3641 /*
3642  * pmap_do_tlb_shootdown:
3643  *
3644  *	Process pending TLB shootdown operations for this processor.
3645  */
3646 void
3647 pmap_do_tlb_shootdown(struct cpu_info *ci, struct trapframe *framep)
3648 {
3649 	u_long cpu_id = ci->ci_cpuid;
3650 	u_long cpu_mask = (1UL << cpu_id);
3651 	struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id];
3652 	struct pmap_tlb_shootdown_job *pj, *next;
3653 	TAILQ_HEAD(, pmap_tlb_shootdown_job) jobs;
3654 
3655 	TAILQ_INIT(&jobs);
3656 
3657 	mutex_spin_enter(&pq->pq_lock);
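	/*
	 * Detach any pending jobs from the queue while holding the lock;
	 * they are processed (or superseded by a global flush) below and
	 * returned to the cache once the lock is dropped.
	 */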
3658 	TAILQ_CONCAT(&jobs, &pq->pq_head, pj_list);
3659 	if (pq->pq_tbia) {
3660 		if (pq->pq_pte & PG_ASM)
3661 			ALPHA_TBIA();
3662 		else
3663 			ALPHA_TBIAP();
3664 		pq->pq_tbia = 0;
3665 		pq->pq_pte = 0;
3666 	} else {
3667 		TAILQ_FOREACH(pj, &jobs, pj_list) {
3668 			PMAP_INVALIDATE_TLB(pj->pj_pmap, pj->pj_va,
3669 			    pj->pj_pte & PG_ASM,
3670 			    pj->pj_pmap->pm_cpus & cpu_mask, cpu_id);
3671 		}
3672 		pq->pq_pte = 0;
3673 	}
3674 	pq->pq_count = 0;
3675 	mutex_spin_exit(&pq->pq_lock);
3676 
3677 	/* Free jobs back to the cache. */
3678 	for (pj = TAILQ_FIRST(&jobs); pj != NULL; pj = next) {
3679 		next = TAILQ_NEXT(pj, pj_list);
3680 		pool_cache_put(&pmap_tlb_shootdown_job_cache, pj);
3681 	}
3682 }
3683 #endif /* MULTIPROCESSOR */
3684