xref: /netbsd-src/sys/arch/alpha/alpha/pmap.c (revision 796c32c94f6e154afc9de0f63da35c91bb739b45)
1 /* $NetBSD: pmap.c,v 1.261 2016/12/23 07:15:27 cherry Exp $ */
2 
3 /*-
4  * Copyright (c) 1998, 1999, 2000, 2001, 2007, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center and by Chris G. Demetriou.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * This code is derived from software contributed to Berkeley by
38  * the Systems Programming Group of the University of Utah Computer
39  * Science Department.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  * 3. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)pmap.c	8.6 (Berkeley) 5/27/94
66  */
67 
68 /*
69  * DEC Alpha physical map management code.
70  *
71  * History:
72  *
73  *	This pmap started life as a Motorola 68851/68030 pmap,
74  *	written by Mike Hibler at the University of Utah.
75  *
76  *	It was modified for the DEC Alpha by Chris Demetriou
77  *	at Carnegie Mellon University.
78  *
79  *	Support for non-contiguous physical memory was added by
80  *	Jason R. Thorpe of the Numerical Aerospace Simulation
81  *	Facility, NASA Ames Research Center and Chris Demetriou.
82  *
83  *	Page table management and a major cleanup were undertaken
84  *	by Jason R. Thorpe, with lots of help from Ross Harvey of
85  *	Avalon Computer Systems and from Chris Demetriou.
86  *
87  *	Support for the new UVM pmap interface was written by
88  *	Jason R. Thorpe.
89  *
90  *	Support for ASNs was written by Jason R. Thorpe, again
91  *	with help from Chris Demetriou and Ross Harvey.
92  *
93  *	The locking protocol was written by Jason R. Thorpe,
94  *	using Chuck Cranor's i386 pmap for UVM as a model.
95  *
96  *	TLB shootdown code was written by Jason R. Thorpe.
97  *
98  *	Multiprocessor modifications by Andrew Doran.
99  *
100  * Notes:
101  *
102  *	All page table access is done via K0SEG.  The one exception
103  *	to this is for kernel mappings.  Since all kernel page
104  *	tables are pre-allocated, we can use the Virtual Page Table
105  *	to access PTEs that map K1SEG addresses.
106  *
107  *	Kernel page table pages are statically allocated in
108  *	pmap_bootstrap(), and are never freed.  In the future,
109  *	support for dynamically adding additional kernel page
110  *	table pages may be added.  User page table pages are
111  *	dynamically allocated and freed.
112  *
113  * Bugs/misfeatures:
114  *
115  *	- Some things could be optimized.
116  */
117 
118 /*
119  *	Manages physical address maps.
120  *
121  *	Since the information managed by this module is
122  *	also stored by the logical address mapping module,
123  *	this module may throw away valid virtual-to-physical
124  *	mappings at almost any time.  However, invalidations
125  *	of virtual-to-physical mappings must be done as
126  *	requested.
127  *
128  *	In order to cope with hardware architectures which
129  *	make virtual-to-physical map invalidates expensive,
130  *	this module may delay invalidate or reduced protection
131  *	operations until such time as they are actually
132  *	necessary.  This module is given full information as
133  *	to which processors are currently using which maps,
134  *	and to when physical maps must be made correct.
135  */
136 
137 #include "opt_lockdebug.h"
138 #include "opt_sysv.h"
139 #include "opt_multiprocessor.h"
140 
141 #include <sys/cdefs.h>			/* RCS ID & Copyright macro defns */
142 
143 __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.261 2016/12/23 07:15:27 cherry Exp $");
144 
145 #include <sys/param.h>
146 #include <sys/systm.h>
147 #include <sys/kernel.h>
148 #include <sys/proc.h>
149 #include <sys/malloc.h>
150 #include <sys/pool.h>
151 #include <sys/buf.h>
152 #include <sys/atomic.h>
153 #include <sys/cpu.h>
154 
155 #include <uvm/uvm.h>
156 
157 #if defined(_PMAP_MAY_USE_PROM_CONSOLE) || defined(MULTIPROCESSOR)
158 #include <machine/rpb.h>
159 #endif
160 
161 #ifdef DEBUG
162 #define	PDB_FOLLOW	0x0001
163 #define	PDB_INIT	0x0002
164 #define	PDB_ENTER	0x0004
165 #define	PDB_REMOVE	0x0008
166 #define	PDB_CREATE	0x0010
167 #define	PDB_PTPAGE	0x0020
168 #define	PDB_ASN		0x0040
169 #define	PDB_BITS	0x0080
170 #define	PDB_COLLECT	0x0100
171 #define	PDB_PROTECT	0x0200
172 #define	PDB_BOOTSTRAP	0x1000
173 #define	PDB_PARANOIA	0x2000
174 #define	PDB_WIRING	0x4000
175 #define	PDB_PVDUMP	0x8000
176 
177 int debugmap = 0;
178 int pmapdebug = PDB_PARANOIA;
179 #endif
180 
181 /*
182  * Given a map and a machine independent protection code,
183  * convert to an alpha protection code.
184  */
185 #define pte_prot(m, p)	(protection_codes[m == pmap_kernel() ? 0 : 1][p])
186 static int	protection_codes[2][8];
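/*
 * Illustrative example (not part of the original source): pte_prot()
 * just indexes the protection_codes[][] table filled in by
 * alpha_protection_init(): row 0 for the kernel pmap, row 1 for user
 * pmaps, column VM_PROT_* (0-7).  A hypothetical caller:
 */
#if 0
static int
example_pte_prot(pmap_t usermap)
{
	int kbits = pte_prot(pmap_kernel(), VM_PROT_READ | VM_PROT_WRITE);
	int ubits = pte_prot(usermap, VM_PROT_READ);

	return (kbits | ubits);
}
#endif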
187 
188 /*
189  * kernel_lev1map:
190  *
191  *	Kernel level 1 page table.  This maps all kernel level 2
192  *	page table pages, and is used as a template for all user
193  *	pmap level 1 page tables.  When a new user level 1 page
194  *	table is allocated, all kernel_lev1map PTEs for kernel
195  *	addresses are copied to the new map.
196  *
197  *	The kernel also has an initial set of kernel level 2 page
198  *	table pages.  These map the kernel level 3 page table pages.
199  *	As kernel level 3 page table pages are added, more level 2
200  *	page table pages may be added to map them.  These pages are
201  *	never freed.
202  *
203  *	Finally, the kernel also has an initial set of kernel level
204  *	3 page table pages.  These map pages in K1SEG.  More level
205  *	3 page table pages may be added at run-time if additional
206  *	K1SEG address space is required.  These pages are never freed.
207  *
208  * NOTE: When mappings are inserted into the kernel pmap, all
209  * level 2 and level 3 page table pages must already be allocated
210  * and mapped into the parent page table.
211  */
212 pt_entry_t	*kernel_lev1map;
213 
214 /*
215  * Virtual Page Table.
216  */
217 static pt_entry_t *VPT;
218 
219 static struct {
220 	struct pmap k_pmap;
221 	struct pmap_asn_info k_asni[ALPHA_MAXPROCS];
222 } kernel_pmap_store;
223 
224 struct pmap *const kernel_pmap_ptr = &kernel_pmap_store.k_pmap;
225 
226 paddr_t    	avail_start;	/* PA of first available physical page */
227 paddr_t		avail_end;	/* PA of last available physical page */
228 static vaddr_t	virtual_end;	/* VA of last avail page (end of kernel AS) */
229 
230 static bool pmap_initialized;	/* Has pmap_init completed? */
231 
232 u_long		pmap_pages_stolen;	/* instrumentation */
233 
234 /*
235  * This variable contains the number of CPU IDs we need to allocate
236  * space for when allocating the pmap structure.  It is used to
237  * size a per-CPU array of ASN and ASN Generation number.
238  */
239 static u_long 	pmap_ncpuids;
240 
241 #ifndef PMAP_PV_LOWAT
242 #define	PMAP_PV_LOWAT	16
243 #endif
244 int		pmap_pv_lowat = PMAP_PV_LOWAT;
245 
246 /*
247  * List of all pmaps, used to update them when e.g. additional kernel
248  * page tables are allocated.  This list is kept LRU-ordered by
249  * pmap_activate().
250  */
251 static TAILQ_HEAD(, pmap) pmap_all_pmaps;
252 
253 /*
254  * The pools from which pmap structures and sub-structures are allocated.
255  */
256 static struct pool_cache pmap_pmap_cache;
257 static struct pool_cache pmap_l1pt_cache;
258 static struct pool_cache pmap_pv_cache;
259 
260 /*
261  * Address Space Numbers.
262  *
263  * On many implementations of the Alpha architecture, the TLB entries and
264  * I-cache blocks are tagged with a unique number within an implementation-
265  * specified range.  When a process context becomes active, the ASN is used
266  * to match TLB entries; if a TLB entry for a particular VA does not match
267  * the current ASN, it is ignored (one could think of the processor as
268  * having a collection of <max ASN> separate TLBs).  This allows operating
269  * system software to skip the TLB flush that would otherwise be necessary
270  * at context switch time.
271  *
272  * Alpha PTEs have a bit in them (PG_ASM - Address Space Match) that
273  * causes TLB entries to match any ASN.  The PALcode also provides
274  * a TBI (Translation Buffer Invalidate) operation that flushes all
275  * TLB entries that _do not_ have PG_ASM.  We use this bit for kernel
276  * mappings, so that invalidation of all user mappings does not invalidate
277  * kernel mappings (which are consistent across all processes).
278  *
279  * pmap_next_asn always indicates the next ASN to use.  When
280  * pmap_next_asn exceeds pmap_max_asn, we start a new ASN generation.
281  *
282  * When a new ASN generation is created, the per-process (i.e. non-PG_ASM)
283  * TLB entries and the I-cache are flushed, the generation number is bumped,
284  * and pmap_next_asn is changed to indicate the first non-reserved ASN.
285  *
286  * We reserve ASN #0 for pmaps that use the global kernel_lev1map.  This
287  * prevents the following scenario:
288  *
289  *	* New ASN generation starts, and process A is given ASN #0.
290  *
291  *	* A new process B (and thus new pmap) is created.  The ASN,
292  *	  for lack of a better value, is initialized to 0.
293  *
294  *	* Process B runs.  It is now using the TLB entries tagged
295  *	  by process A.  *poof*
296  *
297  * In the scenario above, in addition to the processor using incorrect
298  * TLB entries, the PALcode might use incorrect information to service a
299  * TLB miss.  (The PALcode uses the recursively mapped Virtual Page Table
300  * to locate the PTE for a faulting address, and tagged TLB entries exist
301  * for the Virtual Page Table addresses in order to speed up this procedure,
302  * as well.)
303  *
304  * By reserving an ASN for kernel_lev1map users, we are guaranteeing that
305  * new pmaps will initially run with no TLB entries for user addresses
306  * or VPT mappings that map user page tables.  Since kernel_lev1map only
307  * contains mappings for kernel addresses, and since those mappings
308  * are always made with PG_ASM, sharing an ASN for kernel_lev1map users is
309  * safe (since PG_ASM mappings match any ASN).
310  *
311  * On processors that do not support ASNs, the PALcode invalidates
312  * the TLB and I-cache automatically on swpctx.  We still go
313  * through the motions of assigning an ASN (really, just refreshing
314  * the ASN generation in this particular case) to keep the logic sane
315  * in other parts of the code.
316  */
317 static u_int	pmap_max_asn;		/* max ASN supported by the system */
318 					/* next ASN and cur ASN generation */
319 static struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS];
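/*
 * Illustrative sketch (not part of the original source): the core of the
 * ASN policy described above.  A pmap's cached ASN is reusable only if it
 * was handed out during the current generation on this processor;
 * otherwise a fresh ASN is allocated, and exhausting the ASN space starts
 * a new generation, which flushes all non-PG_ASM TLB entries.  This is
 * only a simplified model of pmap_asn_alloc(); it ignores the special
 * handling of pmaps that still reference kernel_lev1map.
 */
#if 0
static void
example_asn_alloc(struct pmap_asn_info *pma, struct pmap_asn_info *cpma,
    u_int max_asn)
{
	if (pma->pma_asngen == cpma->pma_asngen)
		return;				/* ASN still valid; reuse it */

	if (++cpma->pma_asn > max_asn) {
		/* ASN space exhausted: flush and start a new generation. */
		ALPHA_TBIAP();
		alpha_pal_imb();
		cpma->pma_asngen++;
		cpma->pma_asn = 1;		/* ASN 0 stays reserved */
	}
	pma->pma_asn = cpma->pma_asn;
	pma->pma_asngen = cpma->pma_asngen;
}
#endif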
320 
321 /*
322  * Locking:
323  *
324  *	READ/WRITE LOCKS
325  *	----------------
326  *
327  *	* pmap_main_lock - This lock is used to prevent deadlock and/or
328  *	  provide mutex access to the pmap module.  Most operations lock
329  *	  the pmap first, then PV lists as needed.  However, some operations,
330  *	  such as pmap_page_protect(), lock the PV lists before locking
331  *	  the pmaps.  To prevent deadlock, we require a mutex lock on the
332  *	  pmap module if locking in the PV->pmap direction.  This is
333  *	  implemented by acquiring a (shared) read lock on pmap_main_lock
334  *	  if locking pmap->PV and an (exclusive) write lock if locking in
335  *	  the PV->pmap direction.  Since only one thread can hold a write
336  *	  lock at a time, this provides the mutex.
337  *
338  *	MUTEXES
339  *	-------
340  *
341  *	* pm_lock (per-pmap) - This lock protects all of the members
342  *	  of the pmap structure itself.  This lock will be asserted
343  *	  in pmap_activate() and pmap_deactivate() from a critical
344  *	  section of mi_switch(), and must never sleep.  Note that
345  *	  in the case of the kernel pmap, interrupts which cause
346  *	  memory allocation *must* be blocked while this lock is
347  *	  asserted.
348  *
349  *	* pvh_lock (global hash) - These locks protect the PV lists
350  *	  for managed pages.
351  *
352  *	* pmap_all_pmaps_lock - This lock protects the global list of
353  *	  all pmaps.  Note that a pm_lock must never be held while this
354  *	  lock is held.
355  *
356  *	* pmap_growkernel_lock - This lock protects pmap_growkernel()
357  *	  and the virtual_end variable.
358  *
359  *	  There is a lock ordering constraint for pmap_growkernel_lock.
360  *	  pmap_growkernel() acquires the locks in the following order:
361  *
362  *		pmap_growkernel_lock (write) -> pmap_all_pmaps_lock ->
363  *		    pmap->pm_lock
364  *
365  *	  We need to ensure consistency between user pmaps and the
366  *	  kernel_lev1map.  For this reason, pmap_growkernel_lock must
367  *	  be held to prevent kernel_lev1map changing across pmaps
368  *	  being added to / removed from the global pmaps list.
369  *
370  *	Address space number management (global ASN counters and per-pmap
371  *	ASN state) is not locked; it is kept in arrays of values indexed
372  *	per-processor.
373  *
374  *	All internal functions which operate on a pmap are called
375  *	with the pmap already locked by the caller (which will be
376  *	an interface function).
377  */
378 static krwlock_t pmap_main_lock;
379 static kmutex_t pmap_all_pmaps_lock;
380 static krwlock_t pmap_growkernel_lock;
381 
382 #define	PMAP_MAP_TO_HEAD_LOCK()		rw_enter(&pmap_main_lock, RW_READER)
383 #define	PMAP_MAP_TO_HEAD_UNLOCK()	rw_exit(&pmap_main_lock)
384 #define	PMAP_HEAD_TO_MAP_LOCK()		rw_enter(&pmap_main_lock, RW_WRITER)
385 #define	PMAP_HEAD_TO_MAP_UNLOCK()	rw_exit(&pmap_main_lock)
386 
387 static struct {
388 	kmutex_t lock;
389 } pmap_pvh_locks[64] __aligned(64);
390 
391 static inline kmutex_t *
392 pmap_pvh_lock(struct vm_page *pg)
393 {
394 
395 	/* Cut bits 11-6 out of page address and use directly as offset. */
396 	return (kmutex_t *)((uintptr_t)&pmap_pvh_locks +
397 	    ((uintptr_t)pg & (63 << 6)));
398 }
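
/*
 * Illustrative sketch (not part of the original source): the two locking
 * directions described above.  Operations that start from a pmap take the
 * shared side of pmap_main_lock before per-pmap and PV locks; operations
 * that start from a page (e.g. pmap_page_protect()) take the exclusive
 * side first.  The function and argument names are hypothetical.
 */
#if 0
static void
example_lock_order(pmap_t pmap, struct vm_page *pg)
{
	kmutex_t *lock;

	/* pmap -> PV direction (pmap_enter(), pmap_remove(), ...) */
	PMAP_MAP_TO_HEAD_LOCK();		/* shared (reader) */
	PMAP_LOCK(pmap);
	lock = pmap_pvh_lock(pg);
	mutex_enter(lock);
	/* ... modify the page's PV list / PTEs ... */
	mutex_exit(lock);
	PMAP_UNLOCK(pmap);
	PMAP_MAP_TO_HEAD_UNLOCK();

	/* PV -> pmap direction (pmap_page_protect(), ...) */
	PMAP_HEAD_TO_MAP_LOCK();		/* exclusive (writer) */
	lock = pmap_pvh_lock(pg);
	mutex_enter(lock);
	/* ... for each pv entry: PMAP_LOCK(pv->pv_pmap), modify, unlock ... */
	mutex_exit(lock);
	PMAP_HEAD_TO_MAP_UNLOCK();
}
#endif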
399 
400 #if defined(MULTIPROCESSOR)
401 /*
402  * TLB Shootdown:
403  *
404  * When a mapping is changed in a pmap, the TLB entry corresponding to
405  * the virtual address must be invalidated on all processors.  In order
406  * to accomplish this on systems with multiple processors, messages are
407  * sent from the processor which performs the mapping change to all
408  * processors on which the pmap is active.  For other processors, the
409  * ASN generation number for that processor is invalidated, so that
410  * the next time the pmap is activated on that processor, a new ASN
411  * will be allocated (which implicitly invalidates all TLB entries).
412  *
413  * Note, we can use the pool allocator to allocate job entries
414  * since pool pages are mapped with K0SEG, not with the TLB.
415  */
416 struct pmap_tlb_shootdown_job {
417 	TAILQ_ENTRY(pmap_tlb_shootdown_job) pj_list;
418 	vaddr_t pj_va;			/* virtual address */
419 	pmap_t pj_pmap;			/* the pmap which maps the address */
420 	pt_entry_t pj_pte;		/* the PTE bits */
421 };
422 
423 static struct pmap_tlb_shootdown_q {
424 	TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_head;	/* queue 16b */
425 	kmutex_t pq_lock;		/* spin lock on queue 16b */
426 	int pq_pte;			/* aggregate PTE bits 4b */
427 	int pq_count;			/* number of pending requests 4b */
428 	int pq_tbia;			/* pending global flush 4b */
429 	uint8_t pq_pad[64-16-16-4-4-4];	/* pad to 64 bytes */
430 } pmap_tlb_shootdown_q[ALPHA_MAXPROCS] __aligned(CACHE_LINE_SIZE);
431 
432 /* If we have more pending jobs than this, we just nail the whole TLB. */
433 #define	PMAP_TLB_SHOOTDOWN_MAXJOBS	6
434 
435 static struct pool_cache pmap_tlb_shootdown_job_cache;
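
/*
 * Illustrative sketch (not part of the original source): an outline of
 * the enqueue side of the shootdown protocol described above.  The
 * sending processor queues one job per changed mapping on each target
 * CPU's queue and then sends an IPI; once a queue already holds
 * PMAP_TLB_SHOOTDOWN_MAXJOBS entries (or no job can be allocated), it is
 * collapsed into a single full-TLB invalidation request instead.  This is
 * only a simplified model of the real shootdown path.
 */
#if 0
static void
example_shootdown_enqueue(struct pmap_tlb_shootdown_q *pq, pmap_t pmap,
    vaddr_t va, pt_entry_t pte)
{
	struct pmap_tlb_shootdown_job *pj;

	mutex_enter(&pq->pq_lock);
	if (pq->pq_count >= PMAP_TLB_SHOOTDOWN_MAXJOBS) {
		pq->pq_tbia = 1;		/* just nail the whole TLB */
	} else if ((pj = pool_cache_get(&pmap_tlb_shootdown_job_cache,
	    PR_NOWAIT)) == NULL) {
		pq->pq_tbia = 1;		/* no job entry available */
	} else {
		pj->pj_pmap = pmap;
		pj->pj_va = va;
		pj->pj_pte = pte;
		TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list);
		pq->pq_count++;
		pq->pq_pte |= pte;
	}
	mutex_exit(&pq->pq_lock);
	/* ...the caller then IPIs the target processor. */
}
#endif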
436 #endif /* MULTIPROCESSOR */
437 
438 /*
439  * Internal routines
440  */
441 static void	alpha_protection_init(void);
442 static bool	pmap_remove_mapping(pmap_t, vaddr_t, pt_entry_t *, bool, long);
443 static void	pmap_changebit(struct vm_page *, pt_entry_t, pt_entry_t, long);
444 
445 /*
446  * PT page management functions.
447  */
448 static int	pmap_lev1map_create(pmap_t, long);
449 static void	pmap_lev1map_destroy(pmap_t, long);
450 static int	pmap_ptpage_alloc(pmap_t, pt_entry_t *, int);
451 static void	pmap_ptpage_free(pmap_t, pt_entry_t *);
452 static void	pmap_l3pt_delref(pmap_t, vaddr_t, pt_entry_t *, long);
453 static void	pmap_l2pt_delref(pmap_t, pt_entry_t *, pt_entry_t *, long);
454 static void	pmap_l1pt_delref(pmap_t, pt_entry_t *, long);
455 
456 static void	*pmap_l1pt_alloc(struct pool *, int);
457 static void	pmap_l1pt_free(struct pool *, void *);
458 
459 static struct pool_allocator pmap_l1pt_allocator = {
460 	pmap_l1pt_alloc, pmap_l1pt_free, 0,
461 };
462 
463 static int	pmap_l1pt_ctor(void *, void *, int);
464 
465 /*
466  * PV table management functions.
467  */
468 static int	pmap_pv_enter(pmap_t, struct vm_page *, vaddr_t, pt_entry_t *,
469 			      bool);
470 static void	pmap_pv_remove(pmap_t, struct vm_page *, vaddr_t, bool);
471 static void	*pmap_pv_page_alloc(struct pool *, int);
472 static void	pmap_pv_page_free(struct pool *, void *);
473 
474 static struct pool_allocator pmap_pv_page_allocator = {
475 	pmap_pv_page_alloc, pmap_pv_page_free, 0,
476 };
477 
478 #ifdef DEBUG
479 void	pmap_pv_dump(paddr_t);
480 #endif
481 
482 #define	pmap_pv_alloc()		pool_cache_get(&pmap_pv_cache, PR_NOWAIT)
483 #define	pmap_pv_free(pv)	pool_cache_put(&pmap_pv_cache, (pv))
484 
485 /*
486  * ASN management functions.
487  */
488 static void	pmap_asn_alloc(pmap_t, long);
489 
490 /*
491  * Misc. functions.
492  */
493 static bool	pmap_physpage_alloc(int, paddr_t *);
494 static void	pmap_physpage_free(paddr_t);
495 static int	pmap_physpage_addref(void *);
496 static int	pmap_physpage_delref(void *);
497 
498 /*
499  * PMAP_ISACTIVE{,_TEST}:
500  *
501  *	Check to see if a pmap is active on the current processor.
502  */
503 #define	PMAP_ISACTIVE_TEST(pm, cpu_id)					\
504 	(((pm)->pm_cpus & (1UL << (cpu_id))) != 0)
505 
506 #if defined(DEBUG) && !defined(MULTIPROCESSOR)
507 #define	PMAP_ISACTIVE(pm, cpu_id)					\
508 ({									\
509 	/*								\
510 	 * XXX This test is not MP-safe.				\
511 	 */								\
512 	int isactive_ = PMAP_ISACTIVE_TEST(pm, cpu_id);			\
513 									\
514 	if ((curlwp->l_flag & LW_IDLE) != 0 &&				\
515 	    curproc->p_vmspace != NULL &&				\
516 	   ((curproc->p_sflag & PS_WEXIT) == 0) &&			\
517 	   (isactive_ ^ ((pm) == curproc->p_vmspace->vm_map.pmap)))	\
518 		panic("PMAP_ISACTIVE");					\
519 	(isactive_);							\
520 })
521 #else
522 #define	PMAP_ISACTIVE(pm, cpu_id)	PMAP_ISACTIVE_TEST(pm, cpu_id)
523 #endif /* DEBUG && !MULTIPROCESSOR */
524 
525 /*
526  * PMAP_ACTIVATE_ASN_SANITY:
527  *
528  *	DEBUG sanity checks for ASNs within PMAP_ACTIVATE.
529  */
530 #ifdef DEBUG
531 #define	PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id)				\
532 do {									\
533 	struct pmap_asn_info *__pma = &(pmap)->pm_asni[(cpu_id)];	\
534 	struct pmap_asn_info *__cpma = &pmap_asn_info[(cpu_id)];	\
535 									\
536 	if ((pmap)->pm_lev1map == kernel_lev1map) {			\
537 		/*							\
538 		 * This pmap implementation also ensures that pmaps	\
539 		 * referencing kernel_lev1map use a reserved ASN	\
540 		 * to prevent the PALcode from servicing a TLB		\
541 		 * miss with the wrong PTE.				\
542 		 */							\
543 		if (__pma->pma_asn != PMAP_ASN_RESERVED) {		\
544 			printf("kernel_lev1map with non-reserved ASN "	\
545 			    "(line %d)\n", __LINE__);			\
546 			panic("PMAP_ACTIVATE_ASN_SANITY");		\
547 		}							\
548 	} else {							\
549 		if (__pma->pma_asngen != __cpma->pma_asngen) {		\
550 			/*						\
551 			 * ASN generation number isn't valid!		\
552 			 */						\
553 			printf("pmap asngen %lu, current %lu "		\
554 			    "(line %d)\n",				\
555 			    __pma->pma_asngen,				\
556 			    __cpma->pma_asngen,				\
557 			    __LINE__);					\
558 			panic("PMAP_ACTIVATE_ASN_SANITY");		\
559 		}							\
560 		if (__pma->pma_asn == PMAP_ASN_RESERVED) {		\
561 			/*						\
562 			 * DANGER WILL ROBINSON!  We're going to	\
563 			 * pollute the VPT TLB entries!			\
564 			 */						\
565 			printf("Using reserved ASN! (line %d)\n",	\
566 			    __LINE__);					\
567 			panic("PMAP_ACTIVATE_ASN_SANITY");		\
568 		}							\
569 	}								\
570 } while (/*CONSTCOND*/0)
571 #else
572 #define	PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id)	/* nothing */
573 #endif
574 
575 /*
576  * PMAP_ACTIVATE:
577  *
578  *	This is essentially the guts of pmap_activate(), without
579  *	ASN allocation.  This is used by pmap_activate(),
580  *	pmap_lev1map_create(), and pmap_lev1map_destroy().
581  *
582  *	This is called only when it is known that a pmap is "active"
583  *	on the current processor; the ASN must already be valid.
584  */
585 #define	PMAP_ACTIVATE(pmap, l, cpu_id)					\
586 do {									\
587 	struct pcb *pcb = lwp_getpcb(l);				\
588 	PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id);				\
589 									\
590 	pcb->pcb_hw.apcb_ptbr =				\
591 	    ALPHA_K0SEG_TO_PHYS((vaddr_t)(pmap)->pm_lev1map) >> PGSHIFT; \
592 	pcb->pcb_hw.apcb_asn = (pmap)->pm_asni[(cpu_id)].pma_asn;	\
593 									\
594 	if ((l) == curlwp) {						\
595 		/*							\
596 		 * Page table base register has changed; switch to	\
597 		 * our own context again so that it will take effect.	\
598 		 */							\
599 		(void) alpha_pal_swpctx((u_long)l->l_md.md_pcbpaddr);	\
600 	}								\
601 } while (/*CONSTCOND*/0)
602 
603 /*
604  * PMAP_SET_NEEDISYNC:
605  *
606  *	Mark that a user pmap needs an I-stream synch on its
607  *	way back out to userspace.
608  */
609 #define	PMAP_SET_NEEDISYNC(pmap)	(pmap)->pm_needisync = ~0UL
610 
611 /*
612  * PMAP_SYNC_ISTREAM:
613  *
614  *	Synchronize the I-stream for the specified pmap.  For user
615  *	pmaps, this is deferred until a process using the pmap returns
616  *	to userspace.
617  */
618 #if defined(MULTIPROCESSOR)
619 #define	PMAP_SYNC_ISTREAM_KERNEL()					\
620 do {									\
621 	alpha_pal_imb();						\
622 	alpha_broadcast_ipi(ALPHA_IPI_IMB);				\
623 } while (/*CONSTCOND*/0)
624 
625 #define	PMAP_SYNC_ISTREAM_USER(pmap)					\
626 do {									\
627 	alpha_multicast_ipi((pmap)->pm_cpus, ALPHA_IPI_AST);		\
628 	/* for curcpu, will happen in userret() */			\
629 } while (/*CONSTCOND*/0)
630 #else
631 #define	PMAP_SYNC_ISTREAM_KERNEL()	alpha_pal_imb()
632 #define	PMAP_SYNC_ISTREAM_USER(pmap)	/* will happen in userret() */
633 #endif /* MULTIPROCESSOR */
634 
635 #define	PMAP_SYNC_ISTREAM(pmap)						\
636 do {									\
637 	if ((pmap) == pmap_kernel())					\
638 		PMAP_SYNC_ISTREAM_KERNEL();				\
639 	else								\
640 		PMAP_SYNC_ISTREAM_USER(pmap);				\
641 } while (/*CONSTCOND*/0)
642 
643 /*
644  * PMAP_INVALIDATE_ASN:
645  *
646  *	Invalidate the specified pmap's ASN, so as to force allocation
647  *	of a new one the next time pmap_asn_alloc() is called.
648  *
649  *	NOTE: THIS MUST ONLY BE CALLED IF AT LEAST ONE OF THE FOLLOWING
650  *	CONDITIONS IS TRUE:
651  *
652  *		(1) The pmap references the global kernel_lev1map.
653  *
654  *		(2) The pmap is not active on the current processor.
655  */
656 #define	PMAP_INVALIDATE_ASN(pmap, cpu_id)				\
657 do {									\
658 	(pmap)->pm_asni[(cpu_id)].pma_asn = PMAP_ASN_RESERVED;		\
659 } while (/*CONSTCOND*/0)
660 
661 /*
662  * PMAP_INVALIDATE_TLB:
663  *
664  *	Invalidate the TLB entry for the pmap/va pair.
665  */
666 #define	PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id)		\
667 do {									\
668 	if ((hadasm) || (isactive)) {					\
669 		/*							\
670 		 * Simply invalidating the TLB entry and I-cache	\
671 		 * works in this case.					\
672 		 */							\
673 		ALPHA_TBIS((va));					\
674 	} else if ((pmap)->pm_asni[(cpu_id)].pma_asngen ==		\
675 		   pmap_asn_info[(cpu_id)].pma_asngen) {		\
676 		/*							\
677 		 * We can't directly invalidate the TLB entry		\
678 		 * in this case, so we have to force allocation		\
679 		 * of a new ASN the next time this pmap becomes		\
680 		 * active.						\
681 		 */							\
682 		PMAP_INVALIDATE_ASN((pmap), (cpu_id));			\
683 	}								\
684 		/*							\
685 		 * Nothing to do in this case; the next time the	\
686 		 * pmap becomes active on this processor, a new		\
687 		 * ASN will be allocated anyway.			\
688 		 */							\
689 } while (/*CONSTCOND*/0)
690 
691 /*
692  * PMAP_KERNEL_PTE:
693  *
694  *	Get a kernel PTE.
695  *
696  *	If debugging, do a table walk.  If not debugging, just use
697  *	the Virtual Page Table, since all kernel page tables are
698  *	pre-allocated and mapped in.
699  */
700 #ifdef DEBUG
701 #define	PMAP_KERNEL_PTE(va)						\
702 ({									\
703 	pt_entry_t *l1pte_, *l2pte_;					\
704 									\
705 	l1pte_ = pmap_l1pte(pmap_kernel(), va);				\
706 	if (pmap_pte_v(l1pte_) == 0) {					\
707 		printf("kernel level 1 PTE not valid, va 0x%lx "	\
708 		    "(line %d)\n", (va), __LINE__);			\
709 		panic("PMAP_KERNEL_PTE");				\
710 	}								\
711 	l2pte_ = pmap_l2pte(pmap_kernel(), va, l1pte_);			\
712 	if (pmap_pte_v(l2pte_) == 0) {					\
713 		printf("kernel level 2 PTE not valid, va 0x%lx "	\
714 		    "(line %d)\n", (va), __LINE__);			\
715 		panic("PMAP_KERNEL_PTE");				\
716 	}								\
717 	pmap_l3pte(pmap_kernel(), va, l2pte_);				\
718 })
719 #else
720 #define	PMAP_KERNEL_PTE(va)	(&VPT[VPT_INDEX((va))])
721 #endif
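
/*
 * Illustrative note (not part of the original source): the non-DEBUG form
 * above works because pmap_bootstrap() points the level 1 slot for VPTBASE
 * back at kernel_lev1map itself.  With that recursive mapping in place the
 * entire page table tree appears as a linear array of PTEs starting at
 * VPTBASE, so &VPT[VPT_INDEX(va)] is exactly the level 3 PTE that maps va,
 * with no table walk required.
 */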
722 
723 /*
724  * PMAP_SET_PTE:
725  *
726  *	Set a PTE to a specified value.
727  */
728 #define	PMAP_SET_PTE(ptep, val)	*(ptep) = (val)
729 
730 /*
731  * PMAP_STAT_{INCR,DECR}:
732  *
733  *	Increment or decrement a pmap statistic.
734  */
735 #define	PMAP_STAT_INCR(s, v)	atomic_add_long((unsigned long *)(&(s)), (v))
736 #define	PMAP_STAT_DECR(s, v)	atomic_add_long((unsigned long *)(&(s)), -(v))
737 
738 /*
739  * pmap_bootstrap:
740  *
741  *	Bootstrap the system to run with virtual memory.
742  *
743  *	Note: no locking is necessary in this function.
744  */
745 void
746 pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids)
747 {
748 	vsize_t lev2mapsize, lev3mapsize;
749 	pt_entry_t *lev2map, *lev3map;
750 	pt_entry_t pte;
751 	vsize_t bufsz;
752 	struct pcb *pcb;
753 	int i;
754 
755 #ifdef DEBUG
756 	if (pmapdebug & (PDB_FOLLOW|PDB_BOOTSTRAP))
757 		printf("pmap_bootstrap(0x%lx, %u)\n", ptaddr, maxasn);
758 #endif
759 
760 	/*
761 	 * Compute the number of pages kmem_arena will have.
762 	 */
763 	kmeminit_nkmempages();
764 
765 	/*
766 	 * Figure out how many initial PTE's are necessary to map the
767 	 * kernel.  We also reserve space for kmem_alloc_pageable()
768 	 * for vm_fork().
769 	 */
770 
771 	/* Get size of buffer cache and set an upper limit */
772 	bufsz = buf_memcalc();
773 	buf_setvalimit(bufsz);
774 
775 	lev3mapsize =
776 		(VM_PHYS_SIZE + (ubc_nwins << ubc_winshift) +
777 		 bufsz + 16 * NCARGS + pager_map_size) / PAGE_SIZE +
778 		(maxproc * UPAGES) + nkmempages;
779 
780 	lev3mapsize = roundup(lev3mapsize, NPTEPG);
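	/*
	 * Worked example (illustrative, not part of the original source):
	 * with the usual 8 KB Alpha page size and 8-byte PTEs, NPTEPG is
	 * 1024, so each level 3 PT page maps 8 MB of KVA, each level 2 PT
	 * page maps 1024 such level 3 pages (8 GB), and the single level 1
	 * page covers 8 TB.  Rounding lev3mapsize up to a multiple of
	 * NPTEPG allocates the level 3 tables in whole-page units.
	 */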
781 
782 	/*
783 	 * Initialize `FYI' variables.  Note we're relying on
784 	 * the fact that BSEARCH sorts the vm_physmem[] array
785 	 * for us.
786 	 */
787 	avail_start = ptoa(uvm_physseg_get_avail_start(uvm_physseg_get_first()));
788 	avail_end = ptoa(uvm_physseg_get_avail_end(uvm_physseg_get_last()));
789 	virtual_end = VM_MIN_KERNEL_ADDRESS + lev3mapsize * PAGE_SIZE;
790 
791 #if 0
792 	printf("avail_start = 0x%lx\n", avail_start);
793 	printf("avail_end = 0x%lx\n", avail_end);
794 	printf("virtual_end = 0x%lx\n", virtual_end);
795 #endif
796 
797 	/*
798 	 * Allocate a level 1 PTE table for the kernel.
799 	 * This is always one page long.
800 	 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL.
801 	 */
802 	kernel_lev1map = (pt_entry_t *)
803 	    uvm_pageboot_alloc(sizeof(pt_entry_t) * NPTEPG);
804 
805 	/*
806 	 * Allocate a level 2 PTE table for the kernel.
807 	 * These must map all of the level3 PTEs.
808 	 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL.
809 	 */
810 	lev2mapsize = roundup(howmany(lev3mapsize, NPTEPG), NPTEPG);
811 	lev2map = (pt_entry_t *)
812 	    uvm_pageboot_alloc(sizeof(pt_entry_t) * lev2mapsize);
813 
814 	/*
815 	 * Allocate a level 3 PTE table for the kernel.
816 	 * Contains lev3mapsize PTEs.
817 	 */
818 	lev3map = (pt_entry_t *)
819 	    uvm_pageboot_alloc(sizeof(pt_entry_t) * lev3mapsize);
820 
821 	/*
822 	 * Set up level 1 page table
823 	 */
824 
825 	/* Map all of the level 2 pte pages */
826 	for (i = 0; i < howmany(lev2mapsize, NPTEPG); i++) {
827 		pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev2map) +
828 		    (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT;
829 		pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
830 		kernel_lev1map[l1pte_index(VM_MIN_KERNEL_ADDRESS +
831 		    (i*PAGE_SIZE*NPTEPG*NPTEPG))] = pte;
832 	}
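	/*
	 * Illustrative note (not part of the original source): each PTE
	 * built above carries the page frame number of an L2 page in its
	 * PFN field (the physical address shifted right by PGSHIFT, then
	 * left by PG_SHIFT) plus PG_V, PG_ASM, PG_KRE, PG_KWE and PG_WIRED.
	 * Each level 1 slot written here covers PAGE_SIZE*NPTEPG*NPTEPG
	 * bytes of KVA, which is why the index advances by that amount per
	 * iteration.
	 */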
833 
834 	/* Map the virtual page table */
835 	pte = (ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT)
836 	    << PG_SHIFT;
837 	pte |= PG_V | PG_KRE | PG_KWE; /* NOTE NO ASM */
838 	kernel_lev1map[l1pte_index(VPTBASE)] = pte;
839 	VPT = (pt_entry_t *)VPTBASE;
840 
841 #ifdef _PMAP_MAY_USE_PROM_CONSOLE
842     {
843 	extern pt_entry_t prom_pte;			/* XXX */
844 	extern int prom_mapped;				/* XXX */
845 
846 	if (pmap_uses_prom_console()) {
847 		/*
848 		 * XXX Save old PTE so we can remap the PROM, if
849 		 * XXX necessary.
850 		 */
851 		prom_pte = *(pt_entry_t *)ptaddr & ~PG_ASM;
852 	}
853 	prom_mapped = 0;
854 
855 	/*
856 	 * Actually, this code lies.  The prom is still mapped, and will
857 	 * remain so until the context switch after alpha_init() returns.
858 	 */
859     }
860 #endif
861 
862 	/*
863 	 * Set up level 2 page table.
864 	 */
865 	/* Map all of the level 3 pte pages */
866 	for (i = 0; i < howmany(lev3mapsize, NPTEPG); i++) {
867 		pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev3map) +
868 		    (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT;
869 		pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
870 		lev2map[l2pte_index(VM_MIN_KERNEL_ADDRESS+
871 		    (i*PAGE_SIZE*NPTEPG))] = pte;
872 	}
873 
874 	/* Initialize the pmap_growkernel_lock. */
875 	rw_init(&pmap_growkernel_lock);
876 
877 	/*
878 	 * Set up level three page table (lev3map)
879 	 */
880 	/* Nothing to do; it's already zero'd */
881 
882 	/*
883 	 * Initialize the pmap pools and list.
884 	 */
885 	pmap_ncpuids = ncpuids;
886 	pool_cache_bootstrap(&pmap_pmap_cache, PMAP_SIZEOF(pmap_ncpuids), 0,
887 	    0, 0, "pmap", NULL, IPL_NONE, NULL, NULL, NULL);
888 	pool_cache_bootstrap(&pmap_l1pt_cache, PAGE_SIZE, 0, 0, 0, "pmapl1pt",
889 	    &pmap_l1pt_allocator, IPL_NONE, pmap_l1pt_ctor, NULL, NULL);
890 	pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0,
891 	    PR_LARGECACHE, "pmappv", &pmap_pv_page_allocator, IPL_NONE, NULL,
892 	    NULL, NULL);
893 
894 	TAILQ_INIT(&pmap_all_pmaps);
895 
896 	/*
897 	 * Initialize the ASN logic.
898 	 */
899 	pmap_max_asn = maxasn;
900 	for (i = 0; i < ALPHA_MAXPROCS; i++) {
901 		pmap_asn_info[i].pma_asn = 1;
902 		pmap_asn_info[i].pma_asngen = 0;
903 	}
904 
905 	/*
906 	 * Initialize the locks.
907 	 */
908 	rw_init(&pmap_main_lock);
909 	mutex_init(&pmap_all_pmaps_lock, MUTEX_DEFAULT, IPL_NONE);
910 	for (i = 0; i < __arraycount(pmap_pvh_locks); i++) {
911 		mutex_init(&pmap_pvh_locks[i].lock, MUTEX_DEFAULT, IPL_NONE);
912 	}
913 
914 	/*
915 	 * Initialize kernel pmap.  Note that all kernel mappings
916 	 * have PG_ASM set, so the ASN doesn't really matter for
917 	 * the kernel pmap.  Also, since the kernel pmap always
918 	 * references kernel_lev1map, it always has an invalid ASN
919 	 * generation.
920 	 */
921 	memset(pmap_kernel(), 0, sizeof(struct pmap));
922 	pmap_kernel()->pm_lev1map = kernel_lev1map;
923 	pmap_kernel()->pm_count = 1;
924 	for (i = 0; i < ALPHA_MAXPROCS; i++) {
925 		pmap_kernel()->pm_asni[i].pma_asn = PMAP_ASN_RESERVED;
926 		pmap_kernel()->pm_asni[i].pma_asngen =
927 		    pmap_asn_info[i].pma_asngen;
928 	}
929 	mutex_init(&pmap_kernel()->pm_lock, MUTEX_DEFAULT, IPL_NONE);
930 	TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap_kernel(), pm_list);
931 
932 #if defined(MULTIPROCESSOR)
933 	/*
934 	 * Initialize the TLB shootdown queues.
935 	 */
936 	pool_cache_bootstrap(&pmap_tlb_shootdown_job_cache,
937 	    sizeof(struct pmap_tlb_shootdown_job), CACHE_LINE_SIZE,
938 	     0, PR_LARGECACHE, "pmaptlb", NULL, IPL_VM, NULL, NULL, NULL);
939 	for (i = 0; i < ALPHA_MAXPROCS; i++) {
940 		TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_head);
941 		mutex_init(&pmap_tlb_shootdown_q[i].pq_lock, MUTEX_DEFAULT,
942 		    IPL_SCHED);
943 	}
944 #endif
945 
946 	/*
947 	 * Set up lwp0's PCB such that the ptbr points to the right place
948 	 * and has the kernel pmap's (really unused) ASN.
949 	 */
950 	pcb = lwp_getpcb(&lwp0);
951 	pcb->pcb_hw.apcb_ptbr =
952 	    ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT;
953 	pcb->pcb_hw.apcb_asn = pmap_kernel()->pm_asni[cpu_number()].pma_asn;
954 
955 	/*
956 	 * Mark the kernel pmap `active' on this processor.
957 	 */
958 	atomic_or_ulong(&pmap_kernel()->pm_cpus,
959 	    (1UL << cpu_number()));
960 }
961 
962 #ifdef _PMAP_MAY_USE_PROM_CONSOLE
963 int
964 pmap_uses_prom_console(void)
965 {
966 
967 	return (cputype == ST_DEC_21000);
968 }
969 #endif /* _PMAP_MAY_USE_PROM_CONSOLE */
970 
971 /*
972  * pmap_virtual_space:		[ INTERFACE ]
973  *
974  *	Define the initial bounds of the kernel virtual address space.
975  */
976 void
977 pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp)
978 {
979 
980 	*vstartp = VM_MIN_KERNEL_ADDRESS;	/* kernel is in K0SEG */
981 	*vendp = VM_MAX_KERNEL_ADDRESS;		/* we use pmap_growkernel */
982 }
983 
984 /*
985  * pmap_steal_memory:		[ INTERFACE ]
986  *
987  *	Bootstrap memory allocator (alternative to vm_bootstrap_steal_memory()).
988  *	This function allows for early dynamic memory allocation until the
989  *	virtual memory system has been bootstrapped.  After that point, either
990  *	kmem_alloc or malloc should be used.  This function works by stealing
991  *	pages from the (to be) managed page pool, then implicitly mapping the
992  *	pages (by using their k0seg addresses) and zeroing them.
993  *
994  *	It may be used once the physical memory segments have been pre-loaded
995  *	into the vm_physmem[] array.  Early memory allocation MUST use this
996  *	interface!  This cannot be used after vm_page_startup(), and will
997  *	generate a panic if tried.
998  *
999  *	Note that this memory will never be freed, and in essence it is wired
1000  *	down.
1001  *
1002  *	We must adjust *vstartp and/or *vendp iff we use address space
1003  *	from the kernel virtual address range defined by pmap_virtual_space().
1004  *
1005  *	Note: no locking is necessary in this function.
1006  */
1007 vaddr_t
1008 pmap_steal_memory(vsize_t size, vaddr_t *vstartp, vaddr_t *vendp)
1009 {
1010 	int npgs;
1011 	vaddr_t va;
1012 	paddr_t pa;
1013 
1014 	uvm_physseg_t bank;
1015 
1016 	size = round_page(size);
1017 	npgs = atop(size);
1018 
1019 #if 0
1020 	printf("PSM: size 0x%lx (npgs 0x%x)\n", size, npgs);
1021 #endif
1022 
1023 	for (bank = uvm_physseg_get_first();
1024 	     uvm_physseg_valid_p(bank);
1025 	     bank = uvm_physseg_get_next(bank)) {
1026 		if (uvm.page_init_done == true)
1027 			panic("pmap_steal_memory: called _after_ bootstrap");
1028 
1029 #if 0
1030 		printf("     bank %d: avail_start 0x%"PRIxPADDR", start 0x%"PRIxPADDR", "
1031 		    "avail_end 0x%"PRIxPADDR"\n", bank, VM_PHYSMEM_PTR(bank)->avail_start,
1032 		    VM_PHYSMEM_PTR(bank)->start, VM_PHYSMEM_PTR(bank)->avail_end);
1033 #endif
1034 
1035 		if (uvm_physseg_get_avail_start(bank) != uvm_physseg_get_start(bank) ||
1036 		    uvm_physseg_get_avail_start(bank) >= uvm_physseg_get_avail_end(bank))
1037 			continue;
1038 
1039 #if 0
1040 		printf("             avail_end - avail_start = 0x%"PRIxPADDR"\n",
1041 		    VM_PHYSMEM_PTR(bank)->avail_end - VM_PHYSMEM_PTR(bank)->avail_start);
1042 #endif
1043 
1044 		if (uvm_physseg_get_avail_end(bank) - uvm_physseg_get_avail_start(bank)
1045 		    < npgs)
1046 			continue;
1047 
1048 		/*
1049 		 * There are enough pages here; steal them!
1050 		 */
1051 		pa = ptoa(uvm_physseg_get_start(bank));
1052 		uvm_physseg_unplug(atop(pa), npgs);
1053 
1054 		va = ALPHA_PHYS_TO_K0SEG(pa);
1055 		memset((void *)va, 0, size);
1056 		pmap_pages_stolen += npgs;
1057 		return (va);
1058 	}
1059 
1060 	/*
1061 	 * If we got here, there was no memory left.
1062 	 */
1063 	panic("pmap_steal_memory: no memory to steal");
1064 }
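
/*
 * Illustrative sketch (not part of the original source): how early
 * bootstrap code might use pmap_steal_memory() before uvm_init() runs
 * (normally this happens indirectly via uvm_pageboot_alloc()).  The
 * function name is hypothetical.
 */
#if 0
static vaddr_t
example_early_alloc(void)
{
	vaddr_t vstart, vend;

	pmap_virtual_space(&vstart, &vend);

	/* Steal one zeroed, permanently wired page; returns a K0SEG VA. */
	return (pmap_steal_memory(PAGE_SIZE, &vstart, &vend));
}
#endif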
1065 
1066 /*
1067  * pmap_init:			[ INTERFACE ]
1068  *
1069  *	Initialize the pmap module.  Called by vm_init(), to initialize any
1070  *	structures that the pmap system needs to map virtual memory.
1071  *
1072  *	Note: no locking is necessary in this function.
1073  */
1074 void
1075 pmap_init(void)
1076 {
1077 
1078 #ifdef DEBUG
1079 	if (pmapdebug & PDB_FOLLOW)
1080 	        printf("pmap_init()\n");
1081 #endif
1082 
1083 	/* initialize protection array */
1084 	alpha_protection_init();
1085 
1086 	/*
1087 	 * Set a low water mark on the pv_entry pool, so that we are
1088 	 * more likely to have these around even in extreme memory
1089 	 * starvation.
1090 	 */
1091 	pool_cache_setlowat(&pmap_pv_cache, pmap_pv_lowat);
1092 
1093 	/*
1094 	 * Now it is safe to enable pv entry recording.
1095 	 */
1096 	pmap_initialized = true;
1097 
1098 #if 0
1099 	for (bank = 0; bank < vm_nphysseg; bank++) {
1100 		printf("bank %d\n", bank);
1101 		printf("\tstart = 0x%x\n", ptoa(VM_PHYSMEM_PTR(bank)->start));
1102 		printf("\tend = 0x%x\n", ptoa(VM_PHYSMEM_PTR(bank)->end));
1103 		printf("\tavail_start = 0x%x\n",
1104 		    ptoa(VM_PHYSMEM_PTR(bank)->avail_start));
1105 		printf("\tavail_end = 0x%x\n",
1106 		    ptoa(VM_PHYSMEM_PTR(bank)->avail_end));
1107 	}
1108 #endif
1109 }
1110 
1111 /*
1112  * pmap_create:			[ INTERFACE ]
1113  *
1114  *	Create and return a physical map.
1115  *
1116  *	Note: no locking is necessary in this function.
1117  */
1118 pmap_t
1119 pmap_create(void)
1120 {
1121 	pmap_t pmap;
1122 	int i;
1123 
1124 #ifdef DEBUG
1125 	if (pmapdebug & (PDB_FOLLOW|PDB_CREATE))
1126 		printf("pmap_create()\n");
1127 #endif
1128 
1129 	pmap = pool_cache_get(&pmap_pmap_cache, PR_WAITOK);
1130 	memset(pmap, 0, sizeof(*pmap));
1131 
1132 	/*
1133 	 * Defer allocation of a new level 1 page table until
1134 	 * the first new mapping is entered; just take a reference
1135 	 * to the kernel kernel_lev1map.
1136 	 */
1137 	pmap->pm_lev1map = kernel_lev1map;
1138 
1139 	pmap->pm_count = 1;
1140 	for (i = 0; i < pmap_ncpuids; i++) {
1141 		pmap->pm_asni[i].pma_asn = PMAP_ASN_RESERVED;
1142 		/* XXX Locking? */
1143 		pmap->pm_asni[i].pma_asngen = pmap_asn_info[i].pma_asngen;
1144 	}
1145 	mutex_init(&pmap->pm_lock, MUTEX_DEFAULT, IPL_NONE);
1146 
1147  try_again:
1148 	rw_enter(&pmap_growkernel_lock, RW_READER);
1149 
1150 	if (pmap_lev1map_create(pmap, cpu_number()) != 0) {
1151 		rw_exit(&pmap_growkernel_lock);
1152 		(void) kpause("pmap_create", false, hz >> 2, NULL);
1153 		goto try_again;
1154 	}
1155 
1156 	mutex_enter(&pmap_all_pmaps_lock);
1157 	TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap, pm_list);
1158 	mutex_exit(&pmap_all_pmaps_lock);
1159 
1160 	rw_exit(&pmap_growkernel_lock);
1161 
1162 	return (pmap);
1163 }
1164 
1165 /*
1166  * pmap_destroy:		[ INTERFACE ]
1167  *
1168  *	Drop the reference count on the specified pmap, releasing
1169  *	all resources if the reference count drops to zero.
1170  */
1171 void
1172 pmap_destroy(pmap_t pmap)
1173 {
1174 
1175 #ifdef DEBUG
1176 	if (pmapdebug & PDB_FOLLOW)
1177 		printf("pmap_destroy(%p)\n", pmap);
1178 #endif
1179 
1180 	if (atomic_dec_uint_nv(&pmap->pm_count) > 0)
1181 		return;
1182 
1183 	rw_enter(&pmap_growkernel_lock, RW_READER);
1184 
1185 	/*
1186 	 * Remove it from the global list of all pmaps.
1187 	 */
1188 	mutex_enter(&pmap_all_pmaps_lock);
1189 	TAILQ_REMOVE(&pmap_all_pmaps, pmap, pm_list);
1190 	mutex_exit(&pmap_all_pmaps_lock);
1191 
1192 	pmap_lev1map_destroy(pmap, cpu_number());
1193 
1194 	rw_exit(&pmap_growkernel_lock);
1195 
1196 	/*
1197 	 * Since the pmap is supposed to contain no valid
1198 	 * mappings at this point, we should always see
1199 	 * kernel_lev1map here.
1200 	 */
1201 	KASSERT(pmap->pm_lev1map == kernel_lev1map);
1202 
1203 	mutex_destroy(&pmap->pm_lock);
1204 	pool_cache_put(&pmap_pmap_cache, pmap);
1205 }
1206 
1207 /*
1208  * pmap_reference:		[ INTERFACE ]
1209  *
1210  *	Add a reference to the specified pmap.
1211  */
1212 void
1213 pmap_reference(pmap_t pmap)
1214 {
1215 
1216 #ifdef DEBUG
1217 	if (pmapdebug & PDB_FOLLOW)
1218 		printf("pmap_reference(%p)\n", pmap);
1219 #endif
1220 
1221 	atomic_inc_uint(&pmap->pm_count);
1222 }
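
/*
 * Illustrative sketch (not part of the original source): the reference-
 * counted lifecycle implemented by pmap_create(), pmap_reference() and
 * pmap_destroy().  The pmap is only torn down when the final reference
 * is dropped.  The function name is hypothetical.
 */
#if 0
static void
example_pmap_lifecycle(void)
{
	pmap_t pm;

	pm = pmap_create();	/* pm_count == 1 */
	pmap_reference(pm);	/* pm_count == 2 */
	pmap_destroy(pm);	/* drops to 1; nothing is freed yet */
	pmap_destroy(pm);	/* drops to 0; resources are released */
}
#endif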
1223 
1224 /*
1225  * pmap_remove:			[ INTERFACE ]
1226  *
1227  *	Remove the given range of addresses from the specified map.
1228  *
1229  *	It is assumed that the start and end are properly
1230  *	rounded to the page size.
1231  */
1232 void
1233 pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva)
1234 {
1235 	pt_entry_t *l1pte, *l2pte, *l3pte;
1236 	pt_entry_t *saved_l1pte, *saved_l2pte, *saved_l3pte;
1237 	vaddr_t l1eva, l2eva, vptva;
1238 	bool needisync = false;
1239 	long cpu_id = cpu_number();
1240 
1241 #ifdef DEBUG
1242 	if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
1243 		printf("pmap_remove(%p, %lx, %lx)\n", pmap, sva, eva);
1244 #endif
1245 
1246 	/*
1247 	 * If this is the kernel pmap, we can use a faster method
1248 	 * for accessing the PTEs (since the PT pages are always
1249 	 * resident).
1250 	 *
1251 	 * Note that this routine should NEVER be called from an
1252 	 * interrupt context; pmap_kremove() is used for that.
1253 	 */
1254 	if (pmap == pmap_kernel()) {
1255 		PMAP_MAP_TO_HEAD_LOCK();
1256 		PMAP_LOCK(pmap);
1257 
1258 		while (sva < eva) {
1259 			l3pte = PMAP_KERNEL_PTE(sva);
1260 			if (pmap_pte_v(l3pte)) {
1261 #ifdef DIAGNOSTIC
1262 				if (uvm_pageismanaged(pmap_pte_pa(l3pte)) &&
1263 				    pmap_pte_pv(l3pte) == 0)
1264 					panic("pmap_remove: managed page "
1265 					    "without PG_PVLIST for 0x%lx",
1266 					    sva);
1267 #endif
1268 				needisync |= pmap_remove_mapping(pmap, sva,
1269 				    l3pte, true, cpu_id);
1270 			}
1271 			sva += PAGE_SIZE;
1272 		}
1273 
1274 		PMAP_UNLOCK(pmap);
1275 		PMAP_MAP_TO_HEAD_UNLOCK();
1276 
1277 		if (needisync)
1278 			PMAP_SYNC_ISTREAM_KERNEL();
1279 		return;
1280 	}
1281 
1282 #ifdef DIAGNOSTIC
1283 	if (sva > VM_MAXUSER_ADDRESS || eva > VM_MAXUSER_ADDRESS)
1284 		panic("pmap_remove: (0x%lx - 0x%lx) user pmap, kernel "
1285 		    "address range", sva, eva);
1286 #endif
1287 
1288 	PMAP_MAP_TO_HEAD_LOCK();
1289 	PMAP_LOCK(pmap);
1290 
1291 	/*
1292 	 * If we're already referencing the kernel_lev1map, there
1293 	 * is no work for us to do.
1294 	 */
1295 	if (pmap->pm_lev1map == kernel_lev1map)
1296 		goto out;
1297 
1298 	saved_l1pte = l1pte = pmap_l1pte(pmap, sva);
1299 
1300 	/*
1301 	 * Add a reference to the L1 table so it won't get
1302 	 * removed from under us.
1303 	 */
1304 	pmap_physpage_addref(saved_l1pte);
1305 
1306 	for (; sva < eva; sva = l1eva, l1pte++) {
1307 		l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE;
1308 		if (pmap_pte_v(l1pte)) {
1309 			saved_l2pte = l2pte = pmap_l2pte(pmap, sva, l1pte);
1310 
1311 			/*
1312 			 * Add a reference to the L2 table so it won't
1313 			 * get removed from under us.
1314 			 */
1315 			pmap_physpage_addref(saved_l2pte);
1316 
1317 			for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) {
1318 				l2eva =
1319 				    alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE;
1320 				if (pmap_pte_v(l2pte)) {
1321 					saved_l3pte = l3pte =
1322 					    pmap_l3pte(pmap, sva, l2pte);
1323 
1324 					/*
1325 					 * Add a reference to the L3 table so
1326 					 * it won't get removed from under us.
1327 					 */
1328 					pmap_physpage_addref(saved_l3pte);
1329 
1330 					/*
1331 					 * Remember this sva; if the L3 table
1332 					 * gets removed, we need to invalidate
1333 					 * the VPT TLB entry for it.
1334 					 */
1335 					vptva = sva;
1336 
1337 					for (; sva < l2eva && sva < eva;
1338 					     sva += PAGE_SIZE, l3pte++) {
1339 						if (!pmap_pte_v(l3pte)) {
1340 							continue;
1341 						}
1342 						needisync |=
1343 						    pmap_remove_mapping(
1344 							pmap, sva,
1345 							l3pte, true,
1346 							cpu_id);
1347 					}
1348 
1349 					/*
1350 					 * Remove the reference to the L3
1351 					 * table that we added above.  This
1352 					 * may free the L3 table.
1353 					 */
1354 					pmap_l3pt_delref(pmap, vptva,
1355 					    saved_l3pte, cpu_id);
1356 				}
1357 			}
1358 
1359 			/*
1360 			 * Remove the reference to the L2 table that we
1361 			 * added above.  This may free the L2 table.
1362 			 */
1363 			pmap_l2pt_delref(pmap, l1pte, saved_l2pte, cpu_id);
1364 		}
1365 	}
1366 
1367 	/*
1368 	 * Remove the reference to the L1 table that we added above.
1369 	 * This may free the L1 table.
1370 	 */
1371 	pmap_l1pt_delref(pmap, saved_l1pte, cpu_id);
1372 
1373 	if (needisync)
1374 		PMAP_SYNC_ISTREAM_USER(pmap);
1375 
1376  out:
1377 	PMAP_UNLOCK(pmap);
1378 	PMAP_MAP_TO_HEAD_UNLOCK();
1379 }
1380 
1381 /*
1382  * pmap_page_protect:		[ INTERFACE ]
1383  *
1384  *	Lower the permission for all mappings to a given page to
1385  *	the permissions specified.
1386  */
1387 void
1388 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
1389 {
1390 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
1391 	pmap_t pmap;
1392 	pv_entry_t pv, nextpv;
1393 	bool needkisync = false;
1394 	long cpu_id = cpu_number();
1395 	kmutex_t *lock;
1396 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1397 #ifdef DEBUG
1398 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
1399 
1400 
1401 	if ((pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) ||
1402 	    (prot == VM_PROT_NONE && (pmapdebug & PDB_REMOVE)))
1403 		printf("pmap_page_protect(%p, %x)\n", pg, prot);
1404 #endif
1405 
1406 	switch (prot) {
1407 	case VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE:
1408 	case VM_PROT_READ|VM_PROT_WRITE:
1409 		return;
1410 
1411 	/* copy_on_write */
1412 	case VM_PROT_READ|VM_PROT_EXECUTE:
1413 	case VM_PROT_READ:
1414 		PMAP_HEAD_TO_MAP_LOCK();
1415 		lock = pmap_pvh_lock(pg);
1416 		mutex_enter(lock);
1417 		for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) {
1418 			PMAP_LOCK(pv->pv_pmap);
1419 			if (*pv->pv_pte & (PG_KWE | PG_UWE)) {
1420 				*pv->pv_pte &= ~(PG_KWE | PG_UWE);
1421 				PMAP_INVALIDATE_TLB(pv->pv_pmap, pv->pv_va,
1422 				    pmap_pte_asm(pv->pv_pte),
1423 				    PMAP_ISACTIVE(pv->pv_pmap, cpu_id), cpu_id);
1424 				PMAP_TLB_SHOOTDOWN(pv->pv_pmap, pv->pv_va,
1425 				    pmap_pte_asm(pv->pv_pte));
1426 			}
1427 			PMAP_UNLOCK(pv->pv_pmap);
1428 		}
1429 		mutex_exit(lock);
1430 		PMAP_HEAD_TO_MAP_UNLOCK();
1431 		PMAP_TLB_SHOOTNOW();
1432 		return;
1433 
1434 	/* remove_all */
1435 	default:
1436 		break;
1437 	}
1438 
1439 	PMAP_HEAD_TO_MAP_LOCK();
1440 	lock = pmap_pvh_lock(pg);
1441 	mutex_enter(lock);
1442 	for (pv = md->pvh_list; pv != NULL; pv = nextpv) {
1443 		nextpv = pv->pv_next;
1444 		pmap = pv->pv_pmap;
1445 
1446 		PMAP_LOCK(pmap);
1447 #ifdef DEBUG
1448 		if (pmap_pte_v(pmap_l2pte(pv->pv_pmap, pv->pv_va, NULL)) == 0 ||
1449 		    pmap_pte_pa(pv->pv_pte) != pa)
1450 			panic("pmap_page_protect: bad mapping");
1451 #endif
1452 		if (pmap_remove_mapping(pmap, pv->pv_va, pv->pv_pte,
1453 		    false, cpu_id) == true) {
1454 			if (pmap == pmap_kernel())
1455 				needkisync |= true;
1456 			else
1457 				PMAP_SYNC_ISTREAM_USER(pmap);
1458 		}
1459 		PMAP_UNLOCK(pmap);
1460 	}
1461 
1462 	if (needkisync)
1463 		PMAP_SYNC_ISTREAM_KERNEL();
1464 
1465 	mutex_exit(lock);
1466 	PMAP_HEAD_TO_MAP_UNLOCK();
1467 }
1468 
1469 /*
1470  * pmap_protect:		[ INTERFACE ]
1471  *
1472  *	Set the physical protection on the specified range of this map
1473  *	as requested.
1474  */
1475 void
1476 pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
1477 {
1478 	pt_entry_t *l1pte, *l2pte, *l3pte, bits;
1479 	bool isactive;
1480 	bool hadasm;
1481 	vaddr_t l1eva, l2eva;
1482 	long cpu_id = cpu_number();
1483 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1484 
1485 #ifdef DEBUG
1486 	if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT))
1487 		printf("pmap_protect(%p, %lx, %lx, %x)\n",
1488 		    pmap, sva, eva, prot);
1489 #endif
1490 
1491 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1492 		pmap_remove(pmap, sva, eva);
1493 		return;
1494 	}
1495 
1496 	PMAP_LOCK(pmap);
1497 
1498 	bits = pte_prot(pmap, prot);
1499 	isactive = PMAP_ISACTIVE(pmap, cpu_id);
1500 
1501 	l1pte = pmap_l1pte(pmap, sva);
1502 	for (; sva < eva; sva = l1eva, l1pte++) {
1503 		l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE;
1504 		if (pmap_pte_v(l1pte)) {
1505 			l2pte = pmap_l2pte(pmap, sva, l1pte);
1506 			for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) {
1507 				l2eva =
1508 				    alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE;
1509 				if (pmap_pte_v(l2pte)) {
1510 					l3pte = pmap_l3pte(pmap, sva, l2pte);
1511 					for (; sva < l2eva && sva < eva;
1512 					     sva += PAGE_SIZE, l3pte++) {
1513 						if (pmap_pte_v(l3pte) &&
1514 						    pmap_pte_prot_chg(l3pte,
1515 						    bits)) {
1516 							hadasm =
1517 							   (pmap_pte_asm(l3pte)
1518 							    != 0);
1519 							pmap_pte_set_prot(l3pte,
1520 							   bits);
1521 							PMAP_INVALIDATE_TLB(
1522 							   pmap, sva, hadasm,
1523 							   isactive, cpu_id);
1524 							PMAP_TLB_SHOOTDOWN(
1525 							   pmap, sva,
1526 							   hadasm ? PG_ASM : 0);
1527 						}
1528 					}
1529 				}
1530 			}
1531 		}
1532 	}
1533 
1534 	PMAP_TLB_SHOOTNOW();
1535 
1536 	if (prot & VM_PROT_EXECUTE)
1537 		PMAP_SYNC_ISTREAM(pmap);
1538 
1539 	PMAP_UNLOCK(pmap);
1540 }
1541 
1542 /*
1543  * pmap_enter:			[ INTERFACE ]
1544  *
1545  *	Insert the given physical page (p) at
1546  *	the specified virtual address (v) in the
1547  *	target physical map with the protection requested.
1548  *
1549  *	If specified, the page will be wired down, meaning
1550  *	that the related pte can not be reclaimed.
1551  *
1552  *	Note:  This is the only routine which MAY NOT lazy-evaluate
1553  *	or lose information.  That is, this routine must actually
1554  *	insert this page into the given map NOW.
1555  */
1556 int
1557 pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
1558 {
1559 	struct vm_page *pg;			/* if != NULL, managed page */
1560 	pt_entry_t *pte, npte, opte;
1561 	paddr_t opa;
1562 	bool tflush = true;
1563 	bool hadasm = false;	/* XXX gcc -Wuninitialized */
1564 	bool needisync = false;
1565 	bool setisync = false;
1566 	bool isactive;
1567 	bool wired;
1568 	long cpu_id = cpu_number();
1569 	int error = 0;
1570 	kmutex_t *lock;
1571 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1572 
1573 #ifdef DEBUG
1574 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1575 		printf("pmap_enter(%p, %lx, %lx, %x, %x)\n",
1576 		       pmap, va, pa, prot, flags);
1577 #endif
1578 	pg = PHYS_TO_VM_PAGE(pa);
1579 	isactive = PMAP_ISACTIVE(pmap, cpu_id);
1580 	wired = (flags & PMAP_WIRED) != 0;
1581 
1582 	/*
1583 	 * Determine what we need to do about the I-stream.  If
1584 	 * VM_PROT_EXECUTE is set, we mark a user pmap as needing
1585 	 * an I-sync on the way back out to userspace.  We always
1586 	 * need an immediate I-sync for the kernel pmap.
1587 	 */
1588 	if (prot & VM_PROT_EXECUTE) {
1589 		if (pmap == pmap_kernel())
1590 			needisync = true;
1591 		else {
1592 			setisync = true;
1593 			needisync = (pmap->pm_cpus != 0);
1594 		}
1595 	}
1596 
1597 	PMAP_MAP_TO_HEAD_LOCK();
1598 	PMAP_LOCK(pmap);
1599 
1600 	if (pmap == pmap_kernel()) {
1601 #ifdef DIAGNOSTIC
1602 		/*
1603 		 * Sanity check the virtual address.
1604 		 */
1605 		if (va < VM_MIN_KERNEL_ADDRESS)
1606 			panic("pmap_enter: kernel pmap, invalid va 0x%lx", va);
1607 #endif
1608 		pte = PMAP_KERNEL_PTE(va);
1609 	} else {
1610 		pt_entry_t *l1pte, *l2pte;
1611 
1612 #ifdef DIAGNOSTIC
1613 		/*
1614 		 * Sanity check the virtual address.
1615 		 */
1616 		if (va >= VM_MAXUSER_ADDRESS)
1617 			panic("pmap_enter: user pmap, invalid va 0x%lx", va);
1618 #endif
1619 
1620 		KASSERT(pmap->pm_lev1map != kernel_lev1map);
1621 
1622 		/*
1623 		 * Check to see if the level 1 PTE is valid, and
1624 		 * allocate a new level 2 page table page if it's not.
1625 		 * A reference will be added to the level 2 table when
1626 		 * the level 3 table is created.
1627 		 */
1628 		l1pte = pmap_l1pte(pmap, va);
1629 		if (pmap_pte_v(l1pte) == 0) {
1630 			pmap_physpage_addref(l1pte);
1631 			error = pmap_ptpage_alloc(pmap, l1pte, PGU_L2PT);
1632 			if (error) {
1633 				pmap_l1pt_delref(pmap, l1pte, cpu_id);
1634 				if (flags & PMAP_CANFAIL)
1635 					goto out;
1636 				panic("pmap_enter: unable to create L2 PT "
1637 				    "page");
1638 			}
1639 #ifdef DEBUG
1640 			if (pmapdebug & PDB_PTPAGE)
1641 				printf("pmap_enter: new level 2 table at "
1642 				    "0x%lx\n", pmap_pte_pa(l1pte));
1643 #endif
1644 		}
1645 
1646 		/*
1647 		 * Check to see if the level 2 PTE is valid, and
1648 		 * allocate a new level 3 page table page if it's not.
1649 		 * A reference will be added to the level 3 table when
1650 		 * the mapping is validated.
1651 		 */
1652 		l2pte = pmap_l2pte(pmap, va, l1pte);
1653 		if (pmap_pte_v(l2pte) == 0) {
1654 			pmap_physpage_addref(l2pte);
1655 			error = pmap_ptpage_alloc(pmap, l2pte, PGU_L3PT);
1656 			if (error) {
1657 				pmap_l2pt_delref(pmap, l1pte, l2pte, cpu_id);
1658 				if (flags & PMAP_CANFAIL)
1659 					goto out;
1660 				panic("pmap_enter: unable to create L3 PT "
1661 				    "page");
1662 			}
1663 #ifdef DEBUG
1664 			if (pmapdebug & PDB_PTPAGE)
1665 				printf("pmap_enter: new level 3 table at "
1666 				    "0x%lx\n", pmap_pte_pa(l2pte));
1667 #endif
1668 		}
1669 
1670 		/*
1671 		 * Get the PTE that will map the page.
1672 		 */
1673 		pte = pmap_l3pte(pmap, va, l2pte);
1674 	}
1675 
1676 	/* Remember all of the old PTE; used for TBI check later. */
1677 	opte = *pte;
1678 
1679 	/*
1680 	 * Check to see if the old mapping is valid.  If not, validate the
1681 	 * new one immediately.
1682 	 */
1683 	if (pmap_pte_v(pte) == 0) {
1684 		/*
1685 		 * No need to invalidate the TLB in this case; an invalid
1686 		 * mapping won't be in the TLB, and a previously valid
1687 		 * mapping would have been flushed when it was invalidated.
1688 		 */
1689 		tflush = false;
1690 
1691 		/*
1692 		 * No need to synchronize the I-stream, either, for basically
1693 		 * the same reason.
1694 		 */
1695 		setisync = needisync = false;
1696 
1697 		if (pmap != pmap_kernel()) {
1698 			/*
1699 			 * New mappings gain a reference on the level 3
1700 			 * table.
1701 			 */
1702 			pmap_physpage_addref(pte);
1703 		}
1704 		goto validate_enterpv;
1705 	}
1706 
1707 	opa = pmap_pte_pa(pte);
1708 	hadasm = (pmap_pte_asm(pte) != 0);
1709 
1710 	if (opa == pa) {
1711 		/*
1712 		 * Mapping has not changed; must be a protection or
1713 		 * wiring change.
1714 		 */
1715 		if (pmap_pte_w_chg(pte, wired ? PG_WIRED : 0)) {
1716 #ifdef DEBUG
1717 			if (pmapdebug & PDB_ENTER)
1718 				printf("pmap_enter: wiring change -> %d\n",
1719 				    wired);
1720 #endif
1721 			/*
1722 			 * Adjust the wiring count.
1723 			 */
1724 			if (wired)
1725 				PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1);
1726 			else
1727 				PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1728 		}
1729 
1730 		/*
1731 		 * Set the PTE.
1732 		 */
1733 		goto validate;
1734 	}
1735 
1736 	/*
1737 	 * The mapping has changed.  We need to invalidate the
1738 	 * old mapping before creating the new one.
1739 	 */
1740 #ifdef DEBUG
1741 	if (pmapdebug & PDB_ENTER)
1742 		printf("pmap_enter: removing old mapping 0x%lx\n", va);
1743 #endif
1744 	if (pmap != pmap_kernel()) {
1745 		/*
1746 		 * Gain an extra reference on the level 3 table.
1747 		 * pmap_remove_mapping() will delete a reference,
1748 		 * and we don't want the table to be erroneously
1749 		 * freed.
1750 		 */
1751 		pmap_physpage_addref(pte);
1752 	}
1753 	needisync |= pmap_remove_mapping(pmap, va, pte, true, cpu_id);
1754 
1755  validate_enterpv:
1756 	/*
1757 	 * Enter the mapping into the pv_table if appropriate.
1758 	 */
1759 	if (pg != NULL) {
1760 		error = pmap_pv_enter(pmap, pg, va, pte, true);
1761 		if (error) {
1762 			pmap_l3pt_delref(pmap, va, pte, cpu_id);
1763 			if (flags & PMAP_CANFAIL)
1764 				goto out;
1765 			panic("pmap_enter: unable to enter mapping in PV "
1766 			    "table");
1767 		}
1768 	}
1769 
1770 	/*
1771 	 * Increment counters.
1772 	 */
1773 	PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1);
1774 	if (wired)
1775 		PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1);
1776 
1777  validate:
1778 	/*
1779 	 * Build the new PTE.
1780 	 */
1781 	npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap, prot) | PG_V;
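	/*
	 * (Illustrative note, based on the expression above; the exact
	 *  field layout is an assumption from <machine/pte.h>: shifting
	 *  pa right by PGSHIFT yields the page frame number, and the
	 *  left shift by PG_SHIFT places that PFN in the PTE's frame
	 *  field, alongside the protection and valid bits.)
	 */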
1782 	if (pg != NULL) {
1783 		struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
1784 		int attrs;
1785 
1786 #ifdef DIAGNOSTIC
1787 		if ((flags & VM_PROT_ALL) & ~prot)
1788 			panic("pmap_enter: access type exceeds prot");
1789 #endif
1790 		lock = pmap_pvh_lock(pg);
1791 		mutex_enter(lock);
1792 		if (flags & VM_PROT_WRITE)
1793 			md->pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED);
1794 		else if (flags & VM_PROT_ALL)
1795 			md->pvh_attrs |= PGA_REFERENCED;
1796 		attrs = md->pvh_attrs;
1797 		mutex_exit(lock);
1798 
1799 		/*
1800 		 * Set up referenced/modified emulation for new mapping.
1801 		 */
1802 		if ((attrs & PGA_REFERENCED) == 0)
1803 			npte |= PG_FOR | PG_FOW | PG_FOE;
1804 		else if ((attrs & PGA_MODIFIED) == 0)
1805 			npte |= PG_FOW;
1806 
1807 		/*
1808 		 * Mapping was entered on PV list.
1809 		 */
1810 		npte |= PG_PVLIST;
1811 	}
1812 	if (wired)
1813 		npte |= PG_WIRED;
1814 #ifdef DEBUG
1815 	if (pmapdebug & PDB_ENTER)
1816 		printf("pmap_enter: new pte = 0x%lx\n", npte);
1817 #endif
1818 
1819 	/*
1820 	 * If the PALcode portion of the new PTE is the same as the
1821 	 * old PTE, no TBI is necessary.
1822 	 */
1823 	if (PG_PALCODE(opte) == PG_PALCODE(npte))
1824 		tflush = false;
1825 
1826 	/*
1827 	 * Set the new PTE.
1828 	 */
1829 	PMAP_SET_PTE(pte, npte);
1830 
1831 	/*
1832 	 * Invalidate the TLB entry for this VA and any appropriate
1833 	 * caches.
1834 	 */
1835 	if (tflush) {
1836 		PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id);
1837 		PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0);
1838 		PMAP_TLB_SHOOTNOW();
1839 	}
1840 	if (setisync)
1841 		PMAP_SET_NEEDISYNC(pmap);
1842 	if (needisync)
1843 		PMAP_SYNC_ISTREAM(pmap);
1844 
1845 out:
1846 	PMAP_UNLOCK(pmap);
1847 	PMAP_MAP_TO_HEAD_UNLOCK();
1848 
1849 	return error;
1850 }
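
/*
 * Illustrative sketch, not part of the original file: how a caller
 * might invoke pmap_enter() with PMAP_CANFAIL so that a resource
 * shortage is reported as ENOMEM rather than a panic.  The helper
 * name and its arguments are hypothetical.
 */
#if 0
static int
example_enter(pmap_t pm, vaddr_t va, paddr_t pa, vm_prot_t prot)
{
	int error;

	/* Access type equals the protection here; adjust as needed. */
	error = pmap_enter(pm, va, pa, prot, prot | PMAP_CANFAIL);
	if (error) {
		/* ENOMEM: the caller may wait for memory and retry. */
	}
	return error;
}
#endif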
1851 
1852 /*
1853  * pmap_kenter_pa:		[ INTERFACE ]
1854  *
1855  *	Enter a va -> pa mapping into the kernel pmap without any
1856  *	physical->virtual tracking.
1857  *
1858  *	Note: no locking is necessary in this function.
1859  */
1860 void
1861 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
1862 {
1863 	pt_entry_t *pte, npte;
1864 	long cpu_id = cpu_number();
1865 	bool needisync = false;
1866 	pmap_t pmap = pmap_kernel();
1867 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1868 
1869 #ifdef DEBUG
1870 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1871 		printf("pmap_kenter_pa(%lx, %lx, %x)\n",
1872 		    va, pa, prot);
1873 #endif
1874 
1875 #ifdef DIAGNOSTIC
1876 	/*
1877 	 * Sanity check the virtual address.
1878 	 */
1879 	if (va < VM_MIN_KERNEL_ADDRESS)
1880 		panic("pmap_kenter_pa: kernel pmap, invalid va 0x%lx", va);
1881 #endif
1882 
1883 	pte = PMAP_KERNEL_PTE(va);
1884 
1885 	if (pmap_pte_v(pte) == 0)
1886 		PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1);
1887 	if (pmap_pte_w(pte) == 0)
1888 		PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1889 
1890 	if ((prot & VM_PROT_EXECUTE) != 0 || pmap_pte_exec(pte))
1891 		needisync = true;
1892 
1893 	/*
1894 	 * Build the new PTE.
1895 	 */
1896 	npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap_kernel(), prot) |
1897 	    PG_V | PG_WIRED;
1898 
1899 	/*
1900 	 * Set the new PTE.
1901 	 */
1902 	PMAP_SET_PTE(pte, npte);
1903 #if defined(MULTIPROCESSOR)
1904 	alpha_mb();		/* XXX alpha_wmb()? */
1905 #endif
1906 
1907 	/*
1908 	 * Invalidate the TLB entry for this VA and any appropriate
1909 	 * caches.
1910 	 */
1911 	PMAP_INVALIDATE_TLB(pmap, va, true, true, cpu_id);
1912 	PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM);
1913 	PMAP_TLB_SHOOTNOW();
1914 
1915 	if (needisync)
1916 		PMAP_SYNC_ISTREAM_KERNEL();
1917 }
1918 
1919 /*
1920  * pmap_kremove:		[ INTERFACE ]
1921  *
1922  *	Remove a mapping entered with pmap_kenter_pa() starting at va,
1923  *	for size bytes (assumed to be page rounded).
1924  */
1925 void
1926 pmap_kremove(vaddr_t va, vsize_t size)
1927 {
1928 	pt_entry_t *pte;
1929 	bool needisync = false;
1930 	long cpu_id = cpu_number();
1931 	pmap_t pmap = pmap_kernel();
1932 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1933 
1934 #ifdef DEBUG
1935 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1936 		printf("pmap_kremove(%lx, %lx)\n",
1937 		    va, size);
1938 #endif
1939 
1940 #ifdef DIAGNOSTIC
1941 	if (va < VM_MIN_KERNEL_ADDRESS)
1942 		panic("pmap_kremove: user address");
1943 #endif
1944 
1945 	for (; size != 0; size -= PAGE_SIZE, va += PAGE_SIZE) {
1946 		pte = PMAP_KERNEL_PTE(va);
1947 		if (pmap_pte_v(pte)) {
1948 #ifdef DIAGNOSTIC
1949 			if (pmap_pte_pv(pte))
1950 				panic("pmap_kremove: PG_PVLIST mapping for "
1951 				    "0x%lx", va);
1952 #endif
1953 			if (pmap_pte_exec(pte))
1954 				needisync = true;
1955 
1956 			/* Zap the mapping. */
1957 			PMAP_SET_PTE(pte, PG_NV);
1958 #if defined(MULTIPROCESSOR)
1959 			alpha_mb();		/* XXX alpha_wmb()? */
1960 #endif
1961 			PMAP_INVALIDATE_TLB(pmap, va, true, true, cpu_id);
1962 			PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM);
1963 
1964 			/* Update stats. */
1965 			PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1);
1966 			PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1967 		}
1968 	}
1969 
1970 	PMAP_TLB_SHOOTNOW();
1971 
1972 	if (needisync)
1973 		PMAP_SYNC_ISTREAM_KERNEL();
1974 }
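
/*
 * Illustrative sketch, not part of the original file: pairing
 * pmap_kenter_pa() with pmap_kremove() for one wired, unmanaged
 * kernel page.  "kva" is assumed to be a page-aligned kernel
 * virtual address reserved by the caller.
 */
#if 0
static void
example_kenter(vaddr_t kva, paddr_t pa)
{

	pmap_kenter_pa(kva, pa, VM_PROT_READ | VM_PROT_WRITE, 0);
	/* ... access the page through kva ... */
	pmap_kremove(kva, PAGE_SIZE);
	pmap_update(pmap_kernel());
}
#endif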
1975 
1976 /*
1977  * pmap_unwire:			[ INTERFACE ]
1978  *
1979  *	Clear the wired attribute for a map/virtual-address pair.
1980  *
1981  *	The mapping must already exist in the pmap.
1982  */
1983 void
1984 pmap_unwire(pmap_t pmap, vaddr_t va)
1985 {
1986 	pt_entry_t *pte;
1987 
1988 #ifdef DEBUG
1989 	if (pmapdebug & PDB_FOLLOW)
1990 		printf("pmap_unwire(%p, %lx)\n", pmap, va);
1991 #endif
1992 
1993 	PMAP_LOCK(pmap);
1994 
1995 	pte = pmap_l3pte(pmap, va, NULL);
1996 #ifdef DIAGNOSTIC
1997 	if (pte == NULL || pmap_pte_v(pte) == 0)
1998 		panic("pmap_unwire");
1999 #endif
2000 
2001 	/*
2002 	 * If wiring actually changed (always?) clear the wire bit and
2003 	 * update the wire count.  Note that wiring is not a hardware
2004 	 * characteristic so there is no need to invalidate the TLB.
2005 	 */
2006 	if (pmap_pte_w_chg(pte, 0)) {
2007 		pmap_pte_set_w(pte, false);
2008 		PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
2009 	}
2010 #ifdef DIAGNOSTIC
2011 	else {
2012 		printf("pmap_unwire: wiring for pmap %p va 0x%lx "
2013 		    "didn't change!\n", pmap, va);
2014 	}
2015 #endif
2016 
2017 	PMAP_UNLOCK(pmap);
2018 }
2019 
2020 /*
2021  * pmap_extract:		[ INTERFACE ]
2022  *
2023  *	Extract the physical address associated with the given
2024  *	pmap/virtual address pair.
2025  */
2026 bool
2027 pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *pap)
2028 {
2029 	pt_entry_t *l1pte, *l2pte, *l3pte;
2030 	paddr_t pa;
2031 
2032 #ifdef DEBUG
2033 	if (pmapdebug & PDB_FOLLOW)
2034 		printf("pmap_extract(%p, %lx) -> ", pmap, va);
2035 #endif
2036 
2037 	/*
2038 	 * Take a faster path for the kernel pmap.  Avoids locking,
2039 	 * handles K0SEG.
2040 	 */
2041 	if (pmap == pmap_kernel()) {
2042 		pa = vtophys(va);
2043 		if (pap != NULL)
2044 			*pap = pa;
2045 #ifdef DEBUG
2046 		if (pmapdebug & PDB_FOLLOW)
2047 			printf("0x%lx (kernel vtophys)\n", pa);
2048 #endif
2049 		return (pa != 0);	/* XXX */
2050 	}
2051 
2052 	PMAP_LOCK(pmap);
2053 
2054 	l1pte = pmap_l1pte(pmap, va);
2055 	if (pmap_pte_v(l1pte) == 0)
2056 		goto out;
2057 
2058 	l2pte = pmap_l2pte(pmap, va, l1pte);
2059 	if (pmap_pte_v(l2pte) == 0)
2060 		goto out;
2061 
2062 	l3pte = pmap_l3pte(pmap, va, l2pte);
2063 	if (pmap_pte_v(l3pte) == 0)
2064 		goto out;
2065 
2066 	pa = pmap_pte_pa(l3pte) | (va & PGOFSET);
2067 	PMAP_UNLOCK(pmap);
2068 	if (pap != NULL)
2069 		*pap = pa;
2070 #ifdef DEBUG
2071 	if (pmapdebug & PDB_FOLLOW)
2072 		printf("0x%lx\n", pa);
2073 #endif
2074 	return (true);
2075 
2076  out:
2077 	PMAP_UNLOCK(pmap);
2078 #ifdef DEBUG
2079 	if (pmapdebug & PDB_FOLLOW)
2080 		printf("failed\n");
2081 #endif
2082 	return (false);
2083 }
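
/*
 * Illustrative sketch, not part of the original file: the usual
 * calling convention for pmap_extract().  The helper name is
 * hypothetical.
 */
#if 0
static paddr_t
example_extract(pmap_t pm, vaddr_t va)
{
	paddr_t pa;

	if (pmap_extract(pm, va, &pa) == false)
		return (0);		/* no valid mapping at va */
	return (pa);
}
#endif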
2084 
2085 /*
2086  * pmap_copy:			[ INTERFACE ]
2087  *
2088  *	Copy the mapping range specified by src_addr/len
2089  *	from the source map to the range dst_addr/len
2090  *	in the destination map.
2091  *
2092  *	This routine is only advisory and need not do anything.
2093  */
2094 /* call deleted in <machine/pmap.h> */
2095 
2096 /*
2097  * pmap_update:			[ INTERFACE ]
2098  *
2099  *	Require that all active physical maps contain no
2100  *	incorrect entries NOW, by processing any deferred
2101  *	pmap operations.
2102  */
2103 /* call deleted in <machine/pmap.h> */
2104 
2105 /*
2106  * pmap_activate:		[ INTERFACE ]
2107  *
2108  *	Activate the pmap used by the specified process.  This includes
2109  *	reloading the MMU context if this is the current process, and marking
2110  *	the pmap in use by the processor.
2111  */
2112 void
2113 pmap_activate(struct lwp *l)
2114 {
2115 	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
2116 	long cpu_id = cpu_number();
2117 
2118 #ifdef DEBUG
2119 	if (pmapdebug & PDB_FOLLOW)
2120 		printf("pmap_activate(%p)\n", l);
2121 #endif
2122 
2123 	/* Mark the pmap in use by this processor. */
2124 	atomic_or_ulong(&pmap->pm_cpus, (1UL << cpu_id));
2125 
2126 	/* Allocate an ASN. */
2127 	pmap_asn_alloc(pmap, cpu_id);
2128 
2129 	PMAP_ACTIVATE(pmap, l, cpu_id);
2130 }
2131 
2132 /*
2133  * pmap_deactivate:		[ INTERFACE ]
2134  *
2135  *	Mark that the pmap used by the specified process is no longer
2136  *	in use by the processor.
2137  *
2138  *	The comment above pmap_activate() wrt. locking applies here,
2139  *	as well.  Note that we use only a single `atomic' operation,
2140  *	so no locking is necessary.
2141  */
2142 void
2143 pmap_deactivate(struct lwp *l)
2144 {
2145 	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
2146 
2147 #ifdef DEBUG
2148 	if (pmapdebug & PDB_FOLLOW)
2149 		printf("pmap_deactivate(%p)\n", l);
2150 #endif
2151 
2152 	/*
2153 	 * Mark the pmap no longer in use by this processor.
2154 	 */
2155 	atomic_and_ulong(&pmap->pm_cpus, ~(1UL << cpu_number()));
2156 }
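
/*
 * Note on pm_cpus (derived from pmap_activate()/pmap_deactivate()
 * above): it is a bitmask with one bit per CPU id, set while the pmap
 * is active on that processor.  Both updates are single atomic
 * operations, which is why neither routine takes the pmap lock.
 */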
2157 
2158 /*
2159  * pmap_zero_page:		[ INTERFACE ]
2160  *
2161  *	Zero the specified (machine independent) page by mapping the page
2162  *	into virtual memory and clear its contents, one machine dependent
2163  *	into virtual memory and clearing its contents, one machine dependent
2164  *
2165  *	Note: no locking is necessary in this function.
2166  */
2167 void
2168 pmap_zero_page(paddr_t phys)
2169 {
2170 	u_long *p0, *p1, *pend;
2171 
2172 #ifdef DEBUG
2173 	if (pmapdebug & PDB_FOLLOW)
2174 		printf("pmap_zero_page(%lx)\n", phys);
2175 #endif
2176 
2177 	p0 = (u_long *)ALPHA_PHYS_TO_K0SEG(phys);
2178 	p1 = NULL;
2179 	pend = (u_long *)((u_long)p0 + PAGE_SIZE);
2180 
2181 	/*
2182 	 * Unroll the loop a bit, doing 16 quadwords per iteration.
2183 	 * Do only 8 back-to-back stores, and alternate registers.
2184 	 */
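	/*
	 * (Each iteration below clears 16 quadwords = 128 bytes; with
	 *  the 8 KB Alpha page size -- an assumption about PAGE_SIZE --
	 *  that is 64 iterations per page.)
	 */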
2185 	do {
2186 		__asm volatile(
2187 		"# BEGIN loop body\n"
2188 		"	addq	%2, (8 * 8), %1		\n"
2189 		"	stq	$31, (0 * 8)(%0)	\n"
2190 		"	stq	$31, (1 * 8)(%0)	\n"
2191 		"	stq	$31, (2 * 8)(%0)	\n"
2192 		"	stq	$31, (3 * 8)(%0)	\n"
2193 		"	stq	$31, (4 * 8)(%0)	\n"
2194 		"	stq	$31, (5 * 8)(%0)	\n"
2195 		"	stq	$31, (6 * 8)(%0)	\n"
2196 		"	stq	$31, (7 * 8)(%0)	\n"
2197 		"					\n"
2198 		"	addq	%3, (8 * 8), %0		\n"
2199 		"	stq	$31, (0 * 8)(%1)	\n"
2200 		"	stq	$31, (1 * 8)(%1)	\n"
2201 		"	stq	$31, (2 * 8)(%1)	\n"
2202 		"	stq	$31, (3 * 8)(%1)	\n"
2203 		"	stq	$31, (4 * 8)(%1)	\n"
2204 		"	stq	$31, (5 * 8)(%1)	\n"
2205 		"	stq	$31, (6 * 8)(%1)	\n"
2206 		"	stq	$31, (7 * 8)(%1)	\n"
2207 		"	# END loop body"
2208 		: "=r" (p0), "=r" (p1)
2209 		: "0" (p0), "1" (p1)
2210 		: "memory");
2211 	} while (p0 < pend);
2212 }
2213 
2214 /*
2215  * pmap_copy_page:		[ INTERFACE ]
2216  *
2217  *	Copy the specified (machine independent) page by mapping the page
2218  *	into virtual memory and using memcpy to copy the page, one machine
2219  *	dependent page at a time.
2220  *
2221  *	Note: no locking is necessary in this function.
2222  */
2223 void
2224 pmap_copy_page(paddr_t src, paddr_t dst)
2225 {
2226 	const void *s;
2227 	void *d;
2228 
2229 #ifdef DEBUG
2230 	if (pmapdebug & PDB_FOLLOW)
2231 		printf("pmap_copy_page(%lx, %lx)\n", src, dst);
2232 #endif
2233 	s = (const void *)ALPHA_PHYS_TO_K0SEG(src);
2234 	d = (void *)ALPHA_PHYS_TO_K0SEG(dst);
2235 	memcpy(d, s, PAGE_SIZE);
2236 }
2237 
2238 /*
2239  * pmap_pageidlezero:		[ INTERFACE ]
2240  *
2241  *	Page zero'er for the idle loop.  Returns true if the
2242  *	page was zero'd, false if we aborted for some reason.
2243  */
2244 bool
2245 pmap_pageidlezero(paddr_t pa)
2246 {
2247 	u_long *ptr;
2248 	int i, cnt = PAGE_SIZE / sizeof(u_long);
2249 
2250 	for (i = 0, ptr = (u_long *) ALPHA_PHYS_TO_K0SEG(pa); i < cnt; i++) {
2251 		if (sched_curcpu_runnable_p()) {
2252 			/*
2253 			 * An LWP has become ready.  Abort now,
2254 			 * so we don't keep it waiting while we
2255 			 * finish zeroing the page.
2256 			 */
2257 			return (false);
2258 		}
2259 		*ptr++ = 0;
2260 	}
2261 
2262 	return (true);
2263 }
2264 
2265 /*
2266  * pmap_clear_modify:		[ INTERFACE ]
2267  *
2268  *	Clear the modify bits on the specified physical page.
2269  */
2270 bool
2271 pmap_clear_modify(struct vm_page *pg)
2272 {
2273 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2274 	bool rv = false;
2275 	long cpu_id = cpu_number();
2276 	kmutex_t *lock;
2277 
2278 #ifdef DEBUG
2279 	if (pmapdebug & PDB_FOLLOW)
2280 		printf("pmap_clear_modify(%p)\n", pg);
2281 #endif
2282 
2283 	PMAP_HEAD_TO_MAP_LOCK();
2284 	lock = pmap_pvh_lock(pg);
2285 	mutex_enter(lock);
2286 
2287 	if (md->pvh_attrs & PGA_MODIFIED) {
2288 		rv = true;
2289 		pmap_changebit(pg, PG_FOW, ~0, cpu_id);
2290 		md->pvh_attrs &= ~PGA_MODIFIED;
2291 	}
2292 
2293 	mutex_exit(lock);
2294 	PMAP_HEAD_TO_MAP_UNLOCK();
2295 
2296 	return (rv);
2297 }
2298 
2299 /*
2300  * pmap_clear_reference:	[ INTERFACE ]
2301  *
2302  *	Clear the reference bit on the specified physical page.
2303  */
2304 bool
2305 pmap_clear_reference(struct vm_page *pg)
2306 {
2307 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2308 	bool rv = false;
2309 	long cpu_id = cpu_number();
2310 	kmutex_t *lock;
2311 
2312 #ifdef DEBUG
2313 	if (pmapdebug & PDB_FOLLOW)
2314 		printf("pmap_clear_reference(%p)\n", pg);
2315 #endif
2316 
2317 	PMAP_HEAD_TO_MAP_LOCK();
2318 	lock = pmap_pvh_lock(pg);
2319 	mutex_enter(lock);
2320 
2321 	if (md->pvh_attrs & PGA_REFERENCED) {
2322 		rv = true;
2323 		pmap_changebit(pg, PG_FOR | PG_FOW | PG_FOE, ~0, cpu_id);
2324 		md->pvh_attrs &= ~PGA_REFERENCED;
2325 	}
2326 
2327 	mutex_exit(lock);
2328 	PMAP_HEAD_TO_MAP_UNLOCK();
2329 
2330 	return (rv);
2331 }
2332 
2333 /*
2334  * pmap_is_referenced:		[ INTERFACE ]
2335  *
2336  *	Return whether or not the specified physical page is referenced
2337  *	by any physical maps.
2338  */
2339 /* See <machine/pmap.h> */
2340 
2341 /*
2342  * pmap_is_modified:		[ INTERFACE ]
2343  *
2344  *	Return whether or not the specified physical page is modified
2345  *	by any physical maps.
2346  */
2347 /* See <machine/pmap.h> */
2348 
2349 /*
2350  * pmap_phys_address:		[ INTERFACE ]
2351  *
2352  *	Return the physical address corresponding to the specified
2353  *	cookie.  Used by the device pager to decode a device driver's
2354  *	mmap entry point return value.
2355  *
2356  *	Note: no locking is necessary in this function.
2357  */
2358 paddr_t
2359 pmap_phys_address(paddr_t ppn)
2360 {
2361 
2362 	return (alpha_ptob(ppn));
2363 }
2364 
2365 /*
2366  * Miscellaneous support routines follow
2367  */
2368 
2369 /*
2370  * alpha_protection_init:
2371  *
2372  *	Initialize Alpha protection code array.
2373  *
2374  *	Note: no locking is necessary in this function.
2375  */
2376 static void
2377 alpha_protection_init(void)
2378 {
2379 	int prot, *kp, *up;
2380 
2381 	kp = protection_codes[0];
2382 	up = protection_codes[1];
2383 
2384 	for (prot = 0; prot < 8; prot++) {
2385 		kp[prot] = PG_ASM;
2386 		up[prot] = 0;
2387 
2388 		if (prot & VM_PROT_READ) {
2389 			kp[prot] |= PG_KRE;
2390 			up[prot] |= PG_KRE | PG_URE;
2391 		}
2392 		if (prot & VM_PROT_WRITE) {
2393 			kp[prot] |= PG_KWE;
2394 			up[prot] |= PG_KWE | PG_UWE;
2395 		}
2396 		if (prot & VM_PROT_EXECUTE) {
2397 			kp[prot] |= PG_EXEC | PG_KRE;
2398 			up[prot] |= PG_EXEC | PG_KRE | PG_URE;
2399 		} else {
2400 			kp[prot] |= PG_FOE;
2401 			up[prot] |= PG_FOE;
2402 		}
2403 	}
2404 }
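
/*
 * Illustrative note (assumption about pte_prot(), which is defined
 * elsewhere in this file): the table filled in above is expected to
 * be indexed roughly as protection_codes[pmap == pmap_kernel() ?
 * 0 : 1][prot], which is why row 0 carries PG_ASM and kernel-only
 * access bits while row 1 adds the PG_URE/PG_UWE user bits.
 */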
2405 
2406 /*
2407  * pmap_remove_mapping:
2408  *
2409  *	Invalidate a single page denoted by pmap/va.
2410  *
2411  *	If (pte != NULL), it is the already computed PTE for the page.
2412  *
2413  *	Note: locking in this function is complicated by the fact
2414  *	that we can be called when the PV list is already locked.
2415  *	(pmap_page_protect()).  In this case, the caller must be
2416  *	careful to get the next PV entry while we remove this entry
2417  *	from beneath it.  We assume that the pmap itself is already
2418  *	locked; dolock applies only to the PV list.
2419  *
2420  *	Returns true or false, indicating if an I-stream sync needs
2421  *	to be initiated (for this CPU or for other CPUs).
2422  */
2423 static bool
2424 pmap_remove_mapping(pmap_t pmap, vaddr_t va, pt_entry_t *pte,
2425     bool dolock, long cpu_id)
2426 {
2427 	paddr_t pa;
2428 	struct vm_page *pg;		/* if != NULL, page is managed */
2429 	bool onpv;
2430 	bool hadasm;
2431 	bool isactive;
2432 	bool needisync = false;
2433 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
2434 
2435 #ifdef DEBUG
2436 	if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
2437 		printf("pmap_remove_mapping(%p, %lx, %p, %d, %ld)\n",
2438 		       pmap, va, pte, dolock, cpu_id);
2439 #endif
2440 
2441 	/*
2442 	 * PTE not provided, compute it from pmap and va.
2443 	 */
2444 	if (pte == NULL) {
2445 		pte = pmap_l3pte(pmap, va, NULL);
2446 		if (pmap_pte_v(pte) == 0)
2447 			return (false);
2448 	}
2449 
2450 	pa = pmap_pte_pa(pte);
2451 	onpv = (pmap_pte_pv(pte) != 0);
2452 	hadasm = (pmap_pte_asm(pte) != 0);
2453 	isactive = PMAP_ISACTIVE(pmap, cpu_id);
2454 
2455 	/*
2456 	 * Determine what we need to do about the I-stream.  If
2457 	 * PG_EXEC was set, we mark a user pmap as needing an
2458 	 * I-sync on the way out to userspace.  We always need
2459 	 * an immediate I-sync for the kernel pmap.
2460 	 */
2461 	if (pmap_pte_exec(pte)) {
2462 		if (pmap == pmap_kernel())
2463 			needisync = true;
2464 		else {
2465 			PMAP_SET_NEEDISYNC(pmap);
2466 			needisync = (pmap->pm_cpus != 0);
2467 		}
2468 	}
2469 
2470 	/*
2471 	 * Update statistics
2472 	 */
2473 	if (pmap_pte_w(pte))
2474 		PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
2475 	PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1);
2476 
2477 	/*
2478 	 * Invalidate the PTE after saving the reference modify info.
2479 	 */
2480 #ifdef DEBUG
2481 	if (pmapdebug & PDB_REMOVE)
2482 		printf("remove: invalidating pte at %p\n", pte);
2483 #endif
2484 	PMAP_SET_PTE(pte, PG_NV);
2485 
2486 	PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id);
2487 	PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0);
2488 	PMAP_TLB_SHOOTNOW();
2489 
2490 	/*
2491 	 * If we're removing a user mapping, check to see if we
2492 	 * can free page table pages.
2493 	 */
2494 	if (pmap != pmap_kernel()) {
2495 		/*
2496 		 * Delete the reference on the level 3 table.  It will
2497 		 * delete references on the level 2 and 1 tables as
2498 		 * appropriate.
2499 		 */
2500 		pmap_l3pt_delref(pmap, va, pte, cpu_id);
2501 	}
2502 
2503 	/*
2504 	 * If the mapping wasn't entered on the PV list, we're all done.
2505 	 */
2506 	if (onpv == false)
2507 		return (needisync);
2508 
2509 	/*
2510 	 * Remove it from the PV table.
2511 	 */
2512 	pg = PHYS_TO_VM_PAGE(pa);
2513 	KASSERT(pg != NULL);
2514 	pmap_pv_remove(pmap, pg, va, dolock);
2515 
2516 	return (needisync);
2517 }
2518 
2519 /*
2520  * pmap_changebit:
2521  *
2522  *	Set or clear the specified PTE bits for all mappings on the
2523  *	specified page.
2524  *
2525  *	Note: we assume that the pv_head is already locked, and that
2526  *	the caller has acquired a PV->pmap mutex so that we can lock
2527  *	the pmaps as we encounter them.
2528  */
2529 static void
2530 pmap_changebit(struct vm_page *pg, u_long set, u_long mask, long cpu_id)
2531 {
2532 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2533 	pv_entry_t pv;
2534 	pt_entry_t *pte, npte;
2535 	vaddr_t va;
2536 	bool hadasm, isactive;
2537 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
2538 
2539 #ifdef DEBUG
2540 	if (pmapdebug & PDB_BITS)
2541 		printf("pmap_changebit(%p, 0x%lx, 0x%lx)\n",
2542 		    pg, set, mask);
2543 #endif
2544 
2545 	/*
2546 	 * Loop over all current mappings setting/clearing as apropos.
2547 	 */
2548 	for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) {
2549 		va = pv->pv_va;
2550 
2551 		PMAP_LOCK(pv->pv_pmap);
2552 
2553 		pte = pv->pv_pte;
2554 		npte = (*pte | set) & mask;
2555 		if (*pte != npte) {
2556 			hadasm = (pmap_pte_asm(pte) != 0);
2557 			isactive = PMAP_ISACTIVE(pv->pv_pmap, cpu_id);
2558 			PMAP_SET_PTE(pte, npte);
2559 			PMAP_INVALIDATE_TLB(pv->pv_pmap, va, hadasm, isactive,
2560 			    cpu_id);
2561 			PMAP_TLB_SHOOTDOWN(pv->pv_pmap, va,
2562 			    hadasm ? PG_ASM : 0);
2563 		}
2564 		PMAP_UNLOCK(pv->pv_pmap);
2565 	}
2566 
2567 	PMAP_TLB_SHOOTNOW();
2568 }
2569 
2570 /*
2571  * pmap_emulate_reference:
2572  *
2573  *	Emulate reference and/or modified bit hits.
2574  *	Return 1 if this was an execute fault on a non-exec mapping,
2575  *	otherwise return 0.
2576  */
2577 int
2578 pmap_emulate_reference(struct lwp *l, vaddr_t v, int user, int type)
2579 {
2580 	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
2581 	pt_entry_t faultoff, *pte;
2582 	struct vm_page *pg;
2583 	paddr_t pa;
2584 	bool didlock = false;
2585 	bool exec = false;
2586 	long cpu_id = cpu_number();
2587 	kmutex_t *lock;
2588 
2589 #ifdef DEBUG
2590 	if (pmapdebug & PDB_FOLLOW)
2591 		printf("pmap_emulate_reference: %p, 0x%lx, %d, %d\n",
2592 		    l, v, user, type);
2593 #endif
2594 
2595 	/*
2596 	 * Convert process and virtual address to physical address.
2597 	 */
2598 	if (v >= VM_MIN_KERNEL_ADDRESS) {
2599 		if (user)
2600 			panic("pmap_emulate_reference: user ref to kernel");
2601 		/*
2602 		 * No need to lock here; kernel PT pages never go away.
2603 		 */
2604 		pte = PMAP_KERNEL_PTE(v);
2605 	} else {
2606 #ifdef DIAGNOSTIC
2607 		if (l == NULL)
2608 			panic("pmap_emulate_reference: bad proc");
2609 		if (l->l_proc->p_vmspace == NULL)
2610 			panic("pmap_emulate_reference: bad p_vmspace");
2611 #endif
2612 		PMAP_LOCK(pmap);
2613 		didlock = true;
2614 		pte = pmap_l3pte(pmap, v, NULL);
2615 		/*
2616 		 * We'll unlock below where we're done with the PTE.
2617 		 */
2618 	}
2619 	exec = pmap_pte_exec(pte);
2620 	if (!exec && type == ALPHA_MMCSR_FOE) {
2621 		if (didlock)
2622 			PMAP_UNLOCK(pmap);
2623 		return (1);
2624 	}
2625 #ifdef DEBUG
2626 	if (pmapdebug & PDB_FOLLOW) {
2627 		printf("\tpte = %p, ", pte);
2628 		printf("*pte = 0x%lx\n", *pte);
2629 	}
2630 #endif
2631 #ifdef DEBUG				/* These checks are more expensive */
2632 	if (!pmap_pte_v(pte))
2633 		panic("pmap_emulate_reference: invalid pte");
2634 	if (type == ALPHA_MMCSR_FOW) {
2635 		if (!(*pte & (user ? PG_UWE : PG_UWE | PG_KWE)))
2636 			panic("pmap_emulate_reference: write but unwritable");
2637 		if (!(*pte & PG_FOW))
2638 			panic("pmap_emulate_reference: write but not FOW");
2639 	} else {
2640 		if (!(*pte & (user ? PG_URE : PG_URE | PG_KRE)))
2641 			panic("pmap_emulate_reference: !write but unreadable");
2642 		if (!(*pte & (PG_FOR | PG_FOE)))
2643 			panic("pmap_emulate_reference: !write but not FOR|FOE");
2644 	}
2645 	/* Other diagnostics? */
2646 #endif
2647 	pa = pmap_pte_pa(pte);
2648 
2649 	/*
2650 	 * We're now done with the PTE.  If it was a user pmap, unlock
2651 	 * it now.
2652 	 */
2653 	if (didlock)
2654 		PMAP_UNLOCK(pmap);
2655 
2656 #ifdef DEBUG
2657 	if (pmapdebug & PDB_FOLLOW)
2658 		printf("\tpa = 0x%lx\n", pa);
2659 #endif
2660 #ifdef DIAGNOSTIC
2661 	if (!uvm_pageismanaged(pa))
2662 		panic("pmap_emulate_reference(%p, 0x%lx, %d, %d): "
2663 		      "pa 0x%lx not managed", l, v, user, type, pa);
2664 #endif
2665 
2666 	/*
2667 	 * Twiddle the appropriate bits to reflect the reference
2668 	 * and/or modification..
2669 	 * and/or modification.
2670 	 * The rules:
2671 	 * 	(1) always mark page as used, and
2672 	 *	(2) if it was a write fault, mark page as modified.
2673 	 */
2674 	pg = PHYS_TO_VM_PAGE(pa);
2675 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2676 
2677 	PMAP_HEAD_TO_MAP_LOCK();
2678 	lock = pmap_pvh_lock(pg);
2679 	mutex_enter(lock);
2680 
2681 	if (type == ALPHA_MMCSR_FOW) {
2682 		md->pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED);
2683 		faultoff = PG_FOR | PG_FOW;
2684 	} else {
2685 		md->pvh_attrs |= PGA_REFERENCED;
2686 		faultoff = PG_FOR;
2687 		if (exec) {
2688 			faultoff |= PG_FOE;
2689 		}
2690 	}
2691 	pmap_changebit(pg, 0, ~faultoff, cpu_id);
2692 
2693 	mutex_exit(lock);
2694 	PMAP_HEAD_TO_MAP_UNLOCK();
2695 	return (0);
2696 }
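
/*
 * Summary of the referenced/modified emulation protocol, as derived
 * from pmap_enter() and pmap_emulate_reference() above: a mapping of
 * an unreferenced page is entered with PG_FOR|PG_FOW|PG_FOE set, and
 * a referenced-but-unmodified page with PG_FOW only.  The resulting
 * fault-on access lands here, the PGA_REFERENCED/PGA_MODIFIED
 * attributes are recorded, and pmap_changebit() clears the
 * corresponding fault-on bits from every mapping of the page.
 */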
2697 
2698 #ifdef DEBUG
2699 /*
2700  * pmap_pv_dump:
2701  *
2702  *	Dump the physical->virtual data for the specified page.
2703  */
2704 void
2705 pmap_pv_dump(paddr_t pa)
2706 {
2707 	struct vm_page *pg;
2708 	struct vm_page_md *md;
2709 	pv_entry_t pv;
2710 	kmutex_t *lock;
2711 
2712 	pg = PHYS_TO_VM_PAGE(pa);
2713 	md = VM_PAGE_TO_MD(pg);
2714 
2715 	lock = pmap_pvh_lock(pg);
2716 	mutex_enter(lock);
2717 
2718 	printf("pa 0x%lx (attrs = 0x%x):\n", pa, md->pvh_attrs);
2719 	for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next)
2720 		printf("     pmap %p, va 0x%lx\n",
2721 		    pv->pv_pmap, pv->pv_va);
2722 	printf("\n");
2723 
2724 	mutex_exit(lock);
2725 }
2726 #endif
2727 
2728 /*
2729  * vtophys:
2730  *
2731  *	Return the physical address corresponding to the K0SEG or
2732  *	K1SEG address provided.
2733  *
2734  *	Note: no locking is necessary in this function.
2735  */
2736 paddr_t
2737 vtophys(vaddr_t vaddr)
2738 {
2739 	pt_entry_t *pte;
2740 	paddr_t paddr = 0;
2741 
2742 	if (vaddr < ALPHA_K0SEG_BASE)
2743 		printf("vtophys: invalid vaddr 0x%lx\n", vaddr);
2744 	else if (vaddr <= ALPHA_K0SEG_END)
2745 		paddr = ALPHA_K0SEG_TO_PHYS(vaddr);
2746 	else {
2747 		pte = PMAP_KERNEL_PTE(vaddr);
2748 		if (pmap_pte_v(pte))
2749 			paddr = pmap_pte_pa(pte) | (vaddr & PGOFSET);
2750 	}
2751 
2752 #if 0
2753 	printf("vtophys(0x%lx) -> 0x%lx\n", vaddr, paddr);
2754 #endif
2755 
2756 	return (paddr);
2757 }
2758 
2759 /******************** pv_entry management ********************/
2760 
2761 /*
2762  * pmap_pv_enter:
2763  *
2764  *	Add a physical->virtual entry to the pv_table.
2765  */
2766 static int
2767 pmap_pv_enter(pmap_t pmap, struct vm_page *pg, vaddr_t va, pt_entry_t *pte,
2768     bool dolock)
2769 {
2770 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2771 	pv_entry_t newpv;
2772 	kmutex_t *lock;
2773 
2774 	/*
2775 	 * Allocate and fill in the new pv_entry.
2776 	 */
2777 	newpv = pmap_pv_alloc();
2778 	if (newpv == NULL)
2779 		return ENOMEM;
2780 	newpv->pv_va = va;
2781 	newpv->pv_pmap = pmap;
2782 	newpv->pv_pte = pte;
2783 
2784 	if (dolock) {
2785 		lock = pmap_pvh_lock(pg);
2786 		mutex_enter(lock);
2787 	}
2788 
2789 #ifdef DEBUG
2790     {
2791 	pv_entry_t pv;
2792 	/*
2793 	 * Make sure the entry doesn't already exist.
2794 	 */
2795 	for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) {
2796 		if (pmap == pv->pv_pmap && va == pv->pv_va) {
2797 			printf("pmap = %p, va = 0x%lx\n", pmap, va);
2798 			panic("pmap_pv_enter: already in pv table");
2799 		}
2800 	}
2801     }
2802 #endif
2803 
2804 	/*
2805 	 * ...and put it in the list.
2806 	 */
2807 	newpv->pv_next = md->pvh_list;
2808 	md->pvh_list = newpv;
2809 
2810 	if (dolock) {
2811 		mutex_exit(lock);
2812 	}
2813 
2814 	return 0;
2815 }
2816 
2817 /*
2818  * pmap_pv_remove:
2819  *
2820  *	Remove a physical->virtual entry from the pv_table.
2821  */
2822 static void
2823 pmap_pv_remove(pmap_t pmap, struct vm_page *pg, vaddr_t va, bool dolock)
2824 {
2825 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2826 	pv_entry_t pv, *pvp;
2827 	kmutex_t *lock;
2828 
2829 	if (dolock) {
2830 		lock = pmap_pvh_lock(pg);
2831 		mutex_enter(lock);
2832 	} else {
2833 		lock = NULL; /* XXX stupid gcc */
2834 	}
2835 
2836 	/*
2837 	 * Find the entry to remove.
2838 	 */
2839 	for (pvp = &md->pvh_list, pv = *pvp;
2840 	     pv != NULL; pvp = &pv->pv_next, pv = *pvp)
2841 		if (pmap == pv->pv_pmap && va == pv->pv_va)
2842 			break;
2843 
2844 #ifdef DEBUG
2845 	if (pv == NULL)
2846 		panic("pmap_pv_remove: not in pv table");
2847 #endif
2848 
2849 	*pvp = pv->pv_next;
2850 
2851 	if (dolock) {
2852 		mutex_exit(lock);
2853 	}
2854 
2855 	pmap_pv_free(pv);
2856 }
2857 
2858 /*
2859  * pmap_pv_page_alloc:
2860  *
2861  *	Allocate a page for the pv_entry pool.
2862  */
2863 static void *
2864 pmap_pv_page_alloc(struct pool *pp, int flags)
2865 {
2866 	paddr_t pg;
2867 
2868 	if (pmap_physpage_alloc(PGU_PVENT, &pg))
2869 		return ((void *)ALPHA_PHYS_TO_K0SEG(pg));
2870 	return (NULL);
2871 }
2872 
2873 /*
2874  * pmap_pv_page_free:
2875  *
2876  *	Free a pv_entry pool page.
2877  */
2878 static void
2879 pmap_pv_page_free(struct pool *pp, void *v)
2880 {
2881 
2882 	pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t)v));
2883 }
2884 
2885 /******************** misc. functions ********************/
2886 
2887 /*
2888  * pmap_physpage_alloc:
2889  *
2890  *	Allocate a single page from the VM system and return the
2891  *	physical address for that page.
2892  */
2893 static bool
2894 pmap_physpage_alloc(int usage, paddr_t *pap)
2895 {
2896 	struct vm_page *pg;
2897 	paddr_t pa;
2898 
2899 	/*
2900 	 * Don't ask for a zero'd page in the L1PT case -- we will
2901 	 * properly initialize it in the constructor.
2902 	 */
2903 
2904 	pg = uvm_pagealloc(NULL, 0, NULL, usage == PGU_L1PT ?
2905 	    UVM_PGA_USERESERVE : UVM_PGA_USERESERVE|UVM_PGA_ZERO);
2906 	if (pg != NULL) {
2907 		pa = VM_PAGE_TO_PHYS(pg);
2908 #ifdef DEBUG
2909 		struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2910 		if (md->pvh_refcnt != 0) {
2911 			printf("pmap_physpage_alloc: page 0x%lx has "
2912 			    "%d references\n", pa, md->pvh_refcnt);
2913 			panic("pmap_physpage_alloc");
2914 		}
2915 #endif
2916 		*pap = pa;
2917 		return (true);
2918 	}
2919 	return (false);
2920 }
2921 
2922 /*
2923  * pmap_physpage_free:
2924  *
2925  *	Free the single page table page at the specified physical address.
2926  */
2927 static void
2928 pmap_physpage_free(paddr_t pa)
2929 {
2930 	struct vm_page *pg;
2931 
2932 	if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL)
2933 		panic("pmap_physpage_free: bogus physical page address");
2934 
2935 #ifdef DEBUG
2936 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2937 	if (md->pvh_refcnt != 0)
2938 		panic("pmap_physpage_free: page still has references");
2939 #endif
2940 
2941 	uvm_pagefree(pg);
2942 }
2943 
2944 /*
2945  * pmap_physpage_addref:
2946  *
2947  *	Add a reference to the specified special use page.
2948  */
2949 static int
2950 pmap_physpage_addref(void *kva)
2951 {
2952 	struct vm_page *pg;
2953 	struct vm_page_md *md;
2954 	paddr_t pa;
2955 
2956 	pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva));
2957 	pg = PHYS_TO_VM_PAGE(pa);
2958 	md = VM_PAGE_TO_MD(pg);
2959 
2960 	KASSERT((int)md->pvh_refcnt >= 0);
2961 
2962 	return atomic_inc_uint_nv(&md->pvh_refcnt);
2963 }
2964 
2965 /*
2966  * pmap_physpage_delref:
2967  *
2968  *	Delete a reference to the specified special use page.
2969  */
2970 static int
2971 pmap_physpage_delref(void *kva)
2972 {
2973 	struct vm_page *pg;
2974 	struct vm_page_md *md;
2975 	paddr_t pa;
2976 
2977 	pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva));
2978 	pg = PHYS_TO_VM_PAGE(pa);
2979 	md = VM_PAGE_TO_MD(pg);
2980 
2981 	KASSERT((int)md->pvh_refcnt > 0);
2982 
2983 	return atomic_dec_uint_nv(&md->pvh_refcnt);
2984 }
2985 
2986 /******************** page table page management ********************/
2987 
2988 /*
2989  * pmap_growkernel:		[ INTERFACE ]
2990  *
2991  *	Grow the kernel address space.  This is a hint from the
2992  *	upper layer to pre-allocate more kernel PT pages.
2993  */
2994 vaddr_t
2995 pmap_growkernel(vaddr_t maxkvaddr)
2996 {
2997 	struct pmap *kpm = pmap_kernel(), *pm;
2998 	paddr_t ptaddr;
2999 	pt_entry_t *l1pte, *l2pte, pte;
3000 	vaddr_t va;
3001 	int l1idx;
3002 
3003 	rw_enter(&pmap_growkernel_lock, RW_WRITER);
3004 
3005 	if (maxkvaddr <= virtual_end)
3006 		goto out;		/* we are OK */
3007 
3008 	va = virtual_end;
3009 
3010 	while (va < maxkvaddr) {
3011 		/*
3012 		 * If there is no valid L1 PTE (i.e. no L2 PT page),
3013 		 * allocate a new L2 PT page and insert it into the
3014 		 * L1 map.
3015 		 */
3016 		l1pte = pmap_l1pte(kpm, va);
3017 		if (pmap_pte_v(l1pte) == 0) {
3018 			/*
3019 			 * XXX PGU_NORMAL?  It's not a "traditional" PT page.
3020 			 */
3021 			if (uvm.page_init_done == false) {
3022 				/*
3023 				 * We're growing the kernel pmap early (from
3024 				 * uvm_pageboot_alloc()).  This case must
3025 				 * be handled a little differently.
3026 				 */
3027 				ptaddr = ALPHA_K0SEG_TO_PHYS(
3028 				    pmap_steal_memory(PAGE_SIZE, NULL, NULL));
3029 			} else if (pmap_physpage_alloc(PGU_NORMAL,
3030 				   &ptaddr) == false)
3031 				goto die;
3032 			pte = (atop(ptaddr) << PG_SHIFT) |
3033 			    PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
3034 			*l1pte = pte;
3035 
3036 			l1idx = l1pte_index(va);
3037 
3038 			/* Update all the user pmaps. */
3039 			mutex_enter(&pmap_all_pmaps_lock);
3040 			for (pm = TAILQ_FIRST(&pmap_all_pmaps);
3041 			     pm != NULL; pm = TAILQ_NEXT(pm, pm_list)) {
3042 				/* Skip the kernel pmap. */
3043 				if (pm == pmap_kernel())
3044 					continue;
3045 
3046 				PMAP_LOCK(pm);
3047 				if (pm->pm_lev1map == kernel_lev1map) {
3048 					PMAP_UNLOCK(pm);
3049 					continue;
3050 				}
3051 				pm->pm_lev1map[l1idx] = pte;
3052 				PMAP_UNLOCK(pm);
3053 			}
3054 			mutex_exit(&pmap_all_pmaps_lock);
3055 		}
3056 
3057 		/*
3058 		 * Have an L2 PT page now, add the L3 PT page.
3059 		 */
3060 		l2pte = pmap_l2pte(kpm, va, l1pte);
3061 		KASSERT(pmap_pte_v(l2pte) == 0);
3062 		if (uvm.page_init_done == false) {
3063 			/*
3064 			 * See above.
3065 			 */
3066 			ptaddr = ALPHA_K0SEG_TO_PHYS(
3067 			    pmap_steal_memory(PAGE_SIZE, NULL, NULL));
3068 		} else if (pmap_physpage_alloc(PGU_NORMAL, &ptaddr) == false)
3069 			goto die;
3070 		*l2pte = (atop(ptaddr) << PG_SHIFT) |
3071 		    PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
3072 		va += ALPHA_L2SEG_SIZE;
3073 	}
3074 
3075 	/* Invalidate the L1 PT cache. */
3076 	pool_cache_invalidate(&pmap_l1pt_cache);
3077 
3078 	virtual_end = va;
3079 
3080  out:
3081 	rw_exit(&pmap_growkernel_lock);
3082 
3083 	return (virtual_end);
3084 
3085  die:
3086 	panic("pmap_growkernel: out of memory");
3087 }
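
/*
 * Note (general NetBSD pmap convention, not stated in this file):
 * pmap_growkernel() is called by the VM system before it uses kernel
 * virtual addresses beyond the current virtual_end; the return value
 * is the new end of the managed kernel address range.
 */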
3088 
3089 /*
3090  * pmap_lev1map_create:
3091  *
3092  *	Create a new level 1 page table for the specified pmap.
3093  *
3094  *	Note: growkernel must already be held and the pmap either
3095  *	already locked or unreferenced globally.
3096  */
3097 static int
3098 pmap_lev1map_create(pmap_t pmap, long cpu_id)
3099 {
3100 	pt_entry_t *l1pt;
3101 
3102 	KASSERT(pmap != pmap_kernel());
3103 
3104 	KASSERT(pmap->pm_lev1map == kernel_lev1map);
3105 	KASSERT(pmap->pm_asni[cpu_id].pma_asn == PMAP_ASN_RESERVED);
3106 
3107 	/* Don't sleep -- we're called with locks held. */
3108 	l1pt = pool_cache_get(&pmap_l1pt_cache, PR_NOWAIT);
3109 	if (l1pt == NULL)
3110 		return (ENOMEM);
3111 
3112 	pmap->pm_lev1map = l1pt;
3113 	return (0);
3114 }
3115 
3116 /*
3117  * pmap_lev1map_destroy:
3118  *
3119  *	Destroy the level 1 page table for the specified pmap.
3120  *
3121  *	Note: growkernel must be held and the pmap must already be
3122  *	locked or not globally referenced.
3123  */
3124 static void
3125 pmap_lev1map_destroy(pmap_t pmap, long cpu_id)
3126 {
3127 	pt_entry_t *l1pt = pmap->pm_lev1map;
3128 
3129 	KASSERT(pmap != pmap_kernel());
3130 
3131 	/*
3132 	 * Go back to referencing the global kernel_lev1map.
3133 	 */
3134 	pmap->pm_lev1map = kernel_lev1map;
3135 
3136 	/*
3137 	 * Free the old level 1 page table page.
3138 	 */
3139 	pool_cache_put(&pmap_l1pt_cache, l1pt);
3140 }
3141 
3142 /*
3143  * pmap_l1pt_ctor:
3144  *
3145  *	Pool cache constructor for L1 PT pages.
3146  *
3147  *	Note: The growkernel lock is held across allocations
3148  *	from our pool_cache, so we don't need to acquire it
3149  *	ourselves.
3150  */
3151 static int
3152 pmap_l1pt_ctor(void *arg, void *object, int flags)
3153 {
3154 	pt_entry_t *l1pt = object, pte;
3155 	int i;
3156 
3157 	/*
3158 	 * Initialize the new level 1 table by zeroing the
3159 	 * user portion and copying the kernel mappings into
3160 	 * the kernel portion.
3161 	 */
3162 	for (i = 0; i < l1pte_index(VM_MIN_KERNEL_ADDRESS); i++)
3163 		l1pt[i] = 0;
3164 
3165 	for (i = l1pte_index(VM_MIN_KERNEL_ADDRESS);
3166 	     i <= l1pte_index(VM_MAX_KERNEL_ADDRESS); i++)
3167 		l1pt[i] = kernel_lev1map[i];
3168 
3169 	/*
3170 	 * Now, map the new virtual page table.  NOTE: NO ASM!
3171 	 */
3172 	pte = ((ALPHA_K0SEG_TO_PHYS((vaddr_t) l1pt) >> PGSHIFT) << PG_SHIFT) |
3173 	    PG_V | PG_KRE | PG_KWE;
3174 	l1pt[l1pte_index(VPTBASE)] = pte;
3175 
3176 	return (0);
3177 }
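
/*
 * Sketch of the resulting L1 page, derived from the constructor
 * above: user entries zeroed, kernel entries copied from
 * kernel_lev1map, and the VPTBASE slot pointing back at the new
 * table itself (deliberately without PG_ASM, so the virtual page
 * table mapping stays private to each address space).
 */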
3178 
3179 /*
3180  * pmap_l1pt_alloc:
3181  *
3182  *	Page alloctaor for L1 PT pages.
3183  *	Page allocator for L1 PT pages.
3184 static void *
3185 pmap_l1pt_alloc(struct pool *pp, int flags)
3186 {
3187 	paddr_t ptpa;
3188 
3189 	/*
3190 	 * Attempt to allocate a free page.
3191 	 */
3192 	if (pmap_physpage_alloc(PGU_L1PT, &ptpa) == false)
3193 		return (NULL);
3194 
3195 	return ((void *) ALPHA_PHYS_TO_K0SEG(ptpa));
3196 }
3197 
3198 /*
3199  * pmap_l1pt_free:
3200  *
3201  *	Page freer for L1 PT pages.
3202  */
3203 static void
3204 pmap_l1pt_free(struct pool *pp, void *v)
3205 {
3206 
3207 	pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t) v));
3208 }
3209 
3210 /*
3211  * pmap_ptpage_alloc:
3212  *
3213  *	Allocate a level 2 or level 3 page table page, and
3214  *	initialize the PTE that references it.
3215  *
3216  *	Note: the pmap must already be locked.
3217  */
3218 static int
3219 pmap_ptpage_alloc(pmap_t pmap, pt_entry_t *pte, int usage)
3220 {
3221 	paddr_t ptpa;
3222 
3223 	/*
3224 	 * Allocate the page table page.
3225 	 */
3226 	if (pmap_physpage_alloc(usage, &ptpa) == false)
3227 		return (ENOMEM);
3228 
3229 	/*
3230 	 * Initialize the referencing PTE.
3231 	 */
3232 	PMAP_SET_PTE(pte, ((ptpa >> PGSHIFT) << PG_SHIFT) |
3233 	    PG_V | PG_KRE | PG_KWE | PG_WIRED |
3234 	    (pmap == pmap_kernel() ? PG_ASM : 0));
3235 
3236 	return (0);
3237 }
3238 
3239 /*
3240  * pmap_ptpage_free:
3241  *
3242  *	Free the level 2 or level 3 page table page referenced
3243  *	be the provided PTE.
3244  *	by the provided PTE.
3245  *	Note: the pmap must already be locked.
3246  */
3247 static void
3248 pmap_ptpage_free(pmap_t pmap, pt_entry_t *pte)
3249 {
3250 	paddr_t ptpa;
3251 
3252 	/*
3253 	 * Extract the physical address of the page from the PTE
3254 	 * and clear the entry.
3255 	 */
3256 	ptpa = pmap_pte_pa(pte);
3257 	PMAP_SET_PTE(pte, PG_NV);
3258 
3259 #ifdef DEBUG
3260 	pmap_zero_page(ptpa);
3261 #endif
3262 	pmap_physpage_free(ptpa);
3263 }
3264 
3265 /*
3266  * pmap_l3pt_delref:
3267  *
3268  *	Delete a reference on a level 3 PT page.  If the reference drops
3269  *	to zero, free it.
3270  *
3271  *	Note: the pmap must already be locked.
3272  */
3273 static void
3274 pmap_l3pt_delref(pmap_t pmap, vaddr_t va, pt_entry_t *l3pte, long cpu_id)
3275 {
3276 	pt_entry_t *l1pte, *l2pte;
3277 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
3278 
3279 	l1pte = pmap_l1pte(pmap, va);
3280 	l2pte = pmap_l2pte(pmap, va, l1pte);
3281 
3282 #ifdef DIAGNOSTIC
3283 	if (pmap == pmap_kernel())
3284 		panic("pmap_l3pt_delref: kernel pmap");
3285 #endif
3286 
3287 	if (pmap_physpage_delref(l3pte) == 0) {
3288 		/*
3289 		 * No more mappings; we can free the level 3 table.
3290 		 */
3291 #ifdef DEBUG
3292 		if (pmapdebug & PDB_PTPAGE)
3293 			printf("pmap_l3pt_delref: freeing level 3 table at "
3294 			    "0x%lx\n", pmap_pte_pa(l2pte));
3295 #endif
3296 		pmap_ptpage_free(pmap, l2pte);
3297 
3298 		/*
3299 		 * We've freed a level 3 table, so we must
3300 		 * invalidate the TLB entry for that PT page
3301 		 * in the Virtual Page Table VA range, because
3302 		 * otherwise the PALcode will service a TLB
3303 		 * miss using the stale VPT TLB entry it entered
3304 		 * behind our back to shortcut to the VA's PTE.
3305 		 */
3306 		PMAP_INVALIDATE_TLB(pmap,
3307 		    (vaddr_t)(&VPT[VPT_INDEX(va)]), false,
3308 		    PMAP_ISACTIVE(pmap, cpu_id), cpu_id);
3309 		PMAP_TLB_SHOOTDOWN(pmap,
3310 		    (vaddr_t)(&VPT[VPT_INDEX(va)]), 0);
3311 		PMAP_TLB_SHOOTNOW();
3312 
3313 		/*
3314 		 * We've freed a level 3 table, so delete the reference
3315 		 * on the level 2 table.
3316 		 */
3317 		pmap_l2pt_delref(pmap, l1pte, l2pte, cpu_id);
3318 	}
3319 }
3320 
3321 /*
3322  * pmap_l2pt_delref:
3323  *
3324  *	Delete a reference on a level 2 PT page.  If the reference drops
3325  *	to zero, free it.
3326  *
3327  *	Note: the pmap must already be locked.
3328  */
3329 static void
3330 pmap_l2pt_delref(pmap_t pmap, pt_entry_t *l1pte, pt_entry_t *l2pte,
3331     long cpu_id)
3332 {
3333 
3334 #ifdef DIAGNOSTIC
3335 	if (pmap == pmap_kernel())
3336 		panic("pmap_l2pt_delref: kernel pmap");
3337 #endif
3338 
3339 	if (pmap_physpage_delref(l2pte) == 0) {
3340 		/*
3341 		 * No more mappings in this segment; we can free the
3342 		 * level 2 table.
3343 		 */
3344 #ifdef DEBUG
3345 		if (pmapdebug & PDB_PTPAGE)
3346 			printf("pmap_l2pt_delref: freeing level 2 table at "
3347 			    "0x%lx\n", pmap_pte_pa(l1pte));
3348 #endif
3349 		pmap_ptpage_free(pmap, l1pte);
3350 
3351 		/*
3352 		 * We've freed a level 2 table, so delete the reference
3353 		 * on the level 1 table.
3354 		 */
3355 		pmap_l1pt_delref(pmap, l1pte, cpu_id);
3356 	}
3357 }
3358 
3359 /*
3360  * pmap_l1pt_delref:
3361  *
3362  *	Delete a reference on a level 1 PT page.  If the reference drops
3363  *	to zero, free it.
3364  *
3365  *	Note: the pmap must already be locked.
3366  */
3367 static void
3368 pmap_l1pt_delref(pmap_t pmap, pt_entry_t *l1pte, long cpu_id)
3369 {
3370 
3371 #ifdef DIAGNOSTIC
3372 	if (pmap == pmap_kernel())
3373 		panic("pmap_l1pt_delref: kernel pmap");
3374 #endif
3375 
3376 	(void)pmap_physpage_delref(l1pte);
3377 }
3378 
3379 /******************** Address Space Number management ********************/
3380 
3381 /*
3382  * pmap_asn_alloc:
3383  *
3384  *	Allocate and assign an ASN to the specified pmap.
3385  *
3386  *	Note: the pmap must already be locked.  This may be called from
3387  *	an interprocessor interrupt, and in that case, the sender of
3388  *	the IPI has the pmap lock.
3389  */
3390 static void
3391 pmap_asn_alloc(pmap_t pmap, long cpu_id)
3392 {
3393 	struct pmap_asn_info *pma = &pmap->pm_asni[cpu_id];
3394 	struct pmap_asn_info *cpma = &pmap_asn_info[cpu_id];
3395 
3396 #ifdef DEBUG
3397 	if (pmapdebug & (PDB_FOLLOW|PDB_ASN))
3398 		printf("pmap_asn_alloc(%p)\n", pmap);
3399 #endif
3400 
3401 	/*
3402 	 * If the pmap is still using the global kernel_lev1map, there
3403 	 * is no need to assign an ASN at this time, because only
3404 	 * kernel mappings exist in that map, and all kernel mappings
3405 	 * have PG_ASM set.  If the pmap eventually gets its own
3406 	 * lev1map, an ASN will be allocated at that time.
3407 	 *
3408 	 * Only the kernel pmap will reference kernel_lev1map.  Do the
3409 	 * same old fixups, but note that we no longer need the pmap
3410 	 * to be locked if we're in this mode, since pm_lev1map will
3411 	 * never change.
3413 	 */
3414 	if (pmap->pm_lev1map == kernel_lev1map) {
3415 #ifdef DEBUG
3416 		if (pmapdebug & PDB_ASN)
3417 			printf("pmap_asn_alloc: still references "
3418 			    "kernel_lev1map\n");
3419 #endif
3420 #if defined(MULTIPROCESSOR)
3421 		/*
3422 		 * In a multiprocessor system, it's possible to
3423 		 * get here without having PMAP_ASN_RESERVED in
3424 		 * pmap->pm_asni[cpu_id].pma_asn; see pmap_lev1map_destroy().
3425 		 *
3426 		 * So, what we do here is simply assign the reserved
3427 		 * ASN for kernel_lev1map users and let things
3428 		 * continue on.  We do, however, let uniprocessor
3429 		 * configurations continue to make the assertion.
3430 		 */
3431 		pma->pma_asn = PMAP_ASN_RESERVED;
3432 #else
3433 		KASSERT(pma->pma_asn == PMAP_ASN_RESERVED);
3434 #endif /* MULTIPROCESSOR */
3435 		return;
3436 	}
3437 
3438 	/*
3439 	 * On processors which do not implement ASNs, the swpctx PALcode
3440 	 * operation will automatically invalidate the TLB and I-cache,
3441 	 * so we don't need to do that here.
3442 	 */
3443 	if (pmap_max_asn == 0) {
3444 		/*
3445 		 * Refresh the pmap's generation number, to
3446 		 * simplify logic elsewhere.
3447 		 */
3448 		pma->pma_asngen = cpma->pma_asngen;
3449 #ifdef DEBUG
3450 		if (pmapdebug & PDB_ASN)
3451 			printf("pmap_asn_alloc: no ASNs, using asngen %lu\n",
3452 			    pma->pma_asngen);
3453 #endif
3454 		return;
3455 	}
3456 
3457 	/*
3458 	 * Hopefully, we can continue using the one we have...
3459 	 */
3460 	if (pma->pma_asn != PMAP_ASN_RESERVED &&
3461 	    pma->pma_asngen == cpma->pma_asngen) {
3462 		/*
3463 		 * ASN is still in the current generation; keep on using it.
3464 		 */
3465 #ifdef DEBUG
3466 		if (pmapdebug & PDB_ASN)
3467 			printf("pmap_asn_alloc: same generation, keeping %u\n",
3468 			    pma->pma_asn);
3469 #endif
3470 		return;
3471 	}
3472 
3473 	/*
3474 	 * Need to assign a new ASN.  Grab the next one, incrementing
3475 	 * the generation number if we have to.
3476 	 */
3477 	if (cpma->pma_asn > pmap_max_asn) {
3478 		/*
3479 		 * Invalidate all non-PG_ASM TLB entries and the
3480 		 * I-cache, and bump the generation number.
3481 		 */
3482 		ALPHA_TBIAP();
3483 		alpha_pal_imb();
3484 
3485 		cpma->pma_asn = 1;
3486 		cpma->pma_asngen++;
3487 #ifdef DIAGNOSTIC
3488 		if (cpma->pma_asngen == 0) {
3489 			/*
3490 			 * The generation number has wrapped.  We could
3491 			 * handle this scenario by traversing all of
3492 			 * the pmaps, and invalidating the generation
3493 			 * number on those which are not currently
3494 			 * in use by this processor.
3495 			 *
3496 			 * However... considering that we're using
3497 			 * an unsigned 64-bit integer for generation
3498 			 * numbers, on non-ASN CPUs, we won't wrap
3499 			 * for approx. 585 million years, or 75 billion
3500 			 * years on a 128-ASN CPU (assuming 1000 switch
3501 			 * operations per second).
3502 			 *
3503 			 * So, we don't bother.
3504 			 */
3505 			panic("pmap_asn_alloc: too much uptime");
3506 		}
3507 #endif
3508 #ifdef DEBUG
3509 		if (pmapdebug & PDB_ASN)
3510 			printf("pmap_asn_alloc: generation bumped to %lu\n",
3511 			    cpma->pma_asngen);
3512 #endif
3513 	}
3514 
3515 	/*
3516 	 * Assign the new ASN and validate the generation number.
3517 	 */
3518 	pma->pma_asn = cpma->pma_asn++;
3519 	pma->pma_asngen = cpma->pma_asngen;
3520 
3521 #ifdef DEBUG
3522 	if (pmapdebug & PDB_ASN)
3523 		printf("pmap_asn_alloc: assigning %u to pmap %p\n",
3524 		    pma->pma_asn, pmap);
3525 #endif
3526 
3527 	/*
3528 	 * Have a new ASN, so there's no need to sync the I-stream
3529 	 * on the way back out to userspace.
3530 	 */
3531 	atomic_and_ulong(&pmap->pm_needisync, ~(1UL << cpu_id));
3532 }
3533 
3534 #if defined(MULTIPROCESSOR)
3535 /******************** TLB shootdown code ********************/
3536 
3537 /*
3538  * pmap_tlb_shootdown:
3539  *
3540  *	Cause the TLB entry for pmap/va to be shot down.
3541  *
3542  *	NOTE: The pmap must be locked here.
3543  */
3544 void
3545 pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, u_long *cpumaskp)
3546 {
3547 	struct pmap_tlb_shootdown_q *pq;
3548 	struct pmap_tlb_shootdown_job *pj;
3549 	struct cpu_info *ci, *self = curcpu();
3550 	u_long cpumask;
3551 	CPU_INFO_ITERATOR cii;
3552 
3553 	KASSERT((pmap == pmap_kernel()) || mutex_owned(&pmap->pm_lock));
3554 
3555 	cpumask = 0;
3556 
3557 	for (CPU_INFO_FOREACH(cii, ci)) {
3558 		if (ci == self)
3559 			continue;
3560 
3561 		/*
3562 		 * The pmap must be locked (unless its the kernel
3563 		 * pmap, in which case it is okay for it to be
3564 		 * unlocked), which prevents it from becoming
3565 		 * active on any additional processors.  This makes
3566 		 * it safe to check for activeness.  If it's not
3567 		 * active on the processor in question, then just
3568 		 * mark it as needing a new ASN the next time it
3569 		 * does, saving the IPI.  We always have to send
3570 		 * the IPI for the kernel pmap.
3571 		 *
3572 		 * Note if it's marked active now, and it becomes
3573 		 * inactive by the time the processor receives
3574 		 * the IPI, that's okay, because it does the right
3575 		 * thing with it later.
3576 		 */
3577 		if (pmap != pmap_kernel() &&
3578 		    PMAP_ISACTIVE(pmap, ci->ci_cpuid) == 0) {
3579 			PMAP_INVALIDATE_ASN(pmap, ci->ci_cpuid);
3580 			continue;
3581 		}
3582 
3583 		cpumask |= 1UL << ci->ci_cpuid;
3584 
3585 		pq = &pmap_tlb_shootdown_q[ci->ci_cpuid];
3586 		mutex_spin_enter(&pq->pq_lock);
3587 
3588 		/*
3589 		 * Allocate a job.
3590 		 */
3591 		if (pq->pq_count < PMAP_TLB_SHOOTDOWN_MAXJOBS) {
3592 			pj = pool_cache_get(&pmap_tlb_shootdown_job_cache,
3593 			    PR_NOWAIT);
3594 		} else {
3595 			pj = NULL;
3596 		}
3597 
3598 		/*
3599 		 * If a global flush is already pending, we
3600 		 * don't really have to do anything else.
3601 		 */
3602 		pq->pq_pte |= pte;
3603 		if (pq->pq_tbia) {
3604 			mutex_spin_exit(&pq->pq_lock);
3605 			if (pj != NULL) {
3606 				pool_cache_put(&pmap_tlb_shootdown_job_cache,
3607 				    pj);
3608 			}
3609 			continue;
3610 		}
3611 		if (pj == NULL) {
3612 			/*
3613 			 * Couldn't allocate a job entry.  Just
3614 			 * tell the processor to kill everything.
3615 			 */
3616 			pq->pq_tbia = 1;
3617 		} else {
3618 			pj->pj_pmap = pmap;
3619 			pj->pj_va = va;
3620 			pj->pj_pte = pte;
3621 			pq->pq_count++;
3622 			TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list);
3623 		}
3624 		mutex_spin_exit(&pq->pq_lock);
3625 	}
3626 
3627 	*cpumaskp |= cpumask;
3628 }
3629 
3630 /*
3631  * pmap_tlb_shootnow:
3632  *
3633  *	Process the TLB shootdowns that we have been accumulating
3634  *	for the specified processor set.
3635  */
3636 void
3637 pmap_tlb_shootnow(u_long cpumask)
3638 {
3639 
3640 	alpha_multicast_ipi(cpumask, ALPHA_IPI_SHOOTDOWN);
3641 }
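
/*
 * Illustrative sketch, not part of the original file: the shootdown
 * calling pattern used throughout the pmap entry points above.  The
 * local variables are assumed to be those of the surrounding entry
 * point.
 */
#if 0
	PMAP_TLB_SHOOTDOWN_CPUSET_DECL		/* declares the CPU mask */

	PMAP_SET_PTE(pte, npte);
	PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id);
	PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0);
	PMAP_TLB_SHOOTNOW();			/* sends the IPIs */
#endif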
3642 
3643 /*
3644  * pmap_do_tlb_shootdown:
3645  *
3646  *	Process pending TLB shootdown operations for this processor.
3647  */
3648 void
3649 pmap_do_tlb_shootdown(struct cpu_info *ci, struct trapframe *framep)
3650 {
3651 	u_long cpu_id = ci->ci_cpuid;
3652 	u_long cpu_mask = (1UL << cpu_id);
3653 	struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id];
3654 	struct pmap_tlb_shootdown_job *pj, *next;
3655 	TAILQ_HEAD(, pmap_tlb_shootdown_job) jobs;
3656 
3657 	TAILQ_INIT(&jobs);
3658 
3659 	mutex_spin_enter(&pq->pq_lock);
3660 	TAILQ_CONCAT(&jobs, &pq->pq_head, pj_list);
3661 	if (pq->pq_tbia) {
3662 		if (pq->pq_pte & PG_ASM)
3663 			ALPHA_TBIA();
3664 		else
3665 			ALPHA_TBIAP();
3666 		pq->pq_tbia = 0;
3667 		pq->pq_pte = 0;
3668 	} else {
3669 		TAILQ_FOREACH(pj, &jobs, pj_list) {
3670 			PMAP_INVALIDATE_TLB(pj->pj_pmap, pj->pj_va,
3671 			    pj->pj_pte & PG_ASM,
3672 			    pj->pj_pmap->pm_cpus & cpu_mask, cpu_id);
3673 		}
3674 		pq->pq_pte = 0;
3675 	}
3676 	pq->pq_count = 0;
3677 	mutex_spin_exit(&pq->pq_lock);
3678 
3679 	/* Free jobs back to the cache. */
3680 	for (pj = TAILQ_FIRST(&jobs); pj != NULL; pj = next) {
3681 		next = TAILQ_NEXT(pj, pj_list);
3682 		pool_cache_put(&pmap_tlb_shootdown_job_cache, pj);
3683 	}
3684 }
3685 #endif /* MULTIPROCESSOR */
3686