xref: /netbsd-src/sys/arch/alpha/alpha/pmap.c (revision 92e958de60c71aa0f2452bd7074cbb006fe6546b)
1 /* $NetBSD: pmap.c,v 1.260 2015/11/05 06:26:15 pgoyette Exp $ */
2 
3 /*-
4  * Copyright (c) 1998, 1999, 2000, 2001, 2007, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center and by Chris G. Demetriou.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * This code is derived from software contributed to Berkeley by
38  * the Systems Programming Group of the University of Utah Computer
39  * Science Department.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  * 3. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)pmap.c	8.6 (Berkeley) 5/27/94
66  */
67 
68 /*
69  * DEC Alpha physical map management code.
70  *
71  * History:
72  *
73  *	This pmap started life as a Motorola 68851/68030 pmap,
74  *	written by Mike Hibler at the University of Utah.
75  *
76  *	It was modified for the DEC Alpha by Chris Demetriou
77  *	at Carnegie Mellon University.
78  *
79  *	Support for non-contiguous physical memory was added by
80  *	Jason R. Thorpe of the Numerical Aerospace Simulation
81  *	Facility, NASA Ames Research Center and Chris Demetriou.
82  *
83  *	Page table management and a major cleanup were undertaken
84  *	by Jason R. Thorpe, with lots of help from Ross Harvey of
85  *	Avalon Computer Systems and from Chris Demetriou.
86  *
87  *	Support for the new UVM pmap interface was written by
88  *	Jason R. Thorpe.
89  *
90  *	Support for ASNs was written by Jason R. Thorpe, again
91  *	with help from Chris Demetriou and Ross Harvey.
92  *
93  *	The locking protocol was written by Jason R. Thorpe,
94  *	using Chuck Cranor's i386 pmap for UVM as a model.
95  *
96  *	TLB shootdown code was written by Jason R. Thorpe.
97  *
98  *	Multiprocessor modifications by Andrew Doran.
99  *
100  * Notes:
101  *
102  *	All page table access is done via K0SEG.  The one exception
103  *	to this is for kernel mappings.  Since all kernel page
104  *	tables are pre-allocated, we can use the Virtual Page Table
105  *	to access PTEs that map K1SEG addresses.
106  *
107  *	Kernel page table pages are statically allocated in
108  *	pmap_bootstrap(), and are never freed.  In the future,
109  *	support for dynamically adding additional kernel page
110  *	table pages may be added.  User page table pages are
111  *	dynamically allocated and freed.
112  *
113  * Bugs/misfeatures:
114  *
115  *	- Some things could be optimized.
116  */
117 
118 /*
119  *	Manages physical address maps.
120  *
121  *	Since the information managed by this module is
122  *	also stored by the logical address mapping module,
123  *	this module may throw away valid virtual-to-physical
124  *	mappings at almost any time.  However, invalidations
125  *	of virtual-to-physical mappings must be done as
126  *	requested.
127  *
128  *	In order to cope with hardware architectures which
129  *	make virtual-to-physical map invalidates expensive,
130  *	this module may delay invalidate or reduced protection
131  *	operations until such time as they are actually
132  *	necessary.  This module is given full information as
133  *	to which processors are currently using which maps,
134  *	and to when physical maps must be made correct.
135  */
136 
137 #include "opt_lockdebug.h"
138 #include "opt_sysv.h"
139 #include "opt_multiprocessor.h"
140 
141 #include <sys/cdefs.h>			/* RCS ID & Copyright macro defns */
142 
143 __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.260 2015/11/05 06:26:15 pgoyette Exp $");
144 
145 #include <sys/param.h>
146 #include <sys/systm.h>
147 #include <sys/kernel.h>
148 #include <sys/proc.h>
149 #include <sys/malloc.h>
150 #include <sys/pool.h>
151 #include <sys/buf.h>
152 #include <sys/atomic.h>
153 #include <sys/cpu.h>
154 
155 #include <uvm/uvm.h>
156 
157 #if defined(_PMAP_MAY_USE_PROM_CONSOLE) || defined(MULTIPROCESSOR)
158 #include <machine/rpb.h>
159 #endif
160 
161 #ifdef DEBUG
162 #define	PDB_FOLLOW	0x0001
163 #define	PDB_INIT	0x0002
164 #define	PDB_ENTER	0x0004
165 #define	PDB_REMOVE	0x0008
166 #define	PDB_CREATE	0x0010
167 #define	PDB_PTPAGE	0x0020
168 #define	PDB_ASN		0x0040
169 #define	PDB_BITS	0x0080
170 #define	PDB_COLLECT	0x0100
171 #define	PDB_PROTECT	0x0200
172 #define	PDB_BOOTSTRAP	0x1000
173 #define	PDB_PARANOIA	0x2000
174 #define	PDB_WIRING	0x4000
175 #define	PDB_PVDUMP	0x8000
176 
177 int debugmap = 0;
178 int pmapdebug = PDB_PARANOIA;
179 #endif
180 
181 /*
182  * Given a map and a machine independent protection code,
183  * convert to an alpha protection code.
184  */
185 #define pte_prot(m, p)	(protection_codes[m == pmap_kernel() ? 0 : 1][p])
186 static int	protection_codes[2][8];
187 
188 /*
189  * kernel_lev1map:
190  *
191  *	Kernel level 1 page table.  This maps all kernel level 2
192  *	page table pages, and is used as a template for all user
193  *	pmap level 1 page tables.  When a new user level 1 page
194  *	table is allocated, all kernel_lev1map PTEs for kernel
195  *	addresses are copied to the new map.
196  *
197  *	The kernel also has an initial set of kernel level 2 page
198  *	table pages.  These map the kernel level 3 page table pages.
199  *	As kernel level 3 page table pages are added, more level 2
200  *	page table pages may be added to map them.  These pages are
201  *	never freed.
202  *
203  *	Finally, the kernel also has an initial set of kernel level
204  *	3 page table pages.  These map pages in K1SEG.  More level
205  *	3 page table pages may be added at run-time if additional
206  *	K1SEG address space is required.  These pages are never freed.
207  *
208  * NOTE: When mappings are inserted into the kernel pmap, all
209  * level 2 and level 3 page table pages must already be allocated
210  * and mapped into the parent page table.
211  */
212 pt_entry_t	*kernel_lev1map;
213 
214 /*
215  * Virtual Page Table.
216  */
217 static pt_entry_t *VPT;
218 
219 static struct {
220 	struct pmap k_pmap;
221 	struct pmap_asn_info k_asni[ALPHA_MAXPROCS];
222 } kernel_pmap_store;
223 
224 struct pmap *const kernel_pmap_ptr = &kernel_pmap_store.k_pmap;
225 
226 paddr_t    	avail_start;	/* PA of first available physical page */
227 paddr_t		avail_end;	/* PA of last available physical page */
228 static vaddr_t	virtual_end;	/* VA of last avail page (end of kernel AS) */
229 
230 static bool pmap_initialized;	/* Has pmap_init completed? */
231 
232 u_long		pmap_pages_stolen;	/* instrumentation */
233 
234 /*
235  * This variable contains the number of CPU IDs we need to allocate
236  * space for when allocating the pmap structure.  It is used to
237  * size a per-CPU array of ASN and ASN Generation number.
238  */
239 static u_long 	pmap_ncpuids;
240 
241 #ifndef PMAP_PV_LOWAT
242 #define	PMAP_PV_LOWAT	16
243 #endif
244 int		pmap_pv_lowat = PMAP_PV_LOWAT;
245 
246 /*
247  * List of all pmaps, used to update them when e.g. additional kernel
248  * page tables are allocated.  This list is kept LRU-ordered by
249  * pmap_activate().
250  */
251 static TAILQ_HEAD(, pmap) pmap_all_pmaps;
252 
253 /*
254  * The pools from which pmap structures and sub-structures are allocated.
255  */
256 static struct pool_cache pmap_pmap_cache;
257 static struct pool_cache pmap_l1pt_cache;
258 static struct pool_cache pmap_pv_cache;
259 
260 /*
261  * Address Space Numbers.
262  *
263  * On many implementations of the Alpha architecture, the TLB entries and
264  * I-cache blocks are tagged with a unique number within an implementation-
265  * specified range.  When a process context becomes active, the ASN is used
266  * to match TLB entries; if a TLB entry for a particular VA does not match
267  * the current ASN, it is ignored (one could think of the processor as
268  * having a collection of <max ASN> separate TLBs).  This allows operating
269  * system software to skip the TLB flush that would otherwise be necessary
270  * at context switch time.
271  *
272  * Alpha PTEs have a bit in them (PG_ASM - Address Space Match) that
273  * causes TLB entries to match any ASN.  The PALcode also provides
274  * a TBI (Translation Buffer Invalidate) operation that flushes all
275  * TLB entries that _do not_ have PG_ASM.  We use this bit for kernel
276  * mappings, so that invalidation of all user mappings does not invalidate
277  * kernel mappings (which are consistent across all processes).
278  *
279  * pmap_next_asn always indicates to the next ASN to use.  When
280  * pmap_next_asn exceeds pmap_max_asn, we start a new ASN generation.
281  *
282  * When a new ASN generation is created, the per-process (i.e. non-PG_ASM)
283  * TLB entries and the I-cache are flushed, the generation number is bumped,
284  * and pmap_next_asn is changed to indicate the first non-reserved ASN.
285  *
286  * We reserve ASN #0 for pmaps that use the global kernel_lev1map.  This
287  * prevents the following scenario:
288  *
289  *	* New ASN generation starts, and process A is given ASN #0.
290  *
291  *	* A new process B (and thus new pmap) is created.  The ASN,
292  *	  for lack of a better value, is initialized to 0.
293  *
294  *	* Process B runs.  It is now using the TLB entries tagged
295  *	  by process A.  *poof*
296  *
297  * In the scenario above, in addition to the processor using using incorrect
298  * TLB entires, the PALcode might use incorrect information to service a
299  * TLB miss.  (The PALcode uses the recursively mapped Virtual Page Table
300  * to locate the PTE for a faulting address, and tagged TLB entires exist
301  * for the Virtual Page Table addresses in order to speed up this procedure,
302  * as well.)
303  *
304  * By reserving an ASN for kernel_lev1map users, we are guaranteeing that
305  * new pmaps will initially run with no TLB entries for user addresses
306  * or VPT mappings that map user page tables.  Since kernel_lev1map only
307  * contains mappings for kernel addresses, and since those mappings
308  * are always made with PG_ASM, sharing an ASN for kernel_lev1map users is
309  * safe (since PG_ASM mappings match any ASN).
310  *
311  * On processors that do not support ASNs, the PALcode invalidates
312  * the TLB and I-cache automatically on swpctx.  We still still go
313  * through the motions of assigning an ASN (really, just refreshing
314  * the ASN generation in this particular case) to keep the logic sane
315  * in other parts of the code.
316  */
317 static u_int	pmap_max_asn;		/* max ASN supported by the system */
318 					/* next ASN and cur ASN generation */
319 static struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS];
320 
321 /*
322  * Locking:
323  *
324  *	READ/WRITE LOCKS
325  *	----------------
326  *
327  *	* pmap_main_lock - This lock is used to prevent deadlock and/or
328  *	  provide mutex access to the pmap module.  Most operations lock
329  *	  the pmap first, then PV lists as needed.  However, some operations,
330  *	  such as pmap_page_protect(), lock the PV lists before locking
331  *	  the pmaps.  To prevent deadlock, we require a mutex lock on the
332  *	  pmap module if locking in the PV->pmap direction.  This is
333  *	  implemented by acquiring a (shared) read lock on pmap_main_lock
334  *	  if locking pmap->PV and a (exclusive) write lock if locking in
335  *	  the PV->pmap direction.  Since only one thread can hold a write
336  *	  lock at a time, this provides the mutex.
337  *
338  *	MUTEXES
339  *	-------
340  *
341  *	* pm_lock (per-pmap) - This lock protects all of the members
342  *	  of the pmap structure itself.  This lock will be asserted
343  *	  in pmap_activate() and pmap_deactivate() from a critical
344  *	  section of mi_switch(), and must never sleep.  Note that
345  *	  in the case of the kernel pmap, interrupts which cause
346  *	  memory allocation *must* be blocked while this lock is
347  *	  asserted.
348  *
349  *	* pvh_lock (global hash) - These locks protects the PV lists
350  *	  for managed pages.
351  *
352  *	* pmap_all_pmaps_lock - This lock protects the global list of
353  *	  all pmaps.  Note that a pm_lock must never be held while this
354  *	  lock is held.
355  *
356  *	* pmap_growkernel_lock - This lock protects pmap_growkernel()
357  *	  and the virtual_end variable.
358  *
359  *	  There is a lock ordering constraint for pmap_growkernel_lock.
360  *	  pmap_growkernel() acquires the locks in the following order:
361  *
362  *		pmap_growkernel_lock (write) -> pmap_all_pmaps_lock ->
363  *		    pmap->pm_lock
364  *
365  *	  We need to ensure consistency between user pmaps and the
366  *	  kernel_lev1map.  For this reason, pmap_growkernel_lock must
367  *	  be held to prevent kernel_lev1map changing across pmaps
368  *	  being added to / removed from the global pmaps list.
369  *
370  *	Address space number management (global ASN counters and per-pmap
371  *	ASN state) are not locked; they use arrays of values indexed
372  *	per-processor.
373  *
374  *	All internal functions which operate on a pmap are called
375  *	with the pmap already locked by the caller (which will be
376  *	an interface function).
377  */
378 static krwlock_t pmap_main_lock;
379 static kmutex_t pmap_all_pmaps_lock;
380 static krwlock_t pmap_growkernel_lock;
381 
382 #define	PMAP_MAP_TO_HEAD_LOCK()		rw_enter(&pmap_main_lock, RW_READER)
383 #define	PMAP_MAP_TO_HEAD_UNLOCK()	rw_exit(&pmap_main_lock)
384 #define	PMAP_HEAD_TO_MAP_LOCK()		rw_enter(&pmap_main_lock, RW_WRITER)
385 #define	PMAP_HEAD_TO_MAP_UNLOCK()	rw_exit(&pmap_main_lock)
386 
387 struct {
388 	kmutex_t lock;
389 } __aligned(64) static pmap_pvh_locks[64] __aligned(64);
390 
391 static inline kmutex_t *
392 pmap_pvh_lock(struct vm_page *pg)
393 {
394 
395 	/* Cut bits 11-6 out of page address and use directly as offset. */
396 	return (kmutex_t *)((uintptr_t)&pmap_pvh_locks +
397 	    ((uintptr_t)pg & (63 << 6)));
398 }
399 
400 #if defined(MULTIPROCESSOR)
401 /*
402  * TLB Shootdown:
403  *
404  * When a mapping is changed in a pmap, the TLB entry corresponding to
405  * the virtual address must be invalidated on all processors.  In order
406  * to accomplish this on systems with multiple processors, messages are
407  * sent from the processor which performs the mapping change to all
408  * processors on which the pmap is active.  For other processors, the
409  * ASN generation numbers for that processor is invalidated, so that
410  * the next time the pmap is activated on that processor, a new ASN
411  * will be allocated (which implicitly invalidates all TLB entries).
412  *
413  * Note, we can use the pool allocator to allocate job entries
414  * since pool pages are mapped with K0SEG, not with the TLB.
415  */
416 struct pmap_tlb_shootdown_job {
417 	TAILQ_ENTRY(pmap_tlb_shootdown_job) pj_list;
418 	vaddr_t pj_va;			/* virtual address */
419 	pmap_t pj_pmap;			/* the pmap which maps the address */
420 	pt_entry_t pj_pte;		/* the PTE bits */
421 };
422 
423 static struct pmap_tlb_shootdown_q {
424 	TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_head;	/* queue 16b */
425 	kmutex_t pq_lock;		/* spin lock on queue 16b */
426 	int pq_pte;			/* aggregate PTE bits 4b */
427 	int pq_count;			/* number of pending requests 4b */
428 	int pq_tbia;			/* pending global flush 4b */
429 	uint8_t pq_pad[64-16-16-4-4-4];	/* pad to 64 bytes */
430 } pmap_tlb_shootdown_q[ALPHA_MAXPROCS] __aligned(CACHE_LINE_SIZE);
431 
432 /* If we have more pending jobs than this, we just nail the whole TLB. */
433 #define	PMAP_TLB_SHOOTDOWN_MAXJOBS	6
434 
435 static struct pool_cache pmap_tlb_shootdown_job_cache;
436 #endif /* MULTIPROCESSOR */
437 
438 /*
439  * Internal routines
440  */
441 static void	alpha_protection_init(void);
442 static bool	pmap_remove_mapping(pmap_t, vaddr_t, pt_entry_t *, bool, long);
443 static void	pmap_changebit(struct vm_page *, pt_entry_t, pt_entry_t, long);
444 
445 /*
446  * PT page management functions.
447  */
448 static int	pmap_lev1map_create(pmap_t, long);
449 static void	pmap_lev1map_destroy(pmap_t, long);
450 static int	pmap_ptpage_alloc(pmap_t, pt_entry_t *, int);
451 static void	pmap_ptpage_free(pmap_t, pt_entry_t *);
452 static void	pmap_l3pt_delref(pmap_t, vaddr_t, pt_entry_t *, long);
453 static void	pmap_l2pt_delref(pmap_t, pt_entry_t *, pt_entry_t *, long);
454 static void	pmap_l1pt_delref(pmap_t, pt_entry_t *, long);
455 
456 static void	*pmap_l1pt_alloc(struct pool *, int);
457 static void	pmap_l1pt_free(struct pool *, void *);
458 
459 static struct pool_allocator pmap_l1pt_allocator = {
460 	pmap_l1pt_alloc, pmap_l1pt_free, 0,
461 };
462 
463 static int	pmap_l1pt_ctor(void *, void *, int);
464 
465 /*
466  * PV table management functions.
467  */
468 static int	pmap_pv_enter(pmap_t, struct vm_page *, vaddr_t, pt_entry_t *,
469 			      bool);
470 static void	pmap_pv_remove(pmap_t, struct vm_page *, vaddr_t, bool);
471 static void	*pmap_pv_page_alloc(struct pool *, int);
472 static void	pmap_pv_page_free(struct pool *, void *);
473 
474 static struct pool_allocator pmap_pv_page_allocator = {
475 	pmap_pv_page_alloc, pmap_pv_page_free, 0,
476 };
477 
478 #ifdef DEBUG
479 void	pmap_pv_dump(paddr_t);
480 #endif
481 
482 #define	pmap_pv_alloc()		pool_cache_get(&pmap_pv_cache, PR_NOWAIT)
483 #define	pmap_pv_free(pv)	pool_cache_put(&pmap_pv_cache, (pv))
484 
485 /*
486  * ASN management functions.
487  */
488 static void	pmap_asn_alloc(pmap_t, long);
489 
490 /*
491  * Misc. functions.
492  */
493 static bool	pmap_physpage_alloc(int, paddr_t *);
494 static void	pmap_physpage_free(paddr_t);
495 static int	pmap_physpage_addref(void *);
496 static int	pmap_physpage_delref(void *);
497 
498 /*
499  * PMAP_ISACTIVE{,_TEST}:
500  *
501  *	Check to see if a pmap is active on the current processor.
502  */
503 #define	PMAP_ISACTIVE_TEST(pm, cpu_id)					\
504 	(((pm)->pm_cpus & (1UL << (cpu_id))) != 0)
505 
506 #if defined(DEBUG) && !defined(MULTIPROCESSOR)
507 #define	PMAP_ISACTIVE(pm, cpu_id)					\
508 ({									\
509 	/*								\
510 	 * XXX This test is not MP-safe.				\
511 	 */								\
512 	int isactive_ = PMAP_ISACTIVE_TEST(pm, cpu_id);			\
513 									\
514 	if ((curlwp->l_flag & LW_IDLE) != 0 &&				\
515 	    curproc->p_vmspace != NULL &&				\
516 	   ((curproc->p_sflag & PS_WEXIT) == 0) &&			\
517 	   (isactive_ ^ ((pm) == curproc->p_vmspace->vm_map.pmap)))	\
518 		panic("PMAP_ISACTIVE");					\
519 	(isactive_);							\
520 })
521 #else
522 #define	PMAP_ISACTIVE(pm, cpu_id)	PMAP_ISACTIVE_TEST(pm, cpu_id)
523 #endif /* DEBUG && !MULTIPROCESSOR */
524 
525 /*
526  * PMAP_ACTIVATE_ASN_SANITY:
527  *
528  *	DEBUG sanity checks for ASNs within PMAP_ACTIVATE.
529  */
530 #ifdef DEBUG
531 #define	PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id)				\
532 do {									\
533 	struct pmap_asn_info *__pma = &(pmap)->pm_asni[(cpu_id)];	\
534 	struct pmap_asn_info *__cpma = &pmap_asn_info[(cpu_id)];	\
535 									\
536 	if ((pmap)->pm_lev1map == kernel_lev1map) {			\
537 		/*							\
538 		 * This pmap implementation also ensures that pmaps	\
539 		 * referencing kernel_lev1map use a reserved ASN	\
540 		 * ASN to prevent the PALcode from servicing a TLB	\
541 		 * miss	with the wrong PTE.				\
542 		 */							\
543 		if (__pma->pma_asn != PMAP_ASN_RESERVED) {		\
544 			printf("kernel_lev1map with non-reserved ASN "	\
545 			    "(line %d)\n", __LINE__);			\
546 			panic("PMAP_ACTIVATE_ASN_SANITY");		\
547 		}							\
548 	} else {							\
549 		if (__pma->pma_asngen != __cpma->pma_asngen) {		\
550 			/*						\
551 			 * ASN generation number isn't valid!		\
552 			 */						\
553 			printf("pmap asngen %lu, current %lu "		\
554 			    "(line %d)\n",				\
555 			    __pma->pma_asngen,				\
556 			    __cpma->pma_asngen,				\
557 			    __LINE__);					\
558 			panic("PMAP_ACTIVATE_ASN_SANITY");		\
559 		}							\
560 		if (__pma->pma_asn == PMAP_ASN_RESERVED) {		\
561 			/*						\
562 			 * DANGER WILL ROBINSON!  We're going to	\
563 			 * pollute the VPT TLB entries!			\
564 			 */						\
565 			printf("Using reserved ASN! (line %d)\n",	\
566 			    __LINE__);					\
567 			panic("PMAP_ACTIVATE_ASN_SANITY");		\
568 		}							\
569 	}								\
570 } while (/*CONSTCOND*/0)
571 #else
572 #define	PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id)	/* nothing */
573 #endif
574 
575 /*
576  * PMAP_ACTIVATE:
577  *
578  *	This is essentially the guts of pmap_activate(), without
579  *	ASN allocation.  This is used by pmap_activate(),
580  *	pmap_lev1map_create(), and pmap_lev1map_destroy().
581  *
582  *	This is called only when it is known that a pmap is "active"
583  *	on the current processor; the ASN must already be valid.
584  */
585 #define	PMAP_ACTIVATE(pmap, l, cpu_id)					\
586 do {									\
587 	struct pcb *pcb = lwp_getpcb(l);				\
588 	PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id);				\
589 									\
590 	pcb->pcb_hw.apcb_ptbr =				\
591 	    ALPHA_K0SEG_TO_PHYS((vaddr_t)(pmap)->pm_lev1map) >> PGSHIFT; \
592 	pcb->pcb_hw.apcb_asn = (pmap)->pm_asni[(cpu_id)].pma_asn;	\
593 									\
594 	if ((l) == curlwp) {						\
595 		/*							\
596 		 * Page table base register has changed; switch to	\
597 		 * our own context again so that it will take effect.	\
598 		 */							\
599 		(void) alpha_pal_swpctx((u_long)l->l_md.md_pcbpaddr);	\
600 	}								\
601 } while (/*CONSTCOND*/0)
602 
603 /*
604  * PMAP_SET_NEEDISYNC:
605  *
606  *	Mark that a user pmap needs an I-stream synch on its
607  *	way back out to userspace.
608  */
609 #define	PMAP_SET_NEEDISYNC(pmap)	(pmap)->pm_needisync = ~0UL
610 
611 /*
612  * PMAP_SYNC_ISTREAM:
613  *
614  *	Synchronize the I-stream for the specified pmap.  For user
615  *	pmaps, this is deferred until a process using the pmap returns
616  *	to userspace.
617  */
618 #if defined(MULTIPROCESSOR)
619 #define	PMAP_SYNC_ISTREAM_KERNEL()					\
620 do {									\
621 	alpha_pal_imb();						\
622 	alpha_broadcast_ipi(ALPHA_IPI_IMB);				\
623 } while (/*CONSTCOND*/0)
624 
625 #define	PMAP_SYNC_ISTREAM_USER(pmap)					\
626 do {									\
627 	alpha_multicast_ipi((pmap)->pm_cpus, ALPHA_IPI_AST);		\
628 	/* for curcpu, will happen in userret() */			\
629 } while (/*CONSTCOND*/0)
630 #else
631 #define	PMAP_SYNC_ISTREAM_KERNEL()	alpha_pal_imb()
632 #define	PMAP_SYNC_ISTREAM_USER(pmap)	/* will happen in userret() */
633 #endif /* MULTIPROCESSOR */
634 
635 #define	PMAP_SYNC_ISTREAM(pmap)						\
636 do {									\
637 	if ((pmap) == pmap_kernel())					\
638 		PMAP_SYNC_ISTREAM_KERNEL();				\
639 	else								\
640 		PMAP_SYNC_ISTREAM_USER(pmap);				\
641 } while (/*CONSTCOND*/0)
642 
643 /*
644  * PMAP_INVALIDATE_ASN:
645  *
646  *	Invalidate the specified pmap's ASN, so as to force allocation
647  *	of a new one the next time pmap_asn_alloc() is called.
648  *
649  *	NOTE: THIS MUST ONLY BE CALLED IF AT LEAST ONE OF THE FOLLOWING
650  *	CONDITIONS ARE true:
651  *
652  *		(1) The pmap references the global kernel_lev1map.
653  *
654  *		(2) The pmap is not active on the current processor.
655  */
656 #define	PMAP_INVALIDATE_ASN(pmap, cpu_id)				\
657 do {									\
658 	(pmap)->pm_asni[(cpu_id)].pma_asn = PMAP_ASN_RESERVED;		\
659 } while (/*CONSTCOND*/0)
660 
661 /*
662  * PMAP_INVALIDATE_TLB:
663  *
664  *	Invalidate the TLB entry for the pmap/va pair.
665  */
666 #define	PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id)		\
667 do {									\
668 	if ((hadasm) || (isactive)) {					\
669 		/*							\
670 		 * Simply invalidating the TLB entry and I-cache	\
671 		 * works in this case.					\
672 		 */							\
673 		ALPHA_TBIS((va));					\
674 	} else if ((pmap)->pm_asni[(cpu_id)].pma_asngen ==		\
675 		   pmap_asn_info[(cpu_id)].pma_asngen) {		\
676 		/*							\
677 		 * We can't directly invalidate the TLB entry		\
678 		 * in this case, so we have to force allocation		\
679 		 * of a new ASN the next time this pmap becomes		\
680 		 * active.						\
681 		 */							\
682 		PMAP_INVALIDATE_ASN((pmap), (cpu_id));			\
683 	}								\
684 		/*							\
685 		 * Nothing to do in this case; the next time the	\
686 		 * pmap becomes active on this processor, a new		\
687 		 * ASN will be allocated anyway.			\
688 		 */							\
689 } while (/*CONSTCOND*/0)
690 
691 /*
692  * PMAP_KERNEL_PTE:
693  *
694  *	Get a kernel PTE.
695  *
696  *	If debugging, do a table walk.  If not debugging, just use
697  *	the Virtual Page Table, since all kernel page tables are
698  *	pre-allocated and mapped in.
699  */
700 #ifdef DEBUG
701 #define	PMAP_KERNEL_PTE(va)						\
702 ({									\
703 	pt_entry_t *l1pte_, *l2pte_;					\
704 									\
705 	l1pte_ = pmap_l1pte(pmap_kernel(), va);				\
706 	if (pmap_pte_v(l1pte_) == 0) {					\
707 		printf("kernel level 1 PTE not valid, va 0x%lx "	\
708 		    "(line %d)\n", (va), __LINE__);			\
709 		panic("PMAP_KERNEL_PTE");				\
710 	}								\
711 	l2pte_ = pmap_l2pte(pmap_kernel(), va, l1pte_);			\
712 	if (pmap_pte_v(l2pte_) == 0) {					\
713 		printf("kernel level 2 PTE not valid, va 0x%lx "	\
714 		    "(line %d)\n", (va), __LINE__);			\
715 		panic("PMAP_KERNEL_PTE");				\
716 	}								\
717 	pmap_l3pte(pmap_kernel(), va, l2pte_);				\
718 })
719 #else
720 #define	PMAP_KERNEL_PTE(va)	(&VPT[VPT_INDEX((va))])
721 #endif
722 
723 /*
724  * PMAP_SET_PTE:
725  *
726  *	Set a PTE to a specified value.
727  */
728 #define	PMAP_SET_PTE(ptep, val)	*(ptep) = (val)
729 
730 /*
731  * PMAP_STAT_{INCR,DECR}:
732  *
733  *	Increment or decrement a pmap statistic.
734  */
735 #define	PMAP_STAT_INCR(s, v)	atomic_add_long((unsigned long *)(&(s)), (v))
736 #define	PMAP_STAT_DECR(s, v)	atomic_add_long((unsigned long *)(&(s)), -(v))
737 
738 /*
739  * pmap_bootstrap:
740  *
741  *	Bootstrap the system to run with virtual memory.
742  *
743  *	Note: no locking is necessary in this function.
744  */
745 void
746 pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids)
747 {
748 	vsize_t lev2mapsize, lev3mapsize;
749 	pt_entry_t *lev2map, *lev3map;
750 	pt_entry_t pte;
751 	vsize_t bufsz;
752 	struct pcb *pcb;
753 	int i;
754 
755 #ifdef DEBUG
756 	if (pmapdebug & (PDB_FOLLOW|PDB_BOOTSTRAP))
757 		printf("pmap_bootstrap(0x%lx, %u)\n", ptaddr, maxasn);
758 #endif
759 
760 	/*
761 	 * Compute the number of pages kmem_arena will have.
762 	 */
763 	kmeminit_nkmempages();
764 
765 	/*
766 	 * Figure out how many initial PTE's are necessary to map the
767 	 * kernel.  We also reserve space for kmem_alloc_pageable()
768 	 * for vm_fork().
769 	 */
770 
771 	/* Get size of buffer cache and set an upper limit */
772 	bufsz = buf_memcalc();
773 	buf_setvalimit(bufsz);
774 
775 	lev3mapsize =
776 		(VM_PHYS_SIZE + (ubc_nwins << ubc_winshift) +
777 		 bufsz + 16 * NCARGS + pager_map_size) / PAGE_SIZE +
778 		(maxproc * UPAGES) + nkmempages;
779 
780 	lev3mapsize = roundup(lev3mapsize, NPTEPG);
781 
782 	/*
783 	 * Initialize `FYI' variables.  Note we're relying on
784 	 * the fact that BSEARCH sorts the vm_physmem[] array
785 	 * for us.
786 	 */
787 	avail_start = ptoa(VM_PHYSMEM_PTR(0)->start);
788 	avail_end = ptoa(VM_PHYSMEM_PTR(vm_nphysseg - 1)->end);
789 	virtual_end = VM_MIN_KERNEL_ADDRESS + lev3mapsize * PAGE_SIZE;
790 
791 #if 0
792 	printf("avail_start = 0x%lx\n", avail_start);
793 	printf("avail_end = 0x%lx\n", avail_end);
794 	printf("virtual_end = 0x%lx\n", virtual_end);
795 #endif
796 
797 	/*
798 	 * Allocate a level 1 PTE table for the kernel.
799 	 * This is always one page long.
800 	 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL.
801 	 */
802 	kernel_lev1map = (pt_entry_t *)
803 	    uvm_pageboot_alloc(sizeof(pt_entry_t) * NPTEPG);
804 
805 	/*
806 	 * Allocate a level 2 PTE table for the kernel.
807 	 * These must map all of the level3 PTEs.
808 	 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL.
809 	 */
810 	lev2mapsize = roundup(howmany(lev3mapsize, NPTEPG), NPTEPG);
811 	lev2map = (pt_entry_t *)
812 	    uvm_pageboot_alloc(sizeof(pt_entry_t) * lev2mapsize);
813 
814 	/*
815 	 * Allocate a level 3 PTE table for the kernel.
816 	 * Contains lev3mapsize PTEs.
817 	 */
818 	lev3map = (pt_entry_t *)
819 	    uvm_pageboot_alloc(sizeof(pt_entry_t) * lev3mapsize);
820 
821 	/*
822 	 * Set up level 1 page table
823 	 */
824 
825 	/* Map all of the level 2 pte pages */
826 	for (i = 0; i < howmany(lev2mapsize, NPTEPG); i++) {
827 		pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev2map) +
828 		    (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT;
829 		pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
830 		kernel_lev1map[l1pte_index(VM_MIN_KERNEL_ADDRESS +
831 		    (i*PAGE_SIZE*NPTEPG*NPTEPG))] = pte;
832 	}
833 
834 	/* Map the virtual page table */
835 	pte = (ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT)
836 	    << PG_SHIFT;
837 	pte |= PG_V | PG_KRE | PG_KWE; /* NOTE NO ASM */
838 	kernel_lev1map[l1pte_index(VPTBASE)] = pte;
839 	VPT = (pt_entry_t *)VPTBASE;
840 
841 #ifdef _PMAP_MAY_USE_PROM_CONSOLE
842     {
843 	extern pt_entry_t prom_pte;			/* XXX */
844 	extern int prom_mapped;				/* XXX */
845 
846 	if (pmap_uses_prom_console()) {
847 		/*
848 		 * XXX Save old PTE so we can remap the PROM, if
849 		 * XXX necessary.
850 		 */
851 		prom_pte = *(pt_entry_t *)ptaddr & ~PG_ASM;
852 	}
853 	prom_mapped = 0;
854 
855 	/*
856 	 * Actually, this code lies.  The prom is still mapped, and will
857 	 * remain so until the context switch after alpha_init() returns.
858 	 */
859     }
860 #endif
861 
862 	/*
863 	 * Set up level 2 page table.
864 	 */
865 	/* Map all of the level 3 pte pages */
866 	for (i = 0; i < howmany(lev3mapsize, NPTEPG); i++) {
867 		pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev3map) +
868 		    (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT;
869 		pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
870 		lev2map[l2pte_index(VM_MIN_KERNEL_ADDRESS+
871 		    (i*PAGE_SIZE*NPTEPG))] = pte;
872 	}
873 
874 	/* Initialize the pmap_growkernel_lock. */
875 	rw_init(&pmap_growkernel_lock);
876 
877 	/*
878 	 * Set up level three page table (lev3map)
879 	 */
880 	/* Nothing to do; it's already zero'd */
881 
882 	/*
883 	 * Initialize the pmap pools and list.
884 	 */
885 	pmap_ncpuids = ncpuids;
886 	pool_cache_bootstrap(&pmap_pmap_cache, PMAP_SIZEOF(pmap_ncpuids), 0,
887 	    0, 0, "pmap", NULL, IPL_NONE, NULL, NULL, NULL);
888 	pool_cache_bootstrap(&pmap_l1pt_cache, PAGE_SIZE, 0, 0, 0, "pmapl1pt",
889 	    &pmap_l1pt_allocator, IPL_NONE, pmap_l1pt_ctor, NULL, NULL);
890 	pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0,
891 	    PR_LARGECACHE, "pmappv", &pmap_pv_page_allocator, IPL_NONE, NULL,
892 	    NULL, NULL);
893 
894 	TAILQ_INIT(&pmap_all_pmaps);
895 
896 	/*
897 	 * Initialize the ASN logic.
898 	 */
899 	pmap_max_asn = maxasn;
900 	for (i = 0; i < ALPHA_MAXPROCS; i++) {
901 		pmap_asn_info[i].pma_asn = 1;
902 		pmap_asn_info[i].pma_asngen = 0;
903 	}
904 
905 	/*
906 	 * Initialize the locks.
907 	 */
908 	rw_init(&pmap_main_lock);
909 	mutex_init(&pmap_all_pmaps_lock, MUTEX_DEFAULT, IPL_NONE);
910 	for (i = 0; i < __arraycount(pmap_pvh_locks); i++) {
911 		mutex_init(&pmap_pvh_locks[i].lock, MUTEX_DEFAULT, IPL_NONE);
912 	}
913 
914 	/*
915 	 * Initialize kernel pmap.  Note that all kernel mappings
916 	 * have PG_ASM set, so the ASN doesn't really matter for
917 	 * the kernel pmap.  Also, since the kernel pmap always
918 	 * references kernel_lev1map, it always has an invalid ASN
919 	 * generation.
920 	 */
921 	memset(pmap_kernel(), 0, sizeof(struct pmap));
922 	pmap_kernel()->pm_lev1map = kernel_lev1map;
923 	pmap_kernel()->pm_count = 1;
924 	for (i = 0; i < ALPHA_MAXPROCS; i++) {
925 		pmap_kernel()->pm_asni[i].pma_asn = PMAP_ASN_RESERVED;
926 		pmap_kernel()->pm_asni[i].pma_asngen =
927 		    pmap_asn_info[i].pma_asngen;
928 	}
929 	mutex_init(&pmap_kernel()->pm_lock, MUTEX_DEFAULT, IPL_NONE);
930 	TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap_kernel(), pm_list);
931 
932 #if defined(MULTIPROCESSOR)
933 	/*
934 	 * Initialize the TLB shootdown queues.
935 	 */
936 	pool_cache_bootstrap(&pmap_tlb_shootdown_job_cache,
937 	    sizeof(struct pmap_tlb_shootdown_job), CACHE_LINE_SIZE,
938 	     0, PR_LARGECACHE, "pmaptlb", NULL, IPL_VM, NULL, NULL, NULL);
939 	for (i = 0; i < ALPHA_MAXPROCS; i++) {
940 		TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_head);
941 		mutex_init(&pmap_tlb_shootdown_q[i].pq_lock, MUTEX_DEFAULT,
942 		    IPL_SCHED);
943 	}
944 #endif
945 
946 	/*
947 	 * Set up lwp0's PCB such that the ptbr points to the right place
948 	 * and has the kernel pmap's (really unused) ASN.
949 	 */
950 	pcb = lwp_getpcb(&lwp0);
951 	pcb->pcb_hw.apcb_ptbr =
952 	    ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT;
953 	pcb->pcb_hw.apcb_asn = pmap_kernel()->pm_asni[cpu_number()].pma_asn;
954 
955 	/*
956 	 * Mark the kernel pmap `active' on this processor.
957 	 */
958 	atomic_or_ulong(&pmap_kernel()->pm_cpus,
959 	    (1UL << cpu_number()));
960 }
961 
962 #ifdef _PMAP_MAY_USE_PROM_CONSOLE
963 int
964 pmap_uses_prom_console(void)
965 {
966 
967 	return (cputype == ST_DEC_21000);
968 }
969 #endif /* _PMAP_MAY_USE_PROM_CONSOLE */
970 
971 /*
972  * pmap_virtual_space:		[ INTERFACE ]
973  *
974  *	Define the initial bounds of the kernel virtual address space.
975  */
976 void
977 pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp)
978 {
979 
980 	*vstartp = VM_MIN_KERNEL_ADDRESS;	/* kernel is in K0SEG */
981 	*vendp = VM_MAX_KERNEL_ADDRESS;		/* we use pmap_growkernel */
982 }
983 
984 /*
985  * pmap_steal_memory:		[ INTERFACE ]
986  *
987  *	Bootstrap memory allocator (alternative to vm_bootstrap_steal_memory()).
988  *	This function allows for early dynamic memory allocation until the
989  *	virtual memory system has been bootstrapped.  After that point, either
990  *	kmem_alloc or malloc should be used.  This function works by stealing
991  *	pages from the (to be) managed page pool, then implicitly mapping the
992  *	pages (by using their k0seg addresses) and zeroing them.
993  *
994  *	It may be used once the physical memory segments have been pre-loaded
995  *	into the vm_physmem[] array.  Early memory allocation MUST use this
996  *	interface!  This cannot be used after vm_page_startup(), and will
997  *	generate a panic if tried.
998  *
999  *	Note that this memory will never be freed, and in essence it is wired
1000  *	down.
1001  *
1002  *	We must adjust *vstartp and/or *vendp iff we use address space
1003  *	from the kernel virtual address range defined by pmap_virtual_space().
1004  *
1005  *	Note: no locking is necessary in this function.
1006  */
1007 vaddr_t
1008 pmap_steal_memory(vsize_t size, vaddr_t *vstartp, vaddr_t *vendp)
1009 {
1010 	int bank, npgs, x;
1011 	vaddr_t va;
1012 	paddr_t pa;
1013 
1014 	size = round_page(size);
1015 	npgs = atop(size);
1016 
1017 #if 0
1018 	printf("PSM: size 0x%lx (npgs 0x%x)\n", size, npgs);
1019 #endif
1020 
1021 	for (bank = 0; bank < vm_nphysseg; bank++) {
1022 		if (uvm.page_init_done == true)
1023 			panic("pmap_steal_memory: called _after_ bootstrap");
1024 
1025 #if 0
1026 		printf("     bank %d: avail_start 0x%lx, start 0x%lx, "
1027 		    "avail_end 0x%lx\n", bank, VM_PHYSMEM_PTR(bank)->avail_start,
1028 		    VM_PHYSMEM_PTR(bank)->start, VM_PHYSMEM_PTR(bank)->avail_end);
1029 #endif
1030 
1031 		if (VM_PHYSMEM_PTR(bank)->avail_start != VM_PHYSMEM_PTR(bank)->start ||
1032 		    VM_PHYSMEM_PTR(bank)->avail_start >= VM_PHYSMEM_PTR(bank)->avail_end)
1033 			continue;
1034 
1035 #if 0
1036 		printf("             avail_end - avail_start = 0x%lx\n",
1037 		    VM_PHYSMEM_PTR(bank)->avail_end - VM_PHYSMEM_PTR(bank)->avail_start);
1038 #endif
1039 
1040 		if ((VM_PHYSMEM_PTR(bank)->avail_end - VM_PHYSMEM_PTR(bank)->avail_start)
1041 		    < npgs)
1042 			continue;
1043 
1044 		/*
1045 		 * There are enough pages here; steal them!
1046 		 */
1047 		pa = ptoa(VM_PHYSMEM_PTR(bank)->avail_start);
1048 		VM_PHYSMEM_PTR(bank)->avail_start += npgs;
1049 		VM_PHYSMEM_PTR(bank)->start += npgs;
1050 
1051 		/*
1052 		 * Have we used up this segment?
1053 		 */
1054 		if (VM_PHYSMEM_PTR(bank)->avail_start == VM_PHYSMEM_PTR(bank)->end) {
1055 			if (vm_nphysseg == 1)
1056 				panic("pmap_steal_memory: out of memory!");
1057 
1058 			/* Remove this segment from the list. */
1059 			vm_nphysseg--;
1060 			for (x = bank; x < vm_nphysseg; x++) {
1061 				/* structure copy */
1062 				VM_PHYSMEM_PTR_SWAP(x, x + 1);
1063 			}
1064 		}
1065 
1066 		va = ALPHA_PHYS_TO_K0SEG(pa);
1067 		memset((void *)va, 0, size);
1068 		pmap_pages_stolen += npgs;
1069 		return (va);
1070 	}
1071 
1072 	/*
1073 	 * If we got here, this was no memory left.
1074 	 */
1075 	panic("pmap_steal_memory: no memory to steal");
1076 }
1077 
1078 /*
1079  * pmap_init:			[ INTERFACE ]
1080  *
1081  *	Initialize the pmap module.  Called by vm_init(), to initialize any
1082  *	structures that the pmap system needs to map virtual memory.
1083  *
1084  *	Note: no locking is necessary in this function.
1085  */
1086 void
1087 pmap_init(void)
1088 {
1089 
1090 #ifdef DEBUG
1091 	if (pmapdebug & PDB_FOLLOW)
1092 	        printf("pmap_init()\n");
1093 #endif
1094 
1095 	/* initialize protection array */
1096 	alpha_protection_init();
1097 
1098 	/*
1099 	 * Set a low water mark on the pv_entry pool, so that we are
1100 	 * more likely to have these around even in extreme memory
1101 	 * starvation.
1102 	 */
1103 	pool_cache_setlowat(&pmap_pv_cache, pmap_pv_lowat);
1104 
1105 	/*
1106 	 * Now it is safe to enable pv entry recording.
1107 	 */
1108 	pmap_initialized = true;
1109 
1110 #if 0
1111 	for (bank = 0; bank < vm_nphysseg; bank++) {
1112 		printf("bank %d\n", bank);
1113 		printf("\tstart = 0x%x\n", ptoa(VM_PHYSMEM_PTR(bank)->start));
1114 		printf("\tend = 0x%x\n", ptoa(VM_PHYSMEM_PTR(bank)->end));
1115 		printf("\tavail_start = 0x%x\n",
1116 		    ptoa(VM_PHYSMEM_PTR(bank)->avail_start));
1117 		printf("\tavail_end = 0x%x\n",
1118 		    ptoa(VM_PHYSMEM_PTR(bank)->avail_end));
1119 	}
1120 #endif
1121 }
1122 
1123 /*
1124  * pmap_create:			[ INTERFACE ]
1125  *
1126  *	Create and return a physical map.
1127  *
1128  *	Note: no locking is necessary in this function.
1129  */
1130 pmap_t
1131 pmap_create(void)
1132 {
1133 	pmap_t pmap;
1134 	int i;
1135 
1136 #ifdef DEBUG
1137 	if (pmapdebug & (PDB_FOLLOW|PDB_CREATE))
1138 		printf("pmap_create()\n");
1139 #endif
1140 
1141 	pmap = pool_cache_get(&pmap_pmap_cache, PR_WAITOK);
1142 	memset(pmap, 0, sizeof(*pmap));
1143 
1144 	/*
1145 	 * Defer allocation of a new level 1 page table until
1146 	 * the first new mapping is entered; just take a reference
1147 	 * to the kernel kernel_lev1map.
1148 	 */
1149 	pmap->pm_lev1map = kernel_lev1map;
1150 
1151 	pmap->pm_count = 1;
1152 	for (i = 0; i < pmap_ncpuids; i++) {
1153 		pmap->pm_asni[i].pma_asn = PMAP_ASN_RESERVED;
1154 		/* XXX Locking? */
1155 		pmap->pm_asni[i].pma_asngen = pmap_asn_info[i].pma_asngen;
1156 	}
1157 	mutex_init(&pmap->pm_lock, MUTEX_DEFAULT, IPL_NONE);
1158 
1159  try_again:
1160 	rw_enter(&pmap_growkernel_lock, RW_READER);
1161 
1162 	if (pmap_lev1map_create(pmap, cpu_number()) != 0) {
1163 		rw_exit(&pmap_growkernel_lock);
1164 		(void) kpause("pmap_create", false, hz >> 2, NULL);
1165 		goto try_again;
1166 	}
1167 
1168 	mutex_enter(&pmap_all_pmaps_lock);
1169 	TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap, pm_list);
1170 	mutex_exit(&pmap_all_pmaps_lock);
1171 
1172 	rw_exit(&pmap_growkernel_lock);
1173 
1174 	return (pmap);
1175 }
1176 
1177 /*
1178  * pmap_destroy:		[ INTERFACE ]
1179  *
1180  *	Drop the reference count on the specified pmap, releasing
1181  *	all resources if the reference count drops to zero.
1182  */
1183 void
1184 pmap_destroy(pmap_t pmap)
1185 {
1186 
1187 #ifdef DEBUG
1188 	if (pmapdebug & PDB_FOLLOW)
1189 		printf("pmap_destroy(%p)\n", pmap);
1190 #endif
1191 
1192 	if (atomic_dec_uint_nv(&pmap->pm_count) > 0)
1193 		return;
1194 
1195 	rw_enter(&pmap_growkernel_lock, RW_READER);
1196 
1197 	/*
1198 	 * Remove it from the global list of all pmaps.
1199 	 */
1200 	mutex_enter(&pmap_all_pmaps_lock);
1201 	TAILQ_REMOVE(&pmap_all_pmaps, pmap, pm_list);
1202 	mutex_exit(&pmap_all_pmaps_lock);
1203 
1204 	pmap_lev1map_destroy(pmap, cpu_number());
1205 
1206 	rw_exit(&pmap_growkernel_lock);
1207 
1208 	/*
1209 	 * Since the pmap is supposed to contain no valid
1210 	 * mappings at this point, we should always see
1211 	 * kernel_lev1map here.
1212 	 */
1213 	KASSERT(pmap->pm_lev1map == kernel_lev1map);
1214 
1215 	mutex_destroy(&pmap->pm_lock);
1216 	pool_cache_put(&pmap_pmap_cache, pmap);
1217 }
1218 
1219 /*
1220  * pmap_reference:		[ INTERFACE ]
1221  *
1222  *	Add a reference to the specified pmap.
1223  */
1224 void
1225 pmap_reference(pmap_t pmap)
1226 {
1227 
1228 #ifdef DEBUG
1229 	if (pmapdebug & PDB_FOLLOW)
1230 		printf("pmap_reference(%p)\n", pmap);
1231 #endif
1232 
1233 	atomic_inc_uint(&pmap->pm_count);
1234 }
1235 
1236 /*
1237  * pmap_remove:			[ INTERFACE ]
1238  *
1239  *	Remove the given range of addresses from the specified map.
1240  *
1241  *	It is assumed that the start and end are properly
1242  *	rounded to the page size.
1243  */
1244 void
1245 pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva)
1246 {
1247 	pt_entry_t *l1pte, *l2pte, *l3pte;
1248 	pt_entry_t *saved_l1pte, *saved_l2pte, *saved_l3pte;
1249 	vaddr_t l1eva, l2eva, vptva;
1250 	bool needisync = false;
1251 	long cpu_id = cpu_number();
1252 
1253 #ifdef DEBUG
1254 	if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
1255 		printf("pmap_remove(%p, %lx, %lx)\n", pmap, sva, eva);
1256 #endif
1257 
1258 	/*
1259 	 * If this is the kernel pmap, we can use a faster method
1260 	 * for accessing the PTEs (since the PT pages are always
1261 	 * resident).
1262 	 *
1263 	 * Note that this routine should NEVER be called from an
1264 	 * interrupt context; pmap_kremove() is used for that.
1265 	 */
1266 	if (pmap == pmap_kernel()) {
1267 		PMAP_MAP_TO_HEAD_LOCK();
1268 		PMAP_LOCK(pmap);
1269 
1270 		while (sva < eva) {
1271 			l3pte = PMAP_KERNEL_PTE(sva);
1272 			if (pmap_pte_v(l3pte)) {
1273 #ifdef DIAGNOSTIC
1274 				if (uvm_pageismanaged(pmap_pte_pa(l3pte)) &&
1275 				    pmap_pte_pv(l3pte) == 0)
1276 					panic("pmap_remove: managed page "
1277 					    "without PG_PVLIST for 0x%lx",
1278 					    sva);
1279 #endif
1280 				needisync |= pmap_remove_mapping(pmap, sva,
1281 				    l3pte, true, cpu_id);
1282 			}
1283 			sva += PAGE_SIZE;
1284 		}
1285 
1286 		PMAP_UNLOCK(pmap);
1287 		PMAP_MAP_TO_HEAD_UNLOCK();
1288 
1289 		if (needisync)
1290 			PMAP_SYNC_ISTREAM_KERNEL();
1291 		return;
1292 	}
1293 
1294 #ifdef DIAGNOSTIC
1295 	if (sva > VM_MAXUSER_ADDRESS || eva > VM_MAXUSER_ADDRESS)
1296 		panic("pmap_remove: (0x%lx - 0x%lx) user pmap, kernel "
1297 		    "address range", sva, eva);
1298 #endif
1299 
1300 	PMAP_MAP_TO_HEAD_LOCK();
1301 	PMAP_LOCK(pmap);
1302 
1303 	/*
1304 	 * If we're already referencing the kernel_lev1map, there
1305 	 * is no work for us to do.
1306 	 */
1307 	if (pmap->pm_lev1map == kernel_lev1map)
1308 		goto out;
1309 
1310 	saved_l1pte = l1pte = pmap_l1pte(pmap, sva);
1311 
1312 	/*
1313 	 * Add a reference to the L1 table to it won't get
1314 	 * removed from under us.
1315 	 */
1316 	pmap_physpage_addref(saved_l1pte);
1317 
1318 	for (; sva < eva; sva = l1eva, l1pte++) {
1319 		l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE;
1320 		if (pmap_pte_v(l1pte)) {
1321 			saved_l2pte = l2pte = pmap_l2pte(pmap, sva, l1pte);
1322 
1323 			/*
1324 			 * Add a reference to the L2 table so it won't
1325 			 * get removed from under us.
1326 			 */
1327 			pmap_physpage_addref(saved_l2pte);
1328 
1329 			for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) {
1330 				l2eva =
1331 				    alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE;
1332 				if (pmap_pte_v(l2pte)) {
1333 					saved_l3pte = l3pte =
1334 					    pmap_l3pte(pmap, sva, l2pte);
1335 
1336 					/*
1337 					 * Add a reference to the L3 table so
1338 					 * it won't get removed from under us.
1339 					 */
1340 					pmap_physpage_addref(saved_l3pte);
1341 
1342 					/*
1343 					 * Remember this sva; if the L3 table
1344 					 * gets removed, we need to invalidate
1345 					 * the VPT TLB entry for it.
1346 					 */
1347 					vptva = sva;
1348 
1349 					for (; sva < l2eva && sva < eva;
1350 					     sva += PAGE_SIZE, l3pte++) {
1351 						if (!pmap_pte_v(l3pte)) {
1352 							continue;
1353 						}
1354 						needisync |=
1355 						    pmap_remove_mapping(
1356 							pmap, sva,
1357 							l3pte, true,
1358 							cpu_id);
1359 					}
1360 
1361 					/*
1362 					 * Remove the reference to the L3
1363 					 * table that we added above.  This
1364 					 * may free the L3 table.
1365 					 */
1366 					pmap_l3pt_delref(pmap, vptva,
1367 					    saved_l3pte, cpu_id);
1368 				}
1369 			}
1370 
1371 			/*
1372 			 * Remove the reference to the L2 table that we
1373 			 * added above.  This may free the L2 table.
1374 			 */
1375 			pmap_l2pt_delref(pmap, l1pte, saved_l2pte, cpu_id);
1376 		}
1377 	}
1378 
1379 	/*
1380 	 * Remove the reference to the L1 table that we added above.
1381 	 * This may free the L1 table.
1382 	 */
1383 	pmap_l1pt_delref(pmap, saved_l1pte, cpu_id);
1384 
1385 	if (needisync)
1386 		PMAP_SYNC_ISTREAM_USER(pmap);
1387 
1388  out:
1389 	PMAP_UNLOCK(pmap);
1390 	PMAP_MAP_TO_HEAD_UNLOCK();
1391 }
1392 
1393 /*
1394  * pmap_page_protect:		[ INTERFACE ]
1395  *
1396  *	Lower the permission for all mappings to a given page to
1397  *	the permissions specified.
1398  */
1399 void
1400 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
1401 {
1402 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
1403 	pmap_t pmap;
1404 	pv_entry_t pv, nextpv;
1405 	bool needkisync = false;
1406 	long cpu_id = cpu_number();
1407 	kmutex_t *lock;
1408 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1409 #ifdef DEBUG
1410 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
1411 
1412 
1413 	if ((pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) ||
1414 	    (prot == VM_PROT_NONE && (pmapdebug & PDB_REMOVE)))
1415 		printf("pmap_page_protect(%p, %x)\n", pg, prot);
1416 #endif
1417 
1418 	switch (prot) {
1419 	case VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE:
1420 	case VM_PROT_READ|VM_PROT_WRITE:
1421 		return;
1422 
1423 	/* copy_on_write */
1424 	case VM_PROT_READ|VM_PROT_EXECUTE:
1425 	case VM_PROT_READ:
1426 		PMAP_HEAD_TO_MAP_LOCK();
1427 		lock = pmap_pvh_lock(pg);
1428 		mutex_enter(lock);
1429 		for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) {
1430 			PMAP_LOCK(pv->pv_pmap);
1431 			if (*pv->pv_pte & (PG_KWE | PG_UWE)) {
1432 				*pv->pv_pte &= ~(PG_KWE | PG_UWE);
1433 				PMAP_INVALIDATE_TLB(pv->pv_pmap, pv->pv_va,
1434 				    pmap_pte_asm(pv->pv_pte),
1435 				    PMAP_ISACTIVE(pv->pv_pmap, cpu_id), cpu_id);
1436 				PMAP_TLB_SHOOTDOWN(pv->pv_pmap, pv->pv_va,
1437 				    pmap_pte_asm(pv->pv_pte));
1438 			}
1439 			PMAP_UNLOCK(pv->pv_pmap);
1440 		}
1441 		mutex_exit(lock);
1442 		PMAP_HEAD_TO_MAP_UNLOCK();
1443 		PMAP_TLB_SHOOTNOW();
1444 		return;
1445 
1446 	/* remove_all */
1447 	default:
1448 		break;
1449 	}
1450 
1451 	PMAP_HEAD_TO_MAP_LOCK();
1452 	lock = pmap_pvh_lock(pg);
1453 	mutex_enter(lock);
1454 	for (pv = md->pvh_list; pv != NULL; pv = nextpv) {
1455 		nextpv = pv->pv_next;
1456 		pmap = pv->pv_pmap;
1457 
1458 		PMAP_LOCK(pmap);
1459 #ifdef DEBUG
1460 		if (pmap_pte_v(pmap_l2pte(pv->pv_pmap, pv->pv_va, NULL)) == 0 ||
1461 		    pmap_pte_pa(pv->pv_pte) != pa)
1462 			panic("pmap_page_protect: bad mapping");
1463 #endif
1464 		if (pmap_remove_mapping(pmap, pv->pv_va, pv->pv_pte,
1465 		    false, cpu_id) == true) {
1466 			if (pmap == pmap_kernel())
1467 				needkisync |= true;
1468 			else
1469 				PMAP_SYNC_ISTREAM_USER(pmap);
1470 		}
1471 		PMAP_UNLOCK(pmap);
1472 	}
1473 
1474 	if (needkisync)
1475 		PMAP_SYNC_ISTREAM_KERNEL();
1476 
1477 	mutex_exit(lock);
1478 	PMAP_HEAD_TO_MAP_UNLOCK();
1479 }
1480 
1481 /*
1482  * pmap_protect:		[ INTERFACE ]
1483  *
1484  *	Set the physical protection on the specified range of this map
1485  *	as requested.
1486  */
1487 void
1488 pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
1489 {
1490 	pt_entry_t *l1pte, *l2pte, *l3pte, bits;
1491 	bool isactive;
1492 	bool hadasm;
1493 	vaddr_t l1eva, l2eva;
1494 	long cpu_id = cpu_number();
1495 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1496 
1497 #ifdef DEBUG
1498 	if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT))
1499 		printf("pmap_protect(%p, %lx, %lx, %x)\n",
1500 		    pmap, sva, eva, prot);
1501 #endif
1502 
1503 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1504 		pmap_remove(pmap, sva, eva);
1505 		return;
1506 	}
1507 
1508 	PMAP_LOCK(pmap);
1509 
1510 	bits = pte_prot(pmap, prot);
1511 	isactive = PMAP_ISACTIVE(pmap, cpu_id);
1512 
1513 	l1pte = pmap_l1pte(pmap, sva);
1514 	for (; sva < eva; sva = l1eva, l1pte++) {
1515 		l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE;
1516 		if (pmap_pte_v(l1pte)) {
1517 			l2pte = pmap_l2pte(pmap, sva, l1pte);
1518 			for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) {
1519 				l2eva =
1520 				    alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE;
1521 				if (pmap_pte_v(l2pte)) {
1522 					l3pte = pmap_l3pte(pmap, sva, l2pte);
1523 					for (; sva < l2eva && sva < eva;
1524 					     sva += PAGE_SIZE, l3pte++) {
1525 						if (pmap_pte_v(l3pte) &&
1526 						    pmap_pte_prot_chg(l3pte,
1527 						    bits)) {
1528 							hadasm =
1529 							   (pmap_pte_asm(l3pte)
1530 							    != 0);
1531 							pmap_pte_set_prot(l3pte,
1532 							   bits);
1533 							PMAP_INVALIDATE_TLB(
1534 							   pmap, sva, hadasm,
1535 							   isactive, cpu_id);
1536 							PMAP_TLB_SHOOTDOWN(
1537 							   pmap, sva,
1538 							   hadasm ? PG_ASM : 0);
1539 						}
1540 					}
1541 				}
1542 			}
1543 		}
1544 	}
1545 
1546 	PMAP_TLB_SHOOTNOW();
1547 
1548 	if (prot & VM_PROT_EXECUTE)
1549 		PMAP_SYNC_ISTREAM(pmap);
1550 
1551 	PMAP_UNLOCK(pmap);
1552 }
1553 
1554 /*
1555  * pmap_enter:			[ INTERFACE ]
1556  *
1557  *	Insert the given physical page (p) at
1558  *	the specified virtual address (v) in the
1559  *	target physical map with the protection requested.
1560  *
1561  *	If specified, the page will be wired down, meaning
1562  *	that the related pte can not be reclaimed.
1563  *
1564  *	Note:  This is the only routine which MAY NOT lazy-evaluate
1565  *	or lose information.  That is, this routine must actually
1566  *	insert this page into the given map NOW.
1567  */
1568 int
1569 pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
1570 {
1571 	struct vm_page *pg;			/* if != NULL, managed page */
1572 	pt_entry_t *pte, npte, opte;
1573 	paddr_t opa;
1574 	bool tflush = true;
1575 	bool hadasm = false;	/* XXX gcc -Wuninitialized */
1576 	bool needisync = false;
1577 	bool setisync = false;
1578 	bool isactive;
1579 	bool wired;
1580 	long cpu_id = cpu_number();
1581 	int error = 0;
1582 	kmutex_t *lock;
1583 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1584 
1585 #ifdef DEBUG
1586 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1587 		printf("pmap_enter(%p, %lx, %lx, %x, %x)\n",
1588 		       pmap, va, pa, prot, flags);
1589 #endif
1590 	pg = PHYS_TO_VM_PAGE(pa);
1591 	isactive = PMAP_ISACTIVE(pmap, cpu_id);
1592 	wired = (flags & PMAP_WIRED) != 0;
1593 
1594 	/*
1595 	 * Determine what we need to do about the I-stream.  If
1596 	 * VM_PROT_EXECUTE is set, we mark a user pmap as needing
1597 	 * an I-sync on the way back out to userspace.  We always
1598 	 * need an immediate I-sync for the kernel pmap.
1599 	 */
1600 	if (prot & VM_PROT_EXECUTE) {
1601 		if (pmap == pmap_kernel())
1602 			needisync = true;
1603 		else {
1604 			setisync = true;
1605 			needisync = (pmap->pm_cpus != 0);
1606 		}
1607 	}
1608 
1609 	PMAP_MAP_TO_HEAD_LOCK();
1610 	PMAP_LOCK(pmap);
1611 
1612 	if (pmap == pmap_kernel()) {
1613 #ifdef DIAGNOSTIC
1614 		/*
1615 		 * Sanity check the virtual address.
1616 		 */
1617 		if (va < VM_MIN_KERNEL_ADDRESS)
1618 			panic("pmap_enter: kernel pmap, invalid va 0x%lx", va);
1619 #endif
1620 		pte = PMAP_KERNEL_PTE(va);
1621 	} else {
1622 		pt_entry_t *l1pte, *l2pte;
1623 
1624 #ifdef DIAGNOSTIC
1625 		/*
1626 		 * Sanity check the virtual address.
1627 		 */
1628 		if (va >= VM_MAXUSER_ADDRESS)
1629 			panic("pmap_enter: user pmap, invalid va 0x%lx", va);
1630 #endif
1631 
1632 		KASSERT(pmap->pm_lev1map != kernel_lev1map);
1633 
1634 		/*
1635 		 * Check to see if the level 1 PTE is valid, and
1636 		 * allocate a new level 2 page table page if it's not.
1637 		 * A reference will be added to the level 2 table when
1638 		 * the level 3 table is created.
1639 		 */
1640 		l1pte = pmap_l1pte(pmap, va);
1641 		if (pmap_pte_v(l1pte) == 0) {
1642 			pmap_physpage_addref(l1pte);
1643 			error = pmap_ptpage_alloc(pmap, l1pte, PGU_L2PT);
1644 			if (error) {
1645 				pmap_l1pt_delref(pmap, l1pte, cpu_id);
1646 				if (flags & PMAP_CANFAIL)
1647 					goto out;
1648 				panic("pmap_enter: unable to create L2 PT "
1649 				    "page");
1650 			}
1651 #ifdef DEBUG
1652 			if (pmapdebug & PDB_PTPAGE)
1653 				printf("pmap_enter: new level 2 table at "
1654 				    "0x%lx\n", pmap_pte_pa(l1pte));
1655 #endif
1656 		}
1657 
1658 		/*
1659 		 * Check to see if the level 2 PTE is valid, and
1660 		 * allocate a new level 3 page table page if it's not.
1661 		 * A reference will be added to the level 3 table when
1662 		 * the mapping is validated.
1663 		 */
1664 		l2pte = pmap_l2pte(pmap, va, l1pte);
1665 		if (pmap_pte_v(l2pte) == 0) {
1666 			pmap_physpage_addref(l2pte);
1667 			error = pmap_ptpage_alloc(pmap, l2pte, PGU_L3PT);
1668 			if (error) {
1669 				pmap_l2pt_delref(pmap, l1pte, l2pte, cpu_id);
1670 				if (flags & PMAP_CANFAIL)
1671 					goto out;
1672 				panic("pmap_enter: unable to create L3 PT "
1673 				    "page");
1674 			}
1675 #ifdef DEBUG
1676 			if (pmapdebug & PDB_PTPAGE)
1677 				printf("pmap_enter: new level 3 table at "
1678 				    "0x%lx\n", pmap_pte_pa(l2pte));
1679 #endif
1680 		}
1681 
1682 		/*
1683 		 * Get the PTE that will map the page.
1684 		 */
1685 		pte = pmap_l3pte(pmap, va, l2pte);
1686 	}
1687 
1688 	/* Remember all of the old PTE; used for TBI check later. */
1689 	opte = *pte;
1690 
1691 	/*
1692 	 * Check to see if the old mapping is valid.  If not, validate the
1693 	 * new one immediately.
1694 	 */
1695 	if (pmap_pte_v(pte) == 0) {
1696 		/*
1697 		 * No need to invalidate the TLB in this case; an invalid
1698 		 * mapping won't be in the TLB, and a previously valid
1699 		 * mapping would have been flushed when it was invalidated.
1700 		 */
1701 		tflush = false;
1702 
1703 		/*
1704 		 * No need to synchronize the I-stream, either, for basically
1705 		 * the same reason.
1706 		 */
1707 		setisync = needisync = false;
1708 
1709 		if (pmap != pmap_kernel()) {
1710 			/*
1711 			 * New mappings gain a reference on the level 3
1712 			 * table.
1713 			 */
1714 			pmap_physpage_addref(pte);
1715 		}
1716 		goto validate_enterpv;
1717 	}
1718 
1719 	opa = pmap_pte_pa(pte);
1720 	hadasm = (pmap_pte_asm(pte) != 0);
1721 
1722 	if (opa == pa) {
1723 		/*
1724 		 * Mapping has not changed; must be a protection or
1725 		 * wiring change.
1726 		 */
1727 		if (pmap_pte_w_chg(pte, wired ? PG_WIRED : 0)) {
1728 #ifdef DEBUG
1729 			if (pmapdebug & PDB_ENTER)
1730 				printf("pmap_enter: wiring change -> %d\n",
1731 				    wired);
1732 #endif
1733 			/*
1734 			 * Adjust the wiring count.
1735 			 */
1736 			if (wired)
1737 				PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1);
1738 			else
1739 				PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1740 		}
1741 
1742 		/*
1743 		 * Set the PTE.
1744 		 */
1745 		goto validate;
1746 	}
1747 
1748 	/*
1749 	 * The mapping has changed.  We need to invalidate the
1750 	 * old mapping before creating the new one.
1751 	 */
1752 #ifdef DEBUG
1753 	if (pmapdebug & PDB_ENTER)
1754 		printf("pmap_enter: removing old mapping 0x%lx\n", va);
1755 #endif
1756 	if (pmap != pmap_kernel()) {
1757 		/*
1758 		 * Gain an extra reference on the level 3 table.
1759 		 * pmap_remove_mapping() will delete a reference,
1760 		 * and we don't want the table to be erroneously
1761 		 * freed.
1762 		 */
1763 		pmap_physpage_addref(pte);
1764 	}
1765 	needisync |= pmap_remove_mapping(pmap, va, pte, true, cpu_id);
1766 
1767  validate_enterpv:
1768 	/*
1769 	 * Enter the mapping into the pv_table if appropriate.
1770 	 */
1771 	if (pg != NULL) {
1772 		error = pmap_pv_enter(pmap, pg, va, pte, true);
1773 		if (error) {
1774 			pmap_l3pt_delref(pmap, va, pte, cpu_id);
1775 			if (flags & PMAP_CANFAIL)
1776 				goto out;
1777 			panic("pmap_enter: unable to enter mapping in PV "
1778 			    "table");
1779 		}
1780 	}
1781 
1782 	/*
1783 	 * Increment counters.
1784 	 */
1785 	PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1);
1786 	if (wired)
1787 		PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1);
1788 
1789  validate:
1790 	/*
1791 	 * Build the new PTE.
1792 	 */
1793 	npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap, prot) | PG_V;
1794 	if (pg != NULL) {
1795 		struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
1796 		int attrs;
1797 
1798 #ifdef DIAGNOSTIC
1799 		if ((flags & VM_PROT_ALL) & ~prot)
1800 			panic("pmap_enter: access type exceeds prot");
1801 #endif
1802 		lock = pmap_pvh_lock(pg);
1803 		mutex_enter(lock);
1804 		if (flags & VM_PROT_WRITE)
1805 			md->pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED);
1806 		else if (flags & VM_PROT_ALL)
1807 			md->pvh_attrs |= PGA_REFERENCED;
1808 		attrs = md->pvh_attrs;
1809 		mutex_exit(lock);
1810 
1811 		/*
1812 		 * Set up referenced/modified emulation for new mapping.
1813 		 */
1814 		if ((attrs & PGA_REFERENCED) == 0)
1815 			npte |= PG_FOR | PG_FOW | PG_FOE;
1816 		else if ((attrs & PGA_MODIFIED) == 0)
1817 			npte |= PG_FOW;
1818 
1819 		/*
1820 		 * Mapping was entered on PV list.
1821 		 */
1822 		npte |= PG_PVLIST;
1823 	}
1824 	if (wired)
1825 		npte |= PG_WIRED;
1826 #ifdef DEBUG
1827 	if (pmapdebug & PDB_ENTER)
1828 		printf("pmap_enter: new pte = 0x%lx\n", npte);
1829 #endif
1830 
1831 	/*
1832 	 * If the PALcode portion of the new PTE is the same as the
1833 	 * old PTE, no TBI is necessary.
1834 	 */
1835 	if (PG_PALCODE(opte) == PG_PALCODE(npte))
1836 		tflush = false;
1837 
1838 	/*
1839 	 * Set the new PTE.
1840 	 */
1841 	PMAP_SET_PTE(pte, npte);
1842 
1843 	/*
1844 	 * Invalidate the TLB entry for this VA and any appropriate
1845 	 * caches.
1846 	 */
1847 	if (tflush) {
1848 		PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id);
1849 		PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0);
1850 		PMAP_TLB_SHOOTNOW();
1851 	}
1852 	if (setisync)
1853 		PMAP_SET_NEEDISYNC(pmap);
1854 	if (needisync)
1855 		PMAP_SYNC_ISTREAM(pmap);
1856 
1857 out:
1858 	PMAP_UNLOCK(pmap);
1859 	PMAP_MAP_TO_HEAD_UNLOCK();
1860 
1861 	return error;
1862 }
1863 
1864 /*
1865  * pmap_kenter_pa:		[ INTERFACE ]
1866  *
1867  *	Enter a va -> pa mapping into the kernel pmap without any
1868  *	physical->virtual tracking.
1869  *
1870  *	Note: no locking is necessary in this function.
1871  */
1872 void
1873 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
1874 {
1875 	pt_entry_t *pte, npte;
1876 	long cpu_id = cpu_number();
1877 	bool needisync = false;
1878 	pmap_t pmap = pmap_kernel();
1879 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1880 
1881 #ifdef DEBUG
1882 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1883 		printf("pmap_kenter_pa(%lx, %lx, %x)\n",
1884 		    va, pa, prot);
1885 #endif
1886 
1887 #ifdef DIAGNOSTIC
1888 	/*
1889 	 * Sanity check the virtual address.
1890 	 */
1891 	if (va < VM_MIN_KERNEL_ADDRESS)
1892 		panic("pmap_kenter_pa: kernel pmap, invalid va 0x%lx", va);
1893 #endif
1894 
1895 	pte = PMAP_KERNEL_PTE(va);
1896 
1897 	if (pmap_pte_v(pte) == 0)
1898 		PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1);
1899 	if (pmap_pte_w(pte) == 0)
1900 		PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1901 
1902 	if ((prot & VM_PROT_EXECUTE) != 0 || pmap_pte_exec(pte))
1903 		needisync = true;
1904 
1905 	/*
1906 	 * Build the new PTE.
1907 	 */
1908 	npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap_kernel(), prot) |
1909 	    PG_V | PG_WIRED;
1910 
1911 	/*
1912 	 * Set the new PTE.
1913 	 */
1914 	PMAP_SET_PTE(pte, npte);
1915 #if defined(MULTIPROCESSOR)
1916 	alpha_mb();		/* XXX alpha_wmb()? */
1917 #endif
1918 
1919 	/*
1920 	 * Invalidate the TLB entry for this VA and any appropriate
1921 	 * caches.
1922 	 */
1923 	PMAP_INVALIDATE_TLB(pmap, va, true, true, cpu_id);
1924 	PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM);
1925 	PMAP_TLB_SHOOTNOW();
1926 
1927 	if (needisync)
1928 		PMAP_SYNC_ISTREAM_KERNEL();
1929 }
1930 
1931 /*
1932  * pmap_kremove:		[ INTERFACE ]
1933  *
1934  *	Remove a mapping entered with pmap_kenter_pa() starting at va,
1935  *	for size bytes (assumed to be page rounded).
1936  */
1937 void
1938 pmap_kremove(vaddr_t va, vsize_t size)
1939 {
1940 	pt_entry_t *pte;
1941 	bool needisync = false;
1942 	long cpu_id = cpu_number();
1943 	pmap_t pmap = pmap_kernel();
1944 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1945 
1946 #ifdef DEBUG
1947 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1948 		printf("pmap_kremove(%lx, %lx)\n",
1949 		    va, size);
1950 #endif
1951 
1952 #ifdef DIAGNOSTIC
1953 	if (va < VM_MIN_KERNEL_ADDRESS)
1954 		panic("pmap_kremove: user address");
1955 #endif
1956 
1957 	for (; size != 0; size -= PAGE_SIZE, va += PAGE_SIZE) {
1958 		pte = PMAP_KERNEL_PTE(va);
1959 		if (pmap_pte_v(pte)) {
1960 #ifdef DIAGNOSTIC
1961 			if (pmap_pte_pv(pte))
1962 				panic("pmap_kremove: PG_PVLIST mapping for "
1963 				    "0x%lx", va);
1964 #endif
1965 			if (pmap_pte_exec(pte))
1966 				needisync = true;
1967 
1968 			/* Zap the mapping. */
1969 			PMAP_SET_PTE(pte, PG_NV);
1970 #if defined(MULTIPROCESSOR)
1971 			alpha_mb();		/* XXX alpha_wmb()? */
1972 #endif
1973 			PMAP_INVALIDATE_TLB(pmap, va, true, true, cpu_id);
1974 			PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM);
1975 
1976 			/* Update stats. */
1977 			PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1);
1978 			PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1979 		}
1980 	}
1981 
1982 	PMAP_TLB_SHOOTNOW();
1983 
1984 	if (needisync)
1985 		PMAP_SYNC_ISTREAM_KERNEL();
1986 }
1987 
1988 /*
1989  * pmap_unwire:			[ INTERFACE ]
1990  *
1991  *	Clear the wired attribute for a map/virtual-address pair.
1992  *
1993  *	The mapping must already exist in the pmap.
1994  */
1995 void
1996 pmap_unwire(pmap_t pmap, vaddr_t va)
1997 {
1998 	pt_entry_t *pte;
1999 
2000 #ifdef DEBUG
2001 	if (pmapdebug & PDB_FOLLOW)
2002 		printf("pmap_unwire(%p, %lx)\n", pmap, va);
2003 #endif
2004 
2005 	PMAP_LOCK(pmap);
2006 
2007 	pte = pmap_l3pte(pmap, va, NULL);
2008 #ifdef DIAGNOSTIC
2009 	if (pte == NULL || pmap_pte_v(pte) == 0)
2010 		panic("pmap_unwire");
2011 #endif
2012 
2013 	/*
2014 	 * If wiring actually changed (always?) clear the wire bit and
2015 	 * update the wire count.  Note that wiring is not a hardware
2016 	 * characteristic so there is no need to invalidate the TLB.
2017 	 */
2018 	if (pmap_pte_w_chg(pte, 0)) {
2019 		pmap_pte_set_w(pte, false);
2020 		PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
2021 	}
2022 #ifdef DIAGNOSTIC
2023 	else {
2024 		printf("pmap_unwire: wiring for pmap %p va 0x%lx "
2025 		    "didn't change!\n", pmap, va);
2026 	}
2027 #endif
2028 
2029 	PMAP_UNLOCK(pmap);
2030 }
2031 
2032 /*
2033  * pmap_extract:		[ INTERFACE ]
2034  *
2035  *	Extract the physical address associated with the given
2036  *	pmap/virtual address pair.
2037  */
2038 bool
2039 pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *pap)
2040 {
2041 	pt_entry_t *l1pte, *l2pte, *l3pte;
2042 	paddr_t pa;
2043 
2044 #ifdef DEBUG
2045 	if (pmapdebug & PDB_FOLLOW)
2046 		printf("pmap_extract(%p, %lx) -> ", pmap, va);
2047 #endif
2048 
2049 	/*
2050 	 * Take a faster path for the kernel pmap.  Avoids locking,
2051 	 * handles K0SEG.
2052 	 */
2053 	if (pmap == pmap_kernel()) {
2054 		pa = vtophys(va);
2055 		if (pap != NULL)
2056 			*pap = pa;
2057 #ifdef DEBUG
2058 		if (pmapdebug & PDB_FOLLOW)
2059 			printf("0x%lx (kernel vtophys)\n", pa);
2060 #endif
2061 		return (pa != 0);	/* XXX */
2062 	}
2063 
2064 	PMAP_LOCK(pmap);
2065 
2066 	l1pte = pmap_l1pte(pmap, va);
2067 	if (pmap_pte_v(l1pte) == 0)
2068 		goto out;
2069 
2070 	l2pte = pmap_l2pte(pmap, va, l1pte);
2071 	if (pmap_pte_v(l2pte) == 0)
2072 		goto out;
2073 
2074 	l3pte = pmap_l3pte(pmap, va, l2pte);
2075 	if (pmap_pte_v(l3pte) == 0)
2076 		goto out;
2077 
2078 	pa = pmap_pte_pa(l3pte) | (va & PGOFSET);
2079 	PMAP_UNLOCK(pmap);
2080 	if (pap != NULL)
2081 		*pap = pa;
2082 #ifdef DEBUG
2083 	if (pmapdebug & PDB_FOLLOW)
2084 		printf("0x%lx\n", pa);
2085 #endif
2086 	return (true);
2087 
2088  out:
2089 	PMAP_UNLOCK(pmap);
2090 #ifdef DEBUG
2091 	if (pmapdebug & PDB_FOLLOW)
2092 		printf("failed\n");
2093 #endif
2094 	return (false);
2095 }
2096 
2097 /*
2098  * pmap_copy:			[ INTERFACE ]
2099  *
2100  *	Copy the mapping range specified by src_addr/len
2101  *	from the source map to the range dst_addr/len
2102  *	in the destination map.
2103  *
2104  *	This routine is only advisory and need not do anything.
2105  */
2106 /* call deleted in <machine/pmap.h> */
2107 
2108 /*
2109  * pmap_update:			[ INTERFACE ]
2110  *
2111  *	Require that all active physical maps contain no
2112  *	incorrect entries NOW, by processing any deferred
2113  *	pmap operations.
2114  */
2115 /* call deleted in <machine/pmap.h> */
2116 
2117 /*
2118  * pmap_activate:		[ INTERFACE ]
2119  *
2120  *	Activate the pmap used by the specified process.  This includes
2121  *	reloading the MMU context if the current process, and marking
2122  *	the pmap in use by the processor.
2123  */
2124 void
2125 pmap_activate(struct lwp *l)
2126 {
2127 	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
2128 	long cpu_id = cpu_number();
2129 
2130 #ifdef DEBUG
2131 	if (pmapdebug & PDB_FOLLOW)
2132 		printf("pmap_activate(%p)\n", l);
2133 #endif
2134 
2135 	/* Mark the pmap in use by this processor. */
2136 	atomic_or_ulong(&pmap->pm_cpus, (1UL << cpu_id));
2137 
2138 	/* Allocate an ASN. */
2139 	pmap_asn_alloc(pmap, cpu_id);
2140 
2141 	PMAP_ACTIVATE(pmap, l, cpu_id);
2142 }
2143 
2144 /*
2145  * pmap_deactivate:		[ INTERFACE ]
2146  *
2147  *	Mark that the pmap used by the specified process is no longer
2148  *	in use by the processor.
2149  *
2150  *	The comment above pmap_activate() wrt. locking applies here,
2151  *	as well.  Note that we use only a single `atomic' operation,
2152  *	so no locking is necessary.
2153  */
2154 void
2155 pmap_deactivate(struct lwp *l)
2156 {
2157 	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
2158 
2159 #ifdef DEBUG
2160 	if (pmapdebug & PDB_FOLLOW)
2161 		printf("pmap_deactivate(%p)\n", l);
2162 #endif
2163 
2164 	/*
2165 	 * Mark the pmap no longer in use by this processor.
2166 	 */
2167 	atomic_and_ulong(&pmap->pm_cpus, ~(1UL << cpu_number()));
2168 }
2169 
2170 /*
2171  * pmap_zero_page:		[ INTERFACE ]
2172  *
2173  *	Zero the specified (machine independent) page by mapping the page
2174  *	into virtual memory and clear its contents, one machine dependent
2175  *	page at a time.
2176  *
2177  *	Note: no locking is necessary in this function.
2178  */
2179 void
2180 pmap_zero_page(paddr_t phys)
2181 {
2182 	u_long *p0, *p1, *pend;
2183 
2184 #ifdef DEBUG
2185 	if (pmapdebug & PDB_FOLLOW)
2186 		printf("pmap_zero_page(%lx)\n", phys);
2187 #endif
2188 
2189 	p0 = (u_long *)ALPHA_PHYS_TO_K0SEG(phys);
2190 	p1 = NULL;
2191 	pend = (u_long *)((u_long)p0 + PAGE_SIZE);
2192 
2193 	/*
2194 	 * Unroll the loop a bit, doing 16 quadwords per iteration.
2195 	 * Do only 8 back-to-back stores, and alternate registers.
2196 	 */
2197 	do {
2198 		__asm volatile(
2199 		"# BEGIN loop body\n"
2200 		"	addq	%2, (8 * 8), %1		\n"
2201 		"	stq	$31, (0 * 8)(%0)	\n"
2202 		"	stq	$31, (1 * 8)(%0)	\n"
2203 		"	stq	$31, (2 * 8)(%0)	\n"
2204 		"	stq	$31, (3 * 8)(%0)	\n"
2205 		"	stq	$31, (4 * 8)(%0)	\n"
2206 		"	stq	$31, (5 * 8)(%0)	\n"
2207 		"	stq	$31, (6 * 8)(%0)	\n"
2208 		"	stq	$31, (7 * 8)(%0)	\n"
2209 		"					\n"
2210 		"	addq	%3, (8 * 8), %0		\n"
2211 		"	stq	$31, (0 * 8)(%1)	\n"
2212 		"	stq	$31, (1 * 8)(%1)	\n"
2213 		"	stq	$31, (2 * 8)(%1)	\n"
2214 		"	stq	$31, (3 * 8)(%1)	\n"
2215 		"	stq	$31, (4 * 8)(%1)	\n"
2216 		"	stq	$31, (5 * 8)(%1)	\n"
2217 		"	stq	$31, (6 * 8)(%1)	\n"
2218 		"	stq	$31, (7 * 8)(%1)	\n"
2219 		"	# END loop body"
2220 		: "=r" (p0), "=r" (p1)
2221 		: "0" (p0), "1" (p1)
2222 		: "memory");
2223 	} while (p0 < pend);
2224 }
2225 
2226 /*
2227  * pmap_copy_page:		[ INTERFACE ]
2228  *
2229  *	Copy the specified (machine independent) page by mapping the page
2230  *	into virtual memory and using memcpy to copy the page, one machine
2231  *	dependent page at a time.
2232  *
2233  *	Note: no locking is necessary in this function.
2234  */
2235 void
2236 pmap_copy_page(paddr_t src, paddr_t dst)
2237 {
2238 	const void *s;
2239 	void *d;
2240 
2241 #ifdef DEBUG
2242 	if (pmapdebug & PDB_FOLLOW)
2243 		printf("pmap_copy_page(%lx, %lx)\n", src, dst);
2244 #endif
2245 	s = (const void *)ALPHA_PHYS_TO_K0SEG(src);
2246 	d = (void *)ALPHA_PHYS_TO_K0SEG(dst);
2247 	memcpy(d, s, PAGE_SIZE);
2248 }
2249 
2250 /*
2251  * pmap_pageidlezero:		[ INTERFACE ]
2252  *
2253  *	Page zero'er for the idle loop.  Returns true if the
2254  *	page was zero'd, FLASE if we aborted for some reason.
2255  */
2256 bool
2257 pmap_pageidlezero(paddr_t pa)
2258 {
2259 	u_long *ptr;
2260 	int i, cnt = PAGE_SIZE / sizeof(u_long);
2261 
2262 	for (i = 0, ptr = (u_long *) ALPHA_PHYS_TO_K0SEG(pa); i < cnt; i++) {
2263 		if (sched_curcpu_runnable_p()) {
2264 			/*
2265 			 * An LWP has become ready.  Abort now,
2266 			 * so we don't keep it waiting while we
2267 			 * finish zeroing the page.
2268 			 */
2269 			return (false);
2270 		}
2271 		*ptr++ = 0;
2272 	}
2273 
2274 	return (true);
2275 }
2276 
2277 /*
2278  * pmap_clear_modify:		[ INTERFACE ]
2279  *
2280  *	Clear the modify bits on the specified physical page.
2281  */
2282 bool
2283 pmap_clear_modify(struct vm_page *pg)
2284 {
2285 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2286 	bool rv = false;
2287 	long cpu_id = cpu_number();
2288 	kmutex_t *lock;
2289 
2290 #ifdef DEBUG
2291 	if (pmapdebug & PDB_FOLLOW)
2292 		printf("pmap_clear_modify(%p)\n", pg);
2293 #endif
2294 
2295 	PMAP_HEAD_TO_MAP_LOCK();
2296 	lock = pmap_pvh_lock(pg);
2297 	mutex_enter(lock);
2298 
2299 	if (md->pvh_attrs & PGA_MODIFIED) {
2300 		rv = true;
2301 		pmap_changebit(pg, PG_FOW, ~0, cpu_id);
2302 		md->pvh_attrs &= ~PGA_MODIFIED;
2303 	}
2304 
2305 	mutex_exit(lock);
2306 	PMAP_HEAD_TO_MAP_UNLOCK();
2307 
2308 	return (rv);
2309 }
2310 
2311 /*
2312  * pmap_clear_reference:	[ INTERFACE ]
2313  *
2314  *	Clear the reference bit on the specified physical page.
2315  */
2316 bool
2317 pmap_clear_reference(struct vm_page *pg)
2318 {
2319 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2320 	bool rv = false;
2321 	long cpu_id = cpu_number();
2322 	kmutex_t *lock;
2323 
2324 #ifdef DEBUG
2325 	if (pmapdebug & PDB_FOLLOW)
2326 		printf("pmap_clear_reference(%p)\n", pg);
2327 #endif
2328 
2329 	PMAP_HEAD_TO_MAP_LOCK();
2330 	lock = pmap_pvh_lock(pg);
2331 	mutex_enter(lock);
2332 
2333 	if (md->pvh_attrs & PGA_REFERENCED) {
2334 		rv = true;
2335 		pmap_changebit(pg, PG_FOR | PG_FOW | PG_FOE, ~0, cpu_id);
2336 		md->pvh_attrs &= ~PGA_REFERENCED;
2337 	}
2338 
2339 	mutex_exit(lock);
2340 	PMAP_HEAD_TO_MAP_UNLOCK();
2341 
2342 	return (rv);
2343 }
2344 
2345 /*
2346  * pmap_is_referenced:		[ INTERFACE ]
2347  *
2348  *	Return whether or not the specified physical page is referenced
2349  *	by any physical maps.
2350  */
2351 /* See <machine/pmap.h> */
2352 
2353 /*
2354  * pmap_is_modified:		[ INTERFACE ]
2355  *
2356  *	Return whether or not the specified physical page is modified
2357  *	by any physical maps.
2358  */
2359 /* See <machine/pmap.h> */
2360 
2361 /*
2362  * pmap_phys_address:		[ INTERFACE ]
2363  *
2364  *	Return the physical address corresponding to the specified
2365  *	cookie.  Used by the device pager to decode a device driver's
2366  *	mmap entry point return value.
2367  *
2368  *	Note: no locking is necessary in this function.
2369  */
2370 paddr_t
2371 pmap_phys_address(paddr_t ppn)
2372 {
2373 
2374 	return (alpha_ptob(ppn));
2375 }
2376 
2377 /*
2378  * Miscellaneous support routines follow
2379  */
2380 
2381 /*
2382  * alpha_protection_init:
2383  *
2384  *	Initialize Alpha protection code array.
2385  *
2386  *	Note: no locking is necessary in this function.
2387  */
2388 static void
2389 alpha_protection_init(void)
2390 {
2391 	int prot, *kp, *up;
2392 
2393 	kp = protection_codes[0];
2394 	up = protection_codes[1];
2395 
2396 	for (prot = 0; prot < 8; prot++) {
2397 		kp[prot] = PG_ASM;
2398 		up[prot] = 0;
2399 
2400 		if (prot & VM_PROT_READ) {
2401 			kp[prot] |= PG_KRE;
2402 			up[prot] |= PG_KRE | PG_URE;
2403 		}
2404 		if (prot & VM_PROT_WRITE) {
2405 			kp[prot] |= PG_KWE;
2406 			up[prot] |= PG_KWE | PG_UWE;
2407 		}
2408 		if (prot & VM_PROT_EXECUTE) {
2409 			kp[prot] |= PG_EXEC | PG_KRE;
2410 			up[prot] |= PG_EXEC | PG_KRE | PG_URE;
2411 		} else {
2412 			kp[prot] |= PG_FOE;
2413 			up[prot] |= PG_FOE;
2414 		}
2415 	}
2416 }
2417 
2418 /*
2419  * pmap_remove_mapping:
2420  *
2421  *	Invalidate a single page denoted by pmap/va.
2422  *
2423  *	If (pte != NULL), it is the already computed PTE for the page.
2424  *
2425  *	Note: locking in this function is complicated by the fact
2426  *	that we can be called when the PV list is already locked.
2427  *	(pmap_page_protect()).  In this case, the caller must be
2428  *	careful to get the next PV entry while we remove this entry
2429  *	from beneath it.  We assume that the pmap itself is already
2430  *	locked; dolock applies only to the PV list.
2431  *
2432  *	Returns true or false, indicating if an I-stream sync needs
2433  *	to be initiated (for this CPU or for other CPUs).
2434  */
2435 static bool
2436 pmap_remove_mapping(pmap_t pmap, vaddr_t va, pt_entry_t *pte,
2437     bool dolock, long cpu_id)
2438 {
2439 	paddr_t pa;
2440 	struct vm_page *pg;		/* if != NULL, page is managed */
2441 	bool onpv;
2442 	bool hadasm;
2443 	bool isactive;
2444 	bool needisync = false;
2445 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
2446 
2447 #ifdef DEBUG
2448 	if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
2449 		printf("pmap_remove_mapping(%p, %lx, %p, %d, %ld)\n",
2450 		       pmap, va, pte, dolock, cpu_id);
2451 #endif
2452 
2453 	/*
2454 	 * PTE not provided, compute it from pmap and va.
2455 	 */
2456 	if (pte == NULL) {
2457 		pte = pmap_l3pte(pmap, va, NULL);
2458 		if (pmap_pte_v(pte) == 0)
2459 			return (false);
2460 	}
2461 
2462 	pa = pmap_pte_pa(pte);
2463 	onpv = (pmap_pte_pv(pte) != 0);
2464 	hadasm = (pmap_pte_asm(pte) != 0);
2465 	isactive = PMAP_ISACTIVE(pmap, cpu_id);
2466 
2467 	/*
2468 	 * Determine what we need to do about the I-stream.  If
2469 	 * PG_EXEC was set, we mark a user pmap as needing an
2470 	 * I-sync on the way out to userspace.  We always need
2471 	 * an immediate I-sync for the kernel pmap.
2472 	 */
2473 	if (pmap_pte_exec(pte)) {
2474 		if (pmap == pmap_kernel())
2475 			needisync = true;
2476 		else {
2477 			PMAP_SET_NEEDISYNC(pmap);
2478 			needisync = (pmap->pm_cpus != 0);
2479 		}
2480 	}
2481 
2482 	/*
2483 	 * Update statistics
2484 	 */
2485 	if (pmap_pte_w(pte))
2486 		PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
2487 	PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1);
2488 
2489 	/*
2490 	 * Invalidate the PTE after saving the reference modify info.
2491 	 */
2492 #ifdef DEBUG
2493 	if (pmapdebug & PDB_REMOVE)
2494 		printf("remove: invalidating pte at %p\n", pte);
2495 #endif
2496 	PMAP_SET_PTE(pte, PG_NV);
2497 
2498 	PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id);
2499 	PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0);
2500 	PMAP_TLB_SHOOTNOW();
2501 
2502 	/*
2503 	 * If we're removing a user mapping, check to see if we
2504 	 * can free page table pages.
2505 	 */
2506 	if (pmap != pmap_kernel()) {
2507 		/*
2508 		 * Delete the reference on the level 3 table.  It will
2509 		 * delete references on the level 2 and 1 tables as
2510 		 * appropriate.
2511 		 */
2512 		pmap_l3pt_delref(pmap, va, pte, cpu_id);
2513 	}
2514 
2515 	/*
2516 	 * If the mapping wasn't entered on the PV list, we're all done.
2517 	 */
2518 	if (onpv == false)
2519 		return (needisync);
2520 
2521 	/*
2522 	 * Remove it from the PV table.
2523 	 */
2524 	pg = PHYS_TO_VM_PAGE(pa);
2525 	KASSERT(pg != NULL);
2526 	pmap_pv_remove(pmap, pg, va, dolock);
2527 
2528 	return (needisync);
2529 }
2530 
2531 /*
2532  * pmap_changebit:
2533  *
2534  *	Set or clear the specified PTE bits for all mappings on the
2535  *	specified page.
2536  *
2537  *	Note: we assume that the pv_head is already locked, and that
2538  *	the caller has acquired a PV->pmap mutex so that we can lock
2539  *	the pmaps as we encounter them.
2540  */
2541 static void
2542 pmap_changebit(struct vm_page *pg, u_long set, u_long mask, long cpu_id)
2543 {
2544 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2545 	pv_entry_t pv;
2546 	pt_entry_t *pte, npte;
2547 	vaddr_t va;
2548 	bool hadasm, isactive;
2549 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
2550 
2551 #ifdef DEBUG
2552 	if (pmapdebug & PDB_BITS)
2553 		printf("pmap_changebit(%p, 0x%lx, 0x%lx)\n",
2554 		    pg, set, mask);
2555 #endif
2556 
2557 	/*
2558 	 * Loop over all current mappings setting/clearing as apropos.
2559 	 */
2560 	for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) {
2561 		va = pv->pv_va;
2562 
2563 		PMAP_LOCK(pv->pv_pmap);
2564 
2565 		pte = pv->pv_pte;
2566 		npte = (*pte | set) & mask;
2567 		if (*pte != npte) {
2568 			hadasm = (pmap_pte_asm(pte) != 0);
2569 			isactive = PMAP_ISACTIVE(pv->pv_pmap, cpu_id);
2570 			PMAP_SET_PTE(pte, npte);
2571 			PMAP_INVALIDATE_TLB(pv->pv_pmap, va, hadasm, isactive,
2572 			    cpu_id);
2573 			PMAP_TLB_SHOOTDOWN(pv->pv_pmap, va,
2574 			    hadasm ? PG_ASM : 0);
2575 		}
2576 		PMAP_UNLOCK(pv->pv_pmap);
2577 	}
2578 
2579 	PMAP_TLB_SHOOTNOW();
2580 }
2581 
2582 /*
2583  * pmap_emulate_reference:
2584  *
2585  *	Emulate reference and/or modified bit hits.
2586  *	Return 1 if this was an execute fault on a non-exec mapping,
2587  *	otherwise return 0.
2588  */
2589 int
2590 pmap_emulate_reference(struct lwp *l, vaddr_t v, int user, int type)
2591 {
2592 	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
2593 	pt_entry_t faultoff, *pte;
2594 	struct vm_page *pg;
2595 	paddr_t pa;
2596 	bool didlock = false;
2597 	bool exec = false;
2598 	long cpu_id = cpu_number();
2599 	kmutex_t *lock;
2600 
2601 #ifdef DEBUG
2602 	if (pmapdebug & PDB_FOLLOW)
2603 		printf("pmap_emulate_reference: %p, 0x%lx, %d, %d\n",
2604 		    l, v, user, type);
2605 #endif
2606 
2607 	/*
2608 	 * Convert process and virtual address to physical address.
2609 	 */
2610 	if (v >= VM_MIN_KERNEL_ADDRESS) {
2611 		if (user)
2612 			panic("pmap_emulate_reference: user ref to kernel");
2613 		/*
2614 		 * No need to lock here; kernel PT pages never go away.
2615 		 */
2616 		pte = PMAP_KERNEL_PTE(v);
2617 	} else {
2618 #ifdef DIAGNOSTIC
2619 		if (l == NULL)
2620 			panic("pmap_emulate_reference: bad proc");
2621 		if (l->l_proc->p_vmspace == NULL)
2622 			panic("pmap_emulate_reference: bad p_vmspace");
2623 #endif
2624 		PMAP_LOCK(pmap);
2625 		didlock = true;
2626 		pte = pmap_l3pte(pmap, v, NULL);
2627 		/*
2628 		 * We'll unlock below where we're done with the PTE.
2629 		 */
2630 	}
2631 	exec = pmap_pte_exec(pte);
2632 	if (!exec && type == ALPHA_MMCSR_FOE) {
2633 		if (didlock)
2634 			PMAP_UNLOCK(pmap);
2635 	       return (1);
2636 	}
2637 #ifdef DEBUG
2638 	if (pmapdebug & PDB_FOLLOW) {
2639 		printf("\tpte = %p, ", pte);
2640 		printf("*pte = 0x%lx\n", *pte);
2641 	}
2642 #endif
2643 #ifdef DEBUG				/* These checks are more expensive */
2644 	if (!pmap_pte_v(pte))
2645 		panic("pmap_emulate_reference: invalid pte");
2646 	if (type == ALPHA_MMCSR_FOW) {
2647 		if (!(*pte & (user ? PG_UWE : PG_UWE | PG_KWE)))
2648 			panic("pmap_emulate_reference: write but unwritable");
2649 		if (!(*pte & PG_FOW))
2650 			panic("pmap_emulate_reference: write but not FOW");
2651 	} else {
2652 		if (!(*pte & (user ? PG_URE : PG_URE | PG_KRE)))
2653 			panic("pmap_emulate_reference: !write but unreadable");
2654 		if (!(*pte & (PG_FOR | PG_FOE)))
2655 			panic("pmap_emulate_reference: !write but not FOR|FOE");
2656 	}
2657 	/* Other diagnostics? */
2658 #endif
2659 	pa = pmap_pte_pa(pte);
2660 
2661 	/*
2662 	 * We're now done with the PTE.  If it was a user pmap, unlock
2663 	 * it now.
2664 	 */
2665 	if (didlock)
2666 		PMAP_UNLOCK(pmap);
2667 
2668 #ifdef DEBUG
2669 	if (pmapdebug & PDB_FOLLOW)
2670 		printf("\tpa = 0x%lx\n", pa);
2671 #endif
2672 #ifdef DIAGNOSTIC
2673 	if (!uvm_pageismanaged(pa))
2674 		panic("pmap_emulate_reference(%p, 0x%lx, %d, %d): "
2675 		      "pa 0x%lx not managed", l, v, user, type, pa);
2676 #endif
2677 
2678 	/*
2679 	 * Twiddle the appropriate bits to reflect the reference
2680 	 * and/or modification..
2681 	 *
2682 	 * The rules:
2683 	 * 	(1) always mark page as used, and
2684 	 *	(2) if it was a write fault, mark page as modified.
2685 	 */
2686 	pg = PHYS_TO_VM_PAGE(pa);
2687 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2688 
2689 	PMAP_HEAD_TO_MAP_LOCK();
2690 	lock = pmap_pvh_lock(pg);
2691 	mutex_enter(lock);
2692 
2693 	if (type == ALPHA_MMCSR_FOW) {
2694 		md->pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED);
2695 		faultoff = PG_FOR | PG_FOW;
2696 	} else {
2697 		md->pvh_attrs |= PGA_REFERENCED;
2698 		faultoff = PG_FOR;
2699 		if (exec) {
2700 			faultoff |= PG_FOE;
2701 		}
2702 	}
2703 	pmap_changebit(pg, 0, ~faultoff, cpu_id);
2704 
2705 	mutex_exit(lock);
2706 	PMAP_HEAD_TO_MAP_UNLOCK();
2707 	return (0);
2708 }
2709 
2710 #ifdef DEBUG
2711 /*
2712  * pmap_pv_dump:
2713  *
2714  *	Dump the physical->virtual data for the specified page.
2715  */
2716 void
2717 pmap_pv_dump(paddr_t pa)
2718 {
2719 	struct vm_page *pg;
2720 	struct vm_page_md *md;
2721 	pv_entry_t pv;
2722 	kmutex_t *lock;
2723 
2724 	pg = PHYS_TO_VM_PAGE(pa);
2725 	md = VM_PAGE_TO_MD(pg);
2726 
2727 	lock = pmap_pvh_lock(pg);
2728 	mutex_enter(lock);
2729 
2730 	printf("pa 0x%lx (attrs = 0x%x):\n", pa, md->pvh_attrs);
2731 	for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next)
2732 		printf("     pmap %p, va 0x%lx\n",
2733 		    pv->pv_pmap, pv->pv_va);
2734 	printf("\n");
2735 
2736 	mutex_exit(lock);
2737 }
2738 #endif
2739 
2740 /*
2741  * vtophys:
2742  *
2743  *	Return the physical address corresponding to the K0SEG or
2744  *	K1SEG address provided.
2745  *
2746  *	Note: no locking is necessary in this function.
2747  */
2748 paddr_t
2749 vtophys(vaddr_t vaddr)
2750 {
2751 	pt_entry_t *pte;
2752 	paddr_t paddr = 0;
2753 
2754 	if (vaddr < ALPHA_K0SEG_BASE)
2755 		printf("vtophys: invalid vaddr 0x%lx", vaddr);
2756 	else if (vaddr <= ALPHA_K0SEG_END)
2757 		paddr = ALPHA_K0SEG_TO_PHYS(vaddr);
2758 	else {
2759 		pte = PMAP_KERNEL_PTE(vaddr);
2760 		if (pmap_pte_v(pte))
2761 			paddr = pmap_pte_pa(pte) | (vaddr & PGOFSET);
2762 	}
2763 
2764 #if 0
2765 	printf("vtophys(0x%lx) -> 0x%lx\n", vaddr, paddr);
2766 #endif
2767 
2768 	return (paddr);
2769 }
2770 
2771 /******************** pv_entry management ********************/
2772 
2773 /*
2774  * pmap_pv_enter:
2775  *
2776  *	Add a physical->virtual entry to the pv_table.
2777  */
2778 static int
2779 pmap_pv_enter(pmap_t pmap, struct vm_page *pg, vaddr_t va, pt_entry_t *pte,
2780     bool dolock)
2781 {
2782 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2783 	pv_entry_t newpv;
2784 	kmutex_t *lock;
2785 
2786 	/*
2787 	 * Allocate and fill in the new pv_entry.
2788 	 */
2789 	newpv = pmap_pv_alloc();
2790 	if (newpv == NULL)
2791 		return ENOMEM;
2792 	newpv->pv_va = va;
2793 	newpv->pv_pmap = pmap;
2794 	newpv->pv_pte = pte;
2795 
2796 	if (dolock) {
2797 		lock = pmap_pvh_lock(pg);
2798 		mutex_enter(lock);
2799 	}
2800 
2801 #ifdef DEBUG
2802     {
2803 	pv_entry_t pv;
2804 	/*
2805 	 * Make sure the entry doesn't already exist.
2806 	 */
2807 	for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) {
2808 		if (pmap == pv->pv_pmap && va == pv->pv_va) {
2809 			printf("pmap = %p, va = 0x%lx\n", pmap, va);
2810 			panic("pmap_pv_enter: already in pv table");
2811 		}
2812 	}
2813     }
2814 #endif
2815 
2816 	/*
2817 	 * ...and put it in the list.
2818 	 */
2819 	newpv->pv_next = md->pvh_list;
2820 	md->pvh_list = newpv;
2821 
2822 	if (dolock) {
2823 		mutex_exit(lock);
2824 	}
2825 
2826 	return 0;
2827 }
2828 
2829 /*
2830  * pmap_pv_remove:
2831  *
2832  *	Remove a physical->virtual entry from the pv_table.
2833  */
2834 static void
2835 pmap_pv_remove(pmap_t pmap, struct vm_page *pg, vaddr_t va, bool dolock)
2836 {
2837 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2838 	pv_entry_t pv, *pvp;
2839 	kmutex_t *lock;
2840 
2841 	if (dolock) {
2842 		lock = pmap_pvh_lock(pg);
2843 		mutex_enter(lock);
2844 	} else {
2845 		lock = NULL; /* XXX stupid gcc */
2846 	}
2847 
2848 	/*
2849 	 * Find the entry to remove.
2850 	 */
2851 	for (pvp = &md->pvh_list, pv = *pvp;
2852 	     pv != NULL; pvp = &pv->pv_next, pv = *pvp)
2853 		if (pmap == pv->pv_pmap && va == pv->pv_va)
2854 			break;
2855 
2856 #ifdef DEBUG
2857 	if (pv == NULL)
2858 		panic("pmap_pv_remove: not in pv table");
2859 #endif
2860 
2861 	*pvp = pv->pv_next;
2862 
2863 	if (dolock) {
2864 		mutex_exit(lock);
2865 	}
2866 
2867 	pmap_pv_free(pv);
2868 }
2869 
2870 /*
2871  * pmap_pv_page_alloc:
2872  *
2873  *	Allocate a page for the pv_entry pool.
2874  */
2875 static void *
2876 pmap_pv_page_alloc(struct pool *pp, int flags)
2877 {
2878 	paddr_t pg;
2879 
2880 	if (pmap_physpage_alloc(PGU_PVENT, &pg))
2881 		return ((void *)ALPHA_PHYS_TO_K0SEG(pg));
2882 	return (NULL);
2883 }
2884 
2885 /*
2886  * pmap_pv_page_free:
2887  *
2888  *	Free a pv_entry pool page.
2889  */
2890 static void
2891 pmap_pv_page_free(struct pool *pp, void *v)
2892 {
2893 
2894 	pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t)v));
2895 }
2896 
2897 /******************** misc. functions ********************/
2898 
2899 /*
2900  * pmap_physpage_alloc:
2901  *
2902  *	Allocate a single page from the VM system and return the
2903  *	physical address for that page.
2904  */
2905 static bool
2906 pmap_physpage_alloc(int usage, paddr_t *pap)
2907 {
2908 	struct vm_page *pg;
2909 	paddr_t pa;
2910 
2911 	/*
2912 	 * Don't ask for a zero'd page in the L1PT case -- we will
2913 	 * properly initialize it in the constructor.
2914 	 */
2915 
2916 	pg = uvm_pagealloc(NULL, 0, NULL, usage == PGU_L1PT ?
2917 	    UVM_PGA_USERESERVE : UVM_PGA_USERESERVE|UVM_PGA_ZERO);
2918 	if (pg != NULL) {
2919 		pa = VM_PAGE_TO_PHYS(pg);
2920 #ifdef DEBUG
2921 		struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2922 		if (md->pvh_refcnt != 0) {
2923 			printf("pmap_physpage_alloc: page 0x%lx has "
2924 			    "%d references\n", pa, md->pvh_refcnt);
2925 			panic("pmap_physpage_alloc");
2926 		}
2927 #endif
2928 		*pap = pa;
2929 		return (true);
2930 	}
2931 	return (false);
2932 }
2933 
2934 /*
2935  * pmap_physpage_free:
2936  *
2937  *	Free the single page table page at the specified physical address.
2938  */
2939 static void
2940 pmap_physpage_free(paddr_t pa)
2941 {
2942 	struct vm_page *pg;
2943 
2944 	if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL)
2945 		panic("pmap_physpage_free: bogus physical page address");
2946 
2947 #ifdef DEBUG
2948 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2949 	if (md->pvh_refcnt != 0)
2950 		panic("pmap_physpage_free: page still has references");
2951 #endif
2952 
2953 	uvm_pagefree(pg);
2954 }
2955 
2956 /*
2957  * pmap_physpage_addref:
2958  *
2959  *	Add a reference to the specified special use page.
2960  */
2961 static int
2962 pmap_physpage_addref(void *kva)
2963 {
2964 	struct vm_page *pg;
2965 	struct vm_page_md *md;
2966 	paddr_t pa;
2967 
2968 	pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva));
2969 	pg = PHYS_TO_VM_PAGE(pa);
2970 	md = VM_PAGE_TO_MD(pg);
2971 
2972 	KASSERT((int)md->pvh_refcnt >= 0);
2973 
2974 	return atomic_inc_uint_nv(&md->pvh_refcnt);
2975 }
2976 
2977 /*
2978  * pmap_physpage_delref:
2979  *
2980  *	Delete a reference to the specified special use page.
2981  */
2982 static int
2983 pmap_physpage_delref(void *kva)
2984 {
2985 	struct vm_page *pg;
2986 	struct vm_page_md *md;
2987 	paddr_t pa;
2988 
2989 	pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva));
2990 	pg = PHYS_TO_VM_PAGE(pa);
2991 	md = VM_PAGE_TO_MD(pg);
2992 
2993 	KASSERT((int)md->pvh_refcnt > 0);
2994 
2995 	return atomic_dec_uint_nv(&md->pvh_refcnt);
2996 }
2997 
2998 /******************** page table page management ********************/
2999 
3000 /*
3001  * pmap_growkernel:		[ INTERFACE ]
3002  *
3003  *	Grow the kernel address space.  This is a hint from the
3004  *	upper layer to pre-allocate more kernel PT pages.
3005  */
3006 vaddr_t
3007 pmap_growkernel(vaddr_t maxkvaddr)
3008 {
3009 	struct pmap *kpm = pmap_kernel(), *pm;
3010 	paddr_t ptaddr;
3011 	pt_entry_t *l1pte, *l2pte, pte;
3012 	vaddr_t va;
3013 	int l1idx;
3014 
3015 	rw_enter(&pmap_growkernel_lock, RW_WRITER);
3016 
3017 	if (maxkvaddr <= virtual_end)
3018 		goto out;		/* we are OK */
3019 
3020 	va = virtual_end;
3021 
3022 	while (va < maxkvaddr) {
3023 		/*
3024 		 * If there is no valid L1 PTE (i.e. no L2 PT page),
3025 		 * allocate a new L2 PT page and insert it into the
3026 		 * L1 map.
3027 		 */
3028 		l1pte = pmap_l1pte(kpm, va);
3029 		if (pmap_pte_v(l1pte) == 0) {
3030 			/*
3031 			 * XXX PGU_NORMAL?  It's not a "traditional" PT page.
3032 			 */
3033 			if (uvm.page_init_done == false) {
3034 				/*
3035 				 * We're growing the kernel pmap early (from
3036 				 * uvm_pageboot_alloc()).  This case must
3037 				 * be handled a little differently.
3038 				 */
3039 				ptaddr = ALPHA_K0SEG_TO_PHYS(
3040 				    pmap_steal_memory(PAGE_SIZE, NULL, NULL));
3041 			} else if (pmap_physpage_alloc(PGU_NORMAL,
3042 				   &ptaddr) == false)
3043 				goto die;
3044 			pte = (atop(ptaddr) << PG_SHIFT) |
3045 			    PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
3046 			*l1pte = pte;
3047 
3048 			l1idx = l1pte_index(va);
3049 
3050 			/* Update all the user pmaps. */
3051 			mutex_enter(&pmap_all_pmaps_lock);
3052 			for (pm = TAILQ_FIRST(&pmap_all_pmaps);
3053 			     pm != NULL; pm = TAILQ_NEXT(pm, pm_list)) {
3054 				/* Skip the kernel pmap. */
3055 				if (pm == pmap_kernel())
3056 					continue;
3057 
3058 				PMAP_LOCK(pm);
3059 				if (pm->pm_lev1map == kernel_lev1map) {
3060 					PMAP_UNLOCK(pm);
3061 					continue;
3062 				}
3063 				pm->pm_lev1map[l1idx] = pte;
3064 				PMAP_UNLOCK(pm);
3065 			}
3066 			mutex_exit(&pmap_all_pmaps_lock);
3067 		}
3068 
3069 		/*
3070 		 * Have an L2 PT page now, add the L3 PT page.
3071 		 */
3072 		l2pte = pmap_l2pte(kpm, va, l1pte);
3073 		KASSERT(pmap_pte_v(l2pte) == 0);
3074 		if (uvm.page_init_done == false) {
3075 			/*
3076 			 * See above.
3077 			 */
3078 			ptaddr = ALPHA_K0SEG_TO_PHYS(
3079 			    pmap_steal_memory(PAGE_SIZE, NULL, NULL));
3080 		} else if (pmap_physpage_alloc(PGU_NORMAL, &ptaddr) == false)
3081 			goto die;
3082 		*l2pte = (atop(ptaddr) << PG_SHIFT) |
3083 		    PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
3084 		va += ALPHA_L2SEG_SIZE;
3085 	}
3086 
3087 	/* Invalidate the L1 PT cache. */
3088 	pool_cache_invalidate(&pmap_l1pt_cache);
3089 
3090 	virtual_end = va;
3091 
3092  out:
3093 	rw_exit(&pmap_growkernel_lock);
3094 
3095 	return (virtual_end);
3096 
3097  die:
3098 	panic("pmap_growkernel: out of memory");
3099 }
3100 
3101 /*
3102  * pmap_lev1map_create:
3103  *
3104  *	Create a new level 1 page table for the specified pmap.
3105  *
3106  *	Note: growkernel must already be held and the pmap either
3107  *	already locked or unreferenced globally.
3108  */
3109 static int
3110 pmap_lev1map_create(pmap_t pmap, long cpu_id)
3111 {
3112 	pt_entry_t *l1pt;
3113 
3114 	KASSERT(pmap != pmap_kernel());
3115 
3116 	KASSERT(pmap->pm_lev1map == kernel_lev1map);
3117 	KASSERT(pmap->pm_asni[cpu_id].pma_asn == PMAP_ASN_RESERVED);
3118 
3119 	/* Don't sleep -- we're called with locks held. */
3120 	l1pt = pool_cache_get(&pmap_l1pt_cache, PR_NOWAIT);
3121 	if (l1pt == NULL)
3122 		return (ENOMEM);
3123 
3124 	pmap->pm_lev1map = l1pt;
3125 	return (0);
3126 }
3127 
3128 /*
3129  * pmap_lev1map_destroy:
3130  *
3131  *	Destroy the level 1 page table for the specified pmap.
3132  *
3133  *	Note: growkernel must be held and the pmap must already be
3134  *	locked or not globally referenced.
3135  */
3136 static void
3137 pmap_lev1map_destroy(pmap_t pmap, long cpu_id)
3138 {
3139 	pt_entry_t *l1pt = pmap->pm_lev1map;
3140 
3141 	KASSERT(pmap != pmap_kernel());
3142 
3143 	/*
3144 	 * Go back to referencing the global kernel_lev1map.
3145 	 */
3146 	pmap->pm_lev1map = kernel_lev1map;
3147 
3148 	/*
3149 	 * Free the old level 1 page table page.
3150 	 */
3151 	pool_cache_put(&pmap_l1pt_cache, l1pt);
3152 }
3153 
3154 /*
3155  * pmap_l1pt_ctor:
3156  *
3157  *	Pool cache constructor for L1 PT pages.
3158  *
3159  *	Note: The growkernel lock is held across allocations
3160  *	from our pool_cache, so we don't need to acquire it
3161  *	ourselves.
3162  */
3163 static int
3164 pmap_l1pt_ctor(void *arg, void *object, int flags)
3165 {
3166 	pt_entry_t *l1pt = object, pte;
3167 	int i;
3168 
3169 	/*
3170 	 * Initialize the new level 1 table by zeroing the
3171 	 * user portion and copying the kernel mappings into
3172 	 * the kernel portion.
3173 	 */
3174 	for (i = 0; i < l1pte_index(VM_MIN_KERNEL_ADDRESS); i++)
3175 		l1pt[i] = 0;
3176 
3177 	for (i = l1pte_index(VM_MIN_KERNEL_ADDRESS);
3178 	     i <= l1pte_index(VM_MAX_KERNEL_ADDRESS); i++)
3179 		l1pt[i] = kernel_lev1map[i];
3180 
3181 	/*
3182 	 * Now, map the new virtual page table.  NOTE: NO ASM!
3183 	 */
3184 	pte = ((ALPHA_K0SEG_TO_PHYS((vaddr_t) l1pt) >> PGSHIFT) << PG_SHIFT) |
3185 	    PG_V | PG_KRE | PG_KWE;
3186 	l1pt[l1pte_index(VPTBASE)] = pte;
3187 
3188 	return (0);
3189 }
3190 
3191 /*
3192  * pmap_l1pt_alloc:
3193  *
3194  *	Page alloctaor for L1 PT pages.
3195  */
3196 static void *
3197 pmap_l1pt_alloc(struct pool *pp, int flags)
3198 {
3199 	paddr_t ptpa;
3200 
3201 	/*
3202 	 * Attempt to allocate a free page.
3203 	 */
3204 	if (pmap_physpage_alloc(PGU_L1PT, &ptpa) == false)
3205 		return (NULL);
3206 
3207 	return ((void *) ALPHA_PHYS_TO_K0SEG(ptpa));
3208 }
3209 
3210 /*
3211  * pmap_l1pt_free:
3212  *
3213  *	Page freer for L1 PT pages.
3214  */
3215 static void
3216 pmap_l1pt_free(struct pool *pp, void *v)
3217 {
3218 
3219 	pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t) v));
3220 }
3221 
3222 /*
3223  * pmap_ptpage_alloc:
3224  *
3225  *	Allocate a level 2 or level 3 page table page, and
3226  *	initialize the PTE that references it.
3227  *
3228  *	Note: the pmap must already be locked.
3229  */
3230 static int
3231 pmap_ptpage_alloc(pmap_t pmap, pt_entry_t *pte, int usage)
3232 {
3233 	paddr_t ptpa;
3234 
3235 	/*
3236 	 * Allocate the page table page.
3237 	 */
3238 	if (pmap_physpage_alloc(usage, &ptpa) == false)
3239 		return (ENOMEM);
3240 
3241 	/*
3242 	 * Initialize the referencing PTE.
3243 	 */
3244 	PMAP_SET_PTE(pte, ((ptpa >> PGSHIFT) << PG_SHIFT) |
3245 	    PG_V | PG_KRE | PG_KWE | PG_WIRED |
3246 	    (pmap == pmap_kernel() ? PG_ASM : 0));
3247 
3248 	return (0);
3249 }
3250 
3251 /*
3252  * pmap_ptpage_free:
3253  *
3254  *	Free the level 2 or level 3 page table page referenced
3255  *	be the provided PTE.
3256  *
3257  *	Note: the pmap must already be locked.
3258  */
3259 static void
3260 pmap_ptpage_free(pmap_t pmap, pt_entry_t *pte)
3261 {
3262 	paddr_t ptpa;
3263 
3264 	/*
3265 	 * Extract the physical address of the page from the PTE
3266 	 * and clear the entry.
3267 	 */
3268 	ptpa = pmap_pte_pa(pte);
3269 	PMAP_SET_PTE(pte, PG_NV);
3270 
3271 #ifdef DEBUG
3272 	pmap_zero_page(ptpa);
3273 #endif
3274 	pmap_physpage_free(ptpa);
3275 }
3276 
3277 /*
3278  * pmap_l3pt_delref:
3279  *
3280  *	Delete a reference on a level 3 PT page.  If the reference drops
3281  *	to zero, free it.
3282  *
3283  *	Note: the pmap must already be locked.
3284  */
3285 static void
3286 pmap_l3pt_delref(pmap_t pmap, vaddr_t va, pt_entry_t *l3pte, long cpu_id)
3287 {
3288 	pt_entry_t *l1pte, *l2pte;
3289 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
3290 
3291 	l1pte = pmap_l1pte(pmap, va);
3292 	l2pte = pmap_l2pte(pmap, va, l1pte);
3293 
3294 #ifdef DIAGNOSTIC
3295 	if (pmap == pmap_kernel())
3296 		panic("pmap_l3pt_delref: kernel pmap");
3297 #endif
3298 
3299 	if (pmap_physpage_delref(l3pte) == 0) {
3300 		/*
3301 		 * No more mappings; we can free the level 3 table.
3302 		 */
3303 #ifdef DEBUG
3304 		if (pmapdebug & PDB_PTPAGE)
3305 			printf("pmap_l3pt_delref: freeing level 3 table at "
3306 			    "0x%lx\n", pmap_pte_pa(l2pte));
3307 #endif
3308 		pmap_ptpage_free(pmap, l2pte);
3309 
3310 		/*
3311 		 * We've freed a level 3 table, so we must
3312 		 * invalidate the TLB entry for that PT page
3313 		 * in the Virtual Page Table VA range, because
3314 		 * otherwise the PALcode will service a TLB
3315 		 * miss using the stale VPT TLB entry it entered
3316 		 * behind our back to shortcut to the VA's PTE.
3317 		 */
3318 		PMAP_INVALIDATE_TLB(pmap,
3319 		    (vaddr_t)(&VPT[VPT_INDEX(va)]), false,
3320 		    PMAP_ISACTIVE(pmap, cpu_id), cpu_id);
3321 		PMAP_TLB_SHOOTDOWN(pmap,
3322 		    (vaddr_t)(&VPT[VPT_INDEX(va)]), 0);
3323 		PMAP_TLB_SHOOTNOW();
3324 
3325 		/*
3326 		 * We've freed a level 3 table, so delete the reference
3327 		 * on the level 2 table.
3328 		 */
3329 		pmap_l2pt_delref(pmap, l1pte, l2pte, cpu_id);
3330 	}
3331 }
3332 
3333 /*
3334  * pmap_l2pt_delref:
3335  *
3336  *	Delete a reference on a level 2 PT page.  If the reference drops
3337  *	to zero, free it.
3338  *
3339  *	Note: the pmap must already be locked.
3340  */
3341 static void
3342 pmap_l2pt_delref(pmap_t pmap, pt_entry_t *l1pte, pt_entry_t *l2pte,
3343     long cpu_id)
3344 {
3345 
3346 #ifdef DIAGNOSTIC
3347 	if (pmap == pmap_kernel())
3348 		panic("pmap_l2pt_delref: kernel pmap");
3349 #endif
3350 
3351 	if (pmap_physpage_delref(l2pte) == 0) {
3352 		/*
3353 		 * No more mappings in this segment; we can free the
3354 		 * level 2 table.
3355 		 */
3356 #ifdef DEBUG
3357 		if (pmapdebug & PDB_PTPAGE)
3358 			printf("pmap_l2pt_delref: freeing level 2 table at "
3359 			    "0x%lx\n", pmap_pte_pa(l1pte));
3360 #endif
3361 		pmap_ptpage_free(pmap, l1pte);
3362 
3363 		/*
3364 		 * We've freed a level 2 table, so delete the reference
3365 		 * on the level 1 table.
3366 		 */
3367 		pmap_l1pt_delref(pmap, l1pte, cpu_id);
3368 	}
3369 }
3370 
3371 /*
3372  * pmap_l1pt_delref:
3373  *
3374  *	Delete a reference on a level 1 PT page.  If the reference drops
3375  *	to zero, free it.
3376  *
3377  *	Note: the pmap must already be locked.
3378  */
3379 static void
3380 pmap_l1pt_delref(pmap_t pmap, pt_entry_t *l1pte, long cpu_id)
3381 {
3382 
3383 #ifdef DIAGNOSTIC
3384 	if (pmap == pmap_kernel())
3385 		panic("pmap_l1pt_delref: kernel pmap");
3386 #endif
3387 
3388 	(void)pmap_physpage_delref(l1pte);
3389 }
3390 
3391 /******************** Address Space Number management ********************/
3392 
3393 /*
3394  * pmap_asn_alloc:
3395  *
3396  *	Allocate and assign an ASN to the specified pmap.
3397  *
3398  *	Note: the pmap must already be locked.  This may be called from
3399  *	an interprocessor interrupt, and in that case, the sender of
3400  *	the IPI has the pmap lock.
3401  */
3402 static void
3403 pmap_asn_alloc(pmap_t pmap, long cpu_id)
3404 {
3405 	struct pmap_asn_info *pma = &pmap->pm_asni[cpu_id];
3406 	struct pmap_asn_info *cpma = &pmap_asn_info[cpu_id];
3407 
3408 #ifdef DEBUG
3409 	if (pmapdebug & (PDB_FOLLOW|PDB_ASN))
3410 		printf("pmap_asn_alloc(%p)\n", pmap);
3411 #endif
3412 
3413 	/*
3414 	 * If the pmap is still using the global kernel_lev1map, there
3415 	 * is no need to assign an ASN at this time, because only
3416 	 * kernel mappings exist in that map, and all kernel mappings
3417 	 * have PG_ASM set.  If the pmap eventually gets its own
3418 	 * lev1map, an ASN will be allocated at that time.
3419 	 *
3420 	 * Only the kernel pmap will reference kernel_lev1map.  Do the
3421 	 * same old fixups, but note that we no longer need the pmap
3422 	 * to be locked if we're in this mode, since pm_lev1map will
3423 	 * never change.
3424 	 * #endif
3425 	 */
3426 	if (pmap->pm_lev1map == kernel_lev1map) {
3427 #ifdef DEBUG
3428 		if (pmapdebug & PDB_ASN)
3429 			printf("pmap_asn_alloc: still references "
3430 			    "kernel_lev1map\n");
3431 #endif
3432 #if defined(MULTIPROCESSOR)
3433 		/*
3434 		 * In a multiprocessor system, it's possible to
3435 		 * get here without having PMAP_ASN_RESERVED in
3436 		 * pmap->pm_asni[cpu_id].pma_asn; see pmap_lev1map_destroy().
3437 		 *
3438 		 * So, what we do here, is simply assign the reserved
3439 		 * ASN for kernel_lev1map users and let things
3440 		 * continue on.  We do, however, let uniprocessor
3441 		 * configurations continue to make its assertion.
3442 		 */
3443 		pma->pma_asn = PMAP_ASN_RESERVED;
3444 #else
3445 		KASSERT(pma->pma_asn == PMAP_ASN_RESERVED);
3446 #endif /* MULTIPROCESSOR */
3447 		return;
3448 	}
3449 
3450 	/*
3451 	 * On processors which do not implement ASNs, the swpctx PALcode
3452 	 * operation will automatically invalidate the TLB and I-cache,
3453 	 * so we don't need to do that here.
3454 	 */
3455 	if (pmap_max_asn == 0) {
3456 		/*
3457 		 * Refresh the pmap's generation number, to
3458 		 * simplify logic elsewhere.
3459 		 */
3460 		pma->pma_asngen = cpma->pma_asngen;
3461 #ifdef DEBUG
3462 		if (pmapdebug & PDB_ASN)
3463 			printf("pmap_asn_alloc: no ASNs, using asngen %lu\n",
3464 			    pma->pma_asngen);
3465 #endif
3466 		return;
3467 	}
3468 
3469 	/*
3470 	 * Hopefully, we can continue using the one we have...
3471 	 */
3472 	if (pma->pma_asn != PMAP_ASN_RESERVED &&
3473 	    pma->pma_asngen == cpma->pma_asngen) {
3474 		/*
3475 		 * ASN is still in the current generation; keep on using it.
3476 		 */
3477 #ifdef DEBUG
3478 		if (pmapdebug & PDB_ASN)
3479 			printf("pmap_asn_alloc: same generation, keeping %u\n",
3480 			    pma->pma_asn);
3481 #endif
3482 		return;
3483 	}
3484 
3485 	/*
3486 	 * Need to assign a new ASN.  Grab the next one, incrementing
3487 	 * the generation number if we have to.
3488 	 */
3489 	if (cpma->pma_asn > pmap_max_asn) {
3490 		/*
3491 		 * Invalidate all non-PG_ASM TLB entries and the
3492 		 * I-cache, and bump the generation number.
3493 		 */
3494 		ALPHA_TBIAP();
3495 		alpha_pal_imb();
3496 
3497 		cpma->pma_asn = 1;
3498 		cpma->pma_asngen++;
3499 #ifdef DIAGNOSTIC
3500 		if (cpma->pma_asngen == 0) {
3501 			/*
3502 			 * The generation number has wrapped.  We could
3503 			 * handle this scenario by traversing all of
3504 			 * the pmaps, and invalidating the generation
3505 			 * number on those which are not currently
3506 			 * in use by this processor.
3507 			 *
3508 			 * However... considering that we're using
3509 			 * an unsigned 64-bit integer for generation
3510 			 * numbers, on non-ASN CPUs, we won't wrap
3511 			 * for approx. 585 million years, or 75 billion
3512 			 * years on a 128-ASN CPU (assuming 1000 switch
3513 			 * operations per second).
3514 			 *
3515 			 * So, we don't bother.
3516 			 */
3517 			panic("pmap_asn_alloc: too much uptime");
3518 		}
3519 #endif
3520 #ifdef DEBUG
3521 		if (pmapdebug & PDB_ASN)
3522 			printf("pmap_asn_alloc: generation bumped to %lu\n",
3523 			    cpma->pma_asngen);
3524 #endif
3525 	}
3526 
3527 	/*
3528 	 * Assign the new ASN and validate the generation number.
3529 	 */
3530 	pma->pma_asn = cpma->pma_asn++;
3531 	pma->pma_asngen = cpma->pma_asngen;
3532 
3533 #ifdef DEBUG
3534 	if (pmapdebug & PDB_ASN)
3535 		printf("pmap_asn_alloc: assigning %u to pmap %p\n",
3536 		    pma->pma_asn, pmap);
3537 #endif
3538 
3539 	/*
3540 	 * Have a new ASN, so there's no need to sync the I-stream
3541 	 * on the way back out to userspace.
3542 	 */
3543 	atomic_and_ulong(&pmap->pm_needisync, ~(1UL << cpu_id));
3544 }
3545 
3546 #if defined(MULTIPROCESSOR)
3547 /******************** TLB shootdown code ********************/
3548 
3549 /*
3550  * pmap_tlb_shootdown:
3551  *
3552  *	Cause the TLB entry for pmap/va to be shot down.
3553  *
3554  *	NOTE: The pmap must be locked here.
3555  */
3556 void
3557 pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, u_long *cpumaskp)
3558 {
3559 	struct pmap_tlb_shootdown_q *pq;
3560 	struct pmap_tlb_shootdown_job *pj;
3561 	struct cpu_info *ci, *self = curcpu();
3562 	u_long cpumask;
3563 	CPU_INFO_ITERATOR cii;
3564 
3565 	KASSERT((pmap == pmap_kernel()) || mutex_owned(&pmap->pm_lock));
3566 
3567 	cpumask = 0;
3568 
3569 	for (CPU_INFO_FOREACH(cii, ci)) {
3570 		if (ci == self)
3571 			continue;
3572 
3573 		/*
3574 		 * The pmap must be locked (unless its the kernel
3575 		 * pmap, in which case it is okay for it to be
3576 		 * unlocked), which prevents it from  becoming
3577 		 * active on any additional processors.  This makes
3578 		 * it safe to check for activeness.  If it's not
3579 		 * active on the processor in question, then just
3580 		 * mark it as needing a new ASN the next time it
3581 		 * does, saving the IPI.  We always have to send
3582 		 * the IPI for the kernel pmap.
3583 		 *
3584 		 * Note if it's marked active now, and it becomes
3585 		 * inactive by the time the processor receives
3586 		 * the IPI, that's okay, because it does the right
3587 		 * thing with it later.
3588 		 */
3589 		if (pmap != pmap_kernel() &&
3590 		    PMAP_ISACTIVE(pmap, ci->ci_cpuid) == 0) {
3591 			PMAP_INVALIDATE_ASN(pmap, ci->ci_cpuid);
3592 			continue;
3593 		}
3594 
3595 		cpumask |= 1UL << ci->ci_cpuid;
3596 
3597 		pq = &pmap_tlb_shootdown_q[ci->ci_cpuid];
3598 		mutex_spin_enter(&pq->pq_lock);
3599 
3600 		/*
3601 		 * Allocate a job.
3602 		 */
3603 		if (pq->pq_count < PMAP_TLB_SHOOTDOWN_MAXJOBS) {
3604 			pj = pool_cache_get(&pmap_tlb_shootdown_job_cache,
3605 			    PR_NOWAIT);
3606 		} else {
3607 			pj = NULL;
3608 		}
3609 
3610 		/*
3611 		 * If a global flush is already pending, we
3612 		 * don't really have to do anything else.
3613 		 */
3614 		pq->pq_pte |= pte;
3615 		if (pq->pq_tbia) {
3616 			mutex_spin_exit(&pq->pq_lock);
3617 			if (pj != NULL) {
3618 				pool_cache_put(&pmap_tlb_shootdown_job_cache,
3619 				    pj);
3620 			}
3621 			continue;
3622 		}
3623 		if (pj == NULL) {
3624 			/*
3625 			 * Couldn't allocate a job entry.  Just
3626 			 * tell the processor to kill everything.
3627 			 */
3628 			pq->pq_tbia = 1;
3629 		} else {
3630 			pj->pj_pmap = pmap;
3631 			pj->pj_va = va;
3632 			pj->pj_pte = pte;
3633 			pq->pq_count++;
3634 			TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list);
3635 		}
3636 		mutex_spin_exit(&pq->pq_lock);
3637 	}
3638 
3639 	*cpumaskp |= cpumask;
3640 }
3641 
3642 /*
3643  * pmap_tlb_shootnow:
3644  *
3645  *	Process the TLB shootdowns that we have been accumulating
3646  *	for the specified processor set.
3647  */
3648 void
3649 pmap_tlb_shootnow(u_long cpumask)
3650 {
3651 
3652 	alpha_multicast_ipi(cpumask, ALPHA_IPI_SHOOTDOWN);
3653 }
3654 
3655 /*
3656  * pmap_do_tlb_shootdown:
3657  *
3658  *	Process pending TLB shootdown operations for this processor.
3659  */
3660 void
3661 pmap_do_tlb_shootdown(struct cpu_info *ci, struct trapframe *framep)
3662 {
3663 	u_long cpu_id = ci->ci_cpuid;
3664 	u_long cpu_mask = (1UL << cpu_id);
3665 	struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id];
3666 	struct pmap_tlb_shootdown_job *pj, *next;
3667 	TAILQ_HEAD(, pmap_tlb_shootdown_job) jobs;
3668 
3669 	TAILQ_INIT(&jobs);
3670 
3671 	mutex_spin_enter(&pq->pq_lock);
3672 	TAILQ_CONCAT(&jobs, &pq->pq_head, pj_list);
3673 	if (pq->pq_tbia) {
3674 		if (pq->pq_pte & PG_ASM)
3675 			ALPHA_TBIA();
3676 		else
3677 			ALPHA_TBIAP();
3678 		pq->pq_tbia = 0;
3679 		pq->pq_pte = 0;
3680 	} else {
3681 		TAILQ_FOREACH(pj, &jobs, pj_list) {
3682 			PMAP_INVALIDATE_TLB(pj->pj_pmap, pj->pj_va,
3683 			    pj->pj_pte & PG_ASM,
3684 			    pj->pj_pmap->pm_cpus & cpu_mask, cpu_id);
3685 		}
3686 		pq->pq_pte = 0;
3687 	}
3688 	pq->pq_count = 0;
3689 	mutex_spin_exit(&pq->pq_lock);
3690 
3691 	/* Free jobs back to the cache. */
3692 	for (pj = TAILQ_FIRST(&jobs); pj != NULL; pj = next) {
3693 		next = TAILQ_NEXT(pj, pj_list);
3694 		pool_cache_put(&pmap_tlb_shootdown_job_cache, pj);
3695 	}
3696 }
3697 #endif /* MULTIPROCESSOR */
3698