xref: /netbsd-src/sys/arch/alpha/alpha/pmap.c (revision b7b7574d3bf8eeb51a1fa3977b59142ec6434a55)
1 /* $NetBSD: pmap.c,v 1.259 2014/01/01 22:35:54 matt Exp $ */
2 
3 /*-
4  * Copyright (c) 1998, 1999, 2000, 2001, 2007, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center and by Chris G. Demetriou.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * This code is derived from software contributed to Berkeley by
38  * the Systems Programming Group of the University of Utah Computer
39  * Science Department.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  * 3. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)pmap.c	8.6 (Berkeley) 5/27/94
66  */
67 
68 /*
69  * DEC Alpha physical map management code.
70  *
71  * History:
72  *
73  *	This pmap started life as a Motorola 68851/68030 pmap,
74  *	written by Mike Hibler at the University of Utah.
75  *
76  *	It was modified for the DEC Alpha by Chris Demetriou
77  *	at Carnegie Mellon University.
78  *
79  *	Support for non-contiguous physical memory was added by
80  *	Jason R. Thorpe of the Numerical Aerospace Simulation
81  *	Facility, NASA Ames Research Center and Chris Demetriou.
82  *
83  *	Page table management and a major cleanup were undertaken
84  *	by Jason R. Thorpe, with lots of help from Ross Harvey of
85  *	Avalon Computer Systems and from Chris Demetriou.
86  *
87  *	Support for the new UVM pmap interface was written by
88  *	Jason R. Thorpe.
89  *
90  *	Support for ASNs was written by Jason R. Thorpe, again
91  *	with help from Chris Demetriou and Ross Harvey.
92  *
93  *	The locking protocol was written by Jason R. Thorpe,
94  *	using Chuck Cranor's i386 pmap for UVM as a model.
95  *
96  *	TLB shootdown code was written by Jason R. Thorpe.
97  *
98  *	Multiprocessor modifications by Andrew Doran.
99  *
100  * Notes:
101  *
102  *	All page table access is done via K0SEG.  The one exception
103  *	to this is for kernel mappings.  Since all kernel page
104  *	tables are pre-allocated, we can use the Virtual Page Table
105  *	to access PTEs that map K1SEG addresses.
106  *
107  *	Kernel page table pages are statically allocated in
108  *	pmap_bootstrap(), and are never freed.  In the future,
109  *	support for dynamically adding additional kernel page
110  *	table pages may be added.  User page table pages are
111  *	dynamically allocated and freed.
112  *
113  * Bugs/misfeatures:
114  *
115  *	- Some things could be optimized.
116  */
117 
118 /*
119  *	Manages physical address maps.
120  *
121  *	Since the information managed by this module is
122  *	also stored by the logical address mapping module,
123  *	this module may throw away valid virtual-to-physical
124  *	mappings at almost any time.  However, invalidations
125  *	of virtual-to-physical mappings must be done as
126  *	requested.
127  *
128  *	In order to cope with hardware architectures which
129  *	make virtual-to-physical map invalidates expensive,
130  *	this module may delay invalidation or reduced-protection
131  *	operations until such time as they are actually
132  *	necessary.  This module is given full information as
133  *	to which processors are currently using which maps,
134  *	and to when physical maps must be made correct.
135  */
136 
137 #include "opt_lockdebug.h"
138 #include "opt_sysv.h"
139 #include "opt_multiprocessor.h"
140 
141 #include <sys/cdefs.h>			/* RCS ID & Copyright macro defns */
142 
143 __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.259 2014/01/01 22:35:54 matt Exp $");
144 
145 #include <sys/param.h>
146 #include <sys/systm.h>
147 #include <sys/kernel.h>
148 #include <sys/proc.h>
149 #include <sys/malloc.h>
150 #include <sys/pool.h>
151 #include <sys/buf.h>
152 #include <sys/shm.h>
153 #include <sys/atomic.h>
154 #include <sys/cpu.h>
155 
156 #include <uvm/uvm.h>
157 
158 #if defined(_PMAP_MAY_USE_PROM_CONSOLE) || defined(MULTIPROCESSOR)
159 #include <machine/rpb.h>
160 #endif
161 
162 #ifdef DEBUG
163 #define	PDB_FOLLOW	0x0001
164 #define	PDB_INIT	0x0002
165 #define	PDB_ENTER	0x0004
166 #define	PDB_REMOVE	0x0008
167 #define	PDB_CREATE	0x0010
168 #define	PDB_PTPAGE	0x0020
169 #define	PDB_ASN		0x0040
170 #define	PDB_BITS	0x0080
171 #define	PDB_COLLECT	0x0100
172 #define	PDB_PROTECT	0x0200
173 #define	PDB_BOOTSTRAP	0x1000
174 #define	PDB_PARANOIA	0x2000
175 #define	PDB_WIRING	0x4000
176 #define	PDB_PVDUMP	0x8000
177 
178 int debugmap = 0;
179 int pmapdebug = PDB_PARANOIA;
180 #endif
181 
182 /*
183  * Given a map and a machine independent protection code,
184  * convert to an alpha protection code.
185  */
186 #define pte_prot(m, p)	(protection_codes[m == pmap_kernel() ? 0 : 1][p])
187 static int	protection_codes[2][8];
188 
189 /*
190  * kernel_lev1map:
191  *
192  *	Kernel level 1 page table.  This maps all kernel level 2
193  *	page table pages, and is used as a template for all user
194  *	pmap level 1 page tables.  When a new user level 1 page
195  *	table is allocated, all kernel_lev1map PTEs for kernel
196  *	addresses are copied to the new map.
197  *
198  *	The kernel also has an initial set of kernel level 2 page
199  *	table pages.  These map the kernel level 3 page table pages.
200  *	As kernel level 3 page table pages are added, more level 2
201  *	page table pages may be added to map them.  These pages are
202  *	never freed.
203  *
204  *	Finally, the kernel also has an initial set of kernel level
205  *	3 page table pages.  These map pages in K1SEG.  More level
206  *	3 page table pages may be added at run-time if additional
207  *	K1SEG address space is required.  These pages are never freed.
208  *
209  * NOTE: When mappings are inserted into the kernel pmap, all
210  * level 2 and level 3 page table pages must already be allocated
211  * and mapped into the parent page table.
212  */
213 pt_entry_t	*kernel_lev1map;
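
/*
 * Illustrative sketch only (not compiled): how a new user level 1 page
 * table is seeded from kernel_lev1map, per the description above.  The
 * helper name below is hypothetical; in this pmap the equivalent work is
 * associated with the level 1 page table pool constructor,
 * pmap_l1pt_ctor().
 */
#if 0	/* example only */
static void
lev1map_seed_example(pt_entry_t *l1pt)
{
	int i;

	/* The user portion of a fresh level 1 table starts out empty... */
	for (i = 0; i < l1pte_index(VM_MIN_KERNEL_ADDRESS); i++)
		l1pt[i] = 0;

	/* ...and the kernel portion is copied from kernel_lev1map. */
	for (i = l1pte_index(VM_MIN_KERNEL_ADDRESS);
	     i <= l1pte_index(VM_MAX_KERNEL_ADDRESS); i++)
		l1pt[i] = kernel_lev1map[i];
}
#endif	/* example only */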
214 
215 /*
216  * Virtual Page Table.
217  */
218 static pt_entry_t *VPT;
219 
220 static struct {
221 	struct pmap k_pmap;
222 	struct pmap_asn_info k_asni[ALPHA_MAXPROCS];
223 } kernel_pmap_store;
224 
225 struct pmap *const kernel_pmap_ptr = &kernel_pmap_store.k_pmap;
226 
227 paddr_t    	avail_start;	/* PA of first available physical page */
228 paddr_t		avail_end;	/* PA of last available physical page */
229 static vaddr_t	virtual_end;	/* VA of last avail page (end of kernel AS) */
230 
231 static bool pmap_initialized;	/* Has pmap_init completed? */
232 
233 u_long		pmap_pages_stolen;	/* instrumentation */
234 
235 /*
236  * This variable contains the number of CPU IDs we need to allocate
237  * space for when allocating the pmap structure.  It is used to
238  * size a per-CPU array of ASN and ASN Generation number.
239  */
240 static u_long 	pmap_ncpuids;
241 
242 #ifndef PMAP_PV_LOWAT
243 #define	PMAP_PV_LOWAT	16
244 #endif
245 int		pmap_pv_lowat = PMAP_PV_LOWAT;
246 
247 /*
248  * List of all pmaps, used to update them when e.g. additional kernel
249  * page tables are allocated.  This list is kept LRU-ordered by
250  * pmap_activate().
251  */
252 static TAILQ_HEAD(, pmap) pmap_all_pmaps;
253 
254 /*
255  * The pools from which pmap structures and sub-structures are allocated.
256  */
257 static struct pool_cache pmap_pmap_cache;
258 static struct pool_cache pmap_l1pt_cache;
259 static struct pool_cache pmap_pv_cache;
260 
261 /*
262  * Address Space Numbers.
263  *
264  * On many implementations of the Alpha architecture, the TLB entries and
265  * I-cache blocks are tagged with a unique number within an implementation-
266  * specified range.  When a process context becomes active, the ASN is used
267  * to match TLB entries; if a TLB entry for a particular VA does not match
268  * the current ASN, it is ignored (one could think of the processor as
269  * having a collection of <max ASN> separate TLBs).  This allows operating
270  * system software to skip the TLB flush that would otherwise be necessary
271  * at context switch time.
272  *
273  * Alpha PTEs have a bit in them (PG_ASM - Address Space Match) that
274  * causes TLB entries to match any ASN.  The PALcode also provides
275  * a TBI (Translation Buffer Invalidate) operation that flushes all
276  * TLB entries that _do not_ have PG_ASM.  We use this bit for kernel
277  * mappings, so that invalidation of all user mappings does not invalidate
278  * kernel mappings (which are consistent across all processes).
279  *
280  * pmap_next_asn always indicates the next ASN to use.  When
281  * pmap_next_asn exceeds pmap_max_asn, we start a new ASN generation.
282  *
283  * When a new ASN generation is created, the per-process (i.e. non-PG_ASM)
284  * TLB entries and the I-cache are flushed, the generation number is bumped,
285  * and pmap_next_asn is changed to indicate the first non-reserved ASN.
286  *
287  * We reserve ASN #0 for pmaps that use the global kernel_lev1map.  This
288  * prevents the following scenario:
289  *
290  *	* New ASN generation starts, and process A is given ASN #0.
291  *
292  *	* A new process B (and thus new pmap) is created.  The ASN,
293  *	  for lack of a better value, is initialized to 0.
294  *
295  *	* Process B runs.  It is now using the TLB entries tagged
296  *	  by process A.  *poof*
297  *
298  * In the scenario above, in addition to the processor using incorrect
299  * TLB entries, the PALcode might use incorrect information to service a
300  * TLB miss.  (The PALcode uses the recursively mapped Virtual Page Table
301  * to locate the PTE for a faulting address, and tagged TLB entries exist
302  * for the Virtual Page Table addresses in order to speed up this procedure,
303  * as well.)
304  *
305  * By reserving an ASN for kernel_lev1map users, we are guaranteeing that
306  * new pmaps will initially run with no TLB entries for user addresses
307  * or VPT mappings that map user page tables.  Since kernel_lev1map only
308  * contains mappings for kernel addresses, and since those mappings
309  * are always made with PG_ASM, sharing an ASN for kernel_lev1map users is
310  * safe (since PG_ASM mappings match any ASN).
311  *
312  * On processors that do not support ASNs, the PALcode invalidates
313  * the TLB and I-cache automatically on swpctx.  We still go
314  * through the motions of assigning an ASN (really, just refreshing
315  * the ASN generation in this particular case) to keep the logic sane
316  * in other parts of the code.
317  */
318 static u_int	pmap_max_asn;		/* max ASN supported by the system */
319 					/* next ASN and cur ASN generation */
320 static struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS];
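
/*
 * Illustrative sketch only (not compiled): the per-CPU ASN allocation
 * policy described above, stripped to its core.  The function name is
 * hypothetical and the flush calls are shown for illustration only; the
 * real logic (including the multiprocessor details) lives in
 * pmap_asn_alloc().
 */
#if 0	/* example only */
static u_long
asn_alloc_example(struct pmap_asn_info *pma)
{

	if (pma->pma_asn + 1 > pmap_max_asn) {
		/*
		 * Out of ASNs; start a new generation: flush all
		 * non-ASM TLB entries and the I-cache, bump the
		 * generation number, and skip ASN #0 so that pmaps
		 * still referencing kernel_lev1map (which use the
		 * reserved ASN) never share a live ASN.
		 */
		ALPHA_TBIAP();
		alpha_pal_imb();
		pma->pma_asngen++;
		pma->pma_asn = 1;
	} else
		pma->pma_asn++;
	return (pma->pma_asn);
}
#endif	/* example only */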
321 
322 /*
323  * Locking:
324  *
325  *	READ/WRITE LOCKS
326  *	----------------
327  *
328  *	* pmap_main_lock - This lock is used to prevent deadlock and/or
329  *	  provide mutex access to the pmap module.  Most operations lock
330  *	  the pmap first, then PV lists as needed.  However, some operations,
331  *	  such as pmap_page_protect(), lock the PV lists before locking
332  *	  the pmaps.  To prevent deadlock, we require a mutex lock on the
333  *	  pmap module if locking in the PV->pmap direction.  This is
334  *	  implemented by acquiring a (shared) read lock on pmap_main_lock
335  *	  if locking pmap->PV and a (exclusive) write lock if locking in
336  *	  the PV->pmap direction.  Since only one thread can hold a write
337  *	  lock at a time, this provides the mutex.
338  *
339  *	MUTEXES
340  *	-------
341  *
342  *	* pm_lock (per-pmap) - This lock protects all of the members
343  *	  of the pmap structure itself.  This lock will be asserted
344  *	  in pmap_activate() and pmap_deactivate() from a critical
345  *	  section of mi_switch(), and must never sleep.  Note that
346  *	  in the case of the kernel pmap, interrupts which cause
347  *	  memory allocation *must* be blocked while this lock is
348  *	  asserted.
349  *
350  *	* pvh_lock (global hash) - These locks protect the PV lists
351  *	  for managed pages.
352  *
353  *	* pmap_all_pmaps_lock - This lock protects the global list of
354  *	  all pmaps.  Note that a pm_lock must never be held while this
355  *	  lock is held.
356  *
357  *	* pmap_growkernel_lock - This lock protects pmap_growkernel()
358  *	  and the virtual_end variable.
359  *
360  *	  There is a lock ordering constraint for pmap_growkernel_lock.
361  *	  pmap_growkernel() acquires the locks in the following order:
362  *
363  *		pmap_growkernel_lock (write) -> pmap_all_pmaps_lock ->
364  *		    pmap->pm_lock
365  *
366  *	  We need to ensure consistency between user pmaps and the
367  *	  kernel_lev1map.  For this reason, pmap_growkernel_lock must
368  *	  be held to prevent kernel_lev1map changing across pmaps
369  *	  being added to / removed from the global pmaps list.
370  *
371  *	Address space number management (global ASN counters and per-pmap
372  *	ASN state) is not locked; it uses arrays of values indexed
373  *	per-processor.
374  *
375  *	All internal functions which operate on a pmap are called
376  *	with the pmap already locked by the caller (which will be
377  *	an interface function).
378  */
379 static krwlock_t pmap_main_lock;
380 static kmutex_t pmap_all_pmaps_lock;
381 static krwlock_t pmap_growkernel_lock;
382 
383 #define	PMAP_MAP_TO_HEAD_LOCK()		rw_enter(&pmap_main_lock, RW_READER)
384 #define	PMAP_MAP_TO_HEAD_UNLOCK()	rw_exit(&pmap_main_lock)
385 #define	PMAP_HEAD_TO_MAP_LOCK()		rw_enter(&pmap_main_lock, RW_WRITER)
386 #define	PMAP_HEAD_TO_MAP_UNLOCK()	rw_exit(&pmap_main_lock)
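
/*
 * Illustrative sketch only (not compiled): the two locking directions
 * described above.  The function names are hypothetical; real examples
 * of each pattern are pmap_remove() (pmap -> PV) and pmap_page_protect()
 * (PV -> pmap) later in this file.
 */
#if 0	/* example only */
static void
lock_map_to_head_example(pmap_t pmap, struct vm_page *pg)
{

	/* pmap -> PV direction: shared hold on pmap_main_lock. */
	PMAP_MAP_TO_HEAD_LOCK();
	PMAP_LOCK(pmap);
	mutex_enter(pmap_pvh_lock(pg));
	/* ... modify the mapping and its PV entry ... */
	mutex_exit(pmap_pvh_lock(pg));
	PMAP_UNLOCK(pmap);
	PMAP_MAP_TO_HEAD_UNLOCK();
}

static void
lock_head_to_map_example(struct vm_page *pg, pmap_t pmap)
{

	/* PV -> pmap direction: exclusive hold on pmap_main_lock. */
	PMAP_HEAD_TO_MAP_LOCK();
	mutex_enter(pmap_pvh_lock(pg));
	PMAP_LOCK(pmap);
	/* ... walk the PV list and adjust each mapping ... */
	PMAP_UNLOCK(pmap);
	mutex_exit(pmap_pvh_lock(pg));
	PMAP_HEAD_TO_MAP_UNLOCK();
}
#endif	/* example only */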
387 
388 static struct {
389 	kmutex_t lock;
390 } __aligned(64) pmap_pvh_locks[64] __aligned(64);
391 
392 static inline kmutex_t *
393 pmap_pvh_lock(struct vm_page *pg)
394 {
395 
396 	/* Cut bits 11-6 out of page address and use directly as offset. */
397 	return (kmutex_t *)((uintptr_t)&pmap_pvh_locks +
398 	    ((uintptr_t)pg & (63 << 6)));
399 }
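
/*
 * Worked example (illustrative address only): with 64-byte lock slots,
 * a vm_page structure at 0xfffffc0000153a40 gives
 *
 *	0x...a40 & (63 << 6) = 0xa40,
 *
 * i.e. byte offset 0xa40 into pmap_pvh_locks[], which is slot
 * 0xa40 / 64 = 41.  Any vm_page pointer therefore maps to one of the
 * 64 locks, and the offset always lands on a slot boundary because
 * each slot is padded/aligned to 64 bytes.
 */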
400 
401 #if defined(MULTIPROCESSOR)
402 /*
403  * TLB Shootdown:
404  *
405  * When a mapping is changed in a pmap, the TLB entry corresponding to
406  * the virtual address must be invalidated on all processors.  In order
407  * to accomplish this on systems with multiple processors, messages are
408  * sent from the processor which performs the mapping change to all
409  * processors on which the pmap is active.  For other processors, the
410  * ASN generation number for that processor is invalidated, so that
411  * the next time the pmap is activated on that processor, a new ASN
412  * will be allocated (which implicitly invalidates all TLB entries).
413  *
414  * Note, we can use the pool allocator to allocate job entries
415  * since pool pages are mapped with K0SEG, not with the TLB.
416  */
417 struct pmap_tlb_shootdown_job {
418 	TAILQ_ENTRY(pmap_tlb_shootdown_job) pj_list;
419 	vaddr_t pj_va;			/* virtual address */
420 	pmap_t pj_pmap;			/* the pmap which maps the address */
421 	pt_entry_t pj_pte;		/* the PTE bits */
422 };
423 
424 static struct pmap_tlb_shootdown_q {
425 	TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_head;	/* queue 16b */
426 	kmutex_t pq_lock;		/* spin lock on queue 16b */
427 	int pq_pte;			/* aggregate PTE bits 4b */
428 	int pq_count;			/* number of pending requests 4b */
429 	int pq_tbia;			/* pending global flush 4b */
430 	uint8_t pq_pad[64-16-16-4-4-4];	/* pad to 64 bytes */
431 } pmap_tlb_shootdown_q[ALPHA_MAXPROCS] __aligned(CACHE_LINE_SIZE);
432 
433 /* If we have more pending jobs than this, we just nail the whole TLB. */
434 #define	PMAP_TLB_SHOOTDOWN_MAXJOBS	6
435 
436 static struct pool_cache pmap_tlb_shootdown_job_cache;
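
/*
 * Illustrative sketch only (not compiled): how a receiving processor
 * might drain its shootdown queue under the policy described above.
 * The function name is hypothetical, PG_ASM handling is omitted, and
 * ALPHA_TBIA()/ALPHA_TBIS() are shown for illustration; the real
 * processing is done by the shootdown job handling later in this file.
 */
#if 0	/* example only */
static void
tlb_shootdown_drain_example(struct pmap_tlb_shootdown_q *pq)
{
	struct pmap_tlb_shootdown_job *pj;
	bool tbia;

	mutex_enter(&pq->pq_lock);

	/* Too many pending jobs, or a global flush already requested? */
	tbia = (pq->pq_tbia || pq->pq_count > PMAP_TLB_SHOOTDOWN_MAXJOBS);
	if (tbia)
		ALPHA_TBIA();

	while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) {
		TAILQ_REMOVE(&pq->pq_head, pj, pj_list);
		if (!tbia)
			ALPHA_TBIS(pj->pj_va);
		pool_cache_put(&pmap_tlb_shootdown_job_cache, pj);
	}

	pq->pq_count = 0;
	pq->pq_tbia = 0;
	pq->pq_pte = 0;
	mutex_exit(&pq->pq_lock);
}
#endif	/* example only */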
437 #endif /* MULTIPROCESSOR */
438 
439 /*
440  * Internal routines
441  */
442 static void	alpha_protection_init(void);
443 static bool	pmap_remove_mapping(pmap_t, vaddr_t, pt_entry_t *, bool, long);
444 static void	pmap_changebit(struct vm_page *, pt_entry_t, pt_entry_t, long);
445 
446 /*
447  * PT page management functions.
448  */
449 static int	pmap_lev1map_create(pmap_t, long);
450 static void	pmap_lev1map_destroy(pmap_t, long);
451 static int	pmap_ptpage_alloc(pmap_t, pt_entry_t *, int);
452 static void	pmap_ptpage_free(pmap_t, pt_entry_t *);
453 static void	pmap_l3pt_delref(pmap_t, vaddr_t, pt_entry_t *, long);
454 static void	pmap_l2pt_delref(pmap_t, pt_entry_t *, pt_entry_t *, long);
455 static void	pmap_l1pt_delref(pmap_t, pt_entry_t *, long);
456 
457 static void	*pmap_l1pt_alloc(struct pool *, int);
458 static void	pmap_l1pt_free(struct pool *, void *);
459 
460 static struct pool_allocator pmap_l1pt_allocator = {
461 	pmap_l1pt_alloc, pmap_l1pt_free, 0,
462 };
463 
464 static int	pmap_l1pt_ctor(void *, void *, int);
465 
466 /*
467  * PV table management functions.
468  */
469 static int	pmap_pv_enter(pmap_t, struct vm_page *, vaddr_t, pt_entry_t *,
470 			      bool);
471 static void	pmap_pv_remove(pmap_t, struct vm_page *, vaddr_t, bool);
472 static void	*pmap_pv_page_alloc(struct pool *, int);
473 static void	pmap_pv_page_free(struct pool *, void *);
474 
475 static struct pool_allocator pmap_pv_page_allocator = {
476 	pmap_pv_page_alloc, pmap_pv_page_free, 0,
477 };
478 
479 #ifdef DEBUG
480 void	pmap_pv_dump(paddr_t);
481 #endif
482 
483 #define	pmap_pv_alloc()		pool_cache_get(&pmap_pv_cache, PR_NOWAIT)
484 #define	pmap_pv_free(pv)	pool_cache_put(&pmap_pv_cache, (pv))
485 
486 /*
487  * ASN management functions.
488  */
489 static void	pmap_asn_alloc(pmap_t, long);
490 
491 /*
492  * Misc. functions.
493  */
494 static bool	pmap_physpage_alloc(int, paddr_t *);
495 static void	pmap_physpage_free(paddr_t);
496 static int	pmap_physpage_addref(void *);
497 static int	pmap_physpage_delref(void *);
498 
499 /*
500  * PMAP_ISACTIVE{,_TEST}:
501  *
502  *	Check to see if a pmap is active on the current processor.
503  */
504 #define	PMAP_ISACTIVE_TEST(pm, cpu_id)					\
505 	(((pm)->pm_cpus & (1UL << (cpu_id))) != 0)
506 
507 #if defined(DEBUG) && !defined(MULTIPROCESSOR)
508 #define	PMAP_ISACTIVE(pm, cpu_id)					\
509 ({									\
510 	/*								\
511 	 * XXX This test is not MP-safe.				\
512 	 */								\
513 	int isactive_ = PMAP_ISACTIVE_TEST(pm, cpu_id);			\
514 									\
515 	if ((curlwp->l_flag & LW_IDLE) != 0 &&				\
516 	    curproc->p_vmspace != NULL &&				\
517 	   ((curproc->p_sflag & PS_WEXIT) == 0) &&			\
518 	   (isactive_ ^ ((pm) == curproc->p_vmspace->vm_map.pmap)))	\
519 		panic("PMAP_ISACTIVE");					\
520 	(isactive_);							\
521 })
522 #else
523 #define	PMAP_ISACTIVE(pm, cpu_id)	PMAP_ISACTIVE_TEST(pm, cpu_id)
524 #endif /* DEBUG && !MULTIPROCESSOR */
525 
526 /*
527  * PMAP_ACTIVATE_ASN_SANITY:
528  *
529  *	DEBUG sanity checks for ASNs within PMAP_ACTIVATE.
530  */
531 #ifdef DEBUG
532 #define	PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id)				\
533 do {									\
534 	struct pmap_asn_info *__pma = &(pmap)->pm_asni[(cpu_id)];	\
535 	struct pmap_asn_info *__cpma = &pmap_asn_info[(cpu_id)];	\
536 									\
537 	if ((pmap)->pm_lev1map == kernel_lev1map) {			\
538 		/*							\
539 		 * This pmap implementation also ensures that pmaps	\
540 		 * referencing kernel_lev1map use a reserved ASN	\
541 		 * to prevent the PALcode from servicing a TLB miss	\
542 		 * with the wrong PTE.					\
543 		 */							\
544 		if (__pma->pma_asn != PMAP_ASN_RESERVED) {		\
545 			printf("kernel_lev1map with non-reserved ASN "	\
546 			    "(line %d)\n", __LINE__);			\
547 			panic("PMAP_ACTIVATE_ASN_SANITY");		\
548 		}							\
549 	} else {							\
550 		if (__pma->pma_asngen != __cpma->pma_asngen) {		\
551 			/*						\
552 			 * ASN generation number isn't valid!		\
553 			 */						\
554 			printf("pmap asngen %lu, current %lu "		\
555 			    "(line %d)\n",				\
556 			    __pma->pma_asngen,				\
557 			    __cpma->pma_asngen,				\
558 			    __LINE__);					\
559 			panic("PMAP_ACTIVATE_ASN_SANITY");		\
560 		}							\
561 		if (__pma->pma_asn == PMAP_ASN_RESERVED) {		\
562 			/*						\
563 			 * DANGER WILL ROBINSON!  We're going to	\
564 			 * pollute the VPT TLB entries!			\
565 			 */						\
566 			printf("Using reserved ASN! (line %d)\n",	\
567 			    __LINE__);					\
568 			panic("PMAP_ACTIVATE_ASN_SANITY");		\
569 		}							\
570 	}								\
571 } while (/*CONSTCOND*/0)
572 #else
573 #define	PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id)	/* nothing */
574 #endif
575 
576 /*
577  * PMAP_ACTIVATE:
578  *
579  *	This is essentially the guts of pmap_activate(), without
580  *	ASN allocation.  This is used by pmap_activate(),
581  *	pmap_lev1map_create(), and pmap_lev1map_destroy().
582  *
583  *	This is called only when it is known that a pmap is "active"
584  *	on the current processor; the ASN must already be valid.
585  */
586 #define	PMAP_ACTIVATE(pmap, l, cpu_id)					\
587 do {									\
588 	struct pcb *pcb = lwp_getpcb(l);				\
589 	PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id);				\
590 									\
591 	pcb->pcb_hw.apcb_ptbr =				\
592 	    ALPHA_K0SEG_TO_PHYS((vaddr_t)(pmap)->pm_lev1map) >> PGSHIFT; \
593 	pcb->pcb_hw.apcb_asn = (pmap)->pm_asni[(cpu_id)].pma_asn;	\
594 									\
595 	if ((l) == curlwp) {						\
596 		/*							\
597 		 * Page table base register has changed; switch to	\
598 		 * our own context again so that it will take effect.	\
599 		 */							\
600 		(void) alpha_pal_swpctx((u_long)l->l_md.md_pcbpaddr);	\
601 	}								\
602 } while (/*CONSTCOND*/0)
603 
604 /*
605  * PMAP_SET_NEEDISYNC:
606  *
607  *	Mark that a user pmap needs an I-stream synch on its
608  *	way back out to userspace.
609  */
610 #define	PMAP_SET_NEEDISYNC(pmap)	(pmap)->pm_needisync = ~0UL
611 
612 /*
613  * PMAP_SYNC_ISTREAM:
614  *
615  *	Synchronize the I-stream for the specified pmap.  For user
616  *	pmaps, this is deferred until a process using the pmap returns
617  *	to userspace.
618  */
619 #if defined(MULTIPROCESSOR)
620 #define	PMAP_SYNC_ISTREAM_KERNEL()					\
621 do {									\
622 	alpha_pal_imb();						\
623 	alpha_broadcast_ipi(ALPHA_IPI_IMB);				\
624 } while (/*CONSTCOND*/0)
625 
626 #define	PMAP_SYNC_ISTREAM_USER(pmap)					\
627 do {									\
628 	alpha_multicast_ipi((pmap)->pm_cpus, ALPHA_IPI_AST);		\
629 	/* for curcpu, will happen in userret() */			\
630 } while (/*CONSTCOND*/0)
631 #else
632 #define	PMAP_SYNC_ISTREAM_KERNEL()	alpha_pal_imb()
633 #define	PMAP_SYNC_ISTREAM_USER(pmap)	/* will happen in userret() */
634 #endif /* MULTIPROCESSOR */
635 
636 #define	PMAP_SYNC_ISTREAM(pmap)						\
637 do {									\
638 	if ((pmap) == pmap_kernel())					\
639 		PMAP_SYNC_ISTREAM_KERNEL();				\
640 	else								\
641 		PMAP_SYNC_ISTREAM_USER(pmap);				\
642 } while (/*CONSTCOND*/0)
643 
644 /*
645  * PMAP_INVALIDATE_ASN:
646  *
647  *	Invalidate the specified pmap's ASN, so as to force allocation
648  *	of a new one the next time pmap_asn_alloc() is called.
649  *
650  *	NOTE: THIS MUST ONLY BE CALLED IF AT LEAST ONE OF THE FOLLOWING
651  *	CONDITIONS IS TRUE:
652  *
653  *		(1) The pmap references the global kernel_lev1map.
654  *
655  *		(2) The pmap is not active on the current processor.
656  */
657 #define	PMAP_INVALIDATE_ASN(pmap, cpu_id)				\
658 do {									\
659 	(pmap)->pm_asni[(cpu_id)].pma_asn = PMAP_ASN_RESERVED;		\
660 } while (/*CONSTCOND*/0)
661 
662 /*
663  * PMAP_INVALIDATE_TLB:
664  *
665  *	Invalidate the TLB entry for the pmap/va pair.
666  */
667 #define	PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id)		\
668 do {									\
669 	if ((hadasm) || (isactive)) {					\
670 		/*							\
671 		 * Simply invalidating the TLB entry and I-cache	\
672 		 * works in this case.					\
673 		 */							\
674 		ALPHA_TBIS((va));					\
675 	} else if ((pmap)->pm_asni[(cpu_id)].pma_asngen ==		\
676 		   pmap_asn_info[(cpu_id)].pma_asngen) {		\
677 		/*							\
678 		 * We can't directly invalidate the TLB entry		\
679 		 * in this case, so we have to force allocation		\
680 		 * of a new ASN the next time this pmap becomes		\
681 		 * active.						\
682 		 */							\
683 		PMAP_INVALIDATE_ASN((pmap), (cpu_id));			\
684 	}								\
685 		/*							\
686 		 * Nothing to do in this case; the next time the	\
687 		 * pmap becomes active on this processor, a new		\
688 		 * ASN will be allocated anyway.			\
689 		 */							\
690 } while (/*CONSTCOND*/0)
691 
692 /*
693  * PMAP_KERNEL_PTE:
694  *
695  *	Get a kernel PTE.
696  *
697  *	If debugging, do a table walk.  If not debugging, just use
698  *	the Virtual Page Table, since all kernel page tables are
699  *	pre-allocated and mapped in.
700  */
701 #ifdef DEBUG
702 #define	PMAP_KERNEL_PTE(va)						\
703 ({									\
704 	pt_entry_t *l1pte_, *l2pte_;					\
705 									\
706 	l1pte_ = pmap_l1pte(pmap_kernel(), va);				\
707 	if (pmap_pte_v(l1pte_) == 0) {					\
708 		printf("kernel level 1 PTE not valid, va 0x%lx "	\
709 		    "(line %d)\n", (va), __LINE__);			\
710 		panic("PMAP_KERNEL_PTE");				\
711 	}								\
712 	l2pte_ = pmap_l2pte(pmap_kernel(), va, l1pte_);			\
713 	if (pmap_pte_v(l2pte_) == 0) {					\
714 		printf("kernel level 2 PTE not valid, va 0x%lx "	\
715 		    "(line %d)\n", (va), __LINE__);			\
716 		panic("PMAP_KERNEL_PTE");				\
717 	}								\
718 	pmap_l3pte(pmap_kernel(), va, l2pte_);				\
719 })
720 #else
721 #define	PMAP_KERNEL_PTE(va)	(&VPT[VPT_INDEX((va))])
722 #endif
723 
724 /*
725  * PMAP_SET_PTE:
726  *
727  *	Set a PTE to a specified value.
728  */
729 #define	PMAP_SET_PTE(ptep, val)	*(ptep) = (val)
730 
731 /*
732  * PMAP_STAT_{INCR,DECR}:
733  *
734  *	Increment or decrement a pmap statistic.
735  */
736 #define	PMAP_STAT_INCR(s, v)	atomic_add_long((unsigned long *)(&(s)), (v))
737 #define	PMAP_STAT_DECR(s, v)	atomic_add_long((unsigned long *)(&(s)), -(v))
738 
739 /*
740  * pmap_bootstrap:
741  *
742  *	Bootstrap the system to run with virtual memory.
743  *
744  *	Note: no locking is necessary in this function.
745  */
746 void
747 pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids)
748 {
749 	vsize_t lev2mapsize, lev3mapsize;
750 	pt_entry_t *lev2map, *lev3map;
751 	pt_entry_t pte;
752 	vsize_t bufsz;
753 	struct pcb *pcb;
754 	int i;
755 
756 #ifdef DEBUG
757 	if (pmapdebug & (PDB_FOLLOW|PDB_BOOTSTRAP))
758 		printf("pmap_bootstrap(0x%lx, %u)\n", ptaddr, maxasn);
759 #endif
760 
761 	/*
762 	 * Compute the number of pages kmem_arena will have.
763 	 */
764 	kmeminit_nkmempages();
765 
766 	/*
767 	 * Figure out how many initial PTE's are necessary to map the
768 	 * kernel.  We also reserve space for kmem_alloc_pageable()
769 	 * for vm_fork().
770 	 */
771 
772 	/* Get size of buffer cache and set an upper limit */
773 	bufsz = buf_memcalc();
774 	buf_setvalimit(bufsz);
775 
776 	lev3mapsize =
777 		(VM_PHYS_SIZE + (ubc_nwins << ubc_winshift) +
778 		 bufsz + 16 * NCARGS + pager_map_size) / PAGE_SIZE +
779 		(maxproc * UPAGES) + nkmempages;
780 
781 #ifdef SYSVSHM
782 	lev3mapsize += shminfo.shmall;
783 #endif
784 	lev3mapsize = roundup(lev3mapsize, NPTEPG);
785 
786 	/*
787 	 * Initialize `FYI' variables.  Note we're relying on
788 	 * the fact that BSEARCH sorts the vm_physmem[] array
789 	 * for us.
790 	 */
791 	avail_start = ptoa(VM_PHYSMEM_PTR(0)->start);
792 	avail_end = ptoa(VM_PHYSMEM_PTR(vm_nphysseg - 1)->end);
793 	virtual_end = VM_MIN_KERNEL_ADDRESS + lev3mapsize * PAGE_SIZE;
794 
795 #if 0
796 	printf("avail_start = 0x%lx\n", avail_start);
797 	printf("avail_end = 0x%lx\n", avail_end);
798 	printf("virtual_end = 0x%lx\n", virtual_end);
799 #endif
800 
801 	/*
802 	 * Allocate a level 1 PTE table for the kernel.
803 	 * This is always one page long.
804 	 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL.
805 	 */
806 	kernel_lev1map = (pt_entry_t *)
807 	    uvm_pageboot_alloc(sizeof(pt_entry_t) * NPTEPG);
808 
809 	/*
810 	 * Allocate a level 2 PTE table for the kernel.
811 	 * These must map all of the level3 PTEs.
812 	 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL.
813 	 */
814 	lev2mapsize = roundup(howmany(lev3mapsize, NPTEPG), NPTEPG);
815 	lev2map = (pt_entry_t *)
816 	    uvm_pageboot_alloc(sizeof(pt_entry_t) * lev2mapsize);
817 
818 	/*
819 	 * Allocate a level 3 PTE table for the kernel.
820 	 * Contains lev3mapsize PTEs.
821 	 */
822 	lev3map = (pt_entry_t *)
823 	    uvm_pageboot_alloc(sizeof(pt_entry_t) * lev3mapsize);
824 
825 	/*
826 	 * Set up level 1 page table
827 	 */
828 
829 	/* Map all of the level 2 pte pages */
830 	for (i = 0; i < howmany(lev2mapsize, NPTEPG); i++) {
831 		pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev2map) +
832 		    (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT;
833 		pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
834 		kernel_lev1map[l1pte_index(VM_MIN_KERNEL_ADDRESS +
835 		    (i*PAGE_SIZE*NPTEPG*NPTEPG))] = pte;
836 	}
837 
838 	/* Map the virtual page table */
839 	pte = (ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT)
840 	    << PG_SHIFT;
841 	pte |= PG_V | PG_KRE | PG_KWE; /* NOTE NO ASM */
842 	kernel_lev1map[l1pte_index(VPTBASE)] = pte;
843 	VPT = (pt_entry_t *)VPTBASE;
844 
845 #ifdef _PMAP_MAY_USE_PROM_CONSOLE
846     {
847 	extern pt_entry_t prom_pte;			/* XXX */
848 	extern int prom_mapped;				/* XXX */
849 
850 	if (pmap_uses_prom_console()) {
851 		/*
852 		 * XXX Save old PTE so we can remap the PROM, if
853 		 * XXX necessary.
854 		 */
855 		prom_pte = *(pt_entry_t *)ptaddr & ~PG_ASM;
856 	}
857 	prom_mapped = 0;
858 
859 	/*
860 	 * Actually, this code lies.  The prom is still mapped, and will
861 	 * remain so until the context switch after alpha_init() returns.
862 	 */
863     }
864 #endif
865 
866 	/*
867 	 * Set up level 2 page table.
868 	 */
869 	/* Map all of the level 3 pte pages */
870 	for (i = 0; i < howmany(lev3mapsize, NPTEPG); i++) {
871 		pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev3map) +
872 		    (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT;
873 		pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
874 		lev2map[l2pte_index(VM_MIN_KERNEL_ADDRESS+
875 		    (i*PAGE_SIZE*NPTEPG))] = pte;
876 	}
877 
878 	/* Initialize the pmap_growkernel_lock. */
879 	rw_init(&pmap_growkernel_lock);
880 
881 	/*
882 	 * Set up level three page table (lev3map)
883 	 */
884 	/* Nothing to do; it's already zero'd */
885 
886 	/*
887 	 * Initialize the pmap pools and list.
888 	 */
889 	pmap_ncpuids = ncpuids;
890 	pool_cache_bootstrap(&pmap_pmap_cache, PMAP_SIZEOF(pmap_ncpuids), 0,
891 	    0, 0, "pmap", NULL, IPL_NONE, NULL, NULL, NULL);
892 	pool_cache_bootstrap(&pmap_l1pt_cache, PAGE_SIZE, 0, 0, 0, "pmapl1pt",
893 	    &pmap_l1pt_allocator, IPL_NONE, pmap_l1pt_ctor, NULL, NULL);
894 	pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0,
895 	    PR_LARGECACHE, "pmappv", &pmap_pv_page_allocator, IPL_NONE, NULL,
896 	    NULL, NULL);
897 
898 	TAILQ_INIT(&pmap_all_pmaps);
899 
900 	/*
901 	 * Initialize the ASN logic.
902 	 */
903 	pmap_max_asn = maxasn;
904 	for (i = 0; i < ALPHA_MAXPROCS; i++) {
905 		pmap_asn_info[i].pma_asn = 1;
906 		pmap_asn_info[i].pma_asngen = 0;
907 	}
908 
909 	/*
910 	 * Initialize the locks.
911 	 */
912 	rw_init(&pmap_main_lock);
913 	mutex_init(&pmap_all_pmaps_lock, MUTEX_DEFAULT, IPL_NONE);
914 	for (i = 0; i < __arraycount(pmap_pvh_locks); i++) {
915 		mutex_init(&pmap_pvh_locks[i].lock, MUTEX_DEFAULT, IPL_NONE);
916 	}
917 
918 	/*
919 	 * Initialize kernel pmap.  Note that all kernel mappings
920 	 * have PG_ASM set, so the ASN doesn't really matter for
921 	 * the kernel pmap.  Also, since the kernel pmap always
922 	 * references kernel_lev1map, it always has an invalid ASN
923 	 * generation.
924 	 */
925 	memset(pmap_kernel(), 0, sizeof(struct pmap));
926 	pmap_kernel()->pm_lev1map = kernel_lev1map;
927 	pmap_kernel()->pm_count = 1;
928 	for (i = 0; i < ALPHA_MAXPROCS; i++) {
929 		pmap_kernel()->pm_asni[i].pma_asn = PMAP_ASN_RESERVED;
930 		pmap_kernel()->pm_asni[i].pma_asngen =
931 		    pmap_asn_info[i].pma_asngen;
932 	}
933 	mutex_init(&pmap_kernel()->pm_lock, MUTEX_DEFAULT, IPL_NONE);
934 	TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap_kernel(), pm_list);
935 
936 #if defined(MULTIPROCESSOR)
937 	/*
938 	 * Initialize the TLB shootdown queues.
939 	 */
940 	pool_cache_bootstrap(&pmap_tlb_shootdown_job_cache,
941 	    sizeof(struct pmap_tlb_shootdown_job), CACHE_LINE_SIZE,
942 	     0, PR_LARGECACHE, "pmaptlb", NULL, IPL_VM, NULL, NULL, NULL);
943 	for (i = 0; i < ALPHA_MAXPROCS; i++) {
944 		TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_head);
945 		mutex_init(&pmap_tlb_shootdown_q[i].pq_lock, MUTEX_DEFAULT,
946 		    IPL_SCHED);
947 	}
948 #endif
949 
950 	/*
951 	 * Set up lwp0's PCB such that the ptbr points to the right place
952 	 * and has the kernel pmap's (really unused) ASN.
953 	 */
954 	pcb = lwp_getpcb(&lwp0);
955 	pcb->pcb_hw.apcb_ptbr =
956 	    ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT;
957 	pcb->pcb_hw.apcb_asn = pmap_kernel()->pm_asni[cpu_number()].pma_asn;
958 
959 	/*
960 	 * Mark the kernel pmap `active' on this processor.
961 	 */
962 	atomic_or_ulong(&pmap_kernel()->pm_cpus,
963 	    (1UL << cpu_number()));
964 }
965 
966 #ifdef _PMAP_MAY_USE_PROM_CONSOLE
967 int
968 pmap_uses_prom_console(void)
969 {
970 
971 	return (cputype == ST_DEC_21000);
972 }
973 #endif /* _PMAP_MAY_USE_PROM_CONSOLE */
974 
975 /*
976  * pmap_virtual_space:		[ INTERFACE ]
977  *
978  *	Define the initial bounds of the kernel virtual address space.
979  */
980 void
981 pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp)
982 {
983 
984 	*vstartp = VM_MIN_KERNEL_ADDRESS;	/* kernel is in K0SEG */
985 	*vendp = VM_MAX_KERNEL_ADDRESS;		/* we use pmap_growkernel */
986 }
987 
988 /*
989  * pmap_steal_memory:		[ INTERFACE ]
990  *
991  *	Bootstrap memory allocator (alternative to vm_bootstrap_steal_memory()).
992  *	This function allows for early dynamic memory allocation until the
993  *	virtual memory system has been bootstrapped.  After that point, either
994  *	kmem_alloc or malloc should be used.  This function works by stealing
995  *	pages from the (to be) managed page pool, then implicitly mapping the
996  *	pages (by using their k0seg addresses) and zeroing them.
997  *
998  *	It may be used once the physical memory segments have been pre-loaded
999  *	into the vm_physmem[] array.  Early memory allocation MUST use this
1000  *	interface!  This cannot be used after vm_page_startup(), and will
1001  *	generate a panic if tried.
1002  *
1003  *	Note that this memory will never be freed, and in essence it is wired
1004  *	down.
1005  *
1006  *	We must adjust *vstartp and/or *vendp iff we use address space
1007  *	from the kernel virtual address range defined by pmap_virtual_space().
1008  *
1009  *	Note: no locking is necessary in this function.
1010  */
1011 vaddr_t
1012 pmap_steal_memory(vsize_t size, vaddr_t *vstartp, vaddr_t *vendp)
1013 {
1014 	int bank, npgs, x;
1015 	vaddr_t va;
1016 	paddr_t pa;
1017 
1018 	size = round_page(size);
1019 	npgs = atop(size);
1020 
1021 #if 0
1022 	printf("PSM: size 0x%lx (npgs 0x%x)\n", size, npgs);
1023 #endif
1024 
1025 	for (bank = 0; bank < vm_nphysseg; bank++) {
1026 		if (uvm.page_init_done == true)
1027 			panic("pmap_steal_memory: called _after_ bootstrap");
1028 
1029 #if 0
1030 		printf("     bank %d: avail_start 0x%lx, start 0x%lx, "
1031 		    "avail_end 0x%lx\n", bank, VM_PHYSMEM_PTR(bank)->avail_start,
1032 		    VM_PHYSMEM_PTR(bank)->start, VM_PHYSMEM_PTR(bank)->avail_end);
1033 #endif
1034 
1035 		if (VM_PHYSMEM_PTR(bank)->avail_start != VM_PHYSMEM_PTR(bank)->start ||
1036 		    VM_PHYSMEM_PTR(bank)->avail_start >= VM_PHYSMEM_PTR(bank)->avail_end)
1037 			continue;
1038 
1039 #if 0
1040 		printf("             avail_end - avail_start = 0x%lx\n",
1041 		    VM_PHYSMEM_PTR(bank)->avail_end - VM_PHYSMEM_PTR(bank)->avail_start);
1042 #endif
1043 
1044 		if ((VM_PHYSMEM_PTR(bank)->avail_end - VM_PHYSMEM_PTR(bank)->avail_start)
1045 		    < npgs)
1046 			continue;
1047 
1048 		/*
1049 		 * There are enough pages here; steal them!
1050 		 */
1051 		pa = ptoa(VM_PHYSMEM_PTR(bank)->avail_start);
1052 		VM_PHYSMEM_PTR(bank)->avail_start += npgs;
1053 		VM_PHYSMEM_PTR(bank)->start += npgs;
1054 
1055 		/*
1056 		 * Have we used up this segment?
1057 		 */
1058 		if (VM_PHYSMEM_PTR(bank)->avail_start == VM_PHYSMEM_PTR(bank)->end) {
1059 			if (vm_nphysseg == 1)
1060 				panic("pmap_steal_memory: out of memory!");
1061 
1062 			/* Remove this segment from the list. */
1063 			vm_nphysseg--;
1064 			for (x = bank; x < vm_nphysseg; x++) {
1065 				/* structure copy */
1066 				VM_PHYSMEM_PTR_SWAP(x, x + 1);
1067 			}
1068 		}
1069 
1070 		va = ALPHA_PHYS_TO_K0SEG(pa);
1071 		memset((void *)va, 0, size);
1072 		pmap_pages_stolen += npgs;
1073 		return (va);
1074 	}
1075 
1076 	/*
1077 	 * If we got here, there was no memory left.
1078 	 */
1079 	panic("pmap_steal_memory: no memory to steal");
1080 }
1081 
1082 /*
1083  * pmap_init:			[ INTERFACE ]
1084  *
1085  *	Initialize the pmap module.  Called by vm_init(), to initialize any
1086  *	structures that the pmap system needs to map virtual memory.
1087  *
1088  *	Note: no locking is necessary in this function.
1089  */
1090 void
1091 pmap_init(void)
1092 {
1093 
1094 #ifdef DEBUG
1095 	if (pmapdebug & PDB_FOLLOW)
1096 	        printf("pmap_init()\n");
1097 #endif
1098 
1099 	/* initialize protection array */
1100 	alpha_protection_init();
1101 
1102 	/*
1103 	 * Set a low water mark on the pv_entry pool, so that we are
1104 	 * more likely to have these around even in extreme memory
1105 	 * starvation.
1106 	 */
1107 	pool_cache_setlowat(&pmap_pv_cache, pmap_pv_lowat);
1108 
1109 	/*
1110 	 * Now it is safe to enable pv entry recording.
1111 	 */
1112 	pmap_initialized = true;
1113 
1114 #if 0
1115 	for (bank = 0; bank < vm_nphysseg; bank++) {
1116 		printf("bank %d\n", bank);
1117 		printf("\tstart = 0x%x\n", ptoa(VM_PHYSMEM_PTR(bank)->start));
1118 		printf("\tend = 0x%x\n", ptoa(VM_PHYSMEM_PTR(bank)->end));
1119 		printf("\tavail_start = 0x%x\n",
1120 		    ptoa(VM_PHYSMEM_PTR(bank)->avail_start));
1121 		printf("\tavail_end = 0x%x\n",
1122 		    ptoa(VM_PHYSMEM_PTR(bank)->avail_end));
1123 	}
1124 #endif
1125 }
1126 
1127 /*
1128  * pmap_create:			[ INTERFACE ]
1129  *
1130  *	Create and return a physical map.
1131  *
1132  *	Note: no locking is necessary in this function.
1133  */
1134 pmap_t
1135 pmap_create(void)
1136 {
1137 	pmap_t pmap;
1138 	int i;
1139 
1140 #ifdef DEBUG
1141 	if (pmapdebug & (PDB_FOLLOW|PDB_CREATE))
1142 		printf("pmap_create()\n");
1143 #endif
1144 
1145 	pmap = pool_cache_get(&pmap_pmap_cache, PR_WAITOK);
1146 	memset(pmap, 0, sizeof(*pmap));
1147 
1148 	/*
1149 	 * Start out by referencing the kernel's kernel_lev1map; a
1150 	 * private level 1 page table is created for this pmap just
1151 	 * below (see pmap_lev1map_create()).
1152 	 */
1153 	pmap->pm_lev1map = kernel_lev1map;
1154 
1155 	pmap->pm_count = 1;
1156 	for (i = 0; i < pmap_ncpuids; i++) {
1157 		pmap->pm_asni[i].pma_asn = PMAP_ASN_RESERVED;
1158 		/* XXX Locking? */
1159 		pmap->pm_asni[i].pma_asngen = pmap_asn_info[i].pma_asngen;
1160 	}
1161 	mutex_init(&pmap->pm_lock, MUTEX_DEFAULT, IPL_NONE);
1162 
1163  try_again:
1164 	rw_enter(&pmap_growkernel_lock, RW_READER);
1165 
1166 	if (pmap_lev1map_create(pmap, cpu_number()) != 0) {
1167 		rw_exit(&pmap_growkernel_lock);
1168 		(void) kpause("pmap_create", false, hz >> 2, NULL);
1169 		goto try_again;
1170 	}
1171 
1172 	mutex_enter(&pmap_all_pmaps_lock);
1173 	TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap, pm_list);
1174 	mutex_exit(&pmap_all_pmaps_lock);
1175 
1176 	rw_exit(&pmap_growkernel_lock);
1177 
1178 	return (pmap);
1179 }
1180 
1181 /*
1182  * pmap_destroy:		[ INTERFACE ]
1183  *
1184  *	Drop the reference count on the specified pmap, releasing
1185  *	all resources if the reference count drops to zero.
1186  */
1187 void
1188 pmap_destroy(pmap_t pmap)
1189 {
1190 
1191 #ifdef DEBUG
1192 	if (pmapdebug & PDB_FOLLOW)
1193 		printf("pmap_destroy(%p)\n", pmap);
1194 #endif
1195 
1196 	if (atomic_dec_uint_nv(&pmap->pm_count) > 0)
1197 		return;
1198 
1199 	rw_enter(&pmap_growkernel_lock, RW_READER);
1200 
1201 	/*
1202 	 * Remove it from the global list of all pmaps.
1203 	 */
1204 	mutex_enter(&pmap_all_pmaps_lock);
1205 	TAILQ_REMOVE(&pmap_all_pmaps, pmap, pm_list);
1206 	mutex_exit(&pmap_all_pmaps_lock);
1207 
1208 	pmap_lev1map_destroy(pmap, cpu_number());
1209 
1210 	rw_exit(&pmap_growkernel_lock);
1211 
1212 	/*
1213 	 * Since the pmap is supposed to contain no valid
1214 	 * mappings at this point, we should always see
1215 	 * kernel_lev1map here.
1216 	 */
1217 	KASSERT(pmap->pm_lev1map == kernel_lev1map);
1218 
1219 	mutex_destroy(&pmap->pm_lock);
1220 	pool_cache_put(&pmap_pmap_cache, pmap);
1221 }
1222 
1223 /*
1224  * pmap_reference:		[ INTERFACE ]
1225  *
1226  *	Add a reference to the specified pmap.
1227  */
1228 void
1229 pmap_reference(pmap_t pmap)
1230 {
1231 
1232 #ifdef DEBUG
1233 	if (pmapdebug & PDB_FOLLOW)
1234 		printf("pmap_reference(%p)\n", pmap);
1235 #endif
1236 
1237 	atomic_inc_uint(&pmap->pm_count);
1238 }
1239 
1240 /*
1241  * pmap_remove:			[ INTERFACE ]
1242  *
1243  *	Remove the given range of addresses from the specified map.
1244  *
1245  *	It is assumed that the start and end are properly
1246  *	rounded to the page size.
1247  */
1248 void
1249 pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva)
1250 {
1251 	pt_entry_t *l1pte, *l2pte, *l3pte;
1252 	pt_entry_t *saved_l1pte, *saved_l2pte, *saved_l3pte;
1253 	vaddr_t l1eva, l2eva, vptva;
1254 	bool needisync = false;
1255 	long cpu_id = cpu_number();
1256 
1257 #ifdef DEBUG
1258 	if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
1259 		printf("pmap_remove(%p, %lx, %lx)\n", pmap, sva, eva);
1260 #endif
1261 
1262 	/*
1263 	 * If this is the kernel pmap, we can use a faster method
1264 	 * for accessing the PTEs (since the PT pages are always
1265 	 * resident).
1266 	 *
1267 	 * Note that this routine should NEVER be called from an
1268 	 * interrupt context; pmap_kremove() is used for that.
1269 	 */
1270 	if (pmap == pmap_kernel()) {
1271 		PMAP_MAP_TO_HEAD_LOCK();
1272 		PMAP_LOCK(pmap);
1273 
1274 		while (sva < eva) {
1275 			l3pte = PMAP_KERNEL_PTE(sva);
1276 			if (pmap_pte_v(l3pte)) {
1277 #ifdef DIAGNOSTIC
1278 				if (uvm_pageismanaged(pmap_pte_pa(l3pte)) &&
1279 				    pmap_pte_pv(l3pte) == 0)
1280 					panic("pmap_remove: managed page "
1281 					    "without PG_PVLIST for 0x%lx",
1282 					    sva);
1283 #endif
1284 				needisync |= pmap_remove_mapping(pmap, sva,
1285 				    l3pte, true, cpu_id);
1286 			}
1287 			sva += PAGE_SIZE;
1288 		}
1289 
1290 		PMAP_UNLOCK(pmap);
1291 		PMAP_MAP_TO_HEAD_UNLOCK();
1292 
1293 		if (needisync)
1294 			PMAP_SYNC_ISTREAM_KERNEL();
1295 		return;
1296 	}
1297 
1298 #ifdef DIAGNOSTIC
1299 	if (sva > VM_MAXUSER_ADDRESS || eva > VM_MAXUSER_ADDRESS)
1300 		panic("pmap_remove: (0x%lx - 0x%lx) user pmap, kernel "
1301 		    "address range", sva, eva);
1302 #endif
1303 
1304 	PMAP_MAP_TO_HEAD_LOCK();
1305 	PMAP_LOCK(pmap);
1306 
1307 	/*
1308 	 * If we're already referencing the kernel_lev1map, there
1309 	 * is no work for us to do.
1310 	 */
1311 	if (pmap->pm_lev1map == kernel_lev1map)
1312 		goto out;
1313 
1314 	saved_l1pte = l1pte = pmap_l1pte(pmap, sva);
1315 
1316 	/*
1317 	 * Add a reference to the L1 table so it won't get
1318 	 * removed from under us.
1319 	 */
1320 	pmap_physpage_addref(saved_l1pte);
1321 
1322 	for (; sva < eva; sva = l1eva, l1pte++) {
1323 		l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE;
1324 		if (pmap_pte_v(l1pte)) {
1325 			saved_l2pte = l2pte = pmap_l2pte(pmap, sva, l1pte);
1326 
1327 			/*
1328 			 * Add a reference to the L2 table so it won't
1329 			 * get removed from under us.
1330 			 */
1331 			pmap_physpage_addref(saved_l2pte);
1332 
1333 			for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) {
1334 				l2eva =
1335 				    alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE;
1336 				if (pmap_pte_v(l2pte)) {
1337 					saved_l3pte = l3pte =
1338 					    pmap_l3pte(pmap, sva, l2pte);
1339 
1340 					/*
1341 					 * Add a reference to the L3 table so
1342 					 * it won't get removed from under us.
1343 					 */
1344 					pmap_physpage_addref(saved_l3pte);
1345 
1346 					/*
1347 					 * Remember this sva; if the L3 table
1348 					 * gets removed, we need to invalidate
1349 					 * the VPT TLB entry for it.
1350 					 */
1351 					vptva = sva;
1352 
1353 					for (; sva < l2eva && sva < eva;
1354 					     sva += PAGE_SIZE, l3pte++) {
1355 						if (!pmap_pte_v(l3pte)) {
1356 							continue;
1357 						}
1358 						needisync |=
1359 						    pmap_remove_mapping(
1360 							pmap, sva,
1361 							l3pte, true,
1362 							cpu_id);
1363 					}
1364 
1365 					/*
1366 					 * Remove the reference to the L3
1367 					 * table that we added above.  This
1368 					 * may free the L3 table.
1369 					 */
1370 					pmap_l3pt_delref(pmap, vptva,
1371 					    saved_l3pte, cpu_id);
1372 				}
1373 			}
1374 
1375 			/*
1376 			 * Remove the reference to the L2 table that we
1377 			 * added above.  This may free the L2 table.
1378 			 */
1379 			pmap_l2pt_delref(pmap, l1pte, saved_l2pte, cpu_id);
1380 		}
1381 	}
1382 
1383 	/*
1384 	 * Remove the reference to the L1 table that we added above.
1385 	 * This may free the L1 table.
1386 	 */
1387 	pmap_l1pt_delref(pmap, saved_l1pte, cpu_id);
1388 
1389 	if (needisync)
1390 		PMAP_SYNC_ISTREAM_USER(pmap);
1391 
1392  out:
1393 	PMAP_UNLOCK(pmap);
1394 	PMAP_MAP_TO_HEAD_UNLOCK();
1395 }
1396 
1397 /*
1398  * pmap_page_protect:		[ INTERFACE ]
1399  *
1400  *	Lower the permission for all mappings to a given page to
1401  *	the permissions specified.
1402  */
1403 void
1404 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
1405 {
1406 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
1407 	pmap_t pmap;
1408 	pv_entry_t pv, nextpv;
1409 	bool needkisync = false;
1410 	long cpu_id = cpu_number();
1411 	kmutex_t *lock;
1412 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1413 #ifdef DEBUG
1414 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
1415 
1416 
1417 	if ((pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) ||
1418 	    (prot == VM_PROT_NONE && (pmapdebug & PDB_REMOVE)))
1419 		printf("pmap_page_protect(%p, %x)\n", pg, prot);
1420 #endif
1421 
1422 	switch (prot) {
1423 	case VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE:
1424 	case VM_PROT_READ|VM_PROT_WRITE:
1425 		return;
1426 
1427 	/* copy_on_write */
1428 	case VM_PROT_READ|VM_PROT_EXECUTE:
1429 	case VM_PROT_READ:
1430 		PMAP_HEAD_TO_MAP_LOCK();
1431 		lock = pmap_pvh_lock(pg);
1432 		mutex_enter(lock);
1433 		for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) {
1434 			PMAP_LOCK(pv->pv_pmap);
1435 			if (*pv->pv_pte & (PG_KWE | PG_UWE)) {
1436 				*pv->pv_pte &= ~(PG_KWE | PG_UWE);
1437 				PMAP_INVALIDATE_TLB(pv->pv_pmap, pv->pv_va,
1438 				    pmap_pte_asm(pv->pv_pte),
1439 				    PMAP_ISACTIVE(pv->pv_pmap, cpu_id), cpu_id);
1440 				PMAP_TLB_SHOOTDOWN(pv->pv_pmap, pv->pv_va,
1441 				    pmap_pte_asm(pv->pv_pte));
1442 			}
1443 			PMAP_UNLOCK(pv->pv_pmap);
1444 		}
1445 		mutex_exit(lock);
1446 		PMAP_HEAD_TO_MAP_UNLOCK();
1447 		PMAP_TLB_SHOOTNOW();
1448 		return;
1449 
1450 	/* remove_all */
1451 	default:
1452 		break;
1453 	}
1454 
1455 	PMAP_HEAD_TO_MAP_LOCK();
1456 	lock = pmap_pvh_lock(pg);
1457 	mutex_enter(lock);
1458 	for (pv = md->pvh_list; pv != NULL; pv = nextpv) {
1459 		nextpv = pv->pv_next;
1460 		pmap = pv->pv_pmap;
1461 
1462 		PMAP_LOCK(pmap);
1463 #ifdef DEBUG
1464 		if (pmap_pte_v(pmap_l2pte(pv->pv_pmap, pv->pv_va, NULL)) == 0 ||
1465 		    pmap_pte_pa(pv->pv_pte) != pa)
1466 			panic("pmap_page_protect: bad mapping");
1467 #endif
1468 		if (pmap_remove_mapping(pmap, pv->pv_va, pv->pv_pte,
1469 		    false, cpu_id) == true) {
1470 			if (pmap == pmap_kernel())
1471 				needkisync |= true;
1472 			else
1473 				PMAP_SYNC_ISTREAM_USER(pmap);
1474 		}
1475 		PMAP_UNLOCK(pmap);
1476 	}
1477 
1478 	if (needkisync)
1479 		PMAP_SYNC_ISTREAM_KERNEL();
1480 
1481 	mutex_exit(lock);
1482 	PMAP_HEAD_TO_MAP_UNLOCK();
1483 }
1484 
1485 /*
1486  * pmap_protect:		[ INTERFACE ]
1487  *
1488  *	Set the physical protection on the specified range of this map
1489  *	as requested.
1490  */
1491 void
1492 pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
1493 {
1494 	pt_entry_t *l1pte, *l2pte, *l3pte, bits;
1495 	bool isactive;
1496 	bool hadasm;
1497 	vaddr_t l1eva, l2eva;
1498 	long cpu_id = cpu_number();
1499 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1500 
1501 #ifdef DEBUG
1502 	if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT))
1503 		printf("pmap_protect(%p, %lx, %lx, %x)\n",
1504 		    pmap, sva, eva, prot);
1505 #endif
1506 
1507 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1508 		pmap_remove(pmap, sva, eva);
1509 		return;
1510 	}
1511 
1512 	PMAP_LOCK(pmap);
1513 
1514 	bits = pte_prot(pmap, prot);
1515 	isactive = PMAP_ISACTIVE(pmap, cpu_id);
1516 
1517 	l1pte = pmap_l1pte(pmap, sva);
1518 	for (; sva < eva; sva = l1eva, l1pte++) {
1519 		l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE;
1520 		if (pmap_pte_v(l1pte)) {
1521 			l2pte = pmap_l2pte(pmap, sva, l1pte);
1522 			for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) {
1523 				l2eva =
1524 				    alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE;
1525 				if (pmap_pte_v(l2pte)) {
1526 					l3pte = pmap_l3pte(pmap, sva, l2pte);
1527 					for (; sva < l2eva && sva < eva;
1528 					     sva += PAGE_SIZE, l3pte++) {
1529 						if (pmap_pte_v(l3pte) &&
1530 						    pmap_pte_prot_chg(l3pte,
1531 						    bits)) {
1532 							hadasm =
1533 							   (pmap_pte_asm(l3pte)
1534 							    != 0);
1535 							pmap_pte_set_prot(l3pte,
1536 							   bits);
1537 							PMAP_INVALIDATE_TLB(
1538 							   pmap, sva, hadasm,
1539 							   isactive, cpu_id);
1540 							PMAP_TLB_SHOOTDOWN(
1541 							   pmap, sva,
1542 							   hadasm ? PG_ASM : 0);
1543 						}
1544 					}
1545 				}
1546 			}
1547 		}
1548 	}
1549 
1550 	PMAP_TLB_SHOOTNOW();
1551 
1552 	if (prot & VM_PROT_EXECUTE)
1553 		PMAP_SYNC_ISTREAM(pmap);
1554 
1555 	PMAP_UNLOCK(pmap);
1556 }
1557 
1558 /*
1559  * pmap_enter:			[ INTERFACE ]
1560  *
1561  *	Insert the given physical page (p) at
1562  *	the specified virtual address (v) in the
1563  *	target physical map with the protection requested.
1564  *
1565  *	If specified, the page will be wired down, meaning
1566  *	that the related pte can not be reclaimed.
1567  *
1568  *	Note:  This is the only routine which MAY NOT lazy-evaluate
1569  *	or lose information.  That is, this routine must actually
1570  *	insert this page into the given map NOW.
1571  */
1572 int
1573 pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
1574 {
1575 	struct vm_page *pg;			/* if != NULL, managed page */
1576 	pt_entry_t *pte, npte, opte;
1577 	paddr_t opa;
1578 	bool tflush = true;
1579 	bool hadasm = false;	/* XXX gcc -Wuninitialized */
1580 	bool needisync = false;
1581 	bool setisync = false;
1582 	bool isactive;
1583 	bool wired;
1584 	long cpu_id = cpu_number();
1585 	int error = 0;
1586 	kmutex_t *lock;
1587 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1588 
1589 #ifdef DEBUG
1590 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1591 		printf("pmap_enter(%p, %lx, %lx, %x, %x)\n",
1592 		       pmap, va, pa, prot, flags);
1593 #endif
1594 	pg = PHYS_TO_VM_PAGE(pa);
1595 	isactive = PMAP_ISACTIVE(pmap, cpu_id);
1596 	wired = (flags & PMAP_WIRED) != 0;
1597 
1598 	/*
1599 	 * Determine what we need to do about the I-stream.  If
1600 	 * VM_PROT_EXECUTE is set, we mark a user pmap as needing
1601 	 * an I-sync on the way back out to userspace.  We always
1602 	 * need an immediate I-sync for the kernel pmap.
1603 	 */
1604 	if (prot & VM_PROT_EXECUTE) {
1605 		if (pmap == pmap_kernel())
1606 			needisync = true;
1607 		else {
1608 			setisync = true;
1609 			needisync = (pmap->pm_cpus != 0);
1610 		}
1611 	}
1612 
1613 	PMAP_MAP_TO_HEAD_LOCK();
1614 	PMAP_LOCK(pmap);
1615 
1616 	if (pmap == pmap_kernel()) {
1617 #ifdef DIAGNOSTIC
1618 		/*
1619 		 * Sanity check the virtual address.
1620 		 */
1621 		if (va < VM_MIN_KERNEL_ADDRESS)
1622 			panic("pmap_enter: kernel pmap, invalid va 0x%lx", va);
1623 #endif
1624 		pte = PMAP_KERNEL_PTE(va);
1625 	} else {
1626 		pt_entry_t *l1pte, *l2pte;
1627 
1628 #ifdef DIAGNOSTIC
1629 		/*
1630 		 * Sanity check the virtual address.
1631 		 */
1632 		if (va >= VM_MAXUSER_ADDRESS)
1633 			panic("pmap_enter: user pmap, invalid va 0x%lx", va);
1634 #endif
1635 
1636 		KASSERT(pmap->pm_lev1map != kernel_lev1map);
1637 
1638 		/*
1639 		 * Check to see if the level 1 PTE is valid, and
1640 		 * allocate a new level 2 page table page if it's not.
1641 		 * A reference will be added to the level 2 table when
1642 		 * the level 3 table is created.
1643 		 */
1644 		l1pte = pmap_l1pte(pmap, va);
1645 		if (pmap_pte_v(l1pte) == 0) {
1646 			pmap_physpage_addref(l1pte);
1647 			error = pmap_ptpage_alloc(pmap, l1pte, PGU_L2PT);
1648 			if (error) {
1649 				pmap_l1pt_delref(pmap, l1pte, cpu_id);
1650 				if (flags & PMAP_CANFAIL)
1651 					goto out;
1652 				panic("pmap_enter: unable to create L2 PT "
1653 				    "page");
1654 			}
1655 #ifdef DEBUG
1656 			if (pmapdebug & PDB_PTPAGE)
1657 				printf("pmap_enter: new level 2 table at "
1658 				    "0x%lx\n", pmap_pte_pa(l1pte));
1659 #endif
1660 		}
1661 
1662 		/*
1663 		 * Check to see if the level 2 PTE is valid, and
1664 		 * allocate a new level 3 page table page if it's not.
1665 		 * A reference will be added to the level 3 table when
1666 		 * the mapping is validated.
1667 		 */
1668 		l2pte = pmap_l2pte(pmap, va, l1pte);
1669 		if (pmap_pte_v(l2pte) == 0) {
1670 			pmap_physpage_addref(l2pte);
1671 			error = pmap_ptpage_alloc(pmap, l2pte, PGU_L3PT);
1672 			if (error) {
1673 				pmap_l2pt_delref(pmap, l1pte, l2pte, cpu_id);
1674 				if (flags & PMAP_CANFAIL)
1675 					goto out;
1676 				panic("pmap_enter: unable to create L3 PT "
1677 				    "page");
1678 			}
1679 #ifdef DEBUG
1680 			if (pmapdebug & PDB_PTPAGE)
1681 				printf("pmap_enter: new level 3 table at "
1682 				    "0x%lx\n", pmap_pte_pa(l2pte));
1683 #endif
1684 		}
1685 
1686 		/*
1687 		 * Get the PTE that will map the page.
1688 		 */
1689 		pte = pmap_l3pte(pmap, va, l2pte);
1690 	}
1691 
1692 	/* Remember the entire old PTE; it is used for the TBI check later. */
1693 	opte = *pte;
1694 
1695 	/*
1696 	 * Check to see if the old mapping is valid.  If not, validate the
1697 	 * new one immediately.
1698 	 */
1699 	if (pmap_pte_v(pte) == 0) {
1700 		/*
1701 		 * No need to invalidate the TLB in this case; an invalid
1702 		 * mapping won't be in the TLB, and a previously valid
1703 		 * mapping would have been flushed when it was invalidated.
1704 		 */
1705 		tflush = false;
1706 
1707 		/*
1708 		 * No need to synchronize the I-stream, either, for basically
1709 		 * the same reason.
1710 		 */
1711 		setisync = needisync = false;
1712 
1713 		if (pmap != pmap_kernel()) {
1714 			/*
1715 			 * New mappings gain a reference on the level 3
1716 			 * table.
1717 			 */
1718 			pmap_physpage_addref(pte);
1719 		}
1720 		goto validate_enterpv;
1721 	}
1722 
1723 	opa = pmap_pte_pa(pte);
1724 	hadasm = (pmap_pte_asm(pte) != 0);
1725 
1726 	if (opa == pa) {
1727 		/*
1728 		 * Mapping has not changed; must be a protection or
1729 		 * wiring change.
1730 		 */
1731 		if (pmap_pte_w_chg(pte, wired ? PG_WIRED : 0)) {
1732 #ifdef DEBUG
1733 			if (pmapdebug & PDB_ENTER)
1734 				printf("pmap_enter: wiring change -> %d\n",
1735 				    wired);
1736 #endif
1737 			/*
1738 			 * Adjust the wiring count.
1739 			 */
1740 			if (wired)
1741 				PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1);
1742 			else
1743 				PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1744 		}
1745 
1746 		/*
1747 		 * Set the PTE.
1748 		 */
1749 		goto validate;
1750 	}
1751 
1752 	/*
1753 	 * The mapping has changed.  We need to invalidate the
1754 	 * old mapping before creating the new one.
1755 	 */
1756 #ifdef DEBUG
1757 	if (pmapdebug & PDB_ENTER)
1758 		printf("pmap_enter: removing old mapping 0x%lx\n", va);
1759 #endif
1760 	if (pmap != pmap_kernel()) {
1761 		/*
1762 		 * Gain an extra reference on the level 3 table.
1763 		 * pmap_remove_mapping() will delete a reference,
1764 		 * and we don't want the table to be erroneously
1765 		 * freed.
1766 		 */
1767 		pmap_physpage_addref(pte);
1768 	}
1769 	needisync |= pmap_remove_mapping(pmap, va, pte, true, cpu_id);
1770 
1771  validate_enterpv:
1772 	/*
1773 	 * Enter the mapping into the pv_table if appropriate.
1774 	 */
1775 	if (pg != NULL) {
1776 		error = pmap_pv_enter(pmap, pg, va, pte, true);
1777 		if (error) {
1778 			pmap_l3pt_delref(pmap, va, pte, cpu_id);
1779 			if (flags & PMAP_CANFAIL)
1780 				goto out;
1781 			panic("pmap_enter: unable to enter mapping in PV "
1782 			    "table");
1783 		}
1784 	}
1785 
1786 	/*
1787 	 * Increment counters.
1788 	 */
1789 	PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1);
1790 	if (wired)
1791 		PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1);
1792 
1793  validate:
1794 	/*
1795 	 * Build the new PTE.
1796 	 */
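	/*
	 * (The page frame number, pa >> PGSHIFT, is placed in the PTE's
	 * PFN field; PG_V marks the mapping valid.)
	 */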
1797 	npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap, prot) | PG_V;
1798 	if (pg != NULL) {
1799 		struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
1800 		int attrs;
1801 
1802 #ifdef DIAGNOSTIC
1803 		if ((flags & VM_PROT_ALL) & ~prot)
1804 			panic("pmap_enter: access type exceeds prot");
1805 #endif
1806 		lock = pmap_pvh_lock(pg);
1807 		mutex_enter(lock);
1808 		if (flags & VM_PROT_WRITE)
1809 			md->pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED);
1810 		else if (flags & VM_PROT_ALL)
1811 			md->pvh_attrs |= PGA_REFERENCED;
1812 		attrs = md->pvh_attrs;
1813 		mutex_exit(lock);
1814 
1815 		/*
1816 		 * Set up referenced/modified emulation for new mapping.
1817 		 */
1818 		if ((attrs & PGA_REFERENCED) == 0)
1819 			npte |= PG_FOR | PG_FOW | PG_FOE;
1820 		else if ((attrs & PGA_MODIFIED) == 0)
1821 			npte |= PG_FOW;
1822 
1823 		/*
1824 		 * Mapping was entered on PV list.
1825 		 */
1826 		npte |= PG_PVLIST;
1827 	}
1828 	if (wired)
1829 		npte |= PG_WIRED;
1830 #ifdef DEBUG
1831 	if (pmapdebug & PDB_ENTER)
1832 		printf("pmap_enter: new pte = 0x%lx\n", npte);
1833 #endif
1834 
1835 	/*
1836 	 * If the PALcode portion of the new PTE is the same as the
1837 	 * old PTE, no TBI is necessary.
1838 	 */
1839 	if (PG_PALCODE(opte) == PG_PALCODE(npte))
1840 		tflush = false;
1841 
1842 	/*
1843 	 * Set the new PTE.
1844 	 */
1845 	PMAP_SET_PTE(pte, npte);
1846 
1847 	/*
1848 	 * Invalidate the TLB entry for this VA and any appropriate
1849 	 * caches.
1850 	 */
1851 	if (tflush) {
1852 		PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id);
1853 		PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0);
1854 		PMAP_TLB_SHOOTNOW();
1855 	}
1856 	if (setisync)
1857 		PMAP_SET_NEEDISYNC(pmap);
1858 	if (needisync)
1859 		PMAP_SYNC_ISTREAM(pmap);
1860 
1861 out:
1862 	PMAP_UNLOCK(pmap);
1863 	PMAP_MAP_TO_HEAD_UNLOCK();
1864 
1865 	return error;
1866 }
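/*
 * Illustrative sketch only (no such caller appears in this file): the
 * MI fault path typically enters a managed page roughly like
 *
 *	error = pmap_enter(map->pmap, va, VM_PAGE_TO_PHYS(pg), prot,
 *	    access_type | PMAP_CANFAIL);
 *	if (error)
 *		back off and retry (e.g. wait for memory);
 *
 * PMAP_CANFAIL and PMAP_WIRED are the only flag bits examined above
 * beyond the access type.
 */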
1867 
1868 /*
1869  * pmap_kenter_pa:		[ INTERFACE ]
1870  *
1871  *	Enter a va -> pa mapping into the kernel pmap without any
1872  *	physical->virtual tracking.
1873  *
1874  *	Note: no locking is necessary in this function.
1875  */
1876 void
1877 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
1878 {
1879 	pt_entry_t *pte, npte;
1880 	long cpu_id = cpu_number();
1881 	bool needisync = false;
1882 	pmap_t pmap = pmap_kernel();
1883 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1884 
1885 #ifdef DEBUG
1886 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1887 		printf("pmap_kenter_pa(%lx, %lx, %x)\n",
1888 		    va, pa, prot);
1889 #endif
1890 
1891 #ifdef DIAGNOSTIC
1892 	/*
1893 	 * Sanity check the virtual address.
1894 	 */
1895 	if (va < VM_MIN_KERNEL_ADDRESS)
1896 		panic("pmap_kenter_pa: kernel pmap, invalid va 0x%lx", va);
1897 #endif
1898 
1899 	pte = PMAP_KERNEL_PTE(va);
1900 
1901 	if (pmap_pte_v(pte) == 0)
1902 		PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1);
1903 	if (pmap_pte_w(pte) == 0)
1904 		PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1905 
1906 	if ((prot & VM_PROT_EXECUTE) != 0 || pmap_pte_exec(pte))
1907 		needisync = true;
1908 
1909 	/*
1910 	 * Build the new PTE.
1911 	 */
1912 	npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap_kernel(), prot) |
1913 	    PG_V | PG_WIRED;
1914 
1915 	/*
1916 	 * Set the new PTE.
1917 	 */
1918 	PMAP_SET_PTE(pte, npte);
1919 #if defined(MULTIPROCESSOR)
1920 	alpha_mb();		/* XXX alpha_wmb()? */
1921 #endif
1922 
1923 	/*
1924 	 * Invalidate the TLB entry for this VA and any appropriate
1925 	 * caches.
1926 	 */
1927 	PMAP_INVALIDATE_TLB(pmap, va, true, true, cpu_id);
1928 	PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM);
1929 	PMAP_TLB_SHOOTNOW();
1930 
1931 	if (needisync)
1932 		PMAP_SYNC_ISTREAM_KERNEL();
1933 }
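/*
 * Illustrative sketch only: a typical unmanaged kernel mapping is made
 * and later torn down as
 *
 *	pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE, 0);
 *	...
 *	pmap_kremove(va, PAGE_SIZE);
 *
 * Such mappings are always wired and never placed on a PV list, which
 * is why pmap_kremove() panics on a PG_PVLIST entry.
 */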
1934 
1935 /*
1936  * pmap_kremove:		[ INTERFACE ]
1937  *
1938  *	Remove a mapping entered with pmap_kenter_pa() starting at va,
1939  *	for size bytes (assumed to be page rounded).
1940  */
1941 void
1942 pmap_kremove(vaddr_t va, vsize_t size)
1943 {
1944 	pt_entry_t *pte;
1945 	bool needisync = false;
1946 	long cpu_id = cpu_number();
1947 	pmap_t pmap = pmap_kernel();
1948 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1949 
1950 #ifdef DEBUG
1951 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1952 		printf("pmap_kremove(%lx, %lx)\n",
1953 		    va, size);
1954 #endif
1955 
1956 #ifdef DIAGNOSTIC
1957 	if (va < VM_MIN_KERNEL_ADDRESS)
1958 		panic("pmap_kremove: user address");
1959 #endif
1960 
1961 	for (; size != 0; size -= PAGE_SIZE, va += PAGE_SIZE) {
1962 		pte = PMAP_KERNEL_PTE(va);
1963 		if (pmap_pte_v(pte)) {
1964 #ifdef DIAGNOSTIC
1965 			if (pmap_pte_pv(pte))
1966 				panic("pmap_kremove: PG_PVLIST mapping for "
1967 				    "0x%lx", va);
1968 #endif
1969 			if (pmap_pte_exec(pte))
1970 				needisync = true;
1971 
1972 			/* Zap the mapping. */
1973 			PMAP_SET_PTE(pte, PG_NV);
1974 #if defined(MULTIPROCESSOR)
1975 			alpha_mb();		/* XXX alpha_wmb()? */
1976 #endif
1977 			PMAP_INVALIDATE_TLB(pmap, va, true, true, cpu_id);
1978 			PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM);
1979 
1980 			/* Update stats. */
1981 			PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1);
1982 			PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1983 		}
1984 	}
1985 
1986 	PMAP_TLB_SHOOTNOW();
1987 
1988 	if (needisync)
1989 		PMAP_SYNC_ISTREAM_KERNEL();
1990 }
1991 
1992 /*
1993  * pmap_unwire:			[ INTERFACE ]
1994  *
1995  *	Clear the wired attribute for a map/virtual-address pair.
1996  *
1997  *	The mapping must already exist in the pmap.
1998  */
1999 void
2000 pmap_unwire(pmap_t pmap, vaddr_t va)
2001 {
2002 	pt_entry_t *pte;
2003 
2004 #ifdef DEBUG
2005 	if (pmapdebug & PDB_FOLLOW)
2006 		printf("pmap_unwire(%p, %lx)\n", pmap, va);
2007 #endif
2008 
2009 	PMAP_LOCK(pmap);
2010 
2011 	pte = pmap_l3pte(pmap, va, NULL);
2012 #ifdef DIAGNOSTIC
2013 	if (pte == NULL || pmap_pte_v(pte) == 0)
2014 		panic("pmap_unwire");
2015 #endif
2016 
2017 	/*
2018 	 * If wiring actually changed (always?) clear the wire bit and
2019 	 * update the wire count.  Note that wiring is not a hardware
2020 	 * characteristic so there is no need to invalidate the TLB.
2021 	 */
2022 	if (pmap_pte_w_chg(pte, 0)) {
2023 		pmap_pte_set_w(pte, false);
2024 		PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
2025 	}
2026 #ifdef DIAGNOSTIC
2027 	else {
2028 		printf("pmap_unwire: wiring for pmap %p va 0x%lx "
2029 		    "didn't change!\n", pmap, va);
2030 	}
2031 #endif
2032 
2033 	PMAP_UNLOCK(pmap);
2034 }
2035 
2036 /*
2037  * pmap_extract:		[ INTERFACE ]
2038  *
2039  *	Extract the physical address associated with the given
2040  *	pmap/virtual address pair.
2041  */
2042 bool
2043 pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *pap)
2044 {
2045 	pt_entry_t *l1pte, *l2pte, *l3pte;
2046 	paddr_t pa;
2047 
2048 #ifdef DEBUG
2049 	if (pmapdebug & PDB_FOLLOW)
2050 		printf("pmap_extract(%p, %lx) -> ", pmap, va);
2051 #endif
2052 
2053 	/*
2054 	 * Take a faster path for the kernel pmap.  Avoids locking,
2055 	 * handles K0SEG.
2056 	 */
2057 	if (pmap == pmap_kernel()) {
2058 		pa = vtophys(va);
2059 		if (pap != NULL)
2060 			*pap = pa;
2061 #ifdef DEBUG
2062 		if (pmapdebug & PDB_FOLLOW)
2063 			printf("0x%lx (kernel vtophys)\n", pa);
2064 #endif
2065 		return (pa != 0);	/* XXX */
2066 	}
2067 
2068 	PMAP_LOCK(pmap);
2069 
2070 	l1pte = pmap_l1pte(pmap, va);
2071 	if (pmap_pte_v(l1pte) == 0)
2072 		goto out;
2073 
2074 	l2pte = pmap_l2pte(pmap, va, l1pte);
2075 	if (pmap_pte_v(l2pte) == 0)
2076 		goto out;
2077 
2078 	l3pte = pmap_l3pte(pmap, va, l2pte);
2079 	if (pmap_pte_v(l3pte) == 0)
2080 		goto out;
2081 
2082 	pa = pmap_pte_pa(l3pte) | (va & PGOFSET);
2083 	PMAP_UNLOCK(pmap);
2084 	if (pap != NULL)
2085 		*pap = pa;
2086 #ifdef DEBUG
2087 	if (pmapdebug & PDB_FOLLOW)
2088 		printf("0x%lx\n", pa);
2089 #endif
2090 	return (true);
2091 
2092  out:
2093 	PMAP_UNLOCK(pmap);
2094 #ifdef DEBUG
2095 	if (pmapdebug & PDB_FOLLOW)
2096 		printf("failed\n");
2097 #endif
2098 	return (false);
2099 }
2100 
2101 /*
2102  * pmap_copy:			[ INTERFACE ]
2103  *
2104  *	Copy the mapping range specified by src_addr/len
2105  *	from the source map to the range dst_addr/len
2106  *	in the destination map.
2107  *
2108  *	This routine is only advisory and need not do anything.
2109  */
2110 /* call deleted in <machine/pmap.h> */
2111 
2112 /*
2113  * pmap_update:			[ INTERFACE ]
2114  *
2115  *	Require that all active physical maps contain no
2116  *	incorrect entries NOW, by processing any deferred
2117  *	pmap operations.
2118  */
2119 /* call deleted in <machine/pmap.h> */
2120 
2121 /*
2122  * pmap_activate:		[ INTERFACE ]
2123  *
2124  *	Activate the pmap used by the specified process.  This includes
2125  *	reloading the MMU context if it is the current process, and
2126  *	marking the pmap as in use by the processor.
2127  */
2128 void
2129 pmap_activate(struct lwp *l)
2130 {
2131 	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
2132 	long cpu_id = cpu_number();
2133 
2134 #ifdef DEBUG
2135 	if (pmapdebug & PDB_FOLLOW)
2136 		printf("pmap_activate(%p)\n", l);
2137 #endif
2138 
2139 	/* Mark the pmap in use by this processor. */
2140 	atomic_or_ulong(&pmap->pm_cpus, (1UL << cpu_id));
2141 
2142 	/* Allocate an ASN. */
2143 	pmap_asn_alloc(pmap, cpu_id);
2144 
2145 	PMAP_ACTIVATE(pmap, l, cpu_id);
2146 }
2147 
2148 /*
2149  * pmap_deactivate:		[ INTERFACE ]
2150  *
2151  *	Mark that the pmap used by the specified process is no longer
2152  *	in use by the processor.
2153  *
2154  *	The comment above pmap_activate() wrt. locking applies here,
2155  *	as well.  Note that we use only a single `atomic' operation,
2156  *	so no locking is necessary.
2157  */
2158 void
2159 pmap_deactivate(struct lwp *l)
2160 {
2161 	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
2162 
2163 #ifdef DEBUG
2164 	if (pmapdebug & PDB_FOLLOW)
2165 		printf("pmap_deactivate(%p)\n", l);
2166 #endif
2167 
2168 	/*
2169 	 * Mark the pmap no longer in use by this processor.
2170 	 */
2171 	atomic_and_ulong(&pmap->pm_cpus, ~(1UL << cpu_number()));
2172 }
2173 
2174 /*
2175  * pmap_zero_page:		[ INTERFACE ]
2176  *
2177  *	Zero the specified (machine independent) page by mapping the page
2178  *	into virtual memory and clearing its contents, one machine dependent
2179  *	page at a time.
2180  *
2181  *	Note: no locking is necessary in this function.
2182  */
2183 void
2184 pmap_zero_page(paddr_t phys)
2185 {
2186 	u_long *p0, *p1, *pend;
2187 
2188 #ifdef DEBUG
2189 	if (pmapdebug & PDB_FOLLOW)
2190 		printf("pmap_zero_page(%lx)\n", phys);
2191 #endif
2192 
2193 	p0 = (u_long *)ALPHA_PHYS_TO_K0SEG(phys);
2194 	p1 = NULL;
2195 	pend = (u_long *)((u_long)p0 + PAGE_SIZE);
2196 
2197 	/*
2198 	 * Unroll the loop a bit, doing 16 quadwords per iteration.
2199 	 * Do only 8 back-to-back stores, and alternate registers.
2200 	 */
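	/*
	 * ($31 is the architectural zero register, so each "stq $31"
	 * below stores a zero quadword; %0 and %1 step through the two
	 * halves of each 16-quadword chunk.)
	 */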
2201 	do {
2202 		__asm volatile(
2203 		"# BEGIN loop body\n"
2204 		"	addq	%2, (8 * 8), %1		\n"
2205 		"	stq	$31, (0 * 8)(%0)	\n"
2206 		"	stq	$31, (1 * 8)(%0)	\n"
2207 		"	stq	$31, (2 * 8)(%0)	\n"
2208 		"	stq	$31, (3 * 8)(%0)	\n"
2209 		"	stq	$31, (4 * 8)(%0)	\n"
2210 		"	stq	$31, (5 * 8)(%0)	\n"
2211 		"	stq	$31, (6 * 8)(%0)	\n"
2212 		"	stq	$31, (7 * 8)(%0)	\n"
2213 		"					\n"
2214 		"	addq	%3, (8 * 8), %0		\n"
2215 		"	stq	$31, (0 * 8)(%1)	\n"
2216 		"	stq	$31, (1 * 8)(%1)	\n"
2217 		"	stq	$31, (2 * 8)(%1)	\n"
2218 		"	stq	$31, (3 * 8)(%1)	\n"
2219 		"	stq	$31, (4 * 8)(%1)	\n"
2220 		"	stq	$31, (5 * 8)(%1)	\n"
2221 		"	stq	$31, (6 * 8)(%1)	\n"
2222 		"	stq	$31, (7 * 8)(%1)	\n"
2223 		"	# END loop body"
2224 		: "=r" (p0), "=r" (p1)
2225 		: "0" (p0), "1" (p1)
2226 		: "memory");
2227 	} while (p0 < pend);
2228 }
2229 
2230 /*
2231  * pmap_copy_page:		[ INTERFACE ]
2232  *
2233  *	Copy the specified (machine independent) page by mapping the page
2234  *	into virtual memory and using memcpy to copy the page, one machine
2235  *	dependent page at a time.
2236  *
2237  *	Note: no locking is necessary in this function.
2238  */
2239 void
2240 pmap_copy_page(paddr_t src, paddr_t dst)
2241 {
2242 	const void *s;
2243 	void *d;
2244 
2245 #ifdef DEBUG
2246 	if (pmapdebug & PDB_FOLLOW)
2247 		printf("pmap_copy_page(%lx, %lx)\n", src, dst);
2248 #endif
2249 	s = (const void *)ALPHA_PHYS_TO_K0SEG(src);
2250 	d = (void *)ALPHA_PHYS_TO_K0SEG(dst);
2251 	memcpy(d, s, PAGE_SIZE);
2252 }
2253 
2254 /*
2255  * pmap_pageidlezero:		[ INTERFACE ]
2256  *
2257  *	Page zero'er for the idle loop.  Returns true if the
2258  *	page was zero'd, false if we aborted for some reason.
2259  */
2260 bool
2261 pmap_pageidlezero(paddr_t pa)
2262 {
2263 	u_long *ptr;
2264 	int i, cnt = PAGE_SIZE / sizeof(u_long);
2265 
2266 	for (i = 0, ptr = (u_long *) ALPHA_PHYS_TO_K0SEG(pa); i < cnt; i++) {
2267 		if (sched_curcpu_runnable_p()) {
2268 			/*
2269 			 * An LWP has become ready.  Abort now,
2270 			 * so we don't keep it waiting while we
2271 			 * finish zeroing the page.
2272 			 */
2273 			return (false);
2274 		}
2275 		*ptr++ = 0;
2276 	}
2277 
2278 	return (true);
2279 }
2280 
2281 /*
2282  * pmap_clear_modify:		[ INTERFACE ]
2283  *
2284  *	Clear the modify bits on the specified physical page.
2285  */
2286 bool
2287 pmap_clear_modify(struct vm_page *pg)
2288 {
2289 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2290 	bool rv = false;
2291 	long cpu_id = cpu_number();
2292 	kmutex_t *lock;
2293 
2294 #ifdef DEBUG
2295 	if (pmapdebug & PDB_FOLLOW)
2296 		printf("pmap_clear_modify(%p)\n", pg);
2297 #endif
2298 
2299 	PMAP_HEAD_TO_MAP_LOCK();
2300 	lock = pmap_pvh_lock(pg);
2301 	mutex_enter(lock);
2302 
2303 	if (md->pvh_attrs & PGA_MODIFIED) {
2304 		rv = true;
2305 		pmap_changebit(pg, PG_FOW, ~0, cpu_id);
2306 		md->pvh_attrs &= ~PGA_MODIFIED;
2307 	}
2308 
2309 	mutex_exit(lock);
2310 	PMAP_HEAD_TO_MAP_UNLOCK();
2311 
2312 	return (rv);
2313 }
2314 
2315 /*
2316  * pmap_clear_reference:	[ INTERFACE ]
2317  *
2318  *	Clear the reference bit on the specified physical page.
2319  */
2320 bool
2321 pmap_clear_reference(struct vm_page *pg)
2322 {
2323 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2324 	bool rv = false;
2325 	long cpu_id = cpu_number();
2326 	kmutex_t *lock;
2327 
2328 #ifdef DEBUG
2329 	if (pmapdebug & PDB_FOLLOW)
2330 		printf("pmap_clear_reference(%p)\n", pg);
2331 #endif
2332 
2333 	PMAP_HEAD_TO_MAP_LOCK();
2334 	lock = pmap_pvh_lock(pg);
2335 	mutex_enter(lock);
2336 
2337 	if (md->pvh_attrs & PGA_REFERENCED) {
2338 		rv = true;
2339 		pmap_changebit(pg, PG_FOR | PG_FOW | PG_FOE, ~0, cpu_id);
2340 		md->pvh_attrs &= ~PGA_REFERENCED;
2341 	}
2342 
2343 	mutex_exit(lock);
2344 	PMAP_HEAD_TO_MAP_UNLOCK();
2345 
2346 	return (rv);
2347 }
2348 
2349 /*
2350  * pmap_is_referenced:		[ INTERFACE ]
2351  *
2352  *	Return whether or not the specified physical page is referenced
2353  *	by any physical maps.
2354  */
2355 /* See <machine/pmap.h> */
2356 
2357 /*
2358  * pmap_is_modified:		[ INTERFACE ]
2359  *
2360  *	Return whether or not the specified physical page is modified
2361  *	by any physical maps.
2362  */
2363 /* See <machine/pmap.h> */
2364 
2365 /*
2366  * pmap_phys_address:		[ INTERFACE ]
2367  *
2368  *	Return the physical address corresponding to the specified
2369  *	cookie.  Used by the device pager to decode a device driver's
2370  *	mmap entry point return value.
2371  *
2372  *	Note: no locking is necessary in this function.
2373  */
2374 paddr_t
2375 pmap_phys_address(paddr_t ppn)
2376 {
2377 
2378 	return (alpha_ptob(ppn));
2379 }
2380 
2381 /*
2382  * Miscellaneous support routines follow
2383  */
2384 
2385 /*
2386  * alpha_protection_init:
2387  *
2388  *	Initialize Alpha protection code array.
2389  *
2390  *	Note: no locking is necessary in this function.
2391  */
2392 static void
2393 alpha_protection_init(void)
2394 {
2395 	int prot, *kp, *up;
2396 
2397 	kp = protection_codes[0];
2398 	up = protection_codes[1];
2399 
2400 	for (prot = 0; prot < 8; prot++) {
2401 		kp[prot] = PG_ASM;
2402 		up[prot] = 0;
2403 
2404 		if (prot & VM_PROT_READ) {
2405 			kp[prot] |= PG_KRE;
2406 			up[prot] |= PG_KRE | PG_URE;
2407 		}
2408 		if (prot & VM_PROT_WRITE) {
2409 			kp[prot] |= PG_KWE;
2410 			up[prot] |= PG_KWE | PG_UWE;
2411 		}
2412 		if (prot & VM_PROT_EXECUTE) {
2413 			kp[prot] |= PG_EXEC | PG_KRE;
2414 			up[prot] |= PG_EXEC | PG_KRE | PG_URE;
2415 		} else {
2416 			kp[prot] |= PG_FOE;
2417 			up[prot] |= PG_FOE;
2418 		}
2419 	}
2420 }
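/*
 * For example, the table entries for VM_PROT_READ|VM_PROT_WRITE (no
 * execute) work out to PG_ASM|PG_KRE|PG_KWE|PG_FOE for the kernel and
 * PG_KRE|PG_URE|PG_KWE|PG_UWE|PG_FOE for user pmaps.
 */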
2421 
2422 /*
2423  * pmap_remove_mapping:
2424  *
2425  *	Invalidate a single page denoted by pmap/va.
2426  *
2427  *	If (pte != NULL), it is the already computed PTE for the page.
2428  *
2429  *	Note: locking in this function is complicated by the fact
2430  *	that we can be called when the PV list is already locked
2431  *	(by pmap_page_protect()).  In this case, the caller must be
2432  *	careful to get the next PV entry while we remove this entry
2433  *	from beneath it.  We assume that the pmap itself is already
2434  *	locked; dolock applies only to the PV list.
2435  *
2436  *	Returns true if an I-stream sync needs to be initiated
2437  *	(for this CPU or for other CPUs), false otherwise.
2438  */
2439 static bool
2440 pmap_remove_mapping(pmap_t pmap, vaddr_t va, pt_entry_t *pte,
2441     bool dolock, long cpu_id)
2442 {
2443 	paddr_t pa;
2444 	struct vm_page *pg;		/* if != NULL, page is managed */
2445 	bool onpv;
2446 	bool hadasm;
2447 	bool isactive;
2448 	bool needisync = false;
2449 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
2450 
2451 #ifdef DEBUG
2452 	if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
2453 		printf("pmap_remove_mapping(%p, %lx, %p, %d, %ld)\n",
2454 		       pmap, va, pte, dolock, cpu_id);
2455 #endif
2456 
2457 	/*
2458 	 * PTE not provided, compute it from pmap and va.
2459 	 */
2460 	if (pte == NULL) {
2461 		pte = pmap_l3pte(pmap, va, NULL);
2462 		if (pmap_pte_v(pte) == 0)
2463 			return (false);
2464 	}
2465 
2466 	pa = pmap_pte_pa(pte);
2467 	onpv = (pmap_pte_pv(pte) != 0);
2468 	hadasm = (pmap_pte_asm(pte) != 0);
2469 	isactive = PMAP_ISACTIVE(pmap, cpu_id);
2470 
2471 	/*
2472 	 * Determine what we need to do about the I-stream.  If
2473 	 * PG_EXEC was set, we mark a user pmap as needing an
2474 	 * I-sync on the way out to userspace.  We always need
2475 	 * an immediate I-sync for the kernel pmap.
2476 	 */
2477 	if (pmap_pte_exec(pte)) {
2478 		if (pmap == pmap_kernel())
2479 			needisync = true;
2480 		else {
2481 			PMAP_SET_NEEDISYNC(pmap);
2482 			needisync = (pmap->pm_cpus != 0);
2483 		}
2484 	}
2485 
2486 	/*
2487 	 * Update statistics
2488 	 */
2489 	if (pmap_pte_w(pte))
2490 		PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
2491 	PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1);
2492 
2493 	/*
2494 	 * Invalidate the PTE after saving the reference modify info.
2495 	 */
2496 #ifdef DEBUG
2497 	if (pmapdebug & PDB_REMOVE)
2498 		printf("remove: invalidating pte at %p\n", pte);
2499 #endif
2500 	PMAP_SET_PTE(pte, PG_NV);
2501 
2502 	PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id);
2503 	PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0);
2504 	PMAP_TLB_SHOOTNOW();
2505 
2506 	/*
2507 	 * If we're removing a user mapping, check to see if we
2508 	 * can free page table pages.
2509 	 */
2510 	if (pmap != pmap_kernel()) {
2511 		/*
2512 		 * Delete the reference on the level 3 table.  It will
2513 		 * delete references on the level 2 and 1 tables as
2514 		 * appropriate.
2515 		 */
2516 		pmap_l3pt_delref(pmap, va, pte, cpu_id);
2517 	}
2518 
2519 	/*
2520 	 * If the mapping wasn't entered on the PV list, we're all done.
2521 	 */
2522 	if (onpv == false)
2523 		return (needisync);
2524 
2525 	/*
2526 	 * Remove it from the PV table.
2527 	 */
2528 	pg = PHYS_TO_VM_PAGE(pa);
2529 	KASSERT(pg != NULL);
2530 	pmap_pv_remove(pmap, pg, va, dolock);
2531 
2532 	return (needisync);
2533 }
2534 
2535 /*
2536  * pmap_changebit:
2537  *
2538  *	Set or clear the specified PTE bits for all mappings on the
2539  *	specified page.
2540  *
2541  *	Note: we assume that the pv_head is already locked, and that
2542  *	the caller has acquired a PV->pmap mutex so that we can lock
2543  *	the pmaps as we encounter them.
2544  */
2545 static void
2546 pmap_changebit(struct vm_page *pg, u_long set, u_long mask, long cpu_id)
2547 {
2548 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2549 	pv_entry_t pv;
2550 	pt_entry_t *pte, npte;
2551 	vaddr_t va;
2552 	bool hadasm, isactive;
2553 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
2554 
2555 #ifdef DEBUG
2556 	if (pmapdebug & PDB_BITS)
2557 		printf("pmap_changebit(%p, 0x%lx, 0x%lx)\n",
2558 		    pg, set, mask);
2559 #endif
2560 
2561 	/*
2562 	 * Loop over all current mappings setting/clearing as apropos.
2563 	 */
2564 	for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) {
2565 		va = pv->pv_va;
2566 
2567 		PMAP_LOCK(pv->pv_pmap);
2568 
2569 		pte = pv->pv_pte;
2570 		npte = (*pte | set) & mask;
2571 		if (*pte != npte) {
2572 			hadasm = (pmap_pte_asm(pte) != 0);
2573 			isactive = PMAP_ISACTIVE(pv->pv_pmap, cpu_id);
2574 			PMAP_SET_PTE(pte, npte);
2575 			PMAP_INVALIDATE_TLB(pv->pv_pmap, va, hadasm, isactive,
2576 			    cpu_id);
2577 			PMAP_TLB_SHOOTDOWN(pv->pv_pmap, va,
2578 			    hadasm ? PG_ASM : 0);
2579 		}
2580 		PMAP_UNLOCK(pv->pv_pmap);
2581 	}
2582 
2583 	PMAP_TLB_SHOOTNOW();
2584 }
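/*
 * Convention for the arguments above: the new PTE is computed as
 * (old | set) & mask.  For example, pmap_clear_modify() passes
 * set = PG_FOW, mask = ~0 to re-arm fault-on-write emulation, and
 * pmap_emulate_reference() passes set = 0, mask = ~faultoff to clear
 * the fault bits it has just serviced.
 */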
2585 
2586 /*
2587  * pmap_emulate_reference:
2588  *
2589  *	Emulate reference and/or modified bit hits.
2590  *	Return 1 if this was an execute fault on a non-exec mapping,
2591  *	otherwise return 0.
2592  */
2593 int
2594 pmap_emulate_reference(struct lwp *l, vaddr_t v, int user, int type)
2595 {
2596 	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
2597 	pt_entry_t faultoff, *pte;
2598 	struct vm_page *pg;
2599 	paddr_t pa;
2600 	bool didlock = false;
2601 	bool exec = false;
2602 	long cpu_id = cpu_number();
2603 	kmutex_t *lock;
2604 
2605 #ifdef DEBUG
2606 	if (pmapdebug & PDB_FOLLOW)
2607 		printf("pmap_emulate_reference: %p, 0x%lx, %d, %d\n",
2608 		    l, v, user, type);
2609 #endif
2610 
2611 	/*
2612 	 * Convert process and virtual address to physical address.
2613 	 */
2614 	if (v >= VM_MIN_KERNEL_ADDRESS) {
2615 		if (user)
2616 			panic("pmap_emulate_reference: user ref to kernel");
2617 		/*
2618 		 * No need to lock here; kernel PT pages never go away.
2619 		 */
2620 		pte = PMAP_KERNEL_PTE(v);
2621 	} else {
2622 #ifdef DIAGNOSTIC
2623 		if (l == NULL)
2624 			panic("pmap_emulate_reference: bad proc");
2625 		if (l->l_proc->p_vmspace == NULL)
2626 			panic("pmap_emulate_reference: bad p_vmspace");
2627 #endif
2628 		PMAP_LOCK(pmap);
2629 		didlock = true;
2630 		pte = pmap_l3pte(pmap, v, NULL);
2631 		/*
2632 		 * We'll unlock below where we're done with the PTE.
2633 		 */
2634 	}
2635 	exec = pmap_pte_exec(pte);
2636 	if (!exec && type == ALPHA_MMCSR_FOE) {
2637 		if (didlock)
2638 			PMAP_UNLOCK(pmap);
2639 		return (1);
2640 	}
2641 #ifdef DEBUG
2642 	if (pmapdebug & PDB_FOLLOW) {
2643 		printf("\tpte = %p, ", pte);
2644 		printf("*pte = 0x%lx\n", *pte);
2645 	}
2646 #endif
2647 #ifdef DEBUG				/* These checks are more expensive */
2648 	if (!pmap_pte_v(pte))
2649 		panic("pmap_emulate_reference: invalid pte");
2650 	if (type == ALPHA_MMCSR_FOW) {
2651 		if (!(*pte & (user ? PG_UWE : PG_UWE | PG_KWE)))
2652 			panic("pmap_emulate_reference: write but unwritable");
2653 		if (!(*pte & PG_FOW))
2654 			panic("pmap_emulate_reference: write but not FOW");
2655 	} else {
2656 		if (!(*pte & (user ? PG_URE : PG_URE | PG_KRE)))
2657 			panic("pmap_emulate_reference: !write but unreadable");
2658 		if (!(*pte & (PG_FOR | PG_FOE)))
2659 			panic("pmap_emulate_reference: !write but not FOR|FOE");
2660 	}
2661 	/* Other diagnostics? */
2662 #endif
2663 	pa = pmap_pte_pa(pte);
2664 
2665 	/*
2666 	 * We're now done with the PTE.  If it was a user pmap, unlock
2667 	 * it now.
2668 	 */
2669 	if (didlock)
2670 		PMAP_UNLOCK(pmap);
2671 
2672 #ifdef DEBUG
2673 	if (pmapdebug & PDB_FOLLOW)
2674 		printf("\tpa = 0x%lx\n", pa);
2675 #endif
2676 #ifdef DIAGNOSTIC
2677 	if (!uvm_pageismanaged(pa))
2678 		panic("pmap_emulate_reference(%p, 0x%lx, %d, %d): "
2679 		      "pa 0x%lx not managed", l, v, user, type, pa);
2680 #endif
2681 
2682 	/*
2683 	 * Twiddle the appropriate bits to reflect the reference
2684 	 * and/or modification.
2685 	 *
2686 	 * The rules:
2687 	 * 	(1) always mark page as used, and
2688 	 *	(2) if it was a write fault, mark page as modified.
2689 	 */
2690 	pg = PHYS_TO_VM_PAGE(pa);
2691 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2692 
2693 	PMAP_HEAD_TO_MAP_LOCK();
2694 	lock = pmap_pvh_lock(pg);
2695 	mutex_enter(lock);
2696 
2697 	if (type == ALPHA_MMCSR_FOW) {
2698 		md->pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED);
2699 		faultoff = PG_FOR | PG_FOW;
2700 	} else {
2701 		md->pvh_attrs |= PGA_REFERENCED;
2702 		faultoff = PG_FOR;
2703 		if (exec) {
2704 			faultoff |= PG_FOE;
2705 		}
2706 	}
2707 	pmap_changebit(pg, 0, ~faultoff, cpu_id);
2708 
2709 	mutex_exit(lock);
2710 	PMAP_HEAD_TO_MAP_UNLOCK();
2711 	return (0);
2712 }
2713 
2714 #ifdef DEBUG
2715 /*
2716  * pmap_pv_dump:
2717  *
2718  *	Dump the physical->virtual data for the specified page.
2719  */
2720 void
2721 pmap_pv_dump(paddr_t pa)
2722 {
2723 	struct vm_page *pg;
2724 	struct vm_page_md *md;
2725 	pv_entry_t pv;
2726 	kmutex_t *lock;
2727 
2728 	pg = PHYS_TO_VM_PAGE(pa);
2729 	md = VM_PAGE_TO_MD(pg);
2730 
2731 	lock = pmap_pvh_lock(pg);
2732 	mutex_enter(lock);
2733 
2734 	printf("pa 0x%lx (attrs = 0x%x):\n", pa, md->pvh_attrs);
2735 	for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next)
2736 		printf("     pmap %p, va 0x%lx\n",
2737 		    pv->pv_pmap, pv->pv_va);
2738 	printf("\n");
2739 
2740 	mutex_exit(lock);
2741 }
2742 #endif
2743 
2744 /*
2745  * vtophys:
2746  *
2747  *	Return the physical address corresponding to the K0SEG or
2748  *	K1SEG address provided.
2749  *
2750  *	Note: no locking is necessary in this function.
2751  */
2752 paddr_t
2753 vtophys(vaddr_t vaddr)
2754 {
2755 	pt_entry_t *pte;
2756 	paddr_t paddr = 0;
2757 
2758 	if (vaddr < ALPHA_K0SEG_BASE)
2759 		printf("vtophys: invalid vaddr 0x%lx", vaddr);
2760 	else if (vaddr <= ALPHA_K0SEG_END)
2761 		paddr = ALPHA_K0SEG_TO_PHYS(vaddr);
2762 	else {
2763 		pte = PMAP_KERNEL_PTE(vaddr);
2764 		if (pmap_pte_v(pte))
2765 			paddr = pmap_pte_pa(pte) | (vaddr & PGOFSET);
2766 	}
2767 
2768 #if 0
2769 	printf("vtophys(0x%lx) -> 0x%lx\n", vaddr, paddr);
2770 #endif
2771 
2772 	return (paddr);
2773 }
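/*
 * As a sanity check on the above: for any valid physical address pa,
 * vtophys(ALPHA_PHYS_TO_K0SEG(pa)) == pa, since K0SEG is a
 * direct-mapped segment.
 */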
2774 
2775 /******************** pv_entry management ********************/
2776 
2777 /*
2778  * pmap_pv_enter:
2779  *
2780  *	Add a physical->virtual entry to the pv_table.
2781  */
2782 static int
2783 pmap_pv_enter(pmap_t pmap, struct vm_page *pg, vaddr_t va, pt_entry_t *pte,
2784     bool dolock)
2785 {
2786 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2787 	pv_entry_t newpv;
2788 	kmutex_t *lock;
2789 
2790 	/*
2791 	 * Allocate and fill in the new pv_entry.
2792 	 */
2793 	newpv = pmap_pv_alloc();
2794 	if (newpv == NULL)
2795 		return ENOMEM;
2796 	newpv->pv_va = va;
2797 	newpv->pv_pmap = pmap;
2798 	newpv->pv_pte = pte;
2799 
2800 	if (dolock) {
2801 		lock = pmap_pvh_lock(pg);
2802 		mutex_enter(lock);
2803 	}
2804 
2805 #ifdef DEBUG
2806     {
2807 	pv_entry_t pv;
2808 	/*
2809 	 * Make sure the entry doesn't already exist.
2810 	 */
2811 	for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) {
2812 		if (pmap == pv->pv_pmap && va == pv->pv_va) {
2813 			printf("pmap = %p, va = 0x%lx\n", pmap, va);
2814 			panic("pmap_pv_enter: already in pv table");
2815 		}
2816 	}
2817     }
2818 #endif
2819 
2820 	/*
2821 	 * ...and put it in the list.
2822 	 */
2823 	newpv->pv_next = md->pvh_list;
2824 	md->pvh_list = newpv;
2825 
2826 	if (dolock) {
2827 		mutex_exit(lock);
2828 	}
2829 
2830 	return 0;
2831 }
2832 
2833 /*
2834  * pmap_pv_remove:
2835  *
2836  *	Remove a physical->virtual entry from the pv_table.
2837  */
2838 static void
2839 pmap_pv_remove(pmap_t pmap, struct vm_page *pg, vaddr_t va, bool dolock)
2840 {
2841 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2842 	pv_entry_t pv, *pvp;
2843 	kmutex_t *lock;
2844 
2845 	if (dolock) {
2846 		lock = pmap_pvh_lock(pg);
2847 		mutex_enter(lock);
2848 	} else {
2849 		lock = NULL; /* XXX stupid gcc */
2850 	}
2851 
2852 	/*
2853 	 * Find the entry to remove.
2854 	 */
2855 	for (pvp = &md->pvh_list, pv = *pvp;
2856 	     pv != NULL; pvp = &pv->pv_next, pv = *pvp)
2857 		if (pmap == pv->pv_pmap && va == pv->pv_va)
2858 			break;
2859 
2860 #ifdef DEBUG
2861 	if (pv == NULL)
2862 		panic("pmap_pv_remove: not in pv table");
2863 #endif
2864 
2865 	*pvp = pv->pv_next;
2866 
2867 	if (dolock) {
2868 		mutex_exit(lock);
2869 	}
2870 
2871 	pmap_pv_free(pv);
2872 }
2873 
2874 /*
2875  * pmap_pv_page_alloc:
2876  *
2877  *	Allocate a page for the pv_entry pool.
2878  */
2879 static void *
2880 pmap_pv_page_alloc(struct pool *pp, int flags)
2881 {
2882 	paddr_t pg;
2883 
2884 	if (pmap_physpage_alloc(PGU_PVENT, &pg))
2885 		return ((void *)ALPHA_PHYS_TO_K0SEG(pg));
2886 	return (NULL);
2887 }
2888 
2889 /*
2890  * pmap_pv_page_free:
2891  *
2892  *	Free a pv_entry pool page.
2893  */
2894 static void
2895 pmap_pv_page_free(struct pool *pp, void *v)
2896 {
2897 
2898 	pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t)v));
2899 }
2900 
2901 /******************** misc. functions ********************/
2902 
2903 /*
2904  * pmap_physpage_alloc:
2905  *
2906  *	Allocate a single page from the VM system and return the
2907  *	physical address for that page.
2908  */
2909 static bool
2910 pmap_physpage_alloc(int usage, paddr_t *pap)
2911 {
2912 	struct vm_page *pg;
2913 	paddr_t pa;
2914 
2915 	/*
2916 	 * Don't ask for a zero'd page in the L1PT case -- we will
2917 	 * properly initialize it in the constructor.
2918 	 */
2919 
2920 	pg = uvm_pagealloc(NULL, 0, NULL, usage == PGU_L1PT ?
2921 	    UVM_PGA_USERESERVE : UVM_PGA_USERESERVE|UVM_PGA_ZERO);
2922 	if (pg != NULL) {
2923 		pa = VM_PAGE_TO_PHYS(pg);
2924 #ifdef DEBUG
2925 		struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2926 		if (md->pvh_refcnt != 0) {
2927 			printf("pmap_physpage_alloc: page 0x%lx has "
2928 			    "%d references\n", pa, md->pvh_refcnt);
2929 			panic("pmap_physpage_alloc");
2930 		}
2931 #endif
2932 		*pap = pa;
2933 		return (true);
2934 	}
2935 	return (false);
2936 }
2937 
2938 /*
2939  * pmap_physpage_free:
2940  *
2941  *	Free the single page table page at the specified physical address.
2942  */
2943 static void
2944 pmap_physpage_free(paddr_t pa)
2945 {
2946 	struct vm_page *pg;
2947 
2948 	if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL)
2949 		panic("pmap_physpage_free: bogus physical page address");
2950 
2951 #ifdef DEBUG
2952 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2953 	if (md->pvh_refcnt != 0)
2954 		panic("pmap_physpage_free: page still has references");
2955 #endif
2956 
2957 	uvm_pagefree(pg);
2958 }
2959 
2960 /*
2961  * pmap_physpage_addref:
2962  *
2963  *	Add a reference to the specified special use page.
2964  */
2965 static int
2966 pmap_physpage_addref(void *kva)
2967 {
2968 	struct vm_page *pg;
2969 	struct vm_page_md *md;
2970 	paddr_t pa;
2971 
2972 	pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva));
2973 	pg = PHYS_TO_VM_PAGE(pa);
2974 	md = VM_PAGE_TO_MD(pg);
2975 
2976 	KASSERT((int)md->pvh_refcnt >= 0);
2977 
2978 	return atomic_inc_uint_nv(&md->pvh_refcnt);
2979 }
2980 
2981 /*
2982  * pmap_physpage_delref:
2983  *
2984  *	Delete a reference to the specified special use page.
2985  */
2986 static int
2987 pmap_physpage_delref(void *kva)
2988 {
2989 	struct vm_page *pg;
2990 	struct vm_page_md *md;
2991 	paddr_t pa;
2992 
2993 	pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva));
2994 	pg = PHYS_TO_VM_PAGE(pa);
2995 	md = VM_PAGE_TO_MD(pg);
2996 
2997 	KASSERT((int)md->pvh_refcnt > 0);
2998 
2999 	return atomic_dec_uint_nv(&md->pvh_refcnt);
3000 }
3001 
3002 /******************** page table page management ********************/
3003 
3004 /*
3005  * pmap_growkernel:		[ INTERFACE ]
3006  *
3007  *	Grow the kernel address space.  This is a hint from the
3008  *	upper layer to pre-allocate more kernel PT pages.
3009  */
3010 vaddr_t
3011 pmap_growkernel(vaddr_t maxkvaddr)
3012 {
3013 	struct pmap *kpm = pmap_kernel(), *pm;
3014 	paddr_t ptaddr;
3015 	pt_entry_t *l1pte, *l2pte, pte;
3016 	vaddr_t va;
3017 	int l1idx;
3018 
3019 	rw_enter(&pmap_growkernel_lock, RW_WRITER);
3020 
3021 	if (maxkvaddr <= virtual_end)
3022 		goto out;		/* we are OK */
3023 
3024 	va = virtual_end;
3025 
3026 	while (va < maxkvaddr) {
3027 		/*
3028 		 * If there is no valid L1 PTE (i.e. no L2 PT page),
3029 		 * allocate a new L2 PT page and insert it into the
3030 		 * L1 map.
3031 		 */
3032 		l1pte = pmap_l1pte(kpm, va);
3033 		if (pmap_pte_v(l1pte) == 0) {
3034 			/*
3035 			 * XXX PGU_NORMAL?  It's not a "traditional" PT page.
3036 			 */
3037 			if (uvm.page_init_done == false) {
3038 				/*
3039 				 * We're growing the kernel pmap early (from
3040 				 * uvm_pageboot_alloc()).  This case must
3041 				 * be handled a little differently.
3042 				 */
3043 				ptaddr = ALPHA_K0SEG_TO_PHYS(
3044 				    pmap_steal_memory(PAGE_SIZE, NULL, NULL));
3045 			} else if (pmap_physpage_alloc(PGU_NORMAL,
3046 				   &ptaddr) == false)
3047 				goto die;
3048 			pte = (atop(ptaddr) << PG_SHIFT) |
3049 			    PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
3050 			*l1pte = pte;
3051 
3052 			l1idx = l1pte_index(va);
3053 
3054 			/* Update all the user pmaps. */
3055 			mutex_enter(&pmap_all_pmaps_lock);
3056 			for (pm = TAILQ_FIRST(&pmap_all_pmaps);
3057 			     pm != NULL; pm = TAILQ_NEXT(pm, pm_list)) {
3058 				/* Skip the kernel pmap. */
3059 				if (pm == pmap_kernel())
3060 					continue;
3061 
3062 				PMAP_LOCK(pm);
3063 				if (pm->pm_lev1map == kernel_lev1map) {
3064 					PMAP_UNLOCK(pm);
3065 					continue;
3066 				}
3067 				pm->pm_lev1map[l1idx] = pte;
3068 				PMAP_UNLOCK(pm);
3069 			}
3070 			mutex_exit(&pmap_all_pmaps_lock);
3071 		}
3072 
3073 		/*
3074 		 * Have an L2 PT page now, add the L3 PT page.
3075 		 */
3076 		l2pte = pmap_l2pte(kpm, va, l1pte);
3077 		KASSERT(pmap_pte_v(l2pte) == 0);
3078 		if (uvm.page_init_done == false) {
3079 			/*
3080 			 * See above.
3081 			 */
3082 			ptaddr = ALPHA_K0SEG_TO_PHYS(
3083 			    pmap_steal_memory(PAGE_SIZE, NULL, NULL));
3084 		} else if (pmap_physpage_alloc(PGU_NORMAL, &ptaddr) == false)
3085 			goto die;
3086 		*l2pte = (atop(ptaddr) << PG_SHIFT) |
3087 		    PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
3088 		va += ALPHA_L2SEG_SIZE;
3089 	}
3090 
3091 	/* Invalidate the L1 PT cache. */
3092 	pool_cache_invalidate(&pmap_l1pt_cache);
3093 
3094 	virtual_end = va;
3095 
3096  out:
3097 	rw_exit(&pmap_growkernel_lock);
3098 
3099 	return (virtual_end);
3100 
3101  die:
3102 	panic("pmap_growkernel: out of memory");
3103 }
3104 
3105 /*
3106  * pmap_lev1map_create:
3107  *
3108  *	Create a new level 1 page table for the specified pmap.
3109  *
3110  *	Note: the growkernel lock must already be held, and the pmap
3111  *	must either already be locked or not be globally referenced.
3112  */
3113 static int
3114 pmap_lev1map_create(pmap_t pmap, long cpu_id)
3115 {
3116 	pt_entry_t *l1pt;
3117 
3118 	KASSERT(pmap != pmap_kernel());
3119 
3120 	KASSERT(pmap->pm_lev1map == kernel_lev1map);
3121 	KASSERT(pmap->pm_asni[cpu_id].pma_asn == PMAP_ASN_RESERVED);
3122 
3123 	/* Don't sleep -- we're called with locks held. */
3124 	l1pt = pool_cache_get(&pmap_l1pt_cache, PR_NOWAIT);
3125 	if (l1pt == NULL)
3126 		return (ENOMEM);
3127 
3128 	pmap->pm_lev1map = l1pt;
3129 	return (0);
3130 }
3131 
3132 /*
3133  * pmap_lev1map_destroy:
3134  *
3135  *	Destroy the level 1 page table for the specified pmap.
3136  *
3137  *	Note: the growkernel lock must be held and the pmap must
3138  *	already be locked or not globally referenced.
3139  */
3140 static void
3141 pmap_lev1map_destroy(pmap_t pmap, long cpu_id)
3142 {
3143 	pt_entry_t *l1pt = pmap->pm_lev1map;
3144 
3145 	KASSERT(pmap != pmap_kernel());
3146 
3147 	/*
3148 	 * Go back to referencing the global kernel_lev1map.
3149 	 */
3150 	pmap->pm_lev1map = kernel_lev1map;
3151 
3152 	/*
3153 	 * Free the old level 1 page table page.
3154 	 */
3155 	pool_cache_put(&pmap_l1pt_cache, l1pt);
3156 }
3157 
3158 /*
3159  * pmap_l1pt_ctor:
3160  *
3161  *	Pool cache constructor for L1 PT pages.
3162  *
3163  *	Note: The growkernel lock is held across allocations
3164  *	from our pool_cache, so we don't need to acquire it
3165  *	ourselves.
3166  */
3167 static int
3168 pmap_l1pt_ctor(void *arg, void *object, int flags)
3169 {
3170 	pt_entry_t *l1pt = object, pte;
3171 	int i;
3172 
3173 	/*
3174 	 * Initialize the new level 1 table by zeroing the
3175 	 * user portion and copying the kernel mappings into
3176 	 * the kernel portion.
3177 	 */
3178 	for (i = 0; i < l1pte_index(VM_MIN_KERNEL_ADDRESS); i++)
3179 		l1pt[i] = 0;
3180 
3181 	for (i = l1pte_index(VM_MIN_KERNEL_ADDRESS);
3182 	     i <= l1pte_index(VM_MAX_KERNEL_ADDRESS); i++)
3183 		l1pt[i] = kernel_lev1map[i];
3184 
3185 	/*
3186 	 * Now, map the new virtual page table.  NOTE: NO ASM!
3187 	 */
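	/*
	 * (The entry must not be ASM because each pmap installs its own
	 * level 1 table at VPTBASE; the translation therefore differs
	 * per address space and may not be shared in the TLB.)
	 */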
3188 	pte = ((ALPHA_K0SEG_TO_PHYS((vaddr_t) l1pt) >> PGSHIFT) << PG_SHIFT) |
3189 	    PG_V | PG_KRE | PG_KWE;
3190 	l1pt[l1pte_index(VPTBASE)] = pte;
3191 
3192 	return (0);
3193 }
3194 
3195 /*
3196  * pmap_l1pt_alloc:
3197  *
3198  *	Page allocator for L1 PT pages.
3199  */
3200 static void *
3201 pmap_l1pt_alloc(struct pool *pp, int flags)
3202 {
3203 	paddr_t ptpa;
3204 
3205 	/*
3206 	 * Attempt to allocate a free page.
3207 	 */
3208 	if (pmap_physpage_alloc(PGU_L1PT, &ptpa) == false)
3209 		return (NULL);
3210 
3211 	return ((void *) ALPHA_PHYS_TO_K0SEG(ptpa));
3212 }
3213 
3214 /*
3215  * pmap_l1pt_free:
3216  *
3217  *	Page freer for L1 PT pages.
3218  */
3219 static void
3220 pmap_l1pt_free(struct pool *pp, void *v)
3221 {
3222 
3223 	pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t) v));
3224 }
3225 
3226 /*
3227  * pmap_ptpage_alloc:
3228  *
3229  *	Allocate a level 2 or level 3 page table page, and
3230  *	initialize the PTE that references it.
3231  *
3232  *	Note: the pmap must already be locked.
3233  */
3234 static int
3235 pmap_ptpage_alloc(pmap_t pmap, pt_entry_t *pte, int usage)
3236 {
3237 	paddr_t ptpa;
3238 
3239 	/*
3240 	 * Allocate the page table page.
3241 	 */
3242 	if (pmap_physpage_alloc(usage, &ptpa) == false)
3243 		return (ENOMEM);
3244 
3245 	/*
3246 	 * Initialize the referencing PTE.
3247 	 */
3248 	PMAP_SET_PTE(pte, ((ptpa >> PGSHIFT) << PG_SHIFT) |
3249 	    PG_V | PG_KRE | PG_KWE | PG_WIRED |
3250 	    (pmap == pmap_kernel() ? PG_ASM : 0));
3251 
3252 	return (0);
3253 }
3254 
3255 /*
3256  * pmap_ptpage_free:
3257  *
3258  *	Free the level 2 or level 3 page table page referenced
3259  *	by the provided PTE.
3260  *
3261  *	Note: the pmap must already be locked.
3262  */
3263 static void
3264 pmap_ptpage_free(pmap_t pmap, pt_entry_t *pte)
3265 {
3266 	paddr_t ptpa;
3267 
3268 	/*
3269 	 * Extract the physical address of the page from the PTE
3270 	 * and clear the entry.
3271 	 */
3272 	ptpa = pmap_pte_pa(pte);
3273 	PMAP_SET_PTE(pte, PG_NV);
3274 
3275 #ifdef DEBUG
3276 	pmap_zero_page(ptpa);
3277 #endif
3278 	pmap_physpage_free(ptpa);
3279 }
3280 
3281 /*
3282  * pmap_l3pt_delref:
3283  *
3284  *	Delete a reference on a level 3 PT page.  If the reference drops
3285  *	to zero, free it.
3286  *
3287  *	Note: the pmap must already be locked.
3288  */
3289 static void
3290 pmap_l3pt_delref(pmap_t pmap, vaddr_t va, pt_entry_t *l3pte, long cpu_id)
3291 {
3292 	pt_entry_t *l1pte, *l2pte;
3293 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
3294 
3295 	l1pte = pmap_l1pte(pmap, va);
3296 	l2pte = pmap_l2pte(pmap, va, l1pte);
3297 
3298 #ifdef DIAGNOSTIC
3299 	if (pmap == pmap_kernel())
3300 		panic("pmap_l3pt_delref: kernel pmap");
3301 #endif
3302 
3303 	if (pmap_physpage_delref(l3pte) == 0) {
3304 		/*
3305 		 * No more mappings; we can free the level 3 table.
3306 		 */
3307 #ifdef DEBUG
3308 		if (pmapdebug & PDB_PTPAGE)
3309 			printf("pmap_l3pt_delref: freeing level 3 table at "
3310 			    "0x%lx\n", pmap_pte_pa(l2pte));
3311 #endif
3312 		pmap_ptpage_free(pmap, l2pte);
3313 
3314 		/*
3315 		 * We've freed a level 3 table, so we must
3316 		 * invalidate the TLB entry for that PT page
3317 		 * in the Virtual Page Table VA range, because
3318 		 * otherwise the PALcode will service a TLB
3319 		 * miss using the stale VPT TLB entry it entered
3320 		 * behind our back to shortcut to the VA's PTE.
3321 		 */
3322 		PMAP_INVALIDATE_TLB(pmap,
3323 		    (vaddr_t)(&VPT[VPT_INDEX(va)]), false,
3324 		    PMAP_ISACTIVE(pmap, cpu_id), cpu_id);
3325 		PMAP_TLB_SHOOTDOWN(pmap,
3326 		    (vaddr_t)(&VPT[VPT_INDEX(va)]), 0);
3327 		PMAP_TLB_SHOOTNOW();
3328 
3329 		/*
3330 		 * We've freed a level 3 table, so delete the reference
3331 		 * on the level 2 table.
3332 		 */
3333 		pmap_l2pt_delref(pmap, l1pte, l2pte, cpu_id);
3334 	}
3335 }
3336 
3337 /*
3338  * pmap_l2pt_delref:
3339  *
3340  *	Delete a reference on a level 2 PT page.  If the reference drops
3341  *	to zero, free it.
3342  *
3343  *	Note: the pmap must already be locked.
3344  */
3345 static void
3346 pmap_l2pt_delref(pmap_t pmap, pt_entry_t *l1pte, pt_entry_t *l2pte,
3347     long cpu_id)
3348 {
3349 
3350 #ifdef DIAGNOSTIC
3351 	if (pmap == pmap_kernel())
3352 		panic("pmap_l2pt_delref: kernel pmap");
3353 #endif
3354 
3355 	if (pmap_physpage_delref(l2pte) == 0) {
3356 		/*
3357 		 * No more mappings in this segment; we can free the
3358 		 * level 2 table.
3359 		 */
3360 #ifdef DEBUG
3361 		if (pmapdebug & PDB_PTPAGE)
3362 			printf("pmap_l2pt_delref: freeing level 2 table at "
3363 			    "0x%lx\n", pmap_pte_pa(l1pte));
3364 #endif
3365 		pmap_ptpage_free(pmap, l1pte);
3366 
3367 		/*
3368 		 * We've freed a level 2 table, so delete the reference
3369 		 * on the level 1 table.
3370 		 */
3371 		pmap_l1pt_delref(pmap, l1pte, cpu_id);
3372 	}
3373 }
3374 
3375 /*
3376  * pmap_l1pt_delref:
3377  *
3378  *	Delete a reference on a level 1 PT page.  If the reference drops
3379  *	to zero, free it.
3380  *
3381  *	Note: the pmap must already be locked.
3382  */
3383 static void
3384 pmap_l1pt_delref(pmap_t pmap, pt_entry_t *l1pte, long cpu_id)
3385 {
3386 
3387 #ifdef DIAGNOSTIC
3388 	if (pmap == pmap_kernel())
3389 		panic("pmap_l1pt_delref: kernel pmap");
3390 #endif
3391 
3392 	(void)pmap_physpage_delref(l1pte);
3393 }
3394 
3395 /******************** Address Space Number management ********************/
3396 
3397 /*
3398  * pmap_asn_alloc:
3399  *
3400  *	Allocate and assign an ASN to the specified pmap.
3401  *
3402  *	Note: the pmap must already be locked.  This may be called from
3403  *	an interprocessor interrupt, and in that case, the sender of
3404  *	the IPI has the pmap lock.
3405  */
3406 static void
3407 pmap_asn_alloc(pmap_t pmap, long cpu_id)
3408 {
3409 	struct pmap_asn_info *pma = &pmap->pm_asni[cpu_id];
3410 	struct pmap_asn_info *cpma = &pmap_asn_info[cpu_id];
3411 
3412 #ifdef DEBUG
3413 	if (pmapdebug & (PDB_FOLLOW|PDB_ASN))
3414 		printf("pmap_asn_alloc(%p)\n", pmap);
3415 #endif
3416 
3417 	/*
3418 	 * If the pmap is still using the global kernel_lev1map, there
3419 	 * is no need to assign an ASN at this time, because only
3420 	 * kernel mappings exist in that map, and all kernel mappings
3421 	 * have PG_ASM set.  If the pmap eventually gets its own
3422 	 * lev1map, an ASN will be allocated at that time.
3423 	 *
3424 	 * Only the kernel pmap will reference kernel_lev1map.  Do the
3425 	 * same old fixups, but note that we no longer need the pmap
3426 	 * to be locked if we're in this mode, since pm_lev1map will
3427 	 * never change.
3429 	 */
3430 	if (pmap->pm_lev1map == kernel_lev1map) {
3431 #ifdef DEBUG
3432 		if (pmapdebug & PDB_ASN)
3433 			printf("pmap_asn_alloc: still references "
3434 			    "kernel_lev1map\n");
3435 #endif
3436 #if defined(MULTIPROCESSOR)
3437 		/*
3438 		 * In a multiprocessor system, it's possible to
3439 		 * get here without having PMAP_ASN_RESERVED in
3440 		 * pmap->pm_asni[cpu_id].pma_asn; see pmap_lev1map_destroy().
3441 		 *
3442 		 * So what we do here is simply assign the reserved
3443 		 * ASN for kernel_lev1map users and let things
3444 		 * continue on.  We do, however, let uniprocessor
3445 		 * configurations continue to make this assertion.
3446 		 */
3447 		pma->pma_asn = PMAP_ASN_RESERVED;
3448 #else
3449 		KASSERT(pma->pma_asn == PMAP_ASN_RESERVED);
3450 #endif /* MULTIPROCESSOR */
3451 		return;
3452 	}
3453 
3454 	/*
3455 	 * On processors which do not implement ASNs, the swpctx PALcode
3456 	 * operation will automatically invalidate the TLB and I-cache,
3457 	 * so we don't need to do that here.
3458 	 */
3459 	if (pmap_max_asn == 0) {
3460 		/*
3461 		 * Refresh the pmap's generation number, to
3462 		 * simplify logic elsewhere.
3463 		 */
3464 		pma->pma_asngen = cpma->pma_asngen;
3465 #ifdef DEBUG
3466 		if (pmapdebug & PDB_ASN)
3467 			printf("pmap_asn_alloc: no ASNs, using asngen %lu\n",
3468 			    pma->pma_asngen);
3469 #endif
3470 		return;
3471 	}
3472 
3473 	/*
3474 	 * Hopefully, we can continue using the one we have...
3475 	 */
3476 	if (pma->pma_asn != PMAP_ASN_RESERVED &&
3477 	    pma->pma_asngen == cpma->pma_asngen) {
3478 		/*
3479 		 * ASN is still in the current generation; keep on using it.
3480 		 */
3481 #ifdef DEBUG
3482 		if (pmapdebug & PDB_ASN)
3483 			printf("pmap_asn_alloc: same generation, keeping %u\n",
3484 			    pma->pma_asn);
3485 #endif
3486 		return;
3487 	}
3488 
3489 	/*
3490 	 * Need to assign a new ASN.  Grab the next one, incrementing
3491 	 * the generation number if we have to.
3492 	 */
3493 	if (cpma->pma_asn > pmap_max_asn) {
3494 		/*
3495 		 * Invalidate all non-PG_ASM TLB entries and the
3496 		 * I-cache, and bump the generation number.
3497 		 */
3498 		ALPHA_TBIAP();
3499 		alpha_pal_imb();
3500 
3501 		cpma->pma_asn = 1;
3502 		cpma->pma_asngen++;
3503 #ifdef DIAGNOSTIC
3504 		if (cpma->pma_asngen == 0) {
3505 			/*
3506 			 * The generation number has wrapped.  We could
3507 			 * handle this scenario by traversing all of
3508 			 * the pmaps, and invalidating the generation
3509 			 * number on those which are not currently
3510 			 * in use by this processor.
3511 			 *
3512 			 * However... considering that we're using
3513 			 * an unsigned 64-bit integer for generation
3514 			 * numbers, on non-ASN CPUs, we won't wrap
3515 			 * for approx. 585 million years, or 75 billion
3516 			 * years on a 128-ASN CPU (assuming 1000 switch
3517 			 * operations per second).
3518 			 *
3519 			 * So, we don't bother.
3520 			 */
3521 			panic("pmap_asn_alloc: too much uptime");
3522 		}
3523 #endif
3524 #ifdef DEBUG
3525 		if (pmapdebug & PDB_ASN)
3526 			printf("pmap_asn_alloc: generation bumped to %lu\n",
3527 			    cpma->pma_asngen);
3528 #endif
3529 	}
3530 
3531 	/*
3532 	 * Assign the new ASN and validate the generation number.
3533 	 */
3534 	pma->pma_asn = cpma->pma_asn++;
3535 	pma->pma_asngen = cpma->pma_asngen;
3536 
3537 #ifdef DEBUG
3538 	if (pmapdebug & PDB_ASN)
3539 		printf("pmap_asn_alloc: assigning %u to pmap %p\n",
3540 		    pma->pma_asn, pmap);
3541 #endif
3542 
3543 	/*
3544 	 * Have a new ASN, so there's no need to sync the I-stream
3545 	 * on the way back out to userspace.
3546 	 */
3547 	atomic_and_ulong(&pmap->pm_needisync, ~(1UL << cpu_id));
3548 }
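/*
 * In outline, the policy above is:
 *
 *	still on kernel_lev1map, or no ASNs implemented?
 *		-> nothing to allocate;
 *	current ASN valid and from the current generation?
 *		-> keep it;
 *	otherwise
 *		-> take the next ASN, first flushing the TLB and I-cache
 *		   and starting a new generation if the previous one is
 *		   exhausted.
 */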
3549 
3550 #if defined(MULTIPROCESSOR)
3551 /******************** TLB shootdown code ********************/
3552 
3553 /*
3554  * pmap_tlb_shootdown:
3555  *
3556  *	Cause the TLB entry for pmap/va to be shot down.
3557  *
3558  *	NOTE: The pmap must be locked here.
3559  */
3560 void
3561 pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, u_long *cpumaskp)
3562 {
3563 	struct pmap_tlb_shootdown_q *pq;
3564 	struct pmap_tlb_shootdown_job *pj;
3565 	struct cpu_info *ci, *self = curcpu();
3566 	u_long cpumask;
3567 	CPU_INFO_ITERATOR cii;
3568 
3569 	KASSERT((pmap == pmap_kernel()) || mutex_owned(&pmap->pm_lock));
3570 
3571 	cpumask = 0;
3572 
3573 	for (CPU_INFO_FOREACH(cii, ci)) {
3574 		if (ci == self)
3575 			continue;
3576 
3577 		/*
3578 		 * The pmap must be locked (unless it is the kernel
3579 		 * pmap, in which case it is okay for it to be
3580 		 * unlocked), which prevents it from becoming
3581 		 * active on any additional processors.  This makes
3582 		 * it safe to check for activeness.  If it's not
3583 		 * active on the processor in question, then just
3584 		 * mark it as needing a new ASN the next time it
3585 		 * does, saving the IPI.  We always have to send
3586 		 * the IPI for the kernel pmap.
3587 		 *
3588 		 * Note if it's marked active now, and it becomes
3589 		 * inactive by the time the processor receives
3590 		 * the IPI, that's okay, because it does the right
3591 		 * thing with it later.
3592 		 */
3593 		if (pmap != pmap_kernel() &&
3594 		    PMAP_ISACTIVE(pmap, ci->ci_cpuid) == 0) {
3595 			PMAP_INVALIDATE_ASN(pmap, ci->ci_cpuid);
3596 			continue;
3597 		}
3598 
3599 		cpumask |= 1UL << ci->ci_cpuid;
3600 
3601 		pq = &pmap_tlb_shootdown_q[ci->ci_cpuid];
3602 		mutex_spin_enter(&pq->pq_lock);
3603 
3604 		/*
3605 		 * Allocate a job.
3606 		 */
3607 		if (pq->pq_count < PMAP_TLB_SHOOTDOWN_MAXJOBS) {
3608 			pj = pool_cache_get(&pmap_tlb_shootdown_job_cache,
3609 			    PR_NOWAIT);
3610 		} else {
3611 			pj = NULL;
3612 		}
3613 
3614 		/*
3615 		 * If a global flush is already pending, we
3616 		 * don't really have to do anything else.
3617 		 */
3618 		pq->pq_pte |= pte;
3619 		if (pq->pq_tbia) {
3620 			mutex_spin_exit(&pq->pq_lock);
3621 			if (pj != NULL) {
3622 				pool_cache_put(&pmap_tlb_shootdown_job_cache,
3623 				    pj);
3624 			}
3625 			continue;
3626 		}
3627 		if (pj == NULL) {
3628 			/*
3629 			 * Couldn't allocate a job entry.  Just
3630 			 * tell the processor to kill everything.
3631 			 */
3632 			pq->pq_tbia = 1;
3633 		} else {
3634 			pj->pj_pmap = pmap;
3635 			pj->pj_va = va;
3636 			pj->pj_pte = pte;
3637 			pq->pq_count++;
3638 			TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list);
3639 		}
3640 		mutex_spin_exit(&pq->pq_lock);
3641 	}
3642 
3643 	*cpumaskp |= cpumask;
3644 }
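/*
 * Callers in this file reach the function above through the
 * PMAP_TLB_SHOOTDOWN()/PMAP_TLB_SHOOTNOW() macros, which (on
 * MULTIPROCESSOR kernels, presumably via the cpumask declared by
 * PMAP_TLB_SHOOTDOWN_CPUSET_DECL) accumulate the target CPUs and then
 * hand the mask to pmap_tlb_shootnow() below as a single batched IPI.
 */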
3645 
3646 /*
3647  * pmap_tlb_shootnow:
3648  *
3649  *	Process the TLB shootdowns that we have been accumulating
3650  *	for the specified processor set.
3651  */
3652 void
3653 pmap_tlb_shootnow(u_long cpumask)
3654 {
3655 
3656 	alpha_multicast_ipi(cpumask, ALPHA_IPI_SHOOTDOWN);
3657 }
3658 
3659 /*
3660  * pmap_do_tlb_shootdown:
3661  *
3662  *	Process pending TLB shootdown operations for this processor.
3663  */
3664 void
3665 pmap_do_tlb_shootdown(struct cpu_info *ci, struct trapframe *framep)
3666 {
3667 	u_long cpu_id = ci->ci_cpuid;
3668 	u_long cpu_mask = (1UL << cpu_id);
3669 	struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id];
3670 	struct pmap_tlb_shootdown_job *pj, *next;
3671 	TAILQ_HEAD(, pmap_tlb_shootdown_job) jobs;
3672 
3673 	TAILQ_INIT(&jobs);
3674 
3675 	mutex_spin_enter(&pq->pq_lock);
3676 	TAILQ_CONCAT(&jobs, &pq->pq_head, pj_list);
3677 	if (pq->pq_tbia) {
3678 		if (pq->pq_pte & PG_ASM)
3679 			ALPHA_TBIA();
3680 		else
3681 			ALPHA_TBIAP();
3682 		pq->pq_tbia = 0;
3683 		pq->pq_pte = 0;
3684 	} else {
3685 		TAILQ_FOREACH(pj, &jobs, pj_list) {
3686 			PMAP_INVALIDATE_TLB(pj->pj_pmap, pj->pj_va,
3687 			    pj->pj_pte & PG_ASM,
3688 			    pj->pj_pmap->pm_cpus & cpu_mask, cpu_id);
3689 		}
3690 		pq->pq_pte = 0;
3691 	}
3692 	pq->pq_count = 0;
3693 	mutex_spin_exit(&pq->pq_lock);
3694 
3695 	/* Free jobs back to the cache. */
3696 	for (pj = TAILQ_FIRST(&jobs); pj != NULL; pj = next) {
3697 		next = TAILQ_NEXT(pj, pj_list);
3698 		pool_cache_put(&pmap_tlb_shootdown_job_cache, pj);
3699 	}
3700 }
3701 #endif /* MULTIPROCESSOR */
3702