xref: /netbsd-src/sys/arch/alpha/alpha/pmap.c (revision 4817a0b0b8fe9612e8ebe21a9bf2d97b95038a97)
1 /* $NetBSD: pmap.c,v 1.255 2010/11/12 07:59:25 uebayasi Exp $ */
2 
3 /*-
4  * Copyright (c) 1998, 1999, 2000, 2001, 2007, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center and by Chris G. Demetriou.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * This code is derived from software contributed to Berkeley by
38  * the Systems Programming Group of the University of Utah Computer
39  * Science Department.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  * 3. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)pmap.c	8.6 (Berkeley) 5/27/94
66  */
67 
68 /*
69  * DEC Alpha physical map management code.
70  *
71  * History:
72  *
73  *	This pmap started life as a Motorola 68851/68030 pmap,
74  *	written by Mike Hibler at the University of Utah.
75  *
76  *	It was modified for the DEC Alpha by Chris Demetriou
77  *	at Carnegie Mellon University.
78  *
79  *	Support for non-contiguous physical memory was added by
80  *	Jason R. Thorpe of the Numerical Aerospace Simulation
81  *	Facility, NASA Ames Research Center and Chris Demetriou.
82  *
83  *	Page table management and a major cleanup were undertaken
84  *	by Jason R. Thorpe, with lots of help from Ross Harvey of
85  *	Avalon Computer Systems and from Chris Demetriou.
86  *
87  *	Support for the new UVM pmap interface was written by
88  *	Jason R. Thorpe.
89  *
90  *	Support for ASNs was written by Jason R. Thorpe, again
91  *	with help from Chris Demetriou and Ross Harvey.
92  *
93  *	The locking protocol was written by Jason R. Thorpe,
94  *	using Chuck Cranor's i386 pmap for UVM as a model.
95  *
96  *	TLB shootdown code was written by Jason R. Thorpe.
97  *
98  *	Multiprocessor modifications by Andrew Doran.
99  *
100  * Notes:
101  *
102  *	All page table access is done via K0SEG.  The one exception
103  *	to this is for kernel mappings.  Since all kernel page
104  *	tables are pre-allocated, we can use the Virtual Page Table
105  *	to access PTEs that map K1SEG addresses.
106  *
107  *	Kernel page table pages are statically allocated in
108  *	pmap_bootstrap(), and are never freed.  In the future,
109  *	support for dynamically adding additional kernel page
110  *	table pages may be added.  User page table pages are
111  *	dynamically allocated and freed.
112  *
113  * Bugs/misfeatures:
114  *
115  *	- Some things could be optimized.
116  */
117 
118 /*
119  *	Manages physical address maps.
120  *
121  *	Since the information managed by this module is
122  *	also stored by the logical address mapping module,
123  *	this module may throw away valid virtual-to-physical
124  *	mappings at almost any time.  However, invalidations
125  *	of virtual-to-physical mappings must be done as
126  *	requested.
127  *
128  *	In order to cope with hardware architectures which
129  *	make virtual-to-physical map invalidates expensive,
130  *	this module may delay invalidate or reduced protection
131  *	operations until such time as they are actually
132  *	necessary.  This module is given full information as
133  *	to which processors are currently using which maps,
134  *	and to when physical maps must be made correct.
135  */
136 
137 #include "opt_lockdebug.h"
138 #include "opt_sysv.h"
139 #include "opt_multiprocessor.h"
140 
141 #include <sys/cdefs.h>			/* RCS ID & Copyright macro defns */
142 
143 __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.255 2010/11/12 07:59:25 uebayasi Exp $");
144 
145 #include <sys/param.h>
146 #include <sys/systm.h>
147 #include <sys/kernel.h>
148 #include <sys/proc.h>
149 #include <sys/malloc.h>
150 #include <sys/pool.h>
151 #include <sys/buf.h>
152 #include <sys/shm.h>
153 #include <sys/atomic.h>
154 #include <sys/cpu.h>
155 
156 #include <uvm/uvm.h>
157 
158 #if defined(_PMAP_MAY_USE_PROM_CONSOLE) || defined(MULTIPROCESSOR)
159 #include <machine/rpb.h>
160 #endif
161 
162 #ifdef DEBUG
163 #define	PDB_FOLLOW	0x0001
164 #define	PDB_INIT	0x0002
165 #define	PDB_ENTER	0x0004
166 #define	PDB_REMOVE	0x0008
167 #define	PDB_CREATE	0x0010
168 #define	PDB_PTPAGE	0x0020
169 #define	PDB_ASN		0x0040
170 #define	PDB_BITS	0x0080
171 #define	PDB_COLLECT	0x0100
172 #define	PDB_PROTECT	0x0200
173 #define	PDB_BOOTSTRAP	0x1000
174 #define	PDB_PARANOIA	0x2000
175 #define	PDB_WIRING	0x4000
176 #define	PDB_PVDUMP	0x8000
177 
178 int debugmap = 0;
179 int pmapdebug = PDB_PARANOIA;
180 #endif
181 
182 /*
183  * Given a map and a machine independent protection code,
184  * convert to an alpha protection code.
185  */
186 #define pte_prot(m, p)	(protection_codes[m == pmap_kernel() ? 0 : 1][p])
187 static int	protection_codes[2][8];
188 
189 /*
190  * kernel_lev1map:
191  *
192  *	Kernel level 1 page table.  This maps all kernel level 2
193  *	page table pages, and is used as a template for all user
194  *	pmap level 1 page tables.  When a new user level 1 page
195  *	table is allocated, all kernel_lev1map PTEs for kernel
196  *	addresses are copied to the new map.
197  *
198  *	The kernel also has an initial set of kernel level 2 page
199  *	table pages.  These map the kernel level 3 page table pages.
200  *	As kernel level 3 page table pages are added, more level 2
201  *	page table pages may be added to map them.  These pages are
202  *	never freed.
203  *
204  *	Finally, the kernel also has an initial set of kernel level
205  *	3 page table pages.  These map pages in K1SEG.  More level
206  *	3 page table pages may be added at run-time if additional
207  *	K1SEG address space is required.  These pages are never freed.
208  *
209  * NOTE: When mappings are inserted into the kernel pmap, all
210  * level 2 and level 3 page table pages must already be allocated
211  * and mapped into the parent page table.
212  */
213 pt_entry_t	*kernel_lev1map;
214 
215 /*
216  * Virtual Page Table.
217  */
218 static pt_entry_t *VPT;
219 
220 static struct pmap	kernel_pmap_store
221 	[(PMAP_SIZEOF(ALPHA_MAXPROCS) + sizeof(struct pmap) - 1)
222 		/ sizeof(struct pmap)];
223 struct pmap *const kernel_pmap_ptr = kernel_pmap_store;
224 
225 paddr_t    	avail_start;	/* PA of first available physical page */
226 paddr_t		avail_end;	/* PA of last available physical page */
227 static vaddr_t	virtual_end;	/* VA of last avail page (end of kernel AS) */
228 
229 static bool pmap_initialized;	/* Has pmap_init completed? */
230 
231 u_long		pmap_pages_stolen;	/* instrumentation */
232 
233 /*
234  * This variable contains the number of CPU IDs we need to allocate
235  * space for when allocating the pmap structure.  It is used to
236  * size a per-CPU array of ASN and ASN Generation number.
237  */
238 static u_long 	pmap_ncpuids;
239 
240 #ifndef PMAP_PV_LOWAT
241 #define	PMAP_PV_LOWAT	16
242 #endif
243 int		pmap_pv_lowat = PMAP_PV_LOWAT;
244 
245 /*
246  * List of all pmaps, used to update them when e.g. additional kernel
247  * page tables are allocated.  This list is kept LRU-ordered by
248  * pmap_activate().
249  */
250 static TAILQ_HEAD(, pmap) pmap_all_pmaps;
251 
252 /*
253  * The pools from which pmap structures and sub-structures are allocated.
254  */
255 static struct pool_cache pmap_pmap_cache;
256 static struct pool_cache pmap_l1pt_cache;
257 static struct pool_cache pmap_pv_cache;
258 
259 /*
260  * Address Space Numbers.
261  *
262  * On many implementations of the Alpha architecture, the TLB entries and
263  * I-cache blocks are tagged with a unique number within an implementation-
264  * specified range.  When a process context becomes active, the ASN is used
265  * to match TLB entries; if a TLB entry for a particular VA does not match
266  * the current ASN, it is ignored (one could think of the processor as
267  * having a collection of <max ASN> separate TLBs).  This allows operating
268  * system software to skip the TLB flush that would otherwise be necessary
269  * at context switch time.
270  *
271  * Alpha PTEs have a bit in them (PG_ASM - Address Space Match) that
272  * causes TLB entries to match any ASN.  The PALcode also provides
273  * a TBI (Translation Buffer Invalidate) operation that flushes all
274  * TLB entries that _do not_ have PG_ASM.  We use this bit for kernel
275  * mappings, so that invalidation of all user mappings does not invalidate
276  * kernel mappings (which are consistent across all processes).
277  *
278  * pmap_next_asn always indicates to the next ASN to use.  When
279  * pmap_next_asn exceeds pmap_max_asn, we start a new ASN generation.
280  *
281  * When a new ASN generation is created, the per-process (i.e. non-PG_ASM)
282  * TLB entries and the I-cache are flushed, the generation number is bumped,
283  * and pmap_next_asn is changed to indicate the first non-reserved ASN.
284  *
285  * We reserve ASN #0 for pmaps that use the global kernel_lev1map.  This
286  * prevents the following scenario:
287  *
288  *	* New ASN generation starts, and process A is given ASN #0.
289  *
290  *	* A new process B (and thus new pmap) is created.  The ASN,
291  *	  for lack of a better value, is initialized to 0.
292  *
293  *	* Process B runs.  It is now using the TLB entries tagged
294  *	  by process A.  *poof*
295  *
296  * In the scenario above, in addition to the processor using using incorrect
297  * TLB entires, the PALcode might use incorrect information to service a
298  * TLB miss.  (The PALcode uses the recursively mapped Virtual Page Table
299  * to locate the PTE for a faulting address, and tagged TLB entires exist
300  * for the Virtual Page Table addresses in order to speed up this procedure,
301  * as well.)
302  *
303  * By reserving an ASN for kernel_lev1map users, we are guaranteeing that
304  * new pmaps will initially run with no TLB entries for user addresses
305  * or VPT mappings that map user page tables.  Since kernel_lev1map only
306  * contains mappings for kernel addresses, and since those mappings
307  * are always made with PG_ASM, sharing an ASN for kernel_lev1map users is
308  * safe (since PG_ASM mappings match any ASN).
309  *
310  * On processors that do not support ASNs, the PALcode invalidates
311  * the TLB and I-cache automatically on swpctx.  We still still go
312  * through the motions of assigning an ASN (really, just refreshing
313  * the ASN generation in this particular case) to keep the logic sane
314  * in other parts of the code.
315  */
316 static u_int	pmap_max_asn;		/* max ASN supported by the system */
317 					/* next ASN and cur ASN generation */
318 static struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS];
319 
320 /*
321  * Locking:
322  *
323  *	READ/WRITE LOCKS
324  *	----------------
325  *
326  *	* pmap_main_lock - This lock is used to prevent deadlock and/or
327  *	  provide mutex access to the pmap module.  Most operations lock
328  *	  the pmap first, then PV lists as needed.  However, some operations,
329  *	  such as pmap_page_protect(), lock the PV lists before locking
330  *	  the pmaps.  To prevent deadlock, we require a mutex lock on the
331  *	  pmap module if locking in the PV->pmap direction.  This is
332  *	  implemented by acquiring a (shared) read lock on pmap_main_lock
333  *	  if locking pmap->PV and a (exclusive) write lock if locking in
334  *	  the PV->pmap direction.  Since only one thread can hold a write
335  *	  lock at a time, this provides the mutex.
336  *
337  *	MUTEXES
338  *	-------
339  *
340  *	* pm_lock (per-pmap) - This lock protects all of the members
341  *	  of the pmap structure itself.  This lock will be asserted
342  *	  in pmap_activate() and pmap_deactivate() from a critical
343  *	  section of mi_switch(), and must never sleep.  Note that
344  *	  in the case of the kernel pmap, interrupts which cause
345  *	  memory allocation *must* be blocked while this lock is
346  *	  asserted.
347  *
348  *	* pvh_lock (global hash) - These locks protects the PV lists
349  *	  for managed pages.
350  *
351  *	* pmap_all_pmaps_lock - This lock protects the global list of
352  *	  all pmaps.  Note that a pm_lock must never be held while this
353  *	  lock is held.
354  *
355  *	* pmap_growkernel_lock - This lock protects pmap_growkernel()
356  *	  and the virtual_end variable.
357  *
358  *	  There is a lock ordering constraint for pmap_growkernel_lock.
359  *	  pmap_growkernel() acquires the locks in the following order:
360  *
361  *		pmap_growkernel_lock (write) -> pmap_all_pmaps_lock ->
362  *		    pmap->pm_lock
363  *
364  *	  We need to ensure consistency between user pmaps and the
365  *	  kernel_lev1map.  For this reason, pmap_growkernel_lock must
366  *	  be held to prevent kernel_lev1map changing across pmaps
367  *	  being added to / removed from the global pmaps list.
368  *
369  *	Address space number management (global ASN counters and per-pmap
370  *	ASN state) are not locked; they use arrays of values indexed
371  *	per-processor.
372  *
373  *	All internal functions which operate on a pmap are called
374  *	with the pmap already locked by the caller (which will be
375  *	an interface function).
376  */
377 static krwlock_t pmap_main_lock;
378 static kmutex_t pmap_all_pmaps_lock;
379 static krwlock_t pmap_growkernel_lock;
380 
381 #define	PMAP_MAP_TO_HEAD_LOCK()		rw_enter(&pmap_main_lock, RW_READER)
382 #define	PMAP_MAP_TO_HEAD_UNLOCK()	rw_exit(&pmap_main_lock)
383 #define	PMAP_HEAD_TO_MAP_LOCK()		rw_enter(&pmap_main_lock, RW_WRITER)
384 #define	PMAP_HEAD_TO_MAP_UNLOCK()	rw_exit(&pmap_main_lock)
385 
386 struct {
387 	kmutex_t lock;
388 } __aligned(64) static pmap_pvh_locks[64] __aligned(64);
389 
390 static inline kmutex_t *
391 pmap_pvh_lock(struct vm_page *pg)
392 {
393 
394 	/* Cut bits 11-6 out of page address and use directly as offset. */
395 	return (kmutex_t *)((uintptr_t)&pmap_pvh_locks +
396 	    ((uintptr_t)pg & (63 << 6)));
397 }
398 
399 #if defined(MULTIPROCESSOR)
400 /*
401  * TLB Shootdown:
402  *
403  * When a mapping is changed in a pmap, the TLB entry corresponding to
404  * the virtual address must be invalidated on all processors.  In order
405  * to accomplish this on systems with multiple processors, messages are
406  * sent from the processor which performs the mapping change to all
407  * processors on which the pmap is active.  For other processors, the
408  * ASN generation numbers for that processor is invalidated, so that
409  * the next time the pmap is activated on that processor, a new ASN
410  * will be allocated (which implicitly invalidates all TLB entries).
411  *
412  * Note, we can use the pool allocator to allocate job entries
413  * since pool pages are mapped with K0SEG, not with the TLB.
414  */
415 struct pmap_tlb_shootdown_job {
416 	TAILQ_ENTRY(pmap_tlb_shootdown_job) pj_list;
417 	vaddr_t pj_va;			/* virtual address */
418 	pmap_t pj_pmap;			/* the pmap which maps the address */
419 	pt_entry_t pj_pte;		/* the PTE bits */
420 };
421 
422 static struct pmap_tlb_shootdown_q {
423 	TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_head;	/* queue 16b */
424 	kmutex_t pq_lock;		/* spin lock on queue 16b */
425 	int pq_pte;			/* aggregate PTE bits 4b */
426 	int pq_count;			/* number of pending requests 4b */
427 	int pq_tbia;			/* pending global flush 4b */
428 	uint8_t pq_pad[64-16-16-4-4-4];	/* pad to 64 bytes */
429 } pmap_tlb_shootdown_q[ALPHA_MAXPROCS] __aligned(CACHE_LINE_SIZE);
430 
431 /* If we have more pending jobs than this, we just nail the whole TLB. */
432 #define	PMAP_TLB_SHOOTDOWN_MAXJOBS	6
433 
434 static struct pool_cache pmap_tlb_shootdown_job_cache;
435 #endif /* MULTIPROCESSOR */
436 
437 /*
438  * Internal routines
439  */
440 static void	alpha_protection_init(void);
441 static bool	pmap_remove_mapping(pmap_t, vaddr_t, pt_entry_t *, bool, long);
442 static void	pmap_changebit(struct vm_page *, pt_entry_t, pt_entry_t, long);
443 
444 /*
445  * PT page management functions.
446  */
447 static int	pmap_lev1map_create(pmap_t, long);
448 static void	pmap_lev1map_destroy(pmap_t, long);
449 static int	pmap_ptpage_alloc(pmap_t, pt_entry_t *, int);
450 static void	pmap_ptpage_free(pmap_t, pt_entry_t *);
451 static void	pmap_l3pt_delref(pmap_t, vaddr_t, pt_entry_t *, long);
452 static void	pmap_l2pt_delref(pmap_t, pt_entry_t *, pt_entry_t *, long);
453 static void	pmap_l1pt_delref(pmap_t, pt_entry_t *, long);
454 
455 static void	*pmap_l1pt_alloc(struct pool *, int);
456 static void	pmap_l1pt_free(struct pool *, void *);
457 
458 static struct pool_allocator pmap_l1pt_allocator = {
459 	pmap_l1pt_alloc, pmap_l1pt_free, 0,
460 };
461 
462 static int	pmap_l1pt_ctor(void *, void *, int);
463 
464 /*
465  * PV table management functions.
466  */
467 static int	pmap_pv_enter(pmap_t, struct vm_page *, vaddr_t, pt_entry_t *,
468 			      bool);
469 static void	pmap_pv_remove(pmap_t, struct vm_page *, vaddr_t, bool);
470 static void	*pmap_pv_page_alloc(struct pool *, int);
471 static void	pmap_pv_page_free(struct pool *, void *);
472 
473 static struct pool_allocator pmap_pv_page_allocator = {
474 	pmap_pv_page_alloc, pmap_pv_page_free, 0,
475 };
476 
477 #ifdef DEBUG
478 void	pmap_pv_dump(paddr_t);
479 #endif
480 
481 #define	pmap_pv_alloc()		pool_cache_get(&pmap_pv_cache, PR_NOWAIT)
482 #define	pmap_pv_free(pv)	pool_cache_put(&pmap_pv_cache, (pv))
483 
484 /*
485  * ASN management functions.
486  */
487 static void	pmap_asn_alloc(pmap_t, long);
488 
489 /*
490  * Misc. functions.
491  */
492 static bool	pmap_physpage_alloc(int, paddr_t *);
493 static void	pmap_physpage_free(paddr_t);
494 static int	pmap_physpage_addref(void *);
495 static int	pmap_physpage_delref(void *);
496 
497 /*
498  * PMAP_ISACTIVE{,_TEST}:
499  *
500  *	Check to see if a pmap is active on the current processor.
501  */
502 #define	PMAP_ISACTIVE_TEST(pm, cpu_id)					\
503 	(((pm)->pm_cpus & (1UL << (cpu_id))) != 0)
504 
505 #if defined(DEBUG) && !defined(MULTIPROCESSOR)
506 #define	PMAP_ISACTIVE(pm, cpu_id)					\
507 ({									\
508 	/*								\
509 	 * XXX This test is not MP-safe.				\
510 	 */								\
511 	int isactive_ = PMAP_ISACTIVE_TEST(pm, cpu_id);			\
512 									\
513 	if ((curlwp->l_flag & LW_IDLE) != 0 &&				\
514 	    curproc->p_vmspace != NULL &&				\
515 	   ((curproc->p_sflag & PS_WEXIT) == 0) &&			\
516 	   (isactive_ ^ ((pm) == curproc->p_vmspace->vm_map.pmap)))	\
517 		panic("PMAP_ISACTIVE");					\
518 	(isactive_);							\
519 })
520 #else
521 #define	PMAP_ISACTIVE(pm, cpu_id)	PMAP_ISACTIVE_TEST(pm, cpu_id)
522 #endif /* DEBUG && !MULTIPROCESSOR */
523 
524 /*
525  * PMAP_ACTIVATE_ASN_SANITY:
526  *
527  *	DEBUG sanity checks for ASNs within PMAP_ACTIVATE.
528  */
529 #ifdef DEBUG
530 #define	PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id)				\
531 do {									\
532 	struct pmap_asn_info *__pma = &(pmap)->pm_asni[(cpu_id)];	\
533 	struct pmap_asn_info *__cpma = &pmap_asn_info[(cpu_id)];	\
534 									\
535 	if ((pmap)->pm_lev1map == kernel_lev1map) {			\
536 		/*							\
537 		 * This pmap implementation also ensures that pmaps	\
538 		 * referencing kernel_lev1map use a reserved ASN	\
539 		 * ASN to prevent the PALcode from servicing a TLB	\
540 		 * miss	with the wrong PTE.				\
541 		 */							\
542 		if (__pma->pma_asn != PMAP_ASN_RESERVED) {		\
543 			printf("kernel_lev1map with non-reserved ASN "	\
544 			    "(line %d)\n", __LINE__);			\
545 			panic("PMAP_ACTIVATE_ASN_SANITY");		\
546 		}							\
547 	} else {							\
548 		if (__pma->pma_asngen != __cpma->pma_asngen) {		\
549 			/*						\
550 			 * ASN generation number isn't valid!		\
551 			 */						\
552 			printf("pmap asngen %lu, current %lu "		\
553 			    "(line %d)\n",				\
554 			    __pma->pma_asngen,				\
555 			    __cpma->pma_asngen,				\
556 			    __LINE__);					\
557 			panic("PMAP_ACTIVATE_ASN_SANITY");		\
558 		}							\
559 		if (__pma->pma_asn == PMAP_ASN_RESERVED) {		\
560 			/*						\
561 			 * DANGER WILL ROBINSON!  We're going to	\
562 			 * pollute the VPT TLB entries!			\
563 			 */						\
564 			printf("Using reserved ASN! (line %d)\n",	\
565 			    __LINE__);					\
566 			panic("PMAP_ACTIVATE_ASN_SANITY");		\
567 		}							\
568 	}								\
569 } while (/*CONSTCOND*/0)
570 #else
571 #define	PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id)	/* nothing */
572 #endif
573 
574 /*
575  * PMAP_ACTIVATE:
576  *
577  *	This is essentially the guts of pmap_activate(), without
578  *	ASN allocation.  This is used by pmap_activate(),
579  *	pmap_lev1map_create(), and pmap_lev1map_destroy().
580  *
581  *	This is called only when it is known that a pmap is "active"
582  *	on the current processor; the ASN must already be valid.
583  */
584 #define	PMAP_ACTIVATE(pmap, l, cpu_id)					\
585 do {									\
586 	struct pcb *pcb = lwp_getpcb(l);				\
587 	PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id);				\
588 									\
589 	pcb->pcb_hw.apcb_ptbr =				\
590 	    ALPHA_K0SEG_TO_PHYS((vaddr_t)(pmap)->pm_lev1map) >> PGSHIFT; \
591 	pcb->pcb_hw.apcb_asn = (pmap)->pm_asni[(cpu_id)].pma_asn;	\
592 									\
593 	if ((l) == curlwp) {						\
594 		/*							\
595 		 * Page table base register has changed; switch to	\
596 		 * our own context again so that it will take effect.	\
597 		 */							\
598 		(void) alpha_pal_swpctx((u_long)l->l_md.md_pcbpaddr);	\
599 	}								\
600 } while (/*CONSTCOND*/0)
601 
602 /*
603  * PMAP_SET_NEEDISYNC:
604  *
605  *	Mark that a user pmap needs an I-stream synch on its
606  *	way back out to userspace.
607  */
608 #define	PMAP_SET_NEEDISYNC(pmap)	(pmap)->pm_needisync = ~0UL
609 
610 /*
611  * PMAP_SYNC_ISTREAM:
612  *
613  *	Synchronize the I-stream for the specified pmap.  For user
614  *	pmaps, this is deferred until a process using the pmap returns
615  *	to userspace.
616  */
617 #if defined(MULTIPROCESSOR)
618 #define	PMAP_SYNC_ISTREAM_KERNEL()					\
619 do {									\
620 	alpha_pal_imb();						\
621 	alpha_broadcast_ipi(ALPHA_IPI_IMB);				\
622 } while (/*CONSTCOND*/0)
623 
624 #define	PMAP_SYNC_ISTREAM_USER(pmap)					\
625 do {									\
626 	alpha_multicast_ipi((pmap)->pm_cpus, ALPHA_IPI_AST);		\
627 	/* for curcpu, will happen in userret() */			\
628 } while (/*CONSTCOND*/0)
629 #else
630 #define	PMAP_SYNC_ISTREAM_KERNEL()	alpha_pal_imb()
631 #define	PMAP_SYNC_ISTREAM_USER(pmap)	/* will happen in userret() */
632 #endif /* MULTIPROCESSOR */
633 
634 #define	PMAP_SYNC_ISTREAM(pmap)						\
635 do {									\
636 	if ((pmap) == pmap_kernel())					\
637 		PMAP_SYNC_ISTREAM_KERNEL();				\
638 	else								\
639 		PMAP_SYNC_ISTREAM_USER(pmap);				\
640 } while (/*CONSTCOND*/0)
641 
642 /*
643  * PMAP_INVALIDATE_ASN:
644  *
645  *	Invalidate the specified pmap's ASN, so as to force allocation
646  *	of a new one the next time pmap_asn_alloc() is called.
647  *
648  *	NOTE: THIS MUST ONLY BE CALLED IF AT LEAST ONE OF THE FOLLOWING
649  *	CONDITIONS ARE true:
650  *
651  *		(1) The pmap references the global kernel_lev1map.
652  *
653  *		(2) The pmap is not active on the current processor.
654  */
655 #define	PMAP_INVALIDATE_ASN(pmap, cpu_id)				\
656 do {									\
657 	(pmap)->pm_asni[(cpu_id)].pma_asn = PMAP_ASN_RESERVED;		\
658 } while (/*CONSTCOND*/0)
659 
660 /*
661  * PMAP_INVALIDATE_TLB:
662  *
663  *	Invalidate the TLB entry for the pmap/va pair.
664  */
665 #define	PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id)		\
666 do {									\
667 	if ((hadasm) || (isactive)) {					\
668 		/*							\
669 		 * Simply invalidating the TLB entry and I-cache	\
670 		 * works in this case.					\
671 		 */							\
672 		ALPHA_TBIS((va));					\
673 	} else if ((pmap)->pm_asni[(cpu_id)].pma_asngen ==		\
674 		   pmap_asn_info[(cpu_id)].pma_asngen) {		\
675 		/*							\
676 		 * We can't directly invalidate the TLB entry		\
677 		 * in this case, so we have to force allocation		\
678 		 * of a new ASN the next time this pmap becomes		\
679 		 * active.						\
680 		 */							\
681 		PMAP_INVALIDATE_ASN((pmap), (cpu_id));			\
682 	}								\
683 		/*							\
684 		 * Nothing to do in this case; the next time the	\
685 		 * pmap becomes active on this processor, a new		\
686 		 * ASN will be allocated anyway.			\
687 		 */							\
688 } while (/*CONSTCOND*/0)
689 
690 /*
691  * PMAP_KERNEL_PTE:
692  *
693  *	Get a kernel PTE.
694  *
695  *	If debugging, do a table walk.  If not debugging, just use
696  *	the Virtual Page Table, since all kernel page tables are
697  *	pre-allocated and mapped in.
698  */
699 #ifdef DEBUG
700 #define	PMAP_KERNEL_PTE(va)						\
701 ({									\
702 	pt_entry_t *l1pte_, *l2pte_;					\
703 									\
704 	l1pte_ = pmap_l1pte(pmap_kernel(), va);				\
705 	if (pmap_pte_v(l1pte_) == 0) {					\
706 		printf("kernel level 1 PTE not valid, va 0x%lx "	\
707 		    "(line %d)\n", (va), __LINE__);			\
708 		panic("PMAP_KERNEL_PTE");				\
709 	}								\
710 	l2pte_ = pmap_l2pte(pmap_kernel(), va, l1pte_);			\
711 	if (pmap_pte_v(l2pte_) == 0) {					\
712 		printf("kernel level 2 PTE not valid, va 0x%lx "	\
713 		    "(line %d)\n", (va), __LINE__);			\
714 		panic("PMAP_KERNEL_PTE");				\
715 	}								\
716 	pmap_l3pte(pmap_kernel(), va, l2pte_);				\
717 })
718 #else
719 #define	PMAP_KERNEL_PTE(va)	(&VPT[VPT_INDEX((va))])
720 #endif
721 
722 /*
723  * PMAP_SET_PTE:
724  *
725  *	Set a PTE to a specified value.
726  */
727 #define	PMAP_SET_PTE(ptep, val)	*(ptep) = (val)
728 
729 /*
730  * PMAP_STAT_{INCR,DECR}:
731  *
732  *	Increment or decrement a pmap statistic.
733  */
734 #define	PMAP_STAT_INCR(s, v)	atomic_add_long((unsigned long *)(&(s)), (v))
735 #define	PMAP_STAT_DECR(s, v)	atomic_add_long((unsigned long *)(&(s)), -(v))
736 
737 /*
738  * pmap_bootstrap:
739  *
740  *	Bootstrap the system to run with virtual memory.
741  *
742  *	Note: no locking is necessary in this function.
743  */
744 void
745 pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids)
746 {
747 	vsize_t lev2mapsize, lev3mapsize;
748 	pt_entry_t *lev2map, *lev3map;
749 	pt_entry_t pte;
750 	vsize_t bufsz;
751 	struct pcb *pcb;
752 	int i;
753 
754 #ifdef DEBUG
755 	if (pmapdebug & (PDB_FOLLOW|PDB_BOOTSTRAP))
756 		printf("pmap_bootstrap(0x%lx, %u)\n", ptaddr, maxasn);
757 #endif
758 
759 	/*
760 	 * Compute the number of pages kmem_map will have.
761 	 */
762 	kmeminit_nkmempages();
763 
764 	/*
765 	 * Figure out how many initial PTE's are necessary to map the
766 	 * kernel.  We also reserve space for kmem_alloc_pageable()
767 	 * for vm_fork().
768 	 */
769 
770 	/* Get size of buffer cache and set an upper limit */
771 	bufsz = buf_memcalc();
772 	buf_setvalimit(bufsz);
773 
774 	lev3mapsize =
775 		(VM_PHYS_SIZE + (ubc_nwins << ubc_winshift) +
776 		 bufsz + 16 * NCARGS + pager_map_size) / PAGE_SIZE +
777 		(maxproc * UPAGES) + nkmempages;
778 
779 #ifdef SYSVSHM
780 	lev3mapsize += shminfo.shmall;
781 #endif
782 	lev3mapsize = roundup(lev3mapsize, NPTEPG);
783 
784 	/*
785 	 * Initialize `FYI' variables.  Note we're relying on
786 	 * the fact that BSEARCH sorts the vm_physmem[] array
787 	 * for us.
788 	 */
789 	avail_start = ptoa(VM_PHYSMEM_PTR(0)->start);
790 	avail_end = ptoa(VM_PHYSMEM_PTR(vm_nphysseg - 1)->end);
791 	virtual_end = VM_MIN_KERNEL_ADDRESS + lev3mapsize * PAGE_SIZE;
792 
793 #if 0
794 	printf("avail_start = 0x%lx\n", avail_start);
795 	printf("avail_end = 0x%lx\n", avail_end);
796 	printf("virtual_end = 0x%lx\n", virtual_end);
797 #endif
798 
799 	/*
800 	 * Allocate a level 1 PTE table for the kernel.
801 	 * This is always one page long.
802 	 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL.
803 	 */
804 	kernel_lev1map = (pt_entry_t *)
805 	    uvm_pageboot_alloc(sizeof(pt_entry_t) * NPTEPG);
806 
807 	/*
808 	 * Allocate a level 2 PTE table for the kernel.
809 	 * These must map all of the level3 PTEs.
810 	 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL.
811 	 */
812 	lev2mapsize = roundup(howmany(lev3mapsize, NPTEPG), NPTEPG);
813 	lev2map = (pt_entry_t *)
814 	    uvm_pageboot_alloc(sizeof(pt_entry_t) * lev2mapsize);
815 
816 	/*
817 	 * Allocate a level 3 PTE table for the kernel.
818 	 * Contains lev3mapsize PTEs.
819 	 */
820 	lev3map = (pt_entry_t *)
821 	    uvm_pageboot_alloc(sizeof(pt_entry_t) * lev3mapsize);
822 
823 	/*
824 	 * Set up level 1 page table
825 	 */
826 
827 	/* Map all of the level 2 pte pages */
828 	for (i = 0; i < howmany(lev2mapsize, NPTEPG); i++) {
829 		pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev2map) +
830 		    (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT;
831 		pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
832 		kernel_lev1map[l1pte_index(VM_MIN_KERNEL_ADDRESS +
833 		    (i*PAGE_SIZE*NPTEPG*NPTEPG))] = pte;
834 	}
835 
836 	/* Map the virtual page table */
837 	pte = (ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT)
838 	    << PG_SHIFT;
839 	pte |= PG_V | PG_KRE | PG_KWE; /* NOTE NO ASM */
840 	kernel_lev1map[l1pte_index(VPTBASE)] = pte;
841 	VPT = (pt_entry_t *)VPTBASE;
842 
843 #ifdef _PMAP_MAY_USE_PROM_CONSOLE
844     {
845 	extern pt_entry_t prom_pte;			/* XXX */
846 	extern int prom_mapped;				/* XXX */
847 
848 	if (pmap_uses_prom_console()) {
849 		/*
850 		 * XXX Save old PTE so we can remap the PROM, if
851 		 * XXX necessary.
852 		 */
853 		prom_pte = *(pt_entry_t *)ptaddr & ~PG_ASM;
854 	}
855 	prom_mapped = 0;
856 
857 	/*
858 	 * Actually, this code lies.  The prom is still mapped, and will
859 	 * remain so until the context switch after alpha_init() returns.
860 	 */
861     }
862 #endif
863 
864 	/*
865 	 * Set up level 2 page table.
866 	 */
867 	/* Map all of the level 3 pte pages */
868 	for (i = 0; i < howmany(lev3mapsize, NPTEPG); i++) {
869 		pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev3map) +
870 		    (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT;
871 		pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
872 		lev2map[l2pte_index(VM_MIN_KERNEL_ADDRESS+
873 		    (i*PAGE_SIZE*NPTEPG))] = pte;
874 	}
875 
876 	/* Initialize the pmap_growkernel_lock. */
877 	rw_init(&pmap_growkernel_lock);
878 
879 	/*
880 	 * Set up level three page table (lev3map)
881 	 */
882 	/* Nothing to do; it's already zero'd */
883 
884 	/*
885 	 * Initialize the pmap pools and list.
886 	 */
887 	pmap_ncpuids = ncpuids;
888 	pool_cache_bootstrap(&pmap_pmap_cache, PMAP_SIZEOF(pmap_ncpuids), 0,
889 	    0, 0, "pmap", NULL, IPL_NONE, NULL, NULL, NULL);
890 	pool_cache_bootstrap(&pmap_l1pt_cache, PAGE_SIZE, 0, 0, 0, "pmapl1pt",
891 	    &pmap_l1pt_allocator, IPL_NONE, pmap_l1pt_ctor, NULL, NULL);
892 	pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0,
893 	    PR_LARGECACHE, "pmappv", &pmap_pv_page_allocator, IPL_NONE, NULL,
894 	    NULL, NULL);
895 
896 	TAILQ_INIT(&pmap_all_pmaps);
897 
898 	/*
899 	 * Initialize the ASN logic.
900 	 */
901 	pmap_max_asn = maxasn;
902 	for (i = 0; i < ALPHA_MAXPROCS; i++) {
903 		pmap_asn_info[i].pma_asn = 1;
904 		pmap_asn_info[i].pma_asngen = 0;
905 	}
906 
907 	/*
908 	 * Initialize the locks.
909 	 */
910 	rw_init(&pmap_main_lock);
911 	mutex_init(&pmap_all_pmaps_lock, MUTEX_DEFAULT, IPL_NONE);
912 	for (i = 0; i < __arraycount(pmap_pvh_locks); i++) {
913 		mutex_init(&pmap_pvh_locks[i].lock, MUTEX_DEFAULT, IPL_NONE);
914 	}
915 
916 	/*
917 	 * Initialize kernel pmap.  Note that all kernel mappings
918 	 * have PG_ASM set, so the ASN doesn't really matter for
919 	 * the kernel pmap.  Also, since the kernel pmap always
920 	 * references kernel_lev1map, it always has an invalid ASN
921 	 * generation.
922 	 */
923 	memset(pmap_kernel(), 0, sizeof(struct pmap));
924 	pmap_kernel()->pm_lev1map = kernel_lev1map;
925 	pmap_kernel()->pm_count = 1;
926 	for (i = 0; i < ALPHA_MAXPROCS; i++) {
927 		pmap_kernel()->pm_asni[i].pma_asn = PMAP_ASN_RESERVED;
928 		pmap_kernel()->pm_asni[i].pma_asngen =
929 		    pmap_asn_info[i].pma_asngen;
930 	}
931 	mutex_init(&pmap_kernel()->pm_lock, MUTEX_DEFAULT, IPL_NONE);
932 	TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap_kernel(), pm_list);
933 
934 #if defined(MULTIPROCESSOR)
935 	/*
936 	 * Initialize the TLB shootdown queues.
937 	 */
938 	pool_cache_bootstrap(&pmap_tlb_shootdown_job_cache,
939 	    sizeof(struct pmap_tlb_shootdown_job), CACHE_LINE_SIZE,
940 	     0, PR_LARGECACHE, "pmaptlb", NULL, IPL_VM, NULL, NULL, NULL);
941 	for (i = 0; i < ALPHA_MAXPROCS; i++) {
942 		TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_head);
943 		mutex_init(&pmap_tlb_shootdown_q[i].pq_lock, MUTEX_DEFAULT,
944 		    IPL_SCHED);
945 	}
946 #endif
947 
948 	/*
949 	 * Set up lwp0's PCB such that the ptbr points to the right place
950 	 * and has the kernel pmap's (really unused) ASN.
951 	 */
952 	pcb = lwp_getpcb(&lwp0);
953 	pcb->pcb_hw.apcb_ptbr =
954 	    ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT;
955 	pcb->pcb_hw.apcb_asn = pmap_kernel()->pm_asni[cpu_number()].pma_asn;
956 
957 	/*
958 	 * Mark the kernel pmap `active' on this processor.
959 	 */
960 	atomic_or_ulong(&pmap_kernel()->pm_cpus,
961 	    (1UL << cpu_number()));
962 }
963 
964 #ifdef _PMAP_MAY_USE_PROM_CONSOLE
965 int
966 pmap_uses_prom_console(void)
967 {
968 
969 	return (cputype == ST_DEC_21000);
970 }
971 #endif /* _PMAP_MAY_USE_PROM_CONSOLE */
972 
973 /*
974  * pmap_virtual_space:		[ INTERFACE ]
975  *
976  *	Define the initial bounds of the kernel virtual address space.
977  */
978 void
979 pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp)
980 {
981 
982 	*vstartp = VM_MIN_KERNEL_ADDRESS;	/* kernel is in K0SEG */
983 	*vendp = VM_MAX_KERNEL_ADDRESS;		/* we use pmap_growkernel */
984 }
985 
986 /*
987  * pmap_steal_memory:		[ INTERFACE ]
988  *
989  *	Bootstrap memory allocator (alternative to vm_bootstrap_steal_memory()).
990  *	This function allows for early dynamic memory allocation until the
991  *	virtual memory system has been bootstrapped.  After that point, either
992  *	kmem_alloc or malloc should be used.  This function works by stealing
993  *	pages from the (to be) managed page pool, then implicitly mapping the
994  *	pages (by using their k0seg addresses) and zeroing them.
995  *
996  *	It may be used once the physical memory segments have been pre-loaded
997  *	into the vm_physmem[] array.  Early memory allocation MUST use this
998  *	interface!  This cannot be used after vm_page_startup(), and will
999  *	generate a panic if tried.
1000  *
1001  *	Note that this memory will never be freed, and in essence it is wired
1002  *	down.
1003  *
1004  *	We must adjust *vstartp and/or *vendp iff we use address space
1005  *	from the kernel virtual address range defined by pmap_virtual_space().
1006  *
1007  *	Note: no locking is necessary in this function.
1008  */
1009 vaddr_t
1010 pmap_steal_memory(vsize_t size, vaddr_t *vstartp, vaddr_t *vendp)
1011 {
1012 	int bank, npgs, x;
1013 	vaddr_t va;
1014 	paddr_t pa;
1015 
1016 	size = round_page(size);
1017 	npgs = atop(size);
1018 
1019 #if 0
1020 	printf("PSM: size 0x%lx (npgs 0x%x)\n", size, npgs);
1021 #endif
1022 
1023 	for (bank = 0; bank < vm_nphysseg; bank++) {
1024 		if (uvm.page_init_done == true)
1025 			panic("pmap_steal_memory: called _after_ bootstrap");
1026 
1027 #if 0
1028 		printf("     bank %d: avail_start 0x%lx, start 0x%lx, "
1029 		    "avail_end 0x%lx\n", bank, VM_PHYSMEM_PTR(bank)->avail_start,
1030 		    VM_PHYSMEM_PTR(bank)->start, VM_PHYSMEM_PTR(bank)->avail_end);
1031 #endif
1032 
1033 		if (VM_PHYSMEM_PTR(bank)->avail_start != VM_PHYSMEM_PTR(bank)->start ||
1034 		    VM_PHYSMEM_PTR(bank)->avail_start >= VM_PHYSMEM_PTR(bank)->avail_end)
1035 			continue;
1036 
1037 #if 0
1038 		printf("             avail_end - avail_start = 0x%lx\n",
1039 		    VM_PHYSMEM_PTR(bank)->avail_end - VM_PHYSMEM_PTR(bank)->avail_start);
1040 #endif
1041 
1042 		if ((VM_PHYSMEM_PTR(bank)->avail_end - VM_PHYSMEM_PTR(bank)->avail_start)
1043 		    < npgs)
1044 			continue;
1045 
1046 		/*
1047 		 * There are enough pages here; steal them!
1048 		 */
1049 		pa = ptoa(VM_PHYSMEM_PTR(bank)->avail_start);
1050 		VM_PHYSMEM_PTR(bank)->avail_start += npgs;
1051 		VM_PHYSMEM_PTR(bank)->start += npgs;
1052 
1053 		/*
1054 		 * Have we used up this segment?
1055 		 */
1056 		if (VM_PHYSMEM_PTR(bank)->avail_start == VM_PHYSMEM_PTR(bank)->end) {
1057 			if (vm_nphysseg == 1)
1058 				panic("pmap_steal_memory: out of memory!");
1059 
1060 			/* Remove this segment from the list. */
1061 			vm_nphysseg--;
1062 			for (x = bank; x < vm_nphysseg; x++) {
1063 				/* structure copy */
1064 				VM_PHYSMEM_PTR_SWAP(x, x + 1);
1065 			}
1066 		}
1067 
1068 		va = ALPHA_PHYS_TO_K0SEG(pa);
1069 		memset((void *)va, 0, size);
1070 		pmap_pages_stolen += npgs;
1071 		return (va);
1072 	}
1073 
1074 	/*
1075 	 * If we got here, this was no memory left.
1076 	 */
1077 	panic("pmap_steal_memory: no memory to steal");
1078 }
1079 
1080 /*
1081  * pmap_init:			[ INTERFACE ]
1082  *
1083  *	Initialize the pmap module.  Called by vm_init(), to initialize any
1084  *	structures that the pmap system needs to map virtual memory.
1085  *
1086  *	Note: no locking is necessary in this function.
1087  */
1088 void
1089 pmap_init(void)
1090 {
1091 
1092 #ifdef DEBUG
1093         if (pmapdebug & PDB_FOLLOW)
1094                 printf("pmap_init()\n");
1095 #endif
1096 
1097 	/* initialize protection array */
1098 	alpha_protection_init();
1099 
1100 	/*
1101 	 * Set a low water mark on the pv_entry pool, so that we are
1102 	 * more likely to have these around even in extreme memory
1103 	 * starvation.
1104 	 */
1105 	pool_cache_setlowat(&pmap_pv_cache, pmap_pv_lowat);
1106 
1107 	/*
1108 	 * Now it is safe to enable pv entry recording.
1109 	 */
1110 	pmap_initialized = true;
1111 
1112 #if 0
1113 	for (bank = 0; bank < vm_nphysseg; bank++) {
1114 		printf("bank %d\n", bank);
1115 		printf("\tstart = 0x%x\n", ptoa(VM_PHYSMEM_PTR(bank)->start));
1116 		printf("\tend = 0x%x\n", ptoa(VM_PHYSMEM_PTR(bank)->end));
1117 		printf("\tavail_start = 0x%x\n",
1118 		    ptoa(VM_PHYSMEM_PTR(bank)->avail_start));
1119 		printf("\tavail_end = 0x%x\n",
1120 		    ptoa(VM_PHYSMEM_PTR(bank)->avail_end));
1121 	}
1122 #endif
1123 }
1124 
1125 /*
1126  * pmap_create:			[ INTERFACE ]
1127  *
1128  *	Create and return a physical map.
1129  *
1130  *	Note: no locking is necessary in this function.
1131  */
1132 pmap_t
1133 pmap_create(void)
1134 {
1135 	pmap_t pmap;
1136 	int i;
1137 
1138 #ifdef DEBUG
1139 	if (pmapdebug & (PDB_FOLLOW|PDB_CREATE))
1140 		printf("pmap_create()\n");
1141 #endif
1142 
1143 	pmap = pool_cache_get(&pmap_pmap_cache, PR_WAITOK);
1144 	memset(pmap, 0, sizeof(*pmap));
1145 
1146 	/*
1147 	 * Defer allocation of a new level 1 page table until
1148 	 * the first new mapping is entered; just take a reference
1149 	 * to the kernel kernel_lev1map.
1150 	 */
1151 	pmap->pm_lev1map = kernel_lev1map;
1152 
1153 	pmap->pm_count = 1;
1154 	for (i = 0; i < pmap_ncpuids; i++) {
1155 		pmap->pm_asni[i].pma_asn = PMAP_ASN_RESERVED;
1156 		/* XXX Locking? */
1157 		pmap->pm_asni[i].pma_asngen = pmap_asn_info[i].pma_asngen;
1158 	}
1159 	mutex_init(&pmap->pm_lock, MUTEX_DEFAULT, IPL_NONE);
1160 
1161  try_again:
1162 	rw_enter(&pmap_growkernel_lock, RW_READER);
1163 
1164 	if (pmap_lev1map_create(pmap, cpu_number()) != 0) {
1165 		rw_exit(&pmap_growkernel_lock);
1166 		(void) kpause("pmap_create", false, hz >> 2, NULL);
1167 		goto try_again;
1168 	}
1169 
1170 	mutex_enter(&pmap_all_pmaps_lock);
1171 	TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap, pm_list);
1172 	mutex_exit(&pmap_all_pmaps_lock);
1173 
1174 	rw_exit(&pmap_growkernel_lock);
1175 
1176 	return (pmap);
1177 }
1178 
1179 /*
1180  * pmap_destroy:		[ INTERFACE ]
1181  *
1182  *	Drop the reference count on the specified pmap, releasing
1183  *	all resources if the reference count drops to zero.
1184  */
1185 void
1186 pmap_destroy(pmap_t pmap)
1187 {
1188 
1189 #ifdef DEBUG
1190 	if (pmapdebug & PDB_FOLLOW)
1191 		printf("pmap_destroy(%p)\n", pmap);
1192 #endif
1193 
1194 	if (atomic_dec_uint_nv(&pmap->pm_count) > 0)
1195 		return;
1196 
1197 	rw_enter(&pmap_growkernel_lock, RW_READER);
1198 
1199 	/*
1200 	 * Remove it from the global list of all pmaps.
1201 	 */
1202 	mutex_enter(&pmap_all_pmaps_lock);
1203 	TAILQ_REMOVE(&pmap_all_pmaps, pmap, pm_list);
1204 	mutex_exit(&pmap_all_pmaps_lock);
1205 
1206 	pmap_lev1map_destroy(pmap, cpu_number());
1207 
1208 	rw_exit(&pmap_growkernel_lock);
1209 
1210 	/*
1211 	 * Since the pmap is supposed to contain no valid
1212 	 * mappings at this point, we should always see
1213 	 * kernel_lev1map here.
1214 	 */
1215 	KASSERT(pmap->pm_lev1map == kernel_lev1map);
1216 
1217 	mutex_destroy(&pmap->pm_lock);
1218 	pool_cache_put(&pmap_pmap_cache, pmap);
1219 }
1220 
1221 /*
1222  * pmap_reference:		[ INTERFACE ]
1223  *
1224  *	Add a reference to the specified pmap.
1225  */
1226 void
1227 pmap_reference(pmap_t pmap)
1228 {
1229 
1230 #ifdef DEBUG
1231 	if (pmapdebug & PDB_FOLLOW)
1232 		printf("pmap_reference(%p)\n", pmap);
1233 #endif
1234 
1235 	atomic_inc_uint(&pmap->pm_count);
1236 }
1237 
1238 /*
1239  * pmap_remove:			[ INTERFACE ]
1240  *
1241  *	Remove the given range of addresses from the specified map.
1242  *
1243  *	It is assumed that the start and end are properly
1244  *	rounded to the page size.
1245  */
1246 void
1247 pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva)
1248 {
1249 	pt_entry_t *l1pte, *l2pte, *l3pte;
1250 	pt_entry_t *saved_l1pte, *saved_l2pte, *saved_l3pte;
1251 	vaddr_t l1eva, l2eva, vptva;
1252 	bool needisync = false;
1253 	long cpu_id = cpu_number();
1254 
1255 #ifdef DEBUG
1256 	if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
1257 		printf("pmap_remove(%p, %lx, %lx)\n", pmap, sva, eva);
1258 #endif
1259 
1260 	/*
1261 	 * If this is the kernel pmap, we can use a faster method
1262 	 * for accessing the PTEs (since the PT pages are always
1263 	 * resident).
1264 	 *
1265 	 * Note that this routine should NEVER be called from an
1266 	 * interrupt context; pmap_kremove() is used for that.
1267 	 */
1268 	if (pmap == pmap_kernel()) {
1269 		PMAP_MAP_TO_HEAD_LOCK();
1270 		PMAP_LOCK(pmap);
1271 
1272 		while (sva < eva) {
1273 			l3pte = PMAP_KERNEL_PTE(sva);
1274 			if (pmap_pte_v(l3pte)) {
1275 #ifdef DIAGNOSTIC
1276 				if (uvm_pageismanaged(pmap_pte_pa(l3pte)) &&
1277 				    pmap_pte_pv(l3pte) == 0)
1278 					panic("pmap_remove: managed page "
1279 					    "without PG_PVLIST for 0x%lx",
1280 					    sva);
1281 #endif
1282 				needisync |= pmap_remove_mapping(pmap, sva,
1283 				    l3pte, true, cpu_id);
1284 			}
1285 			sva += PAGE_SIZE;
1286 		}
1287 
1288 		PMAP_UNLOCK(pmap);
1289 		PMAP_MAP_TO_HEAD_UNLOCK();
1290 
1291 		if (needisync)
1292 			PMAP_SYNC_ISTREAM_KERNEL();
1293 		return;
1294 	}
1295 
1296 #ifdef DIAGNOSTIC
1297 	if (sva > VM_MAXUSER_ADDRESS || eva > VM_MAXUSER_ADDRESS)
1298 		panic("pmap_remove: (0x%lx - 0x%lx) user pmap, kernel "
1299 		    "address range", sva, eva);
1300 #endif
1301 
1302 	PMAP_MAP_TO_HEAD_LOCK();
1303 	PMAP_LOCK(pmap);
1304 
1305 	/*
1306 	 * If we're already referencing the kernel_lev1map, there
1307 	 * is no work for us to do.
1308 	 */
1309 	if (pmap->pm_lev1map == kernel_lev1map)
1310 		goto out;
1311 
1312 	saved_l1pte = l1pte = pmap_l1pte(pmap, sva);
1313 
1314 	/*
1315 	 * Add a reference to the L1 table to it won't get
1316 	 * removed from under us.
1317 	 */
1318 	pmap_physpage_addref(saved_l1pte);
1319 
1320 	for (; sva < eva; sva = l1eva, l1pte++) {
1321 		l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE;
1322 		if (pmap_pte_v(l1pte)) {
1323 			saved_l2pte = l2pte = pmap_l2pte(pmap, sva, l1pte);
1324 
1325 			/*
1326 			 * Add a reference to the L2 table so it won't
1327 			 * get removed from under us.
1328 			 */
1329 			pmap_physpage_addref(saved_l2pte);
1330 
1331 			for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) {
1332 				l2eva =
1333 				    alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE;
1334 				if (pmap_pte_v(l2pte)) {
1335 					saved_l3pte = l3pte =
1336 					    pmap_l3pte(pmap, sva, l2pte);
1337 
1338 					/*
1339 					 * Add a reference to the L3 table so
1340 					 * it won't get removed from under us.
1341 					 */
1342 					pmap_physpage_addref(saved_l3pte);
1343 
1344 					/*
1345 					 * Remember this sva; if the L3 table
1346 					 * gets removed, we need to invalidate
1347 					 * the VPT TLB entry for it.
1348 					 */
1349 					vptva = sva;
1350 
1351 					for (; sva < l2eva && sva < eva;
1352 					     sva += PAGE_SIZE, l3pte++) {
1353 						if (!pmap_pte_v(l3pte)) {
1354 							continue;
1355 						}
1356 						needisync |=
1357 						    pmap_remove_mapping(
1358 							pmap, sva,
1359 							l3pte, true,
1360 							cpu_id);
1361 					}
1362 
1363 					/*
1364 					 * Remove the reference to the L3
1365 					 * table that we added above.  This
1366 					 * may free the L3 table.
1367 					 */
1368 					pmap_l3pt_delref(pmap, vptva,
1369 					    saved_l3pte, cpu_id);
1370 				}
1371 			}
1372 
1373 			/*
1374 			 * Remove the reference to the L2 table that we
1375 			 * added above.  This may free the L2 table.
1376 			 */
1377 			pmap_l2pt_delref(pmap, l1pte, saved_l2pte, cpu_id);
1378 		}
1379 	}
1380 
1381 	/*
1382 	 * Remove the reference to the L1 table that we added above.
1383 	 * This may free the L1 table.
1384 	 */
1385 	pmap_l1pt_delref(pmap, saved_l1pte, cpu_id);
1386 
1387 	if (needisync)
1388 		PMAP_SYNC_ISTREAM_USER(pmap);
1389 
1390  out:
1391 	PMAP_UNLOCK(pmap);
1392 	PMAP_MAP_TO_HEAD_UNLOCK();
1393 }
1394 
1395 /*
1396  * pmap_page_protect:		[ INTERFACE ]
1397  *
1398  *	Lower the permission for all mappings to a given page to
1399  *	the permissions specified.
1400  */
1401 void
1402 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
1403 {
1404 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
1405 	pmap_t pmap;
1406 	pv_entry_t pv, nextpv;
1407 	bool needkisync = false;
1408 	long cpu_id = cpu_number();
1409 	kmutex_t *lock;
1410 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1411 #ifdef DEBUG
1412 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
1413 
1414 
1415 	if ((pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) ||
1416 	    (prot == VM_PROT_NONE && (pmapdebug & PDB_REMOVE)))
1417 		printf("pmap_page_protect(%p, %x)\n", pg, prot);
1418 #endif
1419 
1420 	switch (prot) {
1421 	case VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE:
1422 	case VM_PROT_READ|VM_PROT_WRITE:
1423 		return;
1424 
1425 	/* copy_on_write */
1426 	case VM_PROT_READ|VM_PROT_EXECUTE:
1427 	case VM_PROT_READ:
1428 		PMAP_HEAD_TO_MAP_LOCK();
1429 		lock = pmap_pvh_lock(pg);
1430 		mutex_enter(lock);
1431 		for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) {
1432 			PMAP_LOCK(pv->pv_pmap);
1433 			if (*pv->pv_pte & (PG_KWE | PG_UWE)) {
1434 				*pv->pv_pte &= ~(PG_KWE | PG_UWE);
1435 				PMAP_INVALIDATE_TLB(pv->pv_pmap, pv->pv_va,
1436 				    pmap_pte_asm(pv->pv_pte),
1437 				    PMAP_ISACTIVE(pv->pv_pmap, cpu_id), cpu_id);
1438 				PMAP_TLB_SHOOTDOWN(pv->pv_pmap, pv->pv_va,
1439 				    pmap_pte_asm(pv->pv_pte));
1440 			}
1441 			PMAP_UNLOCK(pv->pv_pmap);
1442 		}
1443 		mutex_exit(lock);
1444 		PMAP_HEAD_TO_MAP_UNLOCK();
1445 		PMAP_TLB_SHOOTNOW();
1446 		return;
1447 
1448 	/* remove_all */
1449 	default:
1450 		break;
1451 	}
1452 
1453 	PMAP_HEAD_TO_MAP_LOCK();
1454 	lock = pmap_pvh_lock(pg);
1455 	mutex_enter(lock);
1456 	for (pv = md->pvh_list; pv != NULL; pv = nextpv) {
1457 		nextpv = pv->pv_next;
1458 		pmap = pv->pv_pmap;
1459 
1460 		PMAP_LOCK(pmap);
1461 #ifdef DEBUG
1462 		if (pmap_pte_v(pmap_l2pte(pv->pv_pmap, pv->pv_va, NULL)) == 0 ||
1463 		    pmap_pte_pa(pv->pv_pte) != pa)
1464 			panic("pmap_page_protect: bad mapping");
1465 #endif
1466 		if (pmap_remove_mapping(pmap, pv->pv_va, pv->pv_pte,
1467 		    false, cpu_id) == true) {
1468 			if (pmap == pmap_kernel())
1469 				needkisync |= true;
1470 			else
1471 				PMAP_SYNC_ISTREAM_USER(pmap);
1472 		}
1473 		PMAP_UNLOCK(pmap);
1474 	}
1475 
1476 	if (needkisync)
1477 		PMAP_SYNC_ISTREAM_KERNEL();
1478 
1479 	mutex_exit(lock);
1480 	PMAP_HEAD_TO_MAP_UNLOCK();
1481 }
1482 
1483 /*
1484  * pmap_protect:		[ INTERFACE ]
1485  *
1486  *	Set the physical protection on the specified range of this map
1487  *	as requested.
1488  */
1489 void
1490 pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
1491 {
1492 	pt_entry_t *l1pte, *l2pte, *l3pte, bits;
1493 	bool isactive;
1494 	bool hadasm;
1495 	vaddr_t l1eva, l2eva;
1496 	long cpu_id = cpu_number();
1497 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1498 
1499 #ifdef DEBUG
1500 	if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT))
1501 		printf("pmap_protect(%p, %lx, %lx, %x)\n",
1502 		    pmap, sva, eva, prot);
1503 #endif
1504 
1505 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1506 		pmap_remove(pmap, sva, eva);
1507 		return;
1508 	}
1509 
1510 	PMAP_LOCK(pmap);
1511 
1512 	bits = pte_prot(pmap, prot);
1513 	isactive = PMAP_ISACTIVE(pmap, cpu_id);
1514 
1515 	l1pte = pmap_l1pte(pmap, sva);
1516 	for (; sva < eva; sva = l1eva, l1pte++) {
1517 		l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE;
1518 		if (pmap_pte_v(l1pte)) {
1519 			l2pte = pmap_l2pte(pmap, sva, l1pte);
1520 			for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) {
1521 				l2eva =
1522 				    alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE;
1523 				if (pmap_pte_v(l2pte)) {
1524 					l3pte = pmap_l3pte(pmap, sva, l2pte);
1525 					for (; sva < l2eva && sva < eva;
1526 					     sva += PAGE_SIZE, l3pte++) {
1527 						if (pmap_pte_v(l3pte) &&
1528 						    pmap_pte_prot_chg(l3pte,
1529 						    bits)) {
1530 							hadasm =
1531 							   (pmap_pte_asm(l3pte)
1532 							    != 0);
1533 							pmap_pte_set_prot(l3pte,
1534 							   bits);
1535 							PMAP_INVALIDATE_TLB(
1536 							   pmap, sva, hadasm,
1537 							   isactive, cpu_id);
1538 							PMAP_TLB_SHOOTDOWN(
1539 							   pmap, sva,
1540 							   hadasm ? PG_ASM : 0);
1541 						}
1542 					}
1543 				}
1544 			}
1545 		}
1546 	}
1547 
1548 	PMAP_TLB_SHOOTNOW();
1549 
1550 	if (prot & VM_PROT_EXECUTE)
1551 		PMAP_SYNC_ISTREAM(pmap);
1552 
1553 	PMAP_UNLOCK(pmap);
1554 }
1555 
1556 /*
1557  * pmap_enter:			[ INTERFACE ]
1558  *
1559  *	Insert the given physical page (p) at
1560  *	the specified virtual address (v) in the
1561  *	target physical map with the protection requested.
1562  *
1563  *	If specified, the page will be wired down, meaning
1564  *	that the related pte can not be reclaimed.
1565  *
1566  *	Note:  This is the only routine which MAY NOT lazy-evaluate
1567  *	or lose information.  That is, this routine must actually
1568  *	insert this page into the given map NOW.
1569  */
1570 int
1571 pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
1572 {
1573 	struct vm_page *pg;			/* if != NULL, managed page */
1574 	pt_entry_t *pte, npte, opte;
1575 	paddr_t opa;
1576 	bool tflush = true;
1577 	bool hadasm = false;	/* XXX gcc -Wuninitialized */
1578 	bool needisync = false;
1579 	bool setisync = false;
1580 	bool isactive;
1581 	bool wired;
1582 	long cpu_id = cpu_number();
1583 	int error = 0;
1584 	kmutex_t *lock;
1585 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1586 
1587 #ifdef DEBUG
1588 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1589 		printf("pmap_enter(%p, %lx, %lx, %x, %x)\n",
1590 		       pmap, va, pa, prot, flags);
1591 #endif
1592 	pg = PHYS_TO_VM_PAGE(pa);
1593 	isactive = PMAP_ISACTIVE(pmap, cpu_id);
1594 	wired = (flags & PMAP_WIRED) != 0;
1595 
1596 	/*
1597 	 * Determine what we need to do about the I-stream.  If
1598 	 * VM_PROT_EXECUTE is set, we mark a user pmap as needing
1599 	 * an I-sync on the way back out to userspace.  We always
1600 	 * need an immediate I-sync for the kernel pmap.
1601 	 */
1602 	if (prot & VM_PROT_EXECUTE) {
1603 		if (pmap == pmap_kernel())
1604 			needisync = true;
1605 		else {
1606 			setisync = true;
1607 			needisync = (pmap->pm_cpus != 0);
1608 		}
1609 	}
1610 
1611 	PMAP_MAP_TO_HEAD_LOCK();
1612 	PMAP_LOCK(pmap);
1613 
1614 	if (pmap == pmap_kernel()) {
1615 #ifdef DIAGNOSTIC
1616 		/*
1617 		 * Sanity check the virtual address.
1618 		 */
1619 		if (va < VM_MIN_KERNEL_ADDRESS)
1620 			panic("pmap_enter: kernel pmap, invalid va 0x%lx", va);
1621 #endif
1622 		pte = PMAP_KERNEL_PTE(va);
1623 	} else {
1624 		pt_entry_t *l1pte, *l2pte;
1625 
1626 #ifdef DIAGNOSTIC
1627 		/*
1628 		 * Sanity check the virtual address.
1629 		 */
1630 		if (va >= VM_MAXUSER_ADDRESS)
1631 			panic("pmap_enter: user pmap, invalid va 0x%lx", va);
1632 #endif
1633 
1634 		KASSERT(pmap->pm_lev1map != kernel_lev1map);
1635 
1636 		/*
1637 		 * Check to see if the level 1 PTE is valid, and
1638 		 * allocate a new level 2 page table page if it's not.
1639 		 * A reference will be added to the level 2 table when
1640 		 * the level 3 table is created.
1641 		 */
1642 		l1pte = pmap_l1pte(pmap, va);
1643 		if (pmap_pte_v(l1pte) == 0) {
1644 			pmap_physpage_addref(l1pte);
1645 			error = pmap_ptpage_alloc(pmap, l1pte, PGU_L2PT);
1646 			if (error) {
1647 				pmap_l1pt_delref(pmap, l1pte, cpu_id);
1648 				if (flags & PMAP_CANFAIL)
1649 					goto out;
1650 				panic("pmap_enter: unable to create L2 PT "
1651 				    "page");
1652 			}
1653 #ifdef DEBUG
1654 			if (pmapdebug & PDB_PTPAGE)
1655 				printf("pmap_enter: new level 2 table at "
1656 				    "0x%lx\n", pmap_pte_pa(l1pte));
1657 #endif
1658 		}
1659 
1660 		/*
1661 		 * Check to see if the level 2 PTE is valid, and
1662 		 * allocate a new level 3 page table page if it's not.
1663 		 * A reference will be added to the level 3 table when
1664 		 * the mapping is validated.
1665 		 */
1666 		l2pte = pmap_l2pte(pmap, va, l1pte);
1667 		if (pmap_pte_v(l2pte) == 0) {
1668 			pmap_physpage_addref(l2pte);
1669 			error = pmap_ptpage_alloc(pmap, l2pte, PGU_L3PT);
1670 			if (error) {
1671 				pmap_l2pt_delref(pmap, l1pte, l2pte, cpu_id);
1672 				if (flags & PMAP_CANFAIL)
1673 					goto out;
1674 				panic("pmap_enter: unable to create L3 PT "
1675 				    "page");
1676 			}
1677 #ifdef DEBUG
1678 			if (pmapdebug & PDB_PTPAGE)
1679 				printf("pmap_enter: new level 3 table at "
1680 				    "0x%lx\n", pmap_pte_pa(l2pte));
1681 #endif
1682 		}
1683 
1684 		/*
1685 		 * Get the PTE that will map the page.
1686 		 */
1687 		pte = pmap_l3pte(pmap, va, l2pte);
1688 	}
1689 
1690 	/* Remember all of the old PTE; used for TBI check later. */
1691 	opte = *pte;
1692 
1693 	/*
1694 	 * Check to see if the old mapping is valid.  If not, validate the
1695 	 * new one immediately.
1696 	 */
1697 	if (pmap_pte_v(pte) == 0) {
1698 		/*
1699 		 * No need to invalidate the TLB in this case; an invalid
1700 		 * mapping won't be in the TLB, and a previously valid
1701 		 * mapping would have been flushed when it was invalidated.
1702 		 */
1703 		tflush = false;
1704 
1705 		/*
1706 		 * No need to synchronize the I-stream, either, for basically
1707 		 * the same reason.
1708 		 */
1709 		setisync = needisync = false;
1710 
1711 		if (pmap != pmap_kernel()) {
1712 			/*
1713 			 * New mappings gain a reference on the level 3
1714 			 * table.
1715 			 */
1716 			pmap_physpage_addref(pte);
1717 		}
1718 		goto validate_enterpv;
1719 	}
1720 
1721 	opa = pmap_pte_pa(pte);
1722 	hadasm = (pmap_pte_asm(pte) != 0);
1723 
1724 	if (opa == pa) {
1725 		/*
1726 		 * Mapping has not changed; must be a protection or
1727 		 * wiring change.
1728 		 */
1729 		if (pmap_pte_w_chg(pte, wired ? PG_WIRED : 0)) {
1730 #ifdef DEBUG
1731 			if (pmapdebug & PDB_ENTER)
1732 				printf("pmap_enter: wiring change -> %d\n",
1733 				    wired);
1734 #endif
1735 			/*
1736 			 * Adjust the wiring count.
1737 			 */
1738 			if (wired)
1739 				PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1);
1740 			else
1741 				PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1742 		}
1743 
1744 		/*
1745 		 * Set the PTE.
1746 		 */
1747 		goto validate;
1748 	}
1749 
1750 	/*
1751 	 * The mapping has changed.  We need to invalidate the
1752 	 * old mapping before creating the new one.
1753 	 */
1754 #ifdef DEBUG
1755 	if (pmapdebug & PDB_ENTER)
1756 		printf("pmap_enter: removing old mapping 0x%lx\n", va);
1757 #endif
1758 	if (pmap != pmap_kernel()) {
1759 		/*
1760 		 * Gain an extra reference on the level 3 table.
1761 		 * pmap_remove_mapping() will delete a reference,
1762 		 * and we don't want the table to be erroneously
1763 		 * freed.
1764 		 */
1765 		pmap_physpage_addref(pte);
1766 	}
1767 	needisync |= pmap_remove_mapping(pmap, va, pte, true, cpu_id);
1768 
1769  validate_enterpv:
1770 	/*
1771 	 * Enter the mapping into the pv_table if appropriate.
1772 	 */
1773 	if (pg != NULL) {
1774 		error = pmap_pv_enter(pmap, pg, va, pte, true);
1775 		if (error) {
1776 			pmap_l3pt_delref(pmap, va, pte, cpu_id);
1777 			if (flags & PMAP_CANFAIL)
1778 				goto out;
1779 			panic("pmap_enter: unable to enter mapping in PV "
1780 			    "table");
1781 		}
1782 	}
1783 
1784 	/*
1785 	 * Increment counters.
1786 	 */
1787 	PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1);
1788 	if (wired)
1789 		PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1);
1790 
1791  validate:
1792 	/*
1793 	 * Build the new PTE.
1794 	 */
1795 	npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap, prot) | PG_V;
1796 	if (pg != NULL) {
1797 		struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
1798 		int attrs;
1799 
1800 #ifdef DIAGNOSTIC
1801 		if ((flags & VM_PROT_ALL) & ~prot)
1802 			panic("pmap_enter: access type exceeds prot");
1803 #endif
1804 		lock = pmap_pvh_lock(pg);
1805 		mutex_enter(lock);
1806 		if (flags & VM_PROT_WRITE)
1807 			md->pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED);
1808 		else if (flags & VM_PROT_ALL)
1809 			md->pvh_attrs |= PGA_REFERENCED;
1810 		attrs = md->pvh_attrs;
1811 		mutex_exit(lock);
1812 
1813 		/*
1814 		 * Set up referenced/modified emulation for new mapping.
1815 		 */
1816 		if ((attrs & PGA_REFERENCED) == 0)
1817 			npte |= PG_FOR | PG_FOW | PG_FOE;
1818 		else if ((attrs & PGA_MODIFIED) == 0)
1819 			npte |= PG_FOW;
1820 
1821 		/*
1822 		 * Mapping was entered on PV list.
1823 		 */
1824 		npte |= PG_PVLIST;
1825 	}
1826 	if (wired)
1827 		npte |= PG_WIRED;
1828 #ifdef DEBUG
1829 	if (pmapdebug & PDB_ENTER)
1830 		printf("pmap_enter: new pte = 0x%lx\n", npte);
1831 #endif
1832 
1833 	/*
1834 	 * If the PALcode portion of the new PTE is the same as the
1835 	 * old PTE, no TBI is necessary.
1836 	 */
1837 	if (PG_PALCODE(opte) == PG_PALCODE(npte))
1838 		tflush = false;
1839 
1840 	/*
1841 	 * Set the new PTE.
1842 	 */
1843 	PMAP_SET_PTE(pte, npte);
1844 
1845 	/*
1846 	 * Invalidate the TLB entry for this VA and any appropriate
1847 	 * caches.
1848 	 */
1849 	if (tflush) {
1850 		PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id);
1851 		PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0);
1852 		PMAP_TLB_SHOOTNOW();
1853 	}
1854 	if (setisync)
1855 		PMAP_SET_NEEDISYNC(pmap);
1856 	if (needisync)
1857 		PMAP_SYNC_ISTREAM(pmap);
1858 
1859 out:
1860 	PMAP_UNLOCK(pmap);
1861 	PMAP_MAP_TO_HEAD_UNLOCK();
1862 
1863 	return error;
1864 }
1865 
1866 /*
1867  * pmap_kenter_pa:		[ INTERFACE ]
1868  *
1869  *	Enter a va -> pa mapping into the kernel pmap without any
1870  *	physical->virtual tracking.
1871  *
1872  *	Note: no locking is necessary in this function.
1873  */
1874 void
1875 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
1876 {
1877 	pt_entry_t *pte, npte;
1878 	long cpu_id = cpu_number();
1879 	bool needisync = false;
1880 	pmap_t pmap = pmap_kernel();
1881 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1882 
1883 #ifdef DEBUG
1884 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1885 		printf("pmap_kenter_pa(%lx, %lx, %x)\n",
1886 		    va, pa, prot);
1887 #endif
1888 
1889 #ifdef DIAGNOSTIC
1890 	/*
1891 	 * Sanity check the virtual address.
1892 	 */
1893 	if (va < VM_MIN_KERNEL_ADDRESS)
1894 		panic("pmap_kenter_pa: kernel pmap, invalid va 0x%lx", va);
1895 #endif
1896 
1897 	pte = PMAP_KERNEL_PTE(va);
1898 
1899 	if (pmap_pte_v(pte) == 0)
1900 		PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1);
1901 	if (pmap_pte_w(pte) == 0)
1902 		PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1903 
1904 	if ((prot & VM_PROT_EXECUTE) != 0 || pmap_pte_exec(pte))
1905 		needisync = true;
1906 
1907 	/*
1908 	 * Build the new PTE.
1909 	 */
1910 	npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap_kernel(), prot) |
1911 	    PG_V | PG_WIRED;
1912 
1913 	/*
1914 	 * Set the new PTE.
1915 	 */
1916 	PMAP_SET_PTE(pte, npte);
1917 #if defined(MULTIPROCESSOR)
1918 	alpha_mb();		/* XXX alpha_wmb()? */
1919 #endif
1920 
1921 	/*
1922 	 * Invalidate the TLB entry for this VA and any appropriate
1923 	 * caches.
1924 	 */
1925 	PMAP_INVALIDATE_TLB(pmap, va, true, true, cpu_id);
1926 	PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM);
1927 	PMAP_TLB_SHOOTNOW();
1928 
1929 	if (needisync)
1930 		PMAP_SYNC_ISTREAM_KERNEL();
1931 }
1932 
1933 /*
1934  * pmap_kremove:		[ INTERFACE ]
1935  *
1936  *	Remove a mapping entered with pmap_kenter_pa() starting at va,
1937  *	for size bytes (assumed to be page rounded).
1938  */
1939 void
1940 pmap_kremove(vaddr_t va, vsize_t size)
1941 {
1942 	pt_entry_t *pte;
1943 	bool needisync = false;
1944 	long cpu_id = cpu_number();
1945 	pmap_t pmap = pmap_kernel();
1946 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1947 
1948 #ifdef DEBUG
1949 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1950 		printf("pmap_kremove(%lx, %lx)\n",
1951 		    va, size);
1952 #endif
1953 
1954 #ifdef DIAGNOSTIC
1955 	if (va < VM_MIN_KERNEL_ADDRESS)
1956 		panic("pmap_kremove: user address");
1957 #endif
1958 
1959 	for (; size != 0; size -= PAGE_SIZE, va += PAGE_SIZE) {
1960 		pte = PMAP_KERNEL_PTE(va);
1961 		if (pmap_pte_v(pte)) {
1962 #ifdef DIAGNOSTIC
1963 			if (pmap_pte_pv(pte))
1964 				panic("pmap_kremove: PG_PVLIST mapping for "
1965 				    "0x%lx", va);
1966 #endif
1967 			if (pmap_pte_exec(pte))
1968 				needisync = true;
1969 
1970 			/* Zap the mapping. */
1971 			PMAP_SET_PTE(pte, PG_NV);
1972 #if defined(MULTIPROCESSOR)
1973 			alpha_mb();		/* XXX alpha_wmb()? */
1974 #endif
1975 			PMAP_INVALIDATE_TLB(pmap, va, true, true, cpu_id);
1976 			PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM);
1977 
1978 			/* Update stats. */
1979 			PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1);
1980 			PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1981 		}
1982 	}
1983 
1984 	PMAP_TLB_SHOOTNOW();
1985 
1986 	if (needisync)
1987 		PMAP_SYNC_ISTREAM_KERNEL();
1988 }
1989 
1990 /*
1991  * pmap_unwire:			[ INTERFACE ]
1992  *
1993  *	Clear the wired attribute for a map/virtual-address pair.
1994  *
1995  *	The mapping must already exist in the pmap.
1996  */
1997 void
1998 pmap_unwire(pmap_t pmap, vaddr_t va)
1999 {
2000 	pt_entry_t *pte;
2001 
2002 #ifdef DEBUG
2003 	if (pmapdebug & PDB_FOLLOW)
2004 		printf("pmap_unwire(%p, %lx)\n", pmap, va);
2005 #endif
2006 
2007 	PMAP_LOCK(pmap);
2008 
2009 	pte = pmap_l3pte(pmap, va, NULL);
2010 #ifdef DIAGNOSTIC
2011 	if (pte == NULL || pmap_pte_v(pte) == 0)
2012 		panic("pmap_unwire");
2013 #endif
2014 
2015 	/*
2016 	 * If wiring actually changed (always?) clear the wire bit and
2017 	 * update the wire count.  Note that wiring is not a hardware
2018 	 * characteristic so there is no need to invalidate the TLB.
2019 	 */
2020 	if (pmap_pte_w_chg(pte, 0)) {
2021 		pmap_pte_set_w(pte, false);
2022 		PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
2023 	}
2024 #ifdef DIAGNOSTIC
2025 	else {
2026 		printf("pmap_unwire: wiring for pmap %p va 0x%lx "
2027 		    "didn't change!\n", pmap, va);
2028 	}
2029 #endif
2030 
2031 	PMAP_UNLOCK(pmap);
2032 }
2033 
2034 /*
2035  * pmap_extract:		[ INTERFACE ]
2036  *
2037  *	Extract the physical address associated with the given
2038  *	pmap/virtual address pair.
2039  */
2040 bool
2041 pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *pap)
2042 {
2043 	pt_entry_t *l1pte, *l2pte, *l3pte;
2044 	paddr_t pa;
2045 
2046 #ifdef DEBUG
2047 	if (pmapdebug & PDB_FOLLOW)
2048 		printf("pmap_extract(%p, %lx) -> ", pmap, va);
2049 #endif
2050 
2051 	/*
2052 	 * Take a faster path for the kernel pmap.  Avoids locking,
2053 	 * handles K0SEG.
2054 	 */
2055 	if (pmap == pmap_kernel()) {
2056 		pa = vtophys(va);
2057 		if (pap != NULL)
2058 			*pap = pa;
2059 #ifdef DEBUG
2060 		if (pmapdebug & PDB_FOLLOW)
2061 			printf("0x%lx (kernel vtophys)\n", pa);
2062 #endif
2063 		return (pa != 0);	/* XXX */
2064 	}
2065 
2066 	PMAP_LOCK(pmap);
2067 
2068 	l1pte = pmap_l1pte(pmap, va);
2069 	if (pmap_pte_v(l1pte) == 0)
2070 		goto out;
2071 
2072 	l2pte = pmap_l2pte(pmap, va, l1pte);
2073 	if (pmap_pte_v(l2pte) == 0)
2074 		goto out;
2075 
2076 	l3pte = pmap_l3pte(pmap, va, l2pte);
2077 	if (pmap_pte_v(l3pte) == 0)
2078 		goto out;
2079 
2080 	pa = pmap_pte_pa(l3pte) | (va & PGOFSET);
2081 	PMAP_UNLOCK(pmap);
2082 	if (pap != NULL)
2083 		*pap = pa;
2084 #ifdef DEBUG
2085 	if (pmapdebug & PDB_FOLLOW)
2086 		printf("0x%lx\n", pa);
2087 #endif
2088 	return (true);
2089 
2090  out:
2091 	PMAP_UNLOCK(pmap);
2092 #ifdef DEBUG
2093 	if (pmapdebug & PDB_FOLLOW)
2094 		printf("failed\n");
2095 #endif
2096 	return (false);
2097 }
2098 
2099 /*
2100  * pmap_copy:			[ INTERFACE ]
2101  *
2102  *	Copy the mapping range specified by src_addr/len
2103  *	from the source map to the range dst_addr/len
2104  *	in the destination map.
2105  *
2106  *	This routine is only advisory and need not do anything.
2107  */
2108 /* call deleted in <machine/pmap.h> */
2109 
2110 /*
2111  * pmap_update:			[ INTERFACE ]
2112  *
2113  *	Require that all active physical maps contain no
2114  *	incorrect entries NOW, by processing any deferred
2115  *	pmap operations.
2116  */
2117 /* call deleted in <machine/pmap.h> */
2118 
2119 /*
2120  * pmap_activate:		[ INTERFACE ]
2121  *
2122  *	Activate the pmap used by the specified process.  This includes
2123  *	reloading the MMU context if the current process, and marking
2124  *	the pmap in use by the processor.
2125  */
2126 void
2127 pmap_activate(struct lwp *l)
2128 {
2129 	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
2130 	long cpu_id = cpu_number();
2131 
2132 #ifdef DEBUG
2133 	if (pmapdebug & PDB_FOLLOW)
2134 		printf("pmap_activate(%p)\n", l);
2135 #endif
2136 
2137 	/* Mark the pmap in use by this processor. */
2138 	atomic_or_ulong(&pmap->pm_cpus, (1UL << cpu_id));
2139 
2140 	/* Allocate an ASN. */
2141 	pmap_asn_alloc(pmap, cpu_id);
2142 
2143 	PMAP_ACTIVATE(pmap, l, cpu_id);
2144 }
2145 
2146 /*
2147  * pmap_deactivate:		[ INTERFACE ]
2148  *
2149  *	Mark that the pmap used by the specified process is no longer
2150  *	in use by the processor.
2151  *
2152  *	The comment above pmap_activate() wrt. locking applies here,
2153  *	as well.  Note that we use only a single `atomic' operation,
2154  *	so no locking is necessary.
2155  */
2156 void
2157 pmap_deactivate(struct lwp *l)
2158 {
2159 	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
2160 
2161 #ifdef DEBUG
2162 	if (pmapdebug & PDB_FOLLOW)
2163 		printf("pmap_deactivate(%p)\n", l);
2164 #endif
2165 
2166 	/*
2167 	 * Mark the pmap no longer in use by this processor.
2168 	 */
2169 	atomic_and_ulong(&pmap->pm_cpus, ~(1UL << cpu_number()));
2170 }
2171 
2172 /*
2173  * pmap_zero_page:		[ INTERFACE ]
2174  *
2175  *	Zero the specified (machine independent) page by mapping the page
2176  *	into virtual memory and clear its contents, one machine dependent
2177  *	page at a time.
2178  *
2179  *	Note: no locking is necessary in this function.
2180  */
2181 void
2182 pmap_zero_page(paddr_t phys)
2183 {
2184 	u_long *p0, *p1, *pend;
2185 
2186 #ifdef DEBUG
2187 	if (pmapdebug & PDB_FOLLOW)
2188 		printf("pmap_zero_page(%lx)\n", phys);
2189 #endif
2190 
2191 	p0 = (u_long *)ALPHA_PHYS_TO_K0SEG(phys);
2192 	p1 = NULL;
2193 	pend = (u_long *)((u_long)p0 + PAGE_SIZE);
2194 
2195 	/*
2196 	 * Unroll the loop a bit, doing 16 quadwords per iteration.
2197 	 * Do only 8 back-to-back stores, and alternate registers.
2198 	 */
2199 	do {
2200 		__asm volatile(
2201 		"# BEGIN loop body\n"
2202 		"	addq	%2, (8 * 8), %1		\n"
2203 		"	stq	$31, (0 * 8)(%0)	\n"
2204 		"	stq	$31, (1 * 8)(%0)	\n"
2205 		"	stq	$31, (2 * 8)(%0)	\n"
2206 		"	stq	$31, (3 * 8)(%0)	\n"
2207 		"	stq	$31, (4 * 8)(%0)	\n"
2208 		"	stq	$31, (5 * 8)(%0)	\n"
2209 		"	stq	$31, (6 * 8)(%0)	\n"
2210 		"	stq	$31, (7 * 8)(%0)	\n"
2211 		"					\n"
2212 		"	addq	%3, (8 * 8), %0		\n"
2213 		"	stq	$31, (0 * 8)(%1)	\n"
2214 		"	stq	$31, (1 * 8)(%1)	\n"
2215 		"	stq	$31, (2 * 8)(%1)	\n"
2216 		"	stq	$31, (3 * 8)(%1)	\n"
2217 		"	stq	$31, (4 * 8)(%1)	\n"
2218 		"	stq	$31, (5 * 8)(%1)	\n"
2219 		"	stq	$31, (6 * 8)(%1)	\n"
2220 		"	stq	$31, (7 * 8)(%1)	\n"
2221 		"	# END loop body"
2222 		: "=r" (p0), "=r" (p1)
2223 		: "0" (p0), "1" (p1)
2224 		: "memory");
2225 	} while (p0 < pend);
2226 }
2227 
2228 /*
2229  * pmap_copy_page:		[ INTERFACE ]
2230  *
2231  *	Copy the specified (machine independent) page by mapping the page
2232  *	into virtual memory and using memcpy to copy the page, one machine
2233  *	dependent page at a time.
2234  *
2235  *	Note: no locking is necessary in this function.
2236  */
2237 void
2238 pmap_copy_page(paddr_t src, paddr_t dst)
2239 {
2240 	const void *s;
2241 	void *d;
2242 
2243 #ifdef DEBUG
2244 	if (pmapdebug & PDB_FOLLOW)
2245 		printf("pmap_copy_page(%lx, %lx)\n", src, dst);
2246 #endif
2247         s = (const void *)ALPHA_PHYS_TO_K0SEG(src);
2248         d = (void *)ALPHA_PHYS_TO_K0SEG(dst);
2249 	memcpy(d, s, PAGE_SIZE);
2250 }
2251 
2252 /*
2253  * pmap_pageidlezero:		[ INTERFACE ]
2254  *
2255  *	Page zero'er for the idle loop.  Returns true if the
2256  *	page was zero'd, FLASE if we aborted for some reason.
2257  */
2258 bool
2259 pmap_pageidlezero(paddr_t pa)
2260 {
2261 	u_long *ptr;
2262 	int i, cnt = PAGE_SIZE / sizeof(u_long);
2263 
2264 	for (i = 0, ptr = (u_long *) ALPHA_PHYS_TO_K0SEG(pa); i < cnt; i++) {
2265 		if (sched_curcpu_runnable_p()) {
2266 			/*
2267 			 * An LWP has become ready.  Abort now,
2268 			 * so we don't keep it waiting while we
2269 			 * finish zeroing the page.
2270 			 */
2271 			return (false);
2272 		}
2273 		*ptr++ = 0;
2274 	}
2275 
2276 	return (true);
2277 }
2278 
2279 /*
2280  * pmap_clear_modify:		[ INTERFACE ]
2281  *
2282  *	Clear the modify bits on the specified physical page.
2283  */
2284 bool
2285 pmap_clear_modify(struct vm_page *pg)
2286 {
2287 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2288 	bool rv = false;
2289 	long cpu_id = cpu_number();
2290 	kmutex_t *lock;
2291 
2292 #ifdef DEBUG
2293 	if (pmapdebug & PDB_FOLLOW)
2294 		printf("pmap_clear_modify(%p)\n", pg);
2295 #endif
2296 
2297 	PMAP_HEAD_TO_MAP_LOCK();
2298 	lock = pmap_pvh_lock(pg);
2299 	mutex_enter(lock);
2300 
2301 	if (md->pvh_attrs & PGA_MODIFIED) {
2302 		rv = true;
2303 		pmap_changebit(pg, PG_FOW, ~0, cpu_id);
2304 		md->pvh_attrs &= ~PGA_MODIFIED;
2305 	}
2306 
2307 	mutex_exit(lock);
2308 	PMAP_HEAD_TO_MAP_UNLOCK();
2309 
2310 	return (rv);
2311 }
2312 
2313 /*
2314  * pmap_clear_reference:	[ INTERFACE ]
2315  *
2316  *	Clear the reference bit on the specified physical page.
2317  */
2318 bool
2319 pmap_clear_reference(struct vm_page *pg)
2320 {
2321 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2322 	bool rv = false;
2323 	long cpu_id = cpu_number();
2324 	kmutex_t *lock;
2325 
2326 #ifdef DEBUG
2327 	if (pmapdebug & PDB_FOLLOW)
2328 		printf("pmap_clear_reference(%p)\n", pg);
2329 #endif
2330 
2331 	PMAP_HEAD_TO_MAP_LOCK();
2332 	lock = pmap_pvh_lock(pg);
2333 	mutex_enter(lock);
2334 
2335 	if (md->pvh_attrs & PGA_REFERENCED) {
2336 		rv = true;
2337 		pmap_changebit(pg, PG_FOR | PG_FOW | PG_FOE, ~0, cpu_id);
2338 		md->pvh_attrs &= ~PGA_REFERENCED;
2339 	}
2340 
2341 	mutex_exit(lock);
2342 	PMAP_HEAD_TO_MAP_UNLOCK();
2343 
2344 	return (rv);
2345 }
2346 
2347 /*
2348  * pmap_is_referenced:		[ INTERFACE ]
2349  *
2350  *	Return whether or not the specified physical page is referenced
2351  *	by any physical maps.
2352  */
2353 /* See <machine/pmap.h> */
2354 
2355 /*
2356  * pmap_is_modified:		[ INTERFACE ]
2357  *
2358  *	Return whether or not the specified physical page is modified
2359  *	by any physical maps.
2360  */
2361 /* See <machine/pmap.h> */
2362 
2363 /*
2364  * pmap_phys_address:		[ INTERFACE ]
2365  *
2366  *	Return the physical address corresponding to the specified
2367  *	cookie.  Used by the device pager to decode a device driver's
2368  *	mmap entry point return value.
2369  *
2370  *	Note: no locking is necessary in this function.
2371  */
2372 paddr_t
2373 pmap_phys_address(paddr_t ppn)
2374 {
2375 
2376 	return (alpha_ptob(ppn));
2377 }
2378 
2379 /*
2380  * Miscellaneous support routines follow
2381  */
2382 
2383 /*
2384  * alpha_protection_init:
2385  *
2386  *	Initialize Alpha protection code array.
2387  *
2388  *	Note: no locking is necessary in this function.
2389  */
2390 static void
2391 alpha_protection_init(void)
2392 {
2393 	int prot, *kp, *up;
2394 
2395 	kp = protection_codes[0];
2396 	up = protection_codes[1];
2397 
2398 	for (prot = 0; prot < 8; prot++) {
2399 		kp[prot] = PG_ASM;
2400 		up[prot] = 0;
2401 
2402 		if (prot & VM_PROT_READ) {
2403 			kp[prot] |= PG_KRE;
2404 			up[prot] |= PG_KRE | PG_URE;
2405 		}
2406 		if (prot & VM_PROT_WRITE) {
2407 			kp[prot] |= PG_KWE;
2408 			up[prot] |= PG_KWE | PG_UWE;
2409 		}
2410 		if (prot & VM_PROT_EXECUTE) {
2411 			kp[prot] |= PG_EXEC | PG_KRE;
2412 			up[prot] |= PG_EXEC | PG_KRE | PG_URE;
2413 		} else {
2414 			kp[prot] |= PG_FOE;
2415 			up[prot] |= PG_FOE;
2416 		}
2417 	}
2418 }
2419 
2420 /*
2421  * pmap_remove_mapping:
2422  *
2423  *	Invalidate a single page denoted by pmap/va.
2424  *
2425  *	If (pte != NULL), it is the already computed PTE for the page.
2426  *
2427  *	Note: locking in this function is complicated by the fact
2428  *	that we can be called when the PV list is already locked.
2429  *	(pmap_page_protect()).  In this case, the caller must be
2430  *	careful to get the next PV entry while we remove this entry
2431  *	from beneath it.  We assume that the pmap itself is already
2432  *	locked; dolock applies only to the PV list.
2433  *
2434  *	Returns true or false, indicating if an I-stream sync needs
2435  *	to be initiated (for this CPU or for other CPUs).
2436  */
2437 static bool
2438 pmap_remove_mapping(pmap_t pmap, vaddr_t va, pt_entry_t *pte,
2439     bool dolock, long cpu_id)
2440 {
2441 	paddr_t pa;
2442 	struct vm_page *pg;		/* if != NULL, page is managed */
2443 	bool onpv;
2444 	bool hadasm;
2445 	bool isactive;
2446 	bool needisync = false;
2447 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
2448 
2449 #ifdef DEBUG
2450 	if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
2451 		printf("pmap_remove_mapping(%p, %lx, %p, %d, %ld)\n",
2452 		       pmap, va, pte, dolock, cpu_id);
2453 #endif
2454 
2455 	/*
2456 	 * PTE not provided, compute it from pmap and va.
2457 	 */
2458 	if (pte == NULL) {
2459 		pte = pmap_l3pte(pmap, va, NULL);
2460 		if (pmap_pte_v(pte) == 0)
2461 			return (false);
2462 	}
2463 
2464 	pa = pmap_pte_pa(pte);
2465 	onpv = (pmap_pte_pv(pte) != 0);
2466 	hadasm = (pmap_pte_asm(pte) != 0);
2467 	isactive = PMAP_ISACTIVE(pmap, cpu_id);
2468 
2469 	/*
2470 	 * Determine what we need to do about the I-stream.  If
2471 	 * PG_EXEC was set, we mark a user pmap as needing an
2472 	 * I-sync on the way out to userspace.  We always need
2473 	 * an immediate I-sync for the kernel pmap.
2474 	 */
2475 	if (pmap_pte_exec(pte)) {
2476 		if (pmap == pmap_kernel())
2477 			needisync = true;
2478 		else {
2479 			PMAP_SET_NEEDISYNC(pmap);
2480 			needisync = (pmap->pm_cpus != 0);
2481 		}
2482 	}
2483 
2484 	/*
2485 	 * Update statistics
2486 	 */
2487 	if (pmap_pte_w(pte))
2488 		PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
2489 	PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1);
2490 
2491 	/*
2492 	 * Invalidate the PTE after saving the reference modify info.
2493 	 */
2494 #ifdef DEBUG
2495 	if (pmapdebug & PDB_REMOVE)
2496 		printf("remove: invalidating pte at %p\n", pte);
2497 #endif
2498 	PMAP_SET_PTE(pte, PG_NV);
2499 
2500 	PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id);
2501 	PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0);
2502 	PMAP_TLB_SHOOTNOW();
2503 
2504 	/*
2505 	 * If we're removing a user mapping, check to see if we
2506 	 * can free page table pages.
2507 	 */
2508 	if (pmap != pmap_kernel()) {
2509 		/*
2510 		 * Delete the reference on the level 3 table.  It will
2511 		 * delete references on the level 2 and 1 tables as
2512 		 * appropriate.
2513 		 */
2514 		pmap_l3pt_delref(pmap, va, pte, cpu_id);
2515 	}
2516 
2517 	/*
2518 	 * If the mapping wasn't entered on the PV list, we're all done.
2519 	 */
2520 	if (onpv == false)
2521 		return (needisync);
2522 
2523 	/*
2524 	 * Remove it from the PV table.
2525 	 */
2526 	pg = PHYS_TO_VM_PAGE(pa);
2527 	KASSERT(pg != NULL);
2528 	pmap_pv_remove(pmap, pg, va, dolock);
2529 
2530 	return (needisync);
2531 }
2532 
2533 /*
2534  * pmap_changebit:
2535  *
2536  *	Set or clear the specified PTE bits for all mappings on the
2537  *	specified page.
2538  *
2539  *	Note: we assume that the pv_head is already locked, and that
2540  *	the caller has acquired a PV->pmap mutex so that we can lock
2541  *	the pmaps as we encounter them.
2542  */
2543 static void
2544 pmap_changebit(struct vm_page *pg, u_long set, u_long mask, long cpu_id)
2545 {
2546 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2547 	pv_entry_t pv;
2548 	pt_entry_t *pte, npte;
2549 	vaddr_t va;
2550 	bool hadasm, isactive;
2551 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
2552 
2553 #ifdef DEBUG
2554 	if (pmapdebug & PDB_BITS)
2555 		printf("pmap_changebit(%p, 0x%lx, 0x%lx)\n",
2556 		    pg, set, mask);
2557 #endif
2558 
2559 	/*
2560 	 * Loop over all current mappings setting/clearing as apropos.
2561 	 */
2562 	for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) {
2563 		va = pv->pv_va;
2564 
2565 		PMAP_LOCK(pv->pv_pmap);
2566 
2567 		pte = pv->pv_pte;
2568 		npte = (*pte | set) & mask;
2569 		if (*pte != npte) {
2570 			hadasm = (pmap_pte_asm(pte) != 0);
2571 			isactive = PMAP_ISACTIVE(pv->pv_pmap, cpu_id);
2572 			PMAP_SET_PTE(pte, npte);
2573 			PMAP_INVALIDATE_TLB(pv->pv_pmap, va, hadasm, isactive,
2574 			    cpu_id);
2575 			PMAP_TLB_SHOOTDOWN(pv->pv_pmap, va,
2576 			    hadasm ? PG_ASM : 0);
2577 		}
2578 		PMAP_UNLOCK(pv->pv_pmap);
2579 	}
2580 
2581 	PMAP_TLB_SHOOTNOW();
2582 }
2583 
2584 /*
2585  * pmap_emulate_reference:
2586  *
2587  *	Emulate reference and/or modified bit hits.
2588  *	Return 1 if this was an execute fault on a non-exec mapping,
2589  *	otherwise return 0.
2590  */
2591 int
2592 pmap_emulate_reference(struct lwp *l, vaddr_t v, int user, int type)
2593 {
2594 	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
2595 	pt_entry_t faultoff, *pte;
2596 	struct vm_page *pg;
2597 	paddr_t pa;
2598 	bool didlock = false;
2599 	bool exec = false;
2600 	long cpu_id = cpu_number();
2601 	kmutex_t *lock;
2602 
2603 #ifdef DEBUG
2604 	if (pmapdebug & PDB_FOLLOW)
2605 		printf("pmap_emulate_reference: %p, 0x%lx, %d, %d\n",
2606 		    l, v, user, type);
2607 #endif
2608 
2609 	/*
2610 	 * Convert process and virtual address to physical address.
2611 	 */
2612 	if (v >= VM_MIN_KERNEL_ADDRESS) {
2613 		if (user)
2614 			panic("pmap_emulate_reference: user ref to kernel");
2615 		/*
2616 		 * No need to lock here; kernel PT pages never go away.
2617 		 */
2618 		pte = PMAP_KERNEL_PTE(v);
2619 	} else {
2620 #ifdef DIAGNOSTIC
2621 		if (l == NULL)
2622 			panic("pmap_emulate_reference: bad proc");
2623 		if (l->l_proc->p_vmspace == NULL)
2624 			panic("pmap_emulate_reference: bad p_vmspace");
2625 #endif
2626 		PMAP_LOCK(pmap);
2627 		didlock = true;
2628 		pte = pmap_l3pte(pmap, v, NULL);
2629 		/*
2630 		 * We'll unlock below where we're done with the PTE.
2631 		 */
2632 	}
2633 	exec = pmap_pte_exec(pte);
2634 	if (!exec && type == ALPHA_MMCSR_FOE) {
2635 		if (didlock)
2636 			PMAP_UNLOCK(pmap);
2637                return (1);
2638 	}
2639 #ifdef DEBUG
2640 	if (pmapdebug & PDB_FOLLOW) {
2641 		printf("\tpte = %p, ", pte);
2642 		printf("*pte = 0x%lx\n", *pte);
2643 	}
2644 #endif
2645 #ifdef DEBUG				/* These checks are more expensive */
2646 	if (!pmap_pte_v(pte))
2647 		panic("pmap_emulate_reference: invalid pte");
2648 	if (type == ALPHA_MMCSR_FOW) {
2649 		if (!(*pte & (user ? PG_UWE : PG_UWE | PG_KWE)))
2650 			panic("pmap_emulate_reference: write but unwritable");
2651 		if (!(*pte & PG_FOW))
2652 			panic("pmap_emulate_reference: write but not FOW");
2653 	} else {
2654 		if (!(*pte & (user ? PG_URE : PG_URE | PG_KRE)))
2655 			panic("pmap_emulate_reference: !write but unreadable");
2656 		if (!(*pte & (PG_FOR | PG_FOE)))
2657 			panic("pmap_emulate_reference: !write but not FOR|FOE");
2658 	}
2659 	/* Other diagnostics? */
2660 #endif
2661 	pa = pmap_pte_pa(pte);
2662 
2663 	/*
2664 	 * We're now done with the PTE.  If it was a user pmap, unlock
2665 	 * it now.
2666 	 */
2667 	if (didlock)
2668 		PMAP_UNLOCK(pmap);
2669 
2670 #ifdef DEBUG
2671 	if (pmapdebug & PDB_FOLLOW)
2672 		printf("\tpa = 0x%lx\n", pa);
2673 #endif
2674 #ifdef DIAGNOSTIC
2675 	if (!uvm_pageismanaged(pa))
2676 		panic("pmap_emulate_reference(%p, 0x%lx, %d, %d): "
2677 		      "pa 0x%lx not managed", l, v, user, type, pa);
2678 #endif
2679 
2680 	/*
2681 	 * Twiddle the appropriate bits to reflect the reference
2682 	 * and/or modification..
2683 	 *
2684 	 * The rules:
2685 	 * 	(1) always mark page as used, and
2686 	 *	(2) if it was a write fault, mark page as modified.
2687 	 */
2688 	pg = PHYS_TO_VM_PAGE(pa);
2689 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2690 
2691 	PMAP_HEAD_TO_MAP_LOCK();
2692 	lock = pmap_pvh_lock(pg);
2693 	mutex_enter(lock);
2694 
2695 	if (type == ALPHA_MMCSR_FOW) {
2696 		md->pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED);
2697 		faultoff = PG_FOR | PG_FOW;
2698 	} else {
2699 		md->pvh_attrs |= PGA_REFERENCED;
2700 		faultoff = PG_FOR;
2701 		if (exec) {
2702 			faultoff |= PG_FOE;
2703 		}
2704 	}
2705 	pmap_changebit(pg, 0, ~faultoff, cpu_id);
2706 
2707 	mutex_exit(lock);
2708 	PMAP_HEAD_TO_MAP_UNLOCK();
2709 	return (0);
2710 }
2711 
2712 #ifdef DEBUG
2713 /*
2714  * pmap_pv_dump:
2715  *
2716  *	Dump the physical->virtual data for the specified page.
2717  */
2718 void
2719 pmap_pv_dump(paddr_t pa)
2720 {
2721 	struct vm_page *pg;
2722 	struct vm_page_md *md;
2723 	pv_entry_t pv;
2724 	kmutex_t *lock;
2725 
2726 	pg = PHYS_TO_VM_PAGE(pa);
2727 	md = VM_PAGE_TO_MD(pg);
2728 
2729 	lock = pmap_pvh_lock(pg);
2730 	mutex_enter(lock);
2731 
2732 	printf("pa 0x%lx (attrs = 0x%x):\n", pa, md->pvh_attrs);
2733 	for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next)
2734 		printf("     pmap %p, va 0x%lx\n",
2735 		    pv->pv_pmap, pv->pv_va);
2736 	printf("\n");
2737 
2738 	mutex_exit(lock);
2739 }
2740 #endif
2741 
2742 /*
2743  * vtophys:
2744  *
2745  *	Return the physical address corresponding to the K0SEG or
2746  *	K1SEG address provided.
2747  *
2748  *	Note: no locking is necessary in this function.
2749  */
2750 paddr_t
2751 vtophys(vaddr_t vaddr)
2752 {
2753 	pt_entry_t *pte;
2754 	paddr_t paddr = 0;
2755 
2756 	if (vaddr < ALPHA_K0SEG_BASE)
2757 		printf("vtophys: invalid vaddr 0x%lx", vaddr);
2758 	else if (vaddr <= ALPHA_K0SEG_END)
2759 		paddr = ALPHA_K0SEG_TO_PHYS(vaddr);
2760 	else {
2761 		pte = PMAP_KERNEL_PTE(vaddr);
2762 		if (pmap_pte_v(pte))
2763 			paddr = pmap_pte_pa(pte) | (vaddr & PGOFSET);
2764 	}
2765 
2766 #if 0
2767 	printf("vtophys(0x%lx) -> 0x%lx\n", vaddr, paddr);
2768 #endif
2769 
2770 	return (paddr);
2771 }
2772 
2773 /******************** pv_entry management ********************/
2774 
2775 /*
2776  * pmap_pv_enter:
2777  *
2778  *	Add a physical->virtual entry to the pv_table.
2779  */
2780 static int
2781 pmap_pv_enter(pmap_t pmap, struct vm_page *pg, vaddr_t va, pt_entry_t *pte,
2782     bool dolock)
2783 {
2784 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2785 	pv_entry_t newpv;
2786 	kmutex_t *lock;
2787 
2788 	/*
2789 	 * Allocate and fill in the new pv_entry.
2790 	 */
2791 	newpv = pmap_pv_alloc();
2792 	if (newpv == NULL)
2793 		return ENOMEM;
2794 	newpv->pv_va = va;
2795 	newpv->pv_pmap = pmap;
2796 	newpv->pv_pte = pte;
2797 
2798 	if (dolock) {
2799 		lock = pmap_pvh_lock(pg);
2800 		mutex_enter(lock);
2801 	}
2802 
2803 #ifdef DEBUG
2804     {
2805 	pv_entry_t pv;
2806 	/*
2807 	 * Make sure the entry doesn't already exist.
2808 	 */
2809 	for (pv = md->pvh_list; pv != NULL; pv = pv->pv_next) {
2810 		if (pmap == pv->pv_pmap && va == pv->pv_va) {
2811 			printf("pmap = %p, va = 0x%lx\n", pmap, va);
2812 			panic("pmap_pv_enter: already in pv table");
2813 		}
2814 	}
2815     }
2816 #endif
2817 
2818 	/*
2819 	 * ...and put it in the list.
2820 	 */
2821 	newpv->pv_next = md->pvh_list;
2822 	md->pvh_list = newpv;
2823 
2824 	if (dolock) {
2825 		mutex_exit(lock);
2826 	}
2827 
2828 	return 0;
2829 }
2830 
2831 /*
2832  * pmap_pv_remove:
2833  *
2834  *	Remove a physical->virtual entry from the pv_table.
2835  */
2836 static void
2837 pmap_pv_remove(pmap_t pmap, struct vm_page *pg, vaddr_t va, bool dolock)
2838 {
2839 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2840 	pv_entry_t pv, *pvp;
2841 	kmutex_t *lock;
2842 
2843 	if (dolock) {
2844 		lock = pmap_pvh_lock(pg);
2845 		mutex_enter(lock);
2846 	} else {
2847 		lock = NULL; /* XXX stupid gcc */
2848 	}
2849 
2850 	/*
2851 	 * Find the entry to remove.
2852 	 */
2853 	for (pvp = &md->pvh_list, pv = *pvp;
2854 	     pv != NULL; pvp = &pv->pv_next, pv = *pvp)
2855 		if (pmap == pv->pv_pmap && va == pv->pv_va)
2856 			break;
2857 
2858 #ifdef DEBUG
2859 	if (pv == NULL)
2860 		panic("pmap_pv_remove: not in pv table");
2861 #endif
2862 
2863 	*pvp = pv->pv_next;
2864 
2865 	if (dolock) {
2866 		mutex_exit(lock);
2867 	}
2868 
2869 	pmap_pv_free(pv);
2870 }
2871 
2872 /*
2873  * pmap_pv_page_alloc:
2874  *
2875  *	Allocate a page for the pv_entry pool.
2876  */
2877 static void *
2878 pmap_pv_page_alloc(struct pool *pp, int flags)
2879 {
2880 	paddr_t pg;
2881 
2882 	if (pmap_physpage_alloc(PGU_PVENT, &pg))
2883 		return ((void *)ALPHA_PHYS_TO_K0SEG(pg));
2884 	return (NULL);
2885 }
2886 
2887 /*
2888  * pmap_pv_page_free:
2889  *
2890  *	Free a pv_entry pool page.
2891  */
2892 static void
2893 pmap_pv_page_free(struct pool *pp, void *v)
2894 {
2895 
2896 	pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t)v));
2897 }
2898 
2899 /******************** misc. functions ********************/
2900 
2901 /*
2902  * pmap_physpage_alloc:
2903  *
2904  *	Allocate a single page from the VM system and return the
2905  *	physical address for that page.
2906  */
2907 static bool
2908 pmap_physpage_alloc(int usage, paddr_t *pap)
2909 {
2910 	struct vm_page *pg;
2911 	paddr_t pa;
2912 
2913 	/*
2914 	 * Don't ask for a zero'd page in the L1PT case -- we will
2915 	 * properly initialize it in the constructor.
2916 	 */
2917 
2918 	pg = uvm_pagealloc(NULL, 0, NULL, usage == PGU_L1PT ?
2919 	    UVM_PGA_USERESERVE : UVM_PGA_USERESERVE|UVM_PGA_ZERO);
2920 	if (pg != NULL) {
2921 		pa = VM_PAGE_TO_PHYS(pg);
2922 #ifdef DEBUG
2923 		struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2924 		if (md->pvh_refcnt != 0) {
2925 			printf("pmap_physpage_alloc: page 0x%lx has "
2926 			    "%d references\n", pa, md->pvh_refcnt);
2927 			panic("pmap_physpage_alloc");
2928 		}
2929 #endif
2930 		*pap = pa;
2931 		return (true);
2932 	}
2933 	return (false);
2934 }
2935 
2936 /*
2937  * pmap_physpage_free:
2938  *
2939  *	Free the single page table page at the specified physical address.
2940  */
2941 static void
2942 pmap_physpage_free(paddr_t pa)
2943 {
2944 	struct vm_page *pg;
2945 
2946 	if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL)
2947 		panic("pmap_physpage_free: bogus physical page address");
2948 
2949 #ifdef DEBUG
2950 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2951 	if (md->pvh_refcnt != 0)
2952 		panic("pmap_physpage_free: page still has references");
2953 #endif
2954 
2955 	uvm_pagefree(pg);
2956 }
2957 
2958 /*
2959  * pmap_physpage_addref:
2960  *
2961  *	Add a reference to the specified special use page.
2962  */
2963 static int
2964 pmap_physpage_addref(void *kva)
2965 {
2966 	struct vm_page *pg;
2967 	struct vm_page_md *md;
2968 	paddr_t pa;
2969 
2970 	pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva));
2971 	pg = PHYS_TO_VM_PAGE(pa);
2972 	md = VM_PAGE_TO_MD(pg);
2973 
2974 	KASSERT((int)md->pvh_refcnt >= 0);
2975 
2976 	return atomic_inc_uint_nv(&md->pvh_refcnt);
2977 }
2978 
2979 /*
2980  * pmap_physpage_delref:
2981  *
2982  *	Delete a reference to the specified special use page.
2983  */
2984 static int
2985 pmap_physpage_delref(void *kva)
2986 {
2987 	struct vm_page *pg;
2988 	struct vm_page_md *md;
2989 	paddr_t pa;
2990 
2991 	pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva));
2992 	pg = PHYS_TO_VM_PAGE(pa);
2993 	md = VM_PAGE_TO_MD(pg);
2994 
2995 	KASSERT((int)md->pvh_refcnt > 0);
2996 
2997 	return atomic_dec_uint_nv(&md->pvh_refcnt);
2998 }
2999 
3000 /******************** page table page management ********************/
3001 
3002 /*
3003  * pmap_growkernel:		[ INTERFACE ]
3004  *
3005  *	Grow the kernel address space.  This is a hint from the
3006  *	upper layer to pre-allocate more kernel PT pages.
3007  */
3008 vaddr_t
3009 pmap_growkernel(vaddr_t maxkvaddr)
3010 {
3011 	struct pmap *kpm = pmap_kernel(), *pm;
3012 	paddr_t ptaddr;
3013 	pt_entry_t *l1pte, *l2pte, pte;
3014 	vaddr_t va;
3015 	int l1idx;
3016 
3017 	rw_enter(&pmap_growkernel_lock, RW_WRITER);
3018 
3019 	if (maxkvaddr <= virtual_end)
3020 		goto out;		/* we are OK */
3021 
3022 	va = virtual_end;
3023 
3024 	while (va < maxkvaddr) {
3025 		/*
3026 		 * If there is no valid L1 PTE (i.e. no L2 PT page),
3027 		 * allocate a new L2 PT page and insert it into the
3028 		 * L1 map.
3029 		 */
3030 		l1pte = pmap_l1pte(kpm, va);
3031 		if (pmap_pte_v(l1pte) == 0) {
3032 			/*
3033 			 * XXX PGU_NORMAL?  It's not a "traditional" PT page.
3034 			 */
3035 			if (uvm.page_init_done == false) {
3036 				/*
3037 				 * We're growing the kernel pmap early (from
3038 				 * uvm_pageboot_alloc()).  This case must
3039 				 * be handled a little differently.
3040 				 */
3041 				ptaddr = ALPHA_K0SEG_TO_PHYS(
3042 				    pmap_steal_memory(PAGE_SIZE, NULL, NULL));
3043 			} else if (pmap_physpage_alloc(PGU_NORMAL,
3044 				   &ptaddr) == false)
3045 				goto die;
3046 			pte = (atop(ptaddr) << PG_SHIFT) |
3047 			    PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
3048 			*l1pte = pte;
3049 
3050 			l1idx = l1pte_index(va);
3051 
3052 			/* Update all the user pmaps. */
3053 			mutex_enter(&pmap_all_pmaps_lock);
3054 			for (pm = TAILQ_FIRST(&pmap_all_pmaps);
3055 			     pm != NULL; pm = TAILQ_NEXT(pm, pm_list)) {
3056 				/* Skip the kernel pmap. */
3057 				if (pm == pmap_kernel())
3058 					continue;
3059 
3060 				PMAP_LOCK(pm);
3061 				if (pm->pm_lev1map == kernel_lev1map) {
3062 					PMAP_UNLOCK(pm);
3063 					continue;
3064 				}
3065 				pm->pm_lev1map[l1idx] = pte;
3066 				PMAP_UNLOCK(pm);
3067 			}
3068 			mutex_exit(&pmap_all_pmaps_lock);
3069 		}
3070 
3071 		/*
3072 		 * Have an L2 PT page now, add the L3 PT page.
3073 		 */
3074 		l2pte = pmap_l2pte(kpm, va, l1pte);
3075 		KASSERT(pmap_pte_v(l2pte) == 0);
3076 		if (uvm.page_init_done == false) {
3077 			/*
3078 			 * See above.
3079 			 */
3080 			ptaddr = ALPHA_K0SEG_TO_PHYS(
3081 			    pmap_steal_memory(PAGE_SIZE, NULL, NULL));
3082 		} else if (pmap_physpage_alloc(PGU_NORMAL, &ptaddr) == false)
3083 			goto die;
3084 		*l2pte = (atop(ptaddr) << PG_SHIFT) |
3085 		    PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
3086 		va += ALPHA_L2SEG_SIZE;
3087 	}
3088 
3089 	/* Invalidate the L1 PT cache. */
3090 	pool_cache_invalidate(&pmap_l1pt_cache);
3091 
3092 	virtual_end = va;
3093 
3094  out:
3095 	rw_exit(&pmap_growkernel_lock);
3096 
3097 	return (virtual_end);
3098 
3099  die:
3100 	panic("pmap_growkernel: out of memory");
3101 }
3102 
3103 /*
3104  * pmap_lev1map_create:
3105  *
3106  *	Create a new level 1 page table for the specified pmap.
3107  *
3108  *	Note: growkernel must already be held and the pmap either
3109  *	already locked or unreferenced globally.
3110  */
3111 static int
3112 pmap_lev1map_create(pmap_t pmap, long cpu_id)
3113 {
3114 	pt_entry_t *l1pt;
3115 
3116 	KASSERT(pmap != pmap_kernel());
3117 
3118 	KASSERT(pmap->pm_lev1map == kernel_lev1map);
3119 	KASSERT(pmap->pm_asni[cpu_id].pma_asn == PMAP_ASN_RESERVED);
3120 
3121 	/* Don't sleep -- we're called with locks held. */
3122 	l1pt = pool_cache_get(&pmap_l1pt_cache, PR_NOWAIT);
3123 	if (l1pt == NULL)
3124 		return (ENOMEM);
3125 
3126 	pmap->pm_lev1map = l1pt;
3127 	return (0);
3128 }
3129 
3130 /*
3131  * pmap_lev1map_destroy:
3132  *
3133  *	Destroy the level 1 page table for the specified pmap.
3134  *
3135  *	Note: growkernel must be held and the pmap must already be
3136  *	locked or not globally referenced.
3137  */
3138 static void
3139 pmap_lev1map_destroy(pmap_t pmap, long cpu_id)
3140 {
3141 	pt_entry_t *l1pt = pmap->pm_lev1map;
3142 
3143 	KASSERT(pmap != pmap_kernel());
3144 
3145 	/*
3146 	 * Go back to referencing the global kernel_lev1map.
3147 	 */
3148 	pmap->pm_lev1map = kernel_lev1map;
3149 
3150 	/*
3151 	 * Free the old level 1 page table page.
3152 	 */
3153 	pool_cache_put(&pmap_l1pt_cache, l1pt);
3154 }
3155 
3156 /*
3157  * pmap_l1pt_ctor:
3158  *
3159  *	Pool cache constructor for L1 PT pages.
3160  *
3161  *	Note: The growkernel lock is held across allocations
3162  *	from our pool_cache, so we don't need to acquire it
3163  *	ourselves.
3164  */
3165 static int
3166 pmap_l1pt_ctor(void *arg, void *object, int flags)
3167 {
3168 	pt_entry_t *l1pt = object, pte;
3169 	int i;
3170 
3171 	/*
3172 	 * Initialize the new level 1 table by zeroing the
3173 	 * user portion and copying the kernel mappings into
3174 	 * the kernel portion.
3175 	 */
3176 	for (i = 0; i < l1pte_index(VM_MIN_KERNEL_ADDRESS); i++)
3177 		l1pt[i] = 0;
3178 
3179 	for (i = l1pte_index(VM_MIN_KERNEL_ADDRESS);
3180 	     i <= l1pte_index(VM_MAX_KERNEL_ADDRESS); i++)
3181 		l1pt[i] = kernel_lev1map[i];
3182 
3183 	/*
3184 	 * Now, map the new virtual page table.  NOTE: NO ASM!
3185 	 */
3186 	pte = ((ALPHA_K0SEG_TO_PHYS((vaddr_t) l1pt) >> PGSHIFT) << PG_SHIFT) |
3187 	    PG_V | PG_KRE | PG_KWE;
3188 	l1pt[l1pte_index(VPTBASE)] = pte;
3189 
3190 	return (0);
3191 }
3192 
3193 /*
3194  * pmap_l1pt_alloc:
3195  *
3196  *	Page alloctaor for L1 PT pages.
3197  */
3198 static void *
3199 pmap_l1pt_alloc(struct pool *pp, int flags)
3200 {
3201 	paddr_t ptpa;
3202 
3203 	/*
3204 	 * Attempt to allocate a free page.
3205 	 */
3206 	if (pmap_physpage_alloc(PGU_L1PT, &ptpa) == false)
3207 		return (NULL);
3208 
3209 	return ((void *) ALPHA_PHYS_TO_K0SEG(ptpa));
3210 }
3211 
3212 /*
3213  * pmap_l1pt_free:
3214  *
3215  *	Page freer for L1 PT pages.
3216  */
3217 static void
3218 pmap_l1pt_free(struct pool *pp, void *v)
3219 {
3220 
3221 	pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t) v));
3222 }
3223 
3224 /*
3225  * pmap_ptpage_alloc:
3226  *
3227  *	Allocate a level 2 or level 3 page table page, and
3228  *	initialize the PTE that references it.
3229  *
3230  *	Note: the pmap must already be locked.
3231  */
3232 static int
3233 pmap_ptpage_alloc(pmap_t pmap, pt_entry_t *pte, int usage)
3234 {
3235 	paddr_t ptpa;
3236 
3237 	/*
3238 	 * Allocate the page table page.
3239 	 */
3240 	if (pmap_physpage_alloc(usage, &ptpa) == false)
3241 		return (ENOMEM);
3242 
3243 	/*
3244 	 * Initialize the referencing PTE.
3245 	 */
3246 	PMAP_SET_PTE(pte, ((ptpa >> PGSHIFT) << PG_SHIFT) |
3247 	    PG_V | PG_KRE | PG_KWE | PG_WIRED |
3248 	    (pmap == pmap_kernel() ? PG_ASM : 0));
3249 
3250 	return (0);
3251 }
3252 
3253 /*
3254  * pmap_ptpage_free:
3255  *
3256  *	Free the level 2 or level 3 page table page referenced
3257  *	be the provided PTE.
3258  *
3259  *	Note: the pmap must already be locked.
3260  */
3261 static void
3262 pmap_ptpage_free(pmap_t pmap, pt_entry_t *pte)
3263 {
3264 	paddr_t ptpa;
3265 
3266 	/*
3267 	 * Extract the physical address of the page from the PTE
3268 	 * and clear the entry.
3269 	 */
3270 	ptpa = pmap_pte_pa(pte);
3271 	PMAP_SET_PTE(pte, PG_NV);
3272 
3273 #ifdef DEBUG
3274 	pmap_zero_page(ptpa);
3275 #endif
3276 	pmap_physpage_free(ptpa);
3277 }
3278 
3279 /*
3280  * pmap_l3pt_delref:
3281  *
3282  *	Delete a reference on a level 3 PT page.  If the reference drops
3283  *	to zero, free it.
3284  *
3285  *	Note: the pmap must already be locked.
3286  */
3287 static void
3288 pmap_l3pt_delref(pmap_t pmap, vaddr_t va, pt_entry_t *l3pte, long cpu_id)
3289 {
3290 	pt_entry_t *l1pte, *l2pte;
3291 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
3292 
3293 	l1pte = pmap_l1pte(pmap, va);
3294 	l2pte = pmap_l2pte(pmap, va, l1pte);
3295 
3296 #ifdef DIAGNOSTIC
3297 	if (pmap == pmap_kernel())
3298 		panic("pmap_l3pt_delref: kernel pmap");
3299 #endif
3300 
3301 	if (pmap_physpage_delref(l3pte) == 0) {
3302 		/*
3303 		 * No more mappings; we can free the level 3 table.
3304 		 */
3305 #ifdef DEBUG
3306 		if (pmapdebug & PDB_PTPAGE)
3307 			printf("pmap_l3pt_delref: freeing level 3 table at "
3308 			    "0x%lx\n", pmap_pte_pa(l2pte));
3309 #endif
3310 		pmap_ptpage_free(pmap, l2pte);
3311 
3312 		/*
3313 		 * We've freed a level 3 table, so we must
3314 		 * invalidate the TLB entry for that PT page
3315 		 * in the Virtual Page Table VA range, because
3316 		 * otherwise the PALcode will service a TLB
3317 		 * miss using the stale VPT TLB entry it entered
3318 		 * behind our back to shortcut to the VA's PTE.
3319 		 */
3320 		PMAP_INVALIDATE_TLB(pmap,
3321 		    (vaddr_t)(&VPT[VPT_INDEX(va)]), false,
3322 		    PMAP_ISACTIVE(pmap, cpu_id), cpu_id);
3323 		PMAP_TLB_SHOOTDOWN(pmap,
3324 		    (vaddr_t)(&VPT[VPT_INDEX(va)]), 0);
3325 		PMAP_TLB_SHOOTNOW();
3326 
3327 		/*
3328 		 * We've freed a level 3 table, so delete the reference
3329 		 * on the level 2 table.
3330 		 */
3331 		pmap_l2pt_delref(pmap, l1pte, l2pte, cpu_id);
3332 	}
3333 }
3334 
3335 /*
3336  * pmap_l2pt_delref:
3337  *
3338  *	Delete a reference on a level 2 PT page.  If the reference drops
3339  *	to zero, free it.
3340  *
3341  *	Note: the pmap must already be locked.
3342  */
3343 static void
3344 pmap_l2pt_delref(pmap_t pmap, pt_entry_t *l1pte, pt_entry_t *l2pte,
3345     long cpu_id)
3346 {
3347 
3348 #ifdef DIAGNOSTIC
3349 	if (pmap == pmap_kernel())
3350 		panic("pmap_l2pt_delref: kernel pmap");
3351 #endif
3352 
3353 	if (pmap_physpage_delref(l2pte) == 0) {
3354 		/*
3355 		 * No more mappings in this segment; we can free the
3356 		 * level 2 table.
3357 		 */
3358 #ifdef DEBUG
3359 		if (pmapdebug & PDB_PTPAGE)
3360 			printf("pmap_l2pt_delref: freeing level 2 table at "
3361 			    "0x%lx\n", pmap_pte_pa(l1pte));
3362 #endif
3363 		pmap_ptpage_free(pmap, l1pte);
3364 
3365 		/*
3366 		 * We've freed a level 2 table, so delete the reference
3367 		 * on the level 1 table.
3368 		 */
3369 		pmap_l1pt_delref(pmap, l1pte, cpu_id);
3370 	}
3371 }
3372 
3373 /*
3374  * pmap_l1pt_delref:
3375  *
3376  *	Delete a reference on a level 1 PT page.  If the reference drops
3377  *	to zero, free it.
3378  *
3379  *	Note: the pmap must already be locked.
3380  */
3381 static void
3382 pmap_l1pt_delref(pmap_t pmap, pt_entry_t *l1pte, long cpu_id)
3383 {
3384 
3385 #ifdef DIAGNOSTIC
3386 	if (pmap == pmap_kernel())
3387 		panic("pmap_l1pt_delref: kernel pmap");
3388 #endif
3389 
3390 	(void)pmap_physpage_delref(l1pte);
3391 }
3392 
3393 /******************** Address Space Number management ********************/
3394 
3395 /*
3396  * pmap_asn_alloc:
3397  *
3398  *	Allocate and assign an ASN to the specified pmap.
3399  *
3400  *	Note: the pmap must already be locked.  This may be called from
3401  *	an interprocessor interrupt, and in that case, the sender of
3402  *	the IPI has the pmap lock.
3403  */
3404 static void
3405 pmap_asn_alloc(pmap_t pmap, long cpu_id)
3406 {
3407 	struct pmap_asn_info *pma = &pmap->pm_asni[cpu_id];
3408 	struct pmap_asn_info *cpma = &pmap_asn_info[cpu_id];
3409 
3410 #ifdef DEBUG
3411 	if (pmapdebug & (PDB_FOLLOW|PDB_ASN))
3412 		printf("pmap_asn_alloc(%p)\n", pmap);
3413 #endif
3414 
3415 	/*
3416 	 * If the pmap is still using the global kernel_lev1map, there
3417 	 * is no need to assign an ASN at this time, because only
3418 	 * kernel mappings exist in that map, and all kernel mappings
3419 	 * have PG_ASM set.  If the pmap eventually gets its own
3420 	 * lev1map, an ASN will be allocated at that time.
3421 	 *
3422 	 * Only the kernel pmap will reference kernel_lev1map.  Do the
3423 	 * same old fixups, but note that we no longer need the pmap
3424 	 * to be locked if we're in this mode, since pm_lev1map will
3425 	 * never change.
3426 	 * #endif
3427 	 */
3428 	if (pmap->pm_lev1map == kernel_lev1map) {
3429 #ifdef DEBUG
3430 		if (pmapdebug & PDB_ASN)
3431 			printf("pmap_asn_alloc: still references "
3432 			    "kernel_lev1map\n");
3433 #endif
3434 #if defined(MULTIPROCESSOR)
3435 		/*
3436 		 * In a multiprocessor system, it's possible to
3437 		 * get here without having PMAP_ASN_RESERVED in
3438 		 * pmap->pm_asni[cpu_id].pma_asn; see pmap_lev1map_destroy().
3439 		 *
3440 		 * So, what we do here, is simply assign the reserved
3441 		 * ASN for kernel_lev1map users and let things
3442 		 * continue on.  We do, however, let uniprocessor
3443 		 * configurations continue to make its assertion.
3444 		 */
3445 		pma->pma_asn = PMAP_ASN_RESERVED;
3446 #else
3447 		KASSERT(pma->pma_asn == PMAP_ASN_RESERVED);
3448 #endif /* MULTIPROCESSOR */
3449 		return;
3450 	}
3451 
3452 	/*
3453 	 * On processors which do not implement ASNs, the swpctx PALcode
3454 	 * operation will automatically invalidate the TLB and I-cache,
3455 	 * so we don't need to do that here.
3456 	 */
3457 	if (pmap_max_asn == 0) {
3458 		/*
3459 		 * Refresh the pmap's generation number, to
3460 		 * simplify logic elsewhere.
3461 		 */
3462 		pma->pma_asngen = cpma->pma_asngen;
3463 #ifdef DEBUG
3464 		if (pmapdebug & PDB_ASN)
3465 			printf("pmap_asn_alloc: no ASNs, using asngen %lu\n",
3466 			    pma->pma_asngen);
3467 #endif
3468 		return;
3469 	}
3470 
3471 	/*
3472 	 * Hopefully, we can continue using the one we have...
3473 	 */
3474 	if (pma->pma_asn != PMAP_ASN_RESERVED &&
3475 	    pma->pma_asngen == cpma->pma_asngen) {
3476 		/*
3477 		 * ASN is still in the current generation; keep on using it.
3478 		 */
3479 #ifdef DEBUG
3480 		if (pmapdebug & PDB_ASN)
3481 			printf("pmap_asn_alloc: same generation, keeping %u\n",
3482 			    pma->pma_asn);
3483 #endif
3484 		return;
3485 	}
3486 
3487 	/*
3488 	 * Need to assign a new ASN.  Grab the next one, incrementing
3489 	 * the generation number if we have to.
3490 	 */
3491 	if (cpma->pma_asn > pmap_max_asn) {
3492 		/*
3493 		 * Invalidate all non-PG_ASM TLB entries and the
3494 		 * I-cache, and bump the generation number.
3495 		 */
3496 		ALPHA_TBIAP();
3497 		alpha_pal_imb();
3498 
3499 		cpma->pma_asn = 1;
3500 		cpma->pma_asngen++;
3501 #ifdef DIAGNOSTIC
3502 		if (cpma->pma_asngen == 0) {
3503 			/*
3504 			 * The generation number has wrapped.  We could
3505 			 * handle this scenario by traversing all of
3506 			 * the pmaps, and invalidating the generation
3507 			 * number on those which are not currently
3508 			 * in use by this processor.
3509 			 *
3510 			 * However... considering that we're using
3511 			 * an unsigned 64-bit integer for generation
3512 			 * numbers, on non-ASN CPUs, we won't wrap
3513 			 * for approx. 585 million years, or 75 billion
3514 			 * years on a 128-ASN CPU (assuming 1000 switch
3515 			 * operations per second).
3516 			 *
3517 			 * So, we don't bother.
3518 			 */
3519 			panic("pmap_asn_alloc: too much uptime");
3520 		}
3521 #endif
3522 #ifdef DEBUG
3523 		if (pmapdebug & PDB_ASN)
3524 			printf("pmap_asn_alloc: generation bumped to %lu\n",
3525 			    cpma->pma_asngen);
3526 #endif
3527 	}
3528 
3529 	/*
3530 	 * Assign the new ASN and validate the generation number.
3531 	 */
3532 	pma->pma_asn = cpma->pma_asn++;
3533 	pma->pma_asngen = cpma->pma_asngen;
3534 
3535 #ifdef DEBUG
3536 	if (pmapdebug & PDB_ASN)
3537 		printf("pmap_asn_alloc: assigning %u to pmap %p\n",
3538 		    pma->pma_asn, pmap);
3539 #endif
3540 
3541 	/*
3542 	 * Have a new ASN, so there's no need to sync the I-stream
3543 	 * on the way back out to userspace.
3544 	 */
3545 	atomic_and_ulong(&pmap->pm_needisync, ~(1UL << cpu_id));
3546 }
3547 
3548 #if defined(MULTIPROCESSOR)
3549 /******************** TLB shootdown code ********************/
3550 
3551 /*
3552  * pmap_tlb_shootdown:
3553  *
3554  *	Cause the TLB entry for pmap/va to be shot down.
3555  *
3556  *	NOTE: The pmap must be locked here.
3557  */
3558 void
3559 pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, u_long *cpumaskp)
3560 {
3561 	struct pmap_tlb_shootdown_q *pq;
3562 	struct pmap_tlb_shootdown_job *pj;
3563 	struct cpu_info *ci, *self = curcpu();
3564 	u_long cpumask;
3565 	CPU_INFO_ITERATOR cii;
3566 
3567 	KASSERT((pmap == pmap_kernel()) || mutex_owned(&pmap->pm_lock));
3568 
3569 	cpumask = 0;
3570 
3571 	for (CPU_INFO_FOREACH(cii, ci)) {
3572 		if (ci == self)
3573 			continue;
3574 
3575 		/*
3576 		 * The pmap must be locked (unless its the kernel
3577 		 * pmap, in which case it is okay for it to be
3578 		 * unlocked), which prevents it from  becoming
3579 		 * active on any additional processors.  This makes
3580 		 * it safe to check for activeness.  If it's not
3581 		 * active on the processor in question, then just
3582 		 * mark it as needing a new ASN the next time it
3583 		 * does, saving the IPI.  We always have to send
3584 		 * the IPI for the kernel pmap.
3585 		 *
3586 		 * Note if it's marked active now, and it becomes
3587 		 * inactive by the time the processor receives
3588 		 * the IPI, that's okay, because it does the right
3589 		 * thing with it later.
3590 		 */
3591 		if (pmap != pmap_kernel() &&
3592 		    PMAP_ISACTIVE(pmap, ci->ci_cpuid) == 0) {
3593 			PMAP_INVALIDATE_ASN(pmap, ci->ci_cpuid);
3594 			continue;
3595 		}
3596 
3597 		cpumask |= 1UL << ci->ci_cpuid;
3598 
3599 		pq = &pmap_tlb_shootdown_q[ci->ci_cpuid];
3600 		mutex_spin_enter(&pq->pq_lock);
3601 
3602 		/*
3603 		 * Allocate a job.
3604 		 */
3605 		if (pq->pq_count < PMAP_TLB_SHOOTDOWN_MAXJOBS) {
3606 			pj = pool_cache_get(&pmap_tlb_shootdown_job_cache,
3607 			    PR_NOWAIT);
3608 		} else {
3609 			pj = NULL;
3610 		}
3611 
3612 		/*
3613 		 * If a global flush is already pending, we
3614 		 * don't really have to do anything else.
3615 		 */
3616 		pq->pq_pte |= pte;
3617 		if (pq->pq_tbia) {
3618 			mutex_spin_exit(&pq->pq_lock);
3619 			if (pj != NULL) {
3620 				pool_cache_put(&pmap_tlb_shootdown_job_cache,
3621 				    pj);
3622 			}
3623 			continue;
3624 		}
3625 		if (pj == NULL) {
3626 			/*
3627 			 * Couldn't allocate a job entry.  Just
3628 			 * tell the processor to kill everything.
3629 			 */
3630 			pq->pq_tbia = 1;
3631 		} else {
3632 			pj->pj_pmap = pmap;
3633 			pj->pj_va = va;
3634 			pj->pj_pte = pte;
3635 			pq->pq_count++;
3636 			TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list);
3637 		}
3638 		mutex_spin_exit(&pq->pq_lock);
3639 	}
3640 
3641 	*cpumaskp |= cpumask;
3642 }
3643 
3644 /*
3645  * pmap_tlb_shootnow:
3646  *
3647  *	Process the TLB shootdowns that we have been accumulating
3648  *	for the specified processor set.
3649  */
3650 void
3651 pmap_tlb_shootnow(u_long cpumask)
3652 {
3653 
3654 	alpha_multicast_ipi(cpumask, ALPHA_IPI_SHOOTDOWN);
3655 }
3656 
3657 /*
3658  * pmap_do_tlb_shootdown:
3659  *
3660  *	Process pending TLB shootdown operations for this processor.
3661  */
3662 void
3663 pmap_do_tlb_shootdown(struct cpu_info *ci, struct trapframe *framep)
3664 {
3665 	u_long cpu_id = ci->ci_cpuid;
3666 	u_long cpu_mask = (1UL << cpu_id);
3667 	struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id];
3668 	struct pmap_tlb_shootdown_job *pj, *next;
3669 	TAILQ_HEAD(, pmap_tlb_shootdown_job) jobs;
3670 
3671 	TAILQ_INIT(&jobs);
3672 
3673 	mutex_spin_enter(&pq->pq_lock);
3674 	TAILQ_CONCAT(&jobs, &pq->pq_head, pj_list);
3675 	if (pq->pq_tbia) {
3676 		if (pq->pq_pte & PG_ASM)
3677 			ALPHA_TBIA();
3678 		else
3679 			ALPHA_TBIAP();
3680 		pq->pq_tbia = 0;
3681 		pq->pq_pte = 0;
3682 	} else {
3683 		TAILQ_FOREACH(pj, &jobs, pj_list) {
3684 			PMAP_INVALIDATE_TLB(pj->pj_pmap, pj->pj_va,
3685 			    pj->pj_pte & PG_ASM,
3686 			    pj->pj_pmap->pm_cpus & cpu_mask, cpu_id);
3687 		}
3688 		pq->pq_pte = 0;
3689 	}
3690 	pq->pq_count = 0;
3691 	mutex_spin_exit(&pq->pq_lock);
3692 
3693 	/* Free jobs back to the cache. */
3694 	for (pj = TAILQ_FIRST(&jobs); pj != NULL; pj = next) {
3695 		next = TAILQ_NEXT(pj, pj_list);
3696 		pool_cache_put(&pmap_tlb_shootdown_job_cache, pj);
3697 	}
3698 }
3699 #endif /* MULTIPROCESSOR */
3700