1 /* $NetBSD: pmap.c,v 1.123 2024/05/11 06:37:54 andvar Exp $ */
2
3 /*-
4 * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jeremy Cooper.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * XXX These comments aren't quite accurate. Need to change.
34 * The sun3x uses the MC68851 Memory Management Unit, which is built
35 * into the CPU. The 68851 maps virtual to physical addresses using
36 * a multi-level table lookup, which is stored in the very memory that
37 * it maps. The number of levels of lookup is configurable from one
38 * to four. In this implementation, we use three, named 'A' through 'C'.
39 *
40 * The MMU translates virtual addresses into physical addresses by
41 * traversing these tables in a process called a 'table walk'. The most
42 * significant 7 bits of the Virtual Address ('VA') being translated are
43 * used as an index into the level A table, whose base in physical memory
44 * is stored in a special MMU register, the 'CPU Root Pointer' or CRP. The
45 * address found at that index in the A table is used as the base
46 * address for the next table, the B table. The next six bits of the VA are
47 * used as an index into the B table, which in turn gives the base address
48 * of the third and final C table.
49 *
50 * The next six bits of the VA are used as an index into the C table to
51 * locate a Page Table Entry (PTE). The PTE is a physical address in memory
52 * to which the remaining 13 bits of the VA are added, producing the
53 * mapped physical address.
54 *
55 * To map the entire memory space in this manner would require 2114296 bytes
56 * of page tables per process - quite expensive. Instead we will
57 * allocate a fixed but considerably smaller space for the page tables at
58 * the time the VM system is initialized. When the pmap code is asked by
59 * the kernel to map a VA to a PA, it allocates tables as needed from this
60 * pool. When there are no more tables in the pool, tables are stolen
61 * from the oldest mapped entries in the tree. This is only possible
62 * because all memory mappings are stored in the kernel memory map
63 * structures, independent of the pmap structures. A VA which references
64 * one of these invalidated maps will cause a page fault. The kernel
65 * will determine that the page fault was caused by a task using a valid
66 * VA, but for some reason (which does not concern it), that address was
67 * not mapped. It will ask the pmap code to re-map the entry and then
68 * it will resume executing the faulting task.
69 *
70 * In this manner the most efficient use of the page table space is
71 * achieved. Tasks which do not execute often will have their tables
72 * stolen and reused by tasks which execute more frequently. The best
73 * size for the page table pool will probably be determined by
74 * experimentation.
75 *
76 * You read all of the comments so far. Good for you.
77 * Now go play!
78 */
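
/*
 * A worked example of the 7/6/6/13 split described above (illustrative
 * only -- the kernel proper uses the MMU_TIA()/MMU_TIB()/MMU_TIC()
 * macros, and the VA below is hypothetical).  For VA 0x0D255E08:
 *
 *	A index = VA >> 25            = 0x06	(top 7 bits)
 *	B index = (VA >> 19) & 0x3f   = 0x24	(next 6 bits)
 *	C index = (VA >> 13) & 0x3f   = 0x2a	(next 6 bits)
 *	offset  = VA & 0x1fff         = 0x1e08	(low 13 bits, 8KB page)
 *
 * The PTE found at C index 0x2a supplies the physical page address to
 * which the 13-bit offset is added.
 */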
79
80 /*** A Note About the 68851 Address Translation Cache
81 * The MC68851 has a 64 entry cache, called the Address Translation Cache
82 * or 'ATC'. This cache stores the most recently used page descriptors
83 * accessed by the MMU when it does translations. Using a marker called a
84 * 'task alias' the MMU can store the descriptors from 8 different table
85 * spaces concurrently. The task alias is associated with the base
86 * address of the level A table of that address space. When an address
87 * space is currently active (the CRP currently points to its A table)
88 * the only cached descriptors that will be obeyed are ones which have a
89 * matching task alias of the current space associated with them.
90 *
91 * Since the cache is always consulted before any table lookups are done,
92 * it is important that it accurately reflect the state of the MMU tables.
93 * Whenever a change has been made to a table that has been loaded into
94 * the MMU, the code must be sure to flush any cached entries that are
95 * affected by the change. These instances are documented in the code at
96 * various points.
97 */
98 /*** A Note About the Note About the 68851 Address Translation Cache
 * 4 months into this code I discovered that the sun3x does not have
 * an MC68851 chip. Instead, it has a version of this MMU that is part
 * of the 68030 CPU.
 * Although it behaves very similarly to the 68851, it only has 1 task
 * alias and a 22-entry cache. So sadly (or happily), the first paragraph
104 * of the previous note does not apply to the sun3x pmap.
105 */
106
107 #include <sys/cdefs.h>
108 __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.123 2024/05/11 06:37:54 andvar Exp $");
109
110 #include "opt_ddb.h"
111 #include "opt_pmap_debug.h"
112
113 #include <sys/param.h>
114 #include <sys/systm.h>
115 #include <sys/proc.h>
116 #include <sys/pool.h>
117 #include <sys/queue.h>
118 #include <sys/kcore.h>
119 #include <sys/atomic.h>
120
121 #include <uvm/uvm.h>
122
123 #include <machine/cpu.h>
124 #include <machine/kcore.h>
125 #include <machine/mon.h>
126 #include <machine/pmap.h>
127 #include <machine/pte.h>
128 #include <machine/vmparam.h>
129 #include <m68k/cacheops.h>
130
131 #include <sun3/sun3/cache.h>
132 #include <sun3/sun3/machdep.h>
133
134 #include "pmap_pvt.h"
135
136 /* XXX - What headers declare these? */
137 extern struct pcb *curpcb;
138
139 /* Defined in locore.s */
140 extern char kernel_text[];
141
142 /* Defined by the linker */
143 extern char etext[], edata[], end[];
144 extern char *esym; /* DDB */
145
146 /*************************** DEBUGGING DEFINITIONS ***********************
147 * Macros, preprocessor defines and variables used in debugging can make *
148 * code hard to read. Anything used exclusively for debugging purposes *
149 * is defined here to avoid having such mess scattered around the file. *
150 *************************************************************************/
151 #ifdef PMAP_DEBUG
152 /*
153 * To aid the debugging process, macros should be expanded into smaller steps
154 * that accomplish the same goal, yet provide convenient places for placing
155 * breakpoints. When this code is compiled with PMAP_DEBUG mode defined, the
156 * 'INLINE' keyword is defined to an empty string. This way, any function
157 * defined to be a 'static INLINE' will become 'outlined' and compiled as
158 * a separate function, which is much easier to debug.
159 */
160 #define INLINE /* nothing */
161
162 /*
163 * It is sometimes convenient to watch the activity of a particular table
164 * in the system. The following variables are used for that purpose.
165 */
166 a_tmgr_t *pmap_watch_atbl = 0;
167 b_tmgr_t *pmap_watch_btbl = 0;
168 c_tmgr_t *pmap_watch_ctbl = 0;
169
170 int pmap_debug = 0;
171 #define DPRINT(args) if (pmap_debug) printf args
172
173 #else /********** Stuff below is defined if NOT debugging **************/
174
175 #define INLINE inline
176 #define DPRINT(args) /* nada */
177
178 #endif /* PMAP_DEBUG */
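
/*
 * Typical (hypothetical) usage of the hooks above: DPRINT() takes a
 * doubled parenthesis so that the whole printf argument list passes
 * through a single macro argument, e.g.
 *
 *	DPRINT(("pmap_enter: va=0x%lx pa=0x%lx\n", va, pa));
 *
 * and the pmap_watch_* pointers are presumably set by hand (or from
 * DDB) to a table of interest so breakpoints can key off them.
 */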
179 /*********************** END OF DEBUGGING DEFINITIONS ********************/
180
181 /*** Management Structure - Memory Layout
182 * For every MMU table in the sun3x pmap system there must be a way to
183 * manage it; we must know which process is using it, what other tables
184 * depend on it, and whether or not it contains any locked pages. This
185 * is solved by the creation of 'table management' or 'tmgr'
 * structures, one for each MMU table in the system.
187 *
188 * MAP OF MEMORY USED BY THE PMAP SYSTEM
189 *
190 * towards lower memory
191 * kernAbase -> +-------------------------------------------------------+
192 * | Kernel MMU A level table |
193 * kernBbase -> +-------------------------------------------------------+
194 * | Kernel MMU B level tables |
195 * kernCbase -> +-------------------------------------------------------+
196 * | |
197 * | Kernel MMU C level tables |
198 * | |
199 * mmuCbase -> +-------------------------------------------------------+
200 * | User MMU C level tables |
201 * mmuAbase -> +-------------------------------------------------------+
202 * | |
203 * | User MMU A level tables |
204 * | |
205 * mmuBbase -> +-------------------------------------------------------+
206 * | User MMU B level tables |
207 * tmgrAbase -> +-------------------------------------------------------+
208 * | TMGR A level table structures |
209 * tmgrBbase -> +-------------------------------------------------------+
210 * | TMGR B level table structures |
211 * tmgrCbase -> +-------------------------------------------------------+
212 * | TMGR C level table structures |
213 * pvbase -> +-------------------------------------------------------+
214 * | Physical to Virtual mapping table (list heads) |
215 * pvebase -> +-------------------------------------------------------+
216 * | Physical to Virtual mapping table (list elements) |
217 * | |
218 * +-------------------------------------------------------+
219 * towards higher memory
220 *
221 * For every A table in the MMU A area, there will be a corresponding
222 * a_tmgr structure in the TMGR A area. The same will be true for
223 * the B and C tables. This arrangement will make it easy to find the
224 * controlling tmgr structure for any table in the system by use of
225 * (relatively) simple macros.
226 */
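
/*
 * A minimal sketch of the lookup this layout makes possible (the real
 * versions are the mmuB2tmgr()/mmuC2tmgr() functions further down in
 * this file):
 *
 *	idx  = mmu_table - mmu_table_pool_base;	(pointer subtraction,
 *	idx /= TABLE_SIZE;			 so the offset is in descriptors)
 *	tmgr = &tmgr_pool_base[idx];
 *
 * The same index is valid in both pools only because each MMU table
 * pool and its tmgr pool are allocated in the same order, as shown in
 * the map above.
 */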
227
228 /*
229 * Global variables for storing the base addresses for the areas
230 * labeled above.
231 */
232 static vaddr_t kernAphys;
233 static mmu_long_dte_t *kernAbase;
234 static mmu_short_dte_t *kernBbase;
235 static mmu_short_pte_t *kernCbase;
236 static mmu_short_pte_t *mmuCbase;
237 static mmu_short_dte_t *mmuBbase;
238 static mmu_long_dte_t *mmuAbase;
239 static a_tmgr_t *Atmgrbase;
240 static b_tmgr_t *Btmgrbase;
241 static c_tmgr_t *Ctmgrbase;
242 static pv_t *pvbase;
243 static pv_elem_t *pvebase;
244 static struct pmap kernel_pmap;
245 struct pmap *const kernel_pmap_ptr = &kernel_pmap;
246
247 /*
248 * This holds the CRP currently loaded into the MMU.
249 */
250 struct mmu_rootptr kernel_crp;
251
252 /*
253 * Just all around global variables.
254 */
255 static TAILQ_HEAD(a_pool_head_struct, a_tmgr_struct) a_pool;
256 static TAILQ_HEAD(b_pool_head_struct, b_tmgr_struct) b_pool;
257 static TAILQ_HEAD(c_pool_head_struct, c_tmgr_struct) c_pool;
258
259
260 /*
261 * Flags used to mark the safety/availability of certain operations or
262 * resources.
263 */
264 /* Safe to use pmap_bootstrap_alloc(). */
265 static bool bootstrap_alloc_enabled = false;
266 /* Temporary virtual pages are in use */
267 int tmp_vpages_inuse;
268
269 /*
270 * XXX: For now, retain the traditional variables that were
271 * used in the old pmap/vm interface (without NONCONTIG).
272 */
273 /* Kernel virtual address space available: */
274 vaddr_t virtual_avail, virtual_end;
275 /* Physical address space available: */
276 paddr_t avail_start, avail_end;
277
/* This keeps track of the end of the contiguously mapped range. */
279 vaddr_t virtual_contig_end;
280
281 /* Physical address used by pmap_next_page() */
282 paddr_t avail_next;
283
284 /* These are used by pmap_copy_page(), etc. */
285 vaddr_t tmp_vpages[2];
286
287 /* memory pool for pmap structures */
288 struct pool pmap_pmap_pool;
289
290 /*
291 * The 3/80 is the only member of the sun3x family that has non-contiguous
292 * physical memory. Memory is divided into 4 banks which are physically
293 * locatable on the system board. Although the size of these banks varies
294 * with the size of memory they contain, their base addresses are
295 * permanently fixed. The following structure, which describes these
296 * banks, is initialized by pmap_bootstrap() after it reads from a similar
297 * structure provided by the ROM Monitor.
298 *
299 * For the other machines in the sun3x architecture which do have contiguous
300 * RAM, this list will have only one entry, which will describe the entire
301 * range of available memory.
302 */
303 struct pmap_physmem_struct avail_mem[SUN3X_NPHYS_RAM_SEGS];
304 u_int total_phys_mem;
305
306 /*************************************************************************/
307
308 /*
309 * XXX - Should "tune" these based on statistics.
310 *
311 * My first guess about the relative numbers of these needed is
312 * based on the fact that a "typical" process will have several
313 * pages mapped at low virtual addresses (text, data, bss), then
314 * some mapped shared libraries, and then some stack pages mapped
315 * near the high end of the VA space. Each process can use only
316 * one A table, and most will use only two B tables (maybe three)
317 * and probably about four C tables. Therefore, the first guess
318 * at the relative numbers of these needed is 1:2:4 -gwr
319 *
320 * The number of C tables needed is closely related to the amount
321 * of physical memory available plus a certain amount attributable
322 * to the use of double mappings. With a few simulation statistics
323 * we can find a reasonably good estimation of this unknown value.
324 * Armed with that and the above ratios, we have a good idea of what
325 * is needed at each level. -j
326 *
 * Note: It is not physical memory size, but the total mapped
328 * virtual space required by the combined working sets of all the
329 * currently _runnable_ processes. (Sleeping ones don't count.)
330 * The amount of physical memory should be irrelevant. -gwr
331 */
332 #ifdef FIXED_NTABLES
333 #define NUM_A_TABLES 16
334 #define NUM_B_TABLES 32
335 #define NUM_C_TABLES 64
336 #else
337 unsigned int NUM_A_TABLES, NUM_B_TABLES, NUM_C_TABLES;
338 #endif /* FIXED_NTABLES */
339
340 /*
341 * This determines our total virtual mapping capacity.
342 * Yes, it is a FIXED value so we can pre-allocate.
343 */
344 #define NUM_USER_PTES (NUM_C_TABLES * MMU_C_TBL_SIZE)
345
346 /*
347 * The size of the Kernel Virtual Address Space (KVAS)
348 * for purposes of MMU table allocation is -KERNBASE
349 * (length from KERNBASE to 0xFFFFffff)
350 */
351 #define KVAS_SIZE (-KERNBASE3X)
352
353 /* Numbers of kernel MMU tables to support KVAS_SIZE. */
354 #define KERN_B_TABLES (KVAS_SIZE >> MMU_TIA_SHIFT)
355 #define KERN_C_TABLES (KVAS_SIZE >> MMU_TIB_SHIFT)
356 #define NUM_KERN_PTES (KVAS_SIZE >> MMU_TIC_SHIFT)
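
/*
 * For reference, with the 7/6/6/13 VA split described at the top of
 * this file, one kernel B table (one A-table entry) spans 1 << 25
 * bytes (32MB) of KVA, one kernel C table (one B-table entry) spans
 * 1 << 19 bytes (512KB), and one PTE maps a 1 << 13 byte (8KB) page;
 * the three macros above are simply KVAS_SIZE divided by those sizes.
 */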
357
358 /*************************** MISCELLANEOUS MACROS *************************/
359 void *pmap_bootstrap_alloc(int);
360
361 static INLINE void *mmu_ptov(paddr_t);
362 static INLINE paddr_t mmu_vtop(void *);
363
364 #if 0
365 static INLINE a_tmgr_t *mmuA2tmgr(mmu_long_dte_t *);
366 #endif /* 0 */
367 static INLINE b_tmgr_t *mmuB2tmgr(mmu_short_dte_t *);
368 static INLINE c_tmgr_t *mmuC2tmgr(mmu_short_pte_t *);
369
370 static INLINE pv_t *pa2pv(paddr_t);
371 static INLINE int pteidx(mmu_short_pte_t *);
372 static INLINE pmap_t current_pmap(void);
373
374 /*
375 * We can always convert between virtual and physical addresses
376 * for anything in the range [KERNBASE ... avail_start] because
377 * that range is GUARANTEED to be mapped linearly.
378 * We rely heavily upon this feature!
379 */
380 static INLINE void *
mmu_ptov(paddr_t pa)
382 {
383 vaddr_t va;
384
385 va = (pa + KERNBASE3X);
386 #ifdef PMAP_DEBUG
387 if ((va < KERNBASE3X) || (va >= virtual_contig_end))
388 panic("mmu_ptov");
389 #endif
390 return (void *)va;
391 }
392
393 static INLINE paddr_t
mmu_vtop(void *vva)
395 {
396 vaddr_t va;
397
398 va = (vaddr_t)vva;
399 #ifdef PMAP_DEBUG
400 if ((va < KERNBASE3X) || (va >= virtual_contig_end))
401 panic("mmu_vtop");
402 #endif
403 return va - KERNBASE3X;
404 }
405
406 /*
407 * These macros map MMU tables to their corresponding manager structures.
408 * They are needed quite often because many of the pointers in the pmap
409 * system reference MMU tables and not the structures that control them.
410 * There needs to be a way to find one when given the other and these
411 * macros do so by taking advantage of the memory layout described above.
412 * Here's a quick step through the first macro, mmuA2tmgr():
413 *
414 * 1) find the offset of the given MMU A table from the base of its table
415 * pool (table - mmuAbase).
 * 2) convert this offset into a table index by dividing it by the
 *    size of one MMU 'A' table.  Since the subtraction in step 1 is
 *    done on typed pointers, the offset is already in descriptors and
 *    the divisor is simply MMU_A_TBL_SIZE (descriptors per table).
418 * 3) use this index to select the corresponding 'A' table manager
419 * structure from the 'A' table manager pool (Atmgrbase[index]).
420 */
421 /* This function is not currently used. */
422 #if 0
423 static INLINE a_tmgr_t *
424 mmuA2tmgr(mmu_long_dte_t *mmuAtbl)
425 {
426 int idx;
427
428 /* Which table is this in? */
429 idx = (mmuAtbl - mmuAbase) / MMU_A_TBL_SIZE;
430 #ifdef PMAP_DEBUG
431 if ((idx < 0) || (idx >= NUM_A_TABLES))
432 panic("mmuA2tmgr");
433 #endif
434 return &Atmgrbase[idx];
435 }
436 #endif /* 0 */
437
438 static INLINE b_tmgr_t *
mmuB2tmgr(mmu_short_dte_t *mmuBtbl)
440 {
441 int idx;
442
443 /* Which table is this in? */
444 idx = (mmuBtbl - mmuBbase) / MMU_B_TBL_SIZE;
445 #ifdef PMAP_DEBUG
446 if ((idx < 0) || (idx >= NUM_B_TABLES))
447 panic("mmuB2tmgr");
448 #endif
449 return &Btmgrbase[idx];
450 }
451
452 /* mmuC2tmgr INTERNAL
453 **
454 * Given a pte known to belong to a C table, return the address of
455 * that table's management structure.
456 */
457 static INLINE c_tmgr_t *
mmuC2tmgr(mmu_short_pte_t *mmuCtbl)
459 {
460 int idx;
461
462 /* Which table is this in? */
463 idx = (mmuCtbl - mmuCbase) / MMU_C_TBL_SIZE;
464 #ifdef PMAP_DEBUG
465 if ((idx < 0) || (idx >= NUM_C_TABLES))
466 panic("mmuC2tmgr");
467 #endif
468 return &Ctmgrbase[idx];
469 }
470
471 /* This is now a function call below.
472 * #define pa2pv(pa) \
473 * (&pvbase[(unsigned long)\
474 * m68k_btop(pa)\
475 * ])
476 */
477
478 /* pa2pv INTERNAL
479 **
480 * Return the pv_list_head element which manages the given physical
481 * address.
482 */
483 static INLINE pv_t *
pa2pv(paddr_t pa)
485 {
486 struct pmap_physmem_struct *bank;
487 int idx;
488
489 bank = &avail_mem[0];
490 while (pa >= bank->pmem_end)
491 bank = bank->pmem_next;
492
493 pa -= bank->pmem_start;
494 idx = bank->pmem_pvbase + m68k_btop(pa);
495 #ifdef PMAP_DEBUG
496 if ((idx < 0) || (idx >= physmem))
497 panic("pa2pv");
498 #endif
499 return &pvbase[idx];
500 }
501
502 /* pteidx INTERNAL
503 **
504 * Return the index of the given PTE within the entire fixed table of
505 * PTEs.
506 */
507 static INLINE int
pteidx(mmu_short_pte_t *pte)
509 {
510
511 return pte - kernCbase;
512 }
513
514 /*
515 * This just offers a place to put some debugging checks,
516 * and reduces the number of places "curlwp" appears...
517 */
518 static INLINE pmap_t
current_pmap(void)
520 {
521 struct vmspace *vm;
522 struct vm_map *map;
523 pmap_t pmap;
524
525 vm = curproc->p_vmspace;
526 map = &vm->vm_map;
527 pmap = vm_map_pmap(map);
528
529 return pmap;
530 }
531
532
533 /*************************** FUNCTION DEFINITIONS ************************
534 * These appear here merely for the compiler to enforce type checking on *
535 * all function calls. *
536 *************************************************************************/
537
538 /*
539 * Internal functions
540 */
541 a_tmgr_t *get_a_table(void);
542 b_tmgr_t *get_b_table(void);
543 c_tmgr_t *get_c_table(void);
544 int free_a_table(a_tmgr_t *, bool);
545 int free_b_table(b_tmgr_t *, bool);
546 int free_c_table(c_tmgr_t *, bool);
547
548 void pmap_bootstrap_aalign(int);
549 void pmap_alloc_usermmu(void);
550 void pmap_alloc_usertmgr(void);
551 void pmap_alloc_pv(void);
552 void pmap_init_a_tables(void);
553 void pmap_init_b_tables(void);
554 void pmap_init_c_tables(void);
555 void pmap_init_pv(void);
556 void pmap_clear_pv(paddr_t, int);
557 static INLINE bool is_managed(paddr_t);
558
559 bool pmap_remove_a(a_tmgr_t *, vaddr_t, vaddr_t);
560 bool pmap_remove_b(b_tmgr_t *, vaddr_t, vaddr_t);
561 bool pmap_remove_c(c_tmgr_t *, vaddr_t, vaddr_t);
562 void pmap_remove_pte(mmu_short_pte_t *);
563
564 void pmap_enter_kernel(vaddr_t, paddr_t, vm_prot_t);
565 static INLINE void pmap_remove_kernel(vaddr_t, vaddr_t);
566 static INLINE void pmap_protect_kernel(vaddr_t, vaddr_t, vm_prot_t);
567 static INLINE bool pmap_extract_kernel(vaddr_t, paddr_t *);
568 vaddr_t pmap_get_pteinfo(u_int, pmap_t *, c_tmgr_t **);
569 static INLINE int pmap_dereference(pmap_t);
570
571 bool pmap_stroll(pmap_t, vaddr_t, a_tmgr_t **, b_tmgr_t **, c_tmgr_t **,
572 mmu_short_pte_t **, int *, int *, int *);
573 void pmap_bootstrap_copyprom(void);
574 void pmap_takeover_mmu(void);
575 void pmap_bootstrap_setprom(void);
576 static void pmap_page_upload(void);
577
578 #ifdef PMAP_DEBUG
579 /* Debugging function definitions */
580 void pv_list(paddr_t, int);
581 #endif /* PMAP_DEBUG */
582
583 /** Interface functions
584 ** - functions required by the Mach VM Pmap interface, with MACHINE_CONTIG
585 ** defined.
586 ** The new UVM doesn't require them so now INTERNAL.
587 **/
588 static INLINE void pmap_pinit(pmap_t);
589 static INLINE void pmap_release(pmap_t);
590
591 /********************************** CODE ********************************
592 * Functions that are called from other parts of the kernel are labeled *
593 * as 'INTERFACE' functions. Functions that are only called from *
594 * within the pmap module are labeled as 'INTERNAL' functions. *
595 * Functions that are internal, but are not (currently) used at all are *
596 * labeled 'INTERNAL_X'. *
597 ************************************************************************/
598
599 /* pmap_bootstrap INTERNAL
600 **
601 * Initializes the pmap system. Called at boot time from
602 * locore2.c:_vm_init()
603 *
604 * Reminder: having a pmap_bootstrap_alloc() and also having the VM
605 * system implement pmap_steal_memory() is redundant.
606 * Don't release this code without removing one or the other!
607 */
608 void
pmap_bootstrap(vaddr_t nextva)
610 {
611 struct physmemory *membank;
612 struct pmap_physmem_struct *pmap_membank;
613 vaddr_t va, eva;
614 paddr_t pa;
615 int b, c, i, j; /* running table counts */
616 int size, resvmem;
617
618 /*
619 * This function is called by __bootstrap after it has
620 * determined the type of machine and made the appropriate
621 * patches to the ROM vectors (XXX- I don't quite know what I meant
622 * by that.) It allocates and sets up enough of the pmap system
623 * to manage the kernel's address space.
624 */
625
626 /*
627 * Determine the range of kernel virtual and physical
628 * space available. Note that we ABSOLUTELY DEPEND on
629 * the fact that the first bank of memory (4MB) is
630 * mapped linearly to KERNBASE (which we guaranteed in
631 * the first instructions of locore.s).
632 * That is plenty for our bootstrap work.
633 */
634 virtual_avail = m68k_round_page(nextva);
635 virtual_contig_end = KERNBASE3X + 0x400000; /* +4MB */
636 virtual_end = VM_MAX_KERNEL_ADDRESS;
637 /* Don't need avail_start til later. */
638
639 /* We may now call pmap_bootstrap_alloc(). */
640 bootstrap_alloc_enabled = true;
641
642 /*
643 * This is a somewhat unwrapped loop to deal with
 * copying the PROM's 'physmem' banks into the pmap's
645 * banks. The following is always assumed:
646 * 1. There is always at least one bank of memory.
647 * 2. There is always a last bank of memory, and its
648 * pmem_next member must be set to NULL.
649 */
650 membank = romVectorPtr->v_physmemory;
651 pmap_membank = avail_mem;
652 total_phys_mem = 0;
653
654 for (;;) { /* break on !membank */
655 pmap_membank->pmem_start = membank->address;
656 pmap_membank->pmem_end = membank->address + membank->size;
657 total_phys_mem += membank->size;
658 membank = membank->next;
659 if (!membank)
660 break;
661 /* This silly syntax arises because pmap_membank
662 * is really a pre-allocated array, but it is put into
663 * use as a linked list.
664 */
665 pmap_membank->pmem_next = pmap_membank + 1;
666 pmap_membank = pmap_membank->pmem_next;
667 }
668 /* This is the last element. */
669 pmap_membank->pmem_next = NULL;
670
671 /*
672 * Note: total_phys_mem, physmem represent
673 * actual physical memory, including that
674 * reserved for the PROM monitor.
675 */
676 physmem = btoc(total_phys_mem);
677
678 /*
679 * Avail_end is set to the first byte of physical memory
680 * after the end of the last bank. We use this only to
681 * determine if a physical address is "managed" memory.
682 * This address range should be reduced to prevent the
683 * physical pages needed by the PROM monitor from being used
684 * in the VM system.
685 */
686 resvmem = total_phys_mem - *(romVectorPtr->memoryAvail);
687 resvmem = m68k_round_page(resvmem);
688 avail_end = pmap_membank->pmem_end - resvmem;
689
690 /*
691 * First allocate enough kernel MMU tables to map all
692 * of kernel virtual space from KERNBASE to 0xFFFFFFFF.
693 * Note: All must be aligned on 256 byte boundaries.
694 * Start with the level-A table (one of those).
695 */
696 size = sizeof(mmu_long_dte_t) * MMU_A_TBL_SIZE;
697 kernAbase = pmap_bootstrap_alloc(size);
698 memset(kernAbase, 0, size);
699
700 /* Now the level-B kernel tables... */
701 size = sizeof(mmu_short_dte_t) * MMU_B_TBL_SIZE * KERN_B_TABLES;
702 kernBbase = pmap_bootstrap_alloc(size);
703 memset(kernBbase, 0, size);
704
705 /* Now the level-C kernel tables... */
706 size = sizeof(mmu_short_pte_t) * MMU_C_TBL_SIZE * KERN_C_TABLES;
707 kernCbase = pmap_bootstrap_alloc(size);
708 memset(kernCbase, 0, size);
709 /*
710 * Note: In order for the PV system to work correctly, the kernel
711 * and user-level C tables must be allocated contiguously.
712 * Nothing should be allocated between here and the allocation of
713 * mmuCbase below. XXX: Should do this as one allocation, and
714 * then compute a pointer for mmuCbase instead of this...
715 *
716 * Allocate user MMU tables.
717 * These must be contiguous with the preceding.
718 */
719
720 #ifndef FIXED_NTABLES
721 /*
722 * The number of user-level C tables that should be allocated is
723 * related to the size of physical memory. In general, there should
724 * be enough tables to map four times the amount of available RAM.
725 * The extra amount is needed because some table space is wasted by
726 * fragmentation.
727 */
728 NUM_C_TABLES = (total_phys_mem * 4) / (MMU_C_TBL_SIZE * MMU_PAGE_SIZE);
729 NUM_B_TABLES = NUM_C_TABLES / 2;
730 NUM_A_TABLES = NUM_B_TABLES / 2;
731 #endif /* !FIXED_NTABLES */
732
733 size = sizeof(mmu_short_pte_t) * MMU_C_TBL_SIZE * NUM_C_TABLES;
734 mmuCbase = pmap_bootstrap_alloc(size);
735
736 size = sizeof(mmu_short_dte_t) * MMU_B_TBL_SIZE * NUM_B_TABLES;
737 mmuBbase = pmap_bootstrap_alloc(size);
738
739 size = sizeof(mmu_long_dte_t) * MMU_A_TBL_SIZE * NUM_A_TABLES;
740 mmuAbase = pmap_bootstrap_alloc(size);
741
742 /*
743 * Fill in the never-changing part of the kernel tables.
744 * For simplicity, the kernel's mappings will be editable as a
745 * flat array of page table entries at kernCbase. The
746 * higher level 'A' and 'B' tables must be initialized to point
747 * to this lower one.
748 */
749 b = c = 0;
750
751 /*
752 * Invalidate all mappings below KERNBASE in the A table.
753 * This area has already been zeroed out, but it is good
754 * practice to explicitly show that we are interpreting
755 * it as a list of A table descriptors.
756 */
757 for (i = 0; i < MMU_TIA(KERNBASE3X); i++) {
758 kernAbase[i].addr.raw = 0;
759 }
760
761 /*
762 * Set up the kernel A and B tables so that they will reference the
763 * correct spots in the contiguous table of PTEs allocated for the
764 * kernel's virtual memory space.
765 */
766 for (i = MMU_TIA(KERNBASE3X); i < MMU_A_TBL_SIZE; i++) {
767 kernAbase[i].attr.raw =
768 MMU_LONG_DTE_LU | MMU_LONG_DTE_SUPV | MMU_DT_SHORT;
769 kernAbase[i].addr.raw = mmu_vtop(&kernBbase[b]);
770
771 for (j = 0; j < MMU_B_TBL_SIZE; j++) {
772 kernBbase[b + j].attr.raw =
773 mmu_vtop(&kernCbase[c]) | MMU_DT_SHORT;
774 c += MMU_C_TBL_SIZE;
775 }
776 b += MMU_B_TBL_SIZE;
777 }
778
779 pmap_alloc_usermmu(); /* Allocate user MMU tables. */
780 pmap_alloc_usertmgr(); /* Allocate user MMU table managers.*/
781 pmap_alloc_pv(); /* Allocate physical->virtual map. */
782
783 /*
784 * We are now done with pmap_bootstrap_alloc(). Round up
785 * `virtual_avail' to the nearest page, and set the flag
786 * to prevent use of pmap_bootstrap_alloc() hereafter.
787 */
788 pmap_bootstrap_aalign(PAGE_SIZE);
789 bootstrap_alloc_enabled = false;
790
791 /*
792 * Now that we are done with pmap_bootstrap_alloc(), we
793 * must save the virtual and physical addresses of the
794 * end of the linearly mapped range, which are stored in
795 * virtual_contig_end and avail_start, respectively.
796 * These variables will never change after this point.
797 */
798 virtual_contig_end = virtual_avail;
799 avail_start = virtual_avail - KERNBASE3X;
800
801 /*
802 * `avail_next' is a running pointer used by pmap_next_page() to
803 * keep track of the next available physical page to be handed
804 * to the VM system during its initialization, in which it
805 * asks for physical pages, one at a time.
806 */
807 avail_next = avail_start;
808
809 /*
810 * Now allocate some virtual addresses, but not the physical pages
811 * behind them. Note that virtual_avail is already page-aligned.
812 *
813 * tmp_vpages[] is an array of two virtual pages used for temporary
814 * kernel mappings in the pmap module to facilitate various physical
 * address-oriented operations.
816 */
817 tmp_vpages[0] = virtual_avail;
818 virtual_avail += PAGE_SIZE;
819 tmp_vpages[1] = virtual_avail;
820 virtual_avail += PAGE_SIZE;
821
822 /** Initialize the PV system **/
823 pmap_init_pv();
824
825 /*
826 * Fill in the kernel_pmap structure and kernel_crp.
827 */
828 kernAphys = mmu_vtop(kernAbase);
829 kernel_pmap.pm_a_tmgr = NULL;
830 kernel_pmap.pm_a_phys = kernAphys;
831 kernel_pmap.pm_refcount = 1; /* always in use */
832
833 kernel_crp.rp_attr = MMU_LONG_DTE_LU | MMU_DT_LONG;
834 kernel_crp.rp_addr = kernAphys;
835
836 /*
837 * Now pmap_enter_kernel() may be used safely and will be
838 * the main interface used hereafter to modify the kernel's
839 * virtual address space. Note that since we are still running
840 * under the PROM's address table, none of these table modifications
841 * actually take effect until pmap_takeover_mmu() is called.
842 *
843 * Note: Our tables do NOT have the PROM linear mappings!
844 * Only the mappings created here exist in our tables, so
845 * remember to map anything we expect to use.
846 */
847 va = (vaddr_t)KERNBASE3X;
848 pa = 0;
849
850 /*
851 * The first page of the kernel virtual address space is the msgbuf
852 * page. The page attributes (data, non-cached) are set here, while
853 * the address is assigned to this global pointer in cpu_startup().
854 * It is non-cached, mostly due to paranoia.
855 */
856 pmap_enter_kernel(va, pa|PMAP_NC, VM_PROT_ALL);
857 va += PAGE_SIZE;
858 pa += PAGE_SIZE;
859
860 /* Next page is used as the temporary stack. */
861 pmap_enter_kernel(va, pa, VM_PROT_ALL);
862 va += PAGE_SIZE;
863 pa += PAGE_SIZE;
864
865 /*
866 * Map all of the kernel's text segment as read-only and cacheable.
867 * (Cacheable is implied by default). Unfortunately, the last bytes
868 * of kernel text and the first bytes of kernel data will often be
869 * sharing the same page. Therefore, the last page of kernel text
870 * has to be mapped as read/write, to accommodate the data.
871 */
872 eva = m68k_trunc_page((vaddr_t)etext);
873 for (; va < eva; va += PAGE_SIZE, pa += PAGE_SIZE)
874 pmap_enter_kernel(va, pa, VM_PROT_READ|VM_PROT_EXECUTE);
875
876 /*
877 * Map all of the kernel's data as read/write and cacheable.
878 * This includes: data, BSS, symbols, and everything in the
879 * contiguous memory used by pmap_bootstrap_alloc()
880 */
881 for (; pa < avail_start; va += PAGE_SIZE, pa += PAGE_SIZE)
882 pmap_enter_kernel(va, pa, VM_PROT_READ|VM_PROT_WRITE);
883
884 /*
885 * At this point we are almost ready to take over the MMU. But first
886 * we must save the PROM's address space in our map, as we call its
887 * routines and make references to its data later in the kernel.
888 */
889 pmap_bootstrap_copyprom();
890 pmap_takeover_mmu();
891 pmap_bootstrap_setprom();
892
893 /* Notify the VM system of our page size. */
894 uvmexp.pagesize = PAGE_SIZE;
895 uvm_md_init();
896
897 pmap_page_upload();
898 }
899
900
901 /* pmap_alloc_usermmu INTERNAL
902 **
903 * Called from pmap_bootstrap() to allocate MMU tables that will
904 * eventually be used for user mappings.
905 */
906 void
pmap_alloc_usermmu(void)
908 {
909
910 /* XXX: Moved into caller. */
911 }
912
913 /* pmap_alloc_pv INTERNAL
914 **
915 * Called from pmap_bootstrap() to allocate the physical
916 * to virtual mapping list. Each physical page of memory
917 * in the system has a corresponding element in this list.
918 */
919 void
pmap_alloc_pv(void)
921 {
922 int i;
923 unsigned int total_mem;
924
925 /*
926 * Allocate a pv_head structure for every page of physical
927 * memory that will be managed by the system. Since memory on
928 * the 3/80 is non-contiguous, we cannot arrive at a total page
929 * count by subtraction of the lowest available address from the
930 * highest, but rather we have to step through each memory
931 * bank and add the number of pages in each to the total.
932 *
933 * At this time we also initialize the offset of each bank's
934 * starting pv_head within the pv_head list so that the physical
935 * memory state routines (pmap_is_referenced(),
936 * pmap_is_modified(), et al.) can quickly find corresponding
937 * pv_heads in spite of the non-contiguity.
938 */
939 total_mem = 0;
940 for (i = 0; i < SUN3X_NPHYS_RAM_SEGS; i++) {
941 avail_mem[i].pmem_pvbase = m68k_btop(total_mem);
942 total_mem += avail_mem[i].pmem_end - avail_mem[i].pmem_start;
943 if (avail_mem[i].pmem_next == NULL)
944 break;
945 }
946 pvbase = (pv_t *)pmap_bootstrap_alloc(sizeof(pv_t) *
947 m68k_btop(total_phys_mem));
948 }
949
950 /* pmap_alloc_usertmgr INTERNAL
951 **
952 * Called from pmap_bootstrap() to allocate the structures which
953 * facilitate management of user MMU tables. Each user MMU table
954 * in the system has one such structure associated with it.
955 */
956 void
pmap_alloc_usertmgr(void)
958 {
959 /* Allocate user MMU table managers */
960 /* It would be a lot simpler to just make these BSS, but */
961 /* we may want to change their size at boot time... -j */
962 Atmgrbase =
963 (a_tmgr_t *)pmap_bootstrap_alloc(sizeof(a_tmgr_t) * NUM_A_TABLES);
964 Btmgrbase =
965 (b_tmgr_t *)pmap_bootstrap_alloc(sizeof(b_tmgr_t) * NUM_B_TABLES);
966 Ctmgrbase =
967 (c_tmgr_t *)pmap_bootstrap_alloc(sizeof(c_tmgr_t) * NUM_C_TABLES);
968
969 /*
970 * Allocate PV list elements for the physical to virtual
971 * mapping system.
972 */
973 pvebase = (pv_elem_t *)pmap_bootstrap_alloc(sizeof(pv_elem_t) *
974 (NUM_USER_PTES + NUM_KERN_PTES));
975 }
976
977 /* pmap_bootstrap_copyprom() INTERNAL
978 **
979 * Copy the PROM mappings into our own tables. Note, we
980 * can use physical addresses until __bootstrap returns.
981 */
982 void
pmap_bootstrap_copyprom(void)
984 {
985 struct sunromvec *romp;
986 int *mon_ctbl;
987 mmu_short_pte_t *kpte;
988 int i, len;
989
990 romp = romVectorPtr;
991
992 /*
993 * Copy the mappings in SUN3X_MON_KDB_BASE...SUN3X_MONEND
994 * Note: mon_ctbl[0] maps SUN3X_MON_KDB_BASE
995 */
996 mon_ctbl = *romp->monptaddr;
997 i = m68k_btop(SUN3X_MON_KDB_BASE - KERNBASE3X);
998 kpte = &kernCbase[i];
999 len = m68k_btop(SUN3X_MONEND - SUN3X_MON_KDB_BASE);
1000
1001 for (i = 0; i < len; i++) {
1002 kpte[i].attr.raw = mon_ctbl[i];
1003 }
1004
1005 /*
1006 * Copy the mappings at MON_DVMA_BASE (to the end).
1007 * Note, in here, mon_ctbl[0] maps MON_DVMA_BASE.
1008 * Actually, we only want the last page, which the
1009 * PROM has set up for use by the "ie" driver.
 * (The i82586 needs its SCP there.)
1011 * If we copy all the mappings, pmap_enter_kernel
1012 * may complain about finding valid PTEs that are
1013 * not recorded in our PV lists...
1014 */
1015 mon_ctbl = *romp->shadowpteaddr;
1016 i = m68k_btop(SUN3X_MON_DVMA_BASE - KERNBASE3X);
1017 kpte = &kernCbase[i];
1018 len = m68k_btop(SUN3X_MON_DVMA_SIZE);
1019 for (i = (len - 1); i < len; i++) {
1020 kpte[i].attr.raw = mon_ctbl[i];
1021 }
1022 }
1023
1024 /* pmap_takeover_mmu INTERNAL
1025 **
1026 * Called from pmap_bootstrap() after it has copied enough of the
1027 * PROM mappings into the kernel map so that we can use our own
1028 * MMU table.
1029 */
1030 void
pmap_takeover_mmu(void)
1032 {
1033
1034 loadcrp(&kernel_crp);
1035 }
1036
1037 /* pmap_bootstrap_setprom() INTERNAL
1038 **
1039 * Set the PROM mappings so it can see kernel space.
1040 * Note that physical addresses are used here, which
1041 * we can get away with because this runs with the
1042 * low 1GB set for transparent translation.
1043 */
1044 void
pmap_bootstrap_setprom(void)
1046 {
1047 mmu_long_dte_t *mon_dte;
1048 extern struct mmu_rootptr mon_crp;
1049 int i;
1050
1051 mon_dte = (mmu_long_dte_t *)mon_crp.rp_addr;
1052 for (i = MMU_TIA(KERNBASE3X); i < MMU_TIA(KERN_END3X); i++) {
1053 mon_dte[i].attr.raw = kernAbase[i].attr.raw;
1054 mon_dte[i].addr.raw = kernAbase[i].addr.raw;
1055 }
1056 }
1057
1058
1059 /* pmap_init INTERFACE
1060 **
1061 * Called at the end of vm_init() to set up the pmap system to go
1062 * into full time operation. All initialization of kernel_pmap
1063 * should be already done by now, so this should just do things
1064 * needed for user-level pmaps to work.
1065 */
1066 void
pmap_init(void)
1068 {
1069
1070 /** Initialize the manager pools **/
1071 TAILQ_INIT(&a_pool);
1072 TAILQ_INIT(&b_pool);
1073 TAILQ_INIT(&c_pool);
1074
1075 /**************************************************************
1076 * Initialize all tmgr structures and MMU tables they manage. *
1077 **************************************************************/
1078 /** Initialize A tables **/
1079 pmap_init_a_tables();
1080 /** Initialize B tables **/
1081 pmap_init_b_tables();
1082 /** Initialize C tables **/
1083 pmap_init_c_tables();
1084
1085 /** Initialize the pmap pools **/
1086 pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl",
1087 &pool_allocator_nointr, IPL_NONE);
1088 }
1089
1090 /* pmap_init_a_tables() INTERNAL
1091 **
1092 * Initializes all A managers, their MMU A tables, and inserts
1093 * them into the A manager pool for use by the system.
1094 */
1095 void
pmap_init_a_tables(void)
1097 {
1098 int i;
1099 a_tmgr_t *a_tbl;
1100
1101 for (i = 0; i < NUM_A_TABLES; i++) {
1102 /* Select the next available A manager from the pool */
1103 a_tbl = &Atmgrbase[i];
1104
1105 /*
1106 * Clear its parent entry. Set its wired and valid
1107 * entry count to zero.
1108 */
1109 a_tbl->at_parent = NULL;
1110 a_tbl->at_wcnt = a_tbl->at_ecnt = 0;
1111
1112 /* Assign it the next available MMU A table from the pool */
1113 a_tbl->at_dtbl = &mmuAbase[i * MMU_A_TBL_SIZE];
1114
1115 /*
1116 * Initialize the MMU A table with the table in the `lwp0',
1117 * or kernel, mapping. This ensures that every process has
1118 * the kernel mapped in the top part of its address space.
1119 */
1120 memcpy(a_tbl->at_dtbl, kernAbase,
1121 MMU_A_TBL_SIZE * sizeof(mmu_long_dte_t));
1122
1123 /*
1124 * Finally, insert the manager into the A pool,
1125 * making it ready to be used by the system.
1126 */
1127 TAILQ_INSERT_TAIL(&a_pool, a_tbl, at_link);
1128 }
1129 }
1130
1131 /* pmap_init_b_tables() INTERNAL
1132 **
1133 * Initializes all B table managers, their MMU B tables, and
1134 * inserts them into the B manager pool for use by the system.
1135 */
1136 void
pmap_init_b_tables(void)
1138 {
1139 int i, j;
1140 b_tmgr_t *b_tbl;
1141
1142 for (i = 0; i < NUM_B_TABLES; i++) {
1143 /* Select the next available B manager from the pool */
1144 b_tbl = &Btmgrbase[i];
1145
1146 b_tbl->bt_parent = NULL; /* clear its parent, */
1147 b_tbl->bt_pidx = 0; /* parent index, */
1148 b_tbl->bt_wcnt = 0; /* wired entry count, */
1149 b_tbl->bt_ecnt = 0; /* valid entry count. */
1150
1151 /* Assign it the next available MMU B table from the pool */
1152 b_tbl->bt_dtbl = &mmuBbase[i * MMU_B_TBL_SIZE];
1153
1154 /* Invalidate every descriptor in the table */
1155 for (j = 0; j < MMU_B_TBL_SIZE; j++)
1156 b_tbl->bt_dtbl[j].attr.raw = MMU_DT_INVALID;
1157
1158 /* Insert the manager into the B pool */
1159 TAILQ_INSERT_TAIL(&b_pool, b_tbl, bt_link);
1160 }
1161 }
1162
1163 /* pmap_init_c_tables() INTERNAL
1164 **
1165 * Initializes all C table managers, their MMU C tables, and
1166 * inserts them into the C manager pool for use by the system.
1167 */
1168 void
pmap_init_c_tables(void)
1170 {
1171 int i, j;
1172 c_tmgr_t *c_tbl;
1173
1174 for (i = 0; i < NUM_C_TABLES; i++) {
1175 /* Select the next available C manager from the pool */
1176 c_tbl = &Ctmgrbase[i];
1177
1178 c_tbl->ct_parent = NULL; /* clear its parent, */
1179 c_tbl->ct_pidx = 0; /* parent index, */
1180 c_tbl->ct_wcnt = 0; /* wired entry count, */
1181 c_tbl->ct_ecnt = 0; /* valid entry count, */
1182 c_tbl->ct_pmap = NULL; /* parent pmap, */
1183 c_tbl->ct_va = 0; /* base of managed range */
1184
1185 /* Assign it the next available MMU C table from the pool */
1186 c_tbl->ct_dtbl = &mmuCbase[i * MMU_C_TBL_SIZE];
1187
1188 for (j = 0; j < MMU_C_TBL_SIZE; j++)
1189 c_tbl->ct_dtbl[j].attr.raw = MMU_DT_INVALID;
1190
1191 TAILQ_INSERT_TAIL(&c_pool, c_tbl, ct_link);
1192 }
1193 }
1194
1195 /* pmap_init_pv() INTERNAL
1196 **
1197 * Initializes the Physical to Virtual mapping system.
1198 */
1199 void
pmap_init_pv(void)
1201 {
1202 int i;
1203
1204 /* Initialize every PV head. */
1205 for (i = 0; i < m68k_btop(total_phys_mem); i++) {
1206 pvbase[i].pv_idx = PVE_EOL; /* Indicate no mappings */
1207 pvbase[i].pv_flags = 0; /* Zero out page flags */
1208 }
1209 }
1210
1211 /* is_managed INTERNAL
1212 **
1213 * Determine if the given physical address is managed by the PV system.
1214 * Note that this logic assumes that no one will ask for the status of
1215 * addresses which lie in-between the memory banks on the 3/80. If they
1216 * do so, it will falsely report that it is managed.
1217 *
1218 * Note: A "managed" address is one that was reported to the VM system as
1219 * a "usable page" during system startup. As such, the VM system expects the
1220 * pmap module to keep an accurate track of the usage of those pages.
1221 * Any page not given to the VM system at startup does not exist (as far as
1222 * the VM system is concerned) and is therefore "unmanaged." Examples are
1223 * those pages which belong to the ROM monitor and the memory allocated before
1224 * the VM system was started.
1225 */
1226 static INLINE bool
is_managed(paddr_t pa)
1228 {
1229 if (pa >= avail_start && pa < avail_end)
1230 return true;
1231 else
1232 return false;
1233 }
1234
1235 /* get_a_table INTERNAL
1236 **
1237 * Retrieve and return a level A table for use in a user map.
1238 */
1239 a_tmgr_t *
get_a_table(void)
1241 {
1242 a_tmgr_t *tbl;
1243 pmap_t pmap;
1244
1245 /* Get the top A table in the pool */
1246 tbl = TAILQ_FIRST(&a_pool);
1247 if (tbl == NULL) {
1248 /*
1249 * XXX - Instead of panicking here and in other get_x_table
1250 * functions, we do have the option of sleeping on the head of
1251 * the table pool. Any function which updates the table pool
1252 * would then issue a wakeup() on the head, thus waking up any
1253 * processes waiting for a table.
1254 *
1255 * Actually, the place to sleep would be when some process
1256 * asks for a "wired" mapping that would run us short of
1257 * mapping resources. This design DEPENDS on always having
1258 * some mapping resources in the pool for stealing, so we
1259 * must make sure we NEVER let the pool become empty. -gwr
1260 */
1261 panic("get_a_table: out of A tables.");
1262 }
1263
1264 TAILQ_REMOVE(&a_pool, tbl, at_link);
1265 /*
1266 * If the table has a non-null parent pointer then it is in use.
1267 * Forcibly abduct it from its parent and clear its entries.
1268 * No re-entrancy worries here. This table would not be in the
1269 * table pool unless it was available for use.
1270 *
1271 * Note that the second argument to free_a_table() is false. This
1272 * indicates that the table should not be relinked into the A table
1273 * pool. That is a job for the function that called us.
1274 */
1275 if (tbl->at_parent) {
1276 KASSERT(tbl->at_wcnt == 0);
1277 pmap = tbl->at_parent;
1278 free_a_table(tbl, false);
1279 pmap->pm_a_tmgr = NULL;
1280 pmap->pm_a_phys = kernAphys;
1281 }
1282 return tbl;
1283 }
1284
1285 /* get_b_table INTERNAL
1286 **
1287 * Return a level B table for use.
1288 */
1289 b_tmgr_t *
get_b_table(void)
1291 {
1292 b_tmgr_t *tbl;
1293
1294 /* See 'get_a_table' for comments. */
1295 tbl = TAILQ_FIRST(&b_pool);
1296 if (tbl == NULL)
1297 panic("get_b_table: out of B tables.");
1298 TAILQ_REMOVE(&b_pool, tbl, bt_link);
1299 if (tbl->bt_parent) {
1300 KASSERT(tbl->bt_wcnt == 0);
1301 tbl->bt_parent->at_dtbl[tbl->bt_pidx].attr.raw = MMU_DT_INVALID;
1302 tbl->bt_parent->at_ecnt--;
1303 free_b_table(tbl, false);
1304 }
1305 return tbl;
1306 }
1307
1308 /* get_c_table INTERNAL
1309 **
1310 * Return a level C table for use.
1311 */
1312 c_tmgr_t *
get_c_table(void)
1314 {
1315 c_tmgr_t *tbl;
1316
1317 /* See 'get_a_table' for comments */
1318 tbl = TAILQ_FIRST(&c_pool);
1319 if (tbl == NULL)
1320 panic("get_c_table: out of C tables.");
1321 TAILQ_REMOVE(&c_pool, tbl, ct_link);
1322 if (tbl->ct_parent) {
1323 KASSERT(tbl->ct_wcnt == 0);
1324 tbl->ct_parent->bt_dtbl[tbl->ct_pidx].attr.raw = MMU_DT_INVALID;
1325 tbl->ct_parent->bt_ecnt--;
1326 free_c_table(tbl, false);
1327 }
1328 return tbl;
1329 }
1330
1331 /*
1332 * The following 'free_table' and 'steal_table' functions are called to
1333 * detach tables from their current obligations (parents and children) and
1334 * prepare them for reuse in another mapping.
1335 *
1336 * Free_table is used when the calling function will handle the fate
1337 * of the parent table, such as returning it to the free pool when it has
1338 * no valid entries. Functions that do not want to handle this should
1339 * call steal_table, in which the parent table's descriptors and entry
1340 * count are automatically modified when this table is removed.
1341 */
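
/*
 * As a concrete illustration of the division of labor described above,
 * the (non-compiled) sketch below restates the two ways free_b_table()
 * is already called in this file; the variable names are those used by
 * the respective callers.
 */
#if 0
	/*
	 * Pool replenishment (see get_b_table()): the caller fixes up the
	 * parent itself and passes relink = false because the table is
	 * about to be handed out, not returned to the pool.
	 */
	tbl->bt_parent->at_dtbl[tbl->bt_pidx].attr.raw = MMU_DT_INVALID;
	tbl->bt_parent->at_ecnt--;
	free_b_table(tbl, false);

	/*
	 * Teardown (see free_a_table()): the caller invalidates its own
	 * descriptor afterwards, and relink = true returns the child to
	 * the head of the B pool for reuse.
	 */
	removed_cnt += free_b_table(b_tbl, true);
	dte[i].attr.raw = MMU_DT_INVALID;
#endif	/* 0 */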
1342
1343 /* free_a_table INTERNAL
1344 **
1345 * Unmaps the given A table and all child tables from their current
1346 * mappings. Returns the number of pages that were invalidated.
1347 * If 'relink' is true, the function will return the table to the head
1348 * of the available table pool.
1349 *
1350 * Cache note: The MC68851 will automatically flush all
1351 * descriptors derived from a given A table from its
1352 * Automatic Translation Cache (ATC) if we issue a
1353 * 'PFLUSHR' instruction with the base address of the
 * table. This function should do so, and does.
1355 * Note note: We are using an MC68030 - there is no
1356 * PFLUSHR.
1357 */
1358 int
free_a_table(a_tmgr_t *a_tbl, bool relink)
1360 {
1361 int i, removed_cnt;
1362 mmu_long_dte_t *dte;
1363 mmu_short_dte_t *dtbl;
1364 b_tmgr_t *b_tbl;
1365 uint8_t at_wired, bt_wired;
1366
1367 /*
1368 * Flush the ATC cache of all cached descriptors derived
1369 * from this table.
1370 * Sun3x does not use 68851's cached table feature
1371 * flush_atc_crp(mmu_vtop(a_tbl->dte));
1372 */
1373
1374 /*
1375 * Remove any pending cache flushes that were designated
1376 * for the pmap this A table belongs to.
1377 * a_tbl->parent->atc_flushq[0] = 0;
1378 * Not implemented in sun3x.
1379 */
1380
1381 /*
1382 * All A tables in the system should retain a map for the
1383 * kernel. If the table contains any valid descriptors
1384 * (other than those for the kernel area), invalidate them all,
1385 * stopping short of the kernel's entries.
1386 */
1387 removed_cnt = 0;
1388 at_wired = a_tbl->at_wcnt;
1389 if (a_tbl->at_ecnt) {
1390 dte = a_tbl->at_dtbl;
1391 for (i = 0; i < MMU_TIA(KERNBASE3X); i++) {
1392 /*
1393 * If a table entry points to a valid B table, free
1394 * it and its children.
1395 */
1396 if (MMU_VALID_DT(dte[i])) {
1397 /*
1398 * The following block does several things,
1399 * from innermost expression to the
1400 * outermost:
1401 * 1) It extracts the base (cc 1996)
1402 * address of the B table pointed
1403 * to in the A table entry dte[i].
1404 * 2) It converts this base address into
1405 * the virtual address it can be
1406 * accessed with. (all MMU tables point
1407 * to physical addresses.)
1408 * 3) It finds the corresponding manager
1409 * structure which manages this MMU table.
1410 * 4) It frees the manager structure.
1411 * (This frees the MMU table and all
1412 * child tables. See 'free_b_table' for
1413 * details.)
1414 */
1415 dtbl = mmu_ptov(dte[i].addr.raw);
1416 b_tbl = mmuB2tmgr(dtbl);
1417 bt_wired = b_tbl->bt_wcnt;
1418 removed_cnt += free_b_table(b_tbl, true);
1419 if (bt_wired)
1420 a_tbl->at_wcnt--;
1421 dte[i].attr.raw = MMU_DT_INVALID;
1422 }
1423 }
1424 a_tbl->at_ecnt = 0;
1425 }
1426 KASSERT(a_tbl->at_wcnt == 0);
1427
1428 if (relink) {
1429 a_tbl->at_parent = NULL;
1430 if (!at_wired)
1431 TAILQ_REMOVE(&a_pool, a_tbl, at_link);
1432 TAILQ_INSERT_HEAD(&a_pool, a_tbl, at_link);
1433 }
1434 return removed_cnt;
1435 }
1436
1437 /* free_b_table INTERNAL
1438 **
1439 * Unmaps the given B table and all its children from their current
1440 * mappings. Returns the number of pages that were invalidated.
1441 * (For comments, see 'free_a_table()').
1442 */
1443 int
free_b_table(b_tmgr_t *b_tbl, bool relink)
1445 {
1446 int i, removed_cnt;
1447 mmu_short_dte_t *dte;
1448 mmu_short_pte_t *dtbl;
1449 c_tmgr_t *c_tbl;
1450 uint8_t bt_wired, ct_wired;
1451
1452 removed_cnt = 0;
1453 bt_wired = b_tbl->bt_wcnt;
1454 if (b_tbl->bt_ecnt) {
1455 dte = b_tbl->bt_dtbl;
1456 for (i = 0; i < MMU_B_TBL_SIZE; i++) {
1457 if (MMU_VALID_DT(dte[i])) {
1458 dtbl = mmu_ptov(MMU_DTE_PA(dte[i]));
1459 c_tbl = mmuC2tmgr(dtbl);
1460 ct_wired = c_tbl->ct_wcnt;
1461 removed_cnt += free_c_table(c_tbl, true);
1462 if (ct_wired)
1463 b_tbl->bt_wcnt--;
1464 dte[i].attr.raw = MMU_DT_INVALID;
1465 }
1466 }
1467 b_tbl->bt_ecnt = 0;
1468 }
1469 KASSERT(b_tbl->bt_wcnt == 0);
1470
1471 if (relink) {
1472 b_tbl->bt_parent = NULL;
1473 if (!bt_wired)
1474 TAILQ_REMOVE(&b_pool, b_tbl, bt_link);
1475 TAILQ_INSERT_HEAD(&b_pool, b_tbl, bt_link);
1476 }
1477 return removed_cnt;
1478 }
1479
1480 /* free_c_table INTERNAL
1481 **
1482 * Unmaps the given C table from use and returns it to the pool for
1483 * re-use. Returns the number of pages that were invalidated.
1484 *
1485 * This function preserves any physical page modification information
1486 * contained in the page descriptors within the C table by calling
1487 * 'pmap_remove_pte().'
1488 */
1489 int
free_c_table(c_tmgr_t *c_tbl, bool relink)
1491 {
1492 mmu_short_pte_t *c_pte;
1493 int i, removed_cnt;
1494 uint8_t ct_wired;
1495
1496 removed_cnt = 0;
1497 ct_wired = c_tbl->ct_wcnt;
1498 if (c_tbl->ct_ecnt) {
1499 for (i = 0; i < MMU_C_TBL_SIZE; i++) {
1500 c_pte = &c_tbl->ct_dtbl[i];
1501 if (MMU_VALID_DT(*c_pte)) {
1502 if (c_pte->attr.raw & MMU_SHORT_PTE_WIRED)
1503 c_tbl->ct_wcnt--;
1504 pmap_remove_pte(c_pte);
1505 removed_cnt++;
1506 }
1507 }
1508 c_tbl->ct_ecnt = 0;
1509 }
1510 KASSERT(c_tbl->ct_wcnt == 0);
1511
1512 if (relink) {
1513 c_tbl->ct_parent = NULL;
1514 if (!ct_wired)
1515 TAILQ_REMOVE(&c_pool, c_tbl, ct_link);
1516 TAILQ_INSERT_HEAD(&c_pool, c_tbl, ct_link);
1517 }
1518 return removed_cnt;
1519 }
1520
1521
1522 /* pmap_remove_pte INTERNAL
1523 **
1524 * Unmap the given pte and preserve any page modification
1525 * information by transferring it to the pv head of the
1526 * physical page it maps to. This function does not update
1527 * any reference counts because it is assumed that the calling
1528 * function will do so.
1529 */
1530 void
pmap_remove_pte(mmu_short_pte_t *pte)
1532 {
1533 u_short pv_idx, targ_idx;
1534 paddr_t pa;
1535 pv_t *pv;
1536
1537 pa = MMU_PTE_PA(*pte);
1538 if (is_managed(pa)) {
1539 pv = pa2pv(pa);
1540 targ_idx = pteidx(pte); /* Index of PTE being removed */
1541
1542 /*
1543 * If the PTE being removed is the first (or only) PTE in
1544 * the list of PTEs currently mapped to this page, remove the
1545 * PTE by changing the index found on the PV head. Otherwise
1546 * a linear search through the list will have to be executed
1547 * in order to find the PVE which points to the PTE being
1548 * removed, so that it may be modified to point to its new
1549 * neighbor.
1550 */
1551
1552 pv_idx = pv->pv_idx; /* Index of first PTE in PV list */
1553 if (pv_idx == targ_idx) {
1554 pv->pv_idx = pvebase[targ_idx].pve_next;
1555 } else {
1556
1557 /*
1558 * Find the PV element pointing to the target
1559 * element. Note: may have pv_idx==PVE_EOL
1560 */
1561
1562 for (;;) {
1563 if (pv_idx == PVE_EOL) {
1564 goto pv_not_found;
1565 }
1566 if (pvebase[pv_idx].pve_next == targ_idx)
1567 break;
1568 pv_idx = pvebase[pv_idx].pve_next;
1569 }
1570
1571 /*
1572 * At this point, pv_idx is the index of the PV
1573 * element just before the target element in the list.
1574 * Unlink the target.
1575 */
1576
1577 pvebase[pv_idx].pve_next = pvebase[targ_idx].pve_next;
1578 }
1579
1580 /*
1581 * Save the mod/ref bits of the pte by simply
1582 * ORing the entire pte onto the pv_flags member
1583 * of the pv structure.
1584 * There is no need to use a separate bit pattern
1585 * for usage information on the pv head than that
1586 * which is used on the MMU ptes.
1587 */
1588
1589 pv_not_found:
1590 pv->pv_flags |= (u_short) pte->attr.raw;
1591 }
1592 pte->attr.raw = MMU_DT_INVALID;
1593 }
1594
1595 /* pmap_stroll INTERNAL
1596 **
1597 * Retrieve the addresses of all table managers involved in the mapping of
1598 * the given virtual address. If the table walk completed successfully,
1599 * return true. If it was only partially successful, return false.
1600 * The table walk performed by this function is important to many other
1601 * functions in this module.
1602 *
1603 * Note: This function ought to be easier to read.
1604 */
1605 bool
pmap_stroll(pmap_t pmap, vaddr_t va, a_tmgr_t **a_tbl, b_tmgr_t **b_tbl,
1607 c_tmgr_t **c_tbl, mmu_short_pte_t **pte, int *a_idx, int *b_idx,
1608 int *pte_idx)
1609 {
1610 mmu_long_dte_t *a_dte; /* A: long descriptor table */
1611 mmu_short_dte_t *b_dte; /* B: short descriptor table */
1612
1613 if (pmap == pmap_kernel())
1614 return false;
1615
1616 /* Does the given pmap have its own A table? */
1617 *a_tbl = pmap->pm_a_tmgr;
1618 if (*a_tbl == NULL)
1619 return false; /* No. Return unknown. */
1620 /* Does the A table have a valid B table
1621 * under the corresponding table entry?
1622 */
1623 *a_idx = MMU_TIA(va);
1624 a_dte = &((*a_tbl)->at_dtbl[*a_idx]);
1625 if (!MMU_VALID_DT(*a_dte))
1626 return false; /* No. Return unknown. */
1627 /* Yes. Extract B table from the A table. */
1628 *b_tbl = mmuB2tmgr(mmu_ptov(a_dte->addr.raw));
1629 /*
1630 * Does the B table have a valid C table
1631 * under the corresponding table entry?
1632 */
1633 *b_idx = MMU_TIB(va);
1634 b_dte = &((*b_tbl)->bt_dtbl[*b_idx]);
1635 if (!MMU_VALID_DT(*b_dte))
1636 return false; /* No. Return unknown. */
1637 /* Yes. Extract C table from the B table. */
1638 *c_tbl = mmuC2tmgr(mmu_ptov(MMU_DTE_PA(*b_dte)));
1639 *pte_idx = MMU_TIC(va);
1640 *pte = &((*c_tbl)->ct_dtbl[*pte_idx]);
1641
1642 return true;
1643 }
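
/*
 * A minimal usage sketch (hypothetical caller, not compiled); pmap_unwire()
 * and pmap_extract() below use exactly this pattern:
 *
 *	a_tmgr_t *a_tbl; b_tmgr_t *b_tbl; c_tmgr_t *c_tbl;
 *	mmu_short_pte_t *pte;
 *	int a_idx, b_idx, pte_idx;
 *
 *	if (pmap_stroll(pmap, va, &a_tbl, &b_tbl, &c_tbl, &pte,
 *	    &a_idx, &b_idx, &pte_idx) && MMU_VALID_DT(*pte)) {
 *		... the walk completed and *pte describes the mapping ...
 *	}
 */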
1644
1645 /* pmap_enter INTERFACE
1646 **
1647 * Called by the kernel to map a virtual address
1648 * to a physical address in the given process map.
1649 *
1650 * Note: this function should apply an exclusive lock
1651 * on the pmap system for its duration. (it certainly
1652 * would save my hair!!)
1653 * This function ought to be easier to read.
1654 */
1655 int
1656 pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
1657 {
1658 bool insert, managed; /* Marks the need for PV insertion.*/
1659 u_short nidx; /* PV list index */
1660 int mapflags; /* Flags for the mapping (see NOTE1) */
1661 u_int a_idx, b_idx, pte_idx; /* table indices */
1662 a_tmgr_t *a_tbl; /* A: long descriptor table manager */
1663 b_tmgr_t *b_tbl; /* B: short descriptor table manager */
1664 c_tmgr_t *c_tbl; /* C: short page table manager */
1665 mmu_long_dte_t *a_dte; /* A: long descriptor table */
1666 mmu_short_dte_t *b_dte; /* B: short descriptor table */
1667 mmu_short_pte_t *c_pte; /* C: short page descriptor table */
1668 pv_t *pv; /* pv list head */
1669 bool wired; /* is the mapping to be wired? */
1670 enum {NONE, NEWA, NEWB, NEWC} llevel; /* used at end */
1671
1672 if (pmap == pmap_kernel()) {
1673 pmap_enter_kernel(va, pa, prot);
1674 return 0;
1675 }
1676
1677 /*
1678 * Determine if the mapping should be wired.
1679 */
1680 wired = ((flags & PMAP_WIRED) != 0);
1681
1682 /*
1683 * NOTE1:
1684 *
1685 * On November 13, 1999, someone changed the pmap_enter() API such
1686 * that it now accepts a 'flags' argument. This new argument
1687 * contains bit-flags for the architecture-independent (UVM) system to
1688 * use in signalling certain mapping requirements to the architecture-
1689 * dependent (pmap) system. The argument it replaces, 'wired', is now
1690 * one of the flags within it.
1691 *
1692 * In addition to flags signaled by the architecture-independent
1693 * system, parts of the architecture-dependent section of the sun3x
1694 * kernel pass their own flags in the lower, unused bits of the
1695 * physical address supplied to this function. These flags are
1696 * extracted and stored in the temporary variable 'mapflags'.
1697 *
1698 * Extract sun3x specific flags from the physical address.
1699 */
1700 mapflags = (pa & ~MMU_PAGE_MASK);
1701 pa &= MMU_PAGE_MASK;
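
	/*
	 * For illustration only (the numbers and flag placement are
	 * hypothetical): a caller passing pa == (0x3e000 | PMAP_NC)
	 * would leave mapflags == PMAP_NC and pa == 0x3e000 after the
	 * two statements above.
	 */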
1702
1703 /*
1704 * Determine if the physical address being mapped is on-board RAM.
1705 * Any other area of the address space is likely to belong to a
1706 * device and hence it would be disastrous to cache its contents.
1707 */
1708 if ((managed = is_managed(pa)) == false)
1709 mapflags |= PMAP_NC;
1710
1711 /*
1712 * For user mappings we walk along the MMU tables of the given
1713 * pmap, reaching a PTE which describes the virtual page being
1714 * mapped or changed. If any level of the walk ends in an invalid
1715 * entry, a table must be allocated and the entry must be updated
1716 * to point to it.
1717 * There is a bit of confusion as to whether this code must be
1718 * re-entrant. For now we will assume it is. To support
1719 * re-entrancy we must unlink tables from the table pool before
1720 * we assume we may use them. Tables are re-linked into the pool
1721 * when we are finished with them at the end of the function.
1722 * But I don't feel like doing that until we have proof that this
1723 * needs to be re-entrant.
1724 * 'llevel' records which tables need to be relinked.
1725 */
1726 llevel = NONE;
1727
1728 /*
1729 * Step 1 - Retrieve the A table from the pmap. If it has no
1730 * A table, allocate a new one from the available pool.
1731 */
1732
1733 a_tbl = pmap->pm_a_tmgr;
1734 if (a_tbl == NULL) {
1735 /*
1736 * This pmap does not currently have an A table. Allocate
1737 * a new one.
1738 */
1739 a_tbl = get_a_table();
1740 a_tbl->at_parent = pmap;
1741
1742 /*
1743 * Assign this new A table to the pmap, and calculate its
1744 * physical address so that loadcrp() can be used to make
1745 * the table active.
1746 */
1747 pmap->pm_a_tmgr = a_tbl;
1748 pmap->pm_a_phys = mmu_vtop(a_tbl->at_dtbl);
1749
1750 /*
1751 * If the process receiving a new A table is the current
1752 * process, we are responsible for setting the MMU so that
1753 * it becomes the current address space. This only adds
1754 * new mappings, so no need to flush anything.
1755 */
1756 if (pmap == current_pmap()) {
1757 kernel_crp.rp_addr = pmap->pm_a_phys;
1758 loadcrp(&kernel_crp);
1759 }
1760
1761 if (!wired)
1762 llevel = NEWA;
1763 } else {
1764 /*
1765 * Use the A table already allocated for this pmap.
1766 * Unlink it from the A table pool if necessary.
1767 */
1768 if (wired && !a_tbl->at_wcnt)
1769 TAILQ_REMOVE(&a_pool, a_tbl, at_link);
1770 }
1771
1772 /*
1773 * Step 2 - Walk into the B table. If there is no valid B table,
1774 * allocate one.
1775 */
1776
1777 a_idx = MMU_TIA(va); /* Calculate the TIA of the VA. */
1778 a_dte = &a_tbl->at_dtbl[a_idx]; /* Retrieve descriptor from table */
1779 if (MMU_VALID_DT(*a_dte)) { /* Is the descriptor valid? */
1780 /* The descriptor is valid. Use the B table it points to. */
1781 /*************************************
1782 * a_idx *
1783 * v *
1784 * a_tbl -> +-+-+-+-+-+-+-+-+-+-+-+- *
1785 * | | | | | | | | | | | | *
1786 * +-+-+-+-+-+-+-+-+-+-+-+- *
1787 * | *
1788 * \- b_tbl -> +-+- *
1789 * | | *
1790 * +-+- *
1791 *************************************/
1792 b_dte = mmu_ptov(a_dte->addr.raw);
1793 b_tbl = mmuB2tmgr(b_dte);
1794
1795 /*
1796 * If the requested mapping must be wired, but this table
1797 * being used to map it is not, the table must be removed
1798 * from the available pool and its wired entry count
1799 * incremented.
1800 */
1801 if (wired && !b_tbl->bt_wcnt) {
1802 TAILQ_REMOVE(&b_pool, b_tbl, bt_link);
1803 a_tbl->at_wcnt++;
1804 }
1805 } else {
1806 /* The descriptor is invalid. Allocate a new B table. */
1807 b_tbl = get_b_table();
1808
1809 /* Point the parent A table descriptor to this new B table. */
1810 a_dte->addr.raw = mmu_vtop(b_tbl->bt_dtbl);
1811 a_dte->attr.raw = MMU_LONG_DTE_LU | MMU_DT_SHORT;
1812 a_tbl->at_ecnt++; /* Update parent's valid entry count */
1813
1814 /* Create the necessary back references to the parent table */
1815 b_tbl->bt_parent = a_tbl;
1816 b_tbl->bt_pidx = a_idx;
1817
1818 /*
1819 * If this table is to be wired, make sure the parent A table
1820 * wired count is updated to reflect that it has another wired
1821 * entry.
1822 */
1823 if (wired)
1824 a_tbl->at_wcnt++;
1825 else if (llevel == NONE)
1826 llevel = NEWB;
1827 }
1828
1829 /*
1830 * Step 3 - Walk into the C table, if there is no valid C table,
1831 * allocate one.
1832 */
1833
1834 b_idx = MMU_TIB(va); /* Calculate the TIB of the VA */
1835 b_dte = &b_tbl->bt_dtbl[b_idx]; /* Retrieve descriptor from table */
1836 if (MMU_VALID_DT(*b_dte)) { /* Is the descriptor valid? */
1837 /* The descriptor is valid. Use the C table it points to. */
1838 /**************************************
1839 * c_idx *
1840 * | v *
1841 * \- b_tbl -> +-+-+-+-+-+-+-+-+-+-+- *
1842 * | | | | | | | | | | | *
1843 * +-+-+-+-+-+-+-+-+-+-+- *
1844 * | *
1845 * \- c_tbl -> +-+-- *
1846 * | | | *
1847 * +-+-- *
1848 **************************************/
1849 c_pte = mmu_ptov(MMU_PTE_PA(*b_dte));
1850 c_tbl = mmuC2tmgr(c_pte);
1851
1852 /* If mapping is wired and table is not */
1853 if (wired && !c_tbl->ct_wcnt) {
1854 TAILQ_REMOVE(&c_pool, c_tbl, ct_link);
1855 b_tbl->bt_wcnt++;
1856 }
1857 } else {
1858 /* The descriptor is invalid. Allocate a new C table. */
1859 c_tbl = get_c_table();
1860
1861 /* Point the parent B table descriptor to this new C table. */
1862 b_dte->attr.raw = mmu_vtop(c_tbl->ct_dtbl);
1863 b_dte->attr.raw |= MMU_DT_SHORT;
1864 b_tbl->bt_ecnt++; /* Update parent's valid entry count */
1865
1866 /* Create the necessary back references to the parent table */
1867 c_tbl->ct_parent = b_tbl;
1868 c_tbl->ct_pidx = b_idx;
1869 /*
1870 * Store the pmap and base virtual managed address for faster
1871 * retrieval in the PV functions.
1872 */
1873 c_tbl->ct_pmap = pmap;
1874 c_tbl->ct_va = (va & (MMU_TIA_MASK|MMU_TIB_MASK));
1875
1876 /*
1877 * If this table is to be wired, make sure the parent B table
1878 * wired count is updated to reflect that it has another wired
1879 * entry.
1880 */
1881 if (wired)
1882 b_tbl->bt_wcnt++;
1883 else if (llevel == NONE)
1884 llevel = NEWC;
1885 }
1886
1887 /*
1888 * Step 4 - Deposit a page descriptor (PTE) into the appropriate
1889 * slot of the C table, describing the PA to which the VA is mapped.
1890 */
1891
1892 pte_idx = MMU_TIC(va);
1893 c_pte = &c_tbl->ct_dtbl[pte_idx];
1894 if (MMU_VALID_DT(*c_pte)) { /* Is the entry currently valid? */
1895 /*
1896 * The PTE is currently valid. This particular call
1897 * is just a synonym for one (or more) of the following
1898 * operations:
1899 * change protection of a page
1900 * change wiring status of a page
1901 * remove the mapping of a page
1902 */
1903
1904 /* First check if this is a wiring operation. */
1905 if (c_pte->attr.raw & MMU_SHORT_PTE_WIRED) {
1906 /*
1907 * The existing mapping is wired, so adjust wired
1908 * entry count here. If new mapping is still wired,
1909 * wired entry count will be incremented again later.
1910 */
1911 c_tbl->ct_wcnt--;
1912 if (!wired) {
1913 /*
1914 * The mapping of this PTE is being changed
1915 * from wired to unwired.
1916 * Adjust wired entry counts in each table and
1917 * set llevel flag to put unwired tables back
1918 * into the active pool.
1919 */
1920 if (c_tbl->ct_wcnt == 0) {
1921 llevel = NEWC;
1922 if (--b_tbl->bt_wcnt == 0) {
1923 llevel = NEWB;
1924 if (--a_tbl->at_wcnt == 0) {
1925 llevel = NEWA;
1926 }
1927 }
1928 }
1929 }
1930 }
1931
1932 /* Is the new address the same as the old? */
1933 if (MMU_PTE_PA(*c_pte) == pa) {
1934 /*
1935 * Yes, mark that it does not need to be reinserted
1936 * into the PV list.
1937 */
1938 insert = false;
1939
1940 /*
1941 * Clear all but the modified, referenced and wired
1942 * bits on the PTE.
1943 */
1944 c_pte->attr.raw &= (MMU_SHORT_PTE_M
1945 | MMU_SHORT_PTE_USED | MMU_SHORT_PTE_WIRED);
1946 } else {
1947 /* No, remove the old entry */
1948 pmap_remove_pte(c_pte);
1949 insert = true;
1950 }
1951
1952 /*
1953 * TLB flush is only necessary if modifying current map.
1954 * However, in pmap_enter(), the pmap almost always IS
1955 * the current pmap, so don't even bother to check.
1956 */
1957 TBIS(va);
1958 } else {
1959 /*
1960 * The PTE is invalid. Increment the valid entry count in
1961 * the C table manager to reflect the addition of a new entry.
1962 */
1963 c_tbl->ct_ecnt++;
1964
1965 /* XXX - temporarily make sure the PTE is cleared. */
1966 c_pte->attr.raw = 0;
1967
1968 /* It will also need to be inserted into the PV list. */
1969 insert = true;
1970 }
1971
1972 /*
1973 * If page is changing from unwired to wired status, set an unused bit
1974 * within the PTE to indicate that it is wired. Also increment the
1975 * wired entry count in the C table manager.
1976 */
1977 if (wired) {
1978 c_pte->attr.raw |= MMU_SHORT_PTE_WIRED;
1979 c_tbl->ct_wcnt++;
1980 }
1981
1982 /*
1983 * Map the page, being careful to preserve modify/reference/wired
1984 * bits. At this point it is assumed that the PTE either has no bits
1985 * set, or if there are set bits, they are only modified, reference or
1986 * wired bits. If not, the following statement will cause erratic
1987 * behavior.
1988 */
1989 #ifdef PMAP_DEBUG
1990 if (c_pte->attr.raw & ~(MMU_SHORT_PTE_M |
1991 MMU_SHORT_PTE_USED | MMU_SHORT_PTE_WIRED)) {
1992 printf("pmap_enter: junk left in PTE at %p\n", c_pte);
1993 Debugger();
1994 }
1995 #endif
1996 c_pte->attr.raw |= ((u_long) pa | MMU_DT_PAGE);
1997
1998 /*
1999 * If the mapping should be read-only, set the write protect
2000 * bit in the PTE.
2001 */
2002 if (!(prot & VM_PROT_WRITE))
2003 c_pte->attr.raw |= MMU_SHORT_PTE_WP;
2004
2005 /*
2006 * Mark the PTE as used and/or modified as specified by the flags arg.
2007 */
2008 if (flags & VM_PROT_ALL) {
2009 c_pte->attr.raw |= MMU_SHORT_PTE_USED;
2010 if (flags & VM_PROT_WRITE) {
2011 c_pte->attr.raw |= MMU_SHORT_PTE_M;
2012 }
2013 }
2014
2015 /*
2016 * If the mapping should be cache inhibited (indicated by the flag
2017 	 * bits found in the low-order bits of the physical address),
2018 	 * mark the PTE as a cache-inhibited page.
2019 */
2020 if (mapflags & PMAP_NC)
2021 c_pte->attr.raw |= MMU_SHORT_PTE_CI;
2022
2023 /*
2024 * If the physical address being mapped is managed by the PV
2025 * system then link the pte into the list of pages mapped to that
2026 * address.
2027 */
2028 if (insert && managed) {
2029 pv = pa2pv(pa);
2030 nidx = pteidx(c_pte);
2031
2032 pvebase[nidx].pve_next = pv->pv_idx;
2033 pv->pv_idx = nidx;
2034 }
2035
2036 /* Move any allocated or unwired tables back into the active pool. */
2037
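	/*
	 * Note the deliberate fall-through below: a newly allocated A
	 * table implies that the B and C tables beneath it are also new,
	 * so each case re-links every level below it as well.
	 */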
2038 switch (llevel) {
2039 case NEWA:
2040 TAILQ_INSERT_TAIL(&a_pool, a_tbl, at_link);
2041 /* FALLTHROUGH */
2042 case NEWB:
2043 TAILQ_INSERT_TAIL(&b_pool, b_tbl, bt_link);
2044 /* FALLTHROUGH */
2045 case NEWC:
2046 TAILQ_INSERT_TAIL(&c_pool, c_tbl, ct_link);
2047 /* FALLTHROUGH */
2048 default:
2049 break;
2050 }
2051
2052 return 0;
2053 }
2054
2055 /* pmap_enter_kernel INTERNAL
2056 **
2057 * Map the given virtual address to the given physical address within the
2058 * kernel address space. This function exists because the kernel map does
2059 * not do dynamic table allocation. It consists of a contiguous array of ptes
2060 * and can be edited directly without the need to walk through any tables.
2061 *
2062 * XXX: "Danger, Will Robinson!"
2063 * Note that the kernel should never take a fault on any page
2064 * between [ KERNBASE .. virtual_avail ] and this is checked in
2065 * trap.c for kernel-mode MMU faults. This means that mappings
2066 * created in that range must be implicitly wired. -gwr
2067 */
2068 void
2069 pmap_enter_kernel(vaddr_t va, paddr_t pa, vm_prot_t prot)
2070 {
2071 bool was_valid, insert;
2072 u_short pte_idx;
2073 int flags;
2074 mmu_short_pte_t *pte;
2075 pv_t *pv;
2076 paddr_t old_pa;
2077
2078 flags = (pa & ~MMU_PAGE_MASK);
2079 pa &= MMU_PAGE_MASK;
2080
2081 if (is_managed(pa))
2082 insert = true;
2083 else
2084 insert = false;
2085
2086 /*
2087 * Calculate the index of the PTE being modified.
2088 */
2089 pte_idx = (u_long)m68k_btop(va - KERNBASE3X);
2090
2091 /* This array is traditionally named "Sysmap" */
2092 pte = &kernCbase[pte_idx];
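
	/*
	 * Illustrative example (hypothetical VA): for the third page above
	 * KERNBASE3X, m68k_btop(va - KERNBASE3X) yields 2, so the mapping
	 * is described by kernCbase[2].
	 */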
2093
2094 if (MMU_VALID_DT(*pte)) {
2095 was_valid = true;
2096 /*
2097 * If the PTE already maps a different
2098 		 * physical address, unmap and pv_unlink.
2099 */
2100 old_pa = MMU_PTE_PA(*pte);
2101 if (pa != old_pa)
2102 pmap_remove_pte(pte);
2103 else {
2104 /*
2105 * Old PA and new PA are the same. No need to
2106 * relink the mapping within the PV list.
2107 */
2108 insert = false;
2109
2110 /*
2111 * Save any mod/ref bits on the PTE.
2112 */
2113 pte->attr.raw &= (MMU_SHORT_PTE_USED|MMU_SHORT_PTE_M);
2114 }
2115 } else {
2116 pte->attr.raw = MMU_DT_INVALID;
2117 was_valid = false;
2118 }
2119
2120 /*
2121 	 * Map the page, being careful to preserve modified/referenced bits
2122 * on the PTE.
2123 */
2124 pte->attr.raw |= (pa | MMU_DT_PAGE);
2125
2126 if (!(prot & VM_PROT_WRITE)) /* If access should be read-only */
2127 pte->attr.raw |= MMU_SHORT_PTE_WP;
2128 if (flags & PMAP_NC)
2129 pte->attr.raw |= MMU_SHORT_PTE_CI;
2130 if (was_valid)
2131 TBIS(va);
2132
2133 /*
2134 * Insert the PTE into the PV system, if need be.
2135 */
2136 if (insert) {
2137 pv = pa2pv(pa);
2138 pvebase[pte_idx].pve_next = pv->pv_idx;
2139 pv->pv_idx = pte_idx;
2140 }
2141 }
2142
2143 void
2144 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
2145 {
2146 mmu_short_pte_t *pte;
2147 u_int mapflags;
2148
2149 /* XXX: MD PMAP_NC should be replaced by MI PMAP_NOCACHE in flags. */
2150 mapflags = (pa & ~MMU_PAGE_MASK);
2151 if ((mapflags & PMAP_NC) != 0)
2152 flags |= PMAP_NOCACHE;
2153
2154 /* This array is traditionally named "Sysmap" */
2155 pte = &kernCbase[(u_long)m68k_btop(va - KERNBASE3X)];
2156
2157 KASSERT(!MMU_VALID_DT(*pte));
2158 pte->attr.raw = MMU_DT_INVALID | MMU_DT_PAGE | (pa & MMU_PAGE_MASK);
2159 if (!(prot & VM_PROT_WRITE))
2160 pte->attr.raw |= MMU_SHORT_PTE_WP;
2161 if ((flags & PMAP_NOCACHE) != 0)
2162 pte->attr.raw |= MMU_SHORT_PTE_CI;
2163 }
2164
2165 void
2166 pmap_kremove(vaddr_t va, vsize_t len)
2167 {
2168 int idx, eidx;
2169
2170 #ifdef PMAP_DEBUG
2171 if ((va & PGOFSET) || (len & PGOFSET))
2172 panic("pmap_kremove: alignment");
2173 #endif
2174
2175 idx = m68k_btop(va - KERNBASE3X);
2176 eidx = m68k_btop(va + len - KERNBASE3X);
2177
2178 while (idx < eidx) {
2179 kernCbase[idx++].attr.raw = MMU_DT_INVALID;
2180 TBIS(va);
2181 va += PAGE_SIZE;
2182 }
2183 }
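
/*
 * A minimal usage sketch for the pair above (hypothetical, not compiled);
 * pmap_copy_page() and pmap_zero_page() below are real callers:
 *
 *	pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE, 0);
 *	... access the page through va ...
 *	pmap_kremove(va, PAGE_SIZE);
 */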
2184
2185 /* pmap_map INTERNAL
2186 **
2187 * Map a contiguous range of physical memory into a contiguous range of
2188 * the kernel virtual address space.
2189 *
2190 * Used for device mappings and early mapping of the kernel text/data/bss.
2191 * Returns the first virtual address beyond the end of the range.
2192 */
2193 vaddr_t
2194 pmap_map(vaddr_t va, paddr_t pa, paddr_t endpa, int prot)
2195 {
2196 int sz;
2197
2198 sz = endpa - pa;
2199 do {
2200 pmap_enter_kernel(va, pa, prot);
2201 va += PAGE_SIZE;
2202 pa += PAGE_SIZE;
2203 sz -= PAGE_SIZE;
2204 } while (sz > 0);
2205 pmap_update(pmap_kernel());
2206 return va;
2207 }
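
/*
 * A hypothetical sketch of a pmap_map() call (the addresses are made up):
 * mapping two pages of device space read/write at a chosen KVA would look
 * like
 *
 *	va = pmap_map(va, devpa, devpa + 2 * PAGE_SIZE,
 *	    VM_PROT_READ | VM_PROT_WRITE);
 *
 * and the returned value is the first KVA past the new mapping.
 */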
2208
2209 /* pmap_protect_kernel INTERNAL
2210 **
2211 * Apply the given protection code to a kernel address range.
2212 */
2213 static INLINE void
2214 pmap_protect_kernel(vaddr_t startva, vaddr_t endva, vm_prot_t prot)
2215 {
2216 vaddr_t va;
2217 mmu_short_pte_t *pte;
2218
2219 pte = &kernCbase[(unsigned long) m68k_btop(startva - KERNBASE3X)];
2220 for (va = startva; va < endva; va += PAGE_SIZE, pte++) {
2221 if (MMU_VALID_DT(*pte)) {
2222 switch (prot) {
2223 case VM_PROT_ALL:
2224 break;
2225 case VM_PROT_EXECUTE:
2226 case VM_PROT_READ:
2227 case VM_PROT_READ|VM_PROT_EXECUTE:
2228 pte->attr.raw |= MMU_SHORT_PTE_WP;
2229 break;
2230 case VM_PROT_NONE:
2231 /* this is an alias for 'pmap_remove_kernel' */
2232 pmap_remove_pte(pte);
2233 break;
2234 default:
2235 break;
2236 }
2237 /*
2238 * since this is the kernel, immediately flush any cached
2239 * descriptors for this address.
2240 */
2241 TBIS(va);
2242 }
2243 }
2244 }
2245
2246 /* pmap_protect INTERFACE
2247 **
2248 * Apply the given protection to the given virtual address range within
2249 * the given map.
2250 *
2251 * It is ok for the protection applied to be stronger than what is
2252 * specified. We use this to our advantage when the given map has no
2253 * mapping for the virtual address. By skipping a page when this
2254 * is discovered, we are effectively applying a protection of VM_PROT_NONE,
2255 * and therefore do not need to map the page just to apply a protection
2256 * code. Only pmap_enter() needs to create new mappings if they do not exist.
2257 *
2258  * XXX - This function could be sped up by using pmap_stroll() for initial
2259 * setup, and then manual scrolling in the for() loop.
2260 */
2261 void
2262 pmap_protect(pmap_t pmap, vaddr_t startva, vaddr_t endva, vm_prot_t prot)
2263 {
2264 bool iscurpmap;
2265 int a_idx, b_idx, c_idx;
2266 a_tmgr_t *a_tbl;
2267 b_tmgr_t *b_tbl;
2268 c_tmgr_t *c_tbl;
2269 mmu_short_pte_t *pte;
2270
2271 if (pmap == pmap_kernel()) {
2272 pmap_protect_kernel(startva, endva, prot);
2273 return;
2274 }
2275
2276 /*
2277 * In this particular pmap implementation, there are only three
2278 * types of memory protection: 'all' (read/write/execute),
2279 * 'read-only' (read/execute) and 'none' (no mapping.)
2280 * It is not possible for us to treat 'executable' as a separate
2281 * protection type. Therefore, protection requests that seek to
2282 * remove execute permission while retaining read or write, and those
2283 * that make little sense (write-only for example) are ignored.
2284 */
2285 switch (prot) {
2286 case VM_PROT_NONE:
2287 /*
2288 * A request to apply the protection code of
2289 * 'VM_PROT_NONE' is a synonym for pmap_remove().
2290 */
2291 pmap_remove(pmap, startva, endva);
2292 return;
2293 case VM_PROT_EXECUTE:
2294 case VM_PROT_READ:
2295 case VM_PROT_READ|VM_PROT_EXECUTE:
2296 /* continue */
2297 break;
2298 case VM_PROT_WRITE:
2299 case VM_PROT_WRITE|VM_PROT_READ:
2300 case VM_PROT_WRITE|VM_PROT_EXECUTE:
2301 case VM_PROT_ALL:
2302 /* None of these should happen in a sane system. */
2303 return;
2304 }
2305
2306 /*
2307 * If the pmap has no A table, it has no mappings and therefore
2308 * there is nothing to protect.
2309 */
2310 if ((a_tbl = pmap->pm_a_tmgr) == NULL)
2311 return;
2312
2313 a_idx = MMU_TIA(startva);
2314 b_idx = MMU_TIB(startva);
2315 c_idx = MMU_TIC(startva);
2316 b_tbl = NULL;
2317 c_tbl = NULL;
2318
2319 iscurpmap = (pmap == current_pmap());
2320 while (startva < endva) {
2321 if (b_tbl || MMU_VALID_DT(a_tbl->at_dtbl[a_idx])) {
2322 if (b_tbl == NULL) {
2323 b_tbl = (b_tmgr_t *) a_tbl->at_dtbl[a_idx].addr.raw;
2324 b_tbl = mmu_ptov((vaddr_t)b_tbl);
2325 b_tbl = mmuB2tmgr((mmu_short_dte_t *)b_tbl);
2326 }
2327 if (c_tbl || MMU_VALID_DT(b_tbl->bt_dtbl[b_idx])) {
2328 if (c_tbl == NULL) {
2329 c_tbl = (c_tmgr_t *) MMU_DTE_PA(b_tbl->bt_dtbl[b_idx]);
2330 c_tbl = mmu_ptov((vaddr_t)c_tbl);
2331 c_tbl = mmuC2tmgr((mmu_short_pte_t *)c_tbl);
2332 }
2333 if (MMU_VALID_DT(c_tbl->ct_dtbl[c_idx])) {
2334 pte = &c_tbl->ct_dtbl[c_idx];
2335 /* make the mapping read-only */
2336 pte->attr.raw |= MMU_SHORT_PTE_WP;
2337 /*
2338 * If we just modified the current address space,
2339 * flush any translations for the modified page from
2340 * the translation cache and any data from it in the
2341 * data cache.
2342 */
2343 if (iscurpmap)
2344 TBIS(startva);
2345 }
2346 startva += PAGE_SIZE;
2347
2348 if (++c_idx >= MMU_C_TBL_SIZE) { /* exceeded C table? */
2349 c_tbl = NULL;
2350 c_idx = 0;
2351 if (++b_idx >= MMU_B_TBL_SIZE) { /* exceeded B table? */
2352 b_tbl = NULL;
2353 b_idx = 0;
2354 }
2355 }
2356 } else { /* C table wasn't valid */
2357 c_tbl = NULL;
2358 c_idx = 0;
2359 startva += MMU_TIB_RANGE;
2360 if (++b_idx >= MMU_B_TBL_SIZE) { /* exceeded B table? */
2361 b_tbl = NULL;
2362 b_idx = 0;
2363 }
2364 } /* C table */
2365 } else { /* B table wasn't valid */
2366 b_tbl = NULL;
2367 b_idx = 0;
2368 startva += MMU_TIA_RANGE;
2369 a_idx++;
2370 } /* B table */
2371 }
2372 }
2373
2374 /* pmap_unwire INTERFACE
2375 **
2376 * Clear the wired attribute of the specified page.
2377 *
2378 * This function is called from vm_fault.c to unwire
2379 * a mapping.
2380 */
2381 void
2382 pmap_unwire(pmap_t pmap, vaddr_t va)
2383 {
2384 int a_idx, b_idx, c_idx;
2385 a_tmgr_t *a_tbl;
2386 b_tmgr_t *b_tbl;
2387 c_tmgr_t *c_tbl;
2388 mmu_short_pte_t *pte;
2389
2390 /* Kernel mappings always remain wired. */
2391 if (pmap == pmap_kernel())
2392 return;
2393
2394 /*
2395 * Walk through the tables. If the walk terminates without
2396 * a valid PTE then the address wasn't wired in the first place.
2397 * Return immediately.
2398 */
2399 if (pmap_stroll(pmap, va, &a_tbl, &b_tbl, &c_tbl, &pte, &a_idx,
2400 &b_idx, &c_idx) == false)
2401 return;
2402
2403
2404 /* Is the PTE wired? If not, return. */
2405 if (!(pte->attr.raw & MMU_SHORT_PTE_WIRED))
2406 return;
2407
2408 /* Remove the wiring bit. */
2409 pte->attr.raw &= ~(MMU_SHORT_PTE_WIRED);
2410
2411 /*
2412 * Decrement the wired entry count in the C table.
2413 * If it reaches zero the following things happen:
2414 * 1. The table no longer has any wired entries and is considered
2415 * unwired.
2416 * 2. It is placed on the available queue.
2417 * 3. The parent table's wired entry count is decremented.
2418 * 4. If it reaches zero, this process repeats at step 1 and
2419 	 *    stops after reaching the A table.
2420 */
2421 if (--c_tbl->ct_wcnt == 0) {
2422 TAILQ_INSERT_TAIL(&c_pool, c_tbl, ct_link);
2423 if (--b_tbl->bt_wcnt == 0) {
2424 TAILQ_INSERT_TAIL(&b_pool, b_tbl, bt_link);
2425 if (--a_tbl->at_wcnt == 0) {
2426 TAILQ_INSERT_TAIL(&a_pool, a_tbl, at_link);
2427 }
2428 }
2429 }
2430 }
2431
2432 /* pmap_copy INTERFACE
2433 **
2434 * Copy the mappings of a range of addresses in one pmap, into
2435 * the destination address of another.
2436 *
2437 * This routine is advisory. Should we one day decide that MMU tables
2438 * may be shared by more than one pmap, this function should be used to
2439 * link them together. Until that day however, we do nothing.
2440 */
2441 void
2442 pmap_copy(pmap_t pmap_a, pmap_t pmap_b, vaddr_t dst, vsize_t len, vaddr_t src)
2443 {
2444
2445 /* not implemented. */
2446 }
2447
2448 /* pmap_copy_page INTERFACE
2449 **
2450 * Copy the contents of one physical page into another.
2451 *
2452 * This function makes use of two virtual pages allocated in pmap_bootstrap()
2453 * to map the two specified physical pages into the kernel address space.
2454 *
2455 * Note: We could use the transparent translation registers to make the
2456 * mappings. If we do so, be sure to disable interrupts before using them.
2457 */
2458 void
2459 pmap_copy_page(paddr_t srcpa, paddr_t dstpa)
2460 {
2461 vaddr_t srcva, dstva;
2462 int s;
2463
2464 srcva = tmp_vpages[0];
2465 dstva = tmp_vpages[1];
2466
2467 s = splvm();
2468 #ifdef DIAGNOSTIC
2469 if (tmp_vpages_inuse++)
2470 panic("pmap_copy_page: temporary vpages are in use.");
2471 #endif
2472
2473 	/* Map pages as non-cacheable to avoid cache pollution? */
2474 pmap_kenter_pa(srcva, srcpa, VM_PROT_READ, 0);
2475 pmap_kenter_pa(dstva, dstpa, VM_PROT_READ | VM_PROT_WRITE, 0);
2476
2477 /* Hand-optimized version of memcpy(dst, src, PAGE_SIZE) */
2478 copypage((char *)srcva, (char *)dstva);
2479
2480 pmap_kremove(srcva, PAGE_SIZE);
2481 pmap_kremove(dstva, PAGE_SIZE);
2482
2483 #ifdef DIAGNOSTIC
2484 --tmp_vpages_inuse;
2485 #endif
2486 splx(s);
2487 }
2488
2489 /* pmap_zero_page INTERFACE
2490 **
2491 * Zero the contents of the specified physical page.
2492 *
2493 * Uses one of the virtual pages allocated in pmap_bootstrap()
2494 * to map the specified page into the kernel address space.
2495 */
2496 void
2497 pmap_zero_page(paddr_t dstpa)
2498 {
2499 vaddr_t dstva;
2500 int s;
2501
2502 dstva = tmp_vpages[1];
2503 s = splvm();
2504 #ifdef DIAGNOSTIC
2505 if (tmp_vpages_inuse++)
2506 panic("pmap_zero_page: temporary vpages are in use.");
2507 #endif
2508
2509 /* The comments in pmap_copy_page() above apply here also. */
2510 pmap_kenter_pa(dstva, dstpa, VM_PROT_READ | VM_PROT_WRITE, 0);
2511
2512 /* Hand-optimized version of memset(ptr, 0, PAGE_SIZE) */
2513 zeropage((char *)dstva);
2514
2515 pmap_kremove(dstva, PAGE_SIZE);
2516 #ifdef DIAGNOSTIC
2517 --tmp_vpages_inuse;
2518 #endif
2519 splx(s);
2520 }
2521
2522 /* pmap_pinit INTERNAL
2523 **
2524 * Initialize a pmap structure.
2525 */
2526 static INLINE void
2527 pmap_pinit(pmap_t pmap)
2528 {
2529
2530 memset(pmap, 0, sizeof(struct pmap));
2531 pmap->pm_a_tmgr = NULL;
2532 pmap->pm_a_phys = kernAphys;
2533 pmap->pm_refcount = 1;
2534 }
2535
2536 /* pmap_create INTERFACE
2537 **
2538 * Create and return a pmap structure.
2539 */
2540 pmap_t
2541 pmap_create(void)
2542 {
2543 pmap_t pmap;
2544
2545 pmap = pool_get(&pmap_pmap_pool, PR_WAITOK);
2546 pmap_pinit(pmap);
2547 return pmap;
2548 }
2549
2550 /* pmap_release INTERNAL
2551 **
2552 * Release any resources held by the given pmap.
2553 *
2554  * This is the reverse analog to pmap_pinit.  It does not
2555  * necessarily deallocate the pmap structure itself, as
2556  * pmap_destroy does.
2557 */
2558 static INLINE void
2559 pmap_release(pmap_t pmap)
2560 {
2561
2562 /*
2563 * As long as the pmap contains no mappings,
2564 * which always should be the case whenever
2565 * this function is called, there really should
2566 * be nothing to do.
2567 */
2568 #ifdef PMAP_DEBUG
2569 if (pmap == pmap_kernel())
2570 panic("pmap_release: kernel pmap");
2571 #endif
2572 /*
2573 * XXX - If this pmap has an A table, give it back.
2574 * The pmap SHOULD be empty by now, and pmap_remove
2575 * should have already given back the A table...
2576 * However, I see: pmap->pm_a_tmgr->at_ecnt == 1
2577 * at this point, which means some mapping was not
2578 * removed when it should have been. -gwr
2579 */
2580 if (pmap->pm_a_tmgr != NULL) {
2581 /* First make sure we are not using it! */
2582 if (kernel_crp.rp_addr == pmap->pm_a_phys) {
2583 kernel_crp.rp_addr = kernAphys;
2584 loadcrp(&kernel_crp);
2585 }
2586 #ifdef PMAP_DEBUG /* XXX - todo! */
2587 /* XXX - Now complain... */
2588 printf("pmap_release: still have table\n");
2589 Debugger();
2590 #endif
2591 free_a_table(pmap->pm_a_tmgr, true);
2592 pmap->pm_a_tmgr = NULL;
2593 pmap->pm_a_phys = kernAphys;
2594 }
2595 }
2596
2597 /* pmap_reference INTERFACE
2598 **
2599 * Increment the reference count of a pmap.
2600 */
2601 void
2602 pmap_reference(pmap_t pmap)
2603 {
2604
2605 atomic_inc_uint(&pmap->pm_refcount);
2606 }
2607
2608 /* pmap_dereference INTERNAL
2609 **
2610 * Decrease the reference count on the given pmap
2611 * by one and return the current count.
2612 */
2613 static INLINE int
2614 pmap_dereference(pmap_t pmap)
2615 {
2616 int rtn;
2617
2618 rtn = atomic_dec_uint_nv(&pmap->pm_refcount);
2619
2620 return rtn;
2621 }
2622
2623 /* pmap_destroy INTERFACE
2624 **
2625 * Decrement a pmap's reference count and delete
2626 * the pmap if it becomes zero. Will be called
2627 * only after all mappings have been removed.
2628 */
2629 void
2630 pmap_destroy(pmap_t pmap)
2631 {
2632
2633 if (pmap_dereference(pmap) == 0) {
2634 pmap_release(pmap);
2635 pool_put(&pmap_pmap_pool, pmap);
2636 }
2637 }
2638
2639 /* pmap_is_referenced INTERFACE
2640 **
2641 * Determine if the given physical page has been
2642 * referenced (read from [or written to.])
2643 */
2644 bool
2645 pmap_is_referenced(struct vm_page *pg)
2646 {
2647 paddr_t pa = VM_PAGE_TO_PHYS(pg);
2648 pv_t *pv;
2649 int idx;
2650
2651 /*
2652 * Check the flags on the pv head. If they are set,
2653 * return immediately. Otherwise a search must be done.
2654 */
2655
2656 pv = pa2pv(pa);
2657 if (pv->pv_flags & PV_FLAGS_USED)
2658 return true;
2659
2660 /*
2661 * Search through all pv elements pointing
2662 * to this page and query their reference bits
2663 */
2664
2665 for (idx = pv->pv_idx; idx != PVE_EOL; idx = pvebase[idx].pve_next) {
2666 if (MMU_PTE_USED(kernCbase[idx])) {
2667 return true;
2668 }
2669 }
2670 return false;
2671 }
2672
2673 /* pmap_is_modified INTERFACE
2674 **
2675 * Determine if the given physical page has been
2676 * modified (written to.)
2677 */
2678 bool
2679 pmap_is_modified(struct vm_page *pg)
2680 {
2681 paddr_t pa = VM_PAGE_TO_PHYS(pg);
2682 pv_t *pv;
2683 int idx;
2684
2685 /* see comments in pmap_is_referenced() */
2686 pv = pa2pv(pa);
2687 if (pv->pv_flags & PV_FLAGS_MDFY)
2688 return true;
2689
2690 for (idx = pv->pv_idx;
2691 idx != PVE_EOL;
2692 idx = pvebase[idx].pve_next) {
2693
2694 if (MMU_PTE_MODIFIED(kernCbase[idx])) {
2695 return true;
2696 }
2697 }
2698
2699 return false;
2700 }
2701
2702 /* pmap_page_protect INTERFACE
2703 **
2704 * Applies the given protection to all mappings to the given
2705 * physical page.
2706 */
2707 void
2708 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
2709 {
2710 paddr_t pa = VM_PAGE_TO_PHYS(pg);
2711 pv_t *pv;
2712 int idx;
2713 vaddr_t va;
2714 struct mmu_short_pte_struct *pte;
2715 c_tmgr_t *c_tbl;
2716 pmap_t pmap, curpmap;
2717
2718 curpmap = current_pmap();
2719 pv = pa2pv(pa);
2720
2721 for (idx = pv->pv_idx; idx != PVE_EOL; idx = pvebase[idx].pve_next) {
2722 pte = &kernCbase[idx];
2723 switch (prot) {
2724 case VM_PROT_ALL:
2725 /* do nothing */
2726 break;
2727 case VM_PROT_EXECUTE:
2728 case VM_PROT_READ:
2729 case VM_PROT_READ|VM_PROT_EXECUTE:
2730 /*
2731 * Determine the virtual address mapped by
2732 * the PTE and flush ATC entries if necessary.
2733 */
2734 va = pmap_get_pteinfo(idx, &pmap, &c_tbl);
2735 pte->attr.raw |= MMU_SHORT_PTE_WP;
2736 if (pmap == curpmap || pmap == pmap_kernel())
2737 TBIS(va);
2738 break;
2739 case VM_PROT_NONE:
2740 /* Save the mod/ref bits. */
2741 pv->pv_flags |= pte->attr.raw;
2742 /* Invalidate the PTE. */
2743 pte->attr.raw = MMU_DT_INVALID;
2744
2745 /*
2746 * Update table counts. And flush ATC entries
2747 * if necessary.
2748 */
2749 va = pmap_get_pteinfo(idx, &pmap, &c_tbl);
2750
2751 /*
2752 * If the PTE belongs to the kernel map,
2753 * be sure to flush the page it maps.
2754 */
2755 if (pmap == pmap_kernel()) {
2756 TBIS(va);
2757 } else {
2758 /*
2759 * The PTE belongs to a user map.
2760 				 * Update the entry count in the C
2761 * table to which it belongs and flush
2762 * the ATC if the mapping belongs to
2763 * the current pmap.
2764 */
2765 c_tbl->ct_ecnt--;
2766 if (pmap == curpmap)
2767 TBIS(va);
2768 }
2769 break;
2770 default:
2771 break;
2772 }
2773 }
2774
2775 /*
2776 * If the protection code indicates that all mappings to the page
2777 * be removed, truncate the PV list to zero entries.
2778 */
2779 if (prot == VM_PROT_NONE)
2780 pv->pv_idx = PVE_EOL;
2781 }
2782
2783 /* pmap_get_pteinfo INTERNAL
2784 **
2785 * Called internally to find the pmap and virtual address within that
2786 * map to which the pte at the given index maps. Also includes the PTE's C
2787 * table manager.
2788 *
2789 * Returns the pmap in the argument provided, and the virtual address
2790 * by return value.
2791 */
2792 vaddr_t
2793 pmap_get_pteinfo(u_int idx, pmap_t *pmap, c_tmgr_t **tbl)
2794 {
2795 vaddr_t va = 0;
2796
2797 /*
2798 * Determine if the PTE is a kernel PTE or a user PTE.
2799 */
2800 if (idx >= NUM_KERN_PTES) {
2801 /*
2802 * The PTE belongs to a user mapping.
2803 */
2804 /* XXX: Would like an inline for this to validate idx... */
2805 *tbl = &Ctmgrbase[(idx - NUM_KERN_PTES) / MMU_C_TBL_SIZE];
2806
2807 *pmap = (*tbl)->ct_pmap;
2808 /*
2809 * To find the va to which the PTE maps, we first take
2810 * the table's base virtual address mapping which is stored
2811 * in ct_va. We then increment this address by a page for
2812 * every slot skipped until we reach the PTE.
2813 */
2814 va = (*tbl)->ct_va;
2815 va += m68k_ptob(idx % MMU_C_TBL_SIZE);
2816 } else {
2817 /*
2818 * The PTE belongs to the kernel map.
2819 */
2820 *pmap = pmap_kernel();
2821
2822 va = m68k_ptob(idx);
2823 va += KERNBASE3X;
2824 }
2825
2826 return va;
2827 }
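
/*
 * Illustrative example (hypothetical index): a kernel PTE at idx == 5
 * maps KERNBASE3X + 5 pages.  For a user PTE, the owning C table manager
 * is Ctmgrbase[(idx - NUM_KERN_PTES) / MMU_C_TBL_SIZE] and the returned
 * VA is that table's ct_va advanced by the PTE's slot offset within the
 * table.
 */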
2828
2829 /* pmap_clear_modify INTERFACE
2830 **
2831 * Clear the modification bit on the page at the specified
2832 * physical address.
2833 *
2834 */
2835 bool
2836 pmap_clear_modify(struct vm_page *pg)
2837 {
2838 paddr_t pa = VM_PAGE_TO_PHYS(pg);
2839 bool rv;
2840
2841 rv = pmap_is_modified(pg);
2842 pmap_clear_pv(pa, PV_FLAGS_MDFY);
2843 return rv;
2844 }
2845
2846 /* pmap_clear_reference INTERFACE
2847 **
2848 * Clear the referenced bit on the page at the specified
2849 * physical address.
2850 */
2851 bool
2852 pmap_clear_reference(struct vm_page *pg)
2853 {
2854 paddr_t pa = VM_PAGE_TO_PHYS(pg);
2855 bool rv;
2856
2857 rv = pmap_is_referenced(pg);
2858 pmap_clear_pv(pa, PV_FLAGS_USED);
2859 return rv;
2860 }
2861
2862 /* pmap_clear_pv INTERNAL
2863 **
2864 * Clears the specified flag from the specified physical address.
2865 * (Used by pmap_clear_modify() and pmap_clear_reference().)
2866 *
2867 * Flag is one of:
2868 * PV_FLAGS_MDFY - Page modified bit.
2869 * PV_FLAGS_USED - Page used (referenced) bit.
2870 *
2871 * This routine must not only clear the flag on the pv list
2872 * head. It must also clear the bit on every pte in the pv
2873 * list associated with the address.
2874 */
2875 void
2876 pmap_clear_pv(paddr_t pa, int flag)
2877 {
2878 pv_t *pv;
2879 int idx;
2880 vaddr_t va;
2881 pmap_t pmap;
2882 mmu_short_pte_t *pte;
2883 c_tmgr_t *c_tbl;
2884
2885 pv = pa2pv(pa);
2886 pv->pv_flags &= ~(flag);
2887 for (idx = pv->pv_idx; idx != PVE_EOL; idx = pvebase[idx].pve_next) {
2888 pte = &kernCbase[idx];
2889 pte->attr.raw &= ~(flag);
2890
2891 /*
2892 * The MC68030 MMU will not set the modified or
2893 * referenced bits on any MMU tables for which it has
2894 		 * a cached descriptor with its modify bit set. To ensure
2895 * that it will modify these bits on the PTE during the next
2896 * time it is written to or read from, we must flush it from
2897 * the ATC.
2898 *
2899 * Ordinarily it is only necessary to flush the descriptor
2900 * if it is used in the current address space. But since I
2901 * am not sure that there will always be a notion of
2902 * 'the current address space' when this function is called,
2903 * I will skip the test and always flush the address. It
2904 * does no harm.
2905 */
2906
2907 va = pmap_get_pteinfo(idx, &pmap, &c_tbl);
2908 TBIS(va);
2909 }
2910 }
2911
2912 /* pmap_extract_kernel INTERNAL
2913 **
2914 * Extract a translation from the kernel address space.
2915 */
2916 static INLINE bool
2917 pmap_extract_kernel(vaddr_t va, paddr_t *pap)
2918 {
2919 mmu_short_pte_t *pte;
2920
2921 pte = &kernCbase[(u_int)m68k_btop(va - KERNBASE3X)];
2922 if (!MMU_VALID_DT(*pte))
2923 return false;
2924 if (pap != NULL)
2925 *pap = MMU_PTE_PA(*pte);
2926 return true;
2927 }
2928
2929 /* pmap_extract INTERFACE
2930 **
2931 * Return the physical address mapped by the virtual address
2932 * in the specified pmap.
2933 *
2934 * Note: this function should also apply an exclusive lock
2935 * on the pmap system during its duration.
2936 */
2937 bool
2938 pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *pap)
2939 {
2940 int a_idx, b_idx, pte_idx;
2941 a_tmgr_t *a_tbl;
2942 b_tmgr_t *b_tbl;
2943 c_tmgr_t *c_tbl;
2944 mmu_short_pte_t *c_pte;
2945
2946 if (pmap == pmap_kernel())
2947 return pmap_extract_kernel(va, pap);
2948
2949 if (pmap_stroll(pmap, va, &a_tbl, &b_tbl, &c_tbl,
2950 &c_pte, &a_idx, &b_idx, &pte_idx) == false)
2951 return false;
2952
2953 if (!MMU_VALID_DT(*c_pte))
2954 return false;
2955
2956 if (pap != NULL)
2957 *pap = MMU_PTE_PA(*c_pte);
2958 return true;
2959 }
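
/*
 * A minimal usage sketch (hypothetical caller):
 *
 *	paddr_t pa;
 *
 *	if (pmap_extract(pmap_kernel(), va, &pa))
 *		... va is mapped and pa holds its physical address ...
 *	else
 *		... va is not mapped in the kernel pmap ...
 */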
2960
2961 /* pmap_remove_kernel INTERNAL
2962 **
2963 * Remove the mapping of a range of virtual addresses from the kernel map.
2964 * The arguments are already page-aligned.
2965 */
2966 static INLINE void
2967 pmap_remove_kernel(vaddr_t sva, vaddr_t eva)
2968 {
2969 int idx, eidx;
2970
2971 #ifdef PMAP_DEBUG
2972 if ((sva & PGOFSET) || (eva & PGOFSET))
2973 panic("pmap_remove_kernel: alignment");
2974 #endif
2975
2976 idx = m68k_btop(sva - KERNBASE3X);
2977 eidx = m68k_btop(eva - KERNBASE3X);
2978
2979 while (idx < eidx) {
2980 pmap_remove_pte(&kernCbase[idx++]);
2981 TBIS(sva);
2982 sva += PAGE_SIZE;
2983 }
2984 }
2985
2986 /* pmap_remove INTERFACE
2987 **
2988 * Remove the mapping of a range of virtual addresses from the given pmap.
2989 *
2990 */
2991 void
2992 pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva)
2993 {
2994
2995 if (pmap == pmap_kernel()) {
2996 pmap_remove_kernel(sva, eva);
2997 return;
2998 }
2999
3000 /*
3001 * If the pmap doesn't have an A table of its own, it has no mappings
3002 * that can be removed.
3003 */
3004 if (pmap->pm_a_tmgr == NULL)
3005 return;
3006
3007 /*
3008 * Remove the specified range from the pmap. If the function
3009 * returns true, the operation removed all the valid mappings
3010 * in the pmap and freed its A table. If this happened to the
3011 * currently loaded pmap, the MMU root pointer must be reloaded
3012 * with the default 'kernel' map.
3013 */
3014 if (pmap_remove_a(pmap->pm_a_tmgr, sva, eva)) {
3015 if (kernel_crp.rp_addr == pmap->pm_a_phys) {
3016 kernel_crp.rp_addr = kernAphys;
3017 loadcrp(&kernel_crp);
3018 /* will do TLB flush below */
3019 }
3020 pmap->pm_a_tmgr = NULL;
3021 pmap->pm_a_phys = kernAphys;
3022 }
3023
3024 /*
3025 * If we just modified the current address space,
3026 * make sure to flush the MMU cache.
3027 *
3028 * XXX - this could be an unnecessarily large flush.
3029 * XXX - Could decide, based on the size of the VA range
3030 * to be removed, whether to flush "by pages" or "all".
3031 */
3032 if (pmap == current_pmap())
3033 TBIAU();
3034 }
3035
3036 /* pmap_remove_a INTERNAL
3037 **
3038 * This is function number one in a set of three that removes a range
3039 * of memory in the most efficient manner by removing the highest possible
3040 * tables from the memory space. This particular function attempts to remove
3041 * as many B tables as it can, delegating the remaining fragmented ranges to
3042 * pmap_remove_b().
3043 *
3044 * If the removal operation results in an empty A table, the function returns
3045 * true.
3046 *
3047 * It's ugly but will do for now.
3048 */
3049 bool
3050 pmap_remove_a(a_tmgr_t *a_tbl, vaddr_t sva, vaddr_t eva)
3051 {
3052 bool empty;
3053 int idx;
3054 vaddr_t nstart, nend;
3055 b_tmgr_t *b_tbl;
3056 mmu_long_dte_t *a_dte;
3057 mmu_short_dte_t *b_dte;
3058 uint8_t at_wired, bt_wired;
3059
3060 /*
3061 * The following code works with what I call a 'granularity
3062 * reduction algorithm'. A range of addresses will always have
3063 * the following properties, which are classified according to
3064 * how the range relates to the size of the current granularity
3065 * - an A table entry:
3066 *
3067 * 1 2 3 4
3068 * -+---+---+---+---+---+---+---+-
3069 * -+---+---+---+---+---+---+---+-
3070 *
3071 * A range will always start on a granularity boundary, illustrated
3072 * by '+' signs in the table above, or it will start at some point
3073 	 * between two granularity boundaries, as illustrated by point 1.
3074 * The first step in removing a range of addresses is to remove the
3075 * range between 1 and 2, the nearest granularity boundary. This
3076 * job is handled by the section of code governed by the
3077 	 * 'if (sva < nstart)' statement.
3078 *
3079 * A range will always encompass zero or more integral granules,
3080 * illustrated by points 2 and 3. Integral granules are easy to
3081 * remove. The removal of these granules is the second step, and
3082 * is handled by the code block 'if (nstart < nend)'.
3083 *
3084 * Lastly, a range will always end on a granularity boundary,
3085 	 * illustrated by point 3, or it will fall just beyond one, as
3086 	 * illustrated by point 4.  The last step involves removing this
3087 	 * range and is handled by the code block 'if (nend < eva)'.
3088 */
3089 nstart = MMU_ROUND_UP_A(sva);
3090 nend = MMU_ROUND_A(eva);
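
	/*
	 * Illustration with hypothetical numbers: if an A entry spanned
	 * 0x1000000 bytes, removing [0x0800000, 0x3800000) would split
	 * into a partial head [0x0800000, 0x1000000) handled below by
	 * pmap_remove_b(), two whole granules [0x1000000, 0x3000000)
	 * whose B tables are freed outright, and a partial tail
	 * [0x3000000, 0x3800000) handled again by pmap_remove_b().
	 */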
3091
3092 at_wired = a_tbl->at_wcnt;
3093
3094 if (sva < nstart) {
3095 /*
3096 * This block is executed if the range starts between
3097 * a granularity boundary.
3098 *
3099 * First find the DTE which is responsible for mapping
3100 * the start of the range.
3101 */
3102 idx = MMU_TIA(sva);
3103 a_dte = &a_tbl->at_dtbl[idx];
3104
3105 /*
3106 * If the DTE is valid then delegate the removal of the sub
3107 * range to pmap_remove_b(), which can remove addresses at
3108 * a finer granularity.
3109 */
3110 if (MMU_VALID_DT(*a_dte)) {
3111 b_dte = mmu_ptov(a_dte->addr.raw);
3112 b_tbl = mmuB2tmgr(b_dte);
3113 bt_wired = b_tbl->bt_wcnt;
3114
3115 /*
3116 * The sub range to be removed starts at the start
3117 * of the full range we were asked to remove, and ends
3118 			 * at the lesser of:
3119 			 *   1. The end of the full range, -or-
3120 			 *   2. The start of the range, rounded up to the
3121 			 *      nearest granularity boundary.
3122 */
3123 if (eva < nstart)
3124 empty = pmap_remove_b(b_tbl, sva, eva);
3125 else
3126 empty = pmap_remove_b(b_tbl, sva, nstart);
3127
3128 /*
3129 * If the child table no longer has wired entries,
3130 * decrement wired entry count.
3131 */
3132 if (bt_wired && b_tbl->bt_wcnt == 0)
3133 a_tbl->at_wcnt--;
3134
3135 /*
3136 * If the removal resulted in an empty B table,
3137 * invalidate the DTE that points to it and decrement
3138 * the valid entry count of the A table.
3139 */
3140 if (empty) {
3141 a_dte->attr.raw = MMU_DT_INVALID;
3142 a_tbl->at_ecnt--;
3143 }
3144 }
3145 /*
3146 * If the DTE is invalid, the address range is already non-
3147 * existent and can simply be skipped.
3148 */
3149 }
3150 if (nstart < nend) {
3151 /*
3152 * This block is executed if the range spans a whole number
3153 * multiple of granules (A table entries.)
3154 *
3155 * First find the DTE which is responsible for mapping
3156 * the start of the first granule involved.
3157 */
3158 idx = MMU_TIA(nstart);
3159 a_dte = &a_tbl->at_dtbl[idx];
3160
3161 /*
3162 * Remove entire sub-granules (B tables) one at a time,
3163 * until reaching the end of the range.
3164 */
3165 for (; nstart < nend; a_dte++, nstart += MMU_TIA_RANGE)
3166 if (MMU_VALID_DT(*a_dte)) {
3167 /*
3168 * Find the B table manager for the
3169 * entry and free it.
3170 */
3171 b_dte = mmu_ptov(a_dte->addr.raw);
3172 b_tbl = mmuB2tmgr(b_dte);
3173 bt_wired = b_tbl->bt_wcnt;
3174
3175 free_b_table(b_tbl, true);
3176
3177 /*
3178 				 * All child entries have been removed.
3179 * If there were any wired entries in it,
3180 * decrement wired entry count.
3181 */
3182 if (bt_wired)
3183 a_tbl->at_wcnt--;
3184
3185 /*
3186 * Invalidate the DTE that points to the
3187 * B table and decrement the valid entry
3188 * count of the A table.
3189 */
3190 a_dte->attr.raw = MMU_DT_INVALID;
3191 a_tbl->at_ecnt--;
3192 }
3193 }
3194 if (nend < eva) {
3195 /*
3196 * This block is executed if the range ends beyond a
3197 * granularity boundary.
3198 *
3199 * First find the DTE which is responsible for mapping
3200 * the start of the nearest (rounded down) granularity
3201 * boundary.
3202 */
3203 idx = MMU_TIA(nend);
3204 a_dte = &a_tbl->at_dtbl[idx];
3205
3206 /*
3207 * If the DTE is valid then delegate the removal of the sub
3208 * range to pmap_remove_b(), which can remove addresses at
3209 * a finer granularity.
3210 */
3211 if (MMU_VALID_DT(*a_dte)) {
3212 /*
3213 * Find the B table manager for the entry
3214 * and hand it to pmap_remove_b() along with
3215 * the sub range.
3216 */
3217 b_dte = mmu_ptov(a_dte->addr.raw);
3218 b_tbl = mmuB2tmgr(b_dte);
3219 bt_wired = b_tbl->bt_wcnt;
3220
3221 empty = pmap_remove_b(b_tbl, nend, eva);
3222
3223 /*
3224 * If the child table no longer has wired entries,
3225 * decrement wired entry count.
3226 */
3227 if (bt_wired && b_tbl->bt_wcnt == 0)
3228 a_tbl->at_wcnt--;
3229 /*
3230 * If the removal resulted in an empty B table,
3231 * invalidate the DTE that points to it and decrement
3232 * the valid entry count of the A table.
3233 */
3234 if (empty) {
3235 a_dte->attr.raw = MMU_DT_INVALID;
3236 a_tbl->at_ecnt--;
3237 }
3238 }
3239 }
3240
3241 /*
3242 * If there are no more entries in the A table, release it
3243 * back to the available pool and return true.
3244 */
3245 if (a_tbl->at_ecnt == 0) {
3246 KASSERT(a_tbl->at_wcnt == 0);
3247 a_tbl->at_parent = NULL;
3248 if (!at_wired)
3249 TAILQ_REMOVE(&a_pool, a_tbl, at_link);
3250 TAILQ_INSERT_HEAD(&a_pool, a_tbl, at_link);
3251 empty = true;
3252 } else {
3253 /*
3254 * If the table doesn't have wired entries any longer
3255 * but still has unwired entries, put it back into
3256 * the available queue.
3257 */
3258 if (at_wired && a_tbl->at_wcnt == 0)
3259 TAILQ_INSERT_TAIL(&a_pool, a_tbl, at_link);
3260 empty = false;
3261 }
3262
3263 return empty;
3264 }
3265
3266 /* pmap_remove_b INTERNAL
3267 **
3268 * Remove a range of addresses from an address space, trying to remove entire
3269 * C tables if possible.
3270 *
3271 * If the operation results in an empty B table, the function returns true.
3272 */
3273 bool
3274 pmap_remove_b(b_tmgr_t *b_tbl, vaddr_t sva, vaddr_t eva)
3275 {
3276 bool empty;
3277 int idx;
3278 vaddr_t nstart, nend, rstart;
3279 c_tmgr_t *c_tbl;
3280 mmu_short_dte_t *b_dte;
3281 mmu_short_pte_t *c_dte;
3282 uint8_t bt_wired, ct_wired;
3283
3284 nstart = MMU_ROUND_UP_B(sva);
3285 nend = MMU_ROUND_B(eva);
3286
3287 bt_wired = b_tbl->bt_wcnt;
3288
3289 if (sva < nstart) {
3290 idx = MMU_TIB(sva);
3291 b_dte = &b_tbl->bt_dtbl[idx];
3292 if (MMU_VALID_DT(*b_dte)) {
3293 c_dte = mmu_ptov(MMU_DTE_PA(*b_dte));
3294 c_tbl = mmuC2tmgr(c_dte);
3295 ct_wired = c_tbl->ct_wcnt;
3296
3297 if (eva < nstart)
3298 empty = pmap_remove_c(c_tbl, sva, eva);
3299 else
3300 empty = pmap_remove_c(c_tbl, sva, nstart);
3301
3302 /*
3303 * If the child table no longer has wired entries,
3304 * decrement wired entry count.
3305 */
3306 if (ct_wired && c_tbl->ct_wcnt == 0)
3307 b_tbl->bt_wcnt--;
3308
3309 if (empty) {
3310 b_dte->attr.raw = MMU_DT_INVALID;
3311 b_tbl->bt_ecnt--;
3312 }
3313 }
3314 }
3315 if (nstart < nend) {
3316 idx = MMU_TIB(nstart);
3317 b_dte = &b_tbl->bt_dtbl[idx];
3318 rstart = nstart;
3319 while (rstart < nend) {
3320 if (MMU_VALID_DT(*b_dte)) {
3321 c_dte = mmu_ptov(MMU_DTE_PA(*b_dte));
3322 c_tbl = mmuC2tmgr(c_dte);
3323 ct_wired = c_tbl->ct_wcnt;
3324
3325 free_c_table(c_tbl, true);
3326
3327 /*
3328 				 * All child entries have been removed.
3329 * If there were any wired entries in it,
3330 * decrement wired entry count.
3331 */
3332 if (ct_wired)
3333 b_tbl->bt_wcnt--;
3334
3335 b_dte->attr.raw = MMU_DT_INVALID;
3336 b_tbl->bt_ecnt--;
3337 }
3338 b_dte++;
3339 rstart += MMU_TIB_RANGE;
3340 }
3341 }
3342 if (nend < eva) {
3343 idx = MMU_TIB(nend);
3344 b_dte = &b_tbl->bt_dtbl[idx];
3345 if (MMU_VALID_DT(*b_dte)) {
3346 c_dte = mmu_ptov(MMU_DTE_PA(*b_dte));
3347 c_tbl = mmuC2tmgr(c_dte);
3348 ct_wired = c_tbl->ct_wcnt;
3349 empty = pmap_remove_c(c_tbl, nend, eva);
3350
3351 /*
3352 * If the child table no longer has wired entries,
3353 * decrement wired entry count.
3354 */
3355 if (ct_wired && c_tbl->ct_wcnt == 0)
3356 b_tbl->bt_wcnt--;
3357
3358 if (empty) {
3359 b_dte->attr.raw = MMU_DT_INVALID;
3360 b_tbl->bt_ecnt--;
3361 }
3362 }
3363 }
3364
3365 if (b_tbl->bt_ecnt == 0) {
3366 KASSERT(b_tbl->bt_wcnt == 0);
3367 b_tbl->bt_parent = NULL;
3368 if (!bt_wired)
3369 TAILQ_REMOVE(&b_pool, b_tbl, bt_link);
3370 TAILQ_INSERT_HEAD(&b_pool, b_tbl, bt_link);
3371 empty = true;
3372 } else {
3373 /*
3374 * If the table doesn't have wired entries any longer
3375 * but still has unwired entries, put it back into
3376 * the available queue.
3377 */
3378 if (bt_wired && b_tbl->bt_wcnt == 0)
3379 TAILQ_INSERT_TAIL(&b_pool, b_tbl, bt_link);
3380
3381 empty = false;
3382 }
3383
3384 return empty;
3385 }
3386
3387 /* pmap_remove_c INTERNAL
3388 **
3389 * Remove a range of addresses from the given C table.
3390 */
3391 bool
3392 pmap_remove_c(c_tmgr_t *c_tbl, vaddr_t sva, vaddr_t eva)
3393 {
3394 bool empty;
3395 int idx;
3396 mmu_short_pte_t *c_pte;
3397 uint8_t ct_wired;
3398
3399 ct_wired = c_tbl->ct_wcnt;
3400
3401 idx = MMU_TIC(sva);
3402 c_pte = &c_tbl->ct_dtbl[idx];
3403 for (; sva < eva; sva += MMU_PAGE_SIZE, c_pte++) {
3404 if (MMU_VALID_DT(*c_pte)) {
3405 if (c_pte->attr.raw & MMU_SHORT_PTE_WIRED)
3406 c_tbl->ct_wcnt--;
3407 pmap_remove_pte(c_pte);
3408 c_tbl->ct_ecnt--;
3409 }
3410 }
3411
3412 if (c_tbl->ct_ecnt == 0) {
3413 KASSERT(c_tbl->ct_wcnt == 0);
3414 c_tbl->ct_parent = NULL;
3415 if (!ct_wired)
3416 TAILQ_REMOVE(&c_pool, c_tbl, ct_link);
3417 TAILQ_INSERT_HEAD(&c_pool, c_tbl, ct_link);
3418 empty = true;
3419 } else {
3420 /*
3421 * If the table doesn't have wired entries any longer
3422 * but still has unwired entries, put it back into
3423 * the available queue.
3424 */
3425 if (ct_wired && c_tbl->ct_wcnt == 0)
3426 TAILQ_INSERT_TAIL(&c_pool, c_tbl, ct_link);
3427 empty = false;
3428 }
3429
3430 return empty;
3431 }
3432
3433 /* pmap_bootstrap_alloc INTERNAL
3434 **
3435 * Used internally for memory allocation at startup when malloc is not
3436 * available. This code will fail once it crosses the first memory
3437 * bank boundary on the 3/80. Hopefully by then however, the VM system
3438 * will be in charge of allocation.
3439 */
3440 void *
3441 pmap_bootstrap_alloc(int size)
3442 {
3443 void *rtn;
3444
3445 #ifdef PMAP_DEBUG
3446 if (bootstrap_alloc_enabled == false) {
3447 mon_printf("pmap_bootstrap_alloc: disabled\n");
3448 sunmon_abort();
3449 }
3450 #endif
3451
3452 rtn = (void *) virtual_avail;
3453 virtual_avail += size;
3454
3455 #ifdef PMAP_DEBUG
3456 if (virtual_avail > virtual_contig_end) {
3457 mon_printf("pmap_bootstrap_alloc: out of mem\n");
3458 sunmon_abort();
3459 }
3460 #endif
3461
3462 return rtn;
3463 }
3464
3465 /* pmap_bootstrap_aalign INTERNAL
3466 **
3467  * Used to ensure that the next call to pmap_bootstrap_alloc() will
3468 * return a chunk of memory aligned to the specified size.
3469 *
3470 * Note: This function will only support alignment sizes that are powers
3471 * of two.
3472 */
3473 void
3474 pmap_bootstrap_aalign(int size)
3475 {
3476 int off;
3477
3478 off = virtual_avail & (size - 1);
3479 if (off) {
3480 (void)pmap_bootstrap_alloc(size - off);
3481 }
3482 }
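
/*
 * Illustration (hypothetical value): if virtual_avail ended in ...0x18
 * and the requested alignment were 0x100, 'off' would be 0x18 and the
 * allocation above would consume 0xe8 bytes, leaving virtual_avail on a
 * 0x100 byte boundary for the next pmap_bootstrap_alloc().
 */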
3483
3484 /* pmap_pa_exists
3485 **
3486 * Used by the /dev/mem driver to see if a given PA is memory
3487 * that can be mapped. (The PA is not in a hole.)
3488 */
3489 int
3490 pmap_pa_exists(paddr_t pa)
3491 {
3492 int i;
3493
3494 for (i = 0; i < SUN3X_NPHYS_RAM_SEGS; i++) {
3495 if ((pa >= avail_mem[i].pmem_start) &&
3496 (pa < avail_mem[i].pmem_end))
3497 return 1;
3498 if (avail_mem[i].pmem_next == NULL)
3499 break;
3500 }
3501 return 0;
3502 }
3503
3504 /* Called only from locore.s and pmap.c */
3505 void _pmap_switch(pmap_t pmap);
3506
3507 /*
3508 * _pmap_switch INTERNAL
3509 *
3510 * This is called by locore.s:cpu_switch() when it is
3511 * switching to a new process. Load new translations.
3512 * Note: done in-line by locore.s unless PMAP_DEBUG
3513 *
3514 * Note that we do NOT allocate a context here, but
3515 * share the "kernel only" context until we really
3516 * need our own context for user-space mappings in
3517 * pmap_enter_user(). [ s/context/mmu A table/ ]
3518 */
3519 void
3520 _pmap_switch(pmap_t pmap)
3521 {
3522 u_long rootpa;
3523
3524 /*
3525 * Only do reload/flush if we have to.
3526 * Note that if the old and new process
3527 * were BOTH using the "null" context,
3528 * then this will NOT flush the TLB.
3529 */
3530 rootpa = pmap->pm_a_phys;
3531 if (kernel_crp.rp_addr != rootpa) {
3532 DPRINT(("pmap_activate(%p)\n", pmap));
3533 kernel_crp.rp_addr = rootpa;
3534 loadcrp(&kernel_crp);
3535 TBIAU();
3536 }
3537 }
3538
3539 /*
3540 * Exported version of pmap_activate(). This is called from the
3541 * machine-independent VM code when a process is given a new pmap.
3542 * If (p == curlwp) do like cpu_switch would do; otherwise just
3543 * take this as notification that the process has a new pmap.
3544 */
3545 void
3546 pmap_activate(struct lwp *l)
3547 {
3548
3549 if (l->l_proc == curproc) {
3550 _pmap_switch(l->l_proc->p_vmspace->vm_map.pmap);
3551 }
3552 }
3553
3554 /*
3555 * pmap_deactivate INTERFACE
3556 **
3557 * This is called to deactivate the specified process's address space.
3558 */
3559 void
3560 pmap_deactivate(struct lwp *l)
3561 {
3562
3563 /* Nothing to do. */
3564 }
3565
3566 /*
3567 * Fill in the sun3x-specific part of the kernel core header
3568 * for dumpsys(). (See machdep.c for the rest.)
3569 */
3570 void
3571 pmap_kcore_hdr(struct sun3x_kcore_hdr *sh)
3572 {
3573 u_long spa, len;
3574 int i;
3575
3576 sh->pg_frame = MMU_SHORT_PTE_BASEADDR;
3577 sh->pg_valid = MMU_DT_PAGE;
3578 sh->contig_end = virtual_contig_end;
3579 sh->kernCbase = (u_long)kernCbase;
3580 for (i = 0; i < SUN3X_NPHYS_RAM_SEGS; i++) {
3581 spa = avail_mem[i].pmem_start;
3582 spa = m68k_trunc_page(spa);
3583 len = avail_mem[i].pmem_end - spa;
3584 len = m68k_round_page(len);
3585 sh->ram_segs[i].start = spa;
3586 sh->ram_segs[i].size = len;
3587 }
3588 }


/* pmap_virtual_space INTERFACE
 **
 * Return the current available range of virtual addresses in the
 * arguments provided.  Only really called once.
 */
void
pmap_virtual_space(vaddr_t *vstart, vaddr_t *vend)
{

	*vstart = virtual_avail;
	*vend = virtual_end;
}

/*
 * Provide memory to the VM system.
 *
 * Assume avail_start is always in the
 * first segment as pmap_bootstrap does.
 */
static void
pmap_page_upload(void)
{
	paddr_t a, b;	/* memory range */
	int i;

	/* Supply the memory in segments. */
	for (i = 0; i < SUN3X_NPHYS_RAM_SEGS; i++) {
		a = atop(avail_mem[i].pmem_start);
		b = atop(avail_mem[i].pmem_end);
		if (i == 0)
			a = atop(avail_start);
		if (avail_mem[i].pmem_end > avail_end)
			b = atop(avail_end);

		uvm_page_physload(a, b, a, b, VM_FREELIST_DEFAULT);

		if (avail_mem[i].pmem_next == NULL)
			break;
	}
}
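
/*
 * Worked example (the numbers are made up for illustration): with the
 * 8 KB pages used on this platform, a first segment spanning physical
 * 0x00000000 - 0x00e00000 whose first free byte (avail_start) is
 * 0x00200000 would be handed to UVM as
 *
 *	uvm_page_physload(atop(0x00200000), atop(0x00e00000),
 *	    atop(0x00200000), atop(0x00e00000), VM_FREELIST_DEFAULT);
 *
 * i.e. page frames 0x100 through 0x6ff inclusive.
 */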

/* pmap_count INTERFACE
 **
 * Return the number of resident (valid) pages in the given pmap.
 *
 * Note:  If this function is handed the kernel map, it will report
 * that it has no mappings.  Hopefully the VM system won't ask for kernel
 * map statistics.
 */
segsz_t
pmap_count(pmap_t pmap, int type)
{
	u_int count;
	int a_idx, b_idx;
	a_tmgr_t *a_tbl;
	b_tmgr_t *b_tbl;
	c_tmgr_t *c_tbl;

	/*
	 * If the pmap does not have its own A table manager, it has no
	 * valid entries.
	 */
	if (pmap->pm_a_tmgr == NULL)
		return 0;

	a_tbl = pmap->pm_a_tmgr;

	count = 0;
	for (a_idx = 0; a_idx < MMU_TIA(KERNBASE3X); a_idx++) {
		if (MMU_VALID_DT(a_tbl->at_dtbl[a_idx])) {
			b_tbl = mmuB2tmgr(mmu_ptov(a_tbl->at_dtbl[a_idx].addr.raw));
			for (b_idx = 0; b_idx < MMU_B_TBL_SIZE; b_idx++) {
				if (MMU_VALID_DT(b_tbl->bt_dtbl[b_idx])) {
					c_tbl = mmuC2tmgr(
					    mmu_ptov(MMU_DTE_PA(b_tbl->bt_dtbl[b_idx])));
					if (type == 0)
						/*
						 * A resident entry count has been requested.
						 */
						count += c_tbl->ct_ecnt;
					else
						/*
						 * A wired entry count has been requested.
						 */
						count += c_tbl->ct_wcnt;
				}
			}
		}
	}

	return count;
}
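
/*
 * For reference (an assumption, not verified against the sun3x headers
 * here): the MI resident/wired statistics are presumably obtained from
 * this routine through wrapper macros along the lines of
 *
 *	#define pmap_resident_count(pmap)	pmap_count((pmap), 0)
 *	#define pmap_wired_count(pmap)		pmap_count((pmap), 1)
 */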

/************************ SUN3 COMPATIBILITY ROUTINES ********************
 * The following routines are only used by DDB for tricky kernel text   *
 * operations in db_memrw.c.  They are provided for sun3 compatibility. *
 *************************************************************************/
/* get_pte INTERNAL
 **
 * Return the page descriptor that describes the kernel mapping
 * of the given virtual address.
 */
extern u_long ptest_addr(u_long);	/* XXX: locore.s */
u_int
get_pte(vaddr_t va)
{
	u_long pte_pa;
	mmu_short_pte_t *pte;

	/* Get the physical address of the PTE */
	pte_pa = ptest_addr(va & ~PGOFSET);

	/* Convert to a virtual address... */
	pte = (mmu_short_pte_t *) (KERNBASE3X + pte_pa);

	/* Make sure it is in our level-C tables... */
	if ((pte < kernCbase) ||
	    (pte >= &mmuCbase[NUM_USER_PTES]))
		return 0;

	/* ... and just return its contents. */
	return (pte->attr.raw);
}


/* set_pte INTERNAL
 **
 * Set the page descriptor that describes the kernel mapping
 * of the given virtual address.
 */
void
set_pte(vaddr_t va, u_int pte)
{
	u_long idx;

	if (va < KERNBASE3X)
		return;

	idx = (unsigned long) m68k_btop(va - KERNBASE3X);
	kernCbase[idx].attr.raw = pte;
	TBIS(va);
}
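
/*
 * Illustrative sketch only (not copied from db_memrw.c): a debugger
 * write into protected kernel text can be bracketed with the two
 * routines above.  WRITE_PROTECT_BIT is a stand-in name for whatever
 * bit the short PTE format uses for write protection:
 *
 *	u_int opte;
 *
 *	opte = get_pte(va);
 *	set_pte(va, opte & ~WRITE_PROTECT_BIT);	-- make the page writable
 *	...store the breakpoint or patched instruction...
 *	set_pte(va, opte);			-- restore the old PTE
 */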

/*
 * Routine:	pmap_procwr
 *
 * Function:
 *	Synchronize caches corresponding to [addr, addr+len) in p.
 */
void
pmap_procwr(struct proc *p, vaddr_t va, size_t len)
{

	(void)cachectl1(0x80000004, va, len, p);
}
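
/*
 * Note (an assumption about the magic number, not verified here): the
 * 0x80000004 request to cachectl1() presumably corresponds to
 * CC_EXTPURGE | CC_IPURGE, i.e. purge the on-chip instruction cache
 * and any external cache for the given range so that freshly written
 * instructions are seen by the CPU.
 */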


#ifdef PMAP_DEBUG
/************************** DEBUGGING ROUTINES **************************
 * The following routines are meant to be an aid to debugging the pmap  *
 * system.  They are callable from the DDB command line and should be   *
 * prepared to be handed unstable or incomplete states of the system.   *
 ************************************************************************/

/* pv_list
 **
 * List all pages found on the pv list for the given physical page.
 * To avoid endless loops, the listing will stop at the end of the list
 * or after 'n' entries - whichever comes first.
 */
void
pv_list(paddr_t pa, int n)
{
	int idx;
	vaddr_t va;
	pv_t *pv;
	c_tmgr_t *c_tbl;
	pmap_t pmap;

	pv = pa2pv(pa);
	idx = pv->pv_idx;
	for (; idx != PVE_EOL && n > 0; idx = pvebase[idx].pve_next, n--) {
		va = pmap_get_pteinfo(idx, &pmap, &c_tbl);
		printf("idx %d, pmap 0x%x, va 0x%x, c_tbl %x\n",
		    idx, (u_int) pmap, (u_int) va, (u_int) c_tbl);
	}
}
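
/*
 * Example DDB invocation (the physical address is made up for
 * illustration):
 *
 *	db> call pv_list(0x1b8000, 8)
 *
 * which lists at most eight mappings of physical page 0x1b8000,
 * one "idx/pmap/va/c_tbl" line per mapping.
 */
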
#endif	/* PMAP_DEBUG */

#ifdef NOT_YET
/* and maybe not ever */
/************************** LOW-LEVEL ROUTINES **************************
 * These routines will eventually be re-written into assembly and placed*
 * in locore.s.  They are here now as stubs so that the pmap module can *
 * be linked as a standalone user program for testing.                  *
 ************************************************************************/
/* flush_atc_crp INTERNAL
 **
 * Flush all page descriptors derived from the given CPU Root Pointer
 * (CRP), or 'A' table as it is known here, from the 68851's automatic
 * cache.
 */
void
flush_atc_crp(int a_tbl)
{
	mmu_long_rp_t rp;

	/* Create a temporary root table pointer that points to the
	 * given A table.
	 */
	rp.attr.raw = ~MMU_LONG_RP_LU;
	rp.addr.raw = (unsigned int) a_tbl;

	mmu_pflushr(&rp);
	/* mmu_pflushr:
	 *	movel	sp(4)@,a0
	 *	pflushr	a0@
	 *	rts
	 */
}
#endif	/* NOT_YET */
