1 /* $NetBSD: e500_tlb.c,v 1.24 2022/05/31 08:43:15 andvar Exp $ */
2 /*-
3 * Copyright (c) 2010, 2011 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Raytheon BBN Technologies Corp and Defense Advanced Research Projects
8 * Agency and which was developed by Matt Thomas of 3am Software Foundry.
9 *
10 * This material is based upon work supported by the Defense Advanced Research
11 * Projects Agency and Space and Naval Warfare Systems Center, Pacific, under
12 * Contract No. N66001-09-C-2073.
13 * Approved for Public Release, Distribution Unlimited
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
25 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
26 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
27 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
28 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36
37 #define __PMAP_PRIVATE
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: e500_tlb.c,v 1.24 2022/05/31 08:43:15 andvar Exp $");
41
42 #ifdef _KERNEL_OPT
43 #include "opt_multiprocessor.h"
44 #include "opt_pmap.h"
45 #include "opt_ppcparam.h"
46 #endif
47
48 #include <sys/param.h>
49
50 #include <uvm/uvm_extern.h>
51
52 #include <powerpc/spr.h>
53 #include <powerpc/booke/spr.h>
54 #include <powerpc/booke/cpuvar.h>
55 #include <powerpc/booke/e500reg.h>
56 #include <powerpc/booke/e500var.h>
57 #include <powerpc/booke/pmap.h>
58
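/*
 * Three views of a TLB entry are used below: struct e500_tlb is the
 * decoded software form (va, pte, asid, size), struct e500_hwtlb is
 * the raw MAS0..MAS3 image as written to the MMU assist registers,
 * and struct e500_xtlb pairs the two with a reference count for the
 * TLB1 entries tracked in e500_tlb1 below.
 */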
59 struct e500_tlb {
60 vaddr_t tlb_va;
61 uint32_t tlb_pte;
62 uint32_t tlb_asid;
63 vsize_t tlb_size;
64 };
65
66 struct e500_hwtlb {
67 uint32_t hwtlb_mas0;
68 uint32_t hwtlb_mas1;
69 uint32_t hwtlb_mas2;
70 uint32_t hwtlb_mas3;
71 };
72
73 struct e500_xtlb {
74 struct e500_tlb e_tlb;
75 struct e500_hwtlb e_hwtlb;
76 u_long e_refcnt;
77 };
78
79 static struct e500_tlb1 {
80 uint32_t tlb1_maxsize;
81 uint32_t tlb1_minsize;
82 u_int tlb1_numentries;
83 u_int tlb1_numfree;
84 u_int tlb1_freelist[32];
85 struct e500_xtlb tlb1_entries[32];
86 } e500_tlb1;
87
88 static inline register_t mftlb0cfg(void) __pure;
89 static inline register_t mftlb1cfg(void) __pure;
90
91 static inline register_t
92 mftlb0cfg(void)
93 {
94 register_t tlb0cfg;
95 __asm("mfspr %0, %1" : "=r"(tlb0cfg) : "n"(SPR_TLB0CFG));
96 return tlb0cfg;
97 }
98
99 static inline register_t
100 mftlb1cfg(void)
101 {
102 register_t tlb1cfg;
103 __asm("mfspr %0, %1" : "=r"(tlb1cfg) : "n"(SPR_TLB1CFG));
104 return tlb1cfg;
105 }
106
107 static struct e500_tlb
108 hwtlb_to_tlb(const struct e500_hwtlb hwtlb)
109 {
110 struct e500_tlb tlb;
111 register_t prot_mask;
112 u_int prot_shift;
113
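/*
 * Decode the raw MAS images: MAS2[EPN] gives the va, MAS1[TSIZE]
 * encodes the size as 4^TSIZE KB (so 1024 << (2 * TSIZE) bytes,
 * e.g. TSIZE 1 -> 4KB, TSIZE 7 -> 16MB), MAS1[TID] is the ASID, and
 * the WIMGE, RPN and permission bits are folded into a software PTE.
 */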
114 tlb.tlb_va = MAS2_EPN & hwtlb.hwtlb_mas2;
115 tlb.tlb_size = 1024 << (2 * MASX_TSIZE_GET(hwtlb.hwtlb_mas1));
116 tlb.tlb_asid = MASX_TID_GET(hwtlb.hwtlb_mas1);
117 tlb.tlb_pte = (hwtlb.hwtlb_mas2 & MAS2_WIMGE)
118 | (hwtlb.hwtlb_mas3 & MAS3_RPN);
119 if (hwtlb.hwtlb_mas1 & MAS1_TS) {
120 prot_mask = MAS3_UX|MAS3_UW|MAS3_UR;
121 prot_shift = PTE_RWX_SHIFT - 1;
122 } else {
123 prot_mask = MAS3_SX|MAS3_SW|MAS3_SR;
124 prot_shift = PTE_RWX_SHIFT;
125 }
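/*
 * The MAS3 U* permission bits sit one bit position above the
 * corresponding S* bits, hence the shift for a TS=1 (user) entry
 * is one less; compare the CTASSERTs in tlb_to_hwtlb() below.
 */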
126 tlb.tlb_pte |= (prot_mask & hwtlb.hwtlb_mas3) << prot_shift;
127 return tlb;
128 }
129
130 static inline struct e500_hwtlb
131 hwtlb_read(uint32_t mas0, u_int slot)
132 {
133 struct e500_hwtlb hwtlb;
134 register_t tlbcfg;
135
136 if (__predict_true(mas0 == MAS0_TLBSEL_TLB0)) {
137 tlbcfg = mftlb0cfg();
138 } else if (mas0 == MAS0_TLBSEL_TLB1) {
139 tlbcfg = mftlb1cfg();
140 } else {
141 panic("%s:%d: unexpected MAS0 %#" PRIx32,
142 __func__, __LINE__, mas0);
143 }
144
145 /*
146 * ESEL selects the way we want to look up.
147 * If tlbassoc is the same as tlbentries (like in TLB1) then the TLB is
148 * fully associative and the entire slot is placed into ESEL. If tlbassoc
149 * is less than the number of tlb entries, the slot is split in two
150 * fields. Since the TLB is M rows by N ways, the lower bits are for the
151 * row (MAS2[EPN]) and the upper bits for the way (MAS0[ESEL]).
152 */
153 const u_int tlbassoc = TLBCFG_ASSOC(tlbcfg);
154 const u_int tlbentries = TLBCFG_NENTRY(tlbcfg);
155 const u_int esel_shift =
156 __builtin_clz(tlbassoc) - __builtin_clz(tlbentries);
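/*
 * For example (the real numbers come from TLB0CFG): a 512-entry,
 * 4-way TLB0 has 128 rows, so esel_shift is clz(4) - clz(512) = 7;
 * slot >> 7 selects the way via MAS0[ESEL] and the low bits give the
 * row index that is written into MAS2[EPN] below.
 */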
157
158 /*
159 * Disable interrupts since we don't want anyone else mucking with
160 * the MMU Assist registers
161 */
162 const register_t msr = wrtee(0);
163 const register_t saved_mas0 = mfspr(SPR_MAS0);
164 mtspr(SPR_MAS0, mas0 | MAS0_ESEL_MAKE(slot >> esel_shift));
165
166 if (__predict_true(tlbassoc < tlbentries))
167 mtspr(SPR_MAS2, slot << PAGE_SHIFT);
168
169 /*
170 * Now select the entry and grab its contents.
171 */
172 __asm volatile("tlbre");
173
174 hwtlb.hwtlb_mas0 = mfspr(SPR_MAS0);
175 hwtlb.hwtlb_mas1 = mfspr(SPR_MAS1);
176 hwtlb.hwtlb_mas2 = mfspr(SPR_MAS2);
177 hwtlb.hwtlb_mas3 = mfspr(SPR_MAS3);
178
179 mtspr(SPR_MAS0, saved_mas0);
180 wrtee(msr); /* restore interrupts */
181
182 return hwtlb;
183 }
184
185 static inline void
186 hwtlb_write(const struct e500_hwtlb hwtlb, bool needs_sync)
187 {
188 const register_t msr = wrtee(0);
189 const uint32_t saved_mas0 = mfspr(SPR_MAS0);
190
191 /*
192 * Need to always write MAS0 and MAS1
193 */
194 mtspr(SPR_MAS0, hwtlb.hwtlb_mas0);
195 mtspr(SPR_MAS1, hwtlb.hwtlb_mas1);
196
197 /*
198 * Only write the EPN/WIMGE (MAS2) if this is in TLB0 or if it's a valid mapping.
199 */
200 if ((hwtlb.hwtlb_mas0 & MAS0_TLBSEL) == MAS0_TLBSEL_TLB0
201 || (hwtlb.hwtlb_mas1 & MAS1_V)) {
202 mtspr(SPR_MAS2, hwtlb.hwtlb_mas2);
203 }
204 /*
205 * Only need to write the RPN/prot if we are dealing with a valid
206 * mapping.
207 */
208 if (hwtlb.hwtlb_mas1 & MAS1_V) {
209 mtspr(SPR_MAS3, hwtlb.hwtlb_mas3);
210 //mtspr(SPR_MAS7, 0);
211 }
212
213 #if 0
214 printf("%s->[%x,%x,%x,%x]\n",
215 __func__,
216 hwtlb.hwtlb_mas0, hwtlb.hwtlb_mas1,
217 hwtlb.hwtlb_mas2, hwtlb.hwtlb_mas3);
218 #endif
219 __asm volatile("tlbwe");
220 if (needs_sync) {
221 __asm volatile("tlbsync\n\tisync\n\tsync");
222 }
223
224 mtspr(SPR_MAS0, saved_mas0);
225 wrtee(msr);
226 }
227
228 static struct e500_hwtlb
229 tlb_to_hwtlb(const struct e500_tlb tlb)
230 {
231 struct e500_hwtlb hwtlb;
232
233 KASSERT(trunc_page(tlb.tlb_va) == tlb.tlb_va);
234 KASSERT(tlb.tlb_size != 0);
235 KASSERT((tlb.tlb_size & (tlb.tlb_size - 1)) == 0);
236 const uint32_t prot_mask = tlb.tlb_pte & PTE_RWX_MASK;
237 if (__predict_true(tlb.tlb_size == PAGE_SIZE)) {
238 hwtlb.hwtlb_mas0 = 0;
239 hwtlb.hwtlb_mas1 = MAS1_V | MASX_TSIZE_MAKE(1);
240 /*
241 * A non-zero ASID means this is a user page so mark it as
242 * being in the user's address space.
243 */
244 if (tlb.tlb_asid) {
245 hwtlb.hwtlb_mas1 |= MAS1_TS
246 | MASX_TID_MAKE(tlb.tlb_asid);
247 hwtlb.hwtlb_mas3 = (prot_mask >> (PTE_RWX_SHIFT - 1))
248 | ((prot_mask & ~PTE_xX) >> PTE_RWX_SHIFT);
249 KASSERT(prot_mask & PTE_xR);
250 KASSERT(hwtlb.hwtlb_mas3 & MAS3_UR);
251 CTASSERT(MAS3_UR == (PTE_xR >> (PTE_RWX_SHIFT - 1)));
252 CTASSERT(MAS3_SR == (PTE_xR >> PTE_RWX_SHIFT));
253 } else {
254 hwtlb.hwtlb_mas3 = prot_mask >> PTE_RWX_SHIFT;
255 }
256 if (tlb.tlb_pte & PTE_UNMODIFIED)
257 hwtlb.hwtlb_mas3 &= ~(MAS3_UW|MAS3_SW);
258 if (tlb.tlb_pte & PTE_UNSYNCED)
259 hwtlb.hwtlb_mas3 &= ~(MAS3_UX|MAS3_SX);
260 } else {
261 KASSERT(tlb.tlb_asid == 0);
262 KASSERT((tlb.tlb_size & 0xaaaaa7ff) == 0);
263 u_int cntlz = __builtin_clz(tlb.tlb_size);
264 KASSERT(cntlz & 1);
265 KASSERT(cntlz <= 19);
266 hwtlb.hwtlb_mas0 = MAS0_TLBSEL_TLB1;
267 /*
268 * TSIZE is defined as (4^TSIZE) KB, except a TSIZE of 0 is not
269 * allowed. So 1KB would be 0x00000400, giving 21 leading zero
270 * bits. Subtracting the number of leading zero bits from 21
271 * and dividing by 2 gives us the TSIZE value the MMU wants.
272 */
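/*
 * For example, a 16MB entry has tlb_size 0x01000000, so cntlz is 7
 * and (21 - 7) / 2 = 7; 4^7 KB is indeed 16MB.
 */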
273 hwtlb.hwtlb_mas1 = MASX_TSIZE_MAKE(((31 - 10) - cntlz) / 2)
274 | MAS1_IPROT | MAS1_V;
275 hwtlb.hwtlb_mas3 = prot_mask >> PTE_RWX_SHIFT;
276 }
277 /* We are done with MAS1, on to MAS2 ... */
278 hwtlb.hwtlb_mas2 = tlb.tlb_va | (tlb.tlb_pte & PTE_WIMGE_MASK);
279 hwtlb.hwtlb_mas3 |= tlb.tlb_pte & PTE_RPN_MASK;
280
281 return hwtlb;
282 }
283
284 void *
285 e500_tlb1_fetch(size_t slot)
286 {
287 struct e500_tlb1 * const tlb1 = &e500_tlb1;
288
289 return &tlb1->tlb1_entries[slot].e_hwtlb;
290 }
291
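/*
 * Push the software copy of TLB1 kept in e500_tlb1 out to this CPU's
 * hardware TLB1 (slot 0 is left alone), rewriting only entries whose
 * MAS values differ and which are, or were, valid.  Presumably this is
 * how secondary CPUs pick up TLB1 changes after an IPI_TLB1SYNC is
 * sent (see the senders further down).
 */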
292 void
293 e500_tlb1_sync(void)
294 {
295 struct e500_tlb1 * const tlb1 = &e500_tlb1;
296 for (u_int slot = 1; slot < tlb1->tlb1_numentries; slot++) {
297 const struct e500_hwtlb * const new_hwtlb =
298 &tlb1->tlb1_entries[slot].e_hwtlb;
299 const struct e500_hwtlb old_hwtlb =
300 hwtlb_read(MAS0_TLBSEL_TLB1, slot);
301 #define CHANGED(n,o,f) ((n)->f != (o).f)
302 bool mas1_changed_p = CHANGED(new_hwtlb, old_hwtlb, hwtlb_mas1);
303 bool mas2_changed_p = CHANGED(new_hwtlb, old_hwtlb, hwtlb_mas2);
304 bool mas3_changed_p = CHANGED(new_hwtlb, old_hwtlb, hwtlb_mas3);
305 #undef CHANGED
306 bool new_valid_p = (new_hwtlb->hwtlb_mas1 & MAS1_V) != 0;
307 bool old_valid_p = (old_hwtlb.hwtlb_mas1 & MAS1_V) != 0;
308 if ((new_valid_p || old_valid_p)
309 && (mas1_changed_p
310 || (new_valid_p
311 && (mas2_changed_p || mas3_changed_p))))
312 hwtlb_write(*new_hwtlb, true);
313 }
314 }
315
316 static int
317 e500_alloc_tlb1_entry(void)
318 {
319 struct e500_tlb1 * const tlb1 = &e500_tlb1;
320
321 if (tlb1->tlb1_numfree == 0)
322 return -1;
323 const u_int slot = tlb1->tlb1_freelist[--tlb1->tlb1_numfree];
324 KASSERT((tlb1->tlb1_entries[slot].e_hwtlb.hwtlb_mas1 & MAS1_V) == 0);
325 tlb1->tlb1_entries[slot].e_hwtlb.hwtlb_mas0 =
326 MAS0_TLBSEL_TLB1 | __SHIFTIN(slot, MAS0_ESEL);
327 return (int)slot;
328 }
329
330 static void
331 e500_free_tlb1_entry(struct e500_xtlb *xtlb, u_int slot, bool needs_sync)
332 {
333 struct e500_tlb1 * const tlb1 = &e500_tlb1;
334 KASSERT(slot < tlb1->tlb1_numentries);
335 KASSERT(&tlb1->tlb1_entries[slot] == xtlb);
336
337 KASSERT(xtlb->e_hwtlb.hwtlb_mas0 == (MAS0_TLBSEL_TLB1|__SHIFTIN(slot, MAS0_ESEL)));
338 xtlb->e_hwtlb.hwtlb_mas1 &= ~(MAS1_V|MAS1_IPROT);
339 hwtlb_write(xtlb->e_hwtlb, needs_sync);
340
341 const register_t msr = wrtee(0);
342 tlb1->tlb1_freelist[tlb1->tlb1_numfree++] = slot;
343 wrtee(msr);
344 }
345
346 static tlb_asid_t
347 e500_tlb_get_asid(void)
348 {
349 return mfspr(SPR_PID0);
350 }
351
352 static void
353 e500_tlb_set_asid(tlb_asid_t asid)
354 {
355 mtspr(SPR_PID0, asid);
356 }
357
358 static void
359 e500_tlb_invalidate_all(void)
360 {
361 /*
362 * This does a flash invalidate of all entries in TLB0.
363 * We don't touch TLB1 since we don't expect those to be volatile.
364 */
365 #if 1
366 __asm volatile("tlbivax\t0, %0" :: "b"(4)); /* INV_ALL */
367 __asm volatile("tlbsync\n\tisync\n\tsync");
368 #else
369 mtspr(SPR_MMUCSR0, MMUCSR0_TLB0_FI);
370 while (mfspr(SPR_MMUCSR0) != 0)
371 ;
372 #endif
373 }
374
375 static void
376 e500_tlb_invalidate_globals(void)
377 {
378 #if defined(MULTIPROCESSOR)
379 e500_tlb_invalidate_all();
380 #else /* !MULTIPROCESSOR */
381 const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg());
382 const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg());
383 const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT;
384 const vaddr_t kstack_lo = (uintptr_t)curlwp->l_addr;
385 const vaddr_t kstack_hi = kstack_lo + USPACE - 1;
386 const vaddr_t epn_kstack_lo = kstack_lo & (max_epn - 1);
387 const vaddr_t epn_kstack_hi = kstack_hi & (max_epn - 1);
388
389 const register_t msr = wrtee(0);
390 for (size_t assoc = 0; assoc < tlbassoc; assoc++) {
391 mtspr(SPR_MAS0, MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0);
392 for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) {
393 mtspr(SPR_MAS2, epn);
394 __asm volatile("tlbre");
395 uint32_t mas1 = mfspr(SPR_MAS1);
396
397 /*
398 * Make sure this is a valid kernel entry first.
399 */
400 if ((mas1 & (MAS1_V|MAS1_TID|MAS1_TS)) != MAS1_V)
401 continue;
402
403 /*
404 * We have a valid kernel TLB entry. But if it matches
405 * the stack we are currently running on, it would be
406 * unwise to invalidate it. First see if the epn
407 * overlaps the stack. If it does then get the
408 * VA and see if it really is part of the stack.
409 */
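/*
 * Note that epn_kstack_lo may be numerically greater than
 * epn_kstack_hi because both are reduced modulo max_epn (the TLB0
 * row index), which is why the overlap test below has two cases.
 */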
410 if (epn_kstack_lo < epn_kstack_hi
411 ? (epn_kstack_lo <= epn && epn <= epn_kstack_hi)
412 : (epn <= epn_kstack_hi || epn_kstack_lo <= epn)) {
413 const uint32_t mas2_epn =
414 mfspr(SPR_MAS2) & MAS2_EPN;
415 if (kstack_lo <= mas2_epn
416 && mas2_epn <= kstack_hi)
417 continue;
418 }
419 mtspr(SPR_MAS1, mas1 ^ MAS1_V);
420 __asm volatile("tlbwe");
421 }
422 }
423 __asm volatile("isync\n\tsync");
424 wrtee(msr);
425 #endif /* MULTIPROCESSOR */
426 }
427
428 static void
429 e500_tlb_invalidate_asids(tlb_asid_t asid_lo, tlb_asid_t asid_hi)
430 {
431 #if defined(MULTIPROCESSOR)
432 e500_tlb_invalidate_all();
433 #else /* !MULTIPROCESSOR */
434 const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg());
435 const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg());
436 const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT;
437
438 asid_lo = __SHIFTIN(asid_lo, MAS1_TID);
439 asid_hi = __SHIFTIN(asid_hi, MAS1_TID);
440
441 const register_t msr = wrtee(0);
442 for (size_t assoc = 0; assoc < tlbassoc; assoc++) {
443 mtspr(SPR_MAS0, MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0);
444 for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) {
445 mtspr(SPR_MAS2, epn);
446 __asm volatile("tlbre");
447 const uint32_t mas1 = mfspr(SPR_MAS1);
448 /*
449 * If this is a valid entry for AS space 1 and
450 * its asid matches the constraints of the caller,
451 * clear its valid bit.
452 */
453 if ((mas1 & (MAS1_V|MAS1_TS)) == (MAS1_V|MAS1_TS)
454 && asid_lo <= (mas1 & MAS1_TID)
455 && (mas1 & MAS1_TID) <= asid_hi) {
456 mtspr(SPR_MAS1, mas1 ^ MAS1_V);
457 #if 0
458 printf("%s[%zu,%zu]->[%x]\n",
459 __func__, assoc, epn, mas1);
460 #endif
461 __asm volatile("tlbwe");
462 }
463 }
464 }
465 __asm volatile("isync\n\tsync");
466 wrtee(msr);
467 #endif /* MULTIPROCESSOR */
468 }
469
470 static u_int
471 e500_tlb_record_asids(u_long *bitmap, tlb_asid_t asid_max)
472 {
473 const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg());
474 const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg());
475 const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT;
476 const size_t nbits = 8 * sizeof(bitmap[0]);
477 u_int found = 0;
478
479 const register_t msr = wrtee(0);
480 for (size_t assoc = 0; assoc < tlbassoc; assoc++) {
481 mtspr(SPR_MAS0, MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0);
482 for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) {
483 mtspr(SPR_MAS2, epn);
484 __asm volatile("tlbre");
485 const uint32_t mas1 = mfspr(SPR_MAS1);
486 /*
487 * If this is a valid entry for AS space 1, record
488 * its ASID in the bitmap and count it if it hasn't
489 * been seen before.
490 */
491 if ((mas1 & (MAS1_V|MAS1_TS)) == (MAS1_V|MAS1_TS)) {
492 const uint32_t asid = MASX_TID_GET(mas1);
493 const u_int i = asid / nbits;
494 const u_long mask = 1UL << (asid & (nbits - 1));
495 if ((bitmap[i] & mask) == 0) {
496 bitmap[i] |= mask;
497 found++;
498 }
499 }
500 }
501 }
502 wrtee(msr);
503
504 return found;
505 }
506
507 static void
508 e500_tlb_invalidate_addr(vaddr_t va, tlb_asid_t asid)
509 {
510 KASSERT((va & PAGE_MASK) == 0);
511 /*
512 * Bits 60 & 61 have meaning
513 */
514 if (asid == KERNEL_PID) {
515 /*
516 * For data accesses, the context-synchronizing instruction
517 * before tlbwe or tlbivax ensures that all memory accesses
518 * due to preceding instructions have completed to a point
519 * at which they have reported all exceptions they will cause.
520 */
521 __asm volatile("isync");
522 }
523 __asm volatile("tlbivax\t0, %0" :: "b"(va));
524 __asm volatile("tlbsync");
525 __asm volatile("tlbsync"); /* Why? */
526 if (asid == KERNEL_PID) {
527 /*
528 * The context-synchronizing instruction after tlbwe or tlbivax
529 * ensures that subsequent accesses (data and instruction) use
530 * the updated value in any TLB entries affected.
531 */
532 __asm volatile("isync\n\tsync");
533 }
534 }
535
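/*
 * Update (or, if insert is true, create) the TLB0 entry for (va, asid):
 * look it up with tlbsx and rewrite it in place using the MAS values
 * derived from the new pte, returning whether a valid entry is present
 * afterwards.  On MULTIPROCESSOR kernels the address is simply
 * invalidated instead, presumably leaving the TLB miss path to reload it.
 */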
536 static bool
537 e500_tlb_update_addr(vaddr_t va, tlb_asid_t asid, pt_entry_t pte, bool insert)
538 {
539 #if defined(MULTIPROCESSOR)
540 e500_tlb_invalidate_addr(va, asid);
541 return true;
542 #else /* !MULTIPROCESSOR */
543 struct e500_hwtlb hwtlb = tlb_to_hwtlb(
544 (struct e500_tlb){ .tlb_va = va, .tlb_asid = asid,
545 .tlb_size = PAGE_SIZE, .tlb_pte = pte,});
546
547 register_t msr = wrtee(0);
548 mtspr(SPR_MAS6, asid ? __SHIFTIN(asid, MAS6_SPID0) | MAS6_SAS : 0);
549 __asm volatile("tlbsx 0, %0" :: "b"(va));
550 register_t mas1 = mfspr(SPR_MAS1);
551 if ((mas1 & MAS1_V) == 0) {
552 if (!insert) {
553 wrtee(msr);
554 #if 0
555 printf("%s(%#lx,%#x,%#x,%x)<no update>\n",
556 __func__, va, asid, pte, insert);
557 #endif
558 return false;
559 }
560 mas1 = hwtlb.hwtlb_mas1 | MAS1_V;
561 mtspr(SPR_MAS1, mas1);
562 }
563 mtspr(SPR_MAS2, hwtlb.hwtlb_mas2);
564 mtspr(SPR_MAS3, hwtlb.hwtlb_mas3);
565 //mtspr(SPR_MAS7, 0);
566 __asm volatile("tlbwe");
567 if (asid == KERNEL_PID)
568 __asm volatile("isync\n\tsync");
569 wrtee(msr);
570 #if 0
571 if (asid)
572 printf("%s(%#lx,%#x,%#x,%x)->[%x,%x,%x]\n",
573 __func__, va, asid, pte, insert,
574 hwtlb.hwtlb_mas1, hwtlb.hwtlb_mas2, hwtlb.hwtlb_mas3);
575 #endif
576 return (mas1 & MAS1_V) != 0;
577 #endif /* MULTIPROCESSOR */
578 }
579
580 static void
581 e500_tlb_write_entry(size_t index, const struct tlbmask *tlb)
582 {
583 }
584
585 static void
586 e500_tlb_read_entry(size_t index, struct tlbmask *tlb)
587 {
588 }
589
590 static void
591 e500_tlb_dump(void (*pr)(const char *, ...))
592 {
593 const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg());
594 const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg());
595 const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT;
596 const uint32_t saved_mas0 = mfspr(SPR_MAS0);
597 size_t valid = 0;
598
599 if (pr == NULL)
600 pr = printf;
601
602 const register_t msr = wrtee(0);
603 for (size_t assoc = 0; assoc < tlbassoc; assoc++) {
604 struct e500_hwtlb hwtlb;
605 hwtlb.hwtlb_mas0 = MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0;
606 mtspr(SPR_MAS0, hwtlb.hwtlb_mas0);
607 for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) {
608 mtspr(SPR_MAS2, epn);
609 __asm volatile("tlbre");
610 hwtlb.hwtlb_mas1 = mfspr(SPR_MAS1);
611 /*
612 * If this entry is valid, read the rest of it and
613 * print a decoded version of both the raw MAS values
614 * and the software view.
615 */
616 if (hwtlb.hwtlb_mas1 & MAS1_V) {
617 hwtlb.hwtlb_mas2 = mfspr(SPR_MAS2);
618 hwtlb.hwtlb_mas3 = mfspr(SPR_MAS3);
619 struct e500_tlb tlb = hwtlb_to_tlb(hwtlb);
620 (*pr)("[%zu,%zu]->[%x,%x,%x]",
621 assoc, atop(epn),
622 hwtlb.hwtlb_mas1,
623 hwtlb.hwtlb_mas2,
624 hwtlb.hwtlb_mas3);
625 (*pr)(": VA=%#lx size=4KB asid=%u pte=%x",
626 tlb.tlb_va, tlb.tlb_asid, tlb.tlb_pte);
627 (*pr)(" (RPN=%#x,%s%s%s%s%s,%s%s%s%s%s)\n",
628 tlb.tlb_pte & PTE_RPN_MASK,
629 tlb.tlb_pte & PTE_xR ? "R" : "",
630 tlb.tlb_pte & PTE_xW ? "W" : "",
631 tlb.tlb_pte & PTE_UNMODIFIED ? "*" : "",
632 tlb.tlb_pte & PTE_xX ? "X" : "",
633 tlb.tlb_pte & PTE_UNSYNCED ? "*" : "",
634 tlb.tlb_pte & PTE_W ? "W" : "",
635 tlb.tlb_pte & PTE_I ? "I" : "",
636 tlb.tlb_pte & PTE_M ? "M" : "",
637 tlb.tlb_pte & PTE_G ? "G" : "",
638 tlb.tlb_pte & PTE_E ? "E" : "");
639 valid++;
640 }
641 }
642 }
643 mtspr(SPR_MAS0, saved_mas0);
644 wrtee(msr);
645 (*pr)("%s: %zu valid entries\n", __func__, valid);
646 }
647
648 static void
649 e500_tlb_walk(void *ctx, bool (*func)(void *, vaddr_t, uint32_t, uint32_t))
650 {
651 const size_t tlbassoc = TLBCFG_ASSOC(mftlb0cfg());
652 const size_t tlbentries = TLBCFG_NENTRY(mftlb0cfg());
653 const size_t max_epn = (tlbentries / tlbassoc) << PAGE_SHIFT;
654 const uint32_t saved_mas0 = mfspr(SPR_MAS0);
655
656 const register_t msr = wrtee(0);
657 for (size_t assoc = 0; assoc < tlbassoc; assoc++) {
658 struct e500_hwtlb hwtlb;
659 hwtlb.hwtlb_mas0 = MAS0_ESEL_MAKE(assoc) | MAS0_TLBSEL_TLB0;
660 mtspr(SPR_MAS0, hwtlb.hwtlb_mas0);
661 for (size_t epn = 0; epn < max_epn; epn += PAGE_SIZE) {
662 mtspr(SPR_MAS2, epn);
663 __asm volatile("tlbre");
664 hwtlb.hwtlb_mas1 = mfspr(SPR_MAS1);
665 if (hwtlb.hwtlb_mas1 & MAS1_V) {
666 hwtlb.hwtlb_mas2 = mfspr(SPR_MAS2);
667 hwtlb.hwtlb_mas3 = mfspr(SPR_MAS3);
668 struct e500_tlb tlb = hwtlb_to_tlb(hwtlb);
669 if (!(*func)(ctx, tlb.tlb_va, tlb.tlb_asid,
670 tlb.tlb_pte))
671 break;
672 }
673 }
674 }
675 mtspr(SPR_MAS0, saved_mas0);
676 wrtee(msr);
677 }
678
679 static struct e500_xtlb *
680 e500_tlb_lookup_xtlb_pa(vaddr_t pa, u_int *slotp)
681 {
682 struct e500_tlb1 * const tlb1 = &e500_tlb1;
683 struct e500_xtlb *xtlb = tlb1->tlb1_entries;
684
685 /*
686 * See if we have a TLB entry for the pa.
687 */
688 for (u_int i = 0; i < tlb1->tlb1_numentries; i++, xtlb++) {
689 psize_t mask = ~(xtlb->e_tlb.tlb_size - 1);
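/*
 * tlb_pte holds the RPN in its upper bits and only PTE flag bits
 * below the 4KB minimum entry size, so xoring pa with it and
 * masking by the entry size tests whether pa falls within this
 * entry's physical range.
 */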
690 if ((xtlb->e_hwtlb.hwtlb_mas1 & MAS1_V)
691 && ((pa ^ xtlb->e_tlb.tlb_pte) & mask) == 0) {
692 if (slotp != NULL)
693 *slotp = i;
694 return xtlb;
695 }
696 }
697
698 return NULL;
699 }
700
701 struct e500_xtlb *
702 e500_tlb_lookup_xtlb(vaddr_t va, u_int *slotp)
703 {
704 struct e500_tlb1 * const tlb1 = &e500_tlb1;
705 struct e500_xtlb *xtlb = tlb1->tlb1_entries;
706
707 /*
708 * See if we have a TLB entry for the va.
709 */
710 for (u_int i = 0; i < tlb1->tlb1_numentries; i++, xtlb++) {
711 vsize_t mask = ~(xtlb->e_tlb.tlb_size - 1);
712 if ((xtlb->e_hwtlb.hwtlb_mas1 & MAS1_V)
713 && ((va ^ xtlb->e_tlb.tlb_va) & mask) == 0) {
714 if (slotp != NULL)
715 *slotp = i;
716 return xtlb;
717 }
718 }
719
720 return NULL;
721 }
722
723 static struct e500_xtlb *
724 e500_tlb_lookup_xtlb2(vaddr_t va, vsize_t len)
725 {
726 struct e500_tlb1 * const tlb1 = &e500_tlb1;
727 struct e500_xtlb *xtlb = tlb1->tlb1_entries;
728
729 /*
730 * See if we have a TLB1 entry covering the entire va range.
731 */
732 for (u_int i = 0; i < tlb1->tlb1_numentries; i++, xtlb++) {
733 vsize_t mask = ~(xtlb->e_tlb.tlb_size - 1);
734 if ((xtlb->e_hwtlb.hwtlb_mas1 & MAS1_V)
735 && ((va ^ xtlb->e_tlb.tlb_va) & mask) == 0
736 && (((va + len - 1) ^ va) & mask) == 0) {
737 return xtlb;
738 }
739 }
740
741 return NULL;
742 }
743
744 static void *
745 e500_tlb_mapiodev(paddr_t pa, psize_t len, bool prefetchable)
746 {
747 struct e500_xtlb * const xtlb = e500_tlb_lookup_xtlb_pa(pa, NULL);
748
749 /*
750 * See if we have a TLB1 entry covering the pa. If so, take a
751 * reference and return the corresponding va, but only if the caller
752 * asked for a prefetchable mapping or the entry is uncached and guarded.
753 */
754 if (xtlb
755 && (prefetchable
756 || (xtlb->e_tlb.tlb_pte & PTE_WIG) == (PTE_I|PTE_G))) {
757 xtlb->e_refcnt++;
758 return (void *) (xtlb->e_tlb.tlb_va
759 + pa - (xtlb->e_tlb.tlb_pte & PTE_RPN_MASK));
760 }
761 return NULL;
762 }
763
764 static void
765 e500_tlb_unmapiodev(vaddr_t va, vsize_t len)
766 {
767 if (va < VM_MIN_KERNEL_ADDRESS || VM_MAX_KERNEL_ADDRESS <= va) {
768 struct e500_xtlb * const xtlb = e500_tlb_lookup_xtlb(va, NULL);
769 if (xtlb)
770 xtlb->e_refcnt--;
771 }
772 }
773
774 static int
775 e500_tlb_ioreserve(vaddr_t va, vsize_t len, pt_entry_t pte)
776 {
777 struct e500_tlb1 * const tlb1 = &e500_tlb1;
778 struct e500_xtlb *xtlb;
779
780 KASSERT(len & 0x55555000);
781 KASSERT((len & ~0x55555000) == 0);
782 KASSERT(len >= PAGE_SIZE);
783 KASSERT((len & (len - 1)) == 0);
784 KASSERT((va & (len - 1)) == 0);
785 KASSERT(((pte & PTE_RPN_MASK) & (len - 1)) == 0);
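/*
 * 0x55555000 has only the even bit positions 12..30 set, so the
 * asserts above require len to be exactly one of the valid TLB1
 * entry sizes (4KB, 16KB, 64KB, ... 1GB), with va and the pa in
 * the pte both aligned to that size.
 */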
786
787 if ((xtlb = e500_tlb_lookup_xtlb2(va, len)) != NULL) {
788 psize_t mask __diagused = ~(xtlb->e_tlb.tlb_size - 1);
789 KASSERT(len <= xtlb->e_tlb.tlb_size);
790 KASSERT((pte & mask) == (xtlb->e_tlb.tlb_pte & mask));
791 xtlb->e_refcnt++;
792 return 0;
793 }
794
795 const int slot = e500_alloc_tlb1_entry();
796 if (slot < 0)
797 return ENOMEM;
798
799 xtlb = &tlb1->tlb1_entries[slot];
800 xtlb->e_tlb.tlb_va = va;
801 xtlb->e_tlb.tlb_size = len;
802 xtlb->e_tlb.tlb_pte = pte;
803 xtlb->e_tlb.tlb_asid = KERNEL_PID;
804
805 xtlb->e_hwtlb = tlb_to_hwtlb(xtlb->e_tlb);
806 xtlb->e_hwtlb.hwtlb_mas0 |= __SHIFTIN(slot, MAS0_ESEL);
807 hwtlb_write(xtlb->e_hwtlb, true);
808
809 #if defined(MULTIPROCESSOR)
810 cpu_send_ipi(IPI_DST_NOTME, IPI_TLB1SYNC);
811 #endif
812
813 return 0;
814 }
815
816 static int
817 e500_tlb_iorelease(vaddr_t va)
818 {
819 u_int slot;
820 struct e500_xtlb * const xtlb = e500_tlb_lookup_xtlb(va, &slot);
821
822 if (xtlb == NULL)
823 return ENOENT;
824
825 if (xtlb->e_refcnt)
826 return EBUSY;
827
828 e500_free_tlb1_entry(xtlb, slot, true);
829
830 #if defined(MULTIPROCESSOR)
831 cpu_send_ipi(IPI_DST_NOTME, IPI_TLB1SYNC);
832 #endif
833
834 return 0;
835 }
836
837 static u_int
838 e500_tlbmemmap(paddr_t memstart, psize_t memsize, struct e500_tlb1 *tlb1)
839 {
840 u_int slotmask = 0;
841 u_int slots = 0, nextslot = 0;
842 KASSERT(tlb1->tlb1_numfree > 1);
843 KASSERT(((memstart + memsize - 1) & -memsize) == memstart);
844 for (paddr_t lastaddr = memstart; 0 < memsize; ) {
845 u_int cnt = __builtin_clz(memsize);
846 psize_t size = uimin(1UL << (31 - (cnt | 1)), tlb1->tlb1_maxsize);
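/*
 * (cnt | 1) forces an odd leading-zero count, so 31 - (cnt | 1) is
 * even and size is the largest valid 4^n KB power of two that does
 * not exceed the remaining memsize, capped at the largest size TLB1
 * supports.
 */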
847 slots += memsize / size;
848 if (slots > 4)
849 panic("%s: %d: can't map memory (%#lx) into TLB1: %s",
850 __func__, __LINE__, memsize, "too fragmented");
851 if (slots > tlb1->tlb1_numfree - 1)
852 panic("%s: %d: can't map memory (%#lx) into TLB1: %s",
853 __func__, __LINE__, memsize,
854 "insufficient TLB entries");
855 for (; nextslot < slots; nextslot++) {
856 const u_int freeslot = e500_alloc_tlb1_entry();
857 struct e500_xtlb * const xtlb =
858 &tlb1->tlb1_entries[freeslot];
859 xtlb->e_tlb.tlb_asid = KERNEL_PID;
860 xtlb->e_tlb.tlb_size = size;
861 xtlb->e_tlb.tlb_va = lastaddr;
862 xtlb->e_tlb.tlb_pte = lastaddr
863 | PTE_M | PTE_xX | PTE_xW | PTE_xR;
864 lastaddr += size;
865 memsize -= size;
866 slotmask |= 1 << (31 - freeslot); /* clz friendly */
867 }
868 }
869
870 #if defined(MULTIPROCESSOR)
871 cpu_send_ipi(IPI_DST_NOTME, IPI_TLB1SYNC);
872 #endif
873
874 return nextslot;
875 }
876
877 static const struct tlb_md_ops e500_tlb_ops = {
878 .md_tlb_get_asid = e500_tlb_get_asid,
879 .md_tlb_set_asid = e500_tlb_set_asid,
880 .md_tlb_invalidate_all = e500_tlb_invalidate_all,
881 .md_tlb_invalidate_globals = e500_tlb_invalidate_globals,
882 .md_tlb_invalidate_asids = e500_tlb_invalidate_asids,
883 .md_tlb_invalidate_addr = e500_tlb_invalidate_addr,
884 .md_tlb_update_addr = e500_tlb_update_addr,
885 .md_tlb_record_asids = e500_tlb_record_asids,
886 .md_tlb_write_entry = e500_tlb_write_entry,
887 .md_tlb_read_entry = e500_tlb_read_entry,
888 .md_tlb_dump = e500_tlb_dump,
889 .md_tlb_walk = e500_tlb_walk,
890 };
891
892 static const struct tlb_md_io_ops e500_tlb_io_ops = {
893 .md_tlb_mapiodev = e500_tlb_mapiodev,
894 .md_tlb_unmapiodev = e500_tlb_unmapiodev,
895 .md_tlb_ioreserve = e500_tlb_ioreserve,
896 .md_tlb_iorelease = e500_tlb_iorelease,
897 };
898
899 void
900 e500_tlb_init(vaddr_t endkernel, psize_t memsize)
901 {
902 struct e500_tlb1 * const tlb1 = &e500_tlb1;
903
904 #if 0
905 register_t mmucfg = mfspr(SPR_MMUCFG);
906 register_t mas4 = mfspr(SPR_MAS4);
907 #endif
908
909 const uint32_t tlb1cfg = mftlb1cfg();
910 tlb1->tlb1_numentries = TLBCFG_NENTRY(tlb1cfg);
911 KASSERT(tlb1->tlb1_numentries <= __arraycount(tlb1->tlb1_entries));
912 /*
913 * Limit maxsize to 1G since 4G isn't really useful to us.
914 */
915 tlb1->tlb1_minsize = 1024 << (2 * TLBCFG_MINSIZE(tlb1cfg));
916 tlb1->tlb1_maxsize = 1024 << (2 * uimin(10, TLBCFG_MAXSIZE(tlb1cfg)));
917
918 #ifdef VERBOSE_INITPPC
919 printf(" tlb1cfg=%#x numentries=%u minsize=%#xKB maxsize=%#xKB",
920 tlb1cfg, tlb1->tlb1_numentries, tlb1->tlb1_minsize >> 10,
921 tlb1->tlb1_maxsize >> 10);
922 #endif
923
924 /*
925 * Let's see what's in TLB1; we need to invalidate any entry that
926 * falls within the kernel's mapped address space.
927 */
928 psize_t memmapped = 0;
929 for (u_int i = 0; i < tlb1->tlb1_numentries; i++) {
930 struct e500_xtlb * const xtlb = &tlb1->tlb1_entries[i];
931
932 xtlb->e_hwtlb = hwtlb_read(MAS0_TLBSEL_TLB1, i);
933
934 if ((xtlb->e_hwtlb.hwtlb_mas1 & MAS1_V) == 0) {
935 tlb1->tlb1_freelist[tlb1->tlb1_numfree++] = i;
936 #ifdef VERBOSE_INITPPC
937 printf(" TLB1[%u]=<unused>", i);
938 #endif
939 continue;
940 }
941
942 xtlb->e_tlb = hwtlb_to_tlb(xtlb->e_hwtlb);
943 #ifdef VERBOSE_INITPPC
944 printf(" TLB1[%u]=<%#lx,%#lx,%#x,%#x>",
945 i, xtlb->e_tlb.tlb_va, xtlb->e_tlb.tlb_size,
946 xtlb->e_tlb.tlb_asid, xtlb->e_tlb.tlb_pte);
947 #endif
948 if ((VM_MIN_KERNEL_ADDRESS <= xtlb->e_tlb.tlb_va
949 && xtlb->e_tlb.tlb_va < VM_MAX_KERNEL_ADDRESS)
950 || (xtlb->e_tlb.tlb_va < VM_MIN_KERNEL_ADDRESS
951 && VM_MIN_KERNEL_ADDRESS <
952 xtlb->e_tlb.tlb_va + xtlb->e_tlb.tlb_size)) {
953 #ifdef VERBOSE_INITPPC
954 printf("free");
955 #endif
956 e500_free_tlb1_entry(xtlb, i, false);
957 #ifdef VERBOSE_INITPPC
958 printf("d");
959 #endif
960 continue;
961 }
962 if ((xtlb->e_hwtlb.hwtlb_mas1 & MAS1_IPROT) == 0) {
963 xtlb->e_hwtlb.hwtlb_mas1 |= MAS1_IPROT;
964 hwtlb_write(xtlb->e_hwtlb, false);
965 #ifdef VERBOSE_INITPPC
966 printf("+iprot");
967 #endif
968 }
969 if (xtlb->e_tlb.tlb_pte & PTE_I)
970 continue;
971
972 if (xtlb->e_tlb.tlb_va == 0
973 || xtlb->e_tlb.tlb_va + xtlb->e_tlb.tlb_size <= memsize) {
974 memmapped += xtlb->e_tlb.tlb_size;
975 /*
976 * Let's make sure main memory is set up so it's memory
977 * coherent. For some reason u-boot doesn't set it up
978 * that way.
979 */
980 if ((xtlb->e_hwtlb.hwtlb_mas2 & MAS2_M) == 0) {
981 xtlb->e_hwtlb.hwtlb_mas2 |= MAS2_M;
982 hwtlb_write(xtlb->e_hwtlb, true);
983 }
984 }
985 }
986
987 cpu_md_ops.md_tlb_ops = &e500_tlb_ops;
988 cpu_md_ops.md_tlb_io_ops = &e500_tlb_io_ops;
989
990 if (__predict_false(memmapped < memsize)) {
991 /*
992 * Let's see how many TLB entries are needed to map memory.
993 */
994 u_int slotmask = e500_tlbmemmap(0, memsize, tlb1);
995
996 /*
997 * To map main memory into the TLB, we need to flush any
998 * existing entries from the TLB that overlap the virtual
999 * address space needed to map physical memory. That may
1000 * include the entries for the pages currently used by the
1001 * stack or the code we are executing. To avoid problems, we
1002 * are going to temporarily map the kernel and stack into AS 1,
1003 * switch to it, clear out the TLB entries for AS 0, install
1004 * the new TLB entries that map memory, and then switch back
1005 * to AS 0 and free the temporary entry used for AS 1.
1006 */
1007 u_int b = __builtin_clz(endkernel);
1008
1009 /*
1010 * If the kernel doesn't end on a clean power of 2, we need
1011 * to round the size up (by decrementing the number of leading
1012 * zero bits). If the size isn't a valid TLB size (4^n KB,
1013 * i.e. an even power of two), decrement again to make it one.
1014 */
1015 if (endkernel & (endkernel - 1))
1016 b--;
1017 if ((b & 1) == 0)
1018 b--;
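/*
 * For example (illustrative numbers): endkernel = 0x00520000 gives
 * b = 9; it isn't a power of two so b becomes 8, and 8 is even so b
 * becomes 7, yielding a 1 << 24 = 16MB AS1 mapping, the smallest
 * valid TLB1 size that covers the kernel.
 */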
1019
1020 /*
1021 * Create a TLB1 mapping for the kernel in AS1.
1022 */
1023 const u_int kslot = e500_alloc_tlb1_entry();
1024 struct e500_xtlb * const kxtlb = &tlb1->tlb1_entries[kslot];
1025 kxtlb->e_tlb.tlb_va = 0;
1026 kxtlb->e_tlb.tlb_size = 1UL << (31 - b);
1027 kxtlb->e_tlb.tlb_pte = PTE_M|PTE_xR|PTE_xW|PTE_xX;
1028 kxtlb->e_tlb.tlb_asid = KERNEL_PID;
1029
1030 kxtlb->e_hwtlb = tlb_to_hwtlb(kxtlb->e_tlb);
1031 kxtlb->e_hwtlb.hwtlb_mas0 |= __SHIFTIN(kslot, MAS0_ESEL);
1032 kxtlb->e_hwtlb.hwtlb_mas1 |= MAS1_TS;
1033 hwtlb_write(kxtlb->e_hwtlb, true);
1034
1035 /*
1036 * Now that we have a TLB mapping in AS 1 for the kernel and its
1037 * stack, we switch to AS 1 to clean up the mappings in TLB0.
1038 */
1039 const register_t saved_msr = mfmsr();
1040 mtmsr(saved_msr | PSL_DS | PSL_IS);
1041 __asm volatile("isync");
1042
1043 /*
1044 *** Invalidate all the TLB0 entries.
1045 */
1046 e500_tlb_invalidate_all();
1047
1048 /*
1049 *** Now let's see if we have any entries in TLB1 that would
1050 *** overlap the ones we are about to install. If so, nuke 'em.
1051 */
1052 for (u_int i = 0; i < tlb1->tlb1_numentries; i++) {
1053 struct e500_xtlb * const xtlb = &tlb1->tlb1_entries[i];
1054 struct e500_hwtlb * const hwtlb = &xtlb->e_hwtlb;
1055 if ((hwtlb->hwtlb_mas1 & (MAS1_V|MAS1_TS)) == MAS1_V
1056 && (hwtlb->hwtlb_mas2 & MAS2_EPN) < memsize) {
1057 e500_free_tlb1_entry(xtlb, i, false);
1058 }
1059 }
1060
1061 /*
1062 *** Now we can add the TLB entries that will map physical
1063 *** memory. If bit 0 [MSB] in slotmask is set, then tlb
1064 *** entry 0 contains a mapping for physical memory...
1065 */
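/*
 * e500_tlbmemmap() set bit (31 - slot) in slotmask for each TLB1
 * slot it filled, so __builtin_clz() yields the next allocated slot;
 * after writing it, step past that slot in both the entry array and
 * the mask.
 */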
1066 struct e500_xtlb *entries = tlb1->tlb1_entries;
1067 while (slotmask != 0) {
1068 const u_int slot = __builtin_clz(slotmask);
1069 hwtlb_write(entries[slot].e_hwtlb, false);
1070 entries += slot + 1;
1071 slotmask <<= slot + 1;
1072 }
1073
1074 /*
1075 *** Synchronize the TLB and the instruction stream.
1076 */
1077 __asm volatile("tlbsync");
1078 __asm volatile("isync");
1079
1080 /*
1081 *** Switch back to AS 0.
1082 */
1083 mtmsr(saved_msr);
1084 __asm volatile("isync");
1085
1086 /*
1087 * Free the temporary TLB1 entry.
1088 */
1089 e500_free_tlb1_entry(kxtlb, kslot, true);
1090 }
1091
1092 /*
1093 * Finally set the MAS4 defaults.
1094 */
1095 mtspr(SPR_MAS4, MAS4_TSIZED_4KB | MAS4_MD);
1096
1097 /*
1098 * Invalidate all the TLB0 entries.
1099 */
1100 e500_tlb_invalidate_all();
1101 }
1102
1103 void
1104 e500_tlb_minimize(vaddr_t endkernel)
1105 {
1106 #ifdef PMAP_MINIMALTLB
1107 struct e500_tlb1 * const tlb1 = &e500_tlb1;
1108 extern uint32_t _fdata[];
1109
1110 u_int slot;
1111
1112 paddr_t boot_page = cpu_read_4(GUR_BPTR);
1113 if (boot_page & BPTR_EN) {
1114 /*
1115 * Shift the boot page number up into an address.
1116 */
1117 boot_page = (boot_page & BPTR_BOOT_PAGE) << PAGE_SHIFT;
1118 pmap_kvptefill(boot_page, boot_page + NBPG,
1119 PTE_M | PTE_xR | PTE_xW | PTE_xX);
1120 }
1121
1122
1123 KASSERT(endkernel - (uintptr_t)_fdata < 0x400000);
1124 KASSERT((uintptr_t)_fdata == 0x400000);
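/*
 * The asserts above capture the assumed layout: kernel text fits in
 * the first 4MB and the data segment (_fdata) starts at 0x400000, so
 * each half can be covered by one of the 4MB TLB1 entries created
 * below.
 */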
1125
1126 struct e500_xtlb *xtlb = e500_tlb_lookup_xtlb(endkernel, &slot);
1127
1128 KASSERT(xtlb == e500_tlb_lookup_xtlb2(0, endkernel));
1129 const u_int tmp_slot = e500_alloc_tlb1_entry();
1130 KASSERT(tmp_slot != (u_int) -1);
1131
1132 struct e500_xtlb * const tmp_xtlb = &tlb1->tlb1_entries[tmp_slot];
1133 tmp_xtlb->e_tlb = xtlb->e_tlb;
1134 tmp_xtlb->e_hwtlb = tlb_to_hwtlb(tmp_xtlb->e_tlb);
1135 tmp_xtlb->e_hwtlb.hwtlb_mas1 |= MAS1_TS;
1136 KASSERT((tmp_xtlb->e_hwtlb.hwtlb_mas0 & MAS0_TLBSEL) == MAS0_TLBSEL_TLB1);
1137 tmp_xtlb->e_hwtlb.hwtlb_mas0 |= __SHIFTIN(tmp_slot, MAS0_ESEL);
1138 hwtlb_write(tmp_xtlb->e_hwtlb, true);
1139
1140 const u_int text_slot = e500_alloc_tlb1_entry();
1141 KASSERT(text_slot != (u_int)-1);
1142 struct e500_xtlb * const text_xtlb = &tlb1->tlb1_entries[text_slot];
1143 text_xtlb->e_tlb.tlb_va = 0;
1144 text_xtlb->e_tlb.tlb_size = 0x400000;
1145 text_xtlb->e_tlb.tlb_pte = PTE_M | PTE_xR | PTE_xX | text_xtlb->e_tlb.tlb_va;
1146 text_xtlb->e_tlb.tlb_asid = 0;
1147 text_xtlb->e_hwtlb = tlb_to_hwtlb(text_xtlb->e_tlb);
1148 KASSERT((text_xtlb->e_hwtlb.hwtlb_mas0 & MAS0_TLBSEL) == MAS0_TLBSEL_TLB1);
1149 text_xtlb->e_hwtlb.hwtlb_mas0 |= __SHIFTIN(text_slot, MAS0_ESEL);
1150
1151 const u_int data_slot = e500_alloc_tlb1_entry();
1152 KASSERT(data_slot != (u_int)-1);
1153 struct e500_xtlb * const data_xtlb = &tlb1->tlb1_entries[data_slot];
1154 data_xtlb->e_tlb.tlb_va = 0x400000;
1155 data_xtlb->e_tlb.tlb_size = 0x400000;
1156 data_xtlb->e_tlb.tlb_pte = PTE_M | PTE_xR | PTE_xW | data_xtlb->e_tlb.tlb_va;
1157 data_xtlb->e_tlb.tlb_asid = 0;
1158 data_xtlb->e_hwtlb = tlb_to_hwtlb(data_xtlb->e_tlb);
1159 KASSERT((data_xtlb->e_hwtlb.hwtlb_mas0 & MAS0_TLBSEL) == MAS0_TLBSEL_TLB1);
1160 data_xtlb->e_hwtlb.hwtlb_mas0 |= __SHIFTIN(data_slot, MAS0_ESEL);
1161
1162 const register_t msr = mfmsr();
1163 const register_t ts_msr = (msr | PSL_DS | PSL_IS) & ~PSL_EE;
1164
1165 __asm __volatile(
1166 "mtmsr %[ts_msr]" "\n\t"
1167 "sync" "\n\t"
1168 "isync"
1169 :: [ts_msr] "r" (ts_msr));
1170
1171 #if 0
1172 hwtlb_write(text_xtlb->e_hwtlb, false);
1173 hwtlb_write(data_xtlb->e_hwtlb, false);
1174 e500_free_tlb1_entry(xtlb, slot, true);
1175 #endif
1176
1177 __asm __volatile(
1178 "mtmsr %[msr]" "\n\t"
1179 "sync" "\n\t"
1180 "isync"
1181 :: [msr] "r" (msr));
1182
1183 e500_free_tlb1_entry(tmp_xtlb, tmp_slot, true);
1184 #endif /* PMAP_MINIMALTLB */
1185 }
1186