1 /*
2 * PL310 level 2 cache (non-architectural bag on the side)
3 *
4 * guaranteed to work incorrectly with default settings; must set Sharovr.
5 *
6 * clean & invalidate (wbinv) is buggy, so we work around erratum 588369
7 * by disabling write-back and cache line-fill before, and restoring after.
8 */
9 #include "u.h"
10 #include "../port/lib.h"
11 #include "mem.h"
12 #include "dat.h"
13 #include "fns.h"
14 #include "io.h"
15 #include "../port/error.h"
16 #include "arm.h"
17
/* associativity selected by the Assoc16way bit of l2p's aux. control register */
#define NWAYS(l2p)	(((l2p)->auxctl & Assoc16way) != 0? 16: 8)
/* the PL310 register block, at the address held in soc.l2cache */
#define L2P		((L2pl310 *)soc.l2cache)
20
21 enum {
22 L2size = 1024 * 1024, /* according to the tegra 2 manual */
23 Wayszgran = 16 * KiB, /* granularity of way sizes */
24 };
25
26 typedef struct L2pl310 L2pl310;
27 typedef struct Pl310op Pl310op;
28
29 struct Pl310op {
30 ulong pa;
31 ulong _pad;
32 ulong indexway;
33 ulong way;
34 };
35
36 struct L2pl310 {
37 ulong id;
38 ulong type;
39 uchar _pad0[0x100 - 0x8];
40 ulong ctl;
41 ulong auxctl;
42
43 uchar _pad1[0x730 - 0x108]; /* boring regs */
44 ulong sync;
45 uchar _pad2[0x740 - 0x734];
46 ulong r3p0sync; /* workaround for r3p0 bug */
47 uchar _pad3[0x770 - 0x744];
48 Pl310op inv; /* inv.indexway doesn't exist */
49 uchar _pad4[0x7b0 - 0x780];
50 Pl310op clean;
51 uchar _pad5[0x7f0 - 0x7c0];
52 Pl310op cleaninv;
53 uchar _pad6[0xc00 - 0x7d0];
54 ulong filtstart;
55 ulong filtend;
56 uchar _pad6[0xf40 - 0xc08];
57 ulong debug;
58 /* ... */
59 };
60
enum {
	/* ctl bits */
	L2enable	= 1<<0,

	/* auxctl bits */
	Ipref		= 1<<29,	/* prefetch enables */
	Dpref		= 1<<28,
	Mbo		= 1<<25,	/* presumably `must be one' - TODO confirm */
	Sharovr		= 1<<22,	/* shared attribute override (i.e., work right!) */
	Parity		= 1<<21,
	Waycfgshift	= 17,		/* position of the way-size field */
	Waycfgmask	= (1<<3) - 1,	/* width of the way-size field, unshifted */
	Assoc16way	= 1<<16,	/* set: 16 ways; clear: 8 ways (see NWAYS) */
	/*
	 * optim'n to 0 cache lines; must be enabled in a9(?!).
	 * set CpAClwr0line on all cpus 1st.
	 */
	Fullline0	= 1<<0,

	/* debug bits */
	Wt		= 1<<1,		/* write-through, not write-back */
	Nolinefill	= 1<<0,

	Basecfg		= Wt | Nolinefill,
};
86
87 static Lock l2lock;
88 static int disallowed; /* by user: *l2off= in plan9.ini */
89 static int l2ison;
90 static int bg_op_running;
91 static ulong waysmask;
92
93 static Cacheimpl l2cacheimpl;
94
95 static void
awaitbgop(void)96 awaitbgop(void)
97 {
98 while (bg_op_running)
99 ;
100 }
101
102 static void
getlock(void)103 getlock(void)
104 {
105 awaitbgop(); /* wait at normal PL first */
106 ilock(&l2lock);
107 awaitbgop(); /* wait under lock */
108 }
109
110 static void
l2pl310sync(void)111 l2pl310sync(void)
112 {
113 L2P->sync = 0;
114 coherence();
115 }
116
117 /* call this first to set sets/ways configuration */
118 void
l2pl310init(void)119 l2pl310init(void)
120 {
121 int waysz, nways;
122 ulong new;
123 L2pl310 *l2p = L2P;
124 static int configed;
125
126 if (getconf("*l2off") != nil) {
127 // iprint("l2 cache (pl310) disabled\n");
128 disallowed = 1;
129 return;
130 }
131 if (l2ison || configed)
132 return;
133 l2cache = &l2cacheimpl;
134 cachedwb();
135
136 /*
137 * default config is:
138 * l2: ext unified, 8 ways 512 sets 32 bytes/line => 128KB
139 * but the tegra 2 manual says there's 1MB available.
140 * ways or way-size may be fixed by hardware; the only way to tell
141 * is to try to change the setting and read it back.
142 */
143 l2pl310sync();
144 l2cache->inv();
145
146 /* figure out number of ways */
147 l2pl310sync();
148 nways = NWAYS(l2p);
149 if (!(l2p->auxctl & Assoc16way)) {
150 l2p->auxctl |= Assoc16way;
151 coherence();
152 l2pl310sync();
153 nways = NWAYS(l2p);
154 // iprint("\nl2: was set for 8 ways, asked for 16, got %d\n", nways);
155 }
156 waysmask = MASK(nways);
157
158 /* figure out way size (and thus number of sets) */
159 waysz = L2size / nways;
160 new = l2p->auxctl & ~(Waycfgmask << Waycfgshift) |
161 (log2(waysz / Wayszgran) + 1) << Waycfgshift;
162 l2p->auxctl = new;
163 coherence();
164 l2pl310sync();
165 l2cache->inv();
166
167 // iprint("\nl2: configed %d ways, %d sets (way size %d)\n", nways,
168 // waysz / CACHELINESZ, waysz);
169 if (l2p->auxctl != new)
170 iprint("l2 config %#8.8lux didn't stick; is now %#8.8lux\n",
171 new, l2p->auxctl);
172 configed++;
173 }
174
175 void
l2pl310info(Memcache * cp)176 l2pl310info(Memcache *cp)
177 {
178 int pow2;
179 ulong waysz;
180 L2pl310 *l2p = L2P;
181
182 memset(cp, 0, sizeof *cp);
183 if (!l2ison)
184 return;
185
186 l2pl310init();
187 assert((l2p->id >> 24) == 'A');
188 cp->level = 2;
189 cp->type = Unified;
190 cp->external = Extcache;
191 cp->setsways = Cara | Cawa | Cawt | Cawb;
192 cp->l1ip = 3<<14; /* PIPT */
193 cp->setsh = cp->waysh = 0; /* bag on the side */
194
195 cp->linelen = CACHELINESZ;
196 cp->log2linelen = log2(CACHELINESZ);
197
198 cp->nways = NWAYS(l2p);
199 pow2 = ((l2p->auxctl >> Waycfgshift) & Waycfgmask) - 1;
200 if (pow2 < 0)
201 pow2 = 0;
202 waysz = (1 << pow2) * Wayszgran;
203 cp->nsets = waysz / CACHELINESZ;
204 }
205
206 void
l2pl310on(void)207 l2pl310on(void)
208 {
209 ulong ctl;
210 L2pl310 *l2p = L2P;
211
212 if (getconf("*l2off") != nil) {
213 // iprint("l2 cache (pl310) disabled\n");
214 disallowed = 1;
215 return;
216 }
217 if (l2ison)
218 return;
219
220 l2pl310init();
221 l2cache->inv();
222
223 /*
224 * drain l1. can't turn it off (which would make locks not work)
225 * because doing so makes references below to the l2 registers wedge
226 * the system.
227 */
228 cacheuwbinv();
229 cacheiinv();
230
231 /*
232 * this is only called once, on cpu0 at startup,
233 * so we don't need locks here.
234 * must do all configuration before enabling l2 cache.
235 */
236 l2p->filtend = 0;
237 coherence();
238 l2p->filtstart = 0; /* no enable bit */
239 l2p->debug = 0; /* write-back, line fills allowed */
240 coherence();
241
242 ctl = l2p->auxctl;
243 /* don't change number of sets & ways, but reset all else. */
244 ctl &= Waycfgmask << Waycfgshift | Assoc16way;
245 ctl |= Sharovr; /* actually work correctly for a change */
246 ctl |= Mbo | Ipref | Dpref | Parity | Fullline0;
247 l2p->auxctl = ctl;
248 coherence();
249
250 l2p->ctl |= L2enable;
251 coherence();
252
253 l2ison = 1;
254
255 // iprint("l2 cache (pl310) now on\n");
256 }
257
258 void
l2pl310off(void)259 l2pl310off(void)
260 {
261 if (!l2ison)
262 return;
263 l2cache->wbinv();
264 getlock();
265 L2P->ctl &= ~L2enable;
266 coherence();
267 l2ison = 0;
268 iunlock(&l2lock);
269 }
270
271
272 static void
applyrange(ulong * reg,void * ava,int len)273 applyrange(ulong *reg, void *ava, int len)
274 {
275 uintptr va, endva;
276
277 if (disallowed || !l2ison)
278 return;
279 if (len < 0)
280 panic("l2cache*se called with negative length");
281 endva = (uintptr)ava + len;
282 for (va = (uintptr)ava & ~(CACHELINESZ-1); va < endva;
283 va += CACHELINESZ)
284 *reg = PADDR(va);
285 l2pl310sync();
286 }
287
288 void
l2pl310invse(void * va,int bytes)289 l2pl310invse(void *va, int bytes)
290 {
291 uintptr start, end;
292 L2pl310 *l2p = L2P;
293
294 /*
295 * if start & end addresses are not on cache-line boundaries,
296 * flush first & last cachelines before invalidating.
297 */
298 start = (uintptr)va;
299 end = start + bytes;
300 getlock();
301 if (start % CACHELINESZ != 0) {
302 // iprint("l2pl310invse: unaligned start %#p from %#p\n", start,
303 // getcallerpc(&va));
304 applyrange(&l2p->clean.pa, va, 1);
305 }
306 if (end % CACHELINESZ != 0) {
307 // iprint("l2pl310invse: unaligned end %#p from %#p\n", end,
308 // getcallerpc(&va));
309 applyrange(&l2p->clean.pa, (char *)va + bytes, 1);
310 }
311
312 applyrange(&l2p->inv.pa, va, bytes);
313 iunlock(&l2lock);
314 }
315
316 void
l2pl310wbse(void * va,int bytes)317 l2pl310wbse(void *va, int bytes)
318 {
319 getlock();
320 applyrange(&L2P->clean.pa, va, bytes);
321 iunlock(&l2lock);
322 }
323
324 /*
325 * assume that ldrex/strex (thus locks) won't work when Wt in is effect,
326 * so don't manipulate locks between setting and clearing Wt.
327 */
328 void
l2pl310wbinvse(void * va,int bytes)329 l2pl310wbinvse(void *va, int bytes)
330 {
331 int odb;
332 L2pl310 *l2p = L2P;
333
334 if (!l2ison)
335 return;
336 getlock();
337 applyrange(&l2p->clean.pa, va, bytes); /* paranoia */
338
339 odb = l2p->debug;
340 l2p->debug |= Wt | Nolinefill; /* erratum workaround */
341 coherence();
342
343 applyrange(&l2p->cleaninv.pa, va, bytes);
344
345 l2p->debug = odb;
346 iunlock(&l2lock);
347 }
348
349
350 /*
351 * we want to wait for completion at normal PL.
352 * if waiting is interrupted, interrupt code that calls
353 * these ops could deadlock on a uniprocessor, so we only
354 * give up l2lock before waiting on multiprocessors.
355 * in this port, only cpu 0 gets interrupts other than local timer ones.
356 */
357
358 void
l2pl310inv(void)359 l2pl310inv(void)
360 {
361 L2pl310 *l2p = L2P;
362
363 if (disallowed)
364 return;
365
366 getlock();
367 bg_op_running = 1;
368 l2p->inv.way = waysmask;
369 coherence();
370 if (conf.nmach > 1)
371 iunlock(&l2lock);
372
373 while (l2p->inv.way & waysmask)
374 ;
375
376 if (conf.nmach > 1)
377 ilock(&l2lock);
378 l2pl310sync();
379 bg_op_running = 0;
380 iunlock(&l2lock);
381 }
382
383 /*
384 * maximum time seen is 2542µs, typical is 625µs.
385 */
386 void
l2pl310wb(void)387 l2pl310wb(void)
388 {
389 L2pl310 *l2p = L2P;
390
391 if (disallowed || !l2ison)
392 return;
393
394 getlock();
395 bg_op_running = 1;
396 l2p->clean.way = waysmask;
397 coherence();
398 if (conf.nmach > 1)
399 iunlock(&l2lock);
400
401 while (l2p->clean.way & waysmask)
402 ;
403
404 if (conf.nmach > 1)
405 ilock(&l2lock);
406 l2pl310sync();
407 bg_op_running = 0;
408 iunlock(&l2lock);
409 }
410
411 void
l2pl310wbinv(void)412 l2pl310wbinv(void)
413 {
414 int odb;
415 L2pl310 *l2p = L2P;
416
417 if (disallowed || !l2ison)
418 return;
419
420 l2pl310wb(); /* paranoia */
421
422 getlock();
423 bg_op_running = 1;
424 odb = l2p->debug;
425 l2p->debug |= Wt | Nolinefill; /* erratum workaround */
426 coherence();
427
428 l2p->cleaninv.way = waysmask;
429 coherence();
430 if (conf.nmach > 1)
431 iunlock(&l2lock);
432
433 while (l2p->cleaninv.way & waysmask)
434 ;
435
436 if (conf.nmach > 1)
437 ilock(&l2lock);
438 l2pl310sync();
439 l2p->debug = odb;
440 bg_op_running = 0;
441 iunlock(&l2lock);
442 }
443
444 static Cacheimpl l2cacheimpl = {
445 .info = l2pl310info,
446 .on = l2pl310on,
447 .off = l2pl310off,
448
449 .inv = l2pl310inv,
450 .wb = l2pl310wb,
451 .wbinv = l2pl310wbinv,
452
453 .invse = l2pl310invse,
454 .wbse = l2pl310wbse,
455 .wbinvse= l2pl310wbinvse,
456 };
457