xref: /plan9/sys/src/9/teg2/cache-l2-pl310.c (revision 3de6a9c0b3d5cf34fc4090d0bf1930d83799a7fd)
1 /*
2  * PL310 level 2 cache (non-architectural bag on the side)
3  *
4  * guaranteed to work incorrectly with default settings; must set Sharovr.
5  *
6  * clean & invalidate (wbinv) is buggy, so we work around erratum 588369
7  * by disabling write-back and cache line-fill before, and restoring after.
8  */
9 #include "u.h"
10 #include "../port/lib.h"
11 #include "mem.h"
12 #include "dat.h"
13 #include "fns.h"
14 #include "io.h"
15 #include "../port/error.h"
16 #include "arm.h"
17 
/* associativity selected by the Assoc16way bit of the given controller's auxctl */
#define NWAYS(l2p)	((((l2p)->auxctl & Assoc16way) != 0)? 16: 8)
/* the controller's registers, at the address recorded in soc.l2cache */
#define L2P		((L2pl310 *)soc.l2cache)
20 
21 enum {
22 	L2size		= 1024 * 1024,	/* according to the tegra 2 manual */
23 	Wayszgran	= 16 * KiB,	/* granularity of way sizes */
24 };
25 
26 typedef struct L2pl310 L2pl310;
27 typedef struct Pl310op Pl310op;
28 
29 struct Pl310op {
30 	ulong	pa;
31 	ulong	_pad;
32 	ulong	indexway;
33 	ulong	way;
34 };
35 
36 struct L2pl310 {
37 	ulong	id;
38 	ulong	type;
39 	uchar	_pad0[0x100 - 0x8];
40 	ulong	ctl;
41 	ulong	auxctl;
42 
43 	uchar	_pad1[0x730 - 0x108];	/* boring regs */
44 	ulong	sync;
45 	uchar	_pad2[0x740 - 0x734];
46 	ulong	r3p0sync;		/* workaround for r3p0 bug */
47 	uchar	_pad3[0x770 - 0x744];
48 	Pl310op	inv;			/* inv.indexway doesn't exist */
49 	uchar	_pad4[0x7b0 - 0x780];
50 	Pl310op	clean;
51 	uchar	_pad5[0x7f0 - 0x7c0];
52 	Pl310op	cleaninv;
53 	uchar	_pad6[0xc00 - 0x7d0];
54 	ulong	filtstart;
55 	ulong	filtend;
56 	uchar	_pad6[0xf40 - 0xc08];
57 	ulong	debug;
58 	/* ... */
59 };
60 
enum {
	/* ctl bits */
	L2enable	= 1<<0,

	/* auxctl bits, low to high */
	/*
	 * optim'n to 0 cache lines; must be enabled in a9(?!).
	 * set CpAClwr0line on all cpus 1st.
	 */
	Fullline0	= 1<<0,
	Assoc16way	= 1<<16,
	Waycfgshift	= 17,			/* way-size field position */
	Waycfgmask	= (1<<3) - 1,		/* way-size field is 3 bits wide */
	Parity		= 1<<21,
	Sharovr		= 1<<22, /* shared attribute override (i.e., work right!) */
	Mbo		= 1<<25,
	Dpref		= 1<<28,		/* prefetch enables */
	Ipref		= 1<<29,

	/* debug bits */
	Nolinefill	= 1<<0,
	Wt		= 1<<1,			/* write-through, not write-back */

	Basecfg		= Wt | Nolinefill,
};
86 
87 static Lock l2lock;
88 static int disallowed;			/* by user: *l2off= in plan9.ini */
89 static int l2ison;
90 static int bg_op_running;
91 static ulong waysmask;
92 
93 static Cacheimpl l2cacheimpl;
94 
95 static void
awaitbgop(void)96 awaitbgop(void)
97 {
98 	while (bg_op_running)
99 		;
100 }
101 
102 static void
getlock(void)103 getlock(void)
104 {
105 	awaitbgop();		/* wait at normal PL first */
106 	ilock(&l2lock);
107 	awaitbgop();		/* wait under lock */
108 }
109 
110 static void
l2pl310sync(void)111 l2pl310sync(void)
112 {
113 	L2P->sync = 0;
114 	coherence();
115 }
116 
117 /* call this first to set sets/ways configuration */
118 void
l2pl310init(void)119 l2pl310init(void)
120 {
121 	int waysz, nways;
122 	ulong new;
123 	L2pl310 *l2p = L2P;
124 	static int configed;
125 
126 	if (getconf("*l2off") != nil) {
127 //		iprint("l2 cache (pl310) disabled\n");
128 		disallowed = 1;
129 		return;
130 	}
131 	if (l2ison || configed)
132 		return;
133 	l2cache = &l2cacheimpl;
134 	cachedwb();
135 
136 	/*
137 	 * default config is:
138 	 * l2: ext unified, 8 ways 512 sets 32 bytes/line => 128KB
139 	 * but the tegra 2 manual says there's 1MB available.
140 	 * ways or way-size may be fixed by hardware; the only way to tell
141 	 * is to try to change the setting and read it back.
142 	 */
143 	l2pl310sync();
144 	l2cache->inv();
145 
146 	/* figure out number of ways */
147 	l2pl310sync();
148 	nways = NWAYS(l2p);
149 	if (!(l2p->auxctl & Assoc16way)) {
150 		l2p->auxctl |= Assoc16way;
151 		coherence();
152 		l2pl310sync();
153 		nways = NWAYS(l2p);
154 //		iprint("\nl2: was set for 8 ways, asked for 16, got %d\n", nways);
155 	}
156 	waysmask = MASK(nways);
157 
158 	/* figure out way size (and thus number of sets) */
159 	waysz = L2size / nways;
160 	new = l2p->auxctl & ~(Waycfgmask << Waycfgshift) |
161 		(log2(waysz / Wayszgran) + 1) << Waycfgshift;
162 	l2p->auxctl = new;
163 	coherence();
164 	l2pl310sync();
165 	l2cache->inv();
166 
167 //	iprint("\nl2: configed %d ways, %d sets (way size %d)\n", nways,
168 //		waysz / CACHELINESZ, waysz);
169 	if (l2p->auxctl != new)
170 		iprint("l2 config %#8.8lux didn't stick; is now %#8.8lux\n",
171 			new, l2p->auxctl);
172 	configed++;
173 }
174 
175 void
l2pl310info(Memcache * cp)176 l2pl310info(Memcache *cp)
177 {
178 	int pow2;
179 	ulong waysz;
180 	L2pl310 *l2p = L2P;
181 
182 	memset(cp, 0, sizeof *cp);
183 	if (!l2ison)
184 		return;
185 
186 	l2pl310init();
187 	assert((l2p->id >> 24) == 'A');
188 	cp->level = 2;
189 	cp->type = Unified;
190 	cp->external = Extcache;
191 	cp->setsways = Cara | Cawa | Cawt | Cawb;
192 	cp->l1ip = 3<<14;				/* PIPT */
193 	cp->setsh = cp->waysh = 0;			/* bag on the side */
194 
195 	cp->linelen = CACHELINESZ;
196 	cp->log2linelen = log2(CACHELINESZ);
197 
198 	cp->nways = NWAYS(l2p);
199 	pow2 = ((l2p->auxctl >> Waycfgshift) & Waycfgmask) - 1;
200 	if (pow2 < 0)
201 		pow2 = 0;
202 	waysz = (1 << pow2) * Wayszgran;
203 	cp->nsets = waysz / CACHELINESZ;
204 }
205 
206 void
l2pl310on(void)207 l2pl310on(void)
208 {
209 	ulong ctl;
210 	L2pl310 *l2p = L2P;
211 
212 	if (getconf("*l2off") != nil) {
213 //		iprint("l2 cache (pl310) disabled\n");
214 		disallowed = 1;
215 		return;
216 	}
217 	if (l2ison)
218 		return;
219 
220 	l2pl310init();
221 	l2cache->inv();
222 
223 	/*
224 	 * drain l1.  can't turn it off (which would make locks not work)
225 	 * because doing so makes references below to the l2 registers wedge
226 	 * the system.
227 	 */
228 	cacheuwbinv();
229 	cacheiinv();
230 
231 	/*
232 	 * this is only called once, on cpu0 at startup,
233 	 * so we don't need locks here.
234 	 * must do all configuration before enabling l2 cache.
235 	 */
236 	l2p->filtend = 0;
237 	coherence();
238 	l2p->filtstart = 0;		/* no enable bit */
239 	l2p->debug = 0;			/* write-back, line fills allowed */
240 	coherence();
241 
242 	ctl = l2p->auxctl;
243 	/* don't change number of sets & ways, but reset all else. */
244 	ctl &= Waycfgmask << Waycfgshift | Assoc16way;
245 	ctl |= Sharovr;		/* actually work correctly for a change */
246 	ctl |= Mbo | Ipref | Dpref | Parity | Fullline0;
247 	l2p->auxctl = ctl;
248 	coherence();
249 
250 	l2p->ctl |= L2enable;
251 	coherence();
252 
253 	l2ison = 1;
254 
255 //	iprint("l2 cache (pl310) now on\n");
256 }
257 
258 void
l2pl310off(void)259 l2pl310off(void)
260 {
261 	if (!l2ison)
262 		return;
263 	l2cache->wbinv();
264 	getlock();
265 	L2P->ctl &= ~L2enable;
266 	coherence();
267 	l2ison = 0;
268 	iunlock(&l2lock);
269 }
270 
271 
272 static void
applyrange(ulong * reg,void * ava,int len)273 applyrange(ulong *reg, void *ava, int len)
274 {
275 	uintptr va, endva;
276 
277 	if (disallowed || !l2ison)
278 		return;
279 	if (len < 0)
280 		panic("l2cache*se called with negative length");
281 	endva = (uintptr)ava + len;
282 	for (va = (uintptr)ava & ~(CACHELINESZ-1); va < endva;
283 	     va += CACHELINESZ)
284 		*reg = PADDR(va);
285 	l2pl310sync();
286 }
287 
288 void
l2pl310invse(void * va,int bytes)289 l2pl310invse(void *va, int bytes)
290 {
291 	uintptr start, end;
292 	L2pl310 *l2p = L2P;
293 
294 	/*
295 	 * if start & end addresses are not on cache-line boundaries,
296 	 * flush first & last cachelines before invalidating.
297 	 */
298 	start = (uintptr)va;
299 	end = start + bytes;
300 	getlock();
301 	if (start % CACHELINESZ != 0) {
302 //		iprint("l2pl310invse: unaligned start %#p from %#p\n", start,
303 //			getcallerpc(&va));
304 		applyrange(&l2p->clean.pa, va, 1);
305 	}
306 	if (end % CACHELINESZ != 0) {
307 //		iprint("l2pl310invse: unaligned end %#p from %#p\n", end,
308 //			getcallerpc(&va));
309 		applyrange(&l2p->clean.pa, (char *)va + bytes, 1);
310 	}
311 
312 	applyrange(&l2p->inv.pa, va, bytes);
313 	iunlock(&l2lock);
314 }
315 
316 void
l2pl310wbse(void * va,int bytes)317 l2pl310wbse(void *va, int bytes)
318 {
319 	getlock();
320 	applyrange(&L2P->clean.pa, va, bytes);
321 	iunlock(&l2lock);
322 }
323 
/*
 * assume that ldrex/strex (thus locks) won't work when Wt is in effect,
 * so don't manipulate locks between setting and clearing Wt.
 */
328 void
l2pl310wbinvse(void * va,int bytes)329 l2pl310wbinvse(void *va, int bytes)
330 {
331 	int odb;
332 	L2pl310 *l2p = L2P;
333 
334 	if (!l2ison)
335 		return;
336 	getlock();
337 	applyrange(&l2p->clean.pa, va, bytes);	/* paranoia */
338 
339 	odb = l2p->debug;
340 	l2p->debug |= Wt | Nolinefill;		/* erratum workaround */
341 	coherence();
342 
343 	applyrange(&l2p->cleaninv.pa, va, bytes);
344 
345 	l2p->debug = odb;
346 	iunlock(&l2lock);
347 }
348 
349 
350 /*
351  * we want to wait for completion at normal PL.
352  * if waiting is interrupted, interrupt code that calls
353  * these ops could deadlock on a uniprocessor, so we only
354  * give up l2lock before waiting on multiprocessors.
355  * in this port, only cpu 0 gets interrupts other than local timer ones.
356  */
357 
358 void
l2pl310inv(void)359 l2pl310inv(void)
360 {
361 	L2pl310 *l2p = L2P;
362 
363 	if (disallowed)
364 		return;
365 
366 	getlock();
367 	bg_op_running = 1;
368 	l2p->inv.way = waysmask;
369 	coherence();
370 	if (conf.nmach > 1)
371 		iunlock(&l2lock);
372 
373 	while (l2p->inv.way & waysmask)
374 		;
375 
376 	if (conf.nmach > 1)
377 		ilock(&l2lock);
378 	l2pl310sync();
379 	bg_op_running = 0;
380 	iunlock(&l2lock);
381 }
382 
383 /*
384  * maximum time seen is 2542µs, typical is 625µs.
385  */
386 void
l2pl310wb(void)387 l2pl310wb(void)
388 {
389 	L2pl310 *l2p = L2P;
390 
391 	if (disallowed || !l2ison)
392 		return;
393 
394 	getlock();
395 	bg_op_running = 1;
396 	l2p->clean.way = waysmask;
397 	coherence();
398 	if (conf.nmach > 1)
399 		iunlock(&l2lock);
400 
401 	while (l2p->clean.way & waysmask)
402 		;
403 
404 	if (conf.nmach > 1)
405 		ilock(&l2lock);
406 	l2pl310sync();
407 	bg_op_running = 0;
408 	iunlock(&l2lock);
409 }
410 
411 void
l2pl310wbinv(void)412 l2pl310wbinv(void)
413 {
414 	int odb;
415 	L2pl310 *l2p = L2P;
416 
417 	if (disallowed || !l2ison)
418 		return;
419 
420 	l2pl310wb();			/* paranoia */
421 
422 	getlock();
423 	bg_op_running = 1;
424 	odb = l2p->debug;
425 	l2p->debug |= Wt | Nolinefill;	/* erratum workaround */
426 	coherence();
427 
428 	l2p->cleaninv.way = waysmask;
429 	coherence();
430 	if (conf.nmach > 1)
431 		iunlock(&l2lock);
432 
433 	while (l2p->cleaninv.way & waysmask)
434 		;
435 
436 	if (conf.nmach > 1)
437 		ilock(&l2lock);
438 	l2pl310sync();
439 	l2p->debug = odb;
440 	bg_op_running = 0;
441 	iunlock(&l2lock);
442 }
443 
444 static Cacheimpl l2cacheimpl = {
445 	.info	= l2pl310info,
446 	.on	= l2pl310on,
447 	.off	= l2pl310off,
448 
449 	.inv	= l2pl310inv,
450 	.wb	= l2pl310wb,
451 	.wbinv	= l2pl310wbinv,
452 
453 	.invse	= l2pl310invse,
454 	.wbse	= l2pl310wbse,
455 	.wbinvse= l2pl310wbinvse,
456 };
457