1 /* 2 * PL310 level 2 cache (non-architectural bag on the side) 3 * 4 * guaranteed to work incorrectly with default settings; must set Sharovr. 5 * 6 * clean & invalidate (wbinv) is buggy, so we work around erratum 588369 7 * by disabling write-back and cache line-fill before, and restoring after. 8 */ 9 #include "u.h" 10 #include "../port/lib.h" 11 #include "mem.h" 12 #include "dat.h" 13 #include "fns.h" 14 #include "io.h" 15 #include "../port/error.h" 16 #include "arm.h" 17 18 #define NWAYS(l2p) ((l2p)->auxctl & Assoc16way? 16: 8) 19 #define L2P ((L2pl310 *)soc.l2cache) 20 21 enum { 22 L2size = 1024 * 1024, /* according to the tegra 2 manual */ 23 Wayszgran = 16 * KiB, /* granularity of way sizes */ 24 }; 25 26 typedef struct L2pl310 L2pl310; 27 typedef struct Pl310op Pl310op; 28 29 struct Pl310op { 30 ulong pa; 31 ulong _pad; 32 ulong indexway; 33 ulong way; 34 }; 35 36 struct L2pl310 { 37 ulong id; 38 ulong type; 39 uchar _pad0[0x100 - 0x8]; 40 ulong ctl; 41 ulong auxctl; 42 43 uchar _pad1[0x730 - 0x108]; /* boring regs */ 44 ulong sync; 45 uchar _pad2[0x740 - 0x734]; 46 ulong r3p0sync; /* workaround for r3p0 bug */ 47 uchar _pad3[0x770 - 0x744]; 48 Pl310op inv; /* inv.indexway doesn't exist */ 49 uchar _pad4[0x7b0 - 0x780]; 50 Pl310op clean; 51 uchar _pad5[0x7f0 - 0x7c0]; 52 Pl310op cleaninv; 53 uchar _pad6[0xc00 - 0x7d0]; 54 ulong filtstart; 55 ulong filtend; 56 uchar _pad6[0xf40 - 0xc08]; 57 ulong debug; 58 /* ... */ 59 }; 60 61 enum { 62 /* ctl bits */ 63 L2enable = 1, 64 65 /* auxctl bits */ 66 Ipref = 1<<29, /* prefetch enables */ 67 Dpref = 1<<28, 68 Mbo = 1<<25, 69 Sharovr = 1<<22, /* shared attribute override (i.e., work right!) */ 70 Parity = 1<<21, 71 Waycfgshift= 17, 72 Waycfgmask = (1<<3) - 1, 73 Assoc16way = 1<<16, 74 /* 75 * optim'n to 0 cache lines; must be enabled in a9(?!). 76 * set CpAClwr0line on all cpus 1st. 77 */ 78 Fullline0= 1<<0, 79 80 /* debug bits */ 81 Wt = 1<<1, /* write-through, not write-back */ 82 Nolinefill= 1<<0, 83 84 Basecfg = Wt | Nolinefill, 85 }; 86 87 static Lock l2lock; 88 static int disallowed; /* by user: *l2off= in plan9.ini */ 89 static int l2ison; 90 static int bg_op_running; 91 static ulong waysmask; 92 93 static Cacheimpl l2cacheimpl; 94 95 static void 96 awaitbgop(void) 97 { 98 while (bg_op_running) 99 ; 100 } 101 102 static void 103 getlock(void) 104 { 105 awaitbgop(); /* wait at normal PL first */ 106 ilock(&l2lock); 107 awaitbgop(); /* wait under lock */ 108 } 109 110 static void 111 l2pl310sync(void) 112 { 113 L2P->sync = 0; 114 coherence(); 115 } 116 117 /* call this first to set sets/ways configuration */ 118 void 119 l2pl310init(void) 120 { 121 int waysz, nways; 122 ulong new; 123 L2pl310 *l2p = L2P; 124 static int configed; 125 126 if (getconf("*l2off") != nil) { 127 // iprint("l2 cache (pl310) disabled\n"); 128 disallowed = 1; 129 return; 130 } 131 if (l2ison || configed) 132 return; 133 l2cache = &l2cacheimpl; 134 cachedwb(); 135 136 /* 137 * default config is: 138 * l2: ext unified, 8 ways 512 sets 32 bytes/line => 128KB 139 * but the tegra 2 manual says there's 1MB available. 140 * ways or way-size may be fixed by hardware; the only way to tell 141 * is to try to change the setting and read it back. 142 */ 143 l2pl310sync(); 144 l2cache->inv(); 145 146 /* figure out number of ways */ 147 l2pl310sync(); 148 nways = NWAYS(l2p); 149 if (!(l2p->auxctl & Assoc16way)) { 150 l2p->auxctl |= Assoc16way; 151 coherence(); 152 l2pl310sync(); 153 nways = NWAYS(l2p); 154 // iprint("\nl2: was set for 8 ways, asked for 16, got %d\n", nways); 155 } 156 waysmask = MASK(nways); 157 158 /* figure out way size (and thus number of sets) */ 159 waysz = L2size / nways; 160 new = l2p->auxctl & ~(Waycfgmask << Waycfgshift) | 161 (log2(waysz / Wayszgran) + 1) << Waycfgshift; 162 l2p->auxctl = new; 163 coherence(); 164 l2pl310sync(); 165 l2cache->inv(); 166 167 // iprint("\nl2: configed %d ways, %d sets (way size %d)\n", nways, 168 // waysz / CACHELINESZ, waysz); 169 if (l2p->auxctl != new) 170 iprint("l2 config %#8.8lux didn't stick; is now %#8.8lux\n", 171 new, l2p->auxctl); 172 configed++; 173 } 174 175 void 176 l2pl310info(Memcache *cp) 177 { 178 int pow2; 179 ulong waysz; 180 L2pl310 *l2p = L2P; 181 182 memset(cp, 0, sizeof *cp); 183 if (!l2ison) 184 return; 185 186 l2pl310init(); 187 assert((l2p->id >> 24) == 'A'); 188 cp->level = 2; 189 cp->type = Unified; 190 cp->external = Extcache; 191 cp->setsways = Cara | Cawa | Cawt | Cawb; 192 cp->l1ip = 3<<14; /* PIPT */ 193 cp->setsh = cp->waysh = 0; /* bag on the side */ 194 195 cp->linelen = CACHELINESZ; 196 cp->log2linelen = log2(CACHELINESZ); 197 198 cp->nways = NWAYS(l2p); 199 pow2 = ((l2p->auxctl >> Waycfgshift) & Waycfgmask) - 1; 200 if (pow2 < 0) 201 pow2 = 0; 202 waysz = (1 << pow2) * Wayszgran; 203 cp->nsets = waysz / CACHELINESZ; 204 } 205 206 void 207 l2pl310on(void) 208 { 209 ulong ctl; 210 L2pl310 *l2p = L2P; 211 212 if (getconf("*l2off") != nil) { 213 // iprint("l2 cache (pl310) disabled\n"); 214 disallowed = 1; 215 return; 216 } 217 if (l2ison) 218 return; 219 220 l2pl310init(); 221 l2cache->inv(); 222 223 /* 224 * drain l1. can't turn it off (which would make locks not work) 225 * because doing so makes references below to the l2 registers wedge 226 * the system. 227 */ 228 cacheuwbinv(); 229 cacheiinv(); 230 231 /* 232 * this is only called once, on cpu0 at startup, 233 * so we don't need locks here. 234 * must do all configuration before enabling l2 cache. 235 */ 236 l2p->filtend = 0; 237 coherence(); 238 l2p->filtstart = 0; /* no enable bit */ 239 l2p->debug = 0; /* write-back, line fills allowed */ 240 coherence(); 241 242 ctl = l2p->auxctl; 243 /* don't change number of sets & ways, but reset all else. */ 244 ctl &= Waycfgmask << Waycfgshift | Assoc16way; 245 ctl |= Sharovr; /* actually work correctly for a change */ 246 ctl |= Mbo | Ipref | Dpref | Parity | Fullline0; 247 l2p->auxctl = ctl; 248 coherence(); 249 250 l2p->ctl |= L2enable; 251 coherence(); 252 253 l2ison = 1; 254 255 // iprint("l2 cache (pl310) now on\n"); 256 } 257 258 void 259 l2pl310off(void) 260 { 261 if (!l2ison) 262 return; 263 l2cache->wbinv(); 264 getlock(); 265 L2P->ctl &= ~L2enable; 266 coherence(); 267 l2ison = 0; 268 iunlock(&l2lock); 269 } 270 271 272 static void 273 applyrange(ulong *reg, void *ava, int len) 274 { 275 uintptr va, endva; 276 277 if (disallowed || !l2ison) 278 return; 279 if (len < 0) 280 panic("l2cache*se called with negative length"); 281 endva = (uintptr)ava + len; 282 for (va = (uintptr)ava & ~(CACHELINESZ-1); va < endva; 283 va += CACHELINESZ) 284 *reg = PADDR(va); 285 l2pl310sync(); 286 } 287 288 void 289 l2pl310invse(void *va, int bytes) 290 { 291 uintptr start, end; 292 L2pl310 *l2p = L2P; 293 294 /* 295 * if start & end addresses are not on cache-line boundaries, 296 * flush first & last cachelines before invalidating. 297 */ 298 start = (uintptr)va; 299 end = start + bytes; 300 getlock(); 301 if (start % CACHELINESZ != 0) { 302 // iprint("l2pl310invse: unaligned start %#p from %#p\n", start, 303 // getcallerpc(&va)); 304 applyrange(&l2p->clean.pa, va, 1); 305 } 306 if (end % CACHELINESZ != 0) { 307 // iprint("l2pl310invse: unaligned end %#p from %#p\n", end, 308 // getcallerpc(&va)); 309 applyrange(&l2p->clean.pa, (char *)va + bytes, 1); 310 } 311 312 applyrange(&l2p->inv.pa, va, bytes); 313 iunlock(&l2lock); 314 } 315 316 void 317 l2pl310wbse(void *va, int bytes) 318 { 319 getlock(); 320 applyrange(&L2P->clean.pa, va, bytes); 321 iunlock(&l2lock); 322 } 323 324 /* 325 * assume that ldrex/strex (thus locks) won't work when Wt in is effect, 326 * so don't manipulate locks between setting and clearing Wt. 327 */ 328 void 329 l2pl310wbinvse(void *va, int bytes) 330 { 331 int odb; 332 L2pl310 *l2p = L2P; 333 334 if (!l2ison) 335 return; 336 getlock(); 337 applyrange(&l2p->clean.pa, va, bytes); /* paranoia */ 338 339 odb = l2p->debug; 340 l2p->debug |= Wt | Nolinefill; /* erratum workaround */ 341 coherence(); 342 343 applyrange(&l2p->cleaninv.pa, va, bytes); 344 345 l2p->debug = odb; 346 iunlock(&l2lock); 347 } 348 349 350 /* 351 * we want to wait for completion at normal PL. 352 * if waiting is interrupted, interrupt code that calls 353 * these ops could deadlock on a uniprocessor, so we only 354 * give up l2lock before waiting on multiprocessors. 355 * in this port, only cpu 0 gets interrupts other than local timer ones. 356 */ 357 358 void 359 l2pl310inv(void) 360 { 361 L2pl310 *l2p = L2P; 362 363 if (disallowed) 364 return; 365 366 getlock(); 367 bg_op_running = 1; 368 l2p->inv.way = waysmask; 369 coherence(); 370 if (conf.nmach > 1) 371 iunlock(&l2lock); 372 373 while (l2p->inv.way & waysmask) 374 ; 375 376 if (conf.nmach > 1) 377 ilock(&l2lock); 378 l2pl310sync(); 379 bg_op_running = 0; 380 iunlock(&l2lock); 381 } 382 383 /* 384 * maximum time seen is 2542µs, typical is 625µs. 385 */ 386 void 387 l2pl310wb(void) 388 { 389 L2pl310 *l2p = L2P; 390 391 if (disallowed || !l2ison) 392 return; 393 394 getlock(); 395 bg_op_running = 1; 396 l2p->clean.way = waysmask; 397 coherence(); 398 if (conf.nmach > 1) 399 iunlock(&l2lock); 400 401 while (l2p->clean.way & waysmask) 402 ; 403 404 if (conf.nmach > 1) 405 ilock(&l2lock); 406 l2pl310sync(); 407 bg_op_running = 0; 408 iunlock(&l2lock); 409 } 410 411 void 412 l2pl310wbinv(void) 413 { 414 int odb; 415 L2pl310 *l2p = L2P; 416 417 if (disallowed || !l2ison) 418 return; 419 420 l2pl310wb(); /* paranoia */ 421 422 getlock(); 423 bg_op_running = 1; 424 odb = l2p->debug; 425 l2p->debug |= Wt | Nolinefill; /* erratum workaround */ 426 coherence(); 427 428 l2p->cleaninv.way = waysmask; 429 coherence(); 430 if (conf.nmach > 1) 431 iunlock(&l2lock); 432 433 while (l2p->cleaninv.way & waysmask) 434 ; 435 436 if (conf.nmach > 1) 437 ilock(&l2lock); 438 l2pl310sync(); 439 l2p->debug = odb; 440 bg_op_running = 0; 441 iunlock(&l2lock); 442 } 443 444 static Cacheimpl l2cacheimpl = { 445 .info = l2pl310info, 446 .on = l2pl310on, 447 .off = l2pl310off, 448 449 .inv = l2pl310inv, 450 .wb = l2pl310wb, 451 .wbinv = l2pl310wbinv, 452 453 .invse = l2pl310invse, 454 .wbse = l2pl310wbse, 455 .wbinvse= l2pl310wbinvse, 456 }; 457