1 /* $NetBSD: dec_kn8ae.c,v 1.44 2024/03/31 19:06:31 thorpej Exp $ */
2
3 /*
4 * Copyright (c) 1997 by Matthew Jacob
5 * NASA AMES Research Center.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice immediately at the beginning of the file, without modification,
13 * this list of conditions, and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. The name of the author may not be used to endorse or promote products
18 * derived from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
24 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 #include <sys/cdefs.h> /* RCS ID & Copyright macro defns */
34
35 __KERNEL_RCSID(0, "$NetBSD: dec_kn8ae.c,v 1.44 2024/03/31 19:06:31 thorpej Exp $");
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/device.h>
40 #include <sys/lwp.h>
41 #include <sys/termios.h>
42 #include <sys/conf.h>
43
44 #include <dev/cons.h>
45
46 #include <machine/rpb.h>
47 #include <machine/autoconf.h>
48 #include <machine/cpuconf.h>
49 #include <machine/frame.h>
50 #include <machine/alpha.h>
51 #include <machine/logout.h>
52
53 #include <dev/ic/comreg.h>
54 #include <dev/ic/comvar.h>
55
56 #include <dev/isa/isavar.h>
57 #include <dev/pci/pcireg.h>
58 #include <dev/pci/pcivar.h>
59
60 #include <dev/scsipi/scsi_all.h>
61 #include <dev/scsipi/scsipi_all.h>
62 #include <dev/scsipi/scsiconf.h>
63
64 #include <alpha/tlsb/tlsbreg.h>
65 #include <alpha/tlsb/tlsbvar.h>
66 #include <alpha/tlsb/kftxxreg.h>
67 #include <alpha/tlsb/kftxxvar.h>
/*
 * KV(): run an address through ALPHA_PHYS_TO_K0SEG() and cast the
 * result to a generic pointer.
 */
#define KV(_addr) ((void *)ALPHA_PHYS_TO_K0SEG((_addr)))


/*
 * Platform entry points.  dec_kn8ae_init() installs the console,
 * device-register and machine check hooks in `platform'.
 */
void dec_kn8ae_init(void);
void dec_kn8ae_cons_init(void);
static void dec_kn8ae_device_register(device_t, void *);

/* Arguments, in order: mces, trap frame, vector, parameter. */
static void dec_kn8ae_mcheck_handler
(unsigned long, struct trapframe *, unsigned long, unsigned long);
77
/*
 * Variation-to-model-name table that dec_kn8ae_init() hands to
 * alpha_variation_name().
 * NOTE(review): the final { 0, NULL } entry presumably terminates the
 * list -- confirm against alpha_variation_name().
 */
const struct alpha_variation_table dec_kn8ae_variations[] = {
	{ 0, "AlphaServer 8400" },
	{ 0, NULL },
};
82
83 void
dec_kn8ae_init(void)84 dec_kn8ae_init(void)
85 {
86 uint64_t variation;
87
88 platform.family = "AlphaServer 8400";
89
90 if ((platform.model = alpha_dsr_sysname()) == NULL) {
91 variation = hwrpb->rpb_variation & SV_ST_MASK;
92 if ((platform.model = alpha_variation_name(variation,
93 dec_kn8ae_variations)) == NULL)
94 platform.model = alpha_unknown_sysname();
95 }
96
97 platform.iobus = "tlsb";
98 platform.cons_init = dec_kn8ae_cons_init;
99 platform.device_register = dec_kn8ae_device_register;
100 platform.mcheck_handler = dec_kn8ae_mcheck_handler;
101 }
102
/*
 * dec_kn8ae_cons_init: console initialization hook.
 *
 * The body is currently empty.  Info to retain for when it is
 * filled in:
 *
 *	The AXP 8X00 seems to encode the type of console in the
 *	ctb_type field, not the ctb_term_type field.
 *
 *	XXX Not Type 4 CTB?
 */
void
dec_kn8ae_cons_init(void)
{
}
117
118 /* #define BDEBUG 1 */
119 static void
dec_kn8ae_device_register(device_t dev,void * aux)120 dec_kn8ae_device_register(device_t dev, void *aux)
121 {
122 static int found, initted, diskboot, netboot;
123 static device_t primarydev, pcidev, ctrlrdev;
124 struct bootdev_data *b = bootdev_data;
125 device_t parent = device_parent(dev);
126
127 if (b == NULL || found)
128 return;
129
130 if (!initted) {
131 diskboot = (strcasecmp(b->protocol, "SCSI") == 0);
132 netboot = (strcasecmp(b->protocol, "BOOTP") == 0) ||
133 (strcasecmp(b->protocol, "MOP") == 0);
134 #if BDEBUG
135 printf("proto:%s bus:%d slot:%d chan:%d", b->protocol,
136 b->bus, b->slot, b->channel);
137 if (b->remote_address)
138 printf(" remote_addr:%s", b->remote_address);
139 printf(" un:%d bdt:%d", b->unit, b->boot_dev_type);
140 if (b->ctrl_dev_type)
141 printf(" cdt:%s\n", b->ctrl_dev_type);
142 else
143 printf("\n");
144 printf("diskboot = %d, netboot = %d\n", diskboot, netboot);
145 #endif
146 initted = 1;
147 }
148
149 if (primarydev == NULL) {
150 if (!device_is_a(dev, "dwlpx"))
151 return;
152 else {
153 struct kft_dev_attach_args *ka = aux;
154
155 if (b->bus != ka->ka_hosenum)
156 return;
157 primarydev = dev;
158 #ifdef BDEBUG
159 printf("\nprimarydev = %s\n", device_xname(dev));
160 #endif
161 return;
162 }
163 }
164
165 if (pcidev == NULL) {
166 if (!device_is_a(dev, "pci"))
167 return;
168 /*
169 * Try to find primarydev anywhere in the ancestry. This is
170 * necessary if the PCI bus is hidden behind a bridge.
171 */
172 while (parent) {
173 if (parent == primarydev)
174 break;
175 parent = device_parent(parent);
176 }
177 if (!parent)
178 return;
179 else {
180 struct pcibus_attach_args *pba = aux;
181
182 if ((b->slot / 1000) != pba->pba_bus)
183 return;
184
185 pcidev = dev;
186 #if BDEBUG
187 printf("\npcidev = %s\n", device_xname(dev));
188 #endif
189 return;
190 }
191 }
192
193 if (ctrlrdev == NULL) {
194 if (parent != pcidev)
195 return;
196 else {
197 struct pci_attach_args *pa = aux;
198 int slot;
199
200 slot = pa->pa_bus * 1000 + pa->pa_function * 100 +
201 pa->pa_device;
202 if (b->slot != slot)
203 return;
204
205 if (netboot) {
206 booted_device = dev;
207 #ifdef BDEBUG
208 printf("\nbooted_device = %s\n", device_xname(dev));
209 #endif
210 found = 1;
211 } else {
212 ctrlrdev = dev;
213 #if BDEBUG
214 printf("\nctrlrdev = %s\n", device_xname(dev));
215 #endif
216 }
217 return;
218 }
219 }
220
221 if (!diskboot)
222 return;
223
224 if (device_is_a(dev, "sd") ||
225 device_is_a(dev, "st") ||
226 device_is_a(dev, "cd")) {
227 struct scsipibus_attach_args *sa = aux;
228 struct scsipi_periph *periph = sa->sa_periph;
229 int unit;
230
231 if (device_parent(parent) != ctrlrdev)
232 return;
233
234 unit = periph->periph_target * 100 + periph->periph_lun;
235 if (b->unit != unit)
236 return;
237 if (b->channel != periph->periph_channel->chan_channel)
238 return;
239
240 /* we've found it! */
241 booted_device = dev;
242 #if BDEBUG
243 printf("\nbooted_device = %s\n", device_xname(dev));
244 #endif
245 found = 1;
246 }
247 }
248
/*
 * KN8AE Machine Check Handlers.
 *
 * All three take, in order: the mces value, the check type (vector),
 * the address of the logout area, and the trap frame.  They are called
 * from dec_kn8ae_mcheck_handler().
 */
void kn8ae_harderr(unsigned long, unsigned long,
unsigned long, struct trapframe *);

static void kn8ae_softerr(unsigned long, unsigned long,
unsigned long, struct trapframe *);

void kn8ae_mcheck(unsigned long, unsigned long,
unsigned long, struct trapframe *);
260
261 /*
262 * Support routine for clearing errors
263 */
264 static void clear_tlsb_ebits(int);
265
266 static void
clear_tlsb_ebits(int cpuonly)267 clear_tlsb_ebits(int cpuonly)
268 {
269 int node;
270 uint32_t tldev;
271
272 for (node = 0; node <= TLSB_NODE_MAX; ++node) {
273 if ((tlsb_found & (1 << node)) == 0)
274 continue;
275 tldev = TLSB_GET_NODEREG(node, TLDEV);
276 if (tldev == 0) {
277 /* "cannot happen" */
278 continue;
279 }
280 /*
281 * Registers to clear for all nodes.
282 */
283 if (TLSB_GET_NODEREG(node, TLBER) &
284 (TLBER_UDE|TLBER_CWDE|TLBER_CRDE)) {
285 TLSB_PUT_NODEREG(node, TLESR0,
286 TLSB_GET_NODEREG(node, TLESR0));
287 TLSB_PUT_NODEREG(node, TLESR1,
288 TLSB_GET_NODEREG(node, TLESR1));
289 TLSB_PUT_NODEREG(node, TLESR2,
290 TLSB_GET_NODEREG(node, TLESR2));
291 TLSB_PUT_NODEREG(node, TLESR3,
292 TLSB_GET_NODEREG(node, TLESR3));
293 }
294 TLSB_PUT_NODEREG(node, TLBER,
295 TLSB_GET_NODEREG(node, TLBER));
296 TLSB_PUT_NODEREG(node, TLFADR0,
297 TLSB_GET_NODEREG(node, TLFADR0));
298 TLSB_PUT_NODEREG(node, TLFADR1,
299 TLSB_GET_NODEREG(node, TLFADR1));
300
301 if (TLDEV_ISCPU(tldev)) {
302 TLSB_PUT_NODEREG(node, TLEPAERR,
303 TLSB_GET_NODEREG(node, TLEPAERR));
304 TLSB_PUT_NODEREG(node, TLEPDERR,
305 TLSB_GET_NODEREG(node, TLEPDERR));
306 TLSB_PUT_NODEREG(node, TLEPMERR,
307 TLSB_GET_NODEREG(node, TLEPMERR));
308 continue;
309 }
310 /*
311 * If we're only doing CPU nodes, or this was a memory
312 * node, we're done. Onwards.
313 */
314 if (cpuonly || TLDEV_ISMEM(tldev)) {
315 continue;
316 }
317
318 TLSB_PUT_NODEREG(node, KFT_ICCNSE,
319 TLSB_GET_NODEREG(node, KFT_ICCNSE));
320 TLSB_PUT_NODEREG(node, KFT_IDPNSE0,
321 TLSB_GET_NODEREG(node, KFT_IDPNSE0));
322 TLSB_PUT_NODEREG(node, KFT_IDPNSE1,
323 TLSB_GET_NODEREG(node, KFT_IDPNSE1));
324 if (TLDEV_DTYPE(tldev) == TLDEV_DTYPE_KFTHA) {
325 TLSB_PUT_NODEREG(node, KFT_IDPNSE2,
326 TLSB_GET_NODEREG(node, KFT_IDPNSE2));
327 TLSB_PUT_NODEREG(node, KFT_IDPNSE3,
328 TLSB_GET_NODEREG(node, KFT_IDPNSE3));
329 }
330 /*
331 * Digital Unix cleares the Mailbox Transaction Register
332 * here. I don't think we should because we aren't using
333 * mailboxes yet, and the tech manual makes dire warnings
334 * about *not* rewriting this register.
335 */
336 }
337 }
338
339 /*
340 * System Corrected Errors.
341 */
342 static const char *fmt1 = " %-25s = 0x%l016x\n";
343
344 void
kn8ae_harderr(unsigned long mces,unsigned long type,unsigned long logout,struct trapframe * framep)345 kn8ae_harderr(unsigned long mces, unsigned long type, unsigned long logout, struct trapframe *framep)
346 {
347 int whami, cpuwerr, dof_cnt;
348 mc_hdr_ev5 *hdr;
349 mc_cc_ev5 *mptr;
350 struct tlsb_mchk_fatal *ptr;
351
352 hdr = (mc_hdr_ev5 *) logout;
353 mptr = (mc_cc_ev5 *) (logout + sizeof (*hdr));
354 ptr = (struct tlsb_mchk_fatal *)
355 (logout + sizeof (*hdr) + sizeof (*mptr));
356 whami = alpha_pal_whami();
357
358 printf("kn8ae: CPU ID %d system correctable error\n", whami);
359
360 printf(" Machine Check Code 0x%lx\n", hdr->mcheck_code);
361 printf(fmt1, "EI Status", mptr->ei_stat);
362 printf(fmt1, "EI Address", mptr->ei_addr);
363 printf(fmt1, "Fill Syndrome", mptr->fill_syndrome);
364 printf(fmt1, "Interrupt Status Reg.", mptr->isr);
365 printf("\n");
366 dof_cnt = (ptr->rsvdheader & 0xffffffff00000000) >> 32;
367 cpuwerr = ptr->rsvdheader & 0xffff;
368
369 printf(fmt1, "CPU W/Error.", cpuwerr);
370 printf(fmt1, "DOF Count.", dof_cnt);
371 printf(fmt1, "TLDEV", ptr->tldev);
372 printf(fmt1, "TLSB Bus Error", ptr->tlber);
373 printf(fmt1, "TLSB CNR", ptr->tlcnr);
374 printf(fmt1, "TLSB VID", ptr->tlvid);
375 printf(fmt1, "TLSB Error Syndrome 0", ptr->tlesr0);
376 printf(fmt1, "TLSB Error Syndrome 1", ptr->tlesr1);
377 printf(fmt1, "TLSB Error Syndrome 2", ptr->tlesr2);
378 printf(fmt1, "TLSB Error Syndrome 3", ptr->tlesr3);
379 printf(fmt1, "TLSB LEP_AERR", ptr->tlepaerr);
380 printf(fmt1, "TLSB MODCONF", ptr->tlmodconfig);
381 printf(fmt1, "TLSB LEP_MERR", ptr->tlepmerr);
382 printf(fmt1, "TLSB LEP_DERR", ptr->tlepderr);
383 printf(fmt1, "TLSB INTRMASK0", ptr->tlintrmask0);
384 printf(fmt1, "TLSB INTRMASK1", ptr->tlintrmask1);
385 printf(fmt1, "TLSB INTRSUM0", ptr->tlintrsum0);
386 printf(fmt1, "TLSB INTRSUM1", ptr->tlintrsum1);
387 printf(fmt1, "TLSB VMG", ptr->tlep_vmg);
388
389 /* CLEAN UP */
390 /*
391 * Here's what Digital Unix says to do-
392 *
393 * 1. Log the ECC error that got us here
394 *
395 * 2. Turn off error reporting
396 *
397 * 3. Attempt to have CPU read bad memory location (specified by the
398 * tlfadr reg of the TIOP or TMEM (depending on type of error,
399 * see upcoming code branches) and write data back to location.
400 *
401 * 4. When the CPU attempts to read the location, another 620 interrupt
402 * should occur for the CPU at which instant PAL will scrub the
403 * location. Then the o.s. scrub routine finishes. If the PAL scrubs
404 * the location then the scrubbed flag should be 0 (this is what we
405 * expect).
406 *
407 * If it's a 1 then the alpha_scrub_long routine did the scrub.
408 *
409 * 5. We renable correctable error logging and continue
410 */
411 printf("WARNING THIS IS NOT DONE YET YOU MAY GET DATA CORRUPTION");
412 clear_tlsb_ebits(0);
413 /*
414 * Clear error by rewriting register.
415 */
416 alpha_pal_wrmces(mces);
417 }
418
419 /*
420 * Processor Corrected Errors- BCACHE ECC errors.
421 */
422
423 static void
kn8ae_softerr(unsigned long mces,unsigned long type,unsigned long logout,struct trapframe * framep)424 kn8ae_softerr(unsigned long mces, unsigned long type, unsigned long logout, struct trapframe *framep)
425 {
426 int whami, cpuwerr, dof_cnt;
427 mc_hdr_ev5 *hdr;
428 mc_cc_ev5 *mptr;
429 struct tlsb_mchk_soft *ptr;
430
431 hdr = (mc_hdr_ev5 *) logout;
432 mptr = (mc_cc_ev5 *) (logout + sizeof (*hdr));
433 ptr = (struct tlsb_mchk_soft *)
434 (logout + sizeof (*hdr) + sizeof (*mptr));
435 whami = alpha_pal_whami();
436
437 printf("kn8ae: CPU ID %d processor correctable error\n", whami);
438 printf(" Machine Check Code 0x%lx\n", hdr->mcheck_code);
439 printf(fmt1, "EI Status", mptr->ei_stat);
440 printf(fmt1, "EI Address", mptr->ei_addr);
441 printf(fmt1, "Fill Syndrome", mptr->fill_syndrome);
442 printf(fmt1, "Interrupt Status Reg.", mptr->isr);
443 printf("\n");
444 dof_cnt = (ptr->rsvdheader & 0xffffffff00000000) >> 32;
445 cpuwerr = ptr->rsvdheader & 0xffff;
446
447 printf(fmt1, "CPU W/Error.", cpuwerr);
448 printf(fmt1, "DOF Count.", dof_cnt);
449 printf(fmt1, "TLDEV", ptr->tldev);
450 printf(fmt1, "TLSB Bus Error", ptr->tlber);
451 printf(fmt1, "TLSB Error Syndrome 0", ptr->tlesr0);
452 printf(fmt1, "TLSB Error Syndrome 1", ptr->tlesr1);
453 printf(fmt1, "TLSB Error Syndrome 2", ptr->tlesr2);
454 printf(fmt1, "TLSB Error Syndrome 3", ptr->tlesr3);
455
456 /*
457 * Clear TLSB bits on all CPU TLSB nodes.
458 */
459 clear_tlsb_ebits(1);
460
461 /*
462 * Clear error by rewriting register.
463 */
464 alpha_pal_wrmces(mces);
465 }
466
467 /*
468 * KN8AE specific machine check handler
469 */
470
471 void
kn8ae_mcheck(unsigned long mces,unsigned long type,unsigned long logout,struct trapframe * framep)472 kn8ae_mcheck(unsigned long mces, unsigned long type, unsigned long logout, struct trapframe *framep)
473 {
474 struct mchkinfo *mcp;
475 struct tlsb_mchk_fatal mcs[TLSB_NODE_MAX+1], *ptr;
476 mc_hdr_ev5 *hdr;
477 mc_uc_ev5 *mptr;
478
479 /*
480 * If we expected a machine check, just go handle it in common code.
481 */
482 mcp = &curcpu()->ci_mcinfo;
483 if (mcp->mc_expected) {
484 machine_check(mces, framep, type, logout);
485 return;
486 }
487
488 ptr = NULL;
489 memset(mcs, 0, sizeof (mcs));
490
491 hdr = (mc_hdr_ev5 *) logout;
492 mptr = (mc_uc_ev5 *) (logout + sizeof (*hdr));
493
494 /*
495 * If detected by the system, we print out some TLASER registers.
496 */
497 if (type == ALPHA_SYS_MCHECK) {
498 #if 0
499 int get_lsb_regs = 0;
500 int get_dwlpx_regs = 0;
501 #endif
502
503 ptr = (struct tlsb_mchk_fatal *)
504 (logout + sizeof (*hdr) + sizeof (*mptr));
505
506 #if 0
507 if (ptr->tlepaerr & TLEPAERR_WSPC_RD) {
508 get_dwlpx_regs++;
509 }
510 if ((ptr->tlepaerr & TLEPAERR_IBOX_TMO) &&
511 (mptr->ic_perr_stat & EV5_IC_PERR_IBOXTMO) &&
512 (ptr->tlepderr & TLEPDERR_GBTMO)) {
513 get_dwlpx_regs++;
514 }
515 #endif
516 } else {
517 /*
518 * We have a processor machine check- which doesn't
519 * have information with it about any TLSB related
520 * failures.
521 */
522 }
523
524 /*
525 * Now we can finally print some stuff...
526 */
527 ev5_logout_print(hdr, mptr);
528 if (type == ALPHA_SYS_MCHECK) {
529 if (ptr->tlepaerr & TLEPAERR_WSPC_RD) {
530 printf("\tWSPC READ error\n");
531 }
532 if ((ptr->tlepaerr & TLEPAERR_IBOX_TMO) &&
533 (mptr->ic_perr_stat & EV5_IC_PERR_IBOXTMO) &&
534 (ptr->tlepderr & TLEPDERR_GBTMO)) {
535 printf ("\tWSPC IBOX timeout detected\n");
536 }
537 #ifdef DIAGNOSTIC
538 printf(fmt1, "TLDEV", ptr->tldev);
539 printf(fmt1, "TLSB Bus Error", ptr->tlber);
540 printf(fmt1, "TLSB CNR", ptr->tlcnr);
541 printf(fmt1, "TLSB VID", ptr->tlvid);
542 printf(fmt1, "TLSB Error Syndrome 0", ptr->tlesr0);
543 printf(fmt1, "TLSB Error Syndrome 1", ptr->tlesr1);
544 printf(fmt1, "TLSB Error Syndrome 2", ptr->tlesr2);
545 printf(fmt1, "TLSB Error Syndrome 3", ptr->tlesr3);
546 printf(fmt1, "TLSB LEP_AERR", ptr->tlepaerr);
547 printf(fmt1, "TLSB MODCONF", ptr->tlmodconfig);
548 printf(fmt1, "TLSB LEP_MERR", ptr->tlepmerr);
549 printf(fmt1, "TLSB LEP_DERR", ptr->tlepderr);
550 printf(fmt1, "TLSB INTRMASK0", ptr->tlintrmask0);
551 printf(fmt1, "TLSB INTRMASK1", ptr->tlintrmask1);
552 printf(fmt1, "TLSB INTRSUM0", ptr->tlintrsum0);
553 printf(fmt1, "TLSB INTRSUM1", ptr->tlintrsum1);
554 printf(fmt1, "TLSB VMG", ptr->tlep_vmg);
555 #endif
556 } else {
557 }
558
559 /*
560 * Now that we've printed all sorts of useful information
561 * and have decided that we really can't do any more to
562 * respond to the error, go on to the common code for
563 * final disposition. Usually this means that we die.
564 */
565 clear_tlsb_ebits(0);
566
567 machine_check(mces, framep, type, logout);
568 }
569
570 static void
dec_kn8ae_mcheck_handler(unsigned long mces,struct trapframe * framep,unsigned long vector,unsigned long param)571 dec_kn8ae_mcheck_handler(unsigned long mces, struct trapframe *framep, unsigned long vector, unsigned long param)
572 {
573 switch (vector) {
574 case ALPHA_SYS_ERROR:
575 kn8ae_harderr(mces, vector, param, framep);
576 break;
577
578 case ALPHA_PROC_ERROR:
579 kn8ae_softerr(mces, vector, param, framep);
580 break;
581
582 case ALPHA_SYS_MCHECK:
583 case ALPHA_PROC_MCHECK:
584 kn8ae_mcheck(mces, vector, param, framep);
585 break;
586 default:
587 printf("KN8AE_MCHECK: unknown check vector 0x%lx\n", vector);
588 machine_check(mces, framep, vector, param);
589 break;
590 }
591 }
592