11708Sstevel /*
21708Sstevel * CDDL HEADER START
31708Sstevel *
41708Sstevel * The contents of this file are subject to the terms of the
51708Sstevel * Common Development and Distribution License (the "License").
61708Sstevel * You may not use this file except in compliance with the License.
71708Sstevel *
81708Sstevel * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
91708Sstevel * or http://www.opensolaris.org/os/licensing.
101708Sstevel * See the License for the specific language governing permissions
111708Sstevel * and limitations under the License.
121708Sstevel *
131708Sstevel * When distributing Covered Code, include this CDDL HEADER in each
141708Sstevel * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
151708Sstevel * If applicable, add the following below this CDDL HEADER, with the
161708Sstevel * fields enclosed by brackets "[]" replaced with your own identifying
171708Sstevel * information: Portions Copyright [yyyy] [name of copyright owner]
181708Sstevel *
191708Sstevel * CDDL HEADER END
201708Sstevel */
211708Sstevel
221708Sstevel /*
23*11311SSurya.Prakki@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
241708Sstevel * Use is subject to license terms.
251708Sstevel */
261708Sstevel
271708Sstevel /*
281708Sstevel * This file contains the Starcat Solaris Mailbox Client module. This module
291708Sstevel * handles mailbox messages from the SC to the OS (as opposed to messages sent
301708Sstevel * to specific drivers) and vice versa. Two task queues are created upon
311708Sstevel * startup; one handles reading and processing of all incoming messages, while
321708Sstevel * the other handles transmission of all outgoing messages.
331708Sstevel */
341708Sstevel
351708Sstevel #include <sys/types.h>
361708Sstevel #include <sys/param.h>
371708Sstevel #include <sys/systm.h>
381708Sstevel #include <sys/sysmacros.h>
391708Sstevel #include <sys/sunddi.h>
401708Sstevel #include <sys/errno.h>
411708Sstevel #include <sys/cmn_err.h>
421708Sstevel #include <sys/condvar.h>
431708Sstevel #include <sys/mutex.h>
441708Sstevel #include <sys/disp.h>
451708Sstevel #include <sys/thread.h>
461708Sstevel #include <sys/debug.h>
471708Sstevel #include <sys/cpu_sgnblk_defs.h>
481708Sstevel #include <sys/machsystm.h>
491708Sstevel #include <sys/modctl.h>
501708Sstevel #include <sys/iosramio.h>
511708Sstevel #include <sys/mboxsc.h>
521708Sstevel #include <sys/promif.h>
531708Sstevel #include <sys/uadmin.h>
541708Sstevel #include <sys/cred.h>
551708Sstevel #include <sys/taskq.h>
561708Sstevel #include <sys/utsname.h>
571708Sstevel #include <sys/plat_ecc_unum.h>
581708Sstevel #include <sys/fm/protocol.h>
591708Sstevel #include <sys/fm/util.h>
601708Sstevel #include <sys/starcat.h>
611708Sstevel #include <sys/plat_ecc_dimm.h>
621708Sstevel #include <sys/plat_datapath.h>
631708Sstevel
641708Sstevel /* mailbox keys */
651708Sstevel #define SCDM_KEY 0x5343444d /* 'S', 'C', 'D', 'M' */
661708Sstevel #define DMSC_KEY 0x444d5343 /* 'D', 'M', 'S', 'C' */
671708Sstevel
681708Sstevel /* mailbox commands */
691708Sstevel #define SCDM_CMD ('S' << 8) /* generic SSP */
701708Sstevel #define SCDM_CMD_SUCCESS (SCDM_CMD | 0x1)
711708Sstevel #define SCDM_GOTO_OBP (SCDM_CMD | 0x2)
721708Sstevel #define SCDM_GOTO_PANIC (SCDM_CMD | 0x3)
731708Sstevel #define SCDM_ENVIRON (SCDM_CMD | 0x4) /* environmental intr */
741708Sstevel #define SCDM_SHUTDOWN (SCDM_CMD | 0x5) /* setkeyswitch STANDBY */
751708Sstevel #define SCDM_GET_NODENAME (SCDM_CMD | 0x6) /* get domain nodename */
761708Sstevel #define SCDM_LOG_ECC_ERROR (SCDM_CMD | 0x7) /* ECC error logging */
771708Sstevel #define SCDM_LOG_ECC_INDICTMENT (SCDM_CMD | 0x8) /* ECC indictment logging */
781708Sstevel #define SCDM_LOG_ECC (SCDM_CMD | 0x9) /* ECC info */
791708Sstevel #define SCDM_LOG_ECC_CAP_INIT (SCDM_CMD | 0xa) /* ECC Capability Init */
801708Sstevel #define SCDM_LOG_ECC_CAP_RESP (SCDM_CMD | 0xb) /* ECC Capability Response */
811708Sstevel #define SCDM_DIMM_SERIAL_ID (SCDM_CMD | 0xc) /* DIMM ser# req/resp */
821708Sstevel #define SCDM_DP_ERROR_MSG (SCDM_CMD | 0xd) /* datapath error */
831708Sstevel #define SCDM_DP_FAULT_MSG (SCDM_CMD | 0xe) /* datapath fault */
841708Sstevel
851708Sstevel /* general constants */
861708Sstevel #define GETMSG_TIMEOUT_MS 500
871708Sstevel #define PUTMSG_TIMEOUT_MS 6000
881708Sstevel #define MIN_INPUTQ_TASKS 2
891708Sstevel #define MAX_INPUTQ_TASKS 4
901708Sstevel #define MIN_OUTPUTQ_TASKS 2
911708Sstevel #define MAX_OUTPUTQ_TASKS 512
921708Sstevel #ifndef TRUE
931708Sstevel #define TRUE 1
941708Sstevel #endif
951708Sstevel #ifndef FALSE
961708Sstevel #define FALSE 0
971708Sstevel #endif
981708Sstevel
991708Sstevel clock_t ecc_message_timeout_ms = PUTMSG_TIMEOUT_MS;
1001708Sstevel
1011708Sstevel /*
1021708Sstevel * When a message needs to be sent to the SC, an scosmb_msgdata_t should be
1031708Sstevel * populated with the data to be used for the message, and a call to
1041708Sstevel * scosmb_process_output should be dispatched on the scosmb_output_taskq, with
1051708Sstevel * the address of the scosmb_msgdata_t structure as its arg. The "length" and
1061708Sstevel * "data" fields can be used if the message needs to include data beyond the
1071708Sstevel * header fields (type, cmd, and transid) and that information must be recorded
1081708Sstevel * when the message is placed on the taskq. If appropriate for the message type
1091708Sstevel * (e.g. nodename info that should always be the most recent available), the
1101708Sstevel * "data" field can be set to NULL and the additional data can be assembled
1111708Sstevel * immediately prior to sending the message in scosmb_process_output().
1121708Sstevel *
1131708Sstevel * If log_error is set, any errors in delivering the message cause a
1141708Sstevel * cmn_err() message to be issued. If it is zero, the error is expressed
1151708Sstevel * only through return values.
1161708Sstevel */
1171708Sstevel typedef struct {
1181708Sstevel uint32_t type;
1191708Sstevel uint32_t cmd;
1201708Sstevel uint64_t transid;
1211708Sstevel uint32_t length;
1221708Sstevel int log_error;
1231708Sstevel void *data;
1241708Sstevel } scosmb_msgdata_t;
1251708Sstevel
1261708Sstevel /*
1271708Sstevel * Datapath error and fault messages arrive unsolicited. The message data
1281708Sstevel * is contained in a plat_datapath_info_t structure.
1291708Sstevel */
1301708Sstevel typedef struct {
1311708Sstevel uint8_t type; /* CDS, DX, EX, CP */
1321708Sstevel uint8_t pad; /* for alignment */
1331708Sstevel uint16_t cpuid; /* Safari ID of base CPU */
1341708Sstevel uint32_t t_value; /* SERD timeout threshold (seconds) */
1351708Sstevel } plat_datapath_info_t;
1361708Sstevel
1371708Sstevel /* externally visible routines */
1381708Sstevel void scosmb_update_nodename(uint64_t transid);
1391708Sstevel
1401708Sstevel /* local routines */
1411708Sstevel static void scosmb_inbox_handler();
1421708Sstevel static void scosmb_process_input(void *unused);
1431708Sstevel static int scosmb_process_output(scosmb_msgdata_t *arg);
1441708Sstevel
1451708Sstevel /* local variables */
1461708Sstevel static uint8_t scosmb_mboxsc_failed = FALSE;
1471708Sstevel static uint8_t scosmb_mboxsc_timedout = FALSE;
1481708Sstevel static uint8_t scosmb_nodename_event_pending = FALSE;
1491708Sstevel static char scosmb_hdr[] = "SCOSMB:";
1501708Sstevel static kmutex_t scosmb_mutex;
1511708Sstevel static taskq_t *scosmb_input_taskq = NULL;
1521708Sstevel static taskq_t *scosmb_output_taskq = NULL;
1531708Sstevel
1541708Sstevel static char *dperrtype[] = {
1551708Sstevel DP_ERROR_CDS,
1561708Sstevel DP_ERROR_DX,
1571708Sstevel DP_ERROR_EX,
1581708Sstevel DP_ERROR_CP
1591708Sstevel };
1601708Sstevel
1611708Sstevel /*
1621708Sstevel * Structures from modctl.h used for loadable module support.
1631708Sstevel * SCOSMB is a "miscellaneous" module.
1641708Sstevel */
1651708Sstevel extern struct mod_ops mod_miscops;
1661708Sstevel
1671708Sstevel static struct modlmisc modlmisc = {
1681708Sstevel &mod_miscops,
1691708Sstevel "Sun Fire 15000 OS Mbox Client v1.10",
1701708Sstevel };
1711708Sstevel
1721708Sstevel static struct modlinkage modlinkage = {
1731708Sstevel MODREV_1,
1741708Sstevel (void *)&modlmisc,
1751708Sstevel NULL
1761708Sstevel };
1771708Sstevel
1781708Sstevel
1791708Sstevel /*
1801708Sstevel * _init
1811708Sstevel *
1821708Sstevel * Loadable module support routine. Initializes mutex and condition variables
1831708Sstevel * and starts thread.
1841708Sstevel */
1851708Sstevel int
_init(void)1861708Sstevel _init(void)
1871708Sstevel {
1881708Sstevel int error;
1891708Sstevel
1901708Sstevel /*
1911708Sstevel * Initialize the mailboxes
1921708Sstevel */
1931708Sstevel if ((error = mboxsc_init(SCDM_KEY, MBOXSC_MBOX_IN,
1941708Sstevel scosmb_inbox_handler)) != 0) {
1951708Sstevel cmn_err(CE_WARN, "%s mboxsc_init failed (0x%x)\n", scosmb_hdr,
1961708Sstevel error);
1971708Sstevel return (error);
1981708Sstevel }
1991708Sstevel
2001708Sstevel if ((error = mboxsc_init(DMSC_KEY, MBOXSC_MBOX_OUT, NULL)) != 0) {
2011708Sstevel cmn_err(CE_WARN, "%s mboxsc_init failed (0x%x)\n", scosmb_hdr,
2021708Sstevel error);
203*11311SSurya.Prakki@Sun.COM (void) mboxsc_fini(SCDM_KEY);
2041708Sstevel return (error);
2051708Sstevel }
2061708Sstevel
2071708Sstevel /*
2081708Sstevel * Initialize the global lock
2091708Sstevel */
2101708Sstevel mutex_init(&scosmb_mutex, NULL, MUTEX_DEFAULT, NULL);
2111708Sstevel
2121708Sstevel /*
2131708Sstevel * Create the task queues used for processing input and output messages
2141708Sstevel */
2151708Sstevel scosmb_input_taskq = taskq_create("scosmb_input_taskq", 1,
2161708Sstevel minclsyspri, MIN_INPUTQ_TASKS, MAX_INPUTQ_TASKS, TASKQ_PREPOPULATE);
2171708Sstevel scosmb_output_taskq = taskq_create("scosmb_output_taskq", 1,
2181708Sstevel minclsyspri, MIN_OUTPUTQ_TASKS, MAX_OUTPUTQ_TASKS,
2191708Sstevel TASKQ_PREPOPULATE);
2201708Sstevel
2211708Sstevel /*
2221708Sstevel * Attempt to install the module. If unsuccessful, uninitialize
2231708Sstevel * everything.
2241708Sstevel */
2251708Sstevel error = mod_install(&modlinkage);
2261708Sstevel if (error != 0) {
2271708Sstevel taskq_destroy(scosmb_output_taskq);
2281708Sstevel taskq_destroy(scosmb_input_taskq);
2291708Sstevel mutex_destroy(&scosmb_mutex);
230*11311SSurya.Prakki@Sun.COM (void) mboxsc_fini(DMSC_KEY);
231*11311SSurya.Prakki@Sun.COM (void) mboxsc_fini(SCDM_KEY);
2321708Sstevel }
2331708Sstevel
2341708Sstevel return (error);
2351708Sstevel }
2361708Sstevel
2371708Sstevel /*
2381708Sstevel * _fini
2391708Sstevel *
2401708Sstevel * Loadable module support routine. Since this routine shouldn't be unloaded (it
2411708Sstevel * provides a critical service, and its symbols may be referenced externally),
2421708Sstevel * EBUSY is returned to prevent unloading.
2431708Sstevel */
2441708Sstevel int
_fini(void)2451708Sstevel _fini(void)
2461708Sstevel {
2471708Sstevel return (EBUSY);
2481708Sstevel }
2491708Sstevel
2501708Sstevel /*
2511708Sstevel * _info
2521708Sstevel *
2531708Sstevel * Loadable module support routine.
2541708Sstevel */
2551708Sstevel int
_info(struct modinfo * modinfop)2561708Sstevel _info(struct modinfo *modinfop)
2571708Sstevel {
2581708Sstevel int error = 0;
2591708Sstevel
2601708Sstevel error = mod_info(&modlinkage, modinfop);
2611708Sstevel return (error);
2621708Sstevel }
2631708Sstevel
2641708Sstevel /*
2651708Sstevel * scosmb_inbox_handler() - mbox API event handler.
2661708Sstevel *
2671708Sstevel * This routine adds an entry to the scosmb_input_taskq that will cause the
2681708Sstevel * scosmb_process_input() routine to be called to service the SCDM mailbox. The
2691708Sstevel * possibility that taskq_dispatch may fail when given KM_NOSLEEP is safely
2701708Sstevel * ignored because there can only be one message waiting in the mailbox at any
2711708Sstevel * given time, so the current message will end up being handled by one of the
2721708Sstevel * previously queued jobs (and a previous message presumably timed out before we
2731708Sstevel * got around to reading it).
2741708Sstevel */
2751708Sstevel static void
scosmb_inbox_handler()2761708Sstevel scosmb_inbox_handler()
2771708Sstevel {
2781708Sstevel (void) taskq_dispatch(scosmb_input_taskq, scosmb_process_input, NULL,
2791708Sstevel KM_NOSLEEP);
2801708Sstevel }
2811708Sstevel
2821708Sstevel /*
2831708Sstevel * dp_get_cores()
2841708Sstevel *
2851708Sstevel * Checks cpu implementation for the input cpuid and returns
2861708Sstevel * the number of cores.
2871708Sstevel * If implementation cannot be determined, returns 1
2881708Sstevel */
2891708Sstevel static int
dp_get_cores(uint16_t cpuid)2901708Sstevel dp_get_cores(uint16_t cpuid)
2911708Sstevel {
2921708Sstevel int exp, ii, impl = 0, nc, slot;
2931708Sstevel
2941708Sstevel exp = STARCAT_CPUID_TO_EXPANDER(cpuid);
2951708Sstevel slot = STARCAT_CPUID_TO_BOARDSLOT(cpuid);
2961708Sstevel if (slot == 1)
2971708Sstevel nc = STARCAT_SLOT1_CPU_MAX;
2981708Sstevel else
2991708Sstevel nc = plat_max_cpu_units_per_board();
3001708Sstevel
3011708Sstevel /* find first with valid implementation */
3021708Sstevel for (ii = 0; ii < nc; ii++)
3031708Sstevel if (cpu[MAKE_CPUID(exp, slot, ii)]) {
3041708Sstevel impl = cpunodes[MAKE_CPUID(exp, slot, ii)].
305*11311SSurya.Prakki@Sun.COM implementation;
3061708Sstevel break;
3071708Sstevel }
3081708Sstevel
3091708Sstevel if (IS_JAGUAR(impl) || IS_PANTHER(impl))
3101708Sstevel return (2);
3111708Sstevel else
3121708Sstevel return (1);
3131708Sstevel
3141708Sstevel }
3151708Sstevel
3161708Sstevel /*
3171708Sstevel * dp_payload_add_cpus()
3181708Sstevel *
3191708Sstevel * From datapath mailbox message, determines the number of and safari IDs
3201708Sstevel * for affected cpus, then adds this info to the datapath ereport.
3211708Sstevel *
3221708Sstevel * Input maxcat (if set) is a count of maxcat cpus actually present - it is
3231708Sstevel * a count of cpuids, which takes into account multi-core architecture.
3241708Sstevel */
3251708Sstevel static int
dp_payload_add_cpus(plat_datapath_info_t * dpmsg,nvlist_t * erp,int maxcat)3261708Sstevel dp_payload_add_cpus(plat_datapath_info_t *dpmsg, nvlist_t *erp, int maxcat)
3271708Sstevel {
3281708Sstevel int jj = 0, numcpus = 0, nummaxcpus = 0;
3291708Sstevel int count, exp, ii, num, ncores, ret, slot, port;
3301708Sstevel uint16_t *dparray, cpuid;
3311708Sstevel uint64_t *snarray;
3321708Sstevel
3331708Sstevel /* check for multiple core architectures */
3341708Sstevel ncores = dp_get_cores(dpmsg->cpuid);
3351708Sstevel
3361708Sstevel /*
3371708Sstevel * Determine the number of cpu cores impacted
3381708Sstevel */
3391708Sstevel switch (dpmsg->type) {
3401708Sstevel case DP_CDS_TYPE:
3411708Sstevel if (maxcat)
3421708Sstevel nummaxcpus = ncores;
3431708Sstevel else
3441708Sstevel numcpus = ncores;
3451708Sstevel break;
3461708Sstevel
3471708Sstevel case DP_DX_TYPE:
3481708Sstevel if (maxcat)
3491708Sstevel nummaxcpus = 2 * ncores;
3501708Sstevel else
3511708Sstevel numcpus = 2 * ncores;
3521708Sstevel break;
3531708Sstevel
3541708Sstevel case DP_EX_TYPE:
3551708Sstevel if (maxcat)
3561708Sstevel nummaxcpus = STARCAT_SLOT1_CPU_MAX;
3571708Sstevel else
3581708Sstevel numcpus = plat_max_cpu_units_per_board();
3591708Sstevel break;
3601708Sstevel
3611708Sstevel case DP_CP_TYPE:
3621708Sstevel /*
3631708Sstevel * SC-DE supplies the base cpuid affected, if
3641708Sstevel * maxcat id was given, there's no slot 0 board
3651708Sstevel * present.
3661708Sstevel */
3671708Sstevel
3681708Sstevel if (!maxcat) {
3691708Sstevel /* Slot 0 id was given - set numcpus */
3701708Sstevel numcpus = plat_max_cpu_units_per_board();
3711708Sstevel }
3721708Sstevel
3731708Sstevel /* there may/may not be maxcats. set a count anyway */
3741708Sstevel nummaxcpus = STARCAT_SLOT1_CPU_MAX;
3751708Sstevel
3761708Sstevel break;
3771708Sstevel
3781708Sstevel default:
3791708Sstevel ASSERT(0);
3801708Sstevel return (-1);
3811708Sstevel }
3821708Sstevel
3831708Sstevel /* Allocate space for cores */
3841708Sstevel num = numcpus + nummaxcpus;
3851708Sstevel dparray = kmem_zalloc(num * sizeof (uint16_t *), KM_SLEEP);
3861708Sstevel
3871708Sstevel /*
3881708Sstevel * populate dparray with impacted cores (only those present)
3891708Sstevel */
3901708Sstevel exp = STARCAT_CPUID_TO_EXPANDER(dpmsg->cpuid);
3911708Sstevel slot = STARCAT_CPUID_TO_BOARDSLOT(dpmsg->cpuid);
3921708Sstevel port = STARCAT_CPUID_TO_LPORT(dpmsg->cpuid);
3931708Sstevel
3941708Sstevel mutex_enter(&cpu_lock);
3951708Sstevel
3961708Sstevel switch (dpmsg->type) {
3971708Sstevel case DP_CDS_TYPE:
3981708Sstevel /*
3991708Sstevel * For a CDS error, it's the reporting cpuid
4001708Sstevel * and it's other core (if present)
4011708Sstevel */
4021708Sstevel cpuid = dpmsg->cpuid & 0xFFFB; /* core 0 */
4031708Sstevel if (cpu[cpuid])
4041708Sstevel dparray[jj++] = cpuid;
4051708Sstevel
4061708Sstevel cpuid = dpmsg->cpuid | 0x4; /* core 1 */
4071708Sstevel if (cpu[cpuid])
4081708Sstevel dparray[jj++] = cpuid;
4091708Sstevel break;
4101708Sstevel
4111708Sstevel case DP_DX_TYPE:
4121708Sstevel /*
4131708Sstevel * For a DX error, it's the reporting cpuid (all
4141708Sstevel * cores), and the other CPU sharing the same
4151708Sstevel * DX<-->DCDS interface (all cores)
4161708Sstevel */
4171708Sstevel
4181708Sstevel /* reporting cpuid */
4191708Sstevel cpuid = dpmsg->cpuid & 0xFFFB; /* core 0 */
4201708Sstevel
4211708Sstevel if (cpu[cpuid])
4221708Sstevel dparray[jj++] = cpuid;
4231708Sstevel
4241708Sstevel cpuid = dpmsg->cpuid | 0x4; /* core 1 */
4251708Sstevel if (cpu[cpuid])
4261708Sstevel dparray[jj++] = cpuid;
4271708Sstevel
4281708Sstevel /* find partner cpuid */
4291708Sstevel if (port == 0 || port == 2)
4301708Sstevel cpuid = dpmsg->cpuid | 0x1;
4311708Sstevel else
4321708Sstevel cpuid = dpmsg->cpuid & 0xFFFE;
4331708Sstevel
4341708Sstevel /* add partner cpuid */
4351708Sstevel cpuid &= 0xFFFB; /* core 0 */
4361708Sstevel if (cpu[cpuid])
4371708Sstevel dparray[jj++] = cpuid;
4381708Sstevel
4391708Sstevel cpuid |= 0x4; /* core 1 */
4401708Sstevel if (cpu[cpuid])
4411708Sstevel dparray[jj++] = cpuid;
4421708Sstevel break;
4431708Sstevel
4441708Sstevel case DP_EX_TYPE:
4451708Sstevel /*
4461708Sstevel * For an EX error, it is all cpuids (all cores)
4471708Sstevel * on the reporting board
4481708Sstevel */
4491708Sstevel
4501708Sstevel if (slot == 1) /* maxcat */
4511708Sstevel count = nummaxcpus;
4521708Sstevel else
4531708Sstevel count = numcpus;
4541708Sstevel
4551708Sstevel for (ii = 0; ii < count; ii++) {
4561708Sstevel cpuid = MAKE_CPUID(exp, slot, ii);
4571708Sstevel if (cpu[cpuid])
4581708Sstevel dparray[jj++] = cpuid;
4591708Sstevel }
4601708Sstevel break;
4611708Sstevel
4621708Sstevel case DP_CP_TYPE:
4631708Sstevel /*
4641708Sstevel * For a CP error, it is all cpuids (all cores)
4651708Sstevel * on both boards (SB & IO) in the boardset
4661708Sstevel */
4671708Sstevel
4681708Sstevel /* Do slot 0 */
4691708Sstevel for (ii = 0; ii < numcpus; ii++) {
4701708Sstevel cpuid = MAKE_CPUID(exp, 0, ii);
4711708Sstevel if (cpu[cpuid])
4721708Sstevel dparray[jj++] = cpuid;
4731708Sstevel }
4741708Sstevel
4751708Sstevel /* Do slot 1 */
4761708Sstevel for (ii = 0; ii < nummaxcpus; ii++) {
4771708Sstevel cpuid = MAKE_CPUID(exp, 1, ii);
4781708Sstevel if (cpu[cpuid])
4791708Sstevel dparray[jj++] = cpuid;
4801708Sstevel }
4811708Sstevel break;
4821708Sstevel }
4831708Sstevel
4841708Sstevel mutex_exit(&cpu_lock);
4851708Sstevel
4861708Sstevel /*
4871708Sstevel * The datapath message could not be associated with any
4881708Sstevel * configured CPU.
4891708Sstevel */
4901708Sstevel if (!jj) {
4911708Sstevel kmem_free(dparray, num * sizeof (uint16_t *));
4921708Sstevel ret = nvlist_add_uint32(erp, DP_LIST_SIZE, jj);
4931708Sstevel ASSERT(ret == 0);
4941708Sstevel return (-1);
4951708Sstevel }
4961708Sstevel
4971708Sstevel snarray = kmem_zalloc(jj * sizeof (uint64_t *), KM_SLEEP);
4981708Sstevel for (ii = 0; ii < jj; ii++)
4991708Sstevel snarray[ii] = cpunodes[dparray[ii]].device_id;
5001708Sstevel
5011708Sstevel ret = nvlist_add_uint32(erp, DP_LIST_SIZE, jj);
5021708Sstevel ret |= nvlist_add_uint16_array(erp, DP_LIST, dparray, jj);
5031708Sstevel ret |= nvlist_add_uint64_array(erp, SN_LIST, snarray, jj);
5041708Sstevel ASSERT(ret == 0);
5051708Sstevel
5061708Sstevel kmem_free(dparray, num * sizeof (uint16_t *));
5071708Sstevel kmem_free(snarray, jj * sizeof (uint64_t *));
5081708Sstevel
5091708Sstevel return (0);
5101708Sstevel }
5111708Sstevel
5121708Sstevel /*
5131708Sstevel * dp_trans_event() - datapath message handler.
5141708Sstevel *
5151708Sstevel * Process datapath error and fault messages received from the SC. Checks
5161708Sstevel * for, and disregards, messages associated with I/O boards. Otherwise,
5171708Sstevel * extracts message info to produce a datapath ereport.
5181708Sstevel */
5191708Sstevel static void
dp_trans_event(plat_datapath_info_t * dpmsg,int msgtype)5201708Sstevel dp_trans_event(plat_datapath_info_t *dpmsg, int msgtype)
5211708Sstevel {
5221708Sstevel nvlist_t *erp, *detector, *hcelem;
5231708Sstevel char buf[FM_MAX_CLASS];
5241708Sstevel int exp, slot, i, maxcat = 0;
5251708Sstevel
5261708Sstevel /* check for I/O board message */
5271708Sstevel exp = STARCAT_CPUID_TO_EXPANDER(dpmsg->cpuid);
5281708Sstevel slot = STARCAT_CPUID_TO_BOARDSLOT(dpmsg->cpuid);
5291708Sstevel
5301708Sstevel if (slot) {
5311708Sstevel mutex_enter(&cpu_lock);
5321708Sstevel for (i = 0; i < STARCAT_SLOT1_CPU_MAX; i++) {
5331708Sstevel if (cpu[MAKE_CPUID(exp, slot, i)]) {
5341708Sstevel /* maxcat cpu present */
5351708Sstevel maxcat++;
5361708Sstevel }
5371708Sstevel }
5381708Sstevel mutex_exit(&cpu_lock);
5391708Sstevel
5401708Sstevel /*
5411708Sstevel * Ignore I/O board msg
5421708Sstevel */
5431708Sstevel if (maxcat == 0)
5441708Sstevel return;
5451708Sstevel }
5461708Sstevel
5471708Sstevel /* allocate space for ereport */
5481708Sstevel erp = fm_nvlist_create(NULL);
5491708Sstevel
5501708Sstevel /*
5511708Sstevel *
5521708Sstevel * Member Name Data Type Comments
5531708Sstevel * ----------- --------- -----------
5541708Sstevel * version uint8 0
5551708Sstevel * class string "asic"
5561708Sstevel * ENA uint64 ENA Format 1
5571708Sstevel * detector fmri aggregated ID data for SC-DE
5581708Sstevel *
5591708Sstevel * Datapath ereport subclasses and data payloads:
5601708Sstevel * There will be two types of ereports (error and fault) which will be
5611708Sstevel * identified by the "type" member.
5621708Sstevel *
5631708Sstevel * ereport.asic.starcat.cds.cds-dp
5641708Sstevel * ereport.asic.starcat.dx.dx-dp
5651708Sstevel * ereport.asic.starcat.sdi.sdi-dp
5661708Sstevel * ereport.asic.starcat.cp.cp-dp
5671708Sstevel *
5681708Sstevel * Member Name Data Type Comments
5691708Sstevel * ----------- --------- -----------
5701708Sstevel * erptype uint16 derived from message type: error or
5711708Sstevel * fault
5721708Sstevel * t-value uint32 SC's datapath SERD timeout threshold
5731708Sstevel * dp-list-sz uint8 number of dp-list array elements
5741708Sstevel * dp-list array of uint16 Safari IDs of affected cpus
5751708Sstevel * sn-list array of uint64 Serial numbers of affected cpus
5761708Sstevel *
5771708Sstevel */
5781708Sstevel
5791708Sstevel /* compose common ereport elements */
5801708Sstevel detector = fm_nvlist_create(NULL);
5811708Sstevel
5821708Sstevel /*
5831708Sstevel * Create legacy FMRI for the detector
5841708Sstevel */
5851708Sstevel switch (dpmsg->type) {
5861708Sstevel case DP_CDS_TYPE:
5871708Sstevel case DP_DX_TYPE:
5881708Sstevel if (slot == 1)
5891708Sstevel (void) snprintf(buf, FM_MAX_CLASS, "IO%d", exp);
5901708Sstevel else
5911708Sstevel (void) snprintf(buf, FM_MAX_CLASS, "SB%d", exp);
5921708Sstevel break;
5931708Sstevel
5941708Sstevel case DP_EX_TYPE:
5951708Sstevel (void) snprintf(buf, FM_MAX_CLASS, "EX%d", exp);
5961708Sstevel break;
5971708Sstevel
5981708Sstevel case DP_CP_TYPE:
5991708Sstevel (void) snprintf(buf, FM_MAX_CLASS, "CP");
6001708Sstevel break;
6011708Sstevel
6021708Sstevel default:
6031708Sstevel (void) snprintf(buf, FM_MAX_CLASS, "UNKNOWN");
6041708Sstevel break;
6051708Sstevel }
6061708Sstevel
6071708Sstevel hcelem = fm_nvlist_create(NULL);
6081708Sstevel
6091708Sstevel (void) nvlist_add_string(hcelem, FM_FMRI_HC_NAME, FM_FMRI_LEGACY_HC);
6101708Sstevel (void) nvlist_add_string(hcelem, FM_FMRI_HC_ID, buf);
6111708Sstevel
6121708Sstevel (void) nvlist_add_uint8(detector, FM_VERSION, FM_HC_SCHEME_VERSION);
6131708Sstevel (void) nvlist_add_string(detector, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
6141708Sstevel (void) nvlist_add_string(detector, FM_FMRI_HC_ROOT, "");
6151708Sstevel (void) nvlist_add_uint32(detector, FM_FMRI_HC_LIST_SZ, 1);
6161708Sstevel (void) nvlist_add_nvlist_array(detector, FM_FMRI_HC_LIST, &hcelem, 1);
6171708Sstevel
6181708Sstevel /* build ereport class name */
6191708Sstevel (void) snprintf(buf, FM_MAX_CLASS, "asic.starcat.%s.%s-%s",
620*11311SSurya.Prakki@Sun.COM dperrtype[dpmsg->type], dperrtype[dpmsg->type],
621*11311SSurya.Prakki@Sun.COM FM_ERROR_DATAPATH);
6221708Sstevel
6231708Sstevel fm_ereport_set(erp, FM_EREPORT_VERSION, buf,
624*11311SSurya.Prakki@Sun.COM fm_ena_generate(0, FM_ENA_FMT1), detector, NULL);
6251708Sstevel
6261708Sstevel /* add payload elements */
6271708Sstevel if (msgtype == SCDM_DP_ERROR_MSG) {
6281708Sstevel fm_payload_set(erp,
629*11311SSurya.Prakki@Sun.COM DP_EREPORT_TYPE, DATA_TYPE_UINT16, DP_ERROR, NULL);
6301708Sstevel } else {
6311708Sstevel fm_payload_set(erp,
632*11311SSurya.Prakki@Sun.COM DP_EREPORT_TYPE, DATA_TYPE_UINT16, DP_FAULT, NULL);
6331708Sstevel }
6341708Sstevel
6351708Sstevel fm_payload_set(erp, DP_TVALUE, DATA_TYPE_UINT32, dpmsg->t_value, NULL);
6361708Sstevel
6371708Sstevel if (dp_payload_add_cpus(dpmsg, erp, maxcat) == 0) {
6381708Sstevel /* post ereport */
6391708Sstevel fm_ereport_post(erp, EVCH_SLEEP);
6401708Sstevel }
6411708Sstevel
6421708Sstevel /* free ereport memory */
6431708Sstevel fm_nvlist_destroy(erp, FM_NVA_FREE);
6441708Sstevel fm_nvlist_destroy(detector, FM_NVA_FREE);
6451708Sstevel
6461708Sstevel }
6471708Sstevel
6481708Sstevel /*
6491708Sstevel * scosmb_process_input() - incoming message processing routine
6501708Sstevel *
6511708Sstevel * this routine attempts to read a message from the SCDM mailbox and, if
6521708Sstevel * successful, processes the command. if an unrecoverable error is encountered,
6531708Sstevel * the scosmb_task thread will be terminated.
6541708Sstevel */
6551708Sstevel /* ARGSUSED0 */
6561708Sstevel static void
scosmb_process_input(void * unused)6571708Sstevel scosmb_process_input(void *unused)
6581708Sstevel {
6591708Sstevel int error;
6601708Sstevel scosmb_msgdata_t msg;
6611708Sstevel proc_t *initpp;
6621708Sstevel plat_capability_data_t *cap; /* capability msg contents ptr */
6631708Sstevel int cap_size;
6641708Sstevel int cap_ver_len;
6651708Sstevel scosmb_msgdata_t *cap_msgdatap; /* capability msg response */
6661708Sstevel int max_size;
6671708Sstevel
6681708Sstevel /*
6691708Sstevel * Attempt to read a message from the SCDM mailbox.
6701708Sstevel *
6711708Sstevel * Setup a local buffer to read incoming messages from the SC.
6721708Sstevel */
6731708Sstevel cap_ver_len = strlen(utsname.release) + strlen(utsname.version) + 2;
6741708Sstevel cap_size = sizeof (plat_capability_data_t) + cap_ver_len;
6751708Sstevel max_size = MAX(cap_size, sizeof (plat_dimm_sid_board_data_t));
6761708Sstevel
6771708Sstevel msg.type = 0;
6781708Sstevel msg.cmd = 0;
6791708Sstevel msg.transid = 0;
6801708Sstevel msg.length = max_size;
6811708Sstevel msg.log_error = 0;
6821708Sstevel msg.data = kmem_zalloc(max_size, KM_SLEEP);
6831708Sstevel
6841708Sstevel error = mboxsc_getmsg(SCDM_KEY, &msg.type, &msg.cmd, &msg.transid,
6851708Sstevel &msg.length, msg.data, GETMSG_TIMEOUT_MS);
6861708Sstevel
6871708Sstevel /*
6881708Sstevel * If EAGAIN or ETIMEDOUT was received, give up. The SC can just try
6891708Sstevel * again if it was important. If any other non-zero error was
6901708Sstevel * encountered, the mailbox service is broken, and there's nothing more
6911708Sstevel * we can do.
6921708Sstevel */
6931708Sstevel mutex_enter(&scosmb_mutex);
6941708Sstevel if ((error == EAGAIN) || (error == ETIMEDOUT)) {
6951708Sstevel mutex_exit(&scosmb_mutex);
6961708Sstevel return;
6971708Sstevel } else if (error != 0) {
6981708Sstevel /*
6991708Sstevel * The mailbox service appears to be badly broken. If it was
7001708Sstevel * working previously, generate a warning and set a flag to
7011708Sstevel * avoid repeating the warning on subsequent failures.
7021708Sstevel */
7031708Sstevel if (!scosmb_mboxsc_failed) {
7041708Sstevel scosmb_mboxsc_failed = TRUE;
7051708Sstevel cmn_err(CE_WARN, "%s mboxsc error (0x%x)\n", scosmb_hdr,
7061708Sstevel error);
7071708Sstevel }
7081708Sstevel mutex_exit(&scosmb_mutex);
7091708Sstevel return;
7101708Sstevel } else {
7111708Sstevel /*
7121708Sstevel * If the mailbox module failed previously, it appears to have
7131708Sstevel * recovered, so we'll want to generate a warning if it fails
7141708Sstevel * again.
7151708Sstevel */
7161708Sstevel scosmb_mboxsc_failed = FALSE;
7171708Sstevel }
7181708Sstevel mutex_exit(&scosmb_mutex);
7191708Sstevel
7201708Sstevel /*
7211708Sstevel * A message was successfully received, so go ahead and process it.
7221708Sstevel */
7231708Sstevel switch (msg.cmd) {
7241708Sstevel
7251708Sstevel case SCDM_GOTO_OBP: /* jump to OBP */
7261708Sstevel debug_enter("SC requested jump to OBP");
7271708Sstevel break;
7281708Sstevel
7291708Sstevel case SCDM_GOTO_PANIC: /* Panic the domain */
7301708Sstevel cmn_err(CE_PANIC, "%s SC requested PANIC\n", scosmb_hdr);
7311708Sstevel break;
7321708Sstevel
7331708Sstevel case SCDM_SHUTDOWN: /* graceful shutdown */
7341708Sstevel cmn_err(CE_WARN, "%s SC requested a shutdown ", scosmb_hdr);
7351708Sstevel (void) kadmin(A_SHUTDOWN, AD_HALT, NULL, kcred);
7361708Sstevel /*
7371708Sstevel * In the event kadmin does not bring down the
7381708Sstevel * domain, environmental shutdown is forced
7391708Sstevel */
7401708Sstevel /*FALLTHROUGH*/
7411708Sstevel case SCDM_ENVIRON: /* environmental shutdown */
7421708Sstevel /*
7431708Sstevel * Send SIGPWR to init(1) it will run rc0,
7441708Sstevel * which will uadmin to power down.
7451708Sstevel */
7461708Sstevel mutex_enter(&pidlock);
7471708Sstevel initpp = prfind(P_INITPID);
7481708Sstevel mutex_exit(&pidlock);
7491708Sstevel
7501708Sstevel
7511708Sstevel /*
7521708Sstevel * If we're still booting and init(1) isn't set up yet,
7531708Sstevel * simply halt.
7541708Sstevel */
7551708Sstevel if (initpp == NULL) {
7561708Sstevel extern void halt(char *);
7571708Sstevel cmn_err(CE_WARN, "%s Environmental Interrupt",
7581708Sstevel scosmb_hdr);
7591708Sstevel power_down((char *)NULL);
7601708Sstevel halt("Power off the System!\n");
7611708Sstevel }
7621708Sstevel
7631708Sstevel /*
7641708Sstevel * else, graceful shutdown with inittab and all
7651708Sstevel * getting involved
7661708Sstevel */
7671708Sstevel psignal(initpp, SIGPWR);
7681708Sstevel break;
7691708Sstevel
7701708Sstevel case SCDM_GET_NODENAME:
7711708Sstevel scosmb_update_nodename(msg.transid);
7721708Sstevel break;
7731708Sstevel
7741708Sstevel case SCDM_LOG_ECC_CAP_RESP:
7751708Sstevel /*
7761708Sstevel * The SC has responded to our initiator capability message
7771708Sstevel * issued during the boot flow via scosmb_update_nodename().
7781708Sstevel *
7791708Sstevel * Parse the incoming data, and appropriately set SC
7801708Sstevel * capabilities...
7811708Sstevel */
7821708Sstevel cap = (plat_capability_data_t *)msg.data;
7831708Sstevel plat_ecc_capability_sc_set(cap->capd_capability);
7841708Sstevel break;
7851708Sstevel
7861708Sstevel case SCDM_LOG_ECC_CAP_INIT:
7871708Sstevel /*
7881708Sstevel * The SC has initiated a capability messaging exchange with
7891708Sstevel * the OS.
7901708Sstevel *
7911708Sstevel * We start out just as we do for an SC response capability
7921708Sstevel * message, a parse of incoming data to appropriately set SC
7931708Sstevel * described capabilities...
7941708Sstevel */
7951708Sstevel cap = (plat_capability_data_t *)msg.data;
7961708Sstevel plat_ecc_capability_sc_set(cap->capd_capability);
7971708Sstevel /*
7981708Sstevel * The next step is setting up our Response to the SC.
7991708Sstevel *
8001708Sstevel * Allocate memory for message data, initialize appropriately,
8011708Sstevel * and place a new job on the scosmb_output_taskq for
8021708Sstevel * SCDM_LOG_ECC_CAP_RESP, our OS capability messaging response
8031708Sstevel * to the SC initiated sequence detected here.
8041708Sstevel */
8051708Sstevel cap_msgdatap = kmem_zalloc(sizeof (scosmb_msgdata_t), KM_SLEEP);
8061708Sstevel cap_msgdatap->type = MBOXSC_MSG_EVENT;
8071708Sstevel cap_msgdatap->cmd = SCDM_LOG_ECC_CAP_RESP;
8081708Sstevel cap_msgdatap->transid = 0;
8091708Sstevel (void) taskq_dispatch(scosmb_output_taskq,
8101708Sstevel (task_func_t *)scosmb_process_output, cap_msgdatap,
8111708Sstevel KM_SLEEP);
8121708Sstevel break;
8131708Sstevel
8141708Sstevel case SCDM_DP_ERROR_MSG:
8151708Sstevel case SCDM_DP_FAULT_MSG:
8161708Sstevel dp_trans_event(msg.data, msg.cmd);
8171708Sstevel break;
8181708Sstevel
8191708Sstevel case SCDM_DIMM_SERIAL_ID:
8201708Sstevel (void) plat_store_mem_sids(msg.data);
8211708Sstevel break;
8221708Sstevel
8231708Sstevel default:
8241708Sstevel cmn_err(CE_WARN, "%s invalid command (0x%x)\n", scosmb_hdr,
8251708Sstevel msg.cmd);
8261708Sstevel break;
8271708Sstevel }
8281708Sstevel
8291708Sstevel /*
8301708Sstevel * Free up buffer for incoming message data that we allocated earlier
8311708Sstevel */
8321708Sstevel kmem_free(msg.data, max_size);
8331708Sstevel }
8341708Sstevel
8351708Sstevel /*
8361708Sstevel * scosmb_process_output() - outgoing message processing routine
8371708Sstevel *
8381708Sstevel * This routine handles jobs that are queued on the scosmb_output_taskq, or
8391708Sstevel * sent directly from scosmb_log_ecc_error. Each job corresponds to a single
8401708Sstevel * mailbox message that needs to be sent to the SC via the DMSC mailbox. Some
8411708Sstevel * processing of the message may be performed before it is sent to the SC,
8421708Sstevel * depending on the value of the command field.
8431708Sstevel */
8441708Sstevel static int
scosmb_process_output(scosmb_msgdata_t * msgdatap)8451708Sstevel scosmb_process_output(scosmb_msgdata_t *msgdatap)
8461708Sstevel {
8471708Sstevel int error;
8481708Sstevel int length;
8491708Sstevel char nodename[_SYS_NMLN];
8501708Sstevel void *free_data;
8511708Sstevel int free_data_len;
8521708Sstevel int cap_size;
8531708Sstevel int cap_ver_len;
8541708Sstevel plat_capability_data_t *cap = NULL;
8551708Sstevel
8561708Sstevel /*
8571708Sstevel * This shouldn't ever happen, but it can't hurt to check anyway.
8581708Sstevel */
8591708Sstevel if (msgdatap == NULL) {
8601708Sstevel return (EINVAL);
8611708Sstevel }
8621708Sstevel
8631708Sstevel /*
8641708Sstevel * If data was passed in, we'll need to free it before returning.
8651708Sstevel */
8661708Sstevel free_data = msgdatap->data;
8671708Sstevel free_data_len = msgdatap->length;
8681708Sstevel
8691708Sstevel /*
8701708Sstevel * Some commands may need additional processing prior to transmission.
8711708Sstevel */
8721708Sstevel switch (msgdatap->cmd) {
8731708Sstevel /*
8741708Sstevel * Since the SC is only interested in the most recent value of
8751708Sstevel * utsname.nodename, we wait until now to collect that data. We
8761708Sstevel * also use a global flag to prevent multiple event-type
8771708Sstevel * nodename messages from being queued at the same time for the
8781708Sstevel * same reason.
8791708Sstevel */
8801708Sstevel case SCDM_GET_NODENAME:
8811708Sstevel mutex_enter(&scosmb_mutex);
8821708Sstevel length = strlen(utsname.nodename);
8831708Sstevel ASSERT(length < _SYS_NMLN);
8841708Sstevel if (length == 0) {
8851708Sstevel msgdatap->length = 0;
8861708Sstevel msgdatap->data = NULL;
8871708Sstevel } else {
8881708Sstevel bcopy(utsname.nodename, nodename, length);
8891708Sstevel nodename[length++] = '\0';
8901708Sstevel msgdatap->data = nodename;
8911708Sstevel msgdatap->length = length;
8921708Sstevel }
8931708Sstevel if (msgdatap->transid == 0) {
8941708Sstevel scosmb_nodename_event_pending = FALSE;
8951708Sstevel }
8961708Sstevel mutex_exit(&scosmb_mutex);
8971708Sstevel break;
8981708Sstevel
8991708Sstevel /*
9001708Sstevel * SCDM_LOG_ECC_CAP_INIT
9011708Sstevel * Initiator Capability message from OS to SC
9021708Sstevel *
9031708Sstevel * We construct and send an initiator capability message
9041708Sstevel * every time we go through scosmb_update_nodename(), which
9051708Sstevel * works out to getting an "initiator" capability message
9061708Sstevel * sent from the OS to the SC during the OS boot flow.
9071708Sstevel *
9081708Sstevel * The SC also issues a request to scosmb_update_nodename()
9091708Sstevel * during an SC reboot. Which results in an additional
9101708Sstevel * capability message exchange during SC reboot scenarios.
9111708Sstevel *
9121708Sstevel * SCDM_LOG_ECC_CAP_RESP
9131708Sstevel * Response Capability message from SC to OS
9141708Sstevel *
9151708Sstevel * In certain scenarios, the SC could initiate a capability
9161708Sstevel * messaging exchange with the OS. Processing starts in
9171708Sstevel * scosmb_process_input(), where we detect an incoming
9181708Sstevel * initiator capability message from the SC. We finish
9191708Sstevel * processing here, by sending a response capability message
9201708Sstevel * back to the SC that reflects OS capabilities.
9211708Sstevel */
9221708Sstevel case SCDM_LOG_ECC_CAP_INIT:
9231708Sstevel /*FALLTHROUGH*/
9241708Sstevel case SCDM_LOG_ECC_CAP_RESP:
9251708Sstevel mutex_enter(&scosmb_mutex);
9261708Sstevel
9271708Sstevel cap_ver_len = strlen(utsname.release) +
9281708Sstevel strlen(utsname.version) + 2;
9291708Sstevel
9301708Sstevel cap_size = sizeof (plat_capability_data_t) +
9311708Sstevel cap_ver_len;
9321708Sstevel
9331708Sstevel cap = kmem_zalloc(cap_size, KM_SLEEP);
9341708Sstevel
9351708Sstevel cap->capd_major_version = PLAT_ECC_CAP_VERSION_MAJOR;
9361708Sstevel cap->capd_minor_version = PLAT_ECC_CAP_VERSION_MINOR;
9371708Sstevel cap->capd_msg_type = PLAT_ECC_CAPABILITY_MESSAGE;
9381708Sstevel cap->capd_msg_length = cap_size;
9391708Sstevel
9401708Sstevel cap->capd_capability =
9411708Sstevel PLAT_ECC_CAPABILITY_DOMAIN_DEFAULT;
9421708Sstevel
9431708Sstevel /*
9441708Sstevel * Build the capability solaris_version string:
9451708Sstevel * utsname.release + " " + utsname.version
9461708Sstevel */
9471708Sstevel (void) snprintf(cap->capd_solaris_version,
9481708Sstevel cap_ver_len, "%s %s", utsname.release,
9491708Sstevel utsname.version);
9501708Sstevel
9511708Sstevel /*
9521708Sstevel * The capability message is constructed, now plug it
9531708Sstevel * into the starcat msgdatap:
9541708Sstevel */
9551708Sstevel msgdatap->data = (plat_capability_data_t *)cap;
9561708Sstevel msgdatap->length = cap_size;
9571708Sstevel
9581708Sstevel /*
9591708Sstevel * Finished with initiator/response capability
9601708Sstevel * message set up.
9611708Sstevel *
9621708Sstevel * Note that after sending an "initiator" capability
9631708Sstevel * message, we can expect a subsequent "response"
9641708Sstevel * capability message from the SC, which we will
9651708Sstevel * pick up and minimally handle later,
9661708Sstevel * in scosmb_process_input().
9671708Sstevel *
9681708Sstevel * If we're sending a "response" capability message
9691708Sstevel * to the SC, then we're done once the message is sent.
9701708Sstevel */
9711708Sstevel
9721708Sstevel if (msgdatap->transid == 0) {
9731708Sstevel scosmb_nodename_event_pending = FALSE;
9741708Sstevel }
9751708Sstevel mutex_exit(&scosmb_mutex);
9761708Sstevel break;
9771708Sstevel
9781708Sstevel default:
9791708Sstevel break;
9801708Sstevel }
9811708Sstevel
9821708Sstevel /*
9831708Sstevel * Attempt to send the message.
9841708Sstevel */
9851708Sstevel error = mboxsc_putmsg(DMSC_KEY, msgdatap->type, msgdatap->cmd,
9861708Sstevel &msgdatap->transid, msgdatap->length, msgdatap->data,
9871708Sstevel ecc_message_timeout_ms);
9881708Sstevel
9891708Sstevel /*
9901708Sstevel * Free any allocated memory that was passed in.
9911708Sstevel */
9921708Sstevel if (free_data != NULL) {
9931708Sstevel kmem_free(free_data, free_data_len);
9941708Sstevel }
9951708Sstevel
9961708Sstevel if (cap != NULL) {
9971708Sstevel kmem_free(cap, cap_size);
9981708Sstevel }
9991708Sstevel
10001708Sstevel kmem_free(msgdatap, sizeof (scosmb_msgdata_t));
10011708Sstevel
10021708Sstevel /*
10031708Sstevel * If EAGAIN or ETIMEDOUT was received, give up. The sender can try
10041708Sstevel * again if it was important. If any other non-zero error was
10051708Sstevel * encountered, the mailbox service is broken, and there's nothing more
10061708Sstevel * we can do.
10071708Sstevel */
10081708Sstevel mutex_enter(&scosmb_mutex);
10091708Sstevel if ((error == EAGAIN) || (error == ETIMEDOUT)) {
10101708Sstevel if (msgdatap->log_error && !scosmb_mboxsc_timedout) {
10111708Sstevel /*
10121708Sstevel * Indictment mailbox messages use the return value to
10131708Sstevel * indicate a problem in the mailbox. For Error
10141708Sstevel * mailbox messages, we'll have to use a syslog message.
10151708Sstevel */
10161708Sstevel scosmb_mboxsc_timedout = TRUE;
10171708Sstevel cmn_err(CE_NOTE, "!Solaris failed to send a message "
10181708Sstevel "(0x%x/0x%x) to the System Controller. Error: %d",
10191708Sstevel msgdatap->type, msgdatap->cmd, error);
10201708Sstevel }
10211708Sstevel } else if (error != 0) {
10221708Sstevel /*
10231708Sstevel * The mailbox service appears to be badly broken. If it was
10241708Sstevel * working previously, generate a warning and set a flag to
10251708Sstevel * avoid repeating the warning on subsequent failures.
10261708Sstevel */
10271708Sstevel if (msgdatap->log_error && !scosmb_mboxsc_failed) {
10281708Sstevel scosmb_mboxsc_failed = TRUE;
10291708Sstevel cmn_err(CE_NOTE, "!An internal error (%d) occurred "
10301708Sstevel "while processing this message (0x%x/0x%x)",
10311708Sstevel error, msgdatap->type, msgdatap->cmd);
10321708Sstevel }
10331708Sstevel } else {
10341708Sstevel /*
10351708Sstevel * If the mailbox module failed previously, it appears to have
10361708Sstevel * recovered, so we'll want to generate a warning if it fails
10371708Sstevel * again.
10381708Sstevel */
10391708Sstevel scosmb_mboxsc_failed = scosmb_mboxsc_timedout = FALSE;
10401708Sstevel }
10411708Sstevel mutex_exit(&scosmb_mutex);
10421708Sstevel return (error);
10431708Sstevel }
10441708Sstevel
10451708Sstevel /*
10461708Sstevel * scosmb_update_nodename() - nodename update routine
10471708Sstevel *
10481708Sstevel * this routine, which may be invoked from outside of the scosmb module, will
10491708Sstevel * cause the current nodename to be sent to the SC. The mailbox message sent to
10501708Sstevel * the SC will use the indicated transaction ID, and will either be a reply
10511708Sstevel * message if the ID is non-zero or an event message if it is 0.
10521708Sstevel *
10531708Sstevel * Capability messaging enhancements:
10541708Sstevel * Every time we move through this code flow, we put an "initiator
10551708Sstevel * capability message" on the message output taskq. This action will
10561708Sstevel * get a capability message sent to the SC from the OS during boot
10571708Sstevel * scenarios. A capability message exchange will also happen for
10581708Sstevel * SC reboot scenarios, as the SC will initiate a nodename update
10591708Sstevel * as a matter of course while coming back up.
10601708Sstevel *
10611708Sstevel * We'll also get an extraneous capability message sent
10621708Sstevel * to the SC from time to time, but that won't hurt anything.
10631708Sstevel */
10641708Sstevel void
scosmb_update_nodename(uint64_t transid)10651708Sstevel scosmb_update_nodename(uint64_t transid)
10661708Sstevel {
10671708Sstevel scosmb_msgdata_t *msgdatap, *cap_msgdatap;
10681708Sstevel
10691708Sstevel /*
10701708Sstevel * If we're generating an unsolicited nodename update (presumably having
10711708Sstevel * been called from platmod:plat_nodename_set()), there's no need to add
10721708Sstevel * a new job to the queue if there is already one on it that will be
10731708Sstevel * sending the latest nodename data.
10741708Sstevel */
10751708Sstevel mutex_enter(&scosmb_mutex);
10761708Sstevel if (transid == 0) {
10771708Sstevel if (scosmb_nodename_event_pending) {
10781708Sstevel mutex_exit(&scosmb_mutex);
10791708Sstevel return;
10801708Sstevel } else {
10811708Sstevel scosmb_nodename_event_pending = TRUE;
10821708Sstevel }
10831708Sstevel }
10841708Sstevel mutex_exit(&scosmb_mutex);
10851708Sstevel
10861708Sstevel /*
10871708Sstevel * Allocate memory for the message data, initialize it, and place a new
10881708Sstevel * job on the scosmb_output_taskq for SCDM_GET_NODENAME.
10891708Sstevel */
10901708Sstevel msgdatap = (scosmb_msgdata_t *)kmem_zalloc(sizeof (scosmb_msgdata_t),
10911708Sstevel KM_SLEEP);
10921708Sstevel
10931708Sstevel msgdatap->type = (transid == 0) ? MBOXSC_MSG_EVENT : MBOXSC_MSG_REPLY;
10941708Sstevel msgdatap->cmd = SCDM_GET_NODENAME;
10951708Sstevel msgdatap->transid = transid;
10961708Sstevel msgdatap->log_error = 1;
10971708Sstevel
10981708Sstevel (void) taskq_dispatch(scosmb_output_taskq,
10991708Sstevel (task_func_t *)scosmb_process_output, msgdatap, KM_SLEEP);
11001708Sstevel
11011708Sstevel /*
11021708Sstevel * Next, allocate memory, initialize, and place a new job on the
11031708Sstevel * scosmb_output_taskq for SCDM_LOG_ECC_CAP_INIT. That's a
11041708Sstevel * capability message, where we're the initiator.
11051708Sstevel */
11061708Sstevel cap_msgdatap = kmem_zalloc(sizeof (scosmb_msgdata_t), KM_SLEEP);
11071708Sstevel
11081708Sstevel cap_msgdatap->type = (transid == 0) ?
11091708Sstevel MBOXSC_MSG_EVENT : MBOXSC_MSG_REPLY;
11101708Sstevel cap_msgdatap->cmd = SCDM_LOG_ECC_CAP_INIT;
11111708Sstevel cap_msgdatap->transid = transid;
11121708Sstevel cap_msgdatap->log_error = 1;
11131708Sstevel
11141708Sstevel (void) taskq_dispatch(scosmb_output_taskq,
11151708Sstevel (task_func_t *)scosmb_process_output, cap_msgdatap, KM_SLEEP);
11161708Sstevel }
11171708Sstevel
11181708Sstevel /*
11191708Sstevel * scosmb_log_ecc_error() - Record ECC error information to SC
11201708Sstevel * For ECC error messages, send the messages through a taskq mechanism
11211708Sstevel * to prevent impaired system performance during ECC floods. Indictment
11221708Sstevel * messages have already passed through a taskq, so directly call the
11231708Sstevel * output function.
11241708Sstevel */
11251708Sstevel int
scosmb_log_ecc_error(plat_ecc_message_type_t msg_type,void * datap)11261708Sstevel scosmb_log_ecc_error(plat_ecc_message_type_t msg_type, void *datap)
11271708Sstevel {
11281708Sstevel scosmb_msgdata_t *msg_header_ptr;
11291708Sstevel uint32_t msg_cmd, msg_length;
11301708Sstevel int sleep_flag, log_error;
11311708Sstevel int do_queue; /* Set to 1 if taskq needed */
11321708Sstevel
11331708Sstevel /*
11341708Sstevel * Set header type and length for message
11351708Sstevel */
11361708Sstevel switch (msg_type) {
11371708Sstevel case PLAT_ECC_ERROR_MESSAGE:
11381708Sstevel /*
11391708Sstevel * We do not want to sleep in an error logging thread. So,
11401708Sstevel * we set the NOSLEEP flag and go through a taskq before we
11411708Sstevel * send the message.
11421708Sstevel */
11431708Sstevel msg_cmd = SCDM_LOG_ECC_ERROR;
11441708Sstevel msg_length = sizeof (plat_ecc_error_data_t);
11451708Sstevel sleep_flag = KM_NOSLEEP;
11461708Sstevel log_error = 1;
11471708Sstevel do_queue = 1;
11481708Sstevel break;
11491708Sstevel case PLAT_ECC_ERROR2_MESSAGE:
11501708Sstevel msg_cmd = SCDM_LOG_ECC;
11511708Sstevel msg_length = sizeof (plat_ecc_error2_data_t);
11521708Sstevel sleep_flag = KM_NOSLEEP;
11531708Sstevel log_error = 1;
11541708Sstevel do_queue = 1;
11551708Sstevel break;
11561708Sstevel case PLAT_ECC_INDICTMENT_MESSAGE:
11571708Sstevel /*
11581708Sstevel * For indictment messages, we're allowed to sleep, and we
11591708Sstevel * can directly call the output function, since we've already
11601708Sstevel * gone through a taskq
11611708Sstevel */
11621708Sstevel msg_cmd = SCDM_LOG_ECC_INDICTMENT;
11631708Sstevel msg_length = sizeof (plat_ecc_indictment_data_t);
11641708Sstevel sleep_flag = KM_SLEEP;
11651708Sstevel log_error = 0;
11661708Sstevel do_queue = 0;
11671708Sstevel break;
11681708Sstevel case PLAT_ECC_INDICTMENT2_MESSAGE:
11691708Sstevel /*
11701708Sstevel * For indictment2 messages, we're allowed to sleep, and we
11711708Sstevel * can directly call the output function, since we've already
11721708Sstevel * gone through a taskq
11731708Sstevel */
11741708Sstevel msg_cmd = SCDM_LOG_ECC;
11751708Sstevel msg_length = sizeof (plat_ecc_indictment2_data_t);
11761708Sstevel sleep_flag = KM_SLEEP;
11771708Sstevel log_error = 0;
11781708Sstevel do_queue = 0;
11791708Sstevel break;
11801708Sstevel
11811708Sstevel case PLAT_ECC_DIMM_SID_MESSAGE:
11821708Sstevel /*
11831708Sstevel * For DIMM sid request messages, we're allowed to sleep, and we
11841708Sstevel * can directly call the output function, since we've already
11851708Sstevel * gone through a taskq
11861708Sstevel */
11871708Sstevel msg_cmd = SCDM_DIMM_SERIAL_ID;
11881708Sstevel msg_length = sizeof (plat_dimm_sid_request_data_t);
11891708Sstevel sleep_flag = KM_SLEEP;
11901708Sstevel log_error = 0;
11911708Sstevel do_queue = 0;
11921708Sstevel break;
11931708Sstevel
11941708Sstevel default:
11951708Sstevel return (EINVAL);
11961708Sstevel }
11971708Sstevel
11981708Sstevel /*
11991708Sstevel * Allocate memory for the mailbox message header.
12001708Sstevel */
12011708Sstevel msg_header_ptr =
12021708Sstevel (scosmb_msgdata_t *)kmem_zalloc(sizeof (scosmb_msgdata_t),
12031708Sstevel sleep_flag);
12041708Sstevel
12051708Sstevel if (msg_header_ptr == NULL) {
12061708Sstevel #ifdef DEBUG
12071708Sstevel cmn_err(CE_WARN, "failed to allocate space for scosmb "
12081708Sstevel "message header.");
12091708Sstevel #endif /* DEBUG */
12101708Sstevel return (ENOMEM);
12111708Sstevel }
12121708Sstevel
12131708Sstevel msg_header_ptr->type = MBOXSC_MSG_EVENT;
12141708Sstevel msg_header_ptr->cmd = msg_cmd;
12151708Sstevel msg_header_ptr->transid = 0;
12161708Sstevel msg_header_ptr->log_error = log_error;
12171708Sstevel
12181708Sstevel /*
12191708Sstevel * Allocate memory for the mailbox message payload.
12201708Sstevel */
12211708Sstevel msg_header_ptr->length = msg_length;
12221708Sstevel msg_header_ptr->data = kmem_zalloc((size_t)msg_length, sleep_flag);
12231708Sstevel
12241708Sstevel if (msg_header_ptr->data == NULL) {
12251708Sstevel #ifdef DEBUG
12261708Sstevel cmn_err(CE_WARN, "failed to allocate space for scosmb "
12271708Sstevel "message data.");
12281708Sstevel #endif /* DEBUG */
12291708Sstevel kmem_free(msg_header_ptr, sizeof (scosmb_msgdata_t));
12301708Sstevel return (ENOMEM);
12311708Sstevel }
12321708Sstevel
12331708Sstevel bcopy(datap, msg_header_ptr->data, (size_t)msg_length);
12341708Sstevel
12351708Sstevel /*
12361708Sstevel * Based on our earlier look at the message type, we either go through
12371708Sstevel * a taskq or directly call the output function.
12381708Sstevel */
12391708Sstevel if (do_queue != 0) {
12401708Sstevel /*
12411708Sstevel * Place a new job on the scosmb_output_taskq.
12421708Sstevel */
12431708Sstevel if (taskq_dispatch(scosmb_output_taskq,
12441708Sstevel (task_func_t *)scosmb_process_output,
12451708Sstevel (void *)msg_header_ptr, TQ_NOSLEEP) == 0) {
12461708Sstevel #ifdef DEBUG
12471708Sstevel cmn_err(CE_WARN, "failed to dispatch a task to send "
1248*11311SSurya.Prakki@Sun.COM "ECC mailbox message.");
12491708Sstevel #endif /* DEBUG */
12501708Sstevel kmem_free(msg_header_ptr->data, msg_header_ptr->length);
12511708Sstevel kmem_free(msg_header_ptr, sizeof (scosmb_msgdata_t));
12521708Sstevel return (ENOMEM);
12531708Sstevel }
12541708Sstevel return (0);
12551708Sstevel } else {
12561708Sstevel return (scosmb_process_output(msg_header_ptr));
12571708Sstevel }
12581708Sstevel }
1259