xref: /onnv-gate/usr/src/uts/sun4u/starcat/io/scosmb.c (revision 11311:639e7bc0b42f)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
211708Sstevel 
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
261708Sstevel 
/*
 * This file contains the Starcat Solaris Mailbox Client module.  This module
 * handles mailbox messages from the SC to the OS (as opposed to messages sent
 * to specific drivers) and vice versa.  Two task queues are created upon
 * startup; one handles reading and processing of all incoming messages, while
 * the other handles transmission of all outgoing messages.
 */
341708Sstevel 
351708Sstevel #include <sys/types.h>
361708Sstevel #include <sys/param.h>
371708Sstevel #include <sys/systm.h>
381708Sstevel #include <sys/sysmacros.h>
391708Sstevel #include <sys/sunddi.h>
401708Sstevel #include <sys/errno.h>
411708Sstevel #include <sys/cmn_err.h>
421708Sstevel #include <sys/condvar.h>
431708Sstevel #include <sys/mutex.h>
441708Sstevel #include <sys/disp.h>
451708Sstevel #include <sys/thread.h>
461708Sstevel #include <sys/debug.h>
471708Sstevel #include <sys/cpu_sgnblk_defs.h>
481708Sstevel #include <sys/machsystm.h>
491708Sstevel #include <sys/modctl.h>
501708Sstevel #include <sys/iosramio.h>
511708Sstevel #include <sys/mboxsc.h>
521708Sstevel #include <sys/promif.h>
531708Sstevel #include <sys/uadmin.h>
541708Sstevel #include <sys/cred.h>
551708Sstevel #include <sys/taskq.h>
561708Sstevel #include <sys/utsname.h>
571708Sstevel #include <sys/plat_ecc_unum.h>
581708Sstevel #include <sys/fm/protocol.h>
591708Sstevel #include <sys/fm/util.h>
601708Sstevel #include <sys/starcat.h>
611708Sstevel #include <sys/plat_ecc_dimm.h>
621708Sstevel #include <sys/plat_datapath.h>
631708Sstevel 
641708Sstevel /* mailbox keys */
651708Sstevel #define	SCDM_KEY	0x5343444d	/* 'S', 'C', 'D', 'M' */
661708Sstevel #define	DMSC_KEY	0x444d5343	/* 'D', 'M', 'S', 'C' */
671708Sstevel 
/*
 * Mailbox commands.  Every command is the generic SSP prefix ('S' in the
 * second byte) OR'ed with a small command number.
 */
#define	SCDM_CMD		('S' << 8)	/* generic SSP */
#define	SCDM_CMD_SUCCESS	(SCDM_CMD | 0x1)
#define	SCDM_GOTO_OBP		(SCDM_CMD | 0x2)
#define	SCDM_GOTO_PANIC		(SCDM_CMD | 0x3)
#define	SCDM_ENVIRON		(SCDM_CMD | 0x4) /* environmental intr */
#define	SCDM_SHUTDOWN		(SCDM_CMD | 0x5) /* setkeyswitch STANDBY */
#define	SCDM_GET_NODENAME	(SCDM_CMD | 0x6) /* get domain nodename */
#define	SCDM_LOG_ECC_ERROR	(SCDM_CMD | 0x7) /* ECC error logging */
#define	SCDM_LOG_ECC_INDICTMENT	(SCDM_CMD | 0x8) /* ECC indictment logging */
#define	SCDM_LOG_ECC		(SCDM_CMD | 0x9) /* ECC info */
#define	SCDM_LOG_ECC_CAP_INIT	(SCDM_CMD | 0xa) /* ECC Capability Init */
#define	SCDM_LOG_ECC_CAP_RESP	(SCDM_CMD | 0xb) /* ECC Capability Response */
#define	SCDM_DIMM_SERIAL_ID	(SCDM_CMD | 0xc) /* DIMM ser# req/resp */
#define	SCDM_DP_ERROR_MSG	(SCDM_CMD | 0xd) /* datapath error */
#define	SCDM_DP_FAULT_MSG	(SCDM_CMD | 0xe) /* datapath fault */
841708Sstevel 
851708Sstevel /* general constants */
861708Sstevel #define	GETMSG_TIMEOUT_MS	500
871708Sstevel #define	PUTMSG_TIMEOUT_MS	6000
881708Sstevel #define	MIN_INPUTQ_TASKS	2
891708Sstevel #define	MAX_INPUTQ_TASKS	4
901708Sstevel #define	MIN_OUTPUTQ_TASKS	2
911708Sstevel #define	MAX_OUTPUTQ_TASKS	512
921708Sstevel #ifndef TRUE
931708Sstevel #define	TRUE	1
941708Sstevel #endif
951708Sstevel #ifndef FALSE
961708Sstevel #define	FALSE	0
971708Sstevel #endif
981708Sstevel 
991708Sstevel clock_t ecc_message_timeout_ms = PUTMSG_TIMEOUT_MS;
1001708Sstevel 
/*
 * Description of one mailbox message.
 *
 * To send a message to the SC, fill in an scosmb_msgdata_t and dispatch
 * scosmb_process_output() on scosmb_output_taskq with the structure's
 * address as the argument.  "length" and "data" carry any payload beyond
 * the header fields (type, cmd, transid) when that payload has to be
 * captured at the time the job is queued.  For message types whose payload
 * should always be the latest available (e.g. nodename info), "data" may be
 * left NULL and the payload assembled in scosmb_process_output() just
 * before transmission.
 *
 * A non-zero log_error asks for a cmn_err() message on any delivery
 * failure; when it is zero, failures are reported through return values
 * only.
 */
typedef struct {
	uint32_t	type;		/* message type */
	uint32_t	cmd;		/* mailbox command */
	uint64_t	transid;	/* transaction id */
	uint32_t	length;		/* size of the payload in "data" */
	int		log_error;	/* non-zero: log delivery errors */
	void		*data;		/* payload, or NULL */
} scosmb_msgdata_t;
1251708Sstevel 
/*
 * Payload of a datapath error or fault message.  These messages arrive
 * unsolicited from the SC.
 */
typedef struct {
	uint8_t		type;		/* CDS, DX, EX, CP */
	uint8_t		pad;		/* for alignment */
	uint16_t	cpuid;		/* Safari ID of base CPU */
	uint32_t	t_value;	/* SERD timeout threshold (seconds) */
} plat_datapath_info_t;
1361708Sstevel 
1371708Sstevel /* externally visible routines */
1381708Sstevel void scosmb_update_nodename(uint64_t transid);
1391708Sstevel 
1401708Sstevel /* local routines */
1411708Sstevel static void scosmb_inbox_handler();
1421708Sstevel static void scosmb_process_input(void *unused);
1431708Sstevel static int scosmb_process_output(scosmb_msgdata_t *arg);
1441708Sstevel 
1451708Sstevel /* local variables */
1461708Sstevel static uint8_t	scosmb_mboxsc_failed = FALSE;
1471708Sstevel static uint8_t	scosmb_mboxsc_timedout = FALSE;
1481708Sstevel static uint8_t	scosmb_nodename_event_pending = FALSE;
1491708Sstevel static char	scosmb_hdr[] = "SCOSMB:";
1501708Sstevel static kmutex_t scosmb_mutex;
1511708Sstevel static taskq_t	*scosmb_input_taskq = NULL;
1521708Sstevel static taskq_t	*scosmb_output_taskq = NULL;
1531708Sstevel 
1541708Sstevel static char *dperrtype[] = {
1551708Sstevel 	DP_ERROR_CDS,
1561708Sstevel 	DP_ERROR_DX,
1571708Sstevel 	DP_ERROR_EX,
1581708Sstevel 	DP_ERROR_CP
1591708Sstevel };
1601708Sstevel 
1611708Sstevel /*
1621708Sstevel  * Structures from modctl.h used for loadable module support.
1631708Sstevel  * SCOSMB is a "miscellaneous" module.
1641708Sstevel  */
1651708Sstevel extern struct mod_ops mod_miscops;
1661708Sstevel 
1671708Sstevel static struct modlmisc modlmisc = {
1681708Sstevel 	&mod_miscops,
1691708Sstevel 	"Sun Fire 15000 OS Mbox Client v1.10",
1701708Sstevel };
1711708Sstevel 
1721708Sstevel static struct modlinkage modlinkage = {
1731708Sstevel 	MODREV_1,
1741708Sstevel 	(void *)&modlmisc,
1751708Sstevel 	NULL
1761708Sstevel };
1771708Sstevel 
1781708Sstevel 
1791708Sstevel /*
1801708Sstevel  * _init
1811708Sstevel  *
1821708Sstevel  * Loadable module support routine.  Initializes mutex and condition variables
1831708Sstevel  * and starts thread.
1841708Sstevel  */
1851708Sstevel int
_init(void)1861708Sstevel _init(void)
1871708Sstevel {
1881708Sstevel 	int error;
1891708Sstevel 
1901708Sstevel 	/*
1911708Sstevel 	 * Initialize the mailboxes
1921708Sstevel 	 */
1931708Sstevel 	if ((error = mboxsc_init(SCDM_KEY, MBOXSC_MBOX_IN,
1941708Sstevel 	    scosmb_inbox_handler)) != 0) {
1951708Sstevel 		cmn_err(CE_WARN, "%s mboxsc_init failed (0x%x)\n", scosmb_hdr,
1961708Sstevel 		    error);
1971708Sstevel 		return (error);
1981708Sstevel 	}
1991708Sstevel 
2001708Sstevel 	if ((error = mboxsc_init(DMSC_KEY, MBOXSC_MBOX_OUT, NULL)) != 0) {
2011708Sstevel 		cmn_err(CE_WARN, "%s mboxsc_init failed (0x%x)\n", scosmb_hdr,
2021708Sstevel 		    error);
203*11311SSurya.Prakki@Sun.COM 		(void) mboxsc_fini(SCDM_KEY);
2041708Sstevel 		return (error);
2051708Sstevel 	}
2061708Sstevel 
2071708Sstevel 	/*
2081708Sstevel 	 * Initialize the global lock
2091708Sstevel 	 */
2101708Sstevel 	mutex_init(&scosmb_mutex, NULL, MUTEX_DEFAULT, NULL);
2111708Sstevel 
2121708Sstevel 	/*
2131708Sstevel 	 * Create the task queues used for processing input and output messages
2141708Sstevel 	 */
2151708Sstevel 	scosmb_input_taskq = taskq_create("scosmb_input_taskq", 1,
2161708Sstevel 	    minclsyspri, MIN_INPUTQ_TASKS, MAX_INPUTQ_TASKS, TASKQ_PREPOPULATE);
2171708Sstevel 	scosmb_output_taskq = taskq_create("scosmb_output_taskq", 1,
2181708Sstevel 	    minclsyspri, MIN_OUTPUTQ_TASKS, MAX_OUTPUTQ_TASKS,
2191708Sstevel 	    TASKQ_PREPOPULATE);
2201708Sstevel 
2211708Sstevel 	/*
2221708Sstevel 	 * Attempt to install the module.  If unsuccessful, uninitialize
2231708Sstevel 	 * everything.
2241708Sstevel 	 */
2251708Sstevel 	error = mod_install(&modlinkage);
2261708Sstevel 	if (error != 0) {
2271708Sstevel 		taskq_destroy(scosmb_output_taskq);
2281708Sstevel 		taskq_destroy(scosmb_input_taskq);
2291708Sstevel 		mutex_destroy(&scosmb_mutex);
230*11311SSurya.Prakki@Sun.COM 		(void) mboxsc_fini(DMSC_KEY);
231*11311SSurya.Prakki@Sun.COM 		(void) mboxsc_fini(SCDM_KEY);
2321708Sstevel 	}
2331708Sstevel 
2341708Sstevel 	return (error);
2351708Sstevel }
2361708Sstevel 
/*
 * _fini
 *
 * Loadable module support routine.  This module must never be unloaded: it
 * provides a critical service and its symbols may be referenced externally.
 * Unconditionally returning EBUSY refuses every unload request.
 */
int
_fini(void)
{
	return (EBUSY);
}
2491708Sstevel 
2501708Sstevel /*
2511708Sstevel  * _info
2521708Sstevel  *
2531708Sstevel  * Loadable module support routine.
2541708Sstevel  */
2551708Sstevel int
_info(struct modinfo * modinfop)2561708Sstevel _info(struct modinfo *modinfop)
2571708Sstevel {
2581708Sstevel 	int		error = 0;
2591708Sstevel 
2601708Sstevel 	error = mod_info(&modlinkage, modinfop);
2611708Sstevel 	return (error);
2621708Sstevel }
2631708Sstevel 
2641708Sstevel /*
2651708Sstevel  * scosmb_inbox_handler() - mbox API event handler.
2661708Sstevel  *
2671708Sstevel  * This routine adds an entry to the scosmb_input_taskq that will cause the
2681708Sstevel  * scosmb_process_input() routine to be called to service the SCDM mailbox.  The
2691708Sstevel  * possibility that taskq_dispatch may fail when given KM_NOSLEEP is safely
2701708Sstevel  * ignored because there can only be one message waiting in the mailbox at any
2711708Sstevel  * given time, so the current message will end up being handled by one of the
2721708Sstevel  * previously queued jobs (and a previous message presumably timed out before we
2731708Sstevel  * got around to reading it).
2741708Sstevel  */
2751708Sstevel static void
scosmb_inbox_handler()2761708Sstevel scosmb_inbox_handler()
2771708Sstevel {
2781708Sstevel 	(void) taskq_dispatch(scosmb_input_taskq, scosmb_process_input, NULL,
2791708Sstevel 	    KM_NOSLEEP);
2801708Sstevel }
2811708Sstevel 
2821708Sstevel /*
2831708Sstevel  * dp_get_cores()
2841708Sstevel  *
2851708Sstevel  * Checks cpu implementation for the input cpuid and returns
2861708Sstevel  * the number of cores.
2871708Sstevel  * If implementation cannot be determined, returns 1
2881708Sstevel  */
2891708Sstevel static int
dp_get_cores(uint16_t cpuid)2901708Sstevel dp_get_cores(uint16_t cpuid)
2911708Sstevel {
2921708Sstevel 	int	exp, ii, impl = 0, nc, slot;
2931708Sstevel 
2941708Sstevel 	exp = STARCAT_CPUID_TO_EXPANDER(cpuid);
2951708Sstevel 	slot = STARCAT_CPUID_TO_BOARDSLOT(cpuid);
2961708Sstevel 	if (slot == 1)
2971708Sstevel 		nc = STARCAT_SLOT1_CPU_MAX;
2981708Sstevel 	else
2991708Sstevel 		nc = plat_max_cpu_units_per_board();
3001708Sstevel 
3011708Sstevel 	/* find first with valid implementation */
3021708Sstevel 	for (ii = 0; ii < nc; ii++)
3031708Sstevel 		if (cpu[MAKE_CPUID(exp, slot, ii)]) {
3041708Sstevel 			impl = cpunodes[MAKE_CPUID(exp, slot, ii)].
305*11311SSurya.Prakki@Sun.COM 			    implementation;
3061708Sstevel 			break;
3071708Sstevel 		}
3081708Sstevel 
3091708Sstevel 	if (IS_JAGUAR(impl) || IS_PANTHER(impl))
3101708Sstevel 		return (2);
3111708Sstevel 	else
3121708Sstevel 		return (1);
3131708Sstevel 
3141708Sstevel }
3151708Sstevel 
3161708Sstevel /*
3171708Sstevel  * dp_payload_add_cpus()
3181708Sstevel  *
3191708Sstevel  * From datapath mailbox message, determines the number of and safari IDs
3201708Sstevel  * for affected cpus, then adds this info to the datapath ereport.
3211708Sstevel  *
3221708Sstevel  * Input maxcat (if set) is a count of maxcat cpus actually present - it is
3231708Sstevel  * a count of cpuids, which takes into account multi-core architecture.
3241708Sstevel  */
3251708Sstevel static int
dp_payload_add_cpus(plat_datapath_info_t * dpmsg,nvlist_t * erp,int maxcat)3261708Sstevel dp_payload_add_cpus(plat_datapath_info_t *dpmsg, nvlist_t *erp, int maxcat)
3271708Sstevel {
3281708Sstevel 	int		jj = 0, numcpus = 0, nummaxcpus = 0;
3291708Sstevel 	int		count, exp, ii, num, ncores, ret, slot, port;
3301708Sstevel 	uint16_t	*dparray, cpuid;
3311708Sstevel 	uint64_t	*snarray;
3321708Sstevel 
3331708Sstevel 	/* check for multiple core architectures */
3341708Sstevel 	ncores = dp_get_cores(dpmsg->cpuid);
3351708Sstevel 
3361708Sstevel 	/*
3371708Sstevel 	 * Determine the number of cpu cores impacted
3381708Sstevel 	 */
3391708Sstevel 	switch (dpmsg->type) {
3401708Sstevel 		case DP_CDS_TYPE:
3411708Sstevel 			if (maxcat)
3421708Sstevel 				nummaxcpus = ncores;
3431708Sstevel 			else
3441708Sstevel 				numcpus = ncores;
3451708Sstevel 			break;
3461708Sstevel 
3471708Sstevel 		case DP_DX_TYPE:
3481708Sstevel 			if (maxcat)
3491708Sstevel 				nummaxcpus = 2 * ncores;
3501708Sstevel 			else
3511708Sstevel 				numcpus = 2 * ncores;
3521708Sstevel 			break;
3531708Sstevel 
3541708Sstevel 		case DP_EX_TYPE:
3551708Sstevel 			if (maxcat)
3561708Sstevel 				nummaxcpus = STARCAT_SLOT1_CPU_MAX;
3571708Sstevel 			else
3581708Sstevel 				numcpus = plat_max_cpu_units_per_board();
3591708Sstevel 			break;
3601708Sstevel 
3611708Sstevel 		case DP_CP_TYPE:
3621708Sstevel 			/*
3631708Sstevel 			 * SC-DE supplies the base cpuid affected, if
3641708Sstevel 			 * maxcat id was given, there's no slot 0 board
3651708Sstevel 			 * present.
3661708Sstevel 			 */
3671708Sstevel 
3681708Sstevel 			if (!maxcat) {
3691708Sstevel 				/* Slot 0 id was given - set numcpus */
3701708Sstevel 				numcpus = plat_max_cpu_units_per_board();
3711708Sstevel 			}
3721708Sstevel 
3731708Sstevel 			/* there may/may not be maxcats. set a count anyway */
3741708Sstevel 			nummaxcpus = STARCAT_SLOT1_CPU_MAX;
3751708Sstevel 
3761708Sstevel 			break;
3771708Sstevel 
3781708Sstevel 		default:
3791708Sstevel 			ASSERT(0);
3801708Sstevel 			return (-1);
3811708Sstevel 	}
3821708Sstevel 
3831708Sstevel 	/* Allocate space for cores */
3841708Sstevel 	num = numcpus + nummaxcpus;
3851708Sstevel 	dparray = kmem_zalloc(num * sizeof (uint16_t *), KM_SLEEP);
3861708Sstevel 
3871708Sstevel 	/*
3881708Sstevel 	 * populate dparray with impacted cores (only those present)
3891708Sstevel 	 */
3901708Sstevel 	exp = STARCAT_CPUID_TO_EXPANDER(dpmsg->cpuid);
3911708Sstevel 	slot = STARCAT_CPUID_TO_BOARDSLOT(dpmsg->cpuid);
3921708Sstevel 	port = STARCAT_CPUID_TO_LPORT(dpmsg->cpuid);
3931708Sstevel 
3941708Sstevel 	mutex_enter(&cpu_lock);
3951708Sstevel 
3961708Sstevel 	switch (dpmsg->type) {
3971708Sstevel 		case DP_CDS_TYPE:
3981708Sstevel 			/*
3991708Sstevel 			 * For a CDS error, it's the reporting cpuid
4001708Sstevel 			 * and it's other core (if present)
4011708Sstevel 			 */
4021708Sstevel 			cpuid = dpmsg->cpuid & 0xFFFB; 	/* core 0 */
4031708Sstevel 			if (cpu[cpuid])
4041708Sstevel 				dparray[jj++] = cpuid;
4051708Sstevel 
4061708Sstevel 			cpuid = dpmsg->cpuid | 0x4; 	/* core 1 */
4071708Sstevel 			if (cpu[cpuid])
4081708Sstevel 				dparray[jj++] = cpuid;
4091708Sstevel 			break;
4101708Sstevel 
4111708Sstevel 		case DP_DX_TYPE:
4121708Sstevel 			/*
4131708Sstevel 			 * For a DX error, it's the reporting cpuid (all
4141708Sstevel 			 * cores), and the other CPU sharing the same
4151708Sstevel 			 * DX<-->DCDS interface (all cores)
4161708Sstevel 			 */
4171708Sstevel 
4181708Sstevel 			/* reporting cpuid */
4191708Sstevel 			cpuid = dpmsg->cpuid & 0xFFFB; 	/* core 0 */
4201708Sstevel 
4211708Sstevel 			if (cpu[cpuid])
4221708Sstevel 				dparray[jj++] = cpuid;
4231708Sstevel 
4241708Sstevel 			cpuid = dpmsg->cpuid | 0x4; 	/* core 1 */
4251708Sstevel 			if (cpu[cpuid])
4261708Sstevel 				dparray[jj++] = cpuid;
4271708Sstevel 
4281708Sstevel 			/* find partner cpuid */
4291708Sstevel 			if (port == 0 || port == 2)
4301708Sstevel 				cpuid = dpmsg->cpuid | 0x1;
4311708Sstevel 			else
4321708Sstevel 				cpuid = dpmsg->cpuid & 0xFFFE;
4331708Sstevel 
4341708Sstevel 			/* add partner cpuid */
4351708Sstevel 			cpuid &= 0xFFFB; 	/* core 0 */
4361708Sstevel 			if (cpu[cpuid])
4371708Sstevel 				dparray[jj++] = cpuid;
4381708Sstevel 
4391708Sstevel 			cpuid |= 0x4; 	/* core 1 */
4401708Sstevel 			if (cpu[cpuid])
4411708Sstevel 				dparray[jj++] = cpuid;
4421708Sstevel 			break;
4431708Sstevel 
4441708Sstevel 		case DP_EX_TYPE:
4451708Sstevel 			/*
4461708Sstevel 			 * For an EX error, it is all cpuids (all cores)
4471708Sstevel 			 * on the reporting board
4481708Sstevel 			 */
4491708Sstevel 
4501708Sstevel 			if (slot == 1) 			/* maxcat */
4511708Sstevel 				count = nummaxcpus;
4521708Sstevel 			else
4531708Sstevel 				count = numcpus;
4541708Sstevel 
4551708Sstevel 			for (ii = 0; ii < count; ii++) {
4561708Sstevel 				cpuid = MAKE_CPUID(exp, slot, ii);
4571708Sstevel 				if (cpu[cpuid])
4581708Sstevel 					dparray[jj++] = cpuid;
4591708Sstevel 			}
4601708Sstevel 			break;
4611708Sstevel 
4621708Sstevel 		case DP_CP_TYPE:
4631708Sstevel 			/*
4641708Sstevel 			 * For a CP error, it is all cpuids (all cores)
4651708Sstevel 			 * on both boards (SB & IO) in the boardset
4661708Sstevel 			 */
4671708Sstevel 
4681708Sstevel 			/* Do slot 0 */
4691708Sstevel 			for (ii = 0; ii < numcpus; ii++) {
4701708Sstevel 				cpuid = MAKE_CPUID(exp, 0, ii);
4711708Sstevel 				if (cpu[cpuid])
4721708Sstevel 					dparray[jj++] = cpuid;
4731708Sstevel 			}
4741708Sstevel 
4751708Sstevel 			/* Do slot 1 */
4761708Sstevel 			for (ii = 0; ii < nummaxcpus; ii++) {
4771708Sstevel 				cpuid = MAKE_CPUID(exp, 1, ii);
4781708Sstevel 				if (cpu[cpuid])
4791708Sstevel 					dparray[jj++] = cpuid;
4801708Sstevel 			}
4811708Sstevel 			break;
4821708Sstevel 	}
4831708Sstevel 
4841708Sstevel 	mutex_exit(&cpu_lock);
4851708Sstevel 
4861708Sstevel 	/*
4871708Sstevel 	 * The datapath message could not be associated with any
4881708Sstevel 	 * configured CPU.
4891708Sstevel 	 */
4901708Sstevel 	if (!jj) {
4911708Sstevel 		kmem_free(dparray, num * sizeof (uint16_t *));
4921708Sstevel 		ret = nvlist_add_uint32(erp, DP_LIST_SIZE, jj);
4931708Sstevel 		ASSERT(ret == 0);
4941708Sstevel 		return (-1);
4951708Sstevel 	}
4961708Sstevel 
4971708Sstevel 	snarray = kmem_zalloc(jj * sizeof (uint64_t *), KM_SLEEP);
4981708Sstevel 	for (ii = 0; ii < jj; ii++)
4991708Sstevel 		snarray[ii] = cpunodes[dparray[ii]].device_id;
5001708Sstevel 
5011708Sstevel 	ret = nvlist_add_uint32(erp, DP_LIST_SIZE, jj);
5021708Sstevel 	ret |= nvlist_add_uint16_array(erp, DP_LIST, dparray, jj);
5031708Sstevel 	ret |= nvlist_add_uint64_array(erp, SN_LIST, snarray, jj);
5041708Sstevel 	ASSERT(ret == 0);
5051708Sstevel 
5061708Sstevel 	kmem_free(dparray, num * sizeof (uint16_t *));
5071708Sstevel 	kmem_free(snarray, jj * sizeof (uint64_t *));
5081708Sstevel 
5091708Sstevel 	return (0);
5101708Sstevel }
5111708Sstevel 
5121708Sstevel /*
5131708Sstevel  * dp_trans_event() - datapath message handler.
5141708Sstevel  *
5151708Sstevel  * Process datapath error and fault messages received from the SC.  Checks
5161708Sstevel  * for, and disregards, messages associated with I/O boards.  Otherwise,
5171708Sstevel  * extracts message info to produce a datapath ereport.
5181708Sstevel  */
5191708Sstevel static void
dp_trans_event(plat_datapath_info_t * dpmsg,int msgtype)5201708Sstevel dp_trans_event(plat_datapath_info_t *dpmsg, int msgtype)
5211708Sstevel {
5221708Sstevel 	nvlist_t	*erp, *detector, *hcelem;
5231708Sstevel 	char		buf[FM_MAX_CLASS];
5241708Sstevel 	int		exp, slot, i, maxcat = 0;
5251708Sstevel 
5261708Sstevel 	/* check for I/O board message */
5271708Sstevel 	exp = STARCAT_CPUID_TO_EXPANDER(dpmsg->cpuid);
5281708Sstevel 	slot = STARCAT_CPUID_TO_BOARDSLOT(dpmsg->cpuid);
5291708Sstevel 
5301708Sstevel 	if (slot) {
5311708Sstevel 		mutex_enter(&cpu_lock);
5321708Sstevel 		for (i = 0; i < STARCAT_SLOT1_CPU_MAX; i++) {
5331708Sstevel 			if (cpu[MAKE_CPUID(exp, slot, i)]) {
5341708Sstevel 				/* maxcat cpu present */
5351708Sstevel 				maxcat++;
5361708Sstevel 			}
5371708Sstevel 		}
5381708Sstevel 		mutex_exit(&cpu_lock);
5391708Sstevel 
5401708Sstevel 		/*
5411708Sstevel 		 * Ignore I/O board msg
5421708Sstevel 		 */
5431708Sstevel 		if (maxcat == 0)
5441708Sstevel 			return;
5451708Sstevel 	}
5461708Sstevel 
5471708Sstevel 	/* allocate space for ereport */
5481708Sstevel 	erp = fm_nvlist_create(NULL);
5491708Sstevel 
5501708Sstevel 	/*
5511708Sstevel 	 *
5521708Sstevel 	 * Member Name	Data Type	   Comments
5531708Sstevel 	 * -----------	---------	   -----------
5541708Sstevel 	 * version	uint8		   0
5551708Sstevel 	 * class	string		   "asic"
5561708Sstevel 	 * ENA		uint64		   ENA Format 1
5571708Sstevel 	 * detector	fmri		   aggregated ID data for SC-DE
5581708Sstevel 	 *
5591708Sstevel 	 * Datapath ereport subclasses and data payloads:
5601708Sstevel 	 * There will be two types of ereports (error and fault) which will be
5611708Sstevel 	 * identified by the "type" member.
5621708Sstevel 	 *
5631708Sstevel 	 * ereport.asic.starcat.cds.cds-dp
5641708Sstevel 	 * ereport.asic.starcat.dx.dx-dp
5651708Sstevel 	 * ereport.asic.starcat.sdi.sdi-dp
5661708Sstevel 	 * ereport.asic.starcat.cp.cp-dp
5671708Sstevel 	 *
5681708Sstevel 	 * Member Name	Data Type	Comments
5691708Sstevel 	 * -----------	---------	-----------
5701708Sstevel 	 * erptype	uint16		derived from message type: error or
5711708Sstevel 	 *				fault
5721708Sstevel 	 * t-value	uint32		SC's datapath SERD timeout threshold
5731708Sstevel 	 * dp-list-sz	uint8		number of dp-list array elements
5741708Sstevel 	 * dp-list	array of uint16	Safari IDs of affected cpus
5751708Sstevel 	 * sn-list	array of uint64	Serial numbers of affected cpus
5761708Sstevel 	 *
5771708Sstevel 	 */
5781708Sstevel 
5791708Sstevel 	/* compose common ereport elements */
5801708Sstevel 	detector = fm_nvlist_create(NULL);
5811708Sstevel 
5821708Sstevel 	/*
5831708Sstevel 	 * Create legacy FMRI for the detector
5841708Sstevel 	 */
5851708Sstevel 	switch (dpmsg->type) {
5861708Sstevel 		case DP_CDS_TYPE:
5871708Sstevel 		case DP_DX_TYPE:
5881708Sstevel 			if (slot == 1)
5891708Sstevel 				(void) snprintf(buf, FM_MAX_CLASS, "IO%d", exp);
5901708Sstevel 			else
5911708Sstevel 				(void) snprintf(buf, FM_MAX_CLASS, "SB%d", exp);
5921708Sstevel 			break;
5931708Sstevel 
5941708Sstevel 		case DP_EX_TYPE:
5951708Sstevel 			(void) snprintf(buf, FM_MAX_CLASS, "EX%d", exp);
5961708Sstevel 			break;
5971708Sstevel 
5981708Sstevel 		case DP_CP_TYPE:
5991708Sstevel 			(void) snprintf(buf, FM_MAX_CLASS, "CP");
6001708Sstevel 			break;
6011708Sstevel 
6021708Sstevel 		default:
6031708Sstevel 			(void) snprintf(buf, FM_MAX_CLASS, "UNKNOWN");
6041708Sstevel 			break;
6051708Sstevel 	}
6061708Sstevel 
6071708Sstevel 	hcelem = fm_nvlist_create(NULL);
6081708Sstevel 
6091708Sstevel 	(void) nvlist_add_string(hcelem, FM_FMRI_HC_NAME, FM_FMRI_LEGACY_HC);
6101708Sstevel 	(void) nvlist_add_string(hcelem, FM_FMRI_HC_ID, buf);
6111708Sstevel 
6121708Sstevel 	(void) nvlist_add_uint8(detector, FM_VERSION, FM_HC_SCHEME_VERSION);
6131708Sstevel 	(void) nvlist_add_string(detector, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
6141708Sstevel 	(void) nvlist_add_string(detector, FM_FMRI_HC_ROOT, "");
6151708Sstevel 	(void) nvlist_add_uint32(detector, FM_FMRI_HC_LIST_SZ, 1);
6161708Sstevel 	(void) nvlist_add_nvlist_array(detector, FM_FMRI_HC_LIST, &hcelem, 1);
6171708Sstevel 
6181708Sstevel 	/* build ereport class name */
6191708Sstevel 	(void) snprintf(buf, FM_MAX_CLASS, "asic.starcat.%s.%s-%s",
620*11311SSurya.Prakki@Sun.COM 	    dperrtype[dpmsg->type], dperrtype[dpmsg->type],
621*11311SSurya.Prakki@Sun.COM 	    FM_ERROR_DATAPATH);
6221708Sstevel 
6231708Sstevel 	fm_ereport_set(erp, FM_EREPORT_VERSION, buf,
624*11311SSurya.Prakki@Sun.COM 	    fm_ena_generate(0, FM_ENA_FMT1), detector, NULL);
6251708Sstevel 
6261708Sstevel 	/* add payload elements */
6271708Sstevel 	if (msgtype == SCDM_DP_ERROR_MSG) {
6281708Sstevel 		fm_payload_set(erp,
629*11311SSurya.Prakki@Sun.COM 		    DP_EREPORT_TYPE, DATA_TYPE_UINT16, DP_ERROR, NULL);
6301708Sstevel 	} else {
6311708Sstevel 		fm_payload_set(erp,
632*11311SSurya.Prakki@Sun.COM 		    DP_EREPORT_TYPE, DATA_TYPE_UINT16, DP_FAULT, NULL);
6331708Sstevel 	}
6341708Sstevel 
6351708Sstevel 	fm_payload_set(erp, DP_TVALUE, DATA_TYPE_UINT32, dpmsg->t_value, NULL);
6361708Sstevel 
6371708Sstevel 	if (dp_payload_add_cpus(dpmsg, erp, maxcat) == 0) {
6381708Sstevel 		/* post ereport */
6391708Sstevel 		fm_ereport_post(erp, EVCH_SLEEP);
6401708Sstevel 	}
6411708Sstevel 
6421708Sstevel 	/* free ereport memory */
6431708Sstevel 	fm_nvlist_destroy(erp, FM_NVA_FREE);
6441708Sstevel 	fm_nvlist_destroy(detector, FM_NVA_FREE);
6451708Sstevel 
6461708Sstevel }
6471708Sstevel 
6481708Sstevel /*
6491708Sstevel  * scosmb_process_input() - incoming message processing routine
6501708Sstevel  *
6511708Sstevel  * this routine attempts to read a message from the SCDM mailbox and, if
6521708Sstevel  * successful, processes the command.  if an unrecoverable error is encountered,
6531708Sstevel  * the scosmb_task thread will be terminated.
6541708Sstevel  */
6551708Sstevel /* ARGSUSED0 */
6561708Sstevel static void
scosmb_process_input(void * unused)6571708Sstevel scosmb_process_input(void *unused)
6581708Sstevel {
6591708Sstevel 	int 			error;
6601708Sstevel 	scosmb_msgdata_t	 msg;
6611708Sstevel 	proc_t			*initpp;
6621708Sstevel 	plat_capability_data_t	*cap;	/* capability msg contents ptr */
6631708Sstevel 	int			cap_size;
6641708Sstevel 	int			cap_ver_len;
6651708Sstevel 	scosmb_msgdata_t	*cap_msgdatap; /* capability msg response */
6661708Sstevel 	int			max_size;
6671708Sstevel 
6681708Sstevel 	/*
6691708Sstevel 	 * Attempt to read a message from the SCDM mailbox.
6701708Sstevel 	 *
6711708Sstevel 	 * Setup a local buffer to read incoming messages from the SC.
6721708Sstevel 	 */
6731708Sstevel 	cap_ver_len = strlen(utsname.release) + strlen(utsname.version) + 2;
6741708Sstevel 	cap_size = sizeof (plat_capability_data_t) + cap_ver_len;
6751708Sstevel 	max_size = MAX(cap_size, sizeof (plat_dimm_sid_board_data_t));
6761708Sstevel 
6771708Sstevel 	msg.type = 0;
6781708Sstevel 	msg.cmd = 0;
6791708Sstevel 	msg.transid = 0;
6801708Sstevel 	msg.length = max_size;
6811708Sstevel 	msg.log_error = 0;
6821708Sstevel 	msg.data = kmem_zalloc(max_size, KM_SLEEP);
6831708Sstevel 
6841708Sstevel 	error = mboxsc_getmsg(SCDM_KEY, &msg.type, &msg.cmd, &msg.transid,
6851708Sstevel 	    &msg.length, msg.data, GETMSG_TIMEOUT_MS);
6861708Sstevel 
6871708Sstevel 	/*
6881708Sstevel 	 * If EAGAIN or ETIMEDOUT was received, give up.  The SC can just try
6891708Sstevel 	 * again if it was important.  If any other non-zero error was
6901708Sstevel 	 * encountered, the mailbox service is broken, and there's nothing more
6911708Sstevel 	 * we can do.
6921708Sstevel 	 */
6931708Sstevel 	mutex_enter(&scosmb_mutex);
6941708Sstevel 	if ((error == EAGAIN) || (error == ETIMEDOUT)) {
6951708Sstevel 		mutex_exit(&scosmb_mutex);
6961708Sstevel 		return;
6971708Sstevel 	} else if (error != 0) {
6981708Sstevel 		/*
6991708Sstevel 		 * The mailbox service appears to be badly broken.  If it was
7001708Sstevel 		 * working previously, generate a warning and set a flag to
7011708Sstevel 		 * avoid repeating the warning on subsequent failures.
7021708Sstevel 		 */
7031708Sstevel 		if (!scosmb_mboxsc_failed) {
7041708Sstevel 			scosmb_mboxsc_failed = TRUE;
7051708Sstevel 			cmn_err(CE_WARN, "%s mboxsc error (0x%x)\n", scosmb_hdr,
7061708Sstevel 			    error);
7071708Sstevel 		}
7081708Sstevel 		mutex_exit(&scosmb_mutex);
7091708Sstevel 		return;
7101708Sstevel 	} else {
7111708Sstevel 		/*
7121708Sstevel 		 * If the mailbox module failed previously, it appears to have
7131708Sstevel 		 * recovered, so we'll want to generate a warning if it fails
7141708Sstevel 		 * again.
7151708Sstevel 		 */
7161708Sstevel 		scosmb_mboxsc_failed = FALSE;
7171708Sstevel 	}
7181708Sstevel 	mutex_exit(&scosmb_mutex);
7191708Sstevel 
7201708Sstevel 	/*
7211708Sstevel 	 * A message was successfully received, so go ahead and process it.
7221708Sstevel 	 */
7231708Sstevel 	switch (msg.cmd) {
7241708Sstevel 
7251708Sstevel 	case SCDM_GOTO_OBP:	/* jump to OBP */
7261708Sstevel 		debug_enter("SC requested jump to OBP");
7271708Sstevel 		break;
7281708Sstevel 
7291708Sstevel 	case SCDM_GOTO_PANIC:	/* Panic the domain */
7301708Sstevel 		cmn_err(CE_PANIC, "%s SC requested PANIC\n", scosmb_hdr);
7311708Sstevel 		break;
7321708Sstevel 
7331708Sstevel 	case SCDM_SHUTDOWN:	/* graceful shutdown */
7341708Sstevel 		cmn_err(CE_WARN, "%s SC requested a shutdown ", scosmb_hdr);
7351708Sstevel 		(void) kadmin(A_SHUTDOWN, AD_HALT, NULL, kcred);
7361708Sstevel 		/*
7371708Sstevel 		 * In the event kadmin does not bring down the
7381708Sstevel 		 * domain, environmental shutdown is forced
7391708Sstevel 		 */
7401708Sstevel 		/*FALLTHROUGH*/
7411708Sstevel 	case SCDM_ENVIRON:	/* environmental shutdown */
7421708Sstevel 		/*
7431708Sstevel 		 * Send SIGPWR to init(1) it will run rc0,
7441708Sstevel 		 * which will uadmin to power down.
7451708Sstevel 		 */
7461708Sstevel 		mutex_enter(&pidlock);
7471708Sstevel 		initpp = prfind(P_INITPID);
7481708Sstevel 		mutex_exit(&pidlock);
7491708Sstevel 
7501708Sstevel 
7511708Sstevel 		/*
7521708Sstevel 		 * If we're still booting and init(1) isn't set up yet,
7531708Sstevel 		 * simply halt.
7541708Sstevel 		 */
7551708Sstevel 		if (initpp == NULL) {
7561708Sstevel 			extern void halt(char *);
7571708Sstevel 			cmn_err(CE_WARN, "%s Environmental Interrupt",
7581708Sstevel 			    scosmb_hdr);
7591708Sstevel 			power_down((char *)NULL);
7601708Sstevel 			halt("Power off the System!\n");
7611708Sstevel 		}
7621708Sstevel 
7631708Sstevel 		/*
7641708Sstevel 		 * else, graceful shutdown with inittab and all
7651708Sstevel 		 * getting involved
7661708Sstevel 		 */
7671708Sstevel 		psignal(initpp, SIGPWR);
7681708Sstevel 		break;
7691708Sstevel 
7701708Sstevel 	case SCDM_GET_NODENAME:
7711708Sstevel 		scosmb_update_nodename(msg.transid);
7721708Sstevel 		break;
7731708Sstevel 
7741708Sstevel 	case SCDM_LOG_ECC_CAP_RESP:
7751708Sstevel 		/*
7761708Sstevel 		 * The SC has responded to our initiator capability message
7771708Sstevel 		 * issued during the boot flow via scosmb_update_nodename().
7781708Sstevel 		 *
7791708Sstevel 		 * Parse the incoming data, and appropriately set SC
7801708Sstevel 		 * capabilities...
7811708Sstevel 		 */
7821708Sstevel 		cap = (plat_capability_data_t *)msg.data;
7831708Sstevel 		plat_ecc_capability_sc_set(cap->capd_capability);
7841708Sstevel 		break;
7851708Sstevel 
7861708Sstevel 	case SCDM_LOG_ECC_CAP_INIT:
7871708Sstevel 		/*
7881708Sstevel 		 * The SC has initiated a capability messaging exchange with
7891708Sstevel 		 * the OS.
7901708Sstevel 		 *
7911708Sstevel 		 * We start out just as we do for an SC response capability
7921708Sstevel 		 * message, a parse of incoming data to appropriately set SC
7931708Sstevel 		 * described capabilities...
7941708Sstevel 		 */
7951708Sstevel 		cap = (plat_capability_data_t *)msg.data;
7961708Sstevel 		plat_ecc_capability_sc_set(cap->capd_capability);
7971708Sstevel 		/*
7981708Sstevel 		 * The next step is setting up our Response to the SC.
7991708Sstevel 		 *
8001708Sstevel 		 * Allocate memory for message data, initialize appropriately,
8011708Sstevel 		 * and place a new job on the scosmb_output_taskq for
8021708Sstevel 		 * SCDM_LOG_ECC_CAP_RESP, our OS capability messaging response
8031708Sstevel 		 * to the SC initiated sequence detected here.
8041708Sstevel 		 */
8051708Sstevel 		cap_msgdatap = kmem_zalloc(sizeof (scosmb_msgdata_t), KM_SLEEP);
8061708Sstevel 		cap_msgdatap->type = MBOXSC_MSG_EVENT;
8071708Sstevel 		cap_msgdatap->cmd = SCDM_LOG_ECC_CAP_RESP;
8081708Sstevel 		cap_msgdatap->transid = 0;
8091708Sstevel 		(void) taskq_dispatch(scosmb_output_taskq,
8101708Sstevel 		    (task_func_t *)scosmb_process_output, cap_msgdatap,
8111708Sstevel 		    KM_SLEEP);
8121708Sstevel 		break;
8131708Sstevel 
8141708Sstevel 	case SCDM_DP_ERROR_MSG:
8151708Sstevel 	case SCDM_DP_FAULT_MSG:
8161708Sstevel 		dp_trans_event(msg.data, msg.cmd);
8171708Sstevel 		break;
8181708Sstevel 
8191708Sstevel 	case SCDM_DIMM_SERIAL_ID:
8201708Sstevel 		(void) plat_store_mem_sids(msg.data);
8211708Sstevel 		break;
8221708Sstevel 
8231708Sstevel 	default:
8241708Sstevel 		cmn_err(CE_WARN, "%s invalid command (0x%x)\n", scosmb_hdr,
8251708Sstevel 		    msg.cmd);
8261708Sstevel 		break;
8271708Sstevel 	}
8281708Sstevel 
8291708Sstevel 	/*
8301708Sstevel 	 * Free up buffer for incoming message data that we allocated earlier
8311708Sstevel 	 */
8321708Sstevel 	kmem_free(msg.data, max_size);
8331708Sstevel }
8341708Sstevel 
8351708Sstevel /*
8361708Sstevel  * scosmb_process_output() - outgoing message processing routine
8371708Sstevel  *
8381708Sstevel  * This routine handles jobs that are queued on the scosmb_output_taskq, or
8391708Sstevel  * sent directly from scosmb_log_ecc_error.  Each job corresponds to a single
8401708Sstevel  * mailbox message that needs to be sent to the SC via the DMSC mailbox.  Some
8411708Sstevel  * processing of the message may be performed before it is sent to the SC,
8421708Sstevel  * depending on the value of the command field.
8431708Sstevel  */
8441708Sstevel static int
scosmb_process_output(scosmb_msgdata_t * msgdatap)8451708Sstevel scosmb_process_output(scosmb_msgdata_t *msgdatap)
8461708Sstevel {
8471708Sstevel 	int 			error;
8481708Sstevel 	int			length;
8491708Sstevel 	char			nodename[_SYS_NMLN];
8501708Sstevel 	void			*free_data;
8511708Sstevel 	int			free_data_len;
8521708Sstevel 	int			cap_size;
8531708Sstevel 	int			cap_ver_len;
8541708Sstevel 	plat_capability_data_t	*cap = NULL;
8551708Sstevel 
8561708Sstevel 	/*
8571708Sstevel 	 * This shouldn't ever happen, but it can't hurt to check anyway.
8581708Sstevel 	 */
8591708Sstevel 	if (msgdatap == NULL) {
8601708Sstevel 		return (EINVAL);
8611708Sstevel 	}
8621708Sstevel 
8631708Sstevel 	/*
8641708Sstevel 	 * If data was passed in, we'll need to free it before returning.
8651708Sstevel 	 */
8661708Sstevel 	free_data = msgdatap->data;
8671708Sstevel 	free_data_len = msgdatap->length;
8681708Sstevel 
8691708Sstevel 	/*
8701708Sstevel 	 * Some commands may need additional processing prior to transmission.
8711708Sstevel 	 */
8721708Sstevel 	switch (msgdatap->cmd) {
8731708Sstevel 		/*
8741708Sstevel 		 * Since the SC is only interested in the most recent value of
8751708Sstevel 		 * utsname.nodename, we wait until now to collect that data.  We
8761708Sstevel 		 * also use a global flag to prevent multiple event-type
8771708Sstevel 		 * nodename messages from being queued at the same time for the
8781708Sstevel 		 * same reason.
8791708Sstevel 		 */
8801708Sstevel 		case SCDM_GET_NODENAME:
8811708Sstevel 			mutex_enter(&scosmb_mutex);
8821708Sstevel 			length = strlen(utsname.nodename);
8831708Sstevel 			ASSERT(length < _SYS_NMLN);
8841708Sstevel 			if (length == 0) {
8851708Sstevel 				msgdatap->length = 0;
8861708Sstevel 				msgdatap->data = NULL;
8871708Sstevel 			} else {
8881708Sstevel 				bcopy(utsname.nodename, nodename, length);
8891708Sstevel 				nodename[length++] = '\0';
8901708Sstevel 				msgdatap->data = nodename;
8911708Sstevel 				msgdatap->length = length;
8921708Sstevel 			}
8931708Sstevel 			if (msgdatap->transid == 0) {
8941708Sstevel 				scosmb_nodename_event_pending = FALSE;
8951708Sstevel 			}
8961708Sstevel 			mutex_exit(&scosmb_mutex);
8971708Sstevel 			break;
8981708Sstevel 
8991708Sstevel 		/*
9001708Sstevel 		 * SCDM_LOG_ECC_CAP_INIT
9011708Sstevel 		 * Initiator Capability message from OS to SC
9021708Sstevel 		 *
9031708Sstevel 		 * We construct and send an initiator capability message
9041708Sstevel 		 * every time we go through scosmb_update_nodename(), which
9051708Sstevel 		 * works out to getting an "initiator" capability message
9061708Sstevel 		 * sent from the OS to the SC during the OS boot flow.
9071708Sstevel 		 *
9081708Sstevel 		 * The SC also issues a request to scosmb_update_nodename()
9091708Sstevel 		 * during an SC reboot.  Which results in an additional
9101708Sstevel 		 * capability message exchange during SC reboot scenarios.
9111708Sstevel 		 *
9121708Sstevel 		 * SCDM_LOG_ECC_CAP_RESP
9131708Sstevel 		 * Response Capability message from SC to OS
9141708Sstevel 		 *
9151708Sstevel 		 * In certain scenarios, the SC could initiate a capability
9161708Sstevel 		 * messaging exchange with the OS.  Processing starts in
9171708Sstevel 		 * scosmb_process_input(), where we detect an incoming
9181708Sstevel 		 * initiator capability message from the SC.  We finish
9191708Sstevel 		 * processing here, by sending a response capability message
9201708Sstevel 		 * back to the SC that reflects OS capabilities.
9211708Sstevel 		 */
9221708Sstevel 		case SCDM_LOG_ECC_CAP_INIT:
9231708Sstevel 			/*FALLTHROUGH*/
9241708Sstevel 		case SCDM_LOG_ECC_CAP_RESP:
9251708Sstevel 			mutex_enter(&scosmb_mutex);
9261708Sstevel 
9271708Sstevel 			cap_ver_len = strlen(utsname.release) +
9281708Sstevel 			    strlen(utsname.version) + 2;
9291708Sstevel 
9301708Sstevel 			cap_size = sizeof (plat_capability_data_t) +
9311708Sstevel 			    cap_ver_len;
9321708Sstevel 
9331708Sstevel 			cap =  kmem_zalloc(cap_size, KM_SLEEP);
9341708Sstevel 
9351708Sstevel 			cap->capd_major_version = PLAT_ECC_CAP_VERSION_MAJOR;
9361708Sstevel 			cap->capd_minor_version = PLAT_ECC_CAP_VERSION_MINOR;
9371708Sstevel 			cap->capd_msg_type = PLAT_ECC_CAPABILITY_MESSAGE;
9381708Sstevel 			cap->capd_msg_length =  cap_size;
9391708Sstevel 
9401708Sstevel 			cap->capd_capability =
9411708Sstevel 			    PLAT_ECC_CAPABILITY_DOMAIN_DEFAULT;
9421708Sstevel 
9431708Sstevel 			/*
9441708Sstevel 			 * Build the capability solaris_version string:
9451708Sstevel 			 * utsname.release + " " + utsname.version
9461708Sstevel 			 */
9471708Sstevel 			(void) snprintf(cap->capd_solaris_version,
9481708Sstevel 			    cap_ver_len, "%s %s", utsname.release,
9491708Sstevel 			    utsname.version);
9501708Sstevel 
9511708Sstevel 			/*
9521708Sstevel 			 * The capability message is constructed, now plug it
9531708Sstevel 			 * into the starcat msgdatap:
9541708Sstevel 			 */
9551708Sstevel 			msgdatap->data   = (plat_capability_data_t *)cap;
9561708Sstevel 			msgdatap->length = cap_size;
9571708Sstevel 
9581708Sstevel 			/*
9591708Sstevel 			 * Finished with initiator/response capability
9601708Sstevel 			 * message set up.
9611708Sstevel 			 *
9621708Sstevel 			 * Note that after sending an "initiator" capability
9631708Sstevel 			 * message, we can expect a subsequent "response"
9641708Sstevel 			 * capability message from the SC, which we will
9651708Sstevel 			 * pick up and minimally handle later,
9661708Sstevel 			 * in scosmb_process_input().
9671708Sstevel 			 *
9681708Sstevel 			 * If we're sending a "response" capability message
9691708Sstevel 			 * to the SC, then we're done once the message is sent.
9701708Sstevel 			 */
9711708Sstevel 
9721708Sstevel 			if (msgdatap->transid == 0) {
9731708Sstevel 				scosmb_nodename_event_pending = FALSE;
9741708Sstevel 			}
9751708Sstevel 			mutex_exit(&scosmb_mutex);
9761708Sstevel 			break;
9771708Sstevel 
9781708Sstevel 		default:
9791708Sstevel 			break;
9801708Sstevel 	}
9811708Sstevel 
9821708Sstevel 	/*
9831708Sstevel 	 * Attempt to send the message.
9841708Sstevel 	 */
9851708Sstevel 	error = mboxsc_putmsg(DMSC_KEY, msgdatap->type, msgdatap->cmd,
9861708Sstevel 	    &msgdatap->transid, msgdatap->length, msgdatap->data,
9871708Sstevel 	    ecc_message_timeout_ms);
9881708Sstevel 
9891708Sstevel 	/*
9901708Sstevel 	 * Free any allocated memory that was passed in.
9911708Sstevel 	 */
9921708Sstevel 	if (free_data != NULL) {
9931708Sstevel 		kmem_free(free_data, free_data_len);
9941708Sstevel 	}
9951708Sstevel 
9961708Sstevel 	if (cap != NULL) {
9971708Sstevel 		kmem_free(cap, cap_size);
9981708Sstevel 	}
9991708Sstevel 
10001708Sstevel 	kmem_free(msgdatap, sizeof (scosmb_msgdata_t));
10011708Sstevel 
10021708Sstevel 	/*
10031708Sstevel 	 * If EAGAIN or ETIMEDOUT was received, give up.  The sender can try
10041708Sstevel 	 * again if it was important.  If any other non-zero error was
10051708Sstevel 	 * encountered, the mailbox service is broken, and there's nothing more
10061708Sstevel 	 * we can do.
10071708Sstevel 	 */
10081708Sstevel 	mutex_enter(&scosmb_mutex);
10091708Sstevel 	if ((error == EAGAIN) || (error == ETIMEDOUT)) {
10101708Sstevel 		if (msgdatap->log_error && !scosmb_mboxsc_timedout) {
10111708Sstevel 			/*
10121708Sstevel 			 * Indictment mailbox messages use the return value to
10131708Sstevel 			 * indicate a problem in the mailbox.  For Error
10141708Sstevel 			 * mailbox messages, we'll have to use a syslog message.
10151708Sstevel 			 */
10161708Sstevel 			scosmb_mboxsc_timedout = TRUE;
10171708Sstevel 			cmn_err(CE_NOTE, "!Solaris failed to send a message "
10181708Sstevel 			    "(0x%x/0x%x) to the System Controller. Error: %d",
10191708Sstevel 			    msgdatap->type, msgdatap->cmd, error);
10201708Sstevel 		}
10211708Sstevel 	} else if (error != 0) {
10221708Sstevel 		/*
10231708Sstevel 		 * The mailbox service appears to be badly broken.  If it was
10241708Sstevel 		 * working previously, generate a warning and set a flag to
10251708Sstevel 		 * avoid repeating the warning on subsequent failures.
10261708Sstevel 		 */
10271708Sstevel 		if (msgdatap->log_error && !scosmb_mboxsc_failed) {
10281708Sstevel 			scosmb_mboxsc_failed = TRUE;
10291708Sstevel 			cmn_err(CE_NOTE, "!An internal error (%d) occurred "
10301708Sstevel 			    "while processing this message (0x%x/0x%x)",
10311708Sstevel 			    error, msgdatap->type, msgdatap->cmd);
10321708Sstevel 		}
10331708Sstevel 	} else {
10341708Sstevel 		/*
10351708Sstevel 		 * If the mailbox module failed previously, it appears to have
10361708Sstevel 		 * recovered, so we'll want to generate a warning if it fails
10371708Sstevel 		 * again.
10381708Sstevel 		 */
10391708Sstevel 		scosmb_mboxsc_failed = scosmb_mboxsc_timedout = FALSE;
10401708Sstevel 	}
10411708Sstevel 	mutex_exit(&scosmb_mutex);
10421708Sstevel 	return (error);
10431708Sstevel }
10441708Sstevel 
10451708Sstevel /*
10461708Sstevel  * scosmb_update_nodename() - nodename update routine
10471708Sstevel  *
10481708Sstevel  * this routine, which may be invoked from outside of the scosmb module, will
10491708Sstevel  * cause the current nodename to be sent to the SC.  The mailbox message sent to
10501708Sstevel  * the SC will use the indicated transaction ID, and will either be a reply
10511708Sstevel  * message if the ID is non-zero or an event message if it is 0.
10521708Sstevel  *
10531708Sstevel  * Capability messaging enhancements:
10541708Sstevel  *    Every time we move through this code flow, we put an "initiator
10551708Sstevel  *    capability message" on the message output taskq.  This action will
10561708Sstevel  *    get a capability message sent to the SC from the OS during boot
10571708Sstevel  *    scenarios.  A capability message exchange will also happen for
10581708Sstevel  *    SC reboot scenarios, as the SC will initiate a nodename update
10591708Sstevel  *    as a matter of course while coming back up.
10601708Sstevel  *
10611708Sstevel  *    We'll also get an extraneous capability message sent
10621708Sstevel  *    to the SC from time to time, but that won't hurt anything.
10631708Sstevel  */
10641708Sstevel void
scosmb_update_nodename(uint64_t transid)10651708Sstevel scosmb_update_nodename(uint64_t transid)
10661708Sstevel {
10671708Sstevel 	scosmb_msgdata_t	*msgdatap, *cap_msgdatap;
10681708Sstevel 
10691708Sstevel 	/*
10701708Sstevel 	 * If we're generating an unsolicited nodename update (presumably having
10711708Sstevel 	 * been called from platmod:plat_nodename_set()), there's no need to add
10721708Sstevel 	 * a new job to the queue if there is already one on it that will be
10731708Sstevel 	 * sending the latest nodename data.
10741708Sstevel 	 */
10751708Sstevel 	mutex_enter(&scosmb_mutex);
10761708Sstevel 	if (transid == 0) {
10771708Sstevel 		if (scosmb_nodename_event_pending) {
10781708Sstevel 			mutex_exit(&scosmb_mutex);
10791708Sstevel 			return;
10801708Sstevel 		} else {
10811708Sstevel 			scosmb_nodename_event_pending = TRUE;
10821708Sstevel 		}
10831708Sstevel 	}
10841708Sstevel 	mutex_exit(&scosmb_mutex);
10851708Sstevel 
10861708Sstevel 	/*
10871708Sstevel 	 * Allocate memory for the message data, initialize it, and place a new
10881708Sstevel 	 * job on the scosmb_output_taskq for SCDM_GET_NODENAME.
10891708Sstevel 	 */
10901708Sstevel 	msgdatap = (scosmb_msgdata_t *)kmem_zalloc(sizeof (scosmb_msgdata_t),
10911708Sstevel 	    KM_SLEEP);
10921708Sstevel 
10931708Sstevel 	msgdatap->type = (transid == 0) ? MBOXSC_MSG_EVENT : MBOXSC_MSG_REPLY;
10941708Sstevel 	msgdatap->cmd = SCDM_GET_NODENAME;
10951708Sstevel 	msgdatap->transid = transid;
10961708Sstevel 	msgdatap->log_error = 1;
10971708Sstevel 
10981708Sstevel 	(void) taskq_dispatch(scosmb_output_taskq,
10991708Sstevel 	    (task_func_t *)scosmb_process_output, msgdatap, KM_SLEEP);
11001708Sstevel 
11011708Sstevel 	/*
11021708Sstevel 	 * Next, allocate memory, initialize, and place a new job on the
11031708Sstevel 	 * scosmb_output_taskq for SCDM_LOG_ECC_CAP_INIT.  That's a
11041708Sstevel 	 * capability message, where we're the initiator.
11051708Sstevel 	 */
11061708Sstevel 	cap_msgdatap = kmem_zalloc(sizeof (scosmb_msgdata_t), KM_SLEEP);
11071708Sstevel 
11081708Sstevel 	cap_msgdatap->type = (transid == 0) ?
11091708Sstevel 	    MBOXSC_MSG_EVENT : MBOXSC_MSG_REPLY;
11101708Sstevel 	cap_msgdatap->cmd = SCDM_LOG_ECC_CAP_INIT;
11111708Sstevel 	cap_msgdatap->transid = transid;
11121708Sstevel 	cap_msgdatap->log_error = 1;
11131708Sstevel 
11141708Sstevel 	(void) taskq_dispatch(scosmb_output_taskq,
11151708Sstevel 	    (task_func_t *)scosmb_process_output, cap_msgdatap, KM_SLEEP);
11161708Sstevel }
11171708Sstevel 
11181708Sstevel /*
11191708Sstevel  * scosmb_log_ecc_error() - Record ECC error information to SC
11201708Sstevel  * For ECC error messages, send the messages through a taskq mechanism
11211708Sstevel  * to prevent impaired system performance during ECC floods.  Indictment
11221708Sstevel  * messages have already passed through a taskq, so directly call the
11231708Sstevel  * output function.
11241708Sstevel  */
11251708Sstevel int
scosmb_log_ecc_error(plat_ecc_message_type_t msg_type,void * datap)11261708Sstevel scosmb_log_ecc_error(plat_ecc_message_type_t msg_type, void *datap)
11271708Sstevel {
11281708Sstevel 	scosmb_msgdata_t	*msg_header_ptr;
11291708Sstevel 	uint32_t		msg_cmd, msg_length;
11301708Sstevel 	int			sleep_flag, log_error;
11311708Sstevel 	int			do_queue;	/* Set to 1 if taskq needed */
11321708Sstevel 
11331708Sstevel 	/*
11341708Sstevel 	 * Set header type and length for message
11351708Sstevel 	 */
11361708Sstevel 	switch (msg_type) {
11371708Sstevel 	case PLAT_ECC_ERROR_MESSAGE:
11381708Sstevel 		/*
11391708Sstevel 		 * We do not want to sleep in an error logging thread.  So,
11401708Sstevel 		 * we set the NOSLEEP flag and go through a taskq before we
11411708Sstevel 		 * send the message.
11421708Sstevel 		 */
11431708Sstevel 		msg_cmd = SCDM_LOG_ECC_ERROR;
11441708Sstevel 		msg_length = sizeof (plat_ecc_error_data_t);
11451708Sstevel 		sleep_flag = KM_NOSLEEP;
11461708Sstevel 		log_error = 1;
11471708Sstevel 		do_queue = 1;
11481708Sstevel 		break;
11491708Sstevel 	case PLAT_ECC_ERROR2_MESSAGE:
11501708Sstevel 		msg_cmd = SCDM_LOG_ECC;
11511708Sstevel 		msg_length = sizeof (plat_ecc_error2_data_t);
11521708Sstevel 		sleep_flag = KM_NOSLEEP;
11531708Sstevel 		log_error = 1;
11541708Sstevel 		do_queue = 1;
11551708Sstevel 		break;
11561708Sstevel 	case PLAT_ECC_INDICTMENT_MESSAGE:
11571708Sstevel 		/*
11581708Sstevel 		 * For indictment messages, we're allowed to sleep, and we
11591708Sstevel 		 * can directly call the output function, since we've already
11601708Sstevel 		 * gone through a taskq
11611708Sstevel 		 */
11621708Sstevel 		msg_cmd = SCDM_LOG_ECC_INDICTMENT;
11631708Sstevel 		msg_length = sizeof (plat_ecc_indictment_data_t);
11641708Sstevel 		sleep_flag = KM_SLEEP;
11651708Sstevel 		log_error = 0;
11661708Sstevel 		do_queue = 0;
11671708Sstevel 		break;
11681708Sstevel 	case PLAT_ECC_INDICTMENT2_MESSAGE:
11691708Sstevel 		/*
11701708Sstevel 		 * For indictment2 messages, we're allowed to sleep, and we
11711708Sstevel 		 * can directly call the output function, since we've already
11721708Sstevel 		 * gone through a taskq
11731708Sstevel 		 */
11741708Sstevel 		msg_cmd = SCDM_LOG_ECC;
11751708Sstevel 		msg_length = sizeof (plat_ecc_indictment2_data_t);
11761708Sstevel 		sleep_flag = KM_SLEEP;
11771708Sstevel 		log_error = 0;
11781708Sstevel 		do_queue = 0;
11791708Sstevel 		break;
11801708Sstevel 
11811708Sstevel 	case PLAT_ECC_DIMM_SID_MESSAGE:
11821708Sstevel 		/*
11831708Sstevel 		 * For DIMM sid request messages, we're allowed to sleep, and we
11841708Sstevel 		 * can directly call the output function, since we've already
11851708Sstevel 		 * gone through a taskq
11861708Sstevel 		 */
11871708Sstevel 		msg_cmd = SCDM_DIMM_SERIAL_ID;
11881708Sstevel 		msg_length = sizeof (plat_dimm_sid_request_data_t);
11891708Sstevel 		sleep_flag = KM_SLEEP;
11901708Sstevel 		log_error = 0;
11911708Sstevel 		do_queue = 0;
11921708Sstevel 		break;
11931708Sstevel 
11941708Sstevel 	default:
11951708Sstevel 		return (EINVAL);
11961708Sstevel 	}
11971708Sstevel 
11981708Sstevel 	/*
11991708Sstevel 	 * Allocate memory for the mailbox message header.
12001708Sstevel 	 */
12011708Sstevel 	msg_header_ptr =
12021708Sstevel 	    (scosmb_msgdata_t *)kmem_zalloc(sizeof (scosmb_msgdata_t),
12031708Sstevel 	    sleep_flag);
12041708Sstevel 
12051708Sstevel 	if (msg_header_ptr == NULL) {
12061708Sstevel #ifdef DEBUG
12071708Sstevel 		cmn_err(CE_WARN, "failed to allocate space for scosmb "
12081708Sstevel 		    "message header.");
12091708Sstevel #endif	/* DEBUG */
12101708Sstevel 		return (ENOMEM);
12111708Sstevel 	}
12121708Sstevel 
12131708Sstevel 	msg_header_ptr->type = MBOXSC_MSG_EVENT;
12141708Sstevel 	msg_header_ptr->cmd = msg_cmd;
12151708Sstevel 	msg_header_ptr->transid = 0;
12161708Sstevel 	msg_header_ptr->log_error = log_error;
12171708Sstevel 
12181708Sstevel 	/*
12191708Sstevel 	 * Allocate memory for the mailbox message payload.
12201708Sstevel 	 */
12211708Sstevel 	msg_header_ptr->length = msg_length;
12221708Sstevel 	msg_header_ptr->data = kmem_zalloc((size_t)msg_length, sleep_flag);
12231708Sstevel 
12241708Sstevel 	if (msg_header_ptr->data == NULL) {
12251708Sstevel #ifdef DEBUG
12261708Sstevel 		cmn_err(CE_WARN, "failed to allocate space for scosmb "
12271708Sstevel 		    "message data.");
12281708Sstevel #endif	/* DEBUG */
12291708Sstevel 		kmem_free(msg_header_ptr, sizeof (scosmb_msgdata_t));
12301708Sstevel 		return (ENOMEM);
12311708Sstevel 	}
12321708Sstevel 
12331708Sstevel 	bcopy(datap, msg_header_ptr->data, (size_t)msg_length);
12341708Sstevel 
12351708Sstevel 	/*
12361708Sstevel 	 * Based on our earlier look at the message type, we either go through
12371708Sstevel 	 * a taskq or directly call the output function.
12381708Sstevel 	 */
12391708Sstevel 	if (do_queue != 0) {
12401708Sstevel 		/*
12411708Sstevel 		 * Place a new job on the scosmb_output_taskq.
12421708Sstevel 		 */
12431708Sstevel 		if (taskq_dispatch(scosmb_output_taskq,
12441708Sstevel 		    (task_func_t *)scosmb_process_output,
12451708Sstevel 		    (void *)msg_header_ptr, TQ_NOSLEEP) == 0) {
12461708Sstevel #ifdef DEBUG
12471708Sstevel 			cmn_err(CE_WARN, "failed to dispatch a task to send "
1248*11311SSurya.Prakki@Sun.COM 			    "ECC mailbox message.");
12491708Sstevel #endif	/* DEBUG */
12501708Sstevel 			kmem_free(msg_header_ptr->data, msg_header_ptr->length);
12511708Sstevel 			kmem_free(msg_header_ptr, sizeof (scosmb_msgdata_t));
12521708Sstevel 			return (ENOMEM);
12531708Sstevel 		}
12541708Sstevel 		return (0);
12551708Sstevel 	} else {
12561708Sstevel 		return (scosmb_process_output(msg_header_ptr));
12571708Sstevel 	}
12581708Sstevel }
1259