xref: /onnv-gate/usr/src/cmd/fm/fmd/common/fmd_case.c (revision 12967:ab9ae749152f)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */
250Sstevel@tonic-gate 
/*
 * FMD Case Subsystem
 *
 * Diagnosis engines are expected to group telemetry events related to the
 * diagnosis of a particular problem on the system into a set of cases.  The
 * diagnosis engine may have any number of cases open at a given point in time.
 * Some cases may eventually be *solved* by associating a suspect list of one
 * or more problems with the case, at which point fmd publishes a list.suspect
 * event for the case and it becomes visible to administrators and agents.
 *
 * Every case is named using a UUID, and is globally visible in the case hash.
 * Cases are reference-counted, except for the reference from the case hash
 * itself.  Consumers of case references include modules, which store active
 * cases on the mod_cases list, ASRUs in the resource cache, and the RPC code.
 *
 * Cases obey the following state machine.  In states UNSOLVED, SOLVED, and
 * CLOSE_WAIT, a case's module refers to the owning module (a diagnosis engine
 * or transport) and the case is referenced by the mod_cases list.  Once the
 * case reaches the CLOSED or REPAIRED states, a case's module changes to refer
 * to the root module (fmd.d_rmod) and is deleted from the owner's mod_cases.
 *
 *			+------------+
 *	     +----------|  UNSOLVED  |
 *	     |		+------------+
 *	     |		      1 |
 *	     |			|
 *	     |		+-------v----+
 *	   2 |		|    SOLVED  |
 *	     |		+------------+
 *	     |		    3 |  5 |
 *	     +------------+   |    |
 *			  |   |    |
 *			+-v---v----v-+
 *			| CLOSE_WAIT |
 *			+------------+
 *			  |   |    |
 *	      +-----------+   |    +------------+
 *	      |		    4 |			|
 *	      v		+-----v------+		|
 *	   discard      |   CLOSED   |	      6	|
 *			+------------+		|
 *			      |			|
 *			      |	   +------------+
 *			    7 |	   |
 *			+-----v----v-+
 *			|  REPAIRED  |
 *			+------------+
 *			      |
 *			    8 |
 *			+-----v------+
 *			|  RESOLVED  |
 *			+------------+
 *			      |
 *			      v
 *			   discard
 *
 * The state machine changes are triggered by calls to fmd_case_transition()
 * from various locations inside of fmd, as described below:
 *
 * [1] Called by: fmd_case_solve()
 *       Actions: FMD_CF_SOLVED flag is set in ci_flags
 *                conviction policy is applied to suspect list
 *                suspects convicted are marked faulty (F) in R$
 *                list.suspect event logged and dispatched
 *
 * [2] Called by: fmd_case_close(), fmd_case_uuclose()
 *       Actions: diagnosis engine fmdo_close() entry point scheduled
 *                case discarded upon exit from CLOSE_WAIT
 *
 * [3] Called by: fmd_case_close(), fmd_case_uuclose(), fmd_xprt_event_uuclose()
 *       Actions: FMD_CF_ISOLATED flag is set in ci_flags
 *                suspects convicted (F) are marked unusable (U) in R$
 *                diagnosis engine fmdo_close() entry point scheduled
 *                case transitions to CLOSED [4] upon exit from CLOSE_WAIT
 *
 * [4] Called by: fmd_case_delete() (after fmdo_close() entry point returns)
 *       Actions: list.isolated event dispatched
 *                case deleted from module's list of open cases
 *
 * [5] Called by: fmd_case_repair(), fmd_case_update()
 *       Actions: FMD_CF_REPAIR flag is set in ci_flags
 *                diagnosis engine fmdo_close() entry point scheduled
 *                case transitions to REPAIRED [6] upon exit from CLOSE_WAIT
 *
 * [6] Called by: fmd_case_delete() (after fmdo_close() entry point returns)
 *       Actions: suspects convicted are marked non faulty (!F) in R$
 *                list.repaired or list.updated event dispatched
 *
 * [7] Called by: fmd_case_repair(), fmd_case_update()
 *       Actions: FMD_CF_REPAIR flag is set in ci_flags
 *                suspects convicted are marked non faulty (!F) in R$
 *                list.repaired or list.updated event dispatched
 *
 * [8] Called by: fmd_case_uuresolve()
 *       Actions: list.resolved event dispatched
 *                case is discarded
 */
1231193Smws 
#include <sys/fm/protocol.h>
#include <uuid/uuid.h>
#include <alloca.h>
#include <pthread.h>

#include <fmd_alloc.h>
#include <fmd_module.h>
#include <fmd_error.h>
#include <fmd_conf.h>
#include <fmd_case.h>
#include <fmd_string.h>
#include <fmd_subr.h>
#include <fmd_protocol.h>
#include <fmd_event.h>
#include <fmd_eventq.h>
#include <fmd_dispq.h>
#include <fmd_buf.h>
#include <fmd_log.h>
#include <fmd_asru.h>
#include <fmd_fmri.h>
#include <fmd_xprt.h>

#include <fmd.h>
1460Sstevel@tonic-gate 
/*
 * Printable names of the case states.  Each entry is annotated with the
 * FMD_CASE_* constant it names; the table is presumably indexed by that
 * state value, so the order here must follow the constants' numeric order
 * (confirm against fmd_case.h).
 */
static const char *const _fmd_case_snames[] = {
	"UNSOLVED",	/* FMD_CASE_UNSOLVED */
	"SOLVED",	/* FMD_CASE_SOLVED */
	"CLOSE_WAIT",	/* FMD_CASE_CLOSE_WAIT */
	"CLOSED",	/* FMD_CASE_CLOSED */
	"REPAIRED",	/* FMD_CASE_REPAIRED */
	"RESOLVED"	/* FMD_CASE_RESOLVED */
};
1550Sstevel@tonic-gate 
1566081Scy152378 static fmd_case_impl_t *fmd_case_tryhold(fmd_case_impl_t *);
1576081Scy152378 
1580Sstevel@tonic-gate fmd_case_hash_t *
fmd_case_hash_create(void)1590Sstevel@tonic-gate fmd_case_hash_create(void)
1600Sstevel@tonic-gate {
1610Sstevel@tonic-gate 	fmd_case_hash_t *chp = fmd_alloc(sizeof (fmd_case_hash_t), FMD_SLEEP);
1620Sstevel@tonic-gate 
1630Sstevel@tonic-gate 	(void) pthread_rwlock_init(&chp->ch_lock, NULL);
1640Sstevel@tonic-gate 	chp->ch_hashlen = fmd.d_str_buckets;
1650Sstevel@tonic-gate 	chp->ch_hash = fmd_zalloc(sizeof (void *) * chp->ch_hashlen, FMD_SLEEP);
1666228Sstephh 	chp->ch_code_hash = fmd_zalloc(sizeof (void *) * chp->ch_hashlen,
1676228Sstephh 	    FMD_SLEEP);
1681193Smws 	chp->ch_count = 0;
1690Sstevel@tonic-gate 
1700Sstevel@tonic-gate 	return (chp);
1710Sstevel@tonic-gate }
1720Sstevel@tonic-gate 
1730Sstevel@tonic-gate /*
1740Sstevel@tonic-gate  * Destroy the case hash.  Unlike most of our hash tables, no active references
1751193Smws  * are kept by the case hash itself; all references come from other subsystems.
1760Sstevel@tonic-gate  * The hash must be destroyed after all modules are unloaded; if anything was
1770Sstevel@tonic-gate  * present in the hash it would be by definition a reference count leak.
1780Sstevel@tonic-gate  */
1790Sstevel@tonic-gate void
fmd_case_hash_destroy(fmd_case_hash_t * chp)1800Sstevel@tonic-gate fmd_case_hash_destroy(fmd_case_hash_t *chp)
1810Sstevel@tonic-gate {
1820Sstevel@tonic-gate 	fmd_free(chp->ch_hash, sizeof (void *) * chp->ch_hashlen);
1836228Sstephh 	fmd_free(chp->ch_code_hash, sizeof (void *) * chp->ch_hashlen);
1840Sstevel@tonic-gate 	fmd_free(chp, sizeof (fmd_case_hash_t));
1850Sstevel@tonic-gate }
1860Sstevel@tonic-gate 
1871193Smws /*
1881193Smws  * Take a snapshot of the case hash by placing an additional hold on each
1891193Smws  * member in an auxiliary array, and then call 'func' for each case.
1901193Smws  */
1911193Smws void
fmd_case_hash_apply(fmd_case_hash_t * chp,void (* func)(fmd_case_t *,void *),void * arg)1921193Smws fmd_case_hash_apply(fmd_case_hash_t *chp,
1931193Smws     void (*func)(fmd_case_t *, void *), void *arg)
1941193Smws {
1951193Smws 	fmd_case_impl_t *cp, **cps, **cpp;
1961193Smws 	uint_t cpc, i;
1971193Smws 
1981193Smws 	(void) pthread_rwlock_rdlock(&chp->ch_lock);
1991193Smws 
2001193Smws 	cps = cpp = fmd_alloc(chp->ch_count * sizeof (fmd_case_t *), FMD_SLEEP);
2011193Smws 	cpc = chp->ch_count;
2021193Smws 
2031193Smws 	for (i = 0; i < chp->ch_hashlen; i++) {
2047440SSean.Ye@Sun.COM 		for (cp = chp->ch_hash[i]; cp != NULL; cp = cp->ci_next)
2057440SSean.Ye@Sun.COM 			*cpp++ = fmd_case_tryhold(cp);
2061193Smws 	}
2071193Smws 
2081193Smws 	ASSERT(cpp == cps + cpc);
2091193Smws 	(void) pthread_rwlock_unlock(&chp->ch_lock);
2101193Smws 
2111193Smws 	for (i = 0; i < cpc; i++) {
2127440SSean.Ye@Sun.COM 		if (cps[i] != NULL) {
2137440SSean.Ye@Sun.COM 			func((fmd_case_t *)cps[i], arg);
2147440SSean.Ye@Sun.COM 			fmd_case_rele((fmd_case_t *)cps[i]);
2157440SSean.Ye@Sun.COM 		}
2161193Smws 	}
2171193Smws 
2181193Smws 	fmd_free(cps, cpc * sizeof (fmd_case_t *));
2191193Smws }
2201193Smws 
2216228Sstephh static void
fmd_case_code_hash_insert(fmd_case_hash_t * chp,fmd_case_impl_t * cip)2226228Sstephh fmd_case_code_hash_insert(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
2236228Sstephh {
2246228Sstephh 	uint_t h = fmd_strhash(cip->ci_code) % chp->ch_hashlen;
2256228Sstephh 
2266228Sstephh 	cip->ci_code_next = chp->ch_code_hash[h];
2276228Sstephh 	chp->ch_code_hash[h] = cip;
2286228Sstephh }
2296228Sstephh 
2306228Sstephh static void
fmd_case_code_hash_delete(fmd_case_hash_t * chp,fmd_case_impl_t * cip)2316228Sstephh fmd_case_code_hash_delete(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
2326228Sstephh {
2336228Sstephh 	fmd_case_impl_t **pp, *cp;
2346228Sstephh 
2356228Sstephh 	if (cip->ci_code) {
2366228Sstephh 		uint_t h = fmd_strhash(cip->ci_code) % chp->ch_hashlen;
2376228Sstephh 
2386228Sstephh 		pp = &chp->ch_code_hash[h];
2396228Sstephh 		for (cp = *pp; cp != NULL; cp = cp->ci_code_next) {
2406228Sstephh 			if (cp != cip)
2416228Sstephh 				pp = &cp->ci_code_next;
2426228Sstephh 			else
2436228Sstephh 				break;
2446228Sstephh 		}
2456228Sstephh 		if (cp != NULL) {
2466228Sstephh 			*pp = cp->ci_code_next;
2476228Sstephh 			cp->ci_code_next = NULL;
2486228Sstephh 		}
2496228Sstephh 	}
2506228Sstephh }
2516228Sstephh 
2521193Smws /*
2531193Smws  * Look up the diagcode for this case and cache it in ci_code.  If no suspects
2541193Smws  * were defined for this case or if the lookup fails, the event dictionary or
2551193Smws  * module code is broken, and we set the event code to a precomputed default.
2561193Smws  */
2571193Smws static const char *
fmd_case_mkcode(fmd_case_t * cp)2581193Smws fmd_case_mkcode(fmd_case_t *cp)
2590Sstevel@tonic-gate {
2600Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2610Sstevel@tonic-gate 	fmd_case_susp_t *cis;
2626228Sstephh 	fmd_case_hash_t *chp = fmd.d_cases;
2630Sstevel@tonic-gate 
2641193Smws 	char **keys, **keyp;
2650Sstevel@tonic-gate 	const char *s;
2660Sstevel@tonic-gate 
2670Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cip->ci_lock));
2680Sstevel@tonic-gate 	ASSERT(cip->ci_state >= FMD_CASE_SOLVED);
2690Sstevel@tonic-gate 
2706228Sstephh 	/*
2716228Sstephh 	 * delete any existing entry from code hash if it is on it
2726228Sstephh 	 */
2736228Sstephh 	fmd_case_code_hash_delete(chp, cip);
2746228Sstephh 
2751193Smws 	fmd_free(cip->ci_code, cip->ci_codelen);
2761193Smws 	cip->ci_codelen = cip->ci_mod->mod_codelen;
2771193Smws 	cip->ci_code = fmd_zalloc(cip->ci_codelen, FMD_SLEEP);
2780Sstevel@tonic-gate 	keys = keyp = alloca(sizeof (char *) * (cip->ci_nsuspects + 1));
2790Sstevel@tonic-gate 
2800Sstevel@tonic-gate 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
2810Sstevel@tonic-gate 		if (nvlist_lookup_string(cis->cis_nvl, FM_CLASS, keyp) == 0)
2820Sstevel@tonic-gate 			keyp++;
2830Sstevel@tonic-gate 	}
2840Sstevel@tonic-gate 
2850Sstevel@tonic-gate 	*keyp = NULL; /* mark end of keys[] array for libdiagcode */
2860Sstevel@tonic-gate 
2870Sstevel@tonic-gate 	if (cip->ci_nsuspects == 0 || fmd_module_dc_key2code(
2881193Smws 	    cip->ci_mod, keys, cip->ci_code, cip->ci_codelen) != 0) {
2890Sstevel@tonic-gate 		(void) fmd_conf_getprop(fmd.d_conf, "nodiagcode", &s);
2901193Smws 		fmd_free(cip->ci_code, cip->ci_codelen);
2911193Smws 		cip->ci_codelen = strlen(s) + 1;
2921193Smws 		cip->ci_code = fmd_zalloc(cip->ci_codelen, FMD_SLEEP);
2931193Smws 		(void) strcpy(cip->ci_code, s);
2940Sstevel@tonic-gate 	}
2950Sstevel@tonic-gate 
2966228Sstephh 	/*
2976228Sstephh 	 * add into hash of solved cases
2986228Sstephh 	 */
2996228Sstephh 	fmd_case_code_hash_insert(chp, cip);
3006228Sstephh 
3011193Smws 	return (cip->ci_code);
3021193Smws }
3031193Smws 
3046228Sstephh typedef struct {
3056228Sstephh 	int	*fcl_countp;
3067913SStephen.Hanson@Sun.COM 	int	fcl_maxcount;
3076228Sstephh 	uint8_t *fcl_ba;
3086228Sstephh 	nvlist_t **fcl_nva;
3096228Sstephh 	int	*fcl_msgp;
3106228Sstephh } fmd_case_lst_t;
3116228Sstephh 
3126228Sstephh static void
fmd_case_set_lst(fmd_asru_link_t * alp,void * arg)3136228Sstephh fmd_case_set_lst(fmd_asru_link_t *alp, void *arg)
3146228Sstephh {
3156228Sstephh 	fmd_case_lst_t *entryp = (fmd_case_lst_t *)arg;
3166228Sstephh 	boolean_t b;
3176228Sstephh 	int state;
3186228Sstephh 
3197913SStephen.Hanson@Sun.COM 	if (*entryp->fcl_countp >= entryp->fcl_maxcount)
3207913SStephen.Hanson@Sun.COM 		return;
3216228Sstephh 	if (nvlist_lookup_boolean_value(alp->al_event, FM_SUSPECT_MESSAGE,
3226228Sstephh 	    &b) == 0 && b == B_FALSE)
3236228Sstephh 		*entryp->fcl_msgp = B_FALSE;
3246228Sstephh 	entryp->fcl_ba[*entryp->fcl_countp] = 0;
3256228Sstephh 	state = fmd_asru_al_getstate(alp);
3267275Sstephh 	if (state & FMD_ASRU_DEGRADED)
3277275Sstephh 		entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_DEGRADED;
3286228Sstephh 	if (state & FMD_ASRU_UNUSABLE)
3296228Sstephh 		entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_UNUSABLE;
3306228Sstephh 	if (state & FMD_ASRU_FAULTY)
3316228Sstephh 		entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_FAULTY;
3326228Sstephh 	if (!(state & FMD_ASRU_PRESENT))
3336228Sstephh 		entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_NOT_PRESENT;
3347275Sstephh 	if (alp->al_reason == FMD_ASRU_REPAIRED)
3357275Sstephh 		entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_REPAIRED;
3367275Sstephh 	else if (alp->al_reason == FMD_ASRU_REPLACED)
3377275Sstephh 		entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_REPLACED;
3387275Sstephh 	else if (alp->al_reason == FMD_ASRU_ACQUITTED)
3397275Sstephh 		entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_ACQUITTED;
3406228Sstephh 	entryp->fcl_nva[*entryp->fcl_countp] = alp->al_event;
3416228Sstephh 	(*entryp->fcl_countp)++;
3426228Sstephh }
3436228Sstephh 
3446228Sstephh static void
fmd_case_faulty(fmd_asru_link_t * alp,void * arg)3456228Sstephh fmd_case_faulty(fmd_asru_link_t *alp, void *arg)
3466228Sstephh {
3476228Sstephh 	int *faultyp = (int *)arg;
3486228Sstephh 
3496228Sstephh 	*faultyp |= (alp->al_flags & FMD_ASRU_FAULTY);
3506228Sstephh }
3516228Sstephh 
3526228Sstephh static void
fmd_case_usable(fmd_asru_link_t * alp,void * arg)3536228Sstephh fmd_case_usable(fmd_asru_link_t *alp, void *arg)
3546228Sstephh {
3556228Sstephh 	int *usablep = (int *)arg;
3566228Sstephh 
3576228Sstephh 	*usablep |= !(fmd_asru_al_getstate(alp) & FMD_ASRU_UNUSABLE);
3586228Sstephh }
3596228Sstephh 
3607275Sstephh static void
fmd_case_not_faulty(fmd_asru_link_t * alp,void * arg)3617275Sstephh fmd_case_not_faulty(fmd_asru_link_t *alp, void *arg)
3627275Sstephh {
3637275Sstephh 	int *not_faultyp = (int *)arg;
3647275Sstephh 
3657275Sstephh 	*not_faultyp |= !(alp->al_flags & FMD_ASRU_FAULTY);
3667275Sstephh }
3677275Sstephh 
3687275Sstephh /*
3697275Sstephh  * Have we got any suspects with an asru that are still unusable and present?
3707275Sstephh  */
3717275Sstephh static void
fmd_case_unusable_and_present(fmd_asru_link_t * alp,void * arg)3727275Sstephh fmd_case_unusable_and_present(fmd_asru_link_t *alp, void *arg)
3737275Sstephh {
3747275Sstephh 	int *rvalp = (int *)arg;
3759120SStephen.Hanson@Sun.COM 	int state;
3767275Sstephh 	nvlist_t *asru;
3777275Sstephh 
3789120SStephen.Hanson@Sun.COM 	/*
3799120SStephen.Hanson@Sun.COM 	 * if this a proxy case and this suspect doesn't have an local asru
3809120SStephen.Hanson@Sun.COM 	 * then state is unknown so we must assume it may still be unusable.
3819120SStephen.Hanson@Sun.COM 	 */
3829120SStephen.Hanson@Sun.COM 	if ((alp->al_flags & FMD_ASRU_PROXY) &&
3839120SStephen.Hanson@Sun.COM 	    !(alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU)) {
3849120SStephen.Hanson@Sun.COM 		*rvalp |= B_TRUE;
3859120SStephen.Hanson@Sun.COM 		return;
3869120SStephen.Hanson@Sun.COM 	}
3879120SStephen.Hanson@Sun.COM 
3889120SStephen.Hanson@Sun.COM 	state = fmd_asru_al_getstate(alp);
3897275Sstephh 	if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) != 0)
3907275Sstephh 		return;
3917275Sstephh 	*rvalp |= ((state & FMD_ASRU_UNUSABLE) && (state & FMD_ASRU_PRESENT));
3927275Sstephh }
3937275Sstephh 
3941193Smws nvlist_t *
fmd_case_mkevent(fmd_case_t * cp,const char * class)3951193Smws fmd_case_mkevent(fmd_case_t *cp, const char *class)
3961193Smws {
3971193Smws 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
3986228Sstephh 	nvlist_t **nva, *nvl;
3996228Sstephh 	uint8_t *ba;
4001193Smws 	int msg = B_TRUE;
4016276Scy152378 	const char *code;
4026228Sstephh 	fmd_case_lst_t fcl;
4036228Sstephh 	int count = 0;
4041193Smws 
4051193Smws 	(void) pthread_mutex_lock(&cip->ci_lock);
4061193Smws 	ASSERT(cip->ci_state >= FMD_CASE_SOLVED);
4071193Smws 
4086228Sstephh 	nva = alloca(sizeof (nvlist_t *) * cip->ci_nsuspects);
4096228Sstephh 	ba = alloca(sizeof (uint8_t) * cip->ci_nsuspects);
4101193Smws 
4111193Smws 	/*
4121193Smws 	 * For each suspect associated with the case, store its fault event
4131193Smws 	 * nvlist in 'nva'.  We also look to see if any of the suspect faults
4141193Smws 	 * have asked not to be messaged.  If any of them have made such a
4151193Smws 	 * request, propagate that attribute to the composite list.* event.
4161193Smws 	 * Finally, store each suspect's faulty status into the bitmap 'ba'.
4171193Smws 	 */
4186228Sstephh 	fcl.fcl_countp = &count;
4197913SStephen.Hanson@Sun.COM 	fcl.fcl_maxcount = cip->ci_nsuspects;
4206228Sstephh 	fcl.fcl_msgp = &msg;
4216228Sstephh 	fcl.fcl_ba = ba;
4226228Sstephh 	fcl.fcl_nva = nva;
4236228Sstephh 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_set_lst, &fcl);
4241193Smws 
4251193Smws 	if (cip->ci_code == NULL)
4261193Smws 		(void) fmd_case_mkcode(cp);
4276276Scy152378 	/*
4287275Sstephh 	 * For repair and updated event, we lookup diagcode from dict using key
4297275Sstephh 	 * "list.repaired" or "list.updated" or "list.resolved".
4306276Scy152378 	 */
4316276Scy152378 	if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0)
4326276Scy152378 		(void) fmd_conf_getprop(fmd.d_conf, "repaircode", &code);
4337275Sstephh 	else if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0)
4347275Sstephh 		(void) fmd_conf_getprop(fmd.d_conf, "resolvecode", &code);
4357275Sstephh 	else if (strcmp(class, FM_LIST_UPDATED_CLASS) == 0)
4367275Sstephh 		(void) fmd_conf_getprop(fmd.d_conf, "updatecode", &code);
4376276Scy152378 	else
4386276Scy152378 		code = cip->ci_code;
4391193Smws 
4405255Sstephh 	if (msg == B_FALSE)
4415255Sstephh 		cip->ci_flags |= FMD_CF_INVISIBLE;
4425255Sstephh 
4439120SStephen.Hanson@Sun.COM 	/*
4449120SStephen.Hanson@Sun.COM 	 * Use the ci_diag_de if one has been saved (eg for an injected fault).
4459120SStephen.Hanson@Sun.COM 	 * Otherwise use the authority for the current module.
4469120SStephen.Hanson@Sun.COM 	 */
4479120SStephen.Hanson@Sun.COM 	nvl = fmd_protocol_list(class, cip->ci_diag_de == NULL ?
4489120SStephen.Hanson@Sun.COM 	    cip->ci_mod->mod_fmri : cip->ci_diag_de, cip->ci_uuid, code, count,
44910928SStephen.Hanson@Sun.COM 	    nva, ba, msg, &cip->ci_tv, cip->ci_injected);
4501193Smws 
4511193Smws 	(void) pthread_mutex_unlock(&cip->ci_lock);
4521193Smws 	return (nvl);
4530Sstevel@tonic-gate }
4540Sstevel@tonic-gate 
/*
 * Policy switches for matching a newly diagnosed suspect list against
 * existing cases.  NOTE(review): the code that reads these is not in this
 * part of the file; the meaning of each flag is inferred from its name and
 * should be confirmed against the users.
 */
static int fmd_case_match_on_faulty_overlap = 1;
static int fmd_case_match_on_acquit_overlap = 1;
static int fmd_case_auto_acquit_isolated = 1;
static int fmd_case_auto_acquit_non_acquitted = 1;
static int fmd_case_too_recent = 10; /* time in seconds */
46010656SStephen.Hanson@Sun.COM 
4616228Sstephh static boolean_t
fmd_case_compare_elem(nvlist_t * nvl,nvlist_t * xnvl,const char * elem)4626228Sstephh fmd_case_compare_elem(nvlist_t *nvl, nvlist_t *xnvl, const char *elem)
4636228Sstephh {
4646228Sstephh 	nvlist_t *new_rsrc;
4656228Sstephh 	nvlist_t *rsrc;
4666228Sstephh 	char *new_name = NULL;
4676228Sstephh 	char *name = NULL;
4686228Sstephh 	ssize_t new_namelen;
4696228Sstephh 	ssize_t namelen;
4706228Sstephh 	int fmri_present = 1;
4716228Sstephh 	int new_fmri_present = 1;
4726228Sstephh 	int match = B_FALSE;
4736869Seschrock 	fmd_topo_t *ftp = fmd_topo_hold();
4746228Sstephh 
4756228Sstephh 	if (nvlist_lookup_nvlist(xnvl, elem, &rsrc) != 0)
4766228Sstephh 		fmri_present = 0;
4776228Sstephh 	else {
4786228Sstephh 		if ((namelen = fmd_fmri_nvl2str(rsrc, NULL, 0)) == -1)
4796228Sstephh 			goto done;
4806228Sstephh 		name = fmd_alloc(namelen + 1, FMD_SLEEP);
4816228Sstephh 		if (fmd_fmri_nvl2str(rsrc, name, namelen + 1) == -1)
4826228Sstephh 			goto done;
4836228Sstephh 	}
4846228Sstephh 	if (nvlist_lookup_nvlist(nvl, elem, &new_rsrc) != 0)
4856228Sstephh 		new_fmri_present = 0;
4866228Sstephh 	else {
4876228Sstephh 		if ((new_namelen = fmd_fmri_nvl2str(new_rsrc, NULL, 0)) == -1)
4886228Sstephh 			goto done;
4896228Sstephh 		new_name = fmd_alloc(new_namelen + 1, FMD_SLEEP);
4906228Sstephh 		if (fmd_fmri_nvl2str(new_rsrc, new_name, new_namelen + 1) == -1)
4916228Sstephh 			goto done;
4926228Sstephh 	}
4936228Sstephh 	match = (fmri_present == new_fmri_present &&
4946869Seschrock 	    (fmri_present == 0 ||
4956869Seschrock 	    topo_fmri_strcmp(ftp->ft_hdl, name, new_name)));
4966228Sstephh done:
4976228Sstephh 	if (name != NULL)
4986228Sstephh 		fmd_free(name, namelen + 1);
4996228Sstephh 	if (new_name != NULL)
5006228Sstephh 		fmd_free(new_name, new_namelen + 1);
5016869Seschrock 	fmd_topo_rele(ftp);
5026228Sstephh 	return (match);
5036228Sstephh }
5046228Sstephh 
5056228Sstephh static int
fmd_case_match_suspect(nvlist_t * nvl1,nvlist_t * nvl2)50610656SStephen.Hanson@Sun.COM fmd_case_match_suspect(nvlist_t *nvl1, nvlist_t *nvl2)
5076228Sstephh {
5086228Sstephh 	char *class, *new_class;
5096228Sstephh 
51010656SStephen.Hanson@Sun.COM 	if (!fmd_case_compare_elem(nvl1, nvl2, FM_FAULT_ASRU))
5116228Sstephh 		return (0);
51210656SStephen.Hanson@Sun.COM 	if (!fmd_case_compare_elem(nvl1, nvl2, FM_FAULT_RESOURCE))
51310656SStephen.Hanson@Sun.COM 		return (0);
51410656SStephen.Hanson@Sun.COM 	if (!fmd_case_compare_elem(nvl1, nvl2, FM_FAULT_FRU))
5156228Sstephh 		return (0);
51610656SStephen.Hanson@Sun.COM 	(void) nvlist_lookup_string(nvl2, FM_CLASS, &class);
51710656SStephen.Hanson@Sun.COM 	(void) nvlist_lookup_string(nvl1, FM_CLASS, &new_class);
5186228Sstephh 	return (strcmp(class, new_class) == 0);
5196228Sstephh }
5206228Sstephh 
52110656SStephen.Hanson@Sun.COM typedef struct {
52210656SStephen.Hanson@Sun.COM 	int	*fcms_countp;
52310656SStephen.Hanson@Sun.COM 	int	fcms_maxcount;
52410656SStephen.Hanson@Sun.COM 	fmd_case_impl_t *fcms_cip;
52510656SStephen.Hanson@Sun.COM 	uint8_t *fcms_new_susp_state;
52610656SStephen.Hanson@Sun.COM 	uint8_t *fcms_old_susp_state;
52710656SStephen.Hanson@Sun.COM 	uint8_t *fcms_old_match_state;
52810656SStephen.Hanson@Sun.COM } fcms_t;
/*
 * Values stored in fcms_old_susp_state and fcms_new_susp_state.
 */
#define	SUSPECT_STATE_FAULTY				0x1
#define	SUSPECT_STATE_ISOLATED				0x2
#define	SUSPECT_STATE_REMOVED				0x4
#define	SUSPECT_STATE_ACQUITED				0x8
#define	SUSPECT_STATE_REPAIRED				0x10
#define	SUSPECT_STATE_REPLACED				0x20
/*
 * Value stored in fcms_old_match_state.  It shares 0x1 with
 * SUSPECT_STATE_FAULTY but lives in a separate array, so the two do not
 * collide.
 */
#define	SUSPECT_STATE_NO_MATCH				0x1
53610656SStephen.Hanson@Sun.COM 
53710656SStephen.Hanson@Sun.COM /*
53810656SStephen.Hanson@Sun.COM  * This is called for each suspect in the old case. Compare it against each
53910656SStephen.Hanson@Sun.COM  * suspect in the new case, setting fcms_old_susp_state and fcms_new_susp_state
54010656SStephen.Hanson@Sun.COM  * as appropriate. fcms_new_susp_state will left as 0 if the suspect is not
54110656SStephen.Hanson@Sun.COM  * found in the old case.
54210656SStephen.Hanson@Sun.COM  */
54310656SStephen.Hanson@Sun.COM static void
fmd_case_match_suspects(fmd_asru_link_t * alp,void * arg)54410656SStephen.Hanson@Sun.COM fmd_case_match_suspects(fmd_asru_link_t *alp, void *arg)
54510656SStephen.Hanson@Sun.COM {
54610656SStephen.Hanson@Sun.COM 	fcms_t *fcmsp = (fcms_t *)arg;
54710656SStephen.Hanson@Sun.COM 	fmd_case_impl_t *cip = fcmsp->fcms_cip;
54810656SStephen.Hanson@Sun.COM 	fmd_case_susp_t *cis;
54910656SStephen.Hanson@Sun.COM 	int i = 0;
55010656SStephen.Hanson@Sun.COM 	int state = fmd_asru_al_getstate(alp);
55110656SStephen.Hanson@Sun.COM 
55210656SStephen.Hanson@Sun.COM 	if (*fcmsp->fcms_countp >= fcmsp->fcms_maxcount)
55310656SStephen.Hanson@Sun.COM 		return;
55410656SStephen.Hanson@Sun.COM 
55510656SStephen.Hanson@Sun.COM 	if (!(state & FMD_ASRU_PRESENT) || (!(state & FMD_ASRU_FAULTY) &&
55610656SStephen.Hanson@Sun.COM 	    alp->al_reason == FMD_ASRU_REMOVED))
55710656SStephen.Hanson@Sun.COM 		fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
55810656SStephen.Hanson@Sun.COM 		    SUSPECT_STATE_REMOVED;
55910656SStephen.Hanson@Sun.COM 	else if ((state & FMD_ASRU_UNUSABLE) && (state & FMD_ASRU_FAULTY))
56010656SStephen.Hanson@Sun.COM 		fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
56110656SStephen.Hanson@Sun.COM 		    SUSPECT_STATE_ISOLATED;
56210656SStephen.Hanson@Sun.COM 	else if (state & FMD_ASRU_FAULTY)
56310656SStephen.Hanson@Sun.COM 		fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
56410656SStephen.Hanson@Sun.COM 		    SUSPECT_STATE_FAULTY;
56510656SStephen.Hanson@Sun.COM 	else if (alp->al_reason == FMD_ASRU_REPLACED)
56610656SStephen.Hanson@Sun.COM 		fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
56710656SStephen.Hanson@Sun.COM 		    SUSPECT_STATE_REPLACED;
56810656SStephen.Hanson@Sun.COM 	else if (alp->al_reason == FMD_ASRU_ACQUITTED)
56910656SStephen.Hanson@Sun.COM 		fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
57010656SStephen.Hanson@Sun.COM 		    SUSPECT_STATE_ACQUITED;
57110656SStephen.Hanson@Sun.COM 	else
57210656SStephen.Hanson@Sun.COM 		fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
57310656SStephen.Hanson@Sun.COM 		    SUSPECT_STATE_REPAIRED;
57410656SStephen.Hanson@Sun.COM 
57510656SStephen.Hanson@Sun.COM 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next, i++)
57610656SStephen.Hanson@Sun.COM 		if (fmd_case_match_suspect(cis->cis_nvl, alp->al_event) == 1)
57710656SStephen.Hanson@Sun.COM 			break;
57810656SStephen.Hanson@Sun.COM 	if (cis != NULL)
57910656SStephen.Hanson@Sun.COM 		fcmsp->fcms_new_susp_state[i] =
58010656SStephen.Hanson@Sun.COM 		    fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp];
58110656SStephen.Hanson@Sun.COM 	else
58210656SStephen.Hanson@Sun.COM 		fcmsp->fcms_old_match_state[*fcmsp->fcms_countp] |=
58310656SStephen.Hanson@Sun.COM 		    SUSPECT_STATE_NO_MATCH;
58410656SStephen.Hanson@Sun.COM 	(*fcmsp->fcms_countp)++;
58510656SStephen.Hanson@Sun.COM }
58610656SStephen.Hanson@Sun.COM 
58710656SStephen.Hanson@Sun.COM typedef struct {
58810656SStephen.Hanson@Sun.COM 	int	*fca_do_update;
58910656SStephen.Hanson@Sun.COM 	fmd_case_impl_t *fca_cip;
59010656SStephen.Hanson@Sun.COM } fca_t;
59110656SStephen.Hanson@Sun.COM 
59210656SStephen.Hanson@Sun.COM /*
59310656SStephen.Hanson@Sun.COM  * Re-fault all acquitted suspects that are still present in the new list.
59410656SStephen.Hanson@Sun.COM  */
59510656SStephen.Hanson@Sun.COM static void
fmd_case_fault_acquitted_matching(fmd_asru_link_t * alp,void * arg)59610656SStephen.Hanson@Sun.COM fmd_case_fault_acquitted_matching(fmd_asru_link_t *alp, void *arg)
59710656SStephen.Hanson@Sun.COM {
59810656SStephen.Hanson@Sun.COM 	fca_t *fcap = (fca_t *)arg;
59910656SStephen.Hanson@Sun.COM 	fmd_case_impl_t *cip = fcap->fca_cip;
60010656SStephen.Hanson@Sun.COM 	fmd_case_susp_t *cis;
60110656SStephen.Hanson@Sun.COM 	int state = fmd_asru_al_getstate(alp);
60210656SStephen.Hanson@Sun.COM 
60310656SStephen.Hanson@Sun.COM 	if (!(state & FMD_ASRU_FAULTY) &&
60410656SStephen.Hanson@Sun.COM 	    alp->al_reason == FMD_ASRU_ACQUITTED) {
60510656SStephen.Hanson@Sun.COM 		for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
60610656SStephen.Hanson@Sun.COM 			if (fmd_case_match_suspect(cis->cis_nvl,
60710656SStephen.Hanson@Sun.COM 			    alp->al_event) == 1)
60810656SStephen.Hanson@Sun.COM 				break;
60910656SStephen.Hanson@Sun.COM 		if (cis != NULL) {
61010656SStephen.Hanson@Sun.COM 			(void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY);
61110656SStephen.Hanson@Sun.COM 			*fcap->fca_do_update = 1;
61210656SStephen.Hanson@Sun.COM 		}
61310656SStephen.Hanson@Sun.COM 	}
61410656SStephen.Hanson@Sun.COM }
61510656SStephen.Hanson@Sun.COM 
61610656SStephen.Hanson@Sun.COM /*
61710656SStephen.Hanson@Sun.COM  * Re-fault all suspects that are still present in the new list.
61810656SStephen.Hanson@Sun.COM  */
61910656SStephen.Hanson@Sun.COM static void
fmd_case_fault_all_matching(fmd_asru_link_t * alp,void * arg)62010656SStephen.Hanson@Sun.COM fmd_case_fault_all_matching(fmd_asru_link_t *alp, void *arg)
62110656SStephen.Hanson@Sun.COM {
62210656SStephen.Hanson@Sun.COM 	fca_t *fcap = (fca_t *)arg;
62310656SStephen.Hanson@Sun.COM 	fmd_case_impl_t *cip = fcap->fca_cip;
62410656SStephen.Hanson@Sun.COM 	fmd_case_susp_t *cis;
62510656SStephen.Hanson@Sun.COM 	int state = fmd_asru_al_getstate(alp);
62610656SStephen.Hanson@Sun.COM 
62710656SStephen.Hanson@Sun.COM 	if (!(state & FMD_ASRU_FAULTY)) {
62810656SStephen.Hanson@Sun.COM 		for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
62910656SStephen.Hanson@Sun.COM 			if (fmd_case_match_suspect(cis->cis_nvl,
63010656SStephen.Hanson@Sun.COM 			    alp->al_event) == 1)
63110656SStephen.Hanson@Sun.COM 				break;
63210656SStephen.Hanson@Sun.COM 		if (cis != NULL) {
63310656SStephen.Hanson@Sun.COM 			(void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY);
63410656SStephen.Hanson@Sun.COM 			*fcap->fca_do_update = 1;
63510656SStephen.Hanson@Sun.COM 		}
63610656SStephen.Hanson@Sun.COM 	}
63710656SStephen.Hanson@Sun.COM }
63810656SStephen.Hanson@Sun.COM 
63910656SStephen.Hanson@Sun.COM /*
64010656SStephen.Hanson@Sun.COM  * Acquit all suspects that are no longer present in the new list.
64110656SStephen.Hanson@Sun.COM  */
64210656SStephen.Hanson@Sun.COM static void
fmd_case_acquit_no_match(fmd_asru_link_t * alp,void * arg)64310656SStephen.Hanson@Sun.COM fmd_case_acquit_no_match(fmd_asru_link_t *alp, void *arg)
64410656SStephen.Hanson@Sun.COM {
64510656SStephen.Hanson@Sun.COM 	fca_t *fcap = (fca_t *)arg;
64610656SStephen.Hanson@Sun.COM 	fmd_case_impl_t *cip = fcap->fca_cip;
64710656SStephen.Hanson@Sun.COM 	fmd_case_susp_t *cis;
64810656SStephen.Hanson@Sun.COM 	int state = fmd_asru_al_getstate(alp);
64910656SStephen.Hanson@Sun.COM 
65010656SStephen.Hanson@Sun.COM 	if (state & FMD_ASRU_FAULTY) {
65110656SStephen.Hanson@Sun.COM 		for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
65210656SStephen.Hanson@Sun.COM 			if (fmd_case_match_suspect(cis->cis_nvl,
65310656SStephen.Hanson@Sun.COM 			    alp->al_event) == 1)
65410656SStephen.Hanson@Sun.COM 				break;
65510656SStephen.Hanson@Sun.COM 		if (cis == NULL) {
65610656SStephen.Hanson@Sun.COM 			(void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
65710656SStephen.Hanson@Sun.COM 			    FMD_ASRU_ACQUITTED);
65810656SStephen.Hanson@Sun.COM 			*fcap->fca_do_update = 1;
65910656SStephen.Hanson@Sun.COM 		}
66010656SStephen.Hanson@Sun.COM 	}
66110656SStephen.Hanson@Sun.COM }
66210656SStephen.Hanson@Sun.COM 
66310656SStephen.Hanson@Sun.COM /*
66410656SStephen.Hanson@Sun.COM  * Acquit all isolated suspects.
66510656SStephen.Hanson@Sun.COM  */
66610656SStephen.Hanson@Sun.COM static void
fmd_case_acquit_isolated(fmd_asru_link_t * alp,void * arg)66710656SStephen.Hanson@Sun.COM fmd_case_acquit_isolated(fmd_asru_link_t *alp, void *arg)
66810656SStephen.Hanson@Sun.COM {
66910656SStephen.Hanson@Sun.COM 	int *do_update = (int *)arg;
67010656SStephen.Hanson@Sun.COM 	int state = fmd_asru_al_getstate(alp);
67110656SStephen.Hanson@Sun.COM 
67210656SStephen.Hanson@Sun.COM 	if ((state & FMD_ASRU_PRESENT) && (state & FMD_ASRU_UNUSABLE) &&
67310656SStephen.Hanson@Sun.COM 	    (state & FMD_ASRU_FAULTY)) {
67410656SStephen.Hanson@Sun.COM 		(void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
67510656SStephen.Hanson@Sun.COM 		    FMD_ASRU_ACQUITTED);
67610656SStephen.Hanson@Sun.COM 		*do_update = 1;
67710656SStephen.Hanson@Sun.COM 	}
67810656SStephen.Hanson@Sun.COM }
67910656SStephen.Hanson@Sun.COM 
6806228Sstephh /*
68110656SStephen.Hanson@Sun.COM  * Acquit suspect which matches specified nvlist
6826228Sstephh  */
68310656SStephen.Hanson@Sun.COM static void
fmd_case_acquit_suspect(fmd_asru_link_t * alp,void * arg)68410656SStephen.Hanson@Sun.COM fmd_case_acquit_suspect(fmd_asru_link_t *alp, void *arg)
68510656SStephen.Hanson@Sun.COM {
68610656SStephen.Hanson@Sun.COM 	nvlist_t *nvl = (nvlist_t *)arg;
68710656SStephen.Hanson@Sun.COM 	int state = fmd_asru_al_getstate(alp);
68810656SStephen.Hanson@Sun.COM 
68910656SStephen.Hanson@Sun.COM 	if ((state & FMD_ASRU_FAULTY) &&
69010656SStephen.Hanson@Sun.COM 	    fmd_case_match_suspect(nvl, alp->al_event) == 1)
69110656SStephen.Hanson@Sun.COM 		(void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
69210656SStephen.Hanson@Sun.COM 		    FMD_ASRU_ACQUITTED);
69310656SStephen.Hanson@Sun.COM }
69410656SStephen.Hanson@Sun.COM 
69510656SStephen.Hanson@Sun.COM typedef struct {
69610656SStephen.Hanson@Sun.COM 	fmd_case_impl_t *fccd_cip;
69710656SStephen.Hanson@Sun.COM 	uint8_t *fccd_new_susp_state;
69810656SStephen.Hanson@Sun.COM 	uint8_t *fccd_new_match_state;
69910656SStephen.Hanson@Sun.COM 	int *fccd_discard_new;
70010656SStephen.Hanson@Sun.COM 	int *fccd_adjust_new;
70110656SStephen.Hanson@Sun.COM } fccd_t;
70210656SStephen.Hanson@Sun.COM 
70310656SStephen.Hanson@Sun.COM /*
70410656SStephen.Hanson@Sun.COM  * see if a matching suspect list already exists in the cache
70510656SStephen.Hanson@Sun.COM  */
70610656SStephen.Hanson@Sun.COM static void
fmd_case_check_for_dups(fmd_case_t * old_cp,void * arg)70710656SStephen.Hanson@Sun.COM fmd_case_check_for_dups(fmd_case_t *old_cp, void *arg)
7086228Sstephh {
70910656SStephen.Hanson@Sun.COM 	fccd_t *fccdp = (fccd_t *)arg;
71010656SStephen.Hanson@Sun.COM 	fmd_case_impl_t *new_cip = fccdp->fccd_cip;
71110656SStephen.Hanson@Sun.COM 	fmd_case_impl_t *old_cip = (fmd_case_impl_t *)old_cp;
71210656SStephen.Hanson@Sun.COM 	int i, count = 0, do_update = 0, got_isolated_overlap = 0;
71310656SStephen.Hanson@Sun.COM 	int got_faulty_overlap = 0;
71410656SStephen.Hanson@Sun.COM 	int got_acquit_overlap = 0;
71510656SStephen.Hanson@Sun.COM 	boolean_t too_recent;
71610656SStephen.Hanson@Sun.COM 	uint64_t most_recent = 0;
71710656SStephen.Hanson@Sun.COM 	fcms_t fcms;
71810656SStephen.Hanson@Sun.COM 	fca_t fca;
71910656SStephen.Hanson@Sun.COM 	uint8_t *new_susp_state;
72010656SStephen.Hanson@Sun.COM 	uint8_t *old_susp_state;
72110656SStephen.Hanson@Sun.COM 	uint8_t *old_match_state;
72210656SStephen.Hanson@Sun.COM 
72310656SStephen.Hanson@Sun.COM 	new_susp_state = alloca(new_cip->ci_nsuspects * sizeof (uint8_t));
72410656SStephen.Hanson@Sun.COM 	for (i = 0; i < new_cip->ci_nsuspects; i++)
72510656SStephen.Hanson@Sun.COM 		new_susp_state[i] = 0;
72610656SStephen.Hanson@Sun.COM 	old_susp_state = alloca(old_cip->ci_nsuspects * sizeof (uint8_t));
72710656SStephen.Hanson@Sun.COM 	for (i = 0; i < old_cip->ci_nsuspects; i++)
72810656SStephen.Hanson@Sun.COM 		old_susp_state[i] = 0;
72910656SStephen.Hanson@Sun.COM 	old_match_state = alloca(old_cip->ci_nsuspects * sizeof (uint8_t));
73010656SStephen.Hanson@Sun.COM 	for (i = 0; i < old_cip->ci_nsuspects; i++)
73110656SStephen.Hanson@Sun.COM 		old_match_state[i] = 0;
7326228Sstephh 
73310656SStephen.Hanson@Sun.COM 	/*
73410656SStephen.Hanson@Sun.COM 	 * Compare with each suspect in the existing case.
73510656SStephen.Hanson@Sun.COM 	 */
73610656SStephen.Hanson@Sun.COM 	fcms.fcms_countp = &count;
73710656SStephen.Hanson@Sun.COM 	fcms.fcms_maxcount = old_cip->ci_nsuspects;
73810656SStephen.Hanson@Sun.COM 	fcms.fcms_cip = new_cip;
73910656SStephen.Hanson@Sun.COM 	fcms.fcms_new_susp_state = new_susp_state;
74010656SStephen.Hanson@Sun.COM 	fcms.fcms_old_susp_state = old_susp_state;
74110656SStephen.Hanson@Sun.COM 	fcms.fcms_old_match_state = old_match_state;
74210656SStephen.Hanson@Sun.COM 	fmd_asru_hash_apply_by_case(fmd.d_asrus, (fmd_case_t *)old_cip,
74310656SStephen.Hanson@Sun.COM 	    fmd_case_match_suspects, &fcms);
74410656SStephen.Hanson@Sun.COM 
74510656SStephen.Hanson@Sun.COM 	/*
74610656SStephen.Hanson@Sun.COM 	 * If we have some faulty, non-isolated suspects that overlap, then most
74710656SStephen.Hanson@Sun.COM 	 * likely it is the suspects that overlap in the suspect lists that are
74810656SStephen.Hanson@Sun.COM 	 * to blame. So we can consider this to be a match.
74910656SStephen.Hanson@Sun.COM 	 */
75010656SStephen.Hanson@Sun.COM 	for (i = 0; i < new_cip->ci_nsuspects; i++)
75110656SStephen.Hanson@Sun.COM 		if (new_susp_state[i] == SUSPECT_STATE_FAULTY)
75210656SStephen.Hanson@Sun.COM 			got_faulty_overlap = 1;
75310656SStephen.Hanson@Sun.COM 	if (got_faulty_overlap && fmd_case_match_on_faulty_overlap)
75410656SStephen.Hanson@Sun.COM 		goto got_match;
75510656SStephen.Hanson@Sun.COM 
75610656SStephen.Hanson@Sun.COM 	/*
75710656SStephen.Hanson@Sun.COM 	 * If we have no faulty, non-isolated suspects in the old case, but we
75810656SStephen.Hanson@Sun.COM 	 * do have some acquitted suspects that overlap, then most likely it is
75910656SStephen.Hanson@Sun.COM 	 * the acquitted suspects that overlap in the suspect lists that are
76010656SStephen.Hanson@Sun.COM 	 * to blame. So we can consider this to be a match.
76110656SStephen.Hanson@Sun.COM 	 */
76210656SStephen.Hanson@Sun.COM 	for (i = 0; i < new_cip->ci_nsuspects; i++)
76310656SStephen.Hanson@Sun.COM 		if (new_susp_state[i] == SUSPECT_STATE_ACQUITED)
76410656SStephen.Hanson@Sun.COM 			got_acquit_overlap = 1;
76510656SStephen.Hanson@Sun.COM 	for (i = 0; i < old_cip->ci_nsuspects; i++)
76610656SStephen.Hanson@Sun.COM 		if (old_susp_state[i] == SUSPECT_STATE_FAULTY)
76710656SStephen.Hanson@Sun.COM 			got_acquit_overlap = 0;
76810656SStephen.Hanson@Sun.COM 	if (got_acquit_overlap && fmd_case_match_on_acquit_overlap)
76910656SStephen.Hanson@Sun.COM 		goto got_match;
7706228Sstephh 
7716228Sstephh 	/*
77210656SStephen.Hanson@Sun.COM 	 * Check that all suspects in the new list are present in the old list.
77310656SStephen.Hanson@Sun.COM 	 * Return if we find one that isn't.
7746228Sstephh 	 */
77510656SStephen.Hanson@Sun.COM 	for (i = 0; i < new_cip->ci_nsuspects; i++)
77610656SStephen.Hanson@Sun.COM 		if (new_susp_state[i] == 0)
77710656SStephen.Hanson@Sun.COM 			return;
7786228Sstephh 
77910656SStephen.Hanson@Sun.COM 	/*
78010656SStephen.Hanson@Sun.COM 	 * Check that all suspects in the old list are present in the new list
78110656SStephen.Hanson@Sun.COM 	 * *or* they are isolated or removed/replaced (which would explain why
78210656SStephen.Hanson@Sun.COM 	 * they are not present in the new list). Return if we find one that is
78310656SStephen.Hanson@Sun.COM 	 * faulty and unisolated or repaired or acquitted, and that is not
78410656SStephen.Hanson@Sun.COM 	 * present in the new case.
78510656SStephen.Hanson@Sun.COM 	 */
78610656SStephen.Hanson@Sun.COM 	for (i = 0; i < old_cip->ci_nsuspects; i++)
78710656SStephen.Hanson@Sun.COM 		if (old_match_state[i] == SUSPECT_STATE_NO_MATCH &&
78810656SStephen.Hanson@Sun.COM 		    (old_susp_state[i] == SUSPECT_STATE_FAULTY ||
78910656SStephen.Hanson@Sun.COM 		    old_susp_state[i] == SUSPECT_STATE_ACQUITED ||
79010656SStephen.Hanson@Sun.COM 		    old_susp_state[i] == SUSPECT_STATE_REPAIRED))
79110656SStephen.Hanson@Sun.COM 			return;
79210656SStephen.Hanson@Sun.COM 
79310656SStephen.Hanson@Sun.COM got_match:
79410656SStephen.Hanson@Sun.COM 	/*
79510656SStephen.Hanson@Sun.COM 	 * If the old case is already in repaired/resolved state, we can't
79610656SStephen.Hanson@Sun.COM 	 * do anything more with it, so keep the new case, but acquit some
79710656SStephen.Hanson@Sun.COM 	 * of the suspects if appropriate.
79810656SStephen.Hanson@Sun.COM 	 */
79910656SStephen.Hanson@Sun.COM 	if (old_cip->ci_state >= FMD_CASE_REPAIRED) {
80010656SStephen.Hanson@Sun.COM 		if (fmd_case_auto_acquit_non_acquitted) {
80110656SStephen.Hanson@Sun.COM 			*fccdp->fccd_adjust_new = 1;
80210656SStephen.Hanson@Sun.COM 			for (i = 0; i < new_cip->ci_nsuspects; i++) {
80310656SStephen.Hanson@Sun.COM 				fccdp->fccd_new_susp_state[i] |=
80410656SStephen.Hanson@Sun.COM 				    new_susp_state[i];
80510656SStephen.Hanson@Sun.COM 				if (new_susp_state[i] == 0)
80610656SStephen.Hanson@Sun.COM 					fccdp->fccd_new_susp_state[i] =
80710656SStephen.Hanson@Sun.COM 					    SUSPECT_STATE_NO_MATCH;
8086228Sstephh 			}
8096228Sstephh 		}
81010656SStephen.Hanson@Sun.COM 		return;
81110656SStephen.Hanson@Sun.COM 	}
81210656SStephen.Hanson@Sun.COM 
81310656SStephen.Hanson@Sun.COM 	/*
81410656SStephen.Hanson@Sun.COM 	 * Otherwise discard the new case and keep the old, again updating the
81510656SStephen.Hanson@Sun.COM 	 * state of the suspects as appropriate
81610656SStephen.Hanson@Sun.COM 	 */
81710656SStephen.Hanson@Sun.COM 	*fccdp->fccd_discard_new = 1;
81810656SStephen.Hanson@Sun.COM 	fca.fca_cip = new_cip;
81910656SStephen.Hanson@Sun.COM 	fca.fca_do_update = &do_update;
82010656SStephen.Hanson@Sun.COM 
82110656SStephen.Hanson@Sun.COM 	/*
82210656SStephen.Hanson@Sun.COM 	 * See if new case occurred within fmd_case_too_recent seconds of the
82310656SStephen.Hanson@Sun.COM 	 * most recent modification to the old case and if so don't do
82410656SStephen.Hanson@Sun.COM 	 * auto-acquit. This avoids problems if a flood of ereports come in and
82510656SStephen.Hanson@Sun.COM 	 * they don't all get diagnosed before the first case causes some of
82610656SStephen.Hanson@Sun.COM 	 * the devices to be isolated making it appear that an isolated device
82710656SStephen.Hanson@Sun.COM 	 * was in the suspect list.
82810656SStephen.Hanson@Sun.COM 	 */
82910656SStephen.Hanson@Sun.COM 	fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
83010656SStephen.Hanson@Sun.COM 	    fmd_asru_most_recent, &most_recent);
83110656SStephen.Hanson@Sun.COM 	too_recent = (new_cip->ci_tv.tv_sec - most_recent <
83210656SStephen.Hanson@Sun.COM 	    fmd_case_too_recent);
83310656SStephen.Hanson@Sun.COM 
83410656SStephen.Hanson@Sun.COM 	if (got_faulty_overlap) {
83510656SStephen.Hanson@Sun.COM 		/*
83610656SStephen.Hanson@Sun.COM 		 * Acquit any suspects not present in the new list, plus
83710656SStephen.Hanson@Sun.COM 		 * any that are are present but are isolated.
83810656SStephen.Hanson@Sun.COM 		 */
83910656SStephen.Hanson@Sun.COM 		fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
84010656SStephen.Hanson@Sun.COM 		    fmd_case_acquit_no_match, &fca);
84110656SStephen.Hanson@Sun.COM 		if (fmd_case_auto_acquit_isolated && !too_recent)
84210656SStephen.Hanson@Sun.COM 			fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
84310656SStephen.Hanson@Sun.COM 			    fmd_case_acquit_isolated, &do_update);
84410656SStephen.Hanson@Sun.COM 	} else if (got_acquit_overlap) {
84510656SStephen.Hanson@Sun.COM 		/*
84610656SStephen.Hanson@Sun.COM 		 * Re-fault the acquitted matching suspects and acquit all
84710656SStephen.Hanson@Sun.COM 		 * isolated suspects.
84810656SStephen.Hanson@Sun.COM 		 */
84910656SStephen.Hanson@Sun.COM 		if (fmd_case_auto_acquit_isolated && !too_recent) {
85010656SStephen.Hanson@Sun.COM 			fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
85110656SStephen.Hanson@Sun.COM 			    fmd_case_fault_acquitted_matching, &fca);
85210656SStephen.Hanson@Sun.COM 			fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
85310656SStephen.Hanson@Sun.COM 			    fmd_case_acquit_isolated, &do_update);
8546228Sstephh 		}
85510656SStephen.Hanson@Sun.COM 	} else if (fmd_case_auto_acquit_isolated) {
85610656SStephen.Hanson@Sun.COM 		/*
85710656SStephen.Hanson@Sun.COM 		 * To get here, there must be no faulty or acquitted suspects,
85810656SStephen.Hanson@Sun.COM 		 * but there must be at least one isolated suspect. Just acquit
85910656SStephen.Hanson@Sun.COM 		 * non-matching isolated suspects. If there are no matching
86010656SStephen.Hanson@Sun.COM 		 * isolated suspects, then re-fault all matching suspects.
86110656SStephen.Hanson@Sun.COM 		 */
86210656SStephen.Hanson@Sun.COM 		for (i = 0; i < new_cip->ci_nsuspects; i++)
86310656SStephen.Hanson@Sun.COM 			if (new_susp_state[i] == SUSPECT_STATE_ISOLATED)
86410656SStephen.Hanson@Sun.COM 				got_isolated_overlap = 1;
86510656SStephen.Hanson@Sun.COM 		if (!got_isolated_overlap)
86610656SStephen.Hanson@Sun.COM 			fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
86710656SStephen.Hanson@Sun.COM 			    fmd_case_fault_all_matching, &fca);
86810656SStephen.Hanson@Sun.COM 		fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
86910656SStephen.Hanson@Sun.COM 		    fmd_case_acquit_no_match, &fca);
8706228Sstephh 	}
87110656SStephen.Hanson@Sun.COM 
87210656SStephen.Hanson@Sun.COM 	/*
87310656SStephen.Hanson@Sun.COM 	 * If we've updated anything in the old case, call fmd_case_update()
87410656SStephen.Hanson@Sun.COM 	 */
87510656SStephen.Hanson@Sun.COM 	if (do_update)
87610656SStephen.Hanson@Sun.COM 		fmd_case_update(old_cp);
8776228Sstephh }
8786228Sstephh 
8790Sstevel@tonic-gate /*
8801193Smws  * Convict suspects in a case by applying a conviction policy and updating the
8811193Smws  * resource cache prior to emitting the list.suspect event for the given case.
8821193Smws  * At present, our policy is very simple: convict every suspect in the case.
8831193Smws  * In the future, this policy can be extended and made configurable to permit:
8841193Smws  *
8851193Smws  * - convicting the suspect with the highest FIT rate
8861193Smws  * - convicting the suspect with the cheapest FRU
8871193Smws  * - convicting the suspect with the FRU that is in a depot's inventory
8881193Smws  * - convicting the suspect with the longest lifetime
8891193Smws  *
8901193Smws  * and so forth.  A word to the wise: this problem is significantly harder that
8911193Smws  * it seems at first glance.  Future work should heed the following advice:
8921193Smws  *
8931193Smws  * Hacking the policy into C code here is a very bad idea.  The policy needs to
8941193Smws  * be decided upon very carefully and fundamentally encodes knowledge of what
8951193Smws  * suspect list combinations can be emitted by what diagnosis engines.  As such
8961193Smws  * fmd's code is the wrong location, because that would require fmd itself to
8971193Smws  * be updated for every diagnosis engine change, defeating the entire design.
8981193Smws  * The FMA Event Registry knows the suspect list combinations: policy inputs
8991193Smws  * can be derived from it and used to produce per-module policy configuration.
9001193Smws  *
9011193Smws  * If the policy needs to be dynamic and not statically fixed at either fmd
9021193Smws  * startup or module load time, any implementation of dynamic policy retrieval
9031193Smws  * must employ some kind of caching mechanism or be part of a built-in module.
9041193Smws  * The fmd_case_convict() function is called with locks held inside of fmd and
9051193Smws  * is not a place where unbounded blocking on some inter-process or inter-
9061193Smws  * system communication to another service (e.g. another daemon) can occur.
9070Sstevel@tonic-gate  */
9086228Sstephh static int
fmd_case_convict(fmd_case_t * cp)9091193Smws fmd_case_convict(fmd_case_t *cp)
9101193Smws {
9111193Smws 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
9121193Smws 	fmd_asru_hash_t *ahp = fmd.d_asrus;
91310656SStephen.Hanson@Sun.COM 	int discard_new = 0, i;
9141193Smws 	fmd_case_susp_t *cis;
9156228Sstephh 	fmd_asru_link_t *alp;
91610656SStephen.Hanson@Sun.COM 	uint8_t *new_susp_state;
91710656SStephen.Hanson@Sun.COM 	uint8_t *new_match_state;
91810656SStephen.Hanson@Sun.COM 	int adjust_new = 0;
91910656SStephen.Hanson@Sun.COM 	fccd_t fccd;
92011202SStephen.Hanson@Sun.COM 	fmd_case_impl_t *ncp, **cps, **cpp;
92111202SStephen.Hanson@Sun.COM 	uint_t cpc;
92211202SStephen.Hanson@Sun.COM 	fmd_case_hash_t *chp;
92310656SStephen.Hanson@Sun.COM 
92410656SStephen.Hanson@Sun.COM 	/*
92510656SStephen.Hanson@Sun.COM 	 * First we must see if any matching cases already exist.
92610656SStephen.Hanson@Sun.COM 	 */
92710656SStephen.Hanson@Sun.COM 	new_susp_state = alloca(cip->ci_nsuspects * sizeof (uint8_t));
92810656SStephen.Hanson@Sun.COM 	for (i = 0; i < cip->ci_nsuspects; i++)
92910656SStephen.Hanson@Sun.COM 		new_susp_state[i] = 0;
93010656SStephen.Hanson@Sun.COM 	new_match_state = alloca(cip->ci_nsuspects * sizeof (uint8_t));
93110656SStephen.Hanson@Sun.COM 	for (i = 0; i < cip->ci_nsuspects; i++)
93210656SStephen.Hanson@Sun.COM 		new_match_state[i] = 0;
93310656SStephen.Hanson@Sun.COM 	fccd.fccd_cip = cip;
93410656SStephen.Hanson@Sun.COM 	fccd.fccd_adjust_new = &adjust_new;
93510656SStephen.Hanson@Sun.COM 	fccd.fccd_new_susp_state = new_susp_state;
93610656SStephen.Hanson@Sun.COM 	fccd.fccd_new_match_state = new_match_state;
93710656SStephen.Hanson@Sun.COM 	fccd.fccd_discard_new = &discard_new;
93811202SStephen.Hanson@Sun.COM 
93911202SStephen.Hanson@Sun.COM 	/*
94011202SStephen.Hanson@Sun.COM 	 * Hold all cases
94111202SStephen.Hanson@Sun.COM 	 */
94211202SStephen.Hanson@Sun.COM 	chp = fmd.d_cases;
94311202SStephen.Hanson@Sun.COM 	(void) pthread_rwlock_rdlock(&chp->ch_lock);
94411202SStephen.Hanson@Sun.COM 	cps = cpp = fmd_alloc(chp->ch_count * sizeof (fmd_case_t *), FMD_SLEEP);
94511202SStephen.Hanson@Sun.COM 	cpc = chp->ch_count;
94611202SStephen.Hanson@Sun.COM 	for (i = 0; i < chp->ch_hashlen; i++)
94711202SStephen.Hanson@Sun.COM 		for (ncp = chp->ch_hash[i]; ncp != NULL; ncp = ncp->ci_next)
94811202SStephen.Hanson@Sun.COM 			*cpp++ = fmd_case_tryhold(ncp);
94911202SStephen.Hanson@Sun.COM 	ASSERT(cpp == cps + cpc);
95011202SStephen.Hanson@Sun.COM 	(void) pthread_rwlock_unlock(&chp->ch_lock);
95111202SStephen.Hanson@Sun.COM 
95211202SStephen.Hanson@Sun.COM 	/*
95311202SStephen.Hanson@Sun.COM 	 * Run fmd_case_check_for_dups() on all cases except the current one.
95411202SStephen.Hanson@Sun.COM 	 */
95511202SStephen.Hanson@Sun.COM 	for (i = 0; i < cpc; i++) {
95611202SStephen.Hanson@Sun.COM 		if (cps[i] != NULL) {
95711202SStephen.Hanson@Sun.COM 			if (cps[i] != (fmd_case_impl_t *)cp)
95811202SStephen.Hanson@Sun.COM 				fmd_case_check_for_dups((fmd_case_t *)cps[i],
95911202SStephen.Hanson@Sun.COM 				    &fccd);
96011202SStephen.Hanson@Sun.COM 			fmd_case_rele((fmd_case_t *)cps[i]);
96111202SStephen.Hanson@Sun.COM 		}
96211202SStephen.Hanson@Sun.COM 	}
96311202SStephen.Hanson@Sun.COM 	fmd_free(cps, cpc * sizeof (fmd_case_t *));
96411202SStephen.Hanson@Sun.COM 
96511202SStephen.Hanson@Sun.COM 	(void) pthread_mutex_lock(&cip->ci_lock);
96611202SStephen.Hanson@Sun.COM 	if (cip->ci_code == NULL)
96711202SStephen.Hanson@Sun.COM 		(void) fmd_case_mkcode(cp);
96811202SStephen.Hanson@Sun.COM 	else if (cip->ci_precanned)
96911202SStephen.Hanson@Sun.COM 		fmd_case_code_hash_insert(fmd.d_cases, cip);
97010656SStephen.Hanson@Sun.COM 
97110656SStephen.Hanson@Sun.COM 	if (discard_new) {
97210656SStephen.Hanson@Sun.COM 		/*
97310656SStephen.Hanson@Sun.COM 		 * We've found an existing case that is a match and it is not
97410656SStephen.Hanson@Sun.COM 		 * already in repaired or resolved state. So we can close this
97510656SStephen.Hanson@Sun.COM 		 * one as a duplicate.
97610656SStephen.Hanson@Sun.COM 		 */
9776228Sstephh 		(void) pthread_mutex_unlock(&cip->ci_lock);
9786228Sstephh 		return (1);
9796228Sstephh 	}
9801193Smws 
9816228Sstephh 	/*
98210656SStephen.Hanson@Sun.COM 	 * Allocate new cache entries
9836228Sstephh 	 */
9841193Smws 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
9856228Sstephh 		if ((alp = fmd_asru_hash_create_entry(ahp,
9866228Sstephh 		    cp, cis->cis_nvl)) == NULL) {
9871193Smws 			fmd_error(EFMD_CASE_EVENT, "cannot convict suspect in "
9881193Smws 			    "%s: %s\n", cip->ci_uuid, fmd_strerror(errno));
9891193Smws 			continue;
9901193Smws 		}
9919120SStephen.Hanson@Sun.COM 		alp->al_flags |= FMD_ASRU_PRESENT;
9929120SStephen.Hanson@Sun.COM 		alp->al_asru->asru_flags |= FMD_ASRU_PRESENT;
9937275Sstephh 		(void) fmd_asru_clrflags(alp, FMD_ASRU_UNUSABLE, 0);
9946228Sstephh 		(void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY);
9951193Smws 	}
9961193Smws 
99710656SStephen.Hanson@Sun.COM 	if (adjust_new) {
99810656SStephen.Hanson@Sun.COM 		int some_suspect = 0, some_not_suspect = 0;
99910656SStephen.Hanson@Sun.COM 
100010656SStephen.Hanson@Sun.COM 		/*
100110656SStephen.Hanson@Sun.COM 		 * There is one or more matching case but they are already in
100210656SStephen.Hanson@Sun.COM 		 * repaired or resolved state. So we need to keep the new
100310656SStephen.Hanson@Sun.COM 		 * case, but we can adjust it. Repaired/removed/replaced
100410656SStephen.Hanson@Sun.COM 		 * suspects are unlikely to be to blame (unless there are
100510656SStephen.Hanson@Sun.COM 		 * actually two separate faults). So if we have a combination of
100610656SStephen.Hanson@Sun.COM 		 * repaired/replaced/removed suspects and acquitted suspects in
100710656SStephen.Hanson@Sun.COM 		 * the old lists, then we should acquit in the new list those
100810656SStephen.Hanson@Sun.COM 		 * that were repaired/replaced/removed in the old.
100910656SStephen.Hanson@Sun.COM 		 */
101010656SStephen.Hanson@Sun.COM 		for (i = 0; i < cip->ci_nsuspects; i++) {
101110656SStephen.Hanson@Sun.COM 			if ((new_susp_state[i] & SUSPECT_STATE_REPLACED) ||
101210656SStephen.Hanson@Sun.COM 			    (new_susp_state[i] & SUSPECT_STATE_REPAIRED) ||
101310656SStephen.Hanson@Sun.COM 			    (new_susp_state[i] & SUSPECT_STATE_REMOVED) ||
101410656SStephen.Hanson@Sun.COM 			    (new_match_state[i] & SUSPECT_STATE_NO_MATCH))
101510656SStephen.Hanson@Sun.COM 				some_not_suspect = 1;
101610656SStephen.Hanson@Sun.COM 			else
101710656SStephen.Hanson@Sun.COM 				some_suspect = 1;
101810656SStephen.Hanson@Sun.COM 		}
101910656SStephen.Hanson@Sun.COM 		if (some_suspect && some_not_suspect) {
102010656SStephen.Hanson@Sun.COM 			for (cis = cip->ci_suspects, i = 0; cis != NULL;
102110656SStephen.Hanson@Sun.COM 			    cis = cis->cis_next, i++)
102210656SStephen.Hanson@Sun.COM 				if ((new_susp_state[i] &
102310656SStephen.Hanson@Sun.COM 				    SUSPECT_STATE_REPLACED) ||
102410656SStephen.Hanson@Sun.COM 				    (new_susp_state[i] &
102510656SStephen.Hanson@Sun.COM 				    SUSPECT_STATE_REPAIRED) ||
102610656SStephen.Hanson@Sun.COM 				    (new_susp_state[i] &
102710656SStephen.Hanson@Sun.COM 				    SUSPECT_STATE_REMOVED) ||
102810656SStephen.Hanson@Sun.COM 				    (new_match_state[i] &
102910656SStephen.Hanson@Sun.COM 				    SUSPECT_STATE_NO_MATCH))
103010656SStephen.Hanson@Sun.COM 					fmd_asru_hash_apply_by_case(fmd.d_asrus,
103110656SStephen.Hanson@Sun.COM 					    cp, fmd_case_acquit_suspect,
103210656SStephen.Hanson@Sun.COM 					    cis->cis_nvl);
103310656SStephen.Hanson@Sun.COM 		}
103410656SStephen.Hanson@Sun.COM 	}
103510656SStephen.Hanson@Sun.COM 
10361193Smws 	(void) pthread_mutex_unlock(&cip->ci_lock);
10376228Sstephh 	return (0);
10381193Smws }
10391193Smws 
10401193Smws void
fmd_case_publish(fmd_case_t * cp,uint_t state)10410Sstevel@tonic-gate fmd_case_publish(fmd_case_t *cp, uint_t state)
10420Sstevel@tonic-gate {
10430Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
10440Sstevel@tonic-gate 	fmd_event_t *e;
10450Sstevel@tonic-gate 	nvlist_t *nvl;
10460Sstevel@tonic-gate 	char *class;
10470Sstevel@tonic-gate 
10481193Smws 	if (state == FMD_CASE_CURRENT)
10491193Smws 		state = cip->ci_state; /* use current state */
10501193Smws 
10510Sstevel@tonic-gate 	switch (state) {
10520Sstevel@tonic-gate 	case FMD_CASE_SOLVED:
10535255Sstephh 		(void) pthread_mutex_lock(&cip->ci_lock);
10547913SStephen.Hanson@Sun.COM 
10557913SStephen.Hanson@Sun.COM 		/*
10567913SStephen.Hanson@Sun.COM 		 * If we already have a code, then case is already solved.
10577913SStephen.Hanson@Sun.COM 		 */
10589120SStephen.Hanson@Sun.COM 		if (cip->ci_precanned == 0 && cip->ci_xprt == NULL &&
10599120SStephen.Hanson@Sun.COM 		    cip->ci_code != NULL) {
10607913SStephen.Hanson@Sun.COM 			(void) pthread_mutex_unlock(&cip->ci_lock);
10617913SStephen.Hanson@Sun.COM 			break;
10627913SStephen.Hanson@Sun.COM 		}
10637913SStephen.Hanson@Sun.COM 
10645255Sstephh 		if (cip->ci_tv_valid == 0) {
10655255Sstephh 			fmd_time_gettimeofday(&cip->ci_tv);
10665255Sstephh 			cip->ci_tv_valid = 1;
10675255Sstephh 		}
10685255Sstephh 		(void) pthread_mutex_unlock(&cip->ci_lock);
10696228Sstephh 
10706228Sstephh 		if (fmd_case_convict(cp) == 1) { /* dupclose */
10716228Sstephh 			cip->ci_flags &= ~FMD_CF_SOLVED;
10726228Sstephh 			fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, 0);
10736228Sstephh 			break;
10746228Sstephh 		}
10759120SStephen.Hanson@Sun.COM 		if (cip->ci_xprt != NULL) {
10769120SStephen.Hanson@Sun.COM 			/*
10779120SStephen.Hanson@Sun.COM 			 * For proxy, save some information about the transport
10789120SStephen.Hanson@Sun.COM 			 * in the resource cache.
10799120SStephen.Hanson@Sun.COM 			 */
10809120SStephen.Hanson@Sun.COM 			int count = 0;
10819120SStephen.Hanson@Sun.COM 			fmd_asru_set_on_proxy_t fasp;
10829120SStephen.Hanson@Sun.COM 			fmd_xprt_impl_t *xip = (fmd_xprt_impl_t *)cip->ci_xprt;
10839120SStephen.Hanson@Sun.COM 
10849120SStephen.Hanson@Sun.COM 			fasp.fasp_countp = &count;
10859120SStephen.Hanson@Sun.COM 			fasp.fasp_maxcount = cip->ci_nsuspects;
10869120SStephen.Hanson@Sun.COM 			fasp.fasp_proxy_asru = cip->ci_proxy_asru;
10879120SStephen.Hanson@Sun.COM 			fasp.fasp_proxy_external = xip->xi_flags &
10889120SStephen.Hanson@Sun.COM 			    FMD_XPRT_EXTERNAL;
10899120SStephen.Hanson@Sun.COM 			fasp.fasp_proxy_rdonly = ((xip->xi_flags &
10909120SStephen.Hanson@Sun.COM 			    FMD_XPRT_RDWR) == FMD_XPRT_RDONLY);
10919120SStephen.Hanson@Sun.COM 			fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
10929120SStephen.Hanson@Sun.COM 			    fmd_asru_set_on_proxy, &fasp);
10939120SStephen.Hanson@Sun.COM 		}
10941193Smws 		nvl = fmd_case_mkevent(cp, FM_LIST_SUSPECT_CLASS);
10950Sstevel@tonic-gate 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
10960Sstevel@tonic-gate 
10970Sstevel@tonic-gate 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
10980Sstevel@tonic-gate 		(void) pthread_rwlock_rdlock(&fmd.d_log_lock);
10990Sstevel@tonic-gate 		fmd_log_append(fmd.d_fltlog, e, cp);
11000Sstevel@tonic-gate 		(void) pthread_rwlock_unlock(&fmd.d_log_lock);
11010Sstevel@tonic-gate 		fmd_dispq_dispatch(fmd.d_disp, e, class);
11020Sstevel@tonic-gate 
11030Sstevel@tonic-gate 		(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
11040Sstevel@tonic-gate 		cip->ci_mod->mod_stats->ms_casesolved.fmds_value.ui64++;
11050Sstevel@tonic-gate 		(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
11060Sstevel@tonic-gate 
11070Sstevel@tonic-gate 		break;
11080Sstevel@tonic-gate 
11091193Smws 	case FMD_CASE_CLOSE_WAIT:
11100Sstevel@tonic-gate 		fmd_case_hold(cp);
11110Sstevel@tonic-gate 		e = fmd_event_create(FMD_EVT_CLOSE, FMD_HRT_NOW, NULL, cp);
11120Sstevel@tonic-gate 		fmd_eventq_insert_at_head(cip->ci_mod->mod_queue, e);
11130Sstevel@tonic-gate 
11140Sstevel@tonic-gate 		(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
11150Sstevel@tonic-gate 		cip->ci_mod->mod_stats->ms_caseclosed.fmds_value.ui64++;
11160Sstevel@tonic-gate 		(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
11170Sstevel@tonic-gate 
11180Sstevel@tonic-gate 		break;
11191193Smws 
11201193Smws 	case FMD_CASE_CLOSED:
11211193Smws 		nvl = fmd_case_mkevent(cp, FM_LIST_ISOLATED_CLASS);
11221193Smws 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
11231193Smws 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
11241193Smws 		fmd_dispq_dispatch(fmd.d_disp, e, class);
11251193Smws 		break;
11260Sstevel@tonic-gate 
11271193Smws 	case FMD_CASE_REPAIRED:
11281193Smws 		nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
11291193Smws 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
11301193Smws 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
11316276Scy152378 		(void) pthread_rwlock_rdlock(&fmd.d_log_lock);
11326276Scy152378 		fmd_log_append(fmd.d_fltlog, e, cp);
11336276Scy152378 		(void) pthread_rwlock_unlock(&fmd.d_log_lock);
11341193Smws 		fmd_dispq_dispatch(fmd.d_disp, e, class);
11351193Smws 		break;
11367275Sstephh 
11377275Sstephh 	case FMD_CASE_RESOLVED:
11387275Sstephh 		nvl = fmd_case_mkevent(cp, FM_LIST_RESOLVED_CLASS);
11397275Sstephh 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
11407275Sstephh 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
11417275Sstephh 		(void) pthread_rwlock_rdlock(&fmd.d_log_lock);
11427275Sstephh 		fmd_log_append(fmd.d_fltlog, e, cp);
11437275Sstephh 		(void) pthread_rwlock_unlock(&fmd.d_log_lock);
11447275Sstephh 		fmd_dispq_dispatch(fmd.d_disp, e, class);
11457275Sstephh 		break;
11460Sstevel@tonic-gate 	}
11470Sstevel@tonic-gate }
11480Sstevel@tonic-gate 
11490Sstevel@tonic-gate fmd_case_t *
fmd_case_hash_lookup(fmd_case_hash_t * chp,const char * uuid)11500Sstevel@tonic-gate fmd_case_hash_lookup(fmd_case_hash_t *chp, const char *uuid)
11510Sstevel@tonic-gate {
11520Sstevel@tonic-gate 	fmd_case_impl_t *cip;
11530Sstevel@tonic-gate 	uint_t h;
11540Sstevel@tonic-gate 
11550Sstevel@tonic-gate 	(void) pthread_rwlock_rdlock(&chp->ch_lock);
11560Sstevel@tonic-gate 	h = fmd_strhash(uuid) % chp->ch_hashlen;
11570Sstevel@tonic-gate 
11580Sstevel@tonic-gate 	for (cip = chp->ch_hash[h]; cip != NULL; cip = cip->ci_next) {
11590Sstevel@tonic-gate 		if (strcmp(cip->ci_uuid, uuid) == 0)
11600Sstevel@tonic-gate 			break;
11610Sstevel@tonic-gate 	}
11620Sstevel@tonic-gate 
11636081Scy152378 	/*
11646081Scy152378 	 * If deleting bit is set, treat the case as if it doesn't exist.
11656081Scy152378 	 */
11660Sstevel@tonic-gate 	if (cip != NULL)
11676081Scy152378 		cip = fmd_case_tryhold(cip);
11686081Scy152378 
11696081Scy152378 	if (cip == NULL)
11700Sstevel@tonic-gate 		(void) fmd_set_errno(EFMD_CASE_INVAL);
11710Sstevel@tonic-gate 
11720Sstevel@tonic-gate 	(void) pthread_rwlock_unlock(&chp->ch_lock);
11730Sstevel@tonic-gate 	return ((fmd_case_t *)cip);
11740Sstevel@tonic-gate }
11750Sstevel@tonic-gate 
11760Sstevel@tonic-gate static fmd_case_impl_t *
fmd_case_hash_insert(fmd_case_hash_t * chp,fmd_case_impl_t * cip)11770Sstevel@tonic-gate fmd_case_hash_insert(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
11780Sstevel@tonic-gate {
11790Sstevel@tonic-gate 	fmd_case_impl_t *eip;
11800Sstevel@tonic-gate 	uint_t h;
11810Sstevel@tonic-gate 
11820Sstevel@tonic-gate 	(void) pthread_rwlock_wrlock(&chp->ch_lock);
11830Sstevel@tonic-gate 	h = fmd_strhash(cip->ci_uuid) % chp->ch_hashlen;
11840Sstevel@tonic-gate 
11850Sstevel@tonic-gate 	for (eip = chp->ch_hash[h]; eip != NULL; eip = eip->ci_next) {
11866081Scy152378 		if (strcmp(cip->ci_uuid, eip->ci_uuid) == 0 &&
11876081Scy152378 		    fmd_case_tryhold(eip) != NULL) {
11880Sstevel@tonic-gate 			(void) pthread_rwlock_unlock(&chp->ch_lock);
11891193Smws 			return (eip); /* uuid already present */
11900Sstevel@tonic-gate 		}
11910Sstevel@tonic-gate 	}
11920Sstevel@tonic-gate 
11930Sstevel@tonic-gate 	cip->ci_next = chp->ch_hash[h];
11940Sstevel@tonic-gate 	chp->ch_hash[h] = cip;
11950Sstevel@tonic-gate 
11961193Smws 	chp->ch_count++;
11971193Smws 	ASSERT(chp->ch_count != 0);
11981193Smws 
11990Sstevel@tonic-gate 	(void) pthread_rwlock_unlock(&chp->ch_lock);
12000Sstevel@tonic-gate 	return (cip);
12010Sstevel@tonic-gate }
12020Sstevel@tonic-gate 
12030Sstevel@tonic-gate static void
fmd_case_hash_delete(fmd_case_hash_t * chp,fmd_case_impl_t * cip)12040Sstevel@tonic-gate fmd_case_hash_delete(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
12050Sstevel@tonic-gate {
12060Sstevel@tonic-gate 	fmd_case_impl_t *cp, **pp;
12070Sstevel@tonic-gate 	uint_t h;
12080Sstevel@tonic-gate 
12096081Scy152378 	ASSERT(MUTEX_HELD(&cip->ci_lock));
12106081Scy152378 
12116081Scy152378 	cip->ci_flags |= FMD_CF_DELETING;
12126081Scy152378 	(void) pthread_mutex_unlock(&cip->ci_lock);
12136081Scy152378 
12140Sstevel@tonic-gate 	(void) pthread_rwlock_wrlock(&chp->ch_lock);
12150Sstevel@tonic-gate 
12160Sstevel@tonic-gate 	h = fmd_strhash(cip->ci_uuid) % chp->ch_hashlen;
12170Sstevel@tonic-gate 	pp = &chp->ch_hash[h];
12180Sstevel@tonic-gate 
12190Sstevel@tonic-gate 	for (cp = *pp; cp != NULL; cp = cp->ci_next) {
12200Sstevel@tonic-gate 		if (cp != cip)
12210Sstevel@tonic-gate 			pp = &cp->ci_next;
12220Sstevel@tonic-gate 		else
12230Sstevel@tonic-gate 			break;
12240Sstevel@tonic-gate 	}
12250Sstevel@tonic-gate 
12260Sstevel@tonic-gate 	if (cp == NULL) {
12270Sstevel@tonic-gate 		fmd_panic("case %p (%s) not found on hash chain %u\n",
12280Sstevel@tonic-gate 		    (void *)cip, cip->ci_uuid, h);
12290Sstevel@tonic-gate 	}
12300Sstevel@tonic-gate 
12310Sstevel@tonic-gate 	*pp = cp->ci_next;
12320Sstevel@tonic-gate 	cp->ci_next = NULL;
12330Sstevel@tonic-gate 
12346228Sstephh 	/*
12356228Sstephh 	 * delete from code hash if it is on it
12366228Sstephh 	 */
12376228Sstephh 	fmd_case_code_hash_delete(chp, cip);
12386228Sstephh 
12391193Smws 	ASSERT(chp->ch_count != 0);
12401193Smws 	chp->ch_count--;
12411193Smws 
12420Sstevel@tonic-gate 	(void) pthread_rwlock_unlock(&chp->ch_lock);
12436081Scy152378 
12446081Scy152378 	(void) pthread_mutex_lock(&cip->ci_lock);
12456081Scy152378 	ASSERT(cip->ci_flags & FMD_CF_DELETING);
12460Sstevel@tonic-gate }
12470Sstevel@tonic-gate 
12480Sstevel@tonic-gate fmd_case_t *
fmd_case_create(fmd_module_t * mp,const char * uuidstr,void * data)1249*12967Sgavin.maltby@oracle.com fmd_case_create(fmd_module_t *mp, const char *uuidstr, void *data)
12500Sstevel@tonic-gate {
12510Sstevel@tonic-gate 	fmd_case_impl_t *cip = fmd_zalloc(sizeof (fmd_case_impl_t), FMD_SLEEP);
12521193Smws 	fmd_case_impl_t *eip = NULL;
12530Sstevel@tonic-gate 	uuid_t uuid;
12540Sstevel@tonic-gate 
12550Sstevel@tonic-gate 	(void) pthread_mutex_init(&cip->ci_lock, NULL);
12560Sstevel@tonic-gate 	fmd_buf_hash_create(&cip->ci_bufs);
12570Sstevel@tonic-gate 
12580Sstevel@tonic-gate 	fmd_module_hold(mp);
12590Sstevel@tonic-gate 	cip->ci_mod = mp;
12600Sstevel@tonic-gate 	cip->ci_refs = 1;
12610Sstevel@tonic-gate 	cip->ci_state = FMD_CASE_UNSOLVED;
12620Sstevel@tonic-gate 	cip->ci_flags = FMD_CF_DIRTY;
12630Sstevel@tonic-gate 	cip->ci_data = data;
12640Sstevel@tonic-gate 
12650Sstevel@tonic-gate 	/*
12660Sstevel@tonic-gate 	 * Calling libuuid: get a clue.  The library interfaces cleverly do not
12670Sstevel@tonic-gate 	 * define any constant for the length of an unparse string, and do not
12680Sstevel@tonic-gate 	 * permit the caller to specify a buffer length for safety.  The spec
12690Sstevel@tonic-gate 	 * says it will be 36 bytes, but we make it tunable just in case.
12700Sstevel@tonic-gate 	 */
12710Sstevel@tonic-gate 	(void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &cip->ci_uuidlen);
12720Sstevel@tonic-gate 	cip->ci_uuid = fmd_zalloc(cip->ci_uuidlen + 1, FMD_SLEEP);
12730Sstevel@tonic-gate 
1274*12967Sgavin.maltby@oracle.com 	if (uuidstr == NULL) {
1275*12967Sgavin.maltby@oracle.com 		/*
1276*12967Sgavin.maltby@oracle.com 		 * We expect this loop to execute only once, but code it
1277*12967Sgavin.maltby@oracle.com 		 * defensively against the possibility of libuuid bugs.
1278*12967Sgavin.maltby@oracle.com 		 * Keep generating uuids and attempting to do a hash insert
1279*12967Sgavin.maltby@oracle.com 		 * until we get a unique one.
1280*12967Sgavin.maltby@oracle.com 		 */
1281*12967Sgavin.maltby@oracle.com 		do {
1282*12967Sgavin.maltby@oracle.com 			if (eip != NULL)
1283*12967Sgavin.maltby@oracle.com 				fmd_case_rele((fmd_case_t *)eip);
1284*12967Sgavin.maltby@oracle.com 			uuid_generate(uuid);
1285*12967Sgavin.maltby@oracle.com 			uuid_unparse(uuid, cip->ci_uuid);
1286*12967Sgavin.maltby@oracle.com 		} while ((eip = fmd_case_hash_insert(fmd.d_cases, cip)) != cip);
1287*12967Sgavin.maltby@oracle.com 	} else {
1288*12967Sgavin.maltby@oracle.com 		/*
1289*12967Sgavin.maltby@oracle.com 		 * If a uuid was specified we must succeed with that uuid,
1290*12967Sgavin.maltby@oracle.com 		 * or return NULL indicating a case with that uuid already
1291*12967Sgavin.maltby@oracle.com 		 * exists.
1292*12967Sgavin.maltby@oracle.com 		 */
1293*12967Sgavin.maltby@oracle.com 		(void) strncpy(cip->ci_uuid, uuidstr, cip->ci_uuidlen + 1);
1294*12967Sgavin.maltby@oracle.com 		if (fmd_case_hash_insert(fmd.d_cases, cip) != cip) {
1295*12967Sgavin.maltby@oracle.com 			fmd_free(cip->ci_uuid, cip->ci_uuidlen + 1);
1296*12967Sgavin.maltby@oracle.com 			(void) fmd_buf_hash_destroy(&cip->ci_bufs);
1297*12967Sgavin.maltby@oracle.com 			fmd_module_rele(mp);
1298*12967Sgavin.maltby@oracle.com 			pthread_mutex_destroy(&cip->ci_lock);
1299*12967Sgavin.maltby@oracle.com 			fmd_free(cip, sizeof (*cip));
1300*12967Sgavin.maltby@oracle.com 			return (NULL);
1301*12967Sgavin.maltby@oracle.com 		}
1302*12967Sgavin.maltby@oracle.com 	}
13030Sstevel@tonic-gate 
13040Sstevel@tonic-gate 	ASSERT(fmd_module_locked(mp));
13050Sstevel@tonic-gate 	fmd_list_append(&mp->mod_cases, cip);
13060Sstevel@tonic-gate 	fmd_module_setcdirty(mp);
13070Sstevel@tonic-gate 
13080Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
13090Sstevel@tonic-gate 	cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64++;
13100Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
13110Sstevel@tonic-gate 
13120Sstevel@tonic-gate 	return ((fmd_case_t *)cip);
13130Sstevel@tonic-gate }
13140Sstevel@tonic-gate 
13151552Smws static void
fmd_case_destroy_suspects(fmd_case_impl_t * cip)13161552Smws fmd_case_destroy_suspects(fmd_case_impl_t *cip)
13171552Smws {
13181552Smws 	fmd_case_susp_t *cis, *ncis;
13191552Smws 
13201552Smws 	ASSERT(MUTEX_HELD(&cip->ci_lock));
13211552Smws 
13229120SStephen.Hanson@Sun.COM 	if (cip->ci_proxy_asru)
13239120SStephen.Hanson@Sun.COM 		fmd_free(cip->ci_proxy_asru, sizeof (uint8_t) *
13249120SStephen.Hanson@Sun.COM 		    cip->ci_nsuspects);
13259120SStephen.Hanson@Sun.COM 	if (cip->ci_diag_de)
13269120SStephen.Hanson@Sun.COM 		nvlist_free(cip->ci_diag_de);
13279120SStephen.Hanson@Sun.COM 	if (cip->ci_diag_asru)
13289120SStephen.Hanson@Sun.COM 		fmd_free(cip->ci_diag_asru, sizeof (uint8_t) *
13299120SStephen.Hanson@Sun.COM 		    cip->ci_nsuspects);
13309120SStephen.Hanson@Sun.COM 
13311552Smws 	for (cis = cip->ci_suspects; cis != NULL; cis = ncis) {
13321552Smws 		ncis = cis->cis_next;
13331552Smws 		nvlist_free(cis->cis_nvl);
13341552Smws 		fmd_free(cis, sizeof (fmd_case_susp_t));
13351552Smws 	}
13361552Smws 
13371552Smws 	cip->ci_suspects = NULL;
13381552Smws 	cip->ci_nsuspects = 0;
13391552Smws }
13401552Smws 
13410Sstevel@tonic-gate fmd_case_t *
fmd_case_recreate(fmd_module_t * mp,fmd_xprt_t * xp,uint_t state,const char * uuid,const char * code)13421193Smws fmd_case_recreate(fmd_module_t *mp, fmd_xprt_t *xp,
13431193Smws     uint_t state, const char *uuid, const char *code)
13440Sstevel@tonic-gate {
13450Sstevel@tonic-gate 	fmd_case_impl_t *cip = fmd_zalloc(sizeof (fmd_case_impl_t), FMD_SLEEP);
13461193Smws 	fmd_case_impl_t *eip;
13471193Smws 
13480Sstevel@tonic-gate 	(void) pthread_mutex_init(&cip->ci_lock, NULL);
13490Sstevel@tonic-gate 	fmd_buf_hash_create(&cip->ci_bufs);
13500Sstevel@tonic-gate 
13510Sstevel@tonic-gate 	fmd_module_hold(mp);
13520Sstevel@tonic-gate 	cip->ci_mod = mp;
13531193Smws 	cip->ci_xprt = xp;
13540Sstevel@tonic-gate 	cip->ci_refs = 1;
13551193Smws 	cip->ci_state = state;
13560Sstevel@tonic-gate 	cip->ci_uuid = fmd_strdup(uuid, FMD_SLEEP);
13570Sstevel@tonic-gate 	cip->ci_uuidlen = strlen(cip->ci_uuid);
13581193Smws 	cip->ci_code = fmd_strdup(code, FMD_SLEEP);
13591193Smws 	cip->ci_codelen = cip->ci_code ? strlen(cip->ci_code) + 1 : 0;
13601193Smws 
13611193Smws 	if (state > FMD_CASE_CLOSE_WAIT)
13621193Smws 		cip->ci_flags |= FMD_CF_SOLVED;
13631193Smws 
13641193Smws 	/*
13651193Smws 	 * Insert the case into the global case hash.  If the specified UUID is
13661193Smws 	 * already present, check to see if it is an orphan: if so, reclaim it;
13671193Smws 	 * otherwise if it is owned by a different module then return NULL.
13681193Smws 	 */
13691193Smws 	if ((eip = fmd_case_hash_insert(fmd.d_cases, cip)) != cip) {
13701193Smws 		(void) pthread_mutex_lock(&cip->ci_lock);
13711193Smws 		cip->ci_refs--; /* decrement to zero */
13721193Smws 		fmd_case_destroy((fmd_case_t *)cip, B_FALSE);
13731193Smws 
13741193Smws 		cip = eip; /* switch 'cip' to the existing case */
13751193Smws 		(void) pthread_mutex_lock(&cip->ci_lock);
13761193Smws 
13771193Smws 		/*
13781193Smws 		 * If the ASRU cache is trying to recreate an orphan, then just
13791193Smws 		 * return the existing case that we found without changing it.
13801193Smws 		 */
13811193Smws 		if (mp == fmd.d_rmod) {
13827275Sstephh 			/*
13839120SStephen.Hanson@Sun.COM 			 * In case the case has already been created from
13849120SStephen.Hanson@Sun.COM 			 * a checkpoint file we need to set up code now.
13859120SStephen.Hanson@Sun.COM 			 */
13869120SStephen.Hanson@Sun.COM 			if (cip->ci_state < FMD_CASE_CLOSED) {
13879120SStephen.Hanson@Sun.COM 				if (code != NULL && cip->ci_code == NULL) {
13889120SStephen.Hanson@Sun.COM 					cip->ci_code = fmd_strdup(code,
13899120SStephen.Hanson@Sun.COM 					    FMD_SLEEP);
13909120SStephen.Hanson@Sun.COM 					cip->ci_codelen = cip->ci_code ?
13919120SStephen.Hanson@Sun.COM 					    strlen(cip->ci_code) + 1 : 0;
13929120SStephen.Hanson@Sun.COM 					fmd_case_code_hash_insert(fmd.d_cases,
13939120SStephen.Hanson@Sun.COM 					    cip);
13949120SStephen.Hanson@Sun.COM 				}
13959120SStephen.Hanson@Sun.COM 			}
13969120SStephen.Hanson@Sun.COM 
13979120SStephen.Hanson@Sun.COM 			/*
13987275Sstephh 			 * When recreating an orphan case, state passed in may
139910656SStephen.Hanson@Sun.COM 			 * be CLOSED (faulty) or REPAIRED/RESOLVED (!faulty). If
14007275Sstephh 			 * any suspects are still CLOSED (faulty) then the
14017275Sstephh 			 * overall state needs to be CLOSED.
14027275Sstephh 			 */
140310656SStephen.Hanson@Sun.COM 			if ((cip->ci_state == FMD_CASE_REPAIRED ||
140410656SStephen.Hanson@Sun.COM 			    cip->ci_state == FMD_CASE_RESOLVED) &&
14059120SStephen.Hanson@Sun.COM 			    state == FMD_CASE_CLOSED)
14067275Sstephh 				cip->ci_state = FMD_CASE_CLOSED;
14071193Smws 			(void) pthread_mutex_unlock(&cip->ci_lock);
14081193Smws 			fmd_case_rele((fmd_case_t *)cip);
14091193Smws 			return ((fmd_case_t *)cip);
14101193Smws 		}
14111193Smws 
14121193Smws 		/*
14131193Smws 		 * If the existing case isn't an orphan or is being proxied,
14141193Smws 		 * then we have a UUID conflict: return failure to the caller.
14151193Smws 		 */
14161193Smws 		if (cip->ci_mod != fmd.d_rmod || xp != NULL) {
14171193Smws 			(void) pthread_mutex_unlock(&cip->ci_lock);
14181193Smws 			fmd_case_rele((fmd_case_t *)cip);
14191193Smws 			return (NULL);
14201193Smws 		}
14211193Smws 
14221193Smws 		/*
14231193Smws 		 * If the new module is reclaiming an orphaned case, remove
14241193Smws 		 * the case from the root module, switch ci_mod, and then fall
14251193Smws 		 * through to adding the case to the new owner module 'mp'.
14261193Smws 		 */
14271193Smws 		fmd_module_lock(cip->ci_mod);
14281193Smws 		fmd_list_delete(&cip->ci_mod->mod_cases, cip);
14291193Smws 		fmd_module_unlock(cip->ci_mod);
14301193Smws 
14311193Smws 		fmd_module_rele(cip->ci_mod);
14321193Smws 		cip->ci_mod = mp;
14331193Smws 		fmd_module_hold(mp);
14341193Smws 
14357913SStephen.Hanson@Sun.COM 		/*
14367913SStephen.Hanson@Sun.COM 		 * It's possible that fmd crashed or was restarted during a
14377913SStephen.Hanson@Sun.COM 		 * previous solve operation between the asru cache being created
14387913SStephen.Hanson@Sun.COM 		 * and the ckpt file being updated to SOLVED. Thus when the DE
14397913SStephen.Hanson@Sun.COM 		 * recreates the case here from the checkpoint file, the state
14407913SStephen.Hanson@Sun.COM 		 * will be UNSOLVED and yet we are having to reclaim because
14417913SStephen.Hanson@Sun.COM 		 * the case was in the asru cache. If this happens, revert the
14427913SStephen.Hanson@Sun.COM 		 * case back to the UNSOLVED state and let the DE solve it again
14437913SStephen.Hanson@Sun.COM 		 */
14447913SStephen.Hanson@Sun.COM 		if (state == FMD_CASE_UNSOLVED) {
14457913SStephen.Hanson@Sun.COM 			fmd_asru_hash_delete_case(fmd.d_asrus,
14467913SStephen.Hanson@Sun.COM 			    (fmd_case_t *)cip);
14477913SStephen.Hanson@Sun.COM 			fmd_case_destroy_suspects(cip);
14487913SStephen.Hanson@Sun.COM 			fmd_case_code_hash_delete(fmd.d_cases, cip);
14497913SStephen.Hanson@Sun.COM 			fmd_free(cip->ci_code, cip->ci_codelen);
14507913SStephen.Hanson@Sun.COM 			cip->ci_code = NULL;
14517913SStephen.Hanson@Sun.COM 			cip->ci_codelen = 0;
14527913SStephen.Hanson@Sun.COM 			cip->ci_tv_valid = 0;
14537913SStephen.Hanson@Sun.COM 		}
14547913SStephen.Hanson@Sun.COM 
14551552Smws 		cip->ci_state = state;
14561552Smws 
14571193Smws 		(void) pthread_mutex_unlock(&cip->ci_lock);
14581193Smws 		fmd_case_rele((fmd_case_t *)cip);
14596228Sstephh 	} else {
14606228Sstephh 		/*
14616228Sstephh 		 * add into hash of solved cases
14626228Sstephh 		 */
14636228Sstephh 		if (cip->ci_code)
14646228Sstephh 			fmd_case_code_hash_insert(fmd.d_cases, cip);
14651193Smws 	}
14660Sstevel@tonic-gate 
14670Sstevel@tonic-gate 	ASSERT(fmd_module_locked(mp));
14680Sstevel@tonic-gate 	fmd_list_append(&mp->mod_cases, cip);
14690Sstevel@tonic-gate 
14700Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
14710Sstevel@tonic-gate 	cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64++;
14720Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
14730Sstevel@tonic-gate 
14740Sstevel@tonic-gate 	return ((fmd_case_t *)cip);
14750Sstevel@tonic-gate }
14760Sstevel@tonic-gate 
14770Sstevel@tonic-gate void
fmd_case_destroy(fmd_case_t * cp,int visible)14781193Smws fmd_case_destroy(fmd_case_t *cp, int visible)
14790Sstevel@tonic-gate {
14800Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
14810Sstevel@tonic-gate 	fmd_case_item_t *cit, *ncit;
14820Sstevel@tonic-gate 
14830Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cip->ci_lock));
14840Sstevel@tonic-gate 	ASSERT(cip->ci_refs == 0);
14850Sstevel@tonic-gate 
14861193Smws 	if (visible) {
14871193Smws 		TRACE((FMD_DBG_CASE, "deleting case %s", cip->ci_uuid));
14881193Smws 		fmd_case_hash_delete(fmd.d_cases, cip);
14891193Smws 	}
14900Sstevel@tonic-gate 
14910Sstevel@tonic-gate 	for (cit = cip->ci_items; cit != NULL; cit = ncit) {
14920Sstevel@tonic-gate 		ncit = cit->cit_next;
14930Sstevel@tonic-gate 		fmd_event_rele(cit->cit_event);
14940Sstevel@tonic-gate 		fmd_free(cit, sizeof (fmd_case_item_t));
14950Sstevel@tonic-gate 	}
14960Sstevel@tonic-gate 
14971552Smws 	fmd_case_destroy_suspects(cip);
14980Sstevel@tonic-gate 
14990Sstevel@tonic-gate 	if (cip->ci_principal != NULL)
15000Sstevel@tonic-gate 		fmd_event_rele(cip->ci_principal);
15010Sstevel@tonic-gate 
15020Sstevel@tonic-gate 	fmd_free(cip->ci_uuid, cip->ci_uuidlen + 1);
15031193Smws 	fmd_free(cip->ci_code, cip->ci_codelen);
15041429Smws 	(void) fmd_buf_hash_destroy(&cip->ci_bufs);
15050Sstevel@tonic-gate 
15060Sstevel@tonic-gate 	fmd_module_rele(cip->ci_mod);
15070Sstevel@tonic-gate 	fmd_free(cip, sizeof (fmd_case_impl_t));
15080Sstevel@tonic-gate }
15090Sstevel@tonic-gate 
15100Sstevel@tonic-gate void
fmd_case_hold(fmd_case_t * cp)15110Sstevel@tonic-gate fmd_case_hold(fmd_case_t *cp)
15120Sstevel@tonic-gate {
15130Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
15140Sstevel@tonic-gate 
15150Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
15166081Scy152378 	fmd_case_hold_locked(cp);
15170Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_lock);
15180Sstevel@tonic-gate }
15190Sstevel@tonic-gate 
15200Sstevel@tonic-gate void
fmd_case_hold_locked(fmd_case_t * cp)15211193Smws fmd_case_hold_locked(fmd_case_t *cp)
15221193Smws {
15231193Smws 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
15241193Smws 
15251193Smws 	ASSERT(MUTEX_HELD(&cip->ci_lock));
15266081Scy152378 	if (cip->ci_flags & FMD_CF_DELETING)
15276081Scy152378 		fmd_panic("attempt to hold a deleting case %p (%s)\n",
15286081Scy152378 		    (void *)cip, cip->ci_uuid);
15291193Smws 	cip->ci_refs++;
15301193Smws 	ASSERT(cip->ci_refs != 0);
15311193Smws }
15321193Smws 
15336081Scy152378 static fmd_case_impl_t *
fmd_case_tryhold(fmd_case_impl_t * cip)15346081Scy152378 fmd_case_tryhold(fmd_case_impl_t *cip)
15356081Scy152378 {
15366081Scy152378 	/*
15376081Scy152378 	 * If the case's "deleting" bit is unset, hold and return case,
15386081Scy152378 	 * otherwise, return NULL.
15396081Scy152378 	 */
15406081Scy152378 	(void) pthread_mutex_lock(&cip->ci_lock);
15416081Scy152378 	if (cip->ci_flags & FMD_CF_DELETING) {
15426081Scy152378 		(void) pthread_mutex_unlock(&cip->ci_lock);
15436081Scy152378 		cip = NULL;
15446081Scy152378 	} else {
15456081Scy152378 		fmd_case_hold_locked((fmd_case_t *)cip);
15466081Scy152378 		(void) pthread_mutex_unlock(&cip->ci_lock);
15476081Scy152378 	}
15486081Scy152378 	return (cip);
15496081Scy152378 }
15506081Scy152378 
15511193Smws void
fmd_case_rele(fmd_case_t * cp)15520Sstevel@tonic-gate fmd_case_rele(fmd_case_t *cp)
15530Sstevel@tonic-gate {
15540Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
15550Sstevel@tonic-gate 
15560Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
15570Sstevel@tonic-gate 	ASSERT(cip->ci_refs != 0);
15580Sstevel@tonic-gate 
15590Sstevel@tonic-gate 	if (--cip->ci_refs == 0)
15601193Smws 		fmd_case_destroy((fmd_case_t *)cip, B_TRUE);
15610Sstevel@tonic-gate 	else
15620Sstevel@tonic-gate 		(void) pthread_mutex_unlock(&cip->ci_lock);
15630Sstevel@tonic-gate }
15640Sstevel@tonic-gate 
15656228Sstephh void
fmd_case_rele_locked(fmd_case_t * cp)15666228Sstephh fmd_case_rele_locked(fmd_case_t *cp)
15676228Sstephh {
15686228Sstephh 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
15696228Sstephh 
15706228Sstephh 	ASSERT(MUTEX_HELD(&cip->ci_lock));
15716228Sstephh 	--cip->ci_refs;
15726228Sstephh 	ASSERT(cip->ci_refs != 0);
15736228Sstephh }
15746228Sstephh 
15751414Scindi int
fmd_case_insert_principal(fmd_case_t * cp,fmd_event_t * ep)15760Sstevel@tonic-gate fmd_case_insert_principal(fmd_case_t *cp, fmd_event_t *ep)
15770Sstevel@tonic-gate {
15780Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
15791414Scindi 	fmd_case_item_t *cit;
15800Sstevel@tonic-gate 	fmd_event_t *oep;
15810Sstevel@tonic-gate 	uint_t state;
15821414Scindi 	int new;
15830Sstevel@tonic-gate 
15840Sstevel@tonic-gate 	fmd_event_hold(ep);
15850Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
15860Sstevel@tonic-gate 
15871193Smws 	if (cip->ci_flags & FMD_CF_SOLVED)
15880Sstevel@tonic-gate 		state = FMD_EVS_DIAGNOSED;
15890Sstevel@tonic-gate 	else
15900Sstevel@tonic-gate 		state = FMD_EVS_ACCEPTED;
15910Sstevel@tonic-gate 
15920Sstevel@tonic-gate 	oep = cip->ci_principal;
15930Sstevel@tonic-gate 	cip->ci_principal = ep;
15940Sstevel@tonic-gate 
15951414Scindi 	for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
15961414Scindi 		if (cit->cit_event == ep)
15971414Scindi 			break;
15981414Scindi 	}
15991414Scindi 
16000Sstevel@tonic-gate 	cip->ci_flags |= FMD_CF_DIRTY;
16011414Scindi 	new = cit == NULL && ep != oep;
16021414Scindi 
16030Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_lock);
16040Sstevel@tonic-gate 
16050Sstevel@tonic-gate 	fmd_module_setcdirty(cip->ci_mod);
16060Sstevel@tonic-gate 	fmd_event_transition(ep, state);
16070Sstevel@tonic-gate 
16080Sstevel@tonic-gate 	if (oep != NULL)
16090Sstevel@tonic-gate 		fmd_event_rele(oep);
16101414Scindi 
16111414Scindi 	return (new);
16120Sstevel@tonic-gate }
16130Sstevel@tonic-gate 
16141414Scindi int
fmd_case_insert_event(fmd_case_t * cp,fmd_event_t * ep)16150Sstevel@tonic-gate fmd_case_insert_event(fmd_case_t *cp, fmd_event_t *ep)
16160Sstevel@tonic-gate {
16170Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
16181414Scindi 	fmd_case_item_t *cit;
16190Sstevel@tonic-gate 	uint_t state;
16201414Scindi 	int new;
162110928SStephen.Hanson@Sun.COM 	boolean_t injected;
16221414Scindi 
16231414Scindi 	(void) pthread_mutex_lock(&cip->ci_lock);
16241414Scindi 
16251414Scindi 	if (cip->ci_flags & FMD_CF_SOLVED)
16261414Scindi 		state = FMD_EVS_DIAGNOSED;
16271414Scindi 	else
16281414Scindi 		state = FMD_EVS_ACCEPTED;
16290Sstevel@tonic-gate 
16301414Scindi 	for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
16311414Scindi 		if (cit->cit_event == ep)
16321414Scindi 			break;
16331414Scindi 	}
16341414Scindi 
16351414Scindi 	new = cit == NULL && ep != cip->ci_principal;
16361414Scindi 
16371414Scindi 	/*
16381414Scindi 	 * If the event is already in the case or the case is already solved,
16391414Scindi 	 * there is no reason to save it: just transition it appropriately.
16401414Scindi 	 */
16411414Scindi 	if (cit != NULL || (cip->ci_flags & FMD_CF_SOLVED)) {
16421414Scindi 		(void) pthread_mutex_unlock(&cip->ci_lock);
16431414Scindi 		fmd_event_transition(ep, state);
16441414Scindi 		return (new);
16451414Scindi 	}
16461414Scindi 
16471414Scindi 	cit = fmd_alloc(sizeof (fmd_case_item_t), FMD_SLEEP);
16480Sstevel@tonic-gate 	fmd_event_hold(ep);
16490Sstevel@tonic-gate 
165010928SStephen.Hanson@Sun.COM 	if (nvlist_lookup_boolean_value(((fmd_event_impl_t *)ep)->ev_nvl,
165110928SStephen.Hanson@Sun.COM 	    "__injected", &injected) == 0 && injected)
165210928SStephen.Hanson@Sun.COM 		fmd_case_set_injected(cp);
165310928SStephen.Hanson@Sun.COM 
16540Sstevel@tonic-gate 	cit->cit_next = cip->ci_items;
16550Sstevel@tonic-gate 	cit->cit_event = ep;
16560Sstevel@tonic-gate 
16570Sstevel@tonic-gate 	cip->ci_items = cit;
16580Sstevel@tonic-gate 	cip->ci_nitems++;
16590Sstevel@tonic-gate 
16600Sstevel@tonic-gate 	cip->ci_flags |= FMD_CF_DIRTY;
16610Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_lock);
16620Sstevel@tonic-gate 
16630Sstevel@tonic-gate 	fmd_module_setcdirty(cip->ci_mod);
16640Sstevel@tonic-gate 	fmd_event_transition(ep, state);
16651414Scindi 
16661414Scindi 	return (new);
16670Sstevel@tonic-gate }
16680Sstevel@tonic-gate 
16690Sstevel@tonic-gate void
fmd_case_insert_suspect(fmd_case_t * cp,nvlist_t * nvl)16700Sstevel@tonic-gate fmd_case_insert_suspect(fmd_case_t *cp, nvlist_t *nvl)
16710Sstevel@tonic-gate {
16720Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
16730Sstevel@tonic-gate 	fmd_case_susp_t *cis = fmd_alloc(sizeof (fmd_case_susp_t), FMD_SLEEP);
16740Sstevel@tonic-gate 
16750Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
16766228Sstephh 	ASSERT(cip->ci_state < FMD_CASE_CLOSE_WAIT);
16770Sstevel@tonic-gate 	cip->ci_flags |= FMD_CF_DIRTY;
16780Sstevel@tonic-gate 
16790Sstevel@tonic-gate 	cis->cis_next = cip->ci_suspects;
16800Sstevel@tonic-gate 	cis->cis_nvl = nvl;
16810Sstevel@tonic-gate 
16820Sstevel@tonic-gate 	cip->ci_suspects = cis;
16830Sstevel@tonic-gate 	cip->ci_nsuspects++;
16840Sstevel@tonic-gate 
16850Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_lock);
16869120SStephen.Hanson@Sun.COM 	if (cip->ci_xprt == NULL)
16879120SStephen.Hanson@Sun.COM 		fmd_module_setcdirty(cip->ci_mod);
16880Sstevel@tonic-gate }
16890Sstevel@tonic-gate 
16900Sstevel@tonic-gate void
fmd_case_recreate_suspect(fmd_case_t * cp,nvlist_t * nvl)16911193Smws fmd_case_recreate_suspect(fmd_case_t *cp, nvlist_t *nvl)
16921193Smws {
16931193Smws 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
16941193Smws 	fmd_case_susp_t *cis = fmd_alloc(sizeof (fmd_case_susp_t), FMD_SLEEP);
16955255Sstephh 	boolean_t b;
16961193Smws 
16971193Smws 	(void) pthread_mutex_lock(&cip->ci_lock);
16981193Smws 
16991193Smws 	cis->cis_next = cip->ci_suspects;
17001193Smws 	cis->cis_nvl = nvl;
17011193Smws 
17025255Sstephh 	if (nvlist_lookup_boolean_value(nvl,
17035255Sstephh 	    FM_SUSPECT_MESSAGE, &b) == 0 && b == B_FALSE)
17045255Sstephh 		cip->ci_flags |= FMD_CF_INVISIBLE;
17055255Sstephh 
17061193Smws 	cip->ci_suspects = cis;
17071193Smws 	cip->ci_nsuspects++;
17081193Smws 
17091193Smws 	(void) pthread_mutex_unlock(&cip->ci_lock);
17101193Smws }
17111193Smws 
17121193Smws void
fmd_case_reset_suspects(fmd_case_t * cp)17130Sstevel@tonic-gate fmd_case_reset_suspects(fmd_case_t *cp)
17140Sstevel@tonic-gate {
17150Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
17160Sstevel@tonic-gate 
17170Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
17180Sstevel@tonic-gate 	ASSERT(cip->ci_state < FMD_CASE_SOLVED);
17190Sstevel@tonic-gate 
17201552Smws 	fmd_case_destroy_suspects(cip);
17210Sstevel@tonic-gate 	cip->ci_flags |= FMD_CF_DIRTY;
17220Sstevel@tonic-gate 
17230Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_lock);
17240Sstevel@tonic-gate 	fmd_module_setcdirty(cip->ci_mod);
17250Sstevel@tonic-gate }
17260Sstevel@tonic-gate 
17276228Sstephh /*ARGSUSED*/
17286228Sstephh static void
fmd_case_unusable(fmd_asru_link_t * alp,void * arg)17296228Sstephh fmd_case_unusable(fmd_asru_link_t *alp, void *arg)
17306228Sstephh {
17316228Sstephh 	(void) fmd_asru_setflags(alp, FMD_ASRU_UNUSABLE);
17326228Sstephh }
17336228Sstephh 
17341193Smws /*
17351193Smws  * Grab ci_lock and update the case state and set the dirty bit.  Then perform
17361193Smws  * whatever actions and emit whatever events are appropriate for the state.
17371193Smws  * Refer to the topmost block comment explaining the state machine for details.
17381193Smws  */
17390Sstevel@tonic-gate void
fmd_case_transition(fmd_case_t * cp,uint_t state,uint_t flags)17401193Smws fmd_case_transition(fmd_case_t *cp, uint_t state, uint_t flags)
17410Sstevel@tonic-gate {
17420Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
17431193Smws 	fmd_case_item_t *cit;
17441193Smws 	fmd_event_t *e;
17457275Sstephh 	int resolved = 0;
17467275Sstephh 	int any_unusable_and_present = 0;
17470Sstevel@tonic-gate 
17487275Sstephh 	ASSERT(state <= FMD_CASE_RESOLVED);
17490Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
17501552Smws 
17515255Sstephh 	if (!(cip->ci_flags & FMD_CF_SOLVED) && !(flags & FMD_CF_SOLVED))
17529120SStephen.Hanson@Sun.COM 		flags &= ~(FMD_CF_ISOLATED | FMD_CF_REPAIRED | FMD_CF_RESOLVED);
17531552Smws 
17541193Smws 	cip->ci_flags |= flags;
17550Sstevel@tonic-gate 
17560Sstevel@tonic-gate 	if (cip->ci_state >= state) {
17570Sstevel@tonic-gate 		(void) pthread_mutex_unlock(&cip->ci_lock);
17580Sstevel@tonic-gate 		return; /* already in specified state */
17590Sstevel@tonic-gate 	}
17600Sstevel@tonic-gate 
17610Sstevel@tonic-gate 	TRACE((FMD_DBG_CASE, "case %s %s->%s", cip->ci_uuid,
17620Sstevel@tonic-gate 	    _fmd_case_snames[cip->ci_state], _fmd_case_snames[state]));
17630Sstevel@tonic-gate 
17640Sstevel@tonic-gate 	cip->ci_state = state;
17650Sstevel@tonic-gate 	cip->ci_flags |= FMD_CF_DIRTY;
17660Sstevel@tonic-gate 
17671193Smws 	if (cip->ci_xprt == NULL && cip->ci_mod != fmd.d_rmod)
17681193Smws 		fmd_module_setcdirty(cip->ci_mod);
17690Sstevel@tonic-gate 
17701193Smws 	switch (state) {
17711193Smws 	case FMD_CASE_SOLVED:
17720Sstevel@tonic-gate 		for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next)
17730Sstevel@tonic-gate 			fmd_event_transition(cit->cit_event, FMD_EVS_DIAGNOSED);
17740Sstevel@tonic-gate 
17750Sstevel@tonic-gate 		if (cip->ci_principal != NULL) {
17760Sstevel@tonic-gate 			fmd_event_transition(cip->ci_principal,
17770Sstevel@tonic-gate 			    FMD_EVS_DIAGNOSED);
17780Sstevel@tonic-gate 		}
17790Sstevel@tonic-gate 		break;
17800Sstevel@tonic-gate 
17811193Smws 	case FMD_CASE_CLOSE_WAIT:
17821193Smws 		/*
17831193Smws 		 * If the case was never solved, do not change ASRUs.
17841193Smws 		 * If the case was never fmd_case_closed, do not change ASRUs.
17851193Smws 		 * If the case was repaired, do not change ASRUs.
17861193Smws 		 */
17871193Smws 		if ((cip->ci_flags & (FMD_CF_SOLVED | FMD_CF_ISOLATED |
17886228Sstephh 		    FMD_CF_REPAIRED)) == (FMD_CF_SOLVED | FMD_CF_ISOLATED))
17896228Sstephh 			fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
17906228Sstephh 			    fmd_case_unusable, NULL);
17910Sstevel@tonic-gate 
17920Sstevel@tonic-gate 		/*
17931193Smws 		 * If an orphaned case transitions to CLOSE_WAIT, the owning
179411416SStephen.Hanson@Sun.COM 		 * module is no longer loaded: continue on to CASE_CLOSED or
179511416SStephen.Hanson@Sun.COM 		 * CASE_REPAIRED as appropriate.
17961193Smws 		 */
179711416SStephen.Hanson@Sun.COM 		if (fmd_case_orphaned(cp)) {
179811416SStephen.Hanson@Sun.COM 			if (cip->ci_flags & FMD_CF_REPAIRED) {
179911416SStephen.Hanson@Sun.COM 				state = cip->ci_state = FMD_CASE_REPAIRED;
180011416SStephen.Hanson@Sun.COM 				TRACE((FMD_DBG_CASE, "case %s %s->%s",
180111416SStephen.Hanson@Sun.COM 				    cip->ci_uuid,
180211416SStephen.Hanson@Sun.COM 				    _fmd_case_snames[FMD_CASE_CLOSE_WAIT],
180311416SStephen.Hanson@Sun.COM 				    _fmd_case_snames[FMD_CASE_REPAIRED]));
180411416SStephen.Hanson@Sun.COM 				goto do_repair;
180511416SStephen.Hanson@Sun.COM 			} else {
180611416SStephen.Hanson@Sun.COM 				state = cip->ci_state = FMD_CASE_CLOSED;
180711416SStephen.Hanson@Sun.COM 				TRACE((FMD_DBG_CASE, "case %s %s->%s",
180811416SStephen.Hanson@Sun.COM 				    cip->ci_uuid,
180911416SStephen.Hanson@Sun.COM 				    _fmd_case_snames[FMD_CASE_CLOSE_WAIT],
181011416SStephen.Hanson@Sun.COM 				    _fmd_case_snames[FMD_CASE_CLOSED]));
181111416SStephen.Hanson@Sun.COM 			}
181211416SStephen.Hanson@Sun.COM 		}
18130Sstevel@tonic-gate 		break;
18141193Smws 
18151193Smws 	case FMD_CASE_REPAIRED:
181611416SStephen.Hanson@Sun.COM do_repair:
18179120SStephen.Hanson@Sun.COM 		ASSERT(cip->ci_xprt != NULL || fmd_case_orphaned(cp));
18187275Sstephh 
18197275Sstephh 		/*
18209120SStephen.Hanson@Sun.COM 		 * If we've been requested to transition straight on to the
18219120SStephen.Hanson@Sun.COM 		 * RESOLVED state (which can happen with fault proxying where a
18229120SStephen.Hanson@Sun.COM 		 * list.resolved or a uuresolved is received from the other
18239120SStephen.Hanson@Sun.COM 		 * side), or if all suspects are already either usable or not
18249120SStephen.Hanson@Sun.COM 		 * present then transition straight to RESOLVED state,
18259120SStephen.Hanson@Sun.COM 		 * publishing both the list.repaired and list.resolved. For a
18269120SStephen.Hanson@Sun.COM 		 * proxy, if we discover here that all suspects are already
18279120SStephen.Hanson@Sun.COM 		 * either usable or not present, notify the diag side instead
18289120SStephen.Hanson@Sun.COM 		 * using fmd_xprt_uuresolved().
18297275Sstephh 		 */
18309120SStephen.Hanson@Sun.COM 		if (flags & FMD_CF_RESOLVED) {
183110656SStephen.Hanson@Sun.COM 			if (cip->ci_xprt != NULL)
18329120SStephen.Hanson@Sun.COM 				fmd_list_delete(&cip->ci_mod->mod_cases, cip);
18339120SStephen.Hanson@Sun.COM 		} else {
18349120SStephen.Hanson@Sun.COM 			fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
18359120SStephen.Hanson@Sun.COM 			    fmd_case_unusable_and_present,
18369120SStephen.Hanson@Sun.COM 			    &any_unusable_and_present);
18379120SStephen.Hanson@Sun.COM 			if (any_unusable_and_present)
18389120SStephen.Hanson@Sun.COM 				break;
18399120SStephen.Hanson@Sun.COM 			if (cip->ci_xprt != NULL) {
18409120SStephen.Hanson@Sun.COM 				fmd_xprt_uuresolved(cip->ci_xprt, cip->ci_uuid);
18419120SStephen.Hanson@Sun.COM 				break;
18429120SStephen.Hanson@Sun.COM 			}
18439120SStephen.Hanson@Sun.COM 		}
18447275Sstephh 
18457275Sstephh 		cip->ci_state = FMD_CASE_RESOLVED;
18467275Sstephh 		(void) pthread_mutex_unlock(&cip->ci_lock);
18477275Sstephh 		fmd_case_publish(cp, state);
18487275Sstephh 		TRACE((FMD_DBG_CASE, "case %s %s->%s", cip->ci_uuid,
18497275Sstephh 		    _fmd_case_snames[FMD_CASE_REPAIRED],
18507275Sstephh 		    _fmd_case_snames[FMD_CASE_RESOLVED]));
18517275Sstephh 		state = FMD_CASE_RESOLVED;
18527275Sstephh 		resolved = 1;
18537275Sstephh 		(void) pthread_mutex_lock(&cip->ci_lock);
18547275Sstephh 		break;
18557275Sstephh 
18567275Sstephh 	case FMD_CASE_RESOLVED:
18579120SStephen.Hanson@Sun.COM 		/*
18589120SStephen.Hanson@Sun.COM 		 * For a proxy, no need to check that all suspects are already
18599120SStephen.Hanson@Sun.COM 		 * either usable or not present - this request has come from
18609120SStephen.Hanson@Sun.COM 		 * the diagnosing side which makes the final decision on this.
18619120SStephen.Hanson@Sun.COM 		 */
18629120SStephen.Hanson@Sun.COM 		if (cip->ci_xprt != NULL) {
18639120SStephen.Hanson@Sun.COM 			fmd_list_delete(&cip->ci_mod->mod_cases, cip);
18649120SStephen.Hanson@Sun.COM 			resolved = 1;
18659120SStephen.Hanson@Sun.COM 			break;
18669120SStephen.Hanson@Sun.COM 		}
18679120SStephen.Hanson@Sun.COM 
18687275Sstephh 		ASSERT(fmd_case_orphaned(cp));
18697275Sstephh 
18707275Sstephh 		/*
18717275Sstephh 		 * If all suspects are already either usable or not present then
18727275Sstephh 		 * carry on, publish list.resolved and discard the case.
18737275Sstephh 		 */
18747275Sstephh 		fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
18757275Sstephh 		    fmd_case_unusable_and_present, &any_unusable_and_present);
18767275Sstephh 		if (any_unusable_and_present) {
18777275Sstephh 			(void) pthread_mutex_unlock(&cip->ci_lock);
18787275Sstephh 			return;
18797275Sstephh 		}
18807275Sstephh 
18817275Sstephh 		resolved = 1;
18821193Smws 		break;
18830Sstevel@tonic-gate 	}
18840Sstevel@tonic-gate 
18850Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_lock);
18860Sstevel@tonic-gate 
18870Sstevel@tonic-gate 	/*
18881193Smws 	 * If the module has initialized, then publish the appropriate event
18891193Smws 	 * for the new case state.  If not, we are being called from the
18901193Smws 	 * checkpoint code during module load, in which case the module's
18911193Smws 	 * _fmd_init() routine hasn't finished yet, and our event dictionaries
18921193Smws 	 * may not be open yet, which will prevent us from computing the event
18931193Smws 	 * code.  Defer the call to fmd_case_publish() by enqueuing a PUBLISH
18941193Smws 	 * event in our queue: this won't be processed until _fmd_init is done.
18950Sstevel@tonic-gate 	 */
18960Sstevel@tonic-gate 	if (cip->ci_mod->mod_flags & FMD_MOD_INIT)
18970Sstevel@tonic-gate 		fmd_case_publish(cp, state);
18981193Smws 	else {
18991193Smws 		fmd_case_hold(cp);
19001193Smws 		e = fmd_event_create(FMD_EVT_PUBLISH, FMD_HRT_NOW, NULL, cp);
19011193Smws 		fmd_eventq_insert_at_head(cip->ci_mod->mod_queue, e);
19021193Smws 	}
19031193Smws 
19047275Sstephh 	if (resolved) {
190510656SStephen.Hanson@Sun.COM 		if (cip->ci_xprt != NULL) {
190610656SStephen.Hanson@Sun.COM 			/*
190710656SStephen.Hanson@Sun.COM 			 * If we transitioned to RESOLVED, adjust the reference
190810656SStephen.Hanson@Sun.COM 			 * count to reflect our removal from
190910656SStephen.Hanson@Sun.COM 			 * fmd.d_rmod->mod_cases above.  If the caller has not
191010656SStephen.Hanson@Sun.COM 			 * placed an additional hold on the case, it will now
191110656SStephen.Hanson@Sun.COM 			 * be freed.
191210656SStephen.Hanson@Sun.COM 			 */
191310656SStephen.Hanson@Sun.COM 			(void) pthread_mutex_lock(&cip->ci_lock);
191410656SStephen.Hanson@Sun.COM 			fmd_asru_hash_delete_case(fmd.d_asrus, cp);
191510656SStephen.Hanson@Sun.COM 			(void) pthread_mutex_unlock(&cip->ci_lock);
191610656SStephen.Hanson@Sun.COM 			fmd_case_rele(cp);
191710656SStephen.Hanson@Sun.COM 		} else {
191810656SStephen.Hanson@Sun.COM 			fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
191910656SStephen.Hanson@Sun.COM 			    fmd_asru_log_resolved, NULL);
192010656SStephen.Hanson@Sun.COM 			(void) pthread_mutex_lock(&cip->ci_lock);
192110656SStephen.Hanson@Sun.COM 			/* mark as "ready to be discarded */
192210656SStephen.Hanson@Sun.COM 			cip->ci_flags |= FMD_CF_RES_CMPL;
192310656SStephen.Hanson@Sun.COM 			(void) pthread_mutex_unlock(&cip->ci_lock);
192410656SStephen.Hanson@Sun.COM 		}
192510656SStephen.Hanson@Sun.COM 	}
192610656SStephen.Hanson@Sun.COM }
192710656SStephen.Hanson@Sun.COM 
192810656SStephen.Hanson@Sun.COM /*
192910656SStephen.Hanson@Sun.COM  * Discard any case if it is in RESOLVED state (and if check_if_aged argument
193010656SStephen.Hanson@Sun.COM  * is set if all suspects have passed the rsrc.aged time).
193110656SStephen.Hanson@Sun.COM  */
193210656SStephen.Hanson@Sun.COM void
fmd_case_discard_resolved(fmd_case_t * cp,void * arg)193310656SStephen.Hanson@Sun.COM fmd_case_discard_resolved(fmd_case_t *cp, void *arg)
193410656SStephen.Hanson@Sun.COM {
193510656SStephen.Hanson@Sun.COM 	int check_if_aged = *(int *)arg;
193610656SStephen.Hanson@Sun.COM 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
193710656SStephen.Hanson@Sun.COM 
193810656SStephen.Hanson@Sun.COM 	/*
193910656SStephen.Hanson@Sun.COM 	 * First check if case has completed transition to resolved.
194010656SStephen.Hanson@Sun.COM 	 */
194110656SStephen.Hanson@Sun.COM 	(void) pthread_mutex_lock(&cip->ci_lock);
194210656SStephen.Hanson@Sun.COM 	if (!(cip->ci_flags & FMD_CF_RES_CMPL)) {
19436228Sstephh 		(void) pthread_mutex_unlock(&cip->ci_lock);
194410656SStephen.Hanson@Sun.COM 		return;
19456228Sstephh 	}
194610656SStephen.Hanson@Sun.COM 
194710656SStephen.Hanson@Sun.COM 	/*
194810656SStephen.Hanson@Sun.COM 	 * Now if check_is_aged is set, see if all suspects have aged.
194910656SStephen.Hanson@Sun.COM 	 */
195010656SStephen.Hanson@Sun.COM 	if (check_if_aged) {
195110656SStephen.Hanson@Sun.COM 		int aged = 1;
195210656SStephen.Hanson@Sun.COM 
195310656SStephen.Hanson@Sun.COM 		fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
195410656SStephen.Hanson@Sun.COM 		    fmd_asru_check_if_aged, &aged);
195510656SStephen.Hanson@Sun.COM 		if (!aged) {
195610656SStephen.Hanson@Sun.COM 			(void) pthread_mutex_unlock(&cip->ci_lock);
195710656SStephen.Hanson@Sun.COM 			return;
195810656SStephen.Hanson@Sun.COM 		}
195910656SStephen.Hanson@Sun.COM 	}
196010656SStephen.Hanson@Sun.COM 
196110656SStephen.Hanson@Sun.COM 	/*
196210656SStephen.Hanson@Sun.COM 	 * Finally discard the case, clearing FMD_CF_RES_CMPL so we don't
196310656SStephen.Hanson@Sun.COM 	 * do it twice.
196410656SStephen.Hanson@Sun.COM 	 */
196510656SStephen.Hanson@Sun.COM 	fmd_module_lock(cip->ci_mod);
196610656SStephen.Hanson@Sun.COM 	fmd_list_delete(&cip->ci_mod->mod_cases, cip);
196710656SStephen.Hanson@Sun.COM 	fmd_module_unlock(cip->ci_mod);
196810656SStephen.Hanson@Sun.COM 	fmd_asru_hash_delete_case(fmd.d_asrus, cp);
196910656SStephen.Hanson@Sun.COM 	cip->ci_flags &= ~FMD_CF_RES_CMPL;
197010656SStephen.Hanson@Sun.COM 	(void) pthread_mutex_unlock(&cip->ci_lock);
197110656SStephen.Hanson@Sun.COM 	fmd_case_rele(cp);
19720Sstevel@tonic-gate }
19730Sstevel@tonic-gate 
19741429Smws /*
19751429Smws  * Transition the specified case to *at least* the specified state by first
19761429Smws  * re-validating the suspect list using the resource cache.  This function is
19771429Smws  * employed by the checkpoint code when restoring a saved, solved case to see
19781429Smws  * if the state of the case has effectively changed while fmd was not running
19797275Sstephh  * or the module was not loaded.
19801429Smws  */
19811429Smws void
fmd_case_transition_update(fmd_case_t * cp,uint_t state,uint_t flags)19821429Smws fmd_case_transition_update(fmd_case_t *cp, uint_t state, uint_t flags)
19831429Smws {
19841429Smws 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
19851429Smws 
19861429Smws 	int usable = 0;		/* are any suspects usable? */
19871429Smws 
19881429Smws 	ASSERT(state >= FMD_CASE_SOLVED);
19891429Smws 	(void) pthread_mutex_lock(&cip->ci_lock);
19901429Smws 
19916228Sstephh 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_usable, &usable);
19921429Smws 
19931429Smws 	(void) pthread_mutex_unlock(&cip->ci_lock);
19941429Smws 
19957275Sstephh 	if (!usable) {
19961429Smws 		state = MAX(state, FMD_CASE_CLOSE_WAIT);
19971429Smws 		flags |= FMD_CF_ISOLATED;
19981429Smws 	}
19991429Smws 
20001429Smws 	fmd_case_transition(cp, state, flags);
20011429Smws }
20021429Smws 
20030Sstevel@tonic-gate void
fmd_case_setdirty(fmd_case_t * cp)20040Sstevel@tonic-gate fmd_case_setdirty(fmd_case_t *cp)
20050Sstevel@tonic-gate {
20060Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
20070Sstevel@tonic-gate 
20080Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
20090Sstevel@tonic-gate 	cip->ci_flags |= FMD_CF_DIRTY;
20100Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_lock);
20110Sstevel@tonic-gate 
20120Sstevel@tonic-gate 	fmd_module_setcdirty(cip->ci_mod);
20130Sstevel@tonic-gate }
20140Sstevel@tonic-gate 
20150Sstevel@tonic-gate void
fmd_case_clrdirty(fmd_case_t * cp)20160Sstevel@tonic-gate fmd_case_clrdirty(fmd_case_t *cp)
20170Sstevel@tonic-gate {
20180Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
20190Sstevel@tonic-gate 
20200Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
20210Sstevel@tonic-gate 	cip->ci_flags &= ~FMD_CF_DIRTY;
20220Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_lock);
20230Sstevel@tonic-gate }
20240Sstevel@tonic-gate 
20250Sstevel@tonic-gate void
fmd_case_commit(fmd_case_t * cp)20260Sstevel@tonic-gate fmd_case_commit(fmd_case_t *cp)
20270Sstevel@tonic-gate {
20280Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
20290Sstevel@tonic-gate 	fmd_case_item_t *cit;
20300Sstevel@tonic-gate 
20310Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
20320Sstevel@tonic-gate 
20330Sstevel@tonic-gate 	if (cip->ci_flags & FMD_CF_DIRTY) {
20340Sstevel@tonic-gate 		for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next)
20350Sstevel@tonic-gate 			fmd_event_commit(cit->cit_event);
20360Sstevel@tonic-gate 
20370Sstevel@tonic-gate 		if (cip->ci_principal != NULL)
20380Sstevel@tonic-gate 			fmd_event_commit(cip->ci_principal);
20390Sstevel@tonic-gate 
20400Sstevel@tonic-gate 		fmd_buf_hash_commit(&cip->ci_bufs);
20410Sstevel@tonic-gate 		cip->ci_flags &= ~FMD_CF_DIRTY;
20420Sstevel@tonic-gate 	}
20430Sstevel@tonic-gate 
20440Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_lock);
20450Sstevel@tonic-gate }
20460Sstevel@tonic-gate 
20470Sstevel@tonic-gate /*
20489120SStephen.Hanson@Sun.COM  * On proxy side, send back repair/acquit/etc request to diagnosing side
20499120SStephen.Hanson@Sun.COM  */
20509120SStephen.Hanson@Sun.COM void
fmd_case_xprt_updated(fmd_case_t * cp)20519120SStephen.Hanson@Sun.COM fmd_case_xprt_updated(fmd_case_t *cp)
20529120SStephen.Hanson@Sun.COM {
20539120SStephen.Hanson@Sun.COM 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
20549120SStephen.Hanson@Sun.COM 	nvlist_t **nva;
20559120SStephen.Hanson@Sun.COM 	uint8_t *ba;
20569120SStephen.Hanson@Sun.COM 	int msg = B_TRUE;
20579120SStephen.Hanson@Sun.COM 	int count = 0;
20589120SStephen.Hanson@Sun.COM 	fmd_case_lst_t fcl;
20599120SStephen.Hanson@Sun.COM 
20609120SStephen.Hanson@Sun.COM 	ASSERT(cip->ci_xprt != NULL);
20619120SStephen.Hanson@Sun.COM 	(void) pthread_mutex_lock(&cip->ci_lock);
20629120SStephen.Hanson@Sun.COM 	ba = alloca(sizeof (uint8_t) * cip->ci_nsuspects);
20639120SStephen.Hanson@Sun.COM 	nva = alloca(sizeof (nvlist_t *) * cip->ci_nsuspects);
20649120SStephen.Hanson@Sun.COM 	fcl.fcl_countp = &count;
20659120SStephen.Hanson@Sun.COM 	fcl.fcl_maxcount = cip->ci_nsuspects;
20669120SStephen.Hanson@Sun.COM 	fcl.fcl_msgp = &msg;
20679120SStephen.Hanson@Sun.COM 	fcl.fcl_ba = ba;
20689120SStephen.Hanson@Sun.COM 	fcl.fcl_nva = nva;
20699120SStephen.Hanson@Sun.COM 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_set_lst, &fcl);
20709120SStephen.Hanson@Sun.COM 	(void) pthread_mutex_unlock(&cip->ci_lock);
20719120SStephen.Hanson@Sun.COM 	fmd_xprt_updated(cip->ci_xprt, cip->ci_uuid, ba, cip->ci_proxy_asru,
20729120SStephen.Hanson@Sun.COM 	    count);
20739120SStephen.Hanson@Sun.COM }
20749120SStephen.Hanson@Sun.COM 
20759120SStephen.Hanson@Sun.COM /*
20769120SStephen.Hanson@Sun.COM  * fmd_case_update_status() can be called on either the proxy side when a
20779120SStephen.Hanson@Sun.COM  * list.suspect is received, or on the diagnosing side when an update request
20789120SStephen.Hanson@Sun.COM  * is received from the proxy. It updates the status in the resource cache.
20799120SStephen.Hanson@Sun.COM  */
20809120SStephen.Hanson@Sun.COM void
fmd_case_update_status(fmd_case_t * cp,uint8_t * statusp,uint8_t * proxy_asrup,uint8_t * diag_asrup)20819120SStephen.Hanson@Sun.COM fmd_case_update_status(fmd_case_t *cp, uint8_t *statusp, uint8_t *proxy_asrup,
20829120SStephen.Hanson@Sun.COM     uint8_t *diag_asrup)
20839120SStephen.Hanson@Sun.COM {
20849120SStephen.Hanson@Sun.COM 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
20859120SStephen.Hanson@Sun.COM 	int count = 0;
20869120SStephen.Hanson@Sun.COM 	fmd_asru_update_status_t faus;
20879120SStephen.Hanson@Sun.COM 
20889120SStephen.Hanson@Sun.COM 	/*
20899120SStephen.Hanson@Sun.COM 	 * update status of resource cache entries
20909120SStephen.Hanson@Sun.COM 	 */
20919120SStephen.Hanson@Sun.COM 	faus.faus_countp = &count;
20929120SStephen.Hanson@Sun.COM 	faus.faus_maxcount = cip->ci_nsuspects;
20939120SStephen.Hanson@Sun.COM 	faus.faus_ba = statusp;
20949120SStephen.Hanson@Sun.COM 	faus.faus_proxy_asru = proxy_asrup;
20959120SStephen.Hanson@Sun.COM 	faus.faus_diag_asru = diag_asrup;
20969120SStephen.Hanson@Sun.COM 	faus.faus_is_proxy = (cip->ci_xprt != NULL);
20979120SStephen.Hanson@Sun.COM 	(void) pthread_mutex_lock(&cip->ci_lock);
20989120SStephen.Hanson@Sun.COM 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_update_status,
20999120SStephen.Hanson@Sun.COM 	    &faus);
21009120SStephen.Hanson@Sun.COM 	(void) pthread_mutex_unlock(&cip->ci_lock);
21019120SStephen.Hanson@Sun.COM }
21029120SStephen.Hanson@Sun.COM 
21039120SStephen.Hanson@Sun.COM /*
21049120SStephen.Hanson@Sun.COM  * Called on either the proxy side or the diag side when a repair has taken
21059120SStephen.Hanson@Sun.COM  * place on the other side but this side may know the asru "contains"
21069120SStephen.Hanson@Sun.COM  * relationships.
21079120SStephen.Hanson@Sun.COM  */
21089120SStephen.Hanson@Sun.COM void
fmd_case_update_containees(fmd_case_t * cp)21099120SStephen.Hanson@Sun.COM fmd_case_update_containees(fmd_case_t *cp)
21109120SStephen.Hanson@Sun.COM {
21119120SStephen.Hanson@Sun.COM 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
21129120SStephen.Hanson@Sun.COM 
21139120SStephen.Hanson@Sun.COM 	(void) pthread_mutex_lock(&cip->ci_lock);
21149120SStephen.Hanson@Sun.COM 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
21159120SStephen.Hanson@Sun.COM 	    fmd_asru_update_containees, NULL);
21169120SStephen.Hanson@Sun.COM 	(void) pthread_mutex_unlock(&cip->ci_lock);
21179120SStephen.Hanson@Sun.COM }
21189120SStephen.Hanson@Sun.COM 
21199120SStephen.Hanson@Sun.COM /*
21209120SStephen.Hanson@Sun.COM  * fmd_case_close_status() is called on diagnosing side when proxy side
21219120SStephen.Hanson@Sun.COM  * has had a uuclose. It updates the status in the resource cache.
21229120SStephen.Hanson@Sun.COM  */
21239120SStephen.Hanson@Sun.COM void
fmd_case_close_status(fmd_case_t * cp)21249120SStephen.Hanson@Sun.COM fmd_case_close_status(fmd_case_t *cp)
21259120SStephen.Hanson@Sun.COM {
21269120SStephen.Hanson@Sun.COM 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
21279120SStephen.Hanson@Sun.COM 	int count = 0;
21289120SStephen.Hanson@Sun.COM 	fmd_asru_close_status_t facs;
21299120SStephen.Hanson@Sun.COM 
21309120SStephen.Hanson@Sun.COM 	/*
21319120SStephen.Hanson@Sun.COM 	 * update status of resource cache entries
21329120SStephen.Hanson@Sun.COM 	 */
21339120SStephen.Hanson@Sun.COM 	facs.facs_countp = &count;
21349120SStephen.Hanson@Sun.COM 	facs.facs_maxcount = cip->ci_nsuspects;
21359120SStephen.Hanson@Sun.COM 	(void) pthread_mutex_lock(&cip->ci_lock);
21369120SStephen.Hanson@Sun.COM 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_close_status,
21379120SStephen.Hanson@Sun.COM 	    &facs);
21389120SStephen.Hanson@Sun.COM 	(void) pthread_mutex_unlock(&cip->ci_lock);
21399120SStephen.Hanson@Sun.COM }
21409120SStephen.Hanson@Sun.COM 
21419120SStephen.Hanson@Sun.COM /*
21420Sstevel@tonic-gate  * Indicate that the case may need to change state because one or more of the
21430Sstevel@tonic-gate  * ASRUs named as a suspect has changed state.  We examine all the suspects
21440Sstevel@tonic-gate  * and if none are still faulty, we initiate a case close transition.
21450Sstevel@tonic-gate  */
21460Sstevel@tonic-gate void
fmd_case_update(fmd_case_t * cp)21470Sstevel@tonic-gate fmd_case_update(fmd_case_t *cp)
21480Sstevel@tonic-gate {
21490Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
21501193Smws 	uint_t cstate;
21516228Sstephh 	int faulty = 0;
21520Sstevel@tonic-gate 
21530Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
21541193Smws 	cstate = cip->ci_state;
21550Sstevel@tonic-gate 
21569120SStephen.Hanson@Sun.COM 	if (cip->ci_state < FMD_CASE_SOLVED) {
21570Sstevel@tonic-gate 		(void) pthread_mutex_unlock(&cip->ci_lock);
21581193Smws 		return; /* update is not appropriate */
21590Sstevel@tonic-gate 	}
21600Sstevel@tonic-gate 
21616228Sstephh 	if (cip->ci_flags & FMD_CF_REPAIRED) {
21626228Sstephh 		(void) pthread_mutex_unlock(&cip->ci_lock);
21636228Sstephh 		return; /* already repaired */
21640Sstevel@tonic-gate 	}
21650Sstevel@tonic-gate 
21669120SStephen.Hanson@Sun.COM 	TRACE((FMD_DBG_CASE, "case update %s", cip->ci_uuid));
21676228Sstephh 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty);
21680Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_lock);
21690Sstevel@tonic-gate 
21707275Sstephh 	if (faulty) {
21717275Sstephh 		nvlist_t *nvl;
21727275Sstephh 		fmd_event_t *e;
21737275Sstephh 		char *class;
21747275Sstephh 
21759120SStephen.Hanson@Sun.COM 		TRACE((FMD_DBG_CASE, "sending list.updated %s", cip->ci_uuid));
21767275Sstephh 		nvl = fmd_case_mkevent(cp, FM_LIST_UPDATED_CLASS);
21777275Sstephh 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
21787275Sstephh 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
21797275Sstephh 		(void) pthread_rwlock_rdlock(&fmd.d_log_lock);
21807275Sstephh 		fmd_log_append(fmd.d_fltlog, e, cp);
21817275Sstephh 		(void) pthread_rwlock_unlock(&fmd.d_log_lock);
21827275Sstephh 		fmd_dispq_dispatch(fmd.d_disp, e, class);
21831193Smws 		return; /* one or more suspects are still marked faulty */
21847275Sstephh 	}
21851193Smws 
21861193Smws 	if (cstate == FMD_CASE_CLOSED)
21871193Smws 		fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
21881193Smws 	else
21891193Smws 		fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED);
21901193Smws }
21911193Smws 
21921193Smws /*
21931193Smws  * Delete a closed case from the module's case list once the fmdo_close() entry
21941193Smws  * point has run to completion.  If the case is owned by a transport module,
21951193Smws  * tell the transport to proxy a case close on the other end of the transport.
21969120SStephen.Hanson@Sun.COM  * Transition to the appropriate next state based on ci_flags.  This
21971193Smws  * function represents the end of CLOSE_WAIT and transitions the case to either
21981193Smws  * CLOSED or REPAIRED or discards it entirely because it was never solved;
21991193Smws  * refer to the topmost block comment explaining the state machine for details.
22001193Smws  */
22011193Smws void
fmd_case_delete(fmd_case_t * cp)22021193Smws fmd_case_delete(fmd_case_t *cp)
22031193Smws {
22041193Smws 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
22051429Smws 	fmd_modstat_t *msp;
22061429Smws 	size_t buftotal;
22071193Smws 
22089120SStephen.Hanson@Sun.COM 	TRACE((FMD_DBG_CASE, "case delete %s", cip->ci_uuid));
22091193Smws 	ASSERT(fmd_module_locked(cip->ci_mod));
22101193Smws 	fmd_list_delete(&cip->ci_mod->mod_cases, cip);
22111429Smws 	buftotal = fmd_buf_hash_destroy(&cip->ci_bufs);
22121429Smws 
22131429Smws 	(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
22141429Smws 	msp = cip->ci_mod->mod_stats;
22151429Smws 
22161429Smws 	ASSERT(msp->ms_caseopen.fmds_value.ui64 != 0);
22171429Smws 	msp->ms_caseopen.fmds_value.ui64--;
22181429Smws 
22191429Smws 	ASSERT(msp->ms_buftotal.fmds_value.ui64 >= buftotal);
22201429Smws 	msp->ms_buftotal.fmds_value.ui64 -= buftotal;
22211429Smws 
22221429Smws 	(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
22231193Smws 
22241193Smws 	if (cip->ci_xprt == NULL)
22251193Smws 		fmd_module_setcdirty(cip->ci_mod);
22261193Smws 
22271193Smws 	fmd_module_rele(cip->ci_mod);
22281193Smws 	cip->ci_mod = fmd.d_rmod;
22291193Smws 	fmd_module_hold(cip->ci_mod);
22301193Smws 
22311193Smws 	/*
22329120SStephen.Hanson@Sun.COM 	 * If the case has been solved, then retain it
22331552Smws 	 * on the root module's case list at least until we're transitioned.
22341552Smws 	 * Otherwise free the case with our final fmd_case_rele() below.
22351552Smws 	 */
22369120SStephen.Hanson@Sun.COM 	if (cip->ci_flags & FMD_CF_SOLVED) {
22371552Smws 		fmd_module_lock(cip->ci_mod);
22381552Smws 		fmd_list_append(&cip->ci_mod->mod_cases, cip);
22391552Smws 		fmd_module_unlock(cip->ci_mod);
22401552Smws 		fmd_case_hold(cp);
22411552Smws 	}
22421552Smws 
22431552Smws 	/*
22449120SStephen.Hanson@Sun.COM 	 * Transition onwards to REPAIRED or CLOSED as originally requested.
22459120SStephen.Hanson@Sun.COM 	 * Note that for proxy case if we're transitioning to CLOSED it means
22469120SStephen.Hanson@Sun.COM 	 * the case was isolated locally, so call fmd_xprt_uuclose() to notify
22479120SStephen.Hanson@Sun.COM 	 * the diagnosing side. No need to notify the diagnosing side if we are
22489120SStephen.Hanson@Sun.COM 	 * transitioning to REPAIRED as we only do this when requested to do
22499120SStephen.Hanson@Sun.COM 	 * so by the diagnosing side anyway.
22501193Smws 	 */
22519120SStephen.Hanson@Sun.COM 	if (cip->ci_flags & FMD_CF_REPAIRED)
22521193Smws 		fmd_case_transition(cp, FMD_CASE_REPAIRED, 0);
22539120SStephen.Hanson@Sun.COM 	else if (cip->ci_flags & FMD_CF_ISOLATED) {
22541193Smws 		fmd_case_transition(cp, FMD_CASE_CLOSED, 0);
22559120SStephen.Hanson@Sun.COM 		if (cip->ci_xprt != NULL)
22569120SStephen.Hanson@Sun.COM 			fmd_xprt_uuclose(cip->ci_xprt, cip->ci_uuid);
22579120SStephen.Hanson@Sun.COM 	}
22581193Smws 
22591193Smws 	fmd_case_rele(cp);
22601193Smws }
22611193Smws 
22621193Smws void
fmd_case_discard(fmd_case_t * cp,boolean_t delete_from_asru_cache)22639120SStephen.Hanson@Sun.COM fmd_case_discard(fmd_case_t *cp, boolean_t delete_from_asru_cache)
22641193Smws {
22651193Smws 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
22661193Smws 
22671193Smws 	(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
22681193Smws 	cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64--;
22691193Smws 	(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
22701193Smws 
22711193Smws 	ASSERT(fmd_module_locked(cip->ci_mod));
22721193Smws 	fmd_list_delete(&cip->ci_mod->mod_cases, cip);
22739120SStephen.Hanson@Sun.COM 	if (delete_from_asru_cache) {
22749120SStephen.Hanson@Sun.COM 		(void) pthread_mutex_lock(&cip->ci_lock);
22759120SStephen.Hanson@Sun.COM 		fmd_asru_hash_delete_case(fmd.d_asrus, cp);
22769120SStephen.Hanson@Sun.COM 		(void) pthread_mutex_unlock(&cip->ci_lock);
22779120SStephen.Hanson@Sun.COM 	}
22781193Smws 	fmd_case_rele(cp);
22790Sstevel@tonic-gate }
22800Sstevel@tonic-gate 
22810Sstevel@tonic-gate /*
22820Sstevel@tonic-gate  * Indicate that the problem corresponding to a case has been repaired by
22831193Smws  * clearing the faulty bit on each ASRU named as a suspect.  If the case hasn't
22841193Smws  * already been closed, this function initiates the transition to CLOSE_WAIT.
22851193Smws  * The caller must have the case held from fmd_case_hash_lookup(), so we can
22861193Smws  * grab and drop ci_lock without the case being able to be freed in between.
22870Sstevel@tonic-gate  */
22880Sstevel@tonic-gate int
fmd_case_repair(fmd_case_t * cp)22890Sstevel@tonic-gate fmd_case_repair(fmd_case_t *cp)
22900Sstevel@tonic-gate {
22910Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
22921193Smws 	uint_t cstate;
22939120SStephen.Hanson@Sun.COM 	fmd_asru_rep_arg_t fara;
22941193Smws 
22950Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
22961193Smws 	cstate = cip->ci_state;
22970Sstevel@tonic-gate 
22986228Sstephh 	if (cstate < FMD_CASE_SOLVED) {
22990Sstevel@tonic-gate 		(void) pthread_mutex_unlock(&cip->ci_lock);
23000Sstevel@tonic-gate 		return (fmd_set_errno(EFMD_CASE_STATE));
23010Sstevel@tonic-gate 	}
23020Sstevel@tonic-gate 
23036228Sstephh 	if (cip->ci_flags & FMD_CF_REPAIRED) {
23046228Sstephh 		(void) pthread_mutex_unlock(&cip->ci_lock);
23056228Sstephh 		return (0); /* already repaired */
23060Sstevel@tonic-gate 	}
23070Sstevel@tonic-gate 
23089120SStephen.Hanson@Sun.COM 	TRACE((FMD_DBG_CASE, "case repair %s", cip->ci_uuid));
23099120SStephen.Hanson@Sun.COM 	fara.fara_reason = FMD_ASRU_REPAIRED;
23109120SStephen.Hanson@Sun.COM 	fara.fara_bywhat = FARA_BY_CASE;
23119120SStephen.Hanson@Sun.COM 	fara.fara_rval = NULL;
23129120SStephen.Hanson@Sun.COM 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_repaired, &fara);
23137275Sstephh 	(void) pthread_mutex_unlock(&cip->ci_lock);
23147275Sstephh 
23159120SStephen.Hanson@Sun.COM 	/*
23169120SStephen.Hanson@Sun.COM 	 * if this is a proxied case, send the repair across the transport.
23179120SStephen.Hanson@Sun.COM 	 * The remote side will then do the repair and send a list.repaired back
23189120SStephen.Hanson@Sun.COM 	 * again such that we can finally repair the case on this side.
23199120SStephen.Hanson@Sun.COM 	 */
23209120SStephen.Hanson@Sun.COM 	if (cip->ci_xprt != NULL) {
23219120SStephen.Hanson@Sun.COM 		fmd_case_xprt_updated(cp);
23229120SStephen.Hanson@Sun.COM 		return (0);
23239120SStephen.Hanson@Sun.COM 	}
23249120SStephen.Hanson@Sun.COM 
23257275Sstephh 	if (cstate == FMD_CASE_CLOSED)
23267275Sstephh 		fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
23277275Sstephh 	else
23287275Sstephh 		fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED);
23297275Sstephh 
23307275Sstephh 	return (0);
23317275Sstephh }
23327275Sstephh 
23337275Sstephh int
fmd_case_acquit(fmd_case_t * cp)23347275Sstephh fmd_case_acquit(fmd_case_t *cp)
23357275Sstephh {
23367275Sstephh 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
23377275Sstephh 	uint_t cstate;
23389120SStephen.Hanson@Sun.COM 	fmd_asru_rep_arg_t fara;
23397275Sstephh 
23407275Sstephh 	(void) pthread_mutex_lock(&cip->ci_lock);
23417275Sstephh 	cstate = cip->ci_state;
23427275Sstephh 
23437275Sstephh 	if (cstate < FMD_CASE_SOLVED) {
23447275Sstephh 		(void) pthread_mutex_unlock(&cip->ci_lock);
23457275Sstephh 		return (fmd_set_errno(EFMD_CASE_STATE));
23467275Sstephh 	}
23477275Sstephh 
23487275Sstephh 	if (cip->ci_flags & FMD_CF_REPAIRED) {
23497275Sstephh 		(void) pthread_mutex_unlock(&cip->ci_lock);
23507275Sstephh 		return (0); /* already repaired */
23517275Sstephh 	}
23527275Sstephh 
23539120SStephen.Hanson@Sun.COM 	TRACE((FMD_DBG_CASE, "case acquit %s", cip->ci_uuid));
23549120SStephen.Hanson@Sun.COM 	fara.fara_reason = FMD_ASRU_ACQUITTED;
23559120SStephen.Hanson@Sun.COM 	fara.fara_bywhat = FARA_BY_CASE;
23569120SStephen.Hanson@Sun.COM 	fara.fara_rval = NULL;
23579120SStephen.Hanson@Sun.COM 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_repaired, &fara);
23581552Smws 	(void) pthread_mutex_unlock(&cip->ci_lock);
23591552Smws 
23609120SStephen.Hanson@Sun.COM 	/*
23619120SStephen.Hanson@Sun.COM 	 * if this is a proxied case, send the repair across the transport.
23629120SStephen.Hanson@Sun.COM 	 * The remote side will then do the repair and send a list.repaired back
23639120SStephen.Hanson@Sun.COM 	 * again such that we can finally repair the case on this side.
23649120SStephen.Hanson@Sun.COM 	 */
23659120SStephen.Hanson@Sun.COM 	if (cip->ci_xprt != NULL) {
23669120SStephen.Hanson@Sun.COM 		fmd_case_xprt_updated(cp);
23679120SStephen.Hanson@Sun.COM 		return (0);
23689120SStephen.Hanson@Sun.COM 	}
23699120SStephen.Hanson@Sun.COM 
23701193Smws 	if (cstate == FMD_CASE_CLOSED)
23711193Smws 		fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
23721193Smws 	else
23731193Smws 		fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED);
23741193Smws 
23750Sstevel@tonic-gate 	return (0);
23760Sstevel@tonic-gate }
23770Sstevel@tonic-gate 
23780Sstevel@tonic-gate int
fmd_case_contains(fmd_case_t * cp,fmd_event_t * ep)23790Sstevel@tonic-gate fmd_case_contains(fmd_case_t *cp, fmd_event_t *ep)
23800Sstevel@tonic-gate {
23810Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
23820Sstevel@tonic-gate 	fmd_case_item_t *cit;
23830Sstevel@tonic-gate 	uint_t state;
23840Sstevel@tonic-gate 	int rv = 0;
23850Sstevel@tonic-gate 
23860Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
23870Sstevel@tonic-gate 
23880Sstevel@tonic-gate 	if (cip->ci_state >= FMD_CASE_SOLVED)
23890Sstevel@tonic-gate 		state = FMD_EVS_DIAGNOSED;
23900Sstevel@tonic-gate 	else
23910Sstevel@tonic-gate 		state = FMD_EVS_ACCEPTED;
23920Sstevel@tonic-gate 
23930Sstevel@tonic-gate 	for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
23940Sstevel@tonic-gate 		if ((rv = fmd_event_equal(ep, cit->cit_event)) != 0)
23950Sstevel@tonic-gate 			break;
23960Sstevel@tonic-gate 	}
23970Sstevel@tonic-gate 
23980Sstevel@tonic-gate 	if (rv == 0 && cip->ci_principal != NULL)
23990Sstevel@tonic-gate 		rv = fmd_event_equal(ep, cip->ci_principal);
24000Sstevel@tonic-gate 
24010Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_lock);
24020Sstevel@tonic-gate 
24030Sstevel@tonic-gate 	if (rv != 0)
24040Sstevel@tonic-gate 		fmd_event_transition(ep, state);
24050Sstevel@tonic-gate 
24060Sstevel@tonic-gate 	return (rv);
24070Sstevel@tonic-gate }
24081193Smws 
24091193Smws int
fmd_case_orphaned(fmd_case_t * cp)24101193Smws fmd_case_orphaned(fmd_case_t *cp)
24111193Smws {
24121193Smws 	return (((fmd_case_impl_t *)cp)->ci_mod == fmd.d_rmod);
24131193Smws }
24145255Sstephh 
24155255Sstephh void
fmd_case_settime(fmd_case_t * cp,time_t tv_sec,suseconds_t tv_usec)24165255Sstephh fmd_case_settime(fmd_case_t *cp, time_t tv_sec, suseconds_t tv_usec)
24175255Sstephh {
24185255Sstephh 	((fmd_case_impl_t *)cp)->ci_tv.tv_sec = tv_sec;
24195255Sstephh 	((fmd_case_impl_t *)cp)->ci_tv.tv_usec = tv_usec;
24205255Sstephh 	((fmd_case_impl_t *)cp)->ci_tv_valid = 1;
24215255Sstephh }
24227275Sstephh 
24239120SStephen.Hanson@Sun.COM void
fmd_case_set_injected(fmd_case_t * cp)242410928SStephen.Hanson@Sun.COM fmd_case_set_injected(fmd_case_t *cp)
242510928SStephen.Hanson@Sun.COM {
242610928SStephen.Hanson@Sun.COM 	((fmd_case_impl_t *)cp)->ci_injected = 1;
242710928SStephen.Hanson@Sun.COM }
242810928SStephen.Hanson@Sun.COM 
242910928SStephen.Hanson@Sun.COM void
fmd_case_set_de_fmri(fmd_case_t * cp,nvlist_t * nvl)24309120SStephen.Hanson@Sun.COM fmd_case_set_de_fmri(fmd_case_t *cp, nvlist_t *nvl)
24319120SStephen.Hanson@Sun.COM {
24329120SStephen.Hanson@Sun.COM 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
24339120SStephen.Hanson@Sun.COM 
24349120SStephen.Hanson@Sun.COM 	if (cip->ci_diag_de)
24359120SStephen.Hanson@Sun.COM 		nvlist_free(cip->ci_diag_de);
24369120SStephen.Hanson@Sun.COM 	cip->ci_diag_de = nvl;
24379120SStephen.Hanson@Sun.COM }
24389120SStephen.Hanson@Sun.COM 
24399120SStephen.Hanson@Sun.COM void
fmd_case_setcode(fmd_case_t * cp,char * code)24409120SStephen.Hanson@Sun.COM fmd_case_setcode(fmd_case_t *cp, char *code)
24419120SStephen.Hanson@Sun.COM {
24429120SStephen.Hanson@Sun.COM 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
24439120SStephen.Hanson@Sun.COM 
24449120SStephen.Hanson@Sun.COM 	cip->ci_code = fmd_strdup(code, FMD_SLEEP);
24459120SStephen.Hanson@Sun.COM 	cip->ci_codelen = cip->ci_code ? strlen(cip->ci_code) + 1 : 0;
24469120SStephen.Hanson@Sun.COM }
24479120SStephen.Hanson@Sun.COM 
24487275Sstephh /*ARGSUSED*/
244910656SStephen.Hanson@Sun.COM static void
fmd_case_repair_replay_case(fmd_case_t * cp,void * arg)24507275Sstephh fmd_case_repair_replay_case(fmd_case_t *cp, void *arg)
24517275Sstephh {
24527275Sstephh 	int not_faulty = 0;
24537275Sstephh 	int faulty = 0;
24547275Sstephh 	nvlist_t *nvl;
24557275Sstephh 	fmd_event_t *e;
24567275Sstephh 	char *class;
24577275Sstephh 	int any_unusable_and_present = 0;
24587275Sstephh 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
24597275Sstephh 
24609120SStephen.Hanson@Sun.COM 	if (cip->ci_state < FMD_CASE_SOLVED || cip->ci_xprt != NULL)
24617275Sstephh 		return;
24627275Sstephh 
246310656SStephen.Hanson@Sun.COM 	if (cip->ci_state == FMD_CASE_RESOLVED) {
246410656SStephen.Hanson@Sun.COM 		cip->ci_flags |= FMD_CF_RES_CMPL;
246510656SStephen.Hanson@Sun.COM 		return;
246610656SStephen.Hanson@Sun.COM 	}
246710656SStephen.Hanson@Sun.COM 
24687275Sstephh 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty);
24697275Sstephh 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_not_faulty,
24707275Sstephh 	    &not_faulty);
24717275Sstephh 
24727913SStephen.Hanson@Sun.COM 	if (cip->ci_state >= FMD_CASE_REPAIRED && !faulty) {
24737275Sstephh 		/*
24747275Sstephh 		 * If none of the suspects is faulty, replay the list.repaired.
24757275Sstephh 		 * If all suspects are already either usable or not present then
24767275Sstephh 		 * also transition straight to RESOLVED state.
24777275Sstephh 		 */
24787275Sstephh 		fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
24797275Sstephh 		    fmd_case_unusable_and_present, &any_unusable_and_present);
24807275Sstephh 		if (!any_unusable_and_present) {
24817275Sstephh 			cip->ci_state = FMD_CASE_RESOLVED;
24827275Sstephh 
24839120SStephen.Hanson@Sun.COM 			TRACE((FMD_DBG_CASE, "replay sending list.repaired %s",
24849120SStephen.Hanson@Sun.COM 			    cip->ci_uuid));
24857275Sstephh 			nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
24867275Sstephh 			(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
24877275Sstephh 			e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl,
24887275Sstephh 			    class);
24897275Sstephh 			fmd_dispq_dispatch(fmd.d_disp, e, class);
24907275Sstephh 
24919120SStephen.Hanson@Sun.COM 			TRACE((FMD_DBG_CASE, "replay sending list.resolved %s",
24929120SStephen.Hanson@Sun.COM 			    cip->ci_uuid));
24937275Sstephh 			fmd_case_publish(cp, FMD_CASE_RESOLVED);
249410928SStephen.Hanson@Sun.COM 			fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
249510928SStephen.Hanson@Sun.COM 			    fmd_asru_log_resolved, NULL);
249610656SStephen.Hanson@Sun.COM 			cip->ci_flags |= FMD_CF_RES_CMPL;
24977275Sstephh 		} else {
24989120SStephen.Hanson@Sun.COM 			TRACE((FMD_DBG_CASE, "replay sending list.repaired %s",
24999120SStephen.Hanson@Sun.COM 			    cip->ci_uuid));
25007275Sstephh 			nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
25017275Sstephh 			(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
25027275Sstephh 			e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl,
25037275Sstephh 			    class);
25047275Sstephh 			fmd_dispq_dispatch(fmd.d_disp, e, class);
25057275Sstephh 		}
25067913SStephen.Hanson@Sun.COM 	} else if (faulty && not_faulty) {
25077275Sstephh 		/*
25087275Sstephh 		 * if some but not all of the suspects are not faulty, replay
25097275Sstephh 		 * the list.updated.
25107275Sstephh 		 */
25119120SStephen.Hanson@Sun.COM 		TRACE((FMD_DBG_CASE, "replay sending list.updated %s",
25129120SStephen.Hanson@Sun.COM 		    cip->ci_uuid));
25137275Sstephh 		nvl = fmd_case_mkevent(cp, FM_LIST_UPDATED_CLASS);
25147275Sstephh 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
25157275Sstephh 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
25167275Sstephh 		fmd_dispq_dispatch(fmd.d_disp, e, class);
25177275Sstephh 	}
25187275Sstephh }
25197275Sstephh 
25207275Sstephh void
fmd_case_repair_replay()25217275Sstephh fmd_case_repair_replay()
25227275Sstephh {
25237275Sstephh 	fmd_case_hash_apply(fmd.d_cases, fmd_case_repair_replay_case, NULL);
25247275Sstephh }
2525