/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */
250Sstevel@tonic-gate
/*
 * FMD Case Subsystem
 *
 * Diagnosis engines are expected to group telemetry events related to the
 * diagnosis of a particular problem on the system into a set of cases.  The
 * diagnosis engine may have any number of cases open at a given point in time.
 * Some cases may eventually be *solved* by associating a suspect list of one
 * or more problems with the case, at which point fmd publishes a list.suspect
 * event for the case and it becomes visible to administrators and agents.
 *
 * Every case is named using a UUID, and is globally visible in the case hash.
 * Cases are reference-counted, except for the reference from the case hash
 * itself.  Consumers of case references include modules, which store active
 * cases on the mod_cases list, ASRUs in the resource cache, and the RPC code.
 *
 * Cases obey the following state machine.  In states UNSOLVED, SOLVED, and
 * CLOSE_WAIT, a case's module refers to the owning module (a diagnosis engine
 * or transport) and the case is referenced by the mod_cases list.  Once the
 * case reaches the CLOSED or REPAIRED states, a case's module changes to refer
 * to the root module (fmd.d_rmod) and is deleted from the owner's mod_cases.
 *
 *                     +------------+
 *          +----------|  UNSOLVED  |
 *          |          +------------+
 *          |                1 |
 *          |                  |
 *          |          +-------v----+
 *        2 |          |   SOLVED   |
 *          |          +------------+
 *          |              3 |  5 |
 *          +------------+   |    |
 *                       |   |    |
 *                     +-v---v----v-+
 *                     | CLOSE_WAIT |
 *                     +------------+
 *                       |   |    |
 *           +-----------+   |    +------------+
 *           |             4 |                 |
 *           v         +-----v------+          |
 *        discard      |   CLOSED   |        6 |
 *                     +------------+          |
 *                           |                 |
 *                           |    +------------+
 *                         7 |    |
 *                     +-----v----v-+
 *                     |  REPAIRED  |
 *                     +------------+
 *                           |
 *                         8 |
 *                     +-----v------+
 *                     |  RESOLVED  |
 *                     +------------+
 *                           |
 *                           v
 *                        discard
 *
 * The state machine changes are triggered by calls to fmd_case_transition()
 * from various locations inside of fmd, as described below:
 *
 * [1] Called by: fmd_case_solve()
 *       Actions: FMD_CF_SOLVED flag is set in ci_flags
 *                conviction policy is applied to suspect list
 *                suspects convicted are marked faulty (F) in R$
 *                list.suspect event logged and dispatched
 *
 * [2] Called by: fmd_case_close(), fmd_case_uuclose()
 *       Actions: diagnosis engine fmdo_close() entry point scheduled
 *                case discarded upon exit from CLOSE_WAIT
 *
 * [3] Called by: fmd_case_close(), fmd_case_uuclose(), fmd_xprt_event_uuclose()
 *       Actions: FMD_CF_ISOLATED flag is set in ci_flags
 *                suspects convicted (F) are marked unusable (U) in R$
 *                diagnosis engine fmdo_close() entry point scheduled
 *                case transitions to CLOSED [4] upon exit from CLOSE_WAIT
 *
 * [4] Called by: fmd_case_delete() (after fmdo_close() entry point returns)
 *       Actions: list.isolated event dispatched
 *                case deleted from module's list of open cases
 *
 * [5] Called by: fmd_case_repair(), fmd_case_update()
 *       Actions: FMD_CF_REPAIR flag is set in ci_flags
 *                diagnosis engine fmdo_close() entry point scheduled
 *                case transitions to REPAIRED [6] upon exit from CLOSE_WAIT
 *
 * [6] Called by: fmd_case_delete() (after fmdo_close() entry point returns)
 *       Actions: suspects convicted are marked non faulty (!F) in R$
 *                list.repaired or list.updated event dispatched
 *
 * [7] Called by: fmd_case_repair(), fmd_case_update()
 *       Actions: FMD_CF_REPAIR flag is set in ci_flags
 *                suspects convicted are marked non faulty (!F) in R$
 *                list.repaired or list.updated event dispatched
 *
 * [8] Called by: fmd_case_uuresolve()
 *       Actions: list.resolved event dispatched
 *                case is discarded
 */
1231193Smws
1240Sstevel@tonic-gate #include <sys/fm/protocol.h>
1250Sstevel@tonic-gate #include <uuid/uuid.h>
1260Sstevel@tonic-gate #include <alloca.h>
1270Sstevel@tonic-gate
1280Sstevel@tonic-gate #include <fmd_alloc.h>
1290Sstevel@tonic-gate #include <fmd_module.h>
1300Sstevel@tonic-gate #include <fmd_error.h>
1310Sstevel@tonic-gate #include <fmd_conf.h>
1320Sstevel@tonic-gate #include <fmd_case.h>
1330Sstevel@tonic-gate #include <fmd_string.h>
1340Sstevel@tonic-gate #include <fmd_subr.h>
1350Sstevel@tonic-gate #include <fmd_protocol.h>
1360Sstevel@tonic-gate #include <fmd_event.h>
1370Sstevel@tonic-gate #include <fmd_eventq.h>
1380Sstevel@tonic-gate #include <fmd_dispq.h>
1390Sstevel@tonic-gate #include <fmd_buf.h>
1400Sstevel@tonic-gate #include <fmd_log.h>
1410Sstevel@tonic-gate #include <fmd_asru.h>
1421429Smws #include <fmd_fmri.h>
1431193Smws #include <fmd_xprt.h>
1440Sstevel@tonic-gate
1450Sstevel@tonic-gate #include <fmd.h>
1460Sstevel@tonic-gate
/*
 * Printable names of the case states.  Entries are listed in the order of the
 * FMD_CASE_* constants noted beside them; presumably the table is indexed by
 * the state value -- confirm against the FMD_CASE_* definitions.
 */
static const char *const _fmd_case_snames[] = {
	"UNSOLVED",	/* FMD_CASE_UNSOLVED */
	"SOLVED",	/* FMD_CASE_SOLVED */
	"CLOSE_WAIT",	/* FMD_CASE_CLOSE_WAIT */
	"CLOSED",	/* FMD_CASE_CLOSED */
	"REPAIRED",	/* FMD_CASE_REPAIRED */
	"RESOLVED"	/* FMD_CASE_RESOLVED */
};
1550Sstevel@tonic-gate
1566081Scy152378 static fmd_case_impl_t *fmd_case_tryhold(fmd_case_impl_t *);
1576081Scy152378
1580Sstevel@tonic-gate fmd_case_hash_t *
fmd_case_hash_create(void)1590Sstevel@tonic-gate fmd_case_hash_create(void)
1600Sstevel@tonic-gate {
1610Sstevel@tonic-gate fmd_case_hash_t *chp = fmd_alloc(sizeof (fmd_case_hash_t), FMD_SLEEP);
1620Sstevel@tonic-gate
1630Sstevel@tonic-gate (void) pthread_rwlock_init(&chp->ch_lock, NULL);
1640Sstevel@tonic-gate chp->ch_hashlen = fmd.d_str_buckets;
1650Sstevel@tonic-gate chp->ch_hash = fmd_zalloc(sizeof (void *) * chp->ch_hashlen, FMD_SLEEP);
1666228Sstephh chp->ch_code_hash = fmd_zalloc(sizeof (void *) * chp->ch_hashlen,
1676228Sstephh FMD_SLEEP);
1681193Smws chp->ch_count = 0;
1690Sstevel@tonic-gate
1700Sstevel@tonic-gate return (chp);
1710Sstevel@tonic-gate }
1720Sstevel@tonic-gate
1730Sstevel@tonic-gate /*
1740Sstevel@tonic-gate * Destroy the case hash. Unlike most of our hash tables, no active references
1751193Smws * are kept by the case hash itself; all references come from other subsystems.
1760Sstevel@tonic-gate * The hash must be destroyed after all modules are unloaded; if anything was
1770Sstevel@tonic-gate * present in the hash it would be by definition a reference count leak.
1780Sstevel@tonic-gate */
1790Sstevel@tonic-gate void
fmd_case_hash_destroy(fmd_case_hash_t * chp)1800Sstevel@tonic-gate fmd_case_hash_destroy(fmd_case_hash_t *chp)
1810Sstevel@tonic-gate {
1820Sstevel@tonic-gate fmd_free(chp->ch_hash, sizeof (void *) * chp->ch_hashlen);
1836228Sstephh fmd_free(chp->ch_code_hash, sizeof (void *) * chp->ch_hashlen);
1840Sstevel@tonic-gate fmd_free(chp, sizeof (fmd_case_hash_t));
1850Sstevel@tonic-gate }
1860Sstevel@tonic-gate
1871193Smws /*
1881193Smws * Take a snapshot of the case hash by placing an additional hold on each
1891193Smws * member in an auxiliary array, and then call 'func' for each case.
1901193Smws */
1911193Smws void
fmd_case_hash_apply(fmd_case_hash_t * chp,void (* func)(fmd_case_t *,void *),void * arg)1921193Smws fmd_case_hash_apply(fmd_case_hash_t *chp,
1931193Smws void (*func)(fmd_case_t *, void *), void *arg)
1941193Smws {
1951193Smws fmd_case_impl_t *cp, **cps, **cpp;
1961193Smws uint_t cpc, i;
1971193Smws
1981193Smws (void) pthread_rwlock_rdlock(&chp->ch_lock);
1991193Smws
2001193Smws cps = cpp = fmd_alloc(chp->ch_count * sizeof (fmd_case_t *), FMD_SLEEP);
2011193Smws cpc = chp->ch_count;
2021193Smws
2031193Smws for (i = 0; i < chp->ch_hashlen; i++) {
2047440SSean.Ye@Sun.COM for (cp = chp->ch_hash[i]; cp != NULL; cp = cp->ci_next)
2057440SSean.Ye@Sun.COM *cpp++ = fmd_case_tryhold(cp);
2061193Smws }
2071193Smws
2081193Smws ASSERT(cpp == cps + cpc);
2091193Smws (void) pthread_rwlock_unlock(&chp->ch_lock);
2101193Smws
2111193Smws for (i = 0; i < cpc; i++) {
2127440SSean.Ye@Sun.COM if (cps[i] != NULL) {
2137440SSean.Ye@Sun.COM func((fmd_case_t *)cps[i], arg);
2147440SSean.Ye@Sun.COM fmd_case_rele((fmd_case_t *)cps[i]);
2157440SSean.Ye@Sun.COM }
2161193Smws }
2171193Smws
2181193Smws fmd_free(cps, cpc * sizeof (fmd_case_t *));
2191193Smws }
2201193Smws
2216228Sstephh static void
fmd_case_code_hash_insert(fmd_case_hash_t * chp,fmd_case_impl_t * cip)2226228Sstephh fmd_case_code_hash_insert(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
2236228Sstephh {
2246228Sstephh uint_t h = fmd_strhash(cip->ci_code) % chp->ch_hashlen;
2256228Sstephh
2266228Sstephh cip->ci_code_next = chp->ch_code_hash[h];
2276228Sstephh chp->ch_code_hash[h] = cip;
2286228Sstephh }
2296228Sstephh
2306228Sstephh static void
fmd_case_code_hash_delete(fmd_case_hash_t * chp,fmd_case_impl_t * cip)2316228Sstephh fmd_case_code_hash_delete(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
2326228Sstephh {
2336228Sstephh fmd_case_impl_t **pp, *cp;
2346228Sstephh
2356228Sstephh if (cip->ci_code) {
2366228Sstephh uint_t h = fmd_strhash(cip->ci_code) % chp->ch_hashlen;
2376228Sstephh
2386228Sstephh pp = &chp->ch_code_hash[h];
2396228Sstephh for (cp = *pp; cp != NULL; cp = cp->ci_code_next) {
2406228Sstephh if (cp != cip)
2416228Sstephh pp = &cp->ci_code_next;
2426228Sstephh else
2436228Sstephh break;
2446228Sstephh }
2456228Sstephh if (cp != NULL) {
2466228Sstephh *pp = cp->ci_code_next;
2476228Sstephh cp->ci_code_next = NULL;
2486228Sstephh }
2496228Sstephh }
2506228Sstephh }
2516228Sstephh
2521193Smws /*
2531193Smws * Look up the diagcode for this case and cache it in ci_code. If no suspects
2541193Smws * were defined for this case or if the lookup fails, the event dictionary or
2551193Smws * module code is broken, and we set the event code to a precomputed default.
2561193Smws */
2571193Smws static const char *
fmd_case_mkcode(fmd_case_t * cp)2581193Smws fmd_case_mkcode(fmd_case_t *cp)
2590Sstevel@tonic-gate {
2600Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2610Sstevel@tonic-gate fmd_case_susp_t *cis;
2626228Sstephh fmd_case_hash_t *chp = fmd.d_cases;
2630Sstevel@tonic-gate
2641193Smws char **keys, **keyp;
2650Sstevel@tonic-gate const char *s;
2660Sstevel@tonic-gate
2670Sstevel@tonic-gate ASSERT(MUTEX_HELD(&cip->ci_lock));
2680Sstevel@tonic-gate ASSERT(cip->ci_state >= FMD_CASE_SOLVED);
2690Sstevel@tonic-gate
2706228Sstephh /*
2716228Sstephh * delete any existing entry from code hash if it is on it
2726228Sstephh */
2736228Sstephh fmd_case_code_hash_delete(chp, cip);
2746228Sstephh
2751193Smws fmd_free(cip->ci_code, cip->ci_codelen);
2761193Smws cip->ci_codelen = cip->ci_mod->mod_codelen;
2771193Smws cip->ci_code = fmd_zalloc(cip->ci_codelen, FMD_SLEEP);
2780Sstevel@tonic-gate keys = keyp = alloca(sizeof (char *) * (cip->ci_nsuspects + 1));
2790Sstevel@tonic-gate
2800Sstevel@tonic-gate for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
2810Sstevel@tonic-gate if (nvlist_lookup_string(cis->cis_nvl, FM_CLASS, keyp) == 0)
2820Sstevel@tonic-gate keyp++;
2830Sstevel@tonic-gate }
2840Sstevel@tonic-gate
2850Sstevel@tonic-gate *keyp = NULL; /* mark end of keys[] array for libdiagcode */
2860Sstevel@tonic-gate
2870Sstevel@tonic-gate if (cip->ci_nsuspects == 0 || fmd_module_dc_key2code(
2881193Smws cip->ci_mod, keys, cip->ci_code, cip->ci_codelen) != 0) {
2890Sstevel@tonic-gate (void) fmd_conf_getprop(fmd.d_conf, "nodiagcode", &s);
2901193Smws fmd_free(cip->ci_code, cip->ci_codelen);
2911193Smws cip->ci_codelen = strlen(s) + 1;
2921193Smws cip->ci_code = fmd_zalloc(cip->ci_codelen, FMD_SLEEP);
2931193Smws (void) strcpy(cip->ci_code, s);
2940Sstevel@tonic-gate }
2950Sstevel@tonic-gate
2966228Sstephh /*
2976228Sstephh * add into hash of solved cases
2986228Sstephh */
2996228Sstephh fmd_case_code_hash_insert(chp, cip);
3006228Sstephh
3011193Smws return (cip->ci_code);
3021193Smws }
3031193Smws
3046228Sstephh typedef struct {
3056228Sstephh int *fcl_countp;
3067913SStephen.Hanson@Sun.COM int fcl_maxcount;
3076228Sstephh uint8_t *fcl_ba;
3086228Sstephh nvlist_t **fcl_nva;
3096228Sstephh int *fcl_msgp;
3106228Sstephh } fmd_case_lst_t;
3116228Sstephh
3126228Sstephh static void
fmd_case_set_lst(fmd_asru_link_t * alp,void * arg)3136228Sstephh fmd_case_set_lst(fmd_asru_link_t *alp, void *arg)
3146228Sstephh {
3156228Sstephh fmd_case_lst_t *entryp = (fmd_case_lst_t *)arg;
3166228Sstephh boolean_t b;
3176228Sstephh int state;
3186228Sstephh
3197913SStephen.Hanson@Sun.COM if (*entryp->fcl_countp >= entryp->fcl_maxcount)
3207913SStephen.Hanson@Sun.COM return;
3216228Sstephh if (nvlist_lookup_boolean_value(alp->al_event, FM_SUSPECT_MESSAGE,
3226228Sstephh &b) == 0 && b == B_FALSE)
3236228Sstephh *entryp->fcl_msgp = B_FALSE;
3246228Sstephh entryp->fcl_ba[*entryp->fcl_countp] = 0;
3256228Sstephh state = fmd_asru_al_getstate(alp);
3267275Sstephh if (state & FMD_ASRU_DEGRADED)
3277275Sstephh entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_DEGRADED;
3286228Sstephh if (state & FMD_ASRU_UNUSABLE)
3296228Sstephh entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_UNUSABLE;
3306228Sstephh if (state & FMD_ASRU_FAULTY)
3316228Sstephh entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_FAULTY;
3326228Sstephh if (!(state & FMD_ASRU_PRESENT))
3336228Sstephh entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_NOT_PRESENT;
3347275Sstephh if (alp->al_reason == FMD_ASRU_REPAIRED)
3357275Sstephh entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_REPAIRED;
3367275Sstephh else if (alp->al_reason == FMD_ASRU_REPLACED)
3377275Sstephh entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_REPLACED;
3387275Sstephh else if (alp->al_reason == FMD_ASRU_ACQUITTED)
3397275Sstephh entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_ACQUITTED;
3406228Sstephh entryp->fcl_nva[*entryp->fcl_countp] = alp->al_event;
3416228Sstephh (*entryp->fcl_countp)++;
3426228Sstephh }
3436228Sstephh
3446228Sstephh static void
fmd_case_faulty(fmd_asru_link_t * alp,void * arg)3456228Sstephh fmd_case_faulty(fmd_asru_link_t *alp, void *arg)
3466228Sstephh {
3476228Sstephh int *faultyp = (int *)arg;
3486228Sstephh
3496228Sstephh *faultyp |= (alp->al_flags & FMD_ASRU_FAULTY);
3506228Sstephh }
3516228Sstephh
3526228Sstephh static void
fmd_case_usable(fmd_asru_link_t * alp,void * arg)3536228Sstephh fmd_case_usable(fmd_asru_link_t *alp, void *arg)
3546228Sstephh {
3556228Sstephh int *usablep = (int *)arg;
3566228Sstephh
3576228Sstephh *usablep |= !(fmd_asru_al_getstate(alp) & FMD_ASRU_UNUSABLE);
3586228Sstephh }
3596228Sstephh
3607275Sstephh static void
fmd_case_not_faulty(fmd_asru_link_t * alp,void * arg)3617275Sstephh fmd_case_not_faulty(fmd_asru_link_t *alp, void *arg)
3627275Sstephh {
3637275Sstephh int *not_faultyp = (int *)arg;
3647275Sstephh
3657275Sstephh *not_faultyp |= !(alp->al_flags & FMD_ASRU_FAULTY);
3667275Sstephh }
3677275Sstephh
3687275Sstephh /*
3697275Sstephh * Have we got any suspects with an asru that are still unusable and present?
3707275Sstephh */
3717275Sstephh static void
fmd_case_unusable_and_present(fmd_asru_link_t * alp,void * arg)3727275Sstephh fmd_case_unusable_and_present(fmd_asru_link_t *alp, void *arg)
3737275Sstephh {
3747275Sstephh int *rvalp = (int *)arg;
3759120SStephen.Hanson@Sun.COM int state;
3767275Sstephh nvlist_t *asru;
3777275Sstephh
3789120SStephen.Hanson@Sun.COM /*
3799120SStephen.Hanson@Sun.COM * if this a proxy case and this suspect doesn't have an local asru
3809120SStephen.Hanson@Sun.COM * then state is unknown so we must assume it may still be unusable.
3819120SStephen.Hanson@Sun.COM */
3829120SStephen.Hanson@Sun.COM if ((alp->al_flags & FMD_ASRU_PROXY) &&
3839120SStephen.Hanson@Sun.COM !(alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU)) {
3849120SStephen.Hanson@Sun.COM *rvalp |= B_TRUE;
3859120SStephen.Hanson@Sun.COM return;
3869120SStephen.Hanson@Sun.COM }
3879120SStephen.Hanson@Sun.COM
3889120SStephen.Hanson@Sun.COM state = fmd_asru_al_getstate(alp);
3897275Sstephh if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) != 0)
3907275Sstephh return;
3917275Sstephh *rvalp |= ((state & FMD_ASRU_UNUSABLE) && (state & FMD_ASRU_PRESENT));
3927275Sstephh }
3937275Sstephh
3941193Smws nvlist_t *
fmd_case_mkevent(fmd_case_t * cp,const char * class)3951193Smws fmd_case_mkevent(fmd_case_t *cp, const char *class)
3961193Smws {
3971193Smws fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
3986228Sstephh nvlist_t **nva, *nvl;
3996228Sstephh uint8_t *ba;
4001193Smws int msg = B_TRUE;
4016276Scy152378 const char *code;
4026228Sstephh fmd_case_lst_t fcl;
4036228Sstephh int count = 0;
4041193Smws
4051193Smws (void) pthread_mutex_lock(&cip->ci_lock);
4061193Smws ASSERT(cip->ci_state >= FMD_CASE_SOLVED);
4071193Smws
4086228Sstephh nva = alloca(sizeof (nvlist_t *) * cip->ci_nsuspects);
4096228Sstephh ba = alloca(sizeof (uint8_t) * cip->ci_nsuspects);
4101193Smws
4111193Smws /*
4121193Smws * For each suspect associated with the case, store its fault event
4131193Smws * nvlist in 'nva'. We also look to see if any of the suspect faults
4141193Smws * have asked not to be messaged. If any of them have made such a
4151193Smws * request, propagate that attribute to the composite list.* event.
4161193Smws * Finally, store each suspect's faulty status into the bitmap 'ba'.
4171193Smws */
4186228Sstephh fcl.fcl_countp = &count;
4197913SStephen.Hanson@Sun.COM fcl.fcl_maxcount = cip->ci_nsuspects;
4206228Sstephh fcl.fcl_msgp = &msg;
4216228Sstephh fcl.fcl_ba = ba;
4226228Sstephh fcl.fcl_nva = nva;
4236228Sstephh fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_set_lst, &fcl);
4241193Smws
4251193Smws if (cip->ci_code == NULL)
4261193Smws (void) fmd_case_mkcode(cp);
4276276Scy152378 /*
4287275Sstephh * For repair and updated event, we lookup diagcode from dict using key
4297275Sstephh * "list.repaired" or "list.updated" or "list.resolved".
4306276Scy152378 */
4316276Scy152378 if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0)
4326276Scy152378 (void) fmd_conf_getprop(fmd.d_conf, "repaircode", &code);
4337275Sstephh else if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0)
4347275Sstephh (void) fmd_conf_getprop(fmd.d_conf, "resolvecode", &code);
4357275Sstephh else if (strcmp(class, FM_LIST_UPDATED_CLASS) == 0)
4367275Sstephh (void) fmd_conf_getprop(fmd.d_conf, "updatecode", &code);
4376276Scy152378 else
4386276Scy152378 code = cip->ci_code;
4391193Smws
4405255Sstephh if (msg == B_FALSE)
4415255Sstephh cip->ci_flags |= FMD_CF_INVISIBLE;
4425255Sstephh
4439120SStephen.Hanson@Sun.COM /*
4449120SStephen.Hanson@Sun.COM * Use the ci_diag_de if one has been saved (eg for an injected fault).
4459120SStephen.Hanson@Sun.COM * Otherwise use the authority for the current module.
4469120SStephen.Hanson@Sun.COM */
4479120SStephen.Hanson@Sun.COM nvl = fmd_protocol_list(class, cip->ci_diag_de == NULL ?
4489120SStephen.Hanson@Sun.COM cip->ci_mod->mod_fmri : cip->ci_diag_de, cip->ci_uuid, code, count,
44910928SStephen.Hanson@Sun.COM nva, ba, msg, &cip->ci_tv, cip->ci_injected);
4501193Smws
4511193Smws (void) pthread_mutex_unlock(&cip->ci_lock);
4521193Smws return (nvl);
4530Sstevel@tonic-gate }
4540Sstevel@tonic-gate
/*
 * Tunables for the case-matching logic.  Their consumers are outside this
 * part of the file; meanings are presumed from the names -- confirm at the
 * points of use.
 */
static int fmd_case_match_on_faulty_overlap = 1;
static int fmd_case_match_on_acquit_overlap = 1;
static int fmd_case_auto_acquit_isolated = 1;
static int fmd_case_auto_acquit_non_acquitted = 1;
static int fmd_case_too_recent = 10;	/* time in seconds */
46010656SStephen.Hanson@Sun.COM
4616228Sstephh static boolean_t
fmd_case_compare_elem(nvlist_t * nvl,nvlist_t * xnvl,const char * elem)4626228Sstephh fmd_case_compare_elem(nvlist_t *nvl, nvlist_t *xnvl, const char *elem)
4636228Sstephh {
4646228Sstephh nvlist_t *new_rsrc;
4656228Sstephh nvlist_t *rsrc;
4666228Sstephh char *new_name = NULL;
4676228Sstephh char *name = NULL;
4686228Sstephh ssize_t new_namelen;
4696228Sstephh ssize_t namelen;
4706228Sstephh int fmri_present = 1;
4716228Sstephh int new_fmri_present = 1;
4726228Sstephh int match = B_FALSE;
4736869Seschrock fmd_topo_t *ftp = fmd_topo_hold();
4746228Sstephh
4756228Sstephh if (nvlist_lookup_nvlist(xnvl, elem, &rsrc) != 0)
4766228Sstephh fmri_present = 0;
4776228Sstephh else {
4786228Sstephh if ((namelen = fmd_fmri_nvl2str(rsrc, NULL, 0)) == -1)
4796228Sstephh goto done;
4806228Sstephh name = fmd_alloc(namelen + 1, FMD_SLEEP);
4816228Sstephh if (fmd_fmri_nvl2str(rsrc, name, namelen + 1) == -1)
4826228Sstephh goto done;
4836228Sstephh }
4846228Sstephh if (nvlist_lookup_nvlist(nvl, elem, &new_rsrc) != 0)
4856228Sstephh new_fmri_present = 0;
4866228Sstephh else {
4876228Sstephh if ((new_namelen = fmd_fmri_nvl2str(new_rsrc, NULL, 0)) == -1)
4886228Sstephh goto done;
4896228Sstephh new_name = fmd_alloc(new_namelen + 1, FMD_SLEEP);
4906228Sstephh if (fmd_fmri_nvl2str(new_rsrc, new_name, new_namelen + 1) == -1)
4916228Sstephh goto done;
4926228Sstephh }
4936228Sstephh match = (fmri_present == new_fmri_present &&
4946869Seschrock (fmri_present == 0 ||
4956869Seschrock topo_fmri_strcmp(ftp->ft_hdl, name, new_name)));
4966228Sstephh done:
4976228Sstephh if (name != NULL)
4986228Sstephh fmd_free(name, namelen + 1);
4996228Sstephh if (new_name != NULL)
5006228Sstephh fmd_free(new_name, new_namelen + 1);
5016869Seschrock fmd_topo_rele(ftp);
5026228Sstephh return (match);
5036228Sstephh }
5046228Sstephh
5056228Sstephh static int
fmd_case_match_suspect(nvlist_t * nvl1,nvlist_t * nvl2)50610656SStephen.Hanson@Sun.COM fmd_case_match_suspect(nvlist_t *nvl1, nvlist_t *nvl2)
5076228Sstephh {
5086228Sstephh char *class, *new_class;
5096228Sstephh
51010656SStephen.Hanson@Sun.COM if (!fmd_case_compare_elem(nvl1, nvl2, FM_FAULT_ASRU))
5116228Sstephh return (0);
51210656SStephen.Hanson@Sun.COM if (!fmd_case_compare_elem(nvl1, nvl2, FM_FAULT_RESOURCE))
51310656SStephen.Hanson@Sun.COM return (0);
51410656SStephen.Hanson@Sun.COM if (!fmd_case_compare_elem(nvl1, nvl2, FM_FAULT_FRU))
5156228Sstephh return (0);
51610656SStephen.Hanson@Sun.COM (void) nvlist_lookup_string(nvl2, FM_CLASS, &class);
51710656SStephen.Hanson@Sun.COM (void) nvlist_lookup_string(nvl1, FM_CLASS, &new_class);
5186228Sstephh return (strcmp(class, new_class) == 0);
5196228Sstephh }
5206228Sstephh
52110656SStephen.Hanson@Sun.COM typedef struct {
52210656SStephen.Hanson@Sun.COM int *fcms_countp;
52310656SStephen.Hanson@Sun.COM int fcms_maxcount;
52410656SStephen.Hanson@Sun.COM fmd_case_impl_t *fcms_cip;
52510656SStephen.Hanson@Sun.COM uint8_t *fcms_new_susp_state;
52610656SStephen.Hanson@Sun.COM uint8_t *fcms_old_susp_state;
52710656SStephen.Hanson@Sun.COM uint8_t *fcms_old_match_state;
52810656SStephen.Hanson@Sun.COM } fcms_t;
52910656SStephen.Hanson@Sun.COM #define SUSPECT_STATE_FAULTY 0x1
53010656SStephen.Hanson@Sun.COM #define SUSPECT_STATE_ISOLATED 0x2
53110656SStephen.Hanson@Sun.COM #define SUSPECT_STATE_REMOVED 0x4
53210656SStephen.Hanson@Sun.COM #define SUSPECT_STATE_ACQUITED 0x8
53310656SStephen.Hanson@Sun.COM #define SUSPECT_STATE_REPAIRED 0x10
53410656SStephen.Hanson@Sun.COM #define SUSPECT_STATE_REPLACED 0x20
53510656SStephen.Hanson@Sun.COM #define SUSPECT_STATE_NO_MATCH 0x1
53610656SStephen.Hanson@Sun.COM
53710656SStephen.Hanson@Sun.COM /*
53810656SStephen.Hanson@Sun.COM * This is called for each suspect in the old case. Compare it against each
53910656SStephen.Hanson@Sun.COM * suspect in the new case, setting fcms_old_susp_state and fcms_new_susp_state
54010656SStephen.Hanson@Sun.COM * as appropriate. fcms_new_susp_state will left as 0 if the suspect is not
54110656SStephen.Hanson@Sun.COM * found in the old case.
54210656SStephen.Hanson@Sun.COM */
54310656SStephen.Hanson@Sun.COM static void
fmd_case_match_suspects(fmd_asru_link_t * alp,void * arg)54410656SStephen.Hanson@Sun.COM fmd_case_match_suspects(fmd_asru_link_t *alp, void *arg)
54510656SStephen.Hanson@Sun.COM {
54610656SStephen.Hanson@Sun.COM fcms_t *fcmsp = (fcms_t *)arg;
54710656SStephen.Hanson@Sun.COM fmd_case_impl_t *cip = fcmsp->fcms_cip;
54810656SStephen.Hanson@Sun.COM fmd_case_susp_t *cis;
54910656SStephen.Hanson@Sun.COM int i = 0;
55010656SStephen.Hanson@Sun.COM int state = fmd_asru_al_getstate(alp);
55110656SStephen.Hanson@Sun.COM
55210656SStephen.Hanson@Sun.COM if (*fcmsp->fcms_countp >= fcmsp->fcms_maxcount)
55310656SStephen.Hanson@Sun.COM return;
55410656SStephen.Hanson@Sun.COM
55510656SStephen.Hanson@Sun.COM if (!(state & FMD_ASRU_PRESENT) || (!(state & FMD_ASRU_FAULTY) &&
55610656SStephen.Hanson@Sun.COM alp->al_reason == FMD_ASRU_REMOVED))
55710656SStephen.Hanson@Sun.COM fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
55810656SStephen.Hanson@Sun.COM SUSPECT_STATE_REMOVED;
55910656SStephen.Hanson@Sun.COM else if ((state & FMD_ASRU_UNUSABLE) && (state & FMD_ASRU_FAULTY))
56010656SStephen.Hanson@Sun.COM fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
56110656SStephen.Hanson@Sun.COM SUSPECT_STATE_ISOLATED;
56210656SStephen.Hanson@Sun.COM else if (state & FMD_ASRU_FAULTY)
56310656SStephen.Hanson@Sun.COM fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
56410656SStephen.Hanson@Sun.COM SUSPECT_STATE_FAULTY;
56510656SStephen.Hanson@Sun.COM else if (alp->al_reason == FMD_ASRU_REPLACED)
56610656SStephen.Hanson@Sun.COM fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
56710656SStephen.Hanson@Sun.COM SUSPECT_STATE_REPLACED;
56810656SStephen.Hanson@Sun.COM else if (alp->al_reason == FMD_ASRU_ACQUITTED)
56910656SStephen.Hanson@Sun.COM fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
57010656SStephen.Hanson@Sun.COM SUSPECT_STATE_ACQUITED;
57110656SStephen.Hanson@Sun.COM else
57210656SStephen.Hanson@Sun.COM fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
57310656SStephen.Hanson@Sun.COM SUSPECT_STATE_REPAIRED;
57410656SStephen.Hanson@Sun.COM
57510656SStephen.Hanson@Sun.COM for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next, i++)
57610656SStephen.Hanson@Sun.COM if (fmd_case_match_suspect(cis->cis_nvl, alp->al_event) == 1)
57710656SStephen.Hanson@Sun.COM break;
57810656SStephen.Hanson@Sun.COM if (cis != NULL)
57910656SStephen.Hanson@Sun.COM fcmsp->fcms_new_susp_state[i] =
58010656SStephen.Hanson@Sun.COM fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp];
58110656SStephen.Hanson@Sun.COM else
58210656SStephen.Hanson@Sun.COM fcmsp->fcms_old_match_state[*fcmsp->fcms_countp] |=
58310656SStephen.Hanson@Sun.COM SUSPECT_STATE_NO_MATCH;
58410656SStephen.Hanson@Sun.COM (*fcmsp->fcms_countp)++;
58510656SStephen.Hanson@Sun.COM }
58610656SStephen.Hanson@Sun.COM
58710656SStephen.Hanson@Sun.COM typedef struct {
58810656SStephen.Hanson@Sun.COM int *fca_do_update;
58910656SStephen.Hanson@Sun.COM fmd_case_impl_t *fca_cip;
59010656SStephen.Hanson@Sun.COM } fca_t;
59110656SStephen.Hanson@Sun.COM
59210656SStephen.Hanson@Sun.COM /*
59310656SStephen.Hanson@Sun.COM * Re-fault all acquitted suspects that are still present in the new list.
59410656SStephen.Hanson@Sun.COM */
59510656SStephen.Hanson@Sun.COM static void
fmd_case_fault_acquitted_matching(fmd_asru_link_t * alp,void * arg)59610656SStephen.Hanson@Sun.COM fmd_case_fault_acquitted_matching(fmd_asru_link_t *alp, void *arg)
59710656SStephen.Hanson@Sun.COM {
59810656SStephen.Hanson@Sun.COM fca_t *fcap = (fca_t *)arg;
59910656SStephen.Hanson@Sun.COM fmd_case_impl_t *cip = fcap->fca_cip;
60010656SStephen.Hanson@Sun.COM fmd_case_susp_t *cis;
60110656SStephen.Hanson@Sun.COM int state = fmd_asru_al_getstate(alp);
60210656SStephen.Hanson@Sun.COM
60310656SStephen.Hanson@Sun.COM if (!(state & FMD_ASRU_FAULTY) &&
60410656SStephen.Hanson@Sun.COM alp->al_reason == FMD_ASRU_ACQUITTED) {
60510656SStephen.Hanson@Sun.COM for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
60610656SStephen.Hanson@Sun.COM if (fmd_case_match_suspect(cis->cis_nvl,
60710656SStephen.Hanson@Sun.COM alp->al_event) == 1)
60810656SStephen.Hanson@Sun.COM break;
60910656SStephen.Hanson@Sun.COM if (cis != NULL) {
61010656SStephen.Hanson@Sun.COM (void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY);
61110656SStephen.Hanson@Sun.COM *fcap->fca_do_update = 1;
61210656SStephen.Hanson@Sun.COM }
61310656SStephen.Hanson@Sun.COM }
61410656SStephen.Hanson@Sun.COM }
61510656SStephen.Hanson@Sun.COM
61610656SStephen.Hanson@Sun.COM /*
61710656SStephen.Hanson@Sun.COM * Re-fault all suspects that are still present in the new list.
61810656SStephen.Hanson@Sun.COM */
61910656SStephen.Hanson@Sun.COM static void
fmd_case_fault_all_matching(fmd_asru_link_t * alp,void * arg)62010656SStephen.Hanson@Sun.COM fmd_case_fault_all_matching(fmd_asru_link_t *alp, void *arg)
62110656SStephen.Hanson@Sun.COM {
62210656SStephen.Hanson@Sun.COM fca_t *fcap = (fca_t *)arg;
62310656SStephen.Hanson@Sun.COM fmd_case_impl_t *cip = fcap->fca_cip;
62410656SStephen.Hanson@Sun.COM fmd_case_susp_t *cis;
62510656SStephen.Hanson@Sun.COM int state = fmd_asru_al_getstate(alp);
62610656SStephen.Hanson@Sun.COM
62710656SStephen.Hanson@Sun.COM if (!(state & FMD_ASRU_FAULTY)) {
62810656SStephen.Hanson@Sun.COM for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
62910656SStephen.Hanson@Sun.COM if (fmd_case_match_suspect(cis->cis_nvl,
63010656SStephen.Hanson@Sun.COM alp->al_event) == 1)
63110656SStephen.Hanson@Sun.COM break;
63210656SStephen.Hanson@Sun.COM if (cis != NULL) {
63310656SStephen.Hanson@Sun.COM (void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY);
63410656SStephen.Hanson@Sun.COM *fcap->fca_do_update = 1;
63510656SStephen.Hanson@Sun.COM }
63610656SStephen.Hanson@Sun.COM }
63710656SStephen.Hanson@Sun.COM }
63810656SStephen.Hanson@Sun.COM
63910656SStephen.Hanson@Sun.COM /*
64010656SStephen.Hanson@Sun.COM * Acquit all suspects that are no longer present in the new list.
64110656SStephen.Hanson@Sun.COM */
64210656SStephen.Hanson@Sun.COM static void
fmd_case_acquit_no_match(fmd_asru_link_t * alp,void * arg)64310656SStephen.Hanson@Sun.COM fmd_case_acquit_no_match(fmd_asru_link_t *alp, void *arg)
64410656SStephen.Hanson@Sun.COM {
64510656SStephen.Hanson@Sun.COM fca_t *fcap = (fca_t *)arg;
64610656SStephen.Hanson@Sun.COM fmd_case_impl_t *cip = fcap->fca_cip;
64710656SStephen.Hanson@Sun.COM fmd_case_susp_t *cis;
64810656SStephen.Hanson@Sun.COM int state = fmd_asru_al_getstate(alp);
64910656SStephen.Hanson@Sun.COM
65010656SStephen.Hanson@Sun.COM if (state & FMD_ASRU_FAULTY) {
65110656SStephen.Hanson@Sun.COM for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
65210656SStephen.Hanson@Sun.COM if (fmd_case_match_suspect(cis->cis_nvl,
65310656SStephen.Hanson@Sun.COM alp->al_event) == 1)
65410656SStephen.Hanson@Sun.COM break;
65510656SStephen.Hanson@Sun.COM if (cis == NULL) {
65610656SStephen.Hanson@Sun.COM (void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
65710656SStephen.Hanson@Sun.COM FMD_ASRU_ACQUITTED);
65810656SStephen.Hanson@Sun.COM *fcap->fca_do_update = 1;
65910656SStephen.Hanson@Sun.COM }
66010656SStephen.Hanson@Sun.COM }
66110656SStephen.Hanson@Sun.COM }
66210656SStephen.Hanson@Sun.COM
66310656SStephen.Hanson@Sun.COM /*
66410656SStephen.Hanson@Sun.COM * Acquit all isolated suspects.
66510656SStephen.Hanson@Sun.COM */
66610656SStephen.Hanson@Sun.COM static void
fmd_case_acquit_isolated(fmd_asru_link_t * alp,void * arg)66710656SStephen.Hanson@Sun.COM fmd_case_acquit_isolated(fmd_asru_link_t *alp, void *arg)
66810656SStephen.Hanson@Sun.COM {
66910656SStephen.Hanson@Sun.COM int *do_update = (int *)arg;
67010656SStephen.Hanson@Sun.COM int state = fmd_asru_al_getstate(alp);
67110656SStephen.Hanson@Sun.COM
67210656SStephen.Hanson@Sun.COM if ((state & FMD_ASRU_PRESENT) && (state & FMD_ASRU_UNUSABLE) &&
67310656SStephen.Hanson@Sun.COM (state & FMD_ASRU_FAULTY)) {
67410656SStephen.Hanson@Sun.COM (void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
67510656SStephen.Hanson@Sun.COM FMD_ASRU_ACQUITTED);
67610656SStephen.Hanson@Sun.COM *do_update = 1;
67710656SStephen.Hanson@Sun.COM }
67810656SStephen.Hanson@Sun.COM }
67910656SStephen.Hanson@Sun.COM
6806228Sstephh /*
68110656SStephen.Hanson@Sun.COM * Acquit suspect which matches specified nvlist
6826228Sstephh */
68310656SStephen.Hanson@Sun.COM static void
fmd_case_acquit_suspect(fmd_asru_link_t * alp,void * arg)68410656SStephen.Hanson@Sun.COM fmd_case_acquit_suspect(fmd_asru_link_t *alp, void *arg)
68510656SStephen.Hanson@Sun.COM {
68610656SStephen.Hanson@Sun.COM nvlist_t *nvl = (nvlist_t *)arg;
68710656SStephen.Hanson@Sun.COM int state = fmd_asru_al_getstate(alp);
68810656SStephen.Hanson@Sun.COM
68910656SStephen.Hanson@Sun.COM if ((state & FMD_ASRU_FAULTY) &&
69010656SStephen.Hanson@Sun.COM fmd_case_match_suspect(nvl, alp->al_event) == 1)
69110656SStephen.Hanson@Sun.COM (void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
69210656SStephen.Hanson@Sun.COM FMD_ASRU_ACQUITTED);
69310656SStephen.Hanson@Sun.COM }
69410656SStephen.Hanson@Sun.COM
69510656SStephen.Hanson@Sun.COM typedef struct {
69610656SStephen.Hanson@Sun.COM fmd_case_impl_t *fccd_cip;
69710656SStephen.Hanson@Sun.COM uint8_t *fccd_new_susp_state;
69810656SStephen.Hanson@Sun.COM uint8_t *fccd_new_match_state;
69910656SStephen.Hanson@Sun.COM int *fccd_discard_new;
70010656SStephen.Hanson@Sun.COM int *fccd_adjust_new;
70110656SStephen.Hanson@Sun.COM } fccd_t;
70210656SStephen.Hanson@Sun.COM
70310656SStephen.Hanson@Sun.COM /*
70410656SStephen.Hanson@Sun.COM * see if a matching suspect list already exists in the cache
70510656SStephen.Hanson@Sun.COM */
70610656SStephen.Hanson@Sun.COM static void
fmd_case_check_for_dups(fmd_case_t * old_cp,void * arg)70710656SStephen.Hanson@Sun.COM fmd_case_check_for_dups(fmd_case_t *old_cp, void *arg)
7086228Sstephh {
70910656SStephen.Hanson@Sun.COM fccd_t *fccdp = (fccd_t *)arg;
71010656SStephen.Hanson@Sun.COM fmd_case_impl_t *new_cip = fccdp->fccd_cip;
71110656SStephen.Hanson@Sun.COM fmd_case_impl_t *old_cip = (fmd_case_impl_t *)old_cp;
71210656SStephen.Hanson@Sun.COM int i, count = 0, do_update = 0, got_isolated_overlap = 0;
71310656SStephen.Hanson@Sun.COM int got_faulty_overlap = 0;
71410656SStephen.Hanson@Sun.COM int got_acquit_overlap = 0;
71510656SStephen.Hanson@Sun.COM boolean_t too_recent;
71610656SStephen.Hanson@Sun.COM uint64_t most_recent = 0;
71710656SStephen.Hanson@Sun.COM fcms_t fcms;
71810656SStephen.Hanson@Sun.COM fca_t fca;
71910656SStephen.Hanson@Sun.COM uint8_t *new_susp_state;
72010656SStephen.Hanson@Sun.COM uint8_t *old_susp_state;
72110656SStephen.Hanson@Sun.COM uint8_t *old_match_state;
72210656SStephen.Hanson@Sun.COM
72310656SStephen.Hanson@Sun.COM new_susp_state = alloca(new_cip->ci_nsuspects * sizeof (uint8_t));
72410656SStephen.Hanson@Sun.COM for (i = 0; i < new_cip->ci_nsuspects; i++)
72510656SStephen.Hanson@Sun.COM new_susp_state[i] = 0;
72610656SStephen.Hanson@Sun.COM old_susp_state = alloca(old_cip->ci_nsuspects * sizeof (uint8_t));
72710656SStephen.Hanson@Sun.COM for (i = 0; i < old_cip->ci_nsuspects; i++)
72810656SStephen.Hanson@Sun.COM old_susp_state[i] = 0;
72910656SStephen.Hanson@Sun.COM old_match_state = alloca(old_cip->ci_nsuspects * sizeof (uint8_t));
73010656SStephen.Hanson@Sun.COM for (i = 0; i < old_cip->ci_nsuspects; i++)
73110656SStephen.Hanson@Sun.COM old_match_state[i] = 0;
7326228Sstephh
73310656SStephen.Hanson@Sun.COM /*
73410656SStephen.Hanson@Sun.COM * Compare with each suspect in the existing case.
73510656SStephen.Hanson@Sun.COM */
73610656SStephen.Hanson@Sun.COM fcms.fcms_countp = &count;
73710656SStephen.Hanson@Sun.COM fcms.fcms_maxcount = old_cip->ci_nsuspects;
73810656SStephen.Hanson@Sun.COM fcms.fcms_cip = new_cip;
73910656SStephen.Hanson@Sun.COM fcms.fcms_new_susp_state = new_susp_state;
74010656SStephen.Hanson@Sun.COM fcms.fcms_old_susp_state = old_susp_state;
74110656SStephen.Hanson@Sun.COM fcms.fcms_old_match_state = old_match_state;
74210656SStephen.Hanson@Sun.COM fmd_asru_hash_apply_by_case(fmd.d_asrus, (fmd_case_t *)old_cip,
74310656SStephen.Hanson@Sun.COM fmd_case_match_suspects, &fcms);
74410656SStephen.Hanson@Sun.COM
74510656SStephen.Hanson@Sun.COM /*
74610656SStephen.Hanson@Sun.COM * If we have some faulty, non-isolated suspects that overlap, then most
74710656SStephen.Hanson@Sun.COM * likely it is the suspects that overlap in the suspect lists that are
74810656SStephen.Hanson@Sun.COM * to blame. So we can consider this to be a match.
74910656SStephen.Hanson@Sun.COM */
75010656SStephen.Hanson@Sun.COM for (i = 0; i < new_cip->ci_nsuspects; i++)
75110656SStephen.Hanson@Sun.COM if (new_susp_state[i] == SUSPECT_STATE_FAULTY)
75210656SStephen.Hanson@Sun.COM got_faulty_overlap = 1;
75310656SStephen.Hanson@Sun.COM if (got_faulty_overlap && fmd_case_match_on_faulty_overlap)
75410656SStephen.Hanson@Sun.COM goto got_match;
75510656SStephen.Hanson@Sun.COM
75610656SStephen.Hanson@Sun.COM /*
75710656SStephen.Hanson@Sun.COM * If we have no faulty, non-isolated suspects in the old case, but we
75810656SStephen.Hanson@Sun.COM * do have some acquitted suspects that overlap, then most likely it is
75910656SStephen.Hanson@Sun.COM * the acquitted suspects that overlap in the suspect lists that are
76010656SStephen.Hanson@Sun.COM * to blame. So we can consider this to be a match.
76110656SStephen.Hanson@Sun.COM */
76210656SStephen.Hanson@Sun.COM for (i = 0; i < new_cip->ci_nsuspects; i++)
76310656SStephen.Hanson@Sun.COM if (new_susp_state[i] == SUSPECT_STATE_ACQUITED)
76410656SStephen.Hanson@Sun.COM got_acquit_overlap = 1;
76510656SStephen.Hanson@Sun.COM for (i = 0; i < old_cip->ci_nsuspects; i++)
76610656SStephen.Hanson@Sun.COM if (old_susp_state[i] == SUSPECT_STATE_FAULTY)
76710656SStephen.Hanson@Sun.COM got_acquit_overlap = 0;
76810656SStephen.Hanson@Sun.COM if (got_acquit_overlap && fmd_case_match_on_acquit_overlap)
76910656SStephen.Hanson@Sun.COM goto got_match;
7706228Sstephh
7716228Sstephh /*
77210656SStephen.Hanson@Sun.COM * Check that all suspects in the new list are present in the old list.
77310656SStephen.Hanson@Sun.COM * Return if we find one that isn't.
7746228Sstephh */
77510656SStephen.Hanson@Sun.COM for (i = 0; i < new_cip->ci_nsuspects; i++)
77610656SStephen.Hanson@Sun.COM if (new_susp_state[i] == 0)
77710656SStephen.Hanson@Sun.COM return;
7786228Sstephh
77910656SStephen.Hanson@Sun.COM /*
78010656SStephen.Hanson@Sun.COM * Check that all suspects in the old list are present in the new list
78110656SStephen.Hanson@Sun.COM * *or* they are isolated or removed/replaced (which would explain why
78210656SStephen.Hanson@Sun.COM * they are not present in the new list). Return if we find one that is
78310656SStephen.Hanson@Sun.COM * faulty and unisolated or repaired or acquitted, and that is not
78410656SStephen.Hanson@Sun.COM * present in the new case.
78510656SStephen.Hanson@Sun.COM */
78610656SStephen.Hanson@Sun.COM for (i = 0; i < old_cip->ci_nsuspects; i++)
78710656SStephen.Hanson@Sun.COM if (old_match_state[i] == SUSPECT_STATE_NO_MATCH &&
78810656SStephen.Hanson@Sun.COM (old_susp_state[i] == SUSPECT_STATE_FAULTY ||
78910656SStephen.Hanson@Sun.COM old_susp_state[i] == SUSPECT_STATE_ACQUITED ||
79010656SStephen.Hanson@Sun.COM old_susp_state[i] == SUSPECT_STATE_REPAIRED))
79110656SStephen.Hanson@Sun.COM return;
79210656SStephen.Hanson@Sun.COM
79310656SStephen.Hanson@Sun.COM got_match:
79410656SStephen.Hanson@Sun.COM /*
79510656SStephen.Hanson@Sun.COM * If the old case is already in repaired/resolved state, we can't
79610656SStephen.Hanson@Sun.COM * do anything more with it, so keep the new case, but acquit some
79710656SStephen.Hanson@Sun.COM * of the suspects if appropriate.
79810656SStephen.Hanson@Sun.COM */
79910656SStephen.Hanson@Sun.COM if (old_cip->ci_state >= FMD_CASE_REPAIRED) {
80010656SStephen.Hanson@Sun.COM if (fmd_case_auto_acquit_non_acquitted) {
80110656SStephen.Hanson@Sun.COM *fccdp->fccd_adjust_new = 1;
80210656SStephen.Hanson@Sun.COM for (i = 0; i < new_cip->ci_nsuspects; i++) {
80310656SStephen.Hanson@Sun.COM fccdp->fccd_new_susp_state[i] |=
80410656SStephen.Hanson@Sun.COM new_susp_state[i];
80510656SStephen.Hanson@Sun.COM if (new_susp_state[i] == 0)
80610656SStephen.Hanson@Sun.COM fccdp->fccd_new_susp_state[i] =
80710656SStephen.Hanson@Sun.COM SUSPECT_STATE_NO_MATCH;
8086228Sstephh }
8096228Sstephh }
81010656SStephen.Hanson@Sun.COM return;
81110656SStephen.Hanson@Sun.COM }
81210656SStephen.Hanson@Sun.COM
81310656SStephen.Hanson@Sun.COM /*
81410656SStephen.Hanson@Sun.COM * Otherwise discard the new case and keep the old, again updating the
81510656SStephen.Hanson@Sun.COM * state of the suspects as appropriate
81610656SStephen.Hanson@Sun.COM */
81710656SStephen.Hanson@Sun.COM *fccdp->fccd_discard_new = 1;
81810656SStephen.Hanson@Sun.COM fca.fca_cip = new_cip;
81910656SStephen.Hanson@Sun.COM fca.fca_do_update = &do_update;
82010656SStephen.Hanson@Sun.COM
82110656SStephen.Hanson@Sun.COM /*
82210656SStephen.Hanson@Sun.COM * See if new case occurred within fmd_case_too_recent seconds of the
82310656SStephen.Hanson@Sun.COM * most recent modification to the old case and if so don't do
82410656SStephen.Hanson@Sun.COM * auto-acquit. This avoids problems if a flood of ereports come in and
82510656SStephen.Hanson@Sun.COM * they don't all get diagnosed before the first case causes some of
82610656SStephen.Hanson@Sun.COM * the devices to be isolated making it appear that an isolated device
82710656SStephen.Hanson@Sun.COM * was in the suspect list.
82810656SStephen.Hanson@Sun.COM */
82910656SStephen.Hanson@Sun.COM fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
83010656SStephen.Hanson@Sun.COM fmd_asru_most_recent, &most_recent);
83110656SStephen.Hanson@Sun.COM too_recent = (new_cip->ci_tv.tv_sec - most_recent <
83210656SStephen.Hanson@Sun.COM fmd_case_too_recent);
83310656SStephen.Hanson@Sun.COM
83410656SStephen.Hanson@Sun.COM if (got_faulty_overlap) {
83510656SStephen.Hanson@Sun.COM /*
83610656SStephen.Hanson@Sun.COM * Acquit any suspects not present in the new list, plus
83710656SStephen.Hanson@Sun.COM * any that are are present but are isolated.
83810656SStephen.Hanson@Sun.COM */
83910656SStephen.Hanson@Sun.COM fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
84010656SStephen.Hanson@Sun.COM fmd_case_acquit_no_match, &fca);
84110656SStephen.Hanson@Sun.COM if (fmd_case_auto_acquit_isolated && !too_recent)
84210656SStephen.Hanson@Sun.COM fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
84310656SStephen.Hanson@Sun.COM fmd_case_acquit_isolated, &do_update);
84410656SStephen.Hanson@Sun.COM } else if (got_acquit_overlap) {
84510656SStephen.Hanson@Sun.COM /*
84610656SStephen.Hanson@Sun.COM * Re-fault the acquitted matching suspects and acquit all
84710656SStephen.Hanson@Sun.COM * isolated suspects.
84810656SStephen.Hanson@Sun.COM */
84910656SStephen.Hanson@Sun.COM if (fmd_case_auto_acquit_isolated && !too_recent) {
85010656SStephen.Hanson@Sun.COM fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
85110656SStephen.Hanson@Sun.COM fmd_case_fault_acquitted_matching, &fca);
85210656SStephen.Hanson@Sun.COM fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
85310656SStephen.Hanson@Sun.COM fmd_case_acquit_isolated, &do_update);
8546228Sstephh }
85510656SStephen.Hanson@Sun.COM } else if (fmd_case_auto_acquit_isolated) {
85610656SStephen.Hanson@Sun.COM /*
85710656SStephen.Hanson@Sun.COM * To get here, there must be no faulty or acquitted suspects,
85810656SStephen.Hanson@Sun.COM * but there must be at least one isolated suspect. Just acquit
85910656SStephen.Hanson@Sun.COM * non-matching isolated suspects. If there are no matching
86010656SStephen.Hanson@Sun.COM * isolated suspects, then re-fault all matching suspects.
86110656SStephen.Hanson@Sun.COM */
86210656SStephen.Hanson@Sun.COM for (i = 0; i < new_cip->ci_nsuspects; i++)
86310656SStephen.Hanson@Sun.COM if (new_susp_state[i] == SUSPECT_STATE_ISOLATED)
86410656SStephen.Hanson@Sun.COM got_isolated_overlap = 1;
86510656SStephen.Hanson@Sun.COM if (!got_isolated_overlap)
86610656SStephen.Hanson@Sun.COM fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
86710656SStephen.Hanson@Sun.COM fmd_case_fault_all_matching, &fca);
86810656SStephen.Hanson@Sun.COM fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
86910656SStephen.Hanson@Sun.COM fmd_case_acquit_no_match, &fca);
8706228Sstephh }
87110656SStephen.Hanson@Sun.COM
87210656SStephen.Hanson@Sun.COM /*
87310656SStephen.Hanson@Sun.COM * If we've updated anything in the old case, call fmd_case_update()
87410656SStephen.Hanson@Sun.COM */
87510656SStephen.Hanson@Sun.COM if (do_update)
87610656SStephen.Hanson@Sun.COM fmd_case_update(old_cp);
8776228Sstephh }
8786228Sstephh
8790Sstevel@tonic-gate /*
8801193Smws * Convict suspects in a case by applying a conviction policy and updating the
8811193Smws * resource cache prior to emitting the list.suspect event for the given case.
8821193Smws * At present, our policy is very simple: convict every suspect in the case.
8831193Smws * In the future, this policy can be extended and made configurable to permit:
8841193Smws *
8851193Smws * - convicting the suspect with the highest FIT rate
8861193Smws * - convicting the suspect with the cheapest FRU
8871193Smws * - convicting the suspect with the FRU that is in a depot's inventory
8881193Smws * - convicting the suspect with the longest lifetime
8891193Smws *
8901193Smws * and so forth. A word to the wise: this problem is significantly harder that
8911193Smws * it seems at first glance. Future work should heed the following advice:
8921193Smws *
8931193Smws * Hacking the policy into C code here is a very bad idea. The policy needs to
8941193Smws * be decided upon very carefully and fundamentally encodes knowledge of what
8951193Smws * suspect list combinations can be emitted by what diagnosis engines. As such
8961193Smws * fmd's code is the wrong location, because that would require fmd itself to
8971193Smws * be updated for every diagnosis engine change, defeating the entire design.
8981193Smws * The FMA Event Registry knows the suspect list combinations: policy inputs
8991193Smws * can be derived from it and used to produce per-module policy configuration.
9001193Smws *
9011193Smws * If the policy needs to be dynamic and not statically fixed at either fmd
9021193Smws * startup or module load time, any implementation of dynamic policy retrieval
9031193Smws * must employ some kind of caching mechanism or be part of a built-in module.
9041193Smws * The fmd_case_convict() function is called with locks held inside of fmd and
9051193Smws * is not a place where unbounded blocking on some inter-process or inter-
9061193Smws * system communication to another service (e.g. another daemon) can occur.
9070Sstevel@tonic-gate */
9086228Sstephh static int
fmd_case_convict(fmd_case_t * cp)9091193Smws fmd_case_convict(fmd_case_t *cp)
9101193Smws {
9111193Smws fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
9121193Smws fmd_asru_hash_t *ahp = fmd.d_asrus;
91310656SStephen.Hanson@Sun.COM int discard_new = 0, i;
9141193Smws fmd_case_susp_t *cis;
9156228Sstephh fmd_asru_link_t *alp;
91610656SStephen.Hanson@Sun.COM uint8_t *new_susp_state;
91710656SStephen.Hanson@Sun.COM uint8_t *new_match_state;
91810656SStephen.Hanson@Sun.COM int adjust_new = 0;
91910656SStephen.Hanson@Sun.COM fccd_t fccd;
92011202SStephen.Hanson@Sun.COM fmd_case_impl_t *ncp, **cps, **cpp;
92111202SStephen.Hanson@Sun.COM uint_t cpc;
92211202SStephen.Hanson@Sun.COM fmd_case_hash_t *chp;
92310656SStephen.Hanson@Sun.COM
92410656SStephen.Hanson@Sun.COM /*
92510656SStephen.Hanson@Sun.COM * First we must see if any matching cases already exist.
92610656SStephen.Hanson@Sun.COM */
92710656SStephen.Hanson@Sun.COM new_susp_state = alloca(cip->ci_nsuspects * sizeof (uint8_t));
92810656SStephen.Hanson@Sun.COM for (i = 0; i < cip->ci_nsuspects; i++)
92910656SStephen.Hanson@Sun.COM new_susp_state[i] = 0;
93010656SStephen.Hanson@Sun.COM new_match_state = alloca(cip->ci_nsuspects * sizeof (uint8_t));
93110656SStephen.Hanson@Sun.COM for (i = 0; i < cip->ci_nsuspects; i++)
93210656SStephen.Hanson@Sun.COM new_match_state[i] = 0;
93310656SStephen.Hanson@Sun.COM fccd.fccd_cip = cip;
93410656SStephen.Hanson@Sun.COM fccd.fccd_adjust_new = &adjust_new;
93510656SStephen.Hanson@Sun.COM fccd.fccd_new_susp_state = new_susp_state;
93610656SStephen.Hanson@Sun.COM fccd.fccd_new_match_state = new_match_state;
93710656SStephen.Hanson@Sun.COM fccd.fccd_discard_new = &discard_new;
93811202SStephen.Hanson@Sun.COM
93911202SStephen.Hanson@Sun.COM /*
94011202SStephen.Hanson@Sun.COM * Hold all cases
94111202SStephen.Hanson@Sun.COM */
94211202SStephen.Hanson@Sun.COM chp = fmd.d_cases;
94311202SStephen.Hanson@Sun.COM (void) pthread_rwlock_rdlock(&chp->ch_lock);
94411202SStephen.Hanson@Sun.COM cps = cpp = fmd_alloc(chp->ch_count * sizeof (fmd_case_t *), FMD_SLEEP);
94511202SStephen.Hanson@Sun.COM cpc = chp->ch_count;
94611202SStephen.Hanson@Sun.COM for (i = 0; i < chp->ch_hashlen; i++)
94711202SStephen.Hanson@Sun.COM for (ncp = chp->ch_hash[i]; ncp != NULL; ncp = ncp->ci_next)
94811202SStephen.Hanson@Sun.COM *cpp++ = fmd_case_tryhold(ncp);
94911202SStephen.Hanson@Sun.COM ASSERT(cpp == cps + cpc);
95011202SStephen.Hanson@Sun.COM (void) pthread_rwlock_unlock(&chp->ch_lock);
95111202SStephen.Hanson@Sun.COM
95211202SStephen.Hanson@Sun.COM /*
95311202SStephen.Hanson@Sun.COM * Run fmd_case_check_for_dups() on all cases except the current one.
95411202SStephen.Hanson@Sun.COM */
95511202SStephen.Hanson@Sun.COM for (i = 0; i < cpc; i++) {
95611202SStephen.Hanson@Sun.COM if (cps[i] != NULL) {
95711202SStephen.Hanson@Sun.COM if (cps[i] != (fmd_case_impl_t *)cp)
95811202SStephen.Hanson@Sun.COM fmd_case_check_for_dups((fmd_case_t *)cps[i],
95911202SStephen.Hanson@Sun.COM &fccd);
96011202SStephen.Hanson@Sun.COM fmd_case_rele((fmd_case_t *)cps[i]);
96111202SStephen.Hanson@Sun.COM }
96211202SStephen.Hanson@Sun.COM }
96311202SStephen.Hanson@Sun.COM fmd_free(cps, cpc * sizeof (fmd_case_t *));
96411202SStephen.Hanson@Sun.COM
96511202SStephen.Hanson@Sun.COM (void) pthread_mutex_lock(&cip->ci_lock);
96611202SStephen.Hanson@Sun.COM if (cip->ci_code == NULL)
96711202SStephen.Hanson@Sun.COM (void) fmd_case_mkcode(cp);
96811202SStephen.Hanson@Sun.COM else if (cip->ci_precanned)
96911202SStephen.Hanson@Sun.COM fmd_case_code_hash_insert(fmd.d_cases, cip);
97010656SStephen.Hanson@Sun.COM
97110656SStephen.Hanson@Sun.COM if (discard_new) {
97210656SStephen.Hanson@Sun.COM /*
97310656SStephen.Hanson@Sun.COM * We've found an existing case that is a match and it is not
97410656SStephen.Hanson@Sun.COM * already in repaired or resolved state. So we can close this
97510656SStephen.Hanson@Sun.COM * one as a duplicate.
97610656SStephen.Hanson@Sun.COM */
9776228Sstephh (void) pthread_mutex_unlock(&cip->ci_lock);
9786228Sstephh return (1);
9796228Sstephh }
9801193Smws
9816228Sstephh /*
98210656SStephen.Hanson@Sun.COM * Allocate new cache entries
9836228Sstephh */
9841193Smws for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
9856228Sstephh if ((alp = fmd_asru_hash_create_entry(ahp,
9866228Sstephh cp, cis->cis_nvl)) == NULL) {
9871193Smws fmd_error(EFMD_CASE_EVENT, "cannot convict suspect in "
9881193Smws "%s: %s\n", cip->ci_uuid, fmd_strerror(errno));
9891193Smws continue;
9901193Smws }
9919120SStephen.Hanson@Sun.COM alp->al_flags |= FMD_ASRU_PRESENT;
9929120SStephen.Hanson@Sun.COM alp->al_asru->asru_flags |= FMD_ASRU_PRESENT;
9937275Sstephh (void) fmd_asru_clrflags(alp, FMD_ASRU_UNUSABLE, 0);
9946228Sstephh (void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY);
9951193Smws }
9961193Smws
99710656SStephen.Hanson@Sun.COM if (adjust_new) {
99810656SStephen.Hanson@Sun.COM int some_suspect = 0, some_not_suspect = 0;
99910656SStephen.Hanson@Sun.COM
100010656SStephen.Hanson@Sun.COM /*
100110656SStephen.Hanson@Sun.COM * There is one or more matching case but they are already in
100210656SStephen.Hanson@Sun.COM * repaired or resolved state. So we need to keep the new
100310656SStephen.Hanson@Sun.COM * case, but we can adjust it. Repaired/removed/replaced
100410656SStephen.Hanson@Sun.COM * suspects are unlikely to be to blame (unless there are
100510656SStephen.Hanson@Sun.COM * actually two separate faults). So if we have a combination of
100610656SStephen.Hanson@Sun.COM * repaired/replaced/removed suspects and acquitted suspects in
100710656SStephen.Hanson@Sun.COM * the old lists, then we should acquit in the new list those
100810656SStephen.Hanson@Sun.COM * that were repaired/replaced/removed in the old.
100910656SStephen.Hanson@Sun.COM */
101010656SStephen.Hanson@Sun.COM for (i = 0; i < cip->ci_nsuspects; i++) {
101110656SStephen.Hanson@Sun.COM if ((new_susp_state[i] & SUSPECT_STATE_REPLACED) ||
101210656SStephen.Hanson@Sun.COM (new_susp_state[i] & SUSPECT_STATE_REPAIRED) ||
101310656SStephen.Hanson@Sun.COM (new_susp_state[i] & SUSPECT_STATE_REMOVED) ||
101410656SStephen.Hanson@Sun.COM (new_match_state[i] & SUSPECT_STATE_NO_MATCH))
101510656SStephen.Hanson@Sun.COM some_not_suspect = 1;
101610656SStephen.Hanson@Sun.COM else
101710656SStephen.Hanson@Sun.COM some_suspect = 1;
101810656SStephen.Hanson@Sun.COM }
101910656SStephen.Hanson@Sun.COM if (some_suspect && some_not_suspect) {
102010656SStephen.Hanson@Sun.COM for (cis = cip->ci_suspects, i = 0; cis != NULL;
102110656SStephen.Hanson@Sun.COM cis = cis->cis_next, i++)
102210656SStephen.Hanson@Sun.COM if ((new_susp_state[i] &
102310656SStephen.Hanson@Sun.COM SUSPECT_STATE_REPLACED) ||
102410656SStephen.Hanson@Sun.COM (new_susp_state[i] &
102510656SStephen.Hanson@Sun.COM SUSPECT_STATE_REPAIRED) ||
102610656SStephen.Hanson@Sun.COM (new_susp_state[i] &
102710656SStephen.Hanson@Sun.COM SUSPECT_STATE_REMOVED) ||
102810656SStephen.Hanson@Sun.COM (new_match_state[i] &
102910656SStephen.Hanson@Sun.COM SUSPECT_STATE_NO_MATCH))
103010656SStephen.Hanson@Sun.COM fmd_asru_hash_apply_by_case(fmd.d_asrus,
103110656SStephen.Hanson@Sun.COM cp, fmd_case_acquit_suspect,
103210656SStephen.Hanson@Sun.COM cis->cis_nvl);
103310656SStephen.Hanson@Sun.COM }
103410656SStephen.Hanson@Sun.COM }
103510656SStephen.Hanson@Sun.COM
10361193Smws (void) pthread_mutex_unlock(&cip->ci_lock);
10376228Sstephh return (0);
10381193Smws }
10391193Smws
10401193Smws void
fmd_case_publish(fmd_case_t * cp,uint_t state)10410Sstevel@tonic-gate fmd_case_publish(fmd_case_t *cp, uint_t state)
10420Sstevel@tonic-gate {
10430Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
10440Sstevel@tonic-gate fmd_event_t *e;
10450Sstevel@tonic-gate nvlist_t *nvl;
10460Sstevel@tonic-gate char *class;
10470Sstevel@tonic-gate
10481193Smws if (state == FMD_CASE_CURRENT)
10491193Smws state = cip->ci_state; /* use current state */
10501193Smws
10510Sstevel@tonic-gate switch (state) {
10520Sstevel@tonic-gate case FMD_CASE_SOLVED:
10535255Sstephh (void) pthread_mutex_lock(&cip->ci_lock);
10547913SStephen.Hanson@Sun.COM
10557913SStephen.Hanson@Sun.COM /*
10567913SStephen.Hanson@Sun.COM * If we already have a code, then case is already solved.
10577913SStephen.Hanson@Sun.COM */
10589120SStephen.Hanson@Sun.COM if (cip->ci_precanned == 0 && cip->ci_xprt == NULL &&
10599120SStephen.Hanson@Sun.COM cip->ci_code != NULL) {
10607913SStephen.Hanson@Sun.COM (void) pthread_mutex_unlock(&cip->ci_lock);
10617913SStephen.Hanson@Sun.COM break;
10627913SStephen.Hanson@Sun.COM }
10637913SStephen.Hanson@Sun.COM
10645255Sstephh if (cip->ci_tv_valid == 0) {
10655255Sstephh fmd_time_gettimeofday(&cip->ci_tv);
10665255Sstephh cip->ci_tv_valid = 1;
10675255Sstephh }
10685255Sstephh (void) pthread_mutex_unlock(&cip->ci_lock);
10696228Sstephh
10706228Sstephh if (fmd_case_convict(cp) == 1) { /* dupclose */
10716228Sstephh cip->ci_flags &= ~FMD_CF_SOLVED;
10726228Sstephh fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, 0);
10736228Sstephh break;
10746228Sstephh }
10759120SStephen.Hanson@Sun.COM if (cip->ci_xprt != NULL) {
10769120SStephen.Hanson@Sun.COM /*
10779120SStephen.Hanson@Sun.COM * For proxy, save some information about the transport
10789120SStephen.Hanson@Sun.COM * in the resource cache.
10799120SStephen.Hanson@Sun.COM */
10809120SStephen.Hanson@Sun.COM int count = 0;
10819120SStephen.Hanson@Sun.COM fmd_asru_set_on_proxy_t fasp;
10829120SStephen.Hanson@Sun.COM fmd_xprt_impl_t *xip = (fmd_xprt_impl_t *)cip->ci_xprt;
10839120SStephen.Hanson@Sun.COM
10849120SStephen.Hanson@Sun.COM fasp.fasp_countp = &count;
10859120SStephen.Hanson@Sun.COM fasp.fasp_maxcount = cip->ci_nsuspects;
10869120SStephen.Hanson@Sun.COM fasp.fasp_proxy_asru = cip->ci_proxy_asru;
10879120SStephen.Hanson@Sun.COM fasp.fasp_proxy_external = xip->xi_flags &
10889120SStephen.Hanson@Sun.COM FMD_XPRT_EXTERNAL;
10899120SStephen.Hanson@Sun.COM fasp.fasp_proxy_rdonly = ((xip->xi_flags &
10909120SStephen.Hanson@Sun.COM FMD_XPRT_RDWR) == FMD_XPRT_RDONLY);
10919120SStephen.Hanson@Sun.COM fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
10929120SStephen.Hanson@Sun.COM fmd_asru_set_on_proxy, &fasp);
10939120SStephen.Hanson@Sun.COM }
10941193Smws nvl = fmd_case_mkevent(cp, FM_LIST_SUSPECT_CLASS);
10950Sstevel@tonic-gate (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
10960Sstevel@tonic-gate
10970Sstevel@tonic-gate e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
10980Sstevel@tonic-gate (void) pthread_rwlock_rdlock(&fmd.d_log_lock);
10990Sstevel@tonic-gate fmd_log_append(fmd.d_fltlog, e, cp);
11000Sstevel@tonic-gate (void) pthread_rwlock_unlock(&fmd.d_log_lock);
11010Sstevel@tonic-gate fmd_dispq_dispatch(fmd.d_disp, e, class);
11020Sstevel@tonic-gate
11030Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
11040Sstevel@tonic-gate cip->ci_mod->mod_stats->ms_casesolved.fmds_value.ui64++;
11050Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
11060Sstevel@tonic-gate
11070Sstevel@tonic-gate break;
11080Sstevel@tonic-gate
11091193Smws case FMD_CASE_CLOSE_WAIT:
11100Sstevel@tonic-gate fmd_case_hold(cp);
11110Sstevel@tonic-gate e = fmd_event_create(FMD_EVT_CLOSE, FMD_HRT_NOW, NULL, cp);
11120Sstevel@tonic-gate fmd_eventq_insert_at_head(cip->ci_mod->mod_queue, e);
11130Sstevel@tonic-gate
11140Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
11150Sstevel@tonic-gate cip->ci_mod->mod_stats->ms_caseclosed.fmds_value.ui64++;
11160Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
11170Sstevel@tonic-gate
11180Sstevel@tonic-gate break;
11191193Smws
11201193Smws case FMD_CASE_CLOSED:
11211193Smws nvl = fmd_case_mkevent(cp, FM_LIST_ISOLATED_CLASS);
11221193Smws (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
11231193Smws e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
11241193Smws fmd_dispq_dispatch(fmd.d_disp, e, class);
11251193Smws break;
11260Sstevel@tonic-gate
11271193Smws case FMD_CASE_REPAIRED:
11281193Smws nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
11291193Smws (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
11301193Smws e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
11316276Scy152378 (void) pthread_rwlock_rdlock(&fmd.d_log_lock);
11326276Scy152378 fmd_log_append(fmd.d_fltlog, e, cp);
11336276Scy152378 (void) pthread_rwlock_unlock(&fmd.d_log_lock);
11341193Smws fmd_dispq_dispatch(fmd.d_disp, e, class);
11351193Smws break;
11367275Sstephh
11377275Sstephh case FMD_CASE_RESOLVED:
11387275Sstephh nvl = fmd_case_mkevent(cp, FM_LIST_RESOLVED_CLASS);
11397275Sstephh (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
11407275Sstephh e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
11417275Sstephh (void) pthread_rwlock_rdlock(&fmd.d_log_lock);
11427275Sstephh fmd_log_append(fmd.d_fltlog, e, cp);
11437275Sstephh (void) pthread_rwlock_unlock(&fmd.d_log_lock);
11447275Sstephh fmd_dispq_dispatch(fmd.d_disp, e, class);
11457275Sstephh break;
11460Sstevel@tonic-gate }
11470Sstevel@tonic-gate }
11480Sstevel@tonic-gate
11490Sstevel@tonic-gate fmd_case_t *
fmd_case_hash_lookup(fmd_case_hash_t * chp,const char * uuid)11500Sstevel@tonic-gate fmd_case_hash_lookup(fmd_case_hash_t *chp, const char *uuid)
11510Sstevel@tonic-gate {
11520Sstevel@tonic-gate fmd_case_impl_t *cip;
11530Sstevel@tonic-gate uint_t h;
11540Sstevel@tonic-gate
11550Sstevel@tonic-gate (void) pthread_rwlock_rdlock(&chp->ch_lock);
11560Sstevel@tonic-gate h = fmd_strhash(uuid) % chp->ch_hashlen;
11570Sstevel@tonic-gate
11580Sstevel@tonic-gate for (cip = chp->ch_hash[h]; cip != NULL; cip = cip->ci_next) {
11590Sstevel@tonic-gate if (strcmp(cip->ci_uuid, uuid) == 0)
11600Sstevel@tonic-gate break;
11610Sstevel@tonic-gate }
11620Sstevel@tonic-gate
11636081Scy152378 /*
11646081Scy152378 * If deleting bit is set, treat the case as if it doesn't exist.
11656081Scy152378 */
11660Sstevel@tonic-gate if (cip != NULL)
11676081Scy152378 cip = fmd_case_tryhold(cip);
11686081Scy152378
11696081Scy152378 if (cip == NULL)
11700Sstevel@tonic-gate (void) fmd_set_errno(EFMD_CASE_INVAL);
11710Sstevel@tonic-gate
11720Sstevel@tonic-gate (void) pthread_rwlock_unlock(&chp->ch_lock);
11730Sstevel@tonic-gate return ((fmd_case_t *)cip);
11740Sstevel@tonic-gate }
11750Sstevel@tonic-gate
11760Sstevel@tonic-gate static fmd_case_impl_t *
fmd_case_hash_insert(fmd_case_hash_t * chp,fmd_case_impl_t * cip)11770Sstevel@tonic-gate fmd_case_hash_insert(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
11780Sstevel@tonic-gate {
11790Sstevel@tonic-gate fmd_case_impl_t *eip;
11800Sstevel@tonic-gate uint_t h;
11810Sstevel@tonic-gate
11820Sstevel@tonic-gate (void) pthread_rwlock_wrlock(&chp->ch_lock);
11830Sstevel@tonic-gate h = fmd_strhash(cip->ci_uuid) % chp->ch_hashlen;
11840Sstevel@tonic-gate
11850Sstevel@tonic-gate for (eip = chp->ch_hash[h]; eip != NULL; eip = eip->ci_next) {
11866081Scy152378 if (strcmp(cip->ci_uuid, eip->ci_uuid) == 0 &&
11876081Scy152378 fmd_case_tryhold(eip) != NULL) {
11880Sstevel@tonic-gate (void) pthread_rwlock_unlock(&chp->ch_lock);
11891193Smws return (eip); /* uuid already present */
11900Sstevel@tonic-gate }
11910Sstevel@tonic-gate }
11920Sstevel@tonic-gate
11930Sstevel@tonic-gate cip->ci_next = chp->ch_hash[h];
11940Sstevel@tonic-gate chp->ch_hash[h] = cip;
11950Sstevel@tonic-gate
11961193Smws chp->ch_count++;
11971193Smws ASSERT(chp->ch_count != 0);
11981193Smws
11990Sstevel@tonic-gate (void) pthread_rwlock_unlock(&chp->ch_lock);
12000Sstevel@tonic-gate return (cip);
12010Sstevel@tonic-gate }
12020Sstevel@tonic-gate
12030Sstevel@tonic-gate static void
fmd_case_hash_delete(fmd_case_hash_t * chp,fmd_case_impl_t * cip)12040Sstevel@tonic-gate fmd_case_hash_delete(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
12050Sstevel@tonic-gate {
12060Sstevel@tonic-gate fmd_case_impl_t *cp, **pp;
12070Sstevel@tonic-gate uint_t h;
12080Sstevel@tonic-gate
12096081Scy152378 ASSERT(MUTEX_HELD(&cip->ci_lock));
12106081Scy152378
12116081Scy152378 cip->ci_flags |= FMD_CF_DELETING;
12126081Scy152378 (void) pthread_mutex_unlock(&cip->ci_lock);
12136081Scy152378
12140Sstevel@tonic-gate (void) pthread_rwlock_wrlock(&chp->ch_lock);
12150Sstevel@tonic-gate
12160Sstevel@tonic-gate h = fmd_strhash(cip->ci_uuid) % chp->ch_hashlen;
12170Sstevel@tonic-gate pp = &chp->ch_hash[h];
12180Sstevel@tonic-gate
12190Sstevel@tonic-gate for (cp = *pp; cp != NULL; cp = cp->ci_next) {
12200Sstevel@tonic-gate if (cp != cip)
12210Sstevel@tonic-gate pp = &cp->ci_next;
12220Sstevel@tonic-gate else
12230Sstevel@tonic-gate break;
12240Sstevel@tonic-gate }
12250Sstevel@tonic-gate
12260Sstevel@tonic-gate if (cp == NULL) {
12270Sstevel@tonic-gate fmd_panic("case %p (%s) not found on hash chain %u\n",
12280Sstevel@tonic-gate (void *)cip, cip->ci_uuid, h);
12290Sstevel@tonic-gate }
12300Sstevel@tonic-gate
12310Sstevel@tonic-gate *pp = cp->ci_next;
12320Sstevel@tonic-gate cp->ci_next = NULL;
12330Sstevel@tonic-gate
12346228Sstephh /*
12356228Sstephh * delete from code hash if it is on it
12366228Sstephh */
12376228Sstephh fmd_case_code_hash_delete(chp, cip);
12386228Sstephh
12391193Smws ASSERT(chp->ch_count != 0);
12401193Smws chp->ch_count--;
12411193Smws
12420Sstevel@tonic-gate (void) pthread_rwlock_unlock(&chp->ch_lock);
12436081Scy152378
12446081Scy152378 (void) pthread_mutex_lock(&cip->ci_lock);
12456081Scy152378 ASSERT(cip->ci_flags & FMD_CF_DELETING);
12460Sstevel@tonic-gate }
12470Sstevel@tonic-gate
12480Sstevel@tonic-gate fmd_case_t *
fmd_case_create(fmd_module_t * mp,const char * uuidstr,void * data)1249*12967Sgavin.maltby@oracle.com fmd_case_create(fmd_module_t *mp, const char *uuidstr, void *data)
12500Sstevel@tonic-gate {
12510Sstevel@tonic-gate fmd_case_impl_t *cip = fmd_zalloc(sizeof (fmd_case_impl_t), FMD_SLEEP);
12521193Smws fmd_case_impl_t *eip = NULL;
12530Sstevel@tonic-gate uuid_t uuid;
12540Sstevel@tonic-gate
12550Sstevel@tonic-gate (void) pthread_mutex_init(&cip->ci_lock, NULL);
12560Sstevel@tonic-gate fmd_buf_hash_create(&cip->ci_bufs);
12570Sstevel@tonic-gate
12580Sstevel@tonic-gate fmd_module_hold(mp);
12590Sstevel@tonic-gate cip->ci_mod = mp;
12600Sstevel@tonic-gate cip->ci_refs = 1;
12610Sstevel@tonic-gate cip->ci_state = FMD_CASE_UNSOLVED;
12620Sstevel@tonic-gate cip->ci_flags = FMD_CF_DIRTY;
12630Sstevel@tonic-gate cip->ci_data = data;
12640Sstevel@tonic-gate
12650Sstevel@tonic-gate /*
12660Sstevel@tonic-gate * Calling libuuid: get a clue. The library interfaces cleverly do not
12670Sstevel@tonic-gate * define any constant for the length of an unparse string, and do not
12680Sstevel@tonic-gate * permit the caller to specify a buffer length for safety. The spec
12690Sstevel@tonic-gate * says it will be 36 bytes, but we make it tunable just in case.
12700Sstevel@tonic-gate */
12710Sstevel@tonic-gate (void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &cip->ci_uuidlen);
12720Sstevel@tonic-gate cip->ci_uuid = fmd_zalloc(cip->ci_uuidlen + 1, FMD_SLEEP);
12730Sstevel@tonic-gate
1274*12967Sgavin.maltby@oracle.com if (uuidstr == NULL) {
1275*12967Sgavin.maltby@oracle.com /*
1276*12967Sgavin.maltby@oracle.com * We expect this loop to execute only once, but code it
1277*12967Sgavin.maltby@oracle.com * defensively against the possibility of libuuid bugs.
1278*12967Sgavin.maltby@oracle.com * Keep generating uuids and attempting to do a hash insert
1279*12967Sgavin.maltby@oracle.com * until we get a unique one.
1280*12967Sgavin.maltby@oracle.com */
1281*12967Sgavin.maltby@oracle.com do {
1282*12967Sgavin.maltby@oracle.com if (eip != NULL)
1283*12967Sgavin.maltby@oracle.com fmd_case_rele((fmd_case_t *)eip);
1284*12967Sgavin.maltby@oracle.com uuid_generate(uuid);
1285*12967Sgavin.maltby@oracle.com uuid_unparse(uuid, cip->ci_uuid);
1286*12967Sgavin.maltby@oracle.com } while ((eip = fmd_case_hash_insert(fmd.d_cases, cip)) != cip);
1287*12967Sgavin.maltby@oracle.com } else {
1288*12967Sgavin.maltby@oracle.com /*
1289*12967Sgavin.maltby@oracle.com * If a uuid was specified we must succeed with that uuid,
1290*12967Sgavin.maltby@oracle.com * or return NULL indicating a case with that uuid already
1291*12967Sgavin.maltby@oracle.com * exists.
1292*12967Sgavin.maltby@oracle.com */
1293*12967Sgavin.maltby@oracle.com (void) strncpy(cip->ci_uuid, uuidstr, cip->ci_uuidlen + 1);
1294*12967Sgavin.maltby@oracle.com if (fmd_case_hash_insert(fmd.d_cases, cip) != cip) {
1295*12967Sgavin.maltby@oracle.com fmd_free(cip->ci_uuid, cip->ci_uuidlen + 1);
1296*12967Sgavin.maltby@oracle.com (void) fmd_buf_hash_destroy(&cip->ci_bufs);
1297*12967Sgavin.maltby@oracle.com fmd_module_rele(mp);
1298*12967Sgavin.maltby@oracle.com pthread_mutex_destroy(&cip->ci_lock);
1299*12967Sgavin.maltby@oracle.com fmd_free(cip, sizeof (*cip));
1300*12967Sgavin.maltby@oracle.com return (NULL);
1301*12967Sgavin.maltby@oracle.com }
1302*12967Sgavin.maltby@oracle.com }
13030Sstevel@tonic-gate
13040Sstevel@tonic-gate ASSERT(fmd_module_locked(mp));
13050Sstevel@tonic-gate fmd_list_append(&mp->mod_cases, cip);
13060Sstevel@tonic-gate fmd_module_setcdirty(mp);
13070Sstevel@tonic-gate
13080Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
13090Sstevel@tonic-gate cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64++;
13100Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
13110Sstevel@tonic-gate
13120Sstevel@tonic-gate return ((fmd_case_t *)cip);
13130Sstevel@tonic-gate }
13140Sstevel@tonic-gate
13151552Smws static void
fmd_case_destroy_suspects(fmd_case_impl_t * cip)13161552Smws fmd_case_destroy_suspects(fmd_case_impl_t *cip)
13171552Smws {
13181552Smws fmd_case_susp_t *cis, *ncis;
13191552Smws
13201552Smws ASSERT(MUTEX_HELD(&cip->ci_lock));
13211552Smws
13229120SStephen.Hanson@Sun.COM if (cip->ci_proxy_asru)
13239120SStephen.Hanson@Sun.COM fmd_free(cip->ci_proxy_asru, sizeof (uint8_t) *
13249120SStephen.Hanson@Sun.COM cip->ci_nsuspects);
13259120SStephen.Hanson@Sun.COM if (cip->ci_diag_de)
13269120SStephen.Hanson@Sun.COM nvlist_free(cip->ci_diag_de);
13279120SStephen.Hanson@Sun.COM if (cip->ci_diag_asru)
13289120SStephen.Hanson@Sun.COM fmd_free(cip->ci_diag_asru, sizeof (uint8_t) *
13299120SStephen.Hanson@Sun.COM cip->ci_nsuspects);
13309120SStephen.Hanson@Sun.COM
13311552Smws for (cis = cip->ci_suspects; cis != NULL; cis = ncis) {
13321552Smws ncis = cis->cis_next;
13331552Smws nvlist_free(cis->cis_nvl);
13341552Smws fmd_free(cis, sizeof (fmd_case_susp_t));
13351552Smws }
13361552Smws
13371552Smws cip->ci_suspects = NULL;
13381552Smws cip->ci_nsuspects = 0;
13391552Smws }
13401552Smws
13410Sstevel@tonic-gate fmd_case_t *
fmd_case_recreate(fmd_module_t * mp,fmd_xprt_t * xp,uint_t state,const char * uuid,const char * code)13421193Smws fmd_case_recreate(fmd_module_t *mp, fmd_xprt_t *xp,
13431193Smws uint_t state, const char *uuid, const char *code)
13440Sstevel@tonic-gate {
13450Sstevel@tonic-gate fmd_case_impl_t *cip = fmd_zalloc(sizeof (fmd_case_impl_t), FMD_SLEEP);
13461193Smws fmd_case_impl_t *eip;
13471193Smws
13480Sstevel@tonic-gate (void) pthread_mutex_init(&cip->ci_lock, NULL);
13490Sstevel@tonic-gate fmd_buf_hash_create(&cip->ci_bufs);
13500Sstevel@tonic-gate
13510Sstevel@tonic-gate fmd_module_hold(mp);
13520Sstevel@tonic-gate cip->ci_mod = mp;
13531193Smws cip->ci_xprt = xp;
13540Sstevel@tonic-gate cip->ci_refs = 1;
13551193Smws cip->ci_state = state;
13560Sstevel@tonic-gate cip->ci_uuid = fmd_strdup(uuid, FMD_SLEEP);
13570Sstevel@tonic-gate cip->ci_uuidlen = strlen(cip->ci_uuid);
13581193Smws cip->ci_code = fmd_strdup(code, FMD_SLEEP);
13591193Smws cip->ci_codelen = cip->ci_code ? strlen(cip->ci_code) + 1 : 0;
13601193Smws
13611193Smws if (state > FMD_CASE_CLOSE_WAIT)
13621193Smws cip->ci_flags |= FMD_CF_SOLVED;
13631193Smws
13641193Smws /*
13651193Smws * Insert the case into the global case hash. If the specified UUID is
13661193Smws * already present, check to see if it is an orphan: if so, reclaim it;
13671193Smws * otherwise if it is owned by a different module then return NULL.
13681193Smws */
13691193Smws if ((eip = fmd_case_hash_insert(fmd.d_cases, cip)) != cip) {
13701193Smws (void) pthread_mutex_lock(&cip->ci_lock);
13711193Smws cip->ci_refs--; /* decrement to zero */
13721193Smws fmd_case_destroy((fmd_case_t *)cip, B_FALSE);
13731193Smws
13741193Smws cip = eip; /* switch 'cip' to the existing case */
13751193Smws (void) pthread_mutex_lock(&cip->ci_lock);
13761193Smws
13771193Smws /*
13781193Smws * If the ASRU cache is trying to recreate an orphan, then just
13791193Smws * return the existing case that we found without changing it.
13801193Smws */
13811193Smws if (mp == fmd.d_rmod) {
13827275Sstephh /*
13839120SStephen.Hanson@Sun.COM * In case the case has already been created from
13849120SStephen.Hanson@Sun.COM * a checkpoint file we need to set up code now.
13859120SStephen.Hanson@Sun.COM */
13869120SStephen.Hanson@Sun.COM if (cip->ci_state < FMD_CASE_CLOSED) {
13879120SStephen.Hanson@Sun.COM if (code != NULL && cip->ci_code == NULL) {
13889120SStephen.Hanson@Sun.COM cip->ci_code = fmd_strdup(code,
13899120SStephen.Hanson@Sun.COM FMD_SLEEP);
13909120SStephen.Hanson@Sun.COM cip->ci_codelen = cip->ci_code ?
13919120SStephen.Hanson@Sun.COM strlen(cip->ci_code) + 1 : 0;
13929120SStephen.Hanson@Sun.COM fmd_case_code_hash_insert(fmd.d_cases,
13939120SStephen.Hanson@Sun.COM cip);
13949120SStephen.Hanson@Sun.COM }
13959120SStephen.Hanson@Sun.COM }
13969120SStephen.Hanson@Sun.COM
13979120SStephen.Hanson@Sun.COM /*
13987275Sstephh * When recreating an orphan case, state passed in may
139910656SStephen.Hanson@Sun.COM * be CLOSED (faulty) or REPAIRED/RESOLVED (!faulty). If
14007275Sstephh * any suspects are still CLOSED (faulty) then the
14017275Sstephh * overall state needs to be CLOSED.
14027275Sstephh */
140310656SStephen.Hanson@Sun.COM if ((cip->ci_state == FMD_CASE_REPAIRED ||
140410656SStephen.Hanson@Sun.COM cip->ci_state == FMD_CASE_RESOLVED) &&
14059120SStephen.Hanson@Sun.COM state == FMD_CASE_CLOSED)
14067275Sstephh cip->ci_state = FMD_CASE_CLOSED;
14071193Smws (void) pthread_mutex_unlock(&cip->ci_lock);
14081193Smws fmd_case_rele((fmd_case_t *)cip);
14091193Smws return ((fmd_case_t *)cip);
14101193Smws }
14111193Smws
14121193Smws /*
14131193Smws * If the existing case isn't an orphan or is being proxied,
14141193Smws * then we have a UUID conflict: return failure to the caller.
14151193Smws */
14161193Smws if (cip->ci_mod != fmd.d_rmod || xp != NULL) {
14171193Smws (void) pthread_mutex_unlock(&cip->ci_lock);
14181193Smws fmd_case_rele((fmd_case_t *)cip);
14191193Smws return (NULL);
14201193Smws }
14211193Smws
14221193Smws /*
14231193Smws * If the new module is reclaiming an orphaned case, remove
14241193Smws * the case from the root module, switch ci_mod, and then fall
14251193Smws * through to adding the case to the new owner module 'mp'.
14261193Smws */
14271193Smws fmd_module_lock(cip->ci_mod);
14281193Smws fmd_list_delete(&cip->ci_mod->mod_cases, cip);
14291193Smws fmd_module_unlock(cip->ci_mod);
14301193Smws
14311193Smws fmd_module_rele(cip->ci_mod);
14321193Smws cip->ci_mod = mp;
14331193Smws fmd_module_hold(mp);
14341193Smws
14357913SStephen.Hanson@Sun.COM /*
14367913SStephen.Hanson@Sun.COM * It's possible that fmd crashed or was restarted during a
14377913SStephen.Hanson@Sun.COM * previous solve operation between the asru cache being created
14387913SStephen.Hanson@Sun.COM * and the ckpt file being updated to SOLVED. Thus when the DE
14397913SStephen.Hanson@Sun.COM * recreates the case here from the checkpoint file, the state
14407913SStephen.Hanson@Sun.COM * will be UNSOLVED and yet we are having to reclaim because
14417913SStephen.Hanson@Sun.COM * the case was in the asru cache. If this happens, revert the
14427913SStephen.Hanson@Sun.COM * case back to the UNSOLVED state and let the DE solve it again
14437913SStephen.Hanson@Sun.COM */
14447913SStephen.Hanson@Sun.COM if (state == FMD_CASE_UNSOLVED) {
14457913SStephen.Hanson@Sun.COM fmd_asru_hash_delete_case(fmd.d_asrus,
14467913SStephen.Hanson@Sun.COM (fmd_case_t *)cip);
14477913SStephen.Hanson@Sun.COM fmd_case_destroy_suspects(cip);
14487913SStephen.Hanson@Sun.COM fmd_case_code_hash_delete(fmd.d_cases, cip);
14497913SStephen.Hanson@Sun.COM fmd_free(cip->ci_code, cip->ci_codelen);
14507913SStephen.Hanson@Sun.COM cip->ci_code = NULL;
14517913SStephen.Hanson@Sun.COM cip->ci_codelen = 0;
14527913SStephen.Hanson@Sun.COM cip->ci_tv_valid = 0;
14537913SStephen.Hanson@Sun.COM }
14547913SStephen.Hanson@Sun.COM
14551552Smws cip->ci_state = state;
14561552Smws
14571193Smws (void) pthread_mutex_unlock(&cip->ci_lock);
14581193Smws fmd_case_rele((fmd_case_t *)cip);
14596228Sstephh } else {
14606228Sstephh /*
14616228Sstephh * add into hash of solved cases
14626228Sstephh */
14636228Sstephh if (cip->ci_code)
14646228Sstephh fmd_case_code_hash_insert(fmd.d_cases, cip);
14651193Smws }
14660Sstevel@tonic-gate
14670Sstevel@tonic-gate ASSERT(fmd_module_locked(mp));
14680Sstevel@tonic-gate fmd_list_append(&mp->mod_cases, cip);
14690Sstevel@tonic-gate
14700Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
14710Sstevel@tonic-gate cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64++;
14720Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
14730Sstevel@tonic-gate
14740Sstevel@tonic-gate return ((fmd_case_t *)cip);
14750Sstevel@tonic-gate }
14760Sstevel@tonic-gate
14770Sstevel@tonic-gate void
fmd_case_destroy(fmd_case_t * cp,int visible)14781193Smws fmd_case_destroy(fmd_case_t *cp, int visible)
14790Sstevel@tonic-gate {
14800Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
14810Sstevel@tonic-gate fmd_case_item_t *cit, *ncit;
14820Sstevel@tonic-gate
14830Sstevel@tonic-gate ASSERT(MUTEX_HELD(&cip->ci_lock));
14840Sstevel@tonic-gate ASSERT(cip->ci_refs == 0);
14850Sstevel@tonic-gate
14861193Smws if (visible) {
14871193Smws TRACE((FMD_DBG_CASE, "deleting case %s", cip->ci_uuid));
14881193Smws fmd_case_hash_delete(fmd.d_cases, cip);
14891193Smws }
14900Sstevel@tonic-gate
14910Sstevel@tonic-gate for (cit = cip->ci_items; cit != NULL; cit = ncit) {
14920Sstevel@tonic-gate ncit = cit->cit_next;
14930Sstevel@tonic-gate fmd_event_rele(cit->cit_event);
14940Sstevel@tonic-gate fmd_free(cit, sizeof (fmd_case_item_t));
14950Sstevel@tonic-gate }
14960Sstevel@tonic-gate
14971552Smws fmd_case_destroy_suspects(cip);
14980Sstevel@tonic-gate
14990Sstevel@tonic-gate if (cip->ci_principal != NULL)
15000Sstevel@tonic-gate fmd_event_rele(cip->ci_principal);
15010Sstevel@tonic-gate
15020Sstevel@tonic-gate fmd_free(cip->ci_uuid, cip->ci_uuidlen + 1);
15031193Smws fmd_free(cip->ci_code, cip->ci_codelen);
15041429Smws (void) fmd_buf_hash_destroy(&cip->ci_bufs);
15050Sstevel@tonic-gate
15060Sstevel@tonic-gate fmd_module_rele(cip->ci_mod);
15070Sstevel@tonic-gate fmd_free(cip, sizeof (fmd_case_impl_t));
15080Sstevel@tonic-gate }
15090Sstevel@tonic-gate
15100Sstevel@tonic-gate void
fmd_case_hold(fmd_case_t * cp)15110Sstevel@tonic-gate fmd_case_hold(fmd_case_t *cp)
15120Sstevel@tonic-gate {
15130Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
15140Sstevel@tonic-gate
15150Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_lock);
15166081Scy152378 fmd_case_hold_locked(cp);
15170Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock);
15180Sstevel@tonic-gate }
15190Sstevel@tonic-gate
15200Sstevel@tonic-gate void
fmd_case_hold_locked(fmd_case_t * cp)15211193Smws fmd_case_hold_locked(fmd_case_t *cp)
15221193Smws {
15231193Smws fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
15241193Smws
15251193Smws ASSERT(MUTEX_HELD(&cip->ci_lock));
15266081Scy152378 if (cip->ci_flags & FMD_CF_DELETING)
15276081Scy152378 fmd_panic("attempt to hold a deleting case %p (%s)\n",
15286081Scy152378 (void *)cip, cip->ci_uuid);
15291193Smws cip->ci_refs++;
15301193Smws ASSERT(cip->ci_refs != 0);
15311193Smws }
15321193Smws
15336081Scy152378 static fmd_case_impl_t *
fmd_case_tryhold(fmd_case_impl_t * cip)15346081Scy152378 fmd_case_tryhold(fmd_case_impl_t *cip)
15356081Scy152378 {
15366081Scy152378 /*
15376081Scy152378 * If the case's "deleting" bit is unset, hold and return case,
15386081Scy152378 * otherwise, return NULL.
15396081Scy152378 */
15406081Scy152378 (void) pthread_mutex_lock(&cip->ci_lock);
15416081Scy152378 if (cip->ci_flags & FMD_CF_DELETING) {
15426081Scy152378 (void) pthread_mutex_unlock(&cip->ci_lock);
15436081Scy152378 cip = NULL;
15446081Scy152378 } else {
15456081Scy152378 fmd_case_hold_locked((fmd_case_t *)cip);
15466081Scy152378 (void) pthread_mutex_unlock(&cip->ci_lock);
15476081Scy152378 }
15486081Scy152378 return (cip);
15496081Scy152378 }
15506081Scy152378
15511193Smws void
fmd_case_rele(fmd_case_t * cp)15520Sstevel@tonic-gate fmd_case_rele(fmd_case_t *cp)
15530Sstevel@tonic-gate {
15540Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
15550Sstevel@tonic-gate
15560Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_lock);
15570Sstevel@tonic-gate ASSERT(cip->ci_refs != 0);
15580Sstevel@tonic-gate
15590Sstevel@tonic-gate if (--cip->ci_refs == 0)
15601193Smws fmd_case_destroy((fmd_case_t *)cip, B_TRUE);
15610Sstevel@tonic-gate else
15620Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock);
15630Sstevel@tonic-gate }
15640Sstevel@tonic-gate
15656228Sstephh void
fmd_case_rele_locked(fmd_case_t * cp)15666228Sstephh fmd_case_rele_locked(fmd_case_t *cp)
15676228Sstephh {
15686228Sstephh fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
15696228Sstephh
15706228Sstephh ASSERT(MUTEX_HELD(&cip->ci_lock));
15716228Sstephh --cip->ci_refs;
15726228Sstephh ASSERT(cip->ci_refs != 0);
15736228Sstephh }
15746228Sstephh
15751414Scindi int
fmd_case_insert_principal(fmd_case_t * cp,fmd_event_t * ep)15760Sstevel@tonic-gate fmd_case_insert_principal(fmd_case_t *cp, fmd_event_t *ep)
15770Sstevel@tonic-gate {
15780Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
15791414Scindi fmd_case_item_t *cit;
15800Sstevel@tonic-gate fmd_event_t *oep;
15810Sstevel@tonic-gate uint_t state;
15821414Scindi int new;
15830Sstevel@tonic-gate
15840Sstevel@tonic-gate fmd_event_hold(ep);
15850Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_lock);
15860Sstevel@tonic-gate
15871193Smws if (cip->ci_flags & FMD_CF_SOLVED)
15880Sstevel@tonic-gate state = FMD_EVS_DIAGNOSED;
15890Sstevel@tonic-gate else
15900Sstevel@tonic-gate state = FMD_EVS_ACCEPTED;
15910Sstevel@tonic-gate
15920Sstevel@tonic-gate oep = cip->ci_principal;
15930Sstevel@tonic-gate cip->ci_principal = ep;
15940Sstevel@tonic-gate
15951414Scindi for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
15961414Scindi if (cit->cit_event == ep)
15971414Scindi break;
15981414Scindi }
15991414Scindi
16000Sstevel@tonic-gate cip->ci_flags |= FMD_CF_DIRTY;
16011414Scindi new = cit == NULL && ep != oep;
16021414Scindi
16030Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock);
16040Sstevel@tonic-gate
16050Sstevel@tonic-gate fmd_module_setcdirty(cip->ci_mod);
16060Sstevel@tonic-gate fmd_event_transition(ep, state);
16070Sstevel@tonic-gate
16080Sstevel@tonic-gate if (oep != NULL)
16090Sstevel@tonic-gate fmd_event_rele(oep);
16101414Scindi
16111414Scindi return (new);
16120Sstevel@tonic-gate }
16130Sstevel@tonic-gate
16141414Scindi int
fmd_case_insert_event(fmd_case_t * cp,fmd_event_t * ep)16150Sstevel@tonic-gate fmd_case_insert_event(fmd_case_t *cp, fmd_event_t *ep)
16160Sstevel@tonic-gate {
16170Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
16181414Scindi fmd_case_item_t *cit;
16190Sstevel@tonic-gate uint_t state;
16201414Scindi int new;
162110928SStephen.Hanson@Sun.COM boolean_t injected;
16221414Scindi
16231414Scindi (void) pthread_mutex_lock(&cip->ci_lock);
16241414Scindi
16251414Scindi if (cip->ci_flags & FMD_CF_SOLVED)
16261414Scindi state = FMD_EVS_DIAGNOSED;
16271414Scindi else
16281414Scindi state = FMD_EVS_ACCEPTED;
16290Sstevel@tonic-gate
16301414Scindi for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
16311414Scindi if (cit->cit_event == ep)
16321414Scindi break;
16331414Scindi }
16341414Scindi
16351414Scindi new = cit == NULL && ep != cip->ci_principal;
16361414Scindi
16371414Scindi /*
16381414Scindi * If the event is already in the case or the case is already solved,
16391414Scindi * there is no reason to save it: just transition it appropriately.
16401414Scindi */
16411414Scindi if (cit != NULL || (cip->ci_flags & FMD_CF_SOLVED)) {
16421414Scindi (void) pthread_mutex_unlock(&cip->ci_lock);
16431414Scindi fmd_event_transition(ep, state);
16441414Scindi return (new);
16451414Scindi }
16461414Scindi
16471414Scindi cit = fmd_alloc(sizeof (fmd_case_item_t), FMD_SLEEP);
16480Sstevel@tonic-gate fmd_event_hold(ep);
16490Sstevel@tonic-gate
165010928SStephen.Hanson@Sun.COM if (nvlist_lookup_boolean_value(((fmd_event_impl_t *)ep)->ev_nvl,
165110928SStephen.Hanson@Sun.COM "__injected", &injected) == 0 && injected)
165210928SStephen.Hanson@Sun.COM fmd_case_set_injected(cp);
165310928SStephen.Hanson@Sun.COM
16540Sstevel@tonic-gate cit->cit_next = cip->ci_items;
16550Sstevel@tonic-gate cit->cit_event = ep;
16560Sstevel@tonic-gate
16570Sstevel@tonic-gate cip->ci_items = cit;
16580Sstevel@tonic-gate cip->ci_nitems++;
16590Sstevel@tonic-gate
16600Sstevel@tonic-gate cip->ci_flags |= FMD_CF_DIRTY;
16610Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock);
16620Sstevel@tonic-gate
16630Sstevel@tonic-gate fmd_module_setcdirty(cip->ci_mod);
16640Sstevel@tonic-gate fmd_event_transition(ep, state);
16651414Scindi
16661414Scindi return (new);
16670Sstevel@tonic-gate }
16680Sstevel@tonic-gate
16690Sstevel@tonic-gate void
fmd_case_insert_suspect(fmd_case_t * cp,nvlist_t * nvl)16700Sstevel@tonic-gate fmd_case_insert_suspect(fmd_case_t *cp, nvlist_t *nvl)
16710Sstevel@tonic-gate {
16720Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
16730Sstevel@tonic-gate fmd_case_susp_t *cis = fmd_alloc(sizeof (fmd_case_susp_t), FMD_SLEEP);
16740Sstevel@tonic-gate
16750Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_lock);
16766228Sstephh ASSERT(cip->ci_state < FMD_CASE_CLOSE_WAIT);
16770Sstevel@tonic-gate cip->ci_flags |= FMD_CF_DIRTY;
16780Sstevel@tonic-gate
16790Sstevel@tonic-gate cis->cis_next = cip->ci_suspects;
16800Sstevel@tonic-gate cis->cis_nvl = nvl;
16810Sstevel@tonic-gate
16820Sstevel@tonic-gate cip->ci_suspects = cis;
16830Sstevel@tonic-gate cip->ci_nsuspects++;
16840Sstevel@tonic-gate
16850Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock);
16869120SStephen.Hanson@Sun.COM if (cip->ci_xprt == NULL)
16879120SStephen.Hanson@Sun.COM fmd_module_setcdirty(cip->ci_mod);
16880Sstevel@tonic-gate }
16890Sstevel@tonic-gate
16900Sstevel@tonic-gate void
fmd_case_recreate_suspect(fmd_case_t * cp,nvlist_t * nvl)16911193Smws fmd_case_recreate_suspect(fmd_case_t *cp, nvlist_t *nvl)
16921193Smws {
16931193Smws fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
16941193Smws fmd_case_susp_t *cis = fmd_alloc(sizeof (fmd_case_susp_t), FMD_SLEEP);
16955255Sstephh boolean_t b;
16961193Smws
16971193Smws (void) pthread_mutex_lock(&cip->ci_lock);
16981193Smws
16991193Smws cis->cis_next = cip->ci_suspects;
17001193Smws cis->cis_nvl = nvl;
17011193Smws
17025255Sstephh if (nvlist_lookup_boolean_value(nvl,
17035255Sstephh FM_SUSPECT_MESSAGE, &b) == 0 && b == B_FALSE)
17045255Sstephh cip->ci_flags |= FMD_CF_INVISIBLE;
17055255Sstephh
17061193Smws cip->ci_suspects = cis;
17071193Smws cip->ci_nsuspects++;
17081193Smws
17091193Smws (void) pthread_mutex_unlock(&cip->ci_lock);
17101193Smws }
17111193Smws
17121193Smws void
fmd_case_reset_suspects(fmd_case_t * cp)17130Sstevel@tonic-gate fmd_case_reset_suspects(fmd_case_t *cp)
17140Sstevel@tonic-gate {
17150Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
17160Sstevel@tonic-gate
17170Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_lock);
17180Sstevel@tonic-gate ASSERT(cip->ci_state < FMD_CASE_SOLVED);
17190Sstevel@tonic-gate
17201552Smws fmd_case_destroy_suspects(cip);
17210Sstevel@tonic-gate cip->ci_flags |= FMD_CF_DIRTY;
17220Sstevel@tonic-gate
17230Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock);
17240Sstevel@tonic-gate fmd_module_setcdirty(cip->ci_mod);
17250Sstevel@tonic-gate }
17260Sstevel@tonic-gate
17276228Sstephh /*ARGSUSED*/
17286228Sstephh static void
fmd_case_unusable(fmd_asru_link_t * alp,void * arg)17296228Sstephh fmd_case_unusable(fmd_asru_link_t *alp, void *arg)
17306228Sstephh {
17316228Sstephh (void) fmd_asru_setflags(alp, FMD_ASRU_UNUSABLE);
17326228Sstephh }
17336228Sstephh
17341193Smws /*
17351193Smws * Grab ci_lock and update the case state and set the dirty bit. Then perform
17361193Smws * whatever actions and emit whatever events are appropriate for the state.
17371193Smws * Refer to the topmost block comment explaining the state machine for details.
17381193Smws */
17390Sstevel@tonic-gate void
fmd_case_transition(fmd_case_t * cp,uint_t state,uint_t flags)17401193Smws fmd_case_transition(fmd_case_t *cp, uint_t state, uint_t flags)
17410Sstevel@tonic-gate {
17420Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
17431193Smws fmd_case_item_t *cit;
17441193Smws fmd_event_t *e;
17457275Sstephh int resolved = 0;
17467275Sstephh int any_unusable_and_present = 0;
17470Sstevel@tonic-gate
17487275Sstephh ASSERT(state <= FMD_CASE_RESOLVED);
17490Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_lock);
17501552Smws
17515255Sstephh if (!(cip->ci_flags & FMD_CF_SOLVED) && !(flags & FMD_CF_SOLVED))
17529120SStephen.Hanson@Sun.COM flags &= ~(FMD_CF_ISOLATED | FMD_CF_REPAIRED | FMD_CF_RESOLVED);
17531552Smws
17541193Smws cip->ci_flags |= flags;
17550Sstevel@tonic-gate
17560Sstevel@tonic-gate if (cip->ci_state >= state) {
17570Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock);
17580Sstevel@tonic-gate return; /* already in specified state */
17590Sstevel@tonic-gate }
17600Sstevel@tonic-gate
17610Sstevel@tonic-gate TRACE((FMD_DBG_CASE, "case %s %s->%s", cip->ci_uuid,
17620Sstevel@tonic-gate _fmd_case_snames[cip->ci_state], _fmd_case_snames[state]));
17630Sstevel@tonic-gate
17640Sstevel@tonic-gate cip->ci_state = state;
17650Sstevel@tonic-gate cip->ci_flags |= FMD_CF_DIRTY;
17660Sstevel@tonic-gate
17671193Smws if (cip->ci_xprt == NULL && cip->ci_mod != fmd.d_rmod)
17681193Smws fmd_module_setcdirty(cip->ci_mod);
17690Sstevel@tonic-gate
17701193Smws switch (state) {
17711193Smws case FMD_CASE_SOLVED:
17720Sstevel@tonic-gate for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next)
17730Sstevel@tonic-gate fmd_event_transition(cit->cit_event, FMD_EVS_DIAGNOSED);
17740Sstevel@tonic-gate
17750Sstevel@tonic-gate if (cip->ci_principal != NULL) {
17760Sstevel@tonic-gate fmd_event_transition(cip->ci_principal,
17770Sstevel@tonic-gate FMD_EVS_DIAGNOSED);
17780Sstevel@tonic-gate }
17790Sstevel@tonic-gate break;
17800Sstevel@tonic-gate
17811193Smws case FMD_CASE_CLOSE_WAIT:
17821193Smws /*
17831193Smws * If the case was never solved, do not change ASRUs.
17841193Smws * If the case was never fmd_case_closed, do not change ASRUs.
17851193Smws * If the case was repaired, do not change ASRUs.
17861193Smws */
17871193Smws if ((cip->ci_flags & (FMD_CF_SOLVED | FMD_CF_ISOLATED |
17886228Sstephh FMD_CF_REPAIRED)) == (FMD_CF_SOLVED | FMD_CF_ISOLATED))
17896228Sstephh fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
17906228Sstephh fmd_case_unusable, NULL);
17910Sstevel@tonic-gate
17920Sstevel@tonic-gate /*
17931193Smws * If an orphaned case transitions to CLOSE_WAIT, the owning
179411416SStephen.Hanson@Sun.COM * module is no longer loaded: continue on to CASE_CLOSED or
179511416SStephen.Hanson@Sun.COM * CASE_REPAIRED as appropriate.
17961193Smws */
179711416SStephen.Hanson@Sun.COM if (fmd_case_orphaned(cp)) {
179811416SStephen.Hanson@Sun.COM if (cip->ci_flags & FMD_CF_REPAIRED) {
179911416SStephen.Hanson@Sun.COM state = cip->ci_state = FMD_CASE_REPAIRED;
180011416SStephen.Hanson@Sun.COM TRACE((FMD_DBG_CASE, "case %s %s->%s",
180111416SStephen.Hanson@Sun.COM cip->ci_uuid,
180211416SStephen.Hanson@Sun.COM _fmd_case_snames[FMD_CASE_CLOSE_WAIT],
180311416SStephen.Hanson@Sun.COM _fmd_case_snames[FMD_CASE_REPAIRED]));
180411416SStephen.Hanson@Sun.COM goto do_repair;
180511416SStephen.Hanson@Sun.COM } else {
180611416SStephen.Hanson@Sun.COM state = cip->ci_state = FMD_CASE_CLOSED;
180711416SStephen.Hanson@Sun.COM TRACE((FMD_DBG_CASE, "case %s %s->%s",
180811416SStephen.Hanson@Sun.COM cip->ci_uuid,
180911416SStephen.Hanson@Sun.COM _fmd_case_snames[FMD_CASE_CLOSE_WAIT],
181011416SStephen.Hanson@Sun.COM _fmd_case_snames[FMD_CASE_CLOSED]));
181111416SStephen.Hanson@Sun.COM }
181211416SStephen.Hanson@Sun.COM }
18130Sstevel@tonic-gate break;
18141193Smws
18151193Smws case FMD_CASE_REPAIRED:
181611416SStephen.Hanson@Sun.COM do_repair:
18179120SStephen.Hanson@Sun.COM ASSERT(cip->ci_xprt != NULL || fmd_case_orphaned(cp));
18187275Sstephh
18197275Sstephh /*
18209120SStephen.Hanson@Sun.COM * If we've been requested to transition straight on to the
18219120SStephen.Hanson@Sun.COM * RESOLVED state (which can happen with fault proxying where a
18229120SStephen.Hanson@Sun.COM * list.resolved or a uuresolved is received from the other
18239120SStephen.Hanson@Sun.COM * side), or if all suspects are already either usable or not
18249120SStephen.Hanson@Sun.COM * present then transition straight to RESOLVED state,
18259120SStephen.Hanson@Sun.COM * publishing both the list.repaired and list.resolved. For a
18269120SStephen.Hanson@Sun.COM * proxy, if we discover here that all suspects are already
18279120SStephen.Hanson@Sun.COM * either usable or not present, notify the diag side instead
18289120SStephen.Hanson@Sun.COM * using fmd_xprt_uuresolved().
18297275Sstephh */
18309120SStephen.Hanson@Sun.COM if (flags & FMD_CF_RESOLVED) {
183110656SStephen.Hanson@Sun.COM if (cip->ci_xprt != NULL)
18329120SStephen.Hanson@Sun.COM fmd_list_delete(&cip->ci_mod->mod_cases, cip);
18339120SStephen.Hanson@Sun.COM } else {
18349120SStephen.Hanson@Sun.COM fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
18359120SStephen.Hanson@Sun.COM fmd_case_unusable_and_present,
18369120SStephen.Hanson@Sun.COM &any_unusable_and_present);
18379120SStephen.Hanson@Sun.COM if (any_unusable_and_present)
18389120SStephen.Hanson@Sun.COM break;
18399120SStephen.Hanson@Sun.COM if (cip->ci_xprt != NULL) {
18409120SStephen.Hanson@Sun.COM fmd_xprt_uuresolved(cip->ci_xprt, cip->ci_uuid);
18419120SStephen.Hanson@Sun.COM break;
18429120SStephen.Hanson@Sun.COM }
18439120SStephen.Hanson@Sun.COM }
18447275Sstephh
18457275Sstephh cip->ci_state = FMD_CASE_RESOLVED;
18467275Sstephh (void) pthread_mutex_unlock(&cip->ci_lock);
18477275Sstephh fmd_case_publish(cp, state);
18487275Sstephh TRACE((FMD_DBG_CASE, "case %s %s->%s", cip->ci_uuid,
18497275Sstephh _fmd_case_snames[FMD_CASE_REPAIRED],
18507275Sstephh _fmd_case_snames[FMD_CASE_RESOLVED]));
18517275Sstephh state = FMD_CASE_RESOLVED;
18527275Sstephh resolved = 1;
18537275Sstephh (void) pthread_mutex_lock(&cip->ci_lock);
18547275Sstephh break;
18557275Sstephh
18567275Sstephh case FMD_CASE_RESOLVED:
18579120SStephen.Hanson@Sun.COM /*
18589120SStephen.Hanson@Sun.COM * For a proxy, no need to check that all suspects are already
18599120SStephen.Hanson@Sun.COM * either usable or not present - this request has come from
18609120SStephen.Hanson@Sun.COM * the diagnosing side which makes the final decision on this.
18619120SStephen.Hanson@Sun.COM */
18629120SStephen.Hanson@Sun.COM if (cip->ci_xprt != NULL) {
18639120SStephen.Hanson@Sun.COM fmd_list_delete(&cip->ci_mod->mod_cases, cip);
18649120SStephen.Hanson@Sun.COM resolved = 1;
18659120SStephen.Hanson@Sun.COM break;
18669120SStephen.Hanson@Sun.COM }
18679120SStephen.Hanson@Sun.COM
18687275Sstephh ASSERT(fmd_case_orphaned(cp));
18697275Sstephh
18707275Sstephh /*
18717275Sstephh * If all suspects are already either usable or not present then
18727275Sstephh * carry on, publish list.resolved and discard the case.
18737275Sstephh */
18747275Sstephh fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
18757275Sstephh fmd_case_unusable_and_present, &any_unusable_and_present);
18767275Sstephh if (any_unusable_and_present) {
18777275Sstephh (void) pthread_mutex_unlock(&cip->ci_lock);
18787275Sstephh return;
18797275Sstephh }
18807275Sstephh
18817275Sstephh resolved = 1;
18821193Smws break;
18830Sstevel@tonic-gate }
18840Sstevel@tonic-gate
18850Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock);
18860Sstevel@tonic-gate
18870Sstevel@tonic-gate /*
18881193Smws * If the module has initialized, then publish the appropriate event
18891193Smws * for the new case state. If not, we are being called from the
18901193Smws * checkpoint code during module load, in which case the module's
18911193Smws * _fmd_init() routine hasn't finished yet, and our event dictionaries
18921193Smws * may not be open yet, which will prevent us from computing the event
18931193Smws * code. Defer the call to fmd_case_publish() by enqueuing a PUBLISH
18941193Smws * event in our queue: this won't be processed until _fmd_init is done.
18950Sstevel@tonic-gate */
18960Sstevel@tonic-gate if (cip->ci_mod->mod_flags & FMD_MOD_INIT)
18970Sstevel@tonic-gate fmd_case_publish(cp, state);
18981193Smws else {
18991193Smws fmd_case_hold(cp);
19001193Smws e = fmd_event_create(FMD_EVT_PUBLISH, FMD_HRT_NOW, NULL, cp);
19011193Smws fmd_eventq_insert_at_head(cip->ci_mod->mod_queue, e);
19021193Smws }
19031193Smws
19047275Sstephh if (resolved) {
190510656SStephen.Hanson@Sun.COM if (cip->ci_xprt != NULL) {
190610656SStephen.Hanson@Sun.COM /*
190710656SStephen.Hanson@Sun.COM * If we transitioned to RESOLVED, adjust the reference
190810656SStephen.Hanson@Sun.COM * count to reflect our removal from
190910656SStephen.Hanson@Sun.COM * fmd.d_rmod->mod_cases above. If the caller has not
191010656SStephen.Hanson@Sun.COM * placed an additional hold on the case, it will now
191110656SStephen.Hanson@Sun.COM * be freed.
191210656SStephen.Hanson@Sun.COM */
191310656SStephen.Hanson@Sun.COM (void) pthread_mutex_lock(&cip->ci_lock);
191410656SStephen.Hanson@Sun.COM fmd_asru_hash_delete_case(fmd.d_asrus, cp);
191510656SStephen.Hanson@Sun.COM (void) pthread_mutex_unlock(&cip->ci_lock);
191610656SStephen.Hanson@Sun.COM fmd_case_rele(cp);
191710656SStephen.Hanson@Sun.COM } else {
191810656SStephen.Hanson@Sun.COM fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
191910656SStephen.Hanson@Sun.COM fmd_asru_log_resolved, NULL);
192010656SStephen.Hanson@Sun.COM (void) pthread_mutex_lock(&cip->ci_lock);
192110656SStephen.Hanson@Sun.COM /* mark as "ready to be discarded */
192210656SStephen.Hanson@Sun.COM cip->ci_flags |= FMD_CF_RES_CMPL;
192310656SStephen.Hanson@Sun.COM (void) pthread_mutex_unlock(&cip->ci_lock);
192410656SStephen.Hanson@Sun.COM }
192510656SStephen.Hanson@Sun.COM }
192610656SStephen.Hanson@Sun.COM }
192710656SStephen.Hanson@Sun.COM
192810656SStephen.Hanson@Sun.COM /*
192910656SStephen.Hanson@Sun.COM * Discard any case if it is in RESOLVED state (and if check_if_aged argument
193010656SStephen.Hanson@Sun.COM * is set if all suspects have passed the rsrc.aged time).
193110656SStephen.Hanson@Sun.COM */
193210656SStephen.Hanson@Sun.COM void
fmd_case_discard_resolved(fmd_case_t * cp,void * arg)193310656SStephen.Hanson@Sun.COM fmd_case_discard_resolved(fmd_case_t *cp, void *arg)
193410656SStephen.Hanson@Sun.COM {
193510656SStephen.Hanson@Sun.COM int check_if_aged = *(int *)arg;
193610656SStephen.Hanson@Sun.COM fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
193710656SStephen.Hanson@Sun.COM
193810656SStephen.Hanson@Sun.COM /*
193910656SStephen.Hanson@Sun.COM * First check if case has completed transition to resolved.
194010656SStephen.Hanson@Sun.COM */
194110656SStephen.Hanson@Sun.COM (void) pthread_mutex_lock(&cip->ci_lock);
194210656SStephen.Hanson@Sun.COM if (!(cip->ci_flags & FMD_CF_RES_CMPL)) {
19436228Sstephh (void) pthread_mutex_unlock(&cip->ci_lock);
194410656SStephen.Hanson@Sun.COM return;
19456228Sstephh }
194610656SStephen.Hanson@Sun.COM
194710656SStephen.Hanson@Sun.COM /*
194810656SStephen.Hanson@Sun.COM * Now if check_is_aged is set, see if all suspects have aged.
194910656SStephen.Hanson@Sun.COM */
195010656SStephen.Hanson@Sun.COM if (check_if_aged) {
195110656SStephen.Hanson@Sun.COM int aged = 1;
195210656SStephen.Hanson@Sun.COM
195310656SStephen.Hanson@Sun.COM fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
195410656SStephen.Hanson@Sun.COM fmd_asru_check_if_aged, &aged);
195510656SStephen.Hanson@Sun.COM if (!aged) {
195610656SStephen.Hanson@Sun.COM (void) pthread_mutex_unlock(&cip->ci_lock);
195710656SStephen.Hanson@Sun.COM return;
195810656SStephen.Hanson@Sun.COM }
195910656SStephen.Hanson@Sun.COM }
196010656SStephen.Hanson@Sun.COM
196110656SStephen.Hanson@Sun.COM /*
196210656SStephen.Hanson@Sun.COM * Finally discard the case, clearing FMD_CF_RES_CMPL so we don't
196310656SStephen.Hanson@Sun.COM * do it twice.
196410656SStephen.Hanson@Sun.COM */
196510656SStephen.Hanson@Sun.COM fmd_module_lock(cip->ci_mod);
196610656SStephen.Hanson@Sun.COM fmd_list_delete(&cip->ci_mod->mod_cases, cip);
196710656SStephen.Hanson@Sun.COM fmd_module_unlock(cip->ci_mod);
196810656SStephen.Hanson@Sun.COM fmd_asru_hash_delete_case(fmd.d_asrus, cp);
196910656SStephen.Hanson@Sun.COM cip->ci_flags &= ~FMD_CF_RES_CMPL;
197010656SStephen.Hanson@Sun.COM (void) pthread_mutex_unlock(&cip->ci_lock);
197110656SStephen.Hanson@Sun.COM fmd_case_rele(cp);
19720Sstevel@tonic-gate }
19730Sstevel@tonic-gate
19741429Smws /*
19751429Smws * Transition the specified case to *at least* the specified state by first
19761429Smws * re-validating the suspect list using the resource cache. This function is
19771429Smws * employed by the checkpoint code when restoring a saved, solved case to see
19781429Smws * if the state of the case has effectively changed while fmd was not running
19797275Sstephh * or the module was not loaded.
19801429Smws */
19811429Smws void
fmd_case_transition_update(fmd_case_t * cp,uint_t state,uint_t flags)19821429Smws fmd_case_transition_update(fmd_case_t *cp, uint_t state, uint_t flags)
19831429Smws {
19841429Smws fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
19851429Smws
19861429Smws int usable = 0; /* are any suspects usable? */
19871429Smws
19881429Smws ASSERT(state >= FMD_CASE_SOLVED);
19891429Smws (void) pthread_mutex_lock(&cip->ci_lock);
19901429Smws
19916228Sstephh fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_usable, &usable);
19921429Smws
19931429Smws (void) pthread_mutex_unlock(&cip->ci_lock);
19941429Smws
19957275Sstephh if (!usable) {
19961429Smws state = MAX(state, FMD_CASE_CLOSE_WAIT);
19971429Smws flags |= FMD_CF_ISOLATED;
19981429Smws }
19991429Smws
20001429Smws fmd_case_transition(cp, state, flags);
20011429Smws }
20021429Smws
20030Sstevel@tonic-gate void
fmd_case_setdirty(fmd_case_t * cp)20040Sstevel@tonic-gate fmd_case_setdirty(fmd_case_t *cp)
20050Sstevel@tonic-gate {
20060Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
20070Sstevel@tonic-gate
20080Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_lock);
20090Sstevel@tonic-gate cip->ci_flags |= FMD_CF_DIRTY;
20100Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock);
20110Sstevel@tonic-gate
20120Sstevel@tonic-gate fmd_module_setcdirty(cip->ci_mod);
20130Sstevel@tonic-gate }
20140Sstevel@tonic-gate
20150Sstevel@tonic-gate void
fmd_case_clrdirty(fmd_case_t * cp)20160Sstevel@tonic-gate fmd_case_clrdirty(fmd_case_t *cp)
20170Sstevel@tonic-gate {
20180Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
20190Sstevel@tonic-gate
20200Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_lock);
20210Sstevel@tonic-gate cip->ci_flags &= ~FMD_CF_DIRTY;
20220Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock);
20230Sstevel@tonic-gate }
20240Sstevel@tonic-gate
20250Sstevel@tonic-gate void
fmd_case_commit(fmd_case_t * cp)20260Sstevel@tonic-gate fmd_case_commit(fmd_case_t *cp)
20270Sstevel@tonic-gate {
20280Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
20290Sstevel@tonic-gate fmd_case_item_t *cit;
20300Sstevel@tonic-gate
20310Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_lock);
20320Sstevel@tonic-gate
20330Sstevel@tonic-gate if (cip->ci_flags & FMD_CF_DIRTY) {
20340Sstevel@tonic-gate for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next)
20350Sstevel@tonic-gate fmd_event_commit(cit->cit_event);
20360Sstevel@tonic-gate
20370Sstevel@tonic-gate if (cip->ci_principal != NULL)
20380Sstevel@tonic-gate fmd_event_commit(cip->ci_principal);
20390Sstevel@tonic-gate
20400Sstevel@tonic-gate fmd_buf_hash_commit(&cip->ci_bufs);
20410Sstevel@tonic-gate cip->ci_flags &= ~FMD_CF_DIRTY;
20420Sstevel@tonic-gate }
20430Sstevel@tonic-gate
20440Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock);
20450Sstevel@tonic-gate }
20460Sstevel@tonic-gate
20470Sstevel@tonic-gate /*
20489120SStephen.Hanson@Sun.COM * On proxy side, send back repair/acquit/etc request to diagnosing side
20499120SStephen.Hanson@Sun.COM */
20509120SStephen.Hanson@Sun.COM void
fmd_case_xprt_updated(fmd_case_t * cp)20519120SStephen.Hanson@Sun.COM fmd_case_xprt_updated(fmd_case_t *cp)
20529120SStephen.Hanson@Sun.COM {
20539120SStephen.Hanson@Sun.COM fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
20549120SStephen.Hanson@Sun.COM nvlist_t **nva;
20559120SStephen.Hanson@Sun.COM uint8_t *ba;
20569120SStephen.Hanson@Sun.COM int msg = B_TRUE;
20579120SStephen.Hanson@Sun.COM int count = 0;
20589120SStephen.Hanson@Sun.COM fmd_case_lst_t fcl;
20599120SStephen.Hanson@Sun.COM
20609120SStephen.Hanson@Sun.COM ASSERT(cip->ci_xprt != NULL);
20619120SStephen.Hanson@Sun.COM (void) pthread_mutex_lock(&cip->ci_lock);
20629120SStephen.Hanson@Sun.COM ba = alloca(sizeof (uint8_t) * cip->ci_nsuspects);
20639120SStephen.Hanson@Sun.COM nva = alloca(sizeof (nvlist_t *) * cip->ci_nsuspects);
20649120SStephen.Hanson@Sun.COM fcl.fcl_countp = &count;
20659120SStephen.Hanson@Sun.COM fcl.fcl_maxcount = cip->ci_nsuspects;
20669120SStephen.Hanson@Sun.COM fcl.fcl_msgp = &msg;
20679120SStephen.Hanson@Sun.COM fcl.fcl_ba = ba;
20689120SStephen.Hanson@Sun.COM fcl.fcl_nva = nva;
20699120SStephen.Hanson@Sun.COM fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_set_lst, &fcl);
20709120SStephen.Hanson@Sun.COM (void) pthread_mutex_unlock(&cip->ci_lock);
20719120SStephen.Hanson@Sun.COM fmd_xprt_updated(cip->ci_xprt, cip->ci_uuid, ba, cip->ci_proxy_asru,
20729120SStephen.Hanson@Sun.COM count);
20739120SStephen.Hanson@Sun.COM }
20749120SStephen.Hanson@Sun.COM
20759120SStephen.Hanson@Sun.COM /*
20769120SStephen.Hanson@Sun.COM * fmd_case_update_status() can be called on either the proxy side when a
20779120SStephen.Hanson@Sun.COM * list.suspect is received, or on the diagnosing side when an update request
20789120SStephen.Hanson@Sun.COM * is received from the proxy. It updates the status in the resource cache.
20799120SStephen.Hanson@Sun.COM */
20809120SStephen.Hanson@Sun.COM void
fmd_case_update_status(fmd_case_t * cp,uint8_t * statusp,uint8_t * proxy_asrup,uint8_t * diag_asrup)20819120SStephen.Hanson@Sun.COM fmd_case_update_status(fmd_case_t *cp, uint8_t *statusp, uint8_t *proxy_asrup,
20829120SStephen.Hanson@Sun.COM uint8_t *diag_asrup)
20839120SStephen.Hanson@Sun.COM {
20849120SStephen.Hanson@Sun.COM fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
20859120SStephen.Hanson@Sun.COM int count = 0;
20869120SStephen.Hanson@Sun.COM fmd_asru_update_status_t faus;
20879120SStephen.Hanson@Sun.COM
20889120SStephen.Hanson@Sun.COM /*
20899120SStephen.Hanson@Sun.COM * update status of resource cache entries
20909120SStephen.Hanson@Sun.COM */
20919120SStephen.Hanson@Sun.COM faus.faus_countp = &count;
20929120SStephen.Hanson@Sun.COM faus.faus_maxcount = cip->ci_nsuspects;
20939120SStephen.Hanson@Sun.COM faus.faus_ba = statusp;
20949120SStephen.Hanson@Sun.COM faus.faus_proxy_asru = proxy_asrup;
20959120SStephen.Hanson@Sun.COM faus.faus_diag_asru = diag_asrup;
20969120SStephen.Hanson@Sun.COM faus.faus_is_proxy = (cip->ci_xprt != NULL);
20979120SStephen.Hanson@Sun.COM (void) pthread_mutex_lock(&cip->ci_lock);
20989120SStephen.Hanson@Sun.COM fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_update_status,
20999120SStephen.Hanson@Sun.COM &faus);
21009120SStephen.Hanson@Sun.COM (void) pthread_mutex_unlock(&cip->ci_lock);
21019120SStephen.Hanson@Sun.COM }
21029120SStephen.Hanson@Sun.COM
21039120SStephen.Hanson@Sun.COM /*
21049120SStephen.Hanson@Sun.COM * Called on either the proxy side or the diag side when a repair has taken
21059120SStephen.Hanson@Sun.COM * place on the other side but this side may know the asru "contains"
21069120SStephen.Hanson@Sun.COM * relationships.
21079120SStephen.Hanson@Sun.COM */
21089120SStephen.Hanson@Sun.COM void
fmd_case_update_containees(fmd_case_t * cp)21099120SStephen.Hanson@Sun.COM fmd_case_update_containees(fmd_case_t *cp)
21109120SStephen.Hanson@Sun.COM {
21119120SStephen.Hanson@Sun.COM fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
21129120SStephen.Hanson@Sun.COM
21139120SStephen.Hanson@Sun.COM (void) pthread_mutex_lock(&cip->ci_lock);
21149120SStephen.Hanson@Sun.COM fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
21159120SStephen.Hanson@Sun.COM fmd_asru_update_containees, NULL);
21169120SStephen.Hanson@Sun.COM (void) pthread_mutex_unlock(&cip->ci_lock);
21179120SStephen.Hanson@Sun.COM }
21189120SStephen.Hanson@Sun.COM
21199120SStephen.Hanson@Sun.COM /*
21209120SStephen.Hanson@Sun.COM * fmd_case_close_status() is called on diagnosing side when proxy side
21219120SStephen.Hanson@Sun.COM * has had a uuclose. It updates the status in the resource cache.
21229120SStephen.Hanson@Sun.COM */
21239120SStephen.Hanson@Sun.COM void
fmd_case_close_status(fmd_case_t * cp)21249120SStephen.Hanson@Sun.COM fmd_case_close_status(fmd_case_t *cp)
21259120SStephen.Hanson@Sun.COM {
21269120SStephen.Hanson@Sun.COM fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
21279120SStephen.Hanson@Sun.COM int count = 0;
21289120SStephen.Hanson@Sun.COM fmd_asru_close_status_t facs;
21299120SStephen.Hanson@Sun.COM
21309120SStephen.Hanson@Sun.COM /*
21319120SStephen.Hanson@Sun.COM * update status of resource cache entries
21329120SStephen.Hanson@Sun.COM */
21339120SStephen.Hanson@Sun.COM facs.facs_countp = &count;
21349120SStephen.Hanson@Sun.COM facs.facs_maxcount = cip->ci_nsuspects;
21359120SStephen.Hanson@Sun.COM (void) pthread_mutex_lock(&cip->ci_lock);
21369120SStephen.Hanson@Sun.COM fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_close_status,
21379120SStephen.Hanson@Sun.COM &facs);
21389120SStephen.Hanson@Sun.COM (void) pthread_mutex_unlock(&cip->ci_lock);
21399120SStephen.Hanson@Sun.COM }
21409120SStephen.Hanson@Sun.COM
21419120SStephen.Hanson@Sun.COM /*
21420Sstevel@tonic-gate * Indicate that the case may need to change state because one or more of the
21430Sstevel@tonic-gate * ASRUs named as a suspect has changed state. We examine all the suspects
21440Sstevel@tonic-gate * and if none are still faulty, we initiate a case close transition.
21450Sstevel@tonic-gate */
21460Sstevel@tonic-gate void
fmd_case_update(fmd_case_t * cp)21470Sstevel@tonic-gate fmd_case_update(fmd_case_t *cp)
21480Sstevel@tonic-gate {
21490Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
21501193Smws uint_t cstate;
21516228Sstephh int faulty = 0;
21520Sstevel@tonic-gate
21530Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_lock);
21541193Smws cstate = cip->ci_state;
21550Sstevel@tonic-gate
21569120SStephen.Hanson@Sun.COM if (cip->ci_state < FMD_CASE_SOLVED) {
21570Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock);
21581193Smws return; /* update is not appropriate */
21590Sstevel@tonic-gate }
21600Sstevel@tonic-gate
21616228Sstephh if (cip->ci_flags & FMD_CF_REPAIRED) {
21626228Sstephh (void) pthread_mutex_unlock(&cip->ci_lock);
21636228Sstephh return; /* already repaired */
21640Sstevel@tonic-gate }
21650Sstevel@tonic-gate
21669120SStephen.Hanson@Sun.COM TRACE((FMD_DBG_CASE, "case update %s", cip->ci_uuid));
21676228Sstephh fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty);
21680Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock);
21690Sstevel@tonic-gate
21707275Sstephh if (faulty) {
21717275Sstephh nvlist_t *nvl;
21727275Sstephh fmd_event_t *e;
21737275Sstephh char *class;
21747275Sstephh
21759120SStephen.Hanson@Sun.COM TRACE((FMD_DBG_CASE, "sending list.updated %s", cip->ci_uuid));
21767275Sstephh nvl = fmd_case_mkevent(cp, FM_LIST_UPDATED_CLASS);
21777275Sstephh (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
21787275Sstephh e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
21797275Sstephh (void) pthread_rwlock_rdlock(&fmd.d_log_lock);
21807275Sstephh fmd_log_append(fmd.d_fltlog, e, cp);
21817275Sstephh (void) pthread_rwlock_unlock(&fmd.d_log_lock);
21827275Sstephh fmd_dispq_dispatch(fmd.d_disp, e, class);
21831193Smws return; /* one or more suspects are still marked faulty */
21847275Sstephh }
21851193Smws
21861193Smws if (cstate == FMD_CASE_CLOSED)
21871193Smws fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
21881193Smws else
21891193Smws fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED);
21901193Smws }
21911193Smws
21921193Smws /*
21931193Smws * Delete a closed case from the module's case list once the fmdo_close() entry
21941193Smws * point has run to completion. If the case is owned by a transport module,
21951193Smws * tell the transport to proxy a case close on the other end of the transport.
21969120SStephen.Hanson@Sun.COM * Transition to the appropriate next state based on ci_flags. This
21971193Smws * function represents the end of CLOSE_WAIT and transitions the case to either
21981193Smws * CLOSED or REPAIRED or discards it entirely because it was never solved;
21991193Smws * refer to the topmost block comment explaining the state machine for details.
22001193Smws */
22011193Smws void
fmd_case_delete(fmd_case_t * cp)22021193Smws fmd_case_delete(fmd_case_t *cp)
22031193Smws {
22041193Smws fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
22051429Smws fmd_modstat_t *msp;
22061429Smws size_t buftotal;
22071193Smws
22089120SStephen.Hanson@Sun.COM TRACE((FMD_DBG_CASE, "case delete %s", cip->ci_uuid));
22091193Smws ASSERT(fmd_module_locked(cip->ci_mod));
22101193Smws fmd_list_delete(&cip->ci_mod->mod_cases, cip);
22111429Smws buftotal = fmd_buf_hash_destroy(&cip->ci_bufs);
22121429Smws
22131429Smws (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
22141429Smws msp = cip->ci_mod->mod_stats;
22151429Smws
22161429Smws ASSERT(msp->ms_caseopen.fmds_value.ui64 != 0);
22171429Smws msp->ms_caseopen.fmds_value.ui64--;
22181429Smws
22191429Smws ASSERT(msp->ms_buftotal.fmds_value.ui64 >= buftotal);
22201429Smws msp->ms_buftotal.fmds_value.ui64 -= buftotal;
22211429Smws
22221429Smws (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
22231193Smws
22241193Smws if (cip->ci_xprt == NULL)
22251193Smws fmd_module_setcdirty(cip->ci_mod);
22261193Smws
22271193Smws fmd_module_rele(cip->ci_mod);
22281193Smws cip->ci_mod = fmd.d_rmod;
22291193Smws fmd_module_hold(cip->ci_mod);
22301193Smws
22311193Smws /*
22329120SStephen.Hanson@Sun.COM * If the case has been solved, then retain it
22331552Smws * on the root module's case list at least until we're transitioned.
22341552Smws * Otherwise free the case with our final fmd_case_rele() below.
22351552Smws */
22369120SStephen.Hanson@Sun.COM if (cip->ci_flags & FMD_CF_SOLVED) {
22371552Smws fmd_module_lock(cip->ci_mod);
22381552Smws fmd_list_append(&cip->ci_mod->mod_cases, cip);
22391552Smws fmd_module_unlock(cip->ci_mod);
22401552Smws fmd_case_hold(cp);
22411552Smws }
22421552Smws
22431552Smws /*
22449120SStephen.Hanson@Sun.COM * Transition onwards to REPAIRED or CLOSED as originally requested.
22459120SStephen.Hanson@Sun.COM * Note that for proxy case if we're transitioning to CLOSED it means
22469120SStephen.Hanson@Sun.COM * the case was isolated locally, so call fmd_xprt_uuclose() to notify
22479120SStephen.Hanson@Sun.COM * the diagnosing side. No need to notify the diagnosing side if we are
22489120SStephen.Hanson@Sun.COM * transitioning to REPAIRED as we only do this when requested to do
22499120SStephen.Hanson@Sun.COM * so by the diagnosing side anyway.
22501193Smws */
22519120SStephen.Hanson@Sun.COM if (cip->ci_flags & FMD_CF_REPAIRED)
22521193Smws fmd_case_transition(cp, FMD_CASE_REPAIRED, 0);
22539120SStephen.Hanson@Sun.COM else if (cip->ci_flags & FMD_CF_ISOLATED) {
22541193Smws fmd_case_transition(cp, FMD_CASE_CLOSED, 0);
22559120SStephen.Hanson@Sun.COM if (cip->ci_xprt != NULL)
22569120SStephen.Hanson@Sun.COM fmd_xprt_uuclose(cip->ci_xprt, cip->ci_uuid);
22579120SStephen.Hanson@Sun.COM }
22581193Smws
22591193Smws fmd_case_rele(cp);
22601193Smws }
22611193Smws
22621193Smws void
fmd_case_discard(fmd_case_t * cp,boolean_t delete_from_asru_cache)22639120SStephen.Hanson@Sun.COM fmd_case_discard(fmd_case_t *cp, boolean_t delete_from_asru_cache)
22641193Smws {
22651193Smws fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
22661193Smws
22671193Smws (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
22681193Smws cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64--;
22691193Smws (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
22701193Smws
22711193Smws ASSERT(fmd_module_locked(cip->ci_mod));
22721193Smws fmd_list_delete(&cip->ci_mod->mod_cases, cip);
22739120SStephen.Hanson@Sun.COM if (delete_from_asru_cache) {
22749120SStephen.Hanson@Sun.COM (void) pthread_mutex_lock(&cip->ci_lock);
22759120SStephen.Hanson@Sun.COM fmd_asru_hash_delete_case(fmd.d_asrus, cp);
22769120SStephen.Hanson@Sun.COM (void) pthread_mutex_unlock(&cip->ci_lock);
22779120SStephen.Hanson@Sun.COM }
22781193Smws fmd_case_rele(cp);
22790Sstevel@tonic-gate }
22800Sstevel@tonic-gate
22810Sstevel@tonic-gate /*
22820Sstevel@tonic-gate * Indicate that the problem corresponding to a case has been repaired by
22831193Smws * clearing the faulty bit on each ASRU named as a suspect. If the case hasn't
22841193Smws * already been closed, this function initiates the transition to CLOSE_WAIT.
22851193Smws * The caller must have the case held from fmd_case_hash_lookup(), so we can
22861193Smws * grab and drop ci_lock without the case being able to be freed in between.
22870Sstevel@tonic-gate */
22880Sstevel@tonic-gate int
fmd_case_repair(fmd_case_t * cp)22890Sstevel@tonic-gate fmd_case_repair(fmd_case_t *cp)
22900Sstevel@tonic-gate {
22910Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
22921193Smws uint_t cstate;
22939120SStephen.Hanson@Sun.COM fmd_asru_rep_arg_t fara;
22941193Smws
22950Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_lock);
22961193Smws cstate = cip->ci_state;
22970Sstevel@tonic-gate
22986228Sstephh if (cstate < FMD_CASE_SOLVED) {
22990Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock);
23000Sstevel@tonic-gate return (fmd_set_errno(EFMD_CASE_STATE));
23010Sstevel@tonic-gate }
23020Sstevel@tonic-gate
23036228Sstephh if (cip->ci_flags & FMD_CF_REPAIRED) {
23046228Sstephh (void) pthread_mutex_unlock(&cip->ci_lock);
23056228Sstephh return (0); /* already repaired */
23060Sstevel@tonic-gate }
23070Sstevel@tonic-gate
23089120SStephen.Hanson@Sun.COM TRACE((FMD_DBG_CASE, "case repair %s", cip->ci_uuid));
23099120SStephen.Hanson@Sun.COM fara.fara_reason = FMD_ASRU_REPAIRED;
23109120SStephen.Hanson@Sun.COM fara.fara_bywhat = FARA_BY_CASE;
23119120SStephen.Hanson@Sun.COM fara.fara_rval = NULL;
23129120SStephen.Hanson@Sun.COM fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_repaired, &fara);
23137275Sstephh (void) pthread_mutex_unlock(&cip->ci_lock);
23147275Sstephh
23159120SStephen.Hanson@Sun.COM /*
23169120SStephen.Hanson@Sun.COM * if this is a proxied case, send the repair across the transport.
23179120SStephen.Hanson@Sun.COM * The remote side will then do the repair and send a list.repaired back
23189120SStephen.Hanson@Sun.COM * again such that we can finally repair the case on this side.
23199120SStephen.Hanson@Sun.COM */
23209120SStephen.Hanson@Sun.COM if (cip->ci_xprt != NULL) {
23219120SStephen.Hanson@Sun.COM fmd_case_xprt_updated(cp);
23229120SStephen.Hanson@Sun.COM return (0);
23239120SStephen.Hanson@Sun.COM }
23249120SStephen.Hanson@Sun.COM
23257275Sstephh if (cstate == FMD_CASE_CLOSED)
23267275Sstephh fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
23277275Sstephh else
23287275Sstephh fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED);
23297275Sstephh
23307275Sstephh return (0);
23317275Sstephh }
23327275Sstephh
23337275Sstephh int
fmd_case_acquit(fmd_case_t * cp)23347275Sstephh fmd_case_acquit(fmd_case_t *cp)
23357275Sstephh {
23367275Sstephh fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
23377275Sstephh uint_t cstate;
23389120SStephen.Hanson@Sun.COM fmd_asru_rep_arg_t fara;
23397275Sstephh
23407275Sstephh (void) pthread_mutex_lock(&cip->ci_lock);
23417275Sstephh cstate = cip->ci_state;
23427275Sstephh
23437275Sstephh if (cstate < FMD_CASE_SOLVED) {
23447275Sstephh (void) pthread_mutex_unlock(&cip->ci_lock);
23457275Sstephh return (fmd_set_errno(EFMD_CASE_STATE));
23467275Sstephh }
23477275Sstephh
23487275Sstephh if (cip->ci_flags & FMD_CF_REPAIRED) {
23497275Sstephh (void) pthread_mutex_unlock(&cip->ci_lock);
23507275Sstephh return (0); /* already repaired */
23517275Sstephh }
23527275Sstephh
23539120SStephen.Hanson@Sun.COM TRACE((FMD_DBG_CASE, "case acquit %s", cip->ci_uuid));
23549120SStephen.Hanson@Sun.COM fara.fara_reason = FMD_ASRU_ACQUITTED;
23559120SStephen.Hanson@Sun.COM fara.fara_bywhat = FARA_BY_CASE;
23569120SStephen.Hanson@Sun.COM fara.fara_rval = NULL;
23579120SStephen.Hanson@Sun.COM fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_repaired, &fara);
23581552Smws (void) pthread_mutex_unlock(&cip->ci_lock);
23591552Smws
23609120SStephen.Hanson@Sun.COM /*
23619120SStephen.Hanson@Sun.COM * if this is a proxied case, send the repair across the transport.
23629120SStephen.Hanson@Sun.COM * The remote side will then do the repair and send a list.repaired back
23639120SStephen.Hanson@Sun.COM * again such that we can finally repair the case on this side.
23649120SStephen.Hanson@Sun.COM */
23659120SStephen.Hanson@Sun.COM if (cip->ci_xprt != NULL) {
23669120SStephen.Hanson@Sun.COM fmd_case_xprt_updated(cp);
23679120SStephen.Hanson@Sun.COM return (0);
23689120SStephen.Hanson@Sun.COM }
23699120SStephen.Hanson@Sun.COM
23701193Smws if (cstate == FMD_CASE_CLOSED)
23711193Smws fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
23721193Smws else
23731193Smws fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED);
23741193Smws
23750Sstevel@tonic-gate return (0);
23760Sstevel@tonic-gate }
23770Sstevel@tonic-gate
23780Sstevel@tonic-gate int
fmd_case_contains(fmd_case_t * cp,fmd_event_t * ep)23790Sstevel@tonic-gate fmd_case_contains(fmd_case_t *cp, fmd_event_t *ep)
23800Sstevel@tonic-gate {
23810Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
23820Sstevel@tonic-gate fmd_case_item_t *cit;
23830Sstevel@tonic-gate uint_t state;
23840Sstevel@tonic-gate int rv = 0;
23850Sstevel@tonic-gate
23860Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_lock);
23870Sstevel@tonic-gate
23880Sstevel@tonic-gate if (cip->ci_state >= FMD_CASE_SOLVED)
23890Sstevel@tonic-gate state = FMD_EVS_DIAGNOSED;
23900Sstevel@tonic-gate else
23910Sstevel@tonic-gate state = FMD_EVS_ACCEPTED;
23920Sstevel@tonic-gate
23930Sstevel@tonic-gate for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
23940Sstevel@tonic-gate if ((rv = fmd_event_equal(ep, cit->cit_event)) != 0)
23950Sstevel@tonic-gate break;
23960Sstevel@tonic-gate }
23970Sstevel@tonic-gate
23980Sstevel@tonic-gate if (rv == 0 && cip->ci_principal != NULL)
23990Sstevel@tonic-gate rv = fmd_event_equal(ep, cip->ci_principal);
24000Sstevel@tonic-gate
24010Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock);
24020Sstevel@tonic-gate
24030Sstevel@tonic-gate if (rv != 0)
24040Sstevel@tonic-gate fmd_event_transition(ep, state);
24050Sstevel@tonic-gate
24060Sstevel@tonic-gate return (rv);
24070Sstevel@tonic-gate }
24081193Smws
24091193Smws int
fmd_case_orphaned(fmd_case_t * cp)24101193Smws fmd_case_orphaned(fmd_case_t *cp)
24111193Smws {
24121193Smws return (((fmd_case_impl_t *)cp)->ci_mod == fmd.d_rmod);
24131193Smws }
24145255Sstephh
24155255Sstephh void
fmd_case_settime(fmd_case_t * cp,time_t tv_sec,suseconds_t tv_usec)24165255Sstephh fmd_case_settime(fmd_case_t *cp, time_t tv_sec, suseconds_t tv_usec)
24175255Sstephh {
24185255Sstephh ((fmd_case_impl_t *)cp)->ci_tv.tv_sec = tv_sec;
24195255Sstephh ((fmd_case_impl_t *)cp)->ci_tv.tv_usec = tv_usec;
24205255Sstephh ((fmd_case_impl_t *)cp)->ci_tv_valid = 1;
24215255Sstephh }
24227275Sstephh
24239120SStephen.Hanson@Sun.COM void
fmd_case_set_injected(fmd_case_t * cp)242410928SStephen.Hanson@Sun.COM fmd_case_set_injected(fmd_case_t *cp)
242510928SStephen.Hanson@Sun.COM {
242610928SStephen.Hanson@Sun.COM ((fmd_case_impl_t *)cp)->ci_injected = 1;
242710928SStephen.Hanson@Sun.COM }
242810928SStephen.Hanson@Sun.COM
242910928SStephen.Hanson@Sun.COM void
fmd_case_set_de_fmri(fmd_case_t * cp,nvlist_t * nvl)24309120SStephen.Hanson@Sun.COM fmd_case_set_de_fmri(fmd_case_t *cp, nvlist_t *nvl)
24319120SStephen.Hanson@Sun.COM {
24329120SStephen.Hanson@Sun.COM fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
24339120SStephen.Hanson@Sun.COM
24349120SStephen.Hanson@Sun.COM if (cip->ci_diag_de)
24359120SStephen.Hanson@Sun.COM nvlist_free(cip->ci_diag_de);
24369120SStephen.Hanson@Sun.COM cip->ci_diag_de = nvl;
24379120SStephen.Hanson@Sun.COM }
24389120SStephen.Hanson@Sun.COM
24399120SStephen.Hanson@Sun.COM void
fmd_case_setcode(fmd_case_t * cp,char * code)24409120SStephen.Hanson@Sun.COM fmd_case_setcode(fmd_case_t *cp, char *code)
24419120SStephen.Hanson@Sun.COM {
24429120SStephen.Hanson@Sun.COM fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
24439120SStephen.Hanson@Sun.COM
24449120SStephen.Hanson@Sun.COM cip->ci_code = fmd_strdup(code, FMD_SLEEP);
24459120SStephen.Hanson@Sun.COM cip->ci_codelen = cip->ci_code ? strlen(cip->ci_code) + 1 : 0;
24469120SStephen.Hanson@Sun.COM }
24479120SStephen.Hanson@Sun.COM
24487275Sstephh /*ARGSUSED*/
244910656SStephen.Hanson@Sun.COM static void
fmd_case_repair_replay_case(fmd_case_t * cp,void * arg)24507275Sstephh fmd_case_repair_replay_case(fmd_case_t *cp, void *arg)
24517275Sstephh {
24527275Sstephh int not_faulty = 0;
24537275Sstephh int faulty = 0;
24547275Sstephh nvlist_t *nvl;
24557275Sstephh fmd_event_t *e;
24567275Sstephh char *class;
24577275Sstephh int any_unusable_and_present = 0;
24587275Sstephh fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
24597275Sstephh
24609120SStephen.Hanson@Sun.COM if (cip->ci_state < FMD_CASE_SOLVED || cip->ci_xprt != NULL)
24617275Sstephh return;
24627275Sstephh
246310656SStephen.Hanson@Sun.COM if (cip->ci_state == FMD_CASE_RESOLVED) {
246410656SStephen.Hanson@Sun.COM cip->ci_flags |= FMD_CF_RES_CMPL;
246510656SStephen.Hanson@Sun.COM return;
246610656SStephen.Hanson@Sun.COM }
246710656SStephen.Hanson@Sun.COM
24687275Sstephh fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty);
24697275Sstephh fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_not_faulty,
24707275Sstephh ¬_faulty);
24717275Sstephh
24727913SStephen.Hanson@Sun.COM if (cip->ci_state >= FMD_CASE_REPAIRED && !faulty) {
24737275Sstephh /*
24747275Sstephh * If none of the suspects is faulty, replay the list.repaired.
24757275Sstephh * If all suspects are already either usable or not present then
24767275Sstephh * also transition straight to RESOLVED state.
24777275Sstephh */
24787275Sstephh fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
24797275Sstephh fmd_case_unusable_and_present, &any_unusable_and_present);
24807275Sstephh if (!any_unusable_and_present) {
24817275Sstephh cip->ci_state = FMD_CASE_RESOLVED;
24827275Sstephh
24839120SStephen.Hanson@Sun.COM TRACE((FMD_DBG_CASE, "replay sending list.repaired %s",
24849120SStephen.Hanson@Sun.COM cip->ci_uuid));
24857275Sstephh nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
24867275Sstephh (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
24877275Sstephh e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl,
24887275Sstephh class);
24897275Sstephh fmd_dispq_dispatch(fmd.d_disp, e, class);
24907275Sstephh
24919120SStephen.Hanson@Sun.COM TRACE((FMD_DBG_CASE, "replay sending list.resolved %s",
24929120SStephen.Hanson@Sun.COM cip->ci_uuid));
24937275Sstephh fmd_case_publish(cp, FMD_CASE_RESOLVED);
249410928SStephen.Hanson@Sun.COM fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
249510928SStephen.Hanson@Sun.COM fmd_asru_log_resolved, NULL);
249610656SStephen.Hanson@Sun.COM cip->ci_flags |= FMD_CF_RES_CMPL;
24977275Sstephh } else {
24989120SStephen.Hanson@Sun.COM TRACE((FMD_DBG_CASE, "replay sending list.repaired %s",
24999120SStephen.Hanson@Sun.COM cip->ci_uuid));
25007275Sstephh nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
25017275Sstephh (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
25027275Sstephh e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl,
25037275Sstephh class);
25047275Sstephh fmd_dispq_dispatch(fmd.d_disp, e, class);
25057275Sstephh }
25067913SStephen.Hanson@Sun.COM } else if (faulty && not_faulty) {
25077275Sstephh /*
25087275Sstephh * if some but not all of the suspects are not faulty, replay
25097275Sstephh * the list.updated.
25107275Sstephh */
25119120SStephen.Hanson@Sun.COM TRACE((FMD_DBG_CASE, "replay sending list.updated %s",
25129120SStephen.Hanson@Sun.COM cip->ci_uuid));
25137275Sstephh nvl = fmd_case_mkevent(cp, FM_LIST_UPDATED_CLASS);
25147275Sstephh (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
25157275Sstephh e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
25167275Sstephh fmd_dispq_dispatch(fmd.d_disp, e, class);
25177275Sstephh }
25187275Sstephh }
25197275Sstephh
25207275Sstephh void
fmd_case_repair_replay()25217275Sstephh fmd_case_repair_replay()
25227275Sstephh {
25237275Sstephh fmd_case_hash_apply(fmd.d_cases, fmd_case_repair_replay_case, NULL);
25247275Sstephh }
2525