1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 *
25 * fme.c -- fault management exercise module
26 *
27 * this module provides the simulated fault management exercise.
28 */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <strings.h>
34 #include <ctype.h>
35 #include <alloca.h>
36 #include <libnvpair.h>
37 #include <sys/fm/protocol.h>
38 #include <fm/fmd_api.h>
39 #include "alloc.h"
40 #include "out.h"
41 #include "stats.h"
42 #include "stable.h"
43 #include "literals.h"
44 #include "lut.h"
45 #include "tree.h"
46 #include "ptree.h"
47 #include "itree.h"
48 #include "ipath.h"
49 #include "fme.h"
50 #include "evnv.h"
51 #include "eval.h"
52 #include "config.h"
53 #include "platform.h"
54 #include "esclex.h"
55
56 /* imported from eft.c... */
57 extern hrtime_t Hesitate;
58 extern char *Serd_Override;
59 extern nv_alloc_t Eft_nv_hdl;
60 extern int Max_fme;
61 extern fmd_hdl_t *Hdl;
62
63 static int Istat_need_save;
64 static int Serd_need_save;
65 void istat_save(void);
66 void serd_save(void);
67
68 /* fme under construction is global so we can free it on module abort */
69 static struct fme *Nfmep;
70
71 static int Undiag_reason = UD_VAL_UNKNOWN;
72
73 static int Nextid = 0;
74
75 static int Open_fme_count = 0; /* Count of open FMEs */
76
77 /* list of fault management exercises underway */
78 static struct fme {
79 struct fme *next; /* next exercise */
80 unsigned long long ull; /* time when fme was created */
81 int id; /* FME id */
82 struct config *config; /* cooked configuration data */
83 struct lut *eventtree; /* propagation tree for this FME */
84 /*
85 * The initial error report that created this FME is kept in
86 * two forms. e0 points to the instance tree node and is used
87 * by fme_eval() as the starting point for the inference
88 * algorithm. e0r is the event handle FMD passed to us when
89 * the ereport first arrived and is used when setting timers,
90 * which are always relative to the time of this initial
91 * report.
92 */
93 struct event *e0;
94 fmd_event_t *e0r;
95
96 id_t timer; /* for setting an fmd time-out */
97
98 struct event *ecurrent; /* ereport under consideration */
99 struct event *suspects; /* current suspect list */
100 struct event *psuspects; /* previous suspect list */
101 int nsuspects; /* count of suspects */
102 int posted_suspects; /* true if we've posted a diagnosis */
103 int uniqobs; /* number of unique events observed */
104 int peek; /* just peeking, don't track suspects */
105 int overflow; /* true if overflow FME */
106 enum fme_state {
107 FME_NOTHING = 5000, /* not evaluated yet */
108 FME_WAIT, /* need to wait for more info */
109 FME_CREDIBLE, /* suspect list is credible */
110 FME_DISPROVED, /* no valid suspects found */
111 FME_DEFERRED /* don't know yet (k-count not met) */
112 } state;
113
114 unsigned long long pull; /* time passed since created */
115 unsigned long long wull; /* wait until this time for re-eval */
116 struct event *observations; /* observation list */
117 struct lut *globals; /* values of global variables */
118 /* fmd interfacing */
119 fmd_hdl_t *hdl; /* handle for talking with fmd */
120 fmd_case_t *fmcase; /* what fmd 'case' we associate with */
121 /* stats */
122 struct stats *Rcount;
123 struct stats *Hcallcount;
124 struct stats *Rcallcount;
125 struct stats *Ccallcount;
126 struct stats *Ecallcount;
127 struct stats *Tcallcount;
128 struct stats *Marrowcount;
129 struct stats *diags;
130 } *FMElist, *EFMElist, *ClosedFMEs;
131
132 static struct case_list {
133 fmd_case_t *fmcase;
134 struct case_list *next;
135 } *Undiagablecaselist;
136
137 static void fme_eval(struct fme *fmep, fmd_event_t *ffep);
138 static enum fme_state hypothesise(struct fme *fmep, struct event *ep,
139 unsigned long long at_latest_by, unsigned long long *pdelay);
140 static struct node *eventprop_lookup(struct event *ep, const char *propname);
141 static struct node *pathstring2epnamenp(char *path);
142 static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep,
143 fmd_case_t *fmcase, nvlist_t *detector, char *arg);
144 static char *undiag_2reason_str(int ud, char *arg);
145 static const char *undiag_2defect_str(int ud);
146 static void restore_suspects(struct fme *fmep);
147 static void save_suspects(struct fme *fmep);
148 static void destroy_fme(struct fme *f);
149 static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
150 const char *eventstring, const struct ipath *ipp, nvlist_t *nvl);
151 static void istat_counter_reset_cb(struct istat_entry *entp,
152 struct stats *statp, const struct ipath *ipp);
153 static void istat_counter_topo_chg_cb(struct istat_entry *entp,
154 struct stats *statp, void *unused);
155 static void serd_reset_cb(struct serd_entry *entp, void *unused,
156 const struct ipath *ipp);
157 static void serd_topo_chg_cb(struct serd_entry *entp, void *unused,
158 void *unused2);
159 static void destroy_fme_bufs(struct fme *fp);
160
161 static struct fme *
alloc_fme(void)162 alloc_fme(void)
163 {
164 struct fme *fmep;
165
166 fmep = MALLOC(sizeof (*fmep));
167 bzero(fmep, sizeof (*fmep));
168 return (fmep);
169 }
170
171 /*
172 * fme_ready -- called when all initialization of the FME (except for
173 * stats) has completed successfully. Adds the fme to global lists
174 * and establishes its stats.
175 */
176 static struct fme *
fme_ready(struct fme * fmep)177 fme_ready(struct fme *fmep)
178 {
179 char nbuf[100];
180
181 Nfmep = NULL; /* don't need to free this on module abort now */
182
183 if (EFMElist) {
184 EFMElist->next = fmep;
185 EFMElist = fmep;
186 } else
187 FMElist = EFMElist = fmep;
188
189 (void) sprintf(nbuf, "fme%d.Rcount", fmep->id);
190 fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
191 (void) sprintf(nbuf, "fme%d.Hcall", fmep->id);
192 fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1);
193 (void) sprintf(nbuf, "fme%d.Rcall", fmep->id);
194 fmep->Rcallcount = stats_new_counter(nbuf,
195 "calls to requirements_test()", 1);
196 (void) sprintf(nbuf, "fme%d.Ccall", fmep->id);
197 fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1);
198 (void) sprintf(nbuf, "fme%d.Ecall", fmep->id);
199 fmep->Ecallcount =
200 stats_new_counter(nbuf, "calls to effects_test()", 1);
201 (void) sprintf(nbuf, "fme%d.Tcall", fmep->id);
202 fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
203 (void) sprintf(nbuf, "fme%d.Marrow", fmep->id);
204 fmep->Marrowcount = stats_new_counter(nbuf,
205 "arrows marked by mark_arrows()", 1);
206 (void) sprintf(nbuf, "fme%d.diags", fmep->id);
207 fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
208
209 out(O_ALTFP|O_VERB2, "newfme: config snapshot contains...");
210 config_print(O_ALTFP|O_VERB2, fmep->config);
211
212 return (fmep);
213 }
214
215 extern void ipath_dummy_lut(struct arrow *);
216 extern struct lut *itree_create_dummy(const char *, const struct ipath *);
217
218 /* ARGSUSED */
219 static void
set_needed_arrows(struct event * ep,struct event * ep2,struct fme * fmep)220 set_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
221 {
222 struct bubble *bp;
223 struct arrowlist *ap;
224
225 for (bp = itree_next_bubble(ep, NULL); bp;
226 bp = itree_next_bubble(ep, bp)) {
227 if (bp->t != B_FROM)
228 continue;
229 for (ap = itree_next_arrow(bp, NULL); ap;
230 ap = itree_next_arrow(bp, ap)) {
231 ap->arrowp->pnode->u.arrow.needed = 1;
232 ipath_dummy_lut(ap->arrowp);
233 }
234 }
235 }
236
237 /* ARGSUSED */
238 static void
unset_needed_arrows(struct event * ep,struct event * ep2,struct fme * fmep)239 unset_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
240 {
241 struct bubble *bp;
242 struct arrowlist *ap;
243
244 for (bp = itree_next_bubble(ep, NULL); bp;
245 bp = itree_next_bubble(ep, bp)) {
246 if (bp->t != B_FROM)
247 continue;
248 for (ap = itree_next_arrow(bp, NULL); ap;
249 ap = itree_next_arrow(bp, ap))
250 ap->arrowp->pnode->u.arrow.needed = 0;
251 }
252 }
253
254 static void globals_destructor(void *left, void *right, void *arg);
255 static void clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep);
256
257 static boolean_t
prune_propagations(const char * e0class,const struct ipath * e0ipp)258 prune_propagations(const char *e0class, const struct ipath *e0ipp)
259 {
260 char nbuf[100];
261 unsigned long long my_delay = TIMEVAL_EVENTUALLY;
262 extern struct lut *Usednames;
263
264 Nfmep = alloc_fme();
265 Nfmep->id = Nextid;
266 Nfmep->state = FME_NOTHING;
267 Nfmep->eventtree = itree_create_dummy(e0class, e0ipp);
268 if ((Nfmep->e0 =
269 itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
270 itree_free(Nfmep->eventtree);
271 FREE(Nfmep);
272 Nfmep = NULL;
273 return (B_FALSE);
274 }
275 Nfmep->ecurrent = Nfmep->observations = Nfmep->e0;
276 Nfmep->e0->count++;
277
278 (void) sprintf(nbuf, "fme%d.Rcount", Nfmep->id);
279 Nfmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
280 (void) sprintf(nbuf, "fme%d.Hcall", Nfmep->id);
281 Nfmep->Hcallcount =
282 stats_new_counter(nbuf, "calls to hypothesise()", 1);
283 (void) sprintf(nbuf, "fme%d.Rcall", Nfmep->id);
284 Nfmep->Rcallcount = stats_new_counter(nbuf,
285 "calls to requirements_test()", 1);
286 (void) sprintf(nbuf, "fme%d.Ccall", Nfmep->id);
287 Nfmep->Ccallcount =
288 stats_new_counter(nbuf, "calls to causes_test()", 1);
289 (void) sprintf(nbuf, "fme%d.Ecall", Nfmep->id);
290 Nfmep->Ecallcount =
291 stats_new_counter(nbuf, "calls to effects_test()", 1);
292 (void) sprintf(nbuf, "fme%d.Tcall", Nfmep->id);
293 Nfmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
294 (void) sprintf(nbuf, "fme%d.Marrow", Nfmep->id);
295 Nfmep->Marrowcount = stats_new_counter(nbuf,
296 "arrows marked by mark_arrows()", 1);
297 (void) sprintf(nbuf, "fme%d.diags", Nfmep->id);
298 Nfmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
299
300 Nfmep->peek = 1;
301 lut_walk(Nfmep->eventtree, (lut_cb)unset_needed_arrows, (void *)Nfmep);
302 lut_free(Usednames, NULL, NULL);
303 Usednames = NULL;
304 lut_walk(Nfmep->eventtree, (lut_cb)clear_arrows, (void *)Nfmep);
305 (void) hypothesise(Nfmep, Nfmep->e0, Nfmep->ull, &my_delay);
306 itree_prune(Nfmep->eventtree);
307 lut_walk(Nfmep->eventtree, (lut_cb)set_needed_arrows, (void *)Nfmep);
308
309 stats_delete(Nfmep->Rcount);
310 stats_delete(Nfmep->Hcallcount);
311 stats_delete(Nfmep->Rcallcount);
312 stats_delete(Nfmep->Ccallcount);
313 stats_delete(Nfmep->Ecallcount);
314 stats_delete(Nfmep->Tcallcount);
315 stats_delete(Nfmep->Marrowcount);
316 stats_delete(Nfmep->diags);
317 itree_free(Nfmep->eventtree);
318 lut_free(Nfmep->globals, globals_destructor, NULL);
319 FREE(Nfmep);
320 return (B_TRUE);
321 }
322
323 static struct fme *
newfme(const char * e0class,const struct ipath * e0ipp,fmd_hdl_t * hdl,fmd_case_t * fmcase,fmd_event_t * ffep,nvlist_t * nvl)324 newfme(const char *e0class, const struct ipath *e0ipp, fmd_hdl_t *hdl,
325 fmd_case_t *fmcase, fmd_event_t *ffep, nvlist_t *nvl)
326 {
327 struct cfgdata *cfgdata;
328 int init_size;
329 extern int alloc_total();
330 nvlist_t *detector = NULL;
331 char *pathstr;
332 char *arg;
333
334 /*
335 * First check if e0ipp is actually in the topology so we can give a
336 * more useful error message.
337 */
338 ipathlastcomp(e0ipp);
339 pathstr = ipath2str(NULL, e0ipp);
340 cfgdata = config_snapshot();
341 platform_units_translate(0, cfgdata->cooked, NULL, NULL,
342 &detector, pathstr);
343 FREE(pathstr);
344 structconfig_free(cfgdata->cooked);
345 config_free(cfgdata);
346 if (detector == NULL) {
347 /* See if class permits silent discard on unknown component. */
348 if (lut_lookup(Ereportenames_discard, (void *)e0class, NULL)) {
349 out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport "
350 "to component path, but silent discard allowed.",
351 e0class);
352 } else {
353 Undiag_reason = UD_VAL_BADEVENTPATH;
354 (void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR,
355 &detector);
356 arg = ipath2str(e0class, e0ipp);
357 publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
358 FREE(arg);
359 }
360 return (NULL);
361 }
362
363 /*
364 * Next run a quick first pass of the rules with a dummy config. This
365 * allows us to prune those rules which can't possibly cause this
366 * ereport.
367 */
368 if (!prune_propagations(e0class, e0ipp)) {
369 /*
370 * The fault class must have been in the rules or we would
371 * not have registered for it (and got a "nosub"), and the
372 * pathname must be in the topology or we would have failed the
373 * previous test. So to get here means the combination of
374 * class and pathname in the ereport must be invalid.
375 */
376 Undiag_reason = UD_VAL_BADEVENTCLASS;
377 arg = ipath2str(e0class, e0ipp);
378 publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
379 nvlist_free(detector);
380 FREE(arg);
381 return (NULL);
382 }
383
384 /*
385 * Now go ahead and create the real fme using the pruned rules.
386 */
387 init_size = alloc_total();
388 out(O_ALTFP|O_STAMP, "start config_snapshot using %d bytes", init_size);
389 nvlist_free(detector);
390 pathstr = ipath2str(NULL, e0ipp);
391 cfgdata = config_snapshot();
392 platform_units_translate(0, cfgdata->cooked, NULL, NULL,
393 &detector, pathstr);
394 FREE(pathstr);
395 platform_save_config(hdl, fmcase);
396 out(O_ALTFP|O_STAMP, "config_snapshot added %d bytes",
397 alloc_total() - init_size);
398
399 Nfmep = alloc_fme();
400
401 Nfmep->id = Nextid++;
402 Nfmep->config = cfgdata->cooked;
403 config_free(cfgdata);
404 Nfmep->posted_suspects = 0;
405 Nfmep->uniqobs = 0;
406 Nfmep->state = FME_NOTHING;
407 Nfmep->pull = 0ULL;
408 Nfmep->overflow = 0;
409
410 Nfmep->fmcase = fmcase;
411 Nfmep->hdl = hdl;
412
413 if ((Nfmep->eventtree = itree_create(Nfmep->config)) == NULL) {
414 Undiag_reason = UD_VAL_INSTFAIL;
415 arg = ipath2str(e0class, e0ipp);
416 publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
417 nvlist_free(detector);
418 FREE(arg);
419 structconfig_free(Nfmep->config);
420 destroy_fme_bufs(Nfmep);
421 FREE(Nfmep);
422 Nfmep = NULL;
423 return (NULL);
424 }
425
426 itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree);
427
428 if ((Nfmep->e0 =
429 itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
430 Undiag_reason = UD_VAL_BADEVENTI;
431 arg = ipath2str(e0class, e0ipp);
432 publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
433 nvlist_free(detector);
434 FREE(arg);
435 itree_free(Nfmep->eventtree);
436 structconfig_free(Nfmep->config);
437 destroy_fme_bufs(Nfmep);
438 FREE(Nfmep);
439 Nfmep = NULL;
440 return (NULL);
441 }
442
443 nvlist_free(detector);
444 return (fme_ready(Nfmep));
445 }
446
447 void
fme_fini(void)448 fme_fini(void)
449 {
450 struct fme *sfp, *fp;
451 struct case_list *ucasep, *nextcasep;
452
453 ucasep = Undiagablecaselist;
454 while (ucasep != NULL) {
455 nextcasep = ucasep->next;
456 FREE(ucasep);
457 ucasep = nextcasep;
458 }
459 Undiagablecaselist = NULL;
460
461 /* clean up closed fmes */
462 fp = ClosedFMEs;
463 while (fp != NULL) {
464 sfp = fp->next;
465 destroy_fme(fp);
466 fp = sfp;
467 }
468 ClosedFMEs = NULL;
469
470 fp = FMElist;
471 while (fp != NULL) {
472 sfp = fp->next;
473 destroy_fme(fp);
474 fp = sfp;
475 }
476 FMElist = EFMElist = NULL;
477
478 /* if we were in the middle of creating an fme, free it now */
479 if (Nfmep) {
480 destroy_fme(Nfmep);
481 Nfmep = NULL;
482 }
483 }
484
/*
 * Size of the scratch buffer used for per-observation buffer names.
 * The longest name formatted into it is "observed<N>.nvp": 12 fixed
 * characters plus the terminating NUL leave room for a 7-digit <N>,
 * which allows for a ridiculous 9,999,999 unique observations.
 */
#define	OBBUFNMSZ 20
490
491 /*
492 * serialize_observation
493 *
494 * Create a recoverable version of the current observation
495 * (f->ecurrent). We keep a serialized version of each unique
496 * observation in order that we may resume correctly the fme in the
497 * correct state if eft or fmd crashes and we're restarted.
498 */
499 static void
serialize_observation(struct fme * fp,const char * cls,const struct ipath * ipp)500 serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp)
501 {
502 size_t pkdlen;
503 char tmpbuf[OBBUFNMSZ];
504 char *pkd = NULL;
505 char *estr;
506
507 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs);
508 estr = ipath2str(cls, ipp);
509 fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1);
510 fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr,
511 strlen(estr) + 1);
512 FREE(estr);
513
514 if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) {
515 (void) snprintf(tmpbuf,
516 OBBUFNMSZ, "observed%d.nvp", fp->uniqobs);
517 if (nvlist_xpack(fp->ecurrent->nvp,
518 &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0)
519 out(O_DIE|O_SYS, "pack of observed nvl failed");
520 fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen);
521 fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen);
522 FREE(pkd);
523 }
524
525 fp->uniqobs++;
526 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
527 sizeof (fp->uniqobs));
528 }
529
530 /*
531 * init_fme_bufs -- We keep several bits of state about an fme for
532 * use if eft or fmd crashes and we're restarted.
533 */
534 static void
init_fme_bufs(struct fme * fp)535 init_fme_bufs(struct fme *fp)
536 {
537 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull));
538 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull,
539 sizeof (fp->pull));
540
541 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id));
542 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id,
543 sizeof (fp->id));
544
545 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs));
546 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
547 sizeof (fp->uniqobs));
548
549 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD,
550 sizeof (fp->posted_suspects));
551 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD,
552 (void *)&fp->posted_suspects, sizeof (fp->posted_suspects));
553 }
554
555 static void
destroy_fme_bufs(struct fme * fp)556 destroy_fme_bufs(struct fme *fp)
557 {
558 char tmpbuf[OBBUFNMSZ];
559 int o;
560
561 platform_restore_config(fp->hdl, fp->fmcase);
562 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN);
563 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG);
564 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL);
565 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID);
566 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD);
567 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS);
568
569 for (o = 0; o < fp->uniqobs; o++) {
570 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o);
571 fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
572 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o);
573 fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
574 }
575 }
576
577 /*
578 * reconstitute_observations -- convert a case's serialized observations
579 * back into struct events. Returns zero if all observations are
580 * successfully reconstituted.
581 */
582 static int
reconstitute_observations(struct fme * fmep)583 reconstitute_observations(struct fme *fmep)
584 {
585 struct event *ep;
586 struct node *epnamenp = NULL;
587 size_t pkdlen;
588 char *pkd = NULL;
589 char *tmpbuf = alloca(OBBUFNMSZ);
590 char *sepptr;
591 char *estr;
592 int ocnt;
593 int elen;
594
595 for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) {
596 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt);
597 elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
598 if (elen == 0) {
599 out(O_ALTFP,
600 "reconstitute_observation: no %s buffer found.",
601 tmpbuf);
602 Undiag_reason = UD_VAL_MISSINGOBS;
603 break;
604 }
605
606 estr = MALLOC(elen);
607 fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
608 sepptr = strchr(estr, '@');
609 if (sepptr == NULL) {
610 out(O_ALTFP,
611 "reconstitute_observation: %s: "
612 "missing @ separator in %s.",
613 tmpbuf, estr);
614 Undiag_reason = UD_VAL_MISSINGPATH;
615 FREE(estr);
616 break;
617 }
618
619 *sepptr = '\0';
620 if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
621 out(O_ALTFP,
622 "reconstitute_observation: %s: "
623 "trouble converting path string \"%s\" "
624 "to internal representation.",
625 tmpbuf, sepptr + 1);
626 Undiag_reason = UD_VAL_MISSINGPATH;
627 FREE(estr);
628 break;
629 }
630
631 /* construct the event */
632 ep = itree_lookup(fmep->eventtree,
633 stable(estr), ipath(epnamenp));
634 if (ep == NULL) {
635 out(O_ALTFP,
636 "reconstitute_observation: %s: "
637 "lookup of \"%s\" in itree failed.",
638 tmpbuf, ipath2str(estr, ipath(epnamenp)));
639 Undiag_reason = UD_VAL_BADOBS;
640 tree_free(epnamenp);
641 FREE(estr);
642 break;
643 }
644 tree_free(epnamenp);
645
646 /*
647 * We may or may not have a saved nvlist for the observation
648 */
649 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt);
650 pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
651 if (pkdlen != 0) {
652 pkd = MALLOC(pkdlen);
653 fmd_buf_read(fmep->hdl,
654 fmep->fmcase, tmpbuf, pkd, pkdlen);
655 ASSERT(ep->nvp == NULL);
656 if (nvlist_xunpack(pkd,
657 pkdlen, &ep->nvp, &Eft_nv_hdl) != 0)
658 out(O_DIE|O_SYS, "pack of observed nvl failed");
659 FREE(pkd);
660 }
661
662 if (ocnt == 0)
663 fmep->e0 = ep;
664
665 FREE(estr);
666 fmep->ecurrent = ep;
667 ep->count++;
668
669 /* link it into list of observations seen */
670 ep->observations = fmep->observations;
671 fmep->observations = ep;
672 }
673
674 if (ocnt == fmep->uniqobs) {
675 (void) fme_ready(fmep);
676 return (0);
677 }
678
679 return (1);
680 }
681
682 /*
683 * restart_fme -- called during eft initialization. Reconstitutes
684 * an in-progress fme.
685 */
686 void
fme_restart(fmd_hdl_t * hdl,fmd_case_t * inprogress)687 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress)
688 {
689 nvlist_t *defect;
690 struct case_list *bad;
691 struct fme *fmep;
692 struct cfgdata *cfgdata;
693 size_t rawsz;
694 struct event *ep;
695 char *tmpbuf = alloca(OBBUFNMSZ);
696 char *sepptr;
697 char *estr;
698 int elen;
699 struct node *epnamenp = NULL;
700 int init_size;
701 extern int alloc_total();
702 char *reason;
703
704 /*
705 * ignore solved or closed cases
706 */
707 if (fmd_case_solved(hdl, inprogress) ||
708 fmd_case_closed(hdl, inprogress))
709 return;
710
711 fmep = alloc_fme();
712 fmep->fmcase = inprogress;
713 fmep->hdl = hdl;
714
715 if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) {
716 out(O_ALTFP, "restart_fme: no saved posted status");
717 Undiag_reason = UD_VAL_MISSINGINFO;
718 goto badcase;
719 } else {
720 fmd_buf_read(hdl, inprogress, WOBUF_POSTD,
721 (void *)&fmep->posted_suspects,
722 sizeof (fmep->posted_suspects));
723 }
724
725 if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) {
726 out(O_ALTFP, "restart_fme: no saved id");
727 Undiag_reason = UD_VAL_MISSINGINFO;
728 goto badcase;
729 } else {
730 fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id,
731 sizeof (fmep->id));
732 }
733 if (Nextid <= fmep->id)
734 Nextid = fmep->id + 1;
735
736 out(O_ALTFP, "Replay FME %d", fmep->id);
737
738 if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) {
739 out(O_ALTFP, "restart_fme: No config data");
740 Undiag_reason = UD_VAL_MISSINGINFO;
741 goto badcase;
742 }
743 fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz,
744 sizeof (size_t));
745
746 if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) {
747 out(O_ALTFP, "restart_fme: No event zero");
748 Undiag_reason = UD_VAL_MISSINGZERO;
749 goto badcase;
750 }
751
752 if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) {
753 out(O_ALTFP, "restart_fme: no saved wait time");
754 Undiag_reason = UD_VAL_MISSINGINFO;
755 goto badcase;
756 } else {
757 fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull,
758 sizeof (fmep->pull));
759 }
760
761 if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) {
762 out(O_ALTFP, "restart_fme: no count of observations");
763 Undiag_reason = UD_VAL_MISSINGINFO;
764 goto badcase;
765 } else {
766 fmd_buf_read(hdl, inprogress, WOBUF_NOBS,
767 (void *)&fmep->uniqobs, sizeof (fmep->uniqobs));
768 }
769
770 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed0");
771 elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
772 if (elen == 0) {
773 out(O_ALTFP, "reconstitute_observation: no %s buffer found.",
774 tmpbuf);
775 Undiag_reason = UD_VAL_MISSINGOBS;
776 goto badcase;
777 }
778 estr = MALLOC(elen);
779 fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
780 sepptr = strchr(estr, '@');
781 if (sepptr == NULL) {
782 out(O_ALTFP, "reconstitute_observation: %s: "
783 "missing @ separator in %s.",
784 tmpbuf, estr);
785 Undiag_reason = UD_VAL_MISSINGPATH;
786 FREE(estr);
787 goto badcase;
788 }
789 *sepptr = '\0';
790 if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
791 out(O_ALTFP, "reconstitute_observation: %s: "
792 "trouble converting path string \"%s\" "
793 "to internal representation.", tmpbuf, sepptr + 1);
794 Undiag_reason = UD_VAL_MISSINGPATH;
795 FREE(estr);
796 goto badcase;
797 }
798 (void) prune_propagations(stable(estr), ipath(epnamenp));
799 tree_free(epnamenp);
800 FREE(estr);
801
802 init_size = alloc_total();
803 out(O_ALTFP|O_STAMP, "start config_restore using %d bytes", init_size);
804 cfgdata = MALLOC(sizeof (struct cfgdata));
805 cfgdata->cooked = NULL;
806 cfgdata->devcache = NULL;
807 cfgdata->devidcache = NULL;
808 cfgdata->tpcache = NULL;
809 cfgdata->cpucache = NULL;
810 cfgdata->raw_refcnt = 1;
811
812 if (rawsz > 0) {
813 if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) {
814 out(O_ALTFP, "restart_fme: Config data size mismatch");
815 Undiag_reason = UD_VAL_CFGMISMATCH;
816 goto badcase;
817 }
818 cfgdata->begin = MALLOC(rawsz);
819 cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz;
820 fmd_buf_read(hdl,
821 inprogress, WOBUF_CFG, cfgdata->begin, rawsz);
822 } else {
823 cfgdata->begin = cfgdata->end = cfgdata->nextfree = NULL;
824 }
825
826 config_cook(cfgdata);
827 fmep->config = cfgdata->cooked;
828 config_free(cfgdata);
829 out(O_ALTFP|O_STAMP, "config_restore added %d bytes",
830 alloc_total() - init_size);
831
832 if ((fmep->eventtree = itree_create(fmep->config)) == NULL) {
833 /* case not properly saved or irretrievable */
834 out(O_ALTFP, "restart_fme: NULL instance tree");
835 Undiag_reason = UD_VAL_INSTFAIL;
836 goto badcase;
837 }
838
839 itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree);
840
841 if (reconstitute_observations(fmep) != 0)
842 goto badcase;
843
844 out(O_ALTFP|O_NONL, "FME %d replay observations: ", fmep->id);
845 for (ep = fmep->observations; ep; ep = ep->observations) {
846 out(O_ALTFP|O_NONL, " ");
847 itree_pevent_brief(O_ALTFP|O_NONL, ep);
848 }
849 out(O_ALTFP, NULL);
850
851 Open_fme_count++;
852
853 /* give the diagnosis algorithm a shot at the new FME state */
854 fme_eval(fmep, fmep->e0r);
855 return;
856
857 badcase:
858 if (fmep->eventtree != NULL)
859 itree_free(fmep->eventtree);
860 if (fmep->config)
861 structconfig_free(fmep->config);
862 destroy_fme_bufs(fmep);
863 FREE(fmep);
864
865 /*
866 * Since we're unable to restart the case, add it to the undiagable
867 * list and solve and close it as appropriate.
868 */
869 bad = MALLOC(sizeof (struct case_list));
870 bad->next = NULL;
871
872 if (Undiagablecaselist != NULL)
873 bad->next = Undiagablecaselist;
874 Undiagablecaselist = bad;
875 bad->fmcase = inprogress;
876
877 out(O_ALTFP|O_NONL, "[case %s (unable to restart), ",
878 fmd_case_uuid(hdl, bad->fmcase));
879
880 if (fmd_case_solved(hdl, bad->fmcase)) {
881 out(O_ALTFP|O_NONL, "already solved, ");
882 } else {
883 out(O_ALTFP|O_NONL, "solving, ");
884 defect = fmd_nvl_create_fault(hdl,
885 undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
886 reason = undiag_2reason_str(Undiag_reason, NULL);
887 (void) nvlist_add_string(defect, UNDIAG_REASON, reason);
888 FREE(reason);
889 fmd_case_add_suspect(hdl, bad->fmcase, defect);
890 fmd_case_solve(hdl, bad->fmcase);
891 Undiag_reason = UD_VAL_UNKNOWN;
892 }
893
894 if (fmd_case_closed(hdl, bad->fmcase)) {
895 out(O_ALTFP, "already closed ]");
896 } else {
897 out(O_ALTFP, "closing ]");
898 fmd_case_close(hdl, bad->fmcase);
899 }
900 }
901
902 /*ARGSUSED*/
903 static void
globals_destructor(void * left,void * right,void * arg)904 globals_destructor(void *left, void *right, void *arg)
905 {
906 struct evalue *evp = (struct evalue *)right;
907 if (evp->t == NODEPTR)
908 tree_free((struct node *)(uintptr_t)evp->v);
909 evp->v = (uintptr_t)NULL;
910 FREE(evp);
911 }
912
913 void
destroy_fme(struct fme * f)914 destroy_fme(struct fme *f)
915 {
916 stats_delete(f->Rcount);
917 stats_delete(f->Hcallcount);
918 stats_delete(f->Rcallcount);
919 stats_delete(f->Ccallcount);
920 stats_delete(f->Ecallcount);
921 stats_delete(f->Tcallcount);
922 stats_delete(f->Marrowcount);
923 stats_delete(f->diags);
924
925 if (f->eventtree != NULL)
926 itree_free(f->eventtree);
927 if (f->config)
928 structconfig_free(f->config);
929 lut_free(f->globals, globals_destructor, NULL);
930 FREE(f);
931 }
932
933 static const char *
fme_state2str(enum fme_state s)934 fme_state2str(enum fme_state s)
935 {
936 switch (s) {
937 case FME_NOTHING: return ("NOTHING");
938 case FME_WAIT: return ("WAIT");
939 case FME_CREDIBLE: return ("CREDIBLE");
940 case FME_DISPROVED: return ("DISPROVED");
941 case FME_DEFERRED: return ("DEFERRED");
942 default: return ("UNKNOWN");
943 }
944 }
945
946 static int
is_problem(enum nametype t)947 is_problem(enum nametype t)
948 {
949 return (t == N_FAULT || t == N_DEFECT || t == N_UPSET);
950 }
951
952 static int
is_defect(enum nametype t)953 is_defect(enum nametype t)
954 {
955 return (t == N_DEFECT);
956 }
957
958 static int
is_upset(enum nametype t)959 is_upset(enum nametype t)
960 {
961 return (t == N_UPSET);
962 }
963
964 static void
fme_print(int flags,struct fme * fmep)965 fme_print(int flags, struct fme *fmep)
966 {
967 struct event *ep;
968
969 out(flags, "Fault Management Exercise %d", fmep->id);
970 out(flags, "\t State: %s", fme_state2str(fmep->state));
971 out(flags|O_NONL, "\t Start time: ");
972 ptree_timeval(flags|O_NONL, &fmep->ull);
973 out(flags, NULL);
974 if (fmep->wull) {
975 out(flags|O_NONL, "\t Wait time: ");
976 ptree_timeval(flags|O_NONL, &fmep->wull);
977 out(flags, NULL);
978 }
979 out(flags|O_NONL, "\t E0: ");
980 if (fmep->e0)
981 itree_pevent_brief(flags|O_NONL, fmep->e0);
982 else
983 out(flags|O_NONL, "NULL");
984 out(flags, NULL);
985 out(flags|O_NONL, "\tObservations:");
986 for (ep = fmep->observations; ep; ep = ep->observations) {
987 out(flags|O_NONL, " ");
988 itree_pevent_brief(flags|O_NONL, ep);
989 }
990 out(flags, NULL);
991 out(flags|O_NONL, "\tSuspect list:");
992 for (ep = fmep->suspects; ep; ep = ep->suspects) {
993 out(flags|O_NONL, " ");
994 itree_pevent_brief(flags|O_NONL, ep);
995 }
996 out(flags, NULL);
997 if (fmep->eventtree != NULL) {
998 out(flags|O_VERB2, "\t Tree:");
999 itree_ptree(flags|O_VERB2, fmep->eventtree);
1000 }
1001 }
1002
1003 static struct node *
pathstring2epnamenp(char * path)1004 pathstring2epnamenp(char *path)
1005 {
1006 char *sep = "/";
1007 struct node *ret;
1008 char *ptr;
1009
1010 if ((ptr = strtok(path, sep)) == NULL)
1011 out(O_DIE, "pathstring2epnamenp: invalid empty class");
1012
1013 ret = tree_iname(stable(ptr), NULL, 0);
1014
1015 while ((ptr = strtok(NULL, sep)) != NULL)
1016 ret = tree_name_append(ret,
1017 tree_iname(stable(ptr), NULL, 0));
1018
1019 return (ret);
1020 }
1021
1022 /*
1023 * for a given upset sp, increment the corresponding SERD engine. if the
1024 * SERD engine trips, return the ename and ipp of the resulting ereport.
1025 * returns true if engine tripped and *enamep and *ippp were filled in.
1026 */
1027 static int
serd_eval(struct fme * fmep,fmd_hdl_t * hdl,fmd_event_t * ffep,fmd_case_t * fmcase,struct event * sp,const char ** enamep,const struct ipath ** ippp)1028 serd_eval(struct fme *fmep, fmd_hdl_t *hdl, fmd_event_t *ffep,
1029 fmd_case_t *fmcase, struct event *sp, const char **enamep,
1030 const struct ipath **ippp)
1031 {
1032 struct node *serdinst;
1033 char *serdname;
1034 char *serdresource;
1035 char *serdclass;
1036 struct node *nid;
1037 struct serd_entry *newentp;
1038 int i, serdn = -1, serdincrement = 1, len = 0;
1039 char *serdsuffix = NULL, *serdt = NULL;
1040 struct evalue *ep;
1041
1042 ASSERT(sp->t == N_UPSET);
1043 ASSERT(ffep != NULL);
1044
1045 if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1046 (void *)"n", (lut_cmp)strcmp)) != NULL) {
1047 ASSERT(ep->t == UINT64);
1048 serdn = (int)ep->v;
1049 }
1050 if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1051 (void *)"t", (lut_cmp)strcmp)) != NULL) {
1052 ASSERT(ep->t == STRING);
1053 serdt = (char *)(uintptr_t)ep->v;
1054 }
1055 if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1056 (void *)"suffix", (lut_cmp)strcmp)) != NULL) {
1057 ASSERT(ep->t == STRING);
1058 serdsuffix = (char *)(uintptr_t)ep->v;
1059 }
1060 if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1061 (void *)"increment", (lut_cmp)strcmp)) != NULL) {
1062 ASSERT(ep->t == UINT64);
1063 serdincrement = (int)ep->v;
1064 }
1065
1066 /*
1067 * obtain instanced SERD engine from the upset sp. from this
1068 * derive serdname, the string used to identify the SERD engine.
1069 */
1070 serdinst = eventprop_lookup(sp, L_engine);
1071
1072 if (serdinst == NULL)
1073 return (-1);
1074
1075 len = strlen(serdinst->u.stmt.np->u.event.ename->u.name.s) + 1;
1076 if (serdsuffix != NULL)
1077 len += strlen(serdsuffix);
1078 serdclass = MALLOC(len);
1079 if (serdsuffix != NULL)
1080 (void) snprintf(serdclass, len, "%s%s",
1081 serdinst->u.stmt.np->u.event.ename->u.name.s, serdsuffix);
1082 else
1083 (void) snprintf(serdclass, len, "%s",
1084 serdinst->u.stmt.np->u.event.ename->u.name.s);
1085 serdresource = ipath2str(NULL,
1086 ipath(serdinst->u.stmt.np->u.event.epname));
1087 len += strlen(serdresource) + 1;
1088 serdname = MALLOC(len);
1089 (void) snprintf(serdname, len, "%s@%s", serdclass, serdresource);
1090 FREE(serdresource);
1091
1092 /* handle serd engine "id" property, if there is one */
1093 if ((nid =
1094 lut_lookup(serdinst->u.stmt.lutp, (void *)L_id, NULL)) != NULL) {
1095 struct evalue *gval;
1096 char suffixbuf[200];
1097 char *suffix;
1098 char *nserdname;
1099 size_t nname;
1100
1101 out(O_ALTFP|O_NONL, "serd \"%s\" id: ", serdname);
1102 ptree_name_iter(O_ALTFP|O_NONL, nid);
1103
1104 ASSERTinfo(nid->t == T_GLOBID, ptree_nodetype2str(nid->t));
1105
1106 if ((gval = lut_lookup(fmep->globals,
1107 (void *)nid->u.globid.s, NULL)) == NULL) {
1108 out(O_ALTFP, " undefined");
1109 } else if (gval->t == UINT64) {
1110 out(O_ALTFP, " %llu", gval->v);
1111 (void) sprintf(suffixbuf, "%llu", gval->v);
1112 suffix = suffixbuf;
1113 } else {
1114 out(O_ALTFP, " \"%s\"", (char *)(uintptr_t)gval->v);
1115 suffix = (char *)(uintptr_t)gval->v;
1116 }
1117
1118 nname = strlen(serdname) + strlen(suffix) + 2;
1119 nserdname = MALLOC(nname);
1120 (void) snprintf(nserdname, nname, "%s:%s", serdname, suffix);
1121 FREE(serdname);
1122 serdname = nserdname;
1123 }
1124
1125 /*
1126 * if the engine is empty, and we have an override for n/t then
1127 * destroy and recreate it.
1128 */
1129 if ((serdn != -1 || serdt != NULL) && fmd_serd_exists(hdl, serdname) &&
1130 fmd_serd_empty(hdl, serdname))
1131 fmd_serd_destroy(hdl, serdname);
1132
1133 if (!fmd_serd_exists(hdl, serdname)) {
1134 struct node *nN, *nT;
1135 const char *s;
1136 struct node *nodep;
1137 struct config *cp;
1138 char *path;
1139 uint_t nval;
1140 hrtime_t tval;
1141 int i;
1142 char *ptr;
1143 int got_n_override = 0, got_t_override = 0;
1144
1145 /* no SERD engine yet, so create it */
1146 nodep = serdinst->u.stmt.np->u.event.epname;
1147 path = ipath2str(NULL, ipath(nodep));
1148 cp = config_lookup(fmep->config, path, 0);
1149 FREE((void *)path);
1150
1151 /*
1152 * We allow serd paramaters to be overridden, either from
1153 * eft.conf file values (if Serd_Override is set) or from
1154 * driver properties (for "serd.io.device" engines).
1155 */
1156 if (Serd_Override != NULL) {
1157 char *save_ptr, *ptr1, *ptr2, *ptr3;
1158 ptr3 = save_ptr = STRDUP(Serd_Override);
1159 while (*ptr3 != '\0') {
1160 ptr1 = strchr(ptr3, ',');
1161 *ptr1 = '\0';
1162 if (strcmp(ptr3, serdclass) == 0) {
1163 ptr2 = strchr(ptr1 + 1, ',');
1164 *ptr2 = '\0';
1165 nval = atoi(ptr1 + 1);
1166 out(O_ALTFP, "serd override %s_n %d",
1167 serdclass, nval);
1168 ptr3 = strchr(ptr2 + 1, ' ');
1169 if (ptr3)
1170 *ptr3 = '\0';
1171 ptr = STRDUP(ptr2 + 1);
1172 out(O_ALTFP, "serd override %s_t %s",
1173 serdclass, ptr);
1174 got_n_override = 1;
1175 got_t_override = 1;
1176 break;
1177 } else {
1178 ptr2 = strchr(ptr1 + 1, ',');
1179 ptr3 = strchr(ptr2 + 1, ' ');
1180 if (ptr3 == NULL)
1181 break;
1182 }
1183 ptr3++;
1184 }
1185 FREE(save_ptr);
1186 }
1187
1188 if (cp && got_n_override == 0) {
1189 /*
1190 * convert serd engine class into property name
1191 */
1192 char *prop_name = MALLOC(strlen(serdclass) + 3);
1193 for (i = 0; i < strlen(serdclass); i++) {
1194 if (serdclass[i] == '.')
1195 prop_name[i] = '_';
1196 else
1197 prop_name[i] = serdclass[i];
1198 }
1199 prop_name[i++] = '_';
1200 prop_name[i++] = 'n';
1201 prop_name[i] = '\0';
1202 if (s = config_getprop(cp, prop_name)) {
1203 nval = atoi(s);
1204 out(O_ALTFP, "serd override %s_n %s",
1205 serdclass, s);
1206 got_n_override = 1;
1207 }
1208 prop_name[i - 1] = 't';
1209 if (s = config_getprop(cp, prop_name)) {
1210 ptr = STRDUP(s);
1211 out(O_ALTFP, "serd override %s_t %s",
1212 serdclass, s);
1213 got_t_override = 1;
1214 }
1215 FREE(prop_name);
1216 }
1217
1218 if (serdn != -1 && got_n_override == 0) {
1219 nval = serdn;
1220 out(O_ALTFP, "serd override %s_n %d", serdclass, serdn);
1221 got_n_override = 1;
1222 }
1223 if (serdt != NULL && got_t_override == 0) {
1224 ptr = STRDUP(serdt);
1225 out(O_ALTFP, "serd override %s_t %s", serdclass, serdt);
1226 got_t_override = 1;
1227 }
1228
1229 if (!got_n_override) {
1230 nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N,
1231 NULL);
1232 ASSERT(nN->t == T_NUM);
1233 nval = (uint_t)nN->u.ull;
1234 }
1235 if (!got_t_override) {
1236 nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T,
1237 NULL);
1238 ASSERT(nT->t == T_TIMEVAL);
1239 tval = (hrtime_t)nT->u.ull;
1240 } else {
1241 const unsigned long long *ullp;
1242 const char *suffix;
1243 int len;
1244
1245 len = strspn(ptr, "0123456789");
1246 suffix = stable(&ptr[len]);
1247 ullp = (unsigned long long *)lut_lookup(Timesuffixlut,
1248 (void *)suffix, NULL);
1249 ptr[len] = '\0';
1250 tval = strtoull(ptr, NULL, 0) * (ullp ? *ullp : 1ll);
1251 FREE(ptr);
1252 }
1253 fmd_serd_create(hdl, serdname, nval, tval);
1254 }
1255
1256 newentp = MALLOC(sizeof (*newentp));
1257 newentp->ename = stable(serdclass);
1258 FREE(serdclass);
1259 newentp->ipath = ipath(serdinst->u.stmt.np->u.event.epname);
1260 newentp->hdl = hdl;
1261 if (lut_lookup(SerdEngines, newentp, (lut_cmp)serd_cmp) == NULL) {
1262 SerdEngines = lut_add(SerdEngines, (void *)newentp,
1263 (void *)newentp, (lut_cmp)serd_cmp);
1264 Serd_need_save = 1;
1265 serd_save();
1266 } else {
1267 FREE(newentp);
1268 }
1269
1270
1271 /*
1272 * increment SERD engine. if engine fires, reset serd
1273 * engine and return trip_strcode if required.
1274 */
1275 for (i = 0; i < serdincrement; i++) {
1276 if (fmd_serd_record(hdl, serdname, ffep)) {
1277 fmd_case_add_serd(hdl, fmcase, serdname);
1278 fmd_serd_reset(hdl, serdname);
1279
1280 if (ippp) {
1281 struct node *tripinst =
1282 lut_lookup(serdinst->u.stmt.lutp,
1283 (void *)L_trip, NULL);
1284 ASSERT(tripinst != NULL);
1285 *enamep = tripinst->u.event.ename->u.name.s;
1286 *ippp = ipath(tripinst->u.event.epname);
1287 out(O_ALTFP|O_NONL,
1288 "[engine fired: %s, sending: ", serdname);
1289 ipath_print(O_ALTFP|O_NONL, *enamep, *ippp);
1290 out(O_ALTFP, "]");
1291 } else {
1292 out(O_ALTFP, "[engine fired: %s, no trip]",
1293 serdname);
1294 }
1295 FREE(serdname);
1296 return (1);
1297 }
1298 }
1299
1300 FREE(serdname);
1301 return (0);
1302 }
1303
1304 /*
1305 * search a suspect list for upsets. feed each upset to serd_eval() and
1306 * build up tripped[], an array of ereports produced by the firing of
1307 * any SERD engines. then feed each ereport back into
1308 * fme_receive_report().
1309 *
1310 * returns ntrip, the number of these ereports produced.
1311 */
1312 static int
upsets_eval(struct fme * fmep,fmd_event_t * ffep)1313 upsets_eval(struct fme *fmep, fmd_event_t *ffep)
1314 {
1315 /* we build an array of tripped ereports that we send ourselves */
1316 struct {
1317 const char *ename;
1318 const struct ipath *ipp;
1319 } *tripped;
1320 struct event *sp;
1321 int ntrip, nupset, i;
1322
1323 /*
1324 * count the number of upsets to determine the upper limit on
1325 * expected trip ereport strings. remember that one upset can
1326 * lead to at most one ereport.
1327 */
1328 nupset = 0;
1329 for (sp = fmep->suspects; sp; sp = sp->suspects) {
1330 if (sp->t == N_UPSET)
1331 nupset++;
1332 }
1333
1334 if (nupset == 0)
1335 return (0);
1336
1337 /*
1338 * get to this point if we have upsets and expect some trip
1339 * ereports
1340 */
1341 tripped = alloca(sizeof (*tripped) * nupset);
1342 bzero((void *)tripped, sizeof (*tripped) * nupset);
1343
1344 ntrip = 0;
1345 for (sp = fmep->suspects; sp; sp = sp->suspects)
1346 if (sp->t == N_UPSET &&
1347 serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, sp,
1348 &tripped[ntrip].ename, &tripped[ntrip].ipp) == 1)
1349 ntrip++;
1350
1351 for (i = 0; i < ntrip; i++) {
1352 struct event *ep, *nep;
1353 struct fme *nfmep;
1354 fmd_case_t *fmcase;
1355 const struct ipath *ipp;
1356 const char *eventstring;
1357 int prev_verbose;
1358 unsigned long long my_delay = TIMEVAL_EVENTUALLY;
1359 enum fme_state state;
1360
1361 /*
1362 * First try and evaluate a case with the trip ereport plus
1363 * all the other ereports that cause the trip. If that fails
1364 * to evaluate then try again with just this ereport on its own.
1365 */
1366 out(O_ALTFP|O_NONL, "fme_receive_report_serd: ");
1367 ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
1368 out(O_ALTFP|O_STAMP, NULL);
1369 ep = fmep->e0;
1370 eventstring = ep->enode->u.event.ename->u.name.s;
1371 ipp = ep->ipp;
1372
1373 /*
1374 * create a duplicate fme and case
1375 */
1376 fmcase = fmd_case_open(fmep->hdl, NULL);
1377 out(O_ALTFP|O_NONL, "duplicate fme for event [");
1378 ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1379 out(O_ALTFP, " ]");
1380
1381 if ((nfmep = newfme(eventstring, ipp, fmep->hdl,
1382 fmcase, ffep, ep->nvp)) == NULL) {
1383 out(O_ALTFP|O_NONL, "[");
1384 ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1385 out(O_ALTFP, " CANNOT DIAGNOSE]");
1386 continue;
1387 }
1388
1389 Open_fme_count++;
1390 nfmep->pull = fmep->pull;
1391 init_fme_bufs(nfmep);
1392 out(O_ALTFP|O_NONL, "[");
1393 ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1394 out(O_ALTFP, " created FME%d, case %s]", nfmep->id,
1395 fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
1396 if (ffep) {
1397 fmd_case_setprincipal(nfmep->hdl, nfmep->fmcase, ffep);
1398 fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase, ffep);
1399 nfmep->e0r = ffep;
1400 }
1401
1402 /*
1403 * add the original ereports
1404 */
1405 for (ep = fmep->observations; ep; ep = ep->observations) {
1406 eventstring = ep->enode->u.event.ename->u.name.s;
1407 ipp = ep->ipp;
1408 out(O_ALTFP|O_NONL, "adding event [");
1409 ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1410 out(O_ALTFP, " ]");
1411 nep = itree_lookup(nfmep->eventtree, eventstring, ipp);
1412 if (nep->count++ == 0) {
1413 nep->observations = nfmep->observations;
1414 nfmep->observations = nep;
1415 serialize_observation(nfmep, eventstring, ipp);
1416 nep->nvp = evnv_dupnvl(ep->nvp);
1417 }
1418 if (ep->ffep && ep->ffep != ffep)
1419 fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase,
1420 ep->ffep);
1421 stats_counter_bump(nfmep->Rcount);
1422 }
1423
1424 /*
1425 * add the serd trigger ereport
1426 */
1427 if ((ep = itree_lookup(nfmep->eventtree, tripped[i].ename,
1428 tripped[i].ipp)) == NULL) {
1429 /*
1430 * The trigger ereport is not in the instance tree. It
1431 * was presumably removed by prune_propagations() as
1432 * this combination of events is not present in the
1433 * rules.
1434 */
1435 out(O_ALTFP, "upsets_eval: e0 not in instance tree");
1436 Undiag_reason = UD_VAL_BADEVENTI;
1437 goto retry_lone_ereport;
1438 }
1439 out(O_ALTFP|O_NONL, "adding event [");
1440 ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
1441 out(O_ALTFP, " ]");
1442 nfmep->ecurrent = ep;
1443 ep->nvp = NULL;
1444 ep->count = 1;
1445 ep->observations = nfmep->observations;
1446 nfmep->observations = ep;
1447
1448 /*
1449 * just peek first.
1450 */
1451 nfmep->peek = 1;
1452 prev_verbose = Verbose;
1453 if (Debug == 0)
1454 Verbose = 0;
1455 lut_walk(nfmep->eventtree, (lut_cb)clear_arrows, (void *)nfmep);
1456 state = hypothesise(nfmep, nfmep->e0, nfmep->ull, &my_delay);
1457 nfmep->peek = 0;
1458 Verbose = prev_verbose;
1459 if (state == FME_DISPROVED) {
1460 out(O_ALTFP, "upsets_eval: hypothesis disproved");
1461 Undiag_reason = UD_VAL_UNSOLVD;
1462 retry_lone_ereport:
1463 /*
1464 * However the trigger ereport on its own might be
1465 * diagnosable, so check for that. Undo the new fme
1466 * and case we just created and call fme_receive_report.
1467 */
1468 out(O_ALTFP|O_NONL, "[");
1469 ipath_print(O_ALTFP|O_NONL, tripped[i].ename,
1470 tripped[i].ipp);
1471 out(O_ALTFP, " retrying with just trigger ereport]");
1472 itree_free(nfmep->eventtree);
1473 nfmep->eventtree = NULL;
1474 structconfig_free(nfmep->config);
1475 nfmep->config = NULL;
1476 destroy_fme_bufs(nfmep);
1477 fmd_case_close(nfmep->hdl, nfmep->fmcase);
1478 fme_receive_report(fmep->hdl, ffep,
1479 tripped[i].ename, tripped[i].ipp, NULL);
1480 continue;
1481 }
1482
1483 /*
1484 * and evaluate
1485 */
1486 serialize_observation(nfmep, tripped[i].ename, tripped[i].ipp);
1487 fme_eval(nfmep, ffep);
1488 }
1489
1490 return (ntrip);
1491 }
1492
1493 /*
1494 * fme_receive_external_report -- call when an external ereport comes in
1495 *
1496 * this routine just converts the relevant information from the ereport
1497 * into a format used internally and passes it on to fme_receive_report().
1498 */
1499 void
fme_receive_external_report(fmd_hdl_t * hdl,fmd_event_t * ffep,nvlist_t * nvl,const char * class)1500 fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1501 const char *class)
1502 {
1503 struct node *epnamenp;
1504 fmd_case_t *fmcase;
1505 const struct ipath *ipp;
1506 nvlist_t *detector = NULL;
1507
1508 class = stable(class);
1509
1510 /* Get the component path from the ereport */
1511 epnamenp = platform_getpath(nvl);
1512
1513 /* See if we ended up without a path. */
1514 if (epnamenp == NULL) {
1515 /* See if class permits silent discard on unknown component. */
1516 if (lut_lookup(Ereportenames_discard, (void *)class, NULL)) {
1517 out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport "
1518 "to component path, but silent discard allowed.",
1519 class);
1520 } else {
1521 /*
1522 * XFILE: Failure to find a component is bad unless
1523 * 'discard_if_config_unknown=1' was specified in the
1524 * ereport definition. Indicate undiagnosable.
1525 */
1526 Undiag_reason = UD_VAL_NOPATH;
1527 fmcase = fmd_case_open(hdl, NULL);
1528
1529 /*
1530 * We don't have a component path here (which means that
1531 * the detector was not in hc-scheme and couldn't be
1532 * converted to hc-scheme. Report the raw detector as
1533 * the suspect resource if there is one.
1534 */
1535 (void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR,
1536 &detector);
1537 publish_undiagnosable(hdl, ffep, fmcase, detector,
1538 (char *)class);
1539 }
1540 return;
1541 }
1542
1543 ipp = ipath(epnamenp);
1544 tree_free(epnamenp);
1545 fme_receive_report(hdl, ffep, class, ipp, nvl);
1546 }
1547
1548 /*ARGSUSED*/
1549 void
fme_receive_repair_list(fmd_hdl_t * hdl,fmd_event_t * ffep,nvlist_t * nvl,const char * eventstring)1550 fme_receive_repair_list(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1551 const char *eventstring)
1552 {
1553 char *uuid;
1554 nvlist_t **nva;
1555 uint_t nvc;
1556 const struct ipath *ipp;
1557
1558 if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0 ||
1559 nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
1560 &nva, &nvc) != 0) {
1561 out(O_ALTFP, "No uuid or fault list for list.repaired event");
1562 return;
1563 }
1564
1565 out(O_ALTFP, "Processing list.repaired from case %s", uuid);
1566
1567 while (nvc-- != 0) {
1568 /*
1569 * Reset any istat or serd engine associated with this path.
1570 */
1571 char *path;
1572
1573 if ((ipp = platform_fault2ipath(*nva++)) == NULL)
1574 continue;
1575
1576 path = ipath2str(NULL, ipp);
1577 out(O_ALTFP, "fme_receive_repair_list: resetting state for %s",
1578 path);
1579 FREE(path);
1580
1581 lut_walk(Istats, (lut_cb)istat_counter_reset_cb, (void *)ipp);
1582 istat_save();
1583
1584 lut_walk(SerdEngines, (lut_cb)serd_reset_cb, (void *)ipp);
1585 serd_save();
1586 }
1587 }
1588
1589 /*ARGSUSED*/
1590 void
fme_receive_topology_change(void)1591 fme_receive_topology_change(void)
1592 {
1593 lut_walk(Istats, (lut_cb)istat_counter_topo_chg_cb, NULL);
1594 istat_save();
1595
1596 lut_walk(SerdEngines, (lut_cb)serd_topo_chg_cb, NULL);
1597 serd_save();
1598 }
1599
1600 static int mark_arrows(struct fme *fmep, struct event *ep, int mark,
1601 unsigned long long at_latest_by, unsigned long long *pdelay, int keep);
1602
1603 /* ARGSUSED */
1604 static void
clear_arrows(struct event * ep,struct event * ep2,struct fme * fmep)1605 clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
1606 {
1607 struct bubble *bp;
1608 struct arrowlist *ap;
1609
1610 ep->cached_state = 0;
1611 ep->keep_in_tree = 0;
1612 for (bp = itree_next_bubble(ep, NULL); bp;
1613 bp = itree_next_bubble(ep, bp)) {
1614 if (bp->t != B_FROM)
1615 continue;
1616 bp->mark = 0;
1617 for (ap = itree_next_arrow(bp, NULL); ap;
1618 ap = itree_next_arrow(bp, ap))
1619 ap->arrowp->mark = 0;
1620 }
1621 }
1622
1623 static void
fme_receive_report(fmd_hdl_t * hdl,fmd_event_t * ffep,const char * eventstring,const struct ipath * ipp,nvlist_t * nvl)1624 fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
1625 const char *eventstring, const struct ipath *ipp, nvlist_t *nvl)
1626 {
1627 struct event *ep;
1628 struct fme *fmep = NULL;
1629 struct fme *ofmep = NULL;
1630 struct fme *cfmep, *svfmep;
1631 int matched = 0;
1632 nvlist_t *defect;
1633 fmd_case_t *fmcase;
1634 char *reason;
1635
1636 out(O_ALTFP|O_NONL, "fme_receive_report: ");
1637 ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1638 out(O_ALTFP|O_STAMP, NULL);
1639
1640 /* decide which FME it goes to */
1641 for (fmep = FMElist; fmep; fmep = fmep->next) {
1642 int prev_verbose;
1643 unsigned long long my_delay = TIMEVAL_EVENTUALLY;
1644 enum fme_state state;
1645 nvlist_t *pre_peek_nvp = NULL;
1646
1647 if (fmep->overflow) {
1648 if (!(fmd_case_closed(fmep->hdl, fmep->fmcase)))
1649 ofmep = fmep;
1650
1651 continue;
1652 }
1653
1654 /*
1655 * ignore solved or closed cases
1656 */
1657 if (fmep->posted_suspects ||
1658 fmd_case_solved(fmep->hdl, fmep->fmcase) ||
1659 fmd_case_closed(fmep->hdl, fmep->fmcase))
1660 continue;
1661
1662 /* look up event in event tree for this FME */
1663 if ((ep = itree_lookup(fmep->eventtree,
1664 eventstring, ipp)) == NULL)
1665 continue;
1666
1667 /* note observation */
1668 fmep->ecurrent = ep;
1669 if (ep->count++ == 0) {
1670 /* link it into list of observations seen */
1671 ep->observations = fmep->observations;
1672 fmep->observations = ep;
1673 ep->nvp = evnv_dupnvl(nvl);
1674 } else {
1675 /* use new payload values for peek */
1676 pre_peek_nvp = ep->nvp;
1677 ep->nvp = evnv_dupnvl(nvl);
1678 }
1679
1680 /* tell hypothesise() not to mess with suspect list */
1681 fmep->peek = 1;
1682
1683 /* don't want this to be verbose (unless Debug is set) */
1684 prev_verbose = Verbose;
1685 if (Debug == 0)
1686 Verbose = 0;
1687
1688 lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
1689 state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
1690
1691 fmep->peek = 0;
1692
1693 /* put verbose flag back */
1694 Verbose = prev_verbose;
1695
1696 if (state != FME_DISPROVED) {
1697 /* found an FME that explains the ereport */
1698 matched++;
1699 out(O_ALTFP|O_NONL, "[");
1700 ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1701 out(O_ALTFP, " explained by FME%d]", fmep->id);
1702
1703 if (pre_peek_nvp)
1704 nvlist_free(pre_peek_nvp);
1705
1706 if (ep->count == 1)
1707 serialize_observation(fmep, eventstring, ipp);
1708
1709 if (ffep) {
1710 fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1711 ep->ffep = ffep;
1712 }
1713
1714 stats_counter_bump(fmep->Rcount);
1715
1716 /* re-eval FME */
1717 fme_eval(fmep, ffep);
1718 } else {
1719
1720 /* not a match, undo noting of observation */
1721 fmep->ecurrent = NULL;
1722 if (--ep->count == 0) {
1723 /* unlink it from observations */
1724 fmep->observations = ep->observations;
1725 ep->observations = NULL;
1726 nvlist_free(ep->nvp);
1727 ep->nvp = NULL;
1728 } else {
1729 nvlist_free(ep->nvp);
1730 ep->nvp = pre_peek_nvp;
1731 }
1732 }
1733 }
1734
1735 if (matched)
1736 return; /* explained by at least one existing FME */
1737
1738 /* clean up closed fmes */
1739 cfmep = ClosedFMEs;
1740 while (cfmep != NULL) {
1741 svfmep = cfmep->next;
1742 destroy_fme(cfmep);
1743 cfmep = svfmep;
1744 }
1745 ClosedFMEs = NULL;
1746
1747 if (ofmep) {
1748 out(O_ALTFP|O_NONL, "[");
1749 ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1750 out(O_ALTFP, " ADDING TO OVERFLOW FME]");
1751 if (ffep)
1752 fmd_case_add_ereport(hdl, ofmep->fmcase, ffep);
1753
1754 return;
1755
1756 } else if (Max_fme && (Open_fme_count >= Max_fme)) {
1757 out(O_ALTFP|O_NONL, "[");
1758 ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1759 out(O_ALTFP, " MAX OPEN FME REACHED]");
1760
1761 fmcase = fmd_case_open(hdl, NULL);
1762
1763 /* Create overflow fme */
1764 if ((fmep = newfme(eventstring, ipp, hdl, fmcase, ffep,
1765 nvl)) == NULL) {
1766 out(O_ALTFP|O_NONL, "[");
1767 ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1768 out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]");
1769 return;
1770 }
1771
1772 Open_fme_count++;
1773
1774 init_fme_bufs(fmep);
1775 fmep->overflow = B_TRUE;
1776
1777 if (ffep)
1778 fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1779
1780 Undiag_reason = UD_VAL_MAXFME;
1781 defect = fmd_nvl_create_fault(hdl,
1782 undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
1783 reason = undiag_2reason_str(Undiag_reason, NULL);
1784 (void) nvlist_add_string(defect, UNDIAG_REASON, reason);
1785 FREE(reason);
1786 fmd_case_add_suspect(hdl, fmep->fmcase, defect);
1787 fmd_case_solve(hdl, fmep->fmcase);
1788 Undiag_reason = UD_VAL_UNKNOWN;
1789 return;
1790 }
1791
1792 /* open a case */
1793 fmcase = fmd_case_open(hdl, NULL);
1794
1795 /* start a new FME */
1796 if ((fmep = newfme(eventstring, ipp, hdl, fmcase, ffep, nvl)) == NULL) {
1797 out(O_ALTFP|O_NONL, "[");
1798 ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1799 out(O_ALTFP, " CANNOT DIAGNOSE]");
1800 return;
1801 }
1802
1803 Open_fme_count++;
1804
1805 init_fme_bufs(fmep);
1806
1807 out(O_ALTFP|O_NONL, "[");
1808 ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1809 out(O_ALTFP, " created FME%d, case %s]", fmep->id,
1810 fmd_case_uuid(hdl, fmep->fmcase));
1811
1812 ep = fmep->e0;
1813 ASSERT(ep != NULL);
1814
1815 /* note observation */
1816 fmep->ecurrent = ep;
1817 if (ep->count++ == 0) {
1818 /* link it into list of observations seen */
1819 ep->observations = fmep->observations;
1820 fmep->observations = ep;
1821 ep->nvp = evnv_dupnvl(nvl);
1822 serialize_observation(fmep, eventstring, ipp);
1823 } else {
1824 /* new payload overrides any previous */
1825 nvlist_free(ep->nvp);
1826 ep->nvp = evnv_dupnvl(nvl);
1827 }
1828
1829 stats_counter_bump(fmep->Rcount);
1830
1831 if (ffep) {
1832 fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1833 fmd_case_setprincipal(hdl, fmep->fmcase, ffep);
1834 fmep->e0r = ffep;
1835 ep->ffep = ffep;
1836 }
1837
1838 /* give the diagnosis algorithm a shot at the new FME state */
1839 fme_eval(fmep, ffep);
1840 }
1841
1842 void
fme_status(int flags)1843 fme_status(int flags)
1844 {
1845 struct fme *fmep;
1846
1847 if (FMElist == NULL) {
1848 out(flags, "No fault management exercises underway.");
1849 return;
1850 }
1851
1852 for (fmep = FMElist; fmep; fmep = fmep->next)
1853 fme_print(flags, fmep);
1854 }
1855
1856 /*
1857 * "indent" routines used mostly for nicely formatted debug output, but also
1858 * for sanity checking for infinite recursion bugs.
1859 */
1860
1861 #define MAX_INDENT 1024
1862 static const char *indent_s[MAX_INDENT];
1863 static int current_indent;
1864
1865 static void
indent_push(const char * s)1866 indent_push(const char *s)
1867 {
1868 if (current_indent < MAX_INDENT)
1869 indent_s[current_indent++] = s;
1870 else
1871 out(O_DIE, "unexpected recursion depth (%d)", current_indent);
1872 }
1873
1874 static void
indent_set(const char * s)1875 indent_set(const char *s)
1876 {
1877 current_indent = 0;
1878 indent_push(s);
1879 }
1880
1881 static void
indent_pop(void)1882 indent_pop(void)
1883 {
1884 if (current_indent > 0)
1885 current_indent--;
1886 else
1887 out(O_DIE, "recursion underflow");
1888 }
1889
1890 static void
indent(void)1891 indent(void)
1892 {
1893 int i;
1894 if (!Verbose)
1895 return;
1896 for (i = 0; i < current_indent; i++)
1897 out(O_ALTFP|O_VERB|O_NONL, indent_s[i]);
1898 }
1899
1900 #define SLNEW 1
1901 #define SLCHANGED 2
1902 #define SLWAIT 3
1903 #define SLDISPROVED 4
1904
1905 static void
print_suspects(int circumstance,struct fme * fmep)1906 print_suspects(int circumstance, struct fme *fmep)
1907 {
1908 struct event *ep;
1909
1910 out(O_ALTFP|O_NONL, "[");
1911 if (circumstance == SLCHANGED) {
1912 out(O_ALTFP|O_NONL, "FME%d diagnosis changed. state: %s, "
1913 "suspect list:", fmep->id, fme_state2str(fmep->state));
1914 } else if (circumstance == SLWAIT) {
1915 out(O_ALTFP|O_NONL, "FME%d set wait timer %ld ", fmep->id,
1916 fmep->timer);
1917 ptree_timeval(O_ALTFP|O_NONL, &fmep->wull);
1918 } else if (circumstance == SLDISPROVED) {
1919 out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id);
1920 } else {
1921 out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id);
1922 }
1923
1924 if (circumstance == SLWAIT || circumstance == SLDISPROVED) {
1925 out(O_ALTFP, "]");
1926 return;
1927 }
1928
1929 for (ep = fmep->suspects; ep; ep = ep->suspects) {
1930 out(O_ALTFP|O_NONL, " ");
1931 itree_pevent_brief(O_ALTFP|O_NONL, ep);
1932 }
1933 out(O_ALTFP, "]");
1934 }
1935
1936 static struct node *
eventprop_lookup(struct event * ep,const char * propname)1937 eventprop_lookup(struct event *ep, const char *propname)
1938 {
1939 return (lut_lookup(ep->props, (void *)propname, NULL));
1940 }
1941
1942 #define MAXDIGITIDX 23
1943 static char numbuf[MAXDIGITIDX + 1];
1944
1945 static int
node2uint(struct node * n,uint_t * valp)1946 node2uint(struct node *n, uint_t *valp)
1947 {
1948 struct evalue value;
1949 struct lut *globals = NULL;
1950
1951 if (n == NULL)
1952 return (1);
1953
1954 /*
1955 * check value.v since we are being asked to convert an unsigned
1956 * long long int to an unsigned int
1957 */
1958 if (! eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) ||
1959 value.t != UINT64 || value.v > (1ULL << 32))
1960 return (1);
1961
1962 *valp = (uint_t)value.v;
1963
1964 return (0);
1965 }
1966
1967 static nvlist_t *
node2fmri(struct node * n)1968 node2fmri(struct node *n)
1969 {
1970 nvlist_t **pa, *f, *p;
1971 struct node *nc;
1972 uint_t depth = 0;
1973 char *numstr, *nullbyte;
1974 char *failure;
1975 int err, i;
1976
1977 /* XXX do we need to be able to handle a non-T_NAME node? */
1978 if (n == NULL || n->t != T_NAME)
1979 return (NULL);
1980
1981 for (nc = n; nc != NULL; nc = nc->u.name.next) {
1982 if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM)
1983 break;
1984 depth++;
1985 }
1986
1987 if (nc != NULL) {
1988 /* We bailed early, something went wrong */
1989 return (NULL);
1990 }
1991
1992 if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
1993 out(O_DIE|O_SYS, "alloc of fmri nvl failed");
1994 pa = alloca(depth * sizeof (nvlist_t *));
1995 for (i = 0; i < depth; i++)
1996 pa[i] = NULL;
1997
1998 err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
1999 err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
2000 err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
2001 err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
2002 if (err != 0) {
2003 failure = "basic construction of FMRI failed";
2004 goto boom;
2005 }
2006
2007 numbuf[MAXDIGITIDX] = '\0';
2008 nullbyte = &numbuf[MAXDIGITIDX];
2009 i = 0;
2010
2011 for (nc = n; nc != NULL; nc = nc->u.name.next) {
2012 err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
2013 if (err != 0) {
2014 failure = "alloc of an hc-pair failed";
2015 goto boom;
2016 }
2017 err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s);
2018 numstr = ulltostr(nc->u.name.child->u.ull, nullbyte);
2019 err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
2020 if (err != 0) {
2021 failure = "construction of an hc-pair failed";
2022 goto boom;
2023 }
2024 pa[i++] = p;
2025 }
2026
2027 err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
2028 if (err == 0) {
2029 for (i = 0; i < depth; i++)
2030 if (pa[i] != NULL)
2031 nvlist_free(pa[i]);
2032 return (f);
2033 }
2034 failure = "addition of hc-pair array to FMRI failed";
2035
2036 boom:
2037 for (i = 0; i < depth; i++)
2038 if (pa[i] != NULL)
2039 nvlist_free(pa[i]);
2040 nvlist_free(f);
2041 out(O_DIE, "%s", failure);
2042 /*NOTREACHED*/
2043 return (NULL);
2044 }
2045
/*
 * an ipath cache entry is an array of these elements; the end of the
 * array is marked by an element with s == NULL.
 */
struct ipath {
	const char *s;	/* component name (in stable) */
	int i;		/* instance number */
};
2051
2052 static nvlist_t *
ipath2fmri(struct ipath * ipath)2053 ipath2fmri(struct ipath *ipath)
2054 {
2055 nvlist_t **pa, *f, *p;
2056 uint_t depth = 0;
2057 char *numstr, *nullbyte;
2058 char *failure;
2059 int err, i;
2060 struct ipath *ipp;
2061
2062 for (ipp = ipath; ipp->s != NULL; ipp++)
2063 depth++;
2064
2065 if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
2066 out(O_DIE|O_SYS, "alloc of fmri nvl failed");
2067 pa = alloca(depth * sizeof (nvlist_t *));
2068 for (i = 0; i < depth; i++)
2069 pa[i] = NULL;
2070
2071 err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
2072 err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
2073 err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
2074 err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
2075 if (err != 0) {
2076 failure = "basic construction of FMRI failed";
2077 goto boom;
2078 }
2079
2080 numbuf[MAXDIGITIDX] = '\0';
2081 nullbyte = &numbuf[MAXDIGITIDX];
2082 i = 0;
2083
2084 for (ipp = ipath; ipp->s != NULL; ipp++) {
2085 err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
2086 if (err != 0) {
2087 failure = "alloc of an hc-pair failed";
2088 goto boom;
2089 }
2090 err = nvlist_add_string(p, FM_FMRI_HC_NAME, ipp->s);
2091 numstr = ulltostr(ipp->i, nullbyte);
2092 err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
2093 if (err != 0) {
2094 failure = "construction of an hc-pair failed";
2095 goto boom;
2096 }
2097 pa[i++] = p;
2098 }
2099
2100 err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
2101 if (err == 0) {
2102 for (i = 0; i < depth; i++)
2103 if (pa[i] != NULL)
2104 nvlist_free(pa[i]);
2105 return (f);
2106 }
2107 failure = "addition of hc-pair array to FMRI failed";
2108
2109 boom:
2110 for (i = 0; i < depth; i++)
2111 if (pa[i] != NULL)
2112 nvlist_free(pa[i]);
2113 nvlist_free(f);
2114 out(O_DIE, "%s", failure);
2115 /*NOTREACHED*/
2116 return (NULL);
2117 }
2118
/*
 * percentof -- return part as a percentage of whole, rounded to nearest
 *
 * The ratio is first computed in tenths of a percent so that the final
 * digit can be used to round half up.  A whole of zero yields 0 rather
 * than dividing by zero.
 */
static uint8_t
percentof(uint_t part, uint_t whole)
{
	unsigned long long tenths;

	if (whole == 0)
		return (0);

	/*
	 * Widen before scaling: "part * 1000" evaluated as uint_t would wrap
	 * for part values above roughly 4.29 million.
	 */
	tenths = ((unsigned long long)part * 1000) / whole;

	return ((tenths / 10) + (((tenths % 10) >= 5) ? 1 : 0));
}
2126
/*
 * One entry of the suspect list being prepared for publication.  Filled in
 * by get_resources(); the three nvlists are released by rslfree(), which
 * does not touch the suspect event itself.
 */
struct rsl {
	struct event *suspect;	/* the suspect event this entry describes */
	nvlist_t *asru;		/* ASRU FMRI for the suspect, may be NULL */
	nvlist_t *fru;		/* FRU FMRI for the suspect, may be NULL */
	nvlist_t *rsrc;		/* resource FMRI; rslfree() allows it to alias asru */
};

/* defined later in this file; takes the FME and its array of struct rsl */
static void publish_suspects(struct fme *fmep, struct rsl *srl);
2135
2136 /*
2137 * rslfree -- free internal members of struct rsl not expected to be
2138 * freed elsewhere.
2139 */
2140 static void
rslfree(struct rsl * freeme)2141 rslfree(struct rsl *freeme)
2142 {
2143 if (freeme->asru != NULL)
2144 nvlist_free(freeme->asru);
2145 if (freeme->fru != NULL)
2146 nvlist_free(freeme->fru);
2147 if (freeme->rsrc != NULL && freeme->rsrc != freeme->asru)
2148 nvlist_free(freeme->rsrc);
2149 }
2150
2151 /*
2152 * rslcmp -- compare two rsl structures. Use the following
2153 * comparisons to establish cardinality:
2154 *
2155 * 1. Name of the suspect's class. (simple strcmp)
2156 * 2. Name of the suspect's ASRU. (trickier, since nvlist)
2157 *
2158 */
2159 static int
rslcmp(const void * a,const void * b)2160 rslcmp(const void *a, const void *b)
2161 {
2162 struct rsl *r1 = (struct rsl *)a;
2163 struct rsl *r2 = (struct rsl *)b;
2164 int rv;
2165
2166 rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s,
2167 r2->suspect->enode->u.event.ename->u.name.s);
2168 if (rv != 0)
2169 return (rv);
2170
2171 if (r1->rsrc == NULL && r2->rsrc == NULL)
2172 return (0);
2173 if (r1->rsrc == NULL)
2174 return (-1);
2175 if (r2->rsrc == NULL)
2176 return (1);
2177 return (evnv_cmpnvl(r1->rsrc, r2->rsrc, 0));
2178 }
2179
2180 /*
2181 * get_resources -- for a given suspect, determine what ASRU, FRU and
2182 * RSRC nvlists should be advertised in the final suspect list.
2183 */
2184 void
get_resources(struct event * sp,struct rsl * rsrcs,struct config * croot)2185 get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot)
2186 {
2187 struct node *asrudef, *frudef;
2188 nvlist_t *asru, *fru;
2189 nvlist_t *rsrc = NULL;
2190 char *pathstr;
2191
2192 /*
2193 * First find any ASRU and/or FRU defined in the
2194 * initial fault tree.
2195 */
2196 asrudef = eventprop_lookup(sp, L_ASRU);
2197 frudef = eventprop_lookup(sp, L_FRU);
2198
2199 /*
2200 * Create FMRIs based on those definitions
2201 */
2202 asru = node2fmri(asrudef);
2203 fru = node2fmri(frudef);
2204 pathstr = ipath2str(NULL, sp->ipp);
2205
2206 /*
2207 * Allow for platform translations of the FMRIs
2208 */
2209 platform_units_translate(is_defect(sp->t), croot, &asru, &fru, &rsrc,
2210 pathstr);
2211
2212 FREE(pathstr);
2213 rsrcs->suspect = sp;
2214 rsrcs->asru = asru;
2215 rsrcs->fru = fru;
2216 rsrcs->rsrc = rsrc;
2217 }
2218
2219 /*
2220 * trim_suspects -- prior to publishing, we may need to remove some
2221 * suspects from the list. If we're auto-closing upsets, we don't
2222 * want any of those in the published list. If the ASRUs for multiple
2223 * defects resolve to the same ASRU (driver) we only want to publish
2224 * that as a single suspect.
2225 */
2226 static int
trim_suspects(struct fme * fmep,struct rsl * begin,struct rsl * begin2,fmd_event_t * ffep)2227 trim_suspects(struct fme *fmep, struct rsl *begin, struct rsl *begin2,
2228 fmd_event_t *ffep)
2229 {
2230 struct event *ep;
2231 struct rsl *rp = begin;
2232 struct rsl *rp2 = begin2;
2233 int mess_zero_count = 0;
2234 int serd_rval;
2235 uint_t messval;
2236
2237 /* remove any unwanted upsets and populate our array */
2238 for (ep = fmep->psuspects; ep; ep = ep->psuspects) {
2239 if (is_upset(ep->t))
2240 continue;
2241 serd_rval = serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, ep,
2242 NULL, NULL);
2243 if (serd_rval == 0)
2244 continue;
2245 if (node2uint(eventprop_lookup(ep, L_message),
2246 &messval) == 0 && messval == 0) {
2247 get_resources(ep, rp2, fmep->config);
2248 rp2++;
2249 mess_zero_count++;
2250 } else {
2251 get_resources(ep, rp, fmep->config);
2252 rp++;
2253 fmep->nsuspects++;
2254 }
2255 }
2256 return (mess_zero_count);
2257 }
2258
2259 /*
2260 * addpayloadprop -- add a payload prop to a problem
2261 */
2262 static void
addpayloadprop(const char * lhs,struct evalue * rhs,nvlist_t * fault)2263 addpayloadprop(const char *lhs, struct evalue *rhs, nvlist_t *fault)
2264 {
2265 nvlist_t *rsrc, *hcs;
2266
2267 ASSERT(fault != NULL);
2268 ASSERT(lhs != NULL);
2269 ASSERT(rhs != NULL);
2270
2271 if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE, &rsrc) != 0)
2272 out(O_DIE, "cannot add payloadprop \"%s\" to fault", lhs);
2273
2274 if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0) {
2275 out(O_ALTFP|O_VERB2, "addpayloadprop: create hc_specific");
2276 if (nvlist_xalloc(&hcs, NV_UNIQUE_NAME, &Eft_nv_hdl) != 0)
2277 out(O_DIE,
2278 "cannot add payloadprop \"%s\" to fault", lhs);
2279 if (nvlist_add_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, hcs) != 0)
2280 out(O_DIE,
2281 "cannot add payloadprop \"%s\" to fault", lhs);
2282 nvlist_free(hcs);
2283 if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0)
2284 out(O_DIE,
2285 "cannot add payloadprop \"%s\" to fault", lhs);
2286 } else
2287 out(O_ALTFP|O_VERB2, "addpayloadprop: reuse hc_specific");
2288
2289 if (rhs->t == UINT64) {
2290 out(O_ALTFP|O_VERB2, "addpayloadprop: %s=%llu", lhs, rhs->v);
2291
2292 if (nvlist_add_uint64(hcs, lhs, rhs->v) != 0)
2293 out(O_DIE,
2294 "cannot add payloadprop \"%s\" to fault", lhs);
2295 } else {
2296 out(O_ALTFP|O_VERB2, "addpayloadprop: %s=\"%s\"",
2297 lhs, (char *)(uintptr_t)rhs->v);
2298
2299 if (nvlist_add_string(hcs, lhs, (char *)(uintptr_t)rhs->v) != 0)
2300 out(O_DIE,
2301 "cannot add payloadprop \"%s\" to fault", lhs);
2302 }
2303 }
2304
/*
 * State shared by istataddsize(), istat2str() and istat_save() while the
 * istats are being serialized.
 */
static char *Istatbuf;		/* buffer MALLOC'd and FREE'd by istat_save() */
static char *Istatbufptr;	/* next write position, advanced by istat2str() */
static int Istatsz;		/* size summed by istataddsize(); size of Istatbuf */
2308
2309 /*
2310 * istataddsize -- calculate size of istat and add it to Istatsz
2311 */
2312 /*ARGSUSED2*/
2313 static void
istataddsize(const struct istat_entry * lhs,struct stats * rhs,void * arg)2314 istataddsize(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2315 {
2316 int val;
2317
2318 ASSERT(lhs != NULL);
2319 ASSERT(rhs != NULL);
2320
2321 if ((val = stats_counter_value(rhs)) == 0)
2322 return; /* skip zero-valued stats */
2323
2324 /* count up the size of the stat name */
2325 Istatsz += ipath2strlen(lhs->ename, lhs->ipath);
2326 Istatsz++; /* for the trailing NULL byte */
2327
2328 /* count up the size of the stat value */
2329 Istatsz += snprintf(NULL, 0, "%d", val);
2330 Istatsz++; /* for the trailing NULL byte */
2331 }
2332
2333 /*
2334 * istat2str -- serialize an istat, writing result to *Istatbufptr
2335 */
2336 /*ARGSUSED2*/
2337 static void
istat2str(const struct istat_entry * lhs,struct stats * rhs,void * arg)2338 istat2str(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2339 {
2340 char *str;
2341 int len;
2342 int val;
2343
2344 ASSERT(lhs != NULL);
2345 ASSERT(rhs != NULL);
2346
2347 if ((val = stats_counter_value(rhs)) == 0)
2348 return; /* skip zero-valued stats */
2349
2350 /* serialize the stat name */
2351 str = ipath2str(lhs->ename, lhs->ipath);
2352 len = strlen(str);
2353
2354 ASSERT(Istatbufptr + len + 1 < &Istatbuf[Istatsz]);
2355 (void) strlcpy(Istatbufptr, str, &Istatbuf[Istatsz] - Istatbufptr);
2356 Istatbufptr += len;
2357 FREE(str);
2358 *Istatbufptr++ = '\0';
2359
2360 /* serialize the stat value */
2361 Istatbufptr += snprintf(Istatbufptr, &Istatbuf[Istatsz] - Istatbufptr,
2362 "%d", val);
2363 *Istatbufptr++ = '\0';
2364
2365 ASSERT(Istatbufptr <= &Istatbuf[Istatsz]);
2366 }
2367
2368 void
istat_save()2369 istat_save()
2370 {
2371 if (Istat_need_save == 0)
2372 return;
2373
2374 /* figure out how big the serialzed info is */
2375 Istatsz = 0;
2376 lut_walk(Istats, (lut_cb)istataddsize, NULL);
2377
2378 if (Istatsz == 0) {
2379 /* no stats to save */
2380 fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
2381 return;
2382 }
2383
2384 /* create the serialized buffer */
2385 Istatbufptr = Istatbuf = MALLOC(Istatsz);
2386 lut_walk(Istats, (lut_cb)istat2str, NULL);
2387
2388 /* clear out current saved stats */
2389 fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
2390
2391 /* write out the new version */
2392 fmd_buf_write(Hdl, NULL, WOBUF_ISTATS, Istatbuf, Istatsz);
2393 FREE(Istatbuf);
2394
2395 Istat_need_save = 0;
2396 }
2397
2398 int
istat_cmp(struct istat_entry * ent1,struct istat_entry * ent2)2399 istat_cmp(struct istat_entry *ent1, struct istat_entry *ent2)
2400 {
2401 if (ent1->ename != ent2->ename)
2402 return (ent2->ename - ent1->ename);
2403 if (ent1->ipath != ent2->ipath)
2404 return ((char *)ent2->ipath - (char *)ent1->ipath);
2405
2406 return (0);
2407 }
2408
2409 /*
2410 * istat-verify -- verify the component associated with a stat still exists
2411 *
2412 * if the component no longer exists, this routine resets the stat and
2413 * returns 0. if the component still exists, it returns 1.
2414 */
2415 static int
istat_verify(struct node * snp,struct istat_entry * entp)2416 istat_verify(struct node *snp, struct istat_entry *entp)
2417 {
2418 struct stats *statp;
2419 nvlist_t *fmri;
2420
2421 fmri = node2fmri(snp->u.event.epname);
2422 if (platform_path_exists(fmri)) {
2423 nvlist_free(fmri);
2424 return (1);
2425 }
2426 nvlist_free(fmri);
2427
2428 /* component no longer in system. zero out the associated stats */
2429 if ((statp = (struct stats *)
2430 lut_lookup(Istats, entp, (lut_cmp)istat_cmp)) == NULL ||
2431 stats_counter_value(statp) == 0)
2432 return (0); /* stat is already reset */
2433
2434 Istat_need_save = 1;
2435 stats_counter_reset(statp);
2436 return (0);
2437 }
2438
2439 static void
istat_bump(struct node * snp,int n)2440 istat_bump(struct node *snp, int n)
2441 {
2442 struct stats *statp;
2443 struct istat_entry ent;
2444
2445 ASSERT(snp != NULL);
2446 ASSERTinfo(snp->t == T_EVENT, ptree_nodetype2str(snp->t));
2447 ASSERT(snp->u.event.epname != NULL);
2448
2449 /* class name should be hoisted into a single stable entry */
2450 ASSERT(snp->u.event.ename->u.name.next == NULL);
2451 ent.ename = snp->u.event.ename->u.name.s;
2452 ent.ipath = ipath(snp->u.event.epname);
2453
2454 if (!istat_verify(snp, &ent)) {
2455 /* component no longer exists in system, nothing to do */
2456 return;
2457 }
2458
2459 if ((statp = (struct stats *)
2460 lut_lookup(Istats, &ent, (lut_cmp)istat_cmp)) == NULL) {
2461 /* need to create the counter */
2462 int cnt = 0;
2463 struct node *np;
2464 char *sname;
2465 char *snamep;
2466 struct istat_entry *newentp;
2467
2468 /* count up the size of the stat name */
2469 np = snp->u.event.ename;
2470 while (np != NULL) {
2471 cnt += strlen(np->u.name.s);
2472 cnt++; /* for the '.' or '@' */
2473 np = np->u.name.next;
2474 }
2475 np = snp->u.event.epname;
2476 while (np != NULL) {
2477 cnt += snprintf(NULL, 0, "%s%llu",
2478 np->u.name.s, np->u.name.child->u.ull);
2479 cnt++; /* for the '/' or trailing NULL byte */
2480 np = np->u.name.next;
2481 }
2482
2483 /* build the stat name */
2484 snamep = sname = alloca(cnt);
2485 np = snp->u.event.ename;
2486 while (np != NULL) {
2487 snamep += snprintf(snamep, &sname[cnt] - snamep,
2488 "%s", np->u.name.s);
2489 np = np->u.name.next;
2490 if (np)
2491 *snamep++ = '.';
2492 }
2493 *snamep++ = '@';
2494 np = snp->u.event.epname;
2495 while (np != NULL) {
2496 snamep += snprintf(snamep, &sname[cnt] - snamep,
2497 "%s%llu", np->u.name.s, np->u.name.child->u.ull);
2498 np = np->u.name.next;
2499 if (np)
2500 *snamep++ = '/';
2501 }
2502 *snamep++ = '\0';
2503
2504 /* create the new stat & add it to our list */
2505 newentp = MALLOC(sizeof (*newentp));
2506 *newentp = ent;
2507 statp = stats_new_counter(NULL, sname, 0);
2508 Istats = lut_add(Istats, (void *)newentp, (void *)statp,
2509 (lut_cmp)istat_cmp);
2510 }
2511
2512 /* if n is non-zero, set that value instead of bumping */
2513 if (n) {
2514 stats_counter_reset(statp);
2515 stats_counter_add(statp, n);
2516 } else
2517 stats_counter_bump(statp);
2518 Istat_need_save = 1;
2519
2520 ipath_print(O_ALTFP|O_VERB2, ent.ename, ent.ipath);
2521 out(O_ALTFP|O_VERB2, " %s to value %d", n ? "set" : "incremented",
2522 stats_counter_value(statp));
2523 }
2524
/*
 * istat_destructor -- lut_free() callback for the Istats table
 *
 * Frees the istat_entry used as the key and deletes the stats counter
 * stored as the value.
 */
/*ARGSUSED*/
static void
istat_destructor(void *left, void *right, void *arg)
{
	struct istat_entry *key = (struct istat_entry *)left;
	struct stats *counter = (struct stats *)right;

	FREE(key);
	stats_delete(counter);
}
2534
2535 /*
2536 * Callback used in a walk of the Istats to reset matching stat counters.
2537 */
2538 static void
istat_counter_reset_cb(struct istat_entry * entp,struct stats * statp,const struct ipath * ipp)2539 istat_counter_reset_cb(struct istat_entry *entp, struct stats *statp,
2540 const struct ipath *ipp)
2541 {
2542 char *path;
2543
2544 if (entp->ipath == ipp) {
2545 path = ipath2str(entp->ename, ipp);
2546 out(O_ALTFP, "istat_counter_reset_cb: resetting %s", path);
2547 FREE(path);
2548 stats_counter_reset(statp);
2549 Istat_need_save = 1;
2550 }
2551 }
2552
2553 /*ARGSUSED*/
2554 static void
istat_counter_topo_chg_cb(struct istat_entry * entp,struct stats * statp,void * unused)2555 istat_counter_topo_chg_cb(struct istat_entry *entp, struct stats *statp,
2556 void *unused)
2557 {
2558 char *path;
2559 nvlist_t *fmri;
2560
2561 fmri = ipath2fmri((struct ipath *)(entp->ipath));
2562 if (!platform_path_exists(fmri)) {
2563 path = ipath2str(entp->ename, entp->ipath);
2564 out(O_ALTFP, "istat_counter_topo_chg_cb: not present %s", path);
2565 FREE(path);
2566 stats_counter_reset(statp);
2567 Istat_need_save = 1;
2568 }
2569 nvlist_free(fmri);
2570 }
2571
2572 void
istat_fini(void)2573 istat_fini(void)
2574 {
2575 lut_free(Istats, istat_destructor, NULL);
2576 }
2577
/*
 * State shared by serdaddsize(), serd2str() and serd_save() while the
 * serd engine names are being serialized.
 */
static char *Serdbuf;		/* buffer MALLOC'd and FREE'd by serd_save() */
static char *Serdbufptr;	/* next write position, advanced by serd2str() */
static int Serdsz;		/* size summed by serdaddsize(); size of Serdbuf */
2581
2582 /*
2583 * serdaddsize -- calculate size of serd and add it to Serdsz
2584 */
2585 /*ARGSUSED*/
2586 static void
serdaddsize(const struct serd_entry * lhs,struct stats * rhs,void * arg)2587 serdaddsize(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2588 {
2589 ASSERT(lhs != NULL);
2590
2591 /* count up the size of the stat name */
2592 Serdsz += ipath2strlen(lhs->ename, lhs->ipath);
2593 Serdsz++; /* for the trailing NULL byte */
2594 }
2595
2596 /*
2597 * serd2str -- serialize a serd engine, writing result to *Serdbufptr
2598 */
2599 /*ARGSUSED*/
2600 static void
serd2str(const struct serd_entry * lhs,struct stats * rhs,void * arg)2601 serd2str(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2602 {
2603 char *str;
2604 int len;
2605
2606 ASSERT(lhs != NULL);
2607
2608 /* serialize the serd engine name */
2609 str = ipath2str(lhs->ename, lhs->ipath);
2610 len = strlen(str);
2611
2612 ASSERT(Serdbufptr + len + 1 <= &Serdbuf[Serdsz]);
2613 (void) strlcpy(Serdbufptr, str, &Serdbuf[Serdsz] - Serdbufptr);
2614 Serdbufptr += len;
2615 FREE(str);
2616 *Serdbufptr++ = '\0';
2617 ASSERT(Serdbufptr <= &Serdbuf[Serdsz]);
2618 }
2619
2620 void
serd_save()2621 serd_save()
2622 {
2623 if (Serd_need_save == 0)
2624 return;
2625
2626 /* figure out how big the serialzed info is */
2627 Serdsz = 0;
2628 lut_walk(SerdEngines, (lut_cb)serdaddsize, NULL);
2629
2630 if (Serdsz == 0) {
2631 /* no serd engines to save */
2632 fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2633 return;
2634 }
2635
2636 /* create the serialized buffer */
2637 Serdbufptr = Serdbuf = MALLOC(Serdsz);
2638 lut_walk(SerdEngines, (lut_cb)serd2str, NULL);
2639
2640 /* clear out current saved stats */
2641 fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2642
2643 /* write out the new version */
2644 fmd_buf_write(Hdl, NULL, WOBUF_SERDS, Serdbuf, Serdsz);
2645 FREE(Serdbuf);
2646 Serd_need_save = 0;
2647 }
2648
2649 int
serd_cmp(struct serd_entry * ent1,struct serd_entry * ent2)2650 serd_cmp(struct serd_entry *ent1, struct serd_entry *ent2)
2651 {
2652 if (ent1->ename != ent2->ename)
2653 return (ent2->ename - ent1->ename);
2654 if (ent1->ipath != ent2->ipath)
2655 return ((char *)ent2->ipath - (char *)ent1->ipath);
2656
2657 return (0);
2658 }
2659
2660 void
fme_serd_load(fmd_hdl_t * hdl)2661 fme_serd_load(fmd_hdl_t *hdl)
2662 {
2663 int sz;
2664 char *sbuf;
2665 char *sepptr;
2666 char *ptr;
2667 struct serd_entry *newentp;
2668 struct node *epname;
2669 nvlist_t *fmri;
2670 char *namestring;
2671
2672 if ((sz = fmd_buf_size(hdl, NULL, WOBUF_SERDS)) == 0)
2673 return;
2674 sbuf = alloca(sz);
2675 fmd_buf_read(hdl, NULL, WOBUF_SERDS, sbuf, sz);
2676 ptr = sbuf;
2677 while (ptr < &sbuf[sz]) {
2678 sepptr = strchr(ptr, '@');
2679 *sepptr = '\0';
2680 namestring = ptr;
2681 sepptr++;
2682 ptr = sepptr;
2683 ptr += strlen(ptr);
2684 ptr++; /* move past the '\0' separating paths */
2685 epname = pathstring2epnamenp(sepptr);
2686 fmri = node2fmri(epname);
2687 if (platform_path_exists(fmri)) {
2688 newentp = MALLOC(sizeof (*newentp));
2689 newentp->hdl = hdl;
2690 newentp->ipath = ipath(epname);
2691 newentp->ename = stable(namestring);
2692 SerdEngines = lut_add(SerdEngines, (void *)newentp,
2693 (void *)newentp, (lut_cmp)serd_cmp);
2694 } else
2695 Serd_need_save = 1;
2696 tree_free(epname);
2697 nvlist_free(fmri);
2698 }
2699 /* save it back again in case some of the paths no longer exist */
2700 serd_save();
2701 }
2702
/*
 * serd_destructor -- lut_free() callback for the SerdEngines table
 *
 * Frees the serd_entry passed as the key; the value is not touched
 * (fme_serd_load() stores the same pointer as both key and value).
 */
/*ARGSUSED*/
static void
serd_destructor(void *left, void *right, void *arg)
{
	struct serd_entry *key = (struct serd_entry *)left;

	FREE(key);
}
2710
2711 /*
2712 * Callback used in a walk of the SerdEngines to reset matching serd engines.
2713 */
2714 /*ARGSUSED*/
2715 static void
serd_reset_cb(struct serd_entry * entp,void * unused,const struct ipath * ipp)2716 serd_reset_cb(struct serd_entry *entp, void *unused, const struct ipath *ipp)
2717 {
2718 char *path;
2719
2720 if (entp->ipath == ipp) {
2721 path = ipath2str(entp->ename, ipp);
2722 out(O_ALTFP, "serd_reset_cb: resetting %s", path);
2723 fmd_serd_reset(entp->hdl, path);
2724 FREE(path);
2725 Serd_need_save = 1;
2726 }
2727 }
2728
2729 /*ARGSUSED*/
2730 static void
serd_topo_chg_cb(struct serd_entry * entp,void * unused,void * unused2)2731 serd_topo_chg_cb(struct serd_entry *entp, void *unused, void *unused2)
2732 {
2733 char *path;
2734 nvlist_t *fmri;
2735
2736 fmri = ipath2fmri((struct ipath *)(entp->ipath));
2737 if (!platform_path_exists(fmri)) {
2738 path = ipath2str(entp->ename, entp->ipath);
2739 out(O_ALTFP, "serd_topo_chg_cb: not present %s", path);
2740 fmd_serd_reset(entp->hdl, path);
2741 FREE(path);
2742 Serd_need_save = 1;
2743 }
2744 nvlist_free(fmri);
2745 }
2746
2747 void
serd_fini(void)2748 serd_fini(void)
2749 {
2750 lut_free(SerdEngines, serd_destructor, NULL);
2751 }
2752
2753 static void
publish_suspects(struct fme * fmep,struct rsl * srl)2754 publish_suspects(struct fme *fmep, struct rsl *srl)
2755 {
2756 struct rsl *rp;
2757 nvlist_t *fault;
2758 uint8_t cert;
2759 uint_t *frs;
2760 uint_t frsum, fr;
2761 uint_t messval;
2762 uint_t retireval;
2763 uint_t responseval;
2764 struct node *snp;
2765 int frcnt, fridx;
2766 boolean_t allfaulty = B_TRUE;
2767 struct rsl *erl = srl + fmep->nsuspects - 1;
2768
2769 /*
2770 * sort the array
2771 */
2772 qsort(srl, fmep->nsuspects, sizeof (struct rsl), rslcmp);
2773
2774 /* sum the fitrates */
2775 frs = alloca(fmep->nsuspects * sizeof (uint_t));
2776 fridx = frcnt = frsum = 0;
2777
2778 for (rp = srl; rp <= erl; rp++) {
2779 struct node *n;
2780
2781 n = eventprop_lookup(rp->suspect, L_FITrate);
2782 if (node2uint(n, &fr) != 0) {
2783 out(O_DEBUG|O_NONL, "event ");
2784 ipath_print(O_DEBUG|O_NONL,
2785 rp->suspect->enode->u.event.ename->u.name.s,
2786 rp->suspect->ipp);
2787 out(O_VERB, " has no FITrate (using 1)");
2788 fr = 1;
2789 } else if (fr == 0) {
2790 out(O_DEBUG|O_NONL, "event ");
2791 ipath_print(O_DEBUG|O_NONL,
2792 rp->suspect->enode->u.event.ename->u.name.s,
2793 rp->suspect->ipp);
2794 out(O_VERB, " has zero FITrate (using 1)");
2795 fr = 1;
2796 }
2797
2798 frs[fridx++] = fr;
2799 frsum += fr;
2800 frcnt++;
2801 }
2802
2803 /* Add them in reverse order of our sort, as fmd reverses order */
2804 for (rp = erl; rp >= srl; rp--) {
2805 cert = percentof(frs[--fridx], frsum);
2806 fault = fmd_nvl_create_fault(fmep->hdl,
2807 rp->suspect->enode->u.event.ename->u.name.s,
2808 cert,
2809 rp->asru,
2810 rp->fru,
2811 rp->rsrc);
2812 if (fault == NULL)
2813 out(O_DIE, "fault creation failed");
2814 /* if "message" property exists, add it to the fault */
2815 if (node2uint(eventprop_lookup(rp->suspect, L_message),
2816 &messval) == 0) {
2817
2818 out(O_ALTFP,
2819 "[FME%d, %s adds message=%d to suspect list]",
2820 fmep->id,
2821 rp->suspect->enode->u.event.ename->u.name.s,
2822 messval);
2823 if (nvlist_add_boolean_value(fault,
2824 FM_SUSPECT_MESSAGE,
2825 (messval) ? B_TRUE : B_FALSE) != 0) {
2826 out(O_DIE, "cannot add no-message to fault");
2827 }
2828 }
2829
2830 /* if "retire" property exists, add it to the fault */
2831 if (node2uint(eventprop_lookup(rp->suspect, L_retire),
2832 &retireval) == 0) {
2833
2834 out(O_ALTFP,
2835 "[FME%d, %s adds retire=%d to suspect list]",
2836 fmep->id,
2837 rp->suspect->enode->u.event.ename->u.name.s,
2838 retireval);
2839 if (nvlist_add_boolean_value(fault,
2840 FM_SUSPECT_RETIRE,
2841 (retireval) ? B_TRUE : B_FALSE) != 0) {
2842 out(O_DIE, "cannot add no-retire to fault");
2843 }
2844 }
2845
2846 /* if "response" property exists, add it to the fault */
2847 if (node2uint(eventprop_lookup(rp->suspect, L_response),
2848 &responseval) == 0) {
2849
2850 out(O_ALTFP,
2851 "[FME%d, %s adds response=%d to suspect list]",
2852 fmep->id,
2853 rp->suspect->enode->u.event.ename->u.name.s,
2854 responseval);
2855 if (nvlist_add_boolean_value(fault,
2856 FM_SUSPECT_RESPONSE,
2857 (responseval) ? B_TRUE : B_FALSE) != 0) {
2858 out(O_DIE, "cannot add no-response to fault");
2859 }
2860 }
2861
2862 /* add any payload properties */
2863 lut_walk(rp->suspect->payloadprops,
2864 (lut_cb)addpayloadprop, (void *)fault);
2865 rslfree(rp);
2866
2867 /*
2868 * If "action" property exists, evaluate it; this must be done
2869 * before the allfaulty check below since some actions may
2870 * modify the asru to be used in fmd_nvl_fmri_has_fault. This
2871 * needs to be restructured if any new actions are introduced
2872 * that have effects that we do not want to be visible if
2873 * we decide not to publish in the dupclose check below.
2874 */
2875 if ((snp = eventprop_lookup(rp->suspect, L_action)) != NULL) {
2876 struct evalue evalue;
2877
2878 out(O_ALTFP|O_NONL,
2879 "[FME%d, %s action ", fmep->id,
2880 rp->suspect->enode->u.event.ename->u.name.s);
2881 ptree_name_iter(O_ALTFP|O_NONL, snp);
2882 out(O_ALTFP, "]");
2883 Action_nvl = fault;
2884 (void) eval_expr(snp, NULL, NULL, NULL, NULL,
2885 NULL, 0, &evalue);
2886 }
2887
2888 fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault);
2889
2890 /*
2891 * check if the asru is already marked as "faulty".
2892 */
2893 if (allfaulty) {
2894 nvlist_t *asru;
2895
2896 out(O_ALTFP|O_VERB, "FME%d dup check ", fmep->id);
2897 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, rp->suspect);
2898 out(O_ALTFP|O_VERB|O_NONL, " ");
2899 if (nvlist_lookup_nvlist(fault,
2900 FM_FAULT_ASRU, &asru) != 0) {
2901 out(O_ALTFP|O_VERB, "NULL asru");
2902 allfaulty = B_FALSE;
2903 } else if (fmd_nvl_fmri_has_fault(fmep->hdl, asru,
2904 FMD_HAS_FAULT_ASRU, NULL)) {
2905 out(O_ALTFP|O_VERB, "faulty");
2906 } else {
2907 out(O_ALTFP|O_VERB, "not faulty");
2908 allfaulty = B_FALSE;
2909 }
2910 }
2911
2912 }
2913
2914 if (!allfaulty) {
2915 /*
2916 * don't update the count stat if all asrus are already
2917 * present and unrepaired in the asru cache
2918 */
2919 for (rp = erl; rp >= srl; rp--) {
2920 struct event *suspect = rp->suspect;
2921
2922 if (suspect == NULL)
2923 continue;
2924
2925 /* if "count" exists, increment the appropriate stat */
2926 if ((snp = eventprop_lookup(suspect,
2927 L_count)) != NULL) {
2928 out(O_ALTFP|O_NONL,
2929 "[FME%d, %s count ", fmep->id,
2930 suspect->enode->u.event.ename->u.name.s);
2931 ptree_name_iter(O_ALTFP|O_NONL, snp);
2932 out(O_ALTFP, "]");
2933 istat_bump(snp, 0);
2934
2935 }
2936 }
2937 istat_save(); /* write out any istat changes */
2938 }
2939 }
2940
2941 static const char *
undiag_2defect_str(int ud)2942 undiag_2defect_str(int ud)
2943 {
2944 switch (ud) {
2945 case UD_VAL_MISSINGINFO:
2946 case UD_VAL_MISSINGOBS:
2947 case UD_VAL_MISSINGPATH:
2948 case UD_VAL_MISSINGZERO:
2949 case UD_VAL_BADOBS:
2950 case UD_VAL_CFGMISMATCH:
2951 return (UNDIAG_DEFECT_CHKPT);
2952 break;
2953
2954 case UD_VAL_BADEVENTI:
2955 case UD_VAL_BADEVENTPATH:
2956 case UD_VAL_BADEVENTCLASS:
2957 case UD_VAL_INSTFAIL:
2958 case UD_VAL_NOPATH:
2959 case UD_VAL_UNSOLVD:
2960 return (UNDIAG_DEFECT_FME);
2961 break;
2962
2963 case UD_VAL_MAXFME:
2964 return (UNDIAG_DEFECT_LIMIT);
2965 break;
2966
2967 case UD_VAL_UNKNOWN:
2968 default:
2969 return (UNDIAG_DEFECT_UNKNOWN);
2970 break;
2971 }
2972 }
2973
2974 static const char *
undiag_2fault_str(int ud)2975 undiag_2fault_str(int ud)
2976 {
2977 switch (ud) {
2978 case UD_VAL_BADEVENTI:
2979 case UD_VAL_BADEVENTPATH:
2980 case UD_VAL_BADEVENTCLASS:
2981 case UD_VAL_INSTFAIL:
2982 case UD_VAL_NOPATH:
2983 case UD_VAL_UNSOLVD:
2984 return (UNDIAG_FAULT_FME);
2985 default:
2986 return (NULL);
2987 }
2988 }
2989
2990 static char *
undiag_2reason_str(int ud,char * arg)2991 undiag_2reason_str(int ud, char *arg)
2992 {
2993 const char *ptr;
2994 char *buf;
2995 int with_arg = 0;
2996
2997 switch (ud) {
2998 case UD_VAL_BADEVENTPATH:
2999 ptr = UD_STR_BADEVENTPATH;
3000 with_arg = 1;
3001 break;
3002 case UD_VAL_BADEVENTCLASS:
3003 ptr = UD_STR_BADEVENTCLASS;
3004 with_arg = 1;
3005 break;
3006 case UD_VAL_BADEVENTI:
3007 ptr = UD_STR_BADEVENTI;
3008 with_arg = 1;
3009 break;
3010 case UD_VAL_BADOBS:
3011 ptr = UD_STR_BADOBS;
3012 break;
3013 case UD_VAL_CFGMISMATCH:
3014 ptr = UD_STR_CFGMISMATCH;
3015 break;
3016 case UD_VAL_INSTFAIL:
3017 ptr = UD_STR_INSTFAIL;
3018 with_arg = 1;
3019 break;
3020 case UD_VAL_MAXFME:
3021 ptr = UD_STR_MAXFME;
3022 break;
3023 case UD_VAL_MISSINGINFO:
3024 ptr = UD_STR_MISSINGINFO;
3025 break;
3026 case UD_VAL_MISSINGOBS:
3027 ptr = UD_STR_MISSINGOBS;
3028 break;
3029 case UD_VAL_MISSINGPATH:
3030 ptr = UD_STR_MISSINGPATH;
3031 break;
3032 case UD_VAL_MISSINGZERO:
3033 ptr = UD_STR_MISSINGZERO;
3034 break;
3035 case UD_VAL_NOPATH:
3036 ptr = UD_STR_NOPATH;
3037 with_arg = 1;
3038 break;
3039 case UD_VAL_UNSOLVD:
3040 ptr = UD_STR_UNSOLVD;
3041 break;
3042 case UD_VAL_UNKNOWN:
3043 default:
3044 ptr = UD_STR_UNKNOWN;
3045 break;
3046 }
3047 if (with_arg) {
3048 buf = MALLOC(strlen(ptr) + strlen(arg) - 1);
3049 (void) sprintf(buf, ptr, arg);
3050 } else {
3051 buf = MALLOC(strlen(ptr) + 1);
3052 (void) sprintf(buf, ptr);
3053 }
3054 return (buf);
3055 }
3056
3057 static void
publish_undiagnosable(fmd_hdl_t * hdl,fmd_event_t * ffep,fmd_case_t * fmcase,nvlist_t * detector,char * arg)3058 publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep, fmd_case_t *fmcase,
3059 nvlist_t *detector, char *arg)
3060 {
3061 struct case_list *newcase;
3062 nvlist_t *defect, *fault;
3063 const char *faultstr;
3064 char *reason = undiag_2reason_str(Undiag_reason, arg);
3065
3066 out(O_ALTFP,
3067 "[undiagnosable ereport received, "
3068 "creating and closing a new case (%s)]", reason);
3069
3070 newcase = MALLOC(sizeof (struct case_list));
3071 newcase->next = NULL;
3072 newcase->fmcase = fmcase;
3073 if (Undiagablecaselist != NULL)
3074 newcase->next = Undiagablecaselist;
3075 Undiagablecaselist = newcase;
3076
3077 if (ffep != NULL)
3078 fmd_case_add_ereport(hdl, newcase->fmcase, ffep);
3079
3080 /* add defect */
3081 defect = fmd_nvl_create_fault(hdl,
3082 undiag_2defect_str(Undiag_reason), 50, NULL, NULL, detector);
3083 (void) nvlist_add_string(defect, UNDIAG_REASON, reason);
3084 (void) nvlist_add_boolean_value(defect, FM_SUSPECT_RETIRE, B_FALSE);
3085 (void) nvlist_add_boolean_value(defect, FM_SUSPECT_RESPONSE, B_FALSE);
3086 fmd_case_add_suspect(hdl, newcase->fmcase, defect);
3087
3088 /* add fault if appropriate */
3089 faultstr = undiag_2fault_str(Undiag_reason);
3090 if (faultstr != NULL) {
3091 fault = fmd_nvl_create_fault(hdl, faultstr, 50, NULL, NULL,
3092 detector);
3093 (void) nvlist_add_string(fault, UNDIAG_REASON, reason);
3094 (void) nvlist_add_boolean_value(fault, FM_SUSPECT_RETIRE,
3095 B_FALSE);
3096 (void) nvlist_add_boolean_value(fault, FM_SUSPECT_RESPONSE,
3097 B_FALSE);
3098 fmd_case_add_suspect(hdl, newcase->fmcase, fault);
3099 }
3100 FREE(reason);
3101
3102 /* solve and close case */
3103 fmd_case_solve(hdl, newcase->fmcase);
3104 fmd_case_close(hdl, newcase->fmcase);
3105 Undiag_reason = UD_VAL_UNKNOWN;
3106 }
3107
3108 static void
fme_undiagnosable(struct fme * f)3109 fme_undiagnosable(struct fme *f)
3110 {
3111 nvlist_t *defect, *fault, *detector = NULL;
3112 struct event *ep;
3113 char *pathstr;
3114 const char *faultstr;
3115 char *reason = undiag_2reason_str(Undiag_reason, NULL);
3116
3117 out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]",
3118 f->id, fmd_case_uuid(f->hdl, f->fmcase), reason);
3119
3120 for (ep = f->observations; ep; ep = ep->observations) {
3121
3122 if (ep->ffep != f->e0r)
3123 fmd_case_add_ereport(f->hdl, f->fmcase, ep->ffep);
3124
3125 pathstr = ipath2str(NULL, ipath(platform_getpath(ep->nvp)));
3126 platform_units_translate(0, f->config, NULL, NULL, &detector,
3127 pathstr);
3128 FREE(pathstr);
3129
3130 /* add defect */
3131 defect = fmd_nvl_create_fault(f->hdl,
3132 undiag_2defect_str(Undiag_reason), 50 / f->uniqobs,
3133 NULL, NULL, detector);
3134 (void) nvlist_add_string(defect, UNDIAG_REASON, reason);
3135 (void) nvlist_add_boolean_value(defect, FM_SUSPECT_RETIRE,
3136 B_FALSE);
3137 (void) nvlist_add_boolean_value(defect, FM_SUSPECT_RESPONSE,
3138 B_FALSE);
3139 fmd_case_add_suspect(f->hdl, f->fmcase, defect);
3140
3141 /* add fault if appropriate */
3142 faultstr = undiag_2fault_str(Undiag_reason);
3143 if (faultstr == NULL)
3144 continue;
3145 fault = fmd_nvl_create_fault(f->hdl, faultstr, 50 / f->uniqobs,
3146 NULL, NULL, detector);
3147 (void) nvlist_add_string(fault, UNDIAG_REASON, reason);
3148 (void) nvlist_add_boolean_value(fault, FM_SUSPECT_RETIRE,
3149 B_FALSE);
3150 (void) nvlist_add_boolean_value(fault, FM_SUSPECT_RESPONSE,
3151 B_FALSE);
3152 fmd_case_add_suspect(f->hdl, f->fmcase, fault);
3153 nvlist_free(detector);
3154 }
3155 FREE(reason);
3156 fmd_case_solve(f->hdl, f->fmcase);
3157 fmd_case_close(f->hdl, f->fmcase);
3158 Undiag_reason = UD_VAL_UNKNOWN;
3159 }
3160
3161 /*
3162 * fme_close_case
3163 *
3164 * Find the requested case amongst our fmes and close it. Free up
3165 * the related fme.
3166 */
3167 void
fme_close_case(fmd_hdl_t * hdl,fmd_case_t * fmcase)3168 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase)
3169 {
3170 struct case_list *ucasep, *prevcasep = NULL;
3171 struct fme *prev = NULL;
3172 struct fme *fmep;
3173
3174 for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) {
3175 if (fmcase != ucasep->fmcase) {
3176 prevcasep = ucasep;
3177 continue;
3178 }
3179
3180 if (prevcasep == NULL)
3181 Undiagablecaselist = Undiagablecaselist->next;
3182 else
3183 prevcasep->next = ucasep->next;
3184
3185 FREE(ucasep);
3186 return;
3187 }
3188
3189 for (fmep = FMElist; fmep; fmep = fmep->next) {
3190 if (fmep->hdl == hdl && fmep->fmcase == fmcase)
3191 break;
3192 prev = fmep;
3193 }
3194
3195 if (fmep == NULL) {
3196 out(O_WARN, "Eft asked to close unrecognized case [%s].",
3197 fmd_case_uuid(hdl, fmcase));
3198 return;
3199 }
3200
3201 if (EFMElist == fmep)
3202 EFMElist = prev;
3203
3204 if (prev == NULL)
3205 FMElist = FMElist->next;
3206 else
3207 prev->next = fmep->next;
3208
3209 fmep->next = NULL;
3210
3211 /* Get rid of any timer this fme has set */
3212 if (fmep->wull != 0)
3213 fmd_timer_remove(fmep->hdl, fmep->timer);
3214
3215 if (ClosedFMEs == NULL) {
3216 ClosedFMEs = fmep;
3217 } else {
3218 fmep->next = ClosedFMEs;
3219 ClosedFMEs = fmep;
3220 }
3221
3222 Open_fme_count--;
3223
3224 /* See if we can close the overflow FME */
3225 if (Open_fme_count <= Max_fme) {
3226 for (fmep = FMElist; fmep; fmep = fmep->next) {
3227 if (fmep->overflow && !(fmd_case_closed(fmep->hdl,
3228 fmep->fmcase)))
3229 break;
3230 }
3231
3232 if (fmep != NULL)
3233 fmd_case_close(fmep->hdl, fmep->fmcase);
3234 }
3235 }
3236
3237 /*
3238 * fme_set_timer()
3239 * If the time we need to wait for the given FME is less than the
3240 * current timer, kick that old timer out and establish a new one.
3241 */
3242 static int
fme_set_timer(struct fme * fmep,unsigned long long wull)3243 fme_set_timer(struct fme *fmep, unsigned long long wull)
3244 {
3245 out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait ");
3246 ptree_timeval(O_ALTFP|O_VERB, &wull);
3247
3248 if (wull <= fmep->pull) {
3249 out(O_ALTFP|O_VERB|O_NONL, "already have waited at least ");
3250 ptree_timeval(O_ALTFP|O_VERB, &fmep->pull);
3251 out(O_ALTFP|O_VERB, NULL);
3252 /* we've waited at least wull already, don't need timer */
3253 return (0);
3254 }
3255
3256 out(O_ALTFP|O_VERB|O_NONL, " currently ");
3257 if (fmep->wull != 0) {
3258 out(O_ALTFP|O_VERB|O_NONL, "waiting ");
3259 ptree_timeval(O_ALTFP|O_VERB, &fmep->wull);
3260 out(O_ALTFP|O_VERB, NULL);
3261 } else {
3262 out(O_ALTFP|O_VERB|O_NONL, "not waiting");
3263 out(O_ALTFP|O_VERB, NULL);
3264 }
3265
3266 if (fmep->wull != 0)
3267 if (wull >= fmep->wull)
3268 /* New timer would fire later than established timer */
3269 return (0);
3270
3271 if (fmep->wull != 0) {
3272 fmd_timer_remove(fmep->hdl, fmep->timer);
3273 }
3274
3275 fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep,
3276 fmep->e0r, wull);
3277 out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer);
3278 fmep->wull = wull;
3279 return (1);
3280 }
3281
3282 void
fme_timer_fired(struct fme * fmep,id_t tid)3283 fme_timer_fired(struct fme *fmep, id_t tid)
3284 {
3285 struct fme *ffmep = NULL;
3286
3287 for (ffmep = FMElist; ffmep; ffmep = ffmep->next)
3288 if (ffmep == fmep)
3289 break;
3290
3291 if (ffmep == NULL) {
3292 out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.",
3293 (void *)fmep);
3294 return;
3295 }
3296
3297 out(O_ALTFP|O_VERB, "Timer fired %lx", tid);
3298 fmep->pull = fmep->wull;
3299 fmep->wull = 0;
3300 fmd_buf_write(fmep->hdl, fmep->fmcase,
3301 WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull));
3302
3303 fme_eval(fmep, fmep->e0r);
3304 }
3305
3306 /*
3307 * Preserve the fme's suspect list in its psuspects list, NULLing the
3308 * suspects list in the meantime.
3309 */
3310 static void
save_suspects(struct fme * fmep)3311 save_suspects(struct fme *fmep)
3312 {
3313 struct event *ep;
3314 struct event *nextep;
3315
3316 /* zero out the previous suspect list */
3317 for (ep = fmep->psuspects; ep; ep = nextep) {
3318 nextep = ep->psuspects;
3319 ep->psuspects = NULL;
3320 }
3321 fmep->psuspects = NULL;
3322
3323 /* zero out the suspect list, copying it to previous suspect list */
3324 fmep->psuspects = fmep->suspects;
3325 for (ep = fmep->suspects; ep; ep = nextep) {
3326 nextep = ep->suspects;
3327 ep->psuspects = ep->suspects;
3328 ep->suspects = NULL;
3329 ep->is_suspect = 0;
3330 }
3331 fmep->suspects = NULL;
3332 fmep->nsuspects = 0;
3333 }
3334
3335 /*
3336 * Retrieve the fme's suspect list from its psuspects list.
3337 */
3338 static void
restore_suspects(struct fme * fmep)3339 restore_suspects(struct fme *fmep)
3340 {
3341 struct event *ep;
3342 struct event *nextep;
3343
3344 fmep->nsuspects = 0;
3345 fmep->suspects = fmep->psuspects;
3346 for (ep = fmep->psuspects; ep; ep = nextep) {
3347 fmep->nsuspects++;
3348 nextep = ep->psuspects;
3349 ep->suspects = ep->psuspects;
3350 }
3351 }
3352
3353 /*
3354 * this is what we use to call the Emrys prototype code instead of main()
3355 */
3356 static void
fme_eval(struct fme * fmep,fmd_event_t * ffep)3357 fme_eval(struct fme *fmep, fmd_event_t *ffep)
3358 {
3359 struct event *ep;
3360 unsigned long long my_delay = TIMEVAL_EVENTUALLY;
3361 struct rsl *srl = NULL;
3362 struct rsl *srl2 = NULL;
3363 int mess_zero_count;
3364 int rpcnt;
3365
3366 save_suspects(fmep);
3367
3368 out(O_ALTFP, "Evaluate FME %d", fmep->id);
3369 indent_set(" ");
3370
3371 lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
3372 fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
3373
3374 out(O_ALTFP|O_NONL, "FME%d state: %s, suspect list:", fmep->id,
3375 fme_state2str(fmep->state));
3376 for (ep = fmep->suspects; ep; ep = ep->suspects) {
3377 out(O_ALTFP|O_NONL, " ");
3378 itree_pevent_brief(O_ALTFP|O_NONL, ep);
3379 }
3380 out(O_ALTFP, NULL);
3381
3382 switch (fmep->state) {
3383 case FME_CREDIBLE:
3384 print_suspects(SLNEW, fmep);
3385 (void) upsets_eval(fmep, ffep);
3386
3387 /*
3388 * we may have already posted suspects in upsets_eval() which
3389 * can recurse into fme_eval() again. If so then just return.
3390 */
3391 if (fmep->posted_suspects)
3392 return;
3393
3394 stats_counter_bump(fmep->diags);
3395 rpcnt = fmep->nsuspects;
3396 save_suspects(fmep);
3397
3398 /*
3399 * create two lists, one for "message=1" faults and one for
3400 * "message=0" faults. If we have a mixture we will generate
3401 * two separate suspect lists.
3402 */
3403 srl = MALLOC(rpcnt * sizeof (struct rsl));
3404 bzero(srl, rpcnt * sizeof (struct rsl));
3405 srl2 = MALLOC(rpcnt * sizeof (struct rsl));
3406 bzero(srl2, rpcnt * sizeof (struct rsl));
3407 mess_zero_count = trim_suspects(fmep, srl, srl2, ffep);
3408
3409 /*
3410 * If the resulting suspect list has no members, we're
3411 * done so simply close the case. Otherwise sort and publish.
3412 */
3413 if (fmep->nsuspects == 0 && mess_zero_count == 0) {
3414 out(O_ALTFP,
3415 "[FME%d, case %s (all suspects are upsets)]",
3416 fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));
3417 fmd_case_close(fmep->hdl, fmep->fmcase);
3418 } else if (fmep->nsuspects != 0 && mess_zero_count == 0) {
3419 publish_suspects(fmep, srl);
3420 out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3421 fmd_case_uuid(fmep->hdl, fmep->fmcase));
3422 fmd_case_solve(fmep->hdl, fmep->fmcase);
3423 } else if (fmep->nsuspects == 0 && mess_zero_count != 0) {
3424 fmep->nsuspects = mess_zero_count;
3425 publish_suspects(fmep, srl2);
3426 out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3427 fmd_case_uuid(fmep->hdl, fmep->fmcase));
3428 fmd_case_solve(fmep->hdl, fmep->fmcase);
3429 } else {
3430 struct event *obsp;
3431 struct fme *nfmep;
3432
3433 publish_suspects(fmep, srl);
3434 out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3435 fmd_case_uuid(fmep->hdl, fmep->fmcase));
3436 fmd_case_solve(fmep->hdl, fmep->fmcase);
3437
3438 /*
3439 * Got both message=0 and message=1 so create a
3440 * duplicate case. Also need a temporary duplicate fme
3441 * structure for use by publish_suspects().
3442 */
3443 nfmep = alloc_fme();
3444 nfmep->id = Nextid++;
3445 nfmep->hdl = fmep->hdl;
3446 nfmep->nsuspects = mess_zero_count;
3447 nfmep->fmcase = fmd_case_open(fmep->hdl, NULL);
3448 out(O_ALTFP|O_STAMP,
3449 "[creating parallel FME%d, case %s]", nfmep->id,
3450 fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
3451 Open_fme_count++;
3452 if (ffep) {
3453 fmd_case_setprincipal(nfmep->hdl,
3454 nfmep->fmcase, ffep);
3455 fmd_case_add_ereport(nfmep->hdl,
3456 nfmep->fmcase, ffep);
3457 }
3458 for (obsp = fmep->observations; obsp;
3459 obsp = obsp->observations)
3460 if (obsp->ffep && obsp->ffep != ffep)
3461 fmd_case_add_ereport(nfmep->hdl,
3462 nfmep->fmcase, obsp->ffep);
3463
3464 publish_suspects(nfmep, srl2);
3465 out(O_ALTFP, "[solving FME%d, case %s]", nfmep->id,
3466 fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
3467 fmd_case_solve(nfmep->hdl, nfmep->fmcase);
3468 FREE(nfmep);
3469 }
3470 FREE(srl);
3471 FREE(srl2);
3472 restore_suspects(fmep);
3473
3474 fmep->posted_suspects = 1;
3475 fmd_buf_write(fmep->hdl, fmep->fmcase,
3476 WOBUF_POSTD,
3477 (void *)&fmep->posted_suspects,
3478 sizeof (fmep->posted_suspects));
3479
3480 /*
3481 * Now the suspects have been posted, we can clear up
3482 * the instance tree as we won't be looking at it again.
3483 * Also cancel the timer as the case is now solved.
3484 */
3485 if (fmep->wull != 0) {
3486 fmd_timer_remove(fmep->hdl, fmep->timer);
3487 fmep->wull = 0;
3488 }
3489 break;
3490
3491 case FME_WAIT:
3492 ASSERT(my_delay > fmep->ull);
3493 (void) fme_set_timer(fmep, my_delay);
3494 print_suspects(SLWAIT, fmep);
3495 itree_prune(fmep->eventtree);
3496 return;
3497
3498 case FME_DISPROVED:
3499 print_suspects(SLDISPROVED, fmep);
3500 Undiag_reason = UD_VAL_UNSOLVD;
3501 fme_undiagnosable(fmep);
3502 break;
3503 }
3504
3505 itree_free(fmep->eventtree);
3506 fmep->eventtree = NULL;
3507 structconfig_free(fmep->config);
3508 fmep->config = NULL;
3509 destroy_fme_bufs(fmep);
3510 }
3511
/*
 * Forward declarations for routines that are referenced before their
 * definitions appear in this file.
 */
static void indent(void);
static int triggered(struct fme *fmep, struct event *ep, int mark);
static enum fme_state effects_test(struct fme *fmep, struct event *fault_event,
    unsigned long long at_latest_by, unsigned long long *pdelay);
static enum fme_state requirements_test(struct fme *fmep,
    struct event *ep, unsigned long long at_latest_by,
    unsigned long long *pdelay);
static enum fme_state causes_test(struct fme *fmep,
    struct event *ep, unsigned long long at_latest_by,
    unsigned long long *pdelay);
3521
3522 static int
checkconstraints(struct fme * fmep,struct arrow * arrowp)3523 checkconstraints(struct fme *fmep, struct arrow *arrowp)
3524 {
3525 struct constraintlist *ctp;
3526 struct evalue value;
3527 char *sep = "";
3528
3529 if (arrowp->forever_false) {
3530 indent();
3531 out(O_ALTFP|O_VERB|O_NONL, " Forever false constraint: ");
3532 for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3533 out(O_ALTFP|O_VERB|O_NONL, sep);
3534 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3535 sep = ", ";
3536 }
3537 out(O_ALTFP|O_VERB, NULL);
3538 return (0);
3539 }
3540 if (arrowp->forever_true) {
3541 indent();
3542 out(O_ALTFP|O_VERB|O_NONL, " Forever true constraint: ");
3543 for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3544 out(O_ALTFP|O_VERB|O_NONL, sep);
3545 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3546 sep = ", ";
3547 }
3548 out(O_ALTFP|O_VERB, NULL);
3549 return (1);
3550 }
3551
3552 for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3553 if (eval_expr(ctp->cnode, NULL, NULL,
3554 &fmep->globals, fmep->config,
3555 arrowp, 0, &value)) {
3556 /* evaluation successful */
3557 if (value.t == UNDEFINED || value.v == 0) {
3558 /* known false */
3559 arrowp->forever_false = 1;
3560 indent();
3561 out(O_ALTFP|O_VERB|O_NONL,
3562 " False constraint: ");
3563 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3564 out(O_ALTFP|O_VERB, NULL);
3565 return (0);
3566 }
3567 } else {
3568 /* evaluation unsuccessful -- unknown value */
3569 indent();
3570 out(O_ALTFP|O_VERB|O_NONL,
3571 " Deferred constraint: ");
3572 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3573 out(O_ALTFP|O_VERB, NULL);
3574 return (1);
3575 }
3576 }
3577 /* known true */
3578 arrowp->forever_true = 1;
3579 indent();
3580 out(O_ALTFP|O_VERB|O_NONL, " True constraint: ");
3581 for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3582 out(O_ALTFP|O_VERB|O_NONL, sep);
3583 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3584 sep = ", ";
3585 }
3586 out(O_ALTFP|O_VERB, NULL);
3587 return (1);
3588 }
3589
3590 static int
triggered(struct fme * fmep,struct event * ep,int mark)3591 triggered(struct fme *fmep, struct event *ep, int mark)
3592 {
3593 struct bubble *bp;
3594 struct arrowlist *ap;
3595 int count = 0;
3596
3597 stats_counter_bump(fmep->Tcallcount);
3598 for (bp = itree_next_bubble(ep, NULL); bp;
3599 bp = itree_next_bubble(ep, bp)) {
3600 if (bp->t != B_TO)
3601 continue;
3602 for (ap = itree_next_arrow(bp, NULL); ap;
3603 ap = itree_next_arrow(bp, ap)) {
3604 /* check count of marks against K in the bubble */
3605 if ((ap->arrowp->mark & mark) &&
3606 ++count >= bp->nork)
3607 return (1);
3608 }
3609 }
3610 return (0);
3611 }
3612
3613 static int
mark_arrows(struct fme * fmep,struct event * ep,int mark,unsigned long long at_latest_by,unsigned long long * pdelay,int keep)3614 mark_arrows(struct fme *fmep, struct event *ep, int mark,
3615 unsigned long long at_latest_by, unsigned long long *pdelay, int keep)
3616 {
3617 struct bubble *bp;
3618 struct arrowlist *ap;
3619 unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3620 unsigned long long my_delay;
3621 enum fme_state result;
3622 int retval = 0;
3623
3624 for (bp = itree_next_bubble(ep, NULL); bp;
3625 bp = itree_next_bubble(ep, bp)) {
3626 if (bp->t != B_FROM)
3627 continue;
3628 stats_counter_bump(fmep->Marrowcount);
3629 for (ap = itree_next_arrow(bp, NULL); ap;
3630 ap = itree_next_arrow(bp, ap)) {
3631 struct event *ep2 = ap->arrowp->head->myevent;
3632 /*
3633 * if we're clearing marks, we can avoid doing
3634 * all that work evaluating constraints.
3635 */
3636 if (mark == 0) {
3637 if (ap->arrowp->arrow_marked == 0)
3638 continue;
3639 ap->arrowp->arrow_marked = 0;
3640 ap->arrowp->mark &= ~EFFECTS_COUNTER;
3641 if (keep && (ep2->cached_state &
3642 (WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT)))
3643 ep2->keep_in_tree = 1;
3644 ep2->cached_state &=
3645 ~(WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT);
3646 (void) mark_arrows(fmep, ep2, mark, 0, NULL,
3647 keep);
3648 continue;
3649 }
3650 ap->arrowp->arrow_marked = 1;
3651 if (ep2->cached_state & REQMNTS_DISPROVED) {
3652 indent();
3653 out(O_ALTFP|O_VERB|O_NONL,
3654 " ALREADY DISPROVED ");
3655 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3656 out(O_ALTFP|O_VERB, NULL);
3657 continue;
3658 }
3659 if (ep2->cached_state & WAIT_EFFECT) {
3660 indent();
3661 out(O_ALTFP|O_VERB|O_NONL,
3662 " ALREADY EFFECTS WAIT ");
3663 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3664 out(O_ALTFP|O_VERB, NULL);
3665 continue;
3666 }
3667 if (ep2->cached_state & CREDIBLE_EFFECT) {
3668 indent();
3669 out(O_ALTFP|O_VERB|O_NONL,
3670 " ALREADY EFFECTS CREDIBLE ");
3671 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3672 out(O_ALTFP|O_VERB, NULL);
3673 continue;
3674 }
3675 if ((ep2->cached_state & PARENT_WAIT) &&
3676 (mark & PARENT_WAIT)) {
3677 indent();
3678 out(O_ALTFP|O_VERB|O_NONL,
3679 " ALREADY PARENT EFFECTS WAIT ");
3680 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3681 out(O_ALTFP|O_VERB, NULL);
3682 continue;
3683 }
3684 platform_set_payloadnvp(ep2->nvp);
3685 if (checkconstraints(fmep, ap->arrowp) == 0) {
3686 platform_set_payloadnvp(NULL);
3687 indent();
3688 out(O_ALTFP|O_VERB|O_NONL,
3689 " CONSTRAINTS FAIL ");
3690 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3691 out(O_ALTFP|O_VERB, NULL);
3692 continue;
3693 }
3694 platform_set_payloadnvp(NULL);
3695 ap->arrowp->mark |= EFFECTS_COUNTER;
3696 if (!triggered(fmep, ep2, EFFECTS_COUNTER)) {
3697 indent();
3698 out(O_ALTFP|O_VERB|O_NONL,
3699 " K-COUNT NOT YET MET ");
3700 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3701 out(O_ALTFP|O_VERB, NULL);
3702 continue;
3703 }
3704 ep2->cached_state &= ~PARENT_WAIT;
3705 /*
3706 * if we've reached an ereport and no propagation time
3707 * is specified, use the Hesitate value
3708 */
3709 if (ep2->t == N_EREPORT && at_latest_by == 0ULL &&
3710 ap->arrowp->maxdelay == 0ULL) {
3711 out(O_ALTFP|O_VERB|O_NONL, " default wait ");
3712 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3713 out(O_ALTFP|O_VERB, NULL);
3714 result = requirements_test(fmep, ep2, Hesitate,
3715 &my_delay);
3716 } else {
3717 result = requirements_test(fmep, ep2,
3718 at_latest_by + ap->arrowp->maxdelay,
3719 &my_delay);
3720 }
3721 if (result == FME_WAIT) {
3722 retval = WAIT_EFFECT;
3723 if (overall_delay > my_delay)
3724 overall_delay = my_delay;
3725 ep2->cached_state |= WAIT_EFFECT;
3726 indent();
3727 out(O_ALTFP|O_VERB|O_NONL, " EFFECTS WAIT ");
3728 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3729 out(O_ALTFP|O_VERB, NULL);
3730 indent_push(" E");
3731 if (mark_arrows(fmep, ep2, PARENT_WAIT,
3732 at_latest_by, &my_delay, 0) ==
3733 WAIT_EFFECT) {
3734 retval = WAIT_EFFECT;
3735 if (overall_delay > my_delay)
3736 overall_delay = my_delay;
3737 }
3738 indent_pop();
3739 } else if (result == FME_DISPROVED) {
3740 indent();
3741 out(O_ALTFP|O_VERB|O_NONL,
3742 " EFFECTS DISPROVED ");
3743 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3744 out(O_ALTFP|O_VERB, NULL);
3745 } else {
3746 ep2->cached_state |= mark;
3747 indent();
3748 if (mark == CREDIBLE_EFFECT)
3749 out(O_ALTFP|O_VERB|O_NONL,
3750 " EFFECTS CREDIBLE ");
3751 else
3752 out(O_ALTFP|O_VERB|O_NONL,
3753 " PARENT EFFECTS WAIT ");
3754 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3755 out(O_ALTFP|O_VERB, NULL);
3756 indent_push(" E");
3757 if (mark_arrows(fmep, ep2, mark, at_latest_by,
3758 &my_delay, 0) == WAIT_EFFECT) {
3759 retval = WAIT_EFFECT;
3760 if (overall_delay > my_delay)
3761 overall_delay = my_delay;
3762 }
3763 indent_pop();
3764 }
3765 }
3766 }
3767 if (retval == WAIT_EFFECT)
3768 *pdelay = overall_delay;
3769 return (retval);
3770 }
3771
3772 static enum fme_state
effects_test(struct fme * fmep,struct event * fault_event,unsigned long long at_latest_by,unsigned long long * pdelay)3773 effects_test(struct fme *fmep, struct event *fault_event,
3774 unsigned long long at_latest_by, unsigned long long *pdelay)
3775 {
3776 struct event *error_event;
3777 enum fme_state return_value = FME_CREDIBLE;
3778 unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3779 unsigned long long my_delay;
3780
3781 stats_counter_bump(fmep->Ecallcount);
3782 indent_push(" E");
3783 indent();
3784 out(O_ALTFP|O_VERB|O_NONL, "->");
3785 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3786 out(O_ALTFP|O_VERB, NULL);
3787
3788 if (mark_arrows(fmep, fault_event, CREDIBLE_EFFECT, at_latest_by,
3789 &my_delay, 0) == WAIT_EFFECT) {
3790 return_value = FME_WAIT;
3791 if (overall_delay > my_delay)
3792 overall_delay = my_delay;
3793 }
3794 for (error_event = fmep->observations;
3795 error_event; error_event = error_event->observations) {
3796 indent();
3797 out(O_ALTFP|O_VERB|O_NONL, " ");
3798 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event);
3799 if (!(error_event->cached_state & CREDIBLE_EFFECT)) {
3800 if (error_event->cached_state &
3801 (PARENT_WAIT|WAIT_EFFECT)) {
3802 out(O_ALTFP|O_VERB, " NOT YET triggered");
3803 continue;
3804 }
3805 return_value = FME_DISPROVED;
3806 out(O_ALTFP|O_VERB, " NOT triggered");
3807 break;
3808 } else {
3809 out(O_ALTFP|O_VERB, " triggered");
3810 }
3811 }
3812 if (return_value == FME_DISPROVED) {
3813 (void) mark_arrows(fmep, fault_event, 0, 0, NULL, 0);
3814 } else {
3815 fault_event->keep_in_tree = 1;
3816 (void) mark_arrows(fmep, fault_event, 0, 0, NULL, 1);
3817 }
3818
3819 indent();
3820 out(O_ALTFP|O_VERB|O_NONL, "<-EFFECTS %s ",
3821 fme_state2str(return_value));
3822 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3823 out(O_ALTFP|O_VERB, NULL);
3824 indent_pop();
3825 if (return_value == FME_WAIT)
3826 *pdelay = overall_delay;
3827 return (return_value);
3828 }
3829
3830 static enum fme_state
requirements_test(struct fme * fmep,struct event * ep,unsigned long long at_latest_by,unsigned long long * pdelay)3831 requirements_test(struct fme *fmep, struct event *ep,
3832 unsigned long long at_latest_by, unsigned long long *pdelay)
3833 {
3834 int waiting_events;
3835 int credible_events;
3836 int deferred_events;
3837 enum fme_state return_value = FME_CREDIBLE;
3838 unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3839 unsigned long long arrow_delay;
3840 unsigned long long my_delay;
3841 struct event *ep2;
3842 struct bubble *bp;
3843 struct arrowlist *ap;
3844
3845 if (ep->cached_state & REQMNTS_CREDIBLE) {
3846 indent();
3847 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY CREDIBLE ");
3848 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3849 out(O_ALTFP|O_VERB, NULL);
3850 return (FME_CREDIBLE);
3851 }
3852 if (ep->cached_state & REQMNTS_DISPROVED) {
3853 indent();
3854 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY DISPROVED ");
3855 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3856 out(O_ALTFP|O_VERB, NULL);
3857 return (FME_DISPROVED);
3858 }
3859 if (ep->cached_state & REQMNTS_WAIT) {
3860 indent();
3861 *pdelay = ep->cached_delay;
3862 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY WAIT ");
3863 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3864 out(O_ALTFP|O_VERB|O_NONL, ", wait for: ");
3865 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3866 out(O_ALTFP|O_VERB, NULL);
3867 return (FME_WAIT);
3868 }
3869 stats_counter_bump(fmep->Rcallcount);
3870 indent_push(" R");
3871 indent();
3872 out(O_ALTFP|O_VERB|O_NONL, "->");
3873 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3874 out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
3875 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3876 out(O_ALTFP|O_VERB, NULL);
3877
3878 if (ep->t == N_EREPORT) {
3879 if (ep->count == 0) {
3880 if (fmep->pull >= at_latest_by) {
3881 return_value = FME_DISPROVED;
3882 } else {
3883 ep->cached_delay = *pdelay = at_latest_by;
3884 return_value = FME_WAIT;
3885 }
3886 }
3887
3888 indent();
3889 switch (return_value) {
3890 case FME_CREDIBLE:
3891 ep->cached_state |= REQMNTS_CREDIBLE;
3892 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS CREDIBLE ");
3893 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3894 break;
3895 case FME_DISPROVED:
3896 ep->cached_state |= REQMNTS_DISPROVED;
3897 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
3898 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3899 break;
3900 case FME_WAIT:
3901 ep->cached_state |= REQMNTS_WAIT;
3902 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS WAIT ");
3903 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3904 out(O_ALTFP|O_VERB|O_NONL, " to ");
3905 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3906 break;
3907 default:
3908 out(O_DIE, "requirements_test: unexpected fme_state");
3909 break;
3910 }
3911 out(O_ALTFP|O_VERB, NULL);
3912 indent_pop();
3913
3914 return (return_value);
3915 }
3916
3917 /* this event is not a report, descend the tree */
3918 for (bp = itree_next_bubble(ep, NULL); bp;
3919 bp = itree_next_bubble(ep, bp)) {
3920 int n;
3921
3922 if (bp->t != B_FROM)
3923 continue;
3924
3925 n = bp->nork;
3926
3927 credible_events = 0;
3928 waiting_events = 0;
3929 deferred_events = 0;
3930 arrow_delay = TIMEVAL_EVENTUALLY;
3931 /*
3932 * n is -1 for 'A' so adjust it.
3933 * XXX just count up the arrows for now.
3934 */
3935 if (n < 0) {
3936 n = 0;
3937 for (ap = itree_next_arrow(bp, NULL); ap;
3938 ap = itree_next_arrow(bp, ap))
3939 n++;
3940 indent();
3941 out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n);
3942 } else {
3943 indent();
3944 out(O_ALTFP|O_VERB, " Bubble N=%d", n);
3945 }
3946
3947 if (n == 0)
3948 continue;
3949 if (!(bp->mark & (BUBBLE_ELIDED|BUBBLE_OK))) {
3950 for (ap = itree_next_arrow(bp, NULL); ap;
3951 ap = itree_next_arrow(bp, ap)) {
3952 ep2 = ap->arrowp->head->myevent;
3953 platform_set_payloadnvp(ep2->nvp);
3954 (void) checkconstraints(fmep, ap->arrowp);
3955 if (!ap->arrowp->forever_false) {
3956 /*
3957 * if all arrows are invalidated by the
3958 * constraints, then we should elide the
3959 * whole bubble to be consistant with
3960 * the tree creation time behaviour
3961 */
3962 bp->mark |= BUBBLE_OK;
3963 platform_set_payloadnvp(NULL);
3964 break;
3965 }
3966 platform_set_payloadnvp(NULL);
3967 }
3968 }
3969 for (ap = itree_next_arrow(bp, NULL); ap;
3970 ap = itree_next_arrow(bp, ap)) {
3971 ep2 = ap->arrowp->head->myevent;
3972 if (n <= credible_events)
3973 break;
3974
3975 ap->arrowp->mark |= REQMNTS_COUNTER;
3976 if (triggered(fmep, ep2, REQMNTS_COUNTER))
3977 /* XXX adding max timevals! */
3978 switch (requirements_test(fmep, ep2,
3979 at_latest_by + ap->arrowp->maxdelay,
3980 &my_delay)) {
3981 case FME_DEFERRED:
3982 deferred_events++;
3983 break;
3984 case FME_CREDIBLE:
3985 credible_events++;
3986 break;
3987 case FME_DISPROVED:
3988 break;
3989 case FME_WAIT:
3990 if (my_delay < arrow_delay)
3991 arrow_delay = my_delay;
3992 waiting_events++;
3993 break;
3994 default:
3995 out(O_DIE,
3996 "Bug in requirements_test.");
3997 }
3998 else
3999 deferred_events++;
4000 }
4001 if (!(bp->mark & BUBBLE_OK) && waiting_events == 0) {
4002 bp->mark |= BUBBLE_ELIDED;
4003 continue;
4004 }
4005 indent();
4006 out(O_ALTFP|O_VERB, " Credible: %d Waiting %d",
4007 credible_events + deferred_events, waiting_events);
4008 if (credible_events + deferred_events + waiting_events < n) {
4009 /* Can never meet requirements */
4010 ep->cached_state |= REQMNTS_DISPROVED;
4011 indent();
4012 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
4013 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4014 out(O_ALTFP|O_VERB, NULL);
4015 indent_pop();
4016 return (FME_DISPROVED);
4017 }
4018 if (credible_events + deferred_events < n) {
4019 /* will have to wait */
4020 /* wait time is shortest known */
4021 if (arrow_delay < overall_delay)
4022 overall_delay = arrow_delay;
4023 return_value = FME_WAIT;
4024 } else if (credible_events < n) {
4025 if (return_value != FME_WAIT)
4026 return_value = FME_DEFERRED;
4027 }
4028 }
4029
4030 /*
4031 * don't mark as FME_DEFERRED. If this event isn't reached by another
4032 * path, then this will be considered FME_CREDIBLE. But if it is
4033 * reached by a different path so the K-count is met, then might
4034 * get overridden by FME_WAIT or FME_DISPROVED.
4035 */
4036 if (return_value == FME_WAIT) {
4037 ep->cached_state |= REQMNTS_WAIT;
4038 ep->cached_delay = *pdelay = overall_delay;
4039 } else if (return_value == FME_CREDIBLE) {
4040 ep->cached_state |= REQMNTS_CREDIBLE;
4041 }
4042 indent();
4043 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS %s ",
4044 fme_state2str(return_value));
4045 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4046 out(O_ALTFP|O_VERB, NULL);
4047 indent_pop();
4048 return (return_value);
4049 }
4050
4051 static enum fme_state
causes_test(struct fme * fmep,struct event * ep,unsigned long long at_latest_by,unsigned long long * pdelay)4052 causes_test(struct fme *fmep, struct event *ep,
4053 unsigned long long at_latest_by, unsigned long long *pdelay)
4054 {
4055 unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
4056 unsigned long long my_delay;
4057 int credible_results = 0;
4058 int waiting_results = 0;
4059 enum fme_state fstate;
4060 struct event *tail_event;
4061 struct bubble *bp;
4062 struct arrowlist *ap;
4063 int k = 1;
4064
4065 stats_counter_bump(fmep->Ccallcount);
4066 indent_push(" C");
4067 indent();
4068 out(O_ALTFP|O_VERB|O_NONL, "->");
4069 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4070 out(O_ALTFP|O_VERB, NULL);
4071
4072 for (bp = itree_next_bubble(ep, NULL); bp;
4073 bp = itree_next_bubble(ep, bp)) {
4074 if (bp->t != B_TO)
4075 continue;
4076 k = bp->nork; /* remember the K value */
4077 for (ap = itree_next_arrow(bp, NULL); ap;
4078 ap = itree_next_arrow(bp, ap)) {
4079 int do_not_follow = 0;
4080
4081 /*
4082 * if we get to the same event multiple times
4083 * only worry about the first one.
4084 */
4085 if (ap->arrowp->tail->myevent->cached_state &
4086 CAUSES_TESTED) {
4087 indent();
4088 out(O_ALTFP|O_VERB|O_NONL,
4089 " causes test already run for ");
4090 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
4091 ap->arrowp->tail->myevent);
4092 out(O_ALTFP|O_VERB, NULL);
4093 continue;
4094 }
4095
4096 /*
4097 * see if false constraint prevents us
4098 * from traversing this arrow
4099 */
4100 platform_set_payloadnvp(ep->nvp);
4101 if (checkconstraints(fmep, ap->arrowp) == 0)
4102 do_not_follow = 1;
4103 platform_set_payloadnvp(NULL);
4104 if (do_not_follow) {
4105 indent();
4106 out(O_ALTFP|O_VERB|O_NONL,
4107 " False arrow from ");
4108 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
4109 ap->arrowp->tail->myevent);
4110 out(O_ALTFP|O_VERB, NULL);
4111 continue;
4112 }
4113
4114 ap->arrowp->tail->myevent->cached_state |=
4115 CAUSES_TESTED;
4116 tail_event = ap->arrowp->tail->myevent;
4117 fstate = hypothesise(fmep, tail_event, at_latest_by,
4118 &my_delay);
4119
4120 switch (fstate) {
4121 case FME_WAIT:
4122 if (my_delay < overall_delay)
4123 overall_delay = my_delay;
4124 waiting_results++;
4125 break;
4126 case FME_CREDIBLE:
4127 credible_results++;
4128 break;
4129 case FME_DISPROVED:
4130 break;
4131 default:
4132 out(O_DIE, "Bug in causes_test");
4133 }
4134 }
4135 }
4136 /* compare against K */
4137 if (credible_results + waiting_results < k) {
4138 indent();
4139 out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES DISPROVED ");
4140 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4141 out(O_ALTFP|O_VERB, NULL);
4142 indent_pop();
4143 return (FME_DISPROVED);
4144 }
4145 if (waiting_results != 0) {
4146 *pdelay = overall_delay;
4147 indent();
4148 out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES WAIT ");
4149 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4150 out(O_ALTFP|O_VERB|O_NONL, " to ");
4151 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
4152 out(O_ALTFP|O_VERB, NULL);
4153 indent_pop();
4154 return (FME_WAIT);
4155 }
4156 indent();
4157 out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES CREDIBLE ");
4158 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4159 out(O_ALTFP|O_VERB, NULL);
4160 indent_pop();
4161 return (FME_CREDIBLE);
4162 }
4163
4164 static enum fme_state
hypothesise(struct fme * fmep,struct event * ep,unsigned long long at_latest_by,unsigned long long * pdelay)4165 hypothesise(struct fme *fmep, struct event *ep,
4166 unsigned long long at_latest_by, unsigned long long *pdelay)
4167 {
4168 enum fme_state rtr, otr;
4169 unsigned long long my_delay;
4170 unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
4171
4172 stats_counter_bump(fmep->Hcallcount);
4173 indent_push(" H");
4174 indent();
4175 out(O_ALTFP|O_VERB|O_NONL, "->");
4176 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4177 out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
4178 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
4179 out(O_ALTFP|O_VERB, NULL);
4180
4181 rtr = requirements_test(fmep, ep, at_latest_by, &my_delay);
4182 if ((rtr == FME_WAIT) && (my_delay < overall_delay))
4183 overall_delay = my_delay;
4184 if (rtr != FME_DISPROVED) {
4185 if (is_problem(ep->t)) {
4186 otr = effects_test(fmep, ep, at_latest_by, &my_delay);
4187 if (otr != FME_DISPROVED) {
4188 if (fmep->peek == 0 && ep->is_suspect == 0) {
4189 ep->suspects = fmep->suspects;
4190 ep->is_suspect = 1;
4191 fmep->suspects = ep;
4192 fmep->nsuspects++;
4193 }
4194 }
4195 } else
4196 otr = causes_test(fmep, ep, at_latest_by, &my_delay);
4197 if ((otr == FME_WAIT) && (my_delay < overall_delay))
4198 overall_delay = my_delay;
4199 if ((otr != FME_DISPROVED) &&
4200 ((rtr == FME_WAIT) || (otr == FME_WAIT)))
4201 *pdelay = overall_delay;
4202 }
4203 if (rtr == FME_DISPROVED) {
4204 indent();
4205 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4206 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4207 out(O_ALTFP|O_VERB, " (doesn't meet requirements)");
4208 indent_pop();
4209 return (FME_DISPROVED);
4210 }
4211 if ((otr == FME_DISPROVED) && is_problem(ep->t)) {
4212 indent();
4213 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4214 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4215 out(O_ALTFP|O_VERB, " (doesn't explain all reports)");
4216 indent_pop();
4217 return (FME_DISPROVED);
4218 }
4219 if (otr == FME_DISPROVED) {
4220 indent();
4221 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4222 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4223 out(O_ALTFP|O_VERB, " (causes are not credible)");
4224 indent_pop();
4225 return (FME_DISPROVED);
4226 }
4227 if ((rtr == FME_WAIT) || (otr == FME_WAIT)) {
4228 indent();
4229 out(O_ALTFP|O_VERB|O_NONL, "<-WAIT ");
4230 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4231 out(O_ALTFP|O_VERB|O_NONL, " to ");
4232 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay);
4233 out(O_ALTFP|O_VERB, NULL);
4234 indent_pop();
4235 return (FME_WAIT);
4236 }
4237 indent();
4238 out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE ");
4239 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4240 out(O_ALTFP|O_VERB, NULL);
4241 indent_pop();
4242 return (FME_CREDIBLE);
4243 }
4244
4245 /*
4246 * fme_istat_load -- reconstitute any persistent istats
4247 */
4248 void
fme_istat_load(fmd_hdl_t * hdl)4249 fme_istat_load(fmd_hdl_t *hdl)
4250 {
4251 int sz;
4252 char *sbuf;
4253 char *ptr;
4254
4255 if ((sz = fmd_buf_size(hdl, NULL, WOBUF_ISTATS)) == 0) {
4256 out(O_ALTFP, "fme_istat_load: No stats");
4257 return;
4258 }
4259
4260 sbuf = alloca(sz);
4261
4262 fmd_buf_read(hdl, NULL, WOBUF_ISTATS, sbuf, sz);
4263
4264 /*
4265 * pick apart the serialized stats
4266 *
4267 * format is:
4268 * <class-name>, '@', <path>, '\0', <value>, '\0'
4269 * for example:
4270 * "stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0"
4271 *
4272 * since this is parsing our own serialized data, any parsing issues
4273 * are fatal, so we check for them all with ASSERT() below.
4274 */
4275 ptr = sbuf;
4276 while (ptr < &sbuf[sz]) {
4277 char *sepptr;
4278 struct node *np;
4279 int val;
4280
4281 sepptr = strchr(ptr, '@');
4282 ASSERT(sepptr != NULL);
4283 *sepptr = '\0';
4284
4285 /* construct the event */
4286 np = newnode(T_EVENT, NULL, 0);
4287 np->u.event.ename = newnode(T_NAME, NULL, 0);
4288 np->u.event.ename->u.name.t = N_STAT;
4289 np->u.event.ename->u.name.s = stable(ptr);
4290 np->u.event.ename->u.name.it = IT_ENAME;
4291 np->u.event.ename->u.name.last = np->u.event.ename;
4292
4293 ptr = sepptr + 1;
4294 ASSERT(ptr < &sbuf[sz]);
4295 ptr += strlen(ptr);
4296 ptr++; /* move past the '\0' separating path from value */
4297 ASSERT(ptr < &sbuf[sz]);
4298 ASSERT(isdigit(*ptr));
4299 val = atoi(ptr);
4300 ASSERT(val > 0);
4301 ptr += strlen(ptr);
4302 ptr++; /* move past the final '\0' for this entry */
4303
4304 np->u.event.epname = pathstring2epnamenp(sepptr + 1);
4305 ASSERT(np->u.event.epname != NULL);
4306
4307 istat_bump(np, val);
4308 tree_free(np);
4309 }
4310
4311 istat_save();
4312 }
4313