Home | History | Annotate | Download | only in eversholt
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  *
     26  * fme.c -- fault management exercise module
     27  *
     28  * this module provides the simulated fault management exercise.
     29  */
     30 
     31 #include <stdio.h>
     32 #include <stdlib.h>
     33 #include <string.h>
     34 #include <strings.h>
     35 #include <ctype.h>
     36 #include <alloca.h>
     37 #include <libnvpair.h>
     38 #include <sys/fm/protocol.h>
     39 #include <fm/fmd_api.h>
     40 #include "alloc.h"
     41 #include "out.h"
     42 #include "stats.h"
     43 #include "stable.h"
     44 #include "literals.h"
     45 #include "lut.h"
     46 #include "tree.h"
     47 #include "ptree.h"
     48 #include "itree.h"
     49 #include "ipath.h"
     50 #include "fme.h"
     51 #include "evnv.h"
     52 #include "eval.h"
     53 #include "config.h"
     54 #include "platform.h"
     55 #include "esclex.h"
     56 
/* imported from eft.c... */
extern hrtime_t Hesitate;
extern char *Serd_Override;
/* nvlist allocator handle passed to nvlist_xpack()/nvlist_xunpack() below */
extern nv_alloc_t Eft_nv_hdl;
extern int Max_fme;
extern fmd_hdl_t *Hdl;

/*
 * NOTE(review): presumably "dirty" flags consumed by istat_save() and
 * serd_save(); their users are outside this part of the file -- confirm.
 */
static int Istat_need_save;
static int Serd_need_save;
void istat_save(void);
void serd_save(void);

/* fme under construction is global so we can free it on module abort */
static struct fme *Nfmep;

/*
 * Reason code (UD_VAL_*) recorded when an FME cannot be built or
 * restarted.  fme_restart() turns it into the defect and reason strings
 * of the undiagnosable suspect it posts.
 */
static int Undiag_reason = UD_VAL_UNKNOWN;

/* id given to the next FME; kept above any id restored by fme_restart() */
static int Nextid = 0;

static int Open_fme_count = 0;	/* Count of open FMEs */
     77 
/*
 * list of fault management exercises underway
 *
 * FMElist is the head and EFMElist the tail of the list of active FMEs
 * (fme_ready() appends new entries at EFMElist).  ClosedFMEs is a
 * separate list that fme_fini() walks and destroys.
 */
static struct fme {
	struct fme *next;		/* next exercise */
	unsigned long long ull;		/* time when fme was created */
	int id;				/* FME id */
	struct config *config;		/* cooked configuration data */
	struct lut *eventtree;		/* propagation tree for this FME */
	/*
	 * The initial error report that created this FME is kept in
	 * two forms.  e0 points to the instance tree node and is used
	 * by fme_eval() as the starting point for the inference
	 * algorithm.  e0r is the event handle FMD passed to us when
	 * the ereport first arrived and is used when setting timers,
	 * which are always relative to the time of this initial
	 * report.
	 */
	struct event *e0;
	fmd_event_t *e0r;

	id_t    timer;			/* for setting an fmd time-out */

	struct event *ecurrent;		/* ereport under consideration */
	struct event *suspects;		/* current suspect list */
	struct event *psuspects;	/* previous suspect list */
	int nsuspects;			/* count of suspects */
	int posted_suspects;		/* true if we've posted a diagnosis */
	int uniqobs;			/* number of unique events observed */
	int peek;			/* just peeking, don't track suspects */
	int overflow;			/* true if overflow FME */
	enum fme_state {
		FME_NOTHING = 5000,	/* not evaluated yet */
		FME_WAIT,		/* need to wait for more info */
		FME_CREDIBLE,		/* suspect list is credible */
		FME_DISPROVED,		/* no valid suspects found */
		FME_DEFERRED		/* don't know yet (k-count not met) */
	} state;

	unsigned long long pull;	/* time passed since created */
	unsigned long long wull;	/* wait until this time for re-eval */
	struct event *observations;	/* observation list */
	struct lut *globals;		/* values of global variables */
	/* fmd interfacing */
	fmd_hdl_t *hdl;			/* handle for talking with fmd */
	fmd_case_t *fmcase;		/* what fmd 'case' we associate with */
	/* stats (counters created in fme_ready(), deleted in destroy_fme()) */
	struct stats *Rcount;
	struct stats *Hcallcount;
	struct stats *Rcallcount;
	struct stats *Ccallcount;
	struct stats *Ecallcount;
	struct stats *Tcallcount;
	struct stats *Marrowcount;
	struct stats *diags;
} *FMElist, *EFMElist, *ClosedFMEs;
    132 
/*
 * Singly-linked list of fmd cases that could not be restarted.
 * fme_restart() pushes entries at the head (Undiagablecaselist) and
 * fme_fini() frees the list nodes.
 */
static struct case_list {
	fmd_case_t *fmcase;
	struct case_list *next;
} *Undiagablecaselist;
    137 
/* forward declarations for file-local functions defined further down */
static void fme_eval(struct fme *fmep, fmd_event_t *ffep);
static enum fme_state hypothesise(struct fme *fmep, struct event *ep,
	unsigned long long at_latest_by, unsigned long long *pdelay);
static struct node *eventprop_lookup(struct event *ep, const char *propname);
static struct node *pathstring2epnamenp(char *path);
static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep,
	fmd_case_t *fmcase);
static const char *undiag_2reason_str(int ud);
static const char *undiag_2defect_str(int ud);
static void restore_suspects(struct fme *fmep);
static void save_suspects(struct fme *fmep);
static void destroy_fme(struct fme *f);
static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
    const char *eventstring, const struct ipath *ipp, nvlist_t *nvl);
static void istat_counter_reset_cb(struct istat_entry *entp,
    struct stats *statp, const struct ipath *ipp);
static void istat_counter_topo_chg_cb(struct istat_entry *entp,
    struct stats *statp, void *unused);
static void serd_reset_cb(struct serd_entry *entp, void *unused,
    const struct ipath *ipp);
static void serd_topo_chg_cb(struct serd_entry *entp, void *unused,
    void *unused2);
static void destroy_fme_bufs(struct fme *fp);
    161 
    162 static struct fme *
    163 alloc_fme(void)
    164 {
    165 	struct fme *fmep;
    166 
    167 	fmep = MALLOC(sizeof (*fmep));
    168 	bzero(fmep, sizeof (*fmep));
    169 	return (fmep);
    170 }
    171 
    172 /*
    173  * fme_ready -- called when all initialization of the FME (except for
    174  *	stats) has completed successfully.  Adds the fme to global lists
    175  *	and establishes its stats.
    176  */
    177 static struct fme *
    178 fme_ready(struct fme *fmep)
    179 {
    180 	char nbuf[100];
    181 
    182 	Nfmep = NULL;	/* don't need to free this on module abort now */
    183 
    184 	if (EFMElist) {
    185 		EFMElist->next = fmep;
    186 		EFMElist = fmep;
    187 	} else
    188 		FMElist = EFMElist = fmep;
    189 
    190 	(void) sprintf(nbuf, "fme%d.Rcount", fmep->id);
    191 	fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
    192 	(void) sprintf(nbuf, "fme%d.Hcall", fmep->id);
    193 	fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1);
    194 	(void) sprintf(nbuf, "fme%d.Rcall", fmep->id);
    195 	fmep->Rcallcount = stats_new_counter(nbuf,
    196 	    "calls to requirements_test()", 1);
    197 	(void) sprintf(nbuf, "fme%d.Ccall", fmep->id);
    198 	fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1);
    199 	(void) sprintf(nbuf, "fme%d.Ecall", fmep->id);
    200 	fmep->Ecallcount =
    201 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
    202 	(void) sprintf(nbuf, "fme%d.Tcall", fmep->id);
    203 	fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
    204 	(void) sprintf(nbuf, "fme%d.Marrow", fmep->id);
    205 	fmep->Marrowcount = stats_new_counter(nbuf,
    206 	    "arrows marked by mark_arrows()", 1);
    207 	(void) sprintf(nbuf, "fme%d.diags", fmep->id);
    208 	fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
    209 
    210 	out(O_ALTFP|O_VERB2, "newfme: config snapshot contains...");
    211 	config_print(O_ALTFP|O_VERB2, fmep->config);
    212 
    213 	return (fmep);
    214 }
    215 
/*
 * Declared here rather than through a header; used by
 * set_needed_arrows() and prune_propagations() below.
 */
extern void ipath_dummy_lut(struct arrow *);
extern struct lut *itree_create_dummy(const char *, const struct ipath *);
    218 
    219 /* ARGSUSED */
    220 static void
    221 set_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
    222 {
    223 	struct bubble *bp;
    224 	struct arrowlist *ap;
    225 
    226 	for (bp = itree_next_bubble(ep, NULL); bp;
    227 	    bp = itree_next_bubble(ep, bp)) {
    228 		if (bp->t != B_FROM)
    229 			continue;
    230 		for (ap = itree_next_arrow(bp, NULL); ap;
    231 		    ap = itree_next_arrow(bp, ap)) {
    232 			ap->arrowp->pnode->u.arrow.needed = 1;
    233 			ipath_dummy_lut(ap->arrowp);
    234 		}
    235 	}
    236 }
    237 
    238 /* ARGSUSED */
    239 static void
    240 unset_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
    241 {
    242 	struct bubble *bp;
    243 	struct arrowlist *ap;
    244 
    245 	for (bp = itree_next_bubble(ep, NULL); bp;
    246 	    bp = itree_next_bubble(ep, bp)) {
    247 		if (bp->t != B_FROM)
    248 			continue;
    249 		for (ap = itree_next_arrow(bp, NULL); ap;
    250 		    ap = itree_next_arrow(bp, ap))
    251 			ap->arrowp->pnode->u.arrow.needed = 0;
    252 	}
    253 }
    254 
/* forward declarations: both are used by prune_propagations() below */
static void globals_destructor(void *left, void *right, void *arg);
static void clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep);
    257 
    258 static void
    259 prune_propagations(const char *e0class, const struct ipath *e0ipp)
    260 {
    261 	char nbuf[100];
    262 	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
    263 	extern struct lut *Usednames;
    264 
    265 	Nfmep = alloc_fme();
    266 	Nfmep->id = Nextid;
    267 	Nfmep->state = FME_NOTHING;
    268 	Nfmep->eventtree = itree_create_dummy(e0class, e0ipp);
    269 	if ((Nfmep->e0 =
    270 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
    271 		out(O_ALTFP, "prune_propagations: e0 not in instance tree");
    272 		itree_free(Nfmep->eventtree);
    273 		FREE(Nfmep);
    274 		Nfmep = NULL;
    275 		return;
    276 	}
    277 	Nfmep->ecurrent = Nfmep->observations = Nfmep->e0;
    278 	Nfmep->e0->count++;
    279 
    280 	(void) sprintf(nbuf, "fme%d.Rcount", Nfmep->id);
    281 	Nfmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
    282 	(void) sprintf(nbuf, "fme%d.Hcall", Nfmep->id);
    283 	Nfmep->Hcallcount =
    284 	    stats_new_counter(nbuf, "calls to hypothesise()", 1);
    285 	(void) sprintf(nbuf, "fme%d.Rcall", Nfmep->id);
    286 	Nfmep->Rcallcount = stats_new_counter(nbuf,
    287 	    "calls to requirements_test()", 1);
    288 	(void) sprintf(nbuf, "fme%d.Ccall", Nfmep->id);
    289 	Nfmep->Ccallcount =
    290 	    stats_new_counter(nbuf, "calls to causes_test()", 1);
    291 	(void) sprintf(nbuf, "fme%d.Ecall", Nfmep->id);
    292 	Nfmep->Ecallcount =
    293 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
    294 	(void) sprintf(nbuf, "fme%d.Tcall", Nfmep->id);
    295 	Nfmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
    296 	(void) sprintf(nbuf, "fme%d.Marrow", Nfmep->id);
    297 	Nfmep->Marrowcount = stats_new_counter(nbuf,
    298 	    "arrows marked by mark_arrows()", 1);
    299 	(void) sprintf(nbuf, "fme%d.diags", Nfmep->id);
    300 	Nfmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
    301 
    302 	Nfmep->peek = 1;
    303 	lut_walk(Nfmep->eventtree, (lut_cb)unset_needed_arrows, (void *)Nfmep);
    304 	lut_free(Usednames, NULL, NULL);
    305 	Usednames = NULL;
    306 	lut_walk(Nfmep->eventtree, (lut_cb)clear_arrows, (void *)Nfmep);
    307 	(void) hypothesise(Nfmep, Nfmep->e0, Nfmep->ull, &my_delay);
    308 	itree_prune(Nfmep->eventtree);
    309 	lut_walk(Nfmep->eventtree, (lut_cb)set_needed_arrows, (void *)Nfmep);
    310 
    311 	stats_delete(Nfmep->Rcount);
    312 	stats_delete(Nfmep->Hcallcount);
    313 	stats_delete(Nfmep->Rcallcount);
    314 	stats_delete(Nfmep->Ccallcount);
    315 	stats_delete(Nfmep->Ecallcount);
    316 	stats_delete(Nfmep->Tcallcount);
    317 	stats_delete(Nfmep->Marrowcount);
    318 	stats_delete(Nfmep->diags);
    319 	itree_free(Nfmep->eventtree);
    320 	lut_free(Nfmep->globals, globals_destructor, NULL);
    321 	FREE(Nfmep);
    322 }
    323 
    324 static struct fme *
    325 newfme(const char *e0class, const struct ipath *e0ipp, fmd_hdl_t *hdl,
    326 	fmd_case_t *fmcase)
    327 {
    328 	struct cfgdata *cfgdata;
    329 	int init_size;
    330 	extern int alloc_total();
    331 
    332 	init_size = alloc_total();
    333 	out(O_ALTFP|O_STAMP, "start config_snapshot using %d bytes", init_size);
    334 	cfgdata = config_snapshot();
    335 	platform_save_config(hdl, fmcase);
    336 	out(O_ALTFP|O_STAMP, "config_snapshot added %d bytes",
    337 	    alloc_total() - init_size);
    338 
    339 	Nfmep = alloc_fme();
    340 
    341 	Nfmep->id = Nextid++;
    342 	Nfmep->config = cfgdata->cooked;
    343 	config_free(cfgdata);
    344 	Nfmep->posted_suspects = 0;
    345 	Nfmep->uniqobs = 0;
    346 	Nfmep->state = FME_NOTHING;
    347 	Nfmep->pull = 0ULL;
    348 	Nfmep->overflow = 0;
    349 
    350 	Nfmep->fmcase = fmcase;
    351 	Nfmep->hdl = hdl;
    352 
    353 	if ((Nfmep->eventtree = itree_create(Nfmep->config)) == NULL) {
    354 		out(O_ALTFP, "newfme: NULL instance tree");
    355 		Undiag_reason = UD_VAL_INSTFAIL;
    356 		structconfig_free(Nfmep->config);
    357 		destroy_fme_bufs(Nfmep);
    358 		FREE(Nfmep);
    359 		Nfmep = NULL;
    360 		return (NULL);
    361 	}
    362 
    363 	itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree);
    364 
    365 	if ((Nfmep->e0 =
    366 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
    367 		out(O_ALTFP, "newfme: e0 not in instance tree");
    368 		Undiag_reason = UD_VAL_BADEVENTI;
    369 		itree_free(Nfmep->eventtree);
    370 		structconfig_free(Nfmep->config);
    371 		destroy_fme_bufs(Nfmep);
    372 		FREE(Nfmep);
    373 		Nfmep = NULL;
    374 		return (NULL);
    375 	}
    376 
    377 	return (fme_ready(Nfmep));
    378 }
    379 
    380 void
    381 fme_fini(void)
    382 {
    383 	struct fme *sfp, *fp;
    384 	struct case_list *ucasep, *nextcasep;
    385 
    386 	ucasep = Undiagablecaselist;
    387 	while (ucasep != NULL) {
    388 		nextcasep = ucasep->next;
    389 		FREE(ucasep);
    390 		ucasep = nextcasep;
    391 	}
    392 	Undiagablecaselist = NULL;
    393 
    394 	/* clean up closed fmes */
    395 	fp = ClosedFMEs;
    396 	while (fp != NULL) {
    397 		sfp = fp->next;
    398 		destroy_fme(fp);
    399 		fp = sfp;
    400 	}
    401 	ClosedFMEs = NULL;
    402 
    403 	fp = FMElist;
    404 	while (fp != NULL) {
    405 		sfp = fp->next;
    406 		destroy_fme(fp);
    407 		fp = sfp;
    408 	}
    409 	FMElist = EFMElist = NULL;
    410 
    411 	/* if we were in the middle of creating an fme, free it now */
    412 	if (Nfmep) {
    413 		destroy_fme(Nfmep);
    414 		Nfmep = NULL;
    415 	}
    416 }
    417 
/*
 * Allocated space for a buffer name.  20 bytes allows for
 * a ridiculous 9,999,999 unique observations: the longest name
 * built is "observed<N>.nvp", i.e. 12 fixed characters plus the
 * terminating NUL, which leaves room for a 7-digit <N>.
 */
#define	OBBUFNMSZ 20
    423 
    424 /*
    425  *  serialize_observation
    426  *
    427  *  Create a recoverable version of the current observation
    428  *  (f->ecurrent).  We keep a serialized version of each unique
    429  *  observation in order that we may resume correctly the fme in the
    430  *  correct state if eft or fmd crashes and we're restarted.
    431  */
    432 static void
    433 serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp)
    434 {
    435 	size_t pkdlen;
    436 	char tmpbuf[OBBUFNMSZ];
    437 	char *pkd = NULL;
    438 	char *estr;
    439 
    440 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs);
    441 	estr = ipath2str(cls, ipp);
    442 	fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1);
    443 	fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr,
    444 	    strlen(estr) + 1);
    445 	FREE(estr);
    446 
    447 	if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) {
    448 		(void) snprintf(tmpbuf,
    449 		    OBBUFNMSZ, "observed%d.nvp", fp->uniqobs);
    450 		if (nvlist_xpack(fp->ecurrent->nvp,
    451 		    &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0)
    452 			out(O_DIE|O_SYS, "pack of observed nvl failed");
    453 		fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen);
    454 		fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen);
    455 		FREE(pkd);
    456 	}
    457 
    458 	fp->uniqobs++;
    459 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
    460 	    sizeof (fp->uniqobs));
    461 }
    462 
    463 /*
    464  *  init_fme_bufs -- We keep several bits of state about an fme for
    465  *	use if eft or fmd crashes and we're restarted.
    466  */
    467 static void
    468 init_fme_bufs(struct fme *fp)
    469 {
    470 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull));
    471 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull,
    472 	    sizeof (fp->pull));
    473 
    474 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id));
    475 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id,
    476 	    sizeof (fp->id));
    477 
    478 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs));
    479 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
    480 	    sizeof (fp->uniqobs));
    481 
    482 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD,
    483 	    sizeof (fp->posted_suspects));
    484 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD,
    485 	    (void *)&fp->posted_suspects, sizeof (fp->posted_suspects));
    486 }
    487 
    488 static void
    489 destroy_fme_bufs(struct fme *fp)
    490 {
    491 	char tmpbuf[OBBUFNMSZ];
    492 	int o;
    493 
    494 	platform_restore_config(fp->hdl, fp->fmcase);
    495 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN);
    496 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG);
    497 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL);
    498 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID);
    499 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD);
    500 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS);
    501 
    502 	for (o = 0; o < fp->uniqobs; o++) {
    503 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o);
    504 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
    505 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o);
    506 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
    507 	}
    508 }
    509 
    510 /*
    511  * reconstitute_observations -- convert a case's serialized observations
    512  *	back into struct events.  Returns zero if all observations are
    513  *	successfully reconstituted.
    514  */
    515 static int
    516 reconstitute_observations(struct fme *fmep)
    517 {
    518 	struct event *ep;
    519 	struct node *epnamenp = NULL;
    520 	size_t pkdlen;
    521 	char *pkd = NULL;
    522 	char *tmpbuf = alloca(OBBUFNMSZ);
    523 	char *sepptr;
    524 	char *estr;
    525 	int ocnt;
    526 	int elen;
    527 
    528 	for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) {
    529 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt);
    530 		elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
    531 		if (elen == 0) {
    532 			out(O_ALTFP,
    533 			    "reconstitute_observation: no %s buffer found.",
    534 			    tmpbuf);
    535 			Undiag_reason = UD_VAL_MISSINGOBS;
    536 			break;
    537 		}
    538 
    539 		estr = MALLOC(elen);
    540 		fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
    541 		sepptr = strchr(estr, '@');
    542 		if (sepptr == NULL) {
    543 			out(O_ALTFP,
    544 			    "reconstitute_observation: %s: "
    545 			    "missing @ separator in %s.",
    546 			    tmpbuf, estr);
    547 			Undiag_reason = UD_VAL_MISSINGPATH;
    548 			FREE(estr);
    549 			break;
    550 		}
    551 
    552 		*sepptr = '\0';
    553 		if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
    554 			out(O_ALTFP,
    555 			    "reconstitute_observation: %s: "
    556 			    "trouble converting path string \"%s\" "
    557 			    "to internal representation.",
    558 			    tmpbuf, sepptr + 1);
    559 			Undiag_reason = UD_VAL_MISSINGPATH;
    560 			FREE(estr);
    561 			break;
    562 		}
    563 
    564 		/* construct the event */
    565 		ep = itree_lookup(fmep->eventtree,
    566 		    stable(estr), ipath(epnamenp));
    567 		if (ep == NULL) {
    568 			out(O_ALTFP,
    569 			    "reconstitute_observation: %s: "
    570 			    "lookup of  \"%s\" in itree failed.",
    571 			    tmpbuf, ipath2str(estr, ipath(epnamenp)));
    572 			Undiag_reason = UD_VAL_BADOBS;
    573 			tree_free(epnamenp);
    574 			FREE(estr);
    575 			break;
    576 		}
    577 		tree_free(epnamenp);
    578 
    579 		/*
    580 		 * We may or may not have a saved nvlist for the observation
    581 		 */
    582 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt);
    583 		pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
    584 		if (pkdlen != 0) {
    585 			pkd = MALLOC(pkdlen);
    586 			fmd_buf_read(fmep->hdl,
    587 			    fmep->fmcase, tmpbuf, pkd, pkdlen);
    588 			ASSERT(ep->nvp == NULL);
    589 			if (nvlist_xunpack(pkd,
    590 			    pkdlen, &ep->nvp, &Eft_nv_hdl) != 0)
    591 				out(O_DIE|O_SYS, "pack of observed nvl failed");
    592 			FREE(pkd);
    593 		}
    594 
    595 		if (ocnt == 0)
    596 			fmep->e0 = ep;
    597 
    598 		FREE(estr);
    599 		fmep->ecurrent = ep;
    600 		ep->count++;
    601 
    602 		/* link it into list of observations seen */
    603 		ep->observations = fmep->observations;
    604 		fmep->observations = ep;
    605 	}
    606 
    607 	if (ocnt == fmep->uniqobs) {
    608 		(void) fme_ready(fmep);
    609 		return (0);
    610 	}
    611 
    612 	return (1);
    613 }
    614 
    615 /*
    616  * restart_fme -- called during eft initialization.  Reconstitutes
    617  *	an in-progress fme.
    618  */
    619 void
    620 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress)
    621 {
    622 	nvlist_t *defect;
    623 	struct case_list *bad;
    624 	struct fme *fmep;
    625 	struct cfgdata *cfgdata;
    626 	size_t rawsz;
    627 	struct event *ep;
    628 	char *tmpbuf = alloca(OBBUFNMSZ);
    629 	char *sepptr;
    630 	char *estr;
    631 	int elen;
    632 	struct node *epnamenp = NULL;
    633 	int init_size;
    634 	extern int alloc_total();
    635 
    636 	/*
    637 	 * ignore solved or closed cases
    638 	 */
    639 	if (fmd_case_solved(hdl, inprogress) ||
    640 	    fmd_case_closed(hdl, inprogress))
    641 		return;
    642 
    643 	fmep = alloc_fme();
    644 	fmep->fmcase = inprogress;
    645 	fmep->hdl = hdl;
    646 
    647 	if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) {
    648 		out(O_ALTFP, "restart_fme: no saved posted status");
    649 		Undiag_reason = UD_VAL_MISSINGINFO;
    650 		goto badcase;
    651 	} else {
    652 		fmd_buf_read(hdl, inprogress, WOBUF_POSTD,
    653 		    (void *)&fmep->posted_suspects,
    654 		    sizeof (fmep->posted_suspects));
    655 	}
    656 
    657 	if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) {
    658 		out(O_ALTFP, "restart_fme: no saved id");
    659 		Undiag_reason = UD_VAL_MISSINGINFO;
    660 		goto badcase;
    661 	} else {
    662 		fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id,
    663 		    sizeof (fmep->id));
    664 	}
    665 	if (Nextid <= fmep->id)
    666 		Nextid = fmep->id + 1;
    667 
    668 	out(O_ALTFP, "Replay FME %d", fmep->id);
    669 
    670 	if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) {
    671 		out(O_ALTFP, "restart_fme: No config data");
    672 		Undiag_reason = UD_VAL_MISSINGINFO;
    673 		goto badcase;
    674 	}
    675 	fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz,
    676 	    sizeof (size_t));
    677 
    678 	if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) {
    679 		out(O_ALTFP, "restart_fme: No event zero");
    680 		Undiag_reason = UD_VAL_MISSINGZERO;
    681 		goto badcase;
    682 	}
    683 
    684 	if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) {
    685 		out(O_ALTFP, "restart_fme: no saved wait time");
    686 		Undiag_reason = UD_VAL_MISSINGINFO;
    687 		goto badcase;
    688 	} else {
    689 		fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull,
    690 		    sizeof (fmep->pull));
    691 	}
    692 
    693 	if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) {
    694 		out(O_ALTFP, "restart_fme: no count of observations");
    695 		Undiag_reason = UD_VAL_MISSINGINFO;
    696 		goto badcase;
    697 	} else {
    698 		fmd_buf_read(hdl, inprogress, WOBUF_NOBS,
    699 		    (void *)&fmep->uniqobs, sizeof (fmep->uniqobs));
    700 	}
    701 
    702 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed0");
    703 	elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
    704 	if (elen == 0) {
    705 		out(O_ALTFP, "reconstitute_observation: no %s buffer found.",
    706 		    tmpbuf);
    707 		Undiag_reason = UD_VAL_MISSINGOBS;
    708 		goto badcase;
    709 	}
    710 	estr = MALLOC(elen);
    711 	fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
    712 	sepptr = strchr(estr, '@');
    713 	if (sepptr == NULL) {
    714 		out(O_ALTFP, "reconstitute_observation: %s: "
    715 		    "missing @ separator in %s.",
    716 		    tmpbuf, estr);
    717 		Undiag_reason = UD_VAL_MISSINGPATH;
    718 		FREE(estr);
    719 		goto badcase;
    720 	}
    721 	*sepptr = '\0';
    722 	if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
    723 		out(O_ALTFP, "reconstitute_observation: %s: "
    724 		    "trouble converting path string \"%s\" "
    725 		    "to internal representation.", tmpbuf, sepptr + 1);
    726 		Undiag_reason = UD_VAL_MISSINGPATH;
    727 		FREE(estr);
    728 		goto badcase;
    729 	}
    730 	prune_propagations(stable(estr), ipath(epnamenp));
    731 	tree_free(epnamenp);
    732 	FREE(estr);
    733 
    734 	init_size = alloc_total();
    735 	out(O_ALTFP|O_STAMP, "start config_restore using %d bytes", init_size);
    736 	cfgdata = MALLOC(sizeof (struct cfgdata));
    737 	cfgdata->cooked = NULL;
    738 	cfgdata->devcache = NULL;
    739 	cfgdata->devidcache = NULL;
    740 	cfgdata->cpucache = NULL;
    741 	cfgdata->raw_refcnt = 1;
    742 
    743 	if (rawsz > 0) {
    744 		if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) {
    745 			out(O_ALTFP, "restart_fme: Config data size mismatch");
    746 			Undiag_reason = UD_VAL_CFGMISMATCH;
    747 			goto badcase;
    748 		}
    749 		cfgdata->begin = MALLOC(rawsz);
    750 		cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz;
    751 		fmd_buf_read(hdl,
    752 		    inprogress, WOBUF_CFG, cfgdata->begin, rawsz);
    753 	} else {
    754 		cfgdata->begin = cfgdata->end = cfgdata->nextfree = NULL;
    755 	}
    756 
    757 	config_cook(cfgdata);
    758 	fmep->config = cfgdata->cooked;
    759 	config_free(cfgdata);
    760 	out(O_ALTFP|O_STAMP, "config_restore added %d bytes",
    761 	    alloc_total() - init_size);
    762 
    763 	if ((fmep->eventtree = itree_create(fmep->config)) == NULL) {
    764 		/* case not properly saved or irretrievable */
    765 		out(O_ALTFP, "restart_fme: NULL instance tree");
    766 		Undiag_reason = UD_VAL_INSTFAIL;
    767 		goto badcase;
    768 	}
    769 
    770 	itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree);
    771 
    772 	if (reconstitute_observations(fmep) != 0)
    773 		goto badcase;
    774 
    775 	out(O_ALTFP|O_NONL, "FME %d replay observations: ", fmep->id);
    776 	for (ep = fmep->observations; ep; ep = ep->observations) {
    777 		out(O_ALTFP|O_NONL, " ");
    778 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
    779 	}
    780 	out(O_ALTFP, NULL);
    781 
    782 	Open_fme_count++;
    783 
    784 	/* give the diagnosis algorithm a shot at the new FME state */
    785 	fme_eval(fmep, fmep->e0r);
    786 	return;
    787 
    788 badcase:
    789 	if (fmep->eventtree != NULL)
    790 		itree_free(fmep->eventtree);
    791 	if (fmep->config)
    792 		structconfig_free(fmep->config);
    793 	destroy_fme_bufs(fmep);
    794 	FREE(fmep);
    795 
    796 	/*
    797 	 * Since we're unable to restart the case, add it to the undiagable
    798 	 * list and solve and close it as appropriate.
    799 	 */
    800 	bad = MALLOC(sizeof (struct case_list));
    801 	bad->next = NULL;
    802 
    803 	if (Undiagablecaselist != NULL)
    804 		bad->next = Undiagablecaselist;
    805 	Undiagablecaselist = bad;
    806 	bad->fmcase = inprogress;
    807 
    808 	out(O_ALTFP|O_NONL, "[case %s (unable to restart), ",
    809 	    fmd_case_uuid(hdl, bad->fmcase));
    810 
    811 	if (fmd_case_solved(hdl, bad->fmcase)) {
    812 		out(O_ALTFP|O_NONL, "already solved, ");
    813 	} else {
    814 		out(O_ALTFP|O_NONL, "solving, ");
    815 		defect = fmd_nvl_create_fault(hdl,
    816 		    undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
    817 		(void) nvlist_add_string(defect, UNDIAG_REASON,
    818 		    undiag_2reason_str(Undiag_reason));
    819 		fmd_case_add_suspect(hdl, bad->fmcase, defect);
    820 		fmd_case_solve(hdl, bad->fmcase);
    821 		Undiag_reason = UD_VAL_UNKNOWN;
    822 	}
    823 
    824 	if (fmd_case_closed(hdl, bad->fmcase)) {
    825 		out(O_ALTFP, "already closed ]");
    826 	} else {
    827 		out(O_ALTFP, "closing ]");
    828 		fmd_case_close(hdl, bad->fmcase);
    829 	}
    830 }
    831 
    832 /*ARGSUSED*/
    833 static void
    834 globals_destructor(void *left, void *right, void *arg)
    835 {
    836 	struct evalue *evp = (struct evalue *)right;
    837 	if (evp->t == NODEPTR)
    838 		tree_free((struct node *)(uintptr_t)evp->v);
    839 	evp->v = (uintptr_t)NULL;
    840 	FREE(evp);
    841 }
    842 
    843 void
    844 destroy_fme(struct fme *f)
    845 {
    846 	stats_delete(f->Rcount);
    847 	stats_delete(f->Hcallcount);
    848 	stats_delete(f->Rcallcount);
    849 	stats_delete(f->Ccallcount);
    850 	stats_delete(f->Ecallcount);
    851 	stats_delete(f->Tcallcount);
    852 	stats_delete(f->Marrowcount);
    853 	stats_delete(f->diags);
    854 
    855 	if (f->eventtree != NULL)
    856 		itree_free(f->eventtree);
    857 	if (f->config)
    858 		structconfig_free(f->config);
    859 	lut_free(f->globals, globals_destructor, NULL);
    860 	FREE(f);
    861 }
    862 
    863 static const char *
    864 fme_state2str(enum fme_state s)
    865 {
    866 	switch (s) {
    867 	case FME_NOTHING:	return ("NOTHING");
    868 	case FME_WAIT:		return ("WAIT");
    869 	case FME_CREDIBLE:	return ("CREDIBLE");
    870 	case FME_DISPROVED:	return ("DISPROVED");
    871 	case FME_DEFERRED:	return ("DEFERRED");
    872 	default:		return ("UNKNOWN");
    873 	}
    874 }
    875 
    876 static int
    877 is_problem(enum nametype t)
    878 {
    879 	return (t == N_FAULT || t == N_DEFECT || t == N_UPSET);
    880 }
    881 
    882 static int
    883 is_defect(enum nametype t)
    884 {
    885 	return (t == N_DEFECT);
    886 }
    887 
    888 static int
    889 is_upset(enum nametype t)
    890 {
    891 	return (t == N_UPSET);
    892 }
    893 
    894 static void
    895 fme_print(int flags, struct fme *fmep)
    896 {
    897 	struct event *ep;
    898 
    899 	out(flags, "Fault Management Exercise %d", fmep->id);
    900 	out(flags, "\t       State: %s", fme_state2str(fmep->state));
    901 	out(flags|O_NONL, "\t  Start time: ");
    902 	ptree_timeval(flags|O_NONL, &fmep->ull);
    903 	out(flags, NULL);
    904 	if (fmep->wull) {
    905 		out(flags|O_NONL, "\t   Wait time: ");
    906 		ptree_timeval(flags|O_NONL, &fmep->wull);
    907 		out(flags, NULL);
    908 	}
    909 	out(flags|O_NONL, "\t          E0: ");
    910 	if (fmep->e0)
    911 		itree_pevent_brief(flags|O_NONL, fmep->e0);
    912 	else
    913 		out(flags|O_NONL, "NULL");
    914 	out(flags, NULL);
    915 	out(flags|O_NONL, "\tObservations:");
    916 	for (ep = fmep->observations; ep; ep = ep->observations) {
    917 		out(flags|O_NONL, " ");
    918 		itree_pevent_brief(flags|O_NONL, ep);
    919 	}
    920 	out(flags, NULL);
    921 	out(flags|O_NONL, "\tSuspect list:");
    922 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
    923 		out(flags|O_NONL, " ");
    924 		itree_pevent_brief(flags|O_NONL, ep);
    925 	}
    926 	out(flags, NULL);
    927 	if (fmep->eventtree != NULL) {
    928 		out(flags|O_VERB2, "\t        Tree:");
    929 		itree_ptree(flags|O_VERB2, fmep->eventtree);
    930 	}
    931 }
    932 
    933 static struct node *
    934 pathstring2epnamenp(char *path)
    935 {
    936 	char *sep = "/";
    937 	struct node *ret;
    938 	char *ptr;
    939 
    940 	if ((ptr = strtok(path, sep)) == NULL)
    941 		out(O_DIE, "pathstring2epnamenp: invalid empty class");
    942 
    943 	ret = tree_iname(stable(ptr), NULL, 0);
    944 
    945 	while ((ptr = strtok(NULL, sep)) != NULL)
    946 		ret = tree_name_append(ret,
    947 		    tree_iname(stable(ptr), NULL, 0));
    948 
    949 	return (ret);
    950 }
    951 
    952 /*
    953  * for a given upset sp, increment the corresponding SERD engine.  if the
    954  * SERD engine trips, return the ename and ipp of the resulting ereport.
    955  * returns true if engine tripped and *enamep and *ippp were filled in.
    956  */
    957 static int
    958 serd_eval(struct fme *fmep, fmd_hdl_t *hdl, fmd_event_t *ffep,
    959     fmd_case_t *fmcase, struct event *sp, const char **enamep,
    960     const struct ipath **ippp)
    961 {
    962 	struct node *serdinst;
    963 	char *serdname;
    964 	char *serdresource;
    965 	char *serdclass;
    966 	struct node *nid;
    967 	struct serd_entry *newentp;
    968 	int i, serdn = -1, serdincrement = 1, len = 0;
    969 	char *serdsuffix = NULL, *serdt = NULL;
    970 	struct evalue *ep;
    971 
    972 	ASSERT(sp->t == N_UPSET);
    973 	ASSERT(ffep != NULL);
    974 
    975 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
    976 	    (void *)"n", (lut_cmp)strcmp)) != NULL) {
    977 		ASSERT(ep->t == UINT64);
    978 		serdn = (int)ep->v;
    979 	}
    980 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
    981 	    (void *)"t", (lut_cmp)strcmp)) != NULL) {
    982 		ASSERT(ep->t == STRING);
    983 		serdt = (char *)(uintptr_t)ep->v;
    984 	}
    985 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
    986 	    (void *)"suffix", (lut_cmp)strcmp)) != NULL) {
    987 		ASSERT(ep->t == STRING);
    988 		serdsuffix = (char *)(uintptr_t)ep->v;
    989 	}
    990 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
    991 	    (void *)"increment", (lut_cmp)strcmp)) != NULL) {
    992 		ASSERT(ep->t == UINT64);
    993 		serdincrement = (int)ep->v;
    994 	}
    995 
    996 	/*
    997 	 * obtain instanced SERD engine from the upset sp.  from this
    998 	 * derive serdname, the string used to identify the SERD engine.
    999 	 */
   1000 	serdinst = eventprop_lookup(sp, L_engine);
   1001 
   1002 	if (serdinst == NULL)
   1003 		return (-1);
   1004 
   1005 	len = strlen(serdinst->u.stmt.np->u.event.ename->u.name.s) + 1;
   1006 	if (serdsuffix != NULL)
   1007 		len += strlen(serdsuffix);
   1008 	serdclass = MALLOC(len);
   1009 	if (serdsuffix != NULL)
   1010 		(void) snprintf(serdclass, len, "%s%s",
   1011 		    serdinst->u.stmt.np->u.event.ename->u.name.s, serdsuffix);
   1012 	else
   1013 		(void) snprintf(serdclass, len, "%s",
   1014 		    serdinst->u.stmt.np->u.event.ename->u.name.s);
   1015 	serdresource = ipath2str(NULL,
   1016 	    ipath(serdinst->u.stmt.np->u.event.epname));
   1017 	len += strlen(serdresource) + 1;
   1018 	serdname = MALLOC(len);
   1019 	(void) snprintf(serdname, len, "%s@%s", serdclass, serdresource);
   1020 	FREE(serdresource);
   1021 
   1022 	/* handle serd engine "id" property, if there is one */
   1023 	if ((nid =
   1024 	    lut_lookup(serdinst->u.stmt.lutp, (void *)L_id, NULL)) != NULL) {
   1025 		struct evalue *gval;
   1026 		char suffixbuf[200];
   1027 		char *suffix;
   1028 		char *nserdname;
   1029 		size_t nname;
   1030 
   1031 		out(O_ALTFP|O_NONL, "serd \"%s\" id: ", serdname);
   1032 		ptree_name_iter(O_ALTFP|O_NONL, nid);
   1033 
   1034 		ASSERTinfo(nid->t == T_GLOBID, ptree_nodetype2str(nid->t));
   1035 
   1036 		if ((gval = lut_lookup(fmep->globals,
   1037 		    (void *)nid->u.globid.s, NULL)) == NULL) {
   1038 			out(O_ALTFP, " undefined");
   1039 		} else if (gval->t == UINT64) {
   1040 			out(O_ALTFP, " %llu", gval->v);
   1041 			(void) sprintf(suffixbuf, "%llu", gval->v);
   1042 			suffix = suffixbuf;
   1043 		} else {
   1044 			out(O_ALTFP, " \"%s\"", (char *)(uintptr_t)gval->v);
   1045 			suffix = (char *)(uintptr_t)gval->v;
   1046 		}
   1047 
   1048 		nname = strlen(serdname) + strlen(suffix) + 2;
   1049 		nserdname = MALLOC(nname);
   1050 		(void) snprintf(nserdname, nname, "%s:%s", serdname, suffix);
   1051 		FREE(serdname);
   1052 		serdname = nserdname;
   1053 	}
   1054 
   1055 	/*
   1056 	 * if the engine is empty, and we have an override for n/t then
   1057 	 * destroy and recreate it.
   1058 	 */
   1059 	if ((serdn != -1 || serdt != NULL) && fmd_serd_exists(hdl, serdname) &&
   1060 	    fmd_serd_empty(hdl, serdname))
   1061 		fmd_serd_destroy(hdl, serdname);
   1062 
   1063 	if (!fmd_serd_exists(hdl, serdname)) {
   1064 		struct node *nN, *nT;
   1065 		const char *s;
   1066 		struct node *nodep;
   1067 		struct config *cp;
   1068 		char *path;
   1069 		uint_t nval;
   1070 		hrtime_t tval;
   1071 		int i;
   1072 		char *ptr;
   1073 		int got_n_override = 0, got_t_override = 0;
   1074 
   1075 		/* no SERD engine yet, so create it */
   1076 		nodep = serdinst->u.stmt.np->u.event.epname;
   1077 		path = ipath2str(NULL, ipath(nodep));
   1078 		cp = config_lookup(fmep->config, path, 0);
   1079 		FREE((void *)path);
   1080 
   1081 		/*
   1082 		 * We allow serd paramaters to be overridden, either from
   1083 		 * eft.conf file values (if Serd_Override is set) or from
   1084 		 * driver properties (for "serd.io.device" engines).
   1085 		 */
   1086 		if (Serd_Override != NULL) {
   1087 			char *save_ptr, *ptr1, *ptr2, *ptr3;
   1088 			ptr3 = save_ptr = STRDUP(Serd_Override);
   1089 			while (*ptr3 != '\0') {
   1090 				ptr1 = strchr(ptr3, ',');
   1091 				*ptr1 = '\0';
   1092 				if (strcmp(ptr3, serdclass) == 0) {
   1093 					ptr2 =  strchr(ptr1 + 1, ',');
   1094 					*ptr2 = '\0';
   1095 					nval = atoi(ptr1 + 1);
   1096 					out(O_ALTFP, "serd override %s_n %d",
   1097 					    serdclass, nval);
   1098 					ptr3 =  strchr(ptr2 + 1, ' ');
   1099 					if (ptr3)
   1100 						*ptr3 = '\0';
   1101 					ptr = STRDUP(ptr2 + 1);
   1102 					out(O_ALTFP, "serd override %s_t %s",
   1103 					    serdclass, ptr);
   1104 					got_n_override = 1;
   1105 					got_t_override = 1;
   1106 					break;
   1107 				} else {
   1108 					ptr2 =  strchr(ptr1 + 1, ',');
   1109 					ptr3 =  strchr(ptr2 + 1, ' ');
   1110 					if (ptr3 == NULL)
   1111 						break;
   1112 				}
   1113 				ptr3++;
   1114 			}
   1115 			FREE(save_ptr);
   1116 		}
   1117 
   1118 		if (cp && got_n_override == 0) {
   1119 			/*
   1120 			 * convert serd engine class into property name
   1121 			 */
   1122 			char *prop_name = MALLOC(strlen(serdclass) + 3);
   1123 			for (i = 0; i < strlen(serdclass); i++) {
   1124 				if (serdclass[i] == '.')
   1125 					prop_name[i] = '_';
   1126 				else
   1127 					prop_name[i] = serdclass[i];
   1128 			}
   1129 			prop_name[i++] = '_';
   1130 			prop_name[i++] = 'n';
   1131 			prop_name[i] = '\0';
   1132 			if (s = config_getprop(cp, prop_name)) {
   1133 				nval = atoi(s);
   1134 				out(O_ALTFP, "serd override %s_n %s",
   1135 				    serdclass, s);
   1136 				got_n_override = 1;
   1137 			}
   1138 			prop_name[i - 1] = 't';
   1139 			if (s = config_getprop(cp, prop_name)) {
   1140 				ptr = STRDUP(s);
   1141 				out(O_ALTFP, "serd override %s_t %s",
   1142 				    serdclass, s);
   1143 				got_t_override = 1;
   1144 			}
   1145 			FREE(prop_name);
   1146 		}
   1147 
   1148 		if (serdn != -1 && got_n_override == 0) {
   1149 			nval = serdn;
   1150 			out(O_ALTFP, "serd override %s_n %d", serdclass, serdn);
   1151 			got_n_override = 1;
   1152 		}
   1153 		if (serdt != NULL && got_t_override == 0) {
   1154 			ptr = STRDUP(serdt);
   1155 			out(O_ALTFP, "serd override %s_t %s", serdclass, serdt);
   1156 			got_t_override = 1;
   1157 		}
   1158 
   1159 		if (!got_n_override) {
   1160 			nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N,
   1161 			    NULL);
   1162 			ASSERT(nN->t == T_NUM);
   1163 			nval = (uint_t)nN->u.ull;
   1164 		}
   1165 		if (!got_t_override) {
   1166 			nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T,
   1167 			    NULL);
   1168 			ASSERT(nT->t == T_TIMEVAL);
   1169 			tval = (hrtime_t)nT->u.ull;
   1170 		} else {
   1171 			const unsigned long long *ullp;
   1172 			const char *suffix;
   1173 			int len;
   1174 
   1175 			len = strspn(ptr, "0123456789");
   1176 			suffix = stable(&ptr[len]);
   1177 			ullp = (unsigned long long *)lut_lookup(Timesuffixlut,
   1178 			    (void *)suffix, NULL);
   1179 			ptr[len] = '\0';
   1180 			tval = strtoull(ptr, NULL, 0) * (ullp ? *ullp : 1ll);
   1181 			FREE(ptr);
   1182 		}
   1183 		fmd_serd_create(hdl, serdname, nval, tval);
   1184 	}
   1185 
   1186 	newentp = MALLOC(sizeof (*newentp));
   1187 	newentp->ename = stable(serdclass);
   1188 	FREE(serdclass);
   1189 	newentp->ipath = ipath(serdinst->u.stmt.np->u.event.epname);
   1190 	newentp->hdl = hdl;
   1191 	if (lut_lookup(SerdEngines, newentp, (lut_cmp)serd_cmp) == NULL) {
   1192 		SerdEngines = lut_add(SerdEngines, (void *)newentp,
   1193 		    (void *)newentp, (lut_cmp)serd_cmp);
   1194 		Serd_need_save = 1;
   1195 		serd_save();
   1196 	} else {
   1197 		FREE(newentp);
   1198 	}
   1199 
   1200 
   1201 	/*
   1202 	 * increment SERD engine.  if engine fires, reset serd
   1203 	 * engine and return trip_strcode if required.
   1204 	 */
   1205 	for (i = 0; i < serdincrement; i++) {
   1206 		if (fmd_serd_record(hdl, serdname, ffep)) {
   1207 			fmd_case_add_serd(hdl, fmcase, serdname);
   1208 			fmd_serd_reset(hdl, serdname);
   1209 
   1210 			if (ippp) {
   1211 				struct node *tripinst =
   1212 				    lut_lookup(serdinst->u.stmt.lutp,
   1213 				    (void *)L_trip, NULL);
   1214 				ASSERT(tripinst != NULL);
   1215 				*enamep = tripinst->u.event.ename->u.name.s;
   1216 				*ippp = ipath(tripinst->u.event.epname);
   1217 				out(O_ALTFP|O_NONL,
   1218 				    "[engine fired: %s, sending: ", serdname);
   1219 				ipath_print(O_ALTFP|O_NONL, *enamep, *ippp);
   1220 				out(O_ALTFP, "]");
   1221 			} else {
   1222 				out(O_ALTFP, "[engine fired: %s, no trip]",
   1223 				    serdname);
   1224 			}
   1225 			FREE(serdname);
   1226 			return (1);
   1227 		}
   1228 	}
   1229 
   1230 	FREE(serdname);
   1231 	return (0);
   1232 }
   1233 
   1234 /*
   1235  * search a suspect list for upsets.  feed each upset to serd_eval() and
   1236  * build up tripped[], an array of ereports produced by the firing of
   1237  * any SERD engines.  then feed each ereport back into
   1238  * fme_receive_report().
   1239  *
   1240  * returns ntrip, the number of these ereports produced.
   1241  */
   1242 static int
   1243 upsets_eval(struct fme *fmep, fmd_event_t *ffep)
   1244 {
   1245 	/* we build an array of tripped ereports that we send ourselves */
   1246 	struct {
   1247 		const char *ename;
   1248 		const struct ipath *ipp;
   1249 	} *tripped;
   1250 	struct event *sp;
   1251 	int ntrip, nupset, i;
   1252 
   1253 	/*
   1254 	 * count the number of upsets to determine the upper limit on
   1255 	 * expected trip ereport strings.  remember that one upset can
   1256 	 * lead to at most one ereport.
   1257 	 */
   1258 	nupset = 0;
   1259 	for (sp = fmep->suspects; sp; sp = sp->suspects) {
   1260 		if (sp->t == N_UPSET)
   1261 			nupset++;
   1262 	}
   1263 
   1264 	if (nupset == 0)
   1265 		return (0);
   1266 
   1267 	/*
   1268 	 * get to this point if we have upsets and expect some trip
   1269 	 * ereports
   1270 	 */
   1271 	tripped = alloca(sizeof (*tripped) * nupset);
   1272 	bzero((void *)tripped, sizeof (*tripped) * nupset);
   1273 
   1274 	ntrip = 0;
   1275 	for (sp = fmep->suspects; sp; sp = sp->suspects)
   1276 		if (sp->t == N_UPSET &&
   1277 		    serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, sp,
   1278 		    &tripped[ntrip].ename, &tripped[ntrip].ipp) == 1)
   1279 			ntrip++;
   1280 
   1281 	for (i = 0; i < ntrip; i++) {
   1282 		struct event *ep, *nep;
   1283 		struct fme *nfmep;
   1284 		fmd_case_t *fmcase;
   1285 		const struct ipath *ipp;
   1286 		const char *eventstring;
   1287 		int prev_verbose;
   1288 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
   1289 		enum fme_state state;
   1290 
   1291 		/*
   1292 		 * First try and evaluate a case with the trip ereport plus
   1293 		 * all the other ereports that cause the trip. If that fails
   1294 		 * to evaluate then try again with just this ereport on its own.
   1295 		 */
   1296 		out(O_ALTFP|O_NONL, "fme_receive_report_serd: ");
   1297 		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
   1298 		out(O_ALTFP|O_STAMP, NULL);
   1299 		ep = fmep->e0;
   1300 		eventstring = ep->enode->u.event.ename->u.name.s;
   1301 		ipp = ep->ipp;
   1302 		prune_propagations(eventstring, ipp);
   1303 
   1304 		/*
   1305 		 * create a duplicate fme and case
   1306 		 */
   1307 		fmcase = fmd_case_open(fmep->hdl, NULL);
   1308 		out(O_ALTFP|O_NONL, "duplicate fme for event [");
   1309 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
   1310 		out(O_ALTFP, " ]");
   1311 		if ((nfmep = newfme(eventstring, ipp, fmep->hdl,
   1312 		    fmcase)) == NULL) {
   1313 			out(O_ALTFP|O_NONL, "[");
   1314 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
   1315 			out(O_ALTFP, " CANNOT DIAGNOSE]");
   1316 			publish_undiagnosable(fmep->hdl, ffep, fmcase);
   1317 			continue;
   1318 		}
   1319 		Open_fme_count++;
   1320 		nfmep->pull = fmep->pull;
   1321 		init_fme_bufs(nfmep);
   1322 		out(O_ALTFP|O_NONL, "[");
   1323 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
   1324 		out(O_ALTFP, " created FME%d, case %s]", nfmep->id,
   1325 		    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
   1326 		if (ffep) {
   1327 			fmd_case_setprincipal(nfmep->hdl, nfmep->fmcase, ffep);
   1328 			fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase, ffep);
   1329 			nfmep->e0r = ffep;
   1330 		}
   1331 
   1332 		/*
   1333 		 * add the original ereports
   1334 		 */
   1335 		for (ep = fmep->observations; ep; ep = ep->observations) {
   1336 			eventstring = ep->enode->u.event.ename->u.name.s;
   1337 			ipp = ep->ipp;
   1338 			out(O_ALTFP|O_NONL, "adding event [");
   1339 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
   1340 			out(O_ALTFP, " ]");
   1341 			nep = itree_lookup(nfmep->eventtree, eventstring, ipp);
   1342 			if (nep->count++ == 0) {
   1343 				nep->observations = nfmep->observations;
   1344 				nfmep->observations = nep;
   1345 				serialize_observation(nfmep, eventstring, ipp);
   1346 				nep->nvp = evnv_dupnvl(ep->nvp);
   1347 			}
   1348 			if (ep->ffep && ep->ffep != ffep)
   1349 				fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase,
   1350 				    ep->ffep);
   1351 			stats_counter_bump(nfmep->Rcount);
   1352 		}
   1353 
   1354 		/*
   1355 		 * add the serd trigger ereport
   1356 		 */
   1357 		if ((ep = itree_lookup(nfmep->eventtree, tripped[i].ename,
   1358 		    tripped[i].ipp)) == NULL) {
   1359 			/*
   1360 			 * The trigger ereport is not in the instance tree. It
   1361 			 * was presumably removed by prune_propagations() as
   1362 			 * this combination of events is not present in the
   1363 			 * rules.
   1364 			 */
   1365 			out(O_ALTFP, "upsets_eval: e0 not in instance tree");
   1366 			Undiag_reason = UD_VAL_BADEVENTI;
   1367 			goto retry_lone_ereport;
   1368 		}
   1369 		out(O_ALTFP|O_NONL, "adding event [");
   1370 		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
   1371 		out(O_ALTFP, " ]");
   1372 		nfmep->ecurrent = ep;
   1373 		ep->nvp = NULL;
   1374 		ep->count = 1;
   1375 		ep->observations = nfmep->observations;
   1376 		nfmep->observations = ep;
   1377 
   1378 		/*
   1379 		 * just peek first.
   1380 		 */
   1381 		nfmep->peek = 1;
   1382 		prev_verbose = Verbose;
   1383 		if (Debug == 0)
   1384 			Verbose = 0;
   1385 		lut_walk(nfmep->eventtree, (lut_cb)clear_arrows, (void *)nfmep);
   1386 		state = hypothesise(nfmep, nfmep->e0, nfmep->ull, &my_delay);
   1387 		nfmep->peek = 0;
   1388 		Verbose = prev_verbose;
   1389 		if (state == FME_DISPROVED) {
   1390 			out(O_ALTFP, "upsets_eval: hypothesis disproved");
   1391 			Undiag_reason = UD_VAL_UNSOLVD;
   1392 retry_lone_ereport:
   1393 			/*
   1394 			 * However the trigger ereport on its own might be
   1395 			 * diagnosable, so check for that. Undo the new fme
   1396 			 * and case we just created and call fme_receive_report.
   1397 			 */
   1398 			out(O_ALTFP|O_NONL, "[");
   1399 			ipath_print(O_ALTFP|O_NONL, tripped[i].ename,
   1400 			    tripped[i].ipp);
   1401 			out(O_ALTFP, " retrying with just trigger ereport]");
   1402 			itree_free(nfmep->eventtree);
   1403 			nfmep->eventtree = NULL;
   1404 			structconfig_free(nfmep->config);
   1405 			nfmep->config = NULL;
   1406 			destroy_fme_bufs(nfmep);
   1407 			fmd_case_close(nfmep->hdl, nfmep->fmcase);
   1408 			fme_receive_report(fmep->hdl, ffep,
   1409 			    tripped[i].ename, tripped[i].ipp, NULL);
   1410 			continue;
   1411 		}
   1412 
   1413 		/*
   1414 		 * and evaluate
   1415 		 */
   1416 		serialize_observation(nfmep, tripped[i].ename, tripped[i].ipp);
   1417 		fme_eval(nfmep, ffep);
   1418 	}
   1419 
   1420 	return (ntrip);
   1421 }
   1422 
   1423 /*
   1424  * fme_receive_external_report -- call when an external ereport comes in
   1425  *
   1426  * this routine just converts the relevant information from the ereport
   1427  * into a format used internally and passes it on to fme_receive_report().
   1428  */
   1429 void
   1430 fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
   1431     const char *class)
   1432 {
   1433 	struct node		*epnamenp;
   1434 	fmd_case_t		*fmcase;
   1435 	const struct ipath	*ipp;
   1436 
   1437 	class = stable(class);
   1438 
   1439 	/* Get the component path from the ereport */
   1440 	epnamenp = platform_getpath(nvl);
   1441 
   1442 	/* See if we ended up without a path. */
   1443 	if (epnamenp == NULL) {
   1444 		/* See if class permits silent discard on unknown component. */
   1445 		if (lut_lookup(Ereportenames_discard, (void *)class, NULL)) {
   1446 			out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport "
   1447 			    "to component path, but silent discard allowed.",
   1448 			    class);
   1449 		} else {
   1450 			/*
   1451 			 * XFILE: Failure to find a component is bad unless
   1452 			 * 'discard_if_config_unknown=1' was specified in the
   1453 			 * ereport definition. Indicate undiagnosable.
   1454 			 */
   1455 			out(O_ALTFP, "XFILE: Unable to map \"%s\" ereport "
   1456 			    "to component path.", class);
   1457 			Undiag_reason = UD_VAL_NOPATH;
   1458 			fmcase = fmd_case_open(hdl, NULL);
   1459 			publish_undiagnosable(hdl, ffep, fmcase);
   1460 		}
   1461 		return;
   1462 	}
   1463 
   1464 	ipp = ipath(epnamenp);
   1465 	tree_free(epnamenp);
   1466 	fme_receive_report(hdl, ffep, class, ipp, nvl);
   1467 }
   1468 
   1469 /*ARGSUSED*/
   1470 void
   1471 fme_receive_repair_list(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
   1472     const char *eventstring)
   1473 {
   1474 	char *uuid;
   1475 	nvlist_t **nva;
   1476 	uint_t nvc;
   1477 	const struct ipath *ipp;
   1478 
   1479 	if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0 ||
   1480 	    nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
   1481 	    &nva, &nvc) != 0) {
   1482 		out(O_ALTFP, "No uuid or fault list for list.repaired event");
   1483 		return;
   1484 	}
   1485 
   1486 	out(O_ALTFP, "Processing list.repaired from case %s", uuid);
   1487 
   1488 	while (nvc-- != 0) {
   1489 		/*
   1490 		 * Reset any istat or serd engine associated with this path.
   1491 		 */
   1492 		char *path;
   1493 
   1494 		if ((ipp = platform_fault2ipath(*nva++)) == NULL)
   1495 			continue;
   1496 
   1497 		path = ipath2str(NULL, ipp);
   1498 		out(O_ALTFP, "fme_receive_repair_list: resetting state for %s",
   1499 		    path);
   1500 		FREE(path);
   1501 
   1502 		lut_walk(Istats, (lut_cb)istat_counter_reset_cb, (void *)ipp);
   1503 		istat_save();
   1504 
   1505 		lut_walk(SerdEngines, (lut_cb)serd_reset_cb, (void *)ipp);
   1506 		serd_save();
   1507 	}
   1508 }
   1509 
   1510 /*ARGSUSED*/
   1511 void
   1512 fme_receive_topology_change(void)
   1513 {
   1514 	lut_walk(Istats, (lut_cb)istat_counter_topo_chg_cb, NULL);
   1515 	istat_save();
   1516 
   1517 	lut_walk(SerdEngines, (lut_cb)serd_topo_chg_cb, NULL);
   1518 	serd_save();
   1519 }
   1520 
   1521 static int mark_arrows(struct fme *fmep, struct event *ep, int mark,
   1522     unsigned long long at_latest_by, unsigned long long *pdelay, int keep);
   1523 
   1524 /* ARGSUSED */
   1525 static void
   1526 clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
   1527 {
   1528 	struct bubble *bp;
   1529 	struct arrowlist *ap;
   1530 
   1531 	ep->cached_state = 0;
   1532 	ep->keep_in_tree = 0;
   1533 	for (bp = itree_next_bubble(ep, NULL); bp;
   1534 	    bp = itree_next_bubble(ep, bp)) {
   1535 		if (bp->t != B_FROM)
   1536 			continue;
   1537 		bp->mark = 0;
   1538 		for (ap = itree_next_arrow(bp, NULL); ap;
   1539 		    ap = itree_next_arrow(bp, ap))
   1540 			ap->arrowp->mark = 0;
   1541 	}
   1542 }
   1543 
   1544 static void
   1545 fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
   1546     const char *eventstring, const struct ipath *ipp, nvlist_t *nvl)
   1547 {
   1548 	struct event *ep;
   1549 	struct fme *fmep = NULL;
   1550 	struct fme *ofmep = NULL;
   1551 	struct fme *cfmep, *svfmep;
   1552 	int matched = 0;
   1553 	nvlist_t *defect;
   1554 	fmd_case_t *fmcase;
   1555 
   1556 	out(O_ALTFP|O_NONL, "fme_receive_report: ");
   1557 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
   1558 	out(O_ALTFP|O_STAMP, NULL);
   1559 
   1560 	/* decide which FME it goes to */
   1561 	for (fmep = FMElist; fmep; fmep = fmep->next) {
   1562 		int prev_verbose;
   1563 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
   1564 		enum fme_state state;
   1565 		nvlist_t *pre_peek_nvp = NULL;
   1566 
   1567 		if (fmep->overflow) {
   1568 			if (!(fmd_case_closed(fmep->hdl, fmep->fmcase)))
   1569 				ofmep = fmep;
   1570 
   1571 			continue;
   1572 		}
   1573 
   1574 		/*
   1575 		 * ignore solved or closed cases
   1576 		 */
   1577 		if (fmep->posted_suspects ||
   1578 		    fmd_case_solved(fmep->hdl, fmep->fmcase) ||
   1579 		    fmd_case_closed(fmep->hdl, fmep->fmcase))
   1580 			continue;
   1581 
   1582 		/* look up event in event tree for this FME */
   1583 		if ((ep = itree_lookup(fmep->eventtree,
   1584 		    eventstring, ipp)) == NULL)
   1585 			continue;
   1586 
   1587 		/* note observation */
   1588 		fmep->ecurrent = ep;
   1589 		if (ep->count++ == 0) {
   1590 			/* link it into list of observations seen */
   1591 			ep->observations = fmep->observations;
   1592 			fmep->observations = ep;
   1593 			ep->nvp = evnv_dupnvl(nvl);
   1594 		} else {
   1595 			/* use new payload values for peek */
   1596 			pre_peek_nvp = ep->nvp;
   1597 			ep->nvp = evnv_dupnvl(nvl);
   1598 		}
   1599 
   1600 		/* tell hypothesise() not to mess with suspect list */
   1601 		fmep->peek = 1;
   1602 
   1603 		/* don't want this to be verbose (unless Debug is set) */
   1604 		prev_verbose = Verbose;
   1605 		if (Debug == 0)
   1606 			Verbose = 0;
   1607 
   1608 		lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
   1609 		state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
   1610 
   1611 		fmep->peek = 0;
   1612 
   1613 		/* put verbose flag back */
   1614 		Verbose = prev_verbose;
   1615 
   1616 		if (state != FME_DISPROVED) {
   1617 			/* found an FME that explains the ereport */
   1618 			matched++;
   1619 			out(O_ALTFP|O_NONL, "[");
   1620 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
   1621 			out(O_ALTFP, " explained by FME%d]", fmep->id);
   1622 
   1623 			if (pre_peek_nvp)
   1624 				nvlist_free(pre_peek_nvp);
   1625 
   1626 			if (ep->count == 1)
   1627 				serialize_observation(fmep, eventstring, ipp);
   1628 
   1629 			if (ffep) {
   1630 				fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
   1631 				ep->ffep = ffep;
   1632 			}
   1633 
   1634 			stats_counter_bump(fmep->Rcount);
   1635 
   1636 			/* re-eval FME */
   1637 			fme_eval(fmep, ffep);
   1638 		} else {
   1639 
   1640 			/* not a match, undo noting of observation */
   1641 			fmep->ecurrent = NULL;
   1642 			if (--ep->count == 0) {
   1643 				/* unlink it from observations */
   1644 				fmep->observations = ep->observations;
   1645 				ep->observations = NULL;
   1646 				nvlist_free(ep->nvp);
   1647 				ep->nvp = NULL;
   1648 			} else {
   1649 				nvlist_free(ep->nvp);
   1650 				ep->nvp = pre_peek_nvp;
   1651 			}
   1652 		}
   1653 	}
   1654 
   1655 	if (matched)
   1656 		return;	/* explained by at least one existing FME */
   1657 
   1658 	/* clean up closed fmes */
   1659 	cfmep = ClosedFMEs;
   1660 	while (cfmep != NULL) {
   1661 		svfmep = cfmep->next;
   1662 		destroy_fme(cfmep);
   1663 		cfmep = svfmep;
   1664 	}
   1665 	ClosedFMEs = NULL;
   1666 	prune_propagations(eventstring, ipp);
   1667 
   1668 	if (ofmep) {
   1669 		out(O_ALTFP|O_NONL, "[");
   1670 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
   1671 		out(O_ALTFP, " ADDING TO OVERFLOW FME]");
   1672 		if (ffep)
   1673 			fmd_case_add_ereport(hdl, ofmep->fmcase, ffep);
   1674 
   1675 		return;
   1676 
   1677 	} else if (Max_fme && (Open_fme_count >= Max_fme)) {
   1678 		out(O_ALTFP|O_NONL, "[");
   1679 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
   1680 		out(O_ALTFP, " MAX OPEN FME REACHED]");
   1681 
   1682 		fmcase = fmd_case_open(hdl, NULL);
   1683 
   1684 		/* Create overflow fme */
   1685 		if ((fmep = newfme(eventstring, ipp, hdl, fmcase)) == NULL) {
   1686 			out(O_ALTFP|O_NONL, "[");
   1687 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
   1688 			out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]");
   1689 			publish_undiagnosable(hdl, ffep, fmcase);
   1690 			return;
   1691 		}
   1692 
   1693 		Open_fme_count++;
   1694 
   1695 		init_fme_bufs(fmep);
   1696 		fmep->overflow = B_TRUE;
   1697 
   1698 		if (ffep)
   1699 			fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
   1700 
   1701 		Undiag_reason = UD_VAL_MAXFME;
   1702 		defect = fmd_nvl_create_fault(hdl,
   1703 		    undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
   1704 		(void) nvlist_add_string(defect, UNDIAG_REASON,
   1705 		    undiag_2reason_str(Undiag_reason));
   1706 		fmd_case_add_suspect(hdl, fmep->fmcase, defect);
   1707 		fmd_case_solve(hdl, fmep->fmcase);
   1708 		Undiag_reason = UD_VAL_UNKNOWN;
   1709 		return;
   1710 	}
   1711 
   1712 	/* open a case */
   1713 	fmcase = fmd_case_open(hdl, NULL);
   1714 
   1715 	/* start a new FME */
   1716 	if ((fmep = newfme(eventstring, ipp, hdl, fmcase)) == NULL) {
   1717 		out(O_ALTFP|O_NONL, "[");
   1718 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
   1719 		out(O_ALTFP, " CANNOT DIAGNOSE]");
   1720 		publish_undiagnosable(hdl, ffep, fmcase);
   1721 		return;
   1722 	}
   1723 
   1724 	Open_fme_count++;
   1725 
   1726 	init_fme_bufs(fmep);
   1727 
   1728 	out(O_ALTFP|O_NONL, "[");
   1729 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
   1730 	out(O_ALTFP, " created FME%d, case %s]", fmep->id,
   1731 	    fmd_case_uuid(hdl, fmep->fmcase));
   1732 
   1733 	ep = fmep->e0;
   1734 	ASSERT(ep != NULL);
   1735 
   1736 	/* note observation */
   1737 	fmep->ecurrent = ep;
   1738 	if (ep->count++ == 0) {
   1739 		/* link it into list of observations seen */
   1740 		ep->observations = fmep->observations;
   1741 		fmep->observations = ep;
   1742 		ep->nvp = evnv_dupnvl(nvl);
   1743 		serialize_observation(fmep, eventstring, ipp);
   1744 	} else {
   1745 		/* new payload overrides any previous */
   1746 		nvlist_free(ep->nvp);
   1747 		ep->nvp = evnv_dupnvl(nvl);
   1748 	}
   1749 
   1750 	stats_counter_bump(fmep->Rcount);
   1751 
   1752 	if (ffep) {
   1753 		fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
   1754 		fmd_case_setprincipal(hdl, fmep->fmcase, ffep);
   1755 		fmep->e0r = ffep;
   1756 		ep->ffep = ffep;
   1757 	}
   1758 
   1759 	/* give the diagnosis algorithm a shot at the new FME state */
   1760 	fme_eval(fmep, ffep);
   1761 }
   1762 
   1763 void
   1764 fme_status(int flags)
   1765 {
   1766 	struct fme *fmep;
   1767 
   1768 	if (FMElist == NULL) {
   1769 		out(flags, "No fault management exercises underway.");
   1770 		return;
   1771 	}
   1772 
   1773 	for (fmep = FMElist; fmep; fmep = fmep->next)
   1774 		fme_print(flags, fmep);
   1775 }
   1776 
   1777 /*
   1778  * "indent" routines used mostly for nicely formatted debug output, but also
   1779  * for sanity checking for infinite recursion bugs.
   1780  */
   1781 
   1782 #define	MAX_INDENT 1024
   1783 static const char *indent_s[MAX_INDENT];
   1784 static int current_indent;
   1785 
   1786 static void
   1787 indent_push(const char *s)
   1788 {
   1789 	if (current_indent < MAX_INDENT)
   1790 		indent_s[current_indent++] = s;
   1791 	else
   1792 		out(O_DIE, "unexpected recursion depth (%d)", current_indent);
   1793 }
   1794 
   1795 static void
   1796 indent_set(const char *s)
   1797 {
   1798 	current_indent = 0;
   1799 	indent_push(s);
   1800 }
   1801 
   1802 static void
   1803 indent_pop(void)
   1804 {
   1805 	if (current_indent > 0)
   1806 		current_indent--;
   1807 	else
   1808 		out(O_DIE, "recursion underflow");
   1809 }
   1810 
   1811 static void
   1812 indent(void)
   1813 {
   1814 	int i;
   1815 	if (!Verbose)
   1816 		return;
   1817 	for (i = 0; i < current_indent; i++)
   1818 		out(O_ALTFP|O_VERB|O_NONL, indent_s[i]);
   1819 }
   1820 
/*
 * "circumstance" codes accepted by print_suspects().  SLCHANGED, SLWAIT
 * and SLDISPROVED each select their own message there; any other value
 * (including SLNEW) gets the "DIAGNOSIS PRODUCED" message.
 */
#define	SLNEW		1
#define	SLCHANGED	2
#define	SLWAIT		3
#define	SLDISPROVED	4
   1825 
   1826 static void
   1827 print_suspects(int circumstance, struct fme *fmep)
   1828 {
   1829 	struct event *ep;
   1830 
   1831 	out(O_ALTFP|O_NONL, "[");
   1832 	if (circumstance == SLCHANGED) {
   1833 		out(O_ALTFP|O_NONL, "FME%d diagnosis changed. state: %s, "
   1834 		    "suspect list:", fmep->id, fme_state2str(fmep->state));
   1835 	} else if (circumstance == SLWAIT) {
   1836 		out(O_ALTFP|O_NONL, "FME%d set wait timer %ld ", fmep->id,
   1837 		    fmep->timer);
   1838 		ptree_timeval(O_ALTFP|O_NONL, &fmep->wull);
   1839 	} else if (circumstance == SLDISPROVED) {
   1840 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id);
   1841 	} else {
   1842 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id);
   1843 	}
   1844 
   1845 	if (circumstance == SLWAIT || circumstance == SLDISPROVED) {
   1846 		out(O_ALTFP, "]");
   1847 		return;
   1848 	}
   1849 
   1850 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
   1851 		out(O_ALTFP|O_NONL, " ");
   1852 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
   1853 	}
   1854 	out(O_ALTFP, "]");
   1855 }
   1856 
   1857 static struct node *
   1858 eventprop_lookup(struct event *ep, const char *propname)
   1859 {
   1860 	return (lut_lookup(ep->props, (void *)propname, NULL));
   1861 }
   1862 
/*
 * Scratch buffer shared by node2fmri() and ipath2fmri() for turning
 * instance numbers into strings with ulltostr().  Both callers store the
 * terminating '\0' at numbuf[MAXDIGITIDX] and pass its address to
 * ulltostr().  Being file-static, the buffer is shared between calls.
 */
#define	MAXDIGITIDX	23
static char numbuf[MAXDIGITIDX + 1];
   1865 
   1866 static int
   1867 node2uint(struct node *n, uint_t *valp)
   1868 {
   1869 	struct evalue value;
   1870 	struct lut *globals = NULL;
   1871 
   1872 	if (n == NULL)
   1873 		return (1);
   1874 
   1875 	/*
   1876 	 * check value.v since we are being asked to convert an unsigned
   1877 	 * long long int to an unsigned int
   1878 	 */
   1879 	if (! eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) ||
   1880 	    value.t != UINT64 || value.v > (1ULL << 32))
   1881 		return (1);
   1882 
   1883 	*valp = (uint_t)value.v;
   1884 
   1885 	return (0);
   1886 }
   1887 
   1888 static nvlist_t *
   1889 node2fmri(struct node *n)
   1890 {
   1891 	nvlist_t **pa, *f, *p;
   1892 	struct node *nc;
   1893 	uint_t depth = 0;
   1894 	char *numstr, *nullbyte;
   1895 	char *failure;
   1896 	int err, i;
   1897 
   1898 	/* XXX do we need to be able to handle a non-T_NAME node? */
   1899 	if (n == NULL || n->t != T_NAME)
   1900 		return (NULL);
   1901 
   1902 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
   1903 		if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM)
   1904 			break;
   1905 		depth++;
   1906 	}
   1907 
   1908 	if (nc != NULL) {
   1909 		/* We bailed early, something went wrong */
   1910 		return (NULL);
   1911 	}
   1912 
   1913 	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
   1914 		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
   1915 	pa = alloca(depth * sizeof (nvlist_t *));
   1916 	for (i = 0; i < depth; i++)
   1917 		pa[i] = NULL;
   1918 
   1919 	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
   1920 	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
   1921 	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
   1922 	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
   1923 	if (err != 0) {
   1924 		failure = "basic construction of FMRI failed";
   1925 		goto boom;
   1926 	}
   1927 
   1928 	numbuf[MAXDIGITIDX] = '\0';
   1929 	nullbyte = &numbuf[MAXDIGITIDX];
   1930 	i = 0;
   1931 
   1932 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
   1933 		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
   1934 		if (err != 0) {
   1935 			failure = "alloc of an hc-pair failed";
   1936 			goto boom;
   1937 		}
   1938 		err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s);
   1939 		numstr = ulltostr(nc->u.name.child->u.ull, nullbyte);
   1940 		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
   1941 		if (err != 0) {
   1942 			failure = "construction of an hc-pair failed";
   1943 			goto boom;
   1944 		}
   1945 		pa[i++] = p;
   1946 	}
   1947 
   1948 	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
   1949 	if (err == 0) {
   1950 		for (i = 0; i < depth; i++)
   1951 			if (pa[i] != NULL)
   1952 				nvlist_free(pa[i]);
   1953 		return (f);
   1954 	}
   1955 	failure = "addition of hc-pair array to FMRI failed";
   1956 
   1957 boom:
   1958 	for (i = 0; i < depth; i++)
   1959 		if (pa[i] != NULL)
   1960 			nvlist_free(pa[i]);
   1961 	nvlist_free(f);
   1962 	out(O_DIE, "%s", failure);
   1963 	/*NOTREACHED*/
   1964 	return (NULL);
   1965 }
   1966 
/*
 * an ipath cache entry is an array of these, with s==NULL at the end.
 * ipath2fmri() walks such an array up to that terminating entry.
 */
struct ipath {
	const char *s;	/* component name (in stable) */
	int i;		/* instance number */
};
   1972 
   1973 static nvlist_t *
   1974 ipath2fmri(struct ipath *ipath)
   1975 {
   1976 	nvlist_t **pa, *f, *p;
   1977 	uint_t depth = 0;
   1978 	char *numstr, *nullbyte;
   1979 	char *failure;
   1980 	int err, i;
   1981 	struct ipath *ipp;
   1982 
   1983 	for (ipp = ipath; ipp->s != NULL; ipp++)
   1984 		depth++;
   1985 
   1986 	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
   1987 		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
   1988 	pa = alloca(depth * sizeof (nvlist_t *));
   1989 	for (i = 0; i < depth; i++)
   1990 		pa[i] = NULL;
   1991 
   1992 	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
   1993 	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
   1994 	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
   1995 	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
   1996 	if (err != 0) {
   1997 		failure = "basic construction of FMRI failed";
   1998 		goto boom;
   1999 	}
   2000 
   2001 	numbuf[MAXDIGITIDX] = '\0';
   2002 	nullbyte = &numbuf[MAXDIGITIDX];
   2003 	i = 0;
   2004 
   2005 	for (ipp = ipath; ipp->s != NULL; ipp++) {
   2006 		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
   2007 		if (err != 0) {
   2008 			failure = "alloc of an hc-pair failed";
   2009 			goto boom;
   2010 		}
   2011 		err = nvlist_add_string(p, FM_FMRI_HC_NAME, ipp->s);
   2012 		numstr = ulltostr(ipp->i, nullbyte);
   2013 		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
   2014 		if (err != 0) {
   2015 			failure = "construction of an hc-pair failed";
   2016 			goto boom;
   2017 		}
   2018 		pa[i++] = p;
   2019 	}
   2020 
   2021 	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
   2022 	if (err == 0) {
   2023 		for (i = 0; i < depth; i++)
   2024 			if (pa[i] != NULL)
   2025 				nvlist_free(pa[i]);
   2026 		return (f);
   2027 	}
   2028 	failure = "addition of hc-pair array to FMRI failed";
   2029 
   2030 boom:
   2031 	for (i = 0; i < depth; i++)
   2032 		if (pa[i] != NULL)
   2033 			nvlist_free(pa[i]);
   2034 	nvlist_free(f);
   2035 	out(O_DIE, "%s", failure);
   2036 	/*NOTREACHED*/
   2037 	return (NULL);
   2038 }
   2039 
   2040 static uint8_t
   2041 percentof(uint_t part, uint_t whole)
   2042 {
   2043 	unsigned long long p = part * 1000;
   2044 
   2045 	return ((p / whole / 10) + (((p / whole % 10) >= 5) ? 1 : 0));
   2046 }
   2047 
/*
 * One entry of a suspect list being prepared for publication.  Filled in
 * by get_resources(); the three nvlists are released by rslfree().
 */
struct rsl {
	struct event *suspect;	/* the suspect event itself */
	nvlist_t *asru;		/* ASRU FMRI, may be NULL */
	nvlist_t *fru;		/* FRU FMRI, may be NULL */
	nvlist_t *rsrc;		/* resource FMRI, may be NULL or alias asru */
};
   2054 
   2055 static void publish_suspects(struct fme *fmep, struct rsl *srl);
   2056 
   2057 /*
   2058  *  rslfree -- free internal members of struct rsl not expected to be
   2059  *	freed elsewhere.
   2060  */
   2061 static void
   2062 rslfree(struct rsl *freeme)
   2063 {
   2064 	if (freeme->asru != NULL)
   2065 		nvlist_free(freeme->asru);
   2066 	if (freeme->fru != NULL)
   2067 		nvlist_free(freeme->fru);
   2068 	if (freeme->rsrc != NULL && freeme->rsrc != freeme->asru)
   2069 		nvlist_free(freeme->rsrc);
   2070 }
   2071 
   2072 /*
   2073  *  rslcmp -- compare two rsl structures.  Use the following
   2074  *	comparisons to establish cardinality:
   2075  *
   2076  *	1. Name of the suspect's class. (simple strcmp)
   2077  *	2. Name of the suspect's ASRU. (trickier, since nvlist)
   2078  *
   2079  */
   2080 static int
   2081 rslcmp(const void *a, const void *b)
   2082 {
   2083 	struct rsl *r1 = (struct rsl *)a;
   2084 	struct rsl *r2 = (struct rsl *)b;
   2085 	int rv;
   2086 
   2087 	rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s,
   2088 	    r2->suspect->enode->u.event.ename->u.name.s);
   2089 	if (rv != 0)
   2090 		return (rv);
   2091 
   2092 	if (r1->rsrc == NULL && r2->rsrc == NULL)
   2093 		return (0);
   2094 	if (r1->rsrc == NULL)
   2095 		return (-1);
   2096 	if (r2->rsrc == NULL)
   2097 		return (1);
   2098 	return (evnv_cmpnvl(r1->rsrc, r2->rsrc, 0));
   2099 }
   2100 
   2101 /*
   2102  * get_resources -- for a given suspect, determine what ASRU, FRU and
   2103  *     RSRC nvlists should be advertised in the final suspect list.
   2104  */
   2105 void
   2106 get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot)
   2107 {
   2108 	struct node *asrudef, *frudef;
   2109 	nvlist_t *asru, *fru;
   2110 	nvlist_t *rsrc = NULL;
   2111 	char *pathstr;
   2112 
   2113 	/*
   2114 	 * First find any ASRU and/or FRU defined in the
   2115 	 * initial fault tree.
   2116 	 */
   2117 	asrudef = eventprop_lookup(sp, L_ASRU);
   2118 	frudef = eventprop_lookup(sp, L_FRU);
   2119 
   2120 	/*
   2121 	 * Create FMRIs based on those definitions
   2122 	 */
   2123 	asru = node2fmri(asrudef);
   2124 	fru = node2fmri(frudef);
   2125 	pathstr = ipath2str(NULL, sp->ipp);
   2126 
   2127 	/*
   2128 	 *  Allow for platform translations of the FMRIs
   2129 	 */
   2130 	platform_units_translate(is_defect(sp->t), croot, &asru, &fru, &rsrc,
   2131 	    pathstr);
   2132 
   2133 	FREE(pathstr);
   2134 	rsrcs->suspect = sp;
   2135 	rsrcs->asru = asru;
   2136 	rsrcs->fru = fru;
   2137 	rsrcs->rsrc = rsrc;
   2138 }
   2139 
   2140 /*
   2141  * trim_suspects -- prior to publishing, we may need to remove some
   2142  *    suspects from the list.  If we're auto-closing upsets, we don't
   2143  *    want any of those in the published list.  If the ASRUs for multiple
   2144  *    defects resolve to the same ASRU (driver) we only want to publish
   2145  *    that as a single suspect.
   2146  */
   2147 static int
   2148 trim_suspects(struct fme *fmep, struct rsl *begin, struct rsl *begin2,
   2149     fmd_event_t *ffep)
   2150 {
   2151 	struct event *ep;
   2152 	struct rsl *rp = begin;
   2153 	struct rsl *rp2 = begin2;
   2154 	int mess_zero_count = 0;
   2155 	int serd_rval;
   2156 	uint_t messval;
   2157 
   2158 	/* remove any unwanted upsets and populate our array */
   2159 	for (ep = fmep->psuspects; ep; ep = ep->psuspects) {
   2160 		if (is_upset(ep->t))
   2161 			continue;
   2162 		serd_rval = serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, ep,
   2163 		    NULL, NULL);
   2164 		if (serd_rval == 0)
   2165 			continue;
   2166 		if (node2uint(eventprop_lookup(ep, L_message),
   2167 		    &messval) == 0 && messval == 0) {
   2168 			get_resources(ep, rp2, fmep->config);
   2169 			rp2++;
   2170 			mess_zero_count++;
   2171 		} else {
   2172 			get_resources(ep, rp, fmep->config);
   2173 			rp++;
   2174 			fmep->nsuspects++;
   2175 		}
   2176 	}
   2177 	return (mess_zero_count);
   2178 }
   2179 
   2180 /*
   2181  * addpayloadprop -- add a payload prop to a problem
   2182  */
   2183 static void
   2184 addpayloadprop(const char *lhs, struct evalue *rhs, nvlist_t *fault)
   2185 {
   2186 	nvlist_t *rsrc, *hcs;
   2187 
   2188 	ASSERT(fault != NULL);
   2189 	ASSERT(lhs != NULL);
   2190 	ASSERT(rhs != NULL);
   2191 
   2192 	if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE, &rsrc) != 0)
   2193 		out(O_DIE, "cannot add payloadprop \"%s\" to fault", lhs);
   2194 
   2195 	if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0) {
   2196 		out(O_ALTFP|O_VERB2, "addpayloadprop: create hc_specific");
   2197 		if (nvlist_xalloc(&hcs, NV_UNIQUE_NAME, &Eft_nv_hdl) != 0)
   2198 			out(O_DIE,
   2199 			    "cannot add payloadprop \"%s\" to fault", lhs);
   2200 		if (nvlist_add_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, hcs) != 0)
   2201 			out(O_DIE,
   2202 			    "cannot add payloadprop \"%s\" to fault", lhs);
   2203 		nvlist_free(hcs);
   2204 		if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0)
   2205 			out(O_DIE,
   2206 			    "cannot add payloadprop \"%s\" to fault", lhs);
   2207 	} else
   2208 		out(O_ALTFP|O_VERB2, "addpayloadprop: reuse hc_specific");
   2209 
   2210 	if (rhs->t == UINT64) {
   2211 		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=%llu", lhs, rhs->v);
   2212 
   2213 		if (nvlist_add_uint64(hcs, lhs, rhs->v) != 0)
   2214 			out(O_DIE,
   2215 			    "cannot add payloadprop \"%s\" to fault", lhs);
   2216 	} else {
   2217 		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=\"%s\"",
   2218 		    lhs, (char *)(uintptr_t)rhs->v);
   2219 
   2220 		if (nvlist_add_string(hcs, lhs, (char *)(uintptr_t)rhs->v) != 0)
   2221 			out(O_DIE,
   2222 			    "cannot add payloadprop \"%s\" to fault", lhs);
   2223 	}
   2224 }
   2225 
/*
 * Working state for istat_save(): the serialization buffer, the current
 * write position within it (advanced by istat2str()), and its size in
 * bytes (accumulated by istataddsize()).
 */
static char *Istatbuf;
static char *Istatbufptr;
static int Istatsz;
   2229 
   2230 /*
   2231  * istataddsize -- calculate size of istat and add it to Istatsz
   2232  */
   2233 /*ARGSUSED2*/
   2234 static void
   2235 istataddsize(const struct istat_entry *lhs, struct stats *rhs, void *arg)
   2236 {
   2237 	int val;
   2238 
   2239 	ASSERT(lhs != NULL);
   2240 	ASSERT(rhs != NULL);
   2241 
   2242 	if ((val = stats_counter_value(rhs)) == 0)
   2243 		return;	/* skip zero-valued stats */
   2244 
   2245 	/* count up the size of the stat name */
   2246 	Istatsz += ipath2strlen(lhs->ename, lhs->ipath);
   2247 	Istatsz++;	/* for the trailing NULL byte */
   2248 
   2249 	/* count up the size of the stat value */
   2250 	Istatsz += snprintf(NULL, 0, "%d", val);
   2251 	Istatsz++;	/* for the trailing NULL byte */
   2252 }
   2253 
   2254 /*
   2255  * istat2str -- serialize an istat, writing result to *Istatbufptr
   2256  */
   2257 /*ARGSUSED2*/
   2258 static void
   2259 istat2str(const struct istat_entry *lhs, struct stats *rhs, void *arg)
   2260 {
   2261 	char *str;
   2262 	int len;
   2263 	int val;
   2264 
   2265 	ASSERT(lhs != NULL);
   2266 	ASSERT(rhs != NULL);
   2267 
   2268 	if ((val = stats_counter_value(rhs)) == 0)
   2269 		return;	/* skip zero-valued stats */
   2270 
   2271 	/* serialize the stat name */
   2272 	str = ipath2str(lhs->ename, lhs->ipath);
   2273 	len = strlen(str);
   2274 
   2275 	ASSERT(Istatbufptr + len + 1 < &Istatbuf[Istatsz]);
   2276 	(void) strlcpy(Istatbufptr, str, &Istatbuf[Istatsz] - Istatbufptr);
   2277 	Istatbufptr += len;
   2278 	FREE(str);
   2279 	*Istatbufptr++ = '\0';
   2280 
   2281 	/* serialize the stat value */
   2282 	Istatbufptr += snprintf(Istatbufptr, &Istatbuf[Istatsz] - Istatbufptr,
   2283 	    "%d", val);
   2284 	*Istatbufptr++ = '\0';
   2285 
   2286 	ASSERT(Istatbufptr <= &Istatbuf[Istatsz]);
   2287 }
   2288 
   2289 void
   2290 istat_save()
   2291 {
   2292 	if (Istat_need_save == 0)
   2293 		return;
   2294 
   2295 	/* figure out how big the serialzed info is */
   2296 	Istatsz = 0;
   2297 	lut_walk(Istats, (lut_cb)istataddsize, NULL);
   2298 
   2299 	if (Istatsz == 0) {
   2300 		/* no stats to save */
   2301 		fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
   2302 		return;
   2303 	}
   2304 
   2305 	/* create the serialized buffer */
   2306 	Istatbufptr = Istatbuf = MALLOC(Istatsz);
   2307 	lut_walk(Istats, (lut_cb)istat2str, NULL);
   2308 
   2309 	/* clear out current saved stats */
   2310 	fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
   2311 
   2312 	/* write out the new version */
   2313 	fmd_buf_write(Hdl, NULL, WOBUF_ISTATS, Istatbuf, Istatsz);
   2314 	FREE(Istatbuf);
   2315 
   2316 	Istat_need_save = 0;
   2317 }
   2318 
   2319 int
   2320 istat_cmp(struct istat_entry *ent1, struct istat_entry *ent2)
   2321 {
   2322 	if (ent1->ename != ent2->ename)
   2323 		return (ent2->ename - ent1->ename);
   2324 	if (ent1->ipath != ent2->ipath)
   2325 		return ((char *)ent2->ipath - (char *)ent1->ipath);
   2326 
   2327 	return (0);
   2328 }
   2329 
   2330 /*
   2331  * istat-verify -- verify the component associated with a stat still exists
   2332  *
   2333  * if the component no longer exists, this routine resets the stat and
   2334  * returns 0.  if the component still exists, it returns 1.
   2335  */
   2336 static int
   2337 istat_verify(struct node *snp, struct istat_entry *entp)
   2338 {
   2339 	struct stats *statp;
   2340 	nvlist_t *fmri;
   2341 
   2342 	fmri = node2fmri(snp->u.event.epname);
   2343 	if (platform_path_exists(fmri)) {
   2344 		nvlist_free(fmri);
   2345 		return (1);
   2346 	}
   2347 	nvlist_free(fmri);
   2348 
   2349 	/* component no longer in system.  zero out the associated stats */
   2350 	if ((statp = (struct stats *)
   2351 	    lut_lookup(Istats, entp, (lut_cmp)istat_cmp)) == NULL ||
   2352 	    stats_counter_value(statp) == 0)
   2353 		return (0);	/* stat is already reset */
   2354 
   2355 	Istat_need_save = 1;
   2356 	stats_counter_reset(statp);
   2357 	return (0);
   2358 }
   2359 
   2360 static void
   2361 istat_bump(struct node *snp, int n)
   2362 {
   2363 	struct stats *statp;
   2364 	struct istat_entry ent;
   2365 
   2366 	ASSERT(snp != NULL);
   2367 	ASSERTinfo(snp->t == T_EVENT, ptree_nodetype2str(snp->t));
   2368 	ASSERT(snp->u.event.epname != NULL);
   2369 
   2370 	/* class name should be hoisted into a single stable entry */
   2371 	ASSERT(snp->u.event.ename->u.name.next == NULL);
   2372 	ent.ename = snp->u.event.ename->u.name.s;
   2373 	ent.ipath = ipath(snp->u.event.epname);
   2374 
   2375 	if (!istat_verify(snp, &ent)) {
   2376 		/* component no longer exists in system, nothing to do */
   2377 		return;
   2378 	}
   2379 
   2380 	if ((statp = (struct stats *)
   2381 	    lut_lookup(Istats, &ent, (lut_cmp)istat_cmp)) == NULL) {
   2382 		/* need to create the counter */
   2383 		int cnt = 0;
   2384 		struct node *np;
   2385 		char *sname;
   2386 		char *snamep;
   2387 		struct istat_entry *newentp;
   2388 
   2389 		/* count up the size of the stat name */
   2390 		np = snp->u.event.ename;
   2391 		while (np != NULL) {
   2392 			cnt += strlen(np->u.name.s);
   2393 			cnt++;	/* for the '.' or '@' */
   2394 			np = np->u.name.next;
   2395 		}
   2396 		np = snp->u.event.epname;
   2397 		while (np != NULL) {
   2398 			cnt += snprintf(NULL, 0, "%s%llu",
   2399 			    np->u.name.s, np->u.name.child->u.ull);
   2400 			cnt++;	/* for the '/' or trailing NULL byte */
   2401 			np = np->u.name.next;
   2402 		}
   2403 
   2404 		/* build the stat name */
   2405 		snamep = sname = alloca(cnt);
   2406 		np = snp->u.event.ename;
   2407 		while (np != NULL) {
   2408 			snamep += snprintf(snamep, &sname[cnt] - snamep,
   2409 			    "%s", np->u.name.s);
   2410 			np = np->u.name.next;
   2411 			if (np)
   2412 				*snamep++ = '.';
   2413 		}
   2414 		*snamep++ = '@';
   2415 		np = snp->u.event.epname;
   2416 		while (np != NULL) {
   2417 			snamep += snprintf(snamep, &sname[cnt] - snamep,
   2418 			    "%s%llu", np->u.name.s, np->u.name.child->u.ull);
   2419 			np = np->u.name.next;
   2420 			if (np)
   2421 				*snamep++ = '/';
   2422 		}
   2423 		*snamep++ = '\0';
   2424 
   2425 		/* create the new stat & add it to our list */
   2426 		newentp = MALLOC(sizeof (*newentp));
   2427 		*newentp = ent;
   2428 		statp = stats_new_counter(NULL, sname, 0);
   2429 		Istats = lut_add(Istats, (void *)newentp, (void *)statp,
   2430 		    (lut_cmp)istat_cmp);
   2431 	}
   2432 
   2433 	/* if n is non-zero, set that value instead of bumping */
   2434 	if (n) {
   2435 		stats_counter_reset(statp);
   2436 		stats_counter_add(statp, n);
   2437 	} else
   2438 		stats_counter_bump(statp);
   2439 	Istat_need_save = 1;
   2440 
   2441 	ipath_print(O_ALTFP|O_VERB2, ent.ename, ent.ipath);
   2442 	out(O_ALTFP|O_VERB2, " %s to value %d", n ? "set" : "incremented",
   2443 	    stats_counter_value(statp));
   2444 }
   2445 
   2446 /*ARGSUSED*/
   2447 static void
   2448 istat_destructor(void *left, void *right, void *arg)
   2449 {
   2450 	struct istat_entry *entp = (struct istat_entry *)left;
   2451 	struct stats *statp = (struct stats *)right;
   2452 	FREE(entp);
   2453 	stats_delete(statp);
   2454 }
   2455 
   2456 /*
   2457  * Callback used in a walk of the Istats to reset matching stat counters.
   2458  */
   2459 static void
   2460 istat_counter_reset_cb(struct istat_entry *entp, struct stats *statp,
   2461     const struct ipath *ipp)
   2462 {
   2463 	char *path;
   2464 
   2465 	if (entp->ipath == ipp) {
   2466 		path = ipath2str(entp->ename, ipp);
   2467 		out(O_ALTFP, "istat_counter_reset_cb: resetting %s", path);
   2468 		FREE(path);
   2469 		stats_counter_reset(statp);
   2470 		Istat_need_save = 1;
   2471 	}
   2472 }
   2473 
   2474 /*ARGSUSED*/
   2475 static void
   2476 istat_counter_topo_chg_cb(struct istat_entry *entp, struct stats *statp,
   2477     void *unused)
   2478 {
   2479 	char *path;
   2480 	nvlist_t *fmri;
   2481 
   2482 	fmri = ipath2fmri((struct ipath *)(entp->ipath));
   2483 	if (!platform_path_exists(fmri)) {
   2484 		path = ipath2str(entp->ename, entp->ipath);
   2485 		out(O_ALTFP, "istat_counter_topo_chg_cb: not present %s", path);
   2486 		FREE(path);
   2487 		stats_counter_reset(statp);
   2488 		Istat_need_save = 1;
   2489 	}
   2490 	nvlist_free(fmri);
   2491 }
   2492 
   2493 void
   2494 istat_fini(void)
   2495 {
   2496 	lut_free(Istats, istat_destructor, NULL);
   2497 }
   2498 
/*
 * Working state for serd_save(): the serialization buffer, the current
 * write position within it (advanced by serd2str()), and its size in
 * bytes (accumulated by serdaddsize()).
 */
static char *Serdbuf;
static char *Serdbufptr;
static int Serdsz;
   2502 
   2503 /*
   2504  * serdaddsize -- calculate size of serd and add it to Serdsz
   2505  */
   2506 /*ARGSUSED*/
   2507 static void
   2508 serdaddsize(const struct serd_entry *lhs, struct stats *rhs, void *arg)
   2509 {
   2510 	ASSERT(lhs != NULL);
   2511 
   2512 	/* count up the size of the stat name */
   2513 	Serdsz += ipath2strlen(lhs->ename, lhs->ipath);
   2514 	Serdsz++;	/* for the trailing NULL byte */
   2515 }
   2516 
   2517 /*
   2518  * serd2str -- serialize a serd engine, writing result to *Serdbufptr
   2519  */
   2520 /*ARGSUSED*/
   2521 static void
   2522 serd2str(const struct serd_entry *lhs, struct stats *rhs, void *arg)
   2523 {
   2524 	char *str;
   2525 	int len;
   2526 
   2527 	ASSERT(lhs != NULL);
   2528 
   2529 	/* serialize the serd engine name */
   2530 	str = ipath2str(lhs->ename, lhs->ipath);
   2531 	len = strlen(str);
   2532 
   2533 	ASSERT(Serdbufptr + len + 1 <= &Serdbuf[Serdsz]);
   2534 	(void) strlcpy(Serdbufptr, str, &Serdbuf[Serdsz] - Serdbufptr);
   2535 	Serdbufptr += len;
   2536 	FREE(str);
   2537 	*Serdbufptr++ = '\0';
   2538 	ASSERT(Serdbufptr <= &Serdbuf[Serdsz]);
   2539 }
   2540 
   2541 void
   2542 serd_save()
   2543 {
   2544 	if (Serd_need_save == 0)
   2545 		return;
   2546 
   2547 	/* figure out how big the serialzed info is */
   2548 	Serdsz = 0;
   2549 	lut_walk(SerdEngines, (lut_cb)serdaddsize, NULL);
   2550 
   2551 	if (Serdsz == 0) {
   2552 		/* no serd engines to save */
   2553 		fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
   2554 		return;
   2555 	}
   2556 
   2557 	/* create the serialized buffer */
   2558 	Serdbufptr = Serdbuf = MALLOC(Serdsz);
   2559 	lut_walk(SerdEngines, (lut_cb)serd2str, NULL);
   2560 
   2561 	/* clear out current saved stats */
   2562 	fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
   2563 
   2564 	/* write out the new version */
   2565 	fmd_buf_write(Hdl, NULL, WOBUF_SERDS, Serdbuf, Serdsz);
   2566 	FREE(Serdbuf);
   2567 	Serd_need_save = 0;
   2568 }
   2569 
   2570 int
   2571 serd_cmp(struct serd_entry *ent1, struct serd_entry *ent2)
   2572 {
   2573 	if (ent1->ename != ent2->ename)
   2574 		return (ent2->ename - ent1->ename);
   2575 	if (ent1->ipath != ent2->ipath)
   2576 		return ((char *)ent2->ipath - (char *)ent1->ipath);
   2577 
   2578 	return (0);
   2579 }
   2580 
   2581 void
   2582 fme_serd_load(fmd_hdl_t *hdl)
   2583 {
   2584 	int sz;
   2585 	char *sbuf;
   2586 	char *sepptr;
   2587 	char *ptr;
   2588 	struct serd_entry *newentp;
   2589 	struct node *epname;
   2590 	nvlist_t *fmri;
   2591 	char *namestring;
   2592 
   2593 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_SERDS)) == 0)
   2594 		return;
   2595 	sbuf = alloca(sz);
   2596 	fmd_buf_read(hdl, NULL, WOBUF_SERDS, sbuf, sz);
   2597 	ptr = sbuf;
   2598 	while (ptr < &sbuf[sz]) {
   2599 		sepptr = strchr(ptr, '@');
   2600 		*sepptr = '\0';
   2601 		namestring = ptr;
   2602 		sepptr++;
   2603 		ptr = sepptr;
   2604 		ptr += strlen(ptr);
   2605 		ptr++;	/* move past the '\0' separating paths */
   2606 		epname = pathstring2epnamenp(sepptr);
   2607 		fmri = node2fmri(epname);
   2608 		if (platform_path_exists(fmri)) {
   2609 			newentp = MALLOC(sizeof (*newentp));
   2610 			newentp->hdl = hdl;
   2611 			newentp->ipath = ipath(epname);
   2612 			newentp->ename = stable(namestring);
   2613 			SerdEngines = lut_add(SerdEngines, (void *)newentp,
   2614 			    (void *)newentp, (lut_cmp)serd_cmp);
   2615 		} else
   2616 			Serd_need_save = 1;
   2617 		tree_free(epname);
   2618 		nvlist_free(fmri);
   2619 	}
   2620 	/* save it back again in case some of the paths no longer exist */
   2621 	serd_save();
   2622 }
   2623 
   2624 /*ARGSUSED*/
   2625 static void
   2626 serd_destructor(void *left, void *right, void *arg)
   2627 {
   2628 	struct serd_entry *entp = (struct serd_entry *)left;
   2629 	FREE(entp);
   2630 }
   2631 
   2632 /*
   2633  * Callback used in a walk of the SerdEngines to reset matching serd engines.
   2634  */
   2635 /*ARGSUSED*/
   2636 static void
   2637 serd_reset_cb(struct serd_entry *entp, void *unused, const struct ipath *ipp)
   2638 {
   2639 	char *path;
   2640 
   2641 	if (entp->ipath == ipp) {
   2642 		path = ipath2str(entp->ename, ipp);
   2643 		out(O_ALTFP, "serd_reset_cb: resetting %s", path);
   2644 		fmd_serd_reset(entp->hdl, path);
   2645 		FREE(path);
   2646 		Serd_need_save = 1;
   2647 	}
   2648 }
   2649 
   2650 /*ARGSUSED*/
   2651 static void
   2652 serd_topo_chg_cb(struct serd_entry *entp, void *unused, void *unused2)
   2653 {
   2654 	char *path;
   2655 	nvlist_t *fmri;
   2656 
   2657 	fmri = ipath2fmri((struct ipath *)(entp->ipath));
   2658 	if (!platform_path_exists(fmri)) {
   2659 		path = ipath2str(entp->ename, entp->ipath);
   2660 		out(O_ALTFP, "serd_topo_chg_cb: not present %s", path);
   2661 		fmd_serd_reset(entp->hdl, path);
   2662 		FREE(path);
   2663 		Serd_need_save = 1;
   2664 	}
   2665 	nvlist_free(fmri);
   2666 }
   2667 
   2668 void
   2669 serd_fini(void)
   2670 {
   2671 	lut_free(SerdEngines, serd_destructor, NULL);
   2672 }
   2673 
/*
 * publish_suspects -- turn the prepared suspect array into faults and add
 *	them to the FME's case.
 *
 * "srl" holds fmep->nsuspects entries.  They are sorted with rslcmp(), a
 * FIT rate is collected for each (1 is used when the FITrate property is
 * missing, unusable or zero), and then, walking the sorted array
 * backwards, a fault is created for each entry with a certainty of that
 * entry's share of the FIT rate total.  The optional "message", "retire"
 * and "response" properties and any payload properties are attached to
 * the fault, the entry's nvlists are released, the optional "action"
 * property is evaluated, and the fault is added to the case.
 *
 * Along the way each fault's ASRU is checked with
 * fmd_nvl_fmri_has_fault().  Unless every suspect turned out to have an
 * ASRU that is already faulty, the "count" istats named by the suspects
 * are bumped and saved.
 *
 * NOTE(review): erl is computed as srl + nsuspects - 1, so this appears to
 * assume nsuspects >= 1 -- confirm against the callers.
 */
static void
publish_suspects(struct fme *fmep, struct rsl *srl)
{
	struct rsl *rp;
	nvlist_t *fault;
	uint8_t cert;
	uint_t *frs;
	uint_t frsum, fr;
	uint_t messval;
	uint_t retireval;
	uint_t responseval;
	struct node *snp;
	int frcnt, fridx;
	boolean_t allfaulty = B_TRUE;
	struct rsl *erl = srl + fmep->nsuspects - 1;

	/*
	 * sort the array
	 */
	qsort(srl, fmep->nsuspects, sizeof (struct rsl), rslcmp);

	/* sum the fitrates */
	frs = alloca(fmep->nsuspects * sizeof (uint_t));
	fridx = frcnt = frsum = 0;

	for (rp = srl; rp <= erl; rp++) {
		struct node *n;

		n = eventprop_lookup(rp->suspect, L_FITrate);
		if (node2uint(n, &fr) != 0) {
			out(O_DEBUG|O_NONL, "event ");
			ipath_print(O_DEBUG|O_NONL,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    rp->suspect->ipp);
			out(O_DEBUG, " has no FITrate (using 1)");
			fr = 1;
		} else if (fr == 0) {
			out(O_DEBUG|O_NONL, "event ");
			ipath_print(O_DEBUG|O_NONL,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    rp->suspect->ipp);
			out(O_DEBUG, " has zero FITrate (using 1)");
			fr = 1;
		}

		/* frs[] is filled in sorted order and drained below in reverse */
		frs[fridx++] = fr;
		frsum += fr;
		frcnt++;
	}

	/* Add them in reverse order of our sort, as fmd reverses order */
	for (rp = erl; rp >= srl; rp--) {
		cert = percentof(frs[--fridx], frsum);
		fault = fmd_nvl_create_fault(fmep->hdl,
		    rp->suspect->enode->u.event.ename->u.name.s,
		    cert,
		    rp->asru,
		    rp->fru,
		    rp->rsrc);
		if (fault == NULL)
			out(O_DIE, "fault creation failed");
		/* if "message" property exists, add it to the fault */
		if (node2uint(eventprop_lookup(rp->suspect, L_message),
		    &messval) == 0) {

			out(O_ALTFP,
			    "[FME%d, %s adds message=%d to suspect list]",
			    fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    messval);
			if (nvlist_add_boolean_value(fault,
			    FM_SUSPECT_MESSAGE,
			    (messval) ? B_TRUE : B_FALSE) != 0) {
				out(O_DIE, "cannot add no-message to fault");
			}
		}

		/* if "retire" property exists, add it to the fault */
		if (node2uint(eventprop_lookup(rp->suspect, L_retire),
		    &retireval) == 0) {

			out(O_ALTFP,
			    "[FME%d, %s adds retire=%d to suspect list]",
			    fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    retireval);
			if (nvlist_add_boolean_value(fault,
			    FM_SUSPECT_RETIRE,
			    (retireval) ? B_TRUE : B_FALSE) != 0) {
				out(O_DIE, "cannot add no-retire to fault");
			}
		}

		/* if "response" property exists, add it to the fault */
		if (node2uint(eventprop_lookup(rp->suspect, L_response),
		    &responseval) == 0) {

			out(O_ALTFP,
			    "[FME%d, %s adds response=%d to suspect list]",
			    fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    responseval);
			if (nvlist_add_boolean_value(fault,
			    FM_SUSPECT_RESPONSE,
			    (responseval) ? B_TRUE : B_FALSE) != 0) {
				out(O_DIE, "cannot add no-response to fault");
			}
		}

		/* add any payload properties */
		lut_walk(rp->suspect->payloadprops,
		    (lut_cb)addpayloadprop, (void *)fault);
		/*
		 * rslfree() releases only the entry's nvlists; rp->suspect
		 * is still used below.  NOTE(review): presumably
		 * fmd_nvl_create_fault() took its own copies of them --
		 * confirm.
		 */
		rslfree(rp);

		/*
		 * If "action" property exists, evaluate it;  this must be done
		 * before the allfaulty check below since some actions may
		 * modify the asru to be used in fmd_nvl_fmri_has_fault.  This
		 * needs to be restructured if any new actions are introduced
		 * that have effects that we do not want to be visible if
		 * we decide not to publish in the dupclose check below.
		 */
		if ((snp = eventprop_lookup(rp->suspect, L_action)) != NULL) {
			struct evalue evalue;

			out(O_ALTFP|O_NONL,
			    "[FME%d, %s action ", fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s);
			ptree_name_iter(O_ALTFP|O_NONL, snp);
			out(O_ALTFP, "]");
			/* Action_nvl is set to this fault before evaluating */
			Action_nvl = fault;
			(void) eval_expr(snp, NULL, NULL, NULL, NULL,
			    NULL, 0, &evalue);
		}

		fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault);

		/*
		 * check if the asru is already marked as "faulty".
		 */
		if (allfaulty) {
			nvlist_t *asru;

			out(O_ALTFP|O_VERB, "FME%d dup check ", fmep->id);
			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, rp->suspect);
			out(O_ALTFP|O_VERB|O_NONL, " ");
			if (nvlist_lookup_nvlist(fault,
			    FM_FAULT_ASRU, &asru) != 0) {
				out(O_ALTFP|O_VERB, "NULL asru");
				allfaulty = B_FALSE;
			} else if (fmd_nvl_fmri_has_fault(fmep->hdl, asru,
			    FMD_HAS_FAULT_ASRU, NULL)) {
				out(O_ALTFP|O_VERB, "faulty");
			} else {
				out(O_ALTFP|O_VERB, "not faulty");
				allfaulty = B_FALSE;
			}
		}

	}

	if (!allfaulty) {
		/*
		 * don't update the count stat if all asrus are already
		 * present and unrepaired in the asru cache
		 */
		for (rp = erl; rp >= srl; rp--) {
			struct event *suspect = rp->suspect;

			if (suspect == NULL)
				continue;

			/* if "count" exists, increment the appropriate stat */
			if ((snp = eventprop_lookup(suspect,
			    L_count)) != NULL) {
				out(O_ALTFP|O_NONL,
				    "[FME%d, %s count ", fmep->id,
				    suspect->enode->u.event.ename->u.name.s);
				ptree_name_iter(O_ALTFP|O_NONL, snp);
				out(O_ALTFP, "]");
				istat_bump(snp, 0);

			}
		}
		istat_save();	/* write out any istat changes */
	}
}
   2861 
   2862 static const char *
   2863 undiag_2defect_str(int ud)
   2864 {
   2865 	switch (ud) {
   2866 	case UD_VAL_MISSINGINFO:
   2867 	case UD_VAL_MISSINGOBS:
   2868 	case UD_VAL_MISSINGPATH:
   2869 	case UD_VAL_MISSINGZERO:
   2870 	case UD_VAL_BADOBS:
   2871 	case UD_VAL_CFGMISMATCH:
   2872 		return (UNDIAG_DEFECT_CHKPT);
   2873 		break;
   2874 
   2875 	case UD_VAL_BADEVENTI:
   2876 	case UD_VAL_INSTFAIL:
   2877 	case UD_VAL_NOPATH:
   2878 	case UD_VAL_UNSOLVD:
   2879 		return (UNDIAG_DEFECT_FME);
   2880 		break;
   2881 
   2882 	case UD_VAL_MAXFME:
   2883 		return (UNDIAG_DEFECT_LIMIT);
   2884 		break;
   2885 
   2886 	case UD_VAL_UNKNOWN:
   2887 	default:
   2888 		return (UNDIAG_DEFECT_UNKNOWN);
   2889 		break;
   2890 	}
   2891 }
   2892 
   2893 const char *
   2894 undiag_2reason_str(int ud)
   2895 {
   2896 	switch (ud) {
   2897 	case UD_VAL_BADEVENTI:
   2898 		return (UD_STR_BADEVENTI);
   2899 	case UD_VAL_BADOBS:
   2900 		return (UD_STR_BADOBS);
   2901 	case UD_VAL_CFGMISMATCH:
   2902 		return (UD_STR_CFGMISMATCH);
   2903 	case UD_VAL_INSTFAIL:
   2904 		return (UD_STR_INSTFAIL);
   2905 	case UD_VAL_MAXFME:
   2906 		return (UD_STR_MAXFME);
   2907 	case UD_VAL_MISSINGINFO:
   2908 		return (UD_STR_MISSINGINFO);
   2909 	case UD_VAL_MISSINGOBS:
   2910 		return (UD_STR_MISSINGOBS);
   2911 	case UD_VAL_MISSINGPATH:
   2912 		return (UD_STR_MISSINGPATH);
   2913 	case UD_VAL_MISSINGZERO:
   2914 		return (UD_STR_MISSINGZERO);
   2915 	case UD_VAL_NOPATH:
   2916 		return (UD_STR_NOPATH);
   2917 	case UD_VAL_UNSOLVD:
   2918 		return (UD_STR_UNSOLVD);
   2919 	case UD_VAL_UNKNOWN:
   2920 	default:
   2921 		return (UD_STR_UNKNOWN);
   2922 	}
   2923 }
   2924 
   2925 static void
   2926 publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep, fmd_case_t *fmcase)
   2927 {
   2928 	struct case_list *newcase;
   2929 	nvlist_t *defect;
   2930 
   2931 	out(O_ALTFP,
   2932 	    "[undiagnosable ereport received, "
   2933 	    "creating and closing a new case (%s)]",
   2934 	    undiag_2reason_str(Undiag_reason));
   2935 
   2936 	newcase = MALLOC(sizeof (struct case_list));
   2937 	newcase->next = NULL;
   2938 	newcase->fmcase = fmcase;
   2939 	if (Undiagablecaselist != NULL)
   2940 		newcase->next = Undiagablecaselist;
   2941 	Undiagablecaselist = newcase;
   2942 
   2943 	if (ffep != NULL)
   2944 		fmd_case_add_ereport(hdl, newcase->fmcase, ffep);
   2945 
   2946 	defect = fmd_nvl_create_fault(hdl,
   2947 	    undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
   2948 	(void) nvlist_add_string(defect, UNDIAG_REASON,
   2949 	    undiag_2reason_str(Undiag_reason));
   2950 	fmd_case_add_suspect(hdl, newcase->fmcase, defect);
   2951 
   2952 	fmd_case_solve(hdl, newcase->fmcase);
   2953 	fmd_case_close(hdl, newcase->fmcase);
   2954 	Undiag_reason = UD_VAL_UNKNOWN;
   2955 }
   2956 
   2957 static void
   2958 fme_undiagnosable(struct fme *f)
   2959 {
   2960 	nvlist_t *defect;
   2961 
   2962 	out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]",
   2963 	    f->id, fmd_case_uuid(f->hdl, f->fmcase),
   2964 	    undiag_2reason_str(Undiag_reason));
   2965 
   2966 	defect = fmd_nvl_create_fault(f->hdl,
   2967 	    undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
   2968 	(void) nvlist_add_string(defect, UNDIAG_REASON,
   2969 	    undiag_2reason_str(Undiag_reason));
   2970 	fmd_case_add_suspect(f->hdl, f->fmcase, defect);
   2971 	fmd_case_solve(f->hdl, f->fmcase);
   2972 	fmd_case_close(f->hdl, f->fmcase);
   2973 	Undiag_reason = UD_VAL_UNKNOWN;
   2974 }
   2975 
   2976 /*
   2977  * fme_close_case
   2978  *
   2979  *	Find the requested case amongst our fmes and close it.  Free up
   2980  *	the related fme.
   2981  */
   2982 void
   2983 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase)
   2984 {
   2985 	struct case_list *ucasep, *prevcasep = NULL;
   2986 	struct fme *prev = NULL;
   2987 	struct fme *fmep;
   2988 
   2989 	for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) {
   2990 		if (fmcase != ucasep->fmcase) {
   2991 			prevcasep = ucasep;
   2992 			continue;
   2993 		}
   2994 
   2995 		if (prevcasep == NULL)
   2996 			Undiagablecaselist = Undiagablecaselist->next;
   2997 		else
   2998 			prevcasep->next = ucasep->next;
   2999 
   3000 		FREE(ucasep);
   3001 		return;
   3002 	}
   3003 
   3004 	for (fmep = FMElist; fmep; fmep = fmep->next) {
   3005 		if (fmep->hdl == hdl && fmep->fmcase == fmcase)
   3006 			break;
   3007 		prev = fmep;
   3008 	}
   3009 
   3010 	if (fmep == NULL) {
   3011 		out(O_WARN, "Eft asked to close unrecognized case [%s].",
   3012 		    fmd_case_uuid(hdl, fmcase));
   3013 		return;
   3014 	}
   3015 
   3016 	if (EFMElist == fmep)
   3017 		EFMElist = prev;
   3018 
   3019 	if (prev == NULL)
   3020 		FMElist = FMElist->next;
   3021 	else
   3022 		prev->next = fmep->next;
   3023 
   3024 	fmep->next = NULL;
   3025 
   3026 	/* Get rid of any timer this fme has set */
   3027 	if (fmep->wull != 0)
   3028 		fmd_timer_remove(fmep->hdl, fmep->timer);
   3029 
   3030 	if (ClosedFMEs == NULL) {
   3031 		ClosedFMEs = fmep;
   3032 	} else {
   3033 		fmep->next = ClosedFMEs;
   3034 		ClosedFMEs = fmep;
   3035 	}
   3036 
   3037 	Open_fme_count--;
   3038 
   3039 	/* See if we can close the overflow FME */
   3040 	if (Open_fme_count <= Max_fme) {
   3041 		for (fmep = FMElist; fmep; fmep = fmep->next) {
   3042 			if (fmep->overflow && !(fmd_case_closed(fmep->hdl,
   3043 			    fmep->fmcase)))
   3044 				break;
   3045 		}
   3046 
   3047 		if (fmep != NULL)
   3048 			fmd_case_close(fmep->hdl, fmep->fmcase);
   3049 	}
   3050 }
   3051 
   3052 /*
   3053  * fme_set_timer()
   3054  *	If the time we need to wait for the given FME is less than the
   3055  *	current timer, kick that old timer out and establish a new one.
   3056  */
   3057 static int
   3058 fme_set_timer(struct fme *fmep, unsigned long long wull)
   3059 {
   3060 	out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait ");
   3061 	ptree_timeval(O_ALTFP|O_VERB, &wull);
   3062 
   3063 	if (wull <= fmep->pull) {
   3064 		out(O_ALTFP|O_VERB|O_NONL, "already have waited at least ");
   3065 		ptree_timeval(O_ALTFP|O_VERB, &fmep->pull);
   3066 		out(O_ALTFP|O_VERB, NULL);
   3067 		/* we've waited at least wull already, don't need timer */
   3068 		return (0);
   3069 	}
   3070 
   3071 	out(O_ALTFP|O_VERB|O_NONL, " currently ");
   3072 	if (fmep->wull != 0) {
   3073 		out(O_ALTFP|O_VERB|O_NONL, "waiting ");
   3074 		ptree_timeval(O_ALTFP|O_VERB, &fmep->wull);
   3075 		out(O_ALTFP|O_VERB, NULL);
   3076 	} else {
   3077 		out(O_ALTFP|O_VERB|O_NONL, "not waiting");
   3078 		out(O_ALTFP|O_VERB, NULL);
   3079 	}
   3080 
   3081 	if (fmep->wull != 0)
   3082 		if (wull >= fmep->wull)
   3083 			/* New timer would fire later than established timer */
   3084 			return (0);
   3085 
   3086 	if (fmep->wull != 0) {
   3087 		fmd_timer_remove(fmep->hdl, fmep->timer);
   3088 	}
   3089 
   3090 	fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep,
   3091 	    fmep->e0r, wull);
   3092 	out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer);
   3093 	fmep->wull = wull;
   3094 	return (1);
   3095 }
   3096 
   3097 void
   3098 fme_timer_fired(struct fme *fmep, id_t tid)
   3099 {
   3100 	struct fme *ffmep = NULL;
   3101 
   3102 	for (ffmep = FMElist; ffmep; ffmep = ffmep->next)
   3103 		if (ffmep == fmep)
   3104 			break;
   3105 
   3106 	if (ffmep == NULL) {
   3107 		out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.",
   3108 		    (void *)fmep);
   3109 		return;
   3110 	}
   3111 
   3112 	out(O_ALTFP|O_VERB, "Timer fired %lx", tid);
   3113 	fmep->pull = fmep->wull;
   3114 	fmep->wull = 0;
   3115 	fmd_buf_write(fmep->hdl, fmep->fmcase,
   3116 	    WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull));
   3117 
   3118 	fme_eval(fmep, fmep->e0r);
   3119 }
   3120 
   3121 /*
   3122  * Preserve the fme's suspect list in its psuspects list, NULLing the
   3123  * suspects list in the meantime.
   3124  */
   3125 static void
   3126 save_suspects(struct fme *fmep)
   3127 {
   3128 	struct event *ep;
   3129 	struct event *nextep;
   3130 
   3131 	/* zero out the previous suspect list */
   3132 	for (ep = fmep->psuspects; ep; ep = nextep) {
   3133 		nextep = ep->psuspects;
   3134 		ep->psuspects = NULL;
   3135 	}
   3136 	fmep->psuspects = NULL;
   3137 
   3138 	/* zero out the suspect list, copying it to previous suspect list */
   3139 	fmep->psuspects = fmep->suspects;
   3140 	for (ep = fmep->suspects; ep; ep = nextep) {
   3141 		nextep = ep->suspects;
   3142 		ep->psuspects = ep->suspects;
   3143 		ep->suspects = NULL;
   3144 		ep->is_suspect = 0;
   3145 	}
   3146 	fmep->suspects = NULL;
   3147 	fmep->nsuspects = 0;
   3148 }
   3149 
   3150 /*
   3151  * Retrieve the fme's suspect list from its psuspects list.
   3152  */
   3153 static void
   3154 restore_suspects(struct fme *fmep)
   3155 {
   3156 	struct event *ep;
   3157 	struct event *nextep;
   3158 
   3159 	fmep->nsuspects = 0;
   3160 	fmep->suspects = fmep->psuspects;
   3161 	for (ep = fmep->psuspects; ep; ep = nextep) {
   3162 		fmep->nsuspects++;
   3163 		nextep = ep->psuspects;
   3164 		ep->suspects = ep->psuspects;
   3165 	}
   3166 }
   3167 
   3168 /*
   3169  * this is what we use to call the Emrys prototype code instead of main()
   3170  */
   3171 static void
   3172 fme_eval(struct fme *fmep, fmd_event_t *ffep)
   3173 {
   3174 	struct event *ep;
   3175 	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
   3176 	struct rsl *srl = NULL;
   3177 	struct rsl *srl2 = NULL;
   3178 	int mess_zero_count;
   3179 	int rpcnt;
   3180 
   3181 	save_suspects(fmep);
   3182 
   3183 	out(O_ALTFP, "Evaluate FME %d", fmep->id);
   3184 	indent_set("  ");
   3185 
   3186 	lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
   3187 	fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
   3188 
   3189 	out(O_ALTFP|O_NONL, "FME%d state: %s, suspect list:", fmep->id,
   3190 	    fme_state2str(fmep->state));
   3191 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
   3192 		out(O_ALTFP|O_NONL, " ");
   3193 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
   3194 	}
   3195 	out(O_ALTFP, NULL);
   3196 
   3197 	switch (fmep->state) {
   3198 	case FME_CREDIBLE:
   3199 		print_suspects(SLNEW, fmep);
   3200 		(void) upsets_eval(fmep, ffep);
   3201 
   3202 		/*
   3203 		 * we may have already posted suspects in upsets_eval() which
   3204 		 * can recurse into fme_eval() again. If so then just return.
   3205 		 */
   3206 		if (fmep->posted_suspects)
   3207 			return;
   3208 
   3209 		stats_counter_bump(fmep->diags);
   3210 		rpcnt = fmep->nsuspects;
   3211 		save_suspects(fmep);
   3212 
   3213 		/*
   3214 		 * create two lists, one for "message=1" faults and one for
   3215 		 * "message=0" faults. If we have a mixture we will generate
   3216 		 * two separate suspect lists.
   3217 		 */
   3218 		srl = MALLOC(rpcnt * sizeof (struct rsl));
   3219 		bzero(srl, rpcnt * sizeof (struct rsl));
   3220 		srl2 = MALLOC(rpcnt * sizeof (struct rsl));
   3221 		bzero(srl2, rpcnt * sizeof (struct rsl));
   3222 		mess_zero_count = trim_suspects(fmep, srl, srl2, ffep);
   3223 
   3224 		/*
   3225 		 * If the resulting suspect list has no members, we're
   3226 		 * done so simply close the case. Otherwise sort and publish.
   3227 		 */
   3228 		if (fmep->nsuspects == 0 && mess_zero_count == 0) {
   3229 			out(O_ALTFP,
   3230 			    "[FME%d, case %s (all suspects are upsets)]",
   3231 			    fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));
   3232 			fmd_case_close(fmep->hdl, fmep->fmcase);
   3233 		} else if (fmep->nsuspects != 0 && mess_zero_count == 0) {
   3234 			publish_suspects(fmep, srl);
   3235 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
   3236 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
   3237 			fmd_case_solve(fmep->hdl, fmep->fmcase);
   3238 		} else if (fmep->nsuspects == 0 && mess_zero_count != 0) {
   3239 			fmep->nsuspects = mess_zero_count;
   3240 			publish_suspects(fmep, srl2);
   3241 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
   3242 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
   3243 			fmd_case_solve(fmep->hdl, fmep->fmcase);
   3244 		} else {
   3245 			struct event *obsp;
   3246 			struct fme *nfmep;
   3247 
   3248 			publish_suspects(fmep, srl);
   3249 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
   3250 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
   3251 			fmd_case_solve(fmep->hdl, fmep->fmcase);
   3252 
   3253 			/*
   3254 			 * Got both message=0 and message=1 so create a
   3255 			 * duplicate case. Also need a temporary duplicate fme
   3256 			 * structure for use by publish_suspects().
   3257 			 */
   3258 			nfmep = alloc_fme();
   3259 			nfmep->id =  Nextid++;
   3260 			nfmep->hdl = fmep->hdl;
   3261 			nfmep->nsuspects = mess_zero_count;
   3262 			nfmep->fmcase = fmd_case_open(fmep->hdl, NULL);
   3263 			out(O_ALTFP|O_STAMP,
   3264 			    "[creating parallel FME%d, case %s]", nfmep->id,
   3265 			    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
   3266 			Open_fme_count++;
   3267 			if (ffep) {
   3268 				fmd_case_setprincipal(nfmep->hdl,
   3269 				    nfmep->fmcase, ffep);
   3270 				fmd_case_add_ereport(nfmep->hdl,
   3271 				    nfmep->fmcase, ffep);
   3272 			}
   3273 			for (obsp = fmep->observations; obsp;
   3274 			    obsp = obsp->observations)
   3275 				if (obsp->ffep && obsp->ffep != ffep)
   3276 					fmd_case_add_ereport(nfmep->hdl,
   3277 					    nfmep->fmcase, obsp->ffep);
   3278 
   3279 			publish_suspects(nfmep, srl2);
   3280 			out(O_ALTFP, "[solving FME%d, case %s]", nfmep->id,
   3281 			    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
   3282 			fmd_case_solve(nfmep->hdl, nfmep->fmcase);
   3283 			FREE(nfmep);
   3284 		}
   3285 		FREE(srl);
   3286 		FREE(srl2);
   3287 		restore_suspects(fmep);
   3288 
   3289 		fmep->posted_suspects = 1;
   3290 		fmd_buf_write(fmep->hdl, fmep->fmcase,
   3291 		    WOBUF_POSTD,
   3292 		    (void *)&fmep->posted_suspects,
   3293 		    sizeof (fmep->posted_suspects));
   3294 
   3295 		/*
   3296 		 * Now the suspects have been posted, we can clear up
   3297 		 * the instance tree as we won't be looking at it again.
   3298 		 * Also cancel the timer as the case is now solved.
   3299 		 */
   3300 		if (fmep->wull != 0) {
   3301 			fmd_timer_remove(fmep->hdl, fmep->timer);
   3302 			fmep->wull = 0;
   3303 		}
   3304 		break;
   3305 
   3306 	case FME_WAIT:
   3307 		ASSERT(my_delay > fmep->ull);
   3308 		(void) fme_set_timer(fmep, my_delay);
   3309 		print_suspects(SLWAIT, fmep);
   3310 		itree_prune(fmep->eventtree);
   3311 		return;
   3312 
   3313 	case FME_DISPROVED:
   3314 		print_suspects(SLDISPROVED, fmep);
   3315 		Undiag_reason = UD_VAL_UNSOLVD;
   3316 		fme_undiagnosable(fmep);
   3317 		break;
   3318 	}
   3319 
   3320 	itree_free(fmep->eventtree);
   3321 	fmep->eventtree = NULL;
   3322 	structconfig_free(fmep->config);
   3323 	fmep->config = NULL;
   3324 	destroy_fme_bufs(fmep);
   3325 }
   3326 
   3327 static void indent(void);
   3328 static int triggered(struct fme *fmep, struct event *ep, int mark);
   3329 static enum fme_state effects_test(struct fme *fmep,
   3330     struct event *fault_event, unsigned long long at_latest_by,
   3331     unsigned long long *pdelay);
   3332 static enum fme_state requirements_test(struct fme *fmep, struct event *ep,
   3333     unsigned long long at_latest_by, unsigned long long *pdelay);
   3334 static enum fme_state causes_test(struct fme *fmep, struct event *ep,
   3335     unsigned long long at_latest_by, unsigned long long *pdelay);
   3336 
   3337 static int
   3338 checkconstraints(struct fme *fmep, struct arrow *arrowp)
   3339 {
   3340 	struct constraintlist *ctp;
   3341 	struct evalue value;
   3342 	char *sep = "";
   3343 
   3344 	if (arrowp->forever_false) {
   3345 		indent();
   3346 		out(O_ALTFP|O_VERB|O_NONL, "  Forever false constraint: ");
   3347 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
   3348 			out(O_ALTFP|O_VERB|O_NONL, sep);
   3349 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
   3350 			sep = ", ";
   3351 		}
   3352 		out(O_ALTFP|O_VERB, NULL);
   3353 		return (0);
   3354 	}
   3355 	if (arrowp->forever_true) {
   3356 		indent();
   3357 		out(O_ALTFP|O_VERB|O_NONL, "  Forever true constraint: ");
   3358 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
   3359 			out(O_ALTFP|O_VERB|O_NONL, sep);
   3360 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
   3361 			sep = ", ";
   3362 		}
   3363 		out(O_ALTFP|O_VERB, NULL);
   3364 		return (1);
   3365 	}
   3366 
   3367 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
   3368 		if (eval_expr(ctp->cnode, NULL, NULL,
   3369 		    &fmep->globals, fmep->config,
   3370 		    arrowp, 0, &value)) {
   3371 			/* evaluation successful */
   3372 			if (value.t == UNDEFINED || value.v == 0) {
   3373 				/* known false */
   3374 				arrowp->forever_false = 1;
   3375 				indent();
   3376 				out(O_ALTFP|O_VERB|O_NONL,
   3377 				    "  False constraint: ");
   3378 				ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
   3379 				out(O_ALTFP|O_VERB, NULL);
   3380 				return (0);
   3381 			}
   3382 		} else {
   3383 			/* evaluation unsuccessful -- unknown value */
   3384 			indent();
   3385 			out(O_ALTFP|O_VERB|O_NONL,
   3386 			    "  Deferred constraint: ");
   3387 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
   3388 			out(O_ALTFP|O_VERB, NULL);
   3389 			return (1);
   3390 		}
   3391 	}
   3392 	/* known true */
   3393 	arrowp->forever_true = 1;
   3394 	indent();
   3395 	out(O_ALTFP|O_VERB|O_NONL, "  True constraint: ");
   3396 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
   3397 		out(O_ALTFP|O_VERB|O_NONL, sep);
   3398 		ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
   3399 		sep = ", ";
   3400 	}
   3401 	out(O_ALTFP|O_VERB, NULL);
   3402 	return (1);
   3403 }
   3404 
   3405 static int
   3406 triggered(struct fme *fmep, struct event *ep, int mark)
   3407 {
   3408 	struct bubble *bp;
   3409 	struct arrowlist *ap;
   3410 	int count = 0;
   3411 
   3412 	stats_counter_bump(fmep->Tcallcount);
   3413 	for (bp = itree_next_bubble(ep, NULL); bp;
   3414 	    bp = itree_next_bubble(ep, bp)) {
   3415 		if (bp->t != B_TO)
   3416 			continue;
   3417 		for (ap = itree_next_arrow(bp, NULL); ap;
   3418 		    ap = itree_next_arrow(bp, ap)) {
   3419 			/* check count of marks against K in the bubble */
   3420 			if ((ap->arrowp->mark & mark) &&
   3421 			    ++count >= bp->nork)
   3422 				return (1);
   3423 		}
   3424 	}
   3425 	return (0);
   3426 }
   3427 
   3428 static int
   3429 mark_arrows(struct fme *fmep, struct event *ep, int mark,
   3430     unsigned long long at_latest_by, unsigned long long *pdelay, int keep)
   3431 {
   3432 	struct bubble *bp;
   3433 	struct arrowlist *ap;
   3434 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
   3435 	unsigned long long my_delay;
   3436 	enum fme_state result;
   3437 	int retval = 0;
   3438 
   3439 	for (bp = itree_next_bubble(ep, NULL); bp;
   3440 	    bp = itree_next_bubble(ep, bp)) {
   3441 		if (bp->t != B_FROM)
   3442 			continue;
   3443 		stats_counter_bump(fmep->Marrowcount);
   3444 		for (ap = itree_next_arrow(bp, NULL); ap;
   3445 		    ap = itree_next_arrow(bp, ap)) {
   3446 			struct event *ep2 = ap->arrowp->head->myevent;
   3447 			/*
   3448 			 * if we're clearing marks, we can avoid doing
   3449 			 * all that work evaluating constraints.
   3450 			 */
   3451 			if (mark == 0) {
   3452 				if (ap->arrowp->arrow_marked == 0)
   3453 					continue;
   3454 				ap->arrowp->arrow_marked = 0;
   3455 				ap->arrowp->mark &= ~EFFECTS_COUNTER;
   3456 				if (keep && (ep2->cached_state &
   3457 				    (WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT)))
   3458 					ep2->keep_in_tree = 1;
   3459 				ep2->cached_state &=
   3460 				    ~(WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT);
   3461 				(void) mark_arrows(fmep, ep2, mark, 0, NULL,
   3462 				    keep);
   3463 				continue;
   3464 			}
   3465 			ap->arrowp->arrow_marked = 1;
   3466 			if (ep2->cached_state & REQMNTS_DISPROVED) {
   3467 				indent();
   3468 				out(O_ALTFP|O_VERB|O_NONL,
   3469 				    "  ALREADY DISPROVED ");
   3470 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
   3471 				out(O_ALTFP|O_VERB, NULL);
   3472 				continue;
   3473 			}
   3474 			if (ep2->cached_state & WAIT_EFFECT) {
   3475 				indent();
   3476 				out(O_ALTFP|O_VERB|O_NONL,
   3477 				    "  ALREADY EFFECTS WAIT ");
   3478 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
   3479 				out(O_ALTFP|O_VERB, NULL);
   3480 				continue;
   3481 			}
   3482 			if (ep2->cached_state & CREDIBLE_EFFECT) {
   3483 				indent();
   3484 				out(O_ALTFP|O_VERB|O_NONL,
   3485 				    "  ALREADY EFFECTS CREDIBLE ");
   3486 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
   3487 				out(O_ALTFP|O_VERB, NULL);
   3488 				continue;
   3489 			}
   3490 			if ((ep2->cached_state & PARENT_WAIT) &&
   3491 			    (mark & PARENT_WAIT)) {
   3492 				indent();
   3493 				out(O_ALTFP|O_VERB|O_NONL,
   3494 				    "  ALREADY PARENT EFFECTS WAIT ");
   3495 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
   3496 				out(O_ALTFP|O_VERB, NULL);
   3497 				continue;
   3498 			}
   3499 			platform_set_payloadnvp(ep2->nvp);
   3500 			if (checkconstraints(fmep, ap->arrowp) == 0) {
   3501 				platform_set_payloadnvp(NULL);
   3502 				indent();
   3503 				out(O_ALTFP|O_VERB|O_NONL,
   3504 				    "  CONSTRAINTS FAIL ");
   3505 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
   3506 				out(O_ALTFP|O_VERB, NULL);
   3507 				continue;
   3508 			}
   3509 			platform_set_payloadnvp(NULL);
   3510 			ap->arrowp->mark |= EFFECTS_COUNTER;
   3511 			if (!triggered(fmep, ep2, EFFECTS_COUNTER)) {
   3512 				indent();
   3513 				out(O_ALTFP|O_VERB|O_NONL,
   3514 				    "  K-COUNT NOT YET MET ");
   3515 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
   3516 				out(O_ALTFP|O_VERB, NULL);
   3517 				continue;
   3518 			}
   3519 			ep2->cached_state &= ~PARENT_WAIT;
   3520 			/*
   3521 			 * if we've reached an ereport and no propagation time
   3522 			 * is specified, use the Hesitate value
   3523 			 */
   3524 			if (ep2->t == N_EREPORT && at_latest_by == 0ULL &&
   3525 			    ap->arrowp->maxdelay == 0ULL) {
   3526 				out(O_ALTFP|O_VERB|O_NONL, "  default wait ");
   3527 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
   3528 				out(O_ALTFP|O_VERB, NULL);
   3529 				result = requirements_test(fmep, ep2, Hesitate,
   3530 				    &my_delay);
   3531 			} else {
   3532 				result = requirements_test(fmep, ep2,
   3533 				    at_latest_by + ap->arrowp->maxdelay,
   3534 				    &my_delay);
   3535 			}
   3536 			if (result == FME_WAIT) {
   3537 				retval = WAIT_EFFECT;
   3538 				if (overall_delay > my_delay)
   3539 					overall_delay = my_delay;
   3540 				ep2->cached_state |= WAIT_EFFECT;
   3541 				indent();
   3542 				out(O_ALTFP|O_VERB|O_NONL, "  EFFECTS WAIT ");
   3543 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
   3544 				out(O_ALTFP|O_VERB, NULL);
   3545 				indent_push("  E");
   3546 				if (mark_arrows(fmep, ep2, PARENT_WAIT,
   3547 				    at_latest_by, &my_delay, 0) ==
   3548 				    WAIT_EFFECT) {
   3549 					retval = WAIT_EFFECT;
   3550 					if (overall_delay > my_delay)
   3551 						overall_delay = my_delay;
   3552 				}
   3553 				indent_pop();
   3554 			} else if (result == FME_DISPROVED) {
   3555 				indent();
   3556 				out(O_ALTFP|O_VERB|O_NONL,
   3557 				    "  EFFECTS DISPROVED ");
   3558 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
   3559 				out(O_ALTFP|O_VERB, NULL);
   3560 			} else {
   3561 				ep2->cached_state |= mark;
   3562 				indent();
   3563 				if (mark == CREDIBLE_EFFECT)
   3564 					out(O_ALTFP|O_VERB|O_NONL,
   3565 					    "  EFFECTS CREDIBLE ");
   3566 				else
   3567 					out(O_ALTFP|O_VERB|O_NONL,
   3568 					    "  PARENT EFFECTS WAIT ");
   3569 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
   3570 				out(O_ALTFP|O_VERB, NULL);
   3571 				indent_push("  E");
   3572 				if (mark_arrows(fmep, ep2, mark, at_latest_by,
   3573 				    &my_delay, 0) == WAIT_EFFECT) {
   3574 					retval = WAIT_EFFECT;
   3575 					if (overall_delay > my_delay)
   3576 						overall_delay = my_delay;
   3577 				}
   3578 				indent_pop();
   3579 			}
   3580 		}
   3581 	}
   3582 	if (retval == WAIT_EFFECT)
   3583 		*pdelay = overall_delay;
   3584 	return (retval);
   3585 }
   3586 
   3587 static enum fme_state
   3588 effects_test(struct fme *fmep, struct event *fault_event,
   3589     unsigned long long at_latest_by, unsigned long long *pdelay)
   3590 {
   3591 	struct event *error_event;
   3592 	enum fme_state return_value = FME_CREDIBLE;
   3593 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
   3594 	unsigned long long my_delay;
   3595 
   3596 	stats_counter_bump(fmep->Ecallcount);
   3597 	indent_push("  E");
   3598 	indent();
   3599 	out(O_ALTFP|O_VERB|O_NONL, "->");
   3600 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
   3601 	out(O_ALTFP|O_VERB, NULL);
   3602 
   3603 	if (mark_arrows(fmep, fault_event, CREDIBLE_EFFECT, at_latest_by,
   3604 	    &my_delay, 0) == WAIT_EFFECT) {
   3605 		return_value = FME_WAIT;
   3606 		if (overall_delay > my_delay)
   3607 			overall_delay = my_delay;
   3608 	}
   3609 	for (error_event = fmep->observations;
   3610 	    error_event; error_event = error_event->observations) {
   3611 		indent();
   3612 		out(O_ALTFP|O_VERB|O_NONL, " ");
   3613 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event);
   3614 		if (!(error_event->cached_state & CREDIBLE_EFFECT)) {
   3615 			if (error_event->cached_state &
   3616 			    (PARENT_WAIT|WAIT_EFFECT)) {
   3617 				out(O_ALTFP|O_VERB, " NOT YET triggered");
   3618 				continue;
   3619 			}
   3620 			return_value = FME_DISPROVED;
   3621 			out(O_ALTFP|O_VERB, " NOT triggered");
   3622 			break;
   3623 		} else {
   3624 			out(O_ALTFP|O_VERB, " triggered");
   3625 		}
   3626 	}
   3627 	if (return_value == FME_DISPROVED) {
   3628 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 0);
   3629 	} else {
   3630 		fault_event->keep_in_tree = 1;
   3631 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 1);
   3632 	}
   3633 
   3634 	indent();
   3635 	out(O_ALTFP|O_VERB|O_NONL, "<-EFFECTS %s ",
   3636 	    fme_state2str(return_value));
   3637 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
   3638 	out(O_ALTFP|O_VERB, NULL);
   3639 	indent_pop();
   3640 	if (return_value == FME_WAIT)
   3641 		*pdelay = overall_delay;
   3642 	return (return_value);
   3643 }
   3644 
   3645 static enum fme_state
   3646 requirements_test(struct fme *fmep, struct event *ep,
   3647     unsigned long long at_latest_by, unsigned long long *pdelay)
   3648 {
   3649 	int waiting_events;
   3650 	int credible_events;
   3651 	int deferred_events;
   3652 	enum fme_state return_value = FME_CREDIBLE;
   3653 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
   3654 	unsigned long long arrow_delay;
   3655 	unsigned long long my_delay;
   3656 	struct event *ep2;
   3657 	struct bubble *bp;
   3658 	struct arrowlist *ap;
   3659 
   3660 	if (ep->cached_state & REQMNTS_CREDIBLE) {
   3661 		indent();
   3662 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY CREDIBLE ");
   3663 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   3664 		out(O_ALTFP|O_VERB, NULL);
   3665 		return (FME_CREDIBLE);
   3666 	}
   3667 	if (ep->cached_state & REQMNTS_DISPROVED) {
   3668 		indent();
   3669 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY DISPROVED ");
   3670 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   3671 		out(O_ALTFP|O_VERB, NULL);
   3672 		return (FME_DISPROVED);
   3673 	}
   3674 	if (ep->cached_state & REQMNTS_WAIT) {
   3675 		indent();
   3676 		*pdelay = ep->cached_delay;
   3677 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY WAIT ");
   3678 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   3679 		out(O_ALTFP|O_VERB|O_NONL, ", wait for: ");
   3680 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
   3681 		out(O_ALTFP|O_VERB, NULL);
   3682 		return (FME_WAIT);
   3683 	}
   3684 	stats_counter_bump(fmep->Rcallcount);
   3685 	indent_push("  R");
   3686 	indent();
   3687 	out(O_ALTFP|O_VERB|O_NONL, "->");
   3688 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   3689 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
   3690 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
   3691 	out(O_ALTFP|O_VERB, NULL);
   3692 
   3693 	if (ep->t == N_EREPORT) {
   3694 		if (ep->count == 0) {
   3695 			if (fmep->pull >= at_latest_by) {
   3696 				return_value = FME_DISPROVED;
   3697 			} else {
   3698 				ep->cached_delay = *pdelay = at_latest_by;
   3699 				return_value = FME_WAIT;
   3700 			}
   3701 		}
   3702 
   3703 		indent();
   3704 		switch (return_value) {
   3705 		case FME_CREDIBLE:
   3706 			ep->cached_state |= REQMNTS_CREDIBLE;
   3707 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS CREDIBLE ");
   3708 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   3709 			break;
   3710 		case FME_DISPROVED:
   3711 			ep->cached_state |= REQMNTS_DISPROVED;
   3712 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
   3713 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   3714 			break;
   3715 		case FME_WAIT:
   3716 			ep->cached_state |= REQMNTS_WAIT;
   3717 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS WAIT ");
   3718 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   3719 			out(O_ALTFP|O_VERB|O_NONL, " to ");
   3720 			ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
   3721 			break;
   3722 		default:
   3723 			out(O_DIE, "requirements_test: unexpected fme_state");
   3724 			break;
   3725 		}
   3726 		out(O_ALTFP|O_VERB, NULL);
   3727 		indent_pop();
   3728 
   3729 		return (return_value);
   3730 	}
   3731 
   3732 	/* this event is not a report, descend the tree */
   3733 	for (bp = itree_next_bubble(ep, NULL); bp;
   3734 	    bp = itree_next_bubble(ep, bp)) {
   3735 		int n;
   3736 
   3737 		if (bp->t != B_FROM)
   3738 			continue;
   3739 
   3740 		n = bp->nork;
   3741 
   3742 		credible_events = 0;
   3743 		waiting_events = 0;
   3744 		deferred_events = 0;
   3745 		arrow_delay = TIMEVAL_EVENTUALLY;
   3746 		/*
   3747 		 * n is -1 for 'A' so adjust it.
   3748 		 * XXX just count up the arrows for now.
   3749 		 */
   3750 		if (n < 0) {
   3751 			n = 0;
   3752 			for (ap = itree_next_arrow(bp, NULL); ap;
   3753 			    ap = itree_next_arrow(bp, ap))
   3754 				n++;
   3755 			indent();
   3756 			out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n);
   3757 		} else {
   3758 			indent();
   3759 			out(O_ALTFP|O_VERB, " Bubble N=%d", n);
   3760 		}
   3761 
   3762 		if (n == 0)
   3763 			continue;
   3764 		if (!(bp->mark & (BUBBLE_ELIDED|BUBBLE_OK))) {
   3765 			for (ap = itree_next_arrow(bp, NULL); ap;
   3766 			    ap = itree_next_arrow(bp, ap)) {
   3767 				ep2 = ap->arrowp->head->myevent;
   3768 				platform_set_payloadnvp(ep2->nvp);
   3769 				(void) checkconstraints(fmep, ap->arrowp);
   3770 				if (!ap->arrowp->forever_false) {
   3771 					/*
   3772 					 * if all arrows are invalidated by the
   3773 					 * constraints, then we should elide the
   3774 					 * whole bubble to be consistant with
   3775 					 * the tree creation time behaviour
   3776 					 */
   3777 					bp->mark |= BUBBLE_OK;
   3778 					platform_set_payloadnvp(NULL);
   3779 					break;
   3780 				}
   3781 				platform_set_payloadnvp(NULL);
   3782 			}
   3783 		}
   3784 		for (ap = itree_next_arrow(bp, NULL); ap;
   3785 		    ap = itree_next_arrow(bp, ap)) {
   3786 			ep2 = ap->arrowp->head->myevent;
   3787 			if (n <= credible_events)
   3788 				break;
   3789 
   3790 			ap->arrowp->mark |= REQMNTS_COUNTER;
   3791 			if (triggered(fmep, ep2, REQMNTS_COUNTER))
   3792 				/* XXX adding max timevals! */
   3793 				switch (requirements_test(fmep, ep2,
   3794 				    at_latest_by + ap->arrowp->maxdelay,
   3795 				    &my_delay)) {
   3796 				case FME_DEFERRED:
   3797 					deferred_events++;
   3798 					break;
   3799 				case FME_CREDIBLE:
   3800 					credible_events++;
   3801 					break;
   3802 				case FME_DISPROVED:
   3803 					break;
   3804 				case FME_WAIT:
   3805 					if (my_delay < arrow_delay)
   3806 						arrow_delay = my_delay;
   3807 					waiting_events++;
   3808 					break;
   3809 				default:
   3810 					out(O_DIE,
   3811 					"Bug in requirements_test.");
   3812 				}
   3813 			else
   3814 				deferred_events++;
   3815 		}
   3816 		if (!(bp->mark & BUBBLE_OK) && waiting_events == 0) {
   3817 			bp->mark |= BUBBLE_ELIDED;
   3818 			continue;
   3819 		}
   3820 		indent();
   3821 		out(O_ALTFP|O_VERB, " Credible: %d Waiting %d",
   3822 		    credible_events + deferred_events, waiting_events);
   3823 		if (credible_events + deferred_events + waiting_events < n) {
   3824 			/* Can never meet requirements */
   3825 			ep->cached_state |= REQMNTS_DISPROVED;
   3826 			indent();
   3827 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
   3828 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   3829 			out(O_ALTFP|O_VERB, NULL);
   3830 			indent_pop();
   3831 			return (FME_DISPROVED);
   3832 		}
   3833 		if (credible_events + deferred_events < n) {
   3834 			/* will have to wait */
   3835 			/* wait time is shortest known */
   3836 			if (arrow_delay < overall_delay)
   3837 				overall_delay = arrow_delay;
   3838 			return_value = FME_WAIT;
   3839 		} else if (credible_events < n) {
   3840 			if (return_value != FME_WAIT)
   3841 				return_value = FME_DEFERRED;
   3842 		}
   3843 	}
   3844 
   3845 	/*
   3846 	 * don't mark as FME_DEFERRED. If this event isn't reached by another
   3847 	 * path, then this will be considered FME_CREDIBLE. But if it is
   3848 	 * reached by a different path so the K-count is met, then might
   3849 	 * get overridden by FME_WAIT or FME_DISPROVED.
   3850 	 */
   3851 	if (return_value == FME_WAIT) {
   3852 		ep->cached_state |= REQMNTS_WAIT;
   3853 		ep->cached_delay = *pdelay = overall_delay;
   3854 	} else if (return_value == FME_CREDIBLE) {
   3855 		ep->cached_state |= REQMNTS_CREDIBLE;
   3856 	}
   3857 	indent();
   3858 	out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS %s ",
   3859 	    fme_state2str(return_value));
   3860 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   3861 	out(O_ALTFP|O_VERB, NULL);
   3862 	indent_pop();
   3863 	return (return_value);
   3864 }
   3865 
   3866 static enum fme_state
   3867 causes_test(struct fme *fmep, struct event *ep,
   3868     unsigned long long at_latest_by, unsigned long long *pdelay)
   3869 {
   3870 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
   3871 	unsigned long long my_delay;
   3872 	int credible_results = 0;
   3873 	int waiting_results = 0;
   3874 	enum fme_state fstate;
   3875 	struct event *tail_event;
   3876 	struct bubble *bp;
   3877 	struct arrowlist *ap;
   3878 	int k = 1;
   3879 
   3880 	stats_counter_bump(fmep->Ccallcount);
   3881 	indent_push("  C");
   3882 	indent();
   3883 	out(O_ALTFP|O_VERB|O_NONL, "->");
   3884 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   3885 	out(O_ALTFP|O_VERB, NULL);
   3886 
   3887 	for (bp = itree_next_bubble(ep, NULL); bp;
   3888 	    bp = itree_next_bubble(ep, bp)) {
   3889 		if (bp->t != B_TO)
   3890 			continue;
   3891 		k = bp->nork;	/* remember the K value */
   3892 		for (ap = itree_next_arrow(bp, NULL); ap;
   3893 		    ap = itree_next_arrow(bp, ap)) {
   3894 			int do_not_follow = 0;
   3895 
   3896 			/*
   3897 			 * if we get to the same event multiple times
   3898 			 * only worry about the first one.
   3899 			 */
   3900 			if (ap->arrowp->tail->myevent->cached_state &
   3901 			    CAUSES_TESTED) {
   3902 				indent();
   3903 				out(O_ALTFP|O_VERB|O_NONL,
   3904 				    "  causes test already run for ");
   3905 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
   3906 				    ap->arrowp->tail->myevent);
   3907 				out(O_ALTFP|O_VERB, NULL);
   3908 				continue;
   3909 			}
   3910 
   3911 			/*
   3912 			 * see if false constraint prevents us
   3913 			 * from traversing this arrow
   3914 			 */
   3915 			platform_set_payloadnvp(ep->nvp);
   3916 			if (checkconstraints(fmep, ap->arrowp) == 0)
   3917 				do_not_follow = 1;
   3918 			platform_set_payloadnvp(NULL);
   3919 			if (do_not_follow) {
   3920 				indent();
   3921 				out(O_ALTFP|O_VERB|O_NONL,
   3922 				    "  False arrow from ");
   3923 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
   3924 				    ap->arrowp->tail->myevent);
   3925 				out(O_ALTFP|O_VERB, NULL);
   3926 				continue;
   3927 			}
   3928 
   3929 			ap->arrowp->tail->myevent->cached_state |=
   3930 			    CAUSES_TESTED;
   3931 			tail_event = ap->arrowp->tail->myevent;
   3932 			fstate = hypothesise(fmep, tail_event, at_latest_by,
   3933 			    &my_delay);
   3934 
   3935 			switch (fstate) {
   3936 			case FME_WAIT:
   3937 				if (my_delay < overall_delay)
   3938 					overall_delay = my_delay;
   3939 				waiting_results++;
   3940 				break;
   3941 			case FME_CREDIBLE:
   3942 				credible_results++;
   3943 				break;
   3944 			case FME_DISPROVED:
   3945 				break;
   3946 			default:
   3947 				out(O_DIE, "Bug in causes_test");
   3948 			}
   3949 		}
   3950 	}
   3951 	/* compare against K */
   3952 	if (credible_results + waiting_results < k) {
   3953 		indent();
   3954 		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES DISPROVED ");
   3955 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   3956 		out(O_ALTFP|O_VERB, NULL);
   3957 		indent_pop();
   3958 		return (FME_DISPROVED);
   3959 	}
   3960 	if (waiting_results != 0) {
   3961 		*pdelay = overall_delay;
   3962 		indent();
   3963 		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES WAIT ");
   3964 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   3965 		out(O_ALTFP|O_VERB|O_NONL, " to ");
   3966 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
   3967 		out(O_ALTFP|O_VERB, NULL);
   3968 		indent_pop();
   3969 		return (FME_WAIT);
   3970 	}
   3971 	indent();
   3972 	out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES CREDIBLE ");
   3973 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   3974 	out(O_ALTFP|O_VERB, NULL);
   3975 	indent_pop();
   3976 	return (FME_CREDIBLE);
   3977 }
   3978 
   3979 static enum fme_state
   3980 hypothesise(struct fme *fmep, struct event *ep,
   3981 	unsigned long long at_latest_by, unsigned long long *pdelay)
   3982 {
   3983 	enum fme_state rtr, otr;
   3984 	unsigned long long my_delay;
   3985 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
   3986 
   3987 	stats_counter_bump(fmep->Hcallcount);
   3988 	indent_push("  H");
   3989 	indent();
   3990 	out(O_ALTFP|O_VERB|O_NONL, "->");
   3991 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   3992 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
   3993 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
   3994 	out(O_ALTFP|O_VERB, NULL);
   3995 
   3996 	rtr = requirements_test(fmep, ep, at_latest_by, &my_delay);
   3997 	if ((rtr == FME_WAIT) && (my_delay < overall_delay))
   3998 		overall_delay = my_delay;
   3999 	if (rtr != FME_DISPROVED) {
   4000 		if (is_problem(ep->t)) {
   4001 			otr = effects_test(fmep, ep, at_latest_by, &my_delay);
   4002 			if (otr != FME_DISPROVED) {
   4003 				if (fmep->peek == 0 && ep->is_suspect == 0) {
   4004 					ep->suspects = fmep->suspects;
   4005 					ep->is_suspect = 1;
   4006 					fmep->suspects = ep;
   4007 					fmep->nsuspects++;
   4008 				}
   4009 			}
   4010 		} else
   4011 			otr = causes_test(fmep, ep, at_latest_by, &my_delay);
   4012 		if ((otr == FME_WAIT) && (my_delay < overall_delay))
   4013 			overall_delay = my_delay;
   4014 		if ((otr != FME_DISPROVED) &&
   4015 		    ((rtr == FME_WAIT) || (otr == FME_WAIT)))
   4016 			*pdelay = overall_delay;
   4017 	}
   4018 	if (rtr == FME_DISPROVED) {
   4019 		indent();
   4020 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
   4021 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   4022 		out(O_ALTFP|O_VERB, " (doesn't meet requirements)");
   4023 		indent_pop();
   4024 		return (FME_DISPROVED);
   4025 	}
   4026 	if ((otr == FME_DISPROVED) && is_problem(ep->t)) {
   4027 		indent();
   4028 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
   4029 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   4030 		out(O_ALTFP|O_VERB, " (doesn't explain all reports)");
   4031 		indent_pop();
   4032 		return (FME_DISPROVED);
   4033 	}
   4034 	if (otr == FME_DISPROVED) {
   4035 		indent();
   4036 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
   4037 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   4038 		out(O_ALTFP|O_VERB, " (causes are not credible)");
   4039 		indent_pop();
   4040 		return (FME_DISPROVED);
   4041 	}
   4042 	if ((rtr == FME_WAIT) || (otr == FME_WAIT)) {
   4043 		indent();
   4044 		out(O_ALTFP|O_VERB|O_NONL, "<-WAIT ");
   4045 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   4046 		out(O_ALTFP|O_VERB|O_NONL, " to ");
   4047 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay);
   4048 		out(O_ALTFP|O_VERB, NULL);
   4049 		indent_pop();
   4050 		return (FME_WAIT);
   4051 	}
   4052 	indent();
   4053 	out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE ");
   4054 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   4055 	out(O_ALTFP|O_VERB, NULL);
   4056 	indent_pop();
   4057 	return (FME_CREDIBLE);
   4058 }
   4059 
   4060 /*
   4061  * fme_istat_load -- reconstitute any persistent istats
   4062  */
   4063 void
   4064 fme_istat_load(fmd_hdl_t *hdl)
   4065 {
   4066 	int sz;
   4067 	char *sbuf;
   4068 	char *ptr;
   4069 
   4070 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_ISTATS)) == 0) {
   4071 		out(O_ALTFP, "fme_istat_load: No stats");
   4072 		return;
   4073 	}
   4074 
   4075 	sbuf = alloca(sz);
   4076 
   4077 	fmd_buf_read(hdl, NULL, WOBUF_ISTATS, sbuf, sz);
   4078 
   4079 	/*
   4080 	 * pick apart the serialized stats
   4081 	 *
   4082 	 * format is:
   4083 	 *	<class-name>, '@', <path>, '\0', <value>, '\0'
   4084 	 * for example:
   4085 	 *	"stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0"
   4086 	 *
   4087 	 * since this is parsing our own serialized data, any parsing issues
   4088 	 * are fatal, so we check for them all with ASSERT() below.
   4089 	 */
   4090 	ptr = sbuf;
   4091 	while (ptr < &sbuf[sz]) {
   4092 		char *sepptr;
   4093 		struct node *np;
   4094 		int val;
   4095 
   4096 		sepptr = strchr(ptr, '@');
   4097 		ASSERT(sepptr != NULL);
   4098 		*sepptr = '\0';
   4099 
   4100 		/* construct the event */
   4101 		np = newnode(T_EVENT, NULL, 0);
   4102 		np->u.event.ename = newnode(T_NAME, NULL, 0);
   4103 		np->u.event.ename->u.name.t = N_STAT;
   4104 		np->u.event.ename->u.name.s = stable(ptr);
   4105 		np->u.event.ename->u.name.it = IT_ENAME;
   4106 		np->u.event.ename->u.name.last = np->u.event.ename;
   4107 
   4108 		ptr = sepptr + 1;
   4109 		ASSERT(ptr < &sbuf[sz]);
   4110 		ptr += strlen(ptr);
   4111 		ptr++;	/* move past the '\0' separating path from value */
   4112 		ASSERT(ptr < &sbuf[sz]);
   4113 		ASSERT(isdigit(*ptr));
   4114 		val = atoi(ptr);
   4115 		ASSERT(val > 0);
   4116 		ptr += strlen(ptr);
   4117 		ptr++;	/* move past the final '\0' for this entry */
   4118 
   4119 		np->u.event.epname = pathstring2epnamenp(sepptr + 1);
   4120 		ASSERT(np->u.event.epname != NULL);
   4121 
   4122 		istat_bump(np, val);
   4123 		tree_free(np);
   4124 	}
   4125 
   4126 	istat_save();
   4127 }
   4128