Home | History | Annotate | Download | only in eversholt
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  *
     26  * fme.c -- fault management exercise module
     27  *
     28  * this module provides the simulated fault management exercise.
     29  */
     30 
     31 #include <stdio.h>
     32 #include <stdlib.h>
     33 #include <string.h>
     34 #include <strings.h>
     35 #include <ctype.h>
     36 #include <alloca.h>
     37 #include <libnvpair.h>
     38 #include <sys/fm/protocol.h>
     39 #include <fm/fmd_api.h>
     40 #include "alloc.h"
     41 #include "out.h"
     42 #include "stats.h"
     43 #include "stable.h"
     44 #include "literals.h"
     45 #include "lut.h"
     46 #include "tree.h"
     47 #include "ptree.h"
     48 #include "itree.h"
     49 #include "ipath.h"
     50 #include "fme.h"
     51 #include "evnv.h"
     52 #include "eval.h"
     53 #include "config.h"
     54 #include "platform.h"
     55 #include "esclex.h"
     56 
     57 /* imported from eft.c... */
     58 extern hrtime_t Hesitate;
     59 extern char *Serd_Override;
     60 extern nv_alloc_t Eft_nv_hdl;
     61 extern int Max_fme;
     62 extern fmd_hdl_t *Hdl;
     63 
/*
 * Flags paired with istat_save()/serd_save() declared below.
 * NOTE(review): presumably set when istat/serd state has changed and still
 * has to be written out; the code that sets and tests them is not in this
 * part of the file -- confirm.
 */
static int Istat_need_save;
static int Serd_need_save;
void istat_save(void);
void serd_save(void);

/* fme under construction is global so we can free it on module abort */
static struct fme *Nfmep;

/*
 * Reason code recorded just before an undiagnosable result is reported;
 * fme_restart() sets it back to UD_VAL_UNKNOWN once it has been used.
 */
static int Undiag_reason = UD_VAL_UNKNOWN;

/*
 * Id given to the next FME created by newfme(); fme_restart() moves it
 * past the id of any FME it restores.
 */
static int Nextid = 0;

static int Open_fme_count = 0;	/* Count of open FMEs */
     77 
/*
 * list of fault management exercises underway
 *
 * One struct fme describes a single exercise.  alloc_fme() returns these
 * zero-filled, so any field not explicitly set starts out as 0/NULL.
 *
 * Three list heads are declared along with the type:
 *	FMElist		head of the list fme_ready() appends to
 *	EFMElist	tail of that same list (fme_ready() links new
 *			entries after it)
 *	ClosedFMEs	separate list that fme_fini() also destroys
 */
static struct fme {
	struct fme *next;		/* next exercise */
	unsigned long long ull;		/* time when fme was created */
	int id;				/* FME id */
	struct config *config;		/* cooked configuration data */
	struct lut *eventtree;		/* propagation tree for this FME */
	/*
	 * The initial error report that created this FME is kept in
	 * two forms.  e0 points to the instance tree node and is used
	 * by fme_eval() as the starting point for the inference
	 * algorithm.  e0r is the event handle FMD passed to us when
	 * the ereport first arrived and is used when setting timers,
	 * which are always relative to the time of this initial
	 * report.
	 */
	struct event *e0;
	fmd_event_t *e0r;

	id_t    timer;			/* for setting an fmd time-out */

	struct event *ecurrent;		/* ereport under consideration */
	struct event *suspects;		/* current suspect list */
	struct event *psuspects;	/* previous suspect list */
	int nsuspects;			/* count of suspects */
	int posted_suspects;		/* true if we've posted a diagnosis */
	int uniqobs;			/* number of unique events observed */
	int peek;			/* just peeking, don't track suspects */
	int overflow;			/* true if overflow FME */
	/* result of evaluating this FME; new FMEs start as FME_NOTHING */
	enum fme_state {
		FME_NOTHING = 5000,	/* not evaluated yet */
		FME_WAIT,		/* need to wait for more info */
		FME_CREDIBLE,		/* suspect list is credible */
		FME_DISPROVED,		/* no valid suspects found */
		FME_DEFERRED		/* don't know yet (k-count not met) */
	} state;

	unsigned long long pull;	/* time passed since created */
	unsigned long long wull;	/* wait until this time for re-eval */
	struct event *observations;	/* observation list */
	struct lut *globals;		/* values of global variables */
	/* fmd interfacing */
	fmd_hdl_t *hdl;			/* handle for talking with fmd */
	fmd_case_t *fmcase;		/* what fmd 'case' we associate with */
	/*
	 * stats -- per-FME counters, created in fme_ready() under the names
	 * "fme<id>.<suffix>" and deleted again in destroy_fme().
	 */
	struct stats *Rcount;		/* ereports received */
	struct stats *Hcallcount;	/* calls to hypothesise() */
	struct stats *Rcallcount;	/* calls to requirements_test() */
	struct stats *Ccallcount;	/* calls to causes_test() */
	struct stats *Ecallcount;	/* calls to effects_test() */
	struct stats *Tcallcount;	/* calls to triggered() */
	struct stats *Marrowcount;	/* arrows marked by mark_arrows() */
	struct stats *diags;		/* suspect lists diagnosed */
} *FMElist, *EFMElist, *ClosedFMEs;
    132 
/*
 * Singly linked list of fmd cases that could not be restarted.
 * fme_restart() pushes a new entry on the front for each such case and
 * fme_fini() frees the entries (the cases themselves are not freed here).
 */
static struct case_list {
	fmd_case_t *fmcase;		/* the case that could not be restarted */
	struct case_list *next;		/* next entry, NULL at end of list */
} *Undiagablecaselist;
    137 
    138 static void fme_eval(struct fme *fmep, fmd_event_t *ffep);
    139 static enum fme_state hypothesise(struct fme *fmep, struct event *ep,
    140 	unsigned long long at_latest_by, unsigned long long *pdelay);
    141 static struct node *eventprop_lookup(struct event *ep, const char *propname);
    142 static struct node *pathstring2epnamenp(char *path);
    143 static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep,
    144 	fmd_case_t *fmcase, nvlist_t *detector, char *arg);
    145 static char *undiag_2reason_str(int ud, char *arg);
    146 static const char *undiag_2defect_str(int ud);
    147 static void restore_suspects(struct fme *fmep);
    148 static void save_suspects(struct fme *fmep);
    149 static void destroy_fme(struct fme *f);
    150 static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
    151     const char *eventstring, const struct ipath *ipp, nvlist_t *nvl);
    152 static void istat_counter_reset_cb(struct istat_entry *entp,
    153     struct stats *statp, const struct ipath *ipp);
    154 static void istat_counter_topo_chg_cb(struct istat_entry *entp,
    155     struct stats *statp, void *unused);
    156 static void serd_reset_cb(struct serd_entry *entp, void *unused,
    157     const struct ipath *ipp);
    158 static void serd_topo_chg_cb(struct serd_entry *entp, void *unused,
    159     void *unused2);
    160 static void destroy_fme_bufs(struct fme *fp);
    161 
    162 static struct fme *
    163 alloc_fme(void)
    164 {
    165 	struct fme *fmep;
    166 
    167 	fmep = MALLOC(sizeof (*fmep));
    168 	bzero(fmep, sizeof (*fmep));
    169 	return (fmep);
    170 }
    171 
    172 /*
    173  * fme_ready -- called when all initialization of the FME (except for
    174  *	stats) has completed successfully.  Adds the fme to global lists
    175  *	and establishes its stats.
    176  */
    177 static struct fme *
    178 fme_ready(struct fme *fmep)
    179 {
    180 	char nbuf[100];
    181 
    182 	Nfmep = NULL;	/* don't need to free this on module abort now */
    183 
    184 	if (EFMElist) {
    185 		EFMElist->next = fmep;
    186 		EFMElist = fmep;
    187 	} else
    188 		FMElist = EFMElist = fmep;
    189 
    190 	(void) sprintf(nbuf, "fme%d.Rcount", fmep->id);
    191 	fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
    192 	(void) sprintf(nbuf, "fme%d.Hcall", fmep->id);
    193 	fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1);
    194 	(void) sprintf(nbuf, "fme%d.Rcall", fmep->id);
    195 	fmep->Rcallcount = stats_new_counter(nbuf,
    196 	    "calls to requirements_test()", 1);
    197 	(void) sprintf(nbuf, "fme%d.Ccall", fmep->id);
    198 	fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1);
    199 	(void) sprintf(nbuf, "fme%d.Ecall", fmep->id);
    200 	fmep->Ecallcount =
    201 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
    202 	(void) sprintf(nbuf, "fme%d.Tcall", fmep->id);
    203 	fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
    204 	(void) sprintf(nbuf, "fme%d.Marrow", fmep->id);
    205 	fmep->Marrowcount = stats_new_counter(nbuf,
    206 	    "arrows marked by mark_arrows()", 1);
    207 	(void) sprintf(nbuf, "fme%d.diags", fmep->id);
    208 	fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
    209 
    210 	out(O_ALTFP|O_VERB2, "newfme: config snapshot contains...");
    211 	config_print(O_ALTFP|O_VERB2, fmep->config);
    212 
    213 	return (fmep);
    214 }
    215 
    216 extern void ipath_dummy_lut(struct arrow *);
    217 extern struct lut *itree_create_dummy(const char *, const struct ipath *);
    218 
    219 /* ARGSUSED */
    220 static void
    221 set_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
    222 {
    223 	struct bubble *bp;
    224 	struct arrowlist *ap;
    225 
    226 	for (bp = itree_next_bubble(ep, NULL); bp;
    227 	    bp = itree_next_bubble(ep, bp)) {
    228 		if (bp->t != B_FROM)
    229 			continue;
    230 		for (ap = itree_next_arrow(bp, NULL); ap;
    231 		    ap = itree_next_arrow(bp, ap)) {
    232 			ap->arrowp->pnode->u.arrow.needed = 1;
    233 			ipath_dummy_lut(ap->arrowp);
    234 		}
    235 	}
    236 }
    237 
    238 /* ARGSUSED */
    239 static void
    240 unset_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
    241 {
    242 	struct bubble *bp;
    243 	struct arrowlist *ap;
    244 
    245 	for (bp = itree_next_bubble(ep, NULL); bp;
    246 	    bp = itree_next_bubble(ep, bp)) {
    247 		if (bp->t != B_FROM)
    248 			continue;
    249 		for (ap = itree_next_arrow(bp, NULL); ap;
    250 		    ap = itree_next_arrow(bp, ap))
    251 			ap->arrowp->pnode->u.arrow.needed = 0;
    252 	}
    253 }
    254 
    255 static void globals_destructor(void *left, void *right, void *arg);
    256 static void clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep);
    257 
    258 static boolean_t
    259 prune_propagations(const char *e0class, const struct ipath *e0ipp)
    260 {
    261 	char nbuf[100];
    262 	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
    263 	extern struct lut *Usednames;
    264 
    265 	Nfmep = alloc_fme();
    266 	Nfmep->id = Nextid;
    267 	Nfmep->state = FME_NOTHING;
    268 	Nfmep->eventtree = itree_create_dummy(e0class, e0ipp);
    269 	if ((Nfmep->e0 =
    270 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
    271 		itree_free(Nfmep->eventtree);
    272 		FREE(Nfmep);
    273 		Nfmep = NULL;
    274 		return (B_FALSE);
    275 	}
    276 	Nfmep->ecurrent = Nfmep->observations = Nfmep->e0;
    277 	Nfmep->e0->count++;
    278 
    279 	(void) sprintf(nbuf, "fme%d.Rcount", Nfmep->id);
    280 	Nfmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
    281 	(void) sprintf(nbuf, "fme%d.Hcall", Nfmep->id);
    282 	Nfmep->Hcallcount =
    283 	    stats_new_counter(nbuf, "calls to hypothesise()", 1);
    284 	(void) sprintf(nbuf, "fme%d.Rcall", Nfmep->id);
    285 	Nfmep->Rcallcount = stats_new_counter(nbuf,
    286 	    "calls to requirements_test()", 1);
    287 	(void) sprintf(nbuf, "fme%d.Ccall", Nfmep->id);
    288 	Nfmep->Ccallcount =
    289 	    stats_new_counter(nbuf, "calls to causes_test()", 1);
    290 	(void) sprintf(nbuf, "fme%d.Ecall", Nfmep->id);
    291 	Nfmep->Ecallcount =
    292 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
    293 	(void) sprintf(nbuf, "fme%d.Tcall", Nfmep->id);
    294 	Nfmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
    295 	(void) sprintf(nbuf, "fme%d.Marrow", Nfmep->id);
    296 	Nfmep->Marrowcount = stats_new_counter(nbuf,
    297 	    "arrows marked by mark_arrows()", 1);
    298 	(void) sprintf(nbuf, "fme%d.diags", Nfmep->id);
    299 	Nfmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
    300 
    301 	Nfmep->peek = 1;
    302 	lut_walk(Nfmep->eventtree, (lut_cb)unset_needed_arrows, (void *)Nfmep);
    303 	lut_free(Usednames, NULL, NULL);
    304 	Usednames = NULL;
    305 	lut_walk(Nfmep->eventtree, (lut_cb)clear_arrows, (void *)Nfmep);
    306 	(void) hypothesise(Nfmep, Nfmep->e0, Nfmep->ull, &my_delay);
    307 	itree_prune(Nfmep->eventtree);
    308 	lut_walk(Nfmep->eventtree, (lut_cb)set_needed_arrows, (void *)Nfmep);
    309 
    310 	stats_delete(Nfmep->Rcount);
    311 	stats_delete(Nfmep->Hcallcount);
    312 	stats_delete(Nfmep->Rcallcount);
    313 	stats_delete(Nfmep->Ccallcount);
    314 	stats_delete(Nfmep->Ecallcount);
    315 	stats_delete(Nfmep->Tcallcount);
    316 	stats_delete(Nfmep->Marrowcount);
    317 	stats_delete(Nfmep->diags);
    318 	itree_free(Nfmep->eventtree);
    319 	lut_free(Nfmep->globals, globals_destructor, NULL);
    320 	FREE(Nfmep);
    321 	return (B_TRUE);
    322 }
    323 
/*
 * newfme -- create a new FME for the initial ereport e0class@e0ipp.
 *
 *	On success the new fme has been handed to fme_ready() and is
 *	returned.  On any failure Undiag_reason is set, the problem is
 *	reported through publish_undiagnosable(), everything allocated so
 *	far is released and NULL is returned.
 *
 *	hdl/fmcase identify the fmd case the FME belongs to, ffep is passed
 *	through to publish_undiagnosable(), and nvl is only consulted for
 *	its FM_EREPORT_DETECTOR member when the path check fails.
 */
static struct fme *
newfme(const char *e0class, const struct ipath *e0ipp, fmd_hdl_t *hdl,
	fmd_case_t *fmcase, fmd_event_t *ffep, nvlist_t *nvl)
{
	struct cfgdata *cfgdata;
	int init_size;
	extern int alloc_total();
	nvlist_t *detector = NULL;
	char *pathstr;
	char *arg;

	/*
	 * First check if e0ipp is actually in the topology so we can give a
	 * more useful error message.
	 */
	ipathlastcomp(e0ipp);
	pathstr = ipath2str(NULL, e0ipp);
	cfgdata = config_snapshot();
	platform_units_translate(0, cfgdata->cooked, NULL, NULL,
	    &detector, pathstr);
	FREE(pathstr);
	/* this snapshot was only needed for the lookup above */
	structconfig_free(cfgdata->cooked);
	config_free(cfgdata);
	if (detector == NULL) {
		/*
		 * No detector came back for the path.  Report using the
		 * detector member of the ereport itself; that nvlist is
		 * only looked up, so it is not freed on this path.
		 */
		Undiag_reason = UD_VAL_BADEVENTPATH;
		(void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR,
		    &detector);
		arg = ipath2str(e0class, e0ipp);
		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
		FREE(arg);
		return (NULL);
	}

	/*
	 * Next run a quick first pass of the rules with a dummy config. This
	 * allows us to prune those rules which can't possibly cause this
	 * ereport.
	 */
	if (!prune_propagations(e0class, e0ipp)) {
		/*
		 * The fault class must have been in the rules or we would
		 * not have registered for it (and got a "nosub"), and the
		 * pathname must be in the topology or we would have failed the
		 * previous test. So to get here means the combination of
		 * class and pathname in the ereport must be invalid.
		 */
		Undiag_reason = UD_VAL_BADEVENTCLASS;
		arg = ipath2str(e0class, e0ipp);
		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
		nvlist_free(detector);
		FREE(arg);
		return (NULL);
	}

	/*
	 * Now go ahead and create the real fme using the pruned rules.
	 */
	init_size = alloc_total();
	out(O_ALTFP|O_STAMP, "start config_snapshot using %d bytes", init_size);
	cfgdata = config_snapshot();
	platform_save_config(hdl, fmcase);
	out(O_ALTFP|O_STAMP, "config_snapshot added %d bytes",
	    alloc_total() - init_size);

	/* from here on the fme under construction is reachable via Nfmep */
	Nfmep = alloc_fme();

	Nfmep->id = Nextid++;
	/* keep the cooked config; the cfgdata wrapper is released */
	Nfmep->config = cfgdata->cooked;
	config_free(cfgdata);
	Nfmep->posted_suspects = 0;
	Nfmep->uniqobs = 0;
	Nfmep->state = FME_NOTHING;
	Nfmep->pull = 0ULL;
	Nfmep->overflow = 0;

	Nfmep->fmcase = fmcase;
	Nfmep->hdl = hdl;

	if ((Nfmep->eventtree = itree_create(Nfmep->config)) == NULL) {
		/* could not build the instance tree from the config */
		Undiag_reason = UD_VAL_INSTFAIL;
		arg = ipath2str(e0class, e0ipp);
		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
		nvlist_free(detector);
		FREE(arg);
		structconfig_free(Nfmep->config);
		destroy_fme_bufs(Nfmep);
		FREE(Nfmep);
		Nfmep = NULL;
		return (NULL);
	}

	itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree);

	if ((Nfmep->e0 =
	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
		/* the initial event is not present in the instance tree */
		Undiag_reason = UD_VAL_BADEVENTI;
		arg = ipath2str(e0class, e0ipp);
		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
		nvlist_free(detector);
		FREE(arg);
		itree_free(Nfmep->eventtree);
		structconfig_free(Nfmep->config);
		destroy_fme_bufs(Nfmep);
		FREE(Nfmep);
		Nfmep = NULL;
		return (NULL);
	}

	nvlist_free(detector);
	/* fme_ready() links the fme into FMElist and clears Nfmep */
	return (fme_ready(Nfmep));
}
    435 
    436 void
    437 fme_fini(void)
    438 {
    439 	struct fme *sfp, *fp;
    440 	struct case_list *ucasep, *nextcasep;
    441 
    442 	ucasep = Undiagablecaselist;
    443 	while (ucasep != NULL) {
    444 		nextcasep = ucasep->next;
    445 		FREE(ucasep);
    446 		ucasep = nextcasep;
    447 	}
    448 	Undiagablecaselist = NULL;
    449 
    450 	/* clean up closed fmes */
    451 	fp = ClosedFMEs;
    452 	while (fp != NULL) {
    453 		sfp = fp->next;
    454 		destroy_fme(fp);
    455 		fp = sfp;
    456 	}
    457 	ClosedFMEs = NULL;
    458 
    459 	fp = FMElist;
    460 	while (fp != NULL) {
    461 		sfp = fp->next;
    462 		destroy_fme(fp);
    463 		fp = sfp;
    464 	}
    465 	FMElist = EFMElist = NULL;
    466 
    467 	/* if we were in the middle of creating an fme, free it now */
    468 	if (Nfmep) {
    469 		destroy_fme(Nfmep);
    470 		Nfmep = NULL;
    471 	}
    472 }
    473 
/*
 * Allocated space for a buffer name.  20 bytes allows for
 * a ridiculous 9,999,999 unique observations: the longest name used
 * is "observed<n>.nvp", i.e. 8 + 4 fixed characters plus the
 * terminating NUL, which leaves room for a 7-digit <n>.
 */
#define	OBBUFNMSZ 20
    479 
    480 /*
    481  *  serialize_observation
    482  *
    483  *  Create a recoverable version of the current observation
    484  *  (f->ecurrent).  We keep a serialized version of each unique
    485  *  observation in order that we may resume correctly the fme in the
    486  *  correct state if eft or fmd crashes and we're restarted.
    487  */
    488 static void
    489 serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp)
    490 {
    491 	size_t pkdlen;
    492 	char tmpbuf[OBBUFNMSZ];
    493 	char *pkd = NULL;
    494 	char *estr;
    495 
    496 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs);
    497 	estr = ipath2str(cls, ipp);
    498 	fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1);
    499 	fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr,
    500 	    strlen(estr) + 1);
    501 	FREE(estr);
    502 
    503 	if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) {
    504 		(void) snprintf(tmpbuf,
    505 		    OBBUFNMSZ, "observed%d.nvp", fp->uniqobs);
    506 		if (nvlist_xpack(fp->ecurrent->nvp,
    507 		    &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0)
    508 			out(O_DIE|O_SYS, "pack of observed nvl failed");
    509 		fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen);
    510 		fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen);
    511 		FREE(pkd);
    512 	}
    513 
    514 	fp->uniqobs++;
    515 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
    516 	    sizeof (fp->uniqobs));
    517 }
    518 
    519 /*
    520  *  init_fme_bufs -- We keep several bits of state about an fme for
    521  *	use if eft or fmd crashes and we're restarted.
    522  */
    523 static void
    524 init_fme_bufs(struct fme *fp)
    525 {
    526 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull));
    527 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull,
    528 	    sizeof (fp->pull));
    529 
    530 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id));
    531 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id,
    532 	    sizeof (fp->id));
    533 
    534 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs));
    535 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
    536 	    sizeof (fp->uniqobs));
    537 
    538 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD,
    539 	    sizeof (fp->posted_suspects));
    540 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD,
    541 	    (void *)&fp->posted_suspects, sizeof (fp->posted_suspects));
    542 }
    543 
    544 static void
    545 destroy_fme_bufs(struct fme *fp)
    546 {
    547 	char tmpbuf[OBBUFNMSZ];
    548 	int o;
    549 
    550 	platform_restore_config(fp->hdl, fp->fmcase);
    551 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN);
    552 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG);
    553 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL);
    554 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID);
    555 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD);
    556 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS);
    557 
    558 	for (o = 0; o < fp->uniqobs; o++) {
    559 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o);
    560 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
    561 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o);
    562 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
    563 	}
    564 }
    565 
    566 /*
    567  * reconstitute_observations -- convert a case's serialized observations
    568  *	back into struct events.  Returns zero if all observations are
    569  *	successfully reconstituted.
    570  */
    571 static int
    572 reconstitute_observations(struct fme *fmep)
    573 {
    574 	struct event *ep;
    575 	struct node *epnamenp = NULL;
    576 	size_t pkdlen;
    577 	char *pkd = NULL;
    578 	char *tmpbuf = alloca(OBBUFNMSZ);
    579 	char *sepptr;
    580 	char *estr;
    581 	int ocnt;
    582 	int elen;
    583 
    584 	for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) {
    585 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt);
    586 		elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
    587 		if (elen == 0) {
    588 			out(O_ALTFP,
    589 			    "reconstitute_observation: no %s buffer found.",
    590 			    tmpbuf);
    591 			Undiag_reason = UD_VAL_MISSINGOBS;
    592 			break;
    593 		}
    594 
    595 		estr = MALLOC(elen);
    596 		fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
    597 		sepptr = strchr(estr, '@');
    598 		if (sepptr == NULL) {
    599 			out(O_ALTFP,
    600 			    "reconstitute_observation: %s: "
    601 			    "missing @ separator in %s.",
    602 			    tmpbuf, estr);
    603 			Undiag_reason = UD_VAL_MISSINGPATH;
    604 			FREE(estr);
    605 			break;
    606 		}
    607 
    608 		*sepptr = '\0';
    609 		if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
    610 			out(O_ALTFP,
    611 			    "reconstitute_observation: %s: "
    612 			    "trouble converting path string \"%s\" "
    613 			    "to internal representation.",
    614 			    tmpbuf, sepptr + 1);
    615 			Undiag_reason = UD_VAL_MISSINGPATH;
    616 			FREE(estr);
    617 			break;
    618 		}
    619 
    620 		/* construct the event */
    621 		ep = itree_lookup(fmep->eventtree,
    622 		    stable(estr), ipath(epnamenp));
    623 		if (ep == NULL) {
    624 			out(O_ALTFP,
    625 			    "reconstitute_observation: %s: "
    626 			    "lookup of  \"%s\" in itree failed.",
    627 			    tmpbuf, ipath2str(estr, ipath(epnamenp)));
    628 			Undiag_reason = UD_VAL_BADOBS;
    629 			tree_free(epnamenp);
    630 			FREE(estr);
    631 			break;
    632 		}
    633 		tree_free(epnamenp);
    634 
    635 		/*
    636 		 * We may or may not have a saved nvlist for the observation
    637 		 */
    638 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt);
    639 		pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
    640 		if (pkdlen != 0) {
    641 			pkd = MALLOC(pkdlen);
    642 			fmd_buf_read(fmep->hdl,
    643 			    fmep->fmcase, tmpbuf, pkd, pkdlen);
    644 			ASSERT(ep->nvp == NULL);
    645 			if (nvlist_xunpack(pkd,
    646 			    pkdlen, &ep->nvp, &Eft_nv_hdl) != 0)
    647 				out(O_DIE|O_SYS, "pack of observed nvl failed");
    648 			FREE(pkd);
    649 		}
    650 
    651 		if (ocnt == 0)
    652 			fmep->e0 = ep;
    653 
    654 		FREE(estr);
    655 		fmep->ecurrent = ep;
    656 		ep->count++;
    657 
    658 		/* link it into list of observations seen */
    659 		ep->observations = fmep->observations;
    660 		fmep->observations = ep;
    661 	}
    662 
    663 	if (ocnt == fmep->uniqobs) {
    664 		(void) fme_ready(fmep);
    665 		return (0);
    666 	}
    667 
    668 	return (1);
    669 }
    670 
    671 /*
    672  * restart_fme -- called during eft initialization.  Reconstitutes
    673  *	an in-progress fme.
    674  */
    675 void
    676 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress)
    677 {
    678 	nvlist_t *defect;
    679 	struct case_list *bad;
    680 	struct fme *fmep;
    681 	struct cfgdata *cfgdata;
    682 	size_t rawsz;
    683 	struct event *ep;
    684 	char *tmpbuf = alloca(OBBUFNMSZ);
    685 	char *sepptr;
    686 	char *estr;
    687 	int elen;
    688 	struct node *epnamenp = NULL;
    689 	int init_size;
    690 	extern int alloc_total();
    691 	char *reason;
    692 
    693 	/*
    694 	 * ignore solved or closed cases
    695 	 */
    696 	if (fmd_case_solved(hdl, inprogress) ||
    697 	    fmd_case_closed(hdl, inprogress))
    698 		return;
    699 
    700 	fmep = alloc_fme();
    701 	fmep->fmcase = inprogress;
    702 	fmep->hdl = hdl;
    703 
    704 	if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) {
    705 		out(O_ALTFP, "restart_fme: no saved posted status");
    706 		Undiag_reason = UD_VAL_MISSINGINFO;
    707 		goto badcase;
    708 	} else {
    709 		fmd_buf_read(hdl, inprogress, WOBUF_POSTD,
    710 		    (void *)&fmep->posted_suspects,
    711 		    sizeof (fmep->posted_suspects));
    712 	}
    713 
    714 	if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) {
    715 		out(O_ALTFP, "restart_fme: no saved id");
    716 		Undiag_reason = UD_VAL_MISSINGINFO;
    717 		goto badcase;
    718 	} else {
    719 		fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id,
    720 		    sizeof (fmep->id));
    721 	}
    722 	if (Nextid <= fmep->id)
    723 		Nextid = fmep->id + 1;
    724 
    725 	out(O_ALTFP, "Replay FME %d", fmep->id);
    726 
    727 	if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) {
    728 		out(O_ALTFP, "restart_fme: No config data");
    729 		Undiag_reason = UD_VAL_MISSINGINFO;
    730 		goto badcase;
    731 	}
    732 	fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz,
    733 	    sizeof (size_t));
    734 
    735 	if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) {
    736 		out(O_ALTFP, "restart_fme: No event zero");
    737 		Undiag_reason = UD_VAL_MISSINGZERO;
    738 		goto badcase;
    739 	}
    740 
    741 	if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) {
    742 		out(O_ALTFP, "restart_fme: no saved wait time");
    743 		Undiag_reason = UD_VAL_MISSINGINFO;
    744 		goto badcase;
    745 	} else {
    746 		fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull,
    747 		    sizeof (fmep->pull));
    748 	}
    749 
    750 	if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) {
    751 		out(O_ALTFP, "restart_fme: no count of observations");
    752 		Undiag_reason = UD_VAL_MISSINGINFO;
    753 		goto badcase;
    754 	} else {
    755 		fmd_buf_read(hdl, inprogress, WOBUF_NOBS,
    756 		    (void *)&fmep->uniqobs, sizeof (fmep->uniqobs));
    757 	}
    758 
    759 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed0");
    760 	elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
    761 	if (elen == 0) {
    762 		out(O_ALTFP, "reconstitute_observation: no %s buffer found.",
    763 		    tmpbuf);
    764 		Undiag_reason = UD_VAL_MISSINGOBS;
    765 		goto badcase;
    766 	}
    767 	estr = MALLOC(elen);
    768 	fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
    769 	sepptr = strchr(estr, '@');
    770 	if (sepptr == NULL) {
    771 		out(O_ALTFP, "reconstitute_observation: %s: "
    772 		    "missing @ separator in %s.",
    773 		    tmpbuf, estr);
    774 		Undiag_reason = UD_VAL_MISSINGPATH;
    775 		FREE(estr);
    776 		goto badcase;
    777 	}
    778 	*sepptr = '\0';
    779 	if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
    780 		out(O_ALTFP, "reconstitute_observation: %s: "
    781 		    "trouble converting path string \"%s\" "
    782 		    "to internal representation.", tmpbuf, sepptr + 1);
    783 		Undiag_reason = UD_VAL_MISSINGPATH;
    784 		FREE(estr);
    785 		goto badcase;
    786 	}
    787 	(void) prune_propagations(stable(estr), ipath(epnamenp));
    788 	tree_free(epnamenp);
    789 	FREE(estr);
    790 
    791 	init_size = alloc_total();
    792 	out(O_ALTFP|O_STAMP, "start config_restore using %d bytes", init_size);
    793 	cfgdata = MALLOC(sizeof (struct cfgdata));
    794 	cfgdata->cooked = NULL;
    795 	cfgdata->devcache = NULL;
    796 	cfgdata->devidcache = NULL;
    797 	cfgdata->cpucache = NULL;
    798 	cfgdata->raw_refcnt = 1;
    799 
    800 	if (rawsz > 0) {
    801 		if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) {
    802 			out(O_ALTFP, "restart_fme: Config data size mismatch");
    803 			Undiag_reason = UD_VAL_CFGMISMATCH;
    804 			goto badcase;
    805 		}
    806 		cfgdata->begin = MALLOC(rawsz);
    807 		cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz;
    808 		fmd_buf_read(hdl,
    809 		    inprogress, WOBUF_CFG, cfgdata->begin, rawsz);
    810 	} else {
    811 		cfgdata->begin = cfgdata->end = cfgdata->nextfree = NULL;
    812 	}
    813 
    814 	config_cook(cfgdata);
    815 	fmep->config = cfgdata->cooked;
    816 	config_free(cfgdata);
    817 	out(O_ALTFP|O_STAMP, "config_restore added %d bytes",
    818 	    alloc_total() - init_size);
    819 
    820 	if ((fmep->eventtree = itree_create(fmep->config)) == NULL) {
    821 		/* case not properly saved or irretrievable */
    822 		out(O_ALTFP, "restart_fme: NULL instance tree");
    823 		Undiag_reason = UD_VAL_INSTFAIL;
    824 		goto badcase;
    825 	}
    826 
    827 	itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree);
    828 
    829 	if (reconstitute_observations(fmep) != 0)
    830 		goto badcase;
    831 
    832 	out(O_ALTFP|O_NONL, "FME %d replay observations: ", fmep->id);
    833 	for (ep = fmep->observations; ep; ep = ep->observations) {
    834 		out(O_ALTFP|O_NONL, " ");
    835 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
    836 	}
    837 	out(O_ALTFP, NULL);
    838 
    839 	Open_fme_count++;
    840 
    841 	/* give the diagnosis algorithm a shot at the new FME state */
    842 	fme_eval(fmep, fmep->e0r);
    843 	return;
    844 
    845 badcase:
    846 	if (fmep->eventtree != NULL)
    847 		itree_free(fmep->eventtree);
    848 	if (fmep->config)
    849 		structconfig_free(fmep->config);
    850 	destroy_fme_bufs(fmep);
    851 	FREE(fmep);
    852 
    853 	/*
    854 	 * Since we're unable to restart the case, add it to the undiagable
    855 	 * list and solve and close it as appropriate.
    856 	 */
    857 	bad = MALLOC(sizeof (struct case_list));
    858 	bad->next = NULL;
    859 
    860 	if (Undiagablecaselist != NULL)
    861 		bad->next = Undiagablecaselist;
    862 	Undiagablecaselist = bad;
    863 	bad->fmcase = inprogress;
    864 
    865 	out(O_ALTFP|O_NONL, "[case %s (unable to restart), ",
    866 	    fmd_case_uuid(hdl, bad->fmcase));
    867 
    868 	if (fmd_case_solved(hdl, bad->fmcase)) {
    869 		out(O_ALTFP|O_NONL, "already solved, ");
    870 	} else {
    871 		out(O_ALTFP|O_NONL, "solving, ");
    872 		defect = fmd_nvl_create_fault(hdl,
    873 		    undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
    874 		reason = undiag_2reason_str(Undiag_reason, NULL);
    875 		(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
    876 		FREE(reason);
    877 		fmd_case_add_suspect(hdl, bad->fmcase, defect);
    878 		fmd_case_solve(hdl, bad->fmcase);
    879 		Undiag_reason = UD_VAL_UNKNOWN;
    880 	}
    881 
    882 	if (fmd_case_closed(hdl, bad->fmcase)) {
    883 		out(O_ALTFP, "already closed ]");
    884 	} else {
    885 		out(O_ALTFP, "closing ]");
    886 		fmd_case_close(hdl, bad->fmcase);
    887 	}
    888 }
    889 
    890 /*ARGSUSED*/
    891 static void
    892 globals_destructor(void *left, void *right, void *arg)
    893 {
    894 	struct evalue *evp = (struct evalue *)right;
    895 	if (evp->t == NODEPTR)
    896 		tree_free((struct node *)(uintptr_t)evp->v);
    897 	evp->v = (uintptr_t)NULL;
    898 	FREE(evp);
    899 }
    900 
    901 void
    902 destroy_fme(struct fme *f)
    903 {
    904 	stats_delete(f->Rcount);
    905 	stats_delete(f->Hcallcount);
    906 	stats_delete(f->Rcallcount);
    907 	stats_delete(f->Ccallcount);
    908 	stats_delete(f->Ecallcount);
    909 	stats_delete(f->Tcallcount);
    910 	stats_delete(f->Marrowcount);
    911 	stats_delete(f->diags);
    912 
    913 	if (f->eventtree != NULL)
    914 		itree_free(f->eventtree);
    915 	if (f->config)
    916 		structconfig_free(f->config);
    917 	lut_free(f->globals, globals_destructor, NULL);
    918 	FREE(f);
    919 }
    920 
    921 static const char *
    922 fme_state2str(enum fme_state s)
    923 {
    924 	switch (s) {
    925 	case FME_NOTHING:	return ("NOTHING");
    926 	case FME_WAIT:		return ("WAIT");
    927 	case FME_CREDIBLE:	return ("CREDIBLE");
    928 	case FME_DISPROVED:	return ("DISPROVED");
    929 	case FME_DEFERRED:	return ("DEFERRED");
    930 	default:		return ("UNKNOWN");
    931 	}
    932 }
    933 
    934 static int
    935 is_problem(enum nametype t)
    936 {
    937 	return (t == N_FAULT || t == N_DEFECT || t == N_UPSET);
    938 }
    939 
    940 static int
    941 is_defect(enum nametype t)
    942 {
    943 	return (t == N_DEFECT);
    944 }
    945 
    946 static int
    947 is_upset(enum nametype t)
    948 {
    949 	return (t == N_UPSET);
    950 }
    951 
    952 static void
    953 fme_print(int flags, struct fme *fmep)
    954 {
    955 	struct event *ep;
    956 
    957 	out(flags, "Fault Management Exercise %d", fmep->id);
    958 	out(flags, "\t       State: %s", fme_state2str(fmep->state));
    959 	out(flags|O_NONL, "\t  Start time: ");
    960 	ptree_timeval(flags|O_NONL, &fmep->ull);
    961 	out(flags, NULL);
    962 	if (fmep->wull) {
    963 		out(flags|O_NONL, "\t   Wait time: ");
    964 		ptree_timeval(flags|O_NONL, &fmep->wull);
    965 		out(flags, NULL);
    966 	}
    967 	out(flags|O_NONL, "\t          E0: ");
    968 	if (fmep->e0)
    969 		itree_pevent_brief(flags|O_NONL, fmep->e0);
    970 	else
    971 		out(flags|O_NONL, "NULL");
    972 	out(flags, NULL);
    973 	out(flags|O_NONL, "\tObservations:");
    974 	for (ep = fmep->observations; ep; ep = ep->observations) {
    975 		out(flags|O_NONL, " ");
    976 		itree_pevent_brief(flags|O_NONL, ep);
    977 	}
    978 	out(flags, NULL);
    979 	out(flags|O_NONL, "\tSuspect list:");
    980 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
    981 		out(flags|O_NONL, " ");
    982 		itree_pevent_brief(flags|O_NONL, ep);
    983 	}
    984 	out(flags, NULL);
    985 	if (fmep->eventtree != NULL) {
    986 		out(flags|O_VERB2, "\t        Tree:");
    987 		itree_ptree(flags|O_VERB2, fmep->eventtree);
    988 	}
    989 }
    990 
    991 static struct node *
    992 pathstring2epnamenp(char *path)
    993 {
    994 	char *sep = "/";
    995 	struct node *ret;
    996 	char *ptr;
    997 
    998 	if ((ptr = strtok(path, sep)) == NULL)
    999 		out(O_DIE, "pathstring2epnamenp: invalid empty class");
   1000 
   1001 	ret = tree_iname(stable(ptr), NULL, 0);
   1002 
   1003 	while ((ptr = strtok(NULL, sep)) != NULL)
   1004 		ret = tree_name_append(ret,
   1005 		    tree_iname(stable(ptr), NULL, 0));
   1006 
   1007 	return (ret);
   1008 }
   1009 
   1010 /*
   1011  * for a given upset sp, increment the corresponding SERD engine.  if the
   1012  * SERD engine trips, return the ename and ipp of the resulting ereport.
   1013  * returns true if engine tripped and *enamep and *ippp were filled in.
   1014  */
   1015 static int
   1016 serd_eval(struct fme *fmep, fmd_hdl_t *hdl, fmd_event_t *ffep,
   1017     fmd_case_t *fmcase, struct event *sp, const char **enamep,
   1018     const struct ipath **ippp)
   1019 {
   1020 	struct node *serdinst;
   1021 	char *serdname;
   1022 	char *serdresource;
   1023 	char *serdclass;
   1024 	struct node *nid;
   1025 	struct serd_entry *newentp;
   1026 	int i, serdn = -1, serdincrement = 1, len = 0;
   1027 	char *serdsuffix = NULL, *serdt = NULL;
   1028 	struct evalue *ep;
   1029 
   1030 	ASSERT(sp->t == N_UPSET);
   1031 	ASSERT(ffep != NULL);
   1032 
   1033 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
   1034 	    (void *)"n", (lut_cmp)strcmp)) != NULL) {
   1035 		ASSERT(ep->t == UINT64);
   1036 		serdn = (int)ep->v;
   1037 	}
   1038 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
   1039 	    (void *)"t", (lut_cmp)strcmp)) != NULL) {
   1040 		ASSERT(ep->t == STRING);
   1041 		serdt = (char *)(uintptr_t)ep->v;
   1042 	}
   1043 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
   1044 	    (void *)"suffix", (lut_cmp)strcmp)) != NULL) {
   1045 		ASSERT(ep->t == STRING);
   1046 		serdsuffix = (char *)(uintptr_t)ep->v;
   1047 	}
   1048 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
   1049 	    (void *)"increment", (lut_cmp)strcmp)) != NULL) {
   1050 		ASSERT(ep->t == UINT64);
   1051 		serdincrement = (int)ep->v;
   1052 	}
   1053 
   1054 	/*
   1055 	 * obtain instanced SERD engine from the upset sp.  from this
   1056 	 * derive serdname, the string used to identify the SERD engine.
   1057 	 */
   1058 	serdinst = eventprop_lookup(sp, L_engine);
   1059 
   1060 	if (serdinst == NULL)
   1061 		return (-1);
   1062 
   1063 	len = strlen(serdinst->u.stmt.np->u.event.ename->u.name.s) + 1;
   1064 	if (serdsuffix != NULL)
   1065 		len += strlen(serdsuffix);
   1066 	serdclass = MALLOC(len);
   1067 	if (serdsuffix != NULL)
   1068 		(void) snprintf(serdclass, len, "%s%s",
   1069 		    serdinst->u.stmt.np->u.event.ename->u.name.s, serdsuffix);
   1070 	else
   1071 		(void) snprintf(serdclass, len, "%s",
   1072 		    serdinst->u.stmt.np->u.event.ename->u.name.s);
   1073 	serdresource = ipath2str(NULL,
   1074 	    ipath(serdinst->u.stmt.np->u.event.epname));
   1075 	len += strlen(serdresource) + 1;
   1076 	serdname = MALLOC(len);
   1077 	(void) snprintf(serdname, len, "%s@%s", serdclass, serdresource);
   1078 	FREE(serdresource);
   1079 
   1080 	/* handle serd engine "id" property, if there is one */
   1081 	if ((nid =
   1082 	    lut_lookup(serdinst->u.stmt.lutp, (void *)L_id, NULL)) != NULL) {
   1083 		struct evalue *gval;
   1084 		char suffixbuf[200];
   1085 		char *suffix;
   1086 		char *nserdname;
   1087 		size_t nname;
   1088 
   1089 		out(O_ALTFP|O_NONL, "serd \"%s\" id: ", serdname);
   1090 		ptree_name_iter(O_ALTFP|O_NONL, nid);
   1091 
   1092 		ASSERTinfo(nid->t == T_GLOBID, ptree_nodetype2str(nid->t));
   1093 
   1094 		if ((gval = lut_lookup(fmep->globals,
   1095 		    (void *)nid->u.globid.s, NULL)) == NULL) {
   1096 			out(O_ALTFP, " undefined");
   1097 		} else if (gval->t == UINT64) {
   1098 			out(O_ALTFP, " %llu", gval->v);
   1099 			(void) sprintf(suffixbuf, "%llu", gval->v);
   1100 			suffix = suffixbuf;
   1101 		} else {
   1102 			out(O_ALTFP, " \"%s\"", (char *)(uintptr_t)gval->v);
   1103 			suffix = (char *)(uintptr_t)gval->v;
   1104 		}
   1105 
   1106 		nname = strlen(serdname) + strlen(suffix) + 2;
   1107 		nserdname = MALLOC(nname);
   1108 		(void) snprintf(nserdname, nname, "%s:%s", serdname, suffix);
   1109 		FREE(serdname);
   1110 		serdname = nserdname;
   1111 	}
   1112 
   1113 	/*
   1114 	 * if the engine is empty, and we have an override for n/t then
   1115 	 * destroy and recreate it.
   1116 	 */
   1117 	if ((serdn != -1 || serdt != NULL) && fmd_serd_exists(hdl, serdname) &&
   1118 	    fmd_serd_empty(hdl, serdname))
   1119 		fmd_serd_destroy(hdl, serdname);
   1120 
   1121 	if (!fmd_serd_exists(hdl, serdname)) {
   1122 		struct node *nN, *nT;
   1123 		const char *s;
   1124 		struct node *nodep;
   1125 		struct config *cp;
   1126 		char *path;
   1127 		uint_t nval;
   1128 		hrtime_t tval;
   1129 		int i;
   1130 		char *ptr;
   1131 		int got_n_override = 0, got_t_override = 0;
   1132 
   1133 		/* no SERD engine yet, so create it */
   1134 		nodep = serdinst->u.stmt.np->u.event.epname;
   1135 		path = ipath2str(NULL, ipath(nodep));
   1136 		cp = config_lookup(fmep->config, path, 0);
   1137 		FREE((void *)path);
   1138 
   1139 		/*
   1140 		 * We allow serd paramaters to be overridden, either from
   1141 		 * eft.conf file values (if Serd_Override is set) or from
   1142 		 * driver properties (for "serd.io.device" engines).
   1143 		 */
   1144 		if (Serd_Override != NULL) {
   1145 			char *save_ptr, *ptr1, *ptr2, *ptr3;
   1146 			ptr3 = save_ptr = STRDUP(Serd_Override);
   1147 			while (*ptr3 != '\0') {
   1148 				ptr1 = strchr(ptr3, ',');
   1149 				*ptr1 = '\0';
   1150 				if (strcmp(ptr3, serdclass) == 0) {
   1151 					ptr2 =  strchr(ptr1 + 1, ',');
   1152 					*ptr2 = '\0';
   1153 					nval = atoi(ptr1 + 1);
   1154 					out(O_ALTFP, "serd override %s_n %d",
   1155 					    serdclass, nval);
   1156 					ptr3 =  strchr(ptr2 + 1, ' ');
   1157 					if (ptr3)
   1158 						*ptr3 = '\0';
   1159 					ptr = STRDUP(ptr2 + 1);
   1160 					out(O_ALTFP, "serd override %s_t %s",
   1161 					    serdclass, ptr);
   1162 					got_n_override = 1;
   1163 					got_t_override = 1;
   1164 					break;
   1165 				} else {
   1166 					ptr2 =  strchr(ptr1 + 1, ',');
   1167 					ptr3 =  strchr(ptr2 + 1, ' ');
   1168 					if (ptr3 == NULL)
   1169 						break;
   1170 				}
   1171 				ptr3++;
   1172 			}
   1173 			FREE(save_ptr);
   1174 		}
   1175 
   1176 		if (cp && got_n_override == 0) {
   1177 			/*
   1178 			 * convert serd engine class into property name
   1179 			 */
   1180 			char *prop_name = MALLOC(strlen(serdclass) + 3);
   1181 			for (i = 0; i < strlen(serdclass); i++) {
   1182 				if (serdclass[i] == '.')
   1183 					prop_name[i] = '_';
   1184 				else
   1185 					prop_name[i] = serdclass[i];
   1186 			}
   1187 			prop_name[i++] = '_';
   1188 			prop_name[i++] = 'n';
   1189 			prop_name[i] = '\0';
   1190 			if (s = config_getprop(cp, prop_name)) {
   1191 				nval = atoi(s);
   1192 				out(O_ALTFP, "serd override %s_n %s",
   1193 				    serdclass, s);
   1194 				got_n_override = 1;
   1195 			}
   1196 			prop_name[i - 1] = 't';
   1197 			if (s = config_getprop(cp, prop_name)) {
   1198 				ptr = STRDUP(s);
   1199 				out(O_ALTFP, "serd override %s_t %s",
   1200 				    serdclass, s);
   1201 				got_t_override = 1;
   1202 			}
   1203 			FREE(prop_name);
   1204 		}
   1205 
   1206 		if (serdn != -1 && got_n_override == 0) {
   1207 			nval = serdn;
   1208 			out(O_ALTFP, "serd override %s_n %d", serdclass, serdn);
   1209 			got_n_override = 1;
   1210 		}
   1211 		if (serdt != NULL && got_t_override == 0) {
   1212 			ptr = STRDUP(serdt);
   1213 			out(O_ALTFP, "serd override %s_t %s", serdclass, serdt);
   1214 			got_t_override = 1;
   1215 		}
   1216 
   1217 		if (!got_n_override) {
   1218 			nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N,
   1219 			    NULL);
   1220 			ASSERT(nN->t == T_NUM);
   1221 			nval = (uint_t)nN->u.ull;
   1222 		}
   1223 		if (!got_t_override) {
   1224 			nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T,
   1225 			    NULL);
   1226 			ASSERT(nT->t == T_TIMEVAL);
   1227 			tval = (hrtime_t)nT->u.ull;
   1228 		} else {
   1229 			const unsigned long long *ullp;
   1230 			const char *suffix;
   1231 			int len;
   1232 
   1233 			len = strspn(ptr, "0123456789");
   1234 			suffix = stable(&ptr[len]);
   1235 			ullp = (unsigned long long *)lut_lookup(Timesuffixlut,
   1236 			    (void *)suffix, NULL);
   1237 			ptr[len] = '\0';
   1238 			tval = strtoull(ptr, NULL, 0) * (ullp ? *ullp : 1ll);
   1239 			FREE(ptr);
   1240 		}
   1241 		fmd_serd_create(hdl, serdname, nval, tval);
   1242 	}
   1243 
   1244 	newentp = MALLOC(sizeof (*newentp));
   1245 	newentp->ename = stable(serdclass);
   1246 	FREE(serdclass);
   1247 	newentp->ipath = ipath(serdinst->u.stmt.np->u.event.epname);
   1248 	newentp->hdl = hdl;
   1249 	if (lut_lookup(SerdEngines, newentp, (lut_cmp)serd_cmp) == NULL) {
   1250 		SerdEngines = lut_add(SerdEngines, (void *)newentp,
   1251 		    (void *)newentp, (lut_cmp)serd_cmp);
   1252 		Serd_need_save = 1;
   1253 		serd_save();
   1254 	} else {
   1255 		FREE(newentp);
   1256 	}
   1257 
   1258 
   1259 	/*
   1260 	 * increment SERD engine.  if engine fires, reset serd
   1261 	 * engine and return trip_strcode if required.
   1262 	 */
   1263 	for (i = 0; i < serdincrement; i++) {
   1264 		if (fmd_serd_record(hdl, serdname, ffep)) {
   1265 			fmd_case_add_serd(hdl, fmcase, serdname);
   1266 			fmd_serd_reset(hdl, serdname);
   1267 
   1268 			if (ippp) {
   1269 				struct node *tripinst =
   1270 				    lut_lookup(serdinst->u.stmt.lutp,
   1271 				    (void *)L_trip, NULL);
   1272 				ASSERT(tripinst != NULL);
   1273 				*enamep = tripinst->u.event.ename->u.name.s;
   1274 				*ippp = ipath(tripinst->u.event.epname);
   1275 				out(O_ALTFP|O_NONL,
   1276 				    "[engine fired: %s, sending: ", serdname);
   1277 				ipath_print(O_ALTFP|O_NONL, *enamep, *ippp);
   1278 				out(O_ALTFP, "]");
   1279 			} else {
   1280 				out(O_ALTFP, "[engine fired: %s, no trip]",
   1281 				    serdname);
   1282 			}
   1283 			FREE(serdname);
   1284 			return (1);
   1285 		}
   1286 	}
   1287 
   1288 	FREE(serdname);
   1289 	return (0);
   1290 }
   1291 
   1292 /*
   1293  * search a suspect list for upsets.  feed each upset to serd_eval() and
   1294  * build up tripped[], an array of ereports produced by the firing of
   1295  * any SERD engines.  then feed each ereport back into
   1296  * fme_receive_report().
   1297  *
   1298  * returns ntrip, the number of these ereports produced.
   1299  */
   1300 static int
   1301 upsets_eval(struct fme *fmep, fmd_event_t *ffep)
   1302 {
   1303 	/* we build an array of tripped ereports that we send ourselves */
   1304 	struct {
   1305 		const char *ename;
   1306 		const struct ipath *ipp;
   1307 	} *tripped;
   1308 	struct event *sp;
   1309 	int ntrip, nupset, i;
   1310 
   1311 	/*
   1312 	 * count the number of upsets to determine the upper limit on
   1313 	 * expected trip ereport strings.  remember that one upset can
   1314 	 * lead to at most one ereport.
   1315 	 */
   1316 	nupset = 0;
   1317 	for (sp = fmep->suspects; sp; sp = sp->suspects) {
   1318 		if (sp->t == N_UPSET)
   1319 			nupset++;
   1320 	}
   1321 
   1322 	if (nupset == 0)
   1323 		return (0);
   1324 
   1325 	/*
   1326 	 * get to this point if we have upsets and expect some trip
   1327 	 * ereports
   1328 	 */
   1329 	tripped = alloca(sizeof (*tripped) * nupset);
   1330 	bzero((void *)tripped, sizeof (*tripped) * nupset);
   1331 
   1332 	ntrip = 0;
   1333 	for (sp = fmep->suspects; sp; sp = sp->suspects)
   1334 		if (sp->t == N_UPSET &&
   1335 		    serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, sp,
   1336 		    &tripped[ntrip].ename, &tripped[ntrip].ipp) == 1)
   1337 			ntrip++;
   1338 
   1339 	for (i = 0; i < ntrip; i++) {
   1340 		struct event *ep, *nep;
   1341 		struct fme *nfmep;
   1342 		fmd_case_t *fmcase;
   1343 		const struct ipath *ipp;
   1344 		const char *eventstring;
   1345 		int prev_verbose;
   1346 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
   1347 		enum fme_state state;
   1348 
   1349 		/*
   1350 		 * First try and evaluate a case with the trip ereport plus
   1351 		 * all the other ereports that cause the trip. If that fails
   1352 		 * to evaluate then try again with just this ereport on its own.
   1353 		 */
   1354 		out(O_ALTFP|O_NONL, "fme_receive_report_serd: ");
   1355 		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
   1356 		out(O_ALTFP|O_STAMP, NULL);
   1357 		ep = fmep->e0;
   1358 		eventstring = ep->enode->u.event.ename->u.name.s;
   1359 		ipp = ep->ipp;
   1360 
   1361 		/*
   1362 		 * create a duplicate fme and case
   1363 		 */
   1364 		fmcase = fmd_case_open(fmep->hdl, NULL);
   1365 		out(O_ALTFP|O_NONL, "duplicate fme for event [");
   1366 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
   1367 		out(O_ALTFP, " ]");
   1368 
   1369 		if ((nfmep = newfme(eventstring, ipp, fmep->hdl,
   1370 		    fmcase, ffep, ep->nvp)) == NULL) {
   1371 			out(O_ALTFP|O_NONL, "[");
   1372 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
   1373 			out(O_ALTFP, " CANNOT DIAGNOSE]");
   1374 			continue;
   1375 		}
   1376 
   1377 		Open_fme_count++;
   1378 		nfmep->pull = fmep->pull;
   1379 		init_fme_bufs(nfmep);
   1380 		out(O_ALTFP|O_NONL, "[");
   1381 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
   1382 		out(O_ALTFP, " created FME%d, case %s]", nfmep->id,
   1383 		    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
   1384 		if (ffep) {
   1385 			fmd_case_setprincipal(nfmep->hdl, nfmep->fmcase, ffep);
   1386 			fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase, ffep);
   1387 			nfmep->e0r = ffep;
   1388 		}
   1389 
   1390 		/*
   1391 		 * add the original ereports
   1392 		 */
   1393 		for (ep = fmep->observations; ep; ep = ep->observations) {
   1394 			eventstring = ep->enode->u.event.ename->u.name.s;
   1395 			ipp = ep->ipp;
   1396 			out(O_ALTFP|O_NONL, "adding event [");
   1397 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
   1398 			out(O_ALTFP, " ]");
   1399 			nep = itree_lookup(nfmep->eventtree, eventstring, ipp);
   1400 			if (nep->count++ == 0) {
   1401 				nep->observations = nfmep->observations;
   1402 				nfmep->observations = nep;
   1403 				serialize_observation(nfmep, eventstring, ipp);
   1404 				nep->nvp = evnv_dupnvl(ep->nvp);
   1405 			}
   1406 			if (ep->ffep && ep->ffep != ffep)
   1407 				fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase,
   1408 				    ep->ffep);
   1409 			stats_counter_bump(nfmep->Rcount);
   1410 		}
   1411 
   1412 		/*
   1413 		 * add the serd trigger ereport
   1414 		 */
   1415 		if ((ep = itree_lookup(nfmep->eventtree, tripped[i].ename,
   1416 		    tripped[i].ipp)) == NULL) {
   1417 			/*
   1418 			 * The trigger ereport is not in the instance tree. It
   1419 			 * was presumably removed by prune_propagations() as
   1420 			 * this combination of events is not present in the
   1421 			 * rules.
   1422 			 */
   1423 			out(O_ALTFP, "upsets_eval: e0 not in instance tree");
   1424 			Undiag_reason = UD_VAL_BADEVENTI;
   1425 			goto retry_lone_ereport;
   1426 		}
   1427 		out(O_ALTFP|O_NONL, "adding event [");
   1428 		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
   1429 		out(O_ALTFP, " ]");
   1430 		nfmep->ecurrent = ep;
   1431 		ep->nvp = NULL;
   1432 		ep->count = 1;
   1433 		ep->observations = nfmep->observations;
   1434 		nfmep->observations = ep;
   1435 
   1436 		/*
   1437 		 * just peek first.
   1438 		 */
   1439 		nfmep->peek = 1;
   1440 		prev_verbose = Verbose;
   1441 		if (Debug == 0)
   1442 			Verbose = 0;
   1443 		lut_walk(nfmep->eventtree, (lut_cb)clear_arrows, (void *)nfmep);
   1444 		state = hypothesise(nfmep, nfmep->e0, nfmep->ull, &my_delay);
   1445 		nfmep->peek = 0;
   1446 		Verbose = prev_verbose;
   1447 		if (state == FME_DISPROVED) {
   1448 			out(O_ALTFP, "upsets_eval: hypothesis disproved");
   1449 			Undiag_reason = UD_VAL_UNSOLVD;
   1450 retry_lone_ereport:
   1451 			/*
   1452 			 * However the trigger ereport on its own might be
   1453 			 * diagnosable, so check for that. Undo the new fme
   1454 			 * and case we just created and call fme_receive_report.
   1455 			 */
   1456 			out(O_ALTFP|O_NONL, "[");
   1457 			ipath_print(O_ALTFP|O_NONL, tripped[i].ename,
   1458 			    tripped[i].ipp);
   1459 			out(O_ALTFP, " retrying with just trigger ereport]");
   1460 			itree_free(nfmep->eventtree);
   1461 			nfmep->eventtree = NULL;
   1462 			structconfig_free(nfmep->config);
   1463 			nfmep->config = NULL;
   1464 			destroy_fme_bufs(nfmep);
   1465 			fmd_case_close(nfmep->hdl, nfmep->fmcase);
   1466 			fme_receive_report(fmep->hdl, ffep,
   1467 			    tripped[i].ename, tripped[i].ipp, NULL);
   1468 			continue;
   1469 		}
   1470 
   1471 		/*
   1472 		 * and evaluate
   1473 		 */
   1474 		serialize_observation(nfmep, tripped[i].ename, tripped[i].ipp);
   1475 		fme_eval(nfmep, ffep);
   1476 	}
   1477 
   1478 	return (ntrip);
   1479 }
   1480 
   1481 /*
   1482  * fme_receive_external_report -- call when an external ereport comes in
   1483  *
   1484  * this routine just converts the relevant information from the ereport
   1485  * into a format used internally and passes it on to fme_receive_report().
   1486  */
   1487 void
   1488 fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
   1489     const char *class)
   1490 {
   1491 	struct node		*epnamenp;
   1492 	fmd_case_t		*fmcase;
   1493 	const struct ipath	*ipp;
   1494 	nvlist_t		*detector = NULL;
   1495 
   1496 	class = stable(class);
   1497 
   1498 	/* Get the component path from the ereport */
   1499 	epnamenp = platform_getpath(nvl);
   1500 
   1501 	/* See if we ended up without a path. */
   1502 	if (epnamenp == NULL) {
   1503 		/* See if class permits silent discard on unknown component. */
   1504 		if (lut_lookup(Ereportenames_discard, (void *)class, NULL)) {
   1505 			out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport "
   1506 			    "to component path, but silent discard allowed.",
   1507 			    class);
   1508 		} else {
   1509 			/*
   1510 			 * XFILE: Failure to find a component is bad unless
   1511 			 * 'discard_if_config_unknown=1' was specified in the
   1512 			 * ereport definition. Indicate undiagnosable.
   1513 			 */
   1514 			Undiag_reason = UD_VAL_NOPATH;
   1515 			fmcase = fmd_case_open(hdl, NULL);
   1516 
   1517 			/*
   1518 			 * We don't have a component path here (which means that
   1519 			 * the detector was not in hc-scheme and couldn't be
   1520 			 * converted to hc-scheme. Report the raw detector as
   1521 			 * the suspect resource if there is one.
   1522 			 */
   1523 			(void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR,
   1524 			    &detector);
   1525 			publish_undiagnosable(hdl, ffep, fmcase, detector,
   1526 			    (char *)class);
   1527 		}
   1528 		return;
   1529 	}
   1530 
   1531 	ipp = ipath(epnamenp);
   1532 	tree_free(epnamenp);
   1533 	fme_receive_report(hdl, ffep, class, ipp, nvl);
   1534 }
   1535 
   1536 /*ARGSUSED*/
   1537 void
   1538 fme_receive_repair_list(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
   1539     const char *eventstring)
   1540 {
   1541 	char *uuid;
   1542 	nvlist_t **nva;
   1543 	uint_t nvc;
   1544 	const struct ipath *ipp;
   1545 
   1546 	if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0 ||
   1547 	    nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
   1548 	    &nva, &nvc) != 0) {
   1549 		out(O_ALTFP, "No uuid or fault list for list.repaired event");
   1550 		return;
   1551 	}
   1552 
   1553 	out(O_ALTFP, "Processing list.repaired from case %s", uuid);
   1554 
   1555 	while (nvc-- != 0) {
   1556 		/*
   1557 		 * Reset any istat or serd engine associated with this path.
   1558 		 */
   1559 		char *path;
   1560 
   1561 		if ((ipp = platform_fault2ipath(*nva++)) == NULL)
   1562 			continue;
   1563 
   1564 		path = ipath2str(NULL, ipp);
   1565 		out(O_ALTFP, "fme_receive_repair_list: resetting state for %s",
   1566 		    path);
   1567 		FREE(path);
   1568 
   1569 		lut_walk(Istats, (lut_cb)istat_counter_reset_cb, (void *)ipp);
   1570 		istat_save();
   1571 
   1572 		lut_walk(SerdEngines, (lut_cb)serd_reset_cb, (void *)ipp);
   1573 		serd_save();
   1574 	}
   1575 }
   1576 
   1577 /*ARGSUSED*/
   1578 void
   1579 fme_receive_topology_change(void)
   1580 {
   1581 	lut_walk(Istats, (lut_cb)istat_counter_topo_chg_cb, NULL);
   1582 	istat_save();
   1583 
   1584 	lut_walk(SerdEngines, (lut_cb)serd_topo_chg_cb, NULL);
   1585 	serd_save();
   1586 }
   1587 
   1588 static int mark_arrows(struct fme *fmep, struct event *ep, int mark,
   1589     unsigned long long at_latest_by, unsigned long long *pdelay, int keep);
   1590 
   1591 /* ARGSUSED */
   1592 static void
   1593 clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
   1594 {
   1595 	struct bubble *bp;
   1596 	struct arrowlist *ap;
   1597 
   1598 	ep->cached_state = 0;
   1599 	ep->keep_in_tree = 0;
   1600 	for (bp = itree_next_bubble(ep, NULL); bp;
   1601 	    bp = itree_next_bubble(ep, bp)) {
   1602 		if (bp->t != B_FROM)
   1603 			continue;
   1604 		bp->mark = 0;
   1605 		for (ap = itree_next_arrow(bp, NULL); ap;
   1606 		    ap = itree_next_arrow(bp, ap))
   1607 			ap->arrowp->mark = 0;
   1608 	}
   1609 }
   1610 
   1611 static void
   1612 fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
   1613     const char *eventstring, const struct ipath *ipp, nvlist_t *nvl)
   1614 {
   1615 	struct event *ep;
   1616 	struct fme *fmep = NULL;
   1617 	struct fme *ofmep = NULL;
   1618 	struct fme *cfmep, *svfmep;
   1619 	int matched = 0;
   1620 	nvlist_t *defect;
   1621 	fmd_case_t *fmcase;
   1622 	char *reason;
   1623 
   1624 	out(O_ALTFP|O_NONL, "fme_receive_report: ");
   1625 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
   1626 	out(O_ALTFP|O_STAMP, NULL);
   1627 
   1628 	/* decide which FME it goes to */
   1629 	for (fmep = FMElist; fmep; fmep = fmep->next) {
   1630 		int prev_verbose;
   1631 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
   1632 		enum fme_state state;
   1633 		nvlist_t *pre_peek_nvp = NULL;
   1634 
   1635 		if (fmep->overflow) {
   1636 			if (!(fmd_case_closed(fmep->hdl, fmep->fmcase)))
   1637 				ofmep = fmep;
   1638 
   1639 			continue;
   1640 		}
   1641 
   1642 		/*
   1643 		 * ignore solved or closed cases
   1644 		 */
   1645 		if (fmep->posted_suspects ||
   1646 		    fmd_case_solved(fmep->hdl, fmep->fmcase) ||
   1647 		    fmd_case_closed(fmep->hdl, fmep->fmcase))
   1648 			continue;
   1649 
   1650 		/* look up event in event tree for this FME */
   1651 		if ((ep = itree_lookup(fmep->eventtree,
   1652 		    eventstring, ipp)) == NULL)
   1653 			continue;
   1654 
   1655 		/* note observation */
   1656 		fmep->ecurrent = ep;
   1657 		if (ep->count++ == 0) {
   1658 			/* link it into list of observations seen */
   1659 			ep->observations = fmep->observations;
   1660 			fmep->observations = ep;
   1661 			ep->nvp = evnv_dupnvl(nvl);
   1662 		} else {
   1663 			/* use new payload values for peek */
   1664 			pre_peek_nvp = ep->nvp;
   1665 			ep->nvp = evnv_dupnvl(nvl);
   1666 		}
   1667 
   1668 		/* tell hypothesise() not to mess with suspect list */
   1669 		fmep->peek = 1;
   1670 
   1671 		/* don't want this to be verbose (unless Debug is set) */
   1672 		prev_verbose = Verbose;
   1673 		if (Debug == 0)
   1674 			Verbose = 0;
   1675 
   1676 		lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
   1677 		state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
   1678 
   1679 		fmep->peek = 0;
   1680 
   1681 		/* put verbose flag back */
   1682 		Verbose = prev_verbose;
   1683 
   1684 		if (state != FME_DISPROVED) {
   1685 			/* found an FME that explains the ereport */
   1686 			matched++;
   1687 			out(O_ALTFP|O_NONL, "[");
   1688 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
   1689 			out(O_ALTFP, " explained by FME%d]", fmep->id);
   1690 
   1691 			if (pre_peek_nvp)
   1692 				nvlist_free(pre_peek_nvp);
   1693 
   1694 			if (ep->count == 1)
   1695 				serialize_observation(fmep, eventstring, ipp);
   1696 
   1697 			if (ffep) {
   1698 				fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
   1699 				ep->ffep = ffep;
   1700 			}
   1701 
   1702 			stats_counter_bump(fmep->Rcount);
   1703 
   1704 			/* re-eval FME */
   1705 			fme_eval(fmep, ffep);
   1706 		} else {
   1707 
   1708 			/* not a match, undo noting of observation */
   1709 			fmep->ecurrent = NULL;
   1710 			if (--ep->count == 0) {
   1711 				/* unlink it from observations */
   1712 				fmep->observations = ep->observations;
   1713 				ep->observations = NULL;
   1714 				nvlist_free(ep->nvp);
   1715 				ep->nvp = NULL;
   1716 			} else {
   1717 				nvlist_free(ep->nvp);
   1718 				ep->nvp = pre_peek_nvp;
   1719 			}
   1720 		}
   1721 	}
   1722 
   1723 	if (matched)
   1724 		return;	/* explained by at least one existing FME */
   1725 
   1726 	/* clean up closed fmes */
   1727 	cfmep = ClosedFMEs;
   1728 	while (cfmep != NULL) {
   1729 		svfmep = cfmep->next;
   1730 		destroy_fme(cfmep);
   1731 		cfmep = svfmep;
   1732 	}
   1733 	ClosedFMEs = NULL;
   1734 
   1735 	if (ofmep) {
   1736 		out(O_ALTFP|O_NONL, "[");
   1737 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
   1738 		out(O_ALTFP, " ADDING TO OVERFLOW FME]");
   1739 		if (ffep)
   1740 			fmd_case_add_ereport(hdl, ofmep->fmcase, ffep);
   1741 
   1742 		return;
   1743 
   1744 	} else if (Max_fme && (Open_fme_count >= Max_fme)) {
   1745 		out(O_ALTFP|O_NONL, "[");
   1746 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
   1747 		out(O_ALTFP, " MAX OPEN FME REACHED]");
   1748 
   1749 		fmcase = fmd_case_open(hdl, NULL);
   1750 
   1751 		/* Create overflow fme */
   1752 		if ((fmep = newfme(eventstring, ipp, hdl, fmcase, ffep,
   1753 		    nvl)) == NULL) {
   1754 			out(O_ALTFP|O_NONL, "[");
   1755 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
   1756 			out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]");
   1757 			return;
   1758 		}
   1759 
   1760 		Open_fme_count++;
   1761 
   1762 		init_fme_bufs(fmep);
   1763 		fmep->overflow = B_TRUE;
   1764 
   1765 		if (ffep)
   1766 			fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
   1767 
   1768 		Undiag_reason = UD_VAL_MAXFME;
   1769 		defect = fmd_nvl_create_fault(hdl,
   1770 		    undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
   1771 		reason = undiag_2reason_str(Undiag_reason, NULL);
   1772 		(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
   1773 		FREE(reason);
   1774 		fmd_case_add_suspect(hdl, fmep->fmcase, defect);
   1775 		fmd_case_solve(hdl, fmep->fmcase);
   1776 		Undiag_reason = UD_VAL_UNKNOWN;
   1777 		return;
   1778 	}
   1779 
   1780 	/* open a case */
   1781 	fmcase = fmd_case_open(hdl, NULL);
   1782 
   1783 	/* start a new FME */
   1784 	if ((fmep = newfme(eventstring, ipp, hdl, fmcase, ffep, nvl)) == NULL) {
   1785 		out(O_ALTFP|O_NONL, "[");
   1786 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
   1787 		out(O_ALTFP, " CANNOT DIAGNOSE]");
   1788 		return;
   1789 	}
   1790 
   1791 	Open_fme_count++;
   1792 
   1793 	init_fme_bufs(fmep);
   1794 
   1795 	out(O_ALTFP|O_NONL, "[");
   1796 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
   1797 	out(O_ALTFP, " created FME%d, case %s]", fmep->id,
   1798 	    fmd_case_uuid(hdl, fmep->fmcase));
   1799 
   1800 	ep = fmep->e0;
   1801 	ASSERT(ep != NULL);
   1802 
   1803 	/* note observation */
   1804 	fmep->ecurrent = ep;
   1805 	if (ep->count++ == 0) {
   1806 		/* link it into list of observations seen */
   1807 		ep->observations = fmep->observations;
   1808 		fmep->observations = ep;
   1809 		ep->nvp = evnv_dupnvl(nvl);
   1810 		serialize_observation(fmep, eventstring, ipp);
   1811 	} else {
   1812 		/* new payload overrides any previous */
   1813 		nvlist_free(ep->nvp);
   1814 		ep->nvp = evnv_dupnvl(nvl);
   1815 	}
   1816 
   1817 	stats_counter_bump(fmep->Rcount);
   1818 
   1819 	if (ffep) {
   1820 		fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
   1821 		fmd_case_setprincipal(hdl, fmep->fmcase, ffep);
   1822 		fmep->e0r = ffep;
   1823 		ep->ffep = ffep;
   1824 	}
   1825 
   1826 	/* give the diagnosis algorithm a shot at the new FME state */
   1827 	fme_eval(fmep, ffep);
   1828 }
   1829 
   1830 void
   1831 fme_status(int flags)
   1832 {
   1833 	struct fme *fmep;
   1834 
   1835 	if (FMElist == NULL) {
   1836 		out(flags, "No fault management exercises underway.");
   1837 		return;
   1838 	}
   1839 
   1840 	for (fmep = FMElist; fmep; fmep = fmep->next)
   1841 		fme_print(flags, fmep);
   1842 }
   1843 
/*
 * "indent" routines used mostly for nicely formatted debug output, but also
 * for sanity checking for infinite recursion bugs.
 *
 * The state is a fixed-size stack of prefix strings: indent_push() adds one,
 * indent_pop() removes one, indent_set() restarts the stack with a single
 * entry and indent() prints the live entries in order.
 */

/* capacity of the stack; indent_push() treats exceeding it as fatal */
#define	MAX_INDENT 1024
/* the prefix strings; slots [0, current_indent) are in use */
static const char *indent_s[MAX_INDENT];
/* number of prefix strings currently on the stack */
static int current_indent;
   1852 
   1853 static void
   1854 indent_push(const char *s)
   1855 {
   1856 	if (current_indent < MAX_INDENT)
   1857 		indent_s[current_indent++] = s;
   1858 	else
   1859 		out(O_DIE, "unexpected recursion depth (%d)", current_indent);
   1860 }
   1861 
   1862 static void
   1863 indent_set(const char *s)
   1864 {
   1865 	current_indent = 0;
   1866 	indent_push(s);
   1867 }
   1868 
   1869 static void
   1870 indent_pop(void)
   1871 {
   1872 	if (current_indent > 0)
   1873 		current_indent--;
   1874 	else
   1875 		out(O_DIE, "recursion underflow");
   1876 }
   1877 
   1878 static void
   1879 indent(void)
   1880 {
   1881 	int i;
   1882 	if (!Verbose)
   1883 		return;
   1884 	for (i = 0; i < current_indent; i++)
   1885 		out(O_ALTFP|O_VERB|O_NONL, indent_s[i]);
   1886 }
   1887 
/*
 * "circumstance" codes accepted by print_suspects(), which selects its
 * message from them.
 */
#define	SLNEW		1	/* printed as "DIAGNOSIS PRODUCED:" */
#define	SLCHANGED	2	/* printed as "diagnosis changed" */
#define	SLWAIT		3	/* printed as "set wait timer" */
#define	SLDISPROVED	4	/* printed as "DIAGNOSIS UNKNOWN" */
   1892 
   1893 static void
   1894 print_suspects(int circumstance, struct fme *fmep)
   1895 {
   1896 	struct event *ep;
   1897 
   1898 	out(O_ALTFP|O_NONL, "[");
   1899 	if (circumstance == SLCHANGED) {
   1900 		out(O_ALTFP|O_NONL, "FME%d diagnosis changed. state: %s, "
   1901 		    "suspect list:", fmep->id, fme_state2str(fmep->state));
   1902 	} else if (circumstance == SLWAIT) {
   1903 		out(O_ALTFP|O_NONL, "FME%d set wait timer %ld ", fmep->id,
   1904 		    fmep->timer);
   1905 		ptree_timeval(O_ALTFP|O_NONL, &fmep->wull);
   1906 	} else if (circumstance == SLDISPROVED) {
   1907 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id);
   1908 	} else {
   1909 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id);
   1910 	}
   1911 
   1912 	if (circumstance == SLWAIT || circumstance == SLDISPROVED) {
   1913 		out(O_ALTFP, "]");
   1914 		return;
   1915 	}
   1916 
   1917 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
   1918 		out(O_ALTFP|O_NONL, " ");
   1919 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
   1920 	}
   1921 	out(O_ALTFP, "]");
   1922 }
   1923 
   1924 static struct node *
   1925 eventprop_lookup(struct event *ep, const char *propname)
   1926 {
   1927 	return (lut_lookup(ep->props, (void *)propname, NULL));
   1928 }
   1929 
/*
 * Scratch buffer used by node2fmri() and ipath2fmri() when converting an
 * instance number to a string.  Both routines store a NUL at index
 * MAXDIGITIDX and hand ulltostr() a pointer to that final byte.
 */
#define	MAXDIGITIDX	23
static char numbuf[MAXDIGITIDX + 1];
   1932 
   1933 static int
   1934 node2uint(struct node *n, uint_t *valp)
   1935 {
   1936 	struct evalue value;
   1937 	struct lut *globals = NULL;
   1938 
   1939 	if (n == NULL)
   1940 		return (1);
   1941 
   1942 	/*
   1943 	 * check value.v since we are being asked to convert an unsigned
   1944 	 * long long int to an unsigned int
   1945 	 */
   1946 	if (! eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) ||
   1947 	    value.t != UINT64 || value.v > (1ULL << 32))
   1948 		return (1);
   1949 
   1950 	*valp = (uint_t)value.v;
   1951 
   1952 	return (0);
   1953 }
   1954 
   1955 static nvlist_t *
   1956 node2fmri(struct node *n)
   1957 {
   1958 	nvlist_t **pa, *f, *p;
   1959 	struct node *nc;
   1960 	uint_t depth = 0;
   1961 	char *numstr, *nullbyte;
   1962 	char *failure;
   1963 	int err, i;
   1964 
   1965 	/* XXX do we need to be able to handle a non-T_NAME node? */
   1966 	if (n == NULL || n->t != T_NAME)
   1967 		return (NULL);
   1968 
   1969 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
   1970 		if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM)
   1971 			break;
   1972 		depth++;
   1973 	}
   1974 
   1975 	if (nc != NULL) {
   1976 		/* We bailed early, something went wrong */
   1977 		return (NULL);
   1978 	}
   1979 
   1980 	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
   1981 		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
   1982 	pa = alloca(depth * sizeof (nvlist_t *));
   1983 	for (i = 0; i < depth; i++)
   1984 		pa[i] = NULL;
   1985 
   1986 	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
   1987 	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
   1988 	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
   1989 	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
   1990 	if (err != 0) {
   1991 		failure = "basic construction of FMRI failed";
   1992 		goto boom;
   1993 	}
   1994 
   1995 	numbuf[MAXDIGITIDX] = '\0';
   1996 	nullbyte = &numbuf[MAXDIGITIDX];
   1997 	i = 0;
   1998 
   1999 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
   2000 		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
   2001 		if (err != 0) {
   2002 			failure = "alloc of an hc-pair failed";
   2003 			goto boom;
   2004 		}
   2005 		err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s);
   2006 		numstr = ulltostr(nc->u.name.child->u.ull, nullbyte);
   2007 		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
   2008 		if (err != 0) {
   2009 			failure = "construction of an hc-pair failed";
   2010 			goto boom;
   2011 		}
   2012 		pa[i++] = p;
   2013 	}
   2014 
   2015 	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
   2016 	if (err == 0) {
   2017 		for (i = 0; i < depth; i++)
   2018 			if (pa[i] != NULL)
   2019 				nvlist_free(pa[i]);
   2020 		return (f);
   2021 	}
   2022 	failure = "addition of hc-pair array to FMRI failed";
   2023 
   2024 boom:
   2025 	for (i = 0; i < depth; i++)
   2026 		if (pa[i] != NULL)
   2027 			nvlist_free(pa[i]);
   2028 	nvlist_free(f);
   2029 	out(O_DIE, "%s", failure);
   2030 	/*NOTREACHED*/
   2031 	return (NULL);
   2032 }
   2033 
/*
 * an ipath cache entry is an array of these, with s==NULL at the end.
 * ipath2fmri() walks such an array up to that terminator, turning each
 * element into one name/id pair.
 */
struct ipath {
	const char *s;	/* component name (in stable) */
	int i;		/* instance number */
};
   2039 
   2040 static nvlist_t *
   2041 ipath2fmri(struct ipath *ipath)
   2042 {
   2043 	nvlist_t **pa, *f, *p;
   2044 	uint_t depth = 0;
   2045 	char *numstr, *nullbyte;
   2046 	char *failure;
   2047 	int err, i;
   2048 	struct ipath *ipp;
   2049 
   2050 	for (ipp = ipath; ipp->s != NULL; ipp++)
   2051 		depth++;
   2052 
   2053 	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
   2054 		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
   2055 	pa = alloca(depth * sizeof (nvlist_t *));
   2056 	for (i = 0; i < depth; i++)
   2057 		pa[i] = NULL;
   2058 
   2059 	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
   2060 	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
   2061 	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
   2062 	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
   2063 	if (err != 0) {
   2064 		failure = "basic construction of FMRI failed";
   2065 		goto boom;
   2066 	}
   2067 
   2068 	numbuf[MAXDIGITIDX] = '\0';
   2069 	nullbyte = &numbuf[MAXDIGITIDX];
   2070 	i = 0;
   2071 
   2072 	for (ipp = ipath; ipp->s != NULL; ipp++) {
   2073 		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
   2074 		if (err != 0) {
   2075 			failure = "alloc of an hc-pair failed";
   2076 			goto boom;
   2077 		}
   2078 		err = nvlist_add_string(p, FM_FMRI_HC_NAME, ipp->s);
   2079 		numstr = ulltostr(ipp->i, nullbyte);
   2080 		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
   2081 		if (err != 0) {
   2082 			failure = "construction of an hc-pair failed";
   2083 			goto boom;
   2084 		}
   2085 		pa[i++] = p;
   2086 	}
   2087 
   2088 	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
   2089 	if (err == 0) {
   2090 		for (i = 0; i < depth; i++)
   2091 			if (pa[i] != NULL)
   2092 				nvlist_free(pa[i]);
   2093 		return (f);
   2094 	}
   2095 	failure = "addition of hc-pair array to FMRI failed";
   2096 
   2097 boom:
   2098 	for (i = 0; i < depth; i++)
   2099 		if (pa[i] != NULL)
   2100 			nvlist_free(pa[i]);
   2101 	nvlist_free(f);
   2102 	out(O_DIE, "%s", failure);
   2103 	/*NOTREACHED*/
   2104 	return (NULL);
   2105 }
   2106 
   2107 static uint8_t
   2108 percentof(uint_t part, uint_t whole)
   2109 {
   2110 	unsigned long long p = part * 1000;
   2111 
   2112 	return ((p / whole / 10) + (((p / whole % 10) >= 5) ? 1 : 0));
   2113 }
   2114 
/*
 * One entry of the suspect list being prepared for publication: a suspect
 * event together with the nvlists that get_resources() produced for it.
 * rslfree() releases the three nvlists.
 */
struct rsl {
	struct event *suspect;	/* the suspect event itself */
	nvlist_t *asru;		/* ASRU nvlist, may be NULL */
	nvlist_t *fru;		/* FRU nvlist, may be NULL */
	nvlist_t *rsrc;		/* resource nvlist; may be NULL or == asru */
};

static void publish_suspects(struct fme *fmep, struct rsl *srl);
   2123 
   2124 /*
   2125  *  rslfree -- free internal members of struct rsl not expected to be
   2126  *	freed elsewhere.
   2127  */
   2128 static void
   2129 rslfree(struct rsl *freeme)
   2130 {
   2131 	if (freeme->asru != NULL)
   2132 		nvlist_free(freeme->asru);
   2133 	if (freeme->fru != NULL)
   2134 		nvlist_free(freeme->fru);
   2135 	if (freeme->rsrc != NULL && freeme->rsrc != freeme->asru)
   2136 		nvlist_free(freeme->rsrc);
   2137 }
   2138 
   2139 /*
   2140  *  rslcmp -- compare two rsl structures.  Use the following
   2141  *	comparisons to establish cardinality:
   2142  *
   2143  *	1. Name of the suspect's class. (simple strcmp)
   2144  *	2. Name of the suspect's ASRU. (trickier, since nvlist)
   2145  *
   2146  */
   2147 static int
   2148 rslcmp(const void *a, const void *b)
   2149 {
   2150 	struct rsl *r1 = (struct rsl *)a;
   2151 	struct rsl *r2 = (struct rsl *)b;
   2152 	int rv;
   2153 
   2154 	rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s,
   2155 	    r2->suspect->enode->u.event.ename->u.name.s);
   2156 	if (rv != 0)
   2157 		return (rv);
   2158 
   2159 	if (r1->rsrc == NULL && r2->rsrc == NULL)
   2160 		return (0);
   2161 	if (r1->rsrc == NULL)
   2162 		return (-1);
   2163 	if (r2->rsrc == NULL)
   2164 		return (1);
   2165 	return (evnv_cmpnvl(r1->rsrc, r2->rsrc, 0));
   2166 }
   2167 
   2168 /*
   2169  * get_resources -- for a given suspect, determine what ASRU, FRU and
   2170  *     RSRC nvlists should be advertised in the final suspect list.
   2171  */
   2172 void
   2173 get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot)
   2174 {
   2175 	struct node *asrudef, *frudef;
   2176 	nvlist_t *asru, *fru;
   2177 	nvlist_t *rsrc = NULL;
   2178 	char *pathstr;
   2179 
   2180 	/*
   2181 	 * First find any ASRU and/or FRU defined in the
   2182 	 * initial fault tree.
   2183 	 */
   2184 	asrudef = eventprop_lookup(sp, L_ASRU);
   2185 	frudef = eventprop_lookup(sp, L_FRU);
   2186 
   2187 	/*
   2188 	 * Create FMRIs based on those definitions
   2189 	 */
   2190 	asru = node2fmri(asrudef);
   2191 	fru = node2fmri(frudef);
   2192 	pathstr = ipath2str(NULL, sp->ipp);
   2193 
   2194 	/*
   2195 	 *  Allow for platform translations of the FMRIs
   2196 	 */
   2197 	platform_units_translate(is_defect(sp->t), croot, &asru, &fru, &rsrc,
   2198 	    pathstr);
   2199 
   2200 	FREE(pathstr);
   2201 	rsrcs->suspect = sp;
   2202 	rsrcs->asru = asru;
   2203 	rsrcs->fru = fru;
   2204 	rsrcs->rsrc = rsrc;
   2205 }
   2206 
   2207 /*
   2208  * trim_suspects -- prior to publishing, we may need to remove some
   2209  *    suspects from the list.  If we're auto-closing upsets, we don't
   2210  *    want any of those in the published list.  If the ASRUs for multiple
   2211  *    defects resolve to the same ASRU (driver) we only want to publish
   2212  *    that as a single suspect.
   2213  */
   2214 static int
   2215 trim_suspects(struct fme *fmep, struct rsl *begin, struct rsl *begin2,
   2216     fmd_event_t *ffep)
   2217 {
   2218 	struct event *ep;
   2219 	struct rsl *rp = begin;
   2220 	struct rsl *rp2 = begin2;
   2221 	int mess_zero_count = 0;
   2222 	int serd_rval;
   2223 	uint_t messval;
   2224 
   2225 	/* remove any unwanted upsets and populate our array */
   2226 	for (ep = fmep->psuspects; ep; ep = ep->psuspects) {
   2227 		if (is_upset(ep->t))
   2228 			continue;
   2229 		serd_rval = serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, ep,
   2230 		    NULL, NULL);
   2231 		if (serd_rval == 0)
   2232 			continue;
   2233 		if (node2uint(eventprop_lookup(ep, L_message),
   2234 		    &messval) == 0 && messval == 0) {
   2235 			get_resources(ep, rp2, fmep->config);
   2236 			rp2++;
   2237 			mess_zero_count++;
   2238 		} else {
   2239 			get_resources(ep, rp, fmep->config);
   2240 			rp++;
   2241 			fmep->nsuspects++;
   2242 		}
   2243 	}
   2244 	return (mess_zero_count);
   2245 }
   2246 
   2247 /*
   2248  * addpayloadprop -- add a payload prop to a problem
   2249  */
   2250 static void
   2251 addpayloadprop(const char *lhs, struct evalue *rhs, nvlist_t *fault)
   2252 {
   2253 	nvlist_t *rsrc, *hcs;
   2254 
   2255 	ASSERT(fault != NULL);
   2256 	ASSERT(lhs != NULL);
   2257 	ASSERT(rhs != NULL);
   2258 
   2259 	if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE, &rsrc) != 0)
   2260 		out(O_DIE, "cannot add payloadprop \"%s\" to fault", lhs);
   2261 
   2262 	if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0) {
   2263 		out(O_ALTFP|O_VERB2, "addpayloadprop: create hc_specific");
   2264 		if (nvlist_xalloc(&hcs, NV_UNIQUE_NAME, &Eft_nv_hdl) != 0)
   2265 			out(O_DIE,
   2266 			    "cannot add payloadprop \"%s\" to fault", lhs);
   2267 		if (nvlist_add_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, hcs) != 0)
   2268 			out(O_DIE,
   2269 			    "cannot add payloadprop \"%s\" to fault", lhs);
   2270 		nvlist_free(hcs);
   2271 		if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0)
   2272 			out(O_DIE,
   2273 			    "cannot add payloadprop \"%s\" to fault", lhs);
   2274 	} else
   2275 		out(O_ALTFP|O_VERB2, "addpayloadprop: reuse hc_specific");
   2276 
   2277 	if (rhs->t == UINT64) {
   2278 		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=%llu", lhs, rhs->v);
   2279 
   2280 		if (nvlist_add_uint64(hcs, lhs, rhs->v) != 0)
   2281 			out(O_DIE,
   2282 			    "cannot add payloadprop \"%s\" to fault", lhs);
   2283 	} else {
   2284 		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=\"%s\"",
   2285 		    lhs, (char *)(uintptr_t)rhs->v);
   2286 
   2287 		if (nvlist_add_string(hcs, lhs, (char *)(uintptr_t)rhs->v) != 0)
   2288 			out(O_DIE,
   2289 			    "cannot add payloadprop \"%s\" to fault", lhs);
   2290 	}
   2291 }
   2292 
/*
 * Working state shared between istat_save() and its lut_walk() callbacks
 * istataddsize() and istat2str().
 */
static char *Istatbuf;		/* start of the serialization buffer */
static char *Istatbufptr;	/* next byte of Istatbuf to be written */
static int Istatsz;		/* number of bytes in the serialized form */
   2296 
   2297 /*
   2298  * istataddsize -- calculate size of istat and add it to Istatsz
   2299  */
   2300 /*ARGSUSED2*/
   2301 static void
   2302 istataddsize(const struct istat_entry *lhs, struct stats *rhs, void *arg)
   2303 {
   2304 	int val;
   2305 
   2306 	ASSERT(lhs != NULL);
   2307 	ASSERT(rhs != NULL);
   2308 
   2309 	if ((val = stats_counter_value(rhs)) == 0)
   2310 		return;	/* skip zero-valued stats */
   2311 
   2312 	/* count up the size of the stat name */
   2313 	Istatsz += ipath2strlen(lhs->ename, lhs->ipath);
   2314 	Istatsz++;	/* for the trailing NULL byte */
   2315 
   2316 	/* count up the size of the stat value */
   2317 	Istatsz += snprintf(NULL, 0, "%d", val);
   2318 	Istatsz++;	/* for the trailing NULL byte */
   2319 }
   2320 
   2321 /*
   2322  * istat2str -- serialize an istat, writing result to *Istatbufptr
   2323  */
   2324 /*ARGSUSED2*/
   2325 static void
   2326 istat2str(const struct istat_entry *lhs, struct stats *rhs, void *arg)
   2327 {
   2328 	char *str;
   2329 	int len;
   2330 	int val;
   2331 
   2332 	ASSERT(lhs != NULL);
   2333 	ASSERT(rhs != NULL);
   2334 
   2335 	if ((val = stats_counter_value(rhs)) == 0)
   2336 		return;	/* skip zero-valued stats */
   2337 
   2338 	/* serialize the stat name */
   2339 	str = ipath2str(lhs->ename, lhs->ipath);
   2340 	len = strlen(str);
   2341 
   2342 	ASSERT(Istatbufptr + len + 1 < &Istatbuf[Istatsz]);
   2343 	(void) strlcpy(Istatbufptr, str, &Istatbuf[Istatsz] - Istatbufptr);
   2344 	Istatbufptr += len;
   2345 	FREE(str);
   2346 	*Istatbufptr++ = '\0';
   2347 
   2348 	/* serialize the stat value */
   2349 	Istatbufptr += snprintf(Istatbufptr, &Istatbuf[Istatsz] - Istatbufptr,
   2350 	    "%d", val);
   2351 	*Istatbufptr++ = '\0';
   2352 
   2353 	ASSERT(Istatbufptr <= &Istatbuf[Istatsz]);
   2354 }
   2355 
   2356 void
   2357 istat_save()
   2358 {
   2359 	if (Istat_need_save == 0)
   2360 		return;
   2361 
   2362 	/* figure out how big the serialzed info is */
   2363 	Istatsz = 0;
   2364 	lut_walk(Istats, (lut_cb)istataddsize, NULL);
   2365 
   2366 	if (Istatsz == 0) {
   2367 		/* no stats to save */
   2368 		fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
   2369 		return;
   2370 	}
   2371 
   2372 	/* create the serialized buffer */
   2373 	Istatbufptr = Istatbuf = MALLOC(Istatsz);
   2374 	lut_walk(Istats, (lut_cb)istat2str, NULL);
   2375 
   2376 	/* clear out current saved stats */
   2377 	fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
   2378 
   2379 	/* write out the new version */
   2380 	fmd_buf_write(Hdl, NULL, WOBUF_ISTATS, Istatbuf, Istatsz);
   2381 	FREE(Istatbuf);
   2382 
   2383 	Istat_need_save = 0;
   2384 }
   2385 
   2386 int
   2387 istat_cmp(struct istat_entry *ent1, struct istat_entry *ent2)
   2388 {
   2389 	if (ent1->ename != ent2->ename)
   2390 		return (ent2->ename - ent1->ename);
   2391 	if (ent1->ipath != ent2->ipath)
   2392 		return ((char *)ent2->ipath - (char *)ent1->ipath);
   2393 
   2394 	return (0);
   2395 }
   2396 
   2397 /*
   2398  * istat-verify -- verify the component associated with a stat still exists
   2399  *
   2400  * if the component no longer exists, this routine resets the stat and
   2401  * returns 0.  if the component still exists, it returns 1.
   2402  */
   2403 static int
   2404 istat_verify(struct node *snp, struct istat_entry *entp)
   2405 {
   2406 	struct stats *statp;
   2407 	nvlist_t *fmri;
   2408 
   2409 	fmri = node2fmri(snp->u.event.epname);
   2410 	if (platform_path_exists(fmri)) {
   2411 		nvlist_free(fmri);
   2412 		return (1);
   2413 	}
   2414 	nvlist_free(fmri);
   2415 
   2416 	/* component no longer in system.  zero out the associated stats */
   2417 	if ((statp = (struct stats *)
   2418 	    lut_lookup(Istats, entp, (lut_cmp)istat_cmp)) == NULL ||
   2419 	    stats_counter_value(statp) == 0)
   2420 		return (0);	/* stat is already reset */
   2421 
   2422 	Istat_need_save = 1;
   2423 	stats_counter_reset(statp);
   2424 	return (0);
   2425 }
   2426 
   2427 static void
   2428 istat_bump(struct node *snp, int n)
   2429 {
   2430 	struct stats *statp;
   2431 	struct istat_entry ent;
   2432 
   2433 	ASSERT(snp != NULL);
   2434 	ASSERTinfo(snp->t == T_EVENT, ptree_nodetype2str(snp->t));
   2435 	ASSERT(snp->u.event.epname != NULL);
   2436 
   2437 	/* class name should be hoisted into a single stable entry */
   2438 	ASSERT(snp->u.event.ename->u.name.next == NULL);
   2439 	ent.ename = snp->u.event.ename->u.name.s;
   2440 	ent.ipath = ipath(snp->u.event.epname);
   2441 
   2442 	if (!istat_verify(snp, &ent)) {
   2443 		/* component no longer exists in system, nothing to do */
   2444 		return;
   2445 	}
   2446 
   2447 	if ((statp = (struct stats *)
   2448 	    lut_lookup(Istats, &ent, (lut_cmp)istat_cmp)) == NULL) {
   2449 		/* need to create the counter */
   2450 		int cnt = 0;
   2451 		struct node *np;
   2452 		char *sname;
   2453 		char *snamep;
   2454 		struct istat_entry *newentp;
   2455 
   2456 		/* count up the size of the stat name */
   2457 		np = snp->u.event.ename;
   2458 		while (np != NULL) {
   2459 			cnt += strlen(np->u.name.s);
   2460 			cnt++;	/* for the '.' or '@' */
   2461 			np = np->u.name.next;
   2462 		}
   2463 		np = snp->u.event.epname;
   2464 		while (np != NULL) {
   2465 			cnt += snprintf(NULL, 0, "%s%llu",
   2466 			    np->u.name.s, np->u.name.child->u.ull);
   2467 			cnt++;	/* for the '/' or trailing NULL byte */
   2468 			np = np->u.name.next;
   2469 		}
   2470 
   2471 		/* build the stat name */
   2472 		snamep = sname = alloca(cnt);
   2473 		np = snp->u.event.ename;
   2474 		while (np != NULL) {
   2475 			snamep += snprintf(snamep, &sname[cnt] - snamep,
   2476 			    "%s", np->u.name.s);
   2477 			np = np->u.name.next;
   2478 			if (np)
   2479 				*snamep++ = '.';
   2480 		}
   2481 		*snamep++ = '@';
   2482 		np = snp->u.event.epname;
   2483 		while (np != NULL) {
   2484 			snamep += snprintf(snamep, &sname[cnt] - snamep,
   2485 			    "%s%llu", np->u.name.s, np->u.name.child->u.ull);
   2486 			np = np->u.name.next;
   2487 			if (np)
   2488 				*snamep++ = '/';
   2489 		}
   2490 		*snamep++ = '\0';
   2491 
   2492 		/* create the new stat & add it to our list */
   2493 		newentp = MALLOC(sizeof (*newentp));
   2494 		*newentp = ent;
   2495 		statp = stats_new_counter(NULL, sname, 0);
   2496 		Istats = lut_add(Istats, (void *)newentp, (void *)statp,
   2497 		    (lut_cmp)istat_cmp);
   2498 	}
   2499 
   2500 	/* if n is non-zero, set that value instead of bumping */
   2501 	if (n) {
   2502 		stats_counter_reset(statp);
   2503 		stats_counter_add(statp, n);
   2504 	} else
   2505 		stats_counter_bump(statp);
   2506 	Istat_need_save = 1;
   2507 
   2508 	ipath_print(O_ALTFP|O_VERB2, ent.ename, ent.ipath);
   2509 	out(O_ALTFP|O_VERB2, " %s to value %d", n ? "set" : "incremented",
   2510 	    stats_counter_value(statp));
   2511 }
   2512 
   2513 /*ARGSUSED*/
   2514 static void
   2515 istat_destructor(void *left, void *right, void *arg)
   2516 {
   2517 	struct istat_entry *entp = (struct istat_entry *)left;
   2518 	struct stats *statp = (struct stats *)right;
   2519 	FREE(entp);
   2520 	stats_delete(statp);
   2521 }
   2522 
   2523 /*
   2524  * Callback used in a walk of the Istats to reset matching stat counters.
   2525  */
   2526 static void
   2527 istat_counter_reset_cb(struct istat_entry *entp, struct stats *statp,
   2528     const struct ipath *ipp)
   2529 {
   2530 	char *path;
   2531 
   2532 	if (entp->ipath == ipp) {
   2533 		path = ipath2str(entp->ename, ipp);
   2534 		out(O_ALTFP, "istat_counter_reset_cb: resetting %s", path);
   2535 		FREE(path);
   2536 		stats_counter_reset(statp);
   2537 		Istat_need_save = 1;
   2538 	}
   2539 }
   2540 
   2541 /*ARGSUSED*/
   2542 static void
   2543 istat_counter_topo_chg_cb(struct istat_entry *entp, struct stats *statp,
   2544     void *unused)
   2545 {
   2546 	char *path;
   2547 	nvlist_t *fmri;
   2548 
   2549 	fmri = ipath2fmri((struct ipath *)(entp->ipath));
   2550 	if (!platform_path_exists(fmri)) {
   2551 		path = ipath2str(entp->ename, entp->ipath);
   2552 		out(O_ALTFP, "istat_counter_topo_chg_cb: not present %s", path);
   2553 		FREE(path);
   2554 		stats_counter_reset(statp);
   2555 		Istat_need_save = 1;
   2556 	}
   2557 	nvlist_free(fmri);
   2558 }
   2559 
/*
 * istat_fini -- free the Istats table, running istat_destructor() on
 * every entry.
 */
void
istat_fini(void)
{
	lut_free(Istats, istat_destructor, NULL);
}
   2565 
/*
 * Working state shared between serd_save() and its lut_walk() callbacks
 * serdaddsize() and serd2str().
 */
static char *Serdbuf;		/* start of the serialization buffer */
static char *Serdbufptr;	/* next byte of Serdbuf to be written */
static int Serdsz;		/* number of bytes in the serialized form */
   2569 
   2570 /*
   2571  * serdaddsize -- calculate size of serd and add it to Serdsz
   2572  */
   2573 /*ARGSUSED*/
   2574 static void
   2575 serdaddsize(const struct serd_entry *lhs, struct stats *rhs, void *arg)
   2576 {
   2577 	ASSERT(lhs != NULL);
   2578 
   2579 	/* count up the size of the stat name */
   2580 	Serdsz += ipath2strlen(lhs->ename, lhs->ipath);
   2581 	Serdsz++;	/* for the trailing NULL byte */
   2582 }
   2583 
   2584 /*
   2585  * serd2str -- serialize a serd engine, writing result to *Serdbufptr
   2586  */
   2587 /*ARGSUSED*/
   2588 static void
   2589 serd2str(const struct serd_entry *lhs, struct stats *rhs, void *arg)
   2590 {
   2591 	char *str;
   2592 	int len;
   2593 
   2594 	ASSERT(lhs != NULL);
   2595 
   2596 	/* serialize the serd engine name */
   2597 	str = ipath2str(lhs->ename, lhs->ipath);
   2598 	len = strlen(str);
   2599 
   2600 	ASSERT(Serdbufptr + len + 1 <= &Serdbuf[Serdsz]);
   2601 	(void) strlcpy(Serdbufptr, str, &Serdbuf[Serdsz] - Serdbufptr);
   2602 	Serdbufptr += len;
   2603 	FREE(str);
   2604 	*Serdbufptr++ = '\0';
   2605 	ASSERT(Serdbufptr <= &Serdbuf[Serdsz]);
   2606 }
   2607 
   2608 void
   2609 serd_save()
   2610 {
   2611 	if (Serd_need_save == 0)
   2612 		return;
   2613 
   2614 	/* figure out how big the serialzed info is */
   2615 	Serdsz = 0;
   2616 	lut_walk(SerdEngines, (lut_cb)serdaddsize, NULL);
   2617 
   2618 	if (Serdsz == 0) {
   2619 		/* no serd engines to save */
   2620 		fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
   2621 		return;
   2622 	}
   2623 
   2624 	/* create the serialized buffer */
   2625 	Serdbufptr = Serdbuf = MALLOC(Serdsz);
   2626 	lut_walk(SerdEngines, (lut_cb)serd2str, NULL);
   2627 
   2628 	/* clear out current saved stats */
   2629 	fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
   2630 
   2631 	/* write out the new version */
   2632 	fmd_buf_write(Hdl, NULL, WOBUF_SERDS, Serdbuf, Serdsz);
   2633 	FREE(Serdbuf);
   2634 	Serd_need_save = 0;
   2635 }
   2636 
   2637 int
   2638 serd_cmp(struct serd_entry *ent1, struct serd_entry *ent2)
   2639 {
   2640 	if (ent1->ename != ent2->ename)
   2641 		return (ent2->ename - ent1->ename);
   2642 	if (ent1->ipath != ent2->ipath)
   2643 		return ((char *)ent2->ipath - (char *)ent1->ipath);
   2644 
   2645 	return (0);
   2646 }
   2647 
   2648 void
   2649 fme_serd_load(fmd_hdl_t *hdl)
   2650 {
   2651 	int sz;
   2652 	char *sbuf;
   2653 	char *sepptr;
   2654 	char *ptr;
   2655 	struct serd_entry *newentp;
   2656 	struct node *epname;
   2657 	nvlist_t *fmri;
   2658 	char *namestring;
   2659 
   2660 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_SERDS)) == 0)
   2661 		return;
   2662 	sbuf = alloca(sz);
   2663 	fmd_buf_read(hdl, NULL, WOBUF_SERDS, sbuf, sz);
   2664 	ptr = sbuf;
   2665 	while (ptr < &sbuf[sz]) {
   2666 		sepptr = strchr(ptr, '@');
   2667 		*sepptr = '\0';
   2668 		namestring = ptr;
   2669 		sepptr++;
   2670 		ptr = sepptr;
   2671 		ptr += strlen(ptr);
   2672 		ptr++;	/* move past the '\0' separating paths */
   2673 		epname = pathstring2epnamenp(sepptr);
   2674 		fmri = node2fmri(epname);
   2675 		if (platform_path_exists(fmri)) {
   2676 			newentp = MALLOC(sizeof (*newentp));
   2677 			newentp->hdl = hdl;
   2678 			newentp->ipath = ipath(epname);
   2679 			newentp->ename = stable(namestring);
   2680 			SerdEngines = lut_add(SerdEngines, (void *)newentp,
   2681 			    (void *)newentp, (lut_cmp)serd_cmp);
   2682 		} else
   2683 			Serd_need_save = 1;
   2684 		tree_free(epname);
   2685 		nvlist_free(fmri);
   2686 	}
   2687 	/* save it back again in case some of the paths no longer exist */
   2688 	serd_save();
   2689 }
   2690 
   2691 /*ARGSUSED*/
   2692 static void
   2693 serd_destructor(void *left, void *right, void *arg)
   2694 {
   2695 	struct serd_entry *entp = (struct serd_entry *)left;
   2696 	FREE(entp);
   2697 }
   2698 
   2699 /*
   2700  * Callback used in a walk of the SerdEngines to reset matching serd engines.
   2701  */
   2702 /*ARGSUSED*/
   2703 static void
   2704 serd_reset_cb(struct serd_entry *entp, void *unused, const struct ipath *ipp)
   2705 {
   2706 	char *path;
   2707 
   2708 	if (entp->ipath == ipp) {
   2709 		path = ipath2str(entp->ename, ipp);
   2710 		out(O_ALTFP, "serd_reset_cb: resetting %s", path);
   2711 		fmd_serd_reset(entp->hdl, path);
   2712 		FREE(path);
   2713 		Serd_need_save = 1;
   2714 	}
   2715 }
   2716 
   2717 /*ARGSUSED*/
   2718 static void
   2719 serd_topo_chg_cb(struct serd_entry *entp, void *unused, void *unused2)
   2720 {
   2721 	char *path;
   2722 	nvlist_t *fmri;
   2723 
   2724 	fmri = ipath2fmri((struct ipath *)(entp->ipath));
   2725 	if (!platform_path_exists(fmri)) {
   2726 		path = ipath2str(entp->ename, entp->ipath);
   2727 		out(O_ALTFP, "serd_topo_chg_cb: not present %s", path);
   2728 		fmd_serd_reset(entp->hdl, path);
   2729 		FREE(path);
   2730 		Serd_need_save = 1;
   2731 	}
   2732 	nvlist_free(fmri);
   2733 }
   2734 
/*
 * serd_fini -- free the SerdEngines table, running serd_destructor() on
 * every entry.
 */
void
serd_fini(void)
{
	lut_free(SerdEngines, serd_destructor, NULL);
}
   2740 
   2741 static void
   2742 publish_suspects(struct fme *fmep, struct rsl *srl)
   2743 {
   2744 	struct rsl *rp;
   2745 	nvlist_t *fault;
   2746 	uint8_t cert;
   2747 	uint_t *frs;
   2748 	uint_t frsum, fr;
   2749 	uint_t messval;
   2750 	uint_t retireval;
   2751 	uint_t responseval;
   2752 	struct node *snp;
   2753 	int frcnt, fridx;
   2754 	boolean_t allfaulty = B_TRUE;
   2755 	struct rsl *erl = srl + fmep->nsuspects - 1;
   2756 
   2757 	/*
   2758 	 * sort the array
   2759 	 */
   2760 	qsort(srl, fmep->nsuspects, sizeof (struct rsl), rslcmp);
   2761 
   2762 	/* sum the fitrates */
   2763 	frs = alloca(fmep->nsuspects * sizeof (uint_t));
   2764 	fridx = frcnt = frsum = 0;
   2765 
   2766 	for (rp = srl; rp <= erl; rp++) {
   2767 		struct node *n;
   2768 
   2769 		n = eventprop_lookup(rp->suspect, L_FITrate);
   2770 		if (node2uint(n, &fr) != 0) {
   2771 			out(O_DEBUG|O_NONL, "event ");
   2772 			ipath_print(O_DEBUG|O_NONL,
   2773 			    rp->suspect->enode->u.event.ename->u.name.s,
   2774 			    rp->suspect->ipp);
   2775 			out(O_DEBUG, " has no FITrate (using 1)");
   2776 			fr = 1;
   2777 		} else if (fr == 0) {
   2778 			out(O_DEBUG|O_NONL, "event ");
   2779 			ipath_print(O_DEBUG|O_NONL,
   2780 			    rp->suspect->enode->u.event.ename->u.name.s,
   2781 			    rp->suspect->ipp);
   2782 			out(O_DEBUG, " has zero FITrate (using 1)");
   2783 			fr = 1;
   2784 		}
   2785 
   2786 		frs[fridx++] = fr;
   2787 		frsum += fr;
   2788 		frcnt++;
   2789 	}
   2790 
   2791 	/* Add them in reverse order of our sort, as fmd reverses order */
   2792 	for (rp = erl; rp >= srl; rp--) {
   2793 		cert = percentof(frs[--fridx], frsum);
   2794 		fault = fmd_nvl_create_fault(fmep->hdl,
   2795 		    rp->suspect->enode->u.event.ename->u.name.s,
   2796 		    cert,
   2797 		    rp->asru,
   2798 		    rp->fru,
   2799 		    rp->rsrc);
   2800 		if (fault == NULL)
   2801 			out(O_DIE, "fault creation failed");
   2802 		/* if "message" property exists, add it to the fault */
   2803 		if (node2uint(eventprop_lookup(rp->suspect, L_message),
   2804 		    &messval) == 0) {
   2805 
   2806 			out(O_ALTFP,
   2807 			    "[FME%d, %s adds message=%d to suspect list]",
   2808 			    fmep->id,
   2809 			    rp->suspect->enode->u.event.ename->u.name.s,
   2810 			    messval);
   2811 			if (nvlist_add_boolean_value(fault,
   2812 			    FM_SUSPECT_MESSAGE,
   2813 			    (messval) ? B_TRUE : B_FALSE) != 0) {
   2814 				out(O_DIE, "cannot add no-message to fault");
   2815 			}
   2816 		}
   2817 
   2818 		/* if "retire" property exists, add it to the fault */
   2819 		if (node2uint(eventprop_lookup(rp->suspect, L_retire),
   2820 		    &retireval) == 0) {
   2821 
   2822 			out(O_ALTFP,
   2823 			    "[FME%d, %s adds retire=%d to suspect list]",
   2824 			    fmep->id,
   2825 			    rp->suspect->enode->u.event.ename->u.name.s,
   2826 			    retireval);
   2827 			if (nvlist_add_boolean_value(fault,
   2828 			    FM_SUSPECT_RETIRE,
   2829 			    (retireval) ? B_TRUE : B_FALSE) != 0) {
   2830 				out(O_DIE, "cannot add no-retire to fault");
   2831 			}
   2832 		}
   2833 
   2834 		/* if "response" property exists, add it to the fault */
   2835 		if (node2uint(eventprop_lookup(rp->suspect, L_response),
   2836 		    &responseval) == 0) {
   2837 
   2838 			out(O_ALTFP,
   2839 			    "[FME%d, %s adds response=%d to suspect list]",
   2840 			    fmep->id,
   2841 			    rp->suspect->enode->u.event.ename->u.name.s,
   2842 			    responseval);
   2843 			if (nvlist_add_boolean_value(fault,
   2844 			    FM_SUSPECT_RESPONSE,
   2845 			    (responseval) ? B_TRUE : B_FALSE) != 0) {
   2846 				out(O_DIE, "cannot add no-response to fault");
   2847 			}
   2848 		}
   2849 
   2850 		/* add any payload properties */
   2851 		lut_walk(rp->suspect->payloadprops,
   2852 		    (lut_cb)addpayloadprop, (void *)fault);
   2853 		rslfree(rp);
   2854 
   2855 		/*
   2856 		 * If "action" property exists, evaluate it;  this must be done
   2857 		 * before the allfaulty check below since some actions may
   2858 		 * modify the asru to be used in fmd_nvl_fmri_has_fault.  This
   2859 		 * needs to be restructured if any new actions are introduced
   2860 		 * that have effects that we do not want to be visible if
   2861 		 * we decide not to publish in the dupclose check below.
   2862 		 */
   2863 		if ((snp = eventprop_lookup(rp->suspect, L_action)) != NULL) {
   2864 			struct evalue evalue;
   2865 
   2866 			out(O_ALTFP|O_NONL,
   2867 			    "[FME%d, %s action ", fmep->id,
   2868 			    rp->suspect->enode->u.event.ename->u.name.s);
   2869 			ptree_name_iter(O_ALTFP|O_NONL, snp);
   2870 			out(O_ALTFP, "]");
   2871 			Action_nvl = fault;
   2872 			(void) eval_expr(snp, NULL, NULL, NULL, NULL,
   2873 			    NULL, 0, &evalue);
   2874 		}
   2875 
   2876 		fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault);
   2877 
   2878 		/*
   2879 		 * check if the asru is already marked as "faulty".
   2880 		 */
   2881 		if (allfaulty) {
   2882 			nvlist_t *asru;
   2883 
   2884 			out(O_ALTFP|O_VERB, "FME%d dup check ", fmep->id);
   2885 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, rp->suspect);
   2886 			out(O_ALTFP|O_VERB|O_NONL, " ");
   2887 			if (nvlist_lookup_nvlist(fault,
   2888 			    FM_FAULT_ASRU, &asru) != 0) {
   2889 				out(O_ALTFP|O_VERB, "NULL asru");
   2890 				allfaulty = B_FALSE;
   2891 			} else if (fmd_nvl_fmri_has_fault(fmep->hdl, asru,
   2892 			    FMD_HAS_FAULT_ASRU, NULL)) {
   2893 				out(O_ALTFP|O_VERB, "faulty");
   2894 			} else {
   2895 				out(O_ALTFP|O_VERB, "not faulty");
   2896 				allfaulty = B_FALSE;
   2897 			}
   2898 		}
   2899 
   2900 	}
   2901 
   2902 	if (!allfaulty) {
   2903 		/*
   2904 		 * don't update the count stat if all asrus are already
   2905 		 * present and unrepaired in the asru cache
   2906 		 */
   2907 		for (rp = erl; rp >= srl; rp--) {
   2908 			struct event *suspect = rp->suspect;
   2909 
   2910 			if (suspect == NULL)
   2911 				continue;
   2912 
   2913 			/* if "count" exists, increment the appropriate stat */
   2914 			if ((snp = eventprop_lookup(suspect,
   2915 			    L_count)) != NULL) {
   2916 				out(O_ALTFP|O_NONL,
   2917 				    "[FME%d, %s count ", fmep->id,
   2918 				    suspect->enode->u.event.ename->u.name.s);
   2919 				ptree_name_iter(O_ALTFP|O_NONL, snp);
   2920 				out(O_ALTFP, "]");
   2921 				istat_bump(snp, 0);
   2922 
   2923 			}
   2924 		}
   2925 		istat_save();	/* write out any istat changes */
   2926 	}
   2927 }
   2928 
   2929 static const char *
   2930 undiag_2defect_str(int ud)
   2931 {
   2932 	switch (ud) {
   2933 	case UD_VAL_MISSINGINFO:
   2934 	case UD_VAL_MISSINGOBS:
   2935 	case UD_VAL_MISSINGPATH:
   2936 	case UD_VAL_MISSINGZERO:
   2937 	case UD_VAL_BADOBS:
   2938 	case UD_VAL_CFGMISMATCH:
   2939 		return (UNDIAG_DEFECT_CHKPT);
   2940 		break;
   2941 
   2942 	case UD_VAL_BADEVENTI:
   2943 	case UD_VAL_BADEVENTPATH:
   2944 	case UD_VAL_BADEVENTCLASS:
   2945 	case UD_VAL_INSTFAIL:
   2946 	case UD_VAL_NOPATH:
   2947 	case UD_VAL_UNSOLVD:
   2948 		return (UNDIAG_DEFECT_FME);
   2949 		break;
   2950 
   2951 	case UD_VAL_MAXFME:
   2952 		return (UNDIAG_DEFECT_LIMIT);
   2953 		break;
   2954 
   2955 	case UD_VAL_UNKNOWN:
   2956 	default:
   2957 		return (UNDIAG_DEFECT_UNKNOWN);
   2958 		break;
   2959 	}
   2960 }
   2961 
   2962 static const char *
   2963 undiag_2fault_str(int ud)
   2964 {
   2965 	switch (ud) {
   2966 	case UD_VAL_BADEVENTI:
   2967 	case UD_VAL_BADEVENTPATH:
   2968 	case UD_VAL_BADEVENTCLASS:
   2969 	case UD_VAL_INSTFAIL:
   2970 	case UD_VAL_NOPATH:
   2971 	case UD_VAL_UNSOLVD:
   2972 		return (UNDIAG_FAULT_FME);
   2973 	default:
   2974 		return (NULL);
   2975 	}
   2976 }
   2977 
   2978 static char *
   2979 undiag_2reason_str(int ud, char *arg)
   2980 {
   2981 	const char *ptr;
   2982 	char *buf;
   2983 	int with_arg = 0;
   2984 
   2985 	switch (ud) {
   2986 	case UD_VAL_BADEVENTPATH:
   2987 		ptr = UD_STR_BADEVENTPATH;
   2988 		with_arg = 1;
   2989 		break;
   2990 	case UD_VAL_BADEVENTCLASS:
   2991 		ptr = UD_STR_BADEVENTCLASS;
   2992 		with_arg = 1;
   2993 		break;
   2994 	case UD_VAL_BADEVENTI:
   2995 		ptr = UD_STR_BADEVENTI;
   2996 		with_arg = 1;
   2997 		break;
   2998 	case UD_VAL_BADOBS:
   2999 		ptr = UD_STR_BADOBS;
   3000 		break;
   3001 	case UD_VAL_CFGMISMATCH:
   3002 		ptr = UD_STR_CFGMISMATCH;
   3003 		break;
   3004 	case UD_VAL_INSTFAIL:
   3005 		ptr = UD_STR_INSTFAIL;
   3006 		with_arg = 1;
   3007 		break;
   3008 	case UD_VAL_MAXFME:
   3009 		ptr = UD_STR_MAXFME;
   3010 		break;
   3011 	case UD_VAL_MISSINGINFO:
   3012 		ptr = UD_STR_MISSINGINFO;
   3013 		break;
   3014 	case UD_VAL_MISSINGOBS:
   3015 		ptr = UD_STR_MISSINGOBS;
   3016 		break;
   3017 	case UD_VAL_MISSINGPATH:
   3018 		ptr = UD_STR_MISSINGPATH;
   3019 		break;
   3020 	case UD_VAL_MISSINGZERO:
   3021 		ptr = UD_STR_MISSINGZERO;
   3022 		break;
   3023 	case UD_VAL_NOPATH:
   3024 		ptr = UD_STR_NOPATH;
   3025 		with_arg = 1;
   3026 		break;
   3027 	case UD_VAL_UNSOLVD:
   3028 		ptr = UD_STR_UNSOLVD;
   3029 		break;
   3030 	case UD_VAL_UNKNOWN:
   3031 	default:
   3032 		ptr = UD_STR_UNKNOWN;
   3033 		break;
   3034 	}
   3035 	if (with_arg) {
   3036 		buf = MALLOC(strlen(ptr) + strlen(arg) - 1);
   3037 		(void) sprintf(buf, ptr, arg);
   3038 	} else {
   3039 		buf = MALLOC(strlen(ptr) + 1);
   3040 		(void) sprintf(buf, ptr);
   3041 	}
   3042 	return (buf);
   3043 }
   3044 
   3045 static void
   3046 publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep, fmd_case_t *fmcase,
   3047     nvlist_t *detector, char *arg)
   3048 {
   3049 	struct case_list *newcase;
   3050 	nvlist_t *defect, *fault;
   3051 	const char *faultstr;
   3052 	char *reason = undiag_2reason_str(Undiag_reason, arg);
   3053 
   3054 	out(O_ALTFP,
   3055 	    "[undiagnosable ereport received, "
   3056 	    "creating and closing a new case (%s)]", reason);
   3057 
   3058 	newcase = MALLOC(sizeof (struct case_list));
   3059 	newcase->next = NULL;
   3060 	newcase->fmcase = fmcase;
   3061 	if (Undiagablecaselist != NULL)
   3062 		newcase->next = Undiagablecaselist;
   3063 	Undiagablecaselist = newcase;
   3064 
   3065 	if (ffep != NULL)
   3066 		fmd_case_add_ereport(hdl, newcase->fmcase, ffep);
   3067 
   3068 	/* add defect */
   3069 	defect = fmd_nvl_create_fault(hdl,
   3070 	    undiag_2defect_str(Undiag_reason), 50, NULL, NULL, detector);
   3071 	(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
   3072 	(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RETIRE, B_FALSE);
   3073 	(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RESPONSE, B_FALSE);
   3074 	fmd_case_add_suspect(hdl, newcase->fmcase, defect);
   3075 
   3076 	/* add fault if appropriate */
   3077 	faultstr = undiag_2fault_str(Undiag_reason);
   3078 	if (faultstr != NULL) {
   3079 		fault = fmd_nvl_create_fault(hdl, faultstr, 50, NULL, NULL,
   3080 		    detector);
   3081 		(void) nvlist_add_string(fault, UNDIAG_REASON, reason);
   3082 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RETIRE,
   3083 		    B_FALSE);
   3084 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RESPONSE,
   3085 		    B_FALSE);
   3086 		fmd_case_add_suspect(hdl, newcase->fmcase, fault);
   3087 	}
   3088 	FREE(reason);
   3089 
   3090 	/* solve and close case */
   3091 	fmd_case_solve(hdl, newcase->fmcase);
   3092 	fmd_case_close(hdl, newcase->fmcase);
   3093 	Undiag_reason = UD_VAL_UNKNOWN;
   3094 }
   3095 
   3096 static void
   3097 fme_undiagnosable(struct fme *f)
   3098 {
   3099 	nvlist_t *defect, *fault, *detector = NULL;
   3100 	struct event *ep;
   3101 	char *pathstr;
   3102 	const char *faultstr;
   3103 	char *reason = undiag_2reason_str(Undiag_reason, NULL);
   3104 
   3105 	out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]",
   3106 	    f->id, fmd_case_uuid(f->hdl, f->fmcase), reason);
   3107 
   3108 	for (ep = f->observations; ep; ep = ep->observations) {
   3109 
   3110 		if (ep->ffep != f->e0r)
   3111 			fmd_case_add_ereport(f->hdl, f->fmcase, ep->ffep);
   3112 
   3113 		pathstr = ipath2str(NULL, ipath(platform_getpath(ep->nvp)));
   3114 		platform_units_translate(0, f->config, NULL, NULL, &detector,
   3115 		    pathstr);
   3116 		FREE(pathstr);
   3117 
   3118 		/* add defect */
   3119 		defect = fmd_nvl_create_fault(f->hdl,
   3120 		    undiag_2defect_str(Undiag_reason), 50 / f->uniqobs,
   3121 		    NULL, NULL, detector);
   3122 		(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
   3123 		(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RETIRE,
   3124 		    B_FALSE);
   3125 		(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RESPONSE,
   3126 		    B_FALSE);
   3127 		fmd_case_add_suspect(f->hdl, f->fmcase, defect);
   3128 
   3129 		/* add fault if appropriate */
   3130 		faultstr = undiag_2fault_str(Undiag_reason);
   3131 		if (faultstr == NULL)
   3132 			continue;
   3133 		fault = fmd_nvl_create_fault(f->hdl, faultstr, 50 / f->uniqobs,
   3134 		    NULL, NULL, detector);
   3135 		(void) nvlist_add_string(fault, UNDIAG_REASON, reason);
   3136 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RETIRE,
   3137 		    B_FALSE);
   3138 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RESPONSE,
   3139 		    B_FALSE);
   3140 		fmd_case_add_suspect(f->hdl, f->fmcase, fault);
   3141 		nvlist_free(detector);
   3142 	}
   3143 	FREE(reason);
   3144 	fmd_case_solve(f->hdl, f->fmcase);
   3145 	fmd_case_close(f->hdl, f->fmcase);
   3146 	Undiag_reason = UD_VAL_UNKNOWN;
   3147 }
   3148 
   3149 /*
   3150  * fme_close_case
   3151  *
   3152  *	Find the requested case amongst our fmes and close it.  Free up
   3153  *	the related fme.
   3154  */
   3155 void
   3156 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase)
   3157 {
   3158 	struct case_list *ucasep, *prevcasep = NULL;
   3159 	struct fme *prev = NULL;
   3160 	struct fme *fmep;
   3161 
   3162 	for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) {
   3163 		if (fmcase != ucasep->fmcase) {
   3164 			prevcasep = ucasep;
   3165 			continue;
   3166 		}
   3167 
   3168 		if (prevcasep == NULL)
   3169 			Undiagablecaselist = Undiagablecaselist->next;
   3170 		else
   3171 			prevcasep->next = ucasep->next;
   3172 
   3173 		FREE(ucasep);
   3174 		return;
   3175 	}
   3176 
   3177 	for (fmep = FMElist; fmep; fmep = fmep->next) {
   3178 		if (fmep->hdl == hdl && fmep->fmcase == fmcase)
   3179 			break;
   3180 		prev = fmep;
   3181 	}
   3182 
   3183 	if (fmep == NULL) {
   3184 		out(O_WARN, "Eft asked to close unrecognized case [%s].",
   3185 		    fmd_case_uuid(hdl, fmcase));
   3186 		return;
   3187 	}
   3188 
   3189 	if (EFMElist == fmep)
   3190 		EFMElist = prev;
   3191 
   3192 	if (prev == NULL)
   3193 		FMElist = FMElist->next;
   3194 	else
   3195 		prev->next = fmep->next;
   3196 
   3197 	fmep->next = NULL;
   3198 
   3199 	/* Get rid of any timer this fme has set */
   3200 	if (fmep->wull != 0)
   3201 		fmd_timer_remove(fmep->hdl, fmep->timer);
   3202 
   3203 	if (ClosedFMEs == NULL) {
   3204 		ClosedFMEs = fmep;
   3205 	} else {
   3206 		fmep->next = ClosedFMEs;
   3207 		ClosedFMEs = fmep;
   3208 	}
   3209 
   3210 	Open_fme_count--;
   3211 
   3212 	/* See if we can close the overflow FME */
   3213 	if (Open_fme_count <= Max_fme) {
   3214 		for (fmep = FMElist; fmep; fmep = fmep->next) {
   3215 			if (fmep->overflow && !(fmd_case_closed(fmep->hdl,
   3216 			    fmep->fmcase)))
   3217 				break;
   3218 		}
   3219 
   3220 		if (fmep != NULL)
   3221 			fmd_case_close(fmep->hdl, fmep->fmcase);
   3222 	}
   3223 }
   3224 
   3225 /*
   3226  * fme_set_timer()
   3227  *	If the time we need to wait for the given FME is less than the
   3228  *	current timer, kick that old timer out and establish a new one.
   3229  */
   3230 static int
   3231 fme_set_timer(struct fme *fmep, unsigned long long wull)
   3232 {
   3233 	out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait ");
   3234 	ptree_timeval(O_ALTFP|O_VERB, &wull);
   3235 
   3236 	if (wull <= fmep->pull) {
   3237 		out(O_ALTFP|O_VERB|O_NONL, "already have waited at least ");
   3238 		ptree_timeval(O_ALTFP|O_VERB, &fmep->pull);
   3239 		out(O_ALTFP|O_VERB, NULL);
   3240 		/* we've waited at least wull already, don't need timer */
   3241 		return (0);
   3242 	}
   3243 
   3244 	out(O_ALTFP|O_VERB|O_NONL, " currently ");
   3245 	if (fmep->wull != 0) {
   3246 		out(O_ALTFP|O_VERB|O_NONL, "waiting ");
   3247 		ptree_timeval(O_ALTFP|O_VERB, &fmep->wull);
   3248 		out(O_ALTFP|O_VERB, NULL);
   3249 	} else {
   3250 		out(O_ALTFP|O_VERB|O_NONL, "not waiting");
   3251 		out(O_ALTFP|O_VERB, NULL);
   3252 	}
   3253 
   3254 	if (fmep->wull != 0)
   3255 		if (wull >= fmep->wull)
   3256 			/* New timer would fire later than established timer */
   3257 			return (0);
   3258 
   3259 	if (fmep->wull != 0) {
   3260 		fmd_timer_remove(fmep->hdl, fmep->timer);
   3261 	}
   3262 
   3263 	fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep,
   3264 	    fmep->e0r, wull);
   3265 	out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer);
   3266 	fmep->wull = wull;
   3267 	return (1);
   3268 }
   3269 
   3270 void
   3271 fme_timer_fired(struct fme *fmep, id_t tid)
   3272 {
   3273 	struct fme *ffmep = NULL;
   3274 
   3275 	for (ffmep = FMElist; ffmep; ffmep = ffmep->next)
   3276 		if (ffmep == fmep)
   3277 			break;
   3278 
   3279 	if (ffmep == NULL) {
   3280 		out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.",
   3281 		    (void *)fmep);
   3282 		return;
   3283 	}
   3284 
   3285 	out(O_ALTFP|O_VERB, "Timer fired %lx", tid);
   3286 	fmep->pull = fmep->wull;
   3287 	fmep->wull = 0;
   3288 	fmd_buf_write(fmep->hdl, fmep->fmcase,
   3289 	    WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull));
   3290 
   3291 	fme_eval(fmep, fmep->e0r);
   3292 }
   3293 
   3294 /*
   3295  * Preserve the fme's suspect list in its psuspects list, NULLing the
   3296  * suspects list in the meantime.
   3297  */
   3298 static void
   3299 save_suspects(struct fme *fmep)
   3300 {
   3301 	struct event *ep;
   3302 	struct event *nextep;
   3303 
   3304 	/* zero out the previous suspect list */
   3305 	for (ep = fmep->psuspects; ep; ep = nextep) {
   3306 		nextep = ep->psuspects;
   3307 		ep->psuspects = NULL;
   3308 	}
   3309 	fmep->psuspects = NULL;
   3310 
   3311 	/* zero out the suspect list, copying it to previous suspect list */
   3312 	fmep->psuspects = fmep->suspects;
   3313 	for (ep = fmep->suspects; ep; ep = nextep) {
   3314 		nextep = ep->suspects;
   3315 		ep->psuspects = ep->suspects;
   3316 		ep->suspects = NULL;
   3317 		ep->is_suspect = 0;
   3318 	}
   3319 	fmep->suspects = NULL;
   3320 	fmep->nsuspects = 0;
   3321 }
   3322 
   3323 /*
   3324  * Retrieve the fme's suspect list from its psuspects list.
   3325  */
   3326 static void
   3327 restore_suspects(struct fme *fmep)
   3328 {
   3329 	struct event *ep;
   3330 	struct event *nextep;
   3331 
   3332 	fmep->nsuspects = 0;
   3333 	fmep->suspects = fmep->psuspects;
   3334 	for (ep = fmep->psuspects; ep; ep = nextep) {
   3335 		fmep->nsuspects++;
   3336 		nextep = ep->psuspects;
   3337 		ep->suspects = ep->psuspects;
   3338 	}
   3339 }
   3340 
/*
 * fme_eval
 *
 *	(Re)evaluate an FME.  Historically: this is what we use to call the
 *	Emrys prototype code instead of main().
 *
 *	The current suspect list is saved away, the arrows in the event
 *	tree are cleared and hypothesise() is run from the FME's initial
 *	event.  What happens next depends on the resulting state:
 *
 *	FME_CREDIBLE	upsets are evaluated, the suspects are trimmed and
 *			published, and the case is solved (or closed if no
 *			suspects remain).
 *	FME_WAIT	a timer is set for the delay hypothesise() asked
 *			for, the instance tree is pruned, and we return
 *			with the FME still live.
 *	FME_DISPROVED	the case is solved and closed as undiagnosable.
 *
 *	Except on the early returns (FME_WAIT, or suspects already posted
 *	by a recursive call), the instance tree, config and FME buffers are
 *	torn down before returning.
 *
 *	ffep is the fmd event handed on to upsets_eval() and
 *	trim_suspects(), and used as the principal ereport if a parallel
 *	case has to be opened; it may be NULL (see the "if (ffep)" test
 *	below).
 */
static void
fme_eval(struct fme *fmep, fmd_event_t *ffep)
{
	struct event *ep;
	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
	struct rsl *srl = NULL;
	struct rsl *srl2 = NULL;
	int mess_zero_count;
	int rpcnt;

	/* move the last evaluation's suspects aside before hypothesising */
	save_suspects(fmep);

	out(O_ALTFP, "Evaluate FME %d", fmep->id);
	indent_set("  ");

	lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
	fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);

	out(O_ALTFP|O_NONL, "FME%d state: %s, suspect list:", fmep->id,
	    fme_state2str(fmep->state));
	for (ep = fmep->suspects; ep; ep = ep->suspects) {
		out(O_ALTFP|O_NONL, " ");
		itree_pevent_brief(O_ALTFP|O_NONL, ep);
	}
	out(O_ALTFP, NULL);

	switch (fmep->state) {
	case FME_CREDIBLE:
		print_suspects(SLNEW, fmep);
		(void) upsets_eval(fmep, ffep);

		/*
		 * we may have already posted suspects in upsets_eval() which
		 * can recurse into fme_eval() again. If so then just return.
		 */
		if (fmep->posted_suspects)
			return;

		stats_counter_bump(fmep->diags);
		/*
		 * Remember how many suspects there were for sizing the
		 * arrays below; save_suspects() zeroes fmep->nsuspects.
		 */
		rpcnt = fmep->nsuspects;
		save_suspects(fmep);

		/*
		 * create two lists, one for "message=1" faults and one for
		 * "message=0" faults. If we have a mixture we will generate
		 * two separate suspect lists.
		 */
		srl = MALLOC(rpcnt * sizeof (struct rsl));
		bzero(srl, rpcnt * sizeof (struct rsl));
		srl2 = MALLOC(rpcnt * sizeof (struct rsl));
		bzero(srl2, rpcnt * sizeof (struct rsl));
		mess_zero_count = trim_suspects(fmep, srl, srl2, ffep);

		/*
		 * If the resulting suspect list has no members, we're
		 * done so simply close the case. Otherwise sort and publish.
		 *
		 * NOTE(review): from the way the counts are used below,
		 * fmep->nsuspects appears to count the entries in srl and
		 * mess_zero_count those in srl2 -- confirm against
		 * trim_suspects().
		 */
		if (fmep->nsuspects == 0 && mess_zero_count == 0) {
			out(O_ALTFP,
			    "[FME%d, case %s (all suspects are upsets)]",
			    fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));
			fmd_case_close(fmep->hdl, fmep->fmcase);
		} else if (fmep->nsuspects != 0 && mess_zero_count == 0) {
			/* only srl has entries: publish it on this case */
			publish_suspects(fmep, srl);
			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
			fmd_case_solve(fmep->hdl, fmep->fmcase);
		} else if (fmep->nsuspects == 0 && mess_zero_count != 0) {
			/*
			 * only srl2 has entries: publish_suspects() sizes its
			 * work from fmep->nsuspects, so set that first.
			 */
			fmep->nsuspects = mess_zero_count;
			publish_suspects(fmep, srl2);
			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
			fmd_case_solve(fmep->hdl, fmep->fmcase);
		} else {
			struct event *obsp;
			struct fme *nfmep;

			/* both lists have entries: srl goes on this case */
			publish_suspects(fmep, srl);
			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
			fmd_case_solve(fmep->hdl, fmep->fmcase);

			/*
			 * Got both message=0 and message=1 so create a
			 * duplicate case. Also need a temporary duplicate fme
			 * structure for use by publish_suspects().
			 */
			nfmep = alloc_fme();
			nfmep->id =  Nextid++;
			nfmep->hdl = fmep->hdl;
			nfmep->nsuspects = mess_zero_count;
			nfmep->fmcase = fmd_case_open(fmep->hdl, NULL);
			out(O_ALTFP|O_STAMP,
			    "[creating parallel FME%d, case %s]", nfmep->id,
			    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
			Open_fme_count++;
			if (ffep) {
				fmd_case_setprincipal(nfmep->hdl,
				    nfmep->fmcase, ffep);
				fmd_case_add_ereport(nfmep->hdl,
				    nfmep->fmcase, ffep);
			}
			/* carry the other observed ereports over as well */
			for (obsp = fmep->observations; obsp;
			    obsp = obsp->observations)
				if (obsp->ffep && obsp->ffep != ffep)
					fmd_case_add_ereport(nfmep->hdl,
					    nfmep->fmcase, obsp->ffep);

			publish_suspects(nfmep, srl2);
			out(O_ALTFP, "[solving FME%d, case %s]", nfmep->id,
			    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
			fmd_case_solve(nfmep->hdl, nfmep->fmcase);
			/* the temporary fme is not kept on any list */
			FREE(nfmep);
		}
		FREE(srl);
		FREE(srl2);
		/* put back the suspect list that was saved before trimming */
		restore_suspects(fmep);

		/* record, in memory and in the case buffer, that we posted */
		fmep->posted_suspects = 1;
		fmd_buf_write(fmep->hdl, fmep->fmcase,
		    WOBUF_POSTD,
		    (void *)&fmep->posted_suspects,
		    sizeof (fmep->posted_suspects));

		/*
		 * Now the suspects have been posted, we can clear up
		 * the instance tree as we won't be looking at it again.
		 * Also cancel the timer as the case is now solved.
		 */
		if (fmep->wull != 0) {
			fmd_timer_remove(fmep->hdl, fmep->timer);
			fmep->wull = 0;
		}
		break;

	case FME_WAIT:
		/*
		 * Not decidable yet: wait for the delay hypothesise() asked
		 * for and keep the (pruned) tree for the next evaluation.
		 */
		ASSERT(my_delay > fmep->ull);
		(void) fme_set_timer(fmep, my_delay);
		print_suspects(SLWAIT, fmep);
		itree_prune(fmep->eventtree);
		return;

	case FME_DISPROVED:
		print_suspects(SLDISPROVED, fmep);
		Undiag_reason = UD_VAL_UNSOLVD;
		fme_undiagnosable(fmep);
		break;
	}

	/* the FME is finished with: release its tree, config and buffers */
	itree_free(fmep->eventtree);
	fmep->eventtree = NULL;
	structconfig_free(fmep->config);
	fmep->config = NULL;
	destroy_fme_bufs(fmep);
}
   3499 
/*
 * Forward declarations, so that the functions below can refer to these
 * routines ahead of their definitions.
 */
static void indent(void);
static int triggered(struct fme *fmep, struct event *ep, int mark);
static enum fme_state effects_test(struct fme *fmep,
    struct event *fault_event, unsigned long long at_latest_by,
    unsigned long long *pdelay);
static enum fme_state requirements_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
static enum fme_state causes_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
   3510 static int
   3511 checkconstraints(struct fme *fmep, struct arrow *arrowp)
   3512 {
   3513 	struct constraintlist *ctp;
   3514 	struct evalue value;
   3515 	char *sep = "";
   3516 
   3517 	if (arrowp->forever_false) {
   3518 		indent();
   3519 		out(O_ALTFP|O_VERB|O_NONL, "  Forever false constraint: ");
   3520 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
   3521 			out(O_ALTFP|O_VERB|O_NONL, sep);
   3522 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
   3523 			sep = ", ";
   3524 		}
   3525 		out(O_ALTFP|O_VERB, NULL);
   3526 		return (0);
   3527 	}
   3528 	if (arrowp->forever_true) {
   3529 		indent();
   3530 		out(O_ALTFP|O_VERB|O_NONL, "  Forever true constraint: ");
   3531 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
   3532 			out(O_ALTFP|O_VERB|O_NONL, sep);
   3533 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
   3534 			sep = ", ";
   3535 		}
   3536 		out(O_ALTFP|O_VERB, NULL);
   3537 		return (1);
   3538 	}
   3539 
   3540 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
   3541 		if (eval_expr(ctp->cnode, NULL, NULL,
   3542 		    &fmep->globals, fmep->config,
   3543 		    arrowp, 0, &value)) {
   3544 			/* evaluation successful */
   3545 			if (value.t == UNDEFINED || value.v == 0) {
   3546 				/* known false */
   3547 				arrowp->forever_false = 1;
   3548 				indent();
   3549 				out(O_ALTFP|O_VERB|O_NONL,
   3550 				    "  False constraint: ");
   3551 				ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
   3552 				out(O_ALTFP|O_VERB, NULL);
   3553 				return (0);
   3554 			}
   3555 		} else {
   3556 			/* evaluation unsuccessful -- unknown value */
   3557 			indent();
   3558 			out(O_ALTFP|O_VERB|O_NONL,
   3559 			    "  Deferred constraint: ");
   3560 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
   3561 			out(O_ALTFP|O_VERB, NULL);
   3562 			return (1);
   3563 		}
   3564 	}
   3565 	/* known true */
   3566 	arrowp->forever_true = 1;
   3567 	indent();
   3568 	out(O_ALTFP|O_VERB|O_NONL, "  True constraint: ");
   3569 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
   3570 		out(O_ALTFP|O_VERB|O_NONL, sep);
   3571 		ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
   3572 		sep = ", ";
   3573 	}
   3574 	out(O_ALTFP|O_VERB, NULL);
   3575 	return (1);
   3576 }
   3577 
   3578 static int
   3579 triggered(struct fme *fmep, struct event *ep, int mark)
   3580 {
   3581 	struct bubble *bp;
   3582 	struct arrowlist *ap;
   3583 	int count = 0;
   3584 
   3585 	stats_counter_bump(fmep->Tcallcount);
   3586 	for (bp = itree_next_bubble(ep, NULL); bp;
   3587 	    bp = itree_next_bubble(ep, bp)) {
   3588 		if (bp->t != B_TO)
   3589 			continue;
   3590 		for (ap = itree_next_arrow(bp, NULL); ap;
   3591 		    ap = itree_next_arrow(bp, ap)) {
   3592 			/* check count of marks against K in the bubble */
   3593 			if ((ap->arrowp->mark & mark) &&
   3594 			    ++count >= bp->nork)
   3595 				return (1);
   3596 		}
   3597 	}
   3598 	return (0);
   3599 }
   3600 
/*
 * mark_arrows
 *
 *	Walk every arrow leaving ep (those on its B_FROM bubbles) and,
 *	recursively, the arrows leaving the events those arrows point to.
 *
 *	With mark == 0 the walk undoes earlier marking: each arrow that was
 *	marked has its arrow_marked flag and EFFECTS_COUNTER bit cleared,
 *	and the event at its head loses its WAIT_EFFECT, CREDIBLE_EFFECT
 *	and PARENT_WAIT cached state.  If "keep" is set, head events that
 *	had any of those states are first flagged keep_in_tree.
 *
 *	With a non-zero mark (CREDIBLE_EFFECT or PARENT_WAIT) each arrow is
 *	flagged arrow_marked.  Arrows whose head event already has a cached
 *	verdict, whose constraints fail, or whose head has not yet met its
 *	K-count are skipped.  For the rest, requirements_test() is run on
 *	the head event and, unless it is disproved, the walk continues from
 *	that event.
 *
 *	Returns WAIT_EFFECT if any event reached had to wait, in which case
 *	*pdelay is set to the smallest delay seen; otherwise returns 0 and
 *	leaves *pdelay untouched.  at_latest_by is the time bound passed
 *	on, plus each arrow's maxdelay, to requirements_test().
 */
static int
mark_arrows(struct fme *fmep, struct event *ep, int mark,
    unsigned long long at_latest_by, unsigned long long *pdelay, int keep)
{
	struct bubble *bp;
	struct arrowlist *ap;
	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
	unsigned long long my_delay;
	enum fme_state result;
	int retval = 0;

	for (bp = itree_next_bubble(ep, NULL); bp;
	    bp = itree_next_bubble(ep, bp)) {
		if (bp->t != B_FROM)
			continue;
		stats_counter_bump(fmep->Marrowcount);
		for (ap = itree_next_arrow(bp, NULL); ap;
		    ap = itree_next_arrow(bp, ap)) {
			/* ep2 is the event this arrow points at */
			struct event *ep2 = ap->arrowp->head->myevent;
			/*
			 * if we're clearing marks, we can avoid doing
			 * all that work evaluating constraints.
			 */
			if (mark == 0) {
				/* never marked, so nothing below it was either */
				if (ap->arrowp->arrow_marked == 0)
					continue;
				ap->arrowp->arrow_marked = 0;
				ap->arrowp->mark &= ~EFFECTS_COUNTER;
				if (keep && (ep2->cached_state &
				    (WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT)))
					ep2->keep_in_tree = 1;
				ep2->cached_state &=
				    ~(WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT);
				/*
				 * pdelay may be NULL here: in clearing mode
				 * retval is never set, so it is never written.
				 */
				(void) mark_arrows(fmep, ep2, mark, 0, NULL,
				    keep);
				continue;
			}
			ap->arrowp->arrow_marked = 1;
			/* skip head events that already have a cached verdict */
			if (ep2->cached_state & REQMNTS_DISPROVED) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  ALREADY DISPROVED ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			if (ep2->cached_state & WAIT_EFFECT) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  ALREADY EFFECTS WAIT ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			if (ep2->cached_state & CREDIBLE_EFFECT) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  ALREADY EFFECTS CREDIBLE ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			/* PARENT_WAIT only short-circuits a PARENT_WAIT walk */
			if ((ep2->cached_state & PARENT_WAIT) &&
			    (mark & PARENT_WAIT)) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  ALREADY PARENT EFFECTS WAIT ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			/*
			 * Evaluate the arrow's constraints with the head
			 * event's nvp installed as the payload nvp; it is
			 * reset to NULL again on both outcomes.
			 */
			platform_set_payloadnvp(ep2->nvp);
			if (checkconstraints(fmep, ap->arrowp) == 0) {
				platform_set_payloadnvp(NULL);
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  CONSTRAINTS FAIL ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			platform_set_payloadnvp(NULL);
			/* count this arrow towards the head event's K-count */
			ap->arrowp->mark |= EFFECTS_COUNTER;
			if (!triggered(fmep, ep2, EFFECTS_COUNTER)) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  K-COUNT NOT YET MET ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			ep2->cached_state &= ~PARENT_WAIT;
			/*
			 * if we've reached an ereport and no propagation time
			 * is specified, use the Hesitate value
			 */
			if (ep2->t == N_EREPORT && at_latest_by == 0ULL &&
			    ap->arrowp->maxdelay == 0ULL) {
				out(O_ALTFP|O_VERB|O_NONL, "  default wait ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				result = requirements_test(fmep, ep2, Hesitate,
				    &my_delay);
			} else {
				result = requirements_test(fmep, ep2,
				    at_latest_by + ap->arrowp->maxdelay,
				    &my_delay);
			}
			if (result == FME_WAIT) {
				/*
				 * The head event must wait: note its delay,
				 * then mark everything downstream of it as
				 * PARENT_WAIT.
				 */
				retval = WAIT_EFFECT;
				if (overall_delay > my_delay)
					overall_delay = my_delay;
				ep2->cached_state |= WAIT_EFFECT;
				indent();
				out(O_ALTFP|O_VERB|O_NONL, "  EFFECTS WAIT ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				indent_push("  E");
				if (mark_arrows(fmep, ep2, PARENT_WAIT,
				    at_latest_by, &my_delay, 0) ==
				    WAIT_EFFECT) {
					retval = WAIT_EFFECT;
					if (overall_delay > my_delay)
						overall_delay = my_delay;
				}
				indent_pop();
			} else if (result == FME_DISPROVED) {
				/* disproved: do not walk beyond this event */
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  EFFECTS DISPROVED ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
			} else {
				/*
				 * Neither waiting nor disproved: record the
				 * mark on the head event and carry the same
				 * mark on downstream.
				 */
				ep2->cached_state |= mark;
				indent();
				if (mark == CREDIBLE_EFFECT)
					out(O_ALTFP|O_VERB|O_NONL,
					    "  EFFECTS CREDIBLE ");
				else
					out(O_ALTFP|O_VERB|O_NONL,
					    "  PARENT EFFECTS WAIT ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				indent_push("  E");
				if (mark_arrows(fmep, ep2, mark, at_latest_by,
				    &my_delay, 0) == WAIT_EFFECT) {
					retval = WAIT_EFFECT;
					if (overall_delay > my_delay)
						overall_delay = my_delay;
				}
				indent_pop();
			}
		}
	}
	/* only report a delay when something actually has to wait */
	if (retval == WAIT_EFFECT)
		*pdelay = overall_delay;
	return (retval);
}
   3759 
   3760 static enum fme_state
   3761 effects_test(struct fme *fmep, struct event *fault_event,
   3762     unsigned long long at_latest_by, unsigned long long *pdelay)
   3763 {
   3764 	struct event *error_event;
   3765 	enum fme_state return_value = FME_CREDIBLE;
   3766 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
   3767 	unsigned long long my_delay;
   3768 
   3769 	stats_counter_bump(fmep->Ecallcount);
   3770 	indent_push("  E");
   3771 	indent();
   3772 	out(O_ALTFP|O_VERB|O_NONL, "->");
   3773 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
   3774 	out(O_ALTFP|O_VERB, NULL);
   3775 
   3776 	if (mark_arrows(fmep, fault_event, CREDIBLE_EFFECT, at_latest_by,
   3777 	    &my_delay, 0) == WAIT_EFFECT) {
   3778 		return_value = FME_WAIT;
   3779 		if (overall_delay > my_delay)
   3780 			overall_delay = my_delay;
   3781 	}
   3782 	for (error_event = fmep->observations;
   3783 	    error_event; error_event = error_event->observations) {
   3784 		indent();
   3785 		out(O_ALTFP|O_VERB|O_NONL, " ");
   3786 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event);
   3787 		if (!(error_event->cached_state & CREDIBLE_EFFECT)) {
   3788 			if (error_event->cached_state &
   3789 			    (PARENT_WAIT|WAIT_EFFECT)) {
   3790 				out(O_ALTFP|O_VERB, " NOT YET triggered");
   3791 				continue;
   3792 			}
   3793 			return_value = FME_DISPROVED;
   3794 			out(O_ALTFP|O_VERB, " NOT triggered");
   3795 			break;
   3796 		} else {
   3797 			out(O_ALTFP|O_VERB, " triggered");
   3798 		}
   3799 	}
   3800 	if (return_value == FME_DISPROVED) {
   3801 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 0);
   3802 	} else {
   3803 		fault_event->keep_in_tree = 1;
   3804 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 1);
   3805 	}
   3806 
   3807 	indent();
   3808 	out(O_ALTFP|O_VERB|O_NONL, "<-EFFECTS %s ",
   3809 	    fme_state2str(return_value));
   3810 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
   3811 	out(O_ALTFP|O_VERB, NULL);
   3812 	indent_pop();
   3813 	if (return_value == FME_WAIT)
   3814 		*pdelay = overall_delay;
   3815 	return (return_value);
   3816 }
   3817 
   3818 static enum fme_state
   3819 requirements_test(struct fme *fmep, struct event *ep,
   3820     unsigned long long at_latest_by, unsigned long long *pdelay)
   3821 {
   3822 	int waiting_events;
   3823 	int credible_events;
   3824 	int deferred_events;
   3825 	enum fme_state return_value = FME_CREDIBLE;
   3826 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
   3827 	unsigned long long arrow_delay;
   3828 	unsigned long long my_delay;
   3829 	struct event *ep2;
   3830 	struct bubble *bp;
   3831 	struct arrowlist *ap;
   3832 
   3833 	if (ep->cached_state & REQMNTS_CREDIBLE) {
   3834 		indent();
   3835 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY CREDIBLE ");
   3836 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   3837 		out(O_ALTFP|O_VERB, NULL);
   3838 		return (FME_CREDIBLE);
   3839 	}
   3840 	if (ep->cached_state & REQMNTS_DISPROVED) {
   3841 		indent();
   3842 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY DISPROVED ");
   3843 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   3844 		out(O_ALTFP|O_VERB, NULL);
   3845 		return (FME_DISPROVED);
   3846 	}
   3847 	if (ep->cached_state & REQMNTS_WAIT) {
   3848 		indent();
   3849 		*pdelay = ep->cached_delay;
   3850 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY WAIT ");
   3851 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   3852 		out(O_ALTFP|O_VERB|O_NONL, ", wait for: ");
   3853 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
   3854 		out(O_ALTFP|O_VERB, NULL);
   3855 		return (FME_WAIT);
   3856 	}
   3857 	stats_counter_bump(fmep->Rcallcount);
   3858 	indent_push("  R");
   3859 	indent();
   3860 	out(O_ALTFP|O_VERB|O_NONL, "->");
   3861 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   3862 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
   3863 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
   3864 	out(O_ALTFP|O_VERB, NULL);
   3865 
   3866 	if (ep->t == N_EREPORT) {
   3867 		if (ep->count == 0) {
   3868 			if (fmep->pull >= at_latest_by) {
   3869 				return_value = FME_DISPROVED;
   3870 			} else {
   3871 				ep->cached_delay = *pdelay = at_latest_by;
   3872 				return_value = FME_WAIT;
   3873 			}
   3874 		}
   3875 
   3876 		indent();
   3877 		switch (return_value) {
   3878 		case FME_CREDIBLE:
   3879 			ep->cached_state |= REQMNTS_CREDIBLE;
   3880 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS CREDIBLE ");
   3881 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   3882 			break;
   3883 		case FME_DISPROVED:
   3884 			ep->cached_state |= REQMNTS_DISPROVED;
   3885 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
   3886 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   3887 			break;
   3888 		case FME_WAIT:
   3889 			ep->cached_state |= REQMNTS_WAIT;
   3890 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS WAIT ");
   3891 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   3892 			out(O_ALTFP|O_VERB|O_NONL, " to ");
   3893 			ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
   3894 			break;
   3895 		default:
   3896 			out(O_DIE, "requirements_test: unexpected fme_state");
   3897 			break;
   3898 		}
   3899 		out(O_ALTFP|O_VERB, NULL);
   3900 		indent_pop();
   3901 
   3902 		return (return_value);
   3903 	}
   3904 
   3905 	/* this event is not a report, descend the tree */
   3906 	for (bp = itree_next_bubble(ep, NULL); bp;
   3907 	    bp = itree_next_bubble(ep, bp)) {
   3908 		int n;
   3909 
   3910 		if (bp->t != B_FROM)
   3911 			continue;
   3912 
   3913 		n = bp->nork;
   3914 
   3915 		credible_events = 0;
   3916 		waiting_events = 0;
   3917 		deferred_events = 0;
   3918 		arrow_delay = TIMEVAL_EVENTUALLY;
   3919 		/*
   3920 		 * n is -1 for 'A' so adjust it.
   3921 		 * XXX just count up the arrows for now.
   3922 		 */
   3923 		if (n < 0) {
   3924 			n = 0;
   3925 			for (ap = itree_next_arrow(bp, NULL); ap;
   3926 			    ap = itree_next_arrow(bp, ap))
   3927 				n++;
   3928 			indent();
   3929 			out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n);
   3930 		} else {
   3931 			indent();
   3932 			out(O_ALTFP|O_VERB, " Bubble N=%d", n);
   3933 		}
   3934 
   3935 		if (n == 0)
   3936 			continue;
   3937 		if (!(bp->mark & (BUBBLE_ELIDED|BUBBLE_OK))) {
   3938 			for (ap = itree_next_arrow(bp, NULL); ap;
   3939 			    ap = itree_next_arrow(bp, ap)) {
   3940 				ep2 = ap->arrowp->head->myevent;
   3941 				platform_set_payloadnvp(ep2->nvp);
   3942 				(void) checkconstraints(fmep, ap->arrowp);
   3943 				if (!ap->arrowp->forever_false) {
   3944 					/*
   3945 					 * if all arrows are invalidated by the
   3946 					 * constraints, then we should elide the
   3947 					 * whole bubble to be consistant with
   3948 					 * the tree creation time behaviour
   3949 					 */
   3950 					bp->mark |= BUBBLE_OK;
   3951 					platform_set_payloadnvp(NULL);
   3952 					break;
   3953 				}
   3954 				platform_set_payloadnvp(NULL);
   3955 			}
   3956 		}
   3957 		for (ap = itree_next_arrow(bp, NULL); ap;
   3958 		    ap = itree_next_arrow(bp, ap)) {
   3959 			ep2 = ap->arrowp->head->myevent;
   3960 			if (n <= credible_events)
   3961 				break;
   3962 
   3963 			ap->arrowp->mark |= REQMNTS_COUNTER;
   3964 			if (triggered(fmep, ep2, REQMNTS_COUNTER))
   3965 				/* XXX adding max timevals! */
   3966 				switch (requirements_test(fmep, ep2,
   3967 				    at_latest_by + ap->arrowp->maxdelay,
   3968 				    &my_delay)) {
   3969 				case FME_DEFERRED:
   3970 					deferred_events++;
   3971 					break;
   3972 				case FME_CREDIBLE:
   3973 					credible_events++;
   3974 					break;
   3975 				case FME_DISPROVED:
   3976 					break;
   3977 				case FME_WAIT:
   3978 					if (my_delay < arrow_delay)
   3979 						arrow_delay = my_delay;
   3980 					waiting_events++;
   3981 					break;
   3982 				default:
   3983 					out(O_DIE,
   3984 					"Bug in requirements_test.");
   3985 				}
   3986 			else
   3987 				deferred_events++;
   3988 		}
   3989 		if (!(bp->mark & BUBBLE_OK) && waiting_events == 0) {
   3990 			bp->mark |= BUBBLE_ELIDED;
   3991 			continue;
   3992 		}
   3993 		indent();
   3994 		out(O_ALTFP|O_VERB, " Credible: %d Waiting %d",
   3995 		    credible_events + deferred_events, waiting_events);
   3996 		if (credible_events + deferred_events + waiting_events < n) {
   3997 			/* Can never meet requirements */
   3998 			ep->cached_state |= REQMNTS_DISPROVED;
   3999 			indent();
   4000 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
   4001 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   4002 			out(O_ALTFP|O_VERB, NULL);
   4003 			indent_pop();
   4004 			return (FME_DISPROVED);
   4005 		}
   4006 		if (credible_events + deferred_events < n) {
   4007 			/* will have to wait */
   4008 			/* wait time is shortest known */
   4009 			if (arrow_delay < overall_delay)
   4010 				overall_delay = arrow_delay;
   4011 			return_value = FME_WAIT;
   4012 		} else if (credible_events < n) {
   4013 			if (return_value != FME_WAIT)
   4014 				return_value = FME_DEFERRED;
   4015 		}
   4016 	}
   4017 
   4018 	/*
   4019 	 * don't mark as FME_DEFERRED. If this event isn't reached by another
   4020 	 * path, then this will be considered FME_CREDIBLE. But if it is
   4021 	 * reached by a different path so the K-count is met, then might
   4022 	 * get overridden by FME_WAIT or FME_DISPROVED.
   4023 	 */
   4024 	if (return_value == FME_WAIT) {
   4025 		ep->cached_state |= REQMNTS_WAIT;
   4026 		ep->cached_delay = *pdelay = overall_delay;
   4027 	} else if (return_value == FME_CREDIBLE) {
   4028 		ep->cached_state |= REQMNTS_CREDIBLE;
   4029 	}
   4030 	indent();
   4031 	out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS %s ",
   4032 	    fme_state2str(return_value));
   4033 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   4034 	out(O_ALTFP|O_VERB, NULL);
   4035 	indent_pop();
   4036 	return (return_value);
   4037 }
   4038 
   4039 static enum fme_state
   4040 causes_test(struct fme *fmep, struct event *ep,
   4041     unsigned long long at_latest_by, unsigned long long *pdelay)
   4042 {
   4043 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
   4044 	unsigned long long my_delay;
   4045 	int credible_results = 0;
   4046 	int waiting_results = 0;
   4047 	enum fme_state fstate;
   4048 	struct event *tail_event;
   4049 	struct bubble *bp;
   4050 	struct arrowlist *ap;
   4051 	int k = 1;
   4052 
   4053 	stats_counter_bump(fmep->Ccallcount);
   4054 	indent_push("  C");
   4055 	indent();
   4056 	out(O_ALTFP|O_VERB|O_NONL, "->");
   4057 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   4058 	out(O_ALTFP|O_VERB, NULL);
   4059 
   4060 	for (bp = itree_next_bubble(ep, NULL); bp;
   4061 	    bp = itree_next_bubble(ep, bp)) {
   4062 		if (bp->t != B_TO)
   4063 			continue;
   4064 		k = bp->nork;	/* remember the K value */
   4065 		for (ap = itree_next_arrow(bp, NULL); ap;
   4066 		    ap = itree_next_arrow(bp, ap)) {
   4067 			int do_not_follow = 0;
   4068 
   4069 			/*
   4070 			 * if we get to the same event multiple times
   4071 			 * only worry about the first one.
   4072 			 */
   4073 			if (ap->arrowp->tail->myevent->cached_state &
   4074 			    CAUSES_TESTED) {
   4075 				indent();
   4076 				out(O_ALTFP|O_VERB|O_NONL,
   4077 				    "  causes test already run for ");
   4078 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
   4079 				    ap->arrowp->tail->myevent);
   4080 				out(O_ALTFP|O_VERB, NULL);
   4081 				continue;
   4082 			}
   4083 
   4084 			/*
   4085 			 * see if false constraint prevents us
   4086 			 * from traversing this arrow
   4087 			 */
   4088 			platform_set_payloadnvp(ep->nvp);
   4089 			if (checkconstraints(fmep, ap->arrowp) == 0)
   4090 				do_not_follow = 1;
   4091 			platform_set_payloadnvp(NULL);
   4092 			if (do_not_follow) {
   4093 				indent();
   4094 				out(O_ALTFP|O_VERB|O_NONL,
   4095 				    "  False arrow from ");
   4096 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
   4097 				    ap->arrowp->tail->myevent);
   4098 				out(O_ALTFP|O_VERB, NULL);
   4099 				continue;
   4100 			}
   4101 
   4102 			ap->arrowp->tail->myevent->cached_state |=
   4103 			    CAUSES_TESTED;
   4104 			tail_event = ap->arrowp->tail->myevent;
   4105 			fstate = hypothesise(fmep, tail_event, at_latest_by,
   4106 			    &my_delay);
   4107 
   4108 			switch (fstate) {
   4109 			case FME_WAIT:
   4110 				if (my_delay < overall_delay)
   4111 					overall_delay = my_delay;
   4112 				waiting_results++;
   4113 				break;
   4114 			case FME_CREDIBLE:
   4115 				credible_results++;
   4116 				break;
   4117 			case FME_DISPROVED:
   4118 				break;
   4119 			default:
   4120 				out(O_DIE, "Bug in causes_test");
   4121 			}
   4122 		}
   4123 	}
   4124 	/* compare against K */
   4125 	if (credible_results + waiting_results < k) {
   4126 		indent();
   4127 		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES DISPROVED ");
   4128 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   4129 		out(O_ALTFP|O_VERB, NULL);
   4130 		indent_pop();
   4131 		return (FME_DISPROVED);
   4132 	}
   4133 	if (waiting_results != 0) {
   4134 		*pdelay = overall_delay;
   4135 		indent();
   4136 		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES WAIT ");
   4137 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   4138 		out(O_ALTFP|O_VERB|O_NONL, " to ");
   4139 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
   4140 		out(O_ALTFP|O_VERB, NULL);
   4141 		indent_pop();
   4142 		return (FME_WAIT);
   4143 	}
   4144 	indent();
   4145 	out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES CREDIBLE ");
   4146 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   4147 	out(O_ALTFP|O_VERB, NULL);
   4148 	indent_pop();
   4149 	return (FME_CREDIBLE);
   4150 }
   4151 
   4152 static enum fme_state
   4153 hypothesise(struct fme *fmep, struct event *ep,
   4154 	unsigned long long at_latest_by, unsigned long long *pdelay)
   4155 {
   4156 	enum fme_state rtr, otr;
   4157 	unsigned long long my_delay;
   4158 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
   4159 
   4160 	stats_counter_bump(fmep->Hcallcount);
   4161 	indent_push("  H");
   4162 	indent();
   4163 	out(O_ALTFP|O_VERB|O_NONL, "->");
   4164 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   4165 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
   4166 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
   4167 	out(O_ALTFP|O_VERB, NULL);
   4168 
   4169 	rtr = requirements_test(fmep, ep, at_latest_by, &my_delay);
   4170 	if ((rtr == FME_WAIT) && (my_delay < overall_delay))
   4171 		overall_delay = my_delay;
   4172 	if (rtr != FME_DISPROVED) {
   4173 		if (is_problem(ep->t)) {
   4174 			otr = effects_test(fmep, ep, at_latest_by, &my_delay);
   4175 			if (otr != FME_DISPROVED) {
   4176 				if (fmep->peek == 0 && ep->is_suspect == 0) {
   4177 					ep->suspects = fmep->suspects;
   4178 					ep->is_suspect = 1;
   4179 					fmep->suspects = ep;
   4180 					fmep->nsuspects++;
   4181 				}
   4182 			}
   4183 		} else
   4184 			otr = causes_test(fmep, ep, at_latest_by, &my_delay);
   4185 		if ((otr == FME_WAIT) && (my_delay < overall_delay))
   4186 			overall_delay = my_delay;
   4187 		if ((otr != FME_DISPROVED) &&
   4188 		    ((rtr == FME_WAIT) || (otr == FME_WAIT)))
   4189 			*pdelay = overall_delay;
   4190 	}
   4191 	if (rtr == FME_DISPROVED) {
   4192 		indent();
   4193 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
   4194 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   4195 		out(O_ALTFP|O_VERB, " (doesn't meet requirements)");
   4196 		indent_pop();
   4197 		return (FME_DISPROVED);
   4198 	}
   4199 	if ((otr == FME_DISPROVED) && is_problem(ep->t)) {
   4200 		indent();
   4201 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
   4202 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   4203 		out(O_ALTFP|O_VERB, " (doesn't explain all reports)");
   4204 		indent_pop();
   4205 		return (FME_DISPROVED);
   4206 	}
   4207 	if (otr == FME_DISPROVED) {
   4208 		indent();
   4209 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
   4210 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   4211 		out(O_ALTFP|O_VERB, " (causes are not credible)");
   4212 		indent_pop();
   4213 		return (FME_DISPROVED);
   4214 	}
   4215 	if ((rtr == FME_WAIT) || (otr == FME_WAIT)) {
   4216 		indent();
   4217 		out(O_ALTFP|O_VERB|O_NONL, "<-WAIT ");
   4218 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   4219 		out(O_ALTFP|O_VERB|O_NONL, " to ");
   4220 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay);
   4221 		out(O_ALTFP|O_VERB, NULL);
   4222 		indent_pop();
   4223 		return (FME_WAIT);
   4224 	}
   4225 	indent();
   4226 	out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE ");
   4227 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
   4228 	out(O_ALTFP|O_VERB, NULL);
   4229 	indent_pop();
   4230 	return (FME_CREDIBLE);
   4231 }
   4232 
   4233 /*
   4234  * fme_istat_load -- reconstitute any persistent istats
   4235  */
   4236 void
   4237 fme_istat_load(fmd_hdl_t *hdl)
   4238 {
   4239 	int sz;
   4240 	char *sbuf;
   4241 	char *ptr;
   4242 
   4243 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_ISTATS)) == 0) {
   4244 		out(O_ALTFP, "fme_istat_load: No stats");
   4245 		return;
   4246 	}
   4247 
   4248 	sbuf = alloca(sz);
   4249 
   4250 	fmd_buf_read(hdl, NULL, WOBUF_ISTATS, sbuf, sz);
   4251 
   4252 	/*
   4253 	 * pick apart the serialized stats
   4254 	 *
   4255 	 * format is:
   4256 	 *	<class-name>, '@', <path>, '\0', <value>, '\0'
   4257 	 * for example:
   4258 	 *	"stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0"
   4259 	 *
   4260 	 * since this is parsing our own serialized data, any parsing issues
   4261 	 * are fatal, so we check for them all with ASSERT() below.
   4262 	 */
   4263 	ptr = sbuf;
   4264 	while (ptr < &sbuf[sz]) {
   4265 		char *sepptr;
   4266 		struct node *np;
   4267 		int val;
   4268 
   4269 		sepptr = strchr(ptr, '@');
   4270 		ASSERT(sepptr != NULL);
   4271 		*sepptr = '\0';
   4272 
   4273 		/* construct the event */
   4274 		np = newnode(T_EVENT, NULL, 0);
   4275 		np->u.event.ename = newnode(T_NAME, NULL, 0);
   4276 		np->u.event.ename->u.name.t = N_STAT;
   4277 		np->u.event.ename->u.name.s = stable(ptr);
   4278 		np->u.event.ename->u.name.it = IT_ENAME;
   4279 		np->u.event.ename->u.name.last = np->u.event.ename;
   4280 
   4281 		ptr = sepptr + 1;
   4282 		ASSERT(ptr < &sbuf[sz]);
   4283 		ptr += strlen(ptr);
   4284 		ptr++;	/* move past the '\0' separating path from value */
   4285 		ASSERT(ptr < &sbuf[sz]);
   4286 		ASSERT(isdigit(*ptr));
   4287 		val = atoi(ptr);
   4288 		ASSERT(val > 0);
   4289 		ptr += strlen(ptr);
   4290 		ptr++;	/* move past the final '\0' for this entry */
   4291 
   4292 		np->u.event.epname = pathstring2epnamenp(sepptr + 1);
   4293 		ASSERT(np->u.event.epname != NULL);
   4294 
   4295 		istat_bump(np, val);
   4296 		tree_free(np);
   4297 	}
   4298 
   4299 	istat_save();
   4300 }
   4301