Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * Fault Management Architecture (FMA) Resource and Protocol Support
     28  *
     29  * The routines contained herein provide services to support kernel subsystems
     30  * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089).
     31  *
     32  * Name-Value Pair Lists
     33  *
     34  * The embodiment of an FMA protocol element (event, fmri or authority) is a
 * name-value pair list (nvlist_t).  FMA-specific nvlist constructor and
     36  * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used
     37  * to create an nvpair list using custom allocators.  Callers may choose to
     38  * allocate either from the kernel memory allocator, or from a preallocated
     39  * buffer, useful in constrained contexts like high-level interrupt routines.
     40  *
     41  * Protocol Event and FMRI Construction
     42  *
     43  * Convenience routines are provided to construct nvlist events according to
     44  * the FMA Event Protocol and Naming Schema specification for ereports and
     45  * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes.
     46  *
     47  * ENA Manipulation
     48  *
     49  * Routines to generate ENA formats 0, 1 and 2 are available as well as
     50  * routines to increment formats 1 and 2.  Individual fields within the
     51  * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(),
     52  * fm_ena_format_get() and fm_ena_gen_get().
     53  */
     54 
     55 #include <sys/types.h>
     56 #include <sys/time.h>
     57 #include <sys/sysevent.h>
     58 #include <sys/sysevent_impl.h>
     59 #include <sys/nvpair.h>
     60 #include <sys/cmn_err.h>
     61 #include <sys/cpuvar.h>
     62 #include <sys/sysmacros.h>
     63 #include <sys/systm.h>
     64 #include <sys/ddifm.h>
     65 #include <sys/ddifm_impl.h>
     66 #include <sys/spl.h>
     67 #include <sys/dumphdr.h>
     68 #include <sys/compress.h>
     69 #include <sys/cpuvar.h>
     70 #include <sys/console.h>
     71 #include <sys/panic.h>
     72 #include <sys/kobj.h>
     73 #include <sys/sunddi.h>
     74 #include <sys/systeminfo.h>
     75 #include <sys/sysevent/eventdefs.h>
     76 #include <sys/fm/util.h>
     77 #include <sys/fm/protocol.h>
     78 
/*
 * URL and SUNW-MSG-ID value to display for fm_panic(), defined below.  These
 * values must be kept in sync with the FMA source code in usr/src/cmd/fm.
 */
static const char *fm_url = "http://www.sun.com/msg";
static const char *fm_msgid = "SUNOS-8000-0G";
/* Written by fm_panic() with its format string; tested by fm_banner(). */
static char *volatile fm_panicstr = NULL;

/* Ereport error queue; created in fm_init() and consumed by fm_drain(). */
errorq_t *ereport_errorq;
/* Buffer allocated in fm_init(); ereport_dumplen records its size in bytes. */
void *ereport_dumpbuf;
size_t ereport_dumplen;

/* Channel length handed to sysevent_evc_control(EVCH_SET_CHAN_LEN). */
static uint_t ereport_chanlen = ERPT_EVCH_MAX;
/* Event channel bound to FM_ERROR_CHAN by fm_init(). */
static evchan_t *ereport_chan = NULL;
/* Error queue length; zero means fm_init() computes a default. */
static ulong_t ereport_qlen = 0;
/* Error queue element size; zero means fm_init() uses ERPT_DATA_SZ. */
static size_t ereport_size = 0;
/* Console width, in columns, used by fm_nvprint() when wrapping output. */
static int ereport_cols = 80;
     96 
/*
 * Common fault management kstats to record ereport generation
 * failures.  The routines in this file bump the matching counter with
 * atomic_add_64() whenever one of these operations fails.
 */

struct erpt_kstat {
	kstat_named_t	erpt_dropped;		/* num erpts dropped on post */
	kstat_named_t	erpt_set_failed;	/* num erpt set failures */
	kstat_named_t	fmri_set_failed;	/* num fmri set failures */
	kstat_named_t	payload_set_failed;	/* num payload set failures */
};
    108 
/*
 * Backing storage for the counters above.  The initializers are positional,
 * so their order must match the member order of struct erpt_kstat.
 * fm_init() installs this structure as the data of the kstat it creates.
 */
static struct erpt_kstat erpt_kstat_data = {
	{ "erpt-dropped", KSTAT_DATA_UINT64 },
	{ "erpt-set-failed", KSTAT_DATA_UINT64 },
	{ "fmri-set-failed", KSTAT_DATA_UINT64 },
	{ "payload-set-failed", KSTAT_DATA_UINT64 }
};
    115 
    116 /*ARGSUSED*/
    117 static void
    118 fm_drain(void *private, void *data, errorq_elem_t *eep)
    119 {
    120 	nvlist_t *nvl = errorq_elem_nvl(ereport_errorq, eep);
    121 
    122 	if (!panicstr)
    123 		(void) fm_ereport_post(nvl, EVCH_TRYHARD);
    124 	else
    125 		fm_nvprint(nvl);
    126 }
    127 
    128 void
    129 fm_init(void)
    130 {
    131 	kstat_t *ksp;
    132 
    133 	(void) sysevent_evc_bind(FM_ERROR_CHAN,
    134 	    &ereport_chan, EVCH_CREAT | EVCH_HOLD_PEND);
    135 
    136 	(void) sysevent_evc_control(ereport_chan,
    137 	    EVCH_SET_CHAN_LEN, &ereport_chanlen);
    138 
    139 	if (ereport_qlen == 0)
    140 		ereport_qlen = ERPT_MAX_ERRS * MAX(max_ncpus, 4);
    141 
    142 	if (ereport_size == 0)
    143 		ereport_size = ERPT_DATA_SZ;
    144 
    145 	ereport_errorq = errorq_nvcreate("fm_ereport_queue",
    146 	    (errorq_func_t)fm_drain, NULL, ereport_qlen, ereport_size,
    147 	    FM_ERR_PIL, ERRORQ_VITAL);
    148 	if (ereport_errorq == NULL)
    149 		panic("failed to create required ereport error queue");
    150 
    151 	ereport_dumpbuf = kmem_alloc(ereport_size, KM_SLEEP);
    152 	ereport_dumplen = ereport_size;
    153 
    154 	/* Initialize ereport allocation and generation kstats */
    155 	ksp = kstat_create("unix", 0, "fm", "misc", KSTAT_TYPE_NAMED,
    156 	    sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
    157 	    KSTAT_FLAG_VIRTUAL);
    158 
    159 	if (ksp != NULL) {
    160 		ksp->ks_data = &erpt_kstat_data;
    161 		kstat_install(ksp);
    162 	} else {
    163 		cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
    164 
    165 	}
    166 }
    167 
    168 /*
    169  * Formatting utility function for fm_nvprintr.  We attempt to wrap chunks of
    170  * output so they aren't split across console lines, and return the end column.
    171  */
    172 /*PRINTFLIKE4*/
    173 static int
    174 fm_printf(int depth, int c, int cols, const char *format, ...)
    175 {
    176 	va_list ap;
    177 	int width;
    178 	char c1;
    179 
    180 	va_start(ap, format);
    181 	width = vsnprintf(&c1, sizeof (c1), format, ap);
    182 	va_end(ap);
    183 
    184 	if (c + width >= cols) {
    185 		console_printf("\n\r");
    186 		c = 0;
    187 		if (format[0] != ' ' && depth > 0) {
    188 			console_printf(" ");
    189 			c++;
    190 		}
    191 	}
    192 
    193 	va_start(ap, format);
    194 	console_vprintf(format, ap);
    195 	va_end(ap);
    196 
    197 	return ((c + width) % cols);
    198 }
    199 
    200 /*
    201  * Recursively print a nvlist in the specified column width and return the
    202  * column we end up in.  This function is called recursively by fm_nvprint(),
    203  * below.  We generically format the entire nvpair using hexadecimal
    204  * integers and strings, and elide any integer arrays.  Arrays are basically
    205  * used for cache dumps right now, so we suppress them so as not to overwhelm
    206  * the amount of console output we produce at panic time.  This can be further
    207  * enhanced as FMA technology grows based upon the needs of consumers.  All
    208  * FMA telemetry is logged using the dump device transport, so the console
    209  * output serves only as a fallback in case this procedure is unsuccessful.
    210  */
    211 static int
    212 fm_nvprintr(nvlist_t *nvl, int d, int c, int cols)
    213 {
    214 	nvpair_t *nvp;
    215 
    216 	for (nvp = nvlist_next_nvpair(nvl, NULL);
    217 	    nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) {
    218 
    219 		data_type_t type = nvpair_type(nvp);
    220 		const char *name = nvpair_name(nvp);
    221 
    222 		boolean_t b;
    223 		uint8_t i8;
    224 		uint16_t i16;
    225 		uint32_t i32;
    226 		uint64_t i64;
    227 		char *str;
    228 		nvlist_t *cnv;
    229 
    230 		if (strcmp(name, FM_CLASS) == 0)
    231 			continue; /* already printed by caller */
    232 
    233 		c = fm_printf(d, c, cols, " %s=", name);
    234 
    235 		switch (type) {
    236 		case DATA_TYPE_BOOLEAN:
    237 			c = fm_printf(d + 1, c, cols, " 1");
    238 			break;
    239 
    240 		case DATA_TYPE_BOOLEAN_VALUE:
    241 			(void) nvpair_value_boolean_value(nvp, &b);
    242 			c = fm_printf(d + 1, c, cols, b ? "1" : "0");
    243 			break;
    244 
    245 		case DATA_TYPE_BYTE:
    246 			(void) nvpair_value_byte(nvp, &i8);
    247 			c = fm_printf(d + 1, c, cols, "%x", i8);
    248 			break;
    249 
    250 		case DATA_TYPE_INT8:
    251 			(void) nvpair_value_int8(nvp, (void *)&i8);
    252 			c = fm_printf(d + 1, c, cols, "%x", i8);
    253 			break;
    254 
    255 		case DATA_TYPE_UINT8:
    256 			(void) nvpair_value_uint8(nvp, &i8);
    257 			c = fm_printf(d + 1, c, cols, "%x", i8);
    258 			break;
    259 
    260 		case DATA_TYPE_INT16:
    261 			(void) nvpair_value_int16(nvp, (void *)&i16);
    262 			c = fm_printf(d + 1, c, cols, "%x", i16);
    263 			break;
    264 
    265 		case DATA_TYPE_UINT16:
    266 			(void) nvpair_value_uint16(nvp, &i16);
    267 			c = fm_printf(d + 1, c, cols, "%x", i16);
    268 			break;
    269 
    270 		case DATA_TYPE_INT32:
    271 			(void) nvpair_value_int32(nvp, (void *)&i32);
    272 			c = fm_printf(d + 1, c, cols, "%x", i32);
    273 			break;
    274 
    275 		case DATA_TYPE_UINT32:
    276 			(void) nvpair_value_uint32(nvp, &i32);
    277 			c = fm_printf(d + 1, c, cols, "%x", i32);
    278 			break;
    279 
    280 		case DATA_TYPE_INT64:
    281 			(void) nvpair_value_int64(nvp, (void *)&i64);
    282 			c = fm_printf(d + 1, c, cols, "%llx",
    283 			    (u_longlong_t)i64);
    284 			break;
    285 
    286 		case DATA_TYPE_UINT64:
    287 			(void) nvpair_value_uint64(nvp, &i64);
    288 			c = fm_printf(d + 1, c, cols, "%llx",
    289 			    (u_longlong_t)i64);
    290 			break;
    291 
    292 		case DATA_TYPE_HRTIME:
    293 			(void) nvpair_value_hrtime(nvp, (void *)&i64);
    294 			c = fm_printf(d + 1, c, cols, "%llx",
    295 			    (u_longlong_t)i64);
    296 			break;
    297 
    298 		case DATA_TYPE_STRING:
    299 			(void) nvpair_value_string(nvp, &str);
    300 			c = fm_printf(d + 1, c, cols, "\"%s\"",
    301 			    str ? str : "<NULL>");
    302 			break;
    303 
    304 		case DATA_TYPE_NVLIST:
    305 			c = fm_printf(d + 1, c, cols, "[");
    306 			(void) nvpair_value_nvlist(nvp, &cnv);
    307 			c = fm_nvprintr(cnv, d + 1, c, cols);
    308 			c = fm_printf(d + 1, c, cols, " ]");
    309 			break;
    310 
    311 		case DATA_TYPE_NVLIST_ARRAY: {
    312 			nvlist_t **val;
    313 			uint_t i, nelem;
    314 
    315 			c = fm_printf(d + 1, c, cols, "[");
    316 			(void) nvpair_value_nvlist_array(nvp, &val, &nelem);
    317 			for (i = 0; i < nelem; i++) {
    318 				c = fm_nvprintr(val[i], d + 1, c, cols);
    319 			}
    320 			c = fm_printf(d + 1, c, cols, " ]");
    321 			}
    322 			break;
    323 
    324 		case DATA_TYPE_BOOLEAN_ARRAY:
    325 		case DATA_TYPE_BYTE_ARRAY:
    326 		case DATA_TYPE_INT8_ARRAY:
    327 		case DATA_TYPE_UINT8_ARRAY:
    328 		case DATA_TYPE_INT16_ARRAY:
    329 		case DATA_TYPE_UINT16_ARRAY:
    330 		case DATA_TYPE_INT32_ARRAY:
    331 		case DATA_TYPE_UINT32_ARRAY:
    332 		case DATA_TYPE_INT64_ARRAY:
    333 		case DATA_TYPE_UINT64_ARRAY:
    334 		case DATA_TYPE_STRING_ARRAY:
    335 			c = fm_printf(d + 1, c, cols, "[...]");
    336 			break;
    337 		case DATA_TYPE_UNKNOWN:
    338 			c = fm_printf(d + 1, c, cols, "<unknown>");
    339 			break;
    340 		}
    341 	}
    342 
    343 	return (c);
    344 }
    345 
    346 void
    347 fm_nvprint(nvlist_t *nvl)
    348 {
    349 	char *class;
    350 	int c = 0;
    351 
    352 	console_printf("\r");
    353 
    354 	if (nvlist_lookup_string(nvl, FM_CLASS, &class) == 0)
    355 		c = fm_printf(0, c, ereport_cols, "%s", class);
    356 
    357 	if (fm_nvprintr(nvl, 0, c, ereport_cols) != 0)
    358 		console_printf("\n");
    359 
    360 	console_printf("\n");
    361 }
    362 
    363 /*
    364  * Wrapper for panic() that first produces an FMA-style message for admins.
    365  * Normally such messages are generated by fmd(1M)'s syslog-msgs agent: this
    366  * is the one exception to that rule and the only error that gets messaged.
    367  * This function is intended for use by subsystems that have detected a fatal
    368  * error and enqueued appropriate ereports and wish to then force a panic.
    369  */
    370 /*PRINTFLIKE1*/
    371 void
    372 fm_panic(const char *format, ...)
    373 {
    374 	va_list ap;
    375 
    376 	(void) casptr((void *)&fm_panicstr, NULL, (void *)format);
    377 	va_start(ap, format);
    378 	vpanic(format, ap);
    379 	va_end(ap);
    380 }
    381 
    382 /*
    383  * Print any appropriate FMA banner message before the panic message.  This
    384  * function is called by panicsys() and prints the message for fm_panic().
    385  * We print the message here so that it comes after the system is quiesced.
    386  * A one-line summary is recorded in the log only (cmn_err(9F) with "!" prefix).
    387  * The rest of the message is for the console only and not needed in the log,
    388  * so it is printed using console_printf().  We break it up into multiple
    389  * chunks so as to avoid overflowing any small legacy prom_printf() buffers.
    390  */
    391 void
    392 fm_banner(void)
    393 {
    394 	timespec_t tod;
    395 	hrtime_t now;
    396 
    397 	if (!fm_panicstr)
    398 		return; /* panic was not initiated by fm_panic(); do nothing */
    399 
    400 	if (panicstr) {
    401 		tod = panic_hrestime;
    402 		now = panic_hrtime;
    403 	} else {
    404 		gethrestime(&tod);
    405 		now = gethrtime_waitfree();
    406 	}
    407 
    408 	cmn_err(CE_NOTE, "!SUNW-MSG-ID: %s, "
    409 	    "TYPE: Error, VER: 1, SEVERITY: Major\n", fm_msgid);
    410 
    411 	console_printf(
    412 "\n\rSUNW-MSG-ID: %s, TYPE: Error, VER: 1, SEVERITY: Major\n"
    413 "EVENT-TIME: 0x%lx.0x%lx (0x%llx)\n",
    414 	    fm_msgid, tod.tv_sec, tod.tv_nsec, (u_longlong_t)now);
    415 
    416 	console_printf(
    417 "PLATFORM: %s, CSN: -, HOSTNAME: %s\n"
    418 "SOURCE: %s, REV: %s %s\n",
    419 	    platform, utsname.nodename, utsname.sysname,
    420 	    utsname.release, utsname.version);
    421 
    422 	console_printf(
    423 "DESC: Errors have been detected that require a reboot to ensure system\n"
    424 "integrity.  See %s/%s for more information.\n",
    425 	    fm_url, fm_msgid);
    426 
    427 	console_printf(
    428 "AUTO-RESPONSE: Solaris will attempt to save and diagnose the error telemetry\n"
    429 "IMPACT: The system will sync files, save a crash dump if needed, and reboot\n"
    430 "REC-ACTION: Save the error summary below in case telemetry cannot be saved\n");
    431 
    432 	console_printf("\n");
    433 }
    434 
    435 /*
    436  * Utility function to write all of the pending ereports to the dump device.
    437  * This function is called at either normal reboot or panic time, and simply
    438  * iterates over the in-transit messages in the ereport sysevent channel.
    439  */
    440 void
    441 fm_ereport_dump(void)
    442 {
    443 	evchanq_t *chq;
    444 	sysevent_t *sep;
    445 	erpt_dump_t ed;
    446 
    447 	timespec_t tod;
    448 	hrtime_t now;
    449 	char *buf;
    450 	size_t len;
    451 
    452 	if (panicstr) {
    453 		tod = panic_hrestime;
    454 		now = panic_hrtime;
    455 	} else {
    456 		if (ereport_errorq != NULL)
    457 			errorq_drain(ereport_errorq);
    458 		gethrestime(&tod);
    459 		now = gethrtime_waitfree();
    460 	}
    461 
    462 	/*
    463 	 * In the panic case, sysevent_evc_walk_init() will return NULL.
    464 	 */
    465 	if ((chq = sysevent_evc_walk_init(ereport_chan, NULL)) == NULL &&
    466 	    !panicstr)
    467 		return; /* event channel isn't initialized yet */
    468 
    469 	while ((sep = sysevent_evc_walk_step(chq)) != NULL) {
    470 		if ((buf = sysevent_evc_event_attr(sep, &len)) == NULL)
    471 			break;
    472 
    473 		ed.ed_magic = ERPT_MAGIC;
    474 		ed.ed_chksum = checksum32(buf, len);
    475 		ed.ed_size = (uint32_t)len;
    476 		ed.ed_pad = 0;
    477 		ed.ed_hrt_nsec = SE_TIME(sep);
    478 		ed.ed_hrt_base = now;
    479 		ed.ed_tod_base.sec = tod.tv_sec;
    480 		ed.ed_tod_base.nsec = tod.tv_nsec;
    481 
    482 		dumpvp_write(&ed, sizeof (ed));
    483 		dumpvp_write(buf, len);
    484 	}
    485 
    486 	sysevent_evc_walk_fini(chq);
    487 }
    488 
    489 /*
    490  * Post an error report (ereport) to the sysevent error channel.  The error
    491  * channel must be established with a prior call to sysevent_evc_create()
    492  * before publication may occur.
    493  */
    494 void
    495 fm_ereport_post(nvlist_t *ereport, int evc_flag)
    496 {
    497 	size_t nvl_size = 0;
    498 	evchan_t *error_chan;
    499 
    500 	(void) nvlist_size(ereport, &nvl_size, NV_ENCODE_NATIVE);
    501 	if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
    502 		atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
    503 		return;
    504 	}
    505 
    506 	if (sysevent_evc_bind(FM_ERROR_CHAN, &error_chan,
    507 	    EVCH_CREAT|EVCH_HOLD_PEND) != 0) {
    508 		atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
    509 		return;
    510 	}
    511 
    512 	if (sysevent_evc_publish(error_chan, EC_FM, ESC_FM_ERROR,
    513 	    SUNW_VENDOR, FM_PUB, ereport, evc_flag) != 0) {
    514 		atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
    515 		sysevent_evc_unbind(error_chan);
    516 		return;
    517 	}
    518 	sysevent_evc_unbind(error_chan);
    519 }
    520 
    521 /*
 * Wrappers for FM nvlist allocators
    523  */
    524 /* ARGSUSED */
    525 static void *
    526 i_fm_alloc(nv_alloc_t *nva, size_t size)
    527 {
    528 	return (kmem_zalloc(size, KM_SLEEP));
    529 }
    530 
    531 /* ARGSUSED */
    532 static void
    533 i_fm_free(nv_alloc_t *nva, void *buf, size_t size)
    534 {
    535 	kmem_free(buf, size);
    536 }
    537 
/*
 * Allocator operations vector that fm_nvlist_create() installs when the
 * caller does not supply an nv_alloc_t of its own.  The initializers are
 * positional: only the third and fourth slots are populated, with
 * i_fm_alloc() and i_fm_free(); the remaining slots are left NULL.
 */
const nv_alloc_ops_t fm_mem_alloc_ops = {
	NULL,
	NULL,
	i_fm_alloc,
	i_fm_free,
	NULL
};
    545 
    546 /*
    547  * Create and initialize a new nv_alloc_t for a fixed buffer, buf.  A pointer
    548  * to the newly allocated nv_alloc_t structure is returned upon success or NULL
    549  * is returned to indicate that the nv_alloc structure could not be created.
    550  */
    551 nv_alloc_t *
    552 fm_nva_xcreate(char *buf, size_t bufsz)
    553 {
    554 	nv_alloc_t *nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);
    555 
    556 	if (bufsz == 0 || nv_alloc_init(nvhdl, nv_fixed_ops, buf, bufsz) != 0) {
    557 		kmem_free(nvhdl, sizeof (nv_alloc_t));
    558 		return (NULL);
    559 	}
    560 
    561 	return (nvhdl);
    562 }
    563 
    564 /*
    565  * Destroy a previously allocated nv_alloc structure.  The fixed buffer
    566  * associated with nva must be freed by the caller.
    567  */
    568 void
    569 fm_nva_xdestroy(nv_alloc_t *nva)
    570 {
    571 	nv_alloc_fini(nva);
    572 	kmem_free(nva, sizeof (nv_alloc_t));
    573 }
    574 
    575 /*
    576  * Create a new nv list.  A pointer to a new nv list structure is returned
    577  * upon success or NULL is returned to indicate that the structure could
    578  * not be created.  The newly created nv list is created and managed by the
    579  * operations installed in nva.   If nva is NULL, the default FMA nva
    580  * operations are installed and used.
    581  *
    582  * When called from the kernel and nva == NULL, this function must be called
    583  * from passive kernel context with no locks held that can prevent a
    584  * sleeping memory allocation from occurring.  Otherwise, this function may
    585  * be called from other kernel contexts as long a valid nva created via
    586  * fm_nva_create() is supplied.
    587  */
    588 nvlist_t *
    589 fm_nvlist_create(nv_alloc_t *nva)
    590 {
    591 	int hdl_alloced = 0;
    592 	nvlist_t *nvl;
    593 	nv_alloc_t *nvhdl;
    594 
    595 	if (nva == NULL) {
    596 		nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);
    597 
    598 		if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) {
    599 			kmem_free(nvhdl, sizeof (nv_alloc_t));
    600 			return (NULL);
    601 		}
    602 		hdl_alloced = 1;
    603 	} else {
    604 		nvhdl = nva;
    605 	}
    606 
    607 	if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) {
    608 		if (hdl_alloced) {
    609 			kmem_free(nvhdl, sizeof (nv_alloc_t));
    610 			nv_alloc_fini(nvhdl);
    611 		}
    612 		return (NULL);
    613 	}
    614 
    615 	return (nvl);
    616 }
    617 
    618 /*
    619  * Destroy a previously allocated nvlist structure.  flag indicates whether
    620  * or not the associated nva structure should be freed (FM_NVA_FREE) or
    621  * retained (FM_NVA_RETAIN).  Retaining the nv alloc structure allows
    622  * it to be re-used for future nvlist creation operations.
    623  */
    624 void
    625 fm_nvlist_destroy(nvlist_t *nvl, int flag)
    626 {
    627 	nv_alloc_t *nva = nvlist_lookup_nv_alloc(nvl);
    628 
    629 	nvlist_free(nvl);
    630 
    631 	if (nva != NULL) {
    632 		if (flag == FM_NVA_FREE)
    633 			fm_nva_xdestroy(nva);
    634 	}
    635 }
    636 
    637 int
    638 i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap)
    639 {
    640 	int nelem, ret = 0;
    641 	data_type_t type;
    642 
    643 	while (ret == 0 && name != NULL) {
    644 		type = va_arg(ap, data_type_t);
    645 		switch (type) {
    646 		case DATA_TYPE_BYTE:
    647 			ret = nvlist_add_byte(payload, name,
    648 			    va_arg(ap, uint_t));
    649 			break;
    650 		case DATA_TYPE_BYTE_ARRAY:
    651 			nelem = va_arg(ap, int);
    652 			ret = nvlist_add_byte_array(payload, name,
    653 			    va_arg(ap, uchar_t *), nelem);
    654 			break;
    655 		case DATA_TYPE_BOOLEAN_VALUE:
    656 			ret = nvlist_add_boolean_value(payload, name,
    657 			    va_arg(ap, boolean_t));
    658 			break;
    659 		case DATA_TYPE_BOOLEAN_ARRAY:
    660 			nelem = va_arg(ap, int);
    661 			ret = nvlist_add_boolean_array(payload, name,
    662 			    va_arg(ap, boolean_t *), nelem);
    663 			break;
    664 		case DATA_TYPE_INT8:
    665 			ret = nvlist_add_int8(payload, name,
    666 			    va_arg(ap, int));
    667 			break;
    668 		case DATA_TYPE_INT8_ARRAY:
    669 			nelem = va_arg(ap, int);
    670 			ret = nvlist_add_int8_array(payload, name,
    671 			    va_arg(ap, int8_t *), nelem);
    672 			break;
    673 		case DATA_TYPE_UINT8:
    674 			ret = nvlist_add_uint8(payload, name,
    675 			    va_arg(ap, uint_t));
    676 			break;
    677 		case DATA_TYPE_UINT8_ARRAY:
    678 			nelem = va_arg(ap, int);
    679 			ret = nvlist_add_uint8_array(payload, name,
    680 			    va_arg(ap, uint8_t *), nelem);
    681 			break;
    682 		case DATA_TYPE_INT16:
    683 			ret = nvlist_add_int16(payload, name,
    684 			    va_arg(ap, int));
    685 			break;
    686 		case DATA_TYPE_INT16_ARRAY:
    687 			nelem = va_arg(ap, int);
    688 			ret = nvlist_add_int16_array(payload, name,
    689 			    va_arg(ap, int16_t *), nelem);
    690 			break;
    691 		case DATA_TYPE_UINT16:
    692 			ret = nvlist_add_uint16(payload, name,
    693 			    va_arg(ap, uint_t));
    694 			break;
    695 		case DATA_TYPE_UINT16_ARRAY:
    696 			nelem = va_arg(ap, int);
    697 			ret = nvlist_add_uint16_array(payload, name,
    698 			    va_arg(ap, uint16_t *), nelem);
    699 			break;
    700 		case DATA_TYPE_INT32:
    701 			ret = nvlist_add_int32(payload, name,
    702 			    va_arg(ap, int32_t));
    703 			break;
    704 		case DATA_TYPE_INT32_ARRAY:
    705 			nelem = va_arg(ap, int);
    706 			ret = nvlist_add_int32_array(payload, name,
    707 			    va_arg(ap, int32_t *), nelem);
    708 			break;
    709 		case DATA_TYPE_UINT32:
    710 			ret = nvlist_add_uint32(payload, name,
    711 			    va_arg(ap, uint32_t));
    712 			break;
    713 		case DATA_TYPE_UINT32_ARRAY:
    714 			nelem = va_arg(ap, int);
    715 			ret = nvlist_add_uint32_array(payload, name,
    716 			    va_arg(ap, uint32_t *), nelem);
    717 			break;
    718 		case DATA_TYPE_INT64:
    719 			ret = nvlist_add_int64(payload, name,
    720 			    va_arg(ap, int64_t));
    721 			break;
    722 		case DATA_TYPE_INT64_ARRAY:
    723 			nelem = va_arg(ap, int);
    724 			ret = nvlist_add_int64_array(payload, name,
    725 			    va_arg(ap, int64_t *), nelem);
    726 			break;
    727 		case DATA_TYPE_UINT64:
    728 			ret = nvlist_add_uint64(payload, name,
    729 			    va_arg(ap, uint64_t));
    730 			break;
    731 		case DATA_TYPE_UINT64_ARRAY:
    732 			nelem = va_arg(ap, int);
    733 			ret = nvlist_add_uint64_array(payload, name,
    734 			    va_arg(ap, uint64_t *), nelem);
    735 			break;
    736 		case DATA_TYPE_STRING:
    737 			ret = nvlist_add_string(payload, name,
    738 			    va_arg(ap, char *));
    739 			break;
    740 		case DATA_TYPE_STRING_ARRAY:
    741 			nelem = va_arg(ap, int);
    742 			ret = nvlist_add_string_array(payload, name,
    743 			    va_arg(ap, char **), nelem);
    744 			break;
    745 		case DATA_TYPE_NVLIST:
    746 			ret = nvlist_add_nvlist(payload, name,
    747 			    va_arg(ap, nvlist_t *));
    748 			break;
    749 		case DATA_TYPE_NVLIST_ARRAY:
    750 			nelem = va_arg(ap, int);
    751 			ret = nvlist_add_nvlist_array(payload, name,
    752 			    va_arg(ap, nvlist_t **), nelem);
    753 			break;
    754 		default:
    755 			ret = EINVAL;
    756 		}
    757 
    758 		name = va_arg(ap, char *);
    759 	}
    760 	return (ret);
    761 }
    762 
    763 void
    764 fm_payload_set(nvlist_t *payload, ...)
    765 {
    766 	int ret;
    767 	const char *name;
    768 	va_list ap;
    769 
    770 	va_start(ap, payload);
    771 	name = va_arg(ap, char *);
    772 	ret = i_fm_payload_set(payload, name, ap);
    773 	va_end(ap);
    774 
    775 	if (ret)
    776 		atomic_add_64(
    777 		    &erpt_kstat_data.payload_set_failed.value.ui64, 1);
    778 }
    779 
    780 /*
    781  * Set-up and validate the members of an ereport event according to:
    782  *
    783  *	Member name		Type		Value
    784  *	====================================================
    785  *	class			string		ereport
    786  *	version			uint8_t		0
    787  *	ena			uint64_t	<ena>
    788  *	detector		nvlist_t	<detector>
    789  *	ereport-payload		nvlist_t	<var args>
    790  *
    791  */
    792 void
    793 fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
    794     uint64_t ena, const nvlist_t *detector, ...)
    795 {
    796 	char ereport_class[FM_MAX_CLASS];
    797 	const char *name;
    798 	va_list ap;
    799 	int ret;
    800 
    801 	if (version != FM_EREPORT_VERS0) {
    802 		atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
    803 		return;
    804 	}
    805 
    806 	(void) snprintf(ereport_class, FM_MAX_CLASS, "%s.%s",
    807 	    FM_EREPORT_CLASS, erpt_class);
    808 	if (nvlist_add_string(ereport, FM_CLASS, ereport_class) != 0) {
    809 		atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
    810 		return;
    811 	}
    812 
    813 	if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena)) {
    814 		atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
    815 	}
    816 
    817 	if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR,
    818 	    (nvlist_t *)detector) != 0) {
    819 		atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
    820 	}
    821 
    822 	va_start(ap, detector);
    823 	name = va_arg(ap, const char *);
    824 	ret = i_fm_payload_set(ereport, name, ap);
    825 	va_end(ap);
    826 
    827 	if (ret)
    828 		atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
    829 }
    830 
    831 /*
 * Set-up and validate the members of an hc fmri according to:
    833  *
    834  *	Member name		Type		Value
    835  *	===================================================
    836  *	version			uint8_t		0
    837  *	auth			nvlist_t	<auth>
    838  *	hc-name			string		<name>
    839  *	hc-id			string		<id>
    840  *
    841  * Note that auth and hc-id are optional members.
    842  */
    843 
/*
 * HC_MAXPAIRS is the capacity of the pairs[] array in fm_fmri_hc_set() and
 * the value its npairs argument is clamped to.  HC_MAXNAMELEN is not
 * referenced by the hc routines immediately below.
 */
#define	HC_MAXPAIRS	20
#define	HC_MAXNAMELEN	50
    846 
    847 static int
    848 fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth)
    849 {
    850 	if (version != FM_HC_SCHEME_VERSION) {
    851 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
    852 		return (0);
    853 	}
    854 
    855 	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 ||
    856 	    nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) {
    857 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
    858 		return (0);
    859 	}
    860 
    861 	if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
    862 	    (nvlist_t *)auth) != 0) {
    863 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
    864 		return (0);
    865 	}
    866 
    867 	return (1);
    868 }
    869 
    870 void
    871 fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
    872     nvlist_t *snvl, int npairs, ...)
    873 {
    874 	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
    875 	nvlist_t *pairs[HC_MAXPAIRS];
    876 	va_list ap;
    877 	int i;
    878 
    879 	if (!fm_fmri_hc_set_common(fmri, version, auth))
    880 		return;
    881 
    882 	npairs = MIN(npairs, HC_MAXPAIRS);
    883 
    884 	va_start(ap, npairs);
    885 	for (i = 0; i < npairs; i++) {
    886 		const char *name = va_arg(ap, const char *);
    887 		uint32_t id = va_arg(ap, uint32_t);
    888 		char idstr[11];
    889 
    890 		(void) snprintf(idstr, sizeof (idstr), "%u", id);
    891 
    892 		pairs[i] = fm_nvlist_create(nva);
    893 		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
    894 		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
    895 			atomic_add_64(
    896 			    &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
    897 		}
    898 	}
    899 	va_end(ap);
    900 
    901 	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs, npairs) != 0)
    902 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
    903 
    904 	for (i = 0; i < npairs; i++)
    905 		fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
    906 
    907 	if (snvl != NULL) {
    908 		if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
    909 			atomic_add_64(
    910 			    &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
    911 		}
    912 	}
    913 }
    914 
    915 /*
    916  * Set-up and validate the members of an dev fmri according to:
    917  *
    918  *	Member name		Type		Value
    919  *	====================================================
    920  *	version			uint8_t		0
    921  *	auth			nvlist_t	<auth>
    922  *	devpath			string		<devpath>
    923  *	devid			string		<devid>
    924  *
    925  * Note that auth and devid are optional members.
    926  */
    927 void
    928 fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
    929     const char *devpath, const char *devid)
    930 {
    931 	if (version != DEV_SCHEME_VERSION0) {
    932 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
    933 		return;
    934 	}
    935 
    936 	if (nvlist_add_uint8(fmri_dev, FM_VERSION, version) != 0) {
    937 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
    938 		return;
    939 	}
    940 
    941 	if (nvlist_add_string(fmri_dev, FM_FMRI_SCHEME,
    942 	    FM_FMRI_SCHEME_DEV) != 0) {
    943 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
    944 		return;
    945 	}
    946 
    947 	if (auth != NULL) {
    948 		if (nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
    949 		    (nvlist_t *)auth) != 0) {
    950 			atomic_add_64(
    951 			    &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
    952 		}
    953 	}
    954 
    955 	if (nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath) != 0) {
    956 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
    957 	}
    958 
    959 	if (devid != NULL)
    960 		if (nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid) != 0)
    961 			atomic_add_64(
    962 			    &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
    963 }
    964 
    965 /*
    966  * Set-up and validate the members of an cpu fmri according to:
    967  *
    968  *	Member name		Type		Value
    969  *	====================================================
    970  *	version			uint8_t		0
    971  *	auth			nvlist_t	<auth>
    972  *	cpuid			uint32_t	<cpu_id>
    973  *	cpumask			uint8_t		<cpu_mask>
    974  *	serial			uint64_t	<serial_id>
    975  *
    976  * Note that auth, cpumask, serial are optional members.
    977  *
    978  */
    979 void
    980 fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth,
    981     uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp)
    982 {
    983 	uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;
    984 
    985 	if (version < CPU_SCHEME_VERSION1) {
    986 		atomic_add_64(failedp, 1);
    987 		return;
    988 	}
    989 
    990 	if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) {
    991 		atomic_add_64(failedp, 1);
    992 		return;
    993 	}
    994 
    995 	if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME,
    996 	    FM_FMRI_SCHEME_CPU) != 0) {
    997 		atomic_add_64(failedp, 1);
    998 		return;
    999 	}
   1000 
   1001 	if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY,
   1002 	    (nvlist_t *)auth) != 0)
   1003 		atomic_add_64(failedp, 1);
   1004 
   1005 	if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0)
   1006 		atomic_add_64(failedp, 1);
   1007 
   1008 	if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK,
   1009 	    *cpu_maskp) != 0)
   1010 		atomic_add_64(failedp, 1);
   1011 
   1012 	if (serial_idp == NULL || nvlist_add_string(fmri_cpu,
   1013 	    FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0)
   1014 			atomic_add_64(failedp, 1);
   1015 }
   1016 
   1017 /*
   1018  * Set-up and validate the members of a mem fmri according to:
   1019  *
   1020  *	Member name		Type		Value
   1021  *	====================================================
   1022  *	version			uint8_t		0
   1023  *	auth			nvlist_t	<auth>		[optional]
   1024  *	unum			string		<unum>
   1025  *	serial			string		<serial>	[optional*]
   1026  *	offset			uint64_t	<offset>	[optional]
   1027  *
   1028  *	* serial is required if offset is present
   1029  */
   1030 void
   1031 fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth,
   1032     const char *unum, const char *serial, uint64_t offset)
   1033 {
   1034 	if (version != MEM_SCHEME_VERSION0) {
   1035 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
   1036 		return;
   1037 	}
   1038 
   1039 	if (!serial && (offset != (uint64_t)-1)) {
   1040 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
   1041 		return;
   1042 	}
   1043 
   1044 	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
   1045 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
   1046 		return;
   1047 	}
   1048 
   1049 	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) {
   1050 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
   1051 		return;
   1052 	}
   1053 
   1054 	if (auth != NULL) {
   1055 		if (nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
   1056 		    (nvlist_t *)auth) != 0) {
   1057 			atomic_add_64(
   1058 			    &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
   1059 		}
   1060 	}
   1061 
   1062 	if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0) {
   1063 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
   1064 	}
   1065