Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     27 
     28 #include <sys/types.h>
     29 #include <sys/errno.h>
     30 #include <sys/stropts.h>
     31 #include <sys/debug.h>
     32 #include <sys/ddi.h>
     33 #include <sys/sunddi.h>
     34 #include <sys/vmem.h>
     35 #include <sys/cmn_err.h>
     36 #include <sys/callb.h>
     37 #include <sys/sysevent.h>
     38 #include <sys/sysevent_impl.h>
     39 #include <sys/modctl.h>
     40 #include <sys/sysmacros.h>
     41 #include <sys/disp.h>
     42 #include <sys/autoconf.h>
     43 #include <sys/atomic.h>
     44 #include <sys/sdt.h>
     45 
     46 /* for doors */
     47 #include <sys/pathname.h>
     48 #include <sys/door.h>
     49 #include <sys/kmem.h>
     50 #include <sys/cpuvar.h>
     51 #include <sys/fs/snode.h>
     52 
     53 /*
     54  * log_sysevent.c - Provides the interfaces for kernel event publication
     55  *			to the sysevent event daemon (syseventd).
     56  */
     57 
     /*
      * Debug support.
      *
      * log_event_debug selects the verbosity at run time: any non-zero value
      * enables LOG_DEBUG, values greater than 1 additionally enable LOG_DEBUG1
      * (which is compiled in for DEBUG builds only).  Both macros take one
      * fully parenthesized cmn_err() argument list, for example:
      *
      *	LOG_DEBUG((CE_CONT, "value %d\n", val));
      *
      * The expansions are wrapped in do { } while (0) so that a macro used as
      * the body of an unbraced if cannot capture the caller's else branch.
      */
     static int log_event_debug = 0;
     #define	LOG_DEBUG(args)	\
     	do { if (log_event_debug) cmn_err args; } while (0)
     #ifdef DEBUG
     #define	LOG_DEBUG1(args)	\
     	do { if (log_event_debug > 1) cmn_err args; } while (0)
     #else
     #define	LOG_DEBUG1(args)
     #endif
     68 
     69 /*
     70  * Local static vars
     71  */
     72 /* queue of event buffers sent to syseventd */
     73 static log_eventq_t *log_eventq_sent = NULL;
     74 
     75 /*
     76  * Count of event buffers in the queue
     77  */
     78 int log_eventq_cnt = 0;
     79 
     80 /* queue of event buffers awaiting delivery to syseventd */
     81 static log_eventq_t *log_eventq_head = NULL;
     82 static log_eventq_t *log_eventq_tail = NULL;
     83 static uint64_t kernel_event_id = 0;
     84 static int encoding = NV_ENCODE_NATIVE;
     85 
     86 /* log event delivery flag */
     87 #define	LOGEVENT_DELIVERY_OK	0	/* OK to deliver event buffers */
     88 #define	LOGEVENT_DELIVERY_CONT	1	/* Continue to deliver event buffers */
     89 #define	LOGEVENT_DELIVERY_HOLD	2	/* Hold delivering of event buffers */
     90 
     91 /*
     92  * Tunable maximum event buffer queue size. Size depends on how many events
     93  * the queue must hold when syseventd is not available, for example during
     94  * system startup. Experience showed that more than 2000 events could be posted
     95  * due to correctable memory errors.
     96  */
     97 int logevent_max_q_sz = 5000;
     98 
     99 
    100 static int log_event_delivery = LOGEVENT_DELIVERY_HOLD;
    101 static char *logevent_door_upcall_filename = NULL;
    102 static int logevent_door_upcall_filename_size;
    103 
    104 static door_handle_t event_door = NULL;		/* Door for upcalls */
    105 
    106 /*
    107  * async thread-related variables
    108  *
    109  * eventq_head_mutex - synchronizes access to the kernel event queue
    110  *
    111  * eventq_sent_mutex - synchronizes access to the queue of event sents to
    112  *			userlevel
    113  *
    114  * log_event_cv - condition variable signaled when an event has arrived or
    115  *			userlevel ready to process event buffers
    116  *
    117  * async_thread - asynchronous event delivery thread to userlevel daemon.
    118  *
    119  * sysevent_upcall_status - status of the door upcall link
    120  */
    121 static kmutex_t eventq_head_mutex;
    122 static kmutex_t eventq_sent_mutex;
    123 static kcondvar_t log_event_cv;
    124 static kthread_id_t async_thread = NULL;
    125 
    126 static kmutex_t event_qfull_mutex;
    127 static kcondvar_t event_qfull_cv;
    128 static int event_qfull_blocked = 0;
    129 
    130 static int sysevent_upcall_status = -1;
    131 static kmutex_t registered_channel_mutex;
    132 
    133 /*
    134  * Indicates the syseventd daemon has begun taking events
    135  */
    136 int sysevent_daemon_init = 0;
    137 
    138 /*
    139  * Back-off delay when door_ki_upcall returns EAGAIN.  Typically
    140  * caused by the server process doing a forkall().  Since all threads
    141  * but the thread actually doing the forkall() need to be quiesced,
    142  * the fork may take some time.  The min/max pause are in units
    143  * of clock ticks.
    144  */
    145 #define	LOG_EVENT_MIN_PAUSE	8
    146 #define	LOG_EVENT_MAX_PAUSE	128
    147 
    148 static kmutex_t	event_pause_mutex;
    149 static kcondvar_t event_pause_cv;
    150 static int event_pause_state = 0;
    151 
    152 /*
    153  * log_event_upcall_lookup - Establish door connection with user event
    154  *				daemon (syseventd)
    155  */
    156 static int
    157 log_event_upcall_lookup()
    158 {
    159 	int	error;
    160 
    161 	if (event_door) {	/* Release our previous hold (if any) */
    162 		door_ki_rele(event_door);
    163 	}
    164 
    165 	event_door = NULL;
    166 
    167 	/*
    168 	 * Locate the door used for upcalls
    169 	 */
    170 	if ((error =
    171 	    door_ki_open(logevent_door_upcall_filename, &event_door)) != 0) {
    172 		return (error);
    173 	}
    174 
    175 	return (0);
    176 }
    177 
    178 
    179 /*ARGSUSED*/
    180 static void
    181 log_event_busy_timeout(void *arg)
    182 {
    183 	mutex_enter(&event_pause_mutex);
    184 	event_pause_state = 0;
    185 	cv_signal(&event_pause_cv);
    186 	mutex_exit(&event_pause_mutex);
    187 }
    188 
    189 static void
    190 log_event_pause(int nticks)
    191 {
    192 	timeout_id_t id;
    193 
    194 	/*
    195 	 * Only one use of log_event_pause at a time
    196 	 */
    197 	ASSERT(event_pause_state == 0);
    198 
    199 	event_pause_state = 1;
    200 	id = timeout(log_event_busy_timeout, NULL, nticks);
    201 	if (id != 0) {
    202 		mutex_enter(&event_pause_mutex);
    203 		while (event_pause_state)
    204 			cv_wait(&event_pause_cv, &event_pause_mutex);
    205 		mutex_exit(&event_pause_mutex);
    206 	}
    207 	event_pause_state = 0;
    208 }
    209 
    210 
    211 /*
    212  * log_event_upcall - Perform the upcall to syseventd for event buffer delivery.
    213  * 			Check for rebinding errors
    214  * 			This buffer is reused to by the syseventd door_return
    215  *			to hold the result code
    216  */
    217 static int
    218 log_event_upcall(log_event_upcall_arg_t *arg)
    219 {
    220 	int error;
    221 	size_t size;
    222 	sysevent_t *ev;
    223 	door_arg_t darg, save_arg;
    224 	int retry;
    225 	int neagain = 0;
    226 	int neintr = 0;
    227 	int nticks = LOG_EVENT_MIN_PAUSE;
    228 
    229 	/* Initialize door args */
    230 	ev = (sysevent_t *)&arg->buf;
    231 	size = sizeof (log_event_upcall_arg_t) + SE_PAYLOAD_SZ(ev);
    232 
    233 	darg.rbuf = (char *)arg;
    234 	darg.data_ptr = (char *)arg;
    235 	darg.rsize = size;
    236 	darg.data_size = size;
    237 	darg.desc_ptr = NULL;
    238 	darg.desc_num = 0;
    239 
    240 	if ((event_door == NULL) &&
    241 	    ((error = log_event_upcall_lookup()) != 0)) {
    242 		LOG_DEBUG((CE_CONT,
    243 		    "log_event_upcall: event_door error (%d)\n", error));
    244 
    245 		return (error);
    246 	}
    247 
    248 	LOG_DEBUG1((CE_CONT, "log_event_upcall: 0x%llx\n",
    249 	    (longlong_t)SE_SEQ((sysevent_t *)&arg->buf)));
    250 
    251 	save_arg = darg;
    252 	for (retry = 0; ; retry++) {
    253 		if ((error = door_ki_upcall_limited(event_door, &darg, NULL,
    254 		    SIZE_MAX, 0)) == 0) {
    255 			break;
    256 		}
    257 		switch (error) {
    258 		case EINTR:
    259 			neintr++;
    260 			log_event_pause(2);
    261 			darg = save_arg;
    262 			break;
    263 		case EAGAIN:
    264 			/* cannot deliver upcall - process may be forking */
    265 			neagain++;
    266 			log_event_pause(nticks);
    267 			nticks <<= 1;
    268 			if (nticks > LOG_EVENT_MAX_PAUSE)
    269 				nticks = LOG_EVENT_MAX_PAUSE;
    270 			darg = save_arg;
    271 			break;
    272 		case EBADF:
    273 			LOG_DEBUG((CE_CONT, "log_event_upcall: rebinding\n"));
    274 			/* Server may have died. Try rebinding */
    275 			if ((error = log_event_upcall_lookup()) != 0) {
    276 				LOG_DEBUG((CE_CONT,
    277 				    "log_event_upcall: lookup error %d\n",
    278 				    error));
    279 				return (EBADF);
    280 			}
    281 			if (retry > 4) {
    282 				LOG_DEBUG((CE_CONT,
    283 					"log_event_upcall: ebadf\n"));
    284 				return (EBADF);
    285 			}
    286 			LOG_DEBUG((CE_CONT, "log_event_upcall: "
    287 				"retrying upcall after lookup\n"));
    288 			darg = save_arg;
    289 			break;
    290 		default:
    291 			cmn_err(CE_CONT,
    292 			    "log_event_upcall: door_ki_upcall error %d\n",
    293 			    error);
    294 			return (error);
    295 		}
    296 	}
    297 
    298 	if (neagain > 0 || neintr > 0) {
    299 		LOG_DEBUG((CE_CONT, "upcall: eagain=%d eintr=%d nticks=%d\n",
    300 			neagain, neintr, nticks));
    301 	}
    302 
    303 	LOG_DEBUG1((CE_CONT, "log_event_upcall:\n\t"
    304 		"error=%d rptr1=%p rptr2=%p dptr2=%p ret1=%x ret2=%x\n",
    305 		error, (void *)arg, (void *)darg.rbuf,
    306 		(void *)darg.data_ptr,
    307 		*((int *)(darg.rbuf)), *((int *)(darg.data_ptr))));
    308 
    309 	if (!error) {
    310 		/*
    311 		 * upcall was successfully executed. Check return code.
    312 		 */
    313 		error = *((int *)(darg.rbuf));
    314 	}
    315 
    316 	return (error);
    317 }
    318 
    319 /*
    320  * log_event_deliver - event delivery thread
    321  *			Deliver all events on the event queue to syseventd.
    322  *			If the daemon can not process events, stop event
    323  *			delivery and wait for an indication from the
    324  *			daemon to resume delivery.
    325  *
    326  *			Once all event buffers have been delivered, wait
    327  *			until there are more to deliver.
    328  */
    329 static void
    330 log_event_deliver()
    331 {
    332 	log_eventq_t *q;
    333 	int upcall_err;
    334 	callb_cpr_t cprinfo;
    335 
    336 	CALLB_CPR_INIT(&cprinfo, &eventq_head_mutex, callb_generic_cpr,
    337 				"logevent");
    338 
    339 	/*
    340 	 * eventq_head_mutex is exited (released) when there are no more
    341 	 * events to process from the eventq in cv_wait().
    342 	 */
    343 	mutex_enter(&eventq_head_mutex);
    344 
    345 	for (;;) {
    346 		LOG_DEBUG1((CE_CONT, "log_event_deliver: head = %p\n",
    347 		    (void *)log_eventq_head));
    348 
    349 		upcall_err = 0;
    350 		q = log_eventq_head;
    351 
    352 		while (q) {
    353 			log_eventq_t *next;
    354 
    355 			/*
    356 			 * Release event queue lock during upcall to
    357 			 * syseventd
    358 			 */
    359 			if (log_event_delivery == LOGEVENT_DELIVERY_HOLD) {
    360 				upcall_err = EAGAIN;
    361 				break;
    362 			}
    363 
    364 			mutex_exit(&eventq_head_mutex);
    365 			if ((upcall_err = log_event_upcall(&q->arg)) != 0) {
    366 				mutex_enter(&eventq_head_mutex);
    367 				break;
    368 			}
    369 
    370 			/*
    371 			 * We may be able to add entries to
    372 			 * the queue now.
    373 			 */
    374 			if (event_qfull_blocked > 0 &&
    375 			    log_eventq_cnt < logevent_max_q_sz) {
    376 				mutex_enter(&event_qfull_mutex);
    377 				if (event_qfull_blocked > 0) {
    378 					cv_signal(&event_qfull_cv);
    379 				}
    380 				mutex_exit(&event_qfull_mutex);
    381 			}
    382 
    383 			mutex_enter(&eventq_head_mutex);
    384 
    385 			/*
    386 			 * Daemon restart can cause entries to be moved from
    387 			 * the sent queue and put back on the event queue.
    388 			 * If this has occurred, replay event queue
    389 			 * processing from the new queue head.
    390 			 */
    391 			if (q != log_eventq_head) {
    392 				q = log_eventq_head;
    393 				LOG_DEBUG((CE_CONT, "log_event_deliver: "
    394 				    "door upcall/daemon restart race\n"));
    395 			} else {
    396 				/*
    397 				 * Move the event to the sent queue when a
    398 				 * successful delivery has been made.
    399 				 */
    400 				mutex_enter(&eventq_sent_mutex);
    401 				next = q->next;
    402 				q->next = log_eventq_sent;
    403 				log_eventq_sent = q;
    404 				q = next;
    405 				log_eventq_head = q;
    406 				log_eventq_cnt--;
    407 				if (q == NULL) {
    408 					ASSERT(log_eventq_cnt == 0);
    409 					log_eventq_tail = NULL;
    410 				}
    411 				mutex_exit(&eventq_sent_mutex);
    412 			}
    413 		}
    414 
    415 		switch (upcall_err) {
    416 		case 0:
    417 			/*
    418 			 * Success. The queue is empty.
    419 			 */
    420 			sysevent_upcall_status = 0;
    421 			break;
    422 		case EAGAIN:
    423 			/*
    424 			 * Delivery is on hold (but functional).
    425 			 */
    426 			sysevent_upcall_status = 0;
    427 			/*
    428 			 * If the user has already signaled for delivery
    429 			 * resumption, continue.  Otherwise, we wait until
    430 			 * we are signaled to continue.
    431 			 */
    432 			if (log_event_delivery == LOGEVENT_DELIVERY_CONT) {
    433 				log_event_delivery = LOGEVENT_DELIVERY_OK;
    434 				continue;
    435 			} else {
    436 				log_event_delivery = LOGEVENT_DELIVERY_HOLD;
    437 			}
    438 
    439 			LOG_DEBUG1((CE_CONT, "log_event_deliver: EAGAIN\n"));
    440 			break;
    441 		default:
    442 			LOG_DEBUG((CE_CONT, "log_event_deliver: "
    443 				"upcall err %d\n", upcall_err));
    444 			sysevent_upcall_status = upcall_err;
    445 			/*
    446 			 * Signal everyone waiting that transport is down
    447 			 */
    448 			if (event_qfull_blocked > 0) {
    449 				mutex_enter(&event_qfull_mutex);
    450 				if (event_qfull_blocked > 0) {
    451 					cv_broadcast(&event_qfull_cv);
    452 				}
    453 				mutex_exit(&event_qfull_mutex);
    454 			}
    455 			break;
    456 		}
    457 
    458 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
    459 		cv_wait(&log_event_cv, &eventq_head_mutex);
    460 		CALLB_CPR_SAFE_END(&cprinfo, &eventq_head_mutex);
    461 	}
    462 	/* NOTREACHED */
    463 }
    464 
    465 /*
    466  * log_event_init - Allocate and initialize log_event data structures.
    467  */
    468 void
    469 log_event_init()
    470 {
    471 	mutex_init(&eventq_head_mutex, NULL, MUTEX_DEFAULT, NULL);
    472 	mutex_init(&eventq_sent_mutex, NULL, MUTEX_DEFAULT, NULL);
    473 	cv_init(&log_event_cv, NULL, CV_DEFAULT, NULL);
    474 
    475 	mutex_init(&event_qfull_mutex, NULL, MUTEX_DEFAULT, NULL);
    476 	cv_init(&event_qfull_cv, NULL, CV_DEFAULT, NULL);
    477 
    478 	mutex_init(&event_pause_mutex, NULL, MUTEX_DEFAULT, NULL);
    479 	cv_init(&event_pause_cv, NULL, CV_DEFAULT, NULL);
    480 
    481 	mutex_init(&registered_channel_mutex, NULL, MUTEX_DEFAULT, NULL);
    482 	sysevent_evc_init();
    483 }
    484 
    485 /*
    486  * The following routines are used by kernel event publishers to
    487  * allocate, append and free event buffers
    488  */
    489 /*
    490  * sysevent_alloc - Allocate new eventq struct.  This element contains
    491  *			an event buffer that will be used in a subsequent
    492  *			call to log_sysevent.
    493  */
    494 sysevent_t *
    495 sysevent_alloc(char *class, char *subclass, char *pub, int flag)
    496 {
    497 	int payload_sz;
    498 	int class_sz, subclass_sz, pub_sz;
    499 	int aligned_class_sz, aligned_subclass_sz, aligned_pub_sz;
    500 	sysevent_t *ev;
    501 	log_eventq_t *q;
    502 
    503 	ASSERT(class != NULL);
    504 	ASSERT(subclass != NULL);
    505 	ASSERT(pub != NULL);
    506 
    507 	/*
    508 	 * Calculate and reserve space for the class, subclass and
    509 	 * publisher strings in the event buffer
    510 	 */
    511 	class_sz = strlen(class) + 1;
    512 	subclass_sz = strlen(subclass) + 1;
    513 	pub_sz = strlen(pub) + 1;
    514 
    515 	ASSERT((class_sz <= MAX_CLASS_LEN) && (subclass_sz
    516 	    <= MAX_SUBCLASS_LEN) && (pub_sz <= MAX_PUB_LEN));
    517 
    518 	/* String sizes must be 64-bit aligned in the event buffer */
    519 	aligned_class_sz = SE_ALIGN(class_sz);
    520 	aligned_subclass_sz = SE_ALIGN(subclass_sz);
    521 	aligned_pub_sz = SE_ALIGN(pub_sz);
    522 
    523 	payload_sz = (aligned_class_sz - sizeof (uint64_t)) +
    524 		(aligned_subclass_sz - sizeof (uint64_t)) +
    525 		(aligned_pub_sz - sizeof (uint64_t)) - sizeof (uint64_t);
    526 
    527 	/*
    528 	 * Allocate event buffer plus additional sysevent queue
    529 	 * and payload overhead.
    530 	 */
    531 	q = kmem_zalloc(sizeof (log_eventq_t) + payload_sz, flag);
    532 	if (q == NULL) {
    533 		return (NULL);
    534 	}
    535 
    536 	/* Initialize the event buffer data */
    537 	ev = (sysevent_t *)&q->arg.buf;
    538 	SE_VERSION(ev) = SYS_EVENT_VERSION;
    539 	bcopy(class, SE_CLASS_NAME(ev), class_sz);
    540 
    541 	SE_SUBCLASS_OFF(ev) = SE_ALIGN(offsetof(sysevent_impl_t, se_class_name))
    542 		+ aligned_class_sz;
    543 	bcopy(subclass, SE_SUBCLASS_NAME(ev), subclass_sz);
    544 
    545 	SE_PUB_OFF(ev) = SE_SUBCLASS_OFF(ev) + aligned_subclass_sz;
    546 	bcopy(pub, SE_PUB_NAME(ev), pub_sz);
    547 
    548 	SE_ATTR_PTR(ev) = UINT64_C(0);
    549 	SE_PAYLOAD_SZ(ev) = payload_sz;
    550 
    551 	return (ev);
    552 }
    553 
    554 /*
    555  * sysevent_free - Free event buffer and any attribute data.
    556  */
    557 void
    558 sysevent_free(sysevent_t *ev)
    559 {
    560 	log_eventq_t *q;
    561 	nvlist_t *nvl;
    562 
    563 	ASSERT(ev != NULL);
    564 	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
    565 	nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev);
    566 
    567 	if (nvl != NULL) {
    568 		size_t size = 0;
    569 		(void) nvlist_size(nvl, &size, encoding);
    570 		SE_PAYLOAD_SZ(ev) -= size;
    571 		nvlist_free(nvl);
    572 	}
    573 	kmem_free(q, sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev));
    574 }
    575 
    576 /*
    577  * free_packed_event - Free packed event buffer
    578  */
    579 static void
    580 free_packed_event(sysevent_t *ev)
    581 {
    582 	log_eventq_t *q;
    583 
    584 	ASSERT(ev != NULL);
    585 	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
    586 
    587 	kmem_free(q, sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev));
    588 }
    589 
    590 /*
    591  * sysevent_add_attr - Add new attribute element to an event attribute list
    592  *			If attribute list is NULL, start a new list.
    593  */
    594 int
    595 sysevent_add_attr(sysevent_attr_list_t **ev_attr_list, char *name,
    596 	sysevent_value_t *se_value, int flag)
    597 {
    598 	int error;
    599 	nvlist_t **nvlp = (nvlist_t **)ev_attr_list;
    600 
    601 	if (nvlp == NULL || se_value == NULL) {
    602 		return (SE_EINVAL);
    603 	}
    604 
    605 	/*
    606 	 * attr_sz is composed of the value data size + the name data size +
    607 	 * any header data.  64-bit aligned.
    608 	 */
    609 	if (strlen(name) >= MAX_ATTR_NAME) {
    610 		return (SE_EINVAL);
    611 	}
    612 
    613 	/*
    614 	 * Allocate nvlist
    615 	 */
    616 	if ((*nvlp == NULL) &&
    617 	    (nvlist_alloc(nvlp, NV_UNIQUE_NAME_TYPE, flag) != 0))
    618 		return (SE_ENOMEM);
    619 
    620 	/* add the attribute */
    621 	switch (se_value->value_type) {
    622 	case SE_DATA_TYPE_BYTE:
    623 		error = nvlist_add_byte(*ev_attr_list, name,
    624 		    se_value->value.sv_byte);
    625 		break;
    626 	case SE_DATA_TYPE_INT16:
    627 		error = nvlist_add_int16(*ev_attr_list, name,
    628 		    se_value->value.sv_int16);
    629 		break;
    630 	case SE_DATA_TYPE_UINT16:
    631 		error = nvlist_add_uint16(*ev_attr_list, name,
    632 		    se_value->value.sv_uint16);
    633 		break;
    634 	case SE_DATA_TYPE_INT32:
    635 		error = nvlist_add_int32(*ev_attr_list, name,
    636 		    se_value->value.sv_int32);
    637 		break;
    638 	case SE_DATA_TYPE_UINT32:
    639 		error = nvlist_add_uint32(*ev_attr_list, name,
    640 		    se_value->value.sv_uint32);
    641 		break;
    642 	case SE_DATA_TYPE_INT64:
    643 		error = nvlist_add_int64(*ev_attr_list, name,
    644 		    se_value->value.sv_int64);
    645 		break;
    646 	case SE_DATA_TYPE_UINT64:
    647 		error = nvlist_add_uint64(*ev_attr_list, name,
    648 		    se_value->value.sv_uint64);
    649 		break;
    650 	case SE_DATA_TYPE_STRING:
    651 		if (strlen((char *)se_value->value.sv_string) >= MAX_STRING_SZ)
    652 			return (SE_EINVAL);
    653 		error = nvlist_add_string(*ev_attr_list, name,
    654 		    se_value->value.sv_string);
    655 		break;
    656 	case SE_DATA_TYPE_BYTES:
    657 		if (se_value->value.sv_bytes.size > MAX_BYTE_ARRAY)
    658 			return (SE_EINVAL);
    659 		error = nvlist_add_byte_array(*ev_attr_list, name,
    660 		    se_value->value.sv_bytes.data,
    661 		    se_value->value.sv_bytes.size);
    662 		break;
    663 	case SE_DATA_TYPE_TIME:
    664 		error = nvlist_add_hrtime(*ev_attr_list, name,
    665 		    se_value->value.sv_time);
    666 		break;
    667 	default:
    668 		return (SE_EINVAL);
    669 	}
    670 
    671 	return (error ? SE_ENOMEM : 0);
    672 }
    673 
    674 /*
    675  * sysevent_free_attr - Free an attribute list not associated with an
    676  *			event buffer.
    677  */
    678 void
    679 sysevent_free_attr(sysevent_attr_list_t *ev_attr_list)
    680 {
    681 	nvlist_free((nvlist_t *)ev_attr_list);
    682 }
    683 
    684 /*
    685  * sysevent_attach_attributes - Attach an attribute list to an event buffer.
    686  *
    687  *	This data will be re-packed into contiguous memory when the event
    688  *	buffer is posted to log_sysevent.
    689  */
    690 int
    691 sysevent_attach_attributes(sysevent_t *ev, sysevent_attr_list_t *ev_attr_list)
    692 {
    693 	size_t size = 0;
    694 
    695 	if (SE_ATTR_PTR(ev) != UINT64_C(0)) {
    696 		return (SE_EINVAL);
    697 	}
    698 
    699 	SE_ATTR_PTR(ev) = (uintptr_t)ev_attr_list;
    700 	(void) nvlist_size((nvlist_t *)ev_attr_list, &size, encoding);
    701 	SE_PAYLOAD_SZ(ev) += size;
    702 	SE_FLAG(ev) = 0;
    703 
    704 	return (0);
    705 }
    706 
    707 /*
    708  * sysevent_detach_attributes - Detach but don't free attribute list from the
    709  *				event buffer.
    710  */
    711 void
    712 sysevent_detach_attributes(sysevent_t *ev)
    713 {
    714 	size_t size = 0;
    715 	nvlist_t *nvl;
    716 
    717 	if ((nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev)) == NULL) {
    718 		return;
    719 	}
    720 
    721 	SE_ATTR_PTR(ev) = UINT64_C(0);
    722 	(void) nvlist_size(nvl, &size, encoding);
    723 	SE_PAYLOAD_SZ(ev) -= size;
    724 	ASSERT(SE_PAYLOAD_SZ(ev) >= 0);
    725 }
    726 
    727 /*
    728  * sysevent_attr_name - Get name of attribute
    729  */
    730 char *
    731 sysevent_attr_name(sysevent_attr_t *attr)
    732 {
    733 	if (attr == NULL) {
    734 		return (NULL);
    735 	}
    736 
    737 	return (nvpair_name(attr));
    738 }
    739 
    740 /*
    741  * sysevent_attr_type - Get type of attribute
    742  */
    743 int
    744 sysevent_attr_type(sysevent_attr_t *attr)
    745 {
    746 	/*
    747 	 * The SE_DATA_TYPE_* are typedef'ed to be the
    748 	 * same value as DATA_TYPE_*
    749 	 */
    750 	return (nvpair_type((nvpair_t *)attr));
    751 }
    752 
    753 /*
    754  * Repack event buffer into contiguous memory
    755  */
    756 static sysevent_t *
    757 se_repack(sysevent_t *ev, int flag)
    758 {
    759 	size_t copy_len;
    760 	caddr_t attr;
    761 	size_t size;
    762 	uint64_t attr_offset;
    763 	sysevent_t *copy;
    764 	log_eventq_t *qcopy;
    765 	sysevent_attr_list_t *nvl;
    766 
    767 	copy_len = sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev);
    768 	qcopy = kmem_zalloc(copy_len, flag);
    769 	if (qcopy == NULL) {
    770 		return (NULL);
    771 	}
    772 	copy = (sysevent_t *)&qcopy->arg.buf;
    773 
    774 	/*
    775 	 * Copy event header, class, subclass and publisher names
    776 	 * Set the attribute offset (in number of bytes) to contiguous
    777 	 * memory after the header.
    778 	 */
    779 
    780 	attr_offset = SE_ATTR_OFF(ev);
    781 
    782 	ASSERT((caddr_t)copy + attr_offset <= (caddr_t)copy + copy_len);
    783 
    784 	bcopy(ev, copy, attr_offset);
    785 
    786 	/* Check if attribute list exists */
    787 	if ((nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev)) == NULL) {
    788 		return (copy);
    789 	}
    790 
    791 	/*
    792 	 * Copy attribute data to contiguous memory
    793 	 */
    794 	attr = (char *)copy + attr_offset;
    795 	(void) nvlist_size(nvl, &size, encoding);
    796 	if (nvlist_pack(nvl, &attr, &size, encoding, flag) != 0) {
    797 		kmem_free(qcopy, copy_len);
    798 		return (NULL);
    799 	}
    800 	SE_ATTR_PTR(copy) = UINT64_C(0);
    801 	SE_FLAG(copy) = SE_PACKED_BUF;
    802 
    803 	return (copy);
    804 }
    805 
    806 /*
    807  * The sysevent registration provides a persistent and reliable database
    808  * for channel information for sysevent channel publishers and
    809  * subscribers.
    810  *
    811  * A channel is created and maintained by the kernel upon the first
    812  * SE_OPEN_REGISTRATION operation to log_sysevent_register().  Channel
    813  * event subscription information is updated as publishers or subscribers
    814  * perform subsequent operations (SE_BIND_REGISTRATION, SE_REGISTER,
    815  * SE_UNREGISTER and SE_UNBIND_REGISTRATION).
    816  *
    817  * For consistency, id's are assigned for every publisher or subscriber
    818  * bound to a particular channel.  The id's are used to constrain resources
    819  * and perform subscription lookup.
    820  *
    821  * Associated with each channel is a hashed list of the current subscriptions
    822  * based upon event class and subclasses.  A subscription contains a class name,
    823  * list of possible subclasses and an array of subscriber ids.  Subscriptions
    824  * are updated for every SE_REGISTER or SE_UNREGISTER operation.
    825  *
    826  * Channels are closed once the last subscriber or publisher performs a
    827  * SE_CLOSE_REGISTRATION operation.  All resources associated with the named
    828  * channel are freed upon last close.
    829  *
    830  * Locking:
    831  *	Every operation to log_sysevent() is protected by a single lock,
    832  *	registered_channel_mutex.  It is expected that the granularity of
    833  *	a single lock is sufficient given the frequency that updates will
    834  *	occur.
    835  *
    836  *	If this locking strategy proves to be too contentious, a per-hash
    837  *	or per-channel locking strategy may be implemented.
    838  */
    839 
    840 
    841 #define	CHANN_HASH(channel_name)	(hash_func(channel_name) \
    842 					% CHAN_HASH_SZ)
    843 
    844 sysevent_channel_descriptor_t *registered_channels[CHAN_HASH_SZ];
    845 static int channel_cnt;
    846 static void remove_all_class(sysevent_channel_descriptor_t *chan,
    847 	uint32_t sub_id);
    848 
    /*
     * hash_func - Hash a NUL-terminated string to a 32-bit value.
     *
     * Each character shifts the running value left by four bits and is added
     * to it; whatever ends up in the top four bits is folded back into bits
     * 4-7 and then cleared, so the result always has its top nibble zero.
     * The empty string hashes to 0.
     *
     * Characters are read as unsigned char so that bytes with the high bit
     * set contribute the same value whether or not plain char is signed.
     */
    static uint32_t
    hash_func(const char *s)
    {
    	uint32_t result = 0;
    	uint32_t g;

    	while (*s != '\0') {
    		result <<= 4;
    		result += (unsigned char)*s++;
    		g = result & 0xf0000000;
    		if (g != 0) {
    			result ^= g >> 24;
    			result ^= g;
    		}
    	}

    	return (result);
    }
    867 
    868 static sysevent_channel_descriptor_t *
    869 get_channel(char *channel_name)
    870 {
    871 	int hash_index;
    872 	sysevent_channel_descriptor_t *chan_list;
    873 
    874 	if (channel_name == NULL)
    875 		return (NULL);
    876 
    877 	/* Find channel descriptor */
    878 	hash_index = CHANN_HASH(channel_name);
    879 	chan_list = registered_channels[hash_index];
    880 	while (chan_list != NULL) {
    881 		if (strcmp(chan_list->scd_channel_name, channel_name) == 0) {
    882 			break;
    883 		} else {
    884 			chan_list = chan_list->scd_next;
    885 		}
    886 	}
    887 
    888 	return (chan_list);
    889 }
    890 
    891 static class_lst_t *
    892 create_channel_registration(sysevent_channel_descriptor_t *chan,
    893     char *event_class, int index)
    894 {
    895 	size_t class_len;
    896 	class_lst_t *c_list;
    897 
    898 	class_len = strlen(event_class) + 1;
    899 	c_list = kmem_zalloc(sizeof (class_lst_t), KM_SLEEP);
    900 	c_list->cl_name = kmem_zalloc(class_len, KM_SLEEP);
    901 	bcopy(event_class, c_list->cl_name, class_len);
    902 
    903 	c_list->cl_subclass_list =
    904 	    kmem_zalloc(sizeof (subclass_lst_t), KM_SLEEP);
    905 	c_list->cl_subclass_list->sl_name =
    906 	    kmem_zalloc(sizeof (EC_SUB_ALL), KM_SLEEP);
    907 	bcopy(EC_SUB_ALL, c_list->cl_subclass_list->sl_name,
    908 	    sizeof (EC_SUB_ALL));
    909 
    910 	c_list->cl_next = chan->scd_class_list_tbl[index];
    911 	chan->scd_class_list_tbl[index] = c_list;
    912 
    913 	return (c_list);
    914 }
    915 
    916 static void
    917 free_channel_registration(sysevent_channel_descriptor_t *chan)
    918 {
    919 	int i;
    920 	class_lst_t *clist, *next_clist;
    921 	subclass_lst_t *sclist, *next_sc;
    922 
    923 	for (i = 0; i <= CLASS_HASH_SZ; ++i) {
    924 
    925 		clist = chan->scd_class_list_tbl[i];
    926 		while (clist != NULL) {
    927 			sclist = clist->cl_subclass_list;
    928 			while (sclist != NULL) {
    929 				kmem_free(sclist->sl_name,
    930 				    strlen(sclist->sl_name) + 1);
    931 				next_sc = sclist->sl_next;
    932 				kmem_free(sclist, sizeof (subclass_lst_t));
    933 				sclist = next_sc;
    934 			}
    935 			kmem_free(clist->cl_name,
    936 			    strlen(clist->cl_name) + 1);
    937 			next_clist = clist->cl_next;
    938 			kmem_free(clist, sizeof (class_lst_t));
    939 			clist = next_clist;
    940 		}
    941 	}
    942 	chan->scd_class_list_tbl[0] = NULL;
    943 }
    944 
    945 static int
    946 open_channel(char *channel_name)
    947 {
    948 	int hash_index;
    949 	sysevent_channel_descriptor_t *chan, *chan_list;
    950 
    951 
    952 	if (channel_cnt > MAX_CHAN) {
    953 		return (-1);
    954 	}
    955 
    956 	/* Find channel descriptor */
    957 	hash_index = CHANN_HASH(channel_name);
    958 	chan_list = registered_channels[hash_index];
    959 	while (chan_list != NULL) {
    960 		if (strcmp(chan_list->scd_channel_name, channel_name) == 0) {
    961 			chan_list->scd_ref_cnt++;
    962 			kmem_free(channel_name, strlen(channel_name) + 1);
    963 			return (0);
    964 		} else {
    965 			chan_list = chan_list->scd_next;
    966 		}
    967 	}
    968 
    969 
    970 	/* New channel descriptor */
    971 	chan = kmem_zalloc(sizeof (sysevent_channel_descriptor_t), KM_SLEEP);
    972 	chan->scd_channel_name = channel_name;
    973 
    974 	/*
    975 	 * Create subscriber ids in the range [1, MAX_SUBSCRIBERS).
    976 	 * Subscriber id 0 is never allocated, but is used as a reserved id
    977 	 * by libsysevent
    978 	 */
    979 	if ((chan->scd_subscriber_cache = vmem_create(channel_name, (void *)1,
    980 	    MAX_SUBSCRIBERS + 1, 1, NULL, NULL, NULL, 0,
    981 	    VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) {
    982 		kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
    983 		return (-1);
    984 	}
    985 	if ((chan->scd_publisher_cache = vmem_create(channel_name, (void *)1,
    986 	    MAX_PUBLISHERS + 1, 1, NULL, NULL, NULL, 0,
    987 	    VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) {
    988 		vmem_destroy(chan->scd_subscriber_cache);
    989 		kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
    990 		return (-1);
    991 	}
    992 
    993 	chan->scd_ref_cnt = 1;
    994 
    995 	(void) create_channel_registration(chan, EC_ALL, 0);
    996 
    997 	if (registered_channels[hash_index] != NULL)
    998 		chan->scd_next = registered_channels[hash_index];
    999 
   1000 	registered_channels[hash_index] = chan;
   1001 
   1002 	++channel_cnt;
   1003 
   1004 	return (0);
   1005 }
   1006 
   1007 static void
   1008 close_channel(char *channel_name)
   1009 {
   1010 	int hash_index;
   1011 	sysevent_channel_descriptor_t *chan, *prev_chan;
   1012 
   1013 	/* Find channel descriptor */
   1014 	hash_index = CHANN_HASH(channel_name);
   1015 	prev_chan = chan = registered_channels[hash_index];
   1016 
   1017 	while (chan != NULL) {
   1018 		if (strcmp(chan->scd_channel_name, channel_name) == 0) {
   1019 			break;
   1020 		} else {
   1021 			prev_chan = chan;
   1022 			chan = chan->scd_next;
   1023 		}
   1024 	}
   1025 
   1026 	if (chan == NULL)
   1027 		return;
   1028 
   1029 	chan->scd_ref_cnt--;
   1030 	if (chan->scd_ref_cnt > 0)
   1031 		return;
   1032 
   1033 	free_channel_registration(chan);
   1034 	vmem_destroy(chan->scd_subscriber_cache);
   1035 	vmem_destroy(chan->scd_publisher_cache);
   1036 	kmem_free(chan->scd_channel_name,
   1037 	    strlen(chan->scd_channel_name) + 1);
   1038 	if (registered_channels[hash_index] == chan)
   1039 		registered_channels[hash_index] = chan->scd_next;
   1040 	else
   1041 		prev_chan->scd_next = chan->scd_next;
   1042 	kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
   1043 	--channel_cnt;
   1044 }
   1045 
   1046 static id_t
   1047 bind_common(sysevent_channel_descriptor_t *chan, int type)
   1048 {
   1049 	id_t id;
   1050 
   1051 	if (type == SUBSCRIBER) {
   1052 		id = (id_t)(uintptr_t)vmem_alloc(chan->scd_subscriber_cache, 1,
   1053 		    VM_NOSLEEP | VM_NEXTFIT);
   1054 		if (id <= 0 || id > MAX_SUBSCRIBERS)
   1055 			return (0);
   1056 		chan->scd_subscriber_ids[id] = 1;
   1057 	} else {
   1058 		id = (id_t)(uintptr_t)vmem_alloc(chan->scd_publisher_cache, 1,
   1059 		    VM_NOSLEEP | VM_NEXTFIT);
   1060 		if (id <= 0 || id > MAX_PUBLISHERS)
   1061 			return (0);
   1062 		chan->scd_publisher_ids[id] = 1;
   1063 	}
   1064 
   1065 	return (id);
   1066 }
   1067 
   1068 static int
   1069 unbind_common(sysevent_channel_descriptor_t *chan, int type, id_t id)
   1070 {
   1071 	if (type == SUBSCRIBER) {
   1072 		if (id <= 0 || id > MAX_SUBSCRIBERS)
   1073 			return (0);
   1074 		if (chan->scd_subscriber_ids[id] == 0)
   1075 			return (0);
   1076 		(void) remove_all_class(chan, id);
   1077 		chan->scd_subscriber_ids[id] = 0;
   1078 		vmem_free(chan->scd_subscriber_cache, (void *)(uintptr_t)id, 1);
   1079 	} else {
   1080 		if (id <= 0 || id > MAX_PUBLISHERS)
   1081 			return (0);
   1082 		if (chan->scd_publisher_ids[id] == 0)
   1083 			return (0);
   1084 		chan->scd_publisher_ids[id] = 0;
   1085 		vmem_free(chan->scd_publisher_cache, (void *)(uintptr_t)id, 1);
   1086 	}
   1087 
   1088 	return (1);
   1089 }
   1090 
   1091 static void
   1092 release_id(sysevent_channel_descriptor_t *chan, int type, id_t id)
   1093 {
   1094 	if (unbind_common(chan, type, id))
   1095 		close_channel(chan->scd_channel_name);
   1096 }
   1097 
   1098 static subclass_lst_t *
   1099 find_subclass(class_lst_t *c_list, char *subclass)
   1100 {
   1101 	subclass_lst_t *sc_list;
   1102 
   1103 	if (c_list == NULL)
   1104 		return (NULL);
   1105 
   1106 	sc_list = c_list->cl_subclass_list;
   1107 
   1108 	while (sc_list != NULL) {
   1109 		if (strcmp(sc_list->sl_name, subclass) == 0) {
   1110 			return (sc_list);
   1111 		}
   1112 		sc_list = sc_list->sl_next;
   1113 	}
   1114 
   1115 	return (NULL);
   1116 }
   1117 
   1118 static void
   1119 insert_subclass(class_lst_t *c_list, char **subclass_names,
   1120 	int subclass_num, uint32_t sub_id)
   1121 {
   1122 	int i, subclass_sz;
   1123 	subclass_lst_t *sc_list;
   1124 
   1125 	for (i = 0; i < subclass_num; ++i) {
   1126 		if ((sc_list = find_subclass(c_list, subclass_names[i]))
   1127 		    != NULL) {
   1128 			sc_list->sl_num[sub_id] = 1;
   1129 		} else {
   1130 
   1131 			sc_list = kmem_zalloc(sizeof (subclass_lst_t),
   1132 			    KM_SLEEP);
   1133 			subclass_sz = strlen(subclass_names[i]) + 1;
   1134 			sc_list->sl_name = kmem_zalloc(subclass_sz, KM_SLEEP);
   1135 			bcopy(subclass_names[i], sc_list->sl_name,
   1136 			    subclass_sz);
   1137 
   1138 			sc_list->sl_num[sub_id] = 1;
   1139 
   1140 			sc_list->sl_next = c_list->cl_subclass_list;
   1141 			c_list->cl_subclass_list = sc_list;
   1142 		}
   1143 	}
   1144 }
   1145 
   1146 static class_lst_t *
   1147 find_class(sysevent_channel_descriptor_t *chan, char *class_name)
   1148 {
   1149 	class_lst_t *c_list;
   1150 
   1151 	c_list = chan->scd_class_list_tbl[CLASS_HASH(class_name)];
   1152 	while (c_list != NULL) {
   1153 		if (strcmp(class_name, c_list->cl_name) == 0)
   1154 			break;
   1155 		c_list = c_list->cl_next;
   1156 	}
   1157 
   1158 	return (c_list);
   1159 }
   1160 
   1161 static void
   1162 remove_all_class(sysevent_channel_descriptor_t *chan, uint32_t sub_id)
   1163 {
   1164 	int i;
   1165 	class_lst_t *c_list;
   1166 	subclass_lst_t *sc_list;
   1167 
   1168 	for (i = 0; i <= CLASS_HASH_SZ; ++i) {
   1169 
   1170 		c_list = chan->scd_class_list_tbl[i];
   1171 		while (c_list != NULL) {
   1172 			sc_list = c_list->cl_subclass_list;
   1173 			while (sc_list != NULL) {
   1174 				sc_list->sl_num[sub_id] = 0;
   1175 				sc_list = sc_list->sl_next;
   1176 			}
   1177 			c_list = c_list->cl_next;
   1178 		}
   1179 	}
   1180 }
   1181 
   1182 static void
   1183 remove_class(sysevent_channel_descriptor_t *chan, uint32_t sub_id,
   1184 	char *class_name)
   1185 {
   1186 	class_lst_t *c_list;
   1187 	subclass_lst_t *sc_list;
   1188 
   1189 	if (strcmp(class_name, EC_ALL) == 0) {
   1190 		remove_all_class(chan, sub_id);
   1191 		return;
   1192 	}
   1193 
   1194 	if ((c_list = find_class(chan, class_name)) == NULL) {
   1195 		return;
   1196 	}
   1197 
   1198 	sc_list = c_list->cl_subclass_list;
   1199 	while (sc_list != NULL) {
   1200 		sc_list->sl_num[sub_id] = 0;
   1201 		sc_list = sc_list->sl_next;
   1202 	}
   1203 }
   1204 
   1205 static int
   1206 insert_class(sysevent_channel_descriptor_t *chan, char *event_class,
   1207 	char **event_subclass_lst, int subclass_num, uint32_t sub_id)
   1208 {
   1209 	class_lst_t *c_list;
   1210 
   1211 	if (strcmp(event_class, EC_ALL) == 0) {
   1212 		insert_subclass(chan->scd_class_list_tbl[0],
   1213 		    event_subclass_lst, 1, sub_id);
   1214 		return (0);
   1215 	}
   1216 
   1217 	if (strlen(event_class) + 1 > MAX_CLASS_LEN)
   1218 		return (-1);
   1219 
   1220 	/* New class, add to the registration cache */
   1221 	if ((c_list = find_class(chan, event_class)) == NULL) {
   1222 		c_list = create_channel_registration(chan, event_class,
   1223 		    CLASS_HASH(event_class));
   1224 	}
   1225 
   1226 	/* Update the subclass list */
   1227 	insert_subclass(c_list, event_subclass_lst, subclass_num, sub_id);
   1228 
   1229 	return (0);
   1230 }
   1231 
   1232 static int
   1233 add_registration(sysevent_channel_descriptor_t *chan, uint32_t sub_id,
   1234 	char *nvlbuf, size_t nvlsize)
   1235 {
   1236 	uint_t num_elem;
   1237 	char *event_class;
   1238 	char **event_list;
   1239 	nvlist_t *nvl;
   1240 	nvpair_t *nvpair = NULL;
   1241 
   1242 	if (nvlist_unpack(nvlbuf, nvlsize, &nvl, KM_SLEEP) != 0)
   1243 		return (-1);
   1244 
   1245 	if ((nvpair = nvlist_next_nvpair(nvl, nvpair)) == NULL) {
   1246 		nvlist_free(nvl);
   1247 		return (-1);
   1248 	}
   1249 
   1250 	if ((event_class = nvpair_name(nvpair)) == NULL) {
   1251 		nvlist_free(nvl);
   1252 		return (-1);
   1253 	}
   1254 	if (nvpair_value_string_array(nvpair, &event_list,
   1255 	    &num_elem) != 0) {
   1256 		nvlist_free(nvl);
   1257 		return (-1);
   1258 	}
   1259 
   1260 	if (insert_class(chan, event_class, event_list, num_elem, sub_id) < 0) {
   1261 		nvlist_free(nvl);
   1262 		return (-1);
   1263 	}
   1264 
   1265 	nvlist_free(nvl);
   1266 
   1267 	return (0);
   1268 }
   1269 
   1270 /*
   1271  * get_registration - Return the requested class hash chain
   1272  */
   1273 static int
   1274 get_registration(sysevent_channel_descriptor_t *chan, char *databuf,
   1275 	uint32_t *bufsz, uint32_t class_index)
   1276 {
   1277 	int num_classes = 0;
   1278 	char *nvlbuf = NULL;
   1279 	size_t nvlsize;
   1280 	nvlist_t *nvl;
   1281 	class_lst_t *clist;
   1282 	subclass_lst_t *sc_list;
   1283 
   1284 	if (class_index < 0 || class_index > CLASS_HASH_SZ)
   1285 		return (EINVAL);
   1286 
   1287 	if ((clist = chan->scd_class_list_tbl[class_index]) == NULL) {
   1288 		return (ENOENT);
   1289 	}
   1290 
   1291 	if (nvlist_alloc(&nvl, 0, 0) != 0) {
   1292 		return (EFAULT);
   1293 	}
   1294 
   1295 	while (clist != NULL) {
   1296 		if (nvlist_add_string(nvl, CLASS_NAME, clist->cl_name)
   1297 		    != 0) {
   1298 			nvlist_free(nvl);
   1299 			return (EFAULT);
   1300 		}
   1301 
   1302 		sc_list = clist->cl_subclass_list;
   1303 		while (sc_list != NULL) {
   1304 			if (nvlist_add_byte_array(nvl, sc_list->sl_name,
   1305 			    sc_list->sl_num, MAX_SUBSCRIBERS) != 0) {
   1306 				nvlist_free(nvl);
   1307 				return (EFAULT);
   1308 			}
   1309 			sc_list = sc_list->sl_next;
   1310 		}
   1311 		num_classes++;
   1312 		clist = clist->cl_next;
   1313 	}
   1314 
   1315 	if (num_classes == 0) {
   1316 		nvlist_free(nvl);
   1317 		return (ENOENT);
   1318 	}
   1319 
   1320 	if (nvlist_pack(nvl, &nvlbuf, &nvlsize, NV_ENCODE_NATIVE,
   1321 	    KM_SLEEP)
   1322 	    != 0) {
   1323 		nvlist_free(nvl);
   1324 		return (EFAULT);
   1325 	}
   1326 
   1327 	nvlist_free(nvl);
   1328 
   1329 	if (nvlsize > *bufsz) {
   1330 		kmem_free(nvlbuf, nvlsize);
   1331 		*bufsz = nvlsize;
   1332 		return (EAGAIN);
   1333 	}
   1334 
   1335 	bcopy(nvlbuf, databuf, nvlsize);
   1336 	kmem_free(nvlbuf, nvlsize);
   1337 
   1338 	return (0);
   1339 }
   1340 
   1341 /*
   1342  * log_sysevent_register - Register event subscriber for a particular
   1343  *		event channel.
   1344  */
   1345 int
   1346 log_sysevent_register(char *channel_name, char *udatabuf, se_pubsub_t *udata)
   1347 {
   1348 	int error = 0;
   1349 	char *kchannel, *databuf = NULL;
   1350 	size_t bufsz;
   1351 	se_pubsub_t kdata;
   1352 	sysevent_channel_descriptor_t *chan;
   1353 
   1354 	if (copyin(udata, &kdata, sizeof (se_pubsub_t)) == -1) {
   1355 		return (EFAULT);
   1356 	}
   1357 	if (kdata.ps_channel_name_len == 0) {
   1358 		return (EINVAL);
   1359 	}
   1360 	kchannel = kmem_alloc(kdata.ps_channel_name_len, KM_SLEEP);
   1361 	if (copyin(channel_name, kchannel, kdata.ps_channel_name_len) == -1) {
   1362 		kmem_free(kchannel, kdata.ps_channel_name_len);
   1363 		return (EFAULT);
   1364 	}
   1365 	bufsz = kdata.ps_buflen;
   1366 	if (bufsz > 0) {
   1367 		databuf = kmem_alloc(bufsz, KM_SLEEP);
   1368 		if (copyin(udatabuf, databuf, bufsz) == -1) {
   1369 			kmem_free(kchannel, kdata.ps_channel_name_len);
   1370 			kmem_free(databuf, bufsz);
   1371 			return (EFAULT);
   1372 		}
   1373 	}
   1374 
   1375 	mutex_enter(&registered_channel_mutex);
   1376 	if (kdata.ps_op != SE_OPEN_REGISTRATION &&
   1377 	    kdata.ps_op != SE_CLOSE_REGISTRATION) {
   1378 		chan = get_channel(kchannel);
   1379 		if (chan == NULL) {
   1380 			mutex_exit(&registered_channel_mutex);
   1381 			kmem_free(kchannel, kdata.ps_channel_name_len);
   1382 			if (bufsz > 0)
   1383 				kmem_free(databuf, bufsz);
   1384 			return (ENOENT);
   1385 		}
   1386 	}
   1387 
   1388 	switch (kdata.ps_op) {
   1389 	case SE_OPEN_REGISTRATION:
   1390 		if (open_channel(kchannel) != 0) {
   1391 			error = ENOMEM;
   1392 			if (bufsz > 0)
   1393 				kmem_free(databuf, bufsz);
   1394 			kmem_free(kchannel, kdata.ps_channel_name_len);
   1395 		}
   1396 
   1397 		mutex_exit(&registered_channel_mutex);
   1398 		return (error);
   1399 	case SE_CLOSE_REGISTRATION:
   1400 		close_channel(kchannel);
   1401 		break;
   1402 	case SE_BIND_REGISTRATION:
   1403 		if ((kdata.ps_id = bind_common(chan, kdata.ps_type)) <= 0)
   1404 			error = EBUSY;
   1405 		break;
   1406 	case SE_UNBIND_REGISTRATION:
   1407 		(void) unbind_common(chan, kdata.ps_type, (id_t)kdata.ps_id);
   1408 		break;
   1409 	case SE_REGISTER:
   1410 		if (bufsz == 0) {
   1411 			error = EINVAL;
   1412 			break;
   1413 		}
   1414 		if (add_registration(chan, kdata.ps_id, databuf, bufsz) == -1)
   1415 			error = EINVAL;
   1416 		break;
   1417 	case SE_UNREGISTER:
   1418 		if (bufsz == 0) {
   1419 			error = EINVAL;
   1420 			break;
   1421 		}
   1422 		remove_class(chan, kdata.ps_id, databuf);
   1423 		break;
   1424 	case SE_CLEANUP:
   1425 		/* Cleanup the indicated subscriber or publisher */
   1426 		release_id(chan, kdata.ps_type, kdata.ps_id);
   1427 		break;
   1428 	case SE_GET_REGISTRATION:
   1429 		error = get_registration(chan, databuf,
   1430 		    &kdata.ps_buflen, kdata.ps_id);
   1431 		break;
   1432 	default:
   1433 		error = ENOTSUP;
   1434 	}
   1435 
   1436 	mutex_exit(&registered_channel_mutex);
   1437 
   1438 	kmem_free(kchannel, kdata.ps_channel_name_len);
   1439 
   1440 	if (bufsz > 0) {
   1441 		if (copyout(databuf, udatabuf, bufsz) == -1)
   1442 			error = EFAULT;
   1443 		kmem_free(databuf, bufsz);
   1444 	}
   1445 
   1446 	if (copyout(&kdata, udata, sizeof (se_pubsub_t)) == -1)
   1447 		return (EFAULT);
   1448 
   1449 	return (error);
   1450 }
   1451 
   1452 /*
   1453  * log_sysevent_copyout_data - Copyout event data to userland.
   1454  *			This is called from modctl(MODEVENTS, MODEVENTS_GETDATA)
   1455  *			The buffer size is always sufficient.
   1456  */
   1457 int
   1458 log_sysevent_copyout_data(sysevent_id_t *eid, size_t ubuflen, caddr_t ubuf)
   1459 {
   1460 	int error = ENOENT;
   1461 	log_eventq_t *q;
   1462 	sysevent_t *ev;
   1463 	sysevent_id_t eid_copy;
   1464 
   1465 	/*
   1466 	 * Copy eid
   1467 	 */
   1468 	if (copyin(eid, &eid_copy, sizeof (sysevent_id_t)) == -1) {
   1469 		return (EFAULT);
   1470 	}
   1471 
   1472 	mutex_enter(&eventq_sent_mutex);
   1473 	q = log_eventq_sent;
   1474 
   1475 	/*
   1476 	 * Search for event buffer on the sent queue with matching
   1477 	 * event identifier
   1478 	 */
   1479 	while (q) {
   1480 		ev = (sysevent_t *)&q->arg.buf;
   1481 
   1482 		if (SE_TIME(ev) != eid_copy.eid_ts ||
   1483 		    SE_SEQ(ev) != eid_copy.eid_seq) {
   1484 			q = q->next;
   1485 			continue;
   1486 		}
   1487 
   1488 		if (ubuflen < SE_SIZE(ev)) {
   1489 			error = EFAULT;
   1490 			break;
   1491 		}
   1492 		if (copyout(ev, ubuf, SE_SIZE(ev)) != 0) {
   1493 			error = EFAULT;
   1494 			LOG_DEBUG((CE_NOTE, "Unable to retrieve system event "
   1495 			    "0x%" PRIx64 " from queue: EFAULT\n",
   1496 			    eid->eid_seq));
   1497 		} else {
   1498 			error = 0;
   1499 		}
   1500 		break;
   1501 	}
   1502 
   1503 	mutex_exit(&eventq_sent_mutex);
   1504 
   1505 	return (error);
   1506 }
   1507 
   1508 /*
   1509  * log_sysevent_free_data - Free kernel copy of the event buffer identified
   1510  *			by eid (must have already been sent).  Called from
   1511  *			modctl(MODEVENTS, MODEVENTS_FREEDATA).
   1512  */
   1513 int
   1514 log_sysevent_free_data(sysevent_id_t *eid)
   1515 {
   1516 	int error = ENOENT;
   1517 	sysevent_t *ev;
   1518 	log_eventq_t *q, *prev = NULL;
   1519 	sysevent_id_t eid_copy;
   1520 
   1521 	/*
   1522 	 * Copy eid
   1523 	 */
   1524 	if (copyin(eid, &eid_copy, sizeof (sysevent_id_t)) == -1) {
   1525 		return (EFAULT);
   1526 	}
   1527 
   1528 	mutex_enter(&eventq_sent_mutex);
   1529 	q = log_eventq_sent;
   1530 
   1531 	/*
   1532 	 * Look for the event to be freed on the sent queue.  Due to delayed
   1533 	 * processing of the event, it may not be on the sent queue yet.
   1534 	 * It is up to the user to retry the free operation to ensure that the
   1535 	 * event is properly freed.
   1536 	 */
   1537 	while (q) {
   1538 		ev = (sysevent_t *)&q->arg.buf;
   1539 
   1540 		if (SE_TIME(ev) != eid_copy.eid_ts ||
   1541 		    SE_SEQ(ev) != eid_copy.eid_seq) {
   1542 			prev = q;
   1543 			q = q->next;
   1544 			continue;
   1545 		}
   1546 		/*
   1547 		 * Take it out of log_eventq_sent and free it
   1548 		 */
   1549 		if (prev) {
   1550 			prev->next = q->next;
   1551 		} else {
   1552 			log_eventq_sent = q->next;
   1553 		}
   1554 		free_packed_event(ev);
   1555 		error = 0;
   1556 		break;
   1557 	}
   1558 
   1559 	mutex_exit(&eventq_sent_mutex);
   1560 
   1561 	return (error);
   1562 }
   1563 
   1564 /*
   1565  * log_sysevent_flushq - Begin or resume event buffer delivery.  If neccessary,
   1566  *			create log_event_deliver thread or wake it up
   1567  */
   1568 /*ARGSUSED*/
   1569 void
   1570 log_sysevent_flushq(int cmd, uint_t flag)
   1571 {
   1572 	mutex_enter(&eventq_head_mutex);
   1573 
   1574 	/*
   1575 	 * Start the event delivery thread
   1576 	 * Mark the upcall status as active since we should
   1577 	 * now be able to begin emptying the queue normally.
   1578 	 */
   1579 	if (!async_thread) {
   1580 		sysevent_upcall_status = 0;
   1581 		sysevent_daemon_init = 1;
   1582 		setup_ddi_poststartup();
   1583 		async_thread = thread_create(NULL, 0, log_event_deliver,
   1584 		    NULL, 0, &p0, TS_RUN, minclsyspri);
   1585 	}
   1586 
   1587 	log_event_delivery = LOGEVENT_DELIVERY_CONT;
   1588 	cv_signal(&log_event_cv);
   1589 	mutex_exit(&eventq_head_mutex);
   1590 }
   1591 
   1592 /*
   1593  * log_sysevent_filename - Called by syseventd via
   1594  *			modctl(MODEVENTS, MODEVENTS_SET_DOOR_UPCALL_FILENAME)
   1595  *			to subsequently bind the event_door.
   1596  *
   1597  *			This routine is called everytime syseventd (re)starts
   1598  *			and must therefore replay any events buffers that have
   1599  *			been sent but not freed.
   1600  *
   1601  *			Event buffer delivery begins after a call to
   1602  *			log_sysevent_flushq().
   1603  */
   1604 int
   1605 log_sysevent_filename(char *file)
   1606 {
   1607 	/*
   1608 	 * Called serially by syseventd init code, no need to protect door
   1609 	 * data.
   1610 	 */
   1611 	/* Unbind old event door */
   1612 	if (logevent_door_upcall_filename) {
   1613 		kmem_free(logevent_door_upcall_filename,
   1614 			logevent_door_upcall_filename_size);
   1615 		if (event_door) {
   1616 			door_ki_rele(event_door);
   1617 			event_door = NULL;
   1618 		}
   1619 	}
   1620 	logevent_door_upcall_filename_size = strlen(file) + 1;
   1621 	logevent_door_upcall_filename = kmem_alloc(
   1622 		logevent_door_upcall_filename_size, KM_SLEEP);
   1623 	(void) strcpy(logevent_door_upcall_filename, file);
   1624 
   1625 	/*
   1626 	 * We are called when syseventd restarts. Move all sent, but
   1627 	 * not committed events from log_eventq_sent to log_eventq_head.
   1628 	 * Do it in proper order to maintain increasing event id.
   1629 	 */
   1630 	mutex_enter(&eventq_head_mutex);
   1631 
   1632 	mutex_enter(&eventq_sent_mutex);
   1633 	while (log_eventq_sent) {
   1634 		log_eventq_t *tmp = log_eventq_sent->next;
   1635 		log_eventq_sent->next = log_eventq_head;
   1636 		if (log_eventq_head == NULL) {
   1637 			ASSERT(log_eventq_cnt == 0);
   1638 			log_eventq_tail = log_eventq_sent;
   1639 			log_eventq_tail->next = NULL;
   1640 		} else if (log_eventq_head == log_eventq_tail) {
   1641 			ASSERT(log_eventq_cnt == 1);
   1642 			ASSERT(log_eventq_head->next == NULL);
   1643 			ASSERT(log_eventq_tail->next == NULL);
   1644 		}
   1645 		log_eventq_head = log_eventq_sent;
   1646 		log_eventq_sent = tmp;
   1647 		log_eventq_cnt++;
   1648 	}
   1649 	mutex_exit(&eventq_sent_mutex);
   1650 	mutex_exit(&eventq_head_mutex);
   1651 
   1652 	return (0);
   1653 }
   1654 
   1655 /*
   1656  * queue_sysevent - queue an event buffer
   1657  */
   1658 static int
   1659 queue_sysevent(sysevent_t *ev, sysevent_id_t *eid, int flag)
   1660 {
   1661 	log_eventq_t *q;
   1662 
   1663 	ASSERT(flag == SE_SLEEP || flag == SE_NOSLEEP);
   1664 
   1665 	DTRACE_SYSEVENT2(post, evch_bind_t *, NULL, sysevent_impl_t *, ev);
   1666 
   1667 restart:
   1668 
   1669 	/* Max Q size exceeded */
   1670 	mutex_enter(&event_qfull_mutex);
   1671 	if (sysevent_daemon_init && log_eventq_cnt >= logevent_max_q_sz) {
   1672 		/*
   1673 		 * If queue full and transport down, return no transport
   1674 		 */
   1675 		if (sysevent_upcall_status != 0) {
   1676 			mutex_exit(&event_qfull_mutex);
   1677 			free_packed_event(ev);
   1678 			eid->eid_seq = UINT64_C(0);
   1679 			eid->eid_ts = INT64_C(0);
   1680 			return (SE_NO_TRANSPORT);
   1681 		}
   1682 		if (flag == SE_NOSLEEP) {
   1683 			mutex_exit(&event_qfull_mutex);
   1684 			free_packed_event(ev);
   1685 			eid->eid_seq = UINT64_C(0);
   1686 			eid->eid_ts = INT64_C(0);
   1687 			return (SE_EQSIZE);
   1688 		}
   1689 		event_qfull_blocked++;
   1690 		cv_wait(&event_qfull_cv, &event_qfull_mutex);
   1691 		event_qfull_blocked--;
   1692 		mutex_exit(&event_qfull_mutex);
   1693 		goto restart;
   1694 	}
   1695 	mutex_exit(&event_qfull_mutex);
   1696 
   1697 	mutex_enter(&eventq_head_mutex);
   1698 
   1699 	/* Time stamp and assign ID */
   1700 	SE_SEQ(ev) = eid->eid_seq = atomic_add_64_nv(&kernel_event_id,
   1701 		(uint64_t)1);
   1702 	SE_TIME(ev) = eid->eid_ts = gethrtime();
   1703 
   1704 	LOG_DEBUG1((CE_CONT, "log_sysevent: class=%d type=%d id=0x%llx\n",
   1705 	    SE_CLASS(ev), SE_SUBCLASS(ev), (longlong_t)SE_SEQ(ev)));
   1706 
   1707 	/*
   1708 	 * Put event on eventq
   1709 	 */
   1710 	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
   1711 	q->next = NULL;
   1712 	if (log_eventq_head == NULL) {
   1713 		ASSERT(log_eventq_cnt == 0);
   1714 		log_eventq_head = q;
   1715 		log_eventq_tail = q;
   1716 	} else {
   1717 		if (log_eventq_head == log_eventq_tail) {
   1718 			ASSERT(log_eventq_cnt == 1);
   1719 			ASSERT(log_eventq_head->next == NULL);
   1720 			ASSERT(log_eventq_tail->next == NULL);
   1721 		}
   1722 		log_eventq_tail->next = q;
   1723 		log_eventq_tail = q;
   1724 	}
   1725 	log_eventq_cnt++;
   1726 
   1727 	/* Signal event delivery thread */
   1728 	if (log_eventq_cnt == 1) {
   1729 		cv_signal(&log_event_cv);
   1730 	}
   1731 	mutex_exit(&eventq_head_mutex);
   1732 
   1733 	return (0);
   1734 }
   1735 
   1736 /*
   1737  * log_sysevent - kernel system event logger.
   1738  *
   1739  * Returns SE_ENOMEM if buf allocation failed or SE_EQSIZE if the
   1740  * maximum event queue size will be exceeded
   1741  * Returns 0 for successfully queued event buffer
   1742  */
   1743 int
   1744 log_sysevent(sysevent_t *ev, int flag, sysevent_id_t *eid)
   1745 {
   1746 	sysevent_t *ev_copy;
   1747 	int rval;
   1748 
   1749 	ASSERT(flag == SE_SLEEP || flag == SE_NOSLEEP);
   1750 	ASSERT(!(flag == SE_SLEEP && servicing_interrupt()));
   1751 
   1752 	ev_copy = se_repack(ev, flag);
   1753 	if (ev_copy == NULL) {
   1754 		ASSERT(flag == SE_NOSLEEP);
   1755 		return (SE_ENOMEM);
   1756 	}
   1757 	rval = queue_sysevent(ev_copy, eid, flag);
   1758 	ASSERT(rval == 0 || rval == SE_ENOMEM || rval == SE_EQSIZE ||
   1759 		rval == SE_NO_TRANSPORT);
   1760 	ASSERT(!(flag == SE_SLEEP && (rval == SE_EQSIZE || rval == SE_ENOMEM)));
   1761 	return (rval);
   1762 }
   1763 
   1764 /*
   1765  * log_usr_sysevent - user system event logger
   1766  *			Private to devfsadm and accessible only via
   1767  *			modctl(MODEVENTS, MODEVENTS_POST_EVENT)
   1768  */
   1769 int
   1770 log_usr_sysevent(sysevent_t *ev, int ev_size, sysevent_id_t *eid)
   1771 {
   1772 	int ret, copy_sz;
   1773 	sysevent_t *ev_copy;
   1774 	sysevent_id_t new_eid;
   1775 	log_eventq_t *qcopy;
   1776 
   1777 	copy_sz = ev_size + offsetof(log_eventq_t, arg) +
   1778 		offsetof(log_event_upcall_arg_t, buf);
   1779 	qcopy = kmem_zalloc(copy_sz, KM_SLEEP);
   1780 	ev_copy = (sysevent_t *)&qcopy->arg.buf;
   1781 
   1782 	/*
   1783 	 * Copy event
   1784 	 */
   1785 	if (copyin(ev, ev_copy, ev_size) == -1) {
   1786 		kmem_free(qcopy, copy_sz);
   1787 		return (EFAULT);
   1788 	}
   1789 
   1790 	if ((ret = queue_sysevent(ev_copy, &new_eid, SE_NOSLEEP)) != 0) {
   1791 		if (ret == SE_ENOMEM || ret == SE_EQSIZE)
   1792 			return (EAGAIN);
   1793 		else
   1794 			return (EIO);
   1795 	}
   1796 
   1797 	if (copyout(&new_eid, eid, sizeof (sysevent_id_t)) == -1) {
   1798 		return (EFAULT);
   1799 	}
   1800 
   1801 	return (0);
   1802 }
   1803 
   1804 
   1805 
   1806 int
   1807 ddi_log_sysevent(
   1808 	dev_info_t		*dip,
   1809 	char			*vendor,
   1810 	char			*class,
   1811 	char			*subclass,
   1812 	nvlist_t		*attr_list,
   1813 	sysevent_id_t		*eidp,
   1814 	int			sleep_flag)
   1815 {
   1816 	sysevent_attr_list_t	*list = (sysevent_attr_list_t *)attr_list;
   1817 	char			pubstr[32];
   1818 	sysevent_t		*event;
   1819 	sysevent_id_t		eid;
   1820 	const char		*drvname;
   1821 	char			*publisher;
   1822 	int			se_flag;
   1823 	int			rval;
   1824 	int			n;
   1825 
   1826 	if (sleep_flag == DDI_SLEEP && servicing_interrupt()) {
   1827 		cmn_err(CE_NOTE, "!ddi_log_syevent: driver %s%d - cannot queue "
   1828 			"event from interrupt context with sleep semantics\n",
   1829 			ddi_driver_name(dip), ddi_get_instance(dip));
   1830 		return (DDI_ECONTEXT);
   1831 	}
   1832 
   1833 	drvname = ddi_driver_name(dip);
   1834 	n = strlen(vendor) + strlen(drvname) + 7;
   1835 	if (n < sizeof (pubstr)) {
   1836 		publisher = pubstr;
   1837 	} else {
   1838 		publisher = kmem_alloc(n,
   1839 			(sleep_flag == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
   1840 		if (publisher == NULL) {
   1841 			return (DDI_ENOMEM);
   1842 		}
   1843 	}
   1844 	(void) strcpy(publisher, vendor);
   1845 	(void) strcat(publisher, ":kern:");
   1846 	(void) strcat(publisher, drvname);
   1847 
   1848 	se_flag = (sleep_flag == DDI_SLEEP) ? SE_SLEEP : SE_NOSLEEP;
   1849 	event = sysevent_alloc(class, subclass, publisher, se_flag);
   1850 
   1851 	if (publisher != pubstr) {
   1852 		kmem_free(publisher, n);
   1853 	}
   1854 
   1855 	if (event == NULL) {
   1856 		return (DDI_ENOMEM);
   1857 	}
   1858 
   1859 	if (list) {
   1860 		(void) sysevent_attach_attributes(event, list);
   1861 	}
   1862 
   1863 	rval = log_sysevent(event, se_flag, &eid);
   1864 	if (list) {
   1865 		sysevent_detach_attributes(event);
   1866 	}
   1867 	sysevent_free(event);
   1868 	if (rval == 0) {
   1869 		if (eidp) {
   1870 			eidp->eid_seq = eid.eid_seq;
   1871 			eidp->eid_ts = eid.eid_ts;
   1872 		}
   1873 		return (DDI_SUCCESS);
   1874 	}
   1875 	if (rval == SE_NO_TRANSPORT)
   1876 		return (DDI_ETRANSPORT);
   1877 
   1878 	ASSERT(rval == SE_ENOMEM || rval == SE_EQSIZE);
   1879 	return ((rval == SE_ENOMEM) ? DDI_ENOMEM : DDI_EBUSY);
   1880 }
   1881 
   1882 uint64_t
   1883 log_sysevent_new_id()
   1884 {
   1885 	return (atomic_add_64_nv(&kernel_event_id, (uint64_t)1));
   1886 }
   1887