Home | History | Annotate | Download | only in disk-monitor
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include <sys/types.h>
     28 #include <sys/sysevent/dr.h>
     29 #include <sys/sysevent/eventdefs.h>
     30 #include <sys/sunddi.h>	/* for the EC's for DEVFS */
     31 
     32 #include <errno.h>
     33 #include <string.h>
     34 #include <strings.h>
     35 #include <stdio.h>
     36 #include <unistd.h>
     37 #include <time.h>
     38 #include <pthread.h>
     39 
     40 #include <libsysevent.h>
     41 #include <sys/sysevent_impl.h>
     42 
     43 #include <libnvpair.h>
     44 #include <config_admin.h>
     45 
     46 #include "disk_monitor.h"
     47 #include "hotplug_mgr.h"
     48 #include "schg_mgr.h"
     49 #include "dm_platform.h"
     50 
/*
 * Work-queue element: wraps a single sysevent pointer so that it can be
 * placed on g_sysev_queue by event_handler() and later consumed by the
 * sysevent-processing thread.
 */
typedef struct sysevent_event {
	sysevent_t	*evp;	/* the sysevent carried by this entry */
} sysevent_event_t;

/* Identifier of the sysevent-processing thread (see fmd_thr_create) */
static pthread_t g_sysev_tid;
/*
 * Lock and condition variable used to publish and wait for changes
 * of g_sysev_thread_state between the thread spawner and the thread.
 */
static pthread_mutex_t g_event_handler_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t g_event_handler_cond = PTHREAD_COND_INITIALIZER;
/* Queue of sysevent_event_t entries waiting to be processed */
static qu_t *g_sysev_queue = NULL;
/* Lifecycle state of the sysevent-processing thread */
static thread_state_t g_sysev_thread_state = TS_NOT_RUNNING;
/*
 * The sysevent handle is bound to the main sysevent handler
 * (event_handler), for each of the hotplug sysevents.
 */
static sysevent_handle_t *sysevent_handle = NULL;

/* Forward declaration: referenced before its definition below */
static void free_sysevent_event(void *p);
     68 
     69 static int
     70 nsleep(int seconds)
     71 {
     72 	struct timespec tspec;
     73 
     74 	tspec.tv_sec = seconds;
     75 	tspec.tv_nsec = 0;
     76 
     77 	return (nanosleep(&tspec, NULL));
     78 }
     79 
     80 static int
     81 config_list_ext_poll(int num, char * const *path,
     82     cfga_list_data_t **list_array, int *nlist, int flag)
     83 {
     84 	boolean_t done = B_FALSE;
     85 	boolean_t timedout = B_FALSE;
     86 	boolean_t interrupted = B_FALSE;
     87 	int timeout = 0;
     88 	int e;
     89 #define	TIMEOUT_MAX 60
     90 
     91 	do {
     92 		switch ((e = config_list_ext(num, path, list_array,
     93 		    nlist, NULL, NULL, NULL, flag))) {
     94 
     95 		case CFGA_OK:
     96 
     97 			return (CFGA_OK);
     98 
     99 		case CFGA_BUSY:
    100 		case CFGA_SYSTEM_BUSY:
    101 
    102 			if (timeout++ >= TIMEOUT_MAX)
    103 				timedout = B_TRUE;
    104 			else {
    105 				if (nsleep(1) < 0)
    106 					interrupted = (errno == EINTR);
    107 			}
    108 			break;
    109 
    110 		default:
    111 			done = B_TRUE;
    112 			break;
    113 
    114 		}
    115 	} while (!done && !timedout && !interrupted);
    116 
    117 	return (e);
    118 }
    119 
    120 /*
    121  * Given a physical attachment point with a dynamic component
    122  * (as in the case of SCSI APs), ensure the 'controller'
    123  * portion of the dynamic component matches the physical portion.
    124  * Argument 'adjusted' must point to a buffer of at least
    125  * MAXPATHLEN bytes.
    126  */
    127 void
    128 adjust_dynamic_ap(const char *apid, char *adjusted)
    129 {
    130 	cfga_list_data_t *list_array = NULL;
    131 	int nlist;
    132 	char *ap_path[1];
    133 	char phys[MAXPATHLEN];
    134 	char dev_phys[MAXPATHLEN];
    135 	char *dyn;
    136 	int c, t, d;
    137 
    138 	dm_assert((strlen(apid) + 8 /* strlen("/devices") */) < MAXPATHLEN);
    139 
    140 	/* In the case of any error, return the unadjusted APID */
    141 	(void) strcpy(adjusted, apid);
    142 
    143 	/* if AP is not dynamic or not a disk node, no need to adjust it */
    144 	dyn = strstr(apid, "::");
    145 	if ((dyn == NULL) || (dyn == apid) ||
    146 	    (sscanf(dyn, "::dsk/c%dt%dd%d", &c, &t, &d) != 3))
    147 		return;
    148 
    149 	/*
    150 	 * Copy the AP_ID and terminate it at the '::' that we know
    151 	 * for a fact it contains.  Pre-pend '/devices' for the sake
    152 	 * of cfgadm_scsi, and get the cfgadm data for the controller.
    153 	 */
    154 	(void) strcpy(phys, apid);
    155 	*strstr(phys, "::") = '\0';
    156 	(void) snprintf(dev_phys, MAXPATHLEN, "/devices%s", phys);
    157 	ap_path[0] = dev_phys;
    158 
    159 	if (config_list_ext_poll(1, ap_path, &list_array, &nlist, 0)
    160 	    != CFGA_OK)
    161 		return;
    162 
    163 	dm_assert(nlist == 1);
    164 
    165 	if (sscanf(list_array[0].ap_log_id, "c%d", &c) == 1)
    166 		(void) snprintf(adjusted, MAXPATHLEN, "%s::dsk/c%dt%dd%d",
    167 		    phys, c, t, d);
    168 
    169 	free(list_array);
    170 }
    171 
    172 static int
    173 disk_ap_is_scsi(const char *ap_path)
    174 {
    175 	return (strstr(ap_path, ":scsi:") != NULL);
    176 }
    177 
    178 /*
    179  * Looks up the attachment point's state and returns it in one of
    180  * the hotplug states that the state change manager understands.
    181  */
    182 hotplug_state_t
    183 disk_ap_state_to_hotplug_state(diskmon_t *diskp)
    184 {
    185 	hotplug_state_t state = HPS_UNKNOWN;
    186 	cfga_list_data_t *list_array = NULL;
    187 	int rv, nlist;
    188 	char *app = (char *)dm_prop_lookup(diskp->app_props,
    189 	    DISK_AP_PROP_APID);
    190 	char adj_app[MAXPATHLEN];
    191 	char *ap_path[1];
    192 	char *devices_app;
    193 	int len;
    194 	boolean_t list_valid = B_FALSE;
    195 
    196 	dm_assert(app != NULL);
    197 
    198 	adjust_dynamic_ap(app, adj_app);
    199 	ap_path[0] = adj_app;
    200 	devices_app = NULL;
    201 
    202 	rv = config_list_ext_poll(1, ap_path, &list_array, &nlist,
    203 	    CFGA_FLAG_LIST_ALL);
    204 
    205 	if (rv != CFGA_OK) {
    206 		/*
    207 		 * The SATA and SCSI libcfgadm plugins add a
    208 		 * /devices to the phys id; to use it, we must
    209 		 * prepend this string before the call.
    210 		 */
    211 		len = 8 /* strlen("/devices") */ + strlen(adj_app) + 1;
    212 		devices_app = dmalloc(len);
    213 		(void) snprintf(devices_app, len, "/devices%s",
    214 		    adj_app);
    215 		ap_path[0] = devices_app;
    216 
    217 		rv = config_list_ext_poll(1, ap_path, &list_array, &nlist,
    218 		    CFGA_FLAG_LIST_ALL);
    219 	}
    220 
    221 	/*
    222 	 * cfgadm_scsi will return an error for an absent target,
    223 	 * so treat an error as "absent"; otherwise, make sure
    224 	 * cfgadm_xxx has returned a list of 1 item
    225 	 */
    226 	if (rv == CFGA_OK) {
    227 		dm_assert(nlist == 1);
    228 		list_valid = B_TRUE;
    229 	} else if (disk_ap_is_scsi(ap_path[0]))
    230 		state = HPS_ABSENT;
    231 
    232 	if (devices_app != NULL)
    233 		dfree(devices_app, len);
    234 
    235 	if (list_valid) {
    236 		/*
    237 		 * The following truth table defines how each state is
    238 		 * computed:
    239 		 *
    240 		 * +----------------------------------------------+
    241 		 * |		  | o_state | r_state | condition |
    242 		 * |		  +---------+---------+-----------|
    243 		 * | Absent	  |Don'tCare|Disc/Empt|	Don'tCare |
    244 		 * | Present	  |Unconfgrd|Connected|	 unknown  |
    245 		 * | Configured	  |Configred|Connected|	Don'tCare |
    246 		 * | Unconfigured |Unconfgrd|Connected|	   OK	  |
    247 		 * +--------------+---------+---------+-----------+
    248 		 */
    249 
    250 		if (list_array[0].ap_r_state == CFGA_STAT_EMPTY ||
    251 		    list_array[0].ap_r_state == CFGA_STAT_DISCONNECTED)
    252 			state = HPS_ABSENT;
    253 		else if (list_array[0].ap_r_state == CFGA_STAT_CONNECTED &&
    254 		    list_array[0].ap_o_state == CFGA_STAT_UNCONFIGURED &&
    255 		    list_array[0].ap_cond == CFGA_COND_UNKNOWN)
    256 			state = HPS_PRESENT;
    257 		else if (list_array[0].ap_r_state == CFGA_STAT_CONNECTED &&
    258 		    list_array[0].ap_o_state == CFGA_STAT_UNCONFIGURED &&
    259 		    list_array[0].ap_cond != CFGA_COND_UNKNOWN)
    260 			state = HPS_UNCONFIGURED;
    261 		else if (list_array[0].ap_r_state == CFGA_STAT_CONNECTED &&
    262 		    list_array[0].ap_o_state == CFGA_STAT_CONFIGURED)
    263 			state = HPS_CONFIGURED;
    264 
    265 		free(list_array);
    266 	}
    267 
    268 	return (state);
    269 }
    270 
    271 /*
    272  * Examine the sysevent passed in and returns the hotplug state that
    273  * the sysevent states (or implies, in the case of attachment point
    274  * events).
    275  */
    276 static hotplug_state_t
    277 disk_sysev_to_state(diskmon_t *diskp, sysevent_t *evp)
    278 {
    279 	const char *class_name, *subclass;
    280 	hotplug_state_t state = HPS_UNKNOWN;
    281 	sysevent_value_t se_val;
    282 
    283 	/*
    284 	 * The state mapping is as follows:
    285 	 *
    286 	 * Sysevent				State
    287 	 * --------------------------------------------------------
    288 	 * EC_DEVFS/ESC_DEVFS_DEVI_ADD		Configured
    289 	 * EC_DEVFS/ESC_DEVFS_DEVI_REMOVE	Unconfigured
    290 	 * EC_DR/ESC_DR_AP_STATE_CHANGE		*[Absent/Present]
    291 	 *
    292 	 * (The EC_DR event requires a probe of the attachment point
    293 	 * to determine the AP's state if there is no usable HINT)
    294 	 *
    295 	 */
    296 
    297 	class_name = sysevent_get_class_name(evp);
    298 	subclass = sysevent_get_subclass_name(evp);
    299 
    300 	if (strcmp(class_name, EC_DEVFS) == 0) {
    301 		if (strcmp(subclass, ESC_DEVFS_DEVI_ADD) == 0) {
    302 
    303 			state = HPS_CONFIGURED;
    304 
    305 		} else if (strcmp(subclass, ESC_DEVFS_DEVI_REMOVE) == 0) {
    306 
    307 			state = HPS_UNCONFIGURED;
    308 
    309 		}
    310 
    311 	} else if (strcmp(class_name, EC_DR) == 0 &&
    312 	    ((strcmp(subclass, ESC_DR_AP_STATE_CHANGE) == 0) ||
    313 	    (strcmp(subclass, ESC_DR_TARGET_STATE_CHANGE) == 0))) {
    314 
    315 		if (sysevent_lookup_attr(evp, DR_HINT, SE_DATA_TYPE_STRING,
    316 		    &se_val) == 0 && se_val.value.sv_string != NULL) {
    317 
    318 			if (strcmp(se_val.value.sv_string, DR_HINT_INSERT)
    319 			    == 0) {
    320 
    321 				state = HPS_PRESENT;
    322 
    323 			} else if (strcmp(se_val.value.sv_string,
    324 			    DR_HINT_REMOVE) == 0) {
    325 
    326 				state = HPS_ABSENT;
    327 			}
    328 
    329 		}
    330 
    331 		/*
    332 		 * If the state could not be determined by the hint
    333 		 * (or there was no hint), ask the AP directly.
    334 		 * SCSI HBAs may send an insertion sysevent
    335 		 * *after* configuring the target node, so double-
    336 		 * check HPS_PRESENT
    337 		 */
    338 		if ((state == HPS_UNKNOWN) || (state = HPS_PRESENT))
    339 			state = disk_ap_state_to_hotplug_state(diskp);
    340 	}
    341 
    342 	return (state);
    343 }
    344 
    345 static void
    346 disk_split_ap_path_sata(const char *ap_path, char *device, int *target)
    347 {
    348 	char *p;
    349 	int n;
    350 
    351 	/*
    352 	 *  /devices/rootnode/.../device:target
    353 	 */
    354 	(void) strncpy(device, ap_path, MAXPATHLEN);
    355 	p = strrchr(device, ':');
    356 	dm_assert(p != NULL);
    357 	n = sscanf(p, ":%d", target);
    358 	dm_assert(n == 1);
    359 	*p = '\0';
    360 }
    361 
    362 static void
    363 disk_split_ap_path_scsi(const char *ap_path, char *device, int *target)
    364 {
    365 	char *p;
    366 	int n;
    367 
    368 	/*
    369 	 *  /devices/rootnode/.../device:scsi::dsk/cXtXdX
    370 	 */
    371 
    372 	(void) strncpy(device, ap_path, MAXPATHLEN);
    373 	p = strrchr(device, ':');
    374 	dm_assert(p != NULL);
    375 
    376 	n = sscanf(p, ":dsk/c%*dt%dd%*d", target);
    377 	dm_assert(n == 1);
    378 
    379 	*strchr(device, ':') = '\0';
    380 }
    381 
    382 static void
    383 disk_split_ap_path(const char *ap_path, char *device, int *target)
    384 {
    385 	/*
    386 	 * The AP path comes in two forms; for SATA devices,
    387 	 * is is of the form:
    388 	 *   /devices/rootnode/.../device:portnum
    389 	 * and for SCSI devices, it is of the form:
    390 	 *  /devices/rootnode/.../device:scsi::dsk/cXtXdX
    391 	 */
    392 
    393 	if (disk_ap_is_scsi(ap_path))
    394 		disk_split_ap_path_scsi(ap_path, device, target);
    395 	else
    396 		disk_split_ap_path_sata(ap_path, device, target);
    397 }
    398 
    399 static void
    400 disk_split_device_path(const char *dev_path, char *device, int *target)
    401 {
    402 	char *t, *p, *e;
    403 
    404 	/*
    405 	 * The disk device path is of the form:
    406 	 * /rootnode/.../device/target@tgtid,tgtlun
    407 	 */
    408 
    409 	(void) strncpy(device, dev_path, MAXPATHLEN);
    410 	e = t = strrchr(device, '/');
    411 	dm_assert(t != NULL);
    412 
    413 	t = strchr(t, '@');
    414 	dm_assert(t != NULL);
    415 	t += 1;
    416 
    417 	if ((p = strchr(t, ',')) != NULL)
    418 		*p = '\0';
    419 
    420 	*target = strtol(t, 0, 16);
    421 	*e = '\0';
    422 }
    423 
    424 /*
    425  * Returns the diskmon that corresponds to the physical disk path
    426  * passed in.
    427  */
    428 static diskmon_t *
    429 disk_match_by_device_path(diskmon_t *disklistp, const char *dev_path)
    430 {
    431 	char dev_device[MAXPATHLEN];
    432 	int dev_target;
    433 	char ap_device[MAXPATHLEN];
    434 	int ap_target;
    435 
    436 	dm_assert(disklistp != NULL);
    437 	dm_assert(dev_path != NULL);
    438 
    439 	if (strncmp(dev_path, DEVICES_PREFIX, 8) == 0)
    440 		dev_path += 8;
    441 
    442 	/* pare dev_path into device and target components */
    443 	disk_split_device_path(dev_path, (char *)&dev_device, &dev_target);
    444 
    445 	/*
    446 	 * The AP path specified in the configuration properties is
    447 	 * the path to an attachment point minor node whose port number is
    448 	 * equal to the target number on the disk "major" node sent by the
    449 	 * sysevent.  To match them, we need to extract the target id and
    450 	 * construct an AP string to compare to the AP path in the diskmon.
    451 	 */
    452 	while (disklistp != NULL) {
    453 		char *app = (char *)dm_prop_lookup(disklistp->app_props,
    454 		    DISK_AP_PROP_APID);
    455 		dm_assert(app != NULL);
    456 
    457 		/* Not necessary to adjust the APID here */
    458 		if (strncmp(app, DEVICES_PREFIX, 8) == 0)
    459 			app += 8;
    460 
    461 		disk_split_ap_path(app, (char *)&ap_device, &ap_target);
    462 
    463 		if ((strcmp(dev_device, ap_device) == 0) &&
    464 		    (dev_target == ap_target))
    465 			return (disklistp);
    466 
    467 		disklistp = disklistp->next;
    468 	}
    469 	return (NULL);
    470 }
    471 
    472 static diskmon_t *
    473 disk_match_by_ap_id(diskmon_t *disklistp, const char *ap_id)
    474 {
    475 	const char *disk_ap_id;
    476 	dm_assert(disklistp != NULL);
    477 	dm_assert(ap_id != NULL);
    478 
    479 	/* Match only the device-tree portion of the name */
    480 	if (strncmp(ap_id, DEVICES_PREFIX, 8 /* strlen("/devices") */) == 0)
    481 		ap_id += 8;
    482 
    483 	while (disklistp != NULL) {
    484 		disk_ap_id = dm_prop_lookup(disklistp->app_props,
    485 		    DISK_AP_PROP_APID);
    486 
    487 		dm_assert(disk_ap_id != NULL);
    488 
    489 		if (strcmp(disk_ap_id, ap_id) == 0)
    490 			return (disklistp);
    491 
    492 		disklistp = disklistp->next;
    493 	}
    494 	return (NULL);
    495 }
    496 
    497 static diskmon_t *
    498 disk_match_by_target_id(diskmon_t *disklistp, const char *target_path)
    499 {
    500 	const char *disk_ap_id;
    501 
    502 	char match_device[MAXPATHLEN];
    503 	int match_target;
    504 
    505 	char ap_device[MAXPATHLEN];
    506 	int ap_target;
    507 
    508 
    509 	/* Match only the device-tree portion of the name */
    510 	if (strncmp(target_path, DEVICES_PREFIX, 8) == 0)
    511 		target_path += 8;
    512 	disk_split_ap_path(target_path, (char *)&match_device, &match_target);
    513 
    514 	while (disklistp != NULL) {
    515 
    516 		disk_ap_id = dm_prop_lookup(disklistp->app_props,
    517 		    DISK_AP_PROP_APID);
    518 		dm_assert(disk_ap_id != NULL);
    519 
    520 		disk_split_ap_path(disk_ap_id, (char *)&ap_device, &ap_target);
    521 		if ((match_target == ap_target) &&
    522 		    (strcmp(match_device, ap_device) == 0))
    523 			return (disklistp);
    524 
    525 		disklistp = disklistp->next;
    526 	}
    527 	return (NULL);
    528 }
    529 
    530 static diskmon_t *
    531 match_sysevent_to_disk(diskmon_t *disklistp, sysevent_t *evp)
    532 {
    533 	diskmon_t *dmp = NULL;
    534 	sysevent_value_t se_val;
    535 	char *class_name = sysevent_get_class_name(evp);
    536 	char *subclass = sysevent_get_subclass_name(evp);
    537 
    538 	se_val.value.sv_string = NULL;
    539 
    540 	if (strcmp(class_name, EC_DEVFS) == 0) {
    541 		/* EC_DEVFS-class events have a `DEVFS_PATHNAME' property */
    542 		if (sysevent_lookup_attr(evp, DEVFS_PATHNAME,
    543 		    SE_DATA_TYPE_STRING, &se_val) == 0 &&
    544 		    se_val.value.sv_string != NULL) {
    545 
    546 			dmp = disk_match_by_device_path(disklistp,
    547 			    se_val.value.sv_string);
    548 
    549 		}
    550 
    551 	} else if (strcmp(class_name, EC_DR) == 0 &&
    552 	    strcmp(subclass, ESC_DR_AP_STATE_CHANGE) == 0) {
    553 
    554 		/* EC_DR-class events have a `DR_AP_ID' property */
    555 		if (sysevent_lookup_attr(evp, DR_AP_ID, SE_DATA_TYPE_STRING,
    556 		    &se_val) == 0 && se_val.value.sv_string != NULL) {
    557 
    558 			dmp = disk_match_by_ap_id(disklistp,
    559 			    se_val.value.sv_string);
    560 		}
    561 	} else if (strcmp(class_name, EC_DR) == 0 &&
    562 	    strcmp(subclass, ESC_DR_TARGET_STATE_CHANGE) == 0) {
    563 		/* get DR_TARGET_ID */
    564 		if (sysevent_lookup_attr(evp, DR_TARGET_ID,
    565 		    SE_DATA_TYPE_STRING, &se_val) == 0 &&
    566 		    se_val.value.sv_string != NULL) {
    567 			dmp = disk_match_by_target_id(disklistp,
    568 			    se_val.value.sv_string);
    569 		}
    570 	}
    571 
    572 	if (se_val.value.sv_string)
    573 		log_msg(MM_HPMGR, "match_sysevent_to_disk: device/ap: %s\n",
    574 		    se_val.value.sv_string);
    575 
    576 	return (dmp);
    577 }
    578 
    579 
    580 /*
    581  * The disk hotplug monitor (DHPM) listens for disk hotplug events and calls the
    582  * state-change functionality when a disk's state changes.  The DHPM listens for
    583  * hotplug events via sysevent subscriptions to the following sysevent
    584  * classes/subclasses: { EC_DEVFS/ESC_DEVFS_BRANCH_ADD,
    585  * EC_DEVFS/ESC_DEVFS_BRANCH_REMOVE, EC_DEVFS/ESC_DEVFS_DEVI_ADD,
    586  * EC_DEVFS/ESC_DEVFS_DEVI_REMOVE, EC_DR/ESC_DR_AP_STATE_CHANGE }.  Once the
    587  * event is received, the device path sent as part of the event is matched
    588  * to one of the disks described by the configuration data structures.
    589  */
    590 static void
    591 dm_process_sysevent(sysevent_t *dupev)
    592 {
    593 	char		*class_name;
    594 	char		*pub;
    595 	char		*subclass = sysevent_get_subclass_name(dupev);
    596 	diskmon_t	*diskp;
    597 
    598 	class_name = sysevent_get_class_name(dupev);
    599 	log_msg(MM_HPMGR, "****EVENT: %s %s (by %s)\n", class_name,
    600 	    subclass,
    601 	    ((pub = sysevent_get_pub_name(dupev)) != NULL) ? pub : "UNKNOWN");
    602 
    603 	if (pub)
    604 		free(pub);
    605 
    606 	if (strcmp(class_name, EC_PLATFORM) == 0 &&
    607 	    strcmp(subclass, ESC_PLATFORM_SP_RESET) == 0) {
    608 		if (dm_platform_resync() != 0)
    609 			log_warn("failed to resync SP platform\n");
    610 		sysevent_free(dupev);
    611 		return;
    612 	}
    613 
    614 	/*
    615 	 * We will handle this event if the event's target matches one of the
    616 	 * disks we're monitoring
    617 	 */
    618 	if ((diskp = match_sysevent_to_disk(config_data->disk_list, dupev))
    619 	    != NULL) {
    620 
    621 		dm_state_change(diskp, disk_sysev_to_state(diskp, dupev));
    622 	}
    623 
    624 	sysevent_free(dupev);
    625 }
    626 
    627 static void
    628 dm_fmd_sysevent_thread(void *queuep)
    629 {
    630 	qu_t			*qp = (qu_t *)queuep;
    631 	sysevent_event_t	*sevevp;
    632 
    633 	/* Signal the thread spawner that we're running */
    634 	dm_assert(pthread_mutex_lock(&g_event_handler_lock) == 0);
    635 	if (g_sysev_thread_state != TS_EXIT_REQUESTED)
    636 		g_sysev_thread_state = TS_RUNNING;
    637 	(void) pthread_cond_broadcast(&g_event_handler_cond);
    638 	dm_assert(pthread_mutex_unlock(&g_event_handler_lock) == 0);
    639 
    640 	while (g_sysev_thread_state != TS_EXIT_REQUESTED) {
    641 		if ((sevevp = (sysevent_event_t *)queue_remove(qp)) == NULL)
    642 			continue;
    643 
    644 		dm_process_sysevent(sevevp->evp);
    645 
    646 		free_sysevent_event(sevevp);
    647 	}
    648 
    649 	/* Signal the thread spawner that we've exited */
    650 	dm_assert(pthread_mutex_lock(&g_event_handler_lock) == 0);
    651 	g_sysev_thread_state = TS_EXITED;
    652 	(void) pthread_cond_broadcast(&g_event_handler_cond);
    653 	dm_assert(pthread_mutex_unlock(&g_event_handler_lock) == 0);
    654 
    655 	log_msg(MM_HPMGR, "FMD sysevent handler thread exiting...");
    656 }
    657 
    658 static sysevent_event_t *
    659 new_sysevent_event(sysevent_t *ev)
    660 {
    661 	/*
    662 	 * Cannot use dmalloc for this because the thread isn't a FMD-created
    663 	 * thread!
    664 	 */
    665 	sysevent_event_t *sevevp = malloc(sizeof (sysevent_event_t));
    666 	sevevp->evp = ev;
    667 	return (sevevp);
    668 }
    669 
    670 static void
    671 free_sysevent_event(void *p)
    672 {
    673 	/* the sysevent_event was allocated with malloc(): */
    674 	free(p);
    675 }
    676 
    677 static void
    678 event_handler(sysevent_t *ev)
    679 {
    680 	/* The duplicated sysevent will be freed in the child thread */
    681 	sysevent_t	*dupev = sysevent_dup(ev);
    682 
    683 	/*
    684 	 * Add this sysevent to the work queue of our FMA thread so we can
    685 	 * handle the sysevent and use the FMA API (e.g. for memory
    686 	 * allocation, etc.) in the sysevent handler.
    687 	 */
    688 	queue_add(g_sysev_queue, new_sysevent_event(dupev));
    689 }
    690 
/*
 * Unsubscribes the module's sysevent handle from every event class
 * (EC_ALL).
 */
static void
fini_sysevents(void)
{
	sysevent_unsubscribe_event(sysevent_handle, EC_ALL);
}
    696 
    697 static int
    698 init_sysevents(void)
    699 {
    700 	int rv = 0;
    701 	const char *devfs_subclasses[] = {
    702 		ESC_DEVFS_DEVI_ADD,
    703 		ESC_DEVFS_DEVI_REMOVE
    704 	};
    705 	const char *dr_subclasses[] = {
    706 		ESC_DR_AP_STATE_CHANGE,
    707 		ESC_DR_TARGET_STATE_CHANGE
    708 	};
    709 	const char *platform_subclasses[] = {
    710 		ESC_PLATFORM_SP_RESET
    711 	};
    712 
    713 	if ((sysevent_handle = sysevent_bind_handle(event_handler)) == NULL) {
    714 		rv = errno;
    715 		log_err("Could not initialize the hotplug manager ("
    716 		    "sysevent_bind_handle failure");
    717 	}
    718 
    719 	if (sysevent_subscribe_event(sysevent_handle, EC_DEVFS,
    720 	    devfs_subclasses,
    721 	    sizeof (devfs_subclasses)/sizeof (devfs_subclasses[0])) != 0) {
    722 
    723 		log_err("Could not initialize the hotplug manager "
    724 		    "sysevent_subscribe_event(event class = EC_DEVFS) "
    725 		    "failure");
    726 
    727 		rv = -1;
    728 
    729 	} else if (sysevent_subscribe_event(sysevent_handle, EC_DR,
    730 	    dr_subclasses,
    731 	    sizeof (dr_subclasses)/sizeof (dr_subclasses[0])) != 0) {
    732 
    733 		log_err("Could not initialize the hotplug manager "
    734 		    "sysevent_subscribe_event(event class = EC_DR) "
    735 		    "failure");
    736 
    737 		/* Unsubscribe from all sysevents in the event of a failure */
    738 		fini_sysevents();
    739 
    740 		rv = -1;
    741 	} else if (sysevent_subscribe_event(sysevent_handle, EC_PLATFORM,
    742 	    platform_subclasses,
    743 	    sizeof (platform_subclasses)/sizeof (platform_subclasses[0]))
    744 	    != 0) {
    745 
    746 		log_err("Could not initialize the hotplug manager "
    747 		    "sysevent_subscribe_event(event class = EC_PLATFORM) "
    748 		    "failure");
    749 
    750 		/* Unsubscribe from all sysevents in the event of a failure */
    751 		fini_sysevents();
    752 
    753 		rv = -1;
    754 	}
    755 
    756 
    757 	return (rv);
    758 }
    759 
    760 /*ARGSUSED*/
    761 static void
    762 stdfree(void *p, size_t sz)
    763 {
    764 	free(p);
    765 }
    766 
    767 /*
    768  * Assumptions: Each disk's current state was determined and stored in
    769  * its diskmon_t.
    770  */
    771 hotplug_mgr_init_err_t
    772 init_hotplug_manager()
    773 {
    774 	/* Create the queue to which we'll add sysevents */
    775 	g_sysev_queue = new_queue(B_TRUE, malloc, stdfree, free_sysevent_event);
    776 
    777 	/*
    778 	 * Grab the event handler lock before spawning the thread so we can
    779 	 * wait for the thread to transition to the running state.
    780 	 */
    781 	dm_assert(pthread_mutex_lock(&g_event_handler_lock) == 0);
    782 
    783 	/* Create the sysevent handling thread */
    784 	g_sysev_tid = fmd_thr_create(g_fm_hdl, dm_fmd_sysevent_thread,
    785 	    g_sysev_queue);
    786 
    787 	/* Wait for the thread's acknowledgement */
    788 	while (g_sysev_thread_state != TS_RUNNING)
    789 		(void) pthread_cond_wait(&g_event_handler_cond,
    790 		    &g_event_handler_lock);
    791 	dm_assert(pthread_mutex_unlock(&g_event_handler_lock) == 0);
    792 
    793 	if (init_sysevents() != 0) {
    794 		log_warn_e("Error initializing sysevents");
    795 		return (HPM_ERR_SYSEVENT_INIT);
    796 	}
    797 
    798 	return (0);
    799 }
    800 
/*
 * Shuts the hotplug manager down: unsubscribes from the sysevents, asks
 * the sysevent-processing thread to exit and waits for it to do so,
 * then destroys the work queue and resets the thread state to
 * TS_NOT_RUNNING.
 */
void
cleanup_hotplug_manager()
{
	/* Unsubscribe from the sysevents */
	fini_sysevents();

	/*
	 * Wait for the thread to exit before we can destroy
	 * the event queue.
	 */
	dm_assert(pthread_mutex_lock(&g_event_handler_lock) == 0);
	g_sysev_thread_state = TS_EXIT_REQUESTED;
	/*
	 * A NULL entry is added after the exit request; the thread's loop
	 * skips NULL entries and re-checks the thread state.
	 */
	queue_add(g_sysev_queue, NULL);
	while (g_sysev_thread_state != TS_EXITED)
		(void) pthread_cond_wait(&g_event_handler_cond,
		    &g_event_handler_lock);
	dm_assert(pthread_mutex_unlock(&g_event_handler_lock) == 0);
	(void) pthread_join(g_sysev_tid, NULL);
	fmd_thr_destroy(g_fm_hdl, g_sysev_tid);

	/* Finally, destroy the event queue and reset the thread state */
	queue_free(&g_sysev_queue);
	g_sysev_thread_state = TS_NOT_RUNNING;
}
    825