Home | History | Annotate | Download | only in disk-monitor
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     28 
     29 #include <string.h>
     30 #include <inttypes.h>
     31 #include <atomic.h>
     32 #include <fm/fmd_api.h>
     33 #include <sys/fm/protocol.h>
     34 
     35 #include "disk_monitor.h"
     36 #include "schg_mgr.h"
     37 #include "hotplug_mgr.h"
     38 #include "topo_gather.h"
     39 #include "dm_platform.h"
     40 
/*
 * State-change event processing thread data.
 *
 * g_schgt_state tracks the lifecycle of the state-change thread
 * (TS_NOT_RUNNING -> TS_RUNNING -> TS_EXIT_REQUESTED -> TS_EXITED).
 * g_schgt_state_cvar, paired with g_schgt_state_mutex, is broadcast by the
 * thread when it reports TS_RUNNING and TS_EXITED.
 */
static pthread_t	g_schg_tid;	/* id returned by fmd_thr_create() */
static thread_state_t	g_schgt_state = TS_NOT_RUNNING;
static pthread_mutex_t	g_schgt_state_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	g_schgt_state_cvar = PTHREAD_COND_INITIALIZER;
/*
 * Taken by dm_state_change() around each enqueue and by
 * block_state_change_events()/unblock_state_change_events().
 */
static pthread_mutex_t	g_schgt_add_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Pending disk_statechg_t items; created by init_state_change_manager() */
static qu_t		*g_schg_queue = NULL;

/* Enqueue a state change without taking g_schgt_add_mutex (defined below) */
static void dm_state_change_nolock(diskmon_t *diskp, hotplug_state_t newstate);
     50 
/*
 * Each disk state change is described by an instance of the following
 * structure (which includes the disk object and the new state).
 * Instances are allocated by new_statechange(), travel through
 * g_schg_queue, and are released with free_statechange().
 */
typedef struct disk_statechg {
	diskmon_t	*diskp;		/* disk the change applies to */
	hotplug_state_t	newstate;	/* requested state, as enqueued */
} disk_statechg_t;
     59 
     60 static disk_statechg_t *
     61 new_statechange(diskmon_t *diskp, hotplug_state_t state)
     62 {
     63 	disk_statechg_t *dscp =
     64 	    (disk_statechg_t *)dmalloc(sizeof (disk_statechg_t));
     65 
     66 	/*
     67 	 * The states are additive -- we don't need to preserve
     68 	 * the current faulted state in the newstate:
     69 	 */
     70 	dscp->diskp = diskp;
     71 	dscp->newstate = state;
     72 
     73 	return (dscp);
     74 }
     75 
     76 static void
     77 free_statechange(void *dscp)
     78 {
     79 	dfree(dscp, sizeof (disk_statechg_t));
     80 }
     81 
     82 static void
     83 add_to_statechange_queue(diskmon_t *diskp, hotplug_state_t newstate)
     84 {
     85 	queue_add(g_schg_queue, new_statechange(diskp, newstate));
     86 }
     87 
     88 static const char *
     89 lookup_action_string(indicator_t *ind_listp, ind_state_t state, char *name)
     90 {
     91 	const char *str = NULL;
     92 
     93 	while (ind_listp != NULL) {
     94 
     95 		if (state == ind_listp->ind_state &&
     96 		    strcasecmp(ind_listp->ind_name, name) == 0) {
     97 
     98 			str = ind_listp->ind_instr_spec;
     99 			break;
    100 		}
    101 
    102 		ind_listp = ind_listp->next;
    103 	}
    104 
    105 	return (str);
    106 }
    107 
    108 void
    109 dm_fault_indicator_set(diskmon_t *diskp, ind_state_t istate)
    110 {
    111 	const char *astring;
    112 
    113 	dm_assert(pthread_mutex_lock(&diskp->fault_indicator_mutex) == 0);
    114 
    115 	/*
    116 	 * No need to execute redundant indicator actions
    117 	 */
    118 	if (istate == INDICATOR_UNKNOWN ||
    119 	    diskp->fault_indicator_state == istate) {
    120 		dm_assert(pthread_mutex_unlock(&diskp->fault_indicator_mutex)
    121 		    == 0);
    122 		return;
    123 	}
    124 
    125 	astring = lookup_action_string(diskp->ind_list, istate,
    126 	    INDICATOR_FAULT_IDENTIFIER);
    127 
    128 	if (astring != NULL) {
    129 		log_msg(MM_SCHGMGR, "Executing action `%s'\n", astring);
    130 
    131 		if (dm_platform_indicator_execute(astring) != 0) {
    132 			log_warn("[Disk in %s] Action `%s' did not complete "
    133 			    "successfully.\n",
    134 			    diskp->location,
    135 			    astring);
    136 		} else  {
    137 
    138 			diskp->fault_indicator_state = istate;
    139 
    140 			log_msg(MM_SCHGMGR, "Action `%s' executed "
    141 			    "successfully\n", astring);
    142 		}
    143 	}
    144 
    145 	dm_assert(pthread_mutex_unlock(&diskp->fault_indicator_mutex) == 0);
    146 }
    147 
    148 static void
    149 schg_execute_state_change_action(diskmon_t *diskp, hotplug_state_t oldstate,
    150     hotplug_state_t newstate)
    151 {
    152 	indrule_t *rulelist;
    153 	ind_action_t *actions;
    154 	const char *astring;
    155 
    156 	log_msg(MM_SCHGMGR, "[Disk in %s] State change action: %s -> %s\n",
    157 	    diskp->location,
    158 	    hotplug_state_string(oldstate),
    159 	    hotplug_state_string(newstate));
    160 
    161 	/*
    162 	 * Find the list of actions that correspond to this state change.
    163 	 * If the old state is UNKNOWN, then we'll match to first action
    164 	 * whose transition state is the new state.
    165 	 */
    166 	rulelist = diskp->indrule_list;
    167 
    168 	while (rulelist != NULL) {
    169 
    170 		if ((oldstate == HPS_UNKNOWN ||
    171 		    rulelist->strans.begin == oldstate) &&
    172 		    rulelist->strans.end == newstate)
    173 			break;
    174 
    175 		rulelist = rulelist->next;
    176 	}
    177 
    178 	if (rulelist != NULL) {
    179 		/* Now we have a set of actions to perform: */
    180 		actions = rulelist->action_list;
    181 
    182 		while (actions != NULL) {
    183 
    184 			astring = lookup_action_string(diskp->ind_list,
    185 			    actions->ind_state, actions->ind_name);
    186 
    187 			dm_assert(astring != NULL);
    188 
    189 			log_msg(MM_SCHGMGR, "Executing action `%s'\n", astring);
    190 
    191 			if (dm_platform_indicator_execute(astring) != 0) {
    192 				log_warn("[Disk in %s][State transition from "
    193 				    "%s to %s] Action `%s' did not complete "
    194 				    "successfully.\n",
    195 				    diskp->location,
    196 				    hotplug_state_string(oldstate),
    197 				    hotplug_state_string(newstate),
    198 				    astring);
    199 
    200 			} else
    201 				log_msg(MM_SCHGMGR,
    202 				    "Action `%s' executed successfully\n",
    203 				    astring);
    204 
    205 			actions = actions->next;
    206 		}
    207 	}
    208 
    209 }
    210 
    211 static void
    212 schg_send_fru_update(diskmon_t *diskp, dm_fru_t *frup)
    213 {
    214 	const char *action = dm_prop_lookup(diskp->props, DISK_PROP_FRUACTION);
    215 
    216 	if (action == NULL) {
    217 		log_msg(MM_SCHGMGR|MM_NOTE, "No FRU update action for disk "
    218 		    "in %s\n", diskp->location);
    219 		return;
    220 	}
    221 
    222 	if (dm_platform_update_fru(action, frup) != 0) {
    223 		log_warn("Error updating FRU information for disk in %s.\n",
    224 		    diskp->location);
    225 	}
    226 }
    227 
    228 static void
    229 schg_update_fru_info(diskmon_t *diskp)
    230 {
    231 	if (diskp->initial_configuration ||
    232 	    update_configuration_from_topo(g_fm_hdl, diskp) == TOPO_SUCCESS) {
    233 		diskp->initial_configuration = B_FALSE;
    234 		dm_assert(pthread_mutex_lock(&diskp->fru_mutex) == 0);
    235 		if (diskp->frup != NULL)
    236 			schg_send_fru_update(diskp, diskp->frup);
    237 		else
    238 			log_warn("frup unexpectedly went away: not updating "
    239 			    "FRU information for disk %s!\n", diskp->location);
    240 		dm_assert(pthread_mutex_unlock(&diskp->fru_mutex) == 0);
    241 	} else {
    242 		log_warn_e("Error retrieving FRU information "
    243 		    "for disk in %s", diskp->location);
    244 	}
    245 }
    246 
/*
 * Acquire g_schgt_add_mutex.  dm_state_change() takes the same mutex
 * around its enqueue, so while this lock is held callers of
 * dm_state_change() wait until unblock_state_change_events() releases it.
 */
void
block_state_change_events(void)
{
	dm_assert(pthread_mutex_lock(&g_schgt_add_mutex) == 0);
}
    252 
/*
 * Release g_schgt_add_mutex.  Within this file it is called by the
 * state-change thread once the initial state of every disk has been
 * enqueued.
 *
 * NOTE(review): the matching block_state_change_events() call is not in
 * this file; presumably the module's startup code makes it before
 * init_state_change_manager() -- confirm, and confirm that unlocking
 * from a different thread than the one that locked is acceptable for
 * this mutex.
 */
void
unblock_state_change_events(void)
{
	dm_assert(pthread_mutex_unlock(&g_schgt_add_mutex) == 0);
}
    258 
    259 static void
    260 disk_state_change_first_time(diskmon_t *diskp)
    261 {
    262 	hotplug_state_t firststate;
    263 
    264 	/*
    265 	 * Grab the current state of the attachment point to initialize the
    266 	 * initial disk state.  Create a disk state change with this new
    267 	 * state so it will be processed in the loop below.  If we can't get
    268 	 * the initial state for some reason, then we'll just end up doing it
    269 	 * later when we get a state change from the hotplug monitor or the
    270 	 * fault monitor.
    271 	 */
    272 	firststate = disk_ap_state_to_hotplug_state(diskp);
    273 	if (firststate != HPS_UNKNOWN)
    274 		dm_state_change_nolock(diskp, firststate);
    275 
    276 	/*
    277 	 * The fault indicators will be updated when faults are replayed
    278 	 * based on the state of the disk as faulty in the fmd resource cache.
    279 	 * A FAULTED state change will come from the _recv function when the
    280 	 * fault component event is replayed.
    281 	 */
    282 }
    283 
    284 static void
    285 disk_state_change_thread(void *vdisklistp)
    286 {
    287 	diskmon_t	*disklistp = (diskmon_t *)vdisklistp;
    288 	diskmon_t	*diskp;
    289 	disk_statechg_t	*dscp;
    290 	hotplug_state_t	nextstate;
    291 	const char	*pth;
    292 
    293 	/*
    294 	 * Perform startup activities to initialize the state of the
    295 	 * indicators for each disk.
    296 	 */
    297 	diskp = disklistp;
    298 	while (diskp != NULL) {
    299 		disk_state_change_first_time(diskp);
    300 		diskp = diskp->next;
    301 	}
    302 
    303 	unblock_state_change_events();
    304 
    305 	dm_assert(pthread_mutex_lock(&g_schgt_state_mutex) == 0);
    306 	if (g_schgt_state != TS_EXIT_REQUESTED) {
    307 		g_schgt_state = TS_RUNNING;
    308 		dm_assert(pthread_cond_broadcast(&g_schgt_state_cvar) == 0);
    309 	}
    310 	dm_assert(pthread_mutex_unlock(&g_schgt_state_mutex) == 0);
    311 
    312 	while (g_schgt_state != TS_EXIT_REQUESTED) {
    313 
    314 		if ((dscp = (disk_statechg_t *)queue_remove(g_schg_queue))
    315 		    == NULL) {
    316 			dm_assert(g_schgt_state == TS_EXIT_REQUESTED);
    317 			continue;
    318 		}
    319 
    320 		diskp = dscp->diskp;
    321 
    322 		/*
    323 		 * If the new state is the faulted state, add that state to
    324 		 * the disk's current state.
    325 		 */
    326 		if (dscp->newstate == HPS_FAULTED) {
    327 
    328 			/*
    329 			 * If the disk wasn't previously in the faulted state,
    330 			 * execute the generic fault action.  Even if we're
    331 			 * in the faulted state, accept additional faults.
    332 			 */
    333 			nextstate = DISK_STATE(diskp->state) | HPS_FAULTED;
    334 
    335 		} else if (dscp->newstate == HPS_REPAIRED) {
    336 			nextstate = DISK_STATE(diskp->state);
    337 
    338 		} else if (dscp->newstate == HPS_ABSENT) {
    339 			/*
    340 			 * If the new state is ABSENT, forget any faults
    341 			 */
    342 
    343 			nextstate = HPS_ABSENT;
    344 		} else
    345 			nextstate = dscp->newstate | DISK_FAULTED(diskp->state);
    346 
    347 		/*
    348 		 * When a new disk is inserted and reaches the CONFIGURED state,
    349 		 * the following actions must be done in the following order:
    350 		 *
    351 		 * (1) Execute the configuration-specified action on the
    352 		 * state change.
    353 		 * (2) Retreive the FRU information from the disk and execute
    354 		 * the FRU-update action specified,
    355 		 * (3) Initialize the fault monitor state associated with
    356 		 * the new drive.
    357 		 *
    358 		 * Once the disk is no longer "new" (a disk is "new" when it
    359 		 * has not yet reached the CONFIGURED state), subsequent
    360 		 * transitions away and back to CONFIGURED (as long as the
    361 		 * disk is not physically removed) will result in the
    362 		 * execution of the predefined action ONLY.
    363 		 *
    364 		 */
    365 
    366 		if (dscp->newstate != HPS_FAULTED &&
    367 		    DISK_STATE(nextstate) != HPS_UNKNOWN &&
    368 		    dscp->newstate != HPS_REPAIRED) {
    369 
    370 			schg_execute_state_change_action(diskp,
    371 			    DISK_STATE(diskp->state), DISK_STATE(nextstate));
    372 		}
    373 
    374 		if (!diskp->configured_yet &&
    375 		    DISK_STATE(nextstate) == HPS_CONFIGURED) {
    376 
    377 			schg_update_fru_info(diskp);
    378 
    379 			/*
    380 			 * If this state transition is lagging the true
    381 			 * state of the system (e.g. if the true state of
    382 			 * the disk is UNCONFIGURED, there's another
    383 			 * state change somewhere later in the queue), then
    384 			 * it's possible for the disk path property to not
    385 			 * exist.
    386 			 */
    387 			if (dm_prop_lookup(diskp->props,
    388 			    DISK_PROP_DEVPATH) == NULL) {
    389 
    390 				log_msg(MM_SCHGMGR,
    391 				    "Processed stale state change "
    392 				    "for disk %s\n", diskp->location);
    393 
    394 			} else {
    395 				diskp->configured_yet = B_TRUE;
    396 			}
    397 
    398 		}
    399 
    400 		dm_assert(pthread_mutex_lock(&diskp->manager_mutex) == 0);
    401 
    402 		/*
    403 		 * Make the new state visible to all observers
    404 		 */
    405 		diskp->state = nextstate;
    406 
    407 		/*
    408 		 * Now, update the diskmon if the disk is now absent -- it's
    409 		 * essential to do this after the state is set (above) so that
    410 		 * state observers in other threads don't try to access the
    411 		 * data structures that we're freeing here.
    412 		 */
    413 
    414 		if (diskp->configured_yet &&
    415 		    DISK_STATE(nextstate) == HPS_ABSENT) {
    416 			/*
    417 			 * When the disk is removed, the fault monitor state is
    418 			 * useless, so discard it.
    419 			 */
    420 			dm_assert(DISK_STATE(nextstate) != HPS_CONFIGURED);
    421 
    422 			diskp->configured_yet = B_FALSE;
    423 
    424 		}
    425 		dm_assert(pthread_mutex_unlock(&diskp->manager_mutex) == 0);
    426 
    427 		pth = dm_prop_lookup(diskp->props, DISK_PROP_DEVPATH);
    428 
    429 		log_msg(MM_SCHGMGR,
    430 		    "[State change #%d][%s]: Disk path = %s\n",
    431 		    diskp->state_change_count,
    432 		    diskp->location, pth == NULL ? "Unknown" : pth);
    433 
    434 		log_msg(MM_SCHGMGR,
    435 		    "[State change #%d][%s]: New state = %s%s\n",
    436 		    diskp->state_change_count, diskp->location,
    437 		    hotplug_state_string(diskp->state),
    438 		    DISK_FAULTED(diskp->state) ? "+FAULTED" : "");
    439 
    440 		atomic_inc_uint(&diskp->state_change_count);
    441 
    442 		/* The caller is responsible for freeing the state change: */
    443 		free_statechange(dscp);
    444 	}
    445 	dm_assert(pthread_mutex_lock(&g_schgt_state_mutex) == 0);
    446 	g_schgt_state = TS_EXITED;
    447 	dm_assert(pthread_cond_broadcast(&g_schgt_state_cvar) == 0);
    448 	dm_assert(pthread_mutex_unlock(&g_schgt_state_mutex) == 0);
    449 
    450 	log_msg(MM_SCHGMGR, "State change thread exiting...\n");
    451 }
    452 
/*
 * Request that `diskp' move to `newstate' without taking
 * g_schgt_add_mutex.  Used by dm_state_change(), which holds that mutex
 * around the call, and by the state-change thread while it seeds the
 * initial disk states.
 */
static void
dm_state_change_nolock(diskmon_t *diskp, hotplug_state_t newstate)
{
	/* Enqueue a new state change for the state-change thread */
	add_to_statechange_queue(diskp, newstate);
}
    459 
/*
 * Request that `diskp' move to `newstate'.  The request is only queued
 * here; the state-change thread applies it later.  The enqueue is done
 * under g_schgt_add_mutex, so this call waits while state-change events
 * are blocked via block_state_change_events().
 */
void
dm_state_change(diskmon_t *diskp, hotplug_state_t newstate)
{
	dm_assert(pthread_mutex_lock(&g_schgt_add_mutex) == 0);
	dm_state_change_nolock(diskp, newstate);
	dm_assert(pthread_mutex_unlock(&g_schgt_add_mutex) == 0);
}
    467 
    468 int
    469 init_state_change_manager(cfgdata_t *cfgdatap)
    470 {
    471 	/* new_queue() is guaranteed to succeed */
    472 	g_schg_queue = new_queue(B_TRUE, dmalloc, dfree, free_statechange);
    473 
    474 	dm_assert(pthread_mutex_lock(&g_schgt_state_mutex) == 0);
    475 	g_schg_tid = fmd_thr_create(g_fm_hdl, disk_state_change_thread,
    476 	    cfgdatap->disk_list);
    477 
    478 	/*
    479 	 * Now, wait for the thread to enter the TS_RUNNING state.  This
    480 	 * is important because we want the state-change thread to pull the
    481 	 * initial state of the disks on startup (without the wait, we could
    482 	 * have the hotplug event handler race and deliver a state change
    483 	 * before the state-change thread initialized the initial disk state).
    484 	 */
    485 
    486 	while (g_schgt_state != TS_RUNNING) {
    487 		(void) pthread_cond_wait(&g_schgt_state_cvar,
    488 		    &g_schgt_state_mutex);
    489 	}
    490 
    491 	dm_assert(pthread_mutex_unlock(&g_schgt_state_mutex) == 0);
    492 
    493 	return (0);
    494 }
    495 
    496 /*ARGSUSED*/
    497 void
    498 cleanup_state_change_manager(cfgdata_t *cfgdatap)
    499 {
    500 	if (g_schgt_state != TS_RUNNING)
    501 		return;
    502 
    503 	g_schgt_state = TS_EXIT_REQUESTED;
    504 	queue_add(g_schg_queue, NULL);
    505 	dm_assert(pthread_mutex_lock(&g_schgt_state_mutex) == 0);
    506 	while (g_schgt_state != TS_EXITED)
    507 		dm_assert(pthread_cond_wait(&g_schgt_state_cvar,
    508 		    &g_schgt_state_mutex) == 0);
    509 	dm_assert(pthread_mutex_unlock(&g_schgt_state_mutex) == 0);
    510 	(void) pthread_join(g_schg_tid, NULL);
    511 	fmd_thr_destroy(g_fm_hdl, g_schg_tid);
    512 	queue_free(&g_schg_queue);
    513 	g_schgt_state = TS_NOT_RUNNING;
    514 }
    515