Home | History | Annotate | Download | only in common
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * FMD Dynamic Reconfiguration (DR) Event Handling
     28  *
     29  * Fault manager scheme plug-ins must track characteristics of individual
     30  * pieces of hardware.  As these components can be added or removed by a DR
     31  * operation, we need to provide a means by which plug-ins can determine when
     32  * they need to re-examine the current configuration.  We provide a simple
     33  * mechanism whereby this task can be implemented using lazy evaluation: a
     34  * simple 64-bit generation counter is maintained and incremented on *any* DR.
     35  * Schemes can store the generation number in scheme-specific data structures,
     36  * and then revalidate their contents if the current generation number has
     37  * changed since the resource information was cached.  This method saves time,
     38  * avoids the complexity of direct participation in DR, avoids the need for
     39  * resource-specific processing of DR events, and is relatively easy to port
     40  * to other systems that support dynamic reconfiguration.
     41  *
     42  * The dr generation is only incremented in response to hardware changes.  Since
     43  * ASRUs can be in any scheme, including the device scheme, we must also be
     44  * aware of software configuration changes which may affect the resource cache.
     45  * In addition, we take a snapshot of the topology whenever a reconfiguration
     46  * event occurs and notify any modules of the change.
     47  */
     48 
     49 #include <sys/types.h>
     50 #include <sys/sunddi.h>
     51 #include <sys/sysevent/dr.h>
     52 #include <sys/sysevent/eventdefs.h>
     53 
     54 #include <stdio.h>
     55 #include <string.h>
     56 #include <unistd.h>
     57 #include <libsysevent.h>
     58 
     59 #undef MUTEX_HELD
     60 #undef RW_READ_HELD
     61 #undef RW_WRITE_HELD
     62 
     63 #include <fmd_asru.h>
     64 #include <fmd_error.h>
     65 #include <fmd_event.h>
     66 #include <fmd_fmri.h>
     67 #include <fmd_module.h>
     68 #include <fmd_subr.h>
     69 #include <fmd_topo.h>
     70 #include <fmd.h>
     71 
     72 void
     73 fmd_dr_event(sysevent_t *sep)
     74 {
     75 	uint64_t gen;
     76 	fmd_event_t *e;
     77 	const char *class = sysevent_get_class_name(sep);
     78 	const char *subclass = sysevent_get_subclass_name(sep);
     79 	hrtime_t evtime;
     80 	fmd_topo_t *ftp, *prev;
     81 	boolean_t update_topo = B_FALSE;
     82 
     83 	if (strcmp(class, EC_DR) == 0) {
     84 		if (strcmp(subclass, ESC_DR_AP_STATE_CHANGE) != 0 &&
     85 		    strcmp(subclass, ESC_DR_TARGET_STATE_CHANGE) != 0)
     86 			return;
     87 
     88 		/*
     89 		 * The DR generation is only changed in response to DR events.
     90 		 */
     91 		update_topo = B_TRUE;
     92 
     93 		(void) pthread_mutex_lock(&fmd.d_stats_lock);
     94 		gen = fmd.d_stats->ds_dr_gen.fmds_value.ui64++;
     95 		(void) pthread_mutex_unlock(&fmd.d_stats_lock);
     96 
     97 		TRACE((FMD_DBG_XPRT, "dr event %p, gen=%llu",
     98 		    (void *)sep, gen));
     99 	} else if (strcmp(class, EC_DEVFS) == 0) {
    100 		/*
    101 		 * A devfs configuration event can change the topology,
    102 		 * as disk nodes only exist when the device is configured.
    103 		 */
    104 		update_topo = B_TRUE;
    105 	} else if (strcmp(class, EC_PLATFORM) == 0) {
    106 		if (strcmp(subclass, ESC_PLATFORM_SP_RESET) == 0) {
    107 			/*
    108 			 * Since we rely on the SP to enumerate fans,
    109 			 * power-supplies and sensors/leds, it would be prudent
    110 			 * to take a new snapshot if the SP resets.
    111 			 */
    112 			update_topo = B_TRUE;
    113 		}
    114 	} else if (strcmp(class, EC_ZFS) == 0) {
    115 		/*
    116 		 * These events can change the resource cache.
    117 		 */
    118 		if (strcmp(subclass, ESC_ZFS_VDEV_CLEAR) != 0 &&
    119 		    strcmp(subclass, ESC_ZFS_VDEV_REMOVE) != 0 &&
    120 		    strcmp(subclass, ESC_ZFS_POOL_DESTROY) != 0)
    121 			return;
    122 	} else if (strcmp(class, EC_DEV_ADD) == 0 ||
    123 	    strcmp(class, EC_DEV_REMOVE) == 0) {
    124 		if (strcmp(subclass, ESC_DISK) != 0)
    125 			return;
    126 
    127 		update_topo = B_TRUE;
    128 	}
    129 
    130 	/*
    131 	 * Take a topo snapshot and notify modules of the change.  Picking an
    132 	 * accurate time here is difficult.  On one hand, we have the timestamp
    133 	 * of the underlying sysevent, indicating when the reconfiguration event
    134 	 * occurred.  On the other hand, we are taking the topo snapshot
    135 	 * asynchronously, and hence the timestamp of the snapshot is the
    136 	 * current time.  Pretending this topo snapshot was valid at the time
    137 	 * the sysevent was posted seems wrong, so we instead opt for the
    138 	 * current time as an upper bound on the snapshot validity.
    139 	 *
    140 	 * Along these lines, we keep track of the last time we dispatched a
    141 	 * topo snapshot.  If the sysevent occurred before the last topo
    142 	 * snapshot, then don't bother dispatching another topo change event.
    143 	 * We've already indicated (to the best of our ability) the change in
    144 	 * topology.  This prevents endless topo snapshots in response to a
    145 	 * flurry of sysevents.
    146 	 */
    147 	sysevent_get_time(sep, &evtime);
    148 	prev = fmd_topo_hold();
    149 	if (evtime <= prev->ft_time_begin &&
    150 	    fmd.d_clockops == &fmd_timeops_native) {
    151 		fmd_topo_rele(prev);
    152 		return;
    153 	}
    154 	fmd_topo_rele(prev);
    155 
    156 	if (update_topo)
    157 		fmd_topo_update(B_FALSE);
    158 
    159 	ftp = fmd_topo_hold();
    160 	e = fmd_event_create(FMD_EVT_TOPO, ftp->ft_time_end, NULL, ftp);
    161 	fmd_modhash_dispatch(fmd.d_mod_hash, e);
    162 }
    163