Home | History | Annotate | Download | only in common
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     27 
     28 /*
     29  * RCM module supporting multiplexed I/O controllers (MPxIO).
     30  */
     31 #include <stdlib.h>
     32 #include <stdarg.h>
     33 #include <unistd.h>
     34 #include <assert.h>
     35 #include <syslog.h>
     36 #include <string.h>
     37 #include <synch.h>
     38 #include <libintl.h>
     39 #include <locale.h>
     40 #include <ctype.h>
     41 #include <errno.h>
     42 #include <libdevinfo.h>
     43 #include <sys/types.h>
     44 #include "rcm_module.h"
     45 
     46 #define	MPXIO_PROP_NAME		"mpxio-component"
     47 #define	MPXIO_PROP_CLIENT	"client"
     48 
     49 #define	CMD_GETINFO		0
     50 #define	CMD_OFFLINE		1
     51 #define	CMD_ONLINE		2
     52 #define	CMD_REMOVE		3
     53 
     54 #define	CACHE_NEW		0
     55 #define	CACHE_REFERENCED	1
     56 #define	CACHE_STALE		2
     57 
     58 #define	MPXIO_MSG_CACHEFAIL	gettext("Internal analysis failure.")
     59 #define	MPXIO_MSG_LASTPATH	gettext("Last path to busy resources.")
     60 #define	MPXIO_MSG_USAGE		gettext("SCSI Multipathing PHCI (%s)")
     61 #define	MPXIO_MSG_USAGEUNKNOWN	gettext("SCSI Multipathing PHCI (<unknown>)")
     62 
/*
 * A single PHCI as tracked by this module: its resource name plus the
 * libdevinfo path state that was recorded for it.
 */
typedef struct {
	/* Resource name of the PHCI (a "/devices..." string, see get_rsrcname) */
	char *path;
	/* Path state as obtained from di_path_state() */
	di_path_state_t state;
} phci_t;
     67 
/*
 * Node of the singly linked list of registrants (see reg_list).
 */
typedef struct phci_list {
	/* The registered PHCI; phci.path is a private heap copy of the name */
	phci_t phci;
	/* One of CACHE_NEW, CACHE_REFERENCED or CACHE_STALE */
	int referenced;
	/* Next registrant, or NULL at the end of the list */
	struct phci_list *next;
} phci_list_t;
     73 
/*
 * An MPxIO group: a set of PHCIs together with the clients that are
 * attached through exactly that set.  Groups are chained on group_list.
 */
typedef struct group {
	/* Not referenced by any code visible in this file */
	int offline;
	/* Number of entries in phcis[] */
	int nphcis;
	/* Number of entries currently filled in clients[] */
	int nclients;
	/* The group's PHCIs, kept sorted by path (see compare_phci) */
	phci_t *phcis;
	/* Resource names of the group's clients; the strings are owned here */
	char **clients;
	/* Next group, or NULL at the end of the list */
	struct group *next;
} group_t;
     82 
/* RCM entry points; these are published through the mpxio_ops vector. */
static int mpxio_register(rcm_handle_t *);
static int mpxio_unregister(rcm_handle_t *);
static int mpxio_getinfo(rcm_handle_t *, char *, id_t, uint_t, char **, char **,
    nvlist_t *, rcm_info_t **);
static int mpxio_suspend(rcm_handle_t *, char *, id_t, timespec_t *, uint_t,
    char **, rcm_info_t **);
static int mpxio_resume(rcm_handle_t *, char *, id_t, uint_t, char **,
    rcm_info_t **);
static int mpxio_offline(rcm_handle_t *, char *, id_t, uint_t, char **,
    rcm_info_t **);
static int mpxio_online(rcm_handle_t *, char *, id_t, uint_t, char **,
    rcm_info_t **);
static int mpxio_remove(rcm_handle_t *, char *, id_t, uint_t, char **,
    rcm_info_t **);
/* Internal helpers: libdevinfo walkers, cache analysis and cleanup. */
static int get_nclients(di_node_t, void *);
static int build_groups(di_node_t, void *);
static void refresh_regs(rcm_handle_t *);
static int get_affected_clients(rcm_handle_t *, char *, int, int, char ***);
static int detect_client_change(rcm_handle_t *, int, int, group_t *, char *);
static int merge_clients(int *, char ***, group_t *);
static phci_list_t *lookup_phci(char *);
static int is_client(di_node_t);
static char *get_rsrcname(di_node_t);
static char *s_state(di_path_state_t);
static int compare_phci(const void *, const void *);
static void free_grouplist();
static void free_group(group_t *);
static void free_clients(int, char **);
static void free_phcis(int, phci_t *);
    112 
/*
 * Operations vector returned to the caller of rcm_mod_init().  The entries
 * are positional.  The three trailing slots are left NULL, i.e. this module
 * supplies no handler for them (NOTE(review): their meaning is defined by
 * struct rcm_mod_ops in rcm_module.h -- confirm there).
 */
static struct rcm_mod_ops mpxio_ops =
{
	RCM_MOD_OPS_VERSION,
	mpxio_register,
	mpxio_unregister,
	mpxio_getinfo,
	mpxio_suspend,
	mpxio_resume,
	mpxio_offline,
	mpxio_online,
	mpxio_remove,
	NULL,
	NULL,
	NULL
};
    128 
    129 static group_t *group_list;
    130 static phci_list_t *reg_list;
    131 static mutex_t mpxio_lock;
    132 
    133 extern int errno;
    134 
    135 /*
    136  * Return the mod-ops vector for initialization.
    137  */
    138 struct rcm_mod_ops *
    139 rcm_mod_init()
    140 {
    141 	rcm_log_message(RCM_TRACE1, "MPXIO: rcm_mod_init()\n");
    142 
    143 	return (&mpxio_ops);
    144 }
    145 
    146 /*
    147  * Return name and version number for mod_info.
    148  */
    149 const char *
    150 rcm_mod_info()
    151 {
    152 	rcm_log_message(RCM_TRACE1, "MPXIO: rcm_mod_info()\n");
    153 
    154 	return (gettext("RCM MPxIO module 1.6"));
    155 }
    156 
    157 /*
    158  * Destroy the cache and mutex lock when being unloaded.
    159  */
    160 int
    161 rcm_mod_fini()
    162 {
    163 	phci_list_t *reg;
    164 	phci_list_t *next;
    165 
    166 	rcm_log_message(RCM_TRACE1, "MPXIO: rcm_mod_fini()\n");
    167 
    168 	/* Free the cache of MPxIO group information */
    169 	free_grouplist();
    170 
    171 	/* Free the cache of registrants */
    172 	reg = reg_list;
    173 	while (reg) {
    174 		next = reg->next;
    175 		free(reg->phci.path);
    176 		free(reg);
    177 		reg = next;
    178 	}
    179 
    180 	/* Destroy the mutex for locking the caches */
    181 	(void) mutex_destroy(&mpxio_lock);
    182 
    183 	return (RCM_SUCCESS);
    184 }
    185 
    186 /*
    187  * During each register callback: totally rebuild the group list from a new
    188  * libdevinfo snapshot, and then update the registrants.
    189  */
    190 static int
    191 mpxio_register(rcm_handle_t *hdl)
    192 {
    193 	int nclients = 0;
    194 	di_node_t devroot;
    195 
    196 	rcm_log_message(RCM_TRACE1, "MPXIO: register()\n");
    197 
    198 	(void) mutex_lock(&mpxio_lock);
    199 
    200 	/* Destroy the previous group list */
    201 	free_grouplist();
    202 
    203 	/* Get a current libdevinfo snapshot */
    204 	if ((devroot = di_init("/", DINFOCPYALL | DINFOPATH)) == DI_NODE_NIL) {
    205 		rcm_log_message(RCM_ERROR,
    206 		    "MPXIO: libdevinfo initialization failed (%s).\n",
    207 		    strerror(errno));
    208 		(void) mutex_unlock(&mpxio_lock);
    209 		return (RCM_FAILURE);
    210 	}
    211 
    212 	/*
    213 	 * First count the total number of clients.  This'll be a useful
    214 	 * upper bound when allocating client arrays within each group.
    215 	 */
    216 	(void) di_walk_node(devroot, DI_WALK_CLDFIRST, &nclients, get_nclients);
    217 
    218 	rcm_log_message(RCM_TRACE2, gettext("MPXIO: found %d clients.\n"),
    219 	    nclients);
    220 
    221 	/*
    222 	 * Then walk the libdevinfo snapshot, building up the new group list
    223 	 * along the way.  Pass in the total number of clients (from above) to
    224 	 * assist in group construction.
    225 	 */
    226 	(void) di_walk_node(devroot, DI_WALK_CLDFIRST, &nclients, build_groups);
    227 
    228 	/* Now with a new group list constructed, refresh the registrants */
    229 	refresh_regs(hdl);
    230 
    231 	/* Free the libdevinfo snapshot */
    232 	di_fini(devroot);
    233 
    234 	(void) mutex_unlock(&mpxio_lock);
    235 
    236 	return (0);
    237 }
    238 
    239 /*
    240  * Unregister all PHCIs and mark the whole registrants list as stale.
    241  */
    242 static int
    243 mpxio_unregister(rcm_handle_t *hdl)
    244 {
    245 	phci_list_t *reg;
    246 
    247 	rcm_log_message(RCM_TRACE1, "MPXIO: unregister()\n");
    248 
    249 	(void) mutex_lock(&mpxio_lock);
    250 
    251 	for (reg = reg_list; reg != NULL; reg = reg->next) {
    252 		(void) rcm_unregister_interest(hdl, reg->phci.path, 0);
    253 		reg->referenced = CACHE_STALE;
    254 	}
    255 
    256 	(void) mutex_unlock(&mpxio_lock);
    257 
    258 	return (RCM_SUCCESS);
    259 }
    260 
    261 /*
    262  * To return usage information, just lookup the PHCI in the cache and return
    263  * a string identifying that it's a PHCI and describing its cached MPxIO state.
    264  * Recurse with the cached list of disks if dependents are to be included.
    265  */
    266 static int
    267 mpxio_getinfo(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags,
    268     char **infostr, char **errstr, nvlist_t *props, rcm_info_t **infop)
    269 {
    270 	size_t len;
    271 	int rv = RCM_SUCCESS;
    272 	char *buf = NULL;
    273 	char **clients = NULL;
    274 	phci_list_t *reg;
    275 	char c;
    276 
    277 	rcm_log_message(RCM_TRACE1, "MPXIO: getinfo(%s)\n", rsrc);
    278 
    279 	*infostr = NULL;
    280 	*errstr = NULL;
    281 
    282 	(void) mutex_lock(&mpxio_lock);
    283 
    284 	if ((reg = lookup_phci(rsrc)) == NULL) {
    285 		*errstr = strdup(MPXIO_MSG_CACHEFAIL);
    286 		(void) mutex_unlock(&mpxio_lock);
    287 		return (RCM_FAILURE);
    288 	}
    289 
    290 	len = snprintf(&c, 1, MPXIO_MSG_USAGE, s_state(reg->phci.state));
    291 	buf = calloc(len + 1, sizeof (char));
    292 	if ((buf == NULL) || (snprintf(buf, len + 1, MPXIO_MSG_USAGE,
    293 	    s_state(reg->phci.state)) > len + 1)) {
    294 		*infostr = strdup(MPXIO_MSG_USAGEUNKNOWN);
    295 		*errstr = strdup(gettext("Cannot construct usage string."));
    296 		(void) mutex_unlock(&mpxio_lock);
    297 		if (buf)
    298 			free(buf);
    299 		return (RCM_FAILURE);
    300 	}
    301 	*infostr = buf;
    302 
    303 	if (flags & RCM_INCLUDE_DEPENDENT) {
    304 		rcm_log_message(RCM_TRACE2, "MPXIO: getting clients\n");
    305 		if (get_affected_clients(hdl, rsrc, CMD_GETINFO, flags,
    306 		    &clients) < 0) {
    307 			*errstr = strdup(gettext("Cannot lookup clients."));
    308 			(void) mutex_unlock(&mpxio_lock);
    309 			return (RCM_FAILURE);
    310 		}
    311 		if (clients) {
    312 			rv = rcm_get_info_list(hdl, clients, flags, infop);
    313 			free(clients);
    314 		} else {
    315 			rcm_log_message(RCM_TRACE2, "MPXIO: none found\n");
    316 		}
    317 	}
    318 
    319 	(void) mutex_unlock(&mpxio_lock);
    320 	return (rv);
    321 }
    322 
    323 /*
    324  * Nothing is implemented for suspend operations.
    325  */
    326 static int
    327 mpxio_suspend(rcm_handle_t *hdl, char *rsrc, id_t id, timespec_t *interval,
    328     uint_t flags, char **errstr, rcm_info_t **infop)
    329 {
    330 	rcm_log_message(RCM_TRACE1, "MPXIO: suspend(%s)\n", rsrc);
    331 
    332 	return (RCM_SUCCESS);
    333 }
    334 
    335 /*
    336  * Nothing is implemented for resume operations.
    337  */
    338 static int
    339 mpxio_resume(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags,
    340     char **errstr, rcm_info_t **infop)
    341 {
    342 	rcm_log_message(RCM_TRACE1, "MPXIO: resume(%s)\n", rsrc);
    343 
    344 	return (RCM_SUCCESS);
    345 }
    346 
    347 /*
    348  * MPxIO has no policy against offlining.  If disks will be affected, then
    349  * base the return value for this request on the results of offlining the
    350  * list of disks.  Otherwise succeed.
    351  */
    352 static int
    353 mpxio_offline(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags,
    354     char **errstr, rcm_info_t **infop)
    355 {
    356 	char **clients = NULL;
    357 	int rv = RCM_SUCCESS;
    358 
    359 	rcm_log_message(RCM_TRACE1, "MPXIO: offline(%s)\n", rsrc);
    360 
    361 	(void) mutex_lock(&mpxio_lock);
    362 
    363 	if (get_affected_clients(hdl, rsrc, CMD_OFFLINE, flags, &clients) < 0) {
    364 		*errstr = strdup(gettext("Cannot lookup clients."));
    365 		(void) mutex_unlock(&mpxio_lock);
    366 		return (RCM_FAILURE);
    367 	}
    368 
    369 	if (clients) {
    370 		rv = rcm_request_offline_list(hdl, clients, flags, infop);
    371 		if (rv != RCM_SUCCESS)
    372 			*errstr = strdup(MPXIO_MSG_LASTPATH);
    373 		free(clients);
    374 	}
    375 
    376 	(void) mutex_unlock(&mpxio_lock);
    377 
    378 	return (rv);
    379 }
    380 
    381 /*
    382  * If disks are affected, then they are probably offline and we need to
    383  * propagate this online notification to them.
    384  */
    385 static int
    386 mpxio_online(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags,
    387     char **errstr, rcm_info_t **infop)
    388 {
    389 	char **clients;
    390 	int rv = RCM_SUCCESS;
    391 
    392 	rcm_log_message(RCM_TRACE1, "MPXIO: online(%s)\n", rsrc);
    393 
    394 	(void) mutex_lock(&mpxio_lock);
    395 
    396 	if (get_affected_clients(hdl, rsrc, CMD_ONLINE, flags, &clients) < 0) {
    397 		*errstr = strdup(gettext("Cannot lookup clients."));
    398 		(void) mutex_unlock(&mpxio_lock);
    399 		return (RCM_FAILURE);
    400 	}
    401 
    402 	if (clients) {
    403 		rv = rcm_notify_online_list(hdl, clients, flags, infop);
    404 		free(clients);
    405 	}
    406 
    407 	(void) mutex_unlock(&mpxio_lock);
    408 
    409 	return (rv);
    410 }
    411 
    412 /*
    413  * If clients are affected, then they are probably offline and we need to
    414  * propagate this removal notification to them.  We can also remove the
    415  * cache entry for this PHCI.  If that leaves its group empty, then the
    416  * group will be removed during the next register callback.
    417  */
    418 static int
    419 mpxio_remove(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags,
    420     char **errstr, rcm_info_t **infop)
    421 {
    422 	char **clients;
    423 	int rv = RCM_SUCCESS;
    424 
    425 	rcm_log_message(RCM_TRACE1, "MPXIO: remove(%s)\n", rsrc);
    426 
    427 	(void) mutex_lock(&mpxio_lock);
    428 
    429 	if (get_affected_clients(hdl, rsrc, CMD_REMOVE, flags, &clients) < 0) {
    430 		*errstr = strdup(gettext("Cannot lookup clients."));
    431 		(void) mutex_unlock(&mpxio_lock);
    432 		return (RCM_FAILURE);
    433 	}
    434 
    435 	if (clients) {
    436 		rv = rcm_notify_remove_list(hdl, clients, flags, infop);
    437 		free(clients);
    438 	}
    439 
    440 	(void) mutex_unlock(&mpxio_lock);
    441 
    442 	return (rv);
    443 }
    444 
    445 
    446 /*
    447  * Returns a string representation of a given libdevinfo path state.
    448  */
    449 static char *
    450 s_state(di_path_state_t state)
    451 {
    452 	switch (state) {
    453 	case DI_PATH_STATE_ONLINE:
    454 		return ("online");
    455 	case DI_PATH_STATE_OFFLINE:
    456 		return ("offline");
    457 	case DI_PATH_STATE_STANDBY:
    458 		return ("standby");
    459 	case DI_PATH_STATE_FAULT:
    460 		return ("faulted");
    461 	default:
    462 		return ("<unknown>");
    463 	}
    464 }
    465 
    466 static int
    467 get_affected_clients(rcm_handle_t *hdl, char *rsrc, int cmd, int flags,
    468     char ***clientsp)
    469 {
    470 	int nclients = 0;
    471 	phci_t phci;
    472 	group_t *group;
    473 	char **clients = NULL;
    474 
    475 	/* Build a dummy phci_t for use with bsearch(). */
    476 	phci.path = rsrc;
    477 
    478 	/* Analyze the effects upon each group. */
    479 	for (group = group_list; group != NULL; group = group->next) {
    480 
    481 		/* If the PHCI isn't in the group, then no effects.  Skip. */
    482 		if (bsearch(&phci, group->phcis, group->nphcis, sizeof (phci_t),
    483 		    compare_phci) == NULL)
    484 			continue;
    485 
    486 		/*
    487 		 * Merge in the clients.  All clients are merged in for getinfo
    488 		 * operations.  Otherwise it's contingent upon a state change
    489 		 * being transferred to the clients as a result of changing
    490 		 * the PHCI's state.
    491 		 */
    492 		if ((cmd == CMD_GETINFO) ||
    493 		    detect_client_change(hdl, cmd, flags, group, rsrc)) {
    494 			if (merge_clients(&nclients, &clients, group) < 0) {
    495 				free_clients(nclients, clients);
    496 				return (-1);
    497 			}
    498 		}
    499 	}
    500 
    501 	/* Return the array of affected disks */
    502 	*clientsp = clients;
    503 	return (0);
    504 }
    505 
    506 /*
    507  * Iterates through the members of a PHCI list, returning the entry
    508  * corresponding to the named PHCI resource.  Returns NULL when the lookup
    509  * fails.
    510  */
    511 static phci_list_t *
    512 lookup_phci(char *rsrc)
    513 {
    514 	phci_list_t *reg;
    515 
    516 	for (reg = reg_list; reg != NULL; reg = reg->next) {
    517 		if (strcmp(reg->phci.path, rsrc) == 0)
    518 			return (reg);
    519 	}
    520 
    521 	return (NULL);
    522 }
    523 
    524 /*
    525  * Tests whether or not an operation on a specific PHCI resource would affect
    526  * the array of client devices attached to the PHCI's MPxIO group.
    527  *
    528  * Returns: 1 if clients would be affected, 0 if not.
    529  */
    530 static int
    531 detect_client_change(rcm_handle_t *hdl, int cmd, int flags, group_t *group,
    532     char *rsrc)
    533 {
    534 	int i;
    535 	int state;
    536 
    537 	/*
    538 	 * Perform a full set analysis on the set of redundant PHCIs.  When
    539 	 * there are no unaffected and online PHCIs, then changing the state
    540 	 * of the named PHCI results in a client state change.
    541 	 */
    542 	for (i = 0; i < group->nphcis; i++) {
    543 
    544 		/* Filter the named resource out of the analysis */
    545 		if (strcmp(group->phcis[i].path, rsrc) == 0)
    546 			continue;
    547 
    548 		/*
    549 		 * If we find a path that's in the ONLINE or STANDBY state
    550 		 * that would be left over in the system after completing
    551 		 * whatever DR or hotplugging operation is in progress, then
    552 		 * return a 0.
    553 		 */
    554 		if ((group->phcis[i].state == DI_PATH_STATE_ONLINE) ||
    555 		    (group->phcis[i].state == DI_PATH_STATE_STANDBY)) {
    556 			if (rcm_get_rsrcstate(hdl, group->phcis[i].path, &state)
    557 			    != RCM_SUCCESS) {
    558 				rcm_log_message(RCM_ERROR,
    559 				    "MPXIO: Failed to query resource state\n");
    560 				continue;
    561 			}
    562 			rcm_log_message(RCM_TRACE2, "MPXIO: state of %s: %d\n",
    563 			    group->phcis[i].path, state);
    564 			if (state == RCM_STATE_ONLINE) {
    565 				return (0);
    566 			}
    567 		}
    568 	}
    569 
    570 	/*
    571 	 * The analysis above didn't find a redundant path to take over.  So
    572 	 * report that the state of the client resources will change.
    573 	 */
    574 	return (1);
    575 }
    576 
    577 /*
    578  * Merges the client disks connected to a particular MPxIO group in with a
    579  * previous array of disk clients.  The result is to adjust the 'nclients'
    580  * value with the new count of disks in the array, and to adjust the 'disks'
    581  * value to be a larger array of disks including its original contents along
    582  * with the current group's contents merged in.
    583  */
    584 static int
    585 merge_clients(int *nclients, char ***clientsp, group_t *group)
    586 {
    587 	int i;
    588 	int old_nclients;
    589 	char **clients_new;
    590 
    591 	if (group->nclients) {
    592 		old_nclients = *nclients;
    593 		*nclients += group->nclients;
    594 		clients_new = realloc(*clientsp,
    595 		    ((*nclients) + 1) * sizeof (char *));
    596 		if (clients_new == NULL) {
    597 			rcm_log_message(RCM_ERROR,
    598 			    "MPXIO: cannot reallocate client array (%s).\n",
    599 			    strerror(errno));
    600 			return (-1);
    601 		}
    602 		for (i = old_nclients; i < (*nclients); i++) {
    603 			/*
    604 			 * Don't allocate space for individual disks in the
    605 			 * merged list.  Just make references to the previously
    606 			 * allocated strings in the group_t structs themselves.
    607 			 */
    608 			clients_new[i] = group->clients[i - old_nclients];
    609 		}
    610 		clients_new[(*nclients)] = NULL;
    611 		*clientsp = clients_new;
    612 	}
    613 
    614 	return (0);
    615 }
    616 
    617 /*
    618  * A libdevinfo di_walk_node() callback.  It's passed an integer pointer as an
    619  * argument, and it increments the integer each time it encounters an MPxIO
    620  * client.  By initializing the integer to zero and doing a libdevinfo walk with
    621  * this function, the total count of MPxIO clients in the system can be found.
    622  */
    623 static int
    624 get_nclients(di_node_t dinode, void *arg)
    625 {
    626 	int *nclients = arg;
    627 
    628 	if (is_client(dinode))
    629 		(*nclients)++;
    630 
    631 	return (DI_WALK_CONTINUE);
    632 }
    633 
    634 /*
    635  * Tests a libdevinfo node to determine if it's an MPxIO client.
    636  *
    637  * Returns: non-zero for true, 0 for false.
    638  */
    639 static int
    640 is_client(di_node_t dinode)
    641 {
    642 	return (di_path_client_next_path(dinode, DI_PATH_NIL) != DI_PATH_NIL);
    643 }
    644 
/*
 * After a new group_list has been constructed, this refreshes the RCM
 * registrations and the reg_list contents.  It uses a clock like algorithm
 * with reference bits in the reg_list to know which registrants are new or
 * old:
 *
 *   1. every existing registrant is marked CACHE_STALE;
 *   2. every PHCI found in group_list is either marked CACHE_REFERENCED (if
 *      it was already on the list) or added to the list as CACHE_NEW;
 *   3. registrants still CACHE_STALE are unregistered and removed, and
 *      CACHE_NEW registrants are registered with RCM.
 *
 * Allocation and registration failures are logged and otherwise ignored.
 */
static void
refresh_regs(rcm_handle_t *hdl)
{
	int i;
	group_t *group;
	phci_list_t *reg;
	phci_list_t *prev_reg;

	/*
	 * First part of the clock-like algorithm: clear reference bits.
	 */
	for (reg = reg_list; reg != NULL; reg = reg->next)
		reg->referenced = CACHE_STALE;

	/*
	 * Second part of the clock-like algorithm: set the reference bits
	 * on every registrant that's still active.  (Also add new list nodes
	 * for new registrants.)
	 */
	for (group = group_list; group != NULL; group = group->next) {
		for (i = 0; i < group->nphcis; i++) {

			/*
			 * If already in the registrants list, refresh its
			 * state.  A STALE entry becomes REFERENCED; an entry
			 * added earlier in this pass keeps its NEW mark so
			 * that it still gets registered below.
			 */
			if ((reg = lookup_phci(group->phcis[i].path)) != NULL) {
				if (reg->referenced == CACHE_STALE)
					reg->referenced = CACHE_REFERENCED;
				reg->phci.state = group->phcis[i].state;
				continue;
			}

			/*
			 * Otherwise, build a new list node and mark it NEW.
			 * The node gets its own copy of the path.
			 */
			reg = (phci_list_t *)calloc(1, sizeof (*reg));
			if (reg == NULL) {
				rcm_log_message(RCM_ERROR,
				    "MPXIO: cannot allocate phci_list (%s).\n",
				    strerror(errno));
				continue;
			}
			reg->phci.path = strdup(group->phcis[i].path);
			if (reg->phci.path == NULL) {
				free(reg);
				rcm_log_message(RCM_ERROR,
				    "MPXIO: cannot allocate phci path (%s).\n",
				    strerror(errno));
				continue;
			}
			reg->phci.state = group->phcis[i].state;
			reg->referenced = CACHE_NEW;

			/* Link it at the head of reg_list */
			reg->next = reg_list;
			reg_list = reg;
		}
	}

	/*
	 * Final part of the clock algorithm: unregister stale entries, and
	 * register new entries.  Stale entries get removed from the list.
	 * prev_reg trails reg so that a node can be unlinked in place.
	 */
	reg = reg_list;
	prev_reg = NULL;
	while (reg) {

		/* Unregister and remove stale entries. */
		if (reg->referenced == CACHE_STALE) {
			(void) rcm_unregister_interest(hdl, reg->phci.path, 0);
			free(reg->phci.path);
			if (prev_reg == NULL) {
				/* Removing the head: advance reg_list itself */
				reg_list = reg->next;
				free(reg);
				reg = reg_list;
			} else {
				/* Removing an interior node: bypass it */
				prev_reg->next = reg->next;
				free(reg);
				reg = prev_reg->next;
			}
			/* prev_reg is unchanged; reg is already the successor */
			continue;
		}

		/* Register new entries. */
		if (reg->referenced == CACHE_NEW) {
			if (rcm_register_interest(hdl, reg->phci.path, 0, NULL)
			    != RCM_SUCCESS) {
				rcm_log_message(RCM_ERROR,
				    "MPXIO: failed to register %s (%s).\n",
				    reg->phci.path, strerror(errno));
			}
		}

		prev_reg = reg;
		reg = reg->next;
	}
}
    749 
    750 
    751 /*
    752  * A libdevinfo di_walk_node() callback that builds up the MPxIO group list.
    753  *
    754  * Every node encountered that's a client node is added into a group's client
    755  * list.  Whenever a group doesn't already exist with a matching set of
    756  * related PHCIs, then a new group is constructed and put at the head of the
    757  * group list.
    758  */
    759 static int
    760 build_groups(di_node_t dinode, void *arg)
    761 {
    762 	int i = 0;
    763 	int nphcis = 0;
    764 	int *nclients = (int *)arg;
    765 	phci_t *phcis;
    766 	group_t *group;
    767 	di_node_t phcinode;
    768 	di_path_t dipath = DI_PATH_NIL;
    769 
    770 	/* Safety check */
    771 	if (nclients == NULL)
    772 		return (DI_WALK_TERMINATE);
    773 
    774 	/*
    775 	 * Build a sorted array of PHCIs pertaining to the client.
    776 	 */
    777 	while ((dipath =
    778 	    di_path_client_next_path(dinode, dipath)) != DI_PATH_NIL)
    779 		nphcis++;
    780 
    781 	/* Skip non-clients. */
    782 	if (nphcis == 0)
    783 		return (DI_WALK_CONTINUE);
    784 
    785 	if ((phcis = (phci_t *)calloc(nphcis, sizeof (phci_t))) == NULL) {
    786 		rcm_log_message(RCM_ERROR,
    787 		    "MPXIO: failed to allocate client's PHCIs (%s).\n",
    788 		    strerror(errno));
    789 		return (DI_WALK_TERMINATE);
    790 	}
    791 	while ((dipath =
    792 	    di_path_client_next_path(dinode, dipath)) != DI_PATH_NIL) {
    793 		phcinode = di_path_phci_node(dipath);
    794 		if (phcinode == DI_NODE_NIL) {
    795 			free_phcis(i, phcis);	/* free preceeding PHCIs */
    796 			rcm_log_message(RCM_ERROR,
    797 			    "MPXIO: client appears to have no PHCIs.\n");
    798 			return (DI_WALK_TERMINATE);
    799 		}
    800 		if ((phcis[i].path = get_rsrcname(phcinode)) == NULL) {
    801 			free_phcis(i, phcis);
    802 			return (DI_WALK_TERMINATE);
    803 		}
    804 		phcis[i].state = di_path_state(dipath);
    805 		i++;
    806 	}
    807 	qsort(phcis, nphcis, sizeof (phci_t), compare_phci);
    808 
    809 	/*
    810 	 * Compare that PHCI set to each existing group's set.  We just add
    811 	 * the client to the group and exit successfully once a match is made.
    812 	 * Falling out of this loop means no match was found.
    813 	 */
    814 	for (group = group_list; group != NULL; group = group->next) {
    815 
    816 		/* There is no match if the number of PHCIs is inequal */
    817 		if (nphcis != group->nphcis)
    818 			continue;
    819 
    820 		/* Compare the PHCIs linearly (which is okay; they're sorted) */
    821 		for (i = 0; i < nphcis; i++)
    822 			if (strcmp(phcis[i].path, group->phcis[i].path) != 0)
    823 				break;
    824 
    825 		/*
    826 		 * If the loop above completed, we have a match.  Add the client
    827 		 * to the group's disk array in that case, and return
    828 		 * successfully.
    829 		 */
    830 		if (i == nphcis) {
    831 			free_phcis(nphcis, phcis);
    832 			if ((group->clients[group->nclients] =
    833 			    get_rsrcname(dinode)) == NULL)
    834 				return (DI_WALK_TERMINATE);
    835 			group->nclients++;
    836 			return (DI_WALK_CONTINUE);
    837 		}
    838 	}
    839 
    840 	/* The loop above didn't find a match.  So build a new group. */
    841 	if ((group = (group_t *)calloc(1, sizeof (*group))) == NULL) {
    842 		rcm_log_message(RCM_ERROR,
    843 		    "MPXIO: failed to allocate PHCI group (%s).\n",
    844 		    strerror(errno));
    845 		free_phcis(nphcis, phcis);
    846 		return (DI_WALK_TERMINATE);
    847 	}
    848 	if ((group->clients = (char **)calloc(*nclients, sizeof (char *))) ==
    849 	    NULL) {
    850 		free(group);
    851 		free_phcis(nphcis, phcis);
    852 		return (DI_WALK_TERMINATE);
    853 	}
    854 	group->nphcis = nphcis;
    855 	group->phcis = phcis;
    856 	if ((group->clients[0] = get_rsrcname(dinode)) == NULL) {
    857 		free_group(group);
    858 		return (DI_WALK_TERMINATE);
    859 	}
    860 	group->nclients = 1;
    861 
    862 	/* Link the group into the group list and return successfully. */
    863 	group->next = group_list;
    864 	group_list = group;
    865 	return (DI_WALK_CONTINUE);
    866 }
    867 
    868 /*
    869  * For bsearch() and qsort().  Returns the results of a strcmp() on the names
    870  * of two phci_t's.
    871  */
    872 static int
    873 compare_phci(const void *arg1, const void *arg2)
    874 {
    875 	phci_t *p1 = (phci_t *)arg1;
    876 	phci_t *p2 = (phci_t *)arg2;
    877 
    878 	if ((p1 == NULL) || (p2 == NULL)) {
    879 		if (p1 != NULL)
    880 			return (-1);
    881 		else if (p2 != NULL)
    882 			return (1);
    883 		return (0);
    884 	}
    885 
    886 	return (strcmp(p1->path, p2->path));
    887 }
    888 
    889 /*
    890  * Free the whole list of group's in the global group_list.
    891  */
    892 static void
    893 free_grouplist()
    894 {
    895 	group_t *group = group_list;
    896 	group_t *next;
    897 
    898 	while (group) {
    899 		next = group->next;
    900 		free_group(group);
    901 		group = next;
    902 	}
    903 
    904 	group_list = NULL;
    905 }
    906 
    907 /*
    908  * Free the contents of a single group_t.
    909  */
    910 static void
    911 free_group(group_t *group)
    912 {
    913 	if (group) {
    914 		free_phcis(group->nphcis, group->phcis);
    915 		free_clients(group->nclients, group->clients);
    916 		free(group);
    917 	}
    918 }
    919 
    920 /*
    921  * Free an array of clients.
    922  */
    923 static void
    924 free_clients(int nclients, char **clients)
    925 {
    926 	int i;
    927 
    928 	if (clients != NULL) {
    929 		if (nclients > 0) {
    930 			for (i = 0; i < nclients; i++)
    931 				if (clients[i])
    932 					free(clients[i]);
    933 		}
    934 		free(clients);
    935 	}
    936 }
    937 
    938 /*
    939  * Free an array of phci_t's.
    940  */
    941 static void
    942 free_phcis(int nphcis, phci_t *phcis)
    943 {
    944 	int i;
    945 
    946 	if ((phcis != NULL) && (nphcis > 0)) {
    947 		for (i = 0; i < nphcis; i++)
    948 			if (phcis[i].path)
    949 				free(phcis[i].path);
    950 		free(phcis);
    951 	}
    952 }
    953 
    954 /*
    955  * Converts a libdevinfo node into a /devices path.  Caller must free results.
    956  */
    957 static char *
    958 get_rsrcname(di_node_t dinode)
    959 {
    960 	int len;
    961 	char *rsrcname;
    962 	char *devfspath;
    963 	char name[MAXPATHLEN];
    964 
    965 	if ((devfspath = di_devfs_path(dinode)) == NULL) {
    966 		rcm_log_message(RCM_ERROR, "MPXIO: resource has null path.\n");
    967 		return (NULL);
    968 	}
    969 
    970 	len = snprintf(name, sizeof (name), "/devices%s", devfspath);
    971 	di_devfs_path_free(devfspath);
    972 	if (len >= sizeof (name)) {
    973 		rcm_log_message(RCM_ERROR, "MPXIO: resource path too long.\n");
    974 		return (NULL);
    975 	}
    976 
    977 	if ((rsrcname = strdup(name)) == NULL)
    978 		rcm_log_message(RCM_ERROR,
    979 		    "MPXIO: failed to allocate resource name (%s).\n",
    980 		    strerror(errno));
    981 
    982 	return (rsrcname);
    983 }
    984