Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more
     28  * detailed discussion of the overall mpxio architecture.
     29  *
     30  * Default locking order:
     31  *
 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci::vh_phci_mutex))
 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci::vh_client_mutex))
 * _NOTE(LOCK_ORDER(mdi_vhci::vh_phci_mutex, mdi_phci::ph_mutex))
 * _NOTE(LOCK_ORDER(mdi_vhci::vh_client_mutex, mdi_client::ct_mutex))
     36  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
     37  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
     38  * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
     39  */
     40 
     41 #include <sys/note.h>
     42 #include <sys/types.h>
     43 #include <sys/varargs.h>
     44 #include <sys/param.h>
     45 #include <sys/errno.h>
     46 #include <sys/uio.h>
     47 #include <sys/buf.h>
     48 #include <sys/modctl.h>
     49 #include <sys/open.h>
     50 #include <sys/kmem.h>
     51 #include <sys/poll.h>
     52 #include <sys/conf.h>
     53 #include <sys/bootconf.h>
     54 #include <sys/cmn_err.h>
     55 #include <sys/stat.h>
     56 #include <sys/ddi.h>
     57 #include <sys/sunddi.h>
     58 #include <sys/ddipropdefs.h>
     59 #include <sys/sunndi.h>
     60 #include <sys/ndi_impldefs.h>
     61 #include <sys/promif.h>
     62 #include <sys/sunmdi.h>
     63 #include <sys/mdi_impldefs.h>
     64 #include <sys/taskq.h>
     65 #include <sys/epm.h>
     66 #include <sys/sunpm.h>
     67 #include <sys/modhash.h>
     68 #include <sys/disp.h>
     69 #include <sys/autoconf.h>
     70 #include <sys/sysmacros.h>
     71 
     72 #ifdef	DEBUG
     73 #include <sys/debug.h>
     74 int	mdi_debug = 1;
     75 int	mdi_debug_logonly = 0;
     76 #define	MDI_DEBUG(dbglevel, pargs) if (mdi_debug >= (dbglevel))	i_mdi_log pargs
     77 #define	MDI_WARN	CE_WARN, __func__
     78 #define	MDI_NOTE	CE_NOTE, __func__
     79 #define	MDI_CONT	CE_CONT, __func__
     80 static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...);
     81 #else	/* !DEBUG */
     82 #define	MDI_DEBUG(dbglevel, pargs)
     83 #endif	/* DEBUG */
     84 int	mdi_debug_consoleonly = 0;
     85 int	mdi_delay = 3;
     86 
     87 extern pri_t	minclsyspri;
     88 extern int	modrootloaded;
     89 
     90 /*
     91  * Global mutex:
     92  * Protects vHCI list and structure members.
     93  */
     94 kmutex_t	mdi_mutex;
     95 
     96 /*
     97  * Registered vHCI class driver lists
     98  */
     99 int		mdi_vhci_count;
    100 mdi_vhci_t	*mdi_vhci_head;
    101 mdi_vhci_t	*mdi_vhci_tail;
    102 
    103 /*
    104  * Client Hash Table size
    105  */
    106 static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
    107 
    108 /*
    109  * taskq interface definitions
    110  */
    111 #define	MDI_TASKQ_N_THREADS	8
    112 #define	MDI_TASKQ_PRI		minclsyspri
    113 #define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
    114 #define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)
    115 
    116 taskq_t				*mdi_taskq;
    117 static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
    118 
    119 #define	TICKS_PER_SECOND	(drv_usectohz(1000000))
    120 
    121 /*
    122  * The data should be "quiet" for this interval (in seconds) before the
    123  * vhci cached data is flushed to the disk.
    124  */
    125 static int mdi_vhcache_flush_delay = 10;
    126 
    127 /* number of seconds the vhcache flush daemon will sleep idle before exiting */
    128 static int mdi_vhcache_flush_daemon_idle_time = 60;
    129 
    130 /*
    131  * MDI falls back to discovery of all paths when a bus_config_one fails.
    132  * The following parameters can be used to tune this operation.
    133  *
    134  * mdi_path_discovery_boot
    135  *	Number of times path discovery will be attempted during early boot.
    136  *	Probably there is no reason to ever set this value to greater than one.
    137  *
    138  * mdi_path_discovery_postboot
    139  *	Number of times path discovery will be attempted after early boot.
    140  *	Set it to a minimum of two to allow for discovery of iscsi paths which
    141  *	may happen very late during booting.
    142  *
    143  * mdi_path_discovery_interval
    144  *	Minimum number of seconds MDI will wait between successive discovery
    145  *	of all paths. Set it to -1 to disable discovery of all paths.
    146  */
    147 static int mdi_path_discovery_boot = 1;
    148 static int mdi_path_discovery_postboot = 2;
    149 static int mdi_path_discovery_interval = 10;
    150 
    151 /*
    152  * number of seconds the asynchronous configuration thread will sleep idle
    153  * before exiting.
    154  */
    155 static int mdi_async_config_idle_time = 600;
    156 
    157 static int mdi_bus_config_cache_hash_size = 256;
    158 
    159 /* turns off multithreaded configuration for certain operations */
    160 static int mdi_mtc_off = 0;
    161 
    162 /*
    163  * The "path" to a pathinfo node is identical to the /devices path to a
    164  * devinfo node had the device been enumerated under a pHCI instead of
    165  * a vHCI.  This pathinfo "path" is associated with a 'path_instance'.
    166  * This association persists across create/delete of the pathinfo nodes,
    167  * but not across reboot.
    168  */
    169 static uint_t		mdi_pathmap_instance = 1;	/* 0 -> any path */
    170 static int		mdi_pathmap_hash_size = 256;
    171 static kmutex_t		mdi_pathmap_mutex;
    172 static mod_hash_t	*mdi_pathmap_bypath;		/* "path"->instance */
    173 static mod_hash_t	*mdi_pathmap_byinstance;	/* instance->"path" */
    174 static mod_hash_t	*mdi_pathmap_sbyinstance;	/* inst->shortpath */
    175 
    176 /*
    177  * MDI component property name/value string definitions
    178  */
    179 const char 		*mdi_component_prop = "mpxio-component";
    180 const char		*mdi_component_prop_vhci = "vhci";
    181 const char		*mdi_component_prop_phci = "phci";
    182 const char		*mdi_component_prop_client = "client";
    183 
    184 /*
    185  * MDI client global unique identifier property name
    186  */
    187 const char		*mdi_client_guid_prop = "client-guid";
    188 
    189 /*
    190  * MDI client load balancing property name/value string definitions
    191  */
    192 const char		*mdi_load_balance = "load-balance";
    193 const char		*mdi_load_balance_none = "none";
    194 const char		*mdi_load_balance_rr = "round-robin";
    195 const char		*mdi_load_balance_lba = "logical-block";
    196 
    197 /*
    198  * Obsolete vHCI class definition; to be removed after Leadville update
    199  */
    200 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
    201 
    202 static char vhci_greeting[] =
    203 	"\tThere already exists one vHCI driver for class %s\n"
    204 	"\tOnly one vHCI driver for each class is allowed\n";
    205 
    206 /*
    207  * Static function prototypes
    208  */
    209 static int		i_mdi_phci_offline(dev_info_t *, uint_t);
    210 static int		i_mdi_client_offline(dev_info_t *, uint_t);
    211 static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
    212 static void		i_mdi_phci_post_detach(dev_info_t *,
    213 			    ddi_detach_cmd_t, int);
    214 static int		i_mdi_client_pre_detach(dev_info_t *,
    215 			    ddi_detach_cmd_t);
    216 static void		i_mdi_client_post_detach(dev_info_t *,
    217 			    ddi_detach_cmd_t, int);
    218 static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
    219 static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
    220 static int 		i_mdi_lba_lb(mdi_client_t *ct,
    221 			    mdi_pathinfo_t **ret_pip, struct buf *buf);
    222 static void		i_mdi_pm_hold_client(mdi_client_t *, int);
    223 static void		i_mdi_pm_rele_client(mdi_client_t *, int);
    224 static void		i_mdi_pm_reset_client(mdi_client_t *);
    225 static int		i_mdi_power_all_phci(mdi_client_t *);
    226 static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);
    227 
    228 
    229 /*
    230  * Internal mdi_pathinfo node functions
    231  */
    232 static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
    233 
    234 static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
    235 static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
    236 static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
    237 static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
    238 static void		i_mdi_phci_unlock(mdi_phci_t *);
    239 static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
    240 static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
    241 static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
    242 static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
    243 			    mdi_client_t *);
    244 static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
    245 static void		i_mdi_client_remove_path(mdi_client_t *,
    246 			    mdi_pathinfo_t *);
    247 
    248 static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
    249 			    mdi_pathinfo_state_t, int);
    250 static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
    251 static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
    252 			    char **, int);
    253 static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
    254 static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
    255 static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
    256 static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
    257 static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
    258 static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
    259 static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
    260 static void		i_mdi_client_update_state(mdi_client_t *);
    261 static int		i_mdi_client_compute_state(mdi_client_t *,
    262 			    mdi_phci_t *);
    263 static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
    264 static void		i_mdi_client_unlock(mdi_client_t *);
    265 static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
    266 static mdi_client_t	*i_devi_get_client(dev_info_t *);
    267 /*
    268  * NOTE: this will be removed once the NWS files are changed to use the new
    269  * mdi_{enable,disable}_path interfaces
    270  */
    271 static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
    272 				int, int);
    273 static mdi_pathinfo_t 	*i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
    274 				mdi_vhci_t *vh, int flags, int op);
    275 /*
    276  * Failover related function prototypes
    277  */
    278 static int		i_mdi_failover(void *);
    279 
    280 /*
    281  * misc internal functions
    282  */
    283 static int		i_mdi_get_hash_key(char *);
    284 static int		i_map_nvlist_error_to_mdi(int);
    285 static void		i_mdi_report_path_state(mdi_client_t *,
    286 			    mdi_pathinfo_t *);
    287 
    288 static void		setup_vhci_cache(mdi_vhci_t *);
    289 static int		destroy_vhci_cache(mdi_vhci_t *);
    290 static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
    291 static boolean_t	stop_vhcache_flush_thread(void *, int);
    292 static void		free_string_array(char **, int);
    293 static void		free_vhcache_phci(mdi_vhcache_phci_t *);
    294 static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
    295 static void		free_vhcache_client(mdi_vhcache_client_t *);
    296 static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
    297 static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
    298 static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
    299 static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
    300 static void		vhcache_pi_add(mdi_vhci_config_t *,
    301 			    struct mdi_pathinfo *);
    302 static void		vhcache_pi_remove(mdi_vhci_config_t *,
    303 			    struct mdi_pathinfo *);
    304 static void		free_phclient_path_list(mdi_phys_path_t *);
    305 static void		sort_vhcache_paths(mdi_vhcache_client_t *);
    306 static int		flush_vhcache(mdi_vhci_config_t *, int);
    307 static void		vhcache_dirty(mdi_vhci_config_t *);
    308 static void		free_async_client_config(mdi_async_client_config_t *);
    309 static void		single_threaded_vhconfig_enter(mdi_vhci_config_t *);
    310 static void		single_threaded_vhconfig_exit(mdi_vhci_config_t *);
    311 static nvlist_t		*read_on_disk_vhci_cache(char *);
    312 extern int		fread_nvlist(char *, nvlist_t **);
    313 extern int		fwrite_nvlist(char *, nvlist_t *);
    314 
    315 /* called once when first vhci registers with mdi */
    316 static void
    317 i_mdi_init()
    318 {
    319 	static int initialized = 0;
    320 
    321 	if (initialized)
    322 		return;
    323 	initialized = 1;
    324 
    325 	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
    326 
    327 	/* Create our taskq resources */
    328 	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
    329 	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
    330 	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
    331 	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */
    332 
    333 	/* Allocate ['path_instance' <-> "path"] maps */
    334 	mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
    335 	mdi_pathmap_bypath = mod_hash_create_strhash(
    336 	    "mdi_pathmap_bypath", mdi_pathmap_hash_size,
    337 	    mod_hash_null_valdtor);
    338 	mdi_pathmap_byinstance = mod_hash_create_idhash(
    339 	    "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
    340 	    mod_hash_null_valdtor);
    341 	mdi_pathmap_sbyinstance = mod_hash_create_idhash(
    342 	    "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size,
    343 	    mod_hash_null_valdtor);
    344 }
    345 
    346 /*
    347  * mdi_get_component_type():
    348  *		Return mpxio component type
    349  * Return Values:
    350  *		MDI_COMPONENT_NONE
    351  *		MDI_COMPONENT_VHCI
    352  *		MDI_COMPONENT_PHCI
    353  *		MDI_COMPONENT_CLIENT
    354  * XXX This doesn't work under multi-level MPxIO and should be
    355  *	removed when clients migrate mdi_component_is_*() interfaces.
    356  */
    357 int
    358 mdi_get_component_type(dev_info_t *dip)
    359 {
    360 	return (DEVI(dip)->devi_mdi_component);
    361 }
    362 
    363 /*
    364  * mdi_vhci_register():
    365  *		Register a vHCI module with the mpxio framework
    366  *		mdi_vhci_register() is called by vHCI drivers to register the
    367  *		'class_driver' vHCI driver and its MDI entrypoints with the
    368  *		mpxio framework.  The vHCI driver must call this interface as
    369  *		part of its attach(9e) handler.
    370  *		Competing threads may try to attach mdi_vhci_register() as
    371  *		the vHCI drivers are loaded and attached as a result of pHCI
    372  *		driver instance registration (mdi_phci_register()) with the
    373  *		framework.
    374  * Return Values:
    375  *		MDI_SUCCESS
    376  *		MDI_FAILURE
    377  */
    378 /*ARGSUSED*/
    379 int
    380 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
    381     int flags)
    382 {
    383 	mdi_vhci_t		*vh = NULL;
    384 
    385 	/* Registrant can't be older */
    386 	ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV);
    387 
    388 #ifdef DEBUG
    389 	/*
    390 	 * IB nexus driver is loaded only when IB hardware is present.
    391 	 * In order to be able to do this there is a need to drive the loading
    392 	 * and attaching of the IB nexus driver (especially when an IB hardware
    393 	 * is dynamically plugged in) when an IB HCA driver (PHCI)
    394 	 * is being attached. Unfortunately this gets into the limitations
    395 	 * of devfs as there seems to be no clean way to drive configuration
    396 	 * of a subtree from another subtree of a devfs. Hence, do not ASSERT
    397 	 * for IB.
    398 	 */
    399 	if (strcmp(class, MDI_HCI_CLASS_IB) != 0)
    400 		ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
    401 #endif
    402 
    403 	i_mdi_init();
    404 
    405 	mutex_enter(&mdi_mutex);
    406 	/*
    407 	 * Scan for already registered vhci
    408 	 */
    409 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
    410 		if (strcmp(vh->vh_class, class) == 0) {
    411 			/*
    412 			 * vHCI has already been created.  Check for valid
    413 			 * vHCI ops registration.  We only support one vHCI
    414 			 * module per class
    415 			 */
    416 			if (vh->vh_ops != NULL) {
    417 				mutex_exit(&mdi_mutex);
    418 				cmn_err(CE_NOTE, vhci_greeting, class);
    419 				return (MDI_FAILURE);
    420 			}
    421 			break;
    422 		}
    423 	}
    424 
    425 	/*
    426 	 * if not yet created, create the vHCI component
    427 	 */
    428 	if (vh == NULL) {
    429 		struct client_hash	*hash = NULL;
    430 		char			*load_balance;
    431 
    432 		/*
    433 		 * Allocate and initialize the mdi extensions
    434 		 */
    435 		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
    436 		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
    437 		    KM_SLEEP);
    438 		vh->vh_client_table = hash;
    439 		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
    440 		(void) strcpy(vh->vh_class, class);
    441 		vh->vh_lb = LOAD_BALANCE_RR;
    442 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
    443 		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
    444 			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
    445 				vh->vh_lb = LOAD_BALANCE_NONE;
    446 			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
    447 				    == 0) {
    448 				vh->vh_lb = LOAD_BALANCE_LBA;
    449 			}
    450 			ddi_prop_free(load_balance);
    451 		}
    452 
    453 		mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
    454 		mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);
    455 
    456 		/*
    457 		 * Store the vHCI ops vectors
    458 		 */
    459 		vh->vh_dip = vdip;
    460 		vh->vh_ops = vops;
    461 
    462 		setup_vhci_cache(vh);
    463 
    464 		if (mdi_vhci_head == NULL) {
    465 			mdi_vhci_head = vh;
    466 		}
    467 		if (mdi_vhci_tail) {
    468 			mdi_vhci_tail->vh_next = vh;
    469 		}
    470 		mdi_vhci_tail = vh;
    471 		mdi_vhci_count++;
    472 	}
    473 
    474 	/*
    475 	 * Claim the devfs node as a vhci component
    476 	 */
    477 	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
    478 
    479 	/*
    480 	 * Initialize our back reference from dev_info node
    481 	 */
    482 	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
    483 	mutex_exit(&mdi_mutex);
    484 	return (MDI_SUCCESS);
    485 }
    486 
    487 /*
    488  * mdi_vhci_unregister():
    489  *		Unregister a vHCI module from mpxio framework
    490  *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
    491  * 		of a vhci to unregister it from the framework.
    492  * Return Values:
    493  *		MDI_SUCCESS
    494  *		MDI_FAILURE
    495  */
    496 /*ARGSUSED*/
    497 int
    498 mdi_vhci_unregister(dev_info_t *vdip, int flags)
    499 {
    500 	mdi_vhci_t	*found, *vh, *prev = NULL;
    501 
    502 	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
    503 
    504 	/*
    505 	 * Check for invalid VHCI
    506 	 */
    507 	if ((vh = i_devi_get_vhci(vdip)) == NULL)
    508 		return (MDI_FAILURE);
    509 
    510 	/*
    511 	 * Scan the list of registered vHCIs for a match
    512 	 */
    513 	mutex_enter(&mdi_mutex);
    514 	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
    515 		if (found == vh)
    516 			break;
    517 		prev = found;
    518 	}
    519 
    520 	if (found == NULL) {
    521 		mutex_exit(&mdi_mutex);
    522 		return (MDI_FAILURE);
    523 	}
    524 
    525 	/*
    526 	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
    527 	 * should have been unregistered, before a vHCI can be
    528 	 * unregistered.
    529 	 */
    530 	MDI_VHCI_PHCI_LOCK(vh);
    531 	if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
    532 		MDI_VHCI_PHCI_UNLOCK(vh);
    533 		mutex_exit(&mdi_mutex);
    534 		return (MDI_FAILURE);
    535 	}
    536 	MDI_VHCI_PHCI_UNLOCK(vh);
    537 
    538 	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
    539 		mutex_exit(&mdi_mutex);
    540 		return (MDI_FAILURE);
    541 	}
    542 
    543 	/*
    544 	 * Remove the vHCI from the global list
    545 	 */
    546 	if (vh == mdi_vhci_head) {
    547 		mdi_vhci_head = vh->vh_next;
    548 	} else {
    549 		prev->vh_next = vh->vh_next;
    550 	}
    551 	if (vh == mdi_vhci_tail) {
    552 		mdi_vhci_tail = prev;
    553 	}
    554 	mdi_vhci_count--;
    555 	mutex_exit(&mdi_mutex);
    556 
    557 	vh->vh_ops = NULL;
    558 	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
    559 	DEVI(vdip)->devi_mdi_xhci = NULL;
    560 	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
    561 	kmem_free(vh->vh_client_table,
    562 	    mdi_client_table_size * sizeof (struct client_hash));
    563 	mutex_destroy(&vh->vh_phci_mutex);
    564 	mutex_destroy(&vh->vh_client_mutex);
    565 
    566 	kmem_free(vh, sizeof (mdi_vhci_t));
    567 	return (MDI_SUCCESS);
    568 }
    569 
    570 /*
    571  * i_mdi_vhci_class2vhci():
    572  *		Look for a matching vHCI module given a vHCI class name
    573  * Return Values:
    574  *		Handle to a vHCI component
    575  *		NULL
    576  */
    577 static mdi_vhci_t *
    578 i_mdi_vhci_class2vhci(char *class)
    579 {
    580 	mdi_vhci_t	*vh = NULL;
    581 
    582 	ASSERT(!MUTEX_HELD(&mdi_mutex));
    583 
    584 	mutex_enter(&mdi_mutex);
    585 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
    586 		if (strcmp(vh->vh_class, class) == 0) {
    587 			break;
    588 		}
    589 	}
    590 	mutex_exit(&mdi_mutex);
    591 	return (vh);
    592 }
    593 
    594 /*
    595  * i_devi_get_vhci():
    596  *		Utility function to get the handle to a vHCI component
    597  * Return Values:
    598  *		Handle to a vHCI component
    599  *		NULL
    600  */
    601 mdi_vhci_t *
    602 i_devi_get_vhci(dev_info_t *vdip)
    603 {
    604 	mdi_vhci_t	*vh = NULL;
    605 	if (MDI_VHCI(vdip)) {
    606 		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
    607 	}
    608 	return (vh);
    609 }
    610 
    611 /*
    612  * mdi_phci_register():
    613  *		Register a pHCI module with mpxio framework
    614  *		mdi_phci_register() is called by pHCI drivers to register with
    615  *		the mpxio framework and a specific 'class_driver' vHCI.  The
    616  *		pHCI driver must call this interface as part of its attach(9e)
    617  *		handler.
    618  * Return Values:
    619  *		MDI_SUCCESS
    620  *		MDI_FAILURE
    621  */
    622 /*ARGSUSED*/
    623 int
    624 mdi_phci_register(char *class, dev_info_t *pdip, int flags)
    625 {
    626 	mdi_phci_t		*ph;
    627 	mdi_vhci_t		*vh;
    628 	char			*data;
    629 
    630 	/*
    631 	 * Some subsystems, like fcp, perform pHCI registration from a
    632 	 * different thread than the one doing the pHCI attach(9E) - the
    633 	 * driver attach code is waiting for this other thread to complete.
    634 	 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
    635 	 * (indicating that some thread has done an ndi_devi_enter of parent)
    636 	 * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
    637 	 */
    638 	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
    639 
    640 	/*
    641 	 * Check for mpxio-disable property. Enable mpxio if the property is
    642 	 * missing or not set to "yes".
    643 	 * If the property is set to "yes" then emit a brief message.
    644 	 */
    645 	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
    646 	    &data) == DDI_SUCCESS)) {
    647 		if (strcmp(data, "yes") == 0) {
    648 			MDI_DEBUG(1, (MDI_CONT, pdip,
    649 			    "?multipath capabilities disabled via %s.conf.",
    650 			    ddi_driver_name(pdip)));
    651 			ddi_prop_free(data);
    652 			return (MDI_FAILURE);
    653 		}
    654 		ddi_prop_free(data);
    655 	}
    656 
    657 	/*
    658 	 * Search for a matching vHCI
    659 	 */
    660 	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
    661 	if (vh == NULL) {
    662 		return (MDI_FAILURE);
    663 	}
    664 
    665 	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
    666 	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
    667 	ph->ph_dip = pdip;
    668 	ph->ph_vhci = vh;
    669 	ph->ph_next = NULL;
    670 	ph->ph_unstable = 0;
    671 	ph->ph_vprivate = 0;
    672 	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
    673 
    674 	MDI_PHCI_LOCK(ph);
    675 	MDI_PHCI_SET_POWER_UP(ph);
    676 	MDI_PHCI_UNLOCK(ph);
    677 	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
    678 	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
    679 
    680 	vhcache_phci_add(vh->vh_config, ph);
    681 
    682 	MDI_VHCI_PHCI_LOCK(vh);
    683 	if (vh->vh_phci_head == NULL) {
    684 		vh->vh_phci_head = ph;
    685 	}
    686 	if (vh->vh_phci_tail) {
    687 		vh->vh_phci_tail->ph_next = ph;
    688 	}
    689 	vh->vh_phci_tail = ph;
    690 	vh->vh_phci_count++;
    691 	MDI_VHCI_PHCI_UNLOCK(vh);
    692 
    693 	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
    694 	return (MDI_SUCCESS);
    695 }
    696 
    697 /*
    698  * mdi_phci_unregister():
    699  *		Unregister a pHCI module from mpxio framework
    700  *		mdi_phci_unregister() is called by the pHCI drivers from their
    701  *		detach(9E) handler to unregister their instances from the
    702  *		framework.
    703  * Return Values:
    704  *		MDI_SUCCESS
    705  *		MDI_FAILURE
    706  */
    707 /*ARGSUSED*/
    708 int
    709 mdi_phci_unregister(dev_info_t *pdip, int flags)
    710 {
    711 	mdi_vhci_t		*vh;
    712 	mdi_phci_t		*ph;
    713 	mdi_phci_t		*tmp;
    714 	mdi_phci_t		*prev = NULL;
    715 	mdi_pathinfo_t		*pip;
    716 
    717 	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
    718 
    719 	ph = i_devi_get_phci(pdip);
    720 	if (ph == NULL) {
    721 		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI"));
    722 		return (MDI_FAILURE);
    723 	}
    724 
    725 	vh = ph->ph_vhci;
    726 	ASSERT(vh != NULL);
    727 	if (vh == NULL) {
    728 		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI"));
    729 		return (MDI_FAILURE);
    730 	}
    731 
    732 	MDI_VHCI_PHCI_LOCK(vh);
    733 	tmp = vh->vh_phci_head;
    734 	while (tmp) {
    735 		if (tmp == ph) {
    736 			break;
    737 		}
    738 		prev = tmp;
    739 		tmp = tmp->ph_next;
    740 	}
    741 
    742 	if (ph == vh->vh_phci_head) {
    743 		vh->vh_phci_head = ph->ph_next;
    744 	} else {
    745 		prev->ph_next = ph->ph_next;
    746 	}
    747 
    748 	if (ph == vh->vh_phci_tail) {
    749 		vh->vh_phci_tail = prev;
    750 	}
    751 
    752 	vh->vh_phci_count--;
    753 	MDI_VHCI_PHCI_UNLOCK(vh);
    754 
    755 	/* Walk remaining pathinfo nodes and disassociate them from pHCI */
    756 	MDI_PHCI_LOCK(ph);
    757 	for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip;
    758 	    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link)
    759 		MDI_PI(pip)->pi_phci = NULL;
    760 	MDI_PHCI_UNLOCK(ph);
    761 
    762 	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
    763 	    ESC_DDI_INITIATOR_UNREGISTER);
    764 	vhcache_phci_remove(vh->vh_config, ph);
    765 	cv_destroy(&ph->ph_unstable_cv);
    766 	mutex_destroy(&ph->ph_mutex);
    767 	kmem_free(ph, sizeof (mdi_phci_t));
    768 	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
    769 	DEVI(pdip)->devi_mdi_xhci = NULL;
    770 	return (MDI_SUCCESS);
    771 }
    772 
    773 /*
    774  * i_devi_get_phci():
    775  * 		Utility function to return the phci extensions.
    776  */
    777 static mdi_phci_t *
    778 i_devi_get_phci(dev_info_t *pdip)
    779 {
    780 	mdi_phci_t	*ph = NULL;
    781 
    782 	if (MDI_PHCI(pdip)) {
    783 		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
    784 	}
    785 	return (ph);
    786 }
    787 
    788 /*
    789  * Single thread mdi entry into devinfo node for modifying its children.
    790  * If necessary we perform an ndi_devi_enter of the vHCI before doing
    791  * an ndi_devi_enter of 'dip'.  We maintain circular in two parts: one
    792  * for the vHCI and one for the pHCI.
    793  */
    794 void
    795 mdi_devi_enter(dev_info_t *phci_dip, int *circular)
    796 {
    797 	dev_info_t	*vdip;
    798 	int		vcircular, pcircular;
    799 
    800 	/* Verify calling context */
    801 	ASSERT(MDI_PHCI(phci_dip));
    802 	vdip = mdi_devi_get_vdip(phci_dip);
    803 	ASSERT(vdip);			/* A pHCI always has a vHCI */
    804 
    805 	/*
    806 	 * If pHCI is detaching then the framework has already entered the
    807 	 * vHCI on a threads that went down the code path leading to
    808 	 * detach_node().  This framework enter of the vHCI during pHCI
    809 	 * detach is done to avoid deadlock with vHCI power management
    810 	 * operations which enter the vHCI and the enter down the path
    811 	 * to the pHCI. If pHCI is detaching then we piggyback this calls
    812 	 * enter of the vHCI on frameworks vHCI enter that has already
    813 	 * occurred - this is OK because we know that the framework thread
    814 	 * doing detach is waiting for our completion.
    815 	 *
    816 	 * We should DEVI_IS_DETACHING under an enter of the parent to avoid
    817 	 * race with detach - but we can't do that because the framework has
    818 	 * already entered the parent, so we have some complexity instead.
    819 	 */
    820 	for (;;) {
    821 		if (ndi_devi_tryenter(vdip, &vcircular)) {
    822 			ASSERT(vcircular != -1);
    823 			if (DEVI_IS_DETACHING(phci_dip)) {
    824 				ndi_devi_exit(vdip, vcircular);
    825 				vcircular = -1;
    826 			}
    827 			break;
    828 		} else if (DEVI_IS_DETACHING(phci_dip)) {
    829 			vcircular = -1;
    830 			break;
    831 		} else if (servicing_interrupt()) {
    832 			/*
    833 			 * Don't delay an interrupt (and ensure adaptive
    834 			 * mutex inversion support).
    835 			 */
    836 			ndi_devi_enter(vdip, &vcircular);
    837 			break;
    838 		} else {
    839 			delay_random(mdi_delay);
    840 		}
    841 	}
    842 
    843 	ndi_devi_enter(phci_dip, &pcircular);
    844 	*circular = (vcircular << 16) | (pcircular & 0xFFFF);
    845 }
    846 
    847 /*
    848  * Attempt to mdi_devi_enter.
    849  */
    850 int
    851 mdi_devi_tryenter(dev_info_t *phci_dip, int *circular)
    852 {
    853 	dev_info_t	*vdip;
    854 	int		vcircular, pcircular;
    855 
    856 	/* Verify calling context */
    857 	ASSERT(MDI_PHCI(phci_dip));
    858 	vdip = mdi_devi_get_vdip(phci_dip);
    859 	ASSERT(vdip);			/* A pHCI always has a vHCI */
    860 
    861 	if (ndi_devi_tryenter(vdip, &vcircular)) {
    862 		if (ndi_devi_tryenter(phci_dip, &pcircular)) {
    863 			*circular = (vcircular << 16) | (pcircular & 0xFFFF);
    864 			return (1);	/* locked */
    865 		}
    866 		ndi_devi_exit(vdip, vcircular);
    867 	}
    868 	return (0);			/* busy */
    869 }
    870 
    871 /*
    872  * Release mdi_devi_enter or successful mdi_devi_tryenter.
    873  */
    874 void
    875 mdi_devi_exit(dev_info_t *phci_dip, int circular)
    876 {
    877 	dev_info_t	*vdip;
    878 	int		vcircular, pcircular;
    879 
    880 	/* Verify calling context */
    881 	ASSERT(MDI_PHCI(phci_dip));
    882 	vdip = mdi_devi_get_vdip(phci_dip);
    883 	ASSERT(vdip);			/* A pHCI always has a vHCI */
    884 
    885 	/* extract two circular recursion values from single int */
    886 	pcircular = (short)(circular & 0xFFFF);
    887 	vcircular = (short)((circular >> 16) & 0xFFFF);
    888 
    889 	ndi_devi_exit(phci_dip, pcircular);
    890 	if (vcircular != -1)
    891 		ndi_devi_exit(vdip, vcircular);
    892 }
    893 
    894 /*
    895  * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
    896  * around a pHCI drivers calls to mdi_pi_online/offline, after holding
    897  * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
    898  * with vHCI power management code during path online/offline.  Each
    899  * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
    900  * occur within the scope of an active mdi_devi_enter that establishes the
    901  * circular value.
    902  */
    903 void
    904 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
    905 {
    906 	int		pcircular;
    907 
    908 	/* Verify calling context */
    909 	ASSERT(MDI_PHCI(phci_dip));
    910 
    911 	/* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */
    912 	ndi_hold_devi(phci_dip);
    913 
    914 	pcircular = (short)(circular & 0xFFFF);
    915 	ndi_devi_exit(phci_dip, pcircular);
    916 }
    917 
    918 void
    919 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
    920 {
    921 	int		pcircular;
    922 
    923 	/* Verify calling context */
    924 	ASSERT(MDI_PHCI(phci_dip));
    925 
    926 	ndi_devi_enter(phci_dip, &pcircular);
    927 
    928 	/* Drop hold from mdi_devi_exit_phci. */
    929 	ndi_rele_devi(phci_dip);
    930 
    931 	/* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
    932 	ASSERT(pcircular == ((short)(*circular & 0xFFFF)));
    933 }
    934 
    935 /*
    936  * mdi_devi_get_vdip():
    937  *		given a pHCI dip return vHCI dip
    938  */
    939 dev_info_t *
    940 mdi_devi_get_vdip(dev_info_t *pdip)
    941 {
    942 	mdi_phci_t	*ph;
    943 
    944 	ph = i_devi_get_phci(pdip);
    945 	if (ph && ph->ph_vhci)
    946 		return (ph->ph_vhci->vh_dip);
    947 	return (NULL);
    948 }
    949 
    950 /*
    951  * mdi_devi_pdip_entered():
    952  *		Return 1 if we are vHCI and have done an ndi_devi_enter
    953  *		of a pHCI
    954  */
    955 int
    956 mdi_devi_pdip_entered(dev_info_t *vdip)
    957 {
    958 	mdi_vhci_t	*vh;
    959 	mdi_phci_t	*ph;
    960 
    961 	vh = i_devi_get_vhci(vdip);
    962 	if (vh == NULL)
    963 		return (0);
    964 
    965 	MDI_VHCI_PHCI_LOCK(vh);
    966 	ph = vh->vh_phci_head;
    967 	while (ph) {
    968 		if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
    969 			MDI_VHCI_PHCI_UNLOCK(vh);
    970 			return (1);
    971 		}
    972 		ph = ph->ph_next;
    973 	}
    974 	MDI_VHCI_PHCI_UNLOCK(vh);
    975 	return (0);
    976 }
    977 
    978 /*
    979  * mdi_phci_path2devinfo():
    980  * 		Utility function to search for a valid phci device given
    981  *		the devfs pathname.
    982  */
    983 dev_info_t *
    984 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
    985 {
    986 	char		*temp_pathname;
    987 	mdi_vhci_t	*vh;
    988 	mdi_phci_t	*ph;
    989 	dev_info_t 	*pdip = NULL;
    990 
    991 	vh = i_devi_get_vhci(vdip);
    992 	ASSERT(vh != NULL);
    993 
    994 	if (vh == NULL) {
    995 		/*
    996 		 * Invalid vHCI component, return failure
    997 		 */
    998 		return (NULL);
    999 	}
   1000 
   1001 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
   1002 	MDI_VHCI_PHCI_LOCK(vh);
   1003 	ph = vh->vh_phci_head;
   1004 	while (ph != NULL) {
   1005 		pdip = ph->ph_dip;
   1006 		ASSERT(pdip != NULL);
   1007 		*temp_pathname = '\0';
   1008 		(void) ddi_pathname(pdip, temp_pathname);
   1009 		if (strcmp(temp_pathname, pathname) == 0) {
   1010 			break;
   1011 		}
   1012 		ph = ph->ph_next;
   1013 	}
   1014 	if (ph == NULL) {
   1015 		pdip = NULL;
   1016 	}
   1017 	MDI_VHCI_PHCI_UNLOCK(vh);
   1018 	kmem_free(temp_pathname, MAXPATHLEN);
   1019 	return (pdip);
   1020 }
   1021 
   1022 /*
   1023  * mdi_phci_get_path_count():
   1024  * 		get number of path information nodes associated with a given
   1025  *		pHCI device.
   1026  */
   1027 int
   1028 mdi_phci_get_path_count(dev_info_t *pdip)
   1029 {
   1030 	mdi_phci_t	*ph;
   1031 	int		count = 0;
   1032 
   1033 	ph = i_devi_get_phci(pdip);
   1034 	if (ph != NULL) {
   1035 		count = ph->ph_path_count;
   1036 	}
   1037 	return (count);
   1038 }
   1039 
   1040 /*
   1041  * i_mdi_phci_lock():
   1042  *		Lock a pHCI device
   1043  * Return Values:
   1044  *		None
   1045  * Note:
   1046  *		The default locking order is:
   1047  *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
   1048  *		But there are number of situations where locks need to be
   1049  *		grabbed in reverse order.  This routine implements try and lock
   1050  *		mechanism depending on the requested parameter option.
   1051  */
   1052 static void
   1053 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
   1054 {
   1055 	if (pip) {
   1056 		/* Reverse locking is requested. */
   1057 		while (MDI_PHCI_TRYLOCK(ph) == 0) {
   1058 			if (servicing_interrupt()) {
   1059 				MDI_PI_HOLD(pip);
   1060 				MDI_PI_UNLOCK(pip);
   1061 				MDI_PHCI_LOCK(ph);
   1062 				MDI_PI_LOCK(pip);
   1063 				MDI_PI_RELE(pip);
   1064 				break;
   1065 			} else {
   1066 				/*
   1067 				 * tryenter failed. Try to grab again
   1068 				 * after a small delay
   1069 				 */
   1070 				MDI_PI_HOLD(pip);
   1071 				MDI_PI_UNLOCK(pip);
   1072 				delay_random(mdi_delay);
   1073 				MDI_PI_LOCK(pip);
   1074 				MDI_PI_RELE(pip);
   1075 			}
   1076 		}
   1077 	} else {
   1078 		MDI_PHCI_LOCK(ph);
   1079 	}
   1080 }
   1081 
   1082 /*
   1083  * i_mdi_phci_unlock():
   1084  *		Unlock the pHCI component
   1085  */
   1086 static void
   1087 i_mdi_phci_unlock(mdi_phci_t *ph)
   1088 {
   1089 	MDI_PHCI_UNLOCK(ph);
   1090 }
   1091 
   1092 /*
   1093  * i_mdi_devinfo_create():
   1094  *		create client device's devinfo node
   1095  * Return Values:
   1096  *		dev_info
   1097  *		NULL
   1098  * Notes:
   1099  */
   1100 static dev_info_t *
   1101 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
   1102 	char **compatible, int ncompatible)
   1103 {
   1104 	dev_info_t *cdip = NULL;
   1105 
   1106 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
   1107 
   1108 	/* Verify for duplicate entry */
   1109 	cdip = i_mdi_devinfo_find(vh, name, guid);
   1110 	ASSERT(cdip == NULL);
   1111 	if (cdip) {
   1112 		cmn_err(CE_WARN,
   1113 		    "i_mdi_devinfo_create: client %s@%s already exists",
   1114 			name ? name : "", guid ? guid : "");
   1115 	}
   1116 
   1117 	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
   1118 	if (cdip == NULL)
   1119 		goto fail;
   1120 
   1121 	/*
   1122 	 * Create component type and Global unique identifier
   1123 	 * properties
   1124 	 */
   1125 	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
   1126 	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
   1127 		goto fail;
   1128 	}
   1129 
   1130 	/* Decorate the node with compatible property */
   1131 	if (compatible &&
   1132 	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
   1133 	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
   1134 		goto fail;
   1135 	}
   1136 
   1137 	return (cdip);
   1138 
   1139 fail:
   1140 	if (cdip) {
   1141 		(void) ndi_prop_remove_all(cdip);
   1142 		(void) ndi_devi_free(cdip);
   1143 	}
   1144 	return (NULL);
   1145 }
   1146 
   1147 /*
   1148  * i_mdi_devinfo_find():
   1149  *		Find a matching devinfo node for given client node name
   1150  *		and its guid.
   1151  * Return Values:
   1152  *		Handle to a dev_info node or NULL
   1153  */
   1154 static dev_info_t *
   1155 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
   1156 {
   1157 	char			*data;
   1158 	dev_info_t 		*cdip = NULL;
   1159 	dev_info_t 		*ndip = NULL;
   1160 	int			circular;
   1161 
   1162 	ndi_devi_enter(vh->vh_dip, &circular);
   1163 	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
   1164 	while ((cdip = ndip) != NULL) {
   1165 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
   1166 
   1167 		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
   1168 			continue;
   1169 		}
   1170 
   1171 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
   1172 		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
   1173 		    &data) != DDI_PROP_SUCCESS) {
   1174 			continue;
   1175 		}
   1176 
   1177 		if (strcmp(data, guid) != 0) {
   1178 			ddi_prop_free(data);
   1179 			continue;
   1180 		}
   1181 		ddi_prop_free(data);
   1182 		break;
   1183 	}
   1184 	ndi_devi_exit(vh->vh_dip, circular);
   1185 	return (cdip);
   1186 }
   1187 
   1188 /*
   1189  * i_mdi_devinfo_remove():
   1190  *		Remove a client device node
   1191  */
   1192 static int
   1193 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
   1194 {
   1195 	int	rv = MDI_SUCCESS;
   1196 
   1197 	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
   1198 	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
   1199 		rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE);
   1200 		if (rv != NDI_SUCCESS) {
   1201 			MDI_DEBUG(1, (MDI_NOTE, cdip,
   1202 			    "!failed: cdip %p", (void *)cdip));
   1203 		}
   1204 		/*
   1205 		 * Convert to MDI error code
   1206 		 */
   1207 		switch (rv) {
   1208 		case NDI_SUCCESS:
   1209 			rv = MDI_SUCCESS;
   1210 			break;
   1211 		case NDI_BUSY:
   1212 			rv = MDI_BUSY;
   1213 			break;
   1214 		default:
   1215 			rv = MDI_FAILURE;
   1216 			break;
   1217 		}
   1218 	}
   1219 	return (rv);
   1220 }
   1221 
   1222 /*
   1223  * i_devi_get_client()
   1224  *		Utility function to get mpxio component extensions
   1225  */
   1226 static mdi_client_t *
   1227 i_devi_get_client(dev_info_t *cdip)
   1228 {
   1229 	mdi_client_t	*ct = NULL;
   1230 
   1231 	if (MDI_CLIENT(cdip)) {
   1232 		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
   1233 	}
   1234 	return (ct);
   1235 }
   1236 
   1237 /*
   1238  * i_mdi_is_child_present():
   1239  *		Search for the presence of client device dev_info node
   1240  */
   1241 static int
   1242 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
   1243 {
   1244 	int		rv = MDI_FAILURE;
   1245 	struct dev_info	*dip;
   1246 	int		circular;
   1247 
   1248 	ndi_devi_enter(vdip, &circular);
   1249 	dip = DEVI(vdip)->devi_child;
   1250 	while (dip) {
   1251 		if (dip == DEVI(cdip)) {
   1252 			rv = MDI_SUCCESS;
   1253 			break;
   1254 		}
   1255 		dip = dip->devi_sibling;
   1256 	}
   1257 	ndi_devi_exit(vdip, circular);
   1258 	return (rv);
   1259 }
   1260 
   1261 
   1262 /*
   1263  * i_mdi_client_lock():
   1264  *		Grab client component lock
   1265  * Return Values:
   1266  *		None
   1267  * Note:
   1268  *		The default locking order is:
   1269  *		_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
   1270  *		But there are number of situations where locks need to be
   1271  *		grabbed in reverse order.  This routine implements try and lock
   1272  *		mechanism depending on the requested parameter option.
   1273  */
   1274 static void
   1275 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
   1276 {
   1277 	if (pip) {
   1278 		/*
   1279 		 * Reverse locking is requested.
   1280 		 */
   1281 		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
   1282 			if (servicing_interrupt()) {
   1283 				MDI_PI_HOLD(pip);
   1284 				MDI_PI_UNLOCK(pip);
   1285 				MDI_CLIENT_LOCK(ct);
   1286 				MDI_PI_LOCK(pip);
   1287 				MDI_PI_RELE(pip);
   1288 				break;
   1289 			} else {
   1290 				/*
   1291 				 * tryenter failed. Try to grab again
   1292 				 * after a small delay
   1293 				 */
   1294 				MDI_PI_HOLD(pip);
   1295 				MDI_PI_UNLOCK(pip);
   1296 				delay_random(mdi_delay);
   1297 				MDI_PI_LOCK(pip);
   1298 				MDI_PI_RELE(pip);
   1299 			}
   1300 		}
   1301 	} else {
   1302 		MDI_CLIENT_LOCK(ct);
   1303 	}
   1304 }
   1305 
   1306 /*
   1307  * i_mdi_client_unlock():
   1308  *		Unlock a client component
   1309  */
   1310 static void
   1311 i_mdi_client_unlock(mdi_client_t *ct)
   1312 {
   1313 	MDI_CLIENT_UNLOCK(ct);
   1314 }
   1315 
   1316 /*
   1317  * i_mdi_client_alloc():
   1318  * 		Allocate and initialize a client structure.  Caller should
   1319  *		hold the vhci client lock.
   1320  * Return Values:
   1321  *		Handle to a client component
   1322  */
   1323 /*ARGSUSED*/
   1324 static mdi_client_t *
   1325 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
   1326 {
   1327 	mdi_client_t	*ct;
   1328 
   1329 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
   1330 
   1331 	/*
   1332 	 * Allocate and initialize a component structure.
   1333 	 */
   1334 	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
   1335 	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
   1336 	ct->ct_hnext = NULL;
   1337 	ct->ct_hprev = NULL;
   1338 	ct->ct_dip = NULL;
   1339 	ct->ct_vhci = vh;
   1340 	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
   1341 	(void) strcpy(ct->ct_drvname, name);
   1342 	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
   1343 	(void) strcpy(ct->ct_guid, lguid);
   1344 	ct->ct_cprivate = NULL;
   1345 	ct->ct_vprivate = NULL;
   1346 	ct->ct_flags = 0;
   1347 	ct->ct_state = MDI_CLIENT_STATE_FAILED;
   1348 	MDI_CLIENT_LOCK(ct);
   1349 	MDI_CLIENT_SET_OFFLINE(ct);
   1350 	MDI_CLIENT_SET_DETACH(ct);
   1351 	MDI_CLIENT_SET_POWER_UP(ct);
   1352 	MDI_CLIENT_UNLOCK(ct);
   1353 	ct->ct_failover_flags = 0;
   1354 	ct->ct_failover_status = 0;
   1355 	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
   1356 	ct->ct_unstable = 0;
   1357 	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
   1358 	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
   1359 	ct->ct_lb = vh->vh_lb;
   1360 	ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
   1361 	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
   1362 	ct->ct_path_count = 0;
   1363 	ct->ct_path_head = NULL;
   1364 	ct->ct_path_tail = NULL;
   1365 	ct->ct_path_last = NULL;
   1366 
   1367 	/*
   1368 	 * Add this client component to our client hash queue
   1369 	 */
   1370 	i_mdi_client_enlist_table(vh, ct);
   1371 	return (ct);
   1372 }
   1373 
   1374 /*
   1375  * i_mdi_client_enlist_table():
   1376  *		Attach the client device to the client hash table. Caller
   1377  *		should hold the vhci client lock.
   1378  */
   1379 static void
   1380 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
   1381 {
   1382 	int 			index;
   1383 	struct client_hash	*head;
   1384 
   1385 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
   1386 
   1387 	index = i_mdi_get_hash_key(ct->ct_guid);
   1388 	head = &vh->vh_client_table[index];
   1389 	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
   1390 	head->ct_hash_head = ct;
   1391 	head->ct_hash_count++;
   1392 	vh->vh_client_count++;
   1393 }
   1394 
   1395 /*
   1396  * i_mdi_client_delist_table():
   1397  *		Attach the client device to the client hash table.
   1398  *		Caller should hold the vhci client lock.
   1399  */
   1400 static void
   1401 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
   1402 {
   1403 	int			index;
   1404 	char			*guid;
   1405 	struct client_hash 	*head;
   1406 	mdi_client_t		*next;
   1407 	mdi_client_t		*last;
   1408 
   1409 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
   1410 
   1411 	guid = ct->ct_guid;
   1412 	index = i_mdi_get_hash_key(guid);
   1413 	head = &vh->vh_client_table[index];
   1414 
   1415 	last = NULL;
   1416 	next = (mdi_client_t *)head->ct_hash_head;
   1417 	while (next != NULL) {
   1418 		if (next == ct) {
   1419 			break;
   1420 		}
   1421 		last = next;
   1422 		next = next->ct_hnext;
   1423 	}
   1424 
   1425 	if (next) {
   1426 		head->ct_hash_count--;
   1427 		if (last == NULL) {
   1428 			head->ct_hash_head = ct->ct_hnext;
   1429 		} else {
   1430 			last->ct_hnext = ct->ct_hnext;
   1431 		}
   1432 		ct->ct_hnext = NULL;
   1433 		vh->vh_client_count--;
   1434 	}
   1435 }
   1436 
   1437 
   1438 /*
   1439  * i_mdi_client_free():
   1440  *		Free a client component
   1441  */
   1442 static int
   1443 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
   1444 {
   1445 	int		rv = MDI_SUCCESS;
   1446 	int		flags = ct->ct_flags;
   1447 	dev_info_t	*cdip;
   1448 	dev_info_t	*vdip;
   1449 
   1450 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
   1451 
   1452 	vdip = vh->vh_dip;
   1453 	cdip = ct->ct_dip;
   1454 
   1455 	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
   1456 	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
   1457 	DEVI(cdip)->devi_mdi_client = NULL;
   1458 
   1459 	/*
   1460 	 * Clear out back ref. to dev_info_t node
   1461 	 */
   1462 	ct->ct_dip = NULL;
   1463 
   1464 	/*
   1465 	 * Remove this client from our hash queue
   1466 	 */
   1467 	i_mdi_client_delist_table(vh, ct);
   1468 
   1469 	/*
   1470 	 * Uninitialize and free the component
   1471 	 */
   1472 	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
   1473 	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
   1474 	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
   1475 	cv_destroy(&ct->ct_failover_cv);
   1476 	cv_destroy(&ct->ct_unstable_cv);
   1477 	cv_destroy(&ct->ct_powerchange_cv);
   1478 	mutex_destroy(&ct->ct_mutex);
   1479 	kmem_free(ct, sizeof (*ct));
   1480 
   1481 	if (cdip != NULL) {
   1482 		MDI_VHCI_CLIENT_UNLOCK(vh);
   1483 		(void) i_mdi_devinfo_remove(vdip, cdip, flags);
   1484 		MDI_VHCI_CLIENT_LOCK(vh);
   1485 	}
   1486 	return (rv);
   1487 }
   1488 
   1489 /*
   1490  * i_mdi_client_find():
   1491  * 		Find the client structure corresponding to a given guid
   1492  *		Caller should hold the vhci client lock.
   1493  */
   1494 static mdi_client_t *
   1495 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
   1496 {
   1497 	int			index;
   1498 	struct client_hash	*head;
   1499 	mdi_client_t		*ct;
   1500 
   1501 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
   1502 
   1503 	index = i_mdi_get_hash_key(guid);
   1504 	head = &vh->vh_client_table[index];
   1505 
   1506 	ct = head->ct_hash_head;
   1507 	while (ct != NULL) {
   1508 		if (strcmp(ct->ct_guid, guid) == 0 &&
   1509 		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
   1510 			break;
   1511 		}
   1512 		ct = ct->ct_hnext;
   1513 	}
   1514 	return (ct);
   1515 }
   1516 
   1517 /*
   1518  * i_mdi_client_update_state():
   1519  *		Compute and update client device state
   1520  * Notes:
   1521  *		A client device can be in any of three possible states:
   1522  *
   1523  *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
   1524  *		one online/standby paths. Can tolerate failures.
   1525  *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
   1526  *		no alternate paths available as standby. A failure on the online
   1527  *		would result in loss of access to device data.
   1528  *		MDI_CLIENT_STATE_FAILED - Client device in failed state with
   1529  *		no paths available to access the device.
   1530  */
   1531 static void
   1532 i_mdi_client_update_state(mdi_client_t *ct)
   1533 {
   1534 	int state;
   1535 
   1536 	ASSERT(MDI_CLIENT_LOCKED(ct));
   1537 	state = i_mdi_client_compute_state(ct, NULL);
   1538 	MDI_CLIENT_SET_STATE(ct, state);
   1539 }
   1540 
   1541 /*
   1542  * i_mdi_client_compute_state():
   1543  *		Compute client device state
   1544  *
   1545  *		mdi_phci_t *	Pointer to pHCI structure which should
   1546  *				while computing the new value.  Used by
   1547  *				i_mdi_phci_offline() to find the new
   1548  *				client state after DR of a pHCI.
   1549  */
   1550 static int
   1551 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
   1552 {
   1553 	int		state;
   1554 	int		online_count = 0;
   1555 	int		standby_count = 0;
   1556 	mdi_pathinfo_t	*pip, *next;
   1557 
   1558 	ASSERT(MDI_CLIENT_LOCKED(ct));
   1559 	pip = ct->ct_path_head;
   1560 	while (pip != NULL) {
   1561 		MDI_PI_LOCK(pip);
   1562 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
   1563 		if (MDI_PI(pip)->pi_phci == ph) {
   1564 			MDI_PI_UNLOCK(pip);
   1565 			pip = next;
   1566 			continue;
   1567 		}
   1568 
   1569 		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
   1570 				== MDI_PATHINFO_STATE_ONLINE)
   1571 			online_count++;
   1572 		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
   1573 				== MDI_PATHINFO_STATE_STANDBY)
   1574 			standby_count++;
   1575 		MDI_PI_UNLOCK(pip);
   1576 		pip = next;
   1577 	}
   1578 
   1579 	if (online_count == 0) {
   1580 		if (standby_count == 0) {
   1581 			state = MDI_CLIENT_STATE_FAILED;
   1582 			MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
   1583 			    "client state failed: ct = %p", (void *)ct));
   1584 		} else if (standby_count == 1) {
   1585 			state = MDI_CLIENT_STATE_DEGRADED;
   1586 		} else {
   1587 			state = MDI_CLIENT_STATE_OPTIMAL;
   1588 		}
   1589 	} else if (online_count == 1) {
   1590 		if (standby_count == 0) {
   1591 			state = MDI_CLIENT_STATE_DEGRADED;
   1592 		} else {
   1593 			state = MDI_CLIENT_STATE_OPTIMAL;
   1594 		}
   1595 	} else {
   1596 		state = MDI_CLIENT_STATE_OPTIMAL;
   1597 	}
   1598 	return (state);
   1599 }
   1600 
   1601 /*
   1602  * i_mdi_client2devinfo():
   1603  *		Utility function
   1604  */
   1605 dev_info_t *
   1606 i_mdi_client2devinfo(mdi_client_t *ct)
   1607 {
   1608 	return (ct->ct_dip);
   1609 }
   1610 
   1611 /*
   1612  * mdi_client_path2_devinfo():
   1613  * 		Given the parent devinfo and child devfs pathname, search for
   1614  *		a valid devfs node handle.
   1615  */
   1616 dev_info_t *
   1617 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
   1618 {
   1619 	dev_info_t 	*cdip = NULL;
   1620 	dev_info_t 	*ndip = NULL;
   1621 	char		*temp_pathname;
   1622 	int		circular;
   1623 
   1624 	/*
   1625 	 * Allocate temp buffer
   1626 	 */
   1627 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
   1628 
   1629 	/*
   1630 	 * Lock parent against changes
   1631 	 */
   1632 	ndi_devi_enter(vdip, &circular);
   1633 	ndip = (dev_info_t *)DEVI(vdip)->devi_child;
   1634 	while ((cdip = ndip) != NULL) {
   1635 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
   1636 
   1637 		*temp_pathname = '\0';
   1638 		(void) ddi_pathname(cdip, temp_pathname);
   1639 		if (strcmp(temp_pathname, pathname) == 0) {
   1640 			break;
   1641 		}
   1642 	}
   1643 	/*
   1644 	 * Release devinfo lock
   1645 	 */
   1646 	ndi_devi_exit(vdip, circular);
   1647 
   1648 	/*
   1649 	 * Free the temp buffer
   1650 	 */
   1651 	kmem_free(temp_pathname, MAXPATHLEN);
   1652 	return (cdip);
   1653 }
   1654 
   1655 /*
   1656  * mdi_client_get_path_count():
   1657  * 		Utility function to get number of path information nodes
   1658  *		associated with a given client device.
   1659  */
   1660 int
   1661 mdi_client_get_path_count(dev_info_t *cdip)
   1662 {
   1663 	mdi_client_t	*ct;
   1664 	int		count = 0;
   1665 
   1666 	ct = i_devi_get_client(cdip);
   1667 	if (ct != NULL) {
   1668 		count = ct->ct_path_count;
   1669 	}
   1670 	return (count);
   1671 }
   1672 
   1673 
   1674 /*
   1675  * i_mdi_get_hash_key():
   1676  * 		Create a hash using strings as keys
   1677  *
   1678  */
   1679 static int
   1680 i_mdi_get_hash_key(char *str)
   1681 {
   1682 	uint32_t	g, hash = 0;
   1683 	char		*p;
   1684 
   1685 	for (p = str; *p != '\0'; p++) {
   1686 		g = *p;
   1687 		hash += g;
   1688 	}
   1689 	return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
   1690 }
   1691 
   1692 /*
   1693  * mdi_get_lb_policy():
   1694  * 		Get current load balancing policy for a given client device
   1695  */
   1696 client_lb_t
   1697 mdi_get_lb_policy(dev_info_t *cdip)
   1698 {
   1699 	client_lb_t	lb = LOAD_BALANCE_NONE;
   1700 	mdi_client_t	*ct;
   1701 
   1702 	ct = i_devi_get_client(cdip);
   1703 	if (ct != NULL) {
   1704 		lb = ct->ct_lb;
   1705 	}
   1706 	return (lb);
   1707 }
   1708 
   1709 /*
   1710  * mdi_set_lb_region_size():
   1711  * 		Set current region size for the load-balance
   1712  */
   1713 int
   1714 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
   1715 {
   1716 	mdi_client_t	*ct;
   1717 	int		rv = MDI_FAILURE;
   1718 
   1719 	ct = i_devi_get_client(cdip);
   1720 	if (ct != NULL && ct->ct_lb_args != NULL) {
   1721 		ct->ct_lb_args->region_size = region_size;
   1722 		rv = MDI_SUCCESS;
   1723 	}
   1724 	return (rv);
   1725 }
   1726 
   1727 /*
   1728  * mdi_Set_lb_policy():
   1729  * 		Set current load balancing policy for a given client device
   1730  */
   1731 int
   1732 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
   1733 {
   1734 	mdi_client_t	*ct;
   1735 	int		rv = MDI_FAILURE;
   1736 
   1737 	ct = i_devi_get_client(cdip);
   1738 	if (ct != NULL) {
   1739 		ct->ct_lb = lb;
   1740 		rv = MDI_SUCCESS;
   1741 	}
   1742 	return (rv);
   1743 }
   1744 
   1745 /*
   1746  * mdi_failover():
   1747  *		failover function called by the vHCI drivers to initiate
   1748  *		a failover operation.  This is typically due to non-availability
   1749  *		of online paths to route I/O requests.  Failover can be
   1750  *		triggered through user application also.
   1751  *
   1752  *		The vHCI driver calls mdi_failover() to initiate a failover
   1753  *		operation. mdi_failover() calls back into the vHCI driver's
   1754  *		vo_failover() entry point to perform the actual failover
   1755  *		operation.  The reason for requiring the vHCI driver to
   1756  *		initiate failover by calling mdi_failover(), instead of directly
   1757  *		executing vo_failover() itself, is to ensure that the mdi
   1758  *		framework can keep track of the client state properly.
   1759  *		Additionally, mdi_failover() provides as a convenience the
   1760  *		option of performing the failover operation synchronously or
   1761  *		asynchronously
   1762  *
   1763  *		Upon successful completion of the failover operation, the
   1764  *		paths that were previously ONLINE will be in the STANDBY state,
   1765  *		and the newly activated paths will be in the ONLINE state.
   1766  *
   1767  *		The flags modifier determines whether the activation is done
   1768  *		synchronously: MDI_FAILOVER_SYNC
   1769  * Return Values:
   1770  *		MDI_SUCCESS
   1771  *		MDI_FAILURE
   1772  *		MDI_BUSY
   1773  */
   1774 /*ARGSUSED*/
   1775 int
   1776 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
   1777 {
   1778 	int			rv;
   1779 	mdi_client_t		*ct;
   1780 
   1781 	ct = i_devi_get_client(cdip);
   1782 	ASSERT(ct != NULL);
   1783 	if (ct == NULL) {
   1784 		/* cdip is not a valid client device. Nothing more to do. */
   1785 		return (MDI_FAILURE);
   1786 	}
   1787 
   1788 	MDI_CLIENT_LOCK(ct);
   1789 
   1790 	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
   1791 		/* A path to the client is being freed */
   1792 		MDI_CLIENT_UNLOCK(ct);
   1793 		return (MDI_BUSY);
   1794 	}
   1795 
   1796 
   1797 	if (MDI_CLIENT_IS_FAILED(ct)) {
   1798 		/*
   1799 		 * Client is in failed state. Nothing more to do.
   1800 		 */
   1801 		MDI_CLIENT_UNLOCK(ct);
   1802 		return (MDI_FAILURE);
   1803 	}
   1804 
   1805 	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
   1806 		/*
   1807 		 * Failover is already in progress; return BUSY
   1808 		 */
   1809 		MDI_CLIENT_UNLOCK(ct);
   1810 		return (MDI_BUSY);
   1811 	}
   1812 	/*
   1813 	 * Make sure that mdi_pathinfo node state changes are processed.
   1814 	 * We do not allow failovers to progress while client path state
   1815 	 * changes are in progress
   1816 	 */
   1817 	if (ct->ct_unstable) {
   1818 		if (flags == MDI_FAILOVER_ASYNC) {
   1819 			MDI_CLIENT_UNLOCK(ct);
   1820 			return (MDI_BUSY);
   1821 		} else {
   1822 			while (ct->ct_unstable)
   1823 				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
   1824 		}
   1825 	}
   1826 
   1827 	/*
   1828 	 * Client device is in stable state. Before proceeding, perform sanity
   1829 	 * checks again.
   1830 	 */
   1831 	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
   1832 	    (!i_ddi_devi_attached(ct->ct_dip))) {
   1833 		/*
   1834 		 * Client is in failed state. Nothing more to do.
   1835 		 */
   1836 		MDI_CLIENT_UNLOCK(ct);
   1837 		return (MDI_FAILURE);
   1838 	}
   1839 
   1840 	/*
   1841 	 * Set the client state as failover in progress.
   1842 	 */
   1843 	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
   1844 	ct->ct_failover_flags = flags;
   1845 	MDI_CLIENT_UNLOCK(ct);
   1846 
   1847 	if (flags == MDI_FAILOVER_ASYNC) {
   1848 		/*
   1849 		 * Submit the initiate failover request via CPR safe
   1850 		 * taskq threads.
   1851 		 */
   1852 		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
   1853 		    ct, KM_SLEEP);
   1854 		return (MDI_ACCEPT);
   1855 	} else {
   1856 		/*
   1857 		 * Synchronous failover mode.  Typically invoked from the user
   1858 		 * land.
   1859 		 */
   1860 		rv = i_mdi_failover(ct);
   1861 	}
   1862 	return (rv);
   1863 }
   1864 
   1865 /*
   1866  * i_mdi_failover():
   1867  *		internal failover function. Invokes vHCI drivers failover
   1868  *		callback function and process the failover status
   1869  * Return Values:
   1870  *		None
   1871  *
   1872  * Note: A client device in failover state can not be detached or freed.
   1873  */
   1874 static int
   1875 i_mdi_failover(void *arg)
   1876 {
   1877 	int		rv = MDI_SUCCESS;
   1878 	mdi_client_t	*ct = (mdi_client_t *)arg;
   1879 	mdi_vhci_t	*vh = ct->ct_vhci;
   1880 
   1881 	ASSERT(!MDI_CLIENT_LOCKED(ct));
   1882 
   1883 	if (vh->vh_ops->vo_failover != NULL) {
   1884 		/*
   1885 		 * Call vHCI drivers callback routine
   1886 		 */
   1887 		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
   1888 		    ct->ct_failover_flags);
   1889 	}
   1890 
   1891 	MDI_CLIENT_LOCK(ct);
   1892 	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
   1893 
   1894 	/*
   1895 	 * Save the failover return status
   1896 	 */
   1897 	ct->ct_failover_status = rv;
   1898 
   1899 	/*
   1900 	 * As a result of failover, client status would have been changed.
   1901 	 * Update the client state and wake up anyone waiting on this client
   1902 	 * device.
   1903 	 */
   1904 	i_mdi_client_update_state(ct);
   1905 
   1906 	cv_broadcast(&ct->ct_failover_cv);
   1907 	MDI_CLIENT_UNLOCK(ct);
   1908 	return (rv);
   1909 }
   1910 
   1911 /*
   1912  * Load balancing is logical block.
   1913  * IOs within the range described by region_size
   1914  * would go on the same path. This would improve the
   1915  * performance by cache-hit on some of the RAID devices.
   1916  * Search only for online paths(At some point we
   1917  * may want to balance across target ports).
   1918  * If no paths are found then default to round-robin.
   1919  */
   1920 static int
   1921 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
   1922 {
   1923 	int		path_index = -1;
   1924 	int		online_path_count = 0;
   1925 	int		online_nonpref_path_count = 0;
   1926 	int 		region_size = ct->ct_lb_args->region_size;
   1927 	mdi_pathinfo_t	*pip;
   1928 	mdi_pathinfo_t	*next;
   1929 	int		preferred, path_cnt;
   1930 
   1931 	pip = ct->ct_path_head;
   1932 	while (pip) {
   1933 		MDI_PI_LOCK(pip);
   1934 		if (MDI_PI(pip)->pi_state ==
   1935 		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
   1936 			online_path_count++;
   1937 		} else if (MDI_PI(pip)->pi_state ==
   1938 		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
   1939 			online_nonpref_path_count++;
   1940 		}
   1941 		next = (mdi_pathinfo_t *)
   1942 		    MDI_PI(pip)->pi_client_link;
   1943 		MDI_PI_UNLOCK(pip);
   1944 		pip = next;
   1945 	}
   1946 	/* if found any online/preferred then use this type */
   1947 	if (online_path_count > 0) {
   1948 		path_cnt = online_path_count;
   1949 		preferred = 1;
   1950 	} else if (online_nonpref_path_count > 0) {
   1951 		path_cnt = online_nonpref_path_count;
   1952 		preferred = 0;
   1953 	} else {
   1954 		path_cnt = 0;
   1955 	}
   1956 	if (path_cnt) {
   1957 		path_index = (bp->b_blkno >> region_size) % path_cnt;
   1958 		pip = ct->ct_path_head;
   1959 		while (pip && path_index != -1) {
   1960 			MDI_PI_LOCK(pip);
   1961 			if (path_index == 0 &&
   1962 			    (MDI_PI(pip)->pi_state ==
   1963 			    MDI_PATHINFO_STATE_ONLINE) &&
   1964 				MDI_PI(pip)->pi_preferred == preferred) {
   1965 				MDI_PI_HOLD(pip);
   1966 				MDI_PI_UNLOCK(pip);
   1967 				*ret_pip = pip;
   1968 				return (MDI_SUCCESS);
   1969 			}
   1970 			path_index --;
   1971 			next = (mdi_pathinfo_t *)
   1972 			    MDI_PI(pip)->pi_client_link;
   1973 			MDI_PI_UNLOCK(pip);
   1974 			pip = next;
   1975 		}
   1976 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
   1977 		    "lba %llx: path %s %p",
   1978 		    bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip));
   1979 	}
   1980 	return (MDI_FAILURE);
   1981 }
   1982 
/*
 * mdi_select_path():
 *		select a path to access a client device.
 *
 *		mdi_select_path() function is called by the vHCI drivers to
 *		select a path to route the I/O request to.  The caller passes
 *		the block I/O data transfer structure ("buf") as one of the
 *		parameters.  The mpxio framework uses the buf structure
 *		contents to maintain per path statistics (total I/O size /
 *		count pending).  If more than one online paths are available to
 *		select, the framework automatically selects a suitable path
 *		for routing I/O request. If a failover operation is active for
 *		this client device the call shall be failed with MDI_BUSY error
 *		code.
 *
 *		By default this function returns a suitable path in online
 *		state based on the current load balancing policy.  Currently
 *		we support LOAD_BALANCE_NONE (Previously selected online path
 *		will continue to be used till the path is usable),
 *		LOAD_BALANCE_RR (Online paths will be selected in a round
 *		robin fashion) and LOAD_BALANCE_LBA (Online paths will be
 *		selected based on the logical block).  The load balancing
 *		policy is set through the vHCI driver's configuration file
 *		(driver.conf).
 *
 *		vHCI drivers may override this default behavior by specifying
 *		appropriate flags.  The meaning of the third argument depends
 *		on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
 *		then the argument is the "path instance" of the path to select.
 *		If MDI_SELECT_PATH_INSTANCE is not set then the argument is
 *		"start_pip". A non NULL "start_pip" is the starting point to
 *		walk and find the next appropriate path.  The following values
 *		are currently defined: MDI_SELECT_ONLINE_PATH (to select an
 *		ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an
 *		STANDBY path).
 *
 *		The non-standard behavior is used by the scsi_vhci driver,
 *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
 *		attach of client devices (to avoid an unnecessary failover
 *		when the STANDBY path comes up first), during failover
 *		(to activate a STANDBY path as ONLINE).
 *
 *		The selected path is returned in a mdi_hold_path() state
 *		(pi_ref_cnt). Caller should release the hold by calling
 *		mdi_rele_path().
 *
 * Arguments:
 *		cdip	- client device node
 *		bp	- buf for the I/O; only consulted by the
 *			  LOAD_BALANCE_LBA policy, may be NULL
 *		flags	- 0 for the standard behavior, otherwise a
 *			  combination of MDI_SELECT_* flags (any non-zero
 *			  value disables the standard behavior)
 *		arg	- path instance or start_pip, see above
 *		ret_pip	- set to the selected (held) path on MDI_SUCCESS,
 *			  NULL otherwise
 *
 * Return Values:
 *		MDI_SUCCESS	- Completed successfully
 *		MDI_BUSY 	- Client device is busy failing over, or a
 *				  candidate path is in a transient state
 *		MDI_NOPATH	- Client device is online, but no valid path are
 *				  available to access this client device
 *		MDI_FAILURE	- Invalid client device or state; also returned
 *				  when the path requested by path instance is
 *				  in INIT state or its pHCI has detached
 *		MDI_DEVI_ONLINING
 *				- Client device (struct dev_info state) is in
 *				  onlining state.
 */

/*ARGSUSED*/
int
mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
    void *arg, mdi_pathinfo_t **ret_pip)
{
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*head;
	mdi_pathinfo_t	*start;
	client_lb_t	lbp;	/* load balancing policy */
	int		sb = 1;	/* standard behavior */
	int		preferred = 1;	/* preferred path */
	int		cond, cont = 1;
	int		retry = 0;
	mdi_pathinfo_t	*start_pip;	/* request starting pathinfo */
	int		path_instance;	/* request specific path instance */

	/* determine type of arg based on flags */
	if (flags & MDI_SELECT_PATH_INSTANCE) {
		path_instance = (int)(intptr_t)arg;
		start_pip = NULL;
	} else {
		path_instance = 0;
		start_pip = (mdi_pathinfo_t *)arg;
	}

	if (flags != 0) {
		/*
		 * disable default behavior
		 */
		sb = 0;
	}

	*ret_pip = NULL;
	ct = i_devi_get_client(cdip);
	if (ct == NULL) {
		/* mdi extensions are NULL, Nothing more to do */
		return (MDI_FAILURE);
	}

	/* The client lock is held for the rest of the selection. */
	MDI_CLIENT_LOCK(ct);

	/* Client state checks apply to the standard behavior only. */
	if (sb) {
		if (MDI_CLIENT_IS_FAILED(ct)) {
			/*
			 * Client is not ready to accept any I/O requests.
			 * Fail this request.
			 */
			MDI_DEBUG(2, (MDI_NOTE, cdip,
			    "client state offline ct = %p", (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Check for Failover is in progress. If so tell the
			 * caller that this device is busy.
			 */
			MDI_DEBUG(2, (MDI_NOTE, cdip,
			    "client failover in progress ct = %p",
			    (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		}

		/*
		 * Check to see whether the client device is attached.
		 * If not so, let the vHCI driver manually select a path
		 * (standby) and let the probe/attach process to continue.
		 */
		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
			MDI_DEBUG(4, (MDI_NOTE, cdip,
			    "devi is onlining ct = %p", (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_DEVI_ONLINING);
		}
	}

	/*
	 * Cache in the client list head.  If head of the list is NULL
	 * return MDI_NOPATH
	 */
	head = ct->ct_path_head;
	if (head == NULL) {
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_NOPATH);
	}

	/* Caller is specifying a specific pathinfo path by path_instance */
	if (path_instance) {
		/* search for pathinfo with correct path_instance */
		for (pip = head;
		    pip && (mdi_pi_get_path_instance(pip) != path_instance);
		    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
			;

		/* If path can't be selected then MDI_NOPATH is returned. */
		if (pip == NULL) {
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_NOPATH);
		}

		/*
		 * Verify state of path. When asked to select a specific
		 * path_instance, we select the requested path in any
		 * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT.
		 * We don't however select paths where the pHCI has detached.
		 * NOTE: last pathinfo node of an opened client device may
		 * exist in an OFFLINE state after the pHCI associated with
		 * that path has detached (but pi_phci will be NULL if that
		 * has occurred).
		 */
		MDI_PI_LOCK(pip);
		if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) ||
		    (MDI_PI(pip)->pi_phci == NULL)) {
			MDI_PI_UNLOCK(pip);
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		/* Return MDI_BUSY if we have a transient condition */
		if (MDI_PI_IS_TRANSIENT(pip)) {
			MDI_PI_UNLOCK(pip);
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		}

		/*
		 * Return the path in hold state. Caller should release the
		 * hold by calling mdi_rele_path()
		 */
		MDI_PI_HOLD(pip);
		MDI_PI_UNLOCK(pip);
		*ret_pip = pip;
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_SUCCESS);
	}

	/*
	 * for non default behavior, bypass current
	 * load balancing policy and always use LOAD_BALANCE_RR
	 * except that the start point will be adjusted based
	 * on the provided start_pip
	 */
	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;

	switch (lbp) {
	case LOAD_BALANCE_NONE:
		/*
		 * Load balancing is None  or Alternate path mode
		 * Start looking for a online mdi_pathinfo node starting from
		 * last known selected path
		 */
		preferred = 1;
		pip = (mdi_pathinfo_t *)ct->ct_path_last;
		if (pip == NULL) {
			pip = head;
		}
		start = pip;
		/*
		 * Circular walk of the path list: a first lap looking for
		 * preferred paths, then a second lap for non-preferred ones.
		 */
		do {
			MDI_PI_LOCK(pip);
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if ((MDI_PI(pip)->pi_state  ==
				MDI_PATHINFO_STATE_ONLINE) &&
				preferred == MDI_PI(pip)->pi_preferred) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}

			/*
			 * Path is busy.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;
			/*
			 * Keep looking for a next available online path
			 */
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				next = head;
			}
			MDI_PI_UNLOCK(pip);
			pip = next;
			/* Back at the start: switch laps or stop. */
			if (start == pip && preferred) {
				preferred = 0;
			} else if (start == pip && !preferred) {
				cont = 0;
			}
		} while (cont);
		break;

	case LOAD_BALANCE_LBA:
		/*
		 * Make sure we are looking
		 * for an online path. Otherwise, if it is for a STANDBY
		 * path request, it will go through and fetch an ONLINE
		 * path which is not desirable.
		 */
		if ((ct->ct_lb_args != NULL) &&
			    (ct->ct_lb_args->region_size) && bp &&
				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
			if (i_mdi_lba_lb(ct, ret_pip, bp)
				    == MDI_SUCCESS) {
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
		}
		/* FALLTHROUGH */
	case LOAD_BALANCE_RR:
		/*
		 * Load balancing is Round Robin. Start looking for a online
		 * mdi_pathinfo node starting from last known selected path
		 * as the start point.  If override flags are specified,
		 * process accordingly.
		 * If the search is already in effect(start_pip not null),
		 * then lets just use the same path preference to continue the
		 * traversal.
		 */

		if (start_pip != NULL) {
			preferred = MDI_PI(start_pip)->pi_preferred;
		} else {
			preferred = 1;
		}

		/*
		 * The walk begins with the path following the previous
		 * selection (standard behavior) or following start_pip.
		 */
		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
		if (start == NULL) {
			pip = head;
		} else {
			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
			if (pip == NULL) {
				if ( flags & MDI_SELECT_NO_PREFERRED) {
					/*
					 * Return since we hit the end of list
					 */
					MDI_CLIENT_UNLOCK(ct);
					return (MDI_NOPATH);
				}

				if (!sb) {
					if (preferred == 0) {
						/*
						 * Looks like we have completed
						 * the traversal as preferred
						 * value is 0. Time to bail out.
						 */
						*ret_pip = NULL;
						MDI_CLIENT_UNLOCK(ct);
						return (MDI_NOPATH);
					} else {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
					}
				}
				pip = head;
			}
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * cond is set when this path satisfies the request.
			 * For non-standard requests flags is compared for
			 * exact equality, so only the flag combinations
			 * listed below can ever match a path.
			 */
			if (sb) {
				cond = ((MDI_PI(pip)->pi_state ==
				    MDI_PATHINFO_STATE_ONLINE &&
					MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
			} else {
				if (flags == MDI_SELECT_ONLINE_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == MDI_SELECT_STANDBY_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == (MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_STANDBY_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY)) &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags ==
					(MDI_SELECT_STANDBY_PATH |
					MDI_SELECT_ONLINE_PATH |
					MDI_SELECT_USER_DISABLE_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY) ||
						(MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_ONLINE|
					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
						(MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_STANDBY |
					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags ==
				    (MDI_SELECT_STANDBY_PATH |
				    MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_NO_PREFERRED)) {
					/* preference is ignored here */
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE) ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY))
					    ? 1 : 0);
				} else {
					cond = 0;
				}
			}
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (cond) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				/*
				 * Only the standard behavior records the
				 * selection as the next round-robin start.
				 */
				if (sb)
					ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
			/*
			 * Path is busy.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;

			/*
			 * Keep looking for a next available online path.
			 * pip is locked whenever control reaches do_again.
			 */
do_again:
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				if ( flags & MDI_SELECT_NO_PREFERRED) {
					/*
					 * Bail out since we hit the end of list
					 */
					MDI_PI_UNLOCK(pip);
					break;
				}

				if (!sb) {
					if (preferred == 1) {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
						next = head;
					} else {
						/*
						 * We have done both the passes
						 * Preferred as well as for
						 * Non-preferred. Bail out now.
						 */
						cont = 0;
					}
				} else {
					/*
					 * Standard behavior case.
					 */
					next = head;
				}
			}
			MDI_PI_UNLOCK(pip);
			if (cont == 0) {
				break;
			}
			pip = next;

			if (!sb) {
				/*
				 * We need to handle the selection of
				 * non-preferred path in the following
				 * case:
				 *
				 * +------+   +------+   +------+   +-----+
				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
				 * +------+   +------+   +------+   +-----+
				 *
				 * If we start the search with B, we need to
				 * skip beyond B to pick C which is non -
				 * preferred in the second pass. The following
				 * test, if true, will allow us to skip over
				 * the 'start'(B in the example) to select
				 * other non preferred elements.
				 */
				if ((start_pip != NULL) && (start_pip == pip) &&
				    (MDI_PI(start_pip)->pi_preferred
				    != preferred)) {
					/*
					 * try again after going past the start
					 * pip; re-take the pathinfo lock first
					 * since do_again expects pip locked
					 */
					MDI_PI_LOCK(pip);
					goto do_again;
				}
			} else {
				/*
				 * Standard behavior case
				 */
				if (start == pip && preferred) {
					/* look for nonpreferred paths */
					preferred = 0;
				} else if (start == pip && !preferred) {
					/*
					 * Exit condition
					 */
					cont = 0;
				}
			}
		} while (cont);
		break;
	}

	/*
	 * No path was selected.  Report MDI_BUSY if a path in a transient
	 * state was seen during the walk, MDI_NOPATH otherwise.
	 */
	MDI_CLIENT_UNLOCK(ct);
	if (retry == 1) {
		return (MDI_BUSY);
	} else {
		return (MDI_NOPATH);
	}
}
   2494 
   2495 /*
   2496  * For a client, return the next available path to any phci
   2497  *
   2498  * Note:
   2499  *		Caller should hold the branch's devinfo node to get a consistent
   2500  *		snap shot of the mdi_pathinfo nodes.
   2501  *
   2502  *		Please note that even the list is stable the mdi_pathinfo
   2503  *		node state and properties are volatile.  The caller should lock
   2504  *		and unlock the nodes by calling mdi_pi_lock() and
   2505  *		mdi_pi_unlock() functions to get a stable properties.
   2506  *
   2507  *		If there is a need to use the nodes beyond the hold of the
   2508  *		devinfo node period (For ex. I/O), then mdi_pathinfo node
   2509  *		need to be held against unexpected removal by calling
   2510  *		mdi_hold_path() and should be released by calling
   2511  *		mdi_rele_path() on completion.
   2512  */
   2513 mdi_pathinfo_t *
   2514 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
   2515 {
   2516 	mdi_client_t *ct;
   2517 
   2518 	if (!MDI_CLIENT(ct_dip))
   2519 		return (NULL);
   2520 
   2521 	/*
   2522 	 * Walk through client link
   2523 	 */
   2524 	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
   2525 	ASSERT(ct != NULL);
   2526 
   2527 	if (pip == NULL)
   2528 		return ((mdi_pathinfo_t *)ct->ct_path_head);
   2529 
   2530 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
   2531 }
   2532 
   2533 /*
   2534  * For a phci, return the next available path to any client
   2535  * Note: ditto mdi_get_next_phci_path()
   2536  */
   2537 mdi_pathinfo_t *
   2538 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
   2539 {
   2540 	mdi_phci_t *ph;
   2541 
   2542 	if (!MDI_PHCI(ph_dip))
   2543 		return (NULL);
   2544 
   2545 	/*
   2546 	 * Walk through pHCI link
   2547 	 */
   2548 	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
   2549 	ASSERT(ph != NULL);
   2550 
   2551 	if (pip == NULL)
   2552 		return ((mdi_pathinfo_t *)ph->ph_path_head);
   2553 
   2554 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
   2555 }
   2556 
   2557 /*
   2558  * mdi_hold_path():
   2559  *		Hold the mdi_pathinfo node against unwanted unexpected free.
   2560  * Return Values:
   2561  *		None
   2562  */
   2563 void
   2564 mdi_hold_path(mdi_pathinfo_t *pip)
   2565 {
   2566 	if (pip) {
   2567 		MDI_PI_LOCK(pip);
   2568 		MDI_PI_HOLD(pip);
   2569 		MDI_PI_UNLOCK(pip);
   2570 	}
   2571 }
   2572 
   2573 
   2574 /*
   2575  * mdi_rele_path():
   2576  *		Release the mdi_pathinfo node which was selected
   2577  *		through mdi_select_path() mechanism or manually held by
   2578  *		calling mdi_hold_path().
   2579  * Return Values:
   2580  *		None
   2581  */
   2582 void
   2583 mdi_rele_path(mdi_pathinfo_t *pip)
   2584 {
   2585 	if (pip) {
   2586 		MDI_PI_LOCK(pip);
   2587 		MDI_PI_RELE(pip);
   2588 		if (MDI_PI(pip)->pi_ref_cnt == 0) {
   2589 			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
   2590 		}
   2591 		MDI_PI_UNLOCK(pip);
   2592 	}
   2593 }
   2594 
   2595 /*
   2596  * mdi_pi_lock():
   2597  * 		Lock the mdi_pathinfo node.
   2598  * Note:
   2599  *		The caller should release the lock by calling mdi_pi_unlock()
   2600  */
   2601 void
   2602 mdi_pi_lock(mdi_pathinfo_t *pip)
   2603 {
   2604 	ASSERT(pip != NULL);
   2605 	if (pip) {
   2606 		MDI_PI_LOCK(pip);
   2607 	}
   2608 }
   2609 
   2610 
   2611 /*
   2612  * mdi_pi_unlock():
   2613  * 		Unlock the mdi_pathinfo node.
   2614  * Note:
   2615  *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
   2616  */
   2617 void
   2618 mdi_pi_unlock(mdi_pathinfo_t *pip)
   2619 {
   2620 	ASSERT(pip != NULL);
   2621 	if (pip) {
   2622 		MDI_PI_UNLOCK(pip);
   2623 	}
   2624 }
   2625 
   2626 /*
   2627  * mdi_pi_find():
   2628  *		Search the list of mdi_pathinfo nodes attached to the
   2629  *		pHCI/Client device node whose path address matches "paddr".
   2630  *		Returns a pointer to the mdi_pathinfo node if a matching node is
   2631  *		found.
   2632  * Return Values:
   2633  *		mdi_pathinfo node handle
   2634  *		NULL
   2635  * Notes:
   2636  *		Caller need not hold any locks to call this function.
   2637  */
   2638 mdi_pathinfo_t *
   2639 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
   2640 {
   2641 	mdi_phci_t		*ph;
   2642 	mdi_vhci_t		*vh;
   2643 	mdi_client_t		*ct;
   2644 	mdi_pathinfo_t		*pip = NULL;
   2645 
   2646 	MDI_DEBUG(2, (MDI_NOTE, pdip,
   2647 	    "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : ""));
   2648 	if ((pdip == NULL) || (paddr == NULL)) {
   2649 		return (NULL);
   2650 	}
   2651 	ph = i_devi_get_phci(pdip);
   2652 	if (ph == NULL) {
   2653 		/*
   2654 		 * Invalid pHCI device, Nothing more to do.
   2655 		 */
   2656 		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci"));
   2657 		return (NULL);
   2658 	}
   2659 
   2660 	vh = ph->ph_vhci;
   2661 	if (vh == NULL) {
   2662 		/*
   2663 		 * Invalid vHCI device, Nothing more to do.
   2664 		 */
   2665 		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci"));
   2666 		return (NULL);
   2667 	}
   2668 
   2669 	/*
   2670 	 * Look for pathinfo node identified by paddr.
   2671 	 */
   2672 	if (caddr == NULL) {
   2673 		/*
   2674 		 * Find a mdi_pathinfo node under pHCI list for a matching
   2675 		 * unit address.
   2676 		 */
   2677 		MDI_PHCI_LOCK(ph);
   2678 		if (MDI_PHCI_IS_OFFLINE(ph)) {
   2679 			MDI_DEBUG(2, (MDI_WARN, pdip,
   2680 			    "offline phci %p", (void *)ph));
   2681 			MDI_PHCI_UNLOCK(ph);
   2682 			return (NULL);
   2683 		}
   2684 		pip = (mdi_pathinfo_t *)ph->ph_path_head;
   2685 
   2686 		while (pip != NULL) {
   2687 			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
   2688 				break;
   2689 			}
   2690 			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   2691 		}
   2692 		MDI_PHCI_UNLOCK(ph);
   2693 		MDI_DEBUG(2, (MDI_NOTE, pdip,
   2694 		    "found %s %p", mdi_pi_spathname(pip), (void *)pip));
   2695 		return (pip);
   2696 	}
   2697 
   2698 	/*
   2699 	 * XXX - Is the rest of the code in this function really necessary?
   2700 	 * The consumers of mdi_pi_find() can search for the desired pathinfo
   2701 	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
   2702 	 * whether the search is based on the pathinfo nodes attached to
   2703 	 * the pHCI or the client node, the result will be the same.
   2704 	 */
   2705 
   2706 	/*
   2707 	 * Find the client device corresponding to 'caddr'
   2708 	 */
   2709 	MDI_VHCI_CLIENT_LOCK(vh);
   2710 
   2711 	/*
   2712 	 * XXX - Passing NULL to the following function works as long as the
   2713 	 * the client addresses (caddr) are unique per vhci basis.
   2714 	 */
   2715 	ct = i_mdi_client_find(vh, NULL, caddr);
   2716 	if (ct == NULL) {
   2717 		/*
   2718 		 * Client not found, Obviously mdi_pathinfo node has not been
   2719 		 * created yet.
   2720 		 */
   2721 		MDI_VHCI_CLIENT_UNLOCK(vh);
   2722 		MDI_DEBUG(2, (MDI_NOTE, pdip,
   2723 		    "client not found for caddr @%s", caddr ? caddr : ""));
   2724 		return (NULL);
   2725 	}
   2726 
   2727 	/*
   2728 	 * Hold the client lock and look for a mdi_pathinfo node with matching
   2729 	 * pHCI and paddr
   2730 	 */
   2731 	MDI_CLIENT_LOCK(ct);
   2732 
   2733 	/*
   2734 	 * Release the global mutex as it is no more needed. Note: We always
   2735 	 * respect the locking order while acquiring.
   2736 	 */
   2737 	MDI_VHCI_CLIENT_UNLOCK(vh);
   2738 
   2739 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
   2740 	while (pip != NULL) {
   2741 		/*
   2742 		 * Compare the unit address
   2743 		 */
   2744 		if ((MDI_PI(pip)->pi_phci == ph) &&
   2745 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
   2746 			break;
   2747 		}
   2748 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
   2749 	}
   2750 	MDI_CLIENT_UNLOCK(ct);
   2751 	MDI_DEBUG(2, (MDI_NOTE, pdip,
   2752 	    "found: %s %p", mdi_pi_spathname(pip), (void *)pip));
   2753 	return (pip);
   2754 }
   2755 
   2756 /*
   2757  * mdi_pi_alloc():
   2758  *		Allocate and initialize a new instance of a mdi_pathinfo node.
   2759  *		The mdi_pathinfo node returned by this function identifies a
   2760  *		unique device path is capable of having properties attached
   2761  *		and passed to mdi_pi_online() to fully attach and online the
   2762  *		path and client device node.
   2763  *		The mdi_pathinfo node returned by this function must be
   2764  *		destroyed using mdi_pi_free() if the path is no longer
   2765  *		operational or if the caller fails to attach a client device
   2766  *		node when calling mdi_pi_online(). The framework will not free
   2767  *		the resources allocated.
   2768  *		This function can be called from both interrupt and kernel
   2769  *		contexts.  DDI_NOSLEEP flag should be used while calling
   2770  *		from interrupt contexts.
   2771  * Return Values:
   2772  *		MDI_SUCCESS
   2773  *		MDI_FAILURE
   2774  *		MDI_NOMEM
   2775  */
   2776 /*ARGSUSED*/
   2777 int
   2778 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
   2779     char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
   2780 {
   2781 	mdi_vhci_t	*vh;
   2782 	mdi_phci_t	*ph;
   2783 	mdi_client_t	*ct;
   2784 	mdi_pathinfo_t	*pip = NULL;
   2785 	dev_info_t	*cdip;
   2786 	int		rv = MDI_NOMEM;
   2787 	int		path_allocated = 0;
   2788 
   2789 	MDI_DEBUG(2, (MDI_NOTE, pdip,
   2790 	    "cname %s: caddr@%s paddr@%s",
   2791 	    cname ? cname : "", caddr ? caddr : "", paddr ? paddr : ""));
   2792 
   2793 	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
   2794 	    ret_pip == NULL) {
   2795 		/* Nothing more to do */
   2796 		return (MDI_FAILURE);
   2797 	}
   2798 
   2799 	*ret_pip = NULL;
   2800 
   2801 	/* No allocations on detaching pHCI */
   2802 	if (DEVI_IS_DETACHING(pdip)) {
   2803 		/* Invalid pHCI device, return failure */
   2804 		MDI_DEBUG(1, (MDI_WARN, pdip,
   2805 		    "!detaching pHCI=%p", (void *)pdip));
   2806 		return (MDI_FAILURE);
   2807 	}
   2808 
   2809 	ph = i_devi_get_phci(pdip);
   2810 	ASSERT(ph != NULL);
   2811 	if (ph == NULL) {
   2812 		/* Invalid pHCI device, return failure */
   2813 		MDI_DEBUG(1, (MDI_WARN, pdip,
   2814 		    "!invalid pHCI=%p", (void *)pdip));
   2815 		return (MDI_FAILURE);
   2816 	}
   2817 
   2818 	MDI_PHCI_LOCK(ph);
   2819 	vh = ph->ph_vhci;
   2820 	if (vh == NULL) {
   2821 		/* Invalid vHCI device, return failure */
   2822 		MDI_DEBUG(1, (MDI_WARN, pdip,
   2823 		    "!invalid vHCI=%p", (void *)pdip));
   2824 		MDI_PHCI_UNLOCK(ph);
   2825 		return (MDI_FAILURE);
   2826 	}
   2827 
   2828 	if (MDI_PHCI_IS_READY(ph) == 0) {
   2829 		/*
   2830 		 * Do not allow new node creation when pHCI is in
   2831 		 * offline/suspended states
   2832 		 */
   2833 		MDI_DEBUG(1, (MDI_WARN, pdip,
   2834 		    "pHCI=%p is not ready", (void *)ph));
   2835 		MDI_PHCI_UNLOCK(ph);
   2836 		return (MDI_BUSY);
   2837 	}
   2838 	MDI_PHCI_UNSTABLE(ph);
   2839 	MDI_PHCI_UNLOCK(ph);
   2840 
   2841 	/* look for a matching client, create one if not found */
   2842 	MDI_VHCI_CLIENT_LOCK(vh);
   2843 	ct = i_mdi_client_find(vh, cname, caddr);
   2844 	if (ct == NULL) {
   2845 		ct = i_mdi_client_alloc(vh, cname, caddr);
   2846 		ASSERT(ct != NULL);
   2847 	}
   2848 
   2849 	if (ct->ct_dip == NULL) {
   2850 		/*
   2851 		 * Allocate a devinfo node
   2852 		 */
   2853 		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
   2854 		    compatible, ncompatible);
   2855 		if (ct->ct_dip == NULL) {
   2856 			(void) i_mdi_client_free(vh, ct);
   2857 			goto fail;
   2858 		}
   2859 	}
   2860 	cdip = ct->ct_dip;
   2861 
   2862 	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
   2863 	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
   2864 
   2865 	MDI_CLIENT_LOCK(ct);
   2866 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
   2867 	while (pip != NULL) {
   2868 		/*
   2869 		 * Compare the unit address
   2870 		 */
   2871 		if ((MDI_PI(pip)->pi_phci == ph) &&
   2872 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
   2873 			break;
   2874 		}
   2875 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
   2876 	}
   2877 	MDI_CLIENT_UNLOCK(ct);
   2878 
   2879 	if (pip == NULL) {
   2880 		/*
   2881 		 * This is a new path for this client device.  Allocate and
   2882 		 * initialize a new pathinfo node
   2883 		 */
   2884 		pip = i_mdi_pi_alloc(ph, paddr, ct);
   2885 		ASSERT(pip != NULL);
   2886 		path_allocated = 1;
   2887 	}
   2888 	rv = MDI_SUCCESS;
   2889 
   2890 fail:
   2891 	/*
   2892 	 * Release the global mutex.
   2893 	 */
   2894 	MDI_VHCI_CLIENT_UNLOCK(vh);
   2895 
   2896 	/*
   2897 	 * Mark the pHCI as stable
   2898 	 */
   2899 	MDI_PHCI_LOCK(ph);
   2900 	MDI_PHCI_STABLE(ph);
   2901 	MDI_PHCI_UNLOCK(ph);
   2902 	*ret_pip = pip;
   2903 
   2904 	MDI_DEBUG(2, (MDI_NOTE, pdip,
   2905 	    "alloc %s %p", mdi_pi_spathname(pip), (void *)pip));
   2906 
   2907 	if (path_allocated)
   2908 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
   2909 
   2910 	return (rv);
   2911 }
   2912 
   2913 /*ARGSUSED*/
   2914 int
   2915 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
   2916     int flags, mdi_pathinfo_t **ret_pip)
   2917 {
   2918 	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
   2919 	    flags, ret_pip));
   2920 }
   2921 
   2922 /*
   2923  * i_mdi_pi_alloc():
   2924  *		Allocate a mdi_pathinfo node and add to the pHCI path list
   2925  * Return Values:
   2926  *		mdi_pathinfo
   2927  */
   2928 /*ARGSUSED*/
   2929 static mdi_pathinfo_t *
   2930 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
   2931 {
   2932 	mdi_pathinfo_t	*pip;
   2933 	int		ct_circular;
   2934 	int		ph_circular;
   2935 	static char	path[MAXPATHLEN];	/* mdi_pathmap_mutex protects */
   2936 	char		*path_persistent;
   2937 	int		path_instance;
   2938 	mod_hash_val_t	hv;
   2939 
   2940 	ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));
   2941 
   2942 	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
   2943 	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
   2944 	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
   2945 	    MDI_PATHINFO_STATE_TRANSIENT;
   2946 
   2947 	if (MDI_PHCI_IS_USER_DISABLED(ph))
   2948 		MDI_PI_SET_USER_DISABLE(pip);
   2949 
   2950 	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
   2951 		MDI_PI_SET_DRV_DISABLE_TRANS(pip);
   2952 
   2953 	if (MDI_PHCI_IS_DRV_DISABLED(ph))
   2954 		MDI_PI_SET_DRV_DISABLE(pip);
   2955 
   2956 	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
   2957 	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
   2958 	MDI_PI(pip)->pi_client = ct;
   2959 	MDI_PI(pip)->pi_phci = ph;
   2960 	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
   2961 	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
   2962 
   2963         /*
   2964 	 * We form the "path" to the pathinfo node, and see if we have
   2965 	 * already allocated a 'path_instance' for that "path".  If so,
   2966 	 * we use the already allocated 'path_instance'.  If not, we
   2967 	 * allocate a new 'path_instance' and associate it with a copy of
   2968 	 * the "path" string (which is never freed). The association
   2969 	 * between a 'path_instance' this "path" string persists until
   2970 	 * reboot.
   2971 	 */
   2972         mutex_enter(&mdi_pathmap_mutex);
   2973 	(void) ddi_pathname(ph->ph_dip, path);
   2974 	(void) sprintf(path + strlen(path), "/%s@%s",
   2975 	    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
   2976         if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
   2977                 path_instance = (uint_t)(intptr_t)hv;
   2978         } else {
   2979 		/* allocate a new 'path_instance' and persistent "path" */
   2980 		path_instance = mdi_pathmap_instance++;
   2981 		path_persistent = i_ddi_strdup(path, KM_SLEEP);
   2982                 (void) mod_hash_insert(mdi_pathmap_bypath,
   2983                     (mod_hash_key_t)path_persistent,
   2984                     (mod_hash_val_t)(intptr_t)path_instance);
   2985 		(void) mod_hash_insert(mdi_pathmap_byinstance,
   2986 		    (mod_hash_key_t)(intptr_t)path_instance,
   2987 		    (mod_hash_val_t)path_persistent);
   2988 
   2989 		/* create shortpath name */
   2990 		(void) snprintf(path, sizeof(path), "%s%d/%s@%s",
   2991 		    ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip),
   2992 		    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
   2993 		path_persistent = i_ddi_strdup(path, KM_SLEEP);
   2994 		(void) mod_hash_insert(mdi_pathmap_sbyinstance,
   2995 		    (mod_hash_key_t)(intptr_t)path_instance,
   2996 		    (mod_hash_val_t)path_persistent);
   2997         }
   2998         mutex_exit(&mdi_pathmap_mutex);
   2999 	MDI_PI(pip)->pi_path_instance = path_instance;
   3000 
   3001 	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
   3002 	ASSERT(MDI_PI(pip)->pi_prop != NULL);
   3003 	MDI_PI(pip)->pi_pprivate = NULL;
   3004 	MDI_PI(pip)->pi_cprivate = NULL;
   3005 	MDI_PI(pip)->pi_vprivate = NULL;
   3006 	MDI_PI(pip)->pi_client_link = NULL;
   3007 	MDI_PI(pip)->pi_phci_link = NULL;
   3008 	MDI_PI(pip)->pi_ref_cnt = 0;
   3009 	MDI_PI(pip)->pi_kstats = NULL;
   3010 	MDI_PI(pip)->pi_preferred = 1;
   3011 	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
   3012 
   3013 	/*
   3014 	 * Lock both dev_info nodes against changes in parallel.
   3015 	 *
   3016 	 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
   3017 	 * This atypical operation is done to synchronize pathinfo nodes
   3018 	 * during devinfo snapshot (see di_register_pip) by 'pretending' that
   3019 	 * the pathinfo nodes are children of the Client.
   3020 	 */
   3021 	ndi_devi_enter(ct->ct_dip, &ct_circular);
   3022 	ndi_devi_enter(ph->ph_dip, &ph_circular);
   3023 
   3024 	i_mdi_phci_add_path(ph, pip);
   3025 	i_mdi_client_add_path(ct, pip);
   3026 
   3027 	ndi_devi_exit(ph->ph_dip, ph_circular);
   3028 	ndi_devi_exit(ct->ct_dip, ct_circular);
   3029 
   3030 	return (pip);
   3031 }
   3032 
   3033 /*
   3034  * mdi_pi_pathname_by_instance():
   3035  *	Lookup of "path" by 'path_instance'. Return "path".
   3036  *	NOTE: returned "path" remains valid forever (until reboot).
   3037  */
   3038 char *
   3039 mdi_pi_pathname_by_instance(int path_instance)
   3040 {
   3041 	char		*path;
   3042 	mod_hash_val_t	hv;
   3043 
   3044 	/* mdi_pathmap lookup of "path" by 'path_instance' */
   3045 	mutex_enter(&mdi_pathmap_mutex);
   3046 	if (mod_hash_find(mdi_pathmap_byinstance,
   3047 	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
   3048 		path = (char *)hv;
   3049 	else
   3050 		path = NULL;
   3051 	mutex_exit(&mdi_pathmap_mutex);
   3052 	return (path);
   3053 }
   3054 
   3055 /*
   3056  * mdi_pi_spathname_by_instance():
   3057  *	Lookup of "shortpath" by 'path_instance'. Return "shortpath".
   3058  *	NOTE: returned "shortpath" remains valid forever (until reboot).
   3059  */
   3060 char *
   3061 mdi_pi_spathname_by_instance(int path_instance)
   3062 {
   3063 	char		*path;
   3064 	mod_hash_val_t	hv;
   3065 
   3066 	/* mdi_pathmap lookup of "path" by 'path_instance' */
   3067 	mutex_enter(&mdi_pathmap_mutex);
   3068 	if (mod_hash_find(mdi_pathmap_sbyinstance,
   3069 	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
   3070 		path = (char *)hv;
   3071 	else
   3072 		path = NULL;
   3073 	mutex_exit(&mdi_pathmap_mutex);
   3074 	return (path);
   3075 }
   3076 
   3077 
   3078 /*
   3079  * i_mdi_phci_add_path():
   3080  * 		Add a mdi_pathinfo node to pHCI list.
   3081  * Notes:
   3082  *		Caller should per-pHCI mutex
   3083  */
   3084 static void
   3085 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
   3086 {
   3087 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
   3088 
   3089 	MDI_PHCI_LOCK(ph);
   3090 	if (ph->ph_path_head == NULL) {
   3091 		ph->ph_path_head = pip;
   3092 	} else {
   3093 		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
   3094 	}
   3095 	ph->ph_path_tail = pip;
   3096 	ph->ph_path_count++;
   3097 	MDI_PHCI_UNLOCK(ph);
   3098 }
   3099 
   3100 /*
   3101  * i_mdi_client_add_path():
   3102  *		Add mdi_pathinfo node to client list
   3103  */
   3104 static void
   3105 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
   3106 {
   3107 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
   3108 
   3109 	MDI_CLIENT_LOCK(ct);
   3110 	if (ct->ct_path_head == NULL) {
   3111 		ct->ct_path_head = pip;
   3112 	} else {
   3113 		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
   3114 	}
   3115 	ct->ct_path_tail = pip;
   3116 	ct->ct_path_count++;
   3117 	MDI_CLIENT_UNLOCK(ct);
   3118 }
   3119 
   3120 /*
   3121  * mdi_pi_free():
   3122  *		Free the mdi_pathinfo node and also client device node if this
   3123  *		is the last path to the device
   3124  * Return Values:
   3125  *		MDI_SUCCESS
   3126  *		MDI_FAILURE
   3127  *		MDI_BUSY
   3128  */
   3129 /*ARGSUSED*/
   3130 int
   3131 mdi_pi_free(mdi_pathinfo_t *pip, int flags)
   3132 {
   3133 	int		rv;
   3134 	mdi_vhci_t	*vh;
   3135 	mdi_phci_t	*ph;
   3136 	mdi_client_t	*ct;
   3137 	int		(*f)();
   3138 	int		client_held = 0;
   3139 
   3140 	MDI_PI_LOCK(pip);
   3141 	ph = MDI_PI(pip)->pi_phci;
   3142 	ASSERT(ph != NULL);
   3143 	if (ph == NULL) {
   3144 		/*
   3145 		 * Invalid pHCI device, return failure
   3146 		 */
   3147 		MDI_DEBUG(1, (MDI_WARN, NULL,
   3148 		    "!invalid pHCI: pip %s %p",
   3149 		    mdi_pi_spathname(pip), (void *)pip));
   3150 		MDI_PI_UNLOCK(pip);
   3151 		return (MDI_FAILURE);
   3152 	}
   3153 
   3154 	vh = ph->ph_vhci;
   3155 	ASSERT(vh != NULL);
   3156 	if (vh == NULL) {
   3157 		/* Invalid pHCI device, return failure */
   3158 		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
   3159 		    "!invalid vHCI: pip %s %p",
   3160 		    mdi_pi_spathname(pip), (void *)pip));
   3161 		MDI_PI_UNLOCK(pip);
   3162 		return (MDI_FAILURE);
   3163 	}
   3164 
   3165 	ct = MDI_PI(pip)->pi_client;
   3166 	ASSERT(ct != NULL);
   3167 	if (ct == NULL) {
   3168 		/*
   3169 		 * Invalid Client device, return failure
   3170 		 */
   3171 		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
   3172 		    "!invalid client: pip %s %p",
   3173 		    mdi_pi_spathname(pip), (void *)pip));
   3174 		MDI_PI_UNLOCK(pip);
   3175 		return (MDI_FAILURE);
   3176 	}
   3177 
   3178 	/*
   3179 	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
   3180 	 * if the node state is either offline or init and the reference count
   3181 	 * is zero.
   3182 	 */
   3183 	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
   3184 	    MDI_PI_IS_INITING(pip))) {
   3185 		/*
   3186 		 * Node is busy
   3187 		 */
   3188 		MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
   3189 		    "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip));
   3190 		MDI_PI_UNLOCK(pip);
   3191 		return (MDI_BUSY);
   3192 	}
   3193 
   3194 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
   3195 		/*
   3196 		 * Give a chance for pending I/Os to complete.
   3197 		 */
   3198 		MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
   3199 		    "!%d cmds still pending on path: %s %p",
   3200 		    MDI_PI(pip)->pi_ref_cnt,
   3201 		    mdi_pi_spathname(pip), (void *)pip));
   3202 		if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
   3203 		    &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
   3204 		    TR_CLOCK_TICK) == -1) {
   3205 			/*
   3206 			 * The timeout time reached without ref_cnt being zero
   3207 			 * being signaled.
   3208 			 */
   3209 			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
   3210 			    "!Timeout reached on path %s %p without the cond",
   3211 			    mdi_pi_spathname(pip), (void *)pip));
   3212 			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
   3213 			    "!%d cmds still pending on path %s %p",
   3214 			    MDI_PI(pip)->pi_ref_cnt,
   3215 			    mdi_pi_spathname(pip), (void *)pip));
   3216 			MDI_PI_UNLOCK(pip);
   3217 			return (MDI_BUSY);
   3218 		}
   3219 	}
   3220 	if (MDI_PI(pip)->pi_pm_held) {
   3221 		client_held = 1;
   3222 	}
   3223 	MDI_PI_UNLOCK(pip);
   3224 
   3225 	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
   3226 
   3227 	MDI_CLIENT_LOCK(ct);
   3228 
   3229 	/* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
   3230 	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
   3231 
   3232 	/*
   3233 	 * Wait till failover is complete before removing this node.
   3234 	 */
   3235 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
   3236 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
   3237 
   3238 	MDI_CLIENT_UNLOCK(ct);
   3239 	MDI_VHCI_CLIENT_LOCK(vh);
   3240 	MDI_CLIENT_LOCK(ct);
   3241 	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
   3242 
   3243 	if (!MDI_PI_IS_INITING(pip)) {
   3244 		f = vh->vh_ops->vo_pi_uninit;
   3245 		if (f != NULL) {
   3246 			rv = (*f)(vh->vh_dip, pip, 0);
   3247 		}
   3248 	} else
   3249 		rv = MDI_SUCCESS;
   3250 
   3251 	/*
   3252 	 * If vo_pi_uninit() completed successfully.
   3253 	 */
   3254 	if (rv == MDI_SUCCESS) {
   3255 		if (client_held) {
   3256 			MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
   3257 			    "i_mdi_pm_rele_client\n"));
   3258 			i_mdi_pm_rele_client(ct, 1);
   3259 		}
   3260 		i_mdi_pi_free(ph, pip, ct);
   3261 		if (ct->ct_path_count == 0) {
   3262 			/*
   3263 			 * Client lost its last path.
   3264 			 * Clean up the client device
   3265 			 */
   3266 			MDI_CLIENT_UNLOCK(ct);
   3267 			(void) i_mdi_client_free(ct->ct_vhci, ct);
   3268 			MDI_VHCI_CLIENT_UNLOCK(vh);
   3269 			return (rv);
   3270 		}
   3271 	}
   3272 	MDI_CLIENT_UNLOCK(ct);
   3273 	MDI_VHCI_CLIENT_UNLOCK(vh);
   3274 
   3275 	if (rv == MDI_FAILURE)
   3276 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
   3277 
   3278 	return (rv);
   3279 }
   3280 
   3281 /*
   3282  * i_mdi_pi_free():
   3283  *		Free the mdi_pathinfo node
   3284  */
   3285 static void
   3286 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
   3287 {
   3288 	int	ct_circular;
   3289 	int	ph_circular;
   3290 
   3291 	ASSERT(MDI_CLIENT_LOCKED(ct));
   3292 
   3293 	/*
   3294 	 * remove any per-path kstats
   3295 	 */
   3296 	i_mdi_pi_kstat_destroy(pip);
   3297 
   3298 	/* See comments in i_mdi_pi_alloc() */
   3299 	ndi_devi_enter(ct->ct_dip, &ct_circular);
   3300 	ndi_devi_enter(ph->ph_dip, &ph_circular);
   3301 
   3302 	i_mdi_client_remove_path(ct, pip);
   3303 	i_mdi_phci_remove_path(ph, pip);
   3304 
   3305 	ndi_devi_exit(ph->ph_dip, ph_circular);
   3306 	ndi_devi_exit(ct->ct_dip, ct_circular);
   3307 
   3308 	mutex_destroy(&MDI_PI(pip)->pi_mutex);
   3309 	cv_destroy(&MDI_PI(pip)->pi_state_cv);
   3310 	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
   3311 	if (MDI_PI(pip)->pi_addr) {
   3312 		kmem_free(MDI_PI(pip)->pi_addr,
   3313 		    strlen(MDI_PI(pip)->pi_addr) + 1);
   3314 		MDI_PI(pip)->pi_addr = NULL;
   3315 	}
   3316 
   3317 	if (MDI_PI(pip)->pi_prop) {
   3318 		(void) nvlist_free(MDI_PI(pip)->pi_prop);
   3319 		MDI_PI(pip)->pi_prop = NULL;
   3320 	}
   3321 	kmem_free(pip, sizeof (struct mdi_pathinfo));
   3322 }
   3323 
   3324 
   3325 /*
   3326  * i_mdi_phci_remove_path():
   3327  * 		Remove a mdi_pathinfo node from pHCI list.
   3328  * Notes:
   3329  *		Caller should hold per-pHCI mutex
   3330  */
   3331 static void
   3332 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
   3333 {
   3334 	mdi_pathinfo_t	*prev = NULL;
   3335 	mdi_pathinfo_t	*path = NULL;
   3336 
   3337 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
   3338 
   3339 	MDI_PHCI_LOCK(ph);
   3340 	path = ph->ph_path_head;
   3341 	while (path != NULL) {
   3342 		if (path == pip) {
   3343 			break;
   3344 		}
   3345 		prev = path;
   3346 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
   3347 	}
   3348 
   3349 	if (path) {
   3350 		ph->ph_path_count--;
   3351 		if (prev) {
   3352 			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
   3353 		} else {
   3354 			ph->ph_path_head =
   3355 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
   3356 		}
   3357 		if (ph->ph_path_tail == path) {
   3358 			ph->ph_path_tail = prev;
   3359 		}
   3360 	}
   3361 
   3362 	/*
   3363 	 * Clear the pHCI link
   3364 	 */
   3365 	MDI_PI(pip)->pi_phci_link = NULL;
   3366 	MDI_PI(pip)->pi_phci = NULL;
   3367 	MDI_PHCI_UNLOCK(ph);
   3368 }
   3369 
   3370 /*
   3371  * i_mdi_client_remove_path():
   3372  * 		Remove a mdi_pathinfo node from client path list.
   3373  */
   3374 static void
   3375 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
   3376 {
   3377 	mdi_pathinfo_t	*prev = NULL;
   3378 	mdi_pathinfo_t	*path;
   3379 
   3380 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
   3381 
   3382 	ASSERT(MDI_CLIENT_LOCKED(ct));
   3383 	path = ct->ct_path_head;
   3384 	while (path != NULL) {
   3385 		if (path == pip) {
   3386 			break;
   3387 		}
   3388 		prev = path;
   3389 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
   3390 	}
   3391 
   3392 	if (path) {
   3393 		ct->ct_path_count--;
   3394 		if (prev) {
   3395 			MDI_PI(prev)->pi_client_link =
   3396 			    MDI_PI(path)->pi_client_link;
   3397 		} else {
   3398 			ct->ct_path_head =
   3399 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
   3400 		}
   3401 		if (ct->ct_path_tail == path) {
   3402 			ct->ct_path_tail = prev;
   3403 		}
   3404 		if (ct->ct_path_last == path) {
   3405 			ct->ct_path_last = ct->ct_path_head;
   3406 		}
   3407 	}
   3408 	MDI_PI(pip)->pi_client_link = NULL;
   3409 	MDI_PI(pip)->pi_client = NULL;
   3410 }
   3411 
   3412 /*
   3413  * i_mdi_pi_state_change():
   3414  *		online a mdi_pathinfo node
   3415  *
   3416  * Return Values:
   3417  *		MDI_SUCCESS
   3418  *		MDI_FAILURE
   3419  */
   3420 /*ARGSUSED*/
   3421 static int
   3422 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
   3423 {
   3424 	int		rv = MDI_SUCCESS;
   3425 	mdi_vhci_t	*vh;
   3426 	mdi_phci_t	*ph;
   3427 	mdi_client_t	*ct;
   3428 	int		(*f)();
   3429 	dev_info_t	*cdip;
   3430 
   3431 	MDI_PI_LOCK(pip);
   3432 
   3433 	ph = MDI_PI(pip)->pi_phci;
   3434 	ASSERT(ph);
   3435 	if (ph == NULL) {
   3436 		/*
   3437 		 * Invalid pHCI device, fail the request
   3438 		 */
   3439 		MDI_PI_UNLOCK(pip);
   3440 		MDI_DEBUG(1, (MDI_WARN, NULL,
   3441 		    "!invalid phci: pip %s %p",
   3442 		    mdi_pi_spathname(pip), (void *)pip));
   3443 		return (MDI_FAILURE);
   3444 	}
   3445 
   3446 	vh = ph->ph_vhci;
   3447 	ASSERT(vh);
   3448 	if (vh == NULL) {
   3449 		/*
   3450 		 * Invalid vHCI device, fail the request
   3451 		 */
   3452 		MDI_PI_UNLOCK(pip);
   3453 		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
   3454 		    "!invalid vhci: pip %s %p",
   3455 		    mdi_pi_spathname(pip), (void *)pip));
   3456 		return (MDI_FAILURE);
   3457 	}
   3458 
   3459 	ct = MDI_PI(pip)->pi_client;
   3460 	ASSERT(ct != NULL);
   3461 	if (ct == NULL) {
   3462 		/*
   3463 		 * Invalid client device, fail the request
   3464 		 */
   3465 		MDI_PI_UNLOCK(pip);
   3466 		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
   3467 		    "!invalid client: pip %s %p",
   3468 		    mdi_pi_spathname(pip), (void *)pip));
   3469 		return (MDI_FAILURE);
   3470 	}
   3471 
   3472 	/*
   3473 	 * If this path has not been initialized yet, Callback vHCI driver's
   3474 	 * pathinfo node initialize entry point
   3475 	 */
   3476 
   3477 	if (MDI_PI_IS_INITING(pip)) {
   3478 		MDI_PI_UNLOCK(pip);
   3479 		f = vh->vh_ops->vo_pi_init;
   3480 		if (f != NULL) {
   3481 			rv = (*f)(vh->vh_dip, pip, 0);
   3482 			if (rv != MDI_SUCCESS) {
   3483 				MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
   3484 				    "!vo_pi_init failed: vHCI %p, pip %s %p",
   3485 				    (void *)vh, mdi_pi_spathname(pip),
   3486 				    (void *)pip));
   3487 				return (MDI_FAILURE);
   3488 			}
   3489 		}
   3490 		MDI_PI_LOCK(pip);
   3491 		MDI_PI_CLEAR_TRANSIENT(pip);
   3492 	}
   3493 
   3494 	/*
   3495 	 * Do not allow state transition when pHCI is in offline/suspended
   3496 	 * states
   3497 	 */
   3498 	i_mdi_phci_lock(ph, pip);
   3499 	if (MDI_PHCI_IS_READY(ph) == 0) {
   3500 		MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
   3501 		    "!pHCI not ready, pHCI=%p", (void *)ph));
   3502 		MDI_PI_UNLOCK(pip);
   3503 		i_mdi_phci_unlock(ph);
   3504 		return (MDI_BUSY);
   3505 	}
   3506 	MDI_PHCI_UNSTABLE(ph);
   3507 	i_mdi_phci_unlock(ph);
   3508 
   3509 	/*
   3510 	 * Check if mdi_pathinfo state is in transient state.
   3511 	 * If yes, offlining is in progress and wait till transient state is
   3512 	 * cleared.
   3513 	 */
   3514 	if (MDI_PI_IS_TRANSIENT(pip)) {
   3515 		while (MDI_PI_IS_TRANSIENT(pip)) {
   3516 			cv_wait(&MDI_PI(pip)->pi_state_cv,
   3517 			    &MDI_PI(pip)->pi_mutex);
   3518 		}
   3519 	}
   3520 
   3521 	/*
   3522 	 * Grab the client lock in reverse order sequence and release the
   3523 	 * mdi_pathinfo mutex.
   3524 	 */
   3525 	i_mdi_client_lock(ct, pip);
   3526 	MDI_PI_UNLOCK(pip);
   3527 
   3528 	/*
   3529 	 * Wait till failover state is cleared
   3530 	 */
   3531 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
   3532 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
   3533 
   3534 	/*
   3535 	 * Mark the mdi_pathinfo node state as transient
   3536 	 */
   3537 	MDI_PI_LOCK(pip);
   3538 	switch (state) {
   3539 	case MDI_PATHINFO_STATE_ONLINE:
   3540 		MDI_PI_SET_ONLINING(pip);
   3541 		break;
   3542 
   3543 	case MDI_PATHINFO_STATE_STANDBY:
   3544 		MDI_PI_SET_STANDBYING(pip);
   3545 		break;
   3546 
   3547 	case MDI_PATHINFO_STATE_FAULT:
   3548 		/*
   3549 		 * Mark the pathinfo state as FAULTED
   3550 		 */
   3551 		MDI_PI_SET_FAULTING(pip);
   3552 		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
   3553 		break;
   3554 
   3555 	case MDI_PATHINFO_STATE_OFFLINE:
   3556 		/*
   3557 		 * ndi_devi_offline() cannot hold pip or ct locks.
   3558 		 */
   3559 		MDI_PI_UNLOCK(pip);
   3560 
   3561 		/*
   3562 		 * If this is a user initiated path online->offline operation
   3563 		 * who's success would transition a client from DEGRADED to
   3564 		 * FAILED then only proceed if we can offline the client first.
   3565 		 */
   3566 		cdip = ct->ct_dip;
   3567 		if ((flag & NDI_USER_REQ) &&
   3568 		    MDI_PI_IS_ONLINE(pip) &&
   3569 		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
   3570 			i_mdi_client_unlock(ct);
   3571 			rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN);
   3572 			if (rv != NDI_SUCCESS) {
   3573 				/*
   3574 				 * Convert to MDI error code
   3575 				 */
   3576 				switch (rv) {
   3577 				case NDI_BUSY:
   3578 					rv = MDI_BUSY;
   3579 					break;
   3580 				default:
   3581 					rv = MDI_FAILURE;
   3582 					break;
   3583 				}
   3584 				goto state_change_exit;
   3585 			} else {
   3586 				i_mdi_client_lock(ct, NULL);
   3587 			}
   3588 		}
   3589 		/*
   3590 		 * Mark the mdi_pathinfo node state as transient
   3591 		 */
   3592 		MDI_PI_LOCK(pip);
   3593 		MDI_PI_SET_OFFLINING(pip);
   3594 		break;
   3595 	}
   3596 	MDI_PI_UNLOCK(pip);
   3597 	MDI_CLIENT_UNSTABLE(ct);
   3598 	i_mdi_client_unlock(ct);
   3599 
   3600 	f = vh->vh_ops->vo_pi_state_change;
   3601 	if (f != NULL)
   3602 		rv = (*f)(vh->vh_dip, pip, state, 0, flag);
   3603 
   3604 	MDI_CLIENT_LOCK(ct);
   3605 	MDI_PI_LOCK(pip);
   3606 	if (rv == MDI_NOT_SUPPORTED) {
   3607 		MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
   3608 	}
   3609 	if (rv != MDI_SUCCESS) {
   3610 		MDI_DEBUG(2, (MDI_WARN, ct->ct_dip,
   3611 		    "vo_pi_state_change failed: rv %x", rv));
   3612 	}
   3613 	if (MDI_PI_IS_TRANSIENT(pip)) {
   3614 		if (rv == MDI_SUCCESS) {
   3615 			MDI_PI_CLEAR_TRANSIENT(pip);
   3616 		} else {
   3617 			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
   3618 		}
   3619 	}
   3620 
   3621 	/*
   3622 	 * Wake anyone waiting for this mdi_pathinfo node
   3623 	 */
   3624 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
   3625 	MDI_PI_UNLOCK(pip);
   3626 
   3627 	/*
   3628 	 * Mark the client device as stable
   3629 	 */
   3630 	MDI_CLIENT_STABLE(ct);
   3631 	if (rv == MDI_SUCCESS) {
   3632 		if (ct->ct_unstable == 0) {
   3633 			cdip = ct->ct_dip;
   3634 
   3635 			/*
   3636 			 * Onlining the mdi_pathinfo node will impact the
   3637 			 * client state Update the client and dev_info node
   3638 			 * state accordingly
   3639 			 */
   3640 			rv = NDI_SUCCESS;
   3641 			i_mdi_client_update_state(ct);
   3642 			switch (MDI_CLIENT_STATE(ct)) {
   3643 			case MDI_CLIENT_STATE_OPTIMAL:
   3644 			case MDI_CLIENT_STATE_DEGRADED:
   3645 				if (cdip && !i_ddi_devi_attached(cdip) &&
   3646 				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
   3647 				    (state == MDI_PATHINFO_STATE_STANDBY))) {
   3648 
   3649 					/*
   3650 					 * Must do ndi_devi_online() through
   3651 					 * hotplug thread for deferred
   3652 					 * attach mechanism to work
   3653 					 */
   3654 					MDI_CLIENT_UNLOCK(ct);
   3655 					rv = ndi_devi_online(cdip, 0);
   3656 					MDI_CLIENT_LOCK(ct);
   3657 					if ((rv != NDI_SUCCESS) &&
   3658 					    (MDI_CLIENT_STATE(ct) ==
   3659 					    MDI_CLIENT_STATE_DEGRADED)) {
   3660 						/*
   3661 						 * ndi_devi_online failed.
   3662 						 * Reset client flags to
   3663 						 * offline.
   3664 						 */
   3665 						MDI_DEBUG(1, (MDI_WARN, cdip,
   3666 						    "!ndi_devi_online failed "
   3667 						    "error %x", rv));
   3668 						MDI_CLIENT_SET_OFFLINE(ct);
   3669 					}
   3670 					if (rv != NDI_SUCCESS) {
   3671 						/* Reset the path state */
   3672 						MDI_PI_LOCK(pip);
   3673 						MDI_PI(pip)->pi_state =
   3674 						    MDI_PI_OLD_STATE(pip);
   3675 						MDI_PI_UNLOCK(pip);
   3676 					}
   3677 				}
   3678 				break;
   3679 
   3680 			case MDI_CLIENT_STATE_FAILED:
   3681 				/*
   3682 				 * This is the last path case for
   3683 				 * non-user initiated events.
   3684 				 */
   3685 				if (((flag & NDI_USER_REQ) == 0) &&
   3686 				    cdip && (i_ddi_node_state(cdip) >=
   3687 				    DS_INITIALIZED)) {
   3688 					MDI_CLIENT_UNLOCK(ct);
   3689 					rv = ndi_devi_offline(cdip,
   3690 					    NDI_DEVFS_CLEAN);
   3691 					MDI_CLIENT_LOCK(ct);
   3692 
   3693 					if (rv != NDI_SUCCESS) {
   3694 						/*
   3695 						 * ndi_devi_offline failed.
   3696 						 * Reset client flags to
   3697 						 * online as the path could not
   3698 						 * be offlined.
   3699 						 */
   3700 						MDI_DEBUG(1, (MDI_WARN, cdip,
   3701 						    "!ndi_devi_offline failed: "
   3702 						    "error %x", rv));
   3703 						MDI_CLIENT_SET_ONLINE(ct);
   3704 					}
   3705 				}
   3706 				break;
   3707 			}
   3708 			/*
   3709 			 * Convert to MDI error code
   3710 			 */
   3711 			switch (rv) {
   3712 			case NDI_SUCCESS:
   3713 				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
   3714 				i_mdi_report_path_state(ct, pip);
   3715 				rv = MDI_SUCCESS;
   3716 				break;
   3717 			case NDI_BUSY:
   3718 				rv = MDI_BUSY;
   3719 				break;
   3720 			default:
   3721 				rv = MDI_FAILURE;
   3722 				break;
   3723 			}
   3724 		}
   3725 	}
   3726 	MDI_CLIENT_UNLOCK(ct);
   3727 
   3728 state_change_exit:
   3729 	/*
   3730 	 * Mark the pHCI as stable again.
   3731 	 */
   3732 	MDI_PHCI_LOCK(ph);
   3733 	MDI_PHCI_STABLE(ph);
   3734 	MDI_PHCI_UNLOCK(ph);
   3735 	return (rv);
   3736 }
   3737 
   3738 /*
   3739  * mdi_pi_online():
   3740  *		Place the path_info node in the online state.  The path is
   3741  *		now available to be selected by mdi_select_path() for
   3742  *		transporting I/O requests to client devices.
   3743  * Return Values:
   3744  *		MDI_SUCCESS
   3745  *		MDI_FAILURE
   3746  */
   3747 int
   3748 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
   3749 {
   3750 	mdi_client_t	*ct = MDI_PI(pip)->pi_client;
   3751 	int		client_held = 0;
   3752 	int		rv;
   3753 
   3754 	ASSERT(ct != NULL);
   3755 	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
   3756 	if (rv != MDI_SUCCESS)
   3757 		return (rv);
   3758 
   3759 	MDI_PI_LOCK(pip);
   3760 	if (MDI_PI(pip)->pi_pm_held == 0) {
   3761 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
   3762 		    "i_mdi_pm_hold_pip %p", (void *)pip));
   3763 		i_mdi_pm_hold_pip(pip);
   3764 		client_held = 1;
   3765 	}
   3766 	MDI_PI_UNLOCK(pip);
   3767 
   3768 	if (client_held) {
   3769 		MDI_CLIENT_LOCK(ct);
   3770 		if (ct->ct_power_cnt == 0) {
   3771 			rv = i_mdi_power_all_phci(ct);
   3772 		}
   3773 
   3774 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
   3775 		    "i_mdi_pm_hold_client %p", (void *)ct));
   3776 		i_mdi_pm_hold_client(ct, 1);
   3777 		MDI_CLIENT_UNLOCK(ct);
   3778 	}
   3779 
   3780 	return (rv);
   3781 }
   3782 
   3783 /*
   3784  * mdi_pi_standby():
   3785  *		Place the mdi_pathinfo node in standby state
   3786  *
   3787  * Return Values:
   3788  *		MDI_SUCCESS
   3789  *		MDI_FAILURE
   3790  */
   3791 int
   3792 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
   3793 {
   3794 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
   3795 }
   3796 
   3797 /*
   3798  * mdi_pi_fault():
   3799  *		Place the mdi_pathinfo node in fault'ed state
   3800  * Return Values:
   3801  *		MDI_SUCCESS
   3802  *		MDI_FAILURE
   3803  */
   3804 int
   3805 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
   3806 {
   3807 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
   3808 }
   3809 
   3810 /*
   3811  * mdi_pi_offline():
   3812  *		Offline a mdi_pathinfo node.
   3813  * Return Values:
   3814  *		MDI_SUCCESS
   3815  *		MDI_FAILURE
   3816  */
   3817 int
   3818 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
   3819 {
   3820 	int	ret, client_held = 0;
   3821 	mdi_client_t	*ct;
   3822 
   3823 	/*
   3824 	 * Original code overloaded NDI_DEVI_REMOVE to this interface, and
   3825 	 * used it to mean "user initiated operation" (i.e. devctl). Callers
   3826 	 * should now just use NDI_USER_REQ.
   3827 	 */
   3828 	if (flags & NDI_DEVI_REMOVE) {
   3829 		flags &= ~NDI_DEVI_REMOVE;
   3830 		flags |= NDI_USER_REQ;
   3831 	}
   3832 
   3833 	ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
   3834 
   3835 	if (ret == MDI_SUCCESS) {
   3836 		MDI_PI_LOCK(pip);
   3837 		if (MDI_PI(pip)->pi_pm_held) {
   3838 			client_held = 1;
   3839 		}
   3840 		MDI_PI_UNLOCK(pip);
   3841 
   3842 		if (client_held) {
   3843 			ct = MDI_PI(pip)->pi_client;
   3844 			MDI_CLIENT_LOCK(ct);
   3845 			MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
   3846 			    "i_mdi_pm_rele_client\n"));
   3847 			i_mdi_pm_rele_client(ct, 1);
   3848 			MDI_CLIENT_UNLOCK(ct);
   3849 		}
   3850 	}
   3851 
   3852 	return (ret);
   3853 }
   3854 
   3855 /*
   3856  * i_mdi_pi_offline():
   3857  *		Offline a mdi_pathinfo node and call the vHCI driver's callback
   3858  */
   3859 static int
   3860 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
   3861 {
   3862 	dev_info_t	*vdip = NULL;
   3863 	mdi_vhci_t	*vh = NULL;
   3864 	mdi_client_t	*ct = NULL;
   3865 	int		(*f)();
   3866 	int		rv;
   3867 
   3868 	MDI_PI_LOCK(pip);
   3869 	ct = MDI_PI(pip)->pi_client;
   3870 	ASSERT(ct != NULL);
   3871 
   3872 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
   3873 		/*
   3874 		 * Give a chance for pending I/Os to complete.
   3875 		 */
   3876 		MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
   3877 		    "!%d cmds still pending on path %s %p",
   3878 		    MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip),
   3879 		    (void *)pip));
   3880 		if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
   3881 		    &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
   3882 		    TR_CLOCK_TICK) == -1) {
   3883 			/*
   3884 			 * The timeout time reached without ref_cnt being zero
   3885 			 * being signaled.
   3886 			 */
   3887 			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
   3888 			    "!Timeout reached on path %s %p without the cond",
   3889 			    mdi_pi_spathname(pip), (void *)pip));
   3890 			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
   3891 			    "!%d cmds still pending on path %s %p",
   3892 			    MDI_PI(pip)->pi_ref_cnt,
   3893 			    mdi_pi_spathname(pip), (void *)pip));
   3894 		}
   3895 	}
   3896 	vh = ct->ct_vhci;
   3897 	vdip = vh->vh_dip;
   3898 
   3899 	/*
   3900 	 * Notify vHCI that has registered this event
   3901 	 */
   3902 	ASSERT(vh->vh_ops);
   3903 	f = vh->vh_ops->vo_pi_state_change;
   3904 
   3905 	if (f != NULL) {
   3906 		MDI_PI_UNLOCK(pip);
   3907 		if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
   3908 		    flags)) != MDI_SUCCESS) {
   3909 			MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
   3910 			    "!vo_path_offline failed: vdip %s%d %p: path %s %p",
   3911 			    ddi_driver_name(vdip), ddi_get_instance(vdip),
   3912 			    (void *)vdip, mdi_pi_spathname(pip), (void *)pip));
   3913 		}
   3914 		MDI_PI_LOCK(pip);
   3915 	}
   3916 
   3917 	/*
   3918 	 * Set the mdi_pathinfo node state and clear the transient condition
   3919 	 */
   3920 	MDI_PI_SET_OFFLINE(pip);
   3921 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
   3922 	MDI_PI_UNLOCK(pip);
   3923 
   3924 	MDI_CLIENT_LOCK(ct);
   3925 	if (rv == MDI_SUCCESS) {
   3926 		if (ct->ct_unstable == 0) {
   3927 			dev_info_t	*cdip = ct->ct_dip;
   3928 
   3929 			/*
   3930 			 * Onlining the mdi_pathinfo node will impact the
   3931 			 * client state Update the client and dev_info node
   3932 			 * state accordingly
   3933 			 */
   3934 			i_mdi_client_update_state(ct);
   3935 			rv = NDI_SUCCESS;
   3936 			if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
   3937 				if (cdip &&
   3938 				    (i_ddi_node_state(cdip) >=
   3939 				    DS_INITIALIZED)) {
   3940 					MDI_CLIENT_UNLOCK(ct);
   3941 					rv = ndi_devi_offline(cdip,
   3942 					    NDI_DEVFS_CLEAN);
   3943 					MDI_CLIENT_LOCK(ct);
   3944 					if (rv != NDI_SUCCESS) {
   3945 						/*
   3946 						 * ndi_devi_offline failed.
   3947 						 * Reset client flags to
   3948 						 * online.
   3949 						 */
   3950 						MDI_DEBUG(4, (MDI_WARN, cdip,
   3951 						    "ndi_devi_offline failed: "
   3952 						    "error %x", rv));
   3953 						MDI_CLIENT_SET_ONLINE(ct);
   3954 					}
   3955 				}
   3956 			}
   3957 			/*
   3958 			 * Convert to MDI error code
   3959 			 */
   3960 			switch (rv) {
   3961 			case NDI_SUCCESS:
   3962 				rv = MDI_SUCCESS;
   3963 				break;
   3964 			case NDI_BUSY:
   3965 				rv = MDI_BUSY;
   3966 				break;
   3967 			default:
   3968 				rv = MDI_FAILURE;
   3969 				break;
   3970 			}
   3971 		}
   3972 		MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
   3973 		i_mdi_report_path_state(ct, pip);
   3974 	}
   3975 
   3976 	MDI_CLIENT_UNLOCK(ct);
   3977 
   3978 	/*
   3979 	 * Change in the mdi_pathinfo node state will impact the client state
   3980 	 */
   3981 	MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
   3982 	    "ct = %p pip = %p", (void *)ct, (void *)pip));
   3983 	return (rv);
   3984 }
   3985 
   3986 /*
   3987  * mdi_pi_get_node_name():
   3988  *              Get the name associated with a mdi_pathinfo node.
   3989  *              Since pathinfo nodes are not directly named, we
   3990  *              return the node_name of the client.
   3991  *
   3992  * Return Values:
   3993  *              char *
   3994  */
   3995 char *
   3996 mdi_pi_get_node_name(mdi_pathinfo_t *pip)
   3997 {
   3998 	mdi_client_t    *ct;
   3999 
   4000 	if (pip == NULL)
   4001 		return (NULL);
   4002 	ct = MDI_PI(pip)->pi_client;
   4003 	if ((ct == NULL) || (ct->ct_dip == NULL))
   4004 		return (NULL);
   4005 	return (ddi_node_name(ct->ct_dip));
   4006 }
   4007 
   4008 /*
   4009  * mdi_pi_get_addr():
   4010  *		Get the unit address associated with a mdi_pathinfo node
   4011  *
   4012  * Return Values:
   4013  *		char *
   4014  */
   4015 char *
   4016 mdi_pi_get_addr(mdi_pathinfo_t *pip)
   4017 {
   4018 	if (pip == NULL)
   4019 		return (NULL);
   4020 
   4021 	return (MDI_PI(pip)->pi_addr);
   4022 }
   4023 
   4024 /*
   4025  * mdi_pi_get_path_instance():
   4026  *		Get the 'path_instance' of a mdi_pathinfo node
   4027  *
   4028  * Return Values:
   4029  *		path_instance
   4030  */
   4031 int
   4032 mdi_pi_get_path_instance(mdi_pathinfo_t *pip)
   4033 {
   4034 	if (pip == NULL)
   4035 		return (0);
   4036 
   4037 	return (MDI_PI(pip)->pi_path_instance);
   4038 }
   4039 
   4040 /*
   4041  * mdi_pi_pathname():
   4042  *		Return pointer to path to pathinfo node.
   4043  */
   4044 char *
   4045 mdi_pi_pathname(mdi_pathinfo_t *pip)
   4046 {
   4047 	if (pip == NULL)
   4048 		return (NULL);
   4049 	return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip)));
   4050 }
   4051 
   4052 /*
   4053  * mdi_pi_spathname():
   4054  *		Return pointer to shortpath to pathinfo node. Used for debug
   4055  *		messages, so return "" instead of NULL when unknown.
   4056  */
   4057 char *
   4058 mdi_pi_spathname(mdi_pathinfo_t *pip)
   4059 {
   4060 	char	*spath = "";
   4061 
   4062 	if (pip) {
   4063 		spath = mdi_pi_spathname_by_instance(
   4064 		    mdi_pi_get_path_instance(pip));
   4065 		if (spath == NULL)
   4066 			spath = "";
   4067 	}
   4068 	return (spath);
   4069 }
   4070 
   4071 char *
   4072 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path)
   4073 {
   4074 	char *obp_path = NULL;
   4075 	if ((pip == NULL) || (path == NULL))
   4076 		return (NULL);
   4077 
   4078 	if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) {
   4079 		(void) strcpy(path, obp_path);
   4080 		(void) mdi_prop_free(obp_path);
   4081 	} else {
   4082 		path = NULL;
   4083 	}
   4084 	return (path);
   4085 }
   4086 
   4087 int
   4088 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component)
   4089 {
   4090 	dev_info_t *pdip;
   4091 	char *obp_path = NULL;
   4092 	int rc = MDI_FAILURE;
   4093 
   4094 	if (pip == NULL)
   4095 		return (MDI_FAILURE);
   4096 
   4097 	pdip = mdi_pi_get_phci(pip);
   4098 	if (pdip == NULL)
   4099 		return (MDI_FAILURE);
   4100 
   4101 	obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
   4102 
   4103 	if (ddi_pathname_obp(pdip, obp_path) == NULL) {
   4104 		(void) ddi_pathname(pdip, obp_path);
   4105 	}
   4106 
   4107 	if (component) {
   4108 		(void) strncat(obp_path, "/", MAXPATHLEN);
   4109 		(void) strncat(obp_path, component, MAXPATHLEN);
   4110 	}
   4111 	rc = mdi_prop_update_string(pip, "obp-path", obp_path);
   4112 
   4113 	if (obp_path)
   4114 		kmem_free(obp_path, MAXPATHLEN);
   4115 	return (rc);
   4116 }
   4117 
   4118 /*
   4119  * mdi_pi_get_client():
   4120  *		Get the client devinfo associated with a mdi_pathinfo node
   4121  *
   4122  * Return Values:
   4123  *		Handle to client device dev_info node
   4124  */
   4125 dev_info_t *
   4126 mdi_pi_get_client(mdi_pathinfo_t *pip)
   4127 {
   4128 	dev_info_t	*dip = NULL;
   4129 	if (pip) {
   4130 		dip = MDI_PI(pip)->pi_client->ct_dip;
   4131 	}
   4132 	return (dip);
   4133 }
   4134 
   4135 /*
   4136  * mdi_pi_get_phci():
   4137  *		Get the pHCI devinfo associated with the mdi_pathinfo node
   4138  * Return Values:
   4139  *		Handle to dev_info node
   4140  */
   4141 dev_info_t *
   4142 mdi_pi_get_phci(mdi_pathinfo_t *pip)
   4143 {
   4144 	dev_info_t	*dip = NULL;
   4145 	mdi_phci_t	*ph;
   4146 
   4147 	if (pip) {
   4148 		ph = MDI_PI(pip)->pi_phci;
   4149 		if (ph)
   4150 			dip = ph->ph_dip;
   4151 	}
   4152 	return (dip);
   4153 }
   4154 
   4155 /*
   4156  * mdi_pi_get_client_private():
   4157  *		Get the client private information associated with the
   4158  *		mdi_pathinfo node
   4159  */
   4160 void *
   4161 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
   4162 {
   4163 	void *cprivate = NULL;
   4164 	if (pip) {
   4165 		cprivate = MDI_PI(pip)->pi_cprivate;
   4166 	}
   4167 	return (cprivate);
   4168 }
   4169 
   4170 /*
   4171  * mdi_pi_set_client_private():
   4172  *		Set the client private information in the mdi_pathinfo node
   4173  */
   4174 void
   4175 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
   4176 {
   4177 	if (pip) {
   4178 		MDI_PI(pip)->pi_cprivate = priv;
   4179 	}
   4180 }
   4181 
   4182 /*
   4183  * mdi_pi_get_phci_private():
   4184  *		Get the pHCI private information associated with the
   4185  *		mdi_pathinfo node
   4186  */
   4187 caddr_t
   4188 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
   4189 {
   4190 	caddr_t	pprivate = NULL;
   4191 
   4192 	if (pip) {
   4193 		pprivate = MDI_PI(pip)->pi_pprivate;
   4194 	}
   4195 	return (pprivate);
   4196 }
   4197 
   4198 /*
   4199  * mdi_pi_set_phci_private():
   4200  *		Set the pHCI private information in the mdi_pathinfo node
   4201  */
   4202 void
   4203 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
   4204 {
   4205 	if (pip) {
   4206 		MDI_PI(pip)->pi_pprivate = priv;
   4207 	}
   4208 }
   4209 
   4210 /*
   4211  * mdi_pi_get_state():
   4212  *		Get the mdi_pathinfo node state. Transient states are internal
   4213  *		and not provided to the users
   4214  */
   4215 mdi_pathinfo_state_t
   4216 mdi_pi_get_state(mdi_pathinfo_t *pip)
   4217 {
   4218 	mdi_pathinfo_state_t    state = MDI_PATHINFO_STATE_INIT;
   4219 
   4220 	if (pip) {
   4221 		if (MDI_PI_IS_TRANSIENT(pip)) {
   4222 			/*
   4223 			 * mdi_pathinfo is in state transition.  Return the
   4224 			 * last good state.
   4225 			 */
   4226 			state = MDI_PI_OLD_STATE(pip);
   4227 		} else {
   4228 			state = MDI_PI_STATE(pip);
   4229 		}
   4230 	}
   4231 	return (state);
   4232 }
   4233 
   4234 /*
   4235  * mdi_pi_get_flags():
   4236  *		Get the mdi_pathinfo node flags.
   4237  */
   4238 uint_t
   4239 mdi_pi_get_flags(mdi_pathinfo_t *pip)
   4240 {
   4241 	return (pip ? MDI_PI(pip)->pi_flags : 0);
   4242 }
   4243 
   4244 /*
   4245  * Note that the following function needs to be the new interface for
   4246  * mdi_pi_get_state when mpxio gets integrated to ON.
   4247  */
   4248 int
   4249 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
   4250 		uint32_t *ext_state)
   4251 {
   4252 	*state = MDI_PATHINFO_STATE_INIT;
   4253 
   4254 	if (pip) {
   4255 		if (MDI_PI_IS_TRANSIENT(pip)) {
   4256 			/*
   4257 			 * mdi_pathinfo is in state transition.  Return the
   4258 			 * last good state.
   4259 			 */
   4260 			*state = MDI_PI_OLD_STATE(pip);
   4261 			*ext_state = MDI_PI_OLD_EXT_STATE(pip);
   4262 		} else {
   4263 			*state = MDI_PI_STATE(pip);
   4264 			*ext_state = MDI_PI_EXT_STATE(pip);
   4265 		}
   4266 	}
   4267 	return (MDI_SUCCESS);
   4268 }
   4269 
   4270 /*
   4271  * mdi_pi_get_preferred:
   4272  *	Get the preferred path flag
   4273  */
   4274 int
   4275 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
   4276 {
   4277 	if (pip) {
   4278 		return (MDI_PI(pip)->pi_preferred);
   4279 	}
   4280 	return (0);
   4281 }
   4282 
   4283 /*
   4284  * mdi_pi_set_preferred:
   4285  *	Set the preferred path flag
   4286  */
   4287 void
   4288 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
   4289 {
   4290 	if (pip) {
   4291 		MDI_PI(pip)->pi_preferred = preferred;
   4292 	}
   4293 }
   4294 
   4295 /*
   4296  * mdi_pi_set_state():
   4297  *		Set the mdi_pathinfo node state
   4298  */
   4299 void
   4300 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
   4301 {
   4302 	uint32_t	ext_state;
   4303 
   4304 	if (pip) {
   4305 		ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
   4306 		MDI_PI(pip)->pi_state = state;
   4307 		MDI_PI(pip)->pi_state |= ext_state;
   4308 
   4309 		/* Path has changed state, invalidate DINFOCACHE snap shot. */
   4310 		i_ddi_di_cache_invalidate();
   4311 	}
   4312 }
   4313 
   4314 /*
   4315  * Property functions:
   4316  */
   4317 int
   4318 i_map_nvlist_error_to_mdi(int val)
   4319 {
   4320 	int rv;
   4321 
   4322 	switch (val) {
   4323 	case 0:
   4324 		rv = DDI_PROP_SUCCESS;
   4325 		break;
   4326 	case EINVAL:
   4327 	case ENOTSUP:
   4328 		rv = DDI_PROP_INVAL_ARG;
   4329 		break;
   4330 	case ENOMEM:
   4331 		rv = DDI_PROP_NO_MEMORY;
   4332 		break;
   4333 	default:
   4334 		rv = DDI_PROP_NOT_FOUND;
   4335 		break;
   4336 	}
   4337 	return (rv);
   4338 }
   4339 
   4340 /*
   4341  * mdi_pi_get_next_prop():
   4342  * 		Property walk function.  The caller should hold mdi_pi_lock()
   4343  *		and release by calling mdi_pi_unlock() at the end of walk to
   4344  *		get a consistent value.
   4345  */
   4346 nvpair_t *
   4347 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
   4348 {
   4349 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
   4350 		return (NULL);
   4351 	}
   4352 	ASSERT(MDI_PI_LOCKED(pip));
   4353 	return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
   4354 }
   4355 
   4356 /*
   4357  * mdi_prop_remove():
   4358  * 		Remove the named property from the named list.
   4359  */
   4360 int
   4361 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
   4362 {
   4363 	if (pip == NULL) {
   4364 		return (DDI_PROP_NOT_FOUND);
   4365 	}
   4366 	ASSERT(!MDI_PI_LOCKED(pip));
   4367 	MDI_PI_LOCK(pip);
   4368 	if (MDI_PI(pip)->pi_prop == NULL) {
   4369 		MDI_PI_UNLOCK(pip);
   4370 		return (DDI_PROP_NOT_FOUND);
   4371 	}
   4372 	if (name) {
   4373 		(void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
   4374 	} else {
   4375 		char		nvp_name[MAXNAMELEN];
   4376 		nvpair_t	*nvp;
   4377 		nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
   4378 		while (nvp) {
   4379 			nvpair_t	*next;
   4380 			next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
   4381 			(void) snprintf(nvp_name, sizeof(nvp_name), "%s",
   4382 			    nvpair_name(nvp));
   4383 			(void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
   4384 			    nvp_name);
   4385 			nvp = next;
   4386 		}
   4387 	}
   4388 	MDI_PI_UNLOCK(pip);
   4389 	return (DDI_PROP_SUCCESS);
   4390 }
   4391 
   4392 /*
   4393  * mdi_prop_size():
   4394  * 		Get buffer size needed to pack the property data.
   4395  * 		Caller should hold the mdi_pathinfo_t lock to get a consistent
   4396  *		buffer size.
   4397  */
   4398 int
   4399 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
   4400 {
   4401 	int	rv;
   4402 	size_t	bufsize;
   4403 
   4404 	*buflenp = 0;
   4405 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
   4406 		return (DDI_PROP_NOT_FOUND);
   4407 	}
   4408 	ASSERT(MDI_PI_LOCKED(pip));
   4409 	rv = nvlist_size(MDI_PI(pip)->pi_prop,
   4410 	    &bufsize, NV_ENCODE_NATIVE);
   4411 	*buflenp = bufsize;
   4412 	return (i_map_nvlist_error_to_mdi(rv));
   4413 }
   4414 
   4415 /*
   4416  * mdi_prop_pack():
   4417  * 		pack the property list.  The caller should hold the
   4418  *		mdi_pathinfo_t node to get a consistent data
   4419  */
   4420 int
   4421 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
   4422 {
   4423 	int	rv;
   4424 	size_t	bufsize;
   4425 
   4426 	if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
   4427 		return (DDI_PROP_NOT_FOUND);
   4428 	}
   4429 
   4430 	ASSERT(MDI_PI_LOCKED(pip));
   4431 
   4432 	bufsize = buflen;
   4433 	rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
   4434 	    NV_ENCODE_NATIVE, KM_SLEEP);
   4435 
   4436 	return (i_map_nvlist_error_to_mdi(rv));
   4437 }
   4438 
   4439 /*
   4440  * mdi_prop_update_byte():
   4441  *		Create/Update a byte property
   4442  */
   4443 int
   4444 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
   4445 {
   4446 	int rv;
   4447 
   4448 	if (pip == NULL) {
   4449 		return (DDI_PROP_INVAL_ARG);
   4450 	}
   4451 	ASSERT(!MDI_PI_LOCKED(pip));
   4452 	MDI_PI_LOCK(pip);
   4453 	if (MDI_PI(pip)->pi_prop == NULL) {
   4454 		MDI_PI_UNLOCK(pip);
   4455 		return (DDI_PROP_NOT_FOUND);
   4456 	}
   4457 	rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
   4458 	MDI_PI_UNLOCK(pip);
   4459 	return (i_map_nvlist_error_to_mdi(rv));
   4460 }
   4461 
   4462 /*
   4463  * mdi_prop_update_byte_array():
   4464  *		Create/Update a byte array property
   4465  */
   4466 int
   4467 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
   4468     uint_t nelements)
   4469 {
   4470 	int rv;
   4471 
   4472 	if (pip == NULL) {
   4473 		return (DDI_PROP_INVAL_ARG);
   4474 	}
   4475 	ASSERT(!MDI_PI_LOCKED(pip));
   4476 	MDI_PI_LOCK(pip);
   4477 	if (MDI_PI(pip)->pi_prop == NULL) {
   4478 		MDI_PI_UNLOCK(pip);
   4479 		return (DDI_PROP_NOT_FOUND);
   4480 	}
   4481 	rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
   4482 	MDI_PI_UNLOCK(pip);
   4483 	return (i_map_nvlist_error_to_mdi(rv));
   4484 }
   4485 
   4486 /*
   4487  * mdi_prop_update_int():
   4488  *		Create/Update a 32 bit integer property
   4489  */
   4490 int
   4491 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
   4492 {
   4493 	int rv;
   4494 
   4495 	if (pip == NULL) {
   4496 		return (DDI_PROP_INVAL_ARG);
   4497 	}
   4498 	ASSERT(!MDI_PI_LOCKED(pip));
   4499 	MDI_PI_LOCK(pip);
   4500 	if (MDI_PI(pip)->pi_prop == NULL) {
   4501 		MDI_PI_UNLOCK(pip);
   4502 		return (DDI_PROP_NOT_FOUND);
   4503 	}
   4504 	rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
   4505 	MDI_PI_UNLOCK(pip);
   4506 	return (i_map_nvlist_error_to_mdi(rv));
   4507 }
   4508 
   4509 /*
   4510  * mdi_prop_update_int64():
   4511  *		Create/Update a 64 bit integer property
   4512  */
   4513 int
   4514 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
   4515 {
   4516 	int rv;
   4517 
   4518 	if (pip == NULL) {
   4519 		return (DDI_PROP_INVAL_ARG);
   4520 	}
   4521 	ASSERT(!MDI_PI_LOCKED(pip));
   4522 	MDI_PI_LOCK(pip);
   4523 	if (MDI_PI(pip)->pi_prop == NULL) {
   4524 		MDI_PI_UNLOCK(pip);
   4525 		return (DDI_PROP_NOT_FOUND);
   4526 	}
   4527 	rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
   4528 	MDI_PI_UNLOCK(pip);
   4529 	return (i_map_nvlist_error_to_mdi(rv));
   4530 }
   4531 
   4532 /*
   4533  * mdi_prop_update_int_array():
   4534  *		Create/Update a int array property
   4535  */
   4536 int
   4537 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
   4538 	    uint_t nelements)
   4539 {
   4540 	int rv;
   4541 
   4542 	if (pip == NULL) {
   4543 		return (DDI_PROP_INVAL_ARG);
   4544 	}
   4545 	ASSERT(!MDI_PI_LOCKED(pip));
   4546 	MDI_PI_LOCK(pip);
   4547 	if (MDI_PI(pip)->pi_prop == NULL) {
   4548 		MDI_PI_UNLOCK(pip);
   4549 		return (DDI_PROP_NOT_FOUND);
   4550 	}
   4551 	rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
   4552 	    nelements);
   4553 	MDI_PI_UNLOCK(pip);
   4554 	return (i_map_nvlist_error_to_mdi(rv));
   4555 }
   4556 
   4557 /*
   4558  * mdi_prop_update_string():
   4559  *		Create/Update a string property
   4560  */
   4561 int
   4562 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
   4563 {
   4564 	int rv;
   4565 
   4566 	if (pip == NULL) {
   4567 		return (DDI_PROP_INVAL_ARG);
   4568 	}
   4569 	ASSERT(!MDI_PI_LOCKED(pip));
   4570 	MDI_PI_LOCK(pip);
   4571 	if (MDI_PI(pip)->pi_prop == NULL) {
   4572 		MDI_PI_UNLOCK(pip);
   4573 		return (DDI_PROP_NOT_FOUND);
   4574 	}
   4575 	rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
   4576 	MDI_PI_UNLOCK(pip);
   4577 	return (i_map_nvlist_error_to_mdi(rv));
   4578 }
   4579 
   4580 /*
   4581  * mdi_prop_update_string_array():
   4582  *		Create/Update a string array property
   4583  */
   4584 int
   4585 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
   4586     uint_t nelements)
   4587 {
   4588 	int rv;
   4589 
   4590 	if (pip == NULL) {
   4591 		return (DDI_PROP_INVAL_ARG);
   4592 	}
   4593 	ASSERT(!MDI_PI_LOCKED(pip));
   4594 	MDI_PI_LOCK(pip);
   4595 	if (MDI_PI(pip)->pi_prop == NULL) {
   4596 		MDI_PI_UNLOCK(pip);
   4597 		return (DDI_PROP_NOT_FOUND);
   4598 	}
   4599 	rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
   4600 	    nelements);
   4601 	MDI_PI_UNLOCK(pip);
   4602 	return (i_map_nvlist_error_to_mdi(rv));
   4603 }
   4604 
   4605 /*
   4606  * mdi_prop_lookup_byte():
   4607  * 		Look for byte property identified by name.  The data returned
   4608  *		is the actual property and valid as long as mdi_pathinfo_t node
   4609  *		is alive.
   4610  */
   4611 int
   4612 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
   4613 {
   4614 	int rv;
   4615 
   4616 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
   4617 		return (DDI_PROP_NOT_FOUND);
   4618 	}
   4619 	rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
   4620 	return (i_map_nvlist_error_to_mdi(rv));
   4621 }
   4622 
   4623 
   4624 /*
   4625  * mdi_prop_lookup_byte_array():
   4626  * 		Look for byte array property identified by name.  The data
   4627  *		returned is the actual property and valid as long as
   4628  *		mdi_pathinfo_t node is alive.
   4629  */
   4630 int
   4631 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
   4632     uint_t *nelements)
   4633 {
   4634 	int rv;
   4635 
   4636 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
   4637 		return (DDI_PROP_NOT_FOUND);
   4638 	}
   4639 	rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
   4640 	    nelements);
   4641 	return (i_map_nvlist_error_to_mdi(rv));
   4642 }
   4643 
   4644 /*
   4645  * mdi_prop_lookup_int():
   4646  * 		Look for int property identified by name.  The data returned
   4647  *		is the actual property and valid as long as mdi_pathinfo_t
   4648  *		node is alive.
   4649  */
   4650 int
   4651 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
   4652 {
   4653 	int rv;
   4654 
   4655 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
   4656 		return (DDI_PROP_NOT_FOUND);
   4657 	}
   4658 	rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
   4659 	return (i_map_nvlist_error_to_mdi(rv));
   4660 }
   4661 
   4662 /*
   4663  * mdi_prop_lookup_int64():
   4664  * 		Look for int64 property identified by name.  The data returned
   4665  *		is the actual property and valid as long as mdi_pathinfo_t node
   4666  *		is alive.
   4667  */
   4668 int
   4669 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
   4670 {
   4671 	int rv;
   4672 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
   4673 		return (DDI_PROP_NOT_FOUND);
   4674 	}
   4675 	rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
   4676 	return (i_map_nvlist_error_to_mdi(rv));
   4677 }
   4678 
   4679 /*
   4680  * mdi_prop_lookup_int_array():
   4681  * 		Look for int array property identified by name.  The data
   4682  *		returned is the actual property and valid as long as
   4683  *		mdi_pathinfo_t node is alive.
   4684  */
   4685 int
   4686 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
   4687     uint_t *nelements)
   4688 {
   4689 	int rv;
   4690 
   4691 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
   4692 		return (DDI_PROP_NOT_FOUND);
   4693 	}
   4694 	rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
   4695 	    (int32_t **)data, nelements);
   4696 	return (i_map_nvlist_error_to_mdi(rv));
   4697 }
   4698 
   4699 /*
   4700  * mdi_prop_lookup_string():
   4701  * 		Look for string property identified by name.  The data
   4702  *		returned is the actual property and valid as long as
   4703  *		mdi_pathinfo_t node is alive.
   4704  */
   4705 int
   4706 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
   4707 {
   4708 	int rv;
   4709 
   4710 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
   4711 		return (DDI_PROP_NOT_FOUND);
   4712 	}
   4713 	rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
   4714 	return (i_map_nvlist_error_to_mdi(rv));
   4715 }
   4716 
   4717 /*
   4718  * mdi_prop_lookup_string_array():
   4719  * 		Look for string array property identified by name.  The data
   4720  *		returned is the actual property and valid as long as
   4721  *		mdi_pathinfo_t node is alive.
   4722  */
   4723 int
   4724 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
   4725     uint_t *nelements)
   4726 {
   4727 	int rv;
   4728 
   4729 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
   4730 		return (DDI_PROP_NOT_FOUND);
   4731 	}
   4732 	rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
   4733 	    nelements);
   4734 	return (i_map_nvlist_error_to_mdi(rv));
   4735 }
   4736 
   4737 /*
   4738  * mdi_prop_free():
   4739  * 		Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
   4740  *		functions return the pointer to actual property data and not a
   4741  *		copy of it.  So the data returned is valid as long as
   4742  *		mdi_pathinfo_t node is valid.
   4743  */
   4744 /*ARGSUSED*/
   4745 int
   4746 mdi_prop_free(void *data)
   4747 {
   4748 	return (DDI_PROP_SUCCESS);
   4749 }
   4750 
   4751 /*ARGSUSED*/
   4752 static void
   4753 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
   4754 {
   4755 	char		*ct_path;
   4756 	char		*ct_status;
   4757 	char		*status;
   4758 	dev_info_t	*cdip = ct->ct_dip;
   4759 	char		lb_buf[64];
   4760 	int		report_lb_c = 0, report_lb_p = 0;
   4761 
   4762 	ASSERT(MDI_CLIENT_LOCKED(ct));
   4763 	if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) ||
   4764 	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
   4765 		return;
   4766 	}
   4767 	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
   4768 		ct_status = "optimal";
   4769 		report_lb_c = 1;
   4770 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
   4771 		ct_status = "degraded";
   4772 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
   4773 		ct_status = "failed";
   4774 	} else {
   4775 		ct_status = "unknown";
   4776 	}
   4777 
   4778 	lb_buf[0] = 0;		/* not interested in load balancing config */
   4779 
   4780 	if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) {
   4781 		status = "removed";
   4782 	} else if (MDI_PI_IS_OFFLINE(pip)) {
   4783 		status = "offline";
   4784 	} else if (MDI_PI_IS_ONLINE(pip)) {
   4785 		status = "online";
   4786 		report_lb_p = 1;
   4787 	} else if (MDI_PI_IS_STANDBY(pip)) {
   4788 		status = "standby";
   4789 	} else if (MDI_PI_IS_FAULT(pip)) {
   4790 		status = "faulted";
   4791 	} else {
   4792 		status = "unknown";
   4793 	}
   4794 
   4795 	if (cdip) {
   4796 		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
   4797 
   4798 		/*
   4799 		 * NOTE: Keeping "multipath status: %s" and
   4800 		 * "Load balancing: %s" format unchanged in case someone
   4801 		 * scrubs /var/adm/messages looking for these messages.
   4802 		 */
   4803 		if (report_lb_c && report_lb_p) {
   4804 			if (ct->ct_lb == LOAD_BALANCE_LBA) {
   4805 				(void) snprintf(lb_buf, sizeof (lb_buf),
   4806 				    "%s, region-size: %d", mdi_load_balance_lba,
   4807 				    ct->ct_lb_args->region_size);
   4808 			} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
   4809 				(void) snprintf(lb_buf, sizeof (lb_buf),
   4810 				    "%s", mdi_load_balance_none);
   4811 			} else {
   4812 				(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
   4813 				    mdi_load_balance_rr);
   4814 			}
   4815 
   4816 			cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
   4817 			    "?%s (%s%d) multipath status: %s: "
   4818 			    "path %d %s is %s: Load balancing: %s\n",
   4819 			    ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
   4820 			    ddi_get_instance(cdip), ct_status,
   4821 			    mdi_pi_get_path_instance(pip),
   4822 			    mdi_pi_spathname(pip), status, lb_buf);
   4823 		} else {
   4824 			cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
   4825 			    "?%s (%s%d) multipath status: %s: "
   4826 			    "path %d %s is %s\n",
   4827 			    ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
   4828 			    ddi_get_instance(cdip), ct_status,
   4829 			    mdi_pi_get_path_instance(pip),
   4830 			    mdi_pi_spathname(pip), status);
   4831 		}
   4832 
   4833 		kmem_free(ct_path, MAXPATHLEN);
   4834 		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
   4835 	}
   4836 }
   4837 
   4838 #ifdef	DEBUG
   4839 /*
   4840  * i_mdi_log():
   4841  *		Utility function for error message management
   4842  *
   4843  *		NOTE: Implementation takes care of trailing \n for cmn_err,
   4844  *		MDI_DEBUG should not terminate fmt strings with \n.
   4845  *
   4846  *		NOTE: If the level is >= 2, and there is no leading !?^
   4847  *		then a leading ! is implied (but can be overriden via
   4848  *		mdi_debug_consoleonly). If you are using kmdb on the console,
   4849  *		consider setting mdi_debug_consoleonly to 1 as an aid.
   4850  */
   4851 /*PRINTFLIKE4*/
   4852 static void
   4853 i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...)
   4854 {
   4855 	char		name[MAXNAMELEN];
   4856 	char		buf[512];
   4857 	char		*bp;
   4858 	va_list		ap;
   4859 	int		log_only = 0;
   4860 	int		boot_only = 0;
   4861 	int		console_only = 0;
   4862 
   4863 	if (dip) {
   4864 		(void) snprintf(name, sizeof(name), "%s%d: ",
   4865 		    ddi_driver_name(dip), ddi_get_instance(dip));
   4866 	} else {
   4867 		name[0] = 0;
   4868 	}
   4869 
   4870 	va_start(ap, fmt);
   4871 	(void) vsnprintf(buf, sizeof(buf), fmt, ap);
   4872 	va_end(ap);
   4873 
   4874 	switch (buf[0]) {
   4875 	case '!':
   4876 		bp = &buf[1];
   4877 		log_only = 1;
   4878 		break;
   4879 	case '?':
   4880 		bp = &buf[1];
   4881 		boot_only = 1;
   4882 		break;
   4883 	case '^':
   4884 		bp = &buf[1];
   4885 		console_only = 1;
   4886 		break;
   4887 	default:
   4888 		if (level >= 2)
   4889 			log_only = 1;		/* ! implied */
   4890 		bp = buf;
   4891 		break;
   4892 	}
   4893 	if (mdi_debug_logonly) {
   4894 		log_only = 1;
   4895 		boot_only = 0;
   4896 		console_only = 0;
   4897 	}
   4898 	if (mdi_debug_consoleonly) {
   4899 		log_only = 0;
   4900 		boot_only = 0;
   4901 		console_only = 1;
   4902 		level = CE_NOTE;
   4903 		goto console;
   4904 	}
   4905 
   4906 	switch (level) {
   4907 	case CE_NOTE:
   4908 		level = CE_CONT;
   4909 		/* FALLTHROUGH */
   4910 	case CE_CONT:
   4911 		if (boot_only) {
   4912 			cmn_err(level, "?mdi: %s%s: %s\n", name, func, bp);
   4913 		} else if (console_only) {
   4914 			cmn_err(level, "^mdi: %s%s: %s\n", name, func, bp);
   4915 		} else if (log_only) {
   4916 			cmn_err(level, "!mdi: %s%s: %s\n", name, func, bp);
   4917 		} else {
   4918 			cmn_err(level, "mdi: %s%s: %s\n", name, func, bp);
   4919 		}
   4920 		break;
   4921 
   4922 	case CE_WARN:
   4923 	case CE_PANIC:
   4924 	console:
   4925 		if (boot_only) {
   4926 			cmn_err(level, "?mdi: %s%s: %s", name, func, bp);
   4927 		} else if (console_only) {
   4928 			cmn_err(level, "^mdi: %s%s: %s", name, func, bp);
   4929 		} else if (log_only) {
   4930 			cmn_err(level, "!mdi: %s%s: %s", name, func, bp);
   4931 		} else {
   4932 			cmn_err(level, "mdi: %s%s: %s", name, func, bp);
   4933 		}
   4934 		break;
   4935 	default:
   4936 		cmn_err(level, "mdi: %s%s", name, bp);
   4937 		break;
   4938 	}
   4939 }
   4940 #endif	/* DEBUG */
   4941 
   4942 void
   4943 i_mdi_client_online(dev_info_t *ct_dip)
   4944 {
   4945 	mdi_client_t	*ct;
   4946 
   4947 	/*
   4948 	 * Client online notification.  Mark the client state as online and
   4949 	 * restore our binding with the dev_info node.  The state change, the
        	 * ct_dip binding and the PM hold below are all made while holding
        	 * the client lock.
   4950 	 */
   4951 	ct = i_devi_get_client(ct_dip);
   4952 	ASSERT(ct != NULL);
   4953 	MDI_CLIENT_LOCK(ct);
   4954 	MDI_CLIENT_SET_ONLINE(ct);
   4955 	/* sanity check: client is either unbound or already bound to ct_dip */
   4956 	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
   4957 	ct->ct_dip = ct_dip;
   4958 
        	/*
        	 * When the client's power count is zero, call
        	 * i_mdi_power_all_phci(); its result is deliberately ignored.
        	 */
   4959 	if (ct->ct_power_cnt == 0)
   4960 		(void) i_mdi_power_all_phci(ct);
   4961 
   4962 	MDI_DEBUG(4, (MDI_NOTE, ct_dip,
   4963 	    "i_mdi_pm_hold_client %p", (void *)ct));
   4964 	i_mdi_pm_hold_client(ct, 1);
   4965 
   4966 	MDI_CLIENT_UNLOCK(ct);
   4967 }
   4968 
   4969 void
   4970 i_mdi_phci_online(dev_info_t *ph_dip)
   4971 {
   4972 	mdi_phci_t	*phci = i_devi_get_phci(ph_dip);
   4973 
   4974 	/* pHCI online notification: flag the pHCI online under its lock */
   4975 	ASSERT(phci != NULL);
   4976 
   4977 	MDI_PHCI_LOCK(phci);
   4978 	MDI_PHCI_SET_ONLINE(phci);
   4979 	MDI_PHCI_UNLOCK(phci);
   4980 }
   4981 
   4982 /*
   4983  * mdi_devi_online():
   4984  *		Online notification from the NDI framework for a pHCI and/or
   4985  *		client device node.  The pHCI and client role checks are
   4986  *		independent of each other, and the pHCI role is handled first.
   4987  * Return Values:
   4988  *		NDI_SUCCESS (the only value this routine returns)
   4989  */
   4990 /*ARGSUSED*/
   4991 int
   4992 mdi_devi_online(dev_info_t *dip, uint_t flags)
   4993 {
   4994 	if (MDI_PHCI(dip))
   4995 		i_mdi_phci_online(dip);
   4996 
   4997 	if (MDI_CLIENT(dip))
   4998 		i_mdi_client_online(dip);
   4999 
   5000 	return (NDI_SUCCESS);
   5001 }
   5003 
   5004 /*
   5005  * mdi_devi_offline():
   5006  *		Offline notification from the NDI framework for a pHCI and/or
   5007  *		client device node.  The client role is offlined first; if the
   5008  *		pHCI role then fails to offline, the client role is put back
   5009  *		online.
   5010  *
   5011  * Return Values:
   5012  *		NDI_SUCCESS
   5013  *		otherwise the failure code returned by i_mdi_client_offline()
   5014  *		or i_mdi_phci_offline() (e.g. NDI_BUSY)
   5015  */
   5016 /*ARGSUSED*/
   5017 int
   5018 mdi_devi_offline(dev_info_t *dip, uint_t flags)
   5019 {
   5020 	int		status;
   5021 
   5022 	if (MDI_CLIENT(dip)) {
   5023 		status = i_mdi_client_offline(dip, flags);
   5024 		if (status != NDI_SUCCESS)
   5025 			return (status);
   5026 	}
   5027 
   5028 	/* not a pHCI: nothing further to offline */
   5029 	if (!MDI_PHCI(dip))
   5030 		return (NDI_SUCCESS);
   5031 
   5032 	status = i_mdi_phci_offline(dip, flags);
   5033 	if ((status != NDI_SUCCESS) && MDI_CLIENT(dip))
   5034 		i_mdi_client_online(dip);	/* set client back online */
   5035 	return (status);
        }
   5036 
   5037 /*ARGSUSED*/
   5038 static int
   5039 i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
   5040 {
   5041 	int		rv = NDI_SUCCESS;
   5042 	mdi_phci_t	*ph;
   5043 	mdi_client_t	*ct;
   5044 	mdi_pathinfo_t	*pip;
   5045 	mdi_pathinfo_t	*next;
   5046 	mdi_pathinfo_t	*failed_pip = NULL;
   5047 	dev_info_t	*cdip;
   5048 
   5049 	/*
   5050 	 * pHCI component offline notification
   5051 	 * Make sure that this pHCI instance is free to be offlined.
   5052 	 * If it is OK to proceed, Offline and remove all the child
   5053 	 * mdi_pathinfo nodes.  This process automatically offlines
   5054 	 * corresponding client devices, for which this pHCI provides
   5055 	 * critical services.
        	 *
        	 * Returns NDI_SUCCESS when no pHCI is bound to dip, when the pHCI
        	 * is already offline, or when every path was offlined; returns
        	 * NDI_BUSY otherwise.
   5056 	 */
   5057 	ph = i_devi_get_phci(dip);
   5058 	MDI_DEBUG(2, (MDI_NOTE, dip,
   5059 	    "called %p %p", (void *)dip, (void *)ph));
   5060 	if (ph == NULL) {
   5061 		return (rv);
   5062 	}
   5063 
   5064 	MDI_PHCI_LOCK(ph);
   5065 
   5066 	if (MDI_PHCI_IS_OFFLINE(ph)) {
   5067 		MDI_DEBUG(1, (MDI_WARN, dip,
   5068 		    "!pHCI already offlined: %p", (void *)dip));
   5069 		MDI_PHCI_UNLOCK(ph);
   5070 		return (NDI_SUCCESS);
   5071 	}
   5072 
   5073 	/*
   5074 	 * Check to see if the pHCI can be offlined
   5075 	 */
   5076 	if (ph->ph_unstable) {
   5077 		MDI_DEBUG(1, (MDI_WARN, dip,
   5078 		    "!One or more target devices are in transient state. "
   5079 		    "This device can not be removed at this moment. "
   5080 		    "Please try again later."));
   5081 		MDI_PHCI_UNLOCK(ph);
   5082 		return (NDI_BUSY);
   5083 	}
   5084 
        	/*
        	 * Pass 1: walk every path of this pHCI.  Fail with NDI_BUSY if any
        	 * client is unstable or failing over; offline the client dip of
        	 * every client for which this pHCI is the last usable path.
        	 */
   5085 	pip = ph->ph_path_head;
   5086 	while (pip != NULL) {
   5087 		MDI_PI_LOCK(pip);
        		/* capture the successor now; locks are dropped further down */
   5088 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   5089 
   5090 		/*
   5091 		 * The mdi_pathinfo state is OK. Check the client state.
   5092 		 * If failover in progress fail the pHCI from offlining
   5093 		 */
   5094 		ct = MDI_PI(pip)->pi_client;
   5095 		i_mdi_client_lock(ct, pip);
   5096 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
   5097 		    (ct->ct_unstable)) {
   5098 			/*
   5099 			 * Failover is in progress, Fail the DR
   5100 			 */
   5101 			MDI_DEBUG(1, (MDI_WARN, dip,
   5102 			    "!pHCI device is busy. "
   5103 			    "This device can not be removed at this moment. "
   5104 			    "Please try again later."));
   5105 			MDI_PI_UNLOCK(pip);
   5106 			i_mdi_client_unlock(ct);
   5107 			MDI_PHCI_UNLOCK(ph);
   5108 			return (NDI_BUSY);
   5109 		}
   5110 		MDI_PI_UNLOCK(pip);
   5111 
   5112 		/*
   5113 		 * Check to see if we are removing the last path of this
   5114 		 * client device...
   5115 		 */
   5116 		cdip = ct->ct_dip;
   5117 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
   5118 		    (i_mdi_client_compute_state(ct, ph) ==
   5119 		    MDI_CLIENT_STATE_FAILED)) {
        			/*
        			 * Both the client and pHCI locks are released
        			 * around the ndi_devi_offline() call.
        			 */
   5120 			i_mdi_client_unlock(ct);
   5121 			MDI_PHCI_UNLOCK(ph);
   5122 			if (ndi_devi_offline(cdip,
   5123 			    NDI_DEVFS_CLEAN) != NDI_SUCCESS) {
   5124 				/*
   5125 				 * ndi_devi_offline() failed.
   5126 				 * This pHCI provides the critical path
   5127 				 * to one or more client devices.
   5128 				 * Return busy.
   5129 				 */
   5130 				MDI_PHCI_LOCK(ph);
   5131 				MDI_DEBUG(1, (MDI_WARN, dip,
   5132 				    "!pHCI device is busy. "
   5133 				    "This device can not be removed at this "
   5134 				    "moment. Please try again later."));
        				/* remember where to stop the rollback walk */
   5135 				failed_pip = pip;
   5136 				break;
   5137 			} else {
   5138 				MDI_PHCI_LOCK(ph);
   5139 				pip = next;
   5140 			}
   5141 		} else {
   5142 			i_mdi_client_unlock(ct);
   5143 			pip = next;
   5144 		}
   5145 	}
   5146 
        	/*
        	 * Rollback: a client offline failed in pass 1.  Re-walk the paths
        	 * that precede failed_pip and, for clients that still have a dip,
        	 * online the dip when the client state is OPTIMAL or DEGRADED and
        	 * offline it when the state is FAILED.  Results of those calls are
        	 * ignored; the routine then reports NDI_BUSY.
        	 */
   5147 	if (failed_pip) {
   5148 		pip = ph->ph_path_head;
   5149 		while (pip != failed_pip) {
   5150 			MDI_PI_LOCK(pip);
   5151 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   5152 			ct = MDI_PI(pip)->pi_client;
   5153 			i_mdi_client_lock(ct, pip);
   5154 			cdip = ct->ct_dip;
   5155 			switch (MDI_CLIENT_STATE(ct)) {
   5156 			case MDI_CLIENT_STATE_OPTIMAL:
   5157 			case MDI_CLIENT_STATE_DEGRADED:
   5158 				if (cdip) {
   5159 					MDI_PI_UNLOCK(pip);
   5160 					i_mdi_client_unlock(ct);
   5161 					MDI_PHCI_UNLOCK(ph);
   5162 					(void) ndi_devi_online(cdip, 0);
   5163 					MDI_PHCI_LOCK(ph);
   5164 					pip = next;
   5165 					continue;
   5166 				}
   5167 				break;
   5168 
   5169 			case MDI_CLIENT_STATE_FAILED:
   5170 				if (cdip) {
   5171 					MDI_PI_UNLOCK(pip);
   5172 					i_mdi_client_unlock(ct);
   5173 					MDI_PHCI_UNLOCK(ph);
   5174 					(void) ndi_devi_offline(cdip,
   5175 						NDI_DEVFS_CLEAN);
   5176 					MDI_PHCI_LOCK(ph);
   5177 					pip = next;
   5178 					continue;
   5179 				}
   5180 				break;
   5181 			}
        			/* no dip (or other state): just drop the locks and move on */
   5182 			MDI_PI_UNLOCK(pip);
   5183 			i_mdi_client_unlock(ct);
   5184 			pip = next;
   5185 		}
   5186 		MDI_PHCI_UNLOCK(ph);
   5187 		return (NDI_BUSY);
   5188 	}
   5189 
   5190 	/*
   5191 	 * Mark the pHCI as offline
   5192 	 */
   5193 	MDI_PHCI_SET_OFFLINE(ph);
   5194 
   5195 	/*
   5196 	 * Mark the child mdi_pathinfo nodes as transient
   5197 	 */
   5198 	pip = ph->ph_path_head;
   5199 	while (pip != NULL) {
   5200 		MDI_PI_LOCK(pip);
   5201 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   5202 		MDI_PI_SET_OFFLINING(pip);
   5203 		MDI_PI_UNLOCK(pip);
   5204 		pip = next;
   5205 	}
   5206 	MDI_PHCI_UNLOCK(ph);
   5207 	/*
   5208 	 * Give a chance for any pending commands to execute
        	 * (the pHCI lock is not held across the delay)
   5209 	 */
   5210 	delay_random(mdi_delay);
   5211 	MDI_PHCI_LOCK(ph);
        	/*
        	 * Pass 2: offline each path.  If any path is not offline after
        	 * i_mdi_pi_offline(), flip the pHCI back to online and return
        	 * NDI_BUSY.
        	 */
   5212 	pip = ph->ph_path_head;
   5213 	while (pip != NULL) {
   5214 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   5215 		(void) i_mdi_pi_offline(pip, flags);
   5216 		MDI_PI_LOCK(pip);
        		/* NOTE(review): ct is assigned here but not read in this loop */
   5217 		ct = MDI_PI(pip)->pi_client;
   5218 		if (!MDI_PI_IS_OFFLINE(pip)) {
   5219 			MDI_DEBUG(1, (MDI_WARN, dip,
   5220 			    "!pHCI device is busy. "
   5221 			    "This device can not be removed at this moment. "
   5222 			    "Please try again later."));
   5223 			MDI_PI_UNLOCK(pip);
   5224 			MDI_PHCI_SET_ONLINE(ph);
   5225 			MDI_PHCI_UNLOCK(ph);
   5226 			return (NDI_BUSY);
   5227 		}
   5228 		MDI_PI_UNLOCK(pip);
   5229 		pip = next;
   5230 	}
   5231 	MDI_PHCI_UNLOCK(ph);
   5232 
   5233 	return (rv);
   5234 }
   5235 
   5236 void
   5237 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
   5238 {
   5239 	mdi_phci_t	*ph;
   5240 	mdi_client_t	*ct;
   5241 	mdi_pathinfo_t	*pip;
   5242 	mdi_pathinfo_t	*next;
   5243 	dev_info_t	*cdip;
   5244 
        	/*
        	 * Walk the paths of the pHCI bound to dip and call
        	 * e_ddi_mark_retiring() on the dip of every client for which this
        	 * pHCI is the last path.  Does nothing when dip is not a pHCI, has
        	 * no mdi_phci_t, or the pHCI is already offline.
        	 */
   5245 	if (!MDI_PHCI(dip))
   5246 		return;
   5247 
   5248 	ph = i_devi_get_phci(dip);
   5249 	if (ph == NULL) {
   5250 		return;
   5251 	}
   5252 
   5253 	MDI_PHCI_LOCK(ph);
   5254 
   5255 	if (MDI_PHCI_IS_OFFLINE(ph)) {
   5256 		/* has no last path */
   5257 		MDI_PHCI_UNLOCK(ph);
   5258 		return;
   5259 	}
   5260 
   5261 	pip = ph->ph_path_head;
   5262 	while (pip != NULL) {
   5263 		MDI_PI_LOCK(pip);
        		/* capture the successor before any lock is dropped */
   5264 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   5265 
   5266 		ct = MDI_PI(pip)->pi_client;
   5267 		i_mdi_client_lock(ct, pip);
   5268 		MDI_PI_UNLOCK(pip);
   5269 
   5270 		cdip = ct->ct_dip;
   5271 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
   5272 		    (i_mdi_client_compute_state(ct, ph) ==
   5273 		    MDI_CLIENT_STATE_FAILED)) {
   5274 			/* Last path. Mark client dip as retiring */
        			/* client and pHCI locks are released around the call */
   5275 			i_mdi_client_unlock(ct);
   5276 			MDI_PHCI_UNLOCK(ph);
   5277 			(void) e_ddi_mark_retiring(cdip, cons_array);
   5278 			MDI_PHCI_LOCK(ph);
   5279 			pip = next;
   5280 		} else {
   5281 			i_mdi_client_unlock(ct);
   5282 			pip = next;
   5283 		}
   5284 	}
   5285 
   5286 	MDI_PHCI_UNLOCK(ph);
   5287 
   5288 	return;
   5289 }
   5290 
   5291 void
   5292 mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
   5293 {
   5294 	mdi_phci_t	*ph;
   5295 	mdi_client_t	*ct;
   5296 	mdi_pathinfo_t	*pip;
   5297 	mdi_pathinfo_t	*next;
   5298 	dev_info_t	*cdip;
   5299 
        	/*
        	 * Walk the paths of the pHCI bound to dip and call
        	 * e_ddi_retire_notify() on the dip of every client for which this
        	 * pHCI is the last path.  *constraint is set to 0 when the pHCI or
        	 * one of its clients is unstable or failing over; it is left
        	 * untouched on the early returns for a non-pHCI dip, a missing
        	 * mdi_phci_t or an already offline pHCI.
        	 */
   5300 	if (!MDI_PHCI(dip))
   5301 		return;
   5302 
   5303 	ph = i_devi_get_phci(dip);
   5304 	if (ph == NULL)
   5305 		return;
   5306 
   5307 	MDI_PHCI_LOCK(ph);
   5308 
   5309 	if (MDI_PHCI_IS_OFFLINE(ph)) {
   5310 		MDI_PHCI_UNLOCK(ph);
   5311 		/* not last path */
   5312 		return;
   5313 	}
   5314 
   5315 	if (ph->ph_unstable) {
   5316 		MDI_PHCI_UNLOCK(ph);
   5317 		/* can't check for constraints */
   5318 		*constraint = 0;
   5319 		return;
   5320 	}
   5321 
   5322 	pip = ph->ph_path_head;
   5323 	while (pip != NULL) {
   5324 		MDI_PI_LOCK(pip);
        		/* capture the successor before any lock is dropped */
   5325 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   5326 
   5327 		/*
   5328 		 * The mdi_pathinfo state is OK. Check the client state.
   5329 		 * If failover is in progress, constraints cannot be checked.
   5330 		 */
   5331 		ct = MDI_PI(pip)->pi_client;
   5332 		i_mdi_client_lock(ct, pip);
   5333 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
   5334 		    (ct->ct_unstable)) {
   5335 			/*
   5336 			 * Failover is in progress, can't check for constraints
   5337 			 */
   5338 			MDI_PI_UNLOCK(pip);
   5339 			i_mdi_client_unlock(ct);
   5340 			MDI_PHCI_UNLOCK(ph);
   5341 			*constraint = 0;
   5342 			return;
   5343 		}
   5344 		MDI_PI_UNLOCK(pip);
   5345 
   5346 		/*
   5347 		 * Check to see if we are retiring the last path of this
   5348 		 * client device...
   5349 		 */
   5350 		cdip = ct->ct_dip;
   5351 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
   5352 		    (i_mdi_client_compute_state(ct, ph) ==
   5353 		    MDI_CLIENT_STATE_FAILED)) {
        			/* client and pHCI locks are released around the call */
   5354 			i_mdi_client_unlock(ct);
   5355 			MDI_PHCI_UNLOCK(ph);
   5356 			(void) e_ddi_retire_notify(cdip, constraint);
   5357 			MDI_PHCI_LOCK(ph);
   5358 			pip = next;
   5359 		} else {
   5360 			i_mdi_client_unlock(ct);
   5361 			pip = next;
   5362 		}
   5363 	}
   5364 
   5365 	MDI_PHCI_UNLOCK(ph);
   5366 
   5367 	return;
   5368 }
   5369 
   5370 /*
   5371  * offline the path(s) hanging off the pHCI. If the
   5372  * last path to any client, check that constraints
   5373  * have been applied.
        *
        * When phci_only is non-zero the client dips are not finalized.  If the
        * pHCI or any of its clients is unstable or failing over, a warning is
        * logged and nothing is offlined.  If a path is still not offline after
        * i_mdi_pi_offline(), a warning is logged and the pHCI is marked online
        * again.
   5374  */
   5375 void
   5376 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only)
   5377 {
   5378 	mdi_phci_t	*ph;
   5379 	mdi_client_t	*ct;
   5380 	mdi_pathinfo_t	*pip;
   5381 	mdi_pathinfo_t	*next;
   5382 	dev_info_t	*cdip;
   5383 	int		unstable = 0;
   5384 	int		constraint;
   5385 
   5386 	if (!MDI_PHCI(dip))
   5387 		return;
   5388 
   5389 	ph = i_devi_get_phci(dip);
   5390 	if (ph == NULL) {
   5391 		/* no last path and no pips */
   5392 		return;
   5393 	}
   5394 
   5395 	MDI_PHCI_LOCK(ph);
   5396 
   5397 	if (MDI_PHCI_IS_OFFLINE(ph)) {
   5398 		MDI_PHCI_UNLOCK(ph);
   5399 		/* no last path and no pips */
   5400 		return;
   5401 	}
   5402 
   5403 	/*
   5404 	 * Check to see if the pHCI can be offlined
        	 * (instability is only recorded here; it is acted on after the
        	 * client walk below)
   5405 	 */
   5406 	if (ph->ph_unstable) {
   5407 		unstable = 1;
   5408 	}
   5409 
   5410 	pip = ph->ph_path_head;
   5411 	while (pip != NULL) {
   5412 		MDI_PI_LOCK(pip);
        		/* capture the successor before any lock is dropped */
   5413 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   5414 
   5415 		/*
   5416 		 * if failover in progress fail the pHCI from offlining
   5417 		 */
   5418 		ct = MDI_PI(pip)->pi_client;
   5419 		i_mdi_client_lock(ct, pip);
   5420 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
   5421 		    (ct->ct_unstable)) {
   5422 			unstable = 1;
   5423 		}
   5424 		MDI_PI_UNLOCK(pip);
   5425 
   5426 		/*
   5427 		 * Check to see if we are removing the last path of this
   5428 		 * client device...
   5429 		 */
   5430 		cdip = ct->ct_dip;
   5431 		if (!phci_only && cdip &&
   5432 		    (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
   5433 		    (i_mdi_client_compute_state(ct, ph) ==
   5434 		    MDI_CLIENT_STATE_FAILED)) {
   5435 			i_mdi_client_unlock(ct);
   5436 			MDI_PHCI_UNLOCK(ph);
   5437 			/*
   5438 			 * We don't retire clients we just retire the
   5439 			 * path to a client. If it is the last path
   5440 			 * to a client, constraints are checked and
   5441 			 * if we pass the last path is offlined. MPXIO will
   5442 			 * then fail all I/Os to the client. Since we don't
   5443 			 * want to retire the client on a path error
   5444 			 * set constraint = 0 so that the client dip
   5445 			 * is not retired.
   5446 			 */
   5447 			constraint = 0;
   5448 			(void) e_ddi_retire_finalize(cdip, &constraint);
   5449 			MDI_PHCI_LOCK(ph);
   5450 			pip = next;
   5451 		} else {
   5452 			i_mdi_client_unlock(ct);
   5453 			pip = next;
   5454 		}
   5455 	}
   5456 
   5457 	/*
   5458 	 * Cannot offline pip(s)
   5459 	 */
   5460 	if (unstable) {
   5461 		cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: "
   5462 		    "pHCI in transient state, cannot retire",
   5463 		    ddi_driver_name(dip), ddi_get_instance(dip));
   5464 		MDI_PHCI_UNLOCK(ph);
   5465 		return;
   5466 	}
   5467 
   5468 	/*
   5469 	 * Mark the pHCI as offline
   5470 	 */
   5471 	MDI_PHCI_SET_OFFLINE(ph);
   5472 
   5473 	/*
   5474 	 * Mark the child mdi_pathinfo nodes as transient
   5475 	 */
   5476 	pip = ph->ph_path_head;
   5477 	while (pip != NULL) {
   5478 		MDI_PI_LOCK(pip);
   5479 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   5480 		MDI_PI_SET_OFFLINING(pip);
   5481 		MDI_PI_UNLOCK(pip);
   5482 		pip = next;
   5483 	}
   5484 	MDI_PHCI_UNLOCK(ph);
   5485 	/*
   5486 	 * Give a chance for any pending commands to execute
        	 * (the pHCI lock is not held across the delay)
   5487 	 */
   5488 	delay_random(mdi_delay);
   5489 	MDI_PHCI_LOCK(ph);
        	/* offline each path; the first path that stays busy aborts the retire */
   5490 	pip = ph->ph_path_head;
   5491 	while (pip != NULL) {
   5492 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   5493 		(void) i_mdi_pi_offline(pip, 0);
   5494 		MDI_PI_LOCK(pip);
        		/* NOTE(review): ct is assigned here but not read in this loop */
   5495 		ct = MDI_PI(pip)->pi_client;
   5496 		if (!MDI_PI_IS_OFFLINE(pip)) {
   5497 			cmn_err(CE_WARN, "mdi_phci_retire_finalize: "
   5498 			    "path %d %s busy, cannot offline",
   5499 			    mdi_pi_get_path_instance(pip),
   5500 			    mdi_pi_spathname(pip));
   5501 			MDI_PI_UNLOCK(pip);
   5502 			MDI_PHCI_SET_ONLINE(ph);
   5503 			MDI_PHCI_UNLOCK(ph);
   5504 			return;
   5505 		}
   5506 		MDI_PI_UNLOCK(pip);
   5507 		pip = next;
   5508 	}
   5509 	MDI_PHCI_UNLOCK(ph);
   5510 
   5511 	return;
   5512 }
   5513 
   5514 void
   5515 mdi_phci_unretire(dev_info_t *dip)
   5516 {
   5517 	/* callers must only pass a pHCI node */
   5518 	ASSERT(MDI_PHCI(dip));
   5519 
   5520 	/* undo a retire by putting the pHCI back online */
   5521 	i_mdi_phci_online(dip);
   5522 }
   5524 
   5525 /*ARGSUSED*/
   5526 static int
   5527 i_mdi_client_offline(dev_info_t *dip, uint_t flags)
   5528 {
   5529 	mdi_client_t	*ct = i_devi_get_client(dip);
   5530 
   5531 	/*
   5532 	 * Client component is going offline.  Refuse with NDI_BUSY while
   5533 	 * any path is in a transient state or a failover is in progress;
   5534 	 * otherwise mark the client offline.  A dip with no client bound
   5535 	 * to it is treated as success.
   5536 	 */
   5537 	MDI_DEBUG(2, (MDI_NOTE, dip,
   5538 	    "called %p %p", (void *)dip, (void *)ct));
   5539 	if (ct == NULL)
   5540 		return (NDI_SUCCESS);
   5541 
   5542 	MDI_CLIENT_LOCK(ct);
   5543 
   5544 	if (ct->ct_unstable) {
   5545 		/* one or more paths are in transient state */
   5546 		MDI_DEBUG(1, (MDI_WARN, dip,
   5547 		    "!One or more paths to "
   5548 		    "this device are in transient state. "
   5549 		    "This device can not be removed at this moment. "
   5550 		    "Please try again later."));
   5551 		MDI_CLIENT_UNLOCK(ct);
   5552 		return (NDI_BUSY);
   5553 	}
   5554 
   5555 	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
   5556 		/* no DR of a client device during failover */
   5557 		MDI_DEBUG(1, (MDI_WARN, dip,
   5558 		    "!Client device is Busy. "
   5559 		    "This device can not be removed at this moment. "
   5560 		    "Please try again later."));
   5561 		MDI_CLIENT_UNLOCK(ct);
   5562 		return (NDI_BUSY);
   5563 	}
   5564 
   5565 	MDI_CLIENT_SET_OFFLINE(ct);
   5566 
   5567 	/* on removal, also unbind the client from its dev_info node */
   5568 	if (flags & NDI_DEVI_REMOVE)
   5569 		ct->ct_dip = NULL;
   5570 
   5571 	MDI_CLIENT_UNLOCK(ct);
   5572 	return (NDI_SUCCESS);
   5573 }
   5579 
   5580 /*
   5581  * mdi_pre_attach():
   5582  *		Pre attach() notification handler.  Returns DDI_FAILURE for a
   5583  *		DDI_PM_RESUME request on a node that is an MDI component, and
   5584  *		DDI_SUCCESS in every other case.
   5585  */
   5586 /*ARGSUSED*/
   5587 int
   5588 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
   5589 {
   5590 	int	is_mdi_node;
   5591 
   5592 	is_mdi_node = (DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE);
   5593 
   5594 	/* don't support old DDI_PM_RESUME */
   5595 	return ((is_mdi_node && (cmd == DDI_PM_RESUME)) ?
   5596 	    DDI_FAILURE : DDI_SUCCESS);
   5597 }
   5595 
   5596 /*
   5597  * mdi_post_attach():
   5598  *		Post attach() notification handler
        *
        *		Records the outcome of an attach or resume in the MDI state of
        *		the node, for its pHCI role and/or its client role:
        *		  DDI_ATTACH: success marks the component attached, failure
        *		    marks it detached (a failed client also has its PM state
        *		    reset via i_mdi_pm_reset_client()).
        *		  DDI_RESUME: success marks the component resumed, failure
        *		    marks it suspended.
        *		On a successful client attach the vHCI's vo_client_attached
        *		callback, when provided, is invoked with the client lock held.
        *		Other cmd values change nothing.
        *
        *		dip   - node that was attached/resumed
        *		cmd   - DDI_ATTACH or DDI_RESUME
        *		error - result of the attach/resume (DDI_SUCCESS on success)
   5599  */
   5600 /*ARGSUSED*/
   5601 void
   5602 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
   5603 {
   5604 	mdi_phci_t	*ph;
   5605 	mdi_client_t	*ct;
   5606 	mdi_vhci_t	*vh;
   5607 
   5608 	if (MDI_PHCI(dip)) {
   5609 		ph = i_devi_get_phci(dip);
   5610 		ASSERT(ph != NULL);
   5611 
   5612 		MDI_PHCI_LOCK(ph);
   5613 		switch (cmd) {
   5614 		case DDI_ATTACH:
   5615 			MDI_DEBUG(2, (MDI_NOTE, dip,
   5616 			    "phci post_attach called %p", (void *)ph));
   5617 			if (error == DDI_SUCCESS) {
   5618 				MDI_PHCI_SET_ATTACH(ph);
   5619 			} else {
   5620 				MDI_DEBUG(1, (MDI_NOTE, dip,
   5621 				    "!pHCI post_attach failed: error %d",
   5622 				    error));
   5623 				MDI_PHCI_SET_DETACH(ph);
   5624 			}
   5625 			break;
   5626 
   5627 		case DDI_RESUME:
   5628 			MDI_DEBUG(2, (MDI_NOTE, dip,
   5629 			    "pHCI post_resume: called %p", (void *)ph));
   5630 			if (error == DDI_SUCCESS) {
   5631 				MDI_PHCI_SET_RESUME(ph);
   5632 			} else {
   5633 				MDI_DEBUG(1, (MDI_NOTE, dip,
   5634 				    "!pHCI post_resume failed: error %d",
   5635 				    error));
   5636 				MDI_PHCI_SET_SUSPEND(ph);
   5637 			}
   5638 			break;
   5639 		}
   5640 		MDI_PHCI_UNLOCK(ph);
   5641 	}
   5642 
   5643 	if (MDI_CLIENT(dip)) {
   5644 		ct = i_devi_get_client(dip);
   5645 		ASSERT(ct != NULL);
   5646 
   5647 		MDI_CLIENT_LOCK(ct);
   5648 		switch (cmd) {
   5649 		case DDI_ATTACH:
   5650 			MDI_DEBUG(2, (MDI_NOTE, dip,
   5651 			    "client post_attach called %p", (void *)ct));
   5652 			if (error != DDI_SUCCESS) {
   5653 				MDI_DEBUG(1, (MDI_NOTE, dip,
   5654 				    "!client post_attach failed: error %d",
   5655 				    error));
   5656 				MDI_CLIENT_SET_DETACH(ct);
   5657 				MDI_DEBUG(4, (MDI_WARN, dip,
   5658 				    "i_mdi_pm_reset_client"));
   5659 				i_mdi_pm_reset_client(ct);
   5660 				break;
   5661 			}
   5662 
   5663 			/*
   5664 			 * Client device has successfully attached, inform
   5665 			 * the vhci.
   5666 			 */
   5667 			vh = ct->ct_vhci;
   5668 			if (vh->vh_ops->vo_client_attached)
   5669 				(*vh->vh_ops->vo_client_attached)(dip);
   5670 
   5671 			MDI_CLIENT_SET_ATTACH(ct);
   5672 			break;
   5673 
   5674 		case DDI_RESUME:
        			/*
        			 * This is the resume path, so the trace message names
        			 * post_resume, matching the pHCI branch above and the
        			 * failure message below.
        			 */
   5675 			MDI_DEBUG(2, (MDI_NOTE, dip,
   5676 			    "client post_resume: called %p", (void *)ct));
   5677 			if (error == DDI_SUCCESS) {
   5678 				MDI_CLIENT_SET_RESUME(ct);
   5679 			} else {
   5680 				MDI_DEBUG(1, (MDI_NOTE, dip,
   5681 				    "!client post_resume failed: error %d",
   5682 				    error));
   5683 				MDI_CLIENT_SET_SUSPEND(ct);
   5684 			}
   5685 			break;
   5686 		}
   5687 		MDI_CLIENT_UNLOCK(ct);
   5688 	}
   5689 }
   5690 
   5691 /*
   5692  * mdi_pre_detach():
   5693  *		Pre detach notification handler.  The client role is notified
   5694  *		first and its result is ignored; the value returned is the
   5695  *		pHCI role's result, or DDI_SUCCESS when dip is not a pHCI.
   5696  */
   5697 /*ARGSUSED*/
   5698 int
   5699 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
   5700 {
   5701 	if (MDI_CLIENT(dip))
   5702 		(void) i_mdi_client_pre_detach(dip, cmd);
   5703 
   5704 	if (!MDI_PHCI(dip))
   5705 		return (DDI_SUCCESS);
   5706 
   5707 	return (i_mdi_phci_pre_detach(dip, cmd));
   5708 }
   5711 
        /*
         * pHCI half of mdi_pre_detach().
         *
         * DDI_DETACH:  fails with DDI_FAILURE unless the pHCI is already
         *		offline; otherwise marks the pHCI detached.
         * DDI_SUSPEND: suspends, via devi_detach(DDI_SUSPEND), every client
         *		reachable through this pHCI that is neither detached nor
         *		already suspended, then marks the pHCI suspended.  If one
         *		client fails to suspend, the clients on the paths visited
         *		before it that are marked suspended are resumed and the
         *		failing status is returned.
         * Any other cmd yields DDI_FAILURE.  A dip with no mdi_phci_t yields
         * DDI_SUCCESS.
         */
   5712 /*ARGSUSED*/
   5713 static int
   5714 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
   5715 {
   5716 	int		rv = DDI_SUCCESS;
   5717 	mdi_phci_t	*ph;
   5718 	mdi_client_t	*ct;
   5719 	mdi_pathinfo_t	*pip;
   5720 	mdi_pathinfo_t	*failed_pip = NULL;
   5721 	mdi_pathinfo_t	*next;
   5722 
   5723 	ph = i_devi_get_phci(dip);
   5724 	if (ph == NULL) {
   5725 		return (rv);
   5726 	}
   5727 
   5728 	MDI_PHCI_LOCK(ph);
   5729 	switch (cmd) {
   5730 	case DDI_DETACH:
   5731 		MDI_DEBUG(2, (MDI_NOTE, dip,
   5732 		    "pHCI pre_detach: called %p", (void *)ph));
   5733 		if (!MDI_PHCI_IS_OFFLINE(ph)) {
   5734 			/*
   5735 			 * mdi_pathinfo nodes are still attached to
   5736 			 * this pHCI. Fail the detach for this pHCI.
   5737 			 */
   5738 			MDI_DEBUG(2, (MDI_WARN, dip,
   5739 			    "pHCI pre_detach: paths are still attached %p",
   5740 			    (void *)ph));
   5741 			rv = DDI_FAILURE;
   5742 			break;
   5743 		}
   5744 		MDI_PHCI_SET_DETACH(ph);
   5745 		break;
   5746 
   5747 	case DDI_SUSPEND:
   5748 		/*
   5749 		 * pHCI is getting suspended.  Since mpxio client
   5750 		 * devices may not be suspended at this point, to avoid
   5751 		 * a potential stack overflow, it is important to suspend
   5752 		 * client devices before pHCI can be suspended.
   5753 		 */
   5754 
   5755 		MDI_DEBUG(2, (MDI_NOTE, dip,
   5756 		    "pHCI pre_suspend: called %p", (void *)ph));
   5757 		/*
   5758 		 * Suspend all the client devices accessible through this pHCI
        		 * (the pHCI lock stays held across the devi_detach() calls)
   5759 		 */
   5760 		pip = ph->ph_path_head;
   5761 		while (pip != NULL && rv == DDI_SUCCESS) {
   5762 			dev_info_t *cdip;
   5763 			MDI_PI_LOCK(pip);
   5764 			next =
   5765 			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   5766 			ct = MDI_PI(pip)->pi_client;
   5767 			i_mdi_client_lock(ct, pip);
   5768 			cdip = ct->ct_dip;
   5769 			MDI_PI_UNLOCK(pip);
        			/* only clients that are neither detached nor suspended */
   5770 			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
   5771 			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
   5772 				i_mdi_client_unlock(ct);
   5773 				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
   5774 				    DDI_SUCCESS) {
   5775 					/*
   5776 					 * Suspend of one of the client
   5777 					 * device has failed.
   5778 					 */
   5779 					MDI_DEBUG(1, (MDI_WARN, dip,
   5780 					    "!suspend of device (%s%d) failed.",
   5781 					    ddi_driver_name(cdip),
   5782 					    ddi_get_instance(cdip)));
        					/* marks where the resume walk below stops */
   5783 					failed_pip = pip;
   5784 					break;
   5785 				}
   5786 			} else {
   5787 				i_mdi_client_unlock(ct);
   5788 			}
   5789 			pip = next;
   5790 		}
   5791 
   5792 		if (rv == DDI_SUCCESS) {
   5793 			/*
   5794 			 * Suspend of client devices is complete. Proceed
   5795 			 * with pHCI suspend.
   5796 			 */
   5797 			MDI_PHCI_SET_SUSPEND(ph);
   5798 		} else {
   5799 			/*
   5800 			 * Undo: resume the suspended clients on the paths
   5801 			 * that precede the one whose suspend failed.
   5802 			 */
   5803 			pip = ph->ph_path_head;
   5804 			while (pip != failed_pip) {
   5805 				dev_info_t *cdip;
   5806 				MDI_PI_LOCK(pip);
   5807 				next =
   5808 				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   5809 				ct = MDI_PI(pip)->pi_client;
   5810 				i_mdi_client_lock(ct, pip);
   5811 				cdip = ct->ct_dip;
   5812 				MDI_PI_UNLOCK(pip);
   5813 				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
   5814 					i_mdi_client_unlock(ct);
   5815 					(void) devi_attach(cdip, DDI_RESUME);
   5816 				} else {
   5817 					i_mdi_client_unlock(ct);
   5818 				}
   5819 				pip = next;
   5820 			}
   5821 		}
   5822 		break;
   5823 
   5824 	default:
   5825 		rv = DDI_FAILURE;
   5826 		break;
   5827 	}
   5828 	MDI_PHCI_UNLOCK(ph);
   5829 	return (rv);
   5830 }
   5831 
   5832 /*ARGSUSED*/
   5833 static int
   5834 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
   5835 {
   5836 	int		rv = DDI_SUCCESS;
   5837 	mdi_client_t	*ct;
   5838 
   5839 	ct = i_devi_get_client(dip);
   5840 	if (ct == NULL) {
   5841 		return (rv);
   5842 	}
   5843 
   5844 	MDI_CLIENT_LOCK(ct);
   5845 	switch (cmd) {
   5846 	case DDI_DETACH:
   5847 		MDI_DEBUG(2, (MDI_NOTE, dip,
   5848 		    "client pre_detach: called %p",
   5849 		     (void *)ct));
   5850 		MDI_CLIENT_SET_DETACH(ct);
   5851 		break;
   5852 
   5853 	case DDI_SUSPEND:
   5854 		MDI_DEBUG(2, (MDI_NOTE, dip,
   5855 		    "client pre_suspend: called %p",
   5856 		    (void *)ct));
   5857 		MDI_CLIENT_SET_SUSPEND(ct);
   5858 		break;
   5859 
   5860 	default:
   5861 		rv = DDI_FAILURE;
   5862 		break;
   5863 	}
   5864 	MDI_CLIENT_UNLOCK(ct);
   5865 	return (rv);
   5866 }
   5867 
   5868 /*
   5869  * mdi_post_detach():
   5870  *		Post detach notification handler.  Forwards the detach/suspend
   5871  *		result to the pHCI and/or client role of the node so each can
   5872  *		update its own state (pHCI role first).
   5873  */
   5874 /*ARGSUSED*/
   5875 void
   5876 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
   5877 {
   5878 	if (MDI_PHCI(dip)) {
   5879 		i_mdi_phci_post_detach(dip, cmd, error);
   5880 	}
   5881 
   5882 	if (MDI_CLIENT(dip)) {
   5883 		i_mdi_client_post_detach(dip, cmd, error);
   5884 	}
   5885 }
   5886 
   5887 /*ARGSUSED*/
   5888 static void
   5889 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
   5890 {
   5891 	mdi_phci_t	*ph;
   5892 
   5893 	/*
   5894 	 * Detach/Suspend of phci component failed. Update our state
   5895 	 * too
   5896 	 */
   5897 	ph = i_devi_get_phci(dip);
   5898 	if (ph == NULL) {
   5899 		return;
   5900 	}
   5901 
   5902 	MDI_PHCI_LOCK(ph);
   5903 	/*
   5904 	 * Detach of pHCI failed. Restore back converse
   5905 	 * state
   5906 	 */
   5907 	switch (cmd) {
   5908 	case DDI_DETACH:
   5909 		MDI_DEBUG(2, (MDI_NOTE, dip,
   5910 		    "pHCI post_detach: called %p",
   5911 		    (void *)ph));
   5912 		if (error != DDI_SUCCESS)
   5913 			MDI_PHCI_SET_ATTACH(ph);
   5914 		break;
   5915 
   5916 	case DDI_SUSPEND:
   5917 		MDI_DEBUG(2, (MDI_NOTE, dip,
   5918 		    "pHCI post_suspend: called %p",
   5919 		    (void *)ph));
   5920 		if (error != DDI_SUCCESS)
   5921 			MDI_PHCI_SET_RESUME(ph);
   5922 		break;
   5923 	}
   5924 	MDI_PHCI_UNLOCK(ph);
   5925 }
   5926 
   5927 /*ARGSUSED*/
   5928 static void
   5929 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
   5930 {
   5931 	mdi_client_t	*ct;
   5932 
   5933 	ct = i_devi_get_client(dip);
   5934 	if (ct == NULL) {
   5935 		return;
   5936 	}
   5937 	MDI_CLIENT_LOCK(ct);
   5938 	/*
   5939 	 * Detach of Client failed. Restore back converse
   5940 	 * state
   5941 	 */
   5942 	switch (cmd) {
   5943 	case DDI_DETACH:
   5944 		MDI_DEBUG(2, (MDI_NOTE, dip,
   5945 		    "client post_detach: called %p", (void *)ct));
   5946 		if (DEVI_IS_ATTACHING(ct->ct_dip)) {
   5947 			MDI_DEBUG(4, (MDI_NOTE, dip,
   5948 			    "i_mdi_pm_rele_client\n"));
   5949 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
   5950 		} else {
   5951 			MDI_DEBUG(4, (MDI_NOTE, dip,
   5952 			    "i_mdi_pm_reset_client\n"));
   5953 			i_mdi_pm_reset_client(ct);
   5954 		}
   5955 		if (error != DDI_SUCCESS)
   5956 			MDI_CLIENT_SET_ATTACH(ct);
   5957 		break;
   5958 
   5959 	case DDI_SUSPEND:
   5960 		MDI_DEBUG(2, (MDI_NOTE, dip,
   5961 		    "called %p", (void *)ct));
   5962 		if (error != DDI_SUCCESS)
   5963 			MDI_CLIENT_SET_RESUME(ct);
   5964 		break;
   5965 	}
   5966 	MDI_CLIENT_UNLOCK(ct);
   5967 }
   5968 
   5969 int
   5970 mdi_pi_kstat_exists(mdi_pathinfo_t *pip)
   5971 {
        	/* 1 when kstats are attached to this path, 0 otherwise */
   5972 	return (MDI_PI(pip)->pi_kstats != NULL);
   5973 }
   5974 
   5975 /*
   5976  * create and install per-path (client - pHCI) statistics
   5977  * I/O stats supported: nread, nwritten, reads, and writes
   5978  * Error stats - hard errors, soft errors, & transport errors
        *
        * Returns MDI_SUCCESS when the path already has kstats or they were
        * created here, MDI_FAILURE when either kstat_create() call fails.
        * Note that ",err" is appended to ksname in place.
   5979  */
   5980 int
   5981 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
   5982 {
   5983 	kstat_t			*io_ksp;
   5984 	kstat_t			*err_ksp;
   5985 	struct pi_errs		*errs;
   5986 	struct mdi_pi_kstats	*stats;
   5987 
   5988 	/* kstats already exist for this path: nothing to do */
   5989 	if (MDI_PI(pip)->pi_kstats != NULL)
   5990 		return (MDI_SUCCESS);
   5991 
   5992 	io_ksp = kstat_create("mdi", 0, ksname, "iopath",
   5993 	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
   5994 	if (io_ksp == NULL)
   5995 		return (MDI_FAILURE);
   5996 
        	/*
        	 * NOTE(review): the suffix is written into the caller's buffer
        	 * with no bound; presumably callers leave room for it - confirm.
        	 */
   5997 	(void) strcat(ksname, ",err");
   5998 	err_ksp = kstat_create("mdi", 0, ksname, "iopath_errors",
   5999 	    KSTAT_TYPE_NAMED,
   6000 	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
   6001 	if (err_ksp == NULL) {
   6002 		/* don't leave a half-built pair behind */
   6003 		kstat_delete(io_ksp);
   6004 		return (MDI_FAILURE);
   6005 	}
   6006 
   6007 	/* name each error counter */
   6008 	errs = (struct pi_errs *)err_ksp->ks_data;
   6009 	kstat_named_init(&errs->pi_softerrs, "Soft Errors",
   6010 	    KSTAT_DATA_UINT32);
   6011 	kstat_named_init(&errs->pi_harderrs, "Hard Errors",
   6012 	    KSTAT_DATA_UINT32);
   6013 	kstat_named_init(&errs->pi_transerrs, "Transport Errors",
   6014 	    KSTAT_DATA_UINT32);
   6015 	kstat_named_init(&errs->pi_icnt_busy, "Interconnect Busy",
   6016 	    KSTAT_DATA_UINT32);
   6017 	kstat_named_init(&errs->pi_icnt_errors, "Interconnect Errors",
   6018 	    KSTAT_DATA_UINT32);
   6019 	kstat_named_init(&errs->pi_phci_rsrc, "pHCI No Resources",
   6020 	    KSTAT_DATA_UINT32);
   6021 	kstat_named_init(&errs->pi_phci_localerr, "pHCI Local Errors",
   6022 	    KSTAT_DATA_UINT32);
   6023 	kstat_named_init(&errs->pi_phci_invstate, "pHCI Invalid State",
   6024 	    KSTAT_DATA_UINT32);
   6025 	kstat_named_init(&errs->pi_failedfrom, "Failed From",
   6026 	    KSTAT_DATA_UINT32);
   6027 	kstat_named_init(&errs->pi_failedto, "Failed To",
   6028 	    KSTAT_DATA_UINT32);
   6029 
        	/* bundle both kstats with an initial reference count of one */
   6030 	stats = kmem_alloc(sizeof (*stats), KM_SLEEP);
   6031 	stats->pi_kstat_ref = 1;
        	stats->pi_kstat_iostats = io_ksp;
        	stats->pi_kstat_errstats = err_ksp;
        
        	kstat_install(io_ksp);
        	kstat_install(err_ksp);
        	MDI_PI(pip)->pi_kstats = stats;
        
        	return (MDI_SUCCESS);
        }
   6032 
   6033 /*
   6034  * destroy per-path properties
   6035  */
   6036 static void
   6037 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
   6038 {
   6039 
   6040 	struct mdi_pi_kstats *mdi_statp;
   6041 
   6042 	if (MDI_PI(pip)->pi_kstats == NULL)
   6043 		return;
   6044 	if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
   6045 		return;
   6046 
   6047 	MDI_PI(pip)->pi_kstats = NULL;
   6048 
   6049 	/*
   6050 	 * the kstat may be shared between multiple pathinfo nodes
   6051 	 * decrement this pathinfo's usage, removing the kstats
   6052 	 * themselves when the last pathinfo reference is removed.
   6053 	 */
   6054 	ASSERT(mdi_statp->pi_kstat_ref > 0);
   6055 	if (--mdi_statp->pi_kstat_ref != 0)
   6056 		return;
   6057 
   6058 	kstat_delete(mdi_statp->pi_kstat_iostats);
   6059 	kstat_delete(mdi_statp->pi_kstat_errstats);
   6060 	kmem_free(mdi_statp, sizeof (*mdi_statp));
   6061 }
   6062 
   6063 /*
   6064  * update I/O paths KSTATS
   6065  */
   6066 void
   6067 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
   6068 {
   6069 	kstat_t *iostatp;
   6070 	size_t xfer_cnt;
   6071 
   6072 	ASSERT(pip != NULL);
   6073 
   6074 	/*
   6075 	 * I/O can be driven across a path prior to having path
   6076 	 * statistics available, i.e. probe(9e).
   6077 	 */
   6078 	if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
   6079 		iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
   6080 		xfer_cnt = bp->b_bcount - bp->b_resid;
   6081 		if (bp->b_flags & B_READ) {
   6082 			KSTAT_IO_PTR(iostatp)->reads++;
   6083 			KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
   6084 		} else {
   6085 			KSTAT_IO_PTR(iostatp)->writes++;
   6086 			KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
   6087 		}
   6088 	}
   6089 }
   6090 
   6091 /*
   6092  * Enable the path(specific client/target/initiator)
   6093  * Enabling a path means that MPxIO may select the enabled path for routing
   6094  * future I/O requests, subject to other path state constraints.
   6095  */
   6096 int
   6097 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
   6098 {
   6099 	mdi_phci_t	*ph;
   6100 
   6101 	ph = MDI_PI(pip)->pi_phci;
   6102 	if (ph == NULL) {
   6103 		MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
   6104 		    "!failed: path %s %p: NULL ph",
   6105 		    mdi_pi_spathname(pip), (void *)pip));
   6106 		return (MDI_FAILURE);
   6107 	}
   6108 
   6109 	(void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
   6110 		MDI_ENABLE_OP);
   6111 	MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
   6112 	    "!returning success pip = %p. ph = %p",
   6113 	    (void *)pip, (void *)ph));
   6114 	return (MDI_SUCCESS);
   6115 
   6116 }
   6117 
   6118 /*
   6119  * Disable the path (specific client/target/initiator)
   6120  * Disabling a path means that MPxIO will not select the disabled path for
   6121  * routing any new I/O requests.
   6122  */
   6123 int
   6124 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
   6125 {
   6126 	mdi_phci_t	*ph;
   6127 
   6128 	ph = MDI_PI(pip)->pi_phci;
   6129 	if (ph == NULL) {
   6130 		MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
   6131 		    "!failed: path %s %p: NULL ph",
   6132 		    mdi_pi_spathname(pip), (void *)pip));
   6133 		return (MDI_FAILURE);
   6134 	}
   6135 
   6136 	(void) i_mdi_enable_disable_path(pip,
   6137 	    ph->ph_vhci, flags, MDI_DISABLE_OP);
   6138 	MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
   6139 	    "!returning success pip = %p. ph = %p",
   6140 	    (void *)pip, (void *)ph));
   6141 	return (MDI_SUCCESS);
   6142 }
   6143 
   6144 /*
   6145  * disable the path to a particular pHCI (pHCI specified in the phci_path
   6146  * argument) for a particular client (specified in the client_path argument).
   6147  * Disabling a path means that MPxIO will not select the disabled path for
   6148  * routing any new I/O requests.
   6149  * NOTE: this will be removed once the NWS files are changed to use the new
   6150  * mdi_{enable,disable}_path interfaces
   6151  */
   6152 int
   6153 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
   6154 {
   6155 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
   6156 }
   6157 
   6158 /*
   6159  * Enable the path to a particular pHCI (pHCI specified in the phci_path
   6160  * argument) for a particular client (specified in the client_path argument).
   6161  * Enabling a path means that MPxIO may select the enabled path for routing
   6162  * future I/O requests, subject to other path state constraints.
   6163  * NOTE: this will be removed once the NWS files are changed to use the new
   6164  * mdi_{enable,disable}_path interfaces
   6165  */
   6166 
   6167 int
   6168 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
   6169 {
   6170 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
   6171 }
   6172 
   6173 /*
   6174  * Common routine for doing enable/disable.
   6175  */
   6176 static mdi_pathinfo_t *
   6177 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags,
   6178 		int op)
   6179 {
   6180 	int		sync_flag = 0;
   6181 	int		rv;
   6182 	mdi_pathinfo_t 	*next;
   6183 	int		(*f)() = NULL;
   6184 
   6185 	/*
   6186 	 * Check to make sure the path is not already in the
   6187 	 * requested state. If it is just return the next path
   6188 	 * as we have nothing to do here.
   6189 	 */
   6190 	if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) ||
   6191 	    (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) {
   6192 		MDI_PI_LOCK(pip);
   6193 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   6194 		MDI_PI_UNLOCK(pip);
   6195 		return (next);
   6196 	}
   6197 
   6198 	f = vh->vh_ops->vo_pi_state_change;
   6199 
   6200 	sync_flag = (flags << 8) & 0xf00;
   6201 
   6202 	/*
   6203 	 * Do a callback into the mdi consumer to let it
   6204 	 * know that path is about to get enabled/disabled.
   6205 	 */
   6206 	if (f != NULL) {
   6207 		rv = (*f)(vh->vh_dip, pip, 0,
   6208 			MDI_PI_EXT_STATE(pip),
   6209 			MDI_EXT_STATE_CHANGE | sync_flag |
   6210 			op | MDI_BEFORE_STATE_CHANGE);
   6211 		if (rv != MDI_SUCCESS) {
   6212 			MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
   6213 			    "vo_pi_state_change: failed rv = %x", rv));
   6214 		}
   6215 	}
   6216 	MDI_PI_LOCK(pip);
   6217 	next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   6218 
   6219 	switch (flags) {
   6220 		case USER_DISABLE:
   6221 			if (op == MDI_DISABLE_OP) {
   6222 				MDI_PI_SET_USER_DISABLE(pip);
   6223 			} else {
   6224 				MDI_PI_SET_USER_ENABLE(pip);
   6225 			}
   6226 			break;
   6227 		case DRIVER_DISABLE:
   6228 			if (op == MDI_DISABLE_OP) {
   6229 				MDI_PI_SET_DRV_DISABLE(pip);
   6230 			} else {
   6231 				MDI_PI_SET_DRV_ENABLE(pip);
   6232 			}
   6233 			break;
   6234 		case DRIVER_DISABLE_TRANSIENT:
   6235 			if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) {
   6236 				MDI_PI_SET_DRV_DISABLE_TRANS(pip);
   6237 			} else {
   6238 				MDI_PI_SET_DRV_ENABLE_TRANS(pip);
   6239 			}
   6240 			break;
   6241 	}
   6242 	MDI_PI_UNLOCK(pip);
   6243 	/*
   6244 	 * Do a callback into the mdi consumer to let it
   6245 	 * know that path is now enabled/disabled.
   6246 	 */
   6247 	if (f != NULL) {
   6248 		rv = (*f)(vh->vh_dip, pip, 0,
   6249 			MDI_PI_EXT_STATE(pip),
   6250 			MDI_EXT_STATE_CHANGE | sync_flag |
   6251 			op | MDI_AFTER_STATE_CHANGE);
   6252 		if (rv != MDI_SUCCESS) {
   6253 			MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
   6254 			    "vo_pi_state_change failed: rv = %x", rv));
   6255 		}
   6256 	}
   6257 	return (next);
   6258 }
   6259 
   6260 /*
   6261  * Common routine for doing enable/disable.
   6262  * NOTE: this will be removed once the NWS files are changed to use the new
   6263  * mdi_{enable,disable}_path has been putback
   6264  */
   6265 int
   6266 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
   6267 {
   6268 
   6269 	mdi_phci_t	*ph;
   6270 	mdi_vhci_t	*vh = NULL;
   6271 	mdi_client_t	*ct;
   6272 	mdi_pathinfo_t	*next, *pip;
   6273 	int		found_it;
   6274 
   6275 	ph = i_devi_get_phci(pdip);
   6276 	MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
   6277 	    "!op = %d pdip = %p cdip = %p", op, (void *)pdip,
   6278 	    (void *)cdip));
   6279 	if (ph == NULL) {
   6280 		MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
   6281 		    "!failed: operation %d: NULL ph", op));
   6282 		return (MDI_FAILURE);
   6283 	}
   6284 
   6285 	if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
   6286 		MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
   6287 		    "!failed: invalid operation %d", op));
   6288 		return (MDI_FAILURE);
   6289 	}
   6290 
   6291 	vh = ph->ph_vhci;
   6292 
   6293 	if (cdip == NULL) {
   6294 		/*
   6295 		 * Need to mark the Phci as enabled/disabled.
   6296 		 */
   6297 		MDI_DEBUG(4, (MDI_NOTE, cdip ? cdip : pdip,
   6298 		    "op %d for the phci", op));
   6299 		MDI_PHCI_LOCK(ph);
   6300 		switch (flags) {
   6301 			case USER_DISABLE:
   6302 				if (op == MDI_DISABLE_OP) {
   6303 					MDI_PHCI_SET_USER_DISABLE(ph);
   6304 				} else {
   6305 					MDI_PHCI_SET_USER_ENABLE(ph);
   6306 				}
   6307 				break;
   6308 			case DRIVER_DISABLE:
   6309 				if (op == MDI_DISABLE_OP) {
   6310 					MDI_PHCI_SET_DRV_DISABLE(ph);
   6311 				} else {
   6312 					MDI_PHCI_SET_DRV_ENABLE(ph);
   6313 				}
   6314 				break;
   6315 			case DRIVER_DISABLE_TRANSIENT:
   6316 				if (op == MDI_DISABLE_OP) {
   6317 					MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
   6318 				} else {
   6319 					MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
   6320 				}
   6321 				break;
   6322 			default:
   6323 				MDI_PHCI_UNLOCK(ph);
   6324 				MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
   6325 				    "!invalid flag argument= %d", flags));
   6326 		}
   6327 
   6328 		/*
   6329 		 * Phci has been disabled. Now try to enable/disable
   6330 		 * path info's to each client.
   6331 		 */
   6332 		pip = ph->ph_path_head;
   6333 		while (pip != NULL) {
   6334 			pip = i_mdi_enable_disable_path(pip, vh, flags, op);
   6335 		}
   6336 		MDI_PHCI_UNLOCK(ph);
   6337 	} else {
   6338 
   6339 		/*
   6340 		 * Disable a specific client.
   6341 		 */
   6342 		ct = i_devi_get_client(cdip);
   6343 		if (ct == NULL) {
   6344 			MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
   6345 			    "!failed: operation = %d: NULL ct", op));
   6346 			return (MDI_FAILURE);
   6347 		}
   6348 
   6349 		MDI_CLIENT_LOCK(ct);
   6350 		pip = ct->ct_path_head;
   6351 		found_it = 0;
   6352 		while (pip != NULL) {
   6353 			MDI_PI_LOCK(pip);
   6354 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
   6355 			if (MDI_PI(pip)->pi_phci == ph) {
   6356 				MDI_PI_UNLOCK(pip);
   6357 				found_it = 1;
   6358 				break;
   6359 			}
   6360 			MDI_PI_UNLOCK(pip);
   6361 			pip = next;
   6362 		}
   6363 
   6364 
   6365 		MDI_CLIENT_UNLOCK(ct);
   6366 		if (found_it == 0) {
   6367 			MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
   6368 			    "!failed. Could not find corresponding pip\n"));
   6369 			return (MDI_FAILURE);
   6370 		}
   6371 
   6372 		(void) i_mdi_enable_disable_path(pip, vh, flags, op);
   6373 	}
   6374 
   6375 	MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
   6376 	    "!op %d returning success pdip = %p cdip = %p",
   6377 	    op, (void *)pdip, (void *)cdip));
   6378 	return (MDI_SUCCESS);
   6379 }
   6380 
   6381 /*
   6382  * Ensure phci powered up
   6383  */
   6384 static void
   6385 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
   6386 {
   6387 	dev_info_t	*ph_dip;
   6388 
   6389 	ASSERT(pip != NULL);
   6390 	ASSERT(MDI_PI_LOCKED(pip));
   6391 
   6392 	if (MDI_PI(pip)->pi_pm_held) {
   6393 		return;
   6394 	}
   6395 
   6396 	ph_dip = mdi_pi_get_phci(pip);
   6397 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
   6398 	    "%s %p", mdi_pi_spathname(pip), (void *)pip));
   6399 	if (ph_dip == NULL) {
   6400 		return;
   6401 	}
   6402 
   6403 	MDI_PI_UNLOCK(pip);
   6404 	MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d",
   6405 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
   6406 	pm_hold_power(ph_dip);
   6407 	MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d",
   6408 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
   6409 	MDI_PI_LOCK(pip);
   6410 
   6411 	/* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
   6412 	if (DEVI(ph_dip)->devi_pm_info)
   6413 		MDI_PI(pip)->pi_pm_held = 1;
   6414 }
   6415 
   6416 /*
   6417  * Allow phci powered down
   6418  */
   6419 static void
   6420 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
   6421 {
   6422 	dev_info_t	*ph_dip = NULL;
   6423 
   6424 	ASSERT(pip != NULL);
   6425 	ASSERT(MDI_PI_LOCKED(pip));
   6426 
   6427 	if (MDI_PI(pip)->pi_pm_held == 0) {
   6428 		return;
   6429 	}
   6430 
   6431 	ph_dip = mdi_pi_get_phci(pip);
   6432 	ASSERT(ph_dip != NULL);
   6433 
   6434 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
   6435 	    "%s %p", mdi_pi_spathname(pip), (void *)pip));
   6436 
   6437 	MDI_PI_UNLOCK(pip);
   6438 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
   6439 	    "kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
   6440 	pm_rele_power(ph_dip);
   6441 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
   6442 	    "kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
   6443 	MDI_PI_LOCK(pip);
   6444 
   6445 	MDI_PI(pip)->pi_pm_held = 0;
   6446 }
   6447 
   6448 static void
   6449 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
   6450 {
   6451 	ASSERT(MDI_CLIENT_LOCKED(ct));
   6452 
   6453 	ct->ct_power_cnt += incr;
   6454 	MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
   6455 	    "%p ct_power_cnt = %d incr = %d",
   6456 	    (void *)ct, ct->ct_power_cnt, incr));
   6457 	ASSERT(ct->ct_power_cnt >= 0);
   6458 }
   6459 
   6460 static void
   6461 i_mdi_rele_all_phci(mdi_client_t *ct)
   6462 {
   6463 	mdi_pathinfo_t  *pip;
   6464 
   6465 	ASSERT(MDI_CLIENT_LOCKED(ct));
   6466 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
   6467 	while (pip != NULL) {
   6468 		mdi_hold_path(pip);
   6469 		MDI_PI_LOCK(pip);
   6470 		i_mdi_pm_rele_pip(pip);
   6471 		MDI_PI_UNLOCK(pip);
   6472 		mdi_rele_path(pip);
   6473 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
   6474 	}
   6475 }
   6476 
   6477 static void
   6478 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
   6479 {
   6480 	ASSERT(MDI_CLIENT_LOCKED(ct));
   6481 
   6482 	if (i_ddi_devi_attached(ct->ct_dip)) {
   6483 		ct->ct_power_cnt -= decr;
   6484 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
   6485 		    "%p ct_power_cnt = %d decr = %d",
   6486 		    (void *)ct, ct->ct_power_cnt, decr));
   6487 	}
   6488 
   6489 	ASSERT(ct->ct_power_cnt >= 0);
   6490 	if (ct->ct_power_cnt == 0) {
   6491 		i_mdi_rele_all_phci(ct);
   6492 		return;
   6493 	}
   6494 }
   6495 
   6496 static void
   6497 i_mdi_pm_reset_client(mdi_client_t *ct)
   6498 {
   6499 	MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
   6500 	    "%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt));
   6501 	ASSERT(MDI_CLIENT_LOCKED(ct));
   6502 	ct->ct_power_cnt = 0;
   6503 	i_mdi_rele_all_phci(ct);
   6504 	ct->ct_powercnt_config = 0;
   6505 	ct->ct_powercnt_unconfig = 0;
   6506 	ct->ct_powercnt_reset = 1;
   6507 }
   6508 
   6509 static int
   6510 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
   6511 {
   6512 	int		ret;
   6513 	dev_info_t	*ph_dip;
   6514 
   6515 	MDI_PI_LOCK(pip);
   6516 	i_mdi_pm_hold_pip(pip);
   6517 
   6518 	ph_dip = mdi_pi_get_phci(pip);
   6519 	MDI_PI_UNLOCK(pip);
   6520 
   6521 	/* bring all components of phci to full power */
   6522 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
   6523 	    "pm_powerup for %s%d %p", ddi_driver_name(ph_dip),
   6524 	    ddi_get_instance(ph_dip), (void *)pip));
   6525 
   6526 	ret = pm_powerup(ph_dip);
   6527 
   6528 	if (ret == DDI_FAILURE) {
   6529 		MDI_DEBUG(4, (MDI_NOTE, ph_dip,
   6530 		    "pm_powerup FAILED for %s%d %p",
   6531 		    ddi_driver_name(ph_dip), ddi_get_instance(ph_dip),
   6532 		    (void *)pip));
   6533 
   6534 		MDI_PI_LOCK(pip);
   6535 		i_mdi_pm_rele_pip(pip);
   6536 		MDI_PI_UNLOCK(pip);
   6537 		return (MDI_FAILURE);
   6538 	}
   6539 
   6540 	return (MDI_SUCCESS);
   6541 }
   6542 
   6543 static int
   6544 i_mdi_power_all_phci(mdi_client_t *ct)
   6545 {
   6546 	mdi_pathinfo_t  *pip;
   6547 	int		succeeded = 0;
   6548 
   6549 	ASSERT(MDI_CLIENT_LOCKED(ct));
   6550 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
   6551 	while (pip != NULL) {
   6552 		/*
   6553 		 * Don't power if MDI_PATHINFO_STATE_FAULT
   6554 		 * or MDI_PATHINFO_STATE_OFFLINE.
   6555 		 */
   6556 		if (MDI_PI_IS_INIT(pip) ||
   6557 		    MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
   6558 			mdi_hold_path(pip);
   6559 			MDI_CLIENT_UNLOCK(ct);
   6560 			if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
   6561 				succeeded = 1;
   6562 
   6563 			ASSERT(ct == MDI_PI(pip)->pi_client);
   6564 			MDI_CLIENT_LOCK(ct);
   6565 			mdi_rele_path(pip);
   6566 		}
   6567 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
   6568 	}
   6569 
   6570 	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
   6571 }
   6572 
   6573 /*
   6574  * mdi_bus_power():
   6575  *		1. Place the phci(s) into powered up state so that
   6576  *		   client can do power management
   6577  *		2. Ensure phci powered up as client power managing
   6578  * Return Values:
   6579  *		MDI_SUCCESS
   6580  *		MDI_FAILURE
   6581  */
   6582 int
   6583 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
   6584     void *arg, void *result)
   6585 {
   6586 	int			ret = MDI_SUCCESS;
   6587 	pm_bp_child_pwrchg_t	*bpc;
   6588 	mdi_client_t		*ct;
   6589 	dev_info_t		*cdip;
   6590 	pm_bp_has_changed_t	*bphc;
   6591 
   6592 	/*
   6593 	 * BUS_POWER_NOINVOL not supported
   6594 	 */
   6595 	if (op == BUS_POWER_NOINVOL)
   6596 		return (MDI_FAILURE);
   6597 
   6598 	/*
   6599 	 * ignore other OPs.
   6600 	 * return quickly to save cou cycles on the ct processing
   6601 	 */
   6602 	switch (op) {
   6603 	case BUS_POWER_PRE_NOTIFICATION:
   6604 	case BUS_POWER_POST_NOTIFICATION:
   6605 		bpc = (pm_bp_child_pwrchg_t *)arg;
   6606 		cdip = bpc->bpc_dip;
   6607 		break;
   6608 	case BUS_POWER_HAS_CHANGED:
   6609 		bphc = (pm_bp_has_changed_t *)arg;
   6610 		cdip = bphc->bphc_dip;
   6611 		break;
   6612 	default:
   6613 		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
   6614 	}
   6615 
   6616 	ASSERT(MDI_CLIENT(cdip));
   6617 
   6618 	ct = i_devi_get_client(cdip);
   6619 	if (ct == NULL)
   6620 		return (MDI_FAILURE);
   6621 
   6622 	/*
   6623 	 * wait till the mdi_pathinfo node state change are processed
   6624 	 */
   6625 	MDI_CLIENT_LOCK(ct);
   6626 	switch (op) {
   6627 	case BUS_POWER_PRE_NOTIFICATION:
   6628 		MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
   6629 		    "BUS_POWER_PRE_NOTIFICATION:"
   6630 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
   6631 		    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
   6632 		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));
   6633 
   6634 		/* serialize power level change per client */
   6635 		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
   6636 			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
   6637 
   6638 		MDI_CLIENT_SET_POWER_TRANSITION(ct);
   6639 
   6640 		if (ct->ct_power_cnt == 0) {
   6641 			ret = i_mdi_power_all_phci(ct);
   6642 		}
   6643 
   6644 		/*
   6645 		 * if new_level > 0:
   6646 		 *	- hold phci(s)
   6647 		 *	- power up phci(s) if not already
   6648 		 * ignore power down
   6649 		 */
   6650 		if (bpc->bpc_nlevel > 0) {
   6651 			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
   6652 				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
   6653 				    "i_mdi_pm_hold_client\n"));
   6654 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
   6655 			}
   6656 		}
   6657 		break;
   6658 	case BUS_POWER_POST_NOTIFICATION:
   6659 		MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
   6660 		    "BUS_POWER_POST_NOTIFICATION:"
   6661 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d",
   6662 		    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
   6663 		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
   6664 		    *(int *)result));
   6665 
   6666 		if (*(int *)result == DDI_SUCCESS) {
   6667 			if (bpc->bpc_nlevel > 0) {
   6668 				MDI_CLIENT_SET_POWER_UP(ct);
   6669 			} else {
   6670 				MDI_CLIENT_SET_POWER_DOWN(ct);
   6671 			}
   6672 		}
   6673 
   6674 		/* release the hold we did in pre-notification */
   6675 		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
   6676 		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
   6677 			MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
   6678 			    "i_mdi_pm_rele_client\n"));
   6679 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
   6680 		}
   6681 
   6682 		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
   6683 			/* another thread might started attaching */
   6684 			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
   6685 				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
   6686 				    "i_mdi_pm_rele_client\n"));
   6687 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
   6688 			/* detaching has been taken care in pm_post_unconfig */
   6689 			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
   6690 				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
   6691 				    "i_mdi_pm_reset_client\n"));
   6692 				i_mdi_pm_reset_client(ct);
   6693 			}
   6694 		}
   6695 
   6696 		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
   6697 		cv_broadcast(&ct->ct_powerchange_cv);
   6698 
   6699 		break;
   6700 
   6701 	/* need to do more */
   6702 	case BUS_POWER_HAS_CHANGED:
   6703 		MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
   6704 		    "BUS_POWER_HAS_CHANGED:"
   6705 		    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
   6706 		    ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
   6707 		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));
   6708 
   6709 		if (bphc->bphc_nlevel > 0 &&
   6710 		    bphc->bphc_nlevel > bphc->bphc_olevel) {
   6711 			if (ct->ct_power_cnt == 0) {
   6712 				ret = i_mdi_power_all_phci(ct);
   6713 			}
   6714 			MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
   6715 			    "i_mdi_pm_hold_client\n"));
   6716 			i_mdi_pm_hold_client(ct, ct->ct_path_count);
   6717 		}
   6718 
   6719 		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
   6720 			MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
   6721 			    "i_mdi_pm_rele_client\n"));
   6722 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
   6723 		}
   6724 		break;
   6725 	}
   6726 
   6727 	MDI_CLIENT_UNLOCK(ct);
   6728 	return (ret);
   6729 }
   6730 
   6731 static int
   6732 i_mdi_pm_pre_config_one(dev_info_t *child)
   6733 {
   6734 	int		ret = MDI_SUCCESS;
   6735 	mdi_client_t	*ct;
   6736 
   6737 	ct = i_devi_get_client(child);
   6738 	if (ct == NULL)
   6739 		return (MDI_FAILURE);
   6740 
   6741 	MDI_CLIENT_LOCK(ct);
   6742 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
   6743 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
   6744 
   6745 	if (!MDI_CLIENT_IS_FAILED(ct)) {
   6746 		MDI_CLIENT_UNLOCK(ct);
   6747 		MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n"));
   6748 		return (MDI_SUCCESS);
   6749 	}
   6750 
   6751 	if (ct->ct_powercnt_config) {
   6752 		MDI_CLIENT_UNLOCK(ct);
   6753 		MDI_DEBUG(4, (MDI_NOTE, child, "already held\n"));
   6754 		return (MDI_SUCCESS);
   6755 	}
   6756 
   6757 	if (ct->ct_power_cnt == 0) {
   6758 		ret = i_mdi_power_all_phci(ct);
   6759 	}
   6760 	MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
   6761 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
   6762 	ct->ct_powercnt_config = 1;
   6763 	ct->ct_powercnt_reset = 0;
   6764 	MDI_CLIENT_UNLOCK(ct);
   6765 	return (ret);
   6766 }
   6767 
   6768 static int
   6769 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
   6770 {
   6771 	int			ret = MDI_SUCCESS;
   6772 	dev_info_t		*cdip;
   6773 	int			circ;
   6774 
   6775 	ASSERT(MDI_VHCI(vdip));
   6776 
   6777 	/* ndi_devi_config_one */
   6778 	if (child) {
   6779 		ASSERT(DEVI_BUSY_OWNED(vdip));
   6780 		return (i_mdi_pm_pre_config_one(child));
   6781 	}
   6782 
   6783 	/* devi_config_common */
   6784 	ndi_devi_enter(vdip, &circ);
   6785 	cdip = ddi_get_child(vdip);
   6786 	while (cdip) {
   6787 		dev_info_t *next = ddi_get_next_sibling(cdip);
   6788 
   6789 		ret = i_mdi_pm_pre_config_one(cdip);
   6790 		if (ret != MDI_SUCCESS)
   6791 			break;
   6792 		cdip = next;
   6793 	}
   6794 	ndi_devi_exit(vdip, circ);
   6795 	return (ret);
   6796 }
   6797 
   6798 static int
   6799 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
   6800 {
   6801 	int		ret = MDI_SUCCESS;
   6802 	mdi_client_t	*ct;
   6803 
   6804 	ct = i_devi_get_client(child);
   6805 	if (ct == NULL)
   6806 		return (MDI_FAILURE);
   6807 
   6808 	MDI_CLIENT_LOCK(ct);
   6809 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
   6810 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
   6811 
   6812 	if (!i_ddi_devi_attached(ct->ct_dip)) {
   6813 		MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n"));
   6814 		MDI_CLIENT_UNLOCK(ct);
   6815 		return (MDI_SUCCESS);
   6816 	}
   6817 
   6818 	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
   6819 	    (flags & NDI_AUTODETACH)) {
   6820 		MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n"));
   6821 		MDI_CLIENT_UNLOCK(ct);
   6822 		return (MDI_FAILURE);
   6823 	}
   6824 
   6825 	if (ct->ct_powercnt_unconfig) {
   6826 		MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n"));
   6827 		MDI_CLIENT_UNLOCK(ct);
   6828 		*held = 1;
   6829 		return (MDI_SUCCESS);
   6830 	}
   6831 
   6832 	if (ct->ct_power_cnt == 0) {
   6833 		ret = i_mdi_power_all_phci(ct);
   6834 	}
   6835 	MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
   6836 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
   6837 	ct->ct_powercnt_unconfig = 1;
   6838 	ct->ct_powercnt_reset = 0;
   6839 	MDI_CLIENT_UNLOCK(ct);
   6840 	if (ret == MDI_SUCCESS)
   6841 		*held = 1;
   6842 	return (ret);
   6843 }
   6844 
   6845 static int
   6846 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
   6847     int flags)
   6848 {
   6849 	int			ret = MDI_SUCCESS;
   6850 	dev_info_t		*cdip;
   6851 	int			circ;
   6852 
   6853 	ASSERT(MDI_VHCI(vdip));
   6854 	*held = 0;
   6855 
   6856 	/* ndi_devi_unconfig_one */
   6857 	if (child) {
   6858 		ASSERT(DEVI_BUSY_OWNED(vdip));
   6859 		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
   6860 	}
   6861 
   6862 	/* devi_unconfig_common */
   6863 	ndi_devi_enter(vdip, &circ);
   6864 	cdip = ddi_get_child(vdip);
   6865 	while (cdip) {
   6866 		dev_info_t *next = ddi_get_next_sibling(cdip);
   6867 
   6868 		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
   6869 		cdip = next;
   6870 	}
   6871 	ndi_devi_exit(vdip, circ);
   6872 
   6873 	if (*held)
   6874 		ret = MDI_SUCCESS;
   6875 
   6876 	return (ret);
   6877 }
   6878 
   6879 static void
   6880 i_mdi_pm_post_config_one(dev_info_t *child)
   6881 {
   6882 	mdi_client_t	*ct;
   6883 
   6884 	ct = i_devi_get_client(child);
   6885 	if (ct == NULL)
   6886 		return;
   6887 
   6888 	MDI_CLIENT_LOCK(ct);
   6889 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
   6890 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
   6891 
   6892 	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
   6893 		MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n"));
   6894 		MDI_CLIENT_UNLOCK(ct);
   6895 		return;
   6896 	}
   6897 
   6898 	/* client has not been updated */
   6899 	if (MDI_CLIENT_IS_FAILED(ct)) {
   6900 		MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n"));
   6901 		MDI_CLIENT_UNLOCK(ct);
   6902 		return;
   6903 	}
   6904 
   6905 	/* another thread might have powered it down or detached it */
   6906 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
   6907 	    !DEVI_IS_ATTACHING(ct->ct_dip)) ||
   6908 	    (!i_ddi_devi_attached(ct->ct_dip) &&
   6909 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
   6910 		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
   6911 		i_mdi_pm_reset_client(ct);
   6912 	} else {
   6913 		mdi_pathinfo_t  *pip, *next;
   6914 		int	valid_path_count = 0;
   6915 
   6916 		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
   6917 		pip = ct->ct_path_head;
   6918 		while (pip != NULL) {
   6919 			MDI_PI_LOCK(pip);
   6920 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
   6921 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
   6922 				valid_path_count ++;
   6923 			MDI_PI_UNLOCK(pip);
   6924 			pip = next;
   6925 		}
   6926 		i_mdi_pm_rele_client(ct, valid_path_count);
   6927 	}
   6928 	ct->ct_powercnt_config = 0;
   6929 	MDI_CLIENT_UNLOCK(ct);
   6930 }
   6931 
   6932 static void
   6933 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
   6934 {
   6935 	int		circ;
   6936 	dev_info_t	*cdip;
   6937 
   6938 	ASSERT(MDI_VHCI(vdip));
   6939 
   6940 	/* ndi_devi_config_one */
   6941 	if (child) {
   6942 		ASSERT(DEVI_BUSY_OWNED(vdip));
   6943 		i_mdi_pm_post_config_one(child);
   6944 		return;
   6945 	}
   6946 
   6947 	/* devi_config_common */
   6948 	ndi_devi_enter(vdip, &circ);
   6949 	cdip = ddi_get_child(vdip);
   6950 	while (cdip) {
   6951 		dev_info_t *next = ddi_get_next_sibling(cdip);
   6952 
   6953 		i_mdi_pm_post_config_one(cdip);
   6954 		cdip = next;
   6955 	}
   6956 	ndi_devi_exit(vdip, circ);
   6957 }
   6958 
   6959 static void
   6960 i_mdi_pm_post_unconfig_one(dev_info_t *child)
   6961 {
   6962 	mdi_client_t	*ct;
   6963 
   6964 	ct = i_devi_get_client(child);
   6965 	if (ct == NULL)
   6966 		return;
   6967 
   6968 	MDI_CLIENT_LOCK(ct);
   6969 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
   6970 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
   6971 
   6972 	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
   6973 		MDI_DEBUG(4, (MDI_NOTE, child, "not held\n"));
   6974 		MDI_CLIENT_UNLOCK(ct);
   6975 		return;
   6976 	}
   6977 
   6978 	/* failure detaching or another thread just attached it */
   6979 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
   6980 	    i_ddi_devi_attached(ct->ct_dip)) ||
   6981 	    (!i_ddi_devi_attached(ct->ct_dip) &&
   6982 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
   6983 		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
   6984 		i_mdi_pm_reset_client(ct);
   6985 	} else {
   6986 		mdi_pathinfo_t  *pip, *next;
   6987 		int	valid_path_count = 0;
   6988 
   6989 		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
   6990 		pip = ct->ct_path_head;
   6991 		while (pip != NULL) {
   6992 			MDI_PI_LOCK(pip);
   6993 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
   6994 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
   6995 				valid_path_count ++;
   6996 			MDI_PI_UNLOCK(pip);
   6997 			pip = next;
   6998 		}
   6999 		i_mdi_pm_rele_client(ct, valid_path_count);
   7000 		ct->ct_powercnt_unconfig = 0;
   7001 	}
   7002 
   7003 	MDI_CLIENT_UNLOCK(ct);
   7004 }
   7005 
   7006 static void
   7007 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
   7008 {
   7009 	int			circ;
   7010 	dev_info_t		*cdip;
   7011 
   7012 	ASSERT(MDI_VHCI(vdip));
   7013 
   7014 	if (!held) {
   7015 		MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held));
   7016 		return;
   7017 	}
   7018 
   7019 	if (child) {
   7020 		ASSERT(DEVI_BUSY_OWNED(vdip));
   7021 		i_mdi_pm_post_unconfig_one(child);
   7022 		return;
   7023 	}
   7024 
   7025 	ndi_devi_enter(vdip, &circ);
   7026 	cdip = ddi_get_child(vdip);
   7027 	while (cdip) {
   7028 		dev_info_t *next = ddi_get_next_sibling(cdip);
   7029 
   7030 		i_mdi_pm_post_unconfig_one(cdip);
   7031 		cdip = next;
   7032 	}
   7033 	ndi_devi_exit(vdip, circ);
   7034 }
   7035 
   7036 int
   7037 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
   7038 {
   7039 	int			circ, ret = MDI_SUCCESS;
   7040 	dev_info_t		*client_dip = NULL;
   7041 	mdi_client_t		*ct;
   7042 
   7043 	/*
   7044 	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
   7045 	 * Power up pHCI for the named client device.
   7046 	 * Note: Before the client is enumerated under vhci by phci,
   7047 	 * client_dip can be NULL. Then proceed to power up all the
   7048 	 * pHCIs.
   7049 	 */
   7050 	if (devnm != NULL) {
   7051 		ndi_devi_enter(vdip, &circ);
   7052 		client_dip = ndi_devi_findchild(vdip, devnm);
   7053 	}
   7054 
   7055 	MDI_DEBUG(4, (MDI_NOTE, vdip,
   7056 	    "op = %d %s %p", op, devnm ? devnm : "", (void *)client_dip));
   7057 
   7058 	switch (op) {
   7059 	case MDI_PM_PRE_CONFIG:
   7060 		ret = i_mdi_pm_pre_config(vdip, client_dip);
   7061 		break;
   7062 
   7063 	case MDI_PM_PRE_UNCONFIG:
   7064 		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
   7065 		    flags);
   7066 		break;
   7067 
   7068 	case MDI_PM_POST_CONFIG:
   7069 		i_mdi_pm_post_config(vdip, client_dip);
   7070 		break;
   7071 
   7072 	case MDI_PM_POST_UNCONFIG:
   7073 		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
   7074 		break;
   7075 
   7076 	case MDI_PM_HOLD_POWER:
   7077 	case MDI_PM_RELE_POWER:
   7078 		ASSERT(args);
   7079 
   7080 		client_dip = (dev_info_t *)args;
   7081 		ASSERT(MDI_CLIENT(client_dip));
   7082 
   7083 		ct = i_devi_get_client(client_dip);
   7084 		MDI_CLIENT_LOCK(ct);
   7085 
   7086 		if (op == MDI_PM_HOLD_POWER) {
   7087 			if (ct->ct_power_cnt == 0) {
   7088 				(void) i_mdi_power_all_phci(ct);
   7089 				MDI_DEBUG(4, (MDI_NOTE, client_dip,
   7090 				    "i_mdi_pm_hold_client\n"));
   7091 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
   7092 			}
   7093 		} else {
   7094 			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
   7095 				MDI_DEBUG(4, (MDI_NOTE, client_dip,
   7096 				    "i_mdi_pm_rele_client\n"));
   7097 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
   7098 			} else {
   7099 				MDI_DEBUG(4, (MDI_NOTE, client_dip,
   7100 				    "i_mdi_pm_reset_client\n"));
   7101 				i_mdi_pm_reset_client(ct);
   7102 			}
   7103 		}
   7104 
   7105 		MDI_CLIENT_UNLOCK(ct);
   7106 		break;
   7107 
   7108 	default:
   7109 		break;
   7110 	}
   7111 
   7112 	if (devnm)
   7113 		ndi_devi_exit(vdip, circ);
   7114 
   7115 	return (ret);
   7116 }
   7117 
   7118 int
   7119 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
   7120 {
   7121 	mdi_vhci_t *vhci;
   7122 
   7123 	if (!MDI_VHCI(dip))
   7124 		return (MDI_FAILURE);
   7125 
   7126 	if (mdi_class) {
   7127 		vhci = DEVI(dip)->devi_mdi_xhci;
   7128 		ASSERT(vhci);
   7129 		*mdi_class = vhci->vh_class;
   7130 	}
   7131 
   7132 	return (MDI_SUCCESS);
   7133 }
   7134 
   7135 int
   7136 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
   7137 {
   7138 	mdi_phci_t *phci;
   7139 
   7140 	if (!MDI_PHCI(dip))
   7141 		return (MDI_FAILURE);
   7142 
   7143 	if (mdi_class) {
   7144 		phci = DEVI(dip)->devi_mdi_xhci;
   7145 		ASSERT(phci);
   7146 		*mdi_class = phci->ph_vhci->vh_class;
   7147 	}
   7148 
   7149 	return (MDI_SUCCESS);
   7150 }
   7151 
   7152 int
   7153 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
   7154 {
   7155 	mdi_client_t *client;
   7156 
   7157 	if (!MDI_CLIENT(dip))
   7158 		return (MDI_FAILURE);
   7159 
   7160 	if (mdi_class) {
   7161 		client = DEVI(dip)->devi_mdi_client;
   7162 		ASSERT(client);
   7163 		*mdi_class = client->ct_vhci->vh_class;
   7164 	}
   7165 
   7166 	return (MDI_SUCCESS);
   7167 }
   7168 
   7169 void *
   7170 mdi_client_get_vhci_private(dev_info_t *dip)
   7171 {
   7172 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
   7173 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
   7174 		mdi_client_t	*ct;
   7175 		ct = i_devi_get_client(dip);
   7176 		return (ct->ct_vprivate);
   7177 	}
   7178 	return (NULL);
   7179 }
   7180 
   7181 void
   7182 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
   7183 {
   7184 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
   7185 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
   7186 		mdi_client_t	*ct;
   7187 		ct = i_devi_get_client(dip);
   7188 		ct->ct_vprivate = data;
   7189 	}
   7190 }
   7191 /*
   7192  * mdi_pi_get_vhci_private():
   7193  *		Get the vhci private information associated with the
   7194  *		mdi_pathinfo node
   7195  */
   7196 void *
   7197 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
   7198 {
   7199 	caddr_t	vprivate = NULL;
   7200 	if (pip) {
   7201 		vprivate = MDI_PI(pip)->pi_vprivate;
   7202 	}
   7203 	return (vprivate);
   7204 }
   7205 
   7206 /*
   7207  * mdi_pi_set_vhci_private():
   7208  *		Set the vhci private information in the mdi_pathinfo node
   7209  */
   7210 void
   7211 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
   7212 {
   7213 	if (pip) {
   7214 		MDI_PI(pip)->pi_vprivate = priv;
   7215 	}
   7216 }
   7217 
   7218 /*
   7219  * mdi_phci_get_vhci_private():
   7220  *		Get the vhci private information associated with the
   7221  *		mdi_phci node
   7222  */
   7223 void *
   7224 mdi_phci_get_vhci_private(dev_info_t *dip)
   7225 {
   7226 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
   7227 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
   7228 		mdi_phci_t	*ph;
   7229 		ph = i_devi_get_phci(dip);
   7230 		return (ph->ph_vprivate);
   7231 	}
   7232 	return (NULL);
   7233 }
   7234 
   7235 /*
   7236  * mdi_phci_set_vhci_private():
   7237  *		Set the vhci private information in the mdi_phci node
   7238  */
   7239 void
   7240 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
   7241 {
   7242 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
   7243 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
   7244 		mdi_phci_t	*ph;
   7245 		ph = i_devi_get_phci(dip);
   7246 		ph->ph_vprivate = priv;
   7247 	}
   7248 }
   7249 
   7250 int
   7251 mdi_pi_ishidden(mdi_pathinfo_t *pip)
   7252 {
   7253 	return (MDI_PI_FLAGS_IS_HIDDEN(pip));
   7254 }
   7255 
   7256 int
   7257 mdi_pi_device_isremoved(mdi_pathinfo_t *pip)
   7258 {
   7259 	return (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip));
   7260 }
   7261 
   7262 /*
   7263  * When processing hotplug, if mdi_pi_offline-mdi_pi_free fails then this
   7264  * interface is used to represent device removal.
   7265  */
   7266 int
   7267 mdi_pi_device_remove(mdi_pathinfo_t *pip)
   7268 {
   7269 	MDI_PI_LOCK(pip);
   7270 	if (mdi_pi_device_isremoved(pip)) {
   7271 		MDI_PI_UNLOCK(pip);
   7272 		return (0);
   7273 	}
   7274 	MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip);
   7275 	MDI_PI_FLAGS_SET_HIDDEN(pip);
   7276 	MDI_PI_UNLOCK(pip);
   7277 
   7278 	i_ddi_di_cache_invalidate();
   7279 
   7280 	return (1);
   7281 }
   7282 
   7283 /*
   7284  * When processing hotplug, if a path marked mdi_pi_device_isremoved()
   7285  * is now accessible then this interfaces is used to represent device insertion.
   7286  */
   7287 int
   7288 mdi_pi_device_insert(mdi_pathinfo_t *pip)
   7289 {
   7290 	MDI_PI_LOCK(pip);
   7291 	if (!mdi_pi_device_isremoved(pip)) {
   7292 		MDI_PI_UNLOCK(pip);
   7293 		return (0);
   7294 	}
   7295 	MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip);
   7296 	MDI_PI_FLAGS_CLR_HIDDEN(pip);
   7297 	MDI_PI_UNLOCK(pip);
   7298 
   7299 	i_ddi_di_cache_invalidate();
   7300 
   7301 	return (1);
   7302 }
   7303 
   7304 /*
   7305  * List of vhci class names:
   7306  * A vhci class name must be in this list only if the corresponding vhci
   7307  * driver intends to use the mdi provided bus config implementation
   7308  * (i.e., mdi_vhci_bus_config()).
   7309  */
   7310 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
   7311 #define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))
   7312 
   7313 /*
   7314  * During boot time, the on-disk vhci cache for every vhci class is read
   7315  * in the form of an nvlist and stored here.
   7316  */
   7317 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];
   7318 
   7319 /* nvpair names in vhci cache nvlist */
   7320 #define	MDI_VHCI_CACHE_VERSION	1
   7321 #define	MDI_NVPNAME_VERSION	"version"
   7322 #define	MDI_NVPNAME_PHCIS	"phcis"
   7323 #define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"
   7324 
   7325 /*
   7326  * Given vhci class name, return its on-disk vhci cache filename.
   7327  * Memory for the returned filename which includes the full path is allocated
   7328  * by this function.
   7329  */
   7330 static char *
   7331 vhclass2vhcache_filename(char *vhclass)
   7332 {
   7333 	char *filename;
   7334 	int len;
   7335 	static char *fmt = "/etc/devices/mdi_%s_cache";
   7336 
   7337 	/*
   7338 	 * fmt contains the on-disk vhci cache file name format;
   7339 	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
   7340 	 */
   7341 
   7342 	/* the -1 below is to account for "%s" in the format string */
   7343 	len = strlen(fmt) + strlen(vhclass) - 1;
   7344 	filename = kmem_alloc(len, KM_SLEEP);
   7345 	(void) snprintf(filename, len, fmt, vhclass);
   7346 	ASSERT(len == (strlen(filename) + 1));
   7347 	return (filename);
   7348 }
   7349 
   7350 /*
   7351  * initialize the vhci cache related data structures and read the on-disk
   7352  * vhci cached data into memory.
   7353  */
   7354 static void
   7355 setup_vhci_cache(mdi_vhci_t *vh)
   7356 {
   7357 	mdi_vhci_config_t *vhc;
   7358 	mdi_vhci_cache_t *vhcache;
   7359 	int i;
   7360 	nvlist_t *nvl = NULL;
   7361 
   7362 	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
   7363 	vh->vh_config = vhc;
   7364 	vhcache = &vhc->vhc_vhcache;
   7365 
   7366 	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);
   7367 
   7368 	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
   7369 	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);
   7370 
   7371 	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);
   7372 
   7373 	/*
   7374 	 * Create string hash; same as mod_hash_create_strhash() except that
   7375 	 * we use NULL key destructor.
   7376 	 */
   7377 	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
   7378 	    mdi_bus_config_cache_hash_size,
   7379 	    mod_hash_null_keydtor, mod_hash_null_valdtor,
   7380 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
   7381 
   7382 	/*
   7383 	 * The on-disk vhci cache is read during booting prior to the
   7384 	 * lights-out period by mdi_read_devices_files().
   7385 	 */
   7386 	for (i = 0; i < N_VHCI_CLASSES; i++) {
   7387 		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
   7388 			nvl = vhcache_nvl[i];
   7389 			vhcache_nvl[i] = NULL;
   7390 			break;
   7391 		}
   7392 	}
   7393 
   7394 	/*
   7395 	 * this is to cover the case of some one manually causing unloading
   7396 	 * (or detaching) and reloading (or attaching) of a vhci driver.
   7397 	 */
   7398 	if (nvl == NULL && modrootloaded)
   7399 		nvl = read_on_disk_vhci_cache(vh->vh_class);
   7400 
   7401 	if (nvl != NULL) {
   7402 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
   7403 		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
   7404 			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
   7405 		else  {
   7406 			cmn_err(CE_WARN,
   7407 			    "%s: data file corrupted, will recreate",
   7408 			    vhc->vhc_vhcache_filename);
   7409 		}
   7410 		rw_exit(&vhcache->vhcache_lock);
   7411 		nvlist_free(nvl);
   7412 	}
   7413 
   7414 	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
   7415 	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");
   7416 
   7417 	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
   7418 	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
   7419 }
   7420 
   7421 /*
   7422  * free all vhci cache related resources
   7423  */
   7424 static int
   7425 destroy_vhci_cache(mdi_vhci_t *vh)
   7426 {
   7427 	mdi_vhci_config_t *vhc = vh->vh_config;
   7428 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
   7429 	mdi_vhcache_phci_t *cphci, *cphci_next;
   7430 	mdi_vhcache_client_t *cct, *cct_next;
   7431 	mdi_vhcache_pathinfo_t *cpi, *cpi_next;
   7432 
   7433 	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
   7434 		return (MDI_FAILURE);
   7435 
   7436 	kmem_free(vhc->vhc_vhcache_filename,
   7437 	    strlen(vhc->vhc_vhcache_filename) + 1);
   7438 
   7439 	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
   7440 
   7441 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
   7442 	    cphci = cphci_next) {
   7443 		cphci_next = cphci->cphci_next;
   7444 		free_vhcache_phci(cphci);
   7445 	}
   7446 
   7447 	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
   7448 		cct_next = cct->cct_next;
   7449 		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
   7450 			cpi_next = cpi->cpi_next;
   7451 			free_vhcache_pathinfo(cpi);
   7452 		}
   7453 		free_vhcache_client(cct);
   7454 	}
   7455 
   7456 	rw_destroy(&vhcache->vhcache_lock);
   7457 
   7458 	mutex_destroy(&vhc->vhc_lock);
   7459 	cv_destroy(&vhc->vhc_cv);
   7460 	kmem_free(vhc, sizeof (mdi_vhci_config_t));
   7461 	return (MDI_SUCCESS);
   7462 }
   7463 
   7464 /*
   7465  * Stop all vhci cache related async threads and free their resources.
   7466  */
   7467 static int
   7468 stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
   7469 {
   7470 	mdi_async_client_config_t *acc, *acc_next;
   7471 
   7472 	mutex_enter(&vhc->vhc_lock);
   7473 	vhc->vhc_flags |= MDI_VHC_EXIT;
   7474 	ASSERT(vhc->vhc_acc_thrcount >= 0);
   7475 	cv_broadcast(&vhc->vhc_cv);
   7476 
   7477 	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
   7478 	    vhc->vhc_acc_thrcount != 0) {
   7479 		mutex_exit(&vhc->vhc_lock);
   7480 		delay_random(mdi_delay);
   7481 		mutex_enter(&vhc->vhc_lock);
   7482 	}
   7483 
   7484 	vhc->vhc_flags &= ~MDI_VHC_EXIT;
   7485 
   7486 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
   7487 		acc_next = acc->acc_next;
   7488 		free_async_client_config(acc);
   7489 	}
   7490 	vhc->vhc_acc_list_head = NULL;
   7491 	vhc->vhc_acc_list_tail = NULL;
   7492 	vhc->vhc_acc_count = 0;
   7493 
   7494 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
   7495 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
   7496 		mutex_exit(&vhc->vhc_lock);
   7497 		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
   7498 			vhcache_dirty(vhc);
   7499 			return (MDI_FAILURE);
   7500 		}
   7501 	} else
   7502 		mutex_exit(&vhc->vhc_lock);
   7503 
   7504 	if (callb_delete(vhc->vhc_cbid) != 0)
   7505 		return (MDI_FAILURE);
   7506 
   7507 	return (MDI_SUCCESS);
   7508 }
   7509 
   7510 /*
   7511  * Stop vhci cache flush thread
   7512  */
   7513 /* ARGSUSED */
   7514 static boolean_t
   7515 stop_vhcache_flush_thread(void *arg, int code)
   7516 {
   7517 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
   7518 
   7519 	mutex_enter(&vhc->vhc_lock);
   7520 	vhc->vhc_flags |= MDI_VHC_EXIT;
   7521 	cv_broadcast(&vhc->vhc_cv);
   7522 
   7523 	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
   7524 		mutex_exit(&vhc->vhc_lock);
   7525 		delay_random(mdi_delay);
   7526 		mutex_enter(&vhc->vhc_lock);
   7527 	}
   7528 
   7529 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
   7530 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
   7531 		mutex_exit(&vhc->vhc_lock);
   7532 		(void) flush_vhcache(vhc, 1);
   7533 	} else
   7534 		mutex_exit(&vhc->vhc_lock);
   7535 
   7536 	return (B_TRUE);
   7537 }
   7538 
   7539 /*
   7540  * Enqueue the vhcache phci (cphci) at the tail of the list
   7541  */
   7542 static void
   7543 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
   7544 {
   7545 	cphci->cphci_next = NULL;
   7546 	if (vhcache->vhcache_phci_head == NULL)
   7547 		vhcache->vhcache_phci_head = cphci;
   7548 	else
   7549 		vhcache->vhcache_phci_tail->cphci_next = cphci;
   7550 	vhcache->vhcache_phci_tail = cphci;
   7551 }
   7552 
   7553 /*
   7554  * Enqueue the vhcache pathinfo (cpi) at the tail of the list
   7555  */
   7556 static void
   7557 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
   7558     mdi_vhcache_pathinfo_t *cpi)
   7559 {
   7560 	cpi->cpi_next = NULL;
   7561 	if (cct->cct_cpi_head == NULL)
   7562 		cct->cct_cpi_head = cpi;
   7563 	else
   7564 		cct->cct_cpi_tail->cpi_next = cpi;
   7565 	cct->cct_cpi_tail = cpi;
   7566 }
   7567 
   7568 /*
   7569  * Enqueue the vhcache pathinfo (cpi) at the correct location in the
   7570  * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
   7571  * flag set come at the beginning of the list. All cpis which have this
   7572  * flag set come at the end of the list.
   7573  */
   7574 static void
   7575 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
   7576     mdi_vhcache_pathinfo_t *newcpi)
   7577 {
   7578 	mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
   7579 
   7580 	if (cct->cct_cpi_head == NULL ||
   7581 	    (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
   7582 		enqueue_tail_vhcache_pathinfo(cct, newcpi);
   7583 	else {
   7584 		for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
   7585 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
   7586 		    prev_cpi = cpi, cpi = cpi->cpi_next)
   7587 			;
   7588 
   7589 		if (prev_cpi == NULL)
   7590 			cct->cct_cpi_head = newcpi;
   7591 		else
   7592 			prev_cpi->cpi_next = newcpi;
   7593 
   7594 		newcpi->cpi_next = cpi;
   7595 
   7596 		if (cpi == NULL)
   7597 			cct->cct_cpi_tail = newcpi;
   7598 	}
   7599 }
   7600 
   7601 /*
   7602  * Enqueue the vhcache client (cct) at the tail of the list
   7603  */
   7604 static void
   7605 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
   7606     mdi_vhcache_client_t *cct)
   7607 {
   7608 	cct->cct_next = NULL;
   7609 	if (vhcache->vhcache_client_head == NULL)
   7610 		vhcache->vhcache_client_head = cct;
   7611 	else
   7612 		vhcache->vhcache_client_tail->cct_next = cct;
   7613 	vhcache->vhcache_client_tail = cct;
   7614 }
   7615 
   7616 static void
   7617 free_string_array(char **str, int nelem)
   7618 {
   7619 	int i;
   7620 
   7621 	if (str) {
   7622 		for (i = 0; i < nelem; i++) {
   7623 			if (str[i])
   7624 				kmem_free(str[i], strlen(str[i]) + 1);
   7625 		}
   7626 		kmem_free(str, sizeof (char *) * nelem);
   7627 	}
   7628 }
   7629 
   7630 static void
   7631 free_vhcache_phci(mdi_vhcache_phci_t *cphci)
   7632 {
   7633 	kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
   7634 	kmem_free(cphci, sizeof (*cphci));
   7635 }
   7636 
   7637 static void
   7638 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
   7639 {
   7640 	kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
   7641 	kmem_free(cpi, sizeof (*cpi));
   7642 }
   7643 
   7644 static void
   7645 free_vhcache_client(mdi_vhcache_client_t *cct)
   7646 {
   7647 	kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
   7648 	kmem_free(cct, sizeof (*cct));
   7649 }
   7650 
   7651 static char *
   7652 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
   7653 {
   7654 	char *name_addr;
   7655 	int len;
   7656 
   7657 	len = strlen(ct_name) + strlen(ct_addr) + 2;
   7658 	name_addr = kmem_alloc(len, KM_SLEEP);
   7659 	(void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
   7660 
   7661 	if (ret_len)
   7662 		*ret_len = len;
   7663 	return (name_addr);
   7664 }
   7665 
   7666 /*
   7667  * Copy the contents of paddrnvl to vhci cache.
   7668  * paddrnvl nvlist contains path information for a vhci client.
   7669  * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
   7670  */
   7671 static void
   7672 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
   7673     mdi_vhcache_client_t *cct)
   7674 {
   7675 	nvpair_t *nvp = NULL;
   7676 	mdi_vhcache_pathinfo_t *cpi;
   7677 	uint_t nelem;
   7678 	uint32_t *val;
   7679 
   7680 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
   7681 		ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
   7682 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
   7683 		cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
   7684 		(void) nvpair_value_uint32_array(nvp, &val, &nelem);
   7685 		ASSERT(nelem == 2);
   7686 		cpi->cpi_cphci = cphci_list[val[0]];
   7687 		cpi->cpi_flags = val[1];
   7688 		enqueue_tail_vhcache_pathinfo(cct, cpi);
   7689 	}
   7690 }
   7691 
   7692 /*
   7693  * Copy the contents of caddrmapnvl to vhci cache.
   7694  * caddrmapnvl nvlist contains vhci client address to phci client address
   7695  * mappings. See the comment in mainnvl_to_vhcache() for the format of
   7696  * this nvlist.
   7697  */
   7698 static void
   7699 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
   7700     mdi_vhcache_phci_t *cphci_list[])
   7701 {
   7702 	nvpair_t *nvp = NULL;
   7703 	nvlist_t *paddrnvl;
   7704 	mdi_vhcache_client_t *cct;
   7705 
   7706 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
   7707 		ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
   7708 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
   7709 		cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
   7710 		(void) nvpair_value_nvlist(nvp, &paddrnvl);
   7711 		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
   7712 		/* the client must contain at least one path */
   7713 		ASSERT(cct->cct_cpi_head != NULL);
   7714 
   7715 		enqueue_vhcache_client(vhcache, cct);
   7716 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
   7717 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
   7718 	}
   7719 }
   7720 
   7721 /*
   7722  * Copy the contents of the main nvlist to vhci cache.
   7723  *
   7724  * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
   7725  * The nvlist contains the mappings between the vhci client addresses and
   7726  * their corresponding phci client addresses.
   7727  *
   7728  * The structure of the nvlist is as follows:
   7729  *
   7730  * Main nvlist:
   7731  *	NAME		TYPE		DATA
   7732  *	version		int32		version number
   7733  *	phcis		string array	array of phci paths
   7734  *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
   7735  *
   7736  * structure of c2paddrs_nvl:
   7737  *	NAME		TYPE		DATA
   7738  *	caddr1		nvlist_t	paddrs_nvl1
   7739  *	caddr2		nvlist_t	paddrs_nvl2
   7740  *	...
   7741  * where caddr1, caddr2, ... are vhci client name and addresses in the
   7742  * form of "<clientname>@<clientaddress>".
   7743  * (for example: "ssd@2000002037cd9f72");
   7744  * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
   7745  *
   7746  * structure of paddrs_nvl:
   7747  *	NAME		TYPE		DATA
   7748  *	pi_addr1	uint32_array	(phci-id, cpi_flags)
   7749  *	pi_addr2	uint32_array	(phci-id, cpi_flags)
   7750  *	...
   7751  * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
   7752  * (so called pi_addrs, for example: "w2100002037cd9f72,0");
   7753  * phci-ids are integers that identify pHCIs to which the
   7754  * the bus specific address belongs to. These integers are used as an index
   7755  * into to the phcis string array in the main nvlist to get the pHCI path.
   7756  */
   7757 static int
   7758 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
   7759 {
   7760 	char **phcis, **phci_namep;
   7761 	uint_t nphcis;
   7762 	mdi_vhcache_phci_t *cphci, **cphci_list;
   7763 	nvlist_t *caddrmapnvl;
   7764 	int32_t ver;
   7765 	int i;
   7766 	size_t cphci_list_size;
   7767 
   7768 	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
   7769 
   7770 	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
   7771 	    ver != MDI_VHCI_CACHE_VERSION)
   7772 		return (MDI_FAILURE);
   7773 
   7774 	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
   7775 	    &nphcis) != 0)
   7776 		return (MDI_SUCCESS);
   7777 
   7778 	ASSERT(nphcis > 0);
   7779 
   7780 	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
   7781 	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
   7782 	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
   7783 		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
   7784 		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
   7785 		enqueue_vhcache_phci(vhcache, cphci);
   7786 		cphci_list[i] = cphci;
   7787 	}
   7788 
   7789 	ASSERT(vhcache->vhcache_phci_head != NULL);
   7790 
   7791 	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
   7792 		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
   7793 
   7794 	kmem_free(cphci_list, cphci_list_size);
   7795 	return (MDI_SUCCESS);
   7796 }
   7797 
   7798 /*
   7799  * Build paddrnvl for the specified client using the information in the
   7800  * vhci cache and add it to the caddrmapnnvl.
   7801  * Returns 0 on success, errno on failure.
   7802  */
   7803 static int
   7804 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
   7805     nvlist_t *caddrmapnvl)
   7806 {
   7807 	mdi_vhcache_pathinfo_t *cpi;
   7808 	nvlist_t *nvl;
   7809 	int err;
   7810 	uint32_t val[2];
   7811 
   7812 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
   7813 
   7814 	if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
   7815 		return (err);
   7816 
   7817 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
   7818 		val[0] = cpi->cpi_cphci->cphci_id;
   7819 		val[1] = cpi->cpi_flags;
   7820 		if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
   7821 		    != 0)
   7822 			goto out;
   7823 	}
   7824 
   7825 	err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
   7826 out:
   7827 	nvlist_free(nvl);
   7828 	return (err);
   7829 }
   7830 
   7831 /*
   7832  * Build caddrmapnvl using the information in the vhci cache
   7833  * and add it to the mainnvl.
   7834  * Returns 0 on success, errno on failure.
   7835  */
   7836 static int
   7837 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
   7838 {
   7839 	mdi_vhcache_client_t *cct;
   7840 	nvlist_t *nvl;
   7841 	int err;
   7842 
   7843 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
   7844 
   7845 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
   7846 		return (err);
   7847 
   7848 	for (cct = vhcache->vhcache_client_head; cct != NULL;
   7849 	    cct = cct->cct_next) {
   7850 		if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
   7851 			goto out;
   7852 	}
   7853 
   7854 	err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
   7855 out:
   7856 	nvlist_free(nvl);
   7857 	return (err);
   7858 }
   7859 
   7860 /*
   7861  * Build nvlist using the information in the vhci cache.
   7862  * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
   7863  * Returns nvl on success, NULL on failure.
   7864  */
   7865 static nvlist_t *
   7866 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
   7867 {
   7868 	mdi_vhcache_phci_t *cphci;
   7869 	uint_t phci_count;
   7870 	char **phcis;
   7871 	nvlist_t *nvl;
   7872 	int err, i;
   7873 
   7874 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
   7875 		nvl = NULL;
   7876 		goto out;
   7877 	}
   7878 
   7879 	if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
   7880 	    MDI_VHCI_CACHE_VERSION)) != 0)
   7881 		goto out;
   7882 
   7883 	rw_enter(&vhcache->vhcache_lock, RW_READER);
   7884 	if (vhcache->vhcache_phci_head == NULL) {
   7885 		rw_exit(&vhcache->vhcache_lock);
   7886 		return (nvl);
   7887 	}
   7888 
   7889 	phci_count = 0;
   7890 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
   7891 	    cphci = cphci->cphci_next)
   7892 		cphci->cphci_id = phci_count++;
   7893 
   7894 	/* build phci pathname list */
   7895 	phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
   7896 	for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
   7897 	    cphci = cphci->cphci_next, i++)
   7898 		phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
   7899 
   7900 	err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
   7901 	    phci_count);
   7902 	free_string_array(phcis, phci_count);
   7903 
   7904 	if (err == 0 &&
   7905 	    (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
   7906 		rw_exit(&vhcache->vhcache_lock);
   7907 		return (nvl);
   7908 	}
   7909 
   7910 	rw_exit(&vhcache->vhcache_lock);
   7911 out:
   7912 	if (nvl)
   7913 		nvlist_free(nvl);
   7914 	return (NULL);
   7915 }
   7916 
   7917 /*
   7918  * Lookup vhcache phci structure for the specified phci path.
   7919  */
   7920 static mdi_vhcache_phci_t *
   7921 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
   7922 {
   7923 	mdi_vhcache_phci_t *cphci;
   7924 
   7925 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
   7926 
   7927 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
   7928 	    cphci = cphci->cphci_next) {
   7929 		if (strcmp(cphci->cphci_path, phci_path) == 0)
   7930 			return (cphci);
   7931 	}
   7932 
   7933 	return (NULL);
   7934 }
   7935 
   7936 /*
   7937  * Lookup vhcache phci structure for the specified phci.
   7938  */
   7939 static mdi_vhcache_phci_t *
   7940 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
   7941 {
   7942 	mdi_vhcache_phci_t *cphci;
   7943 
   7944 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
   7945 
   7946 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
   7947 	    cphci = cphci->cphci_next) {
   7948 		if (cphci->cphci_phci == ph)
   7949 			return (cphci);
   7950 	}
   7951 
   7952 	return (NULL);
   7953 }
   7954 
   7955 /*
   7956  * Add the specified phci to the vhci cache if not already present.
   7957  */
   7958 static void
   7959 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
   7960 {
   7961 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
   7962 	mdi_vhcache_phci_t *cphci;
   7963 	char *pathname;
   7964 	int cache_updated;
   7965 
   7966 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
   7967 
   7968 	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
   7969 	(void) ddi_pathname(ph->ph_dip, pathname);
   7970 	if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
   7971 	    != NULL) {
   7972 		cphci->cphci_phci = ph;
   7973 		cache_updated = 0;
   7974 	} else {
   7975 		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
   7976 		cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
   7977 		cphci->cphci_phci = ph;
   7978 		enqueue_vhcache_phci(vhcache, cphci);
   7979 		cache_updated = 1;
   7980 	}
   7981 
   7982 	rw_exit(&vhcache->vhcache_lock);
   7983 
   7984 	/*
   7985 	 * Since a new phci has been added, reset
   7986 	 * vhc_path_discovery_cutoff_time to allow for discovery of paths
   7987 	 * during next vhcache_discover_paths().
   7988 	 */
   7989 	mutex_enter(&vhc->vhc_lock);
   7990 	vhc->vhc_path_discovery_cutoff_time = 0;
   7991 	mutex_exit(&vhc->vhc_lock);
   7992 
   7993 	kmem_free(pathname, MAXPATHLEN);
   7994 	if (cache_updated)
   7995 		vhcache_dirty(vhc);
   7996 }
   7997 
   7998 /*
   7999  * Remove the reference to the specified phci from the vhci cache.
   8000  */
   8001 static void
   8002 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
   8003 {
   8004 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
   8005 	mdi_vhcache_phci_t *cphci;
   8006 
   8007 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
   8008 	if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
   8009 		/* do not remove the actual mdi_vhcache_phci structure */
   8010 		cphci->cphci_phci = NULL;
   8011 	}
   8012 	rw_exit(&vhcache->vhcache_lock);
   8013 }
   8014 
   8015 static void
   8016 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
   8017     mdi_vhcache_lookup_token_t *src)
   8018 {
   8019 	if (src == NULL) {
   8020 		dst->lt_cct = NULL;
   8021 		dst->lt_cct_lookup_time = 0;
   8022 	} else {
   8023 		dst->lt_cct = src->lt_cct;
   8024 		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
   8025 	}
   8026 }
   8027 
   8028 /*
   8029  * Look up vhcache client for the specified client.
   8030  */
   8031 static mdi_vhcache_client_t *
   8032 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
   8033     mdi_vhcache_lookup_token_t *token)
   8034 {
   8035 	mod_hash_val_t hv;
   8036 	char *name_addr;
   8037 	int len;
   8038 
   8039 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
   8040 
   8041 	/*
   8042 	 * If no vhcache clean occurred since the last lookup, we can
   8043 	 * simply return the cct from the last lookup operation.
   8044 	 * It works because ccts are never freed except during the vhcache
   8045 	 * cleanup operation.
   8046 	 */
   8047 	if (token != NULL &&
   8048 	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
   8049 		return (token->lt_cct);
   8050 
   8051 	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
   8052 	if (mod_hash_find(vhcache->vhcache_client_hash,
   8053 	    (mod_hash_key_t)name_addr, &hv) == 0) {
   8054 		if (token) {
   8055 			token->lt_cct = (mdi_vhcache_client_t *)hv;
   8056 			token->lt_cct_lookup_time = ddi_get_lbolt64();
   8057 		}
   8058 	} else {
   8059 		if (token) {
   8060 			token->lt_cct = NULL;
   8061 			token->lt_cct_lookup_time = 0;
   8062 		}
   8063 		hv = NULL;
   8064 	}
   8065 	kmem_free(name_addr, len);
   8066 	return ((mdi_vhcache_client_t *)hv);
   8067 }
   8068 
   8069 /*
   8070  * Add the specified path to the vhci cache if not already present.
   8071  * Also add the vhcache client for the client corresponding to this path
   8072  * if it doesn't already exist.
   8073  */
   8074 static void
   8075 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
   8076 {
   8077 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
   8078 	mdi_vhcache_client_t *cct;
   8079 	mdi_vhcache_pathinfo_t *cpi;
   8080 	mdi_phci_t *ph = pip->pi_phci;
   8081 	mdi_client_t *ct = pip->pi_client;
   8082 	int cache_updated = 0;
   8083 
   8084 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
   8085 
   8086 	/* if vhcache client for this pip doesn't already exist, add it */
   8087 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
   8088 	    NULL)) == NULL) {
   8089 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
   8090 		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
   8091 		    ct->ct_guid, NULL);
   8092 		enqueue_vhcache_client(vhcache, cct);
   8093 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
   8094 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
   8095 		cache_updated = 1;
   8096 	}
   8097 
   8098 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
   8099 		if (cpi->cpi_cphci->cphci_phci == ph &&
   8100 		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
   8101 			cpi->cpi_pip = pip;
   8102 			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
   8103 				cpi->cpi_flags &=
   8104 				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
   8105 				sort_vhcache_paths(cct);
   8106 				cache_updated = 1;
   8107 			}
   8108 			break;
   8109 		}
   8110 	}
   8111 
   8112 	if (cpi == NULL) {
   8113 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
   8114 		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
   8115 		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
   8116 		ASSERT(cpi->cpi_cphci != NULL);
   8117 		cpi->cpi_pip = pip;
   8118 		enqueue_vhcache_pathinfo(cct, cpi);
   8119 		cache_updated = 1;
   8120 	}
   8121 
   8122 	rw_exit(&vhcache->vhcache_lock);
   8123 
   8124 	if (cache_updated)
   8125 		vhcache_dirty(vhc);
   8126 }
   8127 
   8128 /*
   8129  * Remove the reference to the specified path from the vhci cache.
   8130  */
   8131 static void
   8132 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
   8133 {
   8134 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
   8135 	mdi_client_t *ct = pip->pi_client;
   8136 	mdi_vhcache_client_t *cct;
   8137 	mdi_vhcache_pathinfo_t *cpi;
   8138 
   8139 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
   8140 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
   8141 	    NULL)) != NULL) {
   8142 		for (cpi = cct->cct_cpi_head; cpi != NULL;
   8143 		    cpi = cpi->cpi_next) {
   8144 			if (cpi->cpi_pip == pip) {
   8145 				cpi->cpi_pip = NULL;
   8146 				break;
   8147 			}
   8148 		}
   8149 	}
   8150 	rw_exit(&vhcache->vhcache_lock);
   8151 }
   8152 
   8153 /*
   8154  * Flush the vhci cache to disk.
   8155  * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
   8156  */
   8157 static int
   8158 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
   8159 {
   8160 	nvlist_t *nvl;
   8161 	int err;
   8162 	int rv;
   8163 
   8164 	/*
   8165 	 * It is possible that the system may shutdown before
   8166 	 * i_ddi_io_initialized (during stmsboot for example). To allow for
   8167 	 * flushing the cache in this case do not check for
   8168 	 * i_ddi_io_initialized when force flag is set.
   8169 	 */
   8170 	if (force_flag == 0 && !i_ddi_io_initialized())
   8171 		return (MDI_FAILURE);
   8172 
   8173 	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
   8174 		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
   8175 		nvlist_free(nvl);
   8176 	} else
   8177 		err = EFAULT;
   8178 
   8179 	rv = MDI_SUCCESS;
   8180 	mutex_enter(&vhc->vhc_lock);
   8181 	if (err != 0) {
   8182 		if (err == EROFS) {
   8183 			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
   8184 			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
   8185 			    MDI_VHC_VHCACHE_DIRTY);
   8186 		} else {
   8187 			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
   8188 				cmn_err(CE_CONT, "%s: update failed\n",
   8189 				    vhc->vhc_vhcache_filename);
   8190 				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
   8191 			}
   8192 			rv = MDI_FAILURE;
   8193 		}
   8194 	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
   8195 		cmn_err(CE_CONT,
   8196 		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
   8197 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
   8198 	}
   8199 	mutex_exit(&vhc->vhc_lock);
   8200 
   8201 	return (rv);
   8202 }
   8203 
/*
 * Body of the vhcache flush thread.
 *
 * arg is the mdi_vhci_config_t whose cache this thread flushes.
 *
 * While the cache is dirty the thread waits until vhc_flush_at_ticks and then
 * calls flush_vhcache().  Once the cache is clean it idles for up to
 * idle_time ticks waiting to be woken through vhc_cv; if the cache is still
 * clean after that (or MDI_VHC_EXIT is set at any point) the thread clears
 * MDI_VHC_VHCACHE_FLUSH_THREAD and exits.
 *
 * vhc_lock is held throughout except around the flush_vhcache() call, and
 * every wait is bracketed by CALLB_CPR_SAFE_BEGIN/END.
 */
static void
vhcache_flush_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	clock_t idle_time, quit_at_ticks;
	callb_cpr_t cprinfo;

	/*
	 * idle period before exiting; the tunable is scaled by
	 * TICKS_PER_SECOND, so idle_time itself is in clock ticks
	 */
	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_vhcache_flush");
	mutex_enter(&vhc->vhc_lock);
	for (; ; ) {
		/* phase 1: the cache is dirty - wait for the due time, flush */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
				/* not due yet; sleep until the flush time */
				CALLB_CPR_SAFE_BEGIN(&cprinfo);
				(void) cv_timedwait(&vhc->vhc_cv,
				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
			} else {
				/*
				 * Clear the dirty flag before dropping the
				 * lock: flush_vhcache() runs unlocked, and
				 * vhcache_dirty() sets the flag again under
				 * vhc_lock if the cache is re-dirtied.
				 */
				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
				mutex_exit(&vhc->vhc_lock);

				/* on failure, re-mark dirty to retry later */
				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
					vhcache_dirty(vhc);

				mutex_enter(&vhc->vhc_lock);
			}
		}

		/* phase 2: the cache is clean - idle until woken or timed out */
		quit_at_ticks = ddi_get_lbolt() + idle_time;

		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		/*
		 * Leave if asked to exit or if nothing became dirty during
		 * the idle period; otherwise go around and flush again.
		 */
		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
			goto out;
	}

out:
	/* still under vhc_lock: let vhcache_dirty() know the thread is gone */
	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}
   8261 
   8262 /*
   8263  * Make vhci cache dirty and schedule flushing by vhcache flush thread.
   8264  */
   8265 static void
   8266 vhcache_dirty(mdi_vhci_config_t *vhc)
   8267 {
   8268 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
   8269 	int create_thread;
   8270 
   8271 	rw_enter(&vhcache->vhcache_lock, RW_READER);
   8272 	/* do not flush cache until the cache is fully built */
   8273 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
   8274 		rw_exit(&vhcache->vhcache_lock);
   8275 		return;
   8276 	}
   8277 	rw_exit(&vhcache->vhcache_lock);
   8278 
   8279 	mutex_enter(&vhc->vhc_lock);
   8280 	if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
   8281 		mutex_exit(&vhc->vhc_lock);
   8282 		return;
   8283 	}
   8284 
   8285 	vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
   8286 	vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
   8287 	    mdi_vhcache_flush_delay * TICKS_PER_SECOND;
   8288 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
   8289 		cv_broadcast(&vhc->vhc_cv);
   8290 		create_thread = 0;
   8291 	} else {
   8292 		vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
   8293 		create_thread = 1;
   8294 	}
   8295 	mutex_exit(&vhc->vhc_lock);
   8296 
   8297 	if (create_thread)
   8298 		(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
   8299 		    0, &p0, TS_RUN, minclsyspri);
   8300 }
   8301 
/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci.
 *
 * Instances are built by bus_config_all_phcis() and consumed (and freed,
 * including phbc_phci_path) by bus_config_phci().
 */
typedef struct mdi_phci_bus_config_s {
	/* device path of the phci; a private copy of the cached cphci_path */
	char *phbc_phci_path;
	struct mdi_vhci_bus_config_s *phbc_vhbusconfig;	/* vhci bus config */
	/* next entry on the list of pending phci bus config operations */
	struct mdi_phci_bus_config_s *phbc_next;
} mdi_phci_bus_config_t;
   8311 
/*
 * vhci bus config structure - one for each vhci bus config operation.
 *
 * Shared by all the mdi_phci_bus_config_t entries created for the same
 * operation; vhbc_thr_count tracks how many of them are still outstanding.
 */
typedef struct mdi_vhci_bus_config_s {
	ddi_bus_config_op_t vhbc_op;	/* bus config op */
	major_t vhbc_op_major;		/* bus config op major */
	uint_t vhbc_op_flags;		/* bus config op flags */
	/* vhbc_thr_count is updated and waited on under this lock */
	kmutex_t vhbc_lock;
	/* broadcast by bus_config_phci() when vhbc_thr_count drops to zero */
	kcondvar_t vhbc_cv;
	/* number of phci bus config operations not yet completed */
	int vhbc_thr_count;
} mdi_vhci_bus_config_t;
   8321 
   8322 /*
   8323  * bus config the specified phci
   8324  */
   8325 static void
   8326 bus_config_phci(void *arg)
   8327 {
   8328 	mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
   8329 	mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
   8330 	dev_info_t *ph_dip;
   8331 
   8332 	/*
   8333 	 * first configure all path components upto phci and then configure
   8334 	 * the phci children.
   8335 	 */
   8336 	if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
   8337 	    != NULL) {
   8338 		if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
   8339 		    vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
   8340 			(void) ndi_devi_config_driver(ph_dip,
   8341 			    vhbc->vhbc_op_flags,
   8342 			    vhbc->vhbc_op_major);
   8343 		} else
   8344 			(void) ndi_devi_config(ph_dip,
   8345 			    vhbc->vhbc_op_flags);
   8346 
   8347 		/* release the hold that e_ddi_hold_devi_by_path() placed */
   8348 		ndi_rele_devi(ph_dip);
   8349 	}
   8350 
   8351 	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
   8352 	kmem_free(phbc, sizeof (*phbc));
   8353 
   8354 	mutex_enter(&vhbc->vhbc_lock);
   8355 	vhbc->vhbc_thr_count--;
   8356 	if (vhbc->vhbc_thr_count == 0)
   8357 		cv_broadcast(&vhbc->vhbc_cv);
   8358 	mutex_exit(&vhbc->vhbc_lock);
   8359 }
   8360 
/*
 * Bus config all phcis associated with the vhci in parallel.
 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
 *
 * vhcache: vhci cache whose phci list is walked.
 * flags:   only NDI_CONFIG_REPROBE and NDI_DRV_CONF_REPROBE are passed on;
 *          NDI_NO_EVENT is always added.
 * maj:     major number handed to the per-phci operation.
 *
 * Does not return until every per-phci operation has completed.
 */
static void
bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
    ddi_bus_config_op_t op, major_t maj)
{
	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
	mdi_vhci_bus_config_t *vhbc;
	mdi_vhcache_phci_t *cphci;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	/* nothing to do if no phci is cached */
	if (vhcache->vhcache_phci_head == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);

	/*
	 * Snapshot the phci paths under the cache lock: one request per
	 * phci, each with its own copy of the path so the lock can be
	 * dropped before any configuration work starts.
	 */
	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next) {
		/* skip phcis that haven't attached before root is available */
		if (!modrootloaded && (cphci->cphci_phci == NULL))
			continue;
		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
		    KM_SLEEP);
		phbc->phbc_vhbusconfig = vhbc;
		phbc->phbc_next = phbc_head;
		phbc_head = phbc;
		/* counted before any worker starts, so no lock is needed yet */
		vhbc->vhbc_thr_count++;
	}
	rw_exit(&vhcache->vhcache_lock);

	vhbc->vhbc_op = op;
	vhbc->vhbc_op_major = maj;
	vhbc->vhbc_op_flags = NDI_NO_EVENT |
	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);

	/* now create threads to initiate bus config on all phcis in parallel */
	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
		/* bus_config_phci() frees phbc, so fetch the link first */
		phbc_next = phbc->phbc_next;
		/* with mdi_mtc_off set the work is done inline, one by one */
		if (mdi_mtc_off)
			bus_config_phci((void *)phbc);
		else
			(void) thread_create(NULL, 0, bus_config_phci, phbc,
			    0, &p0, TS_RUN, minclsyspri);
	}

	mutex_enter(&vhbc->vhbc_lock);
	/* wait until all threads exit */
	while (vhbc->vhbc_thr_count > 0)
		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
	mutex_exit(&vhbc->vhbc_lock);

	/* every request has completed; tear down the shared state */
	mutex_destroy(&vhbc->vhbc_lock);
	cv_destroy(&vhbc->vhbc_cv);
	kmem_free(vhbc, sizeof (*vhbc));
}
   8423 
   8424 /*
   8425  * Single threaded version of bus_config_all_phcis()
   8426  */
   8427 static void
   8428 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
   8429     ddi_bus_config_op_t op, major_t maj)
   8430 {
   8431 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
   8432 
   8433 	single_threaded_vhconfig_enter(vhc);
   8434 	bus_config_all_phcis(vhcache, flags, op, maj);
   8435 	single_threaded_vhconfig_exit(vhc);
   8436 }
   8437 
   8438 /*
   8439  * Perform BUS_CONFIG_ONE on the specified child of the phci.
   8440  * The path includes the child component in addition to the phci path.
   8441  */
   8442 static int
   8443 bus_config_one_phci_child(char *path)
   8444 {
   8445 	dev_info_t *ph_dip, *child;
   8446 	char *devnm;
   8447 	int rv = MDI_FAILURE;
   8448 
   8449 	/* extract the child component of the phci */
   8450 	devnm = strrchr(path, '/');
   8451 	*devnm++ = '\0';
   8452 
   8453 	/*
   8454 	 * first configure all path components upto phci and then
   8455 	 * configure the phci child.
   8456 	 */
   8457 	if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
   8458 		if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
   8459 		    NDI_SUCCESS) {
   8460 			/*
   8461 			 * release the hold that ndi_devi_config_one() placed
   8462 			 */
   8463 			ndi_rele_devi(child);
   8464 			rv = MDI_SUCCESS;
   8465 		}
   8466 
   8467 		/* release the hold that e_ddi_hold_devi_by_path() placed */
   8468 		ndi_rele_devi(ph_dip);
   8469 	}
   8470 
   8471 	devnm--;
   8472 	*devnm = '/';
   8473 	return (rv);
   8474 }
   8475 
   8476 /*
   8477  * Build a list of phci client paths for the specified vhci client.
   8478  * The list includes only those phci client paths which aren't configured yet.
   8479  */
   8480 static mdi_phys_path_t *
   8481 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
   8482 {
   8483 	mdi_vhcache_pathinfo_t *cpi;
   8484 	mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
   8485 	int config_path, len;
   8486 
   8487 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
   8488 		/*
   8489 		 * include only those paths that aren't configured.
   8490 		 */
   8491 		config_path = 0;
   8492 		if (cpi->cpi_pip == NULL)
   8493 			config_path = 1;
   8494 		else {
   8495 			MDI_PI_LOCK(cpi->cpi_pip);
   8496 			if (MDI_PI_IS_INIT(cpi->cpi_pip))
   8497 				config_path = 1;
   8498 			MDI_PI_UNLOCK(cpi->cpi_pip);
   8499 		}
   8500 
   8501 		if (config_path) {
   8502 			pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
   8503 			len = strlen(cpi->cpi_cphci->cphci_path) +
   8504 			    strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
   8505 			pp->phys_path = kmem_alloc(len, KM_SLEEP);
   8506 			(void) snprintf(pp->phys_path, len, "%s/%s@%s",
   8507 			    cpi->cpi_cphci->cphci_path, ct_name,
   8508 			    cpi->cpi_addr);
   8509 			pp->phys_path_next = NULL;
   8510 
   8511 			if (pp_head == NULL)
   8512 				pp_head = pp;
   8513 			else
   8514 				pp_tail->phys_path_next = pp;
   8515 			pp_tail = pp;
   8516 		}
   8517 	}
   8518 
   8519 	return (pp_head);
   8520 }
   8521 
   8522 /*
   8523  * Free the memory allocated for phci client path list.
   8524  */
   8525 static void
   8526 free_phclient_path_list(mdi_phys_path_t *pp_head)
   8527 {
   8528 	mdi_phys_path_t *pp, *pp_next;
   8529 
   8530 	for (pp = pp_head; pp != NULL; pp = pp_next) {
   8531 		pp_next = pp->phys_path_next;
   8532 		kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
   8533 		kmem_free(pp, sizeof (*pp));
   8534 	}
   8535 }
   8536 
   8537 /*
   8538  * Allocated async client structure and initialize with the specified values.
   8539  */
   8540 static mdi_async_client_config_t *
   8541 alloc_async_client_config(char *ct_name, char *ct_addr,
   8542     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
   8543 {
   8544 	mdi_async_client_config_t *acc;
   8545 
   8546 	acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
   8547 	acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
   8548 	acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
   8549 	acc->acc_phclient_path_list_head = pp_head;
   8550 	init_vhcache_lookup_token(&acc->acc_token, tok);
   8551 	acc->acc_next = NULL;
   8552 	return (acc);
   8553 }
   8554 
   8555 /*
   8556  * Free the memory allocated for the async client structure and their members.
   8557  */
   8558 static void
   8559 free_async_client_config(mdi_async_client_config_t *acc)
   8560 {
   8561 	if (acc->acc_phclient_path_list_head)
   8562 		free_phclient_path_list(acc->acc_phclient_path_list_head);
   8563 	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
   8564 	kmem_free(acc->acc_ct_addr,