Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more
     28  * detailed discussion of the overall mpxio architecture.
     29  *
     30  * Default locking order:
     31  *
     32  * _NOTE(LOCK_ORDER(mdi_mutex mdi_vhci::vh_phci_mutex))
     33  * _NOTE(LOCK_ORDER(mdi_mutex mdi_vhci::vh_client_mutex))
     34  * _NOTE(LOCK_ORDER(mdi_vhci::vh_phci_mutex mdi_phci::ph_mutex))
     35  * _NOTE(LOCK_ORDER(mdi_vhci::vh_client_mutex mdi_client::ct_mutex))
     36  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
     37  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
     38  * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
     39  */
     40 
     41 #include <sys/note.h>
     42 #include <sys/types.h>
     43 #include <sys/varargs.h>
     44 #include <sys/param.h>
     45 #include <sys/errno.h>
     46 #include <sys/uio.h>
     47 #include <sys/buf.h>
     48 #include <sys/modctl.h>
     49 #include <sys/open.h>
     50 #include <sys/kmem.h>
     51 #include <sys/poll.h>
     52 #include <sys/conf.h>
     53 #include <sys/bootconf.h>
     54 #include <sys/cmn_err.h>
     55 #include <sys/stat.h>
     56 #include <sys/ddi.h>
     57 #include <sys/sunddi.h>
     58 #include <sys/ddipropdefs.h>
     59 #include <sys/sunndi.h>
     60 #include <sys/ndi_impldefs.h>
     61 #include <sys/promif.h>
     62 #include <sys/sunmdi.h>
     63 #include <sys/mdi_impldefs.h>
     64 #include <sys/taskq.h>
     65 #include <sys/epm.h>
     66 #include <sys/sunpm.h>
     67 #include <sys/modhash.h>
     68 #include <sys/disp.h>
     69 #include <sys/autoconf.h>
     70 #include <sys/sysmacros.h>
     71 
     72 #ifdef	DEBUG
     73 #include <sys/debug.h>
     74 int	mdi_debug = 1;
     75 int	mdi_debug_logonly = 0;
     76 #define	MDI_DEBUG(dbglevel, pargs) if (mdi_debug >= (dbglevel))	i_mdi_log pargs
     77 #define	MDI_WARN	CE_WARN, __func__
     78 #define	MDI_NOTE	CE_NOTE, __func__
     79 #define	MDI_CONT	CE_CONT, __func__
     80 static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...);
     81 #else	/* !DEBUG */
     82 #define	MDI_DEBUG(dbglevel, pargs)
     83 #endif	/* DEBUG */
     84 int	mdi_debug_consoleonly = 0;
     85 int	mdi_delay = 3;
     86 
     87 extern pri_t	minclsyspri;
     88 extern int	modrootloaded;
     89 
     90 /*
     91  * Global mutex:
     92  * Protects vHCI list and structure members.
     93  */
     94 kmutex_t	mdi_mutex;
     95 
     96 /*
     97  * Registered vHCI class driver lists
     98  */
     99 int		mdi_vhci_count;
    100 mdi_vhci_t	*mdi_vhci_head;
    101 mdi_vhci_t	*mdi_vhci_tail;
    102 
    103 /*
    104  * Client Hash Table size
    105  */
    106 static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
    107 
    108 /*
    109  * taskq interface definitions
    110  */
    111 #define	MDI_TASKQ_N_THREADS	8
    112 #define	MDI_TASKQ_PRI		minclsyspri
    113 #define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
    114 #define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)
    115 
    116 taskq_t				*mdi_taskq;
    117 static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
    118 
    119 #define	TICKS_PER_SECOND	(drv_usectohz(1000000))
    120 
    121 /*
    122  * The data should be "quiet" for this interval (in seconds) before the
    123  * vhci cached data is flushed to the disk.
    124  */
    125 static int mdi_vhcache_flush_delay = 10;
    126 
    127 /* number of seconds the vhcache flush daemon will sleep idle before exiting */
    128 static int mdi_vhcache_flush_daemon_idle_time = 60;
    129 
    130 /*
    131  * MDI falls back to discovery of all paths when a bus_config_one fails.
    132  * The following parameters can be used to tune this operation.
    133  *
    134  * mdi_path_discovery_boot
    135  *	Number of times path discovery will be attempted during early boot.
    136  *	Probably there is no reason to ever set this value to greater than one.
    137  *
    138  * mdi_path_discovery_postboot
    139  *	Number of times path discovery will be attempted after early boot.
    140  *	Set it to a minimum of two to allow for discovery of iscsi paths which
    141  *	may happen very late during booting.
    142  *
    143  * mdi_path_discovery_interval
    144  *	Minimum number of seconds MDI will wait between successive discovery
    145  *	of all paths. Set it to -1 to disable discovery of all paths.
    146  */
    147 static int mdi_path_discovery_boot = 1;
    148 static int mdi_path_discovery_postboot = 2;
    149 static int mdi_path_discovery_interval = 10;
    150 
    151 /*
    152  * number of seconds the asynchronous configuration thread will sleep idle
    153  * before exiting.
    154  */
    155 static int mdi_async_config_idle_time = 600;
    156 
    157 static int mdi_bus_config_cache_hash_size = 256;
    158 
    159 /* turns off multithreaded configuration for certain operations */
    160 static int mdi_mtc_off = 0;
    161 
    162 /*
    163  * The "path" to a pathinfo node is identical to the /devices path to a
    164  * devinfo node had the device been enumerated under a pHCI instead of
    165  * a vHCI.  This pathinfo "path" is associated with a 'path_instance'.
    166  * This association persists across create/delete of the pathinfo nodes,
    167  * but not across reboot.
    168  */
    169 static uint_t		mdi_pathmap_instance = 1;	/* 0 -> any path */
    170 static int		mdi_pathmap_hash_size = 256;
    171 static kmutex_t		mdi_pathmap_mutex;
    172 static mod_hash_t	*mdi_pathmap_bypath;		/* "path"->instance */
    173 static mod_hash_t	*mdi_pathmap_byinstance;	/* instance->"path" */
    174 static mod_hash_t	*mdi_pathmap_sbyinstance;	/* inst->shortpath */
    175 
    176 /*
    177  * MDI component property name/value string definitions
    178  */
    179 const char 		*mdi_component_prop = "mpxio-component";
    180 const char		*mdi_component_prop_vhci = "vhci";
    181 const char		*mdi_component_prop_phci = "phci";
    182 const char		*mdi_component_prop_client = "client";
    183 
    184 /*
    185  * MDI client global unique identifier property name
    186  */
    187 const char		*mdi_client_guid_prop = "client-guid";
    188 
    189 /*
    190  * MDI client load balancing property name/value string definitions
    191  */
    192 const char		*mdi_load_balance = "load-balance";
    193 const char		*mdi_load_balance_none = "none";
    194 const char		*mdi_load_balance_rr = "round-robin";
    195 const char		*mdi_load_balance_lba = "logical-block";
    196 
    197 /*
    198  * Obsolete vHCI class definition; to be removed after Leadville update
    199  */
    200 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
    201 
    202 static char vhci_greeting[] =
    203 	"\tThere already exists one vHCI driver for class %s\n"
    204 	"\tOnly one vHCI driver for each class is allowed\n";
    205 
    206 /*
    207  * Static function prototypes
    208  */
    209 static int		i_mdi_phci_offline(dev_info_t *, uint_t);
    210 static int		i_mdi_client_offline(dev_info_t *, uint_t);
    211 static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
    212 static void		i_mdi_phci_post_detach(dev_info_t *,
    213 			    ddi_detach_cmd_t, int);
    214 static int		i_mdi_client_pre_detach(dev_info_t *,
    215 			    ddi_detach_cmd_t);
    216 static void		i_mdi_client_post_detach(dev_info_t *,
    217 			    ddi_detach_cmd_t, int);
    218 static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
    219 static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
    220 static int 		i_mdi_lba_lb(mdi_client_t *ct,
    221 			    mdi_pathinfo_t **ret_pip, struct buf *buf);
    222 static void		i_mdi_pm_hold_client(mdi_client_t *, int);
    223 static void		i_mdi_pm_rele_client(mdi_client_t *, int);
    224 static void		i_mdi_pm_reset_client(mdi_client_t *);
    225 static int		i_mdi_power_all_phci(mdi_client_t *);
    226 static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);
    227 
    228 
    229 /*
    230  * Internal mdi_pathinfo node functions
    231  */
    232 static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
    233 
    234 static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
    235 static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
    236 static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
    237 static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
    238 static void		i_mdi_phci_unlock(mdi_phci_t *);
    239 static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
    240 static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
    241 static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
    242 static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
    243 			    mdi_client_t *);
    244 static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
    245 static void		i_mdi_client_remove_path(mdi_client_t *,
    246 			    mdi_pathinfo_t *);
    247 
    248 static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
    249 			    mdi_pathinfo_state_t, int);
    250 static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
    251 static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
    252 			    char **, int);
    253 static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
    254 static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
    255 static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
    256 static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
    257 static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
    258 static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
    259 static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
    260 static void		i_mdi_client_update_state(mdi_client_t *);
    261 static int		i_mdi_client_compute_state(mdi_client_t *,
    262 			    mdi_phci_t *);
    263 static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
    264 static void		i_mdi_client_unlock(mdi_client_t *);
    265 static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
    266 static mdi_client_t	*i_devi_get_client(dev_info_t *);
    267 /*
    268  * NOTE: this will be removed once the NWS files are changed to use the new
    269  * mdi_{enable,disable}_path interfaces
    270  */
    271 static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
    272 				int, int);
    273 static mdi_pathinfo_t 	*i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
    274 				mdi_vhci_t *vh, int flags, int op);
    275 /*
    276  * Failover related function prototypes
    277  */
    278 static int		i_mdi_failover(void *);
    279 
    280 /*
    281  * misc internal functions
    282  */
    283 static int		i_mdi_get_hash_key(char *);
    284 static int		i_map_nvlist_error_to_mdi(int);
    285 static void		i_mdi_report_path_state(mdi_client_t *,
    286 			    mdi_pathinfo_t *);
    287 
    288 static void		setup_vhci_cache(mdi_vhci_t *);
    289 static int		destroy_vhci_cache(mdi_vhci_t *);
    290 static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
    291 static boolean_t	stop_vhcache_flush_thread(void *, int);
    292 static void		free_string_array(char **, int);
    293 static void		free_vhcache_phci(mdi_vhcache_phci_t *);
    294 static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
    295 static void		free_vhcache_client(mdi_vhcache_client_t *);
    296 static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
    297 static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
    298 static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
    299 static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
    300 static void		vhcache_pi_add(mdi_vhci_config_t *,
    301 			    struct mdi_pathinfo *);
    302 static void		vhcache_pi_remove(mdi_vhci_config_t *,
    303 			    struct mdi_pathinfo *);
    304 static void		free_phclient_path_list(mdi_phys_path_t *);
    305 static void		sort_vhcache_paths(mdi_vhcache_client_t *);
    306 static int		flush_vhcache(mdi_vhci_config_t *, int);
    307 static void		vhcache_dirty(mdi_vhci_config_t *);
    308 static void		free_async_client_config(mdi_async_client_config_t *);
    309 static void		single_threaded_vhconfig_enter(mdi_vhci_config_t *);
    310 static void		single_threaded_vhconfig_exit(mdi_vhci_config_t *);
    311 static nvlist_t		*read_on_disk_vhci_cache(char *);
    312 extern int		fread_nvlist(char *, nvlist_t **);
    313 extern int		fwrite_nvlist(char *, nvlist_t *);
    314 
    315 /* called once when first vhci registers with mdi */
    316 static void
    317 i_mdi_init()
    318 {
    319 	static int initialized = 0;
    320 
    321 	if (initialized)
    322 		return;
    323 	initialized = 1;
    324 
    325 	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
    326 
    327 	/* Create our taskq resources */
    328 	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
    329 	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
    330 	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
    331 	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */
    332 
    333 	/* Allocate ['path_instance' <-> "path"] maps */
    334 	mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
    335 	mdi_pathmap_bypath = mod_hash_create_strhash(
    336 	    "mdi_pathmap_bypath", mdi_pathmap_hash_size,
    337 	    mod_hash_null_valdtor);
    338 	mdi_pathmap_byinstance = mod_hash_create_idhash(
    339 	    "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
    340 	    mod_hash_null_valdtor);
    341 	mdi_pathmap_sbyinstance = mod_hash_create_idhash(
    342 	    "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size,
    343 	    mod_hash_null_valdtor);
    344 }
    345 
    346 /*
    347  * mdi_get_component_type():
    348  *		Return mpxio component type
    349  * Return Values:
    350  *		MDI_COMPONENT_NONE
    351  *		MDI_COMPONENT_VHCI
    352  *		MDI_COMPONENT_PHCI
    353  *		MDI_COMPONENT_CLIENT
    354  * XXX This doesn't work under multi-level MPxIO and should be
    355  *	removed when clients migrate mdi_component_is_*() interfaces.
    356  */
    357 int
    358 mdi_get_component_type(dev_info_t *dip)
    359 {
    360 	return (DEVI(dip)->devi_mdi_component);
    361 }
    362 
    363 /*
    364  * mdi_vhci_register():
    365  *		Register a vHCI module with the mpxio framework
    366  *		mdi_vhci_register() is called by vHCI drivers to register the
    367  *		'class_driver' vHCI driver and its MDI entrypoints with the
    368  *		mpxio framework.  The vHCI driver must call this interface as
    369  *		part of its attach(9e) handler.
    370  *		Competing threads may try to attach mdi_vhci_register() as
    371  *		the vHCI drivers are loaded and attached as a result of pHCI
    372  *		driver instance registration (mdi_phci_register()) with the
    373  *		framework.
    374  * Return Values:
    375  *		MDI_SUCCESS
    376  *		MDI_FAILURE
    377  */
    378 /*ARGSUSED*/
    379 int
    380 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
    381     int flags)
    382 {
    383 	mdi_vhci_t		*vh = NULL;
    384 
    385 	/* Registrant can't be older */
    386 	ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV);
    387 
    388 #ifdef DEBUG
    389 	/*
    390 	 * IB nexus driver is loaded only when IB hardware is present.
    391 	 * In order to be able to do this there is a need to drive the loading
    392 	 * and attaching of the IB nexus driver (especially when an IB hardware
    393 	 * is dynamically plugged in) when an IB HCA driver (PHCI)
    394 	 * is being attached. Unfortunately this gets into the limitations
    395 	 * of devfs as there seems to be no clean way to drive configuration
    396 	 * of a subtree from another subtree of a devfs. Hence, do not ASSERT
    397 	 * for IB.
    398 	 */
    399 	if (strcmp(class, MDI_HCI_CLASS_IB) != 0)
    400 		ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
    401 #endif
    402 
    403 	i_mdi_init();
    404 
    405 	mutex_enter(&mdi_mutex);
    406 	/*
    407 	 * Scan for already registered vhci
    408 	 */
    409 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
    410 		if (strcmp(vh->vh_class, class) == 0) {
    411 			/*
    412 			 * vHCI has already been created.  Check for valid
    413 			 * vHCI ops registration.  We only support one vHCI
    414 			 * module per class
    415 			 */
    416 			if (vh->vh_ops != NULL) {
    417 				mutex_exit(&mdi_mutex);
    418 				cmn_err(CE_NOTE, vhci_greeting, class);
    419 				return (MDI_FAILURE);
    420 			}
    421 			break;
    422 		}
    423 	}
    424 
    425 	/*
    426 	 * if not yet created, create the vHCI component
    427 	 */
    428 	if (vh == NULL) {
    429 		struct client_hash	*hash = NULL;
    430 		char			*load_balance;
    431 
    432 		/*
    433 		 * Allocate and initialize the mdi extensions
    434 		 */
    435 		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
    436 		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
    437 		    KM_SLEEP);
    438 		vh->vh_client_table = hash;
    439 		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
    440 		(void) strcpy(vh->vh_class, class);
    441 		vh->vh_lb = LOAD_BALANCE_RR;
    442 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
    443 		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
    444 			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
    445 				vh->vh_lb = LOAD_BALANCE_NONE;
    446 			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
    447 				    == 0) {
    448 				vh->vh_lb = LOAD_BALANCE_LBA;
    449 			}
    450 			ddi_prop_free(load_balance);
    451 		}
    452 
    453 		mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
    454 		mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);
    455 
    456 		/*
    457 		 * Store the vHCI ops vectors
    458 		 */
    459 		vh->vh_dip = vdip;
    460 		vh->vh_ops = vops;
    461 
    462 		setup_vhci_cache(vh);
    463 
    464 		if (mdi_vhci_head == NULL) {
    465 			mdi_vhci_head = vh;
    466 		}
    467 		if (mdi_vhci_tail) {
    468 			mdi_vhci_tail->vh_next = vh;
    469 		}
    470 		mdi_vhci_tail = vh;
    471 		mdi_vhci_count++;
    472 	}
    473 
    474 	/*
    475 	 * Claim the devfs node as a vhci component
    476 	 */
    477 	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
    478 
    479 	/*
    480 	 * Initialize our back reference from dev_info node
    481 	 */
    482 	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
    483 	mutex_exit(&mdi_mutex);
    484 	return (MDI_SUCCESS);
    485 }
    486 
    487 /*
    488  * mdi_vhci_unregister():
    489  *		Unregister a vHCI module from mpxio framework
    490  *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
    491  * 		of a vhci to unregister it from the framework.
    492  * Return Values:
    493  *		MDI_SUCCESS
    494  *		MDI_FAILURE
    495  */
    496 /*ARGSUSED*/
    497 int
    498 mdi_vhci_unregister(dev_info_t *vdip, int flags)
    499 {
    500 	mdi_vhci_t	*found, *vh, *prev = NULL;
    501 
    502 	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
    503 
    504 	/*
    505 	 * Check for invalid VHCI
    506 	 */
    507 	if ((vh = i_devi_get_vhci(vdip)) == NULL)
    508 		return (MDI_FAILURE);
    509 
    510 	/*
    511 	 * Scan the list of registered vHCIs for a match
    512 	 */
    513 	mutex_enter(&mdi_mutex);
    514 	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
    515 		if (found == vh)
    516 			break;
    517 		prev = found;
    518 	}
    519 
    520 	if (found == NULL) {
    521 		mutex_exit(&mdi_mutex);
    522 		return (MDI_FAILURE);
    523 	}
    524 
    525 	/*
    526 	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
    527 	 * should have been unregistered, before a vHCI can be
    528 	 * unregistered.
    529 	 */
    530 	MDI_VHCI_PHCI_LOCK(vh);
    531 	if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
    532 		MDI_VHCI_PHCI_UNLOCK(vh);
    533 		mutex_exit(&mdi_mutex);
    534 		return (MDI_FAILURE);
    535 	}
    536 	MDI_VHCI_PHCI_UNLOCK(vh);
    537 
    538 	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
    539 		mutex_exit(&mdi_mutex);
    540 		return (MDI_FAILURE);
    541 	}
    542 
    543 	/*
    544 	 * Remove the vHCI from the global list
    545 	 */
    546 	if (vh == mdi_vhci_head) {
    547 		mdi_vhci_head = vh->vh_next;
    548 	} else {
    549 		prev->vh_next = vh->vh_next;
    550 	}
    551 	if (vh == mdi_vhci_tail) {
    552 		mdi_vhci_tail = prev;
    553 	}
    554 	mdi_vhci_count--;
    555 	mutex_exit(&mdi_mutex);
    556 
    557 	vh->vh_ops = NULL;
    558 	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
    559 	DEVI(vdip)->devi_mdi_xhci = NULL;
    560 	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
    561 	kmem_free(vh->vh_client_table,
    562 	    mdi_client_table_size * sizeof (struct client_hash));
    563 	mutex_destroy(&vh->vh_phci_mutex);
    564 	mutex_destroy(&vh->vh_client_mutex);
    565 
    566 	kmem_free(vh, sizeof (mdi_vhci_t));
    567 	return (MDI_SUCCESS);
    568 }
    569 
    570 /*
    571  * i_mdi_vhci_class2vhci():
    572  *		Look for a matching vHCI module given a vHCI class name
    573  * Return Values:
    574  *		Handle to a vHCI component
    575  *		NULL
    576  */
    577 static mdi_vhci_t *
    578 i_mdi_vhci_class2vhci(char *class)
    579 {
    580 	mdi_vhci_t	*vh = NULL;
    581 
    582 	ASSERT(!MUTEX_HELD(&mdi_mutex));
    583 
    584 	mutex_enter(&mdi_mutex);
    585 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
    586 		if (strcmp(vh->vh_class, class) == 0) {
    587 			break;
    588 		}
    589 	}
    590 	mutex_exit(&mdi_mutex);
    591 	return (vh);
    592 }
    593 
    594 /*
    595  * i_devi_get_vhci():
    596  *		Utility function to get the handle to a vHCI component
    597  * Return Values:
    598  *		Handle to a vHCI component
    599  *		NULL
    600  */
    601 mdi_vhci_t *
    602 i_devi_get_vhci(dev_info_t *vdip)
    603 {
    604 	mdi_vhci_t	*vh = NULL;
    605 	if (MDI_VHCI(vdip)) {
    606 		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
    607 	}
    608 	return (vh);
    609 }
    610 
    611 /*
    612  * mdi_phci_register():
    613  *		Register a pHCI module with mpxio framework
    614  *		mdi_phci_register() is called by pHCI drivers to register with
    615  *		the mpxio framework and a specific 'class_driver' vHCI.  The
    616  *		pHCI driver must call this interface as part of its attach(9e)
    617  *		handler.
    618  * Return Values:
    619  *		MDI_SUCCESS
    620  *		MDI_FAILURE
    621  */
    622 /*ARGSUSED*/
    623 int
    624 mdi_phci_register(char *class, dev_info_t *pdip, int flags)
    625 {
    626 	mdi_phci_t		*ph;
    627 	mdi_vhci_t		*vh;
    628 	char			*data;
    629 
    630 	/*
    631 	 * Some subsystems, like fcp, perform pHCI registration from a
    632 	 * different thread than the one doing the pHCI attach(9E) - the
    633 	 * driver attach code is waiting for this other thread to complete.
    634 	 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
    635 	 * (indicating that some thread has done an ndi_devi_enter of parent)
    636 	 * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
    637 	 */
    638 	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
    639 
    640 	/*
    641 	 * Check for mpxio-disable property. Enable mpxio if the property is
    642 	 * missing or not set to "yes".
    643 	 * If the property is set to "yes" then emit a brief message.
    644 	 */
    645 	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
    646 	    &data) == DDI_SUCCESS)) {
    647 		if (strcmp(data, "yes") == 0) {
    648 			MDI_DEBUG(1, (MDI_CONT, pdip,
    649 			    "?multipath capabilities disabled via %s.conf.",
    650 			    ddi_driver_name(pdip)));
    651 			ddi_prop_free(data);
    652 			return (MDI_FAILURE);
    653 		}
    654 		ddi_prop_free(data);
    655 	}
    656 
    657 	/*
    658 	 * Search for a matching vHCI
    659 	 */
    660 	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
    661 	if (vh == NULL) {
    662 		return (MDI_FAILURE);
    663 	}
    664 
    665 	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
    666 	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
    667 	ph->ph_dip = pdip;
    668 	ph->ph_vhci = vh;
    669 	ph->ph_next = NULL;
    670 	ph->ph_unstable = 0;
    671 	ph->ph_vprivate = 0;
    672 	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
    673 
    674 	MDI_PHCI_LOCK(ph);
    675 	MDI_PHCI_SET_POWER_UP(ph);
    676 	MDI_PHCI_UNLOCK(ph);
    677 	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
    678 	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
    679 
    680 	vhcache_phci_add(vh->vh_config, ph);
    681 
    682 	MDI_VHCI_PHCI_LOCK(vh);
    683 	if (vh->vh_phci_head == NULL) {
    684 		vh->vh_phci_head = ph;
    685 	}
    686 	if (vh->vh_phci_tail) {
    687 		vh->vh_phci_tail->ph_next = ph;
    688 	}
    689 	vh->vh_phci_tail = ph;
    690 	vh->vh_phci_count++;
    691 	MDI_VHCI_PHCI_UNLOCK(vh);
    692 
    693 	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
    694 	return (MDI_SUCCESS);
    695 }
    696 
    697 /*
    698  * mdi_phci_unregister():
    699  *		Unregister a pHCI module from mpxio framework
    700  *		mdi_phci_unregister() is called by the pHCI drivers from their
    701  *		detach(9E) handler to unregister their instances from the
    702  *		framework.
    703  * Return Values:
    704  *		MDI_SUCCESS
    705  *		MDI_FAILURE
    706  */
    707 /*ARGSUSED*/
    708 int
    709 mdi_phci_unregister(dev_info_t *pdip, int flags)
    710 {
    711 	mdi_vhci_t		*vh;
    712 	mdi_phci_t		*ph;
    713 	mdi_phci_t		*tmp;
    714 	mdi_phci_t		*prev = NULL;
    715 	mdi_pathinfo_t		*pip;
    716 
    717 	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
    718 
    719 	ph = i_devi_get_phci(pdip);
    720 	if (ph == NULL) {
    721 		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI"));
    722 		return (MDI_FAILURE);
    723 	}
    724 
    725 	vh = ph->ph_vhci;
    726 	ASSERT(vh != NULL);
    727 	if (vh == NULL) {
    728 		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI"));
    729 		return (MDI_FAILURE);
    730 	}
    731 
    732 	MDI_VHCI_PHCI_LOCK(vh);
    733 	tmp = vh->vh_phci_head;
    734 	while (tmp) {
    735 		if (tmp == ph) {
    736 			break;
    737 		}
    738 		prev = tmp;
    739 		tmp = tmp->ph_next;
    740 	}
    741 
    742 	if (ph == vh->vh_phci_head) {
    743 		vh->vh_phci_head = ph->ph_next;
    744 	} else {
    745 		prev->ph_next = ph->ph_next;
    746 	}
    747 
    748 	if (ph == vh->vh_phci_tail) {
    749 		vh->vh_phci_tail = prev;
    750 	}
    751 
    752 	vh->vh_phci_count--;
    753 	MDI_VHCI_PHCI_UNLOCK(vh);
    754 
    755 	/* Walk remaining pathinfo nodes and disassociate them from pHCI */
    756 	MDI_PHCI_LOCK(ph);
    757 	for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip;
    758 	    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link)
    759 		MDI_PI(pip)->pi_phci = NULL;
    760 	MDI_PHCI_UNLOCK(ph);
    761 
    762 	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
    763 	    ESC_DDI_INITIATOR_UNREGISTER);
    764 	vhcache_phci_remove(vh->vh_config, ph);
    765 	cv_destroy(&ph->ph_unstable_cv);
    766 	mutex_destroy(&ph->ph_mutex);
    767 	kmem_free(ph, sizeof (mdi_phci_t));
    768 	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
    769 	DEVI(pdip)->devi_mdi_xhci = NULL;
    770 	return (MDI_SUCCESS);
    771 }
    772 
    773 /*
    774  * i_devi_get_phci():
    775  * 		Utility function to return the phci extensions.
    776  */
    777 static mdi_phci_t *
    778 i_devi_get_phci(dev_info_t *pdip)
    779 {
    780 	mdi_phci_t	*ph = NULL;
    781 
    782 	if (MDI_PHCI(pdip)) {
    783 		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
    784 	}
    785 	return (ph);
    786 }
    787 
    788 /*
    789  * Single thread mdi entry into devinfo node for modifying its children.
    790  * If necessary we perform an ndi_devi_enter of the vHCI before doing
    791  * an ndi_devi_enter of 'dip'.  We maintain circular in two parts: one
    792  * for the vHCI and one for the pHCI.
    793  */
    794 void
    795 mdi_devi_enter(dev_info_t *phci_dip, int *circular)
    796 {
    797 	dev_info_t	*vdip;
    798 	int		vcircular, pcircular;
    799 
    800 	/* Verify calling context */
    801 	ASSERT(MDI_PHCI(phci_dip));
    802 	vdip = mdi_devi_get_vdip(phci_dip);
    803 	ASSERT(vdip);			/* A pHCI always has a vHCI */
    804 
    805 	/*
    806 	 * If pHCI is detaching then the framework has already entered the
    807 	 * vHCI on a threads that went down the code path leading to
    808 	 * detach_node().  This framework enter of the vHCI during pHCI
    809 	 * detach is done to avoid deadlock with vHCI power management
    810 	 * operations which enter the vHCI and the enter down the path
    811 	 * to the pHCI. If pHCI is detaching then we piggyback this calls
    812 	 * enter of the vHCI on frameworks vHCI enter that has already
    813 	 * occurred - this is OK because we know that the framework thread
    814 	 * doing detach is waiting for our completion.
    815 	 *
    816 	 * We should DEVI_IS_DETACHING under an enter of the parent to avoid
    817 	 * race with detach - but we can't do that because the framework has
    818 	 * already entered the parent, so we have some complexity instead.
    819 	 */
    820 	for (;;) {
    821 		if (ndi_devi_tryenter(vdip, &vcircular)) {
    822 			ASSERT(vcircular != -1);
    823 			if (DEVI_IS_DETACHING(phci_dip)) {
    824 				ndi_devi_exit(vdip, vcircular);
    825 				vcircular = -1;
    826 			}
    827 			break;
    828 		} else if (DEVI_IS_DETACHING(phci_dip)) {
    829 			vcircular = -1;
    830 			break;
    831 		} else if (servicing_interrupt()) {
    832 			/*
    833 			 * Don't delay an interrupt (and ensure adaptive
    834 			 * mutex inversion support).
    835 			 */
    836 			ndi_devi_enter(vdip, &vcircular);
    837 			break;
    838 		} else {
    839 			delay_random(mdi_delay);
    840 		}
    841 	}
    842 
    843 	ndi_devi_enter(phci_dip, &pcircular);
    844 	*circular = (vcircular << 16) | (pcircular & 0xFFFF);
    845 }
    846 
    847 /*
    848  * Attempt to mdi_devi_enter.
    849  */
    850 int
    851 mdi_devi_tryenter(dev_info_t *phci_dip, int *circular)
    852 {
    853 	dev_info_t	*vdip;
    854 	int		vcircular, pcircular;
    855 
    856 	/* Verify calling context */
    857 	ASSERT(MDI_PHCI(phci_dip));
    858 	vdip = mdi_devi_get_vdip(phci_dip);
    859 	ASSERT(vdip);			/* A pHCI always has a vHCI */
    860 
    861 	if (ndi_devi_tryenter(vdip, &vcircular)) {
    862 		if (ndi_devi_tryenter(phci_dip, &pcircular)) {
    863 			*circular = (vcircular << 16) | (pcircular & 0xFFFF);
    864 			return (1);	/* locked */
    865 		}
    866 		ndi_devi_exit(vdip, vcircular);
    867 	}
    868 	return (0);			/* busy */
    869 }
    870 
    871 /*
    872  * Release mdi_devi_enter or successful mdi_devi_tryenter.
    873  */
    874 void
    875 mdi_devi_exit(dev_info_t *phci_dip, int circular)
    876 {
    877 	dev_info_t	*vdip;
    878 	int		vcircular, pcircular;
    879 
    880 	/* Verify calling context */
    881 	ASSERT(MDI_PHCI(phci_dip));
    882 	vdip = mdi_devi_get_vdip(phci_dip);
    883 	ASSERT(vdip);			/* A pHCI always has a vHCI */
    884 
    885 	/* extract two circular recursion values from single int */
    886 	pcircular = (short)(circular & 0xFFFF);
    887 	vcircular = (short)((circular >> 16) & 0xFFFF);
    888 
    889 	ndi_devi_exit(phci_dip, pcircular);
    890 	if (vcircular != -1)
    891 		ndi_devi_exit(vdip, vcircular);
    892 }
    893 
    894 /*
    895  * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
    896  * around a pHCI drivers calls to mdi_pi_online/offline, after holding
    897  * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
    898  * with vHCI power management code during path online/offline.  Each
    899  * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
    900  * occur within the scope of an active mdi_devi_enter that establishes the
    901  * circular value.
    902  */
    903 void
    904 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
    905 {
    906 	int		pcircular;
    907 
    908 	/* Verify calling context */
    909 	ASSERT(MDI_PHCI(phci_dip));
    910 
    911 	/* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */
    912 	ndi_hold_devi(phci_dip);
    913 
    914 	pcircular = (short)(circular & 0xFFFF);
    915 	ndi_devi_exit(phci_dip, pcircular);
    916 }
    917 
/*
 * mdi_devi_enter_phci():
 *		Re-enter a pHCI that was exited with mdi_devi_exit_phci() and
 *		drop the hold that mdi_devi_exit_phci() placed on it.
 *
 *		'circular' is only read (inside an ASSERT) to check that the
 *		recursion value returned by the re-enter matches the pHCI half
 *		of the value established by the enclosing mdi_devi_enter; it is
 *		never written here.
 */
void
mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
{
	int		pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));

	/* Enter first; the hold is only released once the node is entered. */
	ndi_devi_enter(phci_dip, &pcircular);

	/* Drop hold from mdi_devi_exit_phci. */
	ndi_rele_devi(phci_dip);

	/* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
	ASSERT(pcircular == ((short)(*circular & 0xFFFF)));
}
    934 
    935 /*
    936  * mdi_devi_get_vdip():
    937  *		given a pHCI dip return vHCI dip
    938  */
    939 dev_info_t *
    940 mdi_devi_get_vdip(dev_info_t *pdip)
    941 {
    942 	mdi_phci_t	*ph;
    943 
    944 	ph = i_devi_get_phci(pdip);
    945 	if (ph && ph->ph_vhci)
    946 		return (ph->ph_vhci->vh_dip);
    947 	return (NULL);
    948 }
    949 
    950 /*
    951  * mdi_devi_pdip_entered():
    952  *		Return 1 if we are vHCI and have done an ndi_devi_enter
    953  *		of a pHCI
    954  */
    955 int
    956 mdi_devi_pdip_entered(dev_info_t *vdip)
    957 {
    958 	mdi_vhci_t	*vh;
    959 	mdi_phci_t	*ph;
    960 
    961 	vh = i_devi_get_vhci(vdip);
    962 	if (vh == NULL)
    963 		return (0);
    964 
    965 	MDI_VHCI_PHCI_LOCK(vh);
    966 	ph = vh->vh_phci_head;
    967 	while (ph) {
    968 		if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
    969 			MDI_VHCI_PHCI_UNLOCK(vh);
    970 			return (1);
    971 		}
    972 		ph = ph->ph_next;
    973 	}
    974 	MDI_VHCI_PHCI_UNLOCK(vh);
    975 	return (0);
    976 }
    977 
    978 /*
    979  * mdi_phci_path2devinfo():
    980  * 		Utility function to search for a valid phci device given
    981  *		the devfs pathname.
    982  */
    983 dev_info_t *
    984 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
    985 {
    986 	char		*temp_pathname;
    987 	mdi_vhci_t	*vh;
    988 	mdi_phci_t	*ph;
    989 	dev_info_t 	*pdip = NULL;
    990 
    991 	vh = i_devi_get_vhci(vdip);
    992 	ASSERT(vh != NULL);
    993 
    994 	if (vh == NULL) {
    995 		/*
    996 		 * Invalid vHCI component, return failure
    997 		 */
    998 		return (NULL);
    999 	}
   1000 
   1001 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
   1002 	MDI_VHCI_PHCI_LOCK(vh);
   1003 	ph = vh->vh_phci_head;
   1004 	while (ph != NULL) {
   1005 		pdip = ph->ph_dip;
   1006 		ASSERT(pdip != NULL);
   1007 		*temp_pathname = '\0';
   1008 		(void) ddi_pathname(pdip, temp_pathname);
   1009 		if (strcmp(temp_pathname, pathname) == 0) {
   1010 			break;
   1011 		}
   1012 		ph = ph->ph_next;
   1013 	}
   1014 	if (ph == NULL) {
   1015 		pdip = NULL;
   1016 	}
   1017 	MDI_VHCI_PHCI_UNLOCK(vh);
   1018 	kmem_free(temp_pathname, MAXPATHLEN);
   1019 	return (pdip);
   1020 }
   1021 
   1022 /*
   1023  * mdi_phci_get_path_count():
   1024  * 		get number of path information nodes associated with a given
   1025  *		pHCI device.
   1026  */
   1027 int
   1028 mdi_phci_get_path_count(dev_info_t *pdip)
   1029 {
   1030 	mdi_phci_t	*ph;
   1031 	int		count = 0;
   1032 
   1033 	ph = i_devi_get_phci(pdip);
   1034 	if (ph != NULL) {
   1035 		count = ph->ph_path_count;
   1036 	}
   1037 	return (count);
   1038 }
   1039 
/*
 * i_mdi_phci_lock():
 *		Lock a pHCI device
 * Arguments:
 *		ph  - pHCI to lock
 *		pip - NULL for a plain (default order) lock of the pHCI;
 *		      non-NULL to request reverse-order locking relative to
 *		      this pathinfo node.  The pathinfo lock is dropped and
 *		      re-taken below, so the caller is expected to hold it on
 *		      entry - NOTE(review): confirm against callers.
 * Return Values:
 *		None
 * Note:
 *		The default locking order is:
 *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
 *		But there are a number of situations where locks need to be
 *		grabbed in reverse order.  When pip is non-NULL this routine
 *		implements a try-lock and back-off mechanism for that case;
 *		on return both the pHCI and the pathinfo node are locked.
 */
static void
i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	if (pip) {
		/* Reverse locking is requested. */
		while (MDI_PHCI_TRYLOCK(ph) == 0) {
			if (servicing_interrupt()) {
				/*
				 * Interrupt context: do not delay.  Drop the
				 * pathinfo lock and take both locks in the
				 * default order, blocking on the pHCI lock.
				 * The hold is presumably what keeps pip valid
				 * while it is unlocked.
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				MDI_PHCI_LOCK(ph);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
				break;
			} else {
				/*
				 * tryenter failed. Try to grab again
				 * after a small delay
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				delay_random(mdi_delay);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
			}
		}
	} else {
		/* Default order: caller holds no pathinfo lock to invert. */
		MDI_PHCI_LOCK(ph);
	}
}
   1081 
/*
 * i_mdi_phci_unlock():
 *		Unlock the pHCI component; counterpart of i_mdi_phci_lock().
 *		Only the pHCI lock is released here - a pathinfo lock taken
 *		through the reverse-locking path is left untouched.
 */
static void
i_mdi_phci_unlock(mdi_phci_t *ph)
{
	MDI_PHCI_UNLOCK(ph);
}
   1091 
   1092 /*
   1093  * i_mdi_devinfo_create():
   1094  *		create client device's devinfo node
   1095  * Return Values:
   1096  *		dev_info
   1097  *		NULL
   1098  * Notes:
   1099  */
   1100 static dev_info_t *
   1101 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
   1102 	char **compatible, int ncompatible)
   1103 {
   1104 	dev_info_t *cdip = NULL;
   1105 
   1106 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
   1107 
   1108 	/* Verify for duplicate entry */
   1109 	cdip = i_mdi_devinfo_find(vh, name, guid);
   1110 	ASSERT(cdip == NULL);
   1111 	if (cdip) {
   1112 		cmn_err(CE_WARN,
   1113 		    "i_mdi_devinfo_create: client %s@%s already exists",
   1114 			name ? name : "", guid ? guid : "");
   1115 	}
   1116 
   1117 	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
   1118 	if (cdip == NULL)
   1119 		goto fail;
   1120 
   1121 	/*
   1122 	 * Create component type and Global unique identifier
   1123 	 * properties
   1124 	 */
   1125 	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
   1126 	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
   1127 		goto fail;
   1128 	}
   1129 
   1130 	/* Decorate the node with compatible property */
   1131 	if (compatible &&
   1132 	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
   1133 	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
   1134 		goto fail;
   1135 	}
   1136 
   1137 	return (cdip);
   1138 
   1139 fail:
   1140 	if (cdip) {
   1141 		(void) ndi_prop_remove_all(cdip);
   1142 		(void) ndi_devi_free(cdip);
   1143 	}
   1144 	return (NULL);
   1145 }
   1146 
   1147 /*
   1148  * i_mdi_devinfo_find():
   1149  *		Find a matching devinfo node for given client node name
   1150  *		and its guid.
   1151  * Return Values:
   1152  *		Handle to a dev_info node or NULL
   1153  */
   1154 static dev_info_t *
   1155 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
   1156 {
   1157 	char			*data;
   1158 	dev_info_t 		*cdip = NULL;
   1159 	dev_info_t 		*ndip = NULL;
   1160 	int			circular;
   1161 
   1162 	ndi_devi_enter(vh->vh_dip, &circular);
   1163 	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
   1164 	while ((cdip = ndip) != NULL) {
   1165 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
   1166 
   1167 		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
   1168 			continue;
   1169 		}
   1170 
   1171 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
   1172 		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
   1173 		    &data) != DDI_PROP_SUCCESS) {
   1174 			continue;
   1175 		}
   1176 
   1177 		if (strcmp(data, guid) != 0) {
   1178 			ddi_prop_free(data);
   1179 			continue;
   1180 		}
   1181 		ddi_prop_free(data);
   1182 		break;
   1183 	}
   1184 	ndi_devi_exit(vh->vh_dip, circular);
   1185 	return (cdip);
   1186 }
   1187 
   1188 /*
   1189  * i_mdi_devinfo_remove():
   1190  *		Remove a client device node
   1191  */
   1192 static int
   1193 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
   1194 {
   1195 	int	rv = MDI_SUCCESS;
   1196 
   1197 	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
   1198 	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
   1199 		rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE);
   1200 		if (rv != NDI_SUCCESS) {
   1201 			MDI_DEBUG(1, (MDI_NOTE, cdip,
   1202 			    "!failed: cdip %p", (void *)cdip));
   1203 		}
   1204 		/*
   1205 		 * Convert to MDI error code
   1206 		 */
   1207 		switch (rv) {
   1208 		case NDI_SUCCESS:
   1209 			rv = MDI_SUCCESS;
   1210 			break;
   1211 		case NDI_BUSY:
   1212 			rv = MDI_BUSY;
   1213 			break;
   1214 		default:
   1215 			rv = MDI_FAILURE;
   1216 			break;
   1217 		}
   1218 	}
   1219 	return (rv);
   1220 }
   1221 
   1222 /*
   1223  * i_devi_get_client()
   1224  *		Utility function to get mpxio component extensions
   1225  */
   1226 static mdi_client_t *
   1227 i_devi_get_client(dev_info_t *cdip)
   1228 {
   1229 	mdi_client_t	*ct = NULL;
   1230 
   1231 	if (MDI_CLIENT(cdip)) {
   1232 		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
   1233 	}
   1234 	return (ct);
   1235 }
   1236 
   1237 /*
   1238  * i_mdi_is_child_present():
   1239  *		Search for the presence of client device dev_info node
   1240  */
   1241 static int
   1242 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
   1243 {
   1244 	int		rv = MDI_FAILURE;
   1245 	struct dev_info	*dip;
   1246 	int		circular;
   1247 
   1248 	ndi_devi_enter(vdip, &circular);
   1249 	dip = DEVI(vdip)->devi_child;
   1250 	while (dip) {
   1251 		if (dip == DEVI(cdip)) {
   1252 			rv = MDI_SUCCESS;
   1253 			break;
   1254 		}
   1255 		dip = dip->devi_sibling;
   1256 	}
   1257 	ndi_devi_exit(vdip, circular);
   1258 	return (rv);
   1259 }
   1260 
   1261 
/*
 * i_mdi_client_lock():
 *		Grab client component lock
 * Arguments:
 *		ct  - client to lock
 *		pip - NULL for a plain (default order) lock of the client;
 *		      non-NULL to request reverse-order locking relative to
 *		      this pathinfo node.  The pathinfo lock is dropped and
 *		      re-taken below, so the caller is expected to hold it on
 *		      entry - NOTE(review): confirm against callers.
 * Return Values:
 *		None
 * Note:
 *		The default locking order is:
 *		_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
 *		But there are a number of situations where locks need to be
 *		grabbed in reverse order.  When pip is non-NULL this routine
 *		implements a try-lock and back-off mechanism for that case;
 *		on return both the client and the pathinfo node are locked.
 */
static void
i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	if (pip) {
		/*
		 * Reverse locking is requested.
		 */
		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
			if (servicing_interrupt()) {
				/*
				 * Interrupt context: do not delay.  Drop the
				 * pathinfo lock and take both locks in the
				 * default order, blocking on the client lock.
				 * The hold is presumably what keeps pip valid
				 * while it is unlocked.
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				MDI_CLIENT_LOCK(ct);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
				break;
			} else {
				/*
				 * tryenter failed. Try to grab again
				 * after a small delay
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				delay_random(mdi_delay);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
			}
		}
	} else {
		/* Default order: caller holds no pathinfo lock to invert. */
		MDI_CLIENT_LOCK(ct);
	}
}
   1305 
/*
 * i_mdi_client_unlock():
 *		Unlock a client component; counterpart of i_mdi_client_lock().
 *		Only the client lock is released here - a pathinfo lock taken
 *		through the reverse-locking path is left untouched.
 */
static void
i_mdi_client_unlock(mdi_client_t *ct)
{
	MDI_CLIENT_UNLOCK(ct);
}
   1315 
   1316 /*
   1317  * i_mdi_client_alloc():
   1318  * 		Allocate and initialize a client structure.  Caller should
   1319  *		hold the vhci client lock.
   1320  * Return Values:
   1321  *		Handle to a client component
   1322  */
   1323 /*ARGSUSED*/
   1324 static mdi_client_t *
   1325 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
   1326 {
   1327 	mdi_client_t	*ct;
   1328 
   1329 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
   1330 
   1331 	/*
   1332 	 * Allocate and initialize a component structure.
   1333 	 */
   1334 	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
   1335 	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
   1336 	ct->ct_hnext = NULL;
   1337 	ct->ct_hprev = NULL;
   1338 	ct->ct_dip = NULL;
   1339 	ct->ct_vhci = vh;
   1340 	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
   1341 	(void) strcpy(ct->ct_drvname, name);
   1342 	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
   1343 	(void) strcpy(ct->ct_guid, lguid);
   1344 	ct->ct_cprivate = NULL;
   1345 	ct->ct_vprivate = NULL;
   1346 	ct->ct_flags = 0;
   1347 	ct->ct_state = MDI_CLIENT_STATE_FAILED;
   1348 	MDI_CLIENT_LOCK(ct);
   1349 	MDI_CLIENT_SET_OFFLINE(ct);
   1350 	MDI_CLIENT_SET_DETACH(ct);
   1351 	MDI_CLIENT_SET_POWER_UP(ct);
   1352 	MDI_CLIENT_UNLOCK(ct);
   1353 	ct->ct_failover_flags = 0;
   1354 	ct->ct_failover_status = 0;
   1355 	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
   1356 	ct->ct_unstable = 0;
   1357 	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
   1358 	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
   1359 	ct->ct_lb = vh->vh_lb;
   1360 	ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
   1361 	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
   1362 	ct->ct_path_count = 0;
   1363 	ct->ct_path_head = NULL;
   1364 	ct->ct_path_tail = NULL;
   1365 	ct->ct_path_last = NULL;
   1366 
   1367 	/*
   1368 	 * Add this client component to our client hash queue
   1369 	 */
   1370 	i_mdi_client_enlist_table(vh, ct);
   1371 	return (ct);
   1372 }
   1373 
   1374 /*
   1375  * i_mdi_client_enlist_table():
   1376  *		Attach the client device to the client hash table. Caller
   1377  *		should hold the vhci client lock.
   1378  */
   1379 static void
   1380 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
   1381 {
   1382 	int 			index;
   1383 	struct client_hash	*head;
   1384 
   1385 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
   1386 
   1387 	index = i_mdi_get_hash_key(ct->ct_guid);
   1388 	head = &vh->vh_client_table[index];
   1389 	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
   1390 	head->ct_hash_head = ct;
   1391 	head->ct_hash_count++;
   1392 	vh->vh_client_count++;
   1393 }
   1394 
   1395 /*
   1396  * i_mdi_client_delist_table():
   1397  *		Attach the client device to the client hash table.
   1398  *		Caller should hold the vhci client lock.
   1399  */
   1400 static void
   1401 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
   1402 {
   1403 	int			index;
   1404 	char			*guid;
   1405 	struct client_hash 	*head;
   1406 	mdi_client_t		*next;
   1407 	mdi_client_t		*last;
   1408 
   1409 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
   1410 
   1411 	guid = ct->ct_guid;
   1412 	index = i_mdi_get_hash_key(guid);
   1413 	head = &vh->vh_client_table[index];
   1414 
   1415 	last = NULL;
   1416 	next = (mdi_client_t *)head->ct_hash_head;
   1417 	while (next != NULL) {
   1418 		if (next == ct) {
   1419 			break;
   1420 		}
   1421 		last = next;
   1422 		next = next->ct_hnext;
   1423 	}
   1424 
   1425 	if (next) {
   1426 		head->ct_hash_count--;
   1427 		if (last == NULL) {
   1428 			head->ct_hash_head = ct->ct_hnext;
   1429 		} else {
   1430 			last->ct_hnext = ct->ct_hnext;
   1431 		}
   1432 		ct->ct_hnext = NULL;
   1433 		vh->vh_client_count--;
   1434 	}
   1435 }
   1436 
   1437 
   1438 /*
   1439  * i_mdi_client_free():
   1440  *		Free a client component
   1441  */
   1442 static int
   1443 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
   1444 {
   1445 	int		rv = MDI_SUCCESS;
   1446 	int		flags = ct->ct_flags;
   1447 	dev_info_t	*cdip;
   1448 	dev_info_t	*vdip;
   1449 
   1450 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
   1451 
   1452 	vdip = vh->vh_dip;
   1453 	cdip = ct->ct_dip;
   1454 
   1455 	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
   1456 	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
   1457 	DEVI(cdip)->devi_mdi_client = NULL;
   1458 
   1459 	/*
   1460 	 * Clear out back ref. to dev_info_t node
   1461 	 */
   1462 	ct->ct_dip = NULL;
   1463 
   1464 	/*
   1465 	 * Remove this client from our hash queue
   1466 	 */
   1467 	i_mdi_client_delist_table(vh, ct);
   1468 
   1469 	/*
   1470 	 * Uninitialize and free the component
   1471 	 */
   1472 	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
   1473 	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
   1474 	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
   1475 	cv_destroy(&ct->ct_failover_cv);
   1476 	cv_destroy(&ct->ct_unstable_cv);
   1477 	cv_destroy(&ct->ct_powerchange_cv);
   1478 	mutex_destroy(&ct->ct_mutex);
   1479 	kmem_free(ct, sizeof (*ct));
   1480 
   1481 	if (cdip != NULL) {
   1482 		MDI_VHCI_CLIENT_UNLOCK(vh);
   1483 		(void) i_mdi_devinfo_remove(vdip, cdip, flags);
   1484 		MDI_VHCI_CLIENT_LOCK(vh);
   1485 	}
   1486 	return (rv);
   1487 }
   1488 
   1489 /*
   1490  * i_mdi_client_find():
   1491  * 		Find the client structure corresponding to a given guid
   1492  *		Caller should hold the vhci client lock.
   1493  */
   1494 static mdi_client_t *
   1495 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
   1496 {
   1497 	int			index;
   1498 	struct client_hash	*head;
   1499 	mdi_client_t		*ct;
   1500 
   1501 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
   1502 
   1503 	index = i_mdi_get_hash_key(guid);
   1504 	head = &vh->vh_client_table[index];
   1505 
   1506 	ct = head->ct_hash_head;
   1507 	while (ct != NULL) {
   1508 		if (strcmp(ct->ct_guid, guid) == 0 &&
   1509 		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
   1510 			break;
   1511 		}
   1512 		ct = ct->ct_hnext;
   1513 	}
   1514 	return (ct);
   1515 }
   1516 
/*
 * i_mdi_client_update_state():
 *		Compute and update client device state.  The state is computed
 *		over all of the client's paths (no pHCI is excluded) and stored
 *		with MDI_CLIENT_SET_STATE().  Caller must hold the client lock.
 * Notes:
 *		A client device can be in any of three possible states:
 *
 *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
 *		than one online/standby path. Can tolerate failures.
 *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
 *		no alternate paths available as standby. A failure on the online
 *		path would result in loss of access to device data.
 *		MDI_CLIENT_STATE_FAILED - Client device in failed state with
 *		no paths available to access the device.
 */
static void
i_mdi_client_update_state(mdi_client_t *ct)
{
	int state;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	/* NULL pHCI: take every path of the client into account. */
	state = i_mdi_client_compute_state(ct, NULL);
	MDI_CLIENT_SET_STATE(ct, state);
}
   1540 
   1541 /*
   1542  * i_mdi_client_compute_state():
   1543  *		Compute client device state
   1544  *
   1545  *		mdi_phci_t *	Pointer to pHCI structure which should
   1546  *				while computing the new value.  Used by
   1547  *				i_mdi_phci_offline() to find the new
   1548  *				client state after DR of a pHCI.
   1549  */
   1550 static int
   1551 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
   1552 {
   1553 	int		state;
   1554 	int		online_count = 0;
   1555 	int		standby_count = 0;
   1556 	mdi_pathinfo_t	*pip, *next;
   1557 
   1558 	ASSERT(MDI_CLIENT_LOCKED(ct));
   1559 	pip = ct->ct_path_head;
   1560 	while (pip != NULL) {
   1561 		MDI_PI_LOCK(pip);
   1562 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
   1563 		if (MDI_PI(pip)->pi_phci == ph) {
   1564 			MDI_PI_UNLOCK(pip);
   1565 			pip = next;
   1566 			continue;
   1567 		}
   1568 
   1569 		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
   1570 				== MDI_PATHINFO_STATE_ONLINE)
   1571 			online_count++;
   1572 		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
   1573 				== MDI_PATHINFO_STATE_STANDBY)
   1574 			standby_count++;
   1575 		MDI_PI_UNLOCK(pip);
   1576 		pip = next;
   1577 	}
   1578 
   1579 	if (online_count == 0) {
   1580 		if (standby_count == 0) {
   1581 			state = MDI_CLIENT_STATE_FAILED;
   1582 			MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
   1583 			    "client state failed: ct = %p", (void *)ct));
   1584 		} else if (standby_count == 1) {
   1585 			state = MDI_CLIENT_STATE_DEGRADED;
   1586 		} else {
   1587 			state = MDI_CLIENT_STATE_OPTIMAL;
   1588 		}
   1589 	} else if (online_count == 1) {
   1590 		if (standby_count == 0) {
   1591 			state = MDI_CLIENT_STATE_DEGRADED;
   1592 		} else {
   1593 			state = MDI_CLIENT_STATE_OPTIMAL;
   1594 		}
   1595 	} else {
   1596 		state = MDI_CLIENT_STATE_OPTIMAL;
   1597 	}
   1598 	return (state);
   1599 }
   1600 
/*
 * i_mdi_client2devinfo():
 *		Utility function: return the devinfo node recorded in the
 *		client structure (ct_dip).  No lock is taken and ct must not
 *		be NULL.
 */
dev_info_t *
i_mdi_client2devinfo(mdi_client_t *ct)
{
	return (ct->ct_dip);
}
   1610 
   1611 /*
   1612  * mdi_client_path2_devinfo():
   1613  * 		Given the parent devinfo and child devfs pathname, search for
   1614  *		a valid devfs node handle.
   1615  */
   1616 dev_info_t *
   1617 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
   1618 {
   1619 	dev_info_t 	*cdip = NULL;
   1620 	dev_info_t 	*ndip = NULL;
   1621 	char		*temp_pathname;
   1622 	int		circular;
   1623 
   1624 	/*
   1625 	 * Allocate temp buffer
   1626 	 */
   1627 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
   1628 
   1629 	/*
   1630 	 * Lock parent against changes
   1631 	 */
   1632 	ndi_devi_enter(vdip, &circular);
   1633 	ndip = (dev_info_t *)DEVI(vdip)->devi_child;
   1634 	while ((cdip = ndip) != NULL) {
   1635 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
   1636 
   1637 		*temp_pathname = '\0';
   1638 		(void) ddi_pathname(cdip, temp_pathname);
   1639 		if (strcmp(temp_pathname, pathname) == 0) {
   1640 			break;
   1641 		}
   1642 	}
   1643 	/*
   1644 	 * Release devinfo lock
   1645 	 */
   1646 	ndi_devi_exit(vdip, circular);
   1647 
   1648 	/*
   1649 	 * Free the temp buffer
   1650 	 */
   1651 	kmem_free(temp_pathname, MAXPATHLEN);
   1652 	return (cdip);
   1653 }
   1654 
   1655 /*
   1656  * mdi_client_get_path_count():
   1657  * 		Utility function to get number of path information nodes
   1658  *		associated with a given client device.
   1659  */
   1660 int
   1661 mdi_client_get_path_count(dev_info_t *cdip)
   1662 {
   1663 	mdi_client_t	*ct;
   1664 	int		count = 0;
   1665 
   1666 	ct = i_devi_get_client(cdip);
   1667 	if (ct != NULL) {
   1668 		count = ct->ct_path_count;
   1669 	}
   1670 	return (count);
   1671 }
   1672 
   1673 
   1674 /*
   1675  * i_mdi_get_hash_key():
   1676  * 		Create a hash using strings as keys
   1677  *
   1678  */
   1679 static int
   1680 i_mdi_get_hash_key(char *str)
   1681 {
   1682 	uint32_t	g, hash = 0;
   1683 	char		*p;
   1684 
   1685 	for (p = str; *p != '\0'; p++) {
   1686 		g = *p;
   1687 		hash += g;
   1688 	}
   1689 	return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
   1690 }
   1691 
   1692 /*
   1693  * mdi_get_lb_policy():
   1694  * 		Get current load balancing policy for a given client device
   1695  */
   1696 client_lb_t
   1697 mdi_get_lb_policy(dev_info_t *cdip)
   1698 {
   1699 	client_lb_t	lb = LOAD_BALANCE_NONE;
   1700 	mdi_client_t	*ct;
   1701 
   1702 	ct = i_devi_get_client(cdip);
   1703 	if (ct != NULL) {
   1704 		lb = ct->ct_lb;
   1705 	}
   1706 	return (lb);
   1707 }
   1708 
   1709 /*
   1710  * mdi_set_lb_region_size():
   1711  * 		Set current region size for the load-balance
   1712  */
   1713 int
   1714 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
   1715 {
   1716 	mdi_client_t	*ct;
   1717 	int		rv = MDI_FAILURE;
   1718 
   1719 	ct = i_devi_get_client(cdip);
   1720 	if (ct != NULL && ct->ct_lb_args != NULL) {
   1721 		ct->ct_lb_args->region_size = region_size;
   1722 		rv = MDI_SUCCESS;
   1723 	}
   1724 	return (rv);
   1725 }
   1726 
   1727 /*
   1728  * mdi_Set_lb_policy():
   1729  * 		Set current load balancing policy for a given client device
   1730  */
   1731 int
   1732 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
   1733 {
   1734 	mdi_client_t	*ct;
   1735 	int		rv = MDI_FAILURE;
   1736 
   1737 	ct = i_devi_get_client(cdip);
   1738 	if (ct != NULL) {
   1739 		ct->ct_lb = lb;
   1740 		rv = MDI_SUCCESS;
   1741 	}
   1742 	return (rv);
   1743 }
   1744 
   1745 /*
   1746  * mdi_failover():
   1747  *		failover function called by the vHCI drivers to initiate
   1748  *		a failover operation.  This is typically due to non-availability
   1749  *		of online paths to route I/O requests.  Failover can be
   1750  *		triggered through user application also.
   1751  *
   1752  *		The vHCI driver calls mdi_failover() to initiate a failover
   1753  *		operation. mdi_failover() calls back into the vHCI driver's
   1754  *		vo_failover() entry point to perform the actual failover
   1755  *		operation.  The reason for requiring the vHCI driver to
   1756  *		initiate failover by calling mdi_failover(), instead of directly
   1757  *		executing vo_failover() itself, is to ensure that the mdi
   1758  *		framework can keep track of the client state properly.
   1759  *		Additionally, mdi_failover() provides as a convenience the
   1760  *		option of performing the failover operation synchronously or
   1761  *		asynchronously
   1762  *
   1763  *		Upon successful completion of the failover operation, the
   1764  *		paths that were previously ONLINE will be in the STANDBY state,
   1765  *		and the newly activated paths will be in the ONLINE state.
   1766  *
   1767  *		The flags modifier determines whether the activation is done
   1768  *		synchronously: MDI_FAILOVER_SYNC
   1769  * Return Values:
   1770  *		MDI_SUCCESS
   1771  *		MDI_FAILURE
   1772  *		MDI_BUSY
   1773  */
   1774 /*ARGSUSED*/
   1775 int
   1776 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
   1777 {
   1778 	int			rv;
   1779 	mdi_client_t		*ct;
   1780 
   1781 	ct = i_devi_get_client(cdip);
   1782 	ASSERT(ct != NULL);
   1783 	if (ct == NULL) {
   1784 		/* cdip is not a valid client device. Nothing more to do. */
   1785 		return (MDI_FAILURE);
   1786 	}
   1787 
   1788 	MDI_CLIENT_LOCK(ct);
   1789 
   1790 	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
   1791 		/* A path to the client is being freed */
   1792 		MDI_CLIENT_UNLOCK(ct);
   1793 		return (MDI_BUSY);
   1794 	}
   1795 
   1796 
   1797 	if (MDI_CLIENT_IS_FAILED(ct)) {
   1798 		/*
   1799 		 * Client is in failed state. Nothing more to do.
   1800 		 */
   1801 		MDI_CLIENT_UNLOCK(ct);
   1802 		return (MDI_FAILURE);
   1803 	}
   1804 
   1805 	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
   1806 		/*
   1807 		 * Failover is already in progress; return BUSY
   1808 		 */
   1809 		MDI_CLIENT_UNLOCK(ct);
   1810 		return (MDI_BUSY);
   1811 	}
   1812 	/*
   1813 	 * Make sure that mdi_pathinfo node state changes are processed.
   1814 	 * We do not allow failovers to progress while client path state
   1815 	 * changes are in progress
   1816 	 */
   1817 	if (ct->ct_unstable) {
   1818 		if (flags == MDI_FAILOVER_ASYNC) {
   1819 			MDI_CLIENT_UNLOCK(ct);
   1820 			return (MDI_BUSY);
   1821 		} else {
   1822 			while (ct->ct_unstable)
   1823 				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
   1824 		}
   1825 	}
   1826 
   1827 	/*
   1828 	 * Client device is in stable state. Before proceeding, perform sanity
   1829 	 * checks again.
   1830 	 */
   1831 	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
   1832 	    (!i_ddi_devi_attached(ct->ct_dip))) {
   1833 		/*
   1834 		 * Client is in failed state. Nothing more to do.
   1835 		 */
   1836 		MDI_CLIENT_UNLOCK(ct);
   1837 		return (MDI_FAILURE);
   1838 	}
   1839 
   1840 	/*
   1841 	 * Set the client state as failover in progress.
   1842 	 */
   1843 	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
   1844 	ct->ct_failover_flags = flags;
   1845 	MDI_CLIENT_UNLOCK(ct);
   1846 
   1847 	if (flags == MDI_FAILOVER_ASYNC) {
   1848 		/*
   1849 		 * Submit the initiate failover request via CPR safe
   1850 		 * taskq threads.
   1851 		 */
   1852 		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
   1853 		    ct, KM_SLEEP);
   1854 		return (MDI_ACCEPT);
   1855 	} else {
   1856 		/*
   1857 		 * Synchronous failover mode.  Typically invoked from the user
   1858 		 * land.
   1859 		 */
   1860 		rv = i_mdi_failover(ct);
   1861 	}
   1862 	return (rv);
   1863 }
   1864 
/*
 * i_mdi_failover():
 *		internal failover function. Invokes the vHCI driver's failover
 *		callback function and processes the failover status.
 *
 *		Called directly by mdi_failover() for a synchronous request
 *		and dispatched through a taskq for an asynchronous one, which
 *		is why the argument is a void pointer; it must point to the
 *		mdi_client_t being failed over.  The client lock must not be
 *		held by the caller.
 * Return Values:
 *		The value returned by the vHCI driver's vo_failover() entry
 *		point, or MDI_SUCCESS when the driver does not provide one.
 *		The same value is saved in the client's ct_failover_status.
 *
 * Note: A client device in failover state can not be detached or freed.
 */
static int
i_mdi_failover(void *arg)
{
	int		rv = MDI_SUCCESS;
	mdi_client_t	*ct = (mdi_client_t *)arg;
	mdi_vhci_t	*vh = ct->ct_vhci;

	ASSERT(!MDI_CLIENT_LOCKED(ct));

	if (vh->vh_ops->vo_failover != NULL) {
		/*
		 * Call vHCI drivers callback routine
		 */
		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
		    ct->ct_failover_flags);
	}

	/* The callback ran unlocked; take the lock to publish the result. */
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);

	/*
	 * Save the failover return status
	 */
	ct->ct_failover_status = rv;

	/*
	 * As a result of failover, client status would have been changed.
	 * Update the client state and wake up anyone waiting on this client
	 * device.
	 */
	i_mdi_client_update_state(ct);

	cv_broadcast(&ct->ct_failover_cv);
	MDI_CLIENT_UNLOCK(ct);
	return (rv);
}
   1910 
   1911 /*
   1912  * Load balancing is logical block.
   1913  * IOs within the range described by region_size
   1914  * would go on the same path. This would improve the
   1915  * performance by cache-hit on some of the RAID devices.
   1916  * Search only for online paths(At some point we
   1917  * may want to balance across target ports).
   1918  * If no paths are found then default to round-robin.
   1919  */
   1920 static int
   1921 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
   1922 {
   1923 	int		path_index = -1;
   1924 	int		online_path_count = 0;
   1925 	int		online_nonpref_path_count = 0;
   1926 	int 		region_size = ct->ct_lb_args->region_size;
   1927 	mdi_pathinfo_t	*pip;
   1928 	mdi_pathinfo_t	*next;
   1929 	int		preferred, path_cnt;
   1930 
   1931 	pip = ct->ct_path_head;
   1932 	while (pip) {
   1933 		MDI_PI_LOCK(pip);
   1934 		if (MDI_PI(pip)->pi_state ==
   1935 		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
   1936 			online_path_count++;
   1937 		} else if (MDI_PI(pip)->pi_state ==
   1938 		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
   1939 			online_nonpref_path_count++;
   1940 		}
   1941 		next = (mdi_pathinfo_t *)
   1942 		    MDI_PI(pip)->pi_client_link;
   1943 		MDI_PI_UNLOCK(pip);
   1944 		pip = next;
   1945 	}
   1946 	/* if found any online/preferred then use this type */
   1947 	if (online_path_count > 0) {
   1948 		path_cnt = online_path_count;
   1949 		preferred = 1;
   1950 	} else if (online_nonpref_path_count > 0) {
   1951 		path_cnt = online_nonpref_path_count;
   1952 		preferred = 0;
   1953 	} else {
   1954 		path_cnt = 0;
   1955 	}
   1956 	if (path_cnt) {
   1957 		path_index = (bp->b_blkno >> region_size) % path_cnt;
   1958 		pip = ct->ct_path_head;
   1959 		while (pip && path_index != -1) {
   1960 			MDI_PI_LOCK(pip);
   1961 			if (path_index == 0 &&
   1962 			    (MDI_PI(pip)->pi_state ==
   1963 			    MDI_PATHINFO_STATE_ONLINE) &&
   1964 				MDI_PI(pip)->pi_preferred == preferred) {
   1965 				MDI_PI_HOLD(pip);
   1966 				MDI_PI_UNLOCK(pip);
   1967 				*ret_pip = pip;
   1968 				return (MDI_SUCCESS);
   1969 			}
   1970 			path_index --;
   1971 			next = (mdi_pathinfo_t *)
   1972 			    MDI_PI(pip)->pi_client_link;
   1973 			MDI_PI_UNLOCK(pip);
   1974 			pip = next;
   1975 		}
   1976 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
   1977 		    "lba %llx: path %s %p",
   1978 		    bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip));
   1979 	}
   1980 	return (MDI_FAILURE);
   1981 }
   1982 
   1983 /*
   1984  * mdi_select_path():
   1985  *		select a path to access a client device.
   1986  *
   1987  *		mdi_select_path() function is called by the vHCI drivers to
   1988  *		select a path to route the I/O request to.  The caller passes
   1989  *		the block I/O data transfer structure ("buf") as one of the
   1990  *		parameters.  The mpxio framework uses the buf structure
   1991  *		contents to maintain per path statistics (total I/O size /
   1992  *		count pending).  If more than one online paths are available to
   1993  *		select, the framework automatically selects a suitable path
   1994  *		for routing I/O request. If a failover operation is active for
   1995  *		this client device the call shall be failed with MDI_BUSY error
   1996  *		code.
         *
         *		Within this function "bp" is consulted only for the
         *		LOAD_BALANCE_LBA policy; when it is NULL the LBA selection is
         *		skipped and the round robin search is used instead.
   1997  *
   1998  *		By default this function returns a suitable path in online
   1999  *		state based on the current load balancing policy.  Currently
   2000  *		we support LOAD_BALANCE_NONE (Previously selected online path
   2001  *		will continue to be used till the path is usable),
   2002  *		LOAD_BALANCE_RR (Online paths will be selected in a round
   2003  *		robin fashion) and LOAD_BALANCE_LBA (Online paths will be
   2004  *		selected based on the logical block).  The load balancing
   2005  *		policy is set through the vHCI driver's configuration file
         *		(driver.conf).
   2006  *
   2007  *		vHCI drivers may override this default behavior by specifying
   2008  *		appropriate flags.  The meaning of the "arg" argument depends
   2009  *		on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
   2010  *		then the argument is the "path instance" of the path to select.
   2011  *		If MDI_SELECT_PATH_INSTANCE is not set then the argument is
   2012  *		"start_pip". A non NULL "start_pip" is the starting point to
   2013  *		walk and find the next appropriate path.  The following values
   2014  *		are currently defined: MDI_SELECT_ONLINE_PATH (to select an
   2015  *		ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an
   2016  *		STANDBY path).
         *
         *		Any non-zero "flags" value disables the default behavior: the
         *		client state checks are skipped and the round robin search is
         *		used regardless of the client's load balancing policy.  In that
         *		search "flags" is compared by exact value against the supported
         *		combinations (see the loop below); any other combination
         *		matches no path.
   2017  *
   2018  *		The non-standard behavior is used by the scsi_vhci driver,
   2019  *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
   2020  *		attach of client devices (to avoid an unnecessary failover
   2021  *		when the STANDBY path comes up first), during failover
   2022  *		(to activate a STANDBY path as ONLINE).
   2023  *
   2024  *		The selected path is returned in a mdi_hold_path() state
   2025  *		(pi_ref_cnt). Caller should release the hold by calling
   2026  *		mdi_rele_path().
   2027  *
   2028  * Return Values:
   2029  *		MDI_SUCCESS	- Completed successfully
   2030  *		MDI_BUSY 	- Client device is busy failing over, or no
         *				  path qualified and at least one path that was
         *				  examined is in a transient state, or the
         *				  requested path instance is in a transient
         *				  state
   2031  *		MDI_NOPATH	- Client device is online, but no valid path are
   2032  *				  available to access this client device (also
         *				  returned when the requested path instance is
         *				  not found)
   2033  *		MDI_FAILURE	- Invalid client device or state (also returned
         *				  when the requested path instance is in INIT
         *				  state or its pHCI is gone)
   2034  *		MDI_DEVI_ONLINING
   2035  *				- Client device (struct dev_info state) is in
   2036  *				  onlining state.
   2037  */
   2038 
   2039 /*ARGSUSED*/
   2040 int
   2041 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
   2042     void *arg, mdi_pathinfo_t **ret_pip)
   2043 {
   2044 	mdi_client_t	*ct;
   2045 	mdi_pathinfo_t	*pip;
   2046 	mdi_pathinfo_t	*next;
   2047 	mdi_pathinfo_t	*head;
   2048 	mdi_pathinfo_t	*start;
   2049 	client_lb_t	lbp;	/* load balancing policy */
   2050 	int		sb = 1;	/* standard behavior */
   2051 	int		preferred = 1;	/* preferred path */
   2052 	int		cond, cont = 1;
   2053 	int		retry = 0;
   2054 	mdi_pathinfo_t	*start_pip;	/* request starting pathinfo */
   2055 	int		path_instance;	/* request specific path instance */
   2056 
   2057 	/* determine type of arg based on flags */
   2058 	if (flags & MDI_SELECT_PATH_INSTANCE) {
   2059 		path_instance = (int)(intptr_t)arg;
   2060 		start_pip = NULL;
   2061 	} else {
   2062 		path_instance = 0;
   2063 		start_pip = (mdi_pathinfo_t *)arg;
   2064 	}
   2065 
   2066 	if (flags != 0) {
   2067 		/*
   2068 		 * any flag at all disables the default (standard) behavior
   2069 		 */
   2070 		sb = 0;
   2071 	}
   2072 
   2073 	*ret_pip = NULL;
   2074 	ct = i_devi_get_client(cdip);
   2075 	if (ct == NULL) {
   2076 		/* mdi extensions are NULL, Nothing more to do */
   2077 		return (MDI_FAILURE);
   2078 	}
   2079 
   2080 	MDI_CLIENT_LOCK(ct);
   2081 
   2082 	if (sb) {
   2083 		if (MDI_CLIENT_IS_FAILED(ct)) {
   2084 			/*
   2085 			 * Client is not ready to accept any I/O requests.
   2086 			 * Fail this request.
   2087 			 */
   2088 			MDI_DEBUG(2, (MDI_NOTE, cdip,
   2089 			    "client state offline ct = %p", (void *)ct));
   2090 			MDI_CLIENT_UNLOCK(ct);
   2091 			return (MDI_FAILURE);
   2092 		}
   2093 
   2094 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
   2095 			/*
   2096 			 * Check for Failover is in progress. If so tell the
   2097 			 * caller that this device is busy.
   2098 			 */
   2099 			MDI_DEBUG(2, (MDI_NOTE, cdip,
   2100 			    "client failover in progress ct = %p",
   2101 			    (void *)ct));
   2102 			MDI_CLIENT_UNLOCK(ct);
   2103 			return (MDI_BUSY);
   2104 		}
   2105 
   2106 		/*
   2107 		 * Check to see whether the client device is attached.
   2108 		 * If not so, let the vHCI driver manually select a path
   2109 		 * (standby) and let the probe/attach process to continue.
   2110 		 */
   2111 		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
   2112 			MDI_DEBUG(4, (MDI_NOTE, cdip,
   2113 			    "devi is onlining ct = %p", (void *)ct));
   2114 			MDI_CLIENT_UNLOCK(ct);
   2115 			return (MDI_DEVI_ONLINING);
   2116 		}
   2117 	}
   2118 
   2119 	/*
   2120 	 * Cache in the client list head.  If head of the list is NULL
   2121 	 * return MDI_NOPATH
   2122 	 */
   2123 	head = ct->ct_path_head;
   2124 	if (head == NULL) {
   2125 		MDI_CLIENT_UNLOCK(ct);
   2126 		return (MDI_NOPATH);
   2127 	}
   2128 
   2129 	/* Caller is specifying a specific pathinfo path by path_instance */
   2130 	if (path_instance) {
   2131 		/* search for pathinfo with correct path_instance */
   2132 		for (pip = head;
   2133 		    pip && (mdi_pi_get_path_instance(pip) != path_instance);
   2134 		    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
   2135 			;
   2136 
   2137 		/* If path can't be selected then MDI_NOPATH is returned. */
   2138 		if (pip == NULL) {
   2139 			MDI_CLIENT_UNLOCK(ct);
   2140 			return (MDI_NOPATH);
   2141 		}
   2142 
   2143 		/*
   2144 		 * Verify state of path. When asked to select a specific
   2145 		 * path_instance, we select the requested path in any
   2146 		 * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT.
   2147 		 * We don't however select paths where the pHCI has detached.
   2148 		 * NOTE: last pathinfo node of an opened client device may
   2149 		 * exist in an OFFLINE state after the pHCI associated with
   2150 		 * that path has detached (but pi_phci will be NULL if that
   2151 		 * has occurred).
   2152 		 */
   2153 		MDI_PI_LOCK(pip);
   2154 		if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) ||
   2155 		    (MDI_PI(pip)->pi_phci == NULL)) {
   2156 			MDI_PI_UNLOCK(pip);
   2157 			MDI_CLIENT_UNLOCK(ct);
   2158 			return (MDI_FAILURE);
   2159 		}
   2160 
   2161 		/* Return MDI_BUSY if we have a transient condition */
   2162 		if (MDI_PI_IS_TRANSIENT(pip)) {
   2163 			MDI_PI_UNLOCK(pip);
   2164 			MDI_CLIENT_UNLOCK(ct);
   2165 			return (MDI_BUSY);
   2166 		}
   2167 
   2168 		/*
   2169 		 * Return the path in hold state. Caller should release the
   2170 		 * hold by calling mdi_rele_path()
   2171 		 */
   2172 		MDI_PI_HOLD(pip);
   2173 		MDI_PI_UNLOCK(pip);
   2174 		*ret_pip = pip;
   2175 		MDI_CLIENT_UNLOCK(ct);
   2176 		return (MDI_SUCCESS);
   2177 	}
   2178 
   2179 	/*
   2180 	 * for non default behavior, bypass current
   2181 	 * load balancing policy and always use LOAD_BALANCE_RR
   2182 	 * except that the start point will be adjusted based
   2183 	 * on the provided start_pip
   2184 	 */
   2185 	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
   2186 
   2187 	switch (lbp) {
   2188 	case LOAD_BALANCE_NONE:
   2189 		/*
   2190 		 * Load balancing is None  or Alternate path mode
   2191 		 * Start looking for a online mdi_pathinfo node starting from
   2192 		 * last known selected path
   2193 		 */
   2194 		preferred = 1;
   2195 		pip = (mdi_pathinfo_t *)ct->ct_path_last;
   2196 		if (pip == NULL) {
   2197 			pip = head;
   2198 		}
   2199 		start = pip;
   2200 		do {
   2201 			MDI_PI_LOCK(pip);
   2202 			/*
   2203 			 * No need to explicitly check if the path is disabled.
   2204 			 * Since we are checking for state == ONLINE and the
   2205 			 * same variable is used for DISABLE/ENABLE information.
   2206 			 */
   2207 			if ((MDI_PI(pip)->pi_state  ==
   2208 				MDI_PATHINFO_STATE_ONLINE) &&
   2209 				preferred == MDI_PI(pip)->pi_preferred) {
   2210 				/*
   2211 				 * Return the path in hold state. Caller should
   2212 				 * release the hold by calling mdi_rele_path()
   2213 				 */
   2214 				MDI_PI_HOLD(pip);
   2215 				MDI_PI_UNLOCK(pip);
   2216 				ct->ct_path_last = pip;
   2217 				*ret_pip = pip;
   2218 				MDI_CLIENT_UNLOCK(ct);
   2219 				return (MDI_SUCCESS);
   2220 			}
   2221 
   2222 			/*
   2223 			 * Path is busy.
   2224 			 */
   2225 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
   2226 			    MDI_PI_IS_TRANSIENT(pip))
   2227 				retry = 1;
   2228 			/*
   2229 			 * Keep looking for a next available online path
   2230 			 */
   2231 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
   2232 			if (next == NULL) {
   2233 				next = head;
   2234 			}
   2235 			MDI_PI_UNLOCK(pip);
   2236 			pip = next;
        			/*
        			 * Back at the starting node: the first lap looked at
        			 * preferred paths, so do one more lap for the
        			 * non-preferred ones; after the second lap, give up.
        			 */
   2237 			if (start == pip && preferred) {
   2238 				preferred = 0;
   2239 			} else if (start == pip && !preferred) {
   2240 				cont = 0;
   2241 			}
   2242 		} while (cont);
   2243 		break;
   2244 
   2245 	case LOAD_BALANCE_LBA:
   2246 		/*
   2247 		 * Make sure we are looking
   2248 		 * for an online path. Otherwise, if it is for a STANDBY
   2249 		 * path request, it will go through and fetch an ONLINE
   2250 		 * path which is not desirable.
   2251 		 */
   2252 		if ((ct->ct_lb_args != NULL) &&
   2253 			    (ct->ct_lb_args->region_size) && bp &&
   2254 				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
   2255 			if (i_mdi_lba_lb(ct, ret_pip, bp)
   2256 				    == MDI_SUCCESS) {
   2257 				MDI_CLIENT_UNLOCK(ct);
   2258 				return (MDI_SUCCESS);
   2259 			}
   2260 		}
        		/* LBA selection not possible or failed: use round robin */
   2261 		/* FALLTHROUGH */
   2262 	case LOAD_BALANCE_RR:
   2263 		/*
   2264 		 * Load balancing is Round Robin. Start looking for a online
   2265 		 * mdi_pathinfo node starting from last known selected path
   2266 		 * as the start point.  If override flags are specified,
   2267 		 * process accordingly.
   2268 		 * If the search is already in effect(start_pip not null),
   2269 		 * then lets just use the same path preference to continue the
   2270 		 * traversal.
   2271 		 */
   2272 
   2273 		if (start_pip != NULL) {
   2274 			preferred = MDI_PI(start_pip)->pi_preferred;
   2275 		} else {
   2276 			preferred = 1;
   2277 		}
   2278 
   2279 		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
   2280 		if (start == NULL) {
   2281 			pip = head;
   2282 		} else {
   2283 			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
   2284 			if (pip == NULL) {
   2285 				if ( flags & MDI_SELECT_NO_PREFERRED) {
   2286 					/*
   2287 					 * Return since we hit the end of list
   2288 					 */
   2289 					MDI_CLIENT_UNLOCK(ct);
   2290 					return (MDI_NOPATH);
   2291 				}
   2292 
   2293 				if (!sb) {
   2294 					if (preferred == 0) {
   2295 						/*
   2296 						 * Looks like we have completed
   2297 						 * the traversal as preferred
   2298 						 * value is 0. Time to bail out.
   2299 						 */
   2300 						*ret_pip = NULL;
   2301 						MDI_CLIENT_UNLOCK(ct);
   2302 						return (MDI_NOPATH);
   2303 					} else {
   2304 						/*
   2305 						 * Looks like we reached the
   2306 						 * end of the list. Lets enable
   2307 						 * traversal of non preferred
   2308 						 * paths.
   2309 						 */
   2310 						preferred = 0;
   2311 					}
   2312 				}
   2313 				pip = head;
   2314 			}
   2315 		}
   2316 		start = pip;
   2317 		do {
   2318 			MDI_PI_LOCK(pip);
   2319 			if (sb) {
   2320 				cond = ((MDI_PI(pip)->pi_state ==
   2321 				    MDI_PATHINFO_STATE_ONLINE &&
   2322 					MDI_PI(pip)->pi_preferred ==
   2323 						preferred) ? 1 : 0);
   2324 			} else {
        				/*
        				 * flags is matched by exact value against the
        				 * supported combinations; any other value
        				 * leaves cond at 0 so no path is accepted.
        				 */
   2325 				if (flags == MDI_SELECT_ONLINE_PATH) {
   2326 					cond = ((MDI_PI(pip)->pi_state ==
   2327 					    MDI_PATHINFO_STATE_ONLINE &&
   2328 						MDI_PI(pip)->pi_preferred ==
   2329 						preferred) ? 1 : 0);
   2330 				} else if (flags == MDI_SELECT_STANDBY_PATH) {
   2331 					cond = ((MDI_PI(pip)->pi_state ==
   2332 					    MDI_PATHINFO_STATE_STANDBY &&
   2333 						MDI_PI(pip)->pi_preferred ==
   2334 						preferred) ? 1 : 0);
   2335 				} else if (flags == (MDI_SELECT_ONLINE_PATH |
   2336 				    MDI_SELECT_STANDBY_PATH)) {
   2337 					cond = (((MDI_PI(pip)->pi_state ==
   2338 					    MDI_PATHINFO_STATE_ONLINE ||
   2339 					    (MDI_PI(pip)->pi_state ==
   2340 					    MDI_PATHINFO_STATE_STANDBY)) &&
   2341 						MDI_PI(pip)->pi_preferred ==
   2342 						preferred) ? 1 : 0);
   2343 				} else if (flags ==
   2344 					(MDI_SELECT_STANDBY_PATH |
   2345 					MDI_SELECT_ONLINE_PATH |
   2346 					MDI_SELECT_USER_DISABLE_PATH)) {
   2347 					cond = (((MDI_PI(pip)->pi_state ==
   2348 					    MDI_PATHINFO_STATE_ONLINE ||
   2349 					    (MDI_PI(pip)->pi_state ==
   2350 					    MDI_PATHINFO_STATE_STANDBY) ||
   2351 						(MDI_PI(pip)->pi_state ==
   2352 					    (MDI_PATHINFO_STATE_ONLINE|
   2353 					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
   2354 						(MDI_PI(pip)->pi_state ==
   2355 					    (MDI_PATHINFO_STATE_STANDBY |
   2356 					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
   2357 						MDI_PI(pip)->pi_preferred ==
   2358 						preferred) ? 1 : 0);
   2359 				} else if (flags ==
   2360 				    (MDI_SELECT_STANDBY_PATH |
   2361 				    MDI_SELECT_ONLINE_PATH |
   2362 				    MDI_SELECT_NO_PREFERRED)) {
        					/* preference is deliberately ignored here */
   2363 					cond = (((MDI_PI(pip)->pi_state ==
   2364 					    MDI_PATHINFO_STATE_ONLINE) ||
   2365 					    (MDI_PI(pip)->pi_state ==
   2366 					    MDI_PATHINFO_STATE_STANDBY))
   2367 					    ? 1 : 0);
   2368 				} else {
   2369 					cond = 0;
   2370 				}
   2371 			}
   2372 			/*
   2373 			 * No need to explicitly check if the path is disabled.
   2374 			 * Since we are checking for state == ONLINE and the
   2375 			 * same variable is used for DISABLE/ENABLE information.
   2376 			 */
   2377 			if (cond) {
   2378 				/*
   2379 				 * Return the path in hold state. Caller should
   2380 				 * release the hold by calling mdi_rele_path()
   2381 				 */
   2382 				MDI_PI_HOLD(pip);
   2383 				MDI_PI_UNLOCK(pip);
   2384 				if (sb)
   2385 					ct->ct_path_last = pip;
   2386 				*ret_pip = pip;
   2387 				MDI_CLIENT_UNLOCK(ct);
   2388 				return (MDI_SUCCESS);
   2389 			}
   2390 			/*
   2391 			 * Path is busy.
   2392 			 */
   2393 			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
   2394 			    MDI_PI_IS_TRANSIENT(pip))
   2395 				retry = 1;
   2396 
   2397 			/*
   2398 			 * Keep looking for a next available online path
        			 *
        			 * pip is locked whenever control reaches do_again,
        			 * both on fall-through from above and when the goto
        			 * further down re-enters here after locking the
        			 * start_pip it wants to step over.
   2399 			 */
   2400 do_again:
   2401 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
   2402 			if (next == NULL) {
   2403 				if ( flags & MDI_SELECT_NO_PREFERRED) {
   2404 					/*
   2405 					 * Bail out since we hit the end of list
   2406 					 */
   2407 					MDI_PI_UNLOCK(pip);
   2408 					break;
   2409 				}
   2410 
   2411 				if (!sb) {
   2412 					if (preferred == 1) {
   2413 						/*
   2414 						 * Looks like we reached the
   2415 						 * end of the list. Lets enable
   2416 						 * traversal of non preferred
   2417 						 * paths.
   2418 						 */
   2419 						preferred = 0;
   2420 						next = head;
   2421 					} else {
   2422 						/*
   2423 						 * We have done both the passes
   2424 						 * Preferred as well as for
   2425 						 * Non-preferred. Bail out now.
   2426 						 */
   2427 						cont = 0;
   2428 					}
   2429 				} else {
   2430 					/*
   2431 					 * Standard behavior case.
   2432 					 */
   2433 					next = head;
   2434 				}
   2435 			}
   2436 			MDI_PI_UNLOCK(pip);
   2437 			if (cont == 0) {
   2438 				break;
   2439 			}
   2440 			pip = next;
   2441 
   2442 			if (!sb) {
   2443 				/*
   2444 				 * We need to handle the selection of
   2445 				 * non-preferred path in the following
   2446 				 * case:
   2447 				 *
   2448 				 * +------+   +------+   +------+   +-----+
   2449 				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
   2450 				 * +------+   +------+   +------+   +-----+
   2451 				 *
   2452 				 * If we start the search with B, we need to
   2453 				 * skip beyond B to pick C which is non -
   2454 				 * preferred in the second pass. The following
   2455 				 * test, if true, will allow us to skip over
   2456 				 * the 'start'(B in the example) to select
   2457 				 * other non preferred elements.
   2458 				 */
   2459 				if ((start_pip != NULL) && (start_pip == pip) &&
   2460 				    (MDI_PI(start_pip)->pi_preferred
   2461 				    != preferred)) {
   2462 					/*
   2463 					 * try again after going past the start
   2464 					 * pip
   2465 					 */
   2466 					MDI_PI_LOCK(pip);
   2467 					goto do_again;
   2468 				}
   2469 			} else {
   2470 				/*
   2471 				 * Standard behavior case
   2472 				 */
   2473 				if (start == pip && preferred) {
   2474 					/* look for nonpreferred paths */
   2475 					preferred = 0;
   2476 				} else if (start == pip && !preferred) {
   2477 					/*
   2478 					 * Exit condition
   2479 					 */
   2480 					cont = 0;
   2481 				}
   2482 			}
   2483 		} while (cont);
   2484 		break;
   2485 	}
   2486 
        	/*
        	 * No path was selected.  Report MDI_BUSY if any path examined
        	 * above was in a transient state, MDI_NOPATH otherwise.
        	 */
   2487 	MDI_CLIENT_UNLOCK(ct);
   2488 	if (retry == 1) {
   2489 		return (MDI_BUSY);
   2490 	} else {
   2491 		return (MDI_NOPATH);
   2492 	}
   2493 }
   2494 
   2495 /*
   2496  * For a client, return the next available path to any phci
   2497  *
   2498  * Note:
   2499  *		Caller should hold the branch's devinfo node to get a consistent
   2500  *		snap shot of the mdi_pathinfo nodes.
   2501  *
   2502  *		Please note that even the list is stable the mdi_pathinfo
   2503  *		node state and properties are volatile.  The caller should lock
   2504  *		and unlock the nodes by calling mdi_pi_lock() and
   2505  *		mdi_pi_unlock() functions to get a stable properties.
   2506  *
   2507  *		If there is a need to use the nodes beyond the hold of the
   2508  *		devinfo node period (For ex. I/O), then mdi_pathinfo node
   2509  *		need to be held against unexpected removal by calling
   2510  *		mdi_hold_path() and should be released by calling
   2511  *		mdi_rele_path() on completion.
   2512  */
   2513 mdi_pathinfo_t *
   2514 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
   2515 {
   2516 	mdi_client_t *ct;
   2517 
   2518 	if (!MDI_CLIENT(ct_dip))
   2519 		return (NULL);
   2520 
   2521 	/*
   2522 	 * Walk through client link
   2523 	 */
   2524 	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
   2525 	ASSERT(ct != NULL);
   2526 
   2527 	if (pip == NULL)
   2528 		return ((mdi_pathinfo_t *)ct->ct_path_head);
   2529 
   2530 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
   2531 }
   2532 
   2533 /*
   2534  * For a phci, return the next available path to any client
   2535  * Note: ditto mdi_get_next_phci_path()
   2536  */
   2537 mdi_pathinfo_t *
   2538 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
   2539 {
   2540 	mdi_phci_t *ph;
   2541 
   2542 	if (!MDI_PHCI(ph_dip))
   2543 		return (NULL);
   2544 
   2545 	/*
   2546 	 * Walk through pHCI link
   2547 	 */
   2548 	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
   2549 	ASSERT(ph != NULL);
   2550 
   2551 	if (pip == NULL)
   2552 		return ((mdi_pathinfo_t *)ph->ph_path_head);
   2553 
   2554 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
   2555 }
   2556 
   2557 /*
   2558  * mdi_hold_path():
   2559  *		Hold the mdi_pathinfo node against unwanted unexpected free.
   2560  * Return Values:
   2561  *		None
   2562  */
   2563 void
   2564 mdi_hold_path(mdi_pathinfo_t *pip)
   2565 {
   2566 	if (pip) {
   2567 		MDI_PI_LOCK(pip);
   2568 		MDI_PI_HOLD(pip);
   2569 		MDI_PI_UNLOCK(pip);
   2570 	}
   2571 }
   2572 
   2573 
   2574 /*
   2575  * mdi_rele_path():
   2576  *		Release the mdi_pathinfo node which was selected
   2577  *		through mdi_select_path() mechanism or manually held by
   2578  *		calling mdi_hold_path().
   2579  * Return Values:
   2580  *		None
   2581  */
   2582 void
   2583 mdi_rele_path(mdi_pathinfo_t *pip)
   2584 {
   2585 	if (pip) {
   2586 		MDI_PI_LOCK(pip);
   2587 		MDI_PI_RELE(pip);
   2588 		if (MDI_PI(pip)->pi_ref_cnt == 0) {
   2589 			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
   2590 		}
   2591 		MDI_PI_UNLOCK(pip);
   2592 	}
   2593 }
   2594 
   2595 /*
   2596  * mdi_pi_lock():
   2597  * 		Lock the mdi_pathinfo node.
   2598  * Note:
   2599  *		The caller should release the lock by calling mdi_pi_unlock()
   2600  */
   2601 void
   2602 mdi_pi_lock(mdi_pathinfo_t *pip)
   2603 {
   2604 	ASSERT(pip != NULL);
   2605 	if (pip) {
   2606 		MDI_PI_LOCK(pip);
   2607 	}
   2608 }
   2609 
   2610 
   2611 /*
   2612  * mdi_pi_unlock():
   2613  * 		Unlock the mdi_pathinfo node.
   2614  * Note:
   2615  *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
   2616  */
   2617 void
   2618 mdi_pi_unlock(mdi_pathinfo_t *pip)
   2619 {
   2620 	ASSERT(pip != NULL);
   2621 	if (pip) {
   2622 		MDI_PI_UNLOCK(pip);
   2623 	}
   2624 }
   2625 
   2626 /*
   2627  * mdi_pi_find():
   2628  *		Search the list of mdi_pathinfo nodes attached to the
   2629  *		pHCI/Client device node whose path address matches "paddr".
   2630  *		Returns a pointer to the mdi_pathinfo node if a matching node is
   2631  *		found.
   2632  * Return Values:
   2633  *		mdi_pathinfo node handle
   2634  *		NULL
   2635  * Notes:
   2636  *		Caller need not hold any locks to call this function.
   2637  */
   2638 mdi_pathinfo_t *
   2639 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
   2640 {
   2641 	mdi_phci_t		*ph;
   2642 	mdi_vhci_t		*vh;
   2643 	mdi_client_t		*ct;
   2644 	mdi_pathinfo_t		*pip = NULL;
   2645 
   2646 	MDI_DEBUG(2, (MDI_NOTE, pdip,
   2647 	    "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : ""));
   2648 	if ((pdip == NULL) || (paddr == NULL)) {
   2649 		return (NULL);
   2650 	}
   2651 	ph = i_devi_get_phci(pdip);
   2652 	if (ph == NULL) {
   2653 		/*
   2654 		 * Invalid pHCI device, Nothing more to do.
   2655 		 */
   2656 		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci"));
   2657 		return (NULL);
   2658 	}
   2659 
   2660 	vh = ph->ph_vhci;
   2661 	if (vh == NULL) {
   2662 		/*
   2663 		 * Invalid vHCI device, Nothing more to do.
   2664 		 */
   2665 		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci"));
   2666 		return (NULL);
   2667 	}
   2668 
   2669 	/*
   2670 	 * Look for pathinfo node identified by paddr.
   2671 	 */
   2672 	if (caddr == NULL) {
   2673 		/*
   2674 		 * Find a mdi_pathinfo node under pHCI list for a matching
   2675 		 * unit address.
   2676 		 */
   2677 		MDI_PHCI_LOCK(ph);
   2678 		if (MDI_PHCI_IS_OFFLINE(ph)) {
   2679 			MDI_DEBUG(2, (MDI_WARN, pdip,
   2680 			    "offline phci %p", (void *)ph));
   2681 			MDI_PHCI_UNLOCK(ph);
   2682 			return (NULL);
   2683 		}
   2684 		pip = (mdi_pathinfo_t *)ph->ph_path_head;
   2685 
   2686 		while (pip != NULL) {
   2687 			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
   2688 				break;
   2689 			}
   2690 			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   2691 		}
   2692 		MDI_PHCI_UNLOCK(ph);
   2693 		MDI_DEBUG(2, (MDI_NOTE, pdip,
   2694 		    "found %s %p", mdi_pi_spathname(pip), (void *)pip));
   2695 		return (pip);
   2696 	}
   2697 
   2698 	/*
   2699 	 * XXX - Is the rest of the code in this function really necessary?
   2700 	 * The consumers of mdi_pi_find() can search for the desired pathinfo
   2701 	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
   2702 	 * whether the search is based on the pathinfo nodes attached to
   2703 	 * the pHCI or the client node, the result will be the same.
   2704 	 */
   2705 
   2706 	/*
   2707 	 * Find the client device corresponding to 'caddr'
   2708 	 */
   2709 	MDI_VHCI_CLIENT_LOCK(vh);
   2710 
   2711 	/*
   2712 	 * XXX - Passing NULL to the following function works as long as the
   2713 	 * the client addresses (caddr) are unique per vhci basis.
   2714 	 */
   2715 	ct = i_mdi_client_find(vh, NULL, caddr);
   2716 	if (ct == NULL) {
   2717 		/*
   2718 		 * Client not found, Obviously mdi_pathinfo node has not been
   2719 		 * created yet.
   2720 		 */
   2721 		MDI_VHCI_CLIENT_UNLOCK(vh);
   2722 		MDI_DEBUG(2, (MDI_NOTE, pdip,
   2723 		    "client not found for caddr @%s", caddr ? caddr : ""));
   2724 		return (NULL);
   2725 	}
   2726 
   2727 	/*
   2728 	 * Hold the client lock and look for a mdi_pathinfo node with matching
   2729 	 * pHCI and paddr
   2730 	 */
   2731 	MDI_CLIENT_LOCK(ct);
   2732 
   2733 	/*
   2734 	 * Release the global mutex as it is no more needed. Note: We always
   2735 	 * respect the locking order while acquiring.
   2736 	 */
   2737 	MDI_VHCI_CLIENT_UNLOCK(vh);
   2738 
   2739 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
   2740 	while (pip != NULL) {
   2741 		/*
   2742 		 * Compare the unit address
   2743 		 */
   2744 		if ((MDI_PI(pip)->pi_phci == ph) &&
   2745 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
   2746 			break;
   2747 		}
   2748 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
   2749 	}
   2750 	MDI_CLIENT_UNLOCK(ct);
   2751 	MDI_DEBUG(2, (MDI_NOTE, pdip,
   2752 	    "found: %s %p", mdi_pi_spathname(pip), (void *)pip));
   2753 	return (pip);
   2754 }
   2755 
   2756 /*
   2757  * mdi_pi_alloc():
   2758  *		Allocate and initialize a new instance of a mdi_pathinfo node.
   2759  *		The mdi_pathinfo node returned by this function identifies a
   2760  *		unique device path is capable of having properties attached
   2761  *		and passed to mdi_pi_online() to fully attach and online the
   2762  *		path and client device node.
   2763  *		The mdi_pathinfo node returned by this function must be
   2764  *		destroyed using mdi_pi_free() if the path is no longer
   2765  *		operational or if the caller fails to attach a client device
   2766  *		node when calling mdi_pi_online(). The framework will not free
   2767  *		the resources allocated.
   2768  *		This function can be called from both interrupt and kernel
   2769  *		contexts.  DDI_NOSLEEP flag should be used while calling
   2770  *		from interrupt contexts.
   2771  * Return Values:
   2772  *		MDI_SUCCESS
   2773  *		MDI_FAILURE
   2774  *		MDI_NOMEM
   2775  */
   2776 /*ARGSUSED*/
   2777 int
   2778 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
   2779     char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
   2780 {
   2781 	mdi_vhci_t	*vh;
   2782 	mdi_phci_t	*ph;
   2783 	mdi_client_t	*ct;
   2784 	mdi_pathinfo_t	*pip = NULL;
   2785 	dev_info_t	*cdip;
   2786 	int		rv = MDI_NOMEM;
   2787 	int		path_allocated = 0;
   2788 
   2789 	MDI_DEBUG(2, (MDI_NOTE, pdip,
   2790 	    "cname %s: caddr@%s paddr@%s",
   2791 	    cname ? cname : "", caddr ? caddr : "", paddr ? paddr : ""));
   2792 
   2793 	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
   2794 	    ret_pip == NULL) {
   2795 		/* Nothing more to do */
   2796 		return (MDI_FAILURE);
   2797 	}
   2798 
   2799 	*ret_pip = NULL;
   2800 
   2801 	/* No allocations on detaching pHCI */
   2802 	if (DEVI_IS_DETACHING(pdip)) {
   2803 		/* Invalid pHCI device, return failure */
   2804 		MDI_DEBUG(1, (MDI_WARN, pdip,
   2805 		    "!detaching pHCI=%p", (void *)pdip));
   2806 		return (MDI_FAILURE);
   2807 	}
   2808 
   2809 	ph = i_devi_get_phci(pdip);
   2810 	ASSERT(ph != NULL);
   2811 	if (ph == NULL) {
   2812 		/* Invalid pHCI device, return failure */
   2813 		MDI_DEBUG(1, (MDI_WARN, pdip,
   2814 		    "!invalid pHCI=%p", (void *)pdip));
   2815 		return (MDI_FAILURE);
   2816 	}
   2817 
   2818 	MDI_PHCI_LOCK(ph);
   2819 	vh = ph->ph_vhci;
   2820 	if (vh == NULL) {
   2821 		/* Invalid vHCI device, return failure */
   2822 		MDI_DEBUG(1, (MDI_WARN, pdip,
   2823 		    "!invalid vHCI=%p", (void *)pdip));
   2824 		MDI_PHCI_UNLOCK(ph);
   2825 		return (MDI_FAILURE);
   2826 	}
   2827 
   2828 	if (MDI_PHCI_IS_READY(ph) == 0) {
   2829 		/*
   2830 		 * Do not allow new node creation when pHCI is in
   2831 		 * offline/suspended states
   2832 		 */
   2833 		MDI_DEBUG(1, (MDI_WARN, pdip,
   2834 		    "pHCI=%p is not ready", (void *)ph));
   2835 		MDI_PHCI_UNLOCK(ph);
   2836 		return (MDI_BUSY);
   2837 	}
   2838 	MDI_PHCI_UNSTABLE(ph);
   2839 	MDI_PHCI_UNLOCK(ph);
   2840 
   2841 	/* look for a matching client, create one if not found */
   2842 	MDI_VHCI_CLIENT_LOCK(vh);
   2843 	ct = i_mdi_client_find(vh, cname, caddr);
   2844 	if (ct == NULL) {
   2845 		ct = i_mdi_client_alloc(vh, cname, caddr);
   2846 		ASSERT(ct != NULL);
   2847 	}
   2848 
   2849 	if (ct->ct_dip == NULL) {
   2850 		/*
   2851 		 * Allocate a devinfo node
   2852 		 */
   2853 		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
   2854 		    compatible, ncompatible);
   2855 		if (ct->ct_dip == NULL) {
   2856 			(void) i_mdi_client_free(vh, ct);
   2857 			goto fail;
   2858 		}
   2859 	}
   2860 	cdip = ct->ct_dip;
   2861 
   2862 	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
   2863 	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
   2864 
   2865 	MDI_CLIENT_LOCK(ct);
   2866 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
   2867 	while (pip != NULL) {
   2868 		/*
   2869 		 * Compare the unit address
   2870 		 */
   2871 		if ((MDI_PI(pip)->pi_phci == ph) &&
   2872 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
   2873 			break;
   2874 		}
   2875 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
   2876 	}
   2877 	MDI_CLIENT_UNLOCK(ct);
   2878 
   2879 	if (pip == NULL) {
   2880 		/*
   2881 		 * This is a new path for this client device.  Allocate and
   2882 		 * initialize a new pathinfo node
   2883 		 */
   2884 		pip = i_mdi_pi_alloc(ph, paddr, ct);
   2885 		ASSERT(pip != NULL);
   2886 		path_allocated = 1;
   2887 	}
   2888 	rv = MDI_SUCCESS;
   2889 
   2890 fail:
   2891 	/*
   2892 	 * Release the global mutex.
   2893 	 */
   2894 	MDI_VHCI_CLIENT_UNLOCK(vh);
   2895 
   2896 	/*
   2897 	 * Mark the pHCI as stable
   2898 	 */
   2899 	MDI_PHCI_LOCK(ph);
   2900 	MDI_PHCI_STABLE(ph);
   2901 	MDI_PHCI_UNLOCK(ph);
   2902 	*ret_pip = pip;
   2903 
   2904 	MDI_DEBUG(2, (MDI_NOTE, pdip,
   2905 	    "alloc %s %p", mdi_pi_spathname(pip), (void *)pip));
   2906 
   2907 	if (path_allocated)
   2908 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
   2909 
   2910 	return (rv);
   2911 }
   2912 
   2913 /*ARGSUSED*/
   2914 int
   2915 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
   2916     int flags, mdi_pathinfo_t **ret_pip)
   2917 {
   2918 	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
   2919 	    flags, ret_pip));
   2920 }
   2921 
   2922 /*
   2923  * i_mdi_pi_alloc():
   2924  *		Allocate a mdi_pathinfo node and add to the pHCI path list
   2925  * Return Values:
   2926  *		mdi_pathinfo
   2927  */
   2928 /*ARGSUSED*/
   2929 static mdi_pathinfo_t *
   2930 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
   2931 {
   2932 	mdi_pathinfo_t	*pip;
   2933 	int		ct_circular;
   2934 	int		ph_circular;
   2935 	static char	path[MAXPATHLEN];	/* mdi_pathmap_mutex protects */
   2936 	char		*path_persistent;
   2937 	int		path_instance;
   2938 	mod_hash_val_t	hv;
   2939 
   2940 	ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));
   2941 
   2942 	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
   2943 	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
   2944 	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
   2945 	    MDI_PATHINFO_STATE_TRANSIENT;
   2946 
   2947 	if (MDI_PHCI_IS_USER_DISABLED(ph))
   2948 		MDI_PI_SET_USER_DISABLE(pip);
   2949 
   2950 	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
   2951 		MDI_PI_SET_DRV_DISABLE_TRANS(pip);
   2952 
   2953 	if (MDI_PHCI_IS_DRV_DISABLED(ph))
   2954 		MDI_PI_SET_DRV_DISABLE(pip);
   2955 
   2956 	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
   2957 	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
   2958 	MDI_PI(pip)->pi_client = ct;
   2959 	MDI_PI(pip)->pi_phci = ph;
   2960 	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
   2961 	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
   2962 
   2963         /*
   2964 	 * We form the "path" to the pathinfo node, and see if we have
   2965 	 * already allocated a 'path_instance' for that "path".  If so,
   2966 	 * we use the already allocated 'path_instance'.  If not, we
   2967 	 * allocate a new 'path_instance' and associate it with a copy of
   2968 	 * the "path" string (which is never freed). The association
   2969 	 * between a 'path_instance' this "path" string persists until
   2970 	 * reboot.
   2971 	 */
   2972         mutex_enter(&mdi_pathmap_mutex);
   2973 	(void) ddi_pathname(ph->ph_dip, path);
   2974 	(void) sprintf(path + strlen(path), "/%s@%s",
   2975 	    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
   2976         if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
   2977                 path_instance = (uint_t)(intptr_t)hv;
   2978         } else {
   2979 		/* allocate a new 'path_instance' and persistent "path" */
   2980 		path_instance = mdi_pathmap_instance++;
   2981 		path_persistent = i_ddi_strdup(path, KM_SLEEP);
   2982                 (void) mod_hash_insert(mdi_pathmap_bypath,
   2983                     (mod_hash_key_t)path_persistent,
   2984                     (mod_hash_val_t)(intptr_t)path_instance);
   2985 		(void) mod_hash_insert(mdi_pathmap_byinstance,
   2986 		    (mod_hash_key_t)(intptr_t)path_instance,
   2987 		    (mod_hash_val_t)path_persistent);
   2988 
   2989 		/* create shortpath name */
   2990 		(void) snprintf(path, sizeof(path), "%s%d/%s@%s",
   2991 		    ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip),
   2992 		    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
   2993 		path_persistent = i_ddi_strdup(path, KM_SLEEP);
   2994 		(void) mod_hash_insert(mdi_pathmap_sbyinstance,
   2995 		    (mod_hash_key_t)(intptr_t)path_instance,
   2996 		    (mod_hash_val_t)path_persistent);
   2997         }
   2998         mutex_exit(&mdi_pathmap_mutex);
   2999 	MDI_PI(pip)->pi_path_instance = path_instance;
   3000 
   3001 	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
   3002 	ASSERT(MDI_PI(pip)->pi_prop != NULL);
   3003 	MDI_PI(pip)->pi_pprivate = NULL;
   3004 	MDI_PI(pip)->pi_cprivate = NULL;
   3005 	MDI_PI(pip)->pi_vprivate = NULL;
   3006 	MDI_PI(pip)->pi_client_link = NULL;
   3007 	MDI_PI(pip)->pi_phci_link = NULL;
   3008 	MDI_PI(pip)->pi_ref_cnt = 0;
   3009 	MDI_PI(pip)->pi_kstats = NULL;
   3010 	MDI_PI(pip)->pi_preferred = 1;
   3011 	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
   3012 
   3013 	/*
   3014 	 * Lock both dev_info nodes against changes in parallel.
   3015 	 *
   3016 	 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
   3017 	 * This atypical operation is done to synchronize pathinfo nodes
   3018 	 * during devinfo snapshot (see di_register_pip) by 'pretending' that
   3019 	 * the pathinfo nodes are children of the Client.
   3020 	 */
   3021 	ndi_devi_enter(ct->ct_dip, &ct_circular);
   3022 	ndi_devi_enter(ph->ph_dip, &ph_circular);
   3023 
   3024 	i_mdi_phci_add_path(ph, pip);
   3025 	i_mdi_client_add_path(ct, pip);
   3026 
   3027 	ndi_devi_exit(ph->ph_dip, ph_circular);
   3028 	ndi_devi_exit(ct->ct_dip, ct_circular);
   3029 
   3030 	return (pip);
   3031 }
   3032 
   3033 /*
   3034  * mdi_pi_pathname_by_instance():
   3035  *	Lookup of "path" by 'path_instance'. Return "path".
   3036  *	NOTE: returned "path" remains valid forever (until reboot).
   3037  */
   3038 char *
   3039 mdi_pi_pathname_by_instance(int path_instance)
   3040 {
   3041 	char		*path;
   3042 	mod_hash_val_t	hv;
   3043 
   3044 	/* mdi_pathmap lookup of "path" by 'path_instance' */
   3045 	mutex_enter(&mdi_pathmap_mutex);
   3046 	if (mod_hash_find(mdi_pathmap_byinstance,
   3047 	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
   3048 		path = (char *)hv;
   3049 	else
   3050 		path = NULL;
   3051 	mutex_exit(&mdi_pathmap_mutex);
   3052 	return (path);
   3053 }
   3054 
   3055 /*
   3056  * mdi_pi_spathname_by_instance():
   3057  *	Lookup of "shortpath" by 'path_instance'. Return "shortpath".
   3058  *	NOTE: returned "shortpath" remains valid forever (until reboot).
   3059  */
   3060 char *
   3061 mdi_pi_spathname_by_instance(int path_instance)
   3062 {
   3063 	char		*path;
   3064 	mod_hash_val_t	hv;
   3065 
   3066 	/* mdi_pathmap lookup of "path" by 'path_instance' */
   3067 	mutex_enter(&mdi_pathmap_mutex);
   3068 	if (mod_hash_find(mdi_pathmap_sbyinstance,
   3069 	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
   3070 		path = (char *)hv;
   3071 	else
   3072 		path = NULL;
   3073 	mutex_exit(&mdi_pathmap_mutex);
   3074 	return (path);
   3075 }
   3076 
   3077 
   3078 /*
   3079  * i_mdi_phci_add_path():
   3080  * 		Add a mdi_pathinfo node to pHCI list.
   3081  * Notes:
   3082  *		Caller should per-pHCI mutex
   3083  */
   3084 static void
   3085 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
   3086 {
   3087 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
   3088 
   3089 	MDI_PHCI_LOCK(ph);
   3090 	if (ph->ph_path_head == NULL) {
   3091 		ph->ph_path_head = pip;
   3092 	} else {
   3093 		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
   3094 	}
   3095 	ph->ph_path_tail = pip;
   3096 	ph->ph_path_count++;
   3097 	MDI_PHCI_UNLOCK(ph);
   3098 }
   3099 
   3100 /*
   3101  * i_mdi_client_add_path():
   3102  *		Add mdi_pathinfo node to client list
   3103  */
   3104 static void
   3105 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
   3106 {
   3107 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
   3108 
   3109 	MDI_CLIENT_LOCK(ct);
   3110 	if (ct->ct_path_head == NULL) {
   3111 		ct->ct_path_head = pip;
   3112 	} else {
   3113 		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
   3114 	}
   3115 	ct->ct_path_tail = pip;
   3116 	ct->ct_path_count++;
   3117 	MDI_CLIENT_UNLOCK(ct);
   3118 }
   3119 
   3120 /*
   3121  * mdi_pi_free():
   3122  *		Free the mdi_pathinfo node and also client device node if this
   3123  *		is the last path to the device
   3124  * Return Values:
   3125  *		MDI_SUCCESS
   3126  *		MDI_FAILURE
   3127  *		MDI_BUSY
   3128  */
   3129 /*ARGSUSED*/
   3130 int
   3131 mdi_pi_free(mdi_pathinfo_t *pip, int flags)
   3132 {
   3133 	int		rv = MDI_FAILURE;
   3134 	mdi_vhci_t	*vh;
   3135 	mdi_phci_t	*ph;
   3136 	mdi_client_t	*ct;
   3137 	int		(*f)();
   3138 	int		client_held = 0;
   3139 
   3140 	MDI_PI_LOCK(pip);
   3141 	ph = MDI_PI(pip)->pi_phci;
   3142 	ASSERT(ph != NULL);
   3143 	if (ph == NULL) {
   3144 		/*
   3145 		 * Invalid pHCI device, return failure
   3146 		 */
   3147 		MDI_DEBUG(1, (MDI_WARN, NULL,
   3148 		    "!invalid pHCI: pip %s %p",
   3149 		    mdi_pi_spathname(pip), (void *)pip));
   3150 		MDI_PI_UNLOCK(pip);
   3151 		return (MDI_FAILURE);
   3152 	}
   3153 
   3154 	vh = ph->ph_vhci;
   3155 	ASSERT(vh != NULL);
   3156 	if (vh == NULL) {
   3157 		/* Invalid pHCI device, return failure */
   3158 		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
   3159 		    "!invalid vHCI: pip %s %p",
   3160 		    mdi_pi_spathname(pip), (void *)pip));
   3161 		MDI_PI_UNLOCK(pip);
   3162 		return (MDI_FAILURE);
   3163 	}
   3164 
   3165 	ct = MDI_PI(pip)->pi_client;
   3166 	ASSERT(ct != NULL);
   3167 	if (ct == NULL) {
   3168 		/*
   3169 		 * Invalid Client device, return failure
   3170 		 */
   3171 		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
   3172 		    "!invalid client: pip %s %p",
   3173 		    mdi_pi_spathname(pip), (void *)pip));
   3174 		MDI_PI_UNLOCK(pip);
   3175 		return (MDI_FAILURE);
   3176 	}
   3177 
   3178 	/*
   3179 	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
   3180 	 * if the node state is either offline or init and the reference count
   3181 	 * is zero.
   3182 	 */
   3183 	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
   3184 	    MDI_PI_IS_INITING(pip))) {
   3185 		/*
   3186 		 * Node is busy
   3187 		 */
   3188 		MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
   3189 		    "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip));
   3190 		MDI_PI_UNLOCK(pip);
   3191 		return (MDI_BUSY);
   3192 	}
   3193 
   3194 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
   3195 		/*
   3196 		 * Give a chance for pending I/Os to complete.
   3197 		 */
   3198 		MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
   3199 		    "!%d cmds still pending on path: %s %p",
   3200 		    MDI_PI(pip)->pi_ref_cnt,
   3201 		    mdi_pi_spathname(pip), (void *)pip));
   3202 		if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
   3203 		    &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
   3204 		    TR_CLOCK_TICK) == -1) {
   3205 			/*
   3206 			 * The timeout time reached without ref_cnt being zero
   3207 			 * being signaled.
   3208 			 */
   3209 			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
   3210 			    "!Timeout reached on path %s %p without the cond",
   3211 			    mdi_pi_spathname(pip), (void *)pip));
   3212 			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
   3213 			    "!%d cmds still pending on path %s %p",
   3214 			    MDI_PI(pip)->pi_ref_cnt,
   3215 			    mdi_pi_spathname(pip), (void *)pip));
   3216 			MDI_PI_UNLOCK(pip);
   3217 			return (MDI_BUSY);
   3218 		}
   3219 	}
   3220 	if (MDI_PI(pip)->pi_pm_held) {
   3221 		client_held = 1;
   3222 	}
   3223 	MDI_PI_UNLOCK(pip);
   3224 
   3225 	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
   3226 
   3227 	MDI_CLIENT_LOCK(ct);
   3228 
   3229 	/* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
   3230 	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
   3231 
   3232 	/*
   3233 	 * Wait till failover is complete before removing this node.
   3234 	 */
   3235 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
   3236 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
   3237 
   3238 	MDI_CLIENT_UNLOCK(ct);
   3239 	MDI_VHCI_CLIENT_LOCK(vh);
   3240 	MDI_CLIENT_LOCK(ct);
   3241 	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
   3242 
   3243 	if (!MDI_PI_IS_INITING(pip)) {
   3244 		f = vh->vh_ops->vo_pi_uninit;
   3245 		if (f != NULL) {
   3246 			rv = (*f)(vh->vh_dip, pip, 0);
   3247 		}
   3248 	}
   3249 	/*
   3250 	 * If vo_pi_uninit() completed successfully.
   3251 	 */
   3252 	if (rv == MDI_SUCCESS) {
   3253 		if (client_held) {
   3254 			MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
   3255 			    "i_mdi_pm_rele_client\n"));
   3256 			i_mdi_pm_rele_client(ct, 1);
   3257 		}
   3258 		i_mdi_pi_free(ph, pip, ct);
   3259 		if (ct->ct_path_count == 0) {
   3260 			/*
   3261 			 * Client lost its last path.
   3262 			 * Clean up the client device
   3263 			 */
   3264 			MDI_CLIENT_UNLOCK(ct);
   3265 			(void) i_mdi_client_free(ct->ct_vhci, ct);
   3266 			MDI_VHCI_CLIENT_UNLOCK(vh);
   3267 			return (rv);
   3268 		}
   3269 	}
   3270 	MDI_CLIENT_UNLOCK(ct);
   3271 	MDI_VHCI_CLIENT_UNLOCK(vh);
   3272 
   3273 	if (rv == MDI_FAILURE)
   3274 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
   3275 
   3276 	return (rv);
   3277 }
   3278 
   3279 /*
   3280  * i_mdi_pi_free():
   3281  *		Free the mdi_pathinfo node
   3282  */
   3283 static void
   3284 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
   3285 {
   3286 	int	ct_circular;
   3287 	int	ph_circular;
   3288 
   3289 	ASSERT(MDI_CLIENT_LOCKED(ct));
   3290 
   3291 	/*
   3292 	 * remove any per-path kstats
   3293 	 */
   3294 	i_mdi_pi_kstat_destroy(pip);
   3295 
   3296 	/* See comments in i_mdi_pi_alloc() */
   3297 	ndi_devi_enter(ct->ct_dip, &ct_circular);
   3298 	ndi_devi_enter(ph->ph_dip, &ph_circular);
   3299 
   3300 	i_mdi_client_remove_path(ct, pip);
   3301 	i_mdi_phci_remove_path(ph, pip);
   3302 
   3303 	ndi_devi_exit(ph->ph_dip, ph_circular);
   3304 	ndi_devi_exit(ct->ct_dip, ct_circular);
   3305 
   3306 	mutex_destroy(&MDI_PI(pip)->pi_mutex);
   3307 	cv_destroy(&MDI_PI(pip)->pi_state_cv);
   3308 	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
   3309 	if (MDI_PI(pip)->pi_addr) {
   3310 		kmem_free(MDI_PI(pip)->pi_addr,
   3311 		    strlen(MDI_PI(pip)->pi_addr) + 1);
   3312 		MDI_PI(pip)->pi_addr = NULL;
   3313 	}
   3314 
   3315 	if (MDI_PI(pip)->pi_prop) {
   3316 		(void) nvlist_free(MDI_PI(pip)->pi_prop);
   3317 		MDI_PI(pip)->pi_prop = NULL;
   3318 	}
   3319 	kmem_free(pip, sizeof (struct mdi_pathinfo));
   3320 }
   3321 
   3322 
   3323 /*
   3324  * i_mdi_phci_remove_path():
   3325  * 		Remove a mdi_pathinfo node from pHCI list.
   3326  * Notes:
   3327  *		Caller should hold per-pHCI mutex
   3328  */
   3329 static void
   3330 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
   3331 {
   3332 	mdi_pathinfo_t	*prev = NULL;
   3333 	mdi_pathinfo_t	*path = NULL;
   3334 
   3335 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
   3336 
   3337 	MDI_PHCI_LOCK(ph);
   3338 	path = ph->ph_path_head;
   3339 	while (path != NULL) {
   3340 		if (path == pip) {
   3341 			break;
   3342 		}
   3343 		prev = path;
   3344 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
   3345 	}
   3346 
   3347 	if (path) {
   3348 		ph->ph_path_count--;
   3349 		if (prev) {
   3350 			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
   3351 		} else {
   3352 			ph->ph_path_head =
   3353 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
   3354 		}
   3355 		if (ph->ph_path_tail == path) {
   3356 			ph->ph_path_tail = prev;
   3357 		}
   3358 	}
   3359 
   3360 	/*
   3361 	 * Clear the pHCI link
   3362 	 */
   3363 	MDI_PI(pip)->pi_phci_link = NULL;
   3364 	MDI_PI(pip)->pi_phci = NULL;
   3365 	MDI_PHCI_UNLOCK(ph);
   3366 }
   3367 
   3368 /*
   3369  * i_mdi_client_remove_path():
   3370  * 		Remove a mdi_pathinfo node from client path list.
   3371  */
   3372 static void
   3373 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
   3374 {
   3375 	mdi_pathinfo_t	*prev = NULL;
   3376 	mdi_pathinfo_t	*path;
   3377 
   3378 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
   3379 
   3380 	ASSERT(MDI_CLIENT_LOCKED(ct));
   3381 	path = ct->ct_path_head;
   3382 	while (path != NULL) {
   3383 		if (path == pip) {
   3384 			break;
   3385 		}
   3386 		prev = path;
   3387 		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
   3388 	}
   3389 
   3390 	if (path) {
   3391 		ct->ct_path_count--;
   3392 		if (prev) {
   3393 			MDI_PI(prev)->pi_client_link =
   3394 			    MDI_PI(path)->pi_client_link;
   3395 		} else {
   3396 			ct->ct_path_head =
   3397 			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
   3398 		}
   3399 		if (ct->ct_path_tail == path) {
   3400 			ct->ct_path_tail = prev;
   3401 		}
   3402 		if (ct->ct_path_last == path) {
   3403 			ct->ct_path_last = ct->ct_path_head;
   3404 		}
   3405 	}
   3406 	MDI_PI(pip)->pi_client_link = NULL;
   3407 	MDI_PI(pip)->pi_client = NULL;
   3408 }
   3409 
   3410 /*
   3411  * i_mdi_pi_state_change():
   3412  *		Common state-transition worker used by mdi_pi_online(),
         *		mdi_pi_standby(), mdi_pi_fault() and mdi_pi_offline().
         *
         *		In outline: validate the pHCI/vHCI/client behind 'pip';
         *		run the vHCI's vo_pi_init() if the node is still initing;
         *		mark the pHCI unstable; wait out any transient state and
         *		any client failover; flag the node as transitioning to
         *		'state'; call the vHCI's vo_pi_state_change(); then settle
         *		the node state, update the client state and online or
         *		offline the client devinfo node as needed; finally mark
         *		the pHCI stable again.
         *
         *		'flag' is passed through to vo_pi_state_change() and is
         *		tested for NDI_USER_REQ.
   3413  *
   3414  * Return Values:
   3415  *		MDI_SUCCESS
   3416  *		MDI_FAILURE
         *		MDI_BUSY	(pHCI not ready, or an NDI_BUSY result)
         *		Any other value returned by vo_pi_state_change()
   3417  */
   3418 /*ARGSUSED*/
   3419 static int
   3420 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
   3421 {
   3422 	int		rv = MDI_SUCCESS;
   3423 	mdi_vhci_t	*vh;
   3424 	mdi_phci_t	*ph;
   3425 	mdi_client_t	*ct;
   3426 	int		(*f)();
   3427 	dev_info_t	*cdip;
   3428 
        	/* pi_phci and pi_client are read with the pathinfo lock held */
   3429 	MDI_PI_LOCK(pip);
   3430 
   3431 	ph = MDI_PI(pip)->pi_phci;
   3432 	ASSERT(ph);
   3433 	if (ph == NULL) {
   3434 		/*
   3435 		 * Invalid pHCI device, fail the request
   3436 		 */
   3437 		MDI_PI_UNLOCK(pip);
   3438 		MDI_DEBUG(1, (MDI_WARN, NULL,
   3439 		    "!invalid phci: pip %s %p",
   3440 		    mdi_pi_spathname(pip), (void *)pip));
   3441 		return (MDI_FAILURE);
   3442 	}
   3443 
   3444 	vh = ph->ph_vhci;
   3445 	ASSERT(vh);
   3446 	if (vh == NULL) {
   3447 		/*
   3448 		 * Invalid vHCI device, fail the request
   3449 		 */
   3450 		MDI_PI_UNLOCK(pip);
   3451 		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
   3452 		    "!invalid vhci: pip %s %p",
   3453 		    mdi_pi_spathname(pip), (void *)pip));
   3454 		return (MDI_FAILURE);
   3455 	}
   3456 
   3457 	ct = MDI_PI(pip)->pi_client;
   3458 	ASSERT(ct != NULL);
   3459 	if (ct == NULL) {
   3460 		/*
   3461 		 * Invalid client device, fail the request
   3462 		 */
   3463 		MDI_PI_UNLOCK(pip);
   3464 		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
   3465 		    "!invalid client: pip %s %p",
   3466 		    mdi_pi_spathname(pip), (void *)pip));
   3467 		return (MDI_FAILURE);
   3468 	}
   3469 
   3470 	/*
   3471 	 * If this path has not been initialized yet, Callback vHCI driver's
   3472 	 * pathinfo node initialize entry point
   3473 	 */
   3474 
   3475 	if (MDI_PI_IS_INITING(pip)) {
        		/* vo_pi_init() is called with the pathinfo lock dropped */
   3476 		MDI_PI_UNLOCK(pip);
   3477 		f = vh->vh_ops->vo_pi_init;
   3478 		if (f != NULL) {
   3479 			rv = (*f)(vh->vh_dip, pip, 0);
   3480 			if (rv != MDI_SUCCESS) {
   3481 				MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
   3482 				    "!vo_pi_init failed: vHCI %p, pip %s %p",
   3483 				    (void *)vh, mdi_pi_spathname(pip),
   3484 				    (void *)pip));
        				/*
        				 * pip is already unlocked and the pHCI has
        				 * not been marked unstable yet, so a plain
        				 * return is sufficient here.
        				 */
   3485 				return (MDI_FAILURE);
   3486 			}
   3487 		}
   3488 		MDI_PI_LOCK(pip);
   3489 		MDI_PI_CLEAR_TRANSIENT(pip);
   3490 	}
   3491 
   3492 	/*
   3493 	 * Do not allow state transition when pHCI is in offline/suspended
   3494 	 * states
   3495 	 */
   3496 	i_mdi_phci_lock(ph, pip);
   3497 	if (MDI_PHCI_IS_READY(ph) == 0) {
   3498 		MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
   3499 		    "!pHCI not ready, pHCI=%p", (void *)ph));
   3500 		MDI_PI_UNLOCK(pip);
   3501 		i_mdi_phci_unlock(ph);
   3502 		return (MDI_BUSY);
   3503 	}
        	/*
        	 * From here on every exit must go through state_change_exit (or
        	 * fall off the end) so that the pHCI is marked stable again.
        	 */
   3504 	MDI_PHCI_UNSTABLE(ph);
   3505 	i_mdi_phci_unlock(ph);
   3506 
   3507 	/*
   3508 	 * Check if mdi_pathinfo state is in transient state.
   3509 	 * If yes, offlining is in progress and wait till transient state is
   3510 	 * cleared.
        	 * (pi_state_cv is broadcast at the end of a state change, below.)
   3511 	 */
   3512 	if (MDI_PI_IS_TRANSIENT(pip)) {
   3513 		while (MDI_PI_IS_TRANSIENT(pip)) {
   3514 			cv_wait(&MDI_PI(pip)->pi_state_cv,
   3515 			    &MDI_PI(pip)->pi_mutex);
   3516 		}
   3517 	}
   3518 
   3519 	/*
   3520 	 * Grab the client lock in reverse order sequence and release the
   3521 	 * mdi_pathinfo mutex.
   3522 	 */
   3523 	i_mdi_client_lock(ct, pip);
   3524 	MDI_PI_UNLOCK(pip);
   3525 
   3526 	/*
   3527 	 * Wait till failover state is cleared
   3528 	 */
   3529 	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
   3530 		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
   3531 
   3532 	/*
   3533 	 * Mark the mdi_pathinfo node state as transient
        	 * (there is no default case: any other 'state' value leaves the
        	 * node's state untouched by this switch)
   3534 	 */
   3535 	MDI_PI_LOCK(pip);
   3536 	switch (state) {
   3537 	case MDI_PATHINFO_STATE_ONLINE:
   3538 		MDI_PI_SET_ONLINING(pip);
   3539 		break;
   3540 
   3541 	case MDI_PATHINFO_STATE_STANDBY:
   3542 		MDI_PI_SET_STANDBYING(pip);
   3543 		break;
   3544 
   3545 	case MDI_PATHINFO_STATE_FAULT:
   3546 		/*
   3547 		 * Mark the pathinfo state as FAULTED
   3548 		 */
   3549 		MDI_PI_SET_FAULTING(pip);
   3550 		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
   3551 		break;
   3552 
   3553 	case MDI_PATHINFO_STATE_OFFLINE:
   3554 		/*
   3555 		 * ndi_devi_offline() cannot hold pip or ct locks.
   3556 		 */
   3557 		MDI_PI_UNLOCK(pip);
   3558 
   3559 		/*
   3560 		 * If this is a user initiated path online->offline operation
   3561 		 * who's success would transition a client from DEGRADED to
   3562 		 * FAILED then only proceed if we can offline the client first.
   3563 		 */
   3564 		cdip = ct->ct_dip;
   3565 		if ((flag & NDI_USER_REQ) &&
   3566 		    MDI_PI_IS_ONLINE(pip) &&
   3567 		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
   3568 			i_mdi_client_unlock(ct);
   3569 			rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN);
   3570 			if (rv != NDI_SUCCESS) {
   3571 				/*
   3572 				 * Convert to MDI error code
   3573 				 */
   3574 				switch (rv) {
   3575 				case NDI_BUSY:
   3576 					rv = MDI_BUSY;
   3577 					break;
   3578 				default:
   3579 					rv = MDI_FAILURE;
   3580 					break;
   3581 				}
        				/*
        				 * Neither the client nor the pathinfo lock
        				 * is held at this point; only the pHCI
        				 * remains to be marked stable.
        				 */
   3582 				goto state_change_exit;
   3583 			} else {
        				/* re-take the client lock dropped above */
   3584 				i_mdi_client_lock(ct, NULL);
   3585 			}
   3586 		}
   3587 		/*
   3588 		 * Mark the mdi_pathinfo node state as transient
   3589 		 */
   3590 		MDI_PI_LOCK(pip);
   3591 		MDI_PI_SET_OFFLINING(pip);
   3592 		break;
   3593 	}
   3594 	MDI_PI_UNLOCK(pip);
   3595 	MDI_CLIENT_UNSTABLE(ct);
   3596 	i_mdi_client_unlock(ct);
   3597 
        	/*
        	 * Call out to the vHCI with neither the pathinfo lock nor the
        	 * client lock held.  If the vHCI provides no entry point, rv
        	 * keeps the value it already has.
        	 */
   3598 	f = vh->vh_ops->vo_pi_state_change;
   3599 	if (f != NULL)
   3600 		rv = (*f)(vh->vh_dip, pip, state, 0, flag);
   3601 
   3602 	MDI_CLIENT_LOCK(ct);
   3603 	MDI_PI_LOCK(pip);
   3604 	if (rv == MDI_NOT_SUPPORTED) {
   3605 		MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
   3606 	}
   3607 	if (rv != MDI_SUCCESS) {
   3608 		MDI_DEBUG(2, (MDI_WARN, ct->ct_dip,
   3609 		    "vo_pi_state_change failed: rv %x", rv));
   3610 	}
        	/*
        	 * Settle the transient state set above: commit it on success,
        	 * otherwise restore the state given by MDI_PI_OLD_STATE().
        	 */
   3611 	if (MDI_PI_IS_TRANSIENT(pip)) {
   3612 		if (rv == MDI_SUCCESS) {
   3613 			MDI_PI_CLEAR_TRANSIENT(pip);
   3614 		} else {
   3615 			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
   3616 		}
   3617 	}
   3618 
   3619 	/*
   3620 	 * Wake anyone waiting for this mdi_pathinfo node
   3621 	 */
   3622 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
   3623 	MDI_PI_UNLOCK(pip);
   3624 
   3625 	/*
   3626 	 * Mark the client device as stable
   3627 	 */
   3628 	MDI_CLIENT_STABLE(ct);
   3629 	if (rv == MDI_SUCCESS) {
        		/*
        		 * The client state is only re-evaluated once ct_unstable
        		 * has dropped to zero; otherwise MDI_SUCCESS is returned
        		 * without touching the client.
        		 */
   3630 		if (ct->ct_unstable == 0) {
   3631 			cdip = ct->ct_dip;
   3632 
   3633 			/*
   3634 			 * Onlining the mdi_pathinfo node will impact the
   3635 			 * client state Update the client and dev_info node
   3636 			 * state accordingly
        			 *
        			 * Note: rv holds NDI_* codes from here until it
        			 * is converted back to an MDI_* code below.
   3637 			 */
   3638 			rv = NDI_SUCCESS;
   3639 			i_mdi_client_update_state(ct);
   3640 			switch (MDI_CLIENT_STATE(ct)) {
   3641 			case MDI_CLIENT_STATE_OPTIMAL:
   3642 			case MDI_CLIENT_STATE_DEGRADED:
   3643 				if (cdip && !i_ddi_devi_attached(cdip) &&
   3644 				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
   3645 				    (state == MDI_PATHINFO_STATE_STANDBY))) {
   3646 
   3647 					/*
   3648 					 * Must do ndi_devi_online() through
   3649 					 * hotplug thread for deferred
   3650 					 * attach mechanism to work
   3651 					 */
   3652 					MDI_CLIENT_UNLOCK(ct);
   3653 					rv = ndi_devi_online(cdip, 0);
   3654 					MDI_CLIENT_LOCK(ct);
   3655 					if ((rv != NDI_SUCCESS) &&
   3656 					    (MDI_CLIENT_STATE(ct) ==
   3657 					    MDI_CLIENT_STATE_DEGRADED)) {
   3658 						/*
   3659 						 * ndi_devi_online failed.
   3660 						 * Reset client flags to
   3661 						 * offline.
   3662 						 */
   3663 						MDI_DEBUG(1, (MDI_WARN, cdip,
   3664 						    "!ndi_devi_online failed "
   3665 						    "error %x", rv));
   3666 						MDI_CLIENT_SET_OFFLINE(ct);
   3667 					}
   3668 					if (rv != NDI_SUCCESS) {
   3669 						/* Reset the path state */
   3670 						MDI_PI_LOCK(pip);
   3671 						MDI_PI(pip)->pi_state =
   3672 						    MDI_PI_OLD_STATE(pip);
   3673 						MDI_PI_UNLOCK(pip);
   3674 					}
   3675 				}
   3676 				break;
   3677 
   3678 			case MDI_CLIENT_STATE_FAILED:
   3679 				/*
   3680 				 * This is the last path case for
   3681 				 * non-user initiated events.
   3682 				 */
   3683 				if (((flag & NDI_USER_REQ) == 0) &&
   3684 				    cdip && (i_ddi_node_state(cdip) >=
   3685 				    DS_INITIALIZED)) {
        					/* client lock dropped across the offline */
   3686 					MDI_CLIENT_UNLOCK(ct);
   3687 					rv = ndi_devi_offline(cdip,
   3688 					    NDI_DEVFS_CLEAN);
   3689 					MDI_CLIENT_LOCK(ct);
   3690 
   3691 					if (rv != NDI_SUCCESS) {
   3692 						/*
   3693 						 * ndi_devi_offline failed.
   3694 						 * Reset client flags to
   3695 						 * online as the path could not
   3696 						 * be offlined.
   3697 						 */
   3698 						MDI_DEBUG(1, (MDI_WARN, cdip,
   3699 						    "!ndi_devi_offline failed: "
   3700 						    "error %x", rv));
   3701 						MDI_CLIENT_SET_ONLINE(ct);
   3702 					}
   3703 				}
   3704 				break;
   3705 			}
   3706 			/*
   3707 			 * Convert to MDI error code
   3708 			 */
   3709 			switch (rv) {
   3710 			case NDI_SUCCESS:
   3711 				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
   3712 				i_mdi_report_path_state(ct, pip);
   3713 				rv = MDI_SUCCESS;
   3714 				break;
   3715 			case NDI_BUSY:
   3716 				rv = MDI_BUSY;
   3717 				break;
   3718 			default:
   3719 				rv = MDI_FAILURE;
   3720 				break;
   3721 			}
   3722 		}
   3723 	}
   3724 	MDI_CLIENT_UNLOCK(ct);
   3725 
   3726 state_change_exit:
   3727 	/*
   3728 	 * Mark the pHCI as stable again.
   3729 	 */
   3730 	MDI_PHCI_LOCK(ph);
   3731 	MDI_PHCI_STABLE(ph);
   3732 	MDI_PHCI_UNLOCK(ph);
   3733 	return (rv);
   3734 }
   3735 
   3736 /*
   3737  * mdi_pi_online():
   3738  *		Place the path_info node in the online state.  The path is
   3739  *		now available to be selected by mdi_select_path() for
   3740  *		transporting I/O requests to client devices.
   3741  * Return Values:
   3742  *		MDI_SUCCESS
   3743  *		MDI_FAILURE
   3744  */
   3745 int
   3746 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
   3747 {
   3748 	mdi_client_t	*ct = MDI_PI(pip)->pi_client;
   3749 	int		client_held = 0;
   3750 	int		rv;
   3751 
   3752 	ASSERT(ct != NULL);
   3753 	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
   3754 	if (rv != MDI_SUCCESS)
   3755 		return (rv);
   3756 
   3757 	MDI_PI_LOCK(pip);
   3758 	if (MDI_PI(pip)->pi_pm_held == 0) {
   3759 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
   3760 		    "i_mdi_pm_hold_pip %p", (void *)pip));
   3761 		i_mdi_pm_hold_pip(pip);
   3762 		client_held = 1;
   3763 	}
   3764 	MDI_PI_UNLOCK(pip);
   3765 
   3766 	if (client_held) {
   3767 		MDI_CLIENT_LOCK(ct);
   3768 		if (ct->ct_power_cnt == 0) {
   3769 			rv = i_mdi_power_all_phci(ct);
   3770 		}
   3771 
   3772 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
   3773 		    "i_mdi_pm_hold_client %p", (void *)ct));
   3774 		i_mdi_pm_hold_client(ct, 1);
   3775 		MDI_CLIENT_UNLOCK(ct);
   3776 	}
   3777 
   3778 	return (rv);
   3779 }
   3780 
   3781 /*
   3782  * mdi_pi_standby():
   3783  *		Place the mdi_pathinfo node in standby state
   3784  *
   3785  * Return Values:
   3786  *		MDI_SUCCESS
   3787  *		MDI_FAILURE
   3788  */
   3789 int
   3790 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
   3791 {
   3792 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
   3793 }
   3794 
   3795 /*
   3796  * mdi_pi_fault():
   3797  *		Place the mdi_pathinfo node in fault'ed state
   3798  * Return Values:
   3799  *		MDI_SUCCESS
   3800  *		MDI_FAILURE
   3801  */
   3802 int
   3803 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
   3804 {
   3805 	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
   3806 }
   3807 
   3808 /*
   3809  * mdi_pi_offline():
   3810  *		Offline a mdi_pathinfo node.
   3811  * Return Values:
   3812  *		MDI_SUCCESS
   3813  *		MDI_FAILURE
   3814  */
   3815 int
   3816 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
   3817 {
   3818 	int	ret, client_held = 0;
   3819 	mdi_client_t	*ct;
   3820 
   3821 	/*
   3822 	 * Original code overloaded NDI_DEVI_REMOVE to this interface, and
   3823 	 * used it to mean "user initiated operation" (i.e. devctl). Callers
   3824 	 * should now just use NDI_USER_REQ.
   3825 	 */
   3826 	if (flags & NDI_DEVI_REMOVE) {
   3827 		flags &= ~NDI_DEVI_REMOVE;
   3828 		flags |= NDI_USER_REQ;
   3829 	}
   3830 
   3831 	ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
   3832 
   3833 	if (ret == MDI_SUCCESS) {
   3834 		MDI_PI_LOCK(pip);
   3835 		if (MDI_PI(pip)->pi_pm_held) {
   3836 			client_held = 1;
   3837 		}
   3838 		MDI_PI_UNLOCK(pip);
   3839 
   3840 		if (client_held) {
   3841 			ct = MDI_PI(pip)->pi_client;
   3842 			MDI_CLIENT_LOCK(ct);
   3843 			MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
   3844 			    "i_mdi_pm_rele_client\n"));
   3845 			i_mdi_pm_rele_client(ct, 1);
   3846 			MDI_CLIENT_UNLOCK(ct);
   3847 		}
   3848 	}
   3849 
   3850 	return (ret);
   3851 }
   3852 
   3853 /*
   3854  * i_mdi_pi_offline():
   3855  *		Offline a mdi_pathinfo node and call the vHCI driver's callback
   3856  */
   3857 static int
   3858 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
   3859 {
   3860 	dev_info_t	*vdip = NULL;
   3861 	mdi_vhci_t	*vh = NULL;
   3862 	mdi_client_t	*ct = NULL;
   3863 	int		(*f)();
   3864 	int		rv;
   3865 
   3866 	MDI_PI_LOCK(pip);
   3867 	ct = MDI_PI(pip)->pi_client;
   3868 	ASSERT(ct != NULL);
   3869 
   3870 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
   3871 		/*
   3872 		 * Give a chance for pending I/Os to complete.
   3873 		 */
   3874 		MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
   3875 		    "!%d cmds still pending on path %s %p",
   3876 		    MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip),
   3877 		    (void *)pip));
   3878 		if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
   3879 		    &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
   3880 		    TR_CLOCK_TICK) == -1) {
   3881 			/*
   3882 			 * The timeout time reached without ref_cnt being zero
   3883 			 * being signaled.
   3884 			 */
   3885 			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
   3886 			    "!Timeout reached on path %s %p without the cond",
   3887 			    mdi_pi_spathname(pip), (void *)pip));
   3888 			MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
   3889 			    "!%d cmds still pending on path %s %p",
   3890 			    MDI_PI(pip)->pi_ref_cnt,
   3891 			    mdi_pi_spathname(pip), (void *)pip));
   3892 		}
   3893 	}
   3894 	vh = ct->ct_vhci;
   3895 	vdip = vh->vh_dip;
   3896 
   3897 	/*
   3898 	 * Notify vHCI that has registered this event
   3899 	 */
   3900 	ASSERT(vh->vh_ops);
   3901 	f = vh->vh_ops->vo_pi_state_change;
   3902 
   3903 	if (f != NULL) {
   3904 		MDI_PI_UNLOCK(pip);
   3905 		if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
   3906 		    flags)) != MDI_SUCCESS) {
   3907 			MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
   3908 			    "!vo_path_offline failed: vdip %s%d %p: path %s %p",
   3909 			    ddi_driver_name(vdip), ddi_get_instance(vdip),
   3910 			    (void *)vdip, mdi_pi_spathname(pip), (void *)pip));
   3911 		}
   3912 		MDI_PI_LOCK(pip);
   3913 	}
   3914 
   3915 	/*
   3916 	 * Set the mdi_pathinfo node state and clear the transient condition
   3917 	 */
   3918 	MDI_PI_SET_OFFLINE(pip);
   3919 	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
   3920 	MDI_PI_UNLOCK(pip);
   3921 
   3922 	MDI_CLIENT_LOCK(ct);
   3923 	if (rv == MDI_SUCCESS) {
   3924 		if (ct->ct_unstable == 0) {
   3925 			dev_info_t	*cdip = ct->ct_dip;
   3926 
   3927 			/*
   3928 			 * Onlining the mdi_pathinfo node will impact the
   3929 			 * client state Update the client and dev_info node
   3930 			 * state accordingly
   3931 			 */
   3932 			i_mdi_client_update_state(ct);
   3933 			rv = NDI_SUCCESS;
   3934 			if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
   3935 				if (cdip &&
   3936 				    (i_ddi_node_state(cdip) >=
   3937 				    DS_INITIALIZED)) {
   3938 					MDI_CLIENT_UNLOCK(ct);
   3939 					rv = ndi_devi_offline(cdip,
   3940 					    NDI_DEVFS_CLEAN);
   3941 					MDI_CLIENT_LOCK(ct);
   3942 					if (rv != NDI_SUCCESS) {
   3943 						/*
   3944 						 * ndi_devi_offline failed.
   3945 						 * Reset client flags to
   3946 						 * online.
   3947 						 */
   3948 						MDI_DEBUG(4, (MDI_WARN, cdip,
   3949 						    "ndi_devi_offline failed: "
   3950 						    "error %x", rv));
   3951 						MDI_CLIENT_SET_ONLINE(ct);
   3952 					}
   3953 				}
   3954 			}
   3955 			/*
   3956 			 * Convert to MDI error code
   3957 			 */
   3958 			switch (rv) {
   3959 			case NDI_SUCCESS:
   3960 				rv = MDI_SUCCESS;
   3961 				break;
   3962 			case NDI_BUSY:
   3963 				rv = MDI_BUSY;
   3964 				break;
   3965 			default:
   3966 				rv = MDI_FAILURE;
   3967 				break;
   3968 			}
   3969 		}
   3970 		MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
   3971 		i_mdi_report_path_state(ct, pip);
   3972 	}
   3973 
   3974 	MDI_CLIENT_UNLOCK(ct);
   3975 
   3976 	/*
   3977 	 * Change in the mdi_pathinfo node state will impact the client state
   3978 	 */
   3979 	MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
   3980 	    "ct = %p pip = %p", (void *)ct, (void *)pip));
   3981 	return (rv);
   3982 }
   3983 
   3984 /*
   3985  * mdi_pi_get_node_name():
   3986  *              Get the name associated with a mdi_pathinfo node.
   3987  *              Since pathinfo nodes are not directly named, we
   3988  *              return the node_name of the client.
   3989  *
   3990  * Return Values:
   3991  *              char *
   3992  */
   3993 char *
   3994 mdi_pi_get_node_name(mdi_pathinfo_t *pip)
   3995 {
   3996 	mdi_client_t    *ct;
   3997 
   3998 	if (pip == NULL)
   3999 		return (NULL);
   4000 	ct = MDI_PI(pip)->pi_client;
   4001 	if ((ct == NULL) || (ct->ct_dip == NULL))
   4002 		return (NULL);
   4003 	return (ddi_node_name(ct->ct_dip));
   4004 }
   4005 
   4006 /*
   4007  * mdi_pi_get_addr():
   4008  *		Get the unit address associated with a mdi_pathinfo node
   4009  *
   4010  * Return Values:
   4011  *		char *
   4012  */
   4013 char *
   4014 mdi_pi_get_addr(mdi_pathinfo_t *pip)
   4015 {
   4016 	if (pip == NULL)
   4017 		return (NULL);
   4018 
   4019 	return (MDI_PI(pip)->pi_addr);
   4020 }
   4021 
   4022 /*
   4023  * mdi_pi_get_path_instance():
   4024  *		Get the 'path_instance' of a mdi_pathinfo node
   4025  *
   4026  * Return Values:
   4027  *		path_instance
   4028  */
   4029 int
   4030 mdi_pi_get_path_instance(mdi_pathinfo_t *pip)
   4031 {
   4032 	if (pip == NULL)
   4033 		return (0);
   4034 
   4035 	return (MDI_PI(pip)->pi_path_instance);
   4036 }
   4037 
   4038 /*
   4039  * mdi_pi_pathname():
   4040  *		Return pointer to path to pathinfo node.
   4041  */
   4042 char *
   4043 mdi_pi_pathname(mdi_pathinfo_t *pip)
   4044 {
   4045 	if (pip == NULL)
   4046 		return (NULL);
   4047 	return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip)));
   4048 }
   4049 
   4050 /*
   4051  * mdi_pi_spathname():
   4052  *		Return pointer to shortpath to pathinfo node. Used for debug
   4053  *		messages, so return "" instead of NULL when unknown.
   4054  */
   4055 char *
   4056 mdi_pi_spathname(mdi_pathinfo_t *pip)
   4057 {
   4058 	char	*spath = "";
   4059 
   4060 	if (pip) {
   4061 		spath = mdi_pi_spathname_by_instance(
   4062 		    mdi_pi_get_path_instance(pip));
   4063 		if (spath == NULL)
   4064 			spath = "";
   4065 	}
   4066 	return (spath);
   4067 }
   4068 
   4069 char *
   4070 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path)
   4071 {
   4072 	char *obp_path = NULL;
   4073 	if ((pip == NULL) || (path == NULL))
   4074 		return (NULL);
   4075 
   4076 	if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) {
   4077 		(void) strcpy(path, obp_path);
   4078 		(void) mdi_prop_free(obp_path);
   4079 	} else {
   4080 		path = NULL;
   4081 	}
   4082 	return (path);
   4083 }
   4084 
   4085 int
   4086 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component)
   4087 {
   4088 	dev_info_t *pdip;
   4089 	char *obp_path = NULL;
   4090 	int rc = MDI_FAILURE;
   4091 
   4092 	if (pip == NULL)
   4093 		return (MDI_FAILURE);
   4094 
   4095 	pdip = mdi_pi_get_phci(pip);
   4096 	if (pdip == NULL)
   4097 		return (MDI_FAILURE);
   4098 
   4099 	obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
   4100 
   4101 	if (ddi_pathname_obp(pdip, obp_path) == NULL) {
   4102 		(void) ddi_pathname(pdip, obp_path);
   4103 	}
   4104 
   4105 	if (component) {
   4106 		(void) strncat(obp_path, "/", MAXPATHLEN);
   4107 		(void) strncat(obp_path, component, MAXPATHLEN);
   4108 	}
   4109 	rc = mdi_prop_update_string(pip, "obp-path", obp_path);
   4110 
   4111 	if (obp_path)
   4112 		kmem_free(obp_path, MAXPATHLEN);
   4113 	return (rc);
   4114 }
   4115 
   4116 /*
   4117  * mdi_pi_get_client():
   4118  *		Get the client devinfo associated with a mdi_pathinfo node
   4119  *
   4120  * Return Values:
   4121  *		Handle to client device dev_info node
   4122  */
   4123 dev_info_t *
   4124 mdi_pi_get_client(mdi_pathinfo_t *pip)
   4125 {
   4126 	dev_info_t	*dip = NULL;
   4127 	if (pip) {
   4128 		dip = MDI_PI(pip)->pi_client->ct_dip;
   4129 	}
   4130 	return (dip);
   4131 }
   4132 
   4133 /*
   4134  * mdi_pi_get_phci():
   4135  *		Get the pHCI devinfo associated with the mdi_pathinfo node
   4136  * Return Values:
   4137  *		Handle to dev_info node
   4138  */
   4139 dev_info_t *
   4140 mdi_pi_get_phci(mdi_pathinfo_t *pip)
   4141 {
   4142 	dev_info_t	*dip = NULL;
   4143 	mdi_phci_t	*ph;
   4144 
   4145 	if (pip) {
   4146 		ph = MDI_PI(pip)->pi_phci;
   4147 		if (ph)
   4148 			dip = ph->ph_dip;
   4149 	}
   4150 	return (dip);
   4151 }
   4152 
   4153 /*
   4154  * mdi_pi_get_client_private():
   4155  *		Get the client private information associated with the
   4156  *		mdi_pathinfo node
   4157  */
   4158 void *
   4159 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
   4160 {
   4161 	void *cprivate = NULL;
   4162 	if (pip) {
   4163 		cprivate = MDI_PI(pip)->pi_cprivate;
   4164 	}
   4165 	return (cprivate);
   4166 }
   4167 
   4168 /*
   4169  * mdi_pi_set_client_private():
   4170  *		Set the client private information in the mdi_pathinfo node
   4171  */
   4172 void
   4173 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
   4174 {
   4175 	if (pip) {
   4176 		MDI_PI(pip)->pi_cprivate = priv;
   4177 	}
   4178 }
   4179 
   4180 /*
   4181  * mdi_pi_get_phci_private():
   4182  *		Get the pHCI private information associated with the
   4183  *		mdi_pathinfo node
   4184  */
   4185 caddr_t
   4186 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
   4187 {
   4188 	caddr_t	pprivate = NULL;
   4189 
   4190 	if (pip) {
   4191 		pprivate = MDI_PI(pip)->pi_pprivate;
   4192 	}
   4193 	return (pprivate);
   4194 }
   4195 
   4196 /*
   4197  * mdi_pi_set_phci_private():
   4198  *		Set the pHCI private information in the mdi_pathinfo node
   4199  */
   4200 void
   4201 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
   4202 {
   4203 	if (pip) {
   4204 		MDI_PI(pip)->pi_pprivate = priv;
   4205 	}
   4206 }
   4207 
   4208 /*
   4209  * mdi_pi_get_state():
   4210  *		Get the mdi_pathinfo node state. Transient states are internal
   4211  *		and not provided to the users
   4212  */
   4213 mdi_pathinfo_state_t
   4214 mdi_pi_get_state(mdi_pathinfo_t *pip)
   4215 {
   4216 	mdi_pathinfo_state_t    state = MDI_PATHINFO_STATE_INIT;
   4217 
   4218 	if (pip) {
   4219 		if (MDI_PI_IS_TRANSIENT(pip)) {
   4220 			/*
   4221 			 * mdi_pathinfo is in state transition.  Return the
   4222 			 * last good state.
   4223 			 */
   4224 			state = MDI_PI_OLD_STATE(pip);
   4225 		} else {
   4226 			state = MDI_PI_STATE(pip);
   4227 		}
   4228 	}
   4229 	return (state);
   4230 }
   4231 
   4232 /*
   4233  * mdi_pi_get_flags():
   4234  *		Get the mdi_pathinfo node flags.
   4235  */
   4236 uint_t
   4237 mdi_pi_get_flags(mdi_pathinfo_t *pip)
   4238 {
   4239 	return (pip ? MDI_PI(pip)->pi_flags : 0);
   4240 }
   4241 
   4242 /*
   4243  * Note that the following function needs to be the new interface for
   4244  * mdi_pi_get_state when mpxio gets integrated to ON.
   4245  */
   4246 int
   4247 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
   4248 		uint32_t *ext_state)
   4249 {
   4250 	*state = MDI_PATHINFO_STATE_INIT;
   4251 
   4252 	if (pip) {
   4253 		if (MDI_PI_IS_TRANSIENT(pip)) {
   4254 			/*
   4255 			 * mdi_pathinfo is in state transition.  Return the
   4256 			 * last good state.
   4257 			 */
   4258 			*state = MDI_PI_OLD_STATE(pip);
   4259 			*ext_state = MDI_PI_OLD_EXT_STATE(pip);
   4260 		} else {
   4261 			*state = MDI_PI_STATE(pip);
   4262 			*ext_state = MDI_PI_EXT_STATE(pip);
   4263 		}
   4264 	}
   4265 	return (MDI_SUCCESS);
   4266 }
   4267 
   4268 /*
   4269  * mdi_pi_get_preferred:
   4270  *	Get the preferred path flag
   4271  */
   4272 int
   4273 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
   4274 {
   4275 	if (pip) {
   4276 		return (MDI_PI(pip)->pi_preferred);
   4277 	}
   4278 	return (0);
   4279 }
   4280 
   4281 /*
   4282  * mdi_pi_set_preferred:
   4283  *	Set the preferred path flag
   4284  */
   4285 void
   4286 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
   4287 {
   4288 	if (pip) {
   4289 		MDI_PI(pip)->pi_preferred = preferred;
   4290 	}
   4291 }
   4292 
   4293 /*
   4294  * mdi_pi_set_state():
   4295  *		Set the mdi_pathinfo node state
   4296  */
   4297 void
   4298 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
   4299 {
   4300 	uint32_t	ext_state;
   4301 
   4302 	if (pip) {
   4303 		ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
   4304 		MDI_PI(pip)->pi_state = state;
   4305 		MDI_PI(pip)->pi_state |= ext_state;
   4306 
   4307 		/* Path has changed state, invalidate DINFOCACHE snap shot. */
   4308 		i_ddi_di_cache_invalidate();
   4309 	}
   4310 }
   4311 
   4312 /*
   4313  * Property functions:
   4314  */
   4315 int
   4316 i_map_nvlist_error_to_mdi(int val)
   4317 {
   4318 	int rv;
   4319 
   4320 	switch (val) {
   4321 	case 0:
   4322 		rv = DDI_PROP_SUCCESS;
   4323 		break;
   4324 	case EINVAL:
   4325 	case ENOTSUP:
   4326 		rv = DDI_PROP_INVAL_ARG;
   4327 		break;
   4328 	case ENOMEM:
   4329 		rv = DDI_PROP_NO_MEMORY;
   4330 		break;
   4331 	default:
   4332 		rv = DDI_PROP_NOT_FOUND;
   4333 		break;
   4334 	}
   4335 	return (rv);
   4336 }
   4337 
   4338 /*
   4339  * mdi_pi_get_next_prop():
   4340  * 		Property walk function.  The caller should hold mdi_pi_lock()
   4341  *		and release by calling mdi_pi_unlock() at the end of walk to
   4342  *		get a consistent value.
   4343  */
   4344 nvpair_t *
   4345 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
   4346 {
   4347 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
   4348 		return (NULL);
   4349 	}
   4350 	ASSERT(MDI_PI_LOCKED(pip));
   4351 	return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
   4352 }
   4353 
   4354 /*
   4355  * mdi_prop_remove():
   4356  * 		Remove the named property from the named list.
   4357  */
   4358 int
   4359 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
   4360 {
   4361 	if (pip == NULL) {
   4362 		return (DDI_PROP_NOT_FOUND);
   4363 	}
   4364 	ASSERT(!MDI_PI_LOCKED(pip));
   4365 	MDI_PI_LOCK(pip);
   4366 	if (MDI_PI(pip)->pi_prop == NULL) {
   4367 		MDI_PI_UNLOCK(pip);
   4368 		return (DDI_PROP_NOT_FOUND);
   4369 	}
   4370 	if (name) {
   4371 		(void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
   4372 	} else {
   4373 		char		nvp_name[MAXNAMELEN];
   4374 		nvpair_t	*nvp;
   4375 		nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
   4376 		while (nvp) {
   4377 			nvpair_t	*next;
   4378 			next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
   4379 			(void) snprintf(nvp_name, sizeof(nvp_name), "%s",
   4380 			    nvpair_name(nvp));
   4381 			(void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
   4382 			    nvp_name);
   4383 			nvp = next;
   4384 		}
   4385 	}
   4386 	MDI_PI_UNLOCK(pip);
   4387 	return (DDI_PROP_SUCCESS);
   4388 }
   4389 
   4390 /*
   4391  * mdi_prop_size():
   4392  * 		Get buffer size needed to pack the property data.
   4393  * 		Caller should hold the mdi_pathinfo_t lock to get a consistent
   4394  *		buffer size.
   4395  */
   4396 int
   4397 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
   4398 {
   4399 	int	rv;
   4400 	size_t	bufsize;
   4401 
   4402 	*buflenp = 0;
   4403 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
   4404 		return (DDI_PROP_NOT_FOUND);
   4405 	}
   4406 	ASSERT(MDI_PI_LOCKED(pip));
   4407 	rv = nvlist_size(MDI_PI(pip)->pi_prop,
   4408 	    &bufsize, NV_ENCODE_NATIVE);
   4409 	*buflenp = bufsize;
   4410 	return (i_map_nvlist_error_to_mdi(rv));
   4411 }
   4412 
   4413 /*
   4414  * mdi_prop_pack():
   4415  * 		pack the property list.  The caller should hold the
   4416  *		mdi_pathinfo_t node to get a consistent data
   4417  */
   4418 int
   4419 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
   4420 {
   4421 	int	rv;
   4422 	size_t	bufsize;
   4423 
   4424 	if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
   4425 		return (DDI_PROP_NOT_FOUND);
   4426 	}
   4427 
   4428 	ASSERT(MDI_PI_LOCKED(pip));
   4429 
   4430 	bufsize = buflen;
   4431 	rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
   4432 	    NV_ENCODE_NATIVE, KM_SLEEP);
   4433 
   4434 	return (i_map_nvlist_error_to_mdi(rv));
   4435 }
   4436 
   4437 /*
   4438  * mdi_prop_update_byte():
   4439  *		Create/Update a byte property
   4440  */
   4441 int
   4442 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
   4443 {
   4444 	int rv;
   4445 
   4446 	if (pip == NULL) {
   4447 		return (DDI_PROP_INVAL_ARG);
   4448 	}
   4449 	ASSERT(!MDI_PI_LOCKED(pip));
   4450 	MDI_PI_LOCK(pip);
   4451 	if (MDI_PI(pip)->pi_prop == NULL) {
   4452 		MDI_PI_UNLOCK(pip);
   4453 		return (DDI_PROP_NOT_FOUND);
   4454 	}
   4455 	rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
   4456 	MDI_PI_UNLOCK(pip);
   4457 	return (i_map_nvlist_error_to_mdi(rv));
   4458 }
   4459 
   4460 /*
   4461  * mdi_prop_update_byte_array():
   4462  *		Create/Update a byte array property
   4463  */
   4464 int
   4465 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
   4466     uint_t nelements)
   4467 {
   4468 	int rv;
   4469 
   4470 	if (pip == NULL) {
   4471 		return (DDI_PROP_INVAL_ARG);
   4472 	}
   4473 	ASSERT(!MDI_PI_LOCKED(pip));
   4474 	MDI_PI_LOCK(pip);
   4475 	if (MDI_PI(pip)->pi_prop == NULL) {
   4476 		MDI_PI_UNLOCK(pip);
   4477 		return (DDI_PROP_NOT_FOUND);
   4478 	}
   4479 	rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
   4480 	MDI_PI_UNLOCK(pip);
   4481 	return (i_map_nvlist_error_to_mdi(rv));
   4482 }
   4483 
   4484 /*
   4485  * mdi_prop_update_int():
   4486  *		Create/Update a 32 bit integer property
   4487  */
   4488 int
   4489 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
   4490 {
   4491 	int rv;
   4492 
   4493 	if (pip == NULL) {
   4494 		return (DDI_PROP_INVAL_ARG);
   4495 	}
   4496 	ASSERT(!MDI_PI_LOCKED(pip));
   4497 	MDI_PI_LOCK(pip);
   4498 	if (MDI_PI(pip)->pi_prop == NULL) {
   4499 		MDI_PI_UNLOCK(pip);
   4500 		return (DDI_PROP_NOT_FOUND);
   4501 	}
   4502 	rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
   4503 	MDI_PI_UNLOCK(pip);
   4504 	return (i_map_nvlist_error_to_mdi(rv));
   4505 }
   4506 
   4507 /*
   4508  * mdi_prop_update_int64():
   4509  *		Create/Update a 64 bit integer property
   4510  */
   4511 int
   4512 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
   4513 {
   4514 	int rv;
   4515 
   4516 	if (pip == NULL) {
   4517 		return (DDI_PROP_INVAL_ARG);
   4518 	}
   4519 	ASSERT(!MDI_PI_LOCKED(pip));
   4520 	MDI_PI_LOCK(pip);
   4521 	if (MDI_PI(pip)->pi_prop == NULL) {
   4522 		MDI_PI_UNLOCK(pip);
   4523 		return (DDI_PROP_NOT_FOUND);
   4524 	}
   4525 	rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
   4526 	MDI_PI_UNLOCK(pip);
   4527 	return (i_map_nvlist_error_to_mdi(rv));
   4528 }
   4529 
   4530 /*
   4531  * mdi_prop_update_int_array():
   4532  *		Create/Update a int array property
   4533  */
   4534 int
   4535 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
   4536 	    uint_t nelements)
   4537 {
   4538 	int rv;
   4539 
   4540 	if (pip == NULL) {
   4541 		return (DDI_PROP_INVAL_ARG);
   4542 	}
   4543 	ASSERT(!MDI_PI_LOCKED(pip));
   4544 	MDI_PI_LOCK(pip);
   4545 	if (MDI_PI(pip)->pi_prop == NULL) {
   4546 		MDI_PI_UNLOCK(pip);
   4547 		return (DDI_PROP_NOT_FOUND);
   4548 	}
   4549 	rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
   4550 	    nelements);
   4551 	MDI_PI_UNLOCK(pip);
   4552 	return (i_map_nvlist_error_to_mdi(rv));
   4553 }
   4554 
   4555 /*
   4556  * mdi_prop_update_string():
   4557  *		Create/Update a string property
   4558  */
   4559 int
   4560 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
   4561 {
   4562 	int rv;
   4563 
   4564 	if (pip == NULL) {
   4565 		return (DDI_PROP_INVAL_ARG);
   4566 	}
   4567 	ASSERT(!MDI_PI_LOCKED(pip));
   4568 	MDI_PI_LOCK(pip);
   4569 	if (MDI_PI(pip)->pi_prop == NULL) {
   4570 		MDI_PI_UNLOCK(pip);
   4571 		return (DDI_PROP_NOT_FOUND);
   4572 	}
   4573 	rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
   4574 	MDI_PI_UNLOCK(pip);
   4575 	return (i_map_nvlist_error_to_mdi(rv));
   4576 }
   4577 
   4578 /*
   4579  * mdi_prop_update_string_array():
   4580  *		Create/Update a string array property
   4581  */
   4582 int
   4583 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
   4584     uint_t nelements)
   4585 {
   4586 	int rv;
   4587 
   4588 	if (pip == NULL) {
   4589 		return (DDI_PROP_INVAL_ARG);
   4590 	}
   4591 	ASSERT(!MDI_PI_LOCKED(pip));
   4592 	MDI_PI_LOCK(pip);
   4593 	if (MDI_PI(pip)->pi_prop == NULL) {
   4594 		MDI_PI_UNLOCK(pip);
   4595 		return (DDI_PROP_NOT_FOUND);
   4596 	}
   4597 	rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
   4598 	    nelements);
   4599 	MDI_PI_UNLOCK(pip);
   4600 	return (i_map_nvlist_error_to_mdi(rv));
   4601 }
   4602 
   4603 /*
   4604  * mdi_prop_lookup_byte():
   4605  * 		Look for byte property identified by name.  The data returned
   4606  *		is the actual property and valid as long as mdi_pathinfo_t node
   4607  *		is alive.
   4608  */
   4609 int
   4610 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
   4611 {
   4612 	int rv;
   4613 
   4614 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
   4615 		return (DDI_PROP_NOT_FOUND);
   4616 	}
   4617 	rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
   4618 	return (i_map_nvlist_error_to_mdi(rv));
   4619 }
   4620 
   4621 
   4622 /*
   4623  * mdi_prop_lookup_byte_array():
   4624  * 		Look for byte array property identified by name.  The data
   4625  *		returned is the actual property and valid as long as
   4626  *		mdi_pathinfo_t node is alive.
   4627  */
   4628 int
   4629 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
   4630     uint_t *nelements)
   4631 {
   4632 	int rv;
   4633 
   4634 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
   4635 		return (DDI_PROP_NOT_FOUND);
   4636 	}
   4637 	rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
   4638 	    nelements);
   4639 	return (i_map_nvlist_error_to_mdi(rv));
   4640 }
   4641 
   4642 /*
   4643  * mdi_prop_lookup_int():
   4644  * 		Look for int property identified by name.  The data returned
   4645  *		is the actual property and valid as long as mdi_pathinfo_t
   4646  *		node is alive.
   4647  */
   4648 int
   4649 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
   4650 {
   4651 	int rv;
   4652 
   4653 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
   4654 		return (DDI_PROP_NOT_FOUND);
   4655 	}
   4656 	rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
   4657 	return (i_map_nvlist_error_to_mdi(rv));
   4658 }
   4659 
   4660 /*
   4661  * mdi_prop_lookup_int64():
   4662  * 		Look for int64 property identified by name.  The data returned
   4663  *		is the actual property and valid as long as mdi_pathinfo_t node
   4664  *		is alive.
   4665  */
   4666 int
   4667 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
   4668 {
   4669 	int rv;
   4670 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
   4671 		return (DDI_PROP_NOT_FOUND);
   4672 	}
   4673 	rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
   4674 	return (i_map_nvlist_error_to_mdi(rv));
   4675 }
   4676 
   4677 /*
   4678  * mdi_prop_lookup_int_array():
   4679  * 		Look for int array property identified by name.  The data
   4680  *		returned is the actual property and valid as long as
   4681  *		mdi_pathinfo_t node is alive.
   4682  */
   4683 int
   4684 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
   4685     uint_t *nelements)
   4686 {
   4687 	int rv;
   4688 
   4689 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
   4690 		return (DDI_PROP_NOT_FOUND);
   4691 	}
   4692 	rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
   4693 	    (int32_t **)data, nelements);
   4694 	return (i_map_nvlist_error_to_mdi(rv));
   4695 }
   4696 
   4697 /*
   4698  * mdi_prop_lookup_string():
   4699  * 		Look for string property identified by name.  The data
   4700  *		returned is the actual property and valid as long as
   4701  *		mdi_pathinfo_t node is alive.
   4702  */
   4703 int
   4704 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
   4705 {
   4706 	int rv;
   4707 
   4708 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
   4709 		return (DDI_PROP_NOT_FOUND);
   4710 	}
   4711 	rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
   4712 	return (i_map_nvlist_error_to_mdi(rv));
   4713 }
   4714 
   4715 /*
   4716  * mdi_prop_lookup_string_array():
   4717  * 		Look for string array property identified by name.  The data
   4718  *		returned is the actual property and valid as long as
   4719  *		mdi_pathinfo_t node is alive.
   4720  */
   4721 int
   4722 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
   4723     uint_t *nelements)
   4724 {
   4725 	int rv;
   4726 
   4727 	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
   4728 		return (DDI_PROP_NOT_FOUND);
   4729 	}
   4730 	rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
   4731 	    nelements);
   4732 	return (i_map_nvlist_error_to_mdi(rv));
   4733 }
   4734 
   4735 /*
   4736  * mdi_prop_free():
   4737  * 		Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
   4738  *		functions return the pointer to actual property data and not a
   4739  *		copy of it.  So the data returned is valid as long as
   4740  *		mdi_pathinfo_t node is valid.
   4741  */
   4742 /*ARGSUSED*/
   4743 int
   4744 mdi_prop_free(void *data)
   4745 {
   4746 	return (DDI_PROP_SUCCESS);
   4747 }
   4748 
   4749 /*ARGSUSED*/
   4750 static void
   4751 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
   4752 {
   4753 	char		*ct_path;
   4754 	char		*ct_status;
   4755 	char		*status;
   4756 	dev_info_t	*cdip = ct->ct_dip;
   4757 	char		lb_buf[64];
   4758 	int		report_lb_c = 0, report_lb_p = 0;
   4759 
   4760 	ASSERT(MDI_CLIENT_LOCKED(ct));
   4761 	if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) ||
   4762 	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
   4763 		return;
   4764 	}
   4765 	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
   4766 		ct_status = "optimal";
   4767 		report_lb_c = 1;
   4768 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
   4769 		ct_status = "degraded";
   4770 	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
   4771 		ct_status = "failed";
   4772 	} else {
   4773 		ct_status = "unknown";
   4774 	}
   4775 
   4776 	lb_buf[0] = 0;		/* not interested in load balancing config */
   4777 
   4778 	if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) {
   4779 		status = "removed";
   4780 	} else if (MDI_PI_IS_OFFLINE(pip)) {
   4781 		status = "offline";
   4782 	} else if (MDI_PI_IS_ONLINE(pip)) {
   4783 		status = "online";
   4784 		report_lb_p = 1;
   4785 	} else if (MDI_PI_IS_STANDBY(pip)) {
   4786 		status = "standby";
   4787 	} else if (MDI_PI_IS_FAULT(pip)) {
   4788 		status = "faulted";
   4789 	} else {
   4790 		status = "unknown";
   4791 	}
   4792 
   4793 	if (cdip) {
   4794 		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
   4795 
   4796 		/*
   4797 		 * NOTE: Keeping "multipath status: %s" and
   4798 		 * "Load balancing: %s" format unchanged in case someone
   4799 		 * scrubs /var/adm/messages looking for these messages.
   4800 		 */
   4801 		if (report_lb_c && report_lb_p) {
   4802 			if (ct->ct_lb == LOAD_BALANCE_LBA) {
   4803 				(void) snprintf(lb_buf, sizeof (lb_buf),
   4804 				    "%s, region-size: %d", mdi_load_balance_lba,
   4805 				    ct->ct_lb_args->region_size);
   4806 			} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
   4807 				(void) snprintf(lb_buf, sizeof (lb_buf),
   4808 				    "%s", mdi_load_balance_none);
   4809 			} else {
   4810 				(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
   4811 				    mdi_load_balance_rr);
   4812 			}
   4813 
   4814 			cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
   4815 			    "?%s (%s%d) multipath status: %s: "
   4816 			    "path %d %s is %s: Load balancing: %s\n",
   4817 			    ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
   4818 			    ddi_get_instance(cdip), ct_status,
   4819 			    mdi_pi_get_path_instance(pip),
   4820 			    mdi_pi_spathname(pip), status, lb_buf);
   4821 		} else {
   4822 			cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
   4823 			    "?%s (%s%d) multipath status: %s: "
   4824 			    "path %d %s is %s\n",
   4825 			    ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
   4826 			    ddi_get_instance(cdip), ct_status,
   4827 			    mdi_pi_get_path_instance(pip),
   4828 			    mdi_pi_spathname(pip), status);
   4829 		}
   4830 
   4831 		kmem_free(ct_path, MAXPATHLEN);
   4832 		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
   4833 	}
   4834 }
   4835 
   4836 #ifdef	DEBUG
   4837 /*
   4838  * i_mdi_log():
   4839  *		Utility function for error message management
   4840  *
   4841  *		NOTE: Implementation takes care of trailing \n for cmn_err,
   4842  *		MDI_DEBUG should not terminate fmt strings with \n.
   4843  *
   4844  *		NOTE: If the level is >= 2, and there is no leading !?^
   4845  *		then a leading ! is implied (but can be overriden via
   4846  *		mdi_debug_consoleonly). If you are using kmdb on the console,
   4847  *		consider setting mdi_debug_consoleonly to 1 as an aid.
   4848  */
   4849 /*PRINTFLIKE4*/
   4850 static void
   4851 i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...)
   4852 {
   4853 	char		name[MAXNAMELEN];
   4854 	char		buf[512];
   4855 	char		*bp;
   4856 	va_list		ap;
   4857 	int		log_only = 0;
   4858 	int		boot_only = 0;
   4859 	int		console_only = 0;
   4860 
   4861 	if (dip) {
   4862 		(void) snprintf(name, sizeof(name), "%s%d: ",
   4863 		    ddi_driver_name(dip), ddi_get_instance(dip));
   4864 	} else {
   4865 		name[0] = 0;
   4866 	}
   4867 
   4868 	va_start(ap, fmt);
   4869 	(void) vsnprintf(buf, sizeof(buf), fmt, ap);
   4870 	va_end(ap);
   4871 
   4872 	switch (buf[0]) {
   4873 	case '!':
   4874 		bp = &buf[1];
   4875 		log_only = 1;
   4876 		break;
   4877 	case '?':
   4878 		bp = &buf[1];
   4879 		boot_only = 1;
   4880 		break;
   4881 	case '^':
   4882 		bp = &buf[1];
   4883 		console_only = 1;
   4884 		break;
   4885 	default:
   4886 		if (level >= 2)
   4887 			log_only = 1;		/* ! implied */
   4888 		bp = buf;
   4889 		break;
   4890 	}
   4891 	if (mdi_debug_logonly) {
   4892 		log_only = 1;
   4893 		boot_only = 0;
   4894 		console_only = 0;
   4895 	}
   4896 	if (mdi_debug_consoleonly) {
   4897 		log_only = 0;
   4898 		boot_only = 0;
   4899 		console_only = 1;
   4900 		level = CE_NOTE;
   4901 		goto console;
   4902 	}
   4903 
   4904 	switch (level) {
   4905 	case CE_NOTE:
   4906 		level = CE_CONT;
   4907 		/* FALLTHROUGH */
   4908 	case CE_CONT:
   4909 		if (boot_only) {
   4910 			cmn_err(level, "?mdi: %s%s: %s\n", name, func, bp);
   4911 		} else if (console_only) {
   4912 			cmn_err(level, "^mdi: %s%s: %s\n", name, func, bp);
   4913 		} else if (log_only) {
   4914 			cmn_err(level, "!mdi: %s%s: %s\n", name, func, bp);
   4915 		} else {
   4916 			cmn_err(level, "mdi: %s%s: %s\n", name, func, bp);
   4917 		}
   4918 		break;
   4919 
   4920 	case CE_WARN:
   4921 	case CE_PANIC:
   4922 	console:
   4923 		if (boot_only) {
   4924 			cmn_err(level, "?mdi: %s%s: %s", name, func, bp);
   4925 		} else if (console_only) {
   4926 			cmn_err(level, "^mdi: %s%s: %s", name, func, bp);
   4927 		} else if (log_only) {
   4928 			cmn_err(level, "!mdi: %s%s: %s", name, func, bp);
   4929 		} else {
   4930 			cmn_err(level, "mdi: %s%s: %s", name, func, bp);
   4931 		}
   4932 		break;
   4933 	default:
   4934 		cmn_err(level, "mdi: %s%s", name, bp);
   4935 		break;
   4936 	}
   4937 }
   4938 #endif	/* DEBUG */
   4939 
   4940 void
   4941 i_mdi_client_online(dev_info_t *ct_dip)
   4942 {
   4943 	mdi_client_t	*ct;
   4944 
   4945 	/*
   4946 	 * Client online notification. Mark client state as online
   4947 	 * restore our binding with dev_info node
   4948 	 */
   4949 	ct = i_devi_get_client(ct_dip);
   4950 	ASSERT(ct != NULL);
   4951 	MDI_CLIENT_LOCK(ct);
   4952 	MDI_CLIENT_SET_ONLINE(ct);
   4953 	/* catch for any memory leaks */
   4954 	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
   4955 	ct->ct_dip = ct_dip;
   4956 
   4957 	if (ct->ct_power_cnt == 0)
   4958 		(void) i_mdi_power_all_phci(ct);
   4959 
   4960 	MDI_DEBUG(4, (MDI_NOTE, ct_dip,
   4961 	    "i_mdi_pm_hold_client %p", (void *)ct));
   4962 	i_mdi_pm_hold_client(ct, 1);
   4963 
   4964 	MDI_CLIENT_UNLOCK(ct);
   4965 }
   4966 
   4967 void
   4968 i_mdi_phci_online(dev_info_t *ph_dip)
   4969 {
   4970 	mdi_phci_t	*ph;
   4971 
   4972 	/* pHCI online notification. Mark state accordingly */
   4973 	ph = i_devi_get_phci(ph_dip);
   4974 	ASSERT(ph != NULL);
   4975 	MDI_PHCI_LOCK(ph);
   4976 	MDI_PHCI_SET_ONLINE(ph);
   4977 	MDI_PHCI_UNLOCK(ph);
   4978 }
   4979 
   4980 /*
   4981  * mdi_devi_online():
   4982  * 		Online notification from NDI framework on pHCI/client
   4983  *		device online.
   4984  * Return Values:
   4985  *		NDI_SUCCESS
   4986  *		MDI_FAILURE
   4987  */
   4988 /*ARGSUSED*/
   4989 int
   4990 mdi_devi_online(dev_info_t *dip, uint_t flags)
   4991 {
   4992 	if (MDI_PHCI(dip)) {
   4993 		i_mdi_phci_online(dip);
   4994 	}
   4995 
   4996 	if (MDI_CLIENT(dip)) {
   4997 		i_mdi_client_online(dip);
   4998 	}
   4999 	return (NDI_SUCCESS);
   5000 }
   5001 
   5002 /*
   5003  * mdi_devi_offline():
   5004  * 		Offline notification from NDI framework on pHCI/Client device
   5005  *		offline.
   5006  *
   5007  * Return Values:
   5008  *		NDI_SUCCESS
   5009  *		NDI_FAILURE
   5010  */
   5011 /*ARGSUSED*/
   5012 int
   5013 mdi_devi_offline(dev_info_t *dip, uint_t flags)
   5014 {
   5015 	int		rv = NDI_SUCCESS;
   5016 
   5017 	if (MDI_CLIENT(dip)) {
   5018 		rv = i_mdi_client_offline(dip, flags);
   5019 		if (rv != NDI_SUCCESS)
   5020 			return (rv);
   5021 	}
   5022 
   5023 	if (MDI_PHCI(dip)) {
   5024 		rv = i_mdi_phci_offline(dip, flags);
   5025 
   5026 		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
   5027 			/* set client back online */
   5028 			i_mdi_client_online(dip);
   5029 		}
   5030 	}
   5031 
   5032 	return (rv);
   5033 }
   5034 
        /*
         * i_mdi_phci_offline():
         *		Offline handler for the pHCI component of dip.
         *
         *		Phase 1 walks every path of the pHCI, refusing the offline
         *		if a client is unstable or failing over, and offlining (via
         *		ndi_devi_offline()) each client for which
         *		i_mdi_client_compute_state() reports MDI_CLIENT_STATE_FAILED.
         *		If one of those client offlines fails, the paths visited
         *		before it are walked again and their clients are onlined or
         *		offlined according to their current client state.
         *		Phase 2 marks the pHCI offline, flags every path as
         *		offlining, waits delay_random(mdi_delay), and then offlines
         *		each path with i_mdi_pi_offline().
         *
         * Return Values:
         *		NDI_SUCCESS	dip has no pHCI, the pHCI was already
         *				offline, or all paths went offline
         *		NDI_BUSY	pHCI/client in transient state, a client
         *				offline failed, or a path stayed online
         */
   5035 /*ARGSUSED*/
   5036 static int
   5037 i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
   5038 {
   5039 	int		rv = NDI_SUCCESS;
   5040 	mdi_phci_t	*ph;
   5041 	mdi_client_t	*ct;
   5042 	mdi_pathinfo_t	*pip;
   5043 	mdi_pathinfo_t	*next;
   5044 	mdi_pathinfo_t	*failed_pip = NULL;
   5045 	dev_info_t	*cdip;
   5046 
   5047 	/*
   5048 	 * pHCI component offline notification
   5049 	 * Make sure that this pHCI instance is free to be offlined.
   5050 	 * If it is OK to proceed, Offline and remove all the child
   5051 	 * mdi_pathinfo nodes.  This process automatically offlines
   5052 	 * corresponding client devices, for which this pHCI provides
   5053 	 * critical services.
   5054 	 */
   5055 	ph = i_devi_get_phci(dip);
   5056 	MDI_DEBUG(2, (MDI_NOTE, dip,
   5057 	    "called %p %p", (void *)dip, (void *)ph));
   5058 	if (ph == NULL) {
   5059 		return (rv);
   5060 	}
   5061 
   5062 	MDI_PHCI_LOCK(ph);
   5063 
   5064 	if (MDI_PHCI_IS_OFFLINE(ph)) {
   5065 		MDI_DEBUG(1, (MDI_WARN, dip,
   5066 		    "!pHCI already offlined: %p", (void *)dip));
   5067 		MDI_PHCI_UNLOCK(ph);
   5068 		return (NDI_SUCCESS);
   5069 	}
   5070 
   5071 	/*
   5072 	 * Check to see if the pHCI can be offlined
   5073 	 */
   5074 	if (ph->ph_unstable) {
   5075 		MDI_DEBUG(1, (MDI_WARN, dip,
   5076 		    "!One or more target devices are in transient state. "
   5077 		    "This device can not be removed at this moment. "
   5078 		    "Please try again later."));
   5079 		MDI_PHCI_UNLOCK(ph);
   5080 		return (NDI_BUSY);
   5081 	}
   5082 
        	/*
        	 * Phase 1: walk the pHCI's path list.  'next' is captured under
        	 * the path lock before anything else, because the pHCI lock is
        	 * dropped further down around ndi_devi_offline().
        	 */
   5083 	pip = ph->ph_path_head;
   5084 	while (pip != NULL) {
   5085 		MDI_PI_LOCK(pip);
   5086 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   5087 
   5088 		/*
   5089 		 * The mdi_pathinfo state is OK. Check the client state.
   5090 		 * If failover in progress fail the pHCI from offlining
   5091 		 */
   5092 		ct = MDI_PI(pip)->pi_client;
   5093 		i_mdi_client_lock(ct, pip);
   5094 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
   5095 		    (ct->ct_unstable)) {
   5096 			/*
   5097 			 * Failover is in progress, Fail the DR
   5098 			 */
   5099 			MDI_DEBUG(1, (MDI_WARN, dip,
   5100 			    "!pHCI device is busy. "
   5101 			    "This device can not be removed at this moment. "
   5102 			    "Please try again later."));
   5103 			MDI_PI_UNLOCK(pip);
   5104 			i_mdi_client_unlock(ct);
   5105 			MDI_PHCI_UNLOCK(ph);
   5106 			return (NDI_BUSY);
   5107 		}
   5108 		MDI_PI_UNLOCK(pip);
   5109 
   5110 		/*
   5111 		 * Check to see if we are removing the last path of this
   5112 		 * client device...
   5113 		 */
   5114 		cdip = ct->ct_dip;
   5115 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
   5116 		    (i_mdi_client_compute_state(ct, ph) ==
   5117 		    MDI_CLIENT_STATE_FAILED)) {
        			/*
        			 * Both the client lock and the pHCI lock are
        			 * released before calling into the NDI framework;
        			 * the pHCI lock is retaken on either outcome.
        			 */
   5118 			i_mdi_client_unlock(ct);
   5119 			MDI_PHCI_UNLOCK(ph);
   5120 			if (ndi_devi_offline(cdip,
   5121 			    NDI_DEVFS_CLEAN) != NDI_SUCCESS) {
   5122 				/*
   5123 				 * ndi_devi_offline() failed.
   5124 				 * This pHCI provides the critical path
   5125 				 * to one or more client devices.
   5126 				 * Return busy.
   5127 				 */
   5128 				MDI_PHCI_LOCK(ph);
   5129 				MDI_DEBUG(1, (MDI_WARN, dip,
   5130 				    "!pHCI device is busy. "
   5131 				    "This device can not be removed at this "
   5132 				    "moment. Please try again later."));
        				/* remember where to stop the walk below */
   5133 				failed_pip = pip;
   5134 				break;
   5135 			} else {
   5136 				MDI_PHCI_LOCK(ph);
   5137 				pip = next;
   5138 			}
   5139 		} else {
   5140 			i_mdi_client_unlock(ct);
   5141 			pip = next;
   5142 		}
   5143 	}
   5144 
        	/*
        	 * A client offline failed.  Revisit every path that precedes
        	 * failed_pip on the list: a client whose state is OPTIMAL or
        	 * DEGRADED is onlined, one whose state is FAILED is offlined
        	 * (both only when it has a dev_info node), and then NDI_BUSY is
        	 * returned.  The pHCI lock is dropped around each NDI call.
        	 */
   5145 	if (failed_pip) {
   5146 		pip = ph->ph_path_head;
   5147 		while (pip != failed_pip) {
   5148 			MDI_PI_LOCK(pip);
   5149 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   5150 			ct = MDI_PI(pip)->pi_client;
   5151 			i_mdi_client_lock(ct, pip);
   5152 			cdip = ct->ct_dip;
   5153 			switch (MDI_CLIENT_STATE(ct)) {
   5154 			case MDI_CLIENT_STATE_OPTIMAL:
   5155 			case MDI_CLIENT_STATE_DEGRADED:
   5156 				if (cdip) {
   5157 					MDI_PI_UNLOCK(pip);
   5158 					i_mdi_client_unlock(ct);
   5159 					MDI_PHCI_UNLOCK(ph);
   5160 					(void) ndi_devi_online(cdip, 0);
   5161 					MDI_PHCI_LOCK(ph);
   5162 					pip = next;
   5163 					continue;
   5164 				}
   5165 				break;
   5166 
   5167 			case MDI_CLIENT_STATE_FAILED:
   5168 				if (cdip) {
   5169 					MDI_PI_UNLOCK(pip);
   5170 					i_mdi_client_unlock(ct);
   5171 					MDI_PHCI_UNLOCK(ph);
   5172 					(void) ndi_devi_offline(cdip,
   5173 						NDI_DEVFS_CLEAN);
   5174 					MDI_PHCI_LOCK(ph);
   5175 					pip = next;
   5176 					continue;
   5177 				}
   5178 				break;
   5179 			}
        			/* no dev_info node or other state: just move on */
   5180 			MDI_PI_UNLOCK(pip);
   5181 			i_mdi_client_unlock(ct);
   5182 			pip = next;
   5183 		}
   5184 		MDI_PHCI_UNLOCK(ph);
   5185 		return (NDI_BUSY);
   5186 	}
   5187 
   5188 	/*
   5189 	 * Mark the pHCI as offline
   5190 	 */
   5191 	MDI_PHCI_SET_OFFLINE(ph);
   5192 
   5193 	/*
   5194 	 * Mark the child mdi_pathinfo nodes as transient
   5195 	 */
   5196 	pip = ph->ph_path_head;
   5197 	while (pip != NULL) {
   5198 		MDI_PI_LOCK(pip);
   5199 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   5200 		MDI_PI_SET_OFFLINING(pip);
   5201 		MDI_PI_UNLOCK(pip);
   5202 		pip = next;
   5203 	}
   5204 	MDI_PHCI_UNLOCK(ph);
   5205 	/*
   5206 	 * Give a chance for any pending commands to execute
   5207 	 */
   5208 	delay_random(mdi_delay);
   5209 	MDI_PHCI_LOCK(ph);
        	/*
        	 * Phase 2: offline each path.  The first path that is still
        	 * not offline after i_mdi_pi_offline() aborts the operation:
        	 * the pHCI is flagged online again and NDI_BUSY is returned.
        	 * NOTE(review): paths already processed in this loop are not
        	 * reverted here -- confirm that is handled elsewhere.
        	 */
   5210 	pip = ph->ph_path_head;
   5211 	while (pip != NULL) {
   5212 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   5213 		(void) i_mdi_pi_offline(pip, flags);
   5214 		MDI_PI_LOCK(pip);
        		/* NOTE(review): ct is assigned here but not read again */
   5215 		ct = MDI_PI(pip)->pi_client;
   5216 		if (!MDI_PI_IS_OFFLINE(pip)) {
   5217 			MDI_DEBUG(1, (MDI_WARN, dip,
   5218 			    "!pHCI device is busy. "
   5219 			    "This device can not be removed at this moment. "
   5220 			    "Please try again later."));
   5221 			MDI_PI_UNLOCK(pip);
   5222 			MDI_PHCI_SET_ONLINE(ph);
   5223 			MDI_PHCI_UNLOCK(ph);
   5224 			return (NDI_BUSY);
   5225 		}
   5226 		MDI_PI_UNLOCK(pip);
   5227 		pip = next;
   5228 	}
   5229 	MDI_PHCI_UNLOCK(ph);
   5230 
   5231 	return (rv);
   5232 }
   5233 
   5234 void
   5235 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
   5236 {
   5237 	mdi_phci_t	*ph;
   5238 	mdi_client_t	*ct;
   5239 	mdi_pathinfo_t	*pip;
   5240 	mdi_pathinfo_t	*next;
   5241 	dev_info_t	*cdip;
   5242 
   5243 	if (!MDI_PHCI(dip))
   5244 		return;
   5245 
   5246 	ph = i_devi_get_phci(dip);
   5247 	if (ph == NULL) {
   5248 		return;
   5249 	}
   5250 
   5251 	MDI_PHCI_LOCK(ph);
   5252 
   5253 	if (MDI_PHCI_IS_OFFLINE(ph)) {
   5254 		/* has no last path */
   5255 		MDI_PHCI_UNLOCK(ph);
   5256 		return;
   5257 	}
   5258 
   5259 	pip = ph->ph_path_head;
   5260 	while (pip != NULL) {
   5261 		MDI_PI_LOCK(pip);
   5262 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   5263 
   5264 		ct = MDI_PI(pip)->pi_client;
   5265 		i_mdi_client_lock(ct, pip);
   5266 		MDI_PI_UNLOCK(pip);
   5267 
   5268 		cdip = ct->ct_dip;
   5269 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
   5270 		    (i_mdi_client_compute_state(ct, ph) ==
   5271 		    MDI_CLIENT_STATE_FAILED)) {
   5272 			/* Last path. Mark client dip as retiring */
   5273 			i_mdi_client_unlock(ct);
   5274 			MDI_PHCI_UNLOCK(ph);
   5275 			(void) e_ddi_mark_retiring(cdip, cons_array);
   5276 			MDI_PHCI_LOCK(ph);
   5277 			pip = next;
   5278 		} else {
   5279 			i_mdi_client_unlock(ct);
   5280 			pip = next;
   5281 		}
   5282 	}
   5283 
   5284 	MDI_PHCI_UNLOCK(ph);
   5285 
   5286 	return;
   5287 }
   5288 
   5289 void
   5290 mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
   5291 {
   5292 	mdi_phci_t	*ph;
   5293 	mdi_client_t	*ct;
   5294 	mdi_pathinfo_t	*pip;
   5295 	mdi_pathinfo_t	*next;
   5296 	dev_info_t	*cdip;
   5297 
   5298 	if (!MDI_PHCI(dip))
   5299 		return;
   5300 
   5301 	ph = i_devi_get_phci(dip);
   5302 	if (ph == NULL)
   5303 		return;
   5304 
   5305 	MDI_PHCI_LOCK(ph);
   5306 
   5307 	if (MDI_PHCI_IS_OFFLINE(ph)) {
   5308 		MDI_PHCI_UNLOCK(ph);
   5309 		/* not last path */
   5310 		return;
   5311 	}
   5312 
   5313 	if (ph->ph_unstable) {
   5314 		MDI_PHCI_UNLOCK(ph);
   5315 		/* can't check for constraints */
   5316 		*constraint = 0;
   5317 		return;
   5318 	}
   5319 
   5320 	pip = ph->ph_path_head;
   5321 	while (pip != NULL) {
   5322 		MDI_PI_LOCK(pip);
   5323 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   5324 
   5325 		/*
   5326 		 * The mdi_pathinfo state is OK. Check the client state.
   5327 		 * If failover in progress fail the pHCI from offlining
   5328 		 */
   5329 		ct = MDI_PI(pip)->pi_client;
   5330 		i_mdi_client_lock(ct, pip);
   5331 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
   5332 		    (ct->ct_unstable)) {
   5333 			/*
   5334 			 * Failover is in progress, can't check for constraints
   5335 			 */
   5336 			MDI_PI_UNLOCK(pip);
   5337 			i_mdi_client_unlock(ct);
   5338 			MDI_PHCI_UNLOCK(ph);
   5339 			*constraint = 0;
   5340 			return;
   5341 		}
   5342 		MDI_PI_UNLOCK(pip);
   5343 
   5344 		/*
   5345 		 * Check to see of we are retiring the last path of this
   5346 		 * client device...
   5347 		 */
   5348 		cdip = ct->ct_dip;
   5349 		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
   5350 		    (i_mdi_client_compute_state(ct, ph) ==
   5351 		    MDI_CLIENT_STATE_FAILED)) {
   5352 			i_mdi_client_unlock(ct);
   5353 			MDI_PHCI_UNLOCK(ph);
   5354 			(void) e_ddi_retire_notify(cdip, constraint);
   5355 			MDI_PHCI_LOCK(ph);
   5356 			pip = next;
   5357 		} else {
   5358 			i_mdi_client_unlock(ct);
   5359 			pip = next;
   5360 		}
   5361 	}
   5362 
   5363 	MDI_PHCI_UNLOCK(ph);
   5364 
   5365 	return;
   5366 }
   5367 
   5368 /*
   5369  * offline the path(s) hanging off the pHCI. If the
   5370  * last path to any client, check that constraints
   5371  * have been applied.
        *
        * dip is the pHCI node; the call is a no-op when dip is not a pHCI, has
        * no pHCI structure, or the pHCI is already offline.  When phci_only is
        * non-zero the per-client e_ddi_retire_finalize() step is skipped.
        *
        * Nothing is returned: if the pHCI or one of its clients is in a
        * transient state, or a path cannot be offlined, a CE_WARN message is
        * logged and the pHCI is left (or put back) online.
   5372  */
   5373 void
   5374 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only)
   5375 {
   5376 	mdi_phci_t	*ph;
   5377 	mdi_client_t	*ct;
   5378 	mdi_pathinfo_t	*pip;
   5379 	mdi_pathinfo_t	*next;
   5380 	dev_info_t	*cdip;
   5381 	int		unstable = 0;
   5382 	int		constraint;
   5383 
   5384 	if (!MDI_PHCI(dip))
   5385 		return;
   5386 
   5387 	ph = i_devi_get_phci(dip);
   5388 	if (ph == NULL) {
   5389 		/* no last path and no pips */
   5390 		return;
   5391 	}
   5392 
   5393 	MDI_PHCI_LOCK(ph);
   5394 
   5395 	if (MDI_PHCI_IS_OFFLINE(ph)) {
   5396 		MDI_PHCI_UNLOCK(ph);
   5397 		/* no last path and no pips */
   5398 		return;
   5399 	}
   5400 
   5401 	/*
   5402 	 * Check to see if the pHCI can be offlined
        	 * (instability is only recorded here; the walk below still runs
        	 * and the decision is taken after it)
   5403 	 */
   5404 	if (ph->ph_unstable) {
   5405 		unstable = 1;
   5406 	}
   5407 
   5408 	pip = ph->ph_path_head;
   5409 	while (pip != NULL) {
   5410 		MDI_PI_LOCK(pip);
        		/* capture the successor before any lock is dropped */
   5411 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   5412 
   5413 		/*
   5414 		 * if failover in progress fail the pHCI from offlining
   5415 		 */
   5416 		ct = MDI_PI(pip)->pi_client;
   5417 		i_mdi_client_lock(ct, pip);
   5418 		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
   5419 		    (ct->ct_unstable)) {
   5420 			unstable = 1;
   5421 		}
   5422 		MDI_PI_UNLOCK(pip);
   5423 
   5424 		/*
   5425 		 * Check to see if we are removing the last path of this
   5426 		 * client device...
   5427 		 */
   5428 		cdip = ct->ct_dip;
   5429 		if (!phci_only && cdip &&
   5430 		    (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
   5431 		    (i_mdi_client_compute_state(ct, ph) ==
   5432 		    MDI_CLIENT_STATE_FAILED)) {
        			/* drop client and pHCI locks around the call out */
   5433 			i_mdi_client_unlock(ct);
   5434 			MDI_PHCI_UNLOCK(ph);
   5435 			/*
   5436 			 * We don't retire clients we just retire the
   5437 			 * path to a client. If it is the last path
   5438 			 * to a client, constraints are checked and
   5439 			 * if we pass the last path is offlined. MPXIO will
   5440 			 * then fail all I/Os to the client. Since we don't
   5441 			 * want to retire the client on a path error
   5442 			 * set constraint = 0 so that the client dip
   5443 			 * is not retired.
   5444 			 */
   5445 			constraint = 0;
   5446 			(void) e_ddi_retire_finalize(cdip, &constraint);
   5447 			MDI_PHCI_LOCK(ph);
   5448 			pip = next;
   5449 		} else {
   5450 			i_mdi_client_unlock(ct);
   5451 			pip = next;
   5452 		}
   5453 	}
   5454 
   5455 	/*
   5456 	 * Cannot offline pip(s)
        	 * (note that e_ddi_retire_finalize() has already been called
        	 * above for any last-path clients by the time we bail out here)
   5457 	 */
   5458 	if (unstable) {
   5459 		cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: "
   5460 		    "pHCI in transient state, cannot retire",
   5461 		    ddi_driver_name(dip), ddi_get_instance(dip));
   5462 		MDI_PHCI_UNLOCK(ph);
   5463 		return;
   5464 	}
   5465 
   5466 	/*
   5467 	 * Mark the pHCI as offline
   5468 	 */
   5469 	MDI_PHCI_SET_OFFLINE(ph);
   5470 
   5471 	/*
   5472 	 * Mark the child mdi_pathinfo nodes as transient
   5473 	 */
   5474 	pip = ph->ph_path_head;
   5475 	while (pip != NULL) {
   5476 		MDI_PI_LOCK(pip);
   5477 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   5478 		MDI_PI_SET_OFFLINING(pip);
   5479 		MDI_PI_UNLOCK(pip);
   5480 		pip = next;
   5481 	}
   5482 	MDI_PHCI_UNLOCK(ph);
   5483 	/*
   5484 	 * Give a chance for any pending commands to execute
   5485 	 */
   5486 	delay_random(mdi_delay);
   5487 	MDI_PHCI_LOCK(ph);
        	/*
        	 * Offline each path (with flags of 0).  The first path that is
        	 * still not offline afterwards aborts the retire: a warning is
        	 * logged and the pHCI is flagged online again.
        	 */
   5488 	pip = ph->ph_path_head;
   5489 	while (pip != NULL) {
   5490 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   5491 		(void) i_mdi_pi_offline(pip, 0);
   5492 		MDI_PI_LOCK(pip);
        		/* NOTE(review): ct is assigned here but not read again */
   5493 		ct = MDI_PI(pip)->pi_client;
   5494 		if (!MDI_PI_IS_OFFLINE(pip)) {
   5495 			cmn_err(CE_WARN, "mdi_phci_retire_finalize: "
   5496 			    "path %d %s busy, cannot offline",
   5497 			    mdi_pi_get_path_instance(pip),
   5498 			    mdi_pi_spathname(pip));
   5499 			MDI_PI_UNLOCK(pip);
   5500 			MDI_PHCI_SET_ONLINE(ph);
   5501 			MDI_PHCI_UNLOCK(ph);
   5502 			return;
   5503 		}
   5504 		MDI_PI_UNLOCK(pip);
   5505 		pip = next;
   5506 	}
   5507 	MDI_PHCI_UNLOCK(ph);
   5508 
   5509 	return;
   5510 }
   5511 
   5512 void
   5513 mdi_phci_unretire(dev_info_t *dip)
   5514 {
   5515 	ASSERT(MDI_PHCI(dip));
   5516 
   5517 	/*
   5518 	 * Online the phci
   5519 	 */
   5520 	i_mdi_phci_online(dip);
   5521 }
   5522 
   5523 /*ARGSUSED*/
   5524 static int
   5525 i_mdi_client_offline(dev_info_t *dip, uint_t flags)
   5526 {
   5527 	int		rv = NDI_SUCCESS;
   5528 	mdi_client_t	*ct;
   5529 
   5530 	/*
   5531 	 * Client component to go offline.  Make sure that we are
   5532 	 * not in failing over state and update client state
   5533 	 * accordingly
   5534 	 */
   5535 	ct = i_devi_get_client(dip);
   5536 	MDI_DEBUG(2, (MDI_NOTE, dip,
   5537 	    "called %p %p", (void *)dip, (void *)ct));
   5538 	if (ct != NULL) {
   5539 		MDI_CLIENT_LOCK(ct);
   5540 		if (ct->ct_unstable) {
   5541 			/*
   5542 			 * One or more paths are in transient state,
   5543 			 * Dont allow offline of a client device
   5544 			 */
   5545 			MDI_DEBUG(1, (MDI_WARN, dip,
   5546 			    "!One or more paths to "
   5547 			    "this device are in transient state. "
   5548 			    "This device can not be removed at this moment. "
   5549 			    "Please try again later."));
   5550 			MDI_CLIENT_UNLOCK(ct);
   5551 			return (NDI_BUSY);
   5552 		}
   5553 		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
   5554 			/*
   5555 			 * Failover is in progress, Dont allow DR of
   5556 			 * a client device
   5557 			 */
   5558 			MDI_DEBUG(1, (MDI_WARN, dip,
   5559 			    "!Client device is Busy. "
   5560 			    "This device can not be removed at this moment. "
   5561 			    "Please try again later."));
   5562 			MDI_CLIENT_UNLOCK(ct);
   5563 			return (NDI_BUSY);
   5564 		}
   5565 		MDI_CLIENT_SET_OFFLINE(ct);
   5566 
   5567 		/*
   5568 		 * Unbind our relationship with the dev_info node
   5569 		 */
   5570 		if (flags & NDI_DEVI_REMOVE) {
   5571 			ct->ct_dip = NULL;
   5572 		}
   5573 		MDI_CLIENT_UNLOCK(ct);
   5574 	}
   5575 	return (rv);
   5576 }
   5577 
   5578 /*
   5579  * mdi_pre_attach():
   5580  *		Pre attach() notification handler
   5581  */
   5582 /*ARGSUSED*/
   5583 int
   5584 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
   5585 {
   5586 	/* don't support old DDI_PM_RESUME */
   5587 	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
   5588 	    (cmd == DDI_PM_RESUME))
   5589 		return (DDI_FAILURE);
   5590 
   5591 	return (DDI_SUCCESS);
   5592 }
   5593 
   5594 /*
   5595  * mdi_post_attach():
   5596  *		Post attach() notification handler
   5597  */
   5598 /*ARGSUSED*/
   5599 void
   5600 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
   5601 {
   5602 	mdi_phci_t	*ph;
   5603 	mdi_client_t	*ct;
   5604 	mdi_vhci_t	*vh;
   5605 
   5606 	if (MDI_PHCI(dip)) {
   5607 		ph = i_devi_get_phci(dip);
   5608 		ASSERT(ph != NULL);
   5609 
   5610 		MDI_PHCI_LOCK(ph);
   5611 		switch (cmd) {
   5612 		case DDI_ATTACH:
   5613 			MDI_DEBUG(2, (MDI_NOTE, dip,
   5614 			    "phci post_attach called %p", (void *)ph));
   5615 			if (error == DDI_SUCCESS) {
   5616 				MDI_PHCI_SET_ATTACH(ph);
   5617 			} else {
   5618 				MDI_DEBUG(1, (MDI_NOTE, dip,
   5619 				    "!pHCI post_attach failed: error %d",
   5620 				    error));
   5621 				MDI_PHCI_SET_DETACH(ph);
   5622 			}
   5623 			break;
   5624 
   5625 		case DDI_RESUME:
   5626 			MDI_DEBUG(2, (MDI_NOTE, dip,
   5627 			    "pHCI post_resume: called %p", (void *)ph));
   5628 			if (error == DDI_SUCCESS) {
   5629 				MDI_PHCI_SET_RESUME(ph);
   5630 			} else {
   5631 				MDI_DEBUG(1, (MDI_NOTE, dip,
   5632 				    "!pHCI post_resume failed: error %d",
   5633 				    error));
   5634 				MDI_PHCI_SET_SUSPEND(ph);
   5635 			}
   5636 			break;
   5637 		}
   5638 		MDI_PHCI_UNLOCK(ph);
   5639 	}
   5640 
   5641 	if (MDI_CLIENT(dip)) {
   5642 		ct = i_devi_get_client(dip);
   5643 		ASSERT(ct != NULL);
   5644 
   5645 		MDI_CLIENT_LOCK(ct);
   5646 		switch (cmd) {
   5647 		case DDI_ATTACH:
   5648 			MDI_DEBUG(2, (MDI_NOTE, dip,
   5649 			    "client post_attach called %p", (void *)ct));
   5650 			if (error != DDI_SUCCESS) {
   5651 				MDI_DEBUG(1, (MDI_NOTE, dip,
   5652 				    "!client post_attach failed: error %d",
   5653 				    error));
   5654 				MDI_CLIENT_SET_DETACH(ct);
   5655 				MDI_DEBUG(4, (MDI_WARN, dip,
   5656 				    "i_mdi_pm_reset_client"));
   5657 				i_mdi_pm_reset_client(ct);
   5658 				break;
   5659 			}
   5660 
   5661 			/*
   5662 			 * Client device has successfully attached, inform
   5663 			 * the vhci.
   5664 			 */
   5665 			vh = ct->ct_vhci;
   5666 			if (vh->vh_ops->vo_client_attached)
   5667 				(*vh->vh_ops->vo_client_attached)(dip);
   5668 
   5669 			MDI_CLIENT_SET_ATTACH(ct);
   5670 			break;
   5671 
   5672 		case DDI_RESUME:
   5673 			MDI_DEBUG(2, (MDI_NOTE, dip,
   5674 			    "client post_attach: called %p", (void *)ct));
   5675 			if (error == DDI_SUCCESS) {
   5676 				MDI_CLIENT_SET_RESUME(ct);
   5677 			} else {
   5678 				MDI_DEBUG(1, (MDI_NOTE, dip,
   5679 				    "!client post_resume failed: error %d",
   5680 				    error));
   5681 				MDI_CLIENT_SET_SUSPEND(ct);
   5682 			}
   5683 			break;
   5684 		}
   5685 		MDI_CLIENT_UNLOCK(ct);
   5686 	}
   5687 }
   5688 
   5689 /*
   5690  * mdi_pre_detach():
   5691  *		Pre detach notification handler
   5692  */
   5693 /*ARGSUSED*/
   5694 int
   5695 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
   5696 {
   5697 	int rv = DDI_SUCCESS;
   5698 
   5699 	if (MDI_CLIENT(dip)) {
   5700 		(void) i_mdi_client_pre_detach(dip, cmd);
   5701 	}
   5702 
   5703 	if (MDI_PHCI(dip)) {
   5704 		rv = i_mdi_phci_pre_detach(dip, cmd);
   5705 	}
   5706 
   5707 	return (rv);
   5708 }
   5709 
        /*
         * i_mdi_phci_pre_detach():
         *		Pre detach/suspend handler for the pHCI component of dip.
         *
         *		DDI_DETACH:  refused unless the pHCI is already offline;
         *		otherwise the pHCI is marked detached.
         *		DDI_SUSPEND: every client reachable through this pHCI that
         *		is neither detached nor suspended is suspended first with
         *		devi_detach(DDI_SUSPEND).  If one of them fails, the
         *		clients on the paths visited before it that are flagged
         *		suspended are resumed again and the failure is returned.
         *
         *		The pHCI lock is held for the whole switch, including the
         *		devi_detach()/devi_attach() calls.
         *
         * Return Values:
         *		DDI_SUCCESS	(also when dip has no pHCI structure)
         *		DDI_FAILURE	detach with pHCI not offline, or an
         *				unsupported cmd
         *		status of the failing devi_detach() for DDI_SUSPEND
         */
   5710 /*ARGSUSED*/
   5711 static int
   5712 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
   5713 {
   5714 	int		rv = DDI_SUCCESS;
   5715 	mdi_phci_t	*ph;
   5716 	mdi_client_t	*ct;
   5717 	mdi_pathinfo_t	*pip;
   5718 	mdi_pathinfo_t	*failed_pip = NULL;
   5719 	mdi_pathinfo_t	*next;
   5720 
   5721 	ph = i_devi_get_phci(dip);
   5722 	if (ph == NULL) {
   5723 		return (rv);
   5724 	}
   5725 
   5726 	MDI_PHCI_LOCK(ph);
   5727 	switch (cmd) {
   5728 	case DDI_DETACH:
   5729 		MDI_DEBUG(2, (MDI_NOTE, dip,
   5730 		    "pHCI pre_detach: called %p", (void *)ph));
   5731 		if (!MDI_PHCI_IS_OFFLINE(ph)) {
   5732 			/*
   5733 			 * mdi_pathinfo nodes are still attached to
   5734 			 * this pHCI. Fail the detach for this pHCI.
   5735 			 */
   5736 			MDI_DEBUG(2, (MDI_WARN, dip,
   5737 			    "pHCI pre_detach: paths are still attached %p",
   5738 			    (void *)ph));
   5739 			rv = DDI_FAILURE;
   5740 			break;
   5741 		}
   5742 		MDI_PHCI_SET_DETACH(ph);
   5743 		break;
   5744 
   5745 	case DDI_SUSPEND:
   5746 		/*
   5747 		 * pHCI is getting suspended.  Since mpxio client
   5748 		 * devices may not be suspended at this point, to avoid
   5749 		 * a potential stack overflow, it is important to suspend
   5750 		 * client devices before pHCI can be suspended.
   5751 		 */
   5752 
   5753 		MDI_DEBUG(2, (MDI_NOTE, dip,
   5754 		    "pHCI pre_suspend: called %p", (void *)ph));
   5755 		/*
   5756 		 * Suspend all the client devices accessible through this pHCI
   5757 		 */
   5758 		pip = ph->ph_path_head;
   5759 		while (pip != NULL && rv == DDI_SUCCESS) {
   5760 			dev_info_t *cdip;
   5761 			MDI_PI_LOCK(pip);
   5762 			next =
   5763 			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   5764 			ct = MDI_PI(pip)->pi_client;
   5765 			i_mdi_client_lock(ct, pip);
        			/* snapshot the client node while the client is locked */
   5766 			cdip = ct->ct_dip;
   5767 			MDI_PI_UNLOCK(pip);
   5768 			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
   5769 			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
        				/* client lock is released before the suspend */
   5770 				i_mdi_client_unlock(ct);
   5771 				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
   5772 				    DDI_SUCCESS) {
   5773 					/*
   5774 					 * Suspend of one of the client
   5775 					 * device has failed.
   5776 					 */
   5777 					MDI_DEBUG(1, (MDI_WARN, dip,
   5778 					    "!suspend of device (%s%d) failed.",
   5779 					    ddi_driver_name(cdip),
   5780 					    ddi_get_instance(cdip)));
        					/* leaves the while loop, not the switch */
   5781 					failed_pip = pip;
   5782 					break;
   5783 				}
   5784 			} else {
   5785 				i_mdi_client_unlock(ct);
   5786 			}
   5787 			pip = next;
   5788 		}
   5789 
   5790 		if (rv == DDI_SUCCESS) {
   5791 			/*
   5792 			 * Suspend of client devices is complete. Proceed
   5793 			 * with pHCI suspend.
   5794 			 */
   5795 			MDI_PHCI_SET_SUSPEND(ph);
   5796 		} else {
   5797 			/*
   5798 			 * Revert back all the suspended client device states
   5799 			 * to their converse (resumed) state.  Only the paths
        			 * that precede failed_pip on the list are visited.
   5800 			 */
   5801 			pip = ph->ph_path_head;
   5802 			while (pip != failed_pip) {
   5803 				dev_info_t *cdip;
   5804 				MDI_PI_LOCK(pip);
   5805 				next =
   5806 				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   5807 				ct = MDI_PI(pip)->pi_client;
   5808 				i_mdi_client_lock(ct, pip);
   5809 				cdip = ct->ct_dip;
   5810 				MDI_PI_UNLOCK(pip);
   5811 				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
   5812 					i_mdi_client_unlock(ct);
   5813 					(void) devi_attach(cdip, DDI_RESUME);
   5814 				} else {
   5815 					i_mdi_client_unlock(ct);
   5816 				}
   5817 				pip = next;
   5818 			}
   5819 		}
   5820 		break;
   5821 
   5822 	default:
   5823 		rv = DDI_FAILURE;
   5824 		break;
   5825 	}
   5826 	MDI_PHCI_UNLOCK(ph);
   5827 	return (rv);
   5828 }
   5829 
   5830 /*ARGSUSED*/
   5831 static int
   5832 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
   5833 {
   5834 	int		rv = DDI_SUCCESS;
   5835 	mdi_client_t	*ct;
   5836 
   5837 	ct = i_devi_get_client(dip);
   5838 	if (ct == NULL) {
   5839 		return (rv);
   5840 	}
   5841 
   5842 	MDI_CLIENT_LOCK(ct);
   5843 	switch (cmd) {
   5844 	case DDI_DETACH:
   5845 		MDI_DEBUG(2, (MDI_NOTE, dip,
   5846 		    "client pre_detach: called %p",
   5847 		     (void *)ct));
   5848 		MDI_CLIENT_SET_DETACH(ct);
   5849 		break;
   5850 
   5851 	case DDI_SUSPEND:
   5852 		MDI_DEBUG(2, (MDI_NOTE, dip,
   5853 		    "client pre_suspend: called %p",
   5854 		    (void *)ct));
   5855 		MDI_CLIENT_SET_SUSPEND(ct);
   5856 		break;
   5857 
   5858 	default:
   5859 		rv = DDI_FAILURE;
   5860 		break;
   5861 	}
   5862 	MDI_CLIENT_UNLOCK(ct);
   5863 	return (rv);
   5864 }
   5865 
   5866 /*
   5867  * mdi_post_detach():
   5868  *		Post detach notification handler
   5869  */
   5870 /*ARGSUSED*/
   5871 void
   5872 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
   5873 {
   5874 	/*
   5875 	 * Detach/Suspend of mpxio component failed. Update our state
   5876 	 * too
   5877 	 */
   5878 	if (MDI_PHCI(dip))
   5879 		i_mdi_phci_post_detach(dip, cmd, error);
   5880 
   5881 	if (MDI_CLIENT(dip))
   5882 		i_mdi_client_post_detach(dip, cmd, error);
   5883 }
   5884 
   5885 /*ARGSUSED*/
   5886 static void
   5887 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
   5888 {
   5889 	mdi_phci_t	*ph;
   5890 
   5891 	/*
   5892 	 * Detach/Suspend of phci component failed. Update our state
   5893 	 * too
   5894 	 */
   5895 	ph = i_devi_get_phci(dip);
   5896 	if (ph == NULL) {
   5897 		return;
   5898 	}
   5899 
   5900 	MDI_PHCI_LOCK(ph);
   5901 	/*
   5902 	 * Detach of pHCI failed. Restore back converse
   5903 	 * state
   5904 	 */
   5905 	switch (cmd) {
   5906 	case DDI_DETACH:
   5907 		MDI_DEBUG(2, (MDI_NOTE, dip,
   5908 		    "pHCI post_detach: called %p",
   5909 		    (void *)ph));
   5910 		if (error != DDI_SUCCESS)
   5911 			MDI_PHCI_SET_ATTACH(ph);
   5912 		break;
   5913 
   5914 	case DDI_SUSPEND:
   5915 		MDI_DEBUG(2, (MDI_NOTE, dip,
   5916 		    "pHCI post_suspend: called %p",
   5917 		    (void *)ph));
   5918 		if (error != DDI_SUCCESS)
   5919 			MDI_PHCI_SET_RESUME(ph);
   5920 		break;
   5921 	}
   5922 	MDI_PHCI_UNLOCK(ph);
   5923 }
   5924 
   5925 /*ARGSUSED*/
   5926 static void
   5927 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
   5928 {
   5929 	mdi_client_t	*ct;
   5930 
   5931 	ct = i_devi_get_client(dip);
   5932 	if (ct == NULL) {
   5933 		return;
   5934 	}
   5935 	MDI_CLIENT_LOCK(ct);
   5936 	/*
   5937 	 * Detach of Client failed. Restore back converse
   5938 	 * state
   5939 	 */
   5940 	switch (cmd) {
   5941 	case DDI_DETACH:
   5942 		MDI_DEBUG(2, (MDI_NOTE, dip,
   5943 		    "client post_detach: called %p", (void *)ct));
   5944 		if (DEVI_IS_ATTACHING(ct->ct_dip)) {
   5945 			MDI_DEBUG(4, (MDI_NOTE, dip,
   5946 			    "i_mdi_pm_rele_client\n"));
   5947 			i_mdi_pm_rele_client(ct, ct->ct_path_count);
   5948 		} else {
   5949 			MDI_DEBUG(4, (MDI_NOTE, dip,
   5950 			    "i_mdi_pm_reset_client\n"));
   5951 			i_mdi_pm_reset_client(ct);
   5952 		}
   5953 		if (error != DDI_SUCCESS)
   5954 			MDI_CLIENT_SET_ATTACH(ct);
   5955 		break;
   5956 
   5957 	case DDI_SUSPEND:
   5958 		MDI_DEBUG(2, (MDI_NOTE, dip,
   5959 		    "called %p", (void *)ct));
   5960 		if (error != DDI_SUCCESS)
   5961 			MDI_CLIENT_SET_RESUME(ct);
   5962 		break;
   5963 	}
   5964 	MDI_CLIENT_UNLOCK(ct);
   5965 }
   5966 
   5967 int
   5968 mdi_pi_kstat_exists(mdi_pathinfo_t *pip)
   5969 {
   5970 	return (MDI_PI(pip)->pi_kstats ? 1 : 0);
   5971 }
   5972 
   5973 /*
   5974  * create and install per-path (client - pHCI) statistics
   5975  * I/O stats supported: nread, nwritten, reads, and writes
   5976  * Error stats - hard errors, soft errors, & transport errors
   5977  */
   5978 int
   5979 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
   5980 {
   5981 	kstat_t			*kiosp, *kerrsp;
   5982 	struct pi_errs		*nsp;
   5983 	struct mdi_pi_kstats	*mdi_statp;
   5984 
   5985 	if (MDI_PI(pip)->pi_kstats != NULL)
   5986 		return (MDI_SUCCESS);
   5987 
   5988 	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
   5989 	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
   5990 		return (MDI_FAILURE);
   5991 	}
   5992 
   5993 	(void) strcat(ksname, ",err");
   5994 	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
   5995 	    KSTAT_TYPE_NAMED,
   5996 	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
   5997 	if (kerrsp == NULL) {
   5998 		kstat_delete(kiosp);
   5999 		return (MDI_FAILURE);
   6000 	}
   6001 
   6002 	nsp = (struct pi_errs *)kerrsp->ks_data;
   6003 	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
   6004 	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
   6005 	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
   6006 	    KSTAT_DATA_UINT32);
   6007 	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
   6008 	    KSTAT_DATA_UINT32);
   6009 	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
   6010 	    KSTAT_DATA_UINT32);
   6011 	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
   6012 	    KSTAT_DATA_UINT32);
   6013 	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
   6014 	    KSTAT_DATA_UINT32);
   6015 	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
   6016 	    KSTAT_DATA_UINT32);
   6017 	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
   6018 	    KSTAT_DATA_UINT32);
   6019 	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
   6020 
   6021 	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
   6022 	mdi_statp->pi_kstat_ref = 1;
   6023 	mdi_statp->pi_kstat_iostats = kiosp;
   6024 	mdi_statp->pi_kstat_errstats = kerrsp;
   6025 	kstat_install(kiosp);
   6026 	kstat_install(kerrsp);
   6027 	MDI_PI(pip)->pi_kstats = mdi_statp;
   6028 	return (MDI_SUCCESS);
   6029 }
   6030 
   6031 /*
   6032  * destroy per-path properties
   6033  */
   6034 static void
   6035 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
   6036 {
   6037 
   6038 	struct mdi_pi_kstats *mdi_statp;
   6039 
   6040 	if (MDI_PI(pip)->pi_kstats == NULL)
   6041 		return;
   6042 	if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
   6043 		return;
   6044 
   6045 	MDI_PI(pip)->pi_kstats = NULL;
   6046 
   6047 	/*
   6048 	 * the kstat may be shared between multiple pathinfo nodes
   6049 	 * decrement this pathinfo's usage, removing the kstats
   6050 	 * themselves when the last pathinfo reference is removed.
   6051 	 */
   6052 	ASSERT(mdi_statp->pi_kstat_ref > 0);
   6053 	if (--mdi_statp->pi_kstat_ref != 0)
   6054 		return;
   6055 
   6056 	kstat_delete(mdi_statp->pi_kstat_iostats);
   6057 	kstat_delete(mdi_statp->pi_kstat_errstats);
   6058 	kmem_free(mdi_statp, sizeof (*mdi_statp));
   6059 }
   6060 
   6061 /*
   6062  * update I/O paths KSTATS
   6063  */
   6064 void
   6065 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
   6066 {
   6067 	kstat_t *iostatp;
   6068 	size_t xfer_cnt;
   6069 
   6070 	ASSERT(pip != NULL);
   6071 
   6072 	/*
   6073 	 * I/O can be driven across a path prior to having path
   6074 	 * statistics available, i.e. probe(9e).
   6075 	 */
   6076 	if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
   6077 		iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
   6078 		xfer_cnt = bp->b_bcount - bp->b_resid;
   6079 		if (bp->b_flags & B_READ) {
   6080 			KSTAT_IO_PTR(iostatp)->reads++;
   6081 			KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
   6082 		} else {
   6083 			KSTAT_IO_PTR(iostatp)->writes++;
   6084 			KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
   6085 		}
   6086 	}
   6087 }
   6088 
   6089 /*
   6090  * Enable the path(specific client/target/initiator)
   6091  * Enabling a path means that MPxIO may select the enabled path for routing
   6092  * future I/O requests, subject to other path state constraints.
   6093  */
   6094 int
   6095 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
   6096 {
   6097 	mdi_phci_t	*ph;
   6098 
   6099 	ph = MDI_PI(pip)->pi_phci;
   6100 	if (ph == NULL) {
   6101 		MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
   6102 		    "!failed: path %s %p: NULL ph",
   6103 		    mdi_pi_spathname(pip), (void *)pip));
   6104 		return (MDI_FAILURE);
   6105 	}
   6106 
   6107 	(void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
   6108 		MDI_ENABLE_OP);
   6109 	MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
   6110 	    "!returning success pip = %p. ph = %p",
   6111 	    (void *)pip, (void *)ph));
   6112 	return (MDI_SUCCESS);
   6113 
   6114 }
   6115 
   6116 /*
   6117  * Disable the path (specific client/target/initiator)
   6118  * Disabling a path means that MPxIO will not select the disabled path for
   6119  * routing any new I/O requests.
   6120  */
   6121 int
   6122 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
   6123 {
   6124 	mdi_phci_t	*ph;
   6125 
   6126 	ph = MDI_PI(pip)->pi_phci;
   6127 	if (ph == NULL) {
   6128 		MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
   6129 		    "!failed: path %s %p: NULL ph",
   6130 		    mdi_pi_spathname(pip), (void *)pip));
   6131 		return (MDI_FAILURE);
   6132 	}
   6133 
   6134 	(void) i_mdi_enable_disable_path(pip,
   6135 	    ph->ph_vhci, flags, MDI_DISABLE_OP);
   6136 	MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
   6137 	    "!returning success pip = %p. ph = %p",
   6138 	    (void *)pip, (void *)ph));
   6139 	return (MDI_SUCCESS);
   6140 }
   6141 
   6142 /*
   6143  * disable the path to a particular pHCI (pHCI specified in the phci_path
   6144  * argument) for a particular client (specified in the client_path argument).
   6145  * Disabling a path means that MPxIO will not select the disabled path for
   6146  * routing any new I/O requests.
   6147  * NOTE: this will be removed once the NWS files are changed to use the new
   6148  * mdi_{enable,disable}_path interfaces
   6149  */
   6150 int
   6151 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
   6152 {
   6153 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
   6154 }
   6155 
   6156 /*
   6157  * Enable the path to a particular pHCI (pHCI specified in the phci_path
   6158  * argument) for a particular client (specified in the client_path argument).
   6159  * Enabling a path means that MPxIO may select the enabled path for routing
   6160  * future I/O requests, subject to other path state constraints.
   6161  * NOTE: this will be removed once the NWS files are changed to use the new
   6162  * mdi_{enable,disable}_path interfaces
   6163  */
   6164 
   6165 int
   6166 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
   6167 {
   6168 	return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
   6169 }
   6170 
   6171 /*
   6172  * Common routine for doing enable/disable.
   6173  */
   6174 static mdi_pathinfo_t *
   6175 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags,
   6176 		int op)
   6177 {
   6178 	int		sync_flag = 0;
   6179 	int		rv;
   6180 	mdi_pathinfo_t 	*next;
   6181 	int		(*f)() = NULL;
   6182 
   6183 	/*
   6184 	 * Check to make sure the path is not already in the
   6185 	 * requested state. If it is just return the next path
   6186 	 * as we have nothing to do here.
   6187 	 */
   6188 	if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) ||
   6189 	    (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) {
   6190 		MDI_PI_LOCK(pip);
   6191 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   6192 		MDI_PI_UNLOCK(pip);
   6193 		return (next);
   6194 	}
   6195 
   6196 	f = vh->vh_ops->vo_pi_state_change;
   6197 
   6198 	sync_flag = (flags << 8) & 0xf00;
   6199 
   6200 	/*
   6201 	 * Do a callback into the mdi consumer to let it
   6202 	 * know that path is about to get enabled/disabled.
   6203 	 */
   6204 	if (f != NULL) {
   6205 		rv = (*f)(vh->vh_dip, pip, 0,
   6206 			MDI_PI_EXT_STATE(pip),
   6207 			MDI_EXT_STATE_CHANGE | sync_flag |
   6208 			op | MDI_BEFORE_STATE_CHANGE);
   6209 		if (rv != MDI_SUCCESS) {
   6210 			MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
   6211 			    "vo_pi_state_change: failed rv = %x", rv));
   6212 		}
   6213 	}
   6214 	MDI_PI_LOCK(pip);
   6215 	next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   6216 
   6217 	switch (flags) {
   6218 		case USER_DISABLE:
   6219 			if (op == MDI_DISABLE_OP) {
   6220 				MDI_PI_SET_USER_DISABLE(pip);
   6221 			} else {
   6222 				MDI_PI_SET_USER_ENABLE(pip);
   6223 			}
   6224 			break;
   6225 		case DRIVER_DISABLE:
   6226 			if (op == MDI_DISABLE_OP) {
   6227 				MDI_PI_SET_DRV_DISABLE(pip);
   6228 			} else {
   6229 				MDI_PI_SET_DRV_ENABLE(pip);
   6230 			}
   6231 			break;
   6232 		case DRIVER_DISABLE_TRANSIENT:
   6233 			if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) {
   6234 				MDI_PI_SET_DRV_DISABLE_TRANS(pip);
   6235 			} else {
   6236 				MDI_PI_SET_DRV_ENABLE_TRANS(pip);
   6237 			}
   6238 			break;
   6239 	}
   6240 	MDI_PI_UNLOCK(pip);
   6241 	/*
   6242 	 * Do a callback into the mdi consumer to let it
   6243 	 * know that path is now enabled/disabled.
   6244 	 */
   6245 	if (f != NULL) {
   6246 		rv = (*f)(vh->vh_dip, pip, 0,
   6247 			MDI_PI_EXT_STATE(pip),
   6248 			MDI_EXT_STATE_CHANGE | sync_flag |
   6249 			op | MDI_AFTER_STATE_CHANGE);
   6250 		if (rv != MDI_SUCCESS) {
   6251 			MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
   6252 			    "vo_pi_state_change failed: rv = %x", rv));
   6253 		}
   6254 	}
   6255 	return (next);
   6256 }
   6257 
   6258 /*
   6259  * Common routine for doing enable/disable.
   6260  * NOTE: this will be removed once the NWS files are changed to use the new
   6261  * mdi_{enable,disable}_path has been putback
   6262  */
   6263 int
   6264 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
   6265 {
   6266 
   6267 	mdi_phci_t	*ph;
   6268 	mdi_vhci_t	*vh = NULL;
   6269 	mdi_client_t	*ct;
   6270 	mdi_pathinfo_t	*next, *pip;
   6271 	int		found_it;
   6272 
   6273 	ph = i_devi_get_phci(pdip);
   6274 	MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
   6275 	    "!op = %d pdip = %p cdip = %p", op, (void *)pdip,
   6276 	    (void *)cdip));
   6277 	if (ph == NULL) {
   6278 		MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
   6279 		    "!failed: operation %d: NULL ph", op));
   6280 		return (MDI_FAILURE);
   6281 	}
   6282 
   6283 	if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
   6284 		MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
   6285 		    "!failed: invalid operation %d", op));
   6286 		return (MDI_FAILURE);
   6287 	}
   6288 
   6289 	vh = ph->ph_vhci;
   6290 
   6291 	if (cdip == NULL) {
   6292 		/*
   6293 		 * Need to mark the Phci as enabled/disabled.
   6294 		 */
   6295 		MDI_DEBUG(4, (MDI_NOTE, cdip ? cdip : pdip,
   6296 		    "op %d for the phci", op));
   6297 		MDI_PHCI_LOCK(ph);
   6298 		switch (flags) {
   6299 			case USER_DISABLE:
   6300 				if (op == MDI_DISABLE_OP) {
   6301 					MDI_PHCI_SET_USER_DISABLE(ph);
   6302 				} else {
   6303 					MDI_PHCI_SET_USER_ENABLE(ph);
   6304 				}
   6305 				break;
   6306 			case DRIVER_DISABLE:
   6307 				if (op == MDI_DISABLE_OP) {
   6308 					MDI_PHCI_SET_DRV_DISABLE(ph);
   6309 				} else {
   6310 					MDI_PHCI_SET_DRV_ENABLE(ph);
   6311 				}
   6312 				break;
   6313 			case DRIVER_DISABLE_TRANSIENT:
   6314 				if (op == MDI_DISABLE_OP) {
   6315 					MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
   6316 				} else {
   6317 					MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
   6318 				}
   6319 				break;
   6320 			default:
   6321 				MDI_PHCI_UNLOCK(ph);
   6322 				MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
   6323 				    "!invalid flag argument= %d", flags));
   6324 		}
   6325 
   6326 		/*
   6327 		 * Phci has been disabled. Now try to enable/disable
   6328 		 * path info's to each client.
   6329 		 */
   6330 		pip = ph->ph_path_head;
   6331 		while (pip != NULL) {
   6332 			pip = i_mdi_enable_disable_path(pip, vh, flags, op);
   6333 		}
   6334 		MDI_PHCI_UNLOCK(ph);
   6335 	} else {
   6336 
   6337 		/*
   6338 		 * Disable a specific client.
   6339 		 */
   6340 		ct = i_devi_get_client(cdip);
   6341 		if (ct == NULL) {
   6342 			MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
   6343 			    "!failed: operation = %d: NULL ct", op));
   6344 			return (MDI_FAILURE);
   6345 		}
   6346 
   6347 		MDI_CLIENT_LOCK(ct);
   6348 		pip = ct->ct_path_head;
   6349 		found_it = 0;
   6350 		while (pip != NULL) {
   6351 			MDI_PI_LOCK(pip);
   6352 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
   6353 			if (MDI_PI(pip)->pi_phci == ph) {
   6354 				MDI_PI_UNLOCK(pip);
   6355 				found_it = 1;
   6356 				break;
   6357 			}
   6358 			MDI_PI_UNLOCK(pip);
   6359 			pip = next;
   6360 		}
   6361 
   6362 
   6363 		MDI_CLIENT_UNLOCK(ct);
   6364 		if (found_it == 0) {
   6365 			MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
   6366 			    "!failed. Could not find corresponding pip\n"));
   6367 			return (MDI_FAILURE);
   6368 		}
   6369 
   6370 		(void) i_mdi_enable_disable_path(pip, vh, flags, op);
   6371 	}
   6372 
   6373 	MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
   6374 	    "!op %d returning success pdip = %p cdip = %p",
   6375 	    op, (void *)pdip, (void *)cdip));
   6376 	return (MDI_SUCCESS);
   6377 }
   6378 
   6379 /*
   6380  * Ensure phci powered up
   6381  */
   6382 static void
   6383 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
   6384 {
   6385 	dev_info_t	*ph_dip;
   6386 
   6387 	ASSERT(pip != NULL);
   6388 	ASSERT(MDI_PI_LOCKED(pip));
   6389 
   6390 	if (MDI_PI(pip)->pi_pm_held) {
   6391 		return;
   6392 	}
   6393 
   6394 	ph_dip = mdi_pi_get_phci(pip);
   6395 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
   6396 	    "%s %p", mdi_pi_spathname(pip), (void *)pip));
   6397 	if (ph_dip == NULL) {
   6398 		return;
   6399 	}
   6400 
   6401 	MDI_PI_UNLOCK(pip);
   6402 	MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d",
   6403 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
   6404 	pm_hold_power(ph_dip);
   6405 	MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d",
   6406 	    DEVI(ph_dip)->devi_pm_kidsupcnt));
   6407 	MDI_PI_LOCK(pip);
   6408 
   6409 	/* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
   6410 	if (DEVI(ph_dip)->devi_pm_info)
   6411 		MDI_PI(pip)->pi_pm_held = 1;
   6412 }
   6413 
   6414 /*
   6415  * Allow phci powered down
   6416  */
   6417 static void
   6418 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
   6419 {
   6420 	dev_info_t	*ph_dip = NULL;
   6421 
   6422 	ASSERT(pip != NULL);
   6423 	ASSERT(MDI_PI_LOCKED(pip));
   6424 
   6425 	if (MDI_PI(pip)->pi_pm_held == 0) {
   6426 		return;
   6427 	}
   6428 
   6429 	ph_dip = mdi_pi_get_phci(pip);
   6430 	ASSERT(ph_dip != NULL);
   6431 
   6432 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
   6433 	    "%s %p", mdi_pi_spathname(pip), (void *)pip));
   6434 
   6435 	MDI_PI_UNLOCK(pip);
   6436 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
   6437 	    "kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
   6438 	pm_rele_power(ph_dip);
   6439 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
   6440 	    "kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
   6441 	MDI_PI_LOCK(pip);
   6442 
   6443 	MDI_PI(pip)->pi_pm_held = 0;
   6444 }
   6445 
   6446 static void
   6447 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
   6448 {
   6449 	ASSERT(MDI_CLIENT_LOCKED(ct));
   6450 
   6451 	ct->ct_power_cnt += incr;
   6452 	MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
   6453 	    "%p ct_power_cnt = %d incr = %d",
   6454 	    (void *)ct, ct->ct_power_cnt, incr));
   6455 	ASSERT(ct->ct_power_cnt >= 0);
   6456 }
   6457 
   6458 static void
   6459 i_mdi_rele_all_phci(mdi_client_t *ct)
   6460 {
   6461 	mdi_pathinfo_t  *pip;
   6462 
   6463 	ASSERT(MDI_CLIENT_LOCKED(ct));
   6464 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
   6465 	while (pip != NULL) {
   6466 		mdi_hold_path(pip);
   6467 		MDI_PI_LOCK(pip);
   6468 		i_mdi_pm_rele_pip(pip);
   6469 		MDI_PI_UNLOCK(pip);
   6470 		mdi_rele_path(pip);
   6471 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
   6472 	}
   6473 }
   6474 
   6475 static void
   6476 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
   6477 {
   6478 	ASSERT(MDI_CLIENT_LOCKED(ct));
   6479 
   6480 	if (i_ddi_devi_attached(ct->ct_dip)) {
   6481 		ct->ct_power_cnt -= decr;
   6482 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
   6483 		    "%p ct_power_cnt = %d decr = %d",
   6484 		    (void *)ct, ct->ct_power_cnt, decr));
   6485 	}
   6486 
   6487 	ASSERT(ct->ct_power_cnt >= 0);
   6488 	if (ct->ct_power_cnt == 0) {
   6489 		i_mdi_rele_all_phci(ct);
   6490 		return;
   6491 	}
   6492 }
   6493 
   6494 static void
   6495 i_mdi_pm_reset_client(mdi_client_t *ct)
   6496 {
   6497 	MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
   6498 	    "%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt));
   6499 	ASSERT(MDI_CLIENT_LOCKED(ct));
   6500 	ct->ct_power_cnt = 0;
   6501 	i_mdi_rele_all_phci(ct);
   6502 	ct->ct_powercnt_config = 0;
   6503 	ct->ct_powercnt_unconfig = 0;
   6504 	ct->ct_powercnt_reset = 1;
   6505 }
   6506 
   6507 static int
   6508 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
   6509 {
   6510 	int		ret;
   6511 	dev_info_t	*ph_dip;
   6512 
   6513 	MDI_PI_LOCK(pip);
   6514 	i_mdi_pm_hold_pip(pip);
   6515 
   6516 	ph_dip = mdi_pi_get_phci(pip);
   6517 	MDI_PI_UNLOCK(pip);
   6518 
   6519 	/* bring all components of phci to full power */
   6520 	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
   6521 	    "pm_powerup for %s%d %p", ddi_driver_name(ph_dip),
   6522 	    ddi_get_instance(ph_dip), (void *)pip));
   6523 
   6524 	ret = pm_powerup(ph_dip);
   6525 
   6526 	if (ret == DDI_FAILURE) {
   6527 		MDI_DEBUG(4, (MDI_NOTE, ph_dip,
   6528 		    "pm_powerup FAILED for %s%d %p",
   6529 		    ddi_driver_name(ph_dip), ddi_get_instance(ph_dip),
   6530 		    (void *)pip));
   6531 
   6532 		MDI_PI_LOCK(pip);
   6533 		i_mdi_pm_rele_pip(pip);
   6534 		MDI_PI_UNLOCK(pip);
   6535 		return (MDI_FAILURE);
   6536 	}
   6537 
   6538 	return (MDI_SUCCESS);
   6539 }
   6540 
   6541 static int
   6542 i_mdi_power_all_phci(mdi_client_t *ct)
   6543 {
   6544 	mdi_pathinfo_t  *pip;
   6545 	int		succeeded = 0;
   6546 
   6547 	ASSERT(MDI_CLIENT_LOCKED(ct));
   6548 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
   6549 	while (pip != NULL) {
   6550 		/*
   6551 		 * Don't power if MDI_PATHINFO_STATE_FAULT
   6552 		 * or MDI_PATHINFO_STATE_OFFLINE.
   6553 		 */
   6554 		if (MDI_PI_IS_INIT(pip) ||
   6555 		    MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
   6556 			mdi_hold_path(pip);
   6557 			MDI_CLIENT_UNLOCK(ct);
   6558 			if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
   6559 				succeeded = 1;
   6560 
   6561 			ASSERT(ct == MDI_PI(pip)->pi_client);
   6562 			MDI_CLIENT_LOCK(ct);
   6563 			mdi_rele_path(pip);
   6564 		}
   6565 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
   6566 	}
   6567 
   6568 	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
   6569 }
   6570 
/*
 * mdi_bus_power():
 *		1. Place the phci(s) into powered up state so that
 *		   client can do power management
 *		2. Ensure phci powered up as client power managing
 *
 * Handles BUS_POWER_PRE_NOTIFICATION, BUS_POWER_POST_NOTIFICATION and
 * BUS_POWER_HAS_CHANGED for an MDI client child.  BUS_POWER_NOINVOL is
 * rejected and every other op is passed to pm_busop_bus_power().
 *
 * For the notification ops, arg is a pm_bp_child_pwrchg_t; for
 * BUS_POWER_HAS_CHANGED it is a pm_bp_has_changed_t.  result is read as an
 * int status in the POST_NOTIFICATION case.
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE	(unsupported op, child is not a client, or
 *				i_mdi_power_all_phci() failed)
 *		otherwise the value returned by pm_busop_bus_power()
 */
int
mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	int			ret = MDI_SUCCESS;
	pm_bp_child_pwrchg_t	*bpc;
	mdi_client_t		*ct;
	dev_info_t		*cdip;
	pm_bp_has_changed_t	*bphc;

	/*
	 * BUS_POWER_NOINVOL not supported
	 */
	if (op == BUS_POWER_NOINVOL)
		return (MDI_FAILURE);

	/*
	 * ignore other OPs.
	 * return quickly to save cpu cycles on the ct processing
	 */
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
	case BUS_POWER_POST_NOTIFICATION:
		bpc = (pm_bp_child_pwrchg_t *)arg;
		cdip = bpc->bpc_dip;
		break;
	case BUS_POWER_HAS_CHANGED:
		bphc = (pm_bp_has_changed_t *)arg;
		cdip = bphc->bphc_dip;
		break;
	default:
		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
	}

	ASSERT(MDI_CLIENT(cdip));

	ct = i_devi_get_client(cdip);
	if (ct == NULL)
		return (MDI_FAILURE);

	/*
	 * wait till the mdi_pathinfo node state change are processed
	 */
	MDI_CLIENT_LOCK(ct);
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
		MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
		    "BUS_POWER_PRE_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
		    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));

		/* serialize power level change per client */
		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

		/* cleared again (with a broadcast) in POST_NOTIFICATION */
		MDI_CLIENT_SET_POWER_TRANSITION(ct);

		/* no holds outstanding: power the pHCIs up first */
		if (ct->ct_power_cnt == 0) {
			ret = i_mdi_power_all_phci(ct);
		}

		/*
		 * if new_level > 0:
		 *	- hold phci(s)
		 *	- power up phci(s) if not already
		 * ignore power down
		 */
		if (bpc->bpc_nlevel > 0) {
			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
				    "i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		}
		break;
	case BUS_POWER_POST_NOTIFICATION:
		MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
		    "BUS_POWER_POST_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d",
		    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
		    *(int *)result));

		/* record the client's new power state on success */
		if (*(int *)result == DDI_SUCCESS) {
			if (bpc->bpc_nlevel > 0) {
				MDI_CLIENT_SET_POWER_UP(ct);
			} else {
				MDI_CLIENT_SET_POWER_DOWN(ct);
			}
		}

		/* release the hold we did in pre-notification */
		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
			MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}

		/* successful power down */
		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
			/* another thread might started attaching */
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
				    "i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			/* detaching has been taken care in pm_post_unconfig */
			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
				    "i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		/* end of the transition: wake any waiters on this client */
		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
		cv_broadcast(&ct->ct_powerchange_cv);

		break;

	/* need to do more */
	case BUS_POWER_HAS_CHANGED:
		MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
		    "BUS_POWER_HAS_CHANGED:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
		    ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));

		/* level went up: power up the pHCIs if needed and hold them */
		if (bphc->bphc_nlevel > 0 &&
		    bphc->bphc_nlevel > bphc->bphc_olevel) {
			if (ct->ct_power_cnt == 0) {
				ret = i_mdi_power_all_phci(ct);
			}
			MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
			    "i_mdi_pm_hold_client\n"));
			i_mdi_pm_hold_client(ct, ct->ct_path_count);
		}

		/* level dropped to 0 from a level other than -1: release */
		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
			MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}
   6728 
   6729 static int
   6730 i_mdi_pm_pre_config_one(dev_info_t *child)
   6731 {
   6732 	int		ret = MDI_SUCCESS;
   6733 	mdi_client_t	*ct;
   6734 
   6735 	ct = i_devi_get_client(child);
   6736 	if (ct == NULL)
   6737 		return (MDI_FAILURE);
   6738 
   6739 	MDI_CLIENT_LOCK(ct);
   6740 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
   6741 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
   6742 
   6743 	if (!MDI_CLIENT_IS_FAILED(ct)) {
   6744 		MDI_CLIENT_UNLOCK(ct);
   6745 		MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n"));
   6746 		return (MDI_SUCCESS);
   6747 	}
   6748 
   6749 	if (ct->ct_powercnt_config) {
   6750 		MDI_CLIENT_UNLOCK(ct);
   6751 		MDI_DEBUG(4, (MDI_NOTE, child, "already held\n"));
   6752 		return (MDI_SUCCESS);
   6753 	}
   6754 
   6755 	if (ct->ct_power_cnt == 0) {
   6756 		ret = i_mdi_power_all_phci(ct);
   6757 	}
   6758 	MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
   6759 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
   6760 	ct->ct_powercnt_config = 1;
   6761 	ct->ct_powercnt_reset = 0;
   6762 	MDI_CLIENT_UNLOCK(ct);
   6763 	return (ret);
   6764 }
   6765 
   6766 static int
   6767 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
   6768 {
   6769 	int			ret = MDI_SUCCESS;
   6770 	dev_info_t		*cdip;
   6771 	int			circ;
   6772 
   6773 	ASSERT(MDI_VHCI(vdip));
   6774 
   6775 	/* ndi_devi_config_one */
   6776 	if (child) {
   6777 		ASSERT(DEVI_BUSY_OWNED(vdip));
   6778 		return (i_mdi_pm_pre_config_one(child));
   6779 	}
   6780 
   6781 	/* devi_config_common */
   6782 	ndi_devi_enter(vdip, &circ);
   6783 	cdip = ddi_get_child(vdip);
   6784 	while (cdip) {
   6785 		dev_info_t *next = ddi_get_next_sibling(cdip);
   6786 
   6787 		ret = i_mdi_pm_pre_config_one(cdip);
   6788 		if (ret != MDI_SUCCESS)
   6789 			break;
   6790 		cdip = next;
   6791 	}
   6792 	ndi_devi_exit(vdip, circ);
   6793 	return (ret);
   6794 }
   6795 
   6796 static int
   6797 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
   6798 {
   6799 	int		ret = MDI_SUCCESS;
   6800 	mdi_client_t	*ct;
   6801 
   6802 	ct = i_devi_get_client(child);
   6803 	if (ct == NULL)
   6804 		return (MDI_FAILURE);
   6805 
   6806 	MDI_CLIENT_LOCK(ct);
   6807 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
   6808 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
   6809 
   6810 	if (!i_ddi_devi_attached(ct->ct_dip)) {
   6811 		MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n"));
   6812 		MDI_CLIENT_UNLOCK(ct);
   6813 		return (MDI_SUCCESS);
   6814 	}
   6815 
   6816 	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
   6817 	    (flags & NDI_AUTODETACH)) {
   6818 		MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n"));
   6819 		MDI_CLIENT_UNLOCK(ct);
   6820 		return (MDI_FAILURE);
   6821 	}
   6822 
   6823 	if (ct->ct_powercnt_unconfig) {
   6824 		MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n"));
   6825 		MDI_CLIENT_UNLOCK(ct);
   6826 		*held = 1;
   6827 		return (MDI_SUCCESS);
   6828 	}
   6829 
   6830 	if (ct->ct_power_cnt == 0) {
   6831 		ret = i_mdi_power_all_phci(ct);
   6832 	}
   6833 	MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
   6834 	i_mdi_pm_hold_client(ct, ct->ct_path_count);
   6835 	ct->ct_powercnt_unconfig = 1;
   6836 	ct->ct_powercnt_reset = 0;
   6837 	MDI_CLIENT_UNLOCK(ct);
   6838 	if (ret == MDI_SUCCESS)
   6839 		*held = 1;
   6840 	return (ret);
   6841 }
   6842 
   6843 static int
   6844 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
   6845     int flags)
   6846 {
   6847 	int			ret = MDI_SUCCESS;
   6848 	dev_info_t		*cdip;
   6849 	int			circ;
   6850 
   6851 	ASSERT(MDI_VHCI(vdip));
   6852 	*held = 0;
   6853 
   6854 	/* ndi_devi_unconfig_one */
   6855 	if (child) {
   6856 		ASSERT(DEVI_BUSY_OWNED(vdip));
   6857 		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
   6858 	}
   6859 
   6860 	/* devi_unconfig_common */
   6861 	ndi_devi_enter(vdip, &circ);
   6862 	cdip = ddi_get_child(vdip);
   6863 	while (cdip) {
   6864 		dev_info_t *next = ddi_get_next_sibling(cdip);
   6865 
   6866 		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
   6867 		cdip = next;
   6868 	}
   6869 	ndi_devi_exit(vdip, circ);
   6870 
   6871 	if (*held)
   6872 		ret = MDI_SUCCESS;
   6873 
   6874 	return (ret);
   6875 }
   6876 
   6877 static void
   6878 i_mdi_pm_post_config_one(dev_info_t *child)
   6879 {
   6880 	mdi_client_t	*ct;
   6881 
   6882 	ct = i_devi_get_client(child);
   6883 	if (ct == NULL)
   6884 		return;
   6885 
   6886 	MDI_CLIENT_LOCK(ct);
   6887 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
   6888 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
   6889 
   6890 	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
   6891 		MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n"));
   6892 		MDI_CLIENT_UNLOCK(ct);
   6893 		return;
   6894 	}
   6895 
   6896 	/* client has not been updated */
   6897 	if (MDI_CLIENT_IS_FAILED(ct)) {
   6898 		MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n"));
   6899 		MDI_CLIENT_UNLOCK(ct);
   6900 		return;
   6901 	}
   6902 
   6903 	/* another thread might have powered it down or detached it */
   6904 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
   6905 	    !DEVI_IS_ATTACHING(ct->ct_dip)) ||
   6906 	    (!i_ddi_devi_attached(ct->ct_dip) &&
   6907 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
   6908 		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
   6909 		i_mdi_pm_reset_client(ct);
   6910 	} else {
   6911 		mdi_pathinfo_t  *pip, *next;
   6912 		int	valid_path_count = 0;
   6913 
   6914 		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
   6915 		pip = ct->ct_path_head;
   6916 		while (pip != NULL) {
   6917 			MDI_PI_LOCK(pip);
   6918 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
   6919 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
   6920 				valid_path_count ++;
   6921 			MDI_PI_UNLOCK(pip);
   6922 			pip = next;
   6923 		}
   6924 		i_mdi_pm_rele_client(ct, valid_path_count);
   6925 	}
   6926 	ct->ct_powercnt_config = 0;
   6927 	MDI_CLIENT_UNLOCK(ct);
   6928 }
   6929 
   6930 static void
   6931 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
   6932 {
   6933 	int		circ;
   6934 	dev_info_t	*cdip;
   6935 
   6936 	ASSERT(MDI_VHCI(vdip));
   6937 
   6938 	/* ndi_devi_config_one */
   6939 	if (child) {
   6940 		ASSERT(DEVI_BUSY_OWNED(vdip));
   6941 		i_mdi_pm_post_config_one(child);
   6942 		return;
   6943 	}
   6944 
   6945 	/* devi_config_common */
   6946 	ndi_devi_enter(vdip, &circ);
   6947 	cdip = ddi_get_child(vdip);
   6948 	while (cdip) {
   6949 		dev_info_t *next = ddi_get_next_sibling(cdip);
   6950 
   6951 		i_mdi_pm_post_config_one(cdip);
   6952 		cdip = next;
   6953 	}
   6954 	ndi_devi_exit(vdip, circ);
   6955 }
   6956 
   6957 static void
   6958 i_mdi_pm_post_unconfig_one(dev_info_t *child)
   6959 {
   6960 	mdi_client_t	*ct;
   6961 
   6962 	ct = i_devi_get_client(child);
   6963 	if (ct == NULL)
   6964 		return;
   6965 
   6966 	MDI_CLIENT_LOCK(ct);
   6967 	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
   6968 		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
   6969 
   6970 	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
   6971 		MDI_DEBUG(4, (MDI_NOTE, child, "not held\n"));
   6972 		MDI_CLIENT_UNLOCK(ct);
   6973 		return;
   6974 	}
   6975 
   6976 	/* failure detaching or another thread just attached it */
   6977 	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
   6978 	    i_ddi_devi_attached(ct->ct_dip)) ||
   6979 	    (!i_ddi_devi_attached(ct->ct_dip) &&
   6980 	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
   6981 		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
   6982 		i_mdi_pm_reset_client(ct);
   6983 	} else {
   6984 		mdi_pathinfo_t  *pip, *next;
   6985 		int	valid_path_count = 0;
   6986 
   6987 		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
   6988 		pip = ct->ct_path_head;
   6989 		while (pip != NULL) {
   6990 			MDI_PI_LOCK(pip);
   6991 			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
   6992 			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
   6993 				valid_path_count ++;
   6994 			MDI_PI_UNLOCK(pip);
   6995 			pip = next;
   6996 		}
   6997 		i_mdi_pm_rele_client(ct, valid_path_count);
   6998 		ct->ct_powercnt_unconfig = 0;
   6999 	}
   7000 
   7001 	MDI_CLIENT_UNLOCK(ct);
   7002 }
   7003 
   7004 static void
   7005 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
   7006 {
   7007 	int			circ;
   7008 	dev_info_t		*cdip;
   7009 
   7010 	ASSERT(MDI_VHCI(vdip));
   7011 
   7012 	if (!held) {
   7013 		MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held));
   7014 		return;
   7015 	}
   7016 
   7017 	if (child) {
   7018 		ASSERT(DEVI_BUSY_OWNED(vdip));
   7019 		i_mdi_pm_post_unconfig_one(child);
   7020 		return;
   7021 	}
   7022 
   7023 	ndi_devi_enter(vdip, &circ);
   7024 	cdip = ddi_get_child(vdip);
   7025 	while (cdip) {
   7026 		dev_info_t *next = ddi_get_next_sibling(cdip);
   7027 
   7028 		i_mdi_pm_post_unconfig_one(cdip);
   7029 		cdip = next;
   7030 	}
   7031 	ndi_devi_exit(vdip, circ);
   7032 }
   7033 
   7034 int
   7035 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
   7036 {
   7037 	int			circ, ret = MDI_SUCCESS;
   7038 	dev_info_t		*client_dip = NULL;
   7039 	mdi_client_t		*ct;
   7040 
   7041 	/*
   7042 	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
   7043 	 * Power up pHCI for the named client device.
   7044 	 * Note: Before the client is enumerated under vhci by phci,
   7045 	 * client_dip can be NULL. Then proceed to power up all the
   7046 	 * pHCIs.
   7047 	 */
   7048 	if (devnm != NULL) {
   7049 		ndi_devi_enter(vdip, &circ);
   7050 		client_dip = ndi_devi_findchild(vdip, devnm);
   7051 	}
   7052 
   7053 	MDI_DEBUG(4, (MDI_NOTE, vdip,
   7054 	    "op = %d %s %p", op, devnm ? devnm : "", (void *)client_dip));
   7055 
   7056 	switch (op) {
   7057 	case MDI_PM_PRE_CONFIG:
   7058 		ret = i_mdi_pm_pre_config(vdip, client_dip);
   7059 		break;
   7060 
   7061 	case MDI_PM_PRE_UNCONFIG:
   7062 		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
   7063 		    flags);
   7064 		break;
   7065 
   7066 	case MDI_PM_POST_CONFIG:
   7067 		i_mdi_pm_post_config(vdip, client_dip);
   7068 		break;
   7069 
   7070 	case MDI_PM_POST_UNCONFIG:
   7071 		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
   7072 		break;
   7073 
   7074 	case MDI_PM_HOLD_POWER:
   7075 	case MDI_PM_RELE_POWER:
   7076 		ASSERT(args);
   7077 
   7078 		client_dip = (dev_info_t *)args;
   7079 		ASSERT(MDI_CLIENT(client_dip));
   7080 
   7081 		ct = i_devi_get_client(client_dip);
   7082 		MDI_CLIENT_LOCK(ct);
   7083 
   7084 		if (op == MDI_PM_HOLD_POWER) {
   7085 			if (ct->ct_power_cnt == 0) {
   7086 				(void) i_mdi_power_all_phci(ct);
   7087 				MDI_DEBUG(4, (MDI_NOTE, client_dip,
   7088 				    "i_mdi_pm_hold_client\n"));
   7089 				i_mdi_pm_hold_client(ct, ct->ct_path_count);
   7090 			}
   7091 		} else {
   7092 			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
   7093 				MDI_DEBUG(4, (MDI_NOTE, client_dip,
   7094 				    "i_mdi_pm_rele_client\n"));
   7095 				i_mdi_pm_rele_client(ct, ct->ct_path_count);
   7096 			} else {
   7097 				MDI_DEBUG(4, (MDI_NOTE, client_dip,
   7098 				    "i_mdi_pm_reset_client\n"));
   7099 				i_mdi_pm_reset_client(ct);
   7100 			}
   7101 		}
   7102 
   7103 		MDI_CLIENT_UNLOCK(ct);
   7104 		break;
   7105 
   7106 	default:
   7107 		break;
   7108 	}
   7109 
   7110 	if (devnm)
   7111 		ndi_devi_exit(vdip, circ);
   7112 
   7113 	return (ret);
   7114 }
   7115 
   7116 int
   7117 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
   7118 {
   7119 	mdi_vhci_t *vhci;
   7120 
   7121 	if (!MDI_VHCI(dip))
   7122 		return (MDI_FAILURE);
   7123 
   7124 	if (mdi_class) {
   7125 		vhci = DEVI(dip)->devi_mdi_xhci;
   7126 		ASSERT(vhci);
   7127 		*mdi_class = vhci->vh_class;
   7128 	}
   7129 
   7130 	return (MDI_SUCCESS);
   7131 }
   7132 
   7133 int
   7134 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
   7135 {
   7136 	mdi_phci_t *phci;
   7137 
   7138 	if (!MDI_PHCI(dip))
   7139 		return (MDI_FAILURE);
   7140 
   7141 	if (mdi_class) {
   7142 		phci = DEVI(dip)->devi_mdi_xhci;
   7143 		ASSERT(phci);
   7144 		*mdi_class = phci->ph_vhci->vh_class;
   7145 	}
   7146 
   7147 	return (MDI_SUCCESS);
   7148 }
   7149 
   7150 int
   7151 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
   7152 {
   7153 	mdi_client_t *client;
   7154 
   7155 	if (!MDI_CLIENT(dip))
   7156 		return (MDI_FAILURE);
   7157 
   7158 	if (mdi_class) {
   7159 		client = DEVI(dip)->devi_mdi_client;
   7160 		ASSERT(client);
   7161 		*mdi_class = client->ct_vhci->vh_class;
   7162 	}
   7163 
   7164 	return (MDI_SUCCESS);
   7165 }
   7166 
   7167 void *
   7168 mdi_client_get_vhci_private(dev_info_t *dip)
   7169 {
   7170 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
   7171 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
   7172 		mdi_client_t	*ct;
   7173 		ct = i_devi_get_client(dip);
   7174 		return (ct->ct_vprivate);
   7175 	}
   7176 	return (NULL);
   7177 }
   7178 
   7179 void
   7180 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
   7181 {
   7182 	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
   7183 	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
   7184 		mdi_client_t	*ct;
   7185 		ct = i_devi_get_client(dip);
   7186 		ct->ct_vprivate = data;
   7187 	}
   7188 }
   7189 /*
   7190  * mdi_pi_get_vhci_private():
   7191  *		Get the vhci private information associated with the
   7192  *		mdi_pathinfo node
   7193  */
   7194 void *
   7195 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
   7196 {
   7197 	caddr_t	vprivate = NULL;
   7198 	if (pip) {
   7199 		vprivate = MDI_PI(pip)->pi_vprivate;
   7200 	}
   7201 	return (vprivate);
   7202 }
   7203 
   7204 /*
   7205  * mdi_pi_set_vhci_private():
   7206  *		Set the vhci private information in the mdi_pathinfo node
   7207  */
   7208 void
   7209 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
   7210 {
   7211 	if (pip) {
   7212 		MDI_PI(pip)->pi_vprivate = priv;
   7213 	}
   7214 }
   7215 
   7216 /*
   7217  * mdi_phci_get_vhci_private():
   7218  *		Get the vhci private information associated with the
   7219  *		mdi_phci node
   7220  */
   7221 void *
   7222 mdi_phci_get_vhci_private(dev_info_t *dip)
   7223 {
   7224 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
   7225 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
   7226 		mdi_phci_t	*ph;
   7227 		ph = i_devi_get_phci(dip);
   7228 		return (ph->ph_vprivate);
   7229 	}
   7230 	return (NULL);
   7231 }
   7232 
   7233 /*
   7234  * mdi_phci_set_vhci_private():
   7235  *		Set the vhci private information in the mdi_phci node
   7236  */
   7237 void
   7238 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
   7239 {
   7240 	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
   7241 	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
   7242 		mdi_phci_t	*ph;
   7243 		ph = i_devi_get_phci(dip);
   7244 		ph->ph_vprivate = priv;
   7245 	}
   7246 }
   7247 
   7248 int
   7249 mdi_pi_ishidden(mdi_pathinfo_t *pip)
   7250 {
   7251 	return (MDI_PI_FLAGS_IS_HIDDEN(pip));
   7252 }
   7253 
   7254 int
   7255 mdi_pi_device_isremoved(mdi_pathinfo_t *pip)
   7256 {
   7257 	return (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip));
   7258 }
   7259 
   7260 /*
   7261  * When processing hotplug, if mdi_pi_offline-mdi_pi_free fails then this
   7262  * interface is used to represent device removal.
   7263  */
   7264 int
   7265 mdi_pi_device_remove(mdi_pathinfo_t *pip)
   7266 {
   7267 	MDI_PI_LOCK(pip);
   7268 	if (mdi_pi_device_isremoved(pip)) {
   7269 		MDI_PI_UNLOCK(pip);
   7270 		return (0);
   7271 	}
   7272 	MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip);
   7273 	MDI_PI_FLAGS_SET_HIDDEN(pip);
   7274 	MDI_PI_UNLOCK(pip);
   7275 
   7276 	i_ddi_di_cache_invalidate();
   7277 
   7278 	return (1);
   7279 }
   7280 
   7281 /*
   7282  * When processing hotplug, if a path marked mdi_pi_device_isremoved()
   7283  * is now accessible then this interfaces is used to represent device insertion.
   7284  */
   7285 int
   7286 mdi_pi_device_insert(mdi_pathinfo_t *pip)
   7287 {
   7288 	MDI_PI_LOCK(pip);
   7289 	if (!mdi_pi_device_isremoved(pip)) {
   7290 		MDI_PI_UNLOCK(pip);
   7291 		return (0);
   7292 	}
   7293 	MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip);
   7294 	MDI_PI_FLAGS_CLR_HIDDEN(pip);
   7295 	MDI_PI_UNLOCK(pip);
   7296 
   7297 	i_ddi_di_cache_invalidate();
   7298 
   7299 	return (1);
   7300 }
   7301 
   7302 /*
   7303  * List of vhci class names:
   7304  * A vhci class name must be in this list only if the corresponding vhci
   7305  * driver intends to use the mdi provided bus config implementation
   7306  * (i.e., mdi_vhci_bus_config()).
   7307  */
   7308 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
   7309 #define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))
   7310 
   7311 /*
   7312  * During boot time, the on-disk vhci cache for every vhci class is read
   7313  * in the form of an nvlist and stored here.
   7314  */
   7315 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];
   7316 
   7317 /* nvpair names in vhci cache nvlist */
   7318 #define	MDI_VHCI_CACHE_VERSION	1
   7319 #define	MDI_NVPNAME_VERSION	"version"
   7320 #define	MDI_NVPNAME_PHCIS	"phcis"
   7321 #define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"
   7322 
   7323 /*
   7324  * Given vhci class name, return its on-disk vhci cache filename.
   7325  * Memory for the returned filename which includes the full path is allocated
   7326  * by this function.
   7327  */
   7328 static char *
   7329 vhclass2vhcache_filename(char *vhclass)
   7330 {
   7331 	char *filename;
   7332 	int len;
   7333 	static char *fmt = "/etc/devices/mdi_%s_cache";
   7334 
   7335 	/*
   7336 	 * fmt contains the on-disk vhci cache file name format;
   7337 	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
   7338 	 */
   7339 
   7340 	/* the -1 below is to account for "%s" in the format string */
   7341 	len = strlen(fmt) + strlen(vhclass) - 1;
   7342 	filename = kmem_alloc(len, KM_SLEEP);
   7343 	(void) snprintf(filename, len, fmt, vhclass);
   7344 	ASSERT(len == (strlen(filename) + 1));
   7345 	return (filename);
   7346 }
   7347 
   7348 /*
   7349  * initialize the vhci cache related data structures and read the on-disk
   7350  * vhci cached data into memory.
   7351  */
   7352 static void
   7353 setup_vhci_cache(mdi_vhci_t *vh)
   7354 {
   7355 	mdi_vhci_config_t *vhc;
   7356 	mdi_vhci_cache_t *vhcache;
   7357 	int i;
   7358 	nvlist_t *nvl = NULL;
   7359 
   7360 	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
   7361 	vh->vh_config = vhc;
   7362 	vhcache = &vhc->vhc_vhcache;
   7363 
   7364 	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);
   7365 
   7366 	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
   7367 	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);
   7368 
   7369 	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);
   7370 
   7371 	/*
   7372 	 * Create string hash; same as mod_hash_create_strhash() except that
   7373 	 * we use NULL key destructor.
   7374 	 */
   7375 	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
   7376 	    mdi_bus_config_cache_hash_size,
   7377 	    mod_hash_null_keydtor, mod_hash_null_valdtor,
   7378 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
   7379 
   7380 	/*
   7381 	 * The on-disk vhci cache is read during booting prior to the
   7382 	 * lights-out period by mdi_read_devices_files().
   7383 	 */
   7384 	for (i = 0; i < N_VHCI_CLASSES; i++) {
   7385 		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
   7386 			nvl = vhcache_nvl[i];
   7387 			vhcache_nvl[i] = NULL;
   7388 			break;
   7389 		}
   7390 	}
   7391 
   7392 	/*
   7393 	 * this is to cover the case of some one manually causing unloading
   7394 	 * (or detaching) and reloading (or attaching) of a vhci driver.
   7395 	 */
   7396 	if (nvl == NULL && modrootloaded)
   7397 		nvl = read_on_disk_vhci_cache(vh->vh_class);
   7398 
   7399 	if (nvl != NULL) {
   7400 		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
   7401 		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
   7402 			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
   7403 		else  {
   7404 			cmn_err(CE_WARN,
   7405 			    "%s: data file corrupted, will recreate",
   7406 			    vhc->vhc_vhcache_filename);
   7407 		}
   7408 		rw_exit(&vhcache->vhcache_lock);
   7409 		nvlist_free(nvl);
   7410 	}
   7411 
   7412 	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
   7413 	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");
   7414 
   7415 	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
   7416 	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
   7417 }
   7418 
   7419 /*
   7420  * free all vhci cache related resources
   7421  */
   7422 static int
   7423 destroy_vhci_cache(mdi_vhci_t *vh)
   7424 {
   7425 	mdi_vhci_config_t *vhc = vh->vh_config;
   7426 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
   7427 	mdi_vhcache_phci_t *cphci, *cphci_next;
   7428 	mdi_vhcache_client_t *cct, *cct_next;
   7429 	mdi_vhcache_pathinfo_t *cpi, *cpi_next;
   7430 
   7431 	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
   7432 		return (MDI_FAILURE);
   7433 
   7434 	kmem_free(vhc->vhc_vhcache_filename,
   7435 	    strlen(vhc->vhc_vhcache_filename) + 1);
   7436 
   7437 	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
   7438 
   7439 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
   7440 	    cphci = cphci_next) {
   7441 		cphci_next = cphci->cphci_next;
   7442 		free_vhcache_phci(cphci);
   7443 	}
   7444 
   7445 	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
   7446 		cct_next = cct->cct_next;
   7447 		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
   7448 			cpi_next = cpi->cpi_next;
   7449 			free_vhcache_pathinfo(cpi);
   7450 		}
   7451 		free_vhcache_client(cct);
   7452 	}
   7453 
   7454 	rw_destroy(&vhcache->vhcache_lock);
   7455 
   7456 	mutex_destroy(&vhc->vhc_lock);
   7457 	cv_destroy(&vhc->vhc_cv);
   7458 	kmem_free(vhc, sizeof (mdi_vhci_config_t));
   7459 	return (MDI_SUCCESS);
   7460 }
   7461 
   7462 /*
   7463  * Stop all vhci cache related async threads and free their resources.
   7464  */
   7465 static int
   7466 stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
   7467 {
   7468 	mdi_async_client_config_t *acc, *acc_next;
   7469 
   7470 	mutex_enter(&vhc->vhc_lock);
   7471 	vhc->vhc_flags |= MDI_VHC_EXIT;
   7472 	ASSERT(vhc->vhc_acc_thrcount >= 0);
   7473 	cv_broadcast(&vhc->vhc_cv);
   7474 
   7475 	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
   7476 	    vhc->vhc_acc_thrcount != 0) {
   7477 		mutex_exit(&vhc->vhc_lock);
   7478 		delay_random(mdi_delay);
   7479 		mutex_enter(&vhc->vhc_lock);
   7480 	}
   7481 
   7482 	vhc->vhc_flags &= ~MDI_VHC_EXIT;
   7483 
   7484 	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
   7485 		acc_next = acc->acc_next;
   7486 		free_async_client_config(acc);
   7487 	}
   7488 	vhc->vhc_acc_list_head = NULL;
   7489 	vhc->vhc_acc_list_tail = NULL;
   7490 	vhc->vhc_acc_count = 0;
   7491 
   7492 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
   7493 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
   7494 		mutex_exit(&vhc->vhc_lock);
   7495 		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
   7496 			vhcache_dirty(vhc);
   7497 			return (MDI_FAILURE);
   7498 		}
   7499 	} else
   7500 		mutex_exit(&vhc->vhc_lock);
   7501 
   7502 	if (callb_delete(vhc->vhc_cbid) != 0)
   7503 		return (MDI_FAILURE);
   7504 
   7505 	return (MDI_SUCCESS);
   7506 }
   7507 
   7508 /*
   7509  * Stop vhci cache flush thread
   7510  */
   7511 /* ARGSUSED */
   7512 static boolean_t
   7513 stop_vhcache_flush_thread(void *arg, int code)
   7514 {
   7515 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
   7516 
   7517 	mutex_enter(&vhc->vhc_lock);
   7518 	vhc->vhc_flags |= MDI_VHC_EXIT;
   7519 	cv_broadcast(&vhc->vhc_cv);
   7520 
   7521 	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
   7522 		mutex_exit(&vhc->vhc_lock);
   7523 		delay_random(mdi_delay);
   7524 		mutex_enter(&vhc->vhc_lock);
   7525 	}
   7526 
   7527 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
   7528 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
   7529 		mutex_exit(&vhc->vhc_lock);
   7530 		(void) flush_vhcache(vhc, 1);
   7531 	} else
   7532 		mutex_exit(&vhc->vhc_lock);
   7533 
   7534 	return (B_TRUE);
   7535 }
   7536 
   7537 /*
   7538  * Enqueue the vhcache phci (cphci) at the tail of the list
   7539  */
   7540 static void
   7541 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
   7542 {
   7543 	cphci->cphci_next = NULL;
   7544 	if (vhcache->vhcache_phci_head == NULL)
   7545 		vhcache->vhcache_phci_head = cphci;
   7546 	else
   7547 		vhcache->vhcache_phci_tail->cphci_next = cphci;
   7548 	vhcache->vhcache_phci_tail = cphci;
   7549 }
   7550 
   7551 /*
   7552  * Enqueue the vhcache pathinfo (cpi) at the tail of the list
   7553  */
   7554 static void
   7555 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
   7556     mdi_vhcache_pathinfo_t *cpi)
   7557 {
   7558 	cpi->cpi_next = NULL;
   7559 	if (cct->cct_cpi_head == NULL)
   7560 		cct->cct_cpi_head = cpi;
   7561 	else
   7562 		cct->cct_cpi_tail->cpi_next = cpi;
   7563 	cct->cct_cpi_tail = cpi;
   7564 }
   7565 
   7566 /*
   7567  * Enqueue the vhcache pathinfo (cpi) at the correct location in the
   7568  * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
   7569  * flag set come at the beginning of the list. All cpis which have this
   7570  * flag set come at the end of the list.
   7571  */
   7572 static void
   7573 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
   7574     mdi_vhcache_pathinfo_t *newcpi)
   7575 {
   7576 	mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
   7577 
   7578 	if (cct->cct_cpi_head == NULL ||
   7579 	    (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
   7580 		enqueue_tail_vhcache_pathinfo(cct, newcpi);
   7581 	else {
   7582 		for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
   7583 		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
   7584 		    prev_cpi = cpi, cpi = cpi->cpi_next)
   7585 			;
   7586 
   7587 		if (prev_cpi == NULL)
   7588 			cct->cct_cpi_head = newcpi;
   7589 		else
   7590 			prev_cpi->cpi_next = newcpi;
   7591 
   7592 		newcpi->cpi_next = cpi;
   7593 
   7594 		if (cpi == NULL)
   7595 			cct->cct_cpi_tail = newcpi;
   7596 	}
   7597 }
   7598 
   7599 /*
   7600  * Enqueue the vhcache client (cct) at the tail of the list
   7601  */
   7602 static void
   7603 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
   7604     mdi_vhcache_client_t *cct)
   7605 {
   7606 	cct->cct_next = NULL;
   7607 	if (vhcache->vhcache_client_head == NULL)
   7608 		vhcache->vhcache_client_head = cct;
   7609 	else
   7610 		vhcache->vhcache_client_tail->cct_next = cct;
   7611 	vhcache->vhcache_client_tail = cct;
   7612 }
   7613 
   7614 static void
   7615 free_string_array(char **str, int nelem)
   7616 {
   7617 	int i;
   7618 
   7619 	if (str) {
   7620 		for (i = 0; i < nelem; i++) {
   7621 			if (str[i])
   7622 				kmem_free(str[i], strlen(str[i]) + 1);
   7623 		}
   7624 		kmem_free(str, sizeof (char *) * nelem);
   7625 	}
   7626 }
   7627 
   7628 static void
   7629 free_vhcache_phci(mdi_vhcache_phci_t *cphci)
   7630 {
   7631 	kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
   7632 	kmem_free(cphci, sizeof (*cphci));
   7633 }
   7634 
   7635 static void
   7636 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
   7637 {
   7638 	kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
   7639 	kmem_free(cpi, sizeof (*cpi));
   7640 }
   7641 
   7642 static void
   7643 free_vhcache_client(mdi_vhcache_client_t *cct)
   7644 {
   7645 	kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
   7646 	kmem_free(cct, sizeof (*cct));
   7647 }
   7648 
   7649 static char *
   7650 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
   7651 {
   7652 	char *name_addr;
   7653 	int len;
   7654 
   7655 	len = strlen(ct_name) + strlen(ct_addr) + 2;
   7656 	name_addr = kmem_alloc(len, KM_SLEEP);
   7657 	(void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
   7658 
   7659 	if (ret_len)
   7660 		*ret_len = len;
   7661 	return (name_addr);
   7662 }
   7663 
   7664 /*
   7665  * Copy the contents of paddrnvl to vhci cache.
   7666  * paddrnvl nvlist contains path information for a vhci client.
   7667  * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
   7668  */
   7669 static void
   7670 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
   7671     mdi_vhcache_client_t *cct)
   7672 {
   7673 	nvpair_t *nvp = NULL;
   7674 	mdi_vhcache_pathinfo_t *cpi;
   7675 	uint_t nelem;
   7676 	uint32_t *val;
   7677 
   7678 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
   7679 		ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
   7680 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
   7681 		cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
   7682 		(void) nvpair_value_uint32_array(nvp, &val, &nelem);
   7683 		ASSERT(nelem == 2);
   7684 		cpi->cpi_cphci = cphci_list[val[0]];
   7685 		cpi->cpi_flags = val[1];
   7686 		enqueue_tail_vhcache_pathinfo(cct, cpi);
   7687 	}
   7688 }
   7689 
   7690 /*
   7691  * Copy the contents of caddrmapnvl to vhci cache.
   7692  * caddrmapnvl nvlist contains vhci client address to phci client address
   7693  * mappings. See the comment in mainnvl_to_vhcache() for the format of
   7694  * this nvlist.
   7695  */
   7696 static void
   7697 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
   7698     mdi_vhcache_phci_t *cphci_list[])
   7699 {
   7700 	nvpair_t *nvp = NULL;
   7701 	nvlist_t *paddrnvl;
   7702 	mdi_vhcache_client_t *cct;
   7703 
   7704 	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
   7705 		ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
   7706 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
   7707 		cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
   7708 		(void) nvpair_value_nvlist(nvp, &paddrnvl);
   7709 		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
   7710 		/* the client must contain at least one path */
   7711 		ASSERT(cct->cct_cpi_head != NULL);
   7712 
   7713 		enqueue_vhcache_client(vhcache, cct);
   7714 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
   7715 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
   7716 	}
   7717 }
   7718 
   7719 /*
   7720  * Copy the contents of the main nvlist to vhci cache.
   7721  *
   7722  * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
   7723  * The nvlist contains the mappings between the vhci client addresses and
   7724  * their corresponding phci client addresses.
   7725  *
   7726  * The structure of the nvlist is as follows:
   7727  *
   7728  * Main nvlist:
   7729  *	NAME		TYPE		DATA
   7730  *	version		int32		version number
   7731  *	phcis		string array	array of phci paths
   7732  *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
   7733  *
   7734  * structure of c2paddrs_nvl:
   7735  *	NAME		TYPE		DATA
   7736  *	caddr1		nvlist_t	paddrs_nvl1
   7737  *	caddr2		nvlist_t	paddrs_nvl2
   7738  *	...
   7739  * where caddr1, caddr2, ... are vhci client name and addresses in the
   7740  * form of "<clientname>@<clientaddress>".
   7741  * (for example: "ssd@2000002037cd9f72");
   7742  * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
   7743  *
   7744  * structure of paddrs_nvl:
   7745  *	NAME		TYPE		DATA
   7746  *	pi_addr1	uint32_array	(phci-id, cpi_flags)
   7747  *	pi_addr2	uint32_array	(phci-id, cpi_flags)
   7748  *	...
   7749  * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
   7750  * (so called pi_addrs, for example: "w2100002037cd9f72,0");
   7751  * phci-ids are integers that identify pHCIs to which the
   7752  * the bus specific address belongs to. These integers are used as an index
   7753  * into to the phcis string array in the main nvlist to get the pHCI path.
   7754  */
   7755 static int
   7756 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
   7757 {
   7758 	char **phcis, **phci_namep;
   7759 	uint_t nphcis;
   7760 	mdi_vhcache_phci_t *cphci, **cphci_list;
   7761 	nvlist_t *caddrmapnvl;
   7762 	int32_t ver;
   7763 	int i;
   7764 	size_t cphci_list_size;
   7765 
   7766 	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
   7767 
   7768 	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
   7769 	    ver != MDI_VHCI_CACHE_VERSION)
   7770 		return (MDI_FAILURE);
   7771 
   7772 	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
   7773 	    &nphcis) != 0)
   7774 		return (MDI_SUCCESS);
   7775 
   7776 	ASSERT(nphcis > 0);
   7777 
   7778 	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
   7779 	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
   7780 	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
   7781 		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
   7782 		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
   7783 		enqueue_vhcache_phci(vhcache, cphci);
   7784 		cphci_list[i] = cphci;
   7785 	}
   7786 
   7787 	ASSERT(vhcache->vhcache_phci_head != NULL);
   7788 
   7789 	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
   7790 		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
   7791 
   7792 	kmem_free(cphci_list, cphci_list_size);
   7793 	return (MDI_SUCCESS);
   7794 }
   7795 
   7796 /*
   7797  * Build paddrnvl for the specified client using the information in the
   7798  * vhci cache and add it to the caddrmapnnvl.
   7799  * Returns 0 on success, errno on failure.
   7800  */
   7801 static int
   7802 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
   7803     nvlist_t *caddrmapnvl)
   7804 {
   7805 	mdi_vhcache_pathinfo_t *cpi;
   7806 	nvlist_t *nvl;
   7807 	int err;
   7808 	uint32_t val[2];
   7809 
   7810 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
   7811 
   7812 	if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
   7813 		return (err);
   7814 
   7815 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
   7816 		val[0] = cpi->cpi_cphci->cphci_id;
   7817 		val[1] = cpi->cpi_flags;
   7818 		if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
   7819 		    != 0)
   7820 			goto out;
   7821 	}
   7822 
   7823 	err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
   7824 out:
   7825 	nvlist_free(nvl);
   7826 	return (err);
   7827 }
   7828 
   7829 /*
   7830  * Build caddrmapnvl using the information in the vhci cache
   7831  * and add it to the mainnvl.
   7832  * Returns 0 on success, errno on failure.
   7833  */
   7834 static int
   7835 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
   7836 {
   7837 	mdi_vhcache_client_t *cct;
   7838 	nvlist_t *nvl;
   7839 	int err;
   7840 
   7841 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
   7842 
   7843 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
   7844 		return (err);
   7845 
   7846 	for (cct = vhcache->vhcache_client_head; cct != NULL;
   7847 	    cct = cct->cct_next) {
   7848 		if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
   7849 			goto out;
   7850 	}
   7851 
   7852 	err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
   7853 out:
   7854 	nvlist_free(nvl);
   7855 	return (err);
   7856 }
   7857 
   7858 /*
   7859  * Build nvlist using the information in the vhci cache.
   7860  * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
   7861  * Returns nvl on success, NULL on failure.
   7862  */
   7863 static nvlist_t *
   7864 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
   7865 {
   7866 	mdi_vhcache_phci_t *cphci;
   7867 	uint_t phci_count;
   7868 	char **phcis;
   7869 	nvlist_t *nvl;
   7870 	int err, i;
   7871 
   7872 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
   7873 		nvl = NULL;
   7874 		goto out;
   7875 	}
   7876 
   7877 	if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
   7878 	    MDI_VHCI_CACHE_VERSION)) != 0)
   7879 		goto out;
   7880 
   7881 	rw_enter(&vhcache->vhcache_lock, RW_READER);
   7882 	if (vhcache->vhcache_phci_head == NULL) {
   7883 		rw_exit(&vhcache->vhcache_lock);
   7884 		return (nvl);
   7885 	}
   7886 
   7887 	phci_count = 0;
   7888 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
   7889 	    cphci = cphci->cphci_next)
   7890 		cphci->cphci_id = phci_count++;
   7891 
   7892 	/* build phci pathname list */
   7893 	phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
   7894 	for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
   7895 	    cphci = cphci->cphci_next, i++)
   7896 		phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
   7897 
   7898 	err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
   7899 	    phci_count);
   7900 	free_string_array(phcis, phci_count);
   7901 
   7902 	if (err == 0 &&
   7903 	    (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
   7904 		rw_exit(&vhcache->vhcache_lock);
   7905 		return (nvl);
   7906 	}
   7907 
   7908 	rw_exit(&vhcache->vhcache_lock);
   7909 out:
   7910 	if (nvl)
   7911 		nvlist_free(nvl);
   7912 	return (NULL);
   7913 }
   7914 
   7915 /*
   7916  * Lookup vhcache phci structure for the specified phci path.
   7917  */
   7918 static mdi_vhcache_phci_t *
   7919 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
   7920 {
   7921 	mdi_vhcache_phci_t *cphci;
   7922 
   7923 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
   7924 
   7925 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
   7926 	    cphci = cphci->cphci_next) {
   7927 		if (strcmp(cphci->cphci_path, phci_path) == 0)
   7928 			return (cphci);
   7929 	}
   7930 
   7931 	return (NULL);
   7932 }
   7933 
   7934 /*
   7935  * Lookup vhcache phci structure for the specified phci.
   7936  */
   7937 static mdi_vhcache_phci_t *
   7938 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
   7939 {
   7940 	mdi_vhcache_phci_t *cphci;
   7941 
   7942 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
   7943 
   7944 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
   7945 	    cphci = cphci->cphci_next) {
   7946 		if (cphci->cphci_phci == ph)
   7947 			return (cphci);
   7948 	}
   7949 
   7950 	return (NULL);
   7951 }
   7952 
   7953 /*
   7954  * Add the specified phci to the vhci cache if not already present.
   7955  */
   7956 static void
   7957 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
   7958 {
   7959 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
   7960 	mdi_vhcache_phci_t *cphci;
   7961 	char *pathname;
   7962 	int cache_updated;
   7963 
   7964 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
   7965 
   7966 	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
   7967 	(void) ddi_pathname(ph->ph_dip, pathname);
   7968 	if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
   7969 	    != NULL) {
   7970 		cphci->cphci_phci = ph;
   7971 		cache_updated = 0;
   7972 	} else {
   7973 		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
   7974 		cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
   7975 		cphci->cphci_phci = ph;
   7976 		enqueue_vhcache_phci(vhcache, cphci);
   7977 		cache_updated = 1;
   7978 	}
   7979 
   7980 	rw_exit(&vhcache->vhcache_lock);
   7981 
   7982 	/*
   7983 	 * Since a new phci has been added, reset
   7984 	 * vhc_path_discovery_cutoff_time to allow for discovery of paths
   7985 	 * during next vhcache_discover_paths().
   7986 	 */
   7987 	mutex_enter(&vhc->vhc_lock);
   7988 	vhc->vhc_path_discovery_cutoff_time = 0;
   7989 	mutex_exit(&vhc->vhc_lock);
   7990 
   7991 	kmem_free(pathname, MAXPATHLEN);
   7992 	if (cache_updated)
   7993 		vhcache_dirty(vhc);
   7994 }
   7995 
   7996 /*
   7997  * Remove the reference to the specified phci from the vhci cache.
   7998  */
   7999 static void
   8000 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
   8001 {
   8002 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
   8003 	mdi_vhcache_phci_t *cphci;
   8004 
   8005 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
   8006 	if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
   8007 		/* do not remove the actual mdi_vhcache_phci structure */
   8008 		cphci->cphci_phci = NULL;
   8009 	}
   8010 	rw_exit(&vhcache->vhcache_lock);
   8011 }
   8012 
   8013 static void
   8014 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
   8015     mdi_vhcache_lookup_token_t *src)
   8016 {
   8017 	if (src == NULL) {
   8018 		dst->lt_cct = NULL;
   8019 		dst->lt_cct_lookup_time = 0;
   8020 	} else {
   8021 		dst->lt_cct = src->lt_cct;
   8022 		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
   8023 	}
   8024 }
   8025 
   8026 /*
   8027  * Look up vhcache client for the specified client.
   8028  */
   8029 static mdi_vhcache_client_t *
   8030 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
   8031     mdi_vhcache_lookup_token_t *token)
   8032 {
   8033 	mod_hash_val_t hv;
   8034 	char *name_addr;
   8035 	int len;
   8036 
   8037 	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
   8038 
   8039 	/*
   8040 	 * If no vhcache clean occurred since the last lookup, we can
   8041 	 * simply return the cct from the last lookup operation.
   8042 	 * It works because ccts are never freed except during the vhcache
   8043 	 * cleanup operation.
   8044 	 */
   8045 	if (token != NULL &&
   8046 	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
   8047 		return (token->lt_cct);
   8048 
   8049 	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
   8050 	if (mod_hash_find(vhcache->vhcache_client_hash,
   8051 	    (mod_hash_key_t)name_addr, &hv) == 0) {
   8052 		if (token) {
   8053 			token->lt_cct = (mdi_vhcache_client_t *)hv;
   8054 			token->lt_cct_lookup_time = ddi_get_lbolt64();
   8055 		}
   8056 	} else {
   8057 		if (token) {
   8058 			token->lt_cct = NULL;
   8059 			token->lt_cct_lookup_time = 0;
   8060 		}
   8061 		hv = NULL;
   8062 	}
   8063 	kmem_free(name_addr, len);
   8064 	return ((mdi_vhcache_client_t *)hv);
   8065 }
   8066 
   8067 /*
   8068  * Add the specified path to the vhci cache if not already present.
   8069  * Also add the vhcache client for the client corresponding to this path
   8070  * if it doesn't already exist.
   8071  */
   8072 static void
   8073 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
   8074 {
   8075 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
   8076 	mdi_vhcache_client_t *cct;
   8077 	mdi_vhcache_pathinfo_t *cpi;
   8078 	mdi_phci_t *ph = pip->pi_phci;
   8079 	mdi_client_t *ct = pip->pi_client;
   8080 	int cache_updated = 0;
   8081 
   8082 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
   8083 
   8084 	/* if vhcache client for this pip doesn't already exist, add it */
   8085 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
   8086 	    NULL)) == NULL) {
   8087 		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
   8088 		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
   8089 		    ct->ct_guid, NULL);
   8090 		enqueue_vhcache_client(vhcache, cct);
   8091 		(void) mod_hash_insert(vhcache->vhcache_client_hash,
   8092 		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
   8093 		cache_updated = 1;
   8094 	}
   8095 
   8096 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
   8097 		if (cpi->cpi_cphci->cphci_phci == ph &&
   8098 		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
   8099 			cpi->cpi_pip = pip;
   8100 			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
   8101 				cpi->cpi_flags &=
   8102 				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
   8103 				sort_vhcache_paths(cct);
   8104 				cache_updated = 1;
   8105 			}
   8106 			break;
   8107 		}
   8108 	}
   8109 
   8110 	if (cpi == NULL) {
   8111 		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
   8112 		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
   8113 		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
   8114 		ASSERT(cpi->cpi_cphci != NULL);
   8115 		cpi->cpi_pip = pip;
   8116 		enqueue_vhcache_pathinfo(cct, cpi);
   8117 		cache_updated = 1;
   8118 	}
   8119 
   8120 	rw_exit(&vhcache->vhcache_lock);
   8121 
   8122 	if (cache_updated)
   8123 		vhcache_dirty(vhc);
   8124 }
   8125 
   8126 /*
   8127  * Remove the reference to the specified path from the vhci cache.
   8128  */
   8129 static void
   8130 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
   8131 {
   8132 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
   8133 	mdi_client_t *ct = pip->pi_client;
   8134 	mdi_vhcache_client_t *cct;
   8135 	mdi_vhcache_pathinfo_t *cpi;
   8136 
   8137 	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
   8138 	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
   8139 	    NULL)) != NULL) {
   8140 		for (cpi = cct->cct_cpi_head; cpi != NULL;
   8141 		    cpi = cpi->cpi_next) {
   8142 			if (cpi->cpi_pip == pip) {
   8143 				cpi->cpi_pip = NULL;
   8144 				break;
   8145 			}
   8146 		}
   8147 	}
   8148 	rw_exit(&vhcache->vhcache_lock);
   8149 }
   8150 
   8151 /*
   8152  * Flush the vhci cache to disk.
   8153  * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
   8154  */
   8155 static int
   8156 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
   8157 {
   8158 	nvlist_t *nvl;
   8159 	int err;
   8160 	int rv;
   8161 
   8162 	/*
   8163 	 * It is possible that the system may shutdown before
   8164 	 * i_ddi_io_initialized (during stmsboot for example). To allow for
   8165 	 * flushing the cache in this case do not check for
   8166 	 * i_ddi_io_initialized when force flag is set.
   8167 	 */
   8168 	if (force_flag == 0 && !i_ddi_io_initialized())
   8169 		return (MDI_FAILURE);
   8170 
   8171 	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
   8172 		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
   8173 		nvlist_free(nvl);
   8174 	} else
   8175 		err = EFAULT;
   8176 
   8177 	rv = MDI_SUCCESS;
   8178 	mutex_enter(&vhc->vhc_lock);
   8179 	if (err != 0) {
   8180 		if (err == EROFS) {
   8181 			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
   8182 			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
   8183 			    MDI_VHC_VHCACHE_DIRTY);
   8184 		} else {
   8185 			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
   8186 				cmn_err(CE_CONT, "%s: update failed\n",
   8187 				    vhc->vhc_vhcache_filename);
   8188 				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
   8189 			}
   8190 			rv = MDI_FAILURE;
   8191 		}
   8192 	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
   8193 		cmn_err(CE_CONT,
   8194 		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
   8195 		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
   8196 	}
   8197 	mutex_exit(&vhc->vhc_lock);
   8198 
   8199 	return (rv);
   8200 }
   8201 
   8202 /*
   8203  * Call flush_vhcache() to flush the vhci cache at the scheduled time.
   8204  * Exits itself if left idle for the idle timeout period.
   8205  */
   8206 static void
   8207 vhcache_flush_thread(void *arg)
   8208 {
   8209 	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
   8210 	clock_t idle_time, quit_at_ticks;
   8211 	callb_cpr_t cprinfo;
   8212 
   8213 	/* number of seconds to sleep idle before exiting */
   8214 	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;
   8215 
   8216 	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
   8217 	    "mdi_vhcache_flush");
   8218 	mutex_enter(&vhc->vhc_lock);
   8219 	for (; ; ) {
   8220 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
   8221 		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
   8222 			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
   8223 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
   8224 				(void) cv_timedwait(&vhc->vhc_cv,
   8225 				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
   8226 				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
   8227 			} else {
   8228 				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
   8229 				mutex_exit(&vhc->vhc_lock);
   8230 
   8231 				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
   8232 					vhcache_dirty(vhc);
   8233 
   8234 				mutex_enter(&vhc->vhc_lock);
   8235 			}
   8236 		}
   8237 
   8238 		quit_at_ticks = ddi_get_lbolt() + idle_time;
   8239 
   8240 		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
   8241 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
   8242 		    ddi_get_lbolt() < quit_at_ticks) {
   8243 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
   8244 			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
   8245 			    quit_at_ticks);
   8246 			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
   8247 		}
   8248 
   8249 		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
   8250 		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
   8251 			goto out;
   8252 	}
   8253 
   8254 out:
   8255 	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
   8256 	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
   8257 	CALLB_CPR_EXIT(&cprinfo);
   8258 }
   8259 
   8260 /*
   8261  * Make vhci cache dirty and schedule flushing by vhcache flush thread.
   8262  */
   8263 static void
   8264 vhcache_dirty(mdi_vhci_config_t *vhc)
   8265 {
   8266 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
   8267 	int create_thread;
   8268 
   8269 	rw_enter(&vhcache->vhcache_lock, RW_READER);
   8270 	/* do not flush cache until the cache is fully built */
   8271 	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
   8272 		rw_exit(&vhcache->vhcache_lock);
   8273 		return;
   8274 	}
   8275 	rw_exit(&vhcache->vhcache_lock);
   8276 
   8277 	mutex_enter(&vhc->vhc_lock);
   8278 	if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
   8279 		mutex_exit(&vhc->vhc_lock);
   8280 		return;
   8281 	}
   8282 
   8283 	vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
   8284 	vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
   8285 	    mdi_vhcache_flush_delay * TICKS_PER_SECOND;
   8286 	if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
   8287 		cv_broadcast(&vhc->vhc_cv);
   8288 		create_thread = 0;
   8289 	} else {
   8290 		vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
   8291 		create_thread = 1;
   8292 	}
   8293 	mutex_exit(&vhc->vhc_lock);
   8294 
   8295 	if (create_thread)
   8296 		(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
   8297 		    0, &p0, TS_RUN, minclsyspri);
   8298 }
   8299 
   8300 /*
   8301  * phci bus config structure - one for for each phci bus config operation that
   8302  * we initiate on behalf of a vhci.
   8303  */
   8304 typedef struct mdi_phci_bus_config_s {
   8305 	char *phbc_phci_path;
   8306 	struct mdi_vhci_bus_config_s *phbc_vhbusconfig;	/* vhci bus config */
   8307 	struct mdi_phci_bus_config_s *phbc_next;
   8308 } mdi_phci_bus_config_t;
   8309 
   8310 /* vhci bus config structure - one for each vhci bus config operation */
   8311 typedef struct mdi_vhci_bus_config_s {
   8312 	ddi_bus_config_op_t vhbc_op;	/* bus config op */
   8313 	major_t vhbc_op_major;		/* bus config op major */
   8314 	uint_t vhbc_op_flags;		/* bus config op flags */
   8315 	kmutex_t vhbc_lock;
   8316 	kcondvar_t vhbc_cv;
   8317 	int vhbc_thr_count;
   8318 } mdi_vhci_bus_config_t;
   8319 
   8320 /*
   8321  * bus config the specified phci
   8322  */
   8323 static void
   8324 bus_config_phci(void *arg)
   8325 {
   8326 	mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
   8327 	mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
   8328 	dev_info_t *ph_dip;
   8329 
   8330 	/*
   8331 	 * first configure all path components upto phci and then configure
   8332 	 * the phci children.
   8333 	 */
   8334 	if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
   8335 	    != NULL) {
   8336 		if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
   8337 		    vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
   8338 			(void) ndi_devi_config_driver(ph_dip,
   8339 			    vhbc->vhbc_op_flags,
   8340 			    vhbc->vhbc_op_major);
   8341 		} else
   8342 			(void) ndi_devi_config(ph_dip,
   8343 			    vhbc->vhbc_op_flags);
   8344 
   8345 		/* release the hold that e_ddi_hold_devi_by_path() placed */
   8346 		ndi_rele_devi(ph_dip);
   8347 	}
   8348 
   8349 	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
   8350 	kmem_free(phbc, sizeof (*phbc));
   8351 
   8352 	mutex_enter(&vhbc->vhbc_lock);
   8353 	vhbc->vhbc_thr_count--;
   8354 	if (vhbc->vhbc_thr_count == 0)
   8355 		cv_broadcast(&vhbc->vhbc_cv);
   8356 	mutex_exit(&vhbc->vhbc_lock);
   8357 }
   8358 
   8359 /*
   8360  * Bus config all phcis associated with the vhci in parallel.
   8361  * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
   8362  */
   8363 static void
   8364 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
   8365     ddi_bus_config_op_t op, major_t maj)
   8366 {
   8367 	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
   8368 	mdi_vhci_bus_config_t *vhbc;
   8369 	mdi_vhcache_phci_t *cphci;
   8370 
   8371 	rw_enter(&vhcache->vhcache_lock, RW_READER);
   8372 	if (vhcache->vhcache_phci_head == NULL) {
   8373 		rw_exit(&vhcache->vhcache_lock);
   8374 		return;
   8375 	}
   8376 
   8377 	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);
   8378 
   8379 	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
   8380 	    cphci = cphci->cphci_next) {
   8381 		/* skip phcis that haven't attached before root is available */
   8382 		if (!modrootloaded && (cphci->cphci_phci == NULL))
   8383 			continue;
   8384 		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
   8385 		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
   8386 		    KM_SLEEP);
   8387 		phbc->phbc_vhbusconfig = vhbc;
   8388 		phbc->phbc_next = phbc_head;
   8389 		phbc_head = phbc;
   8390 		vhbc->vhbc_thr_count++;
   8391 	}
   8392 	rw_exit(&vhcache->vhcache_lock);
   8393 
   8394 	vhbc->vhbc_op = op;
   8395 	vhbc->vhbc_op_major = maj;
   8396 	vhbc->vhbc_op_flags = NDI_NO_EVENT |
   8397 	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
   8398 	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
   8399 	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);
   8400 
   8401 	/* now create threads to initiate bus config on all phcis in parallel */
   8402 	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
   8403 		phbc_next = phbc->phbc_next;
   8404 		if (mdi_mtc_off)
   8405 			bus_config_phci((void *)phbc);
   8406 		else
   8407 			(void) thread_create(NULL, 0, bus_config_phci, phbc,
   8408 			    0, &p0, TS_RUN, minclsyspri);
   8409 	}
   8410 
   8411 	mutex_enter(&vhbc->vhbc_lock);
   8412 	/* wait until all threads exit */
   8413 	while (vhbc->vhbc_thr_count > 0)
   8414 		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
   8415 	mutex_exit(&vhbc->vhbc_lock);
   8416 
   8417 	mutex_destroy(&vhbc->vhbc_lock);
   8418 	cv_destroy(&vhbc->vhbc_cv);
   8419 	kmem_free(vhbc, sizeof (*vhbc));
   8420 }
   8421 
   8422 /*
   8423  * Single threaded version of bus_config_all_phcis()
   8424  */
   8425 static void
   8426 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
   8427     ddi_bus_config_op_t op, major_t maj)
   8428 {
   8429 	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
   8430 
   8431 	single_threaded_vhconfig_enter(vhc);
   8432 	bus_config_all_phcis(vhcache, flags, op, maj);
   8433 	single_threaded_vhconfig_exit(vhc);
   8434 }
   8435 
   8436 /*
   8437  * Perform BUS_CONFIG_ONE on the specified child of the phci.
   8438  * The path includes the child component in addition to the phci path.
   8439  */
   8440 static int
   8441 bus_config_one_phci_child(char *path)
   8442 {
   8443 	dev_info_t *ph_dip, *child;
   8444 	char *devnm;
   8445 	int rv = MDI_FAILURE;
   8446 
   8447 	/* extract the child component of the phci */
   8448 	devnm = strrchr(path, '/');
   8449 	*devnm++ = '\0';
   8450 
   8451 	/*
   8452 	 * first configure all path components upto phci and then
   8453 	 * configure the phci child.
   8454 	 */
   8455 	if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
   8456 		if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
   8457 		    NDI_SUCCESS) {
   8458 			/*
   8459 			 * release the hold that ndi_devi_config_one() placed
   8460 			 */
   8461 			ndi_rele_devi(child);
   8462 			rv = MDI_SUCCESS;
   8463 		}
   8464 
   8465 		/* release the hold that e_ddi_hold_devi_by_path() placed */
   8466 		ndi_rele_devi(ph_dip);
   8467 	}
   8468 
   8469 	devnm--;
   8470 	*devnm = '/';
   8471 	return (rv);
   8472 }
   8473 
   8474 /*
   8475  * Build a list of phci client paths for the specified vhci client.
   8476  * The list includes only those phci client paths which aren't configured yet.
   8477  */
   8478 static mdi_phys_path_t *
   8479 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
   8480 {
   8481 	mdi_vhcache_pathinfo_t *cpi;
   8482 	mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
   8483 	int config_path, len;
   8484 
   8485 	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
   8486 		/*
   8487 		 * include only those paths that aren't configured.
   8488 		 */
   8489 		config_path = 0;
   8490 		if (cpi->cpi_pip == NULL)
   8491 			config_path = 1;
   8492 		else {
   8493 			MDI_PI_LOCK(cpi->cpi_pip);
   8494 			if (MDI_PI_IS_INIT(cpi->cpi_pip))
   8495 				config_path = 1;
   8496 			MDI_PI_UNLOCK(cpi->cpi_pip);
   8497 		}
   8498 
   8499 		if (config_path) {
   8500 			pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
   8501 			len = strlen(cpi->cpi_cphci->cphci_path) +
   8502 			    strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
   8503 			pp->phys_path = kmem_alloc(len, KM_SLEEP);
   8504 			(void) snprintf(pp->phys_path, len, "%s/%s@%s",
   8505 			    cpi->cpi_cphci->cphci_path, ct_name,
   8506 			    cpi->cpi_addr);
   8507 			pp->phys_path_next = NULL;
   8508 
   8509 			if (pp_head == NULL)
   8510 				pp_head = pp;
   8511 			else
   8512 				pp_tail->phys_path_next = pp;
   8513 			pp_tail = pp;
   8514 		}
   8515 	}
   8516 
   8517 	return (pp_head);
   8518 }
   8519 
   8520 /*
   8521  * Free the memory allocated for phci client path list.
   8522  */
   8523 static void
   8524 free_phclient_path_list(mdi_phys_path_t *pp_head)
   8525 {
   8526 	mdi_phys_path_t *pp, *pp_next;
   8527 
   8528 	for (pp = pp_head; pp != NULL; pp = pp_next) {
   8529 		pp_next = pp->phys_path_next;
   8530 		kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
   8531 		kmem_free(pp, sizeof (*pp));
   8532 	}
   8533 }
   8534 
   8535 /*
   8536  * Allocated async client structure and initialize with the specified values.
   8537  */
   8538 static mdi_async_client_config_t *
   8539 alloc_async_client_config(char *ct_name, char *ct_addr,
   8540     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
   8541 {
   8542 	mdi_async_client_config_t *acc;
   8543 
   8544 	acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
   8545 	acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
   8546 	acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
   8547 	acc->acc_phclient_path_list_head = pp_head;
   8548 	init_vhcache_lookup_token(&acc->acc_token, tok);
   8549 	acc->acc_next = NULL;
   8550 	return (acc);
   8551 }
   8552 
   8553 /*
   8554  * Free the memory allocated for the async client structure and their members.
   8555  */
   8556 static void
   8557 free_async_client_config(mdi_async_client_config_t *acc)
   8558 {
   8559 	if (acc->acc_phclient_path_list_head)
   8560 		free_phclient_path_list(acc->acc_phclient_path_list_head);
   8561 	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
   8562 	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
   8563 	kmem_free(acc, sizeof (*