Home | History | Annotate | Download | only in os
      1      0     stevel /*
      2      0     stevel  * CDDL HEADER START
      3      0     stevel  *
      4      0     stevel  * The contents of this file are subject to the terms of the
      5   1909   cm136836  * Common Development and Distribution License (the "License").
      6   1909   cm136836  * You may not use this file except in compliance with the License.
      7      0     stevel  *
      8      0     stevel  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9      0     stevel  * or http://www.opensolaris.org/os/licensing.
     10      0     stevel  * See the License for the specific language governing permissions
     11      0     stevel  * and limitations under the License.
     12      0     stevel  *
     13      0     stevel  * When distributing Covered Code, include this CDDL HEADER in each
     14      0     stevel  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15      0     stevel  * If applicable, add the following below this CDDL HEADER, with the
     16      0     stevel  * fields enclosed by brackets "[]" replaced with your own identifying
     17      0     stevel  * information: Portions Copyright [yyyy] [name of copyright owner]
     18      0     stevel  *
     19      0     stevel  * CDDL HEADER END
     20      0     stevel  */
     21      0     stevel /*
     22   9167    Randall  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23      0     stevel  * Use is subject to license terms.
     24      0     stevel  */
     25      0     stevel 
     26      0     stevel /*
     27      0     stevel  * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more
     28      0     stevel  * detailed discussion of the overall mpxio architecture.
     29      0     stevel  *
     30      0     stevel  * Default locking order:
     31      0     stevel  *
     32   2155        cth  * _NOTE(LOCK_ORDER(mdi_mutex mdi_vhci::vh_phci_mutex))
     33   2155        cth  * _NOTE(LOCK_ORDER(mdi_mutex mdi_vhci::vh_client_mutex))
     34   2155        cth  * _NOTE(LOCK_ORDER(mdi_vhci::vh_phci_mutex mdi_phci::ph_mutex))
     35   2155        cth  * _NOTE(LOCK_ORDER(mdi_vhci::vh_client_mutex mdi_client::ct_mutex))
     36      0     stevel  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
     37      0     stevel  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
     38      0     stevel  * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
     39      0     stevel  */
     40      0     stevel 
     41      0     stevel #include <sys/note.h>
     42      0     stevel #include <sys/types.h>
     43      0     stevel #include <sys/varargs.h>
     44      0     stevel #include <sys/param.h>
     45      0     stevel #include <sys/errno.h>
     46      0     stevel #include <sys/uio.h>
     47      0     stevel #include <sys/buf.h>
     48      0     stevel #include <sys/modctl.h>
     49      0     stevel #include <sys/open.h>
     50      0     stevel #include <sys/kmem.h>
     51      0     stevel #include <sys/poll.h>
     52      0     stevel #include <sys/conf.h>
     53      0     stevel #include <sys/bootconf.h>
     54      0     stevel #include <sys/cmn_err.h>
     55      0     stevel #include <sys/stat.h>
     56      0     stevel #include <sys/ddi.h>
     57      0     stevel #include <sys/sunddi.h>
     58      0     stevel #include <sys/ddipropdefs.h>
     59      0     stevel #include <sys/sunndi.h>
     60      0     stevel #include <sys/ndi_impldefs.h>
     61      0     stevel #include <sys/promif.h>
     62      0     stevel #include <sys/sunmdi.h>
     63      0     stevel #include <sys/mdi_impldefs.h>
     64      0     stevel #include <sys/taskq.h>
     65      0     stevel #include <sys/epm.h>
     66      0     stevel #include <sys/sunpm.h>
     67    878      ramat #include <sys/modhash.h>
     68    893   rs135747 #include <sys/disp.h>
     69    893   rs135747 #include <sys/autoconf.h>
     70   2402   pramodbg #include <sys/sysmacros.h>
     71      0     stevel 
     72      0     stevel #ifdef	DEBUG
     73      0     stevel #include <sys/debug.h>
     74      0     stevel int	mdi_debug = 1;
     75   2155        cth int	mdi_debug_logonly = 0;
     76  10696      David #define	MDI_DEBUG(dbglevel, pargs) if (mdi_debug >= (dbglevel))	i_mdi_log pargs
     77  10696      David #define	MDI_WARN	CE_WARN, __func__
     78  10696      David #define	MDI_NOTE	CE_NOTE, __func__
     79  10696      David #define	MDI_CONT	CE_CONT, __func__
     80  10696      David static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...);
     81      0     stevel #else	/* !DEBUG */
     82  10696      David #define	MDI_DEBUG(dbglevel, pargs)
     83      0     stevel #endif	/* DEBUG */
     84  10696      David int	mdi_debug_consoleonly = 0;
     85  11052      Chris int	mdi_delay = 3;
     86      0     stevel 
     87      0     stevel extern pri_t	minclsyspri;
     88      0     stevel extern int	modrootloaded;
     89      0     stevel 
     90      0     stevel /*
     91      0     stevel  * Global mutex:
     92   2155        cth  * Protects vHCI list and structure members.
     93      0     stevel  */
     94      0     stevel kmutex_t	mdi_mutex;
     95      0     stevel 
     96      0     stevel /*
     97      0     stevel  * Registered vHCI class driver lists
     98      0     stevel  */
     99      0     stevel int		mdi_vhci_count;
    100      0     stevel mdi_vhci_t	*mdi_vhci_head;
    101      0     stevel mdi_vhci_t	*mdi_vhci_tail;
    102      0     stevel 
    103      0     stevel /*
    104      0     stevel  * Client Hash Table size
    105      0     stevel  */
    106      0     stevel static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
    107      0     stevel 
    108      0     stevel /*
    109      0     stevel  * taskq interface definitions
    110      0     stevel  */
    111      0     stevel #define	MDI_TASKQ_N_THREADS	8
    112      0     stevel #define	MDI_TASKQ_PRI		minclsyspri
    113      0     stevel #define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
    114      0     stevel #define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)
    115      0     stevel 
    116      0     stevel taskq_t				*mdi_taskq;
    117      0     stevel static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
    118      0     stevel 
    119    878      ramat #define	TICKS_PER_SECOND	(drv_usectohz(1000000))
    120    878      ramat 
    121    878      ramat /*
    122    878      ramat  * The data should be "quiet" for this interval (in seconds) before the
    123    878      ramat  * vhci cached data is flushed to the disk.
    124    878      ramat  */
    125    878      ramat static int mdi_vhcache_flush_delay = 10;
    126    878      ramat 
    127    878      ramat /* number of seconds the vhcache flush daemon will sleep idle before exiting */
    128    878      ramat static int mdi_vhcache_flush_daemon_idle_time = 60;
    129    878      ramat 
    130    878      ramat /*
    131    972      ramat  * MDI falls back to discovery of all paths when a bus_config_one fails.
    132    972      ramat  * The following parameters can be used to tune this operation.
    133    972      ramat  *
    134    972      ramat  * mdi_path_discovery_boot
    135    972      ramat  *	Number of times path discovery will be attempted during early boot.
    136    972      ramat  *	Probably there is no reason to ever set this value to greater than one.
    137    972      ramat  *
    138    972      ramat  * mdi_path_discovery_postboot
    139    972      ramat  *	Number of times path discovery will be attempted after early boot.
    140    972      ramat  *	Set it to a minimum of two to allow for discovery of iscsi paths which
    141    972      ramat  *	may happen very late during booting.
    142    972      ramat  *
    143    972      ramat  * mdi_path_discovery_interval
    144    972      ramat  *	Minimum number of seconds MDI will wait between successive discovery
    145    972      ramat  *	of all paths. Set it to -1 to disable discovery of all paths.
    146    972      ramat  */
    147    972      ramat static int mdi_path_discovery_boot = 1;
    148    972      ramat static int mdi_path_discovery_postboot = 2;
    149    972      ramat static int mdi_path_discovery_interval = 10;
    150    972      ramat 
    151    972      ramat /*
    152    878      ramat  * number of seconds the asynchronous configuration thread will sleep idle
    153    878      ramat  * before exiting.
    154    878      ramat  */
    155    878      ramat static int mdi_async_config_idle_time = 600;
    156    878      ramat 
    157    878      ramat static int mdi_bus_config_cache_hash_size = 256;
    158    878      ramat 
    159    878      ramat /* turns off multithreaded configuration for certain operations */
    160    878      ramat static int mdi_mtc_off = 0;
    161      0     stevel 
    162      0     stevel /*
    163   6640        cth  * The "path" to a pathinfo node is identical to the /devices path to a
    164   6640        cth  * devinfo node had the device been enumerated under a pHCI instead of
    165   6640        cth  * a vHCI.  This pathinfo "path" is associated with a 'path_instance'.
    166   6640        cth  * This association persists across create/delete of the pathinfo nodes,
    167   6640        cth  * but not across reboot.
    168   6640        cth  */
    169   6640        cth static uint_t		mdi_pathmap_instance = 1;	/* 0 -> any path */
    170   6640        cth static int		mdi_pathmap_hash_size = 256;
    171   6640        cth static kmutex_t		mdi_pathmap_mutex;
    172   6640        cth static mod_hash_t	*mdi_pathmap_bypath;		/* "path"->instance */
    173   6640        cth static mod_hash_t	*mdi_pathmap_byinstance;	/* instance->"path" */
    174  10696      David static mod_hash_t	*mdi_pathmap_sbyinstance;	/* inst->shortpath */
    175   6640        cth 
    176   6640        cth /*
    177      0     stevel  * MDI component property name/value string definitions
    178      0     stevel  */
    179      0     stevel const char 		*mdi_component_prop = "mpxio-component";
    180      0     stevel const char		*mdi_component_prop_vhci = "vhci";
    181      0     stevel const char		*mdi_component_prop_phci = "phci";
    182      0     stevel const char		*mdi_component_prop_client = "client";
    183      0     stevel 
    184      0     stevel /*
    185      0     stevel  * MDI client global unique identifier property name
    186      0     stevel  */
    187      0     stevel const char		*mdi_client_guid_prop = "client-guid";
    188      0     stevel 
    189      0     stevel /*
    190      0     stevel  * MDI client load balancing property name/value string definitions
    191      0     stevel  */
    192      0     stevel const char		*mdi_load_balance = "load-balance";
    193      0     stevel const char		*mdi_load_balance_none = "none";
    194      0     stevel const char		*mdi_load_balance_rr = "round-robin";
    195      0     stevel const char		*mdi_load_balance_lba = "logical-block";
    196      0     stevel 
    197      0     stevel /*
    198      0     stevel  * Obsolete vHCI class definition; to be removed after Leadville update
    199      0     stevel  */
    200      0     stevel const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
    201      0     stevel 
    202      0     stevel static char vhci_greeting[] =
    203      0     stevel 	"\tThere already exists one vHCI driver for class %s\n"
    204      0     stevel 	"\tOnly one vHCI driver for each class is allowed\n";
    205      0     stevel 
    206      0     stevel /*
    207      0     stevel  * Static function prototypes
    208      0     stevel  */
    209      0     stevel static int		i_mdi_phci_offline(dev_info_t *, uint_t);
    210      0     stevel static int		i_mdi_client_offline(dev_info_t *, uint_t);
    211      0     stevel static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
    212      0     stevel static void		i_mdi_phci_post_detach(dev_info_t *,
    213      0     stevel 			    ddi_detach_cmd_t, int);
    214      0     stevel static int		i_mdi_client_pre_detach(dev_info_t *,
    215      0     stevel 			    ddi_detach_cmd_t);
    216      0     stevel static void		i_mdi_client_post_detach(dev_info_t *,
    217      0     stevel 			    ddi_detach_cmd_t, int);
    218      0     stevel static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
    219      0     stevel static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
    220      0     stevel static int 		i_mdi_lba_lb(mdi_client_t *ct,
    221      0     stevel 			    mdi_pathinfo_t **ret_pip, struct buf *buf);
    222      0     stevel static void		i_mdi_pm_hold_client(mdi_client_t *, int);
    223      0     stevel static void		i_mdi_pm_rele_client(mdi_client_t *, int);
    224      0     stevel static void		i_mdi_pm_reset_client(mdi_client_t *);
    225      0     stevel static int		i_mdi_power_all_phci(mdi_client_t *);
    226    893   rs135747 static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);
    227      0     stevel 
    228      0     stevel 
    229      0     stevel /*
    230      0     stevel  * Internal mdi_pathinfo node functions
    231      0     stevel  */
    232      0     stevel static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
    233      0     stevel 
    234      0     stevel static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
    235      0     stevel static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
    236      0     stevel static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
    237      0     stevel static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
    238      0     stevel static void		i_mdi_phci_unlock(mdi_phci_t *);
    239    878      ramat static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
    240      0     stevel static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
    241      0     stevel static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
    242      0     stevel static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
    243      0     stevel 			    mdi_client_t *);
    244      0     stevel static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
    245      0     stevel static void		i_mdi_client_remove_path(mdi_client_t *,
    246      0     stevel 			    mdi_pathinfo_t *);
    247      0     stevel 
    248      0     stevel static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
    249      0     stevel 			    mdi_pathinfo_state_t, int);
    250      0     stevel static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
    251      0     stevel static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
    252    878      ramat 			    char **, int);
    253      0     stevel static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
    254      0     stevel static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
    255      0     stevel static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
    256    878      ramat static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
    257      0     stevel static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
    258      0     stevel static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
    259    878      ramat static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
    260      0     stevel static void		i_mdi_client_update_state(mdi_client_t *);
    261      0     stevel static int		i_mdi_client_compute_state(mdi_client_t *,
    262      0     stevel 			    mdi_phci_t *);
    263      0     stevel static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
    264      0     stevel static void		i_mdi_client_unlock(mdi_client_t *);
    265      0     stevel static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
    266      0     stevel static mdi_client_t	*i_devi_get_client(dev_info_t *);
    267   1909   cm136836 /*
    268   1909   cm136836  * NOTE: this will be removed once the NWS files are changed to use the new
    269   1909   cm136836  * mdi_{enable,disable}_path interfaces
    270   1909   cm136836  */
    271   1909   cm136836 static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
    272   1909   cm136836 				int, int);
    273   1909   cm136836 static mdi_pathinfo_t 	*i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
    274   1909   cm136836 				mdi_vhci_t *vh, int flags, int op);
    275      0     stevel /*
    276      0     stevel  * Failover related function prototypes
    277      0     stevel  */
    278      0     stevel static int		i_mdi_failover(void *);
    279      0     stevel 
    280      0     stevel /*
    281      0     stevel  * misc internal functions
    282      0     stevel  */
    283      0     stevel static int		i_mdi_get_hash_key(char *);
    284      0     stevel static int		i_map_nvlist_error_to_mdi(int);
    285      0     stevel static void		i_mdi_report_path_state(mdi_client_t *,
    286      0     stevel 			    mdi_pathinfo_t *);
    287    878      ramat 
    288    878      ramat static void		setup_vhci_cache(mdi_vhci_t *);
    289    878      ramat static int		destroy_vhci_cache(mdi_vhci_t *);
    290    878      ramat static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
    291    878      ramat static boolean_t	stop_vhcache_flush_thread(void *, int);
    292    878      ramat static void		free_string_array(char **, int);
    293    878      ramat static void		free_vhcache_phci(mdi_vhcache_phci_t *);
    294    878      ramat static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
    295    878      ramat static void		free_vhcache_client(mdi_vhcache_client_t *);
    296    878      ramat static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
    297    878      ramat static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
    298    878      ramat static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
    299    878      ramat static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
    300    878      ramat static void		vhcache_pi_add(mdi_vhci_config_t *,
    301    878      ramat 			    struct mdi_pathinfo *);
    302    878      ramat static void		vhcache_pi_remove(mdi_vhci_config_t *,
    303    878      ramat 			    struct mdi_pathinfo *);
    304    878      ramat static void		free_phclient_path_list(mdi_phys_path_t *);
    305    878      ramat static void		sort_vhcache_paths(mdi_vhcache_client_t *);
    306    878      ramat static int		flush_vhcache(mdi_vhci_config_t *, int);
    307    878      ramat static void		vhcache_dirty(mdi_vhci_config_t *);
    308    878      ramat static void		free_async_client_config(mdi_async_client_config_t *);
    309    972      ramat static void		single_threaded_vhconfig_enter(mdi_vhci_config_t *);
    310    972      ramat static void		single_threaded_vhconfig_exit(mdi_vhci_config_t *);
    311    878      ramat static nvlist_t		*read_on_disk_vhci_cache(char *);
    312    878      ramat extern int		fread_nvlist(char *, nvlist_t **);
    313    878      ramat extern int		fwrite_nvlist(char *, nvlist_t *);
    314      0     stevel 
    315      0     stevel /* called once when first vhci registers with mdi */
    316      0     stevel static void
    317      0     stevel i_mdi_init()
    318      0     stevel {
    319      0     stevel 	static int initialized = 0;
    320      0     stevel 
    321      0     stevel 	if (initialized)
    322      0     stevel 		return;
    323      0     stevel 	initialized = 1;
    324      0     stevel 
    325      0     stevel 	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
    326   6640        cth 
    327   6640        cth 	/* Create our taskq resources */
    328      0     stevel 	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
    329      0     stevel 	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
    330      0     stevel 	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
    331      0     stevel 	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */
    332   6640        cth 
    333   6640        cth 	/* Allocate ['path_instance' <-> "path"] maps */
    334   6640        cth 	mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
    335   6640        cth 	mdi_pathmap_bypath = mod_hash_create_strhash(
    336   6640        cth 	    "mdi_pathmap_bypath", mdi_pathmap_hash_size,
    337   6640        cth 	    mod_hash_null_valdtor);
    338   6640        cth 	mdi_pathmap_byinstance = mod_hash_create_idhash(
    339   6640        cth 	    "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
    340   6640        cth 	    mod_hash_null_valdtor);
    341  10696      David 	mdi_pathmap_sbyinstance = mod_hash_create_idhash(
    342  10696      David 	    "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size,
    343  10696      David 	    mod_hash_null_valdtor);
    344      0     stevel }
    345      0     stevel 
    346      0     stevel /*
    347      0     stevel  * mdi_get_component_type():
    348      0     stevel  *		Return mpxio component type
    349      0     stevel  * Return Values:
    350      0     stevel  *		MDI_COMPONENT_NONE
    351      0     stevel  *		MDI_COMPONENT_VHCI
    352      0     stevel  *		MDI_COMPONENT_PHCI
    353      0     stevel  *		MDI_COMPONENT_CLIENT
    354      0     stevel  * XXX This doesn't work under multi-level MPxIO and should be
    355   2155        cth  *	removed when clients migrate mdi_component_is_*() interfaces.
    356      0     stevel  */
    357      0     stevel int
    358      0     stevel mdi_get_component_type(dev_info_t *dip)
    359      0     stevel {
    360      0     stevel 	return (DEVI(dip)->devi_mdi_component);
    361      0     stevel }
    362      0     stevel 
    363      0     stevel /*
    364      0     stevel  * mdi_vhci_register():
    365      0     stevel  *		Register a vHCI module with the mpxio framework
    366      0     stevel  *		mdi_vhci_register() is called by vHCI drivers to register the
    367      0     stevel  *		'class_driver' vHCI driver and its MDI entrypoints with the
    368      0     stevel  *		mpxio framework.  The vHCI driver must call this interface as
    369      0     stevel  *		part of its attach(9e) handler.
    370      0     stevel  *		Competing threads may try to attach mdi_vhci_register() as
    371      0     stevel  *		the vHCI drivers are loaded and attached as a result of pHCI
    372      0     stevel  *		driver instance registration (mdi_phci_register()) with the
    373      0     stevel  *		framework.
    374      0     stevel  * Return Values:
    375      0     stevel  *		MDI_SUCCESS
    376      0     stevel  *		MDI_FAILURE
    377      0     stevel  */
    378      0     stevel /*ARGSUSED*/
    379      0     stevel int
    380      0     stevel mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
    381      0     stevel     int flags)
    382      0     stevel {
    383      0     stevel 	mdi_vhci_t		*vh = NULL;
    384      0     stevel 
    385   9167    Randall 	/* Registrant can't be older */
    386   9167    Randall 	ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV);
    387   9167    Randall 
    388   8082  Ramaswamy #ifdef DEBUG
    389   8082  Ramaswamy 	/*
    390   8082  Ramaswamy 	 * IB nexus driver is loaded only when IB hardware is present.
    391   8082  Ramaswamy 	 * In order to be able to do this there is a need to drive the loading
    392   8082  Ramaswamy 	 * and attaching of the IB nexus driver (especially when an IB hardware
    393   8082  Ramaswamy 	 * is dynamically plugged in) when an IB HCA driver (PHCI)
    394   8082  Ramaswamy 	 * is being attached. Unfortunately this gets into the limitations
    395   8082  Ramaswamy 	 * of devfs as there seems to be no clean way to drive configuration
    396   8082  Ramaswamy 	 * of a subtree from another subtree of a devfs. Hence, do not ASSERT
    397   8082  Ramaswamy 	 * for IB.
    398   8082  Ramaswamy 	 */
    399   8082  Ramaswamy 	if (strcmp(class, MDI_HCI_CLASS_IB) != 0)
    400   8082  Ramaswamy 		ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
    401   8082  Ramaswamy #endif
    402      0     stevel 
    403      0     stevel 	i_mdi_init();
    404      0     stevel 
    405      0     stevel 	mutex_enter(&mdi_mutex);
    406      0     stevel 	/*
    407      0     stevel 	 * Scan for already registered vhci
    408      0     stevel 	 */
    409      0     stevel 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
    410      0     stevel 		if (strcmp(vh->vh_class, class) == 0) {
    411      0     stevel 			/*
    412      0     stevel 			 * vHCI has already been created.  Check for valid
    413      0     stevel 			 * vHCI ops registration.  We only support one vHCI
    414      0     stevel 			 * module per class
    415      0     stevel 			 */
    416      0     stevel 			if (vh->vh_ops != NULL) {
    417      0     stevel 				mutex_exit(&mdi_mutex);
    418      0     stevel 				cmn_err(CE_NOTE, vhci_greeting, class);
    419      0     stevel 				return (MDI_FAILURE);
    420      0     stevel 			}
    421      0     stevel 			break;
    422      0     stevel 		}
    423      0     stevel 	}
    424      0     stevel 
    425      0     stevel 	/*
    426      0     stevel 	 * if not yet created, create the vHCI component
    427      0     stevel 	 */
    428      0     stevel 	if (vh == NULL) {
    429      0     stevel 		struct client_hash	*hash = NULL;
    430      0     stevel 		char			*load_balance;
    431      0     stevel 
    432      0     stevel 		/*
    433      0     stevel 		 * Allocate and initialize the mdi extensions
    434      0     stevel 		 */
    435      0     stevel 		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
    436      0     stevel 		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
    437      0     stevel 		    KM_SLEEP);
    438      0     stevel 		vh->vh_client_table = hash;
    439      0     stevel 		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
    440      0     stevel 		(void) strcpy(vh->vh_class, class);
    441      0     stevel 		vh->vh_lb = LOAD_BALANCE_RR;
    442      0     stevel 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
    443      0     stevel 		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
    444      0     stevel 			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
    445      0     stevel 				vh->vh_lb = LOAD_BALANCE_NONE;
    446      0     stevel 			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
    447      0     stevel 				    == 0) {
    448      0     stevel 				vh->vh_lb = LOAD_BALANCE_LBA;
    449      0     stevel 			}
    450      0     stevel 			ddi_prop_free(load_balance);
    451      0     stevel 		}
    452      0     stevel 
    453   2155        cth 		mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
    454   2155        cth 		mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);
    455   2155        cth 
    456      0     stevel 		/*
    457      0     stevel 		 * Store the vHCI ops vectors
    458      0     stevel 		 */
    459      0     stevel 		vh->vh_dip = vdip;
    460      0     stevel 		vh->vh_ops = vops;
    461      0     stevel 
    462    878      ramat 		setup_vhci_cache(vh);
    463      0     stevel 
    464      0     stevel 		if (mdi_vhci_head == NULL) {
    465      0     stevel 			mdi_vhci_head = vh;
    466      0     stevel 		}
    467      0     stevel 		if (mdi_vhci_tail) {
    468      0     stevel 			mdi_vhci_tail->vh_next = vh;
    469      0     stevel 		}
    470      0     stevel 		mdi_vhci_tail = vh;
    471      0     stevel 		mdi_vhci_count++;
    472      0     stevel 	}
    473      0     stevel 
    474      0     stevel 	/*
    475      0     stevel 	 * Claim the devfs node as a vhci component
    476      0     stevel 	 */
    477      0     stevel 	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
    478      0     stevel 
    479      0     stevel 	/*
    480      0     stevel 	 * Initialize our back reference from dev_info node
    481      0     stevel 	 */
    482      0     stevel 	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
    483      0     stevel 	mutex_exit(&mdi_mutex);
    484      0     stevel 	return (MDI_SUCCESS);
    485      0     stevel }
    486      0     stevel 
    487      0     stevel /*
    488      0     stevel  * mdi_vhci_unregister():
    489      0     stevel  *		Unregister a vHCI module from mpxio framework
    490      0     stevel  *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
    491      0     stevel  * 		of a vhci to unregister it from the framework.
    492      0     stevel  * Return Values:
    493      0     stevel  *		MDI_SUCCESS
    494      0     stevel  *		MDI_FAILURE
    495      0     stevel  */
    496      0     stevel /*ARGSUSED*/
    497      0     stevel int
    498      0     stevel mdi_vhci_unregister(dev_info_t *vdip, int flags)
    499      0     stevel {
    500      0     stevel 	mdi_vhci_t	*found, *vh, *prev = NULL;
    501      0     stevel 
    502   2155        cth 	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
    503   2155        cth 
    504      0     stevel 	/*
    505      0     stevel 	 * Check for invalid VHCI
    506      0     stevel 	 */
    507      0     stevel 	if ((vh = i_devi_get_vhci(vdip)) == NULL)
    508      0     stevel 		return (MDI_FAILURE);
    509      0     stevel 
    510   2155        cth 	/*
    511   2155        cth 	 * Scan the list of registered vHCIs for a match
    512   2155        cth 	 */
    513   2009   dm120769 	mutex_enter(&mdi_mutex);
    514      0     stevel 	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
    515      0     stevel 		if (found == vh)
    516      0     stevel 			break;
    517      0     stevel 		prev = found;
    518      0     stevel 	}
    519      0     stevel 
    520      0     stevel 	if (found == NULL) {
    521      0     stevel 		mutex_exit(&mdi_mutex);
    522      0     stevel 		return (MDI_FAILURE);
    523      0     stevel 	}
    524      0     stevel 
    525      0     stevel 	/*
    526    893   rs135747 	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
    527      0     stevel 	 * should have been unregistered, before a vHCI can be
    528      0     stevel 	 * unregistered.
    529      0     stevel 	 */
    530   2155        cth 	MDI_VHCI_PHCI_LOCK(vh);
    531   2155        cth 	if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
    532   2155        cth 		MDI_VHCI_PHCI_UNLOCK(vh);
    533   2155        cth 		mutex_exit(&mdi_mutex);
    534   2155        cth 		return (MDI_FAILURE);
    535   2155        cth 	}
    536   2155        cth 	MDI_VHCI_PHCI_UNLOCK(vh);
    537   2155        cth 
    538   2155        cth 	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
    539      0     stevel 		mutex_exit(&mdi_mutex);
    540      0     stevel 		return (MDI_FAILURE);
    541      0     stevel 	}
    542      0     stevel 
    543      0     stevel 	/*
    544      0     stevel 	 * Remove the vHCI from the global list
    545      0     stevel 	 */
    546      0     stevel 	if (vh == mdi_vhci_head) {
    547      0     stevel 		mdi_vhci_head = vh->vh_next;
    548      0     stevel 	} else {
    549      0     stevel 		prev->vh_next = vh->vh_next;
    550      0     stevel 	}
    551      0     stevel 	if (vh == mdi_vhci_tail) {
    552      0     stevel 		mdi_vhci_tail = prev;
    553      0     stevel 	}
    554    878      ramat 	mdi_vhci_count--;
    555    878      ramat 	mutex_exit(&mdi_mutex);
    556    878      ramat 
    557      0     stevel 	vh->vh_ops = NULL;
    558      0     stevel 	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
    559      0     stevel 	DEVI(vdip)->devi_mdi_xhci = NULL;
    560      0     stevel 	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
    561      0     stevel 	kmem_free(vh->vh_client_table,
    562      0     stevel 	    mdi_client_table_size * sizeof (struct client_hash));
    563   2155        cth 	mutex_destroy(&vh->vh_phci_mutex);
    564   2155        cth 	mutex_destroy(&vh->vh_client_mutex);
    565   1140      llai1 
    566      0     stevel 	kmem_free(vh, sizeof (mdi_vhci_t));
    567      0     stevel 	return (MDI_SUCCESS);
    568      0     stevel }
    569      0     stevel 
    570      0     stevel /*
    571      0     stevel  * i_mdi_vhci_class2vhci():
    572      0     stevel  *		Look for a matching vHCI module given a vHCI class name
    573      0     stevel  * Return Values:
    574      0     stevel  *		Handle to a vHCI component
    575      0     stevel  *		NULL
    576      0     stevel  */
    577      0     stevel static mdi_vhci_t *
    578      0     stevel i_mdi_vhci_class2vhci(char *class)
    579      0     stevel {
    580      0     stevel 	mdi_vhci_t	*vh = NULL;
    581      0     stevel 
    582      0     stevel 	ASSERT(!MUTEX_HELD(&mdi_mutex));
    583      0     stevel 
    584      0     stevel 	mutex_enter(&mdi_mutex);
    585      0     stevel 	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
    586      0     stevel 		if (strcmp(vh->vh_class, class) == 0) {
    587      0     stevel 			break;
    588      0     stevel 		}
    589      0     stevel 	}
    590      0     stevel 	mutex_exit(&mdi_mutex);
    591      0     stevel 	return (vh);
    592      0     stevel }
    593      0     stevel 
    594      0     stevel /*
    595      0     stevel  * i_devi_get_vhci():
    596      0     stevel  *		Utility function to get the handle to a vHCI component
    597      0     stevel  * Return Values:
    598      0     stevel  *		Handle to a vHCI component
    599      0     stevel  *		NULL
    600      0     stevel  */
    601      0     stevel mdi_vhci_t *
    602      0     stevel i_devi_get_vhci(dev_info_t *vdip)
    603      0     stevel {
    604      0     stevel 	mdi_vhci_t	*vh = NULL;
    605      0     stevel 	if (MDI_VHCI(vdip)) {
    606      0     stevel 		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
    607      0     stevel 	}
    608      0     stevel 	return (vh);
    609      0     stevel }
    610      0     stevel 
    611      0     stevel /*
    612      0     stevel  * mdi_phci_register():
    613      0     stevel  *		Register a pHCI module with mpxio framework
    614      0     stevel  *		mdi_phci_register() is called by pHCI drivers to register with
    615      0     stevel  *		the mpxio framework and a specific 'class_driver' vHCI.  The
    616      0     stevel  *		pHCI driver must call this interface as part of its attach(9e)
    617      0     stevel  *		handler.
    618      0     stevel  * Return Values:
    619      0     stevel  *		MDI_SUCCESS
    620      0     stevel  *		MDI_FAILURE
    621      0     stevel  */
    622      0     stevel /*ARGSUSED*/
    623      0     stevel int
    624      0     stevel mdi_phci_register(char *class, dev_info_t *pdip, int flags)
    625      0     stevel {
    626      0     stevel 	mdi_phci_t		*ph;
    627      0     stevel 	mdi_vhci_t		*vh;
    628      0     stevel 	char			*data;
    629   2155        cth 
    630   2155        cth 	/*
    631   2155        cth 	 * Some subsystems, like fcp, perform pHCI registration from a
    632   2155        cth 	 * different thread than the one doing the pHCI attach(9E) - the
    633   2155        cth 	 * driver attach code is waiting for this other thread to complete.
    634   2155        cth 	 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
    635   2155        cth 	 * (indicating that some thread has done an ndi_devi_enter of parent)
    636   2155        cth 	 * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
    637   2155        cth 	 */
    638   2155        cth 	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
    639      0     stevel 
    640      0     stevel 	/*
    641      0     stevel 	 * Check for mpxio-disable property. Enable mpxio if the property is
    642      0     stevel 	 * missing or not set to "yes".
    643      0     stevel 	 * If the property is set to "yes" then emit a brief message.
    644      0     stevel 	 */
    645      0     stevel 	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
    646      0     stevel 	    &data) == DDI_SUCCESS)) {
    647      0     stevel 		if (strcmp(data, "yes") == 0) {
    648  10696      David 			MDI_DEBUG(1, (MDI_CONT, pdip,
    649  10696      David 			    "?multipath capabilities disabled via %s.conf.",
    650      0     stevel 			    ddi_driver_name(pdip)));
    651      0     stevel 			ddi_prop_free(data);
    652      0     stevel 			return (MDI_FAILURE);
    653      0     stevel 		}
    654      0     stevel 		ddi_prop_free(data);
    655      0     stevel 	}
    656      0     stevel 
    657      0     stevel 	/*
    658      0     stevel 	 * Search for a matching vHCI
    659      0     stevel 	 */
    660      0     stevel 	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
    661      0     stevel 	if (vh == NULL) {
    662      0     stevel 		return (MDI_FAILURE);
    663      0     stevel 	}
    664      0     stevel 
    665      0     stevel 	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
    666      0     stevel 	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
    667      0     stevel 	ph->ph_dip = pdip;
    668      0     stevel 	ph->ph_vhci = vh;
    669      0     stevel 	ph->ph_next = NULL;
    670      0     stevel 	ph->ph_unstable = 0;
    671      0     stevel 	ph->ph_vprivate = 0;
    672      0     stevel 	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
    673   2155        cth 
    674   2155        cth 	MDI_PHCI_LOCK(ph);
    675      0     stevel 	MDI_PHCI_SET_POWER_UP(ph);
    676   2155        cth 	MDI_PHCI_UNLOCK(ph);
    677      0     stevel 	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
    678      0     stevel 	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
    679      0     stevel 
    680    878      ramat 	vhcache_phci_add(vh->vh_config, ph);
    681    878      ramat 
    682   2155        cth 	MDI_VHCI_PHCI_LOCK(vh);
    683      0     stevel 	if (vh->vh_phci_head == NULL) {
    684      0     stevel 		vh->vh_phci_head = ph;
    685      0     stevel 	}
    686      0     stevel 	if (vh->vh_phci_tail) {
    687      0     stevel 		vh->vh_phci_tail->ph_next = ph;
    688      0     stevel 	}
    689      0     stevel 	vh->vh_phci_tail = ph;
    690      0     stevel 	vh->vh_phci_count++;
    691   2155        cth 	MDI_VHCI_PHCI_UNLOCK(vh);
    692   2155        cth 
    693    893   rs135747 	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
    694      0     stevel 	return (MDI_SUCCESS);
    695      0     stevel }
    696      0     stevel 
    697      0     stevel /*
    698      0     stevel  * mdi_phci_unregister():
    699      0     stevel  *		Unregister a pHCI module from mpxio framework
    700      0     stevel  *		mdi_phci_unregister() is called by the pHCI drivers from their
    701      0     stevel  *		detach(9E) handler to unregister their instances from the
    702      0     stevel  *		framework.
    703      0     stevel  * Return Values:
    704      0     stevel  *		MDI_SUCCESS
    705      0     stevel  *		MDI_FAILURE
    706      0     stevel  */
    707      0     stevel /*ARGSUSED*/
    708      0     stevel int
    709      0     stevel mdi_phci_unregister(dev_info_t *pdip, int flags)
    710      0     stevel {
    711      0     stevel 	mdi_vhci_t		*vh;
    712      0     stevel 	mdi_phci_t		*ph;
    713      0     stevel 	mdi_phci_t		*tmp;
    714      0     stevel 	mdi_phci_t		*prev = NULL;
    715  10696      David 	mdi_pathinfo_t		*pip;
    716   2155        cth 
    717   2155        cth 	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
    718      0     stevel 
    719      0     stevel 	ph = i_devi_get_phci(pdip);
    720      0     stevel 	if (ph == NULL) {
    721  10696      David 		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI"));
    722      0     stevel 		return (MDI_FAILURE);
    723      0     stevel 	}
    724      0     stevel 
    725      0     stevel 	vh = ph->ph_vhci;
    726      0     stevel 	ASSERT(vh != NULL);
    727      0     stevel 	if (vh == NULL) {
    728  10696      David 		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI"));
    729      0     stevel 		return (MDI_FAILURE);
    730      0     stevel 	}
    731      0     stevel 
    732   2155        cth 	MDI_VHCI_PHCI_LOCK(vh);
    733      0     stevel 	tmp = vh->vh_phci_head;
    734      0     stevel 	while (tmp) {
    735      0     stevel 		if (tmp == ph) {
    736      0     stevel 			break;
    737      0     stevel 		}
    738      0     stevel 		prev = tmp;
    739      0     stevel 		tmp = tmp->ph_next;
    740      0     stevel 	}
    741      0     stevel 
    742      0     stevel 	if (ph == vh->vh_phci_head) {
    743      0     stevel 		vh->vh_phci_head = ph->ph_next;
    744      0     stevel 	} else {
    745      0     stevel 		prev->ph_next = ph->ph_next;
    746      0     stevel 	}
    747      0     stevel 
    748      0     stevel 	if (ph == vh->vh_phci_tail) {
    749      0     stevel 		vh->vh_phci_tail = prev;
    750      0     stevel 	}
    751      0     stevel 
    752      0     stevel 	vh->vh_phci_count--;
    753   2155        cth 	MDI_VHCI_PHCI_UNLOCK(vh);
    754  10696      David 
    755  10696      David 	/* Walk remaining pathinfo nodes and disassociate them from pHCI */
    756  10696      David 	MDI_PHCI_LOCK(ph);
    757  10696      David 	for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip;
    758  10696      David 	    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link)
    759  10696      David 		MDI_PI(pip)->pi_phci = NULL;
    760  10696      David 	MDI_PHCI_UNLOCK(ph);
    761    878      ramat 
    762    893   rs135747 	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
    763    893   rs135747 	    ESC_DDI_INITIATOR_UNREGISTER);
    764    878      ramat 	vhcache_phci_remove(vh->vh_config, ph);
    765      0     stevel 	cv_destroy(&ph->ph_unstable_cv);
    766      0     stevel 	mutex_destroy(&ph->ph_mutex);
    767      0     stevel 	kmem_free(ph, sizeof (mdi_phci_t));
    768      0     stevel 	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
    769      0     stevel 	DEVI(pdip)->devi_mdi_xhci = NULL;
    770      0     stevel 	return (MDI_SUCCESS);
    771      0     stevel }
    772      0     stevel 
    773      0     stevel /*
    774      0     stevel  * i_devi_get_phci():
    775      0     stevel  * 		Utility function to return the phci extensions.
    776      0     stevel  */
    777      0     stevel static mdi_phci_t *
    778      0     stevel i_devi_get_phci(dev_info_t *pdip)
    779      0     stevel {
    780      0     stevel 	mdi_phci_t	*ph = NULL;
    781   9167    Randall 
    782      0     stevel 	if (MDI_PHCI(pdip)) {
    783      0     stevel 		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
    784      0     stevel 	}
    785      0     stevel 	return (ph);
    786      0     stevel }
    787      0     stevel 
    788      0     stevel /*
    789   2155        cth  * Single thread mdi entry into devinfo node for modifying its children.
    790   2155        cth  * If necessary we perform an ndi_devi_enter of the vHCI before doing
    791   2155        cth  * an ndi_devi_enter of 'dip'.  We maintain circular in two parts: one
    792   2155        cth  * for the vHCI and one for the pHCI.
    793   2155        cth  */
    794   2155        cth void
    795   2155        cth mdi_devi_enter(dev_info_t *phci_dip, int *circular)
    796   2155        cth {
    797   2155        cth 	dev_info_t	*vdip;
    798   2155        cth 	int		vcircular, pcircular;
    799   2155        cth 
    800   2155        cth 	/* Verify calling context */
    801   2155        cth 	ASSERT(MDI_PHCI(phci_dip));
    802   2155        cth 	vdip = mdi_devi_get_vdip(phci_dip);
    803   2155        cth 	ASSERT(vdip);			/* A pHCI always has a vHCI */
    804   2155        cth 
    805   2155        cth 	/*
    806   2155        cth 	 * If pHCI is detaching then the framework has already entered the
    807   2155        cth 	 * vHCI on a threads that went down the code path leading to
    808   2155        cth 	 * detach_node().  This framework enter of the vHCI during pHCI
    809   2155        cth 	 * detach is done to avoid deadlock with vHCI power management
    810   2155        cth 	 * operations which enter the vHCI and the enter down the path
    811   2155        cth 	 * to the pHCI. If pHCI is detaching then we piggyback this calls
    812   2155        cth 	 * enter of the vHCI on frameworks vHCI enter that has already
    813   2155        cth 	 * occurred - this is OK because we know that the framework thread
    814   2155        cth 	 * doing detach is waiting for our completion.
    815   2155        cth 	 *
    816   2155        cth 	 * We should DEVI_IS_DETACHING under an enter of the parent to avoid
    817   2155        cth 	 * race with detach - but we can't do that because the framework has
    818   2155        cth 	 * already entered the parent, so we have some complexity instead.
    819   2155        cth 	 */
    820   2155        cth 	for (;;) {
    821   2155        cth 		if (ndi_devi_tryenter(vdip, &vcircular)) {
    822   2155        cth 			ASSERT(vcircular != -1);
    823   2155        cth 			if (DEVI_IS_DETACHING(phci_dip)) {
    824   2155        cth 				ndi_devi_exit(vdip, vcircular);
    825   2155        cth 				vcircular = -1;
    826   2155        cth 			}
    827   2155        cth 			break;
    828   2155        cth 		} else if (DEVI_IS_DETACHING(phci_dip)) {
    829   2155        cth 			vcircular = -1;
    830   2155        cth 			break;
    831  10696      David 		} else if (servicing_interrupt()) {
    832  10696      David 			/*
    833  10696      David 			 * Don't delay an interrupt (and ensure adaptive
    834  10696      David 			 * mutex inversion support).
    835  10696      David 			 */
    836  10696      David 			ndi_devi_enter(vdip, &vcircular);
    837  10696      David 			break;
    838  10696      David 		} else {
    839  11052      Chris 			delay_random(mdi_delay);
    840   2155        cth 		}
    841   2155        cth 	}
    842   2155        cth 
    843   2155        cth 	ndi_devi_enter(phci_dip, &pcircular);
    844   2155        cth 	*circular = (vcircular << 16) | (pcircular & 0xFFFF);
    845   9167    Randall }
    846   9167    Randall 
    847   9167    Randall /*
    848   9167    Randall  * Attempt to mdi_devi_enter.
    849   9167    Randall  */
    850   9167    Randall int
    851   9167    Randall mdi_devi_tryenter(dev_info_t *phci_dip, int *circular)
    852   9167    Randall {
    853   9167    Randall 	dev_info_t	*vdip;
    854   9167    Randall 	int		vcircular, pcircular;
    855   9167    Randall 
    856   9167    Randall 	/* Verify calling context */
    857   9167    Randall 	ASSERT(MDI_PHCI(phci_dip));
    858   9167    Randall 	vdip = mdi_devi_get_vdip(phci_dip);
    859   9167    Randall 	ASSERT(vdip);			/* A pHCI always has a vHCI */
    860   9167    Randall 
    861   9167    Randall 	if (ndi_devi_tryenter(vdip, &vcircular)) {
    862   9167    Randall 		if (ndi_devi_tryenter(phci_dip, &pcircular)) {
    863   9167    Randall 			*circular = (vcircular << 16) | (pcircular & 0xFFFF);
    864   9167    Randall 			return (1);	/* locked */
    865   9167    Randall 		}
    866   9167    Randall 		ndi_devi_exit(vdip, vcircular);
    867   9167    Randall 	}
    868   9167    Randall 	return (0);			/* busy */
    869   2155        cth }
    870   2155        cth 
    871   2155        cth /*
    872   2155        cth  * Release mdi_devi_enter or successful mdi_devi_tryenter.
    873   2155        cth  */
    874   2155        cth void
    875   2155        cth mdi_devi_exit(dev_info_t *phci_dip, int circular)
    876   2155        cth {
    877   2155        cth 	dev_info_t	*vdip;
    878   2155        cth 	int		vcircular, pcircular;
    879   2155        cth 
    880   2155        cth 	/* Verify calling context */
    881   2155        cth 	ASSERT(MDI_PHCI(phci_dip));
    882   2155        cth 	vdip = mdi_devi_get_vdip(phci_dip);
    883   2155        cth 	ASSERT(vdip);			/* A pHCI always has a vHCI */
    884   2155        cth 
    885   2155        cth 	/* extract two circular recursion values from single int */
    886   2155        cth 	pcircular = (short)(circular & 0xFFFF);
    887   2155        cth 	vcircular = (short)((circular >> 16) & 0xFFFF);
    888   2155        cth 
    889   2155        cth 	ndi_devi_exit(phci_dip, pcircular);
    890   2155        cth 	if (vcircular != -1)
    891   2155        cth 		ndi_devi_exit(vdip, vcircular);
    892   2155        cth }
    893   2155        cth 
    894   2155        cth /*
    895   2155        cth  * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
    896   2155        cth  * around a pHCI drivers calls to mdi_pi_online/offline, after holding
    897   2155        cth  * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
    898   2155        cth  * with vHCI power management code during path online/offline.  Each
    899   2155        cth  * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
    900   2155        cth  * occur within the scope of an active mdi_devi_enter that establishes the
    901   2155        cth  * circular value.
    902   2155        cth  */
    903   2155        cth void
    904   2155        cth mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
    905   2155        cth {
    906   2155        cth 	int		pcircular;
    907   2155        cth 
    908   2155        cth 	/* Verify calling context */
    909   2155        cth 	ASSERT(MDI_PHCI(phci_dip));
    910   2155        cth 
    911  10696      David 	/* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */
    912  10696      David 	ndi_hold_devi(phci_dip);
    913  10696      David 
    914   2155        cth 	pcircular = (short)(circular & 0xFFFF);
    915   2155        cth 	ndi_devi_exit(phci_dip, pcircular);
    916   2155        cth }
    917   2155        cth 
    918   2155        cth void
    919   2155        cth mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
    920   2155        cth {
    921   2155        cth 	int		pcircular;
    922   2155        cth 
    923   2155        cth 	/* Verify calling context */
    924   2155        cth 	ASSERT(MDI_PHCI(phci_dip));
    925   2155        cth 
    926   2155        cth 	ndi_devi_enter(phci_dip, &pcircular);
    927  10696      David 
    928  10696      David 	/* Drop hold from mdi_devi_exit_phci. */
    929  10696      David 	ndi_rele_devi(phci_dip);
    930   2155        cth 
    931   2155        cth 	/* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
    932   2155        cth 	ASSERT(pcircular == ((short)(*circular & 0xFFFF)));
    933   2155        cth }
    934   2155        cth 
    935   2155        cth /*
    936   2155        cth  * mdi_devi_get_vdip():
    937   2155        cth  *		given a pHCI dip return vHCI dip
    938   2155        cth  */
    939   2155        cth dev_info_t *
    940   2155        cth mdi_devi_get_vdip(dev_info_t *pdip)
    941   2155        cth {
    942   2155        cth 	mdi_phci_t	*ph;
    943   2155        cth 
    944   2155        cth 	ph = i_devi_get_phci(pdip);
    945   2155        cth 	if (ph && ph->ph_vhci)
    946   2155        cth 		return (ph->ph_vhci->vh_dip);
    947   2155        cth 	return (NULL);
    948   2155        cth }
    949   2155        cth 
    950   2155        cth /*
    951   2155        cth  * mdi_devi_pdip_entered():
    952   2155        cth  *		Return 1 if we are vHCI and have done an ndi_devi_enter
    953   2155        cth  *		of a pHCI
    954   2155        cth  */
    955   2155        cth int
    956   2155        cth mdi_devi_pdip_entered(dev_info_t *vdip)
    957   2155        cth {
    958   2155        cth 	mdi_vhci_t	*vh;
    959   2155        cth 	mdi_phci_t	*ph;
    960   2155        cth 
    961   2155        cth 	vh = i_devi_get_vhci(vdip);
    962   2155        cth 	if (vh == NULL)
    963   2155        cth 		return (0);
    964   2155        cth 
    965   2155        cth 	MDI_VHCI_PHCI_LOCK(vh);
    966   2155        cth 	ph = vh->vh_phci_head;
    967   2155        cth 	while (ph) {
    968   2155        cth 		if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
    969   2155        cth 			MDI_VHCI_PHCI_UNLOCK(vh);
    970   2155        cth 			return (1);
    971   2155        cth 		}
    972   2155        cth 		ph = ph->ph_next;
    973   2155        cth 	}
    974   2155        cth 	MDI_VHCI_PHCI_UNLOCK(vh);
    975   2155        cth 	return (0);
    976   2155        cth }
    977   2155        cth 
    978   2155        cth /*
    979      0     stevel  * mdi_phci_path2devinfo():
    980      0     stevel  * 		Utility function to search for a valid phci device given
    981      0     stevel  *		the devfs pathname.
    982      0     stevel  */
    983      0     stevel dev_info_t *
    984      0     stevel mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
    985      0     stevel {
    986      0     stevel 	char		*temp_pathname;
    987      0     stevel 	mdi_vhci_t	*vh;
    988      0     stevel 	mdi_phci_t	*ph;
    989      0     stevel 	dev_info_t 	*pdip = NULL;
    990      0     stevel 
    991      0     stevel 	vh = i_devi_get_vhci(vdip);
    992      0     stevel 	ASSERT(vh != NULL);
    993      0     stevel 
    994      0     stevel 	if (vh == NULL) {
    995      0     stevel 		/*
    996      0     stevel 		 * Invalid vHCI component, return failure
    997      0     stevel 		 */
    998      0     stevel 		return (NULL);
    999      0     stevel 	}
   1000      0     stevel 
   1001      0     stevel 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
   1002   2155        cth 	MDI_VHCI_PHCI_LOCK(vh);
   1003      0     stevel 	ph = vh->vh_phci_head;
   1004      0     stevel 	while (ph != NULL) {
   1005      0     stevel 		pdip = ph->ph_dip;
   1006      0     stevel 		ASSERT(pdip != NULL);
   1007      0     stevel 		*temp_pathname = '\0';
   1008      0     stevel 		(void) ddi_pathname(pdip, temp_pathname);
   1009      0     stevel 		if (strcmp(temp_pathname, pathname) == 0) {
   1010      0     stevel 			break;
   1011      0     stevel 		}
   1012      0     stevel 		ph = ph->ph_next;
   1013      0     stevel 	}
   1014      0     stevel 	if (ph == NULL) {
   1015      0     stevel 		pdip = NULL;
   1016      0     stevel 	}
   1017   2155        cth 	MDI_VHCI_PHCI_UNLOCK(vh);
   1018      0     stevel 	kmem_free(temp_pathname, MAXPATHLEN);
   1019      0     stevel 	return (pdip);
   1020      0     stevel }
   1021      0     stevel 
   1022      0     stevel /*
   1023      0     stevel  * mdi_phci_get_path_count():
   1024      0     stevel  * 		get number of path information nodes associated with a given
   1025      0     stevel  *		pHCI device.
   1026      0     stevel  */
   1027      0     stevel int
   1028      0     stevel mdi_phci_get_path_count(dev_info_t *pdip)
   1029      0     stevel {
   1030      0     stevel 	mdi_phci_t	*ph;
   1031      0     stevel 	int		count = 0;
   1032      0     stevel 
   1033      0     stevel 	ph = i_devi_get_phci(pdip);
   1034      0     stevel 	if (ph != NULL) {
   1035      0     stevel 		count = ph->ph_path_count;
   1036      0     stevel 	}
   1037      0     stevel 	return (count);
   1038      0     stevel }
   1039      0     stevel 
   1040      0     stevel /*
   1041      0     stevel  * i_mdi_phci_lock():
   1042      0     stevel  *		Lock a pHCI device
   1043      0     stevel  * Return Values:
   1044      0     stevel  *		None
   1045      0     stevel  * Note:
   1046      0     stevel  *		The default locking order is:
   1047      0     stevel  *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
   1048      0     stevel  *		But there are number of situations where locks need to be
   1049      0     stevel  *		grabbed in reverse order.  This routine implements try and lock
   1050      0     stevel  *		mechanism depending on the requested parameter option.
   1051      0     stevel  */
   1052      0     stevel static void
   1053      0     stevel i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
   1054      0     stevel {
   1055      0     stevel 	if (pip) {
   1056      0     stevel 		/* Reverse locking is requested. */
   1057      0     stevel 		while (MDI_PHCI_TRYLOCK(ph) == 0) {
   1058  10696      David 			if (servicing_interrupt()) {
   1059  10696      David 				MDI_PI_HOLD(pip);
   1060  10696      David 				MDI_PI_UNLOCK(pip);
   1061  10696      David 				MDI_PHCI_LOCK(ph);
   1062  10696      David 				MDI_PI_LOCK(pip);
   1063  10696      David 				MDI_PI_RELE(pip);
   1064  10696      David 				break;
   1065  10696      David 			} else {
   1066  10696      David 				/*
   1067  10696      David 				 * tryenter failed. Try to grab again
   1068  10696      David 				 * after a small delay
   1069  10696      David 				 */
   1070  10696      David 				MDI_PI_HOLD(pip);
   1071  10696      David 				MDI_PI_UNLOCK(pip);
   1072  11052      Chris 				delay_random(mdi_delay);
   1073  10696      David 				MDI_PI_LOCK(pip);
   1074  10696      David 				MDI_PI_RELE(pip);
   1075  10696      David 			}
   1076      0     stevel 		}
   1077      0     stevel 	} else {
   1078      0     stevel 		MDI_PHCI_LOCK(ph);
   1079      0     stevel 	}
   1080      0     stevel }
   1081      0     stevel 
   1082      0     stevel /*
   1083      0     stevel  * i_mdi_phci_unlock():
   1084      0     stevel  *		Unlock the pHCI component
   1085      0     stevel  */
   1086      0     stevel static void
   1087      0     stevel i_mdi_phci_unlock(mdi_phci_t *ph)
   1088      0     stevel {
   1089      0     stevel 	MDI_PHCI_UNLOCK(ph);
   1090      0     stevel }
   1091      0     stevel 
   1092      0     stevel /*
   1093      0     stevel  * i_mdi_devinfo_create():
   1094      0     stevel  *		create client device's devinfo node
   1095      0     stevel  * Return Values:
   1096      0     stevel  *		dev_info
   1097      0     stevel  *		NULL
   1098      0     stevel  * Notes:
   1099      0     stevel  */
   1100      0     stevel static dev_info_t *
   1101      0     stevel i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
   1102    878      ramat 	char **compatible, int ncompatible)
   1103      0     stevel {
   1104      0     stevel 	dev_info_t *cdip = NULL;
   1105      0     stevel 
   1106   2155        cth 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
   1107      0     stevel 
   1108      0     stevel 	/* Verify for duplicate entry */
   1109      0     stevel 	cdip = i_mdi_devinfo_find(vh, name, guid);
   1110      0     stevel 	ASSERT(cdip == NULL);
   1111      0     stevel 	if (cdip) {
   1112      0     stevel 		cmn_err(CE_WARN,
   1113  10696      David 		    "i_mdi_devinfo_create: client %s@%s already exists",
   1114  10696      David 			name ? name : "", guid ? guid : "");
   1115      0     stevel 	}
   1116      0     stevel 
   1117    878      ramat 	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
   1118      0     stevel 	if (cdip == NULL)
   1119      0     stevel 		goto fail;
   1120      0     stevel 
   1121      0     stevel 	/*
   1122      0     stevel 	 * Create component type and Global unique identifier
   1123      0     stevel 	 * properties
   1124      0     stevel 	 */
   1125      0     stevel 	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
   1126      0     stevel 	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
   1127      0     stevel 		goto fail;
   1128      0     stevel 	}
   1129      0     stevel 
   1130      0     stevel 	/* Decorate the node with compatible property */
   1131      0     stevel 	if (compatible &&
   1132      0     stevel 	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
   1133      0     stevel 	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
   1134      0     stevel 		goto fail;
   1135      0     stevel 	}
   1136      0     stevel 
   1137      0     stevel 	return (cdip);
   1138      0     stevel 
   1139      0     stevel fail:
   1140      0     stevel 	if (cdip) {
   1141      0     stevel 		(void) ndi_prop_remove_all(cdip);
   1142      0     stevel 		(void) ndi_devi_free(cdip);
   1143      0     stevel 	}
   1144      0     stevel 	return (NULL);
   1145      0     stevel }
   1146      0     stevel 
   1147      0     stevel /*
   1148      0     stevel  * i_mdi_devinfo_find():
   1149      0     stevel  *		Find a matching devinfo node for given client node name
   1150      0     stevel  *		and its guid.
   1151      0     stevel  * Return Values:
   1152      0     stevel  *		Handle to a dev_info node or NULL
   1153      0     stevel  */
   1154      0     stevel static dev_info_t *
   1155      0     stevel i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
   1156      0     stevel {
   1157      0     stevel 	char			*data;
   1158      0     stevel 	dev_info_t 		*cdip = NULL;
   1159      0     stevel 	dev_info_t 		*ndip = NULL;
   1160      0     stevel 	int			circular;
   1161      0     stevel 
   1162      0     stevel 	ndi_devi_enter(vh->vh_dip, &circular);
   1163      0     stevel 	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
   1164      0     stevel 	while ((cdip = ndip) != NULL) {
   1165      0     stevel 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
   1166      0     stevel 
   1167      0     stevel 		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
   1168      0     stevel 			continue;
   1169      0     stevel 		}
   1170      0     stevel 
   1171      0     stevel 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
   1172      0     stevel 		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
   1173      0     stevel 		    &data) != DDI_PROP_SUCCESS) {
   1174      0     stevel 			continue;
   1175      0     stevel 		}
   1176      0     stevel 
   1177      0     stevel 		if (strcmp(data, guid) != 0) {
   1178      0     stevel 			ddi_prop_free(data);
   1179      0     stevel 			continue;
   1180      0     stevel 		}
   1181      0     stevel 		ddi_prop_free(data);
   1182      0     stevel 		break;
   1183      0     stevel 	}
   1184      0     stevel 	ndi_devi_exit(vh->vh_dip, circular);
   1185      0     stevel 	return (cdip);
   1186      0     stevel }
   1187      0     stevel 
   1188      0     stevel /*
   1189      0     stevel  * i_mdi_devinfo_remove():
   1190      0     stevel  *		Remove a client device node
   1191      0     stevel  */
   1192      0     stevel static int
   1193      0     stevel i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
   1194      0     stevel {
   1195      0     stevel 	int	rv = MDI_SUCCESS;
   1196   2155        cth 
   1197      0     stevel 	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
   1198      0     stevel 	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
   1199  10696      David 		rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE);
   1200      0     stevel 		if (rv != NDI_SUCCESS) {
   1201  10696      David 			MDI_DEBUG(1, (MDI_NOTE, cdip,
   1202  10696      David 			    "!failed: cdip %p", (void *)cdip));
   1203      0     stevel 		}
   1204      0     stevel 		/*
   1205      0     stevel 		 * Convert to MDI error code
   1206      0     stevel 		 */
   1207      0     stevel 		switch (rv) {
   1208      0     stevel 		case NDI_SUCCESS:
   1209      0     stevel 			rv = MDI_SUCCESS;
   1210      0     stevel 			break;
   1211      0     stevel 		case NDI_BUSY:
   1212      0     stevel 			rv = MDI_BUSY;
   1213      0     stevel 			break;
   1214      0     stevel 		default:
   1215      0     stevel 			rv = MDI_FAILURE;
   1216      0     stevel 			break;
   1217      0     stevel 		}
   1218      0     stevel 	}
   1219      0     stevel 	return (rv);
   1220      0     stevel }
   1221      0     stevel 
   1222      0     stevel /*
   1223      0     stevel  * i_devi_get_client()
   1224      0     stevel  *		Utility function to get mpxio component extensions
   1225      0     stevel  */
   1226      0     stevel static mdi_client_t *
   1227      0     stevel i_devi_get_client(dev_info_t *cdip)
   1228      0     stevel {
   1229      0     stevel 	mdi_client_t	*ct = NULL;
   1230   2155        cth 
   1231      0     stevel 	if (MDI_CLIENT(cdip)) {
   1232      0     stevel 		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
   1233      0     stevel 	}
   1234      0     stevel 	return (ct);
   1235      0     stevel }
   1236      0     stevel 
   1237      0     stevel /*
   1238      0     stevel  * i_mdi_is_child_present():
   1239      0     stevel  *		Search for the presence of client device dev_info node
   1240      0     stevel  */
   1241      0     stevel static int
   1242      0     stevel i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
   1243      0     stevel {
   1244      0     stevel 	int		rv = MDI_FAILURE;
   1245      0     stevel 	struct dev_info	*dip;
   1246      0     stevel 	int		circular;
   1247      0     stevel 
   1248      0     stevel 	ndi_devi_enter(vdip, &circular);
   1249      0     stevel 	dip = DEVI(vdip)->devi_child;
   1250      0     stevel 	while (dip) {
   1251      0     stevel 		if (dip == DEVI(cdip)) {
   1252      0     stevel 			rv = MDI_SUCCESS;
   1253      0     stevel 			break;
   1254      0     stevel 		}
   1255      0     stevel 		dip = dip->devi_sibling;
   1256      0     stevel 	}
   1257      0     stevel 	ndi_devi_exit(vdip, circular);
   1258      0     stevel 	return (rv);
   1259      0     stevel }
   1260      0     stevel 
   1261      0     stevel 
   1262      0     stevel /*
   1263      0     stevel  * i_mdi_client_lock():
   1264      0     stevel  *		Grab client component lock
   1265      0     stevel  * Return Values:
   1266      0     stevel  *		None
   1267      0     stevel  * Note:
   1268      0     stevel  *		The default locking order is:
   1269      0     stevel  *		_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
   1270      0     stevel  *		But there are number of situations where locks need to be
   1271      0     stevel  *		grabbed in reverse order.  This routine implements try and lock
   1272      0     stevel  *		mechanism depending on the requested parameter option.
   1273      0     stevel  */
   1274      0     stevel static void
   1275      0     stevel i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
   1276      0     stevel {
   1277      0     stevel 	if (pip) {
   1278      0     stevel 		/*
   1279      0     stevel 		 * Reverse locking is requested.
   1280      0     stevel 		 */
   1281      0     stevel 		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
   1282  10696      David 			if (servicing_interrupt()) {
   1283  10696      David 				MDI_PI_HOLD(pip);
   1284  10696      David 				MDI_PI_UNLOCK(pip);
   1285  10696      David 				MDI_CLIENT_LOCK(ct);
   1286  10696      David 				MDI_PI_LOCK(pip);
   1287  10696      David 				MDI_PI_RELE(pip);
   1288  10696      David 				break;
   1289  10696      David 			} else {
   1290  10696      David 				/*
   1291  10696      David 				 * tryenter failed. Try to grab again
   1292  10696      David 				 * after a small delay
   1293  10696      David 				 */
   1294  10696      David 				MDI_PI_HOLD(pip);
   1295  10696      David 				MDI_PI_UNLOCK(pip);
   1296  11052      Chris 				delay_random(mdi_delay);
   1297  10696      David 				MDI_PI_LOCK(pip);
   1298  10696      David 				MDI_PI_RELE(pip);
   1299  10696      David 			}
   1300      0     stevel 		}
   1301      0     stevel 	} else {
   1302      0     stevel 		MDI_CLIENT_LOCK(ct);
   1303      0     stevel 	}
   1304      0     stevel }
   1305      0     stevel 
   1306      0     stevel /*
   1307      0     stevel  * i_mdi_client_unlock():
   1308      0     stevel  *		Unlock a client component
   1309      0     stevel  */
   1310      0     stevel static void
   1311      0     stevel i_mdi_client_unlock(mdi_client_t *ct)
   1312      0     stevel {
   1313      0     stevel 	MDI_CLIENT_UNLOCK(ct);
   1314      0     stevel }
   1315      0     stevel 
   1316      0     stevel /*
   1317      0     stevel  * i_mdi_client_alloc():
   1318      0     stevel  * 		Allocate and initialize a client structure.  Caller should
   1319   2155        cth  *		hold the vhci client lock.
   1320      0     stevel  * Return Values:
   1321      0     stevel  *		Handle to a client component
   1322      0     stevel  */
   1323      0     stevel /*ARGSUSED*/
   1324      0     stevel static mdi_client_t *
   1325    878      ramat i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
   1326    878      ramat {
   1327    878      ramat 	mdi_client_t	*ct;
   1328      0     stevel 
   1329   2155        cth 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
   1330      0     stevel 
   1331      0     stevel 	/*
   1332      0     stevel 	 * Allocate and initialize a component structure.
   1333      0     stevel 	 */
   1334    878      ramat 	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
   1335      0     stevel 	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
   1336      0     stevel 	ct->ct_hnext = NULL;
   1337      0     stevel 	ct->ct_hprev = NULL;
   1338      0     stevel 	ct->ct_dip = NULL;
   1339      0     stevel 	ct->ct_vhci = vh;
   1340    878      ramat 	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
   1341      0     stevel 	(void) strcpy(ct->ct_drvname, name);
   1342    878      ramat 	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
   1343      0     stevel 	(void) strcpy(ct->ct_guid, lguid);
   1344      0     stevel 	ct->ct_cprivate = NULL;
   1345      0     stevel 	ct->ct_vprivate = NULL;
   1346      0     stevel 	ct->ct_flags = 0;
   1347      0     stevel 	ct->ct_state = MDI_CLIENT_STATE_FAILED;
   1348   2155        cth 	MDI_CLIENT_LOCK(ct);
   1349      0     stevel 	MDI_CLIENT_SET_OFFLINE(ct);
   1350      0     stevel 	MDI_CLIENT_SET_DETACH(ct);
   1351      0     stevel 	MDI_CLIENT_SET_POWER_UP(ct);
   1352   2155        cth 	MDI_CLIENT_UNLOCK(ct);
   1353      0     stevel 	ct->ct_failover_flags = 0;
   1354      0     stevel 	ct->ct_failover_status = 0;
   1355      0     stevel 	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
   1356      0     stevel 	ct->ct_unstable = 0;
   1357      0     stevel 	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
   1358      0     stevel 	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
   1359      0     stevel 	ct->ct_lb = vh->vh_lb;
   1360    878      ramat 	ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
   1361      0     stevel 	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
   1362      0     stevel 	ct->ct_path_count = 0;
   1363      0     stevel 	ct->ct_path_head = NULL;
   1364      0     stevel 	ct->ct_path_tail = NULL;
   1365      0     stevel 	ct->ct_path_last = NULL;
   1366      0     stevel 
   1367      0     stevel 	/*
   1368      0     stevel 	 * Add this client component to our client hash queue
   1369      0     stevel 	 */
   1370      0     stevel 	i_mdi_client_enlist_table(vh, ct);
   1371      0     stevel 	return (ct);
   1372      0     stevel }
   1373      0     stevel 
   1374      0     stevel /*
   1375      0     stevel  * i_mdi_client_enlist_table():
   1376      0     stevel  *		Attach the client device to the client hash table. Caller
   1377   2155        cth  *		should hold the vhci client lock.
   1378   2155        cth  */
   1379      0     stevel static void
   1380      0     stevel i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
   1381      0     stevel {
   1382      0     stevel 	int 			index;
   1383      0     stevel 	struct client_hash	*head;
   1384      0     stevel 
   1385   2155        cth 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
   1386   2155        cth 
   1387      0     stevel 	index = i_mdi_get_hash_key(ct->ct_guid);
   1388      0     stevel 	head = &vh->vh_client_table[index];
   1389      0     stevel 	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
   1390      0     stevel 	head->ct_hash_head = ct;
   1391      0     stevel 	head->ct_hash_count++;
   1392      0     stevel 	vh->vh_client_count++;
   1393      0     stevel }
   1394      0     stevel 
   1395      0     stevel /*
   1396      0     stevel  * i_mdi_client_delist_table():
   1397      0     stevel  *		Attach the client device to the client hash table.
   1398   2155        cth  *		Caller should hold the vhci client lock.
   1399   2155        cth  */
   1400      0     stevel static void
   1401      0     stevel i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
   1402      0     stevel {
   1403      0     stevel 	int			index;
   1404      0     stevel 	char			*guid;
   1405      0     stevel 	struct client_hash 	*head;
   1406      0     stevel 	mdi_client_t		*next;
   1407      0     stevel 	mdi_client_t		*last;
   1408      0     stevel 
   1409   2155        cth 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
   1410   2155        cth 
   1411      0     stevel 	guid = ct->ct_guid;
   1412      0     stevel 	index = i_mdi_get_hash_key(guid);
   1413      0     stevel 	head = &vh->vh_client_table[index];
   1414      0     stevel 
   1415      0     stevel 	last = NULL;
   1416      0     stevel 	next = (mdi_client_t *)head->ct_hash_head;
   1417      0     stevel 	while (next != NULL) {
   1418      0     stevel 		if (next == ct) {
   1419      0     stevel 			break;
   1420      0     stevel 		}
   1421      0     stevel 		last = next;
   1422      0     stevel 		next = next->ct_hnext;
   1423      0     stevel 	}
   1424      0     stevel 
   1425      0     stevel 	if (next) {
   1426      0     stevel 		head->ct_hash_count--;
   1427      0     stevel 		if (last == NULL) {
   1428      0     stevel 			head->ct_hash_head = ct->ct_hnext;
   1429      0     stevel 		} else {
   1430      0     stevel 			last->ct_hnext = ct->ct_hnext;
   1431      0     stevel 		}
   1432      0     stevel 		ct->ct_hnext = NULL;
   1433      0     stevel 		vh->vh_client_count--;
   1434      0     stevel 	}
   1435      0     stevel }
   1436      0     stevel 
   1437      0     stevel 
   1438      0     stevel /*
   1439      0     stevel  * i_mdi_client_free():
   1440      0     stevel  *		Free a client component
   1441      0     stevel  */
   1442      0     stevel static int
   1443      0     stevel i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
   1444      0     stevel {
   1445      0     stevel 	int		rv = MDI_SUCCESS;
   1446      0     stevel 	int		flags = ct->ct_flags;
   1447      0     stevel 	dev_info_t	*cdip;
   1448      0     stevel 	dev_info_t	*vdip;
   1449      0     stevel 
   1450   2155        cth 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
   1451   2155        cth 
   1452      0     stevel 	vdip = vh->vh_dip;
   1453      0     stevel 	cdip = ct->ct_dip;
   1454      0     stevel 
   1455      0     stevel 	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
   1456      0     stevel 	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
   1457      0     stevel 	DEVI(cdip)->devi_mdi_client = NULL;
   1458      0     stevel 
   1459      0     stevel 	/*
   1460      0     stevel 	 * Clear out back ref. to dev_info_t node
   1461      0     stevel 	 */
   1462      0     stevel 	ct->ct_dip = NULL;
   1463      0     stevel 
   1464      0     stevel 	/*
   1465      0     stevel 	 * Remove this client from our hash queue
   1466      0     stevel 	 */
   1467      0     stevel 	i_mdi_client_delist_table(vh, ct);
   1468      0     stevel 
   1469      0     stevel 	/*
   1470      0     stevel 	 * Uninitialize and free the component
   1471      0     stevel 	 */
   1472      0     stevel 	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
   1473      0     stevel 	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
   1474      0     stevel 	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
   1475      0     stevel 	cv_destroy(&ct->ct_failover_cv);
   1476      0     stevel 	cv_destroy(&ct->ct_unstable_cv);
   1477      0     stevel 	cv_destroy(&ct->ct_powerchange_cv);
   1478      0     stevel 	mutex_destroy(&ct->ct_mutex);
   1479      0     stevel 	kmem_free(ct, sizeof (*ct));
   1480      0     stevel 
   1481      0     stevel 	if (cdip != NULL) {
   1482   2155        cth 		MDI_VHCI_CLIENT_UNLOCK(vh);
   1483      0     stevel 		(void) i_mdi_devinfo_remove(vdip, cdip, flags);
   1484   2155        cth 		MDI_VHCI_CLIENT_LOCK(vh);
   1485      0     stevel 	}
   1486      0     stevel 	return (rv);
   1487      0     stevel }
   1488      0     stevel 
   1489      0     stevel /*
   1490      0     stevel  * i_mdi_client_find():
   1491      0     stevel  * 		Find the client structure corresponding to a given guid
   1492   2155        cth  *		Caller should hold the vhci client lock.
   1493      0     stevel  */
   1494      0     stevel static mdi_client_t *
   1495    878      ramat i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
   1496      0     stevel {
   1497      0     stevel 	int			index;
   1498      0     stevel 	struct client_hash	*head;
   1499      0     stevel 	mdi_client_t		*ct;
   1500      0     stevel 
   1501   2155        cth 	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
   1502   2155        cth 
   1503      0     stevel 	index = i_mdi_get_hash_key(guid);
   1504      0     stevel 	head = &vh->vh_client_table[index];
   1505      0     stevel 
   1506      0     stevel 	ct = head->ct_hash_head;
   1507      0     stevel 	while (ct != NULL) {
   1508    878      ramat 		if (strcmp(ct->ct_guid, guid) == 0 &&
   1509    878      ramat 		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
   1510      0     stevel 			break;
   1511      0     stevel 		}
   1512      0     stevel 		ct = ct->ct_hnext;
   1513      0     stevel 	}
   1514      0     stevel 	return (ct);
   1515      0     stevel }
   1516      0     stevel 
   1517      0     stevel /*
   1518      0     stevel  * i_mdi_client_update_state():
   1519      0     stevel  *		Compute and update client device state
   1520      0     stevel  * Notes:
   1521      0     stevel  *		A client device can be in any of three possible states:
   1522      0     stevel  *
   1523      0     stevel  *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
   1524      0     stevel  *		one online/standby paths. Can tolerate failures.
   1525      0     stevel  *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
   1526      0     stevel  *		no alternate paths available as standby. A failure on the online
   1527      0     stevel  *		would result in loss of access to device data.
   1528      0     stevel  *		MDI_CLIENT_STATE_FAILED - Client device in failed state with
   1529      0     stevel  *		no paths available to access the device.
   1530      0     stevel  */
   1531      0     stevel static void
   1532      0     stevel i_mdi_client_update_state(mdi_client_t *ct)
   1533      0     stevel {
   1534      0     stevel 	int state;
   1535   2155        cth 
   1536   2155        cth 	ASSERT(MDI_CLIENT_LOCKED(ct));
   1537      0     stevel 	state = i_mdi_client_compute_state(ct, NULL);
   1538      0     stevel 	MDI_CLIENT_SET_STATE(ct, state);
   1539      0     stevel }
   1540      0     stevel 
   1541      0     stevel /*
   1542      0     stevel  * i_mdi_client_compute_state():
   1543      0     stevel  *		Compute client device state
   1544      0     stevel  *
   1545      0     stevel  *		mdi_phci_t *	Pointer to pHCI structure which should
   1546      0     stevel  *				while computing the new value.  Used by
   1547      0     stevel  *				i_mdi_phci_offline() to find the new
   1548      0     stevel  *				client state after DR of a pHCI.
   1549      0     stevel  */
   1550      0     stevel static int
   1551      0     stevel i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
   1552      0     stevel {
   1553      0     stevel 	int		state;
   1554      0     stevel 	int		online_count = 0;
   1555      0     stevel 	int		standby_count = 0;
   1556      0     stevel 	mdi_pathinfo_t	*pip, *next;
   1557      0     stevel 
   1558   2155        cth 	ASSERT(MDI_CLIENT_LOCKED(ct));
   1559      0     stevel 	pip = ct->ct_path_head;
   1560      0     stevel 	while (pip != NULL) {
   1561      0     stevel 		MDI_PI_LOCK(pip);
   1562      0     stevel 		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
   1563      0     stevel 		if (MDI_PI(pip)->pi_phci == ph) {
   1564      0     stevel 			MDI_PI_UNLOCK(pip);
   1565      0     stevel 			pip = next;
   1566      0     stevel 			continue;
   1567      0     stevel 		}
   1568   2155        cth 
   1569      0     stevel 		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
   1570      0     stevel 				== MDI_PATHINFO_STATE_ONLINE)
   1571      0     stevel 			online_count++;
   1572      0     stevel 		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
   1573      0     stevel 				== MDI_PATHINFO_STATE_STANDBY)
   1574      0     stevel 			standby_count++;
   1575      0     stevel 		MDI_PI_UNLOCK(pip);
   1576      0     stevel 		pip = next;
   1577      0     stevel 	}
   1578      0     stevel 
   1579      0     stevel 	if (online_count == 0) {
   1580      0     stevel 		if (standby_count == 0) {
   1581      0     stevel 			state = MDI_CLIENT_STATE_FAILED;
   1582  10696      David 			MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
   1583  10696      David 			    "client state failed: ct = %p", (void *)ct));
   1584      0     stevel 		} else if (standby_count == 1) {
   1585      0     stevel 			state = MDI_CLIENT_STATE_DEGRADED;
   1586      0     stevel 		} else {
   1587      0     stevel 			state = MDI_CLIENT_STATE_OPTIMAL;
   1588      0     stevel 		}
   1589      0     stevel 	} else if (online_count == 1) {
   1590      0     stevel 		if (standby_count == 0) {
   1591      0     stevel 			state = MDI_CLIENT_STATE_DEGRADED;
   1592      0     stevel 		} else {
   1593      0     stevel 			state = MDI_CLIENT_STATE_OPTIMAL;
   1594      0     stevel 		}
   1595      0     stevel 	} else {
   1596      0     stevel 		state = MDI_CLIENT_STATE_OPTIMAL;
   1597      0     stevel 	}
   1598      0     stevel 	return (state);
   1599      0     stevel }
   1600      0     stevel 
   1601      0     stevel /*
   1602      0     stevel  * i_mdi_client2devinfo():
   1603      0     stevel  *		Utility function
   1604      0     stevel  */
   1605      0     stevel dev_info_t *
   1606      0     stevel i_mdi_client2devinfo(mdi_client_t *ct)
   1607      0     stevel {
   1608      0     stevel 	return (ct->ct_dip);
   1609      0     stevel }
   1610      0     stevel 
   1611      0     stevel /*
   1612      0     stevel  * mdi_client_path2_devinfo():
   1613      0     stevel  * 		Given the parent devinfo and child devfs pathname, search for
   1614      0     stevel  *		a valid devfs node handle.
   1615      0     stevel  */
   1616      0     stevel dev_info_t *
   1617      0     stevel mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
   1618      0     stevel {
   1619      0     stevel 	dev_info_t 	*cdip = NULL;
   1620      0     stevel 	dev_info_t 	*ndip = NULL;
   1621      0     stevel 	char		*temp_pathname;
   1622      0     stevel 	int		circular;
   1623      0     stevel 
   1624      0     stevel 	/*
   1625      0     stevel 	 * Allocate temp buffer
   1626      0     stevel 	 */
   1627      0     stevel 	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
   1628      0     stevel 
   1629      0     stevel 	/*
   1630      0     stevel 	 * Lock parent against changes
   1631      0     stevel 	 */
   1632      0     stevel 	ndi_devi_enter(vdip, &circular);
   1633      0     stevel 	ndip = (dev_info_t *)DEVI(vdip)->devi_child;
   1634      0     stevel 	while ((cdip = ndip) != NULL) {
   1635      0     stevel 		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
   1636      0     stevel 
   1637      0     stevel 		*temp_pathname = '\0';
   1638      0     stevel 		(void) ddi_pathname(cdip, temp_pathname);
   1639      0     stevel 		if (strcmp(temp_pathname, pathname) == 0) {
   1640      0     stevel 			break;
   1641      0     stevel 		}
   1642      0     stevel 	}
   1643      0     stevel 	/*
   1644      0     stevel 	 * Release devinfo lock
   1645      0     stevel 	 */
   1646      0     stevel 	ndi_devi_exit(vdip, circular);
   1647      0     stevel 
   1648      0     stevel 	/*
   1649      0     stevel 	 * Free the temp buffer
   1650      0     stevel 	 */
   1651      0     stevel 	kmem_free(temp_pathname, MAXPATHLEN);
   1652      0     stevel 	return (cdip);
   1653      0     stevel }
   1654      0     stevel 
   1655      0     stevel /*
   1656      0     stevel  * mdi_client_get_path_count():
   1657      0     stevel  * 		Utility function to get number of path information nodes
   1658      0     stevel  *		associated with a given client device.
   1659      0     stevel  */
   1660      0     stevel int
   1661      0     stevel mdi_client_get_path_count(dev_info_t *cdip)
   1662      0     stevel {
   1663      0     stevel 	mdi_client_t	*ct;
   1664      0     stevel 	int		count = 0;
   1665      0     stevel 
   1666      0     stevel 	ct = i_devi_get_client(cdip);
   1667      0     stevel 	if (ct != NULL) {
   1668      0     stevel 		count = ct->ct_path_count;
   1669      0     stevel 	}
   1670      0     stevel 	return (count);
   1671      0     stevel }
   1672      0     stevel 
   1673      0     stevel 
   1674      0     stevel /*
   1675      0     stevel  * i_mdi_get_hash_key():
   1676      0     stevel  * 		Create a hash using strings as keys
   1677      0     stevel  *
   1678      0     stevel  */
   1679      0     stevel static int
   1680      0     stevel i_mdi_get_hash_key(char *str)
   1681      0     stevel {
   1682      0     stevel 	uint32_t	g, hash = 0;
   1683      0     stevel 	char		*p;
   1684      0     stevel 
   1685      0     stevel 	for (p = str; *p != '\0'; p++) {
   1686      0     stevel 		g = *p;
   1687      0     stevel 		hash += g;
   1688      0     stevel 	}
   1689      0     stevel 	return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
   1690      0     stevel }
   1691      0     stevel 
   1692      0     stevel /*
   1693      0     stevel  * mdi_get_lb_policy():
   1694      0     stevel  * 		Get current load balancing policy for a given client device
   1695      0     stevel  */
   1696      0     stevel client_lb_t
   1697      0     stevel mdi_get_lb_policy(dev_info_t *cdip)
   1698      0     stevel {
   1699      0     stevel 	client_lb_t	lb = LOAD_BALANCE_NONE;
   1700      0     stevel 	mdi_client_t	*ct;
   1701      0     stevel 
   1702      0     stevel 	ct = i_devi_get_client(cdip);
   1703      0     stevel 	if (ct != NULL) {
   1704      0     stevel 		lb = ct->ct_lb;
   1705      0     stevel 	}
   1706      0     stevel 	return (lb);
   1707      0     stevel }
   1708      0     stevel 
   1709      0     stevel /*
   1710      0     stevel  * mdi_set_lb_region_size():
   1711      0     stevel  * 		Set current region size for the load-balance
   1712      0     stevel  */
   1713      0     stevel int
   1714      0     stevel mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
   1715      0     stevel {
   1716      0     stevel 	mdi_client_t	*ct;
   1717      0     stevel 	int		rv = MDI_FAILURE;
   1718      0     stevel 
   1719      0     stevel 	ct = i_devi_get_client(cdip);
   1720      0     stevel 	if (ct != NULL && ct->ct_lb_args != NULL) {
   1721      0     stevel 		ct->ct_lb_args->region_size = region_size;
   1722      0     stevel 		rv = MDI_SUCCESS;
   1723      0     stevel 	}
   1724      0     stevel 	return (rv);
   1725      0     stevel }
   1726      0     stevel 
   1727      0     stevel /*
   1728      0     stevel  * mdi_Set_lb_policy():
   1729      0     stevel  * 		Set current load balancing policy for a given client device
   1730      0     stevel  */
   1731      0     stevel int
   1732      0     stevel mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
   1733      0     stevel {
   1734      0     stevel 	mdi_client_t	*ct;
   1735      0     stevel 	int		rv = MDI_FAILURE;
   1736      0     stevel 
   1737      0     stevel 	ct = i_devi_get_client(cdip);
   1738      0     stevel 	if (ct != NULL) {
   1739      0     stevel 		ct->ct_lb = lb;
   1740      0     stevel 		rv = MDI_SUCCESS;
   1741      0     stevel 	}
   1742      0     stevel 	return (rv);
   1743      0     stevel }
   1744      0     stevel 
   1745      0     stevel /*
   1746      0     stevel  * mdi_failover():
   1747      0     stevel  *		failover function called by the vHCI drivers to initiate
   1748      0     stevel  *		a failover operation.  This is typically due to non-availability
   1749      0     stevel  *		of online paths to route I/O requests.  Failover can be
   1750      0     stevel  *		triggered through user application also.
   1751      0     stevel  *
   1752      0     stevel  *		The vHCI driver calls mdi_failover() to initiate a failover
   1753      0     stevel  *		operation. mdi_failover() calls back into the vHCI driver's
   1754      0     stevel  *		vo_failover() entry point to perform the actual failover
   1755      0     stevel  *		operation.  The reason for requiring the vHCI driver to
   1756      0     stevel  *		initiate failover by calling mdi_failover(), instead of directly
   1757      0     stevel  *		executing vo_failover() itself, is to ensure that the mdi
   1758      0     stevel  *		framework can keep track of the client state properly.
   1759      0     stevel  *		Additionally, mdi_failover() provides as a convenience the
   1760      0     stevel  *		option of performing the failover operation synchronously or
   1761      0     stevel  *		asynchronously
   1762      0     stevel  *
   1763      0     stevel  *		Upon successful completion of the failover operation, the
   1764      0     stevel  *		paths that were previously ONLINE will be in the STANDBY state,
   1765      0     stevel  *		and the newly activated paths will be in the ONLINE state.
   1766      0     stevel  *
   1767      0     stevel  *		The flags modifier determines whether the activation is done
   1768      0     stevel  *		synchronously: MDI_FAILOVER_SYNC
   1769      0     stevel  * Return Values:
   1770      0     stevel  *		MDI_SUCCESS
   1771      0     stevel  *		MDI_FAILURE
   1772      0     stevel  *		MDI_BUSY
   1773      0     stevel  */
   1774      0     stevel /*ARGSUSED*/
   1775      0     stevel int
   1776      0     stevel mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
   1777      0     stevel {
   1778      0     stevel 	int			rv;
   1779      0     stevel 	mdi_client_t		*ct;
   1780      0     stevel 
   1781      0     stevel 	ct = i_devi_get_client(cdip);
   1782      0     stevel 	ASSERT(ct != NULL);
   1783      0     stevel 	if (ct == NULL) {
   1784      0     stevel 		/* cdip is not a valid client device. Nothing more to do. */
   1785      0     stevel 		return (MDI_FAILURE);
   1786      0     stevel 	}
   1787      0     stevel 
   1788      0     stevel 	MDI_CLIENT_LOCK(ct);
   1789      0     stevel 
   1790      0     stevel 	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
   1791      0     stevel 		/* A path to the client is being freed */
   1792      0     stevel 		MDI_CLIENT_UNLOCK(ct);
   1793      0     stevel 		return (MDI_BUSY);
   1794      0     stevel 	}
   1795      0     stevel 
   1796      0     stevel 
   1797      0     stevel 	if (MDI_CLIENT_IS_FAILED(ct)) {
   1798      0     stevel 		/*
   1799      0     stevel 		 * Client is in failed state. Nothing more to do.
   1800      0     stevel 		 */
   1801      0     stevel 		MDI_CLIENT_UNLOCK(ct);
   1802      0     stevel 		return (MDI_FAILURE);
   1803      0     stevel 	}
   1804      0     stevel 
   1805      0     stevel 	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
   1806      0     stevel 		/*
   1807      0     stevel 		 * Failover is already in progress; return BUSY
   1808      0     stevel 		 */
   1809      0     stevel 		MDI_CLIENT_UNLOCK(ct);
   1810      0     stevel 		return (MDI_BUSY);
   1811      0     stevel 	}
   1812      0     stevel 	/*
   1813      0     stevel 	 * Make sure that mdi_pathinfo node state changes are processed.
   1814      0     stevel 	 * We do not allow failovers to progress while client path state
   1815      0     stevel 	 * changes are in progress
   1816      0     stevel 	 */
   1817      0     stevel 	if (ct->ct_unstable) {
   1818      0     stevel 		if (flags == MDI_FAILOVER_ASYNC) {
   1819      0     stevel 			MDI_CLIENT_UNLOCK(ct);
   1820      0     stevel 			return (MDI_BUSY);
   1821      0     stevel 		} else {
   1822      0     stevel 			while (ct->ct_unstable)
   1823      0     stevel 				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
   1824      0     stevel 		}
   1825      0     stevel 	}
   1826      0     stevel 
   1827      0     stevel 	/*
   1828      0     stevel 	 * Client device is in stable state. Before proceeding, perform sanity
   1829      0     stevel 	 * checks again.
   1830      0     stevel 	 */
   1831      0     stevel 	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
   1832   1333        cth 	    (!i_ddi_devi_attached(ct->ct_dip))) {
   1833      0     stevel 		/*
   1834      0     stevel 		 * Client is in failed state. Nothing more to do.
   1835      0     stevel 		 */
   1836      0     stevel 		MDI_CLIENT_UNLOCK(ct);
   1837      0     stevel 		return (MDI_FAILURE);
   1838      0     stevel 	}
   1839      0     stevel 
   1840      0     stevel 	/*
   1841      0     stevel 	 * Set the client state as failover in progress.
   1842      0     stevel 	 */
   1843      0     stevel 	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
   1844      0     stevel 	ct->ct_failover_flags = flags;
   1845      0     stevel 	MDI_CLIENT_UNLOCK(ct);
   1846      0     stevel 
   1847      0     stevel 	if (flags == MDI_FAILOVER_ASYNC) {
   1848      0     stevel 		/*
   1849      0     stevel 		 * Submit the initiate failover request via CPR safe
   1850      0     stevel 		 * taskq threads.
   1851      0     stevel 		 */
   1852      0     stevel 		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
   1853      0     stevel 		    ct, KM_SLEEP);
   1854      0     stevel 		return (MDI_ACCEPT);
   1855      0     stevel 	} else {
   1856      0     stevel 		/*
   1857      0     stevel 		 * Synchronous failover mode.  Typically invoked from the user
   1858      0     stevel 		 * land.
   1859      0     stevel 		 */
   1860      0     stevel 		rv = i_mdi_failover(ct);
   1861      0     stevel 	}
   1862      0     stevel 	return (rv);
   1863      0     stevel }
   1864      0     stevel 
   1865      0     stevel /*
   1866      0     stevel  * i_mdi_failover():
   1867      0     stevel  *		internal failover function. Invokes vHCI drivers failover
   1868      0     stevel  *		callback function and process the failover status
   1869      0     stevel  * Return Values:
   1870      0     stevel  *		None
   1871      0     stevel  *
   1872      0     stevel  * Note: A client device in failover state can not be detached or freed.
   1873      0     stevel  */
   1874      0     stevel static int
   1875      0     stevel i_mdi_failover(void *arg)
   1876      0     stevel {
   1877      0     stevel 	int		rv = MDI_SUCCESS;
   1878      0     stevel 	mdi_client_t	*ct = (mdi_client_t *)arg;
   1879      0     stevel 	mdi_vhci_t	*vh = ct->ct_vhci;
   1880      0     stevel 
   1881   2155        cth 	ASSERT(!MDI_CLIENT_LOCKED(ct));
   1882      0     stevel 
   1883      0     stevel 	if (vh->vh_ops->vo_failover != NULL) {
   1884      0     stevel 		/*
   1885      0     stevel 		 * Call vHCI drivers callback routine
   1886      0     stevel 		 */
   1887      0     stevel 		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
   1888      0     stevel 		    ct->ct_failover_flags);
   1889      0     stevel 	}
   1890      0     stevel 
   1891      0     stevel 	MDI_CLIENT_LOCK(ct);
   1892      0     stevel 	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
   1893      0     stevel 
   1894      0     stevel 	/*
   1895      0     stevel 	 * Save the failover return status
   1896      0     stevel 	 */
   1897      0     stevel 	ct->ct_failover_status = rv;
   1898      0     stevel 
   1899      0     stevel 	/*
   1900      0     stevel 	 * As a result of failover, client status would have been changed.
   1901      0     stevel 	 * Update the client state and wake up anyone waiting on this client
   1902      0     stevel 	 * device.
   1903      0     stevel 	 */
   1904      0     stevel 	i_mdi_client_update_state(ct);
   1905      0     stevel 
   1906      0     stevel 	cv_broadcast(&ct->ct_failover_cv);
   1907      0     stevel 	MDI_CLIENT_UNLOCK(ct);
   1908      0     stevel 	return (rv);
   1909      0     stevel }
   1910      0     stevel 
   1911      0     stevel /*
   1912      0     stevel  * Load balancing is logical block.
   1913      0     stevel  * IOs within the range described by region_size
   1914      0     stevel  * would go on the same path. This would improve the
   1915      0     stevel  * performance by cache-hit on some of the RAID devices.
   1916      0     stevel  * Search only for online paths(At some point we
   1917      0     stevel  * may want to balance across target ports).
   1918      0     stevel  * If no paths are found then default to round-robin.
   1919      0     stevel  */
   1920      0     stevel static int
   1921      0     stevel i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
   1922      0     stevel {
   1923      0     stevel 	int		path_index = -1;
   1924      0     stevel 	int		online_path_count = 0;
   1925      0     stevel 	int		online_nonpref_path_count = 0;
   1926      0     stevel 	int 		region_size = ct->ct_lb_args->region_size;
   1927      0     stevel 	mdi_pathinfo_t	*pip;
   1928      0     stevel 	mdi_pathinfo_t	*next;
   1929      0     stevel 	int		preferred, path_cnt;
   1930      0     stevel 
   1931      0     stevel 	pip = ct->ct_path_head;
   1932      0     stevel 	while (pip) {
   1933      0     stevel 		MDI_PI_LOCK(pip);
   1934      0     stevel 		if (MDI_PI(pip)->pi_state ==
   1935      0     stevel 		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
   1936      0     stevel 			online_path_count++;
   1937      0     stevel 		} else if (MDI_PI(pip)->pi_state ==
   1938      0     stevel 		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
   1939      0     stevel 			online_nonpref_path_count++;
   1940      0     stevel 		}
   1941      0     stevel 		next = (mdi_pathinfo_t *)
   1942      0     stevel 		    MDI_PI(pip)->pi_client_link;
   1943      0     stevel 		MDI_PI_UNLOCK(pip);
   1944      0     stevel 		pip = next;
   1945      0     stevel 	}
   1946      0     stevel 	/* if found any online/preferred then use this type */
   1947      0     stevel 	if (online_path_count > 0) {
   1948      0     stevel 		path_cnt = online_path_count;
   1949      0     stevel 		preferred = 1;
   1950      0     stevel 	} else if (online_nonpref_path_count > 0) {
   1951      0     stevel 		path_cnt = online_nonpref_path_count;
   1952      0     stevel 		preferred = 0;
   1953      0     stevel 	} else {
   1954      0     stevel 		path_cnt = 0;
   1955      0     stevel 	}
   1956      0     stevel 	if (path_cnt) {
   1957      0     stevel 		path_index = (bp->b_blkno >> region_size) % path_cnt;
   1958      0     stevel 		pip = ct->ct_path_head;
   1959      0     stevel 		while (pip && path_index != -1) {
   1960      0     stevel 			MDI_PI_LOCK(pip);
   1961      0     stevel 			if (path_index == 0 &&
   1962      0     stevel 			    (MDI_PI(pip)->pi_state ==
   1963      0     stevel 			    MDI_PATHINFO_STATE_ONLINE) &&
   1964      0     stevel 				MDI_PI(pip)->pi_preferred == preferred) {
   1965      0     stevel 				MDI_PI_HOLD(pip);
   1966      0     stevel 				MDI_PI_UNLOCK(pip);
   1967      0     stevel 				*ret_pip = pip;
   1968      0     stevel 				return (MDI_SUCCESS);
   1969      0     stevel 			}
   1970      0     stevel 			path_index --;
   1971      0     stevel 			next = (mdi_pathinfo_t *)
   1972      0     stevel 			    MDI_PI(pip)->pi_client_link;
   1973      0     stevel 			MDI_PI_UNLOCK(pip);
   1974      0     stevel 			pip = next;
   1975      0     stevel 		}
   1976  10696      David 		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
   1977  10696      David 		    "lba %llx: path %s %p",
   1978  10696      David 		    bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip));
   1979      0     stevel 	}
   1980      0     stevel 	return (MDI_FAILURE);
   1981      0     stevel }
   1982      0     stevel 
/*
 * mdi_select_path():
 *		select a path to access a client device.
 *
 *		mdi_select_path() function is called by the vHCI drivers to
 *		select a path to route the I/O request to.  The caller passes
 *		the block I/O data transfer structure ("buf") as one of the
 *		parameters.  The mpxio framework uses the buf structure
 *		contents to maintain per path statistics (total I/O size /
 *		count pending).  Within this function "bp" is consulted only
 *		by the LOAD_BALANCE_LBA policy.  If more than one online path
 *		is available to select, the framework automatically selects a
 *		suitable path for routing the I/O request. If a failover
 *		operation is active for this client device the call shall be
 *		failed with MDI_BUSY error code.
 *
 *		By default this function returns a suitable path in online
 *		state based on the current load balancing policy.  Currently
 *		we support LOAD_BALANCE_NONE (Previously selected online path
 *		will continue to be used till the path is usable),
 *		LOAD_BALANCE_RR (Online paths will be selected in a round
 *		robin fashion) and LOAD_BALANCE_LBA (Online paths will be
 *		selected based on the logical block).  The load balancing
 *		policy is configured through the vHCI driver's configuration
 *		file (driver.conf).
 *
 *		vHCI drivers may override this default behavior by specifying
 *		appropriate flags.  Any non-zero "flags" value disables the
 *		default behavior, including the client state checks made
 *		before a path is looked up.  The meaning of the "arg" argument
 *		depends on the flags specified. If MDI_SELECT_PATH_INSTANCE is
 *		set then the argument is the "path instance" of the path to
 *		select.  If MDI_SELECT_PATH_INSTANCE is not set then the
 *		argument is "start_pip". A non NULL "start_pip" is the starting
 *		point to walk and find the next appropriate path.  The
 *		following values are currently defined: MDI_SELECT_ONLINE_PATH
 *		(to select an ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to
 *		select a STANDBY path).  The code below additionally
 *		recognizes MDI_SELECT_USER_DISABLE_PATH and
 *		MDI_SELECT_NO_PREFERRED when combined with both of those.
 *
 *		The non-standard behavior is used by the scsi_vhci driver,
 *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
 *		attach of client devices (to avoid an unnecessary failover
 *		when the STANDBY path comes up first), during failover
 *		(to activate a STANDBY path as ONLINE).
 *
 *		The selected path is returned in a mdi_hold_path() state
 *		(pi_ref_cnt). Caller should release the hold by calling
 *		mdi_rele_path().
 *
 * Return Values:
 *		MDI_SUCCESS	- Completed successfully; *ret_pip is the held
 *				  path
 *		MDI_BUSY 	- Client device is busy failing over, or no
 *				  path matched and a path in a transient
 *				  state was seen
 *		MDI_NOPATH	- Client device is online, but no valid path are
 *				  available to access this client device
 *		MDI_FAILURE	- Invalid client device or state, or the path
 *				  requested by path instance is in INIT state
 *				  or has no pHCI
 *		MDI_DEVI_ONLINING
 *				- Client device (struct dev_info state) is in
 *				  onlining state.
 *
 *		*ret_pip is set to NULL for every return other than
 *		MDI_SUCCESS.
 */

/*ARGSUSED*/
int
mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
    void *arg, mdi_pathinfo_t **ret_pip)
{
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*head;
	mdi_pathinfo_t	*start;
	client_lb_t	lbp;	/* load balancing policy */
	int		sb = 1;	/* standard behavior */
	int		preferred = 1;	/* preferred path */
	/* cond: current path satisfies the request; cont: keep walking */
	int		cond, cont = 1;
	/* set when a transient path is seen; turns MDI_NOPATH into MDI_BUSY */
	int		retry = 0;
	mdi_pathinfo_t	*start_pip;	/* request starting pathinfo */
	int		path_instance;	/* request specific path instance */

	/* determine type of arg based on flags */
	if (flags & MDI_SELECT_PATH_INSTANCE) {
		path_instance = (int)(intptr_t)arg;
		start_pip = NULL;
	} else {
		path_instance = 0;
		start_pip = (mdi_pathinfo_t *)arg;
	}

	if (flags != 0) {
		/*
		 * disable default behavior
		 */
		sb = 0;
	}

	*ret_pip = NULL;
	ct = i_devi_get_client(cdip);
	if (ct == NULL) {
		/* mdi extensions are NULL, Nothing more to do */
		return (MDI_FAILURE);
	}

	/* The client lock is held until just before every return below. */
	MDI_CLIENT_LOCK(ct);

	/* Client state checks apply to the standard behavior only. */
	if (sb) {
		if (MDI_CLIENT_IS_FAILED(ct)) {
			/*
			 * Client is not ready to accept any I/O requests.
			 * Fail this request.
			 */
			MDI_DEBUG(2, (MDI_NOTE, cdip,
			    "client state offline ct = %p", (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Check for Failover is in progress. If so tell the
			 * caller that this device is busy.
			 */
			MDI_DEBUG(2, (MDI_NOTE, cdip,
			    "client failover in progress ct = %p",
			    (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		}

		/*
		 * Check to see whether the client device is attached.
		 * If not so, let the vHCI driver manually select a path
		 * (standby) and let the probe/attach process to continue.
		 */
		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
			MDI_DEBUG(4, (MDI_NOTE, cdip,
			    "devi is onlining ct = %p", (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_DEVI_ONLINING);
		}
	}

	/*
	 * Cache in the client list head.  If head of the list is NULL
	 * return MDI_NOPATH
	 */
	head = ct->ct_path_head;
	if (head == NULL) {
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_NOPATH);
	}

	/* Caller is specifying a specific pathinfo path by path_instance */
	if (path_instance) {
		/* search for pathinfo with correct path_instance */
		for (pip = head;
		    pip && (mdi_pi_get_path_instance(pip) != path_instance);
		    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
			;

		/* If path can't be selected then MDI_NOPATH is returned. */
		if (pip == NULL) {
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_NOPATH);
		}

		/*
		 * Verify state of path. When asked to select a specific
		 * path_instance, we select the requested path in any
		 * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT.
		 * We don't however select paths where the pHCI has detached.
		 * NOTE: last pathinfo node of an opened client device may
		 * exist in an OFFLINE state after the pHCI associated with
		 * that path has detached (but pi_phci will be NULL if that
		 * has occurred).
		 */
		MDI_PI_LOCK(pip);
		if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) ||
		    (MDI_PI(pip)->pi_phci == NULL)) {
			MDI_PI_UNLOCK(pip);
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		/* Return MDI_BUSY if we have a transient condition */
		if (MDI_PI_IS_TRANSIENT(pip)) {
			MDI_PI_UNLOCK(pip);
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		}

		/*
		 * Return the path in hold state. Caller should release the
		 * hold by calling mdi_rele_path()
		 */
		MDI_PI_HOLD(pip);
		MDI_PI_UNLOCK(pip);
		*ret_pip = pip;
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_SUCCESS);
	}

	/*
	 * for non default behavior, bypass current
	 * load balancing policy and always use LOAD_BALANCE_RR
	 * except that the start point will be adjusted based
	 * on the provided start_pip
	 */
	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;

	/*
	 * There is no default case: a policy value other than the three
	 * handled here selects nothing and falls out to the
	 * MDI_BUSY/MDI_NOPATH return at the end of the function.
	 */
	switch (lbp) {
	case LOAD_BALANCE_NONE:
		/*
		 * Load balancing is None  or Alternate path mode
		 * Start looking for a online mdi_pathinfo node starting from
		 * last known selected path
		 */
		preferred = 1;
		pip = (mdi_pathinfo_t *)ct->ct_path_last;
		if (pip == NULL) {
			pip = head;
		}
		start = pip;
		/*
		 * The list is walked circularly, at most twice: once looking
		 * for ONLINE preferred paths and, once the walk returns to
		 * "start", once more looking for ONLINE non-preferred paths.
		 */
		do {
			MDI_PI_LOCK(pip);
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if ((MDI_PI(pip)->pi_state  ==
				MDI_PATHINFO_STATE_ONLINE) &&
				preferred == MDI_PI(pip)->pi_preferred) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}

			/*
			 * Path is busy.  Remember that a transient path was
			 * seen so that MDI_BUSY rather than MDI_NOPATH is
			 * returned if nothing ends up being selected.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;
			/*
			 * Keep looking for a next available online path
			 */
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				next = head;
			}
			MDI_PI_UNLOCK(pip);
			pip = next;
			if (start == pip && preferred) {
				/* first lap done; retry with non-preferred */
				preferred = 0;
			} else if (start == pip && !preferred) {
				/* second lap done; give up */
				cont = 0;
			}
		} while (cont);
		break;

	case LOAD_BALANCE_LBA:
		/*
		 * Make sure we are looking
		 * for an online path. Otherwise, if it is for a STANDBY
		 * path request, it will go through and fetch an ONLINE
		 * path which is not desirable.
		 *
		 * LBA selection also needs load balancing arguments with a
		 * non-zero region_size and a buf; if any of these is missing,
		 * or i_mdi_lba_lb() does not return MDI_SUCCESS, the round
		 * robin case below is used instead.
		 */
		if ((ct->ct_lb_args != NULL) &&
			    (ct->ct_lb_args->region_size) && bp &&
				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
			if (i_mdi_lba_lb(ct, ret_pip, bp)
				    == MDI_SUCCESS) {
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
		}
		/* FALLTHROUGH */
	case LOAD_BALANCE_RR:
		/*
		 * Load balancing is Round Robin. Start looking for a online
		 * mdi_pathinfo node starting from last known selected path
		 * as the start point.  If override flags are specified,
		 * process accordingly.
		 * If the search is already in effect(start_pip not null),
		 * then lets just use the same path preference to continue the
		 * traversal.
		 */

		if (start_pip != NULL) {
			preferred = MDI_PI(start_pip)->pi_preferred;
		} else {
			preferred = 1;
		}

		/*
		 * Standard behavior resumes after the last selected path;
		 * override behavior resumes after the caller's start_pip.
		 * With no such path, begin at the head of the list.
		 */
		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
		if (start == NULL) {
			pip = head;
		} else {
			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
			if (pip == NULL) {
				if ( flags & MDI_SELECT_NO_PREFERRED) {
					/*
					 * Return since we hit the end of list
					 */
					MDI_CLIENT_UNLOCK(ct);
					return (MDI_NOPATH);
				}

				if (!sb) {
					if (preferred == 0) {
						/*
						 * Looks like we have completed
						 * the traversal as preferred
						 * value is 0. Time to bail out.
						 */
						*ret_pip = NULL;
						MDI_CLIENT_UNLOCK(ct);
						return (MDI_NOPATH);
					} else {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
					}
				}
				pip = head;
			}
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * Compute "cond": does this path satisfy the request?
			 * In the override case "flags" is compared for exact
			 * equality, so only the flag combinations listed
			 * below can ever match; any other value leaves cond
			 * at 0 for every path.
			 */
			if (sb) {
				cond = ((MDI_PI(pip)->pi_state ==
				    MDI_PATHINFO_STATE_ONLINE &&
					MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
			} else {
				if (flags == MDI_SELECT_ONLINE_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == MDI_SELECT_STANDBY_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == (MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_STANDBY_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY)) &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags ==
					(MDI_SELECT_STANDBY_PATH |
					MDI_SELECT_ONLINE_PATH |
					MDI_SELECT_USER_DISABLE_PATH)) {
					/*
					 * As above, but ONLINE and STANDBY
					 * paths that also carry the
					 * USER_DISABLE state bit are accepted.
					 */
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY) ||
						(MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_ONLINE|
					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
						(MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_STANDBY |
					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags ==
				    (MDI_SELECT_STANDBY_PATH |
				    MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_NO_PREFERRED)) {
					/*
					 * ONLINE or STANDBY, regardless of
					 * the path's preferred setting.
					 */
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE) ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY))
					    ? 1 : 0);
				} else {
					cond = 0;
				}
			}
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (cond) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 *
				 * ct_path_last is updated only for the
				 * standard behavior.
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				if (sb)
					ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
			/*
			 * Path is busy.  Remember that a transient path was
			 * seen so that MDI_BUSY rather than MDI_NOPATH is
			 * returned if nothing ends up being selected.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;

			/*
			 * Keep looking for a next available online path
			 *
			 * do_again is reached with pip locked, either by
			 * falling through from above or from the goto further
			 * down, which re-locks pip before jumping.
			 */
do_again:
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				if ( flags & MDI_SELECT_NO_PREFERRED) {
					/*
					 * Bail out since we hit the end of list
					 */
					MDI_PI_UNLOCK(pip);
					break;
				}

				if (!sb) {
					if (preferred == 1) {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
						next = head;
					} else {
						/*
						 * We have done both the passes
						 * Preferred as well as for
						 * Non-preferred. Bail out now.
						 */
						cont = 0;
					}
				} else {
					/*
					 * Standard behavior case.
					 */
					next = head;
				}
			}
			MDI_PI_UNLOCK(pip);
			if (cont == 0) {
				break;
			}
			pip = next;

			if (!sb) {
				/*
				 * We need to handle the selection of
				 * non-preferred path in the following
				 * case:
				 *
				 * +------+   +------+   +------+   +-----+
				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
				 * +------+   +------+   +------+   +-----+
				 *
				 * If we start the search with B, we need to
				 * skip beyond B to pick C which is non -
				 * preferred in the second pass. The following
				 * test, if true, will allow us to skip over
				 * the 'start'(B in the example) to select
				 * other non preferred elements.
				 */
				if ((start_pip != NULL) && (start_pip == pip) &&
				    (MDI_PI(start_pip)->pi_preferred
				    != preferred)) {
					/*
					 * try again after going past the start
					 * pip
					 */
					MDI_PI_LOCK(pip);
					goto do_again;
				}
			} else {
				/*
				 * Standard behavior case
				 */
				if (start == pip && preferred) {
					/* look for nonpreferred paths */
					preferred = 0;
				} else if (start == pip && !preferred) {
					/*
					 * Exit condition
					 */
					cont = 0;
				}
			}
		} while (cont);
		break;
	}

	/*
	 * No path was selected.  Report MDI_BUSY if a path in a transient
	 * state was encountered during the walk, MDI_NOPATH otherwise.
	 */
	MDI_CLIENT_UNLOCK(ct);
	if (retry == 1) {
		return (MDI_BUSY);
	} else {
		return (MDI_NOPATH);
	}
}
   2494      0     stevel 
   2495      0     stevel /*
   2496      0     stevel  * For a client, return the next available path to any phci
   2497      0     stevel  *
   2498      0     stevel  * Note:
   2499      0     stevel  *		Caller should hold the branch's devinfo node to get a consistent
   2500      0     stevel  *		snap shot of the mdi_pathinfo nodes.
   2501      0     stevel  *
   2502      0     stevel  *		Please note that even the list is stable the mdi_pathinfo
   2503      0     stevel  *		node state and properties are volatile.  The caller should lock
   2504      0     stevel  *		and unlock the nodes by calling mdi_pi_lock() and
   2505      0     stevel  *		mdi_pi_unlock() functions to get a stable properties.
   2506      0     stevel  *
   2507      0     stevel  *		If there is a need to use the nodes beyond the hold of the
   2508      0     stevel  *		devinfo node period (For ex. I/O), then mdi_pathinfo node
   2509      0     stevel  *		need to be held against unexpected removal by calling
   2510      0     stevel  *		mdi_hold_path() and should be released by calling
   2511      0     stevel  *		mdi_rele_path() on completion.
   2512      0     stevel  */
   2513      0     stevel mdi_pathinfo_t *
   2514      0     stevel mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
   2515      0     stevel {
   2516      0     stevel 	mdi_client_t *ct;
   2517      0     stevel 
   2518      0     stevel 	if (!MDI_CLIENT(ct_dip))
   2519      0     stevel 		return (NULL);
   2520      0     stevel 
   2521      0     stevel 	/*
   2522      0     stevel 	 * Walk through client link
   2523      0     stevel 	 */
   2524      0     stevel 	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
   2525      0     stevel 	ASSERT(ct != NULL);
   2526      0     stevel 
   2527      0     stevel 	if (pip == NULL)
   2528      0     stevel 		return ((mdi_pathinfo_t *)ct->ct_path_head);
   2529      0     stevel 
   2530      0     stevel 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
   2531      0     stevel }
   2532      0     stevel 
   2533      0     stevel /*
   2534      0     stevel  * For a phci, return the next available path to any client
   2535      0     stevel  * Note: ditto mdi_get_next_phci_path()
   2536      0     stevel  */
   2537      0     stevel mdi_pathinfo_t *
   2538      0     stevel mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
   2539      0     stevel {
   2540      0     stevel 	mdi_phci_t *ph;
   2541      0     stevel 
   2542      0     stevel 	if (!MDI_PHCI(ph_dip))
   2543      0     stevel 		return (NULL);
   2544      0     stevel 
   2545      0     stevel 	/*
   2546      0     stevel 	 * Walk through pHCI link
   2547      0     stevel 	 */
   2548      0     stevel 	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
   2549      0     stevel 	ASSERT(ph != NULL);
   2550      0     stevel 
   2551      0     stevel 	if (pip == NULL)
   2552      0     stevel 		return ((mdi_pathinfo_t *)ph->ph_path_head);
   2553      0     stevel 
   2554      0     stevel 	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
   2555      0     stevel }
   2556      0     stevel 
   2557      0     stevel /*
   2558      0     stevel  * mdi_hold_path():
   2559      0     stevel  *		Hold the mdi_pathinfo node against unwanted unexpected free.
   2560      0     stevel  * Return Values:
   2561      0     stevel  *		None
   2562      0     stevel  */
   2563      0     stevel void
   2564      0     stevel mdi_hold_path(mdi_pathinfo_t *pip)
   2565      0     stevel {
   2566      0     stevel 	if (pip) {
   2567      0     stevel 		MDI_PI_LOCK(pip);
   2568      0     stevel 		MDI_PI_HOLD(pip);
   2569      0     stevel 		MDI_PI_UNLOCK(pip);
   2570      0     stevel 	}
   2571      0     stevel }
   2572      0     stevel 
   2573      0     stevel 
   2574      0     stevel /*
   2575      0     stevel  * mdi_rele_path():
   2576      0     stevel  *		Release the mdi_pathinfo node which was selected
   2577      0     stevel  *		through mdi_select_path() mechanism or manually held by
   2578      0     stevel  *		calling mdi_hold_path().
   2579      0     stevel  * Return Values:
   2580      0     stevel  *		None
   2581      0     stevel  */
   2582      0     stevel void
   2583      0     stevel mdi_rele_path(mdi_pathinfo_t *pip)
   2584      0     stevel {
   2585      0     stevel 	if (pip) {
   2586      0     stevel 		MDI_PI_LOCK(pip);
   2587      0     stevel 		MDI_PI_RELE(pip);
   2588      0     stevel 		if (MDI_PI(pip)->pi_ref_cnt == 0) {
   2589      0     stevel 			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
   2590      0     stevel 		}
   2591      0     stevel 		MDI_PI_UNLOCK(pip);
   2592      0     stevel 	}
   2593      0     stevel }
   2594      0     stevel 
   2595      0     stevel /*
   2596      0     stevel  * mdi_pi_lock():
   2597      0     stevel  * 		Lock the mdi_pathinfo node.
   2598      0     stevel  * Note:
   2599      0     stevel  *		The caller should release the lock by calling mdi_pi_unlock()
   2600      0     stevel  */
   2601      0     stevel void
   2602      0     stevel mdi_pi_lock(mdi_pathinfo_t *pip)
   2603      0     stevel {
   2604      0     stevel 	ASSERT(pip != NULL);
   2605      0     stevel 	if (pip) {
   2606      0     stevel 		MDI_PI_LOCK(pip);
   2607      0     stevel 	}
   2608      0     stevel }
   2609      0     stevel 
   2610      0     stevel 
   2611      0     stevel /*
   2612      0     stevel  * mdi_pi_unlock():
   2613      0     stevel  * 		Unlock the mdi_pathinfo node.
   2614      0     stevel  * Note:
   2615      0     stevel  *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
   2616      0     stevel  */
   2617      0     stevel void
   2618      0     stevel mdi_pi_unlock(mdi_pathinfo_t *pip)
   2619      0     stevel {
   2620      0     stevel 	ASSERT(pip != NULL);
   2621      0     stevel 	if (pip) {
   2622      0     stevel 		MDI_PI_UNLOCK(pip);
   2623      0     stevel 	}
   2624      0     stevel }
   2625      0     stevel 
   2626      0     stevel /*
   2627      0     stevel  * mdi_pi_find():
   2628      0     stevel  *		Search the list of mdi_pathinfo nodes attached to the
   2629      0     stevel  *		pHCI/Client device node whose path address matches "paddr".
   2630      0     stevel  *		Returns a pointer to the mdi_pathinfo node if a matching node is
   2631      0     stevel  *		found.
   2632      0     stevel  * Return Values:
   2633      0     stevel  *		mdi_pathinfo node handle
   2634      0     stevel  *		NULL
   2635      0     stevel  * Notes:
   2636      0     stevel  *		Caller need not hold any locks to call this function.
   2637      0     stevel  */
   2638      0     stevel mdi_pathinfo_t *
   2639      0     stevel mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
   2640      0     stevel {
   2641      0     stevel 	mdi_phci_t		*ph;
   2642      0     stevel 	mdi_vhci_t		*vh;
   2643      0     stevel 	mdi_client_t		*ct;
   2644      0     stevel 	mdi_pathinfo_t		*pip = NULL;
   2645      0     stevel 
   2646  10696      David 	MDI_DEBUG(2, (MDI_NOTE, pdip,
   2647  10696      David 	    "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : ""));
   2648      0     stevel 	if ((pdip == NULL) || (paddr == NULL)) {
   2649      0     stevel 		return (NULL);
   2650      0     stevel 	}
   2651      0     stevel 	ph = i_devi_get_phci(pdip);
   2652      0     stevel 	if (ph == NULL) {
   2653      0     stevel 		/*
   2654      0     stevel 		 * Invalid pHCI device, Nothing more to do.
   2655      0     stevel 		 */
   2656  10696      David 		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci"));
   2657      0     stevel 		return (NULL);
   2658      0     stevel 	}
   2659      0     stevel 
   2660      0     stevel 	vh = ph->ph_vhci;
   2661      0     stevel 	if (vh == NULL) {
   2662      0     stevel 		/*
   2663      0     stevel 		 * Invalid vHCI device, Nothing more to do.
   2664      0     stevel 		 */
   2665  10696      David 		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci"));
   2666   2155        cth 		return (NULL);
   2667   2155        cth 	}
   2668   2155        cth 
   2669   2155        cth 	/*
   2670   2155        cth 	 * Look for pathinfo node identified by paddr.
   2671      0     stevel 	 */
   2672      0     stevel 	if (caddr == NULL) {
   2673      0     stevel 		/*
   2674      0     stevel 		 * Find a mdi_pathinfo node under pHCI list for a matching
   2675      0     stevel 		 * unit address.
   2676      0     stevel 		 */
   2677   2155        cth 		MDI_PHCI_LOCK(ph);
   2678   2155        cth 		if (MDI_PHCI_IS_OFFLINE(ph)) {
   2679  10696      David 			MDI_DEBUG(2, (MDI_WARN, pdip,
   2680  10696      David 			    "offline phci %p", (void *)ph));
   2681   2155        cth 			MDI_PHCI_UNLOCK(ph);
   2682   2155        cth 			return (NULL);
   2683   2155        cth 		}
   2684      0     stevel 		pip = (mdi_pathinfo_t *)ph->ph_path_head;
   2685      0     stevel 
   2686      0     stevel 		while (pip != NULL) {
   2687      0     stevel 			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
   2688      0     stevel 				break;
   2689      0     stevel 			}
   2690      0     stevel 			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
   2691      0     stevel 		}
   2692   2155        cth 		MDI_PHCI_UNLOCK(ph);
   2693  10696      David 		MDI_DEBUG(2, (MDI_NOTE, pdip,
   2694  10696      David 		    "found %s %p", mdi_pi_spathname(pip), (void *)pip));
   2695      0     stevel 		return (pip);
   2696      0     stevel 	}
   2697      0     stevel 
   2698      0     stevel 	/*
   2699    878      ramat 	 * XXX - Is the rest of the code in this function really necessary?
   2700    878      ramat 	 * The consumers of mdi_pi_find() can search for the desired pathinfo
   2701    878      ramat 	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
   2702    878      ramat 	 * whether the search is based on the pathinfo nodes attached to
   2703    878      ramat 	 * the pHCI or the client node, the result will be the same.
   2704    878      ramat 	 */
   2705    878      ramat 
   2706    878      ramat 	/*
   2707      0     stevel 	 * Find the client device corresponding to 'caddr'
   2708      0     stevel 	 */
   2709   2155        cth 	MDI_VHCI_CLIENT_LOCK(vh);
   2710    878      ramat 
   2711    878      ramat 	/*
   2712    878      ramat 	 * XXX - Passing NULL to the following function works as long as the
   2713    878      ramat 	 * the client addresses (caddr) are unique per vhci basis.
   2714    878      ramat 	 */
   2715    878      ramat 	ct = i_mdi_client_find(vh, NULL, caddr);
   2716      0     stevel 	if (ct == NULL) {
   2717      0     stevel 		/*
   2718      0     stevel 		 * Client not found, Obviously mdi_pathinfo node has not been
   2719      0     stevel 		 * created yet.
   2720      0     stevel 		 */
   2721   2155        cth 		MDI_VHCI_CLIENT_UNLOCK(vh);
   2722  10696      David 		MDI_DEBUG(2, (MDI_NOTE, pdip,
   2723  10696      David 		    "client not found for caddr @%s", caddr ? caddr : ""));
   2724   2155        cth 		return (NULL);
   2725      0     stevel 	}
   2726      0     stevel 
   2727      0     stevel 	/*
   2728      0     stevel 	 * Hold the client lock and look for a mdi_pathinfo node with matching
   2729      0     stevel 	 * pHCI and paddr
   2730      0     stevel 	 */
   2731      0     stevel 	MDI_CLIENT_LOCK(ct);
   2732      0     stevel 
   2733      0     stevel 	/*
   2734      0     stevel 	 * Release the global mutex as it is no more needed. Note: We always
   2735      0     stevel 	 * respect the locking order while acquiring.
   2736      0     stevel 	 */
   2737   2155        cth 	MDI_VHCI_CLIENT_UNLOCK(vh);
   2738      0     stevel 
   2739      0     stevel 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
   2740      0     stevel 	while (pip != NULL) {
   2741      0     stevel 		/*
   2742      0     stevel 		 * Compare the unit address
   2743      0     stevel 		 */
   2744      0     stevel 		if ((MDI_PI(pip)->pi_phci == ph) &&
   2745      0     stevel 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
   2746      0     stevel 			break;
   2747      0     stevel 		}
   2748      0     stevel 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
   2749      0     stevel 	}
   2750      0     stevel 	MDI_CLIENT_UNLOCK(ct);
   2751  10696      David 	MDI_DEBUG(2, (MDI_NOTE, pdip,
   2752  10696      David 	    "found: %s %p", mdi_pi_spathname(pip), (void *)pip));
   2753      0     stevel 	return (pip);
   2754      0     stevel }
   2755      0     stevel 
   2756      0     stevel /*
   2757      0     stevel  * mdi_pi_alloc():
   2758      0     stevel  *		Allocate and initialize a new instance of a mdi_pathinfo node.
   2759      0     stevel  *		The mdi_pathinfo node returned by this function identifies a
   2760      0     stevel  *		unique device path is capable of having properties attached
   2761      0     stevel  *		and passed to mdi_pi_online() to fully attach and online the
   2762      0     stevel  *		path and client device node.
   2763      0     stevel  *		The mdi_pathinfo node returned by this function must be
   2764      0     stevel  *		destroyed using mdi_pi_free() if the path is no longer
   2765      0     stevel  *		operational or if the caller fails to attach a client device
   2766      0     stevel  *		node when calling mdi_pi_online(). The framework will not free
   2767      0     stevel  *		the resources allocated.
   2768      0     stevel  *		This function can be called from both interrupt and kernel
   2769      0     stevel  *		contexts.  DDI_NOSLEEP flag should be used while calling
   2770      0     stevel  *		from interrupt contexts.
   2771      0     stevel  * Return Values:
   2772      0     stevel  *		MDI_SUCCESS
   2773      0     stevel  *		MDI_FAILURE
   2774      0     stevel  *		MDI_NOMEM
   2775      0     stevel  */
   2776      0     stevel /*ARGSUSED*/
   2777      0     stevel int
   2778      0     stevel mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
   2779      0     stevel     char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
   2780      0     stevel {
   2781      0     stevel 	mdi_vhci_t	*vh;
   2782      0     stevel 	mdi_phci_t	*ph;
   2783      0     stevel 	mdi_client_t	*ct;
   2784      0     stevel 	mdi_pathinfo_t	*pip = NULL;
   2785      0     stevel 	dev_info_t	*cdip;
   2786      0     stevel 	int		rv = MDI_NOMEM;
   2787    878      ramat 	int		path_allocated = 0;
   2788      0     stevel 
   2789  10696      David 	MDI_DEBUG(2, (MDI_NOTE, pdip,
   2790  10696      David 	    "cname %s: caddr@%s paddr@%s",
   2791  10696      David 	    cname ? cname : "", caddr ? caddr : "", paddr ? paddr : ""));
   2792   2155        cth 
   2793      0     stevel 	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
   2794      0     stevel 	    ret_pip == NULL) {
   2795      0     stevel 		/* Nothing more to do */
   2796      0     stevel 		return (MDI_FAILURE);
   2797      0     stevel 	}
   2798      0     stevel 
   2799      0     stevel 	*ret_pip = NULL;
   2800   2155        cth 
   2801   2155        cth 	/* No allocations on detaching pHCI */
   2802   2155        cth 	if (DEVI_IS_DETACHING(pdip)) {
   2803   2155        cth 		/* Invalid pHCI device, return failure */
   2804  10696      David 		MDI_DEBUG(1, (MDI_WARN, pdip,
   2805  10696      David 		    "!detaching pHCI=%p", (void *)pdip));
   2806   2155        cth 		return (MDI_FAILURE);
   2807   2155        cth 	}
   2808   2155        cth 
   2809      0     stevel 	ph = i_devi_get_phci(pdip);
   2810      0     stevel 	ASSERT(ph != NULL);
   2811      0     stevel 	if (ph == NULL) {
   2812      0     stevel 		/* Invalid pHCI device, return failure */
   2813  10696      David 		MDI_DEBUG(1, (MDI_WARN, pdip,
   2814  10696      David 		    "!invalid pHCI=%p", (void *)pdip));
   2815      0     stevel 		return (MDI_FAILURE);
   2816      0     stevel 	}
   2817      0     stevel 
   2818      0     stevel 	MDI_PHCI_LOCK(ph);
   2819      0     stevel 	vh = ph->ph_vhci;
   2820      0     stevel 	if (vh == NULL) {
   2821      0     stevel 		/* Invalid vHCI device, return failure */
   2822  10696      David 		MDI_DEBUG(1, (MDI_WARN, pdip,
   2823  10696      David 		    "!invalid vHCI=%p", (void *)pdip));
   2824      0     stevel 		MDI_PHCI_UNLOCK(ph);
   2825      0     stevel 		return (MDI_FAILURE);
   2826      0     stevel 	}
   2827      0     stevel 
   2828      0     stevel 	if (MDI_PHCI_IS_READY(ph) == 0) {
   2829      0     stevel 		/*
   2830      0     stevel 		 * Do not allow new node creation when pHCI is in
   2831      0     stevel 		 * offline/suspended states
   2832      0     stevel 		 */
   2833  10696      David 		MDI_DEBUG(1, (MDI_WARN, pdip,
   2834  10696      David 		    "pHCI=%p is not ready", (void *)ph));
   2835      0     stevel 		MDI_PHCI_UNLOCK(ph);
   2836      0     stevel 		return (MDI_BUSY);
   2837      0     stevel 	}
   2838      0     stevel 	MDI_PHCI_UNSTABLE(ph);
   2839      0     stevel 	MDI_PHCI_UNLOCK(ph);
   2840      0     stevel 
   2841    878      ramat 	/* look for a matching client, create one if not found */
   2842   2155        cth 	MDI_VHCI_CLIENT_LOCK(vh);
   2843    878      ramat 	ct = i_mdi_client_find(vh, cname, caddr);
   2844      0     stevel 	if (ct == NULL) {
   2845    878      ramat 		ct = i_mdi_client_alloc(vh, cname, caddr);
   2846    878      ramat 		ASSERT(ct != NULL);
   2847      0     stevel 	}
   2848      0     stevel 
   2849      0     stevel 	if (ct->ct_dip == NULL) {
   2850      0     stevel 		/*
   2851      0     stevel 		 * Allocate a devinfo node
   2852      0     stevel 		 */
   2853      0     stevel 		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
   2854    878      ramat 		    compatible, ncompatible);
   2855      0     stevel 		if (ct->ct_dip == NULL) {
   2856      0     stevel 			(void) i_mdi_client_free(vh, ct);
   2857      0     stevel 			goto fail;
   2858      0     stevel 		}
   2859      0     stevel 	}
   2860      0     stevel 	cdip = ct->ct_dip;
   2861      0     stevel 
   2862      0     stevel 	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
   2863      0     stevel 	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
   2864      0     stevel 
   2865   2155        cth 	MDI_CLIENT_LOCK(ct);
   2866      0     stevel 	pip = (mdi_pathinfo_t *)ct->ct_path_head;
   2867      0     stevel 	while (pip != NULL) {
   2868      0     stevel 		/*
   2869      0     stevel 		 * Compare the unit address
   2870      0     stevel 		 */
   2871      0     stevel 		if ((MDI_PI(pip)->pi_phci == ph) &&
   2872      0     stevel 		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
   2873      0     stevel 			break;
   2874      0     stevel 		}
   2875      0     stevel 		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
   2876      0     stevel 	}
   2877   2155        cth 	MDI_CLIENT_UNLOCK(ct);
   2878      0     stevel 
   2879      0     stevel 	if (pip == NULL) {
   2880      0     stevel 		/*
   2881      0     stevel 		 * This is a new path for this client device.  Allocate and
   2882      0     stevel 		 * initialize a new pathinfo node
   2883      0     stevel 		 */
   2884    878      ramat 		pip = i_mdi_pi_alloc(ph, paddr, ct);
   2885    878      ramat 		ASSERT(pip != NULL);
   2886    878      ramat 		path_allocated = 1;
   2887      0     stevel 	}
   2888      0     stevel 	rv = MDI_SUCCESS;
   2889      0     stevel 
   2890      0     stevel fail:
   2891      0     stevel 	/*
   2892      0     stevel 	 * Release the global mutex.
   2893      0     stevel 	 */
   2894   2155        cth 	MDI_VHCI_CLIENT_UNLOCK(vh);
   2895      0     stevel 
   2896      0     stevel 	/*
   2897      0     stevel 	 * Mark the pHCI as stable
   2898      0     stevel 	 */
   2899      0     stevel 	MDI_PHCI_LOCK(ph);
   2900      0     stevel 	MDI_PHCI_STABLE(ph);
   2901      0     stevel 	MDI_PHCI_UNLOCK(ph);
   2902      0     stevel 	*ret_pip = pip;
   2903   2155        cth 
   2904  10696      David 	MDI_DEBUG(2, (MDI_NOTE, pdip,
   2905  10696      David 	    "alloc %s %p", mdi_pi_spathname(pip), (void *)pip));
   2906   1961        cth 
   2907    878      ramat 	if (path_allocated)
   2908    878      ramat 		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
   2909    878      ramat 
   2910      0     stevel 	return (rv);
   2911      0     stevel }
   2912      0     stevel 
   2913      0     stevel /*ARGSUSED*/
   2914      0     stevel int
   2915      0     stevel mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
   2916      0     stevel     int flags, mdi_pathinfo_t **ret_pip)
   2917      0     stevel {
   2918      0     stevel 	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
   2919      0     stevel 	    flags, ret_pip));
   2920      0     stevel }
   2921      0     stevel 
   2922      0     stevel /*
   2923      0     stevel  * i_mdi_pi_alloc():
   2924      0     stevel  *		Allocate a mdi_pathinfo node and add to the pHCI path list
   2925      0     stevel  * Return Values:
   2926      0     stevel  *		mdi_pathinfo
   2927      0     stevel  */
   2928      0     stevel /*ARGSUSED*/
   2929      0     stevel static mdi_pathinfo_t *
   2930    878      ramat i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
   2931    878      ramat {
   2932    878      ramat 	mdi_pathinfo_t	*pip;
   2933      0     stevel 	int		ct_circular;
   2934      0     stevel 	int		ph_circular;
   2935  10696      David 	static char	path[MAXPATHLEN];	/* mdi_pathmap_mutex protects */
   2936   6640        cth 	char		*path_persistent;
   2937   6640        cth 	int		path_instance;
   2938   6640        cth 	mod_hash_val_t	hv;
   2939   2155        cth 
   2940   2155        cth 	ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));
   2941      0     stevel 
   2942    878      ramat 	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
   2943      0     stevel 	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
   2944      0     stevel 	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
   2945      0     stevel 	    MDI_PATHINFO_STATE_TRANSIENT;
   2946      0     stevel 
   2947      0     stevel 	if (MDI_PHCI_IS_USER_DISABLED(ph))
   2948      0     stevel 		MDI_PI_SET_USER_DISABLE(pip);
   2949      0     stevel 
   2950      0     stevel 	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
   2951      0     stevel 		MDI_PI_SET_DRV_DISABLE_TRANS(pip);
   2952      0     stevel 
   2953      0     stevel 	if (MDI_PHCI_IS_DRV_DISABLED(ph))
   2954      0     stevel 		MDI_PI_SET_DRV_DISABLE(pip);
   2955      0     stevel 
   2956      0     stevel 	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
   2957      0     stevel 	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
   2958      0     stevel 	MDI_PI(pip)->pi_client = ct;
   2959      0     stevel 	MDI_PI(pip)->pi_phci = ph;
   2960    878      ramat 	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
   2961      0     stevel 	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
   2962   6640        cth 
   2963   6640        cth         /*
   2964   6640        cth 	 * We form the "path" to the pathinfo node, and see if we have
   2965   6640        cth 	 * already allocated a 'path_instance' for that "path".  If so,
   2966   6640        cth 	 * we use the already allocated 'path_instance'.  If not, we
   2967   6640        cth 	 * allocate a new 'path_instance' and associate it with a copy of
   2968   6640        cth 	 * the "path" string (which is never freed). The association
   2969   6640        cth 	 * between a 'path_instance' this "path" string persists until
   2970   6640        cth 	 * reboot.
   2971   6640        cth 	 */
   2972   6640        cth         mutex_enter(&mdi_pathmap_mutex);
   2973   6640        cth 	(void) ddi_pathname(ph->ph_dip, path);
   2974  10696      David 	(void) sprintf(path + strlen(path), "/%s@%s",
   2975   9167    Randall 	    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
   2976   6640        cth         if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
   2977   6640        cth                 path_instance = (uint_t)(intptr_t)hv;
   2978   6640        cth         } else {
   2979   6640        cth 		/* allocate a new 'path_instance' and persistent "path" */
   2980   6640        cth 		path_instance = mdi_pathmap_instance++;
   2981   6640        cth 		path_persistent = i_ddi_strdup(path, KM_SLEEP);
   2982   6640        cth                 (void) mod_hash_insert(mdi_pathmap_bypath,
   2983   6640        cth                     (mod_hash_key_t)path_persistent,
   2984   6640        cth                     (mod_hash_val_t)(intptr_t)path_instance);
   2985   6640        cth 		(void) mod_hash_insert(mdi_pathmap_byinstance,
   2986   6640        cth 		    (mod_hash_key_t)(intptr_t)path_instance,
   2987   6640        cth 		    (mod_hash_val_t)path_persistent);
   2988  10696      David 
   2989  10696      David 		/* create shortpath name */
   2990  10696      David 		(void) snprintf(path, sizeof(path), "%s%d/%s@%s",
   2991  10696      David 		    ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip),
   2992  10696      David 		    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
   2993  10696      David 		path_persistent = i_ddi_strdup(path, KM_SLEEP);
   2994  10696      David 		(void) mod_hash_insert(mdi_pathmap_sbyinstance,
   2995  10696      David 		    (mod_hash_key_t)(intptr_t)path_instance,
   2996  10696      David 		    (mod_hash_val_t)path_persistent);
   2997   6640        cth         }
   2998   6640        cth         mutex_exit(&mdi_pathmap_mutex);
   2999   6640        cth 	MDI_PI(pip)->pi_path_instance = path_instance;
   3000   6640        cth 
   3001    878      ramat 	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
   3002    878      ramat 	ASSERT(MDI_PI(pip)->pi_prop != NULL);
   3003      0     stevel 	MDI_PI(pip)->pi_pprivate = NULL;
   3004      0     stevel 	MDI_PI(pip)->pi_cprivate = NULL;
   3005      0     stevel 	MDI_PI(pip)->pi_vprivate = NULL;
   3006      0     stevel 	MDI_PI(pip)->pi_client_link = NULL;
   3007      0     stevel 	MDI_PI(pip)->pi_phci_link = NULL;
   3008      0     stevel 	MDI_PI(pip)->pi_ref_cnt = 0;
   3009      0     stevel 	MDI_PI(pip)->pi_kstats = NULL;
   3010      0     stevel 	MDI_PI(pip)->pi_preferred = 1;
   3011      0     stevel 	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
   3012      0     stevel 
   3013      0     stevel 	/*
   3014      0     stevel 	 * Lock both dev_info nodes against changes in parallel.
   3015   2155        cth 	 *
   3016   2155        cth 	 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
   3017   2155        cth 	 * This atypical operation is done to synchronize pathinfo nodes
   3018   2155        cth 	 * during devinfo snapshot (see di_register_pip) by 'pretending' that
   3019   2155        cth 	 * the pathinfo nodes are children of the Client.
   3020      0     stevel 	 */
   3021      0     stevel 	ndi_devi_enter(ct->ct_dip, &ct_circular);
   3022      0     stevel 	ndi_devi_enter(ph->ph_dip, &ph_circular);
   3023      0     stevel 
   3024      0     stevel 	i_mdi_phci_add_path(ph, pip);
   3025      0     stevel 	i_mdi_client_add_path(ct, pip);
   3026      0     stevel 
   3027      0     stevel 	ndi_devi_exit(ph->ph_dip, ph_circular);
   3028      0     stevel 	ndi_devi_exit(ct->ct_dip, ct_circular);
   3029      0     stevel 
   3030      0     stevel 	return (pip);
   3031   6640        cth }
   3032   6640        cth 
   3033   6640        cth /*
   3034   6640        cth  * mdi_pi_pathname_by_instance():
   3035   6640        cth  *	Lookup of "path" by 'path_instance'. Return "path".
   3036   6640        cth  *	NOTE: returned "path" remains valid forever (until reboot).
   3037   6640        cth  */
   3038   6640        cth char *
   3039   6640        cth mdi_pi_pathname_by_instance(int path_instance)
   3040   6640        cth {
   3041   6640        cth 	char		*path;
   3042   6640        cth 	mod_hash_val_t	hv;
   3043   6640        cth 
   3044   6640        cth 	/* mdi_pathmap lookup of "path" by 'path_instance' */
   3045   6640        cth 	mutex_enter(&mdi_pathmap_mutex);
   3046   6640        cth 	if (mod_hash_find(mdi_pathmap_byinstance,
   3047   6640        cth 	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
   3048   6640        cth 		path = (char *)hv;
   3049   6640        cth 	else
   3050   6640        cth 		path = NULL;
   3051   6640        cth 	mutex_exit(&mdi_pathmap_mutex);
   3052   6640        cth 	return (path);
   3053      0     stevel }
   3054      0     stevel 
   3055      0     stevel /*
   3056  10696      David  * mdi_pi_spathname_by_instance():
   3057  10696      David  *	Lookup of "shortpath" by 'path_instance'. Return "shortpath".
   3058  10696      David  *	NOTE: returned "shortpath" remains valid forever (until reboot).
   3059  10696      David  */
   3060  10696      David char *
   3061  10696      David mdi_pi_spathname_by_instance(int path_instance)
   3062  10696      David {
   3063  10696      David 	char		*path;
   3064  10696      David 	mod_hash_val_t	hv;
   3065  10696      David 
   3066  10696      David 	/* mdi_pathmap lookup of "path" by 'path_instance' */
   3067  10696      David 	mutex_enter(&mdi_pathmap_mutex);
   3068  10696      David 	if (mod_hash_find(mdi_pathmap_sbyinstance,
   3069  10696      David 	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
   3070  10696      David 		path = (char *)hv;
   3071  10696      David 	else
   3072  10696      David 		path = NULL;
   3073  10696      David 	mutex_exit(&mdi_pathmap_mutex);
   3074  10696      David 	return (path);
   3075  10696      David }
   3076  10696      David 
   3077  10696      David 
   3078  10696      David /*
   3079      0     stevel  * i_mdi_phci_add_path():
   3080      0     stevel  * 		Add a mdi_pathinfo node to pHCI list.
   3081      0     stevel  * Notes:
   3082      0     stevel  *		Caller should per-pHCI mutex
   3083      0     stevel  */
   3084      0     stevel static void
   3085      0     stevel i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
   3086      0     stevel {
   3087      0     stevel 	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
   3088      0     stevel 
   3089   2155        cth 	MDI_PHCI_LOCK(ph);
   3090      0     stevel 	if (ph->ph_path_head == NULL) {
   3091      0     stevel 		ph->ph_path_head = pip;
   3092      0     stevel 	} else {
   3093      0     stevel 		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
   3094      0     stevel 	}
   3095      0     stevel 	ph->ph_path_tail = pip;
   3096      0     stevel 	ph->ph_path_count++;
   3097   2155        cth 	MDI_PHCI_UNLOCK(ph);
   3098      0     stevel }
   3099      0     stevel 
   3100      0     stevel /*
   3101      0     stevel  * i_mdi_client_add_path():
   3102      0     stevel  *		Add mdi_pathinfo node to client list
   3103      0     stevel  */
   3104      0     stevel static void
   3105      0     stevel i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
   3106      0     stevel {
   3107      0     stevel 	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
   3108      0     stevel 
   3109   2155        cth 	MDI_CLIENT_LOCK(ct);
   3110      0     stevel 	if (ct->ct_path_head == NULL) {
   3111      0     stevel 		ct->ct_path_head = pip;
   3112      0     stevel 	} else {
   3113      0     stevel 		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
   3114      0     stevel 	}
   3115      0     stevel 	ct->ct_path_tail = pip;
   3116      0     stevel 	ct->ct_path_count++;
   3117   2155        cth 	MDI_CLIENT_UNLOCK(ct);
   3118      0     stevel }
   3119      0     stevel 
   3120      0     stevel /*
   3121      0     stevel  * mdi_pi_free():
   3122      0     stevel  *		Free the mdi_pathinfo node and also client device node if this
   3123      0     stevel  *		is the last path to the device
   3124      0     stevel  * Return Values:
   3125      0     stevel  *		MDI_SUCCESS
   3126      0     stevel  *		MDI_FAILURE
   3127      0     stevel  *		MDI_BUSY
   3128      0     stevel  */
   3129      0     stevel /*ARGSUSED*/
   3130      0     stevel int
   3131      0     stevel mdi_pi_free(mdi_pathinfo_t *pip, int flags)
   3132      0     stevel {
   3133   4881   rs135747 	int		rv = MDI_FAILURE;
   3134      0     stevel 	mdi_vhci_t	*vh;
   3135      0     stevel 	mdi_phci_t	*ph;
   3136      0     stevel 	mdi_client_t	*ct;
   3137      0     stevel 	int		(*f)();
   3138      0     stevel 	int		client_held = 0;
   3139      0     stevel 
   3140      0     stevel 	MDI_PI_LOCK(pip);
   3141      0     stevel 	ph = MDI_PI(pip)->pi_phci;
   3142      0     stevel 	ASSERT(ph != NULL);
   3143      0     stevel 	if (ph == NULL) {
   3144      0     stevel 		/*
   3145      0     stevel 		 * Invalid pHCI device, return failure
   3146      0     stevel 		 */
   3147  10696      David 		MDI_DEBUG(1, (MDI_WARN, NULL,
   3148  10696      David 		    "!invalid pHCI: pip %s %p",
   3149  10696      David 		    mdi_pi_spathname(pip), (void *)pip));
   3150      0     stevel 		MDI_PI_UNLOCK(pip);
   3151      0     stevel 		return (MDI_FAILURE);
   3152      0     stevel 	}
   3153      0     stevel 
   3154      0     stevel 	vh = ph->ph_vhci;
   3155      0     stevel 	ASSERT(vh != NULL);
   3156      0     stevel 	if (vh == NULL) {
   3157      0     stevel 		/* Invalid pHCI device, return failure */
   3158  10696      David 		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
   3159  10696      David 		    "!invalid vHCI: pip %s %p",
   3160  10696      David 		    mdi_pi_spathname(pip), (void *)pip));
   3161      0     stevel 		MDI_PI_UNLOCK(pip);
   3162      0     stevel 		return (MDI_FAILURE);
   3163      0     stevel 	}
   3164      0     stevel 
   3165      0     stevel 	ct = MDI_PI(pip)->pi_client;
   3166      0     stevel 	ASSERT(ct != NULL);
   3167      0     stevel 	if (ct == NULL) {
   3168      0     stevel 		/*
   3169      0     stevel 		 * Invalid Client device, return failure
   3170      0     stevel 		 */
   3171  10696      David 		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
   3172  10696      David 		    "!invalid client: pip %s %p",
   3173  10696      David 		    mdi_pi_spathname(pip), (void *)pip));
   3174      0     stevel 		MDI_PI_UNLOCK(pip);
   3175      0     stevel 		return (MDI_FAILURE);
   3176      0     stevel 	}
   3177      0     stevel 
   3178      0     stevel 	/*
   3179      0     stevel 	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
   3180      0     stevel 	 * if the node state is either offline or init and the reference count
   3181      0     stevel 	 * is zero.
   3182      0     stevel 	 */
   3183      0     stevel 	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
   3184      0     stevel 	    MDI_PI_IS_INITING(pip))) {
   3185      0     stevel 		/*
   3186      0     stevel 		 * Node is busy
   3187      0     stevel 		 */
   3188  10696      David 		MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
   3189  10696      David 		    "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip));
   3190      0     stevel 		MDI_PI_UNLOCK(pip);
   3191      0     stevel 		return (MDI_BUSY);
   3192      0     stevel 	}
   3193      0     stevel 
   3194      0     stevel 	while (MDI_PI(pip)->pi_ref_cnt != 0) {
   3195      0     stevel 		/*
   3196      0     stevel 		 * Give a chance for pending I/Os to complete.
   3197      0     stevel 		 */
   3198  10696      David 		MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
   3199  10696      David 		    "!%d cmds still pending on path: %s %p",
   3200  10696      David 		    MDI_PI(pip)->pi_ref_cnt,
   3201  10696      David 		    mdi_pi_spathname(pip), (void *)pip));
   3202  11066     rafael 		if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
   3203  11066     rafael 		    &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
   3204  11066     rafael 		    TR_CLOCK_TICK) == -1) {
   3205      0     stevel 			/*
   3206      0     stevel 			 * The timeout time reached without ref_cnt being zero
   3207      0