1 0 stevel /* 2 0 stevel * CDDL HEADER START 3 0 stevel * 4 0 stevel * The contents of this file are subject to the terms of the 5 1909 cm136836 * Common Development and Distribution License (the "License"). 6 1909 cm136836 * You may not use this file except in compliance with the License. 7 0 stevel * 8 0 stevel * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 0 stevel * or http://www.opensolaris.org/os/licensing. 10 0 stevel * See the License for the specific language governing permissions 11 0 stevel * and limitations under the License. 12 0 stevel * 13 0 stevel * When distributing Covered Code, include this CDDL HEADER in each 14 0 stevel * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 0 stevel * If applicable, add the following below this CDDL HEADER, with the 16 0 stevel * fields enclosed by brackets "[]" replaced with your own identifying 17 0 stevel * information: Portions Copyright [yyyy] [name of copyright owner] 18 0 stevel * 19 0 stevel * CDDL HEADER END 20 0 stevel */ 21 0 stevel /* 22 9167 Randall * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 0 stevel * Use is subject to license terms. 24 0 stevel */ 25 0 stevel 26 0 stevel /* 27 0 stevel * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 28 0 stevel * detailed discussion of the overall mpxio architecture. 29 0 stevel * 30 0 stevel * Default locking order: 31 0 stevel * 32 2155 cth * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex); 33 2155 cth * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex); 34 2155 cth * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex); 35 2155 cth * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex); 36 0 stevel * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 37 0 stevel * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 38 0 stevel * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 39 0 stevel */ 40 0 stevel 41 0 stevel #include <sys/note.h> 42 0 stevel #include <sys/types.h> 43 0 stevel #include <sys/varargs.h> 44 0 stevel #include <sys/param.h> 45 0 stevel #include <sys/errno.h> 46 0 stevel #include <sys/uio.h> 47 0 stevel #include <sys/buf.h> 48 0 stevel #include <sys/modctl.h> 49 0 stevel #include <sys/open.h> 50 0 stevel #include <sys/kmem.h> 51 0 stevel #include <sys/poll.h> 52 0 stevel #include <sys/conf.h> 53 0 stevel #include <sys/bootconf.h> 54 0 stevel #include <sys/cmn_err.h> 55 0 stevel #include <sys/stat.h> 56 0 stevel #include <sys/ddi.h> 57 0 stevel #include <sys/sunddi.h> 58 0 stevel #include <sys/ddipropdefs.h> 59 0 stevel #include <sys/sunndi.h> 60 0 stevel #include <sys/ndi_impldefs.h> 61 0 stevel #include <sys/promif.h> 62 0 stevel #include <sys/sunmdi.h> 63 0 stevel #include <sys/mdi_impldefs.h> 64 0 stevel #include <sys/taskq.h> 65 0 stevel #include <sys/epm.h> 66 0 stevel #include <sys/sunpm.h> 67 878 ramat #include <sys/modhash.h> 68 893 rs135747 #include <sys/disp.h> 69 893 rs135747 #include <sys/autoconf.h> 70 2402 pramodbg #include <sys/sysmacros.h> 71 0 stevel 72 0 stevel #ifdef DEBUG 73 0 stevel #include <sys/debug.h> 74 0 stevel int mdi_debug = 1; 75 2155 cth int mdi_debug_logonly = 0; 76 10696 David #define MDI_DEBUG(dbglevel, pargs) if (mdi_debug >= (dbglevel)) i_mdi_log pargs 77 10696 David #define MDI_WARN CE_WARN, __func__ 78 10696 David #define MDI_NOTE CE_NOTE, __func__ 79 10696 David #define MDI_CONT CE_CONT, __func__ 80 10696 David static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...); 81 0 stevel #else /* !DEBUG */ 82 10696 David #define MDI_DEBUG(dbglevel, pargs) 83 0 stevel #endif /* DEBUG */ 84 10696 David int mdi_debug_consoleonly = 0; 85 11052 Chris int mdi_delay = 3; 86 0 stevel 87 0 stevel extern pri_t minclsyspri; 88 0 stevel extern int modrootloaded; 89 0 stevel 90 0 stevel /* 91 0 stevel * Global mutex: 92 2155 cth * Protects vHCI list and structure members. 93 0 stevel */ 94 0 stevel kmutex_t mdi_mutex; 95 0 stevel 96 0 stevel /* 97 0 stevel * Registered vHCI class driver lists 98 0 stevel */ 99 0 stevel int mdi_vhci_count; 100 0 stevel mdi_vhci_t *mdi_vhci_head; 101 0 stevel mdi_vhci_t *mdi_vhci_tail; 102 0 stevel 103 0 stevel /* 104 0 stevel * Client Hash Table size 105 0 stevel */ 106 0 stevel static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 107 0 stevel 108 0 stevel /* 109 0 stevel * taskq interface definitions 110 0 stevel */ 111 0 stevel #define MDI_TASKQ_N_THREADS 8 112 0 stevel #define MDI_TASKQ_PRI minclsyspri 113 0 stevel #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 114 0 stevel #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 115 0 stevel 116 0 stevel taskq_t *mdi_taskq; 117 0 stevel static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 118 0 stevel 119 878 ramat #define TICKS_PER_SECOND (drv_usectohz(1000000)) 120 878 ramat 121 878 ramat /* 122 878 ramat * The data should be "quiet" for this interval (in seconds) before the 123 878 ramat * vhci cached data is flushed to the disk. 124 878 ramat */ 125 878 ramat static int mdi_vhcache_flush_delay = 10; 126 878 ramat 127 878 ramat /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 128 878 ramat static int mdi_vhcache_flush_daemon_idle_time = 60; 129 878 ramat 130 878 ramat /* 131 972 ramat * MDI falls back to discovery of all paths when a bus_config_one fails. 132 972 ramat * The following parameters can be used to tune this operation. 133 972 ramat * 134 972 ramat * mdi_path_discovery_boot 135 972 ramat * Number of times path discovery will be attempted during early boot. 136 972 ramat * Probably there is no reason to ever set this value to greater than one. 137 972 ramat * 138 972 ramat * mdi_path_discovery_postboot 139 972 ramat * Number of times path discovery will be attempted after early boot. 140 972 ramat * Set it to a minimum of two to allow for discovery of iscsi paths which 141 972 ramat * may happen very late during booting. 142 972 ramat * 143 972 ramat * mdi_path_discovery_interval 144 972 ramat * Minimum number of seconds MDI will wait between successive discovery 145 972 ramat * of all paths. Set it to -1 to disable discovery of all paths. 146 972 ramat */ 147 972 ramat static int mdi_path_discovery_boot = 1; 148 972 ramat static int mdi_path_discovery_postboot = 2; 149 972 ramat static int mdi_path_discovery_interval = 10; 150 972 ramat 151 972 ramat /* 152 878 ramat * number of seconds the asynchronous configuration thread will sleep idle 153 878 ramat * before exiting. 154 878 ramat */ 155 878 ramat static int mdi_async_config_idle_time = 600; 156 878 ramat 157 878 ramat static int mdi_bus_config_cache_hash_size = 256; 158 878 ramat 159 878 ramat /* turns off multithreaded configuration for certain operations */ 160 878 ramat static int mdi_mtc_off = 0; 161 0 stevel 162 0 stevel /* 163 6640 cth * The "path" to a pathinfo node is identical to the /devices path to a 164 6640 cth * devinfo node had the device been enumerated under a pHCI instead of 165 6640 cth * a vHCI. This pathinfo "path" is associated with a 'path_instance'. 166 6640 cth * This association persists across create/delete of the pathinfo nodes, 167 6640 cth * but not across reboot. 168 6640 cth */ 169 6640 cth static uint_t mdi_pathmap_instance = 1; /* 0 -> any path */ 170 6640 cth static int mdi_pathmap_hash_size = 256; 171 6640 cth static kmutex_t mdi_pathmap_mutex; 172 6640 cth static mod_hash_t *mdi_pathmap_bypath; /* "path"->instance */ 173 6640 cth static mod_hash_t *mdi_pathmap_byinstance; /* instance->"path" */ 174 10696 David static mod_hash_t *mdi_pathmap_sbyinstance; /* inst->shortpath */ 175 6640 cth 176 6640 cth /* 177 0 stevel * MDI component property name/value string definitions 178 0 stevel */ 179 0 stevel const char *mdi_component_prop = "mpxio-component"; 180 0 stevel const char *mdi_component_prop_vhci = "vhci"; 181 0 stevel const char *mdi_component_prop_phci = "phci"; 182 0 stevel const char *mdi_component_prop_client = "client"; 183 0 stevel 184 0 stevel /* 185 0 stevel * MDI client global unique identifier property name 186 0 stevel */ 187 0 stevel const char *mdi_client_guid_prop = "client-guid"; 188 0 stevel 189 0 stevel /* 190 0 stevel * MDI client load balancing property name/value string definitions 191 0 stevel */ 192 0 stevel const char *mdi_load_balance = "load-balance"; 193 0 stevel const char *mdi_load_balance_none = "none"; 194 0 stevel const char *mdi_load_balance_rr = "round-robin"; 195 0 stevel const char *mdi_load_balance_lba = "logical-block"; 196 0 stevel 197 0 stevel /* 198 0 stevel * Obsolete vHCI class definition; to be removed after Leadville update 199 0 stevel */ 200 0 stevel const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 201 0 stevel 202 0 stevel static char vhci_greeting[] = 203 0 stevel "\tThere already exists one vHCI driver for class %s\n" 204 0 stevel "\tOnly one vHCI driver for each class is allowed\n"; 205 0 stevel 206 0 stevel /* 207 0 stevel * Static function prototypes 208 0 stevel */ 209 0 stevel static int i_mdi_phci_offline(dev_info_t *, uint_t); 210 0 stevel static int i_mdi_client_offline(dev_info_t *, uint_t); 211 0 stevel static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 212 0 stevel static void i_mdi_phci_post_detach(dev_info_t *, 213 0 stevel ddi_detach_cmd_t, int); 214 0 stevel static int i_mdi_client_pre_detach(dev_info_t *, 215 0 stevel ddi_detach_cmd_t); 216 0 stevel static void i_mdi_client_post_detach(dev_info_t *, 217 0 stevel ddi_detach_cmd_t, int); 218 0 stevel static void i_mdi_pm_hold_pip(mdi_pathinfo_t *); 219 0 stevel static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 220 0 stevel static int i_mdi_lba_lb(mdi_client_t *ct, 221 0 stevel mdi_pathinfo_t **ret_pip, struct buf *buf); 222 0 stevel static void i_mdi_pm_hold_client(mdi_client_t *, int); 223 0 stevel static void i_mdi_pm_rele_client(mdi_client_t *, int); 224 0 stevel static void i_mdi_pm_reset_client(mdi_client_t *); 225 0 stevel static int i_mdi_power_all_phci(mdi_client_t *); 226 893 rs135747 static void i_mdi_log_sysevent(dev_info_t *, char *, char *); 227 0 stevel 228 0 stevel 229 0 stevel /* 230 0 stevel * Internal mdi_pathinfo node functions 231 0 stevel */ 232 0 stevel static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 233 0 stevel 234 0 stevel static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 235 0 stevel static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 236 0 stevel static mdi_phci_t *i_devi_get_phci(dev_info_t *); 237 0 stevel static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 238 0 stevel static void i_mdi_phci_unlock(mdi_phci_t *); 239 878 ramat static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 240 0 stevel static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 241 0 stevel static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 242 0 stevel static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 243 0 stevel mdi_client_t *); 244 0 stevel static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 245 0 stevel static void i_mdi_client_remove_path(mdi_client_t *, 246 0 stevel mdi_pathinfo_t *); 247 0 stevel 248 0 stevel static int i_mdi_pi_state_change(mdi_pathinfo_t *, 249 0 stevel mdi_pathinfo_state_t, int); 250 0 stevel static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 251 0 stevel static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 252 878 ramat char **, int); 253 0 stevel static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 254 0 stevel static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 255 0 stevel static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 256 878 ramat static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 257 0 stevel static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 258 0 stevel static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *); 259 878 ramat static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *); 260 0 stevel static void i_mdi_client_update_state(mdi_client_t *); 261 0 stevel static int i_mdi_client_compute_state(mdi_client_t *, 262 0 stevel mdi_phci_t *); 263 0 stevel static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 264 0 stevel static void i_mdi_client_unlock(mdi_client_t *); 265 0 stevel static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 266 0 stevel static mdi_client_t *i_devi_get_client(dev_info_t *); 267 1909 cm136836 /* 268 1909 cm136836 * NOTE: this will be removed once the NWS files are changed to use the new 269 1909 cm136836 * mdi_{enable,disable}_path interfaces 270 1909 cm136836 */ 271 1909 cm136836 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, 272 1909 cm136836 int, int); 273 1909 cm136836 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip, 274 1909 cm136836 mdi_vhci_t *vh, int flags, int op); 275 0 stevel /* 276 0 stevel * Failover related function prototypes 277 0 stevel */ 278 0 stevel static int i_mdi_failover(void *); 279 0 stevel 280 0 stevel /* 281 0 stevel * misc internal functions 282 0 stevel */ 283 0 stevel static int i_mdi_get_hash_key(char *); 284 0 stevel static int i_map_nvlist_error_to_mdi(int); 285 0 stevel static void i_mdi_report_path_state(mdi_client_t *, 286 0 stevel mdi_pathinfo_t *); 287 878 ramat 288 878 ramat static void setup_vhci_cache(mdi_vhci_t *); 289 878 ramat static int destroy_vhci_cache(mdi_vhci_t *); 290 878 ramat static int stop_vhcache_async_threads(mdi_vhci_config_t *); 291 878 ramat static boolean_t stop_vhcache_flush_thread(void *, int); 292 878 ramat static void free_string_array(char **, int); 293 878 ramat static void free_vhcache_phci(mdi_vhcache_phci_t *); 294 878 ramat static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 295 878 ramat static void free_vhcache_client(mdi_vhcache_client_t *); 296 878 ramat static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 297 878 ramat static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 298 878 ramat static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 299 878 ramat static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *); 300 878 ramat static void vhcache_pi_add(mdi_vhci_config_t *, 301 878 ramat struct mdi_pathinfo *); 302 878 ramat static void vhcache_pi_remove(mdi_vhci_config_t *, 303 878 ramat struct mdi_pathinfo *); 304 878 ramat static void free_phclient_path_list(mdi_phys_path_t *); 305 878 ramat static void sort_vhcache_paths(mdi_vhcache_client_t *); 306 878 ramat static int flush_vhcache(mdi_vhci_config_t *, int); 307 878 ramat static void vhcache_dirty(mdi_vhci_config_t *); 308 878 ramat static void free_async_client_config(mdi_async_client_config_t *); 309 972 ramat static void single_threaded_vhconfig_enter(mdi_vhci_config_t *); 310 972 ramat static void single_threaded_vhconfig_exit(mdi_vhci_config_t *); 311 878 ramat static nvlist_t *read_on_disk_vhci_cache(char *); 312 878 ramat extern int fread_nvlist(char *, nvlist_t **); 313 878 ramat extern int fwrite_nvlist(char *, nvlist_t *); 314 0 stevel 315 0 stevel /* called once when first vhci registers with mdi */ 316 0 stevel static void 317 0 stevel i_mdi_init() 318 0 stevel { 319 0 stevel static int initialized = 0; 320 0 stevel 321 0 stevel if (initialized) 322 0 stevel return; 323 0 stevel initialized = 1; 324 0 stevel 325 0 stevel mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); 326 6640 cth 327 6640 cth /* Create our taskq resources */ 328 0 stevel mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, 329 0 stevel MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, 330 0 stevel TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 331 0 stevel ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ 332 6640 cth 333 6640 cth /* Allocate ['path_instance' <-> "path"] maps */ 334 6640 cth mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL); 335 6640 cth mdi_pathmap_bypath = mod_hash_create_strhash( 336 6640 cth "mdi_pathmap_bypath", mdi_pathmap_hash_size, 337 6640 cth mod_hash_null_valdtor); 338 6640 cth mdi_pathmap_byinstance = mod_hash_create_idhash( 339 6640 cth "mdi_pathmap_byinstance", mdi_pathmap_hash_size, 340 6640 cth mod_hash_null_valdtor); 341 10696 David mdi_pathmap_sbyinstance = mod_hash_create_idhash( 342 10696 David "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size, 343 10696 David mod_hash_null_valdtor); 344 0 stevel } 345 0 stevel 346 0 stevel /* 347 0 stevel * mdi_get_component_type(): 348 0 stevel * Return mpxio component type 349 0 stevel * Return Values: 350 0 stevel * MDI_COMPONENT_NONE 351 0 stevel * MDI_COMPONENT_VHCI 352 0 stevel * MDI_COMPONENT_PHCI 353 0 stevel * MDI_COMPONENT_CLIENT 354 0 stevel * XXX This doesn't work under multi-level MPxIO and should be 355 2155 cth * removed when clients migrate mdi_component_is_*() interfaces. 356 0 stevel */ 357 0 stevel int 358 0 stevel mdi_get_component_type(dev_info_t *dip) 359 0 stevel { 360 0 stevel return (DEVI(dip)->devi_mdi_component); 361 0 stevel } 362 0 stevel 363 0 stevel /* 364 0 stevel * mdi_vhci_register(): 365 0 stevel * Register a vHCI module with the mpxio framework 366 0 stevel * mdi_vhci_register() is called by vHCI drivers to register the 367 0 stevel * 'class_driver' vHCI driver and its MDI entrypoints with the 368 0 stevel * mpxio framework. The vHCI driver must call this interface as 369 0 stevel * part of its attach(9e) handler. 370 0 stevel * Competing threads may try to attach mdi_vhci_register() as 371 0 stevel * the vHCI drivers are loaded and attached as a result of pHCI 372 0 stevel * driver instance registration (mdi_phci_register()) with the 373 0 stevel * framework. 374 0 stevel * Return Values: 375 0 stevel * MDI_SUCCESS 376 0 stevel * MDI_FAILURE 377 0 stevel */ 378 0 stevel /*ARGSUSED*/ 379 0 stevel int 380 0 stevel mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, 381 0 stevel int flags) 382 0 stevel { 383 0 stevel mdi_vhci_t *vh = NULL; 384 0 stevel 385 9167 Randall /* Registrant can't be older */ 386 9167 Randall ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV); 387 9167 Randall 388 8082 Ramaswamy #ifdef DEBUG 389 8082 Ramaswamy /* 390 8082 Ramaswamy * IB nexus driver is loaded only when IB hardware is present. 391 8082 Ramaswamy * In order to be able to do this there is a need to drive the loading 392 8082 Ramaswamy * and attaching of the IB nexus driver (especially when an IB hardware 393 8082 Ramaswamy * is dynamically plugged in) when an IB HCA driver (PHCI) 394 8082 Ramaswamy * is being attached. Unfortunately this gets into the limitations 395 8082 Ramaswamy * of devfs as there seems to be no clean way to drive configuration 396 8082 Ramaswamy * of a subtree from another subtree of a devfs. Hence, do not ASSERT 397 8082 Ramaswamy * for IB. 398 8082 Ramaswamy */ 399 8082 Ramaswamy if (strcmp(class, MDI_HCI_CLASS_IB) != 0) 400 8082 Ramaswamy ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 401 8082 Ramaswamy #endif 402 0 stevel 403 0 stevel i_mdi_init(); 404 0 stevel 405 0 stevel mutex_enter(&mdi_mutex); 406 0 stevel /* 407 0 stevel * Scan for already registered vhci 408 0 stevel */ 409 0 stevel for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 410 0 stevel if (strcmp(vh->vh_class, class) == 0) { 411 0 stevel /* 412 0 stevel * vHCI has already been created. Check for valid 413 0 stevel * vHCI ops registration. We only support one vHCI 414 0 stevel * module per class 415 0 stevel */ 416 0 stevel if (vh->vh_ops != NULL) { 417 0 stevel mutex_exit(&mdi_mutex); 418 0 stevel cmn_err(CE_NOTE, vhci_greeting, class); 419 0 stevel return (MDI_FAILURE); 420 0 stevel } 421 0 stevel break; 422 0 stevel } 423 0 stevel } 424 0 stevel 425 0 stevel /* 426 0 stevel * if not yet created, create the vHCI component 427 0 stevel */ 428 0 stevel if (vh == NULL) { 429 0 stevel struct client_hash *hash = NULL; 430 0 stevel char *load_balance; 431 0 stevel 432 0 stevel /* 433 0 stevel * Allocate and initialize the mdi extensions 434 0 stevel */ 435 0 stevel vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); 436 0 stevel hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), 437 0 stevel KM_SLEEP); 438 0 stevel vh->vh_client_table = hash; 439 0 stevel vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); 440 0 stevel (void) strcpy(vh->vh_class, class); 441 0 stevel vh->vh_lb = LOAD_BALANCE_RR; 442 0 stevel if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 443 0 stevel 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { 444 0 stevel if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { 445 0 stevel vh->vh_lb = LOAD_BALANCE_NONE; 446 0 stevel } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) 447 0 stevel == 0) { 448 0 stevel vh->vh_lb = LOAD_BALANCE_LBA; 449 0 stevel } 450 0 stevel ddi_prop_free(load_balance); 451 0 stevel } 452 0 stevel 453 2155 cth mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL); 454 2155 cth mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL); 455 2155 cth 456 0 stevel /* 457 0 stevel * Store the vHCI ops vectors 458 0 stevel */ 459 0 stevel vh->vh_dip = vdip; 460 0 stevel vh->vh_ops = vops; 461 0 stevel 462 878 ramat setup_vhci_cache(vh); 463 0 stevel 464 0 stevel if (mdi_vhci_head == NULL) { 465 0 stevel mdi_vhci_head = vh; 466 0 stevel } 467 0 stevel if (mdi_vhci_tail) { 468 0 stevel mdi_vhci_tail->vh_next = vh; 469 0 stevel } 470 0 stevel mdi_vhci_tail = vh; 471 0 stevel mdi_vhci_count++; 472 0 stevel } 473 0 stevel 474 0 stevel /* 475 0 stevel * Claim the devfs node as a vhci component 476 0 stevel */ 477 0 stevel DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; 478 0 stevel 479 0 stevel /* 480 0 stevel * Initialize our back reference from dev_info node 481 0 stevel */ 482 0 stevel DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; 483 0 stevel mutex_exit(&mdi_mutex); 484 0 stevel return (MDI_SUCCESS); 485 0 stevel } 486 0 stevel 487 0 stevel /* 488 0 stevel * mdi_vhci_unregister(): 489 0 stevel * Unregister a vHCI module from mpxio framework 490 0 stevel * mdi_vhci_unregister() is called from the detach(9E) entrypoint 491 0 stevel * of a vhci to unregister it from the framework. 492 0 stevel * Return Values: 493 0 stevel * MDI_SUCCESS 494 0 stevel * MDI_FAILURE 495 0 stevel */ 496 0 stevel /*ARGSUSED*/ 497 0 stevel int 498 0 stevel mdi_vhci_unregister(dev_info_t *vdip, int flags) 499 0 stevel { 500 0 stevel mdi_vhci_t *found, *vh, *prev = NULL; 501 0 stevel 502 2155 cth ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 503 2155 cth 504 0 stevel /* 505 0 stevel * Check for invalid VHCI 506 0 stevel */ 507 0 stevel if ((vh = i_devi_get_vhci(vdip)) == NULL) 508 0 stevel return (MDI_FAILURE); 509 0 stevel 510 2155 cth /* 511 2155 cth * Scan the list of registered vHCIs for a match 512 2155 cth */ 513 2009 dm120769 mutex_enter(&mdi_mutex); 514 0 stevel for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 515 0 stevel if (found == vh) 516 0 stevel break; 517 0 stevel prev = found; 518 0 stevel } 519 0 stevel 520 0 stevel if (found == NULL) { 521 0 stevel mutex_exit(&mdi_mutex); 522 0 stevel return (MDI_FAILURE); 523 0 stevel } 524 0 stevel 525 0 stevel /* 526 893 rs135747 * Check the vHCI, pHCI and client count. All the pHCIs and clients 527 0 stevel * should have been unregistered, before a vHCI can be 528 0 stevel * unregistered. 529 0 stevel */ 530 2155 cth MDI_VHCI_PHCI_LOCK(vh); 531 2155 cth if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) { 532 2155 cth MDI_VHCI_PHCI_UNLOCK(vh); 533 2155 cth mutex_exit(&mdi_mutex); 534 2155 cth return (MDI_FAILURE); 535 2155 cth } 536 2155 cth MDI_VHCI_PHCI_UNLOCK(vh); 537 2155 cth 538 2155 cth if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 539 0 stevel mutex_exit(&mdi_mutex); 540 0 stevel return (MDI_FAILURE); 541 0 stevel } 542 0 stevel 543 0 stevel /* 544 0 stevel * Remove the vHCI from the global list 545 0 stevel */ 546 0 stevel if (vh == mdi_vhci_head) { 547 0 stevel mdi_vhci_head = vh->vh_next; 548 0 stevel } else { 549 0 stevel prev->vh_next = vh->vh_next; 550 0 stevel } 551 0 stevel if (vh == mdi_vhci_tail) { 552 0 stevel mdi_vhci_tail = prev; 553 0 stevel } 554 878 ramat mdi_vhci_count--; 555 878 ramat mutex_exit(&mdi_mutex); 556 878 ramat 557 0 stevel vh->vh_ops = NULL; 558 0 stevel DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 559 0 stevel DEVI(vdip)->devi_mdi_xhci = NULL; 560 0 stevel kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 561 0 stevel kmem_free(vh->vh_client_table, 562 0 stevel mdi_client_table_size * sizeof (struct client_hash)); 563 2155 cth mutex_destroy(&vh->vh_phci_mutex); 564 2155 cth mutex_destroy(&vh->vh_client_mutex); 565 1140 llai1 566 0 stevel kmem_free(vh, sizeof (mdi_vhci_t)); 567 0 stevel return (MDI_SUCCESS); 568 0 stevel } 569 0 stevel 570 0 stevel /* 571 0 stevel * i_mdi_vhci_class2vhci(): 572 0 stevel * Look for a matching vHCI module given a vHCI class name 573 0 stevel * Return Values: 574 0 stevel * Handle to a vHCI component 575 0 stevel * NULL 576 0 stevel */ 577 0 stevel static mdi_vhci_t * 578 0 stevel i_mdi_vhci_class2vhci(char *class) 579 0 stevel { 580 0 stevel mdi_vhci_t *vh = NULL; 581 0 stevel 582 0 stevel ASSERT(!MUTEX_HELD(&mdi_mutex)); 583 0 stevel 584 0 stevel mutex_enter(&mdi_mutex); 585 0 stevel for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 586 0 stevel if (strcmp(vh->vh_class, class) == 0) { 587 0 stevel break; 588 0 stevel } 589 0 stevel } 590 0 stevel mutex_exit(&mdi_mutex); 591 0 stevel return (vh); 592 0 stevel } 593 0 stevel 594 0 stevel /* 595 0 stevel * i_devi_get_vhci(): 596 0 stevel * Utility function to get the handle to a vHCI component 597 0 stevel * Return Values: 598 0 stevel * Handle to a vHCI component 599 0 stevel * NULL 600 0 stevel */ 601 0 stevel mdi_vhci_t * 602 0 stevel i_devi_get_vhci(dev_info_t *vdip) 603 0 stevel { 604 0 stevel mdi_vhci_t *vh = NULL; 605 0 stevel if (MDI_VHCI(vdip)) { 606 0 stevel vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 607 0 stevel } 608 0 stevel return (vh); 609 0 stevel } 610 0 stevel 611 0 stevel /* 612 0 stevel * mdi_phci_register(): 613 0 stevel * Register a pHCI module with mpxio framework 614 0 stevel * mdi_phci_register() is called by pHCI drivers to register with 615 0 stevel * the mpxio framework and a specific 'class_driver' vHCI. The 616 0 stevel * pHCI driver must call this interface as part of its attach(9e) 617 0 stevel * handler. 618 0 stevel * Return Values: 619 0 stevel * MDI_SUCCESS 620 0 stevel * MDI_FAILURE 621 0 stevel */ 622 0 stevel /*ARGSUSED*/ 623 0 stevel int 624 0 stevel mdi_phci_register(char *class, dev_info_t *pdip, int flags) 625 0 stevel { 626 0 stevel mdi_phci_t *ph; 627 0 stevel mdi_vhci_t *vh; 628 0 stevel char *data; 629 2155 cth 630 2155 cth /* 631 2155 cth * Some subsystems, like fcp, perform pHCI registration from a 632 2155 cth * different thread than the one doing the pHCI attach(9E) - the 633 2155 cth * driver attach code is waiting for this other thread to complete. 634 2155 cth * This means we can only ASSERT DEVI_BUSY_CHANGING of parent 635 2155 cth * (indicating that some thread has done an ndi_devi_enter of parent) 636 2155 cth * not DEVI_BUSY_OWNED (which would indicate that we did the enter). 637 2155 cth */ 638 2155 cth ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 639 0 stevel 640 0 stevel /* 641 0 stevel * Check for mpxio-disable property. Enable mpxio if the property is 642 0 stevel * missing or not set to "yes". 643 0 stevel * If the property is set to "yes" then emit a brief message. 644 0 stevel */ 645 0 stevel if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable", 646 0 stevel &data) == DDI_SUCCESS)) { 647 0 stevel if (strcmp(data, "yes") == 0) { 648 10696 David MDI_DEBUG(1, (MDI_CONT, pdip, 649 10696 David "?multipath capabilities disabled via %s.conf.", 650 0 stevel ddi_driver_name(pdip))); 651 0 stevel ddi_prop_free(data); 652 0 stevel return (MDI_FAILURE); 653 0 stevel } 654 0 stevel ddi_prop_free(data); 655 0 stevel } 656 0 stevel 657 0 stevel /* 658 0 stevel * Search for a matching vHCI 659 0 stevel */ 660 0 stevel vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 661 0 stevel if (vh == NULL) { 662 0 stevel return (MDI_FAILURE); 663 0 stevel } 664 0 stevel 665 0 stevel ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP); 666 0 stevel mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL); 667 0 stevel ph->ph_dip = pdip; 668 0 stevel ph->ph_vhci = vh; 669 0 stevel ph->ph_next = NULL; 670 0 stevel ph->ph_unstable = 0; 671 0 stevel ph->ph_vprivate = 0; 672 0 stevel cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL); 673 2155 cth 674 2155 cth MDI_PHCI_LOCK(ph); 675 0 stevel MDI_PHCI_SET_POWER_UP(ph); 676 2155 cth MDI_PHCI_UNLOCK(ph); 677 0 stevel DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI; 678 0 stevel DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph; 679 0 stevel 680 878 ramat vhcache_phci_add(vh->vh_config, ph); 681 878 ramat 682 2155 cth MDI_VHCI_PHCI_LOCK(vh); 683 0 stevel if (vh->vh_phci_head == NULL) { 684 0 stevel vh->vh_phci_head = ph; 685 0 stevel } 686 0 stevel if (vh->vh_phci_tail) { 687 0 stevel vh->vh_phci_tail->ph_next = ph; 688 0 stevel } 689 0 stevel vh->vh_phci_tail = ph; 690 0 stevel vh->vh_phci_count++; 691 2155 cth MDI_VHCI_PHCI_UNLOCK(vh); 692 2155 cth 693 893 rs135747 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER); 694 0 stevel return (MDI_SUCCESS); 695 0 stevel } 696 0 stevel 697 0 stevel /* 698 0 stevel * mdi_phci_unregister(): 699 0 stevel * Unregister a pHCI module from mpxio framework 700 0 stevel * mdi_phci_unregister() is called by the pHCI drivers from their 701 0 stevel * detach(9E) handler to unregister their instances from the 702 0 stevel * framework. 703 0 stevel * Return Values: 704 0 stevel * MDI_SUCCESS 705 0 stevel * MDI_FAILURE 706 0 stevel */ 707 0 stevel /*ARGSUSED*/ 708 0 stevel int 709 0 stevel mdi_phci_unregister(dev_info_t *pdip, int flags) 710 0 stevel { 711 0 stevel mdi_vhci_t *vh; 712 0 stevel mdi_phci_t *ph; 713 0 stevel mdi_phci_t *tmp; 714 0 stevel mdi_phci_t *prev = NULL; 715 10696 David mdi_pathinfo_t *pip; 716 2155 cth 717 2155 cth ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 718 0 stevel 719 0 stevel ph = i_devi_get_phci(pdip); 720 0 stevel if (ph == NULL) { 721 10696 David MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI")); 722 0 stevel return (MDI_FAILURE); 723 0 stevel } 724 0 stevel 725 0 stevel vh = ph->ph_vhci; 726 0 stevel ASSERT(vh != NULL); 727 0 stevel if (vh == NULL) { 728 10696 David MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI")); 729 0 stevel return (MDI_FAILURE); 730 0 stevel } 731 0 stevel 732 2155 cth MDI_VHCI_PHCI_LOCK(vh); 733 0 stevel tmp = vh->vh_phci_head; 734 0 stevel while (tmp) { 735 0 stevel if (tmp == ph) { 736 0 stevel break; 737 0 stevel } 738 0 stevel prev = tmp; 739 0 stevel tmp = tmp->ph_next; 740 0 stevel } 741 0 stevel 742 0 stevel if (ph == vh->vh_phci_head) { 743 0 stevel vh->vh_phci_head = ph->ph_next; 744 0 stevel } else { 745 0 stevel prev->ph_next = ph->ph_next; 746 0 stevel } 747 0 stevel 748 0 stevel if (ph == vh->vh_phci_tail) { 749 0 stevel vh->vh_phci_tail = prev; 750 0 stevel } 751 0 stevel 752 0 stevel vh->vh_phci_count--; 753 2155 cth MDI_VHCI_PHCI_UNLOCK(vh); 754 10696 David 755 10696 David /* Walk remaining pathinfo nodes and disassociate them from pHCI */ 756 10696 David MDI_PHCI_LOCK(ph); 757 10696 David for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip; 758 10696 David pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link) 759 10696 David MDI_PI(pip)->pi_phci = NULL; 760 10696 David MDI_PHCI_UNLOCK(ph); 761 878 ramat 762 893 rs135747 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class, 763 893 rs135747 ESC_DDI_INITIATOR_UNREGISTER); 764 878 ramat vhcache_phci_remove(vh->vh_config, ph); 765 0 stevel cv_destroy(&ph->ph_unstable_cv); 766 0 stevel mutex_destroy(&ph->ph_mutex); 767 0 stevel kmem_free(ph, sizeof (mdi_phci_t)); 768 0 stevel DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; 769 0 stevel DEVI(pdip)->devi_mdi_xhci = NULL; 770 0 stevel return (MDI_SUCCESS); 771 0 stevel } 772 0 stevel 773 0 stevel /* 774 0 stevel * i_devi_get_phci(): 775 0 stevel * Utility function to return the phci extensions. 776 0 stevel */ 777 0 stevel static mdi_phci_t * 778 0 stevel i_devi_get_phci(dev_info_t *pdip) 779 0 stevel { 780 0 stevel mdi_phci_t *ph = NULL; 781 9167 Randall 782 0 stevel if (MDI_PHCI(pdip)) { 783 0 stevel ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci; 784 0 stevel } 785 0 stevel return (ph); 786 0 stevel } 787 0 stevel 788 0 stevel /* 789 2155 cth * Single thread mdi entry into devinfo node for modifying its children. 790 2155 cth * If necessary we perform an ndi_devi_enter of the vHCI before doing 791 2155 cth * an ndi_devi_enter of 'dip'. We maintain circular in two parts: one 792 2155 cth * for the vHCI and one for the pHCI. 793 2155 cth */ 794 2155 cth void 795 2155 cth mdi_devi_enter(dev_info_t *phci_dip, int *circular) 796 2155 cth { 797 2155 cth dev_info_t *vdip; 798 2155 cth int vcircular, pcircular; 799 2155 cth 800 2155 cth /* Verify calling context */ 801 2155 cth ASSERT(MDI_PHCI(phci_dip)); 802 2155 cth vdip = mdi_devi_get_vdip(phci_dip); 803 2155 cth ASSERT(vdip); /* A pHCI always has a vHCI */ 804 2155 cth 805 2155 cth /* 806 2155 cth * If pHCI is detaching then the framework has already entered the 807 2155 cth * vHCI on a threads that went down the code path leading to 808 2155 cth * detach_node(). This framework enter of the vHCI during pHCI 809 2155 cth * detach is done to avoid deadlock with vHCI power management 810 2155 cth * operations which enter the vHCI and the enter down the path 811 2155 cth * to the pHCI. If pHCI is detaching then we piggyback this calls 812 2155 cth * enter of the vHCI on frameworks vHCI enter that has already 813 2155 cth * occurred - this is OK because we know that the framework thread 814 2155 cth * doing detach is waiting for our completion. 815 2155 cth * 816 2155 cth * We should DEVI_IS_DETACHING under an enter of the parent to avoid 817 2155 cth * race with detach - but we can't do that because the framework has 818 2155 cth * already entered the parent, so we have some complexity instead. 819 2155 cth */ 820 2155 cth for (;;) { 821 2155 cth if (ndi_devi_tryenter(vdip, &vcircular)) { 822 2155 cth ASSERT(vcircular != -1); 823 2155 cth if (DEVI_IS_DETACHING(phci_dip)) { 824 2155 cth ndi_devi_exit(vdip, vcircular); 825 2155 cth vcircular = -1; 826 2155 cth } 827 2155 cth break; 828 2155 cth } else if (DEVI_IS_DETACHING(phci_dip)) { 829 2155 cth vcircular = -1; 830 2155 cth break; 831 10696 David } else if (servicing_interrupt()) { 832 10696 David /* 833 10696 David * Don't delay an interrupt (and ensure adaptive 834 10696 David * mutex inversion support). 835 10696 David */ 836 10696 David ndi_devi_enter(vdip, &vcircular); 837 10696 David break; 838 10696 David } else { 839 11052 Chris delay_random(mdi_delay); 840 2155 cth } 841 2155 cth } 842 2155 cth 843 2155 cth ndi_devi_enter(phci_dip, &pcircular); 844 2155 cth *circular = (vcircular << 16) | (pcircular & 0xFFFF); 845 9167 Randall } 846 9167 Randall 847 9167 Randall /* 848 9167 Randall * Attempt to mdi_devi_enter. 849 9167 Randall */ 850 9167 Randall int 851 9167 Randall mdi_devi_tryenter(dev_info_t *phci_dip, int *circular) 852 9167 Randall { 853 9167 Randall dev_info_t *vdip; 854 9167 Randall int vcircular, pcircular; 855 9167 Randall 856 9167 Randall /* Verify calling context */ 857 9167 Randall ASSERT(MDI_PHCI(phci_dip)); 858 9167 Randall vdip = mdi_devi_get_vdip(phci_dip); 859 9167 Randall ASSERT(vdip); /* A pHCI always has a vHCI */ 860 9167 Randall 861 9167 Randall if (ndi_devi_tryenter(vdip, &vcircular)) { 862 9167 Randall if (ndi_devi_tryenter(phci_dip, &pcircular)) { 863 9167 Randall *circular = (vcircular << 16) | (pcircular & 0xFFFF); 864 9167 Randall return (1); /* locked */ 865 9167 Randall } 866 9167 Randall ndi_devi_exit(vdip, vcircular); 867 9167 Randall } 868 9167 Randall return (0); /* busy */ 869 2155 cth } 870 2155 cth 871 2155 cth /* 872 2155 cth * Release mdi_devi_enter or successful mdi_devi_tryenter. 873 2155 cth */ 874 2155 cth void 875 2155 cth mdi_devi_exit(dev_info_t *phci_dip, int circular) 876 2155 cth { 877 2155 cth dev_info_t *vdip; 878 2155 cth int vcircular, pcircular; 879 2155 cth 880 2155 cth /* Verify calling context */ 881 2155 cth ASSERT(MDI_PHCI(phci_dip)); 882 2155 cth vdip = mdi_devi_get_vdip(phci_dip); 883 2155 cth ASSERT(vdip); /* A pHCI always has a vHCI */ 884 2155 cth 885 2155 cth /* extract two circular recursion values from single int */ 886 2155 cth pcircular = (short)(circular & 0xFFFF); 887 2155 cth vcircular = (short)((circular >> 16) & 0xFFFF); 888 2155 cth 889 2155 cth ndi_devi_exit(phci_dip, pcircular); 890 2155 cth if (vcircular != -1) 891 2155 cth ndi_devi_exit(vdip, vcircular); 892 2155 cth } 893 2155 cth 894 2155 cth /* 895 2155 cth * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used 896 2155 cth * around a pHCI drivers calls to mdi_pi_online/offline, after holding 897 2155 cth * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock 898 2155 cth * with vHCI power management code during path online/offline. Each 899 2155 cth * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must 900 2155 cth * occur within the scope of an active mdi_devi_enter that establishes the 901 2155 cth * circular value. 902 2155 cth */ 903 2155 cth void 904 2155 cth mdi_devi_exit_phci(dev_info_t *phci_dip, int circular) 905 2155 cth { 906 2155 cth int pcircular; 907 2155 cth 908 2155 cth /* Verify calling context */ 909 2155 cth ASSERT(MDI_PHCI(phci_dip)); 910 2155 cth 911 10696 David /* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */ 912 10696 David ndi_hold_devi(phci_dip); 913 10696 David 914 2155 cth pcircular = (short)(circular & 0xFFFF); 915 2155 cth ndi_devi_exit(phci_dip, pcircular); 916 2155 cth } 917 2155 cth 918 2155 cth void 919 2155 cth mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular) 920 2155 cth { 921 2155 cth int pcircular; 922 2155 cth 923 2155 cth /* Verify calling context */ 924 2155 cth ASSERT(MDI_PHCI(phci_dip)); 925 2155 cth 926 2155 cth ndi_devi_enter(phci_dip, &pcircular); 927 10696 David 928 10696 David /* Drop hold from mdi_devi_exit_phci. */ 929 10696 David ndi_rele_devi(phci_dip); 930 2155 cth 931 2155 cth /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */ 932 2155 cth ASSERT(pcircular == ((short)(*circular & 0xFFFF))); 933 2155 cth } 934 2155 cth 935 2155 cth /* 936 2155 cth * mdi_devi_get_vdip(): 937 2155 cth * given a pHCI dip return vHCI dip 938 2155 cth */ 939 2155 cth dev_info_t * 940 2155 cth mdi_devi_get_vdip(dev_info_t *pdip) 941 2155 cth { 942 2155 cth mdi_phci_t *ph; 943 2155 cth 944 2155 cth ph = i_devi_get_phci(pdip); 945 2155 cth if (ph && ph->ph_vhci) 946 2155 cth return (ph->ph_vhci->vh_dip); 947 2155 cth return (NULL); 948 2155 cth } 949 2155 cth 950 2155 cth /* 951 2155 cth * mdi_devi_pdip_entered(): 952 2155 cth * Return 1 if we are vHCI and have done an ndi_devi_enter 953 2155 cth * of a pHCI 954 2155 cth */ 955 2155 cth int 956 2155 cth mdi_devi_pdip_entered(dev_info_t *vdip) 957 2155 cth { 958 2155 cth mdi_vhci_t *vh; 959 2155 cth mdi_phci_t *ph; 960 2155 cth 961 2155 cth vh = i_devi_get_vhci(vdip); 962 2155 cth if (vh == NULL) 963 2155 cth return (0); 964 2155 cth 965 2155 cth MDI_VHCI_PHCI_LOCK(vh); 966 2155 cth ph = vh->vh_phci_head; 967 2155 cth while (ph) { 968 2155 cth if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) { 969 2155 cth MDI_VHCI_PHCI_UNLOCK(vh); 970 2155 cth return (1); 971 2155 cth } 972 2155 cth ph = ph->ph_next; 973 2155 cth } 974 2155 cth MDI_VHCI_PHCI_UNLOCK(vh); 975 2155 cth return (0); 976 2155 cth } 977 2155 cth 978 2155 cth /* 979 0 stevel * mdi_phci_path2devinfo(): 980 0 stevel * Utility function to search for a valid phci device given 981 0 stevel * the devfs pathname. 982 0 stevel */ 983 0 stevel dev_info_t * 984 0 stevel mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname) 985 0 stevel { 986 0 stevel char *temp_pathname; 987 0 stevel mdi_vhci_t *vh; 988 0 stevel mdi_phci_t *ph; 989 0 stevel dev_info_t *pdip = NULL; 990 0 stevel 991 0 stevel vh = i_devi_get_vhci(vdip); 992 0 stevel ASSERT(vh != NULL); 993 0 stevel 994 0 stevel if (vh == NULL) { 995 0 stevel /* 996 0 stevel * Invalid vHCI component, return failure 997 0 stevel */ 998 0 stevel return (NULL); 999 0 stevel } 1000 0 stevel 1001 0 stevel temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1002 2155 cth MDI_VHCI_PHCI_LOCK(vh); 1003 0 stevel ph = vh->vh_phci_head; 1004 0 stevel while (ph != NULL) { 1005 0 stevel pdip = ph->ph_dip; 1006 0 stevel ASSERT(pdip != NULL); 1007 0 stevel *temp_pathname = '\0'; 1008 0 stevel (void) ddi_pathname(pdip, temp_pathname); 1009 0 stevel if (strcmp(temp_pathname, pathname) == 0) { 1010 0 stevel break; 1011 0 stevel } 1012 0 stevel ph = ph->ph_next; 1013 0 stevel } 1014 0 stevel if (ph == NULL) { 1015 0 stevel pdip = NULL; 1016 0 stevel } 1017 2155 cth MDI_VHCI_PHCI_UNLOCK(vh); 1018 0 stevel kmem_free(temp_pathname, MAXPATHLEN); 1019 0 stevel return (pdip); 1020 0 stevel } 1021 0 stevel 1022 0 stevel /* 1023 0 stevel * mdi_phci_get_path_count(): 1024 0 stevel * get number of path information nodes associated with a given 1025 0 stevel * pHCI device. 1026 0 stevel */ 1027 0 stevel int 1028 0 stevel mdi_phci_get_path_count(dev_info_t *pdip) 1029 0 stevel { 1030 0 stevel mdi_phci_t *ph; 1031 0 stevel int count = 0; 1032 0 stevel 1033 0 stevel ph = i_devi_get_phci(pdip); 1034 0 stevel if (ph != NULL) { 1035 0 stevel count = ph->ph_path_count; 1036 0 stevel } 1037 0 stevel return (count); 1038 0 stevel } 1039 0 stevel 1040 0 stevel /* 1041 0 stevel * i_mdi_phci_lock(): 1042 0 stevel * Lock a pHCI device 1043 0 stevel * Return Values: 1044 0 stevel * None 1045 0 stevel * Note: 1046 0 stevel * The default locking order is: 1047 0 stevel * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 1048 0 stevel * But there are number of situations where locks need to be 1049 0 stevel * grabbed in reverse order. This routine implements try and lock 1050 0 stevel * mechanism depending on the requested parameter option. 1051 0 stevel */ 1052 0 stevel static void 1053 0 stevel i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip) 1054 0 stevel { 1055 0 stevel if (pip) { 1056 0 stevel /* Reverse locking is requested. */ 1057 0 stevel while (MDI_PHCI_TRYLOCK(ph) == 0) { 1058 10696 David if (servicing_interrupt()) { 1059 10696 David MDI_PI_HOLD(pip); 1060 10696 David MDI_PI_UNLOCK(pip); 1061 10696 David MDI_PHCI_LOCK(ph); 1062 10696 David MDI_PI_LOCK(pip); 1063 10696 David MDI_PI_RELE(pip); 1064 10696 David break; 1065 10696 David } else { 1066 10696 David /* 1067 10696 David * tryenter failed. Try to grab again 1068 10696 David * after a small delay 1069 10696 David */ 1070 10696 David MDI_PI_HOLD(pip); 1071 10696 David MDI_PI_UNLOCK(pip); 1072 11052 Chris delay_random(mdi_delay); 1073 10696 David MDI_PI_LOCK(pip); 1074 10696 David MDI_PI_RELE(pip); 1075 10696 David } 1076 0 stevel } 1077 0 stevel } else { 1078 0 stevel MDI_PHCI_LOCK(ph); 1079 0 stevel } 1080 0 stevel } 1081 0 stevel 1082 0 stevel /* 1083 0 stevel * i_mdi_phci_unlock(): 1084 0 stevel * Unlock the pHCI component 1085 0 stevel */ 1086 0 stevel static void 1087 0 stevel i_mdi_phci_unlock(mdi_phci_t *ph) 1088 0 stevel { 1089 0 stevel MDI_PHCI_UNLOCK(ph); 1090 0 stevel } 1091 0 stevel 1092 0 stevel /* 1093 0 stevel * i_mdi_devinfo_create(): 1094 0 stevel * create client device's devinfo node 1095 0 stevel * Return Values: 1096 0 stevel * dev_info 1097 0 stevel * NULL 1098 0 stevel * Notes: 1099 0 stevel */ 1100 0 stevel static dev_info_t * 1101 0 stevel i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid, 1102 878 ramat char **compatible, int ncompatible) 1103 0 stevel { 1104 0 stevel dev_info_t *cdip = NULL; 1105 0 stevel 1106 2155 cth ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1107 0 stevel 1108 0 stevel /* Verify for duplicate entry */ 1109 0 stevel cdip = i_mdi_devinfo_find(vh, name, guid); 1110 0 stevel ASSERT(cdip == NULL); 1111 0 stevel if (cdip) { 1112 0 stevel cmn_err(CE_WARN, 1113 10696 David "i_mdi_devinfo_create: client %s@%s already exists", 1114 10696 David name ? name : "", guid ? guid : ""); 1115 0 stevel } 1116 0 stevel 1117 878 ramat ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip); 1118 0 stevel if (cdip == NULL) 1119 0 stevel goto fail; 1120 0 stevel 1121 0 stevel /* 1122 0 stevel * Create component type and Global unique identifier 1123 0 stevel * properties 1124 0 stevel */ 1125 0 stevel if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip, 1126 0 stevel MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) { 1127 0 stevel goto fail; 1128 0 stevel } 1129 0 stevel 1130 0 stevel /* Decorate the node with compatible property */ 1131 0 stevel if (compatible && 1132 0 stevel (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip, 1133 0 stevel "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) { 1134 0 stevel goto fail; 1135 0 stevel } 1136 0 stevel 1137 0 stevel return (cdip); 1138 0 stevel 1139 0 stevel fail: 1140 0 stevel if (cdip) { 1141 0 stevel (void) ndi_prop_remove_all(cdip); 1142 0 stevel (void) ndi_devi_free(cdip); 1143 0 stevel } 1144 0 stevel return (NULL); 1145 0 stevel } 1146 0 stevel 1147 0 stevel /* 1148 0 stevel * i_mdi_devinfo_find(): 1149 0 stevel * Find a matching devinfo node for given client node name 1150 0 stevel * and its guid. 1151 0 stevel * Return Values: 1152 0 stevel * Handle to a dev_info node or NULL 1153 0 stevel */ 1154 0 stevel static dev_info_t * 1155 0 stevel i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid) 1156 0 stevel { 1157 0 stevel char *data; 1158 0 stevel dev_info_t *cdip = NULL; 1159 0 stevel dev_info_t *ndip = NULL; 1160 0 stevel int circular; 1161 0 stevel 1162 0 stevel ndi_devi_enter(vh->vh_dip, &circular); 1163 0 stevel ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child; 1164 0 stevel while ((cdip = ndip) != NULL) { 1165 0 stevel ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1166 0 stevel 1167 0 stevel if (strcmp(DEVI(cdip)->devi_node_name, name)) { 1168 0 stevel continue; 1169 0 stevel } 1170 0 stevel 1171 0 stevel if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, 1172 0 stevel DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP, 1173 0 stevel &data) != DDI_PROP_SUCCESS) { 1174 0 stevel continue; 1175 0 stevel } 1176 0 stevel 1177 0 stevel if (strcmp(data, guid) != 0) { 1178 0 stevel ddi_prop_free(data); 1179 0 stevel continue; 1180 0 stevel } 1181 0 stevel ddi_prop_free(data); 1182 0 stevel break; 1183 0 stevel } 1184 0 stevel ndi_devi_exit(vh->vh_dip, circular); 1185 0 stevel return (cdip); 1186 0 stevel } 1187 0 stevel 1188 0 stevel /* 1189 0 stevel * i_mdi_devinfo_remove(): 1190 0 stevel * Remove a client device node 1191 0 stevel */ 1192 0 stevel static int 1193 0 stevel i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags) 1194 0 stevel { 1195 0 stevel int rv = MDI_SUCCESS; 1196 2155 cth 1197 0 stevel if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || 1198 0 stevel (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { 1199 10696 David rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE); 1200 0 stevel if (rv != NDI_SUCCESS) { 1201 10696 David MDI_DEBUG(1, (MDI_NOTE, cdip, 1202 10696 David "!failed: cdip %p", (void *)cdip)); 1203 0 stevel } 1204 0 stevel /* 1205 0 stevel * Convert to MDI error code 1206 0 stevel */ 1207 0 stevel switch (rv) { 1208 0 stevel case NDI_SUCCESS: 1209 0 stevel rv = MDI_SUCCESS; 1210 0 stevel break; 1211 0 stevel case NDI_BUSY: 1212 0 stevel rv = MDI_BUSY; 1213 0 stevel break; 1214 0 stevel default: 1215 0 stevel rv = MDI_FAILURE; 1216 0 stevel break; 1217 0 stevel } 1218 0 stevel } 1219 0 stevel return (rv); 1220 0 stevel } 1221 0 stevel 1222 0 stevel /* 1223 0 stevel * i_devi_get_client() 1224 0 stevel * Utility function to get mpxio component extensions 1225 0 stevel */ 1226 0 stevel static mdi_client_t * 1227 0 stevel i_devi_get_client(dev_info_t *cdip) 1228 0 stevel { 1229 0 stevel mdi_client_t *ct = NULL; 1230 2155 cth 1231 0 stevel if (MDI_CLIENT(cdip)) { 1232 0 stevel ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; 1233 0 stevel } 1234 0 stevel return (ct); 1235 0 stevel } 1236 0 stevel 1237 0 stevel /* 1238 0 stevel * i_mdi_is_child_present(): 1239 0 stevel * Search for the presence of client device dev_info node 1240 0 stevel */ 1241 0 stevel static int 1242 0 stevel i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) 1243 0 stevel { 1244 0 stevel int rv = MDI_FAILURE; 1245 0 stevel struct dev_info *dip; 1246 0 stevel int circular; 1247 0 stevel 1248 0 stevel ndi_devi_enter(vdip, &circular); 1249 0 stevel dip = DEVI(vdip)->devi_child; 1250 0 stevel while (dip) { 1251 0 stevel if (dip == DEVI(cdip)) { 1252 0 stevel rv = MDI_SUCCESS; 1253 0 stevel break; 1254 0 stevel } 1255 0 stevel dip = dip->devi_sibling; 1256 0 stevel } 1257 0 stevel ndi_devi_exit(vdip, circular); 1258 0 stevel return (rv); 1259 0 stevel } 1260 0 stevel 1261 0 stevel 1262 0 stevel /* 1263 0 stevel * i_mdi_client_lock(): 1264 0 stevel * Grab client component lock 1265 0 stevel * Return Values: 1266 0 stevel * None 1267 0 stevel * Note: 1268 0 stevel * The default locking order is: 1269 0 stevel * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 1270 0 stevel * But there are number of situations where locks need to be 1271 0 stevel * grabbed in reverse order. This routine implements try and lock 1272 0 stevel * mechanism depending on the requested parameter option. 1273 0 stevel */ 1274 0 stevel static void 1275 0 stevel i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip) 1276 0 stevel { 1277 0 stevel if (pip) { 1278 0 stevel /* 1279 0 stevel * Reverse locking is requested. 1280 0 stevel */ 1281 0 stevel while (MDI_CLIENT_TRYLOCK(ct) == 0) { 1282 10696 David if (servicing_interrupt()) { 1283 10696 David MDI_PI_HOLD(pip); 1284 10696 David MDI_PI_UNLOCK(pip); 1285 10696 David MDI_CLIENT_LOCK(ct); 1286 10696 David MDI_PI_LOCK(pip); 1287 10696 David MDI_PI_RELE(pip); 1288 10696 David break; 1289 10696 David } else { 1290 10696 David /* 1291 10696 David * tryenter failed. Try to grab again 1292 10696 David * after a small delay 1293 10696 David */ 1294 10696 David MDI_PI_HOLD(pip); 1295 10696 David MDI_PI_UNLOCK(pip); 1296 11052 Chris delay_random(mdi_delay); 1297 10696 David MDI_PI_LOCK(pip); 1298 10696 David MDI_PI_RELE(pip); 1299 10696 David } 1300 0 stevel } 1301 0 stevel } else { 1302 0 stevel MDI_CLIENT_LOCK(ct); 1303 0 stevel } 1304 0 stevel } 1305 0 stevel 1306 0 stevel /* 1307 0 stevel * i_mdi_client_unlock(): 1308 0 stevel * Unlock a client component 1309 0 stevel */ 1310 0 stevel static void 1311 0 stevel i_mdi_client_unlock(mdi_client_t *ct) 1312 0 stevel { 1313 0 stevel MDI_CLIENT_UNLOCK(ct); 1314 0 stevel } 1315 0 stevel 1316 0 stevel /* 1317 0 stevel * i_mdi_client_alloc(): 1318 0 stevel * Allocate and initialize a client structure. Caller should 1319 2155 cth * hold the vhci client lock. 1320 0 stevel * Return Values: 1321 0 stevel * Handle to a client component 1322 0 stevel */ 1323 0 stevel /*ARGSUSED*/ 1324 0 stevel static mdi_client_t * 1325 878 ramat i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid) 1326 878 ramat { 1327 878 ramat mdi_client_t *ct; 1328 0 stevel 1329 2155 cth ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1330 0 stevel 1331 0 stevel /* 1332 0 stevel * Allocate and initialize a component structure. 1333 0 stevel */ 1334 878 ramat ct = kmem_zalloc(sizeof (*ct), KM_SLEEP); 1335 0 stevel mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 1336 0 stevel ct->ct_hnext = NULL; 1337 0 stevel ct->ct_hprev = NULL; 1338 0 stevel ct->ct_dip = NULL; 1339 0 stevel ct->ct_vhci = vh; 1340 878 ramat ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1341 0 stevel (void) strcpy(ct->ct_drvname, name); 1342 878 ramat ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP); 1343 0 stevel (void) strcpy(ct->ct_guid, lguid); 1344 0 stevel ct->ct_cprivate = NULL; 1345 0 stevel ct->ct_vprivate = NULL; 1346 0 stevel ct->ct_flags = 0; 1347 0 stevel ct->ct_state = MDI_CLIENT_STATE_FAILED; 1348 2155 cth MDI_CLIENT_LOCK(ct); 1349 0 stevel MDI_CLIENT_SET_OFFLINE(ct); 1350 0 stevel MDI_CLIENT_SET_DETACH(ct); 1351 0 stevel MDI_CLIENT_SET_POWER_UP(ct); 1352 2155 cth MDI_CLIENT_UNLOCK(ct); 1353 0 stevel ct->ct_failover_flags = 0; 1354 0 stevel ct->ct_failover_status = 0; 1355 0 stevel cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); 1356 0 stevel ct->ct_unstable = 0; 1357 0 stevel cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); 1358 0 stevel cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); 1359 0 stevel ct->ct_lb = vh->vh_lb; 1360 878 ramat ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP); 1361 0 stevel ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE; 1362 0 stevel ct->ct_path_count = 0; 1363 0 stevel ct->ct_path_head = NULL; 1364 0 stevel ct->ct_path_tail = NULL; 1365 0 stevel ct->ct_path_last = NULL; 1366 0 stevel 1367 0 stevel /* 1368 0 stevel * Add this client component to our client hash queue 1369 0 stevel */ 1370 0 stevel i_mdi_client_enlist_table(vh, ct); 1371 0 stevel return (ct); 1372 0 stevel } 1373 0 stevel 1374 0 stevel /* 1375 0 stevel * i_mdi_client_enlist_table(): 1376 0 stevel * Attach the client device to the client hash table. Caller 1377 2155 cth * should hold the vhci client lock. 1378 2155 cth */ 1379 0 stevel static void 1380 0 stevel i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1381 0 stevel { 1382 0 stevel int index; 1383 0 stevel struct client_hash *head; 1384 0 stevel 1385 2155 cth ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1386 2155 cth 1387 0 stevel index = i_mdi_get_hash_key(ct->ct_guid); 1388 0 stevel head = &vh->vh_client_table[index]; 1389 0 stevel ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; 1390 0 stevel head->ct_hash_head = ct; 1391 0 stevel head->ct_hash_count++; 1392 0 stevel vh->vh_client_count++; 1393 0 stevel } 1394 0 stevel 1395 0 stevel /* 1396 0 stevel * i_mdi_client_delist_table(): 1397 0 stevel * Attach the client device to the client hash table. 1398 2155 cth * Caller should hold the vhci client lock. 1399 2155 cth */ 1400 0 stevel static void 1401 0 stevel i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1402 0 stevel { 1403 0 stevel int index; 1404 0 stevel char *guid; 1405 0 stevel struct client_hash *head; 1406 0 stevel mdi_client_t *next; 1407 0 stevel mdi_client_t *last; 1408 0 stevel 1409 2155 cth ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1410 2155 cth 1411 0 stevel guid = ct->ct_guid; 1412 0 stevel index = i_mdi_get_hash_key(guid); 1413 0 stevel head = &vh->vh_client_table[index]; 1414 0 stevel 1415 0 stevel last = NULL; 1416 0 stevel next = (mdi_client_t *)head->ct_hash_head; 1417 0 stevel while (next != NULL) { 1418 0 stevel if (next == ct) { 1419 0 stevel break; 1420 0 stevel } 1421 0 stevel last = next; 1422 0 stevel next = next->ct_hnext; 1423 0 stevel } 1424 0 stevel 1425 0 stevel if (next) { 1426 0 stevel head->ct_hash_count--; 1427 0 stevel if (last == NULL) { 1428 0 stevel head->ct_hash_head = ct->ct_hnext; 1429 0 stevel } else { 1430 0 stevel last->ct_hnext = ct->ct_hnext; 1431 0 stevel } 1432 0 stevel ct->ct_hnext = NULL; 1433 0 stevel vh->vh_client_count--; 1434 0 stevel } 1435 0 stevel } 1436 0 stevel 1437 0 stevel 1438 0 stevel /* 1439 0 stevel * i_mdi_client_free(): 1440 0 stevel * Free a client component 1441 0 stevel */ 1442 0 stevel static int 1443 0 stevel i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct) 1444 0 stevel { 1445 0 stevel int rv = MDI_SUCCESS; 1446 0 stevel int flags = ct->ct_flags; 1447 0 stevel dev_info_t *cdip; 1448 0 stevel dev_info_t *vdip; 1449 0 stevel 1450 2155 cth ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1451 2155 cth 1452 0 stevel vdip = vh->vh_dip; 1453 0 stevel cdip = ct->ct_dip; 1454 0 stevel 1455 0 stevel (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); 1456 0 stevel DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; 1457 0 stevel DEVI(cdip)->devi_mdi_client = NULL; 1458 0 stevel 1459 0 stevel /* 1460 0 stevel * Clear out back ref. to dev_info_t node 1461 0 stevel */ 1462 0 stevel ct->ct_dip = NULL; 1463 0 stevel 1464 0 stevel /* 1465 0 stevel * Remove this client from our hash queue 1466 0 stevel */ 1467 0 stevel i_mdi_client_delist_table(vh, ct); 1468 0 stevel 1469 0 stevel /* 1470 0 stevel * Uninitialize and free the component 1471 0 stevel */ 1472 0 stevel kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); 1473 0 stevel kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); 1474 0 stevel kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); 1475 0 stevel cv_destroy(&ct->ct_failover_cv); 1476 0 stevel cv_destroy(&ct->ct_unstable_cv); 1477 0 stevel cv_destroy(&ct->ct_powerchange_cv); 1478 0 stevel mutex_destroy(&ct->ct_mutex); 1479 0 stevel kmem_free(ct, sizeof (*ct)); 1480 0 stevel 1481 0 stevel if (cdip != NULL) { 1482 2155 cth MDI_VHCI_CLIENT_UNLOCK(vh); 1483 0 stevel (void) i_mdi_devinfo_remove(vdip, cdip, flags); 1484 2155 cth MDI_VHCI_CLIENT_LOCK(vh); 1485 0 stevel } 1486 0 stevel return (rv); 1487 0 stevel } 1488 0 stevel 1489 0 stevel /* 1490 0 stevel * i_mdi_client_find(): 1491 0 stevel * Find the client structure corresponding to a given guid 1492 2155 cth * Caller should hold the vhci client lock. 1493 0 stevel */ 1494 0 stevel static mdi_client_t * 1495 878 ramat i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid) 1496 0 stevel { 1497 0 stevel int index; 1498 0 stevel struct client_hash *head; 1499 0 stevel mdi_client_t *ct; 1500 0 stevel 1501 2155 cth ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1502 2155 cth 1503 0 stevel index = i_mdi_get_hash_key(guid); 1504 0 stevel head = &vh->vh_client_table[index]; 1505 0 stevel 1506 0 stevel ct = head->ct_hash_head; 1507 0 stevel while (ct != NULL) { 1508 878 ramat if (strcmp(ct->ct_guid, guid) == 0 && 1509 878 ramat (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) { 1510 0 stevel break; 1511 0 stevel } 1512 0 stevel ct = ct->ct_hnext; 1513 0 stevel } 1514 0 stevel return (ct); 1515 0 stevel } 1516 0 stevel 1517 0 stevel /* 1518 0 stevel * i_mdi_client_update_state(): 1519 0 stevel * Compute and update client device state 1520 0 stevel * Notes: 1521 0 stevel * A client device can be in any of three possible states: 1522 0 stevel * 1523 0 stevel * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more 1524 0 stevel * one online/standby paths. Can tolerate failures. 1525 0 stevel * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with 1526 0 stevel * no alternate paths available as standby. A failure on the online 1527 0 stevel * would result in loss of access to device data. 1528 0 stevel * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1529 0 stevel * no paths available to access the device. 1530 0 stevel */ 1531 0 stevel static void 1532 0 stevel i_mdi_client_update_state(mdi_client_t *ct) 1533 0 stevel { 1534 0 stevel int state; 1535 2155 cth 1536 2155 cth ASSERT(MDI_CLIENT_LOCKED(ct)); 1537 0 stevel state = i_mdi_client_compute_state(ct, NULL); 1538 0 stevel MDI_CLIENT_SET_STATE(ct, state); 1539 0 stevel } 1540 0 stevel 1541 0 stevel /* 1542 0 stevel * i_mdi_client_compute_state(): 1543 0 stevel * Compute client device state 1544 0 stevel * 1545 0 stevel * mdi_phci_t * Pointer to pHCI structure which should 1546 0 stevel * while computing the new value. Used by 1547 0 stevel * i_mdi_phci_offline() to find the new 1548 0 stevel * client state after DR of a pHCI. 1549 0 stevel */ 1550 0 stevel static int 1551 0 stevel i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1552 0 stevel { 1553 0 stevel int state; 1554 0 stevel int online_count = 0; 1555 0 stevel int standby_count = 0; 1556 0 stevel mdi_pathinfo_t *pip, *next; 1557 0 stevel 1558 2155 cth ASSERT(MDI_CLIENT_LOCKED(ct)); 1559 0 stevel pip = ct->ct_path_head; 1560 0 stevel while (pip != NULL) { 1561 0 stevel MDI_PI_LOCK(pip); 1562 0 stevel next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1563 0 stevel if (MDI_PI(pip)->pi_phci == ph) { 1564 0 stevel MDI_PI_UNLOCK(pip); 1565 0 stevel pip = next; 1566 0 stevel continue; 1567 0 stevel } 1568 2155 cth 1569 0 stevel if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1570 0 stevel == MDI_PATHINFO_STATE_ONLINE) 1571 0 stevel online_count++; 1572 0 stevel else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1573 0 stevel == MDI_PATHINFO_STATE_STANDBY) 1574 0 stevel standby_count++; 1575 0 stevel MDI_PI_UNLOCK(pip); 1576 0 stevel pip = next; 1577 0 stevel } 1578 0 stevel 1579 0 stevel if (online_count == 0) { 1580 0 stevel if (standby_count == 0) { 1581 0 stevel state = MDI_CLIENT_STATE_FAILED; 1582 10696 David MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip, 1583 10696 David "client state failed: ct = %p", (void *)ct)); 1584 0 stevel } else if (standby_count == 1) { 1585 0 stevel state = MDI_CLIENT_STATE_DEGRADED; 1586 0 stevel } else { 1587 0 stevel state = MDI_CLIENT_STATE_OPTIMAL; 1588 0 stevel } 1589 0 stevel } else if (online_count == 1) { 1590 0 stevel if (standby_count == 0) { 1591 0 stevel state = MDI_CLIENT_STATE_DEGRADED; 1592 0 stevel } else { 1593 0 stevel state = MDI_CLIENT_STATE_OPTIMAL; 1594 0 stevel } 1595 0 stevel } else { 1596 0 stevel state = MDI_CLIENT_STATE_OPTIMAL; 1597 0 stevel } 1598 0 stevel return (state); 1599 0 stevel } 1600 0 stevel 1601 0 stevel /* 1602 0 stevel * i_mdi_client2devinfo(): 1603 0 stevel * Utility function 1604 0 stevel */ 1605 0 stevel dev_info_t * 1606 0 stevel i_mdi_client2devinfo(mdi_client_t *ct) 1607 0 stevel { 1608 0 stevel return (ct->ct_dip); 1609 0 stevel } 1610 0 stevel 1611 0 stevel /* 1612 0 stevel * mdi_client_path2_devinfo(): 1613 0 stevel * Given the parent devinfo and child devfs pathname, search for 1614 0 stevel * a valid devfs node handle. 1615 0 stevel */ 1616 0 stevel dev_info_t * 1617 0 stevel mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1618 0 stevel { 1619 0 stevel dev_info_t *cdip = NULL; 1620 0 stevel dev_info_t *ndip = NULL; 1621 0 stevel char *temp_pathname; 1622 0 stevel int circular; 1623 0 stevel 1624 0 stevel /* 1625 0 stevel * Allocate temp buffer 1626 0 stevel */ 1627 0 stevel temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1628 0 stevel 1629 0 stevel /* 1630 0 stevel * Lock parent against changes 1631 0 stevel */ 1632 0 stevel ndi_devi_enter(vdip, &circular); 1633 0 stevel ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1634 0 stevel while ((cdip = ndip) != NULL) { 1635 0 stevel ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1636 0 stevel 1637 0 stevel *temp_pathname = '\0'; 1638 0 stevel (void) ddi_pathname(cdip, temp_pathname); 1639 0 stevel if (strcmp(temp_pathname, pathname) == 0) { 1640 0 stevel break; 1641 0 stevel } 1642 0 stevel } 1643 0 stevel /* 1644 0 stevel * Release devinfo lock 1645 0 stevel */ 1646 0 stevel ndi_devi_exit(vdip, circular); 1647 0 stevel 1648 0 stevel /* 1649 0 stevel * Free the temp buffer 1650 0 stevel */ 1651 0 stevel kmem_free(temp_pathname, MAXPATHLEN); 1652 0 stevel return (cdip); 1653 0 stevel } 1654 0 stevel 1655 0 stevel /* 1656 0 stevel * mdi_client_get_path_count(): 1657 0 stevel * Utility function to get number of path information nodes 1658 0 stevel * associated with a given client device. 1659 0 stevel */ 1660 0 stevel int 1661 0 stevel mdi_client_get_path_count(dev_info_t *cdip) 1662 0 stevel { 1663 0 stevel mdi_client_t *ct; 1664 0 stevel int count = 0; 1665 0 stevel 1666 0 stevel ct = i_devi_get_client(cdip); 1667 0 stevel if (ct != NULL) { 1668 0 stevel count = ct->ct_path_count; 1669 0 stevel } 1670 0 stevel return (count); 1671 0 stevel } 1672 0 stevel 1673 0 stevel 1674 0 stevel /* 1675 0 stevel * i_mdi_get_hash_key(): 1676 0 stevel * Create a hash using strings as keys 1677 0 stevel * 1678 0 stevel */ 1679 0 stevel static int 1680 0 stevel i_mdi_get_hash_key(char *str) 1681 0 stevel { 1682 0 stevel uint32_t g, hash = 0; 1683 0 stevel char *p; 1684 0 stevel 1685 0 stevel for (p = str; *p != '\0'; p++) { 1686 0 stevel g = *p; 1687 0 stevel hash += g; 1688 0 stevel } 1689 0 stevel return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1690 0 stevel } 1691 0 stevel 1692 0 stevel /* 1693 0 stevel * mdi_get_lb_policy(): 1694 0 stevel * Get current load balancing policy for a given client device 1695 0 stevel */ 1696 0 stevel client_lb_t 1697 0 stevel mdi_get_lb_policy(dev_info_t *cdip) 1698 0 stevel { 1699 0 stevel client_lb_t lb = LOAD_BALANCE_NONE; 1700 0 stevel mdi_client_t *ct; 1701 0 stevel 1702 0 stevel ct = i_devi_get_client(cdip); 1703 0 stevel if (ct != NULL) { 1704 0 stevel lb = ct->ct_lb; 1705 0 stevel } 1706 0 stevel return (lb); 1707 0 stevel } 1708 0 stevel 1709 0 stevel /* 1710 0 stevel * mdi_set_lb_region_size(): 1711 0 stevel * Set current region size for the load-balance 1712 0 stevel */ 1713 0 stevel int 1714 0 stevel mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1715 0 stevel { 1716 0 stevel mdi_client_t *ct; 1717 0 stevel int rv = MDI_FAILURE; 1718 0 stevel 1719 0 stevel ct = i_devi_get_client(cdip); 1720 0 stevel if (ct != NULL && ct->ct_lb_args != NULL) { 1721 0 stevel ct->ct_lb_args->region_size = region_size; 1722 0 stevel rv = MDI_SUCCESS; 1723 0 stevel } 1724 0 stevel return (rv); 1725 0 stevel } 1726 0 stevel 1727 0 stevel /* 1728 0 stevel * mdi_Set_lb_policy(): 1729 0 stevel * Set current load balancing policy for a given client device 1730 0 stevel */ 1731 0 stevel int 1732 0 stevel mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1733 0 stevel { 1734 0 stevel mdi_client_t *ct; 1735 0 stevel int rv = MDI_FAILURE; 1736 0 stevel 1737 0 stevel ct = i_devi_get_client(cdip); 1738 0 stevel if (ct != NULL) { 1739 0 stevel ct->ct_lb = lb; 1740 0 stevel rv = MDI_SUCCESS; 1741 0 stevel } 1742 0 stevel return (rv); 1743 0 stevel } 1744 0 stevel 1745 0 stevel /* 1746 0 stevel * mdi_failover(): 1747 0 stevel * failover function called by the vHCI drivers to initiate 1748 0 stevel * a failover operation. This is typically due to non-availability 1749 0 stevel * of online paths to route I/O requests. Failover can be 1750 0 stevel * triggered through user application also. 1751 0 stevel * 1752 0 stevel * The vHCI driver calls mdi_failover() to initiate a failover 1753 0 stevel * operation. mdi_failover() calls back into the vHCI driver's 1754 0 stevel * vo_failover() entry point to perform the actual failover 1755 0 stevel * operation. The reason for requiring the vHCI driver to 1756 0 stevel * initiate failover by calling mdi_failover(), instead of directly 1757 0 stevel * executing vo_failover() itself, is to ensure that the mdi 1758 0 stevel * framework can keep track of the client state properly. 1759 0 stevel * Additionally, mdi_failover() provides as a convenience the 1760 0 stevel * option of performing the failover operation synchronously or 1761 0 stevel * asynchronously 1762 0 stevel * 1763 0 stevel * Upon successful completion of the failover operation, the 1764 0 stevel * paths that were previously ONLINE will be in the STANDBY state, 1765 0 stevel * and the newly activated paths will be in the ONLINE state. 1766 0 stevel * 1767 0 stevel * The flags modifier determines whether the activation is done 1768 0 stevel * synchronously: MDI_FAILOVER_SYNC 1769 0 stevel * Return Values: 1770 0 stevel * MDI_SUCCESS 1771 0 stevel * MDI_FAILURE 1772 0 stevel * MDI_BUSY 1773 0 stevel */ 1774 0 stevel /*ARGSUSED*/ 1775 0 stevel int 1776 0 stevel mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags) 1777 0 stevel { 1778 0 stevel int rv; 1779 0 stevel mdi_client_t *ct; 1780 0 stevel 1781 0 stevel ct = i_devi_get_client(cdip); 1782 0 stevel ASSERT(ct != NULL); 1783 0 stevel if (ct == NULL) { 1784 0 stevel /* cdip is not a valid client device. Nothing more to do. */ 1785 0 stevel return (MDI_FAILURE); 1786 0 stevel } 1787 0 stevel 1788 0 stevel MDI_CLIENT_LOCK(ct); 1789 0 stevel 1790 0 stevel if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) { 1791 0 stevel /* A path to the client is being freed */ 1792 0 stevel MDI_CLIENT_UNLOCK(ct); 1793 0 stevel return (MDI_BUSY); 1794 0 stevel } 1795 0 stevel 1796 0 stevel 1797 0 stevel if (MDI_CLIENT_IS_FAILED(ct)) { 1798 0 stevel /* 1799 0 stevel * Client is in failed state. Nothing more to do. 1800 0 stevel */ 1801 0 stevel MDI_CLIENT_UNLOCK(ct); 1802 0 stevel return (MDI_FAILURE); 1803 0 stevel } 1804 0 stevel 1805 0 stevel if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 1806 0 stevel /* 1807 0 stevel * Failover is already in progress; return BUSY 1808 0 stevel */ 1809 0 stevel MDI_CLIENT_UNLOCK(ct); 1810 0 stevel return (MDI_BUSY); 1811 0 stevel } 1812 0 stevel /* 1813 0 stevel * Make sure that mdi_pathinfo node state changes are processed. 1814 0 stevel * We do not allow failovers to progress while client path state 1815 0 stevel * changes are in progress 1816 0 stevel */ 1817 0 stevel if (ct->ct_unstable) { 1818 0 stevel if (flags == MDI_FAILOVER_ASYNC) { 1819 0 stevel MDI_CLIENT_UNLOCK(ct); 1820 0 stevel return (MDI_BUSY); 1821 0 stevel } else { 1822 0 stevel while (ct->ct_unstable) 1823 0 stevel cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex); 1824 0 stevel } 1825 0 stevel } 1826 0 stevel 1827 0 stevel /* 1828 0 stevel * Client device is in stable state. Before proceeding, perform sanity 1829 0 stevel * checks again. 1830 0 stevel */ 1831 0 stevel if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) || 1832 1333 cth (!i_ddi_devi_attached(ct->ct_dip))) { 1833 0 stevel /* 1834 0 stevel * Client is in failed state. Nothing more to do. 1835 0 stevel */ 1836 0 stevel MDI_CLIENT_UNLOCK(ct); 1837 0 stevel return (MDI_FAILURE); 1838 0 stevel } 1839 0 stevel 1840 0 stevel /* 1841 0 stevel * Set the client state as failover in progress. 1842 0 stevel */ 1843 0 stevel MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct); 1844 0 stevel ct->ct_failover_flags = flags; 1845 0 stevel MDI_CLIENT_UNLOCK(ct); 1846 0 stevel 1847 0 stevel if (flags == MDI_FAILOVER_ASYNC) { 1848 0 stevel /* 1849 0 stevel * Submit the initiate failover request via CPR safe 1850 0 stevel * taskq threads. 1851 0 stevel */ 1852 0 stevel (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover, 1853 0 stevel ct, KM_SLEEP); 1854 0 stevel return (MDI_ACCEPT); 1855 0 stevel } else { 1856 0 stevel /* 1857 0 stevel * Synchronous failover mode. Typically invoked from the user 1858 0 stevel * land. 1859 0 stevel */ 1860 0 stevel rv = i_mdi_failover(ct); 1861 0 stevel } 1862 0 stevel return (rv); 1863 0 stevel } 1864 0 stevel 1865 0 stevel /* 1866 0 stevel * i_mdi_failover(): 1867 0 stevel * internal failover function. Invokes vHCI drivers failover 1868 0 stevel * callback function and process the failover status 1869 0 stevel * Return Values: 1870 0 stevel * None 1871 0 stevel * 1872 0 stevel * Note: A client device in failover state can not be detached or freed. 1873 0 stevel */ 1874 0 stevel static int 1875 0 stevel i_mdi_failover(void *arg) 1876 0 stevel { 1877 0 stevel int rv = MDI_SUCCESS; 1878 0 stevel mdi_client_t *ct = (mdi_client_t *)arg; 1879 0 stevel mdi_vhci_t *vh = ct->ct_vhci; 1880 0 stevel 1881 2155 cth ASSERT(!MDI_CLIENT_LOCKED(ct)); 1882 0 stevel 1883 0 stevel if (vh->vh_ops->vo_failover != NULL) { 1884 0 stevel /* 1885 0 stevel * Call vHCI drivers callback routine 1886 0 stevel */ 1887 0 stevel rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1888 0 stevel ct->ct_failover_flags); 1889 0 stevel } 1890 0 stevel 1891 0 stevel MDI_CLIENT_LOCK(ct); 1892 0 stevel MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1893 0 stevel 1894 0 stevel /* 1895 0 stevel * Save the failover return status 1896 0 stevel */ 1897 0 stevel ct->ct_failover_status = rv; 1898 0 stevel 1899 0 stevel /* 1900 0 stevel * As a result of failover, client status would have been changed. 1901 0 stevel * Update the client state and wake up anyone waiting on this client 1902 0 stevel * device. 1903 0 stevel */ 1904 0 stevel i_mdi_client_update_state(ct); 1905 0 stevel 1906 0 stevel cv_broadcast(&ct->ct_failover_cv); 1907 0 stevel MDI_CLIENT_UNLOCK(ct); 1908 0 stevel return (rv); 1909 0 stevel } 1910 0 stevel 1911 0 stevel /* 1912 0 stevel * Load balancing is logical block. 1913 0 stevel * IOs within the range described by region_size 1914 0 stevel * would go on the same path. This would improve the 1915 0 stevel * performance by cache-hit on some of the RAID devices. 1916 0 stevel * Search only for online paths(At some point we 1917 0 stevel * may want to balance across target ports). 1918 0 stevel * If no paths are found then default to round-robin. 1919 0 stevel */ 1920 0 stevel static int 1921 0 stevel i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1922 0 stevel { 1923 0 stevel int path_index = -1; 1924 0 stevel int online_path_count = 0; 1925 0 stevel int online_nonpref_path_count = 0; 1926 0 stevel int region_size = ct->ct_lb_args->region_size; 1927 0 stevel mdi_pathinfo_t *pip; 1928 0 stevel mdi_pathinfo_t *next; 1929 0 stevel int preferred, path_cnt; 1930 0 stevel 1931 0 stevel pip = ct->ct_path_head; 1932 0 stevel while (pip) { 1933 0 stevel MDI_PI_LOCK(pip); 1934 0 stevel if (MDI_PI(pip)->pi_state == 1935 0 stevel MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1936 0 stevel online_path_count++; 1937 0 stevel } else if (MDI_PI(pip)->pi_state == 1938 0 stevel MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1939 0 stevel online_nonpref_path_count++; 1940 0 stevel } 1941 0 stevel next = (mdi_pathinfo_t *) 1942 0 stevel MDI_PI(pip)->pi_client_link; 1943 0 stevel MDI_PI_UNLOCK(pip); 1944 0 stevel pip = next; 1945 0 stevel } 1946 0 stevel /* if found any online/preferred then use this type */ 1947 0 stevel if (online_path_count > 0) { 1948 0 stevel path_cnt = online_path_count; 1949 0 stevel preferred = 1; 1950 0 stevel } else if (online_nonpref_path_count > 0) { 1951 0 stevel path_cnt = online_nonpref_path_count; 1952 0 stevel preferred = 0; 1953 0 stevel } else { 1954 0 stevel path_cnt = 0; 1955 0 stevel } 1956 0 stevel if (path_cnt) { 1957 0 stevel path_index = (bp->b_blkno >> region_size) % path_cnt; 1958 0 stevel pip = ct->ct_path_head; 1959 0 stevel while (pip && path_index != -1) { 1960 0 stevel MDI_PI_LOCK(pip); 1961 0 stevel if (path_index == 0 && 1962 0 stevel (MDI_PI(pip)->pi_state == 1963 0 stevel MDI_PATHINFO_STATE_ONLINE) && 1964 0 stevel MDI_PI(pip)->pi_preferred == preferred) { 1965 0 stevel MDI_PI_HOLD(pip); 1966 0 stevel MDI_PI_UNLOCK(pip); 1967 0 stevel *ret_pip = pip; 1968 0 stevel return (MDI_SUCCESS); 1969 0 stevel } 1970 0 stevel path_index --; 1971 0 stevel next = (mdi_pathinfo_t *) 1972 0 stevel MDI_PI(pip)->pi_client_link; 1973 0 stevel MDI_PI_UNLOCK(pip); 1974 0 stevel pip = next; 1975 0 stevel } 1976 10696 David MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 1977 10696 David "lba %llx: path %s %p", 1978 10696 David bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip)); 1979 0 stevel } 1980 0 stevel return (MDI_FAILURE); 1981 0 stevel } 1982 0 stevel 1983 0 stevel /* 1984 0 stevel * mdi_select_path(): 1985 0 stevel * select a path to access a client device. 1986 0 stevel * 1987 0 stevel * mdi_select_path() function is called by the vHCI drivers to 1988 0 stevel * select a path to route the I/O request to. The caller passes 1989 0 stevel * the block I/O data transfer structure ("buf") as one of the 1990 0 stevel * parameters. The mpxio framework uses the buf structure 1991 0 stevel * contents to maintain per path statistics (total I/O size / 1992 0 stevel * count pending). If more than one online paths are available to 1993 0 stevel * select, the framework automatically selects a suitable path 1994 0 stevel * for routing I/O request. If a failover operation is active for 1995 0 stevel * this client device the call shall be failed with MDI_BUSY error 1996 0 stevel * code. 1997 0 stevel * 1998 0 stevel * By default this function returns a suitable path in online 1999 0 stevel * state based on the current load balancing policy. Currently 2000 0 stevel * we support LOAD_BALANCE_NONE (Previously selected online path 2001 0 stevel * will continue to be used till the path is usable) and 2002 0 stevel * LOAD_BALANCE_RR (Online paths will be selected in a round 2003 0 stevel * robin fashion), LOAD_BALANCE_LB(Online paths will be selected 2004 0 stevel * based on the logical block). The load balancing 2005 0 stevel * through vHCI drivers configuration file (driver.conf). 2006 0 stevel * 2007 0 stevel * vHCI drivers may override this default behavior by specifying 2008 6640 cth * appropriate flags. The meaning of the thrid argument depends 2009 6640 cth * on the flags specified. If MDI_SELECT_PATH_INSTANCE is set 2010 6640 cth * then the argument is the "path instance" of the path to select. 2011 6640 cth * If MDI_SELECT_PATH_INSTANCE is not set then the argument is 2012 6640 cth * "start_pip". A non NULL "start_pip" is the starting point to 2013 6640 cth * walk and find the next appropriate path. The following values 2014 6640 cth * are currently defined: MDI_SELECT_ONLINE_PATH (to select an 2015 6640 cth * ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an 2016 6640 cth * STANDBY path). 2017 0 stevel * 2018 0 stevel * The non-standard behavior is used by the scsi_vhci driver, 2019 0 stevel * whenever it has to use a STANDBY/FAULTED path. Eg. during 2020 0 stevel * attach of client devices (to avoid an unnecessary failover 2021 0 stevel * when the STANDBY path comes up first), during failover 2022 0 stevel * (to activate a STANDBY path as ONLINE). 2023 0 stevel * 2024 2155 cth * The selected path is returned in a a mdi_hold_path() state 2025 2155 cth * (pi_ref_cnt). Caller should release the hold by calling 2026 2155 cth * mdi_rele_path(). 2027 0 stevel * 2028 0 stevel * Return Values: 2029 0 stevel * MDI_SUCCESS - Completed successfully 2030 0 stevel * MDI_BUSY - Client device is busy failing over 2031 0 stevel * MDI_NOPATH - Client device is online, but no valid path are 2032 0 stevel * available to access this client device 2033 0 stevel * MDI_FAILURE - Invalid client device or state 2034 0 stevel * MDI_DEVI_ONLINING 2035 0 stevel * - Client device (struct dev_info state) is in 2036 0 stevel * onlining state. 2037 0 stevel */ 2038 0 stevel 2039 0 stevel /*ARGSUSED*/ 2040 0 stevel int 2041 0 stevel mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags, 2042 6640 cth void *arg, mdi_pathinfo_t **ret_pip) 2043 0 stevel { 2044 0 stevel mdi_client_t *ct; 2045 0 stevel mdi_pathinfo_t *pip; 2046 0 stevel mdi_pathinfo_t *next; 2047 0 stevel mdi_pathinfo_t *head; 2048 0 stevel mdi_pathinfo_t *start; 2049 0 stevel client_lb_t lbp; /* load balancing policy */ 2050 0 stevel int sb = 1; /* standard behavior */ 2051 0 stevel int preferred = 1; /* preferred path */ 2052 0 stevel int cond, cont = 1; 2053 0 stevel int retry = 0; 2054 6640 cth mdi_pathinfo_t *start_pip; /* request starting pathinfo */ 2055 6640 cth int path_instance; /* request specific path instance */ 2056 6640 cth 2057 6640 cth /* determine type of arg based on flags */ 2058 6640 cth if (flags & MDI_SELECT_PATH_INSTANCE) { 2059 6640 cth path_instance = (int)(intptr_t)arg; 2060 6640 cth start_pip = NULL; 2061 6640 cth } else { 2062 6640 cth path_instance = 0; 2063 6640 cth start_pip = (mdi_pathinfo_t *)arg; 2064 6640 cth } 2065 0 stevel 2066 0 stevel if (flags != 0) { 2067 0 stevel /* 2068 0 stevel * disable default behavior 2069 0 stevel */ 2070 0 stevel sb = 0; 2071 0 stevel } 2072 0 stevel 2073 0 stevel *ret_pip = NULL; 2074 0 stevel ct = i_devi_get_client(cdip); 2075 0 stevel if (ct == NULL) { 2076 0 stevel /* mdi extensions are NULL, Nothing more to do */ 2077 0 stevel return (MDI_FAILURE); 2078 0 stevel } 2079 0 stevel 2080 0 stevel MDI_CLIENT_LOCK(ct); 2081 0 stevel 2082 0 stevel if (sb) { 2083 0 stevel if (MDI_CLIENT_IS_FAILED(ct)) { 2084 0 stevel /* 2085 0 stevel * Client is not ready to accept any I/O requests. 2086 0 stevel * Fail this request. 2087 0 stevel */ 2088 10696 David MDI_DEBUG(2, (MDI_NOTE, cdip, 2089 10696 David "client state offline ct = %p", (void *)ct)); 2090 0 stevel MDI_CLIENT_UNLOCK(ct); 2091 0 stevel return (MDI_FAILURE); 2092 0 stevel } 2093 0 stevel 2094 0 stevel if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 2095 0 stevel /* 2096 0 stevel * Check for Failover is in progress. If so tell the 2097 0 stevel * caller that this device is busy. 2098 0 stevel */ 2099 10696 David MDI_DEBUG(2, (MDI_NOTE, cdip, 2100 10696 David "client failover in progress ct = %p", 2101 2155 cth (void *)ct)); 2102 0 stevel MDI_CLIENT_UNLOCK(ct); 2103 0 stevel return (MDI_BUSY); 2104 0 stevel } 2105 0 stevel 2106 0 stevel /* 2107 0 stevel * Check to see whether the client device is attached. 2108 0 stevel * If not so, let the vHCI driver manually select a path 2109 0 stevel * (standby) and let the probe/attach process to continue. 2110 0 stevel */ 2111 1333 cth if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) { 2112 10696 David MDI_DEBUG(4, (MDI_NOTE, cdip, 2113 10696 David "devi is onlining ct = %p", (void *)ct)); 2114 0 stevel MDI_CLIENT_UNLOCK(ct); 2115 0 stevel return (MDI_DEVI_ONLINING); 2116 0 stevel } 2117 0 stevel } 2118 0 stevel 2119 0 stevel /* 2120 0 stevel * Cache in the client list head. If head of the list is NULL 2121 0 stevel * return MDI_NOPATH 2122 0 stevel */ 2123 0 stevel head = ct->ct_path_head; 2124 0 stevel if (head == NULL) { 2125 0 stevel MDI_CLIENT_UNLOCK(ct); 2126 0 stevel return (MDI_NOPATH); 2127 6640 cth } 2128 6640 cth 2129 6640 cth /* Caller is specifying a specific pathinfo path by path_instance */ 2130 6640 cth if (path_instance) { 2131 6640 cth /* search for pathinfo with correct path_instance */ 2132 6640 cth for (pip = head; 2133 6640 cth pip && (mdi_pi_get_path_instance(pip) != path_instance); 2134 6640 cth pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) 2135 6640 cth ; 2136 6640 cth 2137 10726 Ramana /* If path can't be selected then MDI_NOPATH is returned. */ 2138 6640 cth if (pip == NULL) { 2139 6640 cth MDI_CLIENT_UNLOCK(ct); 2140 10726 Ramana return (MDI_NOPATH); 2141 6640 cth } 2142 6640 cth 2143 10696 David /* 2144 10696 David * Verify state of path. When asked to select a specific 2145 10696 David * path_instance, we select the requested path in any 2146 10696 David * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT. 2147 10696 David * We don't however select paths where the pHCI has detached. 2148 10696 David * NOTE: last pathinfo node of an opened client device may 2149 10696 David * exist in an OFFLINE state after the pHCI associated with 2150 10696 David * that path has detached (but pi_phci will be NULL if that 2151 10696 David * has occurred). 2152 10696 David */ 2153 10696 David MDI_PI_LOCK(pip); 2154 10696 David if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) || 2155 10696 David (MDI_PI(pip)->pi_phci == NULL)) { 2156 6640 cth MDI_PI_UNLOCK(pip); 2157 6640 cth MDI_CLIENT_UNLOCK(ct); 2158 6640 cth return (MDI_FAILURE); 2159 10726 Ramana } 2160 10726 Ramana 2161 10726 Ramana /* Return MDI_BUSY if we have a transient condition */ 2162 10726 Ramana if (MDI_PI_IS_TRANSIENT(pip)) { 2163 10726 Ramana MDI_PI_UNLOCK(pip); 2164 10726 Ramana MDI_CLIENT_UNLOCK(ct); 2165 10726 Ramana return (MDI_BUSY); 2166 6640 cth } 2167 6640 cth 2168 6640 cth /* 2169 6640 cth * Return the path in hold state. Caller should release the 2170 6640 cth * lock by calling mdi_rele_path() 2171 6640 cth */ 2172 6640 cth MDI_PI_HOLD(pip); 2173 6640 cth MDI_PI_UNLOCK(pip); 2174 6640 cth *ret_pip = pip; 2175 6640 cth MDI_CLIENT_UNLOCK(ct); 2176 6640 cth return (MDI_SUCCESS); 2177 0 stevel } 2178 0 stevel 2179 0 stevel /* 2180 0 stevel * for non default behavior, bypass current 2181 0 stevel * load balancing policy and always use LOAD_BALANCE_RR 2182 0 stevel * except that the start point will be adjusted based 2183 0 stevel * on the provided start_pip 2184 0 stevel */ 2185 0 stevel lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR; 2186 0 stevel 2187 0 stevel switch (lbp) { 2188 0 stevel case LOAD_BALANCE_NONE: 2189 0 stevel /* 2190 0 stevel * Load balancing is None or Alternate path mode 2191 0 stevel * Start looking for a online mdi_pathinfo node starting from 2192 0 stevel * last known selected path 2193 0 stevel */ 2194 0 stevel preferred = 1; 2195 0 stevel pip = (mdi_pathinfo_t *)ct->ct_path_last; 2196 0 stevel if (pip == NULL) { 2197 0 stevel pip = head; 2198 0 stevel } 2199 0 stevel start = pip; 2200 0 stevel do { 2201 0 stevel MDI_PI_LOCK(pip); 2202 0 stevel /* 2203 0 stevel * No need to explicitly check if the path is disabled. 2204 0 stevel * Since we are checking for state == ONLINE and the 2205 9167 Randall * same variable is used for DISABLE/ENABLE information. 2206 0 stevel */ 2207 1909 cm136836 if ((MDI_PI(pip)->pi_state == 2208 1909 cm136836 MDI_PATHINFO_STATE_ONLINE) && 2209 0 stevel preferred == MDI_PI(pip)->pi_preferred) { 2210 0 stevel /* 2211 0 stevel * Return the path in hold state. Caller should 2212 0 stevel * release the lock by calling mdi_rele_path() 2213 0 stevel */ 2214 0 stevel MDI_PI_HOLD(pip); 2215 0 stevel MDI_PI_UNLOCK(pip); 2216 0 stevel ct->ct_path_last = pip; 2217 0 stevel *ret_pip = pip; 2218 0 stevel MDI_CLIENT_UNLOCK(ct); 2219 0 stevel return (MDI_SUCCESS); 2220 0 stevel } 2221 0 stevel 2222 0 stevel /* 2223 0 stevel * Path is busy. 2224 0 stevel */ 2225 0 stevel if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2226 0 stevel MDI_PI_IS_TRANSIENT(pip)) 2227 0 stevel retry = 1; 2228 0 stevel /* 2229 0 stevel * Keep looking for a next available online path 2230 0 stevel */ 2231 0 stevel next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2232 0 stevel if (next == NULL) { 2233 0 stevel next = head; 2234 0 stevel } 2235 0 stevel MDI_PI_UNLOCK(pip); 2236 0 stevel pip = next; 2237 0 stevel if (start == pip && preferred) { 2238 0 stevel preferred = 0; 2239 0 stevel } else if (start == pip && !preferred) { 2240 0 stevel cont = 0; 2241 0 stevel } 2242 0 stevel } while (cont); 2243 0 stevel break; 2244 0 stevel 2245 0 stevel case LOAD_BALANCE_LBA: 2246 0 stevel /* 2247 0 stevel * Make sure we are looking 2248 0 stevel * for an online path. Otherwise, if it is for a STANDBY 2249 0 stevel * path request, it will go through and fetch an ONLINE 2250 0 stevel * path which is not desirable. 2251 0 stevel */ 2252 0 stevel if ((ct->ct_lb_args != NULL) && 2253 0 stevel (ct->ct_lb_args->region_size) && bp && 2254 0 stevel (sb || (flags == MDI_SELECT_ONLINE_PATH))) { 2255 0 stevel if (i_mdi_lba_lb(ct, ret_pip, bp) 2256 0 stevel == MDI_SUCCESS) { 2257 0 stevel MDI_CLIENT_UNLOCK(ct); 2258 0 stevel return (MDI_SUCCESS); 2259 0 stevel } 2260 0 stevel } 2261 10696 David /* FALLTHROUGH */ 2262 0 stevel case LOAD_BALANCE_RR: 2263 0 stevel /* 2264 0 stevel * Load balancing is Round Robin. Start looking for a online 2265 0 stevel * mdi_pathinfo node starting from last known selected path 2266 0 stevel * as the start point. If override flags are specified, 2267 0 stevel * process accordingly. 2268 0 stevel * If the search is already in effect(start_pip not null), 2269 0 stevel * then lets just use the same path preference to continue the 2270 0 stevel * traversal. 2271 0 stevel */ 2272 0 stevel 2273 0 stevel if (start_pip != NULL) { 2274 0 stevel preferred = MDI_PI(start_pip)->pi_preferred; 2275 0 stevel } else { 2276 0 stevel preferred = 1; 2277 0 stevel } 2278 0 stevel 2279 0 stevel start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip; 2280 0 stevel if (start == NULL) { 2281 0 stevel pip = head; 2282 0 stevel } else { 2283 0 stevel pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link; 2284 0 stevel if (pip == NULL) { 2285 7461 Sheshadri if ( flags & MDI_SELECT_NO_PREFERRED) { 2286 7461 Sheshadri /* 2287 7461 Sheshadri * Return since we hit the end of list 2288 7461 Sheshadri */ 2289 7461 Sheshadri MDI_CLIENT_UNLOCK(ct); 2290 7461 Sheshadri return (MDI_NOPATH); 2291 7461 Sheshadri } 2292 7461 Sheshadri 2293 0 stevel if (!sb) { 2294 0 stevel if (preferred == 0) { 2295 0 stevel /* 2296 0 stevel * Looks like we have completed 2297 0 stevel * the traversal as preferred 2298 0 stevel * value is 0. Time to bail out. 2299 0 stevel */ 2300 0 stevel *ret_pip = NULL; 2301 0 stevel MDI_CLIENT_UNLOCK(ct); 2302 0 stevel return (MDI_NOPATH); 2303 0 stevel } else { 2304 0 stevel /* 2305 0 stevel * Looks like we reached the 2306 0 stevel * end of the list. Lets enable 2307 0 stevel * traversal of non preferred 2308 0 stevel * paths. 2309 0 stevel */ 2310 0 stevel preferred = 0; 2311 0 stevel } 2312 0 stevel } 2313 0 stevel pip = head; 2314 0 stevel } 2315 0 stevel } 2316 0 stevel start = pip; 2317 0 stevel do { 2318 0 stevel MDI_PI_LOCK(pip); 2319 0 stevel if (sb) { 2320 0 stevel cond = ((MDI_PI(pip)->pi_state == 2321 0 stevel MDI_PATHINFO_STATE_ONLINE && 2322 0 stevel MDI_PI(pip)->pi_preferred == 2323 0 stevel preferred) ? 1 : 0); 2324 0 stevel } else { 2325 0 stevel if (flags == MDI_SELECT_ONLINE_PATH) { 2326 0 stevel cond = ((MDI_PI(pip)->pi_state == 2327 0 stevel MDI_PATHINFO_STATE_ONLINE && 2328 0 stevel MDI_PI(pip)->pi_preferred == 2329 0 stevel preferred) ? 1 : 0); 2330 0 stevel } else if (flags == MDI_SELECT_STANDBY_PATH) { 2331 0 stevel cond = ((MDI_PI(pip)->pi_state == 2332 0 stevel MDI_PATHINFO_STATE_STANDBY && 2333 0 stevel MDI_PI(pip)->pi_preferred == 2334 0 stevel preferred) ? 1 : 0); 2335 0 stevel } else if (flags == (MDI_SELECT_ONLINE_PATH | 2336 0 stevel MDI_SELECT_STANDBY_PATH)) { 2337 0 stevel cond = (((MDI_PI(pip)->pi_state == 2338 0 stevel MDI_PATHINFO_STATE_ONLINE || 2339 0 stevel (MDI_PI(pip)->pi_state == 2340 0 stevel MDI_PATHINFO_STATE_STANDBY)) && 2341 1909 cm136836 MDI_PI(pip)->pi_preferred == 2342 1909 cm136836 preferred) ? 1 : 0); 2343 1909 cm136836 } else if (flags == 2344 1909 cm136836 (MDI_SELECT_STANDBY_PATH | 2345 1909 cm136836 MDI_SELECT_ONLINE_PATH | 2346 1909 cm136836 MDI_SELECT_USER_DISABLE_PATH)) { 2347 1909 cm136836 cond = (((MDI_PI(pip)->pi_state == 2348 1909 cm136836 MDI_PATHINFO_STATE_ONLINE || 2349 1909 cm136836 (MDI_PI(pip)->pi_state == 2350 1909 cm136836 MDI_PATHINFO_STATE_STANDBY) || 2351 1909 cm136836 (MDI_PI(pip)->pi_state == 2352 1909 cm136836 (MDI_PATHINFO_STATE_ONLINE| 2353 1909 cm136836 MDI_PATHINFO_STATE_USER_DISABLE)) || 2354 1909 cm136836 (MDI_PI(pip)->pi_state == 2355 1909 cm136836 (MDI_PATHINFO_STATE_STANDBY | 2356 1909 cm136836 MDI_PATHINFO_STATE_USER_DISABLE)))&& 2357 0 stevel MDI_PI(pip)->pi_preferred == 2358 0 stevel preferred) ? 1 : 0); 2359 7461 Sheshadri } else if (flags == 2360 7461 Sheshadri (MDI_SELECT_STANDBY_PATH | 2361 7461 Sheshadri MDI_SELECT_ONLINE_PATH | 2362 7461 Sheshadri MDI_SELECT_NO_PREFERRED)) { 2363 7461 Sheshadri cond = (((MDI_PI(pip)->pi_state == 2364 7461 Sheshadri MDI_PATHINFO_STATE_ONLINE) || 2365 7461 Sheshadri (MDI_PI(pip)->pi_state == 2366 7461 Sheshadri MDI_PATHINFO_STATE_STANDBY)) 2367 7461 Sheshadri ? 1 : 0); 2368 0 stevel } else { 2369 0 stevel cond = 0; 2370 0 stevel } 2371 0 stevel } 2372 0 stevel /* 2373 0 stevel * No need to explicitly check if the path is disabled. 2374 0 stevel * Since we are checking for state == ONLINE and the 2375 9167 Randall * same variable is used for DISABLE/ENABLE information. 2376 0 stevel */ 2377 0 stevel if (cond) { 2378 0 stevel /* 2379 0 stevel * Return the path in hold state. Caller should 2380 0 stevel * release the lock by calling mdi_rele_path() 2381 0 stevel */ 2382 0 stevel MDI_PI_HOLD(pip); 2383 0 stevel MDI_PI_UNLOCK(pip); 2384 0 stevel if (sb) 2385 0 stevel ct->ct_path_last = pip; 2386 0 stevel *ret_pip = pip; 2387 0 stevel MDI_CLIENT_UNLOCK(ct); 2388 0 stevel return (MDI_SUCCESS); 2389 0 stevel } 2390 0 stevel /* 2391 0 stevel * Path is busy. 2392 0 stevel */ 2393 0 stevel if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2394 0 stevel MDI_PI_IS_TRANSIENT(pip)) 2395 0 stevel retry = 1; 2396 0 stevel 2397 0 stevel /* 2398 0 stevel * Keep looking for a next available online path 2399 0 stevel */ 2400 0 stevel do_again: 2401 0 stevel next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2402 0 stevel if (next == NULL) { 2403 7461 Sheshadri if ( flags & MDI_SELECT_NO_PREFERRED) { 2404 7461 Sheshadri /* 2405 7461 Sheshadri * Bail out since we hit the end of list 2406 7461 Sheshadri */ 2407 7461 Sheshadri MDI_PI_UNLOCK(pip); 2408 7461 Sheshadri break; 2409 7461 Sheshadri } 2410 7461 Sheshadri 2411 0 stevel if (!sb) { 2412 0 stevel if (preferred == 1) { 2413 0 stevel /* 2414 0 stevel * Looks like we reached the 2415 0 stevel * end of the list. Lets enable 2416 0 stevel * traversal of non preferred 2417 0 stevel * paths. 2418 0 stevel */ 2419 0 stevel preferred = 0; 2420 0 stevel next = head; 2421 0 stevel } else { 2422 0 stevel /* 2423 0 stevel * We have done both the passes 2424 0 stevel * Preferred as well as for 2425 0 stevel * Non-preferred. Bail out now. 2426 0 stevel */ 2427 0 stevel cont = 0; 2428 0 stevel } 2429 0 stevel } else { 2430 0 stevel /* 2431 0 stevel * Standard behavior case. 2432 0 stevel */ 2433 0 stevel next = head; 2434 0 stevel } 2435 0 stevel } 2436 0 stevel MDI_PI_UNLOCK(pip); 2437 0 stevel if (cont == 0) { 2438 0 stevel break; 2439 0 stevel } 2440 0 stevel pip = next; 2441 0 stevel 2442 0 stevel if (!sb) { 2443 0 stevel /* 2444 0 stevel * We need to handle the selection of 2445 0 stevel * non-preferred path in the following 2446 0 stevel * case: 2447 0 stevel * 2448 0 stevel * +------+ +------+ +------+ +-----+ 2449 0 stevel * | A : 1| - | B : 1| - | C : 0| - |NULL | 2450 0 stevel * +------+ +------+ +------+ +-----+ 2451 0 stevel * 2452 0 stevel * If we start the search with B, we need to 2453 0 stevel * skip beyond B to pick C which is non - 2454 0 stevel * preferred in the second pass. The following 2455 0 stevel * test, if true, will allow us to skip over 2456 0 stevel * the 'start'(B in the example) to select 2457 0 stevel * other non preferred elements. 2458 0 stevel */ 2459 0 stevel if ((start_pip != NULL) && (start_pip == pip) && 2460 0 stevel (MDI_PI(start_pip)->pi_preferred 2461 0 stevel != preferred)) { 2462 0 stevel /* 2463 0 stevel * try again after going past the start 2464 0 stevel * pip 2465 0 stevel */ 2466 0 stevel MDI_PI_LOCK(pip); 2467 0 stevel goto do_again; 2468 0 stevel } 2469 0 stevel } else { 2470 0 stevel /* 2471 0 stevel * Standard behavior case 2472 0 stevel */ 2473 0 stevel if (start == pip && preferred) { 2474 0 stevel /* look for nonpreferred paths */ 2475 0 stevel preferred = 0; 2476 0 stevel } else if (start == pip && !preferred) { 2477 0 stevel /* 2478 0 stevel * Exit condition 2479 0 stevel */ 2480 0 stevel cont = 0; 2481 0 stevel } 2482 0 stevel } 2483 0 stevel } while (cont); 2484 0 stevel break; 2485 0 stevel } 2486 0 stevel 2487 0 stevel MDI_CLIENT_UNLOCK(ct); 2488 0 stevel if (retry == 1) { 2489 0 stevel return (MDI_BUSY); 2490 0 stevel } else { 2491 0 stevel return (MDI_NOPATH); 2492 0 stevel } 2493 0 stevel } 2494 0 stevel 2495 0 stevel /* 2496 0 stevel * For a client, return the next available path to any phci 2497 0 stevel * 2498 0 stevel * Note: 2499 0 stevel * Caller should hold the branch's devinfo node to get a consistent 2500 0 stevel * snap shot of the mdi_pathinfo nodes. 2501 0 stevel * 2502 0 stevel * Please note that even the list is stable the mdi_pathinfo 2503 0 stevel * node state and properties are volatile. The caller should lock 2504 0 stevel * and unlock the nodes by calling mdi_pi_lock() and 2505 0 stevel * mdi_pi_unlock() functions to get a stable properties. 2506 0 stevel * 2507 0 stevel * If there is a need to use the nodes beyond the hold of the 2508 0 stevel * devinfo node period (For ex. I/O), then mdi_pathinfo node 2509 0 stevel * need to be held against unexpected removal by calling 2510 0 stevel * mdi_hold_path() and should be released by calling 2511 0 stevel * mdi_rele_path() on completion. 2512 0 stevel */ 2513 0 stevel mdi_pathinfo_t * 2514 0 stevel mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2515 0 stevel { 2516 0 stevel mdi_client_t *ct; 2517 0 stevel 2518 0 stevel if (!MDI_CLIENT(ct_dip)) 2519 0 stevel return (NULL); 2520 0 stevel 2521 0 stevel /* 2522 0 stevel * Walk through client link 2523 0 stevel */ 2524 0 stevel ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2525 0 stevel ASSERT(ct != NULL); 2526 0 stevel 2527 0 stevel if (pip == NULL) 2528 0 stevel return ((mdi_pathinfo_t *)ct->ct_path_head); 2529 0 stevel 2530 0 stevel return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2531 0 stevel } 2532 0 stevel 2533 0 stevel /* 2534 0 stevel * For a phci, return the next available path to any client 2535 0 stevel * Note: ditto mdi_get_next_phci_path() 2536 0 stevel */ 2537 0 stevel mdi_pathinfo_t * 2538 0 stevel mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2539 0 stevel { 2540 0 stevel mdi_phci_t *ph; 2541 0 stevel 2542 0 stevel if (!MDI_PHCI(ph_dip)) 2543 0 stevel return (NULL); 2544 0 stevel 2545 0 stevel /* 2546 0 stevel * Walk through pHCI link 2547 0 stevel */ 2548 0 stevel ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2549 0 stevel ASSERT(ph != NULL); 2550 0 stevel 2551 0 stevel if (pip == NULL) 2552 0 stevel return ((mdi_pathinfo_t *)ph->ph_path_head); 2553 0 stevel 2554 0 stevel return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2555 0 stevel } 2556 0 stevel 2557 0 stevel /* 2558 0 stevel * mdi_hold_path(): 2559 0 stevel * Hold the mdi_pathinfo node against unwanted unexpected free. 2560 0 stevel * Return Values: 2561 0 stevel * None 2562 0 stevel */ 2563 0 stevel void 2564 0 stevel mdi_hold_path(mdi_pathinfo_t *pip) 2565 0 stevel { 2566 0 stevel if (pip) { 2567 0 stevel MDI_PI_LOCK(pip); 2568 0 stevel MDI_PI_HOLD(pip); 2569 0 stevel MDI_PI_UNLOCK(pip); 2570 0 stevel } 2571 0 stevel } 2572 0 stevel 2573 0 stevel 2574 0 stevel /* 2575 0 stevel * mdi_rele_path(): 2576 0 stevel * Release the mdi_pathinfo node which was selected 2577 0 stevel * through mdi_select_path() mechanism or manually held by 2578 0 stevel * calling mdi_hold_path(). 2579 0 stevel * Return Values: 2580 0 stevel * None 2581 0 stevel */ 2582 0 stevel void 2583 0 stevel mdi_rele_path(mdi_pathinfo_t *pip) 2584 0 stevel { 2585 0 stevel if (pip) { 2586 0 stevel MDI_PI_LOCK(pip); 2587 0 stevel MDI_PI_RELE(pip); 2588 0 stevel if (MDI_PI(pip)->pi_ref_cnt == 0) { 2589 0 stevel cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2590 0 stevel } 2591 0 stevel MDI_PI_UNLOCK(pip); 2592 0 stevel } 2593 0 stevel } 2594 0 stevel 2595 0 stevel /* 2596 0 stevel * mdi_pi_lock(): 2597 0 stevel * Lock the mdi_pathinfo node. 2598 0 stevel * Note: 2599 0 stevel * The caller should release the lock by calling mdi_pi_unlock() 2600 0 stevel */ 2601 0 stevel void 2602 0 stevel mdi_pi_lock(mdi_pathinfo_t *pip) 2603 0 stevel { 2604 0 stevel ASSERT(pip != NULL); 2605 0 stevel if (pip) { 2606 0 stevel MDI_PI_LOCK(pip); 2607 0 stevel } 2608 0 stevel } 2609 0 stevel 2610 0 stevel 2611 0 stevel /* 2612 0 stevel * mdi_pi_unlock(): 2613 0 stevel * Unlock the mdi_pathinfo node. 2614 0 stevel * Note: 2615 0 stevel * The mdi_pathinfo node should have been locked with mdi_pi_lock() 2616 0 stevel */ 2617 0 stevel void 2618 0 stevel mdi_pi_unlock(mdi_pathinfo_t *pip) 2619 0 stevel { 2620 0 stevel ASSERT(pip != NULL); 2621 0 stevel if (pip) { 2622 0 stevel MDI_PI_UNLOCK(pip); 2623 0 stevel } 2624 0 stevel } 2625 0 stevel 2626 0 stevel /* 2627 0 stevel * mdi_pi_find(): 2628 0 stevel * Search the list of mdi_pathinfo nodes attached to the 2629 0 stevel * pHCI/Client device node whose path address matches "paddr". 2630 0 stevel * Returns a pointer to the mdi_pathinfo node if a matching node is 2631 0 stevel * found. 2632 0 stevel * Return Values: 2633 0 stevel * mdi_pathinfo node handle 2634 0 stevel * NULL 2635 0 stevel * Notes: 2636 0 stevel * Caller need not hold any locks to call this function. 2637 0 stevel */ 2638 0 stevel mdi_pathinfo_t * 2639 0 stevel mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr) 2640 0 stevel { 2641 0 stevel mdi_phci_t *ph; 2642 0 stevel mdi_vhci_t *vh; 2643 0 stevel mdi_client_t *ct; 2644 0 stevel mdi_pathinfo_t *pip = NULL; 2645 0 stevel 2646 10696 David MDI_DEBUG(2, (MDI_NOTE, pdip, 2647 10696 David "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : "")); 2648 0 stevel if ((pdip == NULL) || (paddr == NULL)) { 2649 0 stevel return (NULL); 2650 0 stevel } 2651 0 stevel ph = i_devi_get_phci(pdip); 2652 0 stevel if (ph == NULL) { 2653 0 stevel /* 2654 0 stevel * Invalid pHCI device, Nothing more to do. 2655 0 stevel */ 2656 10696 David MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci")); 2657 0 stevel return (NULL); 2658 0 stevel } 2659 0 stevel 2660 0 stevel vh = ph->ph_vhci; 2661 0 stevel if (vh == NULL) { 2662 0 stevel /* 2663 0 stevel * Invalid vHCI device, Nothing more to do. 2664 0 stevel */ 2665 10696 David MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci")); 2666 2155 cth return (NULL); 2667 2155 cth } 2668 2155 cth 2669 2155 cth /* 2670 2155 cth * Look for pathinfo node identified by paddr. 2671 0 stevel */ 2672 0 stevel if (caddr == NULL) { 2673 0 stevel /* 2674 0 stevel * Find a mdi_pathinfo node under pHCI list for a matching 2675 0 stevel * unit address. 2676 0 stevel */ 2677 2155 cth MDI_PHCI_LOCK(ph); 2678 2155 cth if (MDI_PHCI_IS_OFFLINE(ph)) { 2679 10696 David MDI_DEBUG(2, (MDI_WARN, pdip, 2680 10696 David "offline phci %p", (void *)ph)); 2681 2155 cth MDI_PHCI_UNLOCK(ph); 2682 2155 cth return (NULL); 2683 2155 cth } 2684 0 stevel pip = (mdi_pathinfo_t *)ph->ph_path_head; 2685 0 stevel 2686 0 stevel while (pip != NULL) { 2687 0 stevel if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2688 0 stevel break; 2689 0 stevel } 2690 0 stevel pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 2691 0 stevel } 2692 2155 cth MDI_PHCI_UNLOCK(ph); 2693 10696 David MDI_DEBUG(2, (MDI_NOTE, pdip, 2694 10696 David "found %s %p", mdi_pi_spathname(pip), (void *)pip)); 2695 0 stevel return (pip); 2696 0 stevel } 2697 0 stevel 2698 0 stevel /* 2699 878 ramat * XXX - Is the rest of the code in this function really necessary? 2700 878 ramat * The consumers of mdi_pi_find() can search for the desired pathinfo 2701 878 ramat * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of 2702 878 ramat * whether the search is based on the pathinfo nodes attached to 2703 878 ramat * the pHCI or the client node, the result will be the same. 2704 878 ramat */ 2705 878 ramat 2706 878 ramat /* 2707 0 stevel * Find the client device corresponding to 'caddr' 2708 0 stevel */ 2709 2155 cth MDI_VHCI_CLIENT_LOCK(vh); 2710 878 ramat 2711 878 ramat /* 2712 878 ramat * XXX - Passing NULL to the following function works as long as the 2713 878 ramat * the client addresses (caddr) are unique per vhci basis. 2714 878 ramat */ 2715 878 ramat ct = i_mdi_client_find(vh, NULL, caddr); 2716 0 stevel if (ct == NULL) { 2717 0 stevel /* 2718 0 stevel * Client not found, Obviously mdi_pathinfo node has not been 2719 0 stevel * created yet. 2720 0 stevel */ 2721 2155 cth MDI_VHCI_CLIENT_UNLOCK(vh); 2722 10696 David MDI_DEBUG(2, (MDI_NOTE, pdip, 2723 10696 David "client not found for caddr @%s", caddr ? caddr : "")); 2724 2155 cth return (NULL); 2725 0 stevel } 2726 0 stevel 2727 0 stevel /* 2728 0 stevel * Hold the client lock and look for a mdi_pathinfo node with matching 2729 0 stevel * pHCI and paddr 2730 0 stevel */ 2731 0 stevel MDI_CLIENT_LOCK(ct); 2732 0 stevel 2733 0 stevel /* 2734 0 stevel * Release the global mutex as it is no more needed. Note: We always 2735 0 stevel * respect the locking order while acquiring. 2736 0 stevel */ 2737 2155 cth MDI_VHCI_CLIENT_UNLOCK(vh); 2738 0 stevel 2739 0 stevel pip = (mdi_pathinfo_t *)ct->ct_path_head; 2740 0 stevel while (pip != NULL) { 2741 0 stevel /* 2742 0 stevel * Compare the unit address 2743 0 stevel */ 2744 0 stevel if ((MDI_PI(pip)->pi_phci == ph) && 2745 0 stevel strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2746 0 stevel break; 2747 0 stevel } 2748 0 stevel pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2749 0 stevel } 2750 0 stevel MDI_CLIENT_UNLOCK(ct); 2751 10696 David MDI_DEBUG(2, (MDI_NOTE, pdip, 2752 10696 David "found: %s %p", mdi_pi_spathname(pip), (void *)pip)); 2753 0 stevel return (pip); 2754 0 stevel } 2755 0 stevel 2756 0 stevel /* 2757 0 stevel * mdi_pi_alloc(): 2758 0 stevel * Allocate and initialize a new instance of a mdi_pathinfo node. 2759 0 stevel * The mdi_pathinfo node returned by this function identifies a 2760 0 stevel * unique device path is capable of having properties attached 2761 0 stevel * and passed to mdi_pi_online() to fully attach and online the 2762 0 stevel * path and client device node. 2763 0 stevel * The mdi_pathinfo node returned by this function must be 2764 0 stevel * destroyed using mdi_pi_free() if the path is no longer 2765 0 stevel * operational or if the caller fails to attach a client device 2766 0 stevel * node when calling mdi_pi_online(). The framework will not free 2767 0 stevel * the resources allocated. 2768 0 stevel * This function can be called from both interrupt and kernel 2769 0 stevel * contexts. DDI_NOSLEEP flag should be used while calling 2770 0 stevel * from interrupt contexts. 2771 0 stevel * Return Values: 2772 0 stevel * MDI_SUCCESS 2773 0 stevel * MDI_FAILURE 2774 0 stevel * MDI_NOMEM 2775 0 stevel */ 2776 0 stevel /*ARGSUSED*/ 2777 0 stevel int 2778 0 stevel mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2779 0 stevel char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip) 2780 0 stevel { 2781 0 stevel mdi_vhci_t *vh; 2782 0 stevel mdi_phci_t *ph; 2783 0 stevel mdi_client_t *ct; 2784 0 stevel mdi_pathinfo_t *pip = NULL; 2785 0 stevel dev_info_t *cdip; 2786 0 stevel int rv = MDI_NOMEM; 2787 878 ramat int path_allocated = 0; 2788 0 stevel 2789 10696 David MDI_DEBUG(2, (MDI_NOTE, pdip, 2790 10696 David "cname %s: caddr@%s paddr@%s", 2791 10696 David cname ? cname : "", caddr ? caddr : "", paddr ? paddr : "")); 2792 2155 cth 2793 0 stevel if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL || 2794 0 stevel ret_pip == NULL) { 2795 0 stevel /* Nothing more to do */ 2796 0 stevel return (MDI_FAILURE); 2797 0 stevel } 2798 0 stevel 2799 0 stevel *ret_pip = NULL; 2800 2155 cth 2801 2155 cth /* No allocations on detaching pHCI */ 2802 2155 cth if (DEVI_IS_DETACHING(pdip)) { 2803 2155 cth /* Invalid pHCI device, return failure */ 2804 10696 David MDI_DEBUG(1, (MDI_WARN, pdip, 2805 10696 David "!detaching pHCI=%p", (void *)pdip)); 2806 2155 cth return (MDI_FAILURE); 2807 2155 cth } 2808 2155 cth 2809 0 stevel ph = i_devi_get_phci(pdip); 2810 0 stevel ASSERT(ph != NULL); 2811 0 stevel if (ph == NULL) { 2812 0 stevel /* Invalid pHCI device, return failure */ 2813 10696 David MDI_DEBUG(1, (MDI_WARN, pdip, 2814 10696 David "!invalid pHCI=%p", (void *)pdip)); 2815 0 stevel return (MDI_FAILURE); 2816 0 stevel } 2817 0 stevel 2818 0 stevel MDI_PHCI_LOCK(ph); 2819 0 stevel vh = ph->ph_vhci; 2820 0 stevel if (vh == NULL) { 2821 0 stevel /* Invalid vHCI device, return failure */ 2822 10696 David MDI_DEBUG(1, (MDI_WARN, pdip, 2823 10696 David "!invalid vHCI=%p", (void *)pdip)); 2824 0 stevel MDI_PHCI_UNLOCK(ph); 2825 0 stevel return (MDI_FAILURE); 2826 0 stevel } 2827 0 stevel 2828 0 stevel if (MDI_PHCI_IS_READY(ph) == 0) { 2829 0 stevel /* 2830 0 stevel * Do not allow new node creation when pHCI is in 2831 0 stevel * offline/suspended states 2832 0 stevel */ 2833 10696 David MDI_DEBUG(1, (MDI_WARN, pdip, 2834 10696 David "pHCI=%p is not ready", (void *)ph)); 2835 0 stevel MDI_PHCI_UNLOCK(ph); 2836 0 stevel return (MDI_BUSY); 2837 0 stevel } 2838 0 stevel MDI_PHCI_UNSTABLE(ph); 2839 0 stevel MDI_PHCI_UNLOCK(ph); 2840 0 stevel 2841 878 ramat /* look for a matching client, create one if not found */ 2842 2155 cth MDI_VHCI_CLIENT_LOCK(vh); 2843 878 ramat ct = i_mdi_client_find(vh, cname, caddr); 2844 0 stevel if (ct == NULL) { 2845 878 ramat ct = i_mdi_client_alloc(vh, cname, caddr); 2846 878 ramat ASSERT(ct != NULL); 2847 0 stevel } 2848 0 stevel 2849 0 stevel if (ct->ct_dip == NULL) { 2850 0 stevel /* 2851 0 stevel * Allocate a devinfo node 2852 0 stevel */ 2853 0 stevel ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr, 2854 878 ramat compatible, ncompatible); 2855 0 stevel if (ct->ct_dip == NULL) { 2856 0 stevel (void) i_mdi_client_free(vh, ct); 2857 0 stevel goto fail; 2858 0 stevel } 2859 0 stevel } 2860 0 stevel cdip = ct->ct_dip; 2861 0 stevel 2862 0 stevel DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT; 2863 0 stevel DEVI(cdip)->devi_mdi_client = (caddr_t)ct; 2864 0 stevel 2865 2155 cth MDI_CLIENT_LOCK(ct); 2866 0 stevel pip = (mdi_pathinfo_t *)ct->ct_path_head; 2867 0 stevel while (pip != NULL) { 2868 0 stevel /* 2869 0 stevel * Compare the unit address 2870 0 stevel */ 2871 0 stevel if ((MDI_PI(pip)->pi_phci == ph) && 2872 0 stevel strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2873 0 stevel break; 2874 0 stevel } 2875 0 stevel pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2876 0 stevel } 2877 2155 cth MDI_CLIENT_UNLOCK(ct); 2878 0 stevel 2879 0 stevel if (pip == NULL) { 2880 0 stevel /* 2881 0 stevel * This is a new path for this client device. Allocate and 2882 0 stevel * initialize a new pathinfo node 2883 0 stevel */ 2884 878 ramat pip = i_mdi_pi_alloc(ph, paddr, ct); 2885 878 ramat ASSERT(pip != NULL); 2886 878 ramat path_allocated = 1; 2887 0 stevel } 2888 0 stevel rv = MDI_SUCCESS; 2889 0 stevel 2890 0 stevel fail: 2891 0 stevel /* 2892 0 stevel * Release the global mutex. 2893 0 stevel */ 2894 2155 cth MDI_VHCI_CLIENT_UNLOCK(vh); 2895 0 stevel 2896 0 stevel /* 2897 0 stevel * Mark the pHCI as stable 2898 0 stevel */ 2899 0 stevel MDI_PHCI_LOCK(ph); 2900 0 stevel MDI_PHCI_STABLE(ph); 2901 0 stevel MDI_PHCI_UNLOCK(ph); 2902 0 stevel *ret_pip = pip; 2903 2155 cth 2904 10696 David MDI_DEBUG(2, (MDI_NOTE, pdip, 2905 10696 David "alloc %s %p", mdi_pi_spathname(pip), (void *)pip)); 2906 1961 cth 2907 878 ramat if (path_allocated) 2908 878 ramat vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 2909 878 ramat 2910 0 stevel return (rv); 2911 0 stevel } 2912 0 stevel 2913 0 stevel /*ARGSUSED*/ 2914 0 stevel int 2915 0 stevel mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2916 0 stevel int flags, mdi_pathinfo_t **ret_pip) 2917 0 stevel { 2918 0 stevel return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0, 2919 0 stevel flags, ret_pip)); 2920 0 stevel } 2921 0 stevel 2922 0 stevel /* 2923 0 stevel * i_mdi_pi_alloc(): 2924 0 stevel * Allocate a mdi_pathinfo node and add to the pHCI path list 2925 0 stevel * Return Values: 2926 0 stevel * mdi_pathinfo 2927 0 stevel */ 2928 0 stevel /*ARGSUSED*/ 2929 0 stevel static mdi_pathinfo_t * 2930 878 ramat i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct) 2931 878 ramat { 2932 878 ramat mdi_pathinfo_t *pip; 2933 0 stevel int ct_circular; 2934 0 stevel int ph_circular; 2935 10696 David static char path[MAXPATHLEN]; /* mdi_pathmap_mutex protects */ 2936 6640 cth char *path_persistent; 2937 6640 cth int path_instance; 2938 6640 cth mod_hash_val_t hv; 2939 2155 cth 2940 2155 cth ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci)); 2941 0 stevel 2942 878 ramat pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP); 2943 0 stevel mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL); 2944 0 stevel MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT | 2945 0 stevel MDI_PATHINFO_STATE_TRANSIENT; 2946 0 stevel 2947 0 stevel if (MDI_PHCI_IS_USER_DISABLED(ph)) 2948 0 stevel MDI_PI_SET_USER_DISABLE(pip); 2949 0 stevel 2950 0 stevel if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph)) 2951 0 stevel MDI_PI_SET_DRV_DISABLE_TRANS(pip); 2952 0 stevel 2953 0 stevel if (MDI_PHCI_IS_DRV_DISABLED(ph)) 2954 0 stevel MDI_PI_SET_DRV_DISABLE(pip); 2955 0 stevel 2956 0 stevel MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT; 2957 0 stevel cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL); 2958 0 stevel MDI_PI(pip)->pi_client = ct; 2959 0 stevel MDI_PI(pip)->pi_phci = ph; 2960 878 ramat MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP); 2961 0 stevel (void) strcpy(MDI_PI(pip)->pi_addr, paddr); 2962 6640 cth 2963 6640 cth /* 2964 6640 cth * We form the "path" to the pathinfo node, and see if we have 2965 6640 cth * already allocated a 'path_instance' for that "path". If so, 2966 6640 cth * we use the already allocated 'path_instance'. If not, we 2967 6640 cth * allocate a new 'path_instance' and associate it with a copy of 2968 6640 cth * the "path" string (which is never freed). The association 2969 6640 cth * between a 'path_instance' this "path" string persists until 2970 6640 cth * reboot. 2971 6640 cth */ 2972 6640 cth mutex_enter(&mdi_pathmap_mutex); 2973 6640 cth (void) ddi_pathname(ph->ph_dip, path); 2974 10696 David (void) sprintf(path + strlen(path), "/%s@%s", 2975 9167 Randall mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip)); 2976 6640 cth if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) { 2977 6640 cth path_instance = (uint_t)(intptr_t)hv; 2978 6640 cth } else { 2979 6640 cth /* allocate a new 'path_instance' and persistent "path" */ 2980 6640 cth path_instance = mdi_pathmap_instance++; 2981 6640 cth path_persistent = i_ddi_strdup(path, KM_SLEEP); 2982 6640 cth (void) mod_hash_insert(mdi_pathmap_bypath, 2983 6640 cth (mod_hash_key_t)path_persistent, 2984 6640 cth (mod_hash_val_t)(intptr_t)path_instance); 2985 6640 cth (void) mod_hash_insert(mdi_pathmap_byinstance, 2986 6640 cth (mod_hash_key_t)(intptr_t)path_instance, 2987 6640 cth (mod_hash_val_t)path_persistent); 2988 10696 David 2989 10696 David /* create shortpath name */ 2990 10696 David (void) snprintf(path, sizeof(path), "%s%d/%s@%s", 2991 10696 David ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip), 2992 10696 David mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip)); 2993 10696 David path_persistent = i_ddi_strdup(path, KM_SLEEP); 2994 10696 David (void) mod_hash_insert(mdi_pathmap_sbyinstance, 2995 10696 David (mod_hash_key_t)(intptr_t)path_instance, 2996 10696 David (mod_hash_val_t)path_persistent); 2997 6640 cth } 2998 6640 cth mutex_exit(&mdi_pathmap_mutex); 2999 6640 cth MDI_PI(pip)->pi_path_instance = path_instance; 3000 6640 cth 3001 878 ramat (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP); 3002 878 ramat ASSERT(MDI_PI(pip)->pi_prop != NULL); 3003 0 stevel MDI_PI(pip)->pi_pprivate = NULL; 3004 0 stevel MDI_PI(pip)->pi_cprivate = NULL; 3005 0 stevel MDI_PI(pip)->pi_vprivate = NULL; 3006 0 stevel MDI_PI(pip)->pi_client_link = NULL; 3007 0 stevel MDI_PI(pip)->pi_phci_link = NULL; 3008 0 stevel MDI_PI(pip)->pi_ref_cnt = 0; 3009 0 stevel MDI_PI(pip)->pi_kstats = NULL; 3010 0 stevel MDI_PI(pip)->pi_preferred = 1; 3011 0 stevel cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL); 3012 0 stevel 3013 0 stevel /* 3014 0 stevel * Lock both dev_info nodes against changes in parallel. 3015 2155 cth * 3016 2155 cth * The ndi_devi_enter(Client), is atypical since the client is a leaf. 3017 2155 cth * This atypical operation is done to synchronize pathinfo nodes 3018 2155 cth * during devinfo snapshot (see di_register_pip) by 'pretending' that 3019 2155 cth * the pathinfo nodes are children of the Client. 3020 0 stevel */ 3021 0 stevel ndi_devi_enter(ct->ct_dip, &ct_circular); 3022 0 stevel ndi_devi_enter(ph->ph_dip, &ph_circular); 3023 0 stevel 3024 0 stevel i_mdi_phci_add_path(ph, pip); 3025 0 stevel i_mdi_client_add_path(ct, pip); 3026 0 stevel 3027 0 stevel ndi_devi_exit(ph->ph_dip, ph_circular); 3028 0 stevel ndi_devi_exit(ct->ct_dip, ct_circular); 3029 0 stevel 3030 0 stevel return (pip); 3031 6640 cth } 3032 6640 cth 3033 6640 cth /* 3034 6640 cth * mdi_pi_pathname_by_instance(): 3035 6640 cth * Lookup of "path" by 'path_instance'. Return "path". 3036 6640 cth * NOTE: returned "path" remains valid forever (until reboot). 3037 6640 cth */ 3038 6640 cth char * 3039 6640 cth mdi_pi_pathname_by_instance(int path_instance) 3040 6640 cth { 3041 6640 cth char *path; 3042 6640 cth mod_hash_val_t hv; 3043 6640 cth 3044 6640 cth /* mdi_pathmap lookup of "path" by 'path_instance' */ 3045 6640 cth mutex_enter(&mdi_pathmap_mutex); 3046 6640 cth if (mod_hash_find(mdi_pathmap_byinstance, 3047 6640 cth (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0) 3048 6640 cth path = (char *)hv; 3049 6640 cth else 3050 6640 cth path = NULL; 3051 6640 cth mutex_exit(&mdi_pathmap_mutex); 3052 6640 cth return (path); 3053 0 stevel } 3054 0 stevel 3055 0 stevel /* 3056 10696 David * mdi_pi_spathname_by_instance(): 3057 10696 David * Lookup of "shortpath" by 'path_instance'. Return "shortpath". 3058 10696 David * NOTE: returned "shortpath" remains valid forever (until reboot). 3059 10696 David */ 3060 10696 David char * 3061 10696 David mdi_pi_spathname_by_instance(int path_instance) 3062 10696 David { 3063 10696 David char *path; 3064 10696 David mod_hash_val_t hv; 3065 10696 David 3066 10696 David /* mdi_pathmap lookup of "path" by 'path_instance' */ 3067 10696 David mutex_enter(&mdi_pathmap_mutex); 3068 10696 David if (mod_hash_find(mdi_pathmap_sbyinstance, 3069 10696 David (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0) 3070 10696 David path = (char *)hv; 3071 10696 David else 3072 10696 David path = NULL; 3073 10696 David mutex_exit(&mdi_pathmap_mutex); 3074 10696 David return (path); 3075 10696 David } 3076 10696 David 3077 10696 David 3078 10696 David /* 3079 0 stevel * i_mdi_phci_add_path(): 3080 0 stevel * Add a mdi_pathinfo node to pHCI list. 3081 0 stevel * Notes: 3082 0 stevel * Caller should per-pHCI mutex 3083 0 stevel */ 3084 0 stevel static void 3085 0 stevel i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3086 0 stevel { 3087 0 stevel ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3088 0 stevel 3089 2155 cth MDI_PHCI_LOCK(ph); 3090 0 stevel if (ph->ph_path_head == NULL) { 3091 0 stevel ph->ph_path_head = pip; 3092 0 stevel } else { 3093 0 stevel MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip); 3094 0 stevel } 3095 0 stevel ph->ph_path_tail = pip; 3096 0 stevel ph->ph_path_count++; 3097 2155 cth MDI_PHCI_UNLOCK(ph); 3098 0 stevel } 3099 0 stevel 3100 0 stevel /* 3101 0 stevel * i_mdi_client_add_path(): 3102 0 stevel * Add mdi_pathinfo node to client list 3103 0 stevel */ 3104 0 stevel static void 3105 0 stevel i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 3106 0 stevel { 3107 0 stevel ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 3108 0 stevel 3109 2155 cth MDI_CLIENT_LOCK(ct); 3110 0 stevel if (ct->ct_path_head == NULL) { 3111 0 stevel ct->ct_path_head = pip; 3112 0 stevel } else { 3113 0 stevel MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip); 3114 0 stevel } 3115 0 stevel ct->ct_path_tail = pip; 3116 0 stevel ct->ct_path_count++; 3117 2155 cth MDI_CLIENT_UNLOCK(ct); 3118 0 stevel } 3119 0 stevel 3120 0 stevel /* 3121 0 stevel * mdi_pi_free(): 3122 0 stevel * Free the mdi_pathinfo node and also client device node if this 3123 0 stevel * is the last path to the device 3124 0 stevel * Return Values: 3125 0 stevel * MDI_SUCCESS 3126 0 stevel * MDI_FAILURE 3127 0 stevel * MDI_BUSY 3128 0 stevel */ 3129 0 stevel /*ARGSUSED*/ 3130 0 stevel int 3131 0 stevel mdi_pi_free(mdi_pathinfo_t *pip, int flags) 3132 0 stevel { 3133 4881 rs135747 int rv = MDI_FAILURE; 3134 0 stevel mdi_vhci_t *vh; 3135 0 stevel mdi_phci_t *ph; 3136 0 stevel mdi_client_t *ct; 3137 0 stevel int (*f)(); 3138 0 stevel int client_held = 0; 3139 0 stevel 3140 0 stevel MDI_PI_LOCK(pip); 3141 0 stevel ph = MDI_PI(pip)->pi_phci; 3142 0 stevel ASSERT(ph != NULL); 3143 0 stevel if (ph == NULL) { 3144 0 stevel /* 3145 0 stevel * Invalid pHCI device, return failure 3146 0 stevel */ 3147 10696 David MDI_DEBUG(1, (MDI_WARN, NULL, 3148 10696 David "!invalid pHCI: pip %s %p", 3149 10696 David mdi_pi_spathname(pip), (void *)pip)); 3150 0 stevel MDI_PI_UNLOCK(pip); 3151 0 stevel return (MDI_FAILURE); 3152 0 stevel } 3153 0 stevel 3154 0 stevel vh = ph->ph_vhci; 3155 0 stevel ASSERT(vh != NULL); 3156 0 stevel if (vh == NULL) { 3157 0 stevel /* Invalid pHCI device, return failure */ 3158 10696 David MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3159 10696 David "!invalid vHCI: pip %s %p", 3160 10696 David mdi_pi_spathname(pip), (void *)pip)); 3161 0 stevel MDI_PI_UNLOCK(pip); 3162 0 stevel return (MDI_FAILURE); 3163 0 stevel } 3164 0 stevel 3165 0 stevel ct = MDI_PI(pip)->pi_client; 3166 0 stevel ASSERT(ct != NULL); 3167 0 stevel if (ct == NULL) { 3168 0 stevel /* 3169 0 stevel * Invalid Client device, return failure 3170 0 stevel */ 3171 10696 David MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3172 10696 David "!invalid client: pip %s %p", 3173 10696 David mdi_pi_spathname(pip), (void *)pip)); 3174 0 stevel MDI_PI_UNLOCK(pip); 3175 0 stevel return (MDI_FAILURE); 3176 0 stevel } 3177 0 stevel 3178 0 stevel /* 3179 0 stevel * Check to see for busy condition. A mdi_pathinfo can only be freed 3180 0 stevel * if the node state is either offline or init and the reference count 3181 0 stevel * is zero. 3182 0 stevel */ 3183 0 stevel if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) || 3184 0 stevel MDI_PI_IS_INITING(pip))) { 3185 0 stevel /* 3186 0 stevel * Node is busy 3187 0 stevel */ 3188 10696 David MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3189 10696 David "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip)); 3190 0 stevel MDI_PI_UNLOCK(pip); 3191 0 stevel return (MDI_BUSY); 3192 0 stevel } 3193 0 stevel 3194 0 stevel while (MDI_PI(pip)->pi_ref_cnt != 0) { 3195 0 stevel /* 3196 0 stevel * Give a chance for pending I/Os to complete. 3197 0 stevel */ 3198 10696 David MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3199 10696 David "!%d cmds still pending on path: %s %p", 3200 10696 David MDI_PI(pip)->pi_ref_cnt, 3201 10696 David mdi_pi_spathname(pip), (void *)pip)); 3202 11066 rafael if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv, 3203 11066 rafael &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000), 3204 11066 rafael TR_CLOCK_TICK) == -1) { 3205 0 stevel /* 3206 0 stevel * The timeout time reached without ref_cnt being zero 3207 0