1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 28 * detailed discussion of the overall mpxio architecture. 
29 * 30 * Default locking order: 31 * 32 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex); 33 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex); 34 * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex); 35 * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex); 36 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 38 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 39 */ 40 41 #include <sys/note.h> 42 #include <sys/types.h> 43 #include <sys/varargs.h> 44 #include <sys/param.h> 45 #include <sys/errno.h> 46 #include <sys/uio.h> 47 #include <sys/buf.h> 48 #include <sys/modctl.h> 49 #include <sys/open.h> 50 #include <sys/kmem.h> 51 #include <sys/poll.h> 52 #include <sys/conf.h> 53 #include <sys/bootconf.h> 54 #include <sys/cmn_err.h> 55 #include <sys/stat.h> 56 #include <sys/ddi.h> 57 #include <sys/sunddi.h> 58 #include <sys/ddipropdefs.h> 59 #include <sys/sunndi.h> 60 #include <sys/ndi_impldefs.h> 61 #include <sys/promif.h> 62 #include <sys/sunmdi.h> 63 #include <sys/mdi_impldefs.h> 64 #include <sys/taskq.h> 65 #include <sys/epm.h> 66 #include <sys/sunpm.h> 67 #include <sys/modhash.h> 68 #include <sys/disp.h> 69 #include <sys/autoconf.h> 70 #include <sys/sysmacros.h> 71 72 #ifdef DEBUG 73 #include <sys/debug.h> 74 int mdi_debug = 1; 75 int mdi_debug_logonly = 0; 76 #define MDI_DEBUG(dbglevel, pargs) if (mdi_debug >= (dbglevel)) i_mdi_log pargs 77 #define MDI_WARN CE_WARN, __func__ 78 #define MDI_NOTE CE_NOTE, __func__ 79 #define MDI_CONT CE_CONT, __func__ 80 static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...); 81 #else /* !DEBUG */ 82 #define MDI_DEBUG(dbglevel, pargs) 83 #endif /* DEBUG */ 84 int mdi_debug_consoleonly = 0; 85 86 extern pri_t minclsyspri; 87 extern int modrootloaded; 88 89 /* 90 * Global mutex: 91 * Protects vHCI list and structure members. 
 */
kmutex_t	mdi_mutex;

/*
 * Registered vHCI class driver lists
 *
 * mdi_vhci_head/mdi_vhci_tail form a singly linked list (linked through
 * vh_next) with one entry per registered vHCI class; mdi_vhci_count is the
 * number of entries.  The list is walked and modified under mdi_mutex.
 */
int		mdi_vhci_count;
mdi_vhci_t	*mdi_vhci_head;
mdi_vhci_t	*mdi_vhci_tail;

/*
 * Client Hash Table size
 *
 * Number of struct client_hash entries allocated for each vHCI's
 * vh_client_table.
 */
static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;

/*
 * taskq interface definitions
 *
 * These are the arguments handed to taskq_create() for mdi_taskq in
 * i_mdi_init().  The MINALLOC/MAXALLOC values scale with the (tunable)
 * thread count mdi_taskq_n_threads.
 */
#define	MDI_TASKQ_N_THREADS	8
#define	MDI_TASKQ_PRI		minclsyspri
#define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
#define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)

taskq_t				*mdi_taskq;
static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;

/* Clock ticks in one second (1,000,000 microseconds). */
#define	TICKS_PER_SECOND	(drv_usectohz(1000000))

/*
 * The data should be "quiet" for this interval (in seconds) before the
 * vhci cached data is flushed to the disk.
 */
static int mdi_vhcache_flush_delay = 10;

/* number of seconds the vhcache flush daemon will sleep idle before exiting */
static int mdi_vhcache_flush_daemon_idle_time = 60;

/*
 * MDI falls back to discovery of all paths when a bus_config_one fails.
 * The following parameters can be used to tune this operation.
 *
 * mdi_path_discovery_boot
 *	Number of times path discovery will be attempted during early boot.
 *	Probably there is no reason to ever set this value to greater than one.
 *
 * mdi_path_discovery_postboot
 *	Number of times path discovery will be attempted after early boot.
 *	Set it to a minimum of two to allow for discovery of iscsi paths which
 *	may happen very late during booting.
 *
 * mdi_path_discovery_interval
 *	Minimum number of seconds MDI will wait between successive discovery
 *	of all paths. Set it to -1 to disable discovery of all paths.
 */
static int mdi_path_discovery_boot = 1;
static int mdi_path_discovery_postboot = 2;
static int mdi_path_discovery_interval = 10;

/*
 * number of seconds the asynchronous configuration thread will sleep idle
 * before exiting.
 */
static int mdi_async_config_idle_time = 600;

/*
 * NOTE(review): presumably the bucket count of the bus config cache hash;
 * its user is outside this part of the file - confirm.
 */
static int mdi_bus_config_cache_hash_size = 256;

/* turns off multithreaded configuration for certain operations */
static int mdi_mtc_off = 0;

/*
 * The "path" to a pathinfo node is identical to the /devices path to a
 * devinfo node had the device been enumerated under a pHCI instead of
 * a vHCI.  This pathinfo "path" is associated with a 'path_instance'.
 * This association persists across create/delete of the pathinfo nodes,
 * but not across reboot.
 *
 * The three hashes below are created in i_mdi_init(), each with
 * mdi_pathmap_hash_size buckets; mdi_pathmap_mutex is initialized there
 * as well.
 */
static uint_t		mdi_pathmap_instance = 1;	/* 0 -> any path */
static int		mdi_pathmap_hash_size = 256;
static kmutex_t		mdi_pathmap_mutex;
static mod_hash_t	*mdi_pathmap_bypath;		/* "path"->instance */
static mod_hash_t	*mdi_pathmap_byinstance;	/* instance->"path" */
static mod_hash_t	*mdi_pathmap_sbyinstance;	/* inst->shortpath */

/*
 * MDI component property name/value string definitions
 */
const char		*mdi_component_prop = "mpxio-component";
const char		*mdi_component_prop_vhci = "vhci";
const char		*mdi_component_prop_phci = "phci";
const char		*mdi_component_prop_client = "client";

/*
 * MDI client global unique identifier property name
 */
const char		*mdi_client_guid_prop = "client-guid";

/*
 * MDI client load balancing property name/value string definitions
 */
const char		*mdi_load_balance = "load-balance";
const char		*mdi_load_balance_none = "none";
const char		*mdi_load_balance_rr = "round-robin";
const char		*mdi_load_balance_lba = "logical-block";

/*
 * Obsolete vHCI class definition; to be removed after Leadville update
 */
const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;

/*
 * Message (a cmn_err format taking the class name) emitted by
 * mdi_vhci_register() when a second vHCI tries to register for a class
 * that already has one.
 */
static char vhci_greeting[] =
	"\tThere already exists one vHCI driver for class %s\n"
	"\tOnly one vHCI driver for each class is allowed\n";

/*
 * Static function prototypes
 */
static int		i_mdi_phci_offline(dev_info_t *, uint_t);
static int		i_mdi_client_offline(dev_info_t *, uint_t);
static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
static void		i_mdi_phci_post_detach(dev_info_t *,
			    ddi_detach_cmd_t, int);
static int		i_mdi_client_pre_detach(dev_info_t *,
			    ddi_detach_cmd_t);
static void		i_mdi_client_post_detach(dev_info_t *,
			    ddi_detach_cmd_t, int);
static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
static int		i_mdi_lba_lb(mdi_client_t *ct,
			    mdi_pathinfo_t **ret_pip, struct buf *buf);
static void		i_mdi_pm_hold_client(mdi_client_t *, int);
static void		i_mdi_pm_rele_client(mdi_client_t *, int);
static void		i_mdi_pm_reset_client(mdi_client_t *);
static int		i_mdi_power_all_phci(mdi_client_t *);
static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);


/*
 * Internal mdi_pathinfo node functions
 */
static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);

static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_phci_unlock(mdi_phci_t *);
static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
			    mdi_client_t *);
static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_client_remove_path(mdi_client_t *,
			    mdi_pathinfo_t *);

static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
			    mdi_pathinfo_state_t, int);
static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
			    char **, int);
static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
static void		i_mdi_client_update_state(mdi_client_t *);
static int		i_mdi_client_compute_state(mdi_client_t *,
			    mdi_phci_t *);
static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
static void		i_mdi_client_unlock(mdi_client_t *);
static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
static mdi_client_t	*i_devi_get_client(dev_info_t *);
/*
 * NOTE: this will be removed once the NWS files are changed to use the new
 * mdi_{enable,disable}_path interfaces
 */
static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
				int, int);
static mdi_pathinfo_t	*i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
				mdi_vhci_t *vh, int flags, int op);
/*
 * Failover related function prototypes
 */
static int		i_mdi_failover(void *);

/*
 * misc internal functions
 */
static int		i_mdi_get_hash_key(char *);
static int		i_map_nvlist_error_to_mdi(int);
static void		i_mdi_report_path_state(mdi_client_t *,
			    mdi_pathinfo_t *);

/*
 * vHCI cache (vhcache) support functions
 */
static void		setup_vhci_cache(mdi_vhci_t *);
static int		destroy_vhci_cache(mdi_vhci_t *);
static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
290 static boolean_t stop_vhcache_flush_thread(void *, int); 291 static void free_string_array(char **, int); 292 static void free_vhcache_phci(mdi_vhcache_phci_t *); 293 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 294 static void free_vhcache_client(mdi_vhcache_client_t *); 295 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 296 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 297 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 298 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *); 299 static void vhcache_pi_add(mdi_vhci_config_t *, 300 struct mdi_pathinfo *); 301 static void vhcache_pi_remove(mdi_vhci_config_t *, 302 struct mdi_pathinfo *); 303 static void free_phclient_path_list(mdi_phys_path_t *); 304 static void sort_vhcache_paths(mdi_vhcache_client_t *); 305 static int flush_vhcache(mdi_vhci_config_t *, int); 306 static void vhcache_dirty(mdi_vhci_config_t *); 307 static void free_async_client_config(mdi_async_client_config_t *); 308 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *); 309 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *); 310 static nvlist_t *read_on_disk_vhci_cache(char *); 311 extern int fread_nvlist(char *, nvlist_t **); 312 extern int fwrite_nvlist(char *, nvlist_t *); 313 314 /* called once when first vhci registers with mdi */ 315 static void 316 i_mdi_init() 317 { 318 static int initialized = 0; 319 320 if (initialized) 321 return; 322 initialized = 1; 323 324 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); 325 326 /* Create our taskq resources */ 327 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, 328 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, 329 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 330 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ 331 332 /* Allocate ['path_instance' <-> "path"] maps */ 333 mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL); 334 mdi_pathmap_bypath = mod_hash_create_strhash( 
335 "mdi_pathmap_bypath", mdi_pathmap_hash_size, 336 mod_hash_null_valdtor); 337 mdi_pathmap_byinstance = mod_hash_create_idhash( 338 "mdi_pathmap_byinstance", mdi_pathmap_hash_size, 339 mod_hash_null_valdtor); 340 mdi_pathmap_sbyinstance = mod_hash_create_idhash( 341 "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size, 342 mod_hash_null_valdtor); 343 } 344 345 /* 346 * mdi_get_component_type(): 347 * Return mpxio component type 348 * Return Values: 349 * MDI_COMPONENT_NONE 350 * MDI_COMPONENT_VHCI 351 * MDI_COMPONENT_PHCI 352 * MDI_COMPONENT_CLIENT 353 * XXX This doesn't work under multi-level MPxIO and should be 354 * removed when clients migrate mdi_component_is_*() interfaces. 355 */ 356 int 357 mdi_get_component_type(dev_info_t *dip) 358 { 359 return (DEVI(dip)->devi_mdi_component); 360 } 361 362 /* 363 * mdi_vhci_register(): 364 * Register a vHCI module with the mpxio framework 365 * mdi_vhci_register() is called by vHCI drivers to register the 366 * 'class_driver' vHCI driver and its MDI entrypoints with the 367 * mpxio framework. The vHCI driver must call this interface as 368 * part of its attach(9e) handler. 369 * Competing threads may try to attach mdi_vhci_register() as 370 * the vHCI drivers are loaded and attached as a result of pHCI 371 * driver instance registration (mdi_phci_register()) with the 372 * framework. 373 * Return Values: 374 * MDI_SUCCESS 375 * MDI_FAILURE 376 */ 377 /*ARGSUSED*/ 378 int 379 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, 380 int flags) 381 { 382 mdi_vhci_t *vh = NULL; 383 384 /* Registrant can't be older */ 385 ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV); 386 387 #ifdef DEBUG 388 /* 389 * IB nexus driver is loaded only when IB hardware is present. 390 * In order to be able to do this there is a need to drive the loading 391 * and attaching of the IB nexus driver (especially when an IB hardware 392 * is dynamically plugged in) when an IB HCA driver (PHCI) 393 * is being attached. 
Unfortunately this gets into the limitations 394 * of devfs as there seems to be no clean way to drive configuration 395 * of a subtree from another subtree of a devfs. Hence, do not ASSERT 396 * for IB. 397 */ 398 if (strcmp(class, MDI_HCI_CLASS_IB) != 0) 399 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 400 #endif 401 402 i_mdi_init(); 403 404 mutex_enter(&mdi_mutex); 405 /* 406 * Scan for already registered vhci 407 */ 408 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 409 if (strcmp(vh->vh_class, class) == 0) { 410 /* 411 * vHCI has already been created. Check for valid 412 * vHCI ops registration. We only support one vHCI 413 * module per class 414 */ 415 if (vh->vh_ops != NULL) { 416 mutex_exit(&mdi_mutex); 417 cmn_err(CE_NOTE, vhci_greeting, class); 418 return (MDI_FAILURE); 419 } 420 break; 421 } 422 } 423 424 /* 425 * if not yet created, create the vHCI component 426 */ 427 if (vh == NULL) { 428 struct client_hash *hash = NULL; 429 char *load_balance; 430 431 /* 432 * Allocate and initialize the mdi extensions 433 */ 434 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); 435 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), 436 KM_SLEEP); 437 vh->vh_client_table = hash; 438 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); 439 (void) strcpy(vh->vh_class, class); 440 vh->vh_lb = LOAD_BALANCE_RR; 441 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 442 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { 443 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { 444 vh->vh_lb = LOAD_BALANCE_NONE; 445 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) 446 == 0) { 447 vh->vh_lb = LOAD_BALANCE_LBA; 448 } 449 ddi_prop_free(load_balance); 450 } 451 452 mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL); 453 mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL); 454 455 /* 456 * Store the vHCI ops vectors 457 */ 458 vh->vh_dip = vdip; 459 vh->vh_ops = vops; 460 461 setup_vhci_cache(vh); 462 463 if (mdi_vhci_head == 
NULL) { 464 mdi_vhci_head = vh; 465 } 466 if (mdi_vhci_tail) { 467 mdi_vhci_tail->vh_next = vh; 468 } 469 mdi_vhci_tail = vh; 470 mdi_vhci_count++; 471 } 472 473 /* 474 * Claim the devfs node as a vhci component 475 */ 476 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; 477 478 /* 479 * Initialize our back reference from dev_info node 480 */ 481 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; 482 mutex_exit(&mdi_mutex); 483 return (MDI_SUCCESS); 484 } 485 486 /* 487 * mdi_vhci_unregister(): 488 * Unregister a vHCI module from mpxio framework 489 * mdi_vhci_unregister() is called from the detach(9E) entrypoint 490 * of a vhci to unregister it from the framework. 491 * Return Values: 492 * MDI_SUCCESS 493 * MDI_FAILURE 494 */ 495 /*ARGSUSED*/ 496 int 497 mdi_vhci_unregister(dev_info_t *vdip, int flags) 498 { 499 mdi_vhci_t *found, *vh, *prev = NULL; 500 501 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 502 503 /* 504 * Check for invalid VHCI 505 */ 506 if ((vh = i_devi_get_vhci(vdip)) == NULL) 507 return (MDI_FAILURE); 508 509 /* 510 * Scan the list of registered vHCIs for a match 511 */ 512 mutex_enter(&mdi_mutex); 513 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 514 if (found == vh) 515 break; 516 prev = found; 517 } 518 519 if (found == NULL) { 520 mutex_exit(&mdi_mutex); 521 return (MDI_FAILURE); 522 } 523 524 /* 525 * Check the vHCI, pHCI and client count. All the pHCIs and clients 526 * should have been unregistered, before a vHCI can be 527 * unregistered. 
528 */ 529 MDI_VHCI_PHCI_LOCK(vh); 530 if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) { 531 MDI_VHCI_PHCI_UNLOCK(vh); 532 mutex_exit(&mdi_mutex); 533 return (MDI_FAILURE); 534 } 535 MDI_VHCI_PHCI_UNLOCK(vh); 536 537 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 538 mutex_exit(&mdi_mutex); 539 return (MDI_FAILURE); 540 } 541 542 /* 543 * Remove the vHCI from the global list 544 */ 545 if (vh == mdi_vhci_head) { 546 mdi_vhci_head = vh->vh_next; 547 } else { 548 prev->vh_next = vh->vh_next; 549 } 550 if (vh == mdi_vhci_tail) { 551 mdi_vhci_tail = prev; 552 } 553 mdi_vhci_count--; 554 mutex_exit(&mdi_mutex); 555 556 vh->vh_ops = NULL; 557 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 558 DEVI(vdip)->devi_mdi_xhci = NULL; 559 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 560 kmem_free(vh->vh_client_table, 561 mdi_client_table_size * sizeof (struct client_hash)); 562 mutex_destroy(&vh->vh_phci_mutex); 563 mutex_destroy(&vh->vh_client_mutex); 564 565 kmem_free(vh, sizeof (mdi_vhci_t)); 566 return (MDI_SUCCESS); 567 } 568 569 /* 570 * i_mdi_vhci_class2vhci(): 571 * Look for a matching vHCI module given a vHCI class name 572 * Return Values: 573 * Handle to a vHCI component 574 * NULL 575 */ 576 static mdi_vhci_t * 577 i_mdi_vhci_class2vhci(char *class) 578 { 579 mdi_vhci_t *vh = NULL; 580 581 ASSERT(!MUTEX_HELD(&mdi_mutex)); 582 583 mutex_enter(&mdi_mutex); 584 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 585 if (strcmp(vh->vh_class, class) == 0) { 586 break; 587 } 588 } 589 mutex_exit(&mdi_mutex); 590 return (vh); 591 } 592 593 /* 594 * i_devi_get_vhci(): 595 * Utility function to get the handle to a vHCI component 596 * Return Values: 597 * Handle to a vHCI component 598 * NULL 599 */ 600 mdi_vhci_t * 601 i_devi_get_vhci(dev_info_t *vdip) 602 { 603 mdi_vhci_t *vh = NULL; 604 if (MDI_VHCI(vdip)) { 605 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 606 } 607 return (vh); 608 } 609 610 /* 611 * mdi_phci_register(): 612 * Register a pHCI 
module with mpxio framework 613 * mdi_phci_register() is called by pHCI drivers to register with 614 * the mpxio framework and a specific 'class_driver' vHCI. The 615 * pHCI driver must call this interface as part of its attach(9e) 616 * handler. 617 * Return Values: 618 * MDI_SUCCESS 619 * MDI_FAILURE 620 */ 621 /*ARGSUSED*/ 622 int 623 mdi_phci_register(char *class, dev_info_t *pdip, int flags) 624 { 625 mdi_phci_t *ph; 626 mdi_vhci_t *vh; 627 char *data; 628 629 /* 630 * Some subsystems, like fcp, perform pHCI registration from a 631 * different thread than the one doing the pHCI attach(9E) - the 632 * driver attach code is waiting for this other thread to complete. 633 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent 634 * (indicating that some thread has done an ndi_devi_enter of parent) 635 * not DEVI_BUSY_OWNED (which would indicate that we did the enter). 636 */ 637 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 638 639 /* 640 * Check for mpxio-disable property. Enable mpxio if the property is 641 * missing or not set to "yes". 642 * If the property is set to "yes" then emit a brief message. 
643 */ 644 if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable", 645 &data) == DDI_SUCCESS)) { 646 if (strcmp(data, "yes") == 0) { 647 MDI_DEBUG(1, (MDI_CONT, pdip, 648 "?multipath capabilities disabled via %s.conf.", 649 ddi_driver_name(pdip))); 650 ddi_prop_free(data); 651 return (MDI_FAILURE); 652 } 653 ddi_prop_free(data); 654 } 655 656 /* 657 * Search for a matching vHCI 658 */ 659 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 660 if (vh == NULL) { 661 return (MDI_FAILURE); 662 } 663 664 ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP); 665 mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL); 666 ph->ph_dip = pdip; 667 ph->ph_vhci = vh; 668 ph->ph_next = NULL; 669 ph->ph_unstable = 0; 670 ph->ph_vprivate = 0; 671 cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL); 672 673 MDI_PHCI_LOCK(ph); 674 MDI_PHCI_SET_POWER_UP(ph); 675 MDI_PHCI_UNLOCK(ph); 676 DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI; 677 DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph; 678 679 vhcache_phci_add(vh->vh_config, ph); 680 681 MDI_VHCI_PHCI_LOCK(vh); 682 if (vh->vh_phci_head == NULL) { 683 vh->vh_phci_head = ph; 684 } 685 if (vh->vh_phci_tail) { 686 vh->vh_phci_tail->ph_next = ph; 687 } 688 vh->vh_phci_tail = ph; 689 vh->vh_phci_count++; 690 MDI_VHCI_PHCI_UNLOCK(vh); 691 692 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER); 693 return (MDI_SUCCESS); 694 } 695 696 /* 697 * mdi_phci_unregister(): 698 * Unregister a pHCI module from mpxio framework 699 * mdi_phci_unregister() is called by the pHCI drivers from their 700 * detach(9E) handler to unregister their instances from the 701 * framework. 
702 * Return Values: 703 * MDI_SUCCESS 704 * MDI_FAILURE 705 */ 706 /*ARGSUSED*/ 707 int 708 mdi_phci_unregister(dev_info_t *pdip, int flags) 709 { 710 mdi_vhci_t *vh; 711 mdi_phci_t *ph; 712 mdi_phci_t *tmp; 713 mdi_phci_t *prev = NULL; 714 mdi_pathinfo_t *pip; 715 716 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 717 718 ph = i_devi_get_phci(pdip); 719 if (ph == NULL) { 720 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI")); 721 return (MDI_FAILURE); 722 } 723 724 vh = ph->ph_vhci; 725 ASSERT(vh != NULL); 726 if (vh == NULL) { 727 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI")); 728 return (MDI_FAILURE); 729 } 730 731 MDI_VHCI_PHCI_LOCK(vh); 732 tmp = vh->vh_phci_head; 733 while (tmp) { 734 if (tmp == ph) { 735 break; 736 } 737 prev = tmp; 738 tmp = tmp->ph_next; 739 } 740 741 if (ph == vh->vh_phci_head) { 742 vh->vh_phci_head = ph->ph_next; 743 } else { 744 prev->ph_next = ph->ph_next; 745 } 746 747 if (ph == vh->vh_phci_tail) { 748 vh->vh_phci_tail = prev; 749 } 750 751 vh->vh_phci_count--; 752 MDI_VHCI_PHCI_UNLOCK(vh); 753 754 /* Walk remaining pathinfo nodes and disassociate them from pHCI */ 755 MDI_PHCI_LOCK(ph); 756 for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip; 757 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link) 758 MDI_PI(pip)->pi_phci = NULL; 759 MDI_PHCI_UNLOCK(ph); 760 761 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class, 762 ESC_DDI_INITIATOR_UNREGISTER); 763 vhcache_phci_remove(vh->vh_config, ph); 764 cv_destroy(&ph->ph_unstable_cv); 765 mutex_destroy(&ph->ph_mutex); 766 kmem_free(ph, sizeof (mdi_phci_t)); 767 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; 768 DEVI(pdip)->devi_mdi_xhci = NULL; 769 return (MDI_SUCCESS); 770 } 771 772 /* 773 * i_devi_get_phci(): 774 * Utility function to return the phci extensions. 
775 */ 776 static mdi_phci_t * 777 i_devi_get_phci(dev_info_t *pdip) 778 { 779 mdi_phci_t *ph = NULL; 780 781 if (MDI_PHCI(pdip)) { 782 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci; 783 } 784 return (ph); 785 } 786 787 /* 788 * Single thread mdi entry into devinfo node for modifying its children. 789 * If necessary we perform an ndi_devi_enter of the vHCI before doing 790 * an ndi_devi_enter of 'dip'. We maintain circular in two parts: one 791 * for the vHCI and one for the pHCI. 792 */ 793 void 794 mdi_devi_enter(dev_info_t *phci_dip, int *circular) 795 { 796 dev_info_t *vdip; 797 int vcircular, pcircular; 798 799 /* Verify calling context */ 800 ASSERT(MDI_PHCI(phci_dip)); 801 vdip = mdi_devi_get_vdip(phci_dip); 802 ASSERT(vdip); /* A pHCI always has a vHCI */ 803 804 /* 805 * If pHCI is detaching then the framework has already entered the 806 * vHCI on a threads that went down the code path leading to 807 * detach_node(). This framework enter of the vHCI during pHCI 808 * detach is done to avoid deadlock with vHCI power management 809 * operations which enter the vHCI and the enter down the path 810 * to the pHCI. If pHCI is detaching then we piggyback this calls 811 * enter of the vHCI on frameworks vHCI enter that has already 812 * occurred - this is OK because we know that the framework thread 813 * doing detach is waiting for our completion. 814 * 815 * We should DEVI_IS_DETACHING under an enter of the parent to avoid 816 * race with detach - but we can't do that because the framework has 817 * already entered the parent, so we have some complexity instead. 
818 */ 819 for (;;) { 820 if (ndi_devi_tryenter(vdip, &vcircular)) { 821 ASSERT(vcircular != -1); 822 if (DEVI_IS_DETACHING(phci_dip)) { 823 ndi_devi_exit(vdip, vcircular); 824 vcircular = -1; 825 } 826 break; 827 } else if (DEVI_IS_DETACHING(phci_dip)) { 828 vcircular = -1; 829 break; 830 } else if (servicing_interrupt()) { 831 /* 832 * Don't delay an interrupt (and ensure adaptive 833 * mutex inversion support). 834 */ 835 ndi_devi_enter(vdip, &vcircular); 836 break; 837 } else { 838 delay_random(2); 839 } 840 } 841 842 ndi_devi_enter(phci_dip, &pcircular); 843 *circular = (vcircular << 16) | (pcircular & 0xFFFF); 844 } 845 846 /* 847 * Attempt to mdi_devi_enter. 848 */ 849 int 850 mdi_devi_tryenter(dev_info_t *phci_dip, int *circular) 851 { 852 dev_info_t *vdip; 853 int vcircular, pcircular; 854 855 /* Verify calling context */ 856 ASSERT(MDI_PHCI(phci_dip)); 857 vdip = mdi_devi_get_vdip(phci_dip); 858 ASSERT(vdip); /* A pHCI always has a vHCI */ 859 860 if (ndi_devi_tryenter(vdip, &vcircular)) { 861 if (ndi_devi_tryenter(phci_dip, &pcircular)) { 862 *circular = (vcircular << 16) | (pcircular & 0xFFFF); 863 return (1); /* locked */ 864 } 865 ndi_devi_exit(vdip, vcircular); 866 } 867 return (0); /* busy */ 868 } 869 870 /* 871 * Release mdi_devi_enter or successful mdi_devi_tryenter. 
872 */ 873 void 874 mdi_devi_exit(dev_info_t *phci_dip, int circular) 875 { 876 dev_info_t *vdip; 877 int vcircular, pcircular; 878 879 /* Verify calling context */ 880 ASSERT(MDI_PHCI(phci_dip)); 881 vdip = mdi_devi_get_vdip(phci_dip); 882 ASSERT(vdip); /* A pHCI always has a vHCI */ 883 884 /* extract two circular recursion values from single int */ 885 pcircular = (short)(circular & 0xFFFF); 886 vcircular = (short)((circular >> 16) & 0xFFFF); 887 888 ndi_devi_exit(phci_dip, pcircular); 889 if (vcircular != -1) 890 ndi_devi_exit(vdip, vcircular); 891 } 892 893 /* 894 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used 895 * around a pHCI drivers calls to mdi_pi_online/offline, after holding 896 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock 897 * with vHCI power management code during path online/offline. Each 898 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must 899 * occur within the scope of an active mdi_devi_enter that establishes the 900 * circular value. 901 */ 902 void 903 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular) 904 { 905 int pcircular; 906 907 /* Verify calling context */ 908 ASSERT(MDI_PHCI(phci_dip)); 909 910 /* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */ 911 ndi_hold_devi(phci_dip); 912 913 pcircular = (short)(circular & 0xFFFF); 914 ndi_devi_exit(phci_dip, pcircular); 915 } 916 917 void 918 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular) 919 { 920 int pcircular; 921 922 /* Verify calling context */ 923 ASSERT(MDI_PHCI(phci_dip)); 924 925 ndi_devi_enter(phci_dip, &pcircular); 926 927 /* Drop hold from mdi_devi_exit_phci. 
*/ 928 ndi_rele_devi(phci_dip); 929 930 /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */ 931 ASSERT(pcircular == ((short)(*circular & 0xFFFF))); 932 } 933 934 /* 935 * mdi_devi_get_vdip(): 936 * given a pHCI dip return vHCI dip 937 */ 938 dev_info_t * 939 mdi_devi_get_vdip(dev_info_t *pdip) 940 { 941 mdi_phci_t *ph; 942 943 ph = i_devi_get_phci(pdip); 944 if (ph && ph->ph_vhci) 945 return (ph->ph_vhci->vh_dip); 946 return (NULL); 947 } 948 949 /* 950 * mdi_devi_pdip_entered(): 951 * Return 1 if we are vHCI and have done an ndi_devi_enter 952 * of a pHCI 953 */ 954 int 955 mdi_devi_pdip_entered(dev_info_t *vdip) 956 { 957 mdi_vhci_t *vh; 958 mdi_phci_t *ph; 959 960 vh = i_devi_get_vhci(vdip); 961 if (vh == NULL) 962 return (0); 963 964 MDI_VHCI_PHCI_LOCK(vh); 965 ph = vh->vh_phci_head; 966 while (ph) { 967 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) { 968 MDI_VHCI_PHCI_UNLOCK(vh); 969 return (1); 970 } 971 ph = ph->ph_next; 972 } 973 MDI_VHCI_PHCI_UNLOCK(vh); 974 return (0); 975 } 976 977 /* 978 * mdi_phci_path2devinfo(): 979 * Utility function to search for a valid phci device given 980 * the devfs pathname. 
 */
dev_info_t *
mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
{
	char		*temp_pathname;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	dev_info_t	*pdip = NULL;

	vh = i_devi_get_vhci(vdip);
	ASSERT(vh != NULL);

	if (vh == NULL) {
		/*
		 * Invalid vHCI component, return failure
		 */
		return (NULL);
	}

	/*
	 * Compare the ddi_pathname() of each pHCI registered with this vHCI
	 * against 'pathname'; the scratch buffer is reused for every pHCI.
	 */
	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	MDI_VHCI_PHCI_LOCK(vh);
	ph = vh->vh_phci_head;
	while (ph != NULL) {
		pdip = ph->ph_dip;
		ASSERT(pdip != NULL);
		*temp_pathname = '\0';
		(void) ddi_pathname(pdip, temp_pathname);
		if (strcmp(temp_pathname, pathname) == 0) {
			break;
		}
		ph = ph->ph_next;
	}
	/* pdip still names the last pHCI examined; clear it if none matched */
	if (ph == NULL) {
		pdip = NULL;
	}
	MDI_VHCI_PHCI_UNLOCK(vh);
	kmem_free(temp_pathname, MAXPATHLEN);
	return (pdip);
}

/*
 * mdi_phci_get_path_count():
 *		get number of path information nodes associated with a given
 *		pHCI device.  Returns 0 when pdip is not a pHCI.
 */
int
mdi_phci_get_path_count(dev_info_t *pdip)
{
	mdi_phci_t	*ph;
	int		count = 0;

	ph = i_devi_get_phci(pdip);
	if (ph != NULL) {
		count = ph->ph_path_count;
	}
	return (count);
}

/*
 * i_mdi_phci_lock():
 *		Lock a pHCI device
 * Arguments:
 *		ph	pHCI to lock
 *		pip	NULL for a plain lock of 'ph'.  Non-NULL requests
 *			reverse-order locking: the pathinfo lock is dropped
 *			and retaken inside this routine while the pHCI lock
 *			is acquired.
 *			NOTE(review): the caller presumably already holds
 *			pip's lock on entry - confirm against callers.
 * Return Values:
 *		None
 * Note:
 *		The default locking order is:
 *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
 *		But there are a number of situations where locks need to be
 *		grabbed in reverse order.  This routine implements try and lock
 *		mechanism depending on the requested parameter option.
 */
static void
i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	if (pip) {
		/* Reverse locking is requested. */
		while (MDI_PHCI_TRYLOCK(ph) == 0) {
			if (servicing_interrupt()) {
				/*
				 * Interrupt context: do not delay.  Hold the
				 * pathinfo node across the unlock, then take
				 * both locks in the default order.
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				MDI_PHCI_LOCK(ph);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
				break;
			} else {
				/*
				 * tryenter failed. Try to grab again
				 * after a small delay
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				delay_random(2);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
			}
		}
	} else {
		MDI_PHCI_LOCK(ph);
	}
}

/*
 * i_mdi_phci_unlock():
 *		Unlock the pHCI component
 */
static void
i_mdi_phci_unlock(mdi_phci_t *ph)
{
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_devinfo_create():
 *		create client device's devinfo node
 * Arguments:
 *		vh		vHCI under whose devinfo node the client is
 *				created; its client lock must be held
 *		name		node name for the new devinfo node
 *		guid		value for the client GUID property
 *		compatible	optional "compatible" string array (may be
 *				NULL) with 'ncompatible' entries
 * Return Values:
 *		dev_info
 *		NULL
 * Notes:
 *		A duplicate name/guid trips the ASSERT; where the ASSERT is
 *		compiled out a warning is logged and a new node is still
 *		allocated.
 */
static dev_info_t *
i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
	char **compatible, int ncompatible)
{
	dev_info_t *cdip = NULL;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	/* Verify for duplicate entry */
	cdip = i_mdi_devinfo_find(vh, name, guid);
	ASSERT(cdip == NULL);
	if (cdip) {
		cmn_err(CE_WARN,
		    "i_mdi_devinfo_create: client %s@%s already exists",
			name ? name : "", guid ? guid : "");
	}

	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
	if (cdip == NULL)
		goto fail;

	/*
	 * Create component type and Global unique identifier
	 * properties
	 */
	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
		goto fail;
	}

	/* Decorate the node with compatible property */
	if (compatible &&
	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
		goto fail;
	}

	return (cdip);

fail:
	/* Undo a partially decorated node */
	if (cdip) {
		(void) ndi_prop_remove_all(cdip);
		(void) ndi_devi_free(cdip);
	}
	return (NULL);
}

/*
 * i_mdi_devinfo_find():
 *		Find a matching devinfo node for given client node name
 *		and its guid.  The vHCI's children are walked under an
 *		ndi_devi_enter of the vHCI node.
 * Return Values:
 *		Handle to a dev_info node or NULL
 */
static dev_info_t *
i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
{
	char			*data;
	dev_info_t 		*cdip = NULL;
	dev_info_t 		*ndip = NULL;
	int			circular;

	ndi_devi_enter(vh->vh_dip, &circular);
	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
	while ((cdip = ndip) != NULL) {
		/* Pick up the sibling first so every 'continue' advances */
		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;

		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
			continue;
		}

		/* Children without a client GUID property cannot match */
		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
		    &data) != DDI_PROP_SUCCESS) {
			continue;
		}

		if (strcmp(data, guid) != 0) {
			ddi_prop_free(data);
			continue;
		}
		ddi_prop_free(data);
		break;
	}
	ndi_devi_exit(vh->vh_dip, circular);
	return (cdip);
}

/*
 * i_mdi_devinfo_remove():
 *		Remove a client device node
 */
static int
i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
	int	rv = MDI_SUCCESS;

1196 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || 1197 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { 1198 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE); 1199 if (rv != NDI_SUCCESS) { 1200 MDI_DEBUG(1, (MDI_NOTE, cdip, 1201 "!failed: cdip %p", (void *)cdip)); 1202 } 1203 /* 1204 * Convert to MDI error code 1205 */ 1206 switch (rv) { 1207 case NDI_SUCCESS: 1208 rv = MDI_SUCCESS; 1209 break; 1210 case NDI_BUSY: 1211 rv = MDI_BUSY; 1212 break; 1213 default: 1214 rv = MDI_FAILURE; 1215 break; 1216 } 1217 } 1218 return (rv); 1219 } 1220 1221 /* 1222 * i_devi_get_client() 1223 * Utility function to get mpxio component extensions 1224 */ 1225 static mdi_client_t * 1226 i_devi_get_client(dev_info_t *cdip) 1227 { 1228 mdi_client_t *ct = NULL; 1229 1230 if (MDI_CLIENT(cdip)) { 1231 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; 1232 } 1233 return (ct); 1234 } 1235 1236 /* 1237 * i_mdi_is_child_present(): 1238 * Search for the presence of client device dev_info node 1239 */ 1240 static int 1241 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) 1242 { 1243 int rv = MDI_FAILURE; 1244 struct dev_info *dip; 1245 int circular; 1246 1247 ndi_devi_enter(vdip, &circular); 1248 dip = DEVI(vdip)->devi_child; 1249 while (dip) { 1250 if (dip == DEVI(cdip)) { 1251 rv = MDI_SUCCESS; 1252 break; 1253 } 1254 dip = dip->devi_sibling; 1255 } 1256 ndi_devi_exit(vdip, circular); 1257 return (rv); 1258 } 1259 1260 1261 /* 1262 * i_mdi_client_lock(): 1263 * Grab client component lock 1264 * Return Values: 1265 * None 1266 * Note: 1267 * The default locking order is: 1268 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 1269 * But there are number of situations where locks need to be 1270 * grabbed in reverse order. This routine implements try and lock 1271 * mechanism depending on the requested parameter option. 
1272 */ 1273 static void 1274 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip) 1275 { 1276 if (pip) { 1277 /* 1278 * Reverse locking is requested. 1279 */ 1280 while (MDI_CLIENT_TRYLOCK(ct) == 0) { 1281 if (servicing_interrupt()) { 1282 MDI_PI_HOLD(pip); 1283 MDI_PI_UNLOCK(pip); 1284 MDI_CLIENT_LOCK(ct); 1285 MDI_PI_LOCK(pip); 1286 MDI_PI_RELE(pip); 1287 break; 1288 } else { 1289 /* 1290 * tryenter failed. Try to grab again 1291 * after a small delay 1292 */ 1293 MDI_PI_HOLD(pip); 1294 MDI_PI_UNLOCK(pip); 1295 delay_random(2); 1296 MDI_PI_LOCK(pip); 1297 MDI_PI_RELE(pip); 1298 } 1299 } 1300 } else { 1301 MDI_CLIENT_LOCK(ct); 1302 } 1303 } 1304 1305 /* 1306 * i_mdi_client_unlock(): 1307 * Unlock a client component 1308 */ 1309 static void 1310 i_mdi_client_unlock(mdi_client_t *ct) 1311 { 1312 MDI_CLIENT_UNLOCK(ct); 1313 } 1314 1315 /* 1316 * i_mdi_client_alloc(): 1317 * Allocate and initialize a client structure. Caller should 1318 * hold the vhci client lock. 1319 * Return Values: 1320 * Handle to a client component 1321 */ 1322 /*ARGSUSED*/ 1323 static mdi_client_t * 1324 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid) 1325 { 1326 mdi_client_t *ct; 1327 1328 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1329 1330 /* 1331 * Allocate and initialize a component structure. 
1332 */ 1333 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP); 1334 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 1335 ct->ct_hnext = NULL; 1336 ct->ct_hprev = NULL; 1337 ct->ct_dip = NULL; 1338 ct->ct_vhci = vh; 1339 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1340 (void) strcpy(ct->ct_drvname, name); 1341 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP); 1342 (void) strcpy(ct->ct_guid, lguid); 1343 ct->ct_cprivate = NULL; 1344 ct->ct_vprivate = NULL; 1345 ct->ct_flags = 0; 1346 ct->ct_state = MDI_CLIENT_STATE_FAILED; 1347 MDI_CLIENT_LOCK(ct); 1348 MDI_CLIENT_SET_OFFLINE(ct); 1349 MDI_CLIENT_SET_DETACH(ct); 1350 MDI_CLIENT_SET_POWER_UP(ct); 1351 MDI_CLIENT_UNLOCK(ct); 1352 ct->ct_failover_flags = 0; 1353 ct->ct_failover_status = 0; 1354 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); 1355 ct->ct_unstable = 0; 1356 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); 1357 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); 1358 ct->ct_lb = vh->vh_lb; 1359 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP); 1360 ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE; 1361 ct->ct_path_count = 0; 1362 ct->ct_path_head = NULL; 1363 ct->ct_path_tail = NULL; 1364 ct->ct_path_last = NULL; 1365 1366 /* 1367 * Add this client component to our client hash queue 1368 */ 1369 i_mdi_client_enlist_table(vh, ct); 1370 return (ct); 1371 } 1372 1373 /* 1374 * i_mdi_client_enlist_table(): 1375 * Attach the client device to the client hash table. Caller 1376 * should hold the vhci client lock. 
1377 */ 1378 static void 1379 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1380 { 1381 int index; 1382 struct client_hash *head; 1383 1384 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1385 1386 index = i_mdi_get_hash_key(ct->ct_guid); 1387 head = &vh->vh_client_table[index]; 1388 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; 1389 head->ct_hash_head = ct; 1390 head->ct_hash_count++; 1391 vh->vh_client_count++; 1392 } 1393 1394 /* 1395 * i_mdi_client_delist_table(): 1396 * Attach the client device to the client hash table. 1397 * Caller should hold the vhci client lock. 1398 */ 1399 static void 1400 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1401 { 1402 int index; 1403 char *guid; 1404 struct client_hash *head; 1405 mdi_client_t *next; 1406 mdi_client_t *last; 1407 1408 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1409 1410 guid = ct->ct_guid; 1411 index = i_mdi_get_hash_key(guid); 1412 head = &vh->vh_client_table[index]; 1413 1414 last = NULL; 1415 next = (mdi_client_t *)head->ct_hash_head; 1416 while (next != NULL) { 1417 if (next == ct) { 1418 break; 1419 } 1420 last = next; 1421 next = next->ct_hnext; 1422 } 1423 1424 if (next) { 1425 head->ct_hash_count--; 1426 if (last == NULL) { 1427 head->ct_hash_head = ct->ct_hnext; 1428 } else { 1429 last->ct_hnext = ct->ct_hnext; 1430 } 1431 ct->ct_hnext = NULL; 1432 vh->vh_client_count--; 1433 } 1434 } 1435 1436 1437 /* 1438 * i_mdi_client_free(): 1439 * Free a client component 1440 */ 1441 static int 1442 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct) 1443 { 1444 int rv = MDI_SUCCESS; 1445 int flags = ct->ct_flags; 1446 dev_info_t *cdip; 1447 dev_info_t *vdip; 1448 1449 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1450 1451 vdip = vh->vh_dip; 1452 cdip = ct->ct_dip; 1453 1454 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); 1455 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; 1456 DEVI(cdip)->devi_mdi_client = NULL; 1457 1458 /* 1459 * Clear out back ref. 
to dev_info_t node 1460 */ 1461 ct->ct_dip = NULL; 1462 1463 /* 1464 * Remove this client from our hash queue 1465 */ 1466 i_mdi_client_delist_table(vh, ct); 1467 1468 /* 1469 * Uninitialize and free the component 1470 */ 1471 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); 1472 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); 1473 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); 1474 cv_destroy(&ct->ct_failover_cv); 1475 cv_destroy(&ct->ct_unstable_cv); 1476 cv_destroy(&ct->ct_powerchange_cv); 1477 mutex_destroy(&ct->ct_mutex); 1478 kmem_free(ct, sizeof (*ct)); 1479 1480 if (cdip != NULL) { 1481 MDI_VHCI_CLIENT_UNLOCK(vh); 1482 (void) i_mdi_devinfo_remove(vdip, cdip, flags); 1483 MDI_VHCI_CLIENT_LOCK(vh); 1484 } 1485 return (rv); 1486 } 1487 1488 /* 1489 * i_mdi_client_find(): 1490 * Find the client structure corresponding to a given guid 1491 * Caller should hold the vhci client lock. 1492 */ 1493 static mdi_client_t * 1494 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid) 1495 { 1496 int index; 1497 struct client_hash *head; 1498 mdi_client_t *ct; 1499 1500 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1501 1502 index = i_mdi_get_hash_key(guid); 1503 head = &vh->vh_client_table[index]; 1504 1505 ct = head->ct_hash_head; 1506 while (ct != NULL) { 1507 if (strcmp(ct->ct_guid, guid) == 0 && 1508 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) { 1509 break; 1510 } 1511 ct = ct->ct_hnext; 1512 } 1513 return (ct); 1514 } 1515 1516 /* 1517 * i_mdi_client_update_state(): 1518 * Compute and update client device state 1519 * Notes: 1520 * A client device can be in any of three possible states: 1521 * 1522 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more 1523 * one online/standby paths. Can tolerate failures. 1524 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with 1525 * no alternate paths available as standby. A failure on the online 1526 * would result in loss of access to device data. 
1527 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1528 * no paths available to access the device. 1529 */ 1530 static void 1531 i_mdi_client_update_state(mdi_client_t *ct) 1532 { 1533 int state; 1534 1535 ASSERT(MDI_CLIENT_LOCKED(ct)); 1536 state = i_mdi_client_compute_state(ct, NULL); 1537 MDI_CLIENT_SET_STATE(ct, state); 1538 } 1539 1540 /* 1541 * i_mdi_client_compute_state(): 1542 * Compute client device state 1543 * 1544 * mdi_phci_t * Pointer to pHCI structure which should 1545 * while computing the new value. Used by 1546 * i_mdi_phci_offline() to find the new 1547 * client state after DR of a pHCI. 1548 */ 1549 static int 1550 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1551 { 1552 int state; 1553 int online_count = 0; 1554 int standby_count = 0; 1555 mdi_pathinfo_t *pip, *next; 1556 1557 ASSERT(MDI_CLIENT_LOCKED(ct)); 1558 pip = ct->ct_path_head; 1559 while (pip != NULL) { 1560 MDI_PI_LOCK(pip); 1561 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1562 if (MDI_PI(pip)->pi_phci == ph) { 1563 MDI_PI_UNLOCK(pip); 1564 pip = next; 1565 continue; 1566 } 1567 1568 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1569 == MDI_PATHINFO_STATE_ONLINE) 1570 online_count++; 1571 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1572 == MDI_PATHINFO_STATE_STANDBY) 1573 standby_count++; 1574 MDI_PI_UNLOCK(pip); 1575 pip = next; 1576 } 1577 1578 if (online_count == 0) { 1579 if (standby_count == 0) { 1580 state = MDI_CLIENT_STATE_FAILED; 1581 MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip, 1582 "client state failed: ct = %p", (void *)ct)); 1583 } else if (standby_count == 1) { 1584 state = MDI_CLIENT_STATE_DEGRADED; 1585 } else { 1586 state = MDI_CLIENT_STATE_OPTIMAL; 1587 } 1588 } else if (online_count == 1) { 1589 if (standby_count == 0) { 1590 state = MDI_CLIENT_STATE_DEGRADED; 1591 } else { 1592 state = MDI_CLIENT_STATE_OPTIMAL; 1593 } 1594 } else { 1595 state = MDI_CLIENT_STATE_OPTIMAL; 1596 } 1597 return (state); 1598 } 1599 
1600 /* 1601 * i_mdi_client2devinfo(): 1602 * Utility function 1603 */ 1604 dev_info_t * 1605 i_mdi_client2devinfo(mdi_client_t *ct) 1606 { 1607 return (ct->ct_dip); 1608 } 1609 1610 /* 1611 * mdi_client_path2_devinfo(): 1612 * Given the parent devinfo and child devfs pathname, search for 1613 * a valid devfs node handle. 1614 */ 1615 dev_info_t * 1616 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1617 { 1618 dev_info_t *cdip = NULL; 1619 dev_info_t *ndip = NULL; 1620 char *temp_pathname; 1621 int circular; 1622 1623 /* 1624 * Allocate temp buffer 1625 */ 1626 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1627 1628 /* 1629 * Lock parent against changes 1630 */ 1631 ndi_devi_enter(vdip, &circular); 1632 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1633 while ((cdip = ndip) != NULL) { 1634 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1635 1636 *temp_pathname = '\0'; 1637 (void) ddi_pathname(cdip, temp_pathname); 1638 if (strcmp(temp_pathname, pathname) == 0) { 1639 break; 1640 } 1641 } 1642 /* 1643 * Release devinfo lock 1644 */ 1645 ndi_devi_exit(vdip, circular); 1646 1647 /* 1648 * Free the temp buffer 1649 */ 1650 kmem_free(temp_pathname, MAXPATHLEN); 1651 return (cdip); 1652 } 1653 1654 /* 1655 * mdi_client_get_path_count(): 1656 * Utility function to get number of path information nodes 1657 * associated with a given client device. 
1658 */ 1659 int 1660 mdi_client_get_path_count(dev_info_t *cdip) 1661 { 1662 mdi_client_t *ct; 1663 int count = 0; 1664 1665 ct = i_devi_get_client(cdip); 1666 if (ct != NULL) { 1667 count = ct->ct_path_count; 1668 } 1669 return (count); 1670 } 1671 1672 1673 /* 1674 * i_mdi_get_hash_key(): 1675 * Create a hash using strings as keys 1676 * 1677 */ 1678 static int 1679 i_mdi_get_hash_key(char *str) 1680 { 1681 uint32_t g, hash = 0; 1682 char *p; 1683 1684 for (p = str; *p != '\0'; p++) { 1685 g = *p; 1686 hash += g; 1687 } 1688 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1689 } 1690 1691 /* 1692 * mdi_get_lb_policy(): 1693 * Get current load balancing policy for a given client device 1694 */ 1695 client_lb_t 1696 mdi_get_lb_policy(dev_info_t *cdip) 1697 { 1698 client_lb_t lb = LOAD_BALANCE_NONE; 1699 mdi_client_t *ct; 1700 1701 ct = i_devi_get_client(cdip); 1702 if (ct != NULL) { 1703 lb = ct->ct_lb; 1704 } 1705 return (lb); 1706 } 1707 1708 /* 1709 * mdi_set_lb_region_size(): 1710 * Set current region size for the load-balance 1711 */ 1712 int 1713 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1714 { 1715 mdi_client_t *ct; 1716 int rv = MDI_FAILURE; 1717 1718 ct = i_devi_get_client(cdip); 1719 if (ct != NULL && ct->ct_lb_args != NULL) { 1720 ct->ct_lb_args->region_size = region_size; 1721 rv = MDI_SUCCESS; 1722 } 1723 return (rv); 1724 } 1725 1726 /* 1727 * mdi_Set_lb_policy(): 1728 * Set current load balancing policy for a given client device 1729 */ 1730 int 1731 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1732 { 1733 mdi_client_t *ct; 1734 int rv = MDI_FAILURE; 1735 1736 ct = i_devi_get_client(cdip); 1737 if (ct != NULL) { 1738 ct->ct_lb = lb; 1739 rv = MDI_SUCCESS; 1740 } 1741 return (rv); 1742 } 1743 1744 /* 1745 * mdi_failover(): 1746 * failover function called by the vHCI drivers to initiate 1747 * a failover operation. This is typically due to non-availability 1748 * of online paths to route I/O requests. 
Failover can be 1749 * triggered through user application also. 1750 * 1751 * The vHCI driver calls mdi_failover() to initiate a failover 1752 * operation. mdi_failover() calls back into the vHCI driver's 1753 * vo_failover() entry point to perform the actual failover 1754 * operation. The reason for requiring the vHCI driver to 1755 * initiate failover by calling mdi_failover(), instead of directly 1756 * executing vo_failover() itself, is to ensure that the mdi 1757 * framework can keep track of the client state properly. 1758 * Additionally, mdi_failover() provides as a convenience the 1759 * option of performing the failover operation synchronously or 1760 * asynchronously 1761 * 1762 * Upon successful completion of the failover operation, the 1763 * paths that were previously ONLINE will be in the STANDBY state, 1764 * and the newly activated paths will be in the ONLINE state. 1765 * 1766 * The flags modifier determines whether the activation is done 1767 * synchronously: MDI_FAILOVER_SYNC 1768 * Return Values: 1769 * MDI_SUCCESS 1770 * MDI_FAILURE 1771 * MDI_BUSY 1772 */ 1773 /*ARGSUSED*/ 1774 int 1775 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags) 1776 { 1777 int rv; 1778 mdi_client_t *ct; 1779 1780 ct = i_devi_get_client(cdip); 1781 ASSERT(ct != NULL); 1782 if (ct == NULL) { 1783 /* cdip is not a valid client device. Nothing more to do. */ 1784 return (MDI_FAILURE); 1785 } 1786 1787 MDI_CLIENT_LOCK(ct); 1788 1789 if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) { 1790 /* A path to the client is being freed */ 1791 MDI_CLIENT_UNLOCK(ct); 1792 return (MDI_BUSY); 1793 } 1794 1795 1796 if (MDI_CLIENT_IS_FAILED(ct)) { 1797 /* 1798 * Client is in failed state. Nothing more to do. 
1799 */ 1800 MDI_CLIENT_UNLOCK(ct); 1801 return (MDI_FAILURE); 1802 } 1803 1804 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 1805 /* 1806 * Failover is already in progress; return BUSY 1807 */ 1808 MDI_CLIENT_UNLOCK(ct); 1809 return (MDI_BUSY); 1810 } 1811 /* 1812 * Make sure that mdi_pathinfo node state changes are processed. 1813 * We do not allow failovers to progress while client path state 1814 * changes are in progress 1815 */ 1816 if (ct->ct_unstable) { 1817 if (flags == MDI_FAILOVER_ASYNC) { 1818 MDI_CLIENT_UNLOCK(ct); 1819 return (MDI_BUSY); 1820 } else { 1821 while (ct->ct_unstable) 1822 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex); 1823 } 1824 } 1825 1826 /* 1827 * Client device is in stable state. Before proceeding, perform sanity 1828 * checks again. 1829 */ 1830 if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) || 1831 (!i_ddi_devi_attached(ct->ct_dip))) { 1832 /* 1833 * Client is in failed state. Nothing more to do. 1834 */ 1835 MDI_CLIENT_UNLOCK(ct); 1836 return (MDI_FAILURE); 1837 } 1838 1839 /* 1840 * Set the client state as failover in progress. 1841 */ 1842 MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct); 1843 ct->ct_failover_flags = flags; 1844 MDI_CLIENT_UNLOCK(ct); 1845 1846 if (flags == MDI_FAILOVER_ASYNC) { 1847 /* 1848 * Submit the initiate failover request via CPR safe 1849 * taskq threads. 1850 */ 1851 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover, 1852 ct, KM_SLEEP); 1853 return (MDI_ACCEPT); 1854 } else { 1855 /* 1856 * Synchronous failover mode. Typically invoked from the user 1857 * land. 1858 */ 1859 rv = i_mdi_failover(ct); 1860 } 1861 return (rv); 1862 } 1863 1864 /* 1865 * i_mdi_failover(): 1866 * internal failover function. Invokes vHCI drivers failover 1867 * callback function and process the failover status 1868 * Return Values: 1869 * None 1870 * 1871 * Note: A client device in failover state can not be detached or freed. 
1872 */ 1873 static int 1874 i_mdi_failover(void *arg) 1875 { 1876 int rv = MDI_SUCCESS; 1877 mdi_client_t *ct = (mdi_client_t *)arg; 1878 mdi_vhci_t *vh = ct->ct_vhci; 1879 1880 ASSERT(!MDI_CLIENT_LOCKED(ct)); 1881 1882 if (vh->vh_ops->vo_failover != NULL) { 1883 /* 1884 * Call vHCI drivers callback routine 1885 */ 1886 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1887 ct->ct_failover_flags); 1888 } 1889 1890 MDI_CLIENT_LOCK(ct); 1891 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1892 1893 /* 1894 * Save the failover return status 1895 */ 1896 ct->ct_failover_status = rv; 1897 1898 /* 1899 * As a result of failover, client status would have been changed. 1900 * Update the client state and wake up anyone waiting on this client 1901 * device. 1902 */ 1903 i_mdi_client_update_state(ct); 1904 1905 cv_broadcast(&ct->ct_failover_cv); 1906 MDI_CLIENT_UNLOCK(ct); 1907 return (rv); 1908 } 1909 1910 /* 1911 * Load balancing is logical block. 1912 * IOs within the range described by region_size 1913 * would go on the same path. This would improve the 1914 * performance by cache-hit on some of the RAID devices. 1915 * Search only for online paths(At some point we 1916 * may want to balance across target ports). 1917 * If no paths are found then default to round-robin. 
1918 */ 1919 static int 1920 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1921 { 1922 int path_index = -1; 1923 int online_path_count = 0; 1924 int online_nonpref_path_count = 0; 1925 int region_size = ct->ct_lb_args->region_size; 1926 mdi_pathinfo_t *pip; 1927 mdi_pathinfo_t *next; 1928 int preferred, path_cnt; 1929 1930 pip = ct->ct_path_head; 1931 while (pip) { 1932 MDI_PI_LOCK(pip); 1933 if (MDI_PI(pip)->pi_state == 1934 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1935 online_path_count++; 1936 } else if (MDI_PI(pip)->pi_state == 1937 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1938 online_nonpref_path_count++; 1939 } 1940 next = (mdi_pathinfo_t *) 1941 MDI_PI(pip)->pi_client_link; 1942 MDI_PI_UNLOCK(pip); 1943 pip = next; 1944 } 1945 /* if found any online/preferred then use this type */ 1946 if (online_path_count > 0) { 1947 path_cnt = online_path_count; 1948 preferred = 1; 1949 } else if (online_nonpref_path_count > 0) { 1950 path_cnt = online_nonpref_path_count; 1951 preferred = 0; 1952 } else { 1953 path_cnt = 0; 1954 } 1955 if (path_cnt) { 1956 path_index = (bp->b_blkno >> region_size) % path_cnt; 1957 pip = ct->ct_path_head; 1958 while (pip && path_index != -1) { 1959 MDI_PI_LOCK(pip); 1960 if (path_index == 0 && 1961 (MDI_PI(pip)->pi_state == 1962 MDI_PATHINFO_STATE_ONLINE) && 1963 MDI_PI(pip)->pi_preferred == preferred) { 1964 MDI_PI_HOLD(pip); 1965 MDI_PI_UNLOCK(pip); 1966 *ret_pip = pip; 1967 return (MDI_SUCCESS); 1968 } 1969 path_index --; 1970 next = (mdi_pathinfo_t *) 1971 MDI_PI(pip)->pi_client_link; 1972 MDI_PI_UNLOCK(pip); 1973 pip = next; 1974 } 1975 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 1976 "lba %llx: path %s %p", 1977 bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip)); 1978 } 1979 return (MDI_FAILURE); 1980 } 1981 1982 /* 1983 * mdi_select_path(): 1984 * select a path to access a client device. 
*
 *		mdi_select_path() function is called by the vHCI drivers to
 *		select a path to route the I/O request to.  The caller passes
 *		the block I/O data transfer structure ("buf") as one of the
 *		parameters.  The mpxio framework uses the buf structure
 *		contents to maintain per path statistics (total I/O size /
 *		count pending).  If more than one online path is available to
 *		select, the framework automatically selects a suitable path
 *		for routing I/O request. If a failover operation is active for
 *		this client device the call shall be failed with MDI_BUSY error
 *		code.
 *
 *		By default this function returns a suitable path in online
 *		state based on the current load balancing policy.  Currently
 *		we support LOAD_BALANCE_NONE (Previously selected online path
 *		will continue to be used till the path is usable) and
 *		LOAD_BALANCE_RR (Online paths will be selected in a round
 *		robin fashion), LOAD_BALANCE_LBA (Online paths will be selected
 *		based on the logical block).  The load balancing policy is set
 *		through the vHCI driver's configuration file (driver.conf).
 *
 *		vHCI drivers may override this default behavior by specifying
 *		appropriate flags.  The meaning of the third argument depends
 *		on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
 *		then the argument is the "path instance" of the path to select.
 *		If MDI_SELECT_PATH_INSTANCE is not set then the argument is
 *		"start_pip". A non NULL "start_pip" is the starting point to
 *		walk and find the next appropriate path.  The following values
 *		are currently defined: MDI_SELECT_ONLINE_PATH (to select an
 *		ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select a
 *		STANDBY path).
 *
 *		The non-standard behavior is used by the scsi_vhci driver,
 *		whenever it has to use a STANDBY/FAULTED path.  Eg.
during 2019 * attach of client devices (to avoid an unnecessary failover 2020 * when the STANDBY path comes up first), during failover 2021 * (to activate a STANDBY path as ONLINE). 2022 * 2023 * The selected path is returned in a a mdi_hold_path() state 2024 * (pi_ref_cnt). Caller should release the hold by calling 2025 * mdi_rele_path(). 2026 * 2027 * Return Values: 2028 * MDI_SUCCESS - Completed successfully 2029 * MDI_BUSY - Client device is busy failing over 2030 * MDI_NOPATH - Client device is online, but no valid path are 2031 * available to access this client device 2032 * MDI_FAILURE - Invalid client device or state 2033 * MDI_DEVI_ONLINING 2034 * - Client device (struct dev_info state) is in 2035 * onlining state. 2036 */ 2037 2038 /*ARGSUSED*/ 2039 int 2040 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags, 2041 void *arg, mdi_pathinfo_t **ret_pip) 2042 { 2043 mdi_client_t *ct; 2044 mdi_pathinfo_t *pip; 2045 mdi_pathinfo_t *next; 2046 mdi_pathinfo_t *head; 2047 mdi_pathinfo_t *start; 2048 client_lb_t lbp; /* load balancing policy */ 2049 int sb = 1; /* standard behavior */ 2050 int preferred = 1; /* preferred path */ 2051 int cond, cont = 1; 2052 int retry = 0; 2053 mdi_pathinfo_t *start_pip; /* request starting pathinfo */ 2054 int path_instance; /* request specific path instance */ 2055 2056 /* determine type of arg based on flags */ 2057 if (flags & MDI_SELECT_PATH_INSTANCE) { 2058 path_instance = (int)(intptr_t)arg; 2059 start_pip = NULL; 2060 } else { 2061 path_instance = 0; 2062 start_pip = (mdi_pathinfo_t *)arg; 2063 } 2064 2065 if (flags != 0) { 2066 /* 2067 * disable default behavior 2068 */ 2069 sb = 0; 2070 } 2071 2072 *ret_pip = NULL; 2073 ct = i_devi_get_client(cdip); 2074 if (ct == NULL) { 2075 /* mdi extensions are NULL, Nothing more to do */ 2076 return (MDI_FAILURE); 2077 } 2078 2079 MDI_CLIENT_LOCK(ct); 2080 2081 if (sb) { 2082 if (MDI_CLIENT_IS_FAILED(ct)) { 2083 /* 2084 * Client is not ready to accept any I/O requests. 
2085 * Fail this request. 2086 */ 2087 MDI_DEBUG(2, (MDI_NOTE, cdip, 2088 "client state offline ct = %p", (void *)ct)); 2089 MDI_CLIENT_UNLOCK(ct); 2090 return (MDI_FAILURE); 2091 } 2092 2093 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 2094 /* 2095 * Check for Failover is in progress. If so tell the 2096 * caller that this device is busy. 2097 */ 2098 MDI_DEBUG(2, (MDI_NOTE, cdip, 2099 "client failover in progress ct = %p", 2100 (void *)ct)); 2101 MDI_CLIENT_UNLOCK(ct); 2102 return (MDI_BUSY); 2103 } 2104 2105 /* 2106 * Check to see whether the client device is attached. 2107 * If not so, let the vHCI driver manually select a path 2108 * (standby) and let the probe/attach process to continue. 2109 */ 2110 if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) { 2111 MDI_DEBUG(4, (MDI_NOTE, cdip, 2112 "devi is onlining ct = %p", (void *)ct)); 2113 MDI_CLIENT_UNLOCK(ct); 2114 return (MDI_DEVI_ONLINING); 2115 } 2116 } 2117 2118 /* 2119 * Cache in the client list head. If head of the list is NULL 2120 * return MDI_NOPATH 2121 */ 2122 head = ct->ct_path_head; 2123 if (head == NULL) { 2124 MDI_CLIENT_UNLOCK(ct); 2125 return (MDI_NOPATH); 2126 } 2127 2128 /* Caller is specifying a specific pathinfo path by path_instance */ 2129 if (path_instance) { 2130 /* search for pathinfo with correct path_instance */ 2131 for (pip = head; 2132 pip && (mdi_pi_get_path_instance(pip) != path_instance); 2133 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) 2134 ; 2135 2136 /* If path can't be selected then MDI_NOPATH is returned. */ 2137 if (pip == NULL) { 2138 MDI_CLIENT_UNLOCK(ct); 2139 return (MDI_NOPATH); 2140 } 2141 2142 /* 2143 * Verify state of path. When asked to select a specific 2144 * path_instance, we select the requested path in any 2145 * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT. 2146 * We don't however select paths where the pHCI has detached. 
2147 * NOTE: last pathinfo node of an opened client device may 2148 * exist in an OFFLINE state after the pHCI associated with 2149 * that path has detached (but pi_phci will be NULL if that 2150 * has occurred). 2151 */ 2152 MDI_PI_LOCK(pip); 2153 if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) || 2154 (MDI_PI(pip)->pi_phci == NULL)) { 2155 MDI_PI_UNLOCK(pip); 2156 MDI_CLIENT_UNLOCK(ct); 2157 return (MDI_FAILURE); 2158 } 2159 2160 /* Return MDI_BUSY if we have a transient condition */ 2161 if (MDI_PI_IS_TRANSIENT(pip)) { 2162 MDI_PI_UNLOCK(pip); 2163 MDI_CLIENT_UNLOCK(ct); 2164 return (MDI_BUSY); 2165 } 2166 2167 /* 2168 * Return the path in hold state. Caller should release the 2169 * lock by calling mdi_rele_path() 2170 */ 2171 MDI_PI_HOLD(pip); 2172 MDI_PI_UNLOCK(pip); 2173 *ret_pip = pip; 2174 MDI_CLIENT_UNLOCK(ct); 2175 return (MDI_SUCCESS); 2176 } 2177 2178 /* 2179 * for non default behavior, bypass current 2180 * load balancing policy and always use LOAD_BALANCE_RR 2181 * except that the start point will be adjusted based 2182 * on the provided start_pip 2183 */ 2184 lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR; 2185 2186 switch (lbp) { 2187 case LOAD_BALANCE_NONE: 2188 /* 2189 * Load balancing is None or Alternate path mode 2190 * Start looking for a online mdi_pathinfo node starting from 2191 * last known selected path 2192 */ 2193 preferred = 1; 2194 pip = (mdi_pathinfo_t *)ct->ct_path_last; 2195 if (pip == NULL) { 2196 pip = head; 2197 } 2198 start = pip; 2199 do { 2200 MDI_PI_LOCK(pip); 2201 /* 2202 * No need to explicitly check if the path is disabled. 2203 * Since we are checking for state == ONLINE and the 2204 * same variable is used for DISABLE/ENABLE information. 2205 */ 2206 if ((MDI_PI(pip)->pi_state == 2207 MDI_PATHINFO_STATE_ONLINE) && 2208 preferred == MDI_PI(pip)->pi_preferred) { 2209 /* 2210 * Return the path in hold state. 
Caller should 2211 * release the lock by calling mdi_rele_path() 2212 */ 2213 MDI_PI_HOLD(pip); 2214 MDI_PI_UNLOCK(pip); 2215 ct->ct_path_last = pip; 2216 *ret_pip = pip; 2217 MDI_CLIENT_UNLOCK(ct); 2218 return (MDI_SUCCESS); 2219 } 2220 2221 /* 2222 * Path is busy. 2223 */ 2224 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2225 MDI_PI_IS_TRANSIENT(pip)) 2226 retry = 1; 2227 /* 2228 * Keep looking for a next available online path 2229 */ 2230 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2231 if (next == NULL) { 2232 next = head; 2233 } 2234 MDI_PI_UNLOCK(pip); 2235 pip = next; 2236 if (start == pip && preferred) { 2237 preferred = 0; 2238 } else if (start == pip && !preferred) { 2239 cont = 0; 2240 } 2241 } while (cont); 2242 break; 2243 2244 case LOAD_BALANCE_LBA: 2245 /* 2246 * Make sure we are looking 2247 * for an online path. Otherwise, if it is for a STANDBY 2248 * path request, it will go through and fetch an ONLINE 2249 * path which is not desirable. 2250 */ 2251 if ((ct->ct_lb_args != NULL) && 2252 (ct->ct_lb_args->region_size) && bp && 2253 (sb || (flags == MDI_SELECT_ONLINE_PATH))) { 2254 if (i_mdi_lba_lb(ct, ret_pip, bp) 2255 == MDI_SUCCESS) { 2256 MDI_CLIENT_UNLOCK(ct); 2257 return (MDI_SUCCESS); 2258 } 2259 } 2260 /* FALLTHROUGH */ 2261 case LOAD_BALANCE_RR: 2262 /* 2263 * Load balancing is Round Robin. Start looking for a online 2264 * mdi_pathinfo node starting from last known selected path 2265 * as the start point. If override flags are specified, 2266 * process accordingly. 2267 * If the search is already in effect(start_pip not null), 2268 * then lets just use the same path preference to continue the 2269 * traversal. 2270 */ 2271 2272 if (start_pip != NULL) { 2273 preferred = MDI_PI(start_pip)->pi_preferred; 2274 } else { 2275 preferred = 1; 2276 } 2277 2278 start = sb ? 
(mdi_pathinfo_t *)ct->ct_path_last : start_pip; 2279 if (start == NULL) { 2280 pip = head; 2281 } else { 2282 pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link; 2283 if (pip == NULL) { 2284 if ( flags & MDI_SELECT_NO_PREFERRED) { 2285 /* 2286 * Return since we hit the end of list 2287 */ 2288 MDI_CLIENT_UNLOCK(ct); 2289 return (MDI_NOPATH); 2290 } 2291 2292 if (!sb) { 2293 if (preferred == 0) { 2294 /* 2295 * Looks like we have completed 2296 * the traversal as preferred 2297 * value is 0. Time to bail out. 2298 */ 2299 *ret_pip = NULL; 2300 MDI_CLIENT_UNLOCK(ct); 2301 return (MDI_NOPATH); 2302 } else { 2303 /* 2304 * Looks like we reached the 2305 * end of the list. Lets enable 2306 * traversal of non preferred 2307 * paths. 2308 */ 2309 preferred = 0; 2310 } 2311 } 2312 pip = head; 2313 } 2314 } 2315 start = pip; 2316 do { 2317 MDI_PI_LOCK(pip); 2318 if (sb) { 2319 cond = ((MDI_PI(pip)->pi_state == 2320 MDI_PATHINFO_STATE_ONLINE && 2321 MDI_PI(pip)->pi_preferred == 2322 preferred) ? 1 : 0); 2323 } else { 2324 if (flags == MDI_SELECT_ONLINE_PATH) { 2325 cond = ((MDI_PI(pip)->pi_state == 2326 MDI_PATHINFO_STATE_ONLINE && 2327 MDI_PI(pip)->pi_preferred == 2328 preferred) ? 1 : 0); 2329 } else if (flags == MDI_SELECT_STANDBY_PATH) { 2330 cond = ((MDI_PI(pip)->pi_state == 2331 MDI_PATHINFO_STATE_STANDBY && 2332 MDI_PI(pip)->pi_preferred == 2333 preferred) ? 1 : 0); 2334 } else if (flags == (MDI_SELECT_ONLINE_PATH | 2335 MDI_SELECT_STANDBY_PATH)) { 2336 cond = (((MDI_PI(pip)->pi_state == 2337 MDI_PATHINFO_STATE_ONLINE || 2338 (MDI_PI(pip)->pi_state == 2339 MDI_PATHINFO_STATE_STANDBY)) && 2340 MDI_PI(pip)->pi_preferred == 2341 preferred) ? 
1 : 0); 2342 } else if (flags == 2343 (MDI_SELECT_STANDBY_PATH | 2344 MDI_SELECT_ONLINE_PATH | 2345 MDI_SELECT_USER_DISABLE_PATH)) { 2346 cond = (((MDI_PI(pip)->pi_state == 2347 MDI_PATHINFO_STATE_ONLINE || 2348 (MDI_PI(pip)->pi_state == 2349 MDI_PATHINFO_STATE_STANDBY) || 2350 (MDI_PI(pip)->pi_state == 2351 (MDI_PATHINFO_STATE_ONLINE| 2352 MDI_PATHINFO_STATE_USER_DISABLE)) || 2353 (MDI_PI(pip)->pi_state == 2354 (MDI_PATHINFO_STATE_STANDBY | 2355 MDI_PATHINFO_STATE_USER_DISABLE)))&& 2356 MDI_PI(pip)->pi_preferred == 2357 preferred) ? 1 : 0); 2358 } else if (flags == 2359 (MDI_SELECT_STANDBY_PATH | 2360 MDI_SELECT_ONLINE_PATH | 2361 MDI_SELECT_NO_PREFERRED)) { 2362 cond = (((MDI_PI(pip)->pi_state == 2363 MDI_PATHINFO_STATE_ONLINE) || 2364 (MDI_PI(pip)->pi_state == 2365 MDI_PATHINFO_STATE_STANDBY)) 2366 ? 1 : 0); 2367 } else { 2368 cond = 0; 2369 } 2370 } 2371 /* 2372 * No need to explicitly check if the path is disabled. 2373 * Since we are checking for state == ONLINE and the 2374 * same variable is used for DISABLE/ENABLE information. 2375 */ 2376 if (cond) { 2377 /* 2378 * Return the path in hold state. Caller should 2379 * release the lock by calling mdi_rele_path() 2380 */ 2381 MDI_PI_HOLD(pip); 2382 MDI_PI_UNLOCK(pip); 2383 if (sb) 2384 ct->ct_path_last = pip; 2385 *ret_pip = pip; 2386 MDI_CLIENT_UNLOCK(ct); 2387 return (MDI_SUCCESS); 2388 } 2389 /* 2390 * Path is busy. 2391 */ 2392 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2393 MDI_PI_IS_TRANSIENT(pip)) 2394 retry = 1; 2395 2396 /* 2397 * Keep looking for a next available online path 2398 */ 2399 do_again: 2400 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2401 if (next == NULL) { 2402 if ( flags & MDI_SELECT_NO_PREFERRED) { 2403 /* 2404 * Bail out since we hit the end of list 2405 */ 2406 MDI_PI_UNLOCK(pip); 2407 break; 2408 } 2409 2410 if (!sb) { 2411 if (preferred == 1) { 2412 /* 2413 * Looks like we reached the 2414 * end of the list. Lets enable 2415 * traversal of non preferred 2416 * paths. 
2417 */ 2418 preferred = 0; 2419 next = head; 2420 } else { 2421 /* 2422 * We have done both the passes 2423 * Preferred as well as for 2424 * Non-preferred. Bail out now. 2425 */ 2426 cont = 0; 2427 } 2428 } else { 2429 /* 2430 * Standard behavior case. 2431 */ 2432 next = head; 2433 } 2434 } 2435 MDI_PI_UNLOCK(pip); 2436 if (cont == 0) { 2437 break; 2438 } 2439 pip = next; 2440 2441 if (!sb) { 2442 /* 2443 * We need to handle the selection of 2444 * non-preferred path in the following 2445 * case: 2446 * 2447 * +------+ +------+ +------+ +-----+ 2448 * | A : 1| - | B : 1| - | C : 0| - |NULL | 2449 * +------+ +------+ +------+ +-----+ 2450 * 2451 * If we start the search with B, we need to 2452 * skip beyond B to pick C which is non - 2453 * preferred in the second pass. The following 2454 * test, if true, will allow us to skip over 2455 * the 'start'(B in the example) to select 2456 * other non preferred elements. 2457 */ 2458 if ((start_pip != NULL) && (start_pip == pip) && 2459 (MDI_PI(start_pip)->pi_preferred 2460 != preferred)) { 2461 /* 2462 * try again after going past the start 2463 * pip 2464 */ 2465 MDI_PI_LOCK(pip); 2466 goto do_again; 2467 } 2468 } else { 2469 /* 2470 * Standard behavior case 2471 */ 2472 if (start == pip && preferred) { 2473 /* look for nonpreferred paths */ 2474 preferred = 0; 2475 } else if (start == pip && !preferred) { 2476 /* 2477 * Exit condition 2478 */ 2479 cont = 0; 2480 } 2481 } 2482 } while (cont); 2483 break; 2484 } 2485 2486 MDI_CLIENT_UNLOCK(ct); 2487 if (retry == 1) { 2488 return (MDI_BUSY); 2489 } else { 2490 return (MDI_NOPATH); 2491 } 2492 } 2493 2494 /* 2495 * For a client, return the next available path to any phci 2496 * 2497 * Note: 2498 * Caller should hold the branch's devinfo node to get a consistent 2499 * snap shot of the mdi_pathinfo nodes. 2500 * 2501 * Please note that even the list is stable the mdi_pathinfo 2502 * node state and properties are volatile. 
The caller should lock 2503 * and unlock the nodes by calling mdi_pi_lock() and 2504 * mdi_pi_unlock() functions to get a stable properties. 2505 * 2506 * If there is a need to use the nodes beyond the hold of the 2507 * devinfo node period (For ex. I/O), then mdi_pathinfo node 2508 * need to be held against unexpected removal by calling 2509 * mdi_hold_path() and should be released by calling 2510 * mdi_rele_path() on completion. 2511 */ 2512 mdi_pathinfo_t * 2513 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2514 { 2515 mdi_client_t *ct; 2516 2517 if (!MDI_CLIENT(ct_dip)) 2518 return (NULL); 2519 2520 /* 2521 * Walk through client link 2522 */ 2523 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2524 ASSERT(ct != NULL); 2525 2526 if (pip == NULL) 2527 return ((mdi_pathinfo_t *)ct->ct_path_head); 2528 2529 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2530 } 2531 2532 /* 2533 * For a phci, return the next available path to any client 2534 * Note: ditto mdi_get_next_phci_path() 2535 */ 2536 mdi_pathinfo_t * 2537 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2538 { 2539 mdi_phci_t *ph; 2540 2541 if (!MDI_PHCI(ph_dip)) 2542 return (NULL); 2543 2544 /* 2545 * Walk through pHCI link 2546 */ 2547 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2548 ASSERT(ph != NULL); 2549 2550 if (pip == NULL) 2551 return ((mdi_pathinfo_t *)ph->ph_path_head); 2552 2553 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2554 } 2555 2556 /* 2557 * mdi_hold_path(): 2558 * Hold the mdi_pathinfo node against unwanted unexpected free. 2559 * Return Values: 2560 * None 2561 */ 2562 void 2563 mdi_hold_path(mdi_pathinfo_t *pip) 2564 { 2565 if (pip) { 2566 MDI_PI_LOCK(pip); 2567 MDI_PI_HOLD(pip); 2568 MDI_PI_UNLOCK(pip); 2569 } 2570 } 2571 2572 2573 /* 2574 * mdi_rele_path(): 2575 * Release the mdi_pathinfo node which was selected 2576 * through mdi_select_path() mechanism or manually held by 2577 * calling mdi_hold_path(). 
2578 * Return Values: 2579 * None 2580 */ 2581 void 2582 mdi_rele_path(mdi_pathinfo_t *pip) 2583 { 2584 if (pip) { 2585 MDI_PI_LOCK(pip); 2586 MDI_PI_RELE(pip); 2587 if (MDI_PI(pip)->pi_ref_cnt == 0) { 2588 cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2589 } 2590 MDI_PI_UNLOCK(pip); 2591 } 2592 } 2593 2594 /* 2595 * mdi_pi_lock(): 2596 * Lock the mdi_pathinfo node. 2597 * Note: 2598 * The caller should release the lock by calling mdi_pi_unlock() 2599 */ 2600 void 2601 mdi_pi_lock(mdi_pathinfo_t *pip) 2602 { 2603 ASSERT(pip != NULL); 2604 if (pip) { 2605 MDI_PI_LOCK(pip); 2606 } 2607 } 2608 2609 2610 /* 2611 * mdi_pi_unlock(): 2612 * Unlock the mdi_pathinfo node. 2613 * Note: 2614 * The mdi_pathinfo node should have been locked with mdi_pi_lock() 2615 */ 2616 void 2617 mdi_pi_unlock(mdi_pathinfo_t *pip) 2618 { 2619 ASSERT(pip != NULL); 2620 if (pip) { 2621 MDI_PI_UNLOCK(pip); 2622 } 2623 } 2624 2625 /* 2626 * mdi_pi_find(): 2627 * Search the list of mdi_pathinfo nodes attached to the 2628 * pHCI/Client device node whose path address matches "paddr". 2629 * Returns a pointer to the mdi_pathinfo node if a matching node is 2630 * found. 2631 * Return Values: 2632 * mdi_pathinfo node handle 2633 * NULL 2634 * Notes: 2635 * Caller need not hold any locks to call this function. 2636 */ 2637 mdi_pathinfo_t * 2638 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr) 2639 { 2640 mdi_phci_t *ph; 2641 mdi_vhci_t *vh; 2642 mdi_client_t *ct; 2643 mdi_pathinfo_t *pip = NULL; 2644 2645 MDI_DEBUG(2, (MDI_NOTE, pdip, 2646 "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : "")); 2647 if ((pdip == NULL) || (paddr == NULL)) { 2648 return (NULL); 2649 } 2650 ph = i_devi_get_phci(pdip); 2651 if (ph == NULL) { 2652 /* 2653 * Invalid pHCI device, Nothing more to do. 2654 */ 2655 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci")); 2656 return (NULL); 2657 } 2658 2659 vh = ph->ph_vhci; 2660 if (vh == NULL) { 2661 /* 2662 * Invalid vHCI device, Nothing more to do. 
2663 */ 2664 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci")); 2665 return (NULL); 2666 } 2667 2668 /* 2669 * Look for pathinfo node identified by paddr. 2670 */ 2671 if (caddr == NULL) { 2672 /* 2673 * Find a mdi_pathinfo node under pHCI list for a matching 2674 * unit address. 2675 */ 2676 MDI_PHCI_LOCK(ph); 2677 if (MDI_PHCI_IS_OFFLINE(ph)) { 2678 MDI_DEBUG(2, (MDI_WARN, pdip, 2679 "offline phci %p", (void *)ph)); 2680 MDI_PHCI_UNLOCK(ph); 2681 return (NULL); 2682 } 2683 pip = (mdi_pathinfo_t *)ph->ph_path_head; 2684 2685 while (pip != NULL) { 2686 if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2687 break; 2688 } 2689 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 2690 } 2691 MDI_PHCI_UNLOCK(ph); 2692 MDI_DEBUG(2, (MDI_NOTE, pdip, 2693 "found %s %p", mdi_pi_spathname(pip), (void *)pip)); 2694 return (pip); 2695 } 2696 2697 /* 2698 * XXX - Is the rest of the code in this function really necessary? 2699 * The consumers of mdi_pi_find() can search for the desired pathinfo 2700 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of 2701 * whether the search is based on the pathinfo nodes attached to 2702 * the pHCI or the client node, the result will be the same. 2703 */ 2704 2705 /* 2706 * Find the client device corresponding to 'caddr' 2707 */ 2708 MDI_VHCI_CLIENT_LOCK(vh); 2709 2710 /* 2711 * XXX - Passing NULL to the following function works as long as the 2712 * the client addresses (caddr) are unique per vhci basis. 2713 */ 2714 ct = i_mdi_client_find(vh, NULL, caddr); 2715 if (ct == NULL) { 2716 /* 2717 * Client not found, Obviously mdi_pathinfo node has not been 2718 * created yet. 2719 */ 2720 MDI_VHCI_CLIENT_UNLOCK(vh); 2721 MDI_DEBUG(2, (MDI_NOTE, pdip, 2722 "client not found for caddr @%s", caddr ? 
caddr : "")); 2723 return (NULL); 2724 } 2725 2726 /* 2727 * Hold the client lock and look for a mdi_pathinfo node with matching 2728 * pHCI and paddr 2729 */ 2730 MDI_CLIENT_LOCK(ct); 2731 2732 /* 2733 * Release the global mutex as it is no more needed. Note: We always 2734 * respect the locking order while acquiring. 2735 */ 2736 MDI_VHCI_CLIENT_UNLOCK(vh); 2737 2738 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2739 while (pip != NULL) { 2740 /* 2741 * Compare the unit address 2742 */ 2743 if ((MDI_PI(pip)->pi_phci == ph) && 2744 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2745 break; 2746 } 2747 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2748 } 2749 MDI_CLIENT_UNLOCK(ct); 2750 MDI_DEBUG(2, (MDI_NOTE, pdip, 2751 "found: %s %p", mdi_pi_spathname(pip), (void *)pip)); 2752 return (pip); 2753 } 2754 2755 /* 2756 * mdi_pi_alloc(): 2757 * Allocate and initialize a new instance of a mdi_pathinfo node. 2758 * The mdi_pathinfo node returned by this function identifies a 2759 * unique device path is capable of having properties attached 2760 * and passed to mdi_pi_online() to fully attach and online the 2761 * path and client device node. 2762 * The mdi_pathinfo node returned by this function must be 2763 * destroyed using mdi_pi_free() if the path is no longer 2764 * operational or if the caller fails to attach a client device 2765 * node when calling mdi_pi_online(). The framework will not free 2766 * the resources allocated. 2767 * This function can be called from both interrupt and kernel 2768 * contexts. DDI_NOSLEEP flag should be used while calling 2769 * from interrupt contexts. 
2770 * Return Values: 2771 * MDI_SUCCESS 2772 * MDI_FAILURE 2773 * MDI_NOMEM 2774 */ 2775 /*ARGSUSED*/ 2776 int 2777 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2778 char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip) 2779 { 2780 mdi_vhci_t *vh; 2781 mdi_phci_t *ph; 2782 mdi_client_t *ct; 2783 mdi_pathinfo_t *pip = NULL; 2784 dev_info_t *cdip; 2785 int rv = MDI_NOMEM; 2786 int path_allocated = 0; 2787 2788 MDI_DEBUG(2, (MDI_NOTE, pdip, 2789 "cname %s: caddr@%s paddr@%s", 2790 cname ? cname : "", caddr ? caddr : "", paddr ? paddr : "")); 2791 2792 if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL || 2793 ret_pip == NULL) { 2794 /* Nothing more to do */ 2795 return (MDI_FAILURE); 2796 } 2797 2798 *ret_pip = NULL; 2799 2800 /* No allocations on detaching pHCI */ 2801 if (DEVI_IS_DETACHING(pdip)) { 2802 /* Invalid pHCI device, return failure */ 2803 MDI_DEBUG(1, (MDI_WARN, pdip, 2804 "!detaching pHCI=%p", (void *)pdip)); 2805 return (MDI_FAILURE); 2806 } 2807 2808 ph = i_devi_get_phci(pdip); 2809 ASSERT(ph != NULL); 2810 if (ph == NULL) { 2811 /* Invalid pHCI device, return failure */ 2812 MDI_DEBUG(1, (MDI_WARN, pdip, 2813 "!invalid pHCI=%p", (void *)pdip)); 2814 return (MDI_FAILURE); 2815 } 2816 2817 MDI_PHCI_LOCK(ph); 2818 vh = ph->ph_vhci; 2819 if (vh == NULL) { 2820 /* Invalid vHCI device, return failure */ 2821 MDI_DEBUG(1, (MDI_WARN, pdip, 2822 "!invalid vHCI=%p", (void *)pdip)); 2823 MDI_PHCI_UNLOCK(ph); 2824 return (MDI_FAILURE); 2825 } 2826 2827 if (MDI_PHCI_IS_READY(ph) == 0) { 2828 /* 2829 * Do not allow new node creation when pHCI is in 2830 * offline/suspended states 2831 */ 2832 MDI_DEBUG(1, (MDI_WARN, pdip, 2833 "pHCI=%p is not ready", (void *)ph)); 2834 MDI_PHCI_UNLOCK(ph); 2835 return (MDI_BUSY); 2836 } 2837 MDI_PHCI_UNSTABLE(ph); 2838 MDI_PHCI_UNLOCK(ph); 2839 2840 /* look for a matching client, create one if not found */ 2841 MDI_VHCI_CLIENT_LOCK(vh); 2842 ct = 
i_mdi_client_find(vh, cname, caddr); 2843 if (ct == NULL) { 2844 ct = i_mdi_client_alloc(vh, cname, caddr); 2845 ASSERT(ct != NULL); 2846 } 2847 2848 if (ct->ct_dip == NULL) { 2849 /* 2850 * Allocate a devinfo node 2851 */ 2852 ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr, 2853 compatible, ncompatible); 2854 if (ct->ct_dip == NULL) { 2855 (void) i_mdi_client_free(vh, ct); 2856 goto fail; 2857 } 2858 } 2859 cdip = ct->ct_dip; 2860 2861 DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT; 2862 DEVI(cdip)->devi_mdi_client = (caddr_t)ct; 2863 2864 MDI_CLIENT_LOCK(ct); 2865 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2866 while (pip != NULL) { 2867 /* 2868 * Compare the unit address 2869 */ 2870 if ((MDI_PI(pip)->pi_phci == ph) && 2871 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2872 break; 2873 } 2874 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2875 } 2876 MDI_CLIENT_UNLOCK(ct); 2877 2878 if (pip == NULL) { 2879 /* 2880 * This is a new path for this client device. Allocate and 2881 * initialize a new pathinfo node 2882 */ 2883 pip = i_mdi_pi_alloc(ph, paddr, ct); 2884 ASSERT(pip != NULL); 2885 path_allocated = 1; 2886 } 2887 rv = MDI_SUCCESS; 2888 2889 fail: 2890 /* 2891 * Release the global mutex. 
2892 */ 2893 MDI_VHCI_CLIENT_UNLOCK(vh); 2894 2895 /* 2896 * Mark the pHCI as stable 2897 */ 2898 MDI_PHCI_LOCK(ph); 2899 MDI_PHCI_STABLE(ph); 2900 MDI_PHCI_UNLOCK(ph); 2901 *ret_pip = pip; 2902 2903 MDI_DEBUG(2, (MDI_NOTE, pdip, 2904 "alloc %s %p", mdi_pi_spathname(pip), (void *)pip)); 2905 2906 if (path_allocated) 2907 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 2908 2909 return (rv); 2910 } 2911 2912 /*ARGSUSED*/ 2913 int 2914 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2915 int flags, mdi_pathinfo_t **ret_pip) 2916 { 2917 return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0, 2918 flags, ret_pip)); 2919 } 2920 2921 /* 2922 * i_mdi_pi_alloc(): 2923 * Allocate a mdi_pathinfo node and add to the pHCI path list 2924 * Return Values: 2925 * mdi_pathinfo 2926 */ 2927 /*ARGSUSED*/ 2928 static mdi_pathinfo_t * 2929 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct) 2930 { 2931 mdi_pathinfo_t *pip; 2932 int ct_circular; 2933 int ph_circular; 2934 static char path[MAXPATHLEN]; /* mdi_pathmap_mutex protects */ 2935 char *path_persistent; 2936 int path_instance; 2937 mod_hash_val_t hv; 2938 2939 ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci)); 2940 2941 pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP); 2942 mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL); 2943 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT | 2944 MDI_PATHINFO_STATE_TRANSIENT; 2945 2946 if (MDI_PHCI_IS_USER_DISABLED(ph)) 2947 MDI_PI_SET_USER_DISABLE(pip); 2948 2949 if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph)) 2950 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 2951 2952 if (MDI_PHCI_IS_DRV_DISABLED(ph)) 2953 MDI_PI_SET_DRV_DISABLE(pip); 2954 2955 MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT; 2956 cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL); 2957 MDI_PI(pip)->pi_client = ct; 2958 MDI_PI(pip)->pi_phci = ph; 2959 MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP); 2960 (void) strcpy(MDI_PI(pip)->pi_addr, paddr); 2961 
2962 /* 2963 * We form the "path" to the pathinfo node, and see if we have 2964 * already allocated a 'path_instance' for that "path". If so, 2965 * we use the already allocated 'path_instance'. If not, we 2966 * allocate a new 'path_instance' and associate it with a copy of 2967 * the "path" string (which is never freed). The association 2968 * between a 'path_instance' this "path" string persists until 2969 * reboot. 2970 */ 2971 mutex_enter(&mdi_pathmap_mutex); 2972 (void) ddi_pathname(ph->ph_dip, path); 2973 (void) sprintf(path + strlen(path), "/%s@%s", 2974 mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip)); 2975 if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) { 2976 path_instance = (uint_t)(intptr_t)hv; 2977 } else { 2978 /* allocate a new 'path_instance' and persistent "path" */ 2979 path_instance = mdi_pathmap_instance++; 2980 path_persistent = i_ddi_strdup(path, KM_SLEEP); 2981 (void) mod_hash_insert(mdi_pathmap_bypath, 2982 (mod_hash_key_t)path_persistent, 2983 (mod_hash_val_t)(intptr_t)path_instance); 2984 (void) mod_hash_insert(mdi_pathmap_byinstance, 2985 (mod_hash_key_t)(intptr_t)path_instance, 2986 (mod_hash_val_t)path_persistent); 2987 2988 /* create shortpath name */ 2989 (void) snprintf(path, sizeof(path), "%s%d/%s@%s", 2990 ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip), 2991 mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip)); 2992 path_persistent = i_ddi_strdup(path, KM_SLEEP); 2993 (void) mod_hash_insert(mdi_pathmap_sbyinstance, 2994 (mod_hash_key_t)(intptr_t)path_instance, 2995 (mod_hash_val_t)path_persistent); 2996 } 2997 mutex_exit(&mdi_pathmap_mutex); 2998 MDI_PI(pip)->pi_path_instance = path_instance; 2999 3000 (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP); 3001 ASSERT(MDI_PI(pip)->pi_prop != NULL); 3002 MDI_PI(pip)->pi_pprivate = NULL; 3003 MDI_PI(pip)->pi_cprivate = NULL; 3004 MDI_PI(pip)->pi_vprivate = NULL; 3005 MDI_PI(pip)->pi_client_link = NULL; 3006 MDI_PI(pip)->pi_phci_link = 
NULL; 3007 MDI_PI(pip)->pi_ref_cnt = 0; 3008 MDI_PI(pip)->pi_kstats = NULL; 3009 MDI_PI(pip)->pi_preferred = 1; 3010 cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL); 3011 3012 /* 3013 * Lock both dev_info nodes against changes in parallel. 3014 * 3015 * The ndi_devi_enter(Client), is atypical since the client is a leaf. 3016 * This atypical operation is done to synchronize pathinfo nodes 3017 * during devinfo snapshot (see di_register_pip) by 'pretending' that 3018 * the pathinfo nodes are children of the Client. 3019 */ 3020 ndi_devi_enter(ct->ct_dip, &ct_circular); 3021 ndi_devi_enter(ph->ph_dip, &ph_circular); 3022 3023 i_mdi_phci_add_path(ph, pip); 3024 i_mdi_client_add_path(ct, pip); 3025 3026 ndi_devi_exit(ph->ph_dip, ph_circular); 3027 ndi_devi_exit(ct->ct_dip, ct_circular); 3028 3029 return (pip); 3030 } 3031 3032 /* 3033 * mdi_pi_pathname_by_instance(): 3034 * Lookup of "path" by 'path_instance'. Return "path". 3035 * NOTE: returned "path" remains valid forever (until reboot). 3036 */ 3037 char * 3038 mdi_pi_pathname_by_instance(int path_instance) 3039 { 3040 char *path; 3041 mod_hash_val_t hv; 3042 3043 /* mdi_pathmap lookup of "path" by 'path_instance' */ 3044 mutex_enter(&mdi_pathmap_mutex); 3045 if (mod_hash_find(mdi_pathmap_byinstance, 3046 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0) 3047 path = (char *)hv; 3048 else 3049 path = NULL; 3050 mutex_exit(&mdi_pathmap_mutex); 3051 return (path); 3052 } 3053 3054 /* 3055 * mdi_pi_spathname_by_instance(): 3056 * Lookup of "shortpath" by 'path_instance'. Return "shortpath". 3057 * NOTE: returned "shortpath" remains valid forever (until reboot). 
3058 */ 3059 char * 3060 mdi_pi_spathname_by_instance(int path_instance) 3061 { 3062 char *path; 3063 mod_hash_val_t hv; 3064 3065 /* mdi_pathmap lookup of "path" by 'path_instance' */ 3066 mutex_enter(&mdi_pathmap_mutex); 3067 if (mod_hash_find(mdi_pathmap_sbyinstance, 3068 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0) 3069 path = (char *)hv; 3070 else 3071 path = NULL; 3072 mutex_exit(&mdi_pathmap_mutex); 3073 return (path); 3074 } 3075 3076 3077 /* 3078 * i_mdi_phci_add_path(): 3079 * Add a mdi_pathinfo node to pHCI list. 3080 * Notes: 3081 * Caller should per-pHCI mutex 3082 */ 3083 static void 3084 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3085 { 3086 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3087 3088 MDI_PHCI_LOCK(ph); 3089 if (ph->ph_path_head == NULL) { 3090 ph->ph_path_head = pip; 3091 } else { 3092 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip); 3093 } 3094 ph->ph_path_tail = pip; 3095 ph->ph_path_count++; 3096 MDI_PHCI_UNLOCK(ph); 3097 } 3098 3099 /* 3100 * i_mdi_client_add_path(): 3101 * Add mdi_pathinfo node to client list 3102 */ 3103 static void 3104 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 3105 { 3106 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 3107 3108 MDI_CLIENT_LOCK(ct); 3109 if (ct->ct_path_head == NULL) { 3110 ct->ct_path_head = pip; 3111 } else { 3112 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip); 3113 } 3114 ct->ct_path_tail = pip; 3115 ct->ct_path_count++; 3116 MDI_CLIENT_UNLOCK(ct); 3117 } 3118 3119 /* 3120 * mdi_pi_free(): 3121 * Free the mdi_pathinfo node and also client device node if this 3122 * is the last path to the device 3123 * Return Values: 3124 * MDI_SUCCESS 3125 * MDI_FAILURE 3126 * MDI_BUSY 3127 */ 3128 /*ARGSUSED*/ 3129 int 3130 mdi_pi_free(mdi_pathinfo_t *pip, int flags) 3131 { 3132 int rv = MDI_FAILURE; 3133 mdi_vhci_t *vh; 3134 mdi_phci_t *ph; 3135 mdi_client_t *ct; 3136 int (*f)(); 3137 int client_held = 0; 3138 3139 MDI_PI_LOCK(pip); 3140 ph = MDI_PI(pip)->pi_phci; 3141 
ASSERT(ph != NULL); 3142 if (ph == NULL) { 3143 /* 3144 * Invalid pHCI device, return failure 3145 */ 3146 MDI_DEBUG(1, (MDI_WARN, NULL, 3147 "!invalid pHCI: pip %s %p", 3148 mdi_pi_spathname(pip), (void *)pip)); 3149 MDI_PI_UNLOCK(pip); 3150 return (MDI_FAILURE); 3151 } 3152 3153 vh = ph->ph_vhci; 3154 ASSERT(vh != NULL); 3155 if (vh == NULL) { 3156 /* Invalid pHCI device, return failure */ 3157 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3158 "!invalid vHCI: pip %s %p", 3159 mdi_pi_spathname(pip), (void *)pip)); 3160 MDI_PI_UNLOCK(pip); 3161 return (MDI_FAILURE); 3162 } 3163 3164 ct = MDI_PI(pip)->pi_client; 3165 ASSERT(ct != NULL); 3166 if (ct == NULL) { 3167 /* 3168 * Invalid Client device, return failure 3169 */ 3170 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3171 "!invalid client: pip %s %p", 3172 mdi_pi_spathname(pip), (void *)pip)); 3173 MDI_PI_UNLOCK(pip); 3174 return (MDI_FAILURE); 3175 } 3176 3177 /* 3178 * Check to see for busy condition. A mdi_pathinfo can only be freed 3179 * if the node state is either offline or init and the reference count 3180 * is zero. 3181 */ 3182 if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) || 3183 MDI_PI_IS_INITING(pip))) { 3184 /* 3185 * Node is busy 3186 */ 3187 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3188 "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip)); 3189 MDI_PI_UNLOCK(pip); 3190 return (MDI_BUSY); 3191 } 3192 3193 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3194 /* 3195 * Give a chance for pending I/Os to complete. 3196 */ 3197 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3198 "!%d cmds still pending on path: %s %p", 3199 MDI_PI(pip)->pi_ref_cnt, 3200 mdi_pi_spathname(pip), (void *)pip)); 3201 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3202 &MDI_PI(pip)->pi_mutex, 3203 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3204 /* 3205 * The timeout time reached without ref_cnt being zero 3206 * being signaled. 
3207 */ 3208 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3209 "!Timeout reached on path %s %p without the cond", 3210 mdi_pi_spathname(pip), (void *)pip)); 3211 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3212 "!%d cmds still pending on path %s %p", 3213 MDI_PI(pip)->pi_ref_cnt, 3214 mdi_pi_spathname(pip), (void *)pip)); 3215 MDI_PI_UNLOCK(pip); 3216 return (MDI_BUSY); 3217 } 3218 } 3219 if (MDI_PI(pip)->pi_pm_held) { 3220 client_held = 1; 3221 } 3222 MDI_PI_UNLOCK(pip); 3223 3224 vhcache_pi_remove(vh->vh_config, MDI_PI(pip)); 3225 3226 MDI_CLIENT_LOCK(ct); 3227 3228 /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */ 3229 MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct); 3230 3231 /* 3232 * Wait till failover is complete before removing this node. 3233 */ 3234 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3235 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3236 3237 MDI_CLIENT_UNLOCK(ct); 3238 MDI_VHCI_CLIENT_LOCK(vh); 3239 MDI_CLIENT_LOCK(ct); 3240 MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct); 3241 3242 if (!MDI_PI_IS_INITING(pip)) { 3243 f = vh->vh_ops->vo_pi_uninit; 3244 if (f != NULL) { 3245 rv = (*f)(vh->vh_dip, pip, 0); 3246 } 3247 } 3248 /* 3249 * If vo_pi_uninit() completed successfully. 3250 */ 3251 if (rv == MDI_SUCCESS) { 3252 if (client_held) { 3253 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3254 "i_mdi_pm_rele_client\n")); 3255 i_mdi_pm_rele_client(ct, 1); 3256 } 3257 i_mdi_pi_free(ph, pip, ct); 3258 if (ct->ct_path_count == 0) { 3259 /* 3260 * Client lost its last path. 
3261 * Clean up the client device 3262 */ 3263 MDI_CLIENT_UNLOCK(ct); 3264 (void) i_mdi_client_free(ct->ct_vhci, ct); 3265 MDI_VHCI_CLIENT_UNLOCK(vh); 3266 return (rv); 3267 } 3268 } 3269 MDI_CLIENT_UNLOCK(ct); 3270 MDI_VHCI_CLIENT_UNLOCK(vh); 3271 3272 if (rv == MDI_FAILURE) 3273 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 3274 3275 return (rv); 3276 } 3277 3278 /* 3279 * i_mdi_pi_free(): 3280 * Free the mdi_pathinfo node 3281 */ 3282 static void 3283 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct) 3284 { 3285 int ct_circular; 3286 int ph_circular; 3287 3288 ASSERT(MDI_CLIENT_LOCKED(ct)); 3289 3290 /* 3291 * remove any per-path kstats 3292 */ 3293 i_mdi_pi_kstat_destroy(pip); 3294 3295 /* See comments in i_mdi_pi_alloc() */ 3296 ndi_devi_enter(ct->ct_dip, &ct_circular); 3297 ndi_devi_enter(ph->ph_dip, &ph_circular); 3298 3299 i_mdi_client_remove_path(ct, pip); 3300 i_mdi_phci_remove_path(ph, pip); 3301 3302 ndi_devi_exit(ph->ph_dip, ph_circular); 3303 ndi_devi_exit(ct->ct_dip, ct_circular); 3304 3305 mutex_destroy(&MDI_PI(pip)->pi_mutex); 3306 cv_destroy(&MDI_PI(pip)->pi_state_cv); 3307 cv_destroy(&MDI_PI(pip)->pi_ref_cv); 3308 if (MDI_PI(pip)->pi_addr) { 3309 kmem_free(MDI_PI(pip)->pi_addr, 3310 strlen(MDI_PI(pip)->pi_addr) + 1); 3311 MDI_PI(pip)->pi_addr = NULL; 3312 } 3313 3314 if (MDI_PI(pip)->pi_prop) { 3315 (void) nvlist_free(MDI_PI(pip)->pi_prop); 3316 MDI_PI(pip)->pi_prop = NULL; 3317 } 3318 kmem_free(pip, sizeof (struct mdi_pathinfo)); 3319 } 3320 3321 3322 /* 3323 * i_mdi_phci_remove_path(): 3324 * Remove a mdi_pathinfo node from pHCI list. 
3325 * Notes: 3326 * Caller should hold per-pHCI mutex 3327 */ 3328 static void 3329 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3330 { 3331 mdi_pathinfo_t *prev = NULL; 3332 mdi_pathinfo_t *path = NULL; 3333 3334 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3335 3336 MDI_PHCI_LOCK(ph); 3337 path = ph->ph_path_head; 3338 while (path != NULL) { 3339 if (path == pip) { 3340 break; 3341 } 3342 prev = path; 3343 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3344 } 3345 3346 if (path) { 3347 ph->ph_path_count--; 3348 if (prev) { 3349 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link; 3350 } else { 3351 ph->ph_path_head = 3352 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3353 } 3354 if (ph->ph_path_tail == path) { 3355 ph->ph_path_tail = prev; 3356 } 3357 } 3358 3359 /* 3360 * Clear the pHCI link 3361 */ 3362 MDI_PI(pip)->pi_phci_link = NULL; 3363 MDI_PI(pip)->pi_phci = NULL; 3364 MDI_PHCI_UNLOCK(ph); 3365 } 3366 3367 /* 3368 * i_mdi_client_remove_path(): 3369 * Remove a mdi_pathinfo node from client path list. 
3370 */ 3371 static void 3372 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 3373 { 3374 mdi_pathinfo_t *prev = NULL; 3375 mdi_pathinfo_t *path; 3376 3377 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 3378 3379 ASSERT(MDI_CLIENT_LOCKED(ct)); 3380 path = ct->ct_path_head; 3381 while (path != NULL) { 3382 if (path == pip) { 3383 break; 3384 } 3385 prev = path; 3386 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; 3387 } 3388 3389 if (path) { 3390 ct->ct_path_count--; 3391 if (prev) { 3392 MDI_PI(prev)->pi_client_link = 3393 MDI_PI(path)->pi_client_link; 3394 } else { 3395 ct->ct_path_head = 3396 (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; 3397 } 3398 if (ct->ct_path_tail == path) { 3399 ct->ct_path_tail = prev; 3400 } 3401 if (ct->ct_path_last == path) { 3402 ct->ct_path_last = ct->ct_path_head; 3403 } 3404 } 3405 MDI_PI(pip)->pi_client_link = NULL; 3406 MDI_PI(pip)->pi_client = NULL; 3407 } 3408 3409 /* 3410 * i_mdi_pi_state_change(): 3411 * online a mdi_pathinfo node 3412 * 3413 * Return Values: 3414 * MDI_SUCCESS 3415 * MDI_FAILURE 3416 */ 3417 /*ARGSUSED*/ 3418 static int 3419 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag) 3420 { 3421 int rv = MDI_SUCCESS; 3422 mdi_vhci_t *vh; 3423 mdi_phci_t *ph; 3424 mdi_client_t *ct; 3425 int (*f)(); 3426 dev_info_t *cdip; 3427 3428 MDI_PI_LOCK(pip); 3429 3430 ph = MDI_PI(pip)->pi_phci; 3431 ASSERT(ph); 3432 if (ph == NULL) { 3433 /* 3434 * Invalid pHCI device, fail the request 3435 */ 3436 MDI_PI_UNLOCK(pip); 3437 MDI_DEBUG(1, (MDI_WARN, NULL, 3438 "!invalid phci: pip %s %p", 3439 mdi_pi_spathname(pip), (void *)pip)); 3440 return (MDI_FAILURE); 3441 } 3442 3443 vh = ph->ph_vhci; 3444 ASSERT(vh); 3445 if (vh == NULL) { 3446 /* 3447 * Invalid vHCI device, fail the request 3448 */ 3449 MDI_PI_UNLOCK(pip); 3450 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3451 "!invalid vhci: pip %s %p", 3452 mdi_pi_spathname(pip), (void *)pip)); 3453 return (MDI_FAILURE); 3454 } 3455 3456 ct = 
MDI_PI(pip)->pi_client; 3457 ASSERT(ct != NULL); 3458 if (ct == NULL) { 3459 /* 3460 * Invalid client device, fail the request 3461 */ 3462 MDI_PI_UNLOCK(pip); 3463 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3464 "!invalid client: pip %s %p", 3465 mdi_pi_spathname(pip), (void *)pip)); 3466 return (MDI_FAILURE); 3467 } 3468 3469 /* 3470 * If this path has not been initialized yet, Callback vHCI driver's 3471 * pathinfo node initialize entry point 3472 */ 3473 3474 if (MDI_PI_IS_INITING(pip)) { 3475 MDI_PI_UNLOCK(pip); 3476 f = vh->vh_ops->vo_pi_init; 3477 if (f != NULL) { 3478 rv = (*f)(vh->vh_dip, pip, 0); 3479 if (rv != MDI_SUCCESS) { 3480 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3481 "!vo_pi_init failed: vHCI %p, pip %s %p", 3482 (void *)vh, mdi_pi_spathname(pip), 3483 (void *)pip)); 3484 return (MDI_FAILURE); 3485 } 3486 } 3487 MDI_PI_LOCK(pip); 3488 MDI_PI_CLEAR_TRANSIENT(pip); 3489 } 3490 3491 /* 3492 * Do not allow state transition when pHCI is in offline/suspended 3493 * states 3494 */ 3495 i_mdi_phci_lock(ph, pip); 3496 if (MDI_PHCI_IS_READY(ph) == 0) { 3497 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3498 "!pHCI not ready, pHCI=%p", (void *)ph)); 3499 MDI_PI_UNLOCK(pip); 3500 i_mdi_phci_unlock(ph); 3501 return (MDI_BUSY); 3502 } 3503 MDI_PHCI_UNSTABLE(ph); 3504 i_mdi_phci_unlock(ph); 3505 3506 /* 3507 * Check if mdi_pathinfo state is in transient state. 3508 * If yes, offlining is in progress and wait till transient state is 3509 * cleared. 3510 */ 3511 if (MDI_PI_IS_TRANSIENT(pip)) { 3512 while (MDI_PI_IS_TRANSIENT(pip)) { 3513 cv_wait(&MDI_PI(pip)->pi_state_cv, 3514 &MDI_PI(pip)->pi_mutex); 3515 } 3516 } 3517 3518 /* 3519 * Grab the client lock in reverse order sequence and release the 3520 * mdi_pathinfo mutex. 
3521 */ 3522 i_mdi_client_lock(ct, pip); 3523 MDI_PI_UNLOCK(pip); 3524 3525 /* 3526 * Wait till failover state is cleared 3527 */ 3528 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3529 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3530 3531 /* 3532 * Mark the mdi_pathinfo node state as transient 3533 */ 3534 MDI_PI_LOCK(pip); 3535 switch (state) { 3536 case MDI_PATHINFO_STATE_ONLINE: 3537 MDI_PI_SET_ONLINING(pip); 3538 break; 3539 3540 case MDI_PATHINFO_STATE_STANDBY: 3541 MDI_PI_SET_STANDBYING(pip); 3542 break; 3543 3544 case MDI_PATHINFO_STATE_FAULT: 3545 /* 3546 * Mark the pathinfo state as FAULTED 3547 */ 3548 MDI_PI_SET_FAULTING(pip); 3549 MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR); 3550 break; 3551 3552 case MDI_PATHINFO_STATE_OFFLINE: 3553 /* 3554 * ndi_devi_offline() cannot hold pip or ct locks. 3555 */ 3556 MDI_PI_UNLOCK(pip); 3557 3558 /* 3559 * If this is a user initiated path online->offline operation 3560 * who's success would transition a client from DEGRADED to 3561 * FAILED then only proceed if we can offline the client first. 
3562 */ 3563 cdip = ct->ct_dip; 3564 if ((flag & NDI_USER_REQ) && 3565 MDI_PI_IS_ONLINE(pip) && 3566 (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) { 3567 i_mdi_client_unlock(ct); 3568 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN); 3569 if (rv != NDI_SUCCESS) { 3570 /* 3571 * Convert to MDI error code 3572 */ 3573 switch (rv) { 3574 case NDI_BUSY: 3575 rv = MDI_BUSY; 3576 break; 3577 default: 3578 rv = MDI_FAILURE; 3579 break; 3580 } 3581 goto state_change_exit; 3582 } else { 3583 i_mdi_client_lock(ct, NULL); 3584 } 3585 } 3586 /* 3587 * Mark the mdi_pathinfo node state as transient 3588 */ 3589 MDI_PI_LOCK(pip); 3590 MDI_PI_SET_OFFLINING(pip); 3591 break; 3592 } 3593 MDI_PI_UNLOCK(pip); 3594 MDI_CLIENT_UNSTABLE(ct); 3595 i_mdi_client_unlock(ct); 3596 3597 f = vh->vh_ops->vo_pi_state_change; 3598 if (f != NULL) 3599 rv = (*f)(vh->vh_dip, pip, state, 0, flag); 3600 3601 MDI_CLIENT_LOCK(ct); 3602 MDI_PI_LOCK(pip); 3603 if (rv == MDI_NOT_SUPPORTED) { 3604 MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct); 3605 } 3606 if (rv != MDI_SUCCESS) { 3607 MDI_DEBUG(2, (MDI_WARN, ct->ct_dip, 3608 "vo_pi_state_change failed: rv %x", rv)); 3609 } 3610 if (MDI_PI_IS_TRANSIENT(pip)) { 3611 if (rv == MDI_SUCCESS) { 3612 MDI_PI_CLEAR_TRANSIENT(pip); 3613 } else { 3614 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip); 3615 } 3616 } 3617 3618 /* 3619 * Wake anyone waiting for this mdi_pathinfo node 3620 */ 3621 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3622 MDI_PI_UNLOCK(pip); 3623 3624 /* 3625 * Mark the client device as stable 3626 */ 3627 MDI_CLIENT_STABLE(ct); 3628 if (rv == MDI_SUCCESS) { 3629 if (ct->ct_unstable == 0) { 3630 cdip = ct->ct_dip; 3631 3632 /* 3633 * Onlining the mdi_pathinfo node will impact the 3634 * client state Update the client and dev_info node 3635 * state accordingly 3636 */ 3637 rv = NDI_SUCCESS; 3638 i_mdi_client_update_state(ct); 3639 switch (MDI_CLIENT_STATE(ct)) { 3640 case MDI_CLIENT_STATE_OPTIMAL: 3641 case MDI_CLIENT_STATE_DEGRADED: 3642 if (cdip && 
!i_ddi_devi_attached(cdip) && 3643 ((state == MDI_PATHINFO_STATE_ONLINE) || 3644 (state == MDI_PATHINFO_STATE_STANDBY))) { 3645 3646 /* 3647 * Must do ndi_devi_online() through 3648 * hotplug thread for deferred 3649 * attach mechanism to work 3650 */ 3651 MDI_CLIENT_UNLOCK(ct); 3652 rv = ndi_devi_online(cdip, 0); 3653 MDI_CLIENT_LOCK(ct); 3654 if ((rv != NDI_SUCCESS) && 3655 (MDI_CLIENT_STATE(ct) == 3656 MDI_CLIENT_STATE_DEGRADED)) { 3657 /* 3658 * ndi_devi_online failed. 3659 * Reset client flags to 3660 * offline. 3661 */ 3662 MDI_DEBUG(1, (MDI_WARN, cdip, 3663 "!ndi_devi_online failed " 3664 "error %x", rv)); 3665 MDI_CLIENT_SET_OFFLINE(ct); 3666 } 3667 if (rv != NDI_SUCCESS) { 3668 /* Reset the path state */ 3669 MDI_PI_LOCK(pip); 3670 MDI_PI(pip)->pi_state = 3671 MDI_PI_OLD_STATE(pip); 3672 MDI_PI_UNLOCK(pip); 3673 } 3674 } 3675 break; 3676 3677 case MDI_CLIENT_STATE_FAILED: 3678 /* 3679 * This is the last path case for 3680 * non-user initiated events. 3681 */ 3682 if (((flag & NDI_USER_REQ) == 0) && 3683 cdip && (i_ddi_node_state(cdip) >= 3684 DS_INITIALIZED)) { 3685 MDI_CLIENT_UNLOCK(ct); 3686 rv = ndi_devi_offline(cdip, 3687 NDI_DEVFS_CLEAN); 3688 MDI_CLIENT_LOCK(ct); 3689 3690 if (rv != NDI_SUCCESS) { 3691 /* 3692 * ndi_devi_offline failed. 3693 * Reset client flags to 3694 * online as the path could not 3695 * be offlined. 3696 */ 3697 MDI_DEBUG(1, (MDI_WARN, cdip, 3698 "!ndi_devi_offline failed: " 3699 "error %x", rv)); 3700 MDI_CLIENT_SET_ONLINE(ct); 3701 } 3702 } 3703 break; 3704 } 3705 /* 3706 * Convert to MDI error code 3707 */ 3708 switch (rv) { 3709 case NDI_SUCCESS: 3710 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3711 i_mdi_report_path_state(ct, pip); 3712 rv = MDI_SUCCESS; 3713 break; 3714 case NDI_BUSY: 3715 rv = MDI_BUSY; 3716 break; 3717 default: 3718 rv = MDI_FAILURE; 3719 break; 3720 } 3721 } 3722 } 3723 MDI_CLIENT_UNLOCK(ct); 3724 3725 state_change_exit: 3726 /* 3727 * Mark the pHCI as stable again. 
3728 */ 3729 MDI_PHCI_LOCK(ph); 3730 MDI_PHCI_STABLE(ph); 3731 MDI_PHCI_UNLOCK(ph); 3732 return (rv); 3733 } 3734 3735 /* 3736 * mdi_pi_online(): 3737 * Place the path_info node in the online state. The path is 3738 * now available to be selected by mdi_select_path() for 3739 * transporting I/O requests to client devices. 3740 * Return Values: 3741 * MDI_SUCCESS 3742 * MDI_FAILURE 3743 */ 3744 int 3745 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3746 { 3747 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3748 int client_held = 0; 3749 int rv; 3750 3751 ASSERT(ct != NULL); 3752 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3753 if (rv != MDI_SUCCESS) 3754 return (rv); 3755 3756 MDI_PI_LOCK(pip); 3757 if (MDI_PI(pip)->pi_pm_held == 0) { 3758 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3759 "i_mdi_pm_hold_pip %p", (void *)pip)); 3760 i_mdi_pm_hold_pip(pip); 3761 client_held = 1; 3762 } 3763 MDI_PI_UNLOCK(pip); 3764 3765 if (client_held) { 3766 MDI_CLIENT_LOCK(ct); 3767 if (ct->ct_power_cnt == 0) { 3768 rv = i_mdi_power_all_phci(ct); 3769 } 3770 3771 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3772 "i_mdi_pm_hold_client %p", (void *)ct)); 3773 i_mdi_pm_hold_client(ct, 1); 3774 MDI_CLIENT_UNLOCK(ct); 3775 } 3776 3777 return (rv); 3778 } 3779 3780 /* 3781 * mdi_pi_standby(): 3782 * Place the mdi_pathinfo node in standby state 3783 * 3784 * Return Values: 3785 * MDI_SUCCESS 3786 * MDI_FAILURE 3787 */ 3788 int 3789 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3790 { 3791 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3792 } 3793 3794 /* 3795 * mdi_pi_fault(): 3796 * Place the mdi_pathinfo node in fault'ed state 3797 * Return Values: 3798 * MDI_SUCCESS 3799 * MDI_FAILURE 3800 */ 3801 int 3802 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3803 { 3804 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3805 } 3806 3807 /* 3808 * mdi_pi_offline(): 3809 * Offline a mdi_pathinfo node. 
3810 * Return Values: 3811 * MDI_SUCCESS 3812 * MDI_FAILURE 3813 */ 3814 int 3815 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3816 { 3817 int ret, client_held = 0; 3818 mdi_client_t *ct; 3819 3820 /* 3821 * Original code overloaded NDI_DEVI_REMOVE to this interface, and 3822 * used it to mean "user initiated operation" (i.e. devctl). Callers 3823 * should now just use NDI_USER_REQ. 3824 */ 3825 if (flags & NDI_DEVI_REMOVE) { 3826 flags &= ~NDI_DEVI_REMOVE; 3827 flags |= NDI_USER_REQ; 3828 } 3829 3830 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3831 3832 if (ret == MDI_SUCCESS) { 3833 MDI_PI_LOCK(pip); 3834 if (MDI_PI(pip)->pi_pm_held) { 3835 client_held = 1; 3836 } 3837 MDI_PI_UNLOCK(pip); 3838 3839 if (client_held) { 3840 ct = MDI_PI(pip)->pi_client; 3841 MDI_CLIENT_LOCK(ct); 3842 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3843 "i_mdi_pm_rele_client\n")); 3844 i_mdi_pm_rele_client(ct, 1); 3845 MDI_CLIENT_UNLOCK(ct); 3846 } 3847 } 3848 3849 return (ret); 3850 } 3851 3852 /* 3853 * i_mdi_pi_offline(): 3854 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3855 */ 3856 static int 3857 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3858 { 3859 dev_info_t *vdip = NULL; 3860 mdi_vhci_t *vh = NULL; 3861 mdi_client_t *ct = NULL; 3862 int (*f)(); 3863 int rv; 3864 3865 MDI_PI_LOCK(pip); 3866 ct = MDI_PI(pip)->pi_client; 3867 ASSERT(ct != NULL); 3868 3869 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3870 /* 3871 * Give a chance for pending I/Os to complete. 3872 */ 3873 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3874 "!%d cmds still pending on path %s %p", 3875 MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip), 3876 (void *)pip)); 3877 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3878 &MDI_PI(pip)->pi_mutex, 3879 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3880 /* 3881 * The timeout time reached without ref_cnt being zero 3882 * being signaled. 
3883 */ 3884 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3885 "!Timeout reached on path %s %p without the cond", 3886 mdi_pi_spathname(pip), (void *)pip)); 3887 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3888 "!%d cmds still pending on path %s %p", 3889 MDI_PI(pip)->pi_ref_cnt, 3890 mdi_pi_spathname(pip), (void *)pip)); 3891 } 3892 } 3893 vh = ct->ct_vhci; 3894 vdip = vh->vh_dip; 3895 3896 /* 3897 * Notify vHCI that has registered this event 3898 */ 3899 ASSERT(vh->vh_ops); 3900 f = vh->vh_ops->vo_pi_state_change; 3901 3902 if (f != NULL) { 3903 MDI_PI_UNLOCK(pip); 3904 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3905 flags)) != MDI_SUCCESS) { 3906 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3907 "!vo_path_offline failed: vdip %s%d %p: path %s %p", 3908 ddi_driver_name(vdip), ddi_get_instance(vdip), 3909 (void *)vdip, mdi_pi_spathname(pip), (void *)pip)); 3910 } 3911 MDI_PI_LOCK(pip); 3912 } 3913 3914 /* 3915 * Set the mdi_pathinfo node state and clear the transient condition 3916 */ 3917 MDI_PI_SET_OFFLINE(pip); 3918 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3919 MDI_PI_UNLOCK(pip); 3920 3921 MDI_CLIENT_LOCK(ct); 3922 if (rv == MDI_SUCCESS) { 3923 if (ct->ct_unstable == 0) { 3924 dev_info_t *cdip = ct->ct_dip; 3925 3926 /* 3927 * Onlining the mdi_pathinfo node will impact the 3928 * client state Update the client and dev_info node 3929 * state accordingly 3930 */ 3931 i_mdi_client_update_state(ct); 3932 rv = NDI_SUCCESS; 3933 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3934 if (cdip && 3935 (i_ddi_node_state(cdip) >= 3936 DS_INITIALIZED)) { 3937 MDI_CLIENT_UNLOCK(ct); 3938 rv = ndi_devi_offline(cdip, 3939 NDI_DEVFS_CLEAN); 3940 MDI_CLIENT_LOCK(ct); 3941 if (rv != NDI_SUCCESS) { 3942 /* 3943 * ndi_devi_offline failed. 3944 * Reset client flags to 3945 * online. 
3946 */ 3947 MDI_DEBUG(4, (MDI_WARN, cdip, 3948 "ndi_devi_offline failed: " 3949 "error %x", rv)); 3950 MDI_CLIENT_SET_ONLINE(ct); 3951 } 3952 } 3953 } 3954 /* 3955 * Convert to MDI error code 3956 */ 3957 switch (rv) { 3958 case NDI_SUCCESS: 3959 rv = MDI_SUCCESS; 3960 break; 3961 case NDI_BUSY: 3962 rv = MDI_BUSY; 3963 break; 3964 default: 3965 rv = MDI_FAILURE; 3966 break; 3967 } 3968 } 3969 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3970 i_mdi_report_path_state(ct, pip); 3971 } 3972 3973 MDI_CLIENT_UNLOCK(ct); 3974 3975 /* 3976 * Change in the mdi_pathinfo node state will impact the client state 3977 */ 3978 MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip, 3979 "ct = %p pip = %p", (void *)ct, (void *)pip)); 3980 return (rv); 3981 } 3982 3983 /* 3984 * mdi_pi_get_node_name(): 3985 * Get the name associated with a mdi_pathinfo node. 3986 * Since pathinfo nodes are not directly named, we 3987 * return the node_name of the client. 3988 * 3989 * Return Values: 3990 * char * 3991 */ 3992 char * 3993 mdi_pi_get_node_name(mdi_pathinfo_t *pip) 3994 { 3995 mdi_client_t *ct; 3996 3997 if (pip == NULL) 3998 return (NULL); 3999 ct = MDI_PI(pip)->pi_client; 4000 if ((ct == NULL) || (ct->ct_dip == NULL)) 4001 return (NULL); 4002 return (ddi_node_name(ct->ct_dip)); 4003 } 4004 4005 /* 4006 * mdi_pi_get_addr(): 4007 * Get the unit address associated with a mdi_pathinfo node 4008 * 4009 * Return Values: 4010 * char * 4011 */ 4012 char * 4013 mdi_pi_get_addr(mdi_pathinfo_t *pip) 4014 { 4015 if (pip == NULL) 4016 return (NULL); 4017 4018 return (MDI_PI(pip)->pi_addr); 4019 } 4020 4021 /* 4022 * mdi_pi_get_path_instance(): 4023 * Get the 'path_instance' of a mdi_pathinfo node 4024 * 4025 * Return Values: 4026 * path_instance 4027 */ 4028 int 4029 mdi_pi_get_path_instance(mdi_pathinfo_t *pip) 4030 { 4031 if (pip == NULL) 4032 return (0); 4033 4034 return (MDI_PI(pip)->pi_path_instance); 4035 } 4036 4037 /* 4038 * mdi_pi_pathname(): 4039 * Return pointer to path to pathinfo node. 
4040 */ 4041 char * 4042 mdi_pi_pathname(mdi_pathinfo_t *pip) 4043 { 4044 if (pip == NULL) 4045 return (NULL); 4046 return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip))); 4047 } 4048 4049 /* 4050 * mdi_pi_spathname(): 4051 * Return pointer to shortpath to pathinfo node. Used for debug 4052 * messages, so return "" instead of NULL when unknown. 4053 */ 4054 char * 4055 mdi_pi_spathname(mdi_pathinfo_t *pip) 4056 { 4057 char *spath = ""; 4058 4059 if (pip) { 4060 spath = mdi_pi_spathname_by_instance( 4061 mdi_pi_get_path_instance(pip)); 4062 if (spath == NULL) 4063 spath = ""; 4064 } 4065 return (spath); 4066 } 4067 4068 char * 4069 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path) 4070 { 4071 char *obp_path = NULL; 4072 if ((pip == NULL) || (path == NULL)) 4073 return (NULL); 4074 4075 if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) { 4076 (void) strcpy(path, obp_path); 4077 (void) mdi_prop_free(obp_path); 4078 } else { 4079 path = NULL; 4080 } 4081 return (path); 4082 } 4083 4084 int 4085 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component) 4086 { 4087 dev_info_t *pdip; 4088 char *obp_path = NULL; 4089 int rc = MDI_FAILURE; 4090 4091 if (pip == NULL) 4092 return (MDI_FAILURE); 4093 4094 pdip = mdi_pi_get_phci(pip); 4095 if (pdip == NULL) 4096 return (MDI_FAILURE); 4097 4098 obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 4099 4100 if (ddi_pathname_obp(pdip, obp_path) == NULL) { 4101 (void) ddi_pathname(pdip, obp_path); 4102 } 4103 4104 if (component) { 4105 (void) strncat(obp_path, "/", MAXPATHLEN); 4106 (void) strncat(obp_path, component, MAXPATHLEN); 4107 } 4108 rc = mdi_prop_update_string(pip, "obp-path", obp_path); 4109 4110 if (obp_path) 4111 kmem_free(obp_path, MAXPATHLEN); 4112 return (rc); 4113 } 4114 4115 /* 4116 * mdi_pi_get_client(): 4117 * Get the client devinfo associated with a mdi_pathinfo node 4118 * 4119 * Return Values: 4120 * Handle to client device dev_info node 4121 */ 4122 dev_info_t * 4123 
mdi_pi_get_client(mdi_pathinfo_t *pip) 4124 { 4125 dev_info_t *dip = NULL; 4126 if (pip) { 4127 dip = MDI_PI(pip)->pi_client->ct_dip; 4128 } 4129 return (dip); 4130 } 4131 4132 /* 4133 * mdi_pi_get_phci(): 4134 * Get the pHCI devinfo associated with the mdi_pathinfo node 4135 * Return Values: 4136 * Handle to dev_info node 4137 */ 4138 dev_info_t * 4139 mdi_pi_get_phci(mdi_pathinfo_t *pip) 4140 { 4141 dev_info_t *dip = NULL; 4142 mdi_phci_t *ph; 4143 4144 if (pip) { 4145 ph = MDI_PI(pip)->pi_phci; 4146 if (ph) 4147 dip = ph->ph_dip; 4148 } 4149 return (dip); 4150 } 4151 4152 /* 4153 * mdi_pi_get_client_private(): 4154 * Get the client private information associated with the 4155 * mdi_pathinfo node 4156 */ 4157 void * 4158 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 4159 { 4160 void *cprivate = NULL; 4161 if (pip) { 4162 cprivate = MDI_PI(pip)->pi_cprivate; 4163 } 4164 return (cprivate); 4165 } 4166 4167 /* 4168 * mdi_pi_set_client_private(): 4169 * Set the client private information in the mdi_pathinfo node 4170 */ 4171 void 4172 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 4173 { 4174 if (pip) { 4175 MDI_PI(pip)->pi_cprivate = priv; 4176 } 4177 } 4178 4179 /* 4180 * mdi_pi_get_phci_private(): 4181 * Get the pHCI private information associated with the 4182 * mdi_pathinfo node 4183 */ 4184 caddr_t 4185 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 4186 { 4187 caddr_t pprivate = NULL; 4188 4189 if (pip) { 4190 pprivate = MDI_PI(pip)->pi_pprivate; 4191 } 4192 return (pprivate); 4193 } 4194 4195 /* 4196 * mdi_pi_set_phci_private(): 4197 * Set the pHCI private information in the mdi_pathinfo node 4198 */ 4199 void 4200 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 4201 { 4202 if (pip) { 4203 MDI_PI(pip)->pi_pprivate = priv; 4204 } 4205 } 4206 4207 /* 4208 * mdi_pi_get_state(): 4209 * Get the mdi_pathinfo node state. 
Transient states are internal 4210 * and not provided to the users 4211 */ 4212 mdi_pathinfo_state_t 4213 mdi_pi_get_state(mdi_pathinfo_t *pip) 4214 { 4215 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 4216 4217 if (pip) { 4218 if (MDI_PI_IS_TRANSIENT(pip)) { 4219 /* 4220 * mdi_pathinfo is in state transition. Return the 4221 * last good state. 4222 */ 4223 state = MDI_PI_OLD_STATE(pip); 4224 } else { 4225 state = MDI_PI_STATE(pip); 4226 } 4227 } 4228 return (state); 4229 } 4230 4231 /* 4232 * mdi_pi_get_flags(): 4233 * Get the mdi_pathinfo node flags. 4234 */ 4235 uint_t 4236 mdi_pi_get_flags(mdi_pathinfo_t *pip) 4237 { 4238 return (pip ? MDI_PI(pip)->pi_flags : 0); 4239 } 4240 4241 /* 4242 * Note that the following function needs to be the new interface for 4243 * mdi_pi_get_state when mpxio gets integrated to ON. 4244 */ 4245 int 4246 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 4247 uint32_t *ext_state) 4248 { 4249 *state = MDI_PATHINFO_STATE_INIT; 4250 4251 if (pip) { 4252 if (MDI_PI_IS_TRANSIENT(pip)) { 4253 /* 4254 * mdi_pathinfo is in state transition. Return the 4255 * last good state. 
4256 */ 4257 *state = MDI_PI_OLD_STATE(pip); 4258 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 4259 } else { 4260 *state = MDI_PI_STATE(pip); 4261 *ext_state = MDI_PI_EXT_STATE(pip); 4262 } 4263 } 4264 return (MDI_SUCCESS); 4265 } 4266 4267 /* 4268 * mdi_pi_get_preferred: 4269 * Get the preferred path flag 4270 */ 4271 int 4272 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 4273 { 4274 if (pip) { 4275 return (MDI_PI(pip)->pi_preferred); 4276 } 4277 return (0); 4278 } 4279 4280 /* 4281 * mdi_pi_set_preferred: 4282 * Set the preferred path flag 4283 */ 4284 void 4285 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 4286 { 4287 if (pip) { 4288 MDI_PI(pip)->pi_preferred = preferred; 4289 } 4290 } 4291 4292 /* 4293 * mdi_pi_set_state(): 4294 * Set the mdi_pathinfo node state 4295 */ 4296 void 4297 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 4298 { 4299 uint32_t ext_state; 4300 4301 if (pip) { 4302 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 4303 MDI_PI(pip)->pi_state = state; 4304 MDI_PI(pip)->pi_state |= ext_state; 4305 4306 /* Path has changed state, invalidate DINFOCACHE snap shot. */ 4307 i_ddi_di_cache_invalidate(); 4308 } 4309 } 4310 4311 /* 4312 * Property functions: 4313 */ 4314 int 4315 i_map_nvlist_error_to_mdi(int val) 4316 { 4317 int rv; 4318 4319 switch (val) { 4320 case 0: 4321 rv = DDI_PROP_SUCCESS; 4322 break; 4323 case EINVAL: 4324 case ENOTSUP: 4325 rv = DDI_PROP_INVAL_ARG; 4326 break; 4327 case ENOMEM: 4328 rv = DDI_PROP_NO_MEMORY; 4329 break; 4330 default: 4331 rv = DDI_PROP_NOT_FOUND; 4332 break; 4333 } 4334 return (rv); 4335 } 4336 4337 /* 4338 * mdi_pi_get_next_prop(): 4339 * Property walk function. The caller should hold mdi_pi_lock() 4340 * and release by calling mdi_pi_unlock() at the end of walk to 4341 * get a consistent value. 
4342 */ 4343 nvpair_t * 4344 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 4345 { 4346 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4347 return (NULL); 4348 } 4349 ASSERT(MDI_PI_LOCKED(pip)); 4350 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 4351 } 4352 4353 /* 4354 * mdi_prop_remove(): 4355 * Remove the named property from the named list. 4356 */ 4357 int 4358 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 4359 { 4360 if (pip == NULL) { 4361 return (DDI_PROP_NOT_FOUND); 4362 } 4363 ASSERT(!MDI_PI_LOCKED(pip)); 4364 MDI_PI_LOCK(pip); 4365 if (MDI_PI(pip)->pi_prop == NULL) { 4366 MDI_PI_UNLOCK(pip); 4367 return (DDI_PROP_NOT_FOUND); 4368 } 4369 if (name) { 4370 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 4371 } else { 4372 char nvp_name[MAXNAMELEN]; 4373 nvpair_t *nvp; 4374 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 4375 while (nvp) { 4376 nvpair_t *next; 4377 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 4378 (void) snprintf(nvp_name, sizeof(nvp_name), "%s", 4379 nvpair_name(nvp)); 4380 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 4381 nvp_name); 4382 nvp = next; 4383 } 4384 } 4385 MDI_PI_UNLOCK(pip); 4386 return (DDI_PROP_SUCCESS); 4387 } 4388 4389 /* 4390 * mdi_prop_size(): 4391 * Get buffer size needed to pack the property data. 4392 * Caller should hold the mdi_pathinfo_t lock to get a consistent 4393 * buffer size. 4394 */ 4395 int 4396 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 4397 { 4398 int rv; 4399 size_t bufsize; 4400 4401 *buflenp = 0; 4402 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4403 return (DDI_PROP_NOT_FOUND); 4404 } 4405 ASSERT(MDI_PI_LOCKED(pip)); 4406 rv = nvlist_size(MDI_PI(pip)->pi_prop, 4407 &bufsize, NV_ENCODE_NATIVE); 4408 *buflenp = bufsize; 4409 return (i_map_nvlist_error_to_mdi(rv)); 4410 } 4411 4412 /* 4413 * mdi_prop_pack(): 4414 * pack the property list. 
The caller should hold the 4415 * mdi_pathinfo_t node to get a consistent data 4416 */ 4417 int 4418 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 4419 { 4420 int rv; 4421 size_t bufsize; 4422 4423 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 4424 return (DDI_PROP_NOT_FOUND); 4425 } 4426 4427 ASSERT(MDI_PI_LOCKED(pip)); 4428 4429 bufsize = buflen; 4430 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 4431 NV_ENCODE_NATIVE, KM_SLEEP); 4432 4433 return (i_map_nvlist_error_to_mdi(rv)); 4434 } 4435 4436 /* 4437 * mdi_prop_update_byte(): 4438 * Create/Update a byte property 4439 */ 4440 int 4441 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 4442 { 4443 int rv; 4444 4445 if (pip == NULL) { 4446 return (DDI_PROP_INVAL_ARG); 4447 } 4448 ASSERT(!MDI_PI_LOCKED(pip)); 4449 MDI_PI_LOCK(pip); 4450 if (MDI_PI(pip)->pi_prop == NULL) { 4451 MDI_PI_UNLOCK(pip); 4452 return (