1 0 stevel /* 2 0 stevel * CDDL HEADER START 3 0 stevel * 4 0 stevel * The contents of this file are subject to the terms of the 5 1623 tw21770 * Common Development and Distribution License (the "License"). 6 1623 tw21770 * You may not use this file except in compliance with the License. 7 0 stevel * 8 0 stevel * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 0 stevel * or http://www.opensolaris.org/os/licensing. 10 0 stevel * See the License for the specific language governing permissions 11 0 stevel * and limitations under the License. 12 0 stevel * 13 0 stevel * When distributing Covered Code, include this CDDL HEADER in each 14 0 stevel * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 0 stevel * If applicable, add the following below this CDDL HEADER, with the 16 0 stevel * fields enclosed by brackets "[]" replaced with your own identifying 17 0 stevel * information: Portions Copyright [yyyy] [name of copyright owner] 18 0 stevel * 19 0 stevel * CDDL HEADER END 20 0 stevel */ 21 7563 Prasad 22 0 stevel /* 23 10549 Achim * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 0 stevel * Use is subject to license terms. 25 0 stevel */ 26 0 stevel 27 0 stevel /* 28 0 stevel * Driver for Virtual Disk. 
29 0 stevel */ 30 0 stevel #include <sys/param.h> 31 0 stevel #include <sys/systm.h> 32 0 stevel #include <sys/buf.h> 33 0 stevel #include <sys/conf.h> 34 0 stevel #include <sys/user.h> 35 0 stevel #include <sys/uio.h> 36 0 stevel #include <sys/proc.h> 37 0 stevel #include <sys/t_lock.h> 38 0 stevel #include <sys/dkio.h> 39 0 stevel #include <sys/kmem.h> 40 0 stevel #include <sys/debug.h> 41 0 stevel #include <sys/cmn_err.h> 42 0 stevel #include <sys/sysmacros.h> 43 0 stevel #include <sys/types.h> 44 0 stevel #include <sys/mkdev.h> 45 0 stevel #include <sys/vtoc.h> 46 0 stevel #include <sys/open.h> 47 0 stevel #include <sys/file.h> 48 0 stevel #include <vm/page.h> 49 0 stevel #include <sys/callb.h> 50 0 stevel #include <sys/disp.h> 51 0 stevel #include <sys/modctl.h> 52 0 stevel #include <sys/errno.h> 53 0 stevel #include <sys/door.h> 54 0 stevel #include <sys/lvm/mdmn_commd.h> 55 1623 tw21770 #include <sys/lvm/md_hotspares.h> 56 0 stevel 57 0 stevel #include <sys/lvm/mdvar.h> 58 0 stevel #include <sys/lvm/md_names.h> 59 0 stevel 60 0 stevel #include <sys/ddi.h> 61 0 stevel #include <sys/proc.h> 62 0 stevel #include <sys/sunddi.h> 63 0 stevel #include <sys/esunddi.h> 64 0 stevel 65 0 stevel #include <sys/sysevent.h> 66 0 stevel #include <sys/sysevent/eventdefs.h> 67 0 stevel 68 0 stevel #include <sys/sysevent/svm.h> 69 734 mw145384 #include <sys/lvm/md_basic.h> 70 734 mw145384 71 0 stevel 72 0 stevel /* 73 0 stevel * Machine specific Hertz is kept here 74 0 stevel */ 75 0 stevel extern clock_t md_hz; 76 0 stevel 77 0 stevel /* 78 0 stevel * Externs. 
79 0 stevel */ 80 0 stevel extern int (*mdv_strategy_tstpnt)(buf_t *, int, void*); 81 0 stevel extern major_t md_major; 82 0 stevel extern unit_t md_nunits; 83 0 stevel extern set_t md_nsets; 84 0 stevel extern md_set_t md_set[]; 85 0 stevel extern md_set_io_t md_set_io[]; 86 0 stevel extern md_ops_t **md_ops; 87 0 stevel extern md_ops_t *md_opslist; 88 0 stevel extern ddi_modhandle_t *md_mods; 89 8452 John extern dev_info_t *md_devinfo; 90 0 stevel 91 0 stevel extern md_krwlock_t md_unit_array_rw; 92 0 stevel extern kmutex_t md_mx; 93 0 stevel extern kcondvar_t md_cv; 94 0 stevel 95 0 stevel extern md_krwlock_t hsp_rwlp; 96 0 stevel extern md_krwlock_t ni_rwlp; 97 0 stevel 98 0 stevel extern int md_num_daemons; 99 0 stevel extern int md_status; 100 0 stevel extern int md_ioctl_cnt; 101 0 stevel extern int md_mtioctl_cnt; 102 0 stevel 103 0 stevel extern struct metatransops metatransops; 104 0 stevel extern md_event_queue_t *md_event_queue; 105 0 stevel extern md_resync_t md_cpr_resync; 106 0 stevel extern int md_done_daemon_threads; 107 0 stevel extern int md_ff_daemon_threads; 108 0 stevel 109 0 stevel 110 0 stevel extern mddb_set_t *mddb_setenter(set_t setno, int flag, int *errorcodep); 111 0 stevel extern void mddb_setexit(mddb_set_t *s); 112 1623 tw21770 extern void *lookup_entry(struct nm_next_hdr *, set_t, 113 1623 tw21770 side_t, mdkey_t, md_dev64_t, int); 114 1623 tw21770 extern struct nm_next_hdr *get_first_record(set_t, int, int); 115 10549 Achim extern dev_t getrootdev(void); 116 0 stevel 117 0 stevel struct mdq_anchor md_done_daemon; /* done request queue */ 118 8452 John struct mdq_anchor md_mstr_daemon; /* mirror error, WOW requests */ 119 0 stevel struct mdq_anchor md_mhs_daemon; /* mirror hotspare requests queue */ 120 0 stevel struct mdq_anchor md_hs_daemon; /* raid hotspare requests queue */ 121 0 stevel struct mdq_anchor md_ff_daemonq; /* failfast request queue */ 122 0 stevel struct mdq_anchor md_mirror_daemon; /* mirror owner queue */ 123 0 
stevel struct mdq_anchor md_mirror_io_daemon; /* mirror owner i/o queue */ 124 0 stevel struct mdq_anchor md_mirror_rs_daemon; /* mirror resync done queue */ 125 0 stevel struct mdq_anchor md_sp_daemon; /* soft-part error daemon queue */ 126 8452 John struct mdq_anchor md_mto_daemon; /* mirror timeout daemon queue */ 127 0 stevel 128 0 stevel int md_done_daemon_threads = 1; /* threads for md_done_daemon requestq */ 129 0 stevel int md_mstr_daemon_threads = 1; /* threads for md_mstr_daemon requestq */ 130 0 stevel int md_mhs_daemon_threads = 1; /* threads for md_mhs_daemon requestq */ 131 0 stevel int md_hs_daemon_threads = 1; /* threads for md_hs_daemon requestq */ 132 0 stevel int md_ff_daemon_threads = 3; /* threads for md_ff_daemon requestq */ 133 0 stevel int md_mirror_daemon_threads = 1; /* threads for md_mirror_daemon requestq */ 134 0 stevel int md_sp_daemon_threads = 1; /* threads for md_sp_daemon requestq */ 135 8452 John int md_mto_daemon_threads = 1; /* threads for md_mto_daemon requestq */ 136 0 stevel 137 0 stevel #ifdef DEBUG 138 0 stevel /* Flag to switch on debug messages */ 139 0 stevel int md_release_reacquire_debug = 0; /* debug flag */ 140 0 stevel #endif 141 0 stevel 142 0 stevel /* 143 0 stevel * 144 0 stevel * The md_request_queues is table of pointers to request queues and the number 145 0 stevel * of threads associated with the request queues. 146 0 stevel * When the number of threads is set to 1, then the order of execution is 147 0 stevel * sequential. 148 0 stevel * The number of threads for all the queues have been defined as global 149 0 stevel * variables to enable kernel tuning. 
150 0 stevel * 151 0 stevel */ 152 0 stevel 153 8452 John #define MD_DAEMON_QUEUES 11 154 0 stevel 155 0 stevel md_requestq_entry_t md_daemon_queues[MD_DAEMON_QUEUES] = { 156 0 stevel {&md_done_daemon, &md_done_daemon_threads}, 157 0 stevel {&md_mstr_daemon, &md_mstr_daemon_threads}, 158 0 stevel {&md_hs_daemon, &md_hs_daemon_threads}, 159 0 stevel {&md_ff_daemonq, &md_ff_daemon_threads}, 160 0 stevel {&md_mirror_daemon, &md_mirror_daemon_threads}, 161 0 stevel {&md_mirror_io_daemon, &md_mirror_daemon_threads}, 162 0 stevel {&md_mirror_rs_daemon, &md_mirror_daemon_threads}, 163 0 stevel {&md_sp_daemon, &md_sp_daemon_threads}, 164 0 stevel {&md_mhs_daemon, &md_mhs_daemon_threads}, 165 8452 John {&md_mto_daemon, &md_mto_daemon_threads}, 166 0 stevel {0, 0} 167 0 stevel }; 168 0 stevel 169 0 stevel /* 170 0 stevel * Number of times a message is retried before issuing a warning to the operator 171 0 stevel */ 172 0 stevel #define MD_MN_WARN_INTVL 10 173 0 stevel 174 0 stevel /* 175 0 stevel * Setting retry cnt to one (pre decremented) so that we actually do no 176 0 stevel * retries when committing/deleting a mddb rec. The underlying disk driver 177 0 stevel * does several retries to check if the disk is really dead or not so there 178 0 stevel * is no reason for us to retry on top of the drivers retries. 179 0 stevel */ 180 0 stevel 181 0 stevel uint_t md_retry_cnt = 1; /* global so it can be patched */ 182 8452 John 183 8452 John /* 184 8452 John * How many times to try to do the door_ki_upcall() in mdmn_ksend_message. 185 8452 John * Again, made patchable here should it prove useful. 186 8452 John */ 187 8452 John uint_t md_send_retry_limit = 30; 188 0 stevel 189 0 stevel /* 190 0 stevel * Bug # 1212146 191 0 stevel * Before this change the user had to pass in a short aligned buffer because of 192 0 stevel * problems in some underlying device drivers. 
This problem seems to have been 193 0 stevel * corrected in the underlying drivers so we will default to not requiring any 194 0 stevel * alignment. If the user needs to check for a specific alignment, 195 0 stevel * md_uio_alignment_mask may be set in /etc/system to accomplish this. To get 196 0 stevel * the behavior before this fix, the md_uio_alignment_mask would be set to 1, 197 0 stevel * to check for word alignment, it can be set to 3, for double word alignment, 198 0 stevel * it can be set to 7, etc. 199 0 stevel * 200 0 stevel * [Other part of fix is in function md_chk_uio()] 201 0 stevel */ 202 0 stevel static int md_uio_alignment_mask = 0; 203 0 stevel 204 0 stevel /* 205 0 stevel * for md_dev64_t translation 206 0 stevel */ 207 0 stevel struct md_xlate_table *md_tuple_table; 208 0 stevel struct md_xlate_major_table *md_major_tuple_table; 209 0 stevel int md_tuple_length; 210 0 stevel uint_t md_majortab_len; 211 0 stevel 212 0 stevel /* Function declarations */ 213 0 stevel 214 0 stevel static int md_create_probe_rqlist(md_probedev_impl_t *plist, 215 0 stevel daemon_queue_t **hdr, intptr_t (*probe_test)()); 216 0 stevel 217 0 stevel /* 218 0 stevel * manipulate global status 219 0 stevel */ 220 0 stevel void 221 0 stevel md_set_status(int bits) 222 0 stevel { 223 0 stevel mutex_enter(&md_mx); 224 0 stevel md_status |= bits; 225 0 stevel mutex_exit(&md_mx); 226 0 stevel } 227 0 stevel 228 0 stevel void 229 0 stevel md_clr_status(int bits) 230 0 stevel { 231 0 stevel mutex_enter(&md_mx); 232 0 stevel md_status &= ~bits; 233 0 stevel mutex_exit(&md_mx); 234 0 stevel } 235 0 stevel 236 0 stevel int 237 0 stevel md_get_status() 238 0 stevel { 239 0 stevel int result; 240 0 stevel mutex_enter(&md_mx); 241 0 stevel result = md_status; 242 0 stevel mutex_exit(&md_mx); 243 0 stevel return (result); 244 0 stevel } 245 0 stevel 246 0 stevel void 247 0 stevel md_set_setstatus(set_t setno, int bits) 248 0 stevel { 249 0 stevel ASSERT(setno != MD_SET_BAD && setno < 
MD_MAXSETS); 250 0 stevel 251 0 stevel mutex_enter(&md_mx); 252 0 stevel md_set[setno].s_status |= bits; 253 0 stevel mutex_exit(&md_mx); 254 0 stevel } 255 0 stevel 256 0 stevel void 257 0 stevel md_clr_setstatus(set_t setno, int bits) 258 0 stevel { 259 0 stevel ASSERT(setno != MD_SET_BAD && setno < MD_MAXSETS); 260 0 stevel 261 0 stevel mutex_enter(&md_mx); 262 0 stevel md_set[setno].s_status &= ~bits; 263 0 stevel mutex_exit(&md_mx); 264 0 stevel } 265 0 stevel 266 0 stevel uint_t 267 0 stevel md_get_setstatus(set_t setno) 268 0 stevel { 269 0 stevel uint_t result; 270 0 stevel 271 0 stevel ASSERT(setno != MD_SET_BAD && setno < MD_MAXSETS); 272 0 stevel 273 0 stevel mutex_enter(&md_mx); 274 0 stevel result = md_set[setno].s_status; 275 0 stevel mutex_exit(&md_mx); 276 0 stevel return (result); 277 0 stevel } 278 0 stevel 279 0 stevel /* 280 0 stevel * md_unit_readerlock_common: 281 0 stevel * ------------------------- 282 0 stevel * Mark the given unit as having a reader reference. Spin waiting for any 283 0 stevel * writer references to be released. 
284 0 stevel * 285 0 stevel * Input: 286 0 stevel * ui unit reference 287 0 stevel * lock_held 0 => ui_mx needs to be grabbed 288 0 stevel * 1 => ui_mx already held 289 0 stevel * Output: 290 0 stevel * mm_unit_t corresponding to unit structure 291 0 stevel * ui->ui_readercnt incremented 292 0 stevel */ 293 0 stevel static void * 294 0 stevel md_unit_readerlock_common(mdi_unit_t *ui, int lock_held) 295 0 stevel { 296 0 stevel uint_t flag = MD_UL_WRITER | MD_UL_WANABEWRITER; 297 0 stevel 298 0 stevel if (!lock_held) 299 0 stevel mutex_enter(&ui->ui_mx); 300 0 stevel while (ui->ui_lock & flag) { 301 0 stevel if (panicstr) { 302 0 stevel if (ui->ui_lock & MD_UL_WRITER) 303 0 stevel panic("md: writer lock is held"); 304 0 stevel break; 305 0 stevel } 306 0 stevel cv_wait(&ui->ui_cv, &ui->ui_mx); 307 0 stevel } 308 0 stevel ui->ui_readercnt++; 309 0 stevel if (!lock_held) 310 0 stevel mutex_exit(&ui->ui_mx); 311 0 stevel return (MD_UNIT(ui->ui_link.ln_id)); 312 0 stevel } 313 0 stevel 314 0 stevel void * 315 0 stevel md_unit_readerlock(mdi_unit_t *ui) 316 0 stevel { 317 0 stevel return (md_unit_readerlock_common(ui, 0)); 318 0 stevel } 319 0 stevel 320 0 stevel /* 321 0 stevel * md_unit_writerlock_common: 322 0 stevel * ------------------------- 323 0 stevel * Acquire a unique writer reference. Causes previous readers to drain. 324 0 stevel * Spins if a writer reference already exists or if a previous reader/writer 325 0 stevel * dropped the lock to allow a ksend_message to be despatched. 
326 0 stevel * 327 0 stevel * Input: 328 0 stevel * ui unit reference 329 0 stevel * lock_held 0 => grab ui_mx 330 0 stevel * 1 => ui_mx already held on entry 331 0 stevel * Output: 332 0 stevel * mm_unit_t reference 333 0 stevel */ 334 0 stevel static void * 335 0 stevel md_unit_writerlock_common(mdi_unit_t *ui, int lock_held) 336 0 stevel { 337 0 stevel uint_t flag = MD_UL_WRITER; 338 0 stevel 339 0 stevel if (panicstr) 340 0 stevel panic("md: writer lock not allowed"); 341 0 stevel 342 0 stevel if (!lock_held) 343 0 stevel mutex_enter(&ui->ui_mx); 344 0 stevel 345 0 stevel while ((ui->ui_lock & flag) || (ui->ui_readercnt != 0)) { 346 0 stevel ui->ui_wanabecnt++; 347 0 stevel ui->ui_lock |= MD_UL_WANABEWRITER; 348 0 stevel cv_wait(&ui->ui_cv, &ui->ui_mx); 349 0 stevel if (--ui->ui_wanabecnt == 0) 350 0 stevel ui->ui_lock &= ~MD_UL_WANABEWRITER; 351 0 stevel } 352 0 stevel ui->ui_lock |= MD_UL_WRITER; 353 0 stevel ui->ui_owner = curthread; 354 0 stevel 355 0 stevel if (!lock_held) 356 0 stevel mutex_exit(&ui->ui_mx); 357 0 stevel return (MD_UNIT(ui->ui_link.ln_id)); 358 0 stevel } 359 0 stevel 360 0 stevel void * 361 0 stevel md_unit_writerlock(mdi_unit_t *ui) 362 0 stevel { 363 0 stevel return (md_unit_writerlock_common(ui, 0)); 364 0 stevel } 365 0 stevel 366 0 stevel /* 367 0 stevel * md_unit_readerexit_common: 368 0 stevel * ------------------------- 369 0 stevel * Release the readerlock for the specified unit. If the reader count reaches 370 0 stevel * zero and there are waiting writers (MD_UL_WANABEWRITER set) wake them up. 
371 0 stevel * 372 0 stevel * Input: 373 0 stevel * ui unit reference 374 0 stevel * lock_held 0 => ui_mx needs to be acquired 375 0 stevel * 1 => ui_mx already held 376 0 stevel */ 377 0 stevel static void 378 0 stevel md_unit_readerexit_common(mdi_unit_t *ui, int lock_held) 379 0 stevel { 380 0 stevel if (!lock_held) 381 0 stevel mutex_enter(&ui->ui_mx); 382 0 stevel ASSERT((ui->ui_lock & MD_UL_WRITER) == 0); 383 0 stevel ASSERT(ui->ui_readercnt != 0); 384 0 stevel ui->ui_readercnt--; 385 0 stevel if ((ui->ui_wanabecnt != 0) && (ui->ui_readercnt == 0)) 386 0 stevel cv_broadcast(&ui->ui_cv); 387 0 stevel 388 0 stevel if (!lock_held) 389 0 stevel mutex_exit(&ui->ui_mx); 390 0 stevel } 391 0 stevel 392 0 stevel void 393 0 stevel md_unit_readerexit(mdi_unit_t *ui) 394 0 stevel { 395 0 stevel md_unit_readerexit_common(ui, 0); 396 0 stevel } 397 0 stevel 398 0 stevel /* 399 0 stevel * md_unit_writerexit_common: 400 0 stevel * ------------------------- 401 0 stevel * Release the writerlock currently held on the unit. Wake any threads waiting 402 0 stevel * on becoming reader or writer (MD_UL_WANABEWRITER set). 
403 0 stevel * 404 0 stevel * Input: 405 0 stevel * ui unit reference 406 0 stevel * lock_held 0 => ui_mx to be acquired 407 0 stevel * 1 => ui_mx already held 408 0 stevel */ 409 0 stevel static void 410 0 stevel md_unit_writerexit_common(mdi_unit_t *ui, int lock_held) 411 0 stevel { 412 0 stevel if (!lock_held) 413 0 stevel mutex_enter(&ui->ui_mx); 414 0 stevel ASSERT((ui->ui_lock & MD_UL_WRITER) != 0); 415 0 stevel ASSERT(ui->ui_readercnt == 0); 416 0 stevel ui->ui_lock &= ~MD_UL_WRITER; 417 0 stevel ui->ui_owner = NULL; 418 0 stevel 419 0 stevel cv_broadcast(&ui->ui_cv); 420 0 stevel if (!lock_held) 421 0 stevel mutex_exit(&ui->ui_mx); 422 0 stevel } 423 0 stevel 424 0 stevel void 425 0 stevel md_unit_writerexit(mdi_unit_t *ui) 426 0 stevel { 427 0 stevel md_unit_writerexit_common(ui, 0); 428 0 stevel } 429 0 stevel 430 0 stevel void * 431 0 stevel md_io_readerlock(mdi_unit_t *ui) 432 0 stevel { 433 0 stevel md_io_lock_t *io = ui->ui_io_lock; 434 0 stevel 435 0 stevel ASSERT(io); /* checks case where no io lock allocated */ 436 0 stevel mutex_enter(&io->io_mx); 437 0 stevel while (io->io_lock & (MD_UL_WRITER | MD_UL_WANABEWRITER)) { 438 0 stevel if (panicstr) { 439 0 stevel if (io->io_lock & MD_UL_WRITER) 440 0 stevel panic("md: writer lock is held"); 441 0 stevel break; 442 0 stevel } 443 0 stevel cv_wait(&io->io_cv, &io->io_mx); 444 0 stevel } 445 0 stevel io->io_readercnt++; 446 0 stevel mutex_exit(&io->io_mx); 447 0 stevel return (MD_UNIT(ui->ui_link.ln_id)); 448 0 stevel } 449 0 stevel 450 0 stevel void * 451 0 stevel md_io_writerlock(mdi_unit_t *ui) 452 0 stevel { 453 0 stevel md_io_lock_t *io = ui->ui_io_lock; 454 0 stevel 455 0 stevel ASSERT(io); /* checks case where no io lock allocated */ 456 0 stevel if (panicstr) 457 0 stevel panic("md: writer lock not allowed"); 458 0 stevel 459 0 stevel mutex_enter(&io->io_mx); 460 0 stevel while ((io->io_lock & MD_UL_WRITER) || (io->io_readercnt != 0)) { 461 0 stevel io->io_wanabecnt++; 462 0 stevel io->io_lock 
|= MD_UL_WANABEWRITER; 463 0 stevel cv_wait(&io->io_cv, &io->io_mx); 464 0 stevel if (--io->io_wanabecnt == 0) 465 0 stevel io->io_lock &= ~MD_UL_WANABEWRITER; 466 0 stevel } 467 0 stevel io->io_lock |= MD_UL_WRITER; 468 0 stevel io->io_owner = curthread; 469 0 stevel 470 0 stevel mutex_exit(&io->io_mx); 471 0 stevel return (MD_UNIT(ui->ui_link.ln_id)); 472 0 stevel } 473 0 stevel 474 0 stevel void 475 0 stevel md_io_readerexit(mdi_unit_t *ui) 476 0 stevel { 477 0 stevel md_io_lock_t *io = ui->ui_io_lock; 478 0 stevel 479 0 stevel mutex_enter(&io->io_mx); 480 0 stevel ASSERT((io->io_lock & MD_UL_WRITER) == 0); 481 0 stevel ASSERT(io->io_readercnt != 0); 482 0 stevel io->io_readercnt--; 483 0 stevel if ((io->io_wanabecnt != 0) && (io->io_readercnt == 0)) { 484 0 stevel cv_broadcast(&io->io_cv); 485 0 stevel } 486 0 stevel mutex_exit(&io->io_mx); 487 0 stevel } 488 0 stevel 489 0 stevel void 490 0 stevel md_io_writerexit(mdi_unit_t *ui) 491 0 stevel { 492 0 stevel md_io_lock_t *io = ui->ui_io_lock; 493 0 stevel 494 0 stevel mutex_enter(&io->io_mx); 495 0 stevel ASSERT((io->io_lock & MD_UL_WRITER) != 0); 496 0 stevel ASSERT(io->io_readercnt == 0); 497 0 stevel io->io_lock &= ~MD_UL_WRITER; 498 0 stevel io->io_owner = NULL; 499 0 stevel 500 0 stevel cv_broadcast(&io->io_cv); 501 0 stevel mutex_exit(&io->io_mx); 502 0 stevel } 503 0 stevel 504 0 stevel /* 505 0 stevel * Attempt to grab that set of locks defined as global. 506 0 stevel * A mask containing the set of global locks that are owned upon 507 0 stevel * entry is input. Any additional global locks are then grabbed. 508 0 stevel * This keeps the caller from having to know the set of global 509 0 stevel * locks. 510 0 stevel */ 511 0 stevel static int 512 0 stevel md_global_lock_enter(int global_locks_owned_mask) 513 0 stevel { 514 0 stevel 515 0 stevel /* 516 0 stevel * The current implementation has been verified by inspection 517 0 stevel * and test to be deadlock free. 
If another global lock is 518 0 stevel * added, changing the algorithm used by this function should 519 0 stevel * be considered. With more than 2 locks it is difficult to 520 0 stevel * guarantee that locks are being acquired in the correct order. 521 0 stevel * The safe approach would be to drop all of the locks that are 522 0 stevel * owned at function entry and then reacquire all of the locks 523 0 stevel * in the order defined by the lock hierarchy. 524 0 stevel */ 525 0 stevel mutex_enter(&md_mx); 526 0 stevel if (!(global_locks_owned_mask & MD_GBL_IOCTL_LOCK)) { 527 0 stevel while ((md_mtioctl_cnt != 0) || 528 0 stevel (md_status & MD_GBL_IOCTL_LOCK)) { 529 0 stevel if (cv_wait_sig_swap(&md_cv, &md_mx) == 0) { 530 0 stevel mutex_exit(&md_mx); 531 0 stevel return (EINTR); 532 0 stevel } 533 0 stevel } 534 0 stevel md_status |= MD_GBL_IOCTL_LOCK; 535 0 stevel md_ioctl_cnt++; 536 0 stevel } 537 0 stevel if (!(global_locks_owned_mask & MD_GBL_HS_LOCK)) { 538 0 stevel while (md_status & MD_GBL_HS_LOCK) { 539 0 stevel if (cv_wait_sig_swap(&md_cv, &md_mx) == 0) { 540 0 stevel md_status &= ~MD_GBL_IOCTL_LOCK; 541 0 stevel mutex_exit(&md_mx); 542 0 stevel return (EINTR); 543 0 stevel } 544 0 stevel } 545 0 stevel md_status |= MD_GBL_HS_LOCK; 546 0 stevel } 547 0 stevel mutex_exit(&md_mx); 548 0 stevel return (0); 549 0 stevel } 550 0 stevel 551 0 stevel /* 552 0 stevel * Release the set of global locks that were grabbed in md_global_lock_enter 553 0 stevel * that were not already owned by the calling thread. The set of previously 554 0 stevel * owned global locks is passed in as a mask parameter. 
555 0 stevel */ 556 0 stevel static int 557 0 stevel md_global_lock_exit(int global_locks_owned_mask, int code, 558 0 stevel int flags, mdi_unit_t *ui) 559 0 stevel { 560 0 stevel mutex_enter(&md_mx); 561 0 stevel 562 0 stevel /* If MT ioctl decrement mt_ioctl_cnt */ 563 0 stevel if ((flags & MD_MT_IOCTL)) { 564 0 stevel md_mtioctl_cnt--; 565 0 stevel } else { 566 0 stevel if (!(global_locks_owned_mask & MD_GBL_IOCTL_LOCK)) { 567 0 stevel /* clear the lock and decrement count */ 568 0 stevel ASSERT(md_ioctl_cnt == 1); 569 0 stevel md_ioctl_cnt--; 570 0 stevel md_status &= ~MD_GBL_IOCTL_LOCK; 571 0 stevel } 572 0 stevel if (!(global_locks_owned_mask & MD_GBL_HS_LOCK)) 573 0 stevel md_status &= ~MD_GBL_HS_LOCK; 574 0 stevel } 575 0 stevel if (flags & MD_READER_HELD) 576 0 stevel md_unit_readerexit(ui); 577 0 stevel if (flags & MD_WRITER_HELD) 578 0 stevel md_unit_writerexit(ui); 579 0 stevel if (flags & MD_IO_HELD) 580 0 stevel md_io_writerexit(ui); 581 0 stevel if (flags & (MD_ARRAY_WRITER | MD_ARRAY_READER)) { 582 0 stevel rw_exit(&md_unit_array_rw.lock); 583 0 stevel } 584 0 stevel cv_broadcast(&md_cv); 585 0 stevel mutex_exit(&md_mx); 586 0 stevel 587 0 stevel return (code); 588 0 stevel } 589 0 stevel 590 0 stevel /* 591 0 stevel * The two functions, md_ioctl_lock_enter, and md_ioctl_lock_exit make 592 0 stevel * use of the md_global_lock_{enter|exit} functions to avoid duplication 593 0 stevel * of code. They rely upon the fact that the locks that are specified in 594 0 stevel * the input mask are not acquired or freed. If this algorithm changes 595 0 stevel * as described in the block comment at the beginning of md_global_lock_enter 596 0 stevel * then it will be necessary to change these 2 functions. Otherwise these 597 0 stevel * functions will be grabbing and holding global locks unnecessarily. 
598 0 stevel */ 599 0 stevel int 600 0 stevel md_ioctl_lock_enter(void) 601 0 stevel { 602 0 stevel /* grab only the ioctl lock */ 603 0 stevel return (md_global_lock_enter(~MD_GBL_IOCTL_LOCK)); 604 0 stevel } 605 0 stevel 606 0 stevel /* 607 0 stevel * If md_ioctl_lock_exit is being called at the end of an ioctl before 608 0 stevel * returning to user space, then ioctl_end is set to 1. 609 0 stevel * Otherwise, the ioctl lock is being dropped in the middle of handling 610 0 stevel * an ioctl and will be reacquired before the end of the ioctl. 611 0 stevel * Do not attempt to process the MN diskset mddb parse flags unless 612 0 stevel * ioctl_end is true - otherwise a deadlock situation could arise. 613 0 stevel */ 614 0 stevel int 615 0 stevel md_ioctl_lock_exit(int code, int flags, mdi_unit_t *ui, int ioctl_end) 616 0 stevel { 617 0 stevel int ret_val; 618 0 stevel uint_t status; 619 0 stevel mddb_set_t *s; 620 0 stevel int i; 621 0 stevel int err; 622 0 stevel md_mn_msg_mddb_parse_t *mddb_parse_msg; 623 0 stevel md_mn_kresult_t *kresult; 624 0 stevel mddb_lb_t *lbp; 625 0 stevel int rval = 1; 626 0 stevel int flag; 627 0 stevel 628 0 stevel /* release only the ioctl lock */ 629 0 stevel ret_val = md_global_lock_exit(~MD_GBL_IOCTL_LOCK, code, flags, ui); 630 0 stevel 631 0 stevel /* 632 0 stevel * If md_ioctl_lock_exit is being called with a possible lock held 633 0 stevel * (ioctl_end is 0), then don't check the MN disksets since the 634 0 stevel * call to mddb_setenter may cause a lock ordering deadlock. 635 0 stevel */ 636 0 stevel if (!ioctl_end) 637 0 stevel return (ret_val); 638 0 stevel 639 0 stevel /* 640 0 stevel * Walk through disksets to see if there is a MN diskset that 641 0 stevel * has messages that need to be sent. Set must be snarfed and 642 0 stevel * be a MN diskset in order to be checked. 
643 0 stevel * 644 0 stevel * In a MN diskset, this routine may send messages to the 645 0 stevel * rpc.mdcommd in order to have the slave nodes re-parse parts 646 0 stevel * of the mddb. Messages can only be sent with no locks held, 647 0 stevel * so if mddb change occurred while the ioctl lock is held, this 648 0 stevel * routine must send the messages. 649 0 stevel */ 650 0 stevel for (i = 1; i < md_nsets; i++) { 651 0 stevel status = md_get_setstatus(i); 652 0 stevel 653 0 stevel /* Set must be snarfed and be a MN diskset */ 654 0 stevel if ((status & (MD_SET_SNARFED | MD_SET_MNSET)) != 655 0 stevel (MD_SET_SNARFED | MD_SET_MNSET)) 656 0 stevel continue; 657 0 stevel 658 0 stevel /* Grab set lock so that set can't change */ 659 0 stevel if ((s = mddb_setenter(i, MDDB_MUSTEXIST, &err)) == NULL) 660 0 stevel continue; 661 0 stevel 662 0 stevel lbp = s->s_lbp; 663 0 stevel 664 0 stevel /* Re-get set status now that lock is held */ 665 0 stevel status = md_get_setstatus(i); 666 0 stevel 667 0 stevel /* 668 0 stevel * If MN parsing block flag is set - continue to next set. 669 0 stevel * 670 0 stevel * If s_mn_parseflags_sending is non-zero, then another thread 671 0 stevel * is already currently sending a parse message, so just 672 0 stevel * release the set mutex. If this ioctl had caused an mddb 673 0 stevel * change that results in a parse message to be generated, 674 0 stevel * the thread that is currently sending a parse message would 675 0 stevel * generate the additional parse message. 676 0 stevel * 677 0 stevel * If s_mn_parseflags_sending is zero then loop until 678 0 stevel * s_mn_parseflags is 0 (until there are no more 679 0 stevel * messages to send). 
680 0 stevel * While s_mn_parseflags is non-zero, 681 0 stevel * put snapshot of parse_flags in s_mn_parseflags_sending 682 0 stevel * set s_mn_parseflags to zero 683 0 stevel * release set mutex 684 0 stevel * send message 685 0 stevel * re-grab set mutex 686 0 stevel * set s_mn_parseflags_sending to zero 687 0 stevel * 688 0 stevel * If set is STALE, send message with NO_LOG flag so that 689 0 stevel * rpc.mdcommd won't attempt to log message to non-writeable 690 0 stevel * replica. 691 0 stevel */ 692 0 stevel mddb_parse_msg = kmem_zalloc(sizeof (md_mn_msg_mddb_parse_t), 693 7563 Prasad KM_SLEEP); 694 0 stevel while (((s->s_mn_parseflags_sending & MDDB_PARSE_MASK) == 0) && 695 0 stevel (s->s_mn_parseflags & MDDB_PARSE_MASK) && 696 0 stevel (!(status & MD_SET_MNPARSE_BLK))) { 697 0 stevel 698 0 stevel /* Grab snapshot of parse flags */ 699 0 stevel s->s_mn_parseflags_sending = s->s_mn_parseflags; 700 0 stevel s->s_mn_parseflags = 0; 701 0 stevel 702 0 stevel mutex_exit(&md_set[(s)->s_setno].s_dbmx); 703 0 stevel 704 0 stevel /* 705 0 stevel * Send the message to the slaves to re-parse 706 0 stevel * the indicated portions of the mddb. Send the status 707 0 stevel * of the 50 mddbs in this set so that slaves know 708 0 stevel * which mddbs that the master node thinks are 'good'. 709 0 stevel * Otherwise, slave may reparse, but from wrong 710 0 stevel * replica. 
711 0 stevel */ 712 0 stevel mddb_parse_msg->msg_parse_flags = 713 7563 Prasad s->s_mn_parseflags_sending; 714 0 stevel 715 0 stevel for (i = 0; i < MDDB_NLB; i++) { 716 0 stevel mddb_parse_msg->msg_lb_flags[i] = 717 7563 Prasad lbp->lb_locators[i].l_flags; 718 0 stevel } 719 11130 James kresult = kmem_alloc(sizeof (md_mn_kresult_t), 720 7563 Prasad KM_SLEEP); 721 0 stevel while (rval != 0) { 722 0 stevel flag = 0; 723 0 stevel if (status & MD_SET_STALE) 724 0 stevel flag |= MD_MSGF_NO_LOG; 725 0 stevel rval = mdmn_ksend_message(s->s_setno, 726 8452 John MD_MN_MSG_MDDB_PARSE, flag, 0, 727 0 stevel (char *)mddb_parse_msg, 728 8452 John sizeof (md_mn_msg_mddb_parse_t), kresult); 729 0 stevel /* if the node hasn't yet joined, it's Ok. */ 730 0 stevel if ((!MDMN_KSEND_MSG_OK(rval, kresult)) && 731 0 stevel (kresult->kmmr_comm_state != 732 7563 Prasad MDMNE_NOT_JOINED)) { 733 0 stevel mdmn_ksend_show_error(rval, kresult, 734 0 stevel "MD_MN_MSG_MDDB_PARSE"); 735 0 stevel cmn_err(CE_WARN, "md_ioctl_lock_exit: " 736 0 stevel "Unable to send mddb update " 737 0 stevel "message to other nodes in " 738 0 stevel "diskset %s\n", s->s_setname); 739 0 stevel rval = 1; 740 0 stevel } 741 0 stevel } 742 0 stevel kmem_free(kresult, sizeof (md_mn_kresult_t)); 743 0 stevel 744 0 stevel /* 745 0 stevel * Re-grab mutex to clear sending field and to 746 0 stevel * see if another parse message needs to be generated. 747 0 stevel */ 748 0 stevel mutex_enter(&md_set[(s)->s_setno].s_dbmx); 749 0 stevel s->s_mn_parseflags_sending = 0; 750 0 stevel } 751 0 stevel kmem_free(mddb_parse_msg, sizeof (md_mn_msg_mddb_parse_t)); 752 0 stevel mutex_exit(&md_set[(s)->s_setno].s_dbmx); 753 0 stevel } 754 0 stevel return (ret_val); 755 0 stevel } 756 0 stevel 757 0 stevel /* 758 0 stevel * Called when in an ioctl and need readerlock. 
759 0 stevel */ 760 0 stevel void * 761 0 stevel md_ioctl_readerlock(IOLOCK *lock, mdi_unit_t *ui) 762 0 stevel { 763 0 stevel ASSERT(lock != NULL); 764 0 stevel lock->l_ui = ui; 765 0 stevel lock->l_flags |= MD_READER_HELD; 766 0 stevel return (md_unit_readerlock_common(ui, 0)); 767 0 stevel } 768 0 stevel 769 0 stevel /* 770 0 stevel * Called when in an ioctl and need writerlock. 771 0 stevel */ 772 0 stevel void * 773 0 stevel md_ioctl_writerlock(IOLOCK *lock, mdi_unit_t *ui) 774 0 stevel { 775 0 stevel ASSERT(lock != NULL); 776 0 stevel lock->l_ui = ui; 777 0 stevel lock->l_flags |= MD_WRITER_HELD; 778 0 stevel return (md_unit_writerlock_common(ui, 0)); 779 0 stevel } 780 0 stevel 781 0 stevel void * 782 0 stevel md_ioctl_io_lock(IOLOCK *lock, mdi_unit_t *ui) 783 0 stevel { 784 0 stevel ASSERT(lock != NULL); 785 0 stevel lock->l_ui = ui; 786 0 stevel lock->l_flags |= MD_IO_HELD; 787 0 stevel return (md_io_writerlock(ui)); 788 0 stevel } 789 0 stevel 790 0 stevel void 791 0 stevel md_ioctl_readerexit(IOLOCK *lock) 792 0 stevel { 793 0 stevel ASSERT(lock != NULL); 794 0 stevel lock->l_flags &= ~MD_READER_HELD; 795 0 stevel md_unit_readerexit(lock->l_ui); 796 0 stevel } 797 0 stevel 798 0 stevel void 799 0 stevel md_ioctl_writerexit(IOLOCK *lock) 800 0 stevel { 801 0 stevel ASSERT(lock != NULL); 802 0 stevel lock->l_flags &= ~MD_WRITER_HELD; 803 0 stevel md_unit_writerexit(lock->l_ui); 804 0 stevel } 805 0 stevel 806 0 stevel void 807 0 stevel md_ioctl_io_exit(IOLOCK *lock) 808 0 stevel { 809 0 stevel ASSERT(lock != NULL); 810 0 stevel lock->l_flags &= ~MD_IO_HELD; 811 0 stevel md_io_writerexit(lock->l_ui); 812 0 stevel } 813 0 stevel 814 0 stevel /* 815 0 stevel * md_ioctl_releaselocks: 816 0 stevel * -------------------- 817 0 stevel * Release the unit locks that are held and stop subsequent 818 0 stevel * md_unit_reader/writerlock calls from progressing. 
This allows the caller
 * to send messages across the cluster when running in a multinode
 * environment.
 * ioctl originated locks (via md_ioctl_readerlock/md_ioctl_writerlock) are
 * allowed to progress as normal.  This is required as these typically are
 * invoked by the message handler that may be called while a unit lock is
 * marked as released.
 *
 * On entry:
 *	variety of unit locks may be held including ioctl lock
 *
 * On exit:
 *	locks released and unit structure updated to prevent subsequent reader/
 *	writer locks being acquired until md_ioctl_reacquirelocks is called
 */
void
md_ioctl_releaselocks(int code, int flags, mdi_unit_t *ui)
{
	/* Every global lock except the ioctl lock is treated as "owned". */
	int	owned_mask = ~MD_GBL_IOCTL_LOCK;

	/*
	 * This actually releases the locks; the value handed back by
	 * md_global_lock_exit() (it is just 'code') is not needed here.
	 */
	(void) md_global_lock_exit(owned_mask, code, flags, ui);
}

/*
 * md_ioctl_reacquirelocks:
 * ----------------------
 * Reacquire the locks that were held when md_ioctl_releaselocks
 * was called.
 *
 * On entry:
 *	No unit locks held
 * On exit:
 *	locks held that were held at md_ioctl_releaselocks time including
 *	the ioctl lock.
851 0 stevel */ 852 0 stevel void 853 0 stevel md_ioctl_reacquirelocks(int flags, mdi_unit_t *ui) 854 0 stevel { 855 0 stevel if (flags & MD_MT_IOCTL) { 856 0 stevel mutex_enter(&md_mx); 857 0 stevel md_mtioctl_cnt++; 858 0 stevel mutex_exit(&md_mx); 859 0 stevel } else { 860 7563 Prasad while (md_ioctl_lock_enter() == EINTR) 861 7563 Prasad ; 862 0 stevel } 863 0 stevel if (flags & MD_ARRAY_WRITER) { 864 0 stevel rw_enter(&md_unit_array_rw.lock, RW_WRITER); 865 0 stevel } else if (flags & MD_ARRAY_READER) { 866 0 stevel rw_enter(&md_unit_array_rw.lock, RW_READER); 867 0 stevel } 868 0 stevel if (ui != (mdi_unit_t *)NULL) { 869 0 stevel if (flags & MD_IO_HELD) { 870 0 stevel (void) md_io_writerlock(ui); 871 0 stevel } 872 0 stevel 873 0 stevel mutex_enter(&ui->ui_mx); 874 0 stevel if (flags & MD_READER_HELD) { 875 0 stevel (void) md_unit_readerlock_common(ui, 1); 876 0 stevel } else if (flags & MD_WRITER_HELD) { 877 0 stevel (void) md_unit_writerlock_common(ui, 1); 878 0 stevel } 879 0 stevel /* Wake up any blocked readerlock() calls */ 880 0 stevel cv_broadcast(&ui->ui_cv); 881 0 stevel mutex_exit(&ui->ui_mx); 882 0 stevel } 883 0 stevel } 884 0 stevel 885 0 stevel void 886 0 stevel md_ioctl_droplocks(IOLOCK *lock) 887 0 stevel { 888 0 stevel mdi_unit_t *ui; 889 0 stevel int flags; 890 0 stevel 891 0 stevel ASSERT(lock != NULL); 892 0 stevel ui = lock->l_ui; 893 0 stevel flags = lock->l_flags; 894 0 stevel if (flags & MD_READER_HELD) { 895 0 stevel lock->l_flags &= ~MD_READER_HELD; 896 0 stevel md_unit_readerexit(ui); 897 0 stevel } 898 0 stevel if (flags & MD_WRITER_HELD) { 899 0 stevel lock->l_flags &= ~MD_WRITER_HELD; 900 0 stevel md_unit_writerexit(ui); 901 0 stevel } 902 0 stevel if (flags & MD_IO_HELD) { 903 0 stevel lock->l_flags &= ~MD_IO_HELD; 904 0 stevel md_io_writerexit(ui); 905 0 stevel } 906 0 stevel if (flags & (MD_ARRAY_WRITER | MD_ARRAY_READER)) { 907 0 stevel lock->l_flags &= ~(MD_ARRAY_WRITER | MD_ARRAY_READER); 908 0 stevel 
rw_exit(&md_unit_array_rw.lock); 909 0 stevel } 910 0 stevel } 911 0 stevel 912 0 stevel void 913 0 stevel md_array_writer(IOLOCK *lock) 914 0 stevel { 915 0 stevel ASSERT(lock != NULL); 916 0 stevel lock->l_flags |= MD_ARRAY_WRITER; 917 0 stevel rw_enter(&md_unit_array_rw.lock, RW_WRITER); 918 0 stevel } 919 0 stevel 920 0 stevel void 921 0 stevel md_array_reader(IOLOCK *lock) 922 0 stevel { 923 0 stevel ASSERT(lock != NULL); 924 0 stevel lock->l_flags |= MD_ARRAY_READER; 925 0 stevel rw_enter(&md_unit_array_rw.lock, RW_READER); 926 0 stevel } 927 0 stevel 928 0 stevel /* 929 0 stevel * Called when in an ioctl and need opencloselock. 930 0 stevel * Sets flags in lockp for READER_HELD. 931 0 stevel */ 932 0 stevel void * 933 0 stevel md_ioctl_openclose_enter(IOLOCK *lockp, mdi_unit_t *ui) 934 0 stevel { 935 0 stevel void *un; 936 0 stevel 937 0 stevel ASSERT(lockp != NULL); 938 0 stevel mutex_enter(&ui->ui_mx); 939 0 stevel while (ui->ui_lock & MD_UL_OPENORCLOSE) 940 0 stevel cv_wait(&ui->ui_cv, &ui->ui_mx); 941 0 stevel ui->ui_lock |= MD_UL_OPENORCLOSE; 942 0 stevel 943 0 stevel /* Maintain mutex across the readerlock call */ 944 0 stevel lockp->l_ui = ui; 945 0 stevel lockp->l_flags |= MD_READER_HELD; 946 0 stevel un = md_unit_readerlock_common(ui, 1); 947 0 stevel mutex_exit(&ui->ui_mx); 948 0 stevel 949 0 stevel return (un); 950 0 stevel } 951 0 stevel 952 0 stevel /* 953 0 stevel * Clears reader lock using md_ioctl instead of md_unit 954 0 stevel * and updates lockp. 
955 0 stevel */ 956 0 stevel void 957 0 stevel md_ioctl_openclose_exit(IOLOCK *lockp) 958 0 stevel { 959 0 stevel mdi_unit_t *ui; 960 0 stevel 961 0 stevel ASSERT(lockp != NULL); 962 0 stevel ui = lockp->l_ui; 963 0 stevel ASSERT(ui->ui_lock & MD_UL_OPENORCLOSE); 964 0 stevel 965 0 stevel md_ioctl_readerexit(lockp); 966 0 stevel 967 0 stevel mutex_enter(&ui->ui_mx); 968 0 stevel ui->ui_lock &= ~MD_UL_OPENORCLOSE; 969 0 stevel 970 0 stevel cv_broadcast(&ui->ui_cv); 971 0 stevel mutex_exit(&ui->ui_mx); 972 0 stevel } 973 0 stevel 974 0 stevel /* 975 0 stevel * Clears reader lock using md_ioctl instead of md_unit 976 0 stevel * and updates lockp. 977 0 stevel * Does not acquire or release the ui_mx lock since the calling 978 0 stevel * routine has already acquired this lock. 979 0 stevel */ 980 0 stevel void 981 0 stevel md_ioctl_openclose_exit_lh(IOLOCK *lockp) 982 0 stevel { 983 0 stevel mdi_unit_t *ui; 984 0 stevel 985 0 stevel ASSERT(lockp != NULL); 986 0 stevel ui = lockp->l_ui; 987 0 stevel ASSERT(ui->ui_lock & MD_UL_OPENORCLOSE); 988 0 stevel 989 0 stevel lockp->l_flags &= ~MD_READER_HELD; 990 0 stevel md_unit_readerexit_common(lockp->l_ui, 1); 991 0 stevel 992 0 stevel ui->ui_lock &= ~MD_UL_OPENORCLOSE; 993 0 stevel cv_broadcast(&ui->ui_cv); 994 0 stevel } 995 0 stevel 996 0 stevel void * 997 0 stevel md_unit_openclose_enter(mdi_unit_t *ui) 998 0 stevel { 999 0 stevel void *un; 1000 0 stevel 1001 0 stevel mutex_enter(&ui->ui_mx); 1002 0 stevel while (ui->ui_lock & (MD_UL_OPENORCLOSE)) 1003 0 stevel cv_wait(&ui->ui_cv, &ui->ui_mx); 1004 0 stevel ui->ui_lock |= MD_UL_OPENORCLOSE; 1005 0 stevel 1006 0 stevel /* Maintain mutex across the readerlock call */ 1007 0 stevel un = md_unit_readerlock_common(ui, 1); 1008 0 stevel mutex_exit(&ui->ui_mx); 1009 0 stevel 1010 0 stevel return (un); 1011 0 stevel } 1012 0 stevel 1013 0 stevel void 1014 0 stevel md_unit_openclose_exit(mdi_unit_t *ui) 1015 0 stevel { 1016 0 stevel md_unit_readerexit(ui); 1017 0 stevel 1018 0 
stevel mutex_enter(&ui->ui_mx); 1019 0 stevel ASSERT(ui->ui_lock & MD_UL_OPENORCLOSE); 1020 0 stevel ui->ui_lock &= ~MD_UL_OPENORCLOSE; 1021 0 stevel 1022 0 stevel cv_broadcast(&ui->ui_cv); 1023 0 stevel mutex_exit(&ui->ui_mx); 1024 0 stevel } 1025 0 stevel 1026 0 stevel /* 1027 0 stevel * Drop the openclose and readerlocks without acquiring or 1028 0 stevel * releasing the ui_mx lock since the calling routine has 1029 0 stevel * already acquired this lock. 1030 0 stevel */ 1031 0 stevel void 1032 0 stevel md_unit_openclose_exit_lh(mdi_unit_t *ui) 1033 0 stevel { 1034 0 stevel md_unit_readerexit_common(ui, 1); 1035 0 stevel ASSERT(ui->ui_lock & MD_UL_OPENORCLOSE); 1036 0 stevel ui->ui_lock &= ~MD_UL_OPENORCLOSE; 1037 0 stevel cv_broadcast(&ui->ui_cv); 1038 0 stevel } 1039 0 stevel 1040 0 stevel int 1041 0 stevel md_unit_isopen( 1042 0 stevel mdi_unit_t *ui 1043 0 stevel ) 1044 0 stevel { 1045 0 stevel int isopen; 1046 0 stevel 1047 0 stevel /* check status */ 1048 0 stevel mutex_enter(&ui->ui_mx); 1049 0 stevel isopen = ((ui->ui_lock & MD_UL_OPEN) ? 
1 : 0); 1050 0 stevel mutex_exit(&ui->ui_mx); 1051 0 stevel return (isopen); 1052 0 stevel } 1053 0 stevel 1054 0 stevel int 1055 0 stevel md_unit_incopen( 1056 0 stevel minor_t mnum, 1057 0 stevel int flag, 1058 0 stevel int otyp 1059 0 stevel ) 1060 0 stevel { 1061 0 stevel mdi_unit_t *ui = MDI_UNIT(mnum); 1062 0 stevel int err = 0; 1063 0 stevel 1064 0 stevel /* check type and flags */ 1065 0 stevel ASSERT(ui != NULL); 1066 0 stevel mutex_enter(&ui->ui_mx); 1067 0 stevel if ((otyp < 0) || (otyp >= OTYPCNT)) { 1068 0 stevel err = EINVAL; 1069 0 stevel goto out; 1070 0 stevel } 1071 0 stevel if (((flag & FEXCL) && (ui->ui_lock & MD_UL_OPEN)) || 1072 0 stevel (ui->ui_lock & MD_UL_EXCL)) { 1073 0 stevel err = EBUSY; 1074 0 stevel goto out; 1075 0 stevel } 1076 0 stevel 1077 0 stevel /* count and flag open */ 1078 0 stevel ui->ui_ocnt[otyp]++; 1079 0 stevel ui->ui_lock |= MD_UL_OPEN; 1080 0 stevel if (flag & FEXCL) 1081 0 stevel ui->ui_lock |= MD_UL_EXCL; 1082 0 stevel 1083 0 stevel /* setup kstat, return success */ 1084 0 stevel mutex_exit(&ui->ui_mx); 1085 0 stevel md_kstat_init(mnum); 1086 0 stevel return (0); 1087 0 stevel 1088 0 stevel /* return error */ 1089 0 stevel out: 1090 0 stevel mutex_exit(&ui->ui_mx); 1091 0 stevel return (err); 1092 0 stevel } 1093 0 stevel 1094 0 stevel int 1095 0 stevel md_unit_decopen( 1096 0 stevel minor_t mnum, 1097 0 stevel int otyp 1098 0 stevel ) 1099 0 stevel { 1100 0 stevel mdi_unit_t *ui = MDI_UNIT(mnum); 1101 0 stevel int err = 0; 1102 0 stevel unsigned i; 1103 0 stevel 1104 0 stevel /* check type and flags */ 1105 0 stevel ASSERT(ui != NULL); 1106 0 stevel mutex_enter(&ui->ui_mx); 1107 0 stevel if ((otyp < 0) || (otyp >= OTYPCNT)) { 1108 0 stevel err = EINVAL; 1109 0 stevel goto out; 1110 0 stevel } else if (ui->ui_ocnt[otyp] == 0) { 1111 0 stevel err = ENXIO; 1112 0 stevel goto out; 1113 0 stevel } 1114 0 stevel 1115 0 stevel /* count and flag closed */ 1116 0 stevel if (otyp == OTYP_LYR) 1117 0 stevel 
ui->ui_ocnt[otyp]--; 1118 0 stevel else 1119 0 stevel ui->ui_ocnt[otyp] = 0; 1120 0 stevel ui->ui_lock &= ~MD_UL_OPEN; 1121 0 stevel for (i = 0; (i < OTYPCNT); ++i) 1122 0 stevel if (ui->ui_ocnt[i] != 0) 1123 0 stevel ui->ui_lock |= MD_UL_OPEN; 1124 0 stevel if (! (ui->ui_lock & MD_UL_OPEN)) 1125 0 stevel ui->ui_lock &= ~MD_UL_EXCL; 1126 0 stevel 1127 0 stevel /* teardown kstat, return success */ 1128 0 stevel if (! (ui->ui_lock & MD_UL_OPEN)) { 1129 10667 Ray 1130 10667 Ray /* 1131 10667 Ray * We have a race condition inherited from specfs between 1132 10667 Ray * open() and close() calls. This results in the kstat 1133 10667 Ray * for a pending I/O being torn down, and then a panic. 1134 10667 Ray * To avoid this, only tear the kstat down if there are 1135 10667 Ray * no other readers on this device. 1136 10667 Ray */ 1137 10667 Ray if (ui->ui_readercnt > 1) { 1138 10667 Ray mutex_exit(&ui->ui_mx); 1139 10667 Ray } else { 1140 10667 Ray mutex_exit(&ui->ui_mx); 1141 10667 Ray md_kstat_destroy(mnum); 1142 10667 Ray } 1143 0 stevel return (0); 1144 0 stevel } 1145 0 stevel 1146 0 stevel /* return success */ 1147 0 stevel out: 1148 0 stevel mutex_exit(&ui->ui_mx); 1149 0 stevel return (err); 1150 0 stevel } 1151 0 stevel 1152 0 stevel md_dev64_t 1153 0 stevel md_xlate_targ_2_mini(md_dev64_t targ_devt) 1154 0 stevel { 1155 0 stevel dev32_t mini_32_devt, targ_32_devt; 1156 0 stevel int i; 1157 0 stevel 1158 0 stevel /* 1159 0 stevel * check to see if we're in an upgrade situation 1160 0 stevel * if we are not in upgrade just return the input device 1161 0 stevel */ 1162 0 stevel 1163 0 stevel if (!MD_UPGRADE) 1164 0 stevel return (targ_devt); 1165 0 stevel 1166 0 stevel targ_32_devt = md_cmpldev(targ_devt); 1167 0 stevel 1168 0 stevel i = 0; 1169 0 stevel while (i != md_tuple_length) { 1170 0 stevel if (md_tuple_table[i].targ_devt == targ_32_devt) { 1171 0 stevel mini_32_devt = md_tuple_table[i].mini_devt; 1172 0 stevel return (md_expldev((md_dev64_t)mini_32_devt)); 
1173 0 stevel } 1174 0 stevel i++; 1175 0 stevel } 1176 0 stevel return (NODEV64); 1177 0 stevel } 1178 0 stevel 1179 0 stevel md_dev64_t 1180 0 stevel md_xlate_mini_2_targ(md_dev64_t mini_devt) 1181 0 stevel { 1182 0 stevel dev32_t mini_32_devt, targ_32_devt; 1183 0 stevel int i; 1184 0 stevel 1185 0 stevel if (!MD_UPGRADE) 1186 0 stevel return (mini_devt); 1187 0 stevel 1188 0 stevel mini_32_devt = md_cmpldev(mini_devt); 1189 0 stevel 1190 0 stevel i = 0; 1191 0 stevel while (i != md_tuple_length) { 1192 0 stevel if (md_tuple_table[i].mini_devt == mini_32_devt) { 1193 0 stevel targ_32_devt = md_tuple_table[i].targ_devt; 1194 0 stevel return (md_expldev((md_dev64_t)targ_32_devt)); 1195 0 stevel } 1196 0 stevel i++; 1197 0 stevel } 1198 0 stevel return (NODEV64); 1199 0 stevel } 1200 0 stevel 1201 0 stevel void 1202 0 stevel md_xlate_free(int size) 1203 0 stevel { 1204 0 stevel kmem_free(md_tuple_table, size); 1205 0 stevel } 1206 0 stevel 1207 0 stevel char * 1208 0 stevel md_targ_major_to_name(major_t maj) 1209 0 stevel { 1210 0 stevel char *drv_name = NULL; 1211 0 stevel int i; 1212 0 stevel 1213 0 stevel if (!MD_UPGRADE) 1214 0 stevel return (ddi_major_to_name(maj)); 1215 0 stevel 1216 0 stevel for (i = 0; i < md_majortab_len; i++) { 1217 0 stevel if (md_major_tuple_table[i].targ_maj == maj) { 1218 0 stevel drv_name = md_major_tuple_table[i].drv_name; 1219 0 stevel break; 1220 0 stevel } 1221 0 stevel } 1222 0 stevel return (drv_name); 1223 0 stevel } 1224 0 stevel 1225 0 stevel major_t 1226 0 stevel md_targ_name_to_major(char *drv_name) 1227 0 stevel { 1228 0 stevel major_t maj; 1229 0 stevel int i; 1230 0 stevel 1231 0 stevel maj = md_getmajor(NODEV64); 1232 0 stevel if (!MD_UPGRADE) 1233 0 stevel return (ddi_name_to_major(drv_name)); 1234 0 stevel 1235 0 stevel for (i = 0; i < md_majortab_len; i++) { 1236 0 stevel if ((strcmp(md_major_tuple_table[i].drv_name, 1237 0 stevel drv_name)) == 0) { 1238 0 stevel maj = md_major_tuple_table[i].targ_maj; 1239 0 stevel 
break; 1240 0 stevel } 1241 0 stevel } 1242 0 stevel 1243 0 stevel return (maj); 1244 0 stevel } 1245 0 stevel 1246 0 stevel void 1247 0 stevel md_majortab_free() 1248 0 stevel { 1249 0 stevel size_t sz; 1250 0 stevel int i; 1251 0 stevel 1252 0 stevel for (i = 0; i < md_majortab_len; i++) { 1253 0 stevel freestr(md_major_tuple_table[i].drv_name); 1254 0 stevel } 1255 0 stevel 1256 0 stevel sz = md_majortab_len * sizeof (struct md_xlate_major_table); 1257 0 stevel kmem_free(md_major_tuple_table, sz); 1258 0 stevel } 1259 0 stevel 1260 0 stevel /* functions return a pointer to a function which returns an int */ 1261 0 stevel 1262 0 stevel intptr_t (* 1263 0 stevel md_get_named_service(md_dev64_t dev, int modindex, char *name, 1264 0 stevel intptr_t (*Default)()))() 1265 0 stevel { 1266 0 stevel mdi_unit_t *ui; 1267 0 stevel md_named_services_t *sp; 1268 0 stevel int i; 1269 0 stevel 1270 0 stevel /* 1271 0 stevel * Return the first named service found. 1272 0 stevel * Use this path when it is known that there is only 1273 0 stevel * one named service possible (e.g., hotspare interface) 1274 0 stevel */ 1275 0 stevel if ((dev == NODEV64) && (modindex == ANY_SERVICE)) { 1276 0 stevel for (i = 0; i < MD_NOPS; i++) { 1277 0 stevel if (md_ops[i] == NULL) { 1278 0 stevel continue; 1279 0 stevel } 1280 0 stevel sp = md_ops[i]->md_services; 1281 0 stevel if (sp == NULL) 1282 0 stevel continue; 1283 0 stevel while (sp->md_service != NULL) { 1284 0 stevel if (strcmp(name, sp->md_name) == 0) 1285 0 stevel return (sp->md_service); 1286 0 stevel sp++; 1287 0 stevel } 1288 0 stevel } 1289 0 stevel return (Default); 1290 0 stevel } 1291 0 stevel 1292 0 stevel /* 1293 0 stevel * Return the named service for the given modindex. 
1294 0 stevel * This is used if there are multiple possible named services 1295 0 stevel * and each one needs to be called (e.g., poke hotspares) 1296 0 stevel */ 1297 0 stevel if (dev == NODEV64) { 1298 0 stevel if (modindex >= MD_NOPS) 1299 0 stevel return (Default); 1300 0 stevel 1301 0 stevel if (md_ops[modindex] == NULL) 1302 0 stevel return (Default); 1303 0 stevel 1304 0 stevel sp = md_ops[modindex]->md_services; 1305 0 stevel if (sp == NULL) 1306 0 stevel return (Default); 1307 0 stevel 1308 0 stevel while (sp->md_service != NULL) { 1309 0 stevel if (strcmp(name, sp->md_name) == 0) 1310 0 stevel return (sp->md_service); 1311 0 stevel sp++; 1312 0 stevel } 1313 0 stevel return (Default); 1314 0 stevel } 1315 0 stevel 1316 0 stevel /* 1317 0 stevel * Return the named service for this md_dev64_t 1318 0 stevel */ 1319 0 stevel if (md_getmajor(dev) != md_major) 1320 0 stevel return (Default); 1321 0 stevel 1322 0 stevel if ((MD_MIN2SET(md_getminor(dev)) >= md_nsets) || 1323 0 stevel (MD_MIN2UNIT(md_getminor(dev)) >= md_nunits)) 1324 0 stevel return (NULL); 1325 0 stevel 1326 0 stevel 1327 0 stevel if ((ui = MDI_UNIT(md_getminor(dev))) == NULL) 1328 0 stevel return (NULL); 1329 0 stevel 1330 0 stevel sp = md_ops[ui->ui_opsindex]->md_services; 1331 0 stevel if (sp == NULL) 1332 0 stevel return (Default); 1333 0 stevel while (sp->md_service != NULL) { 1334 0 stevel if (strcmp(name, sp->md_name) == 0) 1335 0 stevel return (sp->md_service); 1336 0 stevel sp++; 1337 0 stevel } 1338 0 stevel return (Default); 1339 0 stevel } 1340 0 stevel 1341 0 stevel /* 1342 0 stevel * md_daemon callback routine 1343 0 stevel */ 1344 0 stevel boolean_t 1345 0 stevel callb_md_cpr(void *arg, int code) 1346 0 stevel { 1347 0 stevel callb_cpr_t *cp = (callb_cpr_t *)arg; 1348 0 stevel int ret = 0; /* assume success */ 1349 11066 rafael clock_t delta; 1350 0 stevel 1351 0 stevel mutex_enter(cp->cc_lockp); 1352 0 stevel 1353 0 stevel switch (code) { 1354 0 stevel case CB_CODE_CPR_CHKPT: 
1355 0 stevel /* 1356 0 stevel * Check for active resync threads 1357 0 stevel */ 1358 0 stevel mutex_enter(&md_cpr_resync.md_resync_mutex); 1359 0 stevel if ((md_cpr_resync.md_mirror_resync > 0) || 1360 7563 Prasad (md_cpr_resync.md_raid_resync > 0)) { 1361 0 stevel mutex_exit(&md_cpr_resync.md_resync_mutex); 1362 0 stevel cmn_err(CE_WARN, "There are Solaris Volume Manager " 1363 0 stevel "synchronization threads running."); 1364 0 stevel cmn_err(CE_WARN, "Please try system suspension at " 1365 7563 Prasad "a later time."); 1366 0 stevel ret = -1; 1367 0 stevel break; 1368 0 stevel } 1369 0 stevel mutex_exit(&md_cpr_resync.md_resync_mutex); 1370 0 stevel 1371 0 stevel cp->cc_events |= CALLB_CPR_START; 1372 11066 rafael delta = CPR_KTHREAD_TIMEOUT_SEC * hz; 1373 0 stevel while (!(cp->cc_events & CALLB_CPR_SAFE)) 1374 11066 rafael /* cv_reltimedwait() returns -1 if it times out. */ 1375 11066 rafael if ((ret = cv_reltimedwait(&cp->cc_callb_cv, 1376 11066 rafael cp->cc_lockp, delta, TR_CLOCK_TICK)) == -1) 1377 0 stevel break; 1378 0 stevel break; 1379 0 stevel 1380 0 stevel case CB_CODE_CPR_RESUME: 1381 0 stevel cp->cc_events &= ~CALLB_CPR_START; 1382 0 stevel cv_signal(&cp->cc_stop_cv); 1383 0 stevel break; 1384 0 stevel } 1385 0 stevel mutex_exit(cp->cc_lockp); 1386 0 stevel return (ret != -1); 1387 0 stevel } 1388 0 stevel 1389 0 stevel void 1390 0 stevel md_daemon(int pass_thru, mdq_anchor_t *anchor) 1391 0 stevel { 1392 0 stevel daemon_queue_t *dq; 1393 0 stevel callb_cpr_t cprinfo; 1394 0 stevel 1395 0 stevel if (pass_thru && (md_get_status() & MD_GBL_DAEMONS_LIVE)) 1396 0 stevel return; 1397 0 stevel /* 1398 0 stevel * Register cpr callback 1399 0 stevel */ 1400 0 stevel CALLB_CPR_INIT(&cprinfo, &anchor->a_mx, callb_md_cpr, "md_daemon"); 1401 0 stevel 1402 0 stevel /*CONSTCOND*/ 1403 0 stevel while (1) { 1404 0 stevel mutex_enter(&anchor->a_mx); 1405 0 stevel while ((dq = anchor->dq.dq_next) == &(anchor->dq)) { 1406 0 stevel if (pass_thru) { 1407 0 stevel /* 
1408 0 stevel * CALLB_CPR_EXIT Will do 1409 0 stevel * mutex_exit(&anchor->a_mx) 1410 0 stevel */ 1411 0 stevel CALLB_CPR_EXIT(&cprinfo); 1412 0 stevel return; 1413 0 stevel } 1414 0 stevel if (md_get_status() & MD_GBL_DAEMONS_DIE) { 1415 0 stevel mutex_exit(&anchor->a_mx); 1416 0 stevel mutex_enter(&md_mx); 1417 0 stevel md_num_daemons--; 1418 0 stevel mutex_exit(&md_mx); 1419 0 stevel /* 1420 0 stevel * CALLB_CPR_EXIT will do 1421 0 stevel * mutex_exit(&anchor->a_mx) 1422 0 stevel */ 1423 0 stevel mutex_enter(&anchor->a_mx); 1424 0 stevel CALLB_CPR_EXIT(&cprinfo); 1425 0 stevel thread_exit(); 1426 0 stevel } 1427 0 stevel CALLB_CPR_SAFE_BEGIN(&cprinfo); 1428 0 stevel cv_wait(&anchor->a_cv, &anchor->a_mx); 1429 0 stevel CALLB_CPR_SAFE_END(&cprinfo, &anchor->a_mx); 1430 0 stevel } 1431 0 stevel dq->dq_prev->dq_next = dq->dq_next; 1432 0 stevel dq->dq_next->dq_prev = dq->dq_prev; 1433 0 stevel dq->dq_prev = dq->dq_next = NULL; 1434 0 stevel anchor->dq.qlen--; 1435 0 stevel mutex_exit(&anchor->a_mx); 1436 0 stevel (*(dq->dq_call))(dq); 1437 0 stevel } 1438 0 stevel /*NOTREACHED*/ 1439 0 stevel } 1440 0 stevel 1441 0 stevel /* 1442 0 stevel * daemon_request: 1443 0 stevel * 1444 0 stevel * Adds requests to appropriate requestq which is 1445 0 stevel * anchored by *anchor. 1446 0 stevel * The request is the first element of a doubly linked circular list. 1447 0 stevel * When the request is a single element, the forward and backward 1448 0 stevel * pointers MUST point to the element itself. 
1449 0 stevel */ 1450 0 stevel 1451 0 stevel void 1452 0 stevel daemon_request(mdq_anchor_t *anchor, void (*func)(), 1453 0 stevel daemon_queue_t *request, callstyle_t style) 1454 0 stevel { 1455 0 stevel daemon_queue_t *rqtp; 1456 0 stevel int i = 0; 1457 0 stevel 1458 0 stevel rqtp = request; 1459 0 stevel if (style == REQ_OLD) { 1460 0 stevel ASSERT((rqtp->dq_next == NULL) && (rqtp->dq_prev == NULL)); 1461 0 stevel /* set it to the new style */ 1462 0 stevel rqtp->dq_prev = rqtp->dq_next = rqtp; 1463 0 stevel } 1464 0 stevel ASSERT((rqtp->dq_next != NULL) && (rqtp->dq_prev != NULL)); 1465 0 stevel 1466 0 stevel /* scan the list and add the function to each element */ 1467 0 stevel 1468 0 stevel do { 1469 0 stevel rqtp->dq_call = func; 1470 0 stevel i++; 1471 0 stevel rqtp = rqtp->dq_next; 1472 0 stevel } while (rqtp != request); 1473 0 stevel 1474 0 stevel /* save pointer to tail of the request list */ 1475 0 stevel rqtp = request->dq_prev; 1476 0 stevel 1477 0 stevel mutex_enter(&anchor->a_mx); 1478 0 stevel /* stats */ 1479 0 stevel anchor->dq.qlen += i; 1480 0 stevel anchor->dq.treqs += i; 1481 0 stevel anchor->dq.maxq_len = (anchor->dq.qlen > anchor->dq.maxq_len) ? 1482 7563 Prasad anchor->dq.qlen : anchor->dq.maxq_len; 1483 0 stevel 1484 0 stevel /* now add the list to request queue */ 1485 0 stevel request->dq_prev = anchor->dq.dq_prev; 1486 0 stevel rqtp->dq_next = &anchor->dq; 1487 0 stevel anchor->dq.dq_prev->dq_next = request; 1488 0 stevel anchor->dq.dq_prev = rqtp; 1489 0 stevel cv_broadcast(&anchor->a_cv); 1490 0 stevel mutex_exit(&anchor->a_mx); 1491 0 stevel } 1492 0 stevel 1493 0 stevel void 1494 0 stevel mddb_commitrec_wrapper(mddb_recid_t recid) 1495 0 stevel { 1496 0 stevel int sent_log = 0; 1497 0 stevel uint_t retry = md_retry_cnt; 1498 0 stevel set_t setno; 1499 0 stevel 1500 0 stevel while (mddb_commitrec(recid)) { 1501 0 stevel if (! 
sent_log) { 1502 0 stevel cmn_err(CE_WARN, 1503 0 stevel "md: state database commit failed"); 1504 0 stevel sent_log = 1; 1505 0 stevel } 1506 0 stevel delay(md_hz); 1507 0 stevel 1508 0 stevel /* 1509 0 stevel * Setting retry cnt to one (pre decremented) so that we 1510 0 stevel * actually do no retries when committing/deleting a mddb rec. 1511 0 stevel * The underlying disk driver does several retries to check 1512 0 stevel * if the disk is really dead or not so there 1513 0 stevel * is no reason for us to retry on top of the drivers retries. 1514 0 stevel */ 1515 0 stevel 1516 0 stevel if (--retry == 0) { 1517 0 stevel setno = mddb_getsetnum(recid); 1518 0 stevel if (md_get_setstatus(setno) & MD_SET_TOOFEW) { 1519 0 stevel panic( 1520 0 stevel "md: Panic due to lack of DiskSuite state\n" 1521 0 stevel " database replicas. Fewer than 50%% of " 1522 0 stevel "the total were available,\n so panic to " 1523 0 stevel "ensure data integrity."); 1524 0 stevel } else { 1525 0 stevel panic("md: state database problem"); 1526 0 stevel } 1527 0 stevel /*NOTREACHED*/ 1528 0 stevel } 1529 0 stevel } 1530 0 stevel } 1531 0 stevel 1532 0 stevel void 1533 0 stevel mddb_commitrecs_wrapper(mddb_recid_t *recids) 1534 0 stevel { 1535 0 stevel int sent_log = 0; 1536 0 stevel uint_t retry = md_retry_cnt; 1537 0 stevel set_t setno; 1538 0 stevel 1539 0 stevel while (mddb_commitrecs(recids)) { 1540 0 stevel if (! sent_log) { 1541 0 stevel cmn_err(CE_WARN, 1542 0 stevel "md: state database commit failed"); 1543 0 stevel sent_log = 1; 1544 0 stevel } 1545 0 stevel delay(md_hz); 1546 0 stevel 1547 0 stevel /* 1548 0 stevel * Setting retry cnt to one (pre decremented) so that we 1549 0 stevel * actually do no retries when committing/deleting a mddb rec. 1550 0 stevel * The underlying disk driver does several retries to check 1551 0 stevel * if the disk is really dead or not so there 1552 0 stevel * is no reason for us to retry on top of the drivers retries. 
1553 0 stevel */ 1554 0 stevel 1555 0 stevel if (--retry == 0) { 1556 0 stevel /* 1557 0 stevel * since all the records are part of the same set 1558 0 stevel * use the first one to get setno 1559 0 stevel */ 1560 0 stevel setno = mddb_getsetnum(*recids); 1561 0 stevel if (md_get_setstatus(setno) & MD_SET_TOOFEW) { 1562 0 stevel panic( 1563 0 stevel "md: Panic due to lack of DiskSuite state\n" 1564 0 stevel " database replicas. Fewer than 50%% of " 1565 0 stevel "the total were available,\n so panic to " 1566 0 stevel "ensure data integrity."); 1567 0 stevel } else { 1568 0 stevel panic("md: state database problem"); 1569 0 stevel } 1570 0 stevel /*NOTREACHED*/ 1571 0 stevel } 1572 0 stevel } 1573 0 stevel } 1574 0 stevel 1575 0 stevel void 1576 0 stevel mddb_deleterec_wrapper(mddb_recid_t recid) 1577 0 stevel { 1578 0 stevel int sent_log = 0; 1579 0 stevel uint_t retry = md_retry_cnt; 1580 0 stevel set_t setno; 1581 0 stevel 1582 0 stevel while (mddb_deleterec(recid)) { 1583 0 stevel if (! sent_log) { 1584 0 stevel cmn_err(CE_WARN, 1585 0 stevel "md: state database delete failed"); 1586 0 stevel sent_log = 1; 1587 0 stevel } 1588 0 stevel delay(md_hz); 1589 0 stevel 1590 0 stevel /* 1591 0 stevel * Setting retry cnt to one (pre decremented) so that we 1592 0 stevel * actually do no retries when committing/deleting a mddb rec. 1593 0 stevel * The underlying disk driver does several retries to check 1594 0 stevel * if the disk is really dead or not so there 1595 0 stevel * is no reason for us to retry on top of the drivers retries. 1596 0 stevel */ 1597 0 stevel 1598 0 stevel if (--retry == 0) { 1599 0 stevel setno = mddb_getsetnum(recid); 1600 0 stevel if (md_get_setstatus(setno) & MD_SET_TOOFEW) { 1601 0 stevel panic( 1602 0 stevel "md: Panic due to lack of DiskSuite state\n" 1603 0 stevel " database replicas. 
Fewer than 50%% of " 1604 0 stevel "the total were available,\n so panic to " 1605 0 stevel "ensure data integrity."); 1606 0 stevel } else { 1607 0 stevel panic("md: state database problem"); 1608 0 stevel } 1609 0 stevel /*NOTREACHED*/ 1610 0 stevel } 1611 0 stevel } 1612 0 stevel } 1613 0 stevel 1614 0 stevel /* 1615 0 stevel * md_holdset_enter is called in order to hold the set in its 1616 0 stevel * current state (loaded, unloaded, snarfed, unsnarfed, etc) 1617 0 stevel * until md_holdset_exit is called. This is used by the mirror 1618 0 stevel * code to mark the set as HOLD so that the set won't be 1619 0 stevel * unloaded while hotspares are being allocated in check_4_hotspares. 1620 0 stevel * The original fix to the mirror code to hold the set was to call 1621 0 stevel * md_haltsnarf_enter, but this will block all ioctls and ioctls 1622 0 stevel * must work for a MN diskset while hotspares are allocated. 1623 0 stevel */ 1624 0 stevel void 1625 0 stevel md_holdset_enter(set_t setno) 1626 0 stevel { 1627 0 stevel mutex_enter(&md_mx); 1628 0 stevel while (md_set[setno].s_status & MD_SET_HOLD) 1629 0 stevel cv_wait(&md_cv, &md_mx); 1630 0 stevel md_set[setno].s_status |= MD_SET_HOLD; 1631 0 stevel mutex_exit(&md_mx); 1632 0 stevel } 1633 0 stevel 1634 0 stevel void 1635 0 stevel md_holdset_exit(set_t setno) 1636 0 stevel { 1637 0 stevel mutex_enter(&md_mx); 1638 0 stevel md_set[setno].s_status &= ~MD_SET_HOLD; 1639 0 stevel cv_broadcast(&md_cv); 1640 0 stevel mutex_exit(&md_mx); 1641 0 stevel } 1642 0 stevel 1643 0 stevel /* 1644 0 stevel * Returns a 0 if this thread marked the set as HOLD (success), 1645 0 stevel * returns a -1 if set was already marked HOLD (failure). 1646 0 stevel * Used by the release_set code to see if set is marked HOLD. 1647 0 stevel * HOLD is set by a daemon when hotspares are being allocated 1648 0 stevel * to mirror units. 
1649 0 stevel */ 1650 0 stevel int 1651 0 stevel md_holdset_testandenter(set_t setno) 1652 0 stevel { 1653 0 stevel mutex_enter(&md_mx); 1654 0 stevel if (md_set[setno].s_status & MD_SET_HOLD) { 1655 0 stevel mutex_exit(&md_mx); 1656 0 stevel return (-1); 1657 0 stevel } 1658 0 stevel md_set[setno].s_status |= MD_SET_HOLD; 1659 0 stevel mutex_exit(&md_mx); 1660 0 stevel return (0); 1661 0 stevel } 1662 0 stevel 1663 0 stevel void 1664 0 stevel md_haltsnarf_enter(set_t setno) 1665 0 stevel { 1666 0 stevel mutex_enter(&md_mx); 1667 0 stevel while (md_set[setno].s_status & MD_SET_SNARFING) 1668 0 stevel cv_wait(&md_cv, &md_mx); 1669 0 stevel 1670 0 stevel md_set[setno].s_status |= MD_SET_SNARFING; 1671 0 stevel mutex_exit(&md_mx); 1672 0 stevel } 1673 0 stevel 1674 0 stevel void 1675 0 stevel md_haltsnarf_exit(set_t setno) 1676 0 stevel { 1677 0 stevel mutex_enter(&md_mx); 1678 0 stevel md_set[setno].s_status &= ~MD_SET_SNARFING; 1679 0 stevel cv_broadcast(&md_cv); 1680 0 stevel mutex_exit(&md_mx); 1681 0 stevel } 1682 0 stevel 1683 0 stevel void 1684 0 stevel md_haltsnarf_wait(set_t setno) 1685 0 stevel { 1686 0 stevel mutex_enter(&md_mx); 1687 0 stevel while (md_set[setno].s_status & MD_SET_SNARFING) 1688 0 stevel cv_wait(&md_cv, &md_mx); 1689 0 stevel mutex_exit(&md_mx); 1690 0 stevel } 1691 0 stevel 1692 0 stevel /* 1693 0 stevel * ASSUMED that the md_unit_array_rw WRITER lock is held. 
1694 0 stevel */ 1695 0 stevel int 1696 0 stevel md_halt_set(set_t setno, enum md_haltcmd cmd) 1697 0 stevel { 1698 0 stevel int i, err; 1699 0 stevel 1700 0 stevel if (md_set[setno].s_un == NULL || md_set[setno].s_ui == NULL) { 1701 0 stevel return (0); 1702 0 stevel } 1703 0 stevel 1704 0 stevel if ((cmd == MD_HALT_CHECK) || (cmd == MD_HALT_ALL)) { 1705 0 stevel for (i = 0; i < MD_NOPS; i++) { 1706 0 stevel if (md_ops[i] == NULL) 1707 0 stevel continue; 1708 0 stevel if ((*(md_ops[i]->md_halt))(MD_HALT_CLOSE, setno)) { 1709 0 stevel for (--i; i > 0; --i) { 1710 0 stevel if (md_ops[i] == NULL) 1711 0 stevel continue; 1712 0 stevel (void) (*(md_ops[i]->md_halt)) 1713 0 stevel (MD_HALT_OPEN, setno); 1714 0 stevel } 1715 0 stevel return (EBUSY); 1716 0 stevel } 1717 0 stevel } 1718 0 stevel 1719 0 stevel for (i = 0; i < MD_NOPS; i++) { 1720 0 stevel if (md_ops[i] == NULL) 1721 0 stevel continue; 1722 0 stevel if ((*(md_ops[i]->md_halt))(MD_HALT_CHECK, setno)) { 1723 0 stevel for (i = 0; i < MD_NOPS; i++) { 1724 0 stevel if (md_ops[i] == NULL) 1725 0 stevel continue; 1726 0 stevel (void) (*(md_ops[i]->md_halt)) 1727 0 stevel (MD_HALT_OPEN, setno); 1728 0 stevel } 1729 0 stevel return (EBUSY); 1730 0 stevel } 1731 0 stevel } 1732 0 stevel } 1733 0 stevel 1734 0 stevel if ((cmd == MD_HALT_DOIT) || (cmd == MD_HALT_ALL)) { 1735 0 stevel for (i = 0; i < MD_NOPS; i++) { 1736 0 stevel if (md_ops[i] == NULL) 1737 0 stevel continue; 1738 0 stevel err = (*(md_ops[i]->md_halt))(MD_HALT_DOIT, setno); 1739 0 stevel if (err != 0) 1740 0 stevel cmn_err(CE_NOTE, 1741 0 stevel "md: halt failed for %s, error %d", 1742 0 stevel md_ops[i]->md_driver.md_drivername, err); 1743 0 stevel } 1744 0 stevel 1745 0 stevel /* 1746 0 stevel * Unload the devid namespace if it is loaded 1747 0 stevel */ 1748 0 stevel md_unload_namespace(setno, NM_DEVID); 1749 0 stevel md_unload_namespace(setno, 0L); 1750 0 stevel md_clr_setstatus(setno, MD_SET_SNARFED); 1751 0 stevel } 1752 0 stevel 1753 0 stevel 
return (0); 1754 0 stevel } 1755 0 stevel 1756 0 stevel int 1757 0 stevel md_halt(int global_locks_owned_mask) 1758 0 stevel { 1759 0 stevel set_t i, j; 1760 0 stevel int err; 1761 0 stevel int init_queues; 1762 0 stevel md_requestq_entry_t *rqp; 1763 0 stevel md_ops_t **pops, *ops, *lops; 1764 0 stevel ddi_modhandle_t mod; 1765 0 stevel char *name; 1766 0 stevel 1767 0 stevel rw_enter(&md_unit_array_rw.lock, RW_WRITER); 1768 0 stevel 1769 0 stevel /* 1770 0 stevel * Grab the all of the global locks that are not 1771 0 stevel * already owned to ensure that there isn't another 1772 0 stevel * thread trying to access a global resource 1773 0 stevel * while the halt is in progress 1774 0 stevel */ 1775 0 stevel if (md_global_lock_enter(global_locks_owned_mask) == EINTR) 1776 0 stevel return (EINTR); 1777 0 stevel 1778 0 stevel for (i = 0; i < md_nsets; i++) 1779 0 stevel md_haltsnarf_enter(i); 1780 0 stevel 1781 0 stevel /* 1782 0 stevel * Kill the daemon threads. 1783 0 stevel */ 1784 0 stevel init_queues = ((md_get_status() & MD_GBL_DAEMONS_LIVE) ? FALSE : TRUE); 1785 0 stevel md_clr_status(MD_GBL_DAEMONS_LIVE); 1786 0 stevel md_set_status(MD_GBL_DAEMONS_DIE); 1787 0 stevel 1788 0 stevel rqp = &md_daemon_queues[0]; 1789 0 stevel i = 0; 1790 0 stevel while (!NULL_REQUESTQ_ENTRY(rqp)) { 1791 0 stevel cv_broadcast(&rqp->dispq_headp->a_cv); 1792 0 stevel rqp = &md_daemon_queues[++i]; 1793 0 stevel } 1794 0 stevel 1795 0 stevel mutex_enter(&md_mx); 1796 0 stevel while (md_num_daemons != 0) { 1797 0 stevel mutex_exit(&md_mx); 1798 0 stevel delay(md_hz); 1799 0 stevel mutex_enter(&md_mx); 1800 0 stevel } 1801 0 stevel mutex_exit(&md_mx); 1802 0 stevel md_clr_status(MD_GBL_DAEMONS_DIE); 1803 0 stevel 1804 0 stevel for (i = 0; i < md_nsets; i++) 1805 0 stevel /* 1806 0 stevel * Only call into md_halt_set if s_un / s_ui are both set. 1807 0 stevel * If they are NULL this set hasn't been accessed, so its 1808 0 stevel * pointless performing the call. 
1809 0 stevel */ 1810 0 stevel if (md_set[i].s_un != NULL && md_set[i].s_ui != NULL) { 1811 0 stevel if (md_halt_set(i, MD_HALT_CHECK)) { 1812 0 stevel if (md_start_daemons(init_queues)) 1813 0 stevel cmn_err(CE_WARN, 1814 0 stevel "md: restart of daemon threads " 1815 0 stevel "failed"); 1816 0 stevel for (j = 0; j < md_nsets; j++) 1817 0 stevel md_haltsnarf_exit(j); 1818 0 stevel 1819 0 stevel return (md_global_lock_exit( 1820 0 stevel global_locks_owned_mask, EBUSY, 1821 0 stevel MD_ARRAY_WRITER, NULL)); 1822 0 stevel } 1823 0 stevel } 1824 0 stevel 1825 0 stevel /* 1826 0 stevel * if we get here we are going to do it 1827 0 stevel */ 1828 0 stevel for (i = 0; i < md_nsets; i++) { 1829 0 stevel /* 1830 0 stevel * Only call into md_halt_set if s_un / s_ui are both set. 1831 0 stevel * If they are NULL this set hasn't been accessed, so its 1832 0 stevel * pointless performing the call. 1833 0 stevel */ 1834 0 stevel if (md_set[i].s_un != NULL && md_set[i].s_ui != NULL) { 1835 0 stevel err = md_halt_set(i, MD_HALT_DOIT); 1836 0 stevel if (err != 0) 1837 0 stevel cmn_err(CE_NOTE, 1838 0 stevel "md: halt failed set %u, error %d", 1839 0 stevel (unsigned)i, err); 1840 0 stevel } 1841 0 stevel } 1842 0 stevel 1843 0 stevel /* 1844 0 stevel * issue a halt unload to each module to indicate that it 1845 0 stevel * is about to be unloaded. Each module is called once, set 1846 0 stevel * has no meaning at this point in time. 
1847 0 stevel */ 1848 0 stevel for (i = 0; i < MD_NOPS; i++) { 1849 0 stevel if (md_ops[i] == NULL) 1850 0 stevel continue; 1851 0 stevel err = (*(md_ops[i]->md_halt))(MD_HALT_UNLOAD, 0); 1852 0 stevel if (err != 0) 1853 0 stevel cmn_err(CE_NOTE, 1854 0 stevel "md: halt failed for %s, error %d", 1855 0 stevel md_ops[i]->md_driver.md_drivername, err); 1856 0 stevel } 1857 0 stevel 1858 0 stevel /* ddi_modclose the submodules */ 1859 0 stevel for (i = 0; i < MD_NOPS; i++) { 1860 0 stevel /* skip if not open */ 1861 0 stevel if ((md_ops[i] == NULL) || (md_mods[i] == NULL)) 1862 0 stevel continue; 1863 0 stevel 1864 0 stevel /* find and unlink from md_opslist */ 1865 0 stevel ops = md_ops[i]; 1866 0 stevel mod = md_mods[i]; 1867 0 stevel pops = &md_opslist; 1868 0 stevel for (lops = *pops; lops; 1869 0 stevel pops = &lops->md_next, lops = *pops) { 1870 0 stevel if (lops == ops) { 1871 0 stevel *pops = ops->md_next; 1872 0 stevel ops->md_next = NULL; 1873 0 stevel break; 1874 0 stevel } 1875 0 stevel } 1876 0 stevel 1877 0 stevel /* uninitialize */ 1878 7563 Prasad name = ops->md_driver.md_drivername; 1879 0 stevel md_ops[i] = NULL; 1880 0 stevel md_mods[i] = NULL; 1881 0 stevel ops->md_selfindex = 0; 1882 0 stevel ops->md_driver.md_drivername[0] = '\0'; 1883 0 stevel rw_destroy(&ops->md_link_rw.lock); 1884 0 stevel 1885 0 stevel /* close */ 1886 0 stevel err = ddi_modclose(mod); 1887 0 stevel if (err != 0) 1888 0 stevel cmn_err(CE_NOTE, 1889 0 stevel "md: halt close failed for %s, error %d", 1890 0 stevel name ? 
name : "UNKNOWN", err); 1891 0 stevel } 1892 0 stevel 1893 0 stevel /* Unload the database */ 1894 0 stevel mddb_unload(); 1895 0 stevel 1896 0 stevel md_set_status(MD_GBL_HALTED); /* we are ready to be unloaded */ 1897 0 stevel 1898 0 stevel for (i = 0; i < md_nsets; i++) 1899 0 stevel md_haltsnarf_exit(i); 1900 0 stevel 1901 0 stevel return (md_global_lock_exit(global_locks_owned_mask, 0, 1902 7563 Prasad MD_ARRAY_WRITER, NULL)); 1903 0 stevel } 1904 0 stevel 1905 0 stevel /* 1906 0 stevel * md_layered_open() is an internal routine only for SVM modules. 1907 0 stevel * So the input device will be a md_dev64_t, because all SVM modules internally 1908 0 stevel * work with that device type. 1909 0 stevel * ddi routines on the other hand work with dev_t. So, if we call any ddi 1910 0 stevel * routines from here we first have to convert that device into a dev_t. 1911 0 stevel */ 1912 0 stevel 1913 0 stevel int 1914 0 stevel md_layered_open( 1915 0 stevel minor_t mnum, 1916 0 stevel md_dev64_t *dev, 1917 0 stevel int md_oflags 1918 0 stevel ) 1919 0 stevel { 1920 0 stevel int flag = (FREAD | FWRITE); 1921 0 stevel cred_t *cred_p = kcred; 1922 0 stevel major_t major; 1923 0 stevel int err; 1924 0 stevel dev_t ddi_dev = md_dev64_to_dev(*dev); 1925 0 stevel 1926 0 stevel if (ddi_dev == NODEV) 1927 0 stevel return (ENODEV); 1928 0 stevel 1929 0 stevel major = getmajor(ddi_dev); 1930 0 stevel 1931 0 stevel /* metadevice */ 1932 0 stevel if (major == md_major) { 1933 0 stevel mdi_unit_t *ui; 1934 0 stevel 1935 0 stevel /* open underlying driver */ 1936 0 stevel mnum = getminor(ddi_dev); 1937 0 stevel 1938 0 stevel ui = MDI_UNIT(mnum); 1939 0 stevel if (md_ops[ui->ui_opsindex]->md_open != NULL) { 1940 0 stevel int ret = (*md_ops[ui->ui_opsindex]->md_open)(&ddi_dev, 1941 7563 Prasad flag, OTYP_LYR, cred_p, md_oflags); 1942 0 stevel /* 1943 0 stevel * As open() may change the device, 1944 0 stevel * send this info back to the caller. 
1945 0 stevel */ 1946 0 stevel *dev = md_expldev(ddi_dev); 1947 0 stevel return (ret); 1948 0 stevel } 1949 0 stevel 1950 0 stevel /* or do it ourselves */ 1951 0 stevel (void) md_unit_openclose_enter(ui); 1952 0 stevel err = md_unit_incopen(mnum, flag, OTYP_LYR); 1953 0 stevel md_unit_openclose_exit(ui); 1954 0 stevel /* convert our ddi_dev back to the dev we were given */ 1955 0 stevel *dev = md_expldev(ddi_dev); 1956 0 stevel return (err); 1957 0 stevel } 1958 0 stevel 1959 0 stevel /* 1960 0 stevel * Open regular device, since open() may change dev_t give new dev_t 1961 0 stevel * back to the caller. 1962 0 stevel */ 1963 0 stevel err = dev_lopen(&ddi_dev, flag, OTYP_LYR, cred_p); 1964 0 stevel *dev = md_expldev(ddi_dev); 1965 0 stevel return (err); 1966 0 stevel } 1967 0 stevel 1968 0 stevel /* 1969 0 stevel * md_layered_close() is an internal routine only for SVM modules. 1970 0 stevel * So the input device will be a md_dev64_t, because all SVM modules internally 1971 0 stevel * work with that device type. 1972 0 stevel * ddi routines on the other hand work with dev_t. So, if we call any ddi 1973 0 stevel * routines from here we first have to convert that device into a dev_t. 
1974 0 stevel */ 1975 0 stevel void 1976 0 stevel md_layered_close( 1977 0 stevel md_dev64_t dev, 1978 0 stevel int md_cflags 1979 0 stevel ) 1980 0 stevel { 1981 0 stevel int flag = (FREAD | FWRITE); 1982 0 stevel cred_t *cred_p = kcred; 1983 0 stevel dev_t ddi_dev = md_dev64_to_dev(dev); 1984 0 stevel major_t major = getmajor(ddi_dev); 1985 0 stevel minor_t mnum = getminor(ddi_dev); 1986 0 stevel 1987 0 stevel /* metadevice */ 1988 0 stevel if (major == md_major) { 1989 0 stevel mdi_unit_t *ui = MDI_UNIT(mnum); 1990 0 stevel 1991 0 stevel /* close underlying driver */ 1992 0 stevel if (md_ops[ui->ui_opsindex]->md_close != NULL) { 1993 0 stevel (*md_ops[ui->ui_opsindex]->md_close) 1994 0 stevel (ddi_dev, flag, OTYP_LYR, cred_p, md_cflags); 1995 0 stevel return; 1996 0 stevel } 1997 0 stevel 1998 0 stevel /* or do it ourselves */ 1999 0 stevel (void) md_unit_openclose_enter(ui); 2000 0 stevel (void) md_unit_decopen(mnum, OTYP_LYR); 2001 0 stevel md_unit_openclose_exit(ui); 2002 0 stevel return; 2003 0 stevel } 2004 0 stevel 2005 0 stevel /* close regular device */ 2006 0 stevel (void) dev_lclose(ddi_dev, flag, OTYP_LYR, cred_p); 2007 0 stevel } 2008 0 stevel 2009 0 stevel /* 2010 0 stevel * saves a little code in mdstrategy 2011 0 stevel */ 2012 0 stevel int 2013 0 stevel errdone(mdi_unit_t *ui, struct buf *bp, int err) 2014 0 stevel { 2015 0 stevel if ((bp->b_error = err) != 0) 2016 0 stevel bp->b_flags |= B_ERROR; 2017 0 stevel else 2018 0 stevel bp->b_resid = bp->b_bcount; 2019 0 stevel md_unit_readerexit(ui); 2020 0 stevel md_biodone(bp); 2021 0 stevel return (1); 2022 0 stevel } 2023 0 stevel 2024 0 stevel static int md_write_label = 0; 2025 0 stevel 2026 0 stevel int 2027 0 stevel md_checkbuf(mdi_unit_t *ui, md_unit_t *un, buf_t *bp) 2028 0 stevel { 2029 0 stevel diskaddr_t endblk; 2030 0 stevel set_t setno = MD_UN2SET(un); 2031 0 stevel 2032 0 stevel if ((md_get_setstatus(setno) & MD_SET_STALE) && 2033 0 stevel (! 
(bp->b_flags & B_READ))) 2034 0 stevel return (errdone(ui, bp, EROFS)); 2035 0 stevel /* 2036 0 stevel * Check early for unreasonable block number. 2037 0 stevel * 2038 0 stevel * b_blkno is defined as adaddr_t which is typedef'd to a long. 2039 0 stevel * A problem occurs if b_blkno has bit 31 set and un_total_blocks 2040 0 stevel * doesn't, b_blkno is then compared as a negative number which is 2041 0 stevel * always less than a positive. 2042 0 stevel */ 2043 0 stevel if ((u_longlong_t)bp->b_lblkno > (u_longlong_t)un->c.un_total_blocks) 2044 0 stevel return (errdone(ui, bp, EINVAL)); 2045 0 stevel 2046 0 stevel if (bp->b_lblkno == un->c.un_total_blocks) 2047 0 stevel return (errdone(ui, bp, 0)); 2048 0 stevel 2049 0 stevel /* 2050 0 stevel * make sure we don't clobber any labels 2051 0 stevel */ 2052 0 stevel if ((bp->b_lblkno == 0) && (! (bp->b_flags & B_READ)) && 2053 0 stevel (un->c.un_flag & MD_LABELED) && (! md_write_label)) { 2054 0 stevel cmn_err(CE_NOTE, "md: %s: write to label", 2055 0 stevel md_shortname(getminor(bp->b_edev))); 2056 0 stevel return (errdone(ui, bp, EINVAL)); 2057 0 stevel } 2058 0 stevel 2059 0 stevel bp->b_resid = 0; 2060 0 stevel endblk = (diskaddr_t)(bp->b_lblkno + 2061 7563 Prasad howmany(bp->b_bcount, DEV_BSIZE) - 1); 2062 0 stevel 2063 0 stevel if (endblk > (un->c.un_total_blocks - 1)) { 2064 0 stevel bp->b_resid = dbtob(endblk - (un->c.un_total_blocks - 1)); 2065 0 stevel endblk = un->c.un_total_blocks - 1; 2066 0 stevel bp->b_bcount -= bp->b_resid; 2067 0 stevel } 2068 0 stevel return (0); 2069 0 stevel } 2070 0 stevel 2071 0 stevel /* 2072 0 stevel * init_request_queue: initializes the request queues and creates the threads. 2073 0 stevel * return value = 0 :invalid num_threads 2074 0 stevel * = n : n is the number of threads created. 
2075 0 stevel */ 2076 0 stevel 2077 0 stevel int 2078 0 stevel init_requestq( 2079 0 stevel md_requestq_entry_t *rq, /* request queue info */ 2080 0 stevel void (*threadfn)(), /* function to start the thread */ 2081 0 stevel caddr_t threadfn_args, /* args to the function */ 2082 0 stevel int pri, /* thread priority */ 2083 0 stevel int init_queue) /* flag to init queues */ 2084 0 stevel { 2085 0 stevel struct mdq_anchor *rqhead; 2086 0 stevel int i; 2087 0 stevel int num_threads; 2088 0 stevel 2089 0 stevel 2090 0 stevel num_threads = *(rq->num_threadsp); 2091 0 stevel rqhead = rq->dispq_headp; 2092 0 stevel 2093 0 stevel if (NULL_REQUESTQ_ENTRY(rq) || num_threads == 0) 2094 0 stevel return (0); 2095 0 stevel 2096 0 stevel if (init_queue) { 2097 0 stevel rqhead->dq.maxq_len = 0; 2098 0 stevel rqhead->dq.treqs = 0; 2099 0 stevel rqhead->dq.dq_next = &rqhead->dq; 2100 0 stevel rqhead->dq.dq_prev = &rqhead->dq; 2101 0 stevel cv_init(&rqhead->a_cv, NULL, CV_DEFAULT, NULL); 2102 0 stevel mutex_init(&rqhead->a_mx, NULL, MUTEX_DEFAULT, NULL); 2103 0 stevel } 2104 0 stevel for (i = 0; i < num_threads; i++) { 2105 0 stevel (void) thread_create(NULL, 0, threadfn, threadfn_args, 0, &p0, 2106 0 stevel TS_RUN, pri); 2107 0 stevel } 2108 0 stevel return (i); 2109 0 stevel } 2110 0 stevel 2111 0 stevel static void 2112 0 stevel start_daemon(struct mdq_anchor *q) 2113 0 stevel { 2114 0 stevel md_daemon(0, q); 2115 0 stevel ASSERT(0); 2116 0 stevel } 2117 0 stevel 2118 0 stevel /* 2119 0 stevel * Creates all the md daemons. 2120 0 stevel * Global: 2121 0 stevel * md_num_daemons is set to number of daemons. 2122 0 stevel * MD_GBL_DAEMONS_LIVE flag set to indicate the daemons are active. 
2123 0 stevel * 2124 0 stevel * Return value: 0 success 2125 0 stevel * 1 failure 2126 0 stevel */ 2127 0 stevel int 2128 0 stevel md_start_daemons(int init_queue) 2129 0 stevel { 2130 0 stevel md_requestq_entry_t *rqp; 2131 0 stevel int cnt; 2132 0 stevel int i; 2133 0 stevel int retval = 0; 2134 0 stevel 2135 0 stevel 2136 0 stevel if (md_get_status() & MD_GBL_DAEMONS_LIVE) { 2137 0 stevel return (retval); 2138 0 stevel } 2139 0 stevel md_clr_status(MD_GBL_DAEMONS_DIE); 2140 0 stevel 2141 0 stevel rqp = &md_daemon_queues[0]; 2142 0 stevel i = 0; 2143 0 stevel while (!NULL_REQUESTQ_ENTRY(rqp)) { 2144 0 stevel cnt = init_requestq(rqp, start_daemon, 2145 7563 Prasad (caddr_t)rqp->dispq_headp, minclsyspri, init_queue); 2146 0 stevel 2147 0 stevel if (cnt && cnt != *rqp->num_threadsp) { 2148 0 stevel retval = 1; 2149 0 stevel break; 2150 0 stevel } 2151 0 stevel /* 2152 0 stevel * initialize variables 2153 0 stevel */ 2154 0 stevel md_num_daemons += cnt; 2155 0 stevel rqp = &md_daemon_queues[++i]; 2156 0 stevel } 2157 0 stevel 2158 0 stevel md_set_status(MD_GBL_DAEMONS_LIVE); 2159 0 stevel return (retval); 2160 0 stevel } 2161 0 stevel 2162 0 stevel int 2163 0 stevel md_loadsubmod(set_t setno, char *name, int drvrid) 2164 0 stevel { 2165 0 stevel ddi_modhandle_t mod; 2166 0 stevel md_ops_t **pops, *ops; 2167 0 stevel int i, err; 2168 0 stevel 2169 0 stevel /* 2170 0 stevel * See if the submodule is mdopened. If not, i is the index of the 2171 0 stevel * next empty slot. 2172 0 stevel */ 2173 0 stevel for (i = 0; md_ops[i] != NULL; i++) { 2174 0 stevel if (strncmp(name, md_ops[i]->md_driver.md_drivername, 2175 0 stevel MD_DRIVERNAMELEN) == 0) 2176 0 stevel return (i); 2177 0 stevel 2178 0 stevel if (i == (MD_NOPS - 1)) 2179 0 stevel return (-1); 2180 0 stevel } 2181 0 stevel 2182 0 stevel if (drvrid < 0) { 2183 0 stevel /* Do not try to add any records to the DB when stale. 
*/ 2184 0 stevel if (md_get_setstatus(setno) & MD_SET_STALE) 2185 0 stevel return (-1); 2186 0 stevel drvrid = md_setshared_name(setno, name, 0L); 2187 0 stevel } 2188 0 stevel 2189 0 stevel if (drvrid < 0) 2190 0 stevel return (-1); 2191 0 stevel 2192 0 stevel /* open and import the md_ops of the submodules */ 2193 0 stevel mod = ddi_modopen(name, KRTLD_MODE_FIRST, &err); 2194 0 stevel if (mod == NULL) { 2195 0 stevel cmn_err(CE_WARN, "md_loadsubmod: " 2196 0 stevel "unable to ddi_modopen %s, error %d\n", name, err); 2197 0 stevel return (-1); 2198 0 stevel } 2199 0 stevel pops = ddi_modsym(mod, "md_interface_ops", &err); 2200 0 stevel if (pops == NULL) { 2201 0 stevel cmn_err(CE_WARN, "md_loadsubmod: " 2202 0 stevel "unable to import md_interface_ops from %s, error %d\n", 2203 0 stevel name, err); 2204 0 stevel (void) ddi_modclose(mod); 2205 0 stevel return (-1); 2206 0 stevel } 2207 0 stevel 2208 0 stevel /* ddi_modsym returns pointer to md_interface_ops in submod */ 2209 0 stevel ops = *pops; 2210 0 stevel 2211 0 stevel /* initialize */ 2212 0 stevel ops->md_selfindex = i; 2213 0 stevel rw_init(&ops->md_link_rw.lock, NULL, RW_DEFAULT, NULL); 2214 0 stevel (void) strncpy(ops->md_driver.md_drivername, name, 2215 0 stevel MD_DRIVERNAMELEN); 2216 0 stevel 2217 0 stevel /* plumb */ 2218 0 stevel md_ops[i] = ops; 2219 0 stevel md_mods[i] = mod; 2220 0 stevel ops->md_next = md_opslist; 2221 0 stevel md_opslist = ops; 2222 0 stevel 2223 0 stevel /* return index */ 2224 0 stevel return (i); 2225 0 stevel } 2226 0 stevel 2227 0 stevel int 2228 0 stevel md_getmodindex(md_driver_t *driver, int dont_load, int db_notrequired) 2229 0 stevel { 2230 0 stevel int i; 2231 0 stevel int modindex; 2232 0 stevel char *name = driver->md_drivername; 2233 0 stevel set_t setno = driver->md_setno; 2234 0 stevel int drvid; 2235 0 stevel int local_dont_load; 2236 0 stevel 2237 0 stevel if (setno >= md_nsets) 2238 0 stevel return (-1); 2239 0 stevel 2240 0 stevel for (i = 0; name[i] != 0; 
i++) 2241 0 stevel if (i == (MD_DRIVERNAMELEN -1)) 2242 0 stevel return (-1); 2243 0 stevel 2244 0 stevel /* 2245 0 stevel * If set is STALE, set local_dont_load to 1 since no records 2246 0 stevel * should be added to DB when stale. 2247 0 stevel */ 2248 0 stevel if (md_get_setstatus(setno) & MD_SET_STALE) { 2249 0 stevel local_dont_load = 1; 2250 0 stevel } else { 2251 0 stevel local_dont_load = dont_load; 2252 0 stevel } 2253 0 stevel 2254 0 stevel /* 2255 0 stevel * Single thread ioctl module binding with respect to 2256 0 stevel * similar code executed in md_loadsubmod that is called 2257 0 stevel * from md_snarf_db_set (which is where that path does 2258 0 stevel * its md_haltsnarf_enter call). 2259 0 stevel */ 2260 0 stevel md_haltsnarf_enter(setno); 2261 0 stevel 2262 0 stevel /* See if the submodule is already ddi_modopened. */ 2263 0 stevel for (i = 0; md_ops[i] != NULL; i++) { 2264 0 stevel if (strncmp(name, md_ops[i]->md_driver.md_drivername, 2265 0 stevel MD_DRIVERNAMELEN) == 0) { 2266 0 stevel if (! local_dont_load && 2267 0 stevel (md_getshared_key(setno, name) == MD_KEYBAD)) { 2268 0 stevel if (md_setshared_name(setno, name, 0L) 2269 0 stevel == MD_KEYBAD) { 2270 0 stevel if (!db_notrequired) 2271 0 stevel goto err; 2272 0 stevel } 2273 0 stevel } 2274 0 stevel md_haltsnarf_exit(setno); 2275 0 stevel return (i); 2276 0 stevel } 2277 0 stevel 2278 0 stevel if (i == (MD_NOPS -1)) 2279 0 stevel break; 2280 0 stevel } 2281 0 stevel 2282 0 stevel if (local_dont_load) 2283 0 stevel goto err; 2284 0 stevel 2285 0 stevel drvid = ((db_notrequired) ? 
0 : (int)md_getshared_key(setno, name)); 2286 0 stevel 2287 0 stevel /* ddi_modopen the submodule */ 2288 0 stevel modindex = md_loadsubmod(setno, name, drvid); 2289 0 stevel if (modindex < 0) 2290 0 stevel goto err; 2291 0 stevel 2292 0 stevel if (md_ops[modindex]->md_snarf != NULL) 2293 0 stevel (*(md_ops[modindex]->md_snarf))(MD_SNARF_DOIT, setno); 2294 0 stevel 2295 0 stevel md_haltsnarf_exit(setno); 2296 0 stevel return (modindex); 2297 0 stevel 2298 0 stevel err: md_haltsnarf_exit(setno); 2299 0 stevel return (-1); 2300 0 stevel } 2301 0 stevel 2302 0 stevel void 2303 0 stevel md_call_strategy(buf_t *bp, int flags, void *private) 2304 0 stevel { 2305 0 stevel mdi_unit_t *ui; 2306 0 stevel 2307 0 stevel if (mdv_strategy_tstpnt) 2308 0 stevel if ((*mdv_strategy_tstpnt)(bp, flags, private) != 0) 2309 0 stevel return; 2310 0 stevel if (getmajor(bp->b_edev) != md_major) { 2311 0 stevel (void) bdev_strategy(bp); 2312 0 stevel return; 2313 0 stevel } 2314 0 stevel 2315 0 stevel flags = (flags & MD_STR_PASSEDON) | MD_STR_NOTTOP; 2316 0 stevel ui = MDI_UNIT(getminor(bp->b_edev)); 2317 0 stevel ASSERT(ui != NULL); 2318 0 stevel (*md_ops[ui->ui_opsindex]->md_strategy)(bp, flags, private); 2319 0 stevel } 2320 0 stevel 2321 0 stevel /* 2322 0 stevel * md_call_ioctl: 2323 0 stevel * ------------- 2324 0 stevel * Issue the specified ioctl to the device associated with the given md_dev64_t 2325 0 stevel * 2326 0 stevel * Arguments: 2327 0 stevel * dev - underlying device [md_dev64_t] 2328 0 stevel * cmd - ioctl to perform 2329 0 stevel * data - arguments / result location 2330 0 stevel * mode - read/write/layered ioctl 2331 0 stevel * lockp - lock reference 2332 0 stevel * 2333 0 stevel * Returns: 2334 0 stevel * 0 success 2335 0 stevel * !=0 Failure (error code) 2336 0 stevel */ 2337 0 stevel int 2338 0 stevel md_call_ioctl(md_dev64_t dev, int cmd, void *data, int mode, IOLOCK *lockp) 2339 0 stevel { 2340 0 stevel dev_t device = md_dev64_to_dev(dev); 2341 0 stevel int 
rval; 2342 0 stevel mdi_unit_t *ui; 2343 0 stevel 2344 0 stevel /* 2345 0 stevel * See if device is a metadevice. If not call cdev_ioctl(), otherwise 2346 0 stevel * call the ioctl entry-point in the metadevice. 2347 0 stevel */ 2348 0 stevel if (md_getmajor(dev) != md_major) { 2349 0 stevel int rv; 2350 0 stevel rval = cdev_ioctl(device, cmd, (intptr_t)data, mode, 2351 0 stevel ddi_get_cred(), &rv); 2352 0 stevel } else { 2353 0 stevel ui = MDI_UNIT(md_getminor(dev)); 2354 0 stevel ASSERT(ui != NULL); 2355 0 stevel rval = (*md_ops[ui->ui_opsindex]->md_ioctl)(device, cmd, data, 2356 0 stevel mode, lockp); 2357 0 stevel } 2358 0 stevel return (rval); 2359 0 stevel } 2360 0 stevel 2361 0 stevel void 2362 0 stevel md_rem_link(set_t setno, int id, krwlock_t *rw, md_link_t **head) 2363 0 stevel { 2364 0 stevel md_link_t *next; 2365 0 stevel md_link_t **pprev; 2366 0 stevel 2367 0 stevel rw_enter(rw, RW_WRITER); 2368 0 stevel 2369 0 stevel next = *head; 2370 0 stevel pprev = head; 2371 0 stevel while (next) { 2372 0 stevel if ((next->ln_setno == setno) && (next->ln_id == id)) { 2373 0 stevel *pprev = next->ln_next; 2374 0 stevel rw_exit(rw); 2375 0 stevel return; 2376 0 stevel } 2377 0 stevel pprev = &next->ln_next; 2378 0 stevel next = next->ln_next; 2379 0 stevel } 2380 0 stevel 2381 0 stevel rw_exit(rw); 2382 0 stevel } 2383 0 stevel 2384 0 stevel int 2385 0 stevel md_dev_exists(md_dev64_t dev) 2386 0 stevel { 2387 0 stevel 2388 0 stevel if (dev == NODEV64) 2389 0 stevel return (0); 2390 0 stevel 2391 0 stevel if (strcmp(ddi_major_to_name(md_getmajor(dev)), "md") != 0) 2392 0 stevel return (1); 2393 0 stevel 2394 0 stevel if ((MD_MIN2SET(md_getminor(dev)) >= md_nsets) || 2395 0 stevel (MD_MIN2UNIT(md_getminor(dev)) >= md_nunits)) 2396 0 stevel return (0); 2397 0 stevel 2398 0 stevel if (MDI_UNIT(md_getminor(dev)) != NULL) 2399 0 stevel return (1); 2400 0 stevel 2401 0 stevel return (0); 2402 0 stevel } 2403 0 stevel 2404 0 stevel md_parent_t 2405 0 stevel 
md_get_parent(md_dev64_t dev) 2406 0 stevel { 2407 0 stevel md_unit_t *un; 2408 0 stevel mdi_unit_t *ui; 2409 0 stevel md_parent_t parent; 2410 0 stevel 2411 0 stevel if (md_getmajor(dev) != md_major) 2412 0 stevel return (MD_NO_PARENT); 2413 0 stevel 2414 0 stevel ui = MDI_UNIT(md_getminor(dev)); 2415 0 stevel 2416 0 stevel un = (md_unit_t *)md_unit_readerlock(ui); 2417 0 stevel parent = un->c.un_parent; 2418 0 stevel md_unit_readerexit(ui); 2419 0 stevel 2420 0 stevel return (parent); 2421 0 stevel } 2422 0 stevel 2423 0 stevel void 2424 0 stevel md_set_parent(md_dev64_t dev, md_parent_t parent) 2425 0 stevel { 2426 0 stevel md_unit_t *un; 2427 0 stevel mdi_unit_t *ui; 2428 0 stevel 2429 0 stevel if (md_getmajor(dev) != md_major) 2430 0 stevel return; 2431 0 stevel 2432 0 stevel ui = MDI_UNIT(md_getminor(dev)); 2433 0 stevel 2434 0 stevel un = (md_unit_t *)md_unit_readerlock(ui); 2435 0 stevel un->c.un_parent = parent; 2436 0 stevel md_unit_readerexit(ui); 2437 0 stevel } 2438 0 stevel 2439 0 stevel void 2440 0 stevel md_reset_parent(md_dev64_t dev) 2441 0 stevel { 2442 0 stevel md_unit_t *un; 2443 0 stevel mdi_unit_t *ui; 2444 0 stevel 2445 0 stevel if (md_getmajor(dev) != md_major) 2446 0 stevel return; 2447 0 stevel 2448 0 stevel ui = MDI_UNIT(md_getminor(dev)); 2449 0 stevel 2450 0 stevel un = (md_unit_t *)md_unit_readerlock(ui); 2451 0 stevel un->c.un_parent = MD_NO_PARENT; 2452 0 stevel md_unit_readerexit(ui); 2453 0 stevel } 2454 0 stevel 2455 0 stevel 2456 0 stevel static intptr_t (*hot_spare_interface)() = (intptr_t (*)())NULL; 2457 0 stevel 2458 0 stevel int 2459 0 stevel md_hot_spare_ifc( 2460 0 stevel hs_cmds_t cmd, 2461 0 stevel mddb_recid_t id, 2462 0 stevel u_longlong_t size, 2463 0 stevel int labeled, 2464 0 stevel mddb_recid_t *hs_id, 2465 0 stevel mdkey_t *key, 2466 0 stevel md_dev64_t *dev, 2467 0 stevel diskaddr_t *sblock) 2468 0 stevel { 2469 0 stevel int err; 2470 0 stevel 2471 0 stevel /* 2472 0 stevel * RW lock on hot_spare_interface. 
We don't want it to change from 2473 0 stevel * underneath us. If hot_spare_interface is NULL we're going to 2474 0 stevel * need to set it. So we need to upgrade to a WRITER lock. If that 2475 0 stevel * doesn't work, we drop the lock and reenter as WRITER. This leaves 2476 0 stevel * a small hole during which hot_spare_interface could be modified 2477 0 stevel * so we check it for NULL again. What a pain. Then if still null 2478 0 stevel * load from md_get_named_service. 2479 0 stevel */ 2480 0 stevel 2481 0 stevel rw_enter(&hsp_rwlp.lock, RW_READER); 2482 0 stevel if (hot_spare_interface == NULL) { 2483 0 stevel if (rw_tryupgrade(&hsp_rwlp.lock) == 0) { 2484 0 stevel rw_exit(&hsp_rwlp.lock); 2485 0 stevel rw_enter(&hsp_rwlp.lock, RW_WRITER); 2486 0 stevel if (hot_spare_interface != NULL) { 2487 0 stevel err = ((*hot_spare_interface) 2488 0 stevel (cmd, id, size, labeled, hs_id, key, dev, 2489 0 stevel sblock)); 2490 0 stevel rw_exit(&hsp_rwlp.lock); 2491 0 stevel return (err); 2492 0 stevel } 2493 0 stevel } 2494 0 stevel hot_spare_interface = md_get_named_service(NODEV64, ANY_SERVICE, 2495 0 stevel "hot spare interface", 0); 2496 0 stevel rw_downgrade(&hsp_rwlp.lock); 2497 0 stevel } 2498 0 stevel 2499 0 stevel if (hot_spare_interface == NULL) { 2500 0 stevel cmn_err(CE_WARN, "md: no hotspare interface"); 2501 0 stevel rw_exit(&hsp_rwlp.lock); 2502 0 stevel return (0); 2503 0 stevel } 2504 0 stevel 2505 0 stevel err = ((*hot_spare_interface) 2506 0 stevel (cmd, id, size, labeled, hs_id, key, dev, sblock)); 2507 0 stevel rw_exit(&hsp_rwlp.lock); 2508 0 stevel return (err); 2509 0 stevel } 2510 0 stevel 2511 0 stevel void 2512 0 stevel md_clear_hot_spare_interface() 2513 0 stevel { 2514 0 stevel rw_enter(&hsp_rwlp.lock, RW_WRITER); 2515 0 stevel hot_spare_interface = NULL; 2516 0 stevel rw_exit(&hsp_rwlp.lock); 2517 0 stevel } 2518 0 stevel 2519 0 stevel 2520 0 stevel static intptr_t (*notify_interface)() = (intptr_t (*)())NULL; 2521 0 stevel 2522 0 stevel int 
2523 0 stevel md_notify_interface( 2524 0 stevel md_event_cmds_t cmd, 2525 0 stevel md_tags_t tag, 2526 0 stevel set_t set, 2527 0 stevel md_dev64_t dev, 2528 0 stevel md_event_type_t event 2529 0 stevel ) 2530 0 stevel { 2531 0 stevel int err; 2532 0 stevel 2533 0 stevel if (md_event_queue == NULL) 2534 0 stevel return (0); 2535 0 stevel rw_enter(&ni_rwlp.lock, RW_READER); 2536 0 stevel if (notify_interface == NULL) { 2537 0 stevel if (rw_tryupgrade(&ni_rwlp.lock) == 0) { 2538 0 stevel rw_exit(&ni_rwlp.lock); 2539 0 stevel rw_enter(&ni_rwlp.lock, RW_WRITER); 2540 0 stevel if (notify_interface != NULL) { 2541 0 stevel err = ((*notify_interface) 2542 0 stevel (cmd, tag, set, dev, event)); 2543 0 stevel rw_exit(&ni_rwlp.lock); 2544 0 stevel return (err); 2545 0 stevel } 2546 0 stevel } 2547 0 stevel notify_interface = md_get_named_service(NODEV64, ANY_SERVICE, 2548 0 stevel "notify interface", 0); 2549 0 stevel rw_downgrade(&ni_rwlp.lock); 2550 0 stevel } 2551 0 stevel if (notify_interface == NULL) { 2552 0 stevel cmn_err(CE_WARN, "md: no notify interface"); 2553 0 stevel rw_exit(&ni_rwlp.lock); 2554 0 stevel return (0); 2555 0 stevel } 2556 0 stevel err = ((*notify_interface)(cmd, tag, set, dev, event)); 2557 0 stevel rw_exit(&ni_rwlp.lock); 2558 0 stevel return (err); 2559 0 stevel } 2560 0 stevel 2561 0 stevel char * 2562 0 stevel obj2devname(uint32_t tag, uint_t setno, md_dev64_t dev) 2563 0 stevel { 2564 0 stevel char *setname; 2565 0 stevel char name[MD_MAX_CTDLEN]; 2566 0 stevel minor_t mnum = md_getminor(dev); 2567 0 stevel major_t maj = md_getmajor(dev); 2568 0 stevel int rtn = 0; 2569 0 stevel 2570 0 stevel /* 2571 0 stevel * Verify that the passed dev_t refers to a valid metadevice. 2572 0 stevel * If it doesn't we can make no assumptions as to what the device 2573 0 stevel * name is. Return NULL in these cases. 
2574 0 stevel */ 2575 0 stevel if (((maj != md_major) || (MD_MIN2UNIT(mnum) >= md_nunits)) || 2576 0 stevel (MD_MIN2SET(mnum) >= md_nsets)) { 2577 0 stevel return (NULL); 2578 0 stevel } 2579 0 stevel 2580 0 stevel setname = NULL; 2581 0 stevel name[0] = '\0'; 2582 0 stevel switch (tag) { 2583 0 stevel case SVM_TAG_HSP: 2584 0 stevel if (setno == 0) { 2585 0 stevel rtn = snprintf(name, sizeof (name), "hsp%u", 2586 0 stevel (unsigned)MD_MIN2UNIT(mnum)); 2587 0 stevel } else { 2588 0 stevel setname = mddb_getsetname(setno); 2589 0 stevel if (setname != NULL) { 2590 0 stevel rtn = snprintf(name, sizeof (name), "%s/hsp%u", 2591 0 stevel setname, (unsigned)MD_MIN2UNIT(mnum)); 2592 0 stevel } 2593 0 stevel } 2594 0 stevel break; 2595 0 stevel case SVM_TAG_DRIVE: 2596 0 stevel (void) sprintf(name, "drive"); 2597 0 stevel break; 2598 0 stevel case SVM_TAG_HOST: 2599 0 stevel (void) sprintf(name, "host"); 2600 0 stevel break; 2601 0 stevel case SVM_TAG_SET: 2602 0 stevel rtn = snprintf(name, sizeof (name), "%s", 2603 0 stevel mddb_getsetname(setno)); 2604 0 stevel if ((name[0] == '\0') || (rtn >= sizeof (name))) { 2605 0 stevel (void) sprintf(name, "diskset"); 2606 0 stevel rtn = 0; 2607 0 stevel } 2608 0 stevel break; 2609 0 stevel default: 2610 0 stevel rtn = snprintf(name, sizeof (name), "%s", md_shortname(mnum)); 2611 0 stevel break; 2612 0 stevel } 2613 0 stevel 2614 0 stevel /* Check if we got any rubbish for any of the snprintf's */ 2615 0 stevel if ((name[0] == '\0') || (rtn >= sizeof (name))) { 2616 0 stevel return (NULL); 2617 0 stevel } 2618 0 stevel 2619 0 stevel return (md_strdup(name)); 2620 0 stevel } 2621 0 stevel 2622 0 stevel /* Sysevent subclass and mdnotify event type pairs */ 2623 0 stevel struct node { 2624 0 stevel char *se_ev; 2625 0 stevel md_event_type_t md_ev; 2626 0 stevel }; 2627 0 stevel 2628 0 stevel /* 2629 0 stevel * Table must be sorted in case sensitive ascending order of 2630 0 stevel * the sysevents values 2631 0 stevel */ 2632 0 stevel 
static struct node ev_table[] = { 2633 0 stevel { ESC_SVM_ADD, EQ_ADD }, 2634 0 stevel { ESC_SVM_ATTACH, EQ_ATTACH }, 2635 0 stevel { ESC_SVM_ATTACHING, EQ_ATTACHING }, 2636 0 stevel { ESC_SVM_CHANGE, EQ_CHANGE }, 2637 0 stevel { ESC_SVM_CREATE, EQ_CREATE }, 2638 0 stevel { ESC_SVM_DELETE, EQ_DELETE }, 2639 0 stevel { ESC_SVM_DETACH, EQ_DETACH }, 2640 0 stevel { ESC_SVM_DETACHING, EQ_DETACHING }, 2641 0 stevel { ESC_SVM_DRIVE_ADD, EQ_DRIVE_ADD }, 2642 0 stevel { ESC_SVM_DRIVE_DELETE, EQ_DRIVE_DELETE }, 2643 0 stevel { ESC_SVM_ENABLE, EQ_ENABLE }, 2644 0 stevel { ESC_SVM_ERRED, EQ_ERRED }, 2645 0 stevel { ESC_SVM_EXCHANGE, EQ_EXCHANGE }, 2646 0 stevel { ESC_SVM_GROW, EQ_GROW }, 2647 0 stevel { ESC_SVM_HS_CHANGED, EQ_HS_CHANGED }, 2648 0 stevel { ESC_SVM_HS_FREED, EQ_HS_FREED }, 2649 0 stevel { ESC_SVM_HOST_ADD, EQ_HOST_ADD }, 2650 0 stevel { ESC_SVM_HOST_DELETE, EQ_HOST_DELETE }, 2651 0 stevel { ESC_SVM_HOTSPARED, EQ_HOTSPARED }, 2652 0 stevel { ESC_SVM_INIT_FAILED, EQ_INIT_FAILED }, 2653 0 stevel { ESC_SVM_INIT_FATAL, EQ_INIT_FATAL }, 2654 0 stevel { ESC_SVM_INIT_START, EQ_INIT_START }, 2655 0 stevel { ESC_SVM_INIT_SUCCESS, EQ_INIT_SUCCESS }, 2656 0 stevel { ESC_SVM_IOERR, EQ_IOERR }, 2657 0 stevel { ESC_SVM_LASTERRED, EQ_LASTERRED }, 2658 0 stevel { ESC_SVM_MEDIATOR_ADD, EQ_MEDIATOR_ADD }, 2659 0 stevel { ESC_SVM_MEDIATOR_DELETE, EQ_MEDIATOR_DELETE }, 2660 0 stevel { ESC_SVM_OFFLINE, EQ_OFFLINE }, 2661 0 stevel { ESC_SVM_OK, EQ_OK }, 2662 0 stevel { ESC_SVM_ONLINE, EQ_ONLINE }, 2663 0 stevel { ESC_SVM_OPEN_FAIL, EQ_OPEN_FAIL }, 2664 0 stevel { ESC_SVM_REGEN_DONE, EQ_REGEN_DONE }, 2665 0 stevel { ESC_SVM_REGEN_FAILED, EQ_REGEN_FAILED }, 2666 0 stevel { ESC_SVM_REGEN_START, EQ_REGEN_START }, 2667 0 stevel { ESC_SVM_RELEASE, EQ_RELEASE }, 2668 0 stevel { ESC_SVM_REMOVE, EQ_REMOVE }, 2669 0 stevel { ESC_SVM_RENAME_DST, EQ_RENAME_DST }, 2670 0 stevel { ESC_SVM_RENAME_SRC, EQ_RENAME_SRC }, 2671 0 stevel { ESC_SVM_REPLACE, EQ_REPLACE }, 2672 0 stevel { 
ESC_SVM_RESYNC_DONE, EQ_RESYNC_DONE }, 2673 0 stevel { ESC_SVM_RESYNC_FAILED, EQ_RESYNC_FAILED }, 2674 0 stevel { ESC_SVM_RESYNC_START, EQ_RESYNC_START }, 2675 0 stevel { ESC_SVM_RESYNC_SUCCESS, EQ_RESYNC_SUCCESS }, 2676 0 stevel { ESC_SVM_TAKEOVER, EQ_TAKEOVER } 2677 0 stevel }; 2678 0 stevel 2679 0 stevel static md_tags_t md_tags[] = { 2680 0 stevel TAG_UNK, 2681 0 stevel TAG_METADEVICE, 2682 0 stevel TAG_UNK, 2683 0 stevel TAG_UNK, 2684 0 stevel TAG_UNK, 2685 0 stevel TAG_UNK, 2686 0 stevel TAG_REPLICA, 2687 0 stevel TAG_HSP, 2688 0 stevel TAG_HS, 2689 0 stevel TAG_SET, 2690 0 stevel TAG_DRIVE, 2691 0 stevel TAG_HOST, 2692 0 stevel TAG_MEDIATOR 2693 0 stevel }; 2694 0 stevel 2695 0 stevel md_event_type_t 2696 0 stevel ev_get(char *subclass) 2697 0 stevel { 2698 0 stevel int high, mid, low, p; 2699 0 stevel 2700 0 stevel low = 0; 2701 0 stevel high = (sizeof (ev_table) / sizeof (ev_table[0])) - 1; 2702 0 stevel while (low <= high) { 2703 0 stevel mid = (high + low) / 2; 2704 0 stevel p = strcmp(subclass, ev_table[mid].se_ev); 2705 0 stevel if (p == 0) { 2706 0 stevel return (ev_table[mid].md_ev); 2707 0 stevel } else if (p < 0) { 2708 0 stevel high = mid - 1; 2709 0 stevel } else { 2710 0 stevel low = mid + 1; 2711 0 stevel } 2712 0 stevel } 2713 0 stevel 2714 0 stevel return (EQ_EMPTY); 2715 0 stevel } 2716 0 stevel 2717 0 stevel /* 2718 0 stevel * Log mdnotify event 2719 0 stevel */ 2720 0 stevel void 2721 0 stevel do_mdnotify(char *se_subclass, uint32_t tag, set_t setno, md_dev64_t devid) 2722 0 stevel { 2723 0 stevel md_event_type_t ev_type; 2724 0 stevel md_tags_t md_tag; 2725 0 stevel 2726 0 stevel /* Translate sysevent into mdnotify event */ 2727 0 stevel ev_type = ev_get(se_subclass); 2728 0 stevel 2729 0 stevel if (tag >= (sizeof (md_tags) / sizeof (md_tags[0]))) { 2730 0 stevel md_tag = TAG_UNK; 2731 0 stevel } else { 2732 0 stevel md_tag = md_tags[tag]; 2733 0 stevel } 2734 0 stevel 2735 0 stevel NOTIFY_MD(md_tag, setno, devid, ev_type); 2736 0 stevel 
} 2737 0 stevel 2738 0 stevel /* 2739 0 stevel * Log SVM sys events 2740 0 stevel */ 2741 0 stevel void 2742 0 stevel svm_gen_sysevent( 2743 0 stevel char *se_class, 2744 0 stevel char *se_subclass, 2745 0 stevel uint32_t tag, 2746 0 stevel set_t setno, 2747 0 stevel md_dev64_t devid 2748 0 stevel ) 2749 0 stevel { 2750 0 stevel nvlist_t *attr_list; 2751 0 stevel sysevent_id_t eid; 2752 0 stevel int err = DDI_SUCCESS; 2753 0 stevel char *devname; 2754 0 stevel extern dev_info_t *md_devinfo; 2755 0 stevel 2756 0 stevel /* Raise the mdnotify event before anything else */ 2757 0 stevel do_mdnotify(se_subclass, tag, setno, devid); 2758 0 stevel 2759 0 stevel if (md_devinfo == NULL) { 2760 0 stevel return; 2761 0 stevel } 2762 0 stevel 2763 0 stevel err = nvlist_alloc(&attr_list, NV_UNIQUE_NAME, KM_NOSLEEP); 2764 0 stevel 2765 0 stevel if (err == DDI_SUCCESS) { 2766 0 stevel /* Add the version numver */ 2767 0 stevel err = nvlist_add_uint32(attr_list, SVM_VERSION_NO, 2768 0 stevel (uint32_t)SVM_VERSION); 2769 0 stevel if (err != DDI_SUCCESS) { 2770 0 stevel goto fail; 2771 0 stevel } 2772 0 stevel 2773 0 stevel /* Add the tag attribute */ 2774 0 stevel err = nvlist_add_uint32(attr_list, SVM_TAG, (uint32_t)tag); 2775 0 stevel if (err != DDI_SUCCESS) { 2776 0 stevel goto fail; 2777 0 stevel } 2778 0 stevel 2779 0 stevel /* Add the set number attribute */ 2780 0 stevel err = nvlist_add_uint32(attr_list, SVM_SET_NO, (uint32_t)setno); 2781 0 stevel if (err != DDI_SUCCESS) { 2782 0 stevel goto fail; 2783 0 stevel } 2784 0 stevel 2785 0 stevel /* Add the device id attribute */ 2786 0 stevel err = nvlist_add_uint64(attr_list, SVM_DEV_ID, (uint64_t)devid); 2787 0 stevel if (err != DDI_SUCCESS) { 2788 0 stevel goto fail; 2789 0 stevel } 2790 0 stevel 2791 0 stevel /* Add the device name attribute */ 2792 0 stevel devname = obj2devname(tag, setno, devid); 2793 0 stevel if (devname != NULL) { 2794 0 stevel err = nvlist_add_string(attr_list, SVM_DEV_NAME, 2795 0 stevel devname); 
2796 0 stevel freestr(devname); 2797 0 stevel } else { 2798 0 stevel err = nvlist_add_string(attr_list, SVM_DEV_NAME, 2799 0 stevel "unspecified"); 2800 0 stevel } 2801 0 stevel if (err != DDI_SUCCESS) { 2802 0 stevel goto fail; 2803 0 stevel } 2804 0 stevel 2805 0 stevel /* Attempt to post event */ 2806 0 stevel err = ddi_log_sysevent(md_devinfo, DDI_VENDOR_SUNW, se_class, 2807 0 stevel se_subclass, attr_list, &eid, DDI_SLEEP); 2808 0 stevel 2809 0 stevel nvlist_free(attr_list); 2810 0 stevel if (err != DDI_SUCCESS) { 2811 0 stevel cmn_err(CE_WARN, "Failed to log event for %s, %s," 2812 0 stevel " err=%x", se_class, se_subclass, err); 2813 0 stevel } 2814 0 stevel } 2815 0 stevel 2816 0 stevel return; 2817 0 stevel 2818 0 stevel fail: 2819 0 stevel nvlist_free(attr_list); 2820 0 stevel cmn_err(CE_WARN, "Failed to setup attributes for event %s, %s, err=%x", 2821 0 stevel se_class, se_subclass, err); 2822 0 stevel } 2823 0 stevel 2824 0 stevel void 2825 0 stevel md_clear_named_service() 2826 0 stevel { 2827 0 stevel rw_enter(&ni_rwlp.lock, RW_WRITER); 2828 0 stevel notify_interface = NULL; 2829 0 stevel rw_exit(&ni_rwlp.lock); 2830 0 stevel } 2831 0 stevel 2832 0 stevel void 2833 0 stevel md_create_unit_incore(minor_t mnum, md_ops_t *ops, int alloc_lock) 2834 0 stevel { 2835 0 stevel mdi_unit_t *ui; 2836 0 stevel set_t setno = MD_MIN2SET(mnum); 2837 0 stevel 2838 0 stevel ui = (mdi_unit_t *)kmem_zalloc(sizeof (mdi_unit_t), KM_SLEEP); 2839 0 stevel ui->ui_opsindex = ops->md_selfindex; 2840 0 stevel 2841 0 stevel /* initialize all the incore conditional variables */ 2842 0 stevel mutex_init(&ui->ui_mx, NULL, MUTEX_DEFAULT, NULL); 2843 0 stevel cv_init(&ui->ui_cv, NULL, CV_DEFAULT, NULL); 2844 0 stevel 2845 0 stevel if (alloc_lock) { 2846 0 stevel ui->ui_io_lock = kmem_zalloc(sizeof (md_io_lock_t), KM_SLEEP); 2847 0 stevel mutex_init(&ui->ui_io_lock->io_mx, NULL, MUTEX_DEFAULT, NULL); 2848 0 stevel cv_init(&ui->ui_io_lock->io_cv, NULL, CV_DEFAULT, NULL); 2849 0 stevel 
mutex_init(&ui->ui_io_lock->io_list_mutex, NULL, 2850 0 stevel MUTEX_DEFAULT, NULL); 2851 0 stevel ui->ui_io_lock->io_list_front = NULL; 2852 0 stevel ui->ui_io_lock->io_list_back = NULL; 2853 0 stevel } 2854 8452 John if (! (md_get_setstatus(setno) & MD_SET_SNARFING)) { 2855 8452 John rw_enter(&md_unit_array_rw.lock, RW_WRITER); 2856 8452 John MDI_VOIDUNIT(mnum) = (void *) ui; 2857 8452 John rw_exit(&md_unit_array_rw.lock); 2858 8452 John } else 2859 8452 John MDI_VOIDUNIT(mnum) = (void *) ui; 2860 8452 John 2861 8452 John rw_enter(&ops->md_link_rw.lock, RW_WRITER); 2862 8452 John ui->ui_link.ln_next = ops->md_head; 2863 8452 John ui->ui_link.ln_setno = setno; 2864 8452 John ui->ui_link.ln_id = mnum; 2865 8452 John ops->md_head = &ui->ui_link; 2866 0 stevel /* setup the unavailable field */ 2867 0 stevel #if defined(_ILP32) 2868 1623 tw21770 if (((md_unit_t *)MD_UNIT(mnum))->c.un_revision & MD_64BIT_META_DEV) { 2869 0 stevel ui->ui_tstate |= MD_64MD_ON_32KERNEL; 2870 0 stevel cmn_err(CE_NOTE, "d%d is unavailable because 64 bit " 2871 0 stevel "metadevices are not accessible on a 32 bit kernel", 2872 0 stevel mnum); 2873 0 stevel } 2874 0 stevel #endif 2875 0 stevel 2876 0 stevel rw_exit(&ops->md_link_rw.lock); 2877 0 stevel } 2878 0 stevel 2879 0 stevel void 2880 0 stevel md_destroy_unit_incore(minor_t mnum, md_ops_t *ops) 2881 0 stevel { 2882 0 stevel mdi_unit_t *ui; 2883 0 stevel 2884 0 stevel /* 2885 0 stevel * ASSUMPTION: md_unit_array_rw WRITER lock is held. 
2886 0 stevel */ 2887 0 stevel ui = MDI_UNIT(mnum); 2888 0 stevel if (ui == NULL) 2889 0 stevel return; 2890 0 stevel 2891 0 stevel md_rem_link(MD_MIN2SET(mnum), mnum, &ops->md_link_rw.lock, 2892 0 stevel &ops->md_head); 2893 0 stevel 2894 0 stevel /* destroy the io lock if one is being used */ 2895 0 stevel if (ui->ui_io_lock) { 2896 0 stevel mutex_destroy(&ui->ui_io_lock->io_mx); 2897 0 stevel cv_destroy(&ui->ui_io_lock->io_cv); 2898 0 stevel kmem_free(ui->ui_io_lock, sizeof (md_io_lock_t)); 2899 0 stevel } 2900 0 stevel 2901 0 stevel /* teardown kstat */ 2902 0 stevel md_kstat_destroy(mnum); 2903 0 stevel 2904 0 stevel /* destroy all the incore conditional variables */ 2905 0 stevel mutex_destroy(&ui->ui_mx); 2906 0 stevel cv_destroy(&ui->ui_cv); 2907 0 stevel 2908 0 stevel kmem_free(ui, sizeof (mdi_unit_t)); 2909 0 stevel MDI_VOIDUNIT(mnum) = (void *) NULL; 2910 0 stevel } 2911 0 stevel 2912 0 stevel void 2913 0 stevel md_rem_names(sv_dev_t *sv, int nsv) 2914 0 stevel { 2915 0 stevel int i, s; 2916 0 stevel int max_sides; 2917 0 stevel 2918 0 stevel if (nsv == 0) 2919 0 stevel return; 2920 0 stevel 2921 0 stevel /* All entries removed are in the same diskset */ 2922 0 stevel if (md_get_setstatus(sv[0].setno) & MD_SET_MNSET) 2923 0 stevel max_sides = MD_MNMAXSIDES; 2924 0 stevel else 2925 0 stevel max_sides = MD_MAXSIDES; 2926 0 stevel 2927 0 stevel for (i = 0; i < nsv; i++) 2928 0 stevel for (s = 0; s < max_sides; s++) 2929 0 stevel (void) md_remdevname(sv[i].setno, s, sv[i].key); 2930 0 stevel } 2931 0 stevel 2932 0 stevel /* 2933 0 stevel * Checking user args before we get into physio - returns 0 for ok, else errno 2934 0 stevel * We do a lot of checking against illegal arguments here because some of the 2935 0 stevel * real disk drivers don't like certain kinds of arguments. (e.g xy doesn't 2936 0 stevel * like odd address user buffer.) Those drivers capture bad arguments in 2937 0 stevel * xxread and xxwrite. 
But since meta-driver calls their strategy routines 2938 0 stevel * directly, two bad scenario might happen: 2939 0 stevel * 1. the real strategy doesn't like it and panic. 2940 0 stevel * 2. the real strategy doesn't like it and set B_ERROR. 2941 0 stevel * 2942 0 stevel * The second case is no better than the first one, since the meta-driver 2943 0 stevel * will treat it as a media-error and off line the mirror metapartition. 2944 0 stevel * (Too bad there is no way to tell what error it is.) 2945 0 stevel * 2946 0 stevel */ 2947 0 stevel int 2948 0 stevel md_chk_uio(struct uio *uio) 2949 0 stevel { 2950 0 stevel int i; 2951 0 stevel struct iovec *iov; 2952 0 stevel 2953 0 stevel /* 2954 0 stevel * Check for negative or not block-aligned offset 2955 0 stevel */ 2956 0 stevel if ((uio->uio_loffset < 0) || 2957 0 stevel ((uio->uio_loffset & (DEV_BSIZE - 1)) != 0)) { 2958 0 stevel return (EINVAL); 2959 0 stevel } 2960 0 stevel iov = uio->uio_iov; 2961 0 stevel i = uio->uio_iovcnt; 2962 0 stevel 2963 0 stevel while (i--) { 2964 0 stevel if ((iov->iov_len & (DEV_BSIZE - 1)) != 0) 2965 0 stevel return (EINVAL); 2966 0 stevel /* 2967 0 stevel * Bug # 1212146 2968 0 stevel * The default is to not check alignment, but we can now check 2969 0 stevel * for a larger number of alignments if desired. 
2970 0 stevel */ 2971 0 stevel if ((uintptr_t)(iov->iov_base) & md_uio_alignment_mask) 2972 0 stevel return (EINVAL); 2973 0 stevel iov++; 2974 0 stevel } 2975 0 stevel return (0); 2976 0 stevel } 2977 0 stevel 2978 0 stevel char * 2979 0 stevel md_shortname( 2980 0 stevel minor_t mnum 2981 0 stevel ) 2982 0 stevel { 2983 1623 tw21770 static char buf[MAXPATHLEN]; 2984 1623 tw21770 char *devname; 2985 1623 tw21770 char *invalid = " (Invalid minor number %u) "; 2986 1623 tw21770 char *metaname; 2987 1623 tw21770 mdc_unit_t *un; 2988 1623 tw21770 side_t side; 2989 0 stevel set_t setno = MD_MIN2SET(mnum); 2990 0 stevel unit_t unit = MD_MIN2UNIT(mnum); 2991 0 stevel 2992 1623 tw21770 if ((un = MD_UNIT(mnum)) == NULL) { 2993 1623 tw21770 (void) snprintf(buf, sizeof (buf), invalid, mnum); 2994 0 stevel return (buf); 2995 0 stevel } 2996 0 stevel 2997 1623 tw21770 /* 2998 1623 tw21770 * If unit is not a friendly name unit, derive the name from the 2999 1623 tw21770 * minor number. 3000 1623 tw21770 */ 3001 1623 tw21770 if ((un->un_revision & MD_FN_META_DEV) == 0) { 3002 1623 tw21770 /* This is a traditional metadevice */ 3003 1623 tw21770 if (setno == MD_LOCAL_SET) { 3004 1623 tw21770 (void) snprintf(buf, sizeof (buf), "d%u", 3005 7563 Prasad (unsigned)unit); 3006 1623 tw21770 } else { 3007 1623 tw21770 (void) snprintf(buf, sizeof (buf), "%s/d%u", 3008 1623 tw21770 mddb_getsetname(setno), (unsigned)unit); 3009 1623 tw21770 } 3010 1623 tw21770 return (buf); 3011 1623 tw21770 } 3012 1623 tw21770 3013 1623 tw21770 /* 3014 1623 tw21770 * It is a friendly name metadevice, so we need to get its name. 
3015 1623 tw21770 */ 3016 1623 tw21770 side = mddb_getsidenum(setno); 3017 1623 tw21770 devname = (char *)kmem_alloc(MAXPATHLEN, KM_SLEEP); 3018 1623 tw21770 if (md_getdevname(setno, side, MD_KEYWILD, 3019 7563 Prasad md_makedevice(md_major, mnum), devname, MAXPATHLEN) == 0) { 3020 1623 tw21770 /* 3021 1623 tw21770 * md_getdevname has given us either /dev/md/dsk/<metaname> 3022 1623 tw21770 * or /dev/md/<setname>/dsk/<metname> depending on whether 3023 1623 tw21770 * or not we are in the local set. Thus, we'll pull the 3024 1623 tw21770 * metaname from this string. 3025 1623 tw21770 */ 3026 1623 tw21770 if ((metaname = strrchr(devname, '/')) == NULL) { 3027 1623 tw21770 (void) snprintf(buf, sizeof (buf), invalid, mnum); 3028 1623 tw21770 goto out; 3029 1623 tw21770 } 3030 1623 tw21770 metaname++; /* move past slash */ 3031 1623 tw21770 if (setno == MD_LOCAL_SET) { 3032 1623 tw21770 /* No set name. */ 3033 1623 tw21770 (void) snprintf(buf, sizeof (buf), "%s", metaname); 3034 1623 tw21770 } else { 3035 1623 tw21770 /* Include setname */ 3036 1623 tw21770 (void) snprintf(buf, sizeof (buf), "%s/%s", 3037 7563 Prasad mddb_getsetname(setno), metaname); 3038 1623 tw21770 } 3039 1623 tw21770 } else { 3040 1623 tw21770 /* We couldn't find the name. 
*/ 3041 1623 tw21770 (void) snprintf(buf, sizeof (buf), invalid, mnum); 3042 1623 tw21770 } 3043 1623 tw21770 3044 1623 tw21770 out: 3045 1623 tw21770 kmem_free(devname, MAXPATHLEN); 3046 0 stevel return (buf); 3047 0 stevel } 3048 0 stevel 3049 0 stevel char * 3050 0 stevel md_devname( 3051 0 stevel set_t setno, 3052 0 stevel md_dev64_t dev, 3053 0 stevel char *buf, 3054 0 stevel size_t size 3055 0 stevel ) 3056 0 stevel { 3057 0 stevel static char mybuf[MD_MAX_CTDLEN]; 3058 0 stevel int err; 3059 0 stevel 3060 0 stevel if (buf == NULL) { 3061 0 stevel buf = mybuf; 3062 0 stevel size = sizeof (mybuf); 3063 0 stevel } else { 3064 0 stevel ASSERT(size >= MD_MAX_CTDLEN); 3065 0 stevel } 3066 0 stevel 3067 4491 jmf err = md_getdevname_common(setno, mddb_getsidenum(setno), 3068 7563 Prasad 0, dev, buf, size, MD_NOWAIT_LOCK); 3069 0 stevel if (err) { 3070 0 stevel if (err == ENOENT) { 3071 0 stevel (void) sprintf(buf, "(Unavailable)"); 3072 0 stevel } else { 3073 0 stevel (void) sprintf(buf, "(%u.%u)", 3074 0 stevel md_getmajor(dev), md_getminor(dev)); 3075 0 stevel } 3076 0 stevel } 3077 0 stevel 3078 0 stevel return (buf); 3079 0 stevel } 3080 0 stevel void 3081 0 stevel md_minphys(buf_t *pb) 3082 0 stevel { 3083 0 stevel extern unsigned md_maxbcount; 3084 0 stevel 3085 0 stevel if (pb->b_bcount > md_maxbcount) 3086 0 stevel pb->b_bcount = md_maxbcount; 3087 0 stevel } 3088 0 stevel 3089 0 stevel void 3090 0 stevel md_bioinit(struct buf *bp) 3091 0 stevel { 3092 0 stevel ASSERT(bp); 3093 0 stevel 3094 0 stevel bioinit(bp); 3095 0 stevel bp->b_back = bp; 3096 0 stevel bp->b_forw = bp; 3097 0 stevel bp->b_flags = B_BUSY; /* initialize flags */ 3098 0 stevel } 3099 0 stevel 3100 0 stevel void 3101 0 stevel md_bioreset(struct buf *bp) 3102 0 stevel { 3103 0 stevel ASSERT(bp); 3104 0 stevel 3105 0 stevel bioreset(bp); 3106 0 stevel bp->b_back = bp; 3107 0 stevel bp->b_forw = bp; 3108 0 stevel bp->b_flags = B_BUSY; /* initialize flags */ 3109 0 stevel } 3110 0 stevel 3111 0 
stevel /* 3112 0 stevel * md_bioclone is needed as long as the real bioclone only takes a daddr_t 3113 0 stevel * as block number. 3114 0 stevel * We simply call bioclone with all input parameters but blkno, and set the 3115 0 stevel * correct blkno afterwards. 3116 0 stevel * Caveat Emptor: bp_mem must not be NULL! 3117 0 stevel */ 3118 0 stevel buf_t * 3119 0 stevel md_bioclone(buf_t *bp, off_t off, size_t len, dev_t dev, diskaddr_t blkno, 3120 0 stevel int (*iodone)(buf_t *), buf_t *bp_mem, int sleep) 3121 0 stevel { 3122 0 stevel (void) bioclone(bp, off, len, dev, 0, iodone, bp_mem, sleep); 3123 0 stevel bp_mem->b_lblkno = blkno; 3124 0 stevel return (bp_mem); 3125 0 stevel } 3126 0 stevel 3127 0 stevel 3128 0 stevel /* 3129 0 stevel * kstat stuff 3130 0 stevel */ 3131 0 stevel void 3132 0 stevel md_kstat_init_ui( 3133 0 stevel minor_t mnum, 3134 0 stevel mdi_unit_t *ui 3135 0 stevel ) 3136 0 stevel { 3137 0 stevel if ((ui != NULL) && (ui->ui_kstat == NULL)) { 3138 0 stevel set_t setno = MD_MIN2SET(mnum); 3139 0 stevel unit_t unit = MD_MIN2UNIT(mnum); 3140 0 stevel char module[KSTAT_STRLEN]; 3141 0 stevel char *p = module; 3142 0 stevel 3143 0 stevel if (setno != MD_LOCAL_SET) { 3144 0 stevel char buf[64]; 3145 0 stevel char *s = buf; 3146 0 stevel char *e = module + sizeof (module) - 4; 3147 0 stevel 3148 0 stevel (void) sprintf(buf, "%u", setno); 3149 0 stevel while ((p < e) && (*s != '\0')) 3150 0 stevel *p++ = *s++; 3151 0 stevel *p++ = '/'; 3152 0 stevel } 3153 0 stevel *p++ = 'm'; 3154 0 stevel *p++ = 'd'; 3155 0 stevel *p = '\0'; 3156 0 stevel if ((ui->ui_kstat = kstat_create(module, unit, NULL, "disk", 3157 0 stevel KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) != NULL) { 3158 0 stevel ui->ui_kstat->ks_lock = &ui->ui_mx; 3159 0 stevel kstat_install(ui->ui_kstat); 3160 0 stevel } 3161 0 stevel } 3162 0 stevel } 3163 0 stevel 3164 0 stevel void 3165 0 stevel md_kstat_init( 3166 0 stevel minor_t mnum 3167 0 stevel ) 3168 0 stevel { 3169 0 stevel 
md_kstat_init_ui(mnum, MDI_UNIT(mnum)); 3170 0 stevel } 3171 0 stevel 3172 0 stevel void 3173 0 stevel md_kstat_destroy_ui( 3174