1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/param.h> 28 #include <sys/systm.h> 29 #include <sys/conf.h> 30 #include <sys/file.h> 31 #include <sys/user.h> 32 #include <sys/uio.h> 33 #include <sys/t_lock.h> 34 #include <sys/buf.h> 35 #include <sys/dkio.h> 36 #include <sys/vtoc.h> 37 #include <sys/kmem.h> 38 #include <vm/page.h> 39 #include <sys/sysmacros.h> 40 #include <sys/types.h> 41 #include <sys/mkdev.h> 42 #include <sys/stat.h> 43 #include <sys/open.h> 44 #include <sys/modctl.h> 45 #include <sys/ddi.h> 46 #include <sys/sunddi.h> 47 48 #include <sys/lvm/mdvar.h> 49 #include <sys/lvm/md_names.h> 50 #include <sys/lvm/md_mddb.h> 51 #include <sys/lvm/md_stripe.h> 52 #include <sys/lvm/md_mirror.h> 53 54 #include <sys/model.h> 55 56 #include <sys/sysevent/eventdefs.h> 57 #include <sys/sysevent/svm.h> 58 #include <sys/lvm/mdmn_commd.h> 59 60 extern int md_status; 61 extern kmutex_t md_mx; 62 extern kcondvar_t md_cv; 63 64 extern unit_t md_nunits; 65 extern set_t md_nsets; 66 extern md_set_t md_set[]; 67 68 extern md_ops_t mirror_md_ops; 69 
extern int md_ioctl_cnt; 70 extern md_krwlock_t md_unit_array_rw; 71 extern major_t md_major; 72 extern mdq_anchor_t md_ff_daemonq; 73 extern void md_probe_one(); 74 extern void mirror_openfail_console_info(); 75 76 #ifdef DEBUG 77 extern int mirror_debug_flag; 78 #endif 79 80 static void 81 mirror_resume_writes(mm_unit_t *un) 82 { 83 /* 84 * Release the block on writes to the mirror and resume any blocked 85 * resync thread. 86 * This is only required for MN sets 87 */ 88 if (MD_MNSET_SETNO(MD_UN2SET(un))) { 89 #ifdef DEBUG 90 if (mirror_debug_flag) 91 printf("mirror_resume_writes: mnum %x\n", MD_SID(un)); 92 #endif 93 mutex_enter(&un->un_suspend_wr_mx); 94 un->un_suspend_wr_flag = 0; 95 cv_broadcast(&un->un_suspend_wr_cv); 96 mutex_exit(&un->un_suspend_wr_mx); 97 mutex_enter(&un->un_rs_thread_mx); 98 un->un_rs_thread_flags &= ~MD_RI_BLOCK; 99 cv_signal(&un->un_rs_thread_cv); 100 mutex_exit(&un->un_rs_thread_mx); 101 } 102 } 103 104 mm_unit_t * 105 mirror_getun(minor_t mnum, md_error_t *mde, int flags, IOLOCK *lock) 106 { 107 mm_unit_t *un; 108 mdi_unit_t *ui; 109 set_t setno = MD_MIN2SET(mnum); 110 111 if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) { 112 (void) mdmderror(mde, MDE_INVAL_UNIT, mnum); 113 return (NULL); 114 } 115 116 if (!(flags & STALE_OK)) { 117 if (md_get_setstatus(setno) & MD_SET_STALE) { 118 (void) mdmddberror(mde, MDE_DB_STALE, mnum, setno); 119 return (NULL); 120 } 121 } 122 123 ui = MDI_UNIT(mnum); 124 if (flags & NO_OLD) { 125 if (ui != NULL) { 126 (void) mdmderror(mde, MDE_UNIT_ALREADY_SETUP, mnum); 127 return (NULL); 128 } 129 return ((mm_unit_t *)1); 130 } 131 132 if (ui == NULL) { 133 (void) mdmderror(mde, MDE_UNIT_NOT_SETUP, mnum); 134 return (NULL); 135 } 136 137 if (flags & ARRAY_WRITER) 138 md_array_writer(lock); 139 else if (flags & ARRAY_READER) 140 md_array_reader(lock); 141 142 if (!(flags & NO_LOCK)) { 143 if (flags & WR_LOCK) 144 (void) md_ioctl_writerlock(lock, ui); 145 else /* RD_LOCK */ 146 (void) 
md_ioctl_readerlock(lock, ui); 147 } 148 un = (mm_unit_t *)MD_UNIT(mnum); 149 150 if (un->c.un_type != MD_METAMIRROR) { 151 (void) mdmderror(mde, MDE_NOT_MM, mnum); 152 return (NULL); 153 } 154 155 return (un); 156 } 157 158 static int 159 mirror_set( 160 void *d, 161 int mode 162 ) 163 { 164 minor_t mnum; 165 mm_unit_t *un; 166 mddb_recid_t recid; 167 mddb_type_t typ1; 168 int err; 169 int i; 170 set_t setno; 171 md_set_params_t *msp = d; 172 173 174 mnum = msp->mnum; 175 176 mdclrerror(&msp->mde); 177 178 if (mirror_getun(mnum, &msp->mde, NO_OLD, NULL) == NULL) 179 return (0); 180 181 setno = MD_MIN2SET(mnum); 182 183 typ1 = (mddb_type_t)md_getshared_key(setno, 184 mirror_md_ops.md_driver.md_drivername); 185 186 /* 187 * Create the db record for this mdstruct 188 * We don't store incore elements ondisk 189 */ 190 191 if (msp->options & MD_CRO_64BIT) { 192 #if defined(_ILP32) 193 return (mdmderror(&msp->mde, MDE_UNIT_TOO_LARGE, mnum)); 194 #else 195 recid = mddb_createrec((size_t)msp->size, typ1, MIRROR_REC, 196 MD_CRO_64BIT | MD_CRO_MIRROR | MD_CRO_FN, setno); 197 #endif 198 } else { 199 /* 200 * It's important to use the correct size here 201 */ 202 msp->size = sizeof (mm_unit32_od_t); 203 recid = mddb_createrec((size_t)msp->size, typ1, MIRROR_REC, 204 MD_CRO_32BIT | MD_CRO_MIRROR | MD_CRO_FN, setno); 205 } 206 if (recid < 0) 207 return (mddbstatus2error(&msp->mde, (int)recid, 208 mnum, setno)); 209 210 /* Resize to include incore fields */ 211 un = (mm_unit_t *)mddb_getrecaddr_resize(recid, sizeof (*un), 0); 212 /* 213 * It is okay that we muck with the mdstruct here, 214 * since no one else will know about the mdstruct 215 * until we commit it. If we crash, the record will 216 * be automatically purged, since we haven't 217 * committed it yet. 
218 */ 219 220 /* copy in the user's mdstruct */ 221 if (err = ddi_copyin((caddr_t)(uintptr_t)msp->mdp, un, 222 (uint_t)msp->size, mode)) { 223 mddb_deleterec_wrapper(recid); 224 return (EFAULT); 225 } 226 /* All 64 bit metadevices only support EFI labels. */ 227 if (msp->options & MD_CRO_64BIT) { 228 un->c.un_flag |= MD_EFILABEL; 229 } 230 231 un->c.un_revision |= MD_FN_META_DEV; 232 MD_RECID(un) = recid; 233 MD_CAPAB(un) = MD_CAN_PARENT | MD_CAN_META_CHILD | MD_CAN_SP; 234 MD_PARENT(un) = MD_NO_PARENT; 235 236 for (i = 0; i < NMIRROR; i++) { 237 struct mm_submirror *sm; 238 239 sm = &un->un_sm[i]; 240 if (!SMS_IS(sm, SMS_INUSE)) 241 continue; 242 243 /* ensure that the submirror is a metadevice */ 244 if (md_getmajor(sm->sm_dev) != md_major) 245 return (mdmderror(&msp->mde, MDE_INVAL_UNIT, 246 md_getminor(sm->sm_dev))); 247 248 if (md_get_parent(sm->sm_dev) == MD_NO_PARENT) 249 continue; 250 251 /* mirror creation should fail here */ 252 md_nblocks_set(mnum, -1ULL); 253 MD_UNIT(mnum) = NULL; 254 255 mddb_deleterec_wrapper(recid); 256 return (mdmderror(&msp->mde, MDE_IN_USE, 257 md_getminor(sm->sm_dev))); 258 } 259 260 if (err = mirror_build_incore(un, 0)) { 261 md_nblocks_set(mnum, -1ULL); 262 MD_UNIT(mnum) = NULL; 263 264 mddb_deleterec_wrapper(recid); 265 return (err); 266 } 267 268 /* 269 * Update unit availability 270 */ 271 md_set[setno].s_un_avail--; 272 273 mirror_commit(un, ALL_SUBMIRRORS, 0); 274 md_create_unit_incore(MD_SID(un), &mirror_md_ops, 0); 275 mirror_check_failfast(mnum); 276 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_METADEVICE, setno, 277 MD_SID(un)); 278 279 resync_start_timeout(setno); 280 return (0); 281 } 282 283 static int 284 mirror_get( 285 void *migp, 286 int mode, 287 IOLOCK *lock 288 ) 289 { 290 mm_unit_t *un; 291 md_i_get_t *migph = migp; 292 293 mdclrerror(&migph->mde); 294 295 if ((un = mirror_getun(migph->id, &migph->mde, RD_LOCK, lock)) == NULL) 296 return (0); 297 298 if (migph->size == 0) { 299 migph->size = 
un->c.un_size; 300 return (0); 301 } 302 303 if (migph->size < un->c.un_size) { 304 return (EFAULT); 305 } 306 if (ddi_copyout(un, (caddr_t)(uintptr_t)migph->mdp, 307 un->c.un_size, mode)) 308 return (EFAULT); 309 return (0); 310 } 311 312 static int 313 mirror_getdevs( 314 void *mgdp, 315 int mode, 316 IOLOCK *lock 317 ) 318 { 319 mm_unit_t *un; 320 md_dev64_t *udevs; 321 int cnt; 322 int i; 323 md_dev64_t unit_dev; 324 md_getdevs_params_t *mgdph = mgdp; 325 326 327 mdclrerror(&mgdph->mde); 328 329 if ((un = mirror_getun(mgdph->mnum, 330 &mgdph->mde, RD_LOCK, lock)) == NULL) 331 return (0); 332 333 udevs = (md_dev64_t *)(uintptr_t)mgdph->devs; 334 335 for (cnt = 0, i = 0; i < NMIRROR; i++) { 336 if (!SMS_BY_INDEX_IS(un, i, SMS_INUSE)) 337 continue; 338 if (cnt < mgdph->cnt) { 339 unit_dev = un->un_sm[i].sm_dev; 340 if (md_getmajor(unit_dev) != md_major) { 341 unit_dev = md_xlate_mini_2_targ(unit_dev); 342 if (unit_dev == NODEV64) 343 return (ENODEV); 344 } 345 346 if (ddi_copyout((caddr_t)&unit_dev, (caddr_t)udevs, 347 sizeof (*udevs), mode) != 0) 348 return (EFAULT); 349 ++udevs; 350 } 351 ++cnt; 352 } 353 354 mgdph->cnt = cnt; 355 return (0); 356 } 357 358 static int 359 mirror_reset( 360 md_i_reset_t *mirp 361 ) 362 { 363 minor_t mnum = mirp->mnum; 364 mm_unit_t *un; 365 mdi_unit_t *ui; 366 set_t setno = MD_MIN2SET(mnum); 367 368 mdclrerror(&mirp->mde); 369 370 if ((un = mirror_getun(mnum, &mirp->mde, NO_LOCK, NULL)) == NULL) 371 return (0); 372 373 if (MD_HAS_PARENT(un->c.un_parent)) { 374 return (mdmderror(&mirp->mde, MDE_IN_USE, mnum)); 375 } 376 377 rw_enter(&md_unit_array_rw.lock, RW_WRITER); 378 379 /* single thread */ 380 ui = MDI_UNIT(mnum); 381 (void) md_unit_openclose_enter(ui); 382 383 if (md_unit_isopen(ui)) { 384 md_unit_openclose_exit(ui); 385 rw_exit(&md_unit_array_rw.lock); 386 return (mdmderror(&mirp->mde, MDE_IS_OPEN, mnum)); 387 } 388 389 md_unit_openclose_exit(ui); 390 391 if (!mirp->force) { 392 int smi; 393 for (smi = 0; smi < NMIRROR; 
smi++) { 394 if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) 395 continue; 396 397 if (!SMS_BY_INDEX_IS(un, smi, SMS_RUNNING)) { 398 rw_exit(&md_unit_array_rw.lock); 399 return (mdmderror(&mirp->mde, 400 MDE_C_WITH_INVAL_SM, mnum)); 401 } 402 } 403 } 404 405 reset_mirror(un, mnum, 1); 406 407 /* 408 * Update unit availability 409 */ 410 md_set[setno].s_un_avail++; 411 412 /* 413 * If MN set, reset s_un_next so all nodes can have 414 * the same view of the next available slot when 415 * nodes are -w and -j 416 */ 417 if (MD_MNSET_SETNO(setno)) { 418 (void) md_upd_set_unnext(setno, MD_MIN2UNIT(mnum)); 419 } 420 421 rw_exit(&md_unit_array_rw.lock); 422 return (0); 423 } 424 425 static int 426 mirror_get_geom( 427 mm_unit_t *un, 428 struct dk_geom *geomp 429 ) 430 { 431 md_get_geom((md_unit_t *)un, geomp); 432 433 return (0); 434 } 435 436 static int 437 mirror_get_vtoc( 438 mm_unit_t *un, 439 struct vtoc *vtocp 440 ) 441 { 442 md_get_vtoc((md_unit_t *)un, vtocp); 443 444 return (0); 445 } 446 447 static int 448 mirror_set_vtoc( 449 mm_unit_t *un, 450 struct vtoc *vtocp 451 ) 452 { 453 return (md_set_vtoc((md_unit_t *)un, vtocp)); 454 } 455 456 static int 457 mirror_get_extvtoc( 458 mm_unit_t *un, 459 struct extvtoc *vtocp 460 ) 461 { 462 md_get_extvtoc((md_unit_t *)un, vtocp); 463 464 return (0); 465 } 466 467 static int 468 mirror_set_extvtoc( 469 mm_unit_t *un, 470 struct extvtoc *vtocp 471 ) 472 { 473 return (md_set_extvtoc((md_unit_t *)un, vtocp)); 474 } 475 476 static int 477 mirror_get_cgapart( 478 mm_unit_t *un, 479 struct dk_map *dkmapp 480 ) 481 { 482 md_get_cgapart((md_unit_t *)un, dkmapp); 483 return (0); 484 } 485 486 static int 487 mirror_getcomp_by_dev(mm_unit_t *un, replace_params_t *params, 488 int *smi, int *cip) 489 { 490 mm_submirror_t *sm; 491 mm_submirror_ic_t *smic; 492 ms_comp_t *comp; 493 ms_unit_t *mous; 494 int ci; 495 int i; 496 int compcnt; 497 ms_cd_info_t cd; 498 void (*get_dev)(); 499 md_dev64_t dev = md_expldev(params->old_dev); 500 
md_error_t *ep = ¶ms->mde; 501 minor_t mnum = params->mnum; 502 mdkey_t devkey; 503 int nkeys; 504 set_t setno; 505 side_t side; 506 507 setno = MD_MIN2SET(MD_SID(un)); 508 side = mddb_getsidenum(setno); 509 510 if (md_getkeyfromdev(setno, side, dev, &devkey, &nkeys) != 0) 511 return (mddeverror(ep, MDE_NAME_SPACE, dev)); 512 513 for (i = 0; i < NMIRROR; i++) { 514 sm = &un->un_sm[i]; 515 smic = &un->un_smic[i]; 516 517 if (!SMS_IS(sm, SMS_INUSE)) 518 continue; 519 520 get_dev = 521 (void (*)())md_get_named_service(sm->sm_dev, 0, 522 "get device", 0); 523 compcnt = (*(smic->sm_get_component_count))(sm->sm_dev, un); 524 525 /* 526 * For each of the underlying stripe components get 527 * the info. 528 */ 529 for (ci = 0; ci < compcnt; ci++) { 530 (void) (*get_dev)(sm->sm_dev, sm, ci, &cd); 531 if ((cd.cd_dev == dev) || (cd.cd_orig_dev == dev)) { 532 *cip = ci; 533 *smi = i; 534 return (1); 535 } 536 } 537 538 /* 539 * now we rescan looking only for NODEV. If we find 540 * NODEV then we will check the keys to see if its a match. 541 * 542 * If no key was found to match dev, then there is 543 * no way to compare keys - so continue. 544 */ 545 if (nkeys == 0) { 546 continue; 547 } 548 mous = MD_UNIT(md_getminor(sm->sm_dev)); 549 550 for (ci = 0; ci < compcnt; ci++) { 551 552 comp = (struct ms_comp *) 553 ((void *)&((char *)mous)[mous->un_ocomp]); 554 555 (void) (*get_dev)(sm->sm_dev, sm, ci, &cd); 556 557 if (cd.cd_dev == NODEV64 || cd.cd_orig_dev == NODEV64) { 558 comp += ci; 559 if (comp->un_key == devkey) { 560 if (nkeys > 1) { 561 return (mddeverror( 562 ep, MDE_MULTNM, dev)); 563 } 564 *cip = ci; 565 *smi = i; 566 return (1); 567 } 568 } 569 } 570 } 571 return (mdcomperror(ep, MDE_CANT_FIND_COMP, mnum, dev)); 572 } 573 574 /* 575 * comp_replace: 576 * ---------------- 577 * Called to implement the component replace function 578 * 579 * Owner is returned in the parameter block passed in by the caller. 
580 * 581 * Returns: 582 * 0 success 583 * error code if the functions fails 584 * 585 * For a MN set, on entry all writes to the mirror are suspended, on exit 586 * from this function, writes must be resumed when not a dryrun. 587 */ 588 static int 589 comp_replace( 590 replace_params_t *params, 591 IOLOCK *lock 592 ) 593 { 594 minor_t mnum = params->mnum; 595 set_t setno; 596 side_t side; 597 mm_unit_t *un; 598 mdi_unit_t *ui; 599 ms_unit_t *ms_un; 600 mdi_unit_t *ms_ui; 601 ms_comp_t *comp; 602 mm_submirror_t *sm; 603 md_dev64_t smdev; 604 mddb_recid_t recids[6]; /* recids for stripe on SP */ 605 int smi, ci; 606 ms_new_dev_t nd; 607 int (*repl_dev)(); 608 void (*repl_done)(); 609 void *repl_data; 610 int err = 0; 611 ms_cd_info_t cd; 612 void (*get_dev)(); 613 614 mdclrerror(¶ms->mde); 615 616 if ((un = mirror_getun(mnum, ¶ms->mde, WRITERS, lock)) == NULL) { 617 return (0); 618 } 619 620 ui = MDI_UNIT(mnum); 621 if (ui->ui_tstate & MD_INACCESSIBLE) { 622 (void) mdmderror(¶ms->mde, MDE_IN_UNAVAIL_STATE, mnum); 623 goto errexit; 624 } 625 626 /* 627 * replace cannot be done while a resync is active or we are 628 * still waiting for an optimized resync to be started 629 */ 630 if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) { 631 (void) mdmderror(¶ms->mde, MDE_RESYNC_ACTIVE, mnum); 632 goto errexit; 633 } 634 635 if (mirror_getcomp_by_dev(un, params, &smi, &ci) == 0) { 636 goto errexit; 637 } 638 639 if (un->un_nsm == 1) { 640 (void) mdmderror(¶ms->mde, MDE_LAST_SM_RE, mnum); 641 goto errexit; 642 } 643 644 if (mirror_other_sources(un, smi, ci, 0) != 0) { 645 (void) mdcomperror(¶ms->mde, MDE_REPL_INVAL_STATE, 646 mnum, md_expldev(params->old_dev)); 647 goto errexit; 648 } 649 650 sm = &un->un_sm[smi]; 651 if (sm->sm_state & (SMS_OFFLINE | SMS_OFFLINE_RESYNC)) { 652 (void) mdmderror(¶ms->mde, MDE_ILLEGAL_SM_STATE, mnum); 653 goto errexit; 654 } 655 656 get_dev = (void (*)())md_get_named_service(sm->sm_dev, 0, 657 "get device", 0); 658 (void) 
(*get_dev)(sm->sm_dev, sm, ci, &cd); 659 660 repl_dev = (int (*)())md_get_named_service(sm->sm_dev, 0, 661 "replace device", 0); 662 663 smdev = sm->sm_dev; 664 ms_un = MD_UNIT(md_getminor(smdev)); 665 666 if (params->cmd == ENABLE_COMP) { 667 md_dev64_t this_dev; 668 int numkeys; 669 mdkey_t this_key; 670 671 this_dev = ((cd.cd_orig_dev == 0) ? cd.cd_dev : 672 cd.cd_orig_dev); 673 setno = MD_MIN2SET(md_getminor(smdev)); 674 side = mddb_getsidenum(setno); 675 comp = (struct ms_comp *) 676 ((void *)&((char *)ms_un)[ms_un->un_ocomp]); 677 comp += ci; 678 /* 679 * We trust the dev_t because we cannot determine the 680 * dev_t from the device id since a new disk is in the 681 * same location. Since this is a call from metareplace -e dx 682 * AND it is SCSI a new dev_t is not generated. So the 683 * dev_t from the mddb is used. Before enabling the device 684 * we check to make sure that multiple entries for the same 685 * device does not exist in the namespace. If they do we 686 * fail the ioctl. 687 * One of the many ways multiple entries in the name space 688 * can occur is if one removed the failed component in the 689 * stripe of a mirror and put another disk that was part of 690 * another metadevice. After reboot metadevadm would correctly 691 * update the device name for the metadevice whose component 692 * has moved. However now in the metadb there are two entries 693 * for the same name (ctds) that belong to different 694 * metadevices. One is valid, the other is a ghost or "last 695 * know as" ctds. 
696 */ 697 this_dev = md_getdevnum(setno, side, 698 comp->un_key, MD_TRUST_DEVT); 699 700 /* 701 * Verify that multiple keys for the same 702 * dev_t don't exist 703 */ 704 705 if (md_getkeyfromdev(setno, side, this_dev, 706 &this_key, &numkeys) != 0) { 707 (void) mddeverror(¶ms->mde, MDE_NAME_SPACE, 708 md_expldev(params->old_dev)); 709 goto errexit; 710 } 711 /* 712 * Namespace has multiple entries 713 * for the same devt 714 */ 715 if (numkeys > 1) { 716 (void) mddeverror(¶ms->mde, MDE_MULTNM, 717 md_expldev(params->old_dev)); 718 goto errexit; 719 } 720 if ((numkeys == 0) || (comp->un_key != this_key)) { 721 (void) mdcomperror(¶ms->mde, MDE_CANT_FIND_COMP, 722 mnum, this_dev); 723 goto errexit; 724 } 725 726 if ((md_getmajor(this_dev) != md_major) && 727 (md_devid_found(setno, side, this_key) == 1)) { 728 if (md_update_namespace_did(setno, side, 729 this_key, ¶ms->mde) != 0) { 730 (void) mddeverror(¶ms->mde, MDE_NAME_SPACE, 731 this_dev); 732 goto errexit; 733 } 734 } 735 736 if (md_expldev(params->new_dev) != this_dev) { 737 (void) mddeverror(¶ms->mde, MDE_FIX_INVAL_STATE, 738 md_expldev(params->new_dev)); 739 goto errexit; 740 } 741 742 /* in case of dryrun, don't actually do anything */ 743 if ((params->options & MDIOCTL_DRYRUN) == 0) { 744 err = (*repl_dev)(sm->sm_dev, 0, ci, NULL, recids, 6, 745 &repl_done, &repl_data); 746 } 747 } else if ((params->options & MDIOCTL_DRYRUN) == 0) { 748 nd.nd_dev = md_expldev(params->new_dev); 749 nd.nd_key = params->new_key; 750 nd.nd_start_blk = params->start_blk; 751 nd.nd_nblks = params->number_blks; 752 nd.nd_labeled = params->has_label; 753 nd.nd_hs_id = 0; 754 755 err = (*repl_dev)(sm->sm_dev, 0, ci, &nd, recids, 6, 756 &repl_done, &repl_data); 757 758 } 759 760 if (err != 0) { 761 (void) mdcomperror(¶ms->mde, err, mnum, 762 md_expldev(params->new_dev)); 763 goto errexit; 764 } 765 /* In case of a dryun we're done. 
*/ 766 if (params->options & MDIOCTL_DRYRUN) { 767 mdclrerror(¶ms->mde); 768 return (0); 769 } 770 771 /* set_sm_comp_state() commits the modified records */ 772 set_sm_comp_state(un, smi, ci, CS_RESYNC, recids, MD_STATE_NO_XMIT, 773 lock); 774 775 (*repl_done)(sm->sm_dev, repl_data); 776 777 /* 778 * If the mirror is open then need to make sure that the submirror, 779 * on which the replace ran, is also open and if not then open it. 780 * This is only a concern for a single component sub-mirror stripe 781 * as it may not be open due to the failure of the single component. 782 * 783 * This check has to be done after the call to (*repl_done) 784 * as that function releases the writer lock on the submirror. 785 */ 786 if (md_unit_isopen(ui)) { 787 minor_t ms_mnum = md_getminor(sm->sm_dev); 788 789 ms_ui = MDI_UNIT(ms_mnum); 790 791 if (!md_unit_isopen(ms_ui)) { 792 /* 793 * Underlying submirror is not open so open it. 794 */ 795 if (md_layered_open(ms_mnum, &smdev, MD_OFLG_NULL)) { 796 mirror_openfail_console_info(un, smi, ci); 797 goto errexit; 798 } 799 } 800 } 801 802 mirror_check_failfast(mnum); 803 804 if (params->cmd == ENABLE_COMP) { 805 SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ENABLE, SVM_TAG_METADEVICE, 806 MD_UN2SET(un), MD_SID(un)); 807 } else { 808 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REPLACE, SVM_TAG_METADEVICE, 809 MD_UN2SET(un), MD_SID(un)); 810 } 811 812 md_ioctl_writerexit(lock); 813 /* 814 * Reset any saved resync location flags as we've now replaced the 815 * component. This means we have to resync the _whole_ component. 
816 */ 817 un->un_rs_resync_done = un->un_rs_resync_2_do = 0; 818 un->un_rs_type = MD_RS_NONE; 819 mirror_resume_writes(un); 820 if (!MD_MNSET_SETNO(MD_UN2SET(un))) 821 (void) mirror_resync_unit(mnum, NULL, ¶ms->mde, lock); 822 mdclrerror(¶ms->mde); 823 return (0); 824 errexit: 825 /* We need to resume writes unless this is a dryrun */ 826 if (!(params->options & MDIOCTL_DRYRUN)) 827 mirror_resume_writes(un); 828 return (0); 829 } 830 831 /* 832 * mirror_attach: 833 * ---------------- 834 * Called to implement the submirror attach function 835 * 836 * Owner is returned in the parameter block passed in by the caller. 837 * 838 * Returns: 839 * 0 success 840 * error code if the functions fails 841 * 842 * For a MN set, on entry all writes to the mirror are suspended, on exit 843 * from this function, writes must be resumed when not a dryrun. 844 */ 845 static int 846 mirror_attach( 847 md_att_struct_t *att, 848 IOLOCK *lock 849 ) 850 { 851 minor_t mnum = att->mnum; 852 mm_unit_t *un; 853 md_unit_t *su; 854 mm_submirror_t *sm; 855 mm_submirror_ic_t *smic; 856 int smi; 857 md_dev64_t sm_dev; 858 minor_t sm_mnum; 859 mdkey_t indx; 860 set_t setno; 861 uint_t options; 862 863 /* 864 * This routine should not be called during upgrade. 
865 */ 866 if (MD_UPGRADE) { 867 return (0); 868 } 869 870 mdclrerror(&att->mde); 871 options = att->options; 872 873 if ((un = mirror_getun(mnum, &att->mde, WRITERS, lock)) == NULL) { 874 return (0); 875 } 876 877 setno = MD_UN2SET(un); 878 879 for (smi = 0; smi < NMIRROR; smi++) 880 if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) 881 break; 882 883 if (smi == NMIRROR) { 884 (void) mdmderror(&att->mde, MDE_MIRROR_FULL, mnum); 885 goto errexit; 886 } 887 888 sm = &un->un_sm[smi]; 889 smic = &un->un_smic[smi]; 890 sm_dev = att->submirror; 891 sm_mnum = md_getminor(sm_dev); 892 893 if (md_get_parent(sm_dev) != MD_NO_PARENT) { 894 (void) mdmderror(&att->mde, MDE_IN_USE, sm_mnum); 895 goto errexit; 896 } 897 898 if (md_unit_isopen(MDI_UNIT(sm_mnum))) { 899 (void) mdmderror(&att->mde, MDE_IS_OPEN, sm_mnum); 900 goto errexit; 901 } 902 903 /* Check the size */ 904 su = (md_unit_t *)MD_UNIT(sm_mnum); 905 if (un->c.un_total_blocks > su->c.un_total_blocks) { 906 (void) mdmderror(&att->mde, MDE_SM_TOO_SMALL, sm_mnum); 907 goto errexit; 908 } 909 910 /* Don't attach labeled sm to unlabeled mirrors */ 911 if ((su->c.un_flag & MD_LABELED) && !(un->c.un_flag & MD_LABELED)) { 912 (void) mdmderror(&att->mde, MDE_NO_LABELED_SM, sm_mnum); 913 goto errexit; 914 } 915 916 indx = md_setshared_name(setno, 917 ddi_major_to_name(md_getmajor(sm_dev)), 0L); 918 919 /* Open the sm, only if the mirror is open */ 920 if (md_unit_isopen(MDI_UNIT(mnum))) { 921 if (md_layered_open(mnum, &sm_dev, MD_OFLG_NULL)) { 922 (void) md_remshared_name(setno, indx); 923 (void) mdmderror(&att->mde, MDE_SM_OPEN_ERR, 924 md_getminor(att->submirror)); 925 goto errexit; 926 } 927 /* in dryrun mode, don't leave the device open */ 928 if (options & MDIOCTL_DRYRUN) { 929 md_layered_close(sm_dev, MD_OFLG_NULL); 930 } 931 } 932 933 /* 934 * After this point the checks are done and action is taken. 935 * So, clean up and return in case of dryrun. 
936 */ 937 938 if (options & MDIOCTL_DRYRUN) { 939 md_ioctl_writerexit(lock); 940 mdclrerror(&att->mde); 941 return (0); 942 } 943 944 sm->sm_key = att->key; 945 sm->sm_dev = sm_dev; 946 md_set_parent(sm_dev, MD_SID(un)); 947 mirror_set_sm_state(sm, smic, SMS_ATTACHED_RESYNC, 1); 948 build_submirror(un, smi, 0); 949 un->un_nsm++; 950 mirror_commit(un, SMI2BIT(smi), 0); 951 mirror_check_failfast(mnum); 952 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ATTACH, SVM_TAG_METADEVICE, 953 MD_UN2SET(un), MD_SID(un)); 954 955 mirror_resume_writes(un); 956 md_ioctl_writerexit(lock); 957 if (!MD_MNSET_SETNO(setno)) 958 (void) mirror_resync_unit(mnum, NULL, &att->mde, lock); 959 mdclrerror(&att->mde); 960 return (0); 961 errexit: 962 /* We need to resume writes unless this is a dryrun */ 963 if (!(options & MDIOCTL_DRYRUN)) 964 mirror_resume_writes(un); 965 return (0); 966 } 967 968 969 void 970 reset_comp_states(mm_submirror_t *sm, mm_submirror_ic_t *smic) 971 { 972 int compcnt; 973 int i; 974 md_m_shared_t *shared; 975 976 compcnt = (*(smic->sm_get_component_count)) (sm->sm_dev, sm); 977 for (i = 0; i < compcnt; i++) { 978 shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx)) 979 (sm->sm_dev, sm, i); 980 981 shared->ms_state = CS_OKAY; 982 shared->ms_flags &= ~MDM_S_NOWRITE; 983 shared->ms_lasterrcnt = 0; 984 } 985 } 986 987 988 /* 989 * mirror_detach: 990 * ---------------- 991 * Called to implement the submirror detach function 992 * 993 * Owner is returned in the parameter block passed in by the caller. 994 * 995 * Returns: 996 * 0 success 997 * error code if the functions fails 998 * 999 * For a MN set, on entry all writes to the mirror are suspended, on exit 1000 * from this function, writes must be resumed. 
1001 */ 1002 static int 1003 mirror_detach( 1004 md_detach_params_t *det, 1005 IOLOCK *lock 1006 ) 1007 { 1008 minor_t mnum = det->mnum; 1009 mm_unit_t *un; 1010 mdi_unit_t *ui; 1011 mm_submirror_t *sm; 1012 mm_submirror_t *old_sm; 1013 mm_submirror_t *new_sm; 1014 mm_submirror_ic_t *smic; 1015 int smi; 1016 md_dev64_t sm_dev; 1017 md_unit_t *su; 1018 sv_dev_t sv; 1019 mddb_recid_t recids[2]; 1020 int nsv = 0; 1021 int smi_remove; 1022 mm_submirror_ic_t *old_smic; 1023 mm_submirror_ic_t *new_smic; 1024 1025 mdclrerror(&det->mde); 1026 1027 if ((un = mirror_getun(mnum, &det->mde, WRITERS, lock)) == NULL) { 1028 return (0); 1029 } 1030 1031 ui = MDI_UNIT(mnum); 1032 if (ui->ui_tstate & MD_INACCESSIBLE) { 1033 mirror_resume_writes(un); 1034 return (mdmderror(&det->mde, MDE_IN_UNAVAIL_STATE, mnum)); 1035 } 1036 /* 1037 * detach cannot be done while a resync is active or we are 1038 * still waiting for an optimized resync to be started 1039 */ 1040 if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) { 1041 mirror_resume_writes(un); 1042 return (mdmderror(&det->mde, MDE_RESYNC_ACTIVE, mnum)); 1043 } 1044 1045 for (smi = 0; smi < NMIRROR; smi++) { 1046 if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) { 1047 continue; 1048 } 1049 if (un->un_sm[smi].sm_dev == det->submirror) { 1050 smi_remove = smi; 1051 break; 1052 } 1053 } 1054 1055 if (smi == NMIRROR) { 1056 mirror_resume_writes(un); 1057 return (mdmderror(&det->mde, MDE_CANT_FIND_SM, mnum)); 1058 } 1059 1060 if (un->un_nsm == 1) { 1061 mirror_resume_writes(un); 1062 return (mdmderror(&det->mde, MDE_LAST_SM, mnum)); 1063 } 1064 1065 if (mirror_other_sources(un, smi, WHOLE_SM, 0) != 0) { 1066 mirror_resume_writes(un); 1067 return (mdmderror(&det->mde, MDE_NO_READABLE_SM, mnum)); 1068 } 1069 1070 sm = &un->un_sm[smi]; 1071 smic = &un->un_smic[smi]; 1072 sm_dev = sm->sm_dev; 1073 su = (md_unit_t *)MD_UNIT(md_getminor(sm_dev)); 1074 1075 /* 1076 * Need to pass in the extra record id, 1077 * cause mirror_commit() will 
not commit 1078 * a sm (from the smmask) if the slot is unused. 1079 * Which it is, since we are detaching. 1080 */ 1081 recids[0] = ((md_unit_t *)MD_UNIT(md_getminor(sm_dev)))->c.un_record_id; 1082 recids[1] = 0; 1083 1084 mirror_set_sm_state(sm, smic, SMS_UNUSED, det->force_detach); 1085 /* 1086 * If there are any erred components 1087 * then make the detach fail and do not unparent the 1088 * submirror. 1089 */ 1090 if (sm->sm_state == SMS_UNUSED) { 1091 /* reallow soft partitioning of submirror */ 1092 MD_CAPAB(su) |= MD_CAN_SP; 1093 md_reset_parent(sm_dev); 1094 reset_comp_states(sm, smic); 1095 un->un_nsm--; 1096 /* Close the sm, only if the mirror is open */ 1097 if (md_unit_isopen(MDI_UNIT(mnum))) 1098 md_layered_close(sm_dev, MD_OFLG_NULL); 1099 sv.setno = MD_UN2SET(un); 1100 sv.key = sm->sm_key; 1101 nsv = 1; 1102 } else 1103 (void) mdmderror(&det->mde, MDE_SM_FAILED_COMPS, mnum); 1104 1105 /* 1106 * Perhaps the mirror changed it's size due to this detach. 1107 * (void) mirror_grow_unit(un, &mde); 1108 */ 1109 1110 /* 1111 * NOTE: We are passing the detached sm recid 1112 * and not the smmask field. This is correct. 1113 */ 1114 mirror_commit(un, 0, recids); 1115 md_rem_names(&sv, nsv); 1116 if (sm->sm_state == SMS_UNUSED) { 1117 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DETACH, SVM_TAG_METADEVICE, 1118 MD_UN2SET(un), MD_SID(un)); 1119 } 1120 1121 /* 1122 * Reshuffle the submirror devices in the array as we potentially 1123 * have a dead record in the middle of it. 
1124 */ 1125 for (smi = 0; nsv && (smi < NMIRROR); smi++) { 1126 if (smi < smi_remove) { 1127 continue; 1128 } 1129 if (smi > smi_remove) { 1130 old_sm = &un->un_sm[smi]; 1131 new_sm = &un->un_sm[smi - 1]; 1132 new_sm->sm_key = old_sm->sm_key; 1133 new_sm->sm_dev = old_sm->sm_dev; 1134 new_sm->sm_state = old_sm->sm_state; 1135 new_sm->sm_flags = old_sm->sm_flags; 1136 new_sm->sm_shared = old_sm->sm_shared; 1137 new_sm->sm_hsp_id = old_sm->sm_hsp_id; 1138 new_sm->sm_timestamp = old_sm->sm_timestamp; 1139 bzero(old_sm, sizeof (mm_submirror_t)); 1140 old_smic = &un->un_smic[smi]; 1141 new_smic = &un->un_smic[smi - 1]; 1142 bcopy(old_smic, new_smic, sizeof (mm_submirror_ic_t)); 1143 bzero(old_smic, sizeof (mm_submirror_ic_t)); 1144 } 1145 } 1146 mirror_commit(un, 0, NULL); 1147 mirror_resume_writes(un); 1148 return (0); 1149 } 1150 1151 /* 1152 * mirror_offline: 1153 * ---------------- 1154 * Called to implement the submirror offline function 1155 * 1156 * Owner is returned in the parameter block passed in by the caller. 1157 * 1158 * Returns: 1159 * 0 success 1160 * error code if the functions fails 1161 * 1162 * For a MN set, on entry all writes to the mirror are suspended, on exit 1163 * from this function, writes must be resumed. 
1164 */ 1165 static int 1166 mirror_offline( 1167 md_i_off_on_t *miop, 1168 IOLOCK *lock 1169 ) 1170 { 1171 minor_t mnum = miop->mnum; 1172 mm_unit_t *un; 1173 mm_submirror_t *sm; 1174 mm_submirror_ic_t *smic; 1175 int smi; 1176 mdi_unit_t *ui = MDI_UNIT(mnum); 1177 1178 mdclrerror(&miop->mde); 1179 1180 if ((un = mirror_getun(mnum, &miop->mde, WR_LOCK, lock)) == NULL) { 1181 return (0); 1182 } 1183 1184 /* 1185 * offline cannot be done while a resync is active or we are 1186 * still waiting for an optimized resync to be started 1187 */ 1188 if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) { 1189 mirror_resume_writes(un); 1190 return (mdmderror(&miop->mde, MDE_RESYNC_ACTIVE, mnum)); 1191 } 1192 1193 /* 1194 * Reject mirror_offline if ABR is set 1195 */ 1196 if ((ui->ui_tstate & MD_ABR_CAP) || un->un_abr_count) { 1197 mirror_resume_writes(un); 1198 return (mderror(&miop->mde, MDE_ABR_SET)); 1199 } 1200 1201 for (smi = 0; smi < NMIRROR; smi++) { 1202 if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) 1203 continue; 1204 if (un->un_sm[smi].sm_dev == miop->submirror) 1205 break; 1206 } 1207 1208 if (smi == NMIRROR) { 1209 mirror_resume_writes(un); 1210 return (mdmderror(&miop->mde, MDE_CANT_FIND_SM, mnum)); 1211 } 1212 1213 sm = &un->un_sm[smi]; 1214 smic = &un->un_smic[smi]; 1215 if (!SMS_IS(sm, SMS_RUNNING) && !miop->force_offline) { 1216 mirror_resume_writes(un); 1217 return (mdmderror(&miop->mde, MDE_ILLEGAL_SM_STATE, mnum)); 1218 } 1219 1220 if (mirror_other_sources(un, smi, WHOLE_SM, 0) != 0) { 1221 mirror_resume_writes(un); 1222 return (mdmderror(&miop->mde, MDE_NO_READABLE_SM, mnum)); 1223 } 1224 mirror_set_sm_state(sm, smic, SMS_OFFLINE, 1); 1225 mirror_resume_writes(un); 1226 1227 MD_STATUS(un) |= MD_UN_OFFLINE_SM; 1228 mirror_commit(un, NO_SUBMIRRORS, 0); 1229 SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OFFLINE, SVM_TAG_METADEVICE, 1230 MD_UN2SET(un), MD_SID(un)); 1231 return (0); 1232 } 1233 1234 /* 1235 * mirror_online: 1236 * ---------------- 1237 * Called to 
/*
 * mirror_online:
 * ----------------
 * Called to implement the submirror online function
 *
 * The submirror whose device matches miop->submirror must currently be
 * SMS_OFFLINE.  It is moved to SMS_OFFLINE_RESYNC and flagged as a resync
 * target, and the unit is marked MD_UN_OPT_NOT_DONE.  For a non-MN set
 * mirror_resync_unit() is then called and its result returned; for a MN set
 * 0 is returned without calling it.
 *
 * Returns:
 *	0	success, or the unit could not be obtained (see miop->mde)
 *	error code if the function fails
 *
 * For a MN set, on entry all writes to the mirror are suspended, on exit
 * from this function, writes must be resumed.
 */
static int
mirror_online(
	md_i_off_on_t	*miop,
	IOLOCK		*lock
)
{
	minor_t			mnum = miop->mnum;
	mm_unit_t		*un;
	mm_submirror_t		*sm;
	mm_submirror_ic_t	*smic;
	int			smi;
	set_t			setno = MD_MIN2SET(mnum);

	mdclrerror(&miop->mde);

	if ((un = mirror_getun(mnum, &miop->mde, WR_LOCK, lock)) == NULL) {
		return (0);
	}

	/* Locate the in-use submirror that matches the requested device */
	for (smi = 0; smi < NMIRROR; smi++) {
		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE))
			continue;
		if (un->un_sm[smi].sm_dev == miop->submirror)
			break;
	}
	if (smi == NMIRROR) {
		mirror_resume_writes(un);
		return (mdmderror(&miop->mde, MDE_CANT_FIND_SM, mnum));
	}

	sm = &un->un_sm[smi];
	smic = &un->un_smic[smi];
	/* Only a submirror that is currently offline can be onlined */
	if (!SMS_IS(sm, SMS_OFFLINE)) {
		mirror_resume_writes(un);
		return (mdmderror(&miop->mde, MDE_ILLEGAL_SM_STATE, mnum));
	}

	/*
	 * online cannot be done while a resync is active or we are
	 * still waiting for an optimized resync to be started
	 */
	if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) {
		mirror_resume_writes(un);
		return (mdmderror(&miop->mde, MDE_RESYNC_ACTIVE, mnum));
	}

	mirror_set_sm_state(sm, smic, SMS_OFFLINE_RESYNC, 1);
	mirror_commit(un, NO_SUBMIRRORS, 0);
	mirror_check_failfast(mnum);
	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ONLINE, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));


	/* for MN sets, re-read the resync record from disk */
	if (MD_MNSET_SETNO(MD_UN2SET(un)))
		(void) mddb_reread_rr(setno, un->un_rr_dirty_recid);

	/* Seed the resync bitmap from the dirty-region bitmap */
	bcopy((caddr_t)un->un_dirty_bm, (caddr_t)un->un_resync_bm,
	    howmany(un->un_rrd_num, NBBY));
	MD_STATUS(un) |= MD_UN_OPT_NOT_DONE;
	sm->sm_flags |= MD_SM_RESYNC_TARGET;
	mirror_resume_writes(un);
	/* The ioctl writer lock is dropped before mirror_resync_unit() */
	md_ioctl_writerexit(lock);
	if (!MD_MNSET_SETNO(setno))
		return (mirror_resync_unit(mnum, NULL, &miop->mde, lock));
	else return (0);
}

/*
 * mirror_grow_unit:
 * ----------------
 * Grow the mirror to the size of its smallest in-use submirror, rounded down
 * to a multiple of un_nhead * un_nsect.
 *
 * If a resync is active or an optimized resync is still pending the grow is
 * deferred: MD_UN_GROW_PENDING is set and committed and MDE_GROW_DELAYED is
 * recorded in ep.  Nothing is changed (return 0) when an in-use submirror is
 * not a metadevice or when the smallest submirror is not larger than the
 * mirror.
 *
 * Returns:
 *	0	nothing to do, or the mirror was grown
 *	otherwise the value returned by mdmderror() for the error set in ep
 */
int
mirror_grow_unit(
	mm_unit_t		*un,
	md_error_t		*ep
)
{
	md_unit_t		*su;
	mm_submirror_t		*sm;
	int			smi;
	diskaddr_t		total_blocks;
	diskaddr_t		current_tb;
	int			spc;		/* nhead * nsect: sectors per cylinder */
	minor_t			mnum = MD_SID(un);

	/*
	 * grow_unit cannot be done while a resync is active or we are
	 * still waiting for an optimized resync to be started. Set
	 * flag to indicate GROW_PENDING and once the resync is complete
	 * the grow_unit function will be executed.
	 */
	if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) {
		MD_STATUS(un) |= MD_UN_GROW_PENDING;
		mirror_commit(un, NO_SUBMIRRORS, 0);
		return (mdmderror(ep, MDE_GROW_DELAYED, MD_SID(un)));
	}

	/*
	 * Find the smallest submirror
	 */
	total_blocks = 0;
	for (smi = 0; smi < NMIRROR; smi++) {
		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE))
			continue;
		sm = &un->un_sm[smi];
		/*
		 * Growth is not possible if there is one or more
		 * submirrors made up of non-Metadevices.
		 */
		if (md_getmajor(sm->sm_dev) != md_major)
			return (0);

		su = MD_UNIT(md_getminor(sm->sm_dev));
		if ((total_blocks == 0) ||
		    (su->c.un_total_blocks < total_blocks))
			total_blocks = su->c.un_total_blocks;
	}

	/*
	 * If the smallest submirror is not larger
	 * than the mirror, we are all done.
	 */
	if (total_blocks <= un->c.un_total_blocks)
		return (0);

	/*
	 * Growing the mirror now.
	 * First: Round down the actual_tb to be a multiple
	 *	of nheads * nsects.
	 *
	 * NOTE(review): spc is used as a divisor without a zero check;
	 * presumably the unit geometry is always non-zero here - confirm.
	 */
	spc = un->c.un_nhead * un->c.un_nsect;
	current_tb = (total_blocks/spc) * spc;

	un->c.un_total_blocks = current_tb;
	md_nblocks_set(mnum, un->c.un_total_blocks);
	un->c.un_actual_tb = total_blocks;

	/* Is the mirror growing from 32 bit device to 64 bit device? */
	if (((un->c.un_revision & MD_64BIT_META_DEV) == 0) &&
	    (un->c.un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS)) {
#if defined(_ILP32)
		/*
		 * Note that un_total_blocks and un_actual_tb have already
		 * been updated above when this error is returned.
		 */
		return (mdmderror(ep, MDE_UNIT_TOO_LARGE, mnum));
#else
		mddb_type_t	typ1;
		mddb_recid_t	recid;
		set_t		setno;
		mddb_recid_t	old_recid = un->c.un_record_id;
		mddb_recid_t	old_vtoc;
		mddb_de_ic_t	*dep, *old_dep;
		md_create_rec_option_t	options;

		/* yup, new device size. So we need to replace the record */
		typ1 = (mddb_type_t)md_getshared_key(MD_UN2SET(un),
		    mirror_md_ops.md_driver.md_drivername);
		setno = MD_MIN2SET(mnum);

		/* Preserve the friendly name properties of growing unit */
		options = MD_CRO_64BIT | MD_CRO_MIRROR;
		if (un->c.un_revision & MD_FN_META_DEV)
			options |= MD_CRO_FN;
		/*
		 * NOTE(review): the record id returned here is used without
		 * being checked for failure - confirm mddb_createrec()
		 * cannot fail in this context.
		 */
		recid = mddb_createrec(offsetof(mm_unit_t, un_smic), typ1,
		    MIRROR_REC, options, setno);
		/* Resize to include incore fields */
		un->c.un_revision |= MD_64BIT_META_DEV;
		/* All 64 bit metadevices only support EFI labels. */
		un->c.un_flag |= MD_EFILABEL;
		/*
		 * If the device had a vtoc record attached to it, we remove
		 * the vtoc record, because the layout has changed completely.
		 */
		old_vtoc = un->c.un_vtoc_id;
		if (old_vtoc != 0) {
			un->c.un_vtoc_id =
			    md_vtoc_to_efi_record(old_vtoc, setno);
		}
		/*
		 * Point the new record at the existing unit data: free the
		 * buffer that came with the new record and move the old
		 * record's user-data pointers and sizes across.
		 */
		MD_RECID(un) = recid;
		dep = mddb_getrecdep(recid);
		old_dep = mddb_getrecdep(old_recid);
		kmem_free(dep->de_rb_userdata, dep->de_reqsize);
		dep->de_rb_userdata = old_dep->de_rb_userdata;
		dep->de_reqsize = old_dep->de_reqsize;
		dep->de_rb_userdata_ic = old_dep->de_rb_userdata_ic;
		dep->de_icreqsize = old_dep->de_icreqsize;
		mirror_commit(un, NO_SUBMIRRORS, 0);
		/*
		 * Detach the shared buffers from the old record before it is
		 * deleted; they now belong to the new record.
		 */
		old_dep->de_rb_userdata = NULL;
		old_dep->de_rb_userdata_ic = NULL;
		mddb_deleterec_wrapper(old_recid);
		/*
		 * If there was a vtoc record, it is no longer needed, because
		 * a new efi record has been created for this un.
		 */
		if (old_vtoc != 0) {
			mddb_deleterec_wrapper(old_vtoc);
		}
#endif
	}

	/*
	 * Above MD_MAX_NUM_RR regions at the current region size the regions
	 * are resized; otherwise regions are added.
	 */
	if ((current_tb/un->un_rrd_blksize) > MD_MAX_NUM_RR) {
		if (mirror_resize_resync_regions(un, current_tb)) {
			return (mdmderror(ep, MDE_RR_ALLOC_ERROR, MD_SID(un)));
		}
		mirror_check_failfast(mnum);
		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
		return (0);
	}

	if (mirror_add_resync_regions(un, current_tb)) {
		return (mdmderror(ep, MDE_RR_ALLOC_ERROR, MD_SID(un)));
	}

	mirror_check_failfast(mnum);
	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));

	return (0);
}

/*
 * mirror_grow:
 * -----------
 * Grow request entry point.  mgp is treated as a md_grow_params_t.  Obtains
 * the unit with WR_LOCK and calls mirror_grow_unit(), unless a grow is
 * already pending (MD_UN_GROW_PENDING), in which case nothing is done.
 *
 * Returns:
 *	0	unit not obtained (see mgph->mde) or a grow is already pending
 *	otherwise the result of mirror_grow_unit()
 */
static int
mirror_grow(
	void			*mgp,
	IOLOCK			*lock
)
{
	mm_unit_t		*un;
	md_grow_params_t	*mgph = mgp;

	mdclrerror(&mgph->mde);

	if ((un = mirror_getun(mgph->mnum,
	    &mgph->mde, WR_LOCK, lock)) == NULL)
		return (0);

	if (MD_STATUS(un) & MD_UN_GROW_PENDING)
		return (0);

	return (mirror_grow_unit(un, &mgph->mde));
}

/*
 * mirror_change:
 * -------------
 * Update the read option, write option and/or pass number of a mirror.
 * Each value is only applied when its corresponding change_* field in
 * mmp->params is non-zero.  The unit is committed and an ESC_SVM_CHANGE
 * event is generated even when no field was changed.
 *
 * Returns:
 *	0	always; errors from mirror_getun() are left in mmp->mde
 */
static int
mirror_change(
	md_mirror_params_t	*mmp,
	IOLOCK			*lock
)
{
	mm_params_t		*pp = &mmp->params;
	mm_unit_t		*un;

	mdclrerror(&mmp->mde);

	if ((un = mirror_getun(mmp->mnum, &mmp->mde, WR_LOCK, lock)) == NULL)
		return (0);

	if (pp->change_read_option)
		un->un_read_option = pp->read_option;

	if (pp->change_write_option)
		un->un_write_option = pp->write_option;

	if (pp->change_pass_num)
		un->un_pass_num = pp->pass_num;

	mirror_commit(un, NO_SUBMIRRORS, 0);

	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_CHANGE, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));
	return (0);
}

/*
 * mirror_get_resync:
 * -----------------
 * Fill in the resync status fields of ri for the mirror ri->ri_mnum.  The
 * unit is obtained with STALE_OK|NO_LOCK.
 *
 *	ri_flags	 MD_RI_INPROGRESS if a resync is active, plus the
 *			 MD_RI_BLOCK bit of the resync thread flags
 *	ri_percent_done	 un_rs_resync_done relative to un_rs_resync_2_do;
 *			 scaled to 1000 (and capped at 1000) for units larger
 *			 than MD_MAX_BLKS_FOR_SMALL_DEVS, otherwise scaled to
 *			 100.  Only written when a resync is active or
 *			 cancelled, or when the set is stale.
 *	ri_percent_dirty percentage of dirty resync regions, rounded up;
 *			 0 when the mirror has fewer than two submirrors
 *
 * Returns:
 *	0	always; errors from mirror_getun() are left in ri->mde
 */
static int
mirror_get_resync(
	md_resync_ioctl_t	*ri
)
{
	minor_t			mnum = ri->ri_mnum;
	mm_unit_t		*un;
	u_longlong_t		percent;
	uint_t			cnt;
	uint_t			rr;
	diskaddr_t		d;

	mdclrerror(&ri->mde);

	if ((un = mirror_getun(mnum, &ri->mde, STALE_OK|NO_LOCK, NULL)) == NULL)
		return (0);

	ri->ri_flags = 0;
	/* A stale set reports no progress and no dirty regions */
	if (md_get_setstatus(MD_MIN2SET(mnum)) & MD_SET_STALE) {
		ri->ri_percent_done = 0;
		ri->ri_percent_dirty = 0;
		return (0);
	}

	if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE|MD_UN_RESYNC_CANCEL)) {
		if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE)
			ri->ri_flags |= MD_RI_INPROGRESS;
		/* Return state of resync thread */
		ri->ri_flags |= (un->un_rs_thread_flags & MD_RI_BLOCK);
		d = un->un_rs_resync_2_do;
		if (d) {
			percent = un->un_rs_resync_done;
			if (un->c.un_total_blocks >
			    MD_MAX_BLKS_FOR_SMALL_DEVS) {
				/* large units report in tenths of a percent */
				percent *= 1000;
				percent /= d;
				if (percent > 1000)
					percent = 1000;
			} else {
				percent *= 100;
				percent /= d;
			}
			ri->ri_percent_done = (int)percent;
		} else {
			ri->ri_percent_done = 0;
		}
	}
	if (un->un_nsm < 2) {
		ri->ri_percent_dirty = 0;
		return (0);
	}
	/* Count the dirty resync regions */
	cnt = 0;
	for (rr = 0; rr < un->un_rrd_num; rr++)
		if (IS_REGION_DIRTY(rr, un))
			cnt++;
	d = un->un_rrd_num;
	if (d) {
		percent = cnt;
		percent *= 100;
		percent += d - 1;		/* round up */
		percent /= d;
	} else
		percent = 0;
	ri->ri_percent_dirty = (int)percent;
	return (0);
}

/*
 * mirror_get_owner:
 * ----------------
 * Called to obtain the current owner of a mirror.
 *
 * Owner is returned in the parameter block passed in by the caller.
 *
 * Returns:
 *	0	success
 *	EINVAL	metadevice does not exist or is not a member of a multi-owned
 *		set.
 */
static int
mirror_get_owner(md_set_mmown_params_t *p, IOLOCK *lock)
{
	mm_unit_t	*un;
	set_t		setno;

	if ((un = mirror_getun(p->d.mnum, &p->mde, RD_LOCK, lock)) == NULL)
		return (EINVAL);

	setno = MD_UN2SET(un);
	if (!MD_MNSET_SETNO(setno)) {
		return (EINVAL);
	}
	p->d.owner = un->un_mirror_owner;
	return (0);
}
As the handler for the choose_owner message needs to send another 1619 * ioctl through the metaioctl() entry point, any other use (by rpc.metad or 1620 * mdcommd checking on set ownership) will deadlock the system leading to 1621 * cluster reconfiguration timeouts and eventually a node or (at worst) a 1622 * cluster-wide panic 1623 */ 1624 static void 1625 mirror_choose_owner_thread(md_mn_msg_chooseid_t *msg) 1626 { 1627 int rval; 1628 md_mn_kresult_t *kres; 1629 set_t setno = MD_MIN2SET(msg->msg_chooseid_mnum); 1630 1631 kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP); 1632 rval = mdmn_ksend_message(setno, MD_MN_MSG_CHOOSE_OWNER, 1633 MD_MSGF_NO_BCAST | MD_MSGF_NO_LOG, 0, (char *)msg, 1634 sizeof (md_mn_msg_chooseid_t), kres); 1635 if (!MDMN_KSEND_MSG_OK(rval, kres)) { 1636 mdmn_ksend_show_error(rval, kres, "CHOOSE OWNER"); 1637 cmn_err(CE_WARN, "ksend_message failure: CHOOSE_OWNER"); 1638 } 1639 1640 kmem_free(kres, sizeof (md_mn_kresult_t)); 1641 kmem_free(msg, sizeof (md_mn_msg_chooseid_t)); 1642 thread_exit(); 1643 } 1644 1645 /* 1646 * mirror_owner_thread: 1647 * ------------------- 1648 * Called to request an ownership change from a thread context. This issues 1649 * a mdmn_ksend_message() and then completes the appropriate ownership change 1650 * on successful completion of the message transport. 1651 * The originating application must poll for completion on the 'flags' member 1652 * of the MD_MN_MM_OWNER_STATUS ioctl() parameter block. 
/*
 * mirror_owner_thread:
 * -------------------
 * Called to request an ownership change from a thread context. This issues
 * a mdmn_ksend_message() and then completes the appropriate ownership change
 * on successful completion of the message transport.
 * The originating application must poll for completion on the 'flags' member
 * of the MD_MN_MM_OWNER_STATUS ioctl() parameter block.
 * Success is marked by a return value of MD_MN_MM_RES_OK, Failure by
 * MD_MN_MM_RES_FAIL
 *
 * Takes ownership of ownp and frees it before the thread exits.
 */
static void
mirror_owner_thread(md_mn_req_owner_t *ownp)
{
	int		rval;
	set_t		setno = MD_MIN2SET(ownp->mnum);
	mm_unit_t	*un = MD_UNIT(ownp->mnum);
	md_mn_kresult_t	*kresult;
	md_mps_t	*ps1;

	/* Clear any previous result before the new request is sent */
	un->un_mirror_owner_status = 0;

	mutex_enter(&un->un_owner_mx);
	un->un_owner_state |= MM_MN_OWNER_SENT;
	mutex_exit(&un->un_owner_mx);

	kresult = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
	rval = mdmn_ksend_message(setno, MD_MN_MSG_REQUIRE_OWNER,
	    MD_MSGF_NO_LOG, 0, (char *)ownp, sizeof (md_mn_req_owner_t),
	    kresult);

	if (!MDMN_KSEND_MSG_OK(rval, kresult)) {
		/*
		 * Message transport layer failed. Return the failure code to
		 * the application.
		 */
		mdmn_ksend_show_error(rval, kresult, "CHANGE OWNER");
		mutex_enter(&un->un_owner_mx);
		un->un_owner_state &= ~(MM_MN_BECOME_OWNER|MM_MN_OWNER_SENT);
		mutex_exit(&un->un_owner_mx);
		un->un_mirror_owner_status =
		    MD_MN_MM_RESULT | MD_MN_MM_RES_FAIL;
	} else {
		/*
		 * Ownership change succeeded. Update in-core version of
		 * mirror owner.
		 */
		mutex_enter(&un->un_owner_mx);
		/* Only take ownership if BECOME_OWNER is still set */
		if (un->un_owner_state & MM_MN_BECOME_OWNER) {
			un->un_mirror_owner = md_mn_mynode_id;
			/* Sets node owner of un_rr_dirty record */
			if (un->un_rr_dirty_recid)
				(void) mddb_setowner(un->un_rr_dirty_recid,
				    md_mn_mynode_id);
			/*
			 * Release the block on the current resync region if it
			 * is blocked
			 */
			ps1 = un->un_rs_prev_overlap;
			if ((ps1 != NULL) &&
			    (ps1->ps_flags & MD_MPS_ON_OVERLAP))
				mirror_overlap_tree_remove(ps1);
		}

		un->un_owner_state &= ~(MM_MN_OWNER_SENT|MM_MN_BECOME_OWNER);
		mutex_exit(&un->un_owner_mx);
		un->un_mirror_owner_status =
		    MD_MN_MM_RESULT | MD_MN_MM_RES_OK;

		/* Restart the resync thread if it was previously blocked */
		if (un->un_rs_thread_flags & MD_RI_BLOCK_OWNER) {
			mutex_enter(&un->un_rs_thread_mx);
			un->un_rs_thread_flags &= ~MD_RI_BLOCK_OWNER;
			cv_signal(&un->un_rs_thread_cv);
			mutex_exit(&un->un_rs_thread_mx);
		}
	}
	kmem_free(kresult, sizeof (md_mn_kresult_t));
	kmem_free(ownp, sizeof (md_mn_req_owner_t));
	thread_exit();
}
Successful completion of the mdmn_ksend_message() means 1743 * that the owner field can be changed. 1744 * 1745 * MD_MN_MM_PREVENT_CHANGE 1746 * Disallow any change of ownership once this ownership change has 1747 * been processed. The only way of changing the owner away from 1748 * the p->d.owner node specified in the call is to issue a request 1749 * with MD_MN_MM_ALLOW_CHANGE set in the flags. Any request to 1750 * become owner from a different node while the PREVENT_CHANGE 1751 * is in operation will result in an EAGAIN return value. 1752 * un->un_owner_state has MM_MN_PREVENT_CHANGE set. 1753 * 1754 * MD_MN_MM_ALLOW_CHANGE 1755 * Allow the owner to be changed by a subsequent request. 1756 * un->un_owner_state has MM_MN_PREVENT_CHANGE cleared. 1757 * 1758 * MD_MN_MM_CHOOSE_OWNER 1759 * Choose a new owner for a mirror resync. In this case, the new 1760 * owner argument is not used. The selection of a new owner 1761 * is a round robin allocation using a resync owner count. This 1762 * ioctl passes this value in a message to the master node 1763 * which uses it to select a node from the node list and then 1764 * sends it a message to become the owner. 1765 * 1766 * If we are the current owner, we must stop further i/o from being scheduled 1767 * and wait for any pending i/o to drain. We wait for any in-progress resync 1768 * bitmap updates to complete and we can then set the owner. If an update to 1769 * the resync bitmap is attempted after this we simply don't write this out to 1770 * disk until the ownership is restored. 1771 * 1772 * If we are the node that wants to become the owner we update the in-core 1773 * owner and return. The i/o that initiated the ownership change will complete 1774 * on successful return from this ioctl. 1775 * 1776 * Return Value: 1777 * 0 Success 1778 * EINVAL Invalid unit referenced 1779 * EAGAIN Ownership couldn't be transferred away or change of 1780 * ownership is prevented. Caller should retry later on. 
1781 */ 1782 static int 1783 mirror_set_owner(md_set_mmown_params_t *p, IOLOCK *lock) 1784 { 1785 mdi_unit_t *ui; 1786 mm_unit_t *un; 1787 set_t setno; 1788 1789 if ((un = mirror_getun(p->d.mnum, &p->mde, RD_LOCK, lock)) == NULL) 1790 return (EINVAL); 1791 ui = MDI_UNIT(p->d.mnum); 1792 setno = MD_MIN2SET(p->d.mnum); 1793 if (!MD_MNSET_SETNO(setno)) { 1794 return (EINVAL); 1795 } 1796 1797 /* 1798 * If we are choosing a new resync owner, send a message to the master 1799 * to make the choice. 1800 */ 1801 if (p->d.flags & MD_MN_MM_CHOOSE_OWNER) { 1802 /* Release ioctl lock before we call ksend_message() */ 1803 md_ioctl_readerexit(lock); 1804 /* If we're resetting the owner pass the node id in */ 1805 if (p->d.owner != MD_MN_MIRROR_UNOWNED) { 1806 return (mirror_choose_owner(un, &p->d)); 1807 } else { 1808 return (mirror_choose_owner(un, NULL)); 1809 } 1810 } 1811 1812 /* 1813 * Check for whether we have to spawn a thread to issue this request. 1814 * If set we issue a mdmn_ksend_message() to cause the appropriate 1815 * ownership change. On completion of this request the calling 1816 * application _must_ poll the structure 'flags' field to determine the 1817 * result of the request. All this is necessary until we have true 1818 * multi-entrant ioctl support. 1819 * If we are just clearing the owner, then MD_MN_MM_SPAWN_THREAD can 1820 * be ignored. 
1821 */ 1822 if ((p->d.flags & MD_MN_MM_SPAWN_THREAD) && (p->d.owner != 0)) { 1823 md_mn_req_owner_t *ownp; 1824 ownp = kmem_zalloc(sizeof (md_mn_req_owner_t), KM_SLEEP); 1825 p->d.flags &= ~MD_MN_MM_SPAWN_THREAD; 1826 bcopy(&p->d, ownp, sizeof (md_mn_req_owner_t)); 1827 if (thread_create(NULL, 0, mirror_owner_thread, (caddr_t)ownp, 1828 0, &p0, TS_RUN, 60) == NULL) { 1829 kmem_free(ownp, sizeof (md_mn_req_owner_t)); 1830 return (EFAULT); 1831 } else { 1832 return (0); 1833 } 1834 } 1835 1836 /* 1837 * If setting owner to NULL, this is being done because the owner has 1838 * died and therefore we set OPT_NOT_DONE to ensure that the 1839 * mirror is marked as "Needs Maintenance" and that an optimized 1840 * resync will be done when we resync the mirror, Also clear the 1841 * PREVENT_CHANGE flag and remove the last resync region from the 1842 * overlap tree. 1843 */ 1844 if (p->d.owner == 0) { 1845 md_mps_t *ps; 1846 int i; 1847 1848 md_ioctl_readerexit(lock); 1849 un = md_ioctl_writerlock(lock, ui); 1850 /* 1851 * If the ABR capability is not set and the pass_num is non-zero 1852 * there is need to perform an optimized resync 1853 * Therefore set OPT_NOT_DONE, setup the resync_bm and set 1854 * the submirrors as resync targets. 
1855 */ 1856 if (!(ui->ui_tstate & MD_ABR_CAP) && un->un_pass_num) { 1857 MD_STATUS(un) |= MD_UN_OPT_NOT_DONE; 1858 1859 (void) mddb_reread_rr(setno, un->un_rr_dirty_recid); 1860 bcopy((caddr_t)un->un_dirty_bm, 1861 (caddr_t)un->un_resync_bm, 1862 howmany(un->un_rrd_num, NBBY)); 1863 for (i = 0; i < NMIRROR; i++) { 1864 if ((SUBMIRROR_IS_READABLE(un, i)) || 1865 SMS_BY_INDEX_IS(un, i, 1866 SMS_OFFLINE_RESYNC)) 1867 un->un_sm[i].sm_flags |= 1868 MD_SM_RESYNC_TARGET; 1869 } 1870 } 1871 mutex_enter(&un->un_owner_mx); 1872 un->un_owner_state &= ~MD_MN_MM_PREVENT_CHANGE; 1873 mutex_exit(&un->un_owner_mx); 1874 ps = un->un_rs_prev_overlap; 1875 if ((ps != NULL) && (ps->ps_flags & MD_MPS_ON_OVERLAP)) { 1876 mirror_overlap_tree_remove(ps); 1877 ps->ps_firstblk = 0; 1878 ps->ps_lastblk = 0; 1879 } 1880 md_ioctl_writerexit(lock); 1881 un = md_ioctl_readerlock(lock, ui); 1882 } 1883 1884 mutex_enter(&un->un_owner_mx); 1885 if (!(un->un_owner_state & MM_MN_BECOME_OWNER)) { 1886 /* 1887 * If we are not trying to become owner ourselves check 1888 * to see if we have to change the owner 1889 */ 1890 if (un->un_mirror_owner == p->d.owner) { 1891 /* 1892 * No need to change owner, 1893 * Clear/set PREVENT_CHANGE bit 1894 */ 1895 if (p->d.flags & MD_MN_MM_PREVENT_CHANGE) { 1896 un->un_owner_state |= MM_MN_PREVENT_CHANGE; 1897 } else if (p->d.flags & MD_MN_MM_ALLOW_CHANGE) { 1898 un->un_owner_state &= ~MM_MN_PREVENT_CHANGE; 1899 } 1900 mutex_exit(&un->un_owner_mx); 1901 return (0); 1902 } 1903 } 1904 1905 /* 1906 * Disallow ownership change if previously requested to. This can only 1907 * be reset by issuing a request with MD_MN_MM_ALLOW_CHANGE set in the 1908 * flags field. 
1909 */ 1910 if ((un->un_owner_state & MM_MN_PREVENT_CHANGE) && 1911 !(p->d.flags & MD_MN_MM_ALLOW_CHANGE)) { 1912 mutex_exit(&un->un_owner_mx); 1913 #ifdef DEBUG 1914 cmn_err(CE_WARN, "mirror_ioctl: Node %x attempted to become " 1915 "owner while node %x has exclusive access to %s", 1916 p->d.owner, un->un_mirror_owner, md_shortname(MD_SID(un))); 1917 #endif 1918 return (EAGAIN); 1919 } 1920 if (p->d.owner == md_mn_mynode_id) { 1921 /* 1922 * I'm becoming the mirror owner. Flag this so that the 1923 * message sender can change the in-core owner when all 1924 * nodes have processed this message 1925 */ 1926 un->un_owner_state &= ~MM_MN_OWNER_SENT; 1927 un->un_owner_state |= MM_MN_BECOME_OWNER; 1928 un->un_owner_state |= (p->d.flags & MD_MN_MM_PREVENT_CHANGE) ? 1929 MM_MN_PREVENT_CHANGE : 0; 1930 un->un_owner_state &= (p->d.flags & MD_MN_MM_ALLOW_CHANGE) ? 1931 ~MM_MN_PREVENT_CHANGE : ~0; 1932 1933 mutex_exit(&un->un_owner_mx); 1934 } else if ((un->un_mirror_owner == md_mn_mynode_id) || 1935 un->un_owner_state & MM_MN_BECOME_OWNER) { 1936 mutex_exit(&un->un_owner_mx); 1937 1938 /* 1939 * I'm releasing ownership. Block and drain i/o. This also 1940 * blocks until any in-progress resync record update completes. 1941 */ 1942 md_ioctl_readerexit(lock); 1943 un = md_ioctl_writerlock(lock, ui); 1944 /* Block the resync thread */ 1945 mutex_enter(&un->un_rs_thread_mx); 1946 un->un_rs_thread_flags |= MD_RI_BLOCK_OWNER; 1947 mutex_exit(&un->un_rs_thread_mx); 1948 mutex_enter(&un->un_owner_mx); 1949 un->un_mirror_owner = p->d.owner; 1950 1951 /* Sets node owner of un_rr_dirty record */ 1952 if (un->un_rr_dirty_recid) 1953 (void) mddb_setowner(un->un_rr_dirty_recid, p->d.owner); 1954 un->un_owner_state &= ~MM_MN_BECOME_OWNER; 1955 un->un_owner_state |= (p->d.flags & MD_MN_MM_PREVENT_CHANGE) ? 1956 MM_MN_PREVENT_CHANGE : 0; 1957 un->un_owner_state &= (p->d.flags & MD_MN_MM_ALLOW_CHANGE) ? 
1958 ~MM_MN_PREVENT_CHANGE : ~0; 1959 mutex_exit(&un->un_owner_mx); 1960 /* 1961 * Allow further i/o to occur. Any write() from another node 1962 * will now cause another ownership change to occur. 1963 */ 1964 md_ioctl_writerexit(lock); 1965 } else { 1966 /* Update the in-core mirror owner */ 1967 un->un_mirror_owner = p->d.owner; 1968 /* Sets node owner of un_rr_dirty record */ 1969 if (un->un_rr_dirty_recid) 1970 (void) mddb_setowner(un->un_rr_dirty_recid, p->d.owner); 1971 un->un_owner_state |= (p->d.flags & MD_MN_MM_PREVENT_CHANGE) ? 1972 MM_MN_PREVENT_CHANGE : 0; 1973 un->un_owner_state &= (p->d.flags & MD_MN_MM_ALLOW_CHANGE) ? 1974 ~MM_MN_PREVENT_CHANGE : ~0; 1975 mutex_exit(&un->un_owner_mx); 1976 } 1977 return (0); 1978 } 1979 /* 1980 * mirror_allocate_hotspare: 1981 * ------------------------ 1982 * Called to allocate a hotspare for a failed component. This function is 1983 * called by the MD_MN_ALLOCATE_HOTSPARE ioctl. 1984 */ 1985 static int 1986 mirror_allocate_hotspare(md_alloc_hotsp_params_t *p, IOLOCK *lockp) 1987 { 1988 set_t setno; 1989 mm_unit_t *un; 1990 1991 #ifdef DEBUG 1992 if (mirror_debug_flag) 1993 printf("mirror_allocate_hotspare: mnum,sm,comp = %x, %x, %x\n", 1994 p->mnum, p->sm, p->comp); 1995 #endif 1996 1997 if ((un = mirror_getun(p->mnum, &p->mde, WR_LOCK, lockp)) == NULL) 1998 return (EINVAL); 1999 2000 /* This function is only valid for a multi-node set */ 2001 setno = MD_MIN2SET(p->mnum); 2002 if (!MD_MNSET_SETNO(setno)) { 2003 return (EINVAL); 2004 } 2005 (void) check_comp_4_hotspares(un, p->sm, p->comp, MD_HOTSPARE_NO_XMIT, 2006 p->hs_id, lockp); 2007 md_ioctl_writerexit(lockp); 2008 return (0); 2009 } 2010 2011 /* 2012 * mirror_get_owner_status: 2013 * ----------------------- 2014 * Return the status of a previously issued ioctl to change ownership. This is 2015 * required for soft-partition support as the request to change mirror owner 2016 * needs to be run from a separate daemon thread. 
/*
 * mirror_get_owner_status:
 * -----------------------
 * Return the status of a previously issued ioctl to change ownership. This is
 * required for soft-partition support as the request to change mirror owner
 * needs to be run from a separate daemon thread.
 *
 * Returns:
 *	0	Success (contents of un_mirror_owner_status placed in 'flags')
 *	EINVAL	Invalid unit, or the unit is not in a multi-node set
 */
static int
mirror_get_owner_status(md_mn_own_status_t *p, IOLOCK *lock)
{
	mm_unit_t	*un;
	set_t		setno;

	if ((un = mirror_getun(p->mnum, &p->mde, RD_LOCK, lock)) == NULL)
		return (EINVAL);

	setno = MD_MIN2SET(p->mnum);
	if (!MD_MNSET_SETNO(setno)) {
		return (EINVAL);
	}

	p->flags = un->un_mirror_owner_status;
	return (0);
}

/*
 * mirror_set_state:
 * ---------------
 * Called to set the state of the component of a submirror to the specified
 * value. This function is called by the MD_MN_SET_STATE ioctl.
 *
 * Writes to the mirror are resumed (mirror_resume_writes()) before returning
 * on every path that returns 0.
 *
 * Returns:
 *	0	state set, or a stale CS_ERRED request was ignored
 *	EINVAL	invalid unit, or the unit is not in a multi-node set
 */
static int
mirror_set_state(md_set_state_params_t *p, IOLOCK *lockp)
{
	mm_unit_t		*un;
	mm_submirror_t		*sm;
	mm_submirror_ic_t	*smic;
	md_m_shared_t		*shared;
	set_t			setno;

#ifdef DEBUG
	if (mirror_debug_flag)
		printf("mirror_set_state: mnum,sm,comp,state, hs_id = %x, "
		    "%x, %x, %x %x\n", p->mnum, p->sm, p->comp,
		    p->state, p->hs_id);
#endif
	if ((un = mirror_getun(p->mnum, &p->mde, WR_LOCK, lockp)) == NULL)
		return (EINVAL);

	/* This function is only valid for a multi-node set */
	setno = MD_MIN2SET(p->mnum);
	if (!MD_MNSET_SETNO(setno)) {
		return (EINVAL);
	}
	/*
	 * NOTE(review): p->sm indexes un_sm[]/un_smic[] without a range
	 * check against NMIRROR - confirm the sender guarantees it is valid.
	 */
	sm = &un->un_sm[p->sm];
	smic = &un->un_smic[p->sm];

	/* Set state in component and update ms_flags */
	shared = (md_m_shared_t *)
	    (*(smic->sm_shared_by_indx))(sm->sm_dev, sm, p->comp);
	/*
	 * If a CS_ERRED state is being sent, verify that the sender
	 * has the same view of the component that this node currently has.
	 *
	 * There is a case where the sender was sending a CS_ERRED when a
	 * component was in error, but before the sender returns from
	 * ksend_message the component has been hotspared and resync'd.
	 *
	 * In this case, the hs_id will be different from the shared ms_hs_id,
	 * so the component has already been hotspared.  Just return in this
	 * case.
	 */
	if (p->state == CS_ERRED) {
		if (shared->ms_hs_id != p->hs_id) {
#ifdef DEBUG
			if (mirror_debug_flag) {
				printf("mirror_set_state: short circuit "
				    "hs_id=0x%x, ms_hs_id=0x%x\n",
				    p->hs_id, shared->ms_hs_id);
			}
#endif
			/* release the block on writes to the mirror */
			mirror_resume_writes(un);
			md_ioctl_writerexit(lockp);
			return (0);
		}
	}

	/*
	 * If the device is newly errored then make sure that it is
	 * closed.  Closing the device allows for the RCM framework
	 * to unconfigure the device if required.
	 */
	if (!(shared->ms_state & CS_ERRED) && (p->state & CS_ERRED) &&
	    (shared->ms_flags & MDM_S_ISOPEN)) {
		void		(*get_dev)();
		ms_cd_info_t	cd;

		/*
		 * NOTE(review): the named-service pointer is called without
		 * a NULL check - presumably "get device" is always
		 * registered for a submirror device; confirm.
		 */
		get_dev = (void (*)())md_get_named_service(sm->sm_dev, 0,
		    "get device", 0);
		(void) (*get_dev)(sm->sm_dev, sm, p->comp, &cd);

		md_layered_close(cd.cd_dev, MD_OFLG_NULL);
		shared->ms_flags &= ~MDM_S_ISOPEN;
	}

	shared->ms_state = p->state;
	uniqtime32(&shared->ms_timestamp);

	/* MDM_S_NOWRITE is set only for a CS_ERRED component */
	if (p->state == CS_ERRED) {
		shared->ms_flags |= MDM_S_NOWRITE;
	} else
		shared->ms_flags &= ~MDM_S_NOWRITE;

	shared->ms_flags &= ~MDM_S_IOERR;
	un->un_changecnt++;
	shared->ms_lasterrcnt = un->un_changecnt;

	/* Update state in submirror */
	mirror_set_sm_state(sm, smic, SMS_RUNNING, 0);
	/*
	 * Commit the state change to the metadb, only the master will write
	 * to disk
	 */
	mirror_commit(un, SMI2BIT(p->sm), 0);

	/* release the block on writes to the mirror */
	mirror_resume_writes(un);

	/* generate NOTIFY events for error state changes */
	if (p->state == CS_ERRED) {
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
	} else if (p->state == CS_LAST_ERRED) {
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
	}
	md_ioctl_writerexit(lockp);
	return (0);
}

/*
 * mirror_suspend_writes:
 * ---------------------
 * Called to suspend writes to a mirror region. The flag un_suspend_wr_flag is
 * tested in mirror_write_strategy, and if set all writes are blocked.
 * This function is called by the MD_MN_SUSPEND_WRITES ioctl.
 *
 * The block is released again by mirror_resume_writes().
 *
 * Returns:
 *	0	writes suspended and the resync thread marked as blocked
 *	EINVAL	invalid unit, or the unit is not in a multi-node set
 */
static int
mirror_suspend_writes(md_suspend_wr_params_t *p)
{
	set_t		setno;
	mm_unit_t	*un;

#ifdef DEBUG
	if (mirror_debug_flag)
		printf("mirror_suspend_writes: mnum = %x\n", p->mnum);
#endif
	if ((un = mirror_getun(p->mnum, &p->mde, NO_LOCK, NULL)) == NULL)
		return (EINVAL); /* No unit */

	/* This function is only valid for a multi-node set */
	setno = MD_MIN2SET(p->mnum);
	if (!MD_MNSET_SETNO(setno)) {
		return (EINVAL);
	}

	/*
	 * Mark the resync as blocked. This will stop any currently running
	 * thread and will prevent a new resync from attempting to perform
	 * i/o
	 */
	mutex_enter(&un->un_rs_thread_mx);
	un->un_rs_thread_flags |= MD_RI_BLOCK;
	mutex_exit(&un->un_rs_thread_mx);

	mutex_enter(&un->un_suspend_wr_mx);
	un->un_suspend_wr_flag = 1;
	mutex_exit(&un->un_suspend_wr_mx);

	return (0);
}
2203 */ 2204 static int 2205 mirror_set_capability(md_mn_setcap_params_t *p, IOLOCK *lockp) 2206 { 2207 set_t setno; 2208 mm_unit_t *un; 2209 mdi_unit_t *ui; 2210 2211 #ifdef DEBUG 2212 if (mirror_debug_flag) 2213 printf("mirror_set_capability: mnum = %x\n", p->mnum); 2214 #endif 2215 if ((un = mirror_getun(p->mnum, &p->mde, RD_LOCK, lockp)) == NULL) 2216 return (EINVAL); 2217 2218 /* This function is only valid for a multi-node set */ 2219 setno = MD_MIN2SET(p->mnum); 2220 if (!MD_MNSET_SETNO(setno)) { 2221 return (EINVAL); 2222 } 2223 ui = MDI_UNIT(p->mnum); 2224 2225 if (p->sc_set & DKV_ABR_CAP) { 2226 ui->ui_tstate |= MD_ABR_CAP; /* Set ABR capability */ 2227 /* Clear DRL and set owner to 0 if no resync active */ 2228 mirror_process_unit_resync(un); 2229 if (!(un->c.un_status & MD_UN_RESYNC_ACTIVE)) { 2230 mutex_enter(&un->un_owner_mx); 2231 un->un_mirror_owner = 0; 2232 mutex_exit(&un->un_owner_mx); 2233 } 2234 } else { 2235 ui->ui_tstate &= ~MD_ABR_CAP; /* Clear ABR capability */ 2236 } 2237 if (p->sc_set & DKV_DMR_CAP) { 2238 ui->ui_tstate |= MD_DMR_CAP; /* Set DMR capability */ 2239 } else { 2240 ui->ui_tstate &= ~MD_DMR_CAP; /* Clear DMR capability */ 2241 } 2242 return (0); 2243 } 2244 2245 /* 2246 * mirror_choose_owner: 2247 * ------------------------ 2248 * Called to choose an owner for a mirror resync. Can be called when starting 2249 * resync or by the MD_MN_SET_MM_OWNER ioctl with the MD_MN_MM_CHOOSE_OWNER flag 2250 * set. The ioctl is called with this flag set when we are in the cluster 2251 * reconfig and we wish to set a new owner for a resync whose owner has left 2252 * the cluster. We use a resync owner count to implement a round robin 2253 * allocation of resync owners. We send a message to the master including 2254 * this count and the message handler uses it to select an owner from the 2255 * nodelist and then sends a SET_MM_OWNER message to the chosen node to 2256 * become the owner. 
2257 * 2258 * Input: 2259 * un - unit reference 2260 * ownp - owner information (if non-NULL) 2261 */ 2262 int 2263 mirror_choose_owner(mm_unit_t *un, md_mn_req_owner_t *ownp) 2264 { 2265 set_t setno; 2266 md_mn_msg_chooseid_t *msg; 2267 2268 /* This function is only valid for a multi-node set */ 2269 setno = MD_UN2SET(un); 2270 if (!MD_MNSET_SETNO(setno)) { 2271 return (EINVAL); 2272 } 2273 2274 2275 #ifdef DEBUG 2276 if (mirror_debug_flag) 2277 printf("send choose owner message, mnum = %x," 2278 "rcnt = %d\n", MD_SID(un), md_set[setno].s_rcnt); 2279 #endif 2280 2281 /* 2282 * setup message with current resync count 2283 * and then increment the count. If we're called with a non-NULL 2284 * owner then we are reestablishing the owner of the mirror. In this 2285 * case we have to flag this to the message handler and set rcnt to 2286 * the new owner node. 2287 */ 2288 msg = kmem_zalloc(sizeof (md_mn_msg_chooseid_t), KM_SLEEP); 2289 msg->msg_chooseid_mnum = MD_SID(un); 2290 if (ownp == NULL) { 2291 mutex_enter(&md_mx); 2292 msg->msg_chooseid_rcnt = md_set[setno].s_rcnt; 2293 md_set[setno].s_rcnt++; 2294 mutex_exit(&md_mx); 2295 msg->msg_chooseid_set_node = B_FALSE; 2296 } else { 2297 msg->msg_chooseid_rcnt = ownp->owner; 2298 msg->msg_chooseid_set_node = B_TRUE; 2299 } 2300 2301 /* 2302 * Spawn a thread to issue the ksend_message() call so that we can 2303 * drop the ioctl lock hierarchy that is blocking further rpc.metad and 2304 * commd set ownership checking. 2305 */ 2306 if (thread_create(NULL, 0, mirror_choose_owner_thread, (caddr_t)msg, 2307 0, &p0, TS_RUN, 60) == NULL) { 2308 kmem_free(msg, sizeof (md_mn_msg_chooseid_t)); 2309 return (EFAULT); 2310 } else { 2311 return (0); 2312 } 2313 } 2314 2315 /* 2316 * mirror_get_status: 2317 * ---------------------------------- 2318 * Called by nodes which are not the master node of the cluster. 
Obtains the 2319 * master abr state and the submirror status for each valid submirror of the 2320 * unit so that the status returned by metastat is consistent across the 2321 * cluster. 2322 * We update tstate for the mirror and both the sm_flag and the sm_state for 2323 * each submirror. 2324 * 2325 * Input: 2326 * un mirror to obtain status from 2327 * 2328 * Calling Convention: 2329 * writerlock (either ioctl or unit) must be held 2330 */ 2331 void 2332 mirror_get_status(mm_unit_t *un, IOLOCK *lockp) 2333 { 2334 mm_submirror_t *sm; 2335 int smi; 2336 int rval; 2337 md_mn_kresult_t *kres; 2338 md_mn_msg_mir_state_t msg; 2339 md_mn_msg_mir_state_res_t *res; 2340 set_t setno = MD_UN2SET(un); 2341 mdi_unit_t *ui = MDI_UNIT(MD_SID(un)); 2342 2343 2344 ASSERT(ui->ui_lock & MD_UL_WRITER); 2345 2346 /* 2347 * Get all of the information for the mirror. 2348 */ 2349 bzero(&msg, sizeof (msg)); 2350 msg.mir_state_mnum = MD_SID(un); 2351 2352 /* 2353 * Must drop the writerlock over ksend_message since another 2354 * thread on this node could be running a higher class message 2355 * and be trying grab the readerlock. 2356 * 2357 * If we are in the context of an ioctl, drop the ioctl lock. 2358 * lockp holds the list of locks held. 2359 */ 2360 if (lockp) { 2361 IOLOCK_RETURN_RELEASE(0, lockp); 2362 } else { 2363 md_unit_writerexit(ui); 2364 } 2365 2366 kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP); 2367 rval = mdmn_ksend_message(setno, MD_MN_MSG_GET_MIRROR_STATE, 2368 MD_MSGF_NO_BCAST | MD_MSGF_NO_LOG, 0, (char *)&msg, 2369 sizeof (msg), kres); 2370 2371 /* if the node hasn't yet joined, it's Ok. 
*/ 2372 if ((!MDMN_KSEND_MSG_OK(rval, kres)) && 2373 (kres->kmmr_comm_state != MDMNE_NOT_JOINED)) { 2374 mdmn_ksend_show_error(rval, kres, "GET_MIRROR_STATE"); 2375 cmn_err(CE_WARN, "ksend_message failure: GET_MIRROR_STATE"); 2376 } 2377 2378 /* if dropped the lock previously, regain it */ 2379 if (lockp) { 2380 IOLOCK_RETURN_REACQUIRE(lockp); 2381 } else { 2382 /* 2383 * Reacquire dropped locks and update acquirecnts 2384 * appropriately. 2385 */ 2386 (void) md_unit_writerlock(ui); 2387 } 2388 2389 /* 2390 * Check to see if we've got a believable amount of returned data. 2391 * If not, we simply return as there is no usable information. 2392 */ 2393 if (kres->kmmr_res_size < sizeof (*res)) { 2394 cmn_err(CE_WARN, "GET_MIRROR_STATE: returned %d bytes, expected" 2395 " %d\n", kres->kmmr_res_size, (int)sizeof (*res)); 2396 kmem_free(kres, sizeof (md_mn_kresult_t)); 2397 return; 2398 } 2399 2400 /* 2401 * Copy the results from the call back into our sm_state/sm_flags 2402 */ 2403 res = (md_mn_msg_mir_state_res_t *)kres->kmmr_res_data; 2404 #ifdef DEBUG 2405 if (mirror_debug_flag) 2406 printf("mirror_get_status: %s\n", md_shortname(MD_SID(un))); 2407 #endif 2408 for (smi = 0; smi < NMIRROR; smi++) { 2409 sm = &un->un_sm[smi]; 2410 #ifdef DEBUG 2411 if (mirror_debug_flag) { 2412 printf("curr state %4x, new state %4x\n", sm->sm_state, 2413 res->sm_state[smi]); 2414 printf("curr_flags %4x, new flags %4x\n", sm->sm_flags, 2415 res->sm_flags[smi]); 2416 } 2417 #endif 2418 sm->sm_state = res->sm_state[smi]; 2419 sm->sm_flags = res->sm_flags[smi]; 2420 } 2421 2422 /* Set ABR if set on the Master node */ 2423 ui->ui_tstate |= (res->mir_tstate & MD_ABR_CAP); 2424 2425 kmem_free(kres, sizeof (md_mn_kresult_t)); 2426 } 2427 2428 /* 2429 * mirror_get_mir_state: 2430 * ------------------- 2431 * Obtain the ABR state of a mirror and the state of all submirrors from the 2432 * master node for the unit specified in sm_state->mnum. 2433 * Called by MD_MN_GET_MIRROR_STATE ioctl. 
2434 */ 2435 static int 2436 mirror_get_mir_state(md_mn_get_mir_state_t *p, IOLOCK *lockp) 2437 { 2438 mm_unit_t *un; 2439 set_t setno; 2440 md_error_t mde; 2441 2442 mdclrerror(&mde); 2443 2444 if ((un = mirror_getun(p->mnum, &mde, WR_LOCK, lockp)) == NULL) { 2445 return (EINVAL); 2446 } 2447 setno = MD_MIN2SET(p->mnum); 2448 if (!MD_MNSET_SETNO(setno)) { 2449 return (EINVAL); 2450 } 2451 2452 /* 2453 * We've now got a writerlock on the unit structure (so no-one can 2454 * modify the incore values) and we'll now send the message to the 2455 * master node. Since we're only called as part of a reconfig cycle 2456 * we don't need to release the unit locks across the ksend_message as 2457 * only the master node will process it, and we never send this to 2458 * ourselves if we're the master. 2459 */ 2460 2461 mirror_get_status(un, lockp); 2462 2463 return (0); 2464 } 2465 2466 static int 2467 mirror_admin_ioctl(int cmd, void *data, int mode, IOLOCK *lockp) 2468 { 2469 size_t sz = 0; 2470 void *d = NULL; 2471 int err = 0; 2472 2473 /* We can only handle 32-bit clients for internal commands */ 2474 if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) { 2475 return (EINVAL); 2476 } 2477 /* dispatch ioctl */ 2478 switch (cmd) { 2479 2480 case MD_IOCSET: 2481 { 2482 if (! (mode & FWRITE)) 2483 return (EACCES); 2484 2485 sz = sizeof (md_set_params_t); 2486 2487 d = kmem_alloc(sz, KM_SLEEP); 2488 2489 if (ddi_copyin(data, d, sz, mode)) { 2490 err = EFAULT; 2491 break; 2492 } 2493 2494 err = mirror_set(d, mode); 2495 break; 2496 } 2497 2498 case MD_IOCGET: 2499 { 2500 if (! (mode & FREAD)) 2501 return (EACCES); 2502 2503 sz = sizeof (md_i_get_t); 2504 2505 d = kmem_alloc(sz, KM_SLEEP); 2506 2507 if (ddi_copyin(data, d, sz, mode)) { 2508 err = EFAULT; 2509 break; 2510 } 2511 2512 err = mirror_get(d, mode, lockp); 2513 break; 2514 } 2515 2516 case MD_IOCRESET: 2517 { 2518 if (! 
(mode & FWRITE)) 2519 return (EACCES); 2520 2521 sz = sizeof (md_i_reset_t); 2522 d = kmem_alloc(sz, KM_SLEEP); 2523 2524 if (ddi_copyin(data, d, sz, mode)) { 2525 err = EFAULT; 2526 break; 2527 } 2528 2529 err = mirror_reset((md_i_reset_t *)d); 2530 break; 2531 } 2532 2533 case MD_IOCSETSYNC: 2534 case MD_MN_SETSYNC: 2535 { 2536 if (! (mode & FWRITE)) 2537 return (EACCES); 2538 2539 sz = sizeof (md_resync_ioctl_t); 2540 d = kmem_alloc(sz, KM_SLEEP); 2541 2542 if (ddi_copyin(data, d, sz, mode)) { 2543 err = EFAULT; 2544 break; 2545 } 2546 2547 err = mirror_ioctl_resync((md_resync_ioctl_t *)d, lockp); 2548 break; 2549 } 2550 2551 case MD_IOCGETSYNC: 2552 { 2553 if (! (mode & FREAD)) 2554 return (EACCES); 2555 2556 sz = sizeof (md_resync_ioctl_t); 2557 d = kmem_alloc(sz, KM_SLEEP); 2558 2559 if (ddi_copyin(data, d, sz, mode)) { 2560 err = EFAULT; 2561 break; 2562 } 2563 2564 err = mirror_get_resync((md_resync_ioctl_t *)d); 2565 break; 2566 } 2567 2568 case MD_IOCREPLACE: 2569 { 2570 if (! (mode & FWRITE)) 2571 return (EACCES); 2572 2573 sz = sizeof (replace_params_t); 2574 d = kmem_alloc(sz, KM_SLEEP); 2575 2576 if (ddi_copyin(data, d, sz, mode)) { 2577 err = EFAULT; 2578 break; 2579 } 2580 2581 err = comp_replace((replace_params_t *)d, lockp); 2582 break; 2583 } 2584 2585 case MD_IOCOFFLINE: 2586 { 2587 if (! (mode & FWRITE)) 2588 return (EACCES); 2589 2590 sz = sizeof (md_i_off_on_t); 2591 d = kmem_alloc(sz, KM_SLEEP); 2592 2593 if (ddi_copyin(data, d, sz, mode)) { 2594 err = EFAULT; 2595 break; 2596 } 2597 2598 err = mirror_offline((md_i_off_on_t *)d, lockp); 2599 break; 2600 } 2601 2602 case MD_IOCONLINE: 2603 { 2604 if (! (mode & FWRITE)) 2605 return (EACCES); 2606 2607 sz = sizeof (md_i_off_on_t); 2608 d = kmem_alloc(sz, KM_SLEEP); 2609 2610 if (ddi_copyin(data, d, sz, mode)) { 2611 err = EFAULT; 2612 break; 2613 } 2614 2615 err = mirror_online((md_i_off_on_t *)d, lockp); 2616 break; 2617 } 2618 2619 case MD_IOCDETACH: 2620 { 2621 if (! 
(mode & FWRITE)) 2622 return (EACCES); 2623 2624 sz = sizeof (md_detach_params_t); 2625 d = kmem_alloc(sz, KM_SLEEP); 2626 2627 if (ddi_copyin(data, d, sz, mode)) { 2628 err = EFAULT; 2629 break; 2630 } 2631 2632 err = mirror_detach((md_detach_params_t *)d, lockp); 2633 break; 2634 } 2635 2636 case MD_IOCATTACH: 2637 { 2638 2639 if (! (mode & FWRITE)) 2640 return (EACCES); 2641 2642 sz = sizeof (md_att_struct_t); 2643 d = kmem_alloc(sz, KM_SLEEP); 2644 2645 if (ddi_copyin(data, d, sz, mode)) { 2646 err = EFAULT; 2647 break; 2648 } 2649 2650 err = mirror_attach((md_att_struct_t *)d, lockp); 2651 break; 2652 } 2653 2654 case MD_IOCGET_DEVS: 2655 { 2656 if (! (mode & FREAD)) 2657 return (EACCES); 2658 2659 sz = sizeof (md_getdevs_params_t); 2660 2661 d = kmem_alloc(sz, KM_SLEEP); 2662 2663 if (ddi_copyin(data, d, sz, mode)) { 2664 err = EFAULT; 2665 break; 2666 } 2667 2668 err = mirror_getdevs(d, mode, lockp); 2669 break; 2670 } 2671 2672 case MD_IOCGROW: 2673 { 2674 if (! (mode & FWRITE)) 2675 return (EACCES); 2676 2677 sz = sizeof (md_grow_params_t); 2678 2679 d = kmem_alloc(sz, KM_SLEEP); 2680 2681 if (ddi_copyin(data, d, sz, mode)) { 2682 err = EFAULT; 2683 break; 2684 } 2685 2686 err = mirror_grow(d, lockp); 2687 break; 2688 } 2689 2690 case MD_IOCCHANGE: 2691 { 2692 if (! (mode & FWRITE)) 2693 return (EACCES); 2694 2695 sz = sizeof (md_mirror_params_t); 2696 d = kmem_alloc(sz, KM_SLEEP); 2697 2698 if (ddi_copyin(data, d, sz, mode)) { 2699 err = EFAULT; 2700 break; 2701 } 2702 2703 err = mirror_change((md_mirror_params_t *)d, lockp); 2704 break; 2705 } 2706 2707 case MD_IOCPROBE_DEV: 2708 { 2709 md_probedev_impl_t *p = NULL; 2710 md_probedev_t *ph = NULL; 2711 daemon_queue_t *hdr = NULL; 2712 int i; 2713 size_t sz2 = 0; 2714 2715 if (! 
(mode & FREAD)) 2716 return (EACCES); 2717 2718 2719 sz = sizeof (md_probedev_t); 2720 d = kmem_alloc(sz, KM_SLEEP); 2721 2722 /* now copy in the data */ 2723 if (ddi_copyin(data, d, sz, mode)) { 2724 err = EFAULT; 2725 goto free_mem; 2726 } 2727 2728 /* 2729 * Sanity test the args. Test name should have the keyword 2730 * probe. 2731 */ 2732 2733 p = kmem_alloc(sizeof (md_probedev_impl_t), KM_SLEEP); 2734 2735 p->probe_sema = NULL; 2736 p->probe_mx = NULL; 2737 p->probe.mnum_list = (uint64_t)NULL; 2738 2739 ph = (struct md_probedev *)d; 2740 2741 p->probe.nmdevs = ph->nmdevs; 2742 (void) strcpy(p->probe.test_name, ph->test_name); 2743 bcopy(&ph->md_driver, &(p->probe.md_driver), 2744 sizeof (md_driver_t)); 2745 2746 if ((p->probe.nmdevs < 1) || 2747 (strstr(p->probe.test_name, "probe") == NULL)) { 2748 err = EINVAL; 2749 goto free_mem; 2750 } 2751 2752 2753 sz2 = sizeof (minor_t) * p->probe.nmdevs; 2754 p->probe.mnum_list = (uint64_t)(uintptr_t)kmem_alloc(sz2, 2755 KM_SLEEP); 2756 2757 if (ddi_copyin((void *)(uintptr_t)ph->mnum_list, 2758 (void *)(uintptr_t)p->probe.mnum_list, sz2, mode)) { 2759 err = EFAULT; 2760 goto free_mem; 2761 } 2762 2763 if (err = md_init_probereq(p, &hdr)) 2764 goto free_mem; 2765 2766 /* 2767 * put the request on the queue and wait. 2768 */ 2769 2770 daemon_request_new(&md_ff_daemonq, md_probe_one, hdr, REQ_NEW); 2771 2772 (void) IOLOCK_RETURN(0, lockp); 2773 /* wait for the events to occur */ 2774 for (i = 0; i < p->probe.nmdevs; i++) { 2775 sema_p(PROBE_SEMA(p)); 2776 } 2777 while (md_ioctl_lock_enter() == EINTR) 2778 ; 2779 2780 /* 2781 * clean up. The hdr list is freed in the probe routines 2782 * since the list is NULL by the time we get here. 
2783 */ 2784 free_mem: 2785 if (p) { 2786 if (p->probe_sema != NULL) { 2787 sema_destroy(PROBE_SEMA(p)); 2788 kmem_free(p->probe_sema, sizeof (ksema_t)); 2789 } 2790 if (p->probe_mx != NULL) { 2791 mutex_destroy(PROBE_MX(p)); 2792 kmem_free(p->probe_mx, sizeof (kmutex_t)); 2793 } 2794 if ((uintptr_t)p->probe.mnum_list) 2795 kmem_free((void *)(uintptr_t) 2796 p->probe.mnum_list, sz2); 2797 2798 kmem_free(p, sizeof (md_probedev_impl_t)); 2799 } 2800 break; 2801 } 2802 2803 case MD_MN_SET_MM_OWNER: 2804 { 2805 if (! (mode & FWRITE)) 2806 return (EACCES); 2807 2808 sz = sizeof (md_set_mmown_params_t); 2809 d = kmem_alloc(sz, KM_SLEEP); 2810 2811 if (ddi_copyin(data, d, sz, mode) != 0) { 2812 err = EFAULT; 2813 break; 2814 } 2815 2816 err = mirror_set_owner((md_set_mmown_params_t *)d, lockp); 2817 break; 2818 } 2819 2820 case MD_MN_GET_MM_OWNER: 2821 { 2822 if (! (mode & FREAD)) 2823 return (EACCES); 2824 2825 sz = sizeof (md_set_mmown_params_t); 2826 d = kmem_alloc(sz, KM_SLEEP); 2827 2828 if (ddi_copyin(data, d, sz, mode) != 0) { 2829 err = EFAULT; 2830 break; 2831 } 2832 2833 err = mirror_get_owner((md_set_mmown_params_t *)d, lockp); 2834 break; 2835 } 2836 2837 case MD_MN_MM_OWNER_STATUS: 2838 { 2839 if (! (mode & FREAD)) 2840 return (EACCES); 2841 2842 sz = sizeof (md_mn_own_status_t); 2843 d = kmem_alloc(sz, KM_SLEEP); 2844 2845 if (ddi_copyin(data, d, sz, mode) != 0) { 2846 err = EFAULT; 2847 break; 2848 } 2849 2850 err = mirror_get_owner_status((md_mn_own_status_t *)d, lockp); 2851 break; 2852 } 2853 2854 case MD_MN_SET_STATE: 2855 { 2856 if (! (mode & FWRITE)) 2857 return (EACCES); 2858 2859 sz = sizeof (md_set_state_params_t); 2860 d = kmem_alloc(sz, KM_SLEEP); 2861 2862 if (ddi_copyin(data, d, sz, mode)) { 2863 err = EFAULT; 2864 break; 2865 } 2866 2867 err = mirror_set_state((md_set_state_params_t *)d, lockp); 2868 break; 2869 } 2870 2871 case MD_MN_SUSPEND_WRITES: 2872 { 2873 if (! 
(mode & FREAD)) 2874 return (EACCES); 2875 2876 sz = sizeof (md_suspend_wr_params_t); 2877 d = kmem_alloc(sz, KM_SLEEP); 2878 2879 if (ddi_copyin(data, d, sz, mode) != 0) { 2880 err = EFAULT; 2881 break; 2882 } 2883 2884 err = mirror_suspend_writes((md_suspend_wr_params_t *)d); 2885 break; 2886 } 2887 2888 case MD_MN_RESYNC: 2889 { 2890 sz = sizeof (md_mn_rs_params_t); 2891 d = kmem_alloc(sz, KM_SLEEP); 2892 2893 if (ddi_copyin(data, d, sz, mode) != 0) { 2894 err = EFAULT; 2895 break; 2896 } 2897 2898 err = mirror_resync_message((md_mn_rs_params_t *)d, lockp); 2899 break; 2900 } 2901 2902 case MD_MN_ALLOCATE_HOTSPARE: 2903 { 2904 if (! (mode & FWRITE)) 2905 return (EACCES); 2906 2907 sz = sizeof (md_alloc_hotsp_params_t); 2908 d = kmem_alloc(sz, KM_SLEEP); 2909 2910 if (ddi_copyin(data, d, sz, mode)) { 2911 err = EFAULT; 2912 break; 2913 } 2914 2915 err = mirror_allocate_hotspare((md_alloc_hotsp_params_t *)d, 2916 lockp); 2917 break; 2918 } 2919 2920 case MD_MN_POKE_HOTSPARES: 2921 { 2922 (void) poke_hotspares(); 2923 break; 2924 } 2925 2926 case MD_MN_SET_CAP: 2927 { 2928 if (! 
(mode & FWRITE)) 2929 return (EACCES); 2930 2931 sz = sizeof (md_mn_setcap_params_t); 2932 d = kmem_alloc(sz, KM_SLEEP); 2933 2934 if (ddi_copyin(data, d, sz, mode)) { 2935 err = EFAULT; 2936 break; 2937 } 2938 2939 err = mirror_set_capability((md_mn_setcap_params_t *)d, 2940 lockp); 2941 break; 2942 } 2943 2944 case MD_MN_GET_MIRROR_STATE: 2945 { 2946 sz = sizeof (md_mn_get_mir_state_t); 2947 d = kmem_zalloc(sz, KM_SLEEP); 2948 2949 if (ddi_copyin(data, d, sz, mode)) { 2950 err = EFAULT; 2951 break; 2952 } 2953 2954 err = mirror_get_mir_state((md_mn_get_mir_state_t *)d, 2955 lockp); 2956 break; 2957 } 2958 2959 case MD_MN_RR_DIRTY: 2960 { 2961 sz = sizeof (md_mn_rr_dirty_params_t); 2962 d = kmem_zalloc(sz, KM_SLEEP); 2963 2964 if (ddi_copyin(data, d, sz, mode)) { 2965 err = EFAULT; 2966 break; 2967 } 2968 2969 err = mirror_set_dirty_rr((md_mn_rr_dirty_params_t *)d); 2970 break; 2971 } 2972 2973 case MD_MN_RR_CLEAN: 2974 { 2975 md_mn_rr_clean_params_t tmp; 2976 2977 /* get the first part of the structure to find the size */ 2978 if (ddi_copyin(data, &tmp, sizeof (tmp), mode)) { 2979 err = EFAULT; 2980 break; 2981 } 2982 2983 sz = MDMN_RR_CLEAN_PARAMS_SIZE(&tmp); 2984 d = kmem_zalloc(sz, KM_SLEEP); 2985 2986 if (ddi_copyin(data, d, sz, mode)) { 2987 err = EFAULT; 2988 break; 2989 } 2990 2991 err = mirror_set_clean_rr((md_mn_rr_clean_params_t *)d); 2992 break; 2993 } 2994 2995 default: 2996 return (ENOTTY); 2997 } 2998 2999 /* 3000 * copyout and free any args 3001 */ 3002 if (sz != 0) { 3003 if (err == 0) { 3004 if (ddi_copyout(d, data, sz, mode) != 0) { 3005 err = EFAULT; 3006 } 3007 } 3008 kmem_free(d, sz); 3009 } 3010 return (err); 3011 } 3012 3013 int 3014 md_mirror_ioctl( 3015 dev_t ddi_dev, 3016 int cmd, 3017 void *data, 3018 int mode, 3019 IOLOCK *lockp 3020 ) 3021 { 3022 minor_t mnum = getminor(ddi_dev); 3023 mm_unit_t *un; 3024 int err = 0; 3025 3026 /* handle admin ioctls */ 3027 if (mnum == MD_ADM_MINOR) 3028 return (mirror_admin_ioctl(cmd, data, mode, 
lockp)); 3029 3030 /* check unit */ 3031 if ((MD_MIN2SET(mnum) >= md_nsets) || 3032 (MD_MIN2UNIT(mnum) >= md_nunits) || 3033 ((un = MD_UNIT(mnum)) == NULL)) 3034 return (ENXIO); 3035 /* is this a supported ioctl? */ 3036 err = md_check_ioctl_against_unit(cmd, un->c); 3037 if (err != 0) { 3038 return (err); 3039 } 3040 3041 /* dispatch ioctl */ 3042 switch (cmd) { 3043 3044 case DKIOCINFO: 3045 { 3046 struct dk_cinfo *p; 3047 3048 if (! (mode & FREAD)) 3049 return (EACCES); 3050 3051 p = kmem_alloc(sizeof (*p), KM_SLEEP); 3052 3053 get_info(p, mnum); 3054 if (ddi_copyout((caddr_t)p, data, sizeof (*p), mode) != 0) 3055 err = EFAULT; 3056 3057 kmem_free(p, sizeof (*p)); 3058 return (err); 3059 } 3060 3061 case DKIOCGMEDIAINFO: 3062 { 3063 struct dk_minfo p; 3064 3065 if (! (mode & FREAD)) 3066 return (EACCES); 3067 3068 get_minfo(&p, mnum); 3069 if (ddi_copyout(&p, data, sizeof (struct dk_minfo), mode) != 0) 3070 err = EFAULT; 3071 3072 return (err); 3073 } 3074 3075 case DKIOCGGEOM: 3076 { 3077 struct dk_geom *p; 3078 3079 if (! (mode & FREAD)) 3080 return (EACCES); 3081 3082 p = kmem_alloc(sizeof (*p), KM_SLEEP); 3083 3084 if ((err = mirror_get_geom(un, p)) == 0) { 3085 if (ddi_copyout((caddr_t)p, data, sizeof (*p), 3086 mode) != 0) 3087 err = EFAULT; 3088 } 3089 3090 kmem_free(p, sizeof (*p)); 3091 return (err); 3092 } 3093 3094 case DKIOCGVTOC: 3095 { 3096 struct vtoc *vtoc; 3097 3098 if (! 
(mode & FREAD)) 3099 return (EACCES); 3100 3101 vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP); 3102 3103 if ((err = mirror_get_vtoc(un, vtoc)) != 0) { 3104 kmem_free(vtoc, sizeof (*vtoc)); 3105 return (err); 3106 } 3107 3108 if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) { 3109 if (ddi_copyout(vtoc, data, sizeof (*vtoc), mode)) 3110 err = EFAULT; 3111 } 3112 #ifdef _SYSCALL32 3113 else { 3114 struct vtoc32 *vtoc32; 3115 3116 vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP); 3117 3118 vtoctovtoc32((*vtoc), (*vtoc32)); 3119 if (ddi_copyout(vtoc32, data, sizeof (*vtoc32), mode)) 3120 err = EFAULT; 3121 kmem_free(vtoc32, sizeof (*vtoc32)); 3122 } 3123 #endif /* _SYSCALL32 */ 3124 3125 kmem_free(vtoc, sizeof (*vtoc)); 3126 return (err); 3127 } 3128 3129 case DKIOCSVTOC: 3130 { 3131 struct vtoc *vtoc; 3132 3133 if (! (mode & FWRITE)) 3134 return (EACCES); 3135 3136 vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP); 3137 3138 if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) { 3139 if (ddi_copyin(data, vtoc, sizeof (*vtoc), mode)) { 3140 err = EFAULT; 3141 } 3142 } 3143 #ifdef _SYSCALL32 3144 else { 3145 struct vtoc32 *vtoc32; 3146 3147 vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP); 3148 3149 if (ddi_copyin(data, vtoc32, sizeof (*vtoc32), mode)) { 3150 err = EFAULT; 3151 } else { 3152 vtoc32tovtoc((*vtoc32), (*vtoc)); 3153 } 3154 kmem_free(vtoc32, sizeof (*vtoc32)); 3155 } 3156 #endif /* _SYSCALL32 */ 3157 3158 if (err == 0) 3159 err = mirror_set_vtoc(un, vtoc); 3160 3161 kmem_free(vtoc, sizeof (*vtoc)); 3162 return (err); 3163 } 3164 3165 case DKIOCGEXTVTOC: 3166 { 3167 struct extvtoc *extvtoc; 3168 3169 if (! 
(mode & FREAD)) 3170 return (EACCES); 3171 3172 extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP); 3173 3174 if ((err = mirror_get_extvtoc(un, extvtoc)) != 0) { 3175 kmem_free(extvtoc, sizeof (*extvtoc)); 3176 return (err); 3177 } 3178 3179 if (ddi_copyout(extvtoc, data, sizeof (*extvtoc), mode)) 3180 err = EFAULT; 3181 3182 kmem_free(extvtoc, sizeof (*extvtoc)); 3183 return (err); 3184 } 3185 3186 case DKIOCSEXTVTOC: 3187 { 3188 struct extvtoc *extvtoc; 3189 3190 if (! (mode & FWRITE)) 3191 return (EACCES); 3192 3193 extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP); 3194 3195 if (ddi_copyin(data, extvtoc, sizeof (*extvtoc), mode)) { 3196 err = EFAULT; 3197 } 3198 3199 if (err == 0) 3200 err = mirror_set_extvtoc(un, extvtoc); 3201 3202 kmem_free(extvtoc, sizeof (*extvtoc)); 3203 return (err); 3204 } 3205 3206 case DKIOCGAPART: 3207 { 3208 struct dk_map dmp; 3209 3210 if ((err = mirror_get_cgapart(un, &dmp)) != 0) { 3211 return (err); 3212 } 3213 3214 if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) { 3215 if (ddi_copyout((caddr_t)&dmp, data, sizeof (dmp), 3216 mode) != 0) 3217 err = EFAULT; 3218 } 3219 #ifdef _SYSCALL32 3220 else { 3221 struct dk_map32 dmp32; 3222 3223 dmp32.dkl_cylno = dmp.dkl_cylno; 3224 dmp32.dkl_nblk = dmp.dkl_nblk; 3225 3226 if (ddi_copyout((caddr_t)&dmp32, data, sizeof (dmp32), 3227 mode) != 0) 3228 err = EFAULT; 3229 } 3230 #endif /* _SYSCALL32 */ 3231 3232 return (err); 3233 } 3234 case DKIOCGETEFI: 3235 { 3236 /* 3237 * This one can be done centralized, 3238 * no need to put in the same code for all types of metadevices 3239 */ 3240 return (md_dkiocgetefi(mnum, data, mode)); 3241 } 3242 case DKIOCSETEFI: 3243 { 3244 /* 3245 * This one can be done centralized, 3246 * no need to put in the same code for all types of metadevices 3247 */ 3248 return (md_dkiocsetefi(mnum, data, mode)); 3249 } 3250 case DKIOCPARTITION: 3251 { 3252 return (md_dkiocpartition(mnum, data, mode)); 3253 } 3254 3255 case DKIOCGETVOLCAP: 3256 { 3257 volcap_t vc; 3258 
mdi_unit_t *ui; 3259 3260 /* Only valid for MN sets */ 3261 if (!MD_MNSET_SETNO(MD_MIN2SET(mnum))) 3262 return (EINVAL); 3263 3264 ui = MDI_UNIT(mnum); 3265 if (! (mode & FREAD)) 3266 return (EACCES); 3267 3268 vc.vc_info = DKV_ABR_CAP | DKV_DMR_CAP; 3269 vc.vc_set = 0; 3270 if (ui->ui_tstate & MD_ABR_CAP) { 3271 vc.vc_set |= DKV_ABR_CAP; 3272 } 3273 if (ddi_copyout(&vc, data, sizeof (volcap_t), mode)) 3274 err = EFAULT; 3275 return (err); 3276 } 3277 3278 case DKIOCSETVOLCAP: 3279 { 3280 volcap_t vc; 3281 volcapset_t volcap = 0; 3282 mdi_unit_t *ui; 3283 3284 /* Only valid for MN sets */ 3285 if (!MD_MNSET_SETNO(MD_MIN2SET(mnum))) 3286 return (EINVAL); 3287 3288 ui = MDI_UNIT(mnum); 3289 if (! (mode & FWRITE)) 3290 return (EACCES); 3291 3292 if (ddi_copyin(data, &vc, sizeof (volcap_t), mode)) 3293 return (EFAULT); 3294 3295 /* Not valid if a submirror is offline */ 3296 if (un->c.un_status & MD_UN_OFFLINE_SM) { 3297 return (EINVAL); 3298 } 3299 if (ui->ui_tstate & MD_ABR_CAP) 3300 volcap |= DKV_ABR_CAP; 3301 /* Only send capability message if there is a change */ 3302 if ((vc.vc_set & (DKV_ABR_CAP)) != volcap) 3303 err = mdmn_send_capability_message(mnum, vc, lockp); 3304 return (err); 3305 } 3306 3307 case DKIOCDMR: 3308 { 3309 vol_directed_rd_t *vdr; 3310 3311 #ifdef _MULTI_DATAMODEL 3312 vol_directed_rd32_t *vdr32; 3313 #endif /* _MULTI_DATAMODEL */ 3314 3315 /* Only valid for MN sets */ 3316 if (!MD_MNSET_SETNO(MD_MIN2SET(mnum))) 3317 return (EINVAL); 3318 3319 vdr = kmem_zalloc(sizeof (vol_directed_rd_t), KM_NOSLEEP); 3320 if (vdr == NULL) 3321 return (ENOMEM); 3322 3323 #ifdef _MULTI_DATAMODEL 3324 vdr32 = kmem_zalloc(sizeof (vol_directed_rd32_t), KM_NOSLEEP); 3325 if (vdr32 == NULL) { 3326 kmem_free(vdr, sizeof (vol_directed_rd_t)); 3327 return (ENOMEM); 3328 } 3329 3330 switch (ddi_model_convert_from(mode & FMODELS)) { 3331 case DDI_MODEL_ILP32: 3332 /* 3333 * If we're called from a higher-level driver we don't 3334 * need to manipulate the data. 
Its already been done by 3335 * the caller. 3336 */ 3337 if (!(mode & FKIOCTL)) { 3338 if (ddi_copyin(data, vdr32, sizeof (*vdr32), 3339 mode)) { 3340 kmem_free(vdr, sizeof (*vdr)); 3341 return (EFAULT); 3342 } 3343 vdr->vdr_flags = vdr32->vdr_flags; 3344 vdr->vdr_offset = vdr32->vdr_offset; 3345 vdr->vdr_nbytes = vdr32->vdr_nbytes; 3346 vdr->vdr_data = 3347 (void *)(uintptr_t)vdr32->vdr_data; 3348 vdr->vdr_side = vdr32->vdr_side; 3349 break; 3350 } 3351 /* FALLTHROUGH */ 3352 3353 case DDI_MODEL_NONE: 3354 if (ddi_copyin(data, vdr, sizeof (*vdr), mode)) { 3355 kmem_free(vdr32, sizeof (*vdr32)); 3356 kmem_free(vdr, sizeof (*vdr)); 3357 return (EFAULT); 3358 } 3359 break; 3360 3361 default: 3362 kmem_free(vdr32, sizeof (*vdr32)); 3363 kmem_free(vdr, sizeof (*vdr)); 3364 return (EFAULT); 3365 } 3366 #else /* ! _MULTI_DATAMODEL */ 3367 if (ddi_copyin(data, vdr, sizeof (*vdr), mode)) { 3368 kmem_free(vdr, sizeof (*vdr)); 3369 return (EFAULT); 3370 } 3371 #endif /* _MULTI_DATAMODEL */ 3372 3373 err = mirror_directed_read(ddi_dev, vdr, mode); 3374 3375 if (err == 0) { 3376 #ifdef _MULTI_DATAMODEL 3377 switch (ddi_model_convert_from(mode & FMODELS)) { 3378 case DDI_MODEL_ILP32: 3379 if (!(mode & FKIOCTL)) { 3380 vdr32->vdr_flags = vdr->vdr_flags; 3381 vdr32->vdr_offset = vdr->vdr_offset; 3382 vdr32->vdr_side = vdr->vdr_side; 3383 vdr32->vdr_bytesread = 3384 vdr->vdr_bytesread; 3385 bcopy(vdr->vdr_side_name, 3386 vdr32->vdr_side_name, 3387 sizeof (vdr32->vdr_side_name)); 3388 3389 if (ddi_copyout(vdr32, data, 3390 sizeof (*vdr32), mode)) { 3391 err = EFAULT; 3392 } 3393 break; 3394 } 3395 /* FALLTHROUGH */ 3396 3397 case DDI_MODEL_NONE: 3398 if (ddi_copyout(vdr, data, sizeof (*vdr), mode)) 3399 err = EFAULT; 3400 break; 3401 } 3402 #else /* ! 
_MULTI_DATAMODEL */ 3403 if (ddi_copyout(vdr, data, sizeof (*vdr), mode)) 3404 err = EFAULT; 3405 #endif /* _MULTI_DATAMODEL */ 3406 if (vdr->vdr_flags & DKV_DMR_ERROR) 3407 err = EIO; 3408 } 3409 3410 #ifdef _MULTI_DATAMODEL 3411 kmem_free(vdr32, sizeof (*vdr32)); 3412 #endif /* _MULTI_DATAMODEL */ 3413 3414 kmem_free(vdr, sizeof (*vdr)); 3415 3416 return (err); 3417 } 3418 3419 default: 3420 return (ENOTTY); 3421 } 3422 } 3423 3424 /* 3425 * rename named service entry points and support functions 3426 */ 3427 3428 /* 3429 * rename/exchange role swap functions 3430 * 3431 * most of these are handled by generic role swap functions 3432 */ 3433 3434 /* 3435 * MDRNM_UPDATE_KIDS 3436 * rename/exchange of our child or grandchild 3437 */ 3438 void 3439 mirror_renexch_update_kids(md_rendelta_t *delta, md_rentxn_t *rtxnp) 3440 { 3441 mm_submirror_t *sm; 3442 int smi; 3443 3444 ASSERT(rtxnp); 3445 ASSERT((MDRNOP_RENAME == rtxnp->op) || (rtxnp->op == MDRNOP_EXCHANGE)); 3446 ASSERT(rtxnp->recids); 3447 ASSERT(delta); 3448 ASSERT(delta->unp); 3449 ASSERT(delta->old_role == MDRR_PARENT); 3450 ASSERT(delta->new_role == MDRR_PARENT); 3451 3452 /* 3453 * since our role isn't changing (parent->parent) 3454 * one of our children must be changing 3455 * find the child being modified, and update 3456 * our notion of it 3457 */ 3458 for (smi = 0; smi < NMIRROR; smi++) { 3459 mm_unit_t *un = (mm_unit_t *)delta->unp; 3460 3461 if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) { 3462 continue; 3463 } 3464 sm = &un->un_sm[smi]; 3465 3466 if (md_getminor(sm->sm_dev) == rtxnp->from.mnum) { 3467 sm->sm_dev = md_makedevice(md_major, rtxnp->to.mnum); 3468 sm->sm_key = rtxnp->to.key; 3469 break; 3470 } 3471 } 3472 3473 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp); 3474 } 3475 3476 /* 3477 * exchange down (self->child) 3478 */ 3479 void 3480 mirror_exchange_self_update_from_down( 3481 md_rendelta_t *delta, 3482 md_rentxn_t *rtxnp 3483 ) 3484 { 3485 int smi; 3486 mm_submirror_t *found; 3487 
	minor_t			from_min, to_min;
	sv_dev_t		sv;

	ASSERT(rtxnp);
	ASSERT(MDRNOP_EXCHANGE == rtxnp->op);
	ASSERT(rtxnp->recids);
	ASSERT(rtxnp->rec_idx >= 0);
	ASSERT(delta);
	ASSERT(delta->unp);
	ASSERT(delta->uip);
	ASSERT(delta->old_role == MDRR_SELF);
	ASSERT(delta->new_role == MDRR_CHILD);
	ASSERT(md_getminor(delta->dev) == rtxnp->from.mnum);

	from_min = rtxnp->from.mnum;
	to_min = rtxnp->to.mnum;

	/*
	 * self id changes in our own unit struct
	 */

	MD_SID(delta->unp) = to_min;

	/*
	 * parent identifier need not change
	 */

	/*
	 * point the set array pointers at the "new" unit and unit in-cores
	 * Note: the other half of this transfer is done in the "update_to"
	 * exchange named service.
	 */

	MDI_VOIDUNIT(to_min) = delta->uip;
	MD_VOIDUNIT(to_min) = delta->unp;

	/*
	 * transfer kstats
	 */

	delta->uip->ui_kstat = rtxnp->to.kstatp;

	/*
	 * the unit in-core reference to the get next link's id changes
	 */

	delta->uip->ui_link.ln_id = to_min;

	/*
	 * find the child whose identity we're assuming: the in-use submirror
	 * whose minor is to_min.  The loop stops at the first match.
	 */

	for (found = NULL, smi = 0; !found && smi < NMIRROR; smi++) {
		mm_submirror_t		*sm;
		mm_unit_t		*un = (mm_unit_t *)delta->unp;

		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) {
			continue;
		}
		sm = &un->un_sm[smi];

		if (md_getminor(sm->sm_dev) == to_min) {
			found = sm;
		}
	}
	/*
	 * NOTE(review): found is dereferenced below; if ASSERT compiles away
	 * in non-DEBUG builds, a missing child would be a NULL dereference.
	 */
	ASSERT(found);

	/*
	 * Update the sub-mirror's identity.  The old key is saved in sv so
	 * that it can be removed from the name space below.
	 */
	found->sm_dev = md_makedevice(md_major, rtxnp->from.mnum);
	sv.key = found->sm_key;

	ASSERT(rtxnp->from.key != MD_KEYWILD);
	ASSERT(rtxnp->from.key != MD_KEYBAD);

	found->sm_key = rtxnp->from.key;

	/*
	 * delete the key for the old sub-mirror from the name space
	 */

	sv.setno = MD_MIN2SET(from_min);
	md_rem_names(&sv, 1);

	/*
	 * and store the record id (from the unit struct) into recids
	 */

	md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
}

/*
 * exchange down (parent->self)
 *
 * Counterpart of mirror_exchange_self_update_from_down: here the unit takes
 * on the "from" minor, where that routine takes on the "to" minor.
 */
void
mirror_exchange_parent_update_to(
		md_rendelta_t	*delta,
		md_rentxn_t	*rtxnp
)
{
	int			smi;
	mm_submirror_t		*found;
	minor_t			from_min, to_min;
	sv_dev_t		sv;

	ASSERT(rtxnp);
	ASSERT(MDRNOP_EXCHANGE == rtxnp->op);
	ASSERT(rtxnp->recids);
	ASSERT(rtxnp->rec_idx >= 0);
	ASSERT(delta);
	ASSERT(delta->unp);
	ASSERT(delta->uip);
	ASSERT(delta->old_role == MDRR_PARENT);
	ASSERT(delta->new_role == MDRR_SELF);
	ASSERT(md_getminor(delta->dev) == rtxnp->to.mnum);

	from_min = rtxnp->from.mnum;
	to_min = rtxnp->to.mnum;

	/*
	 * self id changes in our own unit struct
	 */

	MD_SID(delta->unp) = from_min;

	/*
	 * parent identifier need not change
	 */

	/*
	 * point the set array pointers at the "new" unit and unit in-cores
	 * Note: the other half of this transfer (the to_min slots) is done in
	 * the "update_from_down" exchange named service,
	 * mirror_exchange_self_update_from_down.
	 */

	MDI_VOIDUNIT(from_min) = delta->uip;
	MD_VOIDUNIT(from_min) = delta->unp;

	/*
	 * transfer kstats
	 */

	delta->uip->ui_kstat = rtxnp->from.kstatp;

	/*
	 * the unit in-core reference to the get next link's id changes
	 */

	delta->uip->ui_link.ln_id = from_min;

	/*
	 * find the child whose identity we're assuming: the in-use submirror
	 * whose minor is from_min.  The loop stops at the first match.
	 */

	for (found = NULL, smi = 0; !found && smi < NMIRROR; smi++) {
		mm_submirror_t		*sm;
		mm_unit_t		*un = (mm_unit_t *)delta->unp;

		if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) {
			continue;
		}
		sm = &un->un_sm[smi];

		if (md_getminor(sm->sm_dev) == from_min) {
			found = sm;
		}
	}
	/*
	 * NOTE(review): found is dereferenced below; if ASSERT compiles away
	 * in non-DEBUG builds, a missing child would be a NULL dereference.
	 */
	ASSERT(found);

	/*
	 * Update the sub-mirror's identity.  The old key is saved in sv so
	 * that it can be removed from the name space below.
	 */
	found->sm_dev = md_makedevice(md_major, rtxnp->to.mnum);
	sv.key = found->sm_key;

	ASSERT(rtxnp->to.key != MD_KEYWILD);
	ASSERT(rtxnp->to.key != MD_KEYBAD);

	found->sm_key = rtxnp->to.key;

	/*
	 * delete the key for the old sub-mirror from the name space
	 */

	sv.setno = MD_MIN2SET(to_min);
	md_rem_names(&sv, 1);

	/*
	 * and store the record id (from the unit struct) into recids
	 */

	md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
}

/*
 * MDRNM_LIST_URKIDS: named svc entry point
 * add all delta entries appropriate for our children onto the
 * deltalist pointed to by dlpp
 *
 * Returns the number of children added, or -1 on failure with the error
 * recorded in rtxnp->mde.
 */
int
mirror_rename_listkids(md_rendelta_t **dlpp, md_rentxn_t *rtxnp)
{
	minor_t			from_min, to_min;
	mm_unit_t		*from_un;
	md_rendelta_t		*new, *p;
	int			smi;
	int			n_children;
	mm_submirror_t		*sm;

	ASSERT(rtxnp);
	ASSERT(dlpp);
	ASSERT((rtxnp->op == MDRNOP_EXCHANGE) || (rtxnp->op == MDRNOP_RENAME));

	from_min = rtxnp->from.mnum;
	to_min = rtxnp->to.mnum;
	n_children = 0;

	if (!MDI_UNIT(from_min) || !(from_un = MD_UNIT(from_min))) {
		(void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, from_min);
		return (-1);
	}

	/* walk to the last entry of the existing list (p is NULL if empty) */
	for (p = *dlpp; p && p->next != NULL; p = p->next) {
		/* NULL */
	}

	for (smi = 0; smi < NMIRROR; smi++) {
		minor_t	child_min;

		if (!SMS_BY_INDEX_IS(from_un, smi, SMS_INUSE)) {
			continue;
		}

		sm = &from_un->un_sm[smi];
		child_min = md_getminor(sm->sm_dev);

		/*
		 * The child that is the target of the operation becomes
		 * "self"; every other child stays a child.  p is passed as
		 * the previous entry and then advanced to the new one.
		 */
		p = new = md_build_rendelta(MDRR_CHILD,
		    to_min == child_min? MDRR_SELF: MDRR_CHILD,
		    sm->sm_dev, p,
		    MD_UNIT(child_min), MDI_UNIT(child_min),
		    &rtxnp->mde);

		if (!new) {
			/* make sure some error is reported to the caller */
			if (mdisok(&rtxnp->mde)) {
				(void) mdsyserror(&rtxnp->mde, ENOMEM);
			}
			return (-1);
		}
		++n_children;
	}

	return (n_children);
}

/*
 * support routine for MDRNM_CHECK
 *
 * Decides whether this mirror may itself be renamed or exchanged.  Returns 0
 * if so, otherwise EINVAL or EBUSY with the reason recorded in rtxnp->mde.
 */
static int
mirror_may_renexch_self(
	mm_unit_t	*un,
	mdi_unit_t	*ui,
	md_rentxn_t	*rtxnp)
{
	minor_t			from_min;
	minor_t			to_min;
	bool_t			toplevel;
	bool_t			related;
	int			smi;
	mm_submirror_t		*sm;

	from_min = rtxnp->from.mnum;
	to_min = rtxnp->to.mnum;

	if (!un || !ui) {
		(void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
		    from_min);
		return (EINVAL);
	}

	ASSERT(MD_CAPAB(un) & MD_CAN_META_CHILD);
	if (!(MD_CAPAB(un) & MD_CAN_META_CHILD)) {
		(void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min);
		return (EINVAL);
	}

	if (MD_PARENT(un) == MD_MULTI_PARENT) {
		(void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min);
		return (EINVAL);
	}

	toplevel = !MD_HAS_PARENT(MD_PARENT(un));

	/* we're related if trying to swap with our parent */
	related = (!toplevel) && (MD_PARENT(un) == to_min);

	switch (rtxnp->op) {
	case MDRNOP_EXCHANGE:
		/*
		 * check for a swap with our child
		 */
		for (smi = 0; smi < NMIRROR; smi++) {

			if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) {
				continue;
			}

			sm = &un->un_sm[smi];
			if (md_getminor(sm->sm_dev) == to_min) {
				related |= TRUE;
			}
		}
		/* an exchange is only allowed with our parent or a child */
		if (!related) {
			(void) mdmderror(&rtxnp->mde,
			    MDE_RENAME_TARGET_UNRELATED, to_min);
			return (EINVAL);
		}

		break;

	case MDRNOP_RENAME:
		/*
		 * if from is top-level and is open, then the kernel is using
		 * the md_dev64_t.
		 */

		if (toplevel && md_unit_isopen(ui)) {
			(void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY,
			    from_min);
			return (EBUSY);
		}
		break;

	default:
		(void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
		    from_min);
		return (EINVAL);
	}

	return (0);	/* ok */
}

/*
 * Named service entry point: MDRNM_CHECK
 */
intptr_t
mirror_rename_check(
	md_rendelta_t	*delta,
	md_rentxn_t	*rtxnp)
{
	mm_submirror_t		*sm;
	mm_submirror_ic_t	*smic;
	md_m_shared_t		*shared;
	int			ci;
	int			i;
	int			compcnt;
	mm_unit_t		*un;
	int			err = 0;

	ASSERT(delta);
	ASSERT(rtxnp);
	ASSERT(delta->unp);
	ASSERT(delta->uip);
	ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE));

	/*
	 * NOTE(review): when rtxnp itself is NULL this branch still passes
	 * &rtxnp->mde to mdsyserror().
	 */
	if (!delta || !rtxnp || !delta->unp || !delta->uip) {
		(void) mdsyserror(&rtxnp->mde, EINVAL);
		return (EINVAL);
	}

	un = (mm_unit_t *)delta->unp;

	for (i = 0; i < NMIRROR; i++) {
		sm = &un->un_sm[i];
		smic = &un->un_smic[i];

		if (!SMS_IS(sm, SMS_INUSE))
			continue;

		ASSERT(smic->sm_get_component_count);
		if (!smic->sm_get_component_count) {
			(void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
			    md_getminor(delta->dev));
			return (ENXIO);
		}

		compcnt = (*(smic->sm_get_component_count))(sm->sm_dev, un);

		for (ci = 0; ci < compcnt; ci++) {

			ASSERT(smic->sm_shared_by_indx);
if (!smic->sm_shared_by_indx) { 3879 (void) mdmderror(&rtxnp->mde, 3880 MDE_RENAME_CONFIG_ERROR, 3881 md_getminor(delta->dev)); 3882 return (ENXIO); 3883 } 3884 3885 shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx)) 3886 (sm->sm_dev, sm, ci); 3887 3888 ASSERT(shared); 3889 if (!shared) { 3890 (void) mdmderror(&rtxnp->mde, 3891 MDE_RENAME_CONFIG_ERROR, 3892 md_getminor(delta->dev)); 3893 return (ENXIO); 3894 } 3895 3896 if (shared->ms_hs_id != 0) { 3897 (void) mdmderror(&rtxnp->mde, 3898 MDE_SM_FAILED_COMPS, 3899 md_getminor(delta->dev)); 3900 return (EIO); 3901 } 3902 3903 switch (shared->ms_state) { 3904 case CS_OKAY: 3905 break; 3906 3907 case CS_RESYNC: 3908 (void) mdmderror(&rtxnp->mde, 3909 MDE_RESYNC_ACTIVE, 3910 md_getminor(delta->dev)); 3911 return (EBUSY); 3912 3913 default: 3914 (void) mdmderror(&rtxnp->mde, 3915 MDE_SM_FAILED_COMPS, 3916 md_getminor(delta->dev)); 3917 return (EINVAL); 3918 } 3919 3920 } 3921 } 3922 3923 /* self does additional checks */ 3924 if (delta->old_role == MDRR_SELF) { 3925 err = mirror_may_renexch_self(un, delta->uip, rtxnp); 3926 } 3927 3928 return (err); 3929 } 3930 3931 /* end of rename/exchange */ 3932