1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/param.h> 28 #include <sys/systm.h> 29 #include <sys/conf.h> 30 #include <sys/file.h> 31 #include <sys/user.h> 32 #include <sys/uio.h> 33 #include <sys/t_lock.h> 34 #include <sys/buf.h> 35 #include <sys/dkio.h> 36 #include <sys/vtoc.h> 37 #include <sys/kmem.h> 38 #include <vm/page.h> 39 #include <sys/sysmacros.h> 40 #include <sys/types.h> 41 #include <sys/mkdev.h> 42 #include <sys/stat.h> 43 #include <sys/open.h> 44 #include <sys/lvm/mdvar.h> 45 #include <sys/lvm/md_stripe.h> 46 #include <sys/lvm/md_notify.h> 47 #include <sys/modctl.h> 48 #include <sys/ddi.h> 49 #include <sys/sunddi.h> 50 #include <sys/debug.h> 51 #include <sys/model.h> 52 53 #include <sys/sysevent/eventdefs.h> 54 #include <sys/sysevent/svm.h> 55 56 extern int md_status; 57 58 extern unit_t md_nunits; 59 extern set_t md_nsets; 60 extern md_set_t md_set[]; 61 62 extern md_ops_t stripe_md_ops; 63 extern md_krwlock_t md_unit_array_rw; 64 extern major_t md_major; 65 66 static int 67 stripe_replace(replace_params_t *params) 68 { 69 minor_t mnum = params->mnum; 70 ms_unit_t *un; 71 mddb_recid_t recids[6]; 72 ms_new_dev_t nd; 73 ms_cd_info_t cd; 74 int ci; 75 int cmpcnt; 76 void *repl_data; 77 md_dev64_t fake_devt; 78 void (*repl_done)(); 79 80 mdclrerror(¶ms->mde); 81 82 un = (ms_unit_t *)MD_UNIT(mnum); 83 84 if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) { 85 return (mdmderror(¶ms->mde, MDE_RESYNC_ACTIVE, mnum)); 86 } 87 88 nd.nd_dev = params->new_dev; 89 nd.nd_key = params->new_key; 90 nd.nd_nblks = params->number_blks; 91 nd.nd_start_blk = params->start_blk; 92 nd.nd_labeled = params->has_label; 93 nd.nd_hs_id = 0; 94 95 /* 96 * stripe_component_count and stripe_get_dev only care about the 97 * minor number associated with the first argument which is a 98 * md_dev64_t 99 * 100 * The comments section for these two routines have been updated 101 * to indicate that this routine calls with fake major numbers. 102 */ 103 fake_devt = md_makedevice(0, mnum); 104 cmpcnt = stripe_component_count(fake_devt, NULL); 105 for (ci = 0; ci < cmpcnt; ci++) { 106 (void) stripe_get_dev(fake_devt, NULL, ci, &cd); 107 if ((cd.cd_dev == params->old_dev) || 108 (cd.cd_orig_dev == params->old_dev)) 109 break; 110 } 111 if (ci == cmpcnt) { 112 return (EINVAL); 113 } 114 115 /* In case of a dryrun we're done here */ 116 if (params->options & MDIOCTL_DRYRUN) { 117 return (0); 118 } 119 120 (void) stripe_replace_dev(fake_devt, 0, ci, &nd, recids, 6, 121 &repl_done, &repl_data); 122 mddb_commitrecs_wrapper(recids); 123 (*repl_done)(fake_devt, repl_data); 124 125 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REPLACE, SVM_TAG_METADEVICE, 126 MD_UN2SET(un), MD_SID(un)); 127 return (0); 128 } 129 130 static int 131 stripe_set(void *d, int mode) 132 { 133 minor_t mnum; 134 ms_unit_t *un; 135 void *p; 136 mddb_recid_t ms_recid; 137 mddb_recid_t *recids; 138 mddb_type_t typ1; 139 int err; 140 set_t setno; 141 md_error_t *mdep; 142 struct ms_comp *mdcomp; 143 int row; 144 int rid; 145 int num_recs; 146 int i, c; 147 md_set_params_t *msp = d; 148 149 mnum = msp->mnum; 150 setno = MD_MIN2SET(mnum); 151 152 mdep = &msp->mde; 153 154 mdclrerror(mdep); 155 156 if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) { 157 return (mdmderror(mdep, MDE_INVAL_UNIT, mnum)); 158 } 159 160 if (md_get_setstatus(setno) & MD_SET_STALE) 161 return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno)); 162 163 un = MD_UNIT(mnum); 164 if (un != NULL) { 165 return (mdmderror(mdep, MDE_UNIT_ALREADY_SETUP, mnum)); 166 } 167 168 169 typ1 = (mddb_type_t)md_getshared_key(setno, 170 stripe_md_ops.md_driver.md_drivername); 171 172 /* create the db record for this mdstruct */ 173 if (msp->options & MD_CRO_64BIT) { 174 #if defined(_ILP32) 175 return (mdmderror(mdep, MDE_UNIT_TOO_LARGE, mnum)); 176 #else 177 ms_recid = mddb_createrec((size_t)msp->size, typ1, 0, 178 MD_CRO_64BIT | MD_CRO_STRIPE | MD_CRO_FN, setno); 179 #endif 180 } else { 181 ms_recid = mddb_createrec((size_t)msp->size, typ1, 0, 182 MD_CRO_32BIT | MD_CRO_STRIPE | MD_CRO_FN, setno); 183 } 184 if (ms_recid < 0) 185 return (mddbstatus2error(mdep, ms_recid, mnum, setno)); 186 187 /* get the address of the mdstruct */ 188 p = (void *) mddb_getrecaddr(ms_recid); 189 /* 190 * It is okay that we muck with the mdstruct here, 191 * since no one else will know about the mdstruct 192 * until we commit it. If we crash, the record will 193 * be automatically purged, since we haven't 194 * committed it yet. 195 */ 196 197 /* copy in the user's mdstruct */ 198 if (err = ddi_copyin((caddr_t)(uintptr_t)msp->mdp, (caddr_t)p, 199 (size_t)msp->size, mode)) { 200 mddb_deleterec_wrapper(ms_recid); 201 return (EFAULT); 202 } 203 204 un = (ms_unit_t *)p; 205 206 /* All 64 bit metadevices only support EFI labels. */ 207 if (msp->options & MD_CRO_64BIT) { 208 un->c.un_flag |= MD_EFILABEL; 209 } 210 211 /* 212 * allocate the real recids array. since we may have to commit 213 * underlying metadevice records, we need an array 214 * of size: total number of components in stripe + 3 215 * (1 for the stripe itself, one for the hotspare, one 216 * for the end marker). 217 */ 218 num_recs = 3; 219 rid = 0; 220 for (row = 0; row < un->un_nrows; row++) { 221 struct ms_row *mdr = &un->un_row[row]; 222 num_recs += mdr->un_ncomp; 223 } 224 recids = kmem_alloc(num_recs * sizeof (mddb_recid_t), KM_SLEEP); 225 recids[rid++] = ms_recid; 226 227 MD_SID(un) = mnum; 228 MD_RECID(un) = recids[0]; 229 MD_CAPAB(un) = MD_CAN_PARENT | MD_CAN_SUB_MIRROR | MD_CAN_SP; 230 MD_PARENT(un) = MD_NO_PARENT; 231 un->c.un_revision |= MD_FN_META_DEV; 232 233 if (err = stripe_build_incore(p, 0)) { 234 md_nblocks_set(mnum, -1ULL); 235 MD_UNIT(mnum) = NULL; 236 237 mddb_deleterec_wrapper(recids[0]); 238 kmem_free(recids, num_recs * sizeof (mddb_recid_t)); 239 return (err); 240 } 241 242 /* 243 * Update unit availability 244 */ 245 md_set[setno].s_un_avail--; 246 247 recids[rid] = 0; 248 if (un->un_hsp_id != -1) 249 err = md_hot_spare_ifc(HSP_INCREF, un->un_hsp_id, 0, 0, 250 &recids[rid++], NULL, NULL, NULL); 251 252 253 if (err) { 254 md_nblocks_set(mnum, -1ULL); 255 MD_UNIT(mnum) = NULL; 256 257 mddb_deleterec_wrapper(recids[0]); 258 kmem_free(recids, num_recs * sizeof (mddb_recid_t)); 259 return (mdhsperror(mdep, MDE_INVAL_HSP, un->un_hsp_id)); 260 } 261 262 /* 263 * set the parent on any metadevice components. 264 * NOTE: currently soft partitions are the only metadevices 265 * which can appear within a stripe. 266 */ 267 mdcomp = (ms_comp_t *)((void *)&((char *)un)[un->un_ocomp]); 268 for (row = 0; row < un->un_nrows; row++) { 269 struct ms_row *mdr = &un->un_row[row]; 270 for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) { 271 ms_comp_t *mdc = &mdcomp[c++]; 272 md_dev64_t comp_dev; 273 md_unit_t *comp_un; 274 275 comp_dev = mdc->un_dev; 276 if (md_getmajor(comp_dev) == md_major) { 277 /* set parent and disallow soft partitioning */ 278 comp_un = MD_UNIT(md_getminor(comp_dev)); 279 recids[rid++] = MD_RECID(comp_un); 280 md_set_parent(mdc->un_dev, MD_SID(un)); 281 } 282 } 283 } 284 285 /* set end marker */ 286 recids[rid] = 0; 287 mddb_commitrecs_wrapper(recids); 288 289 md_create_unit_incore(mnum, &stripe_md_ops, 0); 290 kmem_free(recids, (num_recs * sizeof (mddb_recid_t))); 291 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_METADEVICE, 292 MD_UN2SET(un), MD_SID(un)); 293 return (0); 294 } 295 296 297 /*ARGSUSED*/ 298 static int 299 stripe_get(void *d, int mode, IOLOCK *lock) 300 { 301 minor_t mnum; 302 mdi_unit_t *ui; 303 ms_unit_t *un; 304 md_error_t *mdep; 305 md_i_get_t *migp = d; 306 307 308 mnum = migp->id; 309 mdep = &migp->mde; 310 311 mdclrerror(mdep); 312 313 if ((MD_MIN2SET(mnum) >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) 314 return (mdmderror(mdep, MDE_INVAL_UNIT, mnum)); 315 316 if ((ui = MDI_UNIT(mnum)) == NULL) { 317 return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum)); 318 } 319 320 un = (ms_unit_t *)md_ioctl_readerlock(lock, ui); 321 322 if (migp->size == 0) { 323 migp->size = un->c.un_size; 324 return (0); 325 } 326 327 if (migp->size < un->c.un_size) { 328 return (EFAULT); 329 } 330 331 if (ddi_copyout(un, (void *)(uintptr_t)migp->mdp, 332 un->c.un_size, mode)) 333 return (EFAULT); 334 return (0); 335 } 336 337 static int 338 stripe_reset(md_i_reset_t *mirp) 339 { 340 minor_t mnum = mirp->mnum; 341 ms_unit_t *un; 342 mdi_unit_t *ui; 343 set_t setno = MD_MIN2SET(mnum); 344 345 mdclrerror(&mirp->mde); 346 347 if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) 348 return (mdmderror(&mirp->mde, MDE_INVAL_UNIT, mnum)); 349 350 if (md_get_setstatus(setno) & MD_SET_STALE) 351 return (mdmddberror(&mirp->mde, MDE_DB_STALE, mnum, setno)); 352 353 un = MD_UNIT(mnum); 354 if (un == NULL) { 355 return (mdmderror(&mirp->mde, MDE_UNIT_NOT_SETUP, mnum)); 356 } 357 358 /* This prevents new opens */ 359 rw_enter(&md_unit_array_rw.lock, RW_WRITER); 360 361 if (MD_HAS_PARENT(un->c.un_parent)) { 362 rw_exit(&md_unit_array_rw.lock); 363 return (mdmderror(&mirp->mde, MDE_IN_USE, mnum)); 364 } 365 366 /* single thread */ 367 ui = MDI_UNIT(mnum); 368 un = md_unit_openclose_enter(ui); 369 370 if (md_unit_isopen(ui)) { 371 md_unit_openclose_exit(ui); 372 rw_exit(&md_unit_array_rw.lock); 373 return (mdmderror(&mirp->mde, MDE_IS_OPEN, mnum)); 374 } 375 376 md_unit_openclose_exit(ui); 377 reset_stripe(un, mnum, 1); 378 379 /* 380 * Update unit availability 381 */ 382 md_set[setno].s_un_avail++; 383 384 /* 385 * If MN set, reset s_un_next so all nodes can have 386 * the same view of the next available slot when 387 * nodes are -w and -j 388 */ 389 if (MD_MNSET_SETNO(setno)) { 390 (void) md_upd_set_unnext(setno, MD_MIN2UNIT(mnum)); 391 } 392 393 rw_exit(&md_unit_array_rw.lock); 394 return (0); 395 } 396 397 static int 398 stripe_grow(void *d, int mode, IOLOCK *lockp) 399 { 400 minor_t mnum; 401 ms_unit_t *un, *new_un; 402 mdi_unit_t *ui; 403 minor_t *par = NULL; 404 IOLOCK *plock = NULL; 405 ms_comp_t *mdcomp, *new_comp; 406 int row, i, c; 407 mddb_recid_t ms_recid; 408 mddb_recid_t old_vtoc = 0; 409 mddb_recid_t *recids; 410 md_create_rec_option_t options; 411 mddb_type_t typ1; 412 int err; 413 int64_t tb, atb; 414 uint_t nr, oc; 415 int opened; 416 int rval = 0; 417 set_t setno; 418 md_error_t *mdep; 419 int npar; 420 int rid; 421 int num_recs; 422 u_longlong_t rev; 423 md_grow_params_t *mgp = d; 424 425 426 mnum = mgp->mnum; 427 mdep = &mgp->mde; 428 setno = MD_MIN2SET(mnum); 429 npar = mgp->npar; 430 431 mdclrerror(mdep); 432 433 if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) 434 return (mdmderror(mdep, MDE_INVAL_UNIT, mnum)); 435 436 if (md_get_setstatus(setno) & MD_SET_STALE) 437 return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno)); 438 439 ui = MDI_UNIT(mnum); 440 if (ui == NULL) { 441 return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum)); 442 } 443 444 if (npar >= 1) { 445 ASSERT((minor_t *)(uintptr_t)mgp->par != NULL); 446 par = kmem_alloc(npar * sizeof (*par), KM_SLEEP); 447 plock = kmem_alloc(npar * sizeof (*plock), KM_SLEEP); 448 if (ddi_copyin((caddr_t)(uintptr_t)mgp->par, (caddr_t)par, 449 (npar * sizeof (*par)), mode) != 0) { 450 kmem_free(par, npar * sizeof (*par)); 451 kmem_free(plock, npar * sizeof (*plock)); 452 return (EFAULT); 453 } 454 } 455 456 /* 457 * we grab unit reader/writer first, then parent locks, 458 * then our own. 459 * we expect parent units to be sorted to avoid deadlock 460 */ 461 rw_enter(&md_unit_array_rw.lock, RW_WRITER); 462 for (i = 0; i < npar; ++i) { 463 (void) md_ioctl_writerlock(&plock[i], 464 MDI_UNIT(par[i])); 465 } 466 un = (ms_unit_t *)md_ioctl_writerlock(lockp, ui); 467 468 if (un->un_nrows != mgp->nrows) { 469 rval = EINVAL; 470 goto out; 471 } 472 473 typ1 = (mddb_type_t)md_getshared_key(setno, 474 stripe_md_ops.md_driver.md_drivername); 475 476 /* 477 * Preserve the friendly name nature of growing device. 478 */ 479 options = MD_CRO_STRIPE; 480 if (un->c.un_revision & MD_FN_META_DEV) 481 options |= MD_CRO_FN; 482 if (mgp->options & MD_CRO_64BIT) { 483 #if defined(_ILP32) 484 rval = mdmderror(mdep, MDE_UNIT_TOO_LARGE, mnum); 485 goto out; 486 #else 487 ms_recid = mddb_createrec((size_t)mgp->size, typ1, 0, 488 MD_CRO_64BIT | options, setno); 489 #endif 490 } else { 491 ms_recid = mddb_createrec((size_t)mgp->size, typ1, 0, 492 MD_CRO_32BIT | options, setno); 493 } 494 495 496 if (ms_recid < 0) { 497 rval = mddbstatus2error(mdep, (int)ms_recid, mnum, setno); 498 goto out; 499 } 500 501 /* get the address of the new unit */ 502 new_un = (ms_unit_t *)mddb_getrecaddr(ms_recid); 503 504 /* 505 * It is okay that we muck with the new unit here, 506 * since no one else will know about the unit struct 507 * until we commit it. If we crash, the record will 508 * be automatically purged, since we haven't 509 * committed it yet and the old unit struct will be found. 510 */ 511 512 /* copy in the user's unit struct */ 513 err = ddi_copyin((caddr_t)(uintptr_t)mgp->mdp, (caddr_t)new_un, 514 (size_t)mgp->size, mode); 515 if (err) { 516 mddb_deleterec_wrapper(ms_recid); 517 rval = EFAULT; 518 goto out; 519 } 520 if (options & MD_CRO_FN) 521 new_un->c.un_revision |= MD_FN_META_DEV; 522 523 /* 524 * allocate the real recids array. since we may have to 525 * commit underlying metadevice records, we need an 526 * array of size: total number of new components being 527 * attached + 2 (one for the stripe itself, one for the 528 * end marker). 529 */ 530 num_recs = 2; 531 rid = 0; 532 for (row = 0; row < new_un->un_nrows; row++) { 533 struct ms_row *mdr = &new_un->un_row[row]; 534 num_recs += mdr->un_ncomp; 535 } 536 recids = kmem_alloc(num_recs * sizeof (mddb_recid_t), KM_SLEEP); 537 recids[rid++] = ms_recid; 538 539 /* 540 * Save a few of the new unit structs fields. 541 * Before they get clobbered. 542 */ 543 tb = new_un->c.un_total_blocks; 544 atb = new_un->c.un_actual_tb; 545 nr = new_un->un_nrows; 546 oc = new_un->un_ocomp; 547 rev = new_un->c.un_revision; 548 549 /* 550 * Copy the old unit struct (static stuff) 551 * into new unit struct 552 */ 553 bcopy((caddr_t)un, (caddr_t)new_un, 554 sizeof (ms_unit_t) + ((nr - 2) * (sizeof (struct ms_row)))); 555 556 /* 557 * Restore the saved stuff. 558 */ 559 new_un->c.un_total_blocks = tb; 560 md_nblocks_set(mnum, new_un->c.un_total_blocks); 561 new_un->c.un_actual_tb = atb; 562 new_un->un_nrows = nr; 563 new_un->un_ocomp = oc; 564 new_un->c.un_revision = rev; 565 566 new_un->c.un_record_id = ms_recid; 567 new_un->c.un_size = mgp->size; 568 569 /* All 64 bit metadevices only support EFI labels. */ 570 if (mgp->options & MD_CRO_64BIT) { 571 new_un->c.un_flag |= MD_EFILABEL; 572 /* 573 * If the device was previously smaller than a terabyte, 574 * and had a vtoc record attached to it, we remove the 575 * vtoc record, because the layout has changed completely. 576 */ 577 if (((un->c.un_revision & MD_64BIT_META_DEV) == 0) && 578 (un->c.un_vtoc_id != 0)) { 579 old_vtoc = un->c.un_vtoc_id; 580 new_un->c.un_vtoc_id = 581 md_vtoc_to_efi_record(old_vtoc, setno); 582 } 583 } 584 585 /* 586 * Copy the old component structs into the new unit struct. 587 */ 588 mdcomp = (ms_comp_t *)((void *)&((char *)un)[un->un_ocomp]); 589 new_comp = (ms_comp_t *)((void *)&((char *)new_un)[new_un->un_ocomp]); 590 for (row = 0; row < un->un_nrows; row++) { 591 struct ms_row *mdr = &un->un_row[row]; 592 for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++, c++) { 593 bcopy((caddr_t)&mdcomp[c], (caddr_t)&new_comp[c], 594 sizeof (ms_comp_t)); 595 } 596 } 597 598 opened = md_unit_isopen(ui); 599 600 /* 601 * Set parent on metadevices being added. 602 * Open the new devices being added. 603 * NOTE: currently soft partitions are the only metadevices 604 * which can appear within a stripe. 605 */ 606 for (row = un->un_nrows; row < new_un->un_nrows; row++) { 607 struct ms_row *mdr = &new_un->un_row[row]; 608 for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) { 609 struct ms_comp *mdc = &new_comp[c++]; 610 md_dev64_t comp_dev; 611 md_unit_t *comp_un; 612 613 comp_dev = mdc->un_dev; 614 /* set parent on any metadevices */ 615 if (md_getmajor(comp_dev) == md_major) { 616 comp_un = MD_UNIT(md_getminor(comp_dev)); 617 recids[rid++] = MD_RECID(comp_un); 618 md_set_parent(comp_dev, MD_SID(new_un)); 619 } 620 621 if (opened) { 622 md_dev64_t tmpdev = mdc->un_dev; 623 /* 624 * Open by device id 625 * Check if this comp is hotspared and 626 * if it is then use the key for hotspare 627 */ 628 tmpdev = md_resolve_bydevid(mnum, tmpdev, 629 mdc->un_mirror.ms_hs_id ? 630 mdc->un_mirror.ms_hs_key : mdc->un_key); 631 (void) md_layered_open(mnum, &tmpdev, 632 MD_OFLG_NULL); 633 mdc->un_dev = tmpdev; 634 mdc->un_mirror.ms_flags |= MDM_S_ISOPEN; 635 } 636 } 637 } 638 639 /* set end marker */ 640 recids[rid] = 0; 641 /* commit new unit struct */ 642 mddb_commitrecs_wrapper(recids); 643 644 /* delete old unit struct */ 645 mddb_deleterec_wrapper(un->c.un_record_id); 646 647 /* place new unit in in-core array */ 648 md_nblocks_set(mnum, new_un->c.un_total_blocks); 649 MD_UNIT(mnum) = new_un; 650 651 /* 652 * If old_vtoc has a non zero value, we know: 653 * - This unit crossed the border from smaller to larger one TB 654 * - There was a vtoc record for the unit, 655 * - This vtoc record is no longer needed, because 656 * a new efi record has been created for this un. 657 */ 658 if (old_vtoc != 0) { 659 mddb_deleterec_wrapper(old_vtoc); 660 } 661 662 /* free recids array */ 663 kmem_free(recids, num_recs * sizeof (mddb_recid_t)); 664 665 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, SVM_TAG_METADEVICE, 666 MD_UN2SET(new_un), MD_SID(new_un)); 667 668 /* release locks, return success */ 669 out: 670 for (i = npar - 1; (i >= 0); --i) 671 md_ioctl_writerexit(&plock[i]); 672 rw_exit(&md_unit_array_rw.lock); 673 if (plock != NULL) 674 kmem_free(plock, npar * sizeof (*plock)); 675 if (par != NULL) 676 kmem_free(par, npar * sizeof (*par)); 677 return (rval); 678 } 679 680 static int 681 stripe_get_geom( 682 ms_unit_t *un, 683 struct dk_geom *geomp 684 ) 685 { 686 md_get_geom((md_unit_t *)un, geomp); 687 688 return (0); 689 } 690 691 static int 692 stripe_get_vtoc( 693 ms_unit_t *un, 694 struct vtoc *vtocp 695 ) 696 { 697 md_get_vtoc((md_unit_t *)un, vtocp); 698 699 return (0); 700 } 701 702 static int 703 stripe_set_vtoc( 704 ms_unit_t *un, 705 struct vtoc *vtocp 706 ) 707 { 708 return (md_set_vtoc((md_unit_t *)un, vtocp)); 709 } 710 711 static int 712 stripe_get_extvtoc( 713 ms_unit_t *un, 714 struct extvtoc *vtocp 715 ) 716 { 717 md_get_extvtoc((md_unit_t *)un, vtocp); 718 719 return (0); 720 } 721 722 static int 723 stripe_set_extvtoc( 724 ms_unit_t *un, 725 struct extvtoc *vtocp 726 ) 727 { 728 return (md_set_extvtoc((md_unit_t *)un, vtocp)); 729 } 730 731 static int 732 stripe_get_cgapart( 733 ms_unit_t *un, 734 struct dk_map *dkmapp 735 ) 736 { 737 md_get_cgapart((md_unit_t *)un, dkmapp); 738 return (0); 739 } 740 741 static int 742 stripe_getdevs( 743 void *d, 744 int mode, 745 IOLOCK *lock 746 ) 747 { 748 minor_t mnum; 749 mdi_unit_t *ui; 750 ms_unit_t *un; 751 struct ms_row *mdr; 752 ms_comp_t *mdcomp, *mdc; 753 int r, c, i; 754 int cnt; 755 md_error_t *mdep; 756 md_dev64_t *devsp; 757 md_dev64_t unit_dev; 758 md_getdevs_params_t *mgdp = d; 759 760 761 mnum = mgdp->mnum; 762 mdep = &mgdp->mde; 763 764 /* check out unit */ 765 mdclrerror(mdep); 766 767 if ((MD_MIN2SET(mnum) >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) 768 return (mdmderror(mdep, MDE_INVAL_UNIT, mnum)); 769 770 if ((ui = MDI_UNIT(mnum)) == NULL) { 771 return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum)); 772 } 773 774 un = (ms_unit_t *)md_ioctl_readerlock(lock, ui); 775 776 mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 777 devsp = (md_dev64_t *)(uintptr_t)mgdp->devs; 778 779 for (cnt = 0, r = 0; (r < un->un_nrows); ++r) { 780 mdr = &un->un_row[r]; 781 for (c = 0, i = mdr->un_icomp; (c < mdr->un_ncomp); ++c) { 782 mdc = &mdcomp[i++]; 783 if (cnt < mgdp->cnt) { 784 unit_dev = mdc->un_dev; 785 if (md_getmajor(unit_dev) != md_major) { 786 if ((unit_dev = md_xlate_mini_2_targ 787 (unit_dev)) == NODEV64) 788 return (ENODEV); 789 } 790 791 if (ddi_copyout((caddr_t)&unit_dev, devsp, 792 sizeof (*devsp), mode) != 0) 793 return (EFAULT); 794 ++devsp; 795 } 796 ++cnt; 797 } 798 } 799 mgdp->cnt = cnt; 800 return (0); 801 } 802 803 static int 804 stripe_change( 805 md_stripe_params_t *msp, 806 IOLOCK *lock 807 ) 808 { 809 ms_params_t *pp = &msp->params; 810 minor_t mnum = msp->mnum; 811 ms_unit_t *un; 812 mdi_unit_t *ui; 813 int r, c, i; 814 struct ms_row *mdr; 815 ms_comp_t *mdcomp, *mdc; 816 mddb_recid_t recids[4]; 817 int irecid; 818 int inc_new_hsp = 0; 819 int err; 820 set_t setno = MD_MIN2SET(mnum); 821 822 mdclrerror(&msp->mde); 823 824 if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) 825 return (mdmderror(&msp->mde, MDE_INVAL_UNIT, mnum)); 826 827 if (md_get_setstatus(setno) & MD_SET_STALE) 828 return (mdmddberror(&msp->mde, MDE_DB_STALE, mnum, setno)); 829 830 if ((ui = MDI_UNIT(mnum)) == NULL) { 831 return (mdmderror(&msp->mde, MDE_UNIT_NOT_SETUP, mnum)); 832 } 833 834 if (!pp->change_hsp_id) 835 return (0); 836 837 un = (ms_unit_t *)md_ioctl_writerlock(lock, ui); 838 839 /* verify that no hot spares are in use */ 840 mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 841 for (r = 0; r < un->un_nrows; r++) { 842 mdr = &un->un_row[r]; 843 for (c = 0, i = mdr->un_icomp; c < mdr->un_ncomp; c++) { 844 mdc = &mdcomp[i++]; 845 if (mdc->un_mirror.ms_hs_id != 0) { 846 return (mdmderror(&msp->mde, MDE_HS_IN_USE, 847 mnum)); 848 } 849 } 850 } 851 852 recids[1] = 0; 853 recids[2] = 0; 854 irecid = 1; 855 if (pp->hsp_id != -1) { 856 /* increment the reference count of the new hsp */ 857 err = md_hot_spare_ifc(HSP_INCREF, pp->hsp_id, 0, 0, 858 &recids[1], NULL, NULL, NULL); 859 if (err) { 860 return (mdhsperror(&msp->mde, MDE_INVAL_HSP, 861 pp->hsp_id)); 862 } 863 inc_new_hsp = 1; 864 irecid++; 865 } 866 867 if (un->un_hsp_id != -1) { 868 /* decrement the reference count of the old hsp */ 869 err = md_hot_spare_ifc(HSP_DECREF, un->un_hsp_id, 0, 0, 870 &recids[irecid], NULL, NULL, NULL); 871 if (err) { 872 err = mdhsperror(&msp->mde, MDE_INVAL_HSP, 873 pp->hsp_id); 874 if (inc_new_hsp) { 875 (void) md_hot_spare_ifc(HSP_DECREF, 876 pp->hsp_id, 0, 0, 877 &recids[1], NULL, NULL, NULL); 878 /* 879 * Don't need to commit the record, 880 * cause it never got commit before 881 */ 882 } 883 return (err); 884 } 885 } 886 887 un->un_hsp_id = pp->hsp_id; 888 889 recids[0] = un->c.un_record_id; 890 recids[3] = 0; 891 mddb_commitrecs_wrapper(recids); 892 SE_NOTIFY(EC_SVM_STATE, ESC_SVM_CHANGE, SVM_TAG_METADEVICE, 893 MD_UN2SET(un), MD_SID(un)); 894 895 return (0); 896 } 897 898 static int 899 stripe_admin_ioctl(int cmd, void *data, int mode, IOLOCK *lockp) 900 { 901 size_t sz = 0; 902 void *d = NULL; 903 int err = 0; 904 905 /* We can only handle 32-bit clients for internal commands */ 906 if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) { 907 return (EINVAL); 908 } 909 910 /* handle ioctl */ 911 switch (cmd) { 912 913 case MD_IOCSET: 914 { 915 if (! (mode & FWRITE)) 916 return (EACCES); 917 918 sz = sizeof (struct md_set_params); 919 d = kmem_alloc(sz, KM_SLEEP); 920 921 if (ddi_copyin(data, d, sz, mode)) { 922 err = EFAULT; 923 break; 924 } 925 926 err = stripe_set(d, mode); 927 break; 928 } 929 930 case MD_IOCGET: 931 { 932 if (! (mode & FREAD)) 933 return (EACCES); 934 935 sz = sizeof (struct md_i_get); 936 d = kmem_alloc(sz, KM_SLEEP); 937 938 if (ddi_copyin(data, d, sz, mode)) { 939 err = EFAULT; 940 break; 941 } 942 943 err = stripe_get(d, mode, lockp); 944 break; 945 } 946 947 case MD_IOCRESET: 948 { 949 if (! (mode & FWRITE)) 950 return (EACCES); 951 952 sz = sizeof (md_i_reset_t); 953 d = kmem_alloc(sz, KM_SLEEP); 954 955 if (ddi_copyin(data, d, sz, mode)) { 956 err = EFAULT; 957 break; 958 } 959 960 err = stripe_reset((md_i_reset_t *)d); 961 break; 962 } 963 964 case MD_IOCGROW: 965 { 966 if (! (mode & FWRITE)) 967 return (EACCES); 968 969 sz = sizeof (struct md_grow_params); 970 d = kmem_alloc(sz, KM_SLEEP); 971 972 if (ddi_copyin(data, d, sz, mode)) { 973 err = EFAULT; 974 break; 975 } 976 977 err = stripe_grow(d, mode, lockp); 978 break; 979 } 980 981 case MD_IOCGET_DEVS: 982 { 983 if (! (mode & FREAD)) 984 return (EACCES); 985 986 sz = sizeof (struct md_getdevs_params); 987 d = kmem_alloc(sz, KM_SLEEP); 988 989 if (ddi_copyin(data, d, sz, mode)) { 990 err = EFAULT; 991 break; 992 } 993 994 err = stripe_getdevs(d, mode, lockp); 995 break; 996 } 997 998 case MD_IOCCHANGE: 999 { 1000 if (! (mode & FWRITE)) 1001 return (EACCES); 1002 1003 sz = sizeof (md_stripe_params_t); 1004 d = kmem_alloc(sz, KM_SLEEP); 1005 1006 if (ddi_copyin(data, d, sz, mode)) { 1007 err = EFAULT; 1008 break; 1009 } 1010 1011 err = stripe_change((md_stripe_params_t *)d, lockp); 1012 break; 1013 } 1014 1015 case MD_IOCREPLACE: 1016 { 1017 if (! (mode & FWRITE)) 1018 return (EACCES); 1019 1020 sz = sizeof (replace_params_t); 1021 d = kmem_alloc(sz, KM_SLEEP); 1022 1023 if (ddi_copyin(data, d, sz, mode)) { 1024 err = EFAULT; 1025 break; 1026 } 1027 1028 err = stripe_replace((replace_params_t *)d); 1029 break; 1030 } 1031 1032 case MD_IOCPROBE_DEV: 1033 { 1034 /* 1035 * Ignore the request since stripe is not 1036 * a type of 'redundant' metadevice 1037 */ 1038 break; 1039 } 1040 1041 default: 1042 return (ENOTTY); 1043 } 1044 1045 /* 1046 * copyout and free any args 1047 */ 1048 if (sz != 0) { 1049 if (err == 0) { 1050 if (ddi_copyout(d, data, sz, mode) != 0) { 1051 err = EFAULT; 1052 } 1053 } 1054 kmem_free(d, sz); 1055 } 1056 return (err); 1057 } 1058 1059 /* 1060 * The parameters of md_stripe_ioctl are defined by the ddi and so 1061 * dev is of type dev_t and not md_dev64_t 1062 */ 1063 int 1064 md_stripe_ioctl(dev_t dev, int cmd, void *data, int mode, IOLOCK *lockp) 1065 { 1066 minor_t mnum = getminor(dev); 1067 ms_unit_t *un; 1068 int err = 0; 1069 1070 /* handle admin ioctls */ 1071 if (mnum == MD_ADM_MINOR) 1072 return (stripe_admin_ioctl(cmd, data, mode, lockp)); 1073 1074 /* check unit */ 1075 if ((MD_MIN2SET(mnum) >= md_nsets) || 1076 (MD_MIN2UNIT(mnum) >= md_nunits) || 1077 ((un = MD_UNIT(mnum)) == NULL)) 1078 return (ENXIO); 1079 1080 /* is this a supported ioctl? */ 1081 err = md_check_ioctl_against_unit(cmd, un->c); 1082 if (err != 0) { 1083 return (err); 1084 } 1085 1086 /* handle ioctl */ 1087 switch (cmd) { 1088 1089 case DKIOCINFO: 1090 { 1091 struct dk_cinfo *p; 1092 1093 if (! (mode & FREAD)) 1094 return (EACCES); 1095 1096 p = kmem_alloc(sizeof (*p), KM_SLEEP); 1097 1098 get_info(p, mnum); 1099 if (ddi_copyout((caddr_t)p, data, sizeof (*p), mode) != 0) 1100 err = EFAULT; 1101 1102 kmem_free(p, sizeof (*p)); 1103 return (err); 1104 } 1105 1106 case DKIOCGMEDIAINFO: 1107 { 1108 struct dk_minfo p; 1109 1110 if (! (mode & FREAD)) 1111 return (EACCES); 1112 1113 get_minfo(&p, mnum); 1114 if (ddi_copyout(&p, data, sizeof (struct dk_minfo), mode) != 0) 1115 err = EFAULT; 1116 1117 return (err); 1118 } 1119 1120 case DKIOCGGEOM: 1121 { 1122 struct dk_geom *p; 1123 1124 if (! (mode & FREAD)) 1125 return (EACCES); 1126 1127 p = kmem_alloc(sizeof (*p), KM_SLEEP); 1128 1129 if ((err = stripe_get_geom(un, p)) == 0) { 1130 if (ddi_copyout((caddr_t)p, data, sizeof (*p), 1131 mode) != 0) 1132 err = EFAULT; 1133 } 1134 1135 kmem_free(p, sizeof (*p)); 1136 return (err); 1137 } 1138 1139 case DKIOCGVTOC: 1140 { 1141 struct vtoc *vtoc; 1142 1143 if (! (mode & FREAD)) 1144 return (EACCES); 1145 1146 vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP); 1147 if ((err = stripe_get_vtoc(un, vtoc)) != 0) { 1148 kmem_free(vtoc, sizeof (*vtoc)); 1149 return (err); 1150 } 1151 1152 if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) { 1153 if (ddi_copyout(vtoc, data, sizeof (*vtoc), mode)) 1154 err = EFAULT; 1155 } 1156 #ifdef _SYSCALL32 1157 else { 1158 struct vtoc32 *vtoc32; 1159 1160 vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP); 1161 1162 vtoctovtoc32((*vtoc), (*vtoc32)); 1163 if (ddi_copyout(vtoc32, data, sizeof (*vtoc32), mode)) 1164 err = EFAULT; 1165 kmem_free(vtoc32, sizeof (*vtoc32)); 1166 } 1167 #endif /* _SYSCALL32 */ 1168 1169 kmem_free(vtoc, sizeof (*vtoc)); 1170 return (err); 1171 } 1172 1173 case DKIOCSVTOC: 1174 { 1175 struct vtoc *vtoc; 1176 1177 if (! (mode & FWRITE)) 1178 return (EACCES); 1179 1180 vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP); 1181 if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) { 1182 if (ddi_copyin(data, vtoc, sizeof (*vtoc), mode)) { 1183 err = EFAULT; 1184 } 1185 } 1186 #ifdef _SYSCALL32 1187 else { 1188 struct vtoc32 *vtoc32; 1189 1190 vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP); 1191 1192 if (ddi_copyin(data, vtoc32, sizeof (*vtoc32), mode)) { 1193 err = EFAULT; 1194 } else { 1195 vtoc32tovtoc((*vtoc32), (*vtoc)); 1196 } 1197 kmem_free(vtoc32, sizeof (*vtoc32)); 1198 } 1199 #endif /* _SYSCALL32 */ 1200 1201 if (err == 0) { 1202 err = stripe_set_vtoc(un, vtoc); 1203 } 1204 1205 kmem_free(vtoc, sizeof (*vtoc)); 1206 return (err); 1207 } 1208 1209 1210 case DKIOCGEXTVTOC: 1211 { 1212 struct extvtoc *extvtoc; 1213 1214 if (! (mode & FREAD)) 1215 return (EACCES); 1216 1217 extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP); 1218 if ((err = stripe_get_extvtoc(un, extvtoc)) != 0) { 1219 kmem_free(extvtoc, sizeof (*extvtoc)); 1220 return (err); 1221 } 1222 1223 if (ddi_copyout(extvtoc, data, sizeof (*extvtoc), mode)) 1224 err = EFAULT; 1225 1226 kmem_free(extvtoc, sizeof (*extvtoc)); 1227 return (err); 1228 } 1229 1230 case DKIOCSEXTVTOC: 1231 { 1232 struct extvtoc *extvtoc; 1233 1234 if (! (mode & FWRITE)) 1235 return (EACCES); 1236 1237 extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP); 1238 if (ddi_copyin(data, extvtoc, sizeof (*extvtoc), mode)) { 1239 err = EFAULT; 1240 } 1241 1242 if (err == 0) { 1243 err = stripe_set_extvtoc(un, extvtoc); 1244 } 1245 1246 kmem_free(extvtoc, sizeof (*extvtoc)); 1247 return (err); 1248 } 1249 1250 case DKIOCGAPART: 1251 { 1252 struct dk_map dmp; 1253 1254 if ((err = stripe_get_cgapart(un, &dmp)) != 0) { 1255 return (err); 1256 } 1257 1258 if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) { 1259 if (ddi_copyout((caddr_t)&dmp, data, sizeof (dmp), 1260 mode) != 0) 1261 err = EFAULT; 1262 } 1263 #ifdef _SYSCALL32 1264 else { 1265 struct dk_map32 dmp32; 1266 1267 dmp32.dkl_cylno = dmp.dkl_cylno; 1268 dmp32.dkl_nblk = dmp.dkl_nblk; 1269 1270 if (ddi_copyout((caddr_t)&dmp32, data, sizeof (dmp32), 1271 mode) != 0) 1272 err = EFAULT; 1273 } 1274 #endif /* _SYSCALL32 */ 1275 1276 return (err); 1277 } 1278 case DKIOCGETEFI: 1279 { 1280 /* 1281 * This one can be done centralized, 1282 * no need to put in the same code for all types of metadevices 1283 */ 1284 return (md_dkiocgetefi(mnum, data, mode)); 1285 } 1286 case DKIOCSETEFI: 1287 { 1288 /* 1289 * This one can be done centralized, 1290 * no need to put in the same code for all types of metadevices 1291 */ 1292 return (md_dkiocsetefi(mnum, data, mode)); 1293 } 1294 case DKIOCPARTITION: 1295 { 1296 return (md_dkiocpartition(mnum, data, mode)); 1297 } 1298 1299 default: 1300 return (ENOTTY); 1301 } 1302 } 1303 1304 /* 1305 * rename named service entry points and support functions 1306 */ 1307 1308 /* 1309 * rename/exchange role swap functions are handled generically 1310 */ 1311 1312 /* 1313 * support routine for MDRNM_CHECK 1314 */ 1315 static int 1316 stripe_may_renexch_self( 1317 ms_unit_t *un, 1318 mdi_unit_t *ui, 1319 md_rentxn_t *rtxnp) 1320 { 1321 minor_t from_min; 1322 minor_t to_min; 1323 bool_t toplevel; 1324 bool_t related; 1325 1326 ASSERT(rtxnp); 1327 ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE)); 1328 1329 from_min = rtxnp->from.mnum; 1330 to_min = rtxnp->to.mnum; 1331 1332 if (!un || !ui) { 1333 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, 1334 from_min); 1335 return (EINVAL); 1336 } 1337 1338 ASSERT(!(MD_CAPAB(un) & MD_CAN_META_CHILD)); 1339 if (MD_CAPAB(un) & MD_CAN_META_CHILD) { 1340 (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min); 1341 return (EINVAL); 1342 } 1343 1344 if (MD_PARENT(un) == MD_MULTI_PARENT) { 1345 (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min); 1346 return (EINVAL); 1347 } 1348 1349 toplevel = !MD_HAS_PARENT(MD_PARENT(un)); 1350 1351 /* we're related if trying to swap with our parent */ 1352 related = (!toplevel) && (MD_PARENT(un) == to_min); 1353 1354 switch (rtxnp->op) { 1355 case MDRNOP_EXCHANGE: 1356 1357 if (!related) { 1358 (void) mdmderror(&rtxnp->mde, 1359 MDE_RENAME_TARGET_UNRELATED, to_min); 1360 return (EINVAL); 1361 } 1362 1363 break; 1364 1365 case MDRNOP_RENAME: 1366 /* 1367 * if from is top-level and is open, then the kernel is using 1368 * the device and we return EBUSY. 1369 */ 1370 1371 if (toplevel && md_unit_isopen(ui)) { 1372 (void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY, 1373 from_min); 1374 return (EBUSY); 1375 } 1376 break; 1377 1378 default: 1379 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, 1380 from_min); 1381 return (EINVAL); 1382 } 1383 1384 return (0); /* ok */ 1385 } 1386 1387 /* 1388 * Named service entry point: MDRNM_CHECK 1389 */ 1390 intptr_t 1391 stripe_rename_check( 1392 md_rendelta_t *delta, 1393 md_rentxn_t *rtxnp) 1394 { 1395 int err = 0; 1396 1397 ASSERT(delta); 1398 ASSERT(rtxnp); 1399 ASSERT(delta->unp); 1400 ASSERT(delta->uip); 1401 ASSERT((rtxnp->op == MDRNOP_RENAME) || (MDRNOP_EXCHANGE == rtxnp->op)); 1402 1403 if (!delta || !rtxnp || !delta->uip || !delta->unp) { 1404 (void) mdsyserror(&rtxnp->mde, EINVAL); 1405 return (EINVAL); 1406 } 1407 1408 /* self does additional checks */ 1409 if (delta->old_role == MDRR_SELF) { 1410 err = stripe_may_renexch_self((ms_unit_t *)delta->unp, 1411 delta->uip, rtxnp); 1412 } 1413 out: 1414 return (err); 1415 } 1416 /* end of rename/exchange */ 1417