Home | History | Annotate | Download | only in softpart
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*
     28  * Soft partitioning metadevice driver (md_sp), administrative routines.
     29  *
     30  * This file contains the administrative routines for the soft partitioning
     31  * metadevice driver.  All administration is done through the use of ioctl's.
     32  *
     33  * The primary ioctl's supported by soft partitions are as follows:
     34  *
     35  *	MD_IOCSET	- set up a new soft partition.
     36  *	MD_IOCGET	- get the unit structure of a soft partition.
     37  *	MD_IOCRESET	- delete a soft partition.
     38  *	MD_IOCGROW	- add space to a soft partition.
     39  *	MD_IOCGETDEVS	- get the device the soft partition is built on.
     40  *	MD_IOC_SPSTATUS	- set the status (un_status field in the soft
     41  *			  partition unit structure) for one or more soft
     42  *			  partitions.
     43  *
     44  * Note that, as with other metadevices, the majority of the work for
     45  * building/growing/deleting soft partitions is performed in userland
     46  * (specifically in libmeta, see meta_sp.c).  The driver's main administrative
     47  * function is to maintain the in-core & metadb entries associated with a soft
     48  * partition.
     49  *
     50  * In addition, a few other ioctl's are supported via helper routines in
     51  * the md driver.  These are:
     52  *
     53  *	DKIOCINFO	- get "disk" information.
     54  *	DKIOCGEOM	- get geometry information.
     55  *	DKIOCGVTOC	- get vtoc information.
     56  */
     57 #include <sys/param.h>
     58 #include <sys/systm.h>
     59 #include <sys/conf.h>
     60 #include <sys/file.h>
     61 #include <sys/user.h>
     62 #include <sys/uio.h>
     63 #include <sys/t_lock.h>
     64 #include <sys/buf.h>
     65 #include <sys/dkio.h>
     66 #include <sys/vtoc.h>
     67 #include <sys/kmem.h>
     68 #include <vm/page.h>
     69 #include <sys/sysmacros.h>
     70 #include <sys/types.h>
     71 #include <sys/mkdev.h>
     72 #include <sys/stat.h>
     73 #include <sys/open.h>
     74 #include <sys/lvm/mdvar.h>
     75 #include <sys/lvm/md_sp.h>
     76 #include <sys/lvm/md_notify.h>
     77 #include <sys/modctl.h>
     78 #include <sys/ddi.h>
     79 #include <sys/sunddi.h>
     80 #include <sys/debug.h>
     81 #include <sys/model.h>
     82 
     83 #include <sys/sysevent/eventdefs.h>
     84 #include <sys/sysevent/svm.h>
     85 
/* NOTE(review): presumably the md driver's global status flags -- confirm */
extern int		md_status;

/* upper bounds used to range-check the unit and set parts of a minor number */
extern unit_t		md_nunits;
extern set_t		md_nsets;
/* per-set state, indexed by set number (e.g. s_un_avail, s_am_i_master) */
extern md_set_t		md_set[];

/* soft partition driver ops; source of the driver name used for record types */
extern md_ops_t		sp_md_ops;
/* taken as writer while a unit is deleted (sp_reset) or replaced (sp_grow) */
extern md_krwlock_t	md_unit_array_rw;
/* md driver major number; a device with this major is itself a metadevice */
extern major_t		md_major;
     95 
     96 /*
     97  * FUNCTION:	sp_getun()
     98  * INPUT:	mnum	- minor number of soft partition to get.
     99  * OUTPUT:	mde	- return error pointer.
    100  * RETURNS:	mp_unit_t *	- ptr to unit structure requested
    101  *		NULL		- error
    102  * PURPOSE:	Returns a reference to the soft partition unit structure
    103  *		indicated by the passed-in minor number.
    104  */
    105 static mp_unit_t *
    106 sp_getun(minor_t mnum, md_error_t *mde)
    107 {
    108 	mp_unit_t	*un;
    109 	mdi_unit_t	*ui;
    110 	set_t		setno = MD_MIN2SET(mnum);
    111 
    112 	/* check set */
    113 	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) {
    114 		(void) mdmderror(mde, MDE_INVAL_UNIT, mnum);
    115 		return (NULL);
    116 	}
    117 
    118 	if (md_get_setstatus(setno) & MD_SET_STALE) {
    119 		(void) mdmddberror(mde, MDE_DB_STALE, mnum, setno);
    120 		return (NULL);
    121 	}
    122 
    123 	ui = MDI_UNIT(mnum);
    124 
    125 	if (ui == NULL) {
    126 		(void) mdmderror(mde, MDE_UNIT_NOT_SETUP, mnum);
    127 		return (NULL);
    128 	}
    129 
    130 	un = (mp_unit_t *)MD_UNIT(mnum);
    131 
    132 	if (un->c.un_type != MD_METASP) {
    133 		(void) mdmderror(mde, MDE_NOT_SP, mnum);
    134 		return (NULL);
    135 	}
    136 
    137 	return (un);
    138 }
    139 
    140 
    141 /*
    142  * FUNCTION:	sp_setstatus()
    143  * INPUT:	d	- data ptr passed in from ioctl.
    144  *		mode	- pass-through to ddi_copyin.
    145  *		lockp	- lock ptr.
    146  * OUTPUT:	none.
    147  * RETURNS:	0		- success.
    148  *		non-zero	- error.
    149  * PURPOSE:	Set the status of one or more soft partitions atomically.
    150  *		this implements the MD_IOC_SPSTATUS ioctl.  Soft partitions
    151  *		are passed in as an array of minor numbers.  The un_status
    152  *		field in the unit structure of each soft partition is set to
    153  *		the status passed in and all unit structures are recommitted
    154  *		to the metadb at once.
    155  */
    156 static int
    157 sp_setstatus(void *d, int mode, IOLOCK *lockp)
    158 {
    159 	minor_t		*minors;
    160 	mp_unit_t	*un;
    161 	mddb_recid_t	*recids;
    162 	int		i, nunits, sz;
    163 	int		err = 0;
    164 	sp_status_t	status;
    165 	md_error_t	*mdep;
    166 
    167 	md_sp_statusset_t	*msp = (md_sp_statusset_t *)d;
    168 
    169 	nunits = msp->num_units;
    170 	sz = msp->size;
    171 	status = msp->new_status;
    172 	mdep = &msp->mde;
    173 
    174 	mdclrerror(mdep);
    175 	/* allocate minor number and recids arrays */
    176 	minors = kmem_alloc(sz, KM_SLEEP);
    177 	recids = kmem_alloc((nunits + 1) * sizeof (mddb_recid_t), KM_SLEEP);
    178 
    179 	/* copyin minor number array */
    180 	if (err = ddi_copyin((void *)(uintptr_t)msp->minors, minors, sz, mode))
    181 		goto out;
    182 
    183 	/* check to make sure all units are valid first */
    184 	for (i = 0; i < nunits; i++) {
    185 		if ((un = sp_getun(minors[i], mdep)) == NULL) {
    186 			err = mdmderror(mdep, MDE_INVAL_UNIT, minors[i]);
    187 			goto out;
    188 		}
    189 	}
    190 
    191 	/* update state for all units */
    192 	for (i = 0; i < nunits; i++) {
    193 		un = sp_getun(minors[i], mdep);
    194 		(void) md_ioctl_writerlock(lockp, MDI_UNIT(minors[i]));
    195 		un->un_status = status;
    196 		recids[i] = un->c.un_record_id;
    197 		md_ioctl_writerexit(lockp);
    198 	}
    199 
    200 	recids[i] = 0;
    201 	mddb_commitrecs_wrapper(recids);
    202 
    203 out:
    204 	kmem_free(minors, sz);
    205 	kmem_free(recids, ((nunits + 1) * sizeof (mddb_recid_t)));
    206 	return (err);
    207 }
    208 
    209 
    210 /*
    211  * FUNCTION:	sp_update_watermarks()
    212  * INPUT:	d	- data ptr passed in from ioctl.
    213  *		mode	- pass-through to ddi_copyin.
    214  * OUTPUT:	none.
    215  * RETURNS:	0		- success.
    216  *		non-zero	- error.
    217  * PURPOSE:	This implements the MD_IOC_SPUPDATEWM ioctl.
    218  *              Watermarks are passed in an array.
    219  */
    220 static int
    221 sp_update_watermarks(void *d, int mode)
    222 {
    223 	minor_t			mnum;
    224 	set_t			setno;
    225 	md_error_t		*mdep;
    226 	mp_unit_t		*un;
    227 	int			err = 0;
    228 	size_t			wsz;
    229 	size_t			osz;
    230 	mp_watermark_t		*watermarks;
    231 	sp_ext_offset_t		*offsets;
    232 	md_dev64_t		device;
    233 	buf_t			*bp;
    234 	int			i;
    235 	md_sp_update_wm_t	*mup = (md_sp_update_wm_t *)d;
    236 	side_t			side;
    237 
    238 	mnum = mup->mnum;
    239 	setno = MD_MIN2SET(mnum);
    240 	side = mddb_getsidenum(setno);
    241 	un = MD_UNIT(mnum);
    242 
    243 	if (un == NULL)
    244 		return (EFAULT);
    245 
    246 	mdep = &mup->mde;
    247 
    248 	mdclrerror(mdep);
    249 
    250 	/* Validate the set */
    251 	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
    252 		return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));
    253 	if (md_get_setstatus(setno) & MD_SET_STALE)
    254 		return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno));
    255 
    256 	wsz = mup->count * sizeof (mp_watermark_t);
    257 	watermarks = kmem_alloc(wsz, KM_SLEEP);
    258 
    259 	osz = mup->count * sizeof (sp_ext_offset_t);
    260 	offsets = kmem_alloc(osz, KM_SLEEP);
    261 
    262 	/*
    263 	 * Once we're here, we are no longer stateless: we cannot
    264 	 * return without first freeing the watermarks and offset
    265 	 * arrays we just allocated.  So use the "out" label instead
    266 	 * of "return."
    267 	 */
    268 
    269 	/* Retrieve the watermark and offset arrays from user land */
    270 
    271 	if (ddi_copyin((void *)(uintptr_t)mup->wmp, watermarks, wsz, mode)) {
    272 		err = EFAULT;
    273 		goto out;
    274 	}
    275 
    276 	if (ddi_copyin((void *)(uintptr_t)mup->osp, offsets, osz, mode)) {
    277 		err = EFAULT;
    278 		goto out;
    279 	}
    280 
    281 	/*
    282 	 * NOTE: For multi-node sets we only commit the watermarks if we are
    283 	 * the master node. This avoids an ioctl-within-ioctl deadlock if the
    284 	 * underlying device is a mirror.
    285 	 */
    286 	if (MD_MNSET_SETNO(setno) && !md_set[setno].s_am_i_master) {
    287 		goto out;
    288 	}
    289 
    290 	device = un->un_dev;
    291 	if ((md_getmajor(device) != md_major) &&
    292 	    (md_devid_found(setno, side, un->un_key) == 1)) {
    293 		device = md_resolve_bydevid(mnum, device, un->un_key);
    294 	}
    295 	/*
    296 	 * Flag the fact that we're coming from an ioctl handler to the
    297 	 * underlying device so that it can take appropriate action if needed.
    298 	 * This is necessary for multi-owner mirrors as they may need to
    299 	 * update the metadevice state as a result of the layered open.
    300 	 */
    301 	if (md_layered_open(mnum, &device, MD_OFLG_FROMIOCTL)) {
    302 		err = mdcomperror(mdep, MDE_SP_COMP_OPEN_ERR,
    303 		    mnum, device);
    304 		goto out;
    305 	}
    306 
    307 	bp = kmem_alloc(biosize(), KM_SLEEP);
    308 	bioinit(bp);
    309 
    310 	for (i = 0; i < mup->count; i++) {
    311 
    312 		/*
    313 		 * Even the "constant" fields should be initialized
    314 		 * here, since bioreset() below will clear them.
    315 		 */
    316 		bp->b_flags = B_WRITE;
    317 		bp->b_bcount = sizeof (mp_watermark_t);
    318 		bp->b_bufsize = sizeof (mp_watermark_t);
    319 		bp->b_un.b_addr = (caddr_t)&watermarks[i];
    320 		bp->b_lblkno = offsets[i];
    321 		bp->b_edev = md_dev64_to_dev(device);
    322 
    323 		/*
    324 		 * For MN sets only:
    325 		 * Use a special flag MD_STR_WMUPDATE, for the following case:
    326 		 * If the watermarks reside on a mirror disk and a switch
    327 		 * of ownership is triggered by this IO,
    328 		 * the message that is generated by that request must be
    329 		 * processed even if the commd subsystem is currently suspended.
    330 		 *
    331 		 * For non-MN sets or non-mirror metadevices,
    332 		 * this flag has no meaning and is not checked.
    333 		 */
    334 
    335 		md_call_strategy(bp, MD_NOBLOCK | MD_STR_WMUPDATE, NULL);
    336 
    337 		if (biowait(bp)) {
    338 			err = mdmderror(mdep,
    339 			    MDE_SP_BADWMWRITE, mnum);
    340 			break;
    341 		}
    342 
    343 		/* Get the buf_t ready for the next iteration */
    344 		bioreset(bp);
    345 	}
    346 
    347 	biofini(bp);
    348 	kmem_free(bp, biosize());
    349 
    350 	md_layered_close(device, MD_OFLG_NULL);
    351 
    352 out:
    353 	kmem_free(watermarks, wsz);
    354 	kmem_free(offsets, osz);
    355 
    356 	return (err);
    357 }
    358 
    359 
    360 /*
    361  * FUNCTION:	sp_read_watermark()
    362  * INPUT:	d	- data ptr passed in from ioctl.
    363  *		mode	- pass-through to ddi_copyin.
    364  * OUTPUT:	none.
    365  * RETURNS:	0		- success.
    366  *		non-zero	- error.
    367  * PURPOSE:	This implements the MD_IOC_SPREADWM ioctl.
    368  */
    369 static int
    370 sp_read_watermark(void *d, int mode)
    371 {
    372 	md_error_t		*mdep;
    373 	mp_watermark_t		watermark;
    374 	md_dev64_t		device;
    375 	buf_t			*bp;
    376 	md_sp_read_wm_t		*mrp = (md_sp_read_wm_t *)d;
    377 
    378 	mdep = &mrp->mde;
    379 
    380 	mdclrerror(mdep);
    381 
    382 	device = mrp->rdev;
    383 
    384 	/*
    385 	 * Flag the fact that we are being called from ioctl context so that
    386 	 * the underlying device can take any necessary extra steps to handle
    387 	 * this scenario.
    388 	 */
    389 	if (md_layered_open((minor_t)-1, &device, MD_OFLG_FROMIOCTL)) {
    390 		return (mdcomperror(mdep, MDE_SP_COMP_OPEN_ERR,
    391 		    (minor_t)NODEV, device));
    392 	}
    393 
    394 	bp = kmem_alloc(biosize(), KM_SLEEP);
    395 	bioinit(bp);
    396 
    397 	bp->b_flags = B_READ;
    398 	bp->b_bcount = sizeof (mp_watermark_t);
    399 	bp->b_bufsize = sizeof (mp_watermark_t);
    400 	bp->b_un.b_addr = (caddr_t)&watermark;
    401 	bp->b_lblkno = mrp->offset;
    402 	bp->b_edev = md_dev64_to_dev(device);
    403 
    404 	md_call_strategy(bp, MD_NOBLOCK, NULL);
    405 
    406 	if (biowait(bp)) {
    407 		/*
    408 		 * Taking advantage of the knowledge that mdmderror()
    409 		 * returns 0, so we don't really need to keep track of
    410 		 * an error code other than in the error struct.
    411 		 */
    412 		(void) mdmderror(mdep, MDE_SP_BADWMREAD,
    413 		    getminor(device));
    414 	}
    415 
    416 	biofini(bp);
    417 	kmem_free(bp, biosize());
    418 
    419 	md_layered_close(device, MD_OFLG_NULL);
    420 
    421 	if (ddi_copyout(&watermark, (void *)(uintptr_t)mrp->wmp,
    422 	    sizeof (mp_watermark_t), mode)) {
    423 		return (EFAULT);
    424 	}
    425 
    426 	return (0);
    427 }
    428 
    429 
    430 /*
    431  * FUNCTION:	sp_set()
    432  * INPUT:	d	- data ptr passed in from ioctl.
    433  *		mode	- pass-through to ddi_copyin.
    434  * OUTPUT:	none.
    435  * RETURNS:	0		- success.
    436  *		non-zero	- error.
    437  * PURPOSE:	Create a soft partition.  The unit structure representing
    438  *		the soft partiton is passed down from userland.  We allocate
    439  *		a metadb entry, copyin the unit the structure, handle any
    440  *		metadevice parenting issues, then commit the record to the
    441  *		metadb.  Once the record is in the metadb, we must also
    442  *		build the associated in-core structures.  This is done via
    443  *		sp_build_incore() (see sp.c).
    444  */
    445 static int
    446 sp_set(void *d, int mode)
    447 {
    448 	minor_t		mnum;
    449 	mp_unit_t	*un;
    450 	void		*rec_addr;
    451 	mddb_recid_t	recids[3];
    452 	mddb_type_t	rec_type;
    453 	int		err;
    454 	set_t		setno;
    455 	md_error_t	*mdep;
    456 	md_unit_t	*child_un;
    457 	md_set_params_t *msp = (md_set_params_t *)d;
    458 
    459 	mnum = msp->mnum;
    460 	setno = MD_MIN2SET(mnum);
    461 	mdep = &msp->mde;
    462 
    463 	mdclrerror(mdep);
    464 
    465 	/* validate set */
    466 
    467 	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
    468 		return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));
    469 	if (md_get_setstatus(setno) & MD_SET_STALE)
    470 		return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno));
    471 
    472 	/* get the record type */
    473 	rec_type = (mddb_type_t)md_getshared_key(setno,
    474 	    sp_md_ops.md_driver.md_drivername);
    475 
    476 	/* check if there is already a device with this minor number */
    477 	un = MD_UNIT(mnum);
    478 	if (un != NULL)
    479 		return (mdmderror(mdep, MDE_UNIT_ALREADY_SETUP, mnum));
    480 
    481 	/* create the db record for this soft partition */
    482 
    483 	if (msp->options & MD_CRO_64BIT) {
    484 #if defined(_ILP32)
    485 		return (mdmderror(mdep, MDE_UNIT_TOO_LARGE, mnum));
    486 #else
    487 		recids[0] = mddb_createrec((size_t)msp->size, rec_type, 0,
    488 		    MD_CRO_64BIT | MD_CRO_SOFTPART | MD_CRO_FN, setno);
    489 #endif
    490 	} else {
    491 		recids[0] = mddb_createrec((size_t)msp->size, rec_type, 0,
    492 		    MD_CRO_32BIT | MD_CRO_SOFTPART | MD_CRO_FN, setno);
    493 	}
    494 	/* set initial value for possible child record */
    495 	recids[1] = 0;
    496 	if (recids[0] < 0)
    497 		return (mddbstatus2error(mdep, recids[0], mnum, setno));
    498 
    499 	/* get the address of the soft partition db record */
    500 	rec_addr = (void *) mddb_getrecaddr(recids[0]);
    501 
    502 	/*
    503 	 * at this point we can happily mess with the soft partition
    504 	 * db record since we haven't committed it to the metadb yet.
    505 	 * if we crash before we commit, the uncommitted record will be
    506 	 * automatically purged.
    507 	 */
    508 
    509 	/* copy in the user's soft partition unit struct */
    510 	if (err = ddi_copyin((void *)(uintptr_t)msp->mdp,
    511 	    rec_addr, (size_t)msp->size, mode)) {
    512 		mddb_deleterec_wrapper(recids[0]);
    513 		return (EFAULT);
    514 	}
    515 
    516 	/* fill in common unit structure fields which aren't set in userland */
    517 	un = (mp_unit_t *)rec_addr;
    518 
    519 	/* All 64 bit metadevices only support EFI labels. */
    520 	if (msp->options & MD_CRO_64BIT) {
    521 		un->c.un_flag |= MD_EFILABEL;
    522 	}
    523 
    524 	MD_SID(un) = mnum;
    525 	MD_RECID(un) = recids[0];
    526 	MD_PARENT(un) = MD_NO_PARENT;
    527 	un->c.un_revision |= MD_FN_META_DEV;
    528 
    529 	/* if we are parenting a metadevice, set our child's parent field */
    530 	if (md_getmajor(un->un_dev) == md_major) {
    531 		/* it's a metadevice, need to parent it */
    532 		child_un = MD_UNIT(md_getminor(un->un_dev));
    533 		if (child_un == NULL) {
    534 			mddb_deleterec_wrapper(recids[0]);
    535 			return (mdmderror(mdep, MDE_INVAL_UNIT,
    536 			    md_getminor(un->un_dev)));
    537 		}
    538 		md_set_parent(un->un_dev, MD_SID(un));
    539 
    540 		/* set child recid and recids end marker */
    541 		recids[1] = MD_RECID(child_un);
    542 		recids[2] = 0;
    543 	}
    544 
    545 	/*
    546 	 * build the incore structures.
    547 	 */
    548 	if (err = sp_build_incore(rec_addr, 0)) {
    549 		md_nblocks_set(mnum, -1ULL);
    550 		MD_UNIT(mnum) = NULL;
    551 
    552 		mddb_deleterec_wrapper(recids[0]);
    553 		return (err);
    554 	}
    555 
    556 	/*
    557 	 * Update unit availability
    558 	 */
    559 	md_set[setno].s_un_avail--;
    560 
    561 	/*
    562 	 * commit the record.
    563 	 * if we had to update a child record, it will get commited
    564 	 * as well.
    565 	 */
    566 	mddb_commitrecs_wrapper(recids);
    567 
    568 	/* create the mdi_unit struct for this soft partition */
    569 	md_create_unit_incore(mnum, &sp_md_ops, 0);
    570 
    571 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, TAG_METADEVICE, MD_UN2SET(un),
    572 	    MD_SID(un));
    573 	return (0);
    574 }
    575 
    576 
    577 /*
    578  * FUNCTION:	sp_get()
    579  * INPUT:	d	- data ptr.
    580  *		mode	- pass-through to ddi_copyout.
    581  *		lock	- lock ptr.
    582  * OUTPUT:	none.
    583  * RETURNS:	0		- success.
    584  *		non-zero	- error.
    585  * PURPOSE:	Get the soft partition unit structure specified by the
    586  *		minor number.  the in-core unit structure is obtained
    587  *		and copied into the md_i_get structure passed down from
    588  *		userland.
    589  */
    590 static int
    591 sp_get(void *d, int mode, IOLOCK *lock)
    592 {
    593 	minor_t		mnum;
    594 	mdi_unit_t	*ui;
    595 	mp_unit_t	*un;
    596 	md_error_t	*mdep;
    597 	md_i_get_t	*migp = d;
    598 
    599 
    600 	mnum = migp->id;
    601 	mdep = &migp->mde;
    602 
    603 	mdclrerror(mdep);
    604 
    605 	/* make sure this is a valid unit structure */
    606 	if ((MD_MIN2SET(mnum) >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
    607 		return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));
    608 
    609 	/* get the mdi_unit */
    610 	if ((ui = MDI_UNIT(mnum)) == NULL) {
    611 		return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum));
    612 	}
    613 
    614 	/*
    615 	 * md_ioctl_readerlock returns a reference to the in-core
    616 	 * unit structure.  this lock will be dropped by
    617 	 * md_ioctl_lock_exit() before the ioctl returns.
    618 	 */
    619 	un = (mp_unit_t *)md_ioctl_readerlock(lock, ui);
    620 
    621 	/* verify the md_i_get structure */
    622 	if (migp->size == 0) {
    623 		migp->size = un->c.un_size;
    624 		return (0);
    625 	}
    626 	if (migp->size < un->c.un_size) {
    627 		return (EFAULT);
    628 	}
    629 
    630 	/* copyout unit */
    631 	if (ddi_copyout(un, (void *)(uintptr_t)migp->mdp,
    632 	    un->c.un_size, mode))
    633 		return (EFAULT);
    634 	return (0);
    635 }
    636 
    637 
    638 /*
    639  * FUNCTION:	sp_reset()
    640  * INPUT:	reset_params	- soft partitioning reset parameters.
    641  * OUTPUT:	none.
    642  * RETURNS:	0		- success.
    643  *		non-zero	- error.
    644  * PURPOSE:	Do the setup work needed to delete a soft partition.
    645  *		note that the actual removal of both in-core and metadb
    646  *		structures is done in the reset_sp() routine (see sp.c).
    647  *		In addition, since multiple soft partitions may exist
    648  *		on top of a single metadevice, the soft partition reset
    649  *		parameters (md_sp_reset_t) contains information about
    650  *		how the soft partition should deparent/reparent the
    651  *		underlying metadevice.  If the underlying metadevice is
    652  *		to be deparented, the new_parent field will be MD_NO_PARENT,
    653  *		otherwise it will be contain the minor number of another
    654  *		soft partition built on top of the underlying metadevice.
    655  */
    656 static int
    657 sp_reset(md_sp_reset_t *softp)
    658 {
    659 	minor_t		mnum = softp->mnum;
    660 	mdi_unit_t	*ui;
    661 	mp_unit_t	*un;
    662 	md_unit_t	*child_un;
    663 	set_t		setno = MD_MIN2SET(mnum);
    664 
    665 	mdclrerror(&softp->mde);
    666 
    667 	/* get the unit structure */
    668 	if ((un = sp_getun(mnum, &softp->mde)) == NULL) {
    669 		return (mdmderror(&softp->mde, MDE_INVAL_UNIT, mnum));
    670 	}
    671 
    672 	/* don't delete if we have a parent */
    673 	if (MD_HAS_PARENT(un->c.un_parent)) {
    674 		return (mdmderror(&softp->mde, MDE_IN_USE, mnum));
    675 	}
    676 
    677 	rw_enter(&md_unit_array_rw.lock, RW_WRITER);
    678 
    679 	ui = MDI_UNIT(mnum);
    680 	(void) md_unit_openclose_enter(ui);
    681 
    682 	/* don't delete if we are currently open */
    683 	if (md_unit_isopen(ui)) {
    684 		md_unit_openclose_exit(ui);
    685 		rw_exit(&md_unit_array_rw.lock);
    686 		return (mdmderror(&softp->mde, MDE_IS_OPEN, mnum));
    687 	}
    688 
    689 	md_unit_openclose_exit(ui);
    690 
    691 	/*
    692 	 * if we are built on metadevice, we need to deparent
    693 	 * or reparent that metadevice.
    694 	 */
    695 	if (md_getmajor(un->un_dev) == md_major) {
    696 		child_un = MD_UNIT(md_getminor(un->un_dev));
    697 		md_set_parent(un->un_dev, softp->new_parent);
    698 		mddb_commitrec_wrapper(MD_RECID(child_un));
    699 	}
    700 	/* remove the soft partition */
    701 	reset_sp(un, mnum, 1);
    702 
    703 	/*
    704 	 * Update unit availability
    705 	 */
    706 	md_set[setno].s_un_avail++;
    707 
    708 	/*
    709 	 * If MN set, reset s_un_next so all nodes can have
    710 	 * the same view of the next available slot when
    711 	 * nodes are -w and -j
    712 	 */
    713 	if (MD_MNSET_SETNO(setno)) {
    714 		md_upd_set_unnext(setno, MD_MIN2UNIT(mnum));
    715 	}
    716 
    717 	/* release locks and return */
    718 out:
    719 	rw_exit(&md_unit_array_rw.lock);
    720 	return (0);
    721 }
    722 
    723 
/*
 * FUNCTION:	sp_grow()
 * INPUT:	d	- data ptr (md_grow_params_t).
 *		mode	- pass-through to ddi_copyin.
 *		lockp	- lock ptr.
 * OUTPUT:	none.
 * RETURNS:	0		- success.
 *		non-zero	- error.
 * PURPOSE:	Attach more space to a soft partition.  We are passed in
 *		a new unit structure with the new extents and other updated
 *		information.  The new unit structure essentially replaces
 *		the old unit for this soft partition.  We place the new
 *		unit into the metadb, delete the old metadb record, and
 *		then update the in-core unit structure array to point to
 *		the new unit.
 */
static int
sp_grow(void *d, int mode, IOLOCK *lockp)
{
	minor_t		mnum;
	mp_unit_t	*un, *new_un;
	mdi_unit_t	*ui;
	minor_t		*par = NULL;
	IOLOCK		*plock = NULL;
	int		i;
	mddb_recid_t	recid;
	mddb_type_t	rec_type;
	mddb_recid_t	old_vtoc = 0;
	md_create_rec_option_t options;
	int		err;
	int		rval = 0;
	set_t		setno;
	md_error_t	*mdep;
	int		npar;
	md_grow_params_t *mgp = (md_grow_params_t *)d;

	mnum = mgp->mnum;
	mdep = &mgp->mde;
	setno = MD_MIN2SET(mnum);
	npar = mgp->npar;

	mdclrerror(mdep);

	/* validate set */
	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
		return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));
	if (md_get_setstatus(setno) & MD_SET_STALE)
		return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno));

	/* make sure this soft partition already exists */
	ui = MDI_UNIT(mnum);
	if (ui == NULL)
		return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum));

	/*
	 * handle any parents: copy in the caller's array of parent minor
	 * numbers and allocate one IOLOCK per parent.  When npar < 1 both
	 * par and plock stay NULL and every loop over npar below is empty.
	 */
	if (npar >= 1) {
		ASSERT((minor_t *)(uintptr_t)mgp->par != NULL);
		par = kmem_alloc(npar * sizeof (*par), KM_SLEEP);
		plock = kmem_alloc(npar * sizeof (*plock), KM_SLEEP);
		if (ddi_copyin((void *)(uintptr_t)mgp->par, par,
		    (npar * sizeof (*par)), mode) != 0) {
			/* nothing is locked yet, so a plain return is safe */
			kmem_free(par, npar * sizeof (*par));
			kmem_free(plock, npar * sizeof (*plock));
			return (EFAULT);
		}
	}

	/*
	 * handle parent locking.  grab the unit writer lock,
	 * then all parent ioctl locks, and then finally our own.
	 * parents should be sorted to avoid deadlock.
	 *
	 * NOTE(review): the par[] entries come from the caller and are
	 * handed to MDI_UNIT() without a range or NULL check here, and
	 * plock[] is not initialized before use -- confirm that the
	 * callers / md_ioctl_writerlock() make this safe.
	 */
	rw_enter(&md_unit_array_rw.lock, RW_WRITER);
	for (i = 0; i < npar; ++i) {
		(void) md_ioctl_writerlock(&plock[i],
		    MDI_UNIT(par[i]));
	}
	un = (mp_unit_t *)md_ioctl_writerlock(lockp, ui);

	/* the record type is keyed off the soft partition driver name */
	rec_type = (mddb_type_t)md_getshared_key(setno,
	    sp_md_ops.md_driver.md_drivername);

	/*
	 * Preserve the friendly name nature of the unit that is growing.
	 */
	options = MD_CRO_SOFTPART;
	if (un->c.un_revision & MD_FN_META_DEV)
		options |= MD_CRO_FN;
	/* create the metadb record that will hold the replacement unit */
	if (mgp->options & MD_CRO_64BIT) {
#if defined(_ILP32)
		/* a 64 bit unit cannot be created when built as ILP32 */
		rval = mdmderror(mdep, MDE_UNIT_TOO_LARGE, mnum);
		goto out;
#else
		recid = mddb_createrec((size_t)mgp->size, rec_type, 0,
		    MD_CRO_64BIT | options, setno);
#endif
	} else {
		recid = mddb_createrec((size_t)mgp->size, rec_type, 0,
		    MD_CRO_32BIT | options, setno);
	}
	/* a negative recid is a metadb status code, not a record id */
	if (recid < 0) {
		rval = mddbstatus2error(mdep, (int)recid, mnum, setno);
		goto out;
	}

	/* get the address of the new unit */
	new_un = (mp_unit_t *)mddb_getrecaddr(recid);

	/* copy in the user's unit struct */
	err = ddi_copyin((void *)(uintptr_t)mgp->mdp, new_un,
	    (size_t)mgp->size, mode);
	if (err) {
		/* drop the uncommitted record; the old unit is untouched */
		mddb_deleterec_wrapper(recid);
		rval = EFAULT;
		goto out;
	}
	/* re-apply the friendly name flag on top of the copied-in unit */
	if (options & MD_CRO_FN)
		new_un->c.un_revision |= MD_FN_META_DEV;

	/* All 64 bit metadevices only support EFI labels. */
	if (mgp->options & MD_CRO_64BIT) {
		new_un->c.un_flag |= MD_EFILABEL;
		/*
		 * If the device was previously smaller than a terabyte,
		 * and had a vtoc record attached to it, we remove the
		 * vtoc record, because the layout has changed completely.
		 */
		if (((un->c.un_revision & MD_64BIT_META_DEV) == 0) &&
		    (un->c.un_vtoc_id != 0)) {
			/* remember the old record; it is deleted further down */
			old_vtoc = un->c.un_vtoc_id;
			new_un->c.un_vtoc_id =
			    md_vtoc_to_efi_record(old_vtoc, setno);
		}
	}

	/* commit new unit struct */
	MD_RECID(new_un) = recid;
	mddb_commitrec_wrapper(recid);

	/*
	 * delete old unit struct.  This happens only after the new record
	 * has been committed above.
	 */
	mddb_deleterec_wrapper(MD_RECID(un));

	/* place new unit in in-core array */
	md_nblocks_set(mnum, new_un->c.un_total_blocks);
	MD_UNIT(mnum) = new_un;

	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, TAG_METADEVICE,
	    MD_UN2SET(new_un), MD_SID(new_un));

	/*
	 * If old_vtoc has a non zero value, we know:
	 * - This unit crossed the border from smaller to larger one TB
	 * - There was a vtoc record for the unit,
	 * - This vtoc record is no longer needed, because
	 *   a new efi record has been created for this un.
	 */
	if (old_vtoc != 0) {
		mddb_deleterec_wrapper(old_vtoc);
	}

	/*
	 * release locks, return success.  The error paths above also land
	 * here with rval set.  Parent locks are released in the reverse of
	 * the order they were taken.  Our own lock (lockp) is not released
	 * in this function -- presumably the ioctl framework drops it;
	 * confirm against the caller.
	 */
out:
	for (i =  npar - 1; (i >= 0); --i)
		md_ioctl_writerexit(&plock[i]);
	rw_exit(&md_unit_array_rw.lock);
	if (plock != NULL)
		kmem_free(plock, npar * sizeof (*plock));
	if (par != NULL)
		kmem_free(par, npar * sizeof (*par));
	return (rval);
}
    897 
    898 /*
    899  * FUNCTION:	sp_getdevs()
    900  * INPUT:	d	- data ptr.
    901  *		mode	- pass-through to ddi_copyout.
    902  *		lockp	- lock ptr.
    903  * OUTPUT:	none.
    904  * RETURNS:	0		- success.
    905  *		non-zero	- error.
    906  * PURPOSE:	Get the device on which the soft partition is built.
    907  *		This is simply a matter of copying out the md_dev64_t stored
    908  *		in the soft partition unit structure.
    909  */
    910 static int
    911 sp_getdevs(
    912 	void			*d,
    913 	int			mode,
    914 	IOLOCK			*lockp
    915 )
    916 {
    917 	minor_t			mnum;
    918 	mdi_unit_t		*ui;
    919 	mp_unit_t		*un;
    920 	md_error_t		*mdep;
    921 	md_dev64_t		*devsp;
    922 	md_dev64_t		unit_dev;
    923 	md_getdevs_params_t	*mgdp = (md_getdevs_params_t *)d;
    924 
    925 
    926 	mnum = mgdp->mnum;
    927 	mdep = &(mgdp->mde);
    928 
    929 	mdclrerror(mdep);
    930 
    931 	/* check set */
    932 	if ((MD_MIN2SET(mnum) >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
    933 		return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));
    934 	/* check unit */
    935 	if ((ui = MDI_UNIT(mnum)) == NULL) {
    936 		return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum));
    937 	}
    938 	/* get unit */
    939 	un = (mp_unit_t *)md_ioctl_readerlock(lockp, ui);
    940 	devsp = (md_dev64_t *)(uintptr_t)mgdp->devs;
    941 
    942 	/* only ever 1 device for a soft partition */
    943 	if (mgdp->cnt != 0) {
    944 		/* do miniroot->target device translation */
    945 		unit_dev = un->un_dev;
    946 		if (md_getmajor(unit_dev) != md_major) {
    947 			if ((unit_dev = md_xlate_mini_2_targ(unit_dev))
    948 			    == NODEV64)
    949 				return (ENODEV);
    950 		}
    951 		/* copyout dev information */
    952 		if (ddi_copyout(&unit_dev, devsp, sizeof (*devsp), mode) != 0)
    953 			return (EFAULT);
    954 	}
    955 	mgdp->cnt = 1;
    956 
    957 	return (0);
    958 }
    959 
    960 /*
    961  * sp_set_capability:
    962  * ------------------
    963  * Called to set or clear a capability for a softpart
    964  * called by the MD_MN_SET_CAP ioctl.
    965  */
    966 static int
    967 sp_set_capability(md_mn_setcap_params_t *p, IOLOCK *lockp)
    968 {
    969 	set_t		setno;
    970 	mdi_unit_t	*ui;
    971 	mp_unit_t	*un;
    972 	int		err = 0;
    973 
    974 	if ((un = sp_getun(p->mnum, &p->mde)) == NULL)
    975 		return (EINVAL);
    976 
    977 	/* This function is only valid for a multi-node set */
    978 	setno = MD_MIN2SET(p->mnum);
    979 	if (!MD_MNSET_SETNO(setno)) {
    980 		return (EINVAL);
    981 	}
    982 	ui = MDI_UNIT(p->mnum);
    983 	(void) md_ioctl_readerlock(lockp, ui);
    984 
    985 	if (p->sc_set & DKV_ABR_CAP) {
    986 		void (*inc_abr_count)();
    987 
    988 		ui->ui_tstate |= MD_ABR_CAP; /* Set ABR capability */
    989 		/* Increment abr count in underlying metadevice */
    990 		inc_abr_count = (void(*)())md_get_named_service(un->un_dev,
    991 		    0, MD_INC_ABR_COUNT, 0);
    992 		if (inc_abr_count != NULL)
    993 			(void) (*inc_abr_count)(un->un_dev);
    994 	} else {
    995 		void (*dec_abr_count)();
    996 
    997 		ui->ui_tstate &= ~MD_ABR_CAP; /* Clear ABR capability */
    998 		/* Decrement abr count in underlying metadevice */
    999 		dec_abr_count = (void(*)())md_get_named_service(un->un_dev,
   1000 		    0, MD_DEC_ABR_COUNT, 0);
   1001 		if (dec_abr_count != NULL)
   1002 			(void) (*dec_abr_count)(un->un_dev);
   1003 	}
   1004 	if (p->sc_set & DKV_DMR_CAP) {
   1005 		ui->ui_tstate |= MD_DMR_CAP; /* Set DMR capability */
   1006 	} else {
   1007 		ui->ui_tstate &= ~MD_DMR_CAP; /* Clear DMR capability */
   1008 	}
   1009 	md_ioctl_readerexit(lockp);
   1010 	return (err);
   1011 }
   1012 
   1013 
   1014 /*
   1015  * FUNCTION:	sp_admin_ioctl().
   1016  * INPUT:	cmd	- ioctl to be handled.
   1017  *		data	- data ptr.
   1018  *		mode	- pass-through to copyin/copyout routines.
   1019  *		lockp	- lock ptr.
   1020  * OUTPUT:	none.
   1021  * RETURNS:	0		- success.
   1022  *		non-zero	- error.
   1023  * PURPOSE:	Handle administrative ioctl's.  Essentially a large
   1024  *		switch statement to dispatch the ioctl's to their
   1025  *		handlers.  See comment at beginning of file for specifics
   1026  *		on which ioctl's are handled.
   1027  */
   1028 static int
   1029 sp_admin_ioctl(int cmd, void *data, int mode, IOLOCK *lockp)
   1030 {
   1031 	size_t	sz = 0;
   1032 	void	*d = NULL;
   1033 	int	err = 0;
   1034 
   1035 	/* We can only handle 32-bit clients for internal commands */
   1036 	if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) {
   1037 		return (EINVAL);
   1038 	}
   1039 
   1040 	/* handle ioctl */
   1041 	switch (cmd) {
   1042 
   1043 	case MD_IOCSET:
   1044 	{
   1045 		/* create new soft partition */
   1046 		if (! (mode & FWRITE))
   1047 			return (EACCES);
   1048 
   1049 		sz = sizeof (md_set_params_t);
   1050 
   1051 		d = kmem_alloc(sz, KM_SLEEP);
   1052 
   1053 		if (ddi_copyin(data, d, sz, mode)) {
   1054 			err = EFAULT;
   1055 			break;
   1056 		}
   1057 
   1058 		err = sp_set(d, mode);
   1059 		break;
   1060 	}
   1061 
   1062 	case MD_IOCGET:
   1063 	{
   1064 		/* get soft partition unit structure */
   1065 		if (! (mode & FREAD))
   1066 			return (EACCES);
   1067 
   1068 		sz = sizeof (md_i_get_t);
   1069 
   1070 		d = kmem_alloc(sz, KM_SLEEP);
   1071 
   1072 		if (ddi_copyin(data, d, sz, mode)) {
   1073 			err = EFAULT;
   1074 			break;
   1075 		}
   1076 
   1077 		err = sp_get(d, mode, lockp);
   1078 		break;
   1079 	}
   1080 	case MD_IOCRESET:
   1081 	{
   1082 		/* delete soft partition */
   1083 		if (! (mode & FWRITE))
   1084 			return (EACCES);
   1085 
   1086 		sz = sizeof (md_sp_reset_t);
   1087 		d = kmem_alloc(sz, KM_SLEEP);
   1088 
   1089 		if (ddi_copyin(data, d, sz, mode)) {
   1090 			err = EFAULT;
   1091 			break;
   1092 		}
   1093 
   1094 		err = sp_reset((md_sp_reset_t *)d);
   1095 		break;
   1096 	}
   1097 
   1098 	case MD_IOCGROW:
   1099 	{
   1100 		/* grow soft partition */
   1101 		if (! (mode & FWRITE))
   1102 			return (EACCES);
   1103 
   1104 		sz = sizeof (md_grow_params_t);
   1105 		d  = kmem_alloc(sz, KM_SLEEP);
   1106 
   1107 		if (ddi_copyin(data, d, sz, mode)) {
   1108 			err = EFAULT;
   1109 			break;
   1110 		}
   1111 
   1112 		err = sp_grow(d, mode, lockp);
   1113 		break;
   1114 	}
   1115 
   1116 	case MD_IOCGET_DEVS:
   1117 	{
   1118 		/* get underlying device */
   1119 		if (! (mode & FREAD))
   1120 			return (EACCES);
   1121 
   1122 		sz = sizeof (md_getdevs_params_t);
   1123 		d  = kmem_alloc(sz, KM_SLEEP);
   1124 
   1125 		if (ddi_copyin(data, d, sz, mode)) {
   1126 			err = EFAULT;
   1127 			break;
   1128 		}
   1129 
   1130 		err = sp_getdevs(d, mode, lockp);
   1131 		break;
   1132 	}
   1133 
   1134 	case MD_IOC_SPSTATUS:
   1135 	{
   1136 		/* set the status field of one or more soft partitions */
   1137 		if (! (mode & FWRITE))
   1138 			return (EACCES);
   1139 
   1140 		sz = sizeof (md_sp_statusset_t);
   1141 		d  = kmem_alloc(sz, KM_SLEEP);
   1142 
   1143 		if (ddi_copyin(data, d, sz, mode)) {
   1144 			err = EFAULT;
   1145 			break;
   1146 		}
   1147 
   1148 		err = sp_setstatus(d, mode, lockp);
   1149 		break;
   1150 	}
   1151 
   1152 	case MD_IOC_SPUPDATEWM:
   1153 	case MD_MN_IOC_SPUPDATEWM:
   1154 	{
   1155 		if (! (mode & FWRITE))
   1156 			return (EACCES);
   1157 
   1158 		sz = sizeof (md_sp_update_wm_t);
   1159 		d  = kmem_alloc(sz, KM_SLEEP);
   1160 
   1161 		if (ddi_copyin(data, d, sz, mode)) {
   1162 			err = EFAULT;
   1163 			break;
   1164 		}
   1165 
   1166 		err = sp_update_watermarks(d, mode);
   1167 		break;
   1168 	}
   1169 
   1170 	case MD_IOC_SPREADWM:
   1171 	{
   1172 		if (! (mode & FREAD))
   1173 			return (EACCES);
   1174 
   1175 		sz = sizeof (md_sp_read_wm_t);
   1176 		d  = kmem_alloc(sz, KM_SLEEP);
   1177 
   1178 		if (ddi_copyin(data, d, sz, mode)) {
   1179 			err = EFAULT;
   1180 			break;
   1181 		}
   1182 
   1183 		err = sp_read_watermark(d, mode);
   1184 		break;
   1185 	}
   1186 
   1187 	case MD_MN_SET_CAP:
   1188 	{
   1189 		if (! (mode & FWRITE))
   1190 			return (EACCES);
   1191 
   1192 		sz = sizeof (md_mn_setcap_params_t);
   1193 		d  = kmem_alloc(sz, KM_SLEEP);
   1194 
   1195 		if (ddi_copyin(data, d, sz, mode)) {
   1196 			err = EFAULT;
   1197 			break;
   1198 		}
   1199 
   1200 		err = sp_set_capability((md_mn_setcap_params_t *)d, lockp);
   1201 		break;
   1202 	}
   1203 
   1204 	default:
   1205 		return (ENOTTY);
   1206 	}
   1207 
   1208 	/*
   1209 	 * copyout and free any args
   1210 	 */
   1211 	if (sz != 0) {
   1212 		if (err == 0) {
   1213 			if (ddi_copyout(d, data, sz, mode) != 0) {
   1214 				err = EFAULT;
   1215 			}
   1216 		}
   1217 		kmem_free(d, sz);
   1218 	}
   1219 	return (err);
   1220 }
   1221 
   1222 
   1223 /*
   1224  * FUNCTION:	md_sp_ioctl()
   1225  * INPUT:	dev	- device we are operating on.
   1226  *		cmd	- ioctl to be handled.
   1227  *		data	- data ptr.
   1228  *		mode	- pass-through to copyin/copyout routines.
   1229  *		lockp	- lock ptr.
   1230  * OUTPUT:	none.
   1231  * RETURNS:	0		- success.
   1232  *		non-zero	- error.
   1233  * PURPOSE:	Dispatch ioctl's.  Administrative ioctl's are handled
   1234  *		by sp_admin_ioctl.  All others (see comment at beginning
   1235  *		of this file) are handled in-line here.
   1236  */
   1237 int
   1238 md_sp_ioctl(dev_t dev, int cmd, void *data, int mode, IOLOCK *lockp)
   1239 {
   1240 	minor_t		mnum = getminor(dev);
   1241 	mp_unit_t	*un;
   1242 	mdi_unit_t	*ui;
   1243 	int		err = 0;
   1244 
   1245 	/* handle admin ioctls */
   1246 	if (mnum == MD_ADM_MINOR)
   1247 		return (sp_admin_ioctl(cmd, data, mode, lockp));
   1248 
   1249 	/* check unit */
   1250 	if ((MD_MIN2SET(mnum) >= md_nsets) ||
   1251 	    (MD_MIN2UNIT(mnum) >= md_nunits) ||
   1252 	    ((ui = MDI_UNIT(mnum)) == NULL) ||
   1253 	    ((un = MD_UNIT(mnum)) == NULL))
   1254 		return (ENXIO);
   1255 
   1256 	/* is this a supported ioctl? */
   1257 	err = md_check_ioctl_against_unit(cmd, un->c);
   1258 	if (err != 0) {
   1259 		return (err);
   1260 	}
   1261 
   1262 
   1263 	/* handle ioctl */
   1264 	switch (cmd) {
   1265 
   1266 	case DKIOCINFO:
   1267 	{
   1268 		/* "disk" info */
   1269 		struct dk_cinfo		*p;
   1270 
   1271 		if (! (mode & FREAD))
   1272 			return (EACCES);
   1273 
   1274 		p = kmem_alloc(sizeof (*p), KM_SLEEP);
   1275 
   1276 		get_info(p, mnum);
   1277 		if (ddi_copyout((caddr_t)p, data, sizeof (*p), mode) != 0)
   1278 			err = EFAULT;
   1279 
   1280 		kmem_free(p, sizeof (*p));
   1281 		return (err);
   1282 	}
   1283 
   1284 	case DKIOCGMEDIAINFO:
   1285 	{
   1286 		struct dk_minfo	p;
   1287 
   1288 		if (! (mode & FREAD))
   1289 			return (EACCES);
   1290 
   1291 		get_minfo(&p, mnum);
   1292 		if (ddi_copyout(&p, data, sizeof (struct dk_minfo), mode) != 0)
   1293 			err = EFAULT;
   1294 
   1295 		return (err);
   1296 	}
   1297 
   1298 	case DKIOCGGEOM:
   1299 	{
   1300 		/* geometry information */
   1301 		struct dk_geom		*p;
   1302 
   1303 		if (! (mode & FREAD))
   1304 			return (EACCES);
   1305 
   1306 		p = kmem_alloc(sizeof (*p), KM_SLEEP);
   1307 
   1308 		md_get_geom((md_unit_t *)un, p);
   1309 		if (ddi_copyout((caddr_t)p, data, sizeof (*p),
   1310 		    mode) != 0)
   1311 			err = EFAULT;
   1312 
   1313 		kmem_free(p, sizeof (*p));
   1314 		return (err);
   1315 	}
   1316 	case DKIOCGAPART:
   1317 	{
   1318 		struct dk_map	dmp;
   1319 
   1320 		err = 0;
   1321 		md_get_cgapart((md_unit_t *)un, &dmp);
   1322 
   1323 		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
   1324 			if (ddi_copyout((caddr_t)&dmp, data, sizeof (dmp),
   1325 			    mode) != 0)
   1326 				err = EFAULT;
   1327 		}
   1328 #ifdef _SYSCALL32
   1329 		else {
   1330 			struct dk_map32 dmp32;
   1331 
   1332 			dmp32.dkl_cylno = dmp.dkl_cylno;
   1333 			dmp32.dkl_nblk = dmp.dkl_nblk;
   1334 
   1335 			if (ddi_copyout((caddr_t)&dmp32, data, sizeof (dmp32),
   1336 			    mode) != 0)
   1337 				err = EFAULT;
   1338 		}
   1339 #endif /* _SYSCALL32 */
   1340 
   1341 		return (err);
   1342 	}
   1343 	case DKIOCGVTOC:
   1344 	{
   1345 		/* vtoc information */
   1346 		struct vtoc	*vtoc;
   1347 
   1348 		if (! (mode & FREAD))
   1349 			return (EACCES);
   1350 
   1351 		vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
   1352 		md_get_vtoc((md_unit_t *)un, vtoc);
   1353 
   1354 		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
   1355 			if (ddi_copyout(vtoc, data, sizeof (*vtoc), mode))
   1356 				err = EFAULT;
   1357 		}
   1358 #ifdef _SYSCALL32
   1359 		else {
   1360 			struct vtoc32	*vtoc32;
   1361 
   1362 			vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
   1363 
   1364 			vtoctovtoc32((*vtoc), (*vtoc32));
   1365 			if (ddi_copyout(vtoc32, data, sizeof (*vtoc32), mode))
   1366 				err = EFAULT;
   1367 			kmem_free(vtoc32, sizeof (*vtoc32));
   1368 		}
   1369 #endif /* _SYSCALL32 */
   1370 
   1371 		kmem_free(vtoc, sizeof (*vtoc));
   1372 		return (err);
   1373 	}
   1374 
   1375 	case DKIOCSVTOC:
   1376 	{
   1377 		struct vtoc	*vtoc;
   1378 
   1379 		if (! (mode & FWRITE))
   1380 			return (EACCES);
   1381 
   1382 		vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
   1383 		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
   1384 			if (ddi_copyin(data, vtoc, sizeof (*vtoc), mode)) {
   1385 				err = EFAULT;
   1386 			}
   1387 		}
   1388 #ifdef _SYSCALL32
   1389 		else {
   1390 			struct vtoc32	*vtoc32;
   1391 
   1392 			vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
   1393 
   1394 			if (ddi_copyin(data, vtoc32, sizeof (*vtoc32), mode)) {
   1395 				err = EFAULT;
   1396 			} else {
   1397 				vtoc32tovtoc((*vtoc32), (*vtoc));
   1398 			}
   1399 			kmem_free(vtoc32, sizeof (*vtoc32));
   1400 		}
   1401 #endif /* _SYSCALL32 */
   1402 
   1403 		if (err == 0)
   1404 			err = md_set_vtoc((md_unit_t *)un, vtoc);
   1405 
   1406 		kmem_free(vtoc, sizeof (*vtoc));
   1407 		return (err);
   1408 	}
   1409 
   1410 	case DKIOCGEXTVTOC:
   1411 	{
   1412 		/* extended vtoc information */
   1413 		struct extvtoc	*extvtoc;
   1414 
   1415 		if (! (mode & FREAD))
   1416 			return (EACCES);
   1417 
   1418 		extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
   1419 		md_get_extvtoc((md_unit_t *)un, extvtoc);
   1420 
   1421 		if (ddi_copyout(extvtoc, data, sizeof (*extvtoc), mode))
   1422 			err = EFAULT;
   1423 
   1424 		kmem_free(extvtoc, sizeof (*extvtoc));
   1425 		return (err);
   1426 	}
   1427 
   1428 	case DKIOCSEXTVTOC:
   1429 	{
   1430 		struct extvtoc	*extvtoc;
   1431 
   1432 		if (! (mode & FWRITE))
   1433 			return (EACCES);
   1434 
   1435 		extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
   1436 		if (ddi_copyin(data, extvtoc, sizeof (*extvtoc), mode)) {
   1437 			err = EFAULT;
   1438 		}
   1439 
   1440 		if (err == 0)
   1441 			err = md_set_extvtoc((md_unit_t *)un, extvtoc);
   1442 
   1443 		kmem_free(extvtoc, sizeof (*extvtoc));
   1444 		return (err);
   1445 	}
   1446 
   1447 	case DKIOCGETEFI:
   1448 	{
   1449 		/*
   1450 		 * This one can be done centralized,
   1451 		 * no need to put in the same code for all types of metadevices
   1452 		 */
   1453 		return (md_dkiocgetefi(mnum, data, mode));
   1454 	}
   1455 	case DKIOCSETEFI:
   1456 	{
   1457 		/*
   1458 		 * This one can be done centralized,
   1459 		 * no need to put in the same code for all types of metadevices
   1460 		 */
   1461 		return (md_dkiocsetefi(mnum, data, mode));
   1462 	}
   1463 
   1464 	case DKIOCPARTITION:
   1465 	{
   1466 		return (md_dkiocpartition(mnum, data, mode));
   1467 	}
   1468 
   1469 	case DKIOCGETVOLCAP:
   1470 	{
   1471 		/*
   1472 		 * Return the supported capabilities for the soft-partition.
   1473 		 * We can only support those caps that are provided by the
   1474 		 * underlying device.
   1475 		 */
   1476 
   1477 		volcap_t	vc;
   1478 
   1479 		if (!MD_MNSET_SETNO(MD_MIN2SET(mnum)))
   1480 			return (EINVAL);
   1481 
   1482 		if (! (mode & FREAD))
   1483 			return (EACCES);
   1484 
   1485 		bzero(&vc, sizeof (vc));
   1486 
   1487 		/* Send ioctl to underlying driver */
   1488 
   1489 		err = md_call_ioctl(un->un_dev, cmd, &vc, (mode | FKIOCTL),
   1490 		    lockp);
   1491 
   1492 		if (err == 0)
   1493 			ui->ui_capab = vc.vc_info;
   1494 
   1495 		if (ddi_copyout(&vc, data, sizeof (vc), mode))
   1496 			err = EFAULT;
   1497 
   1498 		return (err);
   1499 	}
   1500 
   1501 	case DKIOCSETVOLCAP:
   1502 	{
   1503 		/*
   1504 		 * Enable a supported capability (as returned by DKIOCGETVOLCAP)
   1505 		 * Do not pass the request down as we're the top-level device
   1506 		 * handler for the application.
   1507 		 * If the requested capability is supported (set in ui_capab),
   1508 		 * set the corresponding bit in ui_tstate so that we can pass
   1509 		 * the appropriate flag when performing i/o.
   1510 		 * This request is propagated to all nodes.
   1511 		 */
   1512 		volcap_t	vc, vc1;
   1513 		volcapset_t	volcap = 0;
   1514 		void 		(*check_offline)();
   1515 		int		offline_status = 0;
   1516 
   1517 		if (!MD_MNSET_SETNO(MD_MIN2SET(mnum)))
   1518 			return (EINVAL);
   1519 
   1520 		if (! (mode & FWRITE))
   1521 			return (EACCES);
   1522 
   1523 		if (ddi_copyin(data, &vc, sizeof (vc), mode))
   1524 			return (EFAULT);
   1525 
   1526 		/*
   1527 		 * Send DKIOCGETVOLCAP to underlying driver to see if
   1528 		 * capability supported
   1529 		 */
   1530 
   1531 		vc1.vc_info = 0;
   1532 		err = md_call_ioctl(un->un_dev, DKIOCGETVOLCAP, &vc1,
   1533 		    (mode | FKIOCTL), lockp);
   1534 		if (err != 0)
   1535 			return (err);
   1536 
   1537 		/* Save capabilities */
   1538 		ui->ui_capab = vc1.vc_info;
   1539 		/*
   1540 		 * Error if required capability not supported by underlying
   1541 		 * driver
   1542 		 */
   1543 		if ((vc1.vc_info & vc.vc_set) == 0)
   1544 			return (ENOTSUP);
   1545 
   1546 
   1547 		/*
   1548 		 * Check if underlying mirror has an offline submirror,
   1549 		 * fail if there is on offline submirror
   1550 		 */
   1551 		check_offline = (void(*)())md_get_named_service(un->un_dev,
   1552 		    0, MD_CHECK_OFFLINE, 0);
   1553 		if (check_offline != NULL)
   1554 			(void) (*check_offline)(un->un_dev, &offline_status);
   1555 		if (offline_status)
   1556 			return (EINVAL);
   1557 
   1558 		if (ui->ui_tstate & MD_ABR_CAP)
   1559 			volcap |= DKV_ABR_CAP;
   1560 
   1561 		/* Only send capability message if there is a change */
   1562 		if ((vc.vc_set & (DKV_ABR_CAP)) != volcap)
   1563 			err = mdmn_send_capability_message(mnum, vc, lockp);
   1564 		return (err);
   1565 	}
   1566 
   1567 	case DKIOCDMR:
   1568 	{
   1569 		/*
   1570 		 * Only valid for MN sets. We need to pass it down to the
   1571 		 * underlying driver if its a metadevice, after we've modified
   1572 		 * the offsets to pick up the correct lower-level device
   1573 		 * position.
   1574 		 */
   1575 		vol_directed_rd_t	*vdr;
   1576 #ifdef _MULTI_DATAMODEL
   1577 		vol_directed_rd32_t	*vdr32;
   1578 #endif	/* _MULTI_DATAMODEL */
   1579 
   1580 		if (!MD_MNSET_SETNO(MD_MIN2SET(mnum)))
   1581 			return (EINVAL);
   1582 
   1583 		if (! (ui->ui_capab & DKV_DMR_CAP))
   1584 			return (EINVAL);
   1585 
   1586 		vdr = kmem_zalloc(sizeof (vol_directed_rd_t), KM_NOSLEEP);
   1587 		if (vdr == NULL)
   1588 			return (ENOMEM);
   1589 
   1590 		/*
   1591 		 * Underlying device supports directed mirror read, so update
   1592 		 * the user-supplied offset to pick the correct block from the
   1593 		 * partitioned metadevice.
   1594 		 */
   1595 #ifdef _MULTI_DATAMODEL
   1596 		vdr32 = kmem_zalloc(sizeof (vol_directed_rd32_t), KM_NOSLEEP);
   1597 		if (vdr32 == NULL) {
   1598 			kmem_free(vdr, sizeof (vol_directed_rd_t));
   1599 			return (ENOMEM);
   1600 		}
   1601 
   1602 		switch (ddi_model_convert_from(mode & FMODELS)) {
   1603 		case DDI_MODEL_ILP32:
   1604 			if (ddi_copyin(data, vdr32, sizeof (*vdr32), mode)) {
   1605 				kmem_free(vdr, sizeof (*vdr));
   1606 				return (EFAULT);
   1607 			}
   1608 			vdr->vdr_flags = vdr32->vdr_flags;
   1609 			vdr->vdr_offset = vdr32->vdr_offset;
   1610 			vdr->vdr_nbytes = vdr32->vdr_nbytes;
   1611 			vdr->vdr_data = (void *)(uintptr_t)vdr32->vdr_data;
   1612 			vdr->vdr_side = vdr32->vdr_side;
   1613 			break;
   1614 
   1615 		case DDI_MODEL_NONE:
   1616 			if (ddi_copyin(data, vdr, sizeof (*vdr), mode)) {
   1617 				kmem_free(vdr32, sizeof (*vdr32));
   1618 				kmem_free(vdr, sizeof (*vdr));
   1619 				return (EFAULT);
   1620 			}
   1621 			break;
   1622 
   1623 		default:
   1624 			kmem_free(vdr32, sizeof (*vdr32));
   1625 			kmem_free(vdr, sizeof (*vdr));
   1626 			return (EFAULT);
   1627 		}
   1628 #else	/* ! _MULTI_DATAMODEL */
   1629 		if (ddi_copyin(data, vdr, sizeof (*vdr), mode)) {
   1630 			kmem_free(vdr, sizeof (*vdr));
   1631 			return (EFAULT);
   1632 		}
   1633 #endif	/* _MULTI_DATA_MODEL */
   1634 
   1635 		err = sp_directed_read(mnum, vdr, mode);
   1636 
   1637 
   1638 #ifdef _MULTI_DATAMODEL
   1639 		switch (ddi_model_convert_from(mode & FMODELS)) {
   1640 		case DDI_MODEL_ILP32:
   1641 			vdr32->vdr_flags = vdr->vdr_flags;
   1642 			vdr32->vdr_offset = vdr->vdr_offset;
   1643 			vdr32->vdr_side = vdr->vdr_side;
   1644 			vdr32->vdr_bytesread = vdr->vdr_bytesread;
   1645 			bcopy(vdr->vdr_side_name, vdr32->vdr_side_name,
   1646 			    sizeof (vdr32->vdr_side_name));
   1647 
   1648 			if (ddi_copyout(vdr32, data, sizeof (*vdr32), mode))
   1649 				err = EFAULT;
   1650 			break;
   1651 
   1652 		case DDI_MODEL_NONE:
   1653 			if (ddi_copyout(&vdr, data, sizeof (vdr), mode))
   1654 				err = EFAULT;
   1655 			break;
   1656 		}
   1657 #else	/* ! _MULTI_DATA_MODEL */
   1658 		if (ddi_copyout(&vdr, data, sizeof (vdr), mode))
   1659 			err = EFAULT;
   1660 #endif	/* _MULTI_DATA_MODEL */
   1661 
   1662 #ifdef _MULTI_DATAMODEL
   1663 		kmem_free(vdr32, sizeof (*vdr32));
   1664 #endif	/* _MULTI_DATAMODEL */
   1665 		kmem_free(vdr, sizeof (*vdr));
   1666 
   1667 		return (err);
   1668 	}
   1669 
   1670 	}
   1671 
   1672 	/* Option not handled */
   1673 	return (ENOTTY);
   1674 }
   1675