Home | History | Annotate | Download | only in common
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*
     28  * Just in case we're not in a build environment, make sure that
     29  * TEXT_DOMAIN gets set to something.
     30  */
     31 #if !defined(TEXT_DOMAIN)
     32 #define	TEXT_DOMAIN "SYS_TEST"
     33 #endif
     34 
     35 /*
     36  * soft partition operations
     37  *
     38  * Soft Partitions provide a virtual disk mechanism which is used to
     39  * divide a large volume into many small pieces, each appearing as a
     40  * separate device.  A soft partition consists of a series of extents,
     41  * each having an offset and a length.  The extents are logically
     42  * contiguous, so where the first extent leaves off the second extent
     43  * picks up.  Which extent a given "virtual offset" belongs to is
     44  * dependent on the size of all the previous extents in the soft
     45  * partition.
     46  *
     47  * Soft partitions are represented in memory by an extent node
     48  * (sp_ext_node_t) which contains all of the information necessary to
     49  * create a unit structure and update the on-disk format, called
     50  * "watermarks".  These extent nodes are typically kept in a doubly
     51  * linked list and are manipulated by list manipulation routines.  A
     52  * list of extents may represent all of the soft partitions on a volume,
     53  * a single soft partition, or perhaps just a set of extents that need
 * to be updated.  Extent lists may be sorted by offset or by name/seq#,
     55  * depending on which compare function is used.  Most of the routines
     56  * require the list be sorted by offset to work, and that's the typical
     57  * configuration.
     58  *
     59  * In order to do an allocation, knowledge of all soft partitions on the
     60  * volume is required.  Then free space is determined from the space
     61  * that is not allocated, and new allocations can be made from the free
     62  * space.  Once the new allocations are made, a unit structure is created
     63  * and the watermarks are updated.  The status is then changed to "okay"
     64  * on the unit structure to commit the transaction.  If updating the
     65  * watermarks fails, the unit structure is in an intermediate state and
     66  * the driver will not allow access to the device.
     67  *
     68  * A typical sequence of events is:
     69  *     1. Fetch the list of names for all soft partitions on a volume
     70  *         meta_sp_get_by_component()
     71  *     2. Construct an extent list from the name list
     72  *         meta_sp_extlist_from_namelist()
     73  *     3. Fill the gaps in the extent list with free extents
     74  *         meta_sp_list_freefill()
     75  *     4. Allocate from the free extents
     76  *         meta_sp_alloc_by_len()
     77  *         meta_sp_alloc_by_list()
     78  *     5. Create the unit structure from the extent list
     79  *         meta_sp_createunit()
     80  *         meta_sp_updateunit()
     81  *     6. Write out the watermarks
     82  *         meta_sp_update_wm()
     83  *     7. Set the status to "Okay"
     84  *         meta_sp_setstatus()
     85  *
     86  */
     87 
     88 #include <stdio.h>
     89 #include <meta.h>
     90 #include "meta_repartition.h"
     91 #include <sys/lvm/md_sp.h>
     92 #include <sys/lvm/md_crc.h>
     93 #include <strings.h>
     94 #include <sys/lvm/md_mirror.h>
     95 #include <sys/bitmap.h>
     96 
     97 extern int	md_in_daemon;
     98 
/*
 * In-memory description of a single extent.  Nodes are chained through
 * ext_next/ext_prev into a doubly linked list; which order the list is
 * kept in depends on the ext_cmpfunc_t handed to the insert routine.
 */
typedef struct sp_ext_node {
	struct sp_ext_node	*ext_next;	/* next element */
	struct sp_ext_node	*ext_prev;	/* previous element */
	sp_ext_type_t		ext_type;	/* type of extent */
	sp_ext_offset_t		ext_offset;	/* starting offset */
	sp_ext_length_t		ext_length;	/* length of this node */
	uint_t			ext_flags;	/* extent flags (EXTFLG_*) */
	uint32_t		ext_seq;	/* watermark seq no */
	mdname_t		*ext_namep;	/* name pointer; may be NULL */
	mdsetname_t		*ext_setp;	/* set pointer */
} sp_ext_node_t;

/* extent flags (stored in ext_flags) */
#define	EXTFLG_UPDATE	(1)

/*
 * Extent node compare function for list sorting.  Returns <0, 0 or >0
 * when the first node sorts before, equal to, or after the second.
 */
typedef int (*ext_cmpfunc_t)(sp_ext_node_t *, sp_ext_node_t *);
    116 
    117 
    118 /* Function Prototypes */
    119 
    120 /* Debugging Functions */
    121 static void meta_sp_debug(char *format, ...);
    122 static void meta_sp_printunit(mp_unit_t *mp);
    123 
    124 /* Misc Support Functions */
    125 int meta_sp_parsesize(char *s, sp_ext_length_t *szp);
    126 static int meta_sp_parsesizestring(char *s, sp_ext_length_t *szp);
    127 static int meta_sp_setgeom(mdname_t *np, mdname_t *compnp, mp_unit_t *mp,
    128 	md_error_t *ep);
    129 static int meta_sp_get_by_component(mdsetname_t *sp, mdname_t *compnp,
    130     mdnamelist_t **nlpp, int force, md_error_t *ep);
    131 static sp_ext_length_t meta_sp_get_default_alignment(mdsetname_t *sp,
    132     mdname_t *compnp, md_error_t *ep);
    133 
    134 /* Extent List Manipulation Functions */
    135 static int meta_sp_cmp_by_nameseq(sp_ext_node_t *e1, sp_ext_node_t *e2);
    136 static int meta_sp_cmp_by_offset(sp_ext_node_t *e1, sp_ext_node_t *e2);
    137 static void meta_sp_list_insert(mdsetname_t *sp, mdname_t *np,
    138     sp_ext_node_t **head, sp_ext_offset_t offset, sp_ext_length_t length,
    139     sp_ext_type_t type, uint_t seq, uint_t flags, ext_cmpfunc_t compare);
    140 static void meta_sp_list_free(sp_ext_node_t **head);
    141 static void meta_sp_list_remove(sp_ext_node_t **head, sp_ext_node_t *ext);
    142 static sp_ext_length_t meta_sp_list_size(sp_ext_node_t *head,
    143     sp_ext_type_t exttype, int exclude_wm);
    144 static sp_ext_node_t *meta_sp_list_find(sp_ext_node_t *head,
    145     sp_ext_offset_t offset);
    146 static void meta_sp_list_freefill(sp_ext_node_t **extlist,
    147     sp_ext_length_t size);
    148 static void meta_sp_list_dump(sp_ext_node_t *head);
    149 static int meta_sp_list_overlaps(sp_ext_node_t *head);
    150 
    151 /* Extent List Query Functions */
    152 static boolean_t meta_sp_enough_space(int desired_number_of_sps,
    153 	blkcnt_t desired_sp_size, sp_ext_node_t **extent_listpp,
    154 	sp_ext_length_t alignment);
    155 static boolean_t meta_sp_get_extent_list(mdsetname_t *mdsetnamep,
    156 	mdname_t *device_mdnamep, sp_ext_node_t **extent_listpp,
    157 	md_error_t *ep);
    158 static boolean_t meta_sp_get_extent_list_for_drive(mdsetname_t *mdsetnamep,
    159 	mddrivename_t *mddrivenamep, sp_ext_node_t **extent_listpp);
    160 
    161 
    162 /* Extent Allocation Functions */
    163 static void meta_sp_alloc_by_ext(mdsetname_t *sp, mdname_t *np,
    164     sp_ext_node_t **extlist, sp_ext_node_t *free_ext,
    165     sp_ext_offset_t alloc_offset, sp_ext_length_t alloc_length, uint_t seq);
    166 static int meta_sp_alloc_by_len(mdsetname_t *sp, mdname_t *np,
    167     sp_ext_node_t **extlist, sp_ext_length_t *lp,
    168     sp_ext_offset_t last_off, sp_ext_length_t alignment);
    169 static int meta_sp_alloc_by_list(mdsetname_t *sp, mdname_t *np,
    170     sp_ext_node_t **extlist, sp_ext_node_t *oblist);
    171 
    172 /* Extent List Population Functions */
    173 static int meta_sp_extlist_from_namelist(mdsetname_t *sp, mdnamelist_t *spnlp,
    174     sp_ext_node_t **extlist, md_error_t *ep);
    175 static int meta_sp_extlist_from_wm(mdsetname_t *sp, mdname_t *compnp,
    176     sp_ext_node_t **extlist, ext_cmpfunc_t compare, md_error_t *ep);
    177 
    178 /* Print (metastat) Functions */
    179 static int meta_sp_short_print(md_sp_t *msp, char *fname, FILE *fp,
    180     mdprtopts_t options, md_error_t *ep);
    181 static char *meta_sp_status_to_name(xsp_status_t xsp_status, uint_t tstate);
    182 static int meta_sp_report(mdsetname_t *sp, md_sp_t *msp, mdnamelist_t **nlpp,
    183     char *fname, FILE *fp, mdprtopts_t options, md_error_t *ep);
    184 
    185 /* Watermark Manipulation Functions */
    186 static int meta_sp_update_wm(mdsetname_t *sp, md_sp_t *msp,
    187     sp_ext_node_t *extlist, md_error_t *ep);
    188 static int meta_sp_clear_wm(mdsetname_t *sp, md_sp_t *msp, md_error_t *ep);
    189 static int meta_sp_read_wm(mdsetname_t *sp, mdname_t *compnp,
    190     mp_watermark_t *wm, sp_ext_offset_t offset,  md_error_t *ep);
    191 static diskaddr_t meta_sp_get_start(mdsetname_t *sp, mdname_t *compnp,
    192     md_error_t *ep);
    193 
    194 /* Unit Structure Manipulation Functions */
    195 static void meta_sp_fillextarray(mp_unit_t *mp, sp_ext_node_t *extlist);
    196 static mp_unit_t *meta_sp_createunit(mdname_t *np, mdname_t *compnp,
    197     sp_ext_node_t *extlist, int numexts, sp_ext_length_t len,
    198     sp_status_t status, md_error_t *ep);
    199 static mp_unit_t *meta_sp_updateunit(mdname_t *np,  mp_unit_t *old_un,
    200     sp_ext_node_t *extlist, sp_ext_length_t grow_len, int numexts,
    201     md_error_t *ep);
    202 static int meta_create_sp(mdsetname_t *sp, md_sp_t *msp, sp_ext_node_t *oblist,
    203     mdcmdopts_t options, sp_ext_length_t alignment, md_error_t *ep);
    204 static int meta_check_sp(mdsetname_t *sp, md_sp_t *msp, mdcmdopts_t options,
    205     int *repart_options, md_error_t *ep);
    206 
    207 /* Reset (metaclear) Functions */
    208 static int meta_sp_reset_common(mdsetname_t *sp, mdname_t *np, md_sp_t *msp,
    209     md_sp_reset_t reset_params, mdcmdopts_t options, md_error_t *ep);
    210 
    211 /* Recovery (metarecover) Functions */
    212 static void meta_sp_display_exthdr(void);
    213 static void meta_sp_display_ext(sp_ext_node_t *ext);
    214 static int meta_sp_checkseq(sp_ext_node_t *extlist);
    215 static int meta_sp_resolve_name_conflict(mdsetname_t *, mdname_t *,
    216     mdname_t **, md_error_t *);
    217 static int meta_sp_validate_wm(mdsetname_t *sp, mdname_t *np,
    218     mdcmdopts_t options, md_error_t *ep);
    219 static int meta_sp_validate_unit(mdsetname_t *sp, mdname_t *compnp,
    220     mdcmdopts_t options, md_error_t *ep);
    221 static int meta_sp_validate_wm_and_unit(mdsetname_t *sp, mdname_t *np,
    222     mdcmdopts_t options, md_error_t *ep);
    223 static int meta_sp_validate_exts(mdname_t *np, sp_ext_node_t *wmext,
    224     sp_ext_node_t *unitext, md_error_t *ep);
    225 static int meta_sp_recover_from_wm(mdsetname_t *sp, mdname_t *compnp,
    226     mdcmdopts_t options, md_error_t *ep);
    227 static int meta_sp_recover_from_unit(mdsetname_t *sp, mdname_t *np,
    228     mdcmdopts_t options, md_error_t *ep);
    229 
/*
 * Private Constants
 *
 * Named stand-ins for otherwise anonymous literal arguments.
 */

/* "force" argument value asking for cached data to be rebuilt */
static const int FORCE_RELOAD_CACHE = 1;
/* no extent flags set */
static const uint_t NO_FLAGS = 0;
/* zero offset placeholder */
static const sp_ext_offset_t NO_OFFSET = 0ULL;
/* zero sequence number placeholder */
static const uint_t NO_SEQUENCE_NUMBER = 0;
/* a count of exactly one soft partition */
static const int ONE_SOFT_PARTITION = 1;

/*
 * One pointer per set.  NOTE(review): presumably each entry is a bitmap
 * recording which parent devices have already been printed - confirm
 * against the print routines.
 */
static unsigned long *sp_parent_printed[MD_MAXSETS];

/* NULL name/set pointers for callers that have no real ones to supply */
#define	TEST_SOFT_PARTITION_NAMEP NULL
#define	TEST_SETNAMEP NULL

/* values for an "exclude_wm" style argument (see meta_sp_list_size()) */
#define	EXCLUDE_WM	(1)
#define	INCLUDE_WM	(0)

/* alignment value meaning "no alignment requirement" */
#define	SP_UNALIGNED	(0LL)
    249 
    250 /*
    251  * **************************************************************************
    252  *                          Debugging Functions                             *
    253  * **************************************************************************
    254  */
    255 
    256 /*PRINTFLIKE1*/
    257 static void
    258 meta_sp_debug(char *format, ...)
    259 {
    260 	static int debug;
    261 	static int debug_set = 0;
    262 	va_list ap;
    263 
    264 	if (!debug_set) {
    265 		debug = getenv(META_SP_DEBUG) ? 1 : 0;
    266 		debug_set = 1;
    267 	}
    268 
    269 	if (debug) {
    270 		va_start(ap, format);
    271 		(void) vfprintf(stderr, format, ap);
    272 		va_end(ap);
    273 	}
    274 }
    275 
    276 static void
    277 meta_sp_printunit(mp_unit_t *mp)
    278 {
    279 	int i;
    280 
    281 	if (mp == NULL)
    282 		return;
    283 
    284 	/* print the common fields we know about */
    285 	(void) fprintf(stderr, "\tmp->c.un_type: %d\n", mp->c.un_type);
    286 	(void) fprintf(stderr, "\tmp->c.un_size: %u\n", mp->c.un_size);
    287 	(void) fprintf(stderr, "\tmp->c.un_self_id: %lu\n", MD_SID(mp));
    288 
    289 	/* sp-specific fields */
    290 	(void) fprintf(stderr, "\tmp->un_status: %u\n", mp->un_status);
    291 	(void) fprintf(stderr, "\tmp->un_numexts: %u\n", mp->un_numexts);
    292 	(void) fprintf(stderr, "\tmp->un_length: %llu\n", mp->un_length);
    293 	(void) fprintf(stderr, "\tmp->un_dev(32): 0x%llx\n", mp->un_dev);
    294 	(void) fprintf(stderr, "\tmp->un_dev(64): 0x%llx\n", mp->un_dev);
    295 	(void) fprintf(stderr, "\tmp->un_key: %d\n", mp->un_key);
    296 
    297 	/* print extent information */
    298 	(void) fprintf(stderr, "\tExt#\tvoff\t\tpoff\t\tLen\n");
    299 	for (i = 0; i < mp->un_numexts; i++) {
    300 		(void) fprintf(stderr, "\t%d\t%llu\t\t%llu\t\t%llu\n", i,
    301 		    mp->un_ext[i].un_voff, mp->un_ext[i].un_poff,
    302 		    mp->un_ext[i].un_len);
    303 	}
    304 }
    305 
    306 /*
    307  * FUNCTION:    meta_sp_parsesize()
    308  * INPUT:       s       - the string to parse
    309  * OUTPUT:      *szp    - disk block count (0 for "all")
    310  * RETURNS:     -1 for error, 0 for success
    311  * PURPOSE:     parses the command line parameter that specifies the
    312  *              requested size of a soft partition.  The input string
    313  *              is either the literal "all" or a numeric value
    314  *              followed by a single character, b for disk blocks, k
    315  *              for kilobytes, m for megabytes, g for gigabytes, or t
    316  *              for terabytes.  p for petabytes and e for exabytes
    317  *              have been added as undocumented features for future
    318  *              expansion.  For example, 100m is 100 megabytes, while
    319  *              50g is 50 gigabytes.  All values are rounded up to the
    320  *              nearest block size.
    321  */
    322 int
    323 meta_sp_parsesize(char *s, sp_ext_length_t *szp)
    324 {
    325 	if (s == NULL || szp == NULL) {
    326 		return (-1);
    327 	}
    328 
    329 	/* Check for literal "all" */
    330 	if (strcasecmp(s, "all") == 0) {
    331 		*szp = 0;
    332 		return (0);
    333 	}
    334 
    335 	return (meta_sp_parsesizestring(s, szp));
    336 }
    337 
    338 /*
    339  * FUNCTION:	meta_sp_parsesizestring()
    340  * INPUT:	s	- the string to parse
    341  * OUTPUT:	*szp	- disk block count
    342  * RETURNS:	-1 for error, 0 for success
    343  * PURPOSE:	parses a string that specifies size. The input string is a
    344  *		numeric value followed by a single character, b for disk blocks,
    345  *		k for kilobytes, m for megabytes, g for gigabytes, or t for
    346  *		terabytes.  p for petabytes and e for exabytes have been added
    347  *		as undocumented features for future expansion.  For example,
    348  *		100m is 100 megabytes, while 50g is 50 gigabytes.  All values
    349  *		are rounded up to the nearest block size.
    350  */
    351 static int
    352 meta_sp_parsesizestring(char *s, sp_ext_length_t *szp)
    353 {
    354 	sp_ext_length_t	len = 0;
    355 	char		len_type[2];
    356 
    357 	if (s == NULL || szp == NULL) {
    358 		return (-1);
    359 	}
    360 
    361 	/*
    362 	 * make sure block offset does not overflow 2^64 bytes.
    363 	 */
    364 	if ((sscanf(s, "%llu%1[BbKkMmGgTt]", &len, len_type) != 2) ||
    365 	    (len == 0LL) ||
    366 	    (len > (1LL << (64 - DEV_BSHIFT))))
    367 		return (-1);
    368 
    369 	switch (len_type[0]) {
    370 	case 'B':
    371 	case 'b':
    372 		len = lbtodb(roundup(len * DEV_BSIZE, DEV_BSIZE));
    373 		break;
    374 	case 'K':
    375 	case 'k':
    376 		len = lbtodb(roundup(len * 1024ULL, DEV_BSIZE));
    377 		break;
    378 	case 'M':
    379 	case 'm':
    380 		len = lbtodb(roundup(len * 1024ULL*1024ULL, DEV_BSIZE));
    381 		break;
    382 	case 'g':
    383 	case 'G':
    384 		len = lbtodb(roundup(len * 1024ULL*1024ULL*1024ULL, DEV_BSIZE));
    385 		break;
    386 	case 't':
    387 	case 'T':
    388 		len = lbtodb(roundup(len * 1024ULL*1024ULL*1024ULL*1024ULL,
    389 		    DEV_BSIZE));
    390 		break;
    391 	case 'p':
    392 	case 'P':
    393 		len = lbtodb(roundup(
    394 		    len * 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL,
    395 		    DEV_BSIZE));
    396 		break;
    397 	case 'e':
    398 	case 'E':
    399 		len = lbtodb(roundup(
    400 		    len * 1024ULL*1024ULL*1024ULL*1024ULL*1024ULL*1024ULL,
    401 		    DEV_BSIZE));
    402 		break;
    403 	default:
    404 		/* error */
    405 		return (-1);
    406 	}
    407 
    408 	*szp = len;
    409 	return (0);
    410 }
    411 
    412 /*
    413  * FUNCTION:	meta_sp_setgeom()
    414  * INPUT:	np      - the underlying device to setup geometry for
    415  *		compnp	- the underlying device to setup geometry for
    416  *		mp	- the unit structure to set the geometry for
    417  * OUTPUT:	ep	- return error pointer
    418  * RETURNS:	int	- -1 if error, 0 otherwise
    419  * PURPOSE:	establishes geometry information for a device
    420  */
    421 static int
    422 meta_sp_setgeom(
    423 	mdname_t	*np,
    424 	mdname_t	*compnp,
    425 	mp_unit_t	*mp,
    426 	md_error_t	*ep
    427 )
    428 {
    429 	mdgeom_t	*geomp;
    430 	uint_t		round_cyl = 0;
    431 
    432 	if ((geomp = metagetgeom(compnp, ep)) == NULL)
    433 		return (-1);
    434 	if (meta_setup_geom((md_unit_t *)mp, np, geomp, geomp->write_reinstruct,
    435 	    geomp->read_reinstruct, round_cyl, ep) != 0)
    436 		return (-1);
    437 
    438 	return (0);
    439 }
    440 
    441 /*
    442  * FUNCTION:	meta_sp_setstatus()
    443  * INPUT:	sp	- the set name for the devices to set the status on
    444  *		minors	- an array of minor numbers of devices to set status on
    445  *		num_units - number of entries in the array
    446  *		status	- status value to set all units to
    447  * OUTPUT:	ep	- return error pointer
    448  * RETURNS:	int	- -1 if error, 0 success
    449  * PURPOSE:	sets the status of one or more soft partitions to the
    450  *		requested value
    451  */
    452 int
    453 meta_sp_setstatus(
    454 	mdsetname_t	*sp,
    455 	minor_t		*minors,
    456 	int		num_units,
    457 	sp_status_t	status,
    458 	md_error_t	*ep
    459 )
    460 {
    461 	md_sp_statusset_t	status_params;
    462 
    463 	assert(minors != NULL);
    464 
    465 	/* update status of all soft partitions to the status passed in */
    466 	(void) memset(&status_params, 0, sizeof (status_params));
    467 	status_params.num_units = num_units;
    468 	status_params.new_status = status;
    469 	status_params.size = num_units * sizeof (minor_t);
    470 	status_params.minors = (uintptr_t)minors;
    471 	MD_SETDRIVERNAME(&status_params, MD_SP, sp->setno);
    472 	if (metaioctl(MD_IOC_SPSTATUS, &status_params, &status_params.mde,
    473 	    NULL) != 0) {
    474 		(void) mdstealerror(ep, &status_params.mde);
    475 		return (-1);
    476 	}
    477 	return (0);
    478 }
    479 
    480 /*
    481  * FUNCTION:	meta_get_sp_names()
    482  * INPUT:	sp	- the set name to get soft partitions from
    483  *		options	- options from the command line
    484  * OUTPUT:	nlpp	- list of all soft partition names
    485  *		ep	- return error pointer
    486  * RETURNS:	int	- -1 if error, 0 success
    487  * PURPOSE:	returns a list of all soft partitions in the metadb
    488  *		for all devices in the specified set
    489  */
    490 int
    491 meta_get_sp_names(
    492 	mdsetname_t	*sp,
    493 	mdnamelist_t	**nlpp,
    494 	int		options,
    495 	md_error_t	*ep
    496 )
    497 {
    498 	return (meta_get_names(MD_SP, sp, nlpp, options, ep));
    499 }
    500 
    501 /*
    502  * FUNCTION:	meta_get_by_component()
    503  * INPUT:	sp	- the set name to get soft partitions from
    504  *		compnp	- the name of the device containing the soft
    505  *			  partitions that will be returned
    506  *		force	- 0 - reads cached namelist if available,
    507  *			  1 - reloads cached namelist, frees old namelist
    508  * OUTPUT:	nlpp	- list of all soft partition names
    509  *		ep	- return error pointer
    510  * RETURNS:	int	- -1 error, otherwise the number of soft partitions
    511  *			  found on the component (0 = none found).
    512  * PURPOSE:	returns a list of all soft partitions on a given device
    513  *		from the metadb information
    514  */
    515 static int
    516 meta_sp_get_by_component(
    517 	mdsetname_t	*sp,
    518 	mdname_t	*compnp,
    519 	mdnamelist_t	**nlpp,
    520 	int		force,
    521 	md_error_t	*ep
    522 )
    523 {
    524 	static mdnamelist_t	*cached_list = NULL;	/* cached namelist */
    525 	static int		cached_count = 0;	/* cached count */
    526 	mdnamelist_t		*spnlp = NULL;		/* all sp names */
    527 	mdnamelist_t		*namep;			/* list iterator */
    528 	mdnamelist_t		**tailpp = nlpp;	/* namelist tail */
    529 	mdnamelist_t		**cachetailpp;		/* cache tail */
    530 	md_sp_t			*msp;			/* unit structure */
    531 	int			count = 0;		/* count of sp's */
    532 	int			err;
    533 	mdname_t		*curnp;
    534 
    535 	if ((cached_list != NULL) && (!force)) {
    536 		/* return a copy of the cached list */
    537 		for (namep = cached_list; namep != NULL; namep = namep->next)
    538 			tailpp = meta_namelist_append_wrapper(tailpp,
    539 			    namep->namep);
    540 		return (cached_count);
    541 	}
    542 
    543 	/* free the cache and reset values to zeros to prepare for a new list */
    544 	metafreenamelist(cached_list);
    545 	cached_count = 0;
    546 	cached_list = NULL;
    547 	cachetailpp = &cached_list;
    548 	*nlpp = NULL;
    549 
    550 	/* get all the softpartitions first of all */
    551 	if (meta_get_sp_names(sp, &spnlp, 0, ep) < 0)
    552 		return (-1);
    553 
    554 	/*
    555 	 * Now for each sp, see if it resides on the component we
    556 	 * are interested in, if so then add it to our list
    557 	 */
    558 	for (namep = spnlp; namep != NULL; namep = namep->next) {
    559 		curnp = namep->namep;
    560 
    561 		/* get the unit structure */
    562 		if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL)
    563 			continue;
    564 
    565 		/*
    566 		 * If the current soft partition is not on the same
    567 		 * component, continue the search.  If it is on the same
    568 		 * component, add it to our namelist.
    569 		 */
    570 		err = meta_check_samedrive(compnp, msp->compnamep, ep);
    571 		if (err <= 0) {
    572 			/* not on the same device, check the next one */
    573 			continue;
    574 		}
    575 
    576 		/* it's on the same drive */
    577 
    578 		/*
    579 		 * Check for overlapping partitions if the component is not
    580 		 * a metadevice.
    581 		 */
    582 		if (!metaismeta(msp->compnamep)) {
    583 			/*
    584 			 * if they're on the same drive, neither
    585 			 * should be a metadevice if one isn't
    586 			 */
    587 			assert(!metaismeta(compnp));
    588 
    589 			if (meta_check_overlap(msp->compnamep->cname,
    590 			    compnp, 0, -1, msp->compnamep, 0, -1, ep) == 0)
    591 				continue;
    592 
    593 			/* in this case it's not an error for them to overlap */
    594 			mdclrerror(ep);
    595 		}
    596 
    597 		/* Component is on the same device, add to the used list */
    598 		tailpp = meta_namelist_append_wrapper(tailpp, curnp);
    599 		cachetailpp = meta_namelist_append_wrapper(cachetailpp,
    600 		    curnp);
    601 
    602 		++count;
    603 		++cached_count;
    604 	}
    605 
    606 	assert(count == cached_count);
    607 	return (count);
    608 
    609 out:
    610 	metafreenamelist(*nlpp);
    611 	*nlpp = NULL;
    612 	return (-1);
    613 }
    614 
    615 /*
    616  * FUNCTION:    meta_sp_get_default_alignment()
    617  * INPUT:       sp      - the pertinent set name
    618  *              compnp  - the name of the underlying component
    619  * OUTPUT:      ep      - return error pointer
    620  * RETURNS:     sp_ext_length_t =0: no default alignment
    621  *                              >0: default alignment
    622  * PURPOSE:     returns the default alignment for soft partitions to
    623  *              be built on top of the specified component or
    624  *              metadevice
    625  */
    626 static sp_ext_length_t
    627 meta_sp_get_default_alignment(
    628 	mdsetname_t	*sp,
    629 	mdname_t	*compnp,
    630 	md_error_t	*ep
    631 )
    632 {
    633 	sp_ext_length_t	a = SP_UNALIGNED;
    634 	char		*mname;
    635 
    636 	assert(compnp != NULL);
    637 
    638 	/*
    639 	 * We treat raw devices as opaque, and assume nothing about
    640 	 * their alignment requirements.
    641 	 */
    642 	if (!metaismeta(compnp))
    643 		return (SP_UNALIGNED);
    644 
    645 	/*
    646 	 * We already know it's a metadevice from the previous test;
    647 	 * metagetmiscname() will tell us which metadevice type we
    648 	 * have
    649 	 */
    650 	mname = metagetmiscname(compnp, ep);
    651 	if (mname == NULL)
    652 		goto out;
    653 
    654 	/*
    655 	 * For a mirror, we want to deal with the stripe that is the
    656 	 * primary side.  If it happens to be asymmetrically
    657 	 * configured, there is no simple way to fake a universal
    658 	 * alignment.  There's a chance that the least common
    659 	 * denominator of the set of interlaces from all stripes of
    660 	 * all submirrors would do it, but nobody that really cared
    661 	 * that much about this issue would create an asymmetric
    662 	 * config to start with.
    663 	 *
    664 	 * If the component underlying the soft partition is a mirror,
    665 	 * then at the exit of this loop, compnp will have been
    666 	 * updated to describe the first active submirror.
    667 	 */
    668 	if (strcmp(mname, MD_MIRROR) == 0) {
    669 		md_mirror_t	*mp;
    670 		int		smi;
    671 		md_submirror_t	*smp;
    672 
    673 		mp = meta_get_mirror(sp, compnp, ep);
    674 		if (mp == NULL)
    675 			goto out;
    676 
    677 		for (smi = 0; smi < NMIRROR; smi++) {
    678 
    679 			smp = &mp->submirrors[smi];
    680 			if (smp->state == SMS_UNUSED)
    681 				continue;
    682 
    683 			compnp = smp->submirnamep;
    684 			assert(compnp != NULL);
    685 
    686 			mname = metagetmiscname(compnp, ep);
    687 			if (mname == NULL)
    688 				goto out;
    689 
    690 			break;
    691 		}
    692 
    693 		if (smi == NMIRROR)
    694 			goto out;
    695 	}
    696 
    697 	/*
    698 	 * Handle stripes and submirrors identically; just return the
    699 	 * interlace of the first row.
    700 	 */
    701 	if (strcmp(mname, MD_STRIPE) == 0) {
    702 		md_stripe_t	*stp;
    703 
    704 		stp = meta_get_stripe(sp, compnp, ep);
    705 		if (stp == NULL)
    706 			goto out;
    707 
    708 		a = stp->rows.rows_val[0].interlace;
    709 		goto out;
    710 	}
    711 
    712 	/*
    713 	 * Raid is even more straightforward; the interlace applies to
    714 	 * the entire device.
    715 	 */
    716 	if (strcmp(mname, MD_RAID) == 0) {
    717 		md_raid_t	*rp;
    718 
    719 		rp = meta_get_raid(sp, compnp, ep);
    720 		if (rp == NULL)
    721 			goto out;
    722 
    723 		a = rp->interlace;
    724 		goto out;
    725 	}
    726 
    727 	/*
    728 	 * If we have arrived here with the alignment still not set,
    729 	 * then we expect the error to have been set by one of the
    730 	 * routines we called.  If neither is the case, something has
    731 	 * really gone wrong above.  (Probably the submirror walk
    732 	 * failed to produce a valid submirror, but that would be
    733 	 * really bad...)
    734 	 */
    735 out:
    736 	meta_sp_debug("meta_sp_get_default_alignment: miscname %s, "
    737 	    "alignment %lld\n", (mname == NULL) ? "NULL" : mname, a);
    738 
    739 	if (getenv(META_SP_DEBUG) && !mdisok(ep)) {
    740 		mde_perror(ep, NULL);
    741 	}
    742 
    743 	assert((a > 0) || (!mdisok(ep)));
    744 
    745 	return (a);
    746 }
    747 
    748 
    749 
    750 /*
    751  * FUNCTION:	meta_check_insp()
    752  * INPUT:	sp	- the set name for the device to check
    753  *		np	- the name of the device to check
    754  *		slblk	- the starting offset of the device to check
    755  *		nblks	- the number of blocks in the device to check
    756  * OUTPUT:	ep	- return error pointer
    757  * RETURNS:	int	-  0 - device contains soft partitions
    758  *			  -1 - device does not contain soft partitions
    759  * PURPOSE:	determines whether a device contains any soft partitions
    760  */
    761 /* ARGSUSED */
    762 int
    763 meta_check_insp(
    764 	mdsetname_t	*sp,
    765 	mdname_t	*np,
    766 	diskaddr_t	slblk,
    767 	diskaddr_t	nblks,
    768 	md_error_t	*ep
    769 )
    770 {
    771 	mdnamelist_t	*spnlp = NULL;	/* soft partition name list */
    772 	int		count;
    773 	int		rval;
    774 
    775 	/* check set pointer */
    776 	assert(sp != NULL);
    777 
    778 	/*
    779 	 * Get a list of the soft partitions that currently reside on
    780 	 * the component.  We should ALWAYS force reload the cache,
    781 	 * because if we're using the md.tab, we must rebuild
    782 	 * the list because it won't contain the previous (if any)
    783 	 * soft partition.
    784 	 */
    785 	/* find all soft partitions on the component */
    786 	count = meta_sp_get_by_component(sp, np, &spnlp, 1, ep);
    787 
    788 	if (count == -1) {
    789 		rval = -1;
    790 	} else if (count > 0) {
    791 		rval = mduseerror(ep, MDE_ALREADY, np->dev,
    792 		    spnlp->namep->cname, np->cname);
    793 	} else {
    794 		rval = 0;
    795 	}
    796 
    797 	metafreenamelist(spnlp);
    798 	return (rval);
    799 }
    800 
    801 /*
    802  * **************************************************************************
    803  *                    Extent List Manipulation Functions                    *
    804  * **************************************************************************
    805  */
    806 
    807 /*
    808  * FUNCTION:	meta_sp_cmp_by_nameseq()
    809  * INPUT:	e1	- first node to compare
    810  *		e2	- second node to compare
    811  * OUTPUT:	none
    812  * RETURNS:	int	- =0 - nodes are equal
    813  *			  <0 - e1 should go before e2
    814  *			  >0 - e1 should go after e2
    815  * PURPOSE:	used for sorted list inserts to build a list sorted by
    816  *		name first and sequence number second.
    817  */
    818 static int
    819 meta_sp_cmp_by_nameseq(sp_ext_node_t *e1, sp_ext_node_t *e2)
    820 {
    821 	int rval;
    822 
    823 	if (e1->ext_namep == NULL)
    824 		return (1);
    825 	if (e2->ext_namep == NULL)
    826 		return (-1);
    827 	if ((rval = strcmp(e1->ext_namep->cname, e2->ext_namep->cname)) != 0)
    828 		return (rval);
    829 
    830 	/* the names are equal, compare sequence numbers */
    831 	if (e1->ext_seq > e2->ext_seq)
    832 		return (1);
    833 	if (e1->ext_seq < e2->ext_seq)
    834 		return (-1);
    835 	/* sequence numbers are also equal */
    836 	return (0);
    837 }
    838 
    839 /*
    840  * FUNCTION:	meta_sp_cmp_by_offset()
    841  * INPUT:	e1	- first node to compare
    842  *		e2	- second node to compare
    843  * OUTPUT:	none
    844  * RETURNS:	int	- =0 - nodes are equal
    845  *			  <0 - e1 should go before e2
    846  *			  >0 - e1 should go after e2
    847  * PURPOSE:	used for sorted list inserts to build a list sorted by offset
    848  */
    849 static int
    850 meta_sp_cmp_by_offset(sp_ext_node_t *e1, sp_ext_node_t *e2)
    851 {
    852 	if (e1->ext_offset > e2->ext_offset)
    853 		return (1);
    854 	if (e1->ext_offset < e2->ext_offset)
    855 		return (-1);
    856 	/* offsets are equal */
    857 	return (0);
    858 }
    859 
    860 /*
    861  * FUNCTION:	meta_sp_list_insert()
    862  * INPUT:	sp	- the set name for the device the node belongs to
    863  *		np	- the name of the device the node belongs to
    864  *		head	- the head of the list, must be NULL for empty list
    865  *		offset	- the physical offset of this extent in sectors
    866  *		length	- the length of this extent in sectors
    867  *		type	- the type of the extent being inserted
    868  *		seq	- the sequence number of the extent being inserted
    869  *		flags	- extent flags (eg. whether it needs to be updated)
    870  *		compare	- the compare function to use
    871  * OUTPUT:	head	- points to the new head if a node was inserted
    872  *			  at the beginning
    873  * RETURNS:	void
    874  * PURPOSE:	inserts an extent node into a sorted doubly linked list.
    875  *		The sort order is determined by the compare function.
    876  *		Memory is allocated for the node in this function and it
    877  *		is up to the caller to free it, possibly using
    878  *		meta_sp_list_free().  If a node is inserted at the
    879  *		beginning of the list, the head pointer is updated to
    880  *		point to the new first node.
    881  */
    882 static void
    883 meta_sp_list_insert(
    884 	mdsetname_t	*sp,
    885 	mdname_t	*np,
    886 	sp_ext_node_t	**head,
    887 	sp_ext_offset_t	offset,
    888 	sp_ext_length_t	length,
    889 	sp_ext_type_t	type,
    890 	uint_t		seq,
    891 	uint_t		flags,
    892 	ext_cmpfunc_t	compare
    893 )
    894 {
    895 	sp_ext_node_t	*newext;
    896 	sp_ext_node_t	*curext;
    897 
    898 	assert(head != NULL);
    899 
    900 	/* Don't bother adding zero length nodes */
    901 	if (length == 0ULL)
    902 		return;
    903 
    904 	/* allocate and fill in new ext_node */
    905 	newext = Zalloc(sizeof (sp_ext_node_t));
    906 
    907 	newext->ext_offset = offset;
    908 	newext->ext_length = length;
    909 	newext->ext_flags = flags;
    910 	newext->ext_type = type;
    911 	newext->ext_seq = seq;
    912 	newext->ext_setp = sp;
    913 	newext->ext_namep = np;
    914 
    915 	/* first node in the list */
    916 	if (*head == NULL) {
    917 		newext->ext_next = newext->ext_prev = NULL;
    918 		*head = newext;
    919 	} else if ((*compare)(*head, newext) >= 0) {
    920 		/* the first node has a bigger offset, so insert before it */
    921 		assert((*head)->ext_prev == NULL);
    922 
    923 		newext->ext_prev = NULL;
    924 		newext->ext_next = *head;
    925 		(*head)->ext_prev = newext;
    926 		*head = newext;
    927 	} else {
    928 		/*
    929 		 * find the next node whose offset is greater than
    930 		 * the one we want to insert, or the end of the list.
    931 		 */
    932 		for (curext = *head;
    933 		    (curext->ext_next != NULL) &&
    934 		    ((*compare)(curext->ext_next, newext) < 0);
    935 		    (curext = curext->ext_next))
    936 			;
    937 
    938 		/* link the new node in after the current node */
    939 		newext->ext_next = curext->ext_next;
    940 		newext->ext_prev = curext;
    941 
    942 		if (curext->ext_next != NULL)
    943 			curext->ext_next->ext_prev = newext;
    944 
    945 		curext->ext_next = newext;
    946 	}
    947 }
    948 
    949 /*
    950  * FUNCTION:	meta_sp_list_free()
    951  * INPUT:	head	- the head of the list, must be NULL for empty list
    952  * OUTPUT:	head	- points to NULL on return
    953  * RETURNS:	void
    954  * PURPOSE:	walks a double linked extent list and frees each node
    955  */
    956 static void
    957 meta_sp_list_free(sp_ext_node_t **head)
    958 {
    959 	sp_ext_node_t	*ext;
    960 	sp_ext_node_t	*next;
    961 
    962 	assert(head != NULL);
    963 
    964 	ext = *head;
    965 	while (ext) {
    966 		next = ext->ext_next;
    967 		Free(ext);
    968 		ext = next;
    969 	}
    970 	*head = NULL;
    971 }
    972 
    973 /*
    974  * FUNCTION:	meta_sp_list_remove()
    975  * INPUT:	head	- the head of the list, must be NULL for empty list
    976  *		ext	- the extent to remove, must be a member of the list
    977  * OUTPUT:	head	- points to the new head of the list
    978  * RETURNS:	void
    979  * PURPOSE:	unlinks the node specified by ext from the list and
    980  *		frees it, possibly moving the head pointer forward if
    981  *		the head is the node being removed.
    982  */
    983 static void
    984 meta_sp_list_remove(sp_ext_node_t **head, sp_ext_node_t *ext)
    985 {
    986 	assert(head != NULL);
    987 	assert(*head != NULL);
    988 
    989 	if (*head == ext)
    990 		*head = ext->ext_next;
    991 
    992 	if (ext->ext_prev != NULL)
    993 		ext->ext_prev->ext_next = ext->ext_next;
    994 	if (ext->ext_next != NULL)
    995 		ext->ext_next->ext_prev = ext->ext_prev;
    996 	Free(ext);
    997 }
    998 
    999 /*
   1000  * FUNCTION:	meta_sp_list_size()
   1001  * INPUT:	head	- the head of the list, must be NULL for empty list
   1002  *		exttype	- the type of the extents to sum
   1003  *		exclude_wm - subtract space for extent headers from total
   1004  * OUTPUT:	none
   1005  * RETURNS:	sp_ext_length_t	- the sum of all of the lengths
   1006  * PURPOSE:	sums the lengths of all extents in the list matching the
   1007  *		specified type.  This could be used for computing the
   1008  *		amount of free or used space, for example.
   1009  */
   1010 static sp_ext_length_t
   1011 meta_sp_list_size(sp_ext_node_t *head, sp_ext_type_t exttype, int exclude_wm)
   1012 {
   1013 	sp_ext_node_t	*ext;
   1014 	sp_ext_length_t	size = 0LL;
   1015 
   1016 	for (ext = head; ext != NULL; ext = ext->ext_next)
   1017 		if (ext->ext_type == exttype)
   1018 			size += ext->ext_length -
   1019 			    ((exclude_wm) ? MD_SP_WMSIZE : 0);
   1020 
   1021 	return (size);
   1022 }
   1023 
   1024 /*
   1025  * FUNCTION:	meta_sp_list_find()
   1026  * INPUT:	head	- the head of the list, must be NULL for empty list
   1027  *		offset	- the offset contained by the node to find
   1028  * OUTPUT:	none
   1029  * RETURNS:	sp_ext_node_t *	- the node containing the requested offset
   1030  *				  or NULL if no such nodes were found.
   1031  * PURPOSE:	finds a node in a list containing the requested offset
   1032  *		(inclusive).  If multiple nodes contain this offset then
   1033  *		only the first will be returned, though typically these
   1034  *		lists are managed with non-overlapping nodes.
   1035  *
   1036  *		*The list MUST be sorted by offset for this function to work.*
   1037  */
   1038 static sp_ext_node_t *
   1039 meta_sp_list_find(
   1040 	sp_ext_node_t	*head,
   1041 	sp_ext_offset_t	offset
   1042 )
   1043 {
   1044 	sp_ext_node_t	*ext;
   1045 
   1046 	for (ext = head; ext != NULL; ext = ext->ext_next) {
   1047 		/* check if the offset lies within this extent */
   1048 		if ((offset >= ext->ext_offset) &&
   1049 		    (offset < ext->ext_offset + ext->ext_length)) {
   1050 			/*
   1051 			 * the requested extent should always be a
   1052 			 * subset of an extent in the list.
   1053 			 */
   1054 			return (ext);
   1055 		}
   1056 	}
   1057 	return (NULL);
   1058 }
   1059 
   1060 /*
   1061  * FUNCTION:	meta_sp_list_freefill()
   1062  * INPUT:	head	- the head of the list, must be NULL for empty list
   1063  *		size	- the size of the volume this extent list is
   1064  *			  representing
   1065  * OUTPUT:	head	- the new head of the list
   1066  * RETURNS:	void
   1067  * PURPOSE:	finds gaps in the extent list and fills them with a free
   1068  *		node.  If there is a gap at the beginning the head
   1069  *		pointer will be changed to point to the new free node.
   1070  *		If there is free space at the end, the last free extent
   1071  *		will extend all the way out to the size specified.
   1072  *
   1073  *		*The list MUST be sorted by offset for this function to work.*
   1074  */
   1075 static void
   1076 meta_sp_list_freefill(
   1077 	sp_ext_node_t	**head,
   1078 	sp_ext_length_t	size
   1079 )
   1080 {
   1081 	sp_ext_node_t	*ext;
   1082 	sp_ext_offset_t	curoff = 0LL;
   1083 
   1084 	for (ext = *head; ext != NULL; ext = ext->ext_next) {
   1085 		if (curoff < ext->ext_offset)
   1086 			meta_sp_list_insert(NULL, NULL, head,
   1087 			    curoff, ext->ext_offset - curoff,
   1088 			    EXTTYP_FREE, 0, 0, meta_sp_cmp_by_offset);
   1089 		curoff = ext->ext_offset + ext->ext_length;
   1090 	}
   1091 
   1092 	/* pad inverse list out to the end */
   1093 	if (curoff < size)
   1094 		meta_sp_list_insert(NULL, NULL, head, curoff, size - curoff,
   1095 		    EXTTYP_FREE, 0, 0, meta_sp_cmp_by_offset);
   1096 
   1097 	if (getenv(META_SP_DEBUG)) {
   1098 		meta_sp_debug("meta_sp_list_freefill: Extent list with "
   1099 		    "holes freefilled:\n");
   1100 		meta_sp_list_dump(*head);
   1101 	}
   1102 }
   1103 
   1104 /*
   1105  * FUNCTION:	meta_sp_list_dump()
   1106  * INPUT:	head	- the head of the list, must be NULL for empty list
   1107  * OUTPUT:	none
   1108  * RETURNS:	void
   1109  * PURPOSE:	dumps the entire extent list to stdout for easy debugging
   1110  */
   1111 static void
   1112 meta_sp_list_dump(sp_ext_node_t *head)
   1113 {
   1114 	sp_ext_node_t	*ext;
   1115 
   1116 	meta_sp_debug("meta_sp_list_dump: dumping extent list:\n");
   1117 	meta_sp_debug("%5s %10s %5s %7s %10s %10s %5s %10s %10s\n", "Name",
   1118 	    "Addr", "Seq#", "Type", "Offset", "Length", "Flags", "Prev",
   1119 	    "Next");
   1120 	for (ext = head; ext != NULL; ext = ext->ext_next) {
   1121 		if (ext->ext_namep != NULL)
   1122 			meta_sp_debug("%5s", ext->ext_namep->cname);
   1123 		else
   1124 			meta_sp_debug("%5s", "NONE");
   1125 
   1126 		meta_sp_debug("%10p %5u ", (void *) ext, ext->ext_seq);
   1127 		switch (ext->ext_type) {
   1128 		case EXTTYP_ALLOC:
   1129 			meta_sp_debug("%7s ", "ALLOC");
   1130 			break;
   1131 		case EXTTYP_FREE:
   1132 			meta_sp_debug("%7s ", "FREE");
   1133 			break;
   1134 		case EXTTYP_END:
   1135 			meta_sp_debug("%7s ", "END");
   1136 			break;
   1137 		case EXTTYP_RESERVED:
   1138 			meta_sp_debug("%7s ", "RESV");
   1139 			break;
   1140 		default:
   1141 			meta_sp_debug("%7s ", "INVLD");
   1142 			break;
   1143 		}
   1144 
   1145 		meta_sp_debug("%10llu %10llu %5u %10p %10p\n",
   1146 		    ext->ext_offset, ext->ext_length,
   1147 		    ext->ext_flags, (void *) ext->ext_prev,
   1148 		    (void *) ext->ext_next);
   1149 	}
   1150 	meta_sp_debug("\n");
   1151 }
   1152 
   1153 /*
   1154  * FUNCTION:	meta_sp_list_overlaps()
   1155  * INPUT:	head	- the head of the list, must be NULL for empty list
   1156  * OUTPUT:	none
   1157  * RETURNS:	int	- 1 if extents overlap, 0 if ok
   1158  * PURPOSE:	checks a list for overlaps.  The list MUST be sorted by
   1159  *		offset for this function to work properly.
   1160  */
   1161 static int
   1162 meta_sp_list_overlaps(sp_ext_node_t *head)
   1163 {
   1164 	sp_ext_node_t	*ext;
   1165 
   1166 	for (ext = head; ext->ext_next != NULL; ext = ext->ext_next) {
   1167 		if (ext->ext_offset + ext->ext_length >
   1168 		    ext->ext_next->ext_offset)
   1169 			return (1);
   1170 	}
   1171 	return (0);
   1172 }
   1173 
   1174 /*
   1175  * **************************************************************************
   1176  *                        Extent Allocation Functions                       *
   1177  * **************************************************************************
   1178  */
   1179 
   1180 /*
   1181  * FUNCTION:	meta_sp_alloc_by_ext()
   1182  * INPUT:	sp	- the set name for the device the node belongs to
   1183  *		np	- the name of the device the node belongs to
   1184  *		head	- the head of the list, must be NULL for empty list
   1185  *		free_ext	- the free extent being allocated from
   1186  *		alloc_offset	- the offset of the allocation
   1187  *		alloc_len	- the length of the allocation
   1188  *		seq		- the sequence number of the allocation
   1189  * OUTPUT:	head	- the new head pointer
   1190  * RETURNS:	void
   1191  * PURPOSE:	allocates a portion of the free extent free_ext.  The
   1192  *		allocated portion starts at alloc_offset and is
   1193  *		alloc_length long.  Both (alloc_offset) and (alloc_offset +
   1194  *		alloc_length) must be contained within the free extent.
   1195  *
   1196  *		The free extent is split into as many as 3 pieces - a
   1197  *		free extent containing [ free_offset .. alloc_offset ), an
   1198  *		allocated extent containing the range [ alloc_offset ..
   1199  *		alloc_end ], and another free extent containing the
   1200  *		range ( alloc_end .. free_end ].  If either of the two
   1201  *		new free extents would be zero length, they are not created.
   1202  *
   1203  *		Finally, the original free extent is removed.  All newly
   1204  *		created extents have the EXTFLG_UPDATE flag set.
   1205  */
   1206 static void
   1207 meta_sp_alloc_by_ext(
   1208 	mdsetname_t	*sp,
   1209 	mdname_t	*np,
   1210 	sp_ext_node_t	**head,
   1211 	sp_ext_node_t	*free_ext,
   1212 	sp_ext_offset_t	alloc_offset,
   1213 	sp_ext_length_t	alloc_length,
   1214 	uint_t		seq
   1215 )
   1216 {
   1217 	sp_ext_offset_t	free_offset = free_ext->ext_offset;
   1218 	sp_ext_length_t	free_length = free_ext->ext_length;
   1219 
   1220 	sp_ext_offset_t	alloc_end = alloc_offset + alloc_length;
   1221 	sp_ext_offset_t	free_end  = free_offset  + free_length;
   1222 
   1223 	/* allocated extent must be a subset of the free extent */
   1224 	assert(free_offset <= alloc_offset);
   1225 	assert(free_end >= alloc_end);
   1226 
   1227 	meta_sp_list_remove(head, free_ext);
   1228 
   1229 	if (free_offset < alloc_offset) {
   1230 		meta_sp_list_insert(NULL, NULL, head, free_offset,
   1231 		    (alloc_offset - free_offset), EXTTYP_FREE, 0,
   1232 		    EXTFLG_UPDATE, meta_sp_cmp_by_offset);
   1233 	}
   1234 
   1235 	if (free_end > alloc_end) {
   1236 		meta_sp_list_insert(NULL, NULL, head, alloc_end,
   1237 		    (free_end - alloc_end), EXTTYP_FREE, 0, EXTFLG_UPDATE,
   1238 		    meta_sp_cmp_by_offset);
   1239 	}
   1240 
   1241 	meta_sp_list_insert(sp, np, head, alloc_offset, alloc_length,
   1242 	    EXTTYP_ALLOC, seq, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
   1243 
   1244 	if (getenv(META_SP_DEBUG)) {
   1245 		meta_sp_debug("meta_sp_alloc_by_ext: extent list:\n");
   1246 		meta_sp_list_dump(*head);
   1247 	}
   1248 }
   1249 
   1250 /*
   1251  * FUNCTION:	meta_sp_alloc_by_len()
   1252  * INPUT:	sp	- the set name for the device the node belongs to
   1253  *		np	- the name of the device the node belongs to
   1254  *		head	- the head of the list, must be NULL for empty list
   1255  *		*lp	- the requested length to allocate
   1256  *		last_off	- the last offset already allocated.
   1257  *		alignment	- the desired extent alignmeent
   1258  * OUTPUT:	head	- the new head pointer
   1259  *		*lp	- the length allocated
   1260  * RETURNS:	int	- -1 if error, the number of new extents on success
   1261  * PURPOSE:	allocates extents from free space to satisfy the requested
   1262  *		length.  If requested length is zero, allocates all
   1263  *		remaining free space.  This function provides the meat
   1264  *		of the extent allocation algorithm.  Allocation is a
   1265  *		three tier process:
   1266  *
   1267  *		1. If last_off is nonzero and there is free space following
   1268  *		   that node, then it is extended to allocate as much of that
   1269  *		   free space as possible.  This is useful for metattach.
   1270  *		2. If a free extent can be found to satisfy the remaining
   1271  *		   requested space, then satisfy the rest of the request
   1272  *		   from that extent.
   1273  *		3. Start allocating space from any remaining free extents until
   1274  *		   the remainder of the request is satisified.
   1275  *
   1276  *              If alignment is non-zero, then every extent modified
   1277  *              or newly allocated will be aligned modulo alignment,
   1278  *              with a length that is an integer multiple of
   1279  *              alignment.
   1280  *
   1281  *		The EXTFLG_UPDATE flag is set for all nodes (free and
   1282  *		allocated) that require updated watermarks.
   1283  *
   1284  *		This algorithm may have a negative impact on fragmentation
   1285  *		in pathological cases and may be improved if it turns out
   1286  *		to be a problem.  This may be exacerbated by particularly
   1287  *		large alignments.
   1288  *
   1289  * NOTE:	It's confusing, so it demands an explanation:
   1290  *		- len is used to represent requested data space; it
   1291  *		  does not include room for a watermark.  On each full
   1292  *		  or partial allocation, len will be decremented by
   1293  *		  alloc_len (see next paragraph) until it reaches
   1294  *		  zero.
   1295  *		- alloc_len is used to represent data space allocated
   1296  *		  from a particular extent; it does not include space
   1297  *		  for a watermark.  In the rare event that a_length
   1298  *		  (see next paragraph) is equal to MD_SP_WMSIZE,
   1299  *		  alloc_len will be zero and the resulting MD_SP_WMSIZE
   1300  *		  fragment of space will be utterly unusable.
   1301  *		- a_length is used to represent all space to be
   1302  *		  allocated from a particular extent; it DOES include
   1303  *		  space for a watermark.
   1304  */
   1305 static int
   1306 meta_sp_alloc_by_len(
   1307 	mdsetname_t	*sp,
   1308 	mdname_t	*np,
   1309 	sp_ext_node_t	**head,
   1310 	sp_ext_length_t	*lp,
   1311 	sp_ext_offset_t	last_off,
   1312 	sp_ext_offset_t	alignment
   1313 )
   1314 {
   1315 	sp_ext_node_t	*free_ext;
   1316 	sp_ext_node_t	*alloc_ext;
   1317 	uint_t		last_seq = 0;
   1318 	uint_t		numexts = 0;
   1319 	sp_ext_length_t	freespace;
   1320 	sp_ext_length_t	alloc_len;
   1321 	sp_ext_length_t	len;
   1322 
   1323 	/* We're DOA if we can't read *lp */
   1324 	assert(lp != NULL);
   1325 	len = *lp;
   1326 
   1327 	/*
   1328 	 * Process the nominal case first: we've been given an actual
   1329 	 * size argument, rather than the literal "all"
   1330 	 */
   1331 
   1332 	if (len != 0) {
   1333 
   1334 		/*
   1335 		 * Short circuit the check for free space.  This may
   1336 		 * tell us we have enough space when we really don't
   1337 		 * because each extent loses space to a watermark, but
   1338 		 * it will always tell us there isn't enough space
   1339 		 * correctly.  Worst case we do some extra work.
   1340 		 */
   1341 		freespace = meta_sp_list_size(*head, EXTTYP_FREE,
   1342 		    INCLUDE_WM);
   1343 
   1344 		if (freespace < len)
   1345 			return (-1);
   1346 
   1347 		/*
   1348 		 * First see if we can extend the last extent for an
   1349 		 * attach.
   1350 		 */
   1351 		if (last_off != 0LL) {
   1352 			int align = 0;
   1353 
   1354 			alloc_ext =
   1355 			    meta_sp_list_find(*head, last_off);
   1356 			assert(alloc_ext != NULL);
   1357 
   1358 			/*
   1359 			 * The offset test reflects the
   1360 			 * inclusion of the watermark in the extent
   1361 			 */
   1362 			align = (alignment > 0) &&
   1363 			    (((alloc_ext->ext_offset + MD_SP_WMSIZE) %
   1364 			    alignment) == 0);
   1365 
   1366 			/*
   1367 			 * If we decided not to align here, we should
   1368 			 * also reset "alignment" so we don't bother
   1369 			 * later, either.
   1370 			 */
   1371 			if (!align) {
   1372 				alignment = 0;
   1373 			}
   1374 
   1375 			last_seq = alloc_ext->ext_seq;
   1376 
   1377 			free_ext = meta_sp_list_find(*head,
   1378 			    alloc_ext->ext_offset +
   1379 			    alloc_ext->ext_length);
   1380 
   1381 			/*
   1382 			 * If a free extent follows our last allocated
   1383 			 * extent, then remove the last allocated
   1384 			 * extent and increase the size of the free
   1385 			 * extent to overlap it, then allocate the
   1386 			 * total space from the new free extent.
   1387 			 */
   1388 			if (free_ext != NULL &&
   1389 			    free_ext->ext_type == EXTTYP_FREE) {
   1390 				assert(free_ext->ext_offset ==
   1391 				    alloc_ext->ext_offset +
   1392 				    alloc_ext->ext_length);
   1393 
   1394 				alloc_len =
   1395 				    MIN(len, free_ext->ext_length);
   1396 
   1397 				if (align && (alloc_len < len)) {
   1398 					/* No watermark space needed */
   1399 					alloc_len -= alloc_len % alignment;
   1400 				}
   1401 
   1402 				if (alloc_len > 0) {
   1403 					free_ext->ext_offset -=
   1404 					    alloc_ext->ext_length;
   1405 					free_ext->ext_length +=
   1406 					    alloc_ext->ext_length;
   1407 
   1408 					meta_sp_alloc_by_ext(sp, np, head,
   1409 					    free_ext, free_ext->ext_offset,
   1410 					    alloc_ext->ext_length + alloc_len,
   1411 					    last_seq);
   1412 
   1413 					/*
   1414 					 * now remove the original allocated
   1415 					 * node.  We may have overlapping
   1416 					 * extents for a short time before
   1417 					 * this node is removed.
   1418 					 */
   1419 					meta_sp_list_remove(head, alloc_ext);
   1420 					len -= alloc_len;
   1421 				}
   1422 			}
   1423 			last_seq++;
   1424 		}
   1425 
   1426 		if (len == 0LL)
   1427 			goto out;
   1428 
   1429 		/*
   1430 		 * Next, see if we can find a single allocation for
   1431 		 * the remainder.  This may make fragmentation worse
   1432 		 * in some cases, but there's no good way to allocate
   1433 		 * that doesn't have a highly fragmented corner case.
   1434 		 */
   1435 		for (free_ext = *head; free_ext != NULL;
   1436 		    free_ext = free_ext->ext_next) {
   1437 			sp_ext_offset_t	a_offset;
   1438 			sp_ext_offset_t	a_length;
   1439 
   1440 			if (free_ext->ext_type != EXTTYP_FREE)
   1441 				continue;
   1442 
   1443 			/*
   1444 			 * The length test should include space for
   1445 			 * the watermark
   1446 			 */
   1447 
   1448 			a_offset = free_ext->ext_offset;
   1449 			a_length = free_ext->ext_length;
   1450 
   1451 			if (alignment > 0) {
   1452 
   1453 				/*
   1454 				 * Shortcut for extents that have been
   1455 				 * previously added to pad out the
   1456 				 * data space
   1457 				 */
   1458 				if (a_length < alignment) {
   1459 					continue;
   1460 				}
   1461 
   1462 				/*
   1463 				 * Round up so the data space begins
   1464 				 * on a properly aligned boundary.
   1465 				 */
   1466 				a_offset += alignment -
   1467 				    (a_offset % alignment) - MD_SP_WMSIZE;
   1468 
   1469 				/*
   1470 				 * This is only necessary in case the
   1471 				 * watermark size is ever greater than
   1472 				 * one.  It'll never happen, of
   1473 				 * course; we'll get rid of watermarks
   1474 				 * before we make 'em bigger.
   1475 				 */
   1476 				if (a_offset < free_ext->ext_offset) {
   1477 					a_offset += alignment;
   1478 				}
   1479 
   1480 				/*
   1481 				 * Adjust the length to account for
   1482 				 * the space lost above (if any)
   1483 				 */
   1484 				a_length -=
   1485 				    (a_offset - free_ext->ext_offset);
   1486 			}
   1487 
   1488 			if (a_length >= len + MD_SP_WMSIZE) {
   1489 				meta_sp_alloc_by_ext(sp, np, head,
   1490 				    free_ext, a_offset,
   1491 				    len + MD_SP_WMSIZE, last_seq);
   1492 
   1493 				len = 0LL;
   1494 				numexts++;
   1495 				break;
   1496 			}
   1497 		}
   1498 
   1499 		if (len == 0LL)
   1500 			goto out;
   1501 
   1502 
   1503 		/*
   1504 		 * If the request could not be satisfied by extending
   1505 		 * the last extent or by a single extent, then put
   1506 		 * multiple smaller extents together until the request
   1507 		 * is satisfied.
   1508 		 */
   1509 		for (free_ext = *head; (free_ext != NULL) && (len > 0);
   1510 		    free_ext = free_ext->ext_next) {
   1511 			sp_ext_offset_t a_offset;
   1512 			sp_ext_length_t a_length;
   1513 
   1514 			if (free_ext->ext_type != EXTTYP_FREE)
   1515 				continue;
   1516 
   1517 			a_offset = free_ext->ext_offset;
   1518 			a_length = free_ext->ext_length;
   1519 
   1520 			if (alignment > 0) {
   1521 
   1522 				/*
   1523 				 * Shortcut for extents that have been
   1524 				 * previously added to pad out the
   1525 				 * data space
   1526 				 */
   1527 				if (a_length < alignment) {
   1528 					continue;
   1529 				}
   1530 
   1531 				/*
   1532 				 * Round up so the data space begins
   1533 				 * on a properly aligned boundary.
   1534 				 */
   1535 				a_offset += alignment -
   1536 				    (a_offset % alignment) - MD_SP_WMSIZE;
   1537 
   1538 				/*
   1539 				 * This is only necessary in case the
   1540 				 * watermark size is ever greater than
   1541 				 * one.  It'll never happen, of
   1542 				 * course; we'll get rid of watermarks
   1543 				 * before we make 'em bigger.
   1544 				 */
   1545 				if (a_offset < free_ext->ext_offset) {
   1546 					a_offset += alignment;
   1547 				}
   1548 
   1549 				/*
   1550 				 * Adjust the length to account for
   1551 				 * the space lost above (if any)
   1552 				 */
   1553 				a_length -=
   1554 				    (a_offset - free_ext->ext_offset);
   1555 
   1556 				/*
   1557 				 * Adjust the length to be properly
   1558 				 * aligned if it is NOT to be the
   1559 				 * last extent in the soft partition.
   1560 				 */
   1561 				if ((a_length - MD_SP_WMSIZE) < len)
   1562 					a_length -=
   1563 					    (a_length - MD_SP_WMSIZE)
   1564 					    % alignment;
   1565 			}
   1566 
   1567 			alloc_len = MIN(len, a_length - MD_SP_WMSIZE);
   1568 			if (alloc_len == 0)
   1569 				continue;
   1570 
   1571 			/*
   1572 			 * meta_sp_alloc_by_ext() expects the
   1573 			 * allocation length to include the watermark
   1574 			 * size, which is why we don't simply pass in
   1575 			 * alloc_len here.
   1576 			 */
   1577 			meta_sp_alloc_by_ext(sp, np, head, free_ext,
   1578 			    a_offset, MIN(len + MD_SP_WMSIZE, a_length),
   1579 			    last_seq);
   1580 
   1581 			len -= alloc_len;
   1582 			numexts++;
   1583 			last_seq++;
   1584 		}
   1585 
   1586 
   1587 		/*
   1588 		 * If there was not enough space we can throw it all
   1589 		 * away since no real work has been done yet.
   1590 		 */
   1591 		if (len != 0) {
   1592 			meta_sp_list_free(head);
   1593 			return (-1);
   1594 		}
   1595 	}
   1596 
   1597 	/*
   1598 	 * Otherwise, the literal "all" was specified: allocate all
   1599 	 * available free space.  Don't bother with alignment.
   1600 	 */
   1601 	else {
   1602 		/* First, extend the last extent if this is a grow */
   1603 		if (last_off != 0LL) {
   1604 			alloc_ext =
   1605 			    meta_sp_list_find(*head, last_off);
   1606 			assert(alloc_ext != NULL);
   1607 
   1608 			last_seq = alloc_ext->ext_seq;
   1609 
   1610 			free_ext = meta_sp_list_find(*head,
   1611 			    alloc_ext->ext_offset +
   1612 			    alloc_ext->ext_length);
   1613 
   1614 			/*
   1615 			 * If a free extent follows our last allocated
   1616 			 * extent, then remove the last allocated
   1617 			 * extent and increase the size of the free
   1618 			 * extent to overlap it, then allocate the
   1619 			 * total space from the new free extent.
   1620 			 */
   1621 			if (free_ext != NULL &&
   1622 			    free_ext->ext_type == EXTTYP_FREE) {
   1623 				assert(free_ext->ext_offset ==
   1624 				    alloc_ext->ext_offset +
   1625 				    alloc_ext->ext_length);
   1626 
   1627 				len = alloc_len =
   1628 				    free_ext->ext_length;
   1629 
   1630 				free_ext->ext_offset -=
   1631 				    alloc_ext->ext_length;
   1632 				free_ext->ext_length +=
   1633 				    alloc_ext->ext_length;
   1634 
   1635 				meta_sp_alloc_by_ext(sp, np, head,
   1636 				    free_ext, free_ext->ext_offset,
   1637 				    alloc_ext->ext_length + alloc_len,
   1638 				    last_seq);
   1639 
   1640 				/*
   1641 				 * now remove the original allocated
   1642 				 * node.  We may have overlapping
   1643 				 * extents for a short time before
   1644 				 * this node is removed.
   1645 				 */
   1646 				meta_sp_list_remove(head, alloc_ext);
   1647 			}
   1648 
   1649 			last_seq++;
   1650 		}
   1651 
   1652 		/* Next, grab all remaining free space */
   1653 		for (free_ext = *head; free_ext != NULL;
   1654 		    free_ext = free_ext->ext_next) {
   1655 
   1656 			if (free_ext->ext_type == EXTTYP_FREE) {
   1657 				alloc_len =
   1658 				    free_ext->ext_length - MD_SP_WMSIZE;
   1659 				if (alloc_len == 0)
   1660 					continue;
   1661 
   1662 				/*
   1663 				 * meta_sp_alloc_by_ext() expects the
   1664 				 * allocation length to include the
   1665 				 * watermark size, which is why we
   1666 				 * don't simply pass in alloc_len
   1667 				 * here.
   1668 				 */
   1669 				meta_sp_alloc_by_ext(sp, np, head,
   1670 				    free_ext, free_ext->ext_offset,
   1671 				    free_ext->ext_length,
   1672 				    last_seq);
   1673 
   1674 				len += alloc_len;
   1675 				numexts++;
   1676 				last_seq++;
   1677 			}
   1678 		}
   1679 	}
   1680 
   1681 out:
   1682 	if (getenv(META_SP_DEBUG)) {
   1683 		meta_sp_debug("meta_sp_alloc_by_len: Extent list after "
   1684 		    "allocation:\n");
   1685 		meta_sp_list_dump(*head);
   1686 	}
   1687 
   1688 	if (*lp == 0) {
   1689 		*lp = len;
   1690 
   1691 		/*
   1692 		 * Make sure the callers hit a no space error if we
   1693 		 * didn't actually find anything.
   1694 		 */
   1695 		if (len == 0) {
   1696 			return (-1);
   1697 		}
   1698 	}
   1699 
   1700 	return (numexts);
   1701 }
   1702 
   1703 /*
   1704  * FUNCTION:	meta_sp_alloc_by_list()
   1705  * INPUT:	sp	- the set name for the device the node belongs to
   1706  *		np	- the name of the device the node belongs to
   1707  *		head	- the head of the list, must be NULL for empty list
   1708  *		oblist	- an extent list containing requested nodes to allocate
   1709  * OUTPUT:	head	- the new head pointer
   1710  * RETURNS:	int	- -1 if error, the number of new extents on success
   1711  * PURPOSE:	allocates extents from free space to satisfy the requested
   1712  *		extent list.  This is primarily used for the -o/-b options
   1713  *		where the user may specifically request extents to allocate.
   1714  *		Each extent in the oblist must be a subset (inclusive) of a
   1715  *		free extent and may not overlap each other.  This
   1716  *		function sets the EXTFLG_UPDATE flag for each node that
   1717  *		requires a watermark update after allocating.
   1718  */
   1719 static int
   1720 meta_sp_alloc_by_list(
   1721 	mdsetname_t	*sp,
   1722 	mdname_t	*np,
   1723 	sp_ext_node_t	**head,
   1724 	sp_ext_node_t	*oblist
   1725 )
   1726 {
   1727 	sp_ext_node_t	*ext;
   1728 	sp_ext_node_t	*free_ext;
   1729 	uint_t		numexts = 0;
   1730 
   1731 	for (ext = oblist; ext != NULL; ext = ext->ext_next) {
   1732 
   1733 		free_ext = meta_sp_list_find(*head,
   1734 		    ext->ext_offset - MD_SP_WMSIZE);
   1735 
   1736 		/* Make sure the allocation is within the free extent */
   1737 		if ((free_ext == NULL) ||
   1738 		    (ext->ext_offset + ext->ext_length >
   1739 		    free_ext->ext_offset + free_ext->ext_length) ||
   1740 		    (free_ext->ext_type != EXTTYP_FREE))
   1741 			return (-1);
   1742 
   1743 		meta_sp_alloc_by_ext(sp, np, head, free_ext,
   1744 		    ext->ext_offset - MD_SP_WMSIZE,
   1745 		    ext->ext_length + MD_SP_WMSIZE, ext->ext_seq);
   1746 
   1747 		numexts++;
   1748 	}
   1749 
   1750 	assert(meta_sp_list_overlaps(*head) == 0);
   1751 
   1752 	if (getenv(META_SP_DEBUG)) {
   1753 		meta_sp_debug("meta_sp_alloc_by_list: Extent list after "
   1754 		    "allocation:\n");
   1755 		meta_sp_list_dump(*head);
   1756 	}
   1757 
   1758 	return (numexts);
   1759 }
   1760 
   1761 /*
   1762  * **************************************************************************
   1763  *                     Extent List Population Functions                     *
   1764  * **************************************************************************
   1765  */
   1766 
   1767 /*
   1768  * FUNCTION:	meta_sp_extlist_from_namelist()
   1769  * INPUT:	sp	- the set name for the device the node belongs to
   1770  *		spnplp	- the namelist of soft partitions to build a list from
   1771  * OUTPUT:	extlist	- the extent list built from the SPs in the namelist
   1772  *		ep	- return error pointer
   1773  * RETURNS:	int	- -1 if error, 0 on success
   1774  * PURPOSE:	builds an extent list representing the soft partitions
   1775  *		specified in the namelist.  Each extent in each soft
   1776  *		partition is added to the list with the type EXTTYP_ALLOC.
   1777  *		The EXTFLG_UPDATE flag is not set on any nodes.  Each
   1778  *		extent in the list includes the space occupied by the
   1779  *		watermark, which is not included in the unit structures.
   1780  */
   1781 static int
   1782 meta_sp_extlist_from_namelist(
   1783 	mdsetname_t	*sp,
   1784 	mdnamelist_t	*spnlp,
   1785 	sp_ext_node_t	**extlist,
   1786 	md_error_t	*ep
   1787 )
   1788 {
   1789 	int		extn;
   1790 	md_sp_t		*msp;		/* unit structure of the sp's */
   1791 	mdnamelist_t	*namep;
   1792 
   1793 	assert(sp != NULL);
   1794 
   1795 	/*
   1796 	 * Now go through the soft partitions and add a node to the used
   1797 	 * list for each allocated extent.
   1798 	 */
   1799 	for (namep = spnlp; namep != NULL; namep = namep->next) {
   1800 		mdname_t	*curnp = namep->namep;
   1801 
   1802 		/* get the unit structure */
   1803 		if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL)
   1804 			return (-1);
   1805 
   1806 		for (extn = 0; (extn < msp->ext.ext_len); extn++) {
   1807 			md_sp_ext_t	*extp = &msp->ext.ext_val[extn];
   1808 
   1809 			/*
   1810 			 * subtract from offset and add to the length
   1811 			 * to account for the watermark, which is not
   1812 			 * contained in the extents in the unit structure.
   1813 			 */
   1814 			meta_sp_list_insert(sp, curnp, extlist,
   1815 			    extp->poff - MD_SP_WMSIZE, extp->len + MD_SP_WMSIZE,
   1816 			    EXTTYP_ALLOC, extn, 0, meta_sp_cmp_by_offset);
   1817 		}
   1818 	}
   1819 	return (0);
   1820 }
   1821 
   1822 /*
   1823  * FUNCTION:	meta_sp_extlist_from_wm()
   1824  * INPUT:	sp	- the set name for the device the node belongs to
   1825  *		compnp	- the name of the device to scan watermarks on
   1826  * OUTPUT:	extlist	- the extent list built from the SPs in the namelist
   1827  *		ep	- return error pointer
   1828  * RETURNS:	int	- -1 if error, 0 on success
   1829  * PURPOSE:	builds an extent list representing the soft partitions
   1830  *		specified in the namelist.  Each extent in each soft
   1831  *		partition is added to the list with the type EXTTYP_ALLOC.
   1832  *		The EXTFLG_UPDATE flag is not set on any nodes.  Each
   1833  *		extent in the list includes the space occupied by the
   1834  *		watermark, which is not included in the unit structures.
   1835  */
   1836 static int
   1837 meta_sp_extlist_from_wm(
   1838 	mdsetname_t	*sp,
   1839 	mdname_t	*compnp,
   1840 	sp_ext_node_t	**extlist,
   1841 	ext_cmpfunc_t	compare,
   1842 	md_error_t	*ep
   1843 )
   1844 {
   1845 	mp_watermark_t	wm;
   1846 	mdname_t	*np = NULL;
   1847 	mdsetname_t	*spsetp = NULL;
   1848 	sp_ext_offset_t	cur_off;
   1849 	md_set_desc	*sd;
   1850 	int		init = 0;
   1851 	mdkey_t		key;
   1852 	minor_t		mnum;
   1853 
   1854 	if (!metaislocalset(sp)) {
   1855 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
   1856 			return (-1);
   1857 	}
   1858 
   1859 	if ((cur_off = meta_sp_get_start(sp, compnp, ep)) == MD_DISKADDR_ERROR)
   1860 		return (-1);
   1861 
   1862 	for (;;) {
   1863 		if (meta_sp_read_wm(sp, compnp, &wm, cur_off, ep) != 0) {
   1864 			return (-1);
   1865 		}
   1866 
   1867 		/* get the set and name pointers */
   1868 		if (strcmp(wm.wm_setname, MD_SP_LOCALSETNAME) != 0) {
   1869 			if ((spsetp = metasetname(wm.wm_setname, ep)) == NULL) {
   1870 				return (-1);
   1871 			}
   1872 		}
   1873 
   1874 		/*
   1875 		 * For the MN set, meta_init_make_device needs to
   1876 		 * be run on all the nodes so the entries for the
   1877 		 * softpart device name and its comp can be created
   1878 		 * in the same order in the replica namespace.  If
   1879 		 * we have it run on mdmn_do_iocset then the mddbs
   1880 		 * will be out of sync between master node and slave
   1881 		 * nodes.
   1882 		 */
   1883 		if (strcmp(wm.wm_mdname, MD_SP_FREEWMNAME) != 0) {
   1884 
   1885 			if (!metaislocalset(sp) && MD_MNSET_DESC(sd)) {
   1886 				md_mn_msg_addmdname_t	*send_params;
   1887 				int			result;
   1888 				md_mn_result_t		*resp = NULL;
   1889 				int			message_size;
   1890 
   1891 				message_size =  sizeof (*send_params) +
   1892 				    strlen(wm.wm_mdname) + 1;
   1893 				send_params = Zalloc(message_size);
   1894 				send_params->addmdname_setno = sp->setno;
   1895 				(void) strcpy(&send_params->addmdname_name[0],
   1896 				    wm.wm_mdname);
   1897 				result = mdmn_send_message(sp->setno,
   1898 				    MD_MN_MSG_ADDMDNAME,
   1899 				    MD_MSGF_PANIC_WHEN_INCONSISTENT, 0,
   1900 				    (char *)send_params, message_size, &resp,
   1901 				    ep);
   1902 				Free(send_params);
   1903 				if (resp != NULL) {
   1904 					if (resp->mmr_exitval != 0) {
   1905 						free_result(resp);
   1906 						return (-1);
   1907 					}
   1908 					free_result(resp);
   1909 				}
   1910 				if (result != 0)
   1911 					return (-1);
   1912 			} else {
   1913 
   1914 				if (!is_existing_meta_hsp(sp, wm.wm_mdname)) {
   1915 					if ((key = meta_init_make_device(&sp,
   1916 					    wm.wm_mdname, ep)) <= 0) {
   1917 						return (-1);
   1918 					}
   1919 					init = 1;
   1920 				}
   1921 			}
   1922 
   1923 			np = metaname(&spsetp, wm.wm_mdname, META_DEVICE, ep);
   1924 			if (np == NULL) {
   1925 				if (init) {
   1926 					if (meta_getnmentbykey(sp->setno,
   1927 					    MD_SIDEWILD, key, NULL, &mnum,
   1928 					    NULL, ep) != NULL) {
   1929 						(void) metaioctl(MD_IOCREM_DEV,
   1930 						    &mnum, ep, NULL);
   1931 					}
   1932 					(void) del_self_name(sp, key, ep);
   1933 				}
   1934 				return (-1);
   1935 			}
   1936 		}
   1937 
   1938 		/* insert watermark into extent list */
   1939 		meta_sp_list_insert(spsetp, np, extlist, cur_off,
   1940 		    wm.wm_length + MD_SP_WMSIZE, wm.wm_type, wm.wm_seq,
   1941 		    EXTFLG_UPDATE, compare);
   1942 
   1943 		/* if we see the end watermark, we're done */
   1944 		if (wm.wm_type == EXTTYP_END)
   1945 			break;
   1946 
   1947 		cur_off += wm.wm_length + 1;
   1948 
   1949 		/* clear out set and name pointers for next iteration */
   1950 		np = NULL;
   1951 		spsetp = NULL;
   1952 	}
   1953 
   1954 	return (0);
   1955 }
   1956 
   1957 /*
   1958  * **************************************************************************
   1959  *                        Print (metastat) Functions                        *
   1960  * **************************************************************************
   1961  */
   1962 
   1963 /*
   1964  * FUNCTION:	meta_sp_short_print()
   1965  * INPUT:	msp	- the unit structure to display
   1966  *		fp	- the file pointer to send output to
   1967  *		options	- print options from the command line processor
   1968  * OUTPUT:	ep	- return error pointer
   1969  * RETURNS:	int	- -1 if error, 0 on success
   1970  * PURPOSE:	display a short report of the soft partition in md.tab
   1971  *		form, primarily used for metastat -p.
   1972  */
   1973 static int
   1974 meta_sp_short_print(
   1975 	md_sp_t		*msp,
   1976 	char		*fname,
   1977 	FILE		*fp,
   1978 	mdprtopts_t	options,
   1979 	md_error_t	*ep
   1980 )
   1981 {
   1982 	int	extn;
   1983 
   1984 	if (options & PRINT_LARGEDEVICES) {
   1985 		if ((msp->common.revision & MD_64BIT_META_DEV) == 0)
   1986 			return (0);
   1987 	}
   1988 
   1989 	if (options & PRINT_FN) {
   1990 		if ((msp->common.revision & MD_FN_META_DEV) == 0)
   1991 			return (0);
   1992 	}
   1993 
   1994 	/* print name and -p */
   1995 	if (fprintf(fp, "%s -p", msp->common.namep->cname) == EOF)
   1996 		return (mdsyserror(ep, errno, fname));
   1997 
   1998 	/* print the component */
   1999 	/*
   2000 	 * Always print the full path name
   2001 	 */
   2002 	if (fprintf(fp, " %s", msp->compnamep->rname) == EOF)
   2003 		return (mdsyserror(ep, errno, fname));
   2004 
   2005 	/* print out each extent */
   2006 	for (extn = 0; (extn < msp->ext.ext_len); extn++) {
   2007 		md_sp_ext_t	*extp = &msp->ext.ext_val[extn];
   2008 		if (fprintf(fp, " -o %llu -b %llu ", extp->poff,
   2009 		    extp->len) == EOF)
   2010 			return (mdsyserror(ep, errno, fname));
   2011 	}
   2012 
   2013 	if (fprintf(fp, "\n") == EOF)
   2014 		return (mdsyserror(ep, errno, fname));
   2015 
   2016 	/* success */
   2017 	return (0);
   2018 }
   2019 
   2020 /*
   2021  * FUNCTION:	meta_sp_status_to_name()
   2022  * INPUT:	xsp_status	- the status value to convert to a string
   2023  *		tstate		- transient errored device state. If set the
   2024  *				  device is Unavailable
   2025  * OUTPUT:	none
   2026  * RETURNS:	char *	- a pointer to the string representing the status value
   2027  * PURPOSE:	return an internationalized string representing the
   2028  *		status value for a soft partition.  The strings are
   2029  *		strdup'd and must be freed by the caller.
   2030  */
   2031 static char *
   2032 meta_sp_status_to_name(
   2033 	xsp_status_t	xsp_status,
   2034 	uint_t		tstate
   2035 )
   2036 {
   2037 	char *rval = NULL;
   2038 
   2039 	/*
   2040 	 * Check to see if we have MD_INACCESSIBLE set. This is the only valid
   2041 	 * value for an 'Unavailable' return. tstate can be set because of
   2042 	 * other multi-node reasons (e.g. ABR being set)
   2043 	 */
   2044 	if (tstate & MD_INACCESSIBLE) {
   2045 		return (Strdup(dgettext(TEXT_DOMAIN, "Unavailable")));
   2046 	}
   2047 
   2048 	switch (xsp_status) {
   2049 	case MD_SP_CREATEPEND:
   2050 		rval = Strdup(dgettext(TEXT_DOMAIN, "Creating"));
   2051 		break;
   2052 	case MD_SP_GROWPEND:
   2053 		rval = Strdup(dgettext(TEXT_DOMAIN, "Growing"));
   2054 		break;
   2055 	case MD_SP_DELPEND:
   2056 		rval = Strdup(dgettext(TEXT_DOMAIN, "Deleting"));
   2057 		break;
   2058 	case MD_SP_OK:
   2059 		rval = Strdup(dgettext(TEXT_DOMAIN, "Okay"));
   2060 		break;
   2061 	case MD_SP_ERR:
   2062 		rval = Strdup(dgettext(TEXT_DOMAIN, "Errored"));
   2063 		break;
   2064 	case MD_SP_RECOVER:
   2065 		rval = Strdup(dgettext(TEXT_DOMAIN, "Recovering"));
   2066 		break;
   2067 	}
   2068 
   2069 	if (rval == NULL)
   2070 		rval = Strdup(dgettext(TEXT_DOMAIN, "Invalid"));
   2071 
   2072 	return (rval);
   2073 }
   2074 
   2075 /*
   2076  * FUNCTION:	meta_sp_report()
   2077  * INPUT:	sp	- the set name for the unit being displayed
   2078  *		msp	- the unit structure to display
   2079  *		nlpp	- pass back the large devs
   2080  *		fp	- the file pointer to send output to
   2081  *		options	- print options from the command line processor
   2082  * OUTPUT:	ep	- return error pointer
   2083  * RETURNS:	int	- -1 if error, 0 on success
   2084  * PURPOSE:	print a full report of the device specified
   2085  */
   2086 static int
   2087 meta_sp_report(
   2088 	mdsetname_t	*sp,
   2089 	md_sp_t		*msp,
   2090 	mdnamelist_t	**nlpp,
   2091 	char		*fname,
   2092 	FILE		*fp,
   2093 	mdprtopts_t	options,
   2094 	md_error_t	*ep
   2095 )
   2096 {
   2097 	uint_t		extn;
   2098 	char		*status;
   2099 	char		*devid = "";
   2100 	mdname_t	*didnp = NULL;
   2101 	ddi_devid_t	dtp;
   2102 	int		len;
   2103 	uint_t		tstate = 0;
   2104 
   2105 	if (options & PRINT_LARGEDEVICES) {
   2106 		if ((msp->common.revision & MD_64BIT_META_DEV) == 0) {
   2107 			return (0);
   2108 		} else {
   2109 			if (meta_getdevs(sp, msp->common.namep, nlpp, ep) != 0)
   2110 				return (-1);
   2111 		}
   2112 	}
   2113 
   2114 	if (options & PRINT_FN) {
   2115 		if ((msp->common.revision & MD_FN_META_DEV) == 0) {
   2116 			return (0);
   2117 		} else {
   2118 			if (meta_getdevs(sp, msp->common.namep, nlpp, ep) != 0)
   2119 				return (-1);
   2120 		}
   2121 	}
   2122 
   2123 	if (options & PRINT_HEADER) {
   2124 		if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: Soft Partition\n"),
   2125 		    msp->common.namep->cname) == EOF)
   2126 			return (mdsyserror(ep, errno, fname));
   2127 	}
   2128 
   2129 	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Device: %s\n"),
   2130 	    msp->compnamep->cname) == EOF)
   2131 		return (mdsyserror(ep, errno, fname));
   2132 
   2133 	/* Determine if device is available before displaying status */
   2134 	if (metaismeta(msp->common.namep)) {
   2135 		if (meta_get_tstate(msp->common.namep->dev, &tstate, ep) != 0)
   2136 			return (-1);
   2137 	}
   2138 	status = meta_sp_status_to_name(msp->status, tstate & MD_DEV_ERRORED);
   2139 
   2140 	/* print out "State" to be consistent with other metadevices */
   2141 	if (tstate & MD_ABR_CAP) {
   2142 		if (fprintf(fp, dgettext(TEXT_DOMAIN,
   2143 		    "    State: %s - Application Based Recovery (ABR)\n"),
   2144 		    status) == EOF) {
   2145 			Free(status);
   2146 			return (mdsyserror(ep, errno, fname));
   2147 		}
   2148 	} else {
   2149 		if (fprintf(fp, dgettext(TEXT_DOMAIN,
   2150 		    "    State: %s\n"), status) == EOF) {
   2151 			Free(status);
   2152 			return (mdsyserror(ep, errno, fname));
   2153 		}
   2154 	}
   2155 	free(status);
   2156 
   2157 	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Size: %llu blocks (%s)\n"),
   2158 	    msp->common.size,
   2159 	    meta_number_to_string(msp->common.size, DEV_BSIZE)) == EOF)
   2160 		return (mdsyserror(ep, errno, fname));
   2161 
   2162 	/* print component details */
   2163 	if (! metaismeta(msp->compnamep)) {
   2164 		diskaddr_t	start_blk;
   2165 		int		has_mddb;
   2166 		char		*has_mddb_str;
   2167 
   2168 		/* print header */
   2169 		/*
   2170 		 * Building a format string on the fly that will
   2171 		 * be used in (f)printf. This allows the length
   2172 		 * of the ctd to vary from small to large without
   2173 		 * looking horrible.
   2174 		 */
   2175 		len = strlen(msp->compnamep->cname);
   2176 		len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device")));
   2177 		len += 2;
   2178 		if (fprintf(fp,
   2179 		    "\t%-*.*s %-12.12s %-5.5s %s\n",
   2180 		    len, len,
   2181 		    dgettext(TEXT_DOMAIN, "Device"),
   2182 		    dgettext(TEXT_DOMAIN, "Start Block"),
   2183 		    dgettext(TEXT_DOMAIN, "Dbase"),
   2184 		    dgettext(TEXT_DOMAIN, "Reloc")) == EOF) {
   2185 			return (mdsyserror(ep, errno, fname));
   2186 		}
   2187 
   2188 
   2189 		/* get info */
   2190 		if ((start_blk = meta_sp_get_start(sp, msp->compnamep, ep)) ==
   2191 		    MD_DISKADDR_ERROR)
   2192 			return (-1);
   2193 
   2194 		if ((has_mddb = metahasmddb(sp, msp->compnamep, ep)) < 0)
   2195 			return (-1);
   2196 
   2197 		if (has_mddb)
   2198 			has_mddb_str = dgettext(TEXT_DOMAIN, "Yes");
   2199 		else
   2200 			has_mddb_str = dgettext(TEXT_DOMAIN, "No");
   2201 
   2202 		/* populate the key in the name_p structure */
   2203 		didnp = metadevname(&sp, msp->compnamep->dev, ep);
   2204 		if (didnp == NULL) {
   2205 			return (-1);
   2206 		}
   2207 
   2208 		/* determine if devid does NOT exist */
   2209 		if (options & PRINT_DEVID) {
   2210 			if ((dtp = meta_getdidbykey(sp->setno,
   2211 			    getmyside(sp, ep), didnp->key, ep)) == NULL)
   2212 				devid = dgettext(TEXT_DOMAIN, "No ");
   2213 			else {
   2214 				devid = dgettext(TEXT_DOMAIN, "Yes");
   2215 				free(dtp);
   2216 			}
   2217 		}
   2218 
   2219 		/* print info */
   2220 		/*
   2221 		 * This allows the length
   2222 		 * of the ctd to vary from small to large without
   2223 		 * looking horrible.
   2224 		 */
   2225 		if (fprintf(fp, "\t%-*s %8lld     %-5.5s %s\n",
   2226 		    len, msp->compnamep->cname,
   2227 		    start_blk, has_mddb_str, devid) == EOF) {
   2228 			return (mdsyserror(ep, errno, fname));
   2229 		}
   2230 		(void) fprintf(fp, "\n");
   2231 	}
   2232 
   2233 
   2234 	/* print the headers */
   2235 	if (fprintf(fp, "\t%6.6s %24.24s %24.24s\n",
   2236 	    dgettext(TEXT_DOMAIN, "Extent"),
   2237 	    dgettext(TEXT_DOMAIN, "Start Block"),
   2238 	    dgettext(TEXT_DOMAIN, "Block count")) == EOF)
   2239 		return (mdsyserror(ep, errno, fname));
   2240 
   2241 	/* print out each extent */
   2242 	for (extn = 0; (extn < msp->ext.ext_len); extn++) {
   2243 		md_sp_ext_t	*extp = &msp->ext.ext_val[extn];
   2244 
   2245 		/* If PRINT_TIMES option is ever supported, add output here */
   2246 		if (fprintf(fp, "\t%6u %24llu %24llu\n",
   2247 		    extn, extp->poff, extp->len) == EOF)
   2248 			return (mdsyserror(ep, errno, fname));
   2249 	}
   2250 
   2251 	/* separate records with a newline */
   2252 	(void) fprintf(fp, "\n");
   2253 	return (0);
   2254 }
   2255 
   2256 /*
   2257  * FUNCTION:	meta_sp_print()
   2258  * INPUT:	sp	- the set name for the unit being displayed
   2259  *		np	- the name of the device to print
   2260  *		fname	- ??? not used
   2261  *		fp	- the file pointer to send output to
   2262  *		options	- print options from the command line processor
   2263  * OUTPUT:	ep	- return error pointer
   2264  * RETURNS:	int	- -1 if error, 0 on success
   2265  * PURPOSE:	print a full report of the device specified by metastat.
   2266  *		This is the main entry point for printing.
   2267  */
   2268 int
   2269 meta_sp_print(
   2270 	mdsetname_t	*sp,
   2271 	mdname_t	*np,
   2272 	mdnamelist_t	**nlpp,
   2273 	char		*fname,
   2274 	FILE		*fp,
   2275 	mdprtopts_t	options,
   2276 	md_error_t	*ep
   2277 )
   2278 {
   2279 	md_sp_t		*msp;
   2280 	md_unit_t	*mdp;
   2281 	int		rval = 0;
   2282 	set_t		setno;
   2283 	minor_t		unit;
   2284 
   2285 	/* should always have the same set */
   2286 	assert(sp != NULL);
   2287 
   2288 	/* print all the soft partitions */
   2289 	if (np == NULL) {
   2290 		mdnamelist_t	*nlp = NULL;
   2291 		mdnamelist_t	*p;
   2292 		int		cnt;
   2293 
   2294 		if ((cnt = meta_get_sp_names(sp, &nlp, options, ep)) < 0)
   2295 			return (-1);
   2296 		else if (cnt == 0)
   2297 			return (0);
   2298 
   2299 		/* recusively print them out */
   2300 		for (p = nlp; (p != NULL); p = p->next) {
   2301 			mdname_t	*curnp = p->namep;
   2302 
   2303 			/*
   2304 			 * one problem with the rval of -1 here is that
   2305 			 * the error gets "lost" when the next device is
   2306 			 * printed, but we want to print them all anyway.
   2307 			 */
   2308 			rval = meta_sp_print(sp, curnp, nlpp, fname, fp,
   2309 			    options, ep);
   2310 		}
   2311 
   2312 		/* clean up, return success */
   2313 		metafreenamelist(nlp);
   2314 		return (rval);
   2315 	}
   2316 
   2317 	/* get the unit structure */
   2318 	if ((msp = meta_get_sp_common(sp, np,
   2319 	    ((options & PRINT_FAST) ? 1 : 0), ep)) == NULL)
   2320 		return (-1);
   2321 
   2322 	/* check for parented */
   2323 	if ((! (options & PRINT_SUBDEVS)) &&
   2324 	    (MD_HAS_PARENT(msp->common.parent))) {
   2325 		return (0);
   2326 	}
   2327 
   2328 	/* print appropriate detail */
   2329 	if (options & PRINT_SHORT) {
   2330 		if (meta_sp_short_print(msp, fname, fp, options, ep) != 0)
   2331 			return (-1);
   2332 	} else {
   2333 		if (meta_sp_report(sp, msp, nlpp, fname, fp, options, ep) != 0)
   2334 			return (-1);
   2335 	}
   2336 
   2337 	/*
   2338 	 * Print underlying metadevices if they are parented to us and
   2339 	 * if the info for the underlying metadevice has not been printed.
   2340 	 */
   2341 	if (metaismeta(msp->compnamep)) {
   2342 		/* get the unit structure for the subdevice */
   2343 		if ((mdp = meta_get_mdunit(sp, msp->compnamep, ep)) == NULL)
   2344 			return (-1);
   2345 
   2346 		setno = MD_MIN2SET(MD_SID(mdp));
   2347 		unit = MD_MIN2UNIT(MD_SID(mdp));
   2348 
   2349 		/* If info not already printed, recurse */
   2350 		if (sp_parent_printed[setno] == NULL ||
   2351 		    !BT_TEST(sp_parent_printed[setno], unit)) {
   2352 			if (meta_print_name(sp, msp->compnamep, nlpp, fname, fp,
   2353 			    (options | PRINT_HEADER | PRINT_SUBDEVS),
   2354 			    NULL, ep) != 0) {
   2355 				return (-1);
   2356 			}
   2357 			if (sp_parent_printed[setno] == NULL)
   2358 				sp_parent_printed[setno] =
   2359 				    Zalloc(BT_BITOUL(MD_MAXUNITS));
   2360 			BT_SET(sp_parent_printed[setno], unit);
   2361 		}
   2362 	}
   2363 	return (0);
   2364 }
   2365 
   2366 /*
   2367  * **************************************************************************
   2368  *                     Watermark Manipulation Functions                     *
   2369  * **************************************************************************
   2370  */
   2371 
   2372 /*
   2373  * FUNCTION:	meta_sp_get_start()
   2374  * INPUT:	sp	- the operating set
   2375  *		np 	- device upon which the sp is being built
   2376  * OUTPUT:	ep	- return error pointer
   2377  * RETURNS:	daddr_t	- -1 if error, otherwise the start block
   2378  * PURPOSE:	Encapsulate the determination of the start block of the
   2379  *		device upon which the sp is built or being built.
   2380  */
   2381 static diskaddr_t
   2382 meta_sp_get_start(
   2383 	mdsetname_t	*sp,
   2384 	mdname_t	*np,
   2385 	md_error_t	*ep
   2386 )
   2387 {
   2388 	daddr_t		start_block;
   2389 
   2390 	if ((start_block = metagetstart(sp, np, ep)) != MD_DISKADDR_ERROR)
   2391 		start_block += MD_SP_START;
   2392 
   2393 	return (start_block);
   2394 }
   2395 
   2396 /*
   2397  * FUNCTION:	meta_sp_update_wm_common()
   2398  * INPUT:	sp	- the operating set
   2399  *		msp	- a pointer to the XDR unit structure
   2400  *		extlist	- the extent list specifying watermarks to update
   2401  *		iocval	- either MD_IOC_SPUPDATEWM or MD_MN_IOC_SPUPDATEWM
   2402  * OUTPUT:	ep	- return error pointer
   2403  * RETURNS:	int	- -1 if error, 0 on success
   2404  * PURPOSE:	steps backwards through the extent list updating
   2405  *		watermarks for all extents with the EXTFLG_UPDATE flag
   2406  *		set.  Writing the watermarks guarantees consistency when
   2407  *		extents must be broken into pieces since the original
   2408  *		watermark will be the last to be updated, and will be
   2409  *		changed to point to a new watermark that is already
   2410  *		known to be consistent.  If one of the writes fails, the
   2411  *		original watermark stays intact and none of the changes
   2412  *		are realized.
   2413  */
   2414 static int
   2415 meta_sp_update_wm_common(
   2416 	mdsetname_t	*sp,
   2417 	md_sp_t		*msp,
   2418 	sp_ext_node_t	*extlist,
   2419 	int		iocval,
   2420 	md_error_t	*ep
   2421 )
   2422 {
   2423 	sp_ext_node_t	*ext;
   2424 	sp_ext_node_t	*tail;
   2425 	mp_watermark_t	*wmp, *watermarks;
   2426 	xsp_offset_t	*osp, *offsets;
   2427 	int		update_count = 0;
   2428 	int		rval = 0;
   2429 	md_unit_t	*mdp;
   2430 	md_sp_update_wm_t	update_params;
   2431 
   2432 	if (getenv(META_SP_DEBUG)) {
   2433 		meta_sp_debug("meta_sp_update_wm: Updating watermarks:\n");
   2434 		meta_sp_list_dump(extlist);
   2435 	}
   2436 
   2437 	/*
   2438 	 * find the last node so we can write the watermarks backwards
   2439 	 * and count watermarks to update so we can allocate space
   2440 	 */
   2441 	for (ext = extlist; ext != NULL; ext = ext->ext_next) {
   2442 		if ((ext->ext_flags & EXTFLG_UPDATE) != 0) {
   2443 			update_count++;
   2444 		}
   2445 
   2446 		if (ext->ext_next == NULL) {
   2447 			tail = ext;
   2448 		}
   2449 	}
   2450 	ext = tail;
   2451 
   2452 	wmp = watermarks =
   2453 	    Zalloc(update_count * sizeof (mp_watermark_t));
   2454 	osp = offsets =
   2455 	    Zalloc(update_count * sizeof (sp_ext_offset_t));
   2456 
   2457 	while (ext != NULL) {
   2458 		if ((ext->ext_flags & EXTFLG_UPDATE) != 0) {
   2459 			/* update watermark */
   2460 			wmp->wm_magic = MD_SP_MAGIC;
   2461 			wmp->wm_version = MD_SP_VERSION;
   2462 			wmp->wm_type = ext->ext_type;
   2463 			wmp->wm_seq = ext->ext_seq;
   2464 			wmp->wm_length = ext->ext_length - MD_SP_WMSIZE;
   2465 
   2466 			/* fill in the volume name and set name */
   2467 			if (ext->ext_namep != NULL)
   2468 				(void) strcpy(wmp->wm_mdname,
   2469 				    ext->ext_namep->cname);
   2470 			else
   2471 				(void) strcpy(wmp->wm_mdname, MD_SP_FREEWMNAME);
   2472 			if (ext->ext_setp != NULL &&
   2473 			    ext->ext_setp->setno != MD_LOCAL_SET)
   2474 				(void) strcpy(wmp->wm_setname,
   2475 				    ext->ext_setp->setname);
   2476 			else
   2477 				(void) strcpy(wmp->wm_setname,
   2478 				    MD_SP_LOCALSETNAME);
   2479 
   2480 			/* Generate the checksum */
   2481 			wmp->wm_checksum = 0;
   2482 			crcgen((uchar_t *)wmp, (uint_t *)&wmp->wm_checksum,
   2483 			    sizeof (*wmp), NULL);
   2484 
   2485 			/* record the extent offset */
   2486 			*osp = ext->ext_offset;
   2487 
   2488 			/* Advance the placeholders */
   2489 			osp++; wmp++;
   2490 		}
   2491 		ext = ext->ext_prev;
   2492 	}
   2493 
   2494 	mdp = meta_get_mdunit(sp, msp->common.namep, ep);
   2495 	if (mdp == NULL) {
   2496 		rval = -1;
   2497 		goto out;
   2498 	}
   2499 
   2500 	(void) memset(&update_params, 0, sizeof (update_params));
   2501 	update_params.mnum = MD_SID(mdp);
   2502 	update_params.count = update_count;
   2503 	update_params.wmp = (uintptr_t)watermarks;
   2504 	update_params.osp = (uintptr_t)offsets;
   2505 	MD_SETDRIVERNAME(&update_params, MD_SP,
   2506 	    MD_MIN2SET(update_params.mnum));
   2507 
   2508 	if (metaioctl(iocval, &update_params, &update_params.mde,
   2509 	    msp->common.namep->cname) != 0) {
   2510 		(void) mdstealerror(ep, &update_params.mde);
   2511 		rval = -1;
   2512 		goto out;
   2513 	}
   2514 
   2515 out:
   2516 	Free(watermarks);
   2517 	Free(offsets);
   2518 
   2519 	return (rval);
   2520 }
   2521 
   2522 static int
   2523 meta_sp_update_wm(
   2524 	mdsetname_t	*sp,
   2525 	md_sp_t		*msp,
   2526 	sp_ext_node_t	*extlist,
   2527 	md_error_t	*ep
   2528 )
   2529 {
   2530 	return (meta_sp_update_wm_common(sp, msp, extlist, MD_IOC_SPUPDATEWM,
   2531 	    ep));
   2532 }
   2533 
   2534 static int
   2535 meta_mn_sp_update_wm(
   2536 	mdsetname_t	*sp,
   2537 	md_sp_t		*msp,
   2538 	sp_ext_node_t	*extlist,
   2539 	md_error_t	*ep
   2540 )
   2541 {
   2542 	return (meta_sp_update_wm_common(sp, msp, extlist, MD_MN_IOC_SPUPDATEWM,
   2543 	    ep));
   2544 }
   2545 
   2546 /*
   2547  * FUNCTION:	meta_sp_clear_wm()
   2548  * INPUT:	sp	- the operating set
   2549  *		msp	- the unit structure for the soft partition to clear
   2550  * OUTPUT:	ep	- return error pointer
   2551  * RETURNS:	int	- -1 if error, 0 on success
   2552  * PURPOSE:	steps through the extents for a soft partition unit and
   2553  *		creates an extent list designed to mark all of the
   2554  *		watermarks for those extents as free.  The extent list
   2555  *		is then passed to meta_sp_update_wm() to actually write
   2556  *		the watermarks out.
   2557  */
   2558 static int
   2559 meta_sp_clear_wm(
   2560 	mdsetname_t	*sp,
   2561 	md_sp_t		*msp,
   2562 	md_error_t	*ep
   2563 )
   2564 {
   2565 	sp_ext_node_t	*extlist = NULL;
   2566 	int		numexts = msp->ext.ext_len;
   2567 	uint_t		i;
   2568 	int		rval = 0;
   2569 
   2570 	/* for each watermark must set the flag to SP_FREE */
   2571 	for (i = 0; i < numexts; i++) {
   2572 		md_sp_ext_t	*extp = &msp->ext.ext_val[i];
   2573 
   2574 		meta_sp_list_insert(NULL, NULL, &extlist,
   2575 		    extp->poff - MD_SP_WMSIZE, extp->len + MD_SP_WMSIZE,
   2576 		    EXTTYP_FREE, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
   2577 	}
   2578 
   2579 	/* update watermarks */
   2580 	rval = meta_sp_update_wm(sp, msp, extlist, ep);
   2581 
   2582 	meta_sp_list_free(&extlist);
   2583 	return (rval);
   2584 }
   2585 
   2586 /*
   2587  * FUNCTION:	meta_sp_read_wm()
   2588  * INPUT:	sp	- setname for component
   2589  *		compnp	- mdname_t for component
   2590  *		offset	- the offset of the watermark to read (sectors)
   2591  * OUTPUT:	wm	- the watermark structure to read into
   2592  *		ep	- return error pointer
   2593  * RETURNS:	int	- -1 if error, 0 on success
   2594  * PURPOSE:	seeks out to the requested offset and reads a watermark.
   2595  *		It then verifies that the magic number is correct and
   2596  *		that the checksum is valid, returning an error if either
   2597  *		is wrong.
   2598  */
   2599 static int
   2600 meta_sp_read_wm(
   2601 	mdsetname_t	*sp,
   2602 	mdname_t	*compnp,
   2603 	mp_watermark_t	*wm,
   2604 	sp_ext_offset_t	offset,
   2605 	md_error_t	*ep
   2606 )
   2607 {
   2608 	md_sp_read_wm_t	read_params;
   2609 
   2610 	/*
   2611 	 * make sure block offset does not overflow 2^64 bytes and it's a
   2612 	 * multiple of the block size.
   2613 	 */
   2614 	assert(offset <= (1LL << (64 - DEV_BSHIFT)));
   2615 	/* LINTED */
   2616 	assert((sizeof (*wm) % DEV_BSIZE) == 0);
   2617 
   2618 	(void) memset(wm, 0, sizeof (*wm));
   2619 
   2620 	(void) memset(&read_params, 0, sizeof (read_params));
   2621 	read_params.rdev = compnp->dev;
   2622 	read_params.wmp = (uintptr_t)wm;
   2623 	read_params.offset = offset;
   2624 	MD_SETDRIVERNAME(&read_params, MD_SP, sp->setno);
   2625 
   2626 	if (metaioctl(MD_IOC_SPREADWM, &read_params,
   2627 	    &read_params.mde, compnp->cname) != 0) {
   2628 
   2629 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   2630 		    "Extent header read failed, block %llu.\n"), offset);
   2631 		return (mdstealerror(ep, &read_params.mde));
   2632 	}
   2633 
   2634 	/* make sure magic number is correct */
   2635 	if (wm->wm_magic != MD_SP_MAGIC) {
   2636 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   2637 		    "found incorrect magic number %x, expected %x.\n"),
   2638 		    wm->wm_magic, MD_SP_MAGIC);
   2639 		/*
   2640 		 * Pass NULL for the device name as we don't have
   2641 		 * valid watermark contents.
   2642 		 */
   2643 		return (mdmderror(ep, MDE_SP_BADWMMAGIC, 0, NULL));
   2644 	}
   2645 
   2646 	if (crcchk((uchar_t *)wm, (uint_t *)&wm->wm_checksum,
   2647 	    sizeof (*wm), NULL)) {
   2648 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   2649 		    "found incorrect checksum %x.\n"),
   2650 		    wm->wm_checksum);
   2651 		return (mdmderror(ep, MDE_SP_BADWMCRC, 0, wm->wm_mdname));
   2652 	}
   2653 
   2654 	return (0);
   2655 }
   2656 
   2657 /*
   2658  * **************************************************************************
   2659  *                  Query Functions
   2660  * **************************************************************************
   2661  */
   2662 
   2663 /*
   2664  * IMPORTANT NOTE: This is a static function that assumes that
   2665  *		   its input parameters have been checked and
   2666  *		   have valid values that lie within acceptable
   2667  *		   ranges.
   2668  *
   2669  * FUNCTION:	meta_sp_enough_space()
   2670  * INPUT:	desired_number_of_sps - the number of soft partitions desired;
   2671  *					must be > 0
   2672  *		desired_sp_size - the desired soft partition size in blocks;
   2673  *				  must be > 0
   2674  *		extent_listpp - a reference to a reference to an extent
   2675  *				list that lists the extents on a device;
   2676  *				must be a reference to a reference to a
   2677  *				valid extent list
   2678  *		alignment - the desired data space alignment for the sp's
   2679  * OUTPUT:	boolean_t return value
   2680  * RETURNS:	boolean_t - B_TRUE if there's enough space in the extent
   2681  *			    list to create the desired soft partitions,
   2682  *			    B_FALSE if there's not enough space
   2683  * PURPOSE:	determines whether there's enough free space in an extent
   2684  *		list to allow creation of a set of soft partitions
   2685  */
   2686 static boolean_t
   2687 meta_sp_enough_space(
   2688 	int		desired_number_of_sps,
   2689 	blkcnt_t	desired_sp_size,
   2690 	sp_ext_node_t	**extent_listpp,
   2691 	sp_ext_length_t	alignment
   2692 )
   2693 {
   2694 	boolean_t		enough_space;
   2695 	int			number_of_sps;
   2696 	int			number_of_extents_used;
   2697 	sp_ext_length_t		desired_ext_length = desired_sp_size;
   2698 
   2699 	enough_space = B_TRUE;
   2700 	number_of_sps = 0;
   2701 	while ((enough_space == B_TRUE) &&
   2702 	    (number_of_sps < desired_number_of_sps)) {
   2703 		/*
   2704 		 * Use the extent allocation algorithm implemented by
   2705 		 * meta_sp_alloc_by_len() to test whether the free
   2706 		 * extents in the extent list referenced by *extent_listpp
   2707 		 * contain enough space to accomodate a soft partition
   2708 		 * of size desired_ext_length.
   2709 		 *
   2710 		 * Repeat the test <desired_number_of_sps> times
   2711 		 * or until it fails, whichever comes first,
   2712 		 * each time allocating the extents required to
   2713 		 * create the soft partition without actually
   2714 		 * creating the soft partition.
   2715 		 */
   2716 		number_of_extents_used = meta_sp_alloc_by_len(
   2717 		    TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP,
   2718 		    extent_listpp, &desired_ext_length,
   2719 		    NO_OFFSET, alignment);
   2720 		if (number_of_extents_used == -1) {
   2721 			enough_space = B_FALSE;
   2722 		} else {
   2723 			number_of_sps++;
   2724 		}
   2725 	}
   2726 	return (enough_space);
   2727 }
   2728 
   2729 /*
   2730  * IMPORTANT NOTE: This is a static function that calls other functions
   2731  *		   that check its mdsetnamep and device_mdnamep
   2732  *		   input parameters, but expects extent_listpp to
 *		   be initialized to a valid address to which
   2734  *		   it can write a reference to the extent list that
   2735  *		   it creates.
   2736  *
   2737  * FUNCTION:	meta_sp_get_extent_list()
   2738  * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
   2739  *			     for the set containing the device for
   2740  *			     which the extents are to be listed
   2741  *		device_mdnamep - a reference to the mdname_t structure
   2742  *				 for the device for which the extents
   2743  *				 are to be listed
   2744  * OUTPUT:	*extent_listpp - a reference to the extent list for
   2745  *				 the device; NULL if the function fails
   2746  *		*ep - the libmeta error encountered, if any
   2747  * RETURNS:	boolean_t - B_TRUE if the function call was successful,
   2748  *			    B_FALSE if not
   2749  * PURPOSE:	gets the extent list for a device
   2750  */
   2751 static boolean_t
   2752 meta_sp_get_extent_list(
   2753 	mdsetname_t	*mdsetnamep,
   2754 	mdname_t	*device_mdnamep,
   2755 	sp_ext_node_t	**extent_listpp,
   2756 	md_error_t	*ep
   2757 )
   2758 {
   2759 	diskaddr_t		device_size_in_blocks;
   2760 	mdnamelist_t		*sp_name_listp;
   2761 	diskaddr_t		start_block_address_in_blocks;
   2762 
   2763 	*extent_listpp = NULL;
   2764 	sp_name_listp = NULL;
   2765 
   2766 	start_block_address_in_blocks = meta_sp_get_start(mdsetnamep,
   2767 	    device_mdnamep, ep);
   2768 	if (start_block_address_in_blocks == MD_DISKADDR_ERROR) {
   2769 		if (getenv(META_SP_DEBUG)) {
   2770 			mde_perror(ep,
   2771 			    "meta_sp_get_extent_list:meta_sp_get_start");
   2772 		}
   2773 		return (B_FALSE);
   2774 	}
   2775 
   2776 	device_size_in_blocks = metagetsize(device_mdnamep, ep);
   2777 	if (device_size_in_blocks == MD_DISKADDR_ERROR) {
   2778 		if (getenv(META_SP_DEBUG)) {
   2779 			mde_perror(ep,
   2780 			    "meta_sp_get_extent_list:metagetsize");
   2781 		}
   2782 		return (B_FALSE);
   2783 	}
   2784 
   2785 	/*
   2786 	 * Sanity check: the start block will have skipped an integer
   2787 	 * number of cylinders, C.  C will usually be zero.  If (C > 0),
   2788 	 * and the disk slice happens to only be C cylinders in total
   2789 	 * size, we'll fail this check.
   2790 	 */
   2791 	if (device_size_in_blocks <=
   2792 	    (start_block_address_in_blocks + MD_SP_WMSIZE)) {
   2793 		(void) mdmderror(ep, MDE_SP_NOSPACE, 0, device_mdnamep->cname);
   2794 		return (B_FALSE);
   2795 	}
   2796 
   2797 	/*
   2798 	 * After this point, we will have allocated resources, so any
   2799 	 * failure returns must be through the supplied "fail" label
   2800 	 * to properly deallocate things.
   2801 	 */
   2802 
   2803 	/*
   2804 	 * Create an empty extent list that starts one watermark past
   2805 	 * the start block of the device and ends one watermark before
   2806 	 * the end of the device.
   2807 	 */
   2808 	meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP,
   2809 	    extent_listpp, NO_OFFSET,
   2810 	    (sp_ext_length_t)start_block_address_in_blocks,
   2811 	    EXTTYP_RESERVED, NO_SEQUENCE_NUMBER, NO_FLAGS,
   2812 	    meta_sp_cmp_by_offset);
   2813 	meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP,
   2814 	    extent_listpp, (sp_ext_offset_t)(device_size_in_blocks -
   2815 	    MD_SP_WMSIZE), MD_SP_WMSIZE, EXTTYP_END, NO_SEQUENCE_NUMBER,
   2816 	    NO_FLAGS, meta_sp_cmp_by_offset);
   2817 
   2818 	/*
   2819 	 * Get the list of soft partitions that are already on the
   2820 	 * device.
   2821 	 */
   2822 	if (meta_sp_get_by_component(mdsetnamep, device_mdnamep,
   2823 	    &sp_name_listp, FORCE_RELOAD_CACHE, ep) < 1) {
   2824 		if (getenv(META_SP_DEBUG)) {
   2825 			mde_perror(ep,
   2826 			    "meta_sp_get_extent_list:meta_sp_get_by_component");
   2827 		}
   2828 		goto fail;
   2829 	}
   2830 
   2831 	if (sp_name_listp != NULL) {
   2832 		/*
   2833 		 * If there are soft partitions on the device, add the
   2834 		 * extents used in them to the extent list.
   2835 		 */
   2836 		if (meta_sp_extlist_from_namelist(mdsetnamep, sp_name_listp,
   2837 		    extent_listpp, ep) == -1) {
   2838 			if (getenv(META_SP_DEBUG)) {
   2839 				mde_perror(ep, "meta_sp_get_extent_list:"
   2840 				    "meta_sp_extlist_from_namelist");
   2841 			}
   2842 			goto fail;
   2843 		}
   2844 		metafreenamelist(sp_name_listp);
   2845 	}
   2846 
   2847 	/*
   2848 	 * Add free extents to the extent list to represent
   2849 	 * the remaining regions of free space on the
   2850 	 * device.
   2851 	 */
   2852 	meta_sp_list_freefill(extent_listpp, device_size_in_blocks);
   2853 	return (B_TRUE);
   2854 
   2855 fail:
   2856 	if (sp_name_listp != NULL) {
   2857 		metafreenamelist(sp_name_listp);
   2858 	}
   2859 
   2860 	if (*extent_listpp != NULL) {
   2861 		/*
   2862 		 * meta_sp_list_free sets *extent_listpp to NULL.
   2863 		 */
   2864 		meta_sp_list_free(extent_listpp);
   2865 	}
   2866 	return (B_FALSE);
   2867 }
   2868 
   2869 /*
   2870  * IMPORTANT NOTE: This is a static function that calls other functions
   2871  *		   that check its mdsetnamep and mddrivenamep
   2872  *		   input parameters, but expects extent_listpp to
 *		   be initialized to a valid address to which
   2874  *		   it can write a reference to the extent list that
   2875  *		   it creates.
   2876  *
   2877  * FUNCTION:	meta_sp_get_extent_list_for_drive()
   2878  * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
   2879  *			     for the set containing the drive for
   2880  *			     which the extents are to be listed
   2881  *		mddrivenamep   - a reference to the mddrivename_t structure
   2882  *				 for the drive for which the extents
   2883  *				 are to be listed
   2884  * OUTPUT:	*extent_listpp - a reference to the extent list for
   2885  *				 the drive; NULL if the function fails
   2886  * RETURNS:	boolean_t - B_TRUE if the function call was successful,
   2887  *			    B_FALSE if not
   2888  * PURPOSE:	gets the extent list for a drive when the entire drive
   2889  *		is to be soft partitioned
   2890  */
   2891 static boolean_t
   2892 meta_sp_get_extent_list_for_drive(
   2893 	mdsetname_t	*mdsetnamep,
   2894 	mddrivename_t	*mddrivenamep,
   2895 	sp_ext_node_t	**extent_listpp
   2896 )
   2897 {
   2898 	boolean_t		can_use;
   2899 	diskaddr_t		free_space;
   2900 	md_error_t		mderror;
   2901 	mdvtoc_t		proposed_vtoc;
   2902 	int			repartition_options;
   2903 	int			return_value;
   2904 	md_sp_t			test_sp_struct;
   2905 
   2906 	can_use = B_TRUE;
   2907 	*extent_listpp = NULL;
   2908 	mderror = mdnullerror;
   2909 	test_sp_struct.compnamep = metaslicename(mddrivenamep, MD_SLICE0,
   2910 	    &mderror);
   2911 	if (test_sp_struct.compnamep == NULL) {
   2912 		can_use = B_FALSE;
   2913 	}
   2914 
   2915 	if (can_use == B_TRUE) {
   2916 		mderror = mdnullerror;
   2917 		repartition_options = 0;
   2918 		return_value = meta_check_sp(mdsetnamep, &test_sp_struct,
   2919 		    MDCMD_USE_WHOLE_DISK, &repartition_options, &mderror);
   2920 		if (return_value != 0) {
   2921 			can_use = B_FALSE;
   2922 		}
   2923 	}
   2924 
   2925 	if (can_use == B_TRUE) {
   2926 		mderror = mdnullerror;
   2927 		repartition_options = repartition_options |
   2928 		    (MD_REPART_FORCE | MD_REPART_DONT_LABEL);
   2929 		return_value = meta_repartition_drive(mdsetnamep, mddrivenamep,
   2930 		    repartition_options, &proposed_vtoc, &mderror);
   2931 		if (return_value != 0) {
   2932 			can_use = B_FALSE;
   2933 		}
   2934 	}
   2935 
   2936 	if (can_use == B_TRUE) {
   2937 		free_space = proposed_vtoc.parts[MD_SLICE0].size;
   2938 		if (free_space <= (MD_SP_START + MD_SP_WMSIZE)) {
   2939 			can_use = B_FALSE;
   2940 		}
   2941 	}
   2942 
   2943 	if (can_use == B_TRUE) {
   2944 		/*
   2945 		 * Create an extent list that starts with
   2946 		 * a reserved extent that ends at the start
   2947 		 * of the usable space on slice zero of the
   2948 		 * proposed VTOC, ends with an extent that
   2949 		 * reserves space for a watermark at the end
   2950 		 * of slice zero, and contains a single free
   2951 		 * extent that occupies the rest of the space
   2952 		 * on the slice.
   2953 		 *
   2954 		 * NOTE:
   2955 		 *
   2956 		 * Don't use metagetstart() or metagetsize() to
   2957 		 * find the usable space.  They query the mdname_t
   2958 		 * structure that represents an actual device to
   2959 		 * determine the amount of space on the device that
   2960 		 * contains metadata and the total amount of space
   2961 		 * on the device.  Since this function creates a
   2962 		 * proposed extent list that doesn't reflect the
   2963 		 * state of an actual device, there's no mdname_t
   2964 		 * structure to be queried.
   2965 		 *
   2966 		 * When a drive is reformatted to prepare for
   2967 		 * soft partitioning, all of slice seven is
   2968 		 * reserved for metadata, all of slice zero is
   2969 		 * available for soft partitioning, and all other
   2970 		 * slices on the drive are empty.  The proposed
   2971 		 * extent list for the drive therefore contains
   2972 		 * only three extents: a reserved extent that ends
   2973 		 * at the start of the usable space on slice zero,
   2974 		 * a single free extent that occupies all the usable
   2975 		 * space on slice zero, and an ending extent that
   2976 		 * reserves space for a watermark at the end of
   2977 		 * slice zero.
   2978 		 */
   2979 		meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP,
   2980 		    extent_listpp, NO_OFFSET, (sp_ext_length_t)(MD_SP_START),
   2981 		    EXTTYP_RESERVED, NO_SEQUENCE_NUMBER, NO_FLAGS,
   2982 		    meta_sp_cmp_by_offset);
   2983 		meta_sp_list_insert(TEST_SETNAMEP, TEST_SOFT_PARTITION_NAMEP,
   2984 		    extent_listpp, (sp_ext_offset_t)(free_space - MD_SP_WMSIZE),
   2985 		    MD_SP_WMSIZE, EXTTYP_END, NO_SEQUENCE_NUMBER, NO_FLAGS,
   2986 		    meta_sp_cmp_by_offset);
   2987 		meta_sp_list_freefill(extent_listpp, free_space);
   2988 	}
   2989 	return (can_use);
   2990 }
   2991 
   2992 /*
   2993  * FUNCTION:	meta_sp_can_create_sps()
   2994  * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
   2995  *			     for the set containing the device for
   2996  *			     which the extents are to be listed
   2997  *		mdnamep - a reference to the mdname_t of the device
 *			  on which the soft partitions are to be created
   2999  *		number_of_sps - the desired number of soft partitions
   3000  *		sp_size - the desired soft partition size
   3001  * OUTPUT:	boolean_t return value
 * RETURNS:	boolean_t - B_TRUE if the soft partitions can be created,
   3003  *			    B_FALSE if not
   3004  * PURPOSE:	determines whether a set of soft partitions can be created
   3005  *		on a device
   3006  */
   3007 boolean_t
   3008 meta_sp_can_create_sps(
   3009 	mdsetname_t	*mdsetnamep,
   3010 	mdname_t	*mdnamep,
   3011 	int		number_of_sps,
   3012 	blkcnt_t	sp_size
   3013 )
   3014 {
   3015 	sp_ext_node_t	*extent_listp;
   3016 	boolean_t	succeeded;
   3017 	md_error_t	mde;
   3018 
   3019 	if ((number_of_sps > 0) && (sp_size > 0)) {
   3020 		succeeded = meta_sp_get_extent_list(mdsetnamep, mdnamep,
   3021 		    &extent_listp, &mde);
   3022 	} else {
   3023 		succeeded = B_FALSE;
   3024 	}
   3025 
   3026 	/*
   3027 	 * We don't really care about an error return from the
   3028 	 * alignment call; that will just result in passing zero,
   3029 	 * which will be interpreted as no alignment.
   3030 	 */
   3031 
   3032 	if (succeeded == B_TRUE) {
   3033 		succeeded = meta_sp_enough_space(number_of_sps,
   3034 		    sp_size, &extent_listp,
   3035 		    meta_sp_get_default_alignment(mdsetnamep, mdnamep, &mde));
   3036 		meta_sp_list_free(&extent_listp);
   3037 	}
   3038 	return (succeeded);
   3039 }
   3040 
   3041 /*
   3042  * FUNCTION:	meta_sp_can_create_sps_on_drive()
   3043  * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
   3044  *			     for the set containing the drive for
   3045  *			     which the extents are to be listed
   3046  *		mddrivenamep - a reference to the mddrivename_t of the drive
 *			       on which the soft partitions are to be created
   3048  *		number_of_sps - the desired number of soft partitions
   3049  *		sp_size - the desired soft partition size
   3050  * OUTPUT:	boolean_t return value
 * RETURNS:	boolean_t - B_TRUE if the soft partitions can be created,
   3052  *			    B_FALSE if not
   3053  * PURPOSE:	determines whether a set of soft partitions can be created
   3054  *		on a drive if the entire drive is soft partitioned
   3055  */
   3056 boolean_t
   3057 meta_sp_can_create_sps_on_drive(
   3058 	mdsetname_t	*mdsetnamep,
   3059 	mddrivename_t	*mddrivenamep,
   3060 	int		number_of_sps,
   3061 	blkcnt_t	sp_size
   3062 )
   3063 {
   3064 	sp_ext_node_t	*extent_listp;
   3065 	boolean_t	succeeded;
   3066 
   3067 	if ((number_of_sps > 0) && (sp_size > 0)) {
   3068 		succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep,
   3069 		    mddrivenamep, &extent_listp);
   3070 	} else {
   3071 		succeeded = B_FALSE;
   3072 	}
   3073 
   3074 	/*
   3075 	 * We don't care about alignment on the space call because
   3076 	 * we're specifically dealing with a drive, which will have no
   3077 	 * inherent alignment.
   3078 	 */
   3079 
   3080 	if (succeeded == B_TRUE) {
   3081 		succeeded = meta_sp_enough_space(number_of_sps, sp_size,
   3082 		    &extent_listp, SP_UNALIGNED);
   3083 		meta_sp_list_free(&extent_listp);
   3084 	}
   3085 	return (succeeded);
   3086 }
   3087 
   3088 /*
   3089  * FUNCTION:	meta_sp_get_free_space()
   3090  * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
   3091  *			     for the set containing the device for
   3092  *			     which the free space is to be returned
   3093  *		mdnamep - a reference to the mdname_t of the device
   3094  *			  for which the free space is to be returned
   3095  * OUTPUT:	blkcnt_t return value
   3096  * RETURNS:	blkcnt_t - the number of blocks of free space on the device
   3097  * PURPOSE:	returns the number of blocks of free space on a device
   3098  */
   3099 blkcnt_t
   3100 meta_sp_get_free_space(
   3101 	mdsetname_t	*mdsetnamep,
   3102 	mdname_t	*mdnamep
   3103 )
   3104 {
   3105 	sp_ext_node_t		*extent_listp;
   3106 	sp_ext_length_t		free_blocks;
   3107 	boolean_t		succeeded;
   3108 	md_error_t		mde;
   3109 
   3110 	extent_listp = NULL;
   3111 	free_blocks = 0;
   3112 	succeeded = meta_sp_get_extent_list(mdsetnamep, mdnamep,
   3113 	    &extent_listp, &mde);
   3114 	if (succeeded == B_TRUE) {
   3115 		free_blocks = meta_sp_list_size(extent_listp,
   3116 		    EXTTYP_FREE, INCLUDE_WM);
   3117 		meta_sp_list_free(&extent_listp);
   3118 		if (free_blocks > (10 * MD_SP_WMSIZE)) {
   3119 			/*
   3120 			 * Subtract a safety margin for watermarks when
   3121 			 * computing the number of blocks available for
   3122 			 * use.  The actual number of watermarks can't
   3123 			 * be calculated without knowing the exact numbers
   3124 			 * and sizes of both the free extents and the soft
   3125 			 * partitions to be created.  The calculation is
   3126 			 * highly complex and error-prone even if those
   3127 			 * quantities are known.  The approximate value
   3128 			 * 10 * MD_SP_WMSIZE is within a few blocks of the
   3129 			 * correct value in all practical cases.
   3130 			 */
   3131 			free_blocks = free_blocks - (10 * MD_SP_WMSIZE);
   3132 		} else {
   3133 			free_blocks = 0;
   3134 		}
   3135 	} else {
   3136 		mdclrerror(&mde);
   3137 	}
   3138 
   3139 	return (free_blocks);
   3140 }
   3141 
   3142 /*
   3143  * FUNCTION:	meta_sp_get_free_space_on_drive()
   3144  * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
   3145  *			     for the set containing the drive for
   3146  *			     which the free space is to be returned
   3147  *		mddrivenamep - a reference to the mddrivename_t of the drive
   3148  *			       for which the free space is to be returned
   3149  * OUTPUT:	blkcnt_t return value
   3150  * RETURNS:	blkcnt_t - the number of blocks of free space on the drive
   3151  * PURPOSE:	returns the number of blocks of space usable for soft
   3152  *		partitions on an entire drive, if the entire drive is
   3153  *		soft partitioned
   3154  */
   3155 blkcnt_t
   3156 meta_sp_get_free_space_on_drive(
   3157 	mdsetname_t	*mdsetnamep,
   3158 	mddrivename_t	*mddrivenamep
   3159 )
   3160 {
   3161 	sp_ext_node_t		*extent_listp;
   3162 	sp_ext_length_t		free_blocks;
   3163 	boolean_t		succeeded;
   3164 
   3165 	extent_listp = NULL;
   3166 	free_blocks = 0;
   3167 	succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep,
   3168 	    mddrivenamep, &extent_listp);
   3169 	if (succeeded == B_TRUE) {
   3170 		free_blocks = meta_sp_list_size(extent_listp,
   3171 		    EXTTYP_FREE, INCLUDE_WM);
   3172 		meta_sp_list_free(&extent_listp);
   3173 		if (free_blocks > (10 * MD_SP_WMSIZE)) {
   3174 			/*
   3175 			 * Subtract a safety margin for watermarks when
   3176 			 * computing the number of blocks available for
   3177 			 * use.  The actual number of watermarks can't
   3178 			 * be calculated without knowing the exact numbers
   3179 			 * and sizes of both the free extents and the soft
   3180 			 * partitions to be created.  The calculation is
   3181 			 * highly complex and error-prone even if those
   3182 			 * quantities are known.  The approximate value
   3183 			 * 10 * MD_SP_WMSIZE is within a few blocks of the
   3184 			 * correct value in all practical cases.
   3185 			 */
   3186 			free_blocks = free_blocks - (10 * MD_SP_WMSIZE);
   3187 		} else {
   3188 			free_blocks = 0;
   3189 		}
   3190 	}
   3191 	return (free_blocks);
   3192 }
   3193 
   3194 /*
   3195  * FUNCTION:	meta_sp_get_number_of_possible_sps()
   3196  * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
   3197  *			     for the set containing the device for
   3198  *			     which the number of possible soft partitions
   3199  *			     is to be returned
   3200  *		mdnamep - a reference to the mdname_t of the device
   3201  *			  for which the number of possible soft partitions
   3202  *			  is to be returned
   3203  * OUTPUT:	int return value
   3204  * RETURNS:	int - the number of soft partitions of the desired size
   3205  *		      that can be created on the device
   3206  * PURPOSE:	returns the number of soft partitions of a given size
   3207  *		that can be created on a device
   3208  */
   3209 int
   3210 meta_sp_get_number_of_possible_sps(
   3211 	mdsetname_t	*mdsetnamep,
   3212 	mdname_t	*mdnamep,
   3213 	blkcnt_t	sp_size
   3214 )
   3215 {
   3216 	sp_ext_node_t	*extent_listp;
   3217 	int		number_of_possible_sps;
   3218 	boolean_t	succeeded;
   3219 	md_error_t	mde;
   3220 	sp_ext_length_t	alignment;
   3221 
   3222 	extent_listp = NULL;
   3223 	number_of_possible_sps = 0;
   3224 	if (sp_size > 0) {
   3225 		if ((succeeded = meta_sp_get_extent_list(mdsetnamep,
   3226 		    mdnamep, &extent_listp, &mde)) == B_FALSE)
   3227 			mdclrerror(&mde);
   3228 	} else {
   3229 		succeeded = B_FALSE;
   3230 	}
   3231 
   3232 	if (succeeded == B_TRUE) {
   3233 		alignment = meta_sp_get_default_alignment(mdsetnamep,
   3234 		    mdnamep, &mde);
   3235 	}
   3236 
   3237 	while (succeeded == B_TRUE) {
   3238 		/*
   3239 		 * Keep allocating space from the extent list
   3240 		 * for soft partitions of the desired size until
   3241 		 * there's not enough free space left in the list
   3242 		 * for another soft partiition of that size.
   3243 		 * Add one to the number of possible soft partitions
   3244 		 * for each soft partition for which there is
   3245 		 * enough free space left.
   3246 		 */
   3247 		succeeded = meta_sp_enough_space(ONE_SOFT_PARTITION,
   3248 		    sp_size, &extent_listp, alignment);
   3249 		if (succeeded == B_TRUE) {
   3250 			number_of_possible_sps++;
   3251 		}
   3252 	}
   3253 	if (extent_listp != NULL) {
   3254 		meta_sp_list_free(&extent_listp);
   3255 	}
   3256 	return (number_of_possible_sps);
   3257 }
   3258 
   3259 /*
   3260  * FUNCTION:	meta_sp_get_number_of_possible_sps_on_drive()
   3261  * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
   3262  *			     for the set containing the drive for
   3263  *			     which the number of possible soft partitions
   3264  *			     is to be returned
   3265  *		mddrivenamep - a reference to the mddrivename_t of the drive
   3266  *			       for which the number of possible soft partitions
   3267  *			       is to be returned
   3268  *		sp_size - the size in blocks of the proposed soft partitions
   3269  * OUTPUT:	int return value
   3270  * RETURNS:	int - the number of soft partitions of the desired size
   3271  *		      that can be created on the drive
   3272  * PURPOSE:	returns the number of soft partitions of a given size
   3273  *		that can be created on a drive, if the entire drive is
   3274  *		soft partitioned
   3275  */
   3276 int
   3277 meta_sp_get_number_of_possible_sps_on_drive(
   3278 	mdsetname_t	*mdsetnamep,
   3279 	mddrivename_t	*mddrivenamep,
   3280 	blkcnt_t	sp_size
   3281 )
   3282 {
   3283 	sp_ext_node_t	*extent_listp;
   3284 	int		number_of_possible_sps;
   3285 	boolean_t	succeeded;
   3286 
   3287 	extent_listp = NULL;
   3288 	number_of_possible_sps = 0;
   3289 	if (sp_size > 0) {
   3290 		succeeded = meta_sp_get_extent_list_for_drive(mdsetnamep,
   3291 		    mddrivenamep, &extent_listp);
   3292 	} else {
   3293 		succeeded = B_FALSE;
   3294 	}
   3295 	while (succeeded == B_TRUE) {
   3296 		/*
   3297 		 * Keep allocating space from the extent list
   3298 		 * for soft partitions of the desired size until
   3299 		 * there's not enough free space left in the list
   3300 		 * for another soft partition of that size.
   3301 		 * Add one to the number of possible soft partitions
   3302 		 * for each soft partition for which there is
   3303 		 * enough free space left.
   3304 		 *
   3305 		 * Since it's a drive, not a metadevice, make no
   3306 		 * assumptions about alignment.
   3307 		 */
   3308 		succeeded = meta_sp_enough_space(ONE_SOFT_PARTITION,
   3309 		    sp_size, &extent_listp, SP_UNALIGNED);
   3310 		if (succeeded == B_TRUE) {
   3311 			number_of_possible_sps++;
   3312 		}
   3313 	}
   3314 	if (extent_listp != NULL) {
   3315 		meta_sp_list_free(&extent_listp);
   3316 	}
   3317 	return (number_of_possible_sps);
   3318 }
   3319 
   3320 /*
   3321  * FUNCTION:	meta_sp_get_possible_sp_size()
   3322  * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
   3323  *			     for the set containing the device for
   3324  *			     which the possible soft partition size
   3325  *			     is to be returned
   3326  *		mdnamep - a reference to the mdname_t of the device
   3327  *			  for which the possible soft partition size
   3328  *			  is to be returned
   3329  *		number_of_sps - the desired number of soft partitions
   3330  * OUTPUT:	blkcnt_t return value
   3331  * RETURNS:	blkcnt_t - the possible soft partition size in blocks
   3332  * PURPOSE:	returns the maximum possible size of each of a given number of
   3333  *		soft partitions of equal size that can be created on a device
   3334  */
   3335 blkcnt_t
   3336 meta_sp_get_possible_sp_size(
   3337 	mdsetname_t	*mdsetnamep,
   3338 	mdname_t	*mdnamep,
   3339 	int		number_of_sps
   3340 )
   3341 {
   3342 	blkcnt_t	free_blocks;
   3343 	blkcnt_t	sp_size;
   3344 	boolean_t	succeeded;
   3345 
   3346 	sp_size = 0;
   3347 	if (number_of_sps > 0) {
   3348 		free_blocks = meta_sp_get_free_space(mdsetnamep, mdnamep);
   3349 		sp_size = free_blocks / number_of_sps;
   3350 		succeeded = meta_sp_can_create_sps(mdsetnamep, mdnamep,
   3351 		    number_of_sps, sp_size);
   3352 		while ((succeeded == B_FALSE) && (sp_size > 0)) {
   3353 			/*
   3354 			 * To compensate for space that may have been
   3355 			 * occupied by watermarks, reduce sp_size by a
   3356 			 * number of blocks equal to the number of soft
   3357 			 * partitions desired, and test again to see
   3358 			 * whether the desired number of soft partitions
   3359 			 * can be created.
   3360 			 */
   3361 			sp_size = sp_size - ((blkcnt_t)number_of_sps);
   3362 			succeeded = meta_sp_can_create_sps(mdsetnamep, mdnamep,
   3363 			    number_of_sps, sp_size);
   3364 		}
   3365 		if (sp_size < 0) {
   3366 			sp_size = 0;
   3367 		}
   3368 	}
   3369 	return (sp_size);
   3370 }
   3371 
   3372 /*
   3373  * FUNCTION:	meta_sp_get_possible_sp_size_on_drive()
   3374  * INPUT:	mdsetnamep - a reference to the mdsetname_t structure
   3375  *			     for the set containing the drive for
   3376  *			     which the possible soft partition size
   3377  *			     is to be returned
   3378  *		mddrivenamep - a reference to the mddrivename_t of the drive
   3379  *			       for which the possible soft partition size
   3380  *			       is to be returned
   3381  *		number_of_sps - the desired number of soft partitions
   3382  * OUTPUT:	blkcnt_t return value
   3383  * RETURNS:	blkcnt_t - the possible soft partition size in blocks
   3384  * PURPOSE:	returns the maximum possible size of each of a given number of
   3385  *		soft partitions of equal size that can be created on a drive
   3386  *              if the entire drive is soft partitioned
   3387  */
   3388 blkcnt_t
   3389 meta_sp_get_possible_sp_size_on_drive(
   3390 	mdsetname_t	*mdsetnamep,
   3391 	mddrivename_t	*mddrivenamep,
   3392 	int		number_of_sps
   3393 )
   3394 {
   3395 	blkcnt_t	free_blocks;
   3396 	blkcnt_t	sp_size;
   3397 	boolean_t	succeeded;
   3398 
   3399 	sp_size = 0;
   3400 	if (number_of_sps > 0) {
   3401 		free_blocks = meta_sp_get_free_space_on_drive(mdsetnamep,
   3402 		    mddrivenamep);
   3403 		sp_size = free_blocks / number_of_sps;
   3404 		succeeded = meta_sp_can_create_sps_on_drive(mdsetnamep,
   3405 		    mddrivenamep, number_of_sps, sp_size);
   3406 		while ((succeeded == B_FALSE) && (sp_size > 0)) {
   3407 			/*
   3408 			 * To compensate for space that may have been
   3409 			 * occupied by watermarks, reduce sp_size by a
   3410 			 * number of blocks equal to the number of soft
   3411 			 * partitions desired, and test again to see
   3412 			 * whether the desired number of soft partitions
   3413 			 * can be created.
   3414 			 */
   3415 			sp_size = sp_size - ((blkcnt_t)number_of_sps);
   3416 			succeeded = meta_sp_can_create_sps_on_drive(mdsetnamep,
   3417 			    mddrivenamep, number_of_sps, sp_size);
   3418 		}
   3419 		if (sp_size < 0) {
   3420 			sp_size = 0;
   3421 		}
   3422 	}
   3423 	return (sp_size);
   3424 }
   3425 
   3426 /*
   3427  * **************************************************************************
   3428  *                  Unit Structure Manipulation Functions                   *
   3429  * **************************************************************************
   3430  */
   3431 
   3432 /*
   3433  * FUNCTION:	meta_sp_fillextarray()
   3434  * INPUT:	mp	- the unit structure to fill
   3435  *		extlist	- the list of extents to fill with
   3436  * OUTPUT:	none
   3437  * RETURNS:	void
   3438  * PURPOSE:	fills in the unit structure extent list with the extents
   3439  *		specified by extlist.  Only extents in extlist with the
   3440  *		EXTFLG_UPDATE flag are changed in the unit structure,
   3441  *		and the index into the unit structure is the sequence
   3442  *		number in the extent list.  After all of the nodes have
   3443  *		been updated the virtual offsets in the unit structure
   3444  *		are updated to reflect the new lengths.
   3445  */
   3446 static void
   3447 meta_sp_fillextarray(
   3448 	mp_unit_t	*mp,
   3449 	sp_ext_node_t	*extlist
   3450 )
   3451 {
   3452 	int	i;
   3453 	sp_ext_node_t	*ext;
   3454 	sp_ext_offset_t	curvoff = 0LL;
   3455 
   3456 	assert(mp != NULL);
   3457 
   3458 	/* go through the allocation list and fill in our unit structure */
   3459 	for (ext = extlist; ext != NULL; ext = ext->ext_next) {
   3460 		if ((ext->ext_type == EXTTYP_ALLOC) &&
   3461 		    (ext->ext_flags & EXTFLG_UPDATE) != 0) {
   3462 			mp->un_ext[ext->ext_seq].un_poff =
   3463 			    ext->ext_offset + MD_SP_WMSIZE;
   3464 			mp->un_ext[ext->ext_seq].un_len =
   3465 			    ext->ext_length - MD_SP_WMSIZE;
   3466 		}
   3467 	}
   3468 
   3469 	for (i = 0; i < mp->un_numexts; i++) {
   3470 		assert(mp->un_ext[i].un_poff != 0);
   3471 		assert(mp->un_ext[i].un_len  != 0);
   3472 		mp->un_ext[i].un_voff = curvoff;
   3473 		curvoff += mp->un_ext[i].un_len;
   3474 	}
   3475 }
   3476 
   3477 /*
   3478  * FUNCTION:	meta_sp_createunit()
   3479  * INPUT:	np	- the name of the device to create a unit structure for
   3480  *		compnp	- the name of the device the soft partition is on
   3481  *		extlist	- the extent list to populate the new unit with
   3482  *		numexts	- the number of extents in the extent list
   3483  *		len	- the total size of the soft partition (sectors)
   3484  *		status	- the initial status of the unit structure
   3485  * OUTPUT:	ep	- return error pointer
   3486  * RETURNS:	mp_unit_t * - the new unit structure.
   3487  * PURPOSE:	allocates and fills in a new soft partition unit
   3488  *		structure to be passed to the soft partitioning driver
   3489  *		for creation.
   3490  */
   3491 static mp_unit_t *
   3492 meta_sp_createunit(
   3493 	mdname_t	*np,
   3494 	mdname_t	*compnp,
   3495 	sp_ext_node_t	*extlist,
   3496 	int		numexts,
   3497 	sp_ext_length_t	len,
   3498 	sp_status_t	status,
   3499 	md_error_t	*ep
   3500 )
   3501 {
   3502 	mp_unit_t	*mp;
   3503 	uint_t		ms_size;
   3504 
   3505 	ms_size = (sizeof (*mp) - sizeof (mp->un_ext[0])) +
   3506 	    (numexts * sizeof (mp->un_ext[0]));
   3507 
   3508 	mp = Zalloc(ms_size);
   3509 
   3510 	/* fill in fields in common unit structure */
   3511 	mp->c.un_type = MD_METASP;
   3512 	mp->c.un_size = ms_size;
   3513 	MD_SID(mp) = meta_getminor(np->dev);
   3514 	mp->c.un_total_blocks = len;
   3515 	mp->c.un_actual_tb = len;
   3516 
   3517 	/* set up geometry */
   3518 	(void) meta_sp_setgeom(np, compnp, mp, ep);
   3519 
   3520 	/* if we're building on metadevice we can't parent */
   3521 	if (metaismeta(compnp))
   3522 		MD_CAPAB(mp) = MD_CANT_PARENT;
   3523 	else
   3524 		MD_CAPAB(mp) = MD_CAN_PARENT;
   3525 
   3526 	/* fill soft partition-specific fields */
   3527 	mp->un_dev = compnp->dev;
   3528 	mp->un_key = compnp->key;
   3529 
   3530 	/* mdname_t start_blk field is not 64-bit! */
   3531 	mp->un_start_blk = (sp_ext_offset_t)compnp->start_blk;
   3532 	mp->un_status = status;
   3533 	mp->un_numexts = numexts;
   3534 	mp->un_length = len;
   3535 
   3536 	/* fill in the extent array */
   3537 	meta_sp_fillextarray(mp, extlist);
   3538 
   3539 	return (mp);
   3540 }
   3541 
   3542 /*
   3543  * FUNCTION:	meta_sp_updateunit()
   3544  * INPUT:	np       - name structure for the metadevice being updated
   3545  *		old_un	 - the original unit structure that is being updated
   3546  *		extlist	 - the extent list to populate the new unit with
   3547  *		grow_len - the amount by which the partition is being grown
   3548  *		numexts	 - the number of extents in the extent list
   3549  *		ep       - return error pointer
   3550  * OUTPUT:	none
   3551  * RETURNS:	mp_unit_t * - the updated unit structure
   3552  * PURPOSE:	allocates and fills in a new soft partition unit structure to
   3553  *		be passed to the soft partitioning driver for creation.  The
   3554  *		old unit structure is first copied in, and then the updated
   3555  *		extents are changed in the new unit structure.  This is
   3556  *		typically used when the size of an existing unit is changed.
   3557  */
   3558 static mp_unit_t *
   3559 meta_sp_updateunit(
   3560 	mdname_t	*np,
   3561 	mp_unit_t	*old_un,
   3562 	sp_ext_node_t	*extlist,
   3563 	sp_ext_length_t	grow_len,
   3564 	int		numexts,
   3565 	md_error_t	*ep
   3566 )
   3567 {
   3568 	mp_unit_t	*new_un;
   3569 	sp_ext_length_t	new_len;
   3570 	uint_t		new_size;
   3571 
   3572 	assert(old_un != NULL);
   3573 	assert(extlist != NULL);
   3574 
   3575 	/* allocate new unit structure and copy in old unit */
   3576 	new_size = (sizeof (*old_un) - sizeof (old_un->un_ext[0])) +
   3577 	    ((old_un->un_numexts + numexts) * sizeof (old_un->un_ext[0]));
   3578 	new_len = old_un->un_length + grow_len;
   3579 	new_un = Zalloc(new_size);
   3580 	bcopy(old_un, new_un, old_un->c.un_size);
   3581 
   3582 	/* update size and geometry information */
   3583 	new_un->c.un_size = new_size;
   3584 	new_un->un_length = new_len;
   3585 	new_un->c.un_total_blocks = new_len;
   3586 	new_un->c.un_actual_tb = new_len;
   3587 	if (meta_adjust_geom((md_unit_t *)new_un, np,
   3588 	    old_un->c.un_wr_reinstruct, old_un->c.un_rd_reinstruct,
   3589 	    0, ep) != 0) {
   3590 		Free(new_un);
   3591 		return (NULL);
   3592 	}
   3593 
   3594 	/* update extent information */
   3595 	new_un->un_numexts += numexts;
   3596 
   3597 	meta_sp_fillextarray(new_un, extlist);
   3598 
   3599 	return (new_un);
   3600 }
   3601 
   3602 /*
   3603  * FUNCTION:	meta_get_sp()
   3604  * INPUT:	sp	- the set name for the device to get
   3605  *		np	- the name of the device to get
   3606  * OUTPUT:	ep	- return error pointer
   3607  * RETURNS:	md_sp_t * - the XDR unit structure for the soft partition
   3608  * PURPOSE:	interface to the rest of libmeta for fetching a unit structure
   3609  *		for the named device.  Just a wrapper for meta_get_sp_common().
   3610  */
   3611 md_sp_t *
   3612 meta_get_sp(
   3613 	mdsetname_t	*sp,
   3614 	mdname_t	*np,
   3615 	md_error_t	*ep
   3616 )
   3617 {
   3618 	return (meta_get_sp_common(sp, np, 0, ep));
   3619 }
   3620 
   3621 /*
   3622  * FUNCTION:	meta_get_sp_common()
   3623  * INPUT:	sp	- the set name for the device to get
   3624  *		np	- the name of the device to get
   3625  *		fast	- whether to use the cache or not (NOT IMPLEMENTED!)
   3626  * OUTPUT:	ep	- return error pointer
   3627  * RETURNS:	md_sp_t * - the XDR unit structure for the soft partition,
   3628  *			    NULL if np is not a soft partition
   3629  * PURPOSE:	common routine for fetching a soft partition unit structure
   3630  */
   3631 md_sp_t *
   3632 meta_get_sp_common(
   3633 	mdsetname_t	*sp,
   3634 	mdname_t	*np,
   3635 	int		fast,
   3636 	md_error_t	*ep
   3637 )
   3638 {
   3639 	mddrivename_t	*dnp = np->drivenamep;
   3640 	char		*miscname;
   3641 	mp_unit_t	*mp;
   3642 	md_sp_t		*msp;
   3643 	int		i;
   3644 
   3645 	/* must have set */
   3646 	assert(sp != NULL);
   3647 
   3648 	/* short circuit */
   3649 	if (dnp->unitp != NULL) {
   3650 		if (dnp->unitp->type != MD_METASP)
   3651 			return (NULL);
   3652 		return ((md_sp_t *)dnp->unitp);
   3653 	}
   3654 	/* get miscname and unit */
   3655 	if ((miscname = metagetmiscname(np, ep)) == NULL)
   3656 		return (NULL);
   3657 
   3658 	if (strcmp(miscname, MD_SP) != 0) {
   3659 		(void) mdmderror(ep, MDE_NOT_SP, 0, np->cname);
   3660 		return (NULL);
   3661 	}
   3662 
   3663 	if ((mp = (mp_unit_t *)meta_get_mdunit(sp, np, ep)) == NULL)
   3664 		return (NULL);
   3665 
   3666 	assert(mp->c.un_type == MD_METASP);
   3667 
   3668 	/* allocate soft partition */
   3669 	msp = Zalloc(sizeof (*msp));
   3670 
   3671 	/* get the common information */
   3672 	msp->common.namep = np;
   3673 	msp->common.type = mp->c.un_type;
   3674 	msp->common.state = mp->c.un_status;
   3675 	msp->common.capabilities = mp->c.un_capabilities;
   3676 	msp->common.parent = mp->c.un_parent;
   3677 	msp->common.size = mp->c.un_total_blocks;
   3678 	msp->common.user_flags = mp->c.un_user_flags;
   3679 	msp->common.revision = mp->c.un_revision;
   3680 
   3681 	/* get soft partition information */
   3682 	if ((msp->compnamep = metakeyname(&sp, mp->un_key, fast, ep)) == NULL)
   3683 		goto out;
   3684 
   3685 	/*
   3686 	 * Fill in the key and the start block.  Note that the start
   3687 	 * block in the unit structure is 64 bits but the name pointer
   3688 	 * only supports 32 bits.
   3689 	 */
   3690 	msp->compnamep->key = mp->un_key;
   3691 	msp->compnamep->start_blk = mp->un_start_blk;
   3692 
   3693 	/* fill in status field */
   3694 	msp->status = mp->un_status;
   3695 
   3696 	/* allocate the extents */
   3697 	msp->ext.ext_val = Zalloc(mp->un_numexts * sizeof (*msp->ext.ext_val));
   3698 	msp->ext.ext_len = mp->un_numexts;
   3699 
   3700 	/* do the extents for this soft partition */
   3701 	for (i = 0; i < mp->un_numexts; i++) {
   3702 		struct mp_ext	*mde = &mp->un_ext[i];
   3703 		md_sp_ext_t	*extp = &msp->ext.ext_val[i];
   3704 
   3705 		extp->voff = mde->un_voff;
   3706 		extp->poff = mde->un_poff;
   3707 		extp->len = mde->un_len;
   3708 	}
   3709 
   3710 	/* cleanup, return success */
   3711 	Free(mp);
   3712 	dnp->unitp = (md_common_t *)msp;
   3713 	return (msp);
   3714 
   3715 out:
   3716 	/* clean up and return error */
   3717 	Free(mp);
   3718 	Free(msp);
   3719 	return (NULL);
   3720 }
   3721 
   3722 
   3723 /*
   3724  * FUNCTION:	meta_init_sp()
   3725  * INPUT:	spp	- the set name for the new device
   3726  *		argc	- the remaining argument count for the metainit cmdline
   3727  *		argv	- the remainder of the unparsed command line
   3728  *		options	- global options parsed by metainit
   3729  * OUTPUT:	ep	- return error pointer
   3730  * RETURNS:	int	- -1 failure, 0 success
   3731  * PURPOSE:	provides the command line parsing and name management overhead
   3732  *		for creating a new soft partition.  Ultimately this calls
   3733  *		meta_create_sp() which does the real work of allocating space
   3734  *		for the new soft partition.
   3735  */
   3736 int
   3737 meta_init_sp(
   3738 	mdsetname_t	**spp,
   3739 	int		argc,
   3740 	char		*argv[],
   3741 	mdcmdopts_t	options,
   3742 	md_error_t	*ep
   3743 )
   3744 {
   3745 	char		*compname = NULL;
   3746 	mdname_t	*spcompnp = NULL;	/* name of component volume */
   3747 	char		*devname = argv[0];	/* unit name */
   3748 	mdname_t	*np = NULL;		/* name of soft partition */
   3749 	md_sp_t		*msp = NULL;
   3750 	int		c;
   3751 	int		old_optind;
   3752 	sp_ext_length_t	len = 0LL;
   3753 	int		rval = -1;
   3754 	uint_t		seq;
   3755 	int		oflag;
   3756 	int		failed;
   3757 	mddrivename_t	*dnp = NULL;
   3758 	sp_ext_length_t	alignment = 0LL;
   3759 	sp_ext_node_t	*extlist = NULL;
   3760 
   3761 	assert(argc > 0);
   3762 
   3763 	/* expect sp name, -p, optional -e, compname, and size parameters */
   3764 	/* grab soft partition name */
   3765 	if ((np = metaname(spp, devname, META_DEVICE, ep)) == NULL)
   3766 		goto out;
   3767 
   3768 	/* see if it exists already */
   3769 	if (metagetmiscname(np, ep) != NULL) {
   3770 		(void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP,
   3771 		    meta_getminor(np->dev), devname);
   3772 		goto out;
   3773 	} else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) {
   3774 		goto out;
   3775 	} else {
   3776 		mdclrerror(ep);
   3777 	}
   3778 	--argc, ++argv;
   3779 
   3780 	if (argc == 0)
   3781 		goto syntax;
   3782 
   3783 	/* grab -p */
   3784 	if (strcmp(argv[0], "-p") != 0)
   3785 		goto syntax;
   3786 	--argc, ++argv;
   3787 
   3788 	if (argc == 0)
   3789 		goto syntax;
   3790 
   3791 	/* see if -e is there */
   3792 	if (strcmp(argv[0], "-e") == 0) {
   3793 		/* use the whole disk */
   3794 		options |= MDCMD_USE_WHOLE_DISK;
   3795 		--argc, ++argv;
   3796 	}
   3797 
   3798 	if (argc == 0)
   3799 		goto syntax;
   3800 
   3801 	/* get component name */
   3802 	compname = Strdup(argv[0]);
   3803 
   3804 	if (options & MDCMD_USE_WHOLE_DISK) {
   3805 		if ((dnp = metadrivename(spp, compname, ep)) == NULL) {
   3806 			goto out;
   3807 		}
   3808 		if ((spcompnp = metaslicename(dnp, 0, ep)) == NULL) {
   3809 			goto out;
   3810 		}
   3811 	} else if ((spcompnp = metaname(spp, compname, UNKNOWN, ep)) == NULL) {
   3812 		goto out;
   3813 	}
   3814 	assert(*spp != NULL);
   3815 
   3816 	if (!(options & MDCMD_NOLOCK)) {
   3817 		/* grab set lock */
   3818 		if (meta_lock(*spp, TRUE, ep))
   3819 			goto out;
   3820 
   3821 		if (meta_check_ownership(*spp, ep) != 0)
   3822 			goto out;
   3823 	}
   3824 
   3825 	/* allocate the soft partition */
   3826 	msp = Zalloc(sizeof (*msp));
   3827 
   3828 	/* setup common */
   3829 	msp->common.namep = np;
   3830 	msp->common.type = MD_METASP;
   3831 
   3832 	compname = spcompnp->cname;
   3833 
   3834 	assert(spcompnp->rname != NULL);
   3835 	--argc, ++argv;
   3836 
   3837 	if (argc == 0) {
   3838 		goto syntax;
   3839 	}
   3840 
   3841 	if (*argv[0] == '-') {
   3842 		/*
   3843 		 * parse any other command line options, this includes
   3844 		 * the recovery options -o and -b. The special thing
   3845 		 * with these options is that the len needs to be
   3846 		 * kept track of otherwise when the geometry of the
   3847 		 * "device" is built it will create an invalid geometry
   3848 		 */
   3849 		old_optind = optind = 0;
   3850 		opterr = 0;
   3851 		oflag = 0;
   3852 		seq = 0;
   3853 		failed = 0;
   3854 		while ((c = getopt(argc, argv, "A:o:b:")) != -1) {
   3855 			sp_ext_offset_t	offset;
   3856 			sp_ext_length_t	length;
   3857 			longlong_t	tmp_size;
   3858 
   3859 			switch (c) {
   3860 			case 'A':	/* data alignment */
   3861 				if (meta_sp_parsesizestring(optarg,
   3862 				    &alignment) == -1) {
   3863 					failed = 1;
   3864 				}
   3865 				break;
   3866 			case 'o':	/* offset in the partition */
   3867 				if (oflag == 1) {
   3868 					failed = 1;
   3869 				} else {
   3870 					tmp_size = atoll(optarg);
   3871 					if (tmp_size <= 0) {
   3872 						failed = 1;
   3873 					} else {
   3874 						oflag = 1;
   3875 						options |= MDCMD_DIRECT;
   3876 
   3877 						offset = tmp_size;
   3878 					}
   3879 				}
   3880 
   3881 				break;
   3882 			case 'b':	/* number of blocks */
   3883 				if (oflag == 0) {
   3884 					failed = 1;
   3885 				} else {
   3886 					tmp_size = atoll(optarg);
   3887 					if (tmp_size <= 0) {
   3888 						failed = 1;
   3889 					} else {
   3890 						oflag = 0;
   3891 
   3892 						length = tmp_size;
   3893 
   3894 						/* we have a pair of values */
   3895 						meta_sp_list_insert(*spp, np,
   3896 						    &extlist, offset, length,
   3897 						    EXTTYP_ALLOC, seq++,
   3898 						    EXTFLG_UPDATE,
   3899 						    meta_sp_cmp_by_offset);
   3900 						len += length;
   3901 					}
   3902 				}
   3903 
   3904 				break;
   3905 			default:
   3906 				argc -= old_optind;
   3907 				argv += old_optind;
   3908 				goto options;
   3909 			}
   3910 
   3911 			if (failed) {
   3912 				argc -= old_optind;
   3913 				argv += old_optind;
   3914 				goto syntax;
   3915 			}
   3916 
   3917 			old_optind = optind;
   3918 		}
   3919 		argc -= optind;
   3920 		argv += optind;
   3921 
   3922 		/*
   3923 		 * Must have matching pairs of -o and -b flags
   3924 		 */
   3925 		if (oflag != 0)
   3926 			goto syntax;
   3927 
   3928 		/*
   3929 		 * Can't specify both layout (indicated indirectly by
   3930 		 * len being set by thye -o/-b cases above) AND
   3931 		 * alignment
   3932 		 */
   3933 		if ((len > 0LL) && (alignment > 0LL))
   3934 			goto syntax;
   3935 
   3936 		/*
   3937 		 * sanity check the allocation list
   3938 		 */
   3939 		if ((extlist != NULL) && meta_sp_list_overlaps(extlist))
   3940 			goto syntax;
   3941 	}
   3942 
   3943 	if (len == 0LL) {
   3944 		if (argc == 0)
   3945 			goto syntax;
   3946 		if (meta_sp_parsesize(argv[0], &len) == -1)
   3947 			goto syntax;
   3948 		--argc, ++argv;
   3949 	}
   3950 
   3951 	msp->ext.ext_val = Zalloc(sizeof (*msp->ext.ext_val));
   3952 	msp->ext.ext_val->len = len;
   3953 	msp->compnamep = spcompnp;
   3954 
   3955 	/* we should be at the end */
   3956 	if (argc != 0)
   3957 		goto syntax;
   3958 
   3959 	/* create soft partition */
   3960 	if (meta_create_sp(*spp, msp, extlist, options, alignment, ep) != 0)
   3961 		goto out;
   3962 	rval = 0;
   3963 
   3964 	/* let em know */
   3965 	if (options & MDCMD_PRINT) {
   3966 		(void) printf(dgettext(TEXT_DOMAIN,
   3967 		    "%s: Soft Partition is setup\n"),
   3968 		    devname);
   3969 		(void) fflush(stdout);
   3970 	}
   3971 	goto out;
   3972 
   3973 syntax:
   3974 	/* syntax error */
   3975 	rval = meta_cook_syntax(ep, MDE_SYNTAX, compname, argc, argv);
   3976 	goto out;
   3977 
   3978 options:
   3979 	/* options error */
   3980 	rval = meta_cook_syntax(ep, MDE_OPTION, compname, argc, argv);
   3981 	goto out;
   3982 
   3983 out:
   3984 	if (msp != NULL) {
   3985 		if (msp->ext.ext_val != NULL) {
   3986 			Free(msp->ext.ext_val);
   3987 		}
   3988 		Free(msp);
   3989 	}
   3990 
   3991 	return (rval);
   3992 }
   3993 
   3994 /*
   3995  * FUNCTION:	meta_free_sp()
   3996  * INPUT:	msp	- the soft partition unit to free
   3997  * OUTPUT:	none
   3998  * RETURNS:	void
   3999  * PURPOSE:	provides an interface from the rest of libmeta for freeing a
   4000  *		soft partition unit
   4001  */
   4002 void
   4003 meta_free_sp(md_sp_t *msp)
   4004 {
   4005 	Free(msp);
   4006 }
   4007 
   4008 /*
   4009  * FUNCTION:	meta_sp_issp()
   4010  * INPUT:	sp	- the set name to check
   4011  *		np	- the name to check
   4012  * OUTPUT:	ep	- return error pointer
   4013  * RETURNS:	int	- 0 means sp,np is a soft partition
   4014  *			  1 means sp,np is not a soft partition
   4015  * PURPOSE:	determines whether the given device is a soft partition
   4016  *		device.  This is called by other metadevice check routines.
   4017  */
   4018 int
   4019 meta_sp_issp(
   4020 	mdsetname_t	*sp,
   4021 	mdname_t	*np,
   4022 	md_error_t	*ep
   4023 )
   4024 {
   4025 	if (meta_get_sp_common(sp, np, 0, ep) == NULL)
   4026 		return (1);
   4027 
   4028 	return (0);
   4029 }
   4030 
   4031 /*
   4032  * FUNCTION:	meta_check_sp()
   4033  * INPUT:	sp	- the set name to check
   4034  *		msp	- the unit structure to check
   4035  *		options	- creation options
   4036  * OUTPUT:	repart_options - options to be passed to
   4037  *				meta_repartition_drive()
   4038  *		ep	- return error pointer
   4039  * RETURNS:	int	-  0 ok to create on this component
   4040  *			  -1 error or not ok to create on this component
   4041  * PURPOSE:	Checks to determine whether the rules for creation of
   4042  *		soft partitions allow creation of a soft partition on
   4043  *		the device described by the mdname_t structure referred
   4044  *		to by msp->compnamep.
   4045  *
   4046  *		NOTE: Does NOT check to determine whether the extents
   4047  *		      described in the md_sp_t structure referred to by
   4048  *		      msp will fit on the device described by the mdname_t
   4049  *		      structure located at msp->compnamep.
   4050  */
   4051 static int
   4052 meta_check_sp(
   4053 	mdsetname_t	*sp,
   4054 	md_sp_t		*msp,
   4055 	mdcmdopts_t	options,
   4056 	int		*repart_options,
   4057 	md_error_t	*ep
   4058 )
   4059 {
   4060 	md_common_t	*mdp;
   4061 	mdname_t	*compnp = msp->compnamep;
   4062 	uint_t		slice;
   4063 	mddrivename_t	*dnp;
   4064 	mdname_t	*slicenp;
   4065 	mdvtoc_t	*vtocp;
   4066 
   4067 	/* make sure it is in the set */
   4068 	if (meta_check_inset(sp, compnp, ep) != 0)
   4069 		return (-1);
   4070 
   4071 	if ((options & MDCMD_USE_WHOLE_DISK) != 0) {
   4072 		uint_t	rep_slice;
   4073 
   4074 		/*
   4075 		 * check to make sure we can partition this drive.
   4076 		 * we cannot continue if any of the following are
   4077 		 * true:
   4078 		 * The drive is a metadevice.
   4079 		 * The drive contains a mounted slice.
   4080 		 * The drive contains a slice being swapped to.
   4081 		 * The drive contains slices which are part of other
   4082 		 * metadevices.
   4083 		 * The drive contains a metadb.
   4084 		 */
   4085 		if (metaismeta(compnp))
   4086 			return (mddeverror(ep, MDE_IS_META, compnp->dev,
   4087 			    compnp->cname));
   4088 
   4089 		assert(compnp->drivenamep != NULL);
   4090 
   4091 		/*
   4092 		 * ensure that we have slice 0 since the disk will be
   4093 		 * repartitioned in the USE_WHOLE_DISK case.  this check
   4094 		 * is redundant unless the user incorrectly specifies a
   4095 		 * a fully qualified drive AND slice name (i.e.,
   4096 		 * /dev/dsk/cXtXdXsX), which will be incorrectly
   4097 		 * recognized as a drive name by the metaname code.
   4098 		 */
   4099 
   4100 		if ((vtocp = metagetvtoc(compnp, FALSE, &slice, ep)) == NULL)
   4101 			return (-1);
   4102 		if (slice != MD_SLICE0)
   4103 			return (mderror(ep, MDE_NOT_DRIVENAME, compnp->cname));
   4104 
   4105 		dnp = compnp->drivenamep;
   4106 		if (meta_replicaslice(dnp, &rep_slice, ep) != 0)
   4107 			return (-1);
   4108 
   4109 		for (slice = 0; slice < vtocp->nparts; slice++) {
   4110 
   4111 			/* only check if the slice really exists */
   4112 			if (vtocp->parts[slice].size == 0)
   4113 				continue;
   4114 
   4115 			slicenp = metaslicename(dnp, slice, ep);
   4116 			if (slicenp == NULL)
   4117 				return (-1);
   4118 
   4119 			/* check to ensure that it is not already in use */
   4120 			if (meta_check_inuse(sp,
   4121 			    slicenp, MDCHK_INUSE, ep) != 0) {
   4122 				return (-1);
   4123 			}
   4124 
   4125 			/*
   4126 			 * Up to this point, tests are applied to all
   4127 			 * slices uniformly.
   4128 			 */
   4129 
   4130 			if (slice == rep_slice) {
   4131 				/*
   4132 				 * Tests inside the body of this
   4133 				 * conditional are applied only to
   4134 				 * slice seven.
   4135 				 */
   4136 				if (meta_check_inmeta(sp, slicenp,
   4137 				    options | MDCHK_ALLOW_MDDB |
   4138 				    MDCHK_ALLOW_REPSLICE, 0, -1, ep) != 0)
   4139 					return (-1);
   4140 
   4141 				/*
   4142 				 * For slice seven, a metadb is NOT an
   4143 				 * automatic failure. It merely means
   4144 				 * that we're not allowed to muck
   4145 				 * about with the partitioning of that
   4146 				 * slice.  We indicate this by masking
   4147 				 * in the MD_REPART_LEAVE_REP flag.
   4148 				 */
   4149 				if (metahasmddb(sp, slicenp, ep)) {
   4150 					assert(repart_options !=
   4151 					    NULL);
   4152 					*repart_options |=
   4153 					    MD_REPART_LEAVE_REP;
   4154 				}
   4155 
   4156 				/*
   4157 				 * Skip the remaining tests for slice
   4158 				 * seven
   4159 				 */
   4160 				continue;
   4161 			}
   4162 
   4163 			/*
   4164 			 * Tests below this point will be applied to
   4165 			 * all slices EXCEPT for the replica slice.
   4166 			 */
   4167 
   4168 
   4169 			/* check if component is in a metadevice */
   4170 			if (meta_check_inmeta(sp, slicenp, options, 0,
   4171 			    -1, ep) != 0)
   4172 				return (-1);
   4173 
   4174 			/* check to see if component has a metadb */
   4175 			if (metahasmddb(sp, slicenp, ep))
   4176 				return (mddeverror(ep, MDE_HAS_MDDB,
   4177 				    slicenp->dev, slicenp->cname));
   4178 		}
   4179 		/*
   4180 		 * This should be all of the testing necessary when
   4181 		 * the MDCMD_USE_WHOLE_DISK flag is set; the rest of
   4182 		 * meta_check_sp() is oriented towards component
   4183 		 * arguments instead of disks.
   4184 		 */
   4185 		goto meta_check_sp_ok;
   4186 
   4187 	}
   4188 
   4189 	/* check to ensure that it is not already in use */
   4190 	if (meta_check_inuse(sp, compnp, MDCHK_INUSE, ep) != 0) {
   4191 		return (-1);
   4192 	}
   4193 
   4194 	if (!metaismeta(compnp)) {	/* handle non-metadevices */
   4195 
   4196 		/*
   4197 		 * The component can have one or more soft partitions on it
   4198 		 * already, but can't be part of any other type of metadevice,
   4199 		 * so if it is used for a metadevice, but the metadevice
   4200 		 * isn't a soft partition, return failure.
   4201 		 */
   4202 
   4203 		if (meta_check_inmeta(sp, compnp, options, 0, -1, ep) != 0 &&
   4204 		    meta_check_insp(sp, compnp, 0, -1, ep) == 0) {
   4205 			return (-1);
   4206 		}
   4207 	} else {			/* handle metadevices */
   4208 		/* get underlying unit & check capabilities */
   4209 		if ((mdp = meta_get_unit(sp, compnp, ep)) == NULL)
   4210 			return (-1);
   4211 
   4212 		if ((! (mdp->capabilities & MD_CAN_PARENT)) ||
   4213 		    (! (mdp->capabilities & MD_CAN_SP)))
   4214 			return (mdmderror(ep, MDE_INVAL_UNIT,
   4215 			    meta_getminor(compnp->dev), compnp->cname));
   4216 	}
   4217 
   4218 meta_check_sp_ok:
   4219 	mdclrerror(ep);
   4220 	return (0);
   4221 }
   4222 
   4223 /*
   4224  * FUNCTION:	meta_create_sp()
   4225  * INPUT:	sp	- the set name to create in
   4226  *		msp	- the unit structure to create
   4227  *		oblist	- an optional list of requested extents (-o/-b options)
   4228  *		options	- creation options
   4229  *		alignment - data alignment
   4230  * OUTPUT:	ep	- return error pointer
   4231  * RETURNS:	int	-  0 success, -1 error
   4232  * PURPOSE:	does most of the work for creating a soft partition.  If
   4233  *		metainit -p -e was used, first partition the drive.  Then
   4234  *		create an extent list based on the existing soft partitions
   4235  *		and assume all space not used by them is free.  Storage for
   4236  *		the new soft partition is allocated from the free extents
   4237  *		based on the length specified on the command line or the
   4238  *		oblist passed in.  The unit structure is then committed and
   4239  *		the watermarks are updated.  Finally, the status is changed to
   4240  *		Okay and the process is complete.
   4241  */
   4242 static int
   4243 meta_create_sp(
   4244 	mdsetname_t	*sp,
   4245 	md_sp_t		*msp,
   4246 	sp_ext_node_t	*oblist,
   4247 	mdcmdopts_t	options,
   4248 	sp_ext_length_t	alignment,
   4249 	md_error_t	*ep
   4250 )
   4251 {
   4252 	mdname_t	*np = msp->common.namep;
   4253 	mdname_t	*compnp = msp->compnamep;
   4254 	mp_unit_t	*mp = NULL;
   4255 	mdnamelist_t	*keynlp = NULL, *spnlp = NULL;
   4256 	md_set_params_t	set_params;
   4257 	int		rval = -1;
   4258 	diskaddr_t	comp_size;
   4259 	diskaddr_t	sp_start;
   4260 	sp_ext_node_t	*extlist = NULL;
   4261 	int		numexts = 0;	/* number of extents */
   4262 	int		count = 0;
   4263 	int		committed = 0;
   4264 	int		repart_options = MD_REPART_FORCE;
   4265 	int		create_flag = MD_CRO_32BIT;
   4266 	int		mn_set_master = 0;
   4267 
   4268 	md_set_desc	*sd;
   4269 	md_set_mmown_params_t	*ownpar = NULL;
   4270 	int		comp_is_mirror = 0;
   4271 
   4272 	/* validate soft partition */
   4273 	if (meta_check_sp(sp, msp, options, &repart_options, ep) != 0)
   4274 		return (-1);
   4275 
   4276 	if ((options & MDCMD_USE_WHOLE_DISK) != 0) {
   4277 		if ((options & MDCMD_DOIT) != 0) {
   4278 			if (meta_repartition_drive(sp,
   4279 			    compnp->drivenamep,
   4280 			    repart_options,
   4281 			    NULL, /* Don't return the VTOC */
   4282 			    ep) != 0)
   4283 
   4284 				return (-1);
   4285 		} else {
   4286 			/*
   4287 			 * If -n and -e are both specified, it doesn't make
   4288 			 * sense to continue without actually partitioning
   4289 			 * the drive.
   4290 			 */
   4291 			return (0);
   4292 		}
   4293 	}
   4294 
   4295 	/* populate the start_blk field of the component name */
   4296 	if ((sp_start = meta_sp_get_start(sp, compnp, ep)) ==
   4297 	    MD_DISKADDR_ERROR) {
   4298 		rval = -1;
   4299 		goto out;
   4300 	}
   4301 
   4302 	if (options & MDCMD_DOIT) {
   4303 		/* store name in namespace */
   4304 		if (add_key_name(sp, compnp, &keynlp, ep) != 0) {
   4305 			rval = -1;
   4306 			goto out;
   4307 		}
   4308 	}
   4309 
   4310 	/*
   4311 	 * Get a list of the soft partitions that currently reside on
   4312 	 * the component.  We should ALWAYS force reload the cache,
   4313 	 * because if this is a single creation, there will not BE a
   4314 	 * cached list, and if we're using the md.tab, we must rebuild
   4315 	 * the list because it won't contain the previous (if any)
   4316 	 * soft partition.
   4317 	 */
   4318 	count = meta_sp_get_by_component(sp, compnp, &spnlp, 1, ep);
   4319 	if (count < 0) {
   4320 		/* error occured */
   4321 		rval = -1;
   4322 		goto out;
   4323 	}
   4324 
   4325 	/*
   4326 	 * get the size of the underlying device.  if the size is smaller
   4327 	 * than or equal to the watermark size, we know there isn't
   4328 	 * enough space.
   4329 	 */
   4330 	if ((comp_size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR) {
   4331 		rval = -1;
   4332 		goto out;
   4333 	} else if (comp_size <= MD_SP_WMSIZE) {
   4334 		(void) mdmderror(ep, MDE_SP_NOSPACE, 0, compnp->cname);
   4335 		rval = -1;
   4336 		goto out;
   4337 	}
   4338 	/*
   4339 	 * seed extlist with reserved space at the beginning of the volume and
   4340 	 * enough space for the end watermark.  The end watermark always gets
   4341 	 * updated, but if the underlying device changes size it may not be
   4342 	 * pointed to until the extent before it is updated.  Since the
   4343 	 * end of the reserved space is where the first watermark starts,
   4344 	 * the reserved extent should never be marked for updating.
   4345 	 */
   4346 
   4347 	meta_sp_list_insert(NULL, NULL, &extlist,
   4348 	    0ULL, sp_start, EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset);
   4349 	meta_sp_list_insert(NULL, NULL, &extlist,
   4350 	    (sp_ext_offset_t)(comp_size - MD_SP_WMSIZE), MD_SP_WMSIZE,
   4351 	    EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
   4352 
   4353 	if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) {
   4354 		rval = -1;
   4355 		goto out;
   4356 	}
   4357 
   4358 	metafreenamelist(spnlp);
   4359 
   4360 	if (getenv(META_SP_DEBUG)) {
   4361 		meta_sp_debug("meta_create_sp: list of used extents:\n");
   4362 		meta_sp_list_dump(extlist);
   4363 	}
   4364 
   4365 	meta_sp_list_freefill(&extlist, metagetsize(compnp, ep));
   4366 
   4367 	/* get extent list from -o/-b options or from free space */
   4368 	if (options & MDCMD_DIRECT) {
   4369 		if (getenv(META_SP_DEBUG)) {
   4370 			meta_sp_debug("meta_create_sp: Dumping -o/-b list:\n");
   4371 			meta_sp_list_dump(oblist);
   4372 		}
   4373 
   4374 		numexts = meta_sp_alloc_by_list(sp, np, &extlist, oblist);
   4375 		if (numexts == -1) {
   4376 			(void) mdmderror(ep, MDE_SP_OVERLAP, 0, np->cname);
   4377 			rval = -1;
   4378 			goto out;
   4379 		}
   4380 	} else {
   4381 		numexts = meta_sp_alloc_by_len(sp, np, &extlist,
   4382 		    &msp->ext.ext_val->len, 0LL, (alignment > 0) ? alignment :
   4383 		    meta_sp_get_default_alignment(sp, compnp, ep));
   4384 		if (numexts == -1) {
   4385 			(void) mdmderror(ep, MDE_SP_NOSPACE, 0, np->cname);
   4386 			rval = -1;
   4387 			goto out;
   4388 		}
   4389 	}
   4390 
   4391 	assert(extlist != NULL);
   4392 
   4393 	/* create soft partition */
   4394 	mp = meta_sp_createunit(msp->common.namep, msp->compnamep,
   4395 	    extlist, numexts, msp->ext.ext_val->len, MD_SP_CREATEPEND, ep);
   4396 
   4397 	create_flag = meta_check_devicesize(mp->c.un_total_blocks);
   4398 
   4399 	/* if we're not doing anything (metainit -n), return success */
   4400 	if (! (options & MDCMD_DOIT)) {
   4401 		rval = 0;	/* success */
   4402 		goto out;
   4403 	}
   4404 
   4405 	(void) memset(&set_params, 0, sizeof (set_params));
   4406 
   4407 	if (create_flag == MD_CRO_64BIT) {
   4408 		mp->c.un_revision |= MD_64BIT_META_DEV;
   4409 		set_params.options = MD_CRO_64BIT;
   4410 	} else {
   4411 		mp->c.un_revision &= ~MD_64BIT_META_DEV;
   4412 		set_params.options = MD_CRO_32BIT;
   4413 	}
   4414 
   4415 	if (getenv(META_SP_DEBUG)) {
   4416 		meta_sp_debug("meta_create_sp: printing unit structure\n");
   4417 		meta_sp_printunit(mp);
   4418 	}
   4419 
   4420 	/*
   4421 	 * Check to see if we're trying to create a partition on a mirror. If so
   4422 	 * we may have to enforce an ownership change before writing the
   4423 	 * watermark out.
   4424 	 */
   4425 	if (metaismeta(compnp)) {
   4426 		char *miscname;
   4427 
   4428 		miscname = metagetmiscname(compnp, ep);
   4429 		if (miscname != NULL)
   4430 			comp_is_mirror = (strcmp(miscname, MD_MIRROR) == 0);
   4431 		else
   4432 			comp_is_mirror = 0;
   4433 	} else {
   4434 		comp_is_mirror = 0;
   4435 	}
   4436 
   4437 	/*
   4438 	 * For a multi-node environment we have to ensure that the master
   4439 	 * node owns an underlying mirror before we issue the MD_IOCSET ioctl.
   4440 	 * If the master does not own the device we will deadlock as the
   4441 	 * implicit write of the watermarks (in sp_ioctl.c) will cause an
   4442 	 * ownership change that will block as the MD_IOCSET is still in
   4443 	 * progress. To close this window we force an owner change to occur
   4444 	 * before issuing the MD_IOCSET. We cannot simply open the device and
   4445 	 * write to it as this will only work for the first soft-partition
   4446 	 * creation.
   4447 	 */
   4448 
   4449 	if (comp_is_mirror && !metaislocalset(sp)) {
   4450 
   4451 		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
   4452 			rval = -1;
   4453 			goto out;
   4454 		}
   4455 		if (MD_MNSET_DESC(sd) && sd->sd_mn_am_i_master) {
   4456 			mn_set_master = 1;
   4457 		}
   4458 	}
   4459 
   4460 	set_params.mnum = MD_SID(mp);
   4461 	set_params.size = mp->c.un_size;
   4462 	set_params.mdp = (uintptr_t)mp;
   4463 	MD_SETDRIVERNAME(&set_params, MD_SP, MD_MIN2SET(set_params.mnum));
   4464 
   4465 	/* first phase of commit. */
   4466 	if (metaioctl(MD_IOCSET, &set_params, &set_params.mde,
   4467 	    np->cname) != 0) {
   4468 		(void) mdstealerror(ep, &set_params.mde);
   4469 		rval = -1;
   4470 		goto out;
   4471 	}
   4472 
   4473 	/* we've successfully committed the record */
   4474 	committed = 1;
   4475 
   4476 	/* write watermarks */
   4477 	/*
   4478 	 * Special-case for Multi-node sets. As we now have a distributed DRL
   4479 	 * update mechanism, we _will_ hit the ioctl-within-ioctl deadlock case
   4480 	 * unless we use a 'special' MN-capable ioctl to stage the watermark
   4481 	 * update. This only affects the master-node in an MN set.
   4482 	 */
   4483 	if (mn_set_master) {
   4484 		if (meta_mn_sp_update_wm(sp, msp, extlist, ep) < 0) {
   4485 			rval = -1;
   4486 			goto out;
   4487 		}
   4488 	} else {
   4489 		if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) {
   4490 			rval = -1;
   4491 			goto out;
   4492 		}
   4493 	}
   4494 
   4495 	/* second phase of commit, set status to MD_SP_OK */
   4496 	if (meta_sp_setstatus(sp, &(MD_SID(mp)), 1, MD_SP_OK, ep) < 0) {
   4497 		rval = -1;
   4498 		goto out;
   4499 	}
   4500 	rval = 0;
   4501 out:
   4502 	Free(mp);
   4503 	if (ownpar)
   4504 		Free(ownpar);
   4505 
   4506 	if (extlist != NULL)
   4507 		meta_sp_list_free(&extlist);
   4508 
   4509 	if (rval != 0 && keynlp != NULL && committed != 1)
   4510 		(void) del_key_names(sp, keynlp, NULL);
   4511 
   4512 	metafreenamelist(keynlp);
   4513 
   4514 	return (rval);
   4515 }
   4516 
   4517 /*
   4518  * **************************************************************************
   4519  *                      Reset (metaclear) Functions                         *
   4520  * **************************************************************************
   4521  */
   4522 
   4523 /*
   4524  * FUNCTION:	meta_sp_reset_common()
   4525  * INPUT:	sp	- the set name of the device to reset
   4526  *		np	- the name of the device to reset
   4527  *		msp	- the unit structure to reset
   4528  *		options	- metaclear options
   4529  * OUTPUT:	ep	- return error pointer
   4530  * RETURNS:	int	-  0 success, -1 error
   4531  * PURPOSE:	"resets", or more accurately deletes, the soft partition
   4532  *		specified.  First the state is set to "deleting" and then the
   4533  *		watermarks are all cleared out.  Once the watermarks have been
   4534  *		updated, the unit structure is deleted from the metadb.
   4535  */
   4536 static int
   4537 meta_sp_reset_common(
   4538 	mdsetname_t	*sp,
   4539 	mdname_t	*np,
   4540 	md_sp_t		*msp,
   4541 	md_sp_reset_t	reset_params,
   4542 	mdcmdopts_t	options,
   4543 	md_error_t	*ep
   4544 )
   4545 {
   4546 	char	*miscname;
   4547 	int	rval = -1;
   4548 	int	is_open = 0;
   4549 
   4550 	/* make sure that nobody owns us */
   4551 	if (MD_HAS_PARENT(msp->common.parent))
   4552 		return (mdmderror(ep, MDE_IN_USE, meta_getminor(np->dev),
   4553 		    np->cname));
   4554 
   4555 	/* make sure that the soft partition isn't open */
   4556 	if ((is_open = meta_isopen(sp, np, ep, options)) < 0)
   4557 		return (-1);
   4558 	else if (is_open)
   4559 		return (mdmderror(ep, MDE_IS_OPEN, meta_getminor(np->dev),
   4560 		    np->cname));
   4561 
   4562 	/* get miscname */
   4563 	if ((miscname = metagetmiscname(np, ep)) == NULL)
   4564 		return (-1);
   4565 
   4566 	/* fill in reset params */
   4567 	MD_SETDRIVERNAME(&reset_params, miscname, sp->setno);
   4568 	reset_params.mnum = meta_getminor(np->dev);
   4569 	reset_params.force = (options & MDCMD_FORCE) ? 1 : 0;
   4570 
   4571 	/*
   4572 	 * clear soft partition - phase one.
   4573 	 * place the soft partition into the "delete pending" state.
   4574 	 */
   4575 	if (meta_sp_setstatus(sp, &reset_params.mnum, 1, MD_SP_DELPEND, ep) < 0)
   4576 		return (-1);
   4577 
   4578 	/*
   4579 	 * Now clear the watermarks.  If the force flag is specified,
   4580 	 * ignore any errors writing the watermarks and delete the unit
   4581 	 * structure anyway.  An error may leave the on-disk format in a
   4582 	 * corrupt state.  If force is not specified and we fail here,
   4583 	 * the soft partition will remain in the "delete pending" state.
   4584 	 */
   4585 	if ((meta_sp_clear_wm(sp, msp, ep) < 0) &&
   4586 	    ((options & MDCMD_FORCE) == 0))
   4587 		goto out;
   4588 
   4589 	/*
   4590 	 * clear soft partition - phase two.
   4591 	 * the driver removes the soft partition from the metadb and
   4592 	 * zeros out incore version.
   4593 	 */
   4594 	if (metaioctl(MD_IOCRESET, &reset_params,
   4595 	    &reset_params.mde, np->cname) != 0) {
   4596 		(void) mdstealerror(ep, &reset_params.mde);
   4597 		goto out;
   4598 	}
   4599 
   4600 	/*
   4601 	 * Wait for the /dev to be cleaned up. Ignore the return
   4602 	 * value since there's not much we can do.
   4603 	 */
   4604 	(void) meta_update_devtree(meta_getminor(np->dev));
   4605 
   4606 	rval = 0;	/* success */
   4607 
   4608 	if (options & MDCMD_PRINT) {
   4609 		(void) printf(dgettext(TEXT_DOMAIN,
   4610 		    "%s: Soft Partition is cleared\n"),
   4611 		    np->cname);
   4612 		(void) fflush(stdout);
   4613 	}
   4614 
   4615 	/*
   4616 	 * if told to recurse and on a metadevice, then attempt to
   4617 	 * clear the subdevices.  Indicate failure if the clear fails.
   4618 	 */
   4619 	if ((options & MDCMD_RECURSE) &&
   4620 	    (metaismeta(msp->compnamep)) &&
   4621 	    (meta_reset_by_name(sp, msp->compnamep, options, ep) != 0))
   4622 		rval = -1;
   4623 
   4624 out:
   4625 	meta_invalidate_name(np);
   4626 	return (rval);
   4627 }
   4628 
   4629 /*
   4630  * FUNCTION:	meta_sp_reset()
   4631  * INPUT:	sp	- the set name of the device to reset
   4632  *		np	- the name of the device to reset
   4633  *		options	- metaclear options
   4634  * OUTPUT:	ep	- return error pointer
   4635  * RETURNS:	int	-  0 success, -1 error
   4636  * PURPOSE:	provides the entry point to the rest of libmeta for deleting a
   4637  *		soft partition.  If np is NULL, then soft partitions are
   4638  *		all deleted at the current level and then recursively deleted.
   4639  *		Otherwise, if a name is specified either directly or as a
   4640  *		result of a recursive operation, it deletes only that name.
   4641  *		Since something sitting under a soft partition may be parented
   4642  *		to it, we have to reparent that other device to another soft
   4643  *		partition on the same component if we're deleting the one it's
   4644  *		parented to.
   4645  */
   4646 int
   4647 meta_sp_reset(
   4648 	mdsetname_t	*sp,
   4649 	mdname_t	*np,
   4650 	mdcmdopts_t	options,
   4651 	md_error_t	*ep
   4652 )
   4653 {
   4654 	md_sp_t		*msp;
   4655 	int		rval = -1;
   4656 	mdnamelist_t	*spnlp = NULL, *nlp = NULL;
   4657 	md_sp_reset_t	reset_params;
   4658 	int		num_sp;
   4659 
   4660 	assert(sp != NULL);
   4661 
   4662 	/* reset/delete all soft paritions */
   4663 	if (np == NULL) {
   4664 		/*
   4665 		 * meta_reset_all sets MDCMD_RECURSE, but this behavior
   4666 		 * is incorrect for soft partitions.  We want to clear
   4667 		 * all soft partitions at a particular level in the
   4668 		 * metadevice stack before moving to the next level.
   4669 		 * Thus, we clear MDCMD_RECURSE from the options.
   4670 		 */
   4671 		options &= ~MDCMD_RECURSE;
   4672 
   4673 		/* for each soft partition */
   4674 		rval = 0;
   4675 		if (meta_get_sp_names(sp, &spnlp, 0, ep) < 0)
   4676 			rval = -1;
   4677 
   4678 		for (nlp = spnlp; (nlp != NULL); nlp = nlp->next) {
   4679 			np = nlp->namep;
   4680 			if ((msp = meta_get_sp(sp, np, ep)) == NULL) {
   4681 				rval = -1;
   4682 				break;
   4683 			}
   4684 			/*
   4685 			 * meta_reset_all calls us twice to get soft
   4686 			 * partitions at the top and bottom of the stack.
   4687 			 * thus, if we have a parent, we'll get deleted
   4688 			 * on the next call.
   4689 			 */
   4690 			if (MD_HAS_PARENT(msp->common.parent))
   4691 				continue;
   4692 			/*
   4693 			 * If this is a multi-node set, we send a series
   4694 			 * of individual metaclear commands.
   4695 			 */
   4696 			if (meta_is_mn_set(sp, ep)) {
   4697 				if (meta_mn_send_metaclear_command(sp,
   4698 				    np->cname, options, 0, ep) != 0) {
   4699 					rval = -1;
   4700 					break;
   4701 				}
   4702 			} else {
   4703 				if (meta_sp_reset(sp, np, options, ep) != 0) {
   4704 					rval = -1;
   4705 					break;
   4706 				}
   4707 			}
   4708 		}
   4709 		/* cleanup return status */
   4710 		metafreenamelist(spnlp);
   4711 		return (rval);
   4712 	}
   4713 
   4714 	/* check the name */
   4715 	if (metachkmeta(np, ep) != 0)
   4716 		return (-1);
   4717 
   4718 	/* get the unit structure */
   4719 	if ((msp = meta_get_sp(sp, np, ep)) == NULL)
   4720 		return (-1);
   4721 
   4722 	/* clear out reset parameters */
   4723 	(void) memset(&reset_params, 0, sizeof (reset_params));
   4724 
   4725 	/* if our child is a metadevice, we need to deparent/reparent it */
   4726 	if (metaismeta(msp->compnamep)) {
   4727 		/* get sp's on this component */
   4728 		if ((num_sp = meta_sp_get_by_component(sp, msp->compnamep,
   4729 		    &spnlp, 1, ep)) <= 0)
   4730 			/* no sp's on this device.  error! */
   4731 			return (-1);
   4732 		else if (num_sp == 1)
   4733 			/* last sp on this device, so we deparent */
   4734 			reset_params.new_parent = MD_NO_PARENT;
   4735 		else {
   4736 			/* have to reparent this metadevice */
   4737 			for (nlp = spnlp; nlp != NULL; nlp = nlp->next) {
   4738 				if (meta_getminor(nlp->namep->dev) ==
   4739 				    meta_getminor(np->dev))
   4740 					continue;
   4741 				/*
   4742 				 * this isn't the softpart we are deleting,
   4743 				 * so use this device as the new parent.
   4744 				 */
   4745 				reset_params.new_parent =
   4746 				    meta_getminor(nlp->namep->dev);
   4747 				break;
   4748 			}
   4749 		}
   4750 		metafreenamelist(spnlp);
   4751 	}
   4752 
   4753 	if (meta_sp_reset_common(sp, np, msp, reset_params, options, ep) != 0)
   4754 		return (-1);
   4755 
   4756 	return (0);
   4757 }
   4758 
   4759 /*
   4760  * FUNCTION:	meta_sp_reset_component()
   4761  * INPUT:	sp	- the set name of the device to reset
   4762  *		name	- the string name of the device to reset
   4763  *		options	- metaclear options
   4764  * OUTPUT:	ep	- return error pointer
   4765  * RETURNS:	int	-  0 success, -1 error
   4766  * PURPOSE:	provides the ability to delete all soft partitions on a
   4767  *		specified device (metaclear -p).  It first gets all of the
   4768  *		soft partitions on the component and then deletes each one
   4769  *		individually.
   4770  */
   4771 int
   4772 meta_sp_reset_component(
   4773 	mdsetname_t	*sp,
   4774 	char		*name,
   4775 	mdcmdopts_t	options,
   4776 	md_error_t	*ep
   4777 )
   4778 {
   4779 	mdname_t	*compnp, *np;
   4780 	mdnamelist_t	*spnlp = NULL;
   4781 	mdnamelist_t	*nlp = NULL;
   4782 	md_sp_t		*msp;
   4783 	int		count;
   4784 	md_sp_reset_t	reset_params;
   4785 
   4786 	if ((compnp = metaname(&sp, name, UNKNOWN, ep)) == NULL)
   4787 		return (-1);
   4788 
   4789 	/* If we're starting out with no soft partitions, it's an error */
   4790 	count = meta_sp_get_by_component(sp, compnp, &spnlp, 1, ep);
   4791 	if (count == 0)
   4792 		return (mdmderror(ep, MDE_SP_NOSP, 0, compnp->cname));
   4793 	else if (count < 0)
   4794 		return (-1);
   4795 
   4796 	/*
   4797 	 * clear all soft partitions on this component.
   4798 	 * NOTE: we reparent underlying metadevices as we go so that
   4799 	 * things stay sane.  Also, if we encounter an error, we stop
   4800 	 * and go no further in case recovery might be needed.
   4801 	 */
   4802 	for (nlp = spnlp; nlp != NULL; nlp = nlp->next) {
   4803 		/* clear out reset parameters */
   4804 		(void) memset(&reset_params, 0, sizeof (reset_params));
   4805 
   4806 		/* check the name */
   4807 		np = nlp->namep;
   4808 
   4809 		if (metachkmeta(np, ep) != 0) {
   4810 			metafreenamelist(spnlp);
   4811 			return (-1);
   4812 		}
   4813 
   4814 		/* get the unit structure */
   4815 		if ((msp = meta_get_sp(sp, np, ep)) == NULL) {
   4816 			metafreenamelist(spnlp);
   4817 			return (-1);
   4818 		}
   4819 
   4820 		/* have to deparent/reparent metadevices */
   4821 		if (metaismeta(compnp)) {
   4822 			if (nlp->next == NULL)
   4823 				reset_params.new_parent = MD_NO_PARENT;
   4824 			else
   4825 				reset_params.new_parent =
   4826 				    meta_getminor(spnlp->next->namep->dev);
   4827 		}
   4828 
   4829 		/* clear soft partition */
   4830 		if (meta_sp_reset_common(sp, np, msp, reset_params,
   4831 		    options, ep) < 0) {
   4832 			metafreenamelist(spnlp);
   4833 			return (-1);
   4834 		}
   4835 	}
   4836 	metafreenamelist(spnlp);
   4837 	return (0);
   4838 }
   4839 
   4840 /*
   4841  * **************************************************************************
   4842  *                      Grow (metattach) Functions                          *
   4843  * **************************************************************************
   4844  */
   4845 
   4846 /*
   4847  * FUNCTION:	meta_sp_attach()
   4848  * INPUT:	sp	- the set name of the device to attach to
   4849  *		np	- the name of the device to attach to
   4850  *		addsize	- the unparsed string holding the amount of space to add
   4851  *		options	- metattach options
   4852  *		alignment - data alignment
   4853  * OUTPUT:	ep	- return error pointer
   4854  * RETURNS:	int	-  0 success, -1 error
   4855  * PURPOSE:	grows a soft partition by reading in the existing unit
   4856  *		structure and setting its state to Growing, allocating more
   4857  *		space (similar to meta_create_sp()), updating the watermarks,
   4858  *		and then writing out the new unit structure in the Okay state.
   4859  */
   4860 int
   4861 meta_sp_attach(
   4862 	mdsetname_t	*sp,
   4863 	mdname_t	*np,
   4864 	char		*addsize,
   4865 	mdcmdopts_t	options,
   4866 	sp_ext_length_t	alignment,
   4867 	md_error_t	*ep
   4868 )
   4869 {
   4870 	md_grow_params_t	grow_params;
   4871 	sp_ext_length_t		grow_len;	/* amount to grow */
   4872 	mp_unit_t		*mp, *new_un;
   4873 	mdname_t		*compnp = NULL;
   4874 
   4875 	sp_ext_node_t		*extlist = NULL;
   4876 	int			numexts;
   4877 	mdnamelist_t		*spnlp = NULL;
   4878 	int			count;
   4879 	md_sp_t			*msp;
   4880 	daddr_t			start_block;
   4881 
   4882 	/* should have the same set */
   4883 	assert(sp != NULL);
   4884 	assert(sp->setno == MD_MIN2SET(meta_getminor(np->dev)));
   4885 
   4886 	/* check name */
   4887 	if (metachkmeta(np, ep) != 0)
   4888 		return (-1);
   4889 
   4890 	if (meta_sp_parsesize(addsize, &grow_len) == -1) {
   4891 		return (mdmderror(ep, MDE_SP_BAD_LENGTH, 0, np->cname));
   4892 	}
   4893 
   4894 	if ((mp = (mp_unit_t *)meta_get_mdunit(sp, np, ep)) == NULL)
   4895 		return (-1);
   4896 
   4897 	/* make sure we don't have a parent */
   4898 	if (MD_HAS_PARENT(mp->c.un_parent)) {
   4899 		Free(mp);
   4900 		return (mdmderror(ep, MDE_INVAL_UNIT, 0, np->cname));
   4901 	}
   4902 
   4903 	if (getenv(META_SP_DEBUG)) {
   4904 		meta_sp_debug("meta_sp_attach: Unit structure before new "
   4905 		    "space:\n");
   4906 		meta_sp_printunit(mp);
   4907 	}
   4908 
   4909 	/*
   4910 	 * NOTE: the fast option to metakeyname is 0 as opposed to 1
   4911 	 * If this was not the case we would suffer the following
   4912 	 * assertion failure:
   4913 	 * Assertion failed: type1 != MDT_FAST_META && type1 != MDT_FAST_COMP
   4914 	 * file meta_check.x, line 315
   4915 	 * I guess this is because we have not "seen" this drive before
   4916 	 * and hence hit the failure - this is of course the attach routine
   4917 	 */
   4918 	if ((compnp = metakeyname(&sp, mp->un_key, 0, ep)) == NULL) {
   4919 		Free(mp);
   4920 		return (-1);
   4921 	}
   4922 
   4923 	/* metakeyname does not fill in the key. */
   4924 	compnp->key = mp->un_key;
   4925 
   4926 	/* work out the space on the component that we are dealing with */
   4927 	count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep);
   4928 
   4929 	/*
   4930 	 * see if the component has been soft partitioned yet, or if an
   4931 	 * error occurred.
   4932 	 */
   4933 	if (count == 0) {
   4934 		Free(mp);
   4935 		return (mdmderror(ep, MDE_NOT_SP, 0, np->cname));
   4936 	} else if (count < 0) {
   4937 		Free(mp);
   4938 		return (-1);
   4939 	}
   4940 
   4941 	/*
   4942 	 * seed extlist with reserved space at the beginning of the volume and
   4943 	 * enough space for the end watermark.  The end watermark always gets
   4944 	 * updated, but if the underlying device changes size it may not be
   4945 	 * pointed to until the extent before it is updated.  Since the
   4946 	 * end of the reserved space is where the first watermark starts,
   4947 	 * the reserved extent should never be marked for updating.
   4948 	 */
   4949 	if ((start_block = meta_sp_get_start(sp, compnp, ep)) ==
   4950 	    MD_DISKADDR_ERROR) {
   4951 		Free(mp);
   4952 		return (-1);
   4953 	}
   4954 
   4955 	meta_sp_list_insert(NULL, NULL, &extlist, 0ULL, start_block,
   4956 	    EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset);
   4957 	meta_sp_list_insert(NULL, NULL, &extlist,
   4958 	    metagetsize(compnp, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE,
   4959 	    EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
   4960 
   4961 	if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) {
   4962 		Free(mp);
   4963 		return (-1);
   4964 	}
   4965 
   4966 	metafreenamelist(spnlp);
   4967 
   4968 	if (getenv(META_SP_DEBUG)) {
   4969 		meta_sp_debug("meta_sp_attach: list of used extents:\n");
   4970 		meta_sp_list_dump(extlist);
   4971 	}
   4972 
   4973 	meta_sp_list_freefill(&extlist, metagetsize(compnp, ep));
   4974 
   4975 	assert(mp->un_numexts >= 1);
   4976 	numexts = meta_sp_alloc_by_len(sp, np, &extlist, &grow_len,
   4977 	    mp->un_ext[mp->un_numexts - 1].un_poff,
   4978 	    (alignment > 0) ? alignment :
   4979 	    meta_sp_get_default_alignment(sp, compnp, ep));
   4980 
   4981 	if (numexts == -1) {
   4982 		Free(mp);
   4983 		return (mdmderror(ep, MDE_SP_NOSPACE, 0, np->cname));
   4984 	}
   4985 
   4986 	/* allocate new unit structure and copy in old unit */
   4987 	if ((new_un = meta_sp_updateunit(np, mp, extlist,
   4988 	    grow_len, numexts, ep)) == NULL) {
   4989 		Free(mp);
   4990 		return (-1);
   4991 	}
   4992 	Free(mp);
   4993 
   4994 	/* If running in dryrun mode (-n option), we're done here */
   4995 	if ((options & MDCMD_DOIT) == 0) {
   4996 		if (options & MDCMD_PRINT) {
   4997 			(void) printf(dgettext(TEXT_DOMAIN,
   4998 			    "%s: Soft Partition would grow\n"),
   4999 			    np->cname);
   5000 			(void) fflush(stdout);
   5001 		}
   5002 		return (0);
   5003 	}
   5004 
   5005 	if (getenv(META_SP_DEBUG)) {
   5006 		meta_sp_debug("meta_sp_attach: updated unit structure:\n");
   5007 		meta_sp_printunit(new_un);
   5008 	}
   5009 
   5010 	assert(new_un != NULL);
   5011 
   5012 	(void) memset(&grow_params, 0, sizeof (grow_params));
   5013 	if (new_un->c.un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS) {
   5014 		grow_params.options = MD_CRO_64BIT;
   5015 		new_un->c.un_revision |= MD_64BIT_META_DEV;
   5016 	} else {
   5017 		grow_params.options = MD_CRO_32BIT;
   5018 		new_un->c.un_revision &= ~MD_64BIT_META_DEV;
   5019 	}
   5020 	grow_params.mnum = MD_SID(new_un);
   5021 	grow_params.size = new_un->c.un_size;
   5022 	grow_params.mdp = (uintptr_t)new_un;
   5023 	MD_SETDRIVERNAME(&grow_params, MD_SP, MD_MIN2SET(grow_params.mnum));
   5024 
   5025 	if (metaioctl(MD_IOCGROW, &grow_params, &grow_params.mde,
   5026 	    np->cname) != 0) {
   5027 		(void) mdstealerror(ep, &grow_params.mde);
   5028 		return (-1);
   5029 	}
   5030 
   5031 	/* update all watermarks */
   5032 
   5033 	if ((msp = meta_get_sp(sp, np, ep)) == NULL)
   5034 		return (-1);
   5035 	if (meta_sp_update_wm(sp, msp, extlist, ep) < 0)
   5036 		return (-1);
   5037 
   5038 
   5039 	/* second phase of commit, set status to MD_SP_OK */
   5040 	if (meta_sp_setstatus(sp, &(MD_SID(new_un)), 1, MD_SP_OK, ep) < 0)
   5041 		return (-1);
   5042 
   5043 	meta_invalidate_name(np);
   5044 
   5045 	if (options & MDCMD_PRINT) {
   5046 		(void) printf(dgettext(TEXT_DOMAIN,
   5047 		    "%s: Soft Partition has been grown\n"),
   5048 		    np->cname);
   5049 		(void) fflush(stdout);
   5050 	}
   5051 
   5052 	return (0);
   5053 }
   5054 
   5055 /*
   5056  * **************************************************************************
   5057  *                    Recovery (metarecover) Functions                      *
   5058  * **************************************************************************
   5059  */
   5060 
   5061 /*
   5062  * FUNCTION:	meta_recover_sp()
   5063  * INPUT:	sp	- the name of the set we are recovering on
   5064  *		compnp	- name pointer for device we are recovering on
   5065  *		argc	- argument count
   5066  *		argv	- left over arguments not parsed by metarecover command
   5067  *		options	- metarecover options
   5068  * OUTPUT:	ep	- return error pointer
   5069  * RETURNS:	int	- 0 - success, -1 - error
   5070  * PURPOSE:	parse soft partitioning-specific metarecover options and
   5071  *		dispatch to the appropriate function to handle recovery.
   5072  */
   5073 int
   5074 meta_recover_sp(
   5075 	mdsetname_t	*sp,
   5076 	mdname_t	*compnp,
   5077 	int		argc,
   5078 	char		*argv[],
   5079 	mdcmdopts_t	options,
   5080 	md_error_t	*ep
   5081 )
   5082 {
   5083 	md_set_desc	*sd;
   5084 
   5085 	if (argc > 1) {
   5086 		(void) meta_cook_syntax(ep, MDE_SYNTAX, compnp->cname,
   5087 		    argc, argv);
   5088 		return (-1);
   5089 	}
   5090 
   5091 	/*
   5092 	 * For a MN set, this operation must be performed on the master
   5093 	 * as it is responsible for maintaining the watermarks
   5094 	 */
   5095 	if (!metaislocalset(sp)) {
   5096 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
   5097 			return (-1);
   5098 		if (MD_MNSET_DESC(sd) && !sd->sd_mn_am_i_master) {
   5099 			(void) mddserror(ep, MDE_DS_MASTER_ONLY, sp->setno,
   5100 			    sd->sd_mn_master_nodenm, NULL, NULL);
   5101 			return (-1);
   5102 		}
   5103 	}
   5104 	if (argc == 0) {
   5105 		/*
   5106 		 * if no additional arguments are passed, metarecover should
   5107 		 * validate both on-disk and metadb structures as well as
   5108 		 * checking that both are consistent with each other
   5109 		 */
   5110 		if (meta_sp_validate_wm(sp, compnp, options, ep) < 0)
   5111 			return (-1);
   5112 		if (meta_sp_validate_unit(sp, compnp, options, ep) < 0)
   5113 			return (-1);
   5114 		if (meta_sp_validate_wm_and_unit(sp, compnp, options, ep) < 0)
   5115 			return (-1);
   5116 	} else if (strcmp(argv[0], "-d") == 0) {
   5117 		/*
   5118 		 * Ensure that there is no existing valid record for this
   5119 		 * soft-partition. If there is we have nothing to do.
   5120 		 */
   5121 		if (meta_sp_validate_unit(sp, compnp, options, ep) == 0)
   5122 			return (-1);
   5123 		/* validate and recover from on-disk structures */
   5124 		if (meta_sp_validate_wm(sp, compnp, options, ep) < 0)
   5125 			return (-1);
   5126 		if (meta_sp_recover_from_wm(sp, compnp, options, ep) < 0)
   5127 			return (-1);
   5128 	} else if (strcmp(argv[0], "-m") == 0) {
   5129 		/* validate and recover from metadb structures */
   5130 		if (meta_sp_validate_unit(sp, compnp, options, ep) < 0)
   5131 			return (-1);
   5132 		if (meta_sp_recover_from_unit(sp, compnp, options, ep) < 0)
   5133 			return (-1);
   5134 	} else {
   5135 		/* syntax error */
   5136 		(void) meta_cook_syntax(ep, MDE_SYNTAX, compnp->cname,
   5137 		    argc, argv);
   5138 		return (-1);
   5139 	}
   5140 
   5141 	return (0);
   5142 }
   5143 
   5144 /*
   5145  * FUNCTION:	meta_sp_display_exthdr()
   5146  * INPUT:	none
   5147  * OUTPUT:	none
   5148  * RETURNS:	void
   5149  * PURPOSE:	print header line for sp_ext_node_t information.  to be used
   5150  *		in conjunction with meta_sp_display_ext().
   5151  */
   5152 static void
   5153 meta_sp_display_exthdr(void)
   5154 {
   5155 	(void) printf("%20s %5s %7s %20s %20s\n",
   5156 	    dgettext(TEXT_DOMAIN, "Name"),
   5157 	    dgettext(TEXT_DOMAIN, "Seq#"),
   5158 	    dgettext(TEXT_DOMAIN, "Type"),
   5159 	    dgettext(TEXT_DOMAIN, "Offset"),
   5160 	    dgettext(TEXT_DOMAIN, "Length"));
   5161 }
   5162 
   5163 
   5164 /*
   5165  * FUNCTION:	meta_sp_display_ext()
   5166  * INPUT:	ext	- extent to display
   5167  * OUTPUT:	none
   5168  * RETURNS:	void
   5169  * PURPOSE:	print selected fields from sp_ext_node_t.
   5170  */
   5171 static void
   5172 meta_sp_display_ext(sp_ext_node_t *ext)
   5173 {
   5174 	/* print extent information */
   5175 	if (ext->ext_namep != NULL)
   5176 		(void) printf("%20s ", ext->ext_namep->cname);
   5177 	else
   5178 		(void) printf("%20s ", "NONE");
   5179 
   5180 	(void) printf("%5u ", ext->ext_seq);
   5181 
   5182 	switch (ext->ext_type) {
   5183 	case EXTTYP_ALLOC:
   5184 		(void) printf("%7s ", "ALLOC");
   5185 		break;
   5186 	case EXTTYP_FREE:
   5187 		(void) printf("%7s ", "FREE");
   5188 		break;
   5189 	case EXTTYP_RESERVED:
   5190 		(void) printf("%7s ", "RESV");
   5191 		break;
   5192 	case EXTTYP_END:
   5193 		(void) printf("%7s ", "END");
   5194 		break;
   5195 	default:
   5196 		(void) printf("%7s ", "INVLD");
   5197 		break;
   5198 	}
   5199 
   5200 	(void) printf("%20llu %20llu\n", ext->ext_offset, ext->ext_length);
   5201 }
   5202 
   5203 
   5204 /*
   5205  * FUNCTION:	meta_sp_checkseq()
   5206  * INPUT:	extlist	- list of extents to be checked
   5207  * OUTPUT:	none
   5208  * RETURNS:	int	- 0 - success, -1 - error
   5209  * PURPOSE:	check soft partition sequence numbers.  this function assumes
   5210  *		that a list of extents representing 1 or more soft partitions
   5211  *		is passed in sorted in sequence number order.  within a
   5212  *		single soft partition, there may not be any missing or
   5213  *		duplicate sequence numbers.
   5214  */
   5215 static int
   5216 meta_sp_checkseq(sp_ext_node_t *extlist)
   5217 {
   5218 	sp_ext_node_t *ext;
   5219 
   5220 	assert(extlist != NULL);
   5221 
   5222 	for (ext = extlist;
   5223 	    ext->ext_next != NULL && ext->ext_next->ext_type == EXTTYP_ALLOC;
   5224 	    ext = ext->ext_next) {
   5225 		if (ext->ext_next->ext_namep != NULL &&
   5226 		    strcmp(ext->ext_next->ext_namep->cname,
   5227 		    ext->ext_namep->cname) != 0)
   5228 				continue;
   5229 
   5230 		if (ext->ext_next->ext_seq != ext->ext_seq + 1) {
   5231 			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   5232 			    "%s: sequence numbers are "
   5233 			    "incorrect: %d should be %d\n"),
   5234 			    ext->ext_next->ext_namep->cname,
   5235 			    ext->ext_next->ext_seq, ext->ext_seq + 1);
   5236 			return (-1);
   5237 		}
   5238 	}
   5239 	return (0);
   5240 }
   5241 
   5242 
/*
 * FUNCTION:	meta_sp_resolve_name_conflict()
 * INPUT:	sp	- name of the set we are recovering in.
 *		old_np	- name pointer of soft partition we found on disk.
 * OUTPUT:	new_np	- name pointer for new soft partition name.
 *		ep	- error pointer returned.
 * RETURNS:	int	- 0 - name not replaced, 1 - name replaced, -1 - error
 * PURPOSE:	Check to see if the name of one of the soft partitions we found
 *		on disk already exists in the metadb.  If so, prompt for a new
 *		name.  In addition, we keep a static array of names that
 *		will be recovered from this device since these names don't
 *		exist in the configuration at this point but cannot be
 *		recovered more than once.
 */
   5257 static int
   5258 meta_sp_resolve_name_conflict(
   5259 	mdsetname_t	*sp,
   5260 	mdname_t	*old_np,
   5261 	mdname_t	**new_np,
   5262 	md_error_t	*ep
   5263 )
   5264 {
   5265 	char		yesno[255];
   5266 	char		*yes;
   5267 	char		newname[MD_SP_MAX_DEVNAME_PLUS_1];
   5268 	int		nunits;
   5269 	static int	*used_names = NULL;
   5270 
   5271 	assert(old_np != NULL);
   5272 
   5273 	if (used_names == NULL) {
   5274 		if ((nunits = meta_get_nunits(ep)) < 0)
   5275 			return (-1);
   5276 		used_names = Zalloc(nunits * sizeof (int));
   5277 	}
   5278 
   5279 	/* see if it exists already */
   5280 	if (used_names[MD_MIN2UNIT(meta_getminor(old_np->dev))] == 0 &&
   5281 	    metagetmiscname(old_np, ep) == NULL) {
   5282 		if (! mdismderror(ep, MDE_UNIT_NOT_SETUP))
   5283 			return (-1);
   5284 		else {
   5285 			used_names[MD_MIN2UNIT(meta_getminor(old_np->dev))] = 1;
   5286 			mdclrerror(ep);
   5287 			return (0);
   5288 		}
   5289 	}
   5290 
   5291 	/* name exists, ask the user for a new one */
   5292 	(void) printf(dgettext(TEXT_DOMAIN,
   5293 	    "WARNING: A soft partition named %s was found in the extent\n"
   5294 	    "headers, but this name already exists in the metadb "
   5295 	    "configuration.\n"
   5296 	    "In order to continue recovery you must supply\n"
   5297 	    "a new name for this soft partition.\n"), old_np->cname);
   5298 	(void) printf(dgettext(TEXT_DOMAIN,
   5299 	    "Would you like to continue and supply a new name? (yes/no) "));
   5300 
   5301 	(void) fflush(stdout);
   5302 	if ((fgets(yesno, sizeof (yesno), stdin) == NULL) ||
   5303 	    (strlen(yesno) == 1))
   5304 		(void) snprintf(yesno, sizeof (yesno), "%s\n",
   5305 		    dgettext(TEXT_DOMAIN, "no"));
   5306 	yes = dgettext(TEXT_DOMAIN, "yes");
   5307 	if (strncasecmp(yesno, yes, strlen(yesno) - 1) != 0) {
   5308 		return (-1);
   5309 	}
   5310 
   5311 	(void) fflush(stdin);
   5312 
   5313 	/* get the new name */
   5314 	for (;;) {
   5315 		(void) printf(dgettext(TEXT_DOMAIN, "Please enter a new name "
   5316 		    "for this soft partition (dXXXX) "));
   5317 		(void) fflush(stdout);
   5318 		if (fgets(newname, MD_SP_MAX_DEVNAME_PLUS_1, stdin) == NULL)
   5319 			(void) strcpy(newname, "");
   5320 
   5321 		/* remove newline character */
   5322 		if (newname[strlen(newname) - 1] == '\n')
   5323 			newname[strlen(newname) - 1] = '\0';
   5324 
   5325 		if (!(is_metaname(newname)) ||
   5326 		    (meta_init_make_device(&sp, newname, ep) <= 0)) {
   5327 			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   5328 			    "Invalid metadevice name\n"));
   5329 			(void) fflush(stderr);
   5330 			continue;
   5331 		}
   5332 
   5333 		if ((*new_np = metaname(&sp, newname,
   5334 		    META_DEVICE, ep)) == NULL) {
   5335 			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   5336 			    "Invalid metadevice name\n"));
   5337 			(void) fflush(stderr);
   5338 			continue;
   5339 		}
   5340 
   5341 		assert(MD_MIN2UNIT(meta_getminor((*new_np)->dev)) < nunits);
   5342 		/* make sure the name isn't already being used */
   5343 		if (used_names[MD_MIN2UNIT(meta_getminor((*new_np)->dev))] ||
   5344 		    metagetmiscname(*new_np, ep) != NULL) {
   5345 			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   5346 			    "That name already exists\n"));
   5347 			continue;
   5348 		} else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP))
   5349 			return (-1);
   5350 
   5351 		break;
   5352 	}
   5353 
   5354 	/* got a new name, place in used array and return */
   5355 	used_names[MD_MIN2UNIT(meta_getminor((*new_np)->dev))] = 1;
   5356 	mdclrerror(ep);
   5357 	return (1);
   5358 }
   5359 
   5360 /*
   5361  * FUNCTION:	meta_sp_validate_wm()
   5362  * INPUT:	sp	- set name we are recovering in
   5363  *		compnp	- name pointer for device we are recovering from
   5364  *		options	- metarecover options
   5365  * OUTPUT:	ep	- error pointer returned
   5366  * RETURNS:	int	- 0 - success, -1 - error
   5367  * PURPOSE:	validate and display watermark configuration.  walk the
   5368  *		on-disk watermark structures and validate the information
   5369  *		found within.  since a watermark configuration is
   5370  *		"self-defining", the act of traversing the watermarks
   5371  *		is part of the validation process.
   5372  */
   5373 static int
   5374 meta_sp_validate_wm(
   5375 	mdsetname_t	*sp,
   5376 	mdname_t	*compnp,
   5377 	mdcmdopts_t	options,
   5378 	md_error_t	*ep
   5379 )
   5380 {
   5381 	sp_ext_node_t	*extlist = NULL;
   5382 	sp_ext_node_t	*ext;
   5383 	int		num_sps = 0;
   5384 	int		rval;
   5385 
   5386 	if ((options & MDCMD_VERBOSE) != 0)
   5387 		(void) printf(dgettext(TEXT_DOMAIN,
   5388 		    "Verifying on-disk structures on %s.\n"),
   5389 		    compnp->cname);
   5390 
   5391 	/*
   5392 	 * for each watermark, build an ext_node, place on list.
   5393 	 */
   5394 	rval = meta_sp_extlist_from_wm(sp, compnp, &extlist,
   5395 	    meta_sp_cmp_by_nameseq, ep);
   5396 
   5397 	if ((options & MDCMD_VERBOSE) != 0) {
   5398 		/* print out what we found */
   5399 		if (extlist == NULL)
   5400 			(void) printf(dgettext(TEXT_DOMAIN,
   5401 			    "No extent headers found on %s.\n"),
   5402 			    compnp->cname);
   5403 		else {
   5404 			(void) printf(dgettext(TEXT_DOMAIN,
   5405 			    "The following extent headers were found on %s.\n"),
   5406 			    compnp->cname);
   5407 			meta_sp_display_exthdr();
   5408 		}
   5409 		for (ext = extlist; ext != NULL; ext = ext->ext_next)
   5410 			meta_sp_display_ext(ext);
   5411 	}
   5412 
   5413 	if (rval < 0) {
   5414 		(void) printf(dgettext(TEXT_DOMAIN,
   5415 		    "%s: On-disk structures invalid or "
   5416 		    "no soft partitions found.\n"),
   5417 		    compnp->cname);
   5418 		return (-1);
   5419 	}
   5420 
   5421 	assert(extlist != NULL);
   5422 
   5423 	/* count number of soft partitions */
   5424 	for (ext = extlist;
   5425 	    ext != NULL && ext->ext_type == EXTTYP_ALLOC;
   5426 	    ext = ext->ext_next) {
   5427 		if (ext->ext_next != NULL &&
   5428 		    ext->ext_next->ext_namep != NULL &&
   5429 		    strcmp(ext->ext_next->ext_namep->cname,
   5430 		    ext->ext_namep->cname) == 0)
   5431 				continue;
   5432 		num_sps++;
   5433 	}
   5434 
   5435 	if ((options & MDCMD_VERBOSE) != 0)
   5436 		(void) printf(dgettext(TEXT_DOMAIN,
   5437 		    "Found %d soft partition(s) on %s.\n"), num_sps,
   5438 		    compnp->cname);
   5439 
   5440 	if (num_sps == 0) {
   5441 		(void) printf(dgettext(TEXT_DOMAIN,
   5442 		    "%s: No soft partitions.\n"), compnp->cname);
   5443 		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
   5444 	}
   5445 
   5446 	/* check sequence numbers */
   5447 	if ((options & MDCMD_VERBOSE) != 0)
   5448 		(void) printf(dgettext(TEXT_DOMAIN,
   5449 		    "Checking sequence numbers.\n"));
   5450 
   5451 	if (meta_sp_checkseq(extlist) != 0)
   5452 		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
   5453 
   5454 	return (0);
   5455 }
   5456 
   5457 /*
   5458  * FUNCTION:	meta_sp_validate_unit()
   5459  * INPUT:	sp	- name of set we are recovering in
   5460  *		compnp	- name of component we are recovering from
   5461  *		options	- metarecover options
   5462  * OUTPUT:	ep	- error pointer returned
   5463  * RETURNS:	int	- 0 - success, -1 - error
   5464  * PURPOSE:	validate and display metadb configuration.  begin by getting
   5465  *		all soft partitions built on the specified component.  get
   5466  *		the unit structure for each one and validate the fields within.
   5467  */
   5468 static int
   5469 meta_sp_validate_unit(
   5470 	mdsetname_t	*sp,
   5471 	mdname_t	*compnp,
   5472 	mdcmdopts_t	options,
   5473 	md_error_t	*ep
   5474 )
   5475 {
   5476 	md_sp_t		*msp;
   5477 	mdnamelist_t	*spnlp = NULL;
   5478 	mdnamelist_t	*namep = NULL;
   5479 	int		count;
   5480 	uint_t		extn;
   5481 	sp_ext_length_t	size;
   5482 
   5483 	if ((options & MDCMD_VERBOSE) != 0)
   5484 		(void) printf(dgettext(TEXT_DOMAIN,
   5485 		    "%s: Validating soft partition metadb entries.\n"),
   5486 		    compnp->cname);
   5487 
   5488 	if ((size = metagetsize(compnp, ep)) == MD_DISKADDR_ERROR)
   5489 		return (-1);
   5490 
   5491 	/* get all soft partitions on component */
   5492 	count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep);
   5493 
   5494 	if (count == 0) {
   5495 		(void) printf(dgettext(TEXT_DOMAIN,
   5496 		    "%s: No soft partitions.\n"), compnp->cname);
   5497 		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
   5498 	} else if (count < 0) {
   5499 		return (-1);
   5500 	}
   5501 
   5502 	/* Now go through the soft partitions and check each one */
   5503 	for (namep = spnlp; namep != NULL; namep = namep->next) {
   5504 		mdname_t	*curnp = namep->namep;
   5505 		sp_ext_offset_t	curvoff;
   5506 
   5507 		/* get the unit structure */
   5508 		if ((msp = meta_get_sp_common(sp, curnp, 0, ep)) == NULL)
   5509 			return (-1);
   5510 
   5511 		/* verify generic unit structure parameters */
   5512 		if ((options & MDCMD_VERBOSE) != 0)
   5513 			(void) printf(dgettext(TEXT_DOMAIN,
   5514 			    "\nVerifying device %s.\n"),
   5515 			    curnp->cname);
   5516 
   5517 		/*
   5518 		 * MD_SP_LAST is an invalid state and is always the
   5519 		 * highest numbered.
   5520 		 */
   5521 		if (msp->status >= MD_SP_LAST) {
   5522 			(void) printf(dgettext(TEXT_DOMAIN,
   5523 			    "%s: status value %u is out of range.\n"),
   5524 			    curnp->cname, msp->status);
   5525 			return (mdmderror(ep, MDE_RECOVER_FAILED,
   5526 			    0, curnp->cname));
   5527 		} else if ((options & MDCMD_VERBOSE) != 0) {
   5528 			uint_t	tstate = 0;
   5529 
   5530 			if (metaismeta(msp->compnamep)) {
   5531 				if (meta_get_tstate(msp->common.namep->dev,
   5532 				    &tstate, ep) != 0)
   5533 					return (-1);
   5534 			}
   5535 			(void) printf(dgettext(TEXT_DOMAIN,
   5536 			    "%s: Status \"%s\" is valid.\n"),
   5537 			    curnp->cname, meta_sp_status_to_name(msp->status,
   5538 			    tstate & MD_DEV_ERRORED));
   5539 		}
   5540 
   5541 		/* Now verify each extent */
   5542 		if ((options & MDCMD_VERBOSE) != 0)
   5543 			(void) printf("%14s %21s %21s %21s\n",
   5544 			    dgettext(TEXT_DOMAIN, "Extent Number"),
   5545 			    dgettext(TEXT_DOMAIN, "Virtual Offset"),
   5546 			    dgettext(TEXT_DOMAIN, "Physical Offset"),
   5547 			    dgettext(TEXT_DOMAIN, "Length"));
   5548 
   5549 		curvoff = 0ULL;
   5550 		for (extn = 0; extn < msp->ext.ext_len; extn++) {
   5551 			md_sp_ext_t	*extp = &msp->ext.ext_val[extn];
   5552 
   5553 			if ((options & MDCMD_VERBOSE) != 0)
   5554 				(void) printf("%14u %21llu %21llu %21llu\n",
   5555 				    extn, extp->voff, extp->poff, extp->len);
   5556 
   5557 			if (extp->voff != curvoff) {
   5558 				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   5559 				    "%s: virtual offset for extent %u "
   5560 				    "is inconsistent, expected %llu, "
   5561 				    "got %llu.\n"), curnp->cname, extn,
   5562 				    curvoff, extp->voff);
   5563 				return (mdmderror(ep, MDE_RECOVER_FAILED,
   5564 				    0, compnp->cname));
   5565 			}
   5566 
   5567 			/* make sure extent does not drop off the end */
   5568 			if ((extp->poff + extp->len) == size) {
   5569 				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   5570 				    "%s: extent %u at offset %llu, "
   5571 				    "length %llu exceeds the size of the "
   5572 				    "device, %llu.\n"), curnp->cname,
   5573 				    extn, extp->poff, extp->len, size);
   5574 				return (mdmderror(ep, MDE_RECOVER_FAILED,
   5575 				    0, compnp->cname));
   5576 			}
   5577 
   5578 			curvoff += extp->len;
   5579 		}
   5580 	}
   5581 	if (options & MDCMD_PRINT) {
   5582 		(void) printf(dgettext(TEXT_DOMAIN,
   5583 		    "%s: Soft Partition metadb configuration is valid\n"),
   5584 		    compnp->cname);
   5585 	}
   5586 	return (0);
   5587 }
   5588 
   5589 /*
   5590  * FUNCTION:	meta_sp_validate_wm_and_unit()
   5591  * INPUT:	sp	- name of set we are recovering in
   5592  *		compnp	- name of device we are recovering from
   5593  *		options	- metarecover options
   5594  * OUTPUT:	ep	- error pointer returned
   5595  * RETURNS:	int	- 0 - success, -1 error
   5596  * PURPOSE:	cross-validate and display watermarks and metadb records.
   5597  *		get both the unit structures for the soft partitions built
   5598  *		on the specified component and the watermarks found on that
   5599  *		component and check to make sure they are consistent with
   5600  *		each other.
   5601  */
   5602 static int
   5603 meta_sp_validate_wm_and_unit(
   5604 	mdsetname_t	*sp,
   5605 	mdname_t	*np,
   5606 	mdcmdopts_t	options,
   5607 	md_error_t	*ep
   5608 )
   5609 {
   5610 	sp_ext_node_t	*wmlist = NULL;
   5611 	sp_ext_node_t	*unitlist = NULL;
   5612 	sp_ext_node_t	*unitext;
   5613 	sp_ext_node_t	*wmext;
   5614 	sp_ext_offset_t	tmpunitoff;
   5615 	mdnamelist_t	*spnlp = NULL;
   5616 	int		count;
   5617 	int		rval = 0;
   5618 	int		verbose = (options & MDCMD_VERBOSE);
   5619 
   5620 	/* get unit structure list */
   5621 	count = meta_sp_get_by_component(sp, np, &spnlp, 0, ep);
   5622 	if (count <= 0)
   5623 		return (-1);
   5624 
   5625 	meta_sp_list_insert(NULL, NULL, &unitlist,
   5626 	    metagetsize(np, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE,
   5627 	    EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
   5628 
   5629 	if (meta_sp_extlist_from_namelist(sp, spnlp, &unitlist, ep) == -1) {
   5630 		metafreenamelist(spnlp);
   5631 		return (-1);
   5632 	}
   5633 
   5634 	metafreenamelist(spnlp);
   5635 
   5636 	meta_sp_list_freefill(&unitlist, metagetsize(np, ep));
   5637 
   5638 	if (meta_sp_extlist_from_wm(sp, np, &wmlist,
   5639 	    meta_sp_cmp_by_offset, ep) < 0) {
   5640 		meta_sp_list_free(&unitlist);
   5641 		return (-1);
   5642 	}
   5643 
   5644 	if (getenv(META_SP_DEBUG)) {
   5645 		meta_sp_debug("meta_sp_validate_wm_and_unit: unit list:\n");
   5646 		meta_sp_list_dump(unitlist);
   5647 		meta_sp_debug("meta_sp_validate_wm_and_unit: wm list:\n");
   5648 		meta_sp_list_dump(wmlist);
   5649 	}
   5650 
   5651 	/*
   5652 	 * step through both lists and compare allocated nodes.  Free
   5653 	 * nodes and end watermarks may differ between the two but
   5654 	 * that's generally ok, and if they're wrong will typically
   5655 	 * cause misplaced allocated extents.
   5656 	 */
   5657 	if (verbose)
   5658 		(void) printf(dgettext(TEXT_DOMAIN, "\n%s: Verifying metadb "
   5659 		    "allocations match extent headers.\n"), np->cname);
   5660 
   5661 	unitext = unitlist;
   5662 	wmext = wmlist;
   5663 	while ((wmext != NULL) && (unitext != NULL)) {
   5664 		/* find next allocated extents in each list */
   5665 		while (wmext != NULL && wmext->ext_type != EXTTYP_ALLOC)
   5666 			wmext = wmext->ext_next;
   5667 
   5668 		while (unitext != NULL && unitext->ext_type != EXTTYP_ALLOC)
   5669 			unitext = unitext->ext_next;
   5670 
   5671 		if (wmext == NULL || unitext == NULL)
   5672 			break;
   5673 
   5674 		if (verbose) {
   5675 			(void) printf(dgettext(TEXT_DOMAIN,
   5676 			    "Metadb extent:\n"));
   5677 			meta_sp_display_exthdr();
   5678 			meta_sp_display_ext(unitext);
   5679 			(void) printf(dgettext(TEXT_DOMAIN,
   5680 			    "Extent header extent:\n"));
   5681 			meta_sp_display_exthdr();
   5682 			meta_sp_display_ext(wmext);
   5683 			(void) printf("\n");
   5684 		}
   5685 
   5686 		if (meta_sp_validate_exts(np, wmext, unitext, ep) < 0)
   5687 			rval = -1;
   5688 
   5689 		/*
   5690 		 * if the offsets aren't equal, only increment the
   5691 		 * lowest one in hopes of getting the lists back in sync.
   5692 		 */
   5693 		tmpunitoff = unitext->ext_offset;
   5694 		if (unitext->ext_offset <= wmext->ext_offset)
   5695 			unitext = unitext->ext_next;
   5696 		if (wmext->ext_offset <= tmpunitoff)
   5697 			wmext = wmext->ext_next;
   5698 	}
   5699 
   5700 	/*
   5701 	 * if both lists aren't at the end then there are extra
   5702 	 * allocated nodes in one of them.
   5703 	 */
   5704 	if (wmext != NULL) {
   5705 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   5706 		    "%s: extent headers contain allocations not in "
   5707 		    "the metadb\n\n"), np->cname);
   5708 		rval = -1;
   5709 	}
   5710 
   5711 	if (unitext != NULL) {
   5712 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   5713 		    "%s: metadb contains allocations not in the extent "
   5714 		    "headers\n\n"), np->cname);
   5715 		rval = -1;
   5716 	}
   5717 
   5718 	if (options & MDCMD_PRINT) {
   5719 		if (rval == 0) {
   5720 			(void) printf(dgettext(TEXT_DOMAIN,
   5721 			    "%s: Soft Partition metadb matches extent "
   5722 			    "header configuration\n"), np->cname);
   5723 		} else {
   5724 			(void) printf(dgettext(TEXT_DOMAIN,
   5725 			    "%s: Soft Partition metadb does not match extent "
   5726 			    "header configuration\n"), np->cname);
   5727 		}
   5728 	}
   5729 
   5730 	return (rval);
   5731 }
   5732 
   5733 /*
   5734  * FUNCTION:	meta_sp_validate_exts()
   5735  * INPUT:	compnp	- name pointer for device we are recovering from
   5736  *		wmext	- extent node representing watermark
   5737  *		unitext	- extent node from unit structure
   5738  * OUTPUT:	ep	- return error pointer
   5739  * RETURNS:	int	- 0 - succes, mdmderror return code - error
   5740  * PURPOSE:	Takes two extent nodes and checks them against each other.
   5741  *		offset, length, sequence number, set, and name are compared.
   5742  */
   5743 static int
   5744 meta_sp_validate_exts(
   5745 	mdname_t	*compnp,
   5746 	sp_ext_node_t	*wmext,
   5747 	sp_ext_node_t	*unitext,
   5748 	md_error_t	*ep
   5749 )
   5750 {
   5751 	if (wmext->ext_offset != unitext->ext_offset) {
   5752 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   5753 		    "%s: unit structure and extent header offsets differ.\n"),
   5754 		    compnp->cname);
   5755 		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
   5756 	}
   5757 
   5758 	if (wmext->ext_length != unitext->ext_length) {
   5759 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   5760 		    "%s: unit structure and extent header lengths differ.\n"),
   5761 		    compnp->cname);
   5762 		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
   5763 	}
   5764 
   5765 	if (wmext->ext_seq != unitext->ext_seq) {
   5766 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   5767 		    "%s: unit structure and extent header sequence numbers "
   5768 		    "differ.\n"), compnp->cname);
   5769 		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
   5770 	}
   5771 
   5772 	if (wmext->ext_type != unitext->ext_type) {
   5773 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   5774 		    "%s: unit structure and extent header types differ.\n"),
   5775 		    compnp->cname);
   5776 		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
   5777 	}
   5778 
   5779 	/*
   5780 	 * If one has a set pointer and the other doesn't, error.
   5781 	 * If both extents have setnames, then make sure they match
   5782 	 * If both are NULL, it's ok, they match.
   5783 	 */
   5784 	if ((unitext->ext_setp == NULL) ^ (wmext->ext_setp == NULL)) {
   5785 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   5786 		    "%s: unit structure and extent header set values "
   5787 		    "differ.\n"), compnp->cname);
   5788 		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
   5789 	}
   5790 
   5791 	if (unitext->ext_setp != NULL) {
   5792 		if (strcmp(unitext->ext_setp->setname,
   5793 		    wmext->ext_setp->setname) != 0) {
   5794 			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   5795 			    "%s: unit structure and extent header set names "
   5796 			    "differ.\n"), compnp->cname);
   5797 			return (mdmderror(ep, MDE_RECOVER_FAILED,
   5798 			    0, compnp->cname));
   5799 		}
   5800 	}
   5801 
   5802 	/*
   5803 	 * If one has a name pointer and the other doesn't, error.
   5804 	 * If both extents have names, then make sure they match
   5805 	 * If both are NULL, it's ok, they match.
   5806 	 */
   5807 	if ((unitext->ext_namep == NULL) ^ (wmext->ext_namep == NULL)) {
   5808 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   5809 		    "%s: unit structure and extent header name values "
   5810 		    "differ.\n"), compnp->cname);
   5811 		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
   5812 	}
   5813 
   5814 	if (unitext->ext_namep != NULL) {
   5815 		if (strcmp(wmext->ext_namep->cname,
   5816 		    unitext->ext_namep->cname) != 0) {
   5817 			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   5818 			    "%s: unit structure and extent header names "
   5819 			    "differ.\n"), compnp->cname);
   5820 			return (mdmderror(ep, MDE_RECOVER_FAILED,
   5821 			    0, compnp->cname));
   5822 		}
   5823 	}
   5824 
   5825 	return (0);
   5826 }
   5827 
   5828 /*
   5829  * FUNCTION:	update_sp_status()
   5830  * INPUT:	sp	- name of set we are recovering in
   5831  *		minors	- pointer to an array of soft partition minor numbers
   5832  *		num_sps	- number of minor numbers in array
   5833  *		status	- new status to be applied to all soft parts in array
   5834  *		mn_set	- set if current set is a multi-node set
   5835  * OUTPUT:	ep	- return error pointer
   5836  * RETURNS:	int	- 0 - success, -1 - error
   5837  * PURPOSE:	update  status of soft partitions to new status. minors is an
   5838  *		array of minor numbers to apply the new status to.
   5839  *		If mn_set is set, a message is sent to all nodes in the
   5840  *		cluster to update the status locally.
   5841  */
   5842 static int
   5843 update_sp_status(
   5844 	mdsetname_t	*sp,
   5845 	minor_t		*minors,
   5846 	int		num_sps,
   5847 	sp_status_t	status,
   5848 	bool_t		mn_set,
   5849 	md_error_t	*ep
   5850 )
   5851 {
   5852 	int	i;
   5853 	int	err = 0;
   5854 
   5855 	if (mn_set) {
   5856 		md_mn_msg_sp_setstat_t	sp_setstat_params;
   5857 		int			result;
   5858 		md_mn_result_t		*resp = NULL;
   5859 
   5860 		for (i = 0; i < num_sps; i++) {
   5861 			sp_setstat_params.sp_setstat_mnum = minors[i];
   5862 			sp_setstat_params.sp_setstat_status = status;
   5863 
   5864 			result = mdmn_send_message(sp->setno,
   5865 			    MD_MN_MSG_SP_SETSTAT, MD_MSGF_DEFAULT_FLAGS, 0,
   5866 			    (char *)&sp_setstat_params,
   5867 			    sizeof (sp_setstat_params),
   5868 			    &resp, ep);
   5869 			if (resp != NULL) {
   5870 				if (resp->mmr_exitval != 0)
   5871 					err = -1;
   5872 				free_result(resp);
   5873 			}
   5874 			if (result != 0) {
   5875 				err = -1;
   5876 			}
   5877 		}
   5878 	} else {
   5879 		if (meta_sp_setstatus(sp, minors, num_sps, status, ep) < 0)
   5880 			err = -1;
   5881 	}
   5882 	if (err < 0) {
   5883 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   5884 		    "Error updating status on recovered soft "
   5885 		    "partitions.\n"));
   5886 	}
   5887 	return (err);
   5888 }
   5889 
   5890 /*
   5891  * FUNCTION:	meta_sp_recover_from_wm()
   5892  * INPUT:	sp	- name of set we are recovering in
   5893  *		compnp	- name pointer for component we are recovering from
   5894  *		options	- metarecover options
   5895  * OUTPUT:	ep	- return error pointer
   5896  * RETURNS:	int	- 0 - success, -1 - error
   5897  * PURPOSE:	update metadb records to match watermarks.  begin by getting
   5898  *		an extlist representing all soft partitions on the component.
   5899  *		then build a unit structure for each soft partition.
   5900  *		notify user of changes, then commit each soft partition to
   5901  *		the metadb one at a time in the "recovering" state.  update
   5902  *		any watermarks that may need it	(to reflect possible name
   5903  *		changes), and, finally, set the status of all recovered
   5904  *		partitions to the "OK" state at once.
   5905  */
   5906 static int
   5907 meta_sp_recover_from_wm(
   5908 	mdsetname_t	*sp,
   5909 	mdname_t	*compnp,
   5910 	mdcmdopts_t	options,
   5911 	md_error_t	*ep
   5912 )
   5913 {
   5914 	sp_ext_node_t		*extlist = NULL;
   5915 	sp_ext_node_t		*sp_list = NULL;
   5916 	sp_ext_node_t		*update_list = NULL;
   5917 	sp_ext_node_t		*ext;
   5918 	sp_ext_node_t		*sp_ext;
   5919 	mp_unit_t		*mp;
   5920 	mp_unit_t		**un_array;
   5921 	int			numexts = 0, num_sps = 0, i = 0;
   5922 	int			err = 0;
   5923 	int			not_recovered = 0;
   5924 	int			committed = 0;
   5925 	sp_ext_length_t		sp_length = 0LL;
   5926 	mdnamelist_t		*keynlp = NULL;
   5927 	mdname_t		*np;
   5928 	mdname_t		*new_np;
   5929 	int			new_name;
   5930 	md_set_params_t		set_params;
   5931 	minor_t			*minors = NULL;
   5932 	char			yesno[255];
   5933 	char			*yes;
   5934 	bool_t			mn_set = 0;
   5935 	md_set_desc		*sd;
   5936 	mm_unit_t		*mm;
   5937 	md_set_mmown_params_t	*ownpar = NULL;
   5938 	int			comp_is_mirror = 0;
   5939 
   5940 	/*
   5941 	 * if this component appears in another metadevice already, do
   5942 	 * NOT recover from it.
   5943 	 */
   5944 	if (meta_check_inmeta(sp, compnp, options, 0, -1, ep) != 0)
   5945 		return (-1);
   5946 
   5947 	/* set flag if dealing with a MN set */
   5948 	if (!metaislocalset(sp)) {
   5949 		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
   5950 			return (-1);
   5951 		}
   5952 		if (MD_MNSET_DESC(sd))
   5953 			mn_set = 1;
   5954 	}
   5955 	/*
   5956 	 * for each watermark, build an ext_node, place on list.
   5957 	 */
   5958 	if (meta_sp_extlist_from_wm(sp, compnp, &extlist,
   5959 	    meta_sp_cmp_by_nameseq, ep) < 0)
   5960 		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
   5961 
   5962 	assert(extlist != NULL);
   5963 
   5964 	/* count number of soft partitions */
   5965 	for (ext = extlist;
   5966 	    ext != NULL && ext->ext_type == EXTTYP_ALLOC;
   5967 	    ext = ext->ext_next) {
   5968 		if (ext->ext_next != NULL &&
   5969 		    ext->ext_next->ext_namep != NULL &&
   5970 		    strcmp(ext->ext_next->ext_namep->cname,
   5971 		    ext->ext_namep->cname) == 0)
   5972 				continue;
   5973 		num_sps++;
   5974 	}
   5975 
   5976 	/* allocate array of unit structure pointers */
   5977 	un_array = Zalloc(num_sps * sizeof (mp_unit_t *));
   5978 
   5979 	/*
   5980 	 * build unit structures from list of ext_nodes.
   5981 	 */
   5982 	for (ext = extlist;
   5983 	    ext != NULL && ext->ext_type == EXTTYP_ALLOC;
   5984 	    ext = ext->ext_next) {
   5985 		meta_sp_list_insert(ext->ext_setp, ext->ext_namep,
   5986 		    &sp_list, ext->ext_offset, ext->ext_length,
   5987 		    ext->ext_type, ext->ext_seq, ext->ext_flags,
   5988 		    meta_sp_cmp_by_nameseq);
   5989 
   5990 		numexts++;
   5991 		sp_length += ext->ext_length - MD_SP_WMSIZE;
   5992 
   5993 		if (ext->ext_next != NULL &&
   5994 		    ext->ext_next->ext_namep != NULL &&
   5995 		    strcmp(ext->ext_next->ext_namep->cname,
   5996 		    ext->ext_namep->cname) == 0)
   5997 				continue;
   5998 
   5999 		/*
   6000 		 * if we made it here, we are at a soft partition
   6001 		 * boundary in the list.
   6002 		 */
   6003 		if (getenv(META_SP_DEBUG)) {
   6004 			meta_sp_debug("meta_recover_from_wm: dumping wm "
   6005 			    "list:\n");
   6006 			meta_sp_list_dump(sp_list);
   6007 		}
   6008 
   6009 		assert(sp_list != NULL);
   6010 		assert(sp_list->ext_namep != NULL);
   6011 
   6012 		if ((new_name = meta_sp_resolve_name_conflict(sp,
   6013 		    sp_list->ext_namep, &new_np, ep)) < 0) {
   6014 			err = 1;
   6015 			goto out;
   6016 		} else if (new_name) {
   6017 			for (sp_ext = sp_list;
   6018 			    sp_ext != NULL;
   6019 			    sp_ext = sp_ext->ext_next) {
   6020 				/*
   6021 				 * insert into the update list for
   6022 				 * watermark update.
   6023 				 */
   6024 				meta_sp_list_insert(sp_ext->ext_setp,
   6025 				    new_np, &update_list, sp_ext->ext_offset,
   6026 				    sp_ext->ext_length, sp_ext->ext_type,
   6027 				    sp_ext->ext_seq, EXTFLG_UPDATE,
   6028 				    meta_sp_cmp_by_offset);
   6029 			}
   6030 
   6031 		}
   6032 		if (options & MDCMD_DOIT) {
   6033 			/* store name in namespace */
   6034 			if (mn_set) {
   6035 				/* send message to all nodes to return key */
   6036 				md_mn_msg_addkeyname_t	*send_params;
   6037 				int			result;
   6038 				md_mn_result_t		*resp = NULL;
   6039 				int			message_size;
   6040 
   6041 				message_size =  sizeof (*send_params) +
   6042 				    strlen(compnp->cname) + 1;
   6043 				send_params = Zalloc(message_size);
   6044 				send_params->addkeyname_setno = sp->setno;
   6045 				(void) strcpy(&send_params->addkeyname_name[0],
   6046 				    compnp->cname);
   6047 				result = mdmn_send_message(sp->setno,
   6048 				    MD_MN_MSG_ADDKEYNAME, MD_MSGF_DEFAULT_FLAGS,
   6049 				    0, (char *)send_params, message_size, &resp,
   6050 				    ep);
   6051 				Free(send_params);
   6052 				if (resp != NULL) {
   6053 					if (resp->mmr_exitval >= 0) {
   6054 						compnp->key =
   6055 						    (mdkey_t)resp->mmr_exitval;
   6056 					} else {
   6057 						err = 1;
   6058 						free_result(resp);
   6059 						goto out;
   6060 					}
   6061 					free_result(resp);
   6062 				}
   6063 				if (result != 0) {
   6064 					err = 1;
   6065 					goto out;
   6066 				}
   6067 				(void) metanamelist_append(&keynlp, compnp);
   6068 			} else {
   6069 				if (add_key_name(sp, compnp, &keynlp,
   6070 				    ep) != 0) {
   6071 					err = 1;
   6072 					goto out;
   6073 				}
   6074 			}
   6075 		}
   6076 
   6077 		/* create the unit structure */
   6078 		if ((mp = meta_sp_createunit(
   6079 		    (new_name) ? new_np : sp_list->ext_namep, compnp,
   6080 		    sp_list, numexts, sp_length, MD_SP_RECOVER, ep)) == NULL) {
   6081 			err = 1;
   6082 			goto out;
   6083 		}
   6084 
   6085 		if (getenv(META_SP_DEBUG)) {
   6086 			meta_sp_debug("meta_sp_recover_from_wm: "
   6087 			    "printing newly created unit structure");
   6088 			meta_sp_printunit(mp);
   6089 		}
   6090 
   6091 		/* place in unit structure array */
   6092 		un_array[i++] = mp;
   6093 
   6094 		/* free sp_list */
   6095 		meta_sp_list_free(&sp_list);
   6096 		sp_list = NULL;
   6097 		numexts = 0;
   6098 		sp_length = 0LL;
   6099 	}
   6100 
   6101 	/* display configuration updates */
   6102 	(void) printf(dgettext(TEXT_DOMAIN,
   6103 	    "The following soft partitions were found and will be added to\n"
   6104 	    "your metadevice configuration.\n"));
   6105 	(void) printf("%5s %15s %18s\n",
   6106 	    dgettext(TEXT_DOMAIN, "Name"),
   6107 	    dgettext(TEXT_DOMAIN, "Size"),
   6108 	    dgettext(TEXT_DOMAIN, "No. of Extents"));
   6109 	for (i = 0; i < num_sps; i++) {
   6110 		(void) printf("%5s%lu %15llu %9d\n", "d",
   6111 		    MD_MIN2UNIT(MD_SID(un_array[i])),
   6112 		    un_array[i]->un_length, un_array[i]->un_numexts);
   6113 	}
   6114 
   6115 	if (!(options & MDCMD_DOIT)) {
   6116 		not_recovered = 1;
   6117 		goto out;
   6118 	}
   6119 
   6120 	/* ask user for confirmation */
   6121 	(void) printf(dgettext(TEXT_DOMAIN,
   6122 	    "WARNING: You are about to add one or more soft partition\n"
   6123 	    "metadevices to your metadevice configuration.  If there\n"
   6124 	    "appears to be an error in the soft partition(s) displayed\n"
   6125 	    "above, do NOT proceed with this recovery operation.\n"));
   6126 	(void) printf(dgettext(TEXT_DOMAIN,
   6127 	    "Are you sure you want to do this (yes/no)? "));
   6128 
   6129 	(void) fflush(stdout);
   6130 	if ((fgets(yesno, sizeof (yesno), stdin) == NULL) ||
   6131 	    (strlen(yesno) == 1))
   6132 		(void) snprintf(yesno, sizeof (yesno), "%s\n",
   6133 		    dgettext(TEXT_DOMAIN, "no"));
   6134 	yes = dgettext(TEXT_DOMAIN, "yes");
   6135 	if (strncasecmp(yesno, yes, strlen(yesno) - 1) != 0) {
   6136 		not_recovered = 1;
   6137 		goto out;
   6138 	}
   6139 
   6140 	/* commit records one at a time */
   6141 	for (i = 0; i < num_sps; i++) {
   6142 		(void) memset(&set_params, 0, sizeof (set_params));
   6143 		set_params.mnum = MD_SID(un_array[i]);
   6144 		set_params.size = (un_array[i])->c.un_size;
   6145 		set_params.mdp = (uintptr_t)(un_array[i]);
   6146 		set_params.options =
   6147 		    meta_check_devicesize(un_array[i]->un_length);
   6148 		if (set_params.options == MD_CRO_64BIT) {
   6149 			un_array[i]->c.un_revision |= MD_64BIT_META_DEV;
   6150 		} else {
   6151 			un_array[i]->c.un_revision &= ~MD_64BIT_META_DEV;
   6152 		}
   6153 		MD_SETDRIVERNAME(&set_params, MD_SP,
   6154 		    MD_MIN2SET(set_params.mnum));
   6155 
   6156 		np = metamnumname(&sp, MD_SID(un_array[i]), 0, ep);
   6157 
   6158 		/*
   6159 		 * If this is an MN set, send the MD_IOCSET ioctl to all nodes
   6160 		 */
   6161 		if (mn_set) {
   6162 			md_mn_msg_iocset_t	send_params;
   6163 			int			result;
   6164 			md_mn_result_t		*resp = NULL;
   6165 			int			mess_size;
   6166 
   6167 			/*
   6168 			 * Calculate message size. md_mn_msg_iocset_t only
   6169 			 * contains one extent, so increment the size to
   6170 			 * include all extents
   6171 			 */
   6172 			mess_size = sizeof (send_params) -
   6173 			    sizeof (mp_ext_t) +
   6174 			    (un_array[i]->un_numexts * sizeof (mp_ext_t));
   6175 
   6176 			send_params.iocset_params = set_params;
   6177 			(void) memcpy(&send_params.unit, un_array[i],
   6178 			    sizeof (*un_array[i]) - sizeof (mp_ext_t) +
   6179 			    (un_array[i]->un_numexts * sizeof (mp_ext_t)));
   6180 			result = mdmn_send_message(sp->setno,
   6181 			    MD_MN_MSG_IOCSET, MD_MSGF_DEFAULT_FLAGS, 0,
   6182 			    (char *)&send_params, mess_size, &resp,
   6183 			    ep);
   6184 			if (resp != NULL) {
   6185 				if (resp->mmr_exitval != 0)
   6186 					err = 1;
   6187 				free_result(resp);
   6188 			}
   6189 			if (result != 0) {
   6190 				err = 1;
   6191 			}
   6192 		} else {
   6193 			if (metaioctl(MD_IOCSET, &set_params, &set_params.mde,
   6194 			    np->cname) != 0) {
   6195 				err = 1;
   6196 			}
   6197 		}
   6198 
   6199 		if (err == 1) {
   6200 			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   6201 			    "%s: Error committing record to metadb.\n"),
   6202 			    np->cname);
   6203 			goto out;
   6204 		}
   6205 
   6206 		/* note that we've committed a record */
   6207 		if (!committed)
   6208 			committed = 1;
   6209 
   6210 		/* update any watermarks that need it */
   6211 		if (update_list != NULL) {
   6212 			md_sp_t *msp;
   6213 
   6214 			/*
   6215 			 * Check to see if we're trying to create a partition
   6216 			 * on a mirror. If so we may have to enforce an
   6217 			 * ownership change before writing the watermark out.
   6218 			 */
   6219 			if (metaismeta(compnp)) {
   6220 				char *miscname;
   6221 
   6222 				miscname = metagetmiscname(compnp, ep);
   6223 				if (miscname != NULL)
   6224 					comp_is_mirror = (strcmp(miscname,
   6225 					    MD_MIRROR) == 0);
   6226 				else
   6227 					comp_is_mirror = 0;
   6228 			}
   6229 			/*
   6230 			 * If this is a MN set and the component is a mirror,
   6231 			 * change ownership to this node in order to write the
   6232 			 * watermarks
   6233 			 */
   6234 			if (mn_set && comp_is_mirror) {
   6235 				mm = (mm_unit_t *)meta_get_unit(sp, compnp, ep);
   6236 				if (mm == NULL) {
   6237 					err = 1;
   6238 					goto out;
   6239 				} else {
   6240 					err = meta_mn_change_owner(&ownpar,
   6241 					    sp->setno,
   6242 					    meta_getminor(compnp->dev),
   6243 					    sd->sd_mn_mynode->nd_nodeid,
   6244 					    MD_MN_MM_PREVENT_CHANGE |
   6245 					    MD_MN_MM_SPAWN_THREAD);
   6246 					if (err != 0)
   6247 						goto out;
   6248 				}
   6249 			}
   6250 
   6251 			if ((msp = meta_get_sp(sp, np, ep)) == NULL) {
   6252 				err = 1;
   6253 				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   6254 				    "%s: Error updating extent headers.\n"),
   6255 				    np->cname);
   6256 				goto out;
   6257 			}
   6258 			if (meta_sp_update_wm(sp, msp, update_list, ep) < 0) {
   6259 				err = 1;
   6260 				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   6261 				    "%s: Error updating extent headers "
   6262 				    "on disk.\n"), np->cname);
   6263 				goto out;
   6264 			}
   6265 		}
   6266 		/*
   6267 		 * If we have changed ownership earlier and prevented any
   6268 		 * ownership changes, we can now allow ownership changes
   6269 		 * again.
   6270 		 */
   6271 		if (ownpar) {
   6272 			(void) meta_mn_change_owner(&ownpar, sp->setno,
   6273 			    ownpar->d.mnum,
   6274 			    ownpar->d.owner,
   6275 			    MD_MN_MM_ALLOW_CHANGE | MD_MN_MM_SPAWN_THREAD);
   6276 		}
   6277 	}
   6278 
   6279 	/* update status of all soft partitions to OK */
   6280 	minors = Zalloc(num_sps * sizeof (minor_t));
   6281 	for (i = 0; i < num_sps; i++)
   6282 		minors[i] = MD_SID(un_array[i]);
   6283 
   6284 	err = update_sp_status(sp, minors, num_sps, MD_SP_OK, mn_set, ep);
   6285 	if (err != 0)
   6286 		goto out;
   6287 
   6288 	if (options & MDCMD_PRINT)
   6289 		(void) printf(dgettext(TEXT_DOMAIN, "%s: "
   6290 		    "Soft Partitions recovered from device.\n"),
   6291 		    compnp->cname);
   6292 out:
   6293 	/* free memory */
   6294 	if (extlist != NULL)
   6295 		meta_sp_list_free(&extlist);
   6296 	if (sp_list != NULL)
   6297 		meta_sp_list_free(&sp_list);
   6298 	if (update_list != NULL)
   6299 		meta_sp_list_free(&update_list);
   6300 	if (un_array != NULL)	{
   6301 		for (i = 0; i < num_sps; i++)
   6302 			Free(un_array[i]);
   6303 		Free(un_array);
   6304 	}
   6305 	if (minors != NULL)
   6306 		Free(minors);
   6307 	if (ownpar != NULL)
   6308 		Free(ownpar);
   6309 	(void) fflush(stdout);
   6310 
   6311 	if ((keynlp != NULL) && (committed != 1)) {
   6312 		/*
   6313 		 * if we haven't committed any softparts, either because of an
   6314 		 * error or because the user decided not to proceed, delete
   6315 		 * namelist key for the component
   6316 		 */
   6317 		if (mn_set) {
   6318 			mdnamelist_t	*p;
   6319 
   6320 			for (p = keynlp; (p != NULL); p = p->next) {
   6321 				mdname_t		*np = p->namep;
   6322 				md_mn_msg_delkeyname_t	send_params;
   6323 				md_mn_result_t		*resp = NULL;
   6324 
   6325 				send_params.delkeyname_dev = np->dev;
   6326 				send_params.delkeyname_setno = sp->setno;
   6327 				send_params.delkeyname_key = np->key;
   6328 				(void) mdmn_send_message(sp->setno,
   6329 				    MD_MN_MSG_DELKEYNAME, MD_MSGF_DEFAULT_FLAGS,
   6330 				    0, (char *)&send_params,
   6331 				    sizeof (send_params),
   6332 				    &resp, ep);
   6333 				if (resp != NULL) {
   6334 					free_result(resp);
   6335 				}
   6336 			}
   6337 		} else {
   6338 			(void) del_key_names(sp, keynlp, NULL);
   6339 		}
   6340 	}
   6341 
   6342 	metafreenamelist(keynlp);
   6343 
   6344 	if (err)
   6345 		return (mdmderror(ep, MDE_RECOVER_FAILED, 0, compnp->cname));
   6346 
   6347 	if (not_recovered)
   6348 		if (options & MDCMD_PRINT)
   6349 			(void) printf(dgettext(TEXT_DOMAIN, "%s: "
   6350 			    "Soft Partitions NOT recovered from device.\n"),
   6351 			    compnp->cname);
   6352 	return (0);
   6353 }
   6354 
   6355 /*
   6356  * FUNCTION:	meta_sp_recover_from_unit()
   6357  * INPUT:	sp	- name of set we are recovering in
   6358  *		compnp	- name of component we are recovering from
   6359  *		options	- metarecover options
   6360  * OUTPUT:	ep	- return error pointer
   6361  * RETURNS:	int	- 0 - success, -1 - error
   6362  * PURPOSE:	update watermarks to match metadb records.  begin by getting
   6363  *		a namelist representing all soft partitions on the specified
   6364  *		component.  then, build an extlist representing the soft
   6365  *		partitions, filling in the freespace extents.  notify user
   6366  *		of changes, place all soft partitions into the "recovering"
   6367  *		state and update the watermarks.  finally, return all soft
   6368  *		partitions to the "OK" state.
   6369  */
   6370 static int
   6371 meta_sp_recover_from_unit(
   6372 	mdsetname_t	*sp,
   6373 	mdname_t	*compnp,
   6374 	mdcmdopts_t	options,
   6375 	md_error_t	*ep
   6376 )
   6377 {
   6378 	mdnamelist_t	*spnlp = NULL;
   6379 	mdnamelist_t	*nlp = NULL;
   6380 	sp_ext_node_t	*ext = NULL;
   6381 	sp_ext_node_t	*extlist = NULL;
   6382 	int		count;
   6383 	char		yesno[255];
   6384 	char		*yes;
   6385 	int		rval = 0;
   6386 	minor_t		*minors = NULL;
   6387 	int		i;
   6388 	md_sp_t		*msp;
   6389 	md_set_desc	*sd;
   6390 	bool_t		mn_set = 0;
   6391 	daddr_t		start_block;
   6392 
   6393 	count = meta_sp_get_by_component(sp, compnp, &spnlp, 0, ep);
   6394 	if (count <= 0)
   6395 		return (-1);
   6396 
   6397 	/* set flag if dealing with a MN set */
   6398 	if (!metaislocalset(sp)) {
   6399 		if ((sd = metaget_setdesc(sp, ep)) == NULL) {
   6400 			return (-1);
   6401 		}
   6402 		if (MD_MNSET_DESC(sd))
   6403 			mn_set = 1;
   6404 	}
   6405 	/*
   6406 	 * Save the XDR unit structure for one of the soft partitions;
   6407 	 * we'll use this later to provide metadevice context to
   6408 	 * update the watermarks so the device can be resolved by
   6409 	 * devid instead of dev_t.
   6410 	 */
   6411 	if ((msp = meta_get_sp(sp, spnlp->namep, ep)) == NULL) {
   6412 		metafreenamelist(spnlp);
   6413 		return (-1);
   6414 	}
   6415 
   6416 	if ((start_block = meta_sp_get_start(sp, compnp, ep)) ==
   6417 	    MD_DISKADDR_ERROR) {
   6418 		return (-1);
   6419 	}
   6420 
   6421 	meta_sp_list_insert(NULL, NULL, &extlist, 0ULL, start_block,
   6422 	    EXTTYP_RESERVED, 0, 0, meta_sp_cmp_by_offset);
   6423 	meta_sp_list_insert(NULL, NULL, &extlist,
   6424 	    metagetsize(compnp, ep) - MD_SP_WMSIZE, MD_SP_WMSIZE,
   6425 	    EXTTYP_END, 0, EXTFLG_UPDATE, meta_sp_cmp_by_offset);
   6426 
   6427 	if (meta_sp_extlist_from_namelist(sp, spnlp, &extlist, ep) == -1) {
   6428 		metafreenamelist(spnlp);
   6429 		return (-1);
   6430 	}
   6431 
   6432 	assert(extlist != NULL);
   6433 	if ((options & MDCMD_VERBOSE) != 0) {
   6434 		(void) printf(dgettext(TEXT_DOMAIN,
   6435 		    "Updating extent headers on device %s from metadb.\n\n"),
   6436 		    compnp->cname);
   6437 		(void) printf(dgettext(TEXT_DOMAIN,
   6438 		    "The following extent headers will be written:\n"));
   6439 		meta_sp_display_exthdr();
   6440 	}
   6441 
   6442 	meta_sp_list_freefill(&extlist, metagetsize(compnp, ep));
   6443 
   6444 	for (ext = extlist; ext != NULL; ext = ext->ext_next) {
   6445 
   6446 		/* mark every node for updating except the reserved space */
   6447 		if (ext->ext_type != EXTTYP_RESERVED) {
   6448 			ext->ext_flags |= EXTFLG_UPDATE;
   6449 
   6450 			/* print extent information */
   6451 			if ((options & MDCMD_VERBOSE) != 0)
   6452 				meta_sp_display_ext(ext);
   6453 		}
   6454 	}
   6455 
   6456 	/* request verification and then update all watermarks */
   6457 	if ((options & MDCMD_DOIT) != 0) {
   6458 
   6459 		(void) printf(dgettext(TEXT_DOMAIN,
   6460 		    "\nWARNING: You are about to overwrite portions of %s\n"
   6461 		    "with soft partition metadata. The extent headers will be\n"
   6462 		    "written to match the existing metadb configuration.  If\n"
   6463 		    "the device was not previously setup with this\n"
   6464 		    "configuration, data loss may result.\n\n"),
   6465 		    compnp->cname);
   6466 		(void) printf(dgettext(TEXT_DOMAIN,
   6467 		    "Are you sure you want to do this (yes/no)? "));
   6468 
   6469 		(void) fflush(stdout);
   6470 		if ((fgets(yesno, sizeof (yesno), stdin) == NULL) ||
   6471 		    (strlen(yesno) == 1))
   6472 			(void) snprintf(yesno, sizeof (yesno),
   6473 			    "%s\n", dgettext(TEXT_DOMAIN, "no"));
   6474 		yes = dgettext(TEXT_DOMAIN, "yes");
   6475 		if (strncasecmp(yesno, yes, strlen(yesno) - 1) == 0) {
   6476 			/* place soft partitions into recovering state */
   6477 			minors = Zalloc(count * sizeof (minor_t));
   6478 			for (nlp = spnlp, i = 0;
   6479 			    nlp != NULL && i < count;
   6480 			    nlp = nlp->next, i++) {
   6481 				assert(nlp->namep != NULL);
   6482 				minors[i] = meta_getminor(nlp->namep->dev);
   6483 			}
   6484 			if (update_sp_status(sp, minors, count,
   6485 			    MD_SP_RECOVER, mn_set, ep) != 0) {
   6486 				rval = -1;
   6487 				goto out;
   6488 			}
   6489 
   6490 			/* update the watermarks */
   6491 			if (meta_sp_update_wm(sp, msp, extlist, ep) < 0) {
   6492 				rval = -1;
   6493 				goto out;
   6494 			}
   6495 
   6496 			if (options & MDCMD_PRINT) {
   6497 				(void) printf(dgettext(TEXT_DOMAIN, "%s: "
   6498 				    "Soft Partitions recovered from metadb\n"),
   6499 				    compnp->cname);
   6500 			}
   6501 
   6502 			/* return soft partitions to the OK state */
   6503 			if (update_sp_status(sp, minors, count,
   6504 			    MD_SP_OK, mn_set, ep) != 0) {
   6505 				rval = -1;
   6506 				goto out;
   6507 			}
   6508 
   6509 			rval = 0;
   6510 			goto out;
   6511 		}
   6512 	}
   6513 
   6514 	if (options & MDCMD_PRINT) {
   6515 		(void) printf(dgettext(TEXT_DOMAIN,
   6516 		    "%s: Soft Partitions NOT recovered from metadb\n"),
   6517 		    compnp->cname);
   6518 	}
   6519 
   6520 out:
   6521 	if (minors != NULL)
   6522 		Free(minors);
   6523 	metafreenamelist(spnlp);
   6524 	meta_sp_list_free(&extlist);
   6525 	(void) fflush(stdout);
   6526 	return (rval);
   6527 }
   6528 
   6529 
   6530 /*
   6531  * FUNCTION:	meta_sp_update_abr()
   6532  * INPUT:	sp	- name of set we are recovering in
   6533  * OUTPUT:	ep	- return error pointer
   6534  * RETURNS:	int	- 0 - success, -1 - error
   6535  * PURPOSE:	update the ABR state for all soft partitions in the set. This
   6536  *		is called when joining a set. It sends a message to the master
   6537  *		node for each soft partition to get the value of tstate and
   6538  *		then sets ABR ,if required, by opening the sp, setting ABR
   6539  *		and then closing the sp. This approach is taken rather that
   6540  *		just issuing the MD_MN_SET_CAP ioctl, in order to deal with
   6541  *		the case when we have another node simultaneously unsetting ABR.
   6542  */
   6543 int
   6544 meta_sp_update_abr(
   6545 	mdsetname_t	*sp,
   6546 	md_error_t	*ep
   6547 )
   6548 {
   6549 	mdnamelist_t	*devnlp = NULL;
   6550 	mdnamelist_t	*p;
   6551 	mdname_t	*devnp = NULL;
   6552 	md_unit_t	*un;
   6553 	char		fname[MAXPATHLEN];
   6554 	int		mnum, fd;
   6555 	volcap_t	vc;
   6556 	uint_t		tstate;
   6557 
   6558 
   6559 	if (meta_get_sp_names(sp, &devnlp, 0, ep) < 0) {
   6560 		return (-1);
   6561 	}
   6562 
   6563 	/* Exit if no soft partitions in this set */
   6564 	if (devnlp == NULL)
   6565 		return (0);
   6566 
   6567 	/* For each soft partition */
   6568 	for (p = devnlp; (p != NULL); p = p->next) {
   6569 		devnp = p->namep;
   6570 
   6571 		/* check if this is a top level metadevice */
   6572 		if ((un = meta_get_mdunit(sp, devnp, ep)) == NULL)
   6573 			goto out;
   6574 		if (MD_HAS_PARENT(MD_PARENT(un))) {
   6575 			Free(un);
   6576 			continue;
   6577 		}
   6578 		Free(un);
   6579 
   6580 		/* Get tstate from Master */
   6581 		if (meta_mn_send_get_tstate(devnp->dev, &tstate, ep) != 0) {
   6582 			mdname_t	*np;
   6583 			np = metamnumname(&sp, meta_getminor(devnp->dev), 0,
   6584 			    ep);
   6585 			if (np) {
   6586 				md_perror(dgettext(TEXT_DOMAIN,
   6587 				    "Unable to get tstate for %s"), np->cname);
   6588 			}
   6589 			continue;
   6590 		}
   6591 		/* If not set on the master, nothing to do */
   6592 		if (!(tstate & MD_ABR_CAP))
   6593 			continue;
   6594 
   6595 		mnum = meta_getminor(devnp->dev);
   6596 		(void) snprintf(fname, MAXPATHLEN, "/dev/md/%s/rdsk/d%u",
   6597 		    sp->setname, (unsigned)MD_MIN2UNIT(mnum));
   6598 		if ((fd = open(fname, O_RDWR, 0)) < 0) {
   6599 			md_perror(dgettext(TEXT_DOMAIN,
   6600 			    "Could not open device %s"), fname);
   6601 			continue;
   6602 		}
   6603 
   6604 		/* Set ABR state */
   6605 		vc.vc_info = 0;
   6606 		vc.vc_set = 0;
   6607 		if (ioctl(fd, DKIOCGETVOLCAP, &vc) < 0) {
   6608 			(void) close(fd);
   6609 			continue;
   6610 		}
   6611 
   6612 		vc.vc_set = DKV_ABR_CAP;
   6613 		if (ioctl(fd, DKIOCSETVOLCAP, &vc) < 0) {
   6614 			(void) close(fd);
   6615 			goto out;
   6616 		}
   6617 
   6618 		(void) close(fd);
   6619 	}
   6620 	metafreenamelist(devnlp);
   6621 	return (0);
   6622 out:
   6623 	metafreenamelist(devnlp);
   6624 	return (-1);
   6625 }
   6626 
   6627 /*
   6628  * FUNCTION:	meta_mn_sp_update_abr()
   6629  * INPUT:	arg	- Given set.
   6630  * PURPOSE:	update the ABR state for all soft partitions in the set by
   6631  *		forking a process to call meta_sp_update_abr()
   6632  *		This function is only called via rpc.metad when adding a node
   6633  *		to a set, ie this node is beong joined to the set by another
   6634  *		node.
   6635  */
   6636 void *
   6637 meta_mn_sp_update_abr(void *arg)
   6638 {
   6639 	set_t		setno = *((set_t *)arg);
   6640 	mdsetname_t	*sp;
   6641 	md_error_t	mde = mdnullerror;
   6642 	int		fval;
   6643 
   6644 	/* should have a set */
   6645 	assert(setno != NULL);
   6646 
   6647 	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
   6648 		mde_perror(&mde, "");
   6649 		return (NULL);
   6650 	}
   6651 
   6652 	if (!(meta_is_mn_set(sp, &mde))) {
   6653 		mde_perror(&mde, "");
   6654 		return (NULL);
   6655 	}
   6656 
   6657 	/* fork a process */
   6658 	if ((fval = md_daemonize(sp, &mde)) != 0) {
   6659 		/*
   6660 		 * md_daemonize will fork off a process.  The is the
   6661 		 * parent or error.
   6662 		 */
   6663 		if (fval > 0) {
   6664 			return (NULL);
   6665 		}
   6666 		mde_perror(&mde, "");
   6667 		return (NULL);
   6668 	}
   6669 	/*
   6670 	 * Child process should never return back to rpc.metad, but
   6671 	 * should exit.
   6672 	 * Flush all internally cached data inherited from parent process
   6673 	 * since cached data will be cleared when parent process RPC request
   6674 	 * has completed (which is possibly before this child process
   6675 	 * can complete).
   6676 	 * Child process can retrieve and cache its own copy of data from
   6677 	 * rpc.metad that won't be changed by the parent process.
   6678 	 *
   6679 	 * Reset md_in_daemon since this child will be a client of rpc.metad
   6680 	 * not part of the rpc.metad daemon itself.
   6681 	 * md_in_daemon is used by rpc.metad so that libmeta can tell if
   6682 	 * this thread is rpc.metad or any other thread.  (If this thread
   6683 	 * was rpc.metad it could use some short circuit code to get data
   6684 	 * directly from rpc.metad instead of doing an RPC call to rpc.metad).
   6685 	 */
   6686 	md_in_daemon = 0;
   6687 	metaflushsetname(sp);
   6688 	sr_cache_flush_setno(setno);
   6689 	if ((sp = metasetnosetname(setno, &mde)) == NULL) {
   6690 		mde_perror(&mde, "");
   6691 		md_exit(sp, 1);
   6692 	}
   6693 
   6694 
   6695 	/*
   6696 	 * Closing stdin/out/err here.
   6697 	 */
   6698 	(void) close(0);
   6699 	(void) close(1);
   6700 	(void) close(2);
   6701 	assert(fval == 0);
   6702 
   6703 	(void) meta_sp_update_abr(sp, &mde);
   6704 
   6705 	md_exit(sp, 0);
   6706 	/*NOTREACHED*/
   6707 	return (NULL);
   6708 }
   6709 
   6710 int
   6711 meta_sp_check_component(
   6712 	mdsetname_t	*sp,
   6713 	mdname_t	*np,
   6714 	md_error_t	*ep
   6715 )
   6716 {
   6717 	md_sp_t	*msp;
   6718 	minor_t	mnum = 0;
   6719 	md_dev64_t	dev = 0;
   6720 	mdnm_params_t	nm;
   6721 	md_getdevs_params_t	mgd;
   6722 	side_t	sideno;
   6723 	char	*miscname;
   6724 	md_dev64_t	*mydev = NULL;
   6725 	char	*pname = NULL, *t;
   6726 	char	*ctd_name = NULL;
   6727 	char	*devname = NULL;
   6728 	int	len;
   6729 	int	rval = -1;
   6730 
   6731 	(void) memset(&nm, '\0', sizeof (nm));
   6732 	if ((msp = meta_get_sp_common(sp, np, 0, ep)) == NULL)
   6733 		return (-1);
   6734 
   6735 	if ((miscname = metagetmiscname(np, ep)) == NULL)
   6736 		return (-1);
   6737 
   6738 	sideno = getmyside(sp, ep);
   6739 
   6740 	meta_sp_debug("meta_sp_check_component: %s is on %s key: %d"
   6741 	    " dev: %llu\n",
   6742 	    np->cname, msp->compnamep->cname, msp->compnamep->key,
   6743 	    msp->compnamep->dev);
   6744 
   6745 	/*
   6746 	 * Now get the data from the unit structure. The compnamep stuff
   6747 	 * contains the data from the namespace and we need the un_dev
   6748 	 * from the unit structure.
   6749 	 */
   6750 	(void) memset(&mgd, '\0', sizeof (mgd));
   6751 	MD_SETDRIVERNAME(&mgd, miscname, sp->setno);
   6752 	mgd.cnt = 1;		    /* sp's only have one subdevice */
   6753 	mgd.mnum = meta_getminor(np->dev);
   6754 
   6755 	mydev = Zalloc(sizeof (*mydev));
   6756 	mgd.devs = (uintptr_t)mydev;
   6757 
   6758 	if (metaioctl(MD_IOCGET_DEVS, &mgd, &mgd.mde, np->cname) != 0) {
   6759 		meta_sp_debug("meta_sp_check_component: ioctl failed\n");
   6760 		(void) mdstealerror(ep, &mgd.mde);
   6761 		rval = 0;
   6762 		goto out;
   6763 	} else if (mgd.cnt <= 0) {
   6764 		assert(mgd.cnt >= 0);
   6765 		rval = 0;
   6766 		goto out;
   6767 	}
   6768 
   6769 	/* Get the devname from the name space. */
   6770 	if ((devname = meta_getnmentbykey(sp->setno, sideno,
   6771 	    msp->compnamep->key, NULL, &mnum, &dev, ep)) == NULL) {
   6772 		meta_sp_debug("meta_sp_check_component: key %d not"
   6773 		    "found\n", msp->compnamep->key);
   6774 		goto out;
   6775 	}
   6776 
   6777 	meta_sp_debug("dev %s from component: (%lu, %lu)\n",
   6778 	    devname,
   6779 	    meta_getmajor(*mydev),
   6780 	    meta_getminor(*mydev));
   6781 	meta_sp_debug("minor from the namespace: %lu\n", mnum);
   6782 
   6783 	if (mnum != meta_getminor(*mydev)) {
   6784 		/*
   6785 		 * The minor numbers are different. Update the namespace
   6786 		 * with the information from the component.
   6787 		 */
   6788 
   6789 		t = strrchr(devname, '/');
   6790 		t++;
   6791 		ctd_name = Strdup(t);
   6792 
   6793 		meta_sp_debug("meta_sp_check_component: ctd_name: %s\n",
   6794 		    ctd_name);
   6795 
   6796 		len = strlen(devname);
   6797 		t = strrchr(devname, '/');
   6798 		t++;
   6799 		pname = Zalloc((len - strlen(t)) + 1);
   6800 		(void) strncpy(pname, devname, (len - strlen(t)));
   6801 		meta_sp_debug("pathname: %s\n", pname);
   6802 
   6803 		meta_sp_debug("updating the minor number to %lu\n", nm.mnum);
   6804 
   6805 		if (meta_update_namespace(sp->setno, sideno,
   6806 		    ctd_name, *mydev, msp->compnamep->key, pname,
   6807 		    ep) != 0) {
   6808 			goto out;
   6809 		}
   6810 	}
   6811 out:
   6812 	if (pname != NULL)
   6813 		Free(pname);
   6814 	if (ctd_name != NULL)
   6815 		Free(ctd_name);
   6816 	if (devname != NULL)
   6817 		Free(devname);
   6818 	if (mydev != NULL)
   6819 		Free(mydev);
   6820 	return (rval);
   6821 }
   6822