Home | History | Annotate | Download | only in common
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
/*
 * TEXT_DOMAIN is the message domain handed to dgettext() for every
 * translatable string in this file.  Just in case we're not in a build
 * environment that defines it, make sure that TEXT_DOMAIN gets set to
 * something so the file still compiles.
 */
#if !defined(TEXT_DOMAIN)
#define	TEXT_DOMAIN "SYS_TEST"
#endif
     33 
     34 /*
     35  * RAID operations
     36  */
     37 
     38 #include <stdlib.h>
     39 #include <meta.h>
     40 #include <sys/lvm/md_raid.h>
     41 #include <sys/lvm/mdvar.h>
     42 #include <sys/lvm/md_convert.h>
     43 #include <stddef.h>
     44 
     45 /*
     46  * FUNCTION:    meta_get_raid_names()
     47  * INPUT:       sp      - the set name to get raid from
     48  *              options - options from the command line
     49  * OUTPUT:      nlpp    - list of all raid names
     50  *              ep      - return error pointer
     51  * RETURNS:     int     - -1 if error, 0 success
     52  * PURPOSE:     returns a list of all raid in the metadb
     53  *              for all devices in the specified set
     54  */
     55 int
     56 meta_get_raid_names(
     57 	mdsetname_t	*sp,
     58 	mdnamelist_t	**nlpp,
     59 	int		options,
     60 	md_error_t	*ep
     61 )
     62 {
     63 	return (meta_get_names(MD_RAID, sp, nlpp, options, ep));
     64 }
     65 
     66 /*
     67  * free raid unit
     68  */
     69 void
     70 meta_free_raid(
     71 	md_raid_t	*raidp
     72 )
     73 {
     74 	if (raidp->cols.cols_val != NULL) {
     75 		assert(raidp->cols.cols_len > 0);
     76 		Free(raidp->cols.cols_val);
     77 	}
     78 	Free(raidp);
     79 }
     80 
/*
 * get raid (common)
 *
 * Return the md_raid_t description of the raid metadevice named by
 * raidnp, building it from the unit structure if it is not cached yet.
 *
 *	sp	- set containing the raid; must be non-NULL and must match
 *		  the set encoded in raidnp's minor number (both asserted)
 *	raidnp	- name of the raid metadevice
 *	fast	- passed through unchanged to metakeyname() when the
 *		  column and hot spare names are looked up
 *	ep	- error return
 *
 * Returns the md_raid_t on success and NULL on failure.  On success the
 * result is stored in raidnp->drivenamep->unitp; any later call that
 * finds unitp set returns that cached pointer without rebuilding it.
 *
 * Side effect: start_blk of every column name, and of the hot spare
 * name of every hotspared column, is overwritten with the start block
 * recorded in the unit.
 */
md_raid_t *
meta_get_raid_common(
	mdsetname_t		*sp,
	mdname_t		*raidnp,
	int			fast,
	md_error_t		*ep
)
{
	mddrivename_t		*dnp = raidnp->drivenamep;
	char			*miscname;
	mr_unit_t		*mr;
	md_raid_t		*raidp;
	uint_t			ncol;
	uint_t			col;
	md_resync_ioctl_t	ri;

	/* must have set */
	assert(sp != NULL);
	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));

	/* short circuit: a previous call already cached the result */
	if (dnp->unitp != NULL) {
		assert(dnp->unitp->type == MD_METARAID);
		return ((md_raid_t *)dnp->unitp);
	}

	/* get miscname and unit; refuse anything that is not a raid */
	if ((miscname = metagetmiscname(raidnp, ep)) == NULL)
		return (NULL);
	if (strcmp(miscname, MD_RAID) != 0) {
		(void) mdmderror(ep, MDE_NOT_RAID, meta_getminor(raidnp->dev),
		    raidnp->cname);
		return (NULL);
	}
	/* from here on mr must be released with Free() on every exit path */
	if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
		return (NULL);
	assert(mr->c.un_type == MD_METARAID);

	/* allocate raid */
	raidp = Zalloc(sizeof (*raidp));

	/* allocate columns: one md_raidcol_t per column in the unit */
	ncol = mr->un_totalcolumncnt;
	assert(ncol >= MD_RAID_MIN);
	raidp->cols.cols_len = ncol;
	raidp->cols.cols_val = Zalloc(raidp->cols.cols_len *
	    sizeof (*raidp->cols.cols_val));

	/* get common info */
	raidp->common.namep = raidnp;
	raidp->common.type = mr->c.un_type;
	raidp->common.state = mr->c.un_status;
	raidp->common.capabilities = mr->c.un_capabilities;
	raidp->common.parent = mr->c.un_parent;
	raidp->common.size = mr->c.un_total_blocks;
	raidp->common.user_flags = mr->c.un_user_flags;
	raidp->common.revision = mr->c.un_revision;

	/* get options */
	raidp->state = mr->un_state;
	raidp->timestamp = mr->un_timestamp;
	raidp->interlace = mr->un_segsize;
	raidp->orig_ncol = mr->un_origcolumncnt;
	/* column size is the segment size times the segments per column */
	raidp->column_size = mr->un_segsize * mr->un_segsincolumn;
	raidp->pw_count = mr->un_pwcnt;
	assert(raidp->orig_ncol <= ncol);
	/* resolve the hot spare pool name only when a pool is assigned */
	if ((mr->un_hsp_id != MD_HSP_NONE) &&
	    ((raidp->hspnamep = metahsphspname(&sp, mr->un_hsp_id,
	    ep)) == NULL)) {
		goto out;
	}

	/* get columns, update unit state */
	for (col = 0; (col < ncol); ++col) {
		mr_column_t	*rcp = &mr->un_column[col];
		md_raidcol_t	*mdrcp = &raidp->cols.cols_val[col];

		/* get column name */
		mdrcp->colnamep = metakeyname(&sp, rcp->un_orig_key, fast, ep);
		if (mdrcp->colnamep == NULL)
			goto out;

		/* override any start_blk */
#ifdef	DEBUG
		/*
		 * Sanity check (DEBUG builds only): a start block that
		 * metagetstart() can determine must not lie beyond the one
		 * recorded in the unit.  A lookup failure is not an error
		 * here, so it is cleared.
		 */
		if (metagetstart(sp, mdrcp->colnamep, ep) !=
		    MD_DISKADDR_ERROR) {
			assert(mdrcp->colnamep->start_blk <=
			    rcp->un_orig_devstart);
		} else {
			mdclrerror(ep);
		}
#endif	/* DEBUG */
		mdrcp->colnamep->start_blk = rcp->un_orig_devstart;

		/* if hotspared */
		if (HOTSPARED(mr, col)) {
			/* get hotspare name */
			mdrcp->hsnamep = metakeyname(&sp, rcp->un_hs_key,
			    fast, ep);
			if (mdrcp->hsnamep == NULL)
				goto out;

			/*
			 * Optional diagnostics, enabled by setting
			 * META_DEBUG_START_BLK in the environment: warn when
			 * the hot spare's start block and the unit's
			 * un_hs_pwstart disagree about one of them being 0.
			 */
			if (getenv("META_DEBUG_START_BLK") != NULL) {
				if (metagetstart(sp, mdrcp->hsnamep, ep) ==
				    MD_DISKADDR_ERROR)
					mdclrerror(ep);

				if ((mdrcp->hsnamep->start_blk == 0) &&
				    (rcp->un_hs_pwstart != 0))
					md_eprintf(dgettext(TEXT_DOMAIN,
					    "%s: suspected bad start block,"
					    " seems labelled [raid]\n"),
					    mdrcp->hsnamep->cname);

				if ((mdrcp->hsnamep->start_blk > 0) &&
				    (rcp->un_hs_pwstart == 0))
					md_eprintf(dgettext(TEXT_DOMAIN,
					    "%s: suspected bad start block, "
					    " seems unlabelled [raid]\n"),
					    mdrcp->hsnamep->cname);
			}

			/* override any start_blk */
			mdrcp->hsnamep->start_blk = rcp->un_hs_devstart;
		}

		/* get state, flags, and timestamp */
		mdrcp->state = rcp->un_devstate;
		mdrcp->flags = rcp->un_devflags;
		mdrcp->timestamp = rcp->un_devtimestamp;
	}

	/* get resync info from the driver via MD_IOCGETSYNC */
	(void) memset(&ri, 0, sizeof (ri));
	ri.ri_mnum = meta_getminor(raidnp->dev);
	MD_SETDRIVERNAME(&ri, MD_RAID, sp->setno);
	if (metaioctl(MD_IOCGETSYNC, &ri, &ri.mde, raidnp->cname) != 0) {
		/* move the ioctl's error into the caller's ep */
		(void) mdstealerror(ep, &ri.mde);
		goto out;
	}
	raidp->resync_flags = ri.ri_flags;
	raidp->percent_dirty = ri.ri_percent_dirty;
	raidp->percent_done = ri.ri_percent_done;

	/* cleanup, return success; cache the result on the drive name */
	Free(mr);
	dnp->unitp = (md_common_t *)raidp;
	return (raidp);

	/* cleanup, return error; nothing is cached */
out:
	Free(mr);
	meta_free_raid(raidp);
	return (NULL);
}
    239 
    240 /*
    241  * get raid
    242  */
    243 md_raid_t *
    244 meta_get_raid(
    245 	mdsetname_t		*sp,
    246 	mdname_t		*raidnp,
    247 	md_error_t		*ep
    248 )
    249 {
    250 	return (meta_get_raid_common(sp, raidnp, 0, ep));
    251 }
    252 
    253 /*
    254  * check raid for dev
    255  */
    256 static int
    257 in_raid(
    258 	mdsetname_t	*sp,
    259 	mdname_t	*raidnp,
    260 	mdname_t	*np,
    261 	diskaddr_t	slblk,
    262 	diskaddr_t	nblks,
    263 	md_error_t	*ep
    264 )
    265 {
    266 	md_raid_t	*raidp;
    267 	uint_t		col;
    268 
    269 	/* should be in the same set */
    270 	assert(sp != NULL);
    271 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
    272 
    273 	/* get unit */
    274 	if ((raidp = meta_get_raid(sp, raidnp, ep)) == NULL)
    275 		return (-1);
    276 
    277 	/* look in columns */
    278 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
    279 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
    280 		mdname_t	*colnp = cp->colnamep;
    281 		diskaddr_t	col_sblk;
    282 		int		err;
    283 
    284 		/* check same drive since metagetstart() can fail */
    285 		if ((err = meta_check_samedrive(np, colnp, ep)) < 0)
    286 			return (-1);
    287 		else if (err == 0)
    288 			continue;
    289 
    290 		/* check overlap */
    291 		if ((col_sblk = metagetstart(sp, colnp, ep)) ==
    292 		    MD_DISKADDR_ERROR)
    293 			return (-1);
    294 		if (meta_check_overlap(raidnp->cname, np, slblk, nblks,
    295 		    colnp, col_sblk, -1, ep) != 0) {
    296 			return (-1);
    297 		}
    298 	}
    299 
    300 	/* return success */
    301 	return (0);
    302 }
    303 
    304 /*
    305  * check to see if we're in a raid
    306  */
    307 int
    308 meta_check_inraid(
    309 	mdsetname_t	*sp,
    310 	mdname_t	*np,
    311 	diskaddr_t	slblk,
    312 	diskaddr_t	nblks,
    313 	md_error_t	*ep
    314 )
    315 {
    316 	mdnamelist_t	*raidnlp = NULL;
    317 	mdnamelist_t	*p;
    318 	int		rval = 0;
    319 
    320 	/* should have a set */
    321 	assert(sp != NULL);
    322 
    323 	/* for each raid */
    324 	if (meta_get_raid_names(sp, &raidnlp, 0, ep) < 0)
    325 		return (-1);
    326 	for (p = raidnlp; (p != NULL); p = p->next) {
    327 		mdname_t	*raidnp = p->namep;
    328 
    329 		/* check raid */
    330 		if (in_raid(sp, raidnp, np, slblk, nblks, ep) != 0) {
    331 			rval = -1;
    332 			break;
    333 		}
    334 	}
    335 
    336 	/* cleanup, return success */
    337 	metafreenamelist(raidnlp);
    338 	return (rval);
    339 }
    340 
    341 /*
    342  * check column
    343  */
    344 int
    345 meta_check_column(
    346 	mdsetname_t	*sp,
    347 	mdname_t	*np,
    348 	md_error_t	*ep
    349 )
    350 {
    351 	mdchkopts_t	options = (MDCHK_ALLOW_MDDB);
    352 
    353 	/* check for soft partitions */
    354 	if (meta_sp_issp(sp, np, ep) != 0) {
    355 		/* make sure we have a disk */
    356 		if (metachkcomp(np, ep) != 0)
    357 			return (-1);
    358 	}
    359 
    360 	/* check to ensure that it is not already in use */
    361 	if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0) {
    362 		return (-1);
    363 	}
    364 
    365 	/* make sure it is in the set */
    366 	if (meta_check_inset(sp, np, ep) != 0)
    367 		return (-1);
    368 
    369 	/* make sure its not in a metadevice */
    370 	if (meta_check_inmeta(sp, np, options, 0, -1, ep) != 0)
    371 		return (-1);
    372 
    373 	/* return success */
    374 	return (0);
    375 }
    376 
    377 /*
    378  * print raid
    379  */
    380 static int
    381 raid_print(
    382 	md_raid_t	*raidp,
    383 	char		*fname,
    384 	FILE		*fp,
    385 	mdprtopts_t	options,
    386 	md_error_t	*ep
    387 )
    388 {
    389 	uint_t		col;
    390 	int		rval = -1;
    391 
    392 
    393 	if (options & PRINT_LARGEDEVICES) {
    394 		if ((raidp->common.revision & MD_64BIT_META_DEV) == 0) {
    395 			rval = 0;
    396 			goto out;
    397 		}
    398 	}
    399 
    400 	if (options & PRINT_FN) {
    401 		if ((raidp->common.revision & MD_FN_META_DEV) == 0) {
    402 			rval = 0;
    403 			goto out;
    404 		}
    405 	}
    406 
    407 	/* print name and -r */
    408 	if (fprintf(fp, "%s -r", raidp->common.namep->cname) == EOF)
    409 		goto out;
    410 
    411 	/*
    412 	 * Print columns. Always print the full path.
    413 	 */
    414 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
    415 		md_raidcol_t	*mdrcp = &raidp->cols.cols_val[col];
    416 
    417 		if (fprintf(fp, " %s", mdrcp->colnamep->rname) == EOF)
    418 			goto out;
    419 	}
    420 
    421 	if (fprintf(fp, " -k") == EOF)
    422 		goto out;
    423 
    424 	/* print options */
    425 	if (fprintf(fp, " -i %lldb", raidp->interlace) == EOF)
    426 		goto out;
    427 
    428 	if (raidp->pw_count != PWCNT_MIN)
    429 		if (fprintf(fp, " -w %d", raidp->pw_count) == EOF)
    430 			goto out;
    431 
    432 	if (raidp->hspnamep != NULL) {
    433 		if (fprintf(fp, " -h %s", raidp->hspnamep->hspname) == EOF)
    434 			goto out;
    435 	}
    436 	if (raidp->orig_ncol != raidp->cols.cols_len) {
    437 		assert(raidp->orig_ncol < raidp->cols.cols_len);
    438 		if (fprintf(fp, " -o %u", raidp->orig_ncol) == EOF)
    439 			goto out;
    440 	}
    441 
    442 	/* terminate last line */
    443 	if (fprintf(fp, "\n") == EOF)
    444 		goto out;
    445 
    446 	/* success */
    447 	rval = 0;
    448 
    449 	/* cleanup, return error */
    450 out:
    451 	if (rval != 0)
    452 		(void) mdsyserror(ep, errno, fname);
    453 	return (rval);
    454 }
    455 
    456 static int
    457 find_resyncing_column(
    458 	md_raid_t *raidp
    459 )
    460 {
    461 	int		col;
    462 
    463 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
    464 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
    465 		if (cp->state & RCS_RESYNC)
    466 			return (col);
    467 	}
    468 
    469 	/* No resyncing columns */
    470 	return (-1);
    471 }
    472 
    473 /*
    474  * convert raid state to name
    475  */
    476 char *
    477 raid_state_to_name(
    478 	md_raid_t	*raidp,
    479 	md_timeval32_t	*tvp,
    480 	uint_t		tstate /* Errored tstate flags */
    481 )
    482 {
    483 
    484 	/* grab time */
    485 	if (tvp != NULL)
    486 		*tvp = raidp->timestamp;
    487 
    488 	/*
    489 	 * If the device has a transient error state (due to it being DR'ed or
    490 	 * failed) and there has been no I/O to it (the actual device is still
    491 	 * marked as 'Okay') then we cannot know what the state is or what
    492 	 * action to take on it. Therefore report the device as 'Unavailable'.
    493 	 * A subsequent I/O to the device will cause the 'Okay' status to
    494 	 * disappear if the device is actually gone and then we will print out
    495 	 * the appropriate status.  The MD_INACCESSIBLE state is only set
    496 	 * on the raid when we open it or probe it.  One the raid is open
    497 	 * then we will just have regular error status on the device.
    498 	 */
    499 	if (tstate & MD_INACCESSIBLE) {
    500 		return (dgettext(TEXT_DOMAIN, "Unavailable"));
    501 	}
    502 
    503 	/* resyncing */
    504 	if (find_resyncing_column(raidp) >= 0)
    505 		return (dgettext(TEXT_DOMAIN, "Resyncing"));
    506 
    507 	/* everything else */
    508 	switch (raidp->state) {
    509 		case RUS_INIT :
    510 			return (dgettext(TEXT_DOMAIN, "Initializing"));
    511 		case RUS_OKAY :
    512 			return (dgettext(TEXT_DOMAIN, "Okay"));
    513 		case RUS_ERRED :
    514 		/*FALLTHROUGH*/
    515 		case RUS_LAST_ERRED :
    516 			return (dgettext(TEXT_DOMAIN, "Needs Maintenance"));
    517 		case RUS_DOI :
    518 			return (dgettext(TEXT_DOMAIN, "Initialization Failed"));
    519 		case RUS_REGEN :
    520 			return (dgettext(TEXT_DOMAIN, "Regen"));
    521 		default :
    522 			return (dgettext(TEXT_DOMAIN, "invalid"));
    523 	} /* switch */
    524 }
    525 
    526 static int
    527 find_erred_column(md_raid_t *raidp, rcs_state_t state)
    528 {
    529 	int		col;
    530 
    531 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
    532 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
    533 		if (cp->state & state)
    534 			return (col);
    535 	}
    536 
    537 	/* No erred columns */
    538 	return (-1);
    539 }
    540 
    541 /*
    542  * convert raid state to repair action
    543  */
    544 char *
    545 raid_state_to_action(md_raid_t *raidp)
    546 {
    547 	static char	emsg[1024];
    548 	mdname_t	*raidnp = raidp->common.namep;
    549 	int		err_col;
    550 
    551 	/* first check for full init failure */
    552 	if (raidp->state & RUS_DOI) {
    553 		(void) snprintf(emsg, sizeof (emsg),
    554 		    "metaclear -f %s", raidnp->cname);
    555 		return (emsg);
    556 	}
    557 
    558 	/* replace errored or init errored raid column */
    559 	if ((err_col = find_erred_column(raidp,
    560 	    (RCS_ERRED | RCS_INIT_ERRED))) >= 0) {
    561 		mdname_t	*colnp;
    562 
    563 		/* get column with error */
    564 		assert(err_col < raidp->cols.cols_len);
    565 		colnp = raidp->cols.cols_val[err_col].colnamep;
    566 		(void) snprintf(emsg, sizeof (emsg),
    567 		    "metareplace %s%s %s <%s>",
    568 		    ((raidp->state == RUS_LAST_ERRED) ? "-f " : ""),
    569 		    raidnp->cname, colnp->cname,
    570 		    dgettext(TEXT_DOMAIN, "new device"));
    571 		return (emsg);
    572 	}
    573 
    574 
    575 	/* replace last errored raid column */
    576 	if ((err_col = find_erred_column(raidp, RCS_LAST_ERRED)) >= 0) {
    577 		mdname_t	*colnp;
    578 
    579 		assert(err_col < raidp->cols.cols_len);
    580 		colnp = raidp->cols.cols_val[err_col].colnamep;
    581 		(void) snprintf(emsg, sizeof (emsg),
    582 		    "metareplace %s %s %s <%s>",
    583 		    ((raidp->state == RUS_LAST_ERRED) ? "-f " : ""),
    584 		    raidnp->cname, colnp->cname,
    585 		    dgettext(TEXT_DOMAIN, "new device"));
    586 		return (emsg);
    587 	}
    588 
    589 	/* OK */
    590 	return (NULL);
    591 }
    592 
    593 /*
    594  * get printable raid column state
    595  */
    596 char *
    597 raid_col_state_to_name(
    598 	md_raidcol_t	*colp,
    599 	md_timeval32_t	*tvp,
    600 	uint_t		tstate
    601 )
    602 {
    603 	/* grab time */
    604 	if (tvp != NULL)
    605 		*tvp = colp->timestamp;
    606 
    607 	if (tstate != 0) {
    608 		return (dgettext(TEXT_DOMAIN, "Unavailable"));
    609 	}
    610 
    611 	/* everything else */
    612 	switch (colp->state) {
    613 	case RCS_INIT:
    614 		return (dgettext(TEXT_DOMAIN, "Initializing"));
    615 
    616 	case RCS_OKAY:
    617 		return (dgettext(TEXT_DOMAIN, "Okay"));
    618 
    619 	case RCS_INIT_ERRED:
    620 	/*FALLTHROUGH*/
    621 	case RCS_ERRED:
    622 		return (dgettext(TEXT_DOMAIN, "Maintenance"));
    623 
    624 	case RCS_LAST_ERRED:
    625 		return (dgettext(TEXT_DOMAIN, "Last Erred"));
    626 
    627 	case RCS_RESYNC:
    628 		return (dgettext(TEXT_DOMAIN, "Resyncing"));
    629 
    630 	default:
    631 		return (dgettext(TEXT_DOMAIN, "Unknown"));
    632 	}
    633 }
    634 
    635 /*
    636  * print raid column
    637  */
    638 static int
    639 display_raid_device_info(
    640 	mdsetname_t	*sp,
    641 	md_raidcol_t	*colp,
    642 	char		*fname,
    643 	FILE		*fp,
    644 	mdprtopts_t	options,
    645 	int		print_len,
    646 	uint_t		top_tstate, /* Errored tstate flags */
    647 	md_error_t	*ep
    648 )
    649 {
    650 	mdname_t	*namep = ((colp->hsnamep != NULL) ?
    651 	    colp->hsnamep : colp->colnamep);
    652 	char 		*devid = "";
    653 	char		*cname = colp->colnamep->cname;
    654 	diskaddr_t	start_blk;
    655 	int		has_mddb;
    656 	char		*has_mddb_str;
    657 	char		*col_state;
    658 	md_timeval32_t	tv;
    659 	char		*hsname = ((colp->hsnamep != NULL) ?
    660 	    colp->hsnamep->cname : "");
    661 	int		rval = -1;
    662 	mdname_t	*didnp = NULL;
    663 	ddi_devid_t	dtp;
    664 	uint_t		tstate = 0;
    665 
    666 	/* get info */
    667 	if ((start_blk = metagetstart(sp, namep, ep)) == MD_DISKADDR_ERROR)
    668 		return (-1);
    669 	if ((has_mddb = metahasmddb(sp, namep, ep)) < 0)
    670 		return (-1);
    671 	if (has_mddb)
    672 		has_mddb_str = dgettext(TEXT_DOMAIN, "Yes");
    673 	else
    674 		has_mddb_str = dgettext(TEXT_DOMAIN, "No");
    675 
    676 	if (metaismeta(namep)) {
    677 		if (meta_get_tstate(namep->dev, &tstate, ep) != 0)
    678 			return (-1);
    679 		col_state = raid_col_state_to_name(colp, &tv,
    680 		    tstate & MD_DEV_ERRORED);
    681 	} else {
    682 		/*
    683 		 * if top_tstate is set, that implies that you have
    684 		 * a ctd type device with an unavailable metadevice
    685 		 * on top of it. If so, print a - for it's state
    686 		 */
    687 		if (top_tstate != 0)
    688 			col_state = "-";
    689 		else
    690 			col_state = raid_col_state_to_name(colp, &tv, tstate);
    691 	}
    692 
    693 	/* populate the key in the name_p structure */
    694 	if ((didnp = metadevname(&sp, namep->dev, ep)) == NULL)
    695 		return (-1);
    696 
    697 	/* determine if devid does NOT exist */
    698 	if (options & PRINT_DEVID) {
    699 		if ((dtp = meta_getdidbykey(sp->setno, getmyside(sp, ep),
    700 		    didnp->key, ep)) == NULL)
    701 			devid = dgettext(TEXT_DOMAIN, "No ");
    702 		else {
    703 			devid = dgettext(TEXT_DOMAIN, "Yes");
    704 			free(dtp);
    705 		}
    706 	}
    707 	/* print column */
    708 	/*
    709 	 * Building a format string on the fly that will
    710 	 * be used in (f)printf. This allows the length
    711 	 * of the ctd to vary from small to large without
    712 	 * looking horrible.
    713 	 */
    714 	if (! (options & PRINT_TIMES)) {
    715 		if (fprintf(fp,
    716 		    "\t%-*.*s %8lld     %5.5s %12.12s %5.5s %s\n",
    717 		    print_len, print_len, cname, start_blk, has_mddb_str,
    718 		    col_state, devid, hsname) == EOF) {
    719 			goto out;
    720 		}
    721 	} else {
    722 		char	*timep = meta_print_time(&tv);
    723 
    724 		if (fprintf(fp,
    725 		    "\t%-*s %5lld %-5s %-11s %-5s %-9s %s\n",
    726 		    print_len, cname, start_blk, has_mddb_str,
    727 		    col_state, devid, hsname, timep) == EOF) {
    728 			goto out;
    729 		}
    730 	}
    731 
    732 	/* success */
    733 	rval = 0;
    734 
    735 	/* cleanup, return error */
    736 out:
    737 	if (rval != 0)
    738 		(void) mdsyserror(ep, errno, fname);
    739 
    740 	return (rval);
    741 }
    742 
    743 /*
    744  * print raid options
    745  */
    746 int
    747 meta_print_raid_options(
    748 	mdhspname_t	*hspnamep,
    749 	char		*fname,
    750 	FILE		*fp,
    751 	md_error_t	*ep
    752 )
    753 {
    754 	char		*hspname = ((hspnamep != NULL) ? hspnamep->hspname :
    755 	    dgettext(TEXT_DOMAIN, "none"));
    756 	int		rval = -1;
    757 
    758 	/* print options */
    759 	if (fprintf(fp, dgettext(TEXT_DOMAIN,
    760 	    "    Hot spare pool: %s\n"), hspname) == EOF) {
    761 		goto out;
    762 	}
    763 
    764 	/* success */
    765 	rval = 0;
    766 
    767 	/* cleanup, return error */
    768 out:
    769 	if (rval != 0)
    770 		(void) mdsyserror(ep, errno, fname);
    771 	return (rval);
    772 }
    773 
/*
 * report raid
 *
 * Writes the human-readable status report for one raid to fp, in this
 * order: optional header (PRINT_HEADER), state (with timestamp when
 * PRINT_TIMES is set), suggested recovery command, resync /
 * initialization / parity regeneration progress, hot spare pool,
 * interlace, size, prewrite details (PRINT_DEBUG only), a table of the
 * original columns and, when the raid has more columns than orig_ncol,
 * a second table of the concatenated columns.
 *
 * With PRINT_LARGEDEVICES or PRINT_FN set, a raid whose revision lacks
 * MD_64BIT_META_DEV or MD_FN_META_DEV respectively is skipped: nothing
 * is printed and 0 is returned.
 *
 * Returns 0 on success, -1 on failure.  A failed write to fp goes
 * through "out", which records errno and fname in ep via mdsyserror().
 * Failures of helpers that are handed ep themselves return -1 directly
 * and leave ep as the helper set it.
 */
static int
raid_report(
	mdsetname_t	*sp,
	md_raid_t	*raidp,
	char		*fname,
	FILE		*fp,
	mdprtopts_t	options,
	md_error_t	*ep
)
{
	char		*p;
	uint_t		ncol = raidp->cols.cols_len;
	uint_t		orig_ncol = raidp->orig_ncol;
	diskaddr_t	column_size = raidp->column_size;
	char		*raid_state;
	md_timeval32_t	tv;
	char		*timep;
	uint_t		col;
	int		rval = -1;
	int		len = 0;
	uint_t		tstate = 0;

	/* filtered out by the requested options: succeed without output */
	if (options & PRINT_LARGEDEVICES) {
		if ((raidp->common.revision & MD_64BIT_META_DEV) == 0) {
			rval = 0;
			goto out;
		}
	}

	if (options & PRINT_FN) {
		if ((raidp->common.revision & MD_FN_META_DEV) == 0) {
			rval = 0;
			goto out;
		}
	}

	/* print header */
	if (options & PRINT_HEADER) {
		if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: RAID\n"),
		    raidp->common.namep->cname) == EOF) {
			goto out;
		}

	}

	/* print state */
	if (metaismeta(raidp->common.namep)) {
		if (meta_get_tstate(raidp->common.namep->dev, &tstate, ep) != 0)
			return (-1);
	}
	tstate &= MD_DEV_ERRORED; /* extract the errored tstate bits */
	raid_state = raid_state_to_name(raidp, &tv, tstate);
	if (options & PRINT_TIMES) {
		timep = meta_print_time(&tv);
	} else {
		timep = "";
	}

	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    State: %-12s %s\n"),
	    raid_state, timep) == EOF) {
		goto out;
	}

	/*
	 * Display recovery action if we're marked in the Unavailable state.
	 * The block is also entered when no errored tstate bit is set at
	 * all; in that case the action comes from raid_state_to_action().
	 */
	if ((tstate == 0) || (tstate & MD_INACCESSIBLE)) {
		/* print what to do */
		if (tstate & MD_INACCESSIBLE) {
			char sname[MD_MAX_SETNAME + 3]; /* 3 = sizeof("-s ") */

			/* the local set needs no "-s <set>" argument */
			if (metaislocalset(sp)) {
				sname[0] = '\0';
			} else {
				(void) snprintf(sname, MD_MAX_SETNAME + 3,
				    "-s %s", sp->setname);
			}
			if (fprintf(fp, dgettext(TEXT_DOMAIN,
			    "    Invoke: metastat -i %s\n"), sname) == EOF) {
				goto out;
			}
		} else if ((p = raid_state_to_action(raidp)) != NULL) {
			if (fprintf(fp, dgettext(TEXT_DOMAIN,
			    "    Invoke: %s\n"), p) == EOF) {
				goto out;
			}
		}

		/*
		 * resync status: percent_done is printed as <n/10>.<n%10>,
		 * i.e. it is treated as a value in tenths of a percent.
		 * Only one of the three progress lines is ever printed.
		 */
		if (raidp->resync_flags & MD_RI_INPROGRESS) {
			if (fprintf(fp, dgettext(TEXT_DOMAIN,
			    "    Resync in progress: %2d.%1d%% done\n"),
			    raidp->percent_done/10,
			    raidp->percent_done % 10) == EOF) {
				goto out;
			}
		} else if (raidp->resync_flags & MD_GROW_INPROGRESS) {
			if (fprintf(fp, dgettext(TEXT_DOMAIN,
			    "    Initialization in progress: %2d.%1d%% "
			    "done\n"),
			    raidp->percent_done/10,
			    raidp->percent_done % 10) == EOF) {
				goto out;
			}
		} else if (raidp->state & RUS_REGEN) {
			if (fprintf(fp, dgettext(TEXT_DOMAIN,
			    "    Parity regeneration in progress: %2d.%1d%% "
			    "done\n"),
			    raidp->percent_done/10,
			    raidp->percent_done % 10) == EOF) {
				goto out;
			}
		}
	}

	/* print hotspare pool (only when one is assigned) */
	if (raidp->hspnamep != NULL) {
		if (meta_print_raid_options(raidp->hspnamep,
		    fname, fp, ep) != 0) {
			return (-1);
		}
	}

	/* print interlace */
	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Interlace: %lld blocks\n"),
	    raidp->interlace) == EOF) {
		goto out;
	}

	/* print size */
	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Size: %lld blocks (%s)\n"),
	    raidp->common.size,
	    meta_number_to_string(raidp->common.size, DEV_BSIZE)) == EOF) {
		goto out;
	}

	/* MD_DEBUG stuff */
	if (options & PRINT_DEBUG) {
		mdname_t	*raidnp = raidp->common.namep;
		mr_unit_t	*mr;

		/*
		 * get additional info; mr must be released with Free() on
		 * every path out of this block
		 */
		if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
			return (-1);
		assert(mr->c.un_type == MD_METARAID);

		/* print prewrite count and size */
		if (fprintf(fp, dgettext(TEXT_DOMAIN,
		    "    Prewrite Count: %u slots\n"),
		    mr->un_pwcnt) == EOF) {
			Free(mr);
			goto out;
		}
		/* NOTE(review): divides by un_pwcnt; assumes it is non-zero */
		if (fprintf(fp, dgettext(TEXT_DOMAIN,
		    "    Prewrite Slot Size: %u blocks\n"),
		    (mr->un_pwsize / mr->un_pwcnt)) == EOF) {
			Free(mr);
			goto out;
		}
		if (fprintf(fp, dgettext(TEXT_DOMAIN,
		    "    Prewrite Total Size: %u blocks\n"),
		    mr->un_pwsize) == EOF) {
			Free(mr);
			goto out;
		}
		Free(mr);
	}

	/*
	 * print original devices; the size shown is column_size times
	 * (orig_ncol - 1)
	 */
	if (fprintf(fp, dgettext(TEXT_DOMAIN, "Original device:\n")) == EOF)
		goto out;
	if (fprintf(fp, dgettext(TEXT_DOMAIN, "    Size: %lld blocks (%s)\n"),
	    column_size * (orig_ncol - 1),
	    meta_number_to_string(column_size * (orig_ncol - 1), DEV_BSIZE))
	    == EOF) {
		goto out;
	}
	/*
	 * Work out the width of the name field: the longest original
	 * column name or the localized word "Device", whichever is
	 * longer, plus 2.  The width is passed to (f)printf at run time
	 * through "%*s".  This allows the length of the ctd to vary from
	 * small to large without looking horrible.
	 */
	for (col = 0; (col < orig_ncol); ++col) {
		len = max(len,
		    strlen(raidp->cols.cols_val[col].colnamep->cname));
	}

	len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device")));
	len += 2;

	/* table header for the original columns */
	if (! (options & PRINT_TIMES)) {
		if (fprintf(fp,
		    "\t%-*.*s %-12.12s %-5.5s %12.12s %-5.5s  %s\n",
		    len, len,
		    dgettext(TEXT_DOMAIN, "Device"),
		    dgettext(TEXT_DOMAIN, "Start Block"),
		    dgettext(TEXT_DOMAIN, "Dbase"),
		    dgettext(TEXT_DOMAIN, "State"),
		    dgettext(TEXT_DOMAIN, "Reloc"),
		    dgettext(TEXT_DOMAIN, "Hot Spare")) == EOF) {
			goto out;
		}
	} else {
		if (fprintf(fp,
		    "\t%-*s  %5s  %-5s  %-11s  %-5s   %-9s  %s\n",
		    len,
		    dgettext(TEXT_DOMAIN, "Device"),
		    dgettext(TEXT_DOMAIN, "Start"),
		    dgettext(TEXT_DOMAIN, "Dbase"),
		    dgettext(TEXT_DOMAIN, "State"),
		    dgettext(TEXT_DOMAIN, "Reloc"),
		    dgettext(TEXT_DOMAIN, "Hot Spare"),
		    dgettext(TEXT_DOMAIN, "Time")) == EOF) {
			goto out;
		}
	}
	/* one row per original column; col ends up equal to orig_ncol */
	for (col = 0; (col < orig_ncol); ++col) {
		md_raidcol_t	*mdrcp = &raidp->cols.cols_val[col];

		if (display_raid_device_info(sp, mdrcp, fname, fp, options,
		    len, tstate, ep) != 0) {
			return (-1);
		}
	}

	/* print concatenated devices (columns beyond orig_ncol), if any */
	if (col < ncol) {
		if (fprintf(fp, dgettext(TEXT_DOMAIN,
		    "Concatenated Devices:\n")) == EOF) {
			goto out;
		}
		if (fprintf(fp, dgettext(TEXT_DOMAIN,
		    "    Size: %lld blocks (%s)\n"),
		    column_size * (ncol - orig_ncol),
		    meta_number_to_string(column_size * (ncol - orig_ncol),
		    DEV_BSIZE))
		    == EOF) {
			goto out;
		}
		/*
		 * Same run-time field width as above.  This allows the
		 * length of the ctd to vary from small to large without
		 * looking horrible.
		 *
		 * NOTE(review): len was computed from the original columns
		 * only, and both header formats below differ slightly from
		 * the ones used for the original columns - confirm whether
		 * that is intentional.
		 */
		if (! (options & PRINT_TIMES)) {
			if (fprintf(fp,
			    "\t%-*.*s %-12.12s %-5.5s %-12.12s %5.5s %s\n",
			    len, len,
			    dgettext(TEXT_DOMAIN, "Device"),
			    dgettext(TEXT_DOMAIN, "Start Block"),
			    dgettext(TEXT_DOMAIN, "Dbase"),
			    dgettext(TEXT_DOMAIN, "State"),
			    dgettext(TEXT_DOMAIN, "Reloc"),
			    dgettext(TEXT_DOMAIN, "Hot Spare")) == EOF) {
				goto out;
			}
		} else {
			if (fprintf(fp,
			    "\t%-*s %5s %-5s %-11s %-9s %s\t%s\n",
			    len,
			    dgettext(TEXT_DOMAIN, "Device"),
			    dgettext(TEXT_DOMAIN, "Start"),
			    dgettext(TEXT_DOMAIN, "Dbase"),
			    dgettext(TEXT_DOMAIN, "State"),
			    dgettext(TEXT_DOMAIN, "Reloc"),
			    dgettext(TEXT_DOMAIN, "Hot Spare"),
			    dgettext(TEXT_DOMAIN, "Time")) == EOF) {
				goto out;
			}
		}
		/* continue from where the original-column loop stopped */
		assert(col == orig_ncol);
		for (/* void */; (col < ncol); col++) {
			md_raidcol_t	*mdrcp = &raidp->cols.cols_val[col];

			if (display_raid_device_info(sp, mdrcp, fname, fp,
			    options, len, tstate, ep) != 0) {
				return (-1);
			}
		}
	}

	/* add extra line */
	if (fprintf(fp, "\n") == EOF)
		goto out;

	/* success */
	rval = 0;

	/* cleanup, return error */
out:
	if (rval != 0)
		(void) mdsyserror(ep, errno, fname);
	return (rval);
}
   1072 
   1073 /*
   1074  * print/report raid
   1075  */
   1076 int
   1077 meta_raid_print(
   1078 	mdsetname_t	*sp,
   1079 	mdname_t	*raidnp,
   1080 	mdnamelist_t	**nlpp,
   1081 	char		*fname,
   1082 	FILE		*fp,
   1083 	mdprtopts_t	options,
   1084 	md_error_t	*ep
   1085 )
   1086 {
   1087 	md_raid_t	*raidp;
   1088 	int		col;
   1089 
   1090 	/* should have same set */
   1091 	assert(sp != NULL);
   1092 	assert((raidnp == NULL) ||
   1093 	    (sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))));
   1094 
   1095 	/* print all raids */
   1096 	if (raidnp == NULL) {
   1097 		mdnamelist_t	*nlp = NULL;
   1098 		mdnamelist_t	*p;
   1099 		int		cnt;
   1100 		int		rval = 0;
   1101 
   1102 		/* get list */
   1103 		if ((cnt = meta_get_raid_names(sp, &nlp, options, ep)) < 0)
   1104 			return (-1);
   1105 		else if (cnt == 0)
   1106 			return (0);
   1107 
   1108 		/* recurse */
   1109 		for (p = nlp; (p != NULL); p = p->next) {
   1110 			mdname_t	*np = p->namep;
   1111 
   1112 			if (meta_raid_print(sp, np, nlpp, fname, fp,
   1113 			    options, ep) != 0)
   1114 				rval = -1;
   1115 		}
   1116 
   1117 		/* cleanup, return success */
   1118 		metafreenamelist(nlp);
   1119 		return (rval);
   1120 	}
   1121 
   1122 	/* get unit structure */
   1123 	if ((raidp = meta_get_raid_common(sp, raidnp,
   1124 	    ((options & PRINT_FAST) ? 1 : 0), ep)) == NULL)
   1125 		return (-1);
   1126 
   1127 	/* check for parented */
   1128 	if ((! (options & PRINT_SUBDEVS)) &&
   1129 	    (MD_HAS_PARENT(raidp->common.parent))) {
   1130 		return (0);
   1131 	}
   1132 
   1133 	/* print appropriate detail */
   1134 	if (options & PRINT_SHORT) {
   1135 		if (raid_print(raidp, fname, fp, options, ep) != 0)
   1136 			return (-1);
   1137 	} else {
   1138 		if (raid_report(sp, raidp, fname, fp, options, ep) != 0)
   1139 			return (-1);
   1140 	}
   1141 
   1142 	/* Recurse on components that are metadevices */
   1143 	for (col = 0; col < raidp->cols.cols_len; ++col) {
   1144 		md_raidcol_t	*colp = &raidp->cols.cols_val[col];
   1145 		mdname_t	*namep = colp->colnamep;
   1146 
   1147 		if ((metaismeta(namep)) &&
   1148 		    (meta_print_name(sp, namep, nlpp, fname, fp,
   1149 		    (options | PRINT_HEADER | PRINT_SUBDEVS),
   1150 		    NULL, ep) != 0)) {
   1151 			return (-1);
   1152 		}
   1153 	}
   1154 
   1155 	return (0);
   1156 }
   1157 
   1158 /*
   1159  * adjust raid geometry
   1160  */
   1161 static int
   1162 adjust_geom(
   1163 	mdname_t	*raidnp,
   1164 	mdname_t	*colnp,
   1165 	mr_unit_t	*mr,
   1166 	md_error_t	*ep
   1167 )
   1168 {
   1169 	uint_t		round_cyl = 1;
   1170 	mdgeom_t	*geomp;
   1171 
   1172 	/* get reinstructs */
   1173 	if ((geomp = metagetgeom(colnp, ep)) == NULL)
   1174 		return (-1);
   1175 
   1176 	/* adjust geometry */
   1177 	if (meta_adjust_geom((md_unit_t *)mr, raidnp, geomp->write_reinstruct,
   1178 	    geomp->read_reinstruct, round_cyl, ep) != 0)
   1179 		return (-1);
   1180 
   1181 	/* return success */
   1182 	return (0);
   1183 }
   1184 
   1185 /*
   1186  * add another column to the raid unit structure
   1187  */
   1188 static int
   1189 attach_raid_col(
   1190 	mdsetname_t	*sp,
   1191 	mdname_t	*raidnp,
   1192 	mr_unit_t	*mr,
   1193 	mr_column_t	*mdc,
   1194 	mdname_t	*colnp,
   1195 	rcs_state_t	state,
   1196 	mdnamelist_t	**keynlpp,
   1197 	mdcmdopts_t	options,
   1198 	md_error_t	*ep
   1199 )
   1200 {
   1201 	diskaddr_t	column_size = mr->un_segsize * mr->un_segsincolumn;
   1202 	diskaddr_t	size;
   1203 	uint_t		 maxio;
   1204 	mdcinfo_t	*cinfop;
   1205 	md_timeval32_t	tmp_time;
   1206 
   1207 	/* setup state and timestamp */
   1208 	mdc->un_devstate = state;
   1209 	if (meta_gettimeofday(&tmp_time) == -1)
   1210 		return (mdsyserror(ep, errno, NULL));
   1211 
   1212 	mdc->un_devtimestamp = tmp_time;
   1213 	/* get start, size, and maxio */
   1214 	if ((mdc->un_orig_devstart = metagetstart(sp, colnp, ep)) ==
   1215 	    MD_DISKADDR_ERROR)
   1216 		return (-1);
   1217 	if ((size = metagetsize(colnp, ep)) == MD_DISKADDR_ERROR)
   1218 		return (-1);
   1219 	if ((cinfop = metagetcinfo(colnp, ep)) == NULL)
   1220 		return (-1);
   1221 	maxio = cinfop->maxtransfer;
   1222 
   1223 	/* adjust start and size by prewrite */
   1224 	mdc->un_orig_pwstart = mdc->un_orig_devstart;
   1225 	mdc->un_orig_devstart += mr->un_pwsize;
   1226 
   1227 	/* make sure we still have something left */
   1228 	if ((mdc->un_orig_devstart >= size) ||
   1229 	    ((size - mdc->un_orig_devstart) < column_size)) {
   1230 		return (mdsyserror(ep, ENOSPC, colnp->cname));
   1231 	}
   1232 	size -= mdc->un_orig_devstart;
   1233 	if (maxio < mr->un_maxio) {
   1234 		return (mdcomperror(ep, MDE_MAXIO,
   1235 		    meta_getminor(raidnp->dev), colnp->dev, colnp->cname));
   1236 	}
   1237 
   1238 	if (options & MDCMD_DOIT) {
   1239 		/* store name in namespace */
   1240 		if (add_key_name(sp, colnp, keynlpp, ep) != 0)
   1241 			return (-1);
   1242 	}
   1243 
   1244 	/* setup column */
   1245 	mdc->un_orig_dev = colnp->dev;
   1246 	mdc->un_orig_key = colnp->key;
   1247 	mdc->un_dev = colnp->dev;
   1248 	mdc->un_pwstart = mdc->un_orig_pwstart;
   1249 	mdc->un_devstart = mdc->un_orig_devstart;
   1250 	mdc->un_alt_dev = NODEV64;
   1251 	mdc->un_alt_pwstart = 0;
   1252 	mdc->un_alt_devstart = 0;
   1253 	mdc->un_hs_id = 0;
   1254 
   1255 	/* add the size (we use) of the device to the total */
   1256 	mr->c.un_actual_tb += column_size;
   1257 
   1258 	/* adjust geometry */
   1259 	if (adjust_geom(raidnp, colnp, mr, ep) != 0)
   1260 		return (-1);
   1261 
   1262 	/* count column */
   1263 	mr->un_totalcolumncnt++;
   1264 
   1265 	/* return success */
   1266 	return (0);
   1267 }
   1268 
   1269 /*
   1270  * invalidate column names
   1271  */
   1272 static int
   1273 invalidate_columns(
   1274 	mdsetname_t	*sp,
   1275 	mdname_t	*raidnp,
   1276 	md_error_t	*ep
   1277 )
   1278 {
   1279 	md_raid_t	*raidp;
   1280 	uint_t		col;
   1281 
   1282 	if ((raidp = meta_get_raid(sp, raidnp, ep)) == NULL)
   1283 		return (-1);
   1284 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
   1285 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
   1286 		mdname_t	*colnp = cp->colnamep;
   1287 
   1288 		meta_invalidate_name(colnp);
   1289 	}
   1290 	return (0);
   1291 }
   1292 
   1293 /*
   1294  * attach columns to raid
   1295  */
   1296 int
   1297 meta_raid_attach(
   1298 	mdsetname_t		*sp,
   1299 	mdname_t		*raidnp,
   1300 	mdnamelist_t		*colnlp,
   1301 	mdcmdopts_t		options,
   1302 	md_error_t		*ep
   1303 )
   1304 {
   1305 	uint_t			concat_cnt = 0;
   1306 	mdnamelist_t		*p;
   1307 	mr_unit_t		*old_mr;
   1308 	mr_unit_t		*new_mr;
   1309 	size_t			old_rusize;
   1310 	size_t			new_rusize;
   1311 	mdnamelist_t		*keynlp = NULL;
   1312 	md_grow_params_t	mgp;
   1313 	int			rval = -1;
   1314 	int			create_flag = MD_CRO_32BIT;
   1315 
   1316 	/* should have a set */
   1317 	assert(sp != NULL);
   1318 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
   1319 
   1320 	/* check type */
   1321 	if (metachkmeta(raidnp, ep) != 0)
   1322 		return (-1);
   1323 
   1324 	/* check and count new columns */
   1325 	for (p = colnlp; (p != NULL); p = p->next) {
   1326 		mdname_t	*np = p->namep;
   1327 		mdnamelist_t	*p2;
   1328 
   1329 		/* check against existing devices */
   1330 		if (meta_check_column(sp, np, ep) != 0)
   1331 			return (-1);
   1332 
   1333 		/* check against ourselves */
   1334 		for (p2 = p->next; (p2 != NULL); p2 = p2->next) {
   1335 			if (meta_check_overlap(np->cname, np, 0, -1,
   1336 			    p2->namep, 0, -1, ep) != 0) {
   1337 				return (-1);
   1338 			}
   1339 		}
   1340 
   1341 		/* count */
   1342 		++concat_cnt;
   1343 	}
   1344 
   1345 	/* get old unit */
   1346 	if ((old_mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
   1347 		return (-1);
   1348 
   1349 	/*
   1350 	 * calculate the size needed for the new raid unit and allocate
   1351 	 * the appropriate structure. allocate new unit.
   1352 	 */
   1353 	old_rusize = sizeof (*old_mr) - sizeof (old_mr->un_column[0]);
   1354 	old_rusize += old_mr->un_totalcolumncnt * sizeof (old_mr->un_column[0]);
   1355 	new_rusize = sizeof (*new_mr) - sizeof (new_mr->un_column[0]);
   1356 	new_rusize += (old_mr->un_totalcolumncnt + concat_cnt)
   1357 	    * sizeof (new_mr->un_column[0]);
   1358 	new_mr = Zalloc(new_rusize);
   1359 	(void) memcpy(new_mr, old_mr, old_rusize);
   1360 
   1361 	/* We always want a do-it, this is for attach_raid_col below */
   1362 	options |= MDCMD_DOIT;
   1363 
   1364 	/* build new unit structure */
   1365 	for (p = colnlp; (p != NULL); p = p->next) {
   1366 		mdname_t	*colnp = p->namep;
   1367 		mr_column_t	*mdc;
   1368 
   1369 		/* attach column */
   1370 		mdc = &new_mr->un_column[new_mr->un_totalcolumncnt];
   1371 		if (attach_raid_col(sp, raidnp, new_mr, mdc, colnp,
   1372 		    RCS_INIT, &keynlp, options, ep) != 0) {
   1373 			goto out;
   1374 		}
   1375 	}
   1376 	assert(new_mr->un_totalcolumncnt
   1377 	    == (old_mr->un_totalcolumncnt + concat_cnt));
   1378 
   1379 
   1380 	create_flag = meta_check_devicesize(new_mr->c.un_total_blocks);
   1381 
   1382 	/* grow raid */
   1383 	(void) memset(&mgp, 0, sizeof (mgp));
   1384 	mgp.mnum = MD_SID(new_mr);
   1385 	MD_SETDRIVERNAME(&mgp, MD_RAID, sp->setno);
   1386 	mgp.size = new_rusize;
   1387 	mgp.mdp = (uintptr_t)new_mr;
   1388 
   1389 	if (create_flag == MD_CRO_32BIT) {
   1390 		mgp.options = MD_CRO_32BIT;
   1391 		new_mr->c.un_revision &= ~MD_64BIT_META_DEV;
   1392 	} else {
   1393 		mgp.options = MD_CRO_64BIT;
   1394 		new_mr->c.un_revision |= MD_64BIT_META_DEV;
   1395 	}
   1396 	if (metaioctl(MD_IOCGROW, &mgp, &mgp.mde, NULL) != 0) {
   1397 		(void) mdstealerror(ep, &mgp.mde);
   1398 		goto out;
   1399 	}
   1400 
   1401 	/* clear cache */
   1402 	if (invalidate_columns(sp, raidnp, ep) != 0)
   1403 		goto out;
   1404 	meta_invalidate_name(raidnp);
   1405 
   1406 	/* let em know */
   1407 	if (options & MDCMD_PRINT) {
   1408 		if (concat_cnt == 1) {
   1409 			(void) printf(dgettext(TEXT_DOMAIN,
   1410 			    "%s: component is attached\n"),
   1411 			    raidnp->cname);
   1412 		} else {
   1413 			(void) printf(dgettext(TEXT_DOMAIN,
   1414 			    "%s: components are attached\n"),
   1415 			    raidnp->cname);
   1416 		}
   1417 		(void) fflush(stdout);
   1418 	}
   1419 
   1420 
   1421 	/* grow any parents */
   1422 	if (meta_concat_parent(sp, raidnp, ep) != 0)
   1423 		goto out;
   1424 	rval = 0;	/* success */
   1425 
   1426 	/* cleanup, return error */
   1427 out:
   1428 	Free(old_mr);
   1429 	Free(new_mr);
   1430 	if (rval != 0)
   1431 		(void) del_key_names(sp, keynlp, NULL);
   1432 	metafreenamelist(keynlp);
   1433 	return (rval);
   1434 }
   1435 
   1436 /*
   1437  * get raid parameters
   1438  */
   1439 int
   1440 meta_raid_get_params(
   1441 	mdsetname_t	*sp,
   1442 	mdname_t	*raidnp,
   1443 	mr_params_t	*paramsp,
   1444 	md_error_t	*ep
   1445 )
   1446 {
   1447 	md_raid_t	*raidp;
   1448 
   1449 	/* should have a set */
   1450 	assert(sp != NULL);
   1451 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
   1452 
   1453 	/* check name */
   1454 	if (metachkmeta(raidnp, ep) != 0)
   1455 		return (-1);
   1456 
   1457 	/* get unit */
   1458 	if ((raidp = meta_get_raid(sp, raidnp, ep)) == NULL)
   1459 		return (-1);
   1460 
   1461 	/* return parameters */
   1462 	(void) memset(paramsp, 0, sizeof (*paramsp));
   1463 	if (raidp->hspnamep == NULL)
   1464 		paramsp->hsp_id = MD_HSP_NONE;
   1465 	else
   1466 		paramsp->hsp_id = raidp->hspnamep->hsp;
   1467 	return (0);
   1468 }
   1469 
   1470 /*
   1471  * set raid parameters
   1472  */
   1473 int
   1474 meta_raid_set_params(
   1475 	mdsetname_t		*sp,
   1476 	mdname_t		*raidnp,
   1477 	mr_params_t		*paramsp,
   1478 	md_error_t		*ep
   1479 )
   1480 {
   1481 	md_raid_params_t	msp;
   1482 
   1483 	/* should have a set */
   1484 	assert(sp != NULL);
   1485 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
   1486 
   1487 	/* check name */
   1488 	if (metachkmeta(raidnp, ep) != 0)
   1489 		return (-1);
   1490 
   1491 	/* set parameters */
   1492 	(void) memset(&msp, 0, sizeof (msp));
   1493 	MD_SETDRIVERNAME(&msp, MD_RAID, sp->setno);
   1494 	msp.mnum = meta_getminor(raidnp->dev);
   1495 	msp.params = *paramsp;
   1496 	if (metaioctl(MD_IOCCHANGE, &msp, &msp.mde, raidnp->cname) != 0)
   1497 		return (mdstealerror(ep, &msp.mde));
   1498 
   1499 	/* clear cache */
   1500 	meta_invalidate_name(raidnp);
   1501 
   1502 	/* return success */
   1503 	return (0);
   1504 }
   1505 
   1506 /*
   1507  * validate raid replace column
   1508  */
   1509 static int
   1510 validate_new_raid(
   1511 	mdsetname_t	*sp,
   1512 	mdname_t	*raidnp,
   1513 	mdname_t	*colnp,
   1514 	replace_params_t *paramsp,
   1515 	int		dup_ok,
   1516 	md_error_t	*ep
   1517 )
   1518 {
   1519 	mr_unit_t	*mr;
   1520 	diskaddr_t	column_size;
   1521 	diskaddr_t	label;
   1522 	mdcinfo_t	*cinfop;
   1523 	int		rval = -1;
   1524 
   1525 	/* get raid unit */
   1526 	if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL)
   1527 		return (-1);
   1528 	column_size = mr->un_segsize * mr->un_segsincolumn;
   1529 
   1530 	/* check it out */
   1531 	if (meta_check_column(sp, colnp, ep) != 0) {
   1532 		if ((! dup_ok) || (! mdisuseerror(ep, MDE_ALREADY)))
   1533 			goto out;
   1534 		mdclrerror(ep);
   1535 	}
   1536 	if ((paramsp->number_blks = metagetsize(colnp, ep)) ==
   1537 	    MD_DISKADDR_ERROR)
   1538 		goto out;
   1539 	if ((label = metagetlabel(colnp, ep)) == MD_DISKADDR_ERROR)
   1540 		goto out;
   1541 	paramsp->has_label = ((label > 0) ? 1 : 0);
   1542 	if ((paramsp->start_blk = metagetstart(sp, colnp, ep)) ==
   1543 	    MD_DISKADDR_ERROR)
   1544 		goto out;
   1545 	if ((paramsp->number_blks - paramsp->start_blk) < column_size) {
   1546 		(void) mdsyserror(ep, ENOSPC, colnp->cname);
   1547 		goto out;
   1548 	}
   1549 	if ((cinfop = metagetcinfo(colnp, ep)) == NULL)
   1550 		goto out;
   1551 	if (cinfop->maxtransfer < mr->un_maxio) {
   1552 		(void) mdcomperror(ep, MDE_MAXIO, meta_getminor(raidnp->dev),
   1553 		    colnp->dev, colnp->cname);
   1554 		goto out;
   1555 	}
   1556 
   1557 	/* success */
   1558 	rval = 0;
   1559 
   1560 	/* cleanup, return error */
   1561 out:
   1562 	Free(mr);
   1563 	return (rval);
   1564 }
   1565 
   1566 /*
   1567  * replace raid column
   1568  */
   1569 int
   1570 meta_raid_replace(
   1571 	mdsetname_t		*sp,
   1572 	mdname_t		*raidnp,
   1573 	mdname_t		*oldnp,
   1574 	mdname_t		*newnp,
   1575 	mdcmdopts_t		options,
   1576 	md_error_t		*ep
   1577 )
   1578 {
   1579 	int			force = ((options & MDCMD_FORCE) ? 1 : 0);
   1580 	replace_params_t	params;
   1581 	md_dev64_t		old_dev, new_dev;
   1582 	diskaddr_t		new_start_blk, new_end_blk;
   1583 	int			rebind;
   1584 	char			*new_devidp = NULL;
   1585 	md_error_t		xep = mdnullerror;
   1586 	int			ret;
   1587 	md_set_desc		*sd;
   1588 	uint_t			tstate;
   1589 
   1590 	/* should have same set */
   1591 	assert(sp != NULL);
   1592 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
   1593 
   1594 	/* check name */
   1595 	if (metachkmeta(raidnp, ep) != 0)
   1596 		return (-1);
   1597 
   1598 	/* save new binding incase this is a rebind where oldnp==newnp */
   1599 	new_dev = newnp->dev;
   1600 	new_start_blk = newnp->start_blk;
   1601 	new_end_blk = newnp->end_blk;
   1602 
   1603 	/* invalidate, then get the raid (fill in oldnp from metadb) */
   1604 	meta_invalidate_name(raidnp);
   1605 	if (meta_get_raid(sp, raidnp, ep) == NULL)
   1606 		return (-1);
   1607 
   1608 	/* can't replace a component if the raid inaccessible */
   1609 	if (meta_get_tstate(raidnp->dev, &tstate, ep) != 0) {
   1610 		return (-1);
   1611 	}
   1612 	if (tstate & MD_INACCESSIBLE) {
   1613 		return (mdmderror(ep, MDE_IN_UNAVAIL_STATE,
   1614 		    meta_getminor(raidnp->dev), raidnp->cname));
   1615 	}
   1616 
   1617 	/* the old device binding is now established */
   1618 	if ((old_dev = oldnp->dev) == NODEV64)
   1619 		return (mdsyserror(ep, ENODEV, oldnp->cname));
   1620 
   1621 
   1622 	/* setup raid info */
   1623 	(void) memset(&params, 0, sizeof (params));
   1624 	params.mnum = meta_getminor(raidnp->dev);
   1625 	MD_SETDRIVERNAME(&params, MD_RAID, sp->setno);
   1626 	params.old_dev = old_dev;
   1627 	params.cmd = force ? FORCE_REPLACE_COMP : REPLACE_COMP;
   1628 
   1629 	if ((strcmp(oldnp->rname, newnp->rname) == 0) &&
   1630 	    (old_dev != new_dev)) {
   1631 		rebind = 1;
   1632 	} else {
   1633 		rebind = 0;
   1634 	}
   1635 	if (rebind) {
   1636 		newnp->dev = new_dev;
   1637 		newnp->start_blk = new_start_blk;
   1638 		newnp->end_blk = new_end_blk;
   1639 	}
   1640 
   1641 	/*
   1642 	 * Save a copy of the devid associated with the new disk, the
   1643 	 * reason is that the checks for the column (meta_check_column)
   1644 	 * via validate_new_raid(), could cause the disk's devid to be
   1645 	 * changed to that of the devid that is currently stored in the
   1646 	 * replica namespace for the disk in question. This devid could
   1647 	 * be stale if we are replacing the disk. The actual function
   1648 	 * that overwrites the devid is dr2drivedesc().
   1649 	 */
   1650 
   1651 	/* don't setup new_devid if no devid's or MN diskset */
   1652 	if (newnp->drivenamep->devid != NULL)
   1653 		new_devidp = Strdup(newnp->drivenamep->devid);
   1654 
   1655 	if (!metaislocalset(sp)) {
   1656 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
   1657 			return (-1);
   1658 		if (MD_MNSET_DESC(sd))
   1659 			new_devidp = NULL;
   1660 	}
   1661 
   1662 	/* check out new (sets up start_blk, has_label, number_blks) */
   1663 	if (validate_new_raid(sp, raidnp, newnp, &params, rebind,
   1664 	    ep) != 0) {
   1665 		Free(new_devidp);
   1666 		return (-1);
   1667 	}
   1668 
   1669 	/*
   1670 	 * Copy back the saved devid.
   1671 	 */
   1672 	Free(newnp->drivenamep->devid);
   1673 	if (new_devidp) {
   1674 		newnp->drivenamep->devid = Strdup(new_devidp);
   1675 		Free(new_devidp);
   1676 	}
   1677 
   1678 	/* store name in namespace, allocate new key */
   1679 	if (add_key_name(sp, newnp, NULL, ep) != 0)
   1680 		return (-1);
   1681 
   1682 	if (rebind && !metaislocalset(sp)) {
   1683 		/*
   1684 		 * We are 'rebind'ing a disk that is in a diskset so as well
   1685 		 * as updating the diskset's namespace the local set needs
   1686 		 * to be updated because it also contains a reference to the
   1687 		 * disk in question.
   1688 		 */
   1689 		ret = meta_fixdevid(sp, DEV_UPDATE|DEV_LOCAL_SET,
   1690 		    newnp->cname, ep);
   1691 
   1692 		if (ret != METADEVADM_SUCCESS) {
   1693 			(void) del_key_name(sp, newnp, &xep);
   1694 			return (-1);
   1695 		}
   1696 	}
   1697 
   1698 	/* replace column */
   1699 	params.new_dev = new_dev;
   1700 	params.new_key = newnp->key;
   1701 	if (metaioctl(MD_IOCREPLACE, &params, &params.mde, NULL) != 0) {
   1702 		(void) del_key_name(sp, newnp, ep);
   1703 		return (mdstealerror(ep, &params.mde));
   1704 	}
   1705 
   1706 	/* clear cache */
   1707 	meta_invalidate_name(oldnp);
   1708 	meta_invalidate_name(newnp);
   1709 	meta_invalidate_name(raidnp);
   1710 
   1711 	/* let em know */
   1712 	if (options & MDCMD_PRINT) {
   1713 		(void) printf(dgettext(TEXT_DOMAIN,
   1714 		    "%s: device %s is replaced with %s\n"),
   1715 		    raidnp->cname, oldnp->cname, newnp->cname);
   1716 		(void) fflush(stdout);
   1717 	}
   1718 
   1719 	/* return success */
   1720 	return (0);
   1721 }
   1722 
   1723 /*
   1724  * enable raid column
   1725  */
   1726 int
   1727 meta_raid_enable(
   1728 	mdsetname_t		*sp,
   1729 	mdname_t		*raidnp,
   1730 	mdname_t		*colnp,
   1731 	mdcmdopts_t		options,
   1732 	md_error_t		*ep
   1733 )
   1734 {
   1735 	int			force = ((options & MDCMD_FORCE) ? 1 : 0);
   1736 	replace_params_t	params;
   1737 	md_dev64_t		fs_dev, del_dev;
   1738 	int			err = 0;
   1739 	char			*devnm;
   1740 	int			ret;
   1741 	uint_t			tstate;
   1742 
   1743 	/* should have same set */
   1744 	assert(sp != NULL);
   1745 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
   1746 
   1747 	/* check name */
   1748 	if (metachkmeta(raidnp, ep) != 0)
   1749 		return (-1);
   1750 
   1751 	/* get the file_system dev binding */
   1752 	if (meta_getdev(sp, colnp, ep) != 0)
   1753 		return (-1);
   1754 	fs_dev = colnp->dev;
   1755 
   1756 	/* get the raid unit (fill in colnp->dev with metadb version) */
   1757 	meta_invalidate_name(raidnp);
   1758 	if (meta_get_raid(sp, raidnp, ep) == NULL)
   1759 		return (-1);
   1760 
   1761 	/* enabling a component can't work if the raid inaccessible */
   1762 	if (meta_get_tstate(raidnp->dev, &tstate, ep) != 0) {
   1763 		return (-1);
   1764 	}
   1765 	if (tstate & MD_INACCESSIBLE) {
   1766 		return (mdmderror(ep, MDE_IN_UNAVAIL_STATE,
   1767 		    meta_getminor(raidnp->dev), raidnp->cname));
   1768 	}
   1769 
   1770 	/* the metadb device binding is now established */
   1771 	if (colnp->dev == NODEV64)
   1772 		return (mdsyserror(ep, ENODEV, colnp->cname));
   1773 
   1774 	/*
   1775 	 * check for the case where the dev_t has changed between the
   1776 	 * filesystem and the metadb.  This is called a rebind, and
   1777 	 * is handled by meta_raid_replace.
   1778 	 */
   1779 	if (fs_dev != colnp->dev) {
   1780 		/*
   1781 		 * Save the devt of mddb version
   1782 		 */
   1783 		del_dev = colnp->dev;
   1784 
   1785 		/* establish file system binding with invalid start/end */
   1786 		colnp->dev = fs_dev;
   1787 		colnp->start_blk = -1;
   1788 		colnp->end_blk = -1;
   1789 		err = meta_raid_replace(sp, raidnp, colnp, colnp, options, ep);
   1790 
   1791 		/*
   1792 		 * Don't do it if meta_raid_replace returns an error
   1793 		 */
   1794 		if (!err && (devnm = meta_getnmentbydev(sp->setno, MD_SIDEWILD,
   1795 		    del_dev, NULL, NULL, &colnp->key, ep)) != NULL) {
   1796 			(void) del_key_name(sp, colnp, ep);
   1797 			Free(devnm);
   1798 		}
   1799 		return (err);
   1800 	}
   1801 
   1802 	/* setup raid info */
   1803 	(void) memset(&params, 0, sizeof (params));
   1804 	params.mnum = meta_getminor(raidnp->dev);
   1805 	MD_SETDRIVERNAME(&params, MD_RAID, sp->setno);
   1806 	params.old_dev = params.new_dev = colnp->dev;
   1807 	if (force)
   1808 		params.cmd = FORCE_ENABLE_COMP;
   1809 	else
   1810 		params.cmd = ENABLE_COMP;
   1811 
   1812 	/* check it out */
   1813 	if (validate_new_raid(sp, raidnp, colnp, &params, 1, ep) != 0)
   1814 		return (-1);
   1815 
   1816 	/* enable column */
   1817 	if (metaioctl(MD_IOCREPLACE, &params, &params.mde, NULL) != 0)
   1818 		return (mdstealerror(ep, &params.mde));
   1819 
   1820 	/*
   1821 	 * are we dealing with a non-local set? If so need to update the
   1822 	 * local namespace so that the disk record has the correct devid.
   1823 	 */
   1824 	if (!metaislocalset(sp)) {
   1825 		ret = meta_fixdevid(sp, DEV_UPDATE|DEV_LOCAL_SET, colnp->cname,
   1826 		    ep);
   1827 
   1828 		if (ret != METADEVADM_SUCCESS) {
   1829 			/*
   1830 			 * Failed to update the local set. Nothing to do here
   1831 			 * apart from report the error. The namespace is
   1832 			 * most likely broken and some form of remedial
   1833 			 * recovery is going to be required.
   1834 			 */
   1835 			mde_perror(ep, "");
   1836 			mdclrerror(ep);
   1837 		}
   1838 	}
   1839 
   1840 	/* clear cache */
   1841 	meta_invalidate_name(colnp);
   1842 	meta_invalidate_name(raidnp);
   1843 
   1844 	/* let em know */
   1845 	if (options & MDCMD_PRINT) {
   1846 		(void) printf(dgettext(TEXT_DOMAIN,
   1847 		    "%s: device %s is enabled\n"),
   1848 		    raidnp->cname, colnp->cname);
   1849 		(void) fflush(stdout);
   1850 	}
   1851 
   1852 	/* return success */
   1853 	return (0);
   1854 }
   1855 
   1856 /*
   1857  * check for dups in the raid itself
   1858  */
   1859 static int
   1860 check_twice(
   1861 	md_raid_t	*raidp,
   1862 	uint_t		col,
   1863 	md_error_t	*ep
   1864 )
   1865 {
   1866 	mdname_t	*raidnp = raidp->common.namep;
   1867 	mdname_t	*thisnp;
   1868 	uint_t		c;
   1869 
   1870 	thisnp = raidp->cols.cols_val[col].colnamep;
   1871 	for (c = 0; (c < col); ++c) {
   1872 		md_raidcol_t	*mdcp = &raidp->cols.cols_val[c];
   1873 		mdname_t	*colnp = mdcp->colnamep;
   1874 
   1875 		if (meta_check_overlap(raidnp->cname, thisnp, 0, -1,
   1876 		    colnp, 0, -1, ep) != 0) {
   1877 			return (-1);
   1878 		}
   1879 	}
   1880 	return (0);
   1881 }
   1882 
   1883 /*
   1884  * default raid interlace
   1885  */
   1886 diskaddr_t
   1887 meta_default_raid_interlace(void)
   1888 {
   1889 	diskaddr_t	interlace;
   1890 
   1891 	/* default to 512k, round up if necessary */
   1892 	interlace = btodb(512 * 1024);
   1893 	if (interlace < lbtodb(MININTERLACE))
   1894 		interlace = roundup(MININTERLACE, interlace);
   1895 	return (interlace);
   1896 }
   1897 
   1898 /*
   1899  * convert interlaces
   1900  */
   1901 int
   1902 meta_raid_check_interlace(
   1903 	diskaddr_t	interlace,
   1904 	char		*uname,
   1905 	md_error_t	*ep
   1906 )
   1907 {
   1908 	if ((interlace < btodb(RAID_MIN_INTERLACE)) ||
   1909 	    (interlace > btodb(MAXINTERLACE))) {
   1910 		return (mderror(ep, MDE_BAD_INTERLACE, uname));
   1911 	}
   1912 	return (0);
   1913 }
   1914 
   1915 /*
   1916  * check raid
   1917  */
   1918 int
   1919 meta_check_raid(
   1920 	mdsetname_t	*sp,
   1921 	md_raid_t	*raidp,
   1922 	mdcmdopts_t	options,
   1923 	md_error_t	*ep
   1924 )
   1925 {
   1926 	mdname_t	*raidnp = raidp->common.namep;
   1927 	int		doit = ((options & MDCMD_DOIT) ? 1 : 0);
   1928 	int		updateit = ((options & MDCMD_UPDATE) ? 1 : 0);
   1929 	uint_t		ncol;
   1930 	uint_t		col;
   1931 	minor_t		mnum = meta_getminor(raidnp->dev);
   1932 
   1933 	/* check number */
   1934 	if (((ncol = raidp->cols.cols_len) < MD_RAID_MIN) ||
   1935 	    (raidp->orig_ncol > ncol)) {
   1936 		return (mdmderror(ep, MDE_BAD_RAID, mnum, raidnp->cname));
   1937 	}
   1938 
   1939 	/* compute default interlace */
   1940 	if (raidp->interlace == 0) {
   1941 		raidp->interlace = meta_default_raid_interlace();
   1942 	}
   1943 
   1944 	/* check state */
   1945 	switch (raidp->state) {
   1946 	case RUS_INIT:
   1947 	case RUS_OKAY:
   1948 		break;
   1949 
   1950 	default:
   1951 		return (mdmderror(ep, MDE_BAD_RAID, mnum, raidnp->cname));
   1952 	}
   1953 
   1954 	/* check interlace */
   1955 	if (meta_raid_check_interlace(raidp->interlace, raidnp->cname, ep) != 0)
   1956 		return (-1);
   1957 
   1958 	/* check hotspare pool name */
   1959 	if (doit) {
   1960 		if ((raidp->hspnamep != NULL) &&
   1961 		    (metachkhsp(sp, raidp->hspnamep, ep) != 0)) {
   1962 			return (-1);
   1963 		}
   1964 	}
   1965 
   1966 	/* check columns */
   1967 	for (col = 0; (col < ncol); ++col) {
   1968 		md_raidcol_t	*mdcp = &raidp->cols.cols_val[col];
   1969 		mdname_t	*colnp = mdcp->colnamep;
   1970 		diskaddr_t	start_blk, size;
   1971 
   1972 		/* setup column */
   1973 		if (raidp->state == RUS_INIT)
   1974 			mdcp->state = RCS_INIT;
   1975 		else
   1976 			mdcp->state = RCS_OKAY;
   1977 
   1978 		/* check column */
   1979 		if (!updateit) {
   1980 			if (meta_check_column(sp, colnp, ep) != 0)
   1981 				return (-1);
   1982 			if (((start_blk = metagetstart(sp, colnp, ep)) ==
   1983 			    MD_DISKADDR_ERROR) || ((size = metagetsize(colnp,
   1984 			    ep)) == MD_DISKADDR_ERROR)) {
   1985 				return (-1);
   1986 			}
   1987 			if (start_blk >= size)
   1988 				return (mdsyserror(ep, ENOSPC, colnp->cname));
   1989 			size -= start_blk;
   1990 			size = rounddown(size, raidp->interlace);
   1991 			if (size == 0)
   1992 				return (mdsyserror(ep, ENOSPC, colnp->cname));
   1993 		}
   1994 
   1995 		/* check this raid too */
   1996 		if (check_twice(raidp, col, ep) != 0)
   1997 			return (-1);
   1998 	}
   1999 
   2000 	/* return success */
   2001 	return (0);
   2002 }
   2003 
   2004 /*
   2005  * setup raid geometry
   2006  */
   2007 static int
   2008 raid_geom(
   2009 	md_raid_t	*raidp,
   2010 	mr_unit_t	*mr,
   2011 	md_error_t	*ep
   2012 )
   2013 {
   2014 	uint_t		write_reinstruct = 0;
   2015 	uint_t		read_reinstruct = 0;
   2016 	uint_t		round_cyl = 1;
   2017 	uint_t		col;
   2018 	mdgeom_t	*geomp;
   2019 
   2020 	/* get worst reinstructs */
   2021 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
   2022 		md_raidcol_t	*mdcp = &raidp->cols.cols_val[col];
   2023 		mdname_t	*colnp = mdcp->colnamep;
   2024 
   2025 		if ((geomp = metagetgeom(colnp, ep)) == NULL)
   2026 			return (-1);
   2027 		if (geomp->write_reinstruct > write_reinstruct)
   2028 			write_reinstruct = geomp->write_reinstruct;
   2029 		if (geomp->read_reinstruct > read_reinstruct)
   2030 			read_reinstruct = geomp->read_reinstruct;
   2031 	}
   2032 
   2033 	/* setup geometry from first column */
   2034 	assert(raidp->cols.cols_len > 0);
   2035 	if ((geomp = metagetgeom(raidp->cols.cols_val[0].colnamep,
   2036 	    ep)) == NULL) {
   2037 		return (-1);
   2038 	}
   2039 	if (meta_setup_geom((md_unit_t *)mr, raidp->common.namep, geomp,
   2040 	    write_reinstruct, read_reinstruct, round_cyl, ep) != 0)
   2041 		return (-1);
   2042 
   2043 	/* return success */
   2044 	return (0);
   2045 }
   2046 
   2047 int
   2048 meta_raid_state_cnt(mr_unit_t *mr, rcs_state_t state)
   2049 {
   2050 	int 	statecnt = 0;
   2051 	int	col;
   2052 
   2053 	for (col = 0; col < mr->un_totalcolumncnt; col++)
   2054 		if (mr->un_column[col].un_devstate & state)
   2055 			statecnt++;
   2056 	return (statecnt);
   2057 }
   2058 /*
   2059  * validate that a raid device being created with the -k flag is a real
   2060  * raid device
   2061  */
   2062 int
   2063 meta_raid_valid(md_raid_t *raidp, mr_unit_t *mr)
   2064 {
   2065 	long long	buf[DEV_BSIZE / sizeof (long long)];
   2066 	raid_pwhdr_t	pwhdr;
   2067 	raid_pwhdr_t	*rpw = &pwhdr;
   2068 	minor_t		mnum;
   2069 	int		col;
   2070 	int		fd;
   2071 
   2072 	for (col = 0; col < mr->un_totalcolumncnt; col++) {
   2073 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
   2074 		mdname_t	*colnp = cp->colnamep;
   2075 
   2076 		if ((fd = open(colnp->rname, O_RDONLY)) < 0)
   2077 			goto error_exit;
   2078 
   2079 		if (lseek64(fd,
   2080 		    (mr->un_column[col].un_pwstart * DEV_BSIZE), SEEK_SET) < 0)
   2081 			goto error_exit;
   2082 
   2083 		if (read(fd, buf, DEV_BSIZE) < 0)
   2084 			goto error_exit;
   2085 
   2086 		/*
   2087 		 * If our raid device is a 64 bit device, we can accept the
   2088 		 * pw header we just read in.
   2089 		 * Otherwise it's of type raid_pwhdr32_od_t and has to
   2090 		 * be converted.
   2091 		 */
   2092 		if (mr->c.un_revision & MD_64BIT_META_DEV) {
   2093 			rpw = (raid_pwhdr_t *)buf;
   2094 		} else {
   2095 			RAID_CONVERT_RPW((raid_pwhdr32_od_t *)buf, rpw);
   2096 		}
   2097 
   2098 		if (rpw->rpw_column != col)
   2099 			goto error_exit;
   2100 
   2101 		if (col == 0)
   2102 			mnum = rpw->rpw_unit;
   2103 
   2104 		if (rpw->rpw_unit != mnum)
   2105 			goto error_exit;
   2106 
   2107 		if (rpw->rpw_magic_ext == RAID_PWMAGIC) {
   2108 			/* 4.1 prewrite header */
   2109 			if ((rpw->rpw_origcolumncnt != mr->un_origcolumncnt) ||
   2110 			    (rpw->rpw_totalcolumncnt !=
   2111 			    mr->un_totalcolumncnt) ||
   2112 			    (rpw->rpw_segsize != mr->un_segsize) ||
   2113 			    (rpw->rpw_segsincolumn != mr->un_segsincolumn) ||
   2114 			    (rpw->rpw_pwcnt != mr->un_pwcnt) ||
   2115 			    (rpw->rpw_pwstart !=
   2116 			    mr->un_column[col].un_pwstart) ||
   2117 			    (rpw->rpw_devstart !=
   2118 			    mr->un_column[col].un_devstart) ||
   2119 			    (rpw->rpw_pwsize != mr->un_pwsize))
   2120 				goto error_exit;
   2121 		}
   2122 		/*
   2123 		 * this is an old prewrite header (4.0) the unit structure
   2124 		 * will have to be trusted.
   2125 		 */
   2126 		(void) close(fd);
   2127 	}
   2128 
   2129 	return (0);
   2130 
   2131 error_exit:
   2132 	(void) close(fd);
   2133 	return (-1);
   2134 }
   2135 
   2136 /*
   2137  * create raid
   2138  */
   2139 int
   2140 meta_create_raid(
   2141 	mdsetname_t	*sp,
   2142 	md_raid_t	*raidp,
   2143 	mdcmdopts_t	options,
   2144 	md_error_t	*ep
   2145 )
   2146 {
   2147 	mdname_t	*raidnp = raidp->common.namep;
   2148 	uint_t		ncol = raidp->cols.cols_len;
   2149 	uint_t		orig_ncol = raidp->orig_ncol;
   2150 	size_t		rdsize;
   2151 	mr_unit_t	*mr;
   2152 	uint_t		col;
   2153 	diskaddr_t	disk_size = 0;
   2154 	uint_t		disk_maxio = 0;
   2155 	uint_t		pwes;
   2156 	diskaddr_t	non_pw_blks, column_size;
   2157 	mdnamelist_t	*keynlp = NULL;
   2158 	md_set_params_t	set_params;
   2159 	int		rval = -1;
   2160 	md_timeval32_t	creation_time;
   2161 	int		create_flag = MD_CRO_32BIT;
   2162 
   2163 	/* validate raid */
   2164 	if (meta_check_raid(sp, raidp, options, ep) != 0)
   2165 		return (-1);
   2166 
   2167 	/* allocate raid unit */
   2168 	rdsize = sizeof (*mr) - sizeof (mr->un_column[0]);
   2169 	rdsize += ncol * sizeof (mr->un_column[0]);
   2170 	mr = Zalloc(rdsize);
   2171 
   2172 	if (meta_gettimeofday(&creation_time) == -1)
   2173 		return (mdsyserror(ep, errno, NULL));
   2174 	/*
   2175 	 * initialize the top level mr_unit_t structure
   2176 	 * setup the unit state to indicate whether to retain
   2177 	 * any data currently on the metadevice or to clear it
   2178 	 */
   2179 	mr->c.un_type = MD_METARAID;
   2180 	MD_SID(mr) = meta_getminor(raidnp->dev);
   2181 	mr->c.un_size = rdsize;
   2182 	mr->un_magic = RAID_UNMAGIC;
   2183 	mr->un_state = raidp->state;
   2184 	mr->un_timestamp = creation_time;
   2185 	mr->un_origcolumncnt = orig_ncol;
   2186 	mr->un_segsize = (uint_t)raidp->interlace;
   2187 	if (raidp->hspnamep != NULL) {
   2188 		mr->un_hsp_id = raidp->hspnamep->hsp;
   2189 	} else {
   2190 		mr->un_hsp_id = MD_HSP_NONE;
   2191 	}
   2192 	/*
   2193 	 * setup original columns, saving start_block and
   2194 	 * finding smallest size and maxio
   2195 	 */
   2196 	for (col = 0; (col < orig_ncol); ++col) {
   2197 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
   2198 		mdname_t	*colnp = cp->colnamep;
   2199 		mr_column_t	*mdc = &mr->un_column[col];
   2200 		diskaddr_t	size;
   2201 		uint_t		maxio;
   2202 		mdcinfo_t	*cinfop;
   2203 
   2204 		/* setup state */
   2205 		mdc->un_devstate = cp->state;
   2206 
   2207 		/* setup creation time */
   2208 		mdc->un_devtimestamp = creation_time;
   2209 
   2210 		/* get start, size, and maxio */
   2211 		if ((mdc->un_orig_devstart = metagetstart(sp, colnp, ep)) ==
   2212 		    MD_DISKADDR_ERROR)
   2213 			goto out;
   2214 		if ((size = metagetsize(colnp, ep)) == MD_DISKADDR_ERROR)
   2215 			goto out;
   2216 		size -= mdc->un_orig_devstart;
   2217 		if ((cinfop = metagetcinfo(colnp, ep)) == NULL)
   2218 			goto out;
   2219 		maxio = cinfop->maxtransfer;
   2220 
   2221 		if (options & MDCMD_DOIT) {
   2222 			/* store name in namespace */
   2223 			if (add_key_name(sp, colnp, &keynlp, ep) != 0)
   2224 				goto out;
   2225 		}
   2226 
   2227 		/* setup column */
   2228 		mdc->un_orig_key = colnp->key;
   2229 		mdc->un_orig_dev = colnp->dev;
   2230 		mdc->un_dev = mdc->un_orig_dev;
   2231 		mdc->un_pwstart = mdc->un_orig_pwstart;
   2232 		mdc->un_devstart = mdc->un_orig_devstart;
   2233 		mdc->un_alt_dev = NODEV64;
   2234 		mdc->un_alt_pwstart = 0;
   2235 		mdc->un_alt_devstart = 0;
   2236 		mdc->un_hs_id = 0;
   2237 		if (mr->un_state == RUS_INIT)
   2238 			mdc->un_devstate = RCS_INIT;
   2239 		else
   2240 			mdc->un_devstate = RCS_OKAY;
   2241 
   2242 		/* adjust for smallest disk */
   2243 		if (disk_size == 0) {
   2244 			disk_size = size;
   2245 		} else if (size < disk_size) {
   2246 			disk_size = size;
   2247 		}
   2248 		if (disk_maxio == 0) {
   2249 			disk_maxio = maxio;
   2250 		} else if (maxio < disk_maxio) {
   2251 			disk_maxio = maxio;
   2252 		}
   2253 	}
   2254 	assert(col == mr->un_origcolumncnt);
   2255 
   2256 	/*
   2257 	 * before processing any of the attached column(s)
   2258 	 * set up the composition of the metadevice for column
   2259 	 * sizes and pre-write information
   2260 	 */
   2261 	mr->un_maxio = disk_maxio;	/* smallest maxio */
   2262 	mr->un_iosize = min(mr->un_maxio, (mr->un_segsize + 1));
   2263 	pwes = mr->un_iosize;
   2264 	if (raidp->pw_count)
   2265 		mr->un_pwcnt = raidp->pw_count;
   2266 	else
   2267 		mr->un_pwcnt = PWCNT_MIN;
   2268 	if ((mr->un_pwcnt < PWCNT_MIN) || (mr->un_pwcnt > PWCNT_MAX)) {
   2269 		(void) mderror(ep, MDE_RAID_BAD_PW_CNT, raidnp->cname);
   2270 		goto out;
   2271 	}
   2272 	mr->un_pwsize = roundup((mr->un_pwcnt * pwes), 2);
   2273 
   2274 	/* now calculate the number of segments per column */
   2275 	non_pw_blks = disk_size - mr->un_pwsize;	/* smallest disk */
   2276 	if ((mr->un_pwsize > disk_size) ||
   2277 	    (non_pw_blks < (diskaddr_t)mr->un_segsize)) {
   2278 		(void) mdsyserror(ep, ENOSPC, raidnp->cname);
   2279 		goto out;
   2280 	}
   2281 	mr->un_segsincolumn = non_pw_blks / mr->un_segsize;
   2282 	column_size = mr->un_segsize * mr->un_segsincolumn;
   2283 
   2284 	/*
   2285 	 * adjust the pw_cnt, pw_size, to fit into any fragmentation
   2286 	 * left over after column_size has been computed
   2287 	 */
   2288 	mr->un_pwsize = rounddown(((uint_t)(disk_size - column_size)), 2);
   2289 	mr->un_pwcnt = mr->un_pwsize / pwes;
   2290 	assert(mr->un_pwcnt >= PWCNT_MIN);
   2291 	mr->un_pwsize = roundup((mr->un_pwcnt * pwes), 2);
   2292 	assert((mr->un_pwsize + column_size) <= disk_size);
   2293 
   2294 	/*
   2295 	 * calculate the actual block count available based on the
   2296 	 * segment size and the number of segments per column ...
   2297 	 * ... and adjust for the number of parity segments
   2298 	 */
   2299 	mr->c.un_actual_tb = column_size * (mr->un_origcolumncnt - 1);
   2300 
   2301 	if (raid_geom(raidp, mr, ep) != 0)
   2302 		goto out;
   2303 
   2304 	create_flag = meta_check_devicesize(mr->c.un_total_blocks);
   2305 
   2306 	/*
   2307 	 * now calculate the pre-write offset and update the column
   2308 	 * structures to include the address of the individual pre-write
   2309 	 * areas
   2310 	 */
   2311 	for (col = 0; (col < orig_ncol); ++col) {
   2312 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
   2313 		mdname_t	*colnp = cp->colnamep;
   2314 		mr_column_t	*mdc = &mr->un_column[col];
   2315 		diskaddr_t	size;
   2316 
   2317 		/* get size */
   2318 		if ((size = metagetsize(colnp, ep)) == MD_DISKADDR_ERROR)
   2319 			goto out;
   2320 
   2321 		/* adjust start and size by prewrite */
   2322 		mdc->un_orig_pwstart = mdc->un_orig_devstart;
   2323 		mdc->un_orig_devstart += mr->un_pwsize;
   2324 		mdc->un_pwstart = mdc->un_orig_pwstart;
   2325 		mdc->un_devstart = mdc->un_orig_devstart;
   2326 
   2327 		assert(size >= mdc->un_orig_devstart);
   2328 		size -= mdc->un_orig_devstart;
   2329 
   2330 		/* make sure we still have something left */
   2331 		assert(size >= column_size);
   2332 	}
   2333 
   2334 	/* do concat cols */
   2335 	mr->un_totalcolumncnt = mr->un_origcolumncnt;
   2336 	assert(col == mr->un_origcolumncnt);
   2337 	for (col = orig_ncol; (col < ncol); ++col) {
   2338 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
   2339 		mdname_t	*colnp = cp->colnamep;
   2340 		mr_column_t	*mdc = &mr->un_column[col];
   2341 
   2342 		/* attach column */
   2343 		if (attach_raid_col(sp, raidnp, mr, mdc, colnp,
   2344 		    cp->state, &keynlp, options, ep) != 0) {
   2345 			goto out;
   2346 		}
   2347 	}
   2348 	assert(mr->un_totalcolumncnt == ncol);
   2349 
   2350 	/* fill in the size of the raid */
   2351 	if (options & MDCMD_UPDATE) {
   2352 		raidp->common.size = mr->c.un_total_blocks;
   2353 		raidp->column_size = mr->un_segsize * mr->un_segsincolumn;
   2354 	}
   2355 
   2356 	/* if we're not doing anything, return success */
   2357 	if (! (options & MDCMD_DOIT)) {
   2358 		rval = 0;	/* success */
   2359 		goto out;
   2360 	}
   2361 
   2362 	if ((mr->un_state & RUS_OKAY) &&
   2363 	    (meta_raid_valid(raidp, mr) != 0)) {
   2364 		(void) mderror(ep, MDE_RAID_INVALID, raidnp->cname);
   2365 		goto out;
   2366 	}
   2367 
   2368 	/* create raid */
   2369 	(void) memset(&set_params, 0, sizeof (set_params));
   2370 	/* did the user tell us to generate a large device? */
   2371 	if (create_flag == MD_CRO_64BIT) {
   2372 		mr->c.un_revision |= MD_64BIT_META_DEV;
   2373 		set_params.options = MD_CRO_64BIT;
   2374 	} else {
   2375 		mr->c.un_revision &= ~MD_64BIT_META_DEV;
   2376 		set_params.options = MD_CRO_32BIT;
   2377 	}
   2378 	set_params.mnum = MD_SID(mr);
   2379 	set_params.size = mr->c.un_size;
   2380 	set_params.mdp = (uintptr_t)mr;
   2381 	MD_SETDRIVERNAME(&set_params, MD_RAID, MD_MIN2SET(set_params.mnum));
   2382 	if (metaioctl(MD_IOCSET, &set_params, &set_params.mde,
   2383 	    raidnp->cname) != 0) {
   2384 		(void) mdstealerror(ep, &set_params.mde);
   2385 		goto out;
   2386 	}
   2387 	rval = 0;	/* success */
   2388 
   2389 	/* cleanup, return success */
   2390 out:
   2391 	Free(mr);
   2392 	if (rval != 0) {
   2393 		(void) del_key_names(sp, keynlp, NULL);
   2394 	}
   2395 	metafreenamelist(keynlp);
   2396 	if ((rval == 0) && (options & MDCMD_DOIT)) {
   2397 		if (invalidate_columns(sp, raidnp, ep) != 0)
   2398 			rval = -1;
   2399 		meta_invalidate_name(raidnp);
   2400 	}
   2401 	return (rval);
   2402 }
   2403 
   2404 /*
   2405  * initialize raid
   2406  * NOTE: this functions is metainit(1m)'s command line parser!
   2407  */
   2408 int
   2409 meta_init_raid(
   2410 	mdsetname_t	**spp,
   2411 	int		argc,
   2412 	char		*argv[],
   2413 	mdcmdopts_t	options,
   2414 	md_error_t	*ep
   2415 )
   2416 {
   2417 	char		*uname = argv[0];
   2418 	mdname_t	*raidnp = NULL;
   2419 	int		old_optind;
   2420 	int		c;
   2421 	md_raid_t	*raidp = NULL;
   2422 	uint_t		ncol, col;
   2423 	int		rval = -1;
   2424 	md_set_desc	*sd;
   2425 
   2426 	/* get raid name */
   2427 	assert(argc > 0);
   2428 	if (argc < 1)
   2429 		goto syntax;
   2430 	if ((raidnp = metaname(spp, uname, META_DEVICE, ep)) == NULL)
   2431 		goto out;
   2432 	assert(*spp != NULL);
   2433 
   2434 	/*
   2435 	 * Raid metadevice not allowed on multi-node diskset.
   2436 	 */
   2437 	if (! metaislocalset(*spp)) {
   2438 		if ((sd = metaget_setdesc(*spp, ep)) == NULL)
   2439 			goto out;
   2440 		if (MD_MNSET_DESC(sd)) {
   2441 			rval = meta_cook_syntax(ep, MDE_MNSET_NORAID, uname,
   2442 			    argc, argv);
   2443 			goto out;
   2444 		}
   2445 	}
   2446 
   2447 	uname = raidnp->cname;
   2448 	if (metachkmeta(raidnp, ep) != 0)
   2449 		goto out;
   2450 
   2451 	if (!(options & MDCMD_NOLOCK)) {
   2452 		/* grab set lock */
   2453 		if (meta_lock(*spp, TRUE, ep) != 0)
   2454 			goto out;
   2455 
   2456 		if (meta_check_ownership(*spp, ep) != 0)
   2457 			goto out;
   2458 	}
   2459 
   2460 	/* see if it exists already */
   2461 	if (metagetmiscname(raidnp, ep) != NULL) {
   2462 		(void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP,
   2463 		    meta_getminor(raidnp->dev), uname);
   2464 		goto out;
   2465 	} else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) {
   2466 		goto out;
   2467 	} else {
   2468 		mdclrerror(ep);
   2469 	}
   2470 	--argc, ++argv;
   2471 
   2472 	/* grab -r */
   2473 	if ((argc < 1) || (strcmp(argv[0], "-r") != 0))
   2474 		goto syntax;
   2475 	--argc, ++argv;
   2476 
   2477 	/* parse general options */
   2478 	optind = 0;
   2479 	opterr = 0;
   2480 	if (getopt(argc, argv, "") != -1)
   2481 		goto options;
   2482 
   2483 	/* allocate raid */
   2484 	raidp = Zalloc(sizeof (*raidp));
   2485 
   2486 	/* setup common */
   2487 	raidp->common.namep = raidnp;
   2488 	raidp->common.type = MD_METARAID;
   2489 	raidp->state = RUS_INIT;
   2490 
   2491 	/* allocate and parse cols */
   2492 	for (ncol = 0; ((ncol < argc) && (argv[ncol][0] != '-')); ++ncol)
   2493 		;
   2494 	raidp->cols.cols_len = ncol;
   2495 	if (ncol != 0) {
   2496 		raidp->cols.cols_val =
   2497 		    Zalloc(ncol * sizeof (*raidp->cols.cols_val));
   2498 	}
   2499 	for (col = 0; ((argc > 0) && (col < ncol)); ++col) {
   2500 		md_raidcol_t	*mdc = &raidp->cols.cols_val[col];
   2501 		mdname_t	*colnp;
   2502 
   2503 		/* parse column name */
   2504 		if ((colnp = metaname(spp, argv[0], UNKNOWN, ep)) == NULL)
   2505 			goto out;
   2506 		/* check for soft partitions */
   2507 		if (meta_sp_issp(*spp, colnp, ep) != 0) {
   2508 			/* check disks */
   2509 			if (metachkcomp(colnp, ep) != 0)
   2510 				goto out;
   2511 		}
   2512 		mdc->colnamep = colnp;
   2513 		--argc, ++argv;
   2514 	}
   2515 
   2516 	/* parse raid options */
   2517 	old_optind = optind = 0;
   2518 	opterr = 0;
   2519 	while ((c = getopt(argc, argv, "h:i:ko:w:")) != -1) {
   2520 		switch (c) {
   2521 		case 'h':
   2522 			if ((raidp->hspnamep = metahspname(spp, optarg,
   2523 			    ep)) == NULL) {
   2524 				goto out;
   2525 			}
   2526 
   2527 			/*
   2528 			 * Get out if the specified hotspare pool really
   2529 			 * doesn't exist.
   2530 			 */
   2531 			if (raidp->hspnamep->hsp == MD_HSP_NONE) {
   2532 				(void) mdhsperror(ep, MDE_INVAL_HSP,
   2533 				    raidp->hspnamep->hsp, optarg);
   2534 				goto out;
   2535 			}
   2536 			break;
   2537 
   2538 		case 'i':
   2539 			if (parse_interlace(uname, optarg, &raidp->interlace,
   2540 			    ep) != 0) {
   2541 				goto out;
   2542 			}
   2543 			if (meta_raid_check_interlace(raidp->interlace,
   2544 			    uname, ep))
   2545 				goto out;
   2546 			break;
   2547 
   2548 		case 'k':
   2549 			raidp->state = RUS_OKAY;
   2550 			break;
   2551 
   2552 		case 'o':
   2553 			if ((sscanf(optarg, "%u", &raidp->orig_ncol) != 1) ||
   2554 			    ((int)raidp->orig_ncol < 0)) {
   2555 				goto syntax;
   2556 			}
   2557 			if ((raidp->orig_ncol < MD_RAID_MIN) ||
   2558 			    (raidp->orig_ncol > ncol)) {
   2559 				rval = mderror(ep, MDE_BAD_ORIG_NCOL, uname);
   2560 				goto out;
   2561 			}
   2562 			break;
   2563 		case 'w':
   2564 			if ((sscanf(optarg, "%d", &raidp->pw_count) != 1) ||
   2565 			    ((int)raidp->pw_count < 0))
   2566 				goto syntax;
   2567 			if (((int)raidp->pw_count < PWCNT_MIN) ||
   2568 			    ((int)raidp->pw_count > PWCNT_MAX)) {
   2569 				rval = mderror(ep, MDE_RAID_BAD_PW_CNT, uname);
   2570 				goto out;
   2571 			}
   2572 			break;
   2573 		default:
   2574 			argc += old_optind;
   2575 			argv -= old_optind;
   2576 			goto options;
   2577 		}
   2578 		old_optind = optind;
   2579 	}
   2580 	argc -= optind;
   2581 	argv += optind;
   2582 
   2583 	/* we should be at the end */
   2584 	if (argc != 0)
   2585 		goto syntax;
   2586 
   2587 	/* default to all original columns */
   2588 	if (raidp->orig_ncol == 0)
   2589 		raidp->orig_ncol = ncol;
   2590 
   2591 	/* create raid */
   2592 	if (meta_create_raid(*spp, raidp, options, ep) != 0)
   2593 		goto out;
   2594 	rval = 0;	/* success */
   2595 
   2596 	/* let em know */
   2597 	if (options & MDCMD_PRINT) {
   2598 		(void) printf(dgettext(TEXT_DOMAIN, "%s: RAID is setup\n"),
   2599 		    uname);
   2600 		(void) fflush(stdout);
   2601 	}
   2602 	goto out;
   2603 
   2604 	/* syntax error */
   2605 syntax:
   2606 	rval = meta_cook_syntax(ep, MDE_SYNTAX, uname, argc, argv);
   2607 	goto out;
   2608 
   2609 	/* options error */
   2610 options:
   2611 	rval = meta_cook_syntax(ep, MDE_OPTION, uname, argc, argv);
   2612 	goto out;
   2613 
   2614 	/* cleanup, return error */
   2615 out:
   2616 	if (raidp != NULL)
   2617 		meta_free_raid(raidp);
   2618 	return (rval);
   2619 }
   2620 
   2621 /*
   2622  * reset RAIDs
   2623  */
   2624 int
   2625 meta_raid_reset(
   2626 	mdsetname_t	*sp,
   2627 	mdname_t	*raidnp,
   2628 	mdcmdopts_t	options,
   2629 	md_error_t	*ep
   2630 )
   2631 {
   2632 	md_raid_t	*raidp;
   2633 	int		rval = -1;
   2634 	int		col;
   2635 
   2636 	/* should have same set */
   2637 	assert(sp != NULL);
   2638 	assert((raidnp == NULL) ||
   2639 	    (sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))));
   2640 
   2641 	/* reset all raids */
   2642 	if (raidnp == NULL) {
   2643 		mdnamelist_t	*raidnlp = NULL;
   2644 		mdnamelist_t	*p;
   2645 
   2646 		/* for each raid */
   2647 		rval = 0;
   2648 		if (meta_get_raid_names(sp, &raidnlp, 0, ep) < 0)
   2649 			return (-1);
   2650 		for (p = raidnlp; (p != NULL); p = p->next) {
   2651 			/* reset RAID */
   2652 			raidnp = p->namep;
   2653 			if (meta_raid_reset(sp, raidnp, options, ep) != 0) {
   2654 				rval = -1;
   2655 				break;
   2656 			}
   2657 		}
   2658 
   2659 		/* cleanup, return success */
   2660 		metafreenamelist(raidnlp);
   2661 		return (rval);
   2662 	}
   2663 
   2664 	/* check name */
   2665 	if (metachkmeta(raidnp, ep) != 0)
   2666 		return (-1);
   2667 
   2668 	/* get unit structure */
   2669 	if ((raidp = meta_get_raid(sp, raidnp, ep)) == NULL)
   2670 		return (-1);
   2671 
   2672 	/* make sure nobody owns us */
   2673 	if (MD_HAS_PARENT(raidp->common.parent)) {
   2674 		return (mdmderror(ep, MDE_IN_USE, meta_getminor(raidnp->dev),
   2675 		    raidnp->cname));
   2676 	}
   2677 
   2678 	/* clear subdevices cache */
   2679 	if (invalidate_columns(sp, raidnp, ep) != 0)
   2680 		return (-1);
   2681 
   2682 	/* clear metadevice */
   2683 	if (meta_reset(sp, raidnp, options, ep) != 0)
   2684 		goto out;
   2685 	rval = 0;	/* success */
   2686 
   2687 	/* let em know */
   2688 	if (options & MDCMD_PRINT) {
   2689 		(void) printf(dgettext(TEXT_DOMAIN, "%s: RAID is cleared\n"),
   2690 		    raidnp->cname);
   2691 		(void) fflush(stdout);
   2692 	}
   2693 
   2694 	/* clear subdevices */
   2695 	if (! (options & MDCMD_RECURSE))
   2696 		goto out;
   2697 
   2698 	for (col = 0; (col < raidp->cols.cols_len); ++col) {
   2699 		md_raidcol_t	*cp = &raidp->cols.cols_val[col];
   2700 		mdname_t	*colnp = cp->colnamep;
   2701 
   2702 		/* only recurse on metadevices */
   2703 		if (! metaismeta(colnp))
   2704 			continue;
   2705 
   2706 		if (meta_reset_by_name(sp, colnp, options, ep) != 0)
   2707 			rval = -1;
   2708 	}
   2709 
   2710 	/* cleanup, return success */
   2711 out:
   2712 	meta_invalidate_name(raidnp);
   2713 	return (rval);
   2714 }
   2715 
   2716 /*
   2717  * reports TRUE if any RAID component is in error
   2718  */
   2719 int
   2720 meta_raid_anycomp_is_err(mdsetname_t *sp, mdnamelist_t *raid_names)
   2721 {
   2722 	mdnamelist_t	*nlp;
   2723 	md_error_t	  status	= mdnullerror;
   2724 	md_error_t	 *ep		= &status;
   2725 	int		  any_errs	= FALSE;
   2726 
   2727 	for (nlp = raid_names; nlp; nlp = nlp->next) {
   2728 		md_raid_t	*raidp;
   2729 
   2730 		if ((raidp = meta_get_raid(sp, nlp->namep, ep)) == NULL) {
   2731 			any_errs |= TRUE;
   2732 			goto out;
   2733 		}
   2734 		if (raidp->state != RUS_OKAY && raidp->state != RUS_INIT) {
   2735 			any_errs |= TRUE;
   2736 			goto out;
   2737 		}
   2738 	}
   2739 out:
   2740 	if (!mdisok(ep))
   2741 		mdclrerror(ep);
   2742 
   2743 	return (any_errs);
   2744 }
   2745 /*
   2746  * regen parity on a raid
   2747  */
   2748 int
   2749 meta_raid_regen_byname(mdsetname_t *sp, mdname_t *raidnp, diskaddr_t size,
   2750 	md_error_t *ep)
   2751 {
   2752 	char			*miscname;
   2753 	md_resync_ioctl_t	ri;
   2754 
   2755 	/* should have a set */
   2756 	assert(sp != NULL);
   2757 	assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)));
   2758 
   2759 	/* make sure we have a raid */
   2760 	if ((miscname = metagetmiscname(raidnp, ep)) == NULL)
   2761 		return (-1);
   2762 	if (strcmp(miscname, MD_RAID) != 0) {
   2763 		return (mdmderror(ep, MDE_NOT_RAID, meta_getminor(raidnp->dev),
   2764 		    raidnp->cname));
   2765 	}
   2766 
   2767 	/* start resync */
   2768 	(void) memset(&ri, 0, sizeof (ri));
   2769 	MD_SETDRIVERNAME(&ri, MD_RAID, sp->setno);
   2770 	ri.ri_mnum = meta_getminor(raidnp->dev);
   2771 	ri.ri_copysize = size;
   2772 	if (metaioctl(MD_IOCSETREGEN, &ri, &ri.mde, raidnp->cname) != 0)
   2773 		return (mdstealerror(ep, &ri.mde));
   2774 
   2775 	/* return success */
   2776 	return (0);
   2777 }
   2778 
   2779 int
   2780 meta_raid_check_component(
   2781 	mdsetname_t	*sp,
   2782 	mdname_t	*np,
   2783 	md_dev64_t	mydevs,
   2784 	md_error_t	*ep
   2785 )
   2786 {
   2787 	md_raid_t	 *raid;
   2788 	mdnm_params_t	nm;
   2789 	md_getdevs_params_t	mgd;
   2790 	side_t	sideno;
   2791 	char	*miscname;
   2792 	md_dev64_t	*mydev = NULL;
   2793 	mdkey_t	key;
   2794 	char	*pname = NULL, *t;
   2795 	char	*ctd_name = NULL;
   2796 	char	*devname = NULL;
   2797 	int	len;
   2798 	int	i;
   2799 	int	rval = -1;
   2800 
   2801 	(void) memset(&nm, '\0', sizeof (nm));
   2802 	if ((raid = meta_get_raid_common(sp, np, 0, ep)) == NULL)
   2803 		return (-1);
   2804 
   2805 	if ((miscname = metagetmiscname(np, ep)) == NULL)
   2806 		return (-1);
   2807 
   2808 	sideno = getmyside(sp, ep);
   2809 
   2810 	/* get count of underlying devices */
   2811 
   2812 	(void) memset(&mgd, '\0', sizeof (mgd));
   2813 	MD_SETDRIVERNAME(&mgd, miscname, sp->setno);
   2814 	mgd.mnum = meta_getminor(np->dev);
   2815 	mgd.cnt = 0;
   2816 	mgd.devs = NULL;
   2817 	if (metaioctl(MD_IOCGET_DEVS, &mgd, &mgd.mde, np->cname) != 0) {
   2818 		(void) mdstealerror(ep, &mgd.mde);
   2819 		rval = 0;
   2820 		goto out;
   2821 	} else if (mgd.cnt <= 0) {
   2822 		assert(mgd.cnt >= 0);
   2823 		rval = 0;
   2824 		goto out;
   2825 	}
   2826 
   2827 	/*
   2828 	 * Now get the data from the unit structure.
   2829 	 * The compnamep stuff contains the data from
   2830 	 * the namespace and we need the un_dev
   2831 	 * from the unit structure.
   2832 	 */
   2833 	mydev = Zalloc(sizeof (*mydev) * mgd.cnt);
   2834 	mgd.devs = (uintptr_t)mydev;
   2835 	if (metaioctl(MD_IOCGET_DEVS, &mgd, &mgd.mde, np->cname) != 0) {
   2836 		(void) mdstealerror(ep, &mgd.mde);
   2837 		rval = 0;
   2838 		goto out;
   2839 	} else if (mgd.cnt <= 0) {
   2840 		assert(mgd.cnt >= 0);
   2841 		rval = 0;
   2842 		goto out;
   2843 	}
   2844 
   2845 	for (i = 0; i < raid->orig_ncol; i++) {
   2846 		md_raidcol_t	*colp = &raid->cols.cols_val[i];
   2847 		mdname_t	*compnp = colp->colnamep;
   2848 
   2849 		if (mydevs == mydev[i]) {
   2850 			/* Get the devname from the name space. */
   2851 			if ((devname = meta_getnmentbydev(sp->setno, sideno,
   2852 			    compnp->dev, NULL, NULL, &key, ep)) == NULL) {
   2853 				goto out;
   2854 			}
   2855 
   2856 			if (compnp->dev != meta_getminor(mydev[i])) {
   2857 				/*
   2858 				 * The minor numbers are different. Update
   2859 				 * the namespace with the information from
   2860 				 * the component.
   2861 				 */
   2862 
   2863 				t = strrchr(devname, '/');
   2864 				t++;
   2865 				ctd_name = Strdup(t);
   2866 
   2867 				len = strlen(devname);
   2868 				t = strrchr(devname, '/');
   2869 				t++;
   2870 				pname = Zalloc((len - strlen(t)) + 1);
   2871 				(void) strncpy(pname, devname,
   2872 				    (len - strlen(t)));
   2873 
   2874 				if (meta_update_namespace(sp->setno, sideno,
   2875 				    ctd_name, mydev[i], key, pname,
   2876 				    ep) != 0) {
   2877 					goto out;
   2878 				}
   2879 			}
   2880 			rval = 0;
   2881 			break;
   2882 		} /* End of if (mydevs == mydev[i]) */
   2883 	} /* end of for loop */
   2884 out:
   2885 	if (pname != NULL)
   2886 		Free(pname);
   2887 	if (ctd_name != NULL)
   2888 		Free(ctd_name);
   2889 	if (devname != NULL)
   2890 		Free(devname);
   2891 	if (mydev != NULL)
   2892 		Free(mydev);
   2893 	return (rval);
   2894 }
   2895