Home | History | Annotate | Download | only in common
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*
     28  * Just in case we're not in a build environment, make sure that
     29  * TEXT_DOMAIN gets set to something.
     30  */
     31 #if !defined(TEXT_DOMAIN)
     32 #define	TEXT_DOMAIN "SYS_TEST"
     33 #endif
     34 
     35 /*
     36  * Metadevice database interfaces.
     37  */
     38 
     39 #define	MDDB
     40 
     41 #include <meta.h>
     42 #include <sys/lvm/md_mddb.h>
     43 #include <sys/lvm/md_crc.h>
     44 #include <sys/lvm/mdio.h>
     45 #include <string.h>
     46 #include <strings.h>
     47 #include <ctype.h>
     48 
     49 struct svm_daemon {
     50 	char *svmd_name;
     51 	char *svmd_kill_val;
     52 };
     53 
     54 /*
     55  * This is a list of the daemons that are not stopped by the SVM smf(5)
     56  * services. The mdmonitord is started via svc:/system/mdmonitor:default
     57  * but no contract(4) is constructed and so it is not stopped by smf(5).
     58  */
     59 struct svm_daemon svmd_kill_list[] = {
     60 		{"mdmonitord", "HUP"},
     61 		{"mddoors", "KILL"},
     62 	};
     63 
     64 #define	DAEMON_COUNT (sizeof (svmd_kill_list)/ sizeof (struct svm_daemon))
     65 
/*
 * Defined outside this file.  update_dbinfo_on_drives() calls it with
 * block == TRUE to block signals before taking the set locks of a MN
 * diskset, and treats a negative return as a failure.
 */
extern int procsigs(int block, sigset_t *oldsigs, md_error_t *ep);

/*
 * Are the locator blocks for the replicas using devids
 *
 * meta_db_addsidenms() consults this flag: while it is FALSE, a disk name
 * that splitname() reports as METASPLIT_LONGDISKNAME is rejected with
 * MDE_DISKNAMETOOLONG.
 */
static int	devid_in_use = FALSE;
     72 
     73 static char *
     74 getlongname(
     75 	struct mddb_config	*c,
     76 	md_error_t		*ep
     77 )
     78 {
     79 	char		*diskname = NULL;
     80 	char		*devid_str;
     81 	devid_nmlist_t	*disklist = NULL;
     82 
     83 	c->c_locator.l_devid_flags = MDDB_DEVID_GETSZ;
     84 	if (metaioctl(MD_DB_ENDDEV, c, &c->c_mde, NULL) != 0) {
     85 		(void) mdstealerror(ep, &c->c_mde);
     86 		return (NULL);
     87 	}
     88 
     89 	if (c->c_locator.l_devid_flags & MDDB_DEVID_SZ) {
     90 		c->c_locator.l_devid = (uintptr_t)
     91 		    Malloc(c->c_locator.l_devid_sz);
     92 		c->c_locator.l_devid_flags =
     93 		    MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
     94 	} else {
     95 		(void) mderror(ep, MDE_NODEVID, "");
     96 		goto out;
     97 	}
     98 
     99 	if (metaioctl(MD_DB_ENDDEV, c, &c->c_mde, NULL) != 0) {
    100 		(void) mdstealerror(ep, &c->c_mde);
    101 		goto out;
    102 	}
    103 
    104 	if (c->c_locator.l_devid_flags & MDDB_DEVID_NOSPACE) {
    105 		(void) mderror(ep, MDE_NODEVID, "");
    106 		goto out;
    107 	}
    108 
    109 	if (metaioctl(MD_DB_GETDEV, c, &c->c_mde, NULL) != 0) {
    110 		(void) mdstealerror(ep, &c->c_mde);
    111 		goto out;
    112 	}
    113 
    114 	if (c->c_locator.l_devid != NULL) {
    115 		if (meta_deviceid_to_nmlist("/dev/dsk",
    116 		    (ddi_devid_t)(uintptr_t)c->c_locator.l_devid,
    117 		    c->c_locator.l_minor_name, &disklist) != 0) {
    118 			devid_str = devid_str_encode(
    119 			    (ddi_devid_t)(uintptr_t)c->c_locator.l_devid, NULL);
    120 			(void) mderror(ep, MDE_MISSING_DEVID_DISK, "");
    121 			mderrorextra(ep, devid_str);
    122 			if (devid_str != NULL)
    123 				devid_str_free(devid_str);
    124 			goto out;
    125 		}
    126 		diskname = Strdup(disklist[0].devname);
    127 	}
    128 
    129 out:
    130 	if (disklist != NULL)
    131 		devid_free_nmlist(disklist);
    132 
    133 	if (c->c_locator.l_devid != NULL)
    134 		Free((void *)(uintptr_t)c->c_locator.l_devid);
    135 
    136 	return (diskname);
    137 }
    138 
    139 /*
    140  * meta_get_lb_inittime sends a request for the lb_inittime to the kernel
    141  */
    142 md_timeval32_t
    143 meta_get_lb_inittime(
    144 	mdsetname_t	*sp,
    145 	md_error_t	*ep
    146 )
    147 {
    148 	mddb_config_t	c;
    149 
    150 	(void) memset(&c, 0, sizeof (c));
    151 
    152 	/* Fill in setno, setname, and sideno */
    153 	c.c_setno = sp->setno;
    154 
    155 	if (metaioctl(MD_DB_LBINITTIME, &c, &c.c_mde, NULL) != 0) {
    156 		(void) mdstealerror(ep, &c.c_mde);
    157 	}
    158 
    159 	return (c.c_timestamp);
    160 }
    161 
    162 /*
    163  * mkmasterblks writes out the master blocks of the mddb to the replica.
    164  *
    165  * In a MN diskset, this is called by the node that is adding this replica
    166  * to the diskset.
    167  */
    168 
    169 #define	MDDB_VERIFY_SIZE	8192
    170 
    171 static int
    172 mkmasterblks(
    173 	mdsetname_t	*sp,
    174 	mdname_t	*np,
    175 	int		fd,
    176 	daddr_t		firstblk,
    177 	int		dbsize,
    178 	md_timeval32_t	inittime,
    179 	md_error_t	*ep
    180 )
    181 {
    182 	int		consecutive;
    183 	md_timeval32_t	tp;
    184 	struct mddb_mb	*mb;
    185 	char		*buffer;
    186 	int		iosize;
    187 	md_set_desc	*sd;
    188 	int		mn_set = 0;
    189 	daddr_t		startblk;
    190 	int		cnt;
    191 	ddi_devid_t	devid;
    192 
    193 	if (! metaislocalset(sp)) {
    194 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
    195 			return (-1);
    196 
    197 		if (MD_MNSET_DESC(sd)) {
    198 			mn_set = 1;		/* Used later */
    199 		}
    200 	}
    201 
    202 	/*
    203 	 * Loop to verify the entire mddb region on disk is read/writable.
    204 	 * buffer is used to write/read in at most MDDB_VERIFY_SIZE block
    205 	 * chunks.
    206 	 *
    207 	 * A side-effect of this loop is to zero out the entire mddb region
    208 	 */
    209 	if ((buffer = Zalloc(MDDB_VERIFY_SIZE * DEV_BSIZE)) == NULL)
    210 		return (mdsyserror(ep, ENOMEM, np->rname));
    211 
    212 	startblk = firstblk;
    213 	for (cnt = dbsize; cnt > 0; cnt -= consecutive) {
    214 
    215 		if (cnt > MDDB_VERIFY_SIZE)
    216 			consecutive = MDDB_VERIFY_SIZE;
    217 		else
    218 			consecutive = cnt;
    219 
    220 		if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) {
    221 			Free(buffer);
    222 			return (mdsyserror(ep, errno, np->rname));
    223 		}
    224 
    225 		iosize = DEV_BSIZE * consecutive;
    226 		if (write(fd, buffer, iosize) != iosize) {
    227 			Free(buffer);
    228 			return (mdsyserror(ep, errno, np->rname));
    229 		}
    230 
    231 		if (lseek(fd, (off_t)(startblk * DEV_BSIZE), SEEK_SET) < 0) {
    232 			Free(buffer);
    233 			return (mdsyserror(ep, errno, np->rname));
    234 		}
    235 
    236 		if (read(fd, buffer, iosize) != iosize) {
    237 			Free(buffer);
    238 			return (mdsyserror(ep, errno, np->rname));
    239 		}
    240 
    241 		startblk += consecutive;
    242 	}
    243 
    244 	Free(buffer);
    245 	if ((mb = Zalloc(DEV_BSIZE)) == NULL)
    246 		return (mdsyserror(ep, ENOMEM, np->rname));
    247 
    248 	if (meta_gettimeofday(&tp) == -1) {
    249 		Free(mb);
    250 		return (mdsyserror(ep, errno, np->rname));
    251 	}
    252 
    253 	mb->mb_magic = MDDB_MAGIC_MB;
    254 	/*
    255 	 * If a MN diskset, set master block revision for a MN set.
    256 	 * Even though the master block structure is no different
    257 	 * for a MN set, setting the revision field to a different
    258 	 * number keeps any pre-MN_diskset code from accessing
    259 	 * this diskset.  It also allows for an early determination
    260 	 * of a MN diskset when reading in from disk so that the
    261 	 * proper size locator block and locator names structure
    262 	 * can be read in thus saving time on diskset startup.
    263 	 */
    264 	if (mn_set)
    265 		mb->mb_revision = MDDB_REV_MNMB;
    266 	else
    267 		mb->mb_revision = MDDB_REV_MB;
    268 	mb->mb_timestamp = tp;
    269 	mb->mb_setno = sp->setno;
    270 	mb->mb_blkcnt = dbsize - 1;
    271 	mb->mb_blkno = firstblk;
    272 	mb->mb_nextblk = 0;
    273 
    274 	mb->mb_blkmap.m_firstblk = firstblk + 1;
    275 	mb->mb_blkmap.m_consecutive = dbsize - 1;
    276 	if (! metaislocalset(sp)) {
    277 		mb->mb_setcreatetime = inittime;
    278 	}
    279 
    280 	/*
    281 	 * We try to save the disks device ID into the remaining bytes in
    282 	 * the master block. The saved devid is used to provide a mapping
    283 	 * between this disk's devid and the devid stored into the master
    284 	 * block. This allows the disk image to be self-identifying
    285 	 * if it gets copied (e.g. SNDR, True Copy, etc.).  This is used
    286 	 * when we try to import these disks on the remote copied image.
    287 	 * If we cannot save the disks device ID onto the master block that is
    288 	 * ok.  The disk is just not self-identifying and won't be importable
    289 	 * in the remote copy scenario.
    290 	 */
    291 	if (devid_get(fd, &devid) == 0) {
    292 		size_t len;
    293 
    294 		len = devid_sizeof(devid);
    295 		if (len <= DEV_BSIZE - sizeof (*mb)) {
    296 			/* there is enough space to store the devid */
    297 			mb->mb_devid_magic = MDDB_MAGIC_DE;
    298 			mb->mb_devid_len = len;
    299 			(void) memcpy(mb->mb_devid, devid, len);
    300 		}
    301 		devid_free(devid);
    302 	}
    303 
    304 	crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE,
    305 	    (crc_skip_t *)NULL);
    306 
    307 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) {
    308 		Free(mb);
    309 		return (mdsyserror(ep, errno, np->rname));
    310 	}
    311 
    312 	if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE) {
    313 		Free(mb);
    314 		return (mdsyserror(ep, errno, np->rname));
    315 	}
    316 
    317 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0) {
    318 		Free(mb);
    319 		return (mdsyserror(ep, errno, np->rname));
    320 	}
    321 
    322 	if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE) {
    323 		Free(mb);
    324 		return (mdsyserror(ep, errno, np->rname));
    325 	}
    326 
    327 	if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum,
    328 	    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL)) {
    329 		Free(mb);
    330 		return (mdmddberror(ep, MDE_NOTVERIFIED,
    331 		    meta_getminor(np->dev), sp->setno, 0, np->rname));
    332 	}
    333 
    334 	Free(mb);
    335 	return (0);
    336 }
    337 
    338 void
    339 meta_mkdummymaster(
    340 	mdsetname_t	*sp,
    341 	int		fd,
    342 	daddr_t		firstblk
    343 )
    344 {
    345 	md_timeval32_t	tp;
    346 	struct mddb_mb	*mb;
    347 	ddi_devid_t	devid;
    348 	md_set_desc	*sd;
    349 	md_error_t	ep = mdnullerror;
    350 	md_timeval32_t	inittime;
    351 
    352 	/*
    353 	 * No dummy master blocks are written for a MN diskset since devids
    354 	 * are not supported in MN disksets.
    355 	 */
    356 	if (! metaislocalset(sp)) {
    357 		if ((sd = metaget_setdesc(sp, &ep)) == NULL)
    358 			return;
    359 
    360 		if (MD_MNSET_DESC(sd))
    361 			return;
    362 	}
    363 
    364 	if ((mb = Zalloc(DEV_BSIZE)) == NULL)
    365 		return;
    366 
    367 	mb->mb_magic = MDDB_MAGIC_DU;
    368 	mb->mb_revision = MDDB_REV_MB;
    369 	mb->mb_setno = sp->setno;
    370 	inittime = meta_get_lb_inittime(sp, &ep);
    371 	mb->mb_setcreatetime = inittime;
    372 
    373 	if (meta_gettimeofday(&tp) != -1)
    374 		mb->mb_timestamp = tp;
    375 
    376 	/*
    377 	 * We try to save the disks device ID into the remaining bytes in
    378 	 * the master block.  This allows the disk image to be self-identifying
    379 	 * if it gets copied (e.g. SNDR, True Copy, etc.).  This is used
    380 	 * when we try to import these disks on the remote copied image.
    381 	 * If we cannot save the disks device ID onto the master block that is
    382 	 * ok.  The disk is just not self-identifying and won't be importable
    383 	 * in the remote copy scenario.
    384 	 */
    385 	if (devid_get(fd, &devid) == 0) {
    386 		int len;
    387 
    388 		len = devid_sizeof(devid);
    389 		if (len <= DEV_BSIZE - sizeof (*mb)) {
    390 			/* there is enough space to store the devid */
    391 			mb->mb_devid_magic = MDDB_MAGIC_DE;
    392 			mb->mb_devid_len = len;
    393 			(void) memcpy(mb->mb_devid, (char *)devid, len);
    394 		}
    395 		devid_free(devid);
    396 	}
    397 
    398 	crcgen((uchar_t *)mb, (uint_t *)&mb->mb_checksum, (uint_t)DEV_BSIZE,
    399 	    (crc_skip_t *)NULL);
    400 
    401 	/*
    402 	 * If any of these operations fail, we need to inform the
    403 	 * user that the disk won't be self identifying. When support
    404 	 * for importing remotely replicated disksets is added, we
    405 	 * want to add the error messages here.
    406 	 */
    407 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0)
    408 		goto out;
    409 
    410 	if (write(fd, mb, DEV_BSIZE) != DEV_BSIZE)
    411 		goto out;
    412 
    413 	if (lseek(fd, (off_t)(firstblk * DEV_BSIZE), SEEK_SET) < 0)
    414 		goto out;
    415 
    416 	if (read(fd, mb, DEV_BSIZE) != DEV_BSIZE)
    417 		goto out;
    418 
    419 	if (crcchk((uchar_t *)mb, (uint_t *)&mb->mb_checksum,
    420 	    (uint_t)DEV_BSIZE, (crc_skip_t *)NULL))
    421 		goto out;
    422 
    423 out:
    424 	Free(mb);
    425 }
    426 
    427 static int
    428 buildconf(mdsetname_t *sp, md_error_t *ep)
    429 {
    430 	md_replicalist_t	*rlp = NULL;
    431 	md_replicalist_t	*rl;
    432 	FILE			*cfp = NULL;
    433 	FILE			*mfp = NULL;
    434 	struct stat		sbuf;
    435 	int			rval = 0;
    436 	int			in_miniroot = 0;
    437 	char			line[MDDB_BOOTLIST_MAX_LEN];
    438 	char			*tname = NULL;
    439 
    440 	/* get list of local replicas */
    441 	if (! metaislocalset(sp))
    442 		return (0);
    443 
    444 	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0)
    445 		return (-1);
    446 
    447 	/* open tempfile, copy permissions of original file */
    448 	if ((cfp = fopen(META_DBCONFTMP, "w+")) == NULL) {
    449 		/*
    450 		 * On the miniroot tmp files must be created in /var/tmp.
    451 		 * If we get a EROFS error, we assume that we are in the
    452 		 * miniroot.
    453 		 */
    454 		if (errno != EROFS)
    455 			goto error;
    456 		in_miniroot = 1;
    457 		errno = 0;
    458 		tname = tempnam("/var/tmp", "slvm_");
    459 		if (tname == NULL && errno == EROFS) {
    460 			/*
    461 			 * If we are booted on a read-only root because
    462 			 * of mddb quorum problems we don't want to emit
    463 			 * any scary error messages.
    464 			 */
    465 			errno = 0;
    466 			goto out;
    467 		}
    468 
    469 		/* open tempfile, copy permissions of original file */
    470 		if ((cfp = fopen(tname, "w+")) == NULL)
    471 			goto error;
    472 	}
    473 	if (stat(META_DBCONF, &sbuf) == 0) {
    474 		if (fchmod(fileno(cfp), (sbuf.st_mode & 0666)) != 0)
    475 			goto error;
    476 		if (fchown(fileno(cfp), sbuf.st_uid, sbuf.st_gid) != 0)
    477 			goto error;
    478 	}
    479 
    480 	/* print header */
    481 	if (fprintf(cfp, "#metadevice database location file ") == EOF)
    482 		goto error;
    483 	if (fprintf(cfp, "do not hand edit\n") < 0)
    484 		goto error;
    485 	if (fprintf(cfp,
    486 	    "#driver\tminor_t\tdaddr_t\tdevice id\tchecksum\n") < 0)
    487 		goto error;
    488 
    489 	/* dump replicas */
    490 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
    491 		md_replica_t	*r = rl->rl_repp;
    492 		int		checksum = 42;
    493 		int		i;
    494 		char		*devidp;
    495 		minor_t		min;
    496 
    497 		devidp = devid_str_encode(r->r_devid, r->r_minor_name);
    498 		/* If devid code can't encode devidp - skip entry */
    499 		if (devidp == NULL) {
    500 			continue;
    501 		}
    502 
    503 		/* compute checksum */
    504 		for (i = 0; ((r->r_driver_name[i] != '\0') &&
    505 		    (i < sizeof (r->r_driver_name))); i++) {
    506 			checksum -= r->r_driver_name[i];
    507 		}
    508 		min = meta_getminor(r->r_namep->dev);
    509 		checksum -= min;
    510 		checksum -= r->r_blkno;
    511 
    512 		for (i = 0; i < strlen(devidp); i++) {
    513 			checksum -= devidp[i];
    514 		}
    515 		/* print info */
    516 		if (fprintf(cfp, "%s\t%lu\t%ld\t%s\t%d\n",
    517 		    r->r_driver_name, min, r->r_blkno, devidp, checksum) < 0) {
    518 			goto error;
    519 		}
    520 
    521 		devid_str_free(devidp);
    522 	}
    523 
    524 	/* close and rename to real file */
    525 	if (fflush(cfp) != 0)
    526 		goto error;
    527 	if (fsync(fileno(cfp)) != 0)
    528 		goto error;
    529 	if (fclose(cfp) != 0) {
    530 		cfp = NULL;
    531 		goto error;
    532 	}
    533 	cfp = NULL;
    534 
    535 	/*
    536 	 * Renames don't work in the miniroot since tmpfiles are
    537 	 * created in /var/tmp. Hence we copy the data out.
    538 	 */
    539 
    540 	if (! in_miniroot) {
    541 		if (rename(META_DBCONFTMP, META_DBCONF) != 0)
    542 			goto error;
    543 	} else {
    544 		if ((cfp = fopen(tname, "r")) == NULL)
    545 			goto error;
    546 		if ((mfp = fopen(META_DBCONF, "w+")) == NULL)
    547 			goto error;
    548 		while (fgets(line, MDDB_BOOTLIST_MAX_LEN, cfp) != NULL) {
    549 			if (fputs(line, mfp) == NULL)
    550 				goto error;
    551 		}
    552 		(void) fclose(cfp);
    553 		cfp = NULL;
    554 		if (fflush(mfp) != 0)
    555 			goto error;
    556 		if (fsync(fileno(mfp)) != 0)
    557 			goto error;
    558 		if (fclose(mfp) != 0) {
    559 			mfp = NULL;
    560 			goto error;
    561 		}
    562 		/* delete the tempfile */
    563 		(void) unlink(tname);
    564 	}
    565 	/* success */
    566 	rval = 0;
    567 	goto out;
    568 
    569 	/* tempfile error */
    570 error:
    571 	rval = (in_miniroot) ? mdsyserror(ep, errno, tname):
    572 	    mdsyserror(ep, errno, META_DBCONFTMP);
    573 
    574 
    575 	/* cleanup, return success */
    576 out:
    577 	if (rlp != NULL)
    578 		metafreereplicalist(rlp);
    579 	if ((cfp != NULL) && (fclose(cfp) != 0) && (rval == 0)) {
    580 		rval = (in_miniroot) ? mdsyserror(ep, errno, tname):
    581 		    mdsyserror(ep, errno, META_DBCONFTMP);
    582 	}
    583 	free(tname);
    584 	return (rval);
    585 }
    586 
    587 /*
    588  * check replica for dev
    589  */
    590 static int
    591 in_replica(
    592 	mdsetname_t	*sp,
    593 	md_replica_t	*rp,
    594 	mdname_t	*np,
    595 	diskaddr_t	slblk,
    596 	diskaddr_t	nblks,
    597 	md_error_t	*ep
    598 )
    599 {
    600 	mdname_t	*repnp = rp->r_namep;
    601 	diskaddr_t	rep_sblk = rp->r_blkno;
    602 	diskaddr_t	rep_nblks = rp->r_nblk;
    603 
    604 	/* should be in the same set */
    605 	assert(sp != NULL);
    606 
    607 	/* if error in master block, assume whole partition */
    608 	if ((rep_sblk == MD_DISKADDR_ERROR) ||
    609 	    (rep_nblks == MD_DISKADDR_ERROR)) {
    610 		rep_sblk = 0;
    611 		rep_nblks = MD_DISKADDR_ERROR;
    612 	}
    613 
    614 	/* check overlap */
    615 	if (meta_check_overlap(
    616 	    MDB_STR, np, slblk, nblks, repnp, rep_sblk, rep_nblks, ep) != 0) {
    617 		return (-1);
    618 	}
    619 
    620 	/* return success */
    621 	return (0);
    622 }
    623 
    624 /*
    625  * check to see if we're in a replica
    626  */
    627 int
    628 meta_check_inreplica(
    629 	mdsetname_t		*sp,
    630 	mdname_t		*np,
    631 	diskaddr_t		slblk,
    632 	diskaddr_t		nblks,
    633 	md_error_t		*ep
    634 )
    635 {
    636 	md_replicalist_t	*rlp = NULL;
    637 	md_replicalist_t	*rl;
    638 	int			rval = 0;
    639 
    640 	/* should have a set */
    641 	assert(sp != NULL);
    642 
    643 	/* for each replica */
    644 	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0)
    645 		return (-1);
    646 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
    647 		md_replica_t	*rp = rl->rl_repp;
    648 
    649 		/* check replica */
    650 		if (in_replica(sp, rp, np, slblk, nblks, ep) != 0) {
    651 			rval = -1;
    652 			break;
    653 		}
    654 	}
    655 
    656 	/* cleanup, return success */
    657 	metafreereplicalist(rlp);
    658 	return (rval);
    659 }
    660 
    661 /*
    662  * check replica
    663  */
    664 int
    665 meta_check_replica(
    666 	mdsetname_t	*sp,		/* set to check against */
    667 	mdname_t	*np,		/* component to check against */
    668 	mdchkopts_t	options,	/* option flags */
    669 	diskaddr_t	slblk,		/* start logical block */
    670 	diskaddr_t	nblks,		/* number of blocks (-1,rest of them) */
    671 	md_error_t	*ep		/* error packet */
    672 )
    673 {
    674 	mdchkopts_t	chkoptions = MDCHK_ALLOW_REPSLICE;
    675 
    676 	/* make sure we have a disk */
    677 	if (metachkcomp(np, ep) != 0)
    678 		return (-1);
    679 
    680 	/* check to ensure that it is not already in use */
    681 	if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0) {
    682 		return (-1);
    683 	}
    684 
    685 	if (options & MDCHK_ALLOW_NODBS)
    686 		return (0);
    687 
    688 	if (options & MDCHK_DRVINSET)
    689 		return (0);
    690 
    691 	/* make sure it is in the set */
    692 	if (meta_check_inset(sp, np, ep) != 0)
    693 		return (-1);
    694 
    695 	/* make sure its not in a metadevice */
    696 	if (meta_check_inmeta(sp, np, chkoptions, slblk, nblks, ep) != 0)
    697 		return (-1);
    698 
    699 	/* return success */
    700 	return (0);
    701 }
    702 
    703 static int
    704 update_dbinfo_on_drives(
    705 	mdsetname_t	*sp,
    706 	md_drive_desc	*dd,
    707 	int		set_locked,
    708 	int		force,
    709 	md_error_t	*ep
    710 )
    711 {
    712 	md_set_desc		*sd;
    713 	int			i;
    714 	md_setkey_t		*cl_sk;
    715 	int			rval = 0;
    716 	md_mnnode_desc		*nd;
    717 
    718 	if ((sd = metaget_setdesc(sp, ep)) == NULL)
    719 		return (-1);
    720 
    721 	if (! set_locked) {
    722 		if (MD_MNSET_DESC(sd)) {
    723 			md_error_t xep = mdnullerror;
    724 			sigset_t sigs;
    725 			/* Make sure we are blocking all signals */
    726 			if (procsigs(TRUE, &sigs, &xep) < 0)
    727 				mdclrerror(&xep);
    728 
    729 			nd = sd->sd_nodelist;
    730 			while (nd) {
    731 				if (force && strcmp(nd->nd_nodename,
    732 				    mynode()) != 0) {
    733 					nd = nd->nd_next;
    734 					continue;
    735 				}
    736 
    737 				if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
    738 					nd = nd->nd_next;
    739 					continue;
    740 				}
    741 
    742 				if (clnt_lock_set(nd->nd_nodename, sp, ep))
    743 					return (-1);
    744 				nd = nd->nd_next;
    745 			}
    746 		} else {
    747 			for (i = 0; i < MD_MAXSIDES; i++) {
    748 				/* Skip empty slots */
    749 				if (sd->sd_nodes[i][0] == '\0')
    750 					continue;
    751 
    752 				if (force && strcmp(sd->sd_nodes[i],
    753 				    mynode()) != 0)
    754 					continue;
    755 
    756 				if (clnt_lock_set(sd->sd_nodes[i], sp, ep))
    757 					return (-1);
    758 			}
    759 		}
    760 	}
    761 
    762 	if (MD_MNSET_DESC(sd)) {
    763 		nd = sd->sd_nodelist;
    764 		while (nd) {
    765 			if (force && strcmp(nd->nd_nodename, mynode()) != 0) {
    766 				nd = nd->nd_next;
    767 				continue;
    768 			}
    769 
    770 			if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
    771 				nd = nd->nd_next;
    772 				continue;
    773 			}
    774 
    775 			if (clnt_upd_dr_dbinfo(nd->nd_nodename, sp, dd, ep)
    776 			    == -1) {
    777 				rval = -1;
    778 				break;
    779 			}
    780 			nd = nd->nd_next;
    781 		}
    782 	} else {
    783 		for (i = 0; i < MD_MAXSIDES; i++) {
    784 			/* Skip empty slots */
    785 			if (sd->sd_nodes[i][0] == '\0')
    786 				continue;
    787 
    788 			if (force && strcmp(sd->sd_nodes[i], mynode()) != 0)
    789 				continue;
    790 
    791 			if (clnt_upd_dr_dbinfo(sd->sd_nodes[i], sp, dd, ep)
    792 			    == -1) {
    793 				rval = -1;
    794 				break;
    795 			}
    796 		}
    797 	}
    798 
    799 	if (! set_locked) {
    800 		cl_sk = cl_get_setkey(sp->setno, sp->setname);
    801 		if (MD_MNSET_DESC(sd)) {
    802 			nd = sd->sd_nodelist;
    803 			while (nd) {
    804 				if (force &&
    805 				    strcmp(nd->nd_nodename, mynode()) != 0) {
    806 					nd = nd->nd_next;
    807 					continue;
    808 				}
    809 
    810 				if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) {
    811 					nd = nd->nd_next;
    812 					continue;
    813 				}
    814 
    815 				if (clnt_unlock_set(nd->nd_nodename, cl_sk,
    816 				    ep)) {
    817 					rval = -1;
    818 					break;
    819 				}
    820 				nd = nd->nd_next;
    821 			}
    822 		} else {
    823 			for (i = 0; i < MD_MAXSIDES; i++) {
    824 				/* Skip empty slots */
    825 				if (sd->sd_nodes[i][0] == '\0')
    826 					continue;
    827 
    828 				if (force &&
    829 				    strcmp(sd->sd_nodes[i], mynode()) != 0)
    830 					continue;
    831 
    832 				if (clnt_unlock_set(sd->sd_nodes[i], cl_sk,
    833 				    ep)) {
    834 					rval = -1;
    835 					break;
    836 				}
    837 			}
    838 
    839 		}
    840 		cl_set_setkey(NULL);
    841 	}
    842 
    843 	return (rval);
    844 }
    845 
    846 int
    847 meta_db_addsidenms(
    848 	mdsetname_t	*sp,
    849 	mdname_t	*np,
    850 	daddr_t		blkno,
    851 	int		bcast,
    852 	md_error_t	*ep
    853 )
    854 {
    855 	side_t		sideno;
    856 	char		*bname = NULL;
    857 	char		*dname = NULL;
    858 	minor_t		mnum;
    859 	mddb_config_t	c;
    860 	int		done;
    861 	int		rval = 0;
    862 	md_set_desc	*sd;
    863 
    864 	sideno = MD_SIDEWILD;
    865 	/*CONSTCOND*/
    866 	while (1) {
    867 		if (bname != NULL) {
    868 			Free(bname);
    869 			bname = NULL;
    870 		}
    871 		if (dname != NULL) {
    872 			Free(dname);
    873 			dname = NULL;
    874 		}
    875 		if ((done = meta_getnextside_devinfo(sp, np->bname,
    876 		    &sideno, &bname, &dname, &mnum, ep)) == -1) {
    877 			rval = -1;
    878 			break;
    879 		}
    880 
    881 		if (done == 0)
    882 			break;
    883 
    884 		if (! metaislocalset(sp)) {
    885 			if ((sd = metaget_setdesc(sp, ep)) == NULL) {
    886 				rval = -1;
    887 				break;
    888 			}
    889 		}
    890 
    891 		/*
    892 		 * Send addsidenms to all nodes using rpc.mdcommd if
    893 		 * sidename is being added to MN diskset.
    894 		 *
    895 		 *   It's ok to broadcast this call to other nodes.
    896 		 *
    897 		 *   Note: The broadcast to other nodes isn't needed during
    898 		 *   the addition of the first mddbs to the set since the
    899 		 *   other nodes haven't been joined to the set yet.  All
    900 		 *   nodes in a MN diskset are (implicitly) joined to the set
    901 		 *   on the addition of the first mddb.
    902 		 */
    903 		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
    904 		    (bcast == DB_ADDSIDENMS_BCAST)) {
    905 			md_mn_result_t			*resultp = NULL;
    906 			md_mn_msg_meta_db_newside_t	db_ns;
    907 			int				send_rval;
    908 
    909 			db_ns.msg_l_dev = np->dev;
    910 			db_ns.msg_sideno = sideno;
    911 			db_ns.msg_blkno = blkno;
    912 			(void) strncpy(db_ns.msg_dname, dname,
    913 			    sizeof (db_ns.msg_dname));
    914 			(void) splitname(np->bname, &db_ns.msg_splitname);
    915 			db_ns.msg_mnum = mnum;
    916 
    917 			/* Set devid to NULL until devids are supported */
    918 			db_ns.msg_devid[0] = NULL;
    919 
    920 			/*
    921 			 * If reconfig cycle has been started, this node is
    922 			 * stuck in in the return step until this command has
    923 			 * completed.  If mdcommd is suspended, ask
    924 			 * send_message to fail (instead of retrying)
    925 			 * so that metaset can finish allowing the reconfig
    926 			 * cycle to proceed.
    927 			 */
    928 			send_rval = mdmn_send_message(sp->setno,
    929 			    MD_MN_MSG_META_DB_NEWSIDE, MD_MSGF_FAIL_ON_SUSPEND |
    930 			    MD_MSGF_PANIC_WHEN_INCONSISTENT, 0, (char *)&db_ns,
    931 			    sizeof (md_mn_msg_meta_db_newside_t),
    932 			    &resultp, ep);
    933 			if (send_rval != 0) {
    934 				rval = -1;
    935 				if (resultp == NULL)
    936 					(void) mddserror(ep,
    937 					    MDE_DS_COMMD_SEND_FAIL,
    938 					    sp->setno, NULL, NULL,
    939 					    sp->setname);
    940 				else {
    941 					(void) mdstealerror(ep,
    942 					    &(resultp->mmr_ep));
    943 					if (mdisok(ep)) {
    944 						(void) mddserror(ep,
    945 						    MDE_DS_COMMD_SEND_FAIL,
    946 						    sp->setno, NULL, NULL,
    947 						    sp->setname);
    948 					}
    949 					free_result(resultp);
    950 				}
    951 				break;
    952 			}
    953 			if (resultp)
    954 				free_result(resultp);
    955 		} else {
    956 			/*
    957 			 * Let this side's  device name, minor # and driver name
    958 			 * be known to the database replica.
    959 			 */
    960 			(void) memset(&c, 0, sizeof (c));
    961 
    962 			/* Fill in device/replica info */
    963 			c.c_locator.l_dev = meta_cmpldev(np->dev);
    964 			c.c_locator.l_blkno = blkno;
    965 			(void) strncpy(c.c_locator.l_driver, dname,
    966 			    sizeof (c.c_locator.l_driver));
    967 			if (splitname(np->bname, &c.c_devname) ==
    968 			    METASPLIT_LONGDISKNAME && devid_in_use == FALSE) {
    969 				rval = mddeverror(ep, MDE_DISKNAMETOOLONG,
    970 				    NODEV64, np->rname);
    971 				break;
    972 			}
    973 
    974 			c.c_locator.l_mnum = mnum;
    975 
    976 			/* Fill in setno, setname, and sideno */
    977 			c.c_setno = sp->setno;
    978 			(void) strncpy(c.c_setname, sp->setname,
    979 			    sizeof (c.c_setname));
    980 			c.c_sideno = sideno;
    981 
    982 			/*
    983 			 * Don't need device id information from this ioctl
    984 			 * Kernel determines device id from dev_t, which
    985 			 * is just what this code would do.
    986 			 */
    987 			c.c_locator.l_devid = (uint64_t)0;
    988 			c.c_locator.l_devid_flags = 0;
    989 
    990 			if (metaioctl(MD_DB_NEWSIDE, &c, &c.c_mde, NULL) != 0) {
    991 				rval = mdstealerror(ep, &c.c_mde);
    992 				break;
    993 			}
    994 		}
    995 	}
    996 
    997 	/* cleanup, return success */
    998 	if (bname != NULL) {
    999 		Free(bname);
   1000 		bname = NULL;
   1001 	}
   1002 	if (dname != NULL) {
   1003 		Free(dname);
   1004 		dname = NULL;
   1005 	}
   1006 	return (rval);
   1007 }
   1008 
   1009 
   1010 int
   1011 meta_db_delsidenm(
   1012 	mdsetname_t	*sp,
   1013 	side_t		sideno,
   1014 	mdname_t	*np,
   1015 	daddr_t		blkno,
   1016 	md_error_t	*ep
   1017 )
   1018 {
   1019 	mddb_config_t	c;
   1020 	md_set_desc	*sd;
   1021 
   1022 	if (! metaislocalset(sp)) {
   1023 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
   1024 			return (-1);
   1025 	}
   1026 	/* Use rpc.mdcommd to delete mddb side from all nodes */
   1027 	if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
   1028 	    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
   1029 		md_mn_result_t			*resultp = NULL;
   1030 		md_mn_msg_meta_db_delside_t	db_ds;
   1031 		int				send_rval;
   1032 
   1033 		db_ds.msg_l_dev = np->dev;
   1034 		db_ds.msg_blkno = blkno;
   1035 		db_ds.msg_sideno = sideno;
   1036 
   1037 		/* Set devid to NULL until devids are supported */
   1038 		db_ds.msg_devid[0] = NULL;
   1039 
   1040 		/*
   1041 		 * If reconfig cycle has been started, this node is
   1042 		 * stuck in in the return step until this command has
   1043 		 * completed.  If mdcommd is suspended, ask
   1044 		 * send_message to fail (instead of retrying)
   1045 		 * so that metaset can finish allowing the reconfig
   1046 		 * cycle to proceed.
   1047 		 */
   1048 		send_rval = mdmn_send_message(sp->setno,
   1049 		    MD_MN_MSG_META_DB_DELSIDE, MD_MSGF_FAIL_ON_SUSPEND |
   1050 		    MD_MSGF_PANIC_WHEN_INCONSISTENT, 0, (char *)&db_ds,
   1051 		    sizeof (md_mn_msg_meta_db_delside_t), &resultp, ep);
   1052 		if (send_rval != 0) {
   1053 			if (resultp == NULL)
   1054 				(void) mddserror(ep,
   1055 				    MDE_DS_COMMD_SEND_FAIL,
   1056 				    sp->setno, NULL, NULL,
   1057 				    sp->setname);
   1058 			else {
   1059 				(void) mdstealerror(ep, &(resultp->mmr_ep));
   1060 				if (mdisok(ep)) {
   1061 					(void) mddserror(ep,
   1062 					    MDE_DS_COMMD_SEND_FAIL,
   1063 					    sp->setno, NULL, NULL,
   1064 					    sp->setname);
   1065 				}
   1066 				free_result(resultp);
   1067 			}
   1068 			return (-1);
   1069 		}
   1070 		if (resultp)
   1071 			free_result(resultp);
   1072 
   1073 	} else {
   1074 		/*
   1075 		 * Let this side's  device name, minor # and driver name
   1076 		 * be known to the database replica.
   1077 		 */
   1078 		(void) memset(&c, 0, sizeof (c));
   1079 
   1080 		/* Fill in device/replica info */
   1081 		c.c_locator.l_dev = meta_cmpldev(np->dev);
   1082 		c.c_locator.l_blkno = blkno;
   1083 
   1084 		/* Fill in setno, setname, and sideno */
   1085 		c.c_setno = sp->setno;
   1086 		(void) strcpy(c.c_setname, sp->setname);
   1087 		c.c_sideno = sideno;
   1088 
   1089 		/*
   1090 		 * Don't need device id information from this ioctl
   1091 		 * Kernel determines device id from dev_t, which
   1092 		 * is just what this code would do.
   1093 		 */
   1094 		c.c_locator.l_devid = (uint64_t)0;
   1095 		c.c_locator.l_devid_flags = 0;
   1096 
   1097 		if (metaioctl(MD_DB_DELSIDE, &c, &c.c_mde, NULL) != 0)
   1098 			return (mdstealerror(ep, &c.c_mde));
   1099 	}
   1100 	return (0);
   1101 }
   1102 
   1103 
   1104 static int
   1105 mdnamesareunique(mdnamelist_t *nlp, md_error_t *ep)
   1106 {
   1107 	mdnamelist_t		*dnp1, *dnp2;
   1108 
   1109 	for (dnp1 = nlp; dnp1 != NULL; dnp1 = dnp1->next) {
   1110 		for (dnp2 = dnp1->next; dnp2 != NULL; dnp2 = dnp2->next) {
   1111 			if (strcmp(dnp1->namep->cname, dnp2->namep->cname) == 0)
   1112 				return (mderror(ep, MDE_DUPDRIVE,
   1113 				    dnp1->namep->cname));
   1114 		}
   1115 	}
   1116 	return (0);
   1117 }
   1118 
   1119 
   1120 /*
   1121  * Return 1 if files are different, else return 0
   1122  */
   1123 static int
   1124 filediff(char *tsname, char *sname)
   1125 {
   1126 	int ret = 1, fd;
   1127 	size_t tsz, sz;
   1128 	struct stat sbuf;
   1129 	char *tbuf, *buf;
   1130 
   1131 	if (stat(tsname, &sbuf) != 0)
   1132 		return (1);
   1133 	tsz = sbuf.st_size;
   1134 	if (stat(sname, &sbuf) != 0)
   1135 		return (1);
   1136 	sz = sbuf.st_size;
   1137 	if (tsz != sz)
   1138 		return (1);
   1139 
   1140 	/* allocate memory and read both files into buffer */
   1141 	tbuf = malloc(tsz);
   1142 	buf = malloc(sz);
   1143 	if (tbuf == NULL || buf == NULL)
   1144 		goto out;
   1145 
   1146 	fd = open(tsname, O_RDONLY);
   1147 	if (fd == -1)
   1148 		goto out;
   1149 	sz = read(fd, tbuf, tsz);
   1150 	(void) close(fd);
   1151 	if (sz != tsz)
   1152 		goto out;
   1153 
   1154 	fd = open(sname, O_RDONLY);
   1155 	if (fd == -1)
   1156 		goto out;
   1157 	sz = read(fd, buf, tsz);
   1158 	(void) close(fd);
   1159 	if (sz != tsz)
   1160 		goto out;
   1161 
   1162 	/* compare content */
   1163 	ret = bcmp(tbuf, buf, tsz);
   1164 out:
   1165 	if (tbuf)
   1166 		free(tbuf);
   1167 	if (buf)
   1168 		free(buf);
   1169 	return (ret);
   1170 }
   1171 
   1172 /*
   1173  * patch md.conf file with mddb locations
   1174  */
   1175 int
   1176 meta_db_patch(
   1177 	char		*sname,		/* system file name */
   1178 	char		*cname,		/* mddb.cf file name */
   1179 	int		patch,		/* patching locally */
   1180 	md_error_t	*ep
   1181 )
   1182 {
   1183 	char		*tsname = NULL;
   1184 	char		line[MDDB_BOOTLIST_MAX_LEN];
   1185 	FILE		*tsfp = NULL;
   1186 	FILE		*mfp = NULL;
   1187 	int		rval = -1;
   1188 
   1189 	/* check names */
   1190 	if (sname == NULL) {
   1191 		if (patch)
   1192 			sname = "md.conf";
   1193 		else
   1194 			sname = "/kernel/drv/md.conf";
   1195 	}
   1196 	if (cname == NULL)
   1197 		cname = META_DBCONF;
   1198 
   1199 	/*
   1200 	 * edit file
   1201 	 */
   1202 	if (meta_systemfile_copy(sname, 0, 1, 1, 0, &tsname, &tsfp, ep) != 0) {
   1203 		if (mdissyserror(ep, EROFS)) {
   1204 			/*
   1205 			 * If we are booted on a read-only root because
   1206 			 * of mddb quorum problems we don't want to emit
   1207 			 * any scary error messages.
   1208 			 */
   1209 			mdclrerror(ep);
   1210 			rval = 0;
   1211 		}
   1212 		goto out;
   1213 	}
   1214 
   1215 	if (meta_systemfile_append_mddb(cname, sname, tsname, tsfp, 1, 0, 0,
   1216 	    ep) != 0)
   1217 		goto out;
   1218 
   1219 	/* if file content is identical, skip rename */
   1220 	if (filediff(tsname, sname) == 0) {
   1221 		rval = 0;
   1222 		goto out;
   1223 	}
   1224 
   1225 	if ((fflush(tsfp) != 0) || (fsync(fileno(tsfp)) != 0) ||
   1226 	    (fclose(tsfp) != 0)) {
   1227 		(void) mdsyserror(ep, errno, tsname);
   1228 		goto out;
   1229 	}
   1230 
   1231 	tsfp = NULL;
   1232 
   1233 	/*
   1234 	 * rename file. If we get a Cross Device error then it
   1235 	 * is because we are in the miniroot.
   1236 	 */
   1237 	if (rename(tsname, sname) != 0 && errno != EXDEV) {
   1238 		(void) mdsyserror(ep, errno, sname);
   1239 		goto out;
   1240 	}
   1241 
   1242 	if (errno == EXDEV) {
   1243 		if ((tsfp = fopen(tsname, "r")) == NULL)
   1244 			goto out;
   1245 		if ((mfp = fopen(sname, "w+")) == NULL)
   1246 			goto out;
   1247 		while (fgets(line, sizeof (line), tsfp) != NULL) {
   1248 			if (fputs(line, mfp) == NULL)
   1249 				goto out;
   1250 		}
   1251 		(void) fclose(tsfp);
   1252 		tsfp = NULL;
   1253 		if (fflush(mfp) != 0)
   1254 			goto out;
   1255 		if (fsync(fileno(mfp)) != 0)
   1256 			goto out;
   1257 		if (fclose(mfp) != 0) {
   1258 			mfp = NULL;
   1259 			goto out;
   1260 		}
   1261 	}
   1262 
   1263 	Free(tsname);
   1264 	tsname = NULL;
   1265 	rval = 0;
   1266 
   1267 	/* cleanup, return error */
   1268 out:
   1269 	if (tsfp != NULL)
   1270 		(void) fclose(tsfp);
   1271 	if (tsname != NULL) {
   1272 		(void) unlink(tsname);
   1273 		Free(tsname);
   1274 	}
   1275 	return (rval);
   1276 }
   1277 
   1278 /*
   1279  * Add replicas to set.  This happens as a result of:
   1280  *	- metadb [-s set_name] -a
   1281  *	- metaset -s set_name -a disk
   1282  *	- metaset -s set_name -d disk	 (causes a rebalance of mddbs)
   1283  *	- metaset -s set_name -b
   1284  *
   1285  * For a local set, this routine is run on the local set host.
   1286  *
   1287  * For a traditional diskset, this routine is run on the node that
   1288  * is running the metaset command.
   1289  *
   1290  * For a multinode diskset, this routine is run by the node that is
   1291  * running the metaset command.  If this is the first mddb added to
   1292  * the MN diskset, then no communication is made to other nodes via commd
   1293  * since the other nodes will be in-sync with respect to the mddbs when
   1294  * those other nodes join the set and snarf in the newly created mddb.
   1295  * If this is not the first mddb added to the MN diskset, then this
   1296  * attach command is sent to all of the nodes using commd.  This keeps
   1297  * the nodes in-sync.
   1298  */
   1299 int
   1300 meta_db_attach(
   1301 	mdsetname_t		*sp,
   1302 	mdnamelist_t		*db_nlp,
   1303 	mdchkopts_t		options,
   1304 	md_timeval32_t		*timeval,
   1305 	int			dbcnt,
   1306 	int			dbsize,
   1307 	char			*sysfilename,
   1308 	md_error_t		*ep
   1309 )
   1310 {
   1311 	struct mddb_config	c;
   1312 	mdnamelist_t		*nlp;
   1313 	mdname_t		*np;
   1314 	md_drive_desc		*dd = NULL;
   1315 	md_drive_desc		*p;
   1316 	int			i;
   1317 	int			fd;
   1318 	side_t			sideno;
   1319 	daddr_t			blkno;
   1320 	int			replicacount = 0;
   1321 	int			start_svmdaemons = 0;
   1322 	int			rval = 0;
   1323 	md_error_t		status = mdnullerror;
   1324 	md_set_desc		*sd;
   1325 	int			stale_bool = FALSE;
   1326 	int			flags;
   1327 	int			firstmddb = 1;
   1328 	md_timeval32_t		inittime = {0, 0};
   1329 
   1330 	/*
   1331 	 * Error if we don't get some work to do.
   1332 	 */
   1333 	if (db_nlp == NULL)
   1334 		return (mdsyserror(ep, EINVAL, NULL));
   1335 
   1336 	if (mdnamesareunique(db_nlp, ep) != 0)
   1337 		return (-1);
   1338 	(void) memset(&c, 0, sizeof (c));
   1339 	c.c_id = 0;
   1340 	c.c_setno = sp->setno;
   1341 
   1342 	/* Don't need device id information from this ioctl */
   1343 	c.c_locator.l_devid = (uint64_t)0;
   1344 	c.c_locator.l_devid_flags = 0;
   1345 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
   1346 		if (metaislocalset(sp)) {
   1347 			if (mdismddberror(&c.c_mde, MDE_DB_INVALID))
   1348 				mdclrerror(&c.c_mde);
   1349 			else if (! mdismddberror(&c.c_mde, MDE_DB_NODB) ||
   1350 			    (! (options & MDCHK_ALLOW_NODBS)))
   1351 				return (mdstealerror(ep, &c.c_mde));
   1352 		} else {
   1353 			if (! mdismddberror(&c.c_mde, MDE_DB_NOTOWNER))
   1354 				return (mdstealerror(ep, &c.c_mde));
   1355 		}
   1356 		mdclrerror(&c.c_mde);
   1357 	}
   1358 	/*
   1359 	 * Is current set STALE?
   1360 	 */
   1361 	if (c.c_flags & MDDB_C_STALE) {
   1362 		stale_bool = TRUE;
   1363 	}
   1364 
   1365 	assert(db_nlp != NULL);
   1366 
   1367 	/* if these are the first replicas then the SVM daemons need to run */
   1368 	if (c.c_dbcnt == 0)
   1369 		start_svmdaemons = 1;
   1370 
   1371 	/*
   1372 	 * check to see if we will go over the total possible number
   1373 	 * of data bases
   1374 	 */
   1375 	nlp = db_nlp;
   1376 	while (nlp) {
   1377 		replicacount += dbcnt;
   1378 		nlp = nlp->next;
   1379 	}
   1380 
   1381 	if ((replicacount + c.c_dbcnt) > c.c_dbmax)
   1382 		return (mdmddberror(ep, MDE_TOOMANY_REPLICAS, NODEV32,
   1383 		    sp->setno, c.c_dbcnt + replicacount, NULL));
   1384 
   1385 	/*
   1386 	 * go through and check to make sure all locations specified
   1387 	 * are legal also pick out driver name;
   1388 	 */
   1389 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
   1390 		diskaddr_t devsize;
   1391 
   1392 		np = nlp->namep;
   1393 
   1394 		if (! metaislocalset(sp)) {
   1395 			uint_t	partno;
   1396 			uint_t	rep_partno;
   1397 			mddrivename_t	*dnp = np->drivenamep;
   1398 
   1399 			/*
   1400 			 * make sure that non-local database replicas
   1401 			 * are always on the replica slice.
   1402 			 */
   1403 			if (meta_replicaslice(dnp,
   1404 			    &rep_partno, ep) != 0)
   1405 				return (-1);
   1406 			if (metagetvtoc(np, FALSE, &partno, ep) == NULL)
   1407 				return (-1);
   1408 			if (partno != rep_partno)
   1409 				return (mddeverror(ep, MDE_REPCOMP_ONLY,
   1410 				    np->dev, sp->setname));
   1411 		}
   1412 
   1413 		if (meta_check_replica(sp, np, options, 0, (dbcnt * dbsize),
   1414 		    ep)) {
   1415 			return (-1);
   1416 		}
   1417 
   1418 		if ((devsize = metagetsize(np, ep)) == -1)
   1419 			return (-1);
   1420 
   1421 		if (devsize < (diskaddr_t)((dbcnt * dbsize) + 16))
   1422 			return (mdmddberror(ep, MDE_REPLICA_TOOSMALL,
   1423 			    meta_getminor(np->dev), sp->setno, devsize,
   1424 			    np->cname));
   1425 	}
   1426 
   1427 	/*
   1428 	 * If first disk in set we don't have lb_inittime yet for use as
   1429 	 * mb_setcreatetime so don't go looking for it. WE'll come back
   1430 	 * later and update after the locator block has been created.
   1431 	 * If this isn't the first disk in the set, we have a locator
   1432 	 * block and thus we have lb_inittime. Set mb_setcreatetime to
   1433 	 * lb_inittime.
   1434 	 */
   1435 	if (! metaislocalset(sp)) {
   1436 		if (c.c_dbcnt != 0) {
   1437 			firstmddb = 0;
   1438 			inittime = meta_get_lb_inittime(sp, ep);
   1439 		}
   1440 	}
   1441 
   1442 	/*
   1443 	 * go through and write all master blocks
   1444 	 */
   1445 
   1446 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
   1447 		np = nlp->namep;
   1448 
   1449 		if ((fd = open(np->rname, O_RDWR)) < 0)
   1450 			return (mdsyserror(ep, errno, np->rname));
   1451 
   1452 		for (i = 0; i < dbcnt; i++) {
   1453 			if (mkmasterblks(sp, np, fd, (i * dbsize + 16), dbsize,
   1454 			    inittime, ep)) {
   1455 				(void) close(fd);
   1456 				return (-1);
   1457 			}
   1458 		}
   1459 		(void) close(fd);
   1460 	}
   1461 
   1462 	if ((sideno = getmyside(sp, ep)) == MD_SIDEWILD)
   1463 		return (-1);
   1464 
   1465 	if (! metaislocalset(sp)) {
   1466 		dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep);
   1467 		if (! mdisok(ep))
   1468 			return (-1);
   1469 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
   1470 			return (-1);
   1471 
   1472 	}
   1473 
   1474 	/*
   1475 	 * go through and tell kernel to add them
   1476 	 */
   1477 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
   1478 		mdcinfo_t	*cinfo;
   1479 
   1480 		np = nlp->namep;
   1481 
   1482 		if ((cinfo = metagetcinfo(np, ep)) == NULL) {
   1483 			rval = -1;
   1484 			goto out;
   1485 		}
   1486 
   1487 		/*
   1488 		 * If mddb is being added to MN diskset and there already
   1489 		 * exists a valid mddb in the set (which equates to this
   1490 		 * node being an owner of the set) then use rpc.mdcommd
   1491 		 * mechanism to add mddb(s) so that all nodes stay in sync.
   1492 		 * If set is stale, don't log the message since rpc.mdcommd
   1493 		 * can't write the message to the mddb.
   1494 		 *
   1495 		 * Otherwise, just add mddb to this node.
   1496 		 */
   1497 		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
   1498 		    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
   1499 			md_mn_result_t			*resultp = NULL;
   1500 			md_mn_msg_meta_db_attach_t	attach;
   1501 			int 				send_rval;
   1502 
   1503 			/*
   1504 			 * In a scenario where new replicas had been added on
   1505 			 * the master, and then all of the old replicas failed
   1506 			 * before the slaves had knowledge of the new replicas,
   1507 			 * the slaves are unable to re-parse in the mddb
   1508 			 * from the new replicas since the slaves have no
   1509 			 * knowledge of the new replicas.  The following
   1510 			 * algorithm solves this problem:
   1511 			 * 	- META_DB_ATTACH message generates submsgs
   1512 			 * 		- BLOCK parse (master)
   1513 			 * 		- MDDB_ATTACH new replicas
   1514 			 * 		- UNBLOCK parse (master) causing parse
   1515 			 *		information to be sent from master
   1516 			 *		to slaves at a higher class than the
   1517 			 *		unblock so the parse message will
   1518 			 *		reach slaves before unblock message.
   1519 			 */
   1520 			attach.msg_l_dev = np->dev;
   1521 			attach.msg_cnt = dbcnt;
   1522 			attach.msg_dbsize = dbsize;
   1523 			(void) strncpy(attach.msg_dname, cinfo->dname,
   1524 			    sizeof (attach.msg_dname));
   1525 			(void) splitname(np->bname, &attach.msg_splitname);
   1526 			attach.msg_options = options;
   1527 
   1528 			/* Set devid to NULL until devids are supported */
   1529 			attach.msg_devid[0] = NULL;
   1530 
   1531 			/*
   1532 			 * If reconfig cycle has been started, this node is
   1533 			 * stuck in in the return step until this command has
   1534 			 * completed.  If mdcommd is suspended, ask
   1535 			 * send_message to fail (instead of retrying)
   1536 			 * so that metaset can finish allowing the reconfig
   1537 			 * cycle to proceed.
   1538 			 */
   1539 			flags = MD_MSGF_FAIL_ON_SUSPEND;
   1540 			if (stale_bool == TRUE)
   1541 				flags |= MD_MSGF_NO_LOG;
   1542 			send_rval = mdmn_send_message(sp->setno,
   1543 			    MD_MN_MSG_META_DB_ATTACH,
   1544 			    flags, 0, (char *)&attach,
   1545 			    sizeof (md_mn_msg_meta_db_attach_t),
   1546 			    &resultp, ep);
   1547 			if (send_rval != 0) {
   1548 				rval = -1;
   1549 				if (resultp == NULL)
   1550 					(void) mddserror(ep,
   1551 					    MDE_DS_COMMD_SEND_FAIL,
   1552 					    sp->setno, NULL, NULL,
   1553 					    sp->setname);
   1554 				else {
   1555 					(void) mdstealerror(ep,
   1556 					    &(resultp->mmr_ep));
   1557 					if (mdisok(ep)) {
   1558 						(void) mddserror(ep,
   1559 						    MDE_DS_COMMD_SEND_FAIL,
   1560 						    sp->setno, NULL, NULL,
   1561 						    sp->setname);
   1562 					}
   1563 					free_result(resultp);
   1564 				}
   1565 				goto out;
   1566 			}
   1567 			if (resultp)
   1568 				free_result(resultp);
   1569 		} else {
   1570 			/* Adding mddb(s) to just this node */
   1571 			for (i = 0; i < dbcnt; i++) {
   1572 				(void) memset(&c, 0, sizeof (c));
   1573 				/* Fill in device/replica info */
   1574 				c.c_locator.l_dev = meta_cmpldev(np->dev);
   1575 				c.c_locator.l_blkno = i * dbsize + 16;
   1576 				blkno = c.c_locator.l_blkno;
   1577 				(void) strncpy(c.c_locator.l_driver,
   1578 				    cinfo->dname,
   1579 				    sizeof (c.c_locator.l_driver));
   1580 
   1581 				if (splitname(np->bname, &c.c_devname) ==
   1582 				    METASPLIT_LONGDISKNAME && devid_in_use ==
   1583 				    FALSE) {
   1584 					rval = mddeverror(ep,
   1585 					    MDE_DISKNAMETOOLONG,
   1586 					    NODEV64, np->rname);
   1587 					goto out;
   1588 				}
   1589 
   1590 				c.c_locator.l_mnum = meta_getminor(np->dev);
   1591 
   1592 				/* Fill in setno, setname, and sideno */
   1593 				c.c_setno = sp->setno;
   1594 				if (! metaislocalset(sp)) {
   1595 					if (MD_MNSET_DESC(sd)) {
   1596 						c.c_multi_node = 1;
   1597 					}
   1598 				}
   1599 				(void) strcpy(c.c_setname, sp->setname);
   1600 				c.c_sideno = sideno;
   1601 
   1602 				/*
   1603 				 * Don't need device id information from this
   1604 				 * ioctl Kernel determines device id from
   1605 				 * dev_t, which is just what this code would do.
   1606 				 */
   1607 				c.c_locator.l_devid = (uint64_t)0;
   1608 				c.c_locator.l_devid_flags = 0;
   1609 
   1610 				if (timeval != NULL)
   1611 					c.c_timestamp = *timeval;
   1612 
   1613 				if (setup_med_cfg(sp, &c,
   1614 				    (options & MDCHK_SET_FORCE), ep)) {
   1615 					rval = -1;
   1616 					goto out;
   1617 				}
   1618 
   1619 				if (metaioctl(MD_DB_NEWDEV, &c, &c.c_mde,
   1620 				    NULL) != 0) {
   1621 					rval = mdstealerror(ep, &c.c_mde);
   1622 					goto out;
   1623 				}
   1624 				/*
   1625 				 * This is either a traditional diskset OR this
   1626 				 * is the first replica added to a MN diskset.
   1627 				 * In either case, set broadcast to NO_BCAST so
   1628 				 * that message won't go through rpc.mdcommd.
   1629 				 * If this is a traditional diskset, the bcast
   1630 				 * flag is ignored since traditional disksets
   1631 				 * don't use the rpc.mdcommd.
   1632 				 */
   1633 				if (meta_db_addsidenms(sp, np, blkno,
   1634 				    DB_ADDSIDENMS_NO_BCAST, ep))
   1635 					goto out;
   1636 			}
   1637 		}
   1638 		if (! metaislocalset(sp)) {
   1639 			/* update the dbcnt and size in dd */
   1640 			for (p = dd; p != NULL; p = p->dd_next)
   1641 				if (p->dd_dnp == np->drivenamep) {
   1642 					p->dd_dbcnt = dbcnt;
   1643 					p->dd_dbsize  = dbsize;
   1644 					break;
   1645 				}
   1646 		}
   1647 
   1648 		/*
   1649 		 * If this was the first addition of disks to the
   1650 		 * diskset you now need to update the mb_setcreatetime
   1651 		 * which needed lb_inittime which wasn't there until now.
   1652 		 */
   1653 		if (firstmddb) {
   1654 			if (meta_update_mb(sp, dd, ep) != 0) {
   1655 				return (-1);
   1656 			}
   1657 		}
   1658 		(void) close(fd);
   1659 	}
   1660 
   1661 out:
   1662 	if (metaislocalset(sp)) {
   1663 
   1664 		/* everything looks fine. Start mdmonitord */
   1665 		if (rval == 0 && start_svmdaemons == 1) {
   1666 			if (meta_smf_enable(META_SMF_CORE, &status) == -1) {
   1667 				mde_perror(&status, "");
   1668 				mdclrerror(&status);
   1669 			}
   1670 		}
   1671 
   1672 		if (buildconf(sp, &status)) {
   1673 			/* Don't mask any previous errors */
   1674 			if (rval == 0)
   1675 				rval = mdstealerror(ep, &status);
   1676 			return (rval);
   1677 		}
   1678 
   1679 		if (meta_db_patch(sysfilename, NULL, 0, &status)) {
   1680 			/* Don't mask any previous errors */
   1681 			if (rval == 0)
   1682 				rval = mdstealerror(ep, &status);
   1683 		}
   1684 	} else {
   1685 		if (update_dbinfo_on_drives(sp, dd,
   1686 		    (options & MDCHK_SET_LOCKED),
   1687 		    (options & MDCHK_SET_FORCE),
   1688 		    &status)) {
   1689 			/* Don't mask any previous errors */
   1690 			if (rval == 0)
   1691 				rval = mdstealerror(ep, &status);
   1692 			else
   1693 				mdclrerror(&status);
   1694 		}
   1695 		metafreedrivedesc(&dd);
   1696 	}
   1697 	/*
   1698 	 * For MN disksets that already had already had nodes joined
   1699 	 * before the attach of this mddb(s), the name invalidation is
   1700 	 * done by the commd handler routine.  Otherwise, if this
   1701 	 * is the first attach of a MN diskset mddb, the invalidation
   1702 	 * must be done here since the first attach cannot be sent
   1703 	 * via the commd since there are no nodes joined to the set yet.
   1704 	 */
   1705 	if ((metaislocalset(sp)) || (!MD_MNSET_DESC(sd)) ||
   1706 	    (MD_MNSET_DESC(sd) &&
   1707 	    (!(sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)))) {
   1708 		for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) {
   1709 			meta_invalidate_name(nlp->namep);
   1710 		}
   1711 	}
   1712 	return (rval);
   1713 }
   1714 
   1715 /*
   1716  * deletelist_length
   1717  *
   1718  *	return the number of slices that have been specified for deletion
   1719  *	on the metadb command line.  This does not calculate the number
   1720  *	of replicas because there may be multiple replicas per slice.
   1721  */
   1722 static int
   1723 deletelist_length(mdnamelist_t *db_nlp)
   1724 {
   1725 
   1726 	mdnamelist_t		*nlp;
   1727 	int			list_length = 0;
   1728 
   1729 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
   1730 		list_length++;
   1731 	}
   1732 
   1733 	return (list_length);
   1734 }
   1735 
   1736 static int
   1737 in_deletelist(char *devname, mdnamelist_t *db_nlp)
   1738 {
   1739 
   1740 	mdnamelist_t		*nlp;
   1741 	mdname_t		*np;
   1742 	int			index = 0;
   1743 
   1744 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
   1745 		np = nlp->namep;
   1746 
   1747 		if (strcmp(devname, np->bname) == 0)
   1748 			return (index);
   1749 		index++;
   1750 	}
   1751 
   1752 	return (-1);
   1753 }
   1754 
   1755 /*
   1756  * Delete replicas from set.  This happens as a result of:
   1757  *	- metadb [-s set_name] -d
   1758  *	- metaset -s set_name -a disk	(causes a rebalance of mddbs)
   1759  *	- metaset -s set_name -d disk
   1760  *	- metaset -s set_name -b
   1761  *
   1762  * For a local set, this routine is run on the local set host.
   1763  *
   1764  * For a traditional diskset, this routine is run on the node that
   1765  * is running the metaset command.
   1766  *
   1767  * For a multinode diskset, this routine is run by the node that is
   1768  * running the metaset command.  This detach routine is sent to all
   1769  * of the joined nodes in the diskset using commd.  This keeps
   1770  * the nodes in-sync.
   1771  */
   1772 int
   1773 meta_db_detach(
   1774 	mdsetname_t		*sp,
   1775 	mdnamelist_t		*db_nlp,
   1776 	mdforceopts_t		force_option,
   1777 	char			*sysfilename,
   1778 	md_error_t		*ep
   1779 )
   1780 {
   1781 	struct mddb_config	c;
   1782 	mdnamelist_t		*nlp;
   1783 	mdname_t		*np;
   1784 	md_drive_desc		*dd = NULL;
   1785 	md_drive_desc		*p;
   1786 	int			replicacount;
   1787 	int			replica_delete_count;
   1788 	int			nr_replica_slices;
   1789 	int			i;
   1790 	int			stop_svmdaemons = 0;
   1791 	int			rval = 0;
   1792 	int			index;
   1793 	int			valid_replicas_nottodelete = 0;
   1794 	int			invalid_replicas_nottodelete = 0;
   1795 	int			invalid_replicas_todelete = 0;
   1796 	int			errored = 0;
   1797 	int			*tag_array;
   1798 	int			fd = -1;
   1799 	md_error_t		status = mdnullerror;
   1800 	md_set_desc		*sd;
   1801 	int			stale_bool = FALSE;
   1802 	int			flags;
   1803 
   1804 	/*
   1805 	 * Error if we don't get some work to do.
   1806 	 */
   1807 	if (db_nlp == NULL)
   1808 		return (mdsyserror(ep, EINVAL, NULL));
   1809 
   1810 	if (mdnamesareunique(db_nlp, ep) != 0)
   1811 		return (-1);
   1812 
   1813 	(void) memset(&c, 0, sizeof (c));
   1814 	c.c_id = 0;
   1815 	c.c_setno = sp->setno;
   1816 
   1817 	/* Don't need device id information from this ioctl */
   1818 	c.c_locator.l_devid = (uint64_t)0;
   1819 	c.c_locator.l_devid_flags = 0;
   1820 
   1821 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
   1822 		return (mdstealerror(ep, &c.c_mde));
   1823 
   1824 	/*
   1825 	 * Is current set STALE?
   1826 	 */
   1827 	if (c.c_flags & MDDB_C_STALE) {
   1828 		stale_bool = TRUE;
   1829 	}
   1830 
   1831 	replicacount = c.c_dbcnt;
   1832 
   1833 	assert(db_nlp != NULL);
   1834 
   1835 	/*
   1836 	 * go through and gather how many data bases are on each
   1837 	 * device specified.
   1838 	 */
   1839 
   1840 	nr_replica_slices = deletelist_length(db_nlp);
   1841 	tag_array = (int *)calloc(nr_replica_slices, sizeof (int));
   1842 
   1843 	replica_delete_count = 0;
   1844 	for (i = 0; i < replicacount; i++) {
   1845 		char	*devname;
   1846 		int	found = 0;
   1847 
   1848 		c.c_id = i;
   1849 
   1850 		/* Don't need device id information from this ioctl */
   1851 		c.c_locator.l_devid = (uint64_t)0;
   1852 		c.c_locator.l_devid_flags = 0;
   1853 
   1854 		if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
   1855 			return (mdstealerror(ep, &c.c_mde));
   1856 
   1857 		devname = splicename(&c.c_devname);
   1858 
   1859 		if (strstr(devname, META_LONGDISKNAME_STR) != NULL) {
   1860 			Free(devname);
   1861 			devname = getlongname(&c, ep);
   1862 			if (devname == NULL) {
   1863 				return (-1);
   1864 			}
   1865 		}
   1866 
   1867 		if ((index = in_deletelist(devname, db_nlp)) != -1) {
   1868 			found = 1;
   1869 			tag_array[index] = 1;
   1870 			replica_delete_count++;
   1871 		}
   1872 
   1873 		errored = c.c_locator.l_flags & (MDDB_F_EREAD |
   1874 		    MDDB_F_EWRITE | MDDB_F_TOOSMALL | MDDB_F_EFMT |
   1875 		    MDDB_F_EDATA | MDDB_F_EMASTER);
   1876 
   1877 		/*
   1878 		 * There are four combinations of "errored" and "found"
   1879 		 * and they are used to find the number of
   1880 		 * (a) valid/invalid replicas that are not in the delete
   1881 		 * list and are available in the system.
   1882 		 * (b) valid/invalid replicas that are to be deleted.
   1883 		 */
   1884 
   1885 		if (errored && !found)		/* errored and !found */
   1886 			invalid_replicas_nottodelete++;
   1887 		else if (!found)		/* !errored and !found */
   1888 			valid_replicas_nottodelete++;
   1889 		else if (errored)		/* errored and found */
   1890 			invalid_replicas_todelete++;
   1891 		/*
   1892 		 * else it is !errored and found. This means
   1893 		 * valid_replicas_todelete++; But this variable will not
   1894 		 * be used anywhere
   1895 		 */
   1896 
   1897 		Free(devname);
   1898 	}
   1899 
   1900 	index = 0;
   1901 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
   1902 		np = nlp->namep;
   1903 		if (tag_array[index++] != 1) {
   1904 			Free(tag_array);
   1905 			return (mddeverror(ep, MDE_NO_DB, np->dev, np->cname));
   1906 		}
   1907 	}
   1908 
   1909 	Free(tag_array);
   1910 
   1911 
   1912 	/* if all replicas are deleted stop mdmonitord */
   1913 	if ((replicacount - replica_delete_count) == 0)
   1914 		stop_svmdaemons = 1;
   1915 
   1916 	if (((replicacount - replica_delete_count) < MD_MINREPLICAS)) {
   1917 		if (force_option & MDFORCE_NONE)
   1918 			return (mderror(ep, MDE_NOTENOUGH_DB, sp->setname));
   1919 		if (! metaislocalset(sp) && ! (force_option & MDFORCE_DS))
   1920 			return (mderror(ep, MDE_DELDB_NOTALLOWED, sp->setname));
   1921 	}
   1922 
   1923 	/*
   1924 	 * The following algorithms are followed to check for deletion:
   1925 	 * (a) If the delete list(db_nlp) has all invalid replicas and no valid
   1926 	 * replicas, then deletion should be allowed.
   1927 	 * (b) Deletion should be allowed only if valid replicas that are "not"
   1928 	 * to be deleted is always greater than the invalid replicas that
   1929 	 * are "not" to be deleted.
   1930 	 * (c) If the user uses -f option, then deletion should be allowed.
   1931 	 */
   1932 
   1933 	if ((invalid_replicas_todelete != replica_delete_count) &&
   1934 	    (invalid_replicas_nottodelete > valid_replicas_nottodelete) &&
   1935 	    (force_option != MDFORCE_LOCAL))
   1936 		return (mderror(ep, MDE_DEL_VALIDDB_NOTALLOWED, sp->setname));
   1937 
   1938 	/*
   1939 	 * go through and tell kernel to delete them
   1940 	 */
   1941 
   1942 	/* Don't need device id information from this ioctl */
   1943 	c.c_locator.l_devid = (uint64_t)0;
   1944 	c.c_locator.l_devid_flags = 0;
   1945 
   1946 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0)
   1947 		return (mdstealerror(ep, &c.c_mde));
   1948 
   1949 	if (! metaislocalset(sp)) {
   1950 		dd = metaget_drivedesc_fromnamelist(sp, db_nlp, ep);
   1951 		if (! mdisok(ep))
   1952 			return (-1);
   1953 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
   1954 			return (-1);
   1955 	}
   1956 
   1957 	for (nlp = db_nlp; nlp != NULL; nlp = nlp->next) {
   1958 		np = nlp->namep;
   1959 
		/*
		 * If mddb is being deleted from MN diskset and node is
		 * an owner of the diskset then use rpc.mdcommd
		 * mechanism to delete mddb(s) so that all nodes stay in sync.
		 * If set is stale, don't log the message since rpc.mdcommd
		 * can't write the message to the mddb.
   1966 		 *
   1967 		 * When mddbs are first being added to set, a detach can
   1968 		 * be called before any node has joined the diskset, so
   1969 		 * must check to see if node is an owner of the diskset.
   1970 		 *
   1971 		 * Otherwise, just delete mddb from this node.
   1972 		 */
   1973 
   1974 		if ((! metaislocalset(sp)) && MD_MNSET_DESC(sd) &&
   1975 		    (sd->sd_mn_mynode->nd_flags & MD_MN_NODE_OWN)) {
   1976 			md_mn_result_t			*resultp;
   1977 			md_mn_msg_meta_db_detach_t	detach;
   1978 			int				send_rval;
   1979 
   1980 			/*
   1981 			 * The following algorithm is used to detach replicas.
   1982 			 * 	- META_DB_DETACH message generates submsgs
   1983 			 * 		- BLOCK parse (master)
   1984 			 * 		- MDDB_DETACH replicas
   1985 			 * 		- UNBLOCK parse (master) causing parse
   1986 			 *		information to be sent from master
   1987 			 *		to slaves at a higher class than the
   1988 			 *		unblock so the parse message will
   1989 			 *		reach slaves before unblock message.
   1990 			 */
   1991 			(void) splitname(np->bname, &detach.msg_splitname);
   1992 
   1993 			/* Set devid to NULL until devids are supported */
   1994 			detach.msg_devid[0] = NULL;
   1995 
   1996 			/*
   1997 			 * If reconfig cycle has been started, this node is
   1998 			 * stuck in the return step until this command has
   1999 			 * completed.  If mdcommd is suspended, ask
   2000 			 * send_message to fail (instead of retrying)
   2001 			 * so that metaset can finish allowing the reconfig
   2002 			 * cycle to proceed.
   2003 			 */
   2004 			flags = MD_MSGF_FAIL_ON_SUSPEND;
   2005 			if (stale_bool == TRUE)
   2006 				flags |= MD_MSGF_NO_LOG;
   2007 			send_rval = mdmn_send_message(sp->setno,
   2008 			    MD_MN_MSG_META_DB_DETACH,
   2009 			    flags, 0, (char *)&detach,
   2010 			    sizeof (md_mn_msg_meta_db_detach_t),
   2011 			    &resultp, ep);
   2012 			if (send_rval != 0) {
   2013 				rval = -1;
   2014 				if (resultp == NULL)
   2015 					(void) mddserror(ep,
   2016 					    MDE_DS_COMMD_SEND_FAIL,
   2017 					    sp->setno, NULL, NULL,
   2018 					    sp->setname);
   2019 				else {
   2020 					(void) mdstealerror(ep,
   2021 					    &(resultp->mmr_ep));
   2022 					if (mdisok(ep)) {
   2023 						(void) mddserror(ep,
   2024 						    MDE_DS_COMMD_SEND_FAIL,
   2025 						    sp->setno, NULL, NULL,
   2026 						    sp->setname);
   2027 					}
   2028 					free_result(resultp);
   2029 				}
   2030 				goto out;
   2031 			}
   2032 			if (resultp)
   2033 				free_result(resultp);
   2034 		} else {
   2035 			i = 0;
   2036 			while (i < c.c_dbcnt) {
   2037 				char	*devname;
   2038 
   2039 				c.c_id = i;
   2040 
   2041 				/* Don't need devid info from this ioctl */
   2042 				c.c_locator.l_devid = (uint64_t)0;
   2043 				c.c_locator.l_devid_flags = 0;
   2044 
   2045 				if (metaioctl(MD_DB_GETDEV, &c,
   2046 				    &c.c_mde, NULL)) {
   2047 					rval = mdstealerror(ep, &c.c_mde);
   2048 					goto out;
   2049 				}
   2050 
   2051 				devname = splicename(&c.c_devname);
   2052 
   2053 				if (strstr(devname, META_LONGDISKNAME_STR)
   2054 				    != NULL) {
   2055 					Free(devname);
   2056 					devname = getlongname(&c, ep);
   2057 					if (devname == NULL) {
   2058 						return (-1);
   2059 					}
   2060 				}
   2061 
   2062 				if (strcmp(devname, np->bname) != 0) {
   2063 					Free(devname);
   2064 					i++;
   2065 					continue;
   2066 				}
   2067 				Free(devname);
   2068 
   2069 				/* Don't need devid info from this ioctl */
   2070 				c.c_locator.l_devid = (uint64_t)0;
   2071 				c.c_locator.l_devid_flags = 0;
   2072 
   2073 				if (metaioctl(MD_DB_DELDEV, &c,
   2074 				    &c.c_mde, NULL) != 0) {
   2075 					rval = mdstealerror(ep, &c.c_mde);
   2076 					goto out;
   2077 				}
   2078 
   2079 				/* Not incrementing "i" intentionally */
   2080 			}
   2081 		}
   2082 		if (! metaislocalset(sp)) {
   2083 			/* update the dbcnt and size in dd */
   2084 			for (p = dd; p != NULL; p = p->dd_next) {
   2085 				if (p->dd_dnp == np->drivenamep) {
   2086 					p->dd_dbcnt = 0;
   2087 					p->dd_dbsize  = 0;
   2088 					break;
   2089 				}
   2090 			}
   2091 
   2092 			/*
   2093 			 * Slam a dummy master block and make it self
   2094 			 * identifying
   2095 			 */
   2096 			if ((fd = open(np->rname, O_RDWR)) >= 0) {
   2097 				meta_mkdummymaster(sp, fd, 16);
   2098 				(void) close(fd);
   2099 			}
   2100 		}
   2101 	}
   2102 out:
   2103 	if (metaislocalset(sp)) {
   2104 		/*
   2105 		 * Stop all the daemons if there are
   2106 		 * no more replicas so that the module can be
   2107 		 * unloaded.
   2108 		 */
   2109 		if (rval == 0 && stop_svmdaemons == 1) {
   2110 			char buf[MAXPATHLEN];
   2111 			int i;
   2112 
   2113 			for (i = 0; i < DAEMON_COUNT; i++) {
   2114 				(void) snprintf(buf, MAXPATHLEN,
   2115 				    "/usr/bin/pkill -%s -x %s",
   2116 				    svmd_kill_list[i].svmd_kill_val,
   2117 				    svmd_kill_list[i].svmd_name);
   2118 				if (pclose(popen(buf, "w")) == -1)
   2119 					md_perror(buf);
   2120 			}
   2121 
   2122 			if (meta_smf_disable(META_SMF_ALL, &status) == -1) {
   2123 				mde_perror(&status, "");
   2124 				mdclrerror(&status);
   2125 			}
   2126 		}
   2127 		if (buildconf(sp, &status)) {
   2128 			/* Don't mask any previous errors */
   2129 			if (rval == 0)
   2130 				rval = mdstealerror(ep, &status);
   2131 			else
   2132 				mdclrerror(&status);
   2133 			return (rval);
   2134 		}
   2135 
   2136 		if (meta_db_patch(sysfilename, NULL, 0, &status)) {
   2137 			/* Don't mask any previous errors */
   2138 			if (rval == 0)
   2139 				rval = mdstealerror(ep, &status);
   2140 			else
   2141 				mdclrerror(&status);
   2142 		}
   2143 	} else {
   2144 		if (update_dbinfo_on_drives(sp, dd,
   2145 		    (force_option & MDFORCE_SET_LOCKED),
   2146 		    ((force_option & MDFORCE_LOCAL) |
   2147 		    (force_option & MDFORCE_DS)), &status)) {
   2148 			/* Don't mask any previous errors */
   2149 			if (rval == 0)
   2150 				rval = mdstealerror(ep, &status);
   2151 			else
   2152 				mdclrerror(&status);
   2153 		}
   2154 		metafreedrivedesc(&dd);
   2155 	}
   2156 	if ((metaislocalset(sp)) || (!(MD_MNSET_DESC(sd)))) {
   2157 		for (nlp = db_nlp; (nlp != NULL); nlp = nlp->next) {
   2158 			meta_invalidate_name(nlp->namep);
   2159 		}
   2160 	}
   2161 	return (rval);
   2162 }
   2163 
   2164 static md_replica_t *
   2165 metareplicaname(
   2166 	mdsetname_t		*sp,
   2167 	int			flags,
   2168 	struct mddb_config	*c,
   2169 	md_error_t		*ep
   2170 )
   2171 {
   2172 	md_replica_t	*rp;
   2173 	char		*devname;
   2174 	size_t		sz;
   2175 	devid_nmlist_t	*disklist = NULL;
   2176 	char		*devid_str;
   2177 
   2178 	/* allocate replicaname */
   2179 	rp = Zalloc(sizeof (*rp));
   2180 
   2181 	/* get device name */
   2182 	devname = splicename(&c->c_devname);
   2183 
   2184 	/*
   2185 	 * Check if the device has a long name (>40 characters) and
   2186 	 * if so then we have to use devids to get the device name.
   2187 	 * If this cannot be done then we have to fail the request.
   2188 	 */
   2189 	if (strstr(devname, META_LONGDISKNAME_STR) != NULL) {
   2190 		if (c->c_locator.l_devid != NULL) {
   2191 			if (meta_deviceid_to_nmlist("/dev/dsk",
   2192 			    (ddi_devid_t)(uintptr_t)c->c_locator.l_devid,
   2193 			    c->c_locator.l_minor_name, &disklist) != 0) {
   2194 				devid_str = devid_str_encode(
   2195 				    (ddi_devid_t)(uintptr_t)
   2196 				    c->c_locator.l_devid, NULL);
   2197 				(void) mderror(ep, MDE_MISSING_DEVID_DISK, "");
   2198 				mderrorextra(ep, devid_str);
   2199 				if (devid_str != NULL)
   2200 					devid_str_free(devid_str);
   2201 				Free(rp);
   2202 				Free(devname);
   2203 				return (NULL);
   2204 			}
   2205 		} else {
   2206 			(void) mderror(ep, MDE_NODEVID, "");
   2207 			Free(rp);
   2208 			Free(devname);
   2209 			return (NULL);
   2210 		}
   2211 		Free(devname);
   2212 		devname = disklist[0].devname;
   2213 	}
   2214 
   2215 	if (flags & PRINT_FAST) {
   2216 		if ((rp->r_namep = metaname_fast(&sp, devname,
   2217 		    LOGICAL_DEVICE, ep)) == NULL) {
   2218 			Free(devname);
   2219 			Free(rp);
   2220 			return (NULL);
   2221 		}
   2222 	} else {
   2223 		if ((rp->r_namep = metaname(&sp, devname,
   2224 		    LOGICAL_DEVICE, ep)) == NULL) {
   2225 			Free(devname);
   2226 			Free(rp);
   2227 			return (NULL);
   2228 		}
   2229 	}
   2230 	Free(devname);
   2231 
   2232 	/* make sure it's OK */
   2233 	if ((! (flags & MD_BASICNAME_OK)) &&
   2234 	    (metachkcomp(rp->r_namep, ep) != 0)) {
   2235 		Free(rp);
   2236 		return (NULL);
   2237 	}
   2238 
   2239 	rp->r_blkno = (daddr_t)MD_DISKADDR_ERROR;
   2240 	rp->r_nblk = (daddr_t)MD_DISKADDR_ERROR;
   2241 	rp->r_flags = c->c_locator.l_flags | MDDB_F_NODEVID;
   2242 	if (c->c_locator.l_devid_flags & MDDB_DEVID_VALID) {
   2243 		sz = devid_sizeof((ddi_devid_t)(uintptr_t)
   2244 		    (c->c_locator.l_devid));
   2245 		if ((rp->r_devid = (ddi_devid_t)malloc(sz)) ==
   2246 		    (ddi_devid_t)NULL) {
   2247 			Free(rp);
   2248 			return (NULL);
   2249 		}
   2250 		(void) memcpy((void *)rp->r_devid,
   2251 		    (void *)(uintptr_t)c->c_locator.l_devid, sz);
   2252 		(void) strcpy(rp->r_minor_name, c->c_locator.l_minor_name);
   2253 		rp->r_flags &= ~MDDB_F_NODEVID;
   2254 		/* Overwrite dev derived from name with dev from devid */
   2255 		rp->r_namep->dev = meta_expldev(c->c_locator.l_dev);
   2256 	}
   2257 	(void) strcpy(rp->r_driver_name, c->c_locator.l_driver);
   2258 
   2259 	rp->r_blkno = c->c_locator.l_blkno;
   2260 	if (c->c_dbend != 0)
   2261 		rp->r_nblk = c->c_dbend - c->c_locator.l_blkno + 1;
   2262 
   2263 	/* return replica */
   2264 	return (rp);
   2265 }
   2266 
   2267 /*
   2268  * free replica list
   2269  */
   2270 void
   2271 metafreereplicalist(
   2272 	md_replicalist_t	*rlp
   2273 )
   2274 {
   2275 	md_replicalist_t	*rl = NULL;
   2276 
   2277 	for (/* void */; (rlp != NULL); rlp = rl) {
   2278 		rl = rlp->rl_next;
   2279 		if (rlp->rl_repp->r_devid != (ddi_devid_t)0) {
   2280 			free(rlp->rl_repp->r_devid);
   2281 		}
   2282 		Free(rlp->rl_repp);
   2283 		Free(rlp);
   2284 	}
   2285 }
   2286 
   2287 /*
   2288  * return list of all replicas in set
   2289  */
   2290 int
   2291 metareplicalist(
   2292 	mdsetname_t		*sp,
   2293 	int			flags,
   2294 	md_replicalist_t	**rlpp,
   2295 	md_error_t		*ep
   2296 )
   2297 {
   2298 	md_replicalist_t	**tail = rlpp;
   2299 	int			count = 0;
   2300 	struct mddb_config	c;
   2301 	int			i;
   2302 	char			*devid;
   2303 
   2304 	/* for each replica */
   2305 	i = 0;
   2306 	do {
   2307 		md_replica_t	*rp;
   2308 
   2309 		/* get next replica */
   2310 		(void) memset(&c, 0, sizeof (c));
   2311 		c.c_id = i;
   2312 		c.c_setno = sp->setno;
   2313 
   2314 		c.c_locator.l_devid_flags = MDDB_DEVID_GETSZ;
   2315 		if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) {
   2316 			if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) {
   2317 				mdclrerror(&c.c_mde);
   2318 				break;	/* handle none at all */
   2319 			}
   2320 			(void) mdstealerror(ep, &c.c_mde);
   2321 			goto out;
   2322 		}
   2323 
   2324 		if (c.c_locator.l_devid_flags & MDDB_DEVID_SZ) {
   2325 			if ((devid = malloc(c.c_locator.l_devid_sz)) == NULL) {
   2326 				(void) mdsyserror(ep, ENOMEM, META_DBCONF);
   2327 				goto out;
   2328 			}
   2329 			c.c_locator.l_devid = (uintptr_t)devid;
   2330 			/*
   2331 			 * Turn on space and sz flags since 'sz' amount of
   2332 			 * space has been alloc'd.
   2333 			 */
   2334 			c.c_locator.l_devid_flags =
   2335 			    MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
   2336 		}
   2337 
   2338 		if (metaioctl(MD_DB_ENDDEV, &c, &c.c_mde, NULL) != 0) {
   2339 			if (mdismddberror(&c.c_mde, MDE_DB_INVALID)) {
   2340 				mdclrerror(&c.c_mde);
   2341 				break;	/* handle none at all */
   2342 			}
   2343 			(void) mdstealerror(ep, &c.c_mde);
   2344 			goto out;
   2345 		}
   2346 
   2347 		/*
   2348 		 * Paranoid check - shouldn't happen, but is left as
   2349 		 * a place holder for changes that will be needed after
   2350 		 * dynamic reconfiguration changes are added to SVM (to
   2351 		 * support movement of disks at any point in time).
   2352 		 */
   2353 		if (c.c_locator.l_devid_flags & MDDB_DEVID_NOSPACE) {
   2354 			(void) fprintf(stderr,
   2355 			    dgettext(TEXT_DOMAIN,
   2356 			    "Error: Relocation Information "
   2357 			    "(drvnm=%s, mnum=0x%lx) \n"
   2358 			    "relocation information size changed - \n"
   2359 			    "rerun command\n"),
   2360 			    c.c_locator.l_driver, c.c_locator.l_mnum);
   2361 			(void) mderror(ep, MDE_DEVID_TOOBIG, NULL);
   2362 			goto out;
   2363 		}
   2364 
   2365 		if (c.c_dbcnt == 0)
   2366 			break;		/* handle none at all */
   2367 
   2368 		/* get info */
   2369 		if ((rp = metareplicaname(sp, flags, &c, ep)) == NULL)
   2370 			goto out;
   2371 
   2372 		/* append to list */
   2373 		*tail = Zalloc(sizeof (**tail));
   2374 		(*tail)->rl_repp = rp;
   2375 		tail = &(*tail)->rl_next;
   2376 		++count;
   2377 
   2378 		if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
   2379 			free(devid);
   2380 			c.c_locator.l_devid_flags = 0;
   2381 		}
   2382 
   2383 	} while (++i < c.c_dbcnt);
   2384 
   2385 	if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
   2386 		free(devid);
   2387 	}
   2388 
   2389 	/* return count */
   2390 	return (count);
   2391 
   2392 	/* cleanup, return error */
   2393 out:
   2394 	if (c.c_locator.l_devid_flags & MDDB_DEVID_SPACE) {
   2395 		free(devid);
   2396 	}
   2397 	metafreereplicalist(*rlpp);
   2398 	*rlpp = NULL;
   2399 	return (-1);
   2400 }
   2401 
   2402 /*
   2403  * meta_sync_db_locations - get list of replicas from kernel and write
   2404  * 	out to mddb.cf and md.conf.  'Syncs up' the replica list in
   2405  * 	the kernel with the replica list in the conf files.
   2406  *
   2407  */
   2408 void
   2409 meta_sync_db_locations(
   2410 	mdsetname_t	*sp,
   2411 	md_error_t	*ep
   2412 )
   2413 {
   2414 	char		*sname = 0;		/* system file name */
   2415 	char 		*cname = 0;		/* config file name */
   2416 
   2417 	if (!metaislocalset(sp))
   2418 		return;
   2419 
   2420 	/* Updates backup of configuration file (aka mddb.cf) */
   2421 	if (buildconf(sp, ep) != 0)
   2422 		return;
   2423 
   2424 	/* Updates system configuration file (aka md.conf) */
   2425 	(void) meta_db_patch(sname, cname, 0, ep);
   2426 }
   2427 
   2428 /*
   2429  * setup_db_locations - parse the mddb.cf file and
   2430  *			tells the driver which db locations to use.
   2431  */
   2432 int
   2433 meta_setup_db_locations(
   2434 	md_error_t	*ep
   2435 )
   2436 {
   2437 	mddb_config_t	c;
   2438 	FILE		*fp;
   2439 	char		inbuff[1024];
   2440 	char		*buff;
   2441 	uint_t		i;
   2442 	size_t		sz;
   2443 	int		rval = 0;
   2444 	char		*devidp;
   2445 	uint_t		devid_size;
   2446 	char		*minor_name = NULL;
   2447 	ddi_devid_t	devid_decode;
   2448 	int		checksum;
   2449 
   2450 	/* do mddb.cf file */
   2451 	(void) memset(&c, '\0', sizeof (c));
   2452 	if ((fp = fopen(META_DBCONF, "r")) == NULL) {
   2453 		if (errno != ENOENT)
   2454 			return (mdsyserror(ep, errno, META_DBCONF));
   2455 	}
   2456 	while ((fp != NULL) && ((buff = fgets(inbuff, (sizeof (inbuff) - 1),
   2457 	    fp)) != NULL)) {
   2458 
   2459 		/* ignore comments */
   2460 		if (*buff == '#')
   2461 			continue;
   2462 
   2463 		/* parse locator */
   2464 		(void) memset(&c, 0, sizeof (c));
   2465 		c.c_setno = MD_LOCAL_SET;
   2466 		i = strcspn(buff, " \t");
   2467 		if (i > sizeof (c.c_locator.l_driver))
   2468 			i = sizeof (c.c_locator.l_driver);
   2469 		(void) strncpy(c.c_locator.l_driver, buff, i);
   2470 		buff += i;
   2471 		c.c_locator.l_dev =
   2472 		    makedev((major_t)0, (minor_t)strtol(buff, &buff, 10));
   2473 		c.c_locator.l_blkno = (daddr_t)strtol(buff, &buff, 10);
   2474 		c.c_locator.l_mnum = minor(c.c_locator.l_dev);
   2475 
   2476 		/* parse out devid */
   2477 		while (isspace((int)(*buff)))
   2478 			buff += 1;
   2479 		i = strcspn(buff, " \t");
   2480 		if ((devidp = (char *)malloc(i+1)) == NULL)
   2481 			return (mdsyserror(ep, ENOMEM, META_DBCONF));
   2482 
   2483 		(void) strncpy(devidp, buff, i);
   2484 		devidp[i] = '\0';
   2485 		if (devid_str_decode(devidp, &devid_decode,
   2486 		    &minor_name) == -1) {
   2487 			free(devidp);
   2488 			continue;
   2489 		}
   2490 
   2491 		/* Conf file must have minor name associated with devid */
   2492 		if (minor_name == NULL) {
   2493 			free(devidp);
   2494 			devid_free(devid_decode);
   2495 			continue;
   2496 		}
   2497 
   2498 		sz = devid_sizeof(devid_decode);
   2499 		/* Copy to devid size buffer that ioctl expects */
   2500 		if ((c.c_locator.l_devid = (uintptr_t)malloc(sz)) == NULL) {
   2501 			devid_free(devid_decode);
   2502 			free(minor_name);
   2503 			free(devidp);
   2504 			return (mdsyserror(ep, ENOMEM, META_DBCONF));
   2505 		}
   2506 
   2507 		(void) memcpy((void *)(uintptr_t)c.c_locator.l_devid,
   2508 		    (void *)devid_decode, sz);
   2509 
   2510 		devid_free(devid_decode);
   2511 
   2512 		if (strlen(minor_name) > MDDB_MINOR_NAME_MAX) {
   2513 			free(minor_name);
   2514 			free(devidp);
   2515 			free((void *)(uintptr_t)c.c_locator.l_devid);
   2516 			return (mdsyserror(ep, ENOMEM, META_DBCONF));
   2517 		}
   2518 		(void) strcpy(c.c_locator.l_minor_name, minor_name);
   2519 		free(minor_name);
   2520 		c.c_locator.l_devid_flags = MDDB_DEVID_VALID |
   2521 		    MDDB_DEVID_SPACE | MDDB_DEVID_SZ;
   2522 		c.c_locator.l_devid_sz = sz;
   2523 
   2524 		devid_size = strlen(devidp);
   2525 		buff += devid_size;
   2526 
   2527 		checksum = strtol(buff, &buff, 10);
   2528 		for (i = 0; c.c_locator.l_driver[i] != 0; i++)
   2529 			checksum += c.c_locator.l_driver[i];
   2530 		for (i = 0; i < devid_size; i++) {
   2531 			checksum += devidp[i];
   2532 		}
   2533 		free(devidp);
   2534 
   2535 		checksum += minor(c.c_locator.l_dev);
   2536 		checksum += c.c_locator.l_blkno;
   2537 		if (checksum != 42) {
   2538 			/* overwritten later for more serious problems */
   2539 			rval = mderror(ep, MDE_MDDB_CKSUM, META_DBCONF);
   2540 			free((void *)(uintptr_t)c.c_locator.l_devid);
   2541 			continue;
   2542 		}
   2543 		c.c_locator.l_flags = 0;
   2544 
   2545 		/* use db location */
   2546 		if (metaioctl(MD_DB_USEDEV, &c, &c.c_mde, NULL) != 0) {
   2547 			free((void *)(uintptr_t)c.c_locator.l_devid);
   2548 			return (mdstealerror(ep, &c.c_mde));
   2549 		}
   2550 
   2551 		/* free up devid if in use */
   2552 		free((void *)(uintptr_t)c.c_locator.l_devid);
   2553 		c.c_locator.l_devid = (uint64_t)0;
   2554 		c.c_locator.l_devid_flags = 0;
   2555 	}
   2556 	if ((fp) && (fclose(fp) != 0))
   2557 		return (mdsyserror(ep, errno, META_DBCONF));
   2558 
   2559 	/* check for stale database */
   2560 	(void) memset((char *)&c, 0, sizeof (struct mddb_config));
   2561 	c.c_id = 0;
   2562 	c.c_setno = MD_LOCAL_SET;
   2563 
   2564 	/*
   2565 	 * While we do not need the devid here we may need to
   2566 	 * know if devid's are being used by the kernel for
   2567 	 * the replicas. This is because under some circumstances
   2568 	 * we can only manipulate the SVM configuration if the
   2569 	 * kernel is using devid's.
   2570 	 */
   2571 	c.c_locator.l_devid = (uint64_t)0;
   2572 	c.c_locator.l_devid_flags = MDDB_DEVID_GETSZ;
   2573 	c.c_locator.l_devid_sz = 0;
   2574 
   2575 	if (metaioctl(MD_DB_GETDEV, &c, &c.c_mde, NULL) != 0) {
   2576 		if (! mdismddberror(&c.c_mde, MDE_DB_INVALID))
   2577 			return (mdstealerror(ep, &c.c_mde));
   2578 		mdclrerror(&c.c_mde);
   2579 	}
   2580 
   2581 	if (c.c_flags & MDDB_C_STALE)
   2582 		return (mdmddberror(ep, MDE_DB_STALE, NODEV32, MD_LOCAL_SET,
   2583 		    0, NULL));
   2584 
   2585 	if (c.c_locator.l_devid_sz != 0) {
   2586 		/*
   2587 		 * Devid's are being used to track the replicas because
   2588 		 * there is space for a devid.
   2589 		 */
   2590 		devid_in_use = TRUE;
   2591 	}
   2592 
   2593 	/* success */
   2594 	return (rval);
   2595 }
   2596 
   2597 /*
   2598  * meta_db_minreplica - returns the minimum size replica currently in use.
   2599  */
   2600 daddr_t
   2601 meta_db_minreplica(
   2602 	mdsetname_t	*sp,
   2603 	md_error_t	*ep
   2604 )
   2605 {
   2606 	md_replica_t		*r;
   2607 	md_replicalist_t	*rl, *rlp = NULL;
   2608 	daddr_t			nblks = 0;
   2609 
   2610 	if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp, ep) < 0)
   2611 		return (-1);
   2612 
   2613 	if (rlp == NULL)
   2614 		return (-1);
   2615 
   2616 	/* find the smallest existing replica */
   2617 	for (rl = rlp; rl != NULL; rl = rl->rl_next) {
   2618 		r = rl->rl_repp;
   2619 		nblks = ((nblks == 0) ? r->r_nblk : min(r->r_nblk, nblks));
   2620 	}
   2621 
   2622 	metafreereplicalist(rlp);
   2623 	return (nblks);
   2624 }
   2625 
   2626 /*
   2627  * meta_get_replica_names
   2628  *  returns an mdnamelist_t of replica slices
   2629  */
   2630 /*ARGSUSED*/
   2631 int
   2632 meta_get_replica_names(
   2633 	mdsetname_t	*sp,
   2634 	mdnamelist_t	**nlpp,
   2635 	int		options,
   2636 	md_error_t	*ep
   2637 )
   2638 {
   2639 	md_replicalist_t	*rlp = NULL;
   2640 	md_replicalist_t	*rl;
   2641 	mdnamelist_t		**tailpp = nlpp;
   2642 	int			cnt = 0;
   2643 
   2644 	assert(nlpp != NULL);
   2645 
   2646 	if (!metaislocalset(sp))
   2647 		goto out;
   2648 
   2649 	/* get replicas */
   2650 	if (metareplicalist(sp, MD_BASICNAME_OK, &rlp, ep) < 0) {
   2651 		cnt = -1;
   2652 		goto out;
   2653 	}
   2654 
   2655 	/* build name list */
   2656 	for (rl = rlp; (rl != NULL); rl = rl->rl_next) {
   2657 		/*
   2658 		 * Add the name struct to the end of the
   2659 		 * namelist but keep a pointer to the last
   2660 		 * element so that we don't incur the overhead
   2661 		 * of traversing the list each time
   2662 		 */
   2663 		tailpp = meta_namelist_append_wrapper(
   2664 		    tailpp, rl->rl_repp->r_namep);
   2665 		++cnt;
   2666 	}
   2667 
   2668 	/* cleanup, return count or error */
   2669 out:
   2670 	metafreereplicalist(rlp);
   2671 	return (cnt);
   2672 }
   2673