Home | History | Annotate | Download | only in zoneadmd
      1      0     stevel /*
      2      0     stevel  * CDDL HEADER START
      3      0     stevel  *
      4      0     stevel  * The contents of this file are subject to the terms of the
      5   1544   eschrock  * Common Development and Distribution License (the "License").
      6   1544   eschrock  * You may not use this file except in compliance with the License.
      7      0     stevel  *
      8      0     stevel  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9      0     stevel  * or http://www.opensolaris.org/os/licensing.
     10      0     stevel  * See the License for the specific language governing permissions
     11      0     stevel  * and limitations under the License.
     12      0     stevel  *
     13      0     stevel  * When distributing Covered Code, include this CDDL HEADER in each
     14      0     stevel  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15      0     stevel  * If applicable, add the following below this CDDL HEADER, with the
     16      0     stevel  * fields enclosed by brackets "[]" replaced with your own identifying
     17      0     stevel  * information: Portions Copyright [yyyy] [name of copyright owner]
     18      0     stevel  *
     19      0     stevel  * CDDL HEADER END
     20      0     stevel  */
     21   1645      comay 
     22      0     stevel /*
     23   8485      Peter  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24      0     stevel  * Use is subject to license terms.
     25      0     stevel  */
     26      0     stevel 
     27      0     stevel /*
     28      0     stevel  * This module contains functions used to bring up and tear down the
     29      0     stevel  * Virtual Platform: [un]mounting file-systems, [un]plumbing network
     30      0     stevel  * interfaces, [un]configuring devices, establishing resource controls,
     31      0     stevel  * and creating/destroying the zone in the kernel.  These actions, on
     32      0     stevel  * the way up, ready the zone; on the way down, they halt the zone.
     33      0     stevel  * See the much longer block comment at the beginning of zoneadmd.c
     34      0     stevel  * for a bigger picture of how the whole program functions.
     35    766   carlsonj  *
     36    766   carlsonj  * This module also has primary responsibility for the layout of "scratch
     37    766   carlsonj  * zones."  These are mounted, but inactive, zones that are used during
     38    766   carlsonj  * operating system upgrade and potentially other administrative action.  The
     39    766   carlsonj  * scratch zone environment is similar to the miniroot environment.  The zone's
     40    766   carlsonj  * actual root is mounted read-write on /a, and the standard paths (/usr,
     41    766   carlsonj  * /sbin, /lib) all lead to read-only copies of the running system's binaries.
     42    766   carlsonj  * This allows the administrative tools to manipulate the zone using "-R /a"
     43    766   carlsonj  * without relying on any binaries in the zone itself.
     44    766   carlsonj  *
     45    766   carlsonj  * If the scratch zone is on an alternate root (Live Upgrade [LU] boot
     46    766   carlsonj  * environment), then we must resolve the lofs mounts used there to uncover
     47    766   carlsonj  * writable (unshared) resources.  Shared resources, though, are always
     48    766   carlsonj  * read-only.  In addition, if the "same" zone with a different root path is
     49    766   carlsonj  * currently running, then "/b" inside the zone points to the running zone's
     50    766   carlsonj  * root.  This allows LU to synchronize configuration files during the upgrade
     51    766   carlsonj  * process.
     52    766   carlsonj  *
     53    766   carlsonj  * To construct this environment, this module creates a tmpfs mount on
     54    766   carlsonj  * $ZONEPATH/lu.  Inside this scratch area, the miniroot-like environment as
     55    766   carlsonj  * described above is constructed on the fly.  The zone is then created using
     56    766   carlsonj  * $ZONEPATH/lu as the root.
     57    766   carlsonj  *
     58    766   carlsonj  * Note that scratch zones are inactive.  The zone's bits are not running and
     59    766   carlsonj  * likely cannot be run correctly until upgrade is done.  Init is not running
     60    766   carlsonj  * there, nor is SMF.  Because of this, the "mounted" state of a scratch zone
     61    766   carlsonj  * is not a part of the usual halt/ready/boot state machine.
     62      0     stevel  */
     63      0     stevel 
     64      0     stevel #include <sys/param.h>
     65      0     stevel #include <sys/mount.h>
     66      0     stevel #include <sys/mntent.h>
     67      0     stevel #include <sys/socket.h>
     68      0     stevel #include <sys/utsname.h>
     69      0     stevel #include <sys/types.h>
     70      0     stevel #include <sys/stat.h>
     71      0     stevel #include <sys/sockio.h>
     72      0     stevel #include <sys/stropts.h>
     73      0     stevel #include <sys/conf.h>
     74   8662     Jordan #include <sys/systeminfo.h>
     75   3448   dh155122 
     76   3448   dh155122 #include <libdlpi.h>
     77   3871   yz147064 #include <libdllink.h>
     78   5895   yz147064 #include <libdlvlan.h>
     79      0     stevel 
     80      0     stevel #include <inet/tcp.h>
     81      0     stevel #include <arpa/inet.h>
     82      0     stevel #include <netinet/in.h>
     83      0     stevel #include <net/route.h>
     84      0     stevel 
     85      0     stevel #include <stdio.h>
     86      0     stevel #include <errno.h>
     87      0     stevel #include <fcntl.h>
     88      0     stevel #include <unistd.h>
     89      0     stevel #include <rctl.h>
     90      0     stevel #include <stdlib.h>
     91      0     stevel #include <string.h>
     92      0     stevel #include <strings.h>
     93      0     stevel #include <wait.h>
     94      0     stevel #include <limits.h>
     95      0     stevel #include <libgen.h>
     96    789     ahrens #include <libzfs.h>
     97   2621      llai1 #include <libdevinfo.h>
     98      0     stevel #include <zone.h>
     99      0     stevel #include <assert.h>
    100   2303   carlsonj #include <libcontract.h>
    101   2303   carlsonj #include <libcontract_priv.h>
    102   2303   carlsonj #include <uuid/uuid.h>
    103      0     stevel 
    104      0     stevel #include <sys/mntio.h>
    105      0     stevel #include <sys/mnttab.h>
    106      0     stevel #include <sys/fs/autofs.h>	/* for _autofssys() */
    107      0     stevel #include <sys/fs/lofs_info.h>
    108    789     ahrens #include <sys/fs/zfs.h>
    109      0     stevel 
    110      0     stevel #include <pool.h>
    111      0     stevel #include <sys/pool.h>
    112   3247   gjelinek #include <sys/priocntl.h>
    113      0     stevel 
    114   2712    nn35248 #include <libbrand.h>
    115   2712    nn35248 #include <sys/brand.h>
    116      0     stevel #include <libzonecfg.h>
    117   2170      evanl #include <synch.h>
    118   2611   vp157776 
    119      0     stevel #include "zoneadmd.h"
    120   1676        jpk #include <tsol/label.h>
    121   1676        jpk #include <libtsnet.h>
    122   1676        jpk #include <sys/priv.h>
    123      0     stevel 
    124      0     stevel #define	V4_ADDR_LEN	32
    125      0     stevel #define	V6_ADDR_LEN	128
    126      0     stevel 
    127      0     stevel #define	IPD_DEFAULT_OPTS \
    128      0     stevel 	MNTOPT_RO "," MNTOPT_LOFS_NOSUB "," MNTOPT_NODEVICES
    129      0     stevel 
    130      0     stevel #define	DFSTYPES	"/etc/dfs/fstypes"
    131   1676        jpk #define	MAXTNZLEN	2048
    132      0     stevel 
    133   5829   gjelinek #define	ALT_MOUNT(mount_cmd) 	((mount_cmd) != Z_MNT_BOOT)
    134   5829   gjelinek 
    135      0     stevel /* for routing socket */
    136      0     stevel static int rts_seqno = 0;
    137      0     stevel 
    138    766   carlsonj /* mangled zone name when mounting in an alternate root environment */
    139    766   carlsonj static char kernzone[ZONENAME_MAX];
    140    766   carlsonj 
    141    766   carlsonj /* array of cached mount entries for resolve_lofs */
    142    766   carlsonj static struct mnttab *resolve_lofs_mnts, *resolve_lofs_mnt_max;
    143   1676        jpk 
    144   1676        jpk /* for Trusted Extensions */
    145   1676        jpk static tsol_zcent_t *get_zone_label(zlog_t *, priv_set_t *);
    146   1676        jpk static int tsol_mounts(zlog_t *, char *, char *);
    147   1676        jpk static void tsol_unmounts(zlog_t *, char *);
    148   5596   dh155122 
    149   1676        jpk static m_label_t *zlabel = NULL;
    150   1676        jpk static m_label_t *zid_label = NULL;
    151   1676        jpk static priv_set_t *zprivs = NULL;
    152    766   carlsonj 
    153      0     stevel /* from libsocket, not in any header file */
    154      0     stevel extern int getnetmaskbyaddr(struct in_addr, struct in_addr *);
    155   7370     Gerald 
    156   7370     Gerald /* from zoneadmd */
    157   7370     Gerald extern char query_hook[];
    158    766   carlsonj 
    159    766   carlsonj /*
    160    766   carlsonj  * An optimization for build_mnttable: reallocate (and potentially copy the
    161    766   carlsonj  * data) only once every N times through the loop.
    162    766   carlsonj  */
    163    766   carlsonj #define	MNTTAB_HUNK	32
    164      0     stevel 
    165      0     stevel /*
    166      0     stevel  * Private autofs system call
    167      0     stevel  */
    168      0     stevel extern int _autofssys(int, void *);
    169      0     stevel 
    170      0     stevel static int
    171      0     stevel autofs_cleanup(zoneid_t zoneid)
    172      0     stevel {
    173      0     stevel 	/*
    174      0     stevel 	 * Ask autofs to unmount all trigger nodes in the given zone.
    175      0     stevel 	 */
    176      0     stevel 	return (_autofssys(AUTOFS_UNMOUNTALL, (void *)zoneid));
    177    766   carlsonj }
    178    766   carlsonj 
    179    766   carlsonj static void
    180    766   carlsonj free_mnttable(struct mnttab *mnt_array, uint_t nelem)
    181    766   carlsonj {
    182    766   carlsonj 	uint_t i;
    183    766   carlsonj 
    184    766   carlsonj 	if (mnt_array == NULL)
    185    766   carlsonj 		return;
    186    766   carlsonj 	for (i = 0; i < nelem; i++) {
    187    766   carlsonj 		free(mnt_array[i].mnt_mountp);
    188    766   carlsonj 		free(mnt_array[i].mnt_fstype);
    189    766   carlsonj 		free(mnt_array[i].mnt_special);
    190    766   carlsonj 		free(mnt_array[i].mnt_mntopts);
    191    766   carlsonj 		assert(mnt_array[i].mnt_time == NULL);
    192    766   carlsonj 	}
    193    766   carlsonj 	free(mnt_array);
    194    766   carlsonj }
    195    766   carlsonj 
    196    766   carlsonj /*
    197    766   carlsonj  * Build the mount table for the zone rooted at "zroot", storing the resulting
    198    766   carlsonj  * array of struct mnttabs in "mnt_arrayp" and the number of elements in the
    199    766   carlsonj  * array in "nelemp".
    200    766   carlsonj  */
    201    766   carlsonj static int
    202    766   carlsonj build_mnttable(zlog_t *zlogp, const char *zroot, size_t zrootlen, FILE *mnttab,
    203    766   carlsonj     struct mnttab **mnt_arrayp, uint_t *nelemp)
    204    766   carlsonj {
    205    766   carlsonj 	struct mnttab mnt;
    206    766   carlsonj 	struct mnttab *mnts;
    207    766   carlsonj 	struct mnttab *mnp;
    208    766   carlsonj 	uint_t nmnt;
    209    766   carlsonj 
    210    766   carlsonj 	rewind(mnttab);
    211    766   carlsonj 	resetmnttab(mnttab);
    212    766   carlsonj 	nmnt = 0;
    213    766   carlsonj 	mnts = NULL;
    214    766   carlsonj 	while (getmntent(mnttab, &mnt) == 0) {
    215    766   carlsonj 		struct mnttab *tmp_array;
    216    766   carlsonj 
    217    766   carlsonj 		if (strncmp(mnt.mnt_mountp, zroot, zrootlen) != 0)
    218    766   carlsonj 			continue;
    219    766   carlsonj 		if (nmnt % MNTTAB_HUNK == 0) {
    220    766   carlsonj 			tmp_array = realloc(mnts,
    221    766   carlsonj 			    (nmnt + MNTTAB_HUNK) * sizeof (*mnts));
    222    766   carlsonj 			if (tmp_array == NULL) {
    223    766   carlsonj 				free_mnttable(mnts, nmnt);
    224    766   carlsonj 				return (-1);
    225    766   carlsonj 			}
    226    766   carlsonj 			mnts = tmp_array;
    227    766   carlsonj 		}
    228    766   carlsonj 		mnp = &mnts[nmnt++];
    229    766   carlsonj 
    230    766   carlsonj 		/*
    231    766   carlsonj 		 * Zero out any fields we're not using.
    232    766   carlsonj 		 */
    233    766   carlsonj 		(void) memset(mnp, 0, sizeof (*mnp));
    234    766   carlsonj 
    235    766   carlsonj 		if (mnt.mnt_special != NULL)
    236    766   carlsonj 			mnp->mnt_special = strdup(mnt.mnt_special);
    237    766   carlsonj 		if (mnt.mnt_mntopts != NULL)
    238    766   carlsonj 			mnp->mnt_mntopts = strdup(mnt.mnt_mntopts);
    239    766   carlsonj 		mnp->mnt_mountp = strdup(mnt.mnt_mountp);
    240    766   carlsonj 		mnp->mnt_fstype = strdup(mnt.mnt_fstype);
    241    766   carlsonj 		if ((mnt.mnt_special != NULL && mnp->mnt_special == NULL) ||
    242    766   carlsonj 		    (mnt.mnt_mntopts != NULL && mnp->mnt_mntopts == NULL) ||
    243    766   carlsonj 		    mnp->mnt_mountp == NULL || mnp->mnt_fstype == NULL) {
    244    766   carlsonj 			zerror(zlogp, B_TRUE, "memory allocation failed");
    245    766   carlsonj 			free_mnttable(mnts, nmnt);
    246    766   carlsonj 			return (-1);
    247    766   carlsonj 		}
    248    766   carlsonj 	}
    249    766   carlsonj 	*mnt_arrayp = mnts;
    250    766   carlsonj 	*nelemp = nmnt;
    251    766   carlsonj 	return (0);
    252    766   carlsonj }
    253    766   carlsonj 
    254    766   carlsonj /*
    255    766   carlsonj  * This is an optimization.  The resolve_lofs function is used quite frequently
    256    766   carlsonj  * to manipulate file paths, and on a machine with a large number of zones,
    257    766   carlsonj  * there will be a huge number of mounted file systems.  Thus, we trigger a
    258    766   carlsonj  * reread of the list of mount points
    259    766   carlsonj  */
    260    766   carlsonj static void
    261    766   carlsonj lofs_discard_mnttab(void)
    262    766   carlsonj {
    263    766   carlsonj 	free_mnttable(resolve_lofs_mnts,
    264    766   carlsonj 	    resolve_lofs_mnt_max - resolve_lofs_mnts);
    265    766   carlsonj 	resolve_lofs_mnts = resolve_lofs_mnt_max = NULL;
    266    766   carlsonj }
    267    766   carlsonj 
    268    766   carlsonj static int
    269    766   carlsonj lofs_read_mnttab(zlog_t *zlogp)
    270    766   carlsonj {
    271    766   carlsonj 	FILE *mnttab;
    272    766   carlsonj 	uint_t nmnts;
    273    766   carlsonj 
    274    766   carlsonj 	if ((mnttab = fopen(MNTTAB, "r")) == NULL)
    275    766   carlsonj 		return (-1);
    276    766   carlsonj 	if (build_mnttable(zlogp, "", 0, mnttab, &resolve_lofs_mnts,
    277    766   carlsonj 	    &nmnts) == -1) {
    278    766   carlsonj 		(void) fclose(mnttab);
    279    766   carlsonj 		return (-1);
    280    766   carlsonj 	}
    281    766   carlsonj 	(void) fclose(mnttab);
    282    766   carlsonj 	resolve_lofs_mnt_max = resolve_lofs_mnts + nmnts;
    283    766   carlsonj 	return (0);
    284    766   carlsonj }
    285    766   carlsonj 
    286    766   carlsonj /*
    287    766   carlsonj  * This function loops over potential loopback mounts and symlinks in a given
    288    766   carlsonj  * path and resolves them all down to an absolute path.
    289    766   carlsonj  */
    290   5576        edp void
    291    766   carlsonj resolve_lofs(zlog_t *zlogp, char *path, size_t pathlen)
    292    766   carlsonj {
    293    766   carlsonj 	int len, arlen;
    294    766   carlsonj 	const char *altroot;
    295    766   carlsonj 	char tmppath[MAXPATHLEN];
    296    766   carlsonj 	boolean_t outside_altroot;
    297    766   carlsonj 
    298    766   carlsonj 	if ((len = resolvepath(path, tmppath, sizeof (tmppath))) == -1)
    299    766   carlsonj 		return;
    300    766   carlsonj 	tmppath[len] = '\0';
    301    766   carlsonj 	(void) strlcpy(path, tmppath, sizeof (tmppath));
    302    766   carlsonj 
    303    766   carlsonj 	/* This happens once per zoneadmd operation. */
    304    766   carlsonj 	if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
    305    766   carlsonj 		return;
    306    766   carlsonj 
    307    766   carlsonj 	altroot = zonecfg_get_root();
    308    766   carlsonj 	arlen = strlen(altroot);
    309    766   carlsonj 	outside_altroot = B_FALSE;
    310    766   carlsonj 	for (;;) {
    311    766   carlsonj 		struct mnttab *mnp;
    312    766   carlsonj 
    313   3079     dminer 		/* Search in reverse order to find longest match */
    314   3079     dminer 		for (mnp = resolve_lofs_mnt_max - 1; mnp >= resolve_lofs_mnts;
    315   3079     dminer 		    mnp--) {
    316    766   carlsonj 			if (mnp->mnt_fstype == NULL ||
    317    766   carlsonj 			    mnp->mnt_mountp == NULL ||
    318   3079     dminer 			    mnp->mnt_special == NULL)
    319    766   carlsonj 				continue;
    320    766   carlsonj 			len = strlen(mnp->mnt_mountp);
    321    766   carlsonj 			if (strncmp(mnp->mnt_mountp, path, len) == 0 &&
    322    766   carlsonj 			    (path[len] == '/' || path[len] == '\0'))
    323    766   carlsonj 				break;
    324    766   carlsonj 		}
    325   3079     dminer 		if (mnp < resolve_lofs_mnts)
    326   3079     dminer 			break;
    327   3079     dminer 		/* If it's not a lofs then we're done */
    328   3079     dminer 		if (strcmp(mnp->mnt_fstype, MNTTYPE_LOFS) != 0)
    329    766   carlsonj 			break;
    330    766   carlsonj 		if (outside_altroot) {
    331    766   carlsonj 			char *cp;
    332    766   carlsonj 			int olen = sizeof (MNTOPT_RO) - 1;
    333    766   carlsonj 
    334    766   carlsonj 			/*
    335    766   carlsonj 			 * If we run into a read-only mount outside of the
    336    766   carlsonj 			 * alternate root environment, then the user doesn't
    337    766   carlsonj 			 * want this path to be made read-write.
    338    766   carlsonj 			 */
    339    766   carlsonj 			if (mnp->mnt_mntopts != NULL &&
    340    766   carlsonj 			    (cp = strstr(mnp->mnt_mntopts, MNTOPT_RO)) !=
    341    766   carlsonj 			    NULL &&
    342    766   carlsonj 			    (cp == mnp->mnt_mntopts || cp[-1] == ',') &&
    343    766   carlsonj 			    (cp[olen] == '\0' || cp[olen] == ',')) {
    344    766   carlsonj 				break;
    345    766   carlsonj 			}
    346    766   carlsonj 		} else if (arlen > 0 &&
    347    766   carlsonj 		    (strncmp(mnp->mnt_special, altroot, arlen) != 0 ||
    348    766   carlsonj 		    (mnp->mnt_special[arlen] != '\0' &&
    349    766   carlsonj 		    mnp->mnt_special[arlen] != '/'))) {
    350    766   carlsonj 			outside_altroot = B_TRUE;
    351    766   carlsonj 		}
    352    766   carlsonj 		/* use temporary buffer because new path might be longer */
    353    766   carlsonj 		(void) snprintf(tmppath, sizeof (tmppath), "%s%s",
    354    766   carlsonj 		    mnp->mnt_special, path + len);
    355    766   carlsonj 		if ((len = resolvepath(tmppath, path, pathlen)) == -1)
    356    766   carlsonj 			break;
    357    766   carlsonj 		path[len] = '\0';
    358    766   carlsonj 	}
    359    766   carlsonj }
    360    766   carlsonj 
    361    766   carlsonj /*
    362    766   carlsonj  * For a regular mount, check if a replacement lofs mount is needed because the
    363    766   carlsonj  * referenced device is already mounted somewhere.
    364    766   carlsonj  */
    365    766   carlsonj static int
    366    766   carlsonj check_lofs_needed(zlog_t *zlogp, struct zone_fstab *fsptr)
    367    766   carlsonj {
    368    766   carlsonj 	struct mnttab *mnp;
    369    766   carlsonj 	zone_fsopt_t *optptr, *onext;
    370    766   carlsonj 
    371    766   carlsonj 	/* This happens once per zoneadmd operation. */
    372    766   carlsonj 	if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
    373    766   carlsonj 		return (-1);
    374    766   carlsonj 
    375    766   carlsonj 	/*
    376    766   carlsonj 	 * If this special node isn't already in use, then it's ours alone;
    377    766   carlsonj 	 * no need to worry about conflicting mounts.
    378    766   carlsonj 	 */
    379    766   carlsonj 	for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max;
    380    766   carlsonj 	    mnp++) {
    381    766   carlsonj 		if (strcmp(mnp->mnt_special, fsptr->zone_fs_special) == 0)
    382    766   carlsonj 			break;
    383    766   carlsonj 	}
    384    766   carlsonj 	if (mnp >= resolve_lofs_mnt_max)
    385    766   carlsonj 		return (0);
    386    766   carlsonj 
    387    766   carlsonj 	/*
    388    766   carlsonj 	 * Convert this duplicate mount into a lofs mount.
    389    766   carlsonj 	 */
    390    766   carlsonj 	(void) strlcpy(fsptr->zone_fs_special, mnp->mnt_mountp,
    391    766   carlsonj 	    sizeof (fsptr->zone_fs_special));
    392    766   carlsonj 	(void) strlcpy(fsptr->zone_fs_type, MNTTYPE_LOFS,
    393    766   carlsonj 	    sizeof (fsptr->zone_fs_type));
    394    766   carlsonj 	fsptr->zone_fs_raw[0] = '\0';
    395    766   carlsonj 
    396    766   carlsonj 	/*
    397    766   carlsonj 	 * Discard all but one of the original options and set that to be the
    398    766   carlsonj 	 * same set of options used for inherit package directory resources.
    399    766   carlsonj 	 */
    400    766   carlsonj 	optptr = fsptr->zone_fs_options;
    401    766   carlsonj 	if (optptr == NULL) {
    402    766   carlsonj 		optptr = malloc(sizeof (*optptr));
    403    766   carlsonj 		if (optptr == NULL) {
    404    766   carlsonj 			zerror(zlogp, B_TRUE, "cannot mount %s",
    405    766   carlsonj 			    fsptr->zone_fs_dir);
    406    766   carlsonj 			return (-1);
    407    766   carlsonj 		}
    408    766   carlsonj 	} else {
    409    766   carlsonj 		while ((onext = optptr->zone_fsopt_next) != NULL) {
    410    766   carlsonj 			optptr->zone_fsopt_next = onext->zone_fsopt_next;
    411    766   carlsonj 			free(onext);
    412    766   carlsonj 		}
    413    766   carlsonj 	}
    414    766   carlsonj 	(void) strcpy(optptr->zone_fsopt_opt, IPD_DEFAULT_OPTS);
    415    766   carlsonj 	optptr->zone_fsopt_next = NULL;
    416    766   carlsonj 	fsptr->zone_fs_options = optptr;
    417    766   carlsonj 	return (0);
    418      0     stevel }
    419      0     stevel 
    420   5182        edp int
    421   3813         dp make_one_dir(zlog_t *zlogp, const char *prefix, const char *subdir, mode_t mode,
    422   3813         dp     uid_t userid, gid_t groupid)
    423      0     stevel {
    424      0     stevel 	char path[MAXPATHLEN];
    425      0     stevel 	struct stat st;
    426      0     stevel 
    427      0     stevel 	if (snprintf(path, sizeof (path), "%s%s", prefix, subdir) >
    428      0     stevel 	    sizeof (path)) {
    429      0     stevel 		zerror(zlogp, B_FALSE, "pathname %s%s is too long", prefix,
    430      0     stevel 		    subdir);
    431      0     stevel 		return (-1);
    432      0     stevel 	}
    433      0     stevel 
    434      0     stevel 	if (lstat(path, &st) == 0) {
    435      0     stevel 		/*
    436      0     stevel 		 * We don't check the file mode since presumably the zone
    437      0     stevel 		 * administrator may have had good reason to change the mode,
    438      0     stevel 		 * and we don't need to second guess him.
    439      0     stevel 		 */
    440      0     stevel 		if (!S_ISDIR(st.st_mode)) {
    441   7714        Ric 			if (S_ISREG(st.st_mode)) {
    442   7714        Ric 				/*
    443   7714        Ric 				 * Allow readonly mounts of /etc/ files; this
    444   7714        Ric 				 * is needed most by Trusted Extensions.
    445   1676        jpk 				 */
    446   1676        jpk 				if (strncmp(subdir, "/etc/",
    447   1676        jpk 				    strlen("/etc/")) != 0) {
    448   1676        jpk 					zerror(zlogp, B_FALSE,
    449   1676        jpk 					    "%s is not in /etc", path);
    450   1676        jpk 					return (-1);
    451   1676        jpk 				}
    452   1676        jpk 			} else {
    453   1676        jpk 				zerror(zlogp, B_FALSE,
    454   1676        jpk 				    "%s is not a directory", path);
    455   1676        jpk 				return (-1);
    456   1676        jpk 			}
    457      0     stevel 		}
    458   3813         dp 		return (0);
    459   3813         dp 	}
    460   3813         dp 
    461   3813         dp 	if (mkdirp(path, mode) != 0) {
    462      0     stevel 		if (errno == EROFS)
    463      0     stevel 			zerror(zlogp, B_FALSE, "Could not mkdir %s.\nIt is on "
    464      0     stevel 			    "a read-only file system in this local zone.\nMake "
    465      0     stevel 			    "sure %s exists in the global zone.", path, subdir);
    466      0     stevel 		else
    467      0     stevel 			zerror(zlogp, B_TRUE, "mkdirp of %s failed", path);
    468      0     stevel 		return (-1);
    469      0     stevel 	}
    470   3813         dp 
    471   3813         dp 	(void) chown(path, userid, groupid);
    472      0     stevel 	return (0);
    473      0     stevel }
    474      0     stevel 
    475      0     stevel static void
    476      0     stevel free_remote_fstypes(char **types)
    477      0     stevel {
    478      0     stevel 	uint_t i;
    479      0     stevel 
    480      0     stevel 	if (types == NULL)
    481      0     stevel 		return;
    482      0     stevel 	for (i = 0; types[i] != NULL; i++)
    483      0     stevel 		free(types[i]);
    484      0     stevel 	free(types);
    485      0     stevel }
    486      0     stevel 
    487      0     stevel static char **
    488      0     stevel get_remote_fstypes(zlog_t *zlogp)
    489      0     stevel {
    490      0     stevel 	char **types = NULL;
    491      0     stevel 	FILE *fp;
    492      0     stevel 	char buf[MAXPATHLEN];
    493      0     stevel 	char fstype[MAXPATHLEN];
    494      0     stevel 	uint_t lines = 0;
    495      0     stevel 	uint_t i;
    496      0     stevel 
    497      0     stevel 	if ((fp = fopen(DFSTYPES, "r")) == NULL) {
    498      0     stevel 		zerror(zlogp, B_TRUE, "failed to open %s", DFSTYPES);
    499      0     stevel 		return (NULL);
    500      0     stevel 	}
    501      0     stevel 	/*
    502      0     stevel 	 * Count the number of lines
    503      0     stevel 	 */
    504      0     stevel 	while (fgets(buf, sizeof (buf), fp) != NULL)
    505      0     stevel 		lines++;
    506      0     stevel 	if (lines == 0)	/* didn't read anything; empty file */
    507      0     stevel 		goto out;
    508      0     stevel 	rewind(fp);
    509      0     stevel 	/*
    510      0     stevel 	 * Allocate enough space for a NULL-terminated array.
    511      0     stevel 	 */
    512      0     stevel 	types = calloc(lines + 1, sizeof (char *));
    513      0     stevel 	if (types == NULL) {
    514      0     stevel 		zerror(zlogp, B_TRUE, "memory allocation failed");
    515      0     stevel 		goto out;
    516      0     stevel 	}
    517      0     stevel 	i = 0;
    518      0     stevel 	while (fgets(buf, sizeof (buf), fp) != NULL) {
    519      0     stevel 		/* LINTED - fstype is big enough to hold buf */
    520      0     stevel 		if (sscanf(buf, "%s", fstype) == 0) {
    521      0     stevel 			zerror(zlogp, B_FALSE, "unable to parse %s", DFSTYPES);
    522      0     stevel 			free_remote_fstypes(types);
    523      0     stevel 			types = NULL;
    524      0     stevel 			goto out;
    525      0     stevel 		}
    526      0     stevel 		types[i] = strdup(fstype);
    527      0     stevel 		if (types[i] == NULL) {
    528      0     stevel 			zerror(zlogp, B_TRUE, "memory allocation failed");
    529      0     stevel 			free_remote_fstypes(types);
    530      0     stevel 			types = NULL;
    531      0     stevel 			goto out;
    532      0     stevel 		}
    533      0     stevel 		i++;
    534      0     stevel 	}
    535      0     stevel out:
    536      0     stevel 	(void) fclose(fp);
    537      0     stevel 	return (types);
    538      0     stevel }
    539      0     stevel 
    540      0     stevel static boolean_t
    541      0     stevel is_remote_fstype(const char *fstype, char *const *remote_fstypes)
    542      0     stevel {
    543      0     stevel 	uint_t i;
    544      0     stevel 
    545      0     stevel 	if (remote_fstypes == NULL)
    546      0     stevel 		return (B_FALSE);
    547      0     stevel 	for (i = 0; remote_fstypes[i] != NULL; i++) {
    548      0     stevel 		if (strcmp(remote_fstypes[i], fstype) == 0)
    549      0     stevel 			return (B_TRUE);
    550      0     stevel 	}
    551      0     stevel 	return (B_FALSE);
    552      0     stevel }
    553      0     stevel 
    554    766   carlsonj /*
    555    766   carlsonj  * This converts a zone root path (normally of the form .../root) to a Live
    556    766   carlsonj  * Upgrade scratch zone root (of the form .../lu).
    557    766   carlsonj  */
    558      0     stevel static void
    559    766   carlsonj root_to_lu(zlog_t *zlogp, char *zroot, size_t zrootlen, boolean_t isresolved)
    560      0     stevel {
    561    766   carlsonj 	if (!isresolved && zonecfg_in_alt_root())
    562    766   carlsonj 		resolve_lofs(zlogp, zroot, zrootlen);
    563    766   carlsonj 	(void) strcpy(strrchr(zroot, '/') + 1, "lu");
    564      0     stevel }
    565      0     stevel 
    566      0     stevel /*
    567      0     stevel  * The general strategy for unmounting filesystems is as follows:
    568      0     stevel  *
    569      0     stevel  * - Remote filesystems may be dead, and attempting to contact them as
    570      0     stevel  * part of a regular unmount may hang forever; we want to always try to
    571      0     stevel  * forcibly unmount such filesystems and only fall back to regular
    572      0     stevel  * unmounts if the filesystem doesn't support forced unmounts.
    573      0     stevel  *
    574      0     stevel  * - We don't want to unnecessarily corrupt metadata on local
    575      0     stevel  * filesystems (ie UFS), so we want to start off with graceful unmounts,
    576      0     stevel  * and only escalate to doing forced unmounts if we get stuck.
    577      0     stevel  *
    578      0     stevel  * We start off walking backwards through the mount table.  This doesn't
    579      0     stevel  * give us strict ordering but ensures that we try to unmount submounts
    580      0     stevel  * first.  We thus limit the number of failed umount2(2) calls.
    581      0     stevel  *
    582      0     stevel  * The mechanism for determining if we're stuck is to count the number
    583      0     stevel  * of failed unmounts each iteration through the mount table.  This
    584      0     stevel  * gives us an upper bound on the number of filesystems which remain
    585      0     stevel  * mounted (autofs trigger nodes are dealt with separately).  If at the
    586      0     stevel  * end of one unmount+autofs_cleanup cycle we still have the same number
    587      0     stevel  * of mounts that we started out with, we're stuck and try a forced
    588      0     stevel  * unmount.  If that fails (filesystem doesn't support forced unmounts)
    589      0     stevel  * then we bail and are unable to teardown the zone.  If it succeeds,
    590      0     stevel  * we're no longer stuck so we continue with our policy of trying
    591      0     stevel  * graceful mounts first.
    592      0     stevel  *
    593      0     stevel  * Zone must be down (ie, no processes or threads active).
    594      0     stevel  */
    595      0     stevel static int
    596    766   carlsonj unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd)
    597      0     stevel {
    598      0     stevel 	int error = 0;
    599      0     stevel 	FILE *mnttab;
    600      0     stevel 	struct mnttab *mnts;
    601      0     stevel 	uint_t nmnt;
    602      0     stevel 	char zroot[MAXPATHLEN + 1];
    603      0     stevel 	size_t zrootlen;
    604      0     stevel 	uint_t oldcount = UINT_MAX;
    605      0     stevel 	boolean_t stuck = B_FALSE;
    606      0     stevel 	char **remote_fstypes = NULL;
    607      0     stevel 
    608      0     stevel 	if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) {
    609      0     stevel 		zerror(zlogp, B_FALSE, "unable to determine zone root");
    610      0     stevel 		return (-1);
    611      0     stevel 	}
    612    766   carlsonj 	if (unmount_cmd)
    613    766   carlsonj 		root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE);
    614      0     stevel 
    615      0     stevel 	(void) strcat(zroot, "/");
    616      0     stevel 	zrootlen = strlen(zroot);
    617   1676        jpk 
    618   1676        jpk 	/*
    619   1676        jpk 	 * For Trusted Extensions unmount each higher level zone's mount
    620   1676        jpk 	 * of our zone's /export/home
    621   1676        jpk 	 */
    622   1769   carlsonj 	if (!unmount_cmd)
    623   1769   carlsonj 		tsol_unmounts(zlogp, zone_name);
    624      0     stevel 
    625      0     stevel 	if ((mnttab = fopen(MNTTAB, "r")) == NULL) {
    626      0     stevel 		zerror(zlogp, B_TRUE, "failed to open %s", MNTTAB);
    627      0     stevel 		return (-1);
    628      0     stevel 	}
    629      0     stevel 	/*
    630      0     stevel 	 * Use our hacky mntfs ioctl so we see everything, even mounts with
    631      0     stevel 	 * MS_NOMNTTAB.
    632      0     stevel 	 */
    633      0     stevel 	if (ioctl(fileno(mnttab), MNTIOC_SHOWHIDDEN, NULL) < 0) {
    634      0     stevel 		zerror(zlogp, B_TRUE, "unable to configure %s", MNTTAB);
    635      0     stevel 		error++;
    636      0     stevel 		goto out;
    637      0     stevel 	}
    638      0     stevel 
    639      0     stevel 	/*
    640      0     stevel 	 * Build the list of remote fstypes so we know which ones we
    641      0     stevel 	 * should forcibly unmount.
    642      0     stevel 	 */
    643      0     stevel 	remote_fstypes = get_remote_fstypes(zlogp);
    644      0     stevel 	for (; /* ever */; ) {
    645      0     stevel 		uint_t newcount = 0;
    646      0     stevel 		boolean_t unmounted;
    647      0     stevel 		struct mnttab *mnp;
    648      0     stevel 		char *path;
    649      0     stevel 		uint_t i;
    650      0     stevel 
    651      0     stevel 		mnts = NULL;
    652      0     stevel 		nmnt = 0;
    653      0     stevel 		/*
    654      0     stevel 		 * MNTTAB gives us a way to walk through mounted
    655      0     stevel 		 * filesystems; we need to be able to walk them in
    656      0     stevel 		 * reverse order, so we build a list of all mounted
    657      0     stevel 		 * filesystems.
    658      0     stevel 		 */
    659      0     stevel 		if (build_mnttable(zlogp, zroot, zrootlen, mnttab, &mnts,
    660      0     stevel 		    &nmnt) != 0) {
    661      0     stevel 			error++;
    662      0     stevel 			goto out;
    663      0     stevel 		}
    664      0     stevel 		for (i = 0; i < nmnt; i++) {
    665      0     stevel 			mnp = &mnts[nmnt - i - 1]; /* access in reverse order */
    666      0     stevel 			path = mnp->mnt_mountp;
    667      0     stevel 			unmounted = B_FALSE;
    668      0     stevel 			/*
    669      0     stevel 			 * Try forced unmount first for remote filesystems.
    670      0     stevel 			 *
    671      0     stevel 			 * Not all remote filesystems support forced unmounts,
    672      0     stevel 			 * so if this fails (ENOTSUP) we'll continue on
    673      0     stevel 			 * and try a regular unmount.
    674      0     stevel 			 */
    675      0     stevel 			if (is_remote_fstype(mnp->mnt_fstype, remote_fstypes)) {
    676      0     stevel 				if (umount2(path, MS_FORCE) == 0)
    677      0     stevel 					unmounted = B_TRUE;
    678      0     stevel 			}
    679      0     stevel 			/*
    680      0     stevel 			 * Try forced unmount if we're stuck.
    681      0     stevel 			 */
    682      0     stevel 			if (stuck) {
    683      0     stevel 				if (umount2(path, MS_FORCE) == 0) {
    684      0     stevel 					unmounted = B_TRUE;
    685      0     stevel 					stuck = B_FALSE;
    686      0     stevel 				} else {
    687      0     stevel 					/*
    688      0     stevel 					 * The first failure indicates a
    689      0     stevel 					 * mount we won't be able to get
    690      0     stevel 					 * rid of automatically, so we
    691      0     stevel 					 * bail.
    692      0     stevel 					 */
    693      0     stevel 					error++;
    694      0     stevel 					zerror(zlogp, B_FALSE,
    695      0     stevel 					    "unable to unmount '%s'", path);
    696      0     stevel 					free_mnttable(mnts, nmnt);
    697      0     stevel 					goto out;
    698      0     stevel 				}
    699      0     stevel 			}
    700      0     stevel 			/*
    701      0     stevel 			 * Try regular unmounts for everything else.
    702      0     stevel 			 */
    703      0     stevel 			if (!unmounted && umount2(path, 0) != 0)
    704      0     stevel 				newcount++;
    705      0     stevel 		}
    706      0     stevel 		free_mnttable(mnts, nmnt);
    707      0     stevel 
    708      0     stevel 		if (newcount == 0)
    709      0     stevel 			break;
    710      0     stevel 		if (newcount >= oldcount) {
    711      0     stevel 			/*
    712      0     stevel 			 * Last round didn't unmount anything; we're stuck and
    713      0     stevel 			 * should start trying forced unmounts.
    714      0     stevel 			 */
    715      0     stevel 			stuck = B_TRUE;
    716      0     stevel 		}
    717      0     stevel 		oldcount = newcount;
    718      0     stevel 
    719      0     stevel 		/*
    720      0     stevel 		 * Autofs doesn't let you unmount its trigger nodes from
    721      0     stevel 		 * userland so we have to tell the kernel to cleanup for us.
    722      0     stevel 		 */
    723      0     stevel 		if (autofs_cleanup(zoneid) != 0) {
    724      0     stevel 			zerror(zlogp, B_TRUE, "unable to remove autofs nodes");
    725      0     stevel 			error++;
    726      0     stevel 			goto out;
    727      0     stevel 		}
    728      0     stevel 	}
    729      0     stevel 
    730      0     stevel out:
    731      0     stevel 	free_remote_fstypes(remote_fstypes);
    732      0     stevel 	(void) fclose(mnttab);
    733      0     stevel 	return (error ? -1 : 0);
    734      0     stevel }
    735      0     stevel 
    736      0     stevel static int
    737      0     stevel fs_compare(const void *m1, const void *m2)
    738      0     stevel {
    739      0     stevel 	struct zone_fstab *i = (struct zone_fstab *)m1;
    740      0     stevel 	struct zone_fstab *j = (struct zone_fstab *)m2;
    741      0     stevel 
    742      0     stevel 	return (strcmp(i->zone_fs_dir, j->zone_fs_dir));
    743      0     stevel }
    744      0     stevel 
    745      0     stevel /*
    746      0     stevel  * Fork and exec (and wait for) the mentioned binary with the provided
    747      0     stevel  * arguments.  Returns (-1) if something went wrong with fork(2) or exec(2),
    748      0     stevel  * returns the exit status otherwise.
    749      0     stevel  *
    750      0     stevel  * If we were unable to exec the provided pathname (for whatever
    751      0     stevel  * reason), we return the special token ZEXIT_EXEC.  The current value
    752      0     stevel  * of ZEXIT_EXEC doesn't conflict with legitimate exit codes of the
    753      0     stevel  * consumers of this function; any future consumers must make sure this
    754      0     stevel  * remains the case.
    755      0     stevel  */
    756      0     stevel static int
    757      0     stevel forkexec(zlog_t *zlogp, const char *path, char *const argv[])
    758      0     stevel {
    759      0     stevel 	pid_t child_pid;
    760      0     stevel 	int child_status = 0;
    761      0     stevel 
    762      0     stevel 	/*
    763      0     stevel 	 * Do not let another thread localize a message while we are forking.
    764      0     stevel 	 */
    765      0     stevel 	(void) mutex_lock(&msglock);
    766      0     stevel 	child_pid = fork();
    767      0     stevel 	(void) mutex_unlock(&msglock);
    768      0     stevel 	if (child_pid == -1) {
    769      0     stevel 		zerror(zlogp, B_TRUE, "could not fork for %s", argv[0]);
    770      0     stevel 		return (-1);
    771      0     stevel 	} else if (child_pid == 0) {
    772      0     stevel 		closefrom(0);
    773   1915   gjelinek 		/* redirect stdin, stdout & stderr to /dev/null */
    774   1915   gjelinek 		(void) open("/dev/null", O_RDONLY);	/* stdin */
    775   1915   gjelinek 		(void) open("/dev/null", O_WRONLY);	/* stdout */
    776   1915   gjelinek 		(void) open("/dev/null", O_WRONLY);	/* stderr */
    777      0     stevel 		(void) execv(path, argv);
    778      0     stevel 		/*
    779      0     stevel 		 * Since we are in the child, there is no point calling zerror()
    780      0     stevel 		 * since there is nobody waiting to consume it.  So exit with a
    781      0     stevel 		 * special code that the parent will recognize and call zerror()
    782      0     stevel 		 * accordingly.
    783      0     stevel 		 */
    784      0     stevel 
    785      0     stevel 		_exit(ZEXIT_EXEC);
    786      0     stevel 	} else {
    787      0     stevel 		(void) waitpid(child_pid, &child_status, 0);
    788      0     stevel 	}
    789      0     stevel 
    790      0     stevel 	if (WIFSIGNALED(child_status)) {
    791      0     stevel 		zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
    792      0     stevel 		    "signal %d", path, WTERMSIG(child_status));
    793      0     stevel 		return (-1);
    794      0     stevel 	}
    795      0     stevel 	assert(WIFEXITED(child_status));
    796      0     stevel 	if (WEXITSTATUS(child_status) == ZEXIT_EXEC) {
    797      0     stevel 		zerror(zlogp, B_FALSE, "failed to exec %s", path);
    798      0     stevel 		return (-1);
    799      0     stevel 	}
    800      0     stevel 	return (WEXITSTATUS(child_status));
    801      0     stevel }
    802      0     stevel 
    803      0     stevel static int
    804   6734    johnlev isregfile(const char *path)
    805   6734    johnlev {
    806   6734    johnlev 	struct stat64 st;
    807   6734    johnlev 
    808   6734    johnlev 	if (stat64(path, &st) == -1)
    809   6734    johnlev 		return (-1);
    810   6734    johnlev 
    811   6734    johnlev 	return (S_ISREG(st.st_mode));
    812   6734    johnlev }
    813   6734    johnlev 
    814   6734    johnlev static int
    815      0     stevel dofsck(zlog_t *zlogp, const char *fstype, const char *rawdev)
    816      0     stevel {
    817      0     stevel 	char cmdbuf[MAXPATHLEN];
    818      0     stevel 	char *argv[4];
    819      0     stevel 	int status;
    820      0     stevel 
    821      0     stevel 	/*
    822      0     stevel 	 * We could alternatively have called /usr/sbin/fsck -F <fstype>, but
    823      0     stevel 	 * that would cost us an extra fork/exec without buying us anything.
    824      0     stevel 	 */
    825      0     stevel 	if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/fsck", fstype)
    826   2712    nn35248 	    >= sizeof (cmdbuf)) {
    827      0     stevel 		zerror(zlogp, B_FALSE, "file-system type %s too long", fstype);
    828      0     stevel 		return (-1);
    829      0     stevel 	}
    830   6734    johnlev 
    831   6734    johnlev 	/*
    832   6734    johnlev 	 * If it doesn't exist, that's OK: we verified this previously
    833   6734    johnlev 	 * in zoneadm.
    834   6734    johnlev 	 */
    835   6734    johnlev 	if (isregfile(cmdbuf) == -1)
    836   6734    johnlev 		return (0);
    837      0     stevel 
    838      0     stevel 	argv[0] = "fsck";
    839      0     stevel 	argv[1] = "-m";
    840      0     stevel 	argv[2] = (char *)rawdev;
    841      0     stevel 	argv[3] = NULL;
    842      0     stevel 
    843      0     stevel 	status = forkexec(zlogp, cmdbuf, argv);
    844      0     stevel 	if (status == 0 || status == -1)
    845      0     stevel 		return (status);
    846      0     stevel 	zerror(zlogp, B_FALSE, "fsck of '%s' failed with exit status %d; "
    847      0     stevel 	    "run fsck manually", rawdev, status);
    848      0     stevel 	return (-1);
    849      0     stevel }
    850      0     stevel 
    851      0     stevel static int
    852      0     stevel domount(zlog_t *zlogp, const char *fstype, const char *opts,
    853      0     stevel     const char *special, const char *directory)
    854      0     stevel {
    855      0     stevel 	char cmdbuf[MAXPATHLEN];
    856      0     stevel 	char *argv[6];
    857      0     stevel 	int status;
    858      0     stevel 
    859      0     stevel 	/*
    860      0     stevel 	 * We could alternatively have called /usr/sbin/mount -F <fstype>, but
    861      0     stevel 	 * that would cost us an extra fork/exec without buying us anything.
    862      0     stevel 	 */
    863      0     stevel 	if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/mount", fstype)
    864   2712    nn35248 	    >= sizeof (cmdbuf)) {
    865      0     stevel 		zerror(zlogp, B_FALSE, "file-system type %s too long", fstype);
    866      0     stevel 		return (-1);
    867      0     stevel 	}
    868      0     stevel 	argv[0] = "mount";
    869      0     stevel 	if (opts[0] == '\0') {
    870      0     stevel 		argv[1] = (char *)special;
    871      0     stevel 		argv[2] = (char *)directory;
    872      0     stevel 		argv[3] = NULL;
    873      0     stevel 	} else {
    874      0     stevel 		argv[1] = "-o";
    875      0     stevel 		argv[2] = (char *)opts;
    876      0     stevel 		argv[3] = (char *)special;
    877      0     stevel 		argv[4] = (char *)directory;
    878      0     stevel 		argv[5] = NULL;
    879      0     stevel 	}
    880      0     stevel 
    881      0     stevel 	status = forkexec(zlogp, cmdbuf, argv);
    882      0     stevel 	if (status == 0 || status == -1)
    883      0     stevel 		return (status);
    884      0     stevel 	if (opts[0] == '\0')
    885      0     stevel 		zerror(zlogp, B_FALSE, "\"%s %s %s\" "
    886      0     stevel 		    "failed with exit code %d",
    887      0     stevel 		    cmdbuf, special, directory, status);
    888      0     stevel 	else
    889      0     stevel 		zerror(zlogp, B_FALSE, "\"%s -o %s %s %s\" "
    890      0     stevel 		    "failed with exit code %d",
    891      0     stevel 		    cmdbuf, opts, special, directory, status);
    892      0     stevel 	return (-1);
    893      0     stevel }
    894      0     stevel 
    895      0     stevel /*
    896   5182        edp  * Check if a given mount point path exists.
    897   5182        edp  * If it does, make sure it doesn't contain any symlinks.
    898   5182        edp  * Note that if "leaf" is false we're checking an intermediate
    899   5182        edp  * component of the mount point path, so it must be a directory.
    900   5182        edp  * If "leaf" is true, then we're checking the entire mount point
    901   5182        edp  * path, so the mount point itself can be anything aside from a
    902   5182        edp  * symbolic link.
    903   5182        edp  *
    904   5182        edp  * If the path is invalid then a negative value is returned.  If the
    905   5182        edp  * path exists and is a valid mount point path then 0 is returned.
    906   5182        edp  * If the path doesn't exist return a positive value.
    907   5182        edp  */
    908   5182        edp static int
    909   5182        edp valid_mount_point(zlog_t *zlogp, const char *path, const boolean_t leaf)
    910      0     stevel {
    911      0     stevel 	struct stat statbuf;
    912      0     stevel 	char respath[MAXPATHLEN];
    913      0     stevel 	int res;
    914      0     stevel 
    915      0     stevel 	if (lstat(path, &statbuf) != 0) {
    916      0     stevel 		if (errno == ENOENT)
    917   5182        edp 			return (1);
    918      0     stevel 		zerror(zlogp, B_TRUE, "can't stat %s", path);
    919      0     stevel 		return (-1);
    920      0     stevel 	}
    921      0     stevel 	if (S_ISLNK(statbuf.st_mode)) {
    922      0     stevel 		zerror(zlogp, B_FALSE, "%s is a symlink", path);
    923      0     stevel 		return (-1);
    924      0     stevel 	}
    925   5182        edp 	if (!leaf && !S_ISDIR(statbuf.st_mode)) {
    926   5182        edp 		zerror(zlogp, B_FALSE, "%s is not a directory", path);
    927   5182        edp 		return (-1);
    928      0     stevel 	}
    929      0     stevel 	if ((res = resolvepath(path, respath, sizeof (respath))) == -1) {
    930      0     stevel 		zerror(zlogp, B_TRUE, "unable to resolve path %s", path);
    931      0     stevel 		return (-1);
    932      0     stevel 	}
    933      0     stevel 	respath[res] = '\0';
    934      0     stevel 	if (strcmp(path, respath) != 0) {
    935      0     stevel 		/*
    936   5182        edp 		 * We don't like ".."s, "."s, or "//"s throwing us off
    937      0     stevel 		 */
    938      0     stevel 		zerror(zlogp, B_FALSE, "%s is not a canonical path", path);
    939      0     stevel 		return (-1);
    940      0     stevel 	}
    941      0     stevel 	return (0);
    942      0     stevel }
    943      0     stevel 
    944      0     stevel /*
    945   5182        edp  * Validate a mount point path.  A valid mount point path is an
    946   5182        edp  * absolute path that either doesn't exist, or, if it does exists it
    947   5182        edp  * must be an absolute canonical path that doesn't have any symbolic
    948   5182        edp  * links in it.  The target of a mount point path can be any filesystem
    949   5182        edp  * object.  (Different filesystems can support different mount points,
    950   5182        edp  * for example "lofs" and "mntfs" both support files and directories
    951   5182        edp  * while "ufs" just supports directories.)
    952   5182        edp  *
    953   5182        edp  * If the path is invalid then a negative value is returned.  If the
    954   5182        edp  * path exists and is a valid mount point path then 0 is returned.
    955   5182        edp  * If the path doesn't exist return a positive value.
    956   5182        edp  */
    957   5182        edp int
    958   5182        edp valid_mount_path(zlog_t *zlogp, const char *rootpath, const char *spec,
    959   5182        edp     const char *dir, const char *fstype)
    960   5182        edp {
    961   5182        edp 	char abspath[MAXPATHLEN], *slashp, *slashp_next;
    962   5182        edp 	int rv;
    963   5182        edp 
    964   5182        edp 	/*
    965   5182        edp 	 * Sanity check the target mount point path.
    966   5182        edp 	 * It must be a non-null string that starts with a '/'.
    967   5182        edp 	 */
    968   5182        edp 	if (dir[0] != '/') {
    969   5182        edp 		if (spec[0] == '\0') {
    970   5182        edp 			/*
    971   5182        edp 			 * This must be an invalid ipd entry (see comments
    972   5182        edp 			 * in mount_filesystems_ipdent()).
    973   5182        edp 			 */
    974   5182        edp 			zerror(zlogp, B_FALSE,
    975   5182        edp 			    "invalid inherit-pkg-dir entry: \"%s\"", dir);
    976   5182        edp 		} else {
    977   5182        edp 			/* Something went wrong. */
    978   5182        edp 			zerror(zlogp, B_FALSE, "invalid mount directory, "
    979   5182        edp 			    "type: \"%s\", special: \"%s\", dir: \"%s\"",
    980   5182        edp 			    fstype, spec, dir);
    981   5182        edp 		}
    982   5182        edp 		return (-1);
    983   5182        edp 	}
    984   5182        edp 
    985   5182        edp 	/*
    986   5182        edp 	 * Join rootpath and dir.  Make sure abspath ends with '/', this
    987   5182        edp 	 * is added to all paths (even non-directory paths) to allow us
    988   5182        edp 	 * to detect the end of paths below.  If the path already ends
    989   5182        edp 	 * in a '/', then that's ok too (although we'll fail the
    990   5182        edp 	 * cannonical path check in valid_mount_point()).
    991   5182        edp 	 */
    992   5182        edp 	if (snprintf(abspath, sizeof (abspath),
    993   5182        edp 	    "%s%s/", rootpath, dir) >= sizeof (abspath)) {
    994   5182        edp 		zerror(zlogp, B_FALSE, "pathname %s%s is too long",
    995   5182        edp 		    rootpath, dir);
    996   5182        edp 		return (-1);
    997   5182        edp 	}
    998   5182        edp 
    999   5182        edp 	/*
   1000   5182        edp 	 * Starting with rootpath, verify the mount path one component
   1001   5182        edp 	 * at a time.  Continue until we've evaluated all of abspath.
   1002   5182        edp 	 */
   1003      0     stevel 	slashp = &abspath[strlen(rootpath)];
   1004      0     stevel 	assert(*slashp == '/');
   1005      0     stevel 	do {
   1006   5182        edp 		slashp_next = strchr(slashp + 1, '/');
   1007      0     stevel 		*slashp = '\0';
   1008   5182        edp 		if (slashp_next != NULL) {
   1009   5182        edp 			/* This is an intermediary mount path component. */
   1010   5182        edp 			rv = valid_mount_point(zlogp, abspath, B_FALSE);
   1011   5182        edp 		} else {
   1012   5182        edp 			/* This is the last component of the mount path. */
   1013   5182        edp 			rv = valid_mount_point(zlogp, abspath, B_TRUE);
   1014   5182        edp 		}
   1015   5182        edp 		if (rv < 0)
   1016   5182        edp 			return (rv);
   1017      0     stevel 		*slashp = '/';
   1018   5182        edp 	} while ((slashp = slashp_next) != NULL);
   1019   5182        edp 	return (rv);
   1020      0     stevel }
   1021      0     stevel 
   1022      0     stevel static int
   1023   2712    nn35248 mount_one_dev_device_cb(void *arg, const char *match, const char *name)
   1024   2712    nn35248 {
   1025   2712    nn35248 	di_prof_t prof = arg;
   1026   2712    nn35248 
   1027   2712    nn35248 	if (name == NULL)
   1028   2712    nn35248 		return (di_prof_add_dev(prof, match));
   1029   2712    nn35248 	return (di_prof_add_map(prof, match, name));
   1030   2712    nn35248 }
   1031   2712    nn35248 
   1032   2712    nn35248 static int
   1033   2712    nn35248 mount_one_dev_symlink_cb(void *arg, const char *source, const char *target)
   1034   2712    nn35248 {
   1035   2712    nn35248 	di_prof_t prof = arg;
   1036   2712    nn35248 
   1037   2712    nn35248 	return (di_prof_add_symlink(prof, source, target));
   1038   2712    nn35248 }
   1039   2712    nn35248 
   1040  10616  Sebastien int
   1041  10616  Sebastien vplat_get_iptype(zlog_t *zlogp, zone_iptype_t *iptypep)
   1042   3448   dh155122 {
   1043   3448   dh155122 	zone_dochandle_t handle;
   1044   3448   dh155122 
   1045   3448   dh155122 	if ((handle = zonecfg_init_handle()) == NULL) {
   1046   3448   dh155122 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
   1047   3448   dh155122 		return (-1);
   1048   3448   dh155122 	}
   1049   3448   dh155122 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
   1050   3448   dh155122 		zerror(zlogp, B_FALSE, "invalid configuration");
   1051   3448   dh155122 		zonecfg_fini_handle(handle);
   1052   3448   dh155122 		return (-1);
   1053   3448   dh155122 	}
   1054   3448   dh155122 	if (zonecfg_get_iptype(handle, iptypep) != Z_OK) {
   1055   3448   dh155122 		zerror(zlogp, B_FALSE, "invalid ip-type configuration");
   1056   3448   dh155122 		zonecfg_fini_handle(handle);
   1057   3448   dh155122 		return (-1);
   1058   3448   dh155122 	}
   1059   3448   dh155122 	zonecfg_fini_handle(handle);
   1060   3448   dh155122 	return (0);
   1061   3448   dh155122 }
   1062   3448   dh155122 
   1063   2712    nn35248 /*
   1064   2712    nn35248  * Apply the standard lists of devices/symlinks/mappings and the user-specified
   1065   2712    nn35248  * list of devices (via zonecfg) to the /dev filesystem.  The filesystem will
   1066   2712    nn35248  * use these as a profile/filter to determine what exists in /dev.
   1067   2712    nn35248  */
   1068   2712    nn35248 static int
   1069   7655     gerald mount_one_dev(zlog_t *zlogp, char *devpath, zone_mnt_t mount_cmd)
   1070   2712    nn35248 {
   1071   2712    nn35248 	char			brand[MAXNAMELEN];
   1072   2712    nn35248 	zone_dochandle_t	handle = NULL;
   1073   2727        edp 	brand_handle_t		bh = NULL;
   1074   2712    nn35248 	struct zone_devtab	ztab;
   1075   2712    nn35248 	di_prof_t		prof = NULL;
   1076   2712    nn35248 	int			err;
   1077   2712    nn35248 	int			retval = -1;
   1078   3448   dh155122 	zone_iptype_t		iptype;
   1079   3448   dh155122 	const char 		*curr_iptype;
   1080   2712    nn35248 
   1081   2712    nn35248 	if (di_prof_init(devpath, &prof)) {
   1082   2712    nn35248 		zerror(zlogp, B_TRUE, "failed to initialize profile");
   1083   2712    nn35248 		goto cleanup;
   1084   2712    nn35248 	}
   1085   2712    nn35248 
   1086   7655     gerald 	/*
   1087   7655     gerald 	 * Get a handle to the brand info for this zone.
   1088  10943     Edward 	 * If we are mounting the zone, then we must always use the default
   1089   7655     gerald 	 * brand device mounts.
   1090   7655     gerald 	 */
   1091   7655     gerald 	if (ALT_MOUNT(mount_cmd)) {
   1092  10943     Edward 		(void) strlcpy(brand, default_brand, sizeof (brand));
   1093   7655     gerald 	} else {
   1094  10796    Stephen 		(void) strlcpy(brand, brand_name, sizeof (brand));
   1095   7655     gerald 	}
   1096   7655     gerald 
   1097   7655     gerald 	if ((bh = brand_open(brand)) == NULL) {
   1098   2712    nn35248 		zerror(zlogp, B_FALSE, "unable to determine zone brand");
   1099   2712    nn35248 		goto cleanup;
   1100   2712    nn35248 	}
   1101   2712    nn35248 
   1102  10616  Sebastien 	if (vplat_get_iptype(zlogp, &iptype) < 0) {
   1103   3448   dh155122 		zerror(zlogp, B_TRUE, "unable to determine ip-type");
   1104   3448   dh155122 		goto cleanup;
   1105   3448   dh155122 	}
   1106   3448   dh155122 	switch (iptype) {
   1107   3448   dh155122 	case ZS_SHARED:
   1108   3448   dh155122 		curr_iptype = "shared";
   1109   3448   dh155122 		break;
   1110   3448   dh155122 	case ZS_EXCLUSIVE:
   1111   3448   dh155122 		curr_iptype = "exclusive";
   1112   3448   dh155122 		break;
   1113   3448   dh155122 	}
   1114   3448   dh155122 
   1115   2727        edp 	if (brand_platform_iter_devices(bh, zone_name,
   1116   3448   dh155122 	    mount_one_dev_device_cb, prof, curr_iptype) != 0) {
   1117   2712    nn35248 		zerror(zlogp, B_TRUE, "failed to add standard device");
   1118   2712    nn35248 		goto cleanup;
   1119   2712    nn35248 	}
   1120   2712    nn35248 
   1121   2727        edp 	if (brand_platform_iter_link(bh,
   1122   2712    nn35248 	    mount_one_dev_symlink_cb, prof) != 0) {
   1123   2712    nn35248 		zerror(zlogp, B_TRUE, "failed to add standard symlink");
   1124   2712    nn35248 		goto cleanup;
   1125   2712    nn35248 	}
   1126   2712    nn35248 
   1127   2712    nn35248 	/* Add user-specified devices and directories */
   1128   2712    nn35248 	if ((handle = zonecfg_init_handle()) == NULL) {
   1129   2712    nn35248 		zerror(zlogp, B_FALSE, "can't initialize zone handle");
   1130   2712    nn35248 		goto cleanup;
   1131   2712    nn35248 	}
   1132   2712    nn35248 	if (err = zonecfg_get_handle(zone_name, handle)) {
   1133   2712    nn35248 		zerror(zlogp, B_FALSE, "can't get handle for zone "
   1134   2712    nn35248 		    "%s: %s", zone_name, zonecfg_strerror(err));
   1135   2712    nn35248 		goto cleanup;
   1136   2712    nn35248 	}
   1137   2712    nn35248 	if (err = zonecfg_setdevent(handle)) {
   1138   2712    nn35248 		zerror(zlogp, B_FALSE, "%s: %s", zone_name,
   1139   2712    nn35248 		    zonecfg_strerror(err));
   1140   2712    nn35248 		goto cleanup;
   1141   2712    nn35248 	}
   1142   2712    nn35248 	while (zonecfg_getdevent(handle, &ztab) == Z_OK) {
   1143   2712    nn35248 		if (di_prof_add_dev(prof, ztab.zone_dev_match)) {
   1144   2712    nn35248 			zerror(zlogp, B_TRUE, "failed to add "
   1145   2712    nn35248 			    "user-specified device");
   1146   2712    nn35248 			goto cleanup;
   1147   2712    nn35248 		}
   1148   2712    nn35248 	}
   1149   2712    nn35248 	(void) zonecfg_enddevent(handle);
   1150   2712    nn35248 
   1151   2712    nn35248 	/* Send profile to kernel */
   1152   2712    nn35248 	if (di_prof_commit(prof)) {
   1153   2712    nn35248 		zerror(zlogp, B_TRUE, "failed to commit profile");
   1154   2712    nn35248 		goto cleanup;
   1155   2712    nn35248 	}
   1156   2712    nn35248 
   1157   2712    nn35248 	retval = 0;
   1158   2712    nn35248 
   1159   2712    nn35248 cleanup:
   1160   2727        edp 	if (bh != NULL)
   1161   2727        edp 		brand_close(bh);
   1162   3716   gjelinek 	if (handle != NULL)
   1163   2712    nn35248 		zonecfg_fini_handle(handle);
   1164   2712    nn35248 	if (prof)
   1165   2712    nn35248 		di_prof_fini(prof);
   1166   2712    nn35248 	return (retval);
   1167   2712    nn35248 }
   1168   2712    nn35248 
   1169   2712    nn35248 static int
   1170   7655     gerald mount_one(zlog_t *zlogp, struct zone_fstab *fsptr, const char *rootpath,
   1171   7655     gerald     zone_mnt_t mount_cmd)
   1172      0     stevel {
   1173   2712    nn35248 	char path[MAXPATHLEN];
   1174   2712    nn35248 	char specpath[MAXPATHLEN];
   1175   2712    nn35248 	char optstr[MAX_MNTOPT_STR];
   1176      0     stevel 	zone_fsopt_t *optptr;
   1177   2712    nn35248 	int rv;
   1178      0     stevel 
   1179   5182        edp 	if ((rv = valid_mount_path(zlogp, rootpath, fsptr->zone_fs_special,
   1180   5182        edp 	    fsptr->zone_fs_dir, fsptr->zone_fs_type)) < 0) {
   1181      0     stevel 		zerror(zlogp, B_FALSE, "%s%s is not a valid mount point",
   1182      0     stevel 		    rootpath, fsptr->zone_fs_dir);
   1183      0     stevel 		return (-1);
   1184   5182        edp 	} else if (rv > 0) {
   1185   5182        edp 		/* The mount point path doesn't exist, create it now. */
   1186   5182        edp 		if (make_one_dir(zlogp, rootpath, fsptr->zone_fs_dir,
   1187   5182        edp 		    DEFAULT_DIR_MODE, DEFAULT_DIR_USER,
   1188   5182        edp 		    DEFAULT_DIR_GROUP) != 0) {
   1189   5182        edp 			zerror(zlogp, B_FALSE, "failed to create mount point");
   1190   5182        edp 			return (-1);
   1191   5182        edp 		}
   1192   5182        edp 
   1193   5182        edp 		/*
   1194   5182        edp 		 * Now this might seem weird, but we need to invoke
   1195   5182        edp 		 * valid_mount_path() again.  Why?  Because it checks
   1196   5182        edp 		 * to make sure that the mount point path is canonical,
   1197   5182        edp 		 * which it can only do if the path exists, so now that
   1198   5182        edp 		 * we've created the path we have to verify it again.
   1199   5182        edp 		 */
   1200   5182        edp 		if ((rv = valid_mount_path(zlogp, rootpath,
   1201   5182        edp 		    fsptr->zone_fs_special, fsptr->zone_fs_dir,
   1202   5182        edp 		    fsptr->zone_fs_type)) < 0) {
   1203   5182        edp 			zerror(zlogp, B_FALSE,
   1204   5182        edp 			    "%s%s is not a valid mount point",
   1205   5182        edp 			    rootpath, fsptr->zone_fs_dir);
   1206   5182        edp 			return (-1);
   1207   5182        edp 		}
   1208   5182        edp 	}
   1209      0     stevel 
   1210      0     stevel 	(void) snprintf(path, sizeof (path), "%s%s", rootpath,
   1211      0     stevel 	    fsptr->zone_fs_dir);
   1212      0     stevel 
   1213      0     stevel 	if (strlen(fsptr->zone_fs_special) == 0) {
   1214      0     stevel 		/*
   1215      0     stevel 		 * A zero-length special is how we distinguish IPDs from
   1216    766   carlsonj 		 * general-purpose FSs.  Make sure it mounts from a place that
   1217    766   carlsonj 		 * can be seen via the alternate zone's root.
   1218      0     stevel 		 */
   1219    766   carlsonj 		if (snprintf(specpath, sizeof (specpath), "%s%s",
   1220    766   carlsonj 		    zonecfg_get_root(), fsptr->zone_fs_dir) >=
   1221    766   carlsonj 		    sizeof (specpath)) {
   1222    766   carlsonj 			zerror(zlogp, B_FALSE, "cannot mount %s: path too "
   1223    766   carlsonj 			    "long in alternate root", fsptr->zone_fs_dir);
   1224    766   carlsonj 			return (-1);
   1225    766   carlsonj 		}
   1226    766   carlsonj 		if (zonecfg_in_alt_root())
   1227    766   carlsonj 			resolve_lofs(zlogp, specpath, sizeof (specpath));
   1228      0     stevel 		if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS,
   1229    766   carlsonj 		    specpath, path) != 0) {
   1230      0     stevel 			zerror(zlogp, B_TRUE, "failed to loopback mount %s",
   1231    766   carlsonj 			    specpath);
   1232      0     stevel 			return (-1);
   1233      0     stevel 		}
   1234      0     stevel 		return (0);
   1235      0     stevel 	}
   1236      0     stevel 
   1237      0     stevel 	/*
   1238      0     stevel 	 * In general the strategy here is to do just as much verification as
   1239      0     stevel 	 * necessary to avoid crashing or otherwise doing something bad; if the
   1240      0     stevel 	 * administrator initiated the operation via zoneadm(1m), he'll get
   1241      0     stevel 	 * auto-verification which will let him know what's wrong.  If he
   1242      0     stevel 	 * modifies the zone configuration of a running zone and doesn't attempt
   1243      0     stevel 	 * to verify that it's OK we won't crash but won't bother trying to be
   1244      0     stevel 	 * too helpful either.  zoneadm verify is only a couple keystrokes away.
   1245      0     stevel 	 */
   1246      0     stevel 	if (!zonecfg_valid_fs_type(fsptr->zone_fs_type)) {
   1247      0     stevel 		zerror(zlogp, B_FALSE, "cannot mount %s on %s: "
   1248      0     stevel 		    "invalid file-system type %s", fsptr->zone_fs_special,
   1249      0     stevel 		    fsptr->zone_fs_dir, fsptr->zone_fs_type);
   1250      0     stevel 		return (-1);
   1251    766   carlsonj 	}
   1252    766   carlsonj 
   1253    766   carlsonj 	/*
   1254    766   carlsonj 	 * If we're looking at an alternate root environment, then construct
   1255   3688        edp 	 * read-only loopback mounts as necessary.  Note that any special
   1256   3688        edp 	 * paths for lofs zone mounts in an alternate root must have
   1257   3688        edp 	 * already been pre-pended with any alternate root path by the
   1258   3688        edp 	 * time we get here.
   1259    766   carlsonj 	 */
   1260    766   carlsonj 	if (zonecfg_in_alt_root()) {
   1261    766   carlsonj 		struct stat64 st;
   1262    766   carlsonj 
   1263    766   carlsonj 		if (stat64(fsptr->zone_fs_special, &st) != -1 &&
   1264   2772   carlsonj 		    S_ISBLK(st.st_mode)) {
   1265   3688        edp 			/*
   1266   3688        edp 			 * If we're going to mount a block device we need
   1267   3688        edp 			 * to check if that device is already mounted
   1268   3688        edp 			 * somewhere else, and if so, do a lofs mount
   1269   3688        edp 			 * of the device instead of a direct mount
   1270   3688        edp 			 */
   1271   2772   carlsonj 			if (check_lofs_needed(zlogp, fsptr) == -1)
   1272   2772   carlsonj 				return (-1);
   1273   2772   carlsonj 		} else if (strcmp(fsptr->zone_fs_type, MNTTYPE_LOFS) == 0) {
   1274   3688        edp 			/*
   1275   3688        edp 			 * For lofs mounts, the special node is inside the
   1276   3688        edp 			 * alternate root.  We need lofs resolution for
   1277   3688        edp 			 * this case in order to get at the underlying
   1278   3688        edp 			 * read-write path.
   1279   3688        edp 			 */
   1280   3688        edp 			resolve_lofs(zlogp, fsptr->zone_fs_special,
   1281    766   carlsonj 			    sizeof (fsptr->zone_fs_special));
   1282    766   carlsonj 		}
   1283      0     stevel 	}
   1284      0     stevel 
   1285      0     stevel 	/*
   1286      0     stevel 	 * Run 'fsck -m' if there's a device to fsck.
   1287      0     stevel 	 */
   1288      0     stevel 	if (fsptr->zone_fs_raw[0] != '\0' &&
   1289   6734    johnlev 	    dofsck(zlogp, fsptr->zone_fs_type, fsptr->zone_fs_raw) != 0) {
   1290   6734    johnlev 		return (-1);
   1291   6734    johnlev 	} else if (isregfile(fsptr->zone_fs_special) == 1 &&
   1292   6734    johnlev 	    dofsck(zlogp, fsptr->zone_fs_type, fsptr->zone_fs_special) != 0) {
   1293   6734    johnlev 		return (-1);
   1294   6734    johnlev 	}
   1295      0     stevel 
   1296      0     stevel 	/*
   1297      0     stevel 	 * Build up mount option string.
   1298      0     stevel 	 */
   1299      0     stevel 	optstr[0] = '\0';
   1300      0     stevel 	if (fsptr->zone_fs_options != NULL) {
   1301      0     stevel 		(void) strlcpy(optstr, fsptr->zone_fs_options->zone_fsopt_opt,
   1302      0     stevel 		    sizeof (optstr));
   1303      0     stevel 		for (optptr = fsptr->zone_fs_options->zone_fsopt_next;
   1304      0     stevel 		    optptr != NULL; optptr = optptr->zone_fsopt_next) {
   1305      0     stevel 			(void) strlcat(optstr, ",", sizeof (optstr));
   1306      0     stevel 			(void) strlcat(optstr, optptr->zone_fsopt_opt,
   1307      0     stevel 			    sizeof (optstr));
   1308      0     stevel 		}
   1309      0     stevel 	}
   1310   2712    nn35248 
   1311   2712    nn35248 	if ((rv = domount(zlogp, fsptr->zone_fs_type, optstr,
   1312   2712    nn35248 	    fsptr->zone_fs_special, path)) != 0)
   1313   2712    nn35248 		return (rv);
   1314   2712    nn35248 
   1315   2712    nn35248 	/*
   1316   2712    nn35248 	 * The mount succeeded.  If this was not a mount of /dev then
   1317   2712    nn35248 	 * we're done.
   1318   2712    nn35248 	 */
   1319   2712    nn35248 	if (strcmp(fsptr->zone_fs_type, MNTTYPE_DEV) != 0)
   1320   2712    nn35248 		return (0);
   1321   2712    nn35248 
   1322   2712    nn35248 	/*
   1323   2712    nn35248 	 * We just mounted an instance of a /dev filesystem, so now we
   1324   2712    nn35248 	 * need to configure it.
   1325   2712    nn35248 	 */
   1326   7655     gerald 	return (mount_one_dev(zlogp, path, mount_cmd));
   1327      0     stevel }
   1328      0     stevel 
   1329      0     stevel static void
   1330      0     stevel free_fs_data(struct zone_fstab *fsarray, uint_t nelem)
   1331      0     stevel {
   1332      0     stevel 	uint_t i;
   1333      0     stevel 
   1334      0     stevel 	if (fsarray == NULL)
   1335      0     stevel 		return;
   1336      0     stevel 	for (i = 0; i < nelem; i++)
   1337      0     stevel 		zonecfg_free_fs_option_list(fsarray[i].zone_fs_options);
   1338      0     stevel 	free(fsarray);
   1339      0     stevel }
   1340      0     stevel 
   1341    766   carlsonj /*
   1342   2653   vp157776  * This function initiates the creation of a small Solaris Environment for
   1343   2653   vp157776  * scratch zone. The Environment creation process is split up into two
   1344   2653   vp157776  * functions(build_mounted_pre_var() and build_mounted_post_var()). It
   1345   2653   vp157776  * is done this way because:
   1346   2653   vp157776  * 	We need to have both /etc and /var in the root of the scratchzone.
   1347   2653   vp157776  * 	We loopback mount zone's own /etc and /var into the root of the
   1348   2653   vp157776  * 	scratch zone. Unlike /etc, /var can be a seperate filesystem. So we
   1349   2653   vp157776  * 	need to delay the mount of /var till the zone's root gets populated.
   1350   2653   vp157776  *	So mounting of localdirs[](/etc and /var) have been moved to the
   1351   2653   vp157776  * 	build_mounted_post_var() which gets called only after the zone
   1352   2653   vp157776  * 	specific filesystems are mounted.
   1353   5829   gjelinek  *
   1354   5829   gjelinek  * Note that the scratch zone we set up for updating the zone (Z_MNT_UPDATE)
   1355   5829   gjelinek  * does not loopback mount the zone's own /etc and /var into the root of the
   1356   5829   gjelinek  * scratch zone.
   1357    766   carlsonj  */
   1358    766   carlsonj static boolean_t
   1359   2653   vp157776 build_mounted_pre_var(zlog_t *zlogp, char *rootpath,
   1360   3071   vp157776     size_t rootlen, const char *zonepath, char *luroot, size_t lurootlen)
   1361    766   carlsonj {
   1362    766   carlsonj 	char tmp[MAXPATHLEN], fromdir[MAXPATHLEN];
   1363    766   carlsonj 	const char **cpp;
   1364    766   carlsonj 	static const char *mkdirs[] = {
   1365   2592         dp 		"/system", "/system/contract", "/system/object", "/proc",
   1366   2592         dp 		"/dev", "/tmp", "/a", NULL
   1367    766   carlsonj 	};
   1368   2653   vp157776 	char *altstr;
   1369    766   carlsonj 	FILE *fp;
   1370    766   carlsonj 	uuid_t uuid;
   1371   2712    nn35248 
   1372    766   carlsonj 	resolve_lofs(zlogp, rootpath, rootlen);
   1373   3071   vp157776 	(void) snprintf(luroot, lurootlen, "%s/lu", zonepath);
   1374   3071   vp157776 	resolve_lofs(zlogp, luroot, lurootlen);
   1375    766   carlsonj 	(void) snprintf(tmp, sizeof (tmp), "%s/bin", luroot);
   1376    766   carlsonj 	(void) symlink("./usr/bin", tmp);
   1377    766   carlsonj 
   1378    766   carlsonj 	/*
   1379    766   carlsonj 	 * These are mostly special mount points; not handled here.  (See
   1380    766   carlsonj 	 * zone_mount_early.)
   1381    766   carlsonj 	 */
   1382    766   carlsonj 	for (cpp = mkdirs; *cpp != NULL; cpp++) {
   1383    766   carlsonj 		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
   1384    766   carlsonj 		if (mkdir(tmp, 0755) != 0) {
   1385    766   carlsonj 			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
   1386    766   carlsonj 			return (B_FALSE);
   1387    766   carlsonj 		}
   1388    766   carlsonj 	}
   1389   2653   vp157776 	/*
   1390   2653   vp157776 	 * This is here to support lucopy.  If there's an instance of this same
   1391   2653   vp157776 	 * zone on the current running system, then we mount its root up as
   1392   2653   vp157776 	 * read-only inside the scratch zone.
   1393   2653   vp157776 	 */
   1394   2653   vp157776 	(void) zonecfg_get_uuid(zone_name, uuid);
   1395   2653   vp157776 	altstr = strdup(zonecfg_get_root());
   1396   2653   vp157776 	if (altstr == NULL) {
   1397   2653   vp157776 		zerror(zlogp, B_TRUE, "memory allocation failed");
   1398   2653   vp157776 		return (B_FALSE);
   1399   2653   vp157776 	}
   1400   2653   vp157776 	zonecfg_set_root("");
   1401   2653   vp157776 	(void) strlcpy(tmp, zone_name, sizeof (tmp));
   1402   2653   vp157776 	(void) zonecfg_get_name_by_uuid(uuid, tmp, sizeof (tmp));
   1403   2653   vp157776 	if (zone_get_rootpath(tmp, fromdir, sizeof (fromdir)) == Z_OK &&
   1404   2653   vp157776 	    strcmp(fromdir, rootpath) != 0) {
   1405   2653   vp157776 		(void) snprintf(tmp, sizeof (tmp), "%s/b", luroot);
   1406   2653   vp157776 		if (mkdir(tmp, 0755) != 0) {
   1407   2653   vp157776 			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
   1408   2653   vp157776 			return (B_FALSE);
   1409   2653   vp157776 		}
   1410   2653   vp157776 		if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, fromdir,
   1411   2653   vp157776 		    tmp) != 0) {
   1412   2653   vp157776 			zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp,
   1413   2653   vp157776 			    fromdir);
   1414   2653   vp157776 			return (B_FALSE);
   1415   2653   vp157776 		}
   1416   2653   vp157776 	}
   1417   2653   vp157776 	zonecfg_set_root(altstr);
   1418   2653   vp157776 	free(altstr);
   1419   2653   vp157776 
   1420   2653   vp157776 	if ((fp = zonecfg_open_scratch(luroot, B_TRUE)) == NULL) {
   1421   2653   vp157776 		zerror(zlogp, B_TRUE, "cannot open zone mapfile");
   1422   2653   vp157776 		return (B_FALSE);
   1423   2653   vp157776 	}
   1424   2653   vp157776 	(void) ftruncate(fileno(fp), 0);
   1425   2653   vp157776 	if (zonecfg_add_scratch(fp, zone_name, kernzone, "/") == -1) {
   1426   2653   vp157776 		zerror(zlogp, B_TRUE, "cannot add zone mapfile entry");
   1427   2653   vp157776 	}
   1428   2653   vp157776 	zonecfg_close_scratch(fp);
   1429   2653   vp157776 	(void) snprintf(tmp, sizeof (tmp), "%s/a", luroot);
   1430   2653   vp157776 	if (domount(zlogp, MNTTYPE_LOFS, "", rootpath, tmp) != 0)
   1431   2653   vp157776 		return (B_FALSE);
   1432   2653   vp157776 	(void) strlcpy(rootpath, tmp, rootlen);
   1433   2653   vp157776 	return (B_TRUE);
   1434   2653   vp157776 }
   1435   2653   vp157776 
   1436   2653   vp157776 
   1437   2653   vp157776 static boolean_t
   1438   5829   gjelinek build_mounted_post_var(zlog_t *zlogp, zone_mnt_t mount_cmd, char *rootpath,
   1439   5829   gjelinek     const char *luroot)
   1440   2653   vp157776 {
   1441   2653   vp157776 	char tmp[MAXPATHLEN], fromdir[MAXPATHLEN];
   1442   2653   vp157776 	const char **cpp;
   1443   5829   gjelinek 	const char **loopdirs;
   1444   5829   gjelinek 	const char **tmpdirs;
   1445   2653   vp157776 	static const char *localdirs[] = {
   1446   2653   vp157776 		"/etc", "/var", NULL
   1447   2653   vp157776 	};
   1448   5829   gjelinek 	static const char *scr_loopdirs[] = {
   1449   2653   vp157776 		"/etc/lib", "/etc/fs", "/lib", "/sbin", "/platform",
   1450   2653   vp157776 		"/usr", NULL
   1451   2653   vp157776 	};
   1452   5829   gjelinek 	static const char *upd_loopdirs[] = {
   1453   5829   gjelinek 		"/etc", "/kernel", "/lib", "/opt", "/platform", "/sbin",
   1454   5829   gjelinek 		"/usr", "/var", NULL
   1455   5829   gjelinek 	};
   1456   5829   gjelinek 	static const char *scr_tmpdirs[] = {
   1457   2653   vp157776 		"/tmp", "/var/run", NULL
   1458   2653   vp157776 	};
   1459   5829   gjelinek 	static const char *upd_tmpdirs[] = {
   1460   5829   gjelinek 		"/tmp", "/var/run", "/var/tmp", NULL
   1461   5829   gjelinek 	};
   1462   2653   vp157776 	struct stat st;
   1463    766   carlsonj 
   1464   5829   gjelinek 	if (mount_cmd == Z_MNT_SCRATCH) {
   1465   5829   gjelinek 		/*
   1466   5829   gjelinek 		 * These are mounted read-write from the zone undergoing
   1467   5829   gjelinek 		 * upgrade.  We must be careful not to 'leak' things from the
   1468   5829   gjelinek 		 * main system into the zone, and this accomplishes that goal.
   1469   5829   gjelinek 		 */
   1470   5829   gjelinek 		for (cpp = localdirs; *cpp != NULL; cpp++) {
   1471   5829   gjelinek 			(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot,
   1472    766   carlsonj 			    *cpp);
   1473   5829   gjelinek 			(void) snprintf(fromdir, sizeof (fromdir), "%s%s",
   1474   5829   gjelinek 			    rootpath, *cpp);
   1475   5829   gjelinek 			if (mkdir(tmp, 0755) != 0) {
   1476   5829   gjelinek 				zerror(zlogp, B_TRUE, "cannot create %s", tmp);
   1477   5829   gjelinek 				return (B_FALSE);
   1478   5829   gjelinek 			}
   1479   5829   gjelinek 			if (domount(zlogp, MNTTYPE_LOFS, "", fromdir, tmp)
   1480   5829   gjelinek 			    != 0) {
   1481   5829   gjelinek 				zerror(zlogp, B_TRUE, "cannot mount %s on %s",
   1482   5829   gjelinek 				    tmp, *cpp);
   1483   5829   gjelinek 				return (B_FALSE);
   1484   5829   gjelinek 			}
   1485   5829   gjelinek 		}
   1486   5829   gjelinek 	}
   1487   5829   gjelinek 
   1488   5829   gjelinek 	if (mount_cmd == Z_MNT_UPDATE)
   1489   5829   gjelinek 		loopdirs = upd_loopdirs;
   1490   5829   gjelinek 	else
   1491   5829   gjelinek 		loopdirs = scr_loopdirs;
   1492    766   carlsonj 
   1493    766   carlsonj 	/*
   1494    766   carlsonj 	 * These are things mounted read-only from the running system because
   1495    766   carlsonj 	 * they contain binaries that must match system.
   1496    766   carlsonj 	 */
   1497    766   carlsonj 	for (cpp = loopdirs; *cpp != NULL; cpp++) {
   1498    766   carlsonj 		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
   1499    766   carlsonj 		if (mkdir(tmp, 0755) != 0) {
   1500    766   carlsonj 			if (errno != EEXIST) {
   1501    766   carlsonj 				zerror(zlogp, B_TRUE, "cannot create %s", tmp);
   1502    766   carlsonj 				return (B_FALSE);
   1503    766   carlsonj 			}
   1504    766   carlsonj 			if (lstat(tmp, &st) != 0) {
   1505    766   carlsonj 				zerror(zlogp, B_TRUE, "cannot stat %s", tmp);
   1506    766   carlsonj 				return (B_FALSE);
   1507    766   carlsonj 			}
   1508    766   carlsonj 			/*
   1509    766   carlsonj 			 * Ignore any non-directories encountered.  These are
   1510    766   carlsonj 			 * things that have been converted into symlinks
   1511    766   carlsonj 			 * (/etc/fs and /etc/lib) and no longer need a lofs
   1512    766   carlsonj 			 * fixup.
   1513    766   carlsonj 			 */
   1514    766   carlsonj 			if (!S_ISDIR(st.st_mode))
   1515    766   carlsonj 				continue;
   1516    766   carlsonj 		}
   1517    766   carlsonj 		if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, *cpp,
   1518    766   carlsonj 		    tmp) != 0) {
   1519    766   carlsonj 			zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp,
   1520    766   carlsonj 			    *cpp);
   1521    766   carlsonj 			return (B_FALSE);
   1522    766   carlsonj 		}
   1523    766   carlsonj 	}
   1524    766   carlsonj 
   1525   5829   gjelinek 	if (mount_cmd == Z_MNT_UPDATE)
   1526   5829   gjelinek 		tmpdirs = upd_tmpdirs;
   1527   5829   gjelinek 	else
   1528   5829   gjelinek 		tmpdirs = scr_tmpdirs;
   1529   5829   gjelinek 
   1530    766   carlsonj 	/*
   1531    766   carlsonj 	 * These are things with tmpfs mounted inside.
   1532    766   carlsonj 	 */
   1533    766   carlsonj 	for (cpp = tmpdirs; *cpp != NULL; cpp++) {
   1534    766   carlsonj 		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
   1535   5829   gjelinek 		if (mount_cmd == Z_MNT_SCRATCH && mkdir(tmp, 0755) != 0 &&
   1536   5829   gjelinek 		    errno != EEXIST) {
   1537    766   carlsonj 			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
   1538    766   carlsonj 			return (B_FALSE);
   1539    766   carlsonj 		}
   1540   3514   gjelinek 
   1541   3514   gjelinek 		/*
   1542   3514   gjelinek 		 * We could set the mode for /tmp when we do the mkdir but
   1543   3514   gjelinek 		 * since that can be modified by the umask we will just set
   1544   3514   gjelinek 		 * the correct mode for /tmp now.
   1545   3514   gjelinek 		 */
   1546   3514   gjelinek 		if (strcmp(*cpp, "/tmp") == 0 && chmod(tmp, 01777) != 0) {
   1547   3514   gjelinek 			zerror(zlogp, B_TRUE, "cannot chmod %s", tmp);
   1548   3514   gjelinek 			return (B_FALSE);
   1549   3514   gjelinek 		}
   1550   3514   gjelinek 
   1551    766   carlsonj 		if (domount(zlogp, MNTTYPE_TMPFS, "", "swap", tmp) != 0) {
   1552    766   carlsonj 			zerror(zlogp, B_TRUE, "cannot mount swap on %s", *cpp);
   1553    766   carlsonj 			return (B_FALSE);
   1554    766   carlsonj 		}
   1555    766   carlsonj 	}
   1556    766   carlsonj 	return (B_TRUE);
   1557    766   carlsonj }
   1558    766   carlsonj 
   1559   2712    nn35248 typedef struct plat_gmount_cb_data {
   1560   2712    nn35248 	zlog_t			*pgcd_zlogp;
   1561   2712    nn35248 	struct zone_fstab	**pgcd_fs_tab;
   1562   2712    nn35248 	int			*pgcd_num_fs;
   1563   2712    nn35248 } plat_gmount_cb_data_t;
   1564   2712    nn35248 
   1565   2712    nn35248 /*
   1566   2712    nn35248  * plat_gmount_cb() is a callback function invoked by libbrand to iterate
   1567   2712    nn35248  * through all global brand platform mounts.
   1568   2712    nn35248  */
   1569   2712    nn35248 int
   1570   2712    nn35248 plat_gmount_cb(void *data, const char *spec, const char *dir,
   1571   2712    nn35248     const char *fstype, const char *opt)
   1572   2712    nn35248 {
   1573   2712    nn35248 	plat_gmount_cb_data_t	*cp = data;
   1574   2712    nn35248 	zlog_t			*zlogp = cp->pgcd_zlogp;
   1575   2712    nn35248 	struct zone_fstab	*fs_ptr = *cp->pgcd_fs_tab;
   1576   2712    nn35248 	int			num_fs = *cp->pgcd_num_fs;
   1577   2712    nn35248 	struct zone_fstab	*fsp, *tmp_ptr;
   1578   2712    nn35248 
   1579   2712    nn35248 	num_fs++;
   1580   2712    nn35248 	if ((tmp_ptr = realloc(fs_ptr, num_fs * sizeof (*tmp_ptr))) == NULL) {
   1581   2712    nn35248 		zerror(zlogp, B_TRUE, "memory allocation failed");
   1582   2712    nn35248 		return (-1);
   1583   2712    nn35248 	}
   1584   2712    nn35248 
   1585   2712    nn35248 	fs_ptr = tmp_ptr;
   1586   2712    nn35248 	fsp = &fs_ptr[num_fs - 1];
   1587   2712    nn35248 
   1588   2712    nn35248 	/* update the callback struct passed in */
   1589   2712    nn35248 	*cp->pgcd_fs_tab = fs_ptr;
   1590   2712    nn35248 	*cp->pgcd_num_fs = num_fs;
   1591   2712    nn35248 
   1592   2712    nn35248 	fsp->zone_fs_raw[0] = '\0';
   1593   2712    nn35248 	(void) strlcpy(fsp->zone_fs_special, spec,
   1594   2712    nn35248 	    sizeof (fsp->zone_fs_special));
   1595   2712    nn35248 	(void) strlcpy(fsp->zone_fs_dir, dir, sizeof (fsp->zone_fs_dir));
   1596   2712    nn35248 	(void) strlcpy(fsp->zone_fs_type, fstype, sizeof (fsp->zone_fs_type));
   1597   2712    nn35248 	fsp->zone_fs_options = NULL;
   1598   3688        edp 	if ((opt != NULL) &&
   1599   3688        edp 	    (zonecfg_add_fs_option(fsp, (char *)opt) != Z_OK)) {
   1600   2712    nn35248 		zerror(zlogp, B_FALSE, "error adding property");
   1601   2712    nn35248 		return (-1);
   1602   2712    nn35248 	}
   1603   2712    nn35248 
   1604   2712    nn35248 	return (0);
   1605   2712    nn35248 }
   1606   2712    nn35248 
   1607   2712    nn35248 static int
   1608   2712    nn35248 mount_filesystems_ipdent(zone_dochandle_t handle, zlog_t *zlogp,
   1609   2712    nn35248     struct zone_fstab **fs_tabp, int *num_fsp)
   1610   2712    nn35248 {
   1611   2712    nn35248 	struct zone_fstab *tmp_ptr, *fs_ptr, *fsp, fstab;
   1612   2712    nn35248 	int num_fs;
   1613   2712    nn35248 
   1614   2712    nn35248 	num_fs = *num_fsp;
   1615   2712    nn35248 	fs_ptr = *fs_tabp;
   1616   2712    nn35248 
   1617   2712    nn35248 	if (zonecfg_setipdent(handle) != Z_OK) {
   1618   2712    nn35248 		zerror(zlogp, B_FALSE, "invalid configuration");
   1619   2712    nn35248 		return (-1);
   1620   2712    nn35248 	}
   1621   2712    nn35248 	while (zonecfg_getipdent(handle, &fstab) == Z_OK) {
   1622   2712    nn35248 		num_fs++;
   1623   2712    nn35248 		if ((tmp_ptr = realloc(fs_ptr,
   1624   2712    nn35248 		    num_fs * sizeof (*tmp_ptr))) == NULL) {
   1625   2712    nn35248 			zerror(zlogp, B_TRUE, "memory allocation failed");
   1626   2712    nn35248 			(void) zonecfg_endipdent(handle);
   1627   2712    nn35248 			return (-1);
   1628   2712    nn35248 		}
   1629   2712    nn35248 
   1630   2712    nn35248 		/* update the pointers passed in */
   1631   2712    nn35248 		*fs_tabp = tmp_ptr;
   1632   2712    nn35248 		*num_fsp = num_fs;
   1633   2712    nn35248 
   1634   2712    nn35248 		/*
   1635   2712    nn35248 		 * IPDs logically only have a mount point; all other properties
   1636   2712    nn35248 		 * are implied.
   1637   2712    nn35248 		 */
   1638   2712    nn35248 		fs_ptr = tmp_ptr;
   1639   2712    nn35248 		fsp = &fs_ptr[num_fs - 1];
   1640   2712    nn35248 		(void) strlcpy(fsp->zone_fs_dir,
   1641   2712    nn35248 		    fstab.zone_fs_dir, sizeof (fsp->zone_fs_dir));
   1642   2712    nn35248 		fsp->zone_fs_special[0] = '\0';
   1643   2712    nn35248 		fsp->zone_fs_raw[0] = '\0';
   1644   2712    nn35248 		fsp->zone_fs_type[0] = '\0';
   1645   2712    nn35248 		fsp->zone_fs_options = NULL;
   1646   2712    nn35248 	}
   1647   2712    nn35248 	(void) zonecfg_endipdent(handle);
   1648   2712    nn35248 	return (0);
   1649   2712    nn35248 }
   1650   2712    nn35248 
   1651   2712    nn35248 static int
   1652   2712    nn35248 mount_filesystems_fsent(zone_dochandle_t handle, zlog_t *zlogp,
   1653   5829   gjelinek     struct zone_fstab **fs_tabp, int *num_fsp, zone_mnt_t mount_cmd)
   1654   2712    nn35248 {
   1655   2712    nn35248 	struct zone_fstab *tmp_ptr, *fs_ptr, *fsp, fstab;
   1656   2712    nn35248 	int num_fs;
   1657   2712    nn35248 
   1658   2712    nn35248 	num_fs = *num_fsp;
   1659   2712    nn35248 	fs_ptr = *fs_tabp;
   1660   2712    nn35248 
   1661   2712    nn35248 	if (zonecfg_setfsent(handle) != Z_OK) {
   1662   2712    nn35248 		zerror(zlogp, B_FALSE, "invalid configuration");
   1663   2712    nn35248 		return (-1);
   1664   2712    nn35248 	}
   1665   2712    nn35248 	while (zonecfg_getfsent(handle, &fstab) == Z_OK) {
   1666   2712    nn35248 		/*
   1667   2712    nn35248 		 * ZFS filesystems will not be accessible under an alternate
   1668   2712    nn35248 		 * root, since the pool will not be known.  Ignore them in this
   1669   2712    nn35248 		 * case.
   1670   2712    nn35248 		 */
   1671   5829   gjelinek 		if (ALT_MOUNT(mount_cmd) &&
   1672   5829   gjelinek 		    strcmp(fstab.zone_fs_type, MNTTYPE_ZFS) == 0)
   1673   2712    nn35248 			continue;
   1674   2712    nn35248 
   1675   2712    nn35248 		num_fs++;
   1676   2712    nn35248 		if ((tmp_ptr = realloc(fs_ptr,
   1677   2712    nn35248 		    num_fs * sizeof (*tmp_ptr))) == NULL) {
   1678   2712    nn35248 			zerror(zlogp, B_TRUE, "memory allocation failed");
   1679   2712    nn35248 			(void) zonecfg_endfsent(handle);
   1680   2712    nn35248 			return (-1);
   1681   2712    nn35248 		}
   1682   2712    nn35248 		/* update the pointers passed in */
   1683   2712    nn35248 		*fs_tabp = tmp_ptr;
   1684   2712    nn35248 		*num_fsp = num_fs;
   1685   2712    nn35248 
   1686   2712    nn35248 		fs_ptr = tmp_ptr;
   1687   2712    nn35248 		fsp = &fs_ptr[num_fs - 1];
   1688   2712    nn35248 		(void) strlcpy(fsp->zone_fs_dir,
   1689   2712    nn35248 		    fstab.zone_fs_dir, sizeof (fsp->zone_fs_dir));
   1690   2712    nn35248 		(void) strlcpy(fsp->zone_fs_raw, fstab.zone_fs_raw,
   1691   2712    nn35248 		    sizeof (fsp->zone_fs_raw));
   1692   2712    nn35248 		(void) strlcpy(fsp->zone_fs_type, fstab.zone_fs_type,
   1693   2712    nn35248 		    sizeof (fsp->zone_fs_type));
   1694   2712    nn35248 		fsp->zone_fs_options = fstab.zone_fs_options;
   1695   3688        edp 
   1696   3688        edp 		/*
   1697   3688        edp 		 * For all lofs mounts, make sure that the 'special'
   1698   3688        edp 		 * entry points inside the alternate root.  The
   1699   3688        edp 		 * source path for a lofs mount in a given zone needs
   1700   3688        edp 		 * to be relative to the root of the boot environment
   1701   3688        edp 		 * that contains the zone.  Note that we don't do this
   1702   3688        edp 		 * for non-lofs mounts since they will have a device
   1703   3688        edp 		 * as a backing store and device paths must always be
   1704   3688        edp 		 * specified relative to the current boot environment.
   1705   3688        edp 		 */
   1706   3688        edp 		fsp->zone_fs_special[0] = '\0';
   1707   3688        edp 		if (strcmp(fsp->zone_fs_type, MNTTYPE_LOFS) == 0) {
   1708   3688        edp 			(void) strlcat(fsp->zone_fs_special, zonecfg_get_root(),
   1709   3688        edp 			    sizeof (fsp->zone_fs_special));
   1710   3688        edp 		}
   1711   3688        edp 		(void) strlcat(fsp->zone_fs_special, fstab.zone_fs_special,
   1712   3688        edp 		    sizeof (fsp->zone_fs_special));
   1713   2712    nn35248 	}
   1714   2712    nn35248 	(void) zonecfg_endfsent(handle);
   1715   2712    nn35248 	return (0);
   1716   2712    nn35248 }
   1717   2712    nn35248 
   1718      0     stevel static int
   1719   5829   gjelinek mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
   1720      0     stevel {
   1721   2712    nn35248 	char rootpath[MAXPATHLEN];
   1722   2712    nn35248 	char zonepath[MAXPATHLEN];
   1723   2712    nn35248 	char brand[MAXNAMELEN];
   1724   3071   vp157776 	char luroot[MAXPATHLEN];
   1725   2712    nn35248 	int i, num_fs = 0;
   1726   2712    nn35248 	struct zone_fstab *fs_ptr = NULL;
   1727      0     stevel 	zone_dochandle_t handle = NULL;
   1728      0     stevel 	zone_state_t zstate;
   1729   2727        edp 	brand_handle_t bh;
   1730   2712    nn35248 	plat_gmount_cb_data_t cb;
   1731      0     stevel 
   1732      0     stevel 	if (zone_get_state(zone_name, &zstate) != Z_OK ||
   1733    766   carlsonj 	    (zstate != ZONE_STATE_READY && zstate != ZONE_STATE_MOUNTED)) {
   1734      0     stevel 		zerror(zlogp, B_FALSE,
   1735    766   carlsonj 		    "zone must be in '%s' or '%s' state to mount file-systems",
   1736    766   carlsonj 		    zone_state_str(ZONE_STATE_READY),
   1737    766   carlsonj 		    zone_state_str(ZONE_STATE_MOUNTED));
   1738      0     stevel 		goto bad;
   1739      0     stevel 	}
   1740      0     stevel 
   1741      0     stevel 	if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
   1742      0     stevel 		zerror(zlogp, B_TRUE, "unable to determine zone path");
   1743      0     stevel 		goto bad;
   1744      0     stevel 	}
   1745      0     stevel 
   1746      0     stevel 	if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
   1747      0     stevel 		zerror(zlogp, B_TRUE, "unable to determine zone root");
   1748      0     stevel 		goto bad;
   1749      0     stevel 	}
   1750      0     stevel 
   1751      0     stevel 	if ((handle = zonecfg_init_handle()) == NULL) {
   1752   1645      comay 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
   1753      0     stevel 		goto bad;
   1754      0     stevel 	}
   1755      0     stevel 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK ||
   1756      0     stevel 	    zonecfg_setfsent(handle) != Z_OK) {
   1757      0     stevel 		zerror(zlogp, B_FALSE, "invalid configuration");
   1758      0     stevel 		goto bad;
   1759      0     stevel 	}
   1760      0     stevel 
   1761   7655     gerald 	/*
   1762  10943     Edward 	 * If we are mounting the zone, then we must always use the default
   1763   7655     gerald 	 * brand global mounts.
   1764   7655     gerald 	 */
   1765   7655     gerald 	if (ALT_MOUNT(mount_cmd)) {
   1766  10943     Edward 		(void) strlcpy(brand, default_brand, sizeof (brand));
   1767   7655     gerald 	} else {
   1768  10796    Stephen 		(void) strlcpy(brand, brand_name, sizeof (brand));
   1769   7655     gerald 	}
   1770   7655     gerald 
   1771   2712    nn35248 	/* Get a handle to the brand info for this zone */
   1772   7655     gerald 	if ((bh = brand_open(brand)) == NULL) {
   1773   2712    nn35248 		zerror(zlogp, B_FALSE, "unable to determine zone brand");
   1774   3716   gjelinek 		zonecfg_fini_handle(handle);
   1775   2712    nn35248 		return (-1);
   1776   2712    nn35248 	}
   1777   2712    nn35248 
   1778   2712    nn35248 	/*
   1779   2712    nn35248 	 * Get the list of global filesystems to mount from the brand
   1780   2712    nn35248 	 * configuration.
   1781   2712    nn35248 	 */
   1782   2712    nn35248 	cb.pgcd_zlogp = zlogp;
   1783   2712    nn35248 	cb.pgcd_fs_tab = &fs_ptr;
   1784   2712    nn35248 	cb.pgcd_num_fs = &num_fs;
   1785   2727        edp 	if (brand_platform_iter_gmounts(bh, zonepath,
   1786   2712    nn35248 	    plat_gmount_cb, &cb) != 0) {
   1787   2712    nn35248 		zerror(zlogp, B_FALSE, "unable to mount filesystems");
   1788   2727        edp 		brand_close(bh);
   1789   3716   gjelinek 		zonecfg_fini_handle(handle);
   1790   2712    nn35248 		return (-1);
   1791   2712    nn35248 	}
   1792   2727        edp 	brand_close(bh);
   1793   2712    nn35248 
   1794      0     stevel 	/*
   1795      0     stevel 	 * Iterate through the rest of the filesystems, first the IPDs, then
   1796      0     stevel 	 * the general FSs.  Sort them all, then mount them in sorted order.
   1797      0     stevel 	 * This is to make sure the higher level directories (e.g., /usr)
   1798      0     stevel 	 * get mounted before any beneath them (e.g., /usr/local).
   1799      0     stevel 	 */
   1800   2712    nn35248 	if (mount_filesystems_ipdent(handle, zlogp, &fs_ptr, &num_fs) != 0)
   1801      0     stevel 		goto bad;
   1802      0     stevel 
   1803   2712    nn35248 	if (mount_filesystems_fsent(handle, zlogp, &fs_ptr, &num_fs,
   1804   2712    nn35248 	    mount_cmd) != 0)
   1805      0     stevel 		goto bad;
   1806    789     ahrens 
   1807      0     stevel 	zonecfg_fini_handle(handle);
   1808      0     stevel 	handle = NULL;
   1809      0     stevel 
   1810    766   carlsonj 	/*
   1811   2712    nn35248 	 * Normally when we mount a zone all the zone filesystems
   1812   2712    nn35248 	 * get mounted relative to rootpath, which is usually
   1813   2712    nn35248 	 * <zonepath>/root.  But when mounting a zone for administration
   1814   2712    nn35248 	 * purposes via the zone "mount" state, build_mounted_pre_var()
   1815   2712    nn35248 	 * updates rootpath to be <zonepath>/lu/a so we'll mount all
   1816   2712    nn35248 	 * the zones filesystems there instead.
   1817   2712    nn35248 	 *
   1818   2712    nn35248 	 * build_mounted_pre_var() and build_mounted_post_var() will
   1819   2712    nn35248 	 * also do some extra work to create directories and lofs mount
   1820   2712    nn35248 	 * a bunch of global zone file system paths into <zonepath>/lu.
   1821   2712    nn35248 	 *
   1822   2712    nn35248 	 * This allows us to be able to enter the zone (now rooted at
   1823   2712    nn35248 	 * <zonepath>/lu) and run the upgrade/patch tools that are in the
   1824   2712    nn35248 	 * global zone and have them upgrade the to-be-modified zone's
   1825   2712    nn35248 	 * files mounted on /a.  (Which mirrors the existing standard
   1826   2712    nn35248 	 * upgrade environment.)
   1827   2712    nn35248 	 *
   1828   2712    nn35248 	 * There is of course one catch.  When doing the upgrade
   1829   2712    nn35248 	 * we need <zoneroot>/lu/dev to be the /dev filesystem
   1830   2712    nn35248 	 * for the zone and we don't want to have any /dev filesystem
   1831   2712    nn35248 	 * mounted at <zoneroot>/lu/a/dev.  Since /dev is specified
   1832   2712    nn35248 	 * as a normal zone filesystem by default we'll try to mount
   1833   2712    nn35248 	 * it at <zoneroot>/lu/a/dev, so we have to detect this
   1834   2712    nn35248 	 * case and instead mount it at <zoneroot>/lu/dev.
   1835   2712    nn35248 	 *
   1836   2712    nn35248 	 * All this work is done in three phases:
   1837   2653   vp157776 	 *   1) Create and populate lu directory (build_mounted_pre_var()).
   1838   2653   vp157776 	 *   2) Mount the required filesystems as per the zone configuration.
   1839   2653   vp157776 	 *   3) Set up the rest of the scratch zone environment
   1840   2653   vp157776 	 *	(build_mounted_post_var()).
   1841    766   carlsonj 	 */
   1842   5829   gjelinek 	if (ALT_MOUNT(mount_cmd) && !build_mounted_pre_var(zlogp,
   1843   3071   vp157776 	    rootpath, sizeof (rootpath), zonepath, luroot, sizeof (luroot)))
   1844    766   carlsonj 		goto bad;
   1845    766   carlsonj 
   1846      0     stevel 	qsort(fs_ptr, num_fs, sizeof (*fs_ptr), fs_compare);
   1847   2712    nn35248 
   1848      0     stevel 	for (i = 0; i < num_fs; i++) {
   1849   5829   gjelinek 		if (ALT_MOUNT(mount_cmd) &&
   1850   2712    nn35248 		    strcmp(fs_ptr[i].zone_fs_dir, "/dev") == 0) {
   1851   2712    nn35248 			size_t slen = strlen(rootpath) - 2;
   1852   2712    nn35248 
   1853   2712    nn35248 			/*
   1854   2712    nn35248 			 * By default we'll try to mount /dev as /a/dev
   1855   2712    nn35248 			 * but /dev is special and always goes at the top
   1856   2712    nn35248 			 * so strip the trailing '/a' from the rootpath.
   1857   2712    nn35248 			 */
   1858   2712    nn35248 			assert(strcmp(&rootpath[slen], "/a") == 0);
   1859   2712    nn35248 			rootpath[slen] = '\0';
   1860   7655     gerald 			if (mount_one(zlogp, &fs_ptr[i], rootpath, mount_cmd)
   1861   7655     gerald 			    != 0)
   1862   2712    nn35248 				goto bad;
   1863   2712    nn35248 			rootpath[slen] = '/';
   1864   2712    nn35248 			continue;
   1865   2712    nn35248 		}
   1866   7655     gerald 		if (mount_one(zlogp, &fs_ptr[i], rootpath, mount_cmd) != 0)
   1867      0     stevel 			goto bad;
   1868      0     stevel 	}
   1869   5829   gjelinek 	if (ALT_MOUNT(mount_cmd) &&
   1870   5829   gjelinek 	    !build_mounted_post_var(zlogp, mount_cmd, rootpath, luroot))
   1871   2653   vp157776 		goto bad;
   1872   1676        jpk 
   1873   1676        jpk 	/*
   1874   1676        jpk 	 * For Trusted Extensions cross-mount each lower level /export/home
   1875   1676        jpk 	 */
   1876   5829   gjelinek 	if (mount_cmd == Z_MNT_BOOT &&
   1877   5829   gjelinek 	    tsol_mounts(zlogp, zone_name, rootpath) != 0)
   1878   1676        jpk 		goto bad;
   1879   1676        jpk 
   1880      0     stevel 	free_fs_data(fs_ptr, num_fs);
   1881      0     stevel 
   1882      0     stevel 	/*
   1883      0     stevel 	 * Everything looks fine.
   1884      0     stevel 	 */
   1885      0     stevel 	return (0);
   1886      0     stevel 
   1887      0     stevel bad:
   1888      0     stevel 	if (handle != NULL)
   1889      0     stevel 		zonecfg_fini_handle(handle);
   1890      0     stevel 	free_fs_data(fs_ptr, num_fs);
   1891      0     stevel 	return (-1);
   1892      0     stevel }
   1893      0     stevel 
   /*
 * Convert the decimal prefix length in prefixstr into a network mask by
 * setting the leading bits of the bytes at maskstr.  Whole bytes are
 * overwritten with 0xFF; the remaining high-order bits are OR-ed into the
 * following byte, so that byte should be zero on entry.
 *
 * prefixstr must consist solely of a decimal number in the range
 * [0, maxprefixlen]; an empty string or trailing garbage is rejected rather
 * than being silently treated as a (possibly zero) prefix length.
 *
 * Returns 0 on success, 1 if the prefix length is invalid.
 *
 * The caller makes sure neither pointer parameter is NULL.
 */
static int
addr2netmask(char *prefixstr, int maxprefixlen, uchar_t *maskstr)
{
	char *endp;
	long prefixlen;

	errno = 0;
	prefixlen = strtol(prefixstr, &endp, 10);
	if (endp == prefixstr || *endp != '\0' || errno != 0)
		return (1);
	if (prefixlen < 0 || prefixlen > maxprefixlen)
		return (1);

	/* Whole bytes covered by the prefix. */
	for (; prefixlen >= 8; prefixlen -= 8)
		*maskstr++ = 0xFF;

	/* High-order bits of the one partially covered byte, if any. */
	if (prefixlen > 0)
		*maskstr |= (uchar_t)(0xFF << (8 - prefixlen));

	return (0);
}
   1914      0     stevel 
   1915      0     stevel /*
   1916      0     stevel  * Tear down all interfaces belonging to the given zone.  This should
   1917      0     stevel  * be called with the zone in a state other than "running", so that
   1918      0     stevel  * interfaces can't be assigned to the zone after this returns.
   1919      0     stevel  *
   1920      0     stevel  * If anything goes wrong, log an error message and return an error.
   1921      0     stevel  */
   1922      0     stevel static int
   1923   3448   dh155122 unconfigure_shared_network_interfaces(zlog_t *zlogp, zoneid_t zone_id)
   1924      0     stevel {
   1925      0     stevel 	struct lifnum lifn;
   1926      0     stevel 	struct lifconf lifc;
   1927      0     stevel 	struct lifreq *lifrp, lifrl;
   1928      0     stevel 	int64_t lifc_flags = LIFC_NOXMIT | LIFC_ALLZONES;
   1929      0     stevel 	int num_ifs, s, i, ret_code = 0;
   1930      0     stevel 	uint_t bufsize;
   1931      0     stevel 	char *buf = NULL;
   1932      0     stevel 
   1933      0     stevel 	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
   1934      0     stevel 		zerror(zlogp, B_TRUE, "could not get socket");
   1935      0     stevel 		ret_code = -1;
   1936      0     stevel 		goto bad;
   1937      0     stevel 	}
   1938      0     stevel 	lifn.lifn_family = AF_UNSPEC;
   1939      0     stevel 	lifn.lifn_flags = (int)lifc_flags;
   1940      0     stevel 	if (ioctl(s, SIOCGLIFNUM, (char *)&lifn) < 0) {
   1941      0     stevel 		zerror(zlogp, B_TRUE,
   1942   3448   dh155122 		    "could not determine number of network interfaces");
   1943      0     stevel 		ret_code = -1;
   1944      0     stevel 		goto bad;
   1945      0     stevel 	}
   1946      0     stevel 	num_ifs = lifn.lifn_count;
   1947      0     stevel 	bufsize = num_ifs * sizeof (struct lifreq);
   1948      0     stevel 	if ((buf = malloc(bufsize)) == NULL) {
   1949      0     stevel 		zerror(zlogp, B_TRUE, "memory allocation failed");
   1950      0     stevel 		ret_code = -1;
   1951      0     stevel 		goto bad;
   1952      0     stevel 	}
   1953      0     stevel 	lifc.lifc_family = AF_UNSPEC;
   1954      0     stevel 	lifc.lifc_flags = (int)lifc_flags;
   1955      0     stevel 	lifc.lifc_len = bufsize;
   1956      0     stevel 	lifc.lifc_buf = buf;
   1957      0     stevel 	if (ioctl(s, SIOCGLIFCONF, (char *)&lifc) < 0) {
   1958   3448   dh155122 		zerror(zlogp, B_TRUE, "could not get configured network "
   1959   3448   dh155122 		    "interfaces");
   1960      0     stevel 		ret_code = -1;
   1961      0     stevel 		goto bad;
   1962      0     stevel 	}
   1963      0     stevel 	lifrp = lifc.lifc_req;
   1964      0     stevel 	for (i = lifc.lifc_len / sizeof (struct lifreq); i > 0; i--, lifrp++) {
   1965      0     stevel 		(void) close(s);
   1966      0     stevel 		if ((s = socket(lifrp->lifr_addr.ss_family, SOCK_DGRAM, 0)) <
   1967      0     stevel 		    0) {
   1968      0     stevel 			zerror(zlogp, B_TRUE, "%s: could not get socket",
   1969      0     stevel 			    lifrl.lifr_name);
   1970      0     stevel 			ret_code = -1;
   1971      0     stevel 			continue;
   1972      0     stevel 		}
   1973      0     stevel 		(void) memset(&lifrl, 0, sizeof (lifrl));
   1974      0     stevel 		(void) strncpy(lifrl.lifr_name, lifrp->lifr_name,
   1975      0     stevel 		    sizeof (lifrl.lifr_name));
   1976      0     stevel 		if (ioctl(s, SIOCGLIFZONE, (caddr_t)&lifrl) < 0) {
   1977   3251   sl108498 			if (errno == ENXIO)
   1978   3251   sl108498 				/*
   1979   3251   sl108498 				 * Interface may have been removed by admin or
   1980   3251   sl108498 				 * another zone halting.
   1981   3251   sl108498 				 */
   1982   3251   sl108498 				continue;
   1983      0     stevel 			zerror(zlogp, B_TRUE,
   1984   3251   sl108498 			    "%s: could not determine the zone to which this "
   1985   3448   dh155122 			    "network interface is bound", lifrl.lifr_name);
   1986      0     stevel 			ret_code = -1;
   1987      0     stevel 			continue;
   1988      0     stevel 		}
   1989      0     stevel 		if (lifrl.lifr_zoneid == zone_id) {
   1990      0     stevel 			if (ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifrl) < 0) {
   1991      0     stevel 				zerror(zlogp, B_TRUE,
   1992   3448   dh155122 				    "%s: could not remove network interface",
   1993      0     stevel 				    lifrl.lifr_name);
   1994      0     stevel 				ret_code = -1;
   1995      0     stevel 				continue;
   1996      0     stevel 			}
   1997      0     stevel 		}
   1998      0     stevel 	}
   1999      0     stevel bad:
   2000      0     stevel 	if (s > 0)
   2001      0     stevel 		(void) close(s);
   2002      0     stevel 	if (buf)
   2003      0     stevel 		free(buf);
   2004      0     stevel 	return (ret_code);
   2005      0     stevel }
   2006      0     stevel 
   2007      0     stevel static union	sockunion {
   2008      0     stevel 	struct	sockaddr sa;
   2009      0     stevel 	struct	sockaddr_in sin;
   2010      0     stevel 	struct	sockaddr_dl sdl;
   2011      0     stevel 	struct	sockaddr_in6 sin6;
   2012      0     stevel } so_dst, so_ifp;
   2013      0     stevel 
   2014      0     stevel static struct {
   2015      0     stevel 	struct	rt_msghdr hdr;
   2016      0     stevel 	char	space[512];
   2017      0     stevel } rtmsg;
   2018      0     stevel 
   /*
 * Return the size in bytes of the concrete socket address structure that
 * corresponds to sa's address family: sockaddr_in for AF_INET, sockaddr_dl
 * for AF_LINK, sockaddr_in6 for AF_INET6, and the generic sockaddr for
 * anything else.
 */
static int
salen(struct sockaddr *sa)
{
	size_t nbytes;

	if (sa->sa_family == AF_INET)
		nbytes = sizeof (struct sockaddr_in);
	else if (sa->sa_family == AF_LINK)
		nbytes = sizeof (struct sockaddr_dl);
	else if (sa->sa_family == AF_INET6)
		nbytes = sizeof (struct sockaddr_in6);
	else
		nbytes = sizeof (struct sockaddr);

	return (nbytes);
}
   2033      0     stevel 
   /*
 * Round a length up to the next multiple of sizeof (long); a length that
 * is zero or negative yields sizeof (long).  The argument is evaluated
 * more than once, so it must be free of side effects.
 */
#define	ROUNDUP_LONG(a) \
	((a) <= 0 ? sizeof (long) : \
	    ((((a) - 1) | (sizeof (long) - 1)) + 1))
   2036      0     stevel 
   2037      0     stevel /*
   2038      0     stevel  * Look up which zone is using a given IP address.  The address in question
   2039      0     stevel  * is expected to have been stuffed into the structure to which lifr points
   2040      0     stevel  * via a previous SIOCGLIFADDR ioctl().
   2041      0     stevel  *
   2042      0     stevel  * This is done using black router socket magic.
   2043      0     stevel  *
   2044      0     stevel  * Return the name of the zone on success or NULL on failure.
   2045      0     stevel  *
   2046      0     stevel  * This is a lot of code for a simple task; a new ioctl request to take care
   2047      0     stevel  * of this might be a useful RFE.
   2048      0     stevel  */
   2049      0     stevel 
   2050      0     stevel static char *
   2051      0     stevel who_is_using(zlog_t *zlogp, struct lifreq *lifr)
   2052      0     stevel {
   2053      0     stevel 	static char answer[ZONENAME_MAX];
   2054      0     stevel 	pid_t pid;
   2055      0     stevel 	int s, rlen, l, i;
   2056      0     stevel 	char *cp = rtmsg.space;
   2057      0     stevel 	struct sockaddr_dl *ifp = NULL;
   2058      0     stevel 	struct sockaddr *sa;
   2059      0     stevel 	char save_if_name[LIFNAMSIZ];
   2060      0     stevel 
   2061      0     stevel 	answer[0] = '\0';
   2062      0     stevel 
   2063      0     stevel 	pid = getpid();
   2064      0     stevel 	if ((s = socket(PF_ROUTE, SOCK_RAW, 0)) < 0) {
   2065      0     stevel 		zerror(zlogp, B_TRUE, "could not get routing socket");
   2066      0     stevel 		return (NULL);
   2067      0     stevel 	}
   2068      0     stevel 
   2069      0     stevel 	if (lifr->lifr_addr.ss_family == AF_INET) {
   2070      0     stevel 		struct sockaddr_in *sin4;
   2071      0     stevel 
   2072      0     stevel 		so_dst.sa.sa_family = AF_INET;
   2073      0     stevel 		sin4 = (struct sockaddr_in *)&lifr->lifr_addr;
   2074      0     stevel 		so_dst.sin.sin_addr = sin4->sin_addr;
   2075      0     stevel 	} else {
   2076      0     stevel 		struct sockaddr_in6 *sin6;
   2077      0     stevel 
   2078      0     stevel 		so_dst.sa.sa_family = AF_INET6;
   2079      0     stevel 		sin6 = (struct sockaddr_in6 *)&lifr->lifr_addr;
   2080      0     stevel 		so_dst.sin6.sin6_addr = sin6->sin6_addr;
   2081      0     stevel 	}
   2082      0     stevel 
   2083      0     stevel 	so_ifp.sa.sa_family = AF_LINK;
   2084      0     stevel 
   2085      0     stevel 	(void) memset(&rtmsg, 0, sizeof (rtmsg));
   2086      0     stevel 	rtmsg.hdr.rtm_type = RTM_GET;
   2087      0     stevel 	rtmsg.hdr.rtm_flags = RTF_UP | RTF_HOST;
   2088      0     stevel 	rtmsg.hdr.rtm_version = RTM_VERSION;
   2089      0     stevel 	rtmsg.hdr.rtm_seq = ++rts_seqno;
   2090      0     stevel 	rtmsg.hdr.rtm_addrs = RTA_IFP | RTA_DST;
   2091      0     stevel 
   2092      0     stevel 	l = ROUNDUP_LONG(salen(&so_dst.sa));
   2093      0     stevel 	(void) memmove(cp, &(so_dst), l);
   2094      0     stevel 	cp += l;
   2095      0     stevel 	l = ROUNDUP_LONG(salen(&so_ifp.sa));
   2096      0     stevel 	(void) memmove(cp, &(so_ifp), l);
   2097      0     stevel 	cp += l;
   2098      0     stevel 
   2099      0     stevel 	rtmsg.hdr.rtm_msglen = l = cp - (char *)&rtmsg;
   2100      0     stevel 
   2101      0     stevel 	if ((rlen = write(s, &rtmsg, l)) < 0) {
   2102      0     stevel 		zerror(zlogp, B_TRUE, "writing to routing socket");
   2103      0     stevel 		return (NULL);
   2104      0     stevel 	} else if (rlen < (int)rtmsg.hdr.rtm_msglen) {
   2105      0     stevel 		zerror(zlogp, B_TRUE,
   2106      0     stevel 		    "write to routing socket got only %d for len\n", rlen);
   2107      0     stevel 		return (NULL);
   2108      0     stevel 	}
   2109      0     stevel 	do {
   2110      0     stevel 		l = read(s, &rtmsg, sizeof (rtmsg));
   2111      0     stevel 	} while (l > 0 && (rtmsg.hdr.rtm_seq != rts_seqno ||
   2112      0     stevel 	    rtmsg.hdr.rtm_pid != pid));
   2113      0     stevel 	if (l < 0) {
   2114      0     stevel 		zerror(zlogp, B_TRUE, "reading from routing socket");
   2115      0     stevel 		return (NULL);
   2116      0     stevel 	}
   2117      0     stevel 
   2118      0     stevel 	if (rtmsg.hdr.rtm_version != RTM_VERSION) {
   2119      0     stevel 		zerror(zlogp, B_FALSE,
   2120      0     stevel 		    "routing message version %d not understood",
   2121      0     stevel 		    rtmsg.hdr.rtm_version);
   2122      0     stevel 		return (NULL);
   2123      0     stevel 	}
   2124      0     stevel 	if (rtmsg.hdr.rtm_msglen != (ushort_t)l) {
   2125      0     stevel 		zerror(zlogp, B_FALSE, "message length mismatch, "
   2126      0     stevel 		    "expected %d bytes, returned %d bytes",
   2127      0     stevel 		    rtmsg.hdr.rtm_msglen, l);
   2128      0     stevel 		return (NULL);
   2129      0     stevel 	}
   2130      0     stevel 	if (rtmsg.hdr.rtm_errno != 0)  {
   2131      0     stevel 		errno = rtmsg.hdr.rtm_errno;
   2132      0     stevel 		zerror(zlogp, B_TRUE, "RTM_GET routing socket message");
   2133      0     stevel 		return (NULL);
   2134      0     stevel 	}
   2135      0     stevel 	if ((rtmsg.hdr.rtm_addrs & RTA_IFP) == 0) {
   2136   3448   dh155122 		zerror(zlogp, B_FALSE, "network interface not found");
   2137      0     stevel 		return (NULL);
   2138      0     stevel 	}
   2139      0     stevel 	cp = ((char *)(&rtmsg.hdr + 1));
   2140      0     stevel 	for (i = 1; i != 0; i <<= 1) {
   2141      0     stevel 		/* LINTED E_BAD_PTR_CAST_ALIGN */
   2142      0     stevel 		sa = (struct sockaddr *)cp;
   2143      0     stevel 		if (i != RTA_IFP) {
   2144      0     stevel 			if ((i & rtmsg.hdr.rtm_addrs) != 0)
   2145      0     stevel 				cp += ROUNDUP_LONG(salen(sa));
   2146      0     stevel 			continue;
   2147      0     stevel 		}
   2148      0     stevel 		if (sa->sa_family == AF_LINK &&
   2149      0     stevel 		    ((struct sockaddr_dl *)sa)->sdl_nlen != 0)
   2150      0     stevel 			ifp = (struct sockaddr_dl *)sa;
   2151      0     stevel 		break;
   2152      0     stevel 	}
   2153      0     stevel 	if (ifp == NULL) {
   2154   3448   dh155122 		zerror(zlogp, B_FALSE, "network interface could not be "
   2155   3448   dh155122 		    "determined");
   2156      0     stevel 		return (NULL);
   2157      0     stevel 	}
   2158      0     stevel 
   2159      0     stevel 	/*
   2160      0     stevel 	 * We need to set the I/F name to what we got above, then do the
   2161      0     stevel 	 * appropriate ioctl to get its zone name.  But lifr->lifr_name is
   2162      0     stevel 	 * used by the calling function to do a REMOVEIF, so if we leave the
   2163      0     stevel 	 * "good" zone's I/F name in place, *that* I/F will be removed instead
   2164      0     stevel 	 * of the bad one.  So we save the old (bad) I/F name before over-
   2165      0     stevel 	 * writing it and doing the ioctl, then restore it after the ioctl.
   2166      0     stevel 	 */
   2167      0     stevel 	(void) strlcpy(save_if_name, lifr->lifr_name, sizeof (save_if_name));
   2168      0     stevel 	(void) strncpy(lifr->lifr_name, ifp->sdl_data, ifp->sdl_nlen);
   2169      0     stevel 	lifr->lifr_name[ifp->sdl_nlen] = '\0';
   2170      0     stevel 	i = ioctl(s, SIOCGLIFZONE, lifr);
   2171      0     stevel 	(void) strlcpy(lifr->lifr_name, save_if_name, sizeof (save_if_name));
   2172      0     stevel 	if (i < 0) {
   2173      0     stevel 		zerror(zlogp, B_TRUE,
   2174   3448   dh155122 		    "%s: could not determine the zone network interface "
   2175   3448   dh155122 		    "belongs to", lifr->lifr_name);
   2176      0     stevel 		return (NULL);
   2177      0     stevel 	}
   2178      0     stevel 	if (getzonenamebyid(lifr->lifr_zoneid, answer, sizeof (answer)) < 0)
   2179      0     stevel 		(void) snprintf(answer, sizeof (answer), "%d",
   2180      0     stevel 		    lifr->lifr_zoneid);
   2181      0     stevel 
   2182      0     stevel 	if (strlen(answer) > 0)
   2183      0     stevel 		return (answer);
   2184      0     stevel 	return (NULL);
   2185      0     stevel }
   2186      0     stevel 
   2187      0     stevel /*
   2188      0     stevel  * Configures a single interface: a new virtual interface is added, based on
   2189      0     stevel  * the physical interface nwiftabptr->zone_nwif_physical, with the address
   2190      0     stevel  * specified in nwiftabptr->zone_nwif_address, for zone zone_id.  Note that
   2191      0     stevel  * the "address" can be an IPv6 address (with a /prefixlength required), an
   2192      0     stevel  * IPv4 address (with a /prefixlength optional), or a name; for the latter,
   2193      0     stevel  * an IPv4 name-to-address resolution will be attempted.
   2194      0     stevel  *
   2195      0     stevel  * If anything goes wrong, we log a detailed error message, attempt to tear
   2196      0     stevel  * down whatever we set up and return an error.
   2197      0     stevel  */
   2198      0     stevel static int
   2199      0     stevel configure_one_interface(zlog_t *zlogp, zoneid_t zone_id,
   2200   8058     Jordan     struct zone_nwiftab *nwiftabptr)
   2201      0     stevel {
   2202      0     stevel 	struct lifreq lifr;
   2203      0     stevel 	struct sockaddr_in netmask4;
   2204      0     stevel 	struct sockaddr_in6 netmask6;
   2205  10067      Vamsi 	struct sockaddr_storage laddr;
   2206      0     stevel 	struct in_addr in4;
   2207      0     stevel 	sa_family_t af;
   2208      0     stevel 	char *slashp = strchr(nwiftabptr->zone_nwif_address, '/');
   2209      0     stevel 	int s;
   2210      0     stevel 	boolean_t got_netmask = B_FALSE;
   2211   9720    Saurabh 	boolean_t is_loopback = B_FALSE;
   2212      0     stevel 	char addrstr4[INET_ADDRSTRLEN];
   2213      0     stevel 	int res;
   2214      0     stevel 
   2215      0     stevel 	res = zonecfg_valid_net_address(nwiftabptr->zone_nwif_address, &lifr);
   2216      0     stevel 	if (res != Z_OK) {
   2217      0     stevel 		zerror(zlogp, B_FALSE, "%s: %s", zonecfg_strerror(res),
   2218      0     stevel 		    nwiftabptr->zone_nwif_address);
   2219      0     stevel 		return (-1);
   2220      0     stevel 	}
   2221      0     stevel 	af = lifr.lifr_addr.ss_family;
   2222      0     stevel 	if (af == AF_INET)
   2223      0     stevel 		in4 = ((struct sockaddr_in *)(&lifr.lifr_addr))->sin_addr;
   2224      0     stevel 	if ((s = socket(af, SOCK_DGRAM, 0)) < 0) {
   2225      0     stevel 		zerror(zlogp, B_TRUE, "could not get socket");
   2226      0     stevel 		return (-1);
   2227      0     stevel 	}
   2228      0     stevel 
   2229  10067      Vamsi 	/*
   2230  10067      Vamsi 	 * This is a similar kind of "hack" like in addif() to get around
   2231  10067      Vamsi 	 * the problem of SIOCLIFADDIF.  The problem is that this ioctl
   2232  10067      Vamsi 	 * does not include the netmask when adding a logical interface.
   2233  10067      Vamsi 	 * To get around this problem, we first add the logical interface
   2234  10067      Vamsi 	 * with a 0 address.  After that, we set the netmask if provided.
   2235  10067      Vamsi 	 * Finally we set the interface address.
   2236  10067      Vamsi 	 */
   2237  10067      Vamsi 	laddr = lifr.lifr_addr;
   2238      0     stevel 	(void) strlcpy(lifr.lifr_name, nwiftabptr->zone_nwif_physical,
   2239      0     stevel 	    sizeof (lifr.lifr_name));
   2240  10067      Vamsi 	(void) memset(&lifr.lifr_addr, 0, sizeof (lifr.lifr_addr));
   2241  10067      Vamsi 
   2242      0     stevel 	if (ioctl(s, SIOCLIFADDIF, (caddr_t)&lifr) < 0) {
   2243   2611   vp157776 		/*
   2244   2611   vp157776 		 * Here, we know that the interface can't be brought up.
   2245   2611   vp157776 		 * A similar warning message was already printed out to
   2246   2611   vp157776 		 * the console by zoneadm(1M) so instead we log the
   2247   2611   vp157776 		 * message to syslog and continue.
   2248   2611   vp157776 		 */
   2249   3448   dh155122 		zerror(&logsys, B_TRUE, "WARNING: skipping network interface "
   2250   2611   vp157776 		    "'%s' which may not be present/plumbed in the "
   2251   2611   vp157776 		    "global zone.", lifr.lifr_name);
   2252      0     stevel 		(void) close(s);
   2253   2611   vp157776 		return (Z_OK);
   2254      0     stevel 	}
   2255      0     stevel 
   2256      0     stevel 	/* Preserve literal IPv4 address for later potential printing. */
   2257      0     stevel 	if (af == AF_INET)
   2258      0     stevel 		(void) inet_ntop(AF_INET, &in4, addrstr4, INET_ADDRSTRLEN);
   2259      0     stevel 
   2260      0     stevel 	lifr.lifr_zoneid = zone_id;
   2261      0     stevel 	if (ioctl(s, SIOCSLIFZONE, (caddr_t)&lifr) < 0) {
   2262   3448   dh155122 		zerror(zlogp, B_TRUE, "%s: could not place network interface "
   2263   3448   dh155122 		    "into zone", lifr.lifr_name);
   2264      0     stevel 		goto bad;
   2265      0     stevel 	}
   2266      0     stevel 
   2267   9720    Saurabh 	/*
   2268   9720    Saurabh 	 * Loopback interface will use the default netmask assigned, if no
   2269   9720    Saurabh 	 * netmask is found.
   2270   9720    Saurabh 	 */
   2271      0     stevel 	if (strcmp(nwiftabptr->zone_nwif_physical, "lo0") == 0) {
   2272   9720    Saurabh 		is_loopback = B_TRUE;
   2273   9720    Saurabh 	}
   2274   9720    Saurabh 	if (af == AF_INET) {
   2275   9720    Saurabh 		/*
   2276   9720    Saurabh 		 * The IPv4 netmask can be determined either
   2277   9720    Saurabh 		 * directly if a prefix length was supplied with
   2278   9720    Saurabh 		 * the address or via the netmasks database.  Not
   2279   9720    Saurabh 		 * being able to determine it is a common failure,
   2280   9720    Saurabh 		 * but it often is not fatal to operation of the
   2281   9720    Saurabh 		 * interface.  In that case, a warning will be
   2282   9720    Saurabh 		 * printed after the rest of the interface's
   2283   9720    Saurabh 		 * parameters have been configured.
   2284   9720    Saurabh 		 */
   2285   9720    Saurabh 		(void) memset(&netmask4, 0, sizeof (netmask4));
   2286   9720    Saurabh 		if (slashp != NULL) {
   2287   9720    Saurabh 			if (addr2netmask(slashp + 1, V4_ADDR_LEN,
   2288   9720    Saurabh 			    (uchar_t *)&netmask4.sin_addr) != 0) {
   2289      0     stevel 				*slashp = '/';
   2290      0     stevel 				zerror(zlogp, B_FALSE,
   2291      0     stevel 				    "%s: invalid prefix length in %s",
   2292      0     stevel 				    lifr.lifr_name,
   2293      0     stevel 				    nwiftabptr->zone_nwif_address);
   2294      0     stevel 				goto bad;
   2295      0     stevel 			}
   2296      0     stevel 			got_netmask = B_TRUE;
   2297   9720    Saurabh 		} else if (getnetmaskbyaddr(in4,
   2298   9720    Saurabh 		    &netmask4.sin_addr) == 0) {
   2299   9720    Saurabh 			got_netmask = B_TRUE;
   2300   9720    Saurabh 		}
   2301   9720    Saurabh 		if (got_netmask) {
   2302   9720    Saurabh 			netmask4.sin_family = af;
   2303   9720    Saurabh 			(void) memcpy(&lifr.lifr_addr, &netmask4,
   2304   9720    Saurabh 			    sizeof (netmask4));
   2305   9720    Saurabh 		}
   2306   9720    Saurabh 	} else {
   2307   9720    Saurabh 		(void) memset(&netmask6, 0, sizeof (netmask6));
   2308   9720    Saurabh 		if (addr2netmask(slashp + 1, V6_ADDR_LEN,
   2309   9720    Saurabh 		    (uchar_t *)&netmask6.sin6_addr) != 0) {
   2310   9720    Saurabh 			*slashp = '/';
   2311   9720    Saurabh 			zerror(zlogp, B_FALSE,
   2312   9720    Saurabh 			    "%s: invalid prefix length in %s",
   2313   9720    Saurabh 			    lifr.lifr_name,
   2314   9720    Saurabh 			    nwiftabptr->zone_nwif_address);
   2315      0     stevel 			goto bad;
   2316      0     stevel 		}
   2317   9720    Saurabh 		got_netmask = B_TRUE;
   2318   9720    Saurabh 		netmask6.sin6_family = af;
   2319   9720    Saurabh 		(void) memcpy(&lifr.lifr_addr, &netmask6,
   2320   9720    Saurabh 		    sizeof (netmask6));
   2321   9720    Saurabh 	}
   2322   9720    Saurabh 	if (got_netmask &&
   2323   9720    Saurabh 	    ioctl(s, SIOCSLIFNETMASK, (caddr_t)&lifr) < 0) {
   2324   9720    Saurabh 		zerror(zlogp, B_TRUE, "%s: could not set netmask",
   2325   9720    Saurabh 		    lifr.lifr_name);
   2326   9720    Saurabh 		goto bad;
   2327   9720    Saurabh 	}
   2328   9720    Saurabh 
   2329  10067      Vamsi 	/* Set the interface address */
   2330  10067      Vamsi 	lifr.lifr_addr = laddr;
   2331   9720    Saurabh 	if (ioctl(s, SIOCSLIFADDR, (caddr_t)&lifr) < 0) {
   2332   9720    Saurabh 		zerror(zlogp, B_TRUE,
   2333  10067      Vamsi 		    "%s: could not set IP address to %s",
   2334  10067      Vamsi 		    lifr.lifr_name, nwiftabptr->zone_nwif_address);
   2335   9720    Saurabh 		goto bad;
   2336      0     stevel 	}
   2337      0     stevel 
   2338      0     stevel 	if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
   2339      0     stevel 		zerror(zlogp, B_TRUE, "%s: could not get flags",
   2340      0     stevel 		    lifr.lifr_name);
   2341      0     stevel 		goto bad;
   2342      0     stevel 	}
   2343      0     stevel 	lifr.lifr_flags |= IFF_UP;
   2344      0     stevel 	if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
   2345      0     stevel 		int save_errno = errno;
   2346      0     stevel 		char *zone_using;
   2347      0     stevel 
   2348      0     stevel 		/*
   2349      0     stevel 		 * If we failed with something other than EADDRNOTAVAIL,
   2350      0     stevel 		 * then skip to the end.  Otherwise, look up our address,
   2351      0     stevel 		 * then call a function to determine which zone is already
   2352      0     stevel 		 * using that address.
   2353      0     stevel 		 */
   2354      0     stevel 		if (errno != EADDRNOTAVAIL) {
   2355      0     stevel 			zerror(zlogp, B_TRUE,
   2356   3448   dh155122 			    "%s: could not bring network interface up",
   2357   3448   dh155122 			    lifr.lifr_name);
   2358      0     stevel 			goto bad;
   2359      0     stevel 		}
   2360      0     stevel 		if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) {
   2361      0     stevel 			zerror(zlogp, B_TRUE, "%s: could not get address",
   2362      0     stevel 			    lifr.lifr_name);
   2363      0     stevel 			goto bad;
   2364      0     stevel 		}
   2365      0     stevel 		zone_using = who_is_using(zlogp, &lifr);
   2366      0     stevel 		errno = save_errno;
   2367      0     stevel 		if (zone_using == NULL)
   2368      0     stevel 			zerror(zlogp, B_TRUE,
   2369   3448   dh155122 			    "%s: could not bring network interface up",
   2370   3448   dh155122 			    lifr.lifr_name);
   2371   3448   dh155122 		else
   2372   3448   dh155122 			zerror(zlogp, B_TRUE, "%s: could not bring network "
   2373   3448   dh155122 			    "interface up: address in use by zone '%s'",
   2374   3448   dh155122 			    lifr.lifr_name, zone_using);
   2375      0     stevel 		goto bad;
   2376      0     stevel 	}
   2377      0     stevel 
   2378   9720    Saurabh 	if (!got_netmask && !is_loopback) {
   2379      0     stevel 		/*
   2380      0     stevel 		 * A common, but often non-fatal problem, is that the system
   2381      0     stevel 		 * cannot find the netmask for an interface address. This is
   2382      0     stevel 		 * often caused by it being only in /etc/inet/netmasks, but
   2383      0     stevel 		 * /etc/nsswitch.conf says to use NIS or NIS+ and it's not
   2384      0     stevel 		 * in that. This doesn't show up at boot because the netmask
   2385      0     stevel 		 * is obtained from /etc/inet/netmasks when no network
   2386      0     stevel 		 * interfaces are up, but isn't consulted when NIS/NIS+ is
   2387      0     stevel 		 * available. We warn the user here that something like this
   2388      0     stevel 		 * has happened and we're just running with a default and
   2389      0     stevel 		 * possible incorrect netmask.
   2390      0     stevel 		 */
   2391      0     stevel 		char buffer[INET6_ADDRSTRLEN];
   2392      0     stevel 		void  *addr;
   2393   8485      Peter 		const char *nomatch = "no matching subnet found in netmasks(4)";
   2394      0     stevel 
   2395      0     stevel 		if (af == AF_INET)
   2396      0     stevel 			addr = &((struct sockaddr_in *)
   2397      0     stevel 			    (&lifr.lifr_addr))->sin_addr;
   2398      0     stevel 		else
   2399      0     stevel 			addr = &((struct sockaddr_in6 *)
   2400      0     stevel 			    (&lifr.lifr_addr))->sin6_addr;
   2401      0     stevel 
   2402   8485      Peter 		/*
   2403   8485      Peter 		 * Find out what netmask the interface is going to be using.
   2404   8485      Peter 		 * If we just brought up an IPMP data address on an underlying
   2405   8485      Peter 		 * interface above, the address will have already migrated, so
   2406   8485      Peter 		 * the SIOCGLIFNETMASK won't be able to find it (but we need
   2407   8485      Peter 		 * to bring the address up to get the actual netmask).  Just
   2408   8485      Peter 		 * omit printing the actual netmask in this corner-case.
   2409   8485      Peter 		 */
   2410      0     stevel 		if (ioctl(s, SIOCGLIFNETMASK, (caddr_t)&lifr) < 0 ||
   2411   8485      Peter 		    inet_ntop(af, addr, buffer, sizeof (buffer)) == NULL) {
   2412   8485      Peter 			zerror(zlogp, B_FALSE, "WARNING: %s; using default.",
   2413   8485      Peter 			    nomatch);
   2414   8485      Peter 		} else {
   2415   8485      Peter 			zerror(zlogp, B_FALSE,
   2416   8485      Peter 			    "WARNING: %s: %s: %s; using default of %s.",
   2417   8485      Peter 			    lifr.lifr_name, nomatch, addrstr4, buffer);
   2418   8485      Peter 		}
   2419      0     stevel 	}
   2420      0     stevel 
   2421   6076     gfaden 	/*
   2422   6076     gfaden 	 * If a default router was specified for this interface
   2423   6076     gfaden 	 * set the route now. Ignore if already set.
   2424   6076     gfaden 	 */
   2425   6076     gfaden 	if (strlen(nwiftabptr->zone_nwif_defrouter) > 0) {
   2426   6076     gfaden 		int status;
   2427   6076     gfaden 		char *argv[7];
   2428   6076     gfaden 
   2429   6076     gfaden 		argv[0] = "route";
   2430   6076     gfaden 		argv[1] = "add";
   2431   6076     gfaden 		argv[2] = "-ifp";
   2432   6076     gfaden 		argv[3] = nwiftabptr->zone_nwif_physical;
   2433   6076     gfaden 		argv[4] = "default";
   2434   6076     gfaden 		argv[5] = nwiftabptr->zone_nwif_defrouter;
   2435   6076     gfaden 		argv[6] = NULL;
   2436   6076     gfaden 
   2437   6076     gfaden 		status = forkexec(zlogp, "/usr/sbin/route", argv);
   2438   6076     gfaden 		if (status != 0 && status != EEXIST)
   2439   6076     gfaden 			zerror(zlogp, B_FALSE, "Unable to set route for "
   2440   6076     gfaden 			    "interface %s to %s\n",
   2441   6076     gfaden 			    nwiftabptr->zone_nwif_physical,
   2442   6076     gfaden 			    nwiftabptr->zone_nwif_defrouter);
   2443   6076     gfaden 	}
   2444   6076     gfaden 
   2445      0     stevel 	(void) close(s);
   2446      0     stevel 	return (Z_OK);
   2447      0     stevel bad:
   2448      0     stevel 	(void) ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifr);
   2449      0     stevel 	(void) close(s);
   2450      0     stevel 	return (-1);
   2451      0     stevel }
   2452      0     stevel 
   2453      0     stevel /*
   2454      0     stevel  * Sets up network interfaces based on information from the zone configuration.
   2455   8058     Jordan  * IPv4 and IPv6 loopback interfaces are set up "for free", modeling the global
   2456   8058     Jordan  * system.
   2457      0     stevel  *
   2458      0     stevel  * If anything goes wrong, we log a general error message, attempt to tear down
   2459      0     stevel  * whatever we set up, and return an error.
   2460      0     stevel  */
   2461      0     stevel static int
   2462   3448   dh155122 configure_shared_network_interfaces(zlog_t *zlogp)
   2463      0     stevel {
   2464      0     stevel 	zone_dochandle_t handle;
   2465      0     stevel 	struct zone_nwiftab nwiftab, loopback_iftab;
   2466      0     stevel 	zoneid_t zoneid;
   2467      0     stevel 
   2468      0     stevel 	if ((zoneid = getzoneidbyname(zone_name)) == ZONE_ID_UNDEFINED) {
   2469      0     stevel 		zerror(zlogp, B_TRUE, "unable to get zoneid");
   2470      0     stevel 		return (-1);
   2471      0     stevel 	}
   2472      0     stevel 
   2473      0     stevel 	if ((handle = zonecfg_init_handle()) == NULL) {
   2474      0     stevel 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
   2475      0     stevel 		return (-1);
   2476      0     stevel 	}
   2477      0     stevel 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
   2478      0     stevel 		zerror(zlogp, B_FALSE, "invalid configuration");
   2479      0     stevel 		zonecfg_fini_handle(handle);
   2480      0     stevel 		return (-1);
   2481      0     stevel 	}
   2482      0     stevel 	if (zonecfg_setnwifent(handle) == Z_OK) {
   2483      0     stevel 		for (;;) {
   2484      0     stevel 			if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK)
   2485      0     stevel 				break;
   2486   8058     Jordan 			if (configure_one_interface(zlogp, zoneid, &nwiftab) !=
   2487      0     stevel 			    Z_OK) {
   2488      0     stevel 				(void) zonecfg_endnwifent(handle);
   2489      0     stevel 				zonecfg_fini_handle(handle);
   2490      0     stevel 				return (-1);
   2491      0     stevel 			}
   2492      0     stevel 		}
   2493      0     stevel 		(void) zonecfg_endnwifent(handle);
   2494      0     stevel 	}
   2495      0     stevel 	zonecfg_fini_handle(handle);
   2496   5863     gfaden 	if (is_system_labeled()) {
   2497   5863     gfaden 		/*
   2498   5863     gfaden 		 * Labeled zones share the loopback interface
   2499   5863     gfaden 		 * so it is not plumbed for shared stack instances.
   2500   5863     gfaden 		 */
   2501   5863     gfaden 		return (0);
   2502   5863     gfaden 	}
   2503      0     stevel 	(void) strlcpy(loopback_iftab.zone_nwif_physical, "lo0",
   2504      0     stevel 	    sizeof (loopback_iftab.zone_nwif_physical));
   2505      0     stevel 	(void) strlcpy(loopback_iftab.zone_nwif_address, "127.0.0.1",
   2506      0     stevel 	    sizeof (loopback_iftab.zone_nwif_address));
   2507   6378     gfaden 	loopback_iftab.zone_nwif_defrouter[0] = '\0';
   2508   8058     Jordan 	if (configure_one_interface(zlogp, zoneid, &loopback_iftab) != Z_OK)
   2509   8058     Jordan 		return (-1);
   2510   8058     Jordan 
   2511   8058     Jordan 	/* Always plumb up the IPv6 loopback interface. */
   2512   8058     Jordan 	(void) strlcpy(loopback_iftab.zone_nwif_address, "::1/128",
   2513   8058     Jordan 	    sizeof (loopback_iftab.zone_nwif_address));
   2514   8058     Jordan 	if (configure_one_interface(zlogp, zoneid, &loopback_iftab) != Z_OK)
   2515   8058     Jordan 		return (-1);
   2516   3448   dh155122 	return (0);
   2517   3448   dh155122 }
   2518   3448   dh155122 
   2519   8878      Peter static void
   2520   8878      Peter zdlerror(zlog_t *zlogp, dladm_status_t err, const char *dlname, const char *str)
   2521   8878      Peter {
   2522   8878      Peter 	char errmsg[DLADM_STRSIZE];
   2523   8878      Peter 
   2524   8878      Peter 	(void) dladm_status2str(err, errmsg);
   2525   8878      Peter 	zerror(zlogp, B_FALSE, "%s '%s': %s", str, dlname, errmsg);
   2526   8878      Peter }
   2527   8878      Peter 
   2528   7342      Aruna static int
   2529  10616  Sebastien add_datalink(zlog_t *zlogp, char *zone_name, datalink_id_t linkid, char *dlname)
   2530   3448   dh155122 {
   2531   8878      Peter 	dladm_status_t err;
   2532   8878      Peter 
   2533   3448   dh155122 	/* First check if it's in use by global zone. */
   2534   3448   dh155122 	if (zonecfg_ifname_exists(AF_INET, dlname) ||
   2535   3448   dh155122 	    zonecfg_ifname_exists(AF_INET6, dlname)) {
   2536   8878      Peter 		zerror(zlogp, B_FALSE, "WARNING: skipping network interface "
   2537   8878      Peter 		    "'%s' which is used in the global zone", dlname);
   2538   3448   dh155122 		return (-1);
   2539   3448   dh155122 	}
   2540   3448   dh155122 
   2541   5895   yz147064 	/* Set zoneid of this link. */
   2542  10616  Sebastien 	err = dladm_set_linkprop(dld_handle, linkid, "zone", &zone_name, 1,
   2543  10616  Sebastien 	    DLADM_OPT_ACTIVE);
   2544   8878      Peter 	if (err != DLADM_STATUS_OK) {
   2545   8878      Peter 		zdlerror(zlogp, err, dlname,
   2546   8878      Peter 		    "WARNING: unable to add network interface");
   2547   7342      Aruna 		return (-1);
   2548   7342      Aruna 	}
   2549   3448   dh155122 	return (0);
   2550   3448   dh155122 }
   2551   3448   dh155122 
   2552   3448   dh155122 /*
   2553   3448   dh155122  * Add the kernel access control information for the interface names.
   2554   3448   dh155122  * If anything goes wrong, we log a general error message, attempt to tear down
   2555   3448   dh155122  * whatever we set up, and return an error.
   2556   3448   dh155122  */
   2557   3448   dh155122 static int
   2558   3448   dh155122 configure_exclusive_network_interfaces(zlog_t *zlogp)
   2559   3448   dh155122 {
   2560   3448   dh155122 	zone_dochandle_t handle;
   2561   3448   dh155122 	struct zone_nwiftab nwiftab;
   2562   3448   dh155122 	char rootpath[MAXPATHLEN];
   2563   3448   dh155122 	char path[MAXPATHLEN];
   2564  10616  Sebastien 	datalink_id_t linkid;
   2565   3448   dh155122 	di_prof_t prof = NULL;
   2566   3448   dh155122 	boolean_t added = B_FALSE;
   2567   3448   dh155122 
   2568   3448   dh155122 	if ((handle = zonecfg_init_handle()) == NULL) {
   2569   3448   dh155122 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
   2570   3448   dh155122 		return (-1);
   2571   3448   dh155122 	}
   2572   3448   dh155122 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
   2573   3448   dh155122 		zerror(zlogp, B_FALSE, "invalid configuration");
   2574   3448   dh155122 		zonecfg_fini_handle(handle);
   2575   3448   dh155122 		return (-1);
   2576   3448   dh155122 	}
   2577   3448   dh155122 
   2578   3448   dh155122 	if (zonecfg_setnwifent(handle) != Z_OK) {
   2579   3448   dh155122 		zonecfg_fini_handle(handle);
   2580   3448   dh155122 		return (0);
   2581   3448   dh155122 	}
   2582   3448   dh155122 
   2583   3448   dh155122 	for (;;) {
   2584   3448   dh155122 		if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK)
   2585   3448   dh155122 			break;
   2586   3448   dh155122 
   2587   3448   dh155122 		if (prof == NULL) {
   2588   3448   dh155122 			if (zone_get_devroot(zone_name, rootpath,
   2589   3448   dh155122 			    sizeof (rootpath)) != Z_OK) {
   2590   3448   dh155122 				(void) zonecfg_endnwifent(handle);
   2591   3448   dh155122 				zonecfg_fini_handle(handle);
   2592   3448   dh155122 				zerror(zlogp, B_TRUE,
   2593   3448   dh155122 				    "unable to determine dev root");
   2594   3448   dh155122 				return (-1);
   2595   3448   dh155122 			}
   2596   3448   dh155122 			(void) snprintf(path, sizeof (path), "%s%s", rootpath,
   2597   3448   dh155122 			    "/dev");
   2598   3448   dh155122 			if (di_prof_init(path, &prof) != 0) {
   2599   3448   dh155122 				(void) zonecfg_endnwifent(handle);
   2600   3448   dh155122 				zonecfg_fini_handle(handle);
   2601   3448   dh155122 				zerror(zlogp, B_TRUE,
   2602   3448   dh155122 				    "failed to initialize profile");
   2603   3448   dh155122 				return (-1);
   2604   3448   dh155122 			}
   2605   3448   dh155122 		}
   2606   3448   dh155122 
   2607   3448   dh155122 		/*
   2608   5895   yz147064 		 * Create the /dev entry for backward compatibility.
   2609   3448   dh155122 		 * Only create the /dev entry if it's not in use.
   2610   5895   yz147064 		 * Note that the zone still boots when the assigned
   2611   5895   yz147064 		 * interface is inaccessible, used by others, etc.
   2612   5895   yz147064 		 * Also, when vanity naming is used, some interface do
   2613   5895   yz147064 		 * do not have corresponding /dev node names (for example,
   2614   5895   yz147064 		 * vanity named aggregations).  The /dev entry is not
   2615   5895   yz147064 		 * created in that case.  The /dev/net entry is always
   2616   5895   yz147064 		 * accessible.
   2617   3448   dh155122 		 */
   2618  10616  Sebastien 		if (dladm_name2info(dld_handle, nwiftab.zone_nwif_physical,
   2619  10616  Sebastien 		    &linkid, NULL, NULL, NULL) == DLADM_STATUS_OK &&
   2620  10616  Sebastien 		    add_datalink(zlogp, zone_name, linkid,
   2621  10616  Sebastien 		    nwiftab.zone_nwif_physical) == 0) {
   2622   7342      Aruna 			added = B_TRUE;
   2623   7342      Aruna 		} else {
   2624   7342      Aruna 			(void) zonecfg_endnwifent(handle);
   2625   7342      Aruna 			zonecfg_fini_handle(handle);
   2626   7342      Aruna 			zerror(zlogp, B_TRUE, "failed to add network device");
   2627   7342      Aruna 			return (-1);
   2628   3448   dh155122 		}
   2629   3448   dh155122 	}
   2630   3448   dh155122 	(void) zonecfg_endnwifent(handle);
   2631   3448   dh155122 	zonecfg_fini_handle(handle);
   2632   3448   dh155122 
   2633   3448   dh155122 	if (prof != NULL && added) {
   2634   3448   dh155122 		if (di_prof_commit(prof) != 0) {
   2635   3448   dh155122 			zerror(zlogp, B_TRUE, "failed to commit profile");
   2636   3448   dh155122 			return (-1);
   2637   3448   dh155122 		}
   2638   3448   dh155122 	}
   2639   3448   dh155122 	if (prof != NULL)
   2640   3448   dh155122 		di_prof_fini(prof);
   2641   3448   dh155122 
   2642   3448   dh155122 	return (0);
   2643   3448   dh155122 }
   2644   3448   dh155122 
   2645  10616  Sebastien static int
   2646  10616  Sebastien unconfigure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
   2647  10616  Sebastien {
   2648  10616  Sebastien 	int dlnum = 0;
   2649  10616  Sebastien 
   2650  10616  Sebastien 	/*
   2651  10616  Sebastien 	 * The kernel shutdown callback for the dls module should have removed
   2652  10616  Sebastien 	 * all datalinks from this zone.  If any remain, then there's a
   2653  10616  Sebastien 	 * problem.
   2654  10616  Sebastien 	 */
   2655   3448   dh155122 	if (zone_list_datalink(zoneid, &dlnum, NULL) != 0) {
   2656   3448   dh155122 		zerror(zlogp, B_TRUE, "unable to list network interfaces");
   2657   3448   dh155122 		return (-1);
   2658   3448   dh155122 	}
   2659  10616  Sebastien 	if (dlnum != 0) {
   2660  10616  Sebastien 		zerror(zlogp, B_FALSE,
   2661  10616  Sebastien 		    "datalinks remain in zone after shutdown");
   2662  10616  Sebastien 		return (-1);
   2663  10616  Sebastien 	}
   2664      0     stevel 	return (0);
   2665      0     stevel }
   2666      0     stevel 
   2667      0     stevel static int
   2668      0     stevel tcp_abort_conn(zlog_t *zlogp, zoneid_t zoneid,
   2669      0     stevel     const struct sockaddr_storage *local, const struct sockaddr_storage *remote)
   2670      0     stevel {
   2671      0     stevel 	int fd;
   2672      0     stevel 	struct strioctl ioc;
   2673      0     stevel 	tcp_ioc_abort_conn_t conn;
   2674      0     stevel 	int error;
   2675      0     stevel 
   2676      0     stevel 	conn.ac_local = *local;
   2677      0     stevel 	conn.ac_remote = *remote;
   2678      0     stevel 	conn.ac_start = TCPS_SYN_SENT;
   2679      0     stevel 	conn.ac_end = TCPS_TIME_WAIT;
   2680      0     stevel 	conn.ac_zoneid = zoneid;
   2681      0     stevel 
   2682      0     stevel 	ioc.ic_cmd = TCP_IOC_ABORT_CONN;
   2683      0     stevel 	ioc.ic_timout = -1; /* infinite timeout */
   2684      0     stevel 	ioc.ic_len = sizeof (conn);
   2685      0     stevel 	ioc.ic_dp = (char *)&conn;
   2686      0     stevel 
   2687      0     stevel 	if ((fd = open("/dev/tcp", O_RDONLY)) < 0) {
   2688      0     stevel 		zerror(zlogp, B_TRUE, "unable to open %s", "/dev/tcp");
   2689      0     stevel 		return (-1);
   2690      0     stevel 	}
   2691      0     stevel 
   2692      0     stevel 	error = ioctl(fd, I_STR, &ioc);
   2693      0     stevel 	(void) close(fd);
   2694      0     stevel 	if (error == 0 || errno == ENOENT)	/* ENOENT is not an error */
   2695      0     stevel 		return (0);
   2696      0     stevel 	return (-1);
   2697      0     stevel }
   2698      0     stevel 
   2699      0     stevel static int
   2700      0     stevel tcp_abort_connections(zlog_t *zlogp, zoneid_t zoneid)
   2701      0     stevel {
   2702      0     stevel 	struct sockaddr_storage l, r;
   2703      0     stevel 	struct sockaddr_in *local, *remote;
   2704      0     stevel 	struct sockaddr_in6 *local6, *remote6;
   2705      0     stevel 	int error;
   2706      0     stevel 
   2707      0     stevel 	/*
   2708      0     stevel 	 * Abort IPv4 connections.
   2709      0     stevel 	 */
   2710      0     stevel 	bzero(&l, sizeof (*local));
   2711      0     stevel 	local = (struct sockaddr_in *)&l;
   2712      0     stevel 	local->sin_family = AF_INET;
   2713      0     stevel 	local->sin_addr.s_addr = INADDR_ANY;
   2714      0     stevel 	local->sin_port = 0;
   2715      0     stevel 
   2716      0     stevel 	bzero(&r, sizeof (*remote));
   2717      0     stevel 	remote = (struct sockaddr_in *)&r;
   2718      0     stevel 	remote->sin_family = AF_INET;
   2719      0     stevel 	remote->sin_addr.s_addr = INADDR_ANY;
   2720      0     stevel 	remote->sin_port = 0;
   2721      0     stevel 
   2722      0     stevel 	if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0)
   2723      0     stevel 		return (error);
   2724      0     stevel 
   2725      0     stevel 	/*
   2726      0     stevel 	 * Abort IPv6 connections.
   2727      0     stevel 	 */
   2728      0     stevel 	bzero(&l, sizeof (*local6));
   2729      0     stevel 	local6 = (struct sockaddr_in6 *)&l;
   2730      0     stevel 	local6->sin6_family = AF_INET6;
   2731      0     stevel 	local6->sin6_port = 0;
   2732      0     stevel 	local6->sin6_addr = in6addr_any;
   2733      0     stevel 
   2734      0     stevel 	bzero(&r, sizeof (*remote6));
   2735      0     stevel 	remote6 = (struct sockaddr_in6 *)&r;
   2736      0     stevel 	remote6->sin6_family = AF_INET6;
   2737      0     stevel 	remote6->sin6_port = 0;
   2738      0     stevel 	remote6->sin6_addr = in6addr_any;
   2739      0     stevel 
   2740      0     stevel 	if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0)
   2741      0     stevel 		return (error);
   2742      0     stevel 	return (0);
   2743      0     stevel }
   2744      0     stevel 
   2745      0     stevel static int
   2746   5829   gjelinek get_privset(zlog_t *zlogp, priv_set_t *privs, zone_mnt_t mount_cmd)
   2747   1645      comay {
   2748   1645      comay 	int error = -1;
   2749   1645      comay 	zone_dochandle_t handle;
   2750   1645      comay 	char *privname = NULL;
   2751   1645      comay 
   2752   1645      comay 	if ((handle = zonecfg_init_handle()) == NULL) {
   2753   1645      comay 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
   2754   1645      comay 		return (-1);
   2755   1645      comay 	}
   2756   1645      comay 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
   2757   1645      comay 		zerror(zlogp, B_FALSE, "invalid configuration");
   2758   2712    nn35248 		zonecfg_fini_handle(handle);
   2759   2712    nn35248 		return (-1);
   2760   2712    nn35248 	}
   2761   2712    nn35248 
   2762   5829   gjelinek 	if (ALT_MOUNT(mount_cmd)) {
   2763   3673   dh155122 		zone_iptype_t	iptype;
   2764   3673   dh155122 		const char	*curr_iptype;
   2765   3673   dh155122 
   2766   3673   dh155122 		if (zonecfg_get_iptype(handle, &iptype) != Z_OK) {
   2767   3673   dh155122 			zerror(zlogp, B_TRUE, "unable to determine ip-type");
   2768   3673   dh155122 			zonecfg_fini_handle(handle);
   2769   3673   dh155122 			return (-1);
   2770   3673   dh155122 		}
   2771   3673   dh155122 
   2772   3673   dh155122 		switch (iptype) {
   2773   3673   dh155122 		case ZS_SHARED:
   2774   3673   dh155122 			curr_iptype = "shared";
   2775   3673   dh155122 			break;
   2776   3673   dh155122 		case ZS_EXCLUSIVE:
   2777   3673   dh155122 			curr_iptype = "exclusive";
   2778   3673   dh155122 			break;
   2779   3673   dh155122 		}
   2780   3673   dh155122 
   2781   3716   gjelinek 		if (zonecfg_default_privset(privs, curr_iptype) == Z_OK) {
   2782   3716   gjelinek 			zonecfg_fini_handle(handle);
   2783   2712    nn35248 			return (0);
   2784   3716   gjelinek 		}
   2785   2712    nn35248 		zerror(zlogp, B_FALSE,
   2786   2712    nn35248 		    "failed to determine the zone's default privilege set");
   2787   1645      comay 		zonecfg_fini_handle(handle);
   2788   1645      comay 		return (-1);
   2789   1645      comay 	}
   2790   1645      comay 
   2791   1645      comay 	switch (zonecfg_get_privset(handle, privs, &privname)) {
   2792   1645      comay 	case Z_OK:
   2793   1645      comay 		error = 0;
   2794   1645      comay 		break;
   2795   1645      comay 	case Z_PRIV_PROHIBITED:
   2796   1645      comay 		zerror(zlogp, B_FALSE, "privilege \"%s\" is not permitted "
   2797   1645      comay 		    "within the zone's privilege set", privname);
   2798   1645      comay 		break;
   2799   1645      comay 	case Z_PRIV_REQUIRED:
   2800   1645      comay 		zerror(zlogp, B_FALSE, "required privilege \"%s\" is missing "
   2801   1645      comay 		    "from the zone's privilege set", privname);
   2802   1645      comay 		break;
   2803   1645      comay 	case Z_PRIV_UNKNOWN:
   2804   1645      comay 		zerror(zlogp, B_FALSE, "unknown privilege \"%s\" specified "
   2805   1645      comay 		    "in the zone's privilege set", privname);
   2806   1645      comay 		break;
   2807   1645      comay 	default:
   2808   1645      comay 		zerror(zlogp, B_FALSE, "failed to determine the zone's "
   2809   1645      comay 		    "privilege set");
   2810   1645      comay 		break;
   2811   1645      comay 	}
   2812   1645      comay 
   2813   1645      comay 	free(privname);
   2814   1645      comay 	zonecfg_fini_handle(handle);
   2815   1645      comay 	return (error);
   2816      0     stevel }
   2817      0     stevel 
   2818      0     stevel static int
   2819      0     stevel get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
   2820      0     stevel {
   2821      0     stevel 	nvlist_t *nvl = NULL;
   2822      0     stevel 	char *nvl_packed = NULL;
   2823      0     stevel 	size_t nvl_size = 0;
   2824      0     stevel 	nvlist_t **nvlv = NULL;
   2825      0     stevel 	int rctlcount = 0;
   2826      0     stevel 	int error = -1;
   2827      0     stevel 	zone_dochandle_t handle;
   2828      0     stevel 	struct zone_rctltab rctltab;
   2829      0     stevel 	rctlblk_t *rctlblk = NULL;
   2830      0     stevel 
   2831      0     stevel 	*bufp = NULL;
   2832      0     stevel 	*bufsizep = 0;
   2833      0     stevel 
   2834      0     stevel 	if ((handle = zonecfg_init_handle()) == NULL) {
   2835      0     stevel 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
   2836      0     stevel 		return (-1);
   2837      0     stevel 	}
   2838      0     stevel 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
   2839      0     stevel 		zerror(zlogp, B_FALSE, "invalid configuration");
   2840      0     stevel 		zonecfg_fini_handle(handle);
   2841      0     stevel 		return (-1);
   2842      0     stevel 	}
   2843      0     stevel 
   2844      0     stevel 	rctltab.zone_rctl_valptr = NULL;
   2845      0     stevel 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) {
   2846      0     stevel 		zerror(zlogp, B_TRUE, "%s failed", "nvlist_alloc");
   2847      0     stevel 		goto out;
   2848      0     stevel 	}
   2849      0     stevel 
   2850      0     stevel 	if (zonecfg_setrctlent(handle) != Z_OK) {
   2851      0     stevel 		zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setrctlent");
   2852      0     stevel 		goto out;
   2853      0     stevel 	}
   2854      0     stevel 
   2855      0     stevel 	if ((rctlblk = malloc(rctlblk_size())) == NULL) {
   2856      0     stevel 		zerror(zlogp, B_TRUE, "memory allocation failed");
   2857      0     stevel 		goto out;
   2858      0     stevel 	}
   2859      0     stevel 	while (zonecfg_getrctlent(handle, &rctltab) == Z_OK) {
   2860      0     stevel 		struct zone_rctlvaltab *rctlval;
   2861      0     stevel 		uint_t i, count;
   2862      0     stevel 		const char *name = rctltab.zone_rctl_name;
   2863      0     stevel 
   2864      0     stevel 		/* zoneadm should have already warned about unknown rctls. */
   2865      0     stevel 		if (!zonecfg_is_rctl(name)) {
   2866      0     stevel 			zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
   2867      0     stevel 			rctltab.zone_rctl_valptr = NULL;
   2868      0     stevel 			continue;
   2869      0     stevel 		}
   2870      0     stevel 		count = 0;
   2871      0     stevel 		for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL;
   2872      0     stevel 		    rctlval = rctlval->zone_rctlval_next) {
   2873      0     stevel 			count++;
   2874      0     stevel 		}
   2875      0     stevel 		if (count == 0) {	/* ignore */
   2876      0     stevel 			continue;	/* Nothing to free */
   2877      0     stevel 		}
   2878      0     stevel 		if ((nvlv = malloc(sizeof (*nvlv) * count)) == NULL)
   2879      0     stevel 			goto out;
   2880      0     stevel 		i = 0;
   2881      0     stevel 		for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL;
   2882      0     stevel 		    rctlval = rctlval->zone_rctlval_next, i++) {
   2883      0     stevel 			if (nvlist_alloc(&nvlv[i], NV_UNIQUE_NAME, 0) != 0) {
   2884      0     stevel 				zerror(zlogp, B_TRUE, "%s failed",
   2885      0     stevel 				    "nvlist_alloc");
   2886      0     stevel 				goto out;
   2887      0     stevel 			}
   2888      0     stevel 			if (zonecfg_construct_rctlblk(rctlval, rctlblk)
   2889      0     stevel 			    != Z_OK) {
   2890      0     stevel 				zerror(zlogp, B_FALSE, "invalid rctl value: "
   2891      0     stevel 				    "(priv=%s,limit=%s,action=%s)",
   2892      0     stevel 				    rctlval->zone_rctlval_priv,
   2893      0     stevel 				    rctlval->zone_rctlval_limit,
   2894      0     stevel 				    rctlval->zone_rctlval_action);
   2895      0     stevel 				goto out;
   2896      0     stevel 			}
   2897      0     stevel 			if (!zonecfg_valid_rctl(name, rctlblk)) {
   2898      0     stevel 				zerror(zlogp, B_FALSE,
   2899      0     stevel 				    "(priv=%s,limit=%s,action=%s) is not a "
   2900      0     stevel 				    "valid value for rctl '%s'",
   2901      0     stevel 				    rctlval->zone_rctlval_priv,
   2902      0     stevel 				    rctlval->zone_rctlval_limit,
   2903      0     stevel 				    rctlval->zone_rctlval_action,
   2904      0     stevel 				    name);
   2905      0     stevel 				goto out;
   2906      0     stevel 			}
   2907      0     stevel 			if (nvlist_add_uint64(nvlv[i], "privilege",
   2908   1645      comay 			    rctlblk_get_privilege(rctlblk)) != 0) {
   2909      0     stevel 				zerror(zlogp, B_FALSE, "%s failed",
   2910      0     stevel 				    "nvlist_add_uint64");
   2911      0     stevel 				goto out;
   2912      0     stevel 			}
   2913      0     stevel 			if (nvlist_add_uint64(nvlv[i], "limit",
   2914   1645      comay 			    rctlblk_get_value(rctlblk)) != 0) {
   2915      0     stevel 				zerror(zlogp, B_FALSE, "%s failed",
   2916      0     stevel 				    "nvlist_add_uint64");
   2917      0     stevel 				goto out;
   2918      0     stevel 			}
   2919      0     stevel 			if (nvlist_add_uint64(nvlv[i], "action",
   2920      0     stevel 			    (uint_t)rctlblk_get_local_action(rctlblk, NULL))
   2921      0     stevel 			    != 0) {
   2922      0     stevel 				zerror(zlogp, B_FALSE, "%s failed",
   2923      0     stevel 				    "nvlist_add_uint64");
   2924      0     stevel 				goto out;
   2925      0     stevel 			}
   2926      0     stevel 		}
   2927      0     stevel 		zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
   2928      0     stevel 		rctltab.zone_rctl_valptr = NULL;
   2929      0     stevel 		if (nvlist_add_nvlist_array(nvl, (char *)name, nvlv, count)
   2930      0     stevel 		    != 0) {
   2931      0     stevel 			zerror(zlogp, B_FALSE, "%s failed",
   2932      0     stevel 			    "nvlist_add_nvlist_array");
   2933      0     stevel 			goto out;
   2934      0     stevel 		}
   2935      0     stevel 		for (i = 0; i < count; i++)
   2936      0     stevel 			nvlist_free(nvlv[i]);
   2937      0     stevel 		free(nvlv);
   2938      0     stevel 		nvlv = NULL;
   2939      0     stevel 		rctlcount++;
   2940      0     stevel 	}
   2941      0     stevel 	(void) zonecfg_endrctlent(handle);
   2942      0     stevel 
   2943      0     stevel 	if (rctlcount == 0) {
   2944      0     stevel 		error = 0;
   2945      0     stevel 		goto out;
   2946      0     stevel 	}
   2947      0     stevel 	if (nvlist_pack(nvl, &nvl_packed, &nvl_size, NV_ENCODE_NATIVE, 0)
   2948      0     stevel 	    != 0) {
   2949      0     stevel 		zerror(zlogp, B_FALSE, "%s failed", "nvlist_pack");
   2950      0     stevel 		goto out;
   2951      0     stevel 	}
   2952      0     stevel 
   2953      0     stevel 	error = 0;
   2954      0     stevel 	*bufp = nvl_packed;
   2955      0     stevel 	*bufsizep = nvl_size;
   2956      0     stevel 
   2957      0     stevel out:
   2958      0     stevel 	free(rctlblk);
   2959      0     stevel 	zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
   2960      0     stevel 	if (error && nvl_packed != NULL)
   2961      0     stevel 		free(nvl_packed);
   2962      0     stevel 	if (nvl != NULL)
   2963      0     stevel 		nvlist_free(nvl);
   2964      0     stevel 	if (nvlv != NULL)
   2965      0     stevel 		free(nvlv);
   2966      0     stevel 	if (handle != NULL)
   2967      0     stevel 		zonecfg_fini_handle(handle);
   2968      0     stevel 	return (error);
   2969      0     stevel }
   2970      0     stevel 
   2971      0     stevel static int
   2972   7370     Gerald get_implicit_datasets(zlog_t *zlogp, char **retstr)
   2973   7370     Gerald {
   2974   7370     Gerald 	char cmdbuf[2 * MAXPATHLEN];
   2975   7370     Gerald 
   2976   7370     Gerald 	if (query_hook[0] == '\0')
   2977   7370     Gerald 		return (0);
   2978   7370     Gerald 
   2979   7370     Gerald 	if (snprintf(cmdbuf, sizeof (cmdbuf), "%s datasets", query_hook)
   2980   7370     Gerald 	    > sizeof (cmdbuf))
   2981   7370     Gerald 		return (-1);
   2982   7370     Gerald 
   2983   7370     Gerald 	if (do_subproc(zlogp, cmdbuf, retstr) != 0)
   2984   7370     Gerald 		return (-1);
   2985   7370     Gerald 
   2986   7370     Gerald 	return (0);
   2987   7370     Gerald }
   2988   7370     Gerald 
   2989   7370     Gerald static int
   2990    789     ahrens get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep)
   2991    789     ahrens {
   2992    789     ahrens 	zone_dochandle_t handle;
   2993    789     ahrens 	struct zone_dstab dstab;
   2994    789     ahrens 	size_t total, offset, len;
   2995    789     ahrens 	int error = -1;
   2996   5185   gjelinek 	char *str = NULL;
   2997   7370     Gerald 	char *implicit_datasets = NULL;
   2998   7370     Gerald 	int implicit_len = 0;
   2999    789     ahrens 
   3000    789     ahrens 	*bufp = NULL;
   3001    789     ahrens 	*bufsizep = 0;
   3002    789     ahrens 
   3003    789     ahrens 	if ((handle = zonecfg_init_handle()) == NULL) {
   3004    789     ahrens 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
   3005    789     ahrens 		return (-1);
   3006    789     ahrens 	}
   3007    789     ahrens 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
   3008    789     ahrens 		zerror(zlogp, B_FALSE, "invalid configuration");
   3009    789     ahrens 		zonecfg_fini_handle(handle);
   3010    789     ahrens 		return (-1);
   3011   7370     Gerald 	}
   3012   7370     Gerald 
   3013   7370     Gerald 	if (get_implicit_datasets(zlogp, &implicit_datasets) != 0) {
   3014   7370     Gerald 		zerror(zlogp, B_FALSE, "getting implicit datasets failed");
   3015   7370     Gerald 		goto out;
   3016    789     ahrens 	}
   3017    789     ahrens 
   3018    789     ahrens 	if (zonecfg_setdsent(handle) != Z_OK) {
   3019    789     ahrens 		zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent");
   3020    789     ahrens 		goto out;
   3021    789     ahrens 	}
   3022    789     ahrens 
   3023    789     ahrens 	total = 0;
   3024    789     ahrens 	while (zonecfg_getdsent(handle, &dstab) == Z_OK)
   3025    789     ahrens 		total += strlen(dstab.zone_dataset_name) + 1;
   3026    789     ahrens 	(void) zonecfg_enddsent(handle);
   3027   7370     Gerald 
   3028   7370     Gerald 	if (implicit_datasets != NULL)
   3029   7370     Gerald 		implicit_len = strlen(implicit_datasets);
   3030   7370     Gerald 	if (implicit_len > 0)
   3031   7370     Gerald 		total += implicit_len + 1;
   3032    789     ahrens 
   3033    789     ahrens 	if (total == 0) {
   3034    789     ahrens 		error = 0;
   3035    789     ahrens 		goto out;
   3036    789     ahrens 	}
   3037    789     ahrens 
   3038    789     ahrens 	if ((str = malloc(total)) == NULL) {
   3039    789     ahrens 		zerror(zlogp, B_TRUE, "memory allocation failed");
   3040    789     ahrens 		goto out;
   3041    789     ahrens 	}
   3042    789     ahrens 
   3043    789     ahrens 	if (zonecfg_setdsent(handle) != Z_OK) {
   3044    789     ahrens 		zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent");
   3045    789     ahrens 		goto out;
   3046    789     ahrens 	}
   3047    789     ahrens 	offset = 0;
   3048    789     ahrens 	while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
   3049    789     ahrens 		len = strlen(dstab.zone_dataset_name);
   3050    789     ahrens 		(void) strlcpy(str + offset, dstab.zone_dataset_name,
   3051   5185   gjelinek 		    total - offset);
   3052    789     ahrens 		offset += len;
   3053   5185   gjelinek 		if (offset < total - 1)
   3054    789     ahrens 			str[offset++] = ',';
   3055    789     ahrens 	}
   3056    789     ahrens 	(void) zonecfg_enddsent(