Home | History | Annotate | Download | only in zlogin
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * zlogin provides three types of login which allow users in the global
     28  * zone to access non-global zones.
     29  *
     30  * - "interactive login" is similar to rlogin(1); for example, the user could
     31  *   issue 'zlogin my-zone' or 'zlogin -e ^ -l me my-zone'.   The user is
     32  *   granted a new pty (which is then shoved into the zone), and an I/O
     33  *   loop between parent and child processes takes care of the interactive
     34  *   session.  In this mode, login(1) (and its -c option, which means
     35  *   "already authenticated") is employed to take care of the initialization
     36  *   of the user's session.
     37  *
     38  * - "non-interactive login" is similar to su(1M); the user could issue
     39  *   'zlogin my-zone ls -l' and the command would be run as specified.
     40  *   In this mode, zlogin sets up pipes as the communication channel, and
     41  *   'su' is used to do the login setup work.
     42  *
     43  * - "console login" is the equivalent to accessing the tip line for a
     44  *   zone.  For example, the user can issue 'zlogin -C my-zone'.
     45  *   In this mode, zlogin contacts the zoneadmd process via unix domain
     46  *   socket.  If zoneadmd is not running, it starts it.  This allows the
     47  *   console to be available anytime the zone is installed, regardless of
     48  *   whether it is running.
     49  */
     50 
     51 #include <sys/socket.h>
     52 #include <sys/termios.h>
     53 #include <sys/utsname.h>
     54 #include <sys/stat.h>
     55 #include <sys/types.h>
     56 #include <sys/contract/process.h>
     57 #include <sys/ctfs.h>
     58 #include <sys/brand.h>
     59 #include <sys/wait.h>
     60 #include <alloca.h>
     61 #include <assert.h>
     62 #include <ctype.h>
     63 #include <door.h>
     64 #include <errno.h>
     65 #include <nss_dbdefs.h>
     66 #include <poll.h>
     67 #include <priv.h>
     68 #include <pwd.h>
     69 #include <unistd.h>
     70 #include <utmpx.h>
     71 #include <sac.h>
     72 #include <signal.h>
     73 #include <stdarg.h>
     74 #include <stdio.h>
     75 #include <stdlib.h>
     76 #include <string.h>
     77 #include <strings.h>
     78 #include <stropts.h>
     79 #include <wait.h>
     80 #include <zone.h>
     81 #include <fcntl.h>
     82 #include <libdevinfo.h>
     83 #include <libintl.h>
     84 #include <locale.h>
     85 #include <libzonecfg.h>
     86 #include <libcontract.h>
     87 #include <libbrand.h>
     88 
     89 static int masterfd;
     90 static struct termios save_termios;
     91 static struct termios effective_termios;
     92 static int save_fd;
     93 static struct winsize winsize;
     94 static volatile int dead;
     95 static volatile pid_t child_pid = -1;
     96 static int interactive = 0;
     97 static priv_set_t *dropprivs;
     98 
     99 static int nocmdchar = 0;
    100 static int failsafe = 0;
    101 static char cmdchar = '~';
    102 
    103 static int pollerr = 0;
    104 
    105 static const char *pname;
    106 
    107 #if !defined(TEXT_DOMAIN)		/* should be defined by cc -D */
    108 #define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it wasn't */
    109 #endif
    110 
    111 #define	SUPATH	"/usr/bin/su"
    112 #define	FAILSAFESHELL	"/sbin/sh"
    113 #define	DEFAULTSHELL	"/sbin/sh"
    114 #define	DEF_PATH	"/usr/sbin:/usr/bin"
    115 
    116 #define	CLUSTER_BRAND_NAME	"cluster"
    117 
    118 /*
    119  * The ZLOGIN_BUFSIZ is larger than PIPE_BUF so we can be sure we're clearing
    120  * out the pipe when the child is exiting.  The ZLOGIN_RDBUFSIZ must be less
    121  * than ZLOGIN_BUFSIZ (because we share the buffer in doio).  This value is
    122  * also chosen in conjunction with the HI_WATER setting to make sure we
    123  * don't fill up the pipe.  We can write FIFOHIWAT (16k) into the pipe before
    124  * blocking.  By having ZLOGIN_RDBUFSIZ set to 1k and HI_WATER set to 8k, we
    125  * know we can always write a ZLOGIN_RDBUFSIZ chunk into the pipe when there
    126  * is less than HI_WATER data already in the pipe.
    127  */
    128 #define	ZLOGIN_BUFSIZ	8192
    129 #define	ZLOGIN_RDBUFSIZ	1024
    130 #define	HI_WATER	8192
    131 
    132 /*
    133  * See canonify() below.  CANONIFY_LEN is the maximum length that a
    134  * "canonical" sequence will expand to (backslash, three octal digits, NUL).
    135  */
    136 #define	CANONIFY_LEN 5
    137 
    138 static void
    139 usage(void)
    140 {
    141 	(void) fprintf(stderr, gettext("usage: %s [ -CES ] [ -e cmdchar ] "
    142 	    "[-l user] zonename [command [args ...] ]\n"), pname);
    143 	exit(2);
    144 }
    145 
    146 static const char *
    147 getpname(const char *arg0)
    148 {
    149 	const char *p = strrchr(arg0, '/');
    150 
    151 	if (p == NULL)
    152 		p = arg0;
    153 	else
    154 		p++;
    155 
    156 	pname = p;
    157 	return (p);
    158 }
    159 
    160 static void
    161 zerror(const char *fmt, ...)
    162 {
    163 	va_list alist;
    164 
    165 	(void) fprintf(stderr, "%s: ", pname);
    166 	va_start(alist, fmt);
    167 	(void) vfprintf(stderr, fmt, alist);
    168 	va_end(alist);
    169 	(void) fprintf(stderr, "\n");
    170 }
    171 
    172 static void
    173 zperror(const char *str)
    174 {
    175 	const char *estr;
    176 
    177 	if ((estr = strerror(errno)) != NULL)
    178 		(void) fprintf(stderr, "%s: %s: %s\n", pname, str, estr);
    179 	else
    180 		(void) fprintf(stderr, "%s: %s: errno %d\n", pname, str, errno);
    181 }
    182 
    183 /*
    184  * The first part of our privilege dropping scheme needs to be called before
    185  * fork(), since we must have it for security; we don't want to be surprised
    186  * later that we couldn't allocate the privset.
    187  */
    188 static int
    189 prefork_dropprivs()
    190 {
    191 	if ((dropprivs = priv_allocset()) == NULL)
    192 		return (1);
    193 
    194 	priv_basicset(dropprivs);
    195 	(void) priv_delset(dropprivs, PRIV_PROC_INFO);
    196 	(void) priv_delset(dropprivs, PRIV_PROC_FORK);
    197 	(void) priv_delset(dropprivs, PRIV_PROC_EXEC);
    198 	(void) priv_delset(dropprivs, PRIV_FILE_LINK_ANY);
    199 
    200 	/*
    201 	 * We need to keep the basic privilege PROC_SESSION and all unknown
    202 	 * basic privileges as well as the privileges PROC_ZONE and
    203 	 * PROC_OWNER in order to query session information and
    204 	 * send signals.
    205 	 */
    206 	if (interactive == 0) {
    207 		(void) priv_addset(dropprivs, PRIV_PROC_ZONE);
    208 		(void) priv_addset(dropprivs, PRIV_PROC_OWNER);
    209 	} else {
    210 		(void) priv_delset(dropprivs, PRIV_PROC_SESSION);
    211 	}
    212 
    213 	return (0);
    214 }
    215 
    216 /*
    217  * The second part of the privilege drop.  We are paranoid about being attacked
    218  * by the zone, so we drop all privileges.  This should prevent a compromise
    219  * which gets us to fork(), exec(), symlink(), etc.
    220  */
    221 static void
    222 postfork_dropprivs()
    223 {
    224 	if ((setppriv(PRIV_SET, PRIV_PERMITTED, dropprivs)) == -1) {
    225 		zperror(gettext("Warning: could not set permitted privileges"));
    226 	}
    227 	if ((setppriv(PRIV_SET, PRIV_LIMIT, dropprivs)) == -1) {
    228 		zperror(gettext("Warning: could not set limit privileges"));
    229 	}
    230 	if ((setppriv(PRIV_SET, PRIV_INHERITABLE, dropprivs)) == -1) {
    231 		zperror(gettext("Warning: could not set inheritable "
    232 		    "privileges"));
    233 	}
    234 }
    235 
    236 /*
    237  * Create the unix domain socket and call the zoneadmd server; handshake
    238  * with it to determine whether it will allow us to connect.
    239  */
    240 static int
    241 get_console_master(const char *zname)
    242 {
    243 	int sockfd = -1;
    244 	struct sockaddr_un servaddr;
    245 	char clientid[MAXPATHLEN];
    246 	char handshake[MAXPATHLEN], c;
    247 	int msglen;
    248 	int i = 0, err = 0;
    249 
    250 	if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
    251 		zperror(gettext("could not create socket"));
    252 		return (-1);
    253 	}
    254 
    255 	bzero(&servaddr, sizeof (servaddr));
    256 	servaddr.sun_family = AF_UNIX;
    257 	(void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path),
    258 	    "%s/%s.console_sock", ZONES_TMPDIR, zname);
    259 
    260 	if (connect(sockfd, (struct sockaddr *)&servaddr,
    261 	    sizeof (servaddr)) == -1) {
    262 		zperror(gettext("Could not connect to zone console"));
    263 		goto bad;
    264 	}
    265 	masterfd = sockfd;
    266 
    267 	msglen = snprintf(clientid, sizeof (clientid), "IDENT %lu %s\n",
    268 	    getpid(), setlocale(LC_MESSAGES, NULL));
    269 
    270 	if (msglen >= sizeof (clientid) || msglen < 0) {
    271 		zerror("protocol error");
    272 		goto bad;
    273 	}
    274 
    275 	if (write(masterfd, clientid, msglen) != msglen) {
    276 		zerror("protocol error");
    277 		goto bad;
    278 	}
    279 
    280 	bzero(handshake, sizeof (handshake));
    281 
    282 	/*
    283 	 * Take care not to accumulate more than our fill, and leave room for
    284 	 * the NUL at the end.
    285 	 */
    286 	while ((err = read(masterfd, &c, 1)) == 1) {
    287 		if (i >= (sizeof (handshake) - 1))
    288 			break;
    289 		if (c == '\n')
    290 			break;
    291 		handshake[i] = c;
    292 		i++;
    293 	}
    294 
    295 	/*
    296 	 * If something went wrong during the handshake we bail; perhaps
    297 	 * the server died off.
    298 	 */
    299 	if (err == -1) {
    300 		zperror(gettext("Could not connect to zone console"));
    301 		goto bad;
    302 	}
    303 
    304 	if (strncmp(handshake, "OK", sizeof (handshake)) == 0)
    305 		return (0);
    306 
    307 	zerror(gettext("Console is already in use by process ID %s."),
    308 	    handshake);
    309 bad:
    310 	(void) close(sockfd);
    311 	masterfd = -1;
    312 	return (-1);
    313 }
    314 
    315 
    316 /*
    317  * Routines to handle pty creation upon zone entry and to shuttle I/O back
    318  * and forth between the two terminals.  We also compute and store the
    319  * name of the slave terminal associated with the master side.
    320  */
    321 static int
    322 get_master_pty()
    323 {
    324 	if ((masterfd = open("/dev/ptmx", O_RDWR|O_NONBLOCK)) < 0) {
    325 		zperror(gettext("failed to obtain a pseudo-tty"));
    326 		return (-1);
    327 	}
    328 	if (tcgetattr(STDIN_FILENO, &save_termios) == -1) {
    329 		zperror(gettext("failed to get terminal settings from stdin"));
    330 		return (-1);
    331 	}
    332 	(void) ioctl(STDIN_FILENO, TIOCGWINSZ, (char *)&winsize);
    333 
    334 	return (0);
    335 }
    336 
    337 /*
    338  * This is a bit tricky; normally a pts device will belong to the zone it
    339  * is granted to.  But in the case of "entering" a zone, we need to establish
    340  * the pty before entering the zone so that we can vector I/O to and from it
    341  * from the global zone.
    342  *
    343  * We use the zonept() call to let the ptm driver know what we are up to;
    344  * the only other hairy bit is the setting of zoneslavename (which happens
    345  * above, in get_master_pty()).
    346  */
    347 static int
    348 init_slave_pty(zoneid_t zoneid, char *devroot)
    349 {
    350 	int slavefd = -1;
    351 	char *slavename, zoneslavename[MAXPATHLEN];
    352 
    353 	/*
    354 	 * Set slave permissions, zone the pts, then unlock it.
    355 	 */
    356 	if (grantpt(masterfd) != 0) {
    357 		zperror(gettext("grantpt failed"));
    358 		return (-1);
    359 	}
    360 
    361 	if (unlockpt(masterfd) != 0) {
    362 		zperror(gettext("unlockpt failed"));
    363 		return (-1);
    364 	}
    365 
    366 	/*
    367 	 * We must open the slave side before zoning this pty; otherwise
    368 	 * the kernel would refuse us the open-- zoning a pty makes it
    369 	 * inaccessible to the global zone.  Note we are trying to open
    370 	 * the device node via the $ZONEROOT/dev path for this pty.
    371 	 *
    372 	 * Later we'll close the slave out when once we've opened it again
    373 	 * from within the target zone.  Blarg.
    374 	 */
    375 	if ((slavename = ptsname(masterfd)) == NULL) {
    376 		zperror(gettext("failed to get name for pseudo-tty"));
    377 		return (-1);
    378 	}
    379 
    380 	(void) snprintf(zoneslavename, sizeof (zoneslavename), "%s%s",
    381 	    devroot, slavename);
    382 
    383 	if ((slavefd = open(zoneslavename, O_RDWR)) < 0) {
    384 		zerror(gettext("failed to open %s: %s"), zoneslavename,
    385 		    strerror(errno));
    386 		return (-1);
    387 	}
    388 
    389 	/*
    390 	 * Push hardware emulation (ptem), line discipline (ldterm),
    391 	 * and V7/4BSD/Xenix compatibility (ttcompat) modules.
    392 	 */
    393 	if (ioctl(slavefd, I_PUSH, "ptem") == -1) {
    394 		zperror(gettext("failed to push ptem module"));
    395 		if (!failsafe)
    396 			goto bad;
    397 	}
    398 
    399 	/*
    400 	 * Anchor the stream to prevent malicious I_POPs; we prefer to do
    401 	 * this prior to entering the zone so that we can detect any errors
    402 	 * early, and so that we can set the anchor from the global zone.
    403 	 */
    404 	if (ioctl(slavefd, I_ANCHOR) == -1) {
    405 		zperror(gettext("failed to set stream anchor"));
    406 		if (!failsafe)
    407 			goto bad;
    408 	}
    409 
    410 	if (ioctl(slavefd, I_PUSH, "ldterm") == -1) {
    411 		zperror(gettext("failed to push ldterm module"));
    412 		if (!failsafe)
    413 			goto bad;
    414 	}
    415 	if (ioctl(slavefd, I_PUSH, "ttcompat") == -1) {
    416 		zperror(gettext("failed to push ttcompat module"));
    417 		if (!failsafe)
    418 			goto bad;
    419 	}
    420 
    421 	/*
    422 	 * Propagate terminal settings from the external term to the new one.
    423 	 */
    424 	if (tcsetattr(slavefd, TCSAFLUSH, &save_termios) == -1) {
    425 		zperror(gettext("failed to set terminal settings"));
    426 		if (!failsafe)
    427 			goto bad;
    428 	}
    429 	(void) ioctl(slavefd, TIOCSWINSZ, (char *)&winsize);
    430 
    431 	if (zonept(masterfd, zoneid) != 0) {
    432 		zperror(gettext("could not set zoneid of pty"));
    433 		goto bad;
    434 	}
    435 
    436 	return (slavefd);
    437 
    438 bad:
    439 	(void) close(slavefd);
    440 	return (-1);
    441 }
    442 
    443 /*
    444  * Place terminal into raw mode.
    445  */
    446 static int
    447 set_tty_rawmode(int fd)
    448 {
    449 	struct termios term;
    450 	if (tcgetattr(fd, &term) < 0) {
    451 		zperror(gettext("failed to get user terminal settings"));
    452 		return (-1);
    453 	}
    454 
    455 	/* Stash for later, so we can revert back to previous mode */
    456 	save_termios = term;
    457 	save_fd = fd;
    458 
    459 	/* disable 8->7 bit strip, start/stop, enable any char to restart */
    460 	term.c_iflag &= ~(ISTRIP|IXON|IXANY);
    461 	/* disable NL->CR, CR->NL, ignore CR, UPPER->lower */
    462 	term.c_iflag &= ~(INLCR|ICRNL|IGNCR|IUCLC);
    463 	/* disable output post-processing */
    464 	term.c_oflag &= ~OPOST;
    465 	/* disable canonical mode, signal chars, echo & extended functions */
    466 	term.c_lflag &= ~(ICANON|ISIG|ECHO|IEXTEN);
    467 
    468 	term.c_cc[VMIN] = 1;    /* byte-at-a-time */
    469 	term.c_cc[VTIME] = 0;
    470 
    471 	if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &term)) {
    472 		zperror(gettext("failed to set user terminal to raw mode"));
    473 		return (-1);
    474 	}
    475 
    476 	/*
    477 	 * We need to know the value of VEOF so that we can properly process for
    478 	 * client-side ~<EOF>.  But we have obliterated VEOF in term,
    479 	 * because VMIN overloads the same array slot in non-canonical mode.
    480 	 * Stupid @&^%!
    481 	 *
    482 	 * So here we construct the "effective" termios from the current
    483 	 * terminal settings, and the corrected VEOF and VEOL settings.
    484 	 */
    485 	if (tcgetattr(STDIN_FILENO, &effective_termios) < 0) {
    486 		zperror(gettext("failed to get user terminal settings"));
    487 		return (-1);
    488 	}
    489 	effective_termios.c_cc[VEOF] = save_termios.c_cc[VEOF];
    490 	effective_termios.c_cc[VEOL] = save_termios.c_cc[VEOL];
    491 
    492 	return (0);
    493 }
    494 
    495 /*
    496  * Copy terminal window size from our terminal to the pts.
    497  */
    498 /*ARGSUSED*/
    499 static void
    500 sigwinch(int s)
    501 {
    502 	struct winsize ws;
    503 
    504 	if (ioctl(0, TIOCGWINSZ, &ws) == 0)
    505 		(void) ioctl(masterfd, TIOCSWINSZ, &ws);
    506 }
    507 
    508 static volatile int close_on_sig = -1;
    509 
    510 static void
    511 /*ARGSUSED*/
    512 sigcld(int s)
    513 {
    514 	int status;
    515 	pid_t pid;
    516 
    517 	/*
    518 	 * Peek at the exit status.  If this isn't the process we cared
    519 	 * about, then just reap it.
    520 	 */
    521 	if ((pid = waitpid(child_pid, &status, WNOHANG|WNOWAIT)) != -1) {
    522 		if (pid == child_pid &&
    523 		    (WIFEXITED(status) || WIFSIGNALED(status))) {
    524 			dead = 1;
    525 			if (close_on_sig != -1) {
    526 				(void) write(close_on_sig, "a", 1);
    527 				(void) close(close_on_sig);
    528 				close_on_sig = -1;
    529 			}
    530 		} else {
    531 			(void) waitpid(pid, &status, WNOHANG);
    532 		}
    533 	}
    534 }
    535 
    536 /*
    537  * Some signals (currently, SIGINT) must be forwarded on to the process
    538  * group of the child process.
    539  */
    540 static void
    541 sig_forward(int s)
    542 {
    543 	if (child_pid != -1) {
    544 		pid_t pgid = getpgid(child_pid);
    545 		if (pgid != -1)
    546 			(void) sigsend(P_PGID, pgid, s);
    547 	}
    548 }
    549 
    550 /*
    551  * reset terminal settings for global environment
    552  */
    553 static void
    554 reset_tty()
    555 {
    556 	(void) tcsetattr(save_fd, TCSADRAIN, &save_termios);
    557 }
    558 
    559 /*
    560  * Convert character to printable representation, for display with locally
    561  * echoed command characters (like when we need to display ~^D)
    562  */
    563 static void
    564 canonify(char c, char *cc)
    565 {
    566 	if (isprint(c)) {
    567 		cc[0] = c;
    568 		cc[1] = '\0';
    569 	} else if (c >= 0 && c <= 31) {	/* ^@ through ^_ */
    570 		cc[0] = '^';
    571 		cc[1] = c + '@';
    572 		cc[2] = '\0';
    573 	} else {
    574 		cc[0] = '\\';
    575 		cc[1] = ((c >> 6) & 7) + '0';
    576 		cc[2] = ((c >> 3) & 7) + '0';
    577 		cc[3] = (c & 7) + '0';
    578 		cc[4] = '\0';
    579 	}
    580 }
    581 
    582 /*
    583  * process_user_input watches the input stream for the escape sequence for
    584  * 'quit' (by default, tilde-period).  Because we might be fed just one
    585  * keystroke at a time, state associated with the user input (are we at the
    586  * beginning of the line?  are we locally echoing the next character?) is
    587  * maintained by beginning_of_line and local_echo across calls to the routine.
    588  * If the write to outfd fails, we'll try to read from infd in an attempt
    589  * to prevent deadlock between the two processes.
    590  *
    591  * This routine returns -1 when the 'quit' escape sequence has been issued,
    592  * or an error is encountered, 1 if stdin is EOF, and 0 otherwise.
    593  */
    594 static int
    595 process_user_input(int outfd, int infd)
    596 {
    597 	static boolean_t beginning_of_line = B_TRUE;
    598 	static boolean_t local_echo = B_FALSE;
    599 	char ibuf[ZLOGIN_BUFSIZ];
    600 	int nbytes;
    601 	char *buf = ibuf;
    602 	char c = *buf;
    603 
    604 	nbytes = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
    605 	if (nbytes == -1 && (errno != EINTR || dead))
    606 		return (-1);
    607 
    608 	if (nbytes == -1)	/* The read was interrupted. */
    609 		return (0);
    610 
    611 	/* 0 read means EOF, close the pipe to the child */
    612 	if (nbytes == 0)
    613 		return (1);
    614 
    615 	for (c = *buf; nbytes > 0; c = *buf, --nbytes) {
    616 		buf++;
    617 		if (beginning_of_line && !nocmdchar) {
    618 			beginning_of_line = B_FALSE;
    619 			if (c == cmdchar) {
    620 				local_echo = B_TRUE;
    621 				continue;
    622 			}
    623 		} else if (local_echo) {
    624 			local_echo = B_FALSE;
    625 			if (c == '.' || c == effective_termios.c_cc[VEOF]) {
    626 				char cc[CANONIFY_LEN];
    627 
    628 				canonify(c, cc);
    629 				(void) write(STDOUT_FILENO, &cmdchar, 1);
    630 				(void) write(STDOUT_FILENO, cc, strlen(cc));
    631 				return (-1);
    632 			}
    633 		}
    634 retry:
    635 		if (write(outfd, &c, 1) <= 0) {
    636 			/*
    637 			 * Since the fd we are writing to is opened with
    638 			 * O_NONBLOCK it is possible to get EAGAIN if the
    639 			 * pipe is full.  One way this could happen is if we
    640 			 * are writing a lot of data into the pipe in this loop
    641 			 * and the application on the other end is echoing that
    642 			 * data back out to its stdout.  The output pipe can
    643 			 * fill up since we are stuck here in this loop and not
    644 			 * draining the other pipe.  We can try to read some of
    645 			 * the data to see if we can drain the pipe so that the
    646 			 * application can continue to make progress.  The read
    647 			 * is non-blocking so we won't hang here.  We also wait
    648 			 * a bit before retrying since there could be other
    649 			 * reasons why the pipe is full and we don't want to
    650 			 * continuously retry.
    651 			 */
    652 			if (errno == EAGAIN) {
    653 				struct timespec rqtp;
    654 				int ln;
    655 				char obuf[ZLOGIN_BUFSIZ];
    656 
    657 				if ((ln = read(infd, obuf, ZLOGIN_BUFSIZ)) > 0)
    658 					(void) write(STDOUT_FILENO, obuf, ln);
    659 
    660 				/* sleep for 10 milliseconds */
    661 				rqtp.tv_sec = 0;
    662 				rqtp.tv_nsec = 10 * (NANOSEC / MILLISEC);
    663 				(void) nanosleep(&rqtp, NULL);
    664 				if (!dead)
    665 					goto retry;
    666 			}
    667 
    668 			return (-1);
    669 		}
    670 		beginning_of_line = (c == '\r' || c == '\n' ||
    671 		    c == effective_termios.c_cc[VKILL] ||
    672 		    c == effective_termios.c_cc[VEOL] ||
    673 		    c == effective_termios.c_cc[VSUSP] ||
    674 		    c == effective_termios.c_cc[VINTR]);
    675 	}
    676 	return (0);
    677 }
    678 
    679 /*
    680  * This function prevents deadlock between zlogin and the application in the
    681  * zone that it is talking to.  This can happen when we read from zlogin's
    682  * stdin and write the data down the pipe to the application.  If the pipe
    683  * is full, we'll block in the write.  Because zlogin could be blocked in
    684  * the write, it would never read the application's stdout/stderr so the
    685  * application can then block on those writes (when the pipe fills up).  If the
    686  * the application gets blocked this way, it can never get around to reading
    687  * its stdin so that zlogin can unblock from its write.  Once in this state,
    688  * the two processes are deadlocked.
    689  *
    690  * To prevent this, we want to verify that we can write into the pipe before we
    691  * read from our stdin.  If the pipe already is pretty full, we bypass the read
    692  * for now.  We'll circle back here again after the poll() so that we can
    693  * try again.  When this function is called, we already know there is data
    694  * ready to read on STDIN_FILENO.  We return -1 if there is a problem, 1 if
    695  * stdin is EOF, and 0 if everything is ok (even though we might not have
    696  * read/written any data into the pipe on this iteration).
    697  */
    698 static int
    699 process_raw_input(int stdin_fd, int appin_fd)
    700 {
    701 	int cc;
    702 	struct stat64 sb;
    703 	char ibuf[ZLOGIN_RDBUFSIZ];
    704 
    705 	/* Check how much data is already in the pipe */
    706 	if (fstat64(appin_fd, &sb) == -1) {
    707 		perror("stat failed");
    708 		return (-1);
    709 	}
    710 
    711 	if (dead)
    712 		return (-1);
    713 
    714 	/*
    715 	 * The pipe already has a lot of data in it,  don't write any more
    716 	 * right now.
    717 	 */
    718 	if (sb.st_size >= HI_WATER)
    719 		return (0);
    720 
    721 	cc = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
    722 	if (cc == -1 && (errno != EINTR || dead))
    723 		return (-1);
    724 
    725 	if (cc == -1)	/* The read was interrupted. */
    726 		return (0);
    727 
    728 	/* 0 read means EOF, close the pipe to the child */
    729 	if (cc == 0)
    730 		return (1);
    731 
    732 	/*
    733 	 * stdin_fd is stdin of the target; so, the thing we'll write the user
    734 	 * data *to*.
    735 	 */
    736 	if (write(stdin_fd, ibuf, cc) == -1)
    737 		return (-1);
    738 
    739 	return (0);
    740 }
    741 
    742 /*
    743  * Write the output from the application running in the zone.  We can get
    744  * a signal during the write (usually it would be SIGCHLD when the application
    745  * has exited) so we loop to make sure we have written all of the data we read.
    746  */
    747 static int
    748 process_output(int in_fd, int out_fd)
    749 {
    750 	int wrote = 0;
    751 	int cc;
    752 	char ibuf[ZLOGIN_BUFSIZ];
    753 
    754 	cc = read(in_fd, ibuf, ZLOGIN_BUFSIZ);
    755 	if (cc == -1 && (errno != EINTR || dead))
    756 		return (-1);
    757 	if (cc == 0)	/* EOF */
    758 		return (-1);
    759 	if (cc == -1)	/* The read was interrupted. */
    760 		return (0);
    761 
    762 	do {
    763 		int len;
    764 
    765 		len = write(out_fd, ibuf + wrote, cc - wrote);
    766 		if (len == -1 && errno != EINTR)
    767 			return (-1);
    768 		if (len != -1)
    769 			wrote += len;
    770 	} while (wrote < cc);
    771 
    772 	return (0);
    773 }
    774 
    775 /*
    776  * This is the main I/O loop, and is shared across all zlogin modes.
    777  * Parameters:
    778  * 	stdin_fd:  The fd representing 'stdin' for the slave side; input to
    779  *		   the zone will be written here.
    780  *
    781  * 	appin_fd:  The fd representing the other end of the 'stdin' pipe (when
    782  *		   we're running non-interactive); used in process_raw_input
    783  *		   to ensure we don't fill up the application's stdin pipe.
    784  *
    785  *	stdout_fd: The fd representing 'stdout' for the slave side; output
    786  *		   from the zone will arrive here.
    787  *
    788  *	stderr_fd: The fd representing 'stderr' for the slave side; output
    789  *		   from the zone will arrive here.
    790  *
    791  *	raw_mode:  If TRUE, then no processing (for example, for '~.') will
    792  *		   be performed on the input coming from STDIN.
    793  *
    794  * stderr_fd may be specified as -1 if there is no stderr (only non-interactive
    795  * mode supplies a stderr).
    796  *
    797  */
    798 static void
    799 doio(int stdin_fd, int appin_fd, int stdout_fd, int stderr_fd, int sig_fd,
    800     boolean_t raw_mode)
    801 {
    802 	struct pollfd pollfds[4];
    803 	char ibuf[ZLOGIN_BUFSIZ];
    804 	int cc, ret;
    805 
    806 	/* read from stdout of zone and write to stdout of global zone */
    807 	pollfds[0].fd = stdout_fd;
    808 	pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI;
    809 
    810 	/* read from stderr of zone and write to stderr of global zone */
    811 	pollfds[1].fd = stderr_fd;
    812 	pollfds[1].events = pollfds[0].events;
    813 
    814 	/* read from stdin of global zone and write to stdin of zone */
    815 	pollfds[2].fd = STDIN_FILENO;
    816 	pollfds[2].events = pollfds[0].events;
    817 
    818 	/* read from signalling pipe so we know when child dies */
    819 	pollfds[3].fd = sig_fd;
    820 	pollfds[3].events = pollfds[0].events;
    821 
    822 	for (;;) {
    823 		pollfds[0].revents = pollfds[1].revents =
    824 		    pollfds[2].revents = pollfds[3].revents = 0;
    825 
    826 		if (dead)
    827 			break;
    828 
    829 		/*
    830 		 * There is a race condition here where we can receive the
    831 		 * child death signal, set the dead flag, but since we have
    832 		 * passed the test above, we would go into poll and hang.
    833 		 * To avoid this we use the sig_fd as an additional poll fd.
    834 		 * The signal handler writes into the other end of this pipe
    835 		 * when the child dies so that the poll will always see that
    836 		 * input and proceed.  We just loop around at that point and
    837 		 * then notice the dead flag.
    838 		 */
    839 
    840 		ret = poll(pollfds,
    841 		    sizeof (pollfds) / sizeof (struct pollfd), -1);
    842 
    843 		if (ret == -1 && errno != EINTR) {
    844 			perror("poll failed");
    845 			break;
    846 		}
    847 
    848 		if (errno == EINTR && dead) {
    849 			break;
    850 		}
    851 
    852 		/* event from master side stdout */
    853 		if (pollfds[0].revents) {
    854 			if (pollfds[0].revents &
    855 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
    856 				if (process_output(stdout_fd, STDOUT_FILENO)
    857 				    != 0)
    858 					break;
    859 			} else {
    860 				pollerr = pollfds[0].revents;
    861 				break;
    862 			}
    863 		}
    864 
    865 		/* event from master side stderr */
    866 		if (pollfds[1].revents) {
    867 			if (pollfds[1].revents &
    868 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
    869 				if (process_output(stderr_fd, STDERR_FILENO)
    870 				    != 0)
    871 					break;
    872 			} else {
    873 				pollerr = pollfds[1].revents;
    874 				break;
    875 			}
    876 		}
    877 
    878 		/* event from user STDIN side */
    879 		if (pollfds[2].revents) {
    880 			if (pollfds[2].revents &
    881 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
    882 				/*
    883 				 * stdin fd is stdin of the target; so,
    884 				 * the thing we'll write the user data *to*.
    885 				 *
    886 				 * Also, unlike on the output side, we
    887 				 * close the pipe on a zero-length message.
    888 				 */
    889 				int res;
    890 
    891 				if (raw_mode)
    892 					res = process_raw_input(stdin_fd,
    893 					    appin_fd);
    894 				else
    895 					res = process_user_input(stdin_fd,
    896 					    stdout_fd);
    897 
    898 				if (res < 0)
    899 					break;
    900 				if (res > 0) {
    901 					/* EOF (close) child's stdin_fd */
    902 					pollfds[2].fd = -1;
    903 					while ((res = close(stdin_fd)) != 0 &&
    904 					    errno == EINTR)
    905 						;
    906 					if (res != 0)
    907 						break;
    908 				}
    909 
    910 			} else if (raw_mode && pollfds[2].revents & POLLHUP) {
    911 				/*
    912 				 * It's OK to get a POLLHUP on STDIN-- it
    913 				 * always happens if you do:
    914 				 *
    915 				 * echo foo | zlogin <zone> <command>
    916 				 *
    917 				 * We reset fd to -1 in this case to clear
    918 				 * the condition and close the pipe (EOF) to
    919 				 * the other side in order to wrap things up.
    920 				 */
    921 				int res;
    922 
    923 				pollfds[2].fd = -1;
    924 				while ((res = close(stdin_fd)) != 0 &&
    925 				    errno == EINTR)
    926 					;
    927 				if (res != 0)
    928 					break;
    929 			} else {
    930 				pollerr = pollfds[2].revents;
    931 				break;
    932 			}
    933 		}
    934 	}
    935 
    936 	/*
    937 	 * We are in the midst of dying, but try to poll with a short
    938 	 * timeout to see if we can catch the last bit of I/O from the
    939 	 * children.
    940 	 */
    941 retry:
    942 	pollfds[0].revents = pollfds[1].revents = 0;
    943 	(void) poll(pollfds, 2, 100);
    944 	if (pollfds[0].revents &
    945 	    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
    946 		if ((cc = read(stdout_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
    947 			(void) write(STDOUT_FILENO, ibuf, cc);
    948 			goto retry;
    949 		}
    950 	}
    951 	if (pollfds[1].revents &
    952 	    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
    953 		if ((cc = read(stderr_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
    954 			(void) write(STDERR_FILENO, ibuf, cc);
    955 			goto retry;
    956 		}
    957 	}
    958 }
    959 
    960 /*
    961  * Fetch the user_cmd brand hook for getting a user's passwd(4) entry.
    962  */
    963 static const char *
    964 zone_get_user_cmd(brand_handle_t bh, const char *login, char *user_cmd,
    965     size_t len)
    966 {
    967 	bzero(user_cmd, sizeof (user_cmd));
    968 	if (brand_get_user_cmd(bh, login, user_cmd, len) != 0)
    969 		return (NULL);
    970 
    971 	return (user_cmd);
    972 }
    973 
    974 /* From libc */
    975 extern int str2passwd(const char *, int, void *, char *, int);
    976 
    977 /*
    978  * exec() the user_cmd brand hook, and convert the output string to a
    979  * struct passwd.  This is to be called after zone_enter().
    980  *
    981  */
    982 static struct passwd *
    983 zone_get_user_pw(const char *user_cmd, struct passwd *pwent, char *pwbuf,
    984     int pwbuflen)
    985 {
    986 	char pwline[NSS_BUFLEN_PASSWD];
    987 	char *cin = NULL;
    988 	FILE *fin;
    989 	int status;
    990 
    991 	assert(getzoneid() != GLOBAL_ZONEID);
    992 
    993 	if ((fin = popen(user_cmd, "r")) == NULL)
    994 		return (NULL);
    995 
    996 	while (cin == NULL && !feof(fin))
    997 		cin = fgets(pwline, sizeof (pwline), fin);
    998 
    999 	if (cin == NULL) {
   1000 		(void) pclose(fin);
   1001 		return (NULL);
   1002 	}
   1003 
   1004 	status = pclose(fin);
   1005 	if (!WIFEXITED(status))
   1006 		return (NULL);
   1007 	if (WEXITSTATUS(status) != 0)
   1008 		return (NULL);
   1009 
   1010 	if (str2passwd(pwline, sizeof (pwline), pwent, pwbuf, pwbuflen) == 0)
   1011 		return (pwent);
   1012 	else
   1013 		return (NULL);
   1014 }
   1015 
   1016 static char **
   1017 zone_login_cmd(brand_handle_t bh, const char *login)
   1018 {
   1019 	static char result_buf[ARG_MAX];
   1020 	char **new_argv, *ptr, *lasts;
   1021 	int n, a;
   1022 
   1023 	/* Get the login command for the target zone. */
   1024 	bzero(result_buf, sizeof (result_buf));
   1025 	if (brand_get_login_cmd(bh, login,
   1026 	    result_buf, sizeof (result_buf)) != 0)
   1027 		return (NULL);
   1028 
   1029 	/*
   1030 	 * We got back a string that we'd like to execute.  But since
   1031 	 * we're not doing the execution via a shell we'll need to convert
   1032 	 * the exec string to an array of strings.  We'll do that here
   1033 	 * but we're going to be very simplistic about it and break stuff
   1034 	 * up based on spaces.  We're not even going to support any kind
   1035 	 * of quoting or escape characters.  It's truly amazing that
   1036 	 * there is no library function in OpenSolaris to do this for us.
   1037 	 */
   1038 
   1039 	/*
   1040 	 * Be paranoid.  Since we're deliniating based on spaces make
   1041 	 * sure there are no adjacent spaces.
   1042 	 */
   1043 	if (strstr(result_buf, "  ") != NULL)
   1044 		return (NULL);
   1045 
   1046 	/* Remove any trailing whitespace.  */
   1047 	n = strlen(result_buf);
   1048 	if (result_buf[n - 1] == ' ')
   1049 		result_buf[n - 1] = '\0';
   1050 
   1051 	/* Count how many elements there are in the exec string. */
   1052 	ptr = result_buf;
   1053 	for (n = 2; ((ptr = strchr(ptr + 1, (int)' ')) != NULL); n++)
   1054 		;
   1055 
   1056 	/* Allocate the argv array that we're going to return. */
   1057 	if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
   1058 		return (NULL);
   1059 
   1060 	/* Tokenize the exec string and return. */
   1061 	a = 0;
   1062 	new_argv[a++] = result_buf;
   1063 	if (n > 2) {
   1064 		(void) strtok_r(result_buf, " ", &lasts);
   1065 		while ((new_argv[a++] = strtok_r(NULL, " ", &lasts)) != NULL)
   1066 			;
   1067 	} else {
   1068 		new_argv[a++] = NULL;
   1069 	}
   1070 	assert(n == a);
   1071 	return (new_argv);
   1072 }
   1073 
   1074 /*
   1075  * Prepare argv array for exec'd process; if we're passing commands to the
   1076  * new process, then use su(1M) to do the invocation.  Otherwise, use
   1077  * 'login -z <from_zonename> -f' (-z is an undocumented option which tells
   1078  * login that we're coming from another zone, and to disregard its CONSOLE
   1079  * checks).
   1080  */
   1081 static char **
   1082 prep_args(brand_handle_t bh, const char *login, char **argv)
   1083 {
   1084 	int argc = 0, a = 0, i, n = -1;
   1085 	char **new_argv;
   1086 
   1087 	if (argv != NULL) {
   1088 		size_t subshell_len = 1;
   1089 		char *subshell;
   1090 
   1091 		while (argv[argc] != NULL)
   1092 			argc++;
   1093 
   1094 		for (i = 0; i < argc; i++) {
   1095 			subshell_len += strlen(argv[i]) + 1;
   1096 		}
   1097 		if ((subshell = calloc(1, subshell_len)) == NULL)
   1098 			return (NULL);
   1099 
   1100 		for (i = 0; i < argc; i++) {
   1101 			(void) strcat(subshell, argv[i]);
   1102 			(void) strcat(subshell, " ");
   1103 		}
   1104 
   1105 		if (failsafe) {
   1106 			n = 4;
   1107 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
   1108 				return (NULL);
   1109 
   1110 			new_argv[a++] = FAILSAFESHELL;
   1111 		} else {
   1112 			n = 5;
   1113 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
   1114 				return (NULL);
   1115 
   1116 			new_argv[a++] = SUPATH;
   1117 			new_argv[a++] = (char *)login;
   1118 		}
   1119 		new_argv[a++] = "-c";
   1120 		new_argv[a++] = subshell;
   1121 		new_argv[a++] = NULL;
   1122 		assert(a == n);
   1123 	} else {
   1124 		if (failsafe) {
   1125 			n = 2;
   1126 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
   1127 				return (NULL);
   1128 			new_argv[a++] = FAILSAFESHELL;
   1129 			new_argv[a++] = NULL;
   1130 			assert(n == a);
   1131 		} else {
   1132 			new_argv = zone_login_cmd(bh, login);
   1133 		}
   1134 	}
   1135 
   1136 	return (new_argv);
   1137 }
   1138 
   1139 /*
   1140  * Helper routine for prep_env below.
   1141  */
   1142 static char *
   1143 add_env(char *name, char *value)
   1144 {
   1145 	size_t sz = strlen(name) + strlen(value) + 2; /* name, =, value, NUL */
   1146 	char *str;
   1147 
   1148 	if ((str = malloc(sz)) == NULL)
   1149 		return (NULL);
   1150 
   1151 	(void) snprintf(str, sz, "%s=%s", name, value);
   1152 	return (str);
   1153 }
   1154 
   1155 /*
   1156  * Prepare envp array for exec'd process.
   1157  */
   1158 static char **
   1159 prep_env()
   1160 {
   1161 	int e = 0, size = 1;
   1162 	char **new_env, *estr;
   1163 	char *term = getenv("TERM");
   1164 
   1165 	size++;	/* for $PATH */
   1166 	if (term != NULL)
   1167 		size++;
   1168 
   1169 	/*
   1170 	 * In failsafe mode we set $HOME, since '-l' isn't valid in this mode.
   1171 	 * We also set $SHELL, since neither login nor su will be around to do
   1172 	 * it.
   1173 	 */
   1174 	if (failsafe)
   1175 		size += 2;
   1176 
   1177 	if ((new_env = malloc(sizeof (char *) * size)) == NULL)
   1178 		return (NULL);
   1179 
   1180 	if ((estr = add_env("PATH", DEF_PATH)) == NULL)
   1181 		return (NULL);
   1182 	new_env[e++] = estr;
   1183 
   1184 	if (term != NULL) {
   1185 		if ((estr = add_env("TERM", term)) == NULL)
   1186 			return (NULL);
   1187 		new_env[e++] = estr;
   1188 	}
   1189 
   1190 	if (failsafe) {
   1191 		if ((estr = add_env("HOME", "/")) == NULL)
   1192 			return (NULL);
   1193 		new_env[e++] = estr;
   1194 
   1195 		if ((estr = add_env("SHELL", FAILSAFESHELL)) == NULL)
   1196 			return (NULL);
   1197 		new_env[e++] = estr;
   1198 	}
   1199 
   1200 	new_env[e++] = NULL;
   1201 
   1202 	assert(e == size);
   1203 
   1204 	return (new_env);
   1205 }
   1206 
   1207 /*
   1208  * Finish the preparation of the envp array for exec'd non-interactive
   1209  * zlogins.  This is called in the child process *after* we zone_enter(), since
   1210  * it derives things we can only know within the zone, such as $HOME, $SHELL,
   1211  * etc.  We need only do this in the non-interactive, mode, since otherwise
   1212  * login(1) will do it.  We don't do this in failsafe mode, since it presents
   1213  * additional ways in which the command could fail, and we'd prefer to avoid
   1214  * that.
   1215  */
   1216 static char **
   1217 prep_env_noninteractive(const char *user_cmd, char **env)
   1218 {
   1219 	size_t size;
   1220 	char **new_env;
   1221 	int e, i;
   1222 	char *estr;
   1223 	char varmail[LOGNAME_MAX + 11]; /* strlen(/var/mail/) = 10, NUL */
   1224 	char pwbuf[NSS_BUFLEN_PASSWD + 1];
   1225 	struct passwd pwent;
   1226 	struct passwd *pw = NULL;
   1227 
   1228 	assert(env != NULL);
   1229 	assert(failsafe == 0);
   1230 
   1231 	/*
   1232 	 * Exec the "user_cmd" brand hook to get a pwent for the
   1233 	 * login user.  If this fails, HOME will be set to "/", SHELL
   1234 	 * will be set to $DEFAULTSHELL, and we will continue to exec
   1235 	 * SUPATH <login> -c <cmd>.
   1236 	 */
   1237 	pw = zone_get_user_pw(user_cmd, &pwent, pwbuf, sizeof (pwbuf));
   1238 
   1239 	/*
   1240 	 * Get existing envp size.
   1241 	 */
   1242 	for (size = 0; env[size] != NULL; size++)
   1243 		;
   1244 
   1245 	e = size;
   1246 
   1247 	/*
   1248 	 * Finish filling out the environment; we duplicate the environment
   1249 	 * setup described in login(1), for lack of a better precedent.
   1250 	 */
   1251 	if (pw != NULL)
   1252 		size += 3;	/* LOGNAME, HOME, MAIL */
   1253 	else
   1254 		size += 1;	/* HOME */
   1255 
   1256 	size++;	/* always fill in SHELL */
   1257 	size++; /* terminating NULL */
   1258 
   1259 	if ((new_env = malloc(sizeof (char *) * size)) == NULL)
   1260 		goto malloc_fail;
   1261 
   1262 	/*
   1263 	 * Copy existing elements of env into new_env.
   1264 	 */
   1265 	for (i = 0; env[i] != NULL; i++) {
   1266 		if ((new_env[i] = strdup(env[i])) == NULL)
   1267 			goto malloc_fail;
   1268 	}
   1269 	assert(e == i);
   1270 
   1271 	if (pw != NULL) {
   1272 		if ((estr = add_env("LOGNAME", pw->pw_name)) == NULL)
   1273 			goto malloc_fail;
   1274 		new_env[e++] = estr;
   1275 
   1276 		if ((estr = add_env("HOME", pw->pw_dir)) == NULL)
   1277 			goto malloc_fail;
   1278 		new_env[e++] = estr;
   1279 
   1280 		if (chdir(pw->pw_dir) != 0)
   1281 			zerror(gettext("Could not chdir to home directory "
   1282 			    "%s: %s"), pw->pw_dir, strerror(errno));
   1283 
   1284 		(void) snprintf(varmail, sizeof (varmail), "/var/mail/%s",
   1285 		    pw->pw_name);
   1286 		if ((estr = add_env("MAIL", varmail)) == NULL)
   1287 			goto malloc_fail;
   1288 		new_env[e++] = estr;
   1289 	} else {
   1290 		if ((estr = add_env("HOME", "/")) == NULL)
   1291 			goto malloc_fail;
   1292 		new_env[e++] = estr;
   1293 	}
   1294 
   1295 	if (pw != NULL && strlen(pw->pw_shell) > 0) {
   1296 		if ((estr = add_env("SHELL", pw->pw_shell)) == NULL)
   1297 			goto malloc_fail;
   1298 		new_env[e++] = estr;
   1299 	} else {
   1300 		if ((estr = add_env("SHELL", DEFAULTSHELL)) == NULL)
   1301 			goto malloc_fail;
   1302 		new_env[e++] = estr;
   1303 	}
   1304 
   1305 	new_env[e++] = NULL;	/* add terminating NULL */
   1306 
   1307 	assert(e == size);
   1308 	return (new_env);
   1309 
   1310 malloc_fail:
   1311 	zperror(gettext("failed to allocate memory for process environment"));
   1312 	return (NULL);
   1313 }
   1314 
   1315 static int
   1316 close_func(void *slavefd, int fd)
   1317 {
   1318 	if (fd != *(int *)slavefd)
   1319 		(void) close(fd);
   1320 	return (0);
   1321 }
   1322 
   1323 static void
   1324 set_cmdchar(char *cmdcharstr)
   1325 {
   1326 	char c;
   1327 	long lc;
   1328 
   1329 	if ((c = *cmdcharstr) != '\\') {
   1330 		cmdchar = c;
   1331 		return;
   1332 	}
   1333 
   1334 	c = cmdcharstr[1];
   1335 	if (c == '\0' || c == '\\') {
   1336 		cmdchar = '\\';
   1337 		return;
   1338 	}
   1339 
   1340 	if (c < '0' || c > '7') {
   1341 		zerror(gettext("Unrecognized escape character option %s"),
   1342 		    cmdcharstr);
   1343 		usage();
   1344 	}
   1345 
   1346 	lc = strtol(cmdcharstr + 1, NULL, 8);
   1347 	if (lc < 0 || lc > 255) {
   1348 		zerror(gettext("Octal escape character '%s' too large"),
   1349 		    cmdcharstr);
   1350 		usage();
   1351 	}
   1352 	cmdchar = (char)lc;
   1353 }
   1354 
   1355 static int
   1356 setup_utmpx(char *slavename)
   1357 {
   1358 	struct utmpx ut;
   1359 
   1360 	bzero(&ut, sizeof (ut));
   1361 	(void) strncpy(ut.ut_user, ".zlogin", sizeof (ut.ut_user));
   1362 	(void) strncpy(ut.ut_line, slavename, sizeof (ut.ut_line));
   1363 	ut.ut_pid = getpid();
   1364 	ut.ut_id[0] = 'z';
   1365 	ut.ut_id[1] = ut.ut_id[2] = ut.ut_id[3] = (char)SC_WILDC;
   1366 	ut.ut_type = LOGIN_PROCESS;
   1367 	(void) time(&ut.ut_tv.tv_sec);
   1368 
   1369 	if (makeutx(&ut) == NULL) {
   1370 		zerror(gettext("makeutx failed"));
   1371 		return (-1);
   1372 	}
   1373 	return (0);
   1374 }
   1375 
   1376 static void
   1377 release_lock_file(int lockfd)
   1378 {
   1379 	(void) close(lockfd);
   1380 }
   1381 
   1382 static int
   1383 grab_lock_file(const char *zone_name, int *lockfd)
   1384 {
   1385 	char pathbuf[PATH_MAX];
   1386 	struct flock flock;
   1387 
   1388 	if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
   1389 		zerror(gettext("could not mkdir %s: %s"), ZONES_TMPDIR,
   1390 		    strerror(errno));
   1391 		return (-1);
   1392 	}
   1393 	(void) chmod(ZONES_TMPDIR, S_IRWXU);
   1394 	(void) snprintf(pathbuf, sizeof (pathbuf), "%s/%s.zoneadm.lock",
   1395 	    ZONES_TMPDIR, zone_name);
   1396 
   1397 	if ((*lockfd = open(pathbuf, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR)) < 0) {
   1398 		zerror(gettext("could not open %s: %s"), pathbuf,
   1399 		    strerror(errno));
   1400 		return (-1);
   1401 	}
   1402 	/*
   1403 	 * Lock the file to synchronize with other zoneadmds
   1404 	 */
   1405 	flock.l_type = F_WRLCK;
   1406 	flock.l_whence = SEEK_SET;
   1407 	flock.l_start = (off_t)0;
   1408 	flock.l_len = (off_t)0;
   1409 	if (fcntl(*lockfd, F_SETLKW, &flock) < 0) {
   1410 		zerror(gettext("unable to lock %s: %s"), pathbuf,
   1411 		    strerror(errno));
   1412 		release_lock_file(*lockfd);
   1413 		return (-1);
   1414 	}
   1415 	return (Z_OK);
   1416 }
   1417 
   1418 static int
   1419 start_zoneadmd(const char *zone_name)
   1420 {
   1421 	pid_t retval;
   1422 	int pstatus = 0, error = -1, lockfd, doorfd;
   1423 	struct door_info info;
   1424 	char doorpath[MAXPATHLEN];
   1425 
   1426 	(void) snprintf(doorpath, sizeof (doorpath), ZONE_DOOR_PATH, zone_name);
   1427 
   1428 	if (grab_lock_file(zone_name, &lockfd) != Z_OK)
   1429 		return (-1);
   1430 	/*
   1431 	 * We must do the door check with the lock held.  Otherwise, we
   1432 	 * might race against another zoneadm/zlogin process and wind
   1433 	 * up with two processes trying to start zoneadmd at the same
   1434 	 * time.  zoneadmd will detect this, and fail, but we prefer this
   1435 	 * to be as seamless as is practical, from a user perspective.
   1436 	 */
   1437 	if ((doorfd = open(doorpath, O_RDONLY)) < 0) {
   1438 		if (errno != ENOENT) {
   1439 			zerror("failed to open %s: %s", doorpath,
   1440 			    strerror(errno));
   1441 			goto out;
   1442 		}
   1443 	} else {
   1444 		/*
   1445 		 * Seems to be working ok.
   1446 		 */
   1447 		if (door_info(doorfd, &info) == 0 &&
   1448 		    ((info.di_attributes & DOOR_REVOKED) == 0)) {
   1449 			error = 0;
   1450 			goto out;
   1451 		}
   1452 	}
   1453 
   1454 	if ((child_pid = fork()) == -1) {
   1455 		zperror(gettext("could not fork"));
   1456 		goto out;
   1457 	} else if (child_pid == 0) {
   1458 		/* child process */
   1459 		(void) execl("/usr/lib/zones/zoneadmd", "zoneadmd", "-z",
   1460 		    zone_name, NULL);
   1461 		zperror(gettext("could not exec zoneadmd"));
   1462 		_exit(1);
   1463 	}
   1464 
   1465 	/* parent process */
   1466 	do {
   1467 		retval = waitpid(child_pid, &pstatus, 0);
   1468 	} while (retval != child_pid);
   1469 	if (WIFSIGNALED(pstatus) ||
   1470 	    (WIFEXITED(pstatus) && WEXITSTATUS(pstatus) != 0)) {
   1471 		zerror(gettext("could not start %s"), "zoneadmd");
   1472 		goto out;
   1473 	}
   1474 	error = 0;
   1475 out:
   1476 	release_lock_file(lockfd);
   1477 	(void) close(doorfd);
   1478 	return (error);
   1479 }
   1480 
   1481 static int
   1482 init_template(void)
   1483 {
   1484 	int fd;
   1485 	int err = 0;
   1486 
   1487 	fd = open64(CTFS_ROOT "/process/template", O_RDWR);
   1488 	if (fd == -1)
   1489 		return (-1);
   1490 
   1491 	/*
   1492 	 * zlogin doesn't do anything with the contract.
   1493 	 * Deliver no events, don't inherit, and allow it to be orphaned.
   1494 	 */
   1495 	err |= ct_tmpl_set_critical(fd, 0);
   1496 	err |= ct_tmpl_set_informative(fd, 0);
   1497 	err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
   1498 	err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
   1499 	if (err || ct_tmpl_activate(fd)) {
   1500 		(void) close(fd);
   1501 		return (-1);
   1502 	}
   1503 
   1504 	return (fd);
   1505 }
   1506 
   1507 static int
   1508 noninteractive_login(char *zonename, const char *user_cmd, zoneid_t zoneid,
   1509     char **new_args, char **new_env)
   1510 {
   1511 	pid_t retval;
   1512 	int stdin_pipe[2], stdout_pipe[2], stderr_pipe[2], dead_child_pipe[2];
   1513 	int child_status;
   1514 	int tmpl_fd;
   1515 	sigset_t block_cld;
   1516 
   1517 	if ((tmpl_fd = init_template()) == -1) {
   1518 		reset_tty();
   1519 		zperror(gettext("could not create contract"));
   1520 		return (1);
   1521 	}
   1522 
   1523 	if (pipe(stdin_pipe) != 0) {
   1524 		zperror(gettext("could not create STDIN pipe"));
   1525 		return (1);
   1526 	}
   1527 	/*
   1528 	 * When the user types ^D, we get a zero length message on STDIN.
   1529 	 * We need to echo that down the pipe to send it to the other side;
   1530 	 * but by default, pipes don't propagate zero-length messages.  We
   1531 	 * toggle that behavior off using I_SWROPT.  See streamio(7i).
   1532 	 */
   1533 	if (ioctl(stdin_pipe[0], I_SWROPT, SNDZERO) != 0) {
   1534 		zperror(gettext("could not configure STDIN pipe"));
   1535 		return (1);
   1536 
   1537 	}
   1538 	if (pipe(stdout_pipe) != 0) {
   1539 		zperror(gettext("could not create STDOUT pipe"));
   1540 		return (1);
   1541 	}
   1542 	if (pipe(stderr_pipe) != 0) {
   1543 		zperror(gettext("could not create STDERR pipe"));
   1544 		return (1);
   1545 	}
   1546 
   1547 	if (pipe(dead_child_pipe) != 0) {
   1548 		zperror(gettext("could not create signalling pipe"));
   1549 		return (1);
   1550 	}
   1551 	close_on_sig = dead_child_pipe[0];
   1552 
   1553 	/*
   1554 	 * If any of the pipe FD's winds up being less than STDERR, then we
   1555 	 * have a mess on our hands-- and we are lacking some of the I/O
   1556 	 * streams we would expect anyway.  So we bail.
   1557 	 */
   1558 	if (stdin_pipe[0] <= STDERR_FILENO ||
   1559 	    stdin_pipe[1] <= STDERR_FILENO ||
   1560 	    stdout_pipe[0] <= STDERR_FILENO ||
   1561 	    stdout_pipe[1] <= STDERR_FILENO ||
   1562 	    stderr_pipe[0] <= STDERR_FILENO ||
   1563 	    stderr_pipe[1] <= STDERR_FILENO ||
   1564 	    dead_child_pipe[0] <= STDERR_FILENO ||
   1565 	    dead_child_pipe[1] <= STDERR_FILENO) {
   1566 		zperror(gettext("process lacks valid STDIN, STDOUT, STDERR"));
   1567 		return (1);
   1568 	}
   1569 
   1570 	if (prefork_dropprivs() != 0) {
   1571 		zperror(gettext("could not allocate privilege set"));
   1572 		return (1);
   1573 	}
   1574 
   1575 	(void) sigset(SIGCLD, sigcld);
   1576 	(void) sigemptyset(&block_cld);
   1577 	(void) sigaddset(&block_cld, SIGCLD);
   1578 	(void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
   1579 
   1580 	if ((child_pid = fork()) == -1) {
   1581 		(void) ct_tmpl_clear(tmpl_fd);
   1582 		(void) close(tmpl_fd);
   1583 		zperror(gettext("could not fork"));
   1584 		return (1);
   1585 	} else if (child_pid == 0) { /* child process */
   1586 		(void) ct_tmpl_clear(tmpl_fd);
   1587 
   1588 		/*
   1589 		 * Do a dance to get the pipes hooked up as FD's 0, 1 and 2.
   1590 		 */
   1591 		(void) close(STDIN_FILENO);
   1592 		(void) close(STDOUT_FILENO);
   1593 		(void) close(STDERR_FILENO);
   1594 		(void) dup2(stdin_pipe[1], STDIN_FILENO);
   1595 		(void) dup2(stdout_pipe[1], STDOUT_FILENO);
   1596 		(void) dup2(stderr_pipe[1], STDERR_FILENO);
   1597 		(void) closefrom(STDERR_FILENO + 1);
   1598 
   1599 		(void) sigset(SIGCLD, SIG_DFL);
   1600 		(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
   1601 		/*
   1602 		 * In case any of stdin, stdout or stderr are streams,
   1603 		 * anchor them to prevent malicious I_POPs.
   1604 		 */
   1605 		(void) ioctl(STDIN_FILENO, I_ANCHOR);
   1606 		(void) ioctl(STDOUT_FILENO, I_ANCHOR);
   1607 		(void) ioctl(STDERR_FILENO, I_ANCHOR);
   1608 
   1609 		if (zone_enter(zoneid) == -1) {
   1610 			zerror(gettext("could not enter zone %s: %s"),
   1611 			    zonename, strerror(errno));
   1612 			_exit(1);
   1613 		}
   1614 
   1615 		/*
   1616 		 * For non-native zones, tell libc where it can find locale
   1617 		 * specific getttext() messages.
   1618 		 */
   1619 		if (access("/.SUNWnative/usr/lib/locale", R_OK) == 0)
   1620 			(void) bindtextdomain(TEXT_DOMAIN,
   1621 			    "/.SUNWnative/usr/lib/locale");
   1622 		else if (access("/native/usr/lib/locale", R_OK) == 0)
   1623 			(void) bindtextdomain(TEXT_DOMAIN,
   1624 			    "/native/usr/lib/locale");
   1625 
   1626 		if (!failsafe)
   1627 			new_env = prep_env_noninteractive(user_cmd, new_env);
   1628 
   1629 		if (new_env == NULL) {
   1630 			_exit(1);
   1631 		}
   1632 
   1633 		/*
   1634 		 * Move into a new process group; the zone_enter will have
   1635 		 * placed us into zsched's session, and we want to be in
   1636 		 * a unique process group.
   1637 		 */
   1638 		(void) setpgid(getpid(), getpid());
   1639 
   1640 		(void) execve(new_args[0], new_args, new_env);
   1641 		zperror(gettext("exec failure"));
   1642 		_exit(1);
   1643 	}
   1644 	/* parent */
   1645 
   1646 	/* close pipe sides written by child */
   1647 	(void) close(stdout_pipe[1]);
   1648 	(void) close(stderr_pipe[1]);
   1649 
   1650 	(void) sigset(SIGINT, sig_forward);
   1651 
   1652 	postfork_dropprivs();
   1653 
   1654 	(void) ct_tmpl_clear(tmpl_fd);
   1655 	(void) close(tmpl_fd);
   1656 
   1657 	(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
   1658 	doio(stdin_pipe[0], stdin_pipe[1], stdout_pipe[0], stderr_pipe[0],
   1659 	    dead_child_pipe[1], B_TRUE);
   1660 	do {
   1661 		retval = waitpid(child_pid, &child_status, 0);
   1662 		if (retval == -1) {
   1663 			child_status = 0;
   1664 		}
   1665 	} while (retval != child_pid && errno != ECHILD);
   1666 
   1667 	return (WEXITSTATUS(child_status));
   1668 }
   1669 
   1670 int
   1671 main(int argc, char **argv)
   1672 {
   1673 	int arg, console = 0;
   1674 	zoneid_t zoneid;
   1675 	zone_state_t st;
   1676 	char *login = "root";
   1677 	int lflag = 0;
   1678 	char *zonename = NULL;
   1679 	char **proc_args = NULL;
   1680 	char **new_args, **new_env;
   1681 	sigset_t block_cld;
   1682 	char devroot[MAXPATHLEN];
   1683 	char *slavename, slaveshortname[MAXPATHLEN];
   1684 	priv_set_t *privset;
   1685 	int tmpl_fd;
   1686 	char zonebrand[MAXNAMELEN];
   1687 	char default_brand[MAXNAMELEN];
   1688 	struct stat sb;
   1689 	char kernzone[ZONENAME_MAX];
   1690 	brand_handle_t bh;
   1691 	char user_cmd[MAXPATHLEN];
   1692 
   1693 	(void) setlocale(LC_ALL, "");
   1694 	(void) textdomain(TEXT_DOMAIN);
   1695 
   1696 	(void) getpname(argv[0]);
   1697 
   1698 	while ((arg = getopt(argc, argv, "ECR:Se:l:")) != EOF) {
   1699 		switch (arg) {
   1700 		case 'C':
   1701 			console = 1;
   1702 			break;
   1703 		case 'E':
   1704 			nocmdchar = 1;
   1705 			break;
   1706 		case 'R':	/* undocumented */
   1707 			if (*optarg != '/') {
   1708 				zerror(gettext("root path must be absolute."));
   1709 				exit(2);
   1710 			}
   1711 			if (stat(optarg, &sb) == -1 || !S_ISDIR(sb.st_mode)) {
   1712 				zerror(
   1713 				    gettext("root path must be a directory."));
   1714 				exit(2);
   1715 			}
   1716 			zonecfg_set_root(optarg);
   1717 			break;
   1718 		case 'S':
   1719 			failsafe = 1;
   1720 			break;
   1721 		case 'e':
   1722 			set_cmdchar(optarg);
   1723 			break;
   1724 		case 'l':
   1725 			login = optarg;
   1726 			lflag = 1;
   1727 			break;
   1728 		default:
   1729 			usage();
   1730 		}
   1731 	}
   1732 
   1733 	if (console != 0 && lflag != 0) {
   1734 		zerror(gettext("-l may not be specified for console login"));
   1735 		usage();
   1736 	}
   1737 
   1738 	if (console != 0 && failsafe != 0) {
   1739 		zerror(gettext("-S may not be specified for console login"));
   1740 		usage();
   1741 	}
   1742 
   1743 	if (console != 0 && zonecfg_in_alt_root()) {
   1744 		zerror(gettext("-R may not be specified for console login"));
   1745 		exit(2);
   1746 	}
   1747 
   1748 	if (failsafe != 0 && lflag != 0) {
   1749 		zerror(gettext("-l may not be specified for failsafe login"));
   1750 		usage();
   1751 	}
   1752 
   1753 	if (optind == (argc - 1)) {
   1754 		/*
   1755 		 * zone name, no process name; this should be an interactive
   1756 		 * as long as STDIN is really a tty.
   1757 		 */
   1758 		if (isatty(STDIN_FILENO))
   1759 			interactive = 1;
   1760 		zonename = argv[optind];
   1761 	} else if (optind < (argc - 1)) {
   1762 		if (console) {
   1763 			zerror(gettext("Commands may not be specified for "
   1764 			    "console login."));
   1765 			usage();
   1766 		}
   1767 		/* zone name and process name, and possibly some args */
   1768 		zonename = argv[optind];
   1769 		proc_args = &argv[optind + 1];
   1770 		interactive = 0;
   1771 	} else {
   1772 		usage();
   1773 	}
   1774 
   1775 	if (getzoneid() != GLOBAL_ZONEID) {
   1776 		zerror(gettext("'%s' may only be used from the global zone"),
   1777 		    pname);
   1778 		return (1);
   1779 	}
   1780 
   1781 	if (strcmp(zonename, GLOBAL_ZONENAME) == 0) {
   1782 		zerror(gettext("'%s' not applicable to the global zone"),
   1783 		    pname);
   1784 		return (1);
   1785 	}
   1786 
   1787 	if (zone_get_state(zonename, &st) != Z_OK) {
   1788 		zerror(gettext("zone '%s' unknown"), zonename);
   1789 		return (1);
   1790 	}
   1791 
   1792 	if (st < ZONE_STATE_INSTALLED) {
   1793 		zerror(gettext("cannot login to a zone which is '%s'"),
   1794 		    zone_state_str(st));
   1795 		return (1);
   1796 	}
   1797 
   1798 	/*
   1799 	 * In both console and non-console cases, we require all privs.
   1800 	 * In the console case, because we may need to startup zoneadmd.
   1801 	 * In the non-console case in order to do zone_enter(2), zonept()
   1802 	 * and other tasks.
   1803 	 *
   1804 	 * Future work: this solution is temporary.  Ultimately, we need to
   1805 	 * move to a flexible system which allows the global admin to
   1806 	 * designate that a particular user can zlogin (and probably zlogin
   1807 	 * -C) to a particular zone.  This all-root business we have now is
   1808 	 * quite sketchy.
   1809 	 */
   1810 	if ((privset = priv_allocset()) == NULL) {
   1811 		zperror(gettext("priv_allocset failed"));
   1812 		return (1);
   1813 	}
   1814 
   1815 	if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
   1816 		zperror(gettext("getppriv failed"));
   1817 		priv_freeset(privset);
   1818 		return (1);
   1819 	}
   1820 
   1821 	if (priv_isfullset(privset) == B_FALSE) {
   1822 		zerror(gettext("You lack sufficient privilege to run "
   1823 		    "this command (all privs required)"));
   1824 		priv_freeset(privset);
   1825 		return (1);
   1826 	}
   1827 	priv_freeset(privset);
   1828 
   1829 	/*
   1830 	 * The console is a separate case from the rest of the code; handle
   1831 	 * it first.
   1832 	 */
   1833 	if (console) {
   1834 		/*
   1835 		 * Ensure that zoneadmd for this zone is running.
   1836 		 */
   1837 		if (start_zoneadmd(zonename) == -1)
   1838 			return (1);
   1839 
   1840 		/*
   1841 		 * Make contact with zoneadmd.
   1842 		 */
   1843 		if (get_console_master(zonename) == -1)
   1844 			return (1);
   1845 
   1846 		(void) printf(gettext("[Connected to zone '%s' console]\n"),
   1847 		    zonename);
   1848 
   1849 		if (set_tty_rawmode(STDIN_FILENO) == -1) {
   1850 			reset_tty();
   1851 			zperror(gettext("failed to set stdin pty to raw mode"));
   1852 			return (1);
   1853 		}
   1854 
   1855 		(void) sigset(SIGWINCH, sigwinch);
   1856 		(void) sigwinch(0);
   1857 
   1858 		/*
   1859 		 * Run the I/O loop until we get disconnected.
   1860 		 */
   1861 		doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
   1862 		reset_tty();
   1863 		(void) printf(gettext("\n[Connection to zone '%s' console "
   1864 		    "closed]\n"), zonename);
   1865 
   1866 		return (0);
   1867 	}
   1868 
   1869 	if (st != ZONE_STATE_RUNNING && st != ZONE_STATE_MOUNTED) {
   1870 		zerror(gettext("login allowed only to running zones "
   1871 		    "(%s is '%s')."), zonename, zone_state_str(st));
   1872 		return (1);
   1873 	}
   1874 
   1875 	(void) strlcpy(kernzone, zonename, sizeof (kernzone));
   1876 	if (zonecfg_in_alt_root()) {
   1877 		FILE *fp = zonecfg_open_scratch("", B_FALSE);
   1878 
   1879 		if (fp == NULL || zonecfg_find_scratch(fp, zonename,
   1880 		    zonecfg_get_root(), kernzone, sizeof (kernzone)) == -1) {
   1881 			zerror(gettext("cannot find scratch zone %s"),
   1882 			    zonename);
   1883 			if (fp != NULL)
   1884 				zonecfg_close_scratch(fp);
   1885 			return (1);
   1886 		}
   1887 		zonecfg_close_scratch(fp);
   1888 	}
   1889 
   1890 	if ((zoneid = getzoneidbyname(kernzone)) == -1) {
   1891 		zerror(gettext("failed to get zoneid for zone '%s'"),
   1892 		    zonename);
   1893 		return (1);
   1894 	}
   1895 
   1896 	/*
   1897 	 * We need the zone root path only if we are setting up a pty.
   1898 	 */
   1899 	if (zone_get_devroot(zonename, devroot, sizeof (devroot)) == -1) {
   1900 		zerror(gettext("could not get dev path for zone %s"),
   1901 		    zonename);
   1902 		return (1);
   1903 	}
   1904 
   1905 	if (zone_get_brand(zonename, zonebrand, sizeof (zonebrand)) != Z_OK) {
   1906 		zerror(gettext("could not get brand for zone %s"), zonename);
   1907 		return (1);
   1908 	}
   1909 	/*
   1910 	 * In the alternate root environment, the only supported
   1911 	 * operations are mount and unmount.  In this case, just treat
   1912 	 * the zone as native if it is cluster.  Cluster zones can be
   1913 	 * native for the purpose of LU or upgrade, and the cluster
   1914 	 * brand may not exist in the miniroot (such as in net install
   1915 	 * upgrade).
   1916 	 */
   1917 	if (zonecfg_default_brand(default_brand,
   1918 	    sizeof (default_brand)) != Z_OK) {
   1919 		zerror(gettext("unable to determine default brand"));
   1920 		return (1);
   1921 	}
   1922 	if (zonecfg_in_alt_root() &&
   1923 	    strcmp(zonebrand, CLUSTER_BRAND_NAME) == 0) {
   1924 		(void) strlcpy(zonebrand, default_brand, sizeof (zonebrand));
   1925 	}
   1926 
   1927 	if ((bh = brand_open(zonebrand)) == NULL) {
   1928 		zerror(gettext("could not open brand for zone %s"), zonename);
   1929 		return (1);
   1930 	}
   1931 
   1932 	if ((new_args = prep_args(bh, login, proc_args)) == NULL) {
   1933 		zperror(gettext("could not assemble new arguments"));
   1934 		brand_close(bh);
   1935 		return (1);
   1936 	}
   1937 	/*
   1938 	 * Get the brand specific user_cmd.  This command is used to get
   1939 	 * a passwd(4) entry for login.
   1940 	 */
   1941 	if (!interactive && !failsafe) {
   1942 		if (zone_get_user_cmd(bh, login, user_cmd,
   1943 		    sizeof (user_cmd)) == NULL) {
   1944 			zerror(gettext("could not get user_cmd for zone %s"),
   1945 			    zonename);
   1946 			brand_close(bh);
   1947 			return (1);
   1948 		}
   1949 	}
   1950 	brand_close(bh);
   1951 
   1952 	if ((new_env = prep_env()) == NULL) {
   1953 		zperror(gettext("could not assemble new environment"));
   1954 		return (1);
   1955 	}
   1956 
   1957 	if (!interactive)
   1958 		return (noninteractive_login(zonename, user_cmd, zoneid,
   1959 		    new_args, new_env));
   1960 
   1961 	if (zonecfg_in_alt_root()) {
   1962 		zerror(gettext("cannot use interactive login with scratch "
   1963 		    "zone"));
   1964 		return (1);
   1965 	}
   1966 
   1967 	/*
   1968 	 * Things are more complex in interactive mode; we get the
   1969 	 * master side of the pty, then place the user's terminal into
   1970 	 * raw mode.
   1971 	 */
   1972 	if (get_master_pty() == -1) {
   1973 		zerror(gettext("could not setup master pty device"));
   1974 		return (1);
   1975 	}
   1976 
   1977 	/*
   1978 	 * Compute the "short name" of the pts.  /dev/pts/2 --> pts/2
   1979 	 */
   1980 	if ((slavename = ptsname(masterfd)) == NULL) {
   1981 		zperror(gettext("failed to get name for pseudo-tty"));
   1982 		return (1);
   1983 	}
   1984 	if (strncmp(slavename, "/dev/", strlen("/dev/")) == 0)
   1985 		(void) strlcpy(slaveshortname, slavename + strlen("/dev/"),
   1986 		    sizeof (slaveshortname));
   1987 	else
   1988 		(void) strlcpy(slaveshortname, slavename,
   1989 		    sizeof (slaveshortname));
   1990 
   1991 	(void) printf(gettext("[Connected to zone '%s' %s]\n"), zonename,
   1992 	    slaveshortname);
   1993 
   1994 	if (set_tty_rawmode(STDIN_FILENO) == -1) {
   1995 		reset_tty();
   1996 		zperror(gettext("failed to set stdin pty to raw mode"));
   1997 		return (1);
   1998 	}
   1999 
   2000 	if (prefork_dropprivs() != 0) {
   2001 		reset_tty();
   2002 		zperror(gettext("could not allocate privilege set"));
   2003 		return (1);
   2004 	}
   2005 
   2006 	/*
   2007 	 * We must mask SIGCLD until after we have coped with the fork
   2008 	 * sufficiently to deal with it; otherwise we can race and receive the
   2009 	 * signal before child_pid has been initialized (yes, this really
   2010 	 * happens).
   2011 	 */
   2012 	(void) sigset(SIGCLD, sigcld);
   2013 	(void) sigemptyset(&block_cld);
   2014 	(void) sigaddset(&block_cld, SIGCLD);
   2015 	(void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
   2016 
   2017 	/*
   2018 	 * We activate the contract template at the last minute to
   2019 	 * avoid intermediate functions that could be using fork(2)
   2020 	 * internally.
   2021 	 */
   2022 	if ((tmpl_fd = init_template()) == -1) {
   2023 		reset_tty();
   2024 		zperror(gettext("could not create contract"));
   2025 		return (1);
   2026 	}
   2027 
   2028 	if ((child_pid = fork()) == -1) {
   2029 		(void) ct_tmpl_clear(tmpl_fd);
   2030 		reset_tty();
   2031 		zperror(gettext("could not fork"));
   2032 		return (1);
   2033 	} else if (child_pid == 0) { /* child process */
   2034 		int slavefd, newslave;
   2035 
   2036 		(void) ct_tmpl_clear(tmpl_fd);
   2037 		(void) close(tmpl_fd);
   2038 
   2039 		(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
   2040 
   2041 		if ((slavefd = init_slave_pty(zoneid, devroot)) == -1)
   2042 			return (1);
   2043 
   2044 		/*
   2045 		 * Close all fds except for the slave pty.
   2046 		 */
   2047 		(void) fdwalk(close_func, &slavefd);
   2048 
   2049 		/*
   2050 		 * Temporarily dup slavefd to stderr; that way if we have
   2051 		 * to print out that zone_enter failed, the output will
   2052 		 * have somewhere to go.
   2053 		 */
   2054 		if (slavefd != STDERR_FILENO)
   2055 			(void) dup2(slavefd, STDERR_FILENO);
   2056 
   2057 		if (zone_enter(zoneid) == -1) {
   2058 			zerror(gettext("could not enter zone %s: %s"),
   2059 			    zonename, strerror(errno));
   2060 			return (1);
   2061 		}
   2062 
   2063 		if (slavefd != STDERR_FILENO)
   2064 			(void) close(STDERR_FILENO);
   2065 
   2066 		/*
   2067 		 * We take pains to get this process into a new process
   2068 		 * group, and subsequently a new session.  In this way,
   2069 		 * we'll have a session which doesn't yet have a controlling
   2070 		 * terminal.  When we open the slave, it will become the
   2071 		 * controlling terminal; no PIDs concerning pgrps or sids
   2072 		 * will leak inappropriately into the zone.
   2073 		 */
   2074 		(void) setpgrp();
   2075 
   2076 		/*
   2077 		 * We need the slave pty to be referenced from the zone's
   2078 		 * /dev in order to ensure that the devt's, etc are all
   2079 		 * correct.  Otherwise we break ttyname and the like.
   2080 		 */
   2081 		if ((newslave = open(slavename, O_RDWR)) == -1) {
   2082 			(void) close(slavefd);
   2083 			return (1);
   2084 		}
   2085 		(void) close(slavefd);
   2086 		slavefd = newslave;
   2087 
   2088 		/*
   2089 		 * dup the slave to the various FDs, so that when the
   2090 		 * spawned process does a write/read it maps to the slave
   2091 		 * pty.
   2092 		 */
   2093 		(void) dup2(slavefd, STDIN_FILENO);
   2094 		(void) dup2(slavefd, STDOUT_FILENO);
   2095 		(void) dup2(slavefd, STDERR_FILENO);
   2096 		if (slavefd != STDIN_FILENO && slavefd != STDOUT_FILENO &&
   2097 		    slavefd != STDERR_FILENO) {
   2098 			(void) close(slavefd);
   2099 		}
   2100 
   2101 		/*
   2102 		 * In failsafe mode, we don't use login(1), so don't try
   2103 		 * setting up a utmpx entry.
   2104 		 *
   2105 		 * A branded zone may have very different utmpx semantics.
   2106 		 * At the moment, we only have two brand types:
   2107 		 * Solaris-like (native, sn1) and Linux.  In the Solaris
   2108 		 * case, we know exactly how to do the necessary utmpx
   2109 		 * setup.  Fortunately for us, the Linux /bin/login is
   2110 		 * prepared to deal with a non-initialized utmpx entry, so
   2111 		 * we can simply skip it.  If future brands don't fall into
   2112 		 * either category, we'll have to add a per-brand utmpx
   2113 		 * setup hook.
   2114 		 */
   2115 		if (!failsafe && (strcmp(zonebrand, "lx") != 0))
   2116 			if (setup_utmpx(slaveshortname) == -1)
   2117 				return (1);
   2118 
   2119 		(void) execve(new_args[0], new_args, new_env);
   2120 		zperror(gettext("exec failure"));
   2121 		return (1);
   2122 	}
   2123 	(void) ct_tmpl_clear(tmpl_fd);
   2124 	(void) close(tmpl_fd);
   2125 
   2126 	/*
   2127 	 * The rest is only for the parent process.
   2128 	 */
   2129 	(void) sigset(SIGWINCH, sigwinch);
   2130 
   2131 	postfork_dropprivs();
   2132 
   2133 	(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
   2134 	doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
   2135 
   2136 	reset_tty();
   2137 	(void) fprintf(stderr,
   2138 	    gettext("\n[Connection to zone '%s' %s closed]\n"), zonename,
   2139 	    slaveshortname);
   2140 
   2141 	if (pollerr != 0) {
   2142 		(void) fprintf(stderr, gettext("Error: connection closed due "
   2143 		    "to unexpected pollevents=0x%x.\n"), pollerr);
   2144 		return (1);
   2145 	}
   2146 
   2147 	return (0);
   2148 }
   2149