Home | History | Annotate | Download | only in init
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
     28 /*	  All Rights Reserved  	*/
     29 
     30 /*
     31  * University Copyright- Copyright (c) 1982, 1986, 1988
     32  * The Regents of the University of California
     33  * All Rights Reserved
     34  *
     35  * University Acknowledgment- Portions of this document are derived from
     36  * software developed by the University of California, Berkeley, and its
     37  * contributors.
     38  */
     39 
     40 /*
     41  * init(1M) is the general process spawning program.  Its primary job is to
     42  * start and restart svc.startd for smf(5).  For backwards-compatibility it also
     43  * spawns and respawns processes according to /etc/inittab and the current
     44  * run-level.  It reads /etc/default/init for general configuration.
     45  *
     46  * To change run-levels the system administrator runs init from the command
     47  * line with a level name.  init signals svc.startd via libscf and directs the
     48  * zone's init (pid 1 in the global zone) what to do by sending it a signal;
     49  * these signal numbers are commonly referred to in the code as 'states'.  Valid
     50  * run-levels are [sS0123456].  Additionally, init can be given directives
     51  * [qQabc], which indicate actions to be taken pertaining to /etc/inittab.
     52  *
     53  * When init processes inittab entries, it finds processes that are to be
     54  * spawned at various run-levels.  inittab contains the set of the levels for
     55  * which each inittab entry is valid.
     56  *
     57  * State File and Restartability
     58  *   Premature exit by init(1M) is handled as a special case by the kernel:
     59  *   init(1M) will be immediately re-executed, retaining its original PID.  (PID
     60  *   1 in the global zone.)  To track the processes it has previously spawned,
     61  *   as well as other mutable state, init(1M) regularly updates a state file
     62  *   such that its subsequent invocations have knowledge of its various
     63  *   dependent processes and duties.
     64  *
     65  * Process Contracts
     66  *   We start svc.startd(1M) in a contract and transfer inherited contracts when
     67  *   restarting it.  Everything else is started using the legacy contract
     68  *   template, and the created contracts are abandoned when they become empty.
     69  *
     70  * utmpx Entry Handling
     71  *   Because init(1M) no longer governs the startup process, its knowledge of
     72  *   when utmpx becomes writable is indirect.  However, spawned processes
     73  *   expect to be constructed with valid utmpx entries.  As a result, attempts
     74  *   to write normal entries will be retried until successful.
     75  *
     76  * Maintenance Mode
     77  *   In certain failure scenarios, init(1M) will enter a maintenance mode, in
     78  *   which it invokes sulogin(1M) to allow the operator an opportunity to
     79  *   repair the system.  Normally, this operation is performed as a
     80  *   fork(2)-exec(2)-waitpid(3C) sequence with the parent waiting for repair or
     81  *   diagnosis to be completed.  In the cases that fork(2) requests themselves
     82  *   fail, init(1M) will directly execute sulogin(1M), and allow the kernel to
     83  *   restart init(1M) on exit from the operator session.
     84  *
     85  *   One scenario where init(1M) enters its maintenance mode is when
     86  *   svc.startd(1M) begins to fail rapidly, defined as when the average time
     87  *   between recent failures drops below a given threshold.
     88  */
     89 
     90 #include <sys/contract/process.h>
     91 #include <sys/ctfs.h>
     92 #include <sys/stat.h>
     93 #include <sys/statvfs.h>
     94 #include <sys/stropts.h>
     95 #include <sys/systeminfo.h>
     96 #include <sys/time.h>
     97 #include <sys/termios.h>
     98 #include <sys/tty.h>
     99 #include <sys/types.h>
    100 #include <sys/utsname.h>
    101 
    102 #include <bsm/adt_event.h>
    103 #include <bsm/libbsm.h>
    104 #include <security/pam_appl.h>
    105 
    106 #include <assert.h>
    107 #include <ctype.h>
    108 #include <dirent.h>
    109 #include <errno.h>
    110 #include <fcntl.h>
    111 #include <libcontract.h>
    112 #include <libcontract_priv.h>
    113 #include <libintl.h>
    114 #include <libscf.h>
    115 #include <libscf_priv.h>
    116 #include <poll.h>
    117 #include <procfs.h>
    118 #include <signal.h>
    119 #include <stdarg.h>
    120 #include <stdio.h>
    121 #include <stdio_ext.h>
    122 #include <stdlib.h>
    123 #include <string.h>
    124 #include <strings.h>
    125 #include <syslog.h>
    126 #include <time.h>
    127 #include <ulimit.h>
    128 #include <unistd.h>
    129 #include <utmpx.h>
    130 #include <wait.h>
    131 #include <zone.h>
    132 #include <ucontext.h>
    133 
    134 #undef	sleep
    135 
    136 #define	fioctl(p, sptr, cmd)	ioctl(fileno(p), sptr, cmd)
    137 #define	min(a, b)		(((a) < (b)) ? (a) : (b))
    138 
    139 #define	TRUE	1
    140 #define	FALSE	0
    141 #define	FAILURE	-1
    142 
    143 #define	UT_LINE_SZ	32	/* Size of a utmpx ut_line field */
    144 
    145 /*
    146  * SLEEPTIME	The number of seconds "init" sleeps between wakeups if
    147  *		nothing else requires this "init" wakeup.
    148  */
    149 #define	SLEEPTIME	(5 * 60)
    150 
    151 /*
    152  * MAXCMDL	The maximum length of a command string in inittab.
    153  */
    154 #define	MAXCMDL	512
    155 
    156 /*
    157  * EXEC		The length of the prefix string added to all commands
    158  *		found in inittab.
    159  */
    160 #define	EXEC	(sizeof ("exec ") - 1)
    161 
    162 /*
    163  * TWARN	The amount of time between warning signal, SIGTERM,
    164  *		and the fatal kill signal, SIGKILL.
    165  */
    166 #define	TWARN	5
    167 
    /*
     * id_eq(x, y) evaluates to TRUE when the first four elements of x and y
     * are equal, and to FALSE otherwise.  It is meant for the fixed-width
     * four-character inittab ids (see INITTAB_ENTRY_ID_SIZE), which are
     * compared element by element rather than as NUL-terminated strings.
     *
     * Both parameters are parenthesized so that pointer expressions such as
     * (p + 1) may be passed.  Each argument may be evaluated up to four times,
     * so it must not have side effects.
     */
    #define	id_eq(x, y)	(((x)[0] == (y)[0] && (x)[1] == (y)[1] && \
    			(x)[2] == (y)[2] && (x)[3] == (y)[3]) ? TRUE : FALSE)
    170 
    171 /*
    172  * The kernel's default umask is 022 these days; since some processes inherit
    173  * their umask from init, init will set it from CMASK in /etc/default/init.
    174  * init gets the default umask from the kernel; it sets the umask to 022
    175  * whenever it wants to create a file and reverts to CMASK afterwards.
    176  */
    177 
    178 static int cmask;
    179 
    180 /*
    181  * The following definitions, concluding with the 'lvls' array, provide a
    182  * common mapping between level-name (like 'S'), signal number (state),
    183  * run-level mask, and specific properties associated with a run-level.
    184  * This array should be accessed using the routines lvlname_to_state(),
    185  * lvlname_to_mask(), state_to_mask(), and state_to_flags().
    186  */
    187 
    188 /*
    189  * Correspondence of signals to init actions.
    190  */
    191 #define	LVLQ		SIGHUP
    192 #define	LVL0		SIGINT
    193 #define	LVL1		SIGQUIT
    194 #define	LVL2		SIGILL
    195 #define	LVL3		SIGTRAP
    196 #define	LVL4		SIGIOT
    197 #define	LVL5		SIGEMT
    198 #define	LVL6		SIGFPE
    199 #define	SINGLE_USER	SIGBUS
    200 #define	LVLa		SIGSEGV
    201 #define	LVLb		SIGSYS
    202 #define	LVLc		SIGPIPE
    203 
    204 /*
    205  * Bit Mask for each level.  Used to determine legal levels.
    206  */
    207 #define	MASK0	0x0001
    208 #define	MASK1	0x0002
    209 #define	MASK2	0x0004
    210 #define	MASK3	0x0008
    211 #define	MASK4	0x0010
    212 #define	MASK5	0x0020
    213 #define	MASK6	0x0040
    214 #define	MASKSU	0x0080
    215 #define	MASKa	0x0100
    216 #define	MASKb	0x0200
    217 #define	MASKc	0x0400
    218 
    219 #define	MASK_NUMERIC (MASK0 | MASK1 | MASK2 | MASK3 | MASK4 | MASK5 | MASK6)
    220 #define	MASK_abc (MASKa | MASKb | MASKc)
    221 
    222 /*
    223  * Flags to indicate properties of various states.
    224  */
    225 #define	LSEL_RUNLEVEL	0x0001	/* runlevels you can transition to */
    226 
    227 typedef struct lvl {	/* one row of the lvls[] mapping table */
    228 	int	lvl_state;	/* signal number ("state"), e.g. LVL0 or LVLQ */
    229 	int	lvl_mask;	/* run-level bit (MASK*); 0 for the Q/q rows */
    230 	char	lvl_name;	/* level name character, e.g. 'S' */
    231 	int	lvl_flags;	/* properties of the state, e.g. LSEL_RUNLEVEL */
    232 } lvl_t;
    233 
    234 static lvl_t lvls[] = {	/* { lvl_state, lvl_mask, lvl_name, lvl_flags } */
    235 	{ LVLQ,		0,	'Q', 0					},
    236 	{ LVLQ,		0,	'q', 0					},
    237 	{ LVL0,		MASK0,	'0', LSEL_RUNLEVEL			},
    238 	{ LVL1, 	MASK1,	'1', LSEL_RUNLEVEL			},
    239 	{ LVL2, 	MASK2,	'2', LSEL_RUNLEVEL			},
    240 	{ LVL3, 	MASK3,	'3', LSEL_RUNLEVEL			},
    241 	{ LVL4, 	MASK4,	'4', LSEL_RUNLEVEL			},
    242 	{ LVL5, 	MASK5,	'5', LSEL_RUNLEVEL			},
    243 	{ LVL6, 	MASK6, 	'6', LSEL_RUNLEVEL			},
    244 	{ SINGLE_USER, 	MASKSU, 'S', LSEL_RUNLEVEL			},
    245 	{ SINGLE_USER, 	MASKSU, 's', LSEL_RUNLEVEL			},
    246 	{ LVLa,		MASKa,	'a', 0					},
    247 	{ LVLb,		MASKb,	'b', 0					},
    248 	{ LVLc,		MASKc,	'c', 0					}
    249 };	/* 'Q'/'q' and 'S'/'s' are two names for the same state */
    250 
    251 #define	LVL_NELEMS (sizeof (lvls) / sizeof (lvl_t))
    252 
    253 /*
    254  * Legal action field values.
    255  */
    256 #define	OFF		0	/* Kill process if on, else ignore */
    257 #define	RESPAWN		1	/* Continuously restart process when it dies */
    258 #define	ONDEMAND	RESPAWN	/* Respawn for a, b, c type processes */
    259 #define	ONCE		2	/* Start process, do not respawn when dead */
    260 #define	WAIT		3	/* Perform once and wait to complete */
    261 #define	BOOT		4	/* Start at boot time only */
    262 #define	BOOTWAIT	5	/* Start at boot time and wait to complete */
    263 #define	POWERFAIL	6	/* Start on powerfail */
    264 #define	POWERWAIT	7	/* Start and wait for complete on powerfail */
    265 #define	INITDEFAULT	8	/* Default level "init" should start at */
    266 #define	SYSINIT		9	/* Actions performed before init speaks */
    267 
    268 #define	M_OFF		0001
    269 #define	M_RESPAWN	0002
    270 #define	M_ONDEMAND	M_RESPAWN
    271 #define	M_ONCE		0004
    272 #define	M_WAIT		0010
    273 #define	M_BOOT		0020
    274 #define	M_BOOTWAIT	0040
    275 #define	M_PF		0100
    276 #define	M_PWAIT		0200
    277 #define	M_INITDEFAULT	0400
    278 #define	M_SYSINIT	01000
    279 
    280 /* States for the inittab parser in getcmd(). */
    281 #define	ID	1
    282 #define	LEVELS	2
    283 #define	ACTION	3
    284 #define	COMMAND	4
    285 #define	COMMENT	5
    286 
    287 /*
    288  * inittab entry id constants
    289  */
    290 #define	INITTAB_ENTRY_ID_SIZE 4
    291 #define	INITTAB_ENTRY_ID_STR_FORMAT "%.4s"	/* if INITTAB_ENTRY_ID_SIZE */
    292 						/* changes, this should */
    293 						/* change accordingly */
    294 
    295 /*
    296  * Init can be in any of three main states, "normal" mode where it is
    297  * processing entries for the lines file in a normal fashion, "boot" mode,
    298  * where it is only interested in the boot actions, and "powerfail" mode,
    299  * where it is only interested in powerfail related actions. The following
    300  * masks declare the legal actions for each mode.
    301  */
    302 #define	NORMAL_MODES	(M_OFF | M_RESPAWN | M_ONCE | M_WAIT)
    303 #define	BOOT_MODES	(M_BOOT | M_BOOTWAIT)
    304 #define	PF_MODES	(M_PF | M_PWAIT)
    305 
    306 struct PROC_TABLE {	/* One slot of init's process table */
    307 	char	p_id[INITTAB_ENTRY_ID_SIZE];	/* Four letter unique id of */
    308 						/* process; no room for a NUL */
    309 	pid_t	p_pid;		/* Process id */
    310 	short	p_count;	/* How many respawns of this command in */
    311 				/*   the current series */
    312 	long	p_time;		/* Start time for a series of respawns */
    313 	short	p_flags;	/* OCCUPIED, LIVING, ... bits (see PF_MASK) */
    314 	short	p_exit;		/* Exit status of a process which died */
    315 };
    316 
    317 /*
    318  * Flags for the "p_flags" word of a PROC_TABLE entry:
    319  *
    320  *	OCCUPIED	This slot in init's proc table is in use.
    321  *
    322  *	LIVING		Process is alive.
    323  *
    324  *	NOCLEANUP	efork() is not allowed to cleanup this entry even
    325  *			if process is dead.
    326  *
    327  *	NAMED		This process has a name, i.e. came from inittab.
    328  *
    329  *	DEMANDREQUEST	Process started by a "telinit [abc]" command.  Processes
    330  *			formed this way are respawnable and immune to level
    331  *			changes as long as their entry exists in inittab.
    332  *
    333  *	TOUCHED		Flag used by remv() to determine whether it has looked
    334  *			at an entry while checking for processes to be killed.
    335  *
    336  *	WARNED		Flag used by remv() to mark processes that have been
    337  *			sent the SIGTERM signal.  If they don't die in 5
    338  *			seconds, they are sent the SIGKILL signal.
    339  *
    340  *	KILLED		Flag used by remv() to mark procs that have been sent
    341  *			the SIGTERM and SIGKILL signals.
    342  *
    343  *	PF_MASK		Bitwise or of legal flags, for sanity checking.
    344  */
    345 #define	OCCUPIED	01
    346 #define	LIVING		02
    347 #define	NOCLEANUP	04
    348 #define	NAMED		010
    349 #define	DEMANDREQUEST	020
    350 #define	TOUCHED		040
    351 #define	WARNED		0100
    352 #define	KILLED		0200
    353 #define	PF_MASK		0377
    354 
    355 /*
    356  * Respawn limits for processes that are to be respawned:
    357  *
    358  *	SPAWN_INTERVAL	The number of seconds over which "init" will try to
    359  *			respawn a process SPAWN_LIMIT times before it gets mad.
    360  *
    361  *	SPAWN_LIMIT	The number of respawns "init" will attempt in
    362  *			SPAWN_INTERVAL seconds before it generates an
    363  *			error message and inhibits further tries for
    364  *			INHIBIT seconds.
    365  *
    366  *	INHIBIT		The number of seconds "init" ignores an entry it had
    367  *			trouble spawning unless a "telinit Q" is received.
    368  */
    369 
    370 #define	SPAWN_INTERVAL	(2*60)
    371 #define	SPAWN_LIMIT	10
    372 #define	INHIBIT		(5*60)
    373 
    374 /*
    375  * The maximum number of decimal digits for an id_t.  (ceil(log10 (max_id)))
    376  */
    377 #define	ID_MAX_STR_LEN	10
    378 
    379 #define	NULLPROC	((struct PROC_TABLE *)(0))
    380 #define	NO_ROOM		((struct PROC_TABLE *)(FAILURE))
    381 
    382 struct CMD_LINE {	/* One inittab entry: id, levels, action, command */
    383 	char c_id[INITTAB_ENTRY_ID_SIZE];	/* Four letter unique id of */
    384 						/* process to be affected by */
    385 						/* action */
    386 	short c_levels;	/* Mask of legal levels for process (MASK* bits) */
    387 	short c_action;	/* Mask for type of action (presumably an M_* bit) */
    388 	char *c_command; /* Pointer to the command string for this entry */
    389 };
    390 
    391 struct	pidrec {
    392 	int	pd_type;	/* Command type: ADDPID or REMPID */
    393 	pid_t	pd_pid;		/* pid to add or remove */
    394 };
    395 
    396 /*
    397  * pd_type's
    398  */
    399 #define	ADDPID	1
    400 #define	REMPID	2
    401 
    402 static struct	pidlist {	/* Node in a singly linked list of pids */
    403 	pid_t	pl_pid;		/* pid to watch for */
    404 	int	pl_dflag;	/* Flag indicating SIGCLD from this pid */
    405 	short	pl_exit;	/* Exit status of proc */
    406 	struct	pidlist	*pl_next; /* Next in list */
    407 } *Plhead, *Plfree;	/* presumably in-use and free list heads - confirm */
    408 
    409 /*
    410  * The following structure contains a set of modes for /dev/syscon
    411  * and should match the default contents of /etc/ioctl.syscon.  It should also
    412  * be kept in-sync with base_termios in uts/common/io/ttcompat.c.
    413  */
    414 static struct termios	dflt_termios = {	/* default /dev/syscon modes */
    415 	BRKINT|ICRNL|IXON|IMAXBEL,			/* iflag */
    416 	OPOST|ONLCR|TAB3,				/* oflag */
    417 	CS8|CREAD|B9600,				/* cflag */
    418 	ISIG|ICANON|ECHO|ECHOE|ECHOK|ECHOCTL|ECHOKE|IEXTEN, /* lflag */
    419 	CINTR, CQUIT, CERASE, CKILL, CEOF, 0, 0, 0,	/* presumably c_cc[] */
    420 	0, 0, 0, 0, 0, 0, 0, 0,	/* remaining values are all zero */
    421 	0, 0, 0
    422 };
    423 
    424 static struct termios	stored_syscon_termios;
    425 static int		write_ioctl = 0;	/* Rewrite /etc/ioctl.syscon */
    426 
    427 static union WAKEUP {	/* Reasons "init" has been woken up */
    428 	struct WAKEFLAGS {
    429 		unsigned w_usersignal : 1;	/* User sent signal to "init" */
    430 		unsigned w_childdeath : 1;	/* An "init" child died */
    431 		unsigned w_powerhit : 1;	/* OS experienced powerfail */
    432 	}	w_flags;
    433 	int w_mask;	/* overlays w_flags; main() tests it against 0 */
    434 } wakeup;
    435 
    436 
    437 struct init_state {	/* init's mutable state, reached through g_state */
    438 	int			ist_runlevel;	/* accessed as cur_state */
    439 	int			ist_num_proc;	/* accessed as num_proc */
    440 	int			ist_utmpx_ok;	/* accessed as utmpx_ok */
    441 	struct PROC_TABLE	ist_proc_table[1];	/* accessed as proc_table */
    442 };	/* NOTE(review): [1] looks like a variable-length tail - confirm */
    443 
    444 #define	cur_state	(g_state->ist_runlevel)
    445 #define	num_proc	(g_state->ist_num_proc)
    446 #define	proc_table	(g_state->ist_proc_table)
    447 #define	utmpx_ok	(g_state->ist_utmpx_ok)
    448 
    449 /* Contract cookies. */
    450 #define	ORDINARY_COOKIE		0
    451 #define	STARTD_COOKIE		1
    452 
    453 
    /*
     * bad_error(func, err) - abort after an error that should never happen.
     *
     *	func	string naming the function that failed (printed with %s)
     *	err	the unexpected error value (printed with %d)
     *
     * Unless NDEBUG is defined, a message giving the source file, line,
     * function name and error value is written to stderr before abort() is
     * called; with NDEBUG the macro is just abort().
     *
     * The multi-statement form is wrapped in do { } while (0) so that
     * "bad_error(f, e);" is exactly one statement and can be used as the
     * unbraced body of an if that has an else branch.
     */
    #ifndef NDEBUG
    #define	bad_error(func, err)	do {					\
    	(void) fprintf(stderr, "%s:%d: %s() failed with unexpected "	\
    	    "error %d.  Aborting.\n", __FILE__, __LINE__, (func), (err)); \
    	abort();							\
    } while (0)
    #else
    #define	bad_error(func, err)	abort()
    #endif
    463 
    464 
    465 /*
    466  * Useful file and device names.
    467  */
    468 static char *CONSOLE	  = "/dev/console";	/* Real system console */
    469 static char *INITPIPE_DIR = "/var/run";
    470 static char *INITPIPE	  = "/var/run/initpipe";
    471 
    472 #define	INIT_STATE_DIR "/etc/svc/volatile"
    473 static const char * const init_state_file = INIT_STATE_DIR "/init.state";
    474 static const char * const init_next_state_file =
    475 	INIT_STATE_DIR "/init-next.state";
    476 
    477 static const int init_num_proc = 20;	/* Initial size of process table. */
    478 
    479 static char *UTMPX	 = UTMPX_FILE;		/* Snapshot record file */
    480 static char *WTMPX	 = WTMPX_FILE;		/* Long term record file */
    481 static char *INITTAB	 = "/etc/inittab";	/* Script file for "init" */
    482 static char *SYSTTY	 = "/dev/systty";	/* System Console */
    483 static char *SYSCON	 = "/dev/syscon";	/* Virtual System console */
    484 static char *IOCTLSYSCON = "/etc/ioctl.syscon";	/* Last syscon modes */
    485 static char *ENVFILE	 = "/etc/default/init";	/* Default env. */
    486 static char *SU	= "/etc/sulogin";	/* Super-user program for single user */
    487 static char *SH	= "/sbin/sh";		/* Standard shell */
    488 
    489 /*
    490  * Default Path.  /sbin is included in path only during sysinit phase
    491  */
    492 #define	DEF_PATH	"PATH=/usr/sbin:/usr/bin"
    493 #define	INIT_PATH	"PATH=/sbin:/usr/sbin:/usr/bin"
    494 
    495 static int	prior_state;
    496 static int	prev_state;	/* State "init" was in last time it woke */
    497 static int	new_state;	/* State user wants "init" to go to. */
    498 static int	lvlq_received;	/* Explicit request to examine state */
    499 static int	op_modes = BOOT_MODES; /* Current state of "init" */
    500 static int	Gchild = 0;	/* Flag to indicate "godchild" died, set in */
    501 				/*   childeath() and cleared in cleanaux() */
    502 static int	Pfd = -1;	/* fd to receive pids thru */
    503 static unsigned int	spawncnt, pausecnt;
    504 static int	rsflag;		/* Set if a respawn has taken place */
    505 static volatile int time_up;	/* Flag set to TRUE by the alarm interrupt */
    506 				/* routine each time an alarm interrupt */
    507 				/* takes place. */
    508 static int	sflg = 0;	/* Set if we were booted -s to single user */
    509 static int	rflg = 0;	/* Set if booted -r, reconfigure devices */
    510 static int	bflg = 0;	/* Set if booted -b, don't run rc scripts */
    511 static pid_t	init_pid;	/* PID of "one true" init for current zone */
    512 
    513 static struct init_state *g_state = NULL;
    514 static size_t	g_state_sz;
    515 static int	booting = 1;	/* Set while we're booting. */
    516 
    517 /*
    518  * Array for default global environment.
    519  */
    520 #define	MAXENVENT	24	/* Max number of default env variables + 1 */
    521 				/* init can use three itself, so this leaves */
    522 				/* 20 for the administrator in ENVFILE. */
    523 static char	*glob_envp[MAXENVENT];	/* Array of environment strings */
    524 static int	glob_envn;		/* Number of environment strings */
    525 
    526 
    527 static struct pollfd	poll_fds[1];
    528 static int		poll_nfds = 0;	/* poll_fds is uninitialized */
    529 
    530 /*
    531  * Contracts constants
    532  */
    533 #define	SVC_INIT_PREFIX "init:/"
    534 #define	SVC_AUX_SIZE (INITTAB_ENTRY_ID_SIZE + 1)
    535 #define	SVC_FMRI_SIZE (sizeof (SVC_INIT_PREFIX) + INITTAB_ENTRY_ID_SIZE)
    536 
    537 static int	legacy_tmpl = -1;	/* fd for legacy contract template */
    538 static int	startd_tmpl = -1;	/* fd for svc.startd's template */
    539 static char	startd_svc_aux[SVC_AUX_SIZE];
    540 
    541 static char	startd_cline[256] = "";	/* svc.startd's command line */
    542 static int	do_restart_startd = 1;	/* Whether to restart svc.startd. */
    543 static char	*smf_options = NULL;	/* Options to give to startd. */
    544 static int	smf_debug = 0;		/* Messages for debugging smf(5) */
    545 static time_t	init_boot_time;		/* Substitute for kernel boot time. */
    546 
    547 #define	NSTARTD_FAILURE_TIMES	3		/* trigger after 3 failures */
    548 #define	STARTD_FAILURE_RATE_NS	5000000000LL	/* 1 failure/5 seconds */
    549 
    550 static hrtime_t	startd_failure_time[NSTARTD_FAILURE_TIMES];
    551 static uint_t	startd_failure_index;
    552 
    553 
    554 static char	*prog_name(char *);
    555 static int	state_to_mask(int);
    556 static int	lvlname_to_mask(char, int *);
    557 static void	lscf_set_runlevel(char);
    558 static int	state_to_flags(int);
    559 static char	state_to_name(int);
    560 static int	lvlname_to_state(char);
    561 static int	getcmd(struct CMD_LINE *, char *);
    562 static int	realcon();
    563 static int	spawn_processes();
    564 static int	get_ioctl_syscon();
    565 static int	account(short, struct PROC_TABLE *, char *);
    566 static void	alarmclk();
    567 static void	childeath(int);
    568 static void	cleanaux();
    569 static void	clearent(pid_t, short);
    570 static void	console(boolean_t, char *, ...);
    571 static void	init_signals(void);
    572 static void	setup_pipe();
    573 static void	killproc(pid_t);
    574 static void	init_env();
    575 static void	boot_init();
    576 static void	powerfail();
    577 static void	remv();
    578 static void	write_ioctl_syscon();
    579 static void	spawn(struct PROC_TABLE *, struct CMD_LINE *);
    580 static void	setimer(int);
    581 static void	siglvl(int, siginfo_t *, ucontext_t *);
    582 static void	sigpoll(int);
    583 static void	enter_maintenance(void);
    584 static void	timer(int);
    585 static void	userinit(int, char **);
    586 static void	notify_pam_dead(struct utmpx *);
    587 static long	waitproc(struct PROC_TABLE *);
    588 static struct PROC_TABLE *efork(int, struct PROC_TABLE *, int);
    589 static struct PROC_TABLE *findpslot(struct CMD_LINE *);
    590 static void	increase_proc_table_size();
    591 static void	st_init();
    592 static void	st_write();
    593 static void	contracts_init();
    594 static void	contract_event(struct pollfd *);
    595 static int	startd_run(const char *, int, ctid_t);
    596 static void	startd_record_failure();
    597 static int	startd_failure_rate_critical();
    598 static char	*audit_boot_msg();
    599 static int	audit_put_record(int, int, char *);
    600 static void	update_boot_archive(int new_state);
    601 
    602 int
    603 main(int argc, char *argv[])
    604 {
    605 	int	chg_lvl_flag = FALSE, print_banner = FALSE;
    606 	int	may_need_audit = 1;
    607 	int	c;
    608 	char	*msg;
    609 
    610 	/* Get a timestamp for use as boot time, if needed. */
    611 	(void) time(&init_boot_time);
    612 
    613 	/* Get the default umask */
    614 	cmask = umask(022);
    615 	(void) umask(cmask);
    616 
    617 	/* Parse the arguments to init. Check for single user */
    618 	opterr = 0;
    619 	while ((c = getopt(argc, argv, "brsm:")) != EOF) {
    620 		switch (c) {
    621 		case 'b':
    622 			rflg = 0;
    623 			bflg = 1;
    624 			if (!sflg)
    625 				sflg++;
    626 			break;
    627 		case 'r':
    628 			bflg = 0;
    629 			rflg++;
    630 			break;
    631 		case 's':
    632 			if (!bflg)
    633 				sflg++;
    634 			break;
    635 		case 'm':
    636 			smf_options = optarg;
    637 			smf_debug = (strstr(smf_options, "debug") != NULL);
    638 			break;
    639 		}
    640 	}
    641 
    642 	/*
    643 	 * Determine if we are the main init, or a user invoked init, whose job
    644 	 * it is to inform init to change levels or perform some other action.
    645 	 */
    646 	if (zone_getattr(getzoneid(), ZONE_ATTR_INITPID, &init_pid,
    647 	    sizeof (init_pid)) != sizeof (init_pid)) {
    648 		(void) fprintf(stderr, "could not get pid for init\n");
    649 		return (1);
    650 	}
    651 
    652 	/*
    653 	 * If this PID is not the same as the "true" init for the zone, then we
    654 	 * must be in 'user' mode.
    655 	 */
    656 	if (getpid() != init_pid) {
    657 		userinit(argc, argv);
    658 	}
    659 
    660 	if (getzoneid() != GLOBAL_ZONEID) {
    661 		print_banner = TRUE;
    662 	}
    663 
    664 	/*
    665 	 * Initialize state (and set "booting").
    666 	 */
    667 	st_init();
    668 
    669 	if (booting && print_banner) {
    670 		struct utsname un;
    671 		char buf[BUFSIZ], *isa;
    672 		long ret;
    673 		int bits = 32;
    674 
    675 		/*
    676 		 * We want to print the boot banner as soon as
    677 		 * possible.  In the global zone, the kernel does it,
    678 		 * but we do not have that luxury in non-global zones,
    679 		 * so we will print it here.
    680 		 */
    681 		(void) uname(&un);
    682 		ret = sysinfo(SI_ISALIST, buf, sizeof (buf));
    683 		if (ret != -1L && ret <= sizeof (buf)) {
    684 			for (isa = strtok(buf, " "); isa;
    685 			    isa = strtok(NULL, " ")) {
    686 				if (strcmp(isa, "sparcv9") == 0 ||
    687 				    strcmp(isa, "amd64") == 0) {
    688 					bits = 64;
    689 					break;
    690 				}
    691 			}
    692 		}
    693 
    694 		console(B_FALSE,
    695 		    "\n\n%s Release %s Version %s %d-bit\r\n",
    696 		    un.sysname, un.release, un.version, bits);
    697 		console(B_FALSE,
    698 		    "Copyright 1983-2010 Sun Microsystems, Inc. "
    699 		    " All rights reserved.\r\n");
    700 		console(B_FALSE,
    701 		    "Use is subject to license terms.\r\n");
    702 	}
    703 
    704 	/*
    705 	 * Get the ioctl settings for /dev/syscon from /etc/ioctl.syscon
    706 	 * so that it can be brought up in the state it was in when the
    707 	 * system went down; or set to defaults if ioctl.syscon isn't
    708 	 * valid.
    709 	 *
    710 	 * This needs to be done even if we're restarting so reset_modes()
    711 	 * will work in case we need to go down to single user mode.
    712 	 */
    713 	write_ioctl = get_ioctl_syscon();
    714 
    715 	/*
    716 	 * Set up all signals to be caught or ignored as appropriate.
    717 	 */
    718 	init_signals();
    719 
    720 	/* Load glob_envp from ENVFILE. */
    721 	init_env();
    722 
    723 	contracts_init();
    724 
    725 	if (!booting) {
    726 		/* cur_state should have been read in. */
    727 
    728 		op_modes = NORMAL_MODES;
    729 
    730 		/* Rewrite the ioctl file if it was bad. */
    731 		if (write_ioctl)
    732 			write_ioctl_syscon();
    733 	} else {
    734 		/*
    735 		 * It's fine to boot up with state as zero, because
    736 		 * startd will later tell us the real state.
    737 		 */
    738 		cur_state = 0;
    739 		op_modes = BOOT_MODES;
    740 
    741 		boot_init();
    742 	}
    743 
    744 	prev_state = prior_state = cur_state;
    745 
    746 	setup_pipe();
    747 
    748 	/*
    749 	 * Here is the beginning of the main process loop.
    750 	 */
    751 	for (;;) {
    752 		if (lvlq_received) {
    753 			setup_pipe();
    754 			lvlq_received = B_FALSE;
    755 		}
    756 
    757 		/*
    758 		 * Clean up any accounting records for dead "godchildren".
    759 		 */
    760 		if (Gchild)
    761 			cleanaux();
    762 
    763 		/*
    764 		 * If in "normal" mode, check all living processes and initiate
    765 		 * kill sequence on those that should not be there anymore.
    766 		 */
    767 		if (op_modes == NORMAL_MODES && cur_state != LVLa &&
    768 		    cur_state != LVLb && cur_state != LVLc)
    769 			remv();
    770 
    771 		/*
    772 		 * If a change in run levels is the reason we awoke, now do
    773 		 * the accounting to report the change in the utmp file.
    774 		 * Also report the change on the system console.
    775 		 */
    776 		if (chg_lvl_flag) {
    777 			chg_lvl_flag = FALSE;
    778 
    779 			if (state_to_flags(cur_state) & LSEL_RUNLEVEL) {
    780 				char rl = state_to_name(cur_state);
    781 
    782 				if (rl != -1)
    783 					lscf_set_runlevel(rl);
    784 			}
    785 
    786 			may_need_audit = 1;
    787 		}
    788 
    789 		/*
    790 		 * Scan the inittab file and spawn and respawn processes that
    791 		 * should be alive in the current state. If inittab does not
    792 		 * exist default to  single user mode.
    793 		 */
    794 		if (spawn_processes() == FAILURE) {
    795 			prior_state = prev_state;
    796 			cur_state = SINGLE_USER;
    797 		}
    798 
    799 		/* If any respawns occurred, take note. */
    800 		if (rsflag) {
    801 			rsflag = 0;
    802 			spawncnt++;
    803 		}
    804 
    805 		/*
    806 		 * If a powerfail signal was received during the last
    807 		 * sequence, set mode to powerfail.  When spawn_processes() is
    808 		 * entered the first thing it does is to check "powerhit".  If
    809 		 * it is in PF_MODES then it clears "powerhit" and does
    810 		 * a powerfail sequence.  If it is not in PF_MODES, then it
    811 		 * puts itself in PF_MODES and then clears "powerhit".  Should
    812 		 * "powerhit" get set again while spawn_processes() is working
    813 		 * on a powerfail sequence, the following code  will see that
    814 		 * spawn_processes() tries to execute the powerfail sequence
    815 		 * again.  This guarantees that the powerfail sequence will be
    816 		 * successfully completed before further processing takes
    817 		 * place.
    818 		 */
    819 		if (wakeup.w_flags.w_powerhit) {
    820 			op_modes = PF_MODES;
    821 			/*
    822 			 * Make sure that cur_state != prev_state so that
    823 			 * ONCE and WAIT types work.
    824 			 */
    825 			prev_state = 0;
    826 		} else if (op_modes != NORMAL_MODES) {
    827 			/*
    828 			 * If spawn_processes() was not just called while in
    829 			 * normal mode, we set the mode to normal and it will
    830 			 * be called again to check normal modes.  If we have
    831 			 * just finished a powerfail sequence with prev_state
    832 			 * equal to zero, we set prev_state equal to cur_state
    833 			 * before the next pass through.
    834 			 */
    835 			if (op_modes == PF_MODES)
    836 				prev_state = cur_state;
    837 			op_modes = NORMAL_MODES;
    838 		} else if (cur_state == LVLa || cur_state == LVLb ||
    839 		    cur_state == LVLc) {
    840 			/*
    841 			 * If it was a change of levels that awakened us and the
    842 			 * new level is one of the demand levels then reset
    843 			 * cur_state to the previous state and do another scan
    844 			 * to take care of the usual respawn actions.
    845 			 */
    846 			cur_state = prior_state;
    847 			prior_state = prev_state;
    848 			prev_state = cur_state;
    849 		} else {
    850 			prev_state = cur_state;
    851 
    852 			if (wakeup.w_mask == 0) {
    853 				int ret;
    854 
    855 				if (may_need_audit && (cur_state == LVL3)) {
    856 					msg = audit_boot_msg();
    857 
    858 					may_need_audit = 0;
    859 					(void) audit_put_record(ADT_SUCCESS,
    860 					    ADT_SUCCESS, msg);
    861 					free(msg);
    862 				}
    863 
    864 				/*
    865 				 * "init" is finished with all actions for
    866 				 * the current wakeup.
    867 				 */
    868 				ret = poll(poll_fds, poll_nfds,
    869 				    SLEEPTIME * MILLISEC);
    870 				pausecnt++;
    871 				if (ret > 0)
    872 					contract_event(&poll_fds[0]);
    873 				else if (ret < 0 && errno != EINTR)
    874 					console(B_TRUE, "poll() error: %s\n",
    875 					    strerror(errno));
    876 			}
    877 
    878 			if (wakeup.w_flags.w_usersignal) {
    879 				/*
    880 				 * Install the new level.  This could be a real
    881 				 * change in levels  or a telinit [Q|a|b|c] or
    882 				 * just a telinit to the same level at which
    883 				 * we are running.
    884 				 */
    885 				if (new_state != cur_state) {
    886 					if (new_state == LVLa ||
    887 					    new_state == LVLb ||
    888 					    new_state == LVLc) {
    889 						prev_state = prior_state;
    890 						prior_state = cur_state;
    891 						cur_state = new_state;
    892 					} else {
    893 						prev_state = cur_state;
    894 						if (cur_state >= 0)
    895 							prior_state = cur_state;
    896 						cur_state = new_state;
    897 						chg_lvl_flag = TRUE;
    898 					}
    899 				}
    900 
    901 				new_state = 0;
    902 			}
    903 
    904 			if (wakeup.w_flags.w_powerhit)
    905 				op_modes = PF_MODES;
    906 
    907 			/*
    908 			 * Clear all wakeup reasons.
    909 			 */
    910 			wakeup.w_mask = 0;
    911 		}
    912 	}
    913 
    914 	/*NOTREACHED*/
    915 }
    916 
    917 static void
    918 update_boot_archive(int new_state)
    919 {
    920 	if (new_state != LVL0 && new_state != LVL5 && new_state != LVL6)
    921 		return;
    922 
    923 	if (getzoneid() != GLOBAL_ZONEID)
    924 		return;
    925 
    926 	(void) system("/sbin/bootadm -ea update_all");
    927 }
    928 
    929 /*
    930  * void enter_maintenance()
    931  *   A simple invocation of sulogin(1M), with no baggage, in the case that we
    932  *   are unable to activate svc.startd(1M).  We fork; the child runs sulogin;
    933  *   we wait for it to exit.
    934  */
    935 static void
    936 enter_maintenance()
    937 {
    938 	struct PROC_TABLE	*su_process;
    939 
    940 	console(B_FALSE, "Requesting maintenance mode\n"
    941 	    "(See /lib/svc/share/README for additional information.)\n");
    942 	(void) sighold(SIGCLD);
    943 	while ((su_process = efork(M_OFF, NULLPROC, NOCLEANUP)) == NO_ROOM)
    944 		(void) pause();
    945 	(void) sigrelse(SIGCLD);
    946 	if (su_process == NULLPROC) {
    947 		int fd;
    948 
    949 		(void) fclose(stdin);
    950 		(void) fclose(stdout);
    951 		(void) fclose(stderr);
    952 		closefrom(0);
    953 
    954 		fd = open(SYSCON, O_RDWR | O_NOCTTY);
    955 		if (fd >= 0) {
    956 			(void) dup2(fd, 1);
    957 			(void) dup2(fd, 2);
    958 		} else {
    959 			/*
    960 			 * Need to issue an error message somewhere.
    961 			 */
    962 			syslog(LOG_CRIT, "init[%d]: cannot open %s; %s\n",
    963 			    getpid(), SYSCON, strerror(errno));
    964 		}
    965 
    966 		/*
    967 		 * Execute the "su" program.
    968 		 */
    969 		(void) execle(SU, SU, "-", (char *)0, glob_envp);
    970 		console(B_TRUE, "execle of %s failed: %s\n", SU,
    971 		    strerror(errno));
    972 		timer(5);
    973 		exit(1);
    974 	}
    975 
    976 	/*
    977 	 * If we are the parent, wait around for the child to die
    978 	 * or for "init" to be signaled to change levels.
    979 	 */
    980 	while (waitproc(su_process) == FAILURE) {
    981 		/*
    982 		 * All other reasons for waking are ignored when in
    983 		 * single-user mode.  The only child we are interested
    984 		 * in is being waited for explicitly by waitproc().
    985 		 */
    986 		wakeup.w_mask = 0;
    987 	}
    988 }
    989 
    990 /*
    991  * remv() scans through "proc_table" and performs cleanup.  If
    992  * there is a process in the table, which shouldn't be here at
    993  * the current run level, then remv() kills the process.
    994  */
    995 static void
    996 remv()
    997 {
    998 	struct PROC_TABLE	*process;
    999 	struct CMD_LINE		cmd;
   1000 	char			cmd_string[MAXCMDL];
   1001 	int			change_level;
   1002 
   1003 	change_level = (cur_state != prev_state ? TRUE : FALSE);
   1004 
   1005 	/*
   1006 	 * Clear the TOUCHED flag on all entries so that when we have
   1007 	 * finished scanning inittab, we will be able to tell if we
   1008 	 * have any processes for which there is no entry in inittab.
   1009 	 */
   1010 	for (process = proc_table;
   1011 	    (process < proc_table + num_proc); process++) {
   1012 		process->p_flags &= ~TOUCHED;
   1013 	}
   1014 
   1015 	/*
   1016 	 * Scan all inittab entries.
   1017 	 */
   1018 	while (getcmd(&cmd, &cmd_string[0]) == TRUE) {
   1019 		/* Scan for process which goes with this entry in inittab. */
   1020 		for (process = proc_table;
   1021 		    (process < proc_table + num_proc); process++) {
   1022 			if ((process->p_flags & OCCUPIED) == 0 ||
   1023 			    !id_eq(process->p_id, cmd.c_id))
   1024 				continue;
   1025 
   1026 			/*
   1027 			 * This slot contains the process we are looking for.
   1028 			 */
   1029 
   1030 			/*
   1031 			 * Is the cur_state SINGLE_USER or is this process
   1032 			 * marked as "off" or was this proc started by some
   1033 			 * mechanism other than LVL{a|b|c} and the current level
   1034 			 * does not support this process?
   1035 			 */
   1036 			if (cur_state == SINGLE_USER ||
   1037 			    cmd.c_action == M_OFF ||
   1038 			    ((cmd.c_levels & state_to_mask(cur_state)) == 0 &&
   1039 			    (process->p_flags & DEMANDREQUEST) == 0)) {
   1040 				if (process->p_flags & LIVING) {
   1041 					/*
   1042 					 * Touch this entry so we know we have
   1043 					 * treated it.  Note that procs which
   1044 					 * are already dead at this point and
   1045 					 * should not be restarted are left
   1046 					 * untouched.  This causes their slot to
   1047 					 * be freed later after dead accounting
   1048 					 * is done.
   1049 					 */
   1050 					process->p_flags |= TOUCHED;
   1051 
   1052 					if ((process->p_flags & KILLED) == 0) {
   1053 						if (change_level) {
   1054 							process->p_flags
   1055 							    |= WARNED;
   1056 							(void) kill(
   1057 							    process->p_pid,
   1058 							    SIGTERM);
   1059 						} else {
   1060 							/*
   1061 							 * Fork a killing proc
   1062 							 * so "init" can
   1063 							 * continue without
   1064 							 * having to pause for
   1065 							 * TWARN seconds.
   1066 							 */
   1067 							killproc(
   1068 							    process->p_pid);
   1069 						}
   1070 						process->p_flags |= KILLED;
   1071 					}
   1072 				}
   1073 			} else {
   1074 				/*
   1075 				 * Process can exist at current level.  If it is
   1076 				 * still alive or a DEMANDREQUEST we touch it so
   1077 				 * it will be left alone.  Otherwise we leave it
   1078 				 * untouched so it will be accounted for and
   1079 				 * cleaned up later in remv().  Dead
   1080 				 * DEMANDREQUESTs will be accounted but not
   1081 				 * freed.
   1082 				 */
   1083 				if (process->p_flags &
   1084 				    (LIVING|NOCLEANUP|DEMANDREQUEST))
   1085 					process->p_flags |= TOUCHED;
   1086 			}
   1087 
   1088 			break;
   1089 		}
   1090 	}
   1091 
   1092 	st_write();
   1093 
   1094 	/*
   1095 	 * If this was a change of levels call, scan through the
   1096 	 * process table for processes that were warned to die.  If any
   1097 	 * are found that haven't left yet, sleep for TWARN seconds and
   1098 	 * then send final terminations to any that haven't died yet.
   1099 	 */
   1100 	if (change_level) {
   1101 
   1102 		/*
   1103 		 * Set the alarm for TWARN seconds on the assumption
   1104 		 * that there will be some that need to be waited for.
   1105 		 * This won't harm anything except we are guaranteed to
   1106 		 * wakeup in TWARN seconds whether we need to or not.
   1107 		 */
   1108 		setimer(TWARN);
   1109 
   1110 		/*
   1111 		 * Scan for processes which should be dying.  We hope they
   1112 		 * will die without having to be sent a SIGKILL signal.
   1113 		 */
   1114 		for (process = proc_table;
   1115 		    (process < proc_table + num_proc); process++) {
   1116 			/*
   1117 			 * If this process should die, hasn't yet, and the
   1118 			 * TWARN time hasn't expired yet, wait for process
   1119 			 * to die or for timer to expire.
   1120 			 */
   1121 			while (time_up == FALSE &&
   1122 			    (process->p_flags & (WARNED|LIVING|OCCUPIED)) ==
   1123 			    (WARNED|LIVING|OCCUPIED))
   1124 				(void) pause();
   1125 
   1126 			if (time_up == TRUE)
   1127 				break;
   1128 		}
   1129 
   1130 		/*
   1131 		 * If we reached the end of the table without the timer
   1132 		 * expiring, then there are no procs which will have to be
   1133 		 * sent the SIGKILL signal.  If the timer has expired, then
   1134 		 * it is necessary to scan the table again and send signals
   1135 		 * to all processes which aren't going away nicely.
   1136 		 */
   1137 		if (time_up == TRUE) {
   1138 			for (process = proc_table;
   1139 			    (process < proc_table + num_proc); process++) {
   1140 				if ((process->p_flags &
   1141 				    (WARNED|LIVING|OCCUPIED)) ==
   1142 				    (WARNED|LIVING|OCCUPIED))
   1143 					(void) kill(process->p_pid, SIGKILL);
   1144 			}
   1145 		}
   1146 		setimer(0);
   1147 	}
   1148 
   1149 	/*
   1150 	 * Rescan the proc_table for two kinds of entry, those marked LIVING,
   1151 	 * NAMED, which don't have an entry in inittab (haven't been TOUCHED
   1152 	 * by the above scanning), and haven't been sent kill signals, and
   1153 	 * those entries marked not LIVING, NAMED.  The former procs are killed.
   1154 	 * The latter have DEAD_PROCESS accounting done and the slot cleared.
   1155 	 */
   1156 	for (process = proc_table;
   1157 	    (process < proc_table + num_proc); process++) {
   1158 		if ((process->p_flags & (LIVING|NAMED|TOUCHED|KILLED|OCCUPIED))
   1159 		    == (LIVING|NAMED|OCCUPIED)) {
   1160 			killproc(process->p_pid);
   1161 			process->p_flags |= KILLED;
   1162 		} else if ((process->p_flags & (LIVING|NAMED|OCCUPIED)) ==
   1163 		    (NAMED|OCCUPIED)) {
   1164 			(void) account(DEAD_PROCESS, process, NULL);
   1165 			/*
   1166 			 * If this named proc hasn't been TOUCHED, then free the
   1167 			 * space. It has either died of it's own accord, but
   1168 			 * isn't respawnable or it was killed because it
   1169 			 * shouldn't exist at this level.
   1170 			 */
   1171 			if ((process->p_flags & TOUCHED) == 0)
   1172 				process->p_flags = 0;
   1173 		}
   1174 	}
   1175 
   1176 	st_write();
   1177 }
   1178 
   1179 /*
   1180  * Extract the svc.startd command line and whether to restart it from its
   1181  * inittab entry.
   1182  */
   1183 /*ARGSUSED*/
   1184 static void
   1185 process_startd_line(struct CMD_LINE *cmd, char *cmd_string)
   1186 {
   1187 	size_t sz;
   1188 
   1189 	/* Save the command line. */
   1190 	if (sflg || rflg) {
   1191 		/* Also append -r or -s. */
   1192 		(void) strlcpy(startd_cline, cmd_string, sizeof (startd_cline));
   1193 		(void) strlcat(startd_cline, " -", sizeof (startd_cline));
   1194 		if (sflg)
   1195 			sz = strlcat(startd_cline, "s", sizeof (startd_cline));
   1196 		if (rflg)
   1197 			sz = strlcat(startd_cline, "r", sizeof (startd_cline));
   1198 	} else {
   1199 		sz = strlcpy(startd_cline, cmd_string, sizeof (startd_cline));
   1200 	}
   1201 
   1202 	if (sz >= sizeof (startd_cline)) {
   1203 		console(B_TRUE,
   1204 		    "svc.startd command line too long.  Ignoring.\n");
   1205 		startd_cline[0] = '\0';
   1206 		return;
   1207 	}
   1208 }
   1209 
   1210 /*
   1211  * spawn_processes() scans inittab for entries which should be run at this
   1212  * mode.  Processes which should be running but are not, are started.
   1213  */
   1214 static int
   1215 spawn_processes()
   1216 {
   1217 	struct PROC_TABLE		*pp;
   1218 	struct CMD_LINE			cmd;
   1219 	char				cmd_string[MAXCMDL];
   1220 	short				lvl_mask;
   1221 	int				status;
   1222 
   1223 	/*
   1224 	 * First check the "powerhit" flag.  If it is set, make sure the modes
   1225 	 * are PF_MODES and clear the "powerhit" flag.  Avoid the possible race
   1226 	 * on the "powerhit" flag by disallowing a new powerfail interrupt
   1227 	 * between the test of the powerhit flag and the clearing of it.
   1228 	 */
   1229 	if (wakeup.w_flags.w_powerhit) {
   1230 		wakeup.w_flags.w_powerhit = 0;
   1231 		op_modes = PF_MODES;
   1232 	}
   1233 	lvl_mask = state_to_mask(cur_state);
   1234 
   1235 	/*
   1236 	 * Scan through all the entries in inittab.
   1237 	 */
   1238 	while ((status = getcmd(&cmd, &cmd_string[0])) == TRUE) {
   1239 		if (id_eq(cmd.c_id, "smf")) {
   1240 			process_startd_line(&cmd, cmd_string);
   1241 			continue;
   1242 		}
   1243 
   1244 retry_for_proc_slot:
   1245 
   1246 		/*
   1247 		 * Find out if there is a process slot for this entry already.
   1248 		 */
   1249 		if ((pp = findpslot(&cmd)) == NULLPROC) {
   1250 			/*
   1251 			 * we've run out of proc table entries
   1252 			 * increase proc_table.
   1253 			 */
   1254 			increase_proc_table_size();
   1255 
   1256 			/*
   1257 			 * Retry now as we have an empty proc slot.
   1258 			 * In case increase_proc_table_size() fails,
   1259 			 * we will keep retrying.
   1260 			 */
   1261 			goto retry_for_proc_slot;
   1262 		}
   1263 
   1264 		/*
   1265 		 * If there is an entry, and it is marked as DEMANDREQUEST,
   1266 		 * one of the levels a, b, or c is in its levels mask, and
   1267 		 * the action field is ONDEMAND and ONDEMAND is a permissable
   1268 		 * mode, and the process is dead, then respawn it.
   1269 		 */
   1270 		if (((pp->p_flags & (LIVING|DEMANDREQUEST)) == DEMANDREQUEST) &&
   1271 		    (cmd.c_levels & MASK_abc) &&
   1272 		    (cmd.c_action & op_modes) == M_ONDEMAND) {
   1273 			spawn(pp, &cmd);
   1274 			continue;
   1275 		}
   1276 
   1277 		/*
   1278 		 * If the action is not an action we are interested in,
   1279 		 * skip the entry.
   1280 		 */
   1281 		if ((cmd.c_action & op_modes) == 0 || pp->p_flags & LIVING ||
   1282 		    (cmd.c_levels & lvl_mask) == 0)
   1283 			continue;
   1284 
   1285 		/*
   1286 		 * If the modes are the normal modes (ONCE, WAIT, RESPAWN, OFF,
   1287 		 * ONDEMAND) and the action field is either OFF or the action
   1288 		 * field is ONCE or WAIT and the current level is the same as
   1289 		 * the last level, then skip this entry.  ONCE and WAIT only
   1290 		 * get run when the level changes.
   1291 		 */
   1292 		if (op_modes == NORMAL_MODES &&
   1293 		    (cmd.c_action == M_OFF ||
   1294 		    (cmd.c_action & (M_ONCE|M_WAIT)) &&
   1295 		    cur_state == prev_state))
   1296 			continue;
   1297 
   1298 		/*
   1299 		 * At this point we are interested in performing the action for
   1300 		 * this entry.  Actions fall into two categories, spinning off
   1301 		 * a process and not waiting, and spinning off a process and
   1302 		 * waiting for it to die.  If the action is ONCE, RESPAWN,
   1303 		 * ONDEMAND, POWERFAIL, or BOOT we don't wait for the process
   1304 		 * to die, for all other actions we do wait.
   1305 		 */
   1306 		if (cmd.c_action & (M_ONCE | M_RESPAWN | M_PF | M_BOOT)) {
   1307 			spawn(pp, &cmd);
   1308 
   1309 		} else {
   1310 			spawn(pp, &cmd);
   1311 			while (waitproc(pp) == FAILURE)
   1312 				;
   1313 			(void) account(DEAD_PROCESS, pp, NULL);
   1314 			pp->p_flags = 0;
   1315 		}
   1316 	}
   1317 	return (status);
   1318 }
   1319 
   1320 /*
   1321  * spawn() spawns a shell, inserts the information about the process
   1322  * process into the proc_table, and does the startup accounting.
   1323  */
   1324 static void
   1325 spawn(struct PROC_TABLE *process, struct CMD_LINE *cmd)
   1326 {
   1327 	int		i;
   1328 	int		modes, maxfiles;
   1329 	time_t		now;
   1330 	struct PROC_TABLE tmproc, *oprocess;
   1331 
   1332 	/*
   1333 	 * The modes to be sent to efork() are 0 unless we are
   1334 	 * spawning a LVLa, LVLb, or LVLc entry or we will be
   1335 	 * waiting for the death of the child before continuing.
   1336 	 */
   1337 	modes = NAMED;
   1338 	if (process->p_flags & DEMANDREQUEST || cur_state == LVLa ||
   1339 	    cur_state == LVLb || cur_state == LVLc)
   1340 		modes |= DEMANDREQUEST;
   1341 	if ((cmd->c_action & (M_SYSINIT | M_WAIT | M_BOOTWAIT | M_PWAIT)) != 0)
   1342 		modes |= NOCLEANUP;
   1343 
   1344 	/*
   1345 	 * If this is a respawnable process, check the threshold
   1346 	 * information to avoid excessive respawns.
   1347 	 */
   1348 	if (cmd->c_action & M_RESPAWN) {
   1349 		/*
   1350 		 * Add NOCLEANUP to all respawnable commands so that the
   1351 		 * information about the frequency of respawns isn't lost.
   1352 		 */
   1353 		modes |= NOCLEANUP;
   1354 		(void) time(&now);
   1355 
   1356 		/*
   1357 		 * If no time is assigned, then this is the first time
   1358 		 * this command is being processed in this series.  Assign
   1359 		 * the current time.
   1360 		 */
   1361 		if (process->p_time == 0L)
   1362 			process->p_time = now;
   1363 
   1364 		if (process->p_count++ == SPAWN_LIMIT) {
   1365 
   1366 			if ((now - process->p_time) < SPAWN_INTERVAL) {
   1367 				/*
   1368 				 * Process is respawning too rapidly.  Print
   1369 				 * message and refuse to respawn it for now.
   1370 				 */
   1371 				console(B_TRUE, "Command is respawning too "
   1372 				    "rapidly. Check for possible errors.\n"
   1373 				    "id:%4s \"%s\"\n",
   1374 				    &cmd->c_id[0], &cmd->c_command[EXEC]);
   1375 				return;
   1376 			}
   1377 			process->p_time = now;
   1378 			process->p_count = 0;
   1379 
   1380 		} else if (process->p_count > SPAWN_LIMIT) {
   1381 			/*
   1382 			 * If process has been respawning too rapidly and
   1383 			 * the inhibit time limit hasn't expired yet, we
   1384 			 * refuse to respawn.
   1385 			 */
   1386 			if (now - process->p_time < SPAWN_INTERVAL + INHIBIT)
   1387 				return;
   1388 			process->p_time = now;
   1389 			process->p_count = 0;
   1390 		}
   1391 		rsflag = TRUE;
   1392 	}
   1393 
   1394 	/*
   1395 	 * Spawn a child process to execute this command.
   1396 	 */
   1397 	(void) sighold(SIGCLD);
   1398 	oprocess = process;
   1399 	while ((process = efork(cmd->c_action, oprocess, modes)) == NO_ROOM)
   1400 		(void) pause();
   1401 
   1402 	if (process == NULLPROC) {
   1403 
   1404 		/*
   1405 		 * We are the child.  We must make sure we get a different
   1406 		 * file pointer for our references to utmpx.  Otherwise our
   1407 		 * seeks and reads will compete with those of the parent.
   1408 		 */
   1409 		endutxent();
   1410 
   1411 		/*
   1412 		 * Perform the accounting for the beginning of a process.
   1413 		 * Note that all processes are initially "INIT_PROCESS"es.
   1414 		 */
   1415 		tmproc.p_id[0] = cmd->c_id[0];
   1416 		tmproc.p_id[1] = cmd->c_id[1];
   1417 		tmproc.p_id[2] = cmd->c_id[2];
   1418 		tmproc.p_id[3] = cmd->c_id[3];
   1419 		tmproc.p_pid = getpid();
   1420 		tmproc.p_exit = 0;
   1421 		(void) account(INIT_PROCESS, &tmproc,
   1422 		    prog_name(&cmd->c_command[EXEC]));
   1423 		maxfiles = ulimit(UL_GDESLIM, 0);
   1424 		for (i = 0; i < maxfiles; i++)
   1425 			(void) fcntl(i, F_SETFD, FD_CLOEXEC);
   1426 
   1427 		/*
   1428 		 * Now exec a shell with the -c option and the command
   1429 		 * from inittab.
   1430 		 */
   1431 		(void) execle(SH, "INITSH", "-c", cmd->c_command, (char *)0,
   1432 		    glob_envp);
   1433 		console(B_TRUE, "Command\n\"%s\"\n failed to execute.  errno "
   1434 		    "= %d (exec of shell failed)\n", cmd->c_command, errno);
   1435 
   1436 		/*
   1437 		 * Don't come back so quickly that "init" doesn't have a
   1438 		 * chance to finish putting this child in "proc_table".
   1439 		 */
   1440 		timer(20);
   1441 		exit(1);
   1442 
   1443 	}
   1444 
   1445 	/*
   1446 	 * We are the parent.  Insert the necessary
   1447 	 * information in the proc_table.
   1448 	 */
   1449 	process->p_id[0] = cmd->c_id[0];
   1450 	process->p_id[1] = cmd->c_id[1];
   1451 	process->p_id[2] = cmd->c_id[2];
   1452 	process->p_id[3] = cmd->c_id[3];
   1453 
   1454 	st_write();
   1455 
   1456 	(void) sigrelse(SIGCLD);
   1457 }
   1458 
   1459 /*
   1460  * findpslot() finds the old slot in the process table for the
   1461  * command with the same id, or it finds an empty slot.
   1462  */
   1463 static struct PROC_TABLE *
   1464 findpslot(struct CMD_LINE *cmd)
   1465 {
   1466 	struct PROC_TABLE	*process;
   1467 	struct PROC_TABLE	*empty = NULLPROC;
   1468 
   1469 	for (process = proc_table;
   1470 	    (process < proc_table + num_proc); process++) {
   1471 		if (process->p_flags & OCCUPIED &&
   1472 		    id_eq(process->p_id, cmd->c_id))
   1473 			break;
   1474 
   1475 		/*
   1476 		 * If the entry is totally empty and "empty" is still 0,
   1477 		 * remember where this hole is and make sure the slot is
   1478 		 * zeroed out.
   1479 		 */
   1480 		if (empty == NULLPROC && (process->p_flags & OCCUPIED) == 0) {
   1481 			empty = process;
   1482 			process->p_id[0] = '\0';
   1483 			process->p_id[1] = '\0';
   1484 			process->p_id[2] = '\0';
   1485 			process->p_id[3] = '\0';
   1486 			process->p_pid = 0;
   1487 			process->p_time = 0L;
   1488 			process->p_count = 0;
   1489 			process->p_flags = 0;
   1490 			process->p_exit = 0;
   1491 		}
   1492 	}
   1493 
   1494 	/*
   1495 	 * If there is no entry for this slot, then there should be an
   1496 	 * empty slot.  If there is no empty slot, then we've run out
   1497 	 * of proc_table space.  If the latter is true, empty will be
   1498 	 * NULL and the caller will have to complain.
   1499 	 */
   1500 	if (process == (proc_table + num_proc))
   1501 		process = empty;
   1502 
   1503 	return (process);
   1504 }
   1505 
   1506 /*
   1507  * getcmd() parses lines from inittab.  Each time it finds a command line
   1508  * it will return TRUE as well as fill the passed CMD_LINE structure and
   1509  * the shell command string.  When the end of inittab is reached, FALSE
   1510  * is returned inittab is automatically opened if it is not currently open
   1511  * and is closed when the end of the file is reached.
   1512  */
   1513 static FILE *fp_inittab = NULL;
   1514 
   1515 static int
   1516 getcmd(struct CMD_LINE *cmd, char *shcmd)
   1517 {
   1518 	char	*ptr;
   1519 	int	c, lastc, state;
   1520 	char 	*ptr1;
   1521 	int	answer, i, proceed;
   1522 	struct	stat	sbuf;
   1523 	static char *actions[] = {
   1524 		"off", "respawn", "ondemand", "once", "wait", "boot",
   1525 		"bootwait", "powerfail", "powerwait", "initdefault",
   1526 		"sysinit",
   1527 	};
   1528 	static short act_masks[] = {
   1529 		M_OFF, M_RESPAWN, M_ONDEMAND, M_ONCE, M_WAIT, M_BOOT,
   1530 		M_BOOTWAIT, M_PF, M_PWAIT, M_INITDEFAULT, M_SYSINIT,
   1531 	};
   1532 	/*
   1533 	 * Only these actions will be allowed for entries which
   1534 	 * are specified for single-user mode.
   1535 	 */
   1536 	short su_acts = M_INITDEFAULT | M_PF | M_PWAIT | M_WAIT;
   1537 
   1538 	if (fp_inittab == NULL) {
   1539 		/*
   1540 		 * Before attempting to open inittab we stat it to make
   1541 		 * sure it currently exists and is not empty.  We try
   1542 		 * several times because someone may have temporarily
   1543 		 * unlinked or truncated the file.
   1544 		 */
   1545 		for (i = 0; i < 3; i++) {
   1546 			if (stat(INITTAB, &sbuf) == -1) {
   1547 				if (i == 2) {
   1548 					console(B_TRUE,
   1549 					    "Cannot stat %s, errno: %d\n",
   1550 					    INITTAB, errno);
   1551 					return (FAILURE);
   1552 				} else {
   1553 					timer(3);
   1554 				}
   1555 			} else if (sbuf.st_size < 10) {
   1556 				if (i == 2) {
   1557 					console(B_TRUE,
   1558 					    "%s truncated or corrupted\n",
   1559 					    INITTAB);
   1560 					return (FAILURE);
   1561 				} else {
   1562 					timer(3);
   1563 				}
   1564 			} else {
   1565 				break;
   1566 			}
   1567 		}
   1568 
   1569 		/*
   1570 		 * If unable to open inittab, print error message and
   1571 		 * return FAILURE to caller.
   1572 		 */
   1573 		if ((fp_inittab = fopen(INITTAB, "r")) == NULL) {
   1574 			console(B_TRUE, "Cannot open %s errno: %d\n", INITTAB,
   1575 			    errno);
   1576 			return (FAILURE);
   1577 		}
   1578 	}
   1579 
   1580 	/*
   1581 	 * Keep getting commands from inittab until you find a
   1582 	 * good one or run out of file.
   1583 	 */
   1584 	for (answer = FALSE; answer == FALSE; ) {
   1585 		/*
   1586 		 * Zero out the cmd itself before trying next line.
   1587 		 */
   1588 		bzero(cmd, sizeof (struct CMD_LINE));
   1589 
   1590 		/*
   1591 		 * Read in lines of inittab, parsing at colons, until a line is
   1592 		 * read in which doesn't end with a backslash.  Do not start if
   1593 		 * the first character read is an EOF.  Note that this means
   1594 		 * that lines which don't end in a newline are still processed,
   1595 		 * since the "for" will terminate normally once started,
   1596 		 * regardless of whether line terminates with a newline or EOF.
   1597 		 */
   1598 		state = FAILURE;
   1599 		if ((c = fgetc(fp_inittab)) == EOF) {
   1600 			answer = FALSE;
   1601 			(void) fclose(fp_inittab);
   1602 			fp_inittab = NULL;
   1603 			break;
   1604 		}
   1605 
   1606 		for (proceed = TRUE, ptr = shcmd, state = ID, lastc = '\0';
   1607 		    proceed && c != EOF;
   1608 		    lastc = c, c = fgetc(fp_inittab)) {
   1609 		    /* If we're not in the FAILURE state and haven't	*/
   1610 		    /* yet reached the shell command field, process	*/
   1611 		    /* the line, otherwise just look for a real end	*/
   1612 		    /* of line.						*/
   1613 		    if (state != FAILURE && state != COMMAND) {
   1614 			/*
   1615 			 * Squeeze out spaces and tabs.
   1616 			 */
   1617 			if (c == ' ' || c == '\t')
   1618 				continue;
   1619 
   1620 			/*
   1621 			 * Ignore characters in a comment, except for the \n.
   1622 			 */
   1623 			if (state == COMMENT) {
   1624 				if (c == '\n') {
   1625 					lastc = ' ';
   1626 					break;
   1627 				} else {
   1628 					continue;
   1629 				}
   1630 			}
   1631 
   1632 			/*
   1633 			 * Detect comments (lines whose first non-whitespace
   1634 			 * character is '#') by checking that we're at the
   1635 			 * beginning of a line, have seen a '#', and haven't
   1636 			 * yet accumulated any characters.
   1637 			 */
   1638 			if (state == ID && c == '#' && ptr == shcmd) {
   1639 				state = COMMENT;
   1640 				continue;
   1641 			}
   1642 
   1643 			/*
   1644 			 * If the character is a ':', then check the
   1645 			 * previous field for correctness and advance
   1646 			 * to the next field.
   1647 			 */
   1648 			if (c == ':') {
   1649 			    switch (state) {
   1650 
   1651 			    case ID :
   1652 				/*
   1653 				 * Check to see that there are only
   1654 				 * 1 to 4 characters for the id.
   1655 				 */
   1656 				if ((i = ptr - shcmd) < 1 || i > 4) {
   1657 					state = FAILURE;
   1658 				} else {
   1659 					bcopy(shcmd, &cmd->c_id[0], i);
   1660 					ptr = shcmd;
   1661 					state = LEVELS;
   1662 				}
   1663 				break;
   1664 
   1665 			    case LEVELS :
   1666 				/*
   1667 				 * Build a mask for all the levels for
   1668 				 * which this command will be legal.
   1669 				 */
   1670 				for (cmd->c_levels = 0, ptr1 = shcmd;
   1671 				    ptr1 < ptr; ptr1++) {
   1672 					int mask;
   1673 					if (lvlname_to_mask(*ptr1,
   1674 					    &mask) == -1) {
   1675 						state = FAILURE;
   1676 						break;
   1677 					}
   1678 					cmd->c_levels |= mask;
   1679 				}
   1680 				if (state != FAILURE) {
   1681 					state = ACTION;
   1682 					ptr = shcmd;	/* Reset the buffer */
   1683 				}
   1684 				break;
   1685 
   1686 			    case ACTION :
   1687 				/*
   1688 				 * Null terminate the string in shcmd buffer and
   1689 				 * then try to match against legal actions.  If
   1690 				 * the field is of length 0, then the default of
   1691 				 * "RESPAWN" is used if the id is numeric,
   1692 				 * otherwise the default is "OFF".
   1693 				 */
   1694 				if (ptr == shcmd) {
   1695 					if (isdigit(cmd->c_id[0]) &&
   1696 					    (cmd->c_id[1] == '\0' ||
   1697 						isdigit(cmd->c_id[1])) &&
   1698 					    (cmd->c_id[2] == '\0' ||
   1699 						isdigit(cmd->c_id[2])) &&
   1700 					    (cmd->c_id[3] == '\0' ||
   1701 						isdigit(cmd->c_id[3])))
   1702 						    cmd->c_action = M_RESPAWN;
   1703 					else
   1704 						    cmd->c_action = M_OFF;
   1705 				} else {
   1706 				    for (cmd->c_action = 0, i = 0, *ptr = '\0';
   1707 				    i < sizeof (actions)/sizeof (char *);
   1708 				    i++) {
   1709 					if (strcmp(shcmd, actions[i]) == 0) {
   1710 					    if ((cmd->c_levels & MASKSU) &&
   1711 						!(act_masks[i] & su_acts))
   1712 						    cmd->c_action = 0;
   1713 					    else
   1714 						cmd->c_action = act_masks[i];
   1715 					    break;
   1716 					}
   1717 				    }
   1718 				}
   1719 
   1720 				/*
   1721 				 * If the action didn't match any legal action,
   1722 				 * set state to FAILURE.
   1723 				 */
   1724 				if (cmd->c_action == 0) {
   1725 					state = FAILURE;
   1726 				} else {
   1727 					state = COMMAND;
   1728 					(void) strcpy(shcmd, "exec ");
   1729 				}
   1730 				ptr = shcmd + EXEC;
   1731 				break;
   1732 			    }
   1733 			    continue;
   1734 			}
   1735 		    }
   1736 
   1737 		    /* If the character is a '\n', then this is the end of a */
   1738 		    /* line.  If the '\n' wasn't preceded by a backslash, */
   1739 		    /* it is also the end of an inittab command.  If it was */
   1740 		    /* preceded by a backslash then the next line is a */
   1741 		    /* continuation.  Note that the continuation '\n' falls */
   1742 		    /* through and is treated like other characters and is */
   1743 		    /* stored in the shell command line. */
   1744 		    if (c == '\n' && lastc != '\\') {
   1745 				proceed = FALSE;
   1746 				*ptr = '\0';
   1747 				break;
   1748 		    }
   1749 
   1750 		    /* For all other characters just stuff them into the */
   1751 		    /* command as long as there aren't too many of them. */
   1752 		    /* Make sure there is room for a terminating '\0' also. */
   1753 		    if (ptr >= shcmd + MAXCMDL - 1)
   1754 			state = FAILURE;
   1755 		    else
   1756 			*ptr++ = (char)c;
   1757 
   1758 		    /* If the character we just stored was a quoted	*/
   1759 		    /* backslash, then change "c" to '\0', so that this	*/
   1760 		    /* backslash will not cause a subsequent '\n' to appear */
   1761 		    /* quoted.  In otherwords '\' '\' '\n' is the real end */
   1762 		    /* of a command, while '\' '\n' is a continuation. */
   1763 		    if (c == '\\' && lastc == '\\')
   1764 			c = '\0';
   1765 		}
   1766 
   1767 		/*
   1768 		 * Make sure all the fields are properly specified
   1769 		 * for a good command line.
   1770 		 */
   1771 		if (state == COMMAND) {
   1772 			answer = TRUE;
   1773 			cmd->c_command = shcmd;
   1774 
   1775 			/*
   1776 			 * If no default level was supplied, insert
   1777 			 * all numerical levels.
   1778 			 */
   1779 			if (cmd->c_levels == 0)
   1780 				cmd->c_levels = MASK_NUMERIC;
   1781 
   1782 			/*
   1783 			 * If no action has been supplied, declare this
   1784 			 * entry to be OFF.
   1785 			 */
   1786 			if (cmd->c_action == 0)
   1787 				cmd->c_action = M_OFF;
   1788 
   1789 			/*
   1790 			 * If no shell command has been supplied, make sure
   1791 			 * there is a null string in the command field.
   1792 			 */
   1793 			if (ptr == shcmd + EXEC)
   1794 				*shcmd = '\0';
   1795 		} else
   1796 			answer = FALSE;
   1797 
   1798 		/*
   1799 		 * If we have reached the end of inittab, then close it
   1800 		 * and quit trying to find a good command line.
   1801 		 */
   1802 		if (c == EOF) {
   1803 			(void) fclose(fp_inittab);
   1804 			fp_inittab = NULL;
   1805 			break;
   1806 		}
   1807 	}
   1808 	return (answer);
   1809 }
   1810 
   1811 /*
   1812  * lvlname_to_state(): convert the character name of a state to its level
   1813  * (its corresponding signal number).
   1814  */
   1815 static int
   1816 lvlname_to_state(char name)
   1817 {
   1818 	int i;
   1819 	for (i = 0; i < LVL_NELEMS; i++) {
   1820 		if (lvls[i].lvl_name == name)
   1821 			return (lvls[i].lvl_state);
   1822 	}
   1823 	return (-1);
   1824 }
   1825 
   1826 /*
   1827  * state_to_name(): convert the level to the character name.
   1828  */
   1829 static char
   1830 state_to_name(int state)
   1831 {
   1832 	int i;
   1833 	for (i = 0; i < LVL_NELEMS; i++) {
   1834 		if (lvls[i].lvl_state == state)
   1835 			return (lvls[i].lvl_name);
   1836 	}
   1837 	return (-1);
   1838 }
   1839 
   1840 /*
   1841  * state_to_mask(): return the mask corresponding to a signal number
   1842  */
   1843 static int
   1844 state_to_mask(int state)
   1845 {
   1846 	int i;
   1847 	for (i = 0; i < LVL_NELEMS; i++) {
   1848 		if (lvls[i].lvl_state == state)
   1849 			return (lvls[i].lvl_mask);
   1850 	}
   1851 	return (0);	/* return 0, since that represents an empty mask */
   1852 }
   1853 
   1854 /*
   1855  * lvlname_to_mask(): return the mask corresponding to a levels character name
   1856  */
   1857 static int
   1858 lvlname_to_mask(char name, int *mask)
   1859 {
   1860 	int i;
   1861 	for (i = 0; i < LVL_NELEMS; i++) {
   1862 		if (lvls[i].lvl_name == name) {
   1863 			*mask = lvls[i].lvl_mask;
   1864 			return (0);
   1865 		}
   1866 	}
   1867 	return (-1);
   1868 }
   1869 
   1870 /*
   1871  * state_to_flags(): return the flags corresponding to a runlevel.  These
   1872  * indicate properties of that runlevel.
   1873  */
   1874 static int
   1875 state_to_flags(int state)
   1876 {
   1877 	int i;
   1878 	for (i = 0; i < LVL_NELEMS; i++) {
   1879 		if (lvls[i].lvl_state == state)
   1880 			return (lvls[i].lvl_flags);
   1881 	}
   1882 	return (0);
   1883 }
   1884 
   1885 /*
   1886  * killproc() creates a child which kills the process specified by pid.
   1887  */
   1888 void
   1889 killproc(pid_t pid)
   1890 {
   1891 	struct PROC_TABLE	*process;
   1892 
   1893 	(void) sighold(SIGCLD);
   1894 	while ((process = efork(M_OFF, NULLPROC, 0)) == NO_ROOM)
   1895 		(void) pause();
   1896 	(void) sigrelse(SIGCLD);
   1897 
   1898 	if (process == NULLPROC) {
   1899 		/*
   1900 		 * efork() sets all signal handlers to the default, so reset
   1901 		 * the ALRM handler to make timer() work as expected.
   1902 		 */
   1903 		(void) sigset(SIGALRM, alarmclk);
   1904 
   1905 		/*
   1906 		 * We are the child.  Try to terminate the process nicely
   1907 		 * first using SIGTERM and if it refuses to die in TWARN
   1908 		 * seconds kill it with SIGKILL.
   1909 		 */
   1910 		(void) kill(pid, SIGTERM);
   1911 		(void) timer(TWARN);
   1912 		(void) kill(pid, SIGKILL);
   1913 		(void) exit(0);
   1914 	}
   1915 }
   1916 
   1917 /*
   1918  * Set up the default environment for all procs to be forked from init.
   1919  * Read the values from the /etc/default/init file, except for PATH.  If
   1920  * there's not enough room in the environment array, the environment
   1921  * lines that don't fit are silently discarded.
   1922  */
   1923 void
   1924 init_env()
   1925 {
   1926 	char	line[MAXCMDL];
   1927 	FILE	*fp;
   1928 	int	inquotes, length, wslength;
   1929 	char	*tokp, *cp1, *cp2;
   1930 
   1931 	glob_envp[0] = malloc((unsigned)(strlen(DEF_PATH)+2));
   1932 	(void) strcpy(glob_envp[0], DEF_PATH);
   1933 	glob_envn = 1;
   1934 
   1935 	if (rflg) {
   1936 		glob_envp[1] =
   1937 		    malloc((unsigned)(strlen("_DVFS_RECONFIG=YES")+2));
   1938 		(void) strcpy(glob_envp[1], "_DVFS_RECONFIG=YES");
   1939 		++glob_envn;
   1940 	} else if (bflg == 1) {
   1941 		glob_envp[1] =
   1942 		    malloc((unsigned)(strlen("RB_NOBOOTRC=YES")+2));
   1943 		(void) strcpy(glob_envp[1], "RB_NOBOOTRC=YES");
   1944 		++glob_envn;
   1945 	}
   1946 
   1947 	if ((fp = fopen(ENVFILE, "r")) == NULL) {
   1948 		console(B_TRUE,
   1949 		    "Cannot open %s. Environment not initialized.\n",
   1950 		    ENVFILE);
   1951 	} else {
   1952 		while (fgets(line, MAXCMDL - 1, fp) != NULL &&
   1953 		    glob_envn < MAXENVENT - 2) {
   1954 			/*
   1955 			 * Toss newline
   1956 			 */
   1957 			length = strlen(line);
   1958 			if (line[length - 1] == '\n')
   1959 				line[length - 1] = '\0';
   1960 
   1961 			/*
   1962 			 * Ignore blank or comment lines.
   1963 			 */
   1964 			if (line[0] == '#' || line[0] == '\0' ||
   1965 			    (wslength = strspn(line, " \t\n")) ==
   1966 			    strlen(line) ||
   1967 			    strchr(line, '#') == line + wslength)
   1968 				continue;
   1969 
   1970 			/*
   1971 			 * First make a pass through the line and change
   1972 			 * any non-quoted semi-colons to blanks so they
   1973 			 * will be treated as token separators below.
   1974 			 */
   1975 			inquotes = 0;
   1976 			for (cp1 = line; *cp1 != '\0'; cp1++) {
   1977 				if (*cp1 == '"') {
   1978 					if (inquotes == 0)
   1979 						inquotes = 1;
   1980 					else
   1981 						inquotes = 0;
   1982 				} else if (*cp1 == ';') {
   1983 					if (inquotes == 0)
   1984 						*cp1 = ' ';
   1985 				}
   1986 			}
   1987 
   1988 			/*
   1989 			 * Tokens within the line are separated by blanks
   1990 			 *  and tabs.  For each token in the line which
   1991 			 * contains a '=' we strip out any quotes and then
   1992 			 * stick the token in the environment array.
   1993 			 */
   1994 			if ((tokp = strtok(line, " \t")) == NULL)
   1995 				continue;
   1996 			do {
   1997 				if (strchr(tokp, '=') == NULL)
   1998 					continue;
   1999 				length = strlen(tokp);
   2000 				while ((cp1 = strpbrk(tokp, "\"\'")) != NULL) {
   2001 					for (cp2 = cp1;
   2002 					    cp2 < &tokp[length]; cp2++)
   2003 						*cp2 = *(cp2 + 1);
   2004 					length--;
   2005 				}
   2006 
   2007 				if (strncmp(tokp, "CMASK=",
   2008 				    sizeof ("CMASK=") - 1) == 0) {
   2009 					long t;
   2010 
   2011 					/* We know there's an = */
   2012 					t = strtol(strchr(tokp, '=') + 1, NULL,
   2013 					    8);
   2014 
   2015 					/* Sanity */
   2016 					if (t <= 077 && t >= 0)
   2017 						cmask = (int)t;
   2018 					(void) umask(cmask);
   2019 					continue;
   2020 				}
   2021 				glob_envp[glob_envn] =
   2022 				    malloc((unsigned)(length + 1));
   2023 				(void) strcpy(glob_envp[glob_envn], tokp);
   2024 				if (++glob_envn >= MAXENVENT - 1)
   2025 					break;
   2026 			} while ((tokp = strtok(NULL, " \t")) != NULL);
   2027 		}
   2028 
   2029 		/*
   2030 		 * Append a null pointer to the environment array
   2031 		 * to mark its end.
   2032 		 */
   2033 		glob_envp[glob_envn] = NULL;
   2034 		(void) fclose(fp);
   2035 	}
   2036 }
   2037 
   2038 /*
   2039  * boot_init(): Do initialization things that should be done at boot.
   2040  */
   2041 void
   2042 boot_init()
   2043 {
   2044 	int i;
   2045 	struct PROC_TABLE *process, *oprocess;
   2046 	struct CMD_LINE	cmd;
   2047 	char	line[MAXCMDL];
   2048 	char	svc_aux[SVC_AUX_SIZE];
   2049 	char	init_svc_fmri[SVC_FMRI_SIZE];
   2050 	char *old_path;
   2051 	int maxfiles;
   2052 
   2053 	/* Use INIT_PATH for sysinit cmds */
   2054 	old_path = glob_envp[0];
   2055 	glob_envp[0] = malloc((unsigned)(strlen(INIT_PATH)+2));
   2056 	(void) strcpy(glob_envp[0], INIT_PATH);
   2057 
   2058 	/*
   2059 	 * Scan inittab(4) and process the special svc.startd entry, initdefault
   2060 	 * and sysinit entries.
   2061 	 */
   2062 	while (getcmd(&cmd, &line[0]) == TRUE) {
   2063 		if (startd_tmpl >= 0 && id_eq(cmd.c_id, "smf")) {
   2064 			process_startd_line(&cmd, line);
   2065 			(void) snprintf(startd_svc_aux, SVC_AUX_SIZE,
   2066 			    INITTAB_ENTRY_ID_STR_FORMAT, cmd.c_id);
   2067 		} else if (cmd.c_action == M_INITDEFAULT) {
   2068 			/*
   2069 			 * initdefault is no longer meaningful, as the SMF
   2070 			 * milestone controls what (legacy) run level we
   2071 			 * boot to.
   2072 			 */
   2073 			console(B_TRUE,
   2074 			    "Ignoring legacy \"initdefault\" entry.\n");
   2075 		} else if (cmd.c_action == M_SYSINIT) {
   2076 			/*
   2077 			 * Execute the "sysinit" entry and wait for it to
   2078 			 * complete.  No bookkeeping is performed on these
   2079 			 * entries because we avoid writing to the file system
   2080 			 * until after there has been an chance to check it.
   2081 			 */
   2082 			if (process = findpslot(&cmd)) {
   2083 				(void) sighold(SIGCLD);
   2084 				(void) snprintf(svc_aux, SVC_AUX_SIZE,
   2085 				    INITTAB_ENTRY_ID_STR_FORMAT, cmd.c_id);
   2086 				(void) snprintf(init_svc_fmri, SVC_FMRI_SIZE,
   2087 				    SVC_INIT_PREFIX INITTAB_ENTRY_ID_STR_FORMAT,
   2088 				    cmd.c_id);
   2089 				if (legacy_tmpl >= 0) {
   2090 					(void) ct_pr_tmpl_set_svc_fmri(
   2091 					    legacy_tmpl, init_svc_fmri);
   2092 					(void) ct_pr_tmpl_set_svc_aux(
   2093 					    legacy_tmpl, svc_aux);
   2094 				}
   2095 
   2096 				for (oprocess = process;
   2097 				    (process = efork(M_OFF, oprocess,
   2098 				    (NAMED|NOCLEANUP))) == NO_ROOM;
   2099 				    /* CSTYLED */)
   2100 					;
   2101 				(void) sigrelse(SIGCLD);
   2102 
   2103 				if (process == NULLPROC) {
   2104 					maxfiles = ulimit(UL_GDESLIM, 0);
   2105 
   2106 					for (i = 0; i < maxfiles; i++)
   2107 						(void) fcntl(i, F_SETFD,
   2108 						    FD_CLOEXEC);
   2109 					(void) execle(SH, "INITSH", "-c",
   2110 					    cmd.c_command,
   2111 					    (char *)0, glob_envp);
   2112 					console(B_TRUE,
   2113 "Command\n\"%s\"\n failed to execute.  errno = %d (exec of shell failed)\n",
   2114 					    cmd.c_command, errno);
   2115 					exit(1);
   2116 				} else while (waitproc(process) == FAILURE);
   2117 				process->p_flags = 0;
   2118 				st_write();
   2119 			}
   2120 		}
   2121 	}
   2122 
   2123 	/* Restore the path. */
   2124 	free(glob_envp[0]);
   2125 	glob_envp[0] = old_path;
   2126 
   2127 	/*
   2128 	 * This will enable st_write() to complain about init_state_file.
   2129 	 */
   2130 	booting = 0;
   2131 
   2132 	/*
   2133 	 * If the /etc/ioctl.syscon didn't exist or had invalid contents write
   2134 	 * out a correct version.
   2135 	 */
   2136 	if (write_ioctl)
   2137 		write_ioctl_syscon();
   2138 
   2139 	/*
   2140 	 * Start svc.startd(1M), which does most of the work.
   2141 	 */
   2142 	if (startd_cline[0] != '\0' && startd_tmpl >= 0) {
   2143 		/* Start svc.startd. */
   2144 		if (startd_run(startd_cline, startd_tmpl, 0) == -1)
   2145 			cur_state = SINGLE_USER;
   2146 	} else {
   2147 		console(B_TRUE, "Absent svc.startd entry or bad "
   2148 		    "contract template.  Not starting svc.startd.\n");
   2149 		enter_maintenance();
   2150 	}
   2151 }
   2152 
   2153 /*
   2154  * init_signals(): Initialize all signals to either be caught or ignored.
   2155  */
   2156 void
   2157 init_signals(void)
   2158 {
   2159 	struct sigaction act;
   2160 	int i;
   2161 
   2162 	/*
   2163 	 * Start by ignoring all signals, then selectively re-enable some.
   2164 	 * The SIG_IGN disposition will only affect asynchronous signals:
   2165 	 * any signal that we trigger synchronously that doesn't end up
   2166 	 * being handled by siglvl() will be forcibly delivered by the kernel.
   2167 	 */
   2168 	for (i = SIGHUP; i <= SIGRTMAX; i++)
   2169 		(void) sigset(i, SIG_IGN);
   2170 
   2171 	/*
   2172 	 * Handle all level-changing signals using siglvl() and set sa_mask so
   2173 	 * that all level-changing signals are blocked while in siglvl().
   2174 	 */
   2175 	act.sa_handler = siglvl;
   2176 	act.sa_flags = SA_SIGINFO;
   2177 	(void) sigemptyset(&act.sa_mask);
   2178 
   2179 	(void) sigaddset(&act.sa_mask, LVLQ);
   2180 	(void) sigaddset(&act.sa_mask, LVL0);
   2181 	(void) sigaddset(&act.sa_mask, LVL1);
   2182 	(void) sigaddset(&act.sa_mask, LVL2);
   2183 	(void) sigaddset(&act.sa_mask, LVL3);
   2184 	(void) sigaddset(&act.sa_mask, LVL4);
   2185 	(void) sigaddset(&act.sa_mask, LVL5);
   2186 	(void) sigaddset(&act.sa_mask, LVL6);
   2187 	(void) sigaddset(&act.sa_mask, SINGLE_USER);
   2188 	(void) sigaddset(&act.sa_mask, LVLa);
   2189 	(void) sigaddset(&act.sa_mask, LVLb);
   2190 	(void) sigaddset(&act.sa_mask, LVLc);
   2191 
   2192 	(void) sigaction(LVLQ, &act, NULL);
   2193 	(void) sigaction(LVL0, &act, NULL);
   2194 	(void) sigaction(LVL1, &act, NULL);
   2195 	(void) sigaction(LVL2, &act, NULL);
   2196 	(void) sigaction(LVL3, &act, NULL);
   2197 	(void) sigaction(LVL4, &act, NULL);
   2198 	(void) sigaction(LVL5, &act, NULL);
   2199 	(void) sigaction(LVL6, &act, NULL);
   2200 	(void) sigaction(SINGLE_USER, &act, NULL);
   2201 	(void) sigaction(LVLa, &act, NULL);
   2202 	(void) sigaction(LVLb, &act, NULL);
   2203 	(void) sigaction(LVLc, &act, NULL);
   2204 
   2205 	(void) sigset(SIGALRM, alarmclk);
   2206 	alarmclk();
   2207 
   2208 	(void) sigset(SIGCLD, childeath);
   2209 	(void) sigset(SIGPWR, powerfail);
   2210 }
   2211 
   2212 /*
   2213  * Set up pipe for "godchildren". If the file exists and is a pipe just open
   2214  * it. Else, if the file system is r/w create it.  Otherwise, defer its
   2215  * creation and open until after /var/run has been mounted.  This function is
   2216  * only called on startup and when explicitly requested via LVLQ.
   2217  */
   2218 void
   2219 setup_pipe()
   2220 {
   2221 	struct stat stat_buf;
   2222 	struct statvfs statvfs_buf;
   2223 	struct sigaction act;
   2224 
   2225 	/*
   2226 	 * Always close the previous pipe descriptor as the mounted filesystems
   2227 	 * may have changed.
   2228 	 */
   2229 	if (Pfd >= 0)
   2230 		(void) close(Pfd);
   2231 
   2232 	if ((stat(INITPIPE, &stat_buf) == 0) &&
   2233 	    ((stat_buf.st_mode & (S_IFMT|S_IRUSR)) == (S_IFIFO|S_IRUSR)))
   2234 		Pfd = open(INITPIPE, O_RDWR | O_NDELAY);
   2235 	else
   2236 		if ((statvfs(INITPIPE_DIR, &statvfs_buf) == 0) &&
   2237 		    ((statvfs_buf.f_flag & ST_RDONLY) == 0)) {
   2238 			(void) unlink(INITPIPE);
   2239 			(void) mknod(INITPIPE, S_IFIFO | 0600, 0);
   2240 			Pfd = open(INITPIPE, O_RDWR | O_NDELAY);
   2241 		}
   2242 
   2243 	if (Pfd >= 0) {
   2244 		(void) ioctl(Pfd, I_SETSIG, S_INPUT);
   2245 		/*
   2246 		 * Read pipe in message discard mode.
   2247 		 */
   2248 		(void) ioctl(Pfd, I_SRDOPT, RMSGD);
   2249 
   2250 		act.sa_handler = sigpoll;
   2251 		act.sa_flags = 0;
   2252 		(void) sigemptyset(&act.sa_mask);
   2253 		(void) sigaddset(&act.sa_mask, SIGCLD);
   2254 		(void) sigaction(SIGPOLL, &act, NULL);
   2255 	}
   2256 }
   2257 
   2258 /*
   2259  * siglvl - handle an asynchronous signal from init(1M) telling us that we
   2260  * should change the current run level.  We set new_state accordingly.
   2261  */
   2262 void
   2263 siglvl(int sig, siginfo_t *sip, ucontext_t *ucp)
   2264 {
   2265 	struct PROC_TABLE *process;
   2266 	struct sigaction act;
   2267 
   2268 	/*
   2269 	 * If the signal was from the kernel (rather than init(1M)) then init
   2270 	 * itself tripped the signal.  That is, we might have a bug and tripped
   2271 	 * a real SIGSEGV instead of receiving it as an alias for SIGLVLa.  In
   2272 	 * such a case we reset the disposition to SIG_DFL, block all signals
   2273 	 * in uc_mask but the current one, and return to the interrupted ucp
   2274 	 * to effect an appropriate death.  The kernel will then restart us.
   2275 	 *
   2276 	 * The one exception to SI_FROMKERNEL() is SIGFPE (a.k.a. LVL6), which
   2277 	 * the kernel can send us when it wants to effect an orderly reboot.
   2278 	 * For this case we must also verify si_code is zero, rather than a
   2279 	 * code such as FPE_INTDIV which a bug might have triggered.
   2280 	 */
   2281 	if (sip != NULL && SI_FROMKERNEL(sip) &&
   2282 	    (sig != SIGFPE || sip->si_code == 0)) {
   2283 
   2284 		(void) sigemptyset(&act.sa_mask);
   2285 		act.sa_handler = SIG_DFL;
   2286 		act.sa_flags = 0;
   2287 		(void) sigaction(sig, &act, NULL);
   2288 
   2289 		(void) sigfillset(&ucp->uc_sigmask);
   2290 		(void) sigdelset(&ucp->uc_sigmask, sig);
   2291 		ucp->uc_flags |= UC_SIGMASK;
   2292 
   2293 		(void) setcontext(ucp);
   2294 	}
   2295 
   2296 	/*
   2297 	 * If the signal received is a LVLQ signal, do not really
   2298 	 * change levels, just restate the current level.  If the
   2299 	 * signal is not a LVLQ, set the new level to the signal
   2300 	 * received.
   2301 	 */
   2302 	if (sig == LVLQ) {
   2303 		new_state = cur_state;
   2304 		lvlq_received = B_TRUE;
   2305 	} else {
   2306 		new_state = sig;
   2307 	}
   2308 
   2309 	/*
   2310 	 * Clear all times and repeat counts in the process table
   2311 	 * since either the level is changing or the user has editted
   2312 	 * the inittab file and wants us to look at it again.
   2313 	 * If the user has fixed a typo, we don't want residual timing
   2314 	 * data preventing the fixed command line from executing.
   2315 	 */
   2316 	for (process = proc_table;
   2317 	    (process < proc_table + num_proc); process++) {
   2318 		process->p_time = 0L;
   2319 		process->p_count = 0;
   2320 	}
   2321 
   2322 	/*
   2323 	 * Set the flag to indicate that a "user signal" was received.
   2324 	 */
   2325 	wakeup.w_flags.w_usersignal = 1;
   2326 }
   2327 
   2328 
   2329 /*
   2330  * alarmclk
   2331  */
   2332 static void
   2333 alarmclk()
   2334 {
   2335 	time_up = TRUE;
   2336 }
   2337 
   2338 /*
   2339  * childeath_single():
   2340  *
   2341  * This used to be the SIGCLD handler and it was set with signal()
   2342  * (as opposed to sigset()).  When a child exited we'd come to the
   2343  * handler, wait for the child, and reenable the handler with
   2344  * signal() just before returning.  The implementation of signal()
   2345  * checks with waitid() for waitable children and sends a SIGCLD
   2346  * if there are some.  If children are exiting faster than the
   2347  * handler can run we keep sending signals and the handler never
   2348  * gets to return and eventually the stack runs out and init dies.
   2349  * To prevent that we set the handler with sigset() so the handler
   2350  * doesn't need to be reset, and in childeath() (see below) we
   2351  * call childeath_single() as long as there are children to be
   2352  * waited for.  If a child exits while init is in the handler a
   2353  * SIGCLD will be pending and delivered on return from the handler.
   2354  * If the child was already waited for the handler will have nothing
   2355  * to do and return, otherwise the child will be waited for.
   2356  */
   2357 static void
   2358 childeath_single(pid_t pid, int status)
   2359 {
   2360 	struct PROC_TABLE	*process;
   2361 	struct pidlist		*pp;
   2362 
   2363 	/*
   2364 	 * Scan the process table to see if we are interested in this process.
   2365 	 */
   2366 	for (process = proc_table;
   2367 	    (process < proc_table + num_proc); process++) {
   2368 		if ((process->p_flags & (LIVING|OCCUPIED)) ==
   2369 		    (LIVING|OCCUPIED) && process->p_pid == pid) {
   2370 
   2371 			/*
   2372 			 * Mark this process as having died and store the exit
   2373 			 * status.  Also set the wakeup flag for a dead child
   2374 			 * and break out of the loop.
   2375 			 */
   2376 			process->p_flags &= ~LIVING;
   2377 			process->p_exit = (short)status;
   2378 			wakeup.w_flags.w_childdeath = 1;
   2379 
   2380 			return;
   2381 		}
   2382 	}
   2383 
   2384 	/*
   2385 	 * No process was found above, look through auxiliary list.
   2386 	 */
   2387 	(void) sighold(SIGPOLL);
   2388 	pp = Plhead;
   2389 	while (pp) {
   2390 		if (pid > pp->pl_pid) {
   2391 			/*
   2392 			 * Keep on looking.
   2393 			 */
   2394 			pp = pp->pl_next;
   2395 			continue;
   2396 		} else if (pid < pp->pl_pid) {
   2397 			/*
   2398 			 * Not in the list.
   2399 			 */
   2400 			break;
   2401 		} else {
   2402 			/*
   2403 			 * This is a dead "godchild".
   2404 			 */
   2405 			pp->pl_dflag = 1;
   2406 			pp->pl_exit = (short)status;
   2407 			wakeup.w_flags.w_childdeath = 1;
   2408 			Gchild = 1;	/* Notice to call cleanaux(). */
   2409 			break;
   2410 		}
   2411 	}
   2412 
   2413 	(void) sigrelse(SIGPOLL);
   2414 }
   2415 
   2416 /* ARGSUSED */
   2417 static void
   2418 childeath(int signo)
   2419 {
   2420 	pid_t pid;
   2421 	int status;
   2422 
   2423 	while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
   2424 		childeath_single(pid, status);
   2425 }
   2426 
   2427 static void
   2428 powerfail()
   2429 {
   2430 	(void) nice(-19);
   2431 	wakeup.w_flags.w_powerhit = 1;
   2432 }
   2433 
   2434 /*
   2435  * efork() forks a child and the parent inserts the process in its table
   2436  * of processes that are directly a result of forks that it has performed.
   2437  * The child just changes the "global" with the process id for this process
   2438  * to it's new value.
   2439  * If efork() is called with a pointer into the proc_table it uses that slot,
   2440  * otherwise it searches for a free slot.  Regardless of how it was called,
   2441  * it returns the pointer to the proc_table entry
   2442  *
   2443  * The SIGCLD signal is blocked (held) before calling efork()
   2444  * and is unblocked (released) after efork() returns.
   2445  *
   2446  * Ideally, this should be rewritten to use modern signal semantics.
   2447  */
   2448 static struct PROC_TABLE *
   2449 efork(int action, struct PROC_TABLE *process, int modes)
   2450 {
   2451 	pid_t	childpid;
   2452 	struct PROC_TABLE *proc;
   2453 	int		i;
   2454 	/*
   2455 	 * Freshen up the proc_table, removing any entries for dead processes
   2456 	 * that don't have NOCLEANUP set.  Perform the necessary accounting.
   2457 	 */
   2458 	for (proc = proc_table; (proc < proc_table + num_proc); proc++) {
   2459 		if ((proc->p_flags & (OCCUPIED|LIVING|NOCLEANUP)) ==
   2460 		    (OCCUPIED)) {
   2461 			/*
   2462 			 * Is this a named process?
   2463 			 * If so, do the necessary bookkeeping.
   2464 			 */
   2465 			if (proc->p_flags & NAMED)
   2466 				(void) account(DEAD_PROCESS, proc, NULL);
   2467 
   2468 			/*
   2469 			 * Free this entry for new usage.
   2470 			 */
   2471 			proc->p_flags = 0;
   2472 		}
   2473 	}
   2474 
   2475 	while ((childpid = fork()) == FAILURE) {
   2476 		/*
   2477 		 * Shorten the alarm timer in case someone else's child dies
   2478 		 * and free up a slot in the process table.
   2479 		 */
   2480 		setimer(5);
   2481 
   2482 		/*
   2483 		 * Wait for some children to die.  Since efork()
   2484 		 * is always called with SIGCLD blocked, unblock
   2485 		 * it here so that child death signals can come in.
   2486 		 */
   2487 		(void) sigrelse(SIGCLD);
   2488 		(void) pause();
   2489 		(void) sighold(SIGCLD);
   2490 		setimer(0);
   2491 	}
   2492 
   2493 	if (childpid != 0) {
   2494 
   2495 		if (process == NULLPROC) {
   2496 			/*
   2497 			 * No proc table pointer specified so search
   2498 			 * for a free slot.
   2499 			 */
   2500 			for (process = proc_table;  process->p_flags != 0 &&
   2501 			    (process < proc_table + num_proc); process++)
   2502 					;
   2503 
   2504 			if (process == (proc_table + num_proc)) {
   2505 				int old_proc_table_size = num_proc;
   2506 
   2507 				/* Increase the process table size */
   2508 				increase_proc_table_size();
   2509 				if (old_proc_table_size == num_proc) {
   2510 					/* didn't grow: memory failure */
   2511 					return (NO_ROOM);
   2512 				} else {
   2513 					process =
   2514 					    proc_table + old_proc_table_size;
   2515 				}
   2516 			}
   2517 
   2518 			process->p_time = 0L;
   2519 			process->p_count = 0;
   2520 		}
   2521 		process->p_id[0] = '\0';
   2522 		process->p_id[1] = '\0';
   2523 		process->p_id[2] = '\0';
   2524 		process->p_id[3] = '\0';
   2525 		process->p_pid = childpid;
   2526 		process->p_flags = (LIVING | OCCUPIED | modes);
   2527 		process->p_exit = 0;
   2528 
   2529 		st_write();
   2530 	} else {
   2531 		if ((action & (M_WAIT | M_BOOTWAIT)) == 0)
   2532 			(void) setpgrp();
   2533 
   2534 		process = NULLPROC;
   2535 
   2536 		/*
   2537 		 * Reset all signals to the system defaults.
   2538 		 */
   2539 		for (i = SIGHUP; i <= SIGRTMAX; i++)
   2540 			(void) sigset(i, SIG_DFL);
   2541 
   2542 		/*
   2543 		 * POSIX B.2.2.2 advises that init should set SIGTTOU,
   2544 		 * SIGTTIN, and SIGTSTP to SIG_IGN.
   2545 		 *
   2546 		 * Make sure that SIGXCPU and SIGXFSZ also remain ignored,
   2547 		 * for backward compatibility.
   2548 		 */
   2549 		(void) sigset(SIGTTIN, SIG_IGN);
   2550 		(void) sigset(SIGTTOU, SIG_IGN);
   2551 		(void) sigset(SIGTSTP, SIG_IGN);
   2552 		(void) sigset(SIGXCPU, SIG_IGN);
   2553 		(void) sigset(SIGXFSZ, SIG_IGN);
   2554 	}
   2555 	return (process);
   2556 }
   2557 
   2558 
   2559 /*
   2560  * waitproc() waits for a specified process to die.  For this function to
   2561  * work, the specified process must already in the proc_table.  waitproc()
   2562  * returns the exit status of the specified process when it dies.
   2563  */
   2564 static long
   2565 waitproc(struct PROC_TABLE *process)
   2566 {
   2567 	int		answer;
   2568 	sigset_t	oldmask, newmask, zeromask;
   2569 
   2570 	(void) sigemptyset(&zeromask);
   2571 	(void) sigemptyset(&newmask);
   2572 
   2573 	(void) sigaddset(&newmask, SIGCLD);
   2574 
   2575 	/* Block SIGCLD and save the current signal mask */
   2576 	if (sigprocmask(SIG_BLOCK, &newmask, &oldmask) < 0)
   2577 		perror("SIG_BLOCK error");
   2578 
   2579 	/*
   2580 	 * Wait around until the process dies.
   2581 	 */
   2582 	if (process->p_flags & LIVING)
   2583 		(void) sigsuspend(&zeromask);
   2584 
   2585 	/* Reset signal mask to unblock SIGCLD */
   2586 	if (sigprocmask(SIG_SETMASK, &oldmask, NULL) < 0)
   2587 		perror("SIG_SETMASK error");
   2588 
   2589 	if (process->p_flags & LIVING)
   2590 		return (FAILURE);
   2591 
   2592 	/*
   2593 	 * Make sure to only return 16 bits so that answer will always
   2594 	 * be positive whenever the process of interest really died.
   2595 	 */
   2596 	answer = (process->p_exit & 0xffff);
   2597 
   2598 	/*
   2599 	 * Free the slot in the proc_table.
   2600 	 */
   2601 	process->p_flags = 0;
   2602 	return (answer);
   2603 }
   2604 
   2605 /*
   2606  * notify_pam_dead(): calls into the PAM framework to close the given session.
   2607  */
   2608 static void
   2609 notify_pam_dead(struct utmpx *up)
   2610 {
   2611 	pam_handle_t *pamh;
   2612 	char user[sizeof (up->ut_user) + 1];
   2613 	char ttyn[sizeof (up->ut_line) + 1];
   2614 	char host[sizeof (up->ut_host) + 1];
   2615 
   2616 	/*
   2617 	 * PAM does not take care of updating utmpx/wtmpx.
   2618 	 */
   2619 	(void) snprintf(user, sizeof (user), "%s", up->ut_user);
   2620 	(void) snprintf(ttyn, sizeof (ttyn), "%s", up->ut_line);
   2621 	(void) snprintf(host, sizeof (host), "%s", up->ut_host);
   2622 
   2623 	if (pam_start("init", user, NULL, &pamh) == PAM_SUCCESS)  {
   2624 		(void) pam_set_item(pamh, PAM_TTY, ttyn);
   2625 		(void) pam_set_item(pamh, PAM_RHOST, host);
   2626 		(void) pam_close_session(pamh, 0);
   2627 		(void) pam_end(pamh, PAM_SUCCESS);
   2628 	}
   2629 }
   2630 
   2631 /*
   2632  * Check you can access utmpx (As / may be read-only and
   2633  * /var may not be mounted yet).
   2634  */
   2635 static int
   2636 access_utmpx(void)
   2637 {
   2638 	do {
   2639 		utmpx_ok = (access(UTMPX, R_OK|W_OK) == 0);
   2640 	} while (!utmpx_ok && errno == EINTR);
   2641 
   2642 	return (utmpx_ok);
   2643 }
   2644 
   2645 /*
   2646  * account() updates entries in utmpx and appends new entries to the end of
   2647  * wtmpx (assuming they exist).  The program argument indicates the name of
   2648  * program if INIT_PROCESS, otherwise should be NULL.
   2649  *
   2650  * account() only blocks for INIT_PROCESS requests.
   2651  *
   2652  * Returns non-zero if write failed.
   2653  */
   2654 static int
   2655 account(short state, struct PROC_TABLE *process, char *program)
   2656 {
   2657 	struct utmpx utmpbuf, *u, *oldu;
   2658 	int tmplen;
   2659 	char fail_buf[UT_LINE_SZ];
   2660 	sigset_t block, unblock;
   2661 
   2662 	if (!utmpx_ok && !access_utmpx()) {
   2663 		return (-1);
   2664 	}
   2665 
   2666 	/*
   2667 	 * Set up the prototype for the utmp structure we want to write.
   2668 	 */
   2669 	u = &utmpbuf;
   2670 	(void) memset(u, 0, sizeof (struct utmpx));
   2671 
   2672 	/*
   2673 	 * Fill in the various fields of the utmp structure.
   2674 	 */
   2675 	u->ut_id[0] = process->p_id[0];
   2676 	u->ut_id[1] = process->p_id[1];
   2677 	u->ut_id[2] = process->p_id[2];
   2678 	u->ut_id[3] = process->p_id[3];
   2679 	u->ut_pid = process->p_pid;
   2680 
   2681 	/*
   2682 	 * Fill the "ut_exit" structure.
   2683 	 */
   2684 	u->ut_exit.e_termination = WTERMSIG(process->p_exit);
   2685 	u->ut_exit.e_exit = WEXITSTATUS(process->p_exit);
   2686 	u->ut_type = state;
   2687 
   2688 	(void) time(&u->ut_tv.tv_sec);
   2689 
   2690 	/*
   2691 	 * Block signals for utmp update.
   2692 	 */
   2693 	(void) sigfillset(&block);
   2694 	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
   2695 
   2696 	/*
   2697 	 * See if there already is such an entry in the "utmpx" file.
   2698 	 */
   2699 	setutxent();	/* Start at beginning of utmpx file. */
   2700 
   2701 	if ((oldu = getutxid(u)) != NULL) {
   2702 		/*
   2703 		 * Copy in the old "user", "line" and "host" fields
   2704 		 * to our new structure.
   2705 		 */
   2706 		bcopy(oldu->ut_user, u->ut_user, sizeof (u->ut_user));
   2707 		bcopy(oldu->ut_line, u->ut_line, sizeof (u->ut_line));
   2708 		bcopy(oldu->ut_host, u->ut_host, sizeof (u->ut_host));
   2709 		u->ut_syslen = (tmplen = strlen(u->ut_host)) ?
   2710 		    min(tmplen + 1, sizeof (u->ut_host)) : 0;
   2711 
   2712 		if (oldu->ut_type == USER_PROCESS && state == DEAD_PROCESS) {
   2713 			notify_pam_dead(oldu);
   2714 		}
   2715 	}
   2716 
   2717 	/*
   2718 	 * Perform special accounting. Insert the special string into the
   2719 	 * ut_line array. For INIT_PROCESSes put in the name of the
   2720 	 * program in the "ut_user" field.
   2721 	 */
   2722 	switch (state) {
   2723 	case INIT_PROCESS:
   2724 		(void) strncpy(u->ut_user, program, sizeof (u->ut_user));
   2725 		(void) strcpy(fail_buf, "INIT_PROCESS");
   2726 		break;
   2727 
   2728 	default:
   2729 		(void) strlcpy(fail_buf, u->ut_id, sizeof (u->ut_id) + 1);
   2730 		break;
   2731 	}
   2732 
   2733 	/*
   2734 	 * Write out the updated entry to utmpx file.
   2735 	 */
   2736 	if (pututxline(u) == NULL) {
   2737 		console(B_TRUE, "Failed write of utmpx entry: \"%s\": %s\n",
   2738 		    fail_buf, strerror(errno));
   2739 		endutxent();
   2740 		(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
   2741 		return (-1);
   2742 	}
   2743 
   2744 	/*
   2745 	 * If we're able to write to utmpx, then attempt to add to the
   2746 	 * end of the wtmpx file.
   2747 	 */
   2748 	updwtmpx(WTMPX, u);
   2749 
   2750 	endutxent();
   2751 
   2752 	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
   2753 
   2754 	return (0);
   2755 }
   2756 
   2757 static void
   2758 clearent(pid_t pid, short status)
   2759 {
   2760 	struct utmpx *up;
   2761 	sigset_t block, unblock;
   2762 
   2763 	/*
   2764 	 * Block signals for utmp update.
   2765 	 */
   2766 	(void) sigfillset(&block);
   2767 	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
   2768 
   2769 	/*
   2770 	 * No error checking for now.
   2771 	 */
   2772 
   2773 	setutxent();
   2774 	while (up = getutxent()) {
   2775 		if (up->ut_pid == pid) {
   2776 			if (up->ut_type == DEAD_PROCESS) {
   2777 				/*
   2778 				 * Cleaned up elsewhere.
   2779 				 */
   2780 				continue;
   2781 			}
   2782 
   2783 			notify_pam_dead(up);
   2784 
   2785 			up->ut_type = DEAD_PROCESS;
   2786 			up->ut_exit.e_termination = WTERMSIG(status);
   2787 			up->ut_exit.e_exit = WEXITSTATUS(status);
   2788 			(void) time(&up->ut_tv.tv_sec);
   2789 
   2790 			(void) pututxline(up);
   2791 			/*
   2792 			 * Now attempt to add to the end of the
   2793 			 * wtmp and wtmpx files.  Do not create
   2794 			 * if they don't already exist.
   2795 			 */
   2796 			updwtmpx(WTMPX, up);
   2797 
   2798 			break;
   2799 		}
   2800 	}
   2801 
   2802 	endutxent();
   2803 	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
   2804 }
   2805 
   2806 /*
   2807  * prog_name() searches for the word or unix path name and
   2808  * returns a pointer to the last element of the pathname.
   2809  */
   2810 static char *
   2811 prog_name(char *string)
   2812 {
   2813 	char	*ptr, *ptr2;
   2814 	/* XXX - utmp - fix name length */
   2815 	static char word[_POSIX_LOGIN_NAME_MAX];
   2816 
   2817 	/*
   2818 	 * Search for the first word skipping leading spaces and tabs.
   2819 	 */
   2820 	while (*string == ' ' || *string == '\t')
   2821 		string++;
   2822 
   2823 	/*
   2824 	 * If the first non-space non-tab character is not one allowed in
   2825 	 * a word, return a pointer to a null string, otherwise parse the
   2826 	 * pathname.
   2827 	 */
   2828 	if (*string != '.' && *string != '/' && *string != '_' &&
   2829 	    (*string < 'a' || *string > 'z') &&
   2830 	    (*string < 'A' || * string > 'Z') &&
   2831 	    (*string < '0' || *string > '9'))
   2832 		return ("");
   2833 
   2834 	/*
   2835 	 * Parse the pathname looking forward for '/', ' ', '\t', '\n' or
   2836 	 * '\0'.  Each time a '/' is found, move "ptr" to one past the
   2837 	 * '/', thus when a ' ', '\t', '\n', or '\0' is found, "ptr" will
   2838 	 * point to the last element of the pathname.
   2839 	 */
   2840 	for (ptr = string; *string != ' ' && *string != '\t' &&
   2841 	    *string != '\n' && *string != '\0'; string++) {
   2842 		if (*string == '/')
   2843 			ptr = string+1;
   2844 	}
   2845 
   2846 	/*
   2847 	 * Copy out up to the size of the "ut_user" array into "word",
   2848 	 * null terminate it and return a pointer to it.
   2849 	 */
   2850 	/* XXX - utmp - fix name length */
   2851 	for (ptr2 = &word[0]; ptr2 < &word[_POSIX_LOGIN_NAME_MAX - 1] &&
   2852 	    ptr < string; /* CSTYLED */)
   2853 		*ptr2++ = *ptr++;
   2854 
   2855 	*ptr2 = '\0';
   2856 	return (&word[0]);
   2857 }
   2858 
   2859 
   2860 /*
   2861  * realcon() returns a nonzero value if there is a character device
   2862  * associated with SYSCON that has the same device number as CONSOLE.
   2863  */
   2864 static int
   2865 realcon()
   2866 {
   2867 	struct stat sconbuf, conbuf;
   2868 
   2869 	if (stat(SYSCON, &sconbuf) != -1 &&
   2870 	    stat(CONSOLE, &conbuf) != -1 &&
   2871 	    S_ISCHR(sconbuf.st_mode) &&
   2872 	    S_ISCHR(conbuf.st_mode) &&
   2873 	    sconbuf.st_rdev == conbuf.st_rdev) {
   2874 		return (1);
   2875 	} else {
   2876 		return (0);
   2877 	}
   2878 }
   2879 
   2880 
   2881 /*
   2882  * get_ioctl_syscon() retrieves the SYSCON settings from the IOCTLSYSCON file.
   2883  * Returns true if the IOCTLSYSCON file needs to be written (with
   2884  * write_ioctl_syscon() below)
   2885  */
   2886 static int
   2887 get_ioctl_syscon()
   2888 {
   2889 	FILE	*fp;
   2890 	unsigned int	iflags, oflags, cflags, lflags, ldisc, cc[18];
   2891 	int		i, valid_format = 0;
   2892 
   2893 	/*
   2894 	 * Read in the previous modes for SYSCON from IOCTLSYSCON.
   2895 	 */
   2896 	if ((fp = fopen(IOCTLSYSCON, "r")) == NULL) {
   2897 		stored_syscon_termios = dflt_termios;
   2898 		console(B_TRUE,
   2899 		    "warning:%s does not exist, default settings assumed\n",
   2900 		    IOCTLSYSCON);
   2901 	} else {
   2902 
   2903 	    i = fscanf(fp,
   2904 	    "%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x",
   2905 		&iflags, &oflags, &cflags, &lflags,
   2906 		&cc[0], &cc[1], &cc[2], &cc[3], &cc[4], &cc[5], &cc[6],
   2907 		&cc[7], &cc[8], &cc[9], &cc[10], &cc[11], &cc[12], &cc[13],
   2908 		&cc[14], &cc[15], &cc[16], &cc[17]);
   2909 
   2910 	    if (i == 22) {
   2911 		stored_syscon_termios.c_iflag = iflags;
   2912 		stored_syscon_termios.c_oflag = oflags;
   2913 		stored_syscon_termios.c_cflag = cflags;
   2914 		stored_syscon_termios.c_lflag = lflags;
   2915 		for (i = 0; i < 18; i++)
   2916 			stored_syscon_termios.c_cc[i] = (char)cc[i];
   2917 		valid_format = 1;
   2918 	    } else if (i == 13) {
   2919 		rewind(fp);
   2920 		i = fscanf(fp, "%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x",
   2921 		    &iflags, &oflags, &cflags, &lflags, &ldisc, &cc[0], &cc[1],
   2922 		    &cc[2], &cc[3], &cc[4], &cc[5], &cc[6], &cc[7]);
   2923 
   2924 		/*
   2925 		 * If the file is formatted properly, use the values to
   2926 		 * initialize the console terminal condition.
   2927 		 */
   2928 		stored_syscon_termios.c_iflag = (ushort_t)iflags;
   2929 		stored_syscon_termios.c_oflag = (ushort_t)oflags;
   2930 		stored_syscon_termios.c_cflag = (ushort_t)cflags;
   2931 		stored_syscon_termios.c_lflag = (ushort_t)lflags;
   2932 		for (i = 0; i < 8; i++)
   2933 			stored_syscon_termios.c_cc[i] = (char)cc[i];
   2934 		valid_format = 1;
   2935 	    }
   2936 	    (void) fclose(fp);
   2937 
   2938 	    /* If the file is badly formatted, use the default settings. */
   2939 	    if (!valid_format)
   2940 		stored_syscon_termios = dflt_termios;
   2941 	}
   2942 
   2943 	/* If the file had a bad format, rewrite it later. */
   2944 	return (!valid_format);
   2945 }
   2946 
   2947 
   2948 static void
   2949 write_ioctl_syscon()
   2950 {
   2951 	FILE *fp;
   2952 	int i;
   2953 
   2954 	(void) unlink(SYSCON);
   2955 	(void) link(SYSTTY, SYSCON);
   2956 	(void) umask(022);
   2957 	fp = fopen(IOCTLSYSCON, "w");
   2958 
   2959 	(void) fprintf(fp, "%x:%x:%x:%x:0", stored_syscon_termios.c_iflag,
   2960 	    stored_syscon_termios.c_oflag, stored_syscon_termios.c_cflag,
   2961 	    stored_syscon_termios.c_lflag);
   2962 	for (i = 0; i < 8; ++i)
   2963 		(void) fprintf(fp, ":%x", stored_syscon_termios.c_cc[i]);
   2964 	(void) putc('\n', fp);
   2965 
   2966 	(void) fflush(fp);
   2967 	(void) fsync(fileno(fp));
   2968 	(void) fclose(fp);
   2969 	(void) umask(cmask);
   2970 }
   2971 
   2972 
   2973 /*
   2974  * void console(boolean_t, char *, ...)
   2975  *   Outputs the requested message to the system console.  Note that the number
   2976  *   of arguments passed to console() should be determined by the print format.
   2977  *
   2978  *   The "prefix" parameter indicates whether or not "INIT: " should precede the
   2979  *   message.
   2980  *
   2981  *   To make sure we write to the console in a sane fashion, we use the modes
   2982  *   we keep in stored_syscon_termios (which we read out of /etc/ioctl.syscon).
   2983  *   Afterwards we restore whatever modes were already there.
   2984  */
   2985 /* PRINTFLIKE2 */
   2986 static void
   2987 console(boolean_t prefix, char *format, ...)
   2988 {
   2989 	char	outbuf[BUFSIZ];
   2990 	va_list	args;
   2991 	int fd, getret;
   2992 	struct termios old_syscon_termios;
   2993 	FILE *f;
   2994 
   2995 	/*
   2996 	 * We open SYSCON anew each time in case it has changed (see
   2997 	 * userinit()).
   2998 	 */
   2999 	if ((fd = open(SYSCON, O_RDWR | O_NOCTTY)) < 0 ||
   3000 	    (f = fdopen(fd, "r+")) == NULL) {
   3001 		if (prefix)
   3002 			syslog(LOG_WARNING, "INIT: ");
   3003 		va_start(args, format);
   3004 		vsyslog(LOG_WARNING, format, args);
   3005 		va_end(args);
   3006 		if (fd >= 0)
   3007 			(void) close(fd);
   3008 		return;
   3009 	}
   3010 	setbuf(f, &outbuf[0]);
   3011 
   3012 	getret = tcgetattr(fd, &old_syscon_termios);
   3013 	old_syscon_termios.c_cflag &= ~HUPCL;
   3014 	if (realcon())
   3015 		/* Don't overwrite cflag of real console. */
   3016 		stored_syscon_termios.c_cflag = old_syscon_termios.c_cflag;
   3017 
   3018 	stored_syscon_termios.c_cflag &= ~HUPCL;
   3019 
   3020 	(void) tcsetattr(fd, TCSANOW, &stored_syscon_termios);
   3021 
   3022 	if (prefix)
   3023 		(void) fprintf(f, "\nINIT: ");
   3024 	va_start(args, format);
   3025 	(void) vfprintf(f, format, args);
   3026 	va_end(args);
   3027 
   3028 	if (getret == 0)
   3029 		(void) tcsetattr(fd, TCSADRAIN, &old_syscon_termios);
   3030 
   3031 	(void) fclose(f);
   3032 }
   3033 
   3034 /*
   3035  * timer() is a substitute for sleep() which uses alarm() and pause().
   3036  */
   3037 static void
   3038 timer(int waitime)
   3039 {
   3040 	setimer(waitime);
   3041 	while (time_up == FALSE)
   3042 		(void) pause();
   3043 }
   3044 
   3045 static void
   3046 setimer(int timelimit)
   3047 {
   3048 	alarmclk();
   3049 	(void) alarm(timelimit);
   3050 	time_up = (timelimit ? FALSE : TRUE);
   3051 }
   3052 
   3053 /*
   3054  * Fails with
   3055  *   ENOMEM - out of memory
   3056  *   ECONNABORTED - repository connection broken
   3057  *   EPERM - permission denied
   3058  *   EACCES - backend access denied
   3059  *   EROFS - backend readonly
   3060  */
   3061 static int
   3062 get_or_add_startd(scf_instance_t *inst)
   3063 {
   3064 	scf_handle_t *h;
   3065 	scf_scope_t *scope = NULL;
   3066 	scf_service_t *svc = NULL;
   3067 	int ret = 0;
   3068 
   3069 	h = scf_instance_handle(inst);
   3070 
   3071 	if (scf_handle_decode_fmri(h, SCF_SERVICE_STARTD, NULL, NULL, inst,
   3072 	    NULL, NULL, SCF_DECODE_FMRI_EXACT) == 0)
   3073 		return (0);
   3074 
   3075 	switch (scf_error()) {
   3076 	case SCF_ERROR_CONNECTION_BROKEN:
   3077 		return (ECONNABORTED);
   3078 
   3079 	case SCF_ERROR_NOT_FOUND:
   3080 		break;
   3081 
   3082 	case SCF_ERROR_HANDLE_MISMATCH:
   3083 	case SCF_ERROR_INVALID_ARGUMENT:
   3084 	case SCF_ERROR_CONSTRAINT_VIOLATED:
   3085 	default:
   3086 		bad_error("scf_handle_decode_fmri", scf_error());
   3087 	}
   3088 
   3089 	/* Make sure we're right, since we're adding piece-by-piece. */
   3090 	assert(strcmp(SCF_SERVICE_STARTD,
   3091 	    "svc:/system/svc/restarter:default") == 0);
   3092 
   3093 	if ((scope = scf_scope_create(h)) == NULL ||
   3094 	    (svc = scf_service_create(h)) == NULL) {
   3095 		ret = ENOMEM;
   3096 		goto out;
   3097 	}
   3098 
   3099 get_scope:
   3100 	if (scf_handle_get_scope(h, SCF_SCOPE_LOCAL, scope) != 0) {
   3101 		switch (scf_error()) {
   3102 		case SCF_ERROR_CONNECTION_BROKEN:
   3103 			ret = ECONNABORTED;
   3104 			goto out;
   3105 
   3106 		case SCF_ERROR_NOT_FOUND:
   3107 			(void) fputs(gettext(
   3108 			    "smf(5) repository missing local scope.\n"),
   3109 			    stderr);
   3110 			exit(1);
   3111 			/* NOTREACHED */
   3112 
   3113 		case SCF_ERROR_HANDLE_MISMATCH:
   3114 		case SCF_ERROR_INVALID_ARGUMENT:
   3115 		default:
   3116 			bad_error("scf_handle_get_scope", scf_error());
   3117 		}
   3118 	}
   3119 
   3120 get_svc:
   3121 	if (scf_scope_get_service(scope, "system/svc/restarter", svc) != 0) {
   3122 		switch (scf_error()) {
   3123 		case SCF_ERROR_CONNECTION_BROKEN:
   3124 			ret = ECONNABORTED;
   3125 			goto out;
   3126 
   3127 		case SCF_ERROR_DELETED:
   3128 			goto get_scope;
   3129 
   3130 		case SCF_ERROR_NOT_FOUND:
   3131 			break;
   3132 
   3133 		case SCF_ERROR_HANDLE_MISMATCH:
   3134 		case SCF_ERROR_INVALID_ARGUMENT:
   3135 		case SCF_ERROR_NOT_SET:
   3136 		default:
   3137 			bad_error("scf_scope_get_service", scf_error());
   3138 		}
   3139 
   3140 add_svc:
   3141 		if (scf_scope_add_service(scope, "system/svc/restarter", svc) !=
   3142 		    0) {
   3143 			switch (scf_error()) {
   3144 			case SCF_ERROR_CONNECTION_BROKEN:
   3145 				ret = ECONNABORTED;
   3146 				goto out;
   3147 
   3148 			case SCF_ERROR_EXISTS:
   3149 				goto get_svc;
   3150 
   3151 			case SCF_ERROR_PERMISSION_DENIED:
   3152 				ret = EPERM;
   3153 				goto out;
   3154 
   3155 			case SCF_ERROR_BACKEND_ACCESS:
   3156 				ret = EACCES;
   3157 				goto out;
   3158 
   3159 			case SCF_ERROR_BACKEND_READONLY:
   3160 				ret = EROFS;
   3161 				goto out;
   3162 
   3163 			case SCF_ERROR_HANDLE_MISMATCH:
   3164 			case SCF_ERROR_INVALID_ARGUMENT:
   3165 			case SCF_ERROR_NOT_SET:
   3166 			default:
   3167 				bad_error("scf_scope_add_service", scf_error());
   3168 			}
   3169 		}
   3170 	}
   3171 
   3172 get_inst:
   3173 	if (scf_service_get_instance(svc, "default", inst) != 0) {
   3174 		switch (scf_error()) {
   3175 		case SCF_ERROR_CONNECTION_BROKEN:
   3176 			ret = ECONNABORTED;
   3177 			goto out;
   3178 
   3179 		case SCF_ERROR_DELETED:
   3180 			goto add_svc;
   3181 
   3182 		case SCF_ERROR_NOT_FOUND:
   3183 			break;
   3184 
   3185 		case SCF_ERROR_HANDLE_MISMATCH:
   3186 		case SCF_ERROR_INVALID_ARGUMENT:
   3187 		case SCF_ERROR_NOT_SET:
   3188 		default:
   3189 			bad_error("scf_service_get_instance", scf_error());
   3190 		}
   3191 
   3192 		if (scf_service_add_instance(svc, "default", inst) !=
   3193 		    0) {
   3194 			switch (scf_error()) {
   3195 			case SCF_ERROR_CONNECTION_BROKEN:
   3196 				ret = ECONNABORTED;
   3197 				goto out;
   3198 
   3199 			case SCF_ERROR_DELETED:
   3200 				goto add_svc;
   3201 
   3202 			case SCF_ERROR_EXISTS:
   3203 				goto get_inst;
   3204 
   3205 			case SCF_ERROR_PERMISSION_DENIED:
   3206 				ret = EPERM;
   3207 				goto out;
   3208 
   3209 			case SCF_ERROR_BACKEND_ACCESS:
   3210 				ret = EACCES;
   3211 				goto out;
   3212 
   3213 			case SCF_ERROR_BACKEND_READONLY:
   3214 				ret = EROFS;
   3215 				goto out;
   3216 
   3217 			case SCF_ERROR_HANDLE_MISMATCH:
   3218 			case SCF_ERROR_INVALID_ARGUMENT:
   3219 			case SCF_ERROR_NOT_SET:
   3220 			default:
   3221 				bad_error("scf_service_add_instance",
   3222 				    scf_error());
   3223 			}
   3224 		}
   3225 	}
   3226 
   3227 	ret = 0;
   3228 
   3229 out:
   3230 	scf_service_destroy(svc);
   3231 	scf_scope_destroy(scope);
   3232 	return (ret);
   3233 }
   3234 
   3235 /*
   3236  * Fails with
   3237  *   ECONNABORTED - repository connection broken
   3238  *   ECANCELED - the transaction's property group was deleted
   3239  */
   3240 static int
   3241 transaction_add_set(scf_transaction_t *tx, scf_transaction_entry_t *ent,
   3242     const char *pname, scf_type_t type)
   3243 {
   3244 change_type:
   3245 	if (scf_transaction_property_change_type(tx, ent, pname, type) == 0)
   3246 		return (0);
   3247 
   3248 	switch (scf_error()) {
   3249 	case SCF_ERROR_CONNECTION_BROKEN:
   3250 		return (ECONNABORTED);
   3251 
   3252 	case SCF_ERROR_DELETED:
   3253 		return (ECANCELED);
   3254 
   3255 	case SCF_ERROR_NOT_FOUND:
   3256 		goto new;
   3257 
   3258 	case SCF_ERROR_HANDLE_MISMATCH:
   3259 	case SCF_ERROR_INVALID_ARGUMENT:
   3260 	case SCF_ERROR_NOT_BOUND:
   3261 	case SCF_ERROR_NOT_SET:
   3262 	default:
   3263 		bad_error("scf_transaction_property_change_type", scf_error());
   3264 	}
   3265 
   3266 new:
   3267 	if (scf_transaction_property_new(tx, ent, pname, type) == 0)
   3268 		return (0);
   3269 
   3270 	switch (scf_error()) {
   3271 	case SCF_ERROR_CONNECTION_BROKEN:
   3272 		return (ECONNABORTED);
   3273 
   3274 	case SCF_ERROR_DELETED:
   3275 		return (ECANCELED);
   3276 
   3277 	case SCF_ERROR_EXISTS:
   3278 		goto change_type;
   3279 
   3280 	case SCF_ERROR_HANDLE_MISMATCH:
   3281 	case SCF_ERROR_INVALID_ARGUMENT:
   3282 	case SCF_ERROR_NOT_BOUND:
   3283 	case SCF_ERROR_NOT_SET:
   3284 	default:
   3285 		bad_error("scf_transaction_property_new", scf_error());
   3286 		/* NOTREACHED */
   3287 	}
   3288 }
   3289 
   3290 static void
   3291 scferr(void)
   3292 {
   3293 	switch (scf_error()) {
   3294 	case SCF_ERROR_NO_MEMORY:
   3295 		console(B_TRUE, gettext("Out of memory.\n"));
   3296 		break;
   3297 
   3298 	case SCF_ERROR_CONNECTION_BROKEN:
   3299 		console(B_TRUE, gettext(
   3300 		    "Connection to smf(5) repository server broken.\n"));
   3301 		break;
   3302 
   3303 	case SCF_ERROR_NO_RESOURCES:
   3304 		console(B_TRUE, gettext(
   3305 		    "smf(5) repository server is out of memory.\n"));
   3306 		break;
   3307 
   3308 	case SCF_ERROR_PERMISSION_DENIED:
   3309 		console(B_TRUE, gettext("Insufficient privileges.\n"));
   3310 		break;
   3311 
   3312 	default:
   3313 		console(B_TRUE, gettext("libscf error: %s\n"),
   3314 		    scf_strerror(scf_error()));
   3315 	}
   3316 }
   3317 
   3318 static void
   3319 lscf_set_runlevel(char rl)
   3320 {
   3321 	scf_handle_t *h;
   3322 	scf_instance_t *inst = NULL;
   3323 	scf_propertygroup_t *pg = NULL;
   3324 	scf_transaction_t *tx = NULL;
   3325 	scf_transaction_entry_t *ent = NULL;
   3326 	scf_value_t *val = NULL;
   3327 	char buf[2];
   3328 	int r;
   3329 
   3330 	h = scf_handle_create(SCF_VERSION);
   3331 	if (h == NULL) {
   3332 		scferr();
   3333 		return;
   3334 	}
   3335 
   3336 	if (scf_handle_bind(h) != 0) {
   3337 		switch (scf_error()) {
   3338 		case SCF_ERROR_NO_SERVER:
   3339 			console(B_TRUE,
   3340 			    gettext("smf(5) repository server not running.\n"));
   3341 			goto bail;
   3342 
   3343 		default:
   3344 			scferr();
   3345 			goto bail;
   3346 		}
   3347 	}
   3348 
   3349 	if ((inst = scf_instance_create(h)) == NULL ||
   3350 	    (pg = scf_pg_create(h)) == NULL ||
   3351 	    (val = scf_value_create(h)) == NULL ||
   3352 	    (tx = scf_transaction_create(h)) == NULL ||
   3353 	    (ent = scf_entry_create(h)) == NULL) {
   3354 		scferr();
   3355 		goto bail;
   3356 	}
   3357 
   3358 get_inst:
   3359 	r = get_or_add_startd(inst);
   3360 	switch (r) {
   3361 	case 0:
   3362 		break;
   3363 
   3364 	case ENOMEM:
   3365 	case ECONNABORTED:
   3366 	case EPERM:
   3367 	case EACCES:
   3368 	case EROFS:
   3369 		scferr();
   3370 		goto bail;
   3371 	default:
   3372 		bad_error("get_or_add_startd", r);
   3373 	}
   3374 
   3375 get_pg:
   3376 	if (scf_instance_get_pg(inst, SCF_PG_OPTIONS_OVR, pg) != 0) {
   3377 		switch (scf_error()) {
   3378 		case SCF_ERROR_CONNECTION_BROKEN:
   3379 			scferr();
   3380 			goto bail;
   3381 
   3382 		case SCF_ERROR_DELETED:
   3383 			goto get_inst;
   3384 
   3385 		case SCF_ERROR_NOT_FOUND:
   3386 			break;
   3387 
   3388 		case SCF_ERROR_HANDLE_MISMATCH:
   3389 		case SCF_ERROR_INVALID_ARGUMENT:
   3390 		case SCF_ERROR_NOT_SET:
   3391 		default:
   3392 			bad_error("scf_instance_get_pg", scf_error());
   3393 		}
   3394 
   3395 add_pg:
   3396 		if (scf_instance_add_pg(inst, SCF_PG_OPTIONS_OVR,
   3397 		    SCF_PG_OPTIONS_OVR_TYPE, SCF_PG_OPTIONS_OVR_FLAGS, pg) !=
   3398 		    0) {
   3399 			switch (scf_error()) {
   3400 			case SCF_ERROR_CONNECTION_BROKEN:
   3401 			case SCF_ERROR_PERMISSION_DENIED:
   3402 			case SCF_ERROR_BACKEND_ACCESS:
   3403 				scferr();
   3404 				goto bail;
   3405 
   3406 			case SCF_ERROR_DELETED:
   3407 				goto get_inst;
   3408 
   3409 			case SCF_ERROR_EXISTS:
   3410 				goto get_pg;
   3411 
   3412 			case SCF_ERROR_HANDLE_MISMATCH:
   3413 			case SCF_ERROR_INVALID_ARGUMENT:
   3414 			case SCF_ERROR_NOT_SET:
   3415 			default:
   3416 				bad_error("scf_instance_add_pg", scf_error());
   3417 			}
   3418 		}
   3419 	}
   3420 
   3421 	buf[0] = rl;
   3422 	buf[1] = '\0';
   3423 	r = scf_value_set_astring(val, buf);
   3424 	assert(r == 0);
   3425 
   3426 	for (;;) {
   3427 		if (scf_transaction_start(tx, pg) != 0) {
   3428 			switch (scf_error()) {
   3429 			case SCF_ERROR_CONNECTION_BROKEN:
   3430 			case SCF_ERROR_PERMISSION_DENIED:
   3431 			case SCF_ERROR_BACKEND_ACCESS:
   3432 				scferr();
   3433 				goto bail;
   3434 
   3435 			case SCF_ERROR_DELETED:
   3436 				goto add_pg;
   3437 
   3438 			case SCF_ERROR_HANDLE_MISMATCH:
   3439 			case SCF_ERROR_NOT_BOUND:
   3440 			case SCF_ERROR_IN_USE:
   3441 			case SCF_ERROR_NOT_SET:
   3442 			default:
   3443 				bad_error("scf_transaction_start", scf_error());
   3444 			}
   3445 		}
   3446 
   3447 		r = transaction_add_set(tx, ent, "runlevel", SCF_TYPE_ASTRING);
   3448 		switch (r) {
   3449 		case 0:
   3450 			break;
   3451 
   3452 		case ECONNABORTED:
   3453 			scferr();
   3454 			goto bail;
   3455 
   3456 		case ECANCELED:
   3457 			scf_transaction_reset(tx);
   3458 			goto add_pg;
   3459 
   3460 		default:
   3461 			bad_error("transaction_add_set", r);
   3462 		}
   3463 
   3464 		r = scf_entry_add_value(ent, val);
   3465 		assert(r == 0);
   3466 
   3467 		r = scf_transaction_commit(tx);
   3468 		if (r == 1)
   3469 			break;
   3470 
   3471 		if (r != 0) {
   3472 			switch (scf_error()) {
   3473 			case SCF_ERROR_CONNECTION_BROKEN:
   3474 			case SCF_ERROR_PERMISSION_DENIED:
   3475 			case SCF_ERROR_BACKEND_ACCESS:
   3476 			case SCF_ERROR_BACKEND_READONLY:
   3477 				scferr();
   3478 				goto bail;
   3479 
   3480 			case SCF_ERROR_DELETED:
   3481 				scf_transaction_reset(tx);
   3482 				goto add_pg;
   3483 
   3484 			case SCF_ERROR_INVALID_ARGUMENT:
   3485 			case SCF_ERROR_NOT_BOUND:
   3486 			case SCF_ERROR_NOT_SET:
   3487 			default:
   3488 				bad_error("scf_transaction_commit",
   3489 				    scf_error());
   3490 			}
   3491 		}
   3492 
   3493 		scf_transaction_reset(tx);
   3494 		(void) scf_pg_update(pg);
   3495 	}
   3496 
   3497 bail:
   3498 	scf_transaction_destroy(tx);
   3499 	scf_entry_destroy(ent);
   3500 	scf_value_destroy(val);
   3501 	scf_pg_destroy(pg);
   3502 	scf_instance_destroy(inst);
   3503 
   3504 	(void) scf_handle_unbind(h);
   3505 	scf_handle_destroy(h);
   3506 }
   3507 
   3508 /*
   3509  * Function to handle requests from users to main init running as process 1.
   3510  */
   3511 static void
   3512 userinit(int argc, char **argv)
   3513 {
   3514 	FILE	*fp;
   3515 	char	*ln;
   3516 	int	init_signal;
   3517 	struct stat	sconbuf, conbuf;
   3518 	const char *usage_msg = "Usage: init [0123456SsQqabc]\n";
   3519 
   3520 	/*
   3521 	 * We are a user invoked init.  Is there an argument and is it
   3522 	 * a single character?  If not, print usage message and quit.
   3523 	 */
   3524 	if (argc != 2 || argv[1][1] != '\0') {
   3525 		(void) fprintf(stderr, usage_msg);
   3526 		exit(0);
   3527 	}
   3528 
   3529 	if ((init_signal = lvlname_to_state((char)argv[1][0])) == -1) {
   3530 		(void) fprintf(stderr, usage_msg);
   3531 		(void) audit_put_record(ADT_FAILURE, ADT_FAIL_VALUE_BAD_CMD,
   3532 		    argv[1]);
   3533 		exit(1);
   3534 	}
   3535 
   3536 	if (init_signal == SINGLE_USER) {
   3537 		/*
   3538 		 * Make sure this process is talking to a legal tty line
   3539 		 * and that /dev/syscon is linked to this line.
   3540 		 */
   3541 		ln = ttyname(0);	/* Get the name of tty */
   3542 		if (ln == NULL) {
   3543 			(void) fprintf(stderr,
   3544 			    "Standard input not a tty line\n");
   3545 			(void) audit_put_record(ADT_FAILURE,
   3546 			    ADT_FAIL_VALUE_BAD_TTY, argv[1]);
   3547 			exit(1);
   3548 		}
   3549 
   3550 		if ((stat(ln, &sconbuf) != -1) &&
   3551 		    (stat(SYSCON, &conbuf) == -1 ||
   3552 		    sconbuf.st_rdev != conbuf.st_rdev)) {
   3553 			/*
   3554 			 * /dev/syscon needs to change.
   3555 			 * Unlink /dev/syscon and relink it to the current line.
   3556 			 */
   3557 			if (lstat(SYSCON, &conbuf) != -1 &&
   3558 			    unlink(SYSCON) == FAILURE) {
   3559 				perror("Can't unlink /dev/syscon");
   3560 				(void) fprintf(stderr,
   3561 				    "Run command on the system console.\n");
   3562 				(void) audit_put_record(ADT_FAILURE,
   3563 				    ADT_FAIL_VALUE_PROGRAM, argv[1]);
   3564 				exit(1);
   3565 			}
   3566 			if (symlink(ln, SYSCON) == FAILURE) {
   3567 				(void) fprintf(stderr,
   3568 				    "Can't symlink /dev/syscon to %s: %s", ln,
   3569 				    strerror(errno));
   3570 
   3571 				/* Try to leave a syscon */
   3572 				(void) link(SYSTTY, SYSCON);
   3573 				(void) audit_put_record(ADT_FAILURE,
   3574 				    ADT_FAIL_VALUE_PROGRAM, argv[1]);
   3575 				exit(1);
   3576 			}
   3577 
   3578 			/*
   3579 			 * Try to leave a message on system console saying where
   3580 			 * /dev/syscon is currently connected.
   3581 			 */
   3582 			if ((fp = fopen(SYSTTY, "r+")) != NULL) {
   3583 				(void) fprintf(fp,
   3584 				    "\n****	SYSCON CHANGED TO %s	****\n",
   3585 				    ln);
   3586 				(void) fclose(fp);
   3587 			}
   3588 		}
   3589 	}
   3590 
   3591 	update_boot_archive(init_signal);
   3592 
   3593 	(void) audit_put_record(ADT_SUCCESS, ADT_SUCCESS, argv[1]);
   3594 
   3595 	/*
   3596 	 * Signal init; init will take care of telling svc.startd.
   3597 	 */
   3598 	if (kill(init_pid, init_signal) == FAILURE) {
   3599 		(void) fprintf(stderr, "Must be super-user\n");
   3600 		(void) audit_put_record(ADT_FAILURE,
   3601 		    ADT_FAIL_VALUE_AUTH, argv[1]);
   3602 		exit(1);
   3603 	}
   3604 
   3605 	exit(0);
   3606 }
   3607 
   3608 
   3609 #define	DELTA	25	/* Number of pidlist elements to allocate at a time */
   3610 
   3611 /* ARGSUSED */
   3612 void
   3613 sigpoll(int n)
   3614 {
   3615 	struct pidrec prec;
   3616 	struct pidrec *p = &prec;
   3617 	struct pidlist *plp;
   3618 	struct pidlist *tp, *savetp;
   3619 	int i;
   3620 
   3621 	if (Pfd < 0) {
   3622 		return;
   3623 	}
   3624 
   3625 	for (;;) {
   3626 		/*
   3627 		 * Important Note: Either read will really fail (in which case
   3628 		 * return is all we can do) or will get EAGAIN (Pfd was opened
   3629 		 * O_NDELAY), in which case we also want to return.
   3630 		 * Always return from here!
   3631 		 */
   3632 		if (read(Pfd, p, sizeof (struct pidrec)) !=
   3633 						sizeof (struct pidrec)) {
   3634 			return;
   3635 		}
   3636 		switch (p->pd_type) {
   3637 
   3638 		case ADDPID:
   3639 			/*
   3640 			 * New "godchild", add to list.
   3641 			 */
   3642 			if (Plfree == NULL) {
   3643 				plp = (struct pidlist *)calloc(DELTA,
   3644 				    sizeof (struct pidlist));
   3645 				if (plp == NULL) {
   3646 					/* Can't save pid */
   3647 					break;
   3648 				}
   3649 				/*
   3650 				 * Point at 2nd record allocated, we'll use plp.
   3651 				 */
   3652 				tp = plp + 1;
   3653 				/*
   3654 				 * Link them into a chain.
   3655 				 */
   3656 				Plfree = tp;
   3657 				for (i = 0; i < DELTA - 2; i++) {
   3658 					tp->pl_next = tp + 1;
   3659 					tp++;
   3660 				}
   3661 			} else {
   3662 				plp = Plfree;
   3663 				Plfree = plp->pl_next;
   3664 			}
   3665 			plp->pl_pid = p->pd_pid;
   3666 			plp->pl_dflag = 0;
   3667 			plp->pl_next = NULL;
   3668 			/*
   3669 			 * Note - pid list is kept in increasing order of pids.
   3670 			 */
   3671 			if (Plhead == NULL) {
   3672 				Plhead = plp;
   3673 				/* Back up to read next record */
   3674 				break;
   3675 			} else {
   3676 				savetp = tp = Plhead;
   3677 				while (tp) {
   3678 					if (plp->pl_pid > tp->pl_pid) {
   3679 						savetp = tp;
   3680 						tp = tp->pl_next;
   3681 						continue;
   3682 					} else if (plp->pl_pid < tp->pl_pid) {
   3683 						if (tp == Plhead) {
   3684 							plp->pl_next = Plhead;
   3685 							Plhead = plp;
   3686 						} else {
   3687 							plp->pl_next =
   3688 							    savetp->pl_next;
   3689 							savetp->pl_next = plp;
   3690 						}
   3691 						break;
   3692 					} else {
   3693 						/* Already in list! */
   3694 						plp->pl_next = Plfree;
   3695 						Plfree = plp;
   3696 						break;
   3697 					}
   3698 				}
   3699 				if (tp == NULL) {
   3700 					/* Add to end of list */
   3701 					savetp->pl_next = plp;
   3702 				}
   3703 			}
   3704 			/* Back up to read next record. */
   3705 			break;
   3706 
   3707 		case REMPID:
   3708 			/*
   3709 			 * This one was handled by someone else,
   3710 			 * purge it from the list.
   3711 			 */
   3712 			if (Plhead == NULL) {
   3713 				/* Back up to read next record. */
   3714 				break;
   3715 			}
   3716 			savetp = tp = Plhead;
   3717 			while (tp) {
   3718 				if (p->pd_pid > tp->pl_pid) {
   3719 					/* Keep on looking. */
   3720 					savetp = tp;
   3721 					tp = tp->pl_next;
   3722 					continue;
   3723 				} else if (p->pd_pid < tp->pl_pid) {
   3724 					/* Not in list. */
   3725 					break;
   3726 				} else {
   3727 					/* Found it. */
   3728 					if (tp == Plhead)
   3729 						Plhead = tp->pl_next;
   3730 					else
   3731 						savetp->pl_next = tp->pl_next;
   3732 					tp->pl_next = Plfree;
   3733 					Plfree = tp;
   3734 					break;
   3735 				}
   3736 			}
   3737 			/* Back up to read next record. */
   3738 			break;
   3739 		default:
   3740 			console(B_TRUE, "Bad message on initpipe\n");
   3741 			break;
   3742 		}
   3743 	}
   3744 }
   3745 
   3746 
   3747 static void
   3748 cleanaux()
   3749 {
   3750 	struct pidlist *savep, *p;
   3751 	pid_t	pid;
   3752 	short	status;
   3753 
   3754 	(void) sighold(SIGCLD);
   3755 	Gchild = 0;	/* Note - Safe to do this here since no SIGCLDs */
   3756 	(void) sighold(SIGPOLL);
   3757 	savep = p = Plhead;
   3758 	while (p) {
   3759 		if (p->pl_dflag) {
   3760 			/*
   3761 			 * Found an entry to delete,
   3762 			 * remove it from list first.
   3763 			 */
   3764 			pid = p->pl_pid;
   3765 			status = p->pl_exit;
   3766 			if (p == Plhead) {
   3767 				Plhead = p->pl_next;
   3768 				p->pl_next = Plfree;
   3769 				Plfree = p;
   3770 				savep = p = Plhead;
   3771 			} else {
   3772 				savep->pl_next = p->pl_next;
   3773 				p->pl_next = Plfree;
   3774 				Plfree = p;
   3775 				p = savep->pl_next;
   3776 			}
   3777 			clearent(pid, status);
   3778 			continue;
   3779 		}
   3780 		savep = p;
   3781 		p = p->pl_next;
   3782 	}
   3783 	(void) sigrelse(SIGPOLL);
   3784 	(void) sigrelse(SIGCLD);
   3785 }
   3786 
   3787 
   3788 /*
   3789  * /etc/inittab has more entries and we have run out of room in the proc_table
   3790  * array. Double the size of proc_table to accomodate the extra entries.
   3791  */
   3792 static void
   3793 increase_proc_table_size()
   3794 {
   3795 	sigset_t block, unblock;
   3796 	void *ptr;
   3797 	size_t delta = num_proc * sizeof (struct PROC_TABLE);
   3798 
   3799 
   3800 	/*
   3801 	 * Block signals for realloc.
   3802 	 */
   3803 	(void) sigfillset(&block);
   3804 	(void) sigprocmask(SIG_BLOCK, &block, &unblock);
   3805 
   3806 
   3807 	/*
   3808 	 * On failure we just return because callers of this function check
   3809 	 * for failure.
   3810 	 */
   3811 	do
   3812 		ptr = realloc(g_state, g_state_sz + delta);
   3813 	while (ptr == NULL && errno == EAGAIN);
   3814 
   3815 	if (ptr != NULL) {
   3816 		/* ensure that the new part is initialized to zero */
   3817 		bzero((caddr_t)ptr + g_state_sz, delta);
   3818 
   3819 		g_state = ptr;
   3820 		g_state_sz += delta;
   3821 		num_proc <<= 1;
   3822 	}
   3823 
   3824 
   3825 	/* unblock our signals before returning */
   3826 	(void) sigprocmask(SIG_SETMASK, &unblock, NULL);
   3827 }
   3828 
   3829 
   3830 
   3831 /*
   3832  * Sanity check g_state.
   3833  */
   3834 static int
   3835 st_sane()
   3836 {
   3837 	int i;
   3838 	struct PROC_TABLE *ptp;
   3839 
   3840 
   3841 	/* Note: cur_state is encoded as a signal number */
   3842 	if (cur_state < 1 || cur_state == 9 || cur_state > 13)
   3843 		return (0);
   3844 
   3845 	/* Check num_proc */
   3846 	if (g_state_sz != sizeof (struct init_state) + (num_proc - 1) *
   3847 	    sizeof (struct PROC_TABLE))
   3848 		return (0);
   3849 
   3850 	/* Check proc_table */
   3851 	for (i = 0, ptp = proc_table; i < num_proc; ++i, ++ptp) {
   3852 		/* skip unoccupied entries */
   3853 		if (!(ptp->p_flags & OCCUPIED))
   3854 			continue;
   3855 
   3856 		/* p_flags has no bits outside of PF_MASK */
   3857 		if (ptp->p_flags & ~(PF_MASK))
   3858 			return (0);
   3859 
   3860 		/* 5 <= pid <= MAXPID */
   3861 		if (ptp->p_pid < 5 || ptp->p_pid > MAXPID)
   3862 			return (0);
   3863 
   3864 		/* p_count >= 0 */
   3865 		if (ptp->p_count < 0)
   3866 			return (0);
   3867 
   3868 		/* p_time >= 0 */
   3869 		if (ptp->p_time < 0)
   3870 			return (0);
   3871 	}
   3872 
   3873 	return (1);
   3874 }
   3875 
   3876 /*
   3877  * Initialize our state.
   3878  *
   3879  * If the system just booted, then init_state_file, which is located on an
   3880  * everpresent tmpfs filesystem, should not exist.
   3881  *
   3882  * If we were restarted, then init_state_file should exist, in
   3883  * which case we'll read it in, sanity check it, and use it.
   3884  *
   3885  * Note: You can't call console() until proc_table is ready.
   3886  */
   3887 void
   3888 st_init()
   3889 {
   3890 	struct stat stb;
   3891 	int ret, st_fd, insane = 0;
   3892 	size_t to_be_read;
   3893 	char *ptr;
   3894 
   3895 
   3896 	booting = 1;
   3897 
   3898 	do {
   3899 		/*
   3900 		 * If we can exclusively create the file, then we're the
   3901 		 * initial invocation of init(1M).
   3902 		 */
   3903 		st_fd = open(init_state_file, O_RDWR | O_CREAT | O_EXCL,
   3904 		    S_IRUSR | S_IWUSR);
   3905 	} while (st_fd == -1 && errno == EINTR);
   3906 	if (st_fd != -1)
   3907 		goto new_state;
   3908 
   3909 	booting = 0;
   3910 
   3911 	do {
   3912 		st_fd = open(init_state_file, O_RDWR, S_IRUSR | S_IWUSR);
   3913 	} while (st_fd == -1 && errno == EINTR);
   3914 	if (st_fd == -1)
   3915 		goto new_state;
   3916 
   3917 	/* Get the size of the file. */
   3918 	do
   3919 		ret = fstat(st_fd, &stb);
   3920 	while (ret == -1 && errno == EINTR);
   3921 	if (ret == -1)
   3922 		goto new_state;
   3923 
   3924 	do
   3925 		g_state = malloc(stb.st_size);
   3926 	while (g_state == NULL && errno == EAGAIN);
   3927 	if (g_state == NULL)
   3928 		goto new_state;
   3929 
   3930 	to_be_read = stb.st_size;
   3931 	ptr = (char *)g_state;
   3932 	while (to_be_read > 0) {
   3933 		ssize_t read_ret;
   3934 
   3935 		read_ret = read(st_fd, ptr, to_be_read);
   3936 		if (read_ret < 0) {
   3937 			if (errno == EINTR)
   3938 				continue;
   3939 
   3940 			goto new_state;
   3941 		}
   3942 
   3943 		to_be_read -= read_ret;
   3944 		ptr += read_ret;
   3945 	}
   3946 
   3947 	(void) close(st_fd);
   3948 
   3949 	g_state_sz = stb.st_size;
   3950 
   3951 	if (st_sane()) {
   3952 		console(B_TRUE, "Restarting.\n");
   3953 		return;
   3954 	}
   3955 
   3956 	insane = 1;
   3957 
   3958 new_state:
   3959 	if (st_fd >= 0)
   3960 		(void) close(st_fd);
   3961 	else
   3962 		(void) unlink(init_state_file);
   3963 
   3964 	if (g_state != NULL)
   3965 		free(g_state);
   3966 
   3967 	/* Something went wrong, so allocate new state. */
   3968 	g_state_sz = sizeof (struct init_state) +
   3969 	    ((init_num_proc - 1) * sizeof (struct PROC_TABLE));
   3970 	do
   3971 		g_state = calloc(1, g_state_sz);
   3972 	while (g_state == NULL && errno == EAGAIN);
   3973 	if (g_state == NULL) {
   3974 		/* Fatal error! */
   3975 		exit(errno);
   3976 	}
   3977 
   3978 	g_state->ist_runlevel = -1;
   3979 	num_proc = init_num_proc;
   3980 
   3981 	if (!booting) {
   3982 		console(B_TRUE, "Restarting.\n");
   3983 
   3984 		/* Overwrite the bad state file. */
   3985 		st_write();
   3986 
   3987 		if (!insane) {
   3988 			console(B_TRUE,
   3989 			    "Error accessing persistent state file `%s'.  "
   3990 			    "Ignored.\n", init_state_file);
   3991 		} else {
   3992 			console(B_TRUE,
   3993 			    "Persistent state file `%s' is invalid and was "
   3994 			    "ignored.\n", init_state_file);
   3995 		}
   3996 	}
   3997 }
   3998 
   3999 /*
   4000  * Write g_state out to the state file.
   4001  */
   4002 void
   4003 st_write()
   4004 {
   4005 	static int complained = 0;
   4006 
   4007 	int st_fd;
   4008 	char *cp;
   4009 	size_t sz;
   4010 	ssize_t ret;
   4011 
   4012 
   4013 	do {
   4014 		st_fd = open(init_next_state_file,
   4015 		    O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
   4016 	} while (st_fd < 0 && errno == EINTR);
   4017 	if (st_fd < 0)
   4018 		goto err;
   4019 
   4020 	cp = (char *)g_state;
   4021 	sz = g_state_sz;
   4022 	while (sz > 0) {
   4023 		ret = write(st_fd, cp, sz);
   4024 		if (ret < 0) {
   4025 			if (errno == EINTR)
   4026 				continue;
   4027 
   4028 			goto err;
   4029 		}
   4030 
   4031 		sz -= ret;
   4032 		cp += ret;
   4033 	}
   4034 
   4035 	(void) close(st_fd);
   4036 	st_fd = -1;
   4037 	if (rename(init_next_state_file, init_state_file)) {
   4038 		(void) unlink(init_next_state_file);
   4039 		goto err;
   4040 	}
   4041 	complained = 0;
   4042 
   4043 	return;
   4044 
   4045 err:
   4046 	if (st_fd >= 0)
   4047 		(void) close(st_fd);
   4048 
   4049 	if (!booting && !complained) {
   4050 		/*
   4051 		 * Only complain after the filesystem should have come up.
   4052 		 * And only do it once so we don't loop between console()
   4053 		 * & efork().
   4054 		 */
   4055 		complained = 1;
   4056 		if (st_fd)
   4057 			console(B_TRUE, "Couldn't write persistent state "
   4058 			    "file `%s'.\n", init_state_file);
   4059 		else
   4060 			console(B_TRUE, "Couldn't move persistent state "
   4061 			    "file `%s' to `%s'.\n", init_next_state_file,
   4062 			    init_state_file);
   4063 	}
   4064 }
   4065 
   4066 /*
   4067  * Create a contract with these parameters.
   4068  */
   4069 static int
   4070 contract_make_template(uint_t info, uint_t critical, uint_t fatal,
   4071     uint64_t cookie)
   4072 {
   4073 	int fd, err;
   4074 
   4075 	char *ioctl_tset_emsg =
   4076 	    "Couldn't set \"%s\" contract template parameter: %s.\n";
   4077 
   4078 	do
   4079 		fd = open64(CTFS_ROOT "/process/template", O_RDWR);
   4080 	while (fd < 0 && errno == EINTR);
   4081 	if (fd < 0) {
   4082 		console(B_TRUE, "Couldn't create process template: %s.\n",
   4083 		    strerror(errno));
   4084 		return (-1);
   4085 	}
   4086 
   4087 	if (err = ct_pr_tmpl_set_param(fd, CT_PR_INHERIT | CT_PR_REGENT))
   4088 		console(B_TRUE, "Contract set template inherit, regent "
   4089 		    "failed: %s.\n", strerror(err));
   4090 
   4091 	/*
   4092 	 * These errors result in a misconfigured template, which is better
   4093 	 * than no template at all, so warn but don't abort.
   4094 	 */
   4095 	if (err = ct_tmpl_set_informative(fd, info))
   4096 		console(B_TRUE, ioctl_tset_emsg, "informative", strerror(err));
   4097 
   4098 	if (err = ct_tmpl_set_critical(fd, critical))
   4099 		console(B_TRUE, ioctl_tset_emsg, "critical", strerror(err));
   4100 
   4101 	if (err = ct_pr_tmpl_set_fatal(fd, fatal))
   4102 		console(B_TRUE, ioctl_tset_emsg, "fatal", strerror(err));
   4103 
   4104 	if (err = ct_tmpl_set_cookie(fd, cookie))
   4105 		console(B_TRUE, ioctl_tset_emsg, "cookie", strerror(err));
   4106 
   4107 	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
   4108 
   4109 	return (fd);
   4110 }
   4111 
   4112 /*
   4113  * Create the templates and open an event file descriptor.  We use dup2(2) to
   4114  * get these descriptors away from the stdin/stdout/stderr group.
   4115  */
   4116 static void
   4117 contracts_init()
   4118 {
   4119 	int err, fd;
   4120 
   4121 	/*
   4122 	 * Create & configure a legacy template.  We only want empty events so
   4123 	 * we know when to abandon them.
   4124 	 */
   4125 	legacy_tmpl = contract_make_template(0, CT_PR_EV_EMPTY, CT_PR_EV_HWERR,
   4126 	    ORDINARY_COOKIE);
   4127 	if (legacy_tmpl >= 0) {
   4128 		err = ct_tmpl_activate(legacy_tmpl);
   4129 		if (err != 0) {
   4130 			(void) close(legacy_tmpl);
   4131 			legacy_tmpl = -1;
   4132 			console(B_TRUE,
   4133 			    "Couldn't activate legacy template (%s); "
   4134 			    "legacy services will be in init's contract.\n",
   4135 			    strerror(err));
   4136 		}
   4137 	} else
   4138 		console(B_TRUE,
   4139 		    "Legacy services will be in init's contract.\n");
   4140 
   4141 	if (dup2(legacy_tmpl, 255) == -1) {
   4142 		console(B_TRUE, "Could not duplicate legacy template: %s.\n",
   4143 		    strerror(errno));
   4144 	} else {
   4145 		(void) close(legacy_tmpl);
   4146 		legacy_tmpl = 255;
   4147 	}
   4148 
   4149 	(void) fcntl(legacy_tmpl, F_SETFD, FD_CLOEXEC);
   4150 
   4151 	startd_tmpl = contract_make_template(0, CT_PR_EV_EMPTY,
   4152 	    CT_PR_EV_HWERR | CT_PR_EV_SIGNAL | CT_PR_EV_CORE, STARTD_COOKIE);
   4153 
   4154 	if (dup2(startd_tmpl, 254) == -1) {
   4155 		console(B_TRUE, "Could not duplicate startd template: %s.\n",
   4156 		    strerror(errno));
   4157 	} else {
   4158 		(void) close(startd_tmpl);
   4159 		startd_tmpl = 254;
   4160 	}
   4161 
   4162 	(void) fcntl(startd_tmpl, F_SETFD, FD_CLOEXEC);
   4163 
   4164 	if (legacy_tmpl < 0 && startd_tmpl < 0) {
   4165 		/* The creation errors have already been reported. */
   4166 		console(B_TRUE,
   4167 		    "Ignoring contract events.  Core smf(5) services will not "
   4168 		    "be restarted.\n");
   4169 		return;
   4170 	}
   4171 
   4172 	/*
   4173 	 * Open an event endpoint.
   4174 	 */
   4175 	do
   4176 		fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
   4177 	while (fd < 0 && errno == EINTR);
   4178 	if (fd < 0) {
   4179 		console(B_TRUE,
   4180 		    "Couldn't open process pbundle: %s.  Core smf(5) services "
   4181 		    "will not be restarted.\n", strerror(errno));
   4182 		return;
   4183 	}
   4184 
   4185 	if (dup2(fd, 253) == -1) {
   4186 		console(B_TRUE, "Could not duplicate process bundle: %s.\n",
   4187 		    strerror(errno));
   4188 	} else {
   4189 		(void) close(fd);
   4190 		fd = 253;
   4191 	}
   4192 
   4193 	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
   4194 
   4195 	/* Reset in case we've been restarted. */
   4196 	(void) ct_event_reset(fd);
   4197 
   4198 	poll_fds[0].fd = fd;
   4199 	poll_fds[0].events = POLLIN;
   4200 	poll_nfds = 1;
   4201 }
   4202 
   4203 static int
   4204 contract_getfile(ctid_t id, const char *name, int oflag)
   4205 {
   4206 	int fd;
   4207 
   4208 	do
   4209 		fd = contract_open(id, "process", name, oflag);
   4210 	while (fd < 0 && errno == EINTR);
   4211 
   4212 	if (fd < 0)
   4213 		console(B_TRUE, "Couldn't open %s for contract %ld: %s.\n",
   4214 		    name, id, strerror(errno));
   4215 
   4216 	return (fd);
   4217 }
   4218 
   4219 static int
   4220 contract_cookie(ctid_t id, uint64_t *cp)
   4221 {
   4222 	int fd, err;
   4223 	ct_stathdl_t sh;
   4224 
   4225 	fd = contract_getfile(id, "status", O_RDONLY);
   4226 	if (fd < 0)
   4227 		return (-1);
   4228 
   4229 	err = ct_status_read(fd, CTD_COMMON, &sh);
   4230 	if (err != 0) {
   4231 		console(B_TRUE, "Couldn't read status of contract %ld: %s.\n",
   4232 		    id, strerror(err));
   4233 		(void) close(fd);
   4234 		return (-1);
   4235 	}
   4236 
   4237 	(void) close(fd);
   4238 
   4239 	*cp = ct_status_get_cookie(sh);
   4240 
   4241 	ct_status_free(sh);
   4242 	return (0);
   4243 }
   4244 
   4245 static void
   4246 contract_ack(ct_evthdl_t e)
   4247 {
   4248 	int fd;
   4249 
   4250 	if (ct_event_get_flags(e) & CTE_INFO)
   4251 		return;
   4252 
   4253 	fd = contract_getfile(ct_event_get_ctid(e), "ctl", O_WRONLY);
   4254 	if (fd < 0)
   4255 		return;
   4256 
   4257 	(void) ct_ctl_ack(fd, ct_event_get_evid(e));
   4258 	(void) close(fd);
   4259 }
   4260 
   4261 /*
   4262  * Process a contract event.
   4263  */
   4264 static void
   4265 contract_event(struct pollfd *poll)
   4266 {
   4267 	ct_evthdl_t e;
   4268 	int err;
   4269 	ctid_t ctid;
   4270 
   4271 	if (!(poll->revents & POLLIN)) {
   4272 		if (poll->revents & POLLERR)
   4273 			console(B_TRUE,
   4274 			    "Unknown poll error on my process contract "
   4275 			    "pbundle.\n");
   4276 		return;
   4277 	}
   4278 
   4279 	err = ct_event_read(poll->fd, &e);
   4280 	if (err != 0) {
   4281 		console(B_TRUE, "Error retrieving contract event: %s.\n",
   4282 		    strerror(err));
   4283 		return;
   4284 	}
   4285 
   4286 	ctid = ct_event_get_ctid(e);
   4287 
   4288 	if (ct_event_get_type(e) == CT_PR_EV_EMPTY) {
   4289 		uint64_t cookie;
   4290 		int ret, abandon = 1;
   4291 
   4292 		/* If it's svc.startd, restart it.  Else, abandon. */
   4293 		ret = contract_cookie(ctid, &cookie);
   4294 
   4295 		if (ret == 0) {
   4296 			if (cookie == STARTD_COOKIE &&
   4297 			    do_restart_startd) {
   4298 				if (smf_debug)
   4299 					console(B_TRUE, "Restarting "
   4300 					    "svc.startd.\n");
   4301 
   4302 				/*
   4303 				 * Account for the failure.  If the failure rate
   4304 				 * exceeds a threshold, then drop to maintenance
   4305 				 * mode.
   4306 				 */
   4307 				startd_record_failure();
   4308 				if (startd_failure_rate_critical())
   4309 					enter_maintenance();
   4310 
   4311 				if (startd_tmpl < 0)
   4312 					console(B_TRUE,
   4313 					    "Restarting svc.startd in "
   4314 					    "improper contract (bad "
   4315 					    "template).\n");
   4316 
   4317 				(void) startd_run(startd_cline, startd_tmpl,
   4318 				    ctid);
   4319 
   4320 				abandon = 0;
   4321 			}
   4322 		}
   4323 
   4324 		if (abandon && (err = contract_abandon_id(ctid))) {
   4325 			console(B_TRUE, "Couldn't abandon contract %ld: %s.\n",
   4326 			    ctid, strerror(err));
   4327 		}
   4328 
   4329 		/*
   4330 		 * No need to acknowledge the event since either way the
   4331 		 * originating contract should be abandoned.
   4332 		 */
   4333 	} else {
   4334 		console(B_TRUE,
   4335 		    "Received contract event of unexpected type %d from "
   4336 		    "contract %ld.\n", ct_event_get_type(e), ctid);
   4337 
   4338 		if ((ct_event_get_flags(e) & (CTE_INFO | CTE_ACK)) == 0)
   4339 			/* Allow unexpected critical events to be released. */
   4340 			contract_ack(e);
   4341 	}
   4342 
   4343 	ct_event_free(e);
   4344 }
   4345 
   4346 /*
   4347  * svc.startd(1M) Management
   4348  */
   4349 
   4350 /*
   4351  * (Re)start svc.startd(1M).  old_ctid should be the contract ID of the old
   4352  * contract, or 0 if we're starting it for the first time.  If wait is true
   4353  * we'll wait for and return the exit value of the child.
   4354  */
   4355 static int
   4356 startd_run(const char *cline, int tmpl, ctid_t old_ctid)
   4357 {
   4358 	int err, i, ret, did_activate;
   4359 	pid_t pid;
   4360 	struct stat sb;
   4361 
   4362 	if (cline[0] == '\0')
   4363 		return (-1);
   4364 
   4365 	/*
   4366 	 * Don't restart startd if the system is rebooting or shutting down.
   4367 	 */
   4368 	do {
   4369 		ret = stat("/etc/svc/volatile/resetting", &sb);
   4370 	} while (ret == -1 && errno == EINTR);
   4371 
   4372 	if (ret == 0) {
   4373 		if (smf_debug)
   4374 			console(B_TRUE, "Quiescing for reboot.\n");
   4375 		(void) pause();
   4376 		return (-1);
   4377 	}
   4378 
   4379 	err = ct_pr_tmpl_set_transfer(tmpl, old_ctid);
   4380 	if (err == EINVAL) {
   4381 		console(B_TRUE, "Remake startd_tmpl; reattempt transfer.\n");
   4382 		tmpl = startd_tmpl = contract_make_template(0, CT_PR_EV_EMPTY,
   4383 		    CT_PR_EV_HWERR, STARTD_COOKIE);
   4384 
   4385 		err = ct_pr_tmpl_set_transfer(tmpl, old_ctid);
   4386 	}
   4387 	if (err != 0) {
   4388 		console(B_TRUE,
   4389 		    "Couldn't set transfer parameter of contract template: "
   4390 		    "%s.\n", strerror(err));
   4391 	}
   4392 
   4393 	if ((err = ct_pr_tmpl_set_svc_fmri(startd_tmpl,
   4394 	    SCF_SERVICE_STARTD)) != 0)
   4395 		console(B_TRUE,
   4396 		    "Can not set svc_fmri in contract template: %s\n",
   4397 		    strerror(err));
   4398 	if ((err = ct_pr_tmpl_set_svc_aux(startd_tmpl,
   4399 	    startd_svc_aux)) != 0)
   4400 		console(B_TRUE,
   4401 		    "Can not set svc_aux in contract template: %s\n",
   4402 		    strerror(err));
   4403 	did_activate = !(ct_tmpl_activate(tmpl));
   4404 	if (!did_activate)
   4405 		console(B_TRUE,
   4406 		    "Template activation failed; not starting \"%s\" in "
   4407 		    "proper contract.\n", cline);
   4408 
   4409 	/* Hold SIGCLD so we can wait if necessary. */
   4410 	(void) sighold(SIGCLD);
   4411 
   4412 	while ((pid = fork()) < 0) {
   4413 		if (errno == EPERM) {
   4414 			console(B_TRUE, "Insufficient permission to fork.\n");
   4415 
   4416 			/* Now that's a doozy. */
   4417 			exit(1);
   4418 		}
   4419 
   4420 		console(B_TRUE,
   4421 		    "fork() for svc.startd failed: %s.  Will retry in 1 "
   4422 		    "second...\n", strerror(errno));
   4423 
   4424 		(void) sleep(1);
   4425 
   4426 		/* Eventually give up? */
   4427 	}
   4428 
   4429 	if (pid == 0) {
   4430 		/* child */
   4431 
   4432 		/* See the comment in efork() */
   4433 		for (i = SIGHUP; i <= SIGRTMAX; ++i) {
   4434 			if (i == SIGTTOU || i == SIGTTIN || i == SIGTSTP)
   4435 				(void) sigset(i, SIG_IGN);
   4436 			else
   4437 				(void) sigset(i, SIG_DFL);
   4438 		}
   4439 
   4440 		if (smf_options != NULL) {
   4441 			/* Put smf_options in the environment. */
   4442 			glob_envp[glob_envn] =
   4443 			    malloc(sizeof ("SMF_OPTIONS=") - 1 +
   4444 			    strlen(smf_options) + 1);
   4445 
   4446 			if (glob_envp[glob_envn] != NULL) {
   4447 				/* LINTED */
   4448 				(void) sprintf(glob_envp[glob_envn],
   4449 				    "SMF_OPTIONS=%s", smf_options);
   4450 				glob_envp[glob_envn+1] = NULL;
   4451 			} else {
   4452 				console(B_TRUE,
   4453 				    "Could not set SMF_OPTIONS (%s).\n",
   4454 				    strerror(errno));
   4455 			}
   4456 		}
   4457 
   4458 		if (smf_debug)
   4459 			console(B_TRUE, "Executing svc.startd\n");
   4460 
   4461 		(void) execle(SH, "INITSH", "-c", cline, NULL, glob_envp);
   4462 
   4463 		console(B_TRUE, "Could not exec \"%s\" (%s).\n", SH,
   4464 		    strerror(errno));
   4465 
   4466 		exit(1);
   4467 	}
   4468 
   4469 	/* parent */
   4470 
   4471 	if (did_activate) {
   4472 		if (legacy_tmpl < 0 || ct_tmpl_activate(legacy_tmpl) != 0)
   4473 			(void) ct_tmpl_clear(tmpl);
   4474 	}
   4475 
   4476 	/* Clear the old_ctid reference so the kernel can reclaim it. */
   4477 	if (old_ctid != 0)
   4478 		(void) ct_pr_tmpl_set_transfer(tmpl, 0);
   4479 
   4480 	(void) sigrelse(SIGCLD);
   4481 
   4482 	return (0);
   4483 }
   4484 
   4485 /*
   4486  * void startd_record_failure(void)
   4487  *   Place the current time in our circular array of svc.startd failures.
   4488  */
   4489 void
   4490 startd_record_failure()
   4491 {
   4492 	int index = startd_failure_index++ % NSTARTD_FAILURE_TIMES;
   4493 
   4494 	startd_failure_time[index] = gethrtime();
   4495 }
   4496 
   4497 /*
   4498  * int startd_failure_rate_critical(void)
   4499  *   Return true if the average failure interval is less than the permitted
   4500  *   interval.  Implicit success if insufficient measurements for an average
   4501  *   exist.
   4502  */
   4503 int
   4504 startd_failure_rate_critical()
   4505 {
   4506 	int n = startd_failure_index;
   4507 	hrtime_t avg_ns = 0;
   4508 
   4509 	if (startd_failure_index < NSTARTD_FAILURE_TIMES)
   4510 		return (0);
   4511 
   4512 	avg_ns =
   4513 	    (startd_failure_time[(n - 1) % NSTARTD_FAILURE_TIMES] -
   4514 	    startd_failure_time[n % NSTARTD_FAILURE_TIMES]) /
   4515 	    NSTARTD_FAILURE_TIMES;
   4516 
   4517 	return (avg_ns < STARTD_FAILURE_RATE_NS);
   4518 }
   4519 
   4520 /*
   4521  * returns string that must be free'd
   4522  */
   4523 
   4524 static char
   4525 *audit_boot_msg()
   4526 {
   4527 	char		*b, *p;
   4528 	char		desc[] = "booted";
   4529 	zoneid_t	zid = getzoneid();
   4530 
   4531 	b = malloc(sizeof (desc) + MAXNAMELEN + 3);
   4532 	if (b == NULL)
   4533 		return (b);
   4534 
   4535 	p = b;
   4536 	p += strlcpy(p, desc, sizeof (desc));
   4537 	if (zid != GLOBAL_ZONEID) {
   4538 		p += strlcpy(p, ": ", 3);
   4539 		(void) getzonenamebyid(zid, p, MAXNAMELEN);
   4540 	}
   4541 	return (b);
   4542 }
   4543 
   4544 /*
   4545  * Generate AUE_init_solaris audit record.  Return 1 if
   4546  * auditing is enabled in case the caller cares.
   4547  *
   4548  * In the case of userint() or a local zone invocation of
   4549  * one_true_init, the process initially contains the audit
   4550  * characteristics of the process that invoked init.  The first pass
   4551  * through here uses those characteristics then for the case of
   4552  * one_true_init in a local zone, clears them so subsequent system
   4553  * state changes won't be attributed to the person who booted the
   4554  * zone.
   4555  */
   4556 static int
   4557 audit_put_record(int pass_fail, int status, char *msg)
   4558 {
   4559 	adt_session_data_t	*ah;
   4560 	adt_event_data_t	*event;
   4561 
   4562 	if (!adt_audit_enabled())
   4563 		return (0);
   4564 
   4565 	/*
   4566 	 * the PROC_DATA picks up the context to tell whether this is
   4567 	 * an attributed record (auid = -2 is unattributed)
   4568 	 */
   4569 	if (adt_start_session(&ah, NULL, ADT_USE_PROC_DATA)) {
   4570 		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
   4571 		return (1);
   4572 	}
   4573 	event = adt_alloc_event(ah, ADT_init_solaris);
   4574 	if (event == NULL) {
   4575 		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
   4576 		(void) adt_end_session(ah);
   4577 		return (1);
   4578 	}
   4579 	event->adt_init_solaris.info = msg;	/* NULL is ok here */
   4580 
   4581 	if (adt_put_event(event, pass_fail, status)) {
   4582 		console(B_TRUE, "audit failure:  %s\n", strerror(errno));
   4583 		(void) adt_end_session(ah);
   4584 		return (1);
   4585 	}
   4586 	adt_free_event(event);
   4587 
   4588 	(void) adt_end_session(ah);
   4589 
   4590 	return (1);
   4591 }
   4592