Home | History | Annotate | Download | only in os
      1     0    stevel /*
      2     0    stevel  * CDDL HEADER START
      3     0    stevel  *
      4     0    stevel  * The contents of this file are subject to the terms of the
      5  1043    casper  * Common Development and Distribution License (the "License").
      6  1043    casper  * You may not use this file except in compliance with the License.
      7     0    stevel  *
      8     0    stevel  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9     0    stevel  * or http://www.opensolaris.org/os/licensing.
     10     0    stevel  * See the License for the specific language governing permissions
     11     0    stevel  * and limitations under the License.
     12     0    stevel  *
     13     0    stevel  * When distributing Covered Code, include this CDDL HEADER in each
     14     0    stevel  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15     0    stevel  * If applicable, add the following below this CDDL HEADER, with the
     16     0    stevel  * fields enclosed by brackets "[]" replaced with your own identifying
     17     0    stevel  * information: Portions Copyright [yyyy] [name of copyright owner]
     18     0    stevel  *
     19     0    stevel  * CDDL HEADER END
     20     0    stevel  */
     21  6247       raf 
     22     0    stevel /*
     23  9068     jason  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24     0    stevel  * Use is subject to license terms.
     25     0    stevel  */
     26     0    stevel 
     27     0    stevel /*	Copyright (c) 1988 AT&T	*/
     28     0    stevel /*	  All Rights Reserved  	*/
     29     0    stevel 
     30     0    stevel #include <sys/types.h>
     31     0    stevel #include <sys/param.h>
     32     0    stevel #include <sys/sysmacros.h>
     33     0    stevel #include <sys/systm.h>
     34     0    stevel #include <sys/signal.h>
     35     0    stevel #include <sys/cred_impl.h>
     36     0    stevel #include <sys/policy.h>
     37     0    stevel #include <sys/user.h>
     38     0    stevel #include <sys/errno.h>
     39     0    stevel #include <sys/file.h>
     40     0    stevel #include <sys/vfs.h>
     41     0    stevel #include <sys/vnode.h>
     42     0    stevel #include <sys/mman.h>
     43     0    stevel #include <sys/acct.h>
     44     0    stevel #include <sys/cpuvar.h>
     45     0    stevel #include <sys/proc.h>
     46     0    stevel #include <sys/cmn_err.h>
     47     0    stevel #include <sys/debug.h>
     48     0    stevel #include <sys/pathname.h>
     49     0    stevel #include <sys/vm.h>
     50  4426  aguzovsk #include <sys/lgrp.h>
     51     0    stevel #include <sys/vtrace.h>
     52     0    stevel #include <sys/exec.h>
     53     0    stevel #include <sys/exechdr.h>
     54     0    stevel #include <sys/kmem.h>
     55     0    stevel #include <sys/prsystm.h>
     56     0    stevel #include <sys/modctl.h>
     57     0    stevel #include <sys/vmparam.h>
     58  6247       raf #include <sys/door.h>
     59     0    stevel #include <sys/schedctl.h>
     60     0    stevel #include <sys/utrap.h>
     61     0    stevel #include <sys/systeminfo.h>
     62     0    stevel #include <sys/stack.h>
     63     0    stevel #include <sys/rctl.h>
     64     0    stevel #include <sys/dtrace.h>
     65     0    stevel #include <sys/lwpchan_impl.h>
     66     0    stevel #include <sys/pool.h>
     67     0    stevel #include <sys/sdt.h>
     68  2712   nn35248 #include <sys/brand.h>
     69     0    stevel 
     70     0    stevel #include <c2/audit.h>
     71     0    stevel 
     72     0    stevel #include <vm/hat.h>
     73     0    stevel #include <vm/anon.h>
     74     0    stevel #include <vm/as.h>
     75     0    stevel #include <vm/seg.h>
     76     0    stevel #include <vm/seg_vn.h>
     77     0    stevel 
/*
 * Bits of the "privflags" value that gexec() obtains from execsetid();
 * they describe which credential/privilege adjustments the exec needs.
 */
#define	PRIV_RESET		0x01	/* needs to reset privs */
#define	PRIV_SETID		0x02	/* needs to change uids */
#define	PRIV_SETUGID		0x04	/* is setuid/setgid/forced privs */
#define	PRIV_INCREASE		0x08	/* child runs with more privs */
#define	MAC_FLAGS		0x10	/* need to adjust MAC flags */

/* Forward declarations of file-local helpers. */
static int execsetid(struct vnode *, struct vattr *, uid_t *, uid_t *);
static int hold_execsw(struct execsw *);

uint_t auxv_hwcap = 0;	/* auxv AT_SUN_HWCAP value; determined on the fly */
#if defined(_SYSCALL32_IMPL)
uint_t auxv_hwcap32 = 0;	/* 32-bit version of auxv_hwcap */
#endif

/*
 * Process flags that gexec() turns off at level 0, remembering the
 * previous setting so it can be restored if an error is encountered.
 */
#define	PSUIDFLAGS		(SNOCD|SUGID)
     93     0    stevel 
     94     0    stevel /*
     95     0    stevel  * exec() - wrapper around exece providing NULL environment pointer
     96     0    stevel  */
     97     0    stevel int
     98     0    stevel exec(const char *fname, const char **argp)
     99     0    stevel {
    100     0    stevel 	return (exece(fname, argp, NULL));
    101     0    stevel }
    102     0    stevel 
    103     0    stevel /*
    104     0    stevel  * exece() - system call wrapper around exec_common()
    105     0    stevel  */
    106     0    stevel int
    107     0    stevel exece(const char *fname, const char **argp, const char **envp)
    108     0    stevel {
    109     0    stevel 	int error;
    110     0    stevel 
    111  2712   nn35248 	error = exec_common(fname, argp, envp, EBA_NONE);
    112     0    stevel 	return (error ? (set_errno(error)) : 0);
    113     0    stevel }
    114     0    stevel 
/*
 * exec_common() - common body of the exec system calls.
 *
 *	fname		path of the file to execute (user address; read
 *			with pn_get(..., UIO_USERSPACE, ...))
 *	argp, envp	argument and environment vectors; they are not
 *			examined here, only handed to gexec() via struct execa
 *	brand_action	EBA_NONE for an ordinary exec, EBA_NATIVE to unbrand
 *			a branded process, any other value to brand an
 *			unbranded process
 *
 * Returns 0 on success, otherwise an errno value:
 *	ENOTSUP	called from the /proc agent lwp, or the requested
 *		brand action is not applicable to this process/zone
 *	ENOENT	the lookup succeeded but produced no vnode
 *	EACCES	the file lives in the current (attribute) directory
 *	other	whatever pn_get(), lookuppn(), secpolicy_basic_exec()
 *		or gexec() returned
 *
 * Once prexecstart() has been called, every exit path (success, "fail"
 * and "out") restores the saved signal mask and calls prexecend().
 */
int
exec_common(const char *fname, const char **argp, const char **envp,
    int brand_action)
{
	vnode_t *vp = NULL, *dir = NULL, *tmpvp = NULL;
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);
	struct user *up = PTOU(p);
	long execsz;		/* temporary count of exec size */
	int i;
	int error;
	char exec_file[MAXCOMLEN+1];
	struct pathname pn;
	struct pathname resolvepn;
	struct uarg args;
	struct execa ua;
	k_sigset_t savedmask;
	lwpdir_t *lwpdir = NULL;
	tidhash_t *tidhash;
	lwpdir_t *old_lwpdir = NULL;
	uint_t old_lwpdir_sz;
	tidhash_t *old_tidhash;
	uint_t old_tidhash_sz;
	ret_tidhash_t *ret_tidhash;
	lwpent_t *lep;
	boolean_t brandme = B_FALSE;

	/*
	 * exec() is not supported for the /proc agent lwp.
	 */
	if (curthread == p->p_agenttp)
		return (ENOTSUP);

	/*
	 * Decide whether this exec has to brand the process.  The ENOTSUP
	 * returns in this section happen before prexecstart(), so they
	 * return directly instead of going through "out".
	 */
	if (brand_action != EBA_NONE) {
		/*
		 * Brand actions are not supported for processes that are not
		 * running in a branded zone.
		 */
		if (!ZONE_IS_BRANDED(p->p_zone))
			return (ENOTSUP);

		if (brand_action == EBA_NATIVE) {
			/* Only branded processes can be unbranded */
			if (!PROC_IS_BRANDED(p))
				return (ENOTSUP);
		} else {
			/* Only unbranded processes can be branded */
			if (PROC_IS_BRANDED(p))
				return (ENOTSUP);
			brandme = B_TRUE;
		}
	} else {
		/*
		 * If this is a native zone, or if the process is already
		 * branded, then we don't need to do anything.  If this is
		 * a native process in a branded zone, we need to brand the
		 * process as it exec()s the new binary.
		 */
		if (ZONE_IS_BRANDED(p->p_zone) && !PROC_IS_BRANDED(p))
			brandme = B_TRUE;
	}

	/*
	 * Inform /proc that an exec() has started.
	 * Hold signals that are ignored by default so that we will
	 * not be interrupted by a signal that will be ignored after
	 * successful completion of gexec().
	 */
	mutex_enter(&p->p_lock);
	prexecstart();
	schedctl_finish_sigblock(curthread);
	savedmask = curthread->t_hold;
	sigorset(&curthread->t_hold, &ignoredefault);
	mutex_exit(&p->p_lock);

	/*
	 * Look up path name and remember last component for later.
	 * To help coreadm expand its %d token, we attempt to save
	 * the directory containing the executable in p_execdir. The
	 * first call to lookuppn() may fail and return EINVAL because
	 * dirvpp is non-NULL. In that case, we make a second call to
	 * lookuppn() with dirvpp set to NULL; p_execdir will be NULL,
	 * but coreadm is allowed to expand %d to the empty string and
	 * there are other cases in which that failure may occur.
	 *
	 * From here until gexec() returns, each error exit releases by
	 * hand whatever has been acquired so far (pn, resolvepn, dir, vp)
	 * before jumping to the common exit code.
	 */
	if ((error = pn_get((char *)fname, UIO_USERSPACE, &pn)) != 0)
		goto out;
	pn_alloc(&resolvepn);
	if ((error = lookuppn(&pn, &resolvepn, FOLLOW, &dir, &vp)) != 0) {
		pn_free(&resolvepn);
		pn_free(&pn);
		if (error != EINVAL)
			goto out;

		/* Retry without asking for the containing directory. */
		dir = NULL;
		if ((error = pn_get((char *)fname, UIO_USERSPACE, &pn)) != 0)
			goto out;
		pn_alloc(&resolvepn);
		if ((error = lookuppn(&pn, &resolvepn, FOLLOW, NULLVPP,
		    &vp)) != 0) {
			pn_free(&resolvepn);
			pn_free(&pn);
			goto out;
		}
	}
	if (vp == NULL) {
		if (dir != NULL)
			VN_RELE(dir);
		error = ENOENT;
		pn_free(&resolvepn);
		pn_free(&pn);
		goto out;
	}

	if ((error = secpolicy_basic_exec(CRED(), vp)) != 0) {
		if (dir != NULL)
			VN_RELE(dir);
		pn_free(&resolvepn);
		pn_free(&pn);
		VN_RELE(vp);
		goto out;
	}

	/*
	 * We do not allow executing files in attribute directories.
	 * We test this by determining whether the resolved path
	 * contains a "/" when we're in an attribute directory;
	 * only if the pathname does not contain a "/" the resolved path
	 * points to a file in the current working (attribute) directory.
	 */
	if ((p->p_user.u_cdir->v_flag & V_XATTRDIR) != 0 &&
	    strchr(resolvepn.pn_path, '/') == NULL) {
		if (dir != NULL)
			VN_RELE(dir);
		error = EACCES;
		pn_free(&resolvepn);
		pn_free(&pn);
		VN_RELE(vp);
		goto out;
	}

	/*
	 * exec_file is zero-filled over MAXCOMLEN+1 bytes and at most
	 * MAXCOMLEN bytes are copied in, so it is always NUL-terminated.
	 */
	bzero(exec_file, MAXCOMLEN+1);
	(void) strncpy(exec_file, pn.pn_path, MAXCOMLEN);
	bzero(&args, sizeof (args));
	args.pathname = resolvepn.pn_path;
	/* don't free resolvepn until we are done with args */
	pn_free(&pn);

	/*
	 * Specific exec handlers, or policies determined via
	 * /etc/system may override the historical default.
	 */
	args.stk_prot = PROT_ZFOD;
	args.dat_prot = PROT_ZFOD;

	CPU_STATS_ADD_K(sys, sysexec, 1);
	DTRACE_PROC1(exec, char *, args.pathname);

	ua.fname = fname;
	ua.argp = argp;
	ua.envp = envp;

	/* If necessary, brand this process before we start the exec. */
	if (brandme)
		brand_setbrand(p);

	if ((error = gexec(&vp, &ua, &args, NULL, 0, &execsz,
	    exec_file, p->p_cred, brand_action)) != 0) {
		/* Undo the branding applied just above. */
		if (brandme)
			brand_clearbrand(p);
		VN_RELE(vp);
		if (dir != NULL)
			VN_RELE(dir);
		pn_free(&resolvepn);
		goto fail;
	}

	/*
	 * gexec() succeeded; nothing below this point jumps to "fail" or
	 * "out".  The rest of the function resets per-process and per-lwp
	 * state for the new program.
	 */

	/*
	 * Free floating point registers (sun4u only)
	 */
	ASSERT(lwp != NULL);
	lwp_freeregs(lwp, 1);

	/*
	 * Free thread and process context ops.
	 */
	if (curthread->t_ctx)
		freectx(curthread, 1);
	if (p->p_pctx)
		freepctx(p, 1);

	/*
	 * Remember file name for accounting; clear any cached DTrace predicate.
	 */
	up->u_acflag &= ~AFORK;
	bcopy(exec_file, up->u_comm, MAXCOMLEN+1);
	curthread->t_predcache = NULL;

	/*
	 * Clear contract template state
	 */
	lwp_ctmpl_clear(lwp);

	/*
	 * Save the directory in which we found the executable for expanding
	 * the %d token used in core file patterns.
	 *
	 * p_execdir takes its own hold on dir; the previous p_execdir is
	 * released only after p_lock has been dropped.
	 */
	mutex_enter(&p->p_lock);
	tmpvp = p->p_execdir;
	p->p_execdir = dir;
	if (p->p_execdir != NULL)
		VN_HOLD(p->p_execdir);
	mutex_exit(&p->p_lock);

	if (tmpvp != NULL)
		VN_RELE(tmpvp);

	/*
	 * Reset stack state to the user stack, clear set of signals
	 * caught on the signal stack, and reset list of signals that
	 * restart system calls; the new program's environment should
	 * not be affected by detritus from the old program.  Any
	 * pending held signals remain held, so don't clear t_hold.
	 */
	mutex_enter(&p->p_lock);
	lwp->lwp_oldcontext = 0;
	lwp->lwp_ustack = 0;
	lwp->lwp_old_stk_ctl = 0;
	sigemptyset(&up->u_signodefer);
	sigemptyset(&up->u_sigonstack);
	sigemptyset(&up->u_sigresethand);
	lwp->lwp_sigaltstack.ss_sp = 0;
	lwp->lwp_sigaltstack.ss_size = 0;
	lwp->lwp_sigaltstack.ss_flags = SS_DISABLE;

	/*
	 * Make saved resource limit == current resource limit.
	 */
	for (i = 0; i < RLIM_NLIMITS; i++) {
		/*CONSTCOND*/
		if (RLIM_SAVED(i)) {
			(void) rctl_rlimit_get(rctlproc_legacy[i], p,
			    &up->u_saved_rlimit[i]);
		}
	}

	/*
	 * If the action was to catch the signal, then the action
	 * must be reset to SIG_DFL.
	 */
	sigdefault(p);
	p->p_flag &= ~(SNOWAIT|SJCTL);
	p->p_flag |= (SEXECED|SMSACCT|SMSFORK);
	up->u_signal[SIGCLD - 1] = SIG_DFL;

	/*
	 * Delete the dot4 sigqueues/signotifies.
	 */
	sigqfree(p);

	mutex_exit(&p->p_lock);

	/* Reset the profiling state; it is protected by p_pflock. */
	mutex_enter(&p->p_pflock);
	p->p_prof.pr_base = NULL;
	p->p_prof.pr_size = 0;
	p->p_prof.pr_off = 0;
	p->p_prof.pr_scale = 0;
	p->p_prof.pr_samples = 0;
	mutex_exit(&p->p_pflock);

	ASSERT(curthread->t_schedctl == NULL);

#if defined(__sparc)
	if (p->p_utraps != NULL)
		utrap_free(p);
#endif	/* __sparc */

	/*
	 * Close all close-on-exec files.
	 */
	close_exec(P_FINFO(p));
	TRACE_2(TR_FAC_PROC, TR_PROC_EXEC, "proc_exec:p %p up %p", p, up);

	/* Unbrand ourself if necessary. */
	if (PROC_IS_BRANDED(p) && (brand_action == EBA_NATIVE))
		brand_clearbrand(p);

	setregs(&args);

	/* Mark this as an executable vnode */
	mutex_enter(&vp->v_lock);
	vp->v_flag |= VVMEXEC;
	mutex_exit(&vp->v_lock);

	/* Drop the lookup references; args.pathname is not used past here. */
	VN_RELE(vp);
	if (dir != NULL)
		VN_RELE(dir);
	pn_free(&resolvepn);

	/*
	 * Allocate a new lwp directory and lwpid hash table if necessary.
	 *
	 * The KM_SLEEP allocations are made here, before p_lock is taken;
	 * the new tables are installed further down under p_lock.  tidhash
	 * and lep are assigned only inside this branch and are consumed
	 * only under the matching "lwpdir != NULL" test below.
	 */
	if (curthread->t_tid != 1 || p->p_lwpdir_sz != 2) {
		lwpdir = kmem_zalloc(2 * sizeof (lwpdir_t), KM_SLEEP);
		lwpdir->ld_next = lwpdir + 1;
		tidhash = kmem_zalloc(2 * sizeof (tidhash_t), KM_SLEEP);
		if (p->p_lwpdir != NULL)
			lep = p->p_lwpdir[curthread->t_dslot].ld_entry;
		else
			lep = kmem_zalloc(sizeof (*lep), KM_SLEEP);
	}

	/* Give the brand its exec callback. */
	if (PROC_IS_BRANDED(p))
		BROP(p)->b_exec();

	mutex_enter(&p->p_lock);
	prbarrier(p);

	/*
	 * Reset lwp id to the default value of 1.
	 * This is a single-threaded process now
	 * and lwp #1 is lwp_wait()able by default.
	 * The t_unpark flag should not be inherited.
	 */
	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
	curthread->t_tid = 1;
	/* Preemption is disabled only while t_lpl is dereferenced. */
	kpreempt_disable();
	ASSERT(curthread->t_lpl != NULL);
	p->p_t1_lgrpid = curthread->t_lpl->lpl_lgrpid;
	kpreempt_enable();
	if (p->p_tr_lgrpid != LGRP_NONE && p->p_tr_lgrpid != p->p_t1_lgrpid) {
		lgrp_update_trthr_migrations(1);
	}
	curthread->t_unpark = 0;
	curthread->t_proc_flag |= TP_TWAIT;
	curthread->t_proc_flag &= ~TP_DAEMON;	/* daemons shouldn't exec */
	p->p_lwpdaemon = 0;			/* but oh well ... */
	p->p_lwpid = 1;

	/*
	 * Install the newly-allocated lwp directory and lwpid hash table
	 * and insert the current thread into the new hash table.
	 */
	if (lwpdir != NULL) {
		/* Remember the old tables; they are freed after p_lock drops. */
		old_lwpdir = p->p_lwpdir;
		old_lwpdir_sz = p->p_lwpdir_sz;
		old_tidhash = p->p_tidhash;
		old_tidhash_sz = p->p_tidhash_sz;
		p->p_lwpdir = p->p_lwpfree = lwpdir;
		p->p_lwpdir_sz = 2;
		lep->le_thread = curthread;
		lep->le_lwpid = curthread->t_tid;
		lep->le_start = curthread->t_start;
		lwp_hash_in(p, lep, tidhash, 2, 0);
		p->p_tidhash = tidhash;
		p->p_tidhash_sz = 2;
	}
	/* Detach the retired tidhash list so it can be freed unlocked. */
	ret_tidhash = p->p_ret_tidhash;
	p->p_ret_tidhash = NULL;

	/*
	 * Restore the saved signal mask and
	 * inform /proc that the exec() has finished.
	 */
	curthread->t_hold = savedmask;
	prexecend();
	mutex_exit(&p->p_lock);
	/* With p_lock dropped, free the tables that were replaced above. */
	if (old_lwpdir) {
		kmem_free(old_lwpdir, old_lwpdir_sz * sizeof (lwpdir_t));
		kmem_free(old_tidhash, old_tidhash_sz * sizeof (tidhash_t));
	}
	while (ret_tidhash != NULL) {
		ret_tidhash_t *next = ret_tidhash->rth_next;
		kmem_free(ret_tidhash->rth_tidhash,
		    ret_tidhash->rth_tidhash_sz * sizeof (tidhash_t));
		kmem_free(ret_tidhash, sizeof (*ret_tidhash));
		ret_tidhash = next;
	}

	ASSERT(error == 0);
	DTRACE_PROC(exec__success);
	return (0);

	/*
	 * "fail" is reached only when gexec() fails, i.e. after the "exec"
	 * probe has fired, so the matching exec__failure probe fires here.
	 * Earlier errors jump straight to "out".
	 */
fail:
	DTRACE_PROC1(exec__failure, int, error);
out:		/* error return */
	mutex_enter(&p->p_lock);
	curthread->t_hold = savedmask;
	prexecend();
	mutex_exit(&p->p_lock);
	ASSERT(error != 0);
	return (error);
}
    508     0    stevel 
    509     0    stevel 
    510     0    stevel /*
    511     0    stevel  * Perform generic exec duties and switchout to object-file specific
    512     0    stevel  * handler.
    513     0    stevel  */
    514     0    stevel int
    515     0    stevel gexec(
    516     0    stevel 	struct vnode **vpp,
    517     0    stevel 	struct execa *uap,
    518     0    stevel 	struct uarg *args,
    519     0    stevel 	struct intpdata *idatap,
    520     0    stevel 	int level,
    521     0    stevel 	long *execsz,
    522     0    stevel 	caddr_t exec_file,
    523  2712   nn35248 	struct cred *cred,
    524  2712   nn35248 	int brand_action)
    525     0    stevel {
    526     0    stevel 	struct vnode *vp;
    527     0    stevel 	proc_t *pp = ttoproc(curthread);
    528     0    stevel 	struct execsw *eswp;
    529     0    stevel 	int error = 0;
    530     0    stevel 	int suidflags = 0;
    531     0    stevel 	ssize_t resid;
    532     0    stevel 	uid_t uid, gid;
    533     0    stevel 	struct vattr vattr;
    534     0    stevel 	char magbuf[MAGIC_BYTES];
    535     0    stevel 	int setid;
    536     0    stevel 	cred_t *oldcred, *newcred = NULL;
    537     0    stevel 	int privflags = 0;
    538  1335    casper 	int setidfl;
    539     0    stevel 
    540     0    stevel 	/*
    541     0    stevel 	 * If the SNOCD or SUGID flag is set, turn it off and remember the
    542     0    stevel 	 * previous setting so we can restore it if we encounter an error.
    543     0    stevel 	 */
    544     0    stevel 	if (level == 0 && (pp->p_flag & PSUIDFLAGS)) {
    545     0    stevel 		mutex_enter(&pp->p_lock);
    546     0    stevel 		suidflags = pp->p_flag & PSUIDFLAGS;
    547     0    stevel 		pp->p_flag &= ~PSUIDFLAGS;
    548     0    stevel 		mutex_exit(&pp->p_lock);
    549     0    stevel 	}
    550     0    stevel 
    551     0    stevel 	if ((error = execpermissions(*vpp, &vattr, args)) != 0)
    552     0    stevel 		goto bad;
    553     0    stevel 
    554     0    stevel 	/* need to open vnode for stateful file systems like rfs */
    555  5331       amw 	if ((error = VOP_OPEN(vpp, FREAD, CRED(), NULL)) != 0)
    556     0    stevel 		goto bad;
    557     0    stevel 	vp = *vpp;
    558     0    stevel 
    559     0    stevel 	/*
    560     0    stevel 	 * Note: to support binary compatibility with SunOS a.out
    561     0    stevel 	 * executables, we read in the first four bytes, as the
    562     0    stevel 	 * magic number is in bytes 2-3.
    563     0    stevel 	 */
    564     0    stevel 	if (error = vn_rdwr(UIO_READ, vp, magbuf, sizeof (magbuf),
    565     0    stevel 	    (offset_t)0, UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), &resid))
    566     0    stevel 		goto bad;
    567     0    stevel 	if (resid != 0)
    568     0    stevel 		goto bad;
    569     0    stevel 
    570     0    stevel 	if ((eswp = findexec_by_hdr(magbuf)) == NULL)
    571     0    stevel 		goto bad;
    572     0    stevel 
    573     0    stevel 	if (level == 0 &&
    574     0    stevel 	    (privflags = execsetid(vp, &vattr, &uid, &gid)) != 0) {
    575     0    stevel 
    576     0    stevel 		newcred = cred = crdup(cred);
    577     0    stevel 
    578     0    stevel 		/* If we can, drop the PA bit */
    579     0    stevel 		if ((privflags & PRIV_RESET) != 0)
    580     0    stevel 			priv_adjust_PA(cred);
    581     0    stevel 
    582     0    stevel 		if (privflags & PRIV_SETID) {
    583     0    stevel 			cred->cr_uid = uid;
    584     0    stevel 			cred->cr_gid = gid;
    585     0    stevel 			cred->cr_suid = uid;
    586     0    stevel 			cred->cr_sgid = gid;
    587     0    stevel 		}
    588     0    stevel 
    589  1676       jpk 		if (privflags & MAC_FLAGS) {
    590  1676       jpk 			if (!(CR_FLAGS(cred) & NET_MAC_AWARE_INHERIT))
    591  1676       jpk 				CR_FLAGS(cred) &= ~NET_MAC_AWARE;
    592  1676       jpk 			CR_FLAGS(cred) &= ~NET_MAC_AWARE_INHERIT;
    593  1676       jpk 		}
    594  1676       jpk 
    595     0    stevel 		/*
    596     0    stevel 		 * Implement the privilege updates:
    597     0    stevel 		 *
    598     0    stevel 		 * Restrict with L:
    599     0    stevel 		 *
    600     0    stevel 		 *	I' = I & L
    601     0    stevel 		 *
    602     0    stevel 		 *	E' = P' = (I' + F) & A
    603     0    stevel 		 *
    604     0    stevel 		 * But if running under ptrace, we cap I with P.
    605     0    stevel 		 */
    606     0    stevel 		if ((privflags & PRIV_RESET) != 0) {
    607     0    stevel 			if ((privflags & PRIV_INCREASE) != 0 &&
    608     0    stevel 			    (pp->p_proc_flag & P_PR_PTRACE) != 0)
    609     0    stevel 				priv_intersect(&CR_OPPRIV(cred),
    610  5753       gww 				    &CR_IPRIV(cred));
    611     0    stevel 			priv_intersect(&CR_LPRIV(cred), &CR_IPRIV(cred));
    612     0    stevel 			CR_EPRIV(cred) = CR_PPRIV(cred) = CR_IPRIV(cred);
    613     0    stevel 			priv_adjust_PA(cred);
    614     0    stevel 		}
    615     0    stevel 	}
    616     0    stevel 
    617     0    stevel 	/* SunOS 4.x buy-back */
    618     0    stevel 	if ((vp->v_vfsp->vfs_flag & VFS_NOSETUID) &&
    619     0    stevel 	    (vattr.va_mode & (VSUID|VSGID))) {
    620  9068     jason 		char path[MAXNAMELEN];
    621  9068     jason 		refstr_t *mntpt = NULL;
    622  9068     jason 		int ret = -1;
    623  9068     jason 
    624  9068     jason 		bzero(path, sizeof (path));
    625  9068     jason 		zone_hold(pp->p_zone);
    626  9068     jason 
    627  9068     jason 		ret = vnodetopath(pp->p_zone->zone_rootvp, vp, path,
    628  9068     jason 		    sizeof (path), cred);
    629  9068     jason 
    630  9068     jason 		/* fallback to mountpoint if a path can't be found */
    631  9068     jason 		if ((ret != 0) || (ret == 0 && path[0] == '\0'))
    632  9068     jason 			mntpt = vfs_getmntpoint(vp->v_vfsp);
    633  9068     jason 
    634  9068     jason 		if (mntpt == NULL)
    635  9068     jason 			zcmn_err(pp->p_zone->zone_id, CE_NOTE,
    636  9068     jason 			    "!uid %d: setuid execution not allowed, "
    637  9068     jason 			    "file=%s", cred->cr_uid, path);
    638  9068     jason 		else
    639  9068     jason 			zcmn_err(pp->p_zone->zone_id, CE_NOTE,
    640  9068     jason 			    "!uid %d: setuid execution not allowed, "
    641  9068     jason 			    "fs=%s, file=%s", cred->cr_uid,
    642  9068     jason 			    ZONE_PATH_TRANSLATE(refstr_value(mntpt),
    643  9068     jason 			    pp->p_zone), exec_file);
    644  9068     jason 
    645  9068     jason 		if (!INGLOBALZONE(pp)) {
    646  9068     jason 			/* zone_rootpath always has trailing / */
    647  9068     jason 			if (mntpt == NULL)
    648  9068     jason 				cmn_err(CE_NOTE, "!zone: %s, uid: %d "
    649  9068     jason 				    "setuid execution not allowed, file=%s%s",
    650  9068     jason 				    pp->p_zone->zone_name, cred->cr_uid,
    651  9068     jason 				    pp->p_zone->zone_rootpath, path + 1);
    652  9068     jason 			else
    653  9068     jason 				cmn_err(CE_NOTE, "!zone: %s, uid: %d "
    654  9068     jason 				    "setuid execution not allowed, fs=%s, "
    655  9068     jason 				    "file=%s", pp->p_zone->zone_name,
    656  9068     jason 				    cred->cr_uid, refstr_value(mntpt),
    657  9068     jason 				    exec_file);
    658  9068     jason 		}
    659  9068     jason 
    660  9068     jason 		if (mntpt != NULL)
    661  9068     jason 			refstr_rele(mntpt);
    662  9068     jason 
    663  9068     jason 		zone_rele(pp->p_zone);
    664     0    stevel 	}
    665     0    stevel 
    666     0    stevel 	/*
    667     0    stevel 	 * execsetid() told us whether or not we had to change the
    668     0    stevel 	 * credentials of the process.  In privflags, it told us
    669     0    stevel 	 * whether we gained any privileges or executed a set-uid executable.
    670     0    stevel 	 */
    671     0    stevel 	setid = (privflags & (PRIV_SETUGID|PRIV_INCREASE));
    672     0    stevel 
    673     0    stevel 	/*
    674     0    stevel 	 * Use /etc/system variable to determine if the stack
    675     0    stevel 	 * should be marked as executable by default.
    676     0    stevel 	 */
    677     0    stevel 	if (noexec_user_stack)
    678     0    stevel 		args->stk_prot &= ~PROT_EXEC;
    679     0    stevel 
    680     0    stevel 	args->execswp = eswp; /* Save execsw pointer in uarg for exec_func */
    681  4528   paulsan 	args->ex_vp = vp;
    682     0    stevel 
    683     0    stevel 	/*
    684     0    stevel 	 * Traditionally, the setid flags told the sub processes whether
    685     0    stevel 	 * the file just executed was set-uid or set-gid; this caused
    686     0    stevel 	 * some confusion as the 'setid' flag did not match the SUGID
    687     0    stevel 	 * process flag which is only set when the uids/gids do not match.
    688     0    stevel 	 * A script set-gid/set-uid to the real uid/gid would start with
    689     0    stevel 	 * /dev/fd/X but an executable would happily trust LD_LIBRARY_PATH.
    690     0    stevel 	 * Now we flag those cases where the calling process cannot
    691     0    stevel 	 * be trusted to influence the newly exec'ed process, either
    692     0    stevel 	 * because it runs with more privileges or when the uids/gids
    693     0    stevel 	 * do in fact not match.
    694     0    stevel 	 * This also makes the runtime linker agree with the on exec
    695     0    stevel 	 * values of SNOCD and SUGID.
    696     0    stevel 	 */
    697  1335    casper 	setidfl = 0;
    698  1335    casper 	if (cred->cr_uid != cred->cr_ruid || (cred->cr_rgid != cred->cr_gid &&
    699  1335    casper 	    !supgroupmember(cred->cr_gid, cred))) {
    700  1335    casper 		setidfl |= EXECSETID_UGIDS;
    701  1335    casper 	}
    702  1335    casper 	if (setid & PRIV_SETUGID)
    703  1335    casper 		setidfl |= EXECSETID_SETID;
    704  1335    casper 	if (setid & PRIV_INCREASE)
    705  1335    casper 		setidfl |= EXECSETID_PRIVS;
    706  1335    casper 
    707     0    stevel 	error = (*eswp->exec_func)(vp, uap, args, idatap, level, execsz,
    708  5753       gww 	    setidfl, exec_file, cred, brand_action);
    709     0    stevel 	rw_exit(eswp->exec_lock);
    710     0    stevel 	if (error != 0) {
    711     0    stevel 		if (newcred != NULL)
    712     0    stevel 			crfree(newcred);
    713     0    stevel 		goto bad;
    714     0    stevel 	}
    715     0    stevel 
    716     0    stevel 	if (level == 0) {
    717     0    stevel 		mutex_enter(&pp->p_crlock);
    718     0    stevel 		if (newcred != NULL) {
    719     0    stevel 			/*
    720     0    stevel 			 * Free the old credentials, and set the new ones.
    721     0    stevel 			 * Do this for both the process and the (single) thread.
    722     0    stevel 			 */
    723     0    stevel 			crfree(pp->p_cred);
    724     0    stevel 			pp->p_cred = cred;	/* cred already held for proc */
    725     0    stevel 			crhold(cred);		/* hold new cred for thread */
    726     0    stevel 			/*
    727     0    stevel 			 * DTrace accesses t_cred in probe context.  t_cred
    728     0    stevel 			 * must always be either NULL, or point to a valid,
    729     0    stevel 			 * allocated cred structure.
    730     0    stevel 			 */
    731     0    stevel 			oldcred = curthread->t_cred;
    732     0    stevel 			curthread->t_cred = cred;
    733     0    stevel 			crfree(oldcred);
    734     0    stevel 		}
    735     0    stevel 		/*
    736     0    stevel 		 * On emerging from a successful exec(), the saved
    737     0    stevel 		 * uid and gid equal the effective uid and gid.
    738     0    stevel 		 */
    739     0    stevel 		cred->cr_suid = cred->cr_uid;
    740     0    stevel 		cred->cr_sgid = cred->cr_gid;
    741     0    stevel 
    742     0    stevel 		/*
    743     0    stevel 		 * If the real and effective ids do not match, this
    744     0    stevel 		 * is a setuid process that should not dump core.
    745     0    stevel 		 * The group comparison is tricky; we prevent the code
    746     0    stevel 		 * from flagging SNOCD when executing with an effective gid
    747     0    stevel 		 * which is a supplementary group.
    748     0    stevel 		 */
    749     0    stevel 		if (cred->cr_ruid != cred->cr_uid ||
    750     0    stevel 		    (cred->cr_rgid != cred->cr_gid &&
    751     0    stevel 		    !supgroupmember(cred->cr_gid, cred)) ||
    752     0    stevel 		    (privflags & PRIV_INCREASE) != 0)
    753     0    stevel 			suidflags = PSUIDFLAGS;
    754     0    stevel 		else
    755     0    stevel 			suidflags = 0;
    756     0    stevel 
    757     0    stevel 		mutex_exit(&pp->p_crlock);
    758     0    stevel 		if (suidflags) {
    759     0    stevel 			mutex_enter(&pp->p_lock);
    760     0    stevel 			pp->p_flag |= suidflags;
    761     0    stevel 			mutex_exit(&pp->p_lock);
    762     0    stevel 		}
    763     0    stevel 		if (setid && (pp->p_proc_flag & P_PR_PTRACE) == 0) {
    764     0    stevel 			/*
    765     0    stevel 			 * If process is traced via /proc, arrange to
    766     0    stevel 			 * invalidate the associated /proc vnode.
    767     0    stevel 			 */
    768     0    stevel 			if (pp->p_plist || (pp->p_proc_flag & P_PR_TRACE))
    769     0    stevel 				args->traceinval = 1;
    770     0    stevel 		}
    771     0    stevel 		if (pp->p_proc_flag & P_PR_PTRACE)
    772     0    stevel 			psignal(pp, SIGTRAP);
    773     0    stevel 		if (args->traceinval)
    774     0    stevel 			prinvalidate(&pp->p_user);
    775     0    stevel 	}
    776     0    stevel 
    777     0    stevel 	return (0);
    778     0    stevel bad:
    779     0    stevel 	if (error == 0)
    780     0    stevel 		error = ENOEXEC;
    781     0    stevel 
    782     0    stevel 	if (suidflags) {
    783     0    stevel 		mutex_enter(&pp->p_lock);
    784     0    stevel 		pp->p_flag |= suidflags;
    785     0    stevel 		mutex_exit(&pp->p_lock);
    786     0    stevel 	}
    787     0    stevel 	return (error);
    788     0    stevel }
    789     0    stevel 
    790     0    stevel extern char *execswnames[];
    791     0    stevel 
    792     0    stevel struct execsw *
    793     0    stevel allocate_execsw(char *name, char *magic, size_t magic_size)
    794     0    stevel {
    795     0    stevel 	int i, j;
    796     0    stevel 	char *ename;
    797     0    stevel 	char *magicp;
    798     0    stevel 
    799     0    stevel 	mutex_enter(&execsw_lock);
    800     0    stevel 	for (i = 0; i < nexectype; i++) {
    801     0    stevel 		if (execswnames[i] == NULL) {
    802     0    stevel 			ename = kmem_alloc(strlen(name) + 1, KM_SLEEP);
    803     0    stevel 			(void) strcpy(ename, name);
    804     0    stevel 			execswnames[i] = ename;
    805     0    stevel 			/*
    806     0    stevel 			 * Set the magic number last so that we
    807     0    stevel 			 * don't need to hold the execsw_lock in
    808     0    stevel 			 * findexectype().
    809     0    stevel 			 */
    810     0    stevel 			magicp = kmem_alloc(magic_size, KM_SLEEP);
    811     0    stevel 			for (j = 0; j < magic_size; j++)
    812     0    stevel 				magicp[j] = magic[j];
    813     0    stevel 			execsw[i].exec_magic = magicp;
    814     0    stevel 			mutex_exit(&execsw_lock);
    815     0    stevel 			return (&execsw[i]);
    816     0    stevel 		}
    817     0    stevel 	}
    818     0    stevel 	mutex_exit(&execsw_lock);
    819     0    stevel 	return (NULL);
    820     0    stevel }
    821     0    stevel 
    822     0    stevel /*
    823     0    stevel  * Find the exec switch table entry with the corresponding magic string.
    824     0    stevel  */
    825     0    stevel struct execsw *
    826     0    stevel findexecsw(char *magic)
    827     0    stevel {
    828     0    stevel 	struct execsw *eswp;
    829     0    stevel 
    830     0    stevel 	for (eswp = execsw; eswp < &execsw[nexectype]; eswp++) {
    831     0    stevel 		ASSERT(eswp->exec_maglen <= MAGIC_BYTES);
    832     0    stevel 		if (magic && eswp->exec_maglen != 0 &&
    833     0    stevel 		    bcmp(magic, eswp->exec_magic, eswp->exec_maglen) == 0)
    834     0    stevel 			return (eswp);
    835     0    stevel 	}
    836     0    stevel 	return (NULL);
    837     0    stevel }
    838     0    stevel 
    839     0    stevel /*
    840     0    stevel  * Find the execsw[] index for the given exec header string by looking for the
    841     0    stevel  * magic string at a specified offset and length for each kind of executable
    842     0    stevel  * file format until one matches.  If no execsw[] entry is found, try to
    843     0    stevel  * autoload a module for this magic string.
    844     0    stevel  */
    845     0    stevel struct execsw *
    846     0    stevel findexec_by_hdr(char *header)
    847     0    stevel {
    848     0    stevel 	struct execsw *eswp;
    849     0    stevel 
    850     0    stevel 	for (eswp = execsw; eswp < &execsw[nexectype]; eswp++) {
    851     0    stevel 		ASSERT(eswp->exec_maglen <= MAGIC_BYTES);
    852     0    stevel 		if (header && eswp->exec_maglen != 0 &&
    853     0    stevel 		    bcmp(&header[eswp->exec_magoff], eswp->exec_magic,
    854  5753       gww 		    eswp->exec_maglen) == 0) {
    855     0    stevel 			if (hold_execsw(eswp) != 0)
    856     0    stevel 				return (NULL);
    857     0    stevel 			return (eswp);
    858     0    stevel 		}
    859     0    stevel 	}
    860     0    stevel 	return (NULL);	/* couldn't find the type */
    861     0    stevel }
    862     0    stevel 
    863     0    stevel /*
    864     0    stevel  * Find the execsw[] index for the given magic string.  If no execsw[] entry
    865     0    stevel  * is found, try to autoload a module for this magic string.
    866     0    stevel  */
    867     0    stevel struct execsw *
    868     0    stevel findexec_by_magic(char *magic)
    869     0    stevel {
    870     0    stevel 	struct execsw *eswp;
    871     0    stevel 
    872     0    stevel 	for (eswp = execsw; eswp < &execsw[nexectype]; eswp++) {
    873     0    stevel 		ASSERT(eswp->exec_maglen <= MAGIC_BYTES);
    874     0    stevel 		if (magic && eswp->exec_maglen != 0 &&
    875     0    stevel 		    bcmp(magic, eswp->exec_magic, eswp->exec_maglen) == 0) {
    876     0    stevel 			if (hold_execsw(eswp) != 0)
    877     0    stevel 				return (NULL);
    878     0    stevel 			return (eswp);
    879     0    stevel 		}
    880     0    stevel 	}
    881     0    stevel 	return (NULL);	/* couldn't find the type */
    882     0    stevel }
    883     0    stevel 
    884     0    stevel static int
    885     0    stevel hold_execsw(struct execsw *eswp)
    886     0    stevel {
    887     0    stevel 	char *name;
    888     0    stevel 
    889     0    stevel 	rw_enter(eswp->exec_lock, RW_READER);
    890     0    stevel 	while (!LOADED_EXEC(eswp)) {
    891     0    stevel 		rw_exit(eswp->exec_lock);
    892     0    stevel 		name = execswnames[eswp-execsw];
    893     0    stevel 		ASSERT(name);
    894     0    stevel 		if (modload("exec", name) == -1)
    895     0    stevel 			return (-1);
    896     0    stevel 		rw_enter(eswp->exec_lock, RW_READER);
    897     0    stevel 	}
    898     0    stevel 	return (0);
    899     0    stevel }
    900     0    stevel 
    901     0    stevel static int
    902     0    stevel execsetid(struct vnode *vp, struct vattr *vattrp, uid_t *uidp, uid_t *gidp)
    903     0    stevel {
    904     0    stevel 	proc_t *pp = ttoproc(curthread);
    905     0    stevel 	uid_t uid, gid;
    906     0    stevel 	cred_t *cr = pp->p_cred;
    907     0    stevel 	int privflags = 0;
    908     0    stevel 
    909     0    stevel 	/*
    910     0    stevel 	 * Remember credentials.
    911     0    stevel 	 */
    912     0    stevel 	uid = cr->cr_uid;
    913     0    stevel 	gid = cr->cr_gid;
    914     0    stevel 
    915     0    stevel 	/* Will try to reset the PRIV_AWARE bit later. */
    916     0    stevel 	if ((CR_FLAGS(cr) & (PRIV_AWARE|PRIV_AWARE_INHERIT)) == PRIV_AWARE)
    917     0    stevel 		privflags |= PRIV_RESET;
    918     0    stevel 
    919     0    stevel 	if ((vp->v_vfsp->vfs_flag & VFS_NOSETUID) == 0) {
    920     0    stevel 		/*
    921     0    stevel 		 * Set-uid root execution only allowed if the limit set
    922     0    stevel 		 * holds all unsafe privileges.
    923     0    stevel 		 */
    924     0    stevel 		if ((vattrp->va_mode & VSUID) && (vattrp->va_uid != 0 ||
    925     0    stevel 		    priv_issubset(&priv_unsafe, &CR_LPRIV(cr)))) {
    926     0    stevel 			uid = vattrp->va_uid;
    927     0    stevel 			privflags |= PRIV_SETUGID;
    928     0    stevel 		}
    929     0    stevel 		if (vattrp->va_mode & VSGID) {
    930     0    stevel 			gid = vattrp->va_gid;
    931     0    stevel 			privflags |= PRIV_SETUGID;
    932     0    stevel 		}
    933     0    stevel 	}
    934     0    stevel 
    935     0    stevel 	/*
    936     0    stevel 	 * Do we need to change our credential anyway?
    937     0    stevel 	 * This is the case when E != I or P != I, as
    938     0    stevel 	 * we need to do the assignments (with F empty and A full)
    939     0    stevel 	 * Or when I is not a subset of L; in that case we need to
    940     0    stevel 	 * enforce L.
    941     0    stevel 	 *
    942     0    stevel 	 *		I' = L & I
    943     0    stevel 	 *
    944     0    stevel 	 *		E' = P' = (I' + F) & A
    945     0    stevel 	 * or
    946     0    stevel 	 *		E' = P' = I'
    947     0    stevel 	 */
    948     0    stevel 	if (!priv_isequalset(&CR_EPRIV(cr), &CR_IPRIV(cr)) ||
    949     0    stevel 	    !priv_issubset(&CR_IPRIV(cr), &CR_LPRIV(cr)) ||
    950     0    stevel 	    !priv_isequalset(&CR_PPRIV(cr), &CR_IPRIV(cr)))
    951     0    stevel 		privflags |= PRIV_RESET;
    952  1676       jpk 
    953  1676       jpk 	/* If MAC-aware flag(s) are on, need to update cred to remove. */
    954  1676       jpk 	if ((CR_FLAGS(cr) & NET_MAC_AWARE) ||
    955  1676       jpk 	    (CR_FLAGS(cr) & NET_MAC_AWARE_INHERIT))
    956  1676       jpk 		privflags |= MAC_FLAGS;
    957     0    stevel 
    958     0    stevel 	/*
    959     0    stevel 	 * When we introduce the "forced" set then we will need
    960     0    stevel 	 * to set PRIV_INCREASE here if I not a subset of P.
    961     0    stevel 	 * If the "allowed" set is introduced we will need to do
    962     0    stevel 	 * a similar thing; however, it seems more reasonable to
    963     0    stevel 	 * have the allowed set reduce "L": script language interpreters
    964     0    stevel 	 * would typically have an allowed set of "all".
    965     0    stevel 	 */
    966     0    stevel 
    967     0    stevel 	/*
    968     0    stevel 	 * Set setuid/setgid protections if no ptrace() compatibility.
    969     0    stevel 	 * For privileged processes, honor setuid/setgid even in
    970     0    stevel 	 * the presence of ptrace() compatibility.
    971     0    stevel 	 */
    972     0    stevel 	if (((pp->p_proc_flag & P_PR_PTRACE) == 0 ||
    973     0    stevel 	    PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, (uid == 0))) &&
    974     0    stevel 	    (cr->cr_uid != uid ||
    975     0    stevel 	    cr->cr_gid != gid ||
    976     0    stevel 	    cr->cr_suid != uid ||
    977     0    stevel 	    cr->cr_sgid != gid)) {
    978     0    stevel 		*uidp = uid;
    979     0    stevel 		*gidp = gid;
    980     0    stevel 		privflags |= PRIV_SETID;
    981     0    stevel 	}
    982     0    stevel 	return (privflags);
    983     0    stevel }
    984     0    stevel 
    985     0    stevel int
    986     0    stevel execpermissions(struct vnode *vp, struct vattr *vattrp, struct uarg *args)
    987     0    stevel {
    988     0    stevel 	int error;
    989     0    stevel 	proc_t *p = ttoproc(curthread);
    990     0    stevel 
    991     0    stevel 	vattrp->va_mask = AT_MODE | AT_UID | AT_GID | AT_SIZE;
    992  5331       amw 	if (error = VOP_GETATTR(vp, vattrp, ATTR_EXEC, p->p_cred, NULL))
    993     0    stevel 		return (error);
    994     0    stevel 	/*
    995     0    stevel 	 * Check the access mode.
    996     0    stevel 	 * If VPROC, ask /proc if the file is an object file.
    997     0    stevel 	 */
    998  5331       amw 	if ((error = VOP_ACCESS(vp, VEXEC, 0, p->p_cred, NULL)) != 0 ||
    999     0    stevel 	    !(vp->v_type == VREG || (vp->v_type == VPROC && pr_isobject(vp))) ||
   1000     0    stevel 	    (vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0 ||
   1001     0    stevel 	    (vattrp->va_mode & (VEXEC|(VEXEC>>3)|(VEXEC>>6))) == 0) {
   1002     0    stevel 		if (error == 0)
   1003     0    stevel 			error = EACCES;
   1004     0    stevel 		return (error);
   1005     0    stevel 	}
   1006     0    stevel 
   1007     0    stevel 	if ((p->p_plist || (p->p_proc_flag & (P_PR_PTRACE|P_PR_TRACE))) &&
   1008  5331       amw 	    (error = VOP_ACCESS(vp, VREAD, 0, p->p_cred, NULL))) {
   1009     0    stevel 		/*
   1010     0    stevel 		 * If process is under ptrace(2) compatibility,
   1011     0    stevel 		 * fail the exec(2).
   1012     0    stevel 		 */
   1013     0    stevel 		if (p->p_proc_flag & P_PR_PTRACE)
   1014     0    stevel 			goto bad;
   1015     0    stevel 		/*
   1016     0    stevel 		 * Process is traced via /proc.
   1017     0    stevel 		 * Arrange to invalidate the /proc vnode.
   1018     0    stevel 		 */
   1019     0    stevel 		args->traceinval = 1;
   1020     0    stevel 	}
   1021     0    stevel 	return (0);
   1022     0    stevel bad:
   1023     0    stevel 	if (error == 0)
   1024     0    stevel 		error = ENOEXEC;
   1025     0    stevel 	return (error);
   1026     0    stevel }
   1027     0    stevel 
    1028     0    stevel /*
    1029     0    stevel  * Map a section of an executable file into the user's
    1030     0    stevel  * address space.
                          *
                          * vp      - vnode of the executable
                          * addr    - user address where the file data starts; the mapping itself
                          *           begins at addr masked with PAGEMASK
                          * len     - bytes of file data; 0 skips the file-backed part entirely
                          * zfodlen - bytes of zero-filled memory that follow the file data
                          * offset  - file offset of the data (also masked with PAGEMASK when mapping)
                          * prot    - protections for the resulting mappings
                          * page    - non-zero: map the file with VOP_MAP();
                          *           zero: create a zfod_argsp segment and read the data into it
                          * szc     - page size code used for the zero-fill segment when > 0
                          *
                          * Returns 0 on success; ENOMEM if valid_usr_range() rejects a range,
                          * EFAULT if zeroing the tail of the last page faults, otherwise the error
                          * from VOP_MAP(), as_map() or vn_rdwr().
    1031     0    stevel  */
    1032     0    stevel int
    1033     0    stevel execmap(struct vnode *vp, caddr_t addr, size_t len, size_t zfodlen,
    1034     0    stevel     off_t offset, int prot, int page, uint_t szc)
    1035     0    stevel {
    1036     0    stevel 	int error = 0;
    1037     0    stevel 	off_t oldoffset;
    1038     0    stevel 	caddr_t zfodbase, oldaddr;
    1039     0    stevel 	size_t end, oldlen;
    1040     0    stevel 	size_t zfoddiff;
    1041     0    stevel 	label_t ljb;
    1042     0    stevel 	proc_t *p = ttoproc(curthread);
    1043     0    stevel 
                         	/*
                         	 * Keep the caller's exact address; the read path below copies
                         	 * to it, while the mapping calls use the masked address.
                         	 */
    1044     0    stevel 	oldaddr = addr;
    1045     0    stevel 	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
    1046     0    stevel 	if (len) {
                         		/*
                         		 * Grow len by the amount addr moved down, and remember
                         		 * the original length/offset for the vn_rdwr() path.
                         		 */
    1047     0    stevel 		oldlen = len;
    1048     0    stevel 		len += ((size_t)oldaddr - (size_t)addr);
    1049     0    stevel 		oldoffset = offset;
    1050     0    stevel 		offset = (off_t)((uintptr_t)offset & PAGEMASK);
    1051     0    stevel 		if (page) {
    1052     0    stevel 			spgcnt_t  prefltmem, availm, npages;
    1053     0    stevel 			int preread;
    1054     0    stevel 			uint_t mflag = MAP_PRIVATE | MAP_FIXED;
    1055     0    stevel 
                         			/*
                         			 * Executable and not writable is flagged as
                         			 * text; anything else as initialized data.
                         			 */
    1056     0    stevel 			if ((prot & (PROT_WRITE | PROT_EXEC)) == PROT_EXEC) {
    1057     0    stevel 				mflag |= MAP_TEXT;
    1058     0    stevel 			} else {
    1059     0    stevel 				mflag |= MAP_INITDATA;
    1060     0    stevel 			}
    1061     0    stevel 
    1062     0    stevel 			if (valid_usr_range(addr, len, prot, p->p_as,
    1063     0    stevel 			    p->p_as->a_userlimit) != RANGE_OKAY) {
    1064     0    stevel 				error = ENOMEM;
    1065     0    stevel 				goto bad;
    1066     0    stevel 			}
    1067     0    stevel 			if (error = VOP_MAP(vp, (offset_t)offset,
    1068     0    stevel 			    p->p_as, &addr, len, prot, PROT_ALL,
    1069  5331       amw 			    mflag, CRED(), NULL))
    1070     0    stevel 				goto bad;
    1071     0    stevel 
    1072     0    stevel 			/*
    1073     0    stevel 			 * If the segment can fit, then we prefault
    1074     0    stevel 			 * the entire segment in.  This is based on the
    1075     0    stevel 			 * model that says the best working set of a
    1076     0    stevel 			 * small program is all of its pages.
    1077     0    stevel 			 */
    1078     0    stevel 			npages = (spgcnt_t)btopr(len);
    1079     0    stevel 			prefltmem = freemem - desfree;
    1080     0    stevel 			preread =
    1081     0    stevel 			    (npages < prefltmem && len < PGTHRESH) ? 1 : 0;
    1082     0    stevel 
    1083     0    stevel 			/*
    1084     0    stevel 			 * If we aren't prefaulting the segment,
    1085     0    stevel 			 * increment "deficit", if necessary to ensure
    1086     0    stevel 			 * that pages will become available when this
    1087     0    stevel 			 * process starts executing.
    1088     0    stevel 			 */
    1089     0    stevel 			availm = freemem - lotsfree;
    1090     0    stevel 			if (preread == 0 && npages > availm &&
    1091     0    stevel 			    deficit < lotsfree) {
    1092     0    stevel 				deficit += MIN((pgcnt_t)(npages - availm),
    1093     0    stevel 				    lotsfree - deficit);
    1094     0    stevel 			}
    1095     0    stevel 
    1096     0    stevel 			if (preread) {
    1097     0    stevel 				TRACE_2(TR_FAC_PROC, TR_EXECMAP_PREREAD,
    1098     0    stevel 				    "execmap preread:freemem %d size %lu",
    1099     0    stevel 				    freemem, len);
                         				/* Result ignored: a failed prefault is not an error here. */
    1100     0    stevel 				(void) as_fault(p->p_as->a_hat, p->p_as,
    1101     0    stevel 				    (caddr_t)addr, len, F_INVAL, S_READ);
    1102     0    stevel 			}
    1103     0    stevel 		} else {
                         			/*
                         			 * Not mapping the file directly: create a
                         			 * zfod_argsp segment, read the data into it,
                         			 * then apply the requested protections.
                         			 */
    1104     0    stevel 			if (valid_usr_range(addr, len, prot, p->p_as,
    1105     0    stevel 			    p->p_as->a_userlimit) != RANGE_OKAY) {
    1106     0    stevel 				error = ENOMEM;
    1107     0    stevel 				goto bad;
    1108     0    stevel 			}
    1109     0    stevel 
    1110     0    stevel 			if (error = as_map(p->p_as, addr, len,
    1111     0    stevel 			    segvn_create, zfod_argsp))
    1112     0    stevel 				goto bad;
    1113     0    stevel 			/*
    1114     0    stevel 			 * Read in the segment in one big chunk.
    1115     0    stevel 			 */
    1116     0    stevel 			if (error = vn_rdwr(UIO_READ, vp, (caddr_t)oldaddr,
    1117     0    stevel 			    oldlen, (offset_t)oldoffset, UIO_USERSPACE, 0,
    1118     0    stevel 			    (rlim64_t)0, CRED(), (ssize_t *)0))
    1119     0    stevel 				goto bad;
    1120     0    stevel 			/*
    1121     0    stevel 			 * Now set protections.
    1122     0    stevel 			 */
    1123     0    stevel 			if (prot != PROT_ZFOD) {
    1124     0    stevel 				(void) as_setprot(p->p_as, (caddr_t)addr,
    1125     0    stevel 				    len, prot);
    1126     0    stevel 			}
    1127     0    stevel 		}
    1128     0    stevel 	}
    1129     0    stevel 
    1130     0    stevel 	if (zfodlen) {
    1131  2712   nn35248 		struct as *as = curproc->p_as;
    1132  2712   nn35248 		struct seg *seg;
    1133  2712   nn35248 		uint_t zprot = 0;
    1134  2712   nn35248 
                         		/*
                         		 * end      - first address past the file-backed data
                         		 * zfodbase - end rounded up to a PAGESIZE boundary
                         		 * zfoddiff - bytes between the two, i.e. the remainder
                         		 *            of the last page, zeroed by hand below
                         		 */
    1135     0    stevel 		end = (size_t)addr + len;
    1136     0    stevel 		zfodbase = (caddr_t)roundup(end, PAGESIZE);
    1137     0    stevel 		zfoddiff = (uintptr_t)zfodbase - end;
    1138     0    stevel 		if (zfoddiff) {
    1139  2712   nn35248 			/*
    1140  2712   nn35248 			 * Before we go to zero the remaining space on the last
    1141  2712   nn35248 			 * page, make sure we have write permission.
    1142  2712   nn35248 			 */
    1143  2712   nn35248 
                         			/*
                         			 * NOTE(review): the range length used for the
                         			 * protection calls is zfoddiff - 1, presumably to
                         			 * stay inside the last page -- confirm.
                         			 */
    1144  2712   nn35248 			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
    1145  2712   nn35248 			seg = as_segat(curproc->p_as, (caddr_t)end);
    1146  2712   nn35248 			if (seg != NULL)
    1147  2712   nn35248 				SEGOP_GETPROT(seg, (caddr_t)end, zfoddiff - 1,
    1148  2712   nn35248 				    &zprot);
    1149  2712   nn35248 			AS_LOCK_EXIT(as, &as->a_lock);
    1150  2712   nn35248 
    1151  2712   nn35248 			if (seg != NULL && (zprot & PROT_WRITE) == 0) {
    1152  2712   nn35248 				(void) as_setprot(as, (caddr_t)end,
    1153  2712   nn35248 				    zfoddiff - 1, zprot | PROT_WRITE);
    1154  2712   nn35248 			}
    1155  2712   nn35248 
                         			/*
                         			 * Fault path for the uzero() below: put back the
                         			 * saved protections if they were changed, then
                         			 * fail with EFAULT.
                         			 */
    1156     0    stevel 			if (on_fault(&ljb)) {
    1157     0    stevel 				no_fault();
    1158  2712   nn35248 				if (seg != NULL && (zprot & PROT_WRITE) == 0)
    1159  2712   nn35248 					(void) as_setprot(as, (caddr_t)end,
    1160  5753       gww 					    zfoddiff - 1, zprot);
    1161     0    stevel 				error = EFAULT;
    1162     0    stevel 				goto bad;
    1163     0    stevel 			}
    1164     0    stevel 			uzero((void *)end, zfoddiff);
    1165     0    stevel 			no_fault();
                         			/* Zeroing done: restore the saved protections. */
    1166  2712   nn35248 			if (seg != NULL && (zprot & PROT_WRITE) == 0)
    1167  2712   nn35248 				(void) as_setprot(as, (caddr_t)end,
    1168  2712   nn35248 				    zfoddiff - 1, zprot);
    1169     0    stevel 		}
                         		/*
                         		 * Whatever zero-fill extends beyond the last page gets
                         		 * its own segvn zero-fill segment starting at zfodbase.
                         		 */
    1170     0    stevel 		if (zfodlen > zfoddiff) {
    1171     0    stevel 			struct segvn_crargs crargs =
    1172     0    stevel 			    SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
    1173     0    stevel 
    1174     0    stevel 			zfodlen -= zfoddiff;
    1175     0    stevel 			if (valid_usr_range(zfodbase, zfodlen, prot, p->p_as,
    1176     0    stevel 			    p->p_as->a_userlimit) != RANGE_OKAY) {
    1177     0    stevel 				error = ENOMEM;
    1178     0    stevel 				goto bad;
    1179     0    stevel 			}
    1180  2991    susans 			if (szc > 0) {
    1181  2991    susans 				/*
    1182  2991    susans 				 * ASSERT alignment because the mapelfexec()
    1183  2991    susans 				 * caller for the szc > 0 case extended zfod
    1184  2991    susans 				 * so its end is pgsz aligned.
    1185  2991    susans 				 */
    1186  2991    susans 				size_t pgsz = page_get_pagesize(szc);
    1187  2991    susans 				ASSERT(IS_P2ALIGNED(zfodbase + zfodlen, pgsz));
    1188  2991    susans 
    1189  2991    susans 				if (IS_P2ALIGNED(zfodbase, pgsz)) {
    1190  2991    susans 					crargs.szc = szc;
    1191  2991    susans 				} else {
    1192  2991    susans 					crargs.szc = AS_MAP_HEAP;
    1193  2991    susans 				}
    1194  2991    susans 			} else {
    1195  2991    susans 				crargs.szc = AS_MAP_NO_LPOOB;
    1196  2991    susans 			}
    1197     0    stevel 			if (error = as_map(p->p_as, (caddr_t)zfodbase,
    1198     0    stevel 			    zfodlen, segvn_create, &crargs))
    1199     0    stevel 				goto bad;
    1200     0    stevel 			if (prot != PROT_ZFOD) {
    1201     0    stevel 				(void) as_setprot(p->p_as, (caddr_t)zfodbase,
    1202     0    stevel 				    zfodlen, prot);
    1203     0    stevel 			}
    1204     0    stevel 		}
    1205     0    stevel 	}
    1206     0    stevel 	return (0);
    1207     0    stevel bad:
    1208     0    stevel 	return (error);
    1209     0    stevel }
   1210     0    stevel 
   1211     0    stevel void
   1212     0    stevel setexecenv(struct execenv *ep)
   1213     0    stevel {
   1214     0    stevel 	proc_t *p = ttoproc(curthread);
   1215     0    stevel 	klwp_t *lwp = ttolwp(curthread);
   1216     0    stevel 	struct vnode *vp;
   1217     0    stevel 
   1218     0    stevel 	p->p_bssbase = ep->ex_bssbase;
   1219     0    stevel 	p->p_brkbase = ep->ex_brkbase;
   1220     0    stevel 	p->p_brksize = ep->ex_brksize;
   1221     0    stevel 	if (p->p_exec)
   1222     0    stevel 		VN_RELE(p->p_exec);	/* out with the old */
   1223     0    stevel 	vp = p->p_exec = ep->ex_vp;
   1224     0    stevel 	if (vp != NULL)
   1225     0    stevel 		VN_HOLD(vp);		/* in with the new */
   1226     0    stevel 
   1227     0    stevel 	lwp->lwp_sigaltstack.ss_sp = 0;
   1228     0    stevel 	lwp->lwp_sigaltstack.ss_size = 0;
   1229     0    stevel 	lwp->lwp_sigaltstack.ss_flags = SS_DISABLE;
   1230     0    stevel }
   1231     0    stevel 
   1232     0    stevel int
   1233     0    stevel execopen(struct vnode **vpp, int *fdp)
   1234     0    stevel {
   1235     0    stevel 	struct vnode *vp = *vpp;
   1236     0    stevel 	file_t *fp;
   1237     0    stevel 	int error = 0;
   1238     0    stevel 	int filemode = FREAD;
   1239     0    stevel 
   1240     0    stevel 	VN_HOLD(vp);		/* open reference */
   1241     0    stevel 	if (error = falloc(NULL, filemode, &fp, fdp)) {
   1242     0    stevel 		VN_RELE(vp);
   1243     0    stevel 		*fdp = -1;	/* just in case falloc changed value */
   1244     0    stevel 		return (error);
   1245     0    stevel 	}
   1246  5331       amw 	if (error = VOP_OPEN(&vp, filemode, CRED(), NULL)) {
   1247     0    stevel 		VN_RELE(vp);
   1248     0    stevel 		setf(*fdp, NULL);
   1249     0    stevel 		unfalloc(fp);
   1250     0    stevel 		*fdp = -1;
   1251     0    stevel 		return (error);
   1252     0    stevel 	}
   1253     0    stevel 	*vpp = vp;		/* vnode should not have changed */
   1254     0    stevel 	fp->f_vnode = vp;
   1255     0    stevel 	mutex_exit(&fp->f_tlock);
   1256     0    stevel 	setf(*fdp, fp);
   1257     0    stevel 	return (0);
   1258     0    stevel }
   1259     0    stevel 
   1260     0    stevel int
   1261     0    stevel execclose(int fd)
   1262     0    stevel {
   1263     0    stevel 	return (closeandsetf(fd, NULL));
   1264     0    stevel }
   1265     0    stevel 
   1266     0    stevel 
   1267     0    stevel /*
   1268     0    stevel  * noexec stub function.
   1269     0    stevel  */
   1270     0    stevel /*ARGSUSED*/
   1271     0    stevel int
   1272     0    stevel noexec(
   1273     0    stevel     struct vnode *vp,
   1274     0    stevel     struct execa *uap,
   1275     0    stevel     struct uarg *args,
   1276     0    stevel     struct intpdata *idatap,
   1277     0    stevel     int level,
   1278     0    stevel     long *execsz,
   1279     0    stevel     int setid,
   1280     0    stevel     caddr_t exec_file,
   1281     0    stevel     struct cred *cred)
   1282     0    stevel {
   1283     0    stevel 	cmn_err(CE_WARN, "missing exec capability for %s", uap->fname);
   1284     0    stevel 	return (ENOEXEC);
   1285     0    stevel }
   1286     0    stevel 
   1287     0    stevel /*
   1288     0    stevel  * Support routines for building a user stack.
   1289     0    stevel  *
   1290     0    stevel  * execve(path, argv, envp) must construct a new stack with the specified
   1291     0    stevel  * arguments and environment variables (see exec_args() for a description
   1292     0    stevel  * of the user stack layout).  To do this, we copy the arguments and
   1293     0    stevel  * environment variables from the old user address space into the kernel,
   1294     0    stevel  * free the old as, create the new as, and copy our buffered information
   1295     0    stevel  * to the new stack.  Our kernel buffer has the following structure:
   1296     0    stevel  *
   1297     0    stevel  *	+-----------------------+ <--- stk_base + stk_size
   1298     0    stevel  *	| string offsets	|
   1299     0    stevel  *	+-----------------------+ <--- stk_offp
   1300     0    stevel  *	|			|
   1301     0    stevel  *	| STK_AVAIL() space	|
   1302     0    stevel  *	|			|
   1303     0    stevel  *	+-----------------------+ <--- stk_strp
   1304     0    stevel  *	| strings		|
   1305     0    stevel  *	+-----------------------+ <--- stk_base
   1306     0    stevel  *
   1307     0    stevel  * When we add a string, we store the string's contents (including the null
   1308     0    stevel  * terminator) at stk_strp, and we store the offset of the string relative to
   1309     0    stevel  * stk_base at --stk_offp.  As strings are added, stk_strp increases and
   1310     0    stevel  * stk_offp decreases.  The amount of space remaining, STK_AVAIL(), is just
   1311     0    stevel  * the difference between these pointers.  If we run out of space, we return
   1312     0    stevel  * an error and exec_args() starts all over again with a buffer twice as large.
   1313     0    stevel  * When we're all done, the kernel buffer looks like this:
   1314     0    stevel  *
   1315     0    stevel  *	+-----------------------+ <--- stk_base + stk_size
   1316     0    stevel  *	| argv[0] offset	|
   1317     0    stevel  *	+-----------------------+
   1318     0    stevel  *	| ...			|
   1319     0    stevel  *	+-----------------------+
   1320     0    stevel  *	| argv[argc-1] offset	|
   1321     0    stevel  *	+-----------------------+
   1322     0    stevel  *	| envp[0] offset	|
   1323     0    stevel  *	+-----------------------+
   1324     0    stevel  *	| ...			|
   1325     0    stevel  *	+-----------------------+
   1326     0    stevel  *	| envp[envc-1] offset	|
   1327     0    stevel  *	+-----------------------+
   1328     0    stevel  *	| AT_SUN_PLATFORM offset|
   1329     0    stevel  *	+-----------------------+
   1330     0    stevel  *	| AT_SUN_EXECNAME offset|
   1331     0    stevel  *	+-----------------------+ <--- stk_offp
   1332     0    stevel  *	|			|
   1333     0    stevel  *	| STK_AVAIL() space	|
   1334     0    stevel  *	|			|
   1335     0    stevel  *	+-----------------------+ <--- stk_strp
   1336     0    stevel  *	| AT_SUN_EXECNAME string|
   1337     0    stevel  *	+-----------------------+
   1338     0    stevel  *	| AT_SUN_PLATFORM string|
   1339     0    stevel  *	+-----------------------+
   1340     0    stevel  *	| envp[envc-1] string	|
   1341     0    stevel  *	+-----------------------+
   1342     0    stevel  *	| ...			|
   1343     0    stevel  *	+-----------------------+
   1344     0    stevel  *	| envp[0] string	|
   1345     0    stevel  *	+-----------------------+
   1346     0    stevel  *	| argv[argc-1] string	|
   1347     0    stevel  *	+-----------------------+
   1348     0    stevel  *	| ...			|
   1349     0    stevel  *	+-----------------------+
   1350     0    stevel  *	| argv[0] string	|
   1351     0    stevel  *	+-----------------------+ <--- stk_base
   1352     0    stevel  */
   1353     0    stevel 
   1354     0    stevel #define	STK_AVAIL(args)		((char *)(args)->stk_offp - (args)->stk_strp)
   1355     0    stevel 
   1356     0    stevel /*
   1357     0    stevel  * Add a string to the stack.
   1358     0    stevel  */
   1359     0    stevel static int
   1360     0    stevel stk_add(uarg_t *args, const char *sp, enum uio_seg segflg)
   1361     0    stevel {
   1362     0    stevel 	int error;
   1363     0    stevel 	size_t len;
   1364     0    stevel 
   1365     0    stevel 	if (STK_AVAIL(args) < sizeof (int))
   1366     0    stevel 		return (E2BIG);
   1367     0    stevel 	*--args->stk_offp = args->stk_strp - args->stk_base;
   1368     0    stevel 
   1369     0    stevel 	if (segflg == UIO_USERSPACE) {
   1370     0    stevel 		error = copyinstr(sp, args->stk_strp, STK_AVAIL(args), &len);
   1371     0    stevel 		if (error != 0)
   1372     0    stevel 			return (error);
   1373     0    stevel 	} else {
   1374     0    stevel 		len = strlen(sp) + 1;
   1375     0    stevel 		if (len > STK_AVAIL(args))
   1376     0    stevel 			return (E2BIG);
   1377     0    stevel 		bcopy(sp, args->stk_strp, len);
   1378     0    stevel 	}
   1379     0    stevel 
   1380     0    stevel 	args->stk_strp += len;
   1381     0    stevel 
   1382     0    stevel 	return (0);
   1383     0    stevel }
   1384     0    stevel 
   1385     0    stevel static int
   1386     0    stevel stk_getptr(uarg_t *args, char *src, char **dst)
   1387     0    stevel {
   1388     0    stevel 	int error;
   1389     0    stevel 
   1390     0    stevel 	if (args->from_model == DATAMODEL_NATIVE) {
   1391     0    stevel 		ulong_t ptr;
   1392     0    stevel 		error = fulword(src, &ptr);
   1393     0    stevel 		*dst = (caddr_t)ptr;
   1394     0    stevel 	} else {
   1395     0    stevel 		uint32_t ptr;
   1396     0    stevel 		error = fuword32(src, &ptr);
   1397     0    stevel 		*dst = (caddr_t)(uintptr_t)ptr;
   1398     0    stevel 	}
   1399     0    stevel 	return (error);
   1400     0    stevel }
   1401     0    stevel 
   1402     0    stevel static int
   1403     0    stevel stk_putptr(uarg_t *args, char *addr, char *value)
   1404     0    stevel {
   1405     0    stevel 	if (args->to_model == DATAMODEL_NATIVE)
   1406     0    stevel 		return (sulword(addr, (ulong_t)value));
   1407     0    stevel 	else
   1408     0    stevel 		return (suword32(addr, (uint32_t)(uintptr_t)value));
   1409     0    stevel }
   1410     0    stevel 
   1411     0    stevel static int
   1412     0    stevel stk_copyin(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp)
   1413     0    stevel {
   1414     0    stevel 	char *sp;
   1415     0    stevel 	int argc, error;
   1416     0    stevel 	int argv_empty = 0;
   1417     0    stevel 	size_t ptrsize = args->from_ptrsize;
   1418     0    stevel 	size_t size, pad;
   1419     0    stevel 	char *argv = (char *)uap->argp;
   1420     0    stevel 	char *envp = (char *)uap->envp;
   1421     0    stevel 
   1422     0    stevel 	/*
   1423     0    stevel 	 * Copy interpreter's name and argument to argv[0] and argv[1].
   1424     0    stevel 	 */
   1425     0    stevel 	if (intp != NULL && intp->intp_name != NULL) {
   1426     0    stevel 		if ((error = stk_add(args, intp->intp_name, UIO_SYSSPACE)) != 0)
   1427     0    stevel 			return (error);
   1428     0    stevel 		if (intp->intp_arg != NULL &&
   1429     0    stevel 		    (error = stk_add(args, intp->intp_arg, UIO_SYSSPACE)) != 0)
   1430     0    stevel 			return (error);
   1431     0    stevel 		if (args->fname != NULL)
   1432     0    stevel 			error = stk_add(args, args->fname, UIO_SYSSPACE);
   1433     0    stevel 		else
   1434     0    stevel 			error = stk_add(args, uap->fname, UIO_USERSPACE);
   1435     0    stevel 		if (error)
   1436     0    stevel 			return (error);
   1437     0    stevel 
   1438     0    stevel 		/*
   1439     0    stevel 		 * Check for an empty argv[].
   1440     0    stevel 		 */
   1441     0    stevel 		if (stk_getptr(args, argv, &sp))
   1442     0    stevel 			return (EFAULT);
   1443     0    stevel 		if (sp == NULL)
   1444     0    stevel 			argv_empty = 1;
   1445     0    stevel 
   1446     0    stevel 		argv += ptrsize;		/* ignore original argv[0] */
   1447     0    stevel 	}
   1448     0    stevel 
   1449     0    stevel 	if (argv_empty == 0) {
   1450     0    stevel 		/*
   1451     0    stevel 		 * Add argv[] strings to the stack.
   1452     0    stevel 		 */
   1453     0    stevel 		for (;;) {
   1454     0    stevel 			if (stk_getptr(args, argv, &sp))
   1455     0    stevel 				return (EFAULT);
   1456     0    stevel 			if (sp == NULL)
   1457     0    stevel 				break;
   1458     0    stevel 			if ((error = stk_add(args, sp, UIO_USERSPACE)) != 0)
   1459     0    stevel 				return (error);
   1460     0    stevel 			argv += ptrsize;
   1461     0    stevel 		}
   1462     0    stevel 	}
   1463     0    stevel 	argc = (int *)(args->stk_base + args->stk_size) - args->stk_offp;
   1464     0    stevel 	args->arglen = args->stk_strp - args->stk_base;
   1465     0    stevel 
   1466     0    stevel 	/*
   1467     0    stevel 	 * Add environ[] strings to the stack.
   1468     0    stevel 	 */
   1469     0    stevel 	if (envp != NULL) {
   1470     0    stevel 		for (;;) {
   1471     0    stevel 			if (stk_getptr(args, envp, &sp))
   1472     0    stevel 				return (EFAULT);
   1473     0    stevel 			if (sp == NULL)
   1474     0    stevel 				break;
   1475     0    stevel 			if ((error = stk_add(args, sp, UIO_USERSPACE)) != 0)
   1476     0    stevel 				return (error);
   1477     0    stevel 			envp += ptrsize;
   1478     0    stevel 		}
   1479     0    stevel 	}
   1480     0    stevel 	args->na = (int *)(args->stk_base + args->stk_size) - args->stk_offp;
   1481     0    stevel 	args->ne = args->na - argc;
   1482     0    stevel 
   1483     0    stevel 	/*
   1484  2712   nn35248 	 * Add AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME, and
   1485  2712   nn35248 	 * AT_SUN_EMULATOR strings to the stack.
   1486     0    stevel 	 */
   1487     0    stevel 	if (auxvpp != NULL && *auxvpp != NULL) {
   1488     0    stevel 		if ((error = stk_add(args, platform, UIO_SYSSPACE)) != 0)
   1489     0    stevel 			return (error);
   1490     0    stevel 		if ((error = stk_add(args, args->pathname, UIO_SYSSPACE)) != 0)
   1491  2712   nn35248 			return (error);
   1492  2712   nn35248 		if (args->brandname != NULL &&
   1493  6247       raf 		    (error = stk_add(args, args->brandname, UIO_SYSSPACE)) != 0)
   1494  2712   nn35248 			return (error);
   1495  2712   nn35248 		if (args->emulator != NULL &&
   1496  6247       raf 		    (error = stk_add(args, args->emulator, UIO_SYSSPACE)) != 0)
   1497     0    stevel 			return (error);
   1498     0    stevel 	}
   1499     0    stevel 
   1500     0    stevel 	/*
   1501     0    stevel 	 * Compute the size of the stack.  This includes all the pointers,
   1502     0    stevel 	 * the space reserved for the aux vector, and all the strings.
   1503     0    stevel 	 * The total number of pointers is args->na (which is argc + envc)
   1504     0    stevel 	 * plus 4 more: (1) a pointer's worth of space for argc; (2) the NULL
   1505     0    stevel 	 * after the last argument (i.e. argv[argc]); (3) the NULL after the
   1506     0    stevel 	 * last environment variable (i.e. envp[envc]); and (4) the NULL after
   1507     0    stevel 	 * all the strings, at the very top of the stack.
   1508     0    stevel 	 */
   1509     0    stevel 	size = (args->na + 4) * args->to_ptrsize + args->auxsize +
   1510     0    stevel 	    (args->stk_strp - args->stk_base);
   1511     0    stevel 
   1512     0    stevel 	/*
   1513     0    stevel 	 * Pad the string section with zeroes to align the stack size.
   1514     0    stevel 	 */
   1515     0    stevel 	pad = P2NPHASE(size, args->stk_align);
   1516     0    stevel 
   1517     0    stevel 	if (STK_AVAIL(args) < pad)
   1518     0    stevel 		return (E2BIG);
   1519     0    stevel 
   1520     0    stevel 	args->usrstack_size = size + pad;
   1521     0    stevel 
   1522     0    stevel 	while (pad-- != 0)
   1523     0    stevel 		*args->stk_strp++ = 0;
   1524     0    stevel 
   1525     0    stevel 	args->nc = args->stk_strp - args->stk_base;
   1526     0    stevel 
   1527     0    stevel 	return (0);
   1528     0    stevel }
   1529     0    stevel 
   1530     0    stevel static int
   1531     0    stevel stk_copyout(uarg_t *args, char *usrstack, void **auxvpp, user_t *up)
   1532     0    stevel {
   1533     0    stevel 	size_t ptrsize = args->to_ptrsize;
   1534     0    stevel 	ssize_t pslen;
   1535     0    stevel 	char *kstrp = args->stk_base;
   1536     0    stevel 	char *ustrp = usrstack - args->nc - ptrsize;
   1537     0    stevel 	char *usp = usrstack - args->usrstack_size;
   1538     0    stevel 	int *offp = (int *)(args->stk_base + args->stk_size);
   1539     0    stevel 	int envc = args->ne;
   1540     0    stevel 	int argc = args->na - envc;
   1541     0    stevel 	int i;
   1542     0    stevel 
   1543     0    stevel 	/*
   1544     0    stevel 	 * Record argc for /proc.
   1545     0    stevel 	 */
   1546     0    stevel 	up->u_argc = argc;
   1547     0    stevel 
   1548     0    stevel 	/*
   1549     0    stevel 	 * Put argc on the stack.  Note that even though it's an int,
   1550     0    stevel 	 * it always consumes ptrsize bytes (for alignment).
   1551     0    stevel 	 */
   1552     0    stevel 	if (stk_putptr(args, usp, (char *)(uintptr_t)argc))
   1553     0    stevel 		return (-1);
   1554     0    stevel 
   1555     0    stevel 	/*
   1556     0    stevel 	 * Add argc space (ptrsize) to usp and record argv for /proc.
   1557     0    stevel 	 */
   1558     0    stevel 	up->u_argv = (uintptr_t)(usp += ptrsize);
   1559     0    stevel 
   1560     0    stevel 	/*
   1561     0    stevel 	 * Put the argv[] pointers on the stack.
   1562     0    stevel 	 */
   1563     0    stevel 	for (i = 0; i < argc; i++, usp += ptrsize)
   1564     0    stevel 		if (stk_putptr(args, usp, &ustrp[*--offp]))
   1565     0    stevel 			return (-1);
   1566     0    stevel 
   1567     0    stevel 	/*
   1568     0    stevel 	 * Copy arguments to u_psargs.
   1569     0    stevel 	 */
   1570     0    stevel 	pslen = MIN(args->arglen, PSARGSZ) - 1;
   1571     0    stevel 	for (i = 0; i < pslen; i++)
   1572     0    stevel 		up->u_psargs[i] = (kstrp[i] == '\0' ? ' ' : kstrp[i]);
   1573     0    stevel 	while (i < PSARGSZ)
   1574     0    stevel 		up->u_psargs[i++] = '\0';
   1575     0    stevel 
   1576     0    stevel 	/*
   1577     0    stevel 	 * Add space for argv[]'s NULL terminator (ptrsize) to usp and
   1578     0    stevel 	 * record envp for /proc.
   1579     0    stevel 	 */
   1580     0    stevel 	up->u_envp = (uintptr_t)(usp += ptrsize);
   1581     0    stevel 
   1582     0    stevel 	/*
   1583     0    stevel 	 * Put the envp[] pointers on the stack.
   1584     0    stevel 	 */
   1585     0    stevel 	for (i = 0; i < envc; i++, usp += ptrsize)
   1586     0    stevel 		if (stk_putptr(args, usp, &ustrp[*--offp]))
   1587     0    stevel 			return (-1);
   1588     0    stevel 
   1589     0    stevel 	/*
   1590     0    stevel 	 * Add space for envp[]'s NULL terminator (ptrsize) to usp and
   1591     0    stevel 	 * remember where the stack ends, which is also where auxv begins.
   1592     0    stevel 	 */
   1593     0    stevel 	args->stackend = usp += ptrsize;
   1594     0    stevel 
   1595     0    stevel 	/*
   1596     0    stevel 	 * Put all the argv[], envp[], and auxv strings on the stack.
   1597     0    stevel 	 */
   1598     0    stevel 	if (copyout(args->stk_base, ustrp, args->nc))
   1599     0    stevel 		return (-1);
   1600     0    stevel 
   1601     0    stevel 	/*
   1602     0    stevel 	 * Fill in the aux vector now that we know the user stack addresses
   1603  2712   nn35248 	 * for the AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME and
   1604  2712   nn35248 	 * AT_SUN_EMULATOR strings.
   1605     0    stevel 	 */
   1606     0    stevel 	if (auxvpp != NULL && *auxvpp != NULL) {
   1607     0    stevel 		if (args->to_model == DATAMODEL_NATIVE) {
   1608     0    stevel 			auxv_t **a = (auxv_t **)auxvpp;
   1609     0    stevel 			ADDAUX(*a, AT_SUN_PLATFORM, (long)&ustrp[*--offp])
   1610     0    stevel 			ADDAUX(*a, AT_SUN_EXECNAME, (long)&ustrp[*--offp])
   1611  2712   nn35248 			if (args->brandname != NULL)
   1612  2712   nn35248 				ADDAUX(*a,
   1613  2712   nn35248 				    AT_SUN_BRANDNAME, (long)&ustrp[*--offp])
   1614  2712   nn35248 			if (args->emulator != NULL)
   1615  2712   nn35248 				ADDAUX(*a,
   1616  2712   nn35248 				    AT_SUN_EMULATOR, (long)&ustrp[*--offp])
   1617     0    stevel 		} else {
   1618     0    stevel 			auxv32_t **a = (auxv32_t **)auxvpp;
   1619     0    stevel 			ADDAUX(*a,
   1620     0    stevel 			    AT_SUN_PLATFORM, (int)(uintptr_t)&ustrp[*--offp])
   1621     0    stevel 			ADDAUX(*a,
   1622  2712   nn35248 			    AT_SUN_EXECNAME, (int)(uintptr_t)&ustrp[*--offp])
   1623  2712   nn35248 			if (args->brandname != NULL)
   1624  2712   nn35248 				ADDAUX(*a, AT_SUN_BRANDNAME,
   1625  2712   nn35248 				    (int)(uintptr_t)&ustrp[*--offp])
   1626  2712   nn35248 			if (args->emulator != NULL)
   1627  2712   nn35248 				ADDAUX(*a, AT_SUN_EMULATOR,
   1628  2712   nn35248 				    (int)(uintptr_t)&ustrp[*--offp])
   1629     0    stevel 		}
   1630     0    stevel 	}
   1631     0    stevel 
   1632     0    stevel 	return (0);
   1633     0    stevel }
   1634     0    stevel 
   1635     0    stevel /*
   1636     0    stevel  * Initialize a new user stack with the specified arguments and environment.
   1637     0    stevel  * The initial user stack layout is as follows:
   1638     0    stevel  *
   1639     0    stevel  *	User Stack
   1640     0    stevel  *	+---------------+ <--- curproc->p_usrstack
   1641  3177   dp78419  *	|		|
   1642  3177   dp78419  *	| slew		|
   1643  3177   dp78419  *	|		|
   1644  3177   dp78419  *	+---------------+
   1645     0    stevel  *	| NULL		|
   1646     0    stevel  *	+---------------+
   1647     0    stevel  *	|		|
   1648     0    stevel  *	| auxv strings	|
   1649     0    stevel  *	|		|
   1650     0    stevel  *	+---------------+
   1651     0    stevel  *	|		|
   1652     0    stevel  *	| envp strings	|
   1653     0    stevel  *	|		|
   1654     0    stevel  *	+---------------+
   1655     0    stevel  *	|		|
   1656     0    stevel  *	| argv strings	|
   1657     0    stevel  *	|		|
   1658     0    stevel  *	+---------------+ <--- ustrp
   1659     0    stevel  *	|		|
   1660     0    stevel  *	| aux vector	|
   1661     0    stevel  *	|		|
   1662     0    stevel  *	+---------------+ <--- auxv
   1663     0    stevel  *	| NULL		|
   1664     0    stevel  *	+---------------+
   1665     0    stevel  *	| envp[envc-1]	|
   1666     0    stevel  *	+---------------+
   1667     0    stevel  *	| ...		|
   1668     0    stevel  *	+---------------+
   1669     0    stevel  *	| envp[0]	|
   1670     0    stevel  *	+---------------+ <--- envp[]
   1671     0    stevel  *	| NULL		|
   1672     0    stevel  *	+---------------+
   1673     0    stevel  *	| argv[argc-1]	|
   1674     0    stevel  *	+---------------+
   1675     0    stevel  *	| ...		|
   1676     0    stevel  *	+---------------+
   1677     0    stevel  *	| argv[0]	|
   1678     0    stevel  *	+---------------+ <--- argv[]
   1679     0    stevel  *	| argc		|
   1680     0    stevel  *	+---------------+ <--- stack base
   1681     0    stevel  */
   1682     0    stevel int
   1683     0    stevel exec_args(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp)
   1684     0    stevel {
   1685     0    stevel 	size_t size;
   1686     0    stevel 	int error;
   1687     0    stevel 	proc_t *p = ttoproc(curthread);
   1688     0    stevel 	user_t *up = PTOU(p);
   1689     0    stevel 	char *usrstack;
   1690     0    stevel 	rctl_entity_p_t e;
   1691     0    stevel 	struct as *as;
   1692  2991    susans 	extern int use_stk_lpg;
   1693  3177   dp78419 	size_t sp_slew;
   1694     0    stevel 
   1695     0    stevel 	args->from_model = p->p_model;
   1696     0    stevel 	if (p->p_model == DATAMODEL_NATIVE) {
   1697     0    stevel 		args->from_ptrsize = sizeof (long);
   1698     0    stevel 	} else {
   1699     0    stevel 		args->from_ptrsize = sizeof (int32_t);
   1700     0    stevel 	}
   1701     0    stevel 
   1702     0    stevel 	if (args->to_model == DATAMODEL_NATIVE) {
   1703     0    stevel 		args->to_ptrsize = sizeof (long);
   1704     0    stevel 		args->ncargs = NCARGS;
   1705     0    stevel 		args->stk_align = STACK_ALIGN;
   1706  7838     Roger 		if (args->addr32)
   1707  7838     Roger 			usrstack = (char *)USRSTACK64_32;
   1708  7838     Roger 		else
   1709  7838     Roger 			usrstack = (char *)USRSTACK;
   1710     0    stevel 	} else {
   1711     0    stevel 		args->to_ptrsize = sizeof (int32_t);
   1712     0    stevel 		args->ncargs = NCARGS32;
   1713     0    stevel 		args->stk_align = STACK_ALIGN32;
   1714     0    stevel 		usrstack = (char *)USRSTACK32;
   1715     0    stevel 	}
   1716     0    stevel 
   1717     0    stevel 	ASSERT(P2PHASE((uintptr_t)usrstack, args->stk_align) == 0);
   1718     0    stevel 
   1719     0    stevel #if defined(__sparc)
   1720     0    stevel 	/*
   1721     0    stevel 	 * Make sure user register windows are empty before
   1722     0    stevel 	 * attempting to make a new stack.
   1723     0    stevel 	 */
   1724     0    stevel 	(void) flush_user_windows_to_stack(NULL);
   1725     0    stevel #endif
   1726     0    stevel 
   1727     0    stevel 	for (size = PAGESIZE; ; size *= 2) {
   1728     0    stevel 		args->stk_size = size;
   1729     0    stevel 		args->stk_base = kmem_alloc(size, KM_SLEEP);
   1730     0    stevel 		args->stk_strp = args->stk_base;
   1731     0    stevel 		args->stk_offp = (int *)(args->stk_base + size);
   1732     0    stevel 		error = stk_copyin(uap, args, intp, auxvpp);
   1733     0    stevel 		if (error == 0)
   1734     0    stevel 			break;
   1735     0    stevel 		kmem_free(args->stk_base, size);
   1736     0    stevel 		if (error != E2BIG && error != ENAMETOOLONG)
   1737     0    stevel 			return (error);
   1738     0    stevel 		if (size >= args->ncargs)
   1739     0    stevel 			return (E2BIG);
   1740     0    stevel 	}
   1741     0    stevel 
   1742     0    stevel 	size = args->usrstack_size;
   1743     0    stevel 
   1744     0    stevel 	ASSERT(error == 0);
   1745     0    stevel 	ASSERT(P2PHASE(size, args->stk_align) == 0);
   1746     0    stevel 	ASSERT((ssize_t)STK_AVAIL(args) >= 0);
   1747     0    stevel 
   1748     0    stevel 	if (size > args->ncargs) {
   1749     0    stevel 		kmem_free(args->stk_base, args->stk_size);
   1750     0    stevel 		return (E2BIG);
   1751     0    stevel 	}
   1752     0    stevel 
   1753     0    stevel 	/*
   1754     0    stevel 	 * Leave only the current lwp and force the other lwps to exit.
   1755     0    stevel 	 * If another lwp beat us to the punch by calling exit(), bail out.
   1756     0    stevel 	 */
   1757     0    stevel 	if ((error = exitlwps(0)) != 0) {
   1758     0    stevel 		kmem_free(args->stk_base, args->stk_size);
   1759     0    stevel 		return (error);
   1760     0    stevel 	}
   1761     0    stevel 
   1762     0    stevel 	/*
   1763     0    stevel 	 * Revoke any doors created by the process.
   1764     0    stevel 	 */
   1765     0    stevel 	if (p->p_door_list)
   1766     0    stevel 		door_exit();
   1767     0    stevel 
   1768     0    stevel 	/*
   1769     0    stevel 	 * Release schedctl data structures.
   1770     0    stevel 	 */
   1771     0    stevel 	if (p->p_pagep)
   1772     0    stevel 		schedctl_proc_cleanup();
   1773     0    stevel 
   1774     0    stevel 	/*
   1775     0    stevel 	 * Clean up any DTrace helpers for the process.
   1776     0    stevel 	 */
   1777     0    stevel 	if (p->p_dtrace_helpers != NULL) {
   1778     0    stevel 		ASSERT(dtrace_helpers_cleanup != NULL);
   1779     0    stevel 		(*dtrace_helpers_cleanup)();
   1780     0    stevel 	}
   1781     0    stevel 
   1782     0    stevel 	mutex_enter(&p->p_lock);
   1783     0    stevel 	/*
   1784     0    stevel 	 * Cleanup the DTrace provider associated with this process.
   1785     0    stevel 	 */
   1786     0    stevel 	if (p->p_dtrace_probes) {
   1787     0    stevel 		ASSERT(dtrace_fasttrap_exec_ptr != NULL);
   1788     0    stevel 		dtrace_fasttrap_exec_ptr(p);
   1789     0    stevel 	}
   1790     0    stevel 	mutex_exit(&p->p_lock);
   1791     0    stevel 
   1792     0    stevel 	/*
   1793     0    stevel 	 * discard the lwpchan cache.
   1794     0    stevel 	 */
   1795     0    stevel 	if (p->p_lcp != NULL)
   1796     0    stevel 		lwpchan_destroy_cache(1);
   1797     0    stevel 
   1798     0    stevel 	/*
   1799     0    stevel 	 * Delete the POSIX timers.
   1800     0    stevel 	 */
   1801     0    stevel 	if (p->p_itimer != NULL)
   1802     0    stevel 		timer_exit();
   1803     0    stevel 
   1804  9870     Roger 	/*
   1805  9870     Roger 	 * Delete the ITIMER_REALPROF interval timer.
   1806  9870     Roger 	 * The other ITIMER_* interval timers are specified
   1807  9870     Roger 	 * to be inherited across exec().
   1808  9870     Roger 	 */
   1809  9870     Roger 	delete_itimer_realprof();
   1810  9870     Roger 
   1811     0    stevel 	if (audit_active)
   1812     0    stevel 		audit_exec(args->stk_base, args->stk_base + args->arglen,
   1813     0    stevel 		    args->na - args->ne, args->ne);
   1814     0    stevel 
   1815     0    stevel 	/*
   1816     0    stevel 	 * Ensure that we don't change resource associations while we
   1817     0    stevel 	 * change address spaces.
   1818     0    stevel 	 */
   1819     0    stevel 	mutex_enter(&p->p_lock);
   1820     0    stevel 	pool_barrier_enter();
   1821     0    stevel 	mutex_exit(&p->p_lock);
   1822     0    stevel 
   1823     0    stevel 	/*
   1824     0    stevel 	 * Destroy the old address space and create a new one.
   1825     0    stevel 	 * From here on, any errors are fatal to the exec()ing process.
   1826     0    stevel 	 * On error we return -1, which means the caller must SIGKILL
   1827     0    stevel 	 * the process.
   1828     0    stevel 	 */
   1829     0    stevel 	relvm();
   1830     0    stevel 
   1831     0    stevel 	mutex_enter(&p->p_lock);
   1832     0    stevel 	pool_barrier_exit();
   1833     0    stevel 	mutex_exit(&p->p_lock);
   1834     0    stevel 
   1835     0    stevel 	up->u_execsw = args->execswp;
   1836     0    stevel 
   1837     0    stevel 	p->p_brkbase = NULL;
   1838     0    stevel 	p->p_brksize = 0;
   1839  2991    susans 	p->p_brkpageszc = 0;
   1840     0    stevel 	p->p_stksize = 0;
   1841  2991    susans 	p->p_stkpageszc = 0;
   1842     0    stevel 	p->p_model = args->to_model;
   1843     0    stevel 	p->p_usrstack = usrstack;
   1844     0    stevel 	p->p_stkprot = args->stk_prot;
   1845     0    stevel 	p->p_datprot = args->dat_prot;
   1846     0    stevel 
   1847     0    stevel 	/*
   1848     0    stevel 	 * Reset resource controls such that all controls are again active as
   1849     0    stevel 	 * well as appropriate to the potentially new address model for the
   1850     0    stevel 	 * process.
   1851     0    stevel 	 */
   1852     0    stevel 	e.rcep_p.proc = p;
   1853     0    stevel 	e.rcep_t = RCENTITY_PROCESS;
   1854     0    stevel 	rctl_set_reset(p->p_rctls, p, &e);
   1855     0    stevel 
   1856  2991    susans 	/* Too early to call map_pgsz for the heap */
   1857  2991    susans 	if (use_stk_lpg) {
   1858  2991    susans 		p->p_stkpageszc = page_szc(map_pgsz(MAPPGSZ_STK, p, 0, 0, 0));
   1859  2991    susans 	}
   1860     0    stevel 
   1861  2991    susans 	mutex_enter(&p->p_lock);
   1862  2991    susans 	p->p_flag |= SAUTOLPG;	/* kernel controls page sizes */
   1863  2991    susans 	mutex_exit(&p->p_lock);
   1864     0    stevel 
   1865  3177   dp78419 	/*
   1866  3177   dp78419 	 * Some platforms may choose to randomize real stack start by adding a
   1867  3177   dp78419 	 * small slew (not more than a few hundred bytes) to the top of the
   1868  3177   dp78419 	 * stack. This helps avoid cache thrashing when identical processes
   1869  3177   dp78419 	 * simultaneously share caches that don't provide enough associativity
   1870  3177   dp78419 	 * (e.g. sun4v systems). In this case stack slewing makes the same hot
   1871  3177   dp78419 	 * stack variables in different processes to live in different cache
   1872  3177   dp78419 	 * sets increasing effective associativity.
   1873  3177   dp78419 	 */
   1874  3177   dp78419 	sp_slew = exec_get_spslew();
   1875  3177   dp78419 	ASSERT(P2PHASE(sp_slew, args->stk_align) == 0);
   1876  3177   dp78419 	exec_set_sp(size + sp_slew);
   1877     0    stevel 
   1878     0    stevel 	as = as_alloc();
   1879     0    stevel 	p->p_as = as;
   1880  2768  sl108498 	as->a_proc = p;
   1881  7838     Roger 	if (p->p_model == DATAMODEL_ILP32 || args->addr32)
   1882     0    stevel 		as->a_userlimit = (caddr_t)USERLIMIT32;
   1883     0    stevel 	(void) hat_setup(as->a_hat, HAT_ALLOC);
   1884  4528   paulsan 	hat_join_srd(as->a_hat, args->ex_vp);
   1885     0    stevel 
   1886     0    stevel 	/*
   1887     0    stevel 	 * Finally, write out the contents of the new stack.
   1888     0    stevel 	 */
   1889  3177   dp78419 	error = stk_copyout(args, usrstack - sp_slew, auxvpp, up);
   1890     0    stevel 	kmem_free(args->stk_base, args->stk_size);
   1891     0    stevel 	return (error);
   1892     0    stevel }
   1893