OpenGrok

Cross Reference: prsubr.c
xref: /onnv/onnv-gate/usr/src/uts/common/fs/proc/prsubr.c
Home | History | Annotate | Line # | Download | only in proc
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
     24  */
     25 
     26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
     27 /*	  All Rights Reserved  	*/
     28 
     29 #include <sys/types.h>
     30 #include <sys/t_lock.h>
     31 #include <sys/param.h>
     32 #include <sys/cmn_err.h>
     33 #include <sys/cred.h>
     34 #include <sys/priv.h>
     35 #include <sys/debug.h>
     36 #include <sys/errno.h>
     37 #include <sys/inline.h>
     38 #include <sys/kmem.h>
     39 #include <sys/mman.h>
     40 #include <sys/proc.h>
     41 #include <sys/brand.h>
     42 #include <sys/sobject.h>
     43 #include <sys/sysmacros.h>
     44 #include <sys/systm.h>
     45 #include <sys/uio.h>
     46 #include <sys/var.h>
     47 #include <sys/vfs.h>
     48 #include <sys/vnode.h>
     49 #include <sys/session.h>
     50 #include <sys/pcb.h>
     51 #include <sys/signal.h>
     52 #include <sys/user.h>
     53 #include <sys/disp.h>
     54 #include <sys/class.h>
     55 #include <sys/ts.h>
     56 #include <sys/bitmap.h>
     57 #include <sys/poll.h>
     58 #include <sys/shm_impl.h>
     59 #include <sys/fault.h>
     60 #include <sys/syscall.h>
     61 #include <sys/procfs.h>
     62 #include <sys/processor.h>
     63 #include <sys/cpuvar.h>
     64 #include <sys/copyops.h>
     65 #include <sys/time.h>
     66 #include <sys/msacct.h>
     67 #include <vm/as.h>
     68 #include <vm/rm.h>
     69 #include <vm/seg.h>
     70 #include <vm/seg_vn.h>
     71 #include <vm/seg_dev.h>
     72 #include <vm/seg_spt.h>
     73 #include <vm/page.h>
     74 #include <sys/vmparam.h>
     75 #include <sys/swap.h>
     76 #include <fs/proc/prdata.h>
     77 #include <sys/task.h>
     78 #include <sys/project.h>
     79 #include <sys/contract_impl.h>
     80 #include <sys/contract/process.h>
     81 #include <sys/contract/process_impl.h>
     82 #include <sys/schedctl.h>
     83 #include <sys/pool.h>
     84 #include <sys/zone.h>
     85 #include <sys/atomic.h>
     86 #include <sys/sdt.h>
     87 
     88 #define	MAX_ITERS_SPIN	5
        /*
         * NOTE(review): MAX_ITERS_SPIN is not referenced in the part of the
         * file visible here; presumably an iteration bound used further
         * down -- confirm at the point of use.
         */
     89 
        /*
         * A pair of per-page vectors (protections and incore flags), each
         * holding pg_npages entries, together with the page number within
         * the segment that the first entry corresponds to.
         */
     90 typedef struct prpagev {
     91 	uint_t *pg_protv;	/* vector of page permissions */
     92 	char *pg_incore;	/* vector of incore flags */
     93 	size_t pg_npages;	/* number of pages in protv and incore */
     94 	ulong_t pg_pnbase;	/* pn within segment of first protv element */
     95 } prpagev_t;
     96 
        /* Global (non-static) cap on pg_npages; the initial value is 256K pages. */
     97 size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */
     98 
        /* Segment-driver ops vectors defined elsewhere and declared by hand here. */
     99 extern struct seg_ops segdev_ops;	/* needs a header file */
    100 extern struct seg_ops segspt_shmops;	/* needs a header file */
    101 
        /* Forward declarations; the definitions are not part of this excerpt. */
    102 static	int	set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
    103 static	void	clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);
    104 
    105 /*
    106  * Choose an lwp from the complete set of lwps for the process.
    107  * This is called for any operation applied to the process
    108  * file descriptor that requires an lwp to operate upon.
    109  *
    110  * Returns a pointer to the thread for the selected LWP,
    111  * and with the dispatcher lock held for the thread.
         * Returns NULL, with no thread lock held, if the process has
         * no lwps at all (p->p_tlist == NULL and no agent lwp).
         *
         * The caller must hold p->p_lock.
         *
         * Order of preference, first match wins:
         *	 1. the agent lwp, if one exists
         *	 2. an lwp running on a processor (TS_ONPROC)
         *	 3. a runnable lwp (TS_RUN or TS_WAIT)
         *	 4. a sleeping lwp that is not performing a hold
         *	 5. a job-control stop without a directed stop
         *	 6. a job-control stop with a directed stop (TP_PRSTOP)
         *	 7. an event-of-interest stop (syscall entry/exit,
         *	    signalled, faulted)
         *	 8. a DTrace stop
         *	 9. a requested stop (this includes virtually stopped lwps)
         *	10. an lwp sleeping while performing a hold
         *	11. a suspended stop
         *	12. otherwise the head of the lwp list (e.g. a zombie)
    112  *
    113  * The algorithm for choosing an lwp is critical for /proc semantics;
    114  * don't touch this code unless you know all of the implications.
    115  */
    116 kthread_t *
    117 prchoose(proc_t *p)
    118 {
    119 	kthread_t *t;
    120 	kthread_t *t_onproc = NULL;	/* running on processor */
    121 	kthread_t *t_run = NULL;	/* runnable, on disp queue */
    122 	kthread_t *t_sleep = NULL;	/* sleeping */
    123 	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
    124 	kthread_t *t_susp = NULL;	/* suspended stop */
    125 	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
    126 	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
    127 	kthread_t *t_req = NULL;	/* requested stop */
    128 	kthread_t *t_istop = NULL;	/* event-of-interest stop */
    129 	kthread_t *t_dtrace = NULL;	/* DTrace stop */
    130 
    131 	ASSERT(MUTEX_HELD(&p->p_lock));
    132 
    133 	/*
    134 	 * If the agent lwp exists, it takes precedence over all others.
    135 	 */
    136 	if ((t = p->p_agenttp) != NULL) {
    137 		thread_lock(t);
    138 		return (t);
    139 	}
    140 
    141 	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
    142 		return (t);
        	/*
        	 * One pass around the circular lwp list, remembering the first
        	 * lwp seen in each category.  Each thread is locked only long
        	 * enough to classify it.
        	 */
    143 	do {		/* for each lwp in the process */
    144 		if (VSTOPPED(t)) {	/* virtually stopped */
        			/* classified as a requested stop, no thread lock taken */
    145 			if (t_req == NULL)
    146 				t_req = t;
        			/* in a do-while, continue goes to the loop test below */
    147 			continue;
    148 		}
    149 
    150 		thread_lock(t);		/* make sure thread is in good state */
    151 		switch (t->t_state) {
    152 		default:
    153 			panic("prchoose: bad thread state %d, thread 0x%p",
    154 			    t->t_state, (void *)t);
    155 			/*NOTREACHED*/
    156 		case TS_SLEEP:
    157 			/* this is filthy */
        			/* sleeping on p_holdlwps with no t_wchan0 counts as a hold */
    158 			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
    159 			    t->t_wchan0 == NULL) {
    160 				if (t_hold == NULL)
    161 					t_hold = t;
    162 			} else {
    163 				if (t_sleep == NULL)
    164 					t_sleep = t;
    165 			}
    166 			break;
    167 		case TS_RUN:
    168 		case TS_WAIT:
    169 			if (t_run == NULL)
    170 				t_run = t;
    171 			break;
    172 		case TS_ONPROC:
    173 			if (t_onproc == NULL)
    174 				t_onproc = t;
    175 			break;
    176 		case TS_ZOMB:		/* last possible choice */
    177 			break;
    178 		case TS_STOPPED:
    179 			switch (t->t_whystop) {
    180 			case PR_SUSPENDED:
    181 				if (t_susp == NULL)
    182 					t_susp = t;
    183 				break;
    184 			case PR_JOBCONTROL:
    185 				if (t->t_proc_flag & TP_PRSTOP) {
    186 					if (t_jdstop == NULL)
    187 						t_jdstop = t;
    188 				} else {
    189 					if (t_jstop == NULL)
    190 						t_jstop = t;
    191 				}
    192 				break;
    193 			case PR_REQUESTED:
        				/*
        				 * NOTE(review): once t_dtrace is set, a later
        				 * DTrace-stopped lwp takes the else branch and
        				 * can become the t_req candidate.
        				 */
    194 				if (t->t_dtrace_stop && t_dtrace == NULL)
    195 					t_dtrace = t;
    196 				else if (t_req == NULL)
    197 					t_req = t;
    198 				break;
    199 			case PR_SYSENTRY:
    200 			case PR_SYSEXIT:
    201 			case PR_SIGNALLED:
    202 			case PR_FAULTED:
    203 				/*
    204 				 * Make an lwp calling exit() be the
    205 				 * last lwp seen in the process.
        				 * That is, a candidate stopped on entry to
        				 * SYS_exit is replaced by any later
        				 * event-of-interest stop.
    206 				 */
    207 				if (t_istop == NULL ||
    208 				    (t_istop->t_whystop == PR_SYSENTRY &&
    209 				    t_istop->t_whatstop == SYS_exit))
    210 					t_istop = t;
    211 				break;
    212 			case PR_CHECKPOINT:	/* can't happen? */
    213 				break;
    214 			default:
    215 				panic("prchoose: bad t_whystop %d, thread 0x%p",
    216 				    t->t_whystop, (void *)t);
    217 				/*NOTREACHED*/
    218 			}
    219 			break;
    220 		}
    221 		thread_unlock(t);
    222 	} while ((t = t->t_forw) != p->p_tlist);
    223 
        	/* Pick the best candidate; see the order in the header comment. */
    224 	if (t_onproc)
    225 		t = t_onproc;
    226 	else if (t_run)
    227 		t = t_run;
    228 	else if (t_sleep)
    229 		t = t_sleep;
    230 	else if (t_jstop)
    231 		t = t_jstop;
    232 	else if (t_jdstop)
    233 		t = t_jdstop;
    234 	else if (t_istop)
    235 		t = t_istop;
    236 	else if (t_dtrace)
    237 		t = t_dtrace;
    238 	else if (t_req)
    239 		t = t_req;
    240 	else if (t_hold)
    241 		t = t_hold;
    242 	else if (t_susp)
    243 		t = t_susp;
    244 	else			/* TS_ZOMB */
    245 		t = p->p_tlist;
    246 
        	/* The chosen thread is handed back locked, as promised above. */
    247 	if (t != NULL)
    248 		thread_lock(t);
    249 	return (t);
    250 }
    251 
    252 /*
    253  * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
    254  * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
    255  * on the /proc file descriptor.  Called from stop() when a traced
    256  * process stops on an event of interest.  Also called from exit()
    257  * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
    258  */
    259 void
    260 prnotify(struct vnode *vp)
    261 {
    262 	prcommon_t *pcp = VTOP(vp)->pr_common;
    263 
    264 	mutex_enter(&pcp->prc_mutex);
    265 	cv_broadcast(&pcp->prc_wait);
    266 	mutex_exit(&pcp->prc_mutex);
    267 	if (pcp->prc_flags & PRC_POLL) {
    268 		/*
    269 		 * We call pollwakeup() with POLLHUP to ensure that
    270 		 * the pollers are awakened even if they are polling
    271 		 * for nothing (i.e., waiting for the process to exit).
    272 		 * This enables the use of the PRC_POLL flag for optimization
    273 		 * (we can turn off PRC_POLL only if we know no pollers remain).
    274 		 */
    275 		pcp->prc_flags &= ~PRC_POLL;
    276 		pollwakeup(&pcp->prc_pollhead, POLLHUP);
    277 	}
    278 }
    279 
    280 /* called immediately below, in prfree() */
    281 static void
    282 prfreenotify(vnode_t *vp)
    283 {
    284 	prnode_t *pnp;
    285 	prcommon_t *pcp;
    286 
    287 	while (vp != NULL) {
    288 		pnp = VTOP(vp);
    289 		pcp = pnp->pr_common;
    290 		ASSERT(pcp->prc_thread == NULL);
    291 		pcp->prc_proc = NULL;
    292 		/*
    293 		 * We can't call prnotify() here because we are holding
    294 		 * pidlock.  We assert that there is no need to.
    295 		 */
    296 		mutex_enter(&pcp->prc_mutex);
    297 		cv_broadcast(&pcp->prc_wait);
    298 		mutex_exit(&pcp->prc_mutex);
    299 		ASSERT(!(pcp->prc_flags & PRC_POLL));
    300 
    301 		vp = pnp->pr_next;
    302 		pnp->pr_next = NULL;
    303 	}
    304 }
    305 
    306 /*
    307  * Called from a hook in freeproc() when a traced process is removed
    308  * from the process table.  The proc-table pointers of all associated
    309  * /proc vnodes are cleared to indicate that the process has gone away.
         *
         * The caller must hold pidlock.  pr_pidlock and p->p_lock are both
         * acquired here and both released again before returning.
    310  */
    311 void
    312 prfree(proc_t *p)
    313 {
        	/* captured up front; used to index pr_pid_cv[] throughout */
    314 	uint_t slot = p->p_slot;
    315 
    316 	ASSERT(MUTEX_HELD(&pidlock));
    317 
    318 	/*
    319 	 * Block the process against /proc so it can be freed.
    320 	 * It cannot be freed while locked by some controlling process.
    321 	 * Lock ordering:
    322 	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
    323 	 */
    324 	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
    325 	mutex_enter(&p->p_lock);
    326 	while (p->p_proc_flag & P_PR_LOCK) {
        		/*
        		 * Sleep on p_lock without holding pr_pidlock.  After the
        		 * wakeup p_lock is dropped as well, so that both locks can
        		 * be retaken in the order given above.
        		 */
    327 		mutex_exit(&pr_pidlock);
    328 		cv_wait(&pr_pid_cv[slot], &p->p_lock);
    329 		mutex_exit(&p->p_lock);
    330 		mutex_enter(&pr_pidlock);
    331 		mutex_enter(&p->p_lock);
    332 	}
    333 
    334 	ASSERT(p->p_tlist == NULL);
    335 
        	/* Clear prc_proc on, and unlink, every vnode on both lists. */
    336 	prfreenotify(p->p_plist);
    337 	p->p_plist = NULL;
    338 
    339 	prfreenotify(p->p_trace);
    340 	p->p_trace = NULL;
    341 
    342 	/*
    343 	 * We broadcast to wake up everyone waiting for this process.
    344 	 * No one can reach this process from this point on.
    345 	 */
    346 	cv_broadcast(&pr_pid_cv[slot]);
    347 
    348 	mutex_exit(&p->p_lock);
    349 	mutex_exit(&pr_pidlock);
    350 }
    351 
    352 /*
    353  * Called from a hook in exit() when a traced process is becoming a zombie.
    354  */
    355 void
    356 prexit(proc_t *p)
    357 {
    358 	ASSERT(MUTEX_HELD(&p->p_lock));
    359 
    360 	if (pr_watch_active(p)) {
    361 		pr_free_watchpoints(p);
    362 		watch_disable(curthread);
    363 	}
    364 	/* pr_free_watched_pages() is called in exit(), after dropping p_lock */
    365 	if (p->p_trace) {
    366 		VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
    367 		prnotify(p->p_trace);
    368 	}
    369 	cv_broadcast(&pr_pid_cv[p->p_slot]);	/* pauselwps() */
    370 }
    371 
    372 /*
    373  * Called when a thread calls lwp_exit().
    374  */
    375 void
    376 prlwpexit(kthread_t *t)
    377 {
    378 	vnode_t *vp;
    379 	prnode_t *pnp;
    380 	prcommon_t *pcp;
    381 	proc_t *p = ttoproc(t);
    382 	lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;
    383 
    384 	ASSERT(t == curthread);
    385 	ASSERT(MUTEX_HELD(&p->p_lock));
    386 
    387 	/*
    388 	 * The process must be blocked against /proc to do this safely.
    389 	 * The lwp must not disappear while the process is marked P_PR_LOCK.
    390 	 * It is the caller's responsibility to have called prbarrier(p).
    391 	 */
    392 	ASSERT(!(p->p_proc_flag & P_PR_LOCK));
    393 
    394 	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
    395 		pnp = VTOP(vp);
    396 		pcp = pnp->pr_common;
    397 		if (pcp->prc_thread == t) {
    398 			pcp->prc_thread = NULL;
    399 			pcp->prc_flags |= PRC_DESTROY;
    400 		}
    401 	}
    402 
    403 	for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
    404 		pnp = VTOP(vp);
    405 		pcp = pnp->pr_common;
    406 		pcp->prc_thread = NULL;
    407 		pcp->prc_flags |= PRC_DESTROY;
    408 		prnotify(vp);
    409 	}
    410 
    411 	if (p->p_trace)
    412 		prnotify(p->p_trace);
    413 }
    414 
    415 /*
    416  * Called when a zombie thread is joined or when a
    417  * detached lwp exits.  Called from lwp_hash_out().
    418  */
    419 void
    420 prlwpfree(proc_t *p, lwpent_t *lep)
    421 {
    422 	vnode_t *vp;
    423 	prnode_t *pnp;
    424 	prcommon_t *pcp;
    425 
    426 	ASSERT(MUTEX_HELD(&p->p_lock));
    427 
    428 	/*
    429 	 * The process must be blocked against /proc to do this safely.
    430 	 * The lwp must not disappear while the process is marked P_PR_LOCK.
    431 	 * It is the caller's responsibility to have called prbarrier(p).
    432 	 */
    433 	ASSERT(!(p->p_proc_flag & P_PR_LOCK));
    434 
    435 	vp = lep->le_trace;
    436 	lep->le_trace = NULL;
    437 	while (vp) {
    438 		prnotify(vp);
    439 		pnp = VTOP(vp);
    440 		pcp = pnp->pr_common;
    441 		ASSERT(pcp->prc_thread == NULL &&
    442 		    (pcp->prc_flags & PRC_DESTROY));
    443 		pcp->prc_tslot = -1;
    444 		vp = pnp->pr_next;
    445 		pnp->pr_next = NULL;
    446 	}
    447 
    448 	if (p->p_trace)
    449 		prnotify(p->p_trace);
    450 }
    451 
    452 /*
    453  * Called from a hook in exec() when a thread starts exec().
    454  */
    455 void
    456 prexecstart(void)
    457 {
    458 	proc_t *p = ttoproc(curthread);
    459 	klwp_t *lwp = ttolwp(curthread);
    460 
    461 	/*
    462 	 * The P_PR_EXEC flag blocks /proc operations for
    463 	 * the duration of the exec().
    464 	 * We can't start exec() while the process is
    465 	 * locked by /proc, so we call prbarrier().
    466 	 * lwp_nostop keeps the process from being stopped
    467 	 * via job control for the duration of the exec().
    468 	 */
    469 
    470 	ASSERT(MUTEX_HELD(&p->p_lock));
    471 	prbarrier(p);
    472 	lwp->lwp_nostop++;
    473 	p->p_proc_flag |= P_PR_EXEC;
    474 }
    475 
    476 /*
    477  * Called from a hook in exec() when a thread finishes exec().
    478  * The thread may or may not have succeeded.  Some other thread
    479  * may have beat it to the punch.
    480  */
    481 void
    482 prexecend(void)
    483 {
    484 	proc_t *p = ttoproc(curthread);
    485 	klwp_t *lwp = ttolwp(curthread);
    486 	vnode_t *vp;
    487 	prnode_t *pnp;
    488 	prcommon_t *pcp;
    489 	model_t model = p->p_model;
    490 	id_t tid = curthread->t_tid;
    491 	int tslot = curthread->t_dslot;
    492 
    493 	ASSERT(MUTEX_HELD(&p->p_lock));
    494 
    495 	lwp->lwp_nostop--;
    496 	if (p->p_flag & SEXITLWPS) {
    497 		/*
    498 		 * We are on our way to exiting because some
    499 		 * other thread beat us in the race to exec().
    500 		 * Don't clear the P_PR_EXEC flag in this case.
    501 		 */
    502 		return;
    503 	}
    504 
    505 	/*
    506 	 * Wake up anyone waiting in /proc for the process to complete exec().
    507 	 */
    508 	p->p_proc_flag &= ~P_PR_EXEC;
    509 	if ((vp = p->p_trace) != NULL) {
    510 		pcp = VTOP(vp)->pr_common;
    511 		mutex_enter(&pcp->prc_mutex);
    512 		cv_broadcast(&pcp->prc_wait);
    513 		mutex_exit(&pcp->prc_mutex);
    514 		for (; vp != NULL; vp = pnp->pr_next) {
    515 			pnp = VTOP(vp);
    516 			pnp->pr_common->prc_datamodel = model;
    517 		}
    518 	}
    519 	if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
    520 		/*
    521 		 * We dealt with the process common above.
    522 		 */
    523 		ASSERT(p->p_trace != NULL);
    524 		pcp = VTOP(vp)->pr_common;
    525 		mutex_enter(&pcp->prc_mutex);
    526 		cv_broadcast(&pcp->prc_wait);
    527 		mutex_exit(&pcp->prc_mutex);
    528 		for (; vp != NULL; vp = pnp->pr_next) {
    529 			pnp = VTOP(vp);
    530 			pcp = pnp->pr_common;
    531 			pcp->prc_datamodel = model;
    532 			pcp->prc_tid = tid;
    533 			pcp->prc_tslot = tslot;
    534 		}
    535 	}
    536 }
    537 
    538 /*
    539  * Called from a hook in relvm() just before freeing the address space.
    540  * We free all the watched areas now.
    541  */
    542 void
    543 prrelvm(void)
    544 {
    545 	proc_t *p = ttoproc(curthread);
    546 
    547 	mutex_enter(&p->p_lock);
    548 	prbarrier(p);	/* block all other /proc operations */
    549 	if (pr_watch_active(p)) {
    550 		pr_free_watchpoints(p);
    551 		watch_disable(curthread);
    552 	}
    553 	mutex_exit(&p->p_lock);
    554 	pr_free_watched_pages(p);
    555 }
    556 
    557 /*
    558  * Called from hooks in exec-related code when a traced process
    559  * attempts to exec(2) a setuid/setgid program or an unreadable
    560  * file.  Rather than fail the exec we invalidate the associated
    561  * /proc vnodes so that subsequent attempts to use them will fail.
    562  *
    563  * All /proc vnodes, except directory vnodes, are retained on a linked
    564  * list (rooted at p_plist in the process structure) until last close.
    565  *
    566  * A controlling process must re-open the /proc files in order to
    567  * regain control.
         *
         * 'up' is the user structure whose system call tracing state
         * (u_entrymask, u_exitmask, u_systrap) is cleared when tracing is
         * turned off below.  The function operates on curthread and its
         * process; it takes and releases p->p_lock itself.
    568  */
    569 void
    570 prinvalidate(struct user *up)
    571 {
    572 	kthread_t *t = curthread;
    573 	proc_t *p = ttoproc(t);
    574 	vnode_t *vp;
    575 	prnode_t *pnp;
        	/* writer count read from the process common; 0 if no p_trace */
    576 	int writers = 0;
    577 
    578 	mutex_enter(&p->p_lock);
    579 	prbarrier(p);	/* block all other /proc operations */
    580 
    581 	/*
    582 	 * At this moment, there can be only one lwp in the process.
    583 	 */
    584 	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
    585 
    586 	/*
    587 	 * Invalidate any currently active /proc vnodes.
    588 	 */
    589 	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
    590 		pnp = VTOP(vp);
    591 		switch (pnp->pr_type) {
    592 		case PR_PSINFO:		/* these files can be read by anyone */
    593 		case PR_LPSINFO:
    594 		case PR_LWPSINFO:
    595 		case PR_LWPDIR:
    596 		case PR_LWPIDDIR:
    597 		case PR_USAGE:
    598 		case PR_LUSAGE:
    599 		case PR_LWPUSAGE:
    600 			break;
    601 		default:
        			/* prlock() returns EAGAIN for a PR_INVAL node */
    602 			pnp->pr_flags |= PR_INVAL;
    603 			break;
    604 		}
    605 	}
    606 	/*
    607 	 * Wake up anyone waiting for the process or lwp.
    608 	 * p->p_trace is guaranteed to be non-NULL if there
    609 	 * are any open /proc files for this process.
    610 	 */
    611 	if ((vp = p->p_trace) != NULL) {
    612 		prcommon_t *pcp = VTOP(vp)->pr_pcommon;
    613 
    614 		prnotify(vp);
    615 		/*
    616 		 * Are there any writers?
    617 		 */
    618 		if ((writers = pcp->prc_writers) != 0) {
    619 			/*
    620 			 * Clear the exclusive open flag (old /proc interface).
    621 			 * Set prc_selfopens equal to prc_writers so that
    622 			 * the next O_EXCL|O_WRITE open will succeed
    623 			 * even with existing (though invalid) writers.
    624 			 * prclose() must decrement prc_selfopens when
    625 			 * the invalid files are closed.
    626 			 */
    627 			pcp->prc_flags &= ~PRC_EXCL;
    628 			ASSERT(pcp->prc_selfopens <= writers);
    629 			pcp->prc_selfopens = writers;
    630 		}
    631 	}
        	/* Now the trace vnodes of the current (only) lwp. */
    632 	vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
    633 	while (vp != NULL) {
    634 		/*
    635 		 * We should not invalidate the lwpiddir vnodes,
    636 		 * but the necessities of maintaining the old
    637 		 * ioctl()-based version of /proc require it.
    638 		 */
    639 		pnp = VTOP(vp);
    640 		pnp->pr_flags |= PR_INVAL;
    641 		prnotify(vp);
    642 		vp = pnp->pr_next;
    643 	}
    644 
    645 	/*
    646 	 * If any tracing flags are in effect and any vnodes are open for
    647 	 * writing then set the requested-stop and run-on-last-close flags.
    648 	 * Otherwise, clear all tracing flags.
    649 	 */
    650 	t->t_proc_flag &= ~TP_PAUSE;
    651 	if ((p->p_proc_flag & P_PR_TRACE) && writers) {
    652 		t->t_proc_flag |= TP_PRSTOP;
    653 		aston(t);		/* so ISSIG will see the flag */
    654 		p->p_proc_flag |= P_PR_RUNLCL;
    655 	} else {
    656 		premptyset(&up->u_entrymask);		/* syscalls */
    657 		premptyset(&up->u_exitmask);
    658 		up->u_systrap = 0;
    659 		premptyset(&p->p_sigmask);		/* signals */
    660 		premptyset(&p->p_fltmask);		/* faults */
    661 		t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
    662 		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
    663 		prnostep(ttolwp(t));
    664 	}
    665 
    666 	mutex_exit(&p->p_lock);
    667 }
    668 
    669 /*
    670  * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
    671  * Return with pr_pidlock held in all cases.
    672  * Return with p_lock held if the process still exists.
    673  * Return value is the process pointer if the process still exists, else NULL.
         * When NULL is returned, p_lock is not held and P_PR_LOCK has not been
         * set by this call.
    674  * If we lock the process, give ourself kernel priority to avoid deadlocks;
    675  * this is undone in prunlock().
    676  */
    677 proc_t *
    678 pr_p_lock(prnode_t *pnp)
    679 {
    680 	proc_t *p;
    681 	prcommon_t *pcp;
    682 
    683 	mutex_enter(&pr_pidlock);
        	/* no process common, or its process pointer has been cleared */
    684 	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
    685 		return (NULL);
    686 	mutex_enter(&p->p_lock);
        	/* Wait for whoever currently has the process marked P_PR_LOCK. */
    687 	while (p->p_proc_flag & P_PR_LOCK) {
    688 		/*
    689 		 * This cv/mutex pair is persistent even if
    690 		 * the process disappears while we sleep.
    691 		 */
    692 		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
    693 		kmutex_t *mp = &p->p_lock;
    694 
        		/*
        		 * Sleep without pr_pidlock, then drop p_lock as well so
        		 * the two can be retaken in order: pr_pidlock, then p_lock.
        		 */
    695 		mutex_exit(&pr_pidlock);
    696 		cv_wait(cv, mp);
    697 		mutex_exit(mp);
    698 		mutex_enter(&pr_pidlock);
        		/* prfree() clears prc_proc (via prfreenotify()) when the process goes */
    699 		if (pcp->prc_proc == NULL)
    700 			return (NULL);
    701 		ASSERT(p == pcp->prc_proc);
    702 		mutex_enter(&p->p_lock);
    703 	}
    704 	p->p_proc_flag |= P_PR_LOCK;
    705 	THREAD_KPRI_REQUEST();
    706 	return (p);
    707 }
    708 
    709 /*
    710  * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
    711  * This prevents any lwp of the process from disappearing and
    712  * blocks most operations that a process can perform on itself.
    713  * Returns 0 on success, a non-zero error number on failure.
         * On success p->p_lock is held on return; on every error return the
         * process has been unlocked again (or was never locked).
         *
         * 'pnp' is the /proc node being operated on.  Its pr_common is used
         * for the state checks; its pr_pcommon is what pr_p_lock() locks
         * through and is the one slept on while an exec() is in progress.
    714  *
    715  * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
    716  * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
    717  *
    718  * error returns:
    719  *	ENOENT: process or lwp has disappeared or process is exiting
    720  *		(or has become a zombie and zdisp == ZNO).
    721  *	EAGAIN: procfs vnode has become invalid.
    722  *	EINTR:  signal arrived while waiting for exec to complete.
    723  */
    724 int
    725 prlock(prnode_t *pnp, int zdisp)
    726 {
    727 	prcommon_t *pcp;
    728 	proc_t *p;
    729 
        	/* restart point after waiting for an exec() to complete */
    730 again:
    731 	pcp = pnp->pr_common;
    732 	p = pr_p_lock(pnp);
        	/* pr_p_lock() returns with pr_pidlock held in all cases */
    733 	mutex_exit(&pr_pidlock);
    734 
    735 	/*
    736 	 * Return ENOENT immediately if there is no process.
    737 	 */
    738 	if (p == NULL)
    739 		return (ENOENT);
    740 
    741 	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);
    742 
    743 	/*
    744 	 * Return ENOENT if process entered zombie state or is exiting
    745 	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
    746 	 */
    747 	if (zdisp == ZNO &&
    748 	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
    749 		prunlock(pnp);
    750 		return (ENOENT);
    751 	}
    752 
    753 	/*
    754 	 * If lwp-specific, check to see if lwp has disappeared.
         	 * prc_tslot == -1 is what prlwpfree() sets once the lwp's
         	 * directory entry has been released.
    755 	 */
    756 	if (pcp->prc_flags & PRC_LWP) {
    757 		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
    758 		    pcp->prc_tslot == -1) {
    759 			prunlock(pnp);
    760 			return (ENOENT);
    761 		}
    762 	}
    763 
    764 	/*
    765 	 * Return EAGAIN if we have encountered a security violation.
    766 	 * (The process exec'd a set-id or unreadable executable file.)
    767 	 */
    768 	if (pnp->pr_flags & PR_INVAL) {
    769 		prunlock(pnp);
    770 		return (EAGAIN);
    771 	}
    772 
    773 	/*
    774 	 * If process is undergoing an exec(), wait for
    775 	 * completion and then start all over again.
         	 * prc_mutex is taken before prunlock() releases p_lock;
         	 * presumably so that the broadcast made by prexecend()
         	 * (under p_lock and prc_mutex) cannot be missed.
    776 	 */
    777 	if (p->p_proc_flag & P_PR_EXEC) {
    778 		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
    779 		mutex_enter(&pcp->prc_mutex);
    780 		prunlock(pnp);
    781 		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
    782 			mutex_exit(&pcp->prc_mutex);
    783 			return (EINTR);
    784 		}
    785 		mutex_exit(&pcp->prc_mutex);
    786 		goto again;
    787 	}
    788 
    789 	/*
    790 	 * We return holding p->p_lock.
    791 	 */
    792 	return (0);
    793 }
    794 
    795 /*
    796  * Undo prlock() and pr_p_lock().
    797  * p->p_lock is still held; pr_pidlock is no longer held.
    798  *
    799  * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
    800  * if any, waiting for the flag to be dropped; it retains p->p_lock.
    801  *
    802  * prunlock() calls prunmark() and then drops p->p_lock.
    803  */
    804 void
    805 prunmark(proc_t *p)
    806 {
    807 	ASSERT(p->p_proc_flag & P_PR_LOCK);
    808 	ASSERT(MUTEX_HELD(&p->p_lock));
    809 
    810 	cv_signal(&pr_pid_cv[p->p_slot]);
    811 	p->p_proc_flag &= ~P_PR_LOCK;
    812 	THREAD_KPRI_RELEASE();
    813 }
    814 
    815 void
    816 prunlock(prnode_t *pnp)
    817 {
    818 	prcommon_t *pcp = pnp->pr_common;
    819 	proc_t *p = pcp->prc_proc;
    820 
    821 	/*
    822 	 * If we (or someone) gave it a SIGKILL, and it is not
    823 	 * already a zombie, set it running unconditionally.
    824 	 */
    825 	if ((p->p_flag & SKILLED) &&
    826 	    !(p->p_flag & SEXITING) &&
    827 	    !(pcp->prc_flags & PRC_DESTROY) &&
    828 	    !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
    829 		(void) pr_setrun(pnp, 0);
    830 	prunmark(p);
    831 	mutex_exit(&p->p_lock);
    832 }
    833 
    834 /*
    835  * Called while holding p->p_lock to delay until the process is unlocked.
    836  * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
    837  * The process cannot become locked again until p->p_lock is dropped.
    838  */
    839 void
    840 prbarrier(proc_t *p)
    841 {
    842 	ASSERT(MUTEX_HELD(&p->p_lock));
    843 
    844 	if (p->p_proc_flag & P_PR_LOCK) {
    845 		/* The process is locked; delay until not locked */
    846 		uint_t slot = p->p_slot;
    847 
    848 		while (p->p_proc_flag & P_PR_LOCK)
    849 			cv_wait(&pr_pid_cv[slot], &p->p_lock);
    850 		cv_signal(&pr_pid_cv[slot]);
    851 	}
    852 }
    853 
    854 /*
    855  * Return process/lwp status.
    856  * The u-block is mapped in by this routine and unmapped at the end.
    857  */
    858 void
    859 prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
    860 {
    861 	kthread_t *t;
    862 
    863 	ASSERT(MUTEX_HELD(&p->p_lock));
    864 
    865 	t = prchoose(p);	/* returns locked thread */
    866 	ASSERT(t != NULL);
    867 	thread_unlock(t);
    868 
    869 	/* just bzero the process part, prgetlwpstatus() does the rest */
    870 	bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
    871 	sp->pr_nlwp = p->p_lwpcnt;
    872 	sp->pr_nzomb = p->p_zombcnt;
    873 	prassignset(&sp->pr_sigpend, &p->p_sig);
    874 	sp->pr_brkbase = (uintptr_t)p->p_brkbase;
    875 	sp->pr_brksize = p->p_brksize;
    876 	sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
    877 	sp->pr_stksize = p->p_stksize;
    878 	sp->pr_pid = p->p_pid;
    879 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
    880 	    (p->p_flag & SZONETOP)) {
    881 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
    882 		/*
    883 		 * Inside local zones, fake zsched's pid as parent pids for
    884 		 * processes which reference processes outside of the zone.
    885 		 */
    886 		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
    887 	} else {
    888 		sp->pr_ppid = p->p_ppid;
    889 	}
    890 	sp->pr_pgid  = p->p_pgrp;
    891 	sp->pr_sid   = p->p_sessp->s_sid;
    892 	sp->pr_taskid = p->p_task->tk_tkid;
    893 	sp->pr_projid = p->p_task->tk_proj->kpj_id;
    894 	sp->pr_zoneid = p->p_zone->zone_id;
    895 	hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
    896 	hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
    897 	TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
    898 	TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
    899 	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
    900 	prassignset(&sp->pr_flttrace, &p->p_fltmask);
    901 	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
    902 	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
    903 	switch (p->p_model) {
    904 	case DATAMODEL_ILP32:
    905 		sp->pr_dmodel = PR_MODEL_ILP32;
    906 		break;
    907 	case DATAMODEL_LP64:
    908 		sp->pr_dmodel = PR_MODEL_LP64;
    909 		break;
    910 	}
    911 	if (p->p_agenttp)
    912 		sp->pr_agentid = p->p_agenttp->t_tid;
    913 
    914 	/* get the chosen lwp's status */
    915 	prgetlwpstatus(t, &sp->pr_lwp, zp);
    916 
    917 	/* replicate the flags */
    918 	sp->pr_flags = sp->pr_lwp.pr_flags;
    919 }
    920 
    921 #ifdef _SYSCALL32_IMPL
    922 void
    923 prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
    924 {
    925 	proc_t *p = ttoproc(t);
    926 	klwp_t *lwp = ttolwp(t);
    927 	struct mstate *ms = &lwp->lwp_mstate;
    928 	hrtime_t usr, sys;
    929 	int flags;
    930 	ulong_t instr;
    931 
    932 	ASSERT(MUTEX_HELD(&p->p_lock));
    933 
    934 	bzero(sp, sizeof (*sp));
    935 	flags = 0L;
    936 	if (t->t_state == TS_STOPPED) {
    937 		flags |= PR_STOPPED;
    938 		if ((t->t_schedflag & TS_PSTART) == 0)
    939 			flags |= PR_ISTOP;
    940 	} else if (VSTOPPED(t)) {
    941 		flags |= PR_STOPPED|PR_ISTOP;
    942 	}
    943 	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
    944 		flags |= PR_DSTOP;
    945 	if (lwp->lwp_asleep)
    946 		flags |= PR_ASLEEP;
    947 	if (t == p->p_agenttp)
    948 		flags |= PR_AGENT;
    949 	if (!(t->t_proc_flag & TP_TWAIT))
    950 		flags |= PR_DETACH;
    951 	if (t->t_proc_flag & TP_DAEMON)
    952 		flags |= PR_DAEMON;
    953 	if (p->p_proc_flag & P_PR_FORK)
    954 		flags |= PR_FORK;
    955 	if (p->p_proc_flag & P_PR_RUNLCL)
    956 		flags |= PR_RLC;
    957 	if (p->p_proc_flag & P_PR_KILLCL)
    958 		flags |= PR_KLC;
    959 	if (p->p_proc_flag & P_PR_ASYNC)
    960 		flags |= PR_ASYNC;
    961 	if (p->p_proc_flag & P_PR_BPTADJ)
    962 		flags |= PR_BPTADJ;
    963 	if (p->p_proc_flag & P_PR_PTRACE)
    964 		flags |= PR_PTRACE;
    965 	if (p->p_flag & SMSACCT)
    966 		flags |= PR_MSACCT;
    967 	if (p->p_flag & SMSFORK)
    968 		flags |= PR_MSFORK;
    969 	if (p->p_flag & SVFWAIT)
    970 		flags |= PR_VFORKP;
    971 	sp->pr_flags = flags;
    972 	if (VSTOPPED(t)) {
    973 		sp->pr_why   = PR_REQUESTED;
    974 		sp->pr_what  = 0;
    975 	} else {
    976 		sp->pr_why   = t->t_whystop;
    977 		sp->pr_what  = t->t_whatstop;
    978 	}
    979 	sp->pr_lwpid = t->t_tid;
    980 	sp->pr_cursig  = lwp->lwp_cursig;
    981 	prassignset(&sp->pr_lwppend, &t->t_sig);
    982 	schedctl_finish_sigblock(t);
    983 	prassignset(&sp->pr_lwphold, &t->t_hold);
    984 	if (t->t_whystop == PR_FAULTED) {
    985 		siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
    986 		if (t->t_whatstop == FLTPAGE)
    987 			sp->pr_info.si_addr =
    988 			    (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
    989 	} else if (lwp->lwp_curinfo)
    990 		siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
    991 	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
    992 	    sp->pr_info.si_zoneid != zp->zone_id) {
    993 		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
    994 		sp->pr_info.si_uid = 0;
    995 		sp->pr_info.si_ctid = -1;
    996 		sp->pr_info.si_zoneid = zp->zone_id;
    997 	}
    998 	sp->pr_altstack.ss_sp =
    999 	    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
   1000 	sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
   1001 	sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
   1002 	prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
   1003 	sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
   1004 	sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
   1005 	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
   1006 	    sizeof (sp->pr_clname) - 1);
   1007 	if (flags & PR_STOPPED)
   1008 		hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
   1009 	usr = ms->ms_acct[LMS_USER];
   1010 	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
   1011 	scalehrtime(&usr);
   1012 	scalehrtime(&sys);
   1013 	hrt2ts32(usr, &sp->pr_utime);
   1014 	hrt2ts32(sys, &sp->pr_stime);
   1015 
   1016 	/*
   1017 	 * Fetch the current instruction, if not a system process.
   1018 	 * We don't attempt this unless the lwp is stopped.
   1019 	 */
   1020 	if ((p->p_flag & SSYS) || p->p_as == &kas)
   1021 		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
   1022 	else if (!(flags & PR_STOPPED))
   1023 		sp->pr_flags |= PR_PCINVAL;
   1024 	else if (!prfetchinstr(lwp, &instr))
   1025 		sp->pr_flags |= PR_PCINVAL;
   1026 	else
   1027 		sp->pr_instr = (uint32_t)instr;
   1028 
   1029 	/*
   1030 	 * Drop p_lock while touching the lwp's stack.
   1031 	 */
   1032 	mutex_exit(&p->p_lock);
   1033 	if (prisstep(lwp))
   1034 		sp->pr_flags |= PR_STEP;
   1035 	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
   1036 		int i;
   1037 
   1038 		sp->pr_syscall = get_syscall32_args(lwp,
   1039 		    (int *)sp->pr_sysarg, &i);
   1040 		sp->pr_nsysarg = (ushort_t)i;
   1041 	}
   1042 	if ((flags & PR_STOPPED) || t == curthread)
   1043 		prgetprregs32(lwp, sp->pr_reg);
   1044 	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
   1045 	    (flags & PR_VFORKP)) {
   1046 		long r1, r2;
   1047 		user_t *up;
   1048 		auxv_t *auxp;
   1049 		int i;
   1050 
   1051 		sp->pr_errno = prgetrvals(lwp, &r1, &r2);
   1052 		if (sp->pr_errno == 0) {
   1053 			sp->pr_rval1 = (int32_t)r1;
   1054 			sp->pr_rval2 = (int32_t)r2;
   1055 			sp->pr_errpriv = PRIV_NONE;
   1056 		} else
   1057 			sp->pr_errpriv = lwp->lwp_badpriv;
   1058 
   1059 		if (t->t_sysnum == SYS_execve) {
   1060 			up = PTOU(p);
   1061 			sp->pr_sysarg[0] = 0;
   1062 			sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
   1063 			sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
   1064 			for (i = 0, auxp = up->u_auxv;
   1065 			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
   1066 			    i++, auxp++) {
   1067 				if (auxp->a_type == AT_SUN_EXECNAME) {
   1068 					sp->pr_sysarg[0] =
   1069 					    (caddr32_t)
   1070 					    (uintptr_t)auxp->a_un.a_ptr;
   1071 					break;
   1072 				}
   1073 			}
   1074 		}
   1075 	}
   1076 	if (prhasfp())
   1077 		prgetprfpregs32(lwp, &sp->pr_fpreg);
   1078 	mutex_enter(&p->p_lock);
   1079 }
   1080 
   1081 void
   1082 prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
   1083 {
   1084 	kthread_t *t;
   1085 
   1086 	ASSERT(MUTEX_HELD(&p->p_lock));
   1087 
   1088 	t = prchoose(p);	/* returns locked thread */
   1089 	ASSERT(t != NULL);
   1090 	thread_unlock(t);
   1091 
   1092 	/* just bzero the process part, prgetlwpstatus32() does the rest */
   1093 	bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
   1094 	sp->pr_nlwp = p->p_lwpcnt;
   1095 	sp->pr_nzomb = p->p_zombcnt;
   1096 	prassignset(&sp->pr_sigpend, &p->p_sig);
   1097 	sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
   1098 	sp->pr_brksize = (uint32_t)p->p_brksize;
   1099 	sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
   1100 	sp->pr_stksize = (uint32_t)p->p_stksize;
   1101 	sp->pr_pid   = p->p_pid;
   1102 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
   1103 	    (p->p_flag & SZONETOP)) {
   1104 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
   1105 		/*
   1106 		 * Inside local zones, fake zsched's pid as parent pids for
   1107 		 * processes which reference processes outside of the zone.
   1108 		 */
   1109 		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
   1110 	} else {
   1111 		sp->pr_ppid = p->p_ppid;
   1112 	}
   1113 	sp->pr_pgid  = p->p_pgrp;
   1114 	sp->pr_sid   = p->p_sessp->s_sid;
   1115 	sp->pr_taskid = p->p_task->tk_tkid;
   1116 	sp->pr_projid = p->p_task->tk_proj->kpj_id;
   1117 	sp->pr_zoneid = p->p_zone->zone_id;
   1118 	hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
   1119 	hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
   1120 	TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
   1121 	TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
   1122 	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
   1123 	prassignset(&sp->pr_flttrace, &p->p_fltmask);
   1124 	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
   1125 	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
   1126 	switch (p->p_model) {
   1127 	case DATAMODEL_ILP32:
   1128 		sp->pr_dmodel = PR_MODEL_ILP32;
   1129 		break;
   1130 	case DATAMODEL_LP64:
   1131 		sp->pr_dmodel = PR_MODEL_LP64;
   1132 		break;
   1133 	}
   1134 	if (p->p_agenttp)
   1135 		sp->pr_agentid = p->p_agenttp->t_tid;
   1136 
   1137 	/* get the chosen lwp's status */
   1138 	prgetlwpstatus32(t, &sp->pr_lwp, zp);
   1139 
   1140 	/* replicate the flags */
   1141 	sp->pr_flags = sp->pr_lwp.pr_flags;
   1142 }
   1143 #endif	/* _SYSCALL32_IMPL */
   1144 
   1145 /*
   1146  * Return lwp status.
   1147  */
   1148 void
   1149 prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
   1150 {
   1151 	proc_t *p = ttoproc(t);
   1152 	klwp_t *lwp = ttolwp(t);
   1153 	struct mstate *ms = &lwp->lwp_mstate;
   1154 	hrtime_t usr, sys;
   1155 	int flags;
   1156 	ulong_t instr;
   1157 
   1158 	ASSERT(MUTEX_HELD(&p->p_lock));
   1159 
   1160 	bzero(sp, sizeof (*sp));
   1161 	flags = 0L;
   1162 	if (t->t_state == TS_STOPPED) {
   1163 		flags |= PR_STOPPED;
   1164 		if ((t->t_schedflag & TS_PSTART) == 0)
   1165 			flags |= PR_ISTOP;
   1166 	} else if (VSTOPPED(t)) {
   1167 		flags |= PR_STOPPED|PR_ISTOP;
   1168 	}
   1169 	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
   1170 		flags |= PR_DSTOP;
   1171 	if (lwp->lwp_asleep)
   1172 		flags |= PR_ASLEEP;
   1173 	if (t == p->p_agenttp)
   1174 		flags |= PR_AGENT;
   1175 	if (!(t->t_proc_flag & TP_TWAIT))
   1176 		flags |= PR_DETACH;
   1177 	if (t->t_proc_flag & TP_DAEMON)
   1178 		flags |= PR_DAEMON;
   1179 	if (p->p_proc_flag & P_PR_FORK)
   1180 		flags |= PR_FORK;
   1181 	if (p->p_proc_flag & P_PR_RUNLCL)
   1182 		flags |= PR_RLC;
   1183 	if (p->p_proc_flag & P_PR_KILLCL)
   1184 		flags |= PR_KLC;
   1185 	if (p->p_proc_flag & P_PR_ASYNC)
   1186 		flags |= PR_ASYNC;
   1187 	if (p->p_proc_flag & P_PR_BPTADJ)
   1188 		flags |= PR_BPTADJ;
   1189 	if (p->p_proc_flag & P_PR_PTRACE)
   1190 		flags |= PR_PTRACE;
   1191 	if (p->p_flag & SMSACCT)
   1192 		flags |= PR_MSACCT;
   1193 	if (p->p_flag & SMSFORK)
   1194 		flags |= PR_MSFORK;
   1195 	if (p->p_flag & SVFWAIT)
   1196 		flags |= PR_VFORKP;
   1197 	if (p->p_pgidp->pid_pgorphaned)
   1198 		flags |= PR_ORPHAN;
   1199 	if (p->p_pidflag & CLDNOSIGCHLD)
   1200 		flags |= PR_NOSIGCHLD;
   1201 	if (p->p_pidflag & CLDWAITPID)
   1202 		flags |= PR_WAITPID;
   1203 	sp->pr_flags = flags;
   1204 	if (VSTOPPED(t)) {
   1205 		sp->pr_why   = PR_REQUESTED;
   1206 		sp->pr_what  = 0;
   1207 	} else {
   1208 		sp->pr_why   = t->t_whystop;
   1209 		sp->pr_what  = t->t_whatstop;
   1210 	}
   1211 	sp->pr_lwpid = t->t_tid;
   1212 	sp->pr_cursig  = lwp->lwp_cursig;
   1213 	prassignset(&sp->pr_lwppend, &t->t_sig);
   1214 	schedctl_finish_sigblock(t);
   1215 	prassignset(&sp->pr_lwphold, &t->t_hold);
   1216 	if (t->t_whystop == PR_FAULTED)
   1217 		bcopy(&lwp->lwp_siginfo,
   1218 		    &sp->pr_info, sizeof (k_siginfo_t));
   1219 	else if (lwp->lwp_curinfo)
   1220 		bcopy(&lwp->lwp_curinfo->sq_info,
   1221 		    &sp->pr_info, sizeof (k_siginfo_t));
   1222 	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
   1223 	    sp->pr_info.si_zoneid != zp->zone_id) {
   1224 		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
   1225 		sp->pr_info.si_uid = 0;
   1226 		sp->pr_info.si_ctid = -1;
   1227 		sp->pr_info.si_zoneid = zp->zone_id;
   1228 	}
   1229 	sp->pr_altstack = lwp->lwp_sigaltstack;
   1230 	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
   1231 	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
   1232 	sp->pr_ustack = lwp->lwp_ustack;
   1233 	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
   1234 	    sizeof (sp->pr_clname) - 1);
   1235 	if (flags & PR_STOPPED)
   1236 		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
   1237 	usr = ms->ms_acct[LMS_USER];
   1238 	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
   1239 	scalehrtime(&usr);
   1240 	scalehrtime(&sys);
   1241 	hrt2ts(usr, &sp->pr_utime);
   1242 	hrt2ts(sys, &sp->pr_stime);
   1243 
   1244 	/*
   1245 	 * Fetch the current instruction, if not a system process.
   1246 	 * We don't attempt this unless the lwp is stopped.
   1247 	 */
   1248 	if ((p->p_flag & SSYS) || p->p_as == &kas)
   1249 		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
   1250 	else if (!(flags & PR_STOPPED))
   1251 		sp->pr_flags |= PR_PCINVAL;
   1252 	else if (!prfetchinstr(lwp, &instr))
   1253 		sp->pr_flags |= PR_PCINVAL;
   1254 	else
   1255 		sp->pr_instr = instr;
   1256 
   1257 	/*
   1258 	 * Drop p_lock while touching the lwp's stack.
   1259 	 */
   1260 	mutex_exit(&p->p_lock);
   1261 	if (prisstep(lwp))
   1262 		sp->pr_flags |= PR_STEP;
   1263 	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
   1264 		int i;
   1265 
   1266 		sp->pr_syscall = get_syscall_args(lwp,
   1267 		    (long *)sp->pr_sysarg, &i);
   1268 		sp->pr_nsysarg = (ushort_t)i;
   1269 	}
   1270 	if ((flags & PR_STOPPED) || t == curthread)
   1271 		prgetprregs(lwp, sp->pr_reg);
   1272 	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
   1273 	    (flags & PR_VFORKP)) {
   1274 		user_t *up;
   1275 		auxv_t *auxp;
   1276 		int i;
   1277 
   1278 		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
   1279 		if (sp->pr_errno == 0)
   1280 			sp->pr_errpriv = PRIV_NONE;
   1281 		else
   1282 			sp->pr_errpriv = lwp->lwp_badpriv;
   1283 
   1284 		if (t->t_sysnum == SYS_execve) {
   1285 			up = PTOU(p);
   1286 			sp->pr_sysarg[0] = 0;
   1287 			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
   1288 			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
   1289 			for (i = 0, auxp = up->u_auxv;
   1290 			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
   1291 			    i++, auxp++) {
   1292 				if (auxp->a_type == AT_SUN_EXECNAME) {
   1293 					sp->pr_sysarg[0] =
   1294 					    (uintptr_t)auxp->a_un.a_ptr;
   1295 					break;
   1296 				}
   1297 			}
   1298 		}
   1299 	}
   1300 	if (prhasfp())
   1301 		prgetprfpregs(lwp, &sp->pr_fpreg);
   1302 	mutex_enter(&p->p_lock);
   1303 }
   1304 
   1305 /*
   1306  * Get the sigaction structure for the specified signal.  The u-block
   1307  * must already have been mapped in by the caller.
   1308  */
   1309 void
   1310 prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
   1311 {
   1312 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
   1313 
   1314 	bzero(sp, sizeof (*sp));
   1315 
   1316 	if (sig != 0 && (unsigned)sig < nsig) {
   1317 		sp->sa_handler = up->u_signal[sig-1];
   1318 		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
   1319 		if (sigismember(&up->u_sigonstack, sig))
   1320 			sp->sa_flags |= SA_ONSTACK;
   1321 		if (sigismember(&up->u_sigresethand, sig))
   1322 			sp->sa_flags |= SA_RESETHAND;
   1323 		if (sigismember(&up->u_sigrestart, sig))
   1324 			sp->sa_flags |= SA_RESTART;
   1325 		if (sigismember(&p->p_siginfo, sig))
   1326 			sp->sa_flags |= SA_SIGINFO;
   1327 		if (sigismember(&up->u_signodefer, sig))
   1328 			sp->sa_flags |= SA_NODEFER;
   1329 		if (sig == SIGCLD) {
   1330 			if (p->p_flag & SNOWAIT)
   1331 				sp->sa_flags |= SA_NOCLDWAIT;
   1332 			if ((p->p_flag & SJCTL) == 0)
   1333 				sp->sa_flags |= SA_NOCLDSTOP;
   1334 		}
   1335 	}
   1336 }
   1337 
   1338 #ifdef _SYSCALL32_IMPL
   1339 void
   1340 prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
   1341 {
   1342 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
   1343 
   1344 	bzero(sp, sizeof (*sp));
   1345 
   1346 	if (sig != 0 && (unsigned)sig < nsig) {
   1347 		sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
   1348 		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
   1349 		if (sigismember(&up->u_sigonstack, sig))
   1350 			sp->sa_flags |= SA_ONSTACK;
   1351 		if (sigismember(&up->u_sigresethand, sig))
   1352 			sp->sa_flags |= SA_RESETHAND;
   1353 		if (sigismember(&up->u_sigrestart, sig))
   1354 			sp->sa_flags |= SA_RESTART;
   1355 		if (sigismember(&p->p_siginfo, sig))
   1356 			sp->sa_flags |= SA_SIGINFO;
   1357 		if (sigismember(&up->u_signodefer, sig))
   1358 			sp->sa_flags |= SA_NODEFER;
   1359 		if (sig == SIGCLD) {
   1360 			if (p->p_flag & SNOWAIT)
   1361 				sp->sa_flags |= SA_NOCLDWAIT;
   1362 			if ((p->p_flag & SJCTL) == 0)
   1363 				sp->sa_flags |= SA_NOCLDSTOP;
   1364 		}
   1365 	}
   1366 }
   1367 #endif	/* _SYSCALL32_IMPL */
   1368 
   1369 /*
   1370  * Count the number of segments in this process's address space.
   1371  */
   1372 int
   1373 prnsegs(struct as *as, int reserved)
   1374 {
   1375 	int n = 0;
   1376 	struct seg *seg;
   1377 
   1378 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
   1379 
   1380 	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
   1381 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
   1382 		caddr_t saddr, naddr;
   1383 		void *tmp = NULL;
   1384 
   1385 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
   1386 			(void) pr_getprot(seg, reserved, &tmp,
   1387 			    &saddr, &naddr, eaddr);
   1388 			if (saddr != naddr)
   1389 				n++;
   1390 		}
   1391 
   1392 		ASSERT(tmp == NULL);
   1393 	}
   1394 
   1395 	return (n);
   1396 }
   1397 
/*
 * Convert uint32_t to decimal string w/o leading zeros.
 * Add trailing null characters if 'len' is greater than string length.
 * Return the string length.
 *
 * 'len' only controls the padding; all digits are always written, and
 * no terminating null is stored unless 'len' exceeds the digit count.
 */
int
pr_u32tos(uint32_t n, char *s, int len)
{
	char digits[11];	/* 32-bit unsigned integer fits in 10 digits */
	char *pad_end = s + len;
	int ndigits = 0;
	int i;

	/* generate the digits, least significant first */
	do {
		digits[ndigits++] = (char)('0' + n % 10);
		n /= 10;
	} while (n != 0);

	/* emit them most significant first */
	for (i = ndigits - 1; i >= 0; i--)
		*s++ = digits[i];

	/* optional pad */
	while (s < pad_end)
		*s++ = '\0';

	return (ndigits);
}
   1426 
/*
 * Convert uint64_t to decimal string w/o leading zeros.
 * Return the string length.
 *
 * No terminating null is written; the caller appends what it needs.
 */
static int
pr_u64tos(uint64_t n, char *s)
{
	char digits[21];	/* 64-bit unsigned integer fits in 20 digits */
	int ndigits = 0;
	int i;

	/* generate the digits, least significant first */
	do {
		digits[ndigits++] = (char)('0' + n % 10);
		n /= 10;
	} while (n != 0);

	/* emit them most significant first */
	for (i = ndigits - 1; i >= 0; i--)
		*s++ = digits[i];

	return (ndigits);
}
   1451 
   1452 void
   1453 pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
   1454 {
   1455 	char *s = name;
   1456 	struct vfs *vfsp;
   1457 	struct vfssw *vfsswp;
   1458 
   1459 	if ((vfsp = vp->v_vfsp) != NULL &&
   1460 	    ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
   1461 	    *vfsswp->vsw_name) {
   1462 		(void) strcpy(s, vfsswp->vsw_name);
   1463 		s += strlen(s);
   1464 		*s++ = '.';
   1465 	}
   1466 	s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
   1467 	*s++ = '.';
   1468 	s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
   1469 	*s++ = '.';
   1470 	s += pr_u64tos(vattr->va_nodeid, s);
   1471 	*s++ = '\0';
   1472 }
   1473 
   1474 struct seg *
   1475 break_seg(proc_t *p)
   1476 {
   1477 	caddr_t addr = p->p_brkbase;
   1478 	struct seg *seg;
   1479 	struct vnode *vp;
   1480 
   1481 	if (p->p_brksize != 0)
   1482 		addr += p->p_brksize - 1;
   1483 	seg = as_segat(p->p_as, addr);
   1484 	if (seg != NULL && seg->s_ops == &segvn_ops &&
   1485 	    (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
   1486 		return (seg);
   1487 	return (NULL);
   1488 }
   1489 
   1490 /*
   1491  * Implementation of service functions to handle procfs generic chained
   1492  * copyout buffers.
   1493  */
   1494 typedef struct pr_iobuf_list {
   1495 	list_node_t	piol_link;	/* buffer linkage */
   1496 	size_t		piol_size;	/* total size (header + data) */
   1497 	size_t		piol_usedsize;	/* amount to copy out from this buf */
   1498 } piol_t;
   1499 
   1500 #define	MAPSIZE	(64 * 1024)
   1501 #define	PIOL_DATABUF(iol)	((void *)(&(iol)[1]))
   1502 
   1503 void
   1504 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
   1505 {
   1506 	piol_t	*iol;
   1507 	size_t	initial_size = MIN(1, n) * itemsize;
   1508 
   1509 	list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));
   1510 
   1511 	ASSERT(list_head(iolhead) == NULL);
   1512 	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
   1513 	ASSERT(initial_size > 0);
   1514 
   1515 	/*
   1516 	 * Someone creating chained copyout buffers may ask for less than
   1517 	 * MAPSIZE if the amount of data to be buffered is known to be
   1518 	 * smaller than that.
   1519 	 * But in order to prevent involuntary self-denial of service,
   1520 	 * the requested input size is clamped at MAPSIZE.
   1521 	 */
   1522 	initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
   1523 	iol = kmem_alloc(initial_size, KM_SLEEP);
   1524 	list_insert_head(iolhead, iol);
   1525 	iol->piol_usedsize = 0;
   1526 	iol->piol_size = initial_size;
   1527 }
   1528 
   1529 void *
   1530 pr_iol_newbuf(list_t *iolhead, size_t itemsize)
   1531 {
   1532 	piol_t	*iol;
   1533 	char	*new;
   1534 
   1535 	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
   1536 	ASSERT(list_head(iolhead) != NULL);
   1537 
   1538 	iol = (piol_t *)list_tail(iolhead);
   1539 
   1540 	if (iol->piol_size <
   1541 	    iol->piol_usedsize + sizeof (*iol) + itemsize) {
   1542 		/*
   1543 		 * Out of space in the current buffer. Allocate more.
   1544 		 */
   1545 		piol_t *newiol;
   1546 
   1547 		newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
   1548 		newiol->piol_size = MAPSIZE;
   1549 		newiol->piol_usedsize = 0;
   1550 
   1551 		list_insert_after(iolhead, iol, newiol);
   1552 		iol = list_next(iolhead, iol);
   1553 		ASSERT(iol == newiol);
   1554 	}
   1555 	new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
   1556 	iol->piol_usedsize += itemsize;
   1557 	bzero(new, itemsize);
   1558 	return (new);
   1559 }
   1560 
   1561 int
   1562 pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
   1563 {
   1564 	int error = errin;
   1565 	piol_t	*iol;
   1566 
   1567 	while ((iol = list_head(iolhead)) != NULL) {
   1568 		list_remove(iolhead, iol);
   1569 		if (!error) {
   1570 			if (copyout(PIOL_DATABUF(iol), *tgt,
   1571 			    iol->piol_usedsize))
   1572 				error = EFAULT;
   1573 			*tgt += iol->piol_usedsize;
   1574 		}
   1575 		kmem_free(iol, iol->piol_size);
   1576 	}
   1577 	list_destroy(iolhead);
   1578 
   1579 	return (error);
   1580 }
   1581 
   1582 int
   1583 pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
   1584 {
   1585 	offset_t	off = uiop->uio_offset;
   1586 	char		*base;
   1587 	size_t		size;
   1588 	piol_t		*iol;
   1589 	int		error = errin;
   1590 
   1591 	while ((iol = list_head(iolhead)) != NULL) {
   1592 		list_remove(iolhead, iol);
   1593 		base = PIOL_DATABUF(iol);
   1594 		size = iol->piol_usedsize;
   1595 		if (off <= size && error == 0 && uiop->uio_resid > 0)
   1596 			error = uiomove(base + off, size - off,
   1597 			    UIO_READ, uiop);
   1598 		off = MAX(0, off - (offset_t)size);
   1599 		kmem_free(iol, iol->piol_size);
   1600 	}
   1601 	list_destroy(iolhead);
   1602 
   1603 	return (error);
   1604 }
   1605 
   1606 /*
   1607  * Return an array of structures with memory map information.
   1608  * We allocate here; the caller must deallocate.
   1609  */
   1610 int
   1611 prgetmap(proc_t *p, int reserved, list_t *iolhead)
   1612 {
   1613 	struct as *as = p->p_as;
   1614 	prmap_t *mp;
   1615 	struct seg *seg;
   1616 	struct seg *brkseg, *stkseg;
   1617 	struct vnode *vp;
   1618 	struct vattr vattr;
   1619 	uint_t prot;
   1620 
   1621 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
   1622 
   1623 	/*
   1624 	 * Request an initial buffer size that doesn't waste memory
   1625 	 * if the address space has only a small number of segments.
   1626 	 */
   1627 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
   1628 
   1629 	if ((seg = AS_SEGFIRST(as)) == NULL)
   1630 		return (0);
   1631 
   1632 	brkseg = break_seg(p);
   1633 	stkseg = as_segat(as, prgetstackbase(p));
   1634 
   1635 	do {
   1636 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
   1637 		caddr_t saddr, naddr;
   1638 		void *tmp = NULL;
   1639 
   1640 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
   1641 			prot = pr_getprot(seg, reserved, &tmp,
   1642 			    &saddr, &naddr, eaddr);
   1643 			if (saddr == naddr)
   1644 				continue;
   1645 
   1646 			mp = pr_iol_newbuf(iolhead, sizeof (*mp));
   1647 
   1648 			mp->pr_vaddr = (uintptr_t)saddr;
   1649 			mp->pr_size = naddr - saddr;
   1650 			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
   1651 			mp->pr_mflags = 0;
   1652 			if (prot & PROT_READ)
   1653 				mp->pr_mflags |= MA_READ;
   1654 			if (prot & PROT_WRITE)
   1655 				mp->pr_mflags |= MA_WRITE;
   1656 			if (prot & PROT_EXEC)
   1657 				mp->pr_mflags |= MA_EXEC;
   1658 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
   1659 				mp->pr_mflags |= MA_SHARED;
   1660 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
   1661 				mp->pr_mflags |= MA_NORESERVE;
   1662 			if (seg->s_ops == &segspt_shmops ||
   1663 			    (seg->s_ops == &segvn_ops &&
   1664 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
   1665 				mp->pr_mflags |= MA_ANON;
   1666 			if (seg == brkseg)
   1667 				mp->pr_mflags |= MA_BREAK;
   1668 			else if (seg == stkseg) {
   1669 				mp->pr_mflags |= MA_STACK;
   1670 				if (reserved) {
   1671 					size_t maxstack =
   1672 					    ((size_t)p->p_stk_ctl +
   1673 					    PAGEOFFSET) & PAGEMASK;
   1674 					mp->pr_vaddr =
   1675 					    (uintptr_t)prgetstackbase(p) +
   1676 					    p->p_stksize - maxstack;
   1677 					mp->pr_size = (uintptr_t)naddr -
   1678 					    mp->pr_vaddr;
   1679 				}
   1680 			}
   1681 			if (seg->s_ops == &segspt_shmops)
   1682 				mp->pr_mflags |= MA_ISM | MA_SHM;
   1683 			mp->pr_pagesize = PAGESIZE;
   1684 
   1685 			/*
   1686 			 * Manufacture a filename for the "object" directory.
   1687 			 */
   1688 			vattr.va_mask = AT_FSID|AT_NODEID;
   1689 			if (seg->s_ops == &segvn_ops &&
   1690 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
   1691 			    vp != NULL && vp->v_type == VREG &&
   1692 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
   1693 				if (vp == p->p_exec)
   1694 					(void) strcpy(mp->pr_mapname, "a.out");
   1695 				else
   1696 					pr_object_name(mp->pr_mapname,
   1697 					    vp, &vattr);
   1698 			}
   1699 
   1700 			/*
   1701 			 * Get the SysV shared memory id, if any.
   1702 			 */
   1703 			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
   1704 			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
   1705 			    SHMID_NONE) {
   1706 				if (mp->pr_shmid == SHMID_FREE)
   1707 					mp->pr_shmid = -1;
   1708 
   1709 				mp->pr_mflags |= MA_SHM;
   1710 			} else {
   1711 				mp->pr_shmid = -1;
   1712 			}
   1713 		}
   1714 		ASSERT(tmp == NULL);
   1715 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
   1716 
   1717 	return (0);
   1718 }
   1719 
   1720 #ifdef _SYSCALL32_IMPL
   1721 int
   1722 prgetmap32(proc_t *p, int reserved, list_t *iolhead)
   1723 {
   1724 	struct as *as = p->p_as;
   1725 	prmap32_t *mp;
   1726 	struct seg *seg;
   1727 	struct seg *brkseg, *stkseg;
   1728 	struct vnode *vp;
   1729 	struct vattr vattr;
   1730 	uint_t prot;
   1731 
   1732 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
   1733 
   1734 	/*
   1735 	 * Request an initial buffer size that doesn't waste memory
   1736 	 * if the address space has only a small number of segments.
   1737 	 */
   1738 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
   1739 
   1740 	if ((seg = AS_SEGFIRST(as)) == NULL)
   1741 		return (0);
   1742 
   1743 	brkseg = break_seg(p);
   1744 	stkseg = as_segat(as, prgetstackbase(p));
   1745 
   1746 	do {
   1747 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
   1748 		caddr_t saddr, naddr;
   1749 		void *tmp = NULL;
   1750 
   1751 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
   1752 			prot = pr_getprot(seg, reserved, &tmp,
   1753 			    &saddr, &naddr, eaddr);
   1754 			if (saddr == naddr)
   1755 				continue;
   1756 
   1757 			mp = pr_iol_newbuf(iolhead, sizeof (*mp));
   1758 
   1759 			mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
   1760 			mp->pr_size = (size32_t)(naddr - saddr);
   1761 			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
   1762 			mp->pr_mflags = 0;
   1763 			if (prot & PROT_READ)
   1764 				mp->pr_mflags |= MA_READ;
   1765 			if (prot & PROT_WRITE)
   1766 				mp->pr_mflags |= MA_WRITE;
   1767 			if (prot & PROT_EXEC)
   1768 				mp->pr_mflags |= MA_EXEC;
   1769 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
   1770 				mp->pr_mflags |= MA_SHARED;
   1771 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
   1772 				mp->pr_mflags |= MA_NORESERVE;
   1773 			if (seg->s_ops == &segspt_shmops ||
   1774 			    (seg->s_ops == &segvn_ops &&
   1775 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
   1776 				mp->pr_mflags |= MA_ANON;
   1777 			if (seg == brkseg)
   1778 				mp->pr_mflags |= MA_BREAK;
   1779 			else if (seg == stkseg) {
   1780 				mp->pr_mflags |= MA_STACK;
   1781 				if (reserved) {
   1782 					size_t maxstack =
   1783 					    ((size_t)p->p_stk_ctl +
   1784 					    PAGEOFFSET) & PAGEMASK;
   1785 					uintptr_t vaddr =
   1786 					    (uintptr_t)prgetstackbase(p) +
   1787 					    p->p_stksize - maxstack;
   1788 					mp->pr_vaddr = (caddr32_t)vaddr;
   1789 					mp->pr_size = (size32_t)
   1790 					    ((uintptr_t)naddr - vaddr);
   1791 				}
   1792 			}
   1793 			if (seg->s_ops == &segspt_shmops)
   1794 				mp->pr_mflags |= MA_ISM | MA_SHM;
   1795 			mp->pr_pagesize = PAGESIZE;
   1796 
   1797 			/*
   1798 			 * Manufacture a filename for the "object" directory.
   1799 			 */
   1800 			vattr.va_mask = AT_FSID|AT_NODEID;
   1801 			if (seg->s_ops == &segvn_ops &&
   1802 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
   1803 			    vp != NULL && vp->v_type == VREG &&
   1804 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
   1805 				if (vp == p->p_exec)
   1806 					(void) strcpy(mp->pr_mapname, "a.out");
   1807 				else
   1808 					pr_object_name(mp->pr_mapname,
   1809 					    vp, &vattr);
   1810 			}
   1811 
   1812 			/*
   1813 			 * Get the SysV shared memory id, if any.
   1814 			 */
   1815 			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
   1816 			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
   1817 			    SHMID_NONE) {
   1818 				if (mp->pr_shmid == SHMID_FREE)
   1819 					mp->pr_shmid = -1;
   1820 
   1821 				mp->pr_mflags |= MA_SHM;
   1822 			} else {
   1823 				mp->pr_shmid = -1;
   1824 			}
   1825 		}
   1826 		ASSERT(tmp == NULL);
   1827 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
   1828 
   1829 	return (0);
   1830 }
   1831 #endif	/* _SYSCALL32_IMPL */
   1832 
   1833 /*
   1834  * Return the size of the /proc page data file.
   1835  */
   1836 size_t
   1837 prpdsize(struct as *as)
   1838 {
   1839 	struct seg *seg;
   1840 	size_t size;
   1841 
   1842 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
   1843 
   1844 	if ((seg = AS_SEGFIRST(as)) == NULL)
   1845 		return (0);
   1846 
   1847 	size = sizeof (prpageheader_t);
   1848 	do {
   1849 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
   1850 		caddr_t saddr, naddr;
   1851 		void *tmp = NULL;
   1852 		size_t npage;
   1853 
   1854 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
   1855 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
   1856 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
   1857 				size += sizeof (prasmap_t) + round8(npage);
   1858 		}
   1859 		ASSERT(tmp == NULL);
   1860 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
   1861 
   1862 	return (size);
   1863 }
   1864 
   1865 #ifdef _SYSCALL32_IMPL
   1866 size_t
   1867 prpdsize32(struct as *as)
   1868 {
   1869 	struct seg *seg;
   1870 	size_t size;
   1871 
   1872 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
   1873 
   1874 	if ((seg = AS_SEGFIRST(as)) == NULL)
   1875 		return (0);
   1876 
   1877 	size = sizeof (prpageheader32_t);
   1878 	do {
   1879 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
   1880 		caddr_t saddr, naddr;
   1881 		void *tmp = NULL;
   1882 		size_t npage;
   1883 
   1884 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
   1885 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
   1886 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
   1887 				size += sizeof (prasmap32_t) + round8(npage);
   1888 		}
   1889 		ASSERT(tmp == NULL);
   1890 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
   1891 
   1892 	return (size);
   1893 }
   1894 #endif	/* _SYSCALL32_IMPL */
   1895 
   1896 /*
   1897  * Read page data information.
   1898  */
   1899 int
   1900 prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
   1901 {
   1902 	struct as *as = p->p_as;
   1903 	caddr_t buf;
   1904 	size_t size;
   1905 	prpageheader_t *php;
   1906 	prasmap_t *pmp;
   1907 	struct seg *seg;
   1908 	int error;
   1909 
   1910 again:
   1911 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
   1912 
   1913 	if ((seg = AS_SEGFIRST(as)) == NULL) {
   1914 		AS_LOCK_EXIT(as, &as->a_lock);
   1915 		return (0);
   1916 	}
   1917 	size = prpdsize(as);
   1918 	if (uiop->uio_resid < size) {
   1919 		AS_LOCK_EXIT(as, &as->a_lock);
   1920 		return (E2BIG);
   1921 	}
   1922 
   1923 	buf = kmem_zalloc(size, KM_SLEEP);
   1924 	php = (prpageheader_t *)buf;
   1925 	pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));
   1926 
   1927 	hrt2ts(gethrtime(), &php->pr_tstamp);
   1928 	php->pr_nmap = 0;
   1929 	php->pr_npage = 0;
   1930 	do {
   1931 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
   1932 		caddr_t saddr, naddr;
   1933 		void *tmp = NULL;
   1934 
   1935 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
   1936 			struct vnode *vp;
   1937 			struct vattr vattr;
   1938 			size_t len;
   1939 			size_t npage;
   1940 			uint_t prot;
   1941 			uintptr_t next;
   1942 
   1943 			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
   1944 			if ((len = (size_t)(naddr - saddr)) == 0)
   1945 				continue;
   1946 			npage = len / PAGESIZE;
   1947 			next = (uintptr_t)(pmp + 1) + round8(npage);
   1948 			/*
   1949 			 * It's possible that the address space can change
   1950 			 * subtlely even though we're holding as->a_lock
   1951 			 * due to the nondeterminism of page_exists() in
   1952 			 * the presence of asychronously flushed pages or
   1953 			 * mapped files whose sizes are changing.
   1954 			 * page_exists() may be called indirectly from
   1955 			 * pr_getprot() by a SEGOP_INCORE() routine.
   1956 			 * If this happens we need to make sure we don't
   1957 			 * overrun the buffer whose size we computed based
   1958 			 * on the initial iteration through the segments.
   1959 			 * Once we've detected an overflow, we need to clean
   1960 			 * up the temporary memory allocated in pr_getprot()
   1961 			 * and retry. If there's a pending signal, we return
   1962 			 * EINTR so that this thread can be dislodged if
   1963 			 * a latent bug causes us to spin indefinitely.
   1964 			 */
   1965 			if (next > (uintptr_t)buf + size) {
   1966 				pr_getprot_done(&tmp);
   1967 				AS_LOCK_EXIT(as, &as->a_lock);
   1968 
   1969 				kmem_free(buf, size);
   1970 
   1971 				if (ISSIG(curthread, JUSTLOOKING))
   1972 					return (EINTR);
   1973 
   1974 				goto again;
   1975 			}
   1976 
   1977 			php->pr_nmap++;
   1978 			php->pr_npage += npage;
   1979 			pmp->pr_vaddr = (uintptr_t)saddr;
   1980 			pmp->pr_npage = npage;
   1981 			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
   1982 			pmp->pr_mflags = 0;
   1983 			if (prot & PROT_READ)
   1984 				pmp->pr_mflags |= MA_READ;
   1985 			if (prot & PROT_WRITE)
   1986 				pmp->pr_mflags |= MA_WRITE;
   1987 			if (prot & PROT_EXEC)
   1988 				pmp->pr_mflags |= MA_EXEC;
   1989 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
   1990 				pmp->pr_mflags |= MA_SHARED;
   1991 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
   1992 				pmp->pr_mflags |= MA_NORESERVE;
   1993 			if (seg->s_ops == &segspt_shmops ||
   1994 			    (seg->s_ops == &segvn_ops &&
   1995 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
   1996 				pmp->pr_mflags |= MA_ANON;
   1997 			if (seg->s_ops == &segspt_shmops)
   1998 				pmp->pr_mflags |= MA_ISM | MA_SHM;
   1999 			pmp->pr_pagesize = PAGESIZE;
   2000 			/*
   2001 			 * Manufacture a filename for the "object" directory.
   2002 			 */
   2003 			vattr.va_mask = AT_FSID|AT_NODEID;
   2004 			if (seg->s_ops == &segvn_ops &&
   2005 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
   2006 			    vp != NULL && vp->v_type == VREG &&
   2007 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
   2008 				if (vp == p->p_exec)
   2009 					(void) strcpy(pmp->pr_mapname, "a.out");
   2010 				else
   2011 					pr_object_name(pmp->pr_mapname,
   2012 					    vp, &vattr);
   2013 			}
   2014 
   2015 			/*
   2016 			 * Get the SysV shared memory id, if any.
   2017 			 */
   2018 			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
   2019 			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
   2020 			    SHMID_NONE) {
   2021 				if (pmp->pr_shmid == SHMID_FREE)
   2022 					pmp->pr_shmid = -1;
   2023 
   2024 				pmp->pr_mflags |= MA_SHM;
   2025 			} else {
   2026 				pmp->pr_shmid = -1;
   2027 			}
   2028 
   2029 			hat_getstat(as, saddr, len, hatid,
   2030 			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
   2031 			pmp = (prasmap_t *)next;
   2032 		}
   2033 		ASSERT(tmp == NULL);
   2034 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
   2035 
   2036 	AS_LOCK_EXIT(as, &as->a_lock);
   2037 
   2038 	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
   2039 	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
   2040 	kmem_free(buf, size);
   2041 
   2042 	return (error);
   2043 }
   2044 
   2045 #ifdef _SYSCALL32_IMPL
   2046 int
   2047 prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
   2048 {
   2049 	struct as *as = p->p_as;
   2050 	caddr_t buf;
   2051 	size_t size;
   2052 	prpageheader32_t *php;
   2053 	prasmap32_t *pmp;
   2054 	struct seg *seg;
   2055 	int error;
   2056 
   2057 again:
   2058 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
   2059 
   2060 	if ((seg = AS_SEGFIRST(as)) == NULL) {
   2061 		AS_LOCK_EXIT(as, &as->a_lock);
   2062 		return (0);
   2063 	}
   2064 	size = prpdsize32(as);
   2065 	if (uiop->uio_resid < size) {
   2066 		AS_LOCK_EXIT(as, &as->a_lock);
   2067 		return (E2BIG);
   2068 	}
   2069 
   2070 	buf = kmem_zalloc(size, KM_SLEEP);
   2071 	php = (prpageheader32_t *)buf;
   2072 	pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));
   2073 
   2074 	hrt2ts32(gethrtime(), &php->pr_tstamp);
   2075 	php->pr_nmap = 0;
   2076 	php->pr_npage = 0;
   2077 	do {
   2078 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
   2079 		caddr_t saddr, naddr;
   2080 		void *tmp = NULL;
   2081 
   2082 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
   2083 			struct vnode *vp;
   2084 			struct vattr vattr;
   2085 			size_t len;
   2086 			size_t npage;
   2087 			uint_t prot;
   2088 			uintptr_t next;
   2089 
   2090 			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
   2091 			if ((len = (size_t)(naddr - saddr)) == 0)
   2092 				continue;
   2093 			npage = len / PAGESIZE;
   2094 			next = (uintptr_t)(pmp + 1) + round8(npage);
   2095 			/*
   2096 			 * It's possible that the address space can change
   2097 			 * subtlely even though we're holding as->a_lock
   2098 			 * due to the nondeterminism of page_exists() in
   2099 			 * the presence of asychronously flushed pages or
   2100 			 * mapped files whose sizes are changing.
   2101 			 * page_exists() may be called indirectly from
   2102 			 * pr_getprot() by a SEGOP_INCORE() routine.
   2103 			 * If this happens we need to make sure we don't
   2104 			 * overrun the buffer whose size we computed based
   2105 			 * on the initial iteration through the segments.
   2106 			 * Once we've detected an overflow, we need to clean
   2107 			 * up the temporary memory allocated in pr_getprot()
   2108 			 * and retry. If there's a pending signal, we return
   2109 			 * EINTR so that this thread can be dislodged if
   2110 			 * a latent bug causes us to spin indefinitely.
   2111 			 */
   2112 			if (next > (uintptr_t)buf + size) {
   2113 				pr_getprot_done(&tmp);
   2114 				AS_LOCK_EXIT(as, &as->a_lock);
   2115 
   2116 				kmem_free(buf, size);
   2117 
   2118 				if (ISSIG(curthread, JUSTLOOKING))
   2119 					return (EINTR);
   2120 
   2121 				goto again;
   2122 			}
   2123 
   2124 			php->pr_nmap++;
   2125 			php->pr_npage += npage;
   2126 			pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
   2127 			pmp->pr_npage = (size32_t)npage;
   2128 			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
   2129 			pmp->pr_mflags = 0;
   2130 			if (prot & PROT_READ)
   2131 				pmp->pr_mflags |= MA_READ;
   2132 			if (prot & PROT_WRITE)
   2133 				pmp->pr_mflags |= MA_WRITE;
   2134 			if (prot & PROT_EXEC)
   2135 				pmp->pr_mflags |= MA_EXEC;
   2136 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
   2137 				pmp->pr_mflags |= MA_SHARED;
   2138 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
   2139 				pmp->pr_mflags |= MA_NORESERVE;
   2140 			if (seg->s_ops == &segspt_shmops ||
   2141 			    (seg->s_ops == &segvn_ops &&
   2142 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
   2143 				pmp->pr_mflags |= MA_ANON;
   2144 			if (seg->s_ops == &segspt_shmops)
   2145 				pmp->pr_mflags |= MA_ISM | MA_SHM;
   2146 			pmp->pr_pagesize = PAGESIZE;
   2147 			/*
   2148 			 * Manufacture a filename for the "object" directory.
   2149 			 */
   2150 			vattr.va_mask = AT_FSID|AT_NODEID;
   2151 			if (seg->s_ops == &segvn_ops &&
   2152 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
   2153 			    vp != NULL && vp->v_type == VREG &&
   2154 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
   2155 				if (vp == p->p_exec)
   2156 					(void) strcpy(pmp->pr_mapname, "a.out");
   2157 				else
   2158 					pr_object_name(pmp->pr_mapname,
   2159 					    vp, &vattr);
   2160 			}
   2161 
   2162 			/*
   2163 			 * Get the SysV shared memory id, if any.
   2164 			 */
   2165 			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
   2166 			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
   2167 			    SHMID_NONE) {
   2168 				if (pmp->pr_shmid == SHMID_FREE)
   2169 					pmp->pr_shmid = -1;
   2170 
   2171 				pmp->pr_mflags |= MA_SHM;
   2172 			} else {
   2173 				pmp->pr_shmid = -1;
   2174 			}
   2175 
   2176 			hat_getstat(as, saddr, len, hatid,
   2177 			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
   2178 			pmp = (prasmap32_t *)next;
   2179 		}
   2180 		ASSERT(tmp == NULL);
   2181 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
   2182 
   2183 	AS_LOCK_EXIT(as, &as->a_lock);
   2184 
   2185 	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
   2186 	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
   2187 	kmem_free(buf, size);
   2188 
   2189 	return (error);
   2190 }
   2191 #endif	/* _SYSCALL32_IMPL */
   2192 
   2193 ushort_t
   2194 prgetpctcpu(uint64_t pct)
   2195 {
   2196 	/*
   2197 	 * The value returned will be relevant in the zone of the examiner,
   2198 	 * which may not be the same as the zone which performed the procfs
   2199 	 * mount.
   2200 	 */
   2201 	int nonline = zone_ncpus_online_get(curproc->p_zone);
   2202 
   2203 	/*
   2204 	 * Prorate over online cpus so we don't exceed 100%
   2205 	 */
   2206 	if (nonline > 1)
   2207 		pct /= nonline;
   2208 	pct >>= 16;		/* convert to 16-bit scaled integer */
   2209 	if (pct > 0x8000)	/* might happen, due to rounding */
   2210 		pct = 0x8000;
   2211 	return ((ushort_t)pct);
   2212 }
   2213 
   2214 /*
   2215  * Return information used by ps(1).
   2216  */
   2217 void
   2218 prgetpsinfo(proc_t *p, psinfo_t *psp)
   2219 {
   2220 	kthread_t *t;
   2221 	struct cred *cred;
   2222 	hrtime_t hrutime, hrstime;
   2223 
   2224 	ASSERT(MUTEX_HELD(&p->p_lock));
   2225 
   2226 	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
   2227 		bzero(psp, sizeof (*psp));
   2228 	else {
   2229 		thread_unlock(t);
   2230 		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
   2231 	}
   2232 
   2233 	/*
   2234 	 * only export SSYS and SMSACCT; everything else is off-limits to
   2235 	 * userland apps.
   2236 	 */
   2237 	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
   2238 	psp->pr_nlwp = p->p_lwpcnt;
   2239 	psp->pr_nzomb = p->p_zombcnt;
   2240 	mutex_enter(&p->p_crlock);
   2241 	cred = p->p_cred;
   2242 	psp->pr_uid = crgetruid(cred);
   2243 	psp->pr_euid = crgetuid(cred);
   2244 	psp->pr_gid = crgetrgid(cred);
   2245 	psp->pr_egid = crgetgid(cred);
   2246 	mutex_exit(&p->p_crlock);
   2247 	psp->pr_pid = p->p_pid;
   2248 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
   2249 	    (p->p_flag & SZONETOP)) {
   2250 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
   2251 		/*
   2252 		 * Inside local zones, fake zsched's pid as parent pids for
   2253 		 * processes which reference processes outside of the zone.
   2254 		 */
   2255 		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
   2256 	} else {
   2257 		psp->pr_ppid = p->p_ppid;
   2258 	}
   2259 	psp->pr_pgid = p->p_pgrp;
   2260 	psp->pr_sid = p->p_sessp->s_sid;
   2261 	psp->pr_taskid = p->p_task->tk_tkid;
   2262 	psp->pr_projid = p->p_task->tk_proj->kpj_id;
   2263 	psp->pr_poolid = p->p_pool->pool_id;
   2264 	psp->pr_zoneid = p->p_zone->zone_id;
   2265 	if ((psp->pr_contract = PRCTID(p)) == 0)
   2266 		psp->pr_contract = -1;
   2267 	psp->pr_addr = (uintptr_t)prgetpsaddr(p);
   2268 	switch (p->p_model) {
   2269 	case DATAMODEL_ILP32:
   2270 		psp->pr_dmodel = PR_MODEL_ILP32;
   2271 		break;
   2272 	case DATAMODEL_LP64:
   2273 		psp->pr_dmodel = PR_MODEL_LP64;
   2274 		break;
   2275 	}
   2276 	hrutime = mstate_aggr_state(p, LMS_USER);
   2277 	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
   2278 	hrt2ts((hrutime + hrstime), &psp->pr_time);
   2279 	TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);
   2280 
   2281 	if (t == NULL) {
   2282 		int wcode = p->p_wcode;		/* must be atomic read */
   2283 
   2284 		if (wcode)
   2285 			psp->pr_wstat = wstat(wcode, p->p_wdata);
   2286 		psp->pr_ttydev = PRNODEV;
   2287 		psp->pr_lwp.pr_state = SZOMB;
   2288 		psp->pr_lwp.pr_sname = 'Z';
   2289 		psp->pr_lwp.pr_bindpro = PBIND_NONE;
   2290 		psp->pr_lwp.pr_bindpset = PS_NONE;
   2291 	} else {
   2292 		user_t *up = PTOU(p);
   2293 		struct as *as;
   2294 		dev_t d;
   2295 		extern dev_t rwsconsdev, rconsdev, uconsdev;
   2296 
   2297 		d = cttydev(p);
   2298 		/*
   2299 		 * If the controlling terminal is the real
   2300 		 * or workstation console device, map to what the
   2301 		 * user thinks is the console device. Handle case when
   2302 		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
   2303 		 */
   2304 		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
   2305 			d = uconsdev;
   2306 		psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
   2307 		psp->pr_start = up->u_start;
   2308 		bcopy(up->u_comm, psp->pr_fname,
   2309 		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
   2310 		bcopy(up->u_psargs, psp->pr_psargs,
   2311 		    MIN(PRARGSZ-1, PSARGSZ));
   2312 		psp->pr_argc = up->u_argc;
   2313 		psp->pr_argv = up->u_argv;
   2314 		psp->pr_envp = up->u_envp;
   2315 
   2316 		/* get the chosen lwp's lwpsinfo */
   2317 		prgetlwpsinfo(t, &psp->pr_lwp);
   2318 
   2319 		/* compute %cpu for the process */
   2320 		if (p->p_lwpcnt == 1)
   2321 			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
   2322 		else {
   2323 			uint64_t pct = 0;
   2324 			hrtime_t cur_time = gethrtime_unscaled();
   2325 
   2326 			t = p->p_tlist;
   2327 			do {
   2328 				pct += cpu_update_pct(t, cur_time);
   2329 			} while ((t = t->t_forw) != p->p_tlist);
   2330 
   2331 			psp->pr_pctcpu = prgetpctcpu(pct);
   2332 		}
   2333 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
   2334 			psp->pr_size = 0;
   2335 			psp->pr_rssize = 0;
   2336 		} else {
   2337 			mutex_exit(&p->p_lock);
   2338 			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
   2339 			psp->pr_size = btopr(as->a_resvsize) *
   2340 			    (PAGESIZE / 1024);
   2341 			psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
   2342 			psp->pr_pctmem = rm_pctmemory(as);
   2343 			AS_LOCK_EXIT(as, &as->a_lock);
   2344 			mutex_enter(&p->p_lock);
   2345 		}
   2346 	}
   2347 }
   2348 
   2349 #ifdef _SYSCALL32_IMPL
   2350 void
   2351 prgetpsinfo32(proc_t *p, psinfo32_t *psp)
   2352 {
   2353 	kthread_t *t;
   2354 	struct cred *cred;
   2355 	hrtime_t hrutime, hrstime;
   2356 
   2357 	ASSERT(MUTEX_HELD(&p->p_lock));
   2358 
   2359 	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
   2360 		bzero(psp, sizeof (*psp));
   2361 	else {
   2362 		thread_unlock(t);
   2363 		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
   2364 	}
   2365 
   2366 	/*
   2367 	 * only export SSYS and SMSACCT; everything else is off-limits to
   2368 	 * userland apps.
   2369 	 */
   2370 	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
   2371 	psp->pr_nlwp = p->p_lwpcnt;
   2372 	psp->pr_nzomb = p->p_zombcnt;
   2373 	mutex_enter(&p->p_crlock);
   2374 	cred = p->p_cred;
   2375 	psp->pr_uid = crgetruid(cred);
   2376 	psp->pr_euid = crgetuid(cred);
   2377 	psp->pr_gid = crgetrgid(cred);
   2378 	psp->pr_egid = crgetgid(cred);
   2379 	mutex_exit(&p->p_crlock);
   2380 	psp->pr_pid = p->p_pid;
   2381 	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
   2382 	    (p->p_flag & SZONETOP)) {
   2383 		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
   2384 		/*
   2385 		 * Inside local zones, fake zsched's pid as parent pids for
   2386 		 * processes which reference processes outside of the zone.
   2387 		 */
   2388 		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
   2389 	} else {
   2390 		psp->pr_ppid = p->p_ppid;
   2391 	}
   2392 	psp->pr_pgid = p->p_pgrp;
   2393 	psp->pr_sid = p->p_sessp->s_sid;
   2394 	psp->pr_taskid = p->p_task->tk_tkid;
   2395 	psp->pr_projid = p->p_task->tk_proj->kpj_id;
   2396 	psp->pr_poolid = p->p_pool->pool_id;
   2397 	psp->pr_zoneid = p->p_zone->zone_id;
   2398 	if ((psp->pr_contract = PRCTID(p)) == 0)
   2399 		psp->pr_contract = -1;
   2400 	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
   2401 	switch (p->p_model) {
   2402 	case DATAMODEL_ILP32:
   2403 		psp->pr_dmodel = PR_MODEL_ILP32;
   2404 		break;
   2405 	case DATAMODEL_LP64:
   2406 		psp->pr_dmodel = PR_MODEL_LP64;
   2407 		break;
   2408 	}
   2409 	hrutime = mstate_aggr_state(p, LMS_USER);
   2410 	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
   2411 	hrt2ts32(hrutime + hrstime, &psp->pr_time);
   2412 	TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime);
   2413 
   2414 	if (t == NULL) {
   2415 		extern int wstat(int, int);	/* needs a header file */
   2416 		int wcode = p->p_wcode;		/* must be atomic read */
   2417 
   2418 		if (wcode)
   2419 			psp->pr_wstat = wstat(wcode, p->p_wdata);
   2420 		psp->pr_ttydev = PRNODEV32;
   2421 		psp->pr_lwp.pr_state = SZOMB;
   2422 		psp->pr_lwp.pr_sname = 'Z';
   2423 	} else {
   2424 		user_t *up = PTOU(p);
   2425 		struct as *as;
   2426 		dev_t d;
   2427 		extern dev_t rwsconsdev, rconsdev, uconsdev;
   2428 
   2429 		d = cttydev(p);
   2430 		/*
   2431 		 * If the controlling terminal is the real
   2432 		 * or workstation console device, map to what the
   2433 		 * user thinks is the console device. Handle case when
   2434 		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
   2435 		 */
   2436 		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
   2437 			d = uconsdev;
   2438 		(void) cmpldev(&psp->pr_ttydev, d);
   2439 		TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start);
   2440 		bcopy(up->u_comm, psp->pr_fname,
   2441 		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
   2442 		bcopy(up->u_psargs, psp->pr_psargs,
   2443 		    MIN(PRARGSZ-1, PSARGSZ));
   2444 		psp->pr_argc = up->u_argc;
   2445 		psp->pr_argv = (caddr32_t)up->u_argv;
   2446 		psp->pr_envp = (caddr32_t)up->u_envp;
   2447 
   2448 		/* get the chosen lwp's lwpsinfo */
   2449 		prgetlwpsinfo32(t, &psp->pr_lwp);
   2450 
   2451 		/* compute %cpu for the process */
   2452 		if (p->p_lwpcnt == 1)
   2453 			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
   2454 		else {
   2455 			uint64_t pct = 0;
   2456 			hrtime_t cur_time;
   2457 
   2458 			t = p->p_tlist;
   2459 			cur_time = gethrtime_unscaled();
   2460 			do {
   2461 				pct += cpu_update_pct(t, cur_time);
   2462 			} while ((t = t->t_forw) != p->p_tlist);
   2463 
   2464 			psp->pr_pctcpu = prgetpctcpu(pct);
   2465 		}
   2466 		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
   2467 			psp->pr_size = 0;
   2468 			psp->pr_rssize = 0;
   2469 		} else {
   2470 			mutex_exit(&p->p_lock);
   2471 			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
   2472 			psp->pr_size = (size32_t)
   2473 			    (btopr(as->a_resvsize) * (PAGESIZE / 1024));
   2474 			psp->pr_rssize = (size32_t)
   2475 			    (rm_asrss(as) * (PAGESIZE / 1024));
   2476 			psp->pr_pctmem = rm_pctmemory(as);
   2477 			AS_LOCK_EXIT(as, &as->a_lock);
   2478 			mutex_enter(&p->p_lock);
   2479 		}
   2480 	}
   2481 
   2482 	/*
   2483 	 * If we are looking at an LP64 process, zero out
   2484 	 * the fields that cannot be represented in ILP32.
   2485 	 */
   2486 	if (p->p_model != DATAMODEL_ILP32) {
   2487 		psp->pr_size = 0;
   2488 		psp->pr_rssize = 0;
   2489 		psp->pr_argv = 0;
   2490 		psp->pr_envp = 0;
   2491 	}
   2492 }
   2493 #endif	/* _SYSCALL32_IMPL */
   2494 
   2495 void
   2496 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp)
   2497 {
   2498 	klwp_t *lwp = ttolwp(t);
   2499 	sobj_ops_t *sobj;
   2500 	char c, state;
   2501 	uint64_t pct;
   2502 	int retval, niceval;
   2503 	hrtime_t hrutime, hrstime;
   2504 
   2505 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
   2506 
   2507 	bzero(psp, sizeof (*psp));
   2508 
   2509 	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
   2510 	psp->pr_lwpid = t->t_tid;
   2511 	psp->pr_addr = (uintptr_t)t;
   2512 	psp->pr_wchan = (uintptr_t)t->t_wchan;
   2513 
   2514 	/* map the thread state enum into a process state enum */
   2515 	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
   2516 	switch (state) {
   2517 	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
   2518 	case TS_RUN:		state = SRUN;		c = 'R';	break;
   2519 	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
   2520 	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
   2521 	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
   2522 	case TS_WAIT:		state = SWAIT;		c = 'W';	break;
   2523 	default:		state = 0;		c = '?';	break;
   2524 	}
   2525 	psp->pr_state = state;
   2526 	psp->pr_sname = c;
   2527 	if ((sobj = t->t_sobj_ops) != NULL)
   2528 		psp->pr_stype = SOBJ_TYPE(sobj);
   2529 	retval = CL_DONICE(t, NULL, 0, &niceval);
   2530 	if (retval == 0) {
   2531 		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
   2532 		psp->pr_nice = niceval + NZERO;
   2533 	}
   2534 	psp->pr_syscall = t->t_sysnum;
   2535 	psp->pr_pri = t->t_pri;
   2536 	psp->pr_start.tv_sec = t->t_start;
   2537 	psp->pr_start.tv_nsec = 0L;
   2538 	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
   2539 	scalehrtime(&hrutime);
   2540 	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
   2541 	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
   2542 	scalehrtime(&hrstime);
   2543 	hrt2ts(hrutime + hrstime, &psp->pr_time);
   2544 	/* compute %cpu for the lwp */
   2545 	pct = cpu_update_pct(t, gethrtime_unscaled());
   2546 	psp->pr_pctcpu = prgetpctcpu(pct);
   2547 	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
   2548 	if (psp->pr_cpu > 99)
   2549 		psp->pr_cpu = 99;
   2550 
   2551 	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
   2552 	    sizeof (psp->pr_clname) - 1);
   2553 	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
   2554 	psp->pr_onpro = t->t_cpu->cpu_id;
   2555 	psp->pr_bindpro = t->t_bind_cpu;
   2556 	psp->pr_bindpset = t->t_bind_pset;
   2557 	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
   2558 }
   2559 
   2560 #ifdef _SYSCALL32_IMPL
   2561 void
   2562 prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp)
   2563 {
   2564 	proc_t *p = ttoproc(t);
   2565 	klwp_t *lwp = ttolwp(t);
   2566 	sobj_ops_t *sobj;
   2567 	char c, state;
   2568 	uint64_t pct;
   2569 	int retval, niceval;
   2570 	hrtime_t hrutime, hrstime;
   2571 
   2572 	ASSERT(MUTEX_HELD(&p->p_lock));
   2573 
   2574 	bzero(psp, sizeof (*psp));
   2575 
   2576 	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
   2577 	psp->pr_lwpid = t->t_tid;
   2578 	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
   2579 	psp->pr_wchan = 0;	/* cannot represent 64-bit addr in 32 bits */
   2580 
   2581 	/* map the thread state enum into a process state enum */
   2582 	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
   2583 	switch (state) {
   2584 	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
   2585 	case TS_RUN:		state = SRUN;		c = 'R';	break;
   2586 	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
   2587 	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
   2588 	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
   2589 	case TS_WAIT:		state = SWAIT;		c = 'W';	break;
   2590 	default:		state = 0;		c = '?';	break;
   2591 	}
   2592 	psp->pr_state = state;
   2593 	psp->pr_sname = c;
   2594 	if ((sobj = t->t_sobj_ops) != NULL)
   2595 		psp->pr_stype = SOBJ_TYPE(sobj);
   2596 	retval = CL_DONICE(t, NULL, 0, &niceval);
   2597 	if (retval == 0) {
   2598 		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
   2599 		psp->pr_nice = niceval + NZERO;
   2600 	} else {
   2601 		psp->pr_oldpri = 0;
   2602 		psp->pr_nice = 0;
   2603 	}
   2604 	psp->pr_syscall = t->t_sysnum;
   2605 	psp->pr_pri = t->t_pri;
   2606 	psp->pr_start.tv_sec = (time32_t)t->t_start;
   2607 	psp->pr_start.tv_nsec = 0L;
   2608 	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
   2609 	scalehrtime(&hrutime);
   2610 	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
   2611 	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
   2612 	scalehrtime(&hrstime);
   2613 	hrt2ts32(hrutime + hrstime, &psp->pr_time);
   2614 	/* compute %cpu for the lwp */
   2615 	pct = cpu_update_pct(t, gethrtime_unscaled());
   2616 	psp->pr_pctcpu = prgetpctcpu(pct);
   2617 	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
   2618 	if (psp->pr_cpu > 99)
   2619 		psp->pr_cpu = 99;
   2620 
   2621 	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
   2622 	    sizeof (psp->pr_clname) - 1);
   2623 	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
   2624 	psp->pr_onpro = t->t_cpu->cpu_id;
   2625 	psp->pr_bindpro = t->t_bind_cpu;
   2626 	psp->pr_bindpset = t->t_bind_pset;
   2627 	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
   2628 }
   2629 #endif	/* _SYSCALL32_IMPL */
   2630 
   2631 /*
   2632  * This used to get called when microstate accounting was disabled but
   2633  * microstate information was requested.  Since Microstate accounting is on
   2634  * regardless of the proc flags, this simply makes it appear to procfs that
   2635  * microstate accounting is on.  This is relatively meaningless since you
   2636  * can't turn it off, but this is here for the sake of appearances.
   2637  */
   2638 
   2639 /*ARGSUSED*/
   2640 void
   2641 estimate_msacct(kthread_t *t, hrtime_t curtime)
   2642 {
   2643 	proc_t *p;
   2644 
   2645 	if (t == NULL)
   2646 		return;
   2647 
   2648 	p = ttoproc(t);
   2649 	ASSERT(MUTEX_HELD(&p->p_lock));
   2650 
   2651 	/*
   2652 	 * A system process (p0) could be referenced if the thread is
   2653 	 * in the process of exiting.  Don't turn on microstate accounting
   2654 	 * in that case.
   2655 	 */
   2656 	if (p->p_flag & SSYS)
   2657 		return;
   2658 
   2659 	/*
   2660 	 * Loop through all the LWPs (kernel threads) in the process.
   2661 	 */
   2662 	t = p->p_tlist;
   2663 	do {
   2664 		t->t_proc_flag |= TP_MSACCT;
   2665 	} while ((t = t->t_forw) != p->p_tlist);
   2666 
   2667 	p->p_flag |= SMSACCT;			/* set process-wide MSACCT */
   2668 }
   2669 
   2670 /*
   2671  * It's not really possible to disable microstate accounting anymore.
   2672  * However, this routine simply turns off the ms accounting flags in a process
   2673  * This way procfs can still pretend to turn microstate accounting on and
   2674  * off for a process, but it actually doesn't do anything.  This is
   2675  * a neutered form of preemptive idiot-proofing.
   2676  */
   2677 void
   2678 disable_msacct(proc_t *p)
   2679 {
   2680 	kthread_t *t;
   2681 
   2682 	ASSERT(MUTEX_HELD(&p->p_lock));
   2683 
   2684 	p->p_flag &= ~SMSACCT;		/* clear process-wide MSACCT */
   2685 	/*
   2686 	 * Loop through all the LWPs (kernel threads) in the process.
   2687 	 */
   2688 	if ((t = p->p_tlist) != NULL) {
   2689 		do {
   2690 			/* clear per-thread flag */
   2691 			t->t_proc_flag &= ~TP_MSACCT;
   2692 		} while ((t = t->t_forw) != p->p_tlist);
   2693 	}
   2694 }
   2695 
   2696 /*
   2697  * Return resource usage information.
   2698  */
   2699 void
   2700 prgetusage(kthread_t *t, prhusage_t *pup)
   2701 {
   2702 	klwp_t *lwp = ttolwp(t);
   2703 	hrtime_t *mstimep;
   2704 	struct mstate *ms = &lwp->lwp_mstate;
   2705 	int state;
   2706 	int i;
   2707 	hrtime_t curtime;
   2708 	hrtime_t waitrq;
   2709 	hrtime_t tmp1;
   2710 
   2711 	curtime = gethrtime_unscaled();
   2712 
   2713 	pup->pr_lwpid	= t->t_tid;
   2714 	pup->pr_count	= 1;
   2715 	pup->pr_create	= ms->ms_start;
   2716 	pup->pr_term    = ms->ms_term;
   2717 	scalehrtime(&pup->pr_create);
   2718 	scalehrtime(&pup->pr_term);
   2719 	if (ms->ms_term == 0) {
   2720 		pup->pr_rtime = curtime - ms->ms_start;
   2721 		scalehrtime(&pup->pr_rtime);
   2722 	} else {
   2723 		pup->pr_rtime = ms->ms_term - ms->ms_start;
   2724 		scalehrtime(&pup->pr_rtime);
   2725 	}
   2726 
   2727 
   2728 	pup->pr_utime    = ms->ms_acct[LMS_USER];
   2729 	pup->pr_stime    = ms->ms_acct[LMS_SYSTEM];
   2730 	pup->pr_ttime    = ms->ms_acct[LMS_TRAP];
   2731 	pup->pr_tftime   = ms->ms_acct[LMS_TFAULT];
   2732 	pup->pr_dftime   = ms->ms_acct[LMS_DFAULT];
   2733 	pup->pr_kftime   = ms->ms_acct[LMS_KFAULT];
   2734 	pup->pr_ltime    = ms->ms_acct[LMS_USER_LOCK];
   2735 	pup->pr_slptime  = ms->ms_acct[LMS_SLEEP];
   2736 	pup->pr_wtime    = ms->ms_acct[LMS_WAIT_CPU];
   2737 	pup->pr_stoptime = ms->ms_acct[LMS_STOPPED];
   2738 
   2739 	prscaleusage(pup);
   2740 
   2741 	/*
   2742 	 * Adjust for time waiting in the dispatcher queue.
   2743 	 */
   2744 	waitrq = t->t_waitrq;	/* hopefully atomic */
   2745 	if (waitrq != 0) {
   2746 		if (waitrq > curtime) {
   2747 			curtime = gethrtime_unscaled();
   2748 		}
   2749 		tmp1 = curtime - waitrq;
   2750 		scalehrtime(&tmp1);
   2751 		pup->pr_wtime += tmp1;
   2752 		curtime = waitrq;
   2753 	}
   2754 
   2755 	/*
   2756 	 * Adjust for time spent in current microstate.
   2757 	 */
   2758 	if (ms->ms_state_start > curtime) {
   2759 		curtime = gethrtime_unscaled();
   2760 	}
   2761 
   2762 	i = 0;
   2763 	do {
   2764 		switch (state = t->t_mstate) {
   2765 		case LMS_SLEEP:
   2766 			/*
   2767 			 * Update the timer for the current sleep state.
   2768 			 */
   2769 			switch (state = ms->ms_prev) {
   2770 			case LMS_TFAULT:
   2771 			case LMS_DFAULT:
   2772 			case LMS_KFAULT:
   2773 			case LMS_USER_LOCK:
   2774 				break;
   2775 			default:
   2776 				state = LMS_SLEEP;
   2777 				break;
   2778 			}
   2779 			break;
   2780 		case LMS_TFAULT:
   2781 		case LMS_DFAULT:
   2782 		case LMS_KFAULT:
   2783 		case LMS_USER_LOCK:
   2784 			state = LMS_SYSTEM;
   2785 			break;
   2786 		}
   2787 		switch (state) {
   2788 		case LMS_USER:		mstimep = &pup->pr_utime;	break;
   2789 		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
   2790 		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
   2791 		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
   2792 		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
   2793 		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
   2794 		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
   2795 		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
   2796 		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
   2797 		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
   2798 		default:		panic("prgetusage: unknown microstate");
   2799 		}
   2800 		tmp1 = curtime - ms->ms_state_start;
   2801 		if (tmp1 < 0) {
   2802 			curtime = gethrtime_unscaled();
   2803 			i++;
   2804 			continue;
   2805 		}
   2806 		scalehrtime(&tmp1);
   2807 	} while (tmp1 < 0 && i < MAX_ITERS_SPIN);
   2808 
   2809 	*mstimep += tmp1;
   2810 
   2811 	/* update pup timestamp */
   2812 	pup->pr_tstamp = curtime;
   2813 	scalehrtime(&pup->pr_tstamp);
   2814 
   2815 	/*
   2816 	 * Resource usage counters.
   2817 	 */
   2818 	pup->pr_minf  = lwp->lwp_ru.minflt;
   2819 	pup->pr_majf  = lwp->lwp_ru.majflt;
   2820 	pup->pr_nswap = lwp->lwp_ru.nswap;
   2821 	pup->pr_inblk = lwp->lwp_ru.inblock;
   2822 	pup->pr_oublk = lwp->lwp_ru.oublock;
   2823 	pup->pr_msnd  = lwp->lwp_ru.msgsnd;
   2824 	pup->pr_mrcv  = lwp->lwp_ru.msgrcv;
   2825 	pup->pr_sigs  = lwp->lwp_ru.nsignals;
   2826 	pup->pr_vctx  = lwp->lwp_ru.nvcsw;
   2827 	pup->pr_ictx  = lwp->lwp_ru.nivcsw;
   2828 	pup->pr_sysc  = lwp->lwp_ru.sysc;
   2829 	pup->pr_ioch  = lwp->lwp_ru.ioch;
   2830 }
   2831 
   2832 /*
   2833  * Convert ms_acct stats from unscaled high-res time to nanoseconds
   2834  */
   2835 void
   2836 prscaleusage(prhusage_t *usg)
   2837 {
   2838 	scalehrtime(&usg->pr_utime);
   2839 	scalehrtime(&usg->pr_stime);
   2840 	scalehrtime(&usg->pr_ttime);
   2841 	scalehrtime(&usg->pr_tftime);
   2842 	scalehrtime(&usg->pr_dftime);
   2843 	scalehrtime(&usg->pr_kftime);
   2844 	scalehrtime(&usg->pr_ltime);
   2845 	scalehrtime(&usg->pr_slptime);
   2846 	scalehrtime(&usg->pr_wtime);
   2847 	scalehrtime(&usg->pr_stoptime);
   2848 }
   2849 
   2850 
   2851 /*
   2852  * Sum resource usage information.
   2853  */
   2854 void
   2855 praddusage(kthread_t *t, prhusage_t *pup)
   2856 {
   2857 	klwp_t *lwp = ttolwp(t);
   2858 	hrtime_t *mstimep;
   2859 	struct mstate *ms = &lwp->lwp_mstate;
   2860 	int state;
   2861 	int i;
   2862 	hrtime_t curtime;
   2863 	hrtime_t waitrq;
   2864 	hrtime_t tmp;
   2865 	prhusage_t conv;
   2866 
   2867 	curtime = gethrtime_unscaled();
   2868 
   2869 	if (ms->ms_term == 0) {
   2870 		tmp = curtime - ms->ms_start;
   2871 		scalehrtime(&tmp);
   2872 		pup->pr_rtime += tmp;
   2873 	} else {
   2874 		tmp = ms->ms_term - ms->ms_start;
   2875 		scalehrtime(&tmp);
   2876 		pup->pr_rtime += tmp;
   2877 	}
   2878 
   2879 	conv.pr_utime = ms->ms_acct[LMS_USER];
   2880 	conv.pr_stime = ms->ms_acct[LMS_SYSTEM];
   2881 	conv.pr_ttime = ms->ms_acct[LMS_TRAP];
   2882 	conv.pr_tftime = ms->ms_acct[LMS_TFAULT];
   2883 	conv.pr_dftime = ms->ms_acct[LMS_DFAULT];
   2884 	conv.pr_kftime = ms->ms_acct[LMS_KFAULT];
   2885 	conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK];
   2886 	conv.pr_slptime = ms->ms_acct[LMS_SLEEP];
   2887 	conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
   2888 	conv.pr_stoptime = ms->ms_acct[LMS_STOPPED];
   2889 
   2890 	prscaleusage(&conv);
   2891 
   2892 	pup->pr_utime	+= conv.pr_utime;
   2893 	pup->pr_stime	+= conv.pr_stime;
   2894 	pup->pr_ttime	+= conv.pr_ttime;
   2895 	pup->pr_tftime	+= conv.pr_tftime;
   2896 	pup->pr_dftime	+= conv.pr_dftime;
   2897 	pup->pr_kftime	+= conv.pr_kftime;
   2898 	pup->pr_ltime	+= conv.pr_ltime;
   2899 	pup->pr_slptime	+= conv.pr_slptime;
   2900 	pup->pr_wtime	+= conv.pr_wtime;
   2901 	pup->pr_stoptime += conv.pr_stoptime;
   2902 
   2903 	/*
   2904 	 * Adjust for time waiting in the dispatcher queue.
   2905 	 */
   2906 	waitrq = t->t_waitrq;	/* hopefully atomic */
   2907 	if (waitrq != 0) {
   2908 		if (waitrq > curtime) {
   2909 			curtime = gethrtime_unscaled();
   2910 		}
   2911 		tmp = curtime - waitrq;
   2912 		scalehrtime(&tmp);
   2913 		pup->pr_wtime += tmp;
   2914 		curtime = waitrq;
   2915 	}
   2916 
   2917 	/*
   2918 	 * Adjust for time spent in current microstate.
   2919 	 */
   2920 	if (ms->ms_state_start > curtime) {
   2921 		curtime = gethrtime_unscaled();
   2922 	}
   2923 
   2924 	i = 0;
   2925 	do {
   2926 		switch (state = t->t_mstate) {
   2927 		case LMS_SLEEP:
   2928 			/*
   2929 			 * Update the timer for the current sleep state.
   2930 			 */
   2931 			switch (state = ms->ms_prev) {
   2932 			case LMS_TFAULT:
   2933 			case LMS_DFAULT:
   2934 			case LMS_KFAULT:
   2935 			case LMS_USER_LOCK:
   2936 				break;
   2937 			default:
   2938 				state = LMS_SLEEP;
   2939 				break;
   2940 			}
   2941 			break;
   2942 		case LMS_TFAULT:
   2943 		case LMS_DFAULT:
   2944 		case LMS_KFAULT:
   2945 		case LMS_USER_LOCK:
   2946 			state = LMS_SYSTEM;
   2947 			break;
   2948 		}
   2949 		switch (state) {
   2950 		case LMS_USER:		mstimep = &pup->pr_utime;	break;
   2951 		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
   2952 		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
   2953 		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
   2954 		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
   2955 		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
   2956 		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
   2957 		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
   2958 		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
   2959 		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
   2960 		default:		panic("praddusage: unknown microstate");
   2961 		}
   2962 		tmp = curtime - ms->ms_state_start;
   2963 		if (tmp < 0) {
   2964 			curtime = gethrtime_unscaled();
   2965 			i++;
   2966 			continue;
   2967 		}
   2968 		scalehrtime(&tmp);
   2969 	} while (tmp < 0 && i < MAX_ITERS_SPIN);
   2970 
   2971 	*mstimep += tmp;
   2972 
   2973 	/* update pup timestamp */
   2974 	pup->pr_tstamp = curtime;
   2975 	scalehrtime(&pup->pr_tstamp);
   2976 
   2977 	/*
   2978 	 * Resource usage counters.
   2979 	 */
   2980 	pup->pr_minf  += lwp->lwp_ru.minflt;
   2981 	pup->pr_majf  += lwp->lwp_ru.majflt;
   2982 	pup->pr_nswap += lwp->lwp_ru.nswap;
   2983 	pup->pr_inblk += lwp->lwp_ru.inblock;
   2984 	pup->pr_oublk += lwp->lwp_ru.oublock;
   2985 	pup->pr_msnd  += lwp->lwp_ru.msgsnd;
   2986 	pup->pr_mrcv  += lwp->lwp_ru.msgrcv;
   2987 	pup->pr_sigs  += lwp->lwp_ru.nsignals;
   2988 	pup->pr_vctx  += lwp->lwp_ru.nvcsw;
   2989 	pup->pr_ictx  += lwp->lwp_ru.nivcsw;
   2990 	pup->pr_sysc  += lwp->lwp_ru.sysc;
   2991 	pup->pr_ioch  += lwp->lwp_ru.ioch;
   2992 }
   2993 
   2994 /*
   2995  * Convert a prhusage_t to a prusage_t.
   2996  * This means convert each hrtime_t to a timestruc_t
   2997  * and copy the count fields uint64_t => ulong_t.
   2998  */
   2999 void
   3000 prcvtusage(prhusage_t *pup, prusage_t *upup)
   3001 {
   3002 	uint64_t *ullp;
   3003 	ulong_t *ulp;
   3004 	int i;
   3005 
   3006 	upup->pr_lwpid = pup->pr_lwpid;
   3007 	upup->pr_count = pup->pr_count;
   3008 
   3009 	hrt2ts(pup->pr_tstamp,	&upup->pr_tstamp);
   3010 	hrt2ts(pup->pr_create,	&upup->pr_create);
   3011 	hrt2ts(pup->pr_term,	&upup->pr_term);
   3012 	hrt2ts(pup->pr_rtime,	&upup->pr_rtime);
   3013 	hrt2ts(pup->pr_utime,	&upup->pr_utime);
   3014 	hrt2ts(pup->pr_stime,	&upup->pr_stime);
   3015 	hrt2ts(pup->pr_ttime,	&upup->pr_ttime);
   3016 	hrt2ts(pup->pr_tftime,	&upup->pr_tftime);
   3017 	hrt2ts(pup->pr_dftime,	&upup->pr_dftime);
   3018 	hrt2ts(pup->pr_kftime,	&upup->pr_kftime);
   3019 	hrt2ts(pup->pr_ltime,	&upup->pr_ltime);
   3020 	hrt2ts(pup->pr_slptime,	&upup->pr_slptime);
   3021 	hrt2ts(pup->pr_wtime,	&upup->pr_wtime);
   3022 	hrt2ts(pup->pr_stoptime, &upup->pr_stoptime);
   3023 	bzero(upup->filltime, sizeof (upup->filltime));
   3024 
   3025 	ullp = &pup->pr_minf;
   3026 	ulp = &upup->pr_minf;
   3027 	for (i = 0; i < 22; i++)
   3028 		*ulp++ = (ulong_t)*ullp++;
   3029 }
   3030 
#ifdef _SYSCALL32_IMPL
/*
 * Convert a prhusage_t to a prusage32_t.
 *
 * Same conversion as prcvtusage(), but for the 32-bit structure: times go
 * through hrt2ts32() and the 22 consecutive 64-bit counters starting at
 * pr_minf are truncated to uint32_t.
 *
 * NOTE(review): as in prcvtusage(), the counter copy relies on the 22
 * slots being contiguous in both structures -- confirm against the
 * structure definitions.
 */
void
prcvtusage32(prhusage_t *pup, prusage32_t *upup)
{
	uint64_t *src;
	uint32_t *dst;
	int remaining;

	upup->pr_lwpid = pup->pr_lwpid;
	upup->pr_count = pup->pr_count;

	/* timestamps */
	hrt2ts32(pup->pr_tstamp, &upup->pr_tstamp);
	hrt2ts32(pup->pr_create, &upup->pr_create);
	hrt2ts32(pup->pr_term, &upup->pr_term);

	/* elapsed and per-microstate times */
	hrt2ts32(pup->pr_rtime, &upup->pr_rtime);
	hrt2ts32(pup->pr_utime, &upup->pr_utime);
	hrt2ts32(pup->pr_stime, &upup->pr_stime);
	hrt2ts32(pup->pr_ttime, &upup->pr_ttime);
	hrt2ts32(pup->pr_tftime, &upup->pr_tftime);
	hrt2ts32(pup->pr_dftime, &upup->pr_dftime);
	hrt2ts32(pup->pr_kftime, &upup->pr_kftime);
	hrt2ts32(pup->pr_ltime, &upup->pr_ltime);
	hrt2ts32(pup->pr_slptime, &upup->pr_slptime);
	hrt2ts32(pup->pr_wtime, &upup->pr_wtime);
	hrt2ts32(pup->pr_stoptime, &upup->pr_stoptime);
	bzero(upup->filltime, sizeof (upup->filltime));

	/* counters: uint64_t => uint32_t */
	src = &pup->pr_minf;
	dst = &upup->pr_minf;
	for (remaining = 22; remaining > 0; remaining--) {
		*dst = (uint32_t)*src;
		dst++;
		src++;
	}
}
#endif	/* _SYSCALL32_IMPL */
   3064 
/*
 * Determine whether a set is empty.
 *
 * The set is an array of n 32-bit words starting at sp.  Returns 1 when
 * every word is zero (including the n == 0 case) and 0 as soon as a
 * non-zero word is seen.
 */
int
setisempty(uint32_t *sp, uint_t n)
{
	uint_t word;

	for (word = 0; word < n; word++) {
		if (sp[word] != 0)
			return (0);
	}

	return (1);
}
   3076 
   3077 /*
   3078  * Utility routine for establishing a watched area in the process.
   3079  * Keep the list of watched areas sorted by virtual address.
   3080  */
   3081 int
   3082 set_watched_area(proc_t *p, struct watched_area *pwa)
   3083 {
   3084 	caddr_t vaddr = pwa->wa_vaddr;
   3085 	caddr_t eaddr = pwa->wa_eaddr;
   3086 	ulong_t flags = pwa->wa_flags;
   3087 	struct watched_area *target;
   3088 	avl_index_t where;
   3089 	int error = 0;
   3090 
   3091 	/* we must not be holding p->p_lock, but the process must be locked */
   3092 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
   3093 	ASSERT(p->p_proc_flag & P_PR_LOCK);
   3094 
   3095 	/*
   3096 	 * If this is our first watchpoint, enable watchpoints for the process.
   3097 	 */
   3098 	if (!pr_watch_active(p)) {
   3099 		kthread_t *t;
   3100 
   3101 		mutex_enter(&p->p_lock);
   3102 		if ((t = p->p_tlist) != NULL) {
   3103 			do {
   3104 				watch_enable(t);
   3105 			} while ((t = t->t_forw) != p->p_tlist);
   3106 		}
   3107 		mutex_exit(&p->p_lock);
   3108 	}
   3109 
   3110 	target = pr_find_watched_area(p, pwa, &where);
   3111 	if (target != NULL) {
   3112 		/*
   3113 		 * We discovered an existing, overlapping watched area.
   3114 		 * Allow it only if it is an exact match.
   3115 		 */
   3116 		if (target->wa_vaddr != vaddr ||
   3117 		    target->wa_eaddr != eaddr)
   3118 			error = EINVAL;
   3119 		else if (target->wa_flags != flags) {
   3120 			error = set_watched_page(p, vaddr, eaddr,
   3121 			    flags, target->wa_flags);
   3122 			target->wa_flags = flags;
   3123 		}
   3124 		kmem_free(pwa, sizeof (struct watched_area));
   3125 	} else {
   3126 		avl_insert(&p->p_warea, pwa, where);
   3127 		error = set_watched_page(p, vaddr, eaddr, flags, 0);
   3128 	}
   3129 
   3130 	return (error);
   3131 }
   3132 
   3133 /*
   3134  * Utility routine for clearing a watched area in the process.
   3135  * Must be an exact match of the virtual address.
   3136  * size and flags don't matter.
   3137  */
   3138 int
   3139 clear_watched_area(proc_t *p, struct watched_area *pwa)
   3140 {
   3141 	struct watched_area *found;
   3142 
   3143 	/* we must not be holding p->p_lock, but the process must be locked */
   3144 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
   3145 	ASSERT(p->p_proc_flag & P_PR_LOCK);
   3146 
   3147 
   3148 	if (!pr_watch_active(p)) {
   3149 		kmem_free(pwa, sizeof (struct watched_area));
   3150 		return (0);
   3151 	}
   3152 
   3153 	/*
   3154 	 * Look for a matching address in the watched areas.  If a match is
   3155 	 * found, clear the old watched area and adjust the watched page(s).  It
   3156 	 * is not an error if there is no match.
   3157 	 */
   3158 	if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
   3159 	    found->wa_vaddr == pwa->wa_vaddr) {
   3160 		clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
   3161 		    found->wa_flags);
   3162 		avl_remove(&p->p_warea, found);
   3163 		kmem_free(found, sizeof (struct watched_area));
   3164 	}
   3165 
   3166 	kmem_free(pwa, sizeof (struct watched_area));
   3167 
   3168 	/*
   3169 	 * If we removed the last watched area from the process, disable
   3170 	 * watchpoints.
   3171 	 */
   3172 	if (!pr_watch_active(p)) {
   3173 		kthread_t *t;
   3174 
   3175 		mutex_enter(&p->p_lock);
   3176 		if ((t = p->p_tlist) != NULL) {
   3177 			do {
   3178 				watch_disable(t);
   3179 			} while ((t = t->t_forw) != p->p_tlist);
   3180 		}
   3181 		mutex_exit(&p->p_lock);
   3182 	}
   3183 
   3184 	return (0);
   3185 }
   3186 
   3187 /*
   3188  * Frees all the watched_area structures
   3189  */
   3190 void
   3191 pr_free_watchpoints(proc_t *p)
   3192 {
   3193 	struct watched_area *delp;
   3194 	void *cookie;
   3195 
   3196 	cookie = NULL;
   3197 	while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
   3198 		kmem_free(delp, sizeof (struct watched_area));
   3199 
   3200 	avl_destroy(&p->p_warea);
   3201 }
   3202 
   3203 /*
   3204  * This one is called by the traced process to unwatch all the
   3205  * pages while deallocating the list of watched_page structs.
   3206  */
   3207 void
   3208 pr_free_watched_pages(proc_t *p)
   3209 {
   3210 	struct as *as = p->p_as;
   3211 	struct watched_page *pwp;
   3212 	uint_t prot;
   3213 	int    retrycnt, err;
   3214 	void *cookie;
   3215 
   3216 	if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
   3217 		return;
   3218 
   3219 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
   3220 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
   3221 
   3222 	pwp = avl_first(&as->a_wpage);
   3223 
   3224 	cookie = NULL;
   3225 	while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
   3226 		retrycnt = 0;
   3227 		if ((prot = pwp->wp_oprot) != 0) {
   3228 			caddr_t addr = pwp->wp_vaddr;
   3229 			struct seg *seg;
   3230 		retry:
   3231 
   3232 			if ((pwp->wp_prot != prot ||
   3233 			    (pwp->wp_flags & WP_NOWATCH)) &&
   3234 			    (seg = as_segat(as, addr)) != NULL) {
   3235 				err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
   3236 				if (err == IE_RETRY) {
   3237 					ASSERT(retrycnt == 0);
   3238 					retrycnt++;
   3239 					goto retry;
   3240 				}
   3241 			}
   3242 		}
   3243 		kmem_free(pwp, sizeof (struct watched_page));
   3244 	}
   3245 
   3246 	avl_destroy(&as->a_wpage);
   3247 	p->p_wprot = NULL;
   3248 
   3249 	AS_LOCK_EXIT(as, &as->a_lock);
   3250 }
   3251 
   3252 /*
   3253  * Insert a watched area into the list of watched pages.
   3254  * If oflags is zero then we are adding a new watched area.
   3255  * Otherwise we are changing the flags of an existing watched area.
   3256  */
   3257 static int
   3258 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
   3259 	ulong_t flags, ulong_t oflags)
   3260 {
   3261 	struct as *as = p->p_as;
   3262 	avl_tree_t *pwp_tree;
   3263 	struct watched_page *pwp, *newpwp;
   3264 	struct watched_page tpw;
   3265 	avl_index_t where;
   3266 	struct seg *seg;
   3267 	uint_t prot;
   3268 	caddr_t addr;
   3269 
   3270 	/*
   3271 	 * We need to pre-allocate a list of structures before we grab the
   3272 	 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
   3273 	 * held.
   3274 	 */
   3275 	newpwp = NULL;
   3276 	for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
   3277 	    addr < eaddr; addr += PAGESIZE) {
   3278 		pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
   3279 		pwp->wp_list = newpwp;
   3280 		newpwp = pwp;
   3281 	}
   3282 
   3283 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
   3284 
   3285 	/*
   3286 	 * Search for an existing watched page to contain the watched area.
   3287 	 * If none is found, grab a new one from the available list
   3288 	 * and insert it in the active list, keeping the list sorted
   3289 	 * by user-level virtual address.
   3290 	 */
   3291 	if (p->p_flag & SVFWAIT)
   3292 		pwp_tree = &p->p_wpage;
   3293 	else
   3294 		pwp_tree = &as->a_wpage;
   3295 
   3296 again:
   3297 	if (avl_numnodes(pwp_tree) > prnwatch) {
   3298 		AS_LOCK_EXIT(as, &as->a_lock);
   3299 		while (newpwp != NULL) {
   3300 			pwp = newpwp->wp_list;
   3301 			kmem_free(newpwp, sizeof (struct watched_page));
   3302 			newpwp = pwp;
   3303 		}
   3304 		return (E2BIG);
   3305 	}
   3306 
   3307 	tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
   3308 	if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
   3309 		pwp = newpwp;
   3310 		newpwp = newpwp->wp_list;
   3311 		pwp->wp_list = NULL;
   3312 		pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
   3313 		    (uintptr_t)PAGEMASK);
   3314 		avl_insert(pwp_tree, pwp, where);
   3315 	}
   3316 
   3317 	ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);
   3318 
   3319 	if (oflags & WA_READ)
   3320 		pwp->wp_read--;
   3321 	if (oflags & WA_WRITE)
   3322 		pwp->wp_write--;
   3323 	if (oflags & WA_EXEC)
   3324 		pwp->wp_exec--;
   3325 
   3326 	ASSERT(pwp->wp_read >= 0);
   3327 	ASSERT(pwp->wp_write >= 0);
   3328 	ASSERT(pwp->wp_exec >= 0);
   3329 
   3330 	if (flags & WA_READ)
   3331 		pwp->wp_read++;
   3332 	if (flags & WA_WRITE)
   3333 		pwp->wp_write++;
   3334 	if (flags & WA_EXEC)
   3335 		pwp->wp_exec++;
   3336 
   3337 	if (!(p->p_flag & SVFWAIT)) {
   3338 		vaddr = pwp->wp_vaddr;
   3339 		if (pwp->wp_oprot == 0 &&
   3340 		    (seg = as_segat(as, vaddr)) != NULL) {
   3341 			SEGOP_GETPROT(seg, vaddr, 0, &prot);
   3342 			pwp->wp_oprot = (uchar_t)prot;
   3343 			pwp->wp_prot = (uchar_t)prot;
   3344 		}
   3345 		if (pwp->wp_oprot != 0) {
   3346 			prot = pwp->wp_oprot;
   3347 			if (pwp->wp_read)
   3348 				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
   3349 			if (pwp->wp_write)
   3350 				prot &= ~PROT_WRITE;
   3351 			if (pwp->wp_exec)
   3352 				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
   3353 			if (!(pwp->wp_flags & WP_NOWATCH) &&
   3354 			    pwp->wp_prot != prot &&
   3355 			    (pwp->wp_flags & WP_SETPROT) == 0) {
   3356 				pwp->wp_flags |= WP_SETPROT;
   3357 				pwp->wp_list = p->p_wprot;
   3358 				p->p_wprot = pwp;
   3359 			}
   3360 			pwp->wp_prot = (uchar_t)prot;
   3361 		}
   3362 	}
   3363 
   3364 	/*
   3365 	 * If the watched area extends into the next page then do
   3366 	 * it over again with the virtual address of the next page.
   3367 	 */
   3368 	if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
   3369 		goto again;
   3370 
   3371 	AS_LOCK_EXIT(as, &as->a_lock);
   3372 
   3373 	/*
   3374 	 * Free any pages we may have over-allocated
   3375 	 */
   3376 	while (newpwp != NULL) {
   3377 		pwp = newpwp->wp_list;
   3378 		kmem_free(newpwp, sizeof (struct watched_page));
   3379 		newpwp = pwp;
   3380 	}
   3381 
   3382 	return (0);
   3383 }
   3384 
   3385 /*
   3386  * Remove a watched area from the list of watched pages.
   3387  * A watched area may extend over more than one page.
   3388  */
   3389 static void
   3390 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
   3391 {
   3392 	struct as *as = p->p_as;
   3393 	struct watched_page *pwp;
   3394 	struct watched_page tpw;
   3395 	avl_tree_t *tree;
   3396 	avl_index_t where;
   3397 
   3398 	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
   3399 
   3400 	if (p->p_flag & SVFWAIT)
   3401 		tree = &p->p_wpage;
   3402 	else
   3403 		tree = &as->a_wpage;
   3404 
   3405 	tpw.wp_vaddr = vaddr =
   3406 	    (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
   3407 	pwp = avl_find(tree, &tpw, &where);
   3408 	if (pwp == NULL)
   3409 		pwp = avl_nearest(tree, where, AVL_AFTER);
   3410 
   3411 	while (pwp != NULL && pwp->wp_vaddr < eaddr) {
   3412 		ASSERT(vaddr <=  pwp->wp_vaddr);
   3413 
   3414 		if (flags & WA_READ)
   3415 			pwp->wp_read--;
   3416 		if (flags & WA_WRITE)
   3417 			pwp->wp_write--;
   3418 		if (flags & WA_EXEC)
   3419 			pwp->wp_exec--;
   3420 
   3421 		if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
   3422 			/*
   3423 			 * Reset the hat layer's protections on this page.
   3424 			 */
   3425 			if (pwp->wp_oprot != 0) {
   3426 				uint_t prot = pwp->wp_oprot;
   3427 
   3428 				if (pwp->wp_read)
   3429 					prot &=
   3430 					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
   3431 				if (pwp->wp_write)
   3432 					prot &= ~PROT_WRITE;
   3433 				if (pwp->wp_exec)
   3434 					prot &=
   3435 					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
   3436 				if (!(pwp->wp_flags & WP_NOWATCH) &&
   3437 				    pwp->wp_prot != prot &&
   3438 				    (pwp->wp_flags & WP_SETPROT) == 0) {
   3439 					pwp->wp_flags |= WP_SETPROT;
   3440 					pwp->wp_list = p->p_wprot;
   3441 					p->p_wprot = pwp;
   3442 				}
   3443 				pwp->wp_prot = (uchar_t)prot;
   3444 			}
   3445 		} else {
   3446 			/*
   3447 			 * No watched areas remain in this page.
   3448 			 * Reset everything to normal.
   3449 			 */
   3450 			if (pwp->wp_oprot != 0) {
   3451 				pwp->wp_prot = pwp->wp_oprot;
   3452 				if ((pwp->wp_flags & WP_SETPROT) == 0) {
   3453 					pwp->wp_flags |= WP_SETPROT;
   3454 					pwp->wp_list = p->p_wprot;
   3455 					p->p_wprot = pwp;
   3456 				}
   3457 			}
   3458 		}
   3459 
   3460 		pwp = AVL_NEXT(tree, pwp);
   3461 	}
   3462 
   3463 	AS_LOCK_EXIT(as, &as->a_lock);
   3464 }
   3465 
   3466 /*
   3467  * Return the original protections for the specified page.
   3468  */
   3469 static void
   3470 getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
   3471 {
   3472 	struct watched_page *pwp;
   3473 	struct watched_page tpw;
   3474 
   3475 	ASSERT(AS_LOCK_HELD(as, &as->a_lock));
   3476 
   3477 	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
   3478 	if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
   3479 		*prot = pwp->wp_oprot;
   3480 }
   3481 
   3482 static prpagev_t *
   3483 pr_pagev_create(struct seg *seg, int check_noreserve)
   3484 {
   3485 	prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
   3486 	size_t total_pages = seg_pages(seg);
   3487 
   3488 	/*
   3489 	 * Limit the size of our vectors to pagev_lim pages at a time.  We need
   3490 	 * 4 or 5 bytes of storage per page, so this means we limit ourself
   3491 	 * to about a megabyte of kernel heap by default.
   3492 	 */
   3493 	pagev->pg_npages = MIN(total_pages, pagev_lim);
   3494 	pagev->pg_pnbase = 0;
   3495 
   3496 	pagev->pg_protv =
   3497 	    kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);
   3498 
   3499 	if (check_noreserve)
   3500 		pagev->pg_incore =
   3501 		    kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
   3502 	else
   3503 		pagev->pg_incore = NULL;
   3504 
   3505 	return (pagev);
   3506 }
   3507 
   3508 static void
   3509 pr_pagev_destroy(prpagev_t *pagev)
   3510 {
   3511 	if (pagev->pg_incore != NULL)
   3512 		kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));
   3513 
   3514 	kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
   3515 	kmem_free(pagev, sizeof (prpagev_t));
   3516 }
   3517 
   3518 static caddr_t
   3519 pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
   3520 {
   3521 	ulong_t lastpg = seg_page(seg, eaddr - 1);
   3522 	ulong_t pn, pnlim;
   3523 	caddr_t saddr;
   3524 	size_t len;
   3525 
   3526 	ASSERT(addr >= seg->s_base && addr <= eaddr);
   3527 
   3528 	if (addr == eaddr)
   3529 		return (eaddr);
   3530 
   3531 refill:
   3532 	ASSERT(addr < eaddr);
   3533 	pagev->pg_pnbase = seg_page(seg, addr);
   3534 	pnlim = pagev->pg_pnbase + pagev->pg_npages;
   3535 	saddr = addr;
   3536 
   3537 	if (lastpg < pnlim)
   3538 		len = (size_t)(eaddr - addr);
   3539 	else
   3540 		len = pagev->pg_npages * PAGESIZE;
   3541 
   3542 	if (pagev->pg_incore != NULL) {
   3543 		/*
   3544 		 * INCORE cleverly has different semantics than GETPROT:
   3545 		 * it returns info on pages up to but NOT including addr + len.
   3546 		 */
   3547 		SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
   3548 		pn = pagev->pg_pnbase;
   3549 
   3550 		do {
   3551 			/*
   3552 			 * Guilty knowledge here:  We know that segvn_incore
   3553 			 * returns more than just the low-order bit that
   3554 			 * indicates the page is actually in memory.  If any
   3555 			 * bits are set, then the page has backing store.
   3556 			 */
   3557 			if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
   3558 				goto out;
   3559 
   3560 		} while ((addr += PAGESIZE) < eaddr && pn < pnlim);
   3561 
   3562 		/*
   3563 		 * If we examined all the pages in the vector but we're not
   3564 		 * at the end of the segment, take another lap.
   3565 		 */
   3566 		if (addr < eaddr)
   3567 			goto refill;
   3568 	}
   3569 
   3570 	/*
   3571 	 * Need to take len - 1 because addr + len is the address of the
   3572 	 * first byte of the page just past the end of what we want.
   3573 	 */
   3574 out:
   3575 	SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
   3576 	return (addr);
   3577 }
   3578 
   3579 static caddr_t
   3580 pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
   3581     caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
   3582 {
   3583 	/*
   3584 	 * Our starting address is either the specified address, or the base
   3585 	 * address from the start of the pagev.  If the latter is greater,
   3586 	 * this means a previous call to pr_pagev_fill has already scanned
   3587 	 * further than the end of the previous mapping.
   3588 	 */
   3589 	caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
   3590 	caddr_t addr = MAX(*saddrp, base);
   3591 	ulong_t pn = seg_page(seg, addr);
   3592 	uint_t prot, nprot;
   3593 
   3594 	/*
   3595 	 * If we're dealing with noreserve pages, then advance addr to
   3596 	 * the address of the next page which has backing store.
   3597 	 */
   3598 	if (pagev->pg_incore != NULL) {
   3599 		while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
   3600 			if ((addr += PAGESIZE) == eaddr) {
   3601 				*saddrp = addr;
   3602 				prot = 0;
   3603 				goto out;
   3604 			}
   3605 			if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
   3606 				addr = pr_pagev_fill(pagev, seg, addr, eaddr);
   3607 				if (addr == eaddr) {
   3608 					*saddrp = addr;
   3609 					prot = 0;
   3610 					goto out;
   3611 				}
   3612 				pn = seg_page(seg, addr);
   3613 			}
   3614 		}
   3615 	}
   3616 
   3617 	/*
   3618 	 * Get the protections on the page corresponding to addr.
   3619 	 */
   3620 	pn = seg_page(seg, addr);
   3621 	ASSERT(pn >= pagev->pg_pnbase);
   3622 	ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));
   3623 
   3624 	prot = pagev->pg_protv[pn - pagev->pg_pnbase];
   3625 	getwatchprot(seg->s_as, addr, &prot);
   3626 	*saddrp = addr;
   3627 
   3628 	/*
   3629 	 * Now loop until we find a backed page with different protections
   3630 	 * or we reach the end of this segment.
   3631 	 */
   3632 	while ((addr += PAGESIZE) < eaddr) {
   3633 		/*
   3634 		 * If pn has advanced to the page number following what we
   3635 		 * have information on, refill the page vector and reset
   3636 		 * addr and pn.  If pr_pagev_fill does not return the
   3637 		 * address of the next page, we have a discontiguity and
   3638 		 * thus have reached the end of the current mapping.
   3639 		 */
   3640 		if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
   3641 			caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
   3642 			if (naddr != addr)
   3643 				goto out;
   3644 			pn = seg_page(seg, addr);
   3645 		}
   3646 
   3647 		/*
   3648 		 * The previous page's protections are in prot, and it has
   3649 		 * backing.  If this page is MAP_NORESERVE and has no backing,
   3650 		 * then end this mapping and return the previous protections.
   3651 		 */
   3652 		if (pagev->pg_incore != NULL &&
   3653 		    pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
   3654 			break;
   3655 
   3656 		/*
   3657 		 * Otherwise end the mapping if this page's protections (nprot)
   3658 		 * are different than those in the previous page (prot).
   3659 		 */
   3660 		nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
   3661 		getwatchprot(seg->s_as, addr, &nprot);
   3662 
   3663 		if (nprot != prot)
   3664 			break;
   3665 	}
   3666 
   3667 out:
   3668 	*protp = prot;
   3669 	return (addr);
   3670 }
   3671 
   3672 size_t
   3673 pr_getsegsize(struct seg *seg, int reserved)
   3674 {
   3675 	size_t size = seg->s_size;
   3676 
   3677 	/*
   3678 	 * If we're interested in the reserved space, return the size of the
   3679 	 * segment itself.  Everything else in this function is a special case
   3680 	 * to determine the actual underlying size of various segment types.
   3681 	 */
   3682 	if (reserved)
   3683 		return (size);
   3684 
   3685 	/*
   3686 	 * If this is a segvn mapping of a regular file, return the smaller
   3687 	 * of the segment size and the remaining size of the file beyond
   3688 	 * the file offset corresponding to seg->s_base.
   3689 	 */
   3690 	if (seg->s_ops == &segvn_ops) {
   3691 		vattr_t vattr;
   3692 		vnode_t *vp;
   3693 
   3694 		vattr.va_mask = AT_SIZE;
   3695 
   3696 		if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
   3697 		    vp != NULL && vp->v_type == VREG &&
   3698 		    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
   3699 
   3700 			u_offset_t fsize = vattr.va_size;
   3701 			u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);
   3702 
   3703 			if (fsize < offset)
   3704 				fsize = 0;
   3705 			else
   3706 				fsize -= offset;
   3707 
   3708 			fsize = roundup(fsize, (u_offset_t)PAGESIZE);
   3709 
   3710 			if (fsize < (u_offset_t)size)
   3711 				size = (size_t)fsize;
   3712 		}
   3713 
   3714 		return (size);
   3715 	}
   3716 
   3717 	/*
   3718 	 * If this is an ISM shared segment, don't include pages that are
   3719 	 * beyond the real size of the spt segment that backs it.
   3720 	 */
   3721 	if (seg->s_ops == &segspt_shmops)
   3722 		return (MIN(spt_realsize(seg), size));
   3723 
   3724 	/*
   3725 	 * If this is segment is a mapping from /dev/null, then this is a
   3726 	 * reservation of virtual address space and has no actual size.
   3727 	 * Such segments are backed by segdev and have type set to neither
   3728 	 * MAP_SHARED nor MAP_PRIVATE.
   3729 	 */
   3730 	if (seg->s_ops == &segdev_ops &&
   3731 	    ((SEGOP_GETTYPE(seg, seg->s_base) &
   3732 	    (MAP_SHARED | MAP_PRIVATE)) == 0))
   3733 		return (0);
   3734 
   3735 	/*
   3736 	 * If this segment doesn't match one of the special types we handle,
   3737 	 * just return the size of the segment itself.
   3738 	 */
   3739 	return (size);
   3740 }
   3741 
   3742 uint_t
   3743 pr_getprot(struct seg *seg, int reserved, void **tmp,
   3744 	caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
   3745 {
   3746 	struct as *as = seg->s_as;
   3747 
   3748 	caddr_t saddr = *saddrp;
   3749 	caddr_t naddr;
   3750 
   3751 	int check_noreserve;
   3752 	uint_t prot;
   3753 
   3754 	union {
   3755 		struct segvn_data *svd;
   3756 		struct segdev_data *sdp;
   3757 		void *data;
   3758 	} s;
   3759 
   3760 	s.data = seg->s_data;
   3761 
   3762 	ASSERT(AS_WRITE_HELD(as, &as->a_lock));
   3763 	ASSERT(saddr >= seg->s_base && saddr < eaddr);
   3764 	ASSERT(eaddr <= seg->s_base + seg->s_size);
   3765 
   3766 	/*
   3767 	 * Don't include MAP_NORESERVE pages in the address range
   3768 	 * unless their mappings have actually materialized.
   3769 	 * We cheat by knowing that segvn is the only segment
   3770 	 * driver that supports MAP_NORESERVE.
   3771 	 */
   3772 	check_noreserve =
   3773 	    (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
   3774 	    (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
   3775 	    (s.svd->flags & MAP_NORESERVE));
   3776 
   3777 	/*
   3778 	 * Examine every page only as a last resort.  We use guilty knowledge
   3779 	 * of segvn and segdev to avoid this: if there are no per-page
   3780 	 * protections present in the segment and we don't care about
   3781 	 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
   3782 	 */
   3783 	if (!check_noreserve && saddr == seg->s_base &&
   3784 	    seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
   3785 		prot = s.svd->prot;
   3786 		getwatchprot(as, saddr, &prot);
   3787 		naddr = eaddr;
   3788 
   3789 	} else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
   3790 	    s.sdp != NULL && s.sdp->pageprot == 0) {
   3791 		prot = s.sdp->prot;
   3792 		getwatchprot(as, saddr, &prot);
   3793 		naddr = eaddr;
   3794 
   3795 	} else {
   3796 		prpagev_t *pagev;
   3797 
   3798 		/*
   3799 		 * If addr is sitting at the start of the segment, then
   3800 		 * create a page vector to store protection and incore
   3801 		 * information for pages in the segment, and fill it.
   3802 		 * Otherwise, we expect *tmp to address the prpagev_t
   3803 		 * allocated by a previous call to this function.
   3804 		 */
   3805 		if (saddr == seg->s_base) {
   3806 			pagev = pr_pagev_create(seg, check_noreserve);
   3807 			saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);
   3808 
   3809 			ASSERT(*tmp == NULL);
   3810 			*tmp = pagev;
   3811 
   3812 			ASSERT(saddr <= eaddr);
   3813 			*saddrp = saddr;
   3814 
   3815 			if (saddr == eaddr) {
   3816 				naddr = saddr;
   3817 				prot = 0;
   3818 				goto out;
   3819 			}
   3820 
   3821 		} else {
   3822 			ASSERT(*tmp != NULL);
   3823 			pagev = (prpagev_t *)*tmp;
   3824 		}
   3825 
   3826 		naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
   3827 		ASSERT(naddr <= eaddr);
   3828 	}
   3829 
   3830 out:
   3831 	if (naddr == eaddr)
   3832 		pr_getprot_done(tmp);
   3833 	*naddrp = naddr;
   3834 	return (prot);
   3835 }
   3836 
   3837 void
   3838 pr_getprot_done(void **tmp)
   3839 {
   3840 	if (*tmp != NULL) {
   3841 		pr_pagev_destroy((prpagev_t *)*tmp);
   3842 		*tmp = NULL;
   3843 	}
   3844 }
   3845 
   3846 /*
   3847  * Return true iff the vnode is a /proc file from the object directory.
   3848  */
   3849 int
   3850 pr_isobject(vnode_t *vp)
   3851 {
   3852 	return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
   3853 }
   3854 
   3855 /*
   3856  * Return true iff the vnode is a /proc file opened by the process itself.
   3857  */
   3858 int
   3859 pr_isself(vnode_t *vp)
   3860 {
   3861 	/*
   3862 	 * XXX: To retain binary compatibility with the old
   3863 	 * ioctl()-based version of /proc, we exempt self-opens
   3864 	 * of /proc/<pid> from being marked close-on-exec.
   3865 	 */
   3866 	return (vn_matchops(vp, prvnodeops) &&
   3867 	    (VTOP(vp)->pr_flags & PR_ISSELF) &&
   3868 	    VTOP(vp)->pr_type != PR_PIDDIR);
   3869 }
   3870 
   3871 static ssize_t
   3872 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
   3873 {
   3874 	ssize_t pagesize, hatsize;
   3875 
   3876 	ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
   3877 	ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
   3878 	ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
   3879 	ASSERT(saddr < eaddr);
   3880 
   3881 	pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
   3882 	ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
   3883 	ASSERT(pagesize != 0);
   3884 
   3885 	if (pagesize == -1)
   3886 		pagesize = PAGESIZE;
   3887 
   3888 	saddr += P2NPHASE((uintptr_t)saddr, pagesize);
   3889 
   3890 	while (saddr < eaddr) {
   3891 		if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
   3892 			break;
   3893 		ASSERT(IS_P2ALIGNED(saddr, pagesize));
   3894 		saddr += pagesize;
   3895 	}
   3896 
   3897 	*naddrp = ((saddr < eaddr) ? saddr : eaddr);
   3898 	return (hatsize);
   3899 }
   3900 
   3901 /*
   3902  * Return an array of structures with extended memory map information.
   3903  * We allocate here; the caller must deallocate.
   3904  */
   3905 int
   3906 prgetxmap(proc_t *p, list_t *iolhead)
   3907 {
   3908 	struct as *as = p->p_as;
   3909 	prxmap_t *mp;
   3910 	struct seg *seg;
   3911 	struct seg *brkseg, *stkseg;
   3912 	struct vnode *vp;
   3913 	struct vattr vattr;
   3914 	uint_t prot;
   3915 
   3916 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
   3917 
   3918 	/*
   3919 	 * Request an initial buffer size that doesn't waste memory
   3920 	 * if the address space has only a small number of segments.
   3921 	 */
   3922 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
   3923 
   3924 	if ((seg = AS_SEGFIRST(as)) == NULL)
   3925 		return (0);
   3926 
   3927 	brkseg = break_seg(p);
   3928 	stkseg = as_segat(as, prgetstackbase(p));
   3929 
   3930 	do {
   3931 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
   3932 		caddr_t saddr, naddr, baddr;
   3933 		void *tmp = NULL;
   3934 		ssize_t psz;
   3935 		char *parr;
   3936 		uint64_t npages;
   3937 		uint64_t pagenum;
   3938 
   3939 		/*
   3940 		 * Segment loop part one: iterate from the base of the segment
   3941 		 * to its end, pausing at each address boundary (baddr) between
   3942 		 * ranges that have different virtual memory protections.
   3943 		 */
   3944 		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
   3945 			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
   3946 			ASSERT(baddr >= saddr && baddr <= eaddr);
   3947 
   3948 			/*
   3949 			 * Segment loop part two: iterate from the current
   3950 			 * position to the end of the protection boundary,
   3951 			 * pausing at each address boundary (naddr) between
   3952 			 * ranges that have different underlying page sizes.
   3953 			 */
   3954 			for (; saddr < baddr; saddr = naddr) {
   3955 				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
   3956 				ASSERT(naddr >= saddr && naddr <= baddr);
   3957 
   3958 				mp = pr_iol_newbuf(iolhead, sizeof (*mp));
   3959 
   3960 				mp->pr_vaddr = (uintptr_t)saddr;
   3961 				mp->pr_size = naddr - saddr;
   3962 				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
   3963 				mp->pr_mflags = 0;
   3964 				if (prot & PROT_READ)
   3965 					mp->pr_mflags |= MA_READ;
   3966 				if (prot & PROT_WRITE)
   3967 					mp->pr_mflags |= MA_WRITE;
   3968 				if (prot & PROT_EXEC)
   3969 					mp->pr_mflags |= MA_EXEC;
   3970 				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
   3971 					mp->pr_mflags |= MA_SHARED;
   3972 				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
   3973 					mp->pr_mflags |= MA_NORESERVE;
   3974 				if (seg->s_ops == &segspt_shmops ||
   3975 				    (seg->s_ops == &segvn_ops &&
   3976 				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
   3977 				    vp == NULL)))
   3978 					mp->pr_mflags |= MA_ANON;
   3979 				if (seg == brkseg)
   3980 					mp->pr_mflags |= MA_BREAK;
   3981 				else if (seg == stkseg)
   3982 					mp->pr_mflags |= MA_STACK;
   3983 				if (seg->s_ops == &segspt_shmops)
   3984 					mp->pr_mflags |= MA_ISM | MA_SHM;
   3985 
   3986 				mp->pr_pagesize = PAGESIZE;
   3987 				if (psz == -1) {
   3988 					mp->pr_hatpagesize = 0;
   3989 				} else {
   3990 					mp->pr_hatpagesize = psz;
   3991 				}
   3992 
   3993 				/*
   3994 				 * Manufacture a filename for the "object" dir.
   3995 				 */
   3996 				mp->pr_dev = PRNODEV;
   3997 				vattr.va_mask = AT_FSID|AT_NODEID;
   3998 				if (seg->s_ops == &segvn_ops &&
   3999 				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
   4000 				    vp != NULL && vp->v_type == VREG &&
   4001 				    VOP_GETATTR(vp, &vattr, 0, CRED(),
   4002 				    NULL) == 0) {
   4003 					mp->pr_dev = vattr.va_fsid;
   4004 					mp->pr_ino = vattr.va_nodeid;
   4005 					if (vp == p->p_exec)
   4006 						(void) strcpy(mp->pr_mapname,
   4007 						    "a.out");
   4008 					else
   4009 						pr_object_name(mp->pr_mapname,
   4010 						    vp, &vattr);
   4011 				}
   4012 
   4013 				/*
   4014 				 * Get the SysV shared memory id, if any.
   4015 				 */
   4016 				if ((mp->pr_mflags & MA_SHARED) &&
   4017 				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
   4018 				    seg->s_base)) != SHMID_NONE) {
   4019 					if (mp->pr_shmid == SHMID_FREE)
   4020 						mp->pr_shmid = -1;
   4021 
   4022 					mp->pr_mflags |= MA_SHM;
   4023 				} else {
   4024 					mp->pr_shmid = -1;
   4025 				}
   4026 
   4027 				npages = ((uintptr_t)(naddr - saddr)) >>
   4028 				    PAGESHIFT;
   4029 				parr = kmem_zalloc(npages, KM_SLEEP);
   4030 
   4031 				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
   4032 
   4033 				for (pagenum = 0; pagenum < npages; pagenum++) {
   4034 					if (parr[pagenum] & SEG_PAGE_INCORE)
   4035 						mp->pr_rss++;
   4036 					if (parr[pagenum] & SEG_PAGE_ANON)
   4037 						mp->pr_anon++;
   4038 					if (parr[pagenum] & SEG_PAGE_LOCKED)
   4039 						mp->pr_locked++;
   4040 				}
   4041 				kmem_free(parr, npages);
   4042 			}
   4043 		}
   4044 		ASSERT(tmp == NULL);
   4045 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
   4046 
   4047 	return (0);
   4048 }
   4049 
   4050 /*
   4051  * Return the process's credentials.  We don't need a 32-bit equivalent of
   4052  * this function because prcred_t and prcred32_t are actually the same.
   4053  */
   4054 void
   4055 prgetcred(proc_t *p, prcred_t *pcrp)
   4056 {
   4057 	mutex_enter(&p->p_crlock);
   4058 	cred2prcred(p->p_cred, pcrp);
   4059 	mutex_exit(&p->p_crlock);
   4060 }
   4061 
   4062 /*
   4063  * Compute actual size of the prpriv_t structure.
   4064  */
   4065 
   4066 size_t
   4067 prgetprivsize(void)
   4068 {
   4069 	return (priv_prgetprivsize(NULL));
   4070 }
   4071 
   4072 /*
   4073  * Return the process's privileges.  We don't need a 32-bit equivalent of
   4074  * this function because prpriv_t and prpriv32_t are actually the same.
   4075  */
   4076 void
   4077 prgetpriv(proc_t *p, prpriv_t *pprp)
   4078 {
   4079 	mutex_enter(&p->p_crlock);
   4080 	cred2prpriv(p->p_cred, pprp);
   4081 	mutex_exit(&p->p_crlock);
   4082 }
   4083 
   4084 #ifdef _SYSCALL32_IMPL
   4085 /*
   4086  * Return an array of structures with HAT memory map information.
   4087  * We allocate here; the caller must deallocate.
   4088  */
   4089 int
   4090 prgetxmap32(proc_t *p, list_t *iolhead)
   4091 {
   4092 	struct as *as = p->p_as;
   4093 	prxmap32_t *mp;
   4094 	struct seg *seg;
   4095 	struct seg *brkseg, *stkseg;
   4096 	struct vnode *vp;
   4097 	struct vattr vattr;
   4098 	uint_t prot;
   4099 
   4100 	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
   4101 
   4102 	/*
   4103 	 * Request an initial buffer size that doesn't waste memory
   4104 	 * if the address space has only a small number of segments.
   4105 	 */
   4106 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
   4107 
   4108 	if ((seg = AS_SEGFIRST(as)) == NULL)
   4109 		return (0);
   4110 
   4111 	brkseg = break_seg(p);
   4112 	stkseg = as_segat(as, prgetstackbase(p));
   4113 
   4114 	do {
   4115 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
   4116 		caddr_t saddr, naddr, baddr;
   4117 		void *tmp = NULL;
   4118 		ssize_t psz;
   4119 		char *parr;
   4120 		uint64_t npages;
   4121 		uint64_t pagenum;
   4122 
   4123 		/*
   4124 		 * Segment loop part one: iterate from the base of the segment
   4125 		 * to its end, pausing at each address boundary (baddr) between
   4126 		 * ranges that have different virtual memory protections.
   4127 		 */
   4128 		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
   4129 			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
   4130 			ASSERT(baddr >= saddr && baddr <= eaddr);
   4131 
   4132 			/*
   4133 			 * Segment loop part two: iterate from the current
   4134 			 * position to the end of the protection boundary,
   4135 			 * pausing at each address boundary (naddr) between
   4136 			 * ranges that have different underlying page sizes.
   4137 			 */
   4138 			for (; saddr < baddr; saddr = naddr) {
   4139 				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
   4140 				ASSERT(naddr >= saddr && naddr <= baddr);
   4141 
   4142 				mp = pr_iol_newbuf(iolhead, sizeof (*mp));
   4143 
   4144 				mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
   4145 				mp->pr_size = (size32_t)(naddr - saddr);
   4146 				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
   4147 				mp->pr_mflags = 0;
   4148 				if (prot & PROT_READ)
   4149 					mp->pr_mflags |= MA_READ;
   4150 				if (prot & PROT_WRITE)
   4151 					mp->pr_mflags |= MA_WRITE;
   4152 				if (prot & PROT_EXEC)
   4153 					mp->pr_mflags |= MA_EXEC;
   4154 				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
   4155 					mp->pr_mflags |= MA_SHARED;
   4156 				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
   4157 					mp->pr_mflags |= MA_NORESERVE;
   4158 				if (seg->s_ops == &segspt_shmops ||
   4159 				    (seg->s_ops == &segvn_ops &&
   4160 				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
   4161 				    vp == NULL)))
   4162 					mp->pr_mflags |= MA_ANON;
   4163 				if (seg == brkseg)
   4164 					mp->pr_mflags |= MA_BREAK;
   4165 				else if (seg == stkseg)
   4166 					mp->pr_mflags |= MA_STACK;
   4167 				if (seg->s_ops == &segspt_shmops)
   4168 					mp->pr_mflags |= MA_ISM | MA_SHM;
   4169 
   4170 				mp->pr_pagesize = PAGESIZE;
   4171 				if (psz == -1) {
   4172 					mp->pr_hatpagesize = 0;
   4173 				} else {
   4174 					mp->pr_hatpagesize = psz;
   4175 				}
   4176 
   4177 				/*
   4178 				 * Manufacture a filename for the "object" dir.
   4179 				 */
   4180 				mp->pr_dev = PRNODEV32;
   4181 				vattr.va_mask = AT_FSID|AT_NODEID;
   4182 				if (seg->s_ops == &segvn_ops &&
   4183 				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
   4184 				    vp != NULL && vp->v_type == VREG &&
   4185 				    VOP_GETATTR(vp, &vattr, 0, CRED(),
   4186 				    NULL) == 0) {
   4187 					(void) cmpldev(&mp->pr_dev,
   4188 					    vattr.va_fsid);
   4189 					mp->pr_ino = vattr.va_nodeid;
   4190 					if (vp == p->p_exec)
   4191 						(void) strcpy(mp->pr_mapname,
   4192 						    "a.out");
   4193 					else
   4194 						pr_object_name(mp->pr_mapname,
   4195 						    vp, &vattr);
   4196 				}
   4197 
   4198 				/*
   4199 				 * Get the SysV shared memory id, if any.
   4200 				 */
   4201 				if ((mp->pr_mflags & MA_SHARED) &&
   4202 				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
   4203 				    seg->s_base)) != SHMID_NONE) {
   4204 					if (mp->pr_shmid == SHMID_FREE)
   4205 						mp->pr_shmid = -1;
   4206 
   4207 					mp->pr_mflags |= MA_SHM;
   4208 				} else {
   4209 					mp->pr_shmid = -1;
   4210 				}
   4211 
   4212 				npages = ((uintptr_t)(naddr - saddr)) >>
   4213 				    PAGESHIFT;
   4214 				parr = kmem_zalloc(npages, KM_SLEEP);
   4215 
   4216 				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
   4217 
   4218 				for (pagenum = 0; pagenum < npages; pagenum++) {
   4219 					if (parr[pagenum] & SEG_PAGE_INCORE)
   4220 						mp->pr_rss++;
   4221 					if (parr[pagenum] & SEG_PAGE_ANON)
   4222 						mp->pr_anon++;
   4223 					if (parr[pagenum] & SEG_PAGE_LOCKED)
   4224 						mp->pr_locked++;
   4225 				}
   4226 				kmem_free(parr, npages);
   4227 			}
   4228 		}
   4229 		ASSERT(tmp == NULL);
   4230 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
   4231 
   4232 	return (0);
   4233 }
   4234 #endif	/* _SYSCALL32_IMPL */
   4235