Home | History | Annotate | Download | only in procfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * lxpr_vnops.c:  Vnode operations for the lx /proc file system
     28  *
     29  * Assumptions and Gotchas:
     30  *
 * In order to preserve Solaris' security policy, this file system's
 * functionality does not override Solaris' security policies, even if
 * that means breaking Linux compatibility.
     34  *
     35  * Linux has no concept of lwps so we only implement procs here as in the
     36  * old /proc interface.
     37  */
     38 
     39 #include <sys/cpupart.h>
     40 #include <sys/cpuvar.h>
     41 #include <sys/session.h>
     42 #include <sys/vmparam.h>
     43 #include <sys/mman.h>
     44 #include <vm/rm.h>
     45 #include <vm/seg_vn.h>
     46 #include <sys/sdt.h>
     47 #include <lx_signum.h>
     48 #include <sys/strlog.h>
     49 #include <sys/stropts.h>
     50 #include <sys/cmn_err.h>
     51 #include <sys/lx_brand.h>
     52 #include <sys/x86_archext.h>
     53 #include <sys/archsystm.h>
     54 #include <sys/fp.h>
     55 #include <sys/pool_pset.h>
     56 #include <sys/pset.h>
     57 #include <sys/zone.h>
     58 #include <sys/pghw.h>
     59 #include <sys/vfs_opreg.h>
     60 
     61 /* Dependent on the Solaris procfs */
     62 extern kthread_t *prchoose(proc_t *);
     63 
     64 #include "lx_proc.h"
     65 
     66 extern pgcnt_t swapfs_minfree;
     67 extern time_t boot_time;
     68 
     69 /*
     70  * Pointer to the vnode ops vector for this fs.
     71  * This is instantiated in lxprinit() in lxpr_vfsops.c
     72  */
     73 vnodeops_t *lxpr_vnodeops;
     74 
     75 static int lxpr_open(vnode_t **, int, cred_t *, caller_context_t *);
     76 static int lxpr_close(vnode_t *, int, int, offset_t, cred_t *,
     77     caller_context_t *);
     78 static int lxpr_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
     79 static int lxpr_getattr(vnode_t *, vattr_t *, int, cred_t *,
     80     caller_context_t *);
     81 static int lxpr_access(vnode_t *, int, int, cred_t *, caller_context_t *);
     82 static int lxpr_lookup(vnode_t *, char *, vnode_t **,
     83     pathname_t *, int, vnode_t *, cred_t *, caller_context_t *, int *,
     84     pathname_t *);
     85 static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *,
     86     caller_context_t *, int);
     87 static int lxpr_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *);
     88 static int lxpr_cmp(vnode_t *, vnode_t *, caller_context_t *);
     89 static int lxpr_realvp(vnode_t *, vnode_t **, caller_context_t *);
     90 static int lxpr_sync(void);
     91 static void lxpr_inactive(vnode_t *, cred_t *, caller_context_t *);
     92 
     93 static vnode_t *lxpr_lookup_procdir(vnode_t *, char *);
     94 static vnode_t *lxpr_lookup_piddir(vnode_t *, char *);
     95 static vnode_t *lxpr_lookup_not_a_dir(vnode_t *, char *);
     96 static vnode_t *lxpr_lookup_fddir(vnode_t *, char *);
     97 static vnode_t *lxpr_lookup_netdir(vnode_t *, char *);
     98 
     99 static int lxpr_readdir_procdir(lxpr_node_t *, uio_t *, int *);
    100 static int lxpr_readdir_piddir(lxpr_node_t *, uio_t *, int *);
    101 static int lxpr_readdir_not_a_dir(lxpr_node_t *, uio_t *, int *);
    102 static int lxpr_readdir_fddir(lxpr_node_t *, uio_t *, int *);
    103 static int lxpr_readdir_netdir(lxpr_node_t *, uio_t *, int *);
    104 
    105 static void lxpr_read_invalid(lxpr_node_t *, lxpr_uiobuf_t *);
    106 static void lxpr_read_empty(lxpr_node_t *, lxpr_uiobuf_t *);
    107 static void lxpr_read_cpuinfo(lxpr_node_t *, lxpr_uiobuf_t *);
    108 static void lxpr_read_isdir(lxpr_node_t *, lxpr_uiobuf_t *);
    109 static void lxpr_read_fd(lxpr_node_t *, lxpr_uiobuf_t *);
    110 static void lxpr_read_kmsg(lxpr_node_t *, lxpr_uiobuf_t *);
    111 static void lxpr_read_loadavg(lxpr_node_t *, lxpr_uiobuf_t *);
    112 static void lxpr_read_meminfo(lxpr_node_t *, lxpr_uiobuf_t *);
    113 static void lxpr_read_mounts(lxpr_node_t *, lxpr_uiobuf_t *);
    114 static void lxpr_read_partitions(lxpr_node_t *, lxpr_uiobuf_t *);
    115 static void lxpr_read_stat(lxpr_node_t *, lxpr_uiobuf_t *);
    116 static void lxpr_read_uptime(lxpr_node_t *, lxpr_uiobuf_t *);
    117 static void lxpr_read_version(lxpr_node_t *, lxpr_uiobuf_t *);
    118 
    119 static void lxpr_read_pid_cmdline(lxpr_node_t *, lxpr_uiobuf_t *);
    120 static void lxpr_read_pid_maps(lxpr_node_t *, lxpr_uiobuf_t *);
    121 static void lxpr_read_pid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
    122 static void lxpr_read_pid_statm(lxpr_node_t *, lxpr_uiobuf_t *);
    123 static void lxpr_read_pid_status(lxpr_node_t *, lxpr_uiobuf_t *);
    124 
    125 static void lxpr_read_net_arp(lxpr_node_t *, lxpr_uiobuf_t *);
    126 static void lxpr_read_net_dev(lxpr_node_t *, lxpr_uiobuf_t *);
    127 static void lxpr_read_net_dev_mcast(lxpr_node_t *, lxpr_uiobuf_t *);
    128 static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *);
    129 static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *);
    130 static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *);
    131 static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *);
    132 static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *);
    133 static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *);
    134 static void lxpr_read_net_route(lxpr_node_t *, lxpr_uiobuf_t *);
    135 static void lxpr_read_net_rpc(lxpr_node_t *, lxpr_uiobuf_t *);
    136 static void lxpr_read_net_rt_cache(lxpr_node_t *, lxpr_uiobuf_t *);
    137 static void lxpr_read_net_sockstat(lxpr_node_t *, lxpr_uiobuf_t *);
    138 static void lxpr_read_net_snmp(lxpr_node_t *, lxpr_uiobuf_t *);
    139 static void lxpr_read_net_stat(lxpr_node_t *, lxpr_uiobuf_t *);
    140 static void lxpr_read_net_tcp(lxpr_node_t *, lxpr_uiobuf_t *);
    141 static void lxpr_read_net_udp(lxpr_node_t *, lxpr_uiobuf_t *);
    142 static void lxpr_read_net_unix(lxpr_node_t *, lxpr_uiobuf_t *);
    143 
/*
 * Simple unit conversions, implemented as plain shifts:
 *   btok(x) shifts right by 10, i.e. divides a byte count by 1024 and
 *           discards the remainder.
 *   ptok(x) shifts left by (PAGESHIFT - 10), i.e. scales a page count to
 *           kbytes.
 */
#define	btok(x)	((x) >> 10)			/* bytes to kbytes */
#define	ptok(x)	((x) << (PAGESHIFT - 10))	/* pages to kbytes */
    149 
/*
 * The lx /proc vnode operations vector
 *
 * A list of (operation name, handler) pairs terminated by a NULL pair.
 * VOPNAME_FSYNC and VOPNAME_SEEK are both routed to lxpr_sync() through the
 * generic .error member rather than through a typed vop_* member.
 */
const fs_operation_def_t lxpr_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = lxpr_open },
	VOPNAME_CLOSE,		{ .vop_close = lxpr_close },
	VOPNAME_READ,		{ .vop_read = lxpr_read },
	VOPNAME_GETATTR,	{ .vop_getattr = lxpr_getattr },
	VOPNAME_ACCESS,		{ .vop_access = lxpr_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = lxpr_lookup },
	VOPNAME_READDIR,	{ .vop_readdir = lxpr_readdir },
	VOPNAME_READLINK,	{ .vop_readlink = lxpr_readlink },
	VOPNAME_FSYNC,		{ .error = lxpr_sync },
	VOPNAME_SEEK,		{ .error = lxpr_sync },
	VOPNAME_INACTIVE,	{ .vop_inactive = lxpr_inactive },
	VOPNAME_CMP,		{ .vop_cmp = lxpr_cmp },
	VOPNAME_REALVP,		{ .vop_realvp = lxpr_realvp },
	NULL,			NULL
};
    169 
    170 
/*
 * Fixed contents of an lx /proc directory.
 *
 * Each entry pairs an LXPR_* node type with the name that entry is known
 * by.  Per-process entries are not part of this table.
 */
static lxpr_dirent_t lx_procdir[] = {
	{ LXPR_CMDLINE,		"cmdline" },
	{ LXPR_CPUINFO,		"cpuinfo" },
	{ LXPR_DEVICES,		"devices" },
	{ LXPR_DMA,		"dma" },
	{ LXPR_FILESYSTEMS,	"filesystems" },
	{ LXPR_INTERRUPTS,	"interrupts" },
	{ LXPR_IOPORTS,		"ioports" },
	{ LXPR_KCORE,		"kcore" },
	{ LXPR_KMSG,		"kmsg" },
	{ LXPR_LOADAVG,		"loadavg" },
	{ LXPR_MEMINFO,		"meminfo" },
	{ LXPR_MOUNTS,		"mounts" },
	{ LXPR_NETDIR,		"net" },
	{ LXPR_PARTITIONS,	"partitions" },
	{ LXPR_SELF,		"self" },
	{ LXPR_STAT,		"stat" },
	{ LXPR_UPTIME,		"uptime" },
	{ LXPR_VERSION,		"version" }
};

/* Number of entries in lx_procdir[] */
#define	PROCDIRFILES	(sizeof (lx_procdir) / sizeof (lx_procdir[0]))
    196 
/*
 * Contents of an lx /proc/<pid> directory.
 *
 * Each entry pairs an LXPR_PID_* node type with the name that entry is
 * known by.
 */
static lxpr_dirent_t piddir[] = {
	{ LXPR_PID_CMDLINE,	"cmdline" },
	{ LXPR_PID_CPU,		"cpu" },
	{ LXPR_PID_CURDIR,	"cwd" },
	{ LXPR_PID_ENV,		"environ" },
	{ LXPR_PID_EXE,		"exe" },
	{ LXPR_PID_MAPS,	"maps" },
	{ LXPR_PID_MEM,		"mem" },
	{ LXPR_PID_ROOTDIR,	"root" },
	{ LXPR_PID_STAT,	"stat" },
	{ LXPR_PID_STATM,	"statm" },
	{ LXPR_PID_STATUS,	"status" },
	{ LXPR_PID_FDDIR,	"fd" }
};

/* Number of entries in piddir[] */
#define	PIDDIRFILES	(sizeof (piddir) / sizeof (piddir[0]))
    216 
/*
 * Contents of the lx /proc/net directory.
 *
 * Each entry pairs an LXPR_NET_* node type with the name that entry is
 * known by.
 */
static lxpr_dirent_t netdir[] = {
	{ LXPR_NET_ARP,		"arp" },
	{ LXPR_NET_DEV,		"dev" },
	{ LXPR_NET_DEV_MCAST,	"dev_mcast" },
	{ LXPR_NET_IGMP,	"igmp" },
	{ LXPR_NET_IP_MR_CACHE,	"ip_mr_cache" },
	{ LXPR_NET_IP_MR_VIF,	"ip_mr_vif" },
	{ LXPR_NET_MCFILTER,	"mcfilter" },
	{ LXPR_NET_NETSTAT,	"netstat" },
	{ LXPR_NET_RAW,		"raw" },
	{ LXPR_NET_ROUTE,	"route" },
	{ LXPR_NET_RPC,		"rpc" },
	{ LXPR_NET_RT_CACHE,	"rt_cache" },
	{ LXPR_NET_SOCKSTAT,	"sockstat" },
	{ LXPR_NET_SNMP,	"snmp" },
	{ LXPR_NET_STAT,	"stat" },
	{ LXPR_NET_TCP,		"tcp" },
	{ LXPR_NET_UDP,		"udp" },
	{ LXPR_NET_UNIX,	"unix" }
};

/* Number of entries in netdir[] */
#define	NETDIRFILES	(sizeof (netdir) / sizeof (netdir[0]))
    242 
    243 /*
    244  * lxpr_open(): Vnode operation for VOP_OPEN()
    245  */
    246 static int
    247 lxpr_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
    248 {
    249 	vnode_t		*vp = *vpp;
    250 	lxpr_node_t	*lxpnp = VTOLXP(vp);
    251 	lxpr_nodetype_t	type = lxpnp->lxpr_type;
    252 	vnode_t		*rvp;
    253 	int		error = 0;
    254 
    255 	/*
    256 	 * We only allow reading in this file systrem
    257 	 */
    258 	if (flag & FWRITE)
    259 		return (EROFS);
    260 
    261 	/*
    262 	 * If we are opening an underlying file only allow regular files
    263 	 * reject the open for anything but a regular file.
    264 	 * Just do it if we are opening the current or root directory.
    265 	 */
    266 	if (lxpnp->lxpr_realvp != NULL) {
    267 		rvp = lxpnp->lxpr_realvp;
    268 
    269 		if (type == LXPR_PID_FD_FD && rvp->v_type != VREG)
    270 			error = EACCES;
    271 		else {
    272 			/*
    273 			 * Need to hold rvp since VOP_OPEN() may release it.
    274 			 */
    275 			VN_HOLD(rvp);
    276 			error = VOP_OPEN(&rvp, flag, cr, ct);
    277 			if (error) {
    278 				VN_RELE(rvp);
    279 			} else {
    280 				*vpp = rvp;
    281 				VN_RELE(vp);
    282 			}
    283 		}
    284 	}
    285 
    286 	if (type == LXPR_KMSG) {
    287 		ldi_ident_t	li = VTOLXPM(vp)->lxprm_li;
    288 		struct strioctl	str;
    289 		int		rv;
    290 
    291 		/*
    292 		 * Open the zone's console device using the layered driver
    293 		 * interface.
    294 		 */
    295 		if ((error = ldi_open_by_name("/dev/log", FREAD, cr,
    296 		    &lxpnp->lxpr_cons_ldih, li)) != 0)
    297 			return (error);
    298 
    299 		/*
    300 		 * Send an ioctl to the underlying console device, letting it
    301 		 * know we're interested in getting console messages.
    302 		 */
    303 		str.ic_cmd = I_CONSLOG;
    304 		str.ic_timout = 0;
    305 		str.ic_len = 0;
    306 		str.ic_dp = NULL;
    307 		if ((error = ldi_ioctl(lxpnp->lxpr_cons_ldih, I_STR,
    308 		    (intptr_t)&str, FKIOCTL, cr, &rv)) != 0)
    309 			return (error);
    310 	}
    311 
    312 	return (error);
    313 }
    314 
    315 
    316 /*
    317  * lxpr_close(): Vnode operation for VOP_CLOSE()
    318  */
    319 /* ARGSUSED */
    320 static int
    321 lxpr_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
    322     caller_context_t *ct)
    323 {
    324 	lxpr_node_t	*lxpr = VTOLXP(vp);
    325 	lxpr_nodetype_t	type = lxpr->lxpr_type;
    326 	int		err;
    327 
    328 	/*
    329 	 * we should never get here because the close is done on the realvp
    330 	 * for these nodes
    331 	 */
    332 	ASSERT(type != LXPR_PID_FD_FD &&
    333 	    type != LXPR_PID_CURDIR &&
    334 	    type != LXPR_PID_ROOTDIR &&
    335 	    type != LXPR_PID_EXE);
    336 
    337 	if (type == LXPR_KMSG) {
    338 		if ((err = ldi_close(lxpr->lxpr_cons_ldih, 0, cr)) != 0)
    339 			return (err);
    340 	}
    341 
    342 	return (0);
    343 }
    344 
/*
 * Array of read functions, indexed by lx /proc file type.
 *
 * lxpr_read() calls lxpr_read_function[type] directly, so the position of
 * each entry has to correspond to the numeric value of its node type; the
 * trailing comment on each line names the file the slot serves.
 */
static void (*lxpr_read_function[LXPR_NFILES])() = {
	lxpr_read_isdir,		/* /proc		*/
	lxpr_read_isdir,		/* /proc/<pid>		*/
	lxpr_read_pid_cmdline,		/* /proc/<pid>/cmdline	*/
	lxpr_read_empty,		/* /proc/<pid>/cpu	*/
	lxpr_read_invalid,		/* /proc/<pid>/cwd	*/
	lxpr_read_empty,		/* /proc/<pid>/environ	*/
	lxpr_read_invalid,		/* /proc/<pid>/exe	*/
	lxpr_read_pid_maps,		/* /proc/<pid>/maps	*/
	lxpr_read_empty,		/* /proc/<pid>/mem	*/
	lxpr_read_invalid,		/* /proc/<pid>/root	*/
	lxpr_read_pid_stat,		/* /proc/<pid>/stat	*/
	lxpr_read_pid_statm,		/* /proc/<pid>/statm	*/
	lxpr_read_pid_status,		/* /proc/<pid>/status	*/
	lxpr_read_isdir,		/* /proc/<pid>/fd	*/
	lxpr_read_fd,			/* /proc/<pid>/fd/nn	*/
	lxpr_read_empty,		/* /proc/cmdline	*/
	lxpr_read_cpuinfo,		/* /proc/cpuinfo	*/
	lxpr_read_empty,		/* /proc/devices	*/
	lxpr_read_empty,		/* /proc/dma		*/
	lxpr_read_empty,		/* /proc/filesystems	*/
	lxpr_read_empty,		/* /proc/interrupts	*/
	lxpr_read_empty,		/* /proc/ioports	*/
	lxpr_read_empty,		/* /proc/kcore		*/
	lxpr_read_kmsg,			/* /proc/kmsg		*/
	lxpr_read_loadavg,		/* /proc/loadavg	*/
	lxpr_read_meminfo,		/* /proc/meminfo	*/
	lxpr_read_mounts,		/* /proc/mounts		*/
	lxpr_read_isdir,		/* /proc/net		*/
	lxpr_read_net_arp,		/* /proc/net/arp	*/
	lxpr_read_net_dev,		/* /proc/net/dev	*/
	lxpr_read_net_dev_mcast,	/* /proc/net/dev_mcast	*/
	lxpr_read_net_igmp,		/* /proc/net/igmp	*/
	lxpr_read_net_ip_mr_cache,	/* /proc/net/ip_mr_cache */
	lxpr_read_net_ip_mr_vif,	/* /proc/net/ip_mr_vif	*/
	lxpr_read_net_mcfilter,		/* /proc/net/mcfilter	*/
	lxpr_read_net_netstat,		/* /proc/net/netstat	*/
	lxpr_read_net_raw,		/* /proc/net/raw	*/
	lxpr_read_net_route,		/* /proc/net/route	*/
	lxpr_read_net_rpc,		/* /proc/net/rpc	*/
	lxpr_read_net_rt_cache,		/* /proc/net/rt_cache	*/
	lxpr_read_net_sockstat,		/* /proc/net/sockstat	*/
	lxpr_read_net_snmp,		/* /proc/net/snmp	*/
	lxpr_read_net_stat,		/* /proc/net/stat	*/
	lxpr_read_net_tcp,		/* /proc/net/tcp	*/
	lxpr_read_net_udp,		/* /proc/net/udp	*/
	lxpr_read_net_unix,		/* /proc/net/unix	*/
	lxpr_read_partitions,		/* /proc/partitions	*/
	lxpr_read_invalid,		/* /proc/self		*/
	lxpr_read_stat,			/* /proc/stat		*/
	lxpr_read_uptime,		/* /proc/uptime		*/
	lxpr_read_version,		/* /proc/version	*/
};
    398 
/*
 * Array of lookup functions, indexed by lx /proc file type.
 *
 * Only the four directory slots (/proc, /proc/<pid>, /proc/<pid>/fd and
 * /proc/net) have a real lookup routine; every other slot holds
 * lxpr_lookup_not_a_dir().  The slots are listed in the same order as in
 * lxpr_read_function[].
 */
static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = {
	lxpr_lookup_procdir,		/* /proc		*/
	lxpr_lookup_piddir,		/* /proc/<pid>		*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/cmdline	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/cpu	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/cwd	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/environ	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/exe	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/maps	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/mem	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/root	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/stat	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/statm	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/status	*/
	lxpr_lookup_fddir,		/* /proc/<pid>/fd	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/fd/nn	*/
	lxpr_lookup_not_a_dir,		/* /proc/cmdline	*/
	lxpr_lookup_not_a_dir,		/* /proc/cpuinfo	*/
	lxpr_lookup_not_a_dir,		/* /proc/devices	*/
	lxpr_lookup_not_a_dir,		/* /proc/dma		*/
	lxpr_lookup_not_a_dir,		/* /proc/filesystems	*/
	lxpr_lookup_not_a_dir,		/* /proc/interrupts	*/
	lxpr_lookup_not_a_dir,		/* /proc/ioports	*/
	lxpr_lookup_not_a_dir,		/* /proc/kcore		*/
	lxpr_lookup_not_a_dir,		/* /proc/kmsg		*/
	lxpr_lookup_not_a_dir,		/* /proc/loadavg	*/
	lxpr_lookup_not_a_dir,		/* /proc/meminfo	*/
	lxpr_lookup_not_a_dir,		/* /proc/mounts		*/
	lxpr_lookup_netdir,		/* /proc/net		*/
	lxpr_lookup_not_a_dir,		/* /proc/net/arp	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/dev	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/dev_mcast	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/igmp	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/ip_mr_cache */
	lxpr_lookup_not_a_dir,		/* /proc/net/ip_mr_vif	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/mcfilter	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/netstat	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/raw	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/route	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/rpc	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/rt_cache	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/sockstat	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/snmp	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/stat	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/tcp	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/udp	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/unix	*/
	lxpr_lookup_not_a_dir,		/* /proc/partitions	*/
	lxpr_lookup_not_a_dir,		/* /proc/self		*/
	lxpr_lookup_not_a_dir,		/* /proc/stat		*/
	lxpr_lookup_not_a_dir,		/* /proc/uptime		*/
	lxpr_lookup_not_a_dir,		/* /proc/version	*/
};
    455 
/*
 * Array of readdir functions, indexed by /proc file type.
 *
 * Only the four directory slots (/proc, /proc/<pid>, /proc/<pid>/fd and
 * /proc/net) have a real readdir routine; every other slot holds
 * lxpr_readdir_not_a_dir().  The slots are listed in the same order as in
 * lxpr_read_function[].
 */
static int (*lxpr_readdir_function[LXPR_NFILES])() = {
	lxpr_readdir_procdir,		/* /proc		*/
	lxpr_readdir_piddir,		/* /proc/<pid>		*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/cmdline	*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/cpu	*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/cwd	*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/environ	*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/exe	*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/maps	*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/mem	*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/root	*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/stat	*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/statm	*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/status	*/
	lxpr_readdir_fddir,		/* /proc/<pid>/fd	*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/fd/nn	*/
	lxpr_readdir_not_a_dir,		/* /proc/cmdline	*/
	lxpr_readdir_not_a_dir,		/* /proc/cpuinfo	*/
	lxpr_readdir_not_a_dir,		/* /proc/devices	*/
	lxpr_readdir_not_a_dir,		/* /proc/dma		*/
	lxpr_readdir_not_a_dir,		/* /proc/filesystems	*/
	lxpr_readdir_not_a_dir,		/* /proc/interrupts	*/
	lxpr_readdir_not_a_dir,		/* /proc/ioports	*/
	lxpr_readdir_not_a_dir,		/* /proc/kcore		*/
	lxpr_readdir_not_a_dir,		/* /proc/kmsg		*/
	lxpr_readdir_not_a_dir,		/* /proc/loadavg	*/
	lxpr_readdir_not_a_dir,		/* /proc/meminfo	*/
	lxpr_readdir_not_a_dir,		/* /proc/mounts		*/
	lxpr_readdir_netdir,		/* /proc/net		*/
	lxpr_readdir_not_a_dir,		/* /proc/net/arp	*/
	lxpr_readdir_not_a_dir,		/* /proc/net/dev	*/
	lxpr_readdir_not_a_dir,		/* /proc/net/dev_mcast	*/
	lxpr_readdir_not_a_dir,		/* /proc/net/igmp	*/
	lxpr_readdir_not_a_dir,		/* /proc/net/ip_mr_cache */
	lxpr_readdir_not_a_dir,		/* /proc/net/ip_mr_vif	*/
	lxpr_readdir_not_a_dir,		/* /proc/net/mcfilter	*/
	lxpr_readdir_not_a_dir,		/* /proc/net/netstat	*/
	lxpr_readdir_not_a_dir,		/* /proc/net/raw	*/
	lxpr_readdir_not_a_dir,		/* /proc/net/route	*/
	lxpr_readdir_not_a_dir,		/* /proc/net/rpc	*/
	lxpr_readdir_not_a_dir,		/* /proc/net/rt_cache	*/
	lxpr_readdir_not_a_dir,		/* /proc/net/sockstat	*/
	lxpr_readdir_not_a_dir,		/* /proc/net/snmp	*/
	lxpr_readdir_not_a_dir,		/* /proc/net/stat	*/
	lxpr_readdir_not_a_dir,		/* /proc/net/tcp	*/
	lxpr_readdir_not_a_dir,		/* /proc/net/udp	*/
	lxpr_readdir_not_a_dir,		/* /proc/net/unix	*/
	lxpr_readdir_not_a_dir,		/* /proc/partitions	*/
	lxpr_readdir_not_a_dir,		/* /proc/self		*/
	lxpr_readdir_not_a_dir,		/* /proc/stat		*/
	lxpr_readdir_not_a_dir,		/* /proc/uptime		*/
	lxpr_readdir_not_a_dir,		/* /proc/version	*/
};
    512 
    513 
    514 /*
    515  * lxpr_read(): Vnode operation for VOP_READ()
    516  *
    517  * As the format of all the files that can be read in the lx procfs is human
    518  * readable and not binary structures there do not have to be different
    519  * read variants depending on whether the reading process model is 32 or 64 bits
    520  * (at least in general, and certainly the difference is unlikely to be enough
    521  * to justify have different routines for 32 and 64 bit reads
    522  */
    523 /* ARGSUSED */
    524 static int
    525 lxpr_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
    526     caller_context_t *ct)
    527 {
    528 	lxpr_node_t *lxpnp = VTOLXP(vp);
    529 	lxpr_nodetype_t type = lxpnp->lxpr_type;
    530 	lxpr_uiobuf_t *uiobuf = lxpr_uiobuf_new(uiop);
    531 	int error;
    532 
    533 	ASSERT(type >= 0 && type < LXPR_NFILES);
    534 
    535 	lxpr_read_function[type](lxpnp, uiobuf);
    536 
    537 	error = lxpr_uiobuf_flush(uiobuf);
    538 	lxpr_uiobuf_free(uiobuf);
    539 
    540 	return (error);
    541 }
    542 
    543 
/*
 * lxpr_read_invalid(), lxpr_read_isdir(), lxpr_read_empty()
 *
 * Various special case reads:
 * - trying to read a directory
 * - invalid file (used to mean a file that should be implemented,
 *   but isn't yet)
 * - empty file
 * - wait to be able to read a file that will never have anything to read
 */

/*
 * lxpr_read_isdir(): read of a directory node; records EISDIR on the uio
 * buffer and produces no data.
 */
/* ARGSUSED */
static void
lxpr_read_isdir(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_uiobuf_seterr(uiobuf, EISDIR);
}
    560 
/*
 * lxpr_read_invalid(): read of a node that cannot be read as a file;
 * records EINVAL on the uio buffer and produces no data.
 */
/* ARGSUSED */
static void
lxpr_read_invalid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_uiobuf_seterr(uiobuf, EINVAL);
}
    567 
/*
 * lxpr_read_empty(): read of a file that has no contents; writes nothing to
 * the uio buffer and sets no error.
 */
/* ARGSUSED */
static void
lxpr_read_empty(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* intentionally empty: the buffer is left exactly as it was handed in */
}
    573 
    574 /*
    575  * lxpr_read_pid_cmdline():
    576  *
    577  * This is not precisely compatible with linux:
    578  *
    579  * The linux cmdline returns argv with the correct separation
    580  * using \0 between the arguments, we cannot do that without
    581  * copying the real argv from the correct process context.
    582  * This is too difficult to attempt so we pretend that the
    583  * entire cmdline is just argv[0]. This is good enough for
    584  * ps to display correctly, but might cause some other things
    585  * not to work correctly.
    586  */
    587 static void
    588 lxpr_read_pid_cmdline(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
    589 {
    590 	proc_t *p;
    591 
    592 	ASSERT(lxpnp->lxpr_type == LXPR_PID_CMDLINE);
    593 
    594 	p = lxpr_lock(lxpnp->lxpr_pid);
    595 	if (p == NULL) {
    596 		lxpr_uiobuf_seterr(uiobuf, EINVAL);
    597 		return;
    598 	}
    599 
    600 	if (PTOU(p)->u_argv != 0) {
    601 		char *buff = PTOU(p)->u_psargs;
    602 		int len = strlen(buff);
    603 		lxpr_unlock(p);
    604 		lxpr_uiobuf_write(uiobuf, buff, len+1);
    605 	} else {
    606 		lxpr_unlock(p);
    607 	}
    608 }
    609 
    610 
    611 /*
    612  * lxpr_read_pid_maps(): memory map file
    613  */
    614 static void
    615 lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
    616 {
    617 	proc_t *p;
    618 	struct as *as;
    619 	struct seg *seg;
    620 	char *buf;
    621 	int buflen = MAXPATHLEN;
    622 	struct print_data {
    623 		caddr_t saddr;
    624 		caddr_t eaddr;
    625 		int type;
    626 		char prot[5];
    627 		uint32_t offset;
    628 		vnode_t *vp;
    629 		struct print_data *next;
    630 	} *print_head = NULL;
    631 	struct print_data **print_tail = &print_head;
    632 	struct print_data *pbuf;
    633 
    634 	ASSERT(lxpnp->lxpr_type == LXPR_PID_MAPS);
    635 
    636 	p = lxpr_lock(lxpnp->lxpr_pid);
    637 	if (p == NULL) {
    638 		lxpr_uiobuf_seterr(uiobuf, EINVAL);
    639 		return;
    640 	}
    641 
    642 	as = p->p_as;
    643 
    644 	if (as == &kas) {
    645 		lxpr_unlock(p);
    646 		return;
    647 	}
    648 
    649 	mutex_exit(&p->p_lock);
    650 
    651 	/* Iterate over all segments in the address space */
    652 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
    653 	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
    654 		vnode_t *vp;
    655 		uint_t protbits;
    656 
    657 		pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP);
    658 
    659 		pbuf->saddr = seg->s_base;
    660 		pbuf->eaddr = seg->s_base+seg->s_size;
    661 		pbuf->type = SEGOP_GETTYPE(seg, seg->s_base);
    662 
    663 		/*
    664 		 * Cheat and only use the protection bits of the first page
    665 		 * in the segment
    666 		 */
    667 		(void) strncpy(pbuf->prot, "----", sizeof (pbuf->prot));
    668 		(void) SEGOP_GETPROT(seg, seg->s_base, 0, &protbits);
    669 
    670 		if (protbits & PROT_READ)	   pbuf->prot[0] = 'r';
    671 		if (protbits & PROT_WRITE)	   pbuf->prot[1] = 'w';
    672 		if (protbits & PROT_EXEC)	   pbuf->prot[2] = 'x';
    673 		if (pbuf->type & MAP_SHARED)	   pbuf->prot[3] = 's';
    674 		else if (pbuf->type & MAP_PRIVATE) pbuf->prot[3] = 'p';
    675 
    676 		if (seg->s_ops == &segvn_ops &&
    677 		    SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
    678 		    vp != NULL && vp->v_type == VREG) {
    679 			VN_HOLD(vp);
    680 			pbuf->vp = vp;
    681 		} else {
    682 			pbuf->vp = NULL;
    683 		}
    684 
    685 		pbuf->offset = (uint32_t)SEGOP_GETOFFSET(seg, pbuf->saddr);
    686 
    687 		pbuf->next = NULL;
    688 		*print_tail = pbuf;
    689 		print_tail = &pbuf->next;
    690 	}
    691 	AS_LOCK_EXIT(as, &as->a_lock);
    692 	mutex_enter(&p->p_lock);
    693 	lxpr_unlock(p);
    694 
    695 	buf = kmem_alloc(buflen, KM_SLEEP);
    696 
    697 	/* print the data we've extracted */
    698 	pbuf = print_head;
    699 	while (pbuf != NULL) {
    700 		struct print_data *pbuf_next;
    701 		vattr_t vattr;
    702 
    703 		int maj = 0;
    704 		int min = 0;
    705 		int inode = 0;
    706 
    707 		*buf = '\0';
    708 		if (pbuf->vp != NULL) {
    709 			vattr.va_mask = AT_FSID | AT_NODEID;
    710 			if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED(),
    711 			    NULL) == 0) {
    712 				maj = getmajor(vattr.va_fsid);
    713 				min = getminor(vattr.va_fsid);
    714 				inode = vattr.va_nodeid;
    715 			}
    716 			(void) vnodetopath(NULL, pbuf->vp, buf, buflen, CRED());
    717 			VN_RELE(pbuf->vp);
    718 		}
    719 
    720 		if (*buf != '\0') {
    721 			lxpr_uiobuf_printf(uiobuf,
    722 			    "%08x-%08x %s %08x %02d:%03d %d %s\n",
    723 			    pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
    724 			    maj, min, inode, buf);
    725 		} else {
    726 			lxpr_uiobuf_printf(uiobuf,
    727 			    "%08x-%08x %s %08x %02d:%03d %d\n",
    728 			    pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
    729 			    maj, min, inode);
    730 		}
    731 
    732 		pbuf_next = pbuf->next;
    733 		kmem_free(pbuf, sizeof (*pbuf));
    734 		pbuf = pbuf_next;
    735 	}
    736 
    737 	kmem_free(buf, buflen);
    738 }
    739 
    740 /*
    741  * lxpr_read_pid_statm(): memory status file
    742  */
    743 static void
    744 lxpr_read_pid_statm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
    745 {
    746 	proc_t *p;
    747 	struct as *as;
    748 	size_t vsize;
    749 	size_t rss;
    750 
    751 	ASSERT(lxpnp->lxpr_type == LXPR_PID_STATM);
    752 
    753 	p = lxpr_lock(lxpnp->lxpr_pid);
    754 	if (p == NULL) {
    755 		lxpr_uiobuf_seterr(uiobuf, EINVAL);
    756 		return;
    757 	}
    758 
    759 	as = p->p_as;
    760 
    761 	mutex_exit(&p->p_lock);
    762 
    763 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
    764 	vsize = btopr(as->a_resvsize);
    765 	rss = rm_asrss(as);
    766 	AS_LOCK_EXIT(as, &as->a_lock);
    767 
    768 	mutex_enter(&p->p_lock);
    769 	lxpr_unlock(p);
    770 
    771 	lxpr_uiobuf_printf(uiobuf,
    772 	    "%lu %lu %lu %lu %lu %lu %lu\n",
    773 	    vsize, rss, 0l, rss, 0l, 0l, 0l);
    774 }
    775 
    776 /*
    777  * lxpr_read_pid_status(): status file
    778  */
    779 static void
    780 lxpr_read_pid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
    781 {
    782 	proc_t *p;
    783 	kthread_t *t;
    784 	user_t *up;
    785 	cred_t *cr;
    786 	const gid_t *groups;
    787 	int    ngroups;
    788 	struct as *as;
    789 	char *status;
    790 	pid_t pid, ppid;
    791 	size_t vsize;
    792 	size_t rss;
    793 	k_sigset_t current, ignore, handle;
    794 	int    i, lx_sig;
    795 
    796 	ASSERT(lxpnp->lxpr_type == LXPR_PID_STATUS);
    797 
    798 	p = lxpr_lock(lxpnp->lxpr_pid);
    799 	if (p == NULL) {
    800 		lxpr_uiobuf_seterr(uiobuf, EINVAL);
    801 		return;
    802 	}
    803 
    804 	pid = p->p_pid;
    805 
    806 	/*
    807 	 * Convert pid to the Linux default of 1 if we're the zone's init
    808 	 * process
    809 	 */
    810 	if (pid == curproc->p_zone->zone_proc_initpid) {
    811 		pid = 1;
    812 		ppid = 0;	/* parent pid for init is 0 */
    813 	} else {
    814 		/*
    815 		 * Make sure not to reference parent PIDs that reside outside
    816 		 * the zone
    817 		 */
    818 		ppid = ((p->p_flag & SZONETOP)
    819 		    ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
    820 
    821 		/*
    822 		 * Convert ppid to the Linux default of 1 if our parent is the
    823 		 * zone's init process
    824 		 */
    825 		if (ppid == curproc->p_zone->zone_proc_initpid)
    826 			ppid = 1;
    827 	}
    828 
    829 	t = prchoose(p);
    830 	if (t != NULL) {
    831 		switch (t->t_state) {
    832 		case TS_SLEEP:
    833 			status = "S (sleeping)";
    834 			break;
    835 		case TS_RUN:
    836 		case TS_ONPROC:
    837 			status = "R (running)";
    838 			break;
    839 		case TS_ZOMB:
    840 			status = "Z (zombie)";
    841 			break;
    842 		case TS_STOPPED:
    843 			status = "T (stopped)";
    844 			break;
    845 		default:
    846 			status = "! (unknown)";
    847 			break;
    848 		}
    849 		thread_unlock(t);
    850 	} else {
    851 		/*
    852 		 * there is a hole in the exit code, where a proc can have
    853 		 * no threads but it is yet to be flagged SZOMB. We will
    854 		 * assume we are about to become a zombie
    855 		 */
    856 		status = "Z (zombie)";
    857 	}
    858 
    859 	up = PTOU(p);
    860 	mutex_enter(&p->p_crlock);
    861 	crhold(cr = p->p_cred);
    862 	mutex_exit(&p->p_crlock);
    863 
    864 	lxpr_uiobuf_printf(uiobuf,
    865 	    "Name:\t%s\n"
    866 	    "State:\t%s\n"
    867 	    "Tgid:\t%d\n"
    868 	    "Pid:\t%d\n"
    869 	    "PPid:\t%d\n"
    870 	    "TracerPid:\t%d\n"
    871 	    "Uid:\t%u\t%u\t%u\t%u\n"
    872 	    "Gid:\t%u\t%u\t%u\t%u\n"
    873 	    "FDSize:\t%d\n"
    874 	    "Groups:\t",
    875 	    up->u_comm,
    876 	    status,
    877 	    pid, /* thread group id - same as pid until we map lwps to procs */
    878 	    pid,
    879 	    ppid,
    880 	    0,
    881 	    crgetruid(cr), crgetuid(cr), crgetsuid(cr), crgetuid(cr),
    882 	    crgetrgid(cr), crgetgid(cr), crgetsgid(cr), crgetgid(cr),
    883 	    p->p_fno_ctl);
    884 
    885 	ngroups = crgetngroups(cr);
    886 	groups  = crgetgroups(cr);
    887 	for (i = 0; i < ngroups; i++) {
    888 		lxpr_uiobuf_printf(uiobuf,
    889 		    "%u ",
    890 		    groups[i]);
    891 	}
    892 	crfree(cr);
    893 
    894 	as = p->p_as;
    895 	if ((p->p_stat != SZOMB) && !(p->p_flag & SSYS) && (as != &kas)) {
    896 		mutex_exit(&p->p_lock);
    897 		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
    898 		vsize = as->a_resvsize;
    899 		rss = rm_asrss(as);
    900 		AS_LOCK_EXIT(as, &as->a_lock);
    901 		mutex_enter(&p->p_lock);
    902 
    903 		lxpr_uiobuf_printf(uiobuf,
    904 		    "\n"
    905 		    "VmSize:\t%8lu kB\n"
    906 		    "VmLck:\t%8lu kB\n"
    907 		    "VmRSS:\t%8lu kB\n"
    908 		    "VmData:\t%8lu kB\n"
    909 		    "VmStk:\t%8lu kB\n"
    910 		    "VmExe:\t%8lu kB\n"
    911 		    "VmLib:\t%8lu kB",
    912 		    btok(vsize),
    913 		    0l,
    914 		    ptok(rss),
    915 		    0l,
    916 		    btok(p->p_stksize),
    917 		    ptok(rss),
    918 		    0l);
    919 	}
    920 
    921 	sigemptyset(&current);
    922 	sigemptyset(&ignore);
    923 	sigemptyset(&handle);
    924 
    925 	for (i = 1; i < MAXSIG; i++) {
    926 		lx_sig = stol_signo[i];
    927 
    928 		if ((lx_sig > 0) && (lx_sig < MAXSIG)) {
    929 			if (sigismember(&p->p_sig, i))
    930 				sigaddset(&current, lx_sig);
    931 
    932 			if (up->u_signal[i] == SIG_IGN)
    933 				sigaddset(&ignore, lx_sig);
    934 			else if (up->u_signal[i] != SIG_DFL)
    935 				sigaddset(&handle, lx_sig);
    936 		}
    937 	}
    938 
    939 	lxpr_uiobuf_printf(uiobuf,
    940 	    "\n"
    941 	    "SigPnd:\t%08x%08x\n"
    942 	    "SigBlk:\t%08x%08x\n"
    943 	    "SigIgn:\t%08x%08x\n"
    944 	    "SigCgt:\t%08x%08x\n"
    945 	    "CapInh:\t%016x\n"
    946 	    "CapPrm:\t%016x\n"
    947 	    "CapEff:\t%016x\n",
    948 	    current.__sigbits[1], current.__sigbits[0],
    949 	    0, 0, /* signals blocked on per thread basis */
    950 	    ignore.__sigbits[1], ignore.__sigbits[0],
    951 	    handle.__sigbits[1], handle.__sigbits[0],
    952 	    /* Can't do anything with linux capabilities */
    953 	    0,
    954 	    0,
    955 	    0);
    956 
    957 	lxpr_unlock(p);
    958 }
    959 
    960 
    961 /*
    962  * lxpr_read_pid_stat(): pid stat file
    963  */
    964 static void
    965 lxpr_read_pid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
    966 {
    967 	proc_t *p;
    968 	kthread_t *t;
    969 	struct as *as;
    970 	char stat;
    971 	pid_t pid, ppid, pgpid, spid;
    972 	gid_t psgid;
    973 	dev_t psdev;
    974 	size_t rss, vsize;
    975 	int nice, pri;
    976 	caddr_t wchan;
    977 	processorid_t cpu;
    978 
    979 	ASSERT(lxpnp->lxpr_type == LXPR_PID_STAT);
    980 
    981 	p = lxpr_lock(lxpnp->lxpr_pid);
    982 	if (p == NULL) {
    983 		lxpr_uiobuf_seterr(uiobuf, EINVAL);
    984 		return;
    985 	}
    986 
    987 	pid = p->p_pid;
    988 
    989 	/*
    990 	 * Set Linux defaults if we're the zone's init process
    991 	 */
    992 	if (pid == curproc->p_zone->zone_proc_initpid) {
    993 		pid = 1;		/* PID for init */
    994 		ppid = 0;		/* parent PID for init is 0 */
    995 		pgpid = 0;		/* process group for init is 0 */
    996 		psgid = (gid_t)-1;	/* credential GID for init is -1 */
    997 		spid = 0;		/* session id for init is 0 */
    998 		psdev = 0;		/* session device for init is 0 */
    999 	} else {
   1000 		/*
   1001 		 * Make sure not to reference parent PIDs that reside outside
   1002 		 * the zone
   1003 		 */
   1004 		ppid = ((p->p_flag & SZONETOP)
   1005 		    ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
   1006 
   1007 		/*
   1008 		 * Convert ppid to the Linux default of 1 if our parent is the
   1009 		 * zone's init process
   1010 		 */
   1011 		if (ppid == curproc->p_zone->zone_proc_initpid)
   1012 			ppid = 1;
   1013 
   1014 		pgpid = p->p_pgrp;
   1015 
   1016 		mutex_enter(&p->p_splock);
   1017 		mutex_enter(&p->p_sessp->s_lock);
   1018 		spid = p->p_sessp->s_sid;
   1019 		/* XXBRAND psdev = DEV_TO_LXDEV(p->p_sessp->s_dev, VCHR); */
   1020 		psdev = p->p_sessp->s_dev;
   1021 		if (p->p_sessp->s_cred)
   1022 			psgid = crgetgid(p->p_sessp->s_cred);
   1023 		else
   1024 			psgid = crgetgid(p->p_cred);
   1025 
   1026 		mutex_exit(&p->p_sessp->s_lock);
   1027 		mutex_exit(&p->p_splock);
   1028 	}
   1029 
   1030 	t = prchoose(p);
   1031 	if (t != NULL) {
   1032 		switch (t->t_state) {
   1033 		case TS_SLEEP:
   1034 			stat = 'S'; break;
   1035 		case TS_RUN:
   1036 		case TS_ONPROC:
   1037 			stat = 'R'; break;
   1038 		case TS_ZOMB:
   1039 			stat = 'Z'; break;
   1040 		case TS_STOPPED:
   1041 			stat = 'T'; break;
   1042 		default:
   1043 			stat = '!'; break;
   1044 		}
   1045 
   1046 		if (CL_DONICE(t, NULL, 0, &nice) != 0)
   1047 			nice = 0;
   1048 
   1049 		pri = v.v_maxsyspri - t->t_pri;
   1050 		wchan = t->t_wchan;
   1051 		cpu = t->t_cpu->cpu_seqid;
   1052 		thread_unlock(t);
   1053 	} else {
   1054 		/* Only zombies have no threads */
   1055 		stat = 'Z';
   1056 		nice = 0;
   1057 		pri = 0;
   1058 		wchan = 0;
   1059 		cpu = 0;
   1060 	}
   1061 	as = p->p_as;
   1062 	mutex_exit(&p->p_lock);
   1063 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
   1064 	vsize = as->a_resvsize;
   1065 	rss = rm_asrss(as);
   1066 	AS_LOCK_EXIT(as, &as->a_lock);
   1067 	mutex_enter(&p->p_lock);
   1068 
   1069 	lxpr_uiobuf_printf(uiobuf,
   1070 	    "%d (%s) %c %d %d %d %d %d "
   1071 	    "%lu %lu %lu %lu %lu "
   1072 	    "%lu %lu %ld %ld "
   1073 	    "%d %d "
   1074 	    "0 "
   1075 	    "%ld %lu "
   1076 	    "%lu %ld %llu "
   1077 	    "%lu %lu %u "
   1078 	    "%lu %lu "
   1079 	    "%lu %lu %lu %lu "
   1080 	    "%lu "
   1081 	    "%lu %lu "
   1082 	    "%d "
   1083 	    "%d"
   1084 	    "\n",
   1085 	    pid,
   1086 	    PTOU(p)->u_comm,
   1087 	    stat,
   1088 	    ppid, pgpid,
   1089 	    spid, psdev, psgid,
   1090 	    0l, 0l, 0l, 0l, 0l, /* flags, minflt, cminflt, majflt, cmajflt */
   1091 	    p->p_utime, p->p_stime, p->p_cutime, p->p_cstime,
   1092 	    pri, nice,
   1093 	    0l, PTOU(p)->u_ticks, /* ticks till next SIGALARM, start time */
   1094 	    vsize, rss, p->p_vmem_ctl,
   1095 	    0l, 0l, USRSTACK, /* startcode, endcode, startstack */
   1096 	    0l, 0l, /* kstkesp, kstkeip */
   1097 	    0l, 0l, 0l, 0l, /* signal, blocked, sigignore, sigcatch */
   1098 	    wchan,
   1099 	    0l, 0l, /* nswap, cnswap */
   1100 	    0, /* exit_signal */
   1101 	    cpu);
   1102 
   1103 	lxpr_unlock(p);
   1104 }
   1105 
   1106 /* ARGSUSED */
   1107 static void
   1108 lxpr_read_net_arp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1109 {
   1110 }
   1111 
   1112 /* ARGSUSED */
   1113 static void
   1114 lxpr_read_net_dev(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1115 {
   1116 	lxpr_uiobuf_printf(uiobuf, "Inter-|   Receive                   "
   1117 	    "                             |  Transmit\n");
   1118 	lxpr_uiobuf_printf(uiobuf, " face |bytes    packets errs drop fifo"
   1119 	    " frame compressed multicast|bytes    packets errs drop fifo"
   1120 	    " colls carrier compressed\n");
   1121 
   1122 	/*
   1123 	 * XXX: data about each interface should go here, but we'll wait to
   1124 	 * see if anybody wants to use it.
   1125 	 */
   1126 }
   1127 
   1128 /* ARGSUSED */
   1129 static void
   1130 lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1131 {
   1132 }
   1133 
   1134 /* ARGSUSED */
   1135 static void
   1136 lxpr_read_net_igmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1137 {
   1138 }
   1139 
   1140 /* ARGSUSED */
   1141 static void
   1142 lxpr_read_net_ip_mr_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1143 {
   1144 }
   1145 
   1146 /* ARGSUSED */
   1147 static void
   1148 lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1149 {
   1150 }
   1151 
   1152 /* ARGSUSED */
   1153 static void
   1154 lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1155 {
   1156 }
   1157 
   1158 /* ARGSUSED */
   1159 static void
   1160 lxpr_read_net_netstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1161 {
   1162 }
   1163 
   1164 /* ARGSUSED */
   1165 static void
   1166 lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1167 {
   1168 }
   1169 
   1170 /* ARGSUSED */
   1171 static void
   1172 lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1173 {
   1174 }
   1175 
   1176 /* ARGSUSED */
   1177 static void
   1178 lxpr_read_net_rpc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1179 {
   1180 }
   1181 
   1182 /* ARGSUSED */
   1183 static void
   1184 lxpr_read_net_rt_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1185 {
   1186 }
   1187 
   1188 /* ARGSUSED */
   1189 static void
   1190 lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1191 {
   1192 }
   1193 
   1194 /* ARGSUSED */
   1195 static void
   1196 lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1197 {
   1198 }
   1199 
   1200 /* ARGSUSED */
   1201 static void
   1202 lxpr_read_net_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1203 {
   1204 }
   1205 
   1206 /* ARGSUSED */
   1207 static void
   1208 lxpr_read_net_tcp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1209 {
   1210 }
   1211 
   1212 /* ARGSUSED */
   1213 static void
   1214 lxpr_read_net_udp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1215 {
   1216 }
   1217 
   1218 /* ARGSUSED */
   1219 static void
   1220 lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1221 {
   1222 }
   1223 
   1224 /*
   1225  * lxpr_read_kmsg(): read the contents of the kernel message queue. We
   1226  * translate this into the reception of console messages for this lx zone; each
   1227  * read copies out a single zone console message, or blocks until the next one
   1228  * is produced.
   1229  */
   1230 
   1231 #define	LX_KMSG_PRI	"<0>"
   1232 
   1233 static void
   1234 lxpr_read_kmsg(lxpr_node_t *lxpnp, struct lxpr_uiobuf *uiobuf)
   1235 {
   1236 	ldi_handle_t	lh = lxpnp->lxpr_cons_ldih;
   1237 	mblk_t		*mp;
   1238 
   1239 	if (ldi_getmsg(lh, &mp, NULL) == 0) {
   1240 		/*
   1241 		 * lx procfs doesn't like successive reads to the same file
   1242 		 * descriptor unless we do an explicit rewind each time.
   1243 		 */
   1244 		lxpr_uiobuf_seek(uiobuf, 0);
   1245 
   1246 		lxpr_uiobuf_printf(uiobuf, "%s%s", LX_KMSG_PRI,
   1247 		    mp->b_cont->b_rptr);
   1248 
   1249 		freemsg(mp);
   1250 	}
   1251 }
   1252 
   1253 /*
   1254  * lxpr_read_loadavg(): read the contents of the "loadavg" file.
   1255  *
   1256  * Just enough for uptime to work
   1257  */
   1258 extern int nthread;
   1259 
   1260 static void
   1261 lxpr_read_loadavg(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1262 {
   1263 	ulong_t avenrun1;
   1264 	ulong_t avenrun5;
   1265 	ulong_t avenrun15;
   1266 	ulong_t avenrun1_cs;
   1267 	ulong_t avenrun5_cs;
   1268 	ulong_t avenrun15_cs;
   1269 	int loadavg[3];
   1270 	int *loadbuf;
   1271 	cpupart_t *cp;
   1272 
   1273 	uint_t nrunnable = 0;
   1274 	rctl_qty_t nlwps;
   1275 
   1276 	ASSERT(lxpnp->lxpr_type == LXPR_LOADAVG);
   1277 
   1278 	mutex_enter(&cpu_lock);
   1279 
   1280 	/*
   1281 	 * Need to add up values over all CPU partitions. If pools are active,
   1282 	 * only report the values of the zone's partition, which by definition
   1283 	 * includes the current CPU.
   1284 	 */
   1285 	if (pool_pset_enabled()) {
   1286 		psetid_t psetid = zone_pset_get(curproc->p_zone);
   1287 
   1288 		ASSERT(curproc->p_zone != &zone0);
   1289 		cp = CPU->cpu_part;
   1290 
   1291 		nrunnable = cp->cp_nrunning + cp->cp_nrunnable;
   1292 		(void) cpupart_get_loadavg(psetid, &loadavg[0], 3);
   1293 		loadbuf = &loadavg[0];
   1294 
   1295 		/*
   1296 		 * We'll report the total number of lwps in the zone for the
   1297 		 * "nproc" parameter of /proc/loadavg; good enough for lx.
   1298 		 */
   1299 		nlwps = curproc->p_zone->zone_nlwps;
   1300 	} else {
   1301 		cp = cp_list_head;
   1302 		do {
   1303 			nrunnable += cp->cp_nrunning + cp->cp_nrunnable;
   1304 		} while ((cp = cp->cp_next) != cp_list_head);
   1305 
   1306 		loadbuf = &avenrun[0];
   1307 
   1308 		/*
   1309 		 * This will report kernel threads as well as user lwps, but it
   1310 		 * should be good enough for lx consumers.
   1311 		 */
   1312 		nlwps = nthread;
   1313 	}
   1314 
   1315 	mutex_exit(&cpu_lock);
   1316 
   1317 	avenrun1 = loadbuf[0] >> FSHIFT;
   1318 	avenrun1_cs = ((loadbuf[0] & (FSCALE-1)) * 100) >> FSHIFT;
   1319 	avenrun5 = loadbuf[1] >> FSHIFT;
   1320 	avenrun5_cs = ((loadbuf[1] & (FSCALE-1)) * 100) >> FSHIFT;
   1321 	avenrun15 = loadbuf[2] >> FSHIFT;
   1322 	avenrun15_cs = ((loadbuf[2] & (FSCALE-1)) * 100) >> FSHIFT;
   1323 
   1324 	lxpr_uiobuf_printf(uiobuf,
   1325 	    "%ld.%02d %ld.%02d %ld.%02d %d/%d %d\n",
   1326 	    avenrun1, avenrun1_cs,
   1327 	    avenrun5, avenrun5_cs,
   1328 	    avenrun15, avenrun15_cs,
   1329 	    nrunnable, nlwps, 0);
   1330 }
   1331 
   1332 /*
   1333  * lxpr_read_meminfo(): read the contents of the "meminfo" file.
   1334  */
   1335 static void
   1336 lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1337 {
   1338 	long total_mem = physmem * PAGESIZE;
   1339 	long free_mem = freemem * PAGESIZE;
   1340 	long total_swap = k_anoninfo.ani_max * PAGESIZE;
   1341 	long used_swap = k_anoninfo.ani_phys_resv * PAGESIZE;
   1342 
   1343 	ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO);
   1344 
   1345 	lxpr_uiobuf_printf(uiobuf,
   1346 	    "        total:     used:    free:  shared: buffers:  cached:\n"
   1347 	    "Mem:  %8lu %8lu %8lu %8u %8u %8u\n"
   1348 	    "Swap: %8lu %8lu %8lu\n"
   1349 	    "MemTotal:  %8lu kB\n"
   1350 	    "MemFree:   %8lu kB\n"
   1351 	    "MemShared: %8u kB\n"
   1352 	    "Buffers:   %8u kB\n"
   1353 	    "Cached:    %8u kB\n"
   1354 	    "SwapCached:%8u kB\n"
   1355 	    "Active:    %8u kB\n"
   1356 	    "Inactive:  %8u kB\n"
   1357 	    "HighTotal: %8u kB\n"
   1358 	    "HighFree:  %8u kB\n"
   1359 	    "LowTotal:  %8u kB\n"
   1360 	    "LowFree:   %8u kB\n"
   1361 	    "SwapTotal: %8lu kB\n"
   1362 	    "SwapFree:  %8lu kB\n",
   1363 	    total_mem, total_mem - free_mem, free_mem, 0, 0, 0,
   1364 	    total_swap, used_swap, total_swap - used_swap,
   1365 	    btok(total_mem),				/* MemTotal */
   1366 	    btok(free_mem),				/* MemFree */
   1367 	    0,						/* MemShared */
   1368 	    0,						/* Buffers */
   1369 	    0,						/* Cached */
   1370 	    0,						/* SwapCached */
   1371 	    0,						/* Active */
   1372 	    0,						/* Inactive */
   1373 	    0,						/* HighTotal */
   1374 	    0,						/* HighFree */
   1375 	    btok(total_mem),				/* LowTotal */
   1376 	    btok(free_mem),				/* LowFree */
   1377 	    btok(total_swap),				/* SwapTotal */
   1378 	    btok(total_swap - used_swap));		/* SwapFree */
   1379 }
   1380 
   1381 /*
   1382  * lxpr_read_mounts():
   1383  */
   1384 /* ARGSUSED */
   1385 static void
   1386 lxpr_read_mounts(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1387 {
   1388 	struct vfs *vfsp;
   1389 	struct vfs *vfslist;
   1390 	zone_t *zone = LXPTOZ(lxpnp);
   1391 	struct print_data {
   1392 		refstr_t *vfs_mntpt;
   1393 		refstr_t *vfs_resource;
   1394 		uint_t vfs_flag;
   1395 		int vfs_fstype;
   1396 		struct print_data *next;
   1397 	} *print_head = NULL;
   1398 	struct print_data **print_tail = &print_head;
   1399 	struct print_data *printp;
   1400 
   1401 	vfs_list_read_lock();
   1402 
   1403 	if (zone == global_zone) {
   1404 		vfsp = vfslist = rootvfs;
   1405 	} else {
   1406 		vfsp = vfslist = zone->zone_vfslist;
   1407 		/*
   1408 		 * If the zone has a root entry, it will be the first in
   1409 		 * the list.  If it doesn't, we conjure one up.
   1410 		 */
   1411 		if (vfslist == NULL ||
   1412 		    strcmp(refstr_value(vfsp->vfs_mntpt),
   1413 		    zone->zone_rootpath) != 0) {
   1414 			struct vfs *tvfsp;
   1415 			/*
   1416 			 * The root of the zone is not a mount point.  The vfs
   1417 			 * we want to report is that of the zone's root vnode.
   1418 			 */
   1419 			tvfsp = zone->zone_rootvp->v_vfsp;
   1420 
   1421 			lxpr_uiobuf_printf(uiobuf,
   1422 			    "/ / %s %s 0 0\n",
   1423 			    vfssw[tvfsp->vfs_fstype].vsw_name,
   1424 			    tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
   1425 
   1426 		}
   1427 		if (vfslist == NULL) {
   1428 			vfs_list_unlock();
   1429 			return;
   1430 		}
   1431 	}
   1432 
   1433 	/*
   1434 	 * Later on we have to do a lookupname, which can end up causing
   1435 	 * another vfs_list_read_lock() to be called. Which can lead to a
   1436 	 * deadlock. To avoid this, we extract the data we need into a local
   1437 	 * list, then we can run this list without holding vfs_list_read_lock()
   1438 	 * We keep the list in the same order as the vfs_list
   1439 	 */
   1440 	do {
   1441 		/* Skip mounts we shouldn't show */
   1442 		if (vfsp->vfs_flag & VFS_NOMNTTAB) {
   1443 			goto nextfs;
   1444 		}
   1445 
   1446 		printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
   1447 		refstr_hold(vfsp->vfs_mntpt);
   1448 		printp->vfs_mntpt = vfsp->vfs_mntpt;
   1449 		refstr_hold(vfsp->vfs_resource);
   1450 		printp->vfs_resource = vfsp->vfs_resource;
   1451 		printp->vfs_flag = vfsp->vfs_flag;
   1452 		printp->vfs_fstype = vfsp->vfs_fstype;
   1453 		printp->next = NULL;
   1454 
   1455 		*print_tail = printp;
   1456 		print_tail = &printp->next;
   1457 
   1458 nextfs:
   1459 		vfsp = (zone == global_zone) ?
   1460 		    vfsp->vfs_next : vfsp->vfs_zone_next;
   1461 
   1462 	} while (vfsp != vfslist);
   1463 
   1464 	vfs_list_unlock();
   1465 
   1466 	/*
   1467 	 * now we can run through what we've extracted without holding
   1468 	 * vfs_list_read_lock()
   1469 	 */
   1470 	printp = print_head;
   1471 	while (printp != NULL) {
   1472 		struct print_data *printp_next;
   1473 		const char *resource;
   1474 		char *mntpt;
   1475 		struct vnode *vp;
   1476 		int error;
   1477 
   1478 		mntpt = (char *)refstr_value(printp->vfs_mntpt);
   1479 		resource = refstr_value(printp->vfs_resource);
   1480 
   1481 		if (mntpt != NULL && mntpt[0] != '\0')
   1482 			mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
   1483 		else
   1484 			mntpt = "-";
   1485 
   1486 		error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
   1487 
   1488 		if (error != 0)
   1489 			goto nextp;
   1490 
   1491 		if (!(vp->v_flag & VROOT)) {
   1492 			VN_RELE(vp);
   1493 			goto nextp;
   1494 		}
   1495 		VN_RELE(vp);
   1496 
   1497 		if (resource != NULL && resource[0] != '\0') {
   1498 			if (resource[0] == '/') {
   1499 				resource = ZONE_PATH_VISIBLE(resource, zone) ?
   1500 				    ZONE_PATH_TRANSLATE(resource, zone) :
   1501 				    mntpt;
   1502 			}
   1503 		} else {
   1504 			resource = "-";
   1505 		}
   1506 
   1507 		lxpr_uiobuf_printf(uiobuf,
   1508 		    "%s %s %s %s 0 0\n",
   1509 		    resource, mntpt, vfssw[printp->vfs_fstype].vsw_name,
   1510 		    printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
   1511 
   1512 nextp:
   1513 		printp_next = printp->next;
   1514 		refstr_rele(printp->vfs_mntpt);
   1515 		refstr_rele(printp->vfs_resource);
   1516 		kmem_free(printp, sizeof (*printp));
   1517 		printp = printp_next;
   1518 
   1519 	}
   1520 }
   1521 
   1522 /*
   1523  * lxpr_read_partitions():
   1524  *
   1525  * We don't support partitions in a local zone because it requires access to
   1526  * physical devices.  But we need to fake up enough of the file to show that we
   1527  * have no partitions.
   1528  */
   1529 /* ARGSUSED */
   1530 static void
   1531 lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1532 {
   1533 	lxpr_uiobuf_printf(uiobuf,
   1534 	    "major minor  #blocks  name     rio rmerge rsect ruse "
   1535 	    "wio wmerge wsect wuse running use aveq\n\n");
   1536 }
   1537 
   1538 /*
   1539  * lxpr_read_version(): read the contents of the "version" file.
   1540  */
   1541 /* ARGSUSED */
   1542 static void
   1543 lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1544 {
   1545 	char *vers;
   1546 	if (lx_get_zone_kern_version(LXPTOZ(lxpnp)) <= LX_KERN_2_4)
   1547 		vers = LX_UNAME_RELEASE_2_4;
   1548 	else
   1549 		vers = LX_UNAME_RELEASE_2_6;
   1550 
   1551 	lxpr_uiobuf_printf(uiobuf,
   1552 	    "%s version %s (%s version %d.%d.%d) "
   1553 	    "#%s SMP %s\n",
   1554 	    LX_UNAME_SYSNAME, vers,
   1555 #if defined(__GNUC__)
   1556 	    "gcc",
   1557 	    __GNUC__,
   1558 	    __GNUC_MINOR__,
   1559 	    __GNUC_PATCHLEVEL__,
   1560 #else
   1561 	    "Sun C",
   1562 	    __SUNPRO_C / 0x100,
   1563 	    (__SUNPRO_C & 0xff) / 0x10,
   1564 	    __SUNPRO_C & 0xf,
   1565 #endif
   1566 	    LX_UNAME_VERSION,
   1567 	    "00:00:00 00/00/00");
   1568 }
   1569 
   1570 
   1571 /*
   1572  * lxpr_read_stat(): read the contents of the "stat" file.
   1573  *
   1574  */
   1575 /* ARGSUSED */
   1576 
   1577 static void
   1578 lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1579 {
   1580 	cpu_t *cp, *cpstart;
   1581 	int pools_enabled;
   1582 	ulong_t idle_cum = 0;
   1583 	ulong_t sys_cum  = 0;
   1584 	ulong_t user_cum = 0;
   1585 	ulong_t irq_cum = 0;
   1586 	uint_t cpu_nrunnable_cum = 0;
   1587 	uint_t w_io_cum = 0;
   1588 
   1589 	ulong_t pgpgin_cum    = 0;
   1590 	ulong_t pgpgout_cum   = 0;
   1591 	ulong_t pgswapout_cum = 0;
   1592 	ulong_t pgswapin_cum  = 0;
   1593 	ulong_t intr_cum = 0;
   1594 	ulong_t pswitch_cum = 0;
   1595 	ulong_t forks_cum = 0;
   1596 	hrtime_t msnsecs[NCMSTATES];
   1597 	int lx_kern_version = lx_get_zone_kern_version(LXPTOZ(lxpnp));
   1598 	/* temporary variable since scalehrtime modifies data in place */
   1599 	hrtime_t tmptime;
   1600 
   1601 	ASSERT(lxpnp->lxpr_type == LXPR_STAT);
   1602 
   1603 	mutex_enter(&cpu_lock);
   1604 	pools_enabled = pool_pset_enabled();
   1605 
   1606 	/* Calculate cumulative stats */
   1607 	cp = cpstart = CPU;
   1608 	do {
   1609 		int i;
   1610 
   1611 		/*
   1612 		 * Don't count CPUs that aren't even in the system
   1613 		 * or aren't up yet.
   1614 		 */
   1615 		if ((cp->cpu_flags & CPU_EXISTS) == 0) {
   1616 			continue;
   1617 		}
   1618 
   1619 		get_cpu_mstate(cp, msnsecs);
   1620 
   1621 		idle_cum += NSEC_TO_TICK(msnsecs[CMS_IDLE]);
   1622 		sys_cum  += NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
   1623 		user_cum += NSEC_TO_TICK(msnsecs[CMS_USER]);
   1624 
   1625 		pgpgin_cum += CPU_STATS(cp, vm.pgpgin);
   1626 		pgpgout_cum += CPU_STATS(cp, vm.pgpgout);
   1627 		pgswapin_cum += CPU_STATS(cp, vm.pgswapin);
   1628 		pgswapout_cum += CPU_STATS(cp, vm.pgswapout);
   1629 
   1630 		if (lx_kern_version >= LX_KERN_2_6) {
   1631 			cpu_nrunnable_cum += cp->cpu_disp->disp_nrunnable;
   1632 			w_io_cum += CPU_STATS(cp, sys.iowait);
   1633 			for (i = 0; i < NCMSTATES; i++) {
   1634 				tmptime = cp->cpu_intracct[i];
   1635 				scalehrtime(&tmptime);
   1636 				irq_cum += NSEC_TO_TICK(tmptime);
   1637 			}
   1638 		}
   1639 
   1640 		for (i = 0; i < PIL_MAX; i++)
   1641 			intr_cum += CPU_STATS(cp, sys.intr[i]);
   1642 
   1643 		pswitch_cum += CPU_STATS(cp, sys.pswitch);
   1644 		forks_cum += CPU_STATS(cp, sys.sysfork);
   1645 		forks_cum += CPU_STATS(cp, sys.sysvfork);
   1646 
   1647 		if (pools_enabled)
   1648 			cp = cp->cpu_next_part;
   1649 		else
   1650 			cp = cp->cpu_next;
   1651 	} while (cp != cpstart);
   1652 
   1653 	if (lx_kern_version >= LX_KERN_2_6) {
   1654 		lxpr_uiobuf_printf(uiobuf,
   1655 		    "cpu %ld %ld %ld %ld %ld %ld %ld\n",
   1656 		    user_cum, 0, sys_cum, idle_cum, 0, irq_cum, 0);
   1657 	} else {
   1658 		lxpr_uiobuf_printf(uiobuf,
   1659 		    "cpu %ld %ld %ld %ld\n",
   1660 		    user_cum, 0, sys_cum, idle_cum);
   1661 	}
   1662 
   1663 	/* Do per processor stats */
   1664 	do {
   1665 		int i;
   1666 
   1667 		ulong_t idle_ticks;
   1668 		ulong_t sys_ticks;
   1669 		ulong_t user_ticks;
   1670 		ulong_t irq_ticks = 0;
   1671 
   1672 		/*
   1673 		 * Don't count CPUs that aren't even in the system
   1674 		 * or aren't up yet.
   1675 		 */
   1676 		if ((cp->cpu_flags & CPU_EXISTS) == 0) {
   1677 			continue;
   1678 		}
   1679 
   1680 		get_cpu_mstate(cp, msnsecs);
   1681 
   1682 		idle_ticks = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
   1683 		sys_ticks  = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
   1684 		user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]);
   1685 
   1686 		if (lx_kern_version >= LX_KERN_2_6) {
   1687 			for (i = 0; i < NCMSTATES; i++) {
   1688 				tmptime = cp->cpu_intracct[i];
   1689 				scalehrtime(&tmptime);
   1690 				irq_ticks += NSEC_TO_TICK(tmptime);
   1691 			}
   1692 
   1693 			lxpr_uiobuf_printf(uiobuf,
   1694 			    "cpu%d %ld %ld %ld %ld %ld %ld %ld\n",
   1695 			    cp->cpu_id, user_ticks, 0, sys_ticks, idle_ticks,
   1696 			    0, irq_ticks, 0);
   1697 		} else {
   1698 			lxpr_uiobuf_printf(uiobuf,
   1699 			    "cpu%d %ld %ld %ld %ld\n",
   1700 			    cp->cpu_id,
   1701 			    user_ticks, 0, sys_ticks, idle_ticks);
   1702 		}
   1703 
   1704 		if (pools_enabled)
   1705 			cp = cp->cpu_next_part;
   1706 		else
   1707 			cp = cp->cpu_next;
   1708 	} while (cp != cpstart);
   1709 
   1710 	mutex_exit(&cpu_lock);
   1711 
   1712 	if (lx_kern_version >= LX_KERN_2_6) {
   1713 		lxpr_uiobuf_printf(uiobuf,
   1714 		    "page %lu %lu\n"
   1715 		    "swap %lu %lu\n"
   1716 		    "intr %lu\n"
   1717 		    "ctxt %lu\n"
   1718 		    "btime %lu\n"
   1719 		    "processes %lu\n"
   1720 		    "procs_running %lu\n"
   1721 		    "procs_blocked %lu\n",
   1722 		    pgpgin_cum, pgpgout_cum,
   1723 		    pgswapin_cum, pgswapout_cum,
   1724 		    intr_cum,
   1725 		    pswitch_cum,
   1726 		    boot_time,
   1727 		    forks_cum,
   1728 		    cpu_nrunnable_cum,
   1729 		    w_io_cum);
   1730 	} else {
   1731 		lxpr_uiobuf_printf(uiobuf,
   1732 		    "page %lu %lu\n"
   1733 		    "swap %lu %lu\n"
   1734 		    "intr %lu\n"
   1735 		    "ctxt %lu\n"
   1736 		    "btime %lu\n"
   1737 		    "processes %lu\n",
   1738 		    pgpgin_cum, pgpgout_cum,
   1739 		    pgswapin_cum, pgswapout_cum,
   1740 		    intr_cum,
   1741 		    pswitch_cum,
   1742 		    boot_time,
   1743 		    forks_cum);
   1744 	}
   1745 }
   1746 
   1747 
   1748 /*
   1749  * lxpr_read_uptime(): read the contents of the "uptime" file.
   1750  *
   1751  * format is: "%.2lf, %.2lf",uptime_secs, idle_secs
   1752  * Use fixed point arithmetic to get 2 decimal places
   1753  */
   1754 /* ARGSUSED */
   1755 static void
   1756 lxpr_read_uptime(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1757 {
   1758 	cpu_t *cp, *cpstart;
   1759 	int pools_enabled;
   1760 	ulong_t idle_cum = 0;
   1761 	ulong_t cpu_count = 0;
   1762 	ulong_t idle_s;
   1763 	ulong_t idle_cs;
   1764 	ulong_t up_s;
   1765 	ulong_t up_cs;
   1766 	hrtime_t birthtime;
   1767 	hrtime_t centi_sec = 10000000;  /* 10^7 */
   1768 
   1769 	ASSERT(lxpnp->lxpr_type == LXPR_UPTIME);
   1770 
   1771 	/* Calculate cumulative stats */
   1772 	mutex_enter(&cpu_lock);
   1773 	pools_enabled = pool_pset_enabled();
   1774 
   1775 	cp = cpstart = CPU;
   1776 	do {
   1777 		/*
   1778 		 * Don't count CPUs that aren't even in the system
   1779 		 * or aren't up yet.
   1780 		 */
   1781 		if ((cp->cpu_flags & CPU_EXISTS) == 0) {
   1782 			continue;
   1783 		}
   1784 
   1785 		idle_cum += CPU_STATS(cp, sys.cpu_ticks_idle);
   1786 		idle_cum += CPU_STATS(cp, sys.cpu_ticks_wait);
   1787 		cpu_count += 1;
   1788 
   1789 		if (pools_enabled)
   1790 			cp = cp->cpu_next_part;
   1791 		else
   1792 			cp = cp->cpu_next;
   1793 	} while (cp != cpstart);
   1794 	mutex_exit(&cpu_lock);
   1795 
   1796 	/* Getting the Zone zsched process startup time */
   1797 	birthtime = LXPTOZ(lxpnp)->zone_zsched->p_mstart;
   1798 	up_cs = (gethrtime() - birthtime) / centi_sec;
   1799 	up_s = up_cs / 100;
   1800 	up_cs %= 100;
   1801 
   1802 	ASSERT(cpu_count > 0);
   1803 	idle_cum /= cpu_count;
   1804 	idle_s = idle_cum / hz;
   1805 	idle_cs = idle_cum % hz;
   1806 	idle_cs *= 100;
   1807 	idle_cs /= hz;
   1808 
   1809 	lxpr_uiobuf_printf(uiobuf,
   1810 	    "%ld.%02d %ld.%02d\n", up_s, up_cs, idle_s, idle_cs);
   1811 }
   1812 
   1813 static const char *amd_x_edx[] = {
   1814 	NULL,	NULL,	NULL,	NULL,
   1815 	NULL,	NULL,	NULL,	NULL,
   1816 	NULL,	NULL,	NULL,	"syscall",
   1817 	NULL,	NULL,	NULL,	NULL,
   1818 	NULL,	NULL,	NULL,	"mp",
   1819 	"nx",	NULL,	"mmxext", NULL,
   1820 	NULL,	NULL,	NULL,	NULL,
   1821 	NULL,	"lm",	"3dnowext", "3dnow"
   1822 };
   1823 
   1824 static const char *amd_x_ecx[] = {
   1825 	"lahf_lm", NULL, "svm", NULL,
   1826 	"altmovcr8"
   1827 };
   1828 
   1829 static const char *tm_x_edx[] = {
   1830 	"recovery", "longrun", NULL, "lrti"
   1831 };
   1832 
   1833 /*
   1834  * Intel calls no-execute "xd" in its docs, but Linux still reports it as "nx."
   1835  */
   1836 static const char *intc_x_edx[] = {
   1837 	NULL,	NULL,	NULL,	NULL,
   1838 	NULL,	NULL,	NULL,	NULL,
   1839 	NULL,	NULL,	NULL,	"syscall",
   1840 	NULL,	NULL,	NULL,	NULL,
   1841 	NULL,	NULL,	NULL,	NULL,
   1842 	"nx",	NULL,	NULL,   NULL,
   1843 	NULL,	NULL,	NULL,	NULL,
   1844 	NULL,	"lm",   NULL,   NULL
   1845 };
   1846 
   1847 static const char *intc_edx[] = {
   1848 	"fpu",	"vme",	"de",	"pse",
   1849 	"tsc",	"msr",	"pae",	"mce",
   1850 	"cx8",	"apic",	 NULL,	"sep",
   1851 	"mtrr",	"pge",	"mca",	"cmov",
   1852 	"pat",	"pse36", "pn",	"clflush",
   1853 	NULL,	"dts",	"acpi",	"mmx",
   1854 	"fxsr",	"sse",	"sse2",	"ss",
   1855 	"ht",	"tm",	"ia64",	"pbe"
   1856 };
   1857 
   1858 /*
   1859  * "sse3" on linux is called "pni" (Prescott New Instructions).
   1860  */
   1861 static const char *intc_ecx[] = {
   1862 	"pni",	NULL,	NULL, "monitor",
   1863 	"ds_cpl", NULL,	NULL, "est",
   1864 	"tm2",	NULL,	"cid", NULL,
   1865 	NULL,	"cx16",	"xtpr"
   1866 };
   1867 
   1868 static void
   1869 lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
   1870 {
   1871 	int i;
   1872 	uint32_t bits;
   1873 	cpu_t *cp, *cpstart;
   1874 	int pools_enabled;
   1875 	const char **fp;
   1876 	char brandstr[CPU_IDSTRLEN];
   1877 	struct cpuid_regs cpr;
   1878 	int maxeax;
   1879 	int std_ecx, std_edx, ext_ecx, ext_edx;
   1880 
   1881 	ASSERT(lxpnp->lxpr_type == LXPR_CPUINFO);
   1882 
   1883 	mutex_enter(&cpu_lock);
   1884 	pools_enabled = pool_pset_enabled();
   1885 
   1886 	cp = cpstart = CPU;
   1887 	do {
   1888 		/*
   1889 		 * This returns the maximum eax value for standard cpuid
   1890 		 * functions in eax.
   1891 		 */
   1892 		cpr.cp_eax = 0;
   1893 		(void) cpuid_insn(cp, &cpr);
   1894 		maxeax = cpr.cp_eax;
   1895 
   1896 		/*
   1897 		 * Get standard x86 feature flags.
   1898 		 */
   1899 		cpr.cp_eax = 1;
   1900 		(void) cpuid_insn(cp, &cpr);
   1901 		std_ecx = cpr.cp_ecx;
   1902 		std_edx = cpr.cp_edx;
   1903 
   1904 		/*
   1905 		 * Now get extended feature flags.
   1906 		 */
   1907 		cpr.cp_eax = 0x80000001;
   1908 		(void) cpuid_insn(cp, &cpr);
   1909 		ext_ecx = cpr.cp_ecx;
   1910 		ext_edx = cpr.cp_edx;
   1911 
   1912 		(void) cpuid_getbrandstr(cp, brandstr, CPU_IDSTRLEN);
   1913 
   1914 		lxpr_uiobuf_printf(uiobuf,
   1915 		    "processor\t: %d\n"
   1916 		    "vendor_id\t: %s\n"
   1917 		    "cpu family\t: %d\n"
   1918 		    "model\t\t: %d\n"
   1919 		    "model name\t: %s\n"
   1920 		    "stepping\t: %d\n"
   1921 		    "cpu MHz\t\t: %u.%03u\n",
   1922 		    cp->cpu_id, cpuid_getvendorstr(cp), cpuid_getfamily(cp),
   1923 		    cpuid_getmodel(cp), brandstr, cpuid_getstep(cp),
   1924 		    (uint32_t)(cpu_freq_hz / 1000000),
   1925 		    ((uint32_t)(cpu_freq_hz / 1000)) % 1000);
   1926 
   1927 		lxpr_uiobuf_printf(uiobuf, "cache size\t: %u KB\n",
   1928 		    getl2cacheinfo(cp, NULL, NULL, NULL) / 1024);
   1929 
   1930 		if (x86_feature & X86_HTT) {
   1931 			/*
   1932 			 * 'siblings' is used for HT-style threads
   1933 			 */
   1934 			lxpr_uiobuf_printf(uiobuf,
   1935 			    "physical id\t: %lu\n"
   1936 			    "siblings\t: %u\n",
   1937 			    pg_plat_hw_instance_id(cp, PGHW_CHIP),
   1938 			    cpuid_get_ncpu_per_chip(cp));
   1939 		}
   1940 
   1941 		/*
   1942 		 * Since we're relatively picky about running on older hardware,
   1943 		 * we can be somewhat cavalier about the answers to these ones.
   1944 		 *
   1945 		 * In fact, given the hardware we support, we just say:
   1946 		 *
   1947 		 *	fdiv_bug	: no	(if we're on a 64-bit kernel)
   1948 		 *	hlt_bug		: no
   1949 		 *	f00f_bug	: no
   1950 		 *	coma_bug	: no
   1951 		 *	wp		: yes	(write protect in supervsr mode)
   1952 		 */
   1953 		lxpr_uiobuf_printf(uiobuf,
   1954 		    "fdiv_bug\t: %s\n"
   1955 		    "hlt_bug \t: no\n"
   1956 		    "f00f_bug\t: no\n"
   1957 		    "coma_bug\t: no\n"
   1958 		    "fpu\t\t: %s\n"
   1959 		    "fpu_exception\t: %s\n"
   1960 		    "cpuid level\t: %d\n"
   1961 		    "flags\t\t:",
   1962 #if defined(__i386)
   1963 		    fpu_pentium_fdivbug ? "yes" : "no",
   1964 #else
   1965 		    "no",
   1966 #endif /* __i386 */
   1967 		    fpu_exists ? "yes" : "no", fpu_exists ? "yes" : "no",
   1968 		    maxeax);
   1969 
   1970 		for (bits = std_edx, fp = intc_edx, i = 0;
   1971 		    i < sizeof (intc_edx) / sizeof (intc_edx[0]); fp++, i++)
   1972 			if ((bits & (1 << i)) != 0 && *fp)
   1973 				lxpr_uiobuf_printf(uiobuf, " %s", *fp);
   1974 
   1975 		/*
   1976 		 * name additional features where appropriate
   1977 		 */
   1978 		switch (x86_vendor) {
   1979 		case X86_VENDOR_Intel:
   1980 			for (bits = ext_edx, fp = intc_x_edx, i = 0;
   1981 			    i < sizeof (intc_x_edx) / sizeof (intc_x_edx[0]);
   1982 			    fp++, i++)
   1983 				if ((bits & (1 << i)) != 0 && *fp)
   1984 					lxpr_uiobuf_printf(uiobuf, " %s", *fp);
   1985 			break;
   1986 
   1987 		case X86_VENDOR_AMD:
   1988 			for (bits = ext_edx, fp = amd_x_edx, i = 0;
   1989 			    i < sizeof (amd_x_edx) / sizeof (amd_x_edx[0]);
   1990 			    fp++, i++)
   1991 				if ((bits & (1 << i)) != 0 && *fp)
   1992 					lxpr_uiobuf_printf(uiobuf, " %s", *fp);
   1993 
   1994 			for (bits = ext_ecx, fp = amd_x_ecx, i = 0;
   1995 			    i < sizeof (amd_x_ecx) / sizeof (amd_x_ecx[0]);
   1996 			    fp++, i++)
   1997 				if ((bits & (1 << i)) != 0 && *fp)
   1998 					lxpr_uiobuf_printf(uiobuf, " %s", *fp);
   1999 			break;
   2000 
   2001 		case X86_VENDOR_TM:
   2002 			for (bits = ext_edx, fp = tm_x_edx, i = 0;
   2003 			    i < sizeof (tm_x_edx) / sizeof (tm_x_edx[0]);
   2004 			    fp++, i++)
   2005 				if ((bits & (1 << i)) != 0 && *fp)
   2006 					lxpr_uiobuf_printf(uiobuf, " %s", *fp);
   2007 			break;
   2008 		default:
   2009 			break;
   2010 		}
   2011 
   2012 		for (bits = std_ecx, fp = intc_ecx, i = 0;
   2013 		    i < sizeof (intc_ecx) / sizeof (intc_ecx[0]); fp++, i++)
   2014 			if ((bits & (1 << i)) != 0 && *fp)
   2015 				lxpr_uiobuf_printf(uiobuf, " %s", *fp);
   2016 
   2017 		lxpr_uiobuf_printf(uiobuf, "\n\n");
   2018 
   2019 		if (pools_enabled)
   2020 			cp = cp->cpu_next_part;
   2021 		else
   2022 			cp = cp->cpu_next;
   2023 	} while (cp != cpstart);
   2024 
   2025 	mutex_exit(&cpu_lock);
   2026 }
   2027 
/*
 * lxpr_read_fd(): read handler for LXPR_PID_FD_FD nodes.
 *
 * No data is ever produced for these nodes; the handler only records EFAULT
 * as the error on the output buffer.
 */
/* ARGSUSED */
static void
lxpr_read_fd(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_PID_FD_FD);
	lxpr_uiobuf_seterr(uiobuf, EFAULT);
}
   2035 
   2036 
   2037 
   2038 /*
   2039  * lxpr_getattr(): Vnode operation for VOP_GETATTR()
   2040  */
   2041 static int
   2042 lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
   2043     caller_context_t *ct)
   2044 {
   2045 	register lxpr_node_t *lxpnp = VTOLXP(vp);
   2046 	lxpr_nodetype_t type = lxpnp->lxpr_type;
   2047 	extern uint_t nproc;
   2048 	int error;
   2049 
   2050 	/*
   2051 	 * Return attributes of underlying vnode if ATTR_REAL
   2052 	 *
   2053 	 * but keep fd files with the symlink permissions
   2054 	 */
   2055 	if (lxpnp->lxpr_realvp != NULL && (flags & ATTR_REAL)) {
   2056 		vnode_t *rvp = lxpnp->lxpr_realvp;
   2057 
   2058 		/*
   2059 		 * withold attribute information to owner or root
   2060 		 */
   2061 		if ((error = VOP_ACCESS(rvp, 0, 0, cr, ct)) != 0) {
   2062 			return (error);
   2063 		}
   2064 
   2065 		/*
   2066 		 * now its attributes
   2067 		 */
   2068 		if ((error = VOP_GETATTR(rvp, vap, flags, cr, ct)) != 0) {
   2069 			return (error);
   2070 		}
   2071 
   2072 		/*
   2073 		 * if it's a file in lx /proc/pid/fd/xx then set its
   2074 		 * mode and keep it looking like a symlink
   2075 		 */
   2076 		if (type == LXPR_PID_FD_FD) {
   2077 			vap->va_mode = lxpnp->lxpr_mode;
   2078 			vap->va_type = vp->v_type;
   2079 			vap->va_size = 0;
   2080 			vap->va_nlink = 1;
   2081 		}
   2082 		return (0);
   2083 	}
   2084 
   2085 	/* Default attributes, that may be overridden below */
   2086 	bzero(vap, sizeof (*vap));
   2087 	vap->va_atime = vap->va_mtime = vap->va_ctime = lxpnp->lxpr_time;
   2088 	vap->va_nlink = 1;
   2089 	vap->va_type = vp->v_type;
   2090 	vap->va_mode = lxpnp->lxpr_mode;
   2091 	vap->va_fsid = vp->v_vfsp->vfs_dev;
   2092 	vap->va_blksize = DEV_BSIZE;
   2093 	vap->va_uid = lxpnp->lxpr_uid;
   2094 	vap->va_gid = lxpnp->lxpr_gid;
   2095 	vap->va_nodeid = lxpnp->lxpr_ino;
   2096 
   2097 	switch (type) {
   2098 	case LXPR_PROCDIR:
   2099 		vap->va_nlink = nproc + 2 + PROCDIRFILES;
   2100 		vap->va_size = (nproc + 2 + PROCDIRFILES) * LXPR_SDSIZE;
   2101 		break;
   2102 	case LXPR_PIDDIR:
   2103 		vap->va_nlink = PIDDIRFILES;
   2104 		vap->va_size = PIDDIRFILES * LXPR_SDSIZE;
   2105 		break;
   2106 	case LXPR_SELF:
   2107 		vap->va_uid = crgetruid(curproc->p_cred);
   2108 		vap->va_gid = crgetrgid(curproc->p_cred);
   2109 		break;
   2110 	default:
   2111 		break;
   2112 	}
   2113 
   2114 	vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
   2115 	return (0);
   2116 }
   2117 
   2118 
   2119 /*
   2120  * lxpr_access(): Vnode operation for VOP_ACCESS()
   2121  */
   2122 static int
   2123 lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
   2124 {
   2125 	lxpr_node_t *lxpnp = VTOLXP(vp);
   2126 	int shift = 0;
   2127 	proc_t *tp;
   2128 
   2129 	/* lx /proc is a read only file system */
   2130 	if (mode & VWRITE)
   2131 		return (EROFS);
   2132 
   2133 	/*
   2134 	 * If this is a restricted file, check access permissions.
   2135 	 */
   2136 	switch (lxpnp->lxpr_type) {
   2137 	case LXPR_PIDDIR:
   2138 		return (0);
   2139 	case LXPR_PID_CURDIR:
   2140 	case LXPR_PID_ENV:
   2141 	case LXPR_PID_EXE:
   2142 	case LXPR_PID_MAPS:
   2143 	case LXPR_PID_MEM:
   2144 	case LXPR_PID_ROOTDIR:
   2145 	case LXPR_PID_FDDIR:
   2146 	case LXPR_PID_FD_FD:
   2147 		if ((tp = lxpr_lock(lxpnp->lxpr_pid)) == NULL)
   2148 			return (ENOENT);
   2149 		if (tp != curproc && secpolicy_proc_access(cr) != 0 &&
   2150 		    priv_proc_cred_perm(cr, tp, NULL, mode) != 0) {
   2151 			lxpr_unlock(tp);
   2152 			return (EACCES);
   2153 		}
   2154 		lxpr_unlock(tp);
   2155 	default:
   2156 		break;
   2157 	}
   2158 
   2159 	if (lxpnp->lxpr_realvp != NULL) {
   2160 		/*
   2161 		 * For these we use the underlying vnode's accessibility.
   2162 		 */
   2163 		return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr, ct));
   2164 	}
   2165 
   2166 	/* If user is root allow access regardless of permission bits */
   2167 	if (secpolicy_proc_access(cr) == 0)
   2168 		return (0);
   2169 
   2170 	/*
   2171 	 * Access check is based on only
   2172 	 * one of owner, group, public.
   2173 	 * If not owner, then check group.
   2174 	 * If not a member of the group, then
   2175 	 * check public access.
   2176 	 */
   2177 	if (crgetuid(cr) != lxpnp->lxpr_uid) {
   2178 		shift += 3;
   2179 		if (!groupmember((uid_t)lxpnp->lxpr_gid, cr))
   2180 			shift += 3;
   2181 	}
   2182 
   2183 	mode &= ~(lxpnp->lxpr_mode << shift);
   2184 
   2185 	if (mode == 0)
   2186 		return (0);
   2187 
   2188 	return (EACCES);
   2189 }
   2190 
   2191 
   2192 
   2193 
/*
 * Lookup handler that never finds anything: it ignores both arguments and
 * always returns NULL.
 */
/* ARGSUSED */
static vnode_t *
lxpr_lookup_not_a_dir(vnode_t *dp, char *comp)
{
	return (NULL);
}
   2200 
   2201 
   2202 /*
   2203  * lxpr_lookup(): Vnode operation for VOP_LOOKUP()
   2204  */
   2205 /* ARGSUSED */
   2206 static int
   2207 lxpr_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp,
   2208 	int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
   2209 	int *direntflags, pathname_t *realpnp)
   2210 {
   2211 	lxpr_node_t *lxpnp = VTOLXP(dp);
   2212 	lxpr_nodetype_t type = lxpnp->lxpr_type;
   2213 	int error;
   2214 
   2215 	ASSERT(dp->v_type == VDIR);
   2216 	ASSERT(type >= 0 && type < LXPR_NFILES);
   2217 
   2218 	/*
   2219 	 * we should never get here because the lookup
   2220 	 * is done on the realvp for these nodes
   2221 	 */
   2222 	ASSERT(type != LXPR_PID_FD_FD &&
   2223 	    type != LXPR_PID_CURDIR &&
   2224 	    type != LXPR_PID_ROOTDIR);
   2225 
   2226 	/*
   2227 	 * restrict lookup permission to owner or root
   2228 	 */
   2229 	if ((error = lxpr_access(dp, VEXEC, 0, cr, ct)) != 0) {
   2230 		return (error);
   2231 	}
   2232 
   2233 	/*
   2234 	 * Just return the parent vnode
   2235 	 * if thats where we are trying to go
   2236 	 */
   2237 	if (strcmp(comp, "..") == 0) {
   2238 		VN_HOLD(lxpnp->lxpr_parent);
   2239 		*vpp = lxpnp->lxpr_parent;
   2240 		return (0);
   2241 	}
   2242 
   2243 	/*
   2244 	 * Special handling for directory searches
   2245 	 * Note: null component name is synonym for
   2246 	 * current directory being searched.
   2247 	 */
   2248 	if ((dp->v_type == VDIR) && (*comp == '\0' || strcmp(comp, ".") == 0)) {
   2249 		VN_HOLD(dp);
   2250 		*vpp = dp;
   2251 		return (0);
   2252 	}
   2253 
   2254 	*vpp = (lxpr_lookup_function[type](dp, comp));
   2255 	return ((*vpp == NULL) ? ENOENT : 0);
   2256 }
   2257 
   2258 /*
   2259  * Do a sequential search on the given directory table
   2260  */
   2261 static vnode_t *
   2262 lxpr_lookup_common(vnode_t *dp, char *comp, proc_t *p,
   2263     lxpr_dirent_t *dirtab, int dirtablen)
   2264 {
   2265 	lxpr_node_t *lxpnp;
   2266 	int count;
   2267 
   2268 	for (count = 0; count < dirtablen; count++) {
   2269 		if (strcmp(dirtab[count].d_name, comp) == 0) {
   2270 			lxpnp = lxpr_getnode(dp, dirtab[count].d_type, p, 0);
   2271 			dp = LXPTOV(lxpnp);
   2272 			ASSERT(dp != NULL);
   2273 			return (dp);
   2274 		}
   2275 	}
   2276 	return (NULL);
   2277 }
   2278 
   2279 
   2280 static vnode_t *
   2281 lxpr_lookup_piddir(vnode_t *dp, char *comp)
   2282 {
   2283 	proc_t *p;
   2284 
   2285 	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PIDDIR);
   2286 
   2287 	p = lxpr_lock(VTOLXP(dp)->lxpr_pid);
   2288 	if (p == NULL)
   2289 		return (NULL);
   2290 
   2291 	dp = lxpr_lookup_common(dp, comp, p, piddir, PIDDIRFILES);
   2292 
   2293 	lxpr_unlock(p);
   2294 
   2295 	return (dp);
   2296 }
   2297 
   2298 
/*
 * Lookup one of the process's open files.
 *
 * 'comp' must be a decimal file descriptor number.  On success the returned
 * vnode belongs to a new LXPR_PID_FD_FD node whose lxpr_realvp refers to
 * (and holds) the vnode of the open file.  NULL is returned if the name is
 * not a number, the process cannot be locked or cannot have open files, or
 * the descriptor is not open.
 */
static vnode_t *
lxpr_lookup_fddir(vnode_t *dp, char *comp)
{
	lxpr_node_t *dlxpnp = VTOLXP(dp);
	lxpr_node_t *lxpnp;
	vnode_t *vp = NULL;
	proc_t *p;
	file_t *fp;
	uint_t fd;
	int c;
	uf_entry_t *ufp;
	uf_info_t *fip;

	ASSERT(dlxpnp->lxpr_type == LXPR_PID_FDDIR);

	/*
	 * convert the string rendition of the filename
	 * to a file descriptor
	 */
	fd = 0;
	while ((c = *comp++) != '\0') {
		int ofd;
		/* anything other than a decimal digit is not an fd name */
		if (c < '0' || c > '9')
			return (NULL);

		ofd = fd;
		fd = 10*fd + c - '0';
		/*
		 * integer overflow: if the accumulated value wrapped,
		 * dividing it by 10 no longer gives back the old value
		 */
		if (fd / 10 != ofd)
			return (NULL);
	}

	/*
	 * get the proc to work with and lock it
	 */
	p = lxpr_lock(dlxpnp->lxpr_pid);
	if ((p == NULL))
		return (NULL);

	/*
	 * If the process is a zombie or system process
	 * it can't have any open files.
	 */
	if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
		lxpr_unlock(p);
		return (NULL);
	}

	/*
	 * get us a fresh node/vnode
	 */
	lxpnp = lxpr_getnode(dp, LXPR_PID_FD_FD, p, fd);

	/*
	 * get open file info
	 */
	fip = (&(p)->p_user.u_finfo);
	mutex_enter(&fip->fi_lock);

	/*
	 * got the fd data so now done with this proc
	 */
	lxpr_unlock(p);

	/* descriptors at or beyond fi_nfiles leave vp NULL */
	if (fd < fip->fi_nfiles) {
		UF_ENTER(ufp, fip, fd);
		/*
		 * ensure the fd is still kosher.
		 * it may have gone between the readdir and
		 * the lookup
		 */
		if (fip->fi_list[fd].uf_file == NULL) {
			mutex_exit(&fip->fi_lock);
			UF_EXIT(ufp);
			lxpr_freenode(lxpnp);
			return (NULL);
		}

		if ((fp = ufp->uf_file) != NULL)
			vp = fp->f_vnode;
		UF_EXIT(ufp);
	}
	mutex_exit(&fip->fi_lock);

	if (vp == NULL) {
		/* no open file was found, so the new node is not needed */
		lxpr_freenode(lxpnp);
		return (NULL);
	} else {
		/*
		 * Fill in the lxpr_node so future references will
		 * be able to find the underlying vnode.
		 * The vnode is held on the realvp.
		 */
		lxpnp->lxpr_realvp = vp;
		VN_HOLD(lxpnp->lxpr_realvp);
	}

	dp = LXPTOV(lxpnp);
	ASSERT(dp != NULL);

	return (dp);
}
   2404 
   2405 
   2406 static vnode_t *
   2407 lxpr_lookup_netdir(vnode_t *dp, char *comp)
   2408 {
   2409 	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_NETDIR);
   2410 
   2411 	dp = lxpr_lookup_common(dp, comp, NULL, netdir, NETDIRFILES);
   2412 
   2413 	return (dp);
   2414 }
   2415 
   2416 
   2417 static vnode_t *
   2418 lxpr_lookup_procdir(vnode_t *dp, char *comp)
   2419 {
   2420 	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PROCDIR);
   2421 
   2422 	/*
   2423 	 * We know all the names of files & dirs in our
   2424 	 * file system structure except those that are pid names.
   2425 	 * These change as pids are created/deleted etc.
   2426 	 * So just look for a number as the first char to see if we
   2427 	 * are we doing pid lookups?
   2428 	 *
   2429 	 * Don't need to check for "self" as it is implemented as a symlink
   2430 	 */
   2431 	if (*comp >= '0' && *comp <= '9') {
   2432 		pid_t pid = 0;
   2433 		lxpr_node_t *lxpnp = NULL;
   2434 		proc_t *p;
   2435 		int c;
   2436 
   2437 		while ((c = *comp++) != '\0')
   2438 			pid = 10*pid + c - '0';
   2439 
   2440 		/*
   2441 		 * Can't continue if the process is still loading
   2442 		 * or it doesn't really exist yet (or maybe it just died!)
   2443 		 */
   2444 		p = lxpr_lock(pid);
   2445 		if (p == NULL)
   2446 			return (NULL);
   2447 
   2448 		if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
   2449 			lxpr_unlock(p);
   2450 			return (NULL);
   2451 		}
   2452 
   2453 		/*
   2454 		 * allocate and fill in a new lx /proc node
   2455 		 */
   2456 		lxpnp = lxpr_getnode(dp, LXPR_PIDDIR, p, 0);
   2457 
   2458 		lxpr_unlock(p);
   2459 
   2460 		dp = LXPTOV(lxpnp);
   2461 		ASSERT(dp != NULL);
   2462 
   2463 		return (dp);
   2464 
   2465 	}
   2466 
   2467 	/* Lookup fixed names */
   2468 	return (lxpr_lookup_common(dp, comp, NULL, lx_procdir, PROCDIRFILES));
   2469 }
   2470 
   2471 
   2472 
   2473 
   2474 /*
   2475  * lxpr_readdir(): Vnode operation for VOP_READDIR()
   2476  */
   2477 /* ARGSUSED */
   2478 static int
   2479 lxpr_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp,
   2480 	caller_context_t *ct, int flags)
   2481 {
   2482 	lxpr_node_t *lxpnp = VTOLXP(dp);
   2483 	lxpr_nodetype_t type = lxpnp->lxpr_type;
   2484 	ssize_t uresid;
   2485 	off_t uoffset;
   2486 	int error;
   2487 
   2488 	ASSERT(dp->v_type == VDIR);
   2489 	ASSERT(type >= 0 && type < LXPR_NFILES);
   2490 
   2491 	/*
   2492 	 * we should never get here because the readdir
   2493 	 * is done on the realvp for these nodes
   2494 	 */
   2495 	ASSERT(type != LXPR_PID_FD_FD &&
   2496 	    type != LXPR_PID_CURDIR &&
   2497 	    type != LXPR_PID_ROOTDIR);
   2498 
   2499 	/*
   2500 	 * restrict readdir permission to owner or root
   2501 	 */
   2502 	if ((error = lxpr_access(dp, VREAD, 0, cr, ct)) != 0)
   2503 		return (error);
   2504 
   2505 	uoffset = uiop->uio_offset;
   2506 	uresid = uiop->uio_resid;
   2507 
   2508 	/* can't do negative reads */
   2509 	if (uoffset < 0 || uresid <= 0)
   2510 		return (EINVAL);
   2511 
   2512 	/* can't read directory entries that don't exist! */
   2513 	if (uoffset % LXPR_SDSIZE)
   2514 		return (ENOENT);
   2515 
   2516 	return (lxpr_readdir_function[lxpnp->lxpr_type](lxpnp, uiop, eofp));
   2517 }
   2518 
   2519 
/*
 * Readdir handler that always fails: it ignores its arguments and returns
 * ENOTDIR.
 */
/* ARGSUSED */
static int
lxpr_readdir_not_a_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	return (ENOTDIR);
}
   2526 
   2527 /*
   2528  * This has the common logic for returning directory entries
   2529  */
   2530 static int
   2531 lxpr_readdir_common(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp,
   2532     lxpr_dirent_t *dirtab, int dirtablen)
   2533 {
   2534 	/* bp holds one dirent64 structure */
   2535 	longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
   2536 	dirent64_t *dirent = (dirent64_t *)bp;
   2537 	ssize_t oresid;	/* save a copy for testing later */
   2538 	ssize_t uresid;
   2539 
   2540 	oresid = uiop->uio_resid;
   2541 
   2542 	/* clear out the dirent buffer */
   2543 	bzero(bp, sizeof (bp));
   2544 
   2545 	/*
   2546 	 * Satisfy user request
   2547 	 */
   2548 	while ((uresid = uiop->uio_resid) > 0) {
   2549 		int dirindex;
   2550 		off_t uoffset;
   2551 		int reclen;
   2552 		int error;
   2553 
   2554 		uoffset = uiop->uio_offset;
   2555 		dirindex  = (uoffset / LXPR_SDSIZE) - 2;
   2556 
   2557 		if (uoffset == 0) {
   2558 
   2559 			dirent->d_ino = lxpnp->lxpr_ino;
   2560 			dirent->d_name[0] = '.';
   2561 			dirent->d_name[1] = '\0';
   2562 			reclen = DIRENT64_RECLEN(1);
   2563 
   2564 		} else if (uoffset == LXPR_SDSIZE) {
   2565 
   2566 			dirent->d_ino = lxpr_parentinode(lxpnp);
   2567 			dirent->d_name[0] = '.';
   2568 			dirent->d_name[1] = '.';
   2569 			dirent->d_name[2] = '\0';
   2570 			reclen = DIRENT64_RECLEN(2);
   2571 
   2572 		} else if (dirindex < dirtablen) {
   2573 			int slen = strlen(dirtab[dirindex].d_name);
   2574 
   2575 			dirent->d_ino = lxpr_inode(dirtab[dirindex].d_type,
   2576 			    lxpnp->lxpr_pid, 0);
   2577 
   2578 			ASSERT(slen < LXPNSIZ);
   2579 			(void) strcpy(dirent->d_name, dirtab[dirindex].d_name);
   2580 			reclen = DIRENT64_RECLEN(slen);
   2581 
   2582 		} else {
   2583 			/* Run out of table entries */
   2584 			if (eofp) {
   2585 				*eofp = 1;
   2586 			}
   2587 			return (0);
   2588 		}
   2589 
   2590 		dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
   2591 		dirent->d_reclen = (ushort_t)reclen;
   2592 
   2593 		/*
   2594 		 * if the size of the data to transfer is greater
   2595 		 * that that requested then we can't do it this transfer.
   2596 		 */
   2597 		if (reclen > uresid) {
   2598 			/*
   2599 			 * Error if no entries have been returned yet.
   2600 			 */
   2601 			if (uresid == oresid) {
   2602 				return (EINVAL);
   2603 			}
   2604 			break;
   2605 		}
   2606 
   2607 		/*
   2608 		 * uiomove() updates both uiop->uio_resid and
   2609 		 * uiop->uio_offset by the same amount.  But we want
   2610 		 * uiop->uio_offset to change in increments
   2611 		 * of LXPR_SDSIZE, which is different from the number of bytes
   2612 		 * being returned to the user.
   2613 		 * So we set uiop->uio_offset separately, ignoring what
   2614 		 * uiomove() does.
   2615 		 */
   2616 		if (error = uiomove((caddr_t)dirent, reclen, UIO_READ, uiop)) {
   2617 			return (error);
   2618 		}
   2619 
   2620 		uiop->uio_offset = uoffset + LXPR_SDSIZE;
   2621 	}
   2622 
   2623 	/* Have run out of space, but could have just done last table entry */
   2624 	if (eofp) {
   2625 		*eofp =
   2626 		    (uiop->uio_offset >= ((dirtablen+2) * LXPR_SDSIZE)) ? 1 : 0;
   2627 	}
   2628 	return (0);
   2629 }
   2630 
   2631 
   2632 static int
   2633 lxpr_readdir_procdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
   2634 {
   2635 	/* bp holds one dirent64 structure */
   2636 	longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
   2637 	dirent64_t *dirent = (dirent64_t *)bp;
   2638 	ssize_t oresid;	/* save a copy for testing later */
   2639 	ssize_t uresid;
   2640 	off_t uoffset;
   2641 	zoneid_t zoneid;
   2642 	pid_t pid;
   2643 	int error;
   2644 	int ceof;
   2645 
   2646 	ASSERT(lxpnp->lxpr_type == LXPR_PROCDIR);
   2647 
   2648 	oresid = uiop->uio_resid;
   2649 	zoneid = LXPTOZ(lxpnp)->zone_id;
   2650 
   2651 	/*
   2652 	 * We return directory entries in the order:
   2653 	 * "." and ".." then the unique lx procfs files, then the
   2654 	 * directories corresponding to the running processes.
   2655 	 *
   2656 	 * This is a good order because it allows us to more easily
   2657 	 * keep track of where we are betwen calls to getdents().
   2658 	 * If the number of processes changes between calls then we
   2659 	 * can't lose track of where we are in the lx procfs files.
   2660 	 */
   2661 
   2662 	/* Do the fixed entries */
   2663 	error = lxpr_readdir_common(lxpnp, uiop, &ceof, lx_procdir,
   2664 	    PROCDIRFILES);
   2665 
   2666 	/* Finished if we got an error or if we couldn't do all the table */
   2667 	if (error != 0 || ceof == 0)
   2668 		return (error);
   2669 
   2670 	/* clear out the dirent buffer */
   2671 	bzero(bp, sizeof (bp));
   2672 
   2673 	/* Do the process entries */
   2674 	while ((uresid = uiop->uio_resid) > 0) {
   2675 		proc_t *p;
   2676 		int len;
   2677 		int reclen;
   2678 		int i;
   2679 
   2680 		uoffset = uiop->uio_offset;
   2681 
   2682 		/*
   2683 		 * Stop when entire proc table has been examined.
   2684 		 */
   2685 		i = (uoffset / LXPR_SDSIZE) - 2 - PROCDIRFILES;
   2686 		if (i >= v.v_proc) {
   2687 			/* Run out of table entries */
   2688 			if (eofp) {
   2689 				*eofp = 1;
   2690 			}
   2691 			return (0);
   2692 		}
   2693 		mutex_enter(&pidlock);
   2694 
   2695 		/*
   2696 		 * Skip indices for which there is no pid_entry, PIDs for
   2697 		 * which there is no corresponding process, a PID of 0,
   2698 		 * and anything the security policy doesn't allow
   2699 		 * us to look at.
   2700 		 */
   2701 		if ((p = pid_entry(i)) == NULL || p->p_stat == SIDL ||
   2702 		    p->p_pid == 0 ||
   2703 		    secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
   2704 			mutex_exit(&pidlock);
   2705 			goto next;
   2706 		}
   2707 		mutex_exit(&pidlock);
   2708 
   2709 		/*
   2710 		 * Convert pid to the Linux default of 1 if we're the zone's
   2711 		 * init process, otherwise use the value from the proc
   2712 		 * structure
   2713 		 */
   2714 		pid = ((p->p_pid != curproc->p_zone->zone_proc_initpid) ?
   2715 		    p->p_pid : 1);
   2716 
   2717 		/*
   2718 		 * If this /proc was mounted in the global zone, view
   2719 		 * all procs; otherwise, only view zone member procs.
   2720 		 */
   2721 		if (zoneid != GLOBAL_ZONEID && p->p_zone->zone_id != zoneid) {
   2722 			goto next;
   2723 		}
   2724 
   2725 		ASSERT(p->p_stat != 0);
   2726 
   2727 		dirent->d_ino = lxpr_inode(LXPR_PIDDIR, pid, 0);
   2728 		len = snprintf(dirent->d_name, LXPNSIZ, "%d", pid);
   2729 		ASSERT(len < LXPNSIZ);
   2730 		reclen = DIRENT64_RECLEN(len);
   2731 
   2732 		dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
   2733 		dirent->d_reclen = (ushort_t)reclen;
   2734 
   2735 		/*
   2736 		 * if the size of the data to transfer is greater
   2737 		 * that that requested then we can't do it this transfer.
   2738 		 */
   2739 		if (reclen > uresid) {
   2740 			/*
   2741 			 * Error if no entries have been returned yet.
   2742 			 */
   2743 			if (uresid == oresid)
   2744 				return (EINVAL);
   2745 			break;
   2746 		}
   2747 
   2748 		/*
   2749 		 * uiomove() updates both uiop->uio_resid and
   2750 		 * uiop->uio_offset by the same amount.  But we want
   2751 		 * uiop->uio_offset to change in increments
   2752 		 * of LXPR_SDSIZE, which is different from the number of bytes
   2753 		 * being returned to the user.
   2754 		 * So we set uiop->uio_offset separately, in the
   2755 		 * increment of this for loop, ignoring what uiomove() does.
   2756 		 */
   2757 		if (error = uiomove((caddr_t)dirent, reclen, UIO_READ, uiop))
   2758 			return (error);
   2759 
   2760 next:
   2761 		uiop->uio_offset = uoffset + LXPR_SDSIZE;
   2762 	}
   2763 
   2764 	if (eofp)
   2765 		*eofp =
   2766 		    (uiop->uio_offset >=
   2767 		    ((v.v_proc + PROCDIRFILES + 2) * LXPR_SDSIZE)) ? 1 : 0;
   2768 
   2769 	return (0);
   2770 }
   2771 
   2772 
   2773 static int
   2774 lxpr_readdir_piddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
   2775 {
   2776 	proc_t *p;
   2777 
   2778 	ASSERT(lxpnp->lxpr_type == LXPR_PIDDIR);
   2779 
   2780 	/* can't read its contents if it died */
   2781 	mutex_enter(&pidlock);
   2782 
   2783 	p = prfind((lxpnp->lxpr_pid == 1) ?
   2784 	    curproc->p_zone->zone_proc_initpid : lxpnp->lxpr_pid);
   2785 
   2786 	if (p == NULL || p->p_stat == SIDL) {
   2787 		mutex_exit(&pidlock);
   2788 		return (ENOENT);
   2789 	}
   2790 	mutex_exit(&pidlock);
   2791 
   2792 	return (lxpr_readdir_common(lxpnp, uiop, eofp, piddir, PIDDIRFILES));
   2793 }
   2794 
   2795 
   2796 static int
   2797 lxpr_readdir_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
   2798 {
   2799 	ASSERT(lxpnp->lxpr_type == LXPR_NETDIR);
   2800 	return (lxpr_readdir_common(lxpnp, uiop, eofp, netdir, NETDIRFILES));
   2801 }
   2802 
   2803 
   2804 static int
   2805 lxpr_readdir_fddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
   2806 {
   2807 	/* bp holds one dirent64 structure */
   2808 	longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
   2809 	dirent64_t *dirent = (dirent64_t *)bp;
   2810 	ssize_t oresid;	/* save a copy for testing later */
   2811 	ssize_t uresid;
   2812 	off_t uoffset;
   2813 	int error;
   2814 	int ceof;
   2815 	proc_t *p;
   2816 	int fddirsize;
   2817 	uf_info_t *fip;
   2818 
   2819 
   2820 	ASSERT(lxpnp->lxpr_type == LXPR_PID_FDDIR);
   2821 
   2822 	oresid = uiop->uio_resid;
   2823 
   2824 	/* can't read its contents if it died */
   2825 	p = lxpr_lock(lxpnp->lxpr_pid);
   2826 	if (p == NULL)
   2827 		return (ENOENT);
   2828 
   2829 	/* Get open file info */
   2830 	fip = (&(p)->p_user.u_finfo);
   2831 
   2832 	if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas))
   2833 		fddirsize = 0;
   2834 	else
   2835 		fddirsize = fip->fi_nfiles;
   2836 
   2837 	mutex_enter(&fip->fi_lock);
   2838 	lxpr_unlock(p);
   2839 
   2840 	/* Do the fixed entries (in this case just "." & "..") */
   2841 	error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0);
   2842 
   2843 	/* Finished if we got an error or if we couldn't do all the table */
   2844 	if (error != 0 || ceof == 0)
   2845 		return (error);
   2846 
   2847 	/* clear out the dirent buffer */
   2848 	bzero(bp, sizeof (bp));
   2849 
   2850 	/*
   2851 	 * Loop until user's request is satisfied or until
   2852 	 * all file descriptors have been examined.
   2853 	 */
   2854 	for (; (uresid = uiop->uio_resid) > 0;
   2855 	    uiop->uio_offset = uoffset + LXPR_SDSIZE) {
   2856 		int reclen;
   2857 		int fd;
   2858 		int len;
   2859 
   2860 		uoffset = uiop->uio_offset;
   2861 
   2862 		/*
   2863 		 * Stop at the end of the fd list
   2864 		 */
   2865 		fd = (uoffset / LXPR_SDSIZE) - 2;
   2866 		if (fd >= fddirsize) {
   2867 			if (eofp) {
   2868 				*eofp = 1;
   2869 			}
   2870 			goto out;
   2871 		}
   2872 
   2873 		if (fip->fi_list[fd].uf_file == NULL)
   2874 			continue;
   2875 
   2876 		dirent->d_ino = lxpr_inode(LXPR_PID_FD_FD, lxpnp->lxpr_pid, fd);
   2877 		len = snprintf(dirent->d_name, LXPNSIZ, "%d", fd);
   2878 		ASSERT(len < LXPNSIZ);
   2879 		reclen = DIRENT64_RECLEN(len);
   2880 
   2881 		dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
   2882 		dirent->d_reclen = (ushort_t)reclen;
   2883 
   2884 		if (reclen > uresid) {
   2885 			/*
   2886 			 * Error if no entries have been returned yet.
   2887 			 */
   2888 			if (uresid == oresid)
   2889 				error = EINVAL;
   2890 			goto out;
   2891 		}
   2892 
   2893 		if (error = uiomove((caddr_t)dirent, reclen, UIO_READ, uiop))
   2894 			goto out;
   2895 	}
   2896 
   2897 	if (eofp)
   2898 		*eofp =
   2899 		    (uiop->uio_offset >= ((fddirsize+2) * LXPR_SDSIZE)) ? 1 : 0;
   2900 
   2901 out:
   2902 	mutex_exit(&fip->fi_lock);
   2903 	return (error);
   2904 }
   2905 
   2906 
   2907 /*
   2908  * lxpr_readlink(): Vnode operation for VOP_READLINK()
   2909  */
   2910 /* ARGSUSED */
   2911 static int
   2912 lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
   2913 {
   2914 	char bp[MAXPATHLEN + 1];
   2915 	size_t buflen = sizeof (bp);
   2916 	lxpr_node_t *lxpnp = VTOLXP(vp);
   2917 	vnode_t *rvp = lxpnp->lxpr_realvp;
   2918 	pid_t pid;
   2919 	int error = 0;
   2920 
   2921 	/* must be a symbolic link file */
   2922 	if (vp->v_type != VLNK)
   2923 		return (EINVAL);
   2924 
   2925 	/* Try to produce a symlink name for anything that has a realvp */
   2926 	if (rvp != NULL) {
   2927 		if ((error = lxpr_access(vp, VREAD, 0, CRED(), ct)) != 0)
   2928 			return (error);
   2929 		if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0)
   2930 			return (error);
   2931 	} else {
   2932 		switch (lxpnp->lxpr_type) {
   2933 		case LXPR_SELF:
   2934 			/*
   2935 			 * Don't need to check result as every possible int
   2936 			 * will fit within MAXPATHLEN bytes
   2937 			 */
   2938 
   2939 			/*
   2940 			 * Convert pid to the Linux default of 1 if we're the
   2941 			 * zone's init process
   2942 			 */
   2943 			pid = ((curproc->p_pid !=
   2944 			    curproc->p_zone->zone_proc_initpid)
   2945 			    ? curproc->p_pid : 1);
   2946 
   2947 			(void) snprintf(bp, buflen, "%d", pid);
   2948 			break;
   2949 		case LXPR_PID_CURDIR:
   2950 		case LXPR_PID_ROOTDIR:
   2951 		case LXPR_PID_EXE:
   2952 			return (EACCES);
   2953 		default:
   2954 			/*
   2955 			 * Need to return error so that nothing thinks
   2956 			 * that the symlink is empty and hence "."
   2957 			 */
   2958 			return (EINVAL);
   2959 		}
   2960 	}
   2961 
   2962 	/* copy the link data to user space */
   2963 	return (uiomove(bp, strlen(bp), UIO_READ, uiop));
   2964 }
   2965 
   2966 
   2967 /*
   2968  * lxpr_inactive(): Vnode operation for VOP_INACTIVE()
   2969  * Vnode is no longer referenced, deallocate the file
   2970  * and all its resources.
   2971  */
   2972 /* ARGSUSED */
   2973 static void
   2974 lxpr_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
   2975 {
   2976 	lxpr_freenode(VTOLXP(vp));
   2977 }
   2978 
   2979 
   2980 /*
   2981  * lxpr_sync(): Vnode operation for VOP_SYNC()
   2982  */
   2983 static int
   2984 lxpr_sync()
   2985 {
   2986 	/*
   2987 	 * nothing to sync but this
   2988 	 * function must never fail
   2989 	 */
   2990 	return (0);
   2991 }
   2992 
   2993 
   2994 /*
   2995  * lxpr_cmp(): Vnode operation for VOP_CMP()
   2996  */
   2997 static int
   2998 lxpr_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
   2999 {
   3000 	vnode_t *rvp;
   3001 
   3002 	while (vn_matchops(vp1, lxpr_vnodeops) &&
   3003 	    (rvp = VTOLXP(vp1)->lxpr_realvp) != NULL)
   3004 		vp1 = rvp;
   3005 	while (vn_matchops(vp2, lxpr_vnodeops) &&
   3006 	    (rvp = VTOLXP(vp2)->lxpr_realvp) != NULL)
   3007 		vp2 = rvp;
   3008 	if (vn_matchops(vp1, lxpr_vnodeops) || vn_matchops(vp2, lxpr_vnodeops))
   3009 		return (vp1 == vp2);
   3010 	return (VOP_CMP(vp1, vp2, ct));
   3011 }
   3012 
   3013 
   3014 /*
   3015  * lxpr_realvp(): Vnode operation for VOP_REALVP()
   3016  */
   3017 static int
   3018 lxpr_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
   3019 {
   3020 	vnode_t *rvp;
   3021 
   3022 	if ((rvp = VTOLXP(vp)->lxpr_realvp) != NULL) {
   3023 		vp = rvp;
   3024 		if (VOP_REALVP(vp, &rvp, ct) == 0)
   3025 			vp = rvp;
   3026 	}
   3027 
   3028 	*vpp = vp;
   3029 	return (0);
   3030 }
   3031