Home | History | Annotate | Download | only in sockfs
      1     0    stevel /*
      2     0    stevel  * CDDL HEADER START
      3     0    stevel  *
      4     0    stevel  * The contents of this file are subject to the terms of the
      5  1548   rshoaib  * Common Development and Distribution License (the "License").
      6  1548   rshoaib  * You may not use this file except in compliance with the License.
      7     0    stevel  *
      8     0    stevel  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9     0    stevel  * or http://www.opensolaris.org/os/licensing.
     10     0    stevel  * See the License for the specific language governing permissions
     11     0    stevel  * and limitations under the License.
     12     0    stevel  *
     13     0    stevel  * When distributing Covered Code, include this CDDL HEADER in each
     14     0    stevel  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15     0    stevel  * If applicable, add the following below this CDDL HEADER, with the
     16     0    stevel  * fields enclosed by brackets "[]" replaced with your own identifying
     17     0    stevel  * information: Portions Copyright [yyyy] [name of copyright owner]
     18     0    stevel  *
     19     0    stevel  * CDDL HEADER END
     20     0    stevel  */
     21  1548   rshoaib 
     22     0    stevel /*
     23  8778      Erik  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24     0    stevel  * Use is subject to license terms.
     25     0    stevel  */
     26     0    stevel 
     27     0    stevel #include <sys/types.h>
     28     0    stevel #include <sys/t_lock.h>
     29     0    stevel #include <sys/param.h>
     30     0    stevel #include <sys/systm.h>
     31     0    stevel #include <sys/buf.h>
     32     0    stevel #include <sys/conf.h>
     33     0    stevel #include <sys/cred.h>
     34     0    stevel #include <sys/kmem.h>
     35     0    stevel #include <sys/sysmacros.h>
     36     0    stevel #include <sys/vfs.h>
     37  3898       rsb #include <sys/vfs_opreg.h>
     38     0    stevel #include <sys/vnode.h>
     39     0    stevel #include <sys/debug.h>
     40     0    stevel #include <sys/errno.h>
     41     0    stevel #include <sys/time.h>
     42     0    stevel #include <sys/file.h>
     43     0    stevel #include <sys/open.h>
     44     0    stevel #include <sys/user.h>
     45     0    stevel #include <sys/termios.h>
     46     0    stevel #include <sys/stream.h>
     47     0    stevel #include <sys/strsubr.h>
     48     0    stevel #include <sys/strsun.h>
     49     0    stevel #include <sys/esunddi.h>
     50     0    stevel #include <sys/flock.h>
     51     0    stevel #include <sys/modctl.h>
     52     0    stevel #include <sys/cmn_err.h>
     53     0    stevel #include <sys/mkdev.h>
     54     0    stevel #include <sys/pathname.h>
     55     0    stevel #include <sys/ddi.h>
     56     0    stevel #include <sys/stat.h>
     57     0    stevel #include <sys/fs/snode.h>
     58     0    stevel #include <sys/fs/dv_node.h>
     59     0    stevel #include <sys/zone.h>
     60     0    stevel 
     61     0    stevel #include <sys/socket.h>
     62     0    stevel #include <sys/socketvar.h>
     63     0    stevel #include <netinet/in.h>
     64     0    stevel #include <sys/un.h>
     65     0    stevel 
     66     0    stevel #include <sys/ucred.h>
     67     0    stevel 
     68     0    stevel #include <sys/tiuser.h>
     69     0    stevel #define	_SUN_TPI_VERSION	2
     70     0    stevel #include <sys/tihdr.h>
     71     0    stevel 
     72     0    stevel #include <c2/audit.h>
     73     0    stevel 
     74     0    stevel #include <fs/sockfs/nl7c.h>
     75  8348      Eric #include <fs/sockfs/sockcommon.h>
     76  8348      Eric #include <fs/sockfs/socktpi.h>
     77  8348      Eric #include <fs/sockfs/socktpi_impl.h>
     78  9491    Anders #include <fs/sockfs/sodirect.h>
     79     0    stevel 
     80     0    stevel /*
     81     0    stevel  * Macros that operate on struct cmsghdr.
     82     0    stevel  * The CMSG_VALID macro does not assume that the last option buffer is padded.
     83     0    stevel  */
     84     0    stevel #define	CMSG_CONTENT(cmsg)	(&((cmsg)[1]))
     85     0    stevel #define	CMSG_CONTENTLEN(cmsg)	((cmsg)->cmsg_len - sizeof (struct cmsghdr))
     86     0    stevel #define	CMSG_VALID(cmsg, start, end)					\
     87     0    stevel 	(ISALIGNED_cmsghdr(cmsg) &&					\
     88     0    stevel 	((uintptr_t)(cmsg) >= (uintptr_t)(start)) &&			\
     89     0    stevel 	((uintptr_t)(cmsg) < (uintptr_t)(end)) &&			\
     90     0    stevel 	((ssize_t)(cmsg)->cmsg_len >= sizeof (struct cmsghdr)) &&	\
     91     0    stevel 	((uintptr_t)(cmsg) + (cmsg)->cmsg_len <= (uintptr_t)(end)))
     92     0    stevel #define	SO_LOCK_WAKEUP_TIME	3000	/* Wakeup time in milliseconds */
     93     0    stevel 
     94     0    stevel dev_t sockdev;	/* For fsid in getattr */
     95  8194      Jack int sockfs_defer_nl7c_init = 0;
     96     0    stevel 
     97     0    stevel struct socklist socklist;
     98     0    stevel 
     99  8348      Eric struct kmem_cache *socket_cache;
    100  8348      Eric 
    101     0    stevel static int sockfs_update(kstat_t *, int);
    102     0    stevel static int sockfs_snapshot(kstat_t *, void *, int);
    103  8348      Eric extern smod_info_t *sotpi_smod_create(void);
    104     0    stevel 
    105     0    stevel extern void sendfile_init();
    106     0    stevel 
    107     0    stevel extern void nl7c_init(void);
    108  8194      Jack 
    109  8194      Jack extern int modrootloaded;
    110     0    stevel 
    111     0    stevel #define	ADRSTRLEN (2 * sizeof (void *) + 1)
    112     0    stevel /*
    113     0    stevel  * kernel structure for passing the sockinfo data back up to the user.
    114     0    stevel  * the strings array allows us to convert AF_UNIX addresses into strings
    115     0    stevel  * with a common method regardless of which n-bit kernel we're running.
    116     0    stevel  */
    117     0    stevel struct k_sockinfo {
    118     0    stevel 	struct sockinfo	ks_si;
    119     0    stevel 	char		ks_straddr[3][ADRSTRLEN];
    120     0    stevel };
    121     0    stevel 
    122     0    stevel /*
    123     0    stevel  * Translate from a device pathname (e.g. "/dev/tcp") to a vnode.
    124     0    stevel  * Returns with the vnode held.
    125     0    stevel  */
    126  8348      Eric int
    127     0    stevel sogetvp(char *devpath, vnode_t **vpp, int uioflag)
    128     0    stevel {
    129     0    stevel 	struct snode *csp;
    130     0    stevel 	vnode_t *vp, *dvp;
    131     0    stevel 	major_t maj;
    132     0    stevel 	int error;
    133     0    stevel 
    134     0    stevel 	ASSERT(uioflag == UIO_SYSSPACE || uioflag == UIO_USERSPACE);
    135  8348      Eric 
    136     0    stevel 	/*
    137     0    stevel 	 * Lookup the underlying filesystem vnode.
    138     0    stevel 	 */
    139     0    stevel 	error = lookupname(devpath, uioflag, FOLLOW, NULLVPP, &vp);
    140     0    stevel 	if (error)
    141     0    stevel 		return (error);
    142     0    stevel 
    143     0    stevel 	/* Check that it is the correct vnode */
    144     0    stevel 	if (vp->v_type != VCHR) {
    145     0    stevel 		VN_RELE(vp);
    146     0    stevel 		return (ENOTSOCK);
    147     0    stevel 	}
    148     0    stevel 
    149     0    stevel 	/*
    150     0    stevel 	 * If devpath went through devfs, the device should already
    151     0    stevel 	 * be configured. If devpath is a mknod file, however, we
    152     0    stevel 	 * need to make sure the device is properly configured.
    153     0    stevel 	 * To do this, we do something similar to spec_open()
    154     0    stevel 	 * except that we resolve to the minor/leaf level since
    155     0    stevel 	 * we need to return a vnode.
    156     0    stevel 	 */
    157     0    stevel 	csp = VTOS(VTOS(vp)->s_commonvp);
    158     0    stevel 	if (!(csp->s_flag & SDIPSET)) {
    159     0    stevel 		char *pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
    160     0    stevel 		error = ddi_dev_pathname(vp->v_rdev, S_IFCHR, pathname);
    161     0    stevel 		if (error == 0)
    162     0    stevel 			error = devfs_lookupname(pathname, NULLVPP, &dvp);
    163     0    stevel 		VN_RELE(vp);
    164     0    stevel 		kmem_free(pathname, MAXPATHLEN);
    165     0    stevel 		if (error != 0)
    166     0    stevel 			return (ENXIO);
    167     0    stevel 		vp = dvp;	/* use the devfs vp */
    168     0    stevel 	}
    169     0    stevel 
    170     0    stevel 	/* device is configured at this point */
    171     0    stevel 	maj = getmajor(vp->v_rdev);
    172     0    stevel 	if (!STREAMSTAB(maj)) {
    173     0    stevel 		VN_RELE(vp);
    174     0    stevel 		return (ENOSTR);
    175     0    stevel 	}
    176     0    stevel 
    177     0    stevel 	*vpp = vp;
    178     0    stevel 	return (0);
    179     0    stevel }
    180     0    stevel 
    181     0    stevel /*
    182     0    stevel  * Update the accessed, updated, or changed times in an sonode
    183     0    stevel  * with the current time.
    184     0    stevel  *
    185     0    stevel  * Note that both SunOS 4.X and 4.4BSD sockets do not present reasonable
    186     0    stevel  * attributes in a fstat call. (They return the current time and 0 for
    187     0    stevel  * all timestamps, respectively.) We maintain the current timestamps
    188     0    stevel  * here primarily so that should sockmod be popped the resulting
    189     0    stevel  * file descriptor will behave like a stream w.r.t. the timestamps.
    190     0    stevel  */
    191     0    stevel void
    192     0    stevel so_update_attrs(struct sonode *so, int flag)
    193     0    stevel {
    194     0    stevel 	time_t now = gethrestime_sec();
    195     0    stevel 
    196  8348      Eric 	if (SOCK_IS_NONSTR(so))
    197  8348      Eric 		return;
    198  8348      Eric 
    199     0    stevel 	mutex_enter(&so->so_lock);
    200     0    stevel 	so->so_flag |= flag;
    201     0    stevel 	if (flag & SOACC)
    202  8348      Eric 		SOTOTPI(so)->sti_atime = now;
    203     0    stevel 	if (flag & SOMOD)
    204  8348      Eric 		SOTOTPI(so)->sti_mtime = now;
    205     0    stevel 	mutex_exit(&so->so_lock);
    206     0    stevel }
    207     0    stevel 
    208  8348      Eric extern so_create_func_t sock_comm_create_function;
    209  8348      Eric extern so_destroy_func_t sock_comm_destroy_function;
    210     0    stevel /*
    211     0    stevel  * Init function called when sockfs is loaded.
    212     0    stevel  */
    213     0    stevel int
    214     0    stevel sockinit(int fstype, char *name)
    215     0    stevel {
    216     0    stevel 	static const fs_operation_def_t sock_vfsops_template[] = {
    217     0    stevel 		NULL, NULL
    218     0    stevel 	};
    219     0    stevel 	int error;
    220     0    stevel 	major_t dev;
    221     0    stevel 	char *err_str;
    222     0    stevel 
    223     0    stevel 	error = vfs_setfsops(fstype, sock_vfsops_template, NULL);
    224     0    stevel 	if (error != 0) {
    225  1548   rshoaib 		zcmn_err(GLOBAL_ZONEID, CE_WARN,
    226  1548   rshoaib 		    "sockinit: bad vfs ops template");
    227     0    stevel 		return (error);
    228     0    stevel 	}
    229     0    stevel 
    230  8348      Eric 	error = vn_make_ops(name, socket_vnodeops_template,
    231  8348      Eric 	    &socket_vnodeops);
    232     0    stevel 	if (error != 0) {
    233  8348      Eric 		err_str = "sockinit: bad socket vnode ops template";
    234     0    stevel 		/* vn_make_ops() does not reset socktpi_vnodeops on failure. */
    235  8348      Eric 		socket_vnodeops = NULL;
    236     0    stevel 		goto failure;
    237     0    stevel 	}
    238     0    stevel 
    239  8348      Eric 	socket_cache = kmem_cache_create("socket_cache",
    240  8348      Eric 	    sizeof (struct sonode), 0, sonode_constructor,
    241  8348      Eric 	    sonode_destructor, NULL, NULL, NULL, 0);
    242  3422  nh145002 
    243  8348      Eric 	error = socktpi_init();
    244  6707    brutus 	if (error != 0) {
    245  6707    brutus 		err_str = NULL;
    246  6707    brutus 		goto failure;
    247  6707    brutus 	}
    248  6707    brutus 
    249  9491    Anders 	error = sod_init();
    250     0    stevel 	if (error != 0) {
    251     0    stevel 		err_str = NULL;
    252     0    stevel 		goto failure;
    253     0    stevel 	}
    254     0    stevel 
    255     0    stevel 	/*
    256  8348      Eric 	 * Set up the default create and destroy functions
    257     0    stevel 	 */
    258  8348      Eric 	sock_comm_create_function = socket_sonode_create;
    259  8348      Eric 	sock_comm_destroy_function = socket_sonode_destroy;
    260     0    stevel 
    261     0    stevel 	/*
    262     0    stevel 	 * Build initial list mapping socket parameters to vnode.
    263     0    stevel 	 */
    264  8348      Eric 	smod_init();
    265  8348      Eric 	smod_add(sotpi_smod_create());
    266  8348      Eric 
    267  8348      Eric 	sockparams_init();
    268     0    stevel 
    269     0    stevel 	/*
    270     0    stevel 	 * If sockets are needed before init runs /sbin/soconfig
    271     0    stevel 	 * it is possible to preload the sockparams list here using
    272     0    stevel 	 * calls like:
    273     0    stevel 	 *	sockconfig(1,2,3, "/dev/tcp", 0);
    274     0    stevel 	 */
    275     0    stevel 
    276     0    stevel 	/*
    277     0    stevel 	 * Create a unique dev_t for use in so_fsid.
    278     0    stevel 	 */
    279     0    stevel 
    280     0    stevel 	if ((dev = getudev()) == (major_t)-1)
    281     0    stevel 		dev = 0;
    282     0    stevel 	sockdev = makedevice(dev, 0);
    283     0    stevel 
    284     0    stevel 	mutex_init(&socklist.sl_lock, NULL, MUTEX_DEFAULT, NULL);
    285     0    stevel 	sendfile_init();
    286  8194      Jack 	if (!modrootloaded) {
    287  8194      Jack 		sockfs_defer_nl7c_init = 1;
    288  8194      Jack 	} else {
    289  8194      Jack 		nl7c_init();
    290  8194      Jack 	}
    291     0    stevel 
    292     0    stevel 	return (0);
    293     0    stevel 
    294     0    stevel failure:
    295     0    stevel 	(void) vfs_freevfsops_by_type(fstype);
    296  8348      Eric 	if (socket_vnodeops != NULL)
    297  8348      Eric 		vn_freevnodeops(socket_vnodeops);
    298     0    stevel 	if (err_str != NULL)
    299  1548   rshoaib 		zcmn_err(GLOBAL_ZONEID, CE_WARN, err_str);
    300     0    stevel 	return (error);
    301     0    stevel }
    302     0    stevel 
    303     0    stevel /*
    304     0    stevel  * Caller must hold the mutex. Used to set SOLOCKED.
    305     0    stevel  */
    306     0    stevel void
    307     0    stevel so_lock_single(struct sonode *so)
    308     0    stevel {
    309     0    stevel 	ASSERT(MUTEX_HELD(&so->so_lock));
    310     0    stevel 
    311     0    stevel 	while (so->so_flag & (SOLOCKED | SOASYNC_UNBIND)) {
    312     0    stevel 		so->so_flag |= SOWANT;
    313     0    stevel 		cv_wait_stop(&so->so_want_cv, &so->so_lock,
    314  5753       gww 		    SO_LOCK_WAKEUP_TIME);
    315     0    stevel 	}
    316     0    stevel 	so->so_flag |= SOLOCKED;
    317     0    stevel }
    318     0    stevel 
    319     0    stevel /*
    320     0    stevel  * Caller must hold the mutex and pass in SOLOCKED or SOASYNC_UNBIND.
    321     0    stevel  * Used to clear SOLOCKED or SOASYNC_UNBIND.
    322     0    stevel  */
    323     0    stevel void
    324     0    stevel so_unlock_single(struct sonode *so, int flag)
    325     0    stevel {
    326     0    stevel 	ASSERT(MUTEX_HELD(&so->so_lock));
    327     0    stevel 	ASSERT(flag & (SOLOCKED|SOASYNC_UNBIND));
    328     0    stevel 	ASSERT((flag & ~(SOLOCKED|SOASYNC_UNBIND)) == 0);
    329     0    stevel 	ASSERT(so->so_flag & flag);
    330     0    stevel 	/*
    331  8348      Eric 	 * Process the T_DISCON_IND on sti_discon_ind_mp.
    332     0    stevel 	 *
    333     0    stevel 	 * Call to so_drain_discon_ind will result in so_lock
    334     0    stevel 	 * being dropped and re-acquired later.
    335     0    stevel 	 */
    336  8348      Eric 	if (!SOCK_IS_NONSTR(so)) {
    337  8348      Eric 		sotpi_info_t *sti = SOTOTPI(so);
    338  8348      Eric 
    339  8348      Eric 		if (sti->sti_discon_ind_mp != NULL)
    340  8348      Eric 			so_drain_discon_ind(so);
    341  8348      Eric 	}
    342     0    stevel 
    343     0    stevel 	if (so->so_flag & SOWANT)
    344     0    stevel 		cv_broadcast(&so->so_want_cv);
    345     0    stevel 	so->so_flag &= ~(SOWANT|flag);
    346     0    stevel }
    347     0    stevel 
    348     0    stevel /*
    349     0    stevel  * Caller must hold the mutex. Used to set SOREADLOCKED.
    350     0    stevel  * If the caller wants nonblocking behavior it should set fmode.
    351     0    stevel  */
    352     0    stevel int
    353     0    stevel so_lock_read(struct sonode *so, int fmode)
    354     0    stevel {
    355     0    stevel 	ASSERT(MUTEX_HELD(&so->so_lock));
    356     0    stevel 
    357     0    stevel 	while (so->so_flag & SOREADLOCKED) {
    358     0    stevel 		if (fmode & (FNDELAY|FNONBLOCK))
    359     0    stevel 			return (EWOULDBLOCK);
    360     0    stevel 		so->so_flag |= SOWANT;
    361     0    stevel 		cv_wait_stop(&so->so_want_cv, &so->so_lock,
    362  5753       gww 		    SO_LOCK_WAKEUP_TIME);
    363     0    stevel 	}
    364     0    stevel 	so->so_flag |= SOREADLOCKED;
    365     0    stevel 	return (0);
    366     0    stevel }
    367     0    stevel 
    368     0    stevel /*
    369     0    stevel  * Like so_lock_read above but allows signals.
    370     0    stevel  */
    371     0    stevel int
    372     0    stevel so_lock_read_intr(struct sonode *so, int fmode)
    373     0    stevel {
    374     0    stevel 	ASSERT(MUTEX_HELD(&so->so_lock));
    375     0    stevel 
    376     0    stevel 	while (so->so_flag & SOREADLOCKED) {
    377     0    stevel 		if (fmode & (FNDELAY|FNONBLOCK))
    378     0    stevel 			return (EWOULDBLOCK);
    379     0    stevel 		so->so_flag |= SOWANT;
    380     0    stevel 		if (!cv_wait_sig(&so->so_want_cv, &so->so_lock))
    381     0    stevel 			return (EINTR);
    382     0    stevel 	}
    383     0    stevel 	so->so_flag |= SOREADLOCKED;
    384     0    stevel 	return (0);
    385     0    stevel }
    386     0    stevel 
    387     0    stevel /*
    388     0    stevel  * Caller must hold the mutex. Used to clear SOREADLOCKED,
    389     0    stevel  * set in so_lock_read() or so_lock_read_intr().
    390     0    stevel  */
    391     0    stevel void
    392     0    stevel so_unlock_read(struct sonode *so)
    393     0    stevel {
    394     0    stevel 	ASSERT(MUTEX_HELD(&so->so_lock));
    395     0    stevel 	ASSERT(so->so_flag & SOREADLOCKED);
    396     0    stevel 
    397     0    stevel 	if (so->so_flag & SOWANT)
    398     0    stevel 		cv_broadcast(&so->so_want_cv);
    399     0    stevel 	so->so_flag &= ~(SOWANT|SOREADLOCKED);
    400     0    stevel }
    401     0    stevel 
    402     0    stevel /*
    403     0    stevel  * Verify that the specified offset falls within the mblk and
    404     0    stevel  * that the resulting pointer is aligned.
    405     0    stevel  * Returns NULL if not.
    406     0    stevel  */
    407     0    stevel void *
    408     0    stevel sogetoff(mblk_t *mp, t_uscalar_t offset,
    409     0    stevel     t_uscalar_t length, uint_t align_size)
    410     0    stevel {
    411     0    stevel 	uintptr_t ptr1, ptr2;
    412     0    stevel 
    413     0    stevel 	ASSERT(mp && mp->b_wptr >= mp->b_rptr);
    414     0    stevel 	ptr1 = (uintptr_t)mp->b_rptr + offset;
    415     0    stevel 	ptr2 = (uintptr_t)ptr1 + length;
    416     0    stevel 	if (ptr1 < (uintptr_t)mp->b_rptr || ptr2 > (uintptr_t)mp->b_wptr) {
    417     0    stevel 		eprintline(0);
    418     0    stevel 		return (NULL);
    419     0    stevel 	}
    420     0    stevel 	if ((ptr1 & (align_size - 1)) != 0) {
    421     0    stevel 		eprintline(0);
    422     0    stevel 		return (NULL);
    423     0    stevel 	}
    424     0    stevel 	return ((void *)ptr1);
    425     0    stevel }
    426     0    stevel 
    427     0    stevel /*
    428     0    stevel  * Return the AF_UNIX underlying filesystem vnode matching a given name.
    429     0    stevel  * Makes sure the sending and the destination sonodes are compatible.
    430     0    stevel  * The vnode is returned held.
    431     0    stevel  *
    432     0    stevel  * The underlying filesystem VSOCK vnode has a v_stream pointer that
    433     0    stevel  * references the actual stream head (hence indirectly the actual sonode).
    434     0    stevel  */
    435     0    stevel static int
    436     0    stevel so_ux_lookup(struct sonode *so, struct sockaddr_un *soun, int checkaccess,
    437     0    stevel 		vnode_t **vpp)
    438     0    stevel {
    439     0    stevel 	vnode_t		*vp;	/* Underlying filesystem vnode */
    440  7409       Ric 	vnode_t		*rvp;	/* real vnode */
    441     0    stevel 	vnode_t		*svp;	/* sockfs vnode */
    442     0    stevel 	struct sonode	*so2;
    443     0    stevel 	int		error;
    444     0    stevel 
    445  7242   rh87107 	dprintso(so, 1, ("so_ux_lookup(%p) name <%s>\n", (void *)so,
    446  7242   rh87107 	    soun->sun_path));
    447     0    stevel 
    448     0    stevel 	error = lookupname(soun->sun_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
    449     0    stevel 	if (error) {
    450     0    stevel 		eprintsoline(so, error);
    451     0    stevel 		return (error);
    452     0    stevel 	}
    453  7409       Ric 
    454  7409       Ric 	/*
    455  7409       Ric 	 * Traverse lofs mounts get the real vnode
    456  7409       Ric 	 */
    457  7409       Ric 	if (VOP_REALVP(vp, &rvp, NULL) == 0) {
    458  7409       Ric 		VN_HOLD(rvp);		/* hold the real vnode */
    459  7409       Ric 		VN_RELE(vp);		/* release hold from lookup */
    460  7409       Ric 		vp = rvp;
    461  7409       Ric 	}
    462  7409       Ric 
    463     0    stevel 	if (vp->v_type != VSOCK) {
    464     0    stevel 		error = ENOTSOCK;
    465     0    stevel 		eprintsoline(so, error);
    466     0    stevel 		goto done2;
    467     0    stevel 	}
    468     0    stevel 
    469     0    stevel 	if (checkaccess) {
    470     0    stevel 		/*
    471     0    stevel 		 * Check that we have permissions to access the destination
    472     0    stevel 		 * vnode. This check is not done in BSD but it is required
    473     0    stevel 		 * by X/Open.
    474     0    stevel 		 */
    475  5331       amw 		if (error = VOP_ACCESS(vp, VREAD|VWRITE, 0, CRED(), NULL)) {
    476     0    stevel 			eprintsoline(so, error);
    477     0    stevel 			goto done2;
    478     0    stevel 		}
    479     0    stevel 	}
    480     0    stevel 
    481     0    stevel 	/*
    482     0    stevel 	 * Check if the remote socket has been closed.
    483     0    stevel 	 *
    484     0    stevel 	 * Synchronize with vn_rele_stream by holding v_lock while traversing
    485     0    stevel 	 * v_stream->sd_vnode.
    486     0    stevel 	 */
    487     0    stevel 	mutex_enter(&vp->v_lock);
    488     0    stevel 	if (vp->v_stream == NULL) {
    489     0    stevel 		mutex_exit(&vp->v_lock);
    490     0    stevel 		if (so->so_type == SOCK_DGRAM)
    491     0    stevel 			error = EDESTADDRREQ;
    492     0    stevel 		else
    493     0    stevel 			error = ECONNREFUSED;
    494     0    stevel 
    495     0    stevel 		eprintsoline(so, error);
    496     0    stevel 		goto done2;
    497     0    stevel 	}
    498     0    stevel 	ASSERT(vp->v_stream->sd_vnode);
    499     0    stevel 	svp = vp->v_stream->sd_vnode;
    500     0    stevel 	/*
    501     0    stevel 	 * holding v_lock on underlying filesystem vnode and acquiring
    502     0    stevel 	 * it on sockfs vnode. Assumes that no code ever attempts to
    503     0    stevel 	 * acquire these locks in the reverse order.
    504     0    stevel 	 */
    505     0    stevel 	VN_HOLD(svp);
    506     0    stevel 	mutex_exit(&vp->v_lock);
    507     0    stevel 
    508     0    stevel 	if (svp->v_type != VSOCK) {
    509     0    stevel 		error = ENOTSOCK;
    510     0    stevel 		eprintsoline(so, error);
    511     0    stevel 		goto done;
    512     0    stevel 	}
    513     0    stevel 
    514     0    stevel 	so2 = VTOSO(svp);
    515     0    stevel 
    516     0    stevel 	if (so->so_type != so2->so_type) {
    517     0    stevel 		error = EPROTOTYPE;
    518     0    stevel 		eprintsoline(so, error);
    519     0    stevel 		goto done;
    520     0    stevel 	}
    521     0    stevel 
    522     0    stevel 	VN_RELE(svp);
    523     0    stevel 	*vpp = vp;
    524     0    stevel 	return (0);
    525     0    stevel 
    526     0    stevel done:
    527     0    stevel 	VN_RELE(svp);
    528     0    stevel done2:
    529     0    stevel 	VN_RELE(vp);
    530     0    stevel 	return (error);
    531     0    stevel }
    532     0    stevel 
    533     0    stevel /*
    534     0    stevel  * Verify peer address for connect and sendto/sendmsg.
    535     0    stevel  * Since sendto/sendmsg would not get synchronous errors from the transport
    536     0    stevel  * provider we have to do these ugly checks in the socket layer to
    537     0    stevel  * preserve compatibility with SunOS 4.X.
    538     0    stevel  */
    539     0    stevel int
    540     0    stevel so_addr_verify(struct sonode *so, const struct sockaddr *name,
    541     0    stevel     socklen_t namelen)
    542     0    stevel {
    543     0    stevel 	int		family;
    544     0    stevel 
    545  7240   rh87107 	dprintso(so, 1, ("so_addr_verify(%p, %p, %d)\n",
    546  7240   rh87107 	    (void *)so, (void *)name, namelen));
    547     0    stevel 
    548     0    stevel 	ASSERT(name != NULL);
    549     0    stevel 
    550     0    stevel 	family = so->so_family;
    551     0    stevel 	switch (family) {
    552     0    stevel 	case AF_INET:
    553     0    stevel 		if (name->sa_family != family) {
    554     0    stevel 			eprintsoline(so, EAFNOSUPPORT);
    555     0    stevel 			return (EAFNOSUPPORT);
    556     0    stevel 		}
    557     0    stevel 		if (namelen != (socklen_t)sizeof (struct sockaddr_in)) {
    558     0    stevel 			eprintsoline(so, EINVAL);
    559     0    stevel 			return (EINVAL);
    560     0    stevel 		}
    561     0    stevel 		break;
    562     0    stevel 	case AF_INET6: {
    563     0    stevel #ifdef DEBUG
    564     0    stevel 		struct sockaddr_in6 *sin6;
    565     0    stevel #endif /* DEBUG */
    566     0    stevel 
    567     0    stevel 		if (name->sa_family != family) {
    568     0    stevel 			eprintsoline(so, EAFNOSUPPORT);
    569     0    stevel 			return (EAFNOSUPPORT);
    570     0    stevel 		}
    571     0    stevel 		if (namelen != (socklen_t)sizeof (struct sockaddr_in6)) {
    572     0    stevel 			eprintsoline(so, EINVAL);
    573     0    stevel 			return (EINVAL);
    574     0    stevel 		}
    575     0    stevel #ifdef DEBUG
    576     0    stevel 		/* Verify that apps don't forget to clear sin6_scope_id etc */
    577     0    stevel 		sin6 = (struct sockaddr_in6 *)name;
    578     0    stevel 		if (sin6->sin6_scope_id != 0 &&
    579     0    stevel 		    !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
    580  1548   rshoaib 			zcmn_err(getzoneid(), CE_WARN,
    581     0    stevel 			    "connect/send* with uninitialized sin6_scope_id "
    582     0    stevel 			    "(%d) on socket. Pid = %d\n",
    583     0    stevel 			    (int)sin6->sin6_scope_id, (int)curproc->p_pid);
    584     0    stevel 		}
    585     0    stevel #endif /* DEBUG */
    586     0    stevel 		break;
    587     0    stevel 	}
    588     0    stevel 	case AF_UNIX:
    589  8348      Eric 		if (SOTOTPI(so)->sti_faddr_noxlate) {
    590     0    stevel 			return (0);
    591     0    stevel 		}
    592     0    stevel 		if (namelen < (socklen_t)sizeof (short)) {
    593     0    stevel 			eprintsoline(so, ENOENT);
    594     0    stevel 			return (ENOENT);
    595     0    stevel 		}
    596     0    stevel 		if (name->sa_family != family) {
    597     0    stevel 			eprintsoline(so, EAFNOSUPPORT);
    598     0    stevel 			return (EAFNOSUPPORT);
    599     0    stevel 		}
    600     0    stevel 		/* MAXPATHLEN + soun_family + nul termination */
    601     0    stevel 		if (namelen > (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) {
    602     0    stevel 			eprintsoline(so, ENAMETOOLONG);
    603     0    stevel 			return (ENAMETOOLONG);
    604     0    stevel 		}
    605     0    stevel 
    606     0    stevel 		break;
    607     0    stevel 
    608     0    stevel 	default:
    609     0    stevel 		/*
    610     0    stevel 		 * Default is don't do any length or sa_family check
    611     0    stevel 		 * to allow non-sockaddr style addresses.
    612     0    stevel 		 */
    613     0    stevel 		break;
    614     0    stevel 	}
    615     0    stevel 
    616     0    stevel 	return (0);
    617     0    stevel }
    618     0    stevel 
    619     0    stevel 
    620     0    stevel /*
    621     0    stevel  * Translate an AF_UNIX sockaddr_un to the transport internal name.
    622     0    stevel  * Assumes caller has called so_addr_verify first.
    623     0    stevel  */
    624     0    stevel /*ARGSUSED*/
    625     0    stevel int
    626     0    stevel so_ux_addr_xlate(struct sonode *so, struct sockaddr *name,
    627     0    stevel     socklen_t namelen, int checkaccess,
    628     0    stevel     void **addrp, socklen_t *addrlenp)
    629     0    stevel {
    630     0    stevel 	int			error;
    631     0    stevel 	struct sockaddr_un	*soun;
    632     0    stevel 	vnode_t			*vp;
    633     0    stevel 	void			*addr;
    634     0    stevel 	socklen_t		addrlen;
    635  8348      Eric 	sotpi_info_t		*sti = SOTOTPI(so);
    636     0    stevel 
    637     0    stevel 	dprintso(so, 1, ("so_ux_addr_xlate(%p, %p, %d, %d)\n",
    638  7240   rh87107 	    (void *)so, (void *)name, namelen, checkaccess));
    639     0    stevel 
    640     0    stevel 	ASSERT(name != NULL);
    641     0    stevel 	ASSERT(so->so_family == AF_UNIX);
    642  8348      Eric 	ASSERT(!sti->sti_faddr_noxlate);
    643     0    stevel 	ASSERT(namelen >= (socklen_t)sizeof (short));
    644     0    stevel 	ASSERT(name->sa_family == AF_UNIX);
    645     0    stevel 	soun = (struct sockaddr_un *)name;
    646     0    stevel 	/*
    647     0    stevel 	 * Lookup vnode for the specified path name and verify that
    648     0    stevel 	 * it is a socket.
    649     0    stevel 	 */
    650     0    stevel 	error = so_ux_lookup(so, soun, checkaccess, &vp);
    651     0    stevel 	if (error) {
    652     0    stevel 		eprintsoline(so, error);
    653     0    stevel 		return (error);
    654     0    stevel 	}
    655     0    stevel 	/*
    656     0    stevel 	 * Use the address of the peer vnode as the address to send
    657     0    stevel 	 * to. We release the peer vnode here. In case it has been
    658     0    stevel 	 * closed by the time the T_CONN_REQ or T_UNIDATA_REQ reaches the
    659     0    stevel 	 * transport the message will get an error or be dropped.
    660     0    stevel 	 */
    661  8348      Eric 	sti->sti_ux_faddr.soua_vp = vp;
    662  8348      Eric 	sti->sti_ux_faddr.soua_magic = SOU_MAGIC_EXPLICIT;
    663  8348      Eric 	addr = &sti->sti_ux_faddr;
    664  8348      Eric 	addrlen = (socklen_t)sizeof (sti->sti_ux_faddr);
    665  7240   rh87107 	dprintso(so, 1, ("ux_xlate UNIX: addrlen %d, vp %p\n",
    666  7240   rh87107 	    addrlen, (void *)vp));
    667     0    stevel 	VN_RELE(vp);
    668     0    stevel 	*addrp = addr;
    669     0    stevel 	*addrlenp = (socklen_t)addrlen;
    670     0    stevel 	return (0);
    671     0    stevel }
    672     0    stevel 
    673     0    stevel /*
    674     0    stevel  * Esballoc free function for messages that contain SO_FILEP option.
    675     0    stevel  * Decrement the reference count on the file pointers using closef.
    676     0    stevel  */
    677     0    stevel void
    678     0    stevel fdbuf_free(struct fdbuf *fdbuf)
    679     0    stevel {
    680     0    stevel 	int	i;
    681     0    stevel 	struct file *fp;
    682     0    stevel 
    683     0    stevel 	dprint(1, ("fdbuf_free: %d fds\n", fdbuf->fd_numfd));
    684     0    stevel 	for (i = 0; i < fdbuf->fd_numfd; i++) {
    685     0    stevel 		/*
    686     0    stevel 		 * We need pointer size alignment for fd_fds. On a LP64
    687     0    stevel 		 * kernel, the required alignment is 8 bytes while
    688     0    stevel 		 * the option headers and values are only 4 bytes
    689     0    stevel 		 * aligned. So its safer to do a bcopy compared to
    690     0    stevel 		 * assigning fdbuf->fd_fds[i] to fp.
    691     0    stevel 		 */
    692     0    stevel 		bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp));
    693  7240   rh87107 		dprint(1, ("fdbuf_free: [%d] = %p\n", i, (void *)fp));
    694     0    stevel 		(void) closef(fp);
    695     0    stevel 	}
    696     0    stevel 	if (fdbuf->fd_ebuf != NULL)
    697     0    stevel 		kmem_free(fdbuf->fd_ebuf, fdbuf->fd_ebuflen);
    698     0    stevel 	kmem_free(fdbuf, fdbuf->fd_size);
    699     0    stevel }
    700     0    stevel 
    701     0    stevel /*
    702   455      meem  * Allocate an esballoc'ed message for AF_UNIX file descriptor passing.
    703   455      meem  * Waits if memory is not available.
    704     0    stevel  */
    705     0    stevel mblk_t *
    706     0    stevel fdbuf_allocmsg(int size, struct fdbuf *fdbuf)
    707     0    stevel {
    708   455      meem 	uchar_t	*buf;
    709     0    stevel 	mblk_t	*mp;
    710     0    stevel 
    711     0    stevel 	dprint(1, ("fdbuf_allocmsg: size %d, %d fds\n", size, fdbuf->fd_numfd));
    712     0    stevel 	buf = kmem_alloc(size, KM_SLEEP);
    713     0    stevel 	fdbuf->fd_ebuf = (caddr_t)buf;
    714     0    stevel 	fdbuf->fd_ebuflen = size;
    715     0    stevel 	fdbuf->fd_frtn.free_func = fdbuf_free;
    716     0    stevel 	fdbuf->fd_frtn.free_arg = (caddr_t)fdbuf;
    717     0    stevel 
    718   455      meem 	mp = esballoc_wait(buf, size, BPRI_MED, &fdbuf->fd_frtn);
    719     0    stevel 	mp->b_datap->db_type = M_PROTO;
    720     0    stevel 	return (mp);
    721     0    stevel }
    722     0    stevel 
    723     0    stevel /*
    724     0    stevel  * Extract file descriptors from a fdbuf.
    725     0    stevel  * Return list in rights/rightslen.
    726     0    stevel  */
    727     0    stevel /*ARGSUSED*/
    728     0    stevel static int
    729     0    stevel fdbuf_extract(struct fdbuf *fdbuf, void *rights, int rightslen)
    730     0    stevel {
    731     0    stevel 	int	i, fd;
    732     0    stevel 	int	*rp;
    733     0    stevel 	struct file *fp;
    734     0    stevel 	int	numfd;
    735     0    stevel 
    736     0    stevel 	dprint(1, ("fdbuf_extract: %d fds, len %d\n",
    737  5753       gww 	    fdbuf->fd_numfd, rightslen));
    738     0    stevel 
    739     0    stevel 	numfd = fdbuf->fd_numfd;
    740     0    stevel 	ASSERT(rightslen == numfd * (int)sizeof (int));
    741     0    stevel 
    742     0    stevel 	/*
    743     0    stevel 	 * Allocate a file descriptor and increment the f_count.
    744     0    stevel 	 * The latter is needed since we always call fdbuf_free
    745     0    stevel 	 * which performs a closef.
    746     0    stevel 	 */
    747     0    stevel 	rp = (int *)rights;
    748     0    stevel 	for (i = 0; i < numfd; i++) {
    749     0    stevel 		if ((fd = ufalloc(0)) == -1)
    750     0    stevel 			goto cleanup;
    751     0    stevel 		/*
    752     0    stevel 		 * We need pointer size alignment for fd_fds. On a LP64
    753     0    stevel 		 * kernel, the required alignment is 8 bytes while
    754     0    stevel 		 * the option headers and values are only 4 bytes
    755     0    stevel 		 * aligned. So its safer to do a bcopy compared to
    756     0    stevel 		 * assigning fdbuf->fd_fds[i] to fp.
    757     0    stevel 		 */
    758     0    stevel 		bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp));
    759     0    stevel 		mutex_enter(&fp->f_tlock);
    760     0    stevel 		fp->f_count++;
    761     0    stevel 		mutex_exit(&fp->f_tlock);
    762     0    stevel 		setf(fd, fp);
    763     0    stevel 		*rp++ = fd;
    764     0    stevel 		if (audit_active)
    765     0    stevel 			audit_fdrecv(fd, fp);
    766     0    stevel 		dprint(1, ("fdbuf_extract: [%d] = %d, %p refcnt %d\n",
    767  7240   rh87107 		    i, fd, (void *)fp, fp->f_count));
    768     0    stevel 	}
    769     0    stevel 	return (0);
    770     0    stevel 
    771     0    stevel cleanup:
    772     0    stevel 	/*
    773     0    stevel 	 * Undo whatever partial work the loop above has done.
    774     0    stevel 	 */
    775     0    stevel 	{
    776     0    stevel 		int j;
    777     0    stevel 
    778     0    stevel 		rp = (int *)rights;
    779     0    stevel 		for (j = 0; j < i; j++) {
    780     0    stevel 			dprint(0,
    781     0    stevel 			    ("fdbuf_extract: cleanup[%d] = %d\n", j, *rp));
    782     0    stevel 			(void) closeandsetf(*rp++, NULL);
    783     0    stevel 		}
    784     0    stevel 	}
    785     0    stevel 
    786     0    stevel 	return (EMFILE);
    787     0    stevel }
    788     0    stevel 
    789     0    stevel /*
    790     0    stevel  * Insert file descriptors into an fdbuf.
    791     0    stevel  * Returns a kmem_alloc'ed fdbuf. The fdbuf should be freed
    792     0    stevel  * by calling fdbuf_free().
    793     0    stevel  */
    794     0    stevel int
    795     0    stevel fdbuf_create(void *rights, int rightslen, struct fdbuf **fdbufp)
    796     0    stevel {
    797     0    stevel 	int		numfd, i;
    798     0    stevel 	int		*fds;
    799     0    stevel 	struct file	*fp;
    800     0    stevel 	struct fdbuf	*fdbuf;
    801     0    stevel 	int		fdbufsize;
    802     0    stevel 
    803     0    stevel 	dprint(1, ("fdbuf_create: len %d\n", rightslen));
    804     0    stevel 
    805     0    stevel 	numfd = rightslen / (int)sizeof (int);
    806     0    stevel 
    807     0    stevel 	fdbufsize = (int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *));
    808     0    stevel 	fdbuf = kmem_alloc(fdbufsize, KM_SLEEP);
    809     0    stevel 	fdbuf->fd_size = fdbufsize;
    810     0    stevel 	fdbuf->fd_numfd = 0;
    811     0    stevel 	fdbuf->fd_ebuf = NULL;
    812     0    stevel 	fdbuf->fd_ebuflen = 0;
    813     0    stevel 	fds = (int *)rights;
    814     0    stevel 	for (i = 0; i < numfd; i++) {
    815     0    stevel 		if ((fp = getf(fds[i])) == NULL) {
    816     0    stevel 			fdbuf_free(fdbuf);
    817     0    stevel 			return (EBADF);
    818     0    stevel 		}
    819     0    stevel 		dprint(1, ("fdbuf_create: [%d] = %d, %p refcnt %d\n",
    820  7240   rh87107 		    i, fds[i], (void *)fp, fp->f_count));
    821     0    stevel 		mutex_enter(&fp->f_tlock);
    822     0    stevel 		fp->f_count++;
    823     0    stevel 		mutex_exit(&fp->f_tlock);
    824     0    stevel 		/*
    825     0    stevel 		 * The maximum alignment for fdbuf (or any option header
    826     0    stevel 		 * and its value) it 4 bytes. On a LP64 kernel, the alignment
    827     0    stevel 		 * is not sufficient for pointers (fd_fds in this case). Since
    828     0    stevel 		 * we just did a kmem_alloc (we get a double word alignment),
    829     0    stevel 		 * we don't need to do anything on the send side (we loose
    830     0    stevel 		 * the double word alignment because fdbuf goes after an
    831     0    stevel 		 * option header (eg T_unitdata_req) which is only 4 byte
    832     0    stevel 		 * aligned). We take care of this when we extract the file
    833     0    stevel 		 * descriptor in fdbuf_extract or fdbuf_free.
    834     0    stevel 		 */
    835     0    stevel 		fdbuf->fd_fds[i] = fp;
    836     0    stevel 		fdbuf->fd_numfd++;
    837     0    stevel 		releasef(fds[i]);
    838     0    stevel 		if (audit_active)
    839     0    stevel 			audit_fdsend(fds[i], fp, 0);
    840     0    stevel 	}
    841     0    stevel 	*fdbufp = fdbuf;
    842     0    stevel 	return (0);
    843     0    stevel }
    844     0    stevel 
    845     0    stevel static int
    846     0    stevel fdbuf_optlen(int rightslen)
    847     0    stevel {
    848     0    stevel 	int numfd;
    849     0    stevel 
    850     0    stevel 	numfd = rightslen / (int)sizeof (int);
    851     0    stevel 
    852     0    stevel 	return ((int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *)));
    853     0    stevel }
    854     0    stevel 
    855     0    stevel static t_uscalar_t
    856     0    stevel fdbuf_cmsglen(int fdbuflen)
    857     0    stevel {
    858     0    stevel 	return (t_uscalar_t)((fdbuflen - FDBUF_HDRSIZE) /
    859     0    stevel 	    (int)sizeof (struct file *) * (int)sizeof (int));
    860     0    stevel }
    861     0    stevel 
    862     0    stevel 
    863     0    stevel /*
    864     0    stevel  * Return non-zero if the mblk and fdbuf are consistent.
    865     0    stevel  */
    866     0    stevel static int
    867     0    stevel fdbuf_verify(mblk_t *mp, struct fdbuf *fdbuf, int fdbuflen)
    868     0    stevel {
    869     0    stevel 	if (fdbuflen >= FDBUF_HDRSIZE &&
    870     0    stevel 	    fdbuflen == fdbuf->fd_size) {
    871     0    stevel 		frtn_t *frp = mp->b_datap->db_frtnp;
    872     0    stevel 		/*
    873     0    stevel 		 * Check that the SO_FILEP portion of the
    874     0    stevel 		 * message has not been modified by
    875     0    stevel 		 * the loopback transport. The sending sockfs generates
    876     0    stevel 		 * a message that is esballoc'ed with the free function
    877     0    stevel 		 * being fdbuf_free() and where free_arg contains the
    878     0    stevel 		 * identical information as the SO_FILEP content.
    879     0    stevel 		 *
    880     0    stevel 		 * If any of these constraints are not satisfied we
    881     0    stevel 		 * silently ignore the option.
    882     0    stevel 		 */
    883     0    stevel 		ASSERT(mp);
    884     0    stevel 		if (frp != NULL &&
    885     0    stevel 		    frp->free_func == fdbuf_free &&
    886     0    stevel 		    frp->free_arg != NULL &&
    887     0    stevel 		    bcmp(frp->free_arg, fdbuf, fdbuflen) == 0) {
    888     0    stevel 			dprint(1, ("fdbuf_verify: fdbuf %p len %d\n",
    889  7240   rh87107 			    (void *)fdbuf, fdbuflen));
    890     0    stevel 			return (1);
    891     0    stevel 		} else {
    892  1548   rshoaib 			zcmn_err(getzoneid(), CE_WARN,
    893     0    stevel 			    "sockfs: mismatched fdbuf content (%p)",
    894     0    stevel 			    (void *)mp);
    895     0    stevel 			return (0);
    896     0    stevel 		}
    897     0    stevel 	} else {
    898  1548   rshoaib 		zcmn_err(getzoneid(), CE_WARN,
    899     0    stevel 		    "sockfs: mismatched fdbuf len %d, %d\n",
    900     0    stevel 		    fdbuflen, fdbuf->fd_size);
    901     0    stevel 		return (0);
    902     0    stevel 	}
    903     0    stevel }
    904     0    stevel 
    905     0    stevel /*
    906     0    stevel  * When the file descriptors returned by sorecvmsg can not be passed
    907     0    stevel  * to the application this routine will cleanup the references on
    908     0    stevel  * the files. Start at startoff bytes into the buffer.
    909     0    stevel  */
    910     0    stevel static void
    911     0    stevel close_fds(void *fdbuf, int fdbuflen, int startoff)
    912     0    stevel {
    913     0    stevel 	int *fds = (int *)fdbuf;
    914     0    stevel 	int numfd = fdbuflen / (int)sizeof (int);
    915     0    stevel 	int i;
    916     0    stevel 
    917     0    stevel 	dprint(1, ("close_fds(%p, %d, %d)\n", fdbuf, fdbuflen, startoff));
    918     0    stevel 
    919     0    stevel 	for (i = 0; i < numfd; i++) {
    920     0    stevel 		if (startoff < 0)
    921     0    stevel 			startoff = 0;
    922     0    stevel 		if (startoff < (int)sizeof (int)) {
    923     0    stevel 			/*
    924     0    stevel 			 * This file descriptor is partially or fully after
    925     0    stevel 			 * the offset
    926     0    stevel 			 */
    927     0    stevel 			dprint(0,
    928     0    stevel 			    ("close_fds: cleanup[%d] = %d\n", i, fds[i]));
    929     0    stevel 			(void) closeandsetf(fds[i], NULL);
    930     0    stevel 		}
    931     0    stevel 		startoff -= (int)sizeof (int);
    932     0    stevel 	}
    933     0    stevel }
    934     0    stevel 
    935     0    stevel /*
    936     0    stevel  * Close all file descriptors contained in the control part starting at
    937     0    stevel  * the startoffset.
    938     0    stevel  */
    939     0    stevel void
    940     0    stevel so_closefds(void *control, t_uscalar_t controllen, int oldflg,
    941     0    stevel     int startoff)
    942     0    stevel {
    943     0    stevel 	struct cmsghdr *cmsg;
    944     0    stevel 
    945     0    stevel 	if (control == NULL)
    946     0    stevel 		return;
    947     0    stevel 
    948     0    stevel 	if (oldflg) {
    949     0    stevel 		close_fds(control, controllen, startoff);
    950     0    stevel 		return;
    951     0    stevel 	}
    952     0    stevel 	/* Scan control part for file descriptors. */
    953     0    stevel 	for (cmsg = (struct cmsghdr *)control;
    954     0    stevel 	    CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
    955     0    stevel 	    cmsg = CMSG_NEXT(cmsg)) {
    956     0    stevel 		if (cmsg->cmsg_level == SOL_SOCKET &&
    957     0    stevel 		    cmsg->cmsg_type == SCM_RIGHTS) {
    958     0    stevel 			close_fds(CMSG_CONTENT(cmsg),
    959     0    stevel 			    (int)CMSG_CONTENTLEN(cmsg),
    960     0    stevel 			    startoff - (int)sizeof (struct cmsghdr));
    961     0    stevel 		}
    962     0    stevel 		startoff -= cmsg->cmsg_len;
    963     0    stevel 	}
    964     0    stevel }
    965     0    stevel 
    966     0    stevel /*
    967     0    stevel  * Returns a pointer/length for the file descriptors contained
    968     0    stevel  * in the control buffer. Returns with *fdlenp == -1 if there are no
    969     0    stevel  * file descriptor options present. This is different than there being
    970     0    stevel  * a zero-length file descriptor option.
    971     0    stevel  * Fail if there are multiple SCM_RIGHT cmsgs.
    972     0    stevel  */
    973     0    stevel int
    974     0    stevel so_getfdopt(void *control, t_uscalar_t controllen, int oldflg,
    975     0    stevel     void **fdsp, int *fdlenp)
    976     0    stevel {
    977     0    stevel 	struct cmsghdr *cmsg;
    978     0    stevel 	void *fds;
    979     0    stevel 	int fdlen;
    980     0    stevel 
    981     0    stevel 	if (control == NULL) {
    982     0    stevel 		*fdsp = NULL;
    983     0    stevel 		*fdlenp = -1;
    984     0    stevel 		return (0);
    985     0    stevel 	}
    986     0    stevel 
    987     0    stevel 	if (oldflg) {
    988     0    stevel 		*fdsp = control;
    989     0    stevel 		if (controllen == 0)
    990     0    stevel 			*fdlenp = -1;
    991     0    stevel 		else
    992     0    stevel 			*fdlenp = controllen;
    993     0    stevel 		dprint(1, ("so_getfdopt: old %d\n", *fdlenp));
    994     0    stevel 		return (0);
    995     0    stevel 	}
    996     0    stevel 
    997     0    stevel 	fds = NULL;
    998     0    stevel 	fdlen = 0;
    999     0    stevel 
   1000     0    stevel 	for (cmsg = (struct cmsghdr *)control;
   1001     0    stevel 	    CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
   1002     0    stevel 	    cmsg = CMSG_NEXT(cmsg)) {
   1003     0    stevel 		if (cmsg->cmsg_level == SOL_SOCKET &&
   1004     0    stevel 		    cmsg->cmsg_type == SCM_RIGHTS) {
   1005     0    stevel 			if (fds != NULL)
   1006     0    stevel 				return (EINVAL);
   1007     0    stevel 			fds = CMSG_CONTENT(cmsg);
   1008     0    stevel 			fdlen = (int)CMSG_CONTENTLEN(cmsg);
   1009   408    krgopi 			dprint(1, ("so_getfdopt: new %lu\n",
   1010  5753       gww 			    (size_t)CMSG_CONTENTLEN(cmsg)));
   1011     0    stevel 		}
   1012     0    stevel 	}
   1013     0    stevel 	if (fds == NULL) {
   1014     0    stevel 		dprint(1, ("so_getfdopt: NONE\n"));
   1015     0    stevel 		*fdlenp = -1;
   1016     0    stevel 	} else
   1017     0    stevel 		*fdlenp = fdlen;
   1018     0    stevel 	*fdsp = fds;
   1019     0    stevel 	return (0);
   1020     0    stevel }
   1021     0    stevel 
   1022     0    stevel /*
   1023     0    stevel  * Return the length of the options including any file descriptor options.
   1024     0    stevel  */
   1025     0    stevel t_uscalar_t
   1026     0    stevel so_optlen(void *control, t_uscalar_t controllen, int oldflg)
   1027     0    stevel {
   1028     0    stevel 	struct cmsghdr *cmsg;
   1029     0    stevel 	t_uscalar_t optlen = 0;
   1030     0    stevel 	t_uscalar_t len;
   1031     0    stevel 
   1032     0    stevel 	if (control == NULL)
   1033     0    stevel 		return (0);
   1034     0    stevel 
   1035     0    stevel 	if (oldflg)
   1036     0    stevel 		return ((t_uscalar_t)(sizeof (struct T_opthdr) +
   1037     0    stevel 		    fdbuf_optlen(controllen)));
   1038     0    stevel 
   1039     0    stevel 	for (cmsg = (struct cmsghdr *)control;
   1040     0    stevel 	    CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
   1041     0    stevel 	    cmsg = CMSG_NEXT(cmsg)) {
   1042     0    stevel 		if (cmsg->cmsg_level == SOL_SOCKET &&
   1043     0    stevel 		    cmsg->cmsg_type == SCM_RIGHTS) {
   1044     0    stevel 			len = fdbuf_optlen((int)CMSG_CONTENTLEN(cmsg));
   1045     0    stevel 		} else {
   1046     0    stevel 			len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg);
   1047     0    stevel 		}
   1048     0    stevel 		optlen += (t_uscalar_t)(_TPI_ALIGN_TOPT(len) +
   1049     0    stevel 		    sizeof (struct T_opthdr));
   1050     0    stevel 	}
   1051     0    stevel 	dprint(1, ("so_optlen: controllen %d, flg %d -> optlen %d\n",
   1052  5753       gww 	    controllen, oldflg, optlen));
   1053     0    stevel 	return (optlen);
   1054     0    stevel }
   1055     0    stevel 
   1056     0    stevel /*
   1057     0    stevel  * Copy options from control to the mblk. Skip any file descriptor options.
   1058     0    stevel  */
   1059     0    stevel void
   1060     0    stevel so_cmsg2opt(void *control, t_uscalar_t controllen, int oldflg, mblk_t *mp)
   1061     0    stevel {
   1062     0    stevel 	struct T_opthdr toh;
   1063     0    stevel 	struct cmsghdr *cmsg;
   1064     0    stevel 
   1065     0    stevel 	if (control == NULL)
   1066     0    stevel 		return;
   1067     0    stevel 
   1068     0    stevel 	if (oldflg) {
   1069     0    stevel 		/* No real options - caller has handled file descriptors */
   1070     0    stevel 		return;
   1071     0    stevel 	}
   1072     0    stevel 	for (cmsg = (struct cmsghdr *)control;
   1073     0    stevel 	    CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
   1074     0    stevel 	    cmsg = CMSG_NEXT(cmsg)) {
   1075     0    stevel 		/*
   1076     0    stevel 		 * Note: The caller handles file descriptors prior
   1077     0    stevel 		 * to calling this function.
   1078     0    stevel 		 */
   1079     0    stevel 		t_uscalar_t len;
   1080     0    stevel 
   1081     0    stevel 		if (cmsg->cmsg_level == SOL_SOCKET &&
   1082     0    stevel 		    cmsg->cmsg_type == SCM_RIGHTS)
   1083     0    stevel 			continue;
   1084     0    stevel 
   1085     0    stevel 		len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg);
   1086     0    stevel 		toh.level = cmsg->cmsg_level;
   1087     0    stevel 		toh.name = cmsg->cmsg_type;
   1088     0    stevel 		toh.len = len + (t_uscalar_t)sizeof (struct T_opthdr);
   1089     0    stevel 		toh.status = 0;
   1090     0    stevel 
   1091     0    stevel 		soappendmsg(mp, &toh, sizeof (toh));
   1092     0    stevel 		soappendmsg(mp, CMSG_CONTENT(cmsg), len);
   1093     0    stevel 		mp->b_wptr += _TPI_ALIGN_TOPT(len) - len;
   1094     0    stevel 		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
   1095     0    stevel 	}
   1096     0    stevel }
   1097     0    stevel 
   1098     0    stevel /*
   1099     0    stevel  * Return the length of the control message derived from the options.
   1100     0    stevel  * Exclude SO_SRCADDR and SO_UNIX_CLOSE options. Include SO_FILEP.
   1101     0    stevel  * When oldflg is set only include SO_FILEP.
   1102  2280  gt145670  * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen
   1103  2280  gt145670  * allocates the space that so_opt2cmsg fills. If one changes, the other should
   1104  2280  gt145670  * also be checked for any possible impacts.
   1105     0    stevel  */
   1106     0    stevel t_uscalar_t
   1107     0    stevel so_cmsglen(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg)
   1108     0    stevel {
   1109     0    stevel 	t_uscalar_t cmsglen = 0;
   1110     0    stevel 	struct T_opthdr *tohp;
   1111     0    stevel 	t_uscalar_t len;
   1112     0    stevel 	t_uscalar_t last_roundup = 0;
   1113     0    stevel 
   1114     0    stevel 	ASSERT(__TPI_TOPT_ISALIGNED(opt));
   1115     0    stevel 
   1116     0    stevel 	for (tohp = (struct T_opthdr *)opt;
   1117     0    stevel 	    tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
   1118     0    stevel 	    tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
   1119     0    stevel 		dprint(1, ("so_cmsglen: level 0x%x, name %d, len %d\n",
   1120  5753       gww 		    tohp->level, tohp->name, tohp->len));
   1121     0    stevel 		if (tohp->level == SOL_SOCKET &&
   1122     0    stevel 		    (tohp->name == SO_SRCADDR ||
   1123     0    stevel 		    tohp->name == SO_UNIX_CLOSE)) {
   1124     0    stevel 			continue;
   1125     0    stevel 		}
   1126     0    stevel 		if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) {
   1127     0    stevel 			struct fdbuf *fdbuf;
   1128     0    stevel 			int fdbuflen;
   1129     0    stevel 
   1130     0    stevel 			fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp);
   1131     0    stevel 			fdbuflen = (int)_TPI_TOPT_DATALEN(tohp);
   1132     0    stevel 
   1133     0    stevel 			if (!fdbuf_verify(mp, fdbuf, fdbuflen))
   1134     0    stevel 				continue;
   1135     0    stevel 			if (oldflg) {
   1136     0    stevel 				cmsglen += fdbuf_cmsglen(fdbuflen);
   1137     0    stevel 				continue;
   1138     0    stevel 			}
   1139     0    stevel 			len = fdbuf_cmsglen(fdbuflen);
   1140  2280  gt145670 		} else if (tohp->level == SOL_SOCKET &&
   1141  2280  gt145670 		    tohp->name == SCM_TIMESTAMP) {
   1142  2280  gt145670 			if (oldflg)
   1143  2280  gt145670 				continue;
   1144  2280  gt145670 
   1145  2280  gt145670 			if (get_udatamodel() == DATAMODEL_NATIVE) {
   1146  2280  gt145670 				len = sizeof (struct timeval);
   1147  2280  gt145670 			} else {
   1148  2280  gt145670 				len = sizeof (struct timeval32);
   1149  2280  gt145670 			}
   1150     0    stevel 		} else {
   1151     0    stevel 			if (oldflg)
   1152     0    stevel 				continue;
   1153     0    stevel 			len = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp);
   1154     0    stevel 		}
   1155     0    stevel 		/*
   1156  2280  gt145670 		 * Exclude roundup for last option to not set
   1157     0    stevel 		 * MSG_CTRUNC when the cmsg fits but the padding doesn't fit.
   1158     0    stevel 		 */
   1159     0    stevel 		last_roundup = (t_uscalar_t)
   1160     0    stevel 		    (ROUNDUP_cmsglen(len + (int)sizeof (struct cmsghdr)) -
   1161     0    stevel 		    (len + (int)sizeof (struct cmsghdr)));
   1162     0    stevel 		cmsglen += (t_uscalar_t)(len + (int)sizeof (struct cmsghdr)) +
   1163     0    stevel 		    last_roundup;
   1164     0    stevel 	}
   1165     0    stevel 	cmsglen -= last_roundup;
   1166     0    stevel 	dprint(1, ("so_cmsglen: optlen %d, flg %d -> cmsglen %d\n",
   1167  5753       gww 	    optlen, oldflg, cmsglen));
   1168     0    stevel 	return (cmsglen);
   1169     0    stevel }
   1170     0    stevel 
   1171     0    stevel /*
   1172     0    stevel  * Copy options from options to the control. Convert SO_FILEP to
   1173     0    stevel  * file descriptors.
   1174     0    stevel  * Returns errno or zero.
   1175  2280  gt145670  * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen
   1176  2280  gt145670  * allocates the space that so_opt2cmsg fills. If one changes, the other should
   1177  2280  gt145670  * also be checked for any possible impacts.
   1178     0    stevel  */
   1179     0    stevel int
   1180     0    stevel so_opt2cmsg(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg,
   1181     0    stevel     void *control, t_uscalar_t controllen)
   1182     0    stevel {
   1183     0    stevel 	struct T_opthdr *tohp;
   1184     0    stevel 	struct cmsghdr *cmsg;
   1185     0    stevel 	struct fdbuf *fdbuf;
   1186     0    stevel 	int fdbuflen;
   1187     0    stevel 	int error;
   1188  2280  gt145670 #if defined(DEBUG) || defined(__lint)
   1189  2280  gt145670 	struct cmsghdr *cend = (struct cmsghdr *)
   1190  2280  gt145670 	    (((uint8_t *)control) + ROUNDUP_cmsglen(controllen));
   1191  2280  gt145670 #endif
   1192     0    stevel 	cmsg = (struct cmsghdr *)control;
   1193     0    stevel 
   1194     0    stevel 	ASSERT(__TPI_TOPT_ISALIGNED(opt));
   1195     0    stevel 
   1196     0    stevel 	for (tohp = (struct T_opthdr *)opt;
   1197     0    stevel 	    tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
   1198     0    stevel 	    tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
   1199     0    stevel 		dprint(1, ("so_opt2cmsg: level 0x%x, name %d, len %d\n",
   1200  5753       gww 		    tohp->level, tohp->name, tohp->len));
   1201     0    stevel 
   1202     0    stevel 		if (tohp->level == SOL_SOCKET &&
   1203     0    stevel 		    (tohp->name == SO_SRCADDR ||
   1204     0    stevel 		    tohp->name == SO_UNIX_CLOSE)) {
   1205     0    stevel 			continue;
   1206     0    stevel 		}
   1207     0    stevel 		ASSERT((uintptr_t)cmsg <= (uintptr_t)control + controllen);
   1208     0    stevel 		if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) {
   1209     0    stevel 			fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp);
   1210     0    stevel 			fdbuflen = (int)_TPI_TOPT_DATALEN(tohp);
   1211     0    stevel 
   1212     0    stevel 			if (!fdbuf_verify(mp, fdbuf, fdbuflen))
   1213     0    stevel 				return (EPROTO);
   1214     0    stevel 			if (oldflg) {
   1215     0    stevel 				error = fdbuf_extract(fdbuf, control,
   1216     0    stevel 				    (int)controllen);
   1217     0    stevel 				if (error != 0)
   1218     0    stevel 					return (error);
   1219     0    stevel 				continue;
   1220     0    stevel 			} else {
   1221     0    stevel 				int fdlen;
   1222     0    stevel 
   1223     0    stevel 				fdlen = (int)fdbuf_cmsglen(
   1224     0    stevel 				    (int)_TPI_TOPT_DATALEN(tohp));
   1225     0    stevel 
   1226     0    stevel 				cmsg->cmsg_level = tohp->level;
   1227     0    stevel 				cmsg->cmsg_type = SCM_RIGHTS;
   1228     0    stevel 				cmsg->cmsg_len = (socklen_t)(fdlen +
   1229  5753       gww 				    sizeof (struct cmsghdr));
   1230     0    stevel 
   1231     0    stevel 				error = fdbuf_extract(fdbuf,
   1232  5753       gww 				    CMSG_CONTENT(cmsg), fdlen);
   1233     0    stevel 				if (error != 0)
   1234     0    stevel 					return (error);
   1235     0    stevel 			}
   1236  1673  gt145670 		} else if (tohp->level == SOL_SOCKET &&
   1237  1673  gt145670 		    tohp->name == SCM_TIMESTAMP) {
   1238  1673  gt145670 			timestruc_t *timestamp;
   1239  1673  gt145670 
   1240  1673  gt145670 			if (oldflg)
   1241  1673  gt145670 				continue;
   1242  1673  gt145670 
   1243  1673  gt145670 			cmsg->cmsg_level = tohp->level;
   1244  1673  gt145670 			cmsg->cmsg_type = tohp->name;
   1245  1673  gt145670 
   1246  1673  gt145670 			timestamp =
   1247  1673  gt145670 			    (timestruc_t *)P2ROUNDUP((intptr_t)&tohp[1],
   1248  1673  gt145670 			    sizeof (intptr_t));
   1249  1673  gt145670 
   1250  1673  gt145670 			if (get_udatamodel() == DATAMODEL_NATIVE) {
   1251  2280  gt145670 				struct timeval tv;
   1252  1673  gt145670 
   1253  1673  gt145670 				cmsg->cmsg_len = sizeof (struct timeval) +
   1254  1673  gt145670 				    sizeof (struct cmsghdr);
   1255  2280  gt145670 				tv.tv_sec = timestamp->tv_sec;
   1256  2280  gt145670 				tv.tv_usec = timestamp->tv_nsec /
   1257  2280  gt145670 				    (NANOSEC / MICROSEC);
   1258  2280  gt145670 				/*
   1259  2280  gt145670 				 * on LP64 systems, the struct timeval in
   1260  2280  gt145670 				 * the destination will not be 8-byte aligned,
   1261  2280  gt145670 				 * so use bcopy to avoid alignment trouble
   1262  2280  gt145670 				 */
   1263  2280  gt145670 				bcopy(&tv, CMSG_CONTENT(cmsg), sizeof (tv));
   1264  1673  gt145670 			} else {
   1265  1673  gt145670 				struct timeval32 *time32;
   1266  1673  gt145670 
   1267  1673  gt145670 				cmsg->cmsg_len = sizeof (struct timeval32) +
   1268  1673  gt145670 				    sizeof (struct cmsghdr);
   1269  1673  gt145670 				time32 = (struct timeval32 *)CMSG_CONTENT(cmsg);
   1270  1673  gt145670 				time32->tv_sec = (time32_t)timestamp->tv_sec;
   1271  1673  gt145670 				time32->tv_usec =
   1272  1673  gt145670 				    (int32_t)(timestamp->tv_nsec /
   1273  1673  gt145670 				    (NANOSEC / MICROSEC));
   1274  1673  gt145670 			}
   1275  1673  gt145670 
   1276     0    stevel 		} else {
   1277     0    stevel 			if (oldflg)
   1278     0    stevel 				continue;
   1279     0    stevel 
   1280     0    stevel 			cmsg->cmsg_level = tohp->level;
   1281     0    stevel 			cmsg->cmsg_type = tohp->name;
   1282     0    stevel 			cmsg->cmsg_len = (socklen_t)(_TPI_TOPT_DATALEN(tohp) +
   1283     0    stevel 			    sizeof (struct cmsghdr));
   1284     0    stevel 
   1285     0    stevel 			/* copy content to control data part */
   1286     0    stevel 			bcopy(&tohp[1], CMSG_CONTENT(cmsg),
   1287  5753       gww 			    CMSG_CONTENTLEN(cmsg));
   1288     0    stevel 		}
   1289     0    stevel 		/* move to next CMSG structure! */
   1290     0    stevel 		cmsg = CMSG_NEXT(cmsg);
   1291     0    stevel 	}
   1292  2280  gt145670 	dprint(1, ("so_opt2cmsg: buf %p len %d; cend %p; final cmsg %p\n",
   1293  7240   rh87107 	    control, controllen, (void *)cend, (void *)cmsg));
   1294  2280  gt145670 	ASSERT(cmsg <= cend);
   1295     0    stevel 	return (0);
   1296     0    stevel }
   1297     0    stevel 
   1298     0    stevel /*
   1299     0    stevel  * Extract the SO_SRCADDR option value if present.
   1300     0    stevel  */
   1301     0    stevel void
   1302     0    stevel so_getopt_srcaddr(void *opt, t_uscalar_t optlen, void **srcp,
   1303     0    stevel     t_uscalar_t *srclenp)
   1304     0    stevel {
   1305     0    stevel 	struct T_opthdr		*tohp;
   1306     0    stevel 
   1307     0    stevel 	ASSERT(__TPI_TOPT_ISALIGNED(opt));
   1308     0    stevel 
   1309     0    stevel 	ASSERT(srcp != NULL && srclenp != NULL);
   1310     0    stevel 	*srcp = NULL;
   1311     0    stevel 	*srclenp = 0;
   1312     0    stevel 
   1313     0    stevel 	for (tohp = (struct T_opthdr *)opt;
   1314     0    stevel 	    tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
   1315     0    stevel 	    tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
   1316     0    stevel 		dprint(1, ("so_getopt_srcaddr: level 0x%x, name %d, len %d\n",
   1317  5753       gww 		    tohp->level, tohp->name, tohp->len));
   1318     0    stevel 		if (tohp->level == SOL_SOCKET &&
   1319     0    stevel 		    tohp->name == SO_SRCADDR) {
   1320     0    stevel 			*srcp = _TPI_TOPT_DATA(tohp);
   1321     0    stevel 			*srclenp = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp);
   1322     0    stevel 		}
   1323     0    stevel 	}
   1324     0    stevel }
   1325     0    stevel 
   1326     0    stevel /*
   1327     0    stevel  * Verify if the SO_UNIX_CLOSE option is present.
   1328     0    stevel  */
   1329     0    stevel int
   1330     0    stevel so_getopt_unix_close(void *opt, t_uscalar_t optlen)
   1331     0    stevel {
   1332     0    stevel 	struct T_opthdr		*tohp;
   1333     0    stevel 
   1334     0    stevel 	ASSERT(__TPI_TOPT_ISALIGNED(opt));
   1335     0    stevel 
   1336     0    stevel 	for (tohp = (struct T_opthdr *)opt;
   1337     0    stevel 	    tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
   1338     0    stevel 	    tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
   1339     0    stevel 		dprint(1,
   1340  5753       gww 		    ("so_getopt_unix_close: level 0x%x, name %d, len %d\n",
   1341  5753       gww 		    tohp->level, tohp->name, tohp->len));
   1342     0    stevel 		if (tohp->level == SOL_SOCKET &&
   1343     0    stevel 		    tohp->name == SO_UNIX_CLOSE)
   1344     0    stevel 			return (1);
   1345     0    stevel 	}
   1346     0    stevel 	return (0);
   1347     0    stevel }
   1348     0    stevel 
   1349     0    stevel /*
   1350     0    stevel  * Allocate an M_PROTO message.
   1351     0    stevel  *
   1352     0    stevel  * If allocation fails the behavior depends on sleepflg:
   1353     0    stevel  *	_ALLOC_NOSLEEP	fail immediately
   1354     0    stevel  *	_ALLOC_INTR	sleep for memory until a signal is caught
   1355     0    stevel  *	_ALLOC_SLEEP	sleep forever. Don't return NULL.
   1356     0    stevel  */
   1357     0    stevel mblk_t *
   1358  8778      Erik soallocproto(size_t size, int sleepflg, cred_t *cr)
   1359     0    stevel {
   1360     0    stevel 	mblk_t	*mp;
   1361     0    stevel 
   1362     0    stevel 	/* Round up size for reuse */
   1363     0    stevel 	size = MAX(size, 64);
   1364  8778      Erik 	if (cr != NULL)
   1365  8778      Erik 		mp = allocb_cred(size, cr, curproc->p_pid);
   1366  8778      Erik 	else
   1367  8778      Erik 		mp = allocb(size, BPRI_MED);
   1368  8778      Erik 
   1369     0    stevel 	if (mp == NULL) {
   1370     0    stevel 		int error;	/* Dummy - error not returned to caller */
   1371     0    stevel 
   1372     0    stevel 		switch (sleepflg) {
   1373     0    stevel 		case _ALLOC_SLEEP:
   1374  8778      Erik 			if (cr != NULL) {
   1375  8778      Erik 				mp = allocb_cred_wait(size, STR_NOSIG, &error,
   1376  8778      Erik 				    cr, curproc->p_pid);
   1377  8778      Erik 			} else {
   1378  8778      Erik 				mp = allocb_wait(size, BPRI_MED, STR_NOSIG,
   1379  8778      Erik 				    &error);
   1380  8778      Erik 			}
   1381     0    stevel 			ASSERT(mp);
   1382     0    stevel 			break;
   1383     0    stevel 		case _ALLOC_INTR:
   1384  8778      Erik 			if (cr != NULL) {
   1385  8778      Erik 				mp = allocb_cred_wait(size, 0, &error, cr,
   1386  8778      Erik 				    curproc->p_pid);
   1387  8778      Erik 			} else {
   1388  8778      Erik 				mp = allocb_wait(size, BPRI_MED, 0, &error);
   1389  8778      Erik 			}
   1390     0    stevel 			if (mp == NULL) {
   1391     0    stevel 				/* Caught signal while sleeping for memory */
   1392     0    stevel 				eprintline(ENOBUFS);
   1393     0    stevel 				return (NULL);
   1394     0    stevel 			}
   1395     0    stevel 			break;
   1396     0    stevel 		case _ALLOC_NOSLEEP:
   1397     0    stevel 		default:
   1398     0    stevel 			eprintline(ENOBUFS);
   1399     0    stevel 			return (NULL);
   1400     0    stevel 		}
   1401     0    stevel 	}
   1402     0    stevel 	DB_TYPE(mp) = M_PROTO;
   1403     0    stevel 	return (mp);
   1404     0    stevel }
   1405     0    stevel 
   1406     0    stevel /*
   1407     0    stevel  * Allocate an M_PROTO message with a single component.
   1408     0    stevel  * len is the length of buf. size is the amount to allocate.
   1409     0    stevel  *
   1410     0    stevel  * buf can be NULL with a non-zero len.
   1411     0    stevel  * This results in a bzero'ed chunk being placed the message.
   1412     0    stevel  */
   1413     0    stevel mblk_t *
   1414  8778      Erik soallocproto1(const void *buf, ssize_t len, ssize_t size, int sleepflg,
   1415  8778      Erik     cred_t *cr)
   1416     0    stevel {
   1417     0    stevel 	mblk_t	*mp;
   1418     0    stevel 
   1419     0    stevel 	if (size == 0)
   1420     0    stevel 		size = len;
   1421     0    stevel 
   1422     0    stevel 	ASSERT(size >= len);
   1423     0    stevel 	/* Round up size for reuse */
   1424     0    stevel 	size = MAX(size, 64);
   1425  8778      Erik 	mp = soallocproto(size, sleepflg, cr);
   1426     0    stevel 	if (mp == NULL)
   1427     0    stevel 		return (NULL);
   1428     0    stevel 	mp->b_datap->db_type = M_PROTO;
   1429     0    stevel 	if (len != 0) {
   1430     0    stevel 		if (buf != NULL)
   1431     0    stevel 			bcopy(buf, mp->b_wptr, len);
   1432     0    stevel 		else
   1433     0    stevel 			bzero(mp->b_wptr, len);
   1434     0    stevel 		mp->b_wptr += len;
   1435     0    stevel 	}
   1436     0    stevel 	return (mp);
   1437     0    stevel }
   1438     0    stevel 
   1439     0    stevel /*
   1440     0    stevel  * Append buf/len to mp.
   1441     0    stevel  * The caller has to ensure that there is enough room in the mblk.
   1442     0    stevel  *
   1443     0    stevel  * buf can be NULL with a non-zero len.
   1444     0    stevel  * This results in a bzero'ed chunk being placed the message.
   1445     0    stevel  */
   1446     0    stevel void
   1447     0    stevel soappendmsg(mblk_t *mp, const void *buf, ssize_t len)
   1448     0    stevel {
   1449     0    stevel 	ASSERT(mp);
   1450     0    stevel 
   1451     0    stevel 	if (len != 0) {
   1452     0    stevel 		/* Assert for room left */
   1453     0    stevel 		ASSERT(mp->b_datap->db_lim - mp->b_wptr >= len);
   1454     0    stevel 		if (buf != NULL)
   1455     0    stevel 			bcopy(buf, mp->b_wptr, len);
   1456     0    stevel 		else
   1457     0    stevel 			bzero(mp->b_wptr, len);
   1458     0    stevel 	}
   1459     0    stevel 	mp->b_wptr += len;
   1460     0    stevel }
   1461     0    stevel 
   1462     0    stevel /*
   1463     0    stevel  * Create a message using two kernel buffers.
   1464     0    stevel  * If size is set that will determine the allocation size (e.g. for future
   1465     0    stevel  * soappendmsg calls). If size is zero it is derived from the buffer
   1466     0    stevel  * lengths.
   1467     0    stevel  */
   1468     0    stevel mblk_t *
   1469     0    stevel soallocproto2(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2,
   1470  8778      Erik     ssize_t size, int sleepflg, cred_t *cr)
   1471     0    stevel {
   1472     0    stevel 	mblk_t *mp;
   1473     0    stevel 
   1474     0    stevel 	if (size == 0)
   1475     0    stevel 		size = len1 + len2;
   1476     0    stevel 	ASSERT(size >= len1 + len2);
   1477     0    stevel 
   1478  8778      Erik 	mp = soallocproto1(buf1, len1, size, sleepflg, cr);
   1479     0    stevel 	if (mp)
   1480     0    stevel 		soappendmsg(mp, buf2, len2);
   1481     0    stevel 	return (mp);
   1482     0    stevel }
   1483     0    stevel 
   1484     0    stevel /*
   1485     0    stevel  * Create a message using three kernel buffers.
   1486     0    stevel  * If size is set that will determine the allocation size (for future
   1487     0    stevel  * soappendmsg calls). If size is zero it is derived from the buffer
   1488     0    stevel  * lengths.
   1489     0    stevel  */
   1490     0    stevel mblk_t *
   1491     0    stevel soallocproto3(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2,
   1492  8778      Erik     const void *buf3, ssize_t len3, ssize_t size, int sleepflg, cred_t *cr)
   1493     0    stevel {
   1494     0    stevel 	mblk_t *mp;
   1495     0    stevel 
   1496     0    stevel 	if (size == 0)
   1497     0    stevel 		size = len1 + len2 +len3;
   1498     0    stevel 	ASSERT(size >= len1 + len2 + len3);
   1499     0    stevel 
   1500  8778      Erik 	mp = soallocproto1(buf1, len1, size, sleepflg, cr);
   1501     0    stevel 	if (mp != NULL) {
   1502     0    stevel 		soappendmsg(mp, buf2, len2);
   1503     0    stevel 		soappendmsg(mp, buf3, len3);
   1504     0    stevel 	}
   1505     0    stevel 	return (mp);
   1506     0    stevel }
   1507     0    stevel 
   1508     0    stevel #ifdef DEBUG
   1509     0    stevel char *
   1510     0    stevel pr_state(uint_t state, uint_t mode)
   1511     0    stevel {
   1512     0    stevel 	static char buf[1024];
   1513     0    stevel 
   1514     0    stevel 	buf[0] = 0;
   1515     0    stevel 	if (state & SS_ISCONNECTED)
   1516  7240   rh87107 		(void) strcat(buf, "ISCONNECTED ");
   1517     0    stevel 	if (state & SS_ISCONNECTING)
   1518  7240   rh87107 		(void) strcat(buf, "ISCONNECTING ");
   1519     0    stevel 	if (state & SS_ISDISCONNECTING)
   1520  7240   rh87107 		(void) strcat(buf, "ISDISCONNECTING ");
   1521     0    stevel 	if (state & SS_CANTSENDMORE)
   1522  7240   rh87107 		(void) strcat(buf, "CANTSENDMORE ");
   1523     0    stevel 
   1524     0    stevel 	if (state & SS_CANTRCVMORE)
   1525  7240   rh87107 		(void) strcat(buf, "CANTRCVMORE ");
   1526     0    stevel 	if (state & SS_ISBOUND)
   1527  7240   rh87107 		(void) strcat(buf, "ISBOUND ");
   1528     0    stevel 	if (state & SS_NDELAY)
   1529  7240   rh87107 		(void) strcat(buf, "NDELAY ");
   1530     0    stevel 	if (state & SS_NONBLOCK)
   1531  7240   rh87107 		(void) strcat(buf, "NONBLOCK ");
   1532     0    stevel 
   1533     0    stevel 	if (state & SS_ASYNC)
   1534  7240   rh87107 		(void) strcat(buf, "ASYNC ");
   1535     0    stevel 	if (state & SS_ACCEPTCONN)
   1536  7240   rh87107 		(void) strcat(buf, "ACCEPTCONN ");
   1537     0    stevel 	if (state & SS_SAVEDEOR)
   1538  7240   rh87107 		(void) strcat(buf, "SAVEDEOR ");
   1539     0    stevel 
   1540     0    stevel 	if (state & SS_RCVATMARK)
   1541  7240   rh87107 		(void) strcat(buf, "RCVATMARK ");
   1542     0    stevel 	if (state & SS_OOBPEND)
   1543  7240   rh87107 		(void) strcat(buf, "OOBPEND ");
   1544     0    stevel 	if (state & SS_HAVEOOBDATA)
   1545  7240   rh87107 		(void) strcat(buf, "HAVEOOBDATA ");
   1546     0    stevel 	if (state & SS_HADOOBDATA)
   1547  7240   rh87107 		(void) strcat(buf, "HADOOBDATA ");
   1548     0    stevel 
   1549     0    stevel 	if (mode & SM_PRIV)
   1550  7240   rh87107 		(void) strcat(buf, "PRIV ");
   1551     0    stevel 	if (mode & SM_ATOMIC)
   1552  7240   rh87107 		(void) strcat(buf, "ATOMIC ");
   1553     0    stevel 	if (mode & SM_ADDR)
   1554  7240   rh87107 		(void) strcat(buf, "ADDR ");
   1555     0    stevel 	if (mode & SM_CONNREQUIRED)
   1556  7240   rh87107 		(void) strcat(buf, "CONNREQUIRED ");
   1557     0    stevel 
   1558     0    stevel 	if (mode & SM_FDPASSING)
   1559  7240   rh87107 		(void) strcat(buf, "FDPASSING ");
   1560     0    stevel 	if (mode & SM_EXDATA)
   1561  7240   rh87107 		(void) strcat(buf, "EXDATA ");
   1562     0    stevel 	if (mode & SM_OPTDATA)
   1563  7240   rh87107 		(void) strcat(buf, "OPTDATA ");
   1564     0    stevel 	if (mode & SM_BYTESTREAM)
   1565  7240   rh87107 		(void) strcat(buf, "BYTESTREAM ");
   1566     0    stevel 	return (buf);
   1567     0    stevel }
   1568     0    stevel 
   1569     0    stevel char *
   1570     0    stevel pr_addr(int family, struct sockaddr *addr, t_uscalar_t addrlen)
   1571     0    stevel {
   1572     0    stevel 	static char buf[1024];
   1573     0    stevel 
   1574     0    stevel 	if (addr == NULL || addrlen == 0) {
   1575  7240   rh87107 		(void) sprintf(buf, "(len %d) %p", addrlen, (void *)addr);
   1576     0    stevel 		return (buf);
   1577     0    stevel 	}
   1578     0    stevel 	switch (family) {
   1579     0    stevel 	case AF_INET: {
   1580     0    stevel 		struct sockaddr_in sin;
   1581     0    stevel 
   1582     0    stevel 		bcopy(addr, &sin, sizeof (sin));
   1583     0    stevel 
   1584     0    stevel 		(void) sprintf(buf, "(len %d) %x/%d",
   1585  6712     tomee 		    addrlen, ntohl(sin.sin_addr.s_addr), ntohs(sin.sin_port));
   1586     0    stevel 		break;
   1587     0    stevel 	}
   1588     0    stevel 	case AF_INET6: {
   1589     0    stevel 		struct sockaddr_in6 sin6;
   1590     0    stevel 		uint16_t *piece = (uint16_t *)&sin6.sin6_addr;
   1591     0    stevel 
   1592     0    stevel 		bcopy((char *)addr, (char *)&sin6, sizeof (sin6));
   1593  7240   rh87107 		(void) sprintf(buf, "(len %d) %x:%x:%x:%x:%x:%x:%x:%x/%d",
   1594     0    stevel 		    addrlen,
   1595     0    stevel 		    ntohs(piece[0]), ntohs(piece[1]),
   1596     0    stevel 		    ntohs(piece[2]), ntohs(piece[3]),
   1597     0    stevel 		    ntohs(piece[4]), ntohs(piece[5]),
   1598     0    stevel 		    ntohs(piece[6]), ntohs(piece[7]),
   1599     0    stevel 		    ntohs(sin6.sin6_port));
   1600     0    stevel 		break;
   1601     0    stevel 	}
   1602     0    stevel 	case AF_UNIX: {
   1603     0    stevel 		struct sockaddr_un *soun = (struct sockaddr_un *)addr;
   1604     0    stevel 
   1605  6712     tomee 		(void) sprintf(buf, "(len %d) %s", addrlen,
   1606  5753       gww 		    (soun == NULL) ? "(none)" : soun->sun_path);
   1607     0    stevel 		break;
   1608     0    stevel 	}
   1609     0    stevel 	default:
   1610     0    stevel 		(void) sprintf(buf, "(unknown af %d)", family);
   1611     0    stevel 		break;
   1612     0    stevel 	}
   1613     0    stevel 	return (buf);
   1614     0    stevel }
   1615     0    stevel 
   1616     0    stevel /* The logical equivalence operator (a if-and-only-if b) */
   1617     0    stevel #define	EQUIV(a, b)	(((a) && (b)) || (!(a) && (!(b))))
   1618     0    stevel 
   1619     0    stevel /*
   1620     0    stevel  * Verify limitations and invariants on oob state.
   1621     0    stevel  * Return 1 if OK, otherwise 0 so that it can be used as
   1622     0    stevel  *	ASSERT(verify_oobstate(so));
   1623     0    stevel  */
   1624     0    stevel int
   1625     0    stevel so_verify_oobstate(struct sonode *so)
   1626     0    stevel {
   1627  8348      Eric 	boolean_t havemark;
   1628  8348      Eric 
   1629     0    stevel 	ASSERT(MUTEX_HELD(&so->so_lock));
   1630     0    stevel 
   1631     0    stevel 	/*
   1632     0    stevel 	 * The possible state combinations are:
   1633     0    stevel 	 *	0
   1634     0    stevel 	 *	SS_OOBPEND
   1635     0    stevel 	 *	SS_OOBPEND|SS_HAVEOOBDATA
   1636     0    stevel 	 *	SS_OOBPEND|SS_HADOOBDATA
   1637     0    stevel 	 *	SS_HADOOBDATA
   1638     0    stevel 	 */
   1639     0    stevel 	switch (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA)) {
   1640     0    stevel 	case 0:
   1641     0    stevel 	case SS_OOBPEND:
   1642     0    stevel 	case SS_OOBPEND|SS_HAVEOOBDATA:
   1643     0    stevel 	case SS_OOBPEND|SS_HADOOBDATA:
   1644     0    stevel 	case SS_HADOOBDATA:
   1645     0    stevel 		break;
   1646     0    stevel 	default:
   1647  8348      Eric 		printf("Bad oob state 1 (%p): state %s\n",
   1648  8348      Eric 		    (void *)so, pr_state(so->so_state, so->so_mode));
   1649     0    stevel 		return (0);
   1650     0    stevel 	}
   1651     0    stevel 
   1652     0    stevel 	/* SS_RCVATMARK should only be set when SS_OOBPEND is set */
   1653     0    stevel 	if ((so->so_state & (SS_RCVATMARK|SS_OOBPEND)) == SS_RCVATMARK) {
   1654  8348      Eric 		printf("Bad oob state 2 (%p): state %s\n",
   1655  8348      Eric 		    (void *)so, pr_state(so->so_state, so->so_mode));
   1656     0    stevel 		return (0);
   1657     0    stevel 	}
   1658     0    stevel 
   1659     0    stevel 	/*
   1660  8348      Eric 	 * (havemark != 0 or SS_RCVATMARK) iff SS_OOBPEND
   1661  8348      Eric 	 * For TPI, the presence of a "mark" is indicated by sti_oobsigcnt.
   1662     0    stevel 	 */
   1663  8348      Eric 	havemark = (SOCK_IS_NONSTR(so)) ? so->so_oobmark > 0 :
   1664  8348      Eric 	    SOTOTPI(so)->sti_oobsigcnt > 0;
   1665  8348      Eric 
   1666  8348      Eric 	if (!EQUIV(havemark || (so->so_state & SS_RCVATMARK),
   1667  5753       gww 	    so->so_state & SS_OOBPEND)) {
   1668  8348      Eric 		printf("Bad oob state 3 (%p): state %s\n",
   1669  8348      Eric 		    (void *)so, pr_state(so->so_state, so->so_mode));
   1670     0    stevel 		return (0);
   1671     0    stevel 	}
   1672     0    stevel 
   1673     0    stevel 	/*
   1674     0    stevel 	 * Unless SO_OOBINLINE we have so_oobmsg != NULL iff SS_HAVEOOBDATA
   1675     0    stevel 	 */
   1676     0    stevel 	if (!(so->so_options & SO_OOBINLINE) &&
   1677     0    stevel 	    !EQUIV(so->so_oobmsg != NULL, so->so_state & SS_HAVEOOBDATA)) {
   1678  8348      Eric 		printf("Bad oob state 4 (%p): state %s\n",
   1679  8348      Eric 		    (void *)so, pr_state(so->so_state, so->so_mode));
   1680     0    stevel 		return (0);
   1681     0    stevel 	}
   1682  8348      Eric 
   1683  8348      Eric 	if (!SOCK_IS_NONSTR(so) &&
   1684  8348      Eric 	    SOTOTPI(so)->sti_oobsigcnt < SOTOTPI(so)->sti_oobcnt) {
   1685     0    stevel 		printf("Bad oob state 5 (%p): counts %d/%d state %s\n",
   1686  8348      Eric 		    (void *)so, SOTOTPI(so)->sti_oobsigcnt,
   1687  8348      Eric 		    SOTOTPI(so)->sti_oobcnt,
   1688  8348      Eric 		    pr_state(so->so_state, so->so_mode));
   1689     0    stevel 		return (0);
   1690     0    stevel 	}
   1691  8348      Eric 
   1692     0    stevel 	return (1);
   1693     0    stevel }
   1694     0    stevel #undef	EQUIV
   1695     0    stevel #endif /* DEBUG */
   1696     0    stevel 
   1697     0    stevel /* initialize sockfs zone specific kstat related items			*/
   1698     0    stevel void *
   1699     0    stevel sock_kstat_init(zoneid_t zoneid)
   1700     0    stevel {
   1701     0    stevel 	kstat_t	*ksp;
   1702     0    stevel 
   1703     0    stevel 	ksp = kstat_create_zone("sockfs", 0, "sock_unix_list", "misc",
   1704     0    stevel 	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE|KSTAT_FLAG_VIRTUAL, zoneid);
   1705     0    stevel 
   1706     0    stevel 	if (ksp != NULL) {
   1707     0    stevel 		ksp->ks_update = sockfs_update;
   1708     0    stevel 		ksp->ks_snapshot = sockfs_snapshot;
   1709     0    stevel 		ksp->ks_lock = &socklist.sl_lock;
   1710     0    stevel 		ksp->ks_private = (void *)(uintptr_t)zoneid;
   1711     0    stevel 		kstat_install(ksp);
   1712     0    stevel 	}
   1713     0    stevel 
   1714     0    stevel 	return (ksp);
   1715     0    stevel }
   1716     0    stevel 
   1717     0    stevel /* tear down sockfs zone specific kstat related items			*/
   1718     0    stevel /*ARGSUSED*/
   1719     0    stevel void
   1720     0    stevel sock_kstat_fini(zoneid_t zoneid, void *arg)
   1721     0    stevel {
   1722     0    stevel 	kstat_t *ksp = (kstat_t *)arg;
   1723     0    stevel 
   1724     0    stevel 	if (ksp != NULL) {
   1725     0    stevel 		ASSERT(zoneid == (zoneid_t)(uintptr_t)ksp->ks_private);
   1726     0    stevel 		kstat_delete(ksp);
   1727     0    stevel 	}
   1728     0    stevel }
   1729     0    stevel 
   1730     0    stevel /*
   1731     0    stevel  * Zones:
   1732     0    stevel  * Note that nactive is going to be different for each zone.
   1733     0    stevel  * This means we require kstat to call sockfs_update and then sockfs_snapshot
   1734     0    stevel  * for the same zone, or sockfs_snapshot will be taken into the wrong size
   1735     0    stevel  * buffer. This is safe, but if the buffer is too small, user will not be
   1736     0    stevel  * given details of all sockets. However, as this kstat has a ks_lock, kstat
   1737     0    stevel  * driver will keep it locked between the update and the snapshot, so no
   1738     0    stevel  * other process (zone) can currently get inbetween resulting in a wrong size
   1739     0    stevel  * buffer allocation.
   1740     0    stevel  */
   1741     0    stevel static int
   1742     0    stevel sockfs_update(kstat_t *ksp, int rw)
   1743     0    stevel {
   1744     0    stevel 	uint_t	nactive = 0;		/* # of active AF_UNIX sockets	*/
   1745     0    stevel 	struct sonode	*so;		/* current sonode on socklist	*/
   1746     0    stevel 	zoneid_t	myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private;
   1747     0    stevel 
   1748     0    stevel 	ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid());
   1749     0    stevel 
   1750     0    stevel 	if (rw == KSTAT_WRITE) {	/* bounce all writes		*/
   1751     0    stevel 		return (EACCES);
   1752     0    stevel 	}
   1753     0    stevel 
   1754  8348      Eric 	for (so = socklist.sl_list; so != NULL; so = SOTOTPI(so)->sti_next_so) {
   1755  8348      Eric 		if (so->so_count != 0 && so->so_zoneid == myzoneid) {
   1756     0    stevel 			nactive++;
   1757     0    stevel 		}
   1758     0    stevel 	}
   1759     0    stevel 	ksp->ks_ndata = nactive;
   1760     0    stevel 	ksp->ks_data_size = nactive * sizeof (struct k_sockinfo);
   1761     0    stevel 
   1762     0    stevel 	return (0);
   1763     0    stevel }
   1764     0    stevel 
   1765     0    stevel static int
   1766     0    stevel sockfs_snapshot(kstat_t *ksp, void *buf, int rw)
   1767     0    stevel {
   1768     0    stevel 	int			ns;	/* # of sonodes we've copied	*/
   1769     0    stevel 	struct sonode		*so;	/* current sonode on socklist	*/
   1770     0    stevel 	struct k_sockinfo	*pksi;	/* where we put sockinfo data	*/
   1771     0    stevel 	t_uscalar_t		sn_len;	/* soa_len			*/
   1772     0    stevel 	zoneid_t		myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private;
   1773  8348      Eric 	sotpi_info_t 		*sti;
   1774     0    stevel 
   1775     0    stevel 	ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid());
   1776     0    stevel 
   1777     0    stevel 	ksp->ks_snaptime = gethrtime();
   1778     0    stevel 
   1779     0    stevel 	if (rw == KSTAT_WRITE) {	/* bounce all writes		*/
   1780     0    stevel 		return (EACCES);
   1781     0    stevel 	}
   1782     0    stevel 
   1783     0    stevel 	/*
   1784     0    stevel 	 * for each sonode on the socklist, we massage the important
   1785     0    stevel 	 * info into buf, in k_sockinfo format.
   1786     0    stevel 	 */
   1787     0    stevel 	pksi = (struct k_sockinfo *)buf;
   1788  8348      Eric 	ns = 0;
   1789  8348      Eric 	for (so = socklist.sl_list; so != NULL; so = SOTOTPI(so)->sti_next_so) {
   1790     0    stevel 		/* only stuff active sonodes and the same zone:		*/
   1791  8348      Eric 		if (so->so_count == 0 || so->so_zoneid != myzoneid) {
   1792     0    stevel 			continue;
   1793     0    stevel 		}
   1794     0    stevel 
   1795     0    stevel 		/*
   1796     0    stevel 		 * If the sonode was activated between the update and the
   1797     0    stevel 		 * snapshot, we're done - as this is only a snapshot.
   1798     0    stevel 		 */
   1799     0    stevel 		if ((caddr_t)(pksi) >= (caddr_t)buf + ksp->ks_data_size) {
   1800     0    stevel 			break;
   1801     0    stevel 		}
   1802     0    stevel 
   1803  8348      Eric 		sti = SOTOTPI(so);
   1804     0    stevel 		/* copy important info into buf:			*/
   1805     0    stevel 		pksi->ks_si.si_size = sizeof (struct k_sockinfo);
   1806     0    stevel 		pksi->ks_si.si_family = so->so_family;
   1807     0    stevel 		pksi->ks_si.si_type = so->so_type;
   1808     0    stevel 		pksi->ks_si.si_flag = so->so_flag;
   1809     0    stevel 		pksi->ks_si.si_state = so->so_state;
   1810  8348      Eric 		pksi->ks_si.si_serv_type = sti->sti_serv_type;
   1811  8348      Eric 		pksi->ks_si.si_ux_laddr_sou_magic =
   1812  8348      Eric 		    sti->sti_ux_laddr.soua_magic;
   1813  8348      Eric 		pksi->ks_si.si_ux_faddr_sou_magic =
   1814  8348      Eric 		    sti->sti_ux_faddr.soua_magic;
   1815  8348      Eric 		pksi->ks_si.si_laddr_soa_len = sti->sti_laddr.soa_len;
   1816  8348      Eric 		pksi->ks_si.si_faddr_soa_len = sti->sti_faddr.soa_len;
   1817     0    stevel 		pksi->ks_si.si_szoneid = so->so_zoneid;
   1818  8348      Eric 		pksi->ks_si.si_faddr_noxlate = sti->sti_faddr_noxlate;
   1819     0    stevel 
   1820     0    stevel 		mutex_enter(&so->so_lock);
   1821     0    stevel 
   1822  8348      Eric 		if (sti->sti_laddr_sa != NULL) {
   1823  8348      Eric 			ASSERT(sti->sti_laddr_sa->sa_data != NULL);
   1824  8348      Eric 			sn_len = sti->sti_laddr_len;
   1825     0    stevel 			ASSERT(sn_len <= sizeof (short) +
   1826     0    stevel 			    sizeof (pksi->ks_si.si_laddr_sun_path));
   1827     0    stevel 
   1828     0    stevel 			pksi->ks_si.si_laddr_family =
   1829  8348      Eric 			    sti->sti_laddr_sa->sa_family;
   1830     0    stevel 			if (sn_len != 0) {
   1831     0    stevel 				/* AF_UNIX socket names are NULL terminated */
   1832     0    stevel 				(void) strncpy(pksi->ks_si.si_laddr_sun_path,
   1833  8348      Eric 				    sti->sti_laddr_sa->sa_data,
   1834     0    stevel 				    sizeof (pksi->ks_si.si_laddr_sun_path));
   1835     0    stevel 				sn_len = strlen(pksi->ks_si.si_laddr_sun_path);
   1836     0    stevel 			}
   1837     0    stevel 			pksi->ks_si.si_laddr_sun_path[sn_len] = 0;
   1838     0    stevel 		}
   1839     0    stevel 
   1840  8348      Eric 		if (sti->sti_faddr_sa != NULL) {
   1841  8348      Eric 			ASSERT(sti->sti_faddr_sa->sa_data != NULL);
   1842  8348      Eric 			sn_len = sti->sti_faddr_len;
   1843     0    stevel 			ASSERT(sn_len <= sizeof (short) +
   1844     0    stevel 			    sizeof (pksi->ks_si.si_faddr_sun_path));
   1845     0    stevel 
   1846     0    stevel 			pksi->ks_si.si_faddr_family =
   1847  8348      Eric 			    sti->sti_faddr_sa->sa_family;
   1848     0    stevel 			if (sn_len != 0) {
   1849     0    stevel 				(void) strncpy(pksi->ks_si.si_faddr_sun_path,
   1850  8348      Eric 				    sti->sti_faddr_sa->sa_data,
   1851     0    stevel 				    sizeof (pksi->ks_si.si_faddr_sun_path));
   1852     0    stevel 				sn_len = strlen(pksi->ks_si.si_faddr_sun_path);
   1853     0    stevel 			}
   1854     0    stevel 			pksi->ks_si.si_faddr_sun_path[sn_len] = 0;
   1855     0    stevel 		}
   1856     0    stevel 
   1857     0    stevel 		mutex_exit(&so->so_lock);
   1858     0    stevel 
   1859     0    stevel 		(void) sprintf(pksi->ks_straddr[0], "%p", (void *)so);
   1860     0    stevel 		(void) sprintf(pksi->ks_straddr[1], "%p",
   1861  8348      Eric 		    (void *)sti->sti_ux_laddr.soua_vp);
   1862     0    stevel 		(void) sprintf(pksi->ks_straddr[2], "%p",
   1863  8348      Eric 		    (void *)sti->sti_ux_faddr.soua_vp);
   1864     0    stevel 
   1865     0    stevel 		ns++;
   1866     0    stevel 		pksi++;
   1867     0    stevel 	}
   1868     0    stevel 
   1869     0    stevel 	ksp->ks_ndata = ns;
   1870     0    stevel 	return (0);
   1871     0    stevel }
   1872     0    stevel 
   1873     0    stevel ssize_t
   1874     0    stevel soreadfile(file_t *fp, uchar_t *buf, u_offset_t fileoff, int *err, size_t size)
   1875     0    stevel {
   1876     0    stevel 	struct uio auio;
   1877     0    stevel 	struct iovec aiov[MSG_MAXIOVLEN];
   1878     0    stevel 	register vnode_t *vp;
   1879     0    stevel 	int ioflag, rwflag;
   1880     0    stevel 	ssize_t cnt;
   1881     0    stevel 	int error = 0;
   1882     0    stevel 	int iovcnt = 0;
   1883     0    stevel 	short fflag;
   1884     0    stevel 
   1885     0    stevel 	vp = fp->f_vnode;
   1886     0    stevel 	fflag = fp->f_flag;
   1887     0    stevel 
   1888     0    stevel 	rwflag = 0;
   1889     0    stevel 	aiov[0].iov_base = (caddr_t)buf;
   1890     0    stevel 	aiov[0].iov_len = size;
   1891     0    stevel 	iovcnt = 1;
   1892     0    stevel 	cnt = (ssize_t)size;
   1893     0    stevel 	(void) VOP_RWLOCK(vp, rwflag, NULL);
   1894     0    stevel 
   1895     0    stevel 	auio.uio_loffset = fileoff;
   1896     0    stevel 	auio.uio_iov = aiov;
   1897     0    stevel 	auio.uio_iovcnt = iovcnt;
   1898     0    stevel 	auio.uio_resid = cnt;
   1899     0    stevel 	auio.uio_segflg = UIO_SYSSPACE;
   1900     0    stevel 	auio.uio_llimit = MAXOFFSET_T;
   1901     0    stevel 	auio.uio_fmode = fflag;
   1902     0    stevel 	auio.uio_extflg = UIO_COPY_CACHED;
   1903     0    stevel 
   1904     0    stevel 	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
   1905     0    stevel 
   1906     0    stevel 	/* If read sync is not asked for, filter sync flags */
   1907     0    stevel 	if ((ioflag & FRSYNC) == 0)
   1908     0    stevel 		ioflag &= ~(FSYNC|FDSYNC);
   1909     0    stevel 	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
   1910     0    stevel 	cnt -= auio.uio_resid;
   1911     0    stevel 
   1912     0    stevel 	VOP_RWUNLOCK(vp, rwflag, NULL);
   1913     0    stevel 
   1914     0    stevel 	if (error == EINTR && cnt != 0)
   1915     0    stevel 		error = 0;
   1916     0    stevel out:
   1917     0    stevel 	if (error != 0) {
   1918     0    stevel 		*err = error;
   1919     0    stevel 		return (0);
   1920     0    stevel 	} else {
   1921     0    stevel 		*err = 0;
   1922     0    stevel 		return (cnt);
   1923     0    stevel 	}
   1924     0    stevel }
   1925  8348      Eric 
   1926  8348      Eric int
   1927  8348      Eric so_copyin(const void *from, void *to, size_t size, int fromkernel)
   1928  8348      Eric {
   1929  8348      Eric 	if (fromkernel) {
   1930  8348      Eric 		bcopy(from, to, size);
   1931  8348      Eric 		return (0);
   1932  8348      Eric 	}
   1933  8348      Eric 	return (xcopyin(from, to, size));
   1934  8348      Eric }
   1935  8348      Eric 
   1936  8348      Eric int
   1937  8348      Eric so_copyout(const void *from, void *to, size_t size, int tokernel)
   1938  8348      Eric {
   1939  8348      Eric 	if (tokernel) {
   1940  8348      Eric 		bcopy(from, to, size);
   1941  8348      Eric 		return (0);
   1942  8348      Eric 	}
   1943  8348      Eric 	return (xcopyout(from, to, size));
   1944  8348      Eric }
   1945