Home | History | Annotate | Download | only in sys
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
     28 /*	  All Rights Reserved  	*/
     29 
     30 /*
     31  * University Copyright- Copyright (c) 1982, 1986, 1988
     32  * The Regents of the University of California
     33  * All Rights Reserved
     34  *
     35  * University Acknowledgment- Portions of this document are derived from
     36  * software developed by the University of California, Berkeley, and its
     37  * contributors.
     38  */
     39 
     40 #ifndef _SYS_SOCKETVAR_H
     41 #define	_SYS_SOCKETVAR_H
     42 
     43 #include <sys/types.h>
     44 #include <sys/stream.h>
     45 #include <sys/t_lock.h>
     46 #include <sys/cred.h>
     47 #include <sys/vnode.h>
     48 #include <sys/file.h>
     49 #include <sys/param.h>
     50 #include <sys/zone.h>
     51 #include <sys/sdt.h>
     52 #include <sys/modctl.h>
     53 #include <sys/atomic.h>
     54 #include <sys/socket.h>
     55 #include <sys/ksocket.h>
     56 #include <sys/kstat.h>
     57 
     58 #ifdef _KERNEL
     59 #include <sys/vfs_opreg.h>
     60 #endif
     61 
     62 #ifdef	__cplusplus
     63 extern "C" {
     64 #endif
     65 
     66 /*
     67  * Internal representation of the address used to represent addresses
     68  * in the loopback transport for AF_UNIX. While the sockaddr_un is used
     69  * as the sockfs layer address for AF_UNIX the pathnames contained in
     70  * these addresses are not unique (due to relative pathnames) thus can not
     71  * be used in the transport.
     72  *
     73  * The transport level address consists of a magic number (used to separate the
     74  * name space for specific and implicit binds). For a specific bind
     75  * this is followed by a "vnode *" which ensures that all specific binds
     76  * have a unique transport level address. For implicit binds the latter
     77  * part of the address is a byte string (of the same length as a pointer)
     78  * that is assigned by the loopback transport.
     79  *
     80  * The uniqueness assumes that the loopback transport has a separate namespace
     81  * for sockets in order to avoid name conflicts with e.g. TLI use of the
     82  * same transport.
     83  */
/*
 * Transport-level AF_UNIX address: a pointer-sized identifier plus a magic
 * number that tells which kind of bind produced it. Note that the
 * pointer-sized member is declared first, ahead of the magic number.
 */
struct so_ux_addr {
	void	*soua_vp;	/* vnode pointer or assigned by tl */
	uint_t	soua_magic;	/* See below */
};

/*
 * Values for soua_magic. Each constant is the ASCII encoding of the
 * four-character tag quoted in its trailing comment.
 * NOTE(review): SOU_MAGIC_EXPLICIT presumably marks what the comment above
 * the structure calls a "specific" bind - confirm against the users.
 */
#define	SOU_MAGIC_EXPLICIT	0x75787670	/* "uxvp" */
#define	SOU_MAGIC_IMPLICIT	0x616e6f6e	/* "anon" */
     91 
/*
 * AF_UNIX address in its transport form: an address family tag followed by
 * the so_ux_addr (identifier + magic) pair.
 */
struct sockaddr_ux {
	sa_family_t		sou_family;	/* AF_UNIX */
	struct so_ux_addr	sou_addr;
};
     96 
     97 #if defined(_KERNEL) || defined(_KMEMUSER)
     98 
     99 #include <sys/socket_proto.h>
    100 
/* Shorthand names for the socket operations vector and the socket node. */
typedef struct sonodeops sonodeops_t;
typedef struct sonode sonode_t;

/* Forward declaration only; struct sonode just stores a pointer to it. */
struct sodirect_s;
    105 
    106 /*
 * The sonode represents a socket. A sonode never exists in the file system
    108  * name space and can not be opened using open() - only the socket, socketpair
    109  * and accept calls create sonodes.
    110  *
    111  * The locking of sockfs uses the so_lock mutex plus the SOLOCKED and
    112  * SOREADLOCKED flags in so_flag. The mutex protects all the state in the
 * sonode. It is expected that the underlying transport protocol serializes
 * socket operations, so sockfs will normally not single-thread
    115  * operations. However, certain sockets, including TPI based ones, can only
    116  * handle one control operation at a time. The SOLOCKED flag is used to
    117  * single-thread operations from sockfs users to prevent e.g. multiple bind()
    118  * calls to operate on the same sonode concurrently. The SOREADLOCKED flag is
    119  * used to ensure that only one thread sleeps in kstrgetmsg for a given
    120  * sonode. This is needed to ensure atomic operation for things like
    121  * MSG_WAITALL.
    122  *
    123  * The so_fallback_rwlock is used to ensure that for sockets that can
    124  * fall back to TPI, the fallback is not initiated until all pending
    125  * operations have completed.
    126  *
    127  * Note that so_lock is sometimes held across calls that might go to sleep
    128  * (kmem_alloc and soallocproto*). This implies that no other lock in
    129  * the system should be held when calling into sockfs; from the system call
    130  * side or from strrput (in case of TPI based sockets). If locks are held
    131  * while calling into sockfs the system might hang when running low on memory.
    132  */
/*
 * Per-socket state kept by sockfs.
 *
 * The structure groups, in order: the vnode/ops/private pointers, the
 * locks and condition variables, the so_lock-protected state words, the
 * accept queue, socket options, the receive queues, the send-side bits,
 * the handle and downcall vector used to talk to the protocol, the
 * kernel-socket callbacks, and the sodirect pointer.
 *
 * Several "members" (so_sndbuf, so_rcvbuf, so_rcv_thresh, ...) are not
 * fields at all: they are macros defined inside the structure body that
 * alias members of so_proto_props, so that so->so_sndbuf reads
 * so->so_proto_props.sopp_txhiwat and so on.
 */
struct sonode {
	struct	vnode	*so_vnode;	/* vnode associated with this sonode */

	sonodeops_t 	*so_ops;	/* operations vector for this sonode */
	void		*so_priv;	/* sonode private data */

	/* Taken as reader by SO_BLOCK_FALLBACK, dropped by SO_UNBLOCK_FALLBACK */
	krwlock_t	so_fallback_rwlock;
	kmutex_t	so_lock;	/* protects sonode fields */

	kcondvar_t	so_state_cv;	/* synchronize state changes */
	kcondvar_t	so_single_cv;	/* wait due to SOLOCKED */
	kcondvar_t	so_read_cv;	/* wait due to SOREADLOCKED */

	/* These fields are protected by so_lock */

	uint_t		so_state;	/* internal state flags SS_*, below */
	uint_t		so_mode;	/* characteristics on socket. SM_* */
	ushort_t 	so_flag;	/* flags, see below */
	int		so_count;	/* count of opened references */

	sock_connid_t	so_proto_connid; /* protocol generation number */

	ushort_t 	so_error;	/* error affecting connection */

	struct sockparams *so_sockparams;	/* vnode or socket module */
	/* Needed to recreate the same socket for accept */
	short	so_family;
	short	so_type;
	short	so_protocol;
	short	so_version;		/* From so_socket call */

	/* Accept queue */
	kmutex_t	so_acceptq_lock;	/* protects accept queue */
	struct sonode	*so_acceptq_next;	/* acceptq list node */
	struct sonode 	*so_acceptq_head;
	/* Pointer to a sonode pointer; presumably the list's tail link */
	struct sonode	**so_acceptq_tail;
	unsigned int	so_acceptq_len;
	unsigned int	so_backlog;		/* Listen backlog */
	kcondvar_t	so_acceptq_cv;		/* wait for new conn. */

	/* Options */
	short	so_options;		/* From socket call, see socket.h */
	struct linger	so_linger;	/* SO_LINGER value */
	/* Aliases into so_proto_props; usable exactly like members */
#define	so_sndbuf	so_proto_props.sopp_txhiwat	/* SO_SNDBUF value */
#define	so_sndlowat	so_proto_props.sopp_txlowat	/* tx low water mark */
#define	so_rcvbuf	so_proto_props.sopp_rxhiwat	/* SO_RCVBUF value */
#define	so_rcvlowat	so_proto_props.sopp_rxlowat	/* rx low water mark */
#define	so_max_addr_len	so_proto_props.sopp_maxaddrlen
#define	so_minpsz	so_proto_props.sopp_minpsz
#define	so_maxpsz	so_proto_props.sopp_maxpsz

	int	so_xpg_rcvbuf;		/* SO_RCVBUF value for XPG4 socket */
	clock_t	so_sndtimeo;		/* send timeout */
	clock_t	so_rcvtimeo;		/* recv timeout */

	mblk_t	*so_oobmsg;		/* outofline oob data */
	ssize_t	so_oobmark;		/* offset of the oob data */

	pid_t	so_pgrp;		/* pgrp for signals */

	cred_t		*so_peercred;	/* connected socket peer cred */
	pid_t		so_cpid;	/* connected socket peer cached pid */
	zoneid_t	so_zoneid;	/* opener's zoneid */

	struct pollhead	so_poll_list;	/* common pollhead */
	short		so_pollev;	/* events that should be generated */

	/*
	 * Receive. Two mblk lists are kept: so_rcv_head (the protocol
	 * prequeue) and so_rcv_q_head (the processing/copyout queue);
	 * so_rcv_queued counts the bytes on both. SO_HAVE_DATA() tests
	 * both list heads together with so_rcv_timer_tid.
	 */
	unsigned int	so_rcv_queued;	/* # bytes on both rcv lists */
	mblk_t		*so_rcv_q_head;	/* processing/copyout rcv queue */
	/* NOTE(review): presumably the last message on so_rcv_q_head */
	mblk_t		*so_rcv_q_last_head;
	mblk_t		*so_rcv_head;	/* protocol prequeue */
	mblk_t		*so_rcv_last_head;	/* last mblk in b_next chain */
	kcondvar_t	so_rcv_cv;	/* wait for data */
	uint_t		so_rcv_wanted;	/* # of bytes wanted by app */
	timeout_id_t	so_rcv_timer_tid;	/* 0 is tested as "no timer" */

	/* More aliases into so_proto_props */
#define	so_rcv_thresh	so_proto_props.sopp_rcvthresh
#define	so_rcv_timer_interval so_proto_props.sopp_rcvtimer

	kcondvar_t	so_snd_cv;	/* wait for snd buffers */
	/* Four one-bit flags followed by 28 bits of padding (32 bits total) */
	uint32_t
		so_snd_qfull: 1,	/* Transmit full */
		so_rcv_wakeup: 1,
		so_snd_wakeup: 1,
		so_not_str: 1,	/* B_TRUE if not streams based socket */
		so_pad_to_bit_31: 28;

	/* Communication channel with protocol */
	sock_lower_handle_t	so_proto_handle;
	sock_downcalls_t 	*so_downcalls;

	struct sock_proto_props	so_proto_props; /* protocol settings */
	boolean_t		so_flowctrld;	/* Flow controlled */
	uint_t			so_copyflag;	/* Copy related flag */
	kcondvar_t		so_copy_cv;	/* Copy cond variable */

	/* kernel sockets */
	ksocket_callbacks_t 	so_ksock_callbacks;
	void			*so_ksock_cb_arg;	/* callback argument */
	kcondvar_t		so_closing_cv;

	/* != NULL for sodirect enabled socket */
	struct sodirect_s	*so_direct;
};
    238 
    239 #define	SO_HAVE_DATA(so)						\
    240 	/*								\
    241 	 * For the (tid == 0) case we must check so_rcv_{q_,}head	\
    242 	 * rather than (so_rcv_queued > 0), since the latter does not	\
    243 	 * take into account mblks with only control/name information.	\
    244 	 */								\
    245 	((so)->so_rcv_timer_tid == 0 && ((so)->so_rcv_head != NULL ||	\
    246 	(so)->so_rcv_q_head != NULL)) ||				\
    247 	((so)->so_state & SS_CANTRCVMORE)
    248 
/*
 * Events handled by the protocol (in case sd_poll is set): the three
 * read-side poll events POLLIN, POLLRDNORM and POLLRDBAND.
 */
#define	SO_PROTO_POLLEV		(POLLIN|POLLRDNORM|POLLRDBAND)
    253 
    254 
    255 #endif /* _KERNEL || _KMEMUSER */
    256 
/*
 * flags
 * NOTE(review): these look like the values stored in sonode.so_flag (the
 * sonode block comment names SOLOCKED and SOREADLOCKED as so_flag bits);
 * confirm for the remaining ones.
 */
#define	SOMOD		0x0001		/* update socket modification time */
#define	SOACC		0x0002		/* update socket access time */

#define	SOLOCKED	0x0010		/* use to serialize open/closes */
#define	SOREADLOCKED	0x0020		/* serialize kstrgetmsg calls */
#define	SOCLONE		0x0040		/* child of clone driver */
#define	SOASYNC_UNBIND	0x0080		/* wait for ACK of async unbind */

/* Non-zero when the sonode's so_not_str bit is set (not streams based) */
#define	SOCK_IS_NONSTR(so)	((so)->so_not_str)
    267 
/*
 * Socket state bits, kept in sonode.so_state.
 *
 * Each constant is a distinct single bit. Bit values that were used by
 * earlier flags are left as "unused" placeholders and are not reassigned.
 */
#define	SS_ISCONNECTED		0x00000001 /* socket connected to a peer */
#define	SS_ISCONNECTING		0x00000002 /* in process, connecting to peer */
#define	SS_ISDISCONNECTING	0x00000004 /* in process of disconnecting */
#define	SS_CANTSENDMORE		0x00000008 /* can't send more data to peer */

#define	SS_CANTRCVMORE		0x00000010 /* can't receive more data */
#define	SS_ISBOUND		0x00000020 /* socket is bound */
#define	SS_NDELAY		0x00000040 /* FNDELAY non-blocking */
#define	SS_NONBLOCK		0x00000080 /* O_NONBLOCK non-blocking */

#define	SS_ASYNC		0x00000100 /* async i/o notify */
#define	SS_ACCEPTCONN		0x00000200 /* listen done */
/*	unused			0x00000400 */	/* was SS_HASCONNIND */
#define	SS_SAVEDEOR		0x00000800 /* Saved MSG_EOR rcv side state */

#define	SS_RCVATMARK		0x00001000 /* at mark on input */
#define	SS_OOBPEND		0x00002000 /* OOB pending or present - poll */
#define	SS_HAVEOOBDATA		0x00004000 /* OOB data present */
#define	SS_HADOOBDATA		0x00008000 /* OOB data consumed */
#define	SS_CLOSING		0x00010000 /* in process of closing */

/*	unused			0x00020000 */	/* was SS_FADDR_NOXLATE */
/*	unused			0x00040000 */	/* was SS_HASDATA */
/*	unused 			0x00080000 */	/* was SS_DONEREAD */
/*	unused 			0x00100000 */	/* was SS_MOREDATA */
/*	unused 			0x00200000 */	/* was SS_DIRECT */

#define	SS_SODIRECT		0x00400000 /* transport supports sodirect */

#define	SS_SENTLASTREADSIG	0x01000000 /* last rx signal has been sent */
#define	SS_SENTLASTWRITESIG	0x02000000 /* last tx signal has been sent */

/* The top three bits track the fallback to TPI; see SO_BLOCK_FALLBACK */
#define	SS_FALLBACK_DRAIN	0x20000000 /* data was/is being drained */
#define	SS_FALLBACK_PENDING	0x40000000 /* fallback is pending */
#define	SS_FALLBACK_COMP	0x80000000 /* fallback has completed */


/* Set of states when the socket can't be rebound */
#define	SS_CANTREBIND	(SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING|\
			    SS_CANTSENDMORE|SS_CANTRCVMORE|SS_ACCEPTCONN)
    311 
/*
 * Sockets that can fall back to TPI must ensure that fall back is not
 * initiated while a thread is using a socket.
 *
 * SO_BLOCK_FALLBACK(so, fn)
 *	Asserts that so_lock is not held, then takes so_fallback_rwlock as
 *	a reader. If SS_FALLBACK_COMP is already set in so_state, the lock
 *	is dropped again and the macro executes "return (fn)" - that is, it
 *	returns from the function that invoked the macro, with the value of
 *	the expression fn. fn is evaluated only on that path. Otherwise the
 *	macro falls through with the reader lock still held.
 *
 * SO_UNBLOCK_FALLBACK(so)
 *	Drops so_fallback_rwlock. Every fall-through from SO_BLOCK_FALLBACK
 *	needs a matching SO_UNBLOCK_FALLBACK on each exit path.
 *
 * Both macros expand to a brace-enclosed block, not an expression, and
 * SO_BLOCK_FALLBACK can only be used inside a function whose return type
 * is compatible with fn. Because the expansion is a bare { } block, avoid
 * using either macro as the unbraced body of an if that has an else.
 */
#define	SO_BLOCK_FALLBACK(so, fn) {			\
	ASSERT(MUTEX_NOT_HELD(&(so)->so_lock));		\
	rw_enter(&(so)->so_fallback_rwlock, RW_READER);	\
	if ((so)->so_state & SS_FALLBACK_COMP) {	\
		rw_exit(&(so)->so_fallback_rwlock);	\
		return (fn);				\
	}						\
}

#define	SO_UNBLOCK_FALLBACK(so)	{			\
	rw_exit(&(so)->so_fallback_rwlock);		\
}
    328 
/*
 * Poll events
 * NOTE(review): presumably the values kept in sonode.so_pollev ("events
 * that should be generated") - confirm against the users.
 */
#define	SO_POLLEV_IN		0x1	/* POLLIN wakeup needed */
#define	SO_POLLEV_ALWAYS	0x2	/* wakeups */
    332 
/*
 * Characteristics of sockets, kept in sonode.so_mode. Not changed after
 * the socket is created. Each constant is a distinct single bit.
 */
#define	SM_PRIV			0x001	/* privileged for broadcast, raw... */
#define	SM_ATOMIC		0x002	/* atomic data transmission */
#define	SM_ADDR			0x004	/* addresses given with messages */
#define	SM_CONNREQUIRED		0x008	/* connection required by protocol */

#define	SM_FDPASSING		0x010	/* passes file descriptors */
#define	SM_EXDATA		0x020	/* Can handle T_EXDATA_REQ */
#define	SM_OPTDATA		0x040	/* Can handle T_OPTDATA_REQ */
#define	SM_BYTESTREAM		0x080	/* Byte stream - can use M_DATA */

/*
 * NOTE(review): so_acceptor_id is not a member of struct sonode as
 * declared in this header; it is presumably defined elsewhere.
 */
#define	SM_ACCEPTOR_ID		0x100	/* so_acceptor_id is valid */

#define	SM_KERNEL		0x200	/* kernel socket */

/* The modes below are only for non-streams sockets */
#define	SM_ACCEPTSUPP		0x400	/* can handle accept() */
#define	SM_SENDFILESUPP		0x800	/* Private: proto supp sendfile  */
    353 
/*
 * Socket versions. Used by the socket library when calling _so_socket().
 * SOV_DEFAULT is not a version of its own: it asks for the version named
 * by the so_default_version variable.
 */
#define	SOV_STREAM	0	/* Not a socket - just a stream */
#define	SOV_DEFAULT	1	/* Select based on so_default_version */
#define	SOV_SOCKSTREAM	2	/* Socket plus streams operations */
#define	SOV_SOCKBSD	3	/* Socket with no streams operations */
#define	SOV_XPG4_2	4	/* Xnet socket */
    362 
    363 #if defined(_KERNEL) || defined(_KMEMUSER)
    364 
/*
 * sonode create and destroy functions.
 *
 * so_create_func_t takes a sockparams pointer, five ints, an int pointer
 * and a credential, and returns the new sonode.
 * NOTE(review): the parameters are unnamed here; the ints presumably
 * include family/type/protocol/version and the int pointer an error
 * return - confirm against an implementation.
 *
 * so_destroy_func_t takes the sonode to destroy and returns nothing.
 */
typedef struct sonode *(*so_create_func_t)(struct sockparams *,
    int, int, int, int, int, int *, cred_t *);
typedef void (*so_destroy_func_t)(struct sonode *);
    371 
/*
 * STREAM device information: the device path, the length recorded for
 * that path, and a vnode pointer.
 */
typedef struct sdev_info {
	char	*sd_devpath;
	int	sd_devpathlen; /* Is 0 if sd_devpath is a static string */
	vnode_t	*sd_vnode;
} sdev_info_t;
    378 
/*
 * Socket module interface version.
 * NOTE(review): presumably the value expected in smod_reg_t.smod_version.
 */
#define	SOCKMOD_VERSION		1
/* name of the TPI pseudo socket module */
#define	SOTPI_SMOD_NAME		"socktpi"
    382 
/*
 * Private part of a socket module registration: the sonode create and
 * destroy functions and the protocol fallback function. smod_reg_t
 * refers to it through its __smod_priv pointer.
 */
typedef struct __smod_priv_s {
	so_create_func_t	smodp_sock_create_func;
	so_destroy_func_t	smodp_sock_destroy_func;
	so_proto_fallback_func_t smodp_proto_fallback_func;
} __smod_priv_t;
    388 
/*
 * Socket module register information.
 *
 * This is the structure handed to smod_register(). It carries the module
 * version and name, the upcall/downcall interface versions and the
 * protocol create function.
 */
typedef struct smod_reg_s {
	int		smod_version;
	char		*smod_name;
	size_t		smod_uc_version;	/* upcall version */
	size_t		smod_dc_version;	/* down call version */
	so_proto_create_func_t	smod_proto_create_func;

	/* __smod_priv must be NULL */
	__smod_priv_t	*__smod_priv;
} smod_reg_t;
    402 
/*
 * Socket module information.
 *
 * One entry per socket module. It repeats the version, name and
 * upcall/downcall versions found in smod_reg_t, holds the protocol
 * create/fallback functions together with the sonode create/destroy
 * functions, and adds a reference count plus a list linkage.
 * smod_refcnt is adjusted with SMOD_INC_REF() / SMOD_DEC_REF().
 */
typedef struct smod_info {
	int		smod_version;
	char		*smod_name;
	uint_t		smod_refcnt;		/* # of entries */
	size_t		smod_uc_version; 	/* upcall version */
	size_t		smod_dc_version;	/* down call version */
	so_proto_create_func_t	smod_proto_create_func;
	so_proto_fallback_func_t smod_proto_fallback_func;
	so_create_func_t	smod_sock_create_func;
	so_destroy_func_t	smod_sock_destroy_func;
	list_node_t	smod_node;
} smod_info_t;
    418 
/* Named kstat counters kept per sockparams entry (see sp_stats). */
typedef struct sockparams_stats {
	kstat_named_t	sps_nfallback;	/* # of fallbacks to TPI */
	kstat_named_t	sps_nactive;	/* # of active sockets */
	kstat_named_t	sps_ncreate;	/* total # of created sockets */
} sockparams_stats_t;
    424 
/*
 * sockparams
 *
 * Used for mapping family/type/protocol to module.
 *
 * sp_refcnt is protected by sp_lock and is adjusted with
 * SOCKPARAMS_INC_REF() / SOCKPARAMS_DEC_REF(). SOCKPARAMS_DEC_REF() also
 * clears sp_smod_info when the last reference to a non-ephemeral entry
 * is dropped.
 */
struct sockparams {
	/*
	 * The family, type, protocol, sdev_info and smod_info are
	 * set when the entry is created, and they will never change
	 * thereafter.
	 */
	int		sp_family;
	int		sp_type;
	int		sp_protocol;

	sdev_info_t	sp_sdev_info;	/* STREAM device */
	char		*sp_smod_name;	/* socket module name */
	smod_info_t	*sp_smod_info;	/* socket module */

	kmutex_t	sp_lock;	/* lock for refcnt */
	uint64_t	sp_refcnt;	/* entry reference count */
	sockparams_stats_t sp_stats;
	kstat_t		*sp_kstat;

	/*
	 * The entries below are only modified while holding
	 * splist_lock as a writer.
	 */
	int		sp_flags;	/* see below */
	list_node_t	sp_node;
};
    456 
    457 
/*
 * sockparams flags (sp_flags).
 * SOCKPARAMS_DEC_REF() tests SOCKPARAMS_EPHEMERAL to decide whether the
 * thread dropping the last reference destroys the entry.
 */
#define	SOCKPARAMS_EPHEMERAL	0x1	/* temp. entry, not on global list */
    462 
/*
 * sockparams entry points.
 * NOTE(review): the prototypes carry no parameter names. The two
 * sockparams_hold_ephemeral_by*() variants presumably take
 * family/type/protocol, a device path or module name, a flag and an error
 * pointer - confirm against the definitions.
 * sockparams_ephemeral_drop_last_ref() is the function invoked by
 * SOCKPARAMS_DEC_REF() for the last reference to an ephemeral entry.
 */
extern void sockparams_init(void);
extern struct sockparams *sockparams_hold_ephemeral_bydev(int, int, int,
    const char *, int, int *);
extern struct sockparams *sockparams_hold_ephemeral_bymod(int, int, int,
    const char *, int, int *);
extern void sockparams_ephemeral_drop_last_ref(struct sockparams *);

/*
 * Socket module entry points. smod_register() takes a registration record
 * (smod_reg_t); smod_unregister() and smod_lookup_byname() take a module
 * name.
 */
extern void smod_init(void);
extern void smod_add(smod_info_t *);
extern int smod_register(const smod_reg_t *);
extern int smod_unregister(const char *);
extern smod_info_t *smod_lookup_byname(const char *);
    475 
/* True when the sockparams entry has a STREAM device path recorded. */
#define	SOCKPARAMS_HAS_DEVICE(sp)					\
	((sp)->sp_sdev_info.sd_devpath != NULL)
    478 
/*
 * Increase the smod_info_t reference count.
 *
 * Asserts that smodp is non-NULL, fires the smodinfo__inc__ref DTrace
 * probe and atomically increments smod_refcnt. Expands to a
 * brace-enclosed block and evaluates smodp more than once.
 */
#define	SMOD_INC_REF(smodp) {						\
	ASSERT((smodp) != NULL);					\
	DTRACE_PROBE1(smodinfo__inc__ref, struct smod_info *, (smodp));	\
	atomic_inc_uint(&(smodp)->smod_refcnt);				\
}
    485 
/*
 * Decrease the socket module entry reference count.
 * When no one is mapping to the entry any more, we try to unload the
 * module from the kernel. If the module can't unload, just leave the
 * module entry with a zero refcnt.
 *
 * The count is decremented atomically and then re-read with a plain load
 * to decide whether to call mod_remove_by_name(); the comment inside the
 * macro explains why that non-atomic check is acceptable. The module name
 * is taken from sp->sp_smod_name, not from smodp. Expands to a
 * brace-enclosed block and evaluates its arguments more than once.
 */
#define	SMOD_DEC_REF(sp, smodp) {					\
	ASSERT((smodp) != NULL);					\
	ASSERT((smodp)->smod_refcnt != 0);				\
	atomic_dec_uint(&(smodp)->smod_refcnt);				\
	/*								\
	 * No need to atomically check the return value because the	\
	 * socket module framework will verify that no one is using	\
	 * the module before unloading. Worst thing that can happen	\
	 * here is multiple calls to mod_remove_by_name(), which is OK.	\
	 */								\
	if ((smodp)->smod_refcnt == 0)					\
		(void) mod_remove_by_name((sp)->sp_smod_name);		\
}
    505 
/*
 * Increase the reference count.
 *
 * Fires the sockparams__inc__ref DTrace probe, then increments sp_refcnt
 * under sp_lock. The ASSERT after the increment catches the counter
 * wrapping around to zero. Expands to a brace-enclosed block and
 * evaluates sp more than once.
 */
#define	SOCKPARAMS_INC_REF(sp) {					\
	ASSERT((sp) != NULL);						\
	DTRACE_PROBE1(sockparams__inc__ref, struct sockparams *, (sp));	\
	mutex_enter(&(sp)->sp_lock);					\
	(sp)->sp_refcnt++;						\
	ASSERT((sp)->sp_refcnt != 0);					\
	mutex_exit(&(sp)->sp_lock);					\
}
    515 
/*
 * Decrease the reference count.
 *
 * If the sockparams is ephemeral, then the thread dropping the last ref
 * count will destroy the entry.
 *
 * Three paths, all entered with sp_lock taken by the macro itself:
 *
 *  - Last reference, ephemeral entry: sp_lock is released first and
 *    sockparams_ephemeral_drop_last_ref() is then called. sp_refcnt is
 *    NOT decremented by the macro on this path; it is left at 1 for that
 *    function to deal with.
 *
 *  - Last reference, non-ephemeral entry: sp_refcnt drops to 0, the
 *    socket module reference (if any) is released with SMOD_DEC_REF() and
 *    sp_smod_info is cleared, all while sp_lock is still held. Note that
 *    SMOD_DEC_REF() may call mod_remove_by_name() under that lock.
 *
 *  - Any other case: sp_refcnt is simply decremented.
 *
 * Expands to a brace-enclosed block and evaluates sp more than once.
 */
#define	SOCKPARAMS_DEC_REF(sp) {					\
	ASSERT((sp) != NULL);						\
	DTRACE_PROBE1(sockparams__dec__ref, struct sockparams *, (sp));	\
	mutex_enter(&(sp)->sp_lock);					\
	ASSERT((sp)->sp_refcnt > 0);					\
	if ((sp)->sp_refcnt == 1) {					\
		if ((sp)->sp_flags & SOCKPARAMS_EPHEMERAL) {		\
			mutex_exit(&(sp)->sp_lock);			\
			sockparams_ephemeral_drop_last_ref((sp));	\
		} else {						\
			(sp)->sp_refcnt--;				\
			if ((sp)->sp_smod_info != NULL)			\
				SMOD_DEC_REF(sp, (sp)->sp_smod_info);	\
			(sp)->sp_smod_info = NULL;			\
			mutex_exit(&(sp)->sp_lock);			\
		}							\
	} else {							\
		(sp)->sp_refcnt--;					\
		mutex_exit(&(sp)->sp_lock);				\
	}								\
}
    543 
    544 /*
    545  * Used to traverse the list of AF_UNIX sockets to construct the kstat
    546  * for netstat(1m).
    547  */
/* Head of a list of sonodes together with the mutex stored alongside it. */
struct socklist {
	kmutex_t	sl_lock;	/* presumably guards sl_list - confirm */
	struct sonode	*sl_list;
};

/* The single global instance; defined elsewhere. */
extern struct socklist socklist;
/*
 * sendfile counters.
 *
 * ss_full_waits is the number of times the reader thread
 * waits when the queue is full and ss_empty_waits is the number
 * of times the consumer thread waits when the queue is empty.
 * No locks for these as they are just indicators of whether
 * disk or network or both is slow or fast.
 */
struct sendfile_stats {
	uint32_t ss_file_cached;
	uint32_t ss_file_not_cached;
	uint32_t ss_full_waits;
	uint32_t ss_empty_waits;
	uint32_t ss_file_segmap;
};
    568 
/*
 * A single sendfile request is represented by snf_req.
 *
 * Requests are chained through sr_next. Each request owns a list of
 * mblks (sr_mp_head/sr_mp_tail) with its own mutex and condition
 * variable, high and low water marks, and separate read and write error
 * fields.
 */
typedef struct snf_req {
	struct snf_req	*sr_next;
	mblk_t		*sr_mp_head;
	mblk_t		*sr_mp_tail;
	kmutex_t	sr_lock;
	kcondvar_t	sr_cv;
	uint_t		sr_qlen;
	int		sr_hiwat;
	int		sr_lowat;
	int		sr_operation;	/* presumably READ_OP - confirm */
	struct vnode	*sr_vp;
	file_t 		*sr_fp;
	ssize_t		sr_maxpsz;
	u_offset_t	sr_file_off;
	u_offset_t	sr_file_size;
	/*
	 * NOTE(review): SR_READ_DONE is defined next to sr_read_error and
	 * is presumably a flag stored in it; the constant does not fit in
	 * a signed int, so check how it is set and tested.
	 */
#define	SR_READ_DONE	0x80000000
	int		sr_read_error;
	int		sr_write_error;
} snf_req_t;
    591 
/*
 * A queue of sendfile requests: head and tail of the snf_req_t list, a
 * mutex and condition variable, and counters for the service threads and
 * the queued requests.
 */
struct sendfile_queue {
	snf_req_t	*snfq_req_head;
	snf_req_t	*snfq_req_tail;
	kmutex_t	snfq_lock;
	kcondvar_t	snfq_cv;
	int		snfq_svc_threads;	/* # of service threads */
	int		snfq_idle_cnt;		/* # of idling threads */
	int		snfq_max_threads;
	int		snfq_req_cnt;		/* Number of requests */
};
    603 
/* NOTE(review): presumably a value for snf_req_t.sr_operation - confirm. */
#define	READ_OP			1
/*
 * Expands to an expression that reads the variable hz at the point of use,
 * so it is not a compile-time constant. Assuming hz is the number of clock
 * ticks per second, the value is five minutes expressed in ticks.
 */
#define	SNFQ_TIMEOUT		(60 * 5 * hz)	/* 5 minutes */
    606 
/*
 * Socket network operations switch.
 *
 * One function pointer per socket operation; a sonode reaches its vector
 * through so_ops, and the SOP_*() macros below dispatch through it. Every
 * entry takes the sonode as its first argument and returns an int.
 * NOTE(review): the remaining parameters are unnamed in this header; the
 * names used by the SOP_*() macros are the best available hint to their
 * meaning.
 */
struct sonodeops {
	/* Second argument is another sonode, not a flag word */
	int 	(*sop_init)(struct sonode *, struct sonode *, cred_t *,
		    int);
	/* The new sonode is handed back through the last argument */
	int	(*sop_accept)(struct sonode *, int, cred_t *, struct sonode **);
	int	(*sop_bind)(struct sonode *, struct sockaddr *, socklen_t,
		    int, cred_t *);
	int	(*sop_listen)(struct sonode *, int, cred_t *);
	int	(*sop_connect)(struct sonode *, const struct sockaddr *,
		    socklen_t, int, int, cred_t *);
	int	(*sop_recvmsg)(struct sonode *, struct msghdr *,
		    struct uio *, cred_t *);
	int	(*sop_sendmsg)(struct sonode *, struct msghdr *,
		    struct uio *, cred_t *);
	int	(*sop_sendmblk)(struct sonode *, struct msghdr *, int,
		    cred_t *, mblk_t **);
	int	(*sop_getpeername)(struct sonode *, struct sockaddr *,
		    socklen_t *, boolean_t, cred_t *);
	int	(*sop_getsockname)(struct sonode *, struct sockaddr *,
		    socklen_t *, cred_t *);
	int	(*sop_shutdown)(struct sonode *, int, cred_t *);
	int	(*sop_getsockopt)(struct sonode *, int, int, void *,
		    socklen_t *, int, cred_t *);
	int 	(*sop_setsockopt)(struct sonode *, int, int, const void *,
		    socklen_t, cred_t *);
	int 	(*sop_ioctl)(struct sonode *, int, intptr_t, int,
		    cred_t *, int32_t *);
	int 	(*sop_poll)(struct sonode *, short, int, short *,
		    struct pollhead **);
	int 	(*sop_close)(struct sonode *, int, cred_t *);
};
    638 
    639 #define	SOP_INIT(so, flag, cr, flags)	\
    640 	((so)->so_ops->sop_init((so), (flag), (cr), (flags)))
    641 #define	SOP_ACCEPT(so, fflag, cr, nsop)	\
    642 	((so)->so_ops->sop_accept((so), (fflag), (cr), (nsop)))
    643 #define	SOP_BIND(so, name, namelen, flags, cr)	\
    644 	((so)->so_ops->sop_bind((so), (name), (namelen), (flags), (cr)))
    645 #define	SOP_LISTEN(so, backlog, cr)	\
    646 	((so)->so_ops->sop_listen((so), (backlog), (cr)))
    647 #define	SOP_CONNECT(so, name, namelen, fflag, flags, cr)	\
    648 	((so)->so_ops->sop_connect((so), (name), (namelen), (fflag), (flags), \
    649 	(cr)))
    650 #define	SOP_RECVMSG(so, msg, uiop, cr)	\
    651 	((so)->so_ops->sop_recvmsg((so), (msg), (uiop), (cr)))
    652 #define	SOP_SENDMSG(so, msg, uiop, cr)	\
    653 	((so)->so_ops->sop_sendmsg((so), (msg), (uiop), (cr)))
    654 #define	SOP_SENDMBLK(so, msg, size, cr, mpp)	\
    655 	((so)->so_ops->sop_sendmblk((so), (msg), (size), (cr), (mpp)))
    656 #define	SOP_GETPEERNAME(so, addr, addrlen, accept, cr)	\
    657 	((so)->so_ops->sop_getpeername((so), (addr), (addrlen), (accept), (cr)))
    658 #define	SOP_GETSOCKNAME(so, addr, addrlen, cr)	\
    659 	((so)->so_ops->sop_getsockname((so), (addr), (addrlen), (cr)))
    660 #define	SOP_SHUTDOWN(so, how, cr)	\
    661 	((so)->so_ops->sop_shutdown((so), (how), (cr)))
    662 #define	SOP_GETSOCKOPT(so, level, optionname, optval, optlenp, flags, cr) \
    663 	((so)->so_ops->sop_getsockopt((so), (level), (optionname),	\
    664 	    (optval), (optlenp), (flags), (cr)))
    665 #define	SOP_SETSOCKOPT(so, level, optionname, optval, optlen, cr)	\
    666 	((so)->so_ops->sop_setsockopt((so), (level), (optionname),	\
    667 	    (optval), (optlen), (cr)))
    668 #define	SOP_IOCTL(so, cmd, arg, mode, cr, rvalp)	\
    669 	((so)->so_ops->sop_ioctl((so), (cmd), (arg), (mode), (cr), (rvalp)))
    670 #define	SOP_POLL(so, events, anyyet, reventsp, phpp) \
    671 	((so)->so_ops->sop_poll((so), (events), (anyyet), (reventsp), (phpp)))
    672 #define	SOP_CLOSE(so, flag, cr)	\
    673 	((so)->so_ops->sop_close((so), (flag), (cr)))
    674 
    675 #endif /* defined(_KERNEL) || defined(_KMEMUSER) */
    676 
    677 #ifdef _KERNEL
    678 
    679 #define	ISALIGNED_cmsghdr(addr) \
    680 		(((uintptr_t)(addr) & (_CMSG_HDR_ALIGNMENT - 1)) == 0)
    681 
    682 #define	ROUNDUP_cmsglen(len) \
    683 	(((len) + _CMSG_HDR_ALIGNMENT - 1) & ~(_CMSG_HDR_ALIGNMENT - 1))
    684 
    685 #define	IS_NON_STREAM_SOCK(vp) \
    686 	((vp)->v_type == VSOCK && (vp)->v_stream == NULL)
    687 /*
    688  * Macros that operate on struct cmsghdr.
    689  * Used in parsing msg_control.
    690  * The CMSG_VALID macro does not assume that the last option buffer is padded.
    691  */
    692 #define	CMSG_NEXT(cmsg)						\
    693 	(struct cmsghdr *)((uintptr_t)(cmsg) +			\
    694 	    ROUNDUP_cmsglen((cmsg)->cmsg_len))
    695 #define	CMSG_CONTENT(cmsg)	(&((cmsg)[1]))
    696 #define	CMSG_CONTENTLEN(cmsg)	((cmsg)->cmsg_len - sizeof (struct cmsghdr))
    697 #define	CMSG_VALID(cmsg, start, end)					\
    698 	(ISALIGNED_cmsghdr(cmsg) &&					\
    699 	((uintptr_t)(cmsg) >= (uintptr_t)(start)) &&			\
    700 	((uintptr_t)(cmsg) < (uintptr_t)(end)) &&			\
    701 	((ssize_t)(cmsg)->cmsg_len >= sizeof (struct cmsghdr)) &&	\
    702 	((uintptr_t)(cmsg) + (cmsg)->cmsg_len <= (uintptr_t)(end)))
    703 
/*
 * Maximum size, in bytes, of any argument that is copied in (addresses,
 * options, access rights). MUST be at least MAXPATHLEN + 3.
 * BSD and SunOS 4.X limited this to MLEN or MCLBYTES.
 */
#define	SO_MAXARGSIZE	8192
    710 
/*
 * Convert between vnode and sonode.
 * VTOSO() reads the vnode's v_data and casts it to a sonode pointer;
 * SOTOV() reads the sonode's so_vnode back-pointer.
 */
#define	VTOSO(vp)	((struct sonode *)((vp)->v_data))
#define	SOTOV(sp)	((sp)->so_vnode)
    716 
/*
 * Internal flags for sobind(). Each is a distinct single bit.
 * NOTE(review): the "so_excl_lock" named below (and under sodisconnect())
 * is not declared in this header; it presumably refers to the SOLOCKED
 * single-threading described in the sonode block comment - confirm.
 */
#define	_SOBIND_REBIND		0x01	/* Bind to existing local address */
#define	_SOBIND_UNSPEC		0x02	/* Bind to unspecified address */
#define	_SOBIND_LOCK_HELD	0x04	/* so_excl_lock held by caller */
#define	_SOBIND_NOXLATE		0x08	/* No addr translation for AF_UNIX */
#define	_SOBIND_XPG4_2		0x10	/* xpg4.2 semantics */
#define	_SOBIND_SOCKBSD		0x20	/* BSD semantics */
#define	_SOBIND_LISTEN		0x40	/* Make into SS_ACCEPTCONN */
#define	_SOBIND_SOCKETPAIR	0x80	/* Internal flag for so_socketpair() */
					/* to enable listen with backlog = 1 */

/*
 * Internal flags for sounbind()
 */
#define	_SOUNBIND_REBIND	0x01	/* Don't clear fields - will rebind */

/*
 * Internal flags for soconnect()
 */
#define	_SOCONNECT_NOXLATE	0x01	/* No addr translation for AF_UNIX */
#define	_SOCONNECT_DID_BIND	0x02	/* Unbind when connect fails */
#define	_SOCONNECT_XPG4_2	0x04	/* xpg4.2 semantics */

/*
 * Internal flags for sodisconnect()
 */
#define	_SODISCONNECT_LOCK_HELD	0x01	/* so_excl_lock held by caller */

/*
 * Internal flags for sotpi_getsockopt().
 */
#define	_SOGETSOCKOPT_XPG4_2	0x01	/* xpg4.2 semantics */

/*
 * Internal flags for soallocproto*().
 * Unlike the flag sets above these are three mutually exclusive values
 * (0, 1, 2), not bits to be OR'ed together.
 */
#define	_ALLOC_NOSLEEP		0	/* Don't sleep for memory */
#define	_ALLOC_INTR		1	/* Sleep until interrupt */
#define	_ALLOC_SLEEP		2	/* Sleep forever */
    758 
/*
 * Internal structure for handling AF_UNIX file descriptor passing.
 *
 * fd_fds is declared with one element but is used as a variable-length
 * trailing array ("One or more"); fd_numfd gives the element count and
 * fd_size the total allocation size handed to kmem_free.
 *
 * FDBUF_HDRSIZE is the structure size minus that one declared element,
 * i.e. the number of bytes that precede the file pointer array (this
 * relies on the structure having no padding after fd_fds).
 */
struct fdbuf {
	int		fd_size;	/* In bytes, for kmem_free */
	int		fd_numfd;	/* Number of elements below */
	char		*fd_ebuf;	/* Extra buffer to free  */
	int		fd_ebuflen;
	frtn_t		fd_frtn;
	struct file	*fd_fds[1];	/* One or more */
};
#define	FDBUF_HDRSIZE	(sizeof (struct fdbuf) - sizeof (struct file *))
    771 
    772 /*
    773  * Variable that can be patched to set what version of socket socket()
    774  * will create.
    775  */
    776 extern int so_default_version;
    777 
    778 #ifdef DEBUG
    779 /* Turn on extra testing capabilities */
    780 #define	SOCK_TEST
    781 #endif /* DEBUG */
    782 
    783 #ifdef DEBUG
    784 char	*pr_state(uint_t, uint_t);
    785 char	*pr_addr(int, struct sockaddr *, t_uscalar_t);
    786 int	so_verify_oobstate(struct sonode *);
    787 #endif /* DEBUG */
    788 
    789 /*
    790  * DEBUG macros
    791  */
    792 #if defined(DEBUG)
    793 #define	SOCK_DEBUG
    794 
    795 extern int sockdebug;
    796 extern int sockprinterr;
    797 
    798 #define	eprint(args)	printf args
    799 #define	eprintso(so, args) \
    800 { if (sockprinterr && ((so)->so_options & SO_DEBUG)) printf args; }
    801 #define	eprintline(error)					\
    802 {								\
    803 	if (error != EINTR && (sockprinterr || sockdebug > 0))	\
    804 		printf("socket error %d: line %d file %s\n",	\
    805 			(error), __LINE__, __FILE__);		\
    806 }
    807 
    808 #define	eprintsoline(so, error)					\
    809 { if (sockprinterr && ((so)->so_options & SO_DEBUG))		\
    810 	printf("socket(%p) error %d: line %d file %s\n",	\
    811 		(void *)(so), (error), __LINE__, __FILE__);	\
    812 }
    813 #define	dprint(level, args)	{ if (sockdebug > (level)) printf args; }
    814 #define	dprintso(so, level, args) \
    815 { if (sockdebug > (level) && ((so)->so_options & SO_DEBUG)) printf args; }
    816 
    817 #else /* define(DEBUG) */
    818 
    819 #define	eprint(args)		{}
    820 #define	eprintso(so, args)	{}
    821 #define	eprintline(error)	{}
    822 #define	eprintsoline(so, error)	{}
    823 #define	dprint(level, args)	{}
    824 #define	dprintso(so, level, args) {}
    825 
    826 #endif /* defined(DEBUG) */
    827 
    828 extern struct vfsops			sock_vfsops;
    829 extern struct vnodeops			*socket_vnodeops;
    830 extern const struct fs_operation_def	socket_vnodeops_template[];
    831 
    832 extern dev_t				sockdev;
    833 
    834 /*
    835  * sockfs functions
    836  */
    837 extern int	sock_getmsg(vnode_t *, struct strbuf *, struct strbuf *,
    838 			uchar_t *, int *, int, rval_t *);
    839 extern int	sock_putmsg(vnode_t *, struct strbuf *, struct strbuf *,
    840 			uchar_t, int, int);
    841 extern int	sogetvp(char *, vnode_t **, int);
    842 extern int	sockinit(int, char *);
    843 extern int	soconfig(int, int, int,	char *, int, char *);
    844 extern int	solookup(int, int, int, struct sockparams **);
    845 extern void	so_lock_single(struct sonode *);
    846 extern void	so_unlock_single(struct sonode *, int);
    847 extern int	so_lock_read(struct sonode *, int);
    848 extern int	so_lock_read_intr(struct sonode *, int);
    849 extern void	so_unlock_read(struct sonode *);
    850 extern void	*sogetoff(mblk_t *, t_uscalar_t, t_uscalar_t, uint_t);
    851 extern void	so_getopt_srcaddr(void *, t_uscalar_t,
    852 			void **, t_uscalar_t *);
    853 extern int	so_getopt_unix_close(void *, t_uscalar_t);
    854 extern void	fdbuf_free(struct fdbuf *);
    855 extern mblk_t	*fdbuf_allocmsg(int, struct fdbuf *);
    856 extern int	fdbuf_create(void *, int, struct fdbuf **);
    857 extern void	so_closefds(void *, t_uscalar_t, int, int);
    858 extern int	so_getfdopt(void *, t_uscalar_t, int, void **, int *);
    859 t_uscalar_t	so_optlen(void *, t_uscalar_t, int);
    860 extern void	so_cmsg2opt(void *, t_uscalar_t, int, mblk_t *);
    861 extern t_uscalar_t
    862 		so_cmsglen(mblk_t *, void *, t_uscalar_t, int);
    863 extern int	so_opt2cmsg(mblk_t *, void *, t_uscalar_t, int,
    864 			void *, t_uscalar_t);
    865 extern void	soisconnecting(struct sonode *);
    866 extern void	soisconnected(struct sonode *);
    867 extern void	soisdisconnected(struct sonode *, int);
    868 extern void	socantsendmore(struct sonode *);
    869 extern void	socantrcvmore(struct sonode *);
    870 extern void	soseterror(struct sonode *, int);
    871 extern int	sogeterr(struct sonode *, boolean_t);
    872 extern int	sowaitconnected(struct sonode *, int, int);
    873 
    874 extern ssize_t	soreadfile(file_t *, uchar_t *, u_offset_t, int *, size_t);
    875 extern void	*sock_kstat_init(zoneid_t);
    876 extern void	sock_kstat_fini(zoneid_t, void *);
    877 extern struct sonode *getsonode(int, int *, file_t **);
    878 /*
    879  * Function wrappers (mostly around the sonode switch) for
    880  * backward compatibility.
    881  */
    882 extern int	soaccept(struct sonode *, int, struct sonode **);
    883 extern int	sobind(struct sonode *, struct sockaddr *, socklen_t,
    884 		    int, int);
    885 extern int	solisten(struct sonode *, int);
    886 extern int	soconnect(struct sonode *, const struct sockaddr *, socklen_t,
    887 		    int, int);
    888 extern int	sorecvmsg(struct sonode *, struct nmsghdr *, struct uio *);
    889 extern int	sosendmsg(struct sonode *, struct nmsghdr *, struct uio *);
    890 extern int	soshutdown(struct sonode *, int);
    891 extern int	sogetsockopt(struct sonode *, int, int, void *, socklen_t *,
    892 		    int);
    893 extern int	sosetsockopt(struct sonode *, int, int, const void *,
    894 		    t_uscalar_t);
    895 
    896 extern struct sonode	*socreate(struct sockparams *, int, int, int, int,
    897 			    int *);
    898 
    899 extern int	so_copyin(const void *, void *, size_t, int);
    900 extern int	so_copyout(const void *, void *, size_t, int);
    901 
    902 #endif
    903 
    904 /*
    905  * Internal structure for obtaining sonode information from the socklist.
    906  * These types match those corresponding in the sonode structure.
    907  * This is not a published interface, and may change at any time.
    908  */
    909 struct sockinfo {
    910 	uint_t		si_size;		/* real length of this struct */
    911 	short		si_family;
    912 	short		si_type;
    913 	ushort_t	si_flag;
    914 	uint_t		si_state;
    915 	uint_t		si_ux_laddr_sou_magic;
    916 	uint_t		si_ux_faddr_sou_magic;
    917 	t_scalar_t	si_serv_type;
    918 	t_uscalar_t	si_laddr_soa_len;
    919 	t_uscalar_t	si_faddr_soa_len;
    920 	uint16_t	si_laddr_family;
    921 	uint16_t	si_faddr_family;
    922 	char		si_laddr_sun_path[MAXPATHLEN + 1]; /* NULL terminated */
    923 	char		si_faddr_sun_path[MAXPATHLEN + 1];
    924 	boolean_t	si_faddr_noxlate;
    925 	zoneid_t	si_szoneid;
    926 };
    927 
    928 #define	SOCKMOD_PATH	"socketmod"	/* dir where sockmods are stored */
    929 
    930 #ifdef	__cplusplus
    931 }
    932 #endif
    933 
    934 #endif	/* _SYS_SOCKETVAR_H */
    935