Home | History | Annotate | Download | only in sys
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
     22 /*	  All Rights Reserved  	*/
     23 
     24 
     25 /*
     26  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     27  * Use is subject to license terms.
     28  */
     29 
     30 #ifndef _SYS_STRSUBR_H
     31 #define	_SYS_STRSUBR_H
     32 
     33 /*
     34  * WARNING:
     35  * Everything in this file is private, belonging to the
     36  * STREAMS subsystem.  The only guarantee made about the
     37  * contents of this file is that if you include it, your
     38  * code will not port to the next release.
     39  */
     40 #include <sys/stream.h>
     41 #include <sys/stropts.h>
     42 #include <sys/kstat.h>
     43 #include <sys/uio.h>
     44 #include <sys/proc.h>
     45 #include <sys/netstack.h>
     46 #include <sys/modhash.h>
     47 
     48 #ifdef	__cplusplus
     49 extern "C" {
     50 #endif
     51 
     52 /*
     53  * In general, the STREAMS locks are disjoint; they are only held
     54  * locally, and not simultaneously by a thread.  However, module
     55  * code, including at the stream head, requires some locks to be
     56  * acquired in order for its safety.
     57  *	1. Stream level claim.  This prevents the value of q_next
     58  *		from changing while module code is executing.
     59  *	2. Queue level claim.  This prevents the value of q_ptr
     60  *		from changing while put or service code is executing.
     61  *		In addition, it provides for queue single-threading
     62  *		for QPAIR and PERQ MT-safe modules.
     63  *	3. Stream head lock.  May be held by the stream head module
     64  *		to implement a read/write/open/close monitor.
     65  *	   Note that the only types of twisted stream supported are
     66  *	   the pipe and transports which have read and write service
     67  *	   procedures on both sides of the twist.
     68  *	4. Queue lock.  May be acquired by utility routines on
     69  *		behalf of a module.
     70  */
     71 
     72 /*
     73  * In general, sd_lock protects the consistency of the stdata
     74  * structure.  Additionally, it is used with sd_monitor
     75  * to implement an open/close monitor.  In particular, it protects
     76  * the following fields:
     77  *	sd_iocblk
     78  *	sd_flag
     79  *	sd_copyflag
     80  *	sd_iocid
     81  *	sd_iocwait
     82  *	sd_sidp
     83  *	sd_pgidp
     84  *	sd_wroff
     85  *	sd_tail
     86  *	sd_rerror
     87  *	sd_werror
     88  *	sd_pushcnt
     89  *	sd_sigflags
     90  *	sd_siglist
     91  *	sd_pollist
     92  *	sd_mark
     93  *	sd_closetime
     94  *	sd_wakeq
     95  *	sd_maxblk
     96  *
     97  * The following fields are modified only by the allocator, which
     98  * has exclusive access to them at that time:
     99  *	sd_wrq
    100  *	sd_strtab
    101  *
    102  * The following field is protected by the overlying file system
    103  * code, guaranteeing single-threading of opens:
    104  *	sd_vnode
    105  *
    106  * Stream-level locks should be acquired before any queue-level locks
    107  *	are acquired.
    108  *
    109  * The stream head write queue lock (sd_wrq) is used to protect the
    110  * fields sd_qn_maxpsz and sd_qn_minpsz because freezestr(), which is
    111  * necessary for strqset(), only gets the queue lock.
    112  */
    113 
    114 /*
    115  * Function types for the parameterized stream head.
    116  * The msgfunc_t takes the parameters:
    117  * 	msgfunc(vnode_t *vp, mblk_t *mp, strwakeup_t *wakeups,
    118  *		strsigset_t *firstmsgsigs, strsigset_t *allmsgsigs,
    119  *		strpollset_t *pollwakeups);
    120  * It returns an optional message to be processed by the stream head.
    121  *
    122  * The parameters for errfunc_t are:
    123  *	errfunc(vnode_t *vp, int ispeek, int *clearerr);
    124  * It returns an errno, or zero if there was no pending error.
    125  */
    126 typedef uint_t	strwakeup_t;	/* pointee of msgfunc_t's "wakeups" arg */
    127 typedef uint_t	strsigset_t;	/* pointee of the two "*msgsigs" args */
    128 typedef short	strpollset_t;	/* pointee of the "pollwakeups" arg */
    129 typedef uintptr_t callbparams_id_t; /* callback id (sq_cancelid, cbp_id) */
    130 typedef	mblk_t	*(*msgfunc_t)(vnode_t *, mblk_t *, strwakeup_t *,
    131 			strsigset_t *, strsigset_t *, strpollset_t *);
    132 typedef int 	(*errfunc_t)(vnode_t *, int, int *);
    133 
    134 /*
    135  * Per stream sd_lock in putnext may be replaced by per cpu stream_putlocks
    136  * each living in a separate cache line. putnext/canputnext grabs only one of
    137  * stream_putlocks while strlock() (called on behalf of insertq()/removeq())
    138  * acquires all stream_putlocks. Normally stream_putlocks are only employed
    139  * for highly contended streams that have SQ_CIPUT queues in the critical path
    140  * (e.g. NFS/UDP stream).
    141  *
    142  * stream_putlocks are dynamically assigned to stdata structure through
    143  * sd_ciputctrl pointer possibly when a stream is already in use. Since
    144  * strlock() uses stream_putlocks only under sd_lock acquiring sd_lock when
    145  * assigning stream_putlocks to the stream ensures synchronization with
    146  * strlock().
    147  *
    148  * For lock ordering purposes stream_putlocks are treated as the extension of
    149  * sd_lock and are always grabbed right after grabbing sd_lock and released
    150  * right before releasing sd_lock except putnext/canputnext where only one of
    151  * stream_putlocks locks is used and where it is the first lock to grab.
    152  */
    153 
    154 typedef struct ciputctrl_str {
        	/*
        	 * One lock and one counter, overlaid on a 64-byte pad so that
        	 * each ciputctrl_t occupies at least 64 bytes.
        	 * NOTE(review): 64 is presumably the cache line size meant by
        	 * the "separate cache line" remarks in this file -- confirm.
        	 */
    155 	union _ciput_un {
    156 		uchar_t	pad[64];
    157 		struct _ciput_str {
    158 			kmutex_t	ciput_lck;
    159 			ushort_t	ciput_cnt;
    160 		} ciput_str;
    161 	} ciput_un;
    162 } ciputctrl_t;
    163 
        /* Shorthand member paths to the lock and counter inside the union. */
    164 #define	ciputctrl_lock	ciput_un.ciput_str.ciput_lck
    165 #define	ciputctrl_count	ciput_un.ciput_str.ciput_cnt
    166 
    167 /*
    168  * Header for a stream: interface to rest of system.
    169  *
    170  * NOTE: While this is a consolidation-private structure, some unbundled and
    171  *       third-party products inappropriately make use of some of the fields.
    172  *       As such, please take care to not gratuitously change any offsets of
    173  *       existing members.
         *
         * Which lock covers which member is listed in the sd_lock comment
         * earlier in this file; members not listed there should be checked
         * against the code that uses them before being touched.
    174  */
    175 typedef struct stdata {
    176 	struct queue	*sd_wrq;	/* write queue */
    177 	struct msgb	*sd_iocblk;	/* return block for ioctl */
    178 	struct vnode	*sd_vnode;	/* pointer to associated vnode */
    179 	struct streamtab *sd_strtab;	/* pointer to streamtab for stream */
    180 	uint_t		sd_flag;	/* state/flags */
    181 	uint_t		sd_iocid;	/* ioctl id */
    182 	struct pid	*sd_sidp;	/* controlling session info */
    183 	struct pid	*sd_pgidp;	/* controlling process group info */
    184 	ushort_t	sd_tail;	/* reserved space in written mblks */
    185 	ushort_t	sd_wroff;	/* write offset */
    186 	int		sd_rerror;	/* error to return on read ops */
    187 	int		sd_werror;	/* error to return on write ops */
    188 	int		sd_pushcnt;	/* number of pushes done on stream */
    189 	int		sd_sigflags;	/* logical OR of all siglist events */
    190 	struct strsig	*sd_siglist;	/* pid linked list to rcv SIGPOLL sig */
    191 	struct pollhead sd_pollist;	/* list of all pollers to wake up */
    192 	struct msgb	*sd_mark;	/* "marked" message on read queue */
    193 	clock_t		sd_closetime;	/* time to wait to drain q in close */
    194 	kmutex_t	sd_lock;	/* protect head consistency */
    195 	kcondvar_t	sd_monitor;	/* open/close/push/pop monitor */
    196 	kcondvar_t	sd_iocmonitor;	/* ioctl single-threading */
    197 	kcondvar_t	sd_refmonitor;	/* sd_refcnt monitor */
    198 	ssize_t		sd_qn_minpsz;	/* These two fields are a performance */
    199 	ssize_t		sd_qn_maxpsz;	/* enhancement: cache the values in */
    200 					/* the stream head so we don't have */
    201 					/* to ask the module below the stream */
    202 					/* head to get this information. */
    203 	struct stdata	*sd_mate;	/* pointer to twisted stream mate */
    204 	kthread_id_t	sd_freezer;	/* thread that froze stream */
    205 	kmutex_t	sd_reflock;	/* Protects sd_refcnt */
    206 	int		sd_refcnt;	/* number of claimstr */
    207 	uint_t		sd_wakeq;	/* strwakeq()'s copy of sd_flag */
    208 	struct queue	*sd_struiordq;	/* sync barrier struio() read queue */
    209 	struct queue	*sd_struiowrq;	/* sync barrier struio() write queue */
    210 	char		sd_struiodnak;	/* defer NAK of M_IOCTL by rput() */
    211 	struct msgb	*sd_struionak;	/* pointer M_IOCTL mblk(s) to NAK */
    212 	caddr_t		sd_t_audit_data; /* For audit purposes only */
    213 	ssize_t		sd_maxblk;	/* maximum message block size */
    214 	uint_t		sd_rput_opt;	/* options/flags for strrput */
    215 	uint_t		sd_wput_opt;	/* options/flags for write/putmsg */
    216 	uint_t		sd_read_opt;	/* options/flags for strread */
    217 	msgfunc_t	sd_rprotofunc;	/* rput M_*PROTO routine */
    218 	msgfunc_t	sd_rputdatafunc; /* read M_DATA routine */
    219 	msgfunc_t	sd_rmiscfunc;	/* rput routine (non-data/proto) */
    220 	msgfunc_t	sd_wputdatafunc; /* wput M_DATA routine */
    221 	errfunc_t	sd_rderrfunc;	/* read side error callback */
    222 	errfunc_t	sd_wrerrfunc;	/* write side error callback */
    223 	/*
    224 	 * support for low contention concurrent putnext.
        	 * sd_ciputctrl is where the per cpu stream_putlocks described
        	 * earlier in this file are attached; it may be assigned while
        	 * the stream is already in use.
    225 	 */
    226 	ciputctrl_t	*sd_ciputctrl;
    227 	uint_t		sd_nciputctrl;	/* NOTE(review): presumably sizes */
        					/* sd_ciputctrl[] -- confirm */
    228 
    229 	int		sd_anchor;	/* position of anchor in stream */
    230 	/*
    231 	 * Service scheduling at the stream head.
    232 	 */
    233 	kmutex_t	sd_qlock;	/* NOTE(review): presumably guards */
        					/* the scheduling fields below */
    234 	struct queue	*sd_qhead;	/* Head of queues to be serviced. */
    235 	struct queue	*sd_qtail;	/* Tail of queues to be serviced. */
    236 	void		*sd_servid;	/* Service ID for bckgrnd schedule */
    237 	ushort_t	sd_svcflags;	/* Servicing flags */
    238 	short		sd_nqueues;	/* Number of queues in the list */
    239 	kcondvar_t	sd_qcv;		/* Waiters for qhead to become empty */
    240 	kcondvar_t	sd_zcopy_wait;	/* NOTE(review): presumably paired */
        					/* with STZCNOTIFY -- confirm */
    241 	uint_t		sd_copyflag;	/* copy-related flags */
    242 	zoneid_t	sd_anchorzone;	/* Allow removal from same zone only */
    243 	struct msgb	*sd_cmdblk;	/* reply from _I_CMD */
    244 } stdata_t;
    245 
    246 /*
    247  * stdata servicing flags.
         * NOTE(review): presumably the values kept in sd_svcflags -- confirm.
    248  */
    249 #define	STRS_WILLSERVICE	0x01
    250 #define	STRS_SCHEDULED		0x02
    251 
        /*
         * Non-zero when the stream's list of queues to be serviced (sd_qhead)
         * is not empty.  The argument is evaluated exactly once.
         */
    252 #define	STREAM_NEEDSERVICE(stp)	((stp)->sd_qhead != NULL)
    253 
    254 /*
    255  * stdata flag field defines
         *
         * Bit values for the uint_t state word of the stream head (sd_flag).
         * All 32 bit positions are laid out here; 0x00020000, 0x00040000 and
         * 0x00200000 are currently unused.  STRHASLINKS occupies the top bit,
         * so the word must stay unsigned.
    256  */
    257 #define	IOCWAIT		0x00000001	/* Someone is doing an ioctl */
    258 #define	RSLEEP		0x00000002	/* Someone wants to read/recv msg */
    259 #define	WSLEEP		0x00000004	/* Someone wants to write */
    260 #define	STRPRI		0x00000008	/* An M_PCPROTO is at stream head */
    261 #define	STRHUP		0x00000010	/* Device has vanished */
    262 #define	STWOPEN		0x00000020	/* waiting for 1st open */
    263 #define	STPLEX		0x00000040	/* stream is being multiplexed */
    264 #define	STRISTTY	0x00000080	/* stream is a terminal */
    265 #define	STRGETINPROG	0x00000100	/* (k)strgetmsg is running */
    266 #define	IOCWAITNE	0x00000200	/* STR_NOERROR ioctl running */
    267 #define	STRDERR		0x00000400	/* fatal read error from M_ERROR */
    268 #define	STWRERR		0x00000800	/* fatal write error from M_ERROR */
    269 #define	STRDERRNONPERSIST 0x00001000	/* nonpersistent read errors */
    270 #define	STWRERRNONPERSIST 0x00002000	/* nonpersistent write errors */
    271 #define	STRCLOSE	0x00004000	/* wait for a close to complete */
    272 #define	SNDMREAD	0x00008000	/* used for read notification */
    273 #define	OLDNDELAY	0x00010000	/* use old TTY semantics for */
    274 					/* NDELAY reads and writes */
    275 	/*		0x00020000	   unused */
    276 	/*		0x00040000	   unused */
    277 #define	STRTOSTOP	0x00080000	/* block background writes */
    278 #define	STRCMDWAIT	0x00100000 	/* someone is doing an _I_CMD */
    279 	/*		0x00200000	   unused */
    280 #define	STRMOUNT	0x00400000	/* stream is mounted */
    281 #define	STRNOTATMARK	0x00800000	/* Not at mark (when empty read q) */
    282 #define	STRDELIM	0x01000000	/* generate delimited messages */
    283 #define	STRATMARK	0x02000000	/* At mark (due to MSGMARKNEXT) */
    284 #define	STZCNOTIFY	0x04000000	/* wait for zerocopy mblk to be acked */
    285 #define	STRPLUMB	0x08000000	/* push/pop pending */
    286 #define	STREOF		0x10000000	/* End-of-file indication */
    287 #define	STREOPENFAIL	0x20000000	/* indicates if re-open has failed */
    288 #define	STRMATE		0x40000000	/* this stream is a mate */
    289 #define	STRHASLINKS	0x80000000	/* I_LINKs under this stream */
    290 
    291 /*
    292  * Copy-related flags (sd_copyflag), set by SO_COPYOPT.
         * These live in their own word, so their values deliberately restart
         * at 0x1 and must not be mixed with the sd_flag bits above.
    293  */
    294 #define	STZCVMSAFE	0x00000001	/* safe to borrow file (segmapped) */
    295 					/* pages instead of bcopy */
    296 #define	STZCVMUNSAFE	0x00000002	/* unsafe to borrow file pages */
    297 #define	STRCOPYCACHED	0x00000004	/* copy should NOT bypass cache */
    298 
    299 /*
    300  * Options and flags for strrput (sd_rput_opt)
    301  */
    302 #define	SR_POLLIN	0x00000001	/* pollwakeup needed for band0 data */
    303 #define	SR_SIGALLDATA	0x00000002	/* Send SIGPOLL for all M_DATA */
    304 #define	SR_CONSOL_DATA	0x00000004	/* Consolidate M_DATA onto q_last */
    305 #define	SR_IGN_ZEROLEN	0x00000008	/* Ignore zero-length M_DATA */
    306 
    307 /*
    308  * Options and flags for strwrite/strputmsg (sd_wput_opt)
    309  */
    310 #define	SW_SIGPIPE	0x00000001	/* Send SIGPIPE for write error */
    311 #define	SW_RECHECK_ERR	0x00000002	/* Recheck errors in strwrite loop */
    312 #define	SW_SNDZERO	0x00000004	/* send 0-length msg down pipe/FIFO */
    313 
    314 /*
    315  * Options and flags for strread (sd_read_opt)
    316  */
    317 #define	RD_MSGDIS	0x00000001	/* read msg discard */
    318 #define	RD_MSGNODIS	0x00000002	/* read msg no discard */
    319 #define	RD_PROTDAT	0x00000004	/* read M_[PC]PROTO contents as data */
    320 #define	RD_PROTDIS	0x00000008	/* discard M_[PC]PROTO blocks and */
    321 					/* retain data blocks */
    322 /*
    323  * Flags parameter for strsetrputhooks() and strsetwputhooks().
    324  * These flags define the interface for setting the above internal
    325  * flags in sd_rput_opt and sd_wput_opt.
         *
         * The SH_* values are NOT the same bits as the SR_* and SW_* values
         * they correspond to (for example SH_CONSOL_DATA is 0x1 while
         * SR_CONSOL_DATA is 0x4, and SH_SIGPIPE is 0x100 while SW_SIGPIPE is
         * 0x1), so an SH_* mask has to be translated flag by flag and can
         * never be stored into sd_rput_opt or sd_wput_opt directly.
         * SR_POLLIN and SW_SNDZERO have no SH_* counterpart in this file.
    326  */
    327 #define	SH_CONSOL_DATA	0x00000001	/* Consolidate M_DATA onto q_last */
    328 #define	SH_SIGALLDATA	0x00000002	/* Send SIGPOLL for all M_DATA */
    329 #define	SH_IGN_ZEROLEN	0x00000004	/* Drop zero-length M_DATA */
    330 
    331 #define	SH_SIGPIPE	0x00000100	/* Send SIGPIPE for write error */
    332 #define	SH_RECHECK_ERR	0x00000200	/* Recheck errors in strwrite loop */
    333 
    334 /*
    335  * Each queue points to a sync queue (the inner perimeter) which keeps
    336  * track of the number of threads that are inside a given queue (sq_count)
    337  * and also is used to implement the asynchronous putnext
    338  * (by queuing messages if the queue can not be entered.)
    339  *
    340  * Messages are queued on sq_head/sq_tail including deferred qwriter(INNER)
    341  * messages. The sq_head/sq_tail list is a singly-linked list with
    342  * b_queue recording the queue and b_prev recording the function to
    343  * be called (either the put procedure or a qwriter callback function.)
    344  *
    345  * The sq_count counter tracks the number of threads that are
    346  * executing inside the perimeter or (in the case of outer perimeters)
    347  * have some work queued for them relating to the perimeter. The sq_rmqcount
    348  * counter tracks the subset which are in removeq() (usually invoked from
    349  * qprocsoff(9F)).
    350  *
    351  * In addition a module writer can declare that the module has an outer
    352  * perimeter (by setting D_MTOUTPERIM) in which case all inner perimeter
    353  * syncq's for the module point (through sq_outer) to an outer perimeter
    354  * syncq. The outer perimeter consists of the doubly linked list (sq_onext and
    355  * sq_oprev) linking all the inner perimeter syncq's with the outer perimeter
    356  * syncq. This is used to implement qwriter(OUTER) (an asynchronous way of
    357  * getting exclusive access at the outer perimeter) and outer_enter/exit
    358  * which are used by the framework to acquire exclusive access to the outer
    359  * perimeter during open and close of modules that have set D_MTOUTPERIM.
    360  *
    361  * In the inner perimeter case sq_save is available for use by machine
    362  * dependent code. sq_head/sq_tail are used to queue deferred messages on
    363  * the inner perimeter syncqs and to queue become_writer requests on the
    364  * outer perimeter syncqs.
    365  *
    366  * Note: machine dependent optimized versions of putnext may depend
    367  * on the order of sq_flags and sq_count (so that they can e.g.
    368  * read these two fields in a single load instruction.)
    369  *
    370  * Per perimeter SQLOCK/sq_count in putnext/put may be replaced by per cpu
    371  * sq_putlocks/sq_putcounts each living in a separate cache line. Obviously
    372  * sq_putlock[x] protects sq_putcount[x]. putnext/put routine will grab only 1
    373  * of sq_putlocks and update only 1 of sq_putcounts. strlock() and many
    374  * other routines in strsubr.c and ddi.c will grab all sq_putlocks (as well as
    375  * SQLOCK) and figure out the count value as the sum of sq_count and all of
    376  * sq_putcounts. The idea is to make critical fast path -- putnext -- much
    377  * faster at the expense of much less often used slower path like
    378  * strlock(). One known case where entersq/strlock is executed pretty often is
    379  * SpecWeb but since IP is SQ_CIOC and socket TCP/IP stream is nextless
    380  * there's no need to grab multiple sq_putlocks and look at sq_putcounts. See
    381  * strsubr.c for more comments.
    382  *
    383  * Note regular SQLOCK and sq_count are still used in many routines
    384  * (e.g. entersq(), rwnext()) in the same way as before sq_putlocks were
    385  * introduced.
    386  *
    387  * To understand when all sq_putlocks need to be held and all sq_putcounts
    388  * need to be added up one needs to look closely at putnext code. Basically if
    389  * a routine like e.g. wait_syncq() needs to be sure that perimeter is empty
    390  * all sq_putlocks/sq_putcounts need to be held/added up. On the other hand
    391  * there's no need to hold all sq_putlocks and count all sq_putcounts in
    392  * routines like leavesq()/dropsq() etc., since they are usually exit
    393  * counterparts of entersq()/outer_enter() etc., which have already either
    394  * prevented put entry points from executing or did not care about put
    395  * entrypoints. entersq() doesn't need to care about sq_putlocks/sq_putcounts
    396  * if the entry point has a shared access since put has the highest degree of
    397  * concurrency and such entersq() does not intend to block out put
    398  * entrypoints.
    399  *
    400  * Before sq_putcounts were introduced the standard way to wait for perimeter
    401  * to become empty was:
    402  *
    403  *	mutex_enter(SQLOCK(sq));
    404  *	while (sq->sq_count > 0) {
    405  *		sq->sq_flags |= SQ_WANTWAKEUP;
    406  *		cv_wait(&sq->sq_wait, SQLOCK(sq));
    407  *	}
    408  *	mutex_exit(SQLOCK(sq));
    409  *
    410  * The new way is:
    411  *
    412  * 	mutex_enter(SQLOCK(sq));
    413  *	count = sq->sq_count;
    414  *	SQ_PUTLOCKS_ENTER(sq);
    415  *	SUM_SQ_PUTCOUNTS(sq, count);
    416  *	while (count != 0) {
    417  *		sq->sq_flags |= SQ_WANTWAKEUP;
    418  *		SQ_PUTLOCKS_EXIT(sq);
    419  *		cv_wait(&sq->sq_wait, SQLOCK(sq));
    420  *		count = sq->sq_count;
    421  *		SQ_PUTLOCKS_ENTER(sq);
    422  *		SUM_SQ_PUTCOUNTS(sq, count);
    423  *	}
    424  *	SQ_PUTLOCKS_EXIT(sq);
    425  *	mutex_exit(SQLOCK(sq));
    426  *
    427  * Note that SQ_WANTWAKEUP is set before dropping SQ_PUTLOCKS. This makes sure
    428  * putnext won't skip a wakeup.
    429  *
    430  * sq_putlocks are treated as the extension of SQLOCK for lock ordering
    431  * purposes and are always grabbed right after grabbing SQLOCK and released
    432  * right before releasing SQLOCK. This also allows dynamic creation of
    433  * sq_putlocks while holding SQLOCK (by making sq_ciputctrl non null even when
    434  * the stream is already in use). Only in putnext one of sq_putlocks
    435  * is grabbed instead of SQLOCK. putnext return path remembers what counter it
    436  * incremented and decrements the right counter on its way out.
    437  */
    438 
    439 struct syncq {
        	/*
        	 * sq_count and sq_flags are deliberately adjacent and in this
        	 * order: the comment above this structure notes that machine
        	 * dependent putnext implementations may read both with a single
        	 * load.  Do not reorder, resize or separate them.
        	 */
    440 	kmutex_t	sq_lock;	/* atomic access to syncq */
    441 	uint16_t	sq_count;	/* # threads inside */
    442 	uint16_t	sq_flags;	/* state and some type info */
    443 	/*
    444 	 * Distributed syncq scheduling
    445 	 *  The list of queue's is handled by sq_head and
    446 	 *  sq_tail fields.
    447 	 *
    448 	 *  The list of events is handled by the sq_evhead and sq_evtail
    449 	 *  fields.
    450 	 */
    451 	queue_t		*sq_head;	/* queue of deferred messages */
    452 	queue_t		*sq_tail;	/* queue of deferred messages */
    453 	mblk_t		*sq_evhead;	/* Event message on the syncq */
    454 	mblk_t		*sq_evtail;
    455 	uint_t		sq_nqueues;	/* # of queues on this sq */
    456 	/*
    457 	 * Concurrency and condition variables
    458 	 */
    459 	uint16_t	sq_type;	/* type (concurrency) of syncq */
    460 	uint16_t	sq_rmqcount;	/* # threads inside removeq() */
    461 	kcondvar_t 	sq_wait;	/* block on this sync queue */
    462 	kcondvar_t 	sq_exitwait;	/* waiting for thread to leave the */
    463 					/* inner perimeter */
    464 	/*
    465 	 * Handling synchronous callbacks such as qtimeout and qbufcall
    466 	 */
    467 	ushort_t	sq_callbflags;	/* flags for callback synchronization */
    468 	callbparams_id_t sq_cancelid;	/* id of callback being cancelled */
    469 	struct callbparams *sq_callbpend;	/* Pending callbacks */
    470 
    471 	/*
    472 	 * Links forming an outer perimeter from one outer syncq and
    473 	 * a set of inner sync queues.
    474 	 */
    475 	struct syncq	*sq_outer;	/* Pointer to outer perimeter */
    476 	struct syncq	*sq_onext;	/* Linked list of syncq's making */
    477 	struct syncq	*sq_oprev;	/* up the outer perimeter. */
    478 	/*
    479 	 * support for low contention concurrent putnext.
        	 * sq_ciputctrl holds the per cpu sq_putlocks/sq_putcounts described
        	 * in the comment above this structure; it may become non-NULL
        	 * while the stream is already in use.
    480 	 */
    481 	ciputctrl_t	*sq_ciputctrl;
    482 	uint_t		sq_nciputctrl;	/* NOTE(review): presumably sizes */
        					/* sq_ciputctrl[] -- confirm */
    483 	/*
    484 	 * Counter for the number of threads wanting to become exclusive.
    485 	 */
    486 	uint_t		sq_needexcl;
    487 	/*
    488 	 * These two fields are used for scheduling a syncq for
    489 	 * background processing. The sq_svcflags is protected by
    490 	 * SQLOCK lock.
    491 	 */
    492 	struct syncq	*sq_next;	/* for syncq scheduling */
    493 	void *		sq_servid;	/* NOTE(review): cf. sd_servid */
    494 	uint_t		sq_servcount;	/* # pending background threads */
    495 	uint_t		sq_svcflags;	/* Scheduling flags	*/
    496 	clock_t		sq_tstamp;	/* Time when was enabled */
    497 	/*
    498 	 * Maximum priority of the queues on this syncq.
    499 	 */
    500 	pri_t		sq_pri;
    501 };
    502 typedef struct syncq syncq_t;
    503 
    504 /*
    505  * sync queue scheduling flags (for sq_svcflags).
    506  */
    507 #define	SQ_SERVICE	0x1		/* being serviced */
    508 #define	SQ_BGTHREAD	0x2		/* awaiting service by bg thread */
    509 #define	SQ_DISABLED	0x4		/* don't put syncq in service list */
    510 
    511 /*
    512  * FASTPUT bit in sq_count/putcount.
         *
         * Both counters are 16 bits wide (sq_count is uint16_t, ciput_cnt is
         * ushort_t).  SQ_FASTPUT is the top bit of that width and SQ_FASTMASK
         * selects the remaining 15 bits; the two masks are complementary.
    513  */
    514 #define	SQ_FASTPUT	0x8000
    515 #define	SQ_FASTMASK	0x7FFF
    516 
    517 /*
    518  * sync queue state flags
         *
         * These occupy the low byte of the 16-bit sq_flags word, which is what
         * SQ_FLAGMASK (0x00FF) selects.  The high byte is left for the syncq
         * type bits that are mirrored into sq_flags (SQ_TYPES_IN_FLAGS).
    519  */
    520 #define	SQ_EXCL		0x0001		/* exclusive access to inner */
    521 					/*	perimeter */
    522 #define	SQ_BLOCKED	0x0002		/* qprocsoff */
    523 #define	SQ_FROZEN	0x0004		/* freezestr */
    524 #define	SQ_WRITER	0x0008		/* qwriter(OUTER) pending or running */
    525 #define	SQ_MESSAGES	0x0010		/* messages on syncq */
    526 #define	SQ_WANTWAKEUP	0x0020		/* do cv_broadcast on sq_wait */
    527 #define	SQ_WANTEXWAKEUP	0x0040		/* do cv_broadcast on sq_exitwait */
    528 #define	SQ_EVENTS	0x0080		/* Events pending */
    529 #define	SQ_QUEUED	(SQ_MESSAGES | SQ_EVENTS)
    530 #define	SQ_FLAGMASK	0x00FF
    531 
    532 /*
    533  * Test a queue to see if inner perimeter is exclusive.
         * The result is the masked bit (0 or SQ_EXCL), not a normalized 0/1;
         * q is evaluated once.
    534  */
    535 #define	PERIM_EXCL(q)	((q)->q_syncq->sq_flags & SQ_EXCL)
    536 
    537 /*
    538  * If any of these flags are set it is not possible for a thread to
    539  * enter a put or service procedure. Instead it must either block
    540  * or put the message on the syncq.
    541  */
    542 #define	SQ_GOAWAY	(SQ_EXCL|SQ_BLOCKED|SQ_FROZEN|SQ_WRITER|\
    543 			SQ_QUEUED)
    544 /*
    545  * If any of these flags are set it is not possible to drain the syncq
    546  */
    547 #define	SQ_STAYAWAY	(SQ_BLOCKED|SQ_FROZEN|SQ_WRITER)
    548 
    549 /*
    550  * Flags to trigger syncq tail processing.
    551  */
    552 #define	SQ_TAIL		(SQ_QUEUED|SQ_WANTWAKEUP|SQ_WANTEXWAKEUP)
    553 
    554 /*
    555  * Syncq types (stored in sq_type)
    556  * The SQ_TYPES_IN_FLAGS (ciput) are also stored in sq_flags
    557  * for performance reasons. Thus these type values have to be in the low
    558  * 16 bits and not conflict with the sq_flags values above.
    559  *
    560  * Notes:
    561  *  - putnext() and put() assume that the put procedures have the highest
    562  *    degree of concurrency. Thus if any of the SQ_CI* are set then SQ_CIPUT
    563  *    has to be set. This restriction can be lifted by adding code to putnext
    564  *    and put that check that sq_count == 0 like entersq does.
    565  *  - putnext() and put() do not currently handle !SQ_COPUT
    566  *  - In order to implement !SQ_COCB outer_enter has to be fixed so that
    567  *    the callback can be cancelled while cv_waiting in outer_enter.
    568  *  - If SQ_CISVC needs to be implemented, qprocsoff() needs to wait
    569  *    for the currently running services to stop (wait for QINSERVICE
    570  *    to go off). disable_svc called from qprocsoff disables only
    571  *    services that will be run in future.
    572  *
    573  * All the SQ_CO flags are set when there is no outer perimeter.
    574  */
    575 #define	SQ_CIPUT	0x0100		/* Concurrent inner put proc */
    576 #define	SQ_CISVC	0x0200		/* Concurrent inner svc proc */
    577 #define	SQ_CIOC		0x0400		/* Concurrent inner open/close */
    578 #define	SQ_CICB		0x0800		/* Concurrent inner callback */
    579 #define	SQ_COPUT	0x1000		/* Concurrent outer put proc */
    580 #define	SQ_COSVC	0x2000		/* Concurrent outer svc proc */
    581 #define	SQ_COOC		0x4000		/* Concurrent outer open/close */
    582 #define	SQ_COCB		0x8000		/* Concurrent outer callback */
    583 
    584 /* Types also kept in sq_flags for performance */
    585 #define	SQ_TYPES_IN_FLAGS	(SQ_CIPUT)
    586 
        /* SQ_CI is 0x0F00 and SQ_CO is 0xF000, so SQ_TYPEMASK is 0xFF00. */
    587 #define	SQ_CI		(SQ_CIPUT|SQ_CISVC|SQ_CIOC|SQ_CICB)
    588 #define	SQ_CO		(SQ_COPUT|SQ_COSVC|SQ_COOC|SQ_COCB)
    589 #define	SQ_TYPEMASK	(SQ_CI|SQ_CO)
    590 
    591 /*
    592  * Flag combinations passed to entersq and leavesq to specify the type
    593  * of entry point.
         * Each combination pairs the inner and the outer bit of one entry
         * point kind.
    594  */
    595 #define	SQ_PUT		(SQ_CIPUT|SQ_COPUT)
    596 #define	SQ_SVC		(SQ_CISVC|SQ_COSVC)
    597 #define	SQ_OPENCLOSE	(SQ_CIOC|SQ_COOC)
    598 #define	SQ_CALLBACK	(SQ_CICB|SQ_COCB)
    599 
    600 /*
    601  * Other syncq types which are not copied into flags.
         * SQ_PERMOD lies outside SQ_TYPEMASK.  It has the same numeric value
         * as SQ_EXCL, which is harmless only because it is never copied into
         * sq_flags.
    602  */
    603 #define	SQ_PERMOD	0x01		/* Syncq is PERMOD */
    604 
    605 /*
    606  * Asynchronous callback qun*** flag.
    607  * The mechanism these flags are used in is one where callbacks enter
    608  * the perimeter thanks to framework support. To use this mechanism
    609  * the q* and qun* flavors of the callback routines must be used.
    610  * e.g. qtimeout and quntimeout. The synchronization provided by the flags
    611  * avoids deadlocks between blocking qun* routines and the perimeter
    612  * lock.
    613  */
    614 #define	SQ_CALLB_BYPASSED	0x01		/* bypassed callback fn */
    615 
    616 /*
    617  * Cancel callback mask.
    618  * The mask expands as the number of cancelable callback types grows
    619  * Note - separate callback flag because different callbacks have
    620  * overlapping id space.
         *
         * The mask is written before the two flags it combines.  That is
         * legal because a macro body is only expanded where the macro is
         * used, by which point both flags are defined.  The mask is 0x06.
    621  */
    622 #define	SQ_CALLB_CANCEL_MASK	(SQ_CANCEL_TOUT|SQ_CANCEL_BUFCALL)
    623 
    624 #define	SQ_CANCEL_TOUT		0x02		/* cancel timeout request */
    625 #define	SQ_CANCEL_BUFCALL	0x04		/* cancel bufcall request */
    626 
    627 typedef struct callbparams {
        	/*
        	 * One callback record.  struct syncq keeps a list of these in
        	 * sq_callbpend ("Pending callbacks"); cbp_next is the only
        	 * self-referential link here, so it is presumably that list's
        	 * chain pointer.
        	 * NOTE(review): the per-field notes below are inferred from the
        	 * member names and types only -- confirm against strsubr.c.
        	 */
    628 	syncq_t		*cbp_sq;	/* presumably the owning syncq */
    629 	void		(*cbp_func)(void *);	/* function to invoke */
    630 	void		*cbp_arg;	/* argument for cbp_func */
    631 	callbparams_id_t cbp_id;	/* callback id; cf. sq_cancelid */
    632 	uint_t		cbp_flags;
    633 	struct callbparams *cbp_next;
    634 	size_t		cbp_size;
    635 } callbparams_t;
    636 
    637 typedef struct strbufcall {
        	/*
        	 * One bufcall request.  struct bclist points at the head and
        	 * tail of a list of these; bc_next is presumably its link.
        	 * NOTE(review): the per-field notes below are inferred from the
        	 * member names and types only -- confirm against the users.
        	 */
    638 	void		(*bc_func)(void *);	/* function to invoke */
    639 	void		*bc_arg;	/* argument for bc_func */
    640 	size_t		bc_size;	/* presumably the requested size */
    641 	bufcall_id_t	bc_id;
    642 	struct strbufcall *bc_next;
    643 	kthread_id_t	bc_executor;	/* presumably the running thread */
    644 } strbufcall_t;
    645 
    646 /*
    647  * Structure of list of processes to be sent SIGPOLL/SIGURG signal
    648  * on request.  The valid S_* events are defined in stropts.h.
         *
         * The stream head keeps the list in sd_siglist and the logical OR of
         * every entry's events in sd_sigflags.
    649  */
    650 typedef struct strsig {
    651 	struct pid	*ss_pidp;	/* pid/pgrp pointer */
    652 	pid_t		ss_pid;		/* positive pid, negative pgrp */
    653 	int		ss_events;	/* S_* events */
    654 	struct strsig	*ss_next;	/* next entry in the list */
    655 } strsig_t;
    656 
    657 /*
    658  * bufcall list
         * Head and tail pointers for a list of strbufcall_t requests.
    659  */
    660 struct bclist {
    661 	strbufcall_t	*bc_head;
    662 	strbufcall_t	*bc_tail;
    663 };
    664 
    665 /*
    666  * Structure used to track mux links and unlinks.
         *
         * Each node carries a list of outgoing mux_edge structures (mn_outp),
         * so the nodes and edges together form a directed graph.  mn_originp
         * and mn_startp hold the state of a search over that graph.
    667  */
    668 struct mux_node {
    669 	major_t		 mn_imaj;	/* internal major device number */
    670 	uint16_t	 mn_indegree;	/* number of incoming edges */
    671 	struct mux_node *mn_originp;	/* where we came from during search */
    672 	struct mux_edge *mn_startp;	/* where search left off in mn_outp */
    673 	struct mux_edge *mn_outp;	/* list of outgoing edges */
    674 	uint_t		 mn_flags;	/* see below */
    675 };
    676 
    677 /*
    678  * Flags for mux_nodes.
         * Stored in mn_flags.
    679  */
    680 #define	VISITED	1
    681 
    682 /*
    683  * Edge structure - a list of these is hung off the
    684  * mux_node to represent the outgoing edges.
         * The list starts at the node's mn_outp and is chained through
         * me_nextp.
    685  */
    686 struct mux_edge {
    687 	struct mux_node	*me_nodep;	/* edge leads to this node */
    688 	struct mux_edge	*me_nextp;	/* next edge */
    689 	int		 me_muxid;	/* id of link */
    690 	dev_t		 me_dev;	/* dev_t - used for kernel PUNLINK */
    691 };
    692 
    693 /*
    694  * Queue info
    695  *
    696  * The syncq is included here to reduce memory fragmentation
    697  * for kernel memory allocators that only allocate in sizes that are
    698  * powers of two. If the kernel memory allocator changes this should
    699  * be revisited.
         *
         * The member order is a contract: the per-member comments require the
         * read queue first, the write queue second and the syncq third.
    700  */
    701 typedef struct queinfo {
    702 	struct queue	qu_rqueue;	/* read queue - must be first */
    703 	struct queue	qu_wqueue;	/* write queue - must be second */
    704 	struct syncq	qu_syncq;	/* syncq - must be third */
    705 } queinfo_t;
    706 
    707 /*
    708  * Multiplexed streams info
    709  */
    710 typedef struct linkinfo {
    711 	struct linkblk	li_lblk;	/* must be first */
    712 	struct file	*li_fpdown;	/* file pointer for lower stream */
    713 	struct linkinfo	*li_next;	/* next in list */
    714 	struct linkinfo *li_prev;	/* previous in list */
    715 } linkinfo_t;
    716 
    717 /*
    718  * List of syncq's used by freeezestr/unfreezestr
    719  */
    720 typedef struct syncql {
    721 	struct syncql	*sql_next;
    722 	syncq_t		*sql_sq;
    723 } syncql_t;
    724 
    725 typedef struct sqlist {
    726 	syncql_t	*sqlist_head;
    727 	size_t		sqlist_size;		/* structure size in bytes */
    728 	size_t		sqlist_index;		/* next free entry in array */
    729 	syncql_t	sqlist_array[4];	/* 4 or more entries */
    730 } sqlist_t;
    731 
    732 typedef struct perdm {
    733 	struct perdm		*dm_next;
    734 	syncq_t			*dm_sq;
    735 	struct streamtab	*dm_str;
    736 	uint_t			dm_ref;
    737 } perdm_t;
    738 
    739 #define	NEED_DM(dmp, qflag) \
    740 	(dmp == NULL && (qflag & (QPERMOD | QMTOUTPERIM)))
    741 
    742 /*
    743  * fmodsw_impl_t is used within the kernel. fmodsw is used by
    744  * the modules/drivers. The information is copied from fmodsw
    745  * defined in the module/driver into the fmodsw_impl_t structure
    746  * during the module/driver initialization.
    747  */
    748 typedef struct fmodsw_impl	fmodsw_impl_t;
    749 
    750 struct fmodsw_impl {
    751 	fmodsw_impl_t		*f_next;
    752 	char			f_name[FMNAMESZ + 1];
    753 	struct streamtab	*f_str;
    754 	uint32_t		f_qflag;
    755 	uint32_t		f_sqtype;
    756 	perdm_t			*f_dmp;
    757 	uint32_t		f_ref;
    758 	uint32_t		f_hits;
    759 };
    760 
    761 typedef enum {
    762 	FMODSW_HOLD =	0x00000001,
    763 	FMODSW_LOAD =	0x00000002
    764 } fmodsw_flags_t;
    765 
    766 typedef struct cdevsw_impl {
    767 	struct streamtab	*d_str;
    768 	uint32_t		d_qflag;
    769 	uint32_t		d_sqtype;
    770 	perdm_t			*d_dmp;
    771 } cdevsw_impl_t;
    772 
    773 /*
    774  * Enumeration of the types of access that can be requested for a
    775  * controlling terminal under job control.
    776  */
    777 enum jcaccess {
    778 	JCREAD,			/* read data on a ctty */
    779 	JCWRITE,		/* write data to a ctty */
    780 	JCSETP,			/* set ctty parameters */
    781 	JCGETP			/* get ctty parameters */
    782 };
    783 
    784 struct str_stack {
    785 	netstack_t	*ss_netstack;	/* Common netstack */
    786 
    787 	kmutex_t	ss_sad_lock;	/* autopush lock */
    788 	mod_hash_t	*ss_sad_hash;
    789 	size_t		ss_sad_hash_nchains;
    790 	struct saddev	*ss_saddev;	/* sad device array */
    791 	int		ss_sadcnt;	/* number of sad devices */
    792 
    793 	int		ss_devcnt;	/* number of mux_nodes */
    794 	struct mux_node	*ss_mux_nodes;	/* mux info for cycle checking */
    795 };
    796 typedef struct str_stack str_stack_t;
    797 
    798 /*
    799  * Finding related queues
    800  */
    801 #define	STREAM(q)	((q)->q_stream)
    802 #define	SQ(rq)		((syncq_t *)((rq) + 2))
    803 
    804 /*
    805  * Get the module/driver name for a queue.  Since some queues don't have
    806  * q_info structures (e.g., see log_makeq()), fall back to "?".
    807  */
    808 #define	Q2NAME(q) \
    809 	(((q)->q_qinfo != NULL && (q)->q_qinfo->qi_minfo->mi_idname != NULL) ? \
    810 	(q)->q_qinfo->qi_minfo->mi_idname : "?")
    811 
    812 /*
    813  * Locking macros
    814  */
    815 #define	QLOCK(q)	(&(q)->q_lock)
    816 #define	SQLOCK(sq)	(&(sq)->sq_lock)
    817 
    818 #define	STREAM_PUTLOCKS_ENTER(stp) {					       \
    819 		ASSERT(MUTEX_HELD(&(stp)->sd_lock));			       \
    820 		if ((stp)->sd_ciputctrl != NULL) {			       \
    821 			int i;						       \
    822 			int nlocks = (stp)->sd_nciputctrl;		       \
    823 			ciputctrl_t *cip = (stp)->sd_ciputctrl;		       \
    824 			for (i = 0; i <= nlocks; i++) {			       \
    825 				mutex_enter(&cip[i].ciputctrl_lock);	       \
    826 			}						       \
    827 		}							       \
    828 	}
    829 
    830 #define	STREAM_PUTLOCKS_EXIT(stp) {					       \
    831 		ASSERT(MUTEX_HELD(&(stp)->sd_lock));			       \
    832 		if ((stp)->sd_ciputctrl != NULL) {			       \
    833 			int i;						       \
    834 			int nlocks = (stp)->sd_nciputctrl;		       \
    835 			ciputctrl_t *cip = (stp)->sd_ciputctrl;		       \
    836 			for (i = 0; i <= nlocks; i++) {			       \
    837 				mutex_exit(&cip[i].ciputctrl_lock);	       \
    838 			}						       \
    839 		}							       \
    840 	}
    841 
    842 #define	SQ_PUTLOCKS_ENTER(sq) {						       \
    843 		ASSERT(MUTEX_HELD(SQLOCK(sq)));				       \
    844 		if ((sq)->sq_ciputctrl != NULL) {			       \
    845 			int i;						       \
    846 			int nlocks = (sq)->sq_nciputctrl;		       \
    847 			ciputctrl_t *cip = (sq)->sq_ciputctrl;		       \
    848 			ASSERT((sq)->sq_type & SQ_CIPUT);		       \
    849 			for (i = 0; i <= nlocks; i++) {			       \
    850 				mutex_enter(&cip[i].ciputctrl_lock);	       \
    851 			}						       \
    852 		}							       \
    853 	}
    854 
    855 #define	SQ_PUTLOCKS_EXIT(sq) {						       \
    856 		ASSERT(MUTEX_HELD(SQLOCK(sq)));				       \
    857 		if ((sq)->sq_ciputctrl != NULL) {			       \
    858 			int i;						       \
    859 			int nlocks = (sq)->sq_nciputctrl;		       \
    860 			ciputctrl_t *cip = (sq)->sq_ciputctrl;		       \
    861 			ASSERT((sq)->sq_type & SQ_CIPUT);		       \
    862 			for (i = 0; i <= nlocks; i++) {			       \
    863 				mutex_exit(&cip[i].ciputctrl_lock);	       \
    864 			}						       \
    865 		}							       \
    866 	}
    867 
    868 #define	SQ_PUTCOUNT_SETFAST(sq) {					\
    869 		ASSERT(MUTEX_HELD(SQLOCK(sq)));				\
    870 		if ((sq)->sq_ciputctrl != NULL) {			\
    871 			int i;						\
    872 			int nlocks = (sq)->sq_nciputctrl;		\
    873 			ciputctrl_t *cip = (sq)->sq_ciputctrl;		\
    874 			ASSERT((sq)->sq_type & SQ_CIPUT);		\
    875 			for (i = 0; i <= nlocks; i++) {			\
    876 				mutex_enter(&cip[i].ciputctrl_lock);	\
    877 				cip[i].ciputctrl_count |= SQ_FASTPUT;	\
    878 				mutex_exit(&cip[i].ciputctrl_lock);	\
    879 			}						\
    880 		}							\
    881 	}
    882 
    883 #define	SQ_PUTCOUNT_CLRFAST(sq) {					\
    884 		ASSERT(MUTEX_HELD(SQLOCK(sq)));				\
    885 		if ((sq)->sq_ciputctrl != NULL) {			\
    886 			int i;						\
    887 			int nlocks = (sq)->sq_nciputctrl;		\
    888 			ciputctrl_t *cip = (sq)->sq_ciputctrl;		\
    889 			ASSERT((sq)->sq_type & SQ_CIPUT);		\
    890 			for (i = 0; i <= nlocks; i++) {			\
    891 				mutex_enter(&cip[i].ciputctrl_lock);	\
    892 				cip[i].ciputctrl_count &= ~SQ_FASTPUT;	\
    893 				mutex_exit(&cip[i].ciputctrl_lock);	\
    894 			}						\
    895 		}							\
    896 	}
    897 
    898 
    899 #ifdef	DEBUG
    900 
    901 #define	SQ_PUTLOCKS_HELD(sq) {						       \
    902 		ASSERT(MUTEX_HELD(SQLOCK(sq)));				       \
    903 		if ((sq)->sq_ciputctrl != NULL) {			       \
    904 			int i;						       \
    905 			int nlocks = (sq)->sq_nciputctrl;		       \
    906 			ciputctrl_t *cip = (sq)->sq_ciputctrl;		       \
    907 			ASSERT((sq)->sq_type & SQ_CIPUT);		       \
    908 			for (i = 0; i <= nlocks; i++) {			       \
    909 				ASSERT(MUTEX_HELD(&cip[i].ciputctrl_lock));    \
    910 			}						       \
    911 		}							       \
    912 	}
    913 
    914 #define	SUMCHECK_SQ_PUTCOUNTS(sq, countcheck) {				       \
    915 		if ((sq)->sq_ciputctrl != NULL) {			       \
    916 			int i;						       \
    917 			uint_t count = 0;				       \
    918 			int ncounts = (sq)->sq_nciputctrl;		       \
    919 			ASSERT((sq)->sq_type & SQ_CIPUT);		       \
    920 			for (i = 0; i <= ncounts; i++) {		       \
    921 				count +=				       \
    922 				    (((sq)->sq_ciputctrl[i].ciputctrl_count) & \
    923 				    SQ_FASTMASK);			       \
    924 			}						       \
    925 			ASSERT(count == (countcheck));			       \
    926 		}							       \
    927 	}
    928 
    929 #define	SUMCHECK_CIPUTCTRL_COUNTS(ciput, nciput, countcheck) {		       \
    930 		int i;							       \
    931 		uint_t count = 0;					       \
    932 		ASSERT((ciput) != NULL);				       \
    933 		for (i = 0; i <= (nciput); i++) {			       \
    934 			count += (((ciput)[i].ciputctrl_count) &	       \
    935 			    SQ_FASTMASK);				       \
    936 		}							       \
    937 		ASSERT(count == (countcheck));				       \
    938 	}
    939 
    940 #else	/* DEBUG */
    941 
    942 #define	SQ_PUTLOCKS_HELD(sq)
    943 #define	SUMCHECK_SQ_PUTCOUNTS(sq, countcheck)
    944 #define	SUMCHECK_CIPUTCTRL_COUNTS(sq, nciput, countcheck)
    945 
    946 #endif	/* DEBUG */
    947 
    948 #define	SUM_SQ_PUTCOUNTS(sq, count) {					       \
    949 		if ((sq)->sq_ciputctrl != NULL) {			       \
    950 			int i;						       \
    951 			int ncounts = (sq)->sq_nciputctrl;		       \
    952 			ciputctrl_t *cip = (sq)->sq_ciputctrl;		       \
    953 			ASSERT((sq)->sq_type & SQ_CIPUT);		       \
    954 			for (i = 0; i <= ncounts; i++) {		       \
    955 				(count) += ((cip[i].ciputctrl_count) &	       \
    956 				    SQ_FASTMASK);			       \
    957 			}						       \
    958 		}							       \
    959 	}
    960 
    961 #define	CLAIM_QNEXT_LOCK(stp)	mutex_enter(&(stp)->sd_lock)
    962 #define	RELEASE_QNEXT_LOCK(stp)	mutex_exit(&(stp)->sd_lock)
    963 
    964 /*
    965  * syncq message manipulation macros.
    966  */
    967 /*
    968  * Put a message on the queue syncq.
    969  * Assumes QLOCK held.
    970  */
    971 #define	SQPUT_MP(qp, mp)						\
    972 	{								\
    973 		qp->q_syncqmsgs++;					\
    974 		if (qp->q_sqhead == NULL) {				\
    975 			qp->q_sqhead = qp->q_sqtail = mp;		\
    976 		} else {						\
    977 			qp->q_sqtail->b_next = mp;			\
    978 			qp->q_sqtail = mp;				\
    979 		}							\
    980 		set_qfull(qp);						\
    981 	}
    982 
    983 /*
    984  * Miscellaneous parameters and flags.
    985  */
    986 
    987 /*
    988  * Default timeout in milliseconds for ioctls and close
    989  */
    990 #define	STRTIMOUT 15000
    991 
    992 /*
    993  * Flag values for stream io
    994  */
    995 #define	WRITEWAIT	0x1	/* waiting for write event */
    996 #define	READWAIT	0x2	/* waiting for read event */
    997 #define	NOINTR		0x4	/* error is not to be set for signal */
    998 #define	GETWAIT		0x8	/* waiting for getmsg event */
    999 
   1000 /*
   1001  * These flags need to be unique for stream io name space
   1002  * and copy modes name space.  These flags allow strwaitq
   1003  * and strdoioctl to proceed as if signals or errors on the stream
   1004  * head have not occurred; i.e. they will be detected by some other
   1005  * means.
   1006  * STR_NOSIG does not allow signals to interrupt the call
   1007  * STR_NOERROR does not allow stream head read, write or hup errors to
   1008  * affect the call.  When used with strdoioctl(), if a previous ioctl
   1009  * is pending and times out, STR_NOERROR will cause strdoioctl() to not
   1010  * return ETIME. If, however, the requested ioctl times out, ETIME
   1011  * will be returned (use ic_timout instead)
   1012  * STR_PEEK is used to inform strwaitq that the reader is peeking at data
   1013  * and that a non-persistent error should not be cleared.
   1014  * STR_DELAYERR is used to inform strwaitq that it should not check errors
   1015  * after being awoken since, in addition to an error, there might also be
   1016  * data queued on the stream head read queue.
   1017  */
   1018 #define	STR_NOSIG	0x10	/* Ignore signals during strdoioctl/strwaitq */
   1019 #define	STR_NOERROR	0x20	/* Ignore errors during strdoioctl/strwaitq */
   1020 #define	STR_PEEK	0x40	/* Peeking behavior on non-persistent errors */
   1021 #define	STR_DELAYERR	0x80	/* Do not check errors on return */
   1022 
   1023 /*
   1024  * Copy modes for tty and I_STR ioctls
   1025  */
   1026 #define	U_TO_K 	01			/* User to Kernel */
   1027 #define	K_TO_K  02			/* Kernel to Kernel */
   1028 
   1029 /*
   1030  * Mux defines.
   1031  */
   1032 #define	LINKNORMAL	0x01		/* normal mux link */
   1033 #define	LINKPERSIST	0x02		/* persistent mux link */
   1034 #define	LINKTYPEMASK	0x03		/* bitmask of all link types */
   1035 #define	LINKCLOSE	0x04		/* unlink from strclose */
   1036 
   1037 /*
   1038  * Definitions of Streams macros and function interfaces.
   1039  */
   1040 
   1041 /*
   1042  * Obsolete queue scheduling macros. They are not used anymore, but still kept
   1043  * here for 3-d party modules and drivers who might still use them.
   1044  */
   1045 #define	setqsched()
   1046 #define	qready()	1
   1047 
   1048 #ifdef _KERNEL
   1049 #define	runqueues()
   1050 #define	queuerun()
   1051 #endif
   1052 
   1053 /* compatibility module for style 2 drivers with DR race condition */
   1054 #define	DRMODNAME	"drcompat"
   1055 
   1056 /*
   1057  * Macros dealing with mux_nodes.
   1058  */
   1059 #define	MUX_VISIT(X)	((X)->mn_flags |= VISITED)
   1060 #define	MUX_CLEAR(X)	((X)->mn_flags &= (~VISITED)); \
   1061 			((X)->mn_originp = NULL)
   1062 #define	MUX_DIDVISIT(X)	((X)->mn_flags & VISITED)
   1063 
   1064 
   1065 /*
   1066  * Twisted stream macros
   1067  */
   1068 #define	STRMATED(X)	((X)->sd_flag & STRMATE)
   1069 #define	STRLOCKMATES(X)	if (&((X)->sd_lock) > &(((X)->sd_mate)->sd_lock)) { \
   1070 				mutex_enter(&((X)->sd_lock)); \
   1071 				mutex_enter(&(((X)->sd_mate)->sd_lock));  \
   1072 			} else {  \
   1073 				mutex_enter(&(((X)->sd_mate)->sd_lock)); \
   1074 				mutex_enter(&((X)->sd_lock)); \
   1075 			}
   1076 #define	STRUNLOCKMATES(X)	mutex_exit(&((X)->sd_lock)); \
   1077 			mutex_exit(&(((X)->sd_mate)->sd_lock))
   1078 
   1079 #ifdef _KERNEL
   1080 
   1081 extern void strinit(void);
   1082 extern int strdoioctl(struct stdata *, struct strioctl *, int, int,
   1083     cred_t *, int *);
   1084 extern void strsendsig(struct strsig *, int, uchar_t, int);
   1085 extern void str_sendsig(vnode_t *, int, uchar_t, int);
   1086 extern void strhup(struct stdata *);
   1087 extern int qattach(queue_t *, dev_t *, int, cred_t *, fmodsw_impl_t *,
   1088     boolean_t);
   1089 extern int qreopen(queue_t *, dev_t *, int, cred_t *);
   1090 extern void qdetach(queue_t *, int, int, cred_t *, boolean_t);
   1091 extern void enterq(queue_t *);
   1092 extern void leaveq(queue_t *);
   1093 extern int putiocd(mblk_t *, caddr_t, int, cred_t *);
   1094 extern int getiocd(mblk_t *, caddr_t, int);
   1095 extern struct linkinfo *alloclink(queue_t *, queue_t *, struct file *);
   1096 extern void lbfree(struct linkinfo *);
   1097 extern int linkcycle(stdata_t *, stdata_t *, str_stack_t *);
   1098 extern struct linkinfo *findlinks(stdata_t *, int, int, str_stack_t *);
   1099 extern queue_t *getendq(queue_t *);
   1100 extern int mlink(vnode_t *, int, int, cred_t *, int *, int);
   1101 extern int mlink_file(vnode_t *, int, struct file *, cred_t *, int *, int);
   1102 extern int munlink(struct stdata *, struct linkinfo *, int, cred_t *, int *,
   1103     str_stack_t *);
   1104 extern int munlinkall(struct stdata *, int, cred_t *, int *, str_stack_t *);
   1105 extern void mux_addedge(stdata_t *, stdata_t *, int, str_stack_t *);
   1106 extern void mux_rmvedge(stdata_t *, int, str_stack_t *);
   1107 extern int devflg_to_qflag(struct streamtab *, uint32_t, uint32_t *,
   1108     uint32_t *);
   1109 extern void setq(queue_t *, struct qinit *, struct qinit *, perdm_t *,
   1110     uint32_t, uint32_t, boolean_t);
   1111 extern perdm_t *hold_dm(struct streamtab *, uint32_t, uint32_t);
   1112 extern void rele_dm(perdm_t *);
   1113 extern int strmakectl(struct strbuf *, int32_t, int32_t, mblk_t **);
   1114 extern int strmakedata(ssize_t *, struct uio *, stdata_t *, int32_t, mblk_t **);
   1115 extern int strmakemsg(struct strbuf *, ssize_t *, struct uio *,
   1116     struct stdata *, int32_t, mblk_t **);
   1117 extern int strgetmsg(vnode_t *, struct strbuf *, struct strbuf *, uchar_t *,
   1118     int *, int, rval_t *);
   1119 extern int strputmsg(vnode_t *, struct strbuf *, struct strbuf *, uchar_t,
   1120     int flag, int fmode);
   1121 extern int strstartplumb(struct stdata *, int, int);
   1122 extern void strendplumb(struct stdata *);
   1123 extern int stropen(struct vnode *, dev_t *, int, cred_t *);
   1124 extern int strclose(struct vnode *, int, cred_t *);
   1125 extern int strpoll(register struct stdata *, short, int, short *,
   1126     struct pollhead **);
   1127 extern void strclean(struct vnode *);
   1128 extern void str_cn_clean();	/* XXX hook for consoles signal cleanup */
   1129 extern int strwrite(struct vnode *, struct uio *, cred_t *);
   1130 extern int strwrite_common(struct vnode *, struct uio *, cred_t *, int);
   1131 extern int strread(struct vnode *, struct uio *, cred_t *);
   1132 extern int strioctl(struct vnode *, int, intptr_t, int, int, cred_t *, int *);
   1133 extern int strrput(queue_t *, mblk_t *);
   1134 extern int strrput_nondata(queue_t *, mblk_t *);
   1135 extern mblk_t *strrput_proto(vnode_t *, mblk_t *,
   1136     strwakeup_t *, strsigset_t *, strsigset_t *, strpollset_t *);
   1137 extern mblk_t *strrput_misc(vnode_t *, mblk_t *,
   1138     strwakeup_t *, strsigset_t *, strsigset_t *, strpollset_t *);
   1139 extern int getiocseqno(void);
   1140 extern int strwaitbuf(size_t, int);
   1141 extern int strwaitq(stdata_t *, int, ssize_t, int, clock_t, int *);
   1142 extern struct stdata *shalloc(queue_t *);
   1143 extern void shfree(struct stdata *s);
   1144 extern queue_t *allocq(void);
   1145 extern void freeq(queue_t *);
   1146 extern qband_t *allocband(void);
   1147 extern void freeband(qband_t *);
   1148 extern void freebs_enqueue(mblk_t *, dblk_t *);
   1149 extern void setqback(queue_t *, unsigned char);
   1150 extern int strcopyin(void *, void *, size_t, int);
   1151 extern int strcopyout(void *, void *, size_t, int);
   1152 extern void strsignal(struct stdata *, int, int32_t);
   1153 extern clock_t str_cv_wait(kcondvar_t *, kmutex_t *, clock_t, int);
   1154 extern void disable_svc(queue_t *);
   1155 extern void enable_svc(queue_t *);
   1156 extern void remove_runlist(queue_t *);
   1157 extern void wait_svc(queue_t *);
   1158 extern void backenable(queue_t *, uchar_t);
   1159 extern void set_qend(queue_t *);
   1160 extern int strgeterr(stdata_t *, int32_t, int);
   1161 extern void qenable_locked(queue_t *);
   1162 extern mblk_t *getq_noenab(queue_t *, ssize_t);
   1163 extern void rmvq_noenab(queue_t *, mblk_t *);
   1164 extern void qbackenable(queue_t *, uchar_t);
   1165 extern void set_qfull(queue_t *);
   1166 
   1167 extern void strblock(queue_t *);
   1168 extern void strunblock(queue_t *);
   1169 extern int qclaimed(queue_t *);
   1170 extern int straccess(struct stdata *, enum jcaccess);
   1171 
   1172 extern void entersq(syncq_t *, int);
   1173 extern void leavesq(syncq_t *, int);
   1174 extern void claimq(queue_t *);
   1175 extern void releaseq(queue_t *);
   1176 extern void claimstr(queue_t *);
   1177 extern void releasestr(queue_t *);
   1178 extern void removeq(queue_t *);
   1179 extern void insertq(struct stdata *, queue_t *);
   1180 extern void drain_syncq(syncq_t *);
   1181 extern void qfill_syncq(syncq_t *, queue_t *, mblk_t *);
   1182 extern void qdrain_syncq(syncq_t *, queue_t *);
   1183 extern int flush_syncq(syncq_t *, queue_t *);
   1184 extern void wait_sq_svc(syncq_t *);
   1185 
   1186 extern void outer_enter(syncq_t *, uint16_t);
   1187 extern void outer_exit(syncq_t *);
   1188 extern void qwriter_inner(queue_t *, mblk_t *, void (*)());
   1189 extern void qwriter_outer(queue_t *, mblk_t *, void (*)());
   1190 
   1191 extern callbparams_t *callbparams_alloc(syncq_t *, void (*)(void *),
   1192     void *, int);
   1193 extern void callbparams_free(syncq_t *, callbparams_t *);
   1194 extern void callbparams_free_id(syncq_t *, callbparams_id_t, int32_t);
   1195 extern void qcallbwrapper(void *);
   1196 
   1197 extern mblk_t *esballoc_wait(unsigned char *, size_t, uint_t, frtn_t *);
   1198 extern mblk_t *esballoca(unsigned char *, size_t, uint_t, frtn_t *);
   1199 extern mblk_t *desballoca(unsigned char *, size_t, uint_t, frtn_t *);
   1200 extern int do_sendfp(struct stdata *, struct file *, struct cred *);
   1201 extern int frozenstr(queue_t *);
   1202 extern size_t xmsgsize(mblk_t *);
   1203 
   1204 extern void putnext_tail(syncq_t *, queue_t *, uint32_t);
   1205 extern void stream_willservice(stdata_t *);
   1206 extern void stream_runservice(stdata_t *);
   1207 
   1208 extern void strmate(vnode_t *, vnode_t *);
   1209 extern queue_t *strvp2wq(vnode_t *);
   1210 extern vnode_t *strq2vp(queue_t *);
   1211 extern mblk_t *allocb_wait(size_t, uint_t, uint_t, int *);
   1212 extern mblk_t *allocb_cred(size_t, cred_t *, pid_t);
   1213 extern mblk_t *allocb_cred_wait(size_t, uint_t, int *, cred_t *, pid_t);
   1214 extern mblk_t *allocb_tmpl(size_t, const mblk_t *);
   1215 extern mblk_t *allocb_tryhard(size_t);
   1216 extern void mblk_copycred(mblk_t *, const mblk_t *);
   1217 extern void mblk_setcred(mblk_t *, cred_t *, pid_t);
   1218 extern cred_t *msg_getcred(const mblk_t *, pid_t *);
   1219 extern struct ts_label_s *msg_getlabel(const mblk_t *);
   1220 extern cred_t *msg_extractcred(mblk_t *, pid_t *);
   1221 extern void strpollwakeup(vnode_t *, short);
   1222 extern int putnextctl_wait(queue_t *, int);
   1223 
   1224 extern int kstrputmsg(struct vnode *, mblk_t *, struct uio *, ssize_t,
   1225     unsigned char, int, int);
   1226 extern int kstrgetmsg(struct vnode *, mblk_t **, struct uio *,
   1227     unsigned char *, int *, clock_t, rval_t *);
   1228 
   1229 extern void strsetrerror(vnode_t *, int, int, errfunc_t);
   1230 extern void strsetwerror(vnode_t *, int, int, errfunc_t);
   1231 extern void strseteof(vnode_t *, int);
   1232 extern void strflushrq(vnode_t *, int);
   1233 extern void strsetrputhooks(vnode_t *, uint_t, msgfunc_t, msgfunc_t);
   1234 extern void strsetwputhooks(vnode_t *, uint_t, clock_t);
   1235 extern void strsetrwputdatahooks(vnode_t *, msgfunc_t, msgfunc_t);
   1236 extern int strwaitmark(vnode_t *);
   1237 extern void strsignal_nolock(stdata_t *, int, uchar_t);
   1238 
   1239 struct multidata_s;
   1240 struct pdesc_s;
   1241 extern int hcksum_assoc(mblk_t *, struct multidata_s *, struct pdesc_s  *,
   1242     uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, int);
   1243 extern void hcksum_retrieve(mblk_t *, struct multidata_s *, struct pdesc_s *,
   1244     uint32_t *, uint32_t *, uint32_t *, uint32_t *, uint32_t *);
   1245 extern void lso_info_set(mblk_t *, uint32_t, uint32_t);
   1246 extern void lso_info_get(mblk_t *, uint32_t *, uint32_t *);
   1247 extern unsigned int bcksum(uchar_t *, int, unsigned int);
   1248 extern boolean_t is_vmloaned_mblk(mblk_t *, struct multidata_s *,
   1249     struct pdesc_s *);
   1250 
   1251 extern int fmodsw_register(const char *, struct streamtab *, int);
   1252 extern int fmodsw_unregister(const char *);
   1253 extern fmodsw_impl_t *fmodsw_find(const char *, fmodsw_flags_t);
   1254 extern void fmodsw_rele(fmodsw_impl_t *);
   1255 
   1256 extern void freemsgchain(mblk_t *);
   1257 extern mblk_t *copymsgchain(mblk_t *);
   1258 
   1259 extern mblk_t *mcopyinuio(struct stdata *, uio_t *, ssize_t, ssize_t, int *);
   1260 
   1261 /*
   1262  * shared or externally configured data structures
   1263  */
   1264 extern ssize_t strmsgsz;		/* maximum stream message size */
   1265 extern ssize_t strctlsz;		/* maximum size of ctl message */
   1266 extern int nstrpush;			/* maximum number of pushes allowed */
   1267 
   1268 /*
   1269  * Bufcalls related variables.
   1270  */
   1271 extern struct bclist strbcalls;		/* List of bufcalls */
   1272 extern kmutex_t	strbcall_lock;		/* Protects the list of bufcalls */
   1273 extern kcondvar_t strbcall_cv;		/* Signaling when a bufcall is added */
extern kcondvar_t bcall_cv;	/* wait until executing bufcall completes */
   1275 
   1276 extern frtn_t frnop;
   1277 
   1278 extern struct kmem_cache *ciputctrl_cache;
   1279 extern int n_ciputctrl;
   1280 extern int max_n_ciputctrl;
   1281 extern int min_n_ciputctrl;
   1282 
   1283 extern cdevsw_impl_t *devimpl;
   1284 
   1285 /*
   1286  * esballoc queue for throttling
   1287  */
   1288 typedef struct esb_queue {
   1289 	kmutex_t	eq_lock;
   1290 	uint_t		eq_len;		/* number of queued messages */
   1291 	mblk_t		*eq_head;	/* head of queue */
   1292 	mblk_t		*eq_tail;	/* tail of queue */
   1293 	uint_t		eq_flags;	/* esballoc queue flags */
   1294 } esb_queue_t;
   1295 
   1296 /*
   1297  * esballoc flags for queue processing.
   1298  */
   1299 #define	ESBQ_PROCESSING	0x01	/* queue is being processed */
   1300 #define	ESBQ_TIMER	0x02	/* timer is active */
   1301 
   1302 extern void esballoc_queue_init(void);
   1303 
   1304 #endif	/* _KERNEL */
   1305 
   1306 /*
   1307  * Note: Use of these macros are restricted to kernel/unix and
   1308  * intended for the STREAMS framework.
   1309  * All modules/drivers should include sys/ddi.h.
   1310  *
   1311  * Finding related queues
   1312  */
   1313 #define		_OTHERQ(q)	((q)->q_flag&QREADR? (q)+1: (q)-1)
   1314 #define		_WR(q)		((q)->q_flag&QREADR? (q)+1: (q))
   1315 #define		_RD(q)		((q)->q_flag&QREADR? (q): (q)-1)
   1316 #define		_SAMESTR(q)	(!((q)->q_flag & QEND))
   1317 
   1318 /*
   1319  * These are also declared here for modules/drivers that erroneously
   1320  * include strsubr.h after ddi.h or fail to include ddi.h at all.
   1321  */
   1322 extern struct queue *OTHERQ(queue_t *); /* stream.h */
   1323 extern struct queue *RD(queue_t *);
   1324 extern struct queue *WR(queue_t *);
   1325 extern int SAMESTR(queue_t *);
   1326 
   1327 /*
   1328  * The following hardware checksum related macros are private
   1329  * interfaces that are subject to change without notice.
   1330  */
   1331 #ifdef _KERNEL
   1332 #define	DB_CKSUMSTART(mp)	((mp)->b_datap->db_cksumstart)
   1333 #define	DB_CKSUMEND(mp)		((mp)->b_datap->db_cksumend)
   1334 #define	DB_CKSUMSTUFF(mp)	((mp)->b_datap->db_cksumstuff)
   1335 #define	DB_CKSUMFLAGS(mp)	((mp)->b_datap->db_struioun.cksum.flags)
   1336 #define	DB_CKSUM16(mp)		((mp)->b_datap->db_cksum16)
   1337 #define	DB_CKSUM32(mp)		((mp)->b_datap->db_cksum32)
   1338 #define	DB_LSOFLAGS(mp)		((mp)->b_datap->db_struioun.cksum.flags)
   1339 #define	DB_LSOMSS(mp)		((mp)->b_datap->db_struioun.cksum.pad)
   1340 #endif	/* _KERNEL */
   1341 
   1342 #ifdef	__cplusplus
   1343 }
   1344 #endif
   1345 
   1346 
   1347 #endif	/* _SYS_STRSUBR_H */
   1348