Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
     22 /*	  All Rights Reserved  	*/
     23 
     24 
     25 /*
     26  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     27  * Use is subject to license terms.
     28  */
     29 
     30 #include <sys/types.h>
     31 #include <sys/sysmacros.h>
     32 #include <sys/param.h>
     33 #include <sys/errno.h>
     34 #include <sys/signal.h>
     35 #include <sys/stat.h>
     36 #include <sys/proc.h>
     37 #include <sys/cred.h>
     38 #include <sys/user.h>
     39 #include <sys/vnode.h>
     40 #include <sys/file.h>
     41 #include <sys/stream.h>
     42 #include <sys/strsubr.h>
     43 #include <sys/stropts.h>
     44 #include <sys/tihdr.h>
     45 #include <sys/var.h>
     46 #include <sys/poll.h>
     47 #include <sys/termio.h>
     48 #include <sys/ttold.h>
     49 #include <sys/systm.h>
     50 #include <sys/uio.h>
     51 #include <sys/cmn_err.h>
     52 #include <sys/sad.h>
     53 #include <sys/netstack.h>
     54 #include <sys/priocntl.h>
     55 #include <sys/jioctl.h>
     56 #include <sys/procset.h>
     57 #include <sys/session.h>
     58 #include <sys/kmem.h>
     59 #include <sys/filio.h>
     60 #include <sys/vtrace.h>
     61 #include <sys/debug.h>
     62 #include <sys/strredir.h>
     63 #include <sys/fs/fifonode.h>
     64 #include <sys/fs/snode.h>
     65 #include <sys/strlog.h>
     66 #include <sys/strsun.h>
     67 #include <sys/project.h>
     68 #include <sys/kbio.h>
     69 #include <sys/msio.h>
     70 #include <sys/tty.h>
     71 #include <sys/ptyvar.h>
     72 #include <sys/vuid_event.h>
     73 #include <sys/modctl.h>
     74 #include <sys/sunddi.h>
     75 #include <sys/sunldi_impl.h>
     76 #include <sys/autoconf.h>
     77 #include <sys/policy.h>
     78 #include <sys/dld.h>
     79 #include <sys/zone.h>
     80 
     81 /*
     82  * This define helps improve the readability of streams code while
     83  * still maintaining a very old streams performance enhancement.  The
     84  * performance enhancement basically involved having all callers
     85  * of straccess() perform the first check that straccess() will do
     86  * locally before actually calling straccess().  (Thereby reducing
     87  * the number of unnecessary calls to straccess().)
         *
         * Note that the macro body refers to a variable named "stp" that is
         * not one of its arguments, so it can only be used where a suitable
         * stp is in scope.  It evaluates to 0 without calling straccess()
         * when the stream has no sd_sidp or when the stream's vnode is a
         * VFIFO.
     88  */
     89 #define	i_straccess(x, y)	((stp->sd_sidp == NULL) ? 0 : \
     90 				    (stp->sd_vnode->v_type == VFIFO) ? 0 : \
     91 				    straccess((x), (y)))
     92 
     93 /*
     94  * what is mblk_pull_len?
     95  *
     96  * If a streams message consists of many short messages,
     97  * a performance degradation occurs from copyout overhead.
     98  * To decrease the per mblk overhead, messages that are
     99  * likely to consist of many small mblks are pulled up into
    100  * one continuous chunk of memory.
    101  *
    102  * To avoid the processing overhead of examining every
    103  * mblk, a quick heuristic is used. If the first mblk in
    104  * the message is shorter than mblk_pull_len, it is likely
    105  * that the rest of the mblk will be short.
    106  *
    107  * This heuristic was decided upon after performance tests
    108  * indicated that anything more complex slowed down the main
    109  * code path.
    110  */
    111 #define	MBLK_PULL_LEN 64
    112 uint32_t mblk_pull_len = MBLK_PULL_LEN;
    113 
    114 /*
    115  * The sgttyb_handling flag controls the handling of the old BSD
    116  * TIOCGETP, TIOCSETP, and TIOCSETN ioctls as follows:
    117  *
    118  * 0 - Emit no warnings at all and retain old, broken behavior.
    119  * 1 - Emit no warnings and silently handle new semantics.
    120  * 2 - Send cmn_err(CE_NOTE) when either TIOCSETP or TIOCSETN is used
    121  *     (once per system invocation).  Handle with new semantics.
    122  * 3 - Send SIGSYS when any TIOCGETP, TIOCSETP, or TIOCSETN call is
    123  *     made (so that offenders drop core and are easy to debug).
    124  *
    125  * The "new semantics" are that TIOCGETP returns B38400 for
    126  * sg_[io]speed if the corresponding value is over B38400, and that
    127  * TIOCSET[PN] accept B38400 in these cases to mean "retain current
    128  * bit rate."
    129  */
    130 int sgttyb_handling = 1;
        /*
         * NOTE(review): sgttyb_complaint is not referenced in this part of the
         * file; presumably it latches the once-only CE_NOTE described for
         * level 2 above -- confirm against the ioctl code.
         */
    131 static boolean_t sgttyb_complaint;
    132 
    133 /* don't push drcompat module by default on Style-2 streams */
        /*
         * stropen() consults this flag: only when it is non-zero is DRMODNAME
         * pushed on a clone open of a network driver whose queue is not
         * _QASSOCIATED.
         */
    134 static int push_drcompat = 0;
    135 
    136 /*
    137  * id value used to distinguish between different ioctl messages
    138  */
    139 static uint32_t ioc_id;
    140 
        /* Forward declarations of file-local (static) routines. */
    141 static void putback(struct stdata *, queue_t *, mblk_t *, int);
    142 static void strcleanall(struct vnode *);
    143 static int strwsrv(queue_t *);
    144 static int strdocmd(struct stdata *, struct strcmd *, cred_t *);
    145 
    146 /*
    147  * qinit and module_info structures for stream head read and write queues
         *
         * strdata/stwdata are installed by stropen() on an ordinary stream
         * head; fifo_strdata/fifo_stwdata are installed instead when the
         * vnode is a VFIFO.  Going by the member order of struct qinit in
         * <sys/stream.h> (put procedure first, service procedure second),
         * the read side has only a put procedure (strrput) and the write
         * side has only a service procedure (strwsrv).
    148  */
    149 struct module_info strm_info = { 0, "strrhead", 0, INFPSZ, STRHIGH, STRLOW };
    150 struct module_info stwm_info = { 0, "strwhead", 0, 0, 0, 0 };
    151 struct qinit strdata = { strrput, NULL, NULL, NULL, NULL, &strm_info };
    152 struct qinit stwdata = { NULL, strwsrv, NULL, NULL, NULL, &stwm_info };
    153 struct module_info fiform_info = { 0, "fifostrrhead", 0, PIPE_BUF, FIFOHIWAT,
    154     FIFOLOWAT };
    155 struct module_info fifowm_info = { 0, "fifostrwhead", 0, 0, 0, 0 };
    156 struct qinit fifo_strdata = { strrput, NULL, NULL, NULL, NULL, &fiform_info };
    157 struct qinit fifo_stwdata = { NULL, strwsrv, NULL, NULL, NULL, &fifowm_info };
    158 
    159 extern kmutex_t	strresources;	/* protects global resources */
    160 extern kmutex_t muxifier;	/* single-threads multiplexor creation */
    161 
        /* msgnodata() is simply the logical negation of msghasdata(). */
    162 static boolean_t msghasdata(mblk_t *bp);
    163 #define	msgnodata(bp) (!msghasdata(bp))
    164 
    165 /*
    166  * Stream head locking notes:
    167  *	There are four monitors associated with the stream head:
    168  *	1. v_stream monitor: in stropen() and strclose() v_lock
    169  *		is held while the association of vnode and stream
    170  *		head is established or tested for.
    171  *	2. open/close/push/pop monitor: sd_lock is held while each
    172  *		thread bids for exclusive access to this monitor
    173  *		for opening or closing a stream.  In addition, this
    174  *		monitor is entered during pushes and pops.  This
    175  *		guarantees that during plumbing operations there
    176  *		is only one thread trying to change the plumbing.
    177  *		Any other threads present in the stream are only
    178  *		using the plumbing.
    179  *	3. read/write monitor: in the case of read, a thread holds
    180  *		sd_lock while trying to get data from the stream
    181  *		head queue.  if there is none to fulfill a read
    182  *		request, it sets RSLEEP and calls cv_wait_sig() down
    183  *		in strwaitq() to await the arrival of new data.
    184  *		when new data arrives in strrput(), sd_lock is acquired
    185  *		before testing for RSLEEP and calling cv_broadcast().
    186  *		the behavior of strwrite(), strwsrv(), and WSLEEP
    187  *		mirror this.
    188  *	4. ioctl monitor: sd_lock is gotten to ensure that only one
    189  *		thread is doing an ioctl at a time.
    190  */
    191 
    192 static int
    193 push_mod(queue_t *qp, dev_t *devp, struct stdata *stp, const char *name,
    194     int anchor, cred_t *crp, uint_t anchor_zoneid)
    195 {
    196 	int error;
    197 	fmodsw_impl_t *fp;
    198 
    199 	if (stp->sd_flag & (STRHUP|STRDERR|STWRERR)) {
    200 		error = (stp->sd_flag & STRHUP) ? ENXIO : EIO;
    201 		return (error);
    202 	}
    203 	if (stp->sd_pushcnt >= nstrpush) {
    204 		return (EINVAL);
    205 	}
    206 
    207 	if ((fp = fmodsw_find(name, FMODSW_HOLD | FMODSW_LOAD)) == NULL) {
    208 		stp->sd_flag |= STREOPENFAIL;
    209 		return (EINVAL);
    210 	}
    211 
    212 	/*
    213 	 * push new module and call its open routine via qattach
    214 	 */
    215 	if ((error = qattach(qp, devp, 0, crp, fp, B_FALSE)) != 0)
    216 		return (error);
    217 
    218 	/*
    219 	 * Check to see if caller wants a STREAMS anchor
    220 	 * put at this place in the stream, and add if so.
    221 	 */
    222 	mutex_enter(&stp->sd_lock);
    223 	if (anchor == stp->sd_pushcnt) {
    224 		stp->sd_anchor = stp->sd_pushcnt;
    225 		stp->sd_anchorzone = anchor_zoneid;
    226 	}
    227 	mutex_exit(&stp->sd_lock);
    228 
    229 	return (0);
    230 }
    231 
    232 /*
    233  * Open a stream device.
         *
         * If vp already has a stream head (vp->v_stream != NULL) this is a
         * re-open: wait until none of STWOPEN, STRCLOSE or STRPLUMB is set on
         * the stream (and, for a mated stream, on its mate), mark the stream
         * STWOPEN and call qreopen() on every queue below the stream head.
         *
         * Otherwise a new queue pair and stream head are allocated and tied to
         * the vnode.  For a VFIFO the stream head is set up with the fifo
         * qinit structures and no qattach() is done.  For anything else the
         * driver is attached with qattach() and the autopush modules
         * configured for the device are pushed with push_mod().
         *
         * Arguments:
         *	vp	vnode being opened
         *	devp	device number, passed by reference to qattach() and
         *		qreopen(); *devp is re-read after qattach() returns
         *	flag	open flags; with FNDELAY or FNONBLOCK a re-open that
         *		would otherwise have to wait fails with EAGAIN
         *	crp	credentials of the caller
         *
         * Returns 0 on success, otherwise an errno value: EAGAIN or EINTR from
         * the re-open wait, EIO if the existing stream is in read or write
         * error, or whatever qreopen(), qattach() or push_mod() returned.
    234  */
    235 int
    236 stropen(vnode_t *vp, dev_t *devp, int flag, cred_t *crp)
    237 {
    238 	struct stdata *stp;
    239 	queue_t *qp;
    240 	int s;
    241 	dev_t dummydev, savedev;
    242 	struct autopush *ap;
    243 	struct dlautopush dlap;
    244 	int error = 0;
    245 	ssize_t	rmin, rmax;
    246 	int cloneopen;
    247 	queue_t *brq;
    248 	major_t major;
    249 	str_stack_t *ss;
    250 	zoneid_t zoneid;
    251 	uint_t anchor;
    252 
    253 	if (audit_active)
    254 		audit_stropen(vp, devp, flag, crp);
    255 
    256 	/*
    257 	 * If the stream already exists, wait for any open in progress
    258 	 * to complete, then call the open function of each module and
    259 	 * driver in the stream.  Otherwise create the stream.
    260 	 */
    261 	TRACE_1(TR_FAC_STREAMS_FR, TR_STROPEN, "stropen:%p", vp);
    262 retry:
    263 	mutex_enter(&vp->v_lock);
    264 	if ((stp = vp->v_stream) != NULL) {
    265 
    266 		/*
    267 		 * Waiting for stream to be created to device
    268 		 * due to another open.
    269 		 */
    270 		mutex_exit(&vp->v_lock);
    271 
    272 		if (STRMATED(stp)) {
    273 			struct stdata *strmatep = stp->sd_mate;
    274 
        			/*
        			 * Both sd_locks are held from here.  Every exit
        			 * path below drops the mate's lock itself and
        			 * reaches ckreturn holding only stp->sd_lock,
        			 * which ckreturn releases.
        			 */
    275 			STRLOCKMATES(stp);
    276 			if (strmatep->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
    277 				if (flag & (FNDELAY|FNONBLOCK)) {
    278 					error = EAGAIN;
    279 					mutex_exit(&strmatep->sd_lock);
    280 					goto ckreturn;
    281 				}
    282 				mutex_exit(&stp->sd_lock);
    283 				if (!cv_wait_sig(&strmatep->sd_monitor,
    284 				    &strmatep->sd_lock)) {
    285 					error = EINTR;
    286 					mutex_exit(&strmatep->sd_lock);
        					/* ckreturn expects stp->sd_lock */
    287 					mutex_enter(&stp->sd_lock);
    288 					goto ckreturn;
    289 				}
    290 				mutex_exit(&strmatep->sd_lock);
        				/* re-examine vp->v_stream from the top */
    291 				goto retry;
    292 			}
    293 			if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
    294 				if (flag & (FNDELAY|FNONBLOCK)) {
    295 					error = EAGAIN;
    296 					mutex_exit(&strmatep->sd_lock);
    297 					goto ckreturn;
    298 				}
    299 				mutex_exit(&strmatep->sd_lock);
    300 				if (!cv_wait_sig(&stp->sd_monitor,
    301 				    &stp->sd_lock)) {
    302 					error = EINTR;
    303 					goto ckreturn;
    304 				}
    305 				mutex_exit(&stp->sd_lock);
    306 				goto retry;
    307 			}
    308 
    309 			if (stp->sd_flag & (STRDERR|STWRERR)) {
    310 				error = EIO;
    311 				mutex_exit(&strmatep->sd_lock);
    312 				goto ckreturn;
    313 			}
    314 
    315 			stp->sd_flag |= STWOPEN;
    316 			STRUNLOCKMATES(stp);
    317 		} else {
        			/* Unmated stream: same checks under sd_lock alone. */
    318 			mutex_enter(&stp->sd_lock);
    319 			if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
    320 				if (flag & (FNDELAY|FNONBLOCK)) {
    321 					error = EAGAIN;
    322 					goto ckreturn;
    323 				}
    324 				if (!cv_wait_sig(&stp->sd_monitor,
    325 				    &stp->sd_lock)) {
    326 					error = EINTR;
    327 					goto ckreturn;
    328 				}
    329 				mutex_exit(&stp->sd_lock);
    330 				goto retry;  /* could be clone! */
    331 			}
    332 
    333 			if (stp->sd_flag & (STRDERR|STWRERR)) {
    334 				error = EIO;
    335 				goto ckreturn;
    336 			}
    337 
    338 			stp->sd_flag |= STWOPEN;
    339 			mutex_exit(&stp->sd_lock);
    340 		}
    341 
    342 		/*
    343 		 * Open all modules and devices down stream to notify
    344 		 * that another user is streaming.  For modules, set the
    345 		 * last argument to MODOPEN and do not pass any open flags.
    346 		 * Ignore dummydev since this is not the first open.
    347 		 */
    348 		claimstr(stp->sd_wrq);
    349 		qp = stp->sd_wrq;
    350 		while (_SAMESTR(qp)) {
    351 			qp = qp->q_next;
    352 			if ((error = qreopen(_RD(qp), devp, flag, crp)) != 0)
    353 				break;
    354 		}
    355 		releasestr(stp->sd_wrq);
        		/*
        		 * The hangup/error state and STWOPEN are cleared here
        		 * whether or not one of the qreopen() calls failed; the
        		 * qreopen() error, if any, is still returned.
        		 */
    356 		mutex_enter(&stp->sd_lock);
    357 		stp->sd_flag &= ~(STRHUP|STWOPEN|STRDERR|STWRERR);
    358 		stp->sd_rerror = 0;
    359 		stp->sd_werror = 0;
        		/*
        		 * ckreturn is reached with stp->sd_lock held, on success
        		 * and on every failure path above.
        		 */
    360 ckreturn:
    361 		cv_broadcast(&stp->sd_monitor);
    362 		mutex_exit(&stp->sd_lock);
    363 		return (error);
    364 	}
    365 
    366 	/*
    367 	 * This vnode isn't streaming.  SPECFS already
    368 	 * checked for multiple vnodes pointing to the
    369 	 * same stream, so create a stream to the driver.
    370 	 */
    371 	qp = allocq();
    372 	stp = shalloc(qp);
    373 
    374 	/*
    375 	 * Initialize stream head.  shalloc() has given us
    376 	 * exclusive access, and we have the vnode locked;
    377 	 * we can do whatever we want with stp.
    378 	 */
    379 	stp->sd_flag = STWOPEN;
    380 	stp->sd_siglist = NULL;
    381 	stp->sd_pollist.ph_list = NULL;
    382 	stp->sd_sigflags = 0;
    383 	stp->sd_mark = NULL;
    384 	stp->sd_closetime = STRTIMOUT;
    385 	stp->sd_sidp = NULL;
    386 	stp->sd_pgidp = NULL;
    387 	stp->sd_vnode = vp;
    388 	stp->sd_rerror = 0;
    389 	stp->sd_werror = 0;
    390 	stp->sd_wroff = 0;
    391 	stp->sd_tail = 0;
    392 	stp->sd_iocblk = NULL;
    393 	stp->sd_cmdblk = NULL;
    394 	stp->sd_pushcnt = 0;
    395 	stp->sd_qn_minpsz = 0;
    396 	stp->sd_qn_maxpsz = INFPSZ - 1;	/* used to check for initialization */
    397 	stp->sd_maxblk = INFPSZ;
    398 	qp->q_ptr = _WR(qp)->q_ptr = stp;
    399 	STREAM(qp) = STREAM(_WR(qp)) = stp;
        	/*
        	 * Publish the stream head before dropping v_lock.  It carries
        	 * STWOPEN, so a concurrent stropen() that finds it takes the
        	 * re-open path above and waits (or fails with EAGAIN).
        	 */
    400 	vp->v_stream = stp;
    401 	mutex_exit(&vp->v_lock);
    402 	if (vp->v_type == VFIFO) {
    403 		stp->sd_flag |= OLDNDELAY;
    404 		/*
    405 		 * This means, both for pipes and fifos
    406 		 * strwrite will send SIGPIPE if the other
    407 		 * end is closed. For putmsg it depends
    408 		 * on whether it is a XPG4_2 application
    409 		 * or not
    410 		 */
    411 		stp->sd_wput_opt = SW_SIGPIPE;
    412 
    413 		/* setq might sleep in kmem_alloc - avoid holding locks. */
    414 		setq(qp, &fifo_strdata, &fifo_stwdata, NULL, QMTSAFE,
    415 		    SQ_CI|SQ_CO, B_FALSE);
    416 
    417 		set_qend(qp);
    418 		stp->sd_strtab = fifo_getinfo();
        		/* Each stream head queue is its own q_nfsrv. */
    419 		_WR(qp)->q_nfsrv = _WR(qp);
    420 		qp->q_nfsrv = qp;
    421 		/*
    422 		 * Wake up others that are waiting for stream to be created.
    423 		 */
    424 		mutex_enter(&stp->sd_lock);
    425 		/*
    426 		 * nothing is pushed on stream yet, so
    427 		 * optimized stream head packetsizes are just that
    428 		 * of the read queue
    429 		 */
    430 		stp->sd_qn_minpsz = qp->q_minpsz;
    431 		stp->sd_qn_maxpsz = qp->q_maxpsz;
    432 		stp->sd_flag &= ~STWOPEN;
        		/* fifo_opendone broadcasts and drops sd_lock. */
    433 		goto fifo_opendone;
    434 	}
    435 	/* setq might sleep in kmem_alloc - avoid holding locks. */
    436 	setq(qp, &strdata, &stwdata, NULL, QMTSAFE, SQ_CI|SQ_CO, B_FALSE);
    437 
    438 	set_qend(qp);
    439 
    440 	/*
    441 	 * Open driver and create stream to it (via qattach).
    442 	 */
        	/*
        	 * savedev remembers the device as passed in and cloneopen whether
        	 * it was the clone driver, both sampled before qattach() runs.
        	 */
    443 	savedev = *devp;
    444 	cloneopen = (getmajor(*devp) == clone_major);
    445 	if ((error = qattach(qp, devp, flag, crp, NULL, B_FALSE)) != 0) {
        		/*
        		 * Undo the vnode association and wake any waiters, then
        		 * free the queue pair and the stream head.
        		 */
    446 		mutex_enter(&vp->v_lock);
    447 		vp->v_stream = NULL;
    448 		mutex_exit(&vp->v_lock);
    449 		mutex_enter(&stp->sd_lock);
    450 		cv_broadcast(&stp->sd_monitor);
    451 		mutex_exit(&stp->sd_lock);
    452 		freeq(_RD(qp));
    453 		shfree(stp);
    454 		return (error);
    455 	}
    456 	/*
    457 	 * Set sd_strtab after open in order to handle clonable drivers
    458 	 */
    459 	stp->sd_strtab = STREAMSTAB(getmajor(*devp));
    460 
    461 	/*
    462 	 * Historical note: dummydev used to be set prior to the initial
    463 	 * open (via qattach above), which made the value seen
    464 	 * inconsistent between an I_PUSH and an autopush of a module.
    465 	 */
    466 	dummydev = *devp;
    467 
    468 	/*
    469 	 * For clone open of old style (Q not associated) network driver,
    470 	 * push DRMODNAME module to handle DL_ATTACH/DL_DETACH
    471 	 */
    472 	brq = _RD(_WR(qp)->q_next);
    473 	major = getmajor(*devp);
    474 	if (push_drcompat && cloneopen && NETWORK_DRV(major) &&
    475 	    ((brq->q_flag & _QASSOCIATED) == 0)) {
        		/* A failure here is only logged; the open continues. */
    476 		if (push_mod(qp, &dummydev, stp, DRMODNAME, 0, crp, 0) != 0)
    477 			cmn_err(CE_WARN, "cannot push " DRMODNAME
    478 			    " streams module");
    479 	}
    480 
    481 	if (!NETWORK_DRV(major)) {
    482 		savedev = *devp;
    483 	} else {
    484 		/*
    485 		 * For network devices, process differently based on the
    486 		 * return value from dld_autopush():
    487 		 *
    488 		 *   0: the passed-in device points to a GLDv3 datalink with
    489 		 *   per-link autopush configuration; use that configuration
    490 		 *   and ignore any per-driver autopush configuration.
    491 		 *
    492 		 *   1: the passed-in device points to a physical GLDv3
    493 		 *   datalink without per-link autopush configuration.  The
    494 		 *   passed in device was changed to refer to the actual
    495 		 *   physical device (if it's not already); we use that new
    496 		 *   device to look up any per-driver autopush configuration.
    497 		 *
    498 		 *   -1: neither of the above cases applied; use the initial
    499 		 *   device to look up any per-driver autopush configuration.
    500 		 */
    501 		switch (dld_autopush(&savedev, &dlap)) {
    502 		case 0:
    503 			zoneid = crgetzoneid(crp);
    504 			for (s = 0; s < dlap.dap_npush; s++) {
    505 				error = push_mod(qp, &dummydev, stp,
    506 				    dlap.dap_aplist[s], dlap.dap_anchor, crp,
    507 				    zoneid);
    508 				if (error != 0)
    509 					break;
    510 			}
    511 			goto opendone;
    512 		case 1:
    513 			break;
    514 		case -1:
    515 			savedev = *devp;
    516 			break;
    517 		}
    518 	}
    519 	/*
    520 	 * Find the autopush configuration based on "savedev". Start with the
    521 	 * global zone. If not found check in the local zone.
    522 	 */
    523 	zoneid = GLOBAL_ZONEID;
    524 retryap:
    525 	ss = netstack_find_by_stackid(zoneid_to_netstackid(zoneid))->
    526 	    netstack_str;
    527 	if ((ap = sad_ap_find_by_dev(savedev, ss)) == NULL) {
    528 		netstack_rele(ss->ss_netstack);
    529 		if (zoneid == GLOBAL_ZONEID) {
    530 			/*
    531 			 * None found. Also look in the zone's autopush table.
    532 			 */
    533 			zoneid = crgetzoneid(crp);
    534 			if (zoneid != GLOBAL_ZONEID)
    535 				goto retryap;
    536 		}
    537 		goto opendone;
    538 	}
        	/*
        	 * The anchor position comes from the autopush entry, but the
        	 * zone recorded with it is always the caller's zone, even when
        	 * the entry was found in the global zone's table.
        	 */
    539 	anchor = ap->ap_anchor;
    540 	zoneid = crgetzoneid(crp);
    541 	for (s = 0; s < ap->ap_npush; s++) {
    542 		error = push_mod(qp, &dummydev, stp, ap->ap_list[s],
    543 		    anchor, crp, zoneid);
    544 		if (error != 0)
    545 			break;
    546 	}
    547 	sad_ap_rele(ap, ss);
    548 	netstack_rele(ss->ss_netstack);
    549 
    550 opendone:
    551 
    552 	/*
    553 	 * let specfs know that open failed part way through
    554 	 */
        	/*
        	 * The stream head is not torn down here on a push failure; it is
        	 * only flagged STREOPENFAIL, and error is returned below.
        	 */
    555 	if (error) {
    556 		mutex_enter(&stp->sd_lock);
    557 		stp->sd_flag |= STREOPENFAIL;
    558 		mutex_exit(&stp->sd_lock);
    559 	}
    560 
    561 	/*
    562 	 * Wake up others that are waiting for stream to be created.
    563 	 */
    564 	mutex_enter(&stp->sd_lock);
    565 	stp->sd_flag &= ~STWOPEN;
    566 
    567 	/*
    568 	 * As a performance concern we are caching the values of
    569 	 * q_minpsz and q_maxpsz of the module below the stream
    570 	 * head in the stream head.
    571 	 */
    572 	mutex_enter(QLOCK(stp->sd_wrq->q_next));
    573 	rmin = stp->sd_wrq->q_next->q_minpsz;
    574 	rmax = stp->sd_wrq->q_next->q_maxpsz;
    575 	mutex_exit(QLOCK(stp->sd_wrq->q_next));
    576 
    577 	/* do this processing here as a performance concern */
        	/* A non-zero strmsgsz caps the cached maximum packet size. */
    578 	if (strmsgsz != 0) {
    579 		if (rmax == INFPSZ)
    580 			rmax = strmsgsz;
    581 		else
    582 			rmax = MIN(strmsgsz, rmax);
    583 	}
    584 
    585 	mutex_enter(QLOCK(stp->sd_wrq));
    586 	stp->sd_qn_minpsz = rmin;
    587 	stp->sd_qn_maxpsz = rmax;
    588 	mutex_exit(QLOCK(stp->sd_wrq));
    589 
        	/* Reached with stp->sd_lock held, from here and from the FIFO path. */
    590 fifo_opendone:
    591 	cv_broadcast(&stp->sd_monitor);
    592 	mutex_exit(&stp->sd_lock);
    593 	return (error);
    594 }
    595 
        /*
         * qinit structures for a "dead end" stream head.  strclose() installs
         * them on the stream head queues of a pipe end that closes while the
         * other end is still alive (deadrend on the read queue, deadwend on the
         * write queue), and recognizes a half-closed pipe by finding deadrend
         * on the queue its write side points to.  Going by the member order of
         * struct qinit in <sys/stream.h>, strsink() is the read-side put
         * procedure and the write side has no procedures at all.
         */
    596 static int strsink(queue_t *, mblk_t *);
    597 static struct qinit deadrend = {
    598 	strsink, NULL, NULL, NULL, NULL, &strm_info, NULL
    599 };
    600 static struct qinit deadwend = {
    601 	NULL, NULL, NULL, NULL, NULL, &stwm_info, NULL
    602 };
    603 
    604 /*
    605  * Close a stream.
    606  * This is called from closef() on the last close of an open stream.
    607  * Strclean() will already have removed the siglist and pollist
    608  * information, so all that remains is to remove all multiplexor links
    609  * for the stream, pop all the modules (and the driver), and free the
    610  * stream structure.
         *
         * Arguments:
         *	vp	vnode whose stream is being closed; vp->v_stream must be
         *		set and the stream must not be STPLEX (both asserted)
         *	flag	file flags; with FNDELAY or FNONBLOCK the wait for the
         *		queue below the stream head to drain is skipped
         *	crp	credentials, passed on to munlinkall() and qdetach()
         *
         * Always returns 0.
    611  */
    612 
    613 int
    614 strclose(struct vnode *vp, int flag, cred_t *crp)
    615 {
    616 	struct stdata *stp;
    617 	queue_t *qp;
    618 	int rval;
    619 	int freestp = 1;
    620 	queue_t *rmq;
    621 
    622 	if (audit_active)
    623 		audit_strclose(vp, flag, crp);
    624 
    625 	TRACE_1(TR_FAC_STREAMS_FR,
    626 	    TR_STRCLOSE, "strclose:%p", vp);
    627 	ASSERT(vp->v_stream);
    628 
    629 	stp = vp->v_stream;
    630 	ASSERT(!(stp->sd_flag & STPLEX));
    631 	qp = stp->sd_wrq;
    632 
    633 	/*
    634 	 * Needed so that strpoll will return non-zero for this fd.
    635 	 * Note that with POLLNOERR STRHUP does still cause POLLHUP.
    636 	 */
    637 	mutex_enter(&stp->sd_lock);
    638 	stp->sd_flag |= STRHUP;
    639 	mutex_exit(&stp->sd_lock);
    640 
    641 	/*
    642 	 * If the registered process or process group did not have an
    643 	 * open instance of this stream then strclean would not be
    644 	 * called. Thus at the time of closing all remaining siglist entries
    645 	 * are removed.
    646 	 */
    647 	if (stp->sd_siglist != NULL)
    648 		strcleanall(vp);
    649 
    650 	ASSERT(stp->sd_siglist == NULL);
    651 	ASSERT(stp->sd_sigflags == 0);
    652 
    653 	if (STRMATED(stp)) {
    654 		struct stdata *strmatep = stp->sd_mate;
    655 		int waited = 1;
    656 
        		/*
        		 * Keep looping until one complete pass finds neither end
        		 * with STWOPEN, STRCLOSE or STRPLUMB set.  Each cv_wait()
        		 * gives up the locks, so after waiting on one end the
        		 * other end has to be checked again.
        		 */
    657 		STRLOCKMATES(stp);
    658 		while (waited) {
    659 			waited = 0;
    660 			while (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
    661 				mutex_exit(&strmatep->sd_lock);
    662 				cv_wait(&stp->sd_monitor, &stp->sd_lock);
    663 				mutex_exit(&stp->sd_lock);
    664 				STRLOCKMATES(stp);
    665 				waited = 1;
    666 			}
    667 			while (strmatep->sd_flag &
    668 			    (STWOPEN|STRCLOSE|STRPLUMB)) {
    669 				mutex_exit(&stp->sd_lock);
    670 				cv_wait(&strmatep->sd_monitor,
    671 				    &strmatep->sd_lock);
    672 				mutex_exit(&strmatep->sd_lock);
    673 				STRLOCKMATES(stp);
    674 				waited = 1;
    675 			}
    676 		}
    677 		stp->sd_flag |= STRCLOSE;
    678 		STRUNLOCKMATES(stp);
    679 	} else {
        		/* An unmated stream does not wait; it is just marked. */
    680 		mutex_enter(&stp->sd_lock);
    681 		stp->sd_flag |= STRCLOSE;
    682 		mutex_exit(&stp->sd_lock);
    683 	}
    684 
    685 	ASSERT(qp->q_first == NULL);	/* No more delayed write */
    686 
    687 	/* Check if an I_LINK was ever done on this stream */
    688 	if (stp->sd_flag & STRHASLINKS) {
    689 		netstack_t *ns;
    690 		str_stack_t *ss;
    691 
    692 		ns = netstack_find_by_cred(crp);
    693 		ASSERT(ns != NULL);
    694 		ss = ns->netstack_str;
    695 		ASSERT(ss != NULL);
    696 
        		/* The result of munlinkall() is deliberately ignored. */
    697 		(void) munlinkall(stp, LINKCLOSE|LINKNORMAL, crp, &rval, ss);
    698 		netstack_rele(ss->ss_netstack);
    699 	}
    700 
        	/*
        	 * Each iteration detaches the queue pair directly below the
        	 * stream head; the loop ends once the stream head's write queue
        	 * no longer has a successor in this stream.
        	 */
    701 	while (_SAMESTR(qp)) {
    702 		/*
    703 		 * Holding sd_lock prevents q_next from changing in
    704 		 * this stream.
    705 		 */
    706 		mutex_enter(&stp->sd_lock);
        		/*
        		 * The drain wait is skipped for a non-blocking close and
        		 * when sd_closetime is not positive.
        		 */
    707 		if (!(flag & (FNDELAY|FNONBLOCK)) && (stp->sd_closetime > 0)) {
    708 
    709 			/*
    710 			 * sleep until awakened by strwsrv() or timeout
    711 			 */
    712 			for (;;) {
    713 				mutex_enter(QLOCK(qp->q_next));
        				/* nothing left queued below: done waiting */
    714 				if (!(qp->q_next->q_mblkcnt)) {
    715 					mutex_exit(QLOCK(qp->q_next));
    716 					break;
    717 				}
    718 				stp->sd_flag |= WSLEEP;
    719 
    720 				/* ensure strwsrv gets enabled */
    721 				qp->q_next->q_flag |= QWANTW;
    722 				mutex_exit(QLOCK(qp->q_next));
    723 				/* get out if we timed out or recv'd a signal */
    724 				if (str_cv_wait(&qp->q_wait, &stp->sd_lock,
    725 				    stp->sd_closetime, 0) <= 0) {
    726 					break;
    727 				}
    728 			}
    729 			stp->sd_flag &= ~WSLEEP;
    730 		}
    731 		mutex_exit(&stp->sd_lock);
    732 
        		/*
        		 * rmq is the write queue directly below the stream head.
        		 * When it belongs to the driver (QISDRV), wait_sq_svc()
        		 * is called on the stream head's syncq before detaching.
        		 */
    733 		rmq = qp->q_next;
    734 		if (rmq->q_flag & QISDRV) {
    735 			ASSERT(!_SAMESTR(rmq));
    736 			wait_sq_svc(_RD(qp)->q_syncq);
    737 		}
    738 
    739 		qdetach(_RD(rmq), 1, flag, crp, B_FALSE);
    740 	}
    741 
    742 	/*
    743 	 * Since we call pollwakeup in close() now, the poll list should
    744 	 * be empty in most cases. The only exception is the layered devices
    745 	 * (e.g. the console drivers with redirection modules pushed on top
    746 	 * of it).  We have to do this after calling qdetach() because
    747 	 * the redirection module won't have torn down the console
    748 	 * redirection until after qdetach() has been invoked.
    749 	 */
    750 	if (stp->sd_pollist.ph_list != NULL) {
    751 		pollwakeup(&stp->sd_pollist, POLLERR);
    752 		pollhead_clean(&stp->sd_pollist);
    753 	}
    754 	ASSERT(stp->sd_pollist.ph_list == NULL);
    755 	ASSERT(stp->sd_sidp == NULL);
    756 	ASSERT(stp->sd_pgidp == NULL);
    757 
    758 	/* Prevent qenable from re-enabling the stream head queue */
    759 	disable_svc(_RD(qp));
    760 
    761 	/*
    762 	 * Wait until service procedure of each queue is
    763 	 * run, if QINSERVICE is set.
    764 	 */
    765 	wait_svc(_RD(qp));
    766 
    767 	/*
    768 	 * Now, flush both queues.
    769 	 */
    770 	flushq(_RD(qp), FLUSHALL);
    771 	flushq(qp, FLUSHALL);
    772 
    773 	/*
    774 	 * If the write queue of the stream head is pointing to a
    775 	 * read queue, we have a twisted stream.  If the read queue
    776 	 * is alive, convert the stream head queues into a dead end.
    777 	 * If the read queue is dead, free the dead pair.
    778 	 */
    779 	if (qp->q_next && !_SAMESTR(qp)) {
    780 		if (qp->q_next->q_qinfo == &deadrend) {	/* half-closed pipe */
        			/*
        			 * The other end closed first and left its stream
        			 * head and queues behind as a dead end; free them
        			 * along with our own queue pair.
        			 */
    781 			flushq(qp->q_next, FLUSHALL); /* ensure no message */
    782 			shfree(qp->q_next->q_stream);
    783 			freeq(qp->q_next);
    784 			freeq(_RD(qp));
    785 		} else if (qp->q_next == _RD(qp)) {	/* fifo */
    786 			freeq(_RD(qp));
    787 		} else {				/* pipe */
        			/*
        			 * The other end is still alive, so this stream
        			 * head and its queues are kept as a dead end
        			 * rather than freed (see the half-closed pipe
        			 * branch above for where they are released).
        			 */
    788 			freestp = 0;
    789 			/*
    790 			 * The q_info pointers are never accessed when
    791 			 * SQLOCK is held.
    792 			 */
    793 			ASSERT(qp->q_syncq == _RD(qp)->q_syncq);
    794 			mutex_enter(SQLOCK(qp->q_syncq));
    795 			qp->q_qinfo = &deadwend;
    796 			_RD(qp)->q_qinfo = &deadrend;
    797 			mutex_exit(SQLOCK(qp->q_syncq));
    798 		}
    799 	} else {
    800 		freeq(_RD(qp)); /* free stream head queue pair */
    801 	}
    802 
    803 	mutex_enter(&vp->v_lock);
    804 	if (stp->sd_iocblk) {
        		/* (mblk_t *)-1 is a marker value, not a message to free */
    805 		if (stp->sd_iocblk != (mblk_t *)-1) {
    806 			freemsg(stp->sd_iocblk);
    807 		}
    808 		stp->sd_iocblk = NULL;
    809 	}
        	/* Break the vnode <-> stream head association under v_lock. */
    810 	stp->sd_vnode = NULL;
    811 	vp->v_stream = NULL;
    812 	mutex_exit(&vp->v_lock);
    813 	mutex_enter(&stp->sd_lock);
    814 	freemsg(stp->sd_cmdblk);
    815 	stp->sd_cmdblk = NULL;
    816 	stp->sd_flag &= ~STRCLOSE;
    817 	cv_broadcast(&stp->sd_monitor);
    818 	mutex_exit(&stp->sd_lock);
    819 
        	/* freestp is cleared only for a pipe whose other end is alive. */
    820 	if (freestp)
    821 		shfree(stp);
    822 	return (0);
    823 }
    824 
    825 static int
    826 strsink(queue_t *q, mblk_t *bp)
    827 {
    828 	struct copyresp *resp;
    829 
    830 	switch (bp->b_datap->db_type) {
    831 	case M_FLUSH:
    832 		if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
    833 			*bp->b_rptr &= ~FLUSHR;
    834 			bp->b_flag |= MSGNOLOOP;
    835 			/*
    836 			 * Protect against the driver passing up
    837 			 * messages after it has done a qprocsoff.
    838 			 */
    839 			if (_OTHERQ(q)->q_next == NULL)
    840 				freemsg(bp);
    841 			else
    842 				qreply(q, bp);
    843 		} else {
    844 			freemsg(bp);
    845 		}
    846 		break;
    847 
    848 	case M_COPYIN:
    849 	case M_COPYOUT:
    850 		if (bp->b_cont) {
    851 			freemsg(bp->b_cont);
    852 			bp->b_cont = NULL;
    853 		}
    854 		bp->b_datap->db_type = M_IOCDATA;
    855 		bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
    856 		resp = (struct copyresp *)bp->b_rptr;
    857 		resp->cp_rval = (caddr_t)1;	/* failure */
    858 		/*
    859 		 * Protect against the driver passing up
    860 		 * messages after it has done a qprocsoff.
    861 		 */
    862 		if (_OTHERQ(q)->q_next == NULL)
    863 			freemsg(bp);
    864 		else
    865 			qreply(q, bp);
    866 		break;
    867 
    868 	case M_IOCTL:
    869 		if (bp->b_cont) {
    870 			freemsg(bp->b_cont);
    871 			bp->b_cont = NULL;
    872 		}
    873 		bp->b_datap->db_type = M_IOCNAK;
    874 		/*
    875 		 * Protect against the driver passing up
    876 		 * messages after it has done a qprocsoff.
    877 		 */
    878 		if (_OTHERQ(q)->q_next == NULL)
    879 			freemsg(bp);
    880 		else
    881 			qreply(q, bp);
    882 		break;
    883 
    884 	default:
    885 		freemsg(bp);
    886 		break;
    887 	}
    888 
    889 	return (0);
    890 }
    891 
    892 /*
    893  * Clean up after a process when it closes a stream.  This is called
    894  * from closef for all closes, whereas strclose is called only for the
    895  * last close on a stream.  The siglist is scanned for entries for the
    896  * current process, and these are removed.
    897  */
    898 void
    899 strclean(struct vnode *vp)
    900 {
    901 	strsig_t *ssp, *pssp, *tssp;
    902 	stdata_t *stp;
    903 	int update = 0;
    904 
    905 	TRACE_1(TR_FAC_STREAMS_FR,
    906 	    TR_STRCLEAN, "strclean:%p", vp);
    907 	stp = vp->v_stream;
    908 	pssp = NULL;
    909 	mutex_enter(&stp->sd_lock);
    910 	ssp = stp->sd_siglist;
    911 	while (ssp) {
    912 		if (ssp->ss_pidp == curproc->p_pidp) {
    913 			tssp = ssp->ss_next;
    914 			if (pssp)
    915 				pssp->ss_next = tssp;
    916 			else
    917 				stp->sd_siglist = tssp;
    918 			mutex_enter(&pidlock);
    919 			PID_RELE(ssp->ss_pidp);
    920 			mutex_exit(&pidlock);
    921 			kmem_free(ssp, sizeof (strsig_t));
    922 			update = 1;
    923 			ssp = tssp;
    924 		} else {
    925 			pssp = ssp;
    926 			ssp = ssp->ss_next;
    927 		}
    928 	}
    929 	if (update) {
    930 		stp->sd_sigflags = 0;
    931 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
    932 			stp->sd_sigflags |= ssp->ss_events;
    933 	}
    934 	mutex_exit(&stp->sd_lock);
    935 }
    936 
    937 /*
    938  * Used on the last close to remove any remaining items on the siglist.
    939  * These could be present on the siglist due to I_ESETSIG calls that
    940  * use process groups or processed that do not have an open file descriptor
    941  * for this stream (Such entries would not be removed by strclean).
    942  */
    943 static void
    944 strcleanall(struct vnode *vp)
    945 {
    946 	strsig_t *ssp, *nssp;
    947 	stdata_t *stp;
    948 
    949 	stp = vp->v_stream;
    950 	mutex_enter(&stp->sd_lock);
    951 	ssp = stp->sd_siglist;
    952 	stp->sd_siglist = NULL;
    953 	while (ssp) {
    954 		nssp = ssp->ss_next;
    955 		mutex_enter(&pidlock);
    956 		PID_RELE(ssp->ss_pidp);
    957 		mutex_exit(&pidlock);
    958 		kmem_free(ssp, sizeof (strsig_t));
    959 		ssp = nssp;
    960 	}
    961 	stp->sd_sigflags = 0;
    962 	mutex_exit(&stp->sd_lock);
    963 }
    964 
    965 /*
    966  * Retrieve the next message from the logical stream head read queue
    967  * using either rwnext (if sync stream) or getq_noenab.
    968  * It is the callers responsibility to call qbackenable after
    969  * it is finished with the message. The caller should not call
    970  * qbackenable until after any putback calls to avoid spurious backenabling.
    971  */
    972 mblk_t *
    973 strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first,
    974     int *errorp)
    975 {
    976 	mblk_t *bp;
    977 	int error;
    978 	ssize_t rbytes = 0;
    979 
    980 	/* Holding sd_lock prevents the read queue from changing  */
    981 	ASSERT(MUTEX_HELD(&stp->sd_lock));
    982 
    983 	if (uiop != NULL && stp->sd_struiordq != NULL &&
    984 	    q->q_first == NULL &&
    985 	    (!first || (stp->sd_wakeq & RSLEEP))) {
    986 		/*
    987 		 * Stream supports rwnext() for the read side.
    988 		 * If this is the first time we're called by e.g. strread
    989 		 * only do the downcall if there is a deferred wakeup
    990 		 * (registered in sd_wakeq).
    991 		 */
    992 		struiod_t uiod;
    993 
    994 		if (first)
    995 			stp->sd_wakeq &= ~RSLEEP;
    996 
    997 		(void) uiodup(uiop, &uiod.d_uio, uiod.d_iov,
    998 		    sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
    999 		uiod.d_mp = 0;
   1000 		/*
   1001 		 * Mark that a thread is in rwnext on the read side
   1002 		 * to prevent strrput from nacking ioctls immediately.
   1003 		 * When the last concurrent rwnext returns
   1004 		 * the ioctls are nack'ed.
   1005 		 */
   1006 		ASSERT(MUTEX_HELD(&stp->sd_lock));
   1007 		stp->sd_struiodnak++;
   1008 		/*
   1009 		 * Note: rwnext will drop sd_lock.
   1010 		 */
   1011 		error = rwnext(q, &uiod);
   1012 		ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
   1013 		mutex_enter(&stp->sd_lock);
   1014 		stp->sd_struiodnak--;
   1015 		while (stp->sd_struiodnak == 0 &&
   1016 		    ((bp = stp->sd_struionak) != NULL)) {
   1017 			stp->sd_struionak = bp->b_next;
   1018 			bp->b_next = NULL;
   1019 			bp->b_datap->db_type = M_IOCNAK;
   1020 			/*
   1021 			 * Protect against the driver passing up
   1022 			 * messages after it has done a qprocsoff.
   1023 			 */
   1024 			if (_OTHERQ(q)->q_next == NULL)
   1025 				freemsg(bp);
   1026 			else {
   1027 				mutex_exit(&stp->sd_lock);
   1028 				qreply(q, bp);
   1029 				mutex_enter(&stp->sd_lock);
   1030 			}
   1031 		}
   1032 		ASSERT(MUTEX_HELD(&stp->sd_lock));
   1033 		if (error == 0 || error == EWOULDBLOCK) {
   1034 			if ((bp = uiod.d_mp) != NULL) {
   1035 				*errorp = 0;
   1036 				ASSERT(MUTEX_HELD(&stp->sd_lock));
   1037 				return (bp);
   1038 			}
   1039 			error = 0;
   1040 		} else if (error == EINVAL) {
   1041 			/*
   1042 			 * The stream plumbing must have
   1043 			 * changed while we were away, so
   1044 			 * just turn off rwnext()s.
   1045 			 */
   1046 			error = 0;
   1047 		} else if (error == EBUSY) {
   1048 			/*
   1049 			 * The module might have data in transit using putnext
   1050 			 * Fall back on waiting + getq.
   1051 			 */
   1052 			error = 0;
   1053 		} else {
   1054 			*errorp = error;
   1055 			ASSERT(MUTEX_HELD(&stp->sd_lock));
   1056 			return (NULL);
   1057 		}
   1058 		/*
   1059 		 * Try a getq in case a rwnext() generated mblk
   1060 		 * has bubbled up via strrput().
   1061 		 */
   1062 	}
   1063 	*errorp = 0;
   1064 	ASSERT(MUTEX_HELD(&stp->sd_lock));
   1065 
   1066 	/*
   1067 	 * If we have a valid uio, try and use this as a guide for how
   1068 	 * many bytes to retrieve from the queue via getq_noenab().
   1069 	 * Doing this can avoid unneccesary counting of overlong
   1070 	 * messages in putback(). We currently only do this for sockets
   1071 	 * and only if there is no sd_rputdatafunc hook.
   1072 	 *
   1073 	 * The sd_rputdatafunc hook transforms the entire message
   1074 	 * before any bytes in it can be given to a client. So, rbytes
   1075 	 * must be 0 if there is a hook.
   1076 	 */
   1077 	if ((uiop != NULL) && (stp->sd_vnode->v_type == VSOCK) &&
   1078 	    (stp->sd_rputdatafunc == NULL))
   1079 		rbytes = uiop->uio_resid;
   1080 
   1081 	return (getq_noenab(q, rbytes));
   1082 }
   1083 
   1084 /*
   1085  * Copy out the message pointed to by `bp' into the uio pointed to by `uiop'.
   1086  * If the message does not fit in the uio the remainder of it is returned;
   1087  * otherwise NULL is returned.  Any embedded zero-length mblk_t's are
   1088  * consumed, even if uio_resid reaches zero.  On error, `*errorp' is set to
   1089  * the error code, the message is consumed, and NULL is returned.
   1090  */
   1091 static mblk_t *
   1092 struiocopyout(mblk_t *bp, struct uio *uiop, int *errorp)
   1093 {
   1094 	int error;
   1095 	ptrdiff_t n;
   1096 	mblk_t *nbp;
   1097 
   1098 	ASSERT(bp->b_wptr >= bp->b_rptr);
   1099 
   1100 	do {
   1101 		if ((n = MIN(uiop->uio_resid, MBLKL(bp))) != 0) {
   1102 			ASSERT(n > 0);
   1103 
   1104 			error = uiomove(bp->b_rptr, n, UIO_READ, uiop);
   1105 			if (error != 0) {
   1106 				freemsg(bp);
   1107 				*errorp = error;
   1108 				return (NULL);
   1109 			}
   1110 		}
   1111 
   1112 		bp->b_rptr += n;
   1113 		while (bp != NULL && (bp->b_rptr >= bp->b_wptr)) {
   1114 			nbp = bp;
   1115 			bp = bp->b_cont;
   1116 			freeb(nbp);
   1117 		}
   1118 	} while (bp != NULL && uiop->uio_resid > 0);
   1119 
   1120 	*errorp = 0;
   1121 	return (bp);
   1122 }
   1123 
   1124 /*
   1125  * Read a stream according to the mode flags in sd_flag:
   1126  *
   1127  * (default mode)		- Byte stream, msg boundaries are ignored
   1128  * RD_MSGDIS (msg discard)	- Read on msg boundaries and throw away
   1129  *				any data remaining in msg
   1130  * RD_MSGNODIS (msg non-discard) - Read on msg boundaries and put back
   1131  *				any remaining data on head of read queue
   1132  *
   1133  * Consume readable messages on the front of the queue until
   1134  * ttolwp(curthread)->lwp_count
   1135  * is satisfied, the readable messages are exhausted, or a message
   1136  * boundary is reached in a message mode.  If no data was read and
   1137  * the stream was not opened with the NDELAY flag, block until data arrives.
   1138  * Otherwise return the data read and update the count.
   1139  *
   1140  * In default mode a 0 length message signifies end-of-file and terminates
   1141  * a read in progress.  The 0 length message is removed from the queue
   1142  * only if it is the only message read (no data is read).
   1143  *
   1144  * An attempt to read an M_PROTO or M_PCPROTO message results in an
   1145  * EBADMSG error return, unless either RD_PROTDAT or RD_PROTDIS are set.
   1146  * If RD_PROTDAT is set, M_PROTO and M_PCPROTO messages are read as data.
   1147  * If RD_PROTDIS is set, the M_PROTO and M_PCPROTO parts of the message
   1148  * are unlinked from and M_DATA blocks in the message, the protos are
   1149  * thrown away, and the data is read.
   1150  */
   1151 /* ARGSUSED */
   1152 int
   1153 strread(struct vnode *vp, struct uio *uiop, cred_t *crp)
   1154 {
   1155 	struct stdata *stp;
   1156 	mblk_t *bp, *nbp;
   1157 	queue_t *q;
   1158 	int error = 0;
   1159 	uint_t old_sd_flag;
   1160 	int first;
   1161 	char rflg;
   1162 	uint_t mark;		/* Contains MSG*MARK and _LASTMARK */
   1163 #define	_LASTMARK	0x8000	/* Distinct from MSG*MARK */
   1164 	short delim;
   1165 	unsigned char pri = 0;
   1166 	char waitflag;
   1167 	unsigned char type;
   1168 
   1169 	TRACE_1(TR_FAC_STREAMS_FR,
   1170 	    TR_STRREAD_ENTER, "strread:%p", vp);
   1171 	ASSERT(vp->v_stream);
   1172 	stp = vp->v_stream;
   1173 
   1174 	mutex_enter(&stp->sd_lock);
   1175 
   1176 	if ((error = i_straccess(stp, JCREAD)) != 0) {
   1177 		mutex_exit(&stp->sd_lock);
   1178 		return (error);
   1179 	}
   1180 
   1181 	if (stp->sd_flag & (STRDERR|STPLEX)) {
   1182 		error = strgeterr(stp, STRDERR|STPLEX, 0);
   1183 		if (error != 0) {
   1184 			mutex_exit(&stp->sd_lock);
   1185 			return (error);
   1186 		}
   1187 	}
   1188 
   1189 	/*
   1190 	 * Loop terminates when uiop->uio_resid == 0.
   1191 	 */
   1192 	rflg = 0;
   1193 	waitflag = READWAIT;
   1194 	q = _RD(stp->sd_wrq);
   1195 	for (;;) {
   1196 		ASSERT(MUTEX_HELD(&stp->sd_lock));
   1197 		old_sd_flag = stp->sd_flag;
   1198 		mark = 0;
   1199 		delim = 0;
   1200 		first = 1;
   1201 		while ((bp = strget(stp, q, uiop, first, &error)) == NULL) {
   1202 			int done = 0;
   1203 
   1204 			ASSERT(MUTEX_HELD(&stp->sd_lock));
   1205 
   1206 			if (error != 0)
   1207 				goto oops;
   1208 
   1209 			if (stp->sd_flag & (STRHUP|STREOF)) {
   1210 				goto oops;
   1211 			}
   1212 			if (rflg && !(stp->sd_flag & STRDELIM)) {
   1213 				goto oops;
   1214 			}
   1215 			/*
   1216 			 * If a read(fd,buf,0) has been done, there is no
   1217 			 * need to sleep. We always have zero bytes to
   1218 			 * return.
   1219 			 */
   1220 			if (uiop->uio_resid == 0) {
   1221 				goto oops;
   1222 			}
   1223 
   1224 			qbackenable(q, 0);
   1225 
   1226 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_WAIT,
   1227 			    "strread calls strwaitq:%p, %p, %p",
   1228 			    vp, uiop, crp);
   1229 			if ((error = strwaitq(stp, waitflag, uiop->uio_resid,
   1230 			    uiop->uio_fmode, -1, &done)) != 0 || done) {
   1231 				TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_DONE,
   1232 				    "strread error or done:%p, %p, %p",
   1233 				    vp, uiop, crp);
   1234 				if ((uiop->uio_fmode & FNDELAY) &&
   1235 				    (stp->sd_flag & OLDNDELAY) &&
   1236 				    (error == EAGAIN))
   1237 					error = 0;
   1238 				goto oops;
   1239 			}
   1240 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_AWAKE,
   1241 			    "strread awakes:%p, %p, %p", vp, uiop, crp);
   1242 			if ((error = i_straccess(stp, JCREAD)) != 0) {
   1243 				goto oops;
   1244 			}
   1245 			first = 0;
   1246 		}
   1247 
   1248 		ASSERT(MUTEX_HELD(&stp->sd_lock));
   1249 		ASSERT(bp);
   1250 		pri = bp->b_band;
   1251 		/*
   1252 		 * Extract any mark information. If the message is not
   1253 		 * completely consumed this information will be put in the mblk
   1254 		 * that is putback.
   1255 		 * If MSGMARKNEXT is set and the message is completely consumed
   1256 		 * the STRATMARK flag will be set below. Likewise, if
   1257 		 * MSGNOTMARKNEXT is set and the message is
   1258 		 * completely consumed STRNOTATMARK will be set.
   1259 		 *
   1260 		 * For some unknown reason strread only breaks the read at the
   1261 		 * last mark.
   1262 		 */
   1263 		mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
   1264 		ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
   1265 		    (MSGMARKNEXT|MSGNOTMARKNEXT));
   1266 		if (mark != 0 && bp == stp->sd_mark) {
   1267 			if (rflg) {
   1268 				putback(stp, q, bp, pri);
   1269 				goto oops;
   1270 			}
   1271 			mark |= _LASTMARK;
   1272 			stp->sd_mark = NULL;
   1273 		}
   1274 		if ((stp->sd_flag & STRDELIM) && (bp->b_flag & MSGDELIM))
   1275 			delim = 1;
   1276 		mutex_exit(&stp->sd_lock);
   1277 
   1278 		if (STREAM_NEEDSERVICE(stp))
   1279 			stream_runservice(stp);
   1280 
   1281 		type = bp->b_datap->db_type;
   1282 
   1283 		switch (type) {
   1284 
   1285 		case M_DATA:
   1286 ismdata:
   1287 			if (msgnodata(bp)) {
   1288 				if (mark || delim) {
   1289 					freemsg(bp);
   1290 				} else if (rflg) {
   1291 
   1292 					/*
   1293 					 * If already read data put zero
   1294 					 * length message back on queue else
   1295 					 * free msg and return 0.
   1296 					 */
   1297 					bp->b_band = pri;
   1298 					mutex_enter(&stp->sd_lock);
   1299 					putback(stp, q, bp, pri);
   1300 					mutex_exit(&stp->sd_lock);
   1301 				} else {
   1302 					freemsg(bp);
   1303 				}
   1304 				error =  0;
   1305 				goto oops1;
   1306 			}
   1307 
   1308 			rflg = 1;
   1309 			waitflag |= NOINTR;
   1310 			bp = struiocopyout(bp, uiop, &error);
   1311 			if (error != 0)
   1312 				goto oops1;
   1313 
   1314 			mutex_enter(&stp->sd_lock);
   1315 			if (bp) {
   1316 				/*
   1317 				 * Have remaining data in message.
   1318 				 * Free msg if in discard mode.
   1319 				 */
   1320 				if (stp->sd_read_opt & RD_MSGDIS) {
   1321 					freemsg(bp);
   1322 				} else {
   1323 					bp->b_band = pri;
   1324 					if ((mark & _LASTMARK) &&
   1325 					    (stp->sd_mark == NULL))
   1326 						stp->sd_mark = bp;
   1327 					bp->b_flag |= mark & ~_LASTMARK;
   1328 					if (delim)
   1329 						bp->b_flag |= MSGDELIM;
   1330 					if (msgnodata(bp))
   1331 						freemsg(bp);
   1332 					else
   1333 						putback(stp, q, bp, pri);
   1334 				}
   1335 			} else {
   1336 				/*
   1337 				 * Consumed the complete message.
   1338 				 * Move the MSG*MARKNEXT information
   1339 				 * to the stream head just in case
   1340 				 * the read queue becomes empty.
   1341 				 *
   1342 				 * If the stream head was at the mark
   1343 				 * (STRATMARK) before we dropped sd_lock above
   1344 				 * and some data was consumed then we have
   1345 				 * moved past the mark thus STRATMARK is
   1346 				 * cleared. However, if a message arrived in
   1347 				 * strrput during the copyout above causing
   1348 				 * STRATMARK to be set we can not clear that
   1349 				 * flag.
   1350 				 */
   1351 				if (mark &
   1352 				    (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
   1353 					if (mark & MSGMARKNEXT) {
   1354 						stp->sd_flag &= ~STRNOTATMARK;
   1355 						stp->sd_flag |= STRATMARK;
   1356 					} else if (mark & MSGNOTMARKNEXT) {
   1357 						stp->sd_flag &= ~STRATMARK;
   1358 						stp->sd_flag |= STRNOTATMARK;
   1359 					} else {
   1360 						stp->sd_flag &=
   1361 						    ~(STRATMARK|STRNOTATMARK);
   1362 					}
   1363 				} else if (rflg && (old_sd_flag & STRATMARK)) {
   1364 					stp->sd_flag &= ~STRATMARK;
   1365 				}
   1366 			}
   1367 
   1368 			/*
   1369 			 * Check for signal messages at the front of the read
   1370 			 * queue and generate the signal(s) if appropriate.
   1371 			 * The only signal that can be on queue is M_SIG at
   1372 			 * this point.
   1373 			 */
   1374 			while ((((bp = q->q_first)) != NULL) &&
   1375 			    (bp->b_datap->db_type == M_SIG)) {
   1376 				bp = getq_noenab(q, 0);
   1377 				/*
   1378 				 * sd_lock is held so the content of the
   1379 				 * read queue can not change.
   1380 				 */
   1381 				ASSERT(bp != NULL && DB_TYPE(bp) == M_SIG);
   1382 				strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
   1383 				mutex_exit(&stp->sd_lock);
   1384 				freemsg(bp);
   1385 				if (STREAM_NEEDSERVICE(stp))
   1386 					stream_runservice(stp);
   1387 				mutex_enter(&stp->sd_lock);
   1388 			}
   1389 
   1390 			if ((uiop->uio_resid == 0) || (mark & _LASTMARK) ||
   1391 			    delim ||
   1392 			    (stp->sd_read_opt & (RD_MSGDIS|RD_MSGNODIS))) {
   1393 				goto oops;
   1394 			}
   1395 			continue;
   1396 
   1397 		case M_SIG:
   1398 			strsignal(stp, *bp->b_rptr, (int32_t)bp->b_band);
   1399 			freemsg(bp);
   1400 			mutex_enter(&stp->sd_lock);
   1401 			continue;
   1402 
   1403 		case M_PROTO:
   1404 		case M_PCPROTO:
   1405 			/*
   1406 			 * Only data messages are readable.
   1407 			 * Any others generate an error, unless
   1408 			 * RD_PROTDIS or RD_PROTDAT is set.
   1409 			 */
   1410 			if (stp->sd_read_opt & RD_PROTDAT) {
   1411 				for (nbp = bp; nbp; nbp = nbp->b_next) {
   1412 					if ((nbp->b_datap->db_type ==
   1413 					    M_PROTO) ||
   1414 					    (nbp->b_datap->db_type ==
   1415 					    M_PCPROTO)) {
   1416 						nbp->b_datap->db_type = M_DATA;
   1417 					} else {
   1418 						break;
   1419 					}
   1420 				}
   1421 				/*
   1422 				 * clear stream head hi pri flag based on
   1423 				 * first message
   1424 				 */
   1425 				if (type == M_PCPROTO) {
   1426 					mutex_enter(&stp->sd_lock);
   1427 					stp->sd_flag &= ~STRPRI;
   1428 					mutex_exit(&stp->sd_lock);
   1429 				}
   1430 				goto ismdata;
   1431 			} else if (stp->sd_read_opt & RD_PROTDIS) {
   1432 				/*
   1433 				 * discard non-data messages
   1434 				 */
   1435 				while (bp &&
   1436 				    ((bp->b_datap->db_type == M_PROTO) ||
   1437 				    (bp->b_datap->db_type == M_PCPROTO))) {
   1438 					nbp = unlinkb(bp);
   1439 					freeb(bp);
   1440 					bp = nbp;
   1441 				}
   1442 				/*
   1443 				 * clear stream head hi pri flag based on
   1444 				 * first message
   1445 				 */
   1446 				if (type == M_PCPROTO) {
   1447 					mutex_enter(&stp->sd_lock);
   1448 					stp->sd_flag &= ~STRPRI;
   1449 					mutex_exit(&stp->sd_lock);
   1450 				}
   1451 				if (bp) {
   1452 					bp->b_band = pri;
   1453 					goto ismdata;
   1454 				} else {
   1455 					break;
   1456 				}
   1457 			}
   1458 			/* FALLTHRU */
   1459 		case M_PASSFP:
   1460 			if ((bp->b_datap->db_type == M_PASSFP) &&
   1461 			    (stp->sd_read_opt & RD_PROTDIS)) {
   1462 				freemsg(bp);
   1463 				break;
   1464 			}
   1465 			mutex_enter(&stp->sd_lock);
   1466 			putback(stp, q, bp, pri);
   1467 			mutex_exit(&stp->sd_lock);
   1468 			if (rflg == 0)
   1469 				error = EBADMSG;
   1470 			goto oops1;
   1471 
   1472 		default:
   1473 			/*
   1474 			 * Garbage on stream head read queue.
   1475 			 */
   1476 			cmn_err(CE_WARN, "bad %x found at stream head\n",
   1477 			    bp->b_datap->db_type);
   1478 			freemsg(bp);
   1479 			goto oops1;
   1480 		}
   1481 		mutex_enter(&stp->sd_lock);
   1482 	}
   1483 oops:
   1484 	mutex_exit(&stp->sd_lock);
   1485 oops1:
   1486 	qbackenable(q, pri);
   1487 	return (error);
   1488 #undef	_LASTMARK
   1489 }
   1490 
   1491 /*
   1492  * Default processing of M_PROTO/M_PCPROTO messages.
   1493  * Determine which wakeups and signals are needed.
   1494  * This can be replaced by a user-specified procedure for kernel users
   1495  * of STREAMS.
   1496  */
   1497 /* ARGSUSED */
   1498 mblk_t *
   1499 strrput_proto(vnode_t *vp, mblk_t *mp,
   1500     strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
   1501     strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
   1502 {
   1503 	*wakeups = RSLEEP;
   1504 	*allmsgsigs = 0;
   1505 
   1506 	switch (mp->b_datap->db_type) {
   1507 	case M_PROTO:
   1508 		if (mp->b_band == 0) {
   1509 			*firstmsgsigs = S_INPUT | S_RDNORM;
   1510 			*pollwakeups = POLLIN | POLLRDNORM;
   1511 		} else {
   1512 			*firstmsgsigs = S_INPUT | S_RDBAND;
   1513 			*pollwakeups = POLLIN | POLLRDBAND;
   1514 		}
   1515 		break;
   1516 	case M_PCPROTO:
   1517 		*firstmsgsigs = S_HIPRI;
   1518 		*pollwakeups = POLLPRI;
   1519 		break;
   1520 	}
   1521 	return (mp);
   1522 }
   1523 
   1524 /*
   1525  * Default processing of everything but M_DATA, M_PROTO, M_PCPROTO and
   1526  * M_PASSFP messages.
   1527  * Determine which wakeups and signals are needed.
   1528  * This can be replaced by a user-specified procedure for kernel users
   1529  * of STREAMS.
   1530  */
   1531 /* ARGSUSED */
   1532 mblk_t *
   1533 strrput_misc(vnode_t *vp, mblk_t *mp,
   1534     strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
   1535     strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
   1536 {
   1537 	*wakeups = 0;
   1538 	*firstmsgsigs = 0;
   1539 	*allmsgsigs = 0;
   1540 	*pollwakeups = 0;
   1541 	return (mp);
   1542 }
   1543 
   1544 /*
   1545  * Stream read put procedure.  Called from downstream driver/module
   1546  * with messages for the stream head.  Data, protocol, and in-stream
   1547  * signal messages are placed on the queue, others are handled directly.
   1548  */
   1549 int
   1550 strrput(queue_t *q, mblk_t *bp)
   1551 {
   1552 	struct stdata	*stp;
   1553 	ulong_t		rput_opt;
   1554 	strwakeup_t	wakeups;
   1555 	strsigset_t	firstmsgsigs;	/* Signals if first message on queue */
   1556 	strsigset_t	allmsgsigs;	/* Signals for all messages */
   1557 	strsigset_t	signals;	/* Signals events to generate */
   1558 	strpollset_t	pollwakeups;
   1559 	mblk_t		*nextbp;
   1560 	uchar_t		band = 0;
   1561 	int		hipri_sig;
   1562 
   1563 	stp = (struct stdata *)q->q_ptr;
   1564 	/*
   1565 	 * Use rput_opt for optimized access to the SR_ flags except
   1566 	 * SR_POLLIN. That flag has to be checked under sd_lock since it
   1567 	 * is modified by strpoll().
   1568 	 */
   1569 	rput_opt = stp->sd_rput_opt;
   1570 
   1571 	ASSERT(qclaimed(q));
   1572 	TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_ENTER,
   1573 	    "strrput called with message type:q %p bp %p", q, bp);
   1574 
   1575 	/*
   1576 	 * Perform initial processing and pass to the parameterized functions.
   1577 	 */
   1578 	ASSERT(bp->b_next == NULL);
   1579 
   1580 	switch (bp->b_datap->db_type) {
   1581 	case M_DATA:
   1582 		/*
   1583 		 * sockfs is the only consumer of STREOF and when it is set,
   1584 		 * it implies that the receiver is not interested in receiving
   1585 		 * any more data, hence the mblk is freed to prevent unnecessary
   1586 		 * message queueing at the stream head.
   1587 		 */
   1588 		if (stp->sd_flag == STREOF) {
   1589 			freemsg(bp);
   1590 			return (0);
   1591 		}
   1592 		if ((rput_opt & SR_IGN_ZEROLEN) &&
   1593 		    bp->b_rptr == bp->b_wptr && msgnodata(bp)) {
   1594 			/*
   1595 			 * Ignore zero-length M_DATA messages. These might be
   1596 			 * generated by some transports.
   1597 			 * The zero-length M_DATA messages, even if they
   1598 			 * are ignored, should effect the atmark tracking and
   1599 			 * should wake up a thread sleeping in strwaitmark.
   1600 			 */
   1601 			mutex_enter(&stp->sd_lock);
   1602 			if (bp->b_flag & MSGMARKNEXT) {
   1603 				/*
   1604 				 * Record the position of the mark either
   1605 				 * in q_last or in STRATMARK.
   1606 				 */
   1607 				if (q->q_last != NULL) {
   1608 					q->q_last->b_flag &= ~MSGNOTMARKNEXT;
   1609 					q->q_last->b_flag |= MSGMARKNEXT;
   1610 				} else {
   1611 					stp->sd_flag &= ~STRNOTATMARK;
   1612 					stp->sd_flag |= STRATMARK;
   1613 				}
   1614 			} else if (bp->b_flag & MSGNOTMARKNEXT) {
   1615 				/*
   1616 				 * Record that this is not the position of
   1617 				 * the mark either in q_last or in
   1618 				 * STRNOTATMARK.
   1619 				 */
   1620 				if (q->q_last != NULL) {
   1621 					q->q_last->b_flag &= ~MSGMARKNEXT;
   1622 					q->q_last->b_flag |= MSGNOTMARKNEXT;
   1623 				} else {
   1624 					stp->sd_flag &= ~STRATMARK;
   1625 					stp->sd_flag |= STRNOTATMARK;
   1626 				}
   1627 			}
   1628 			if (stp->sd_flag & RSLEEP) {
   1629 				stp->sd_flag &= ~RSLEEP;
   1630 				cv_broadcast(&q->q_wait);
   1631 			}
   1632 			mutex_exit(&stp->sd_lock);
   1633 			freemsg(bp);
   1634 			return (0);
   1635 		}
   1636 		wakeups = RSLEEP;
   1637 		if (bp->b_band == 0) {
   1638 			firstmsgsigs = S_INPUT | S_RDNORM;
   1639 			pollwakeups = POLLIN | POLLRDNORM;
   1640 		} else {
   1641 			firstmsgsigs = S_INPUT | S_RDBAND;
   1642 			pollwakeups = POLLIN | POLLRDBAND;
   1643 		}
   1644 		if (rput_opt & SR_SIGALLDATA)
   1645 			allmsgsigs = firstmsgsigs;
   1646 		else
   1647 			allmsgsigs = 0;
   1648 
   1649 		mutex_enter(&stp->sd_lock);
   1650 		if ((rput_opt & SR_CONSOL_DATA) &&
   1651 		    (q->q_last != NULL) &&
   1652 		    (bp->b_flag & (MSGMARK|MSGDELIM)) == 0) {
   1653 			/*
   1654 			 * Consolidate an M_DATA message onto an M_DATA,
   1655 			 * M_PROTO, or M_PCPROTO by merging it with q_last.
   1656 			 * The consolidation does not take place if
   1657 			 * the old message is marked with either of the
   1658 			 * marks or the delim flag or if the new
   1659 			 * message is marked with MSGMARK. The MSGMARK
   1660 			 * check is needed to handle the odd semantics of
   1661 			 * MSGMARK where essentially the whole message
   1662 			 * is to be treated as marked.
   1663 			 * Carry any MSGMARKNEXT  and MSGNOTMARKNEXT from the
   1664 			 * new message to the front of the b_cont chain.
   1665 			 */
   1666 			mblk_t *lbp = q->q_last;
   1667 			unsigned char db_type = lbp->b_datap->db_type;
   1668 
   1669 			if ((db_type == M_DATA || db_type == M_PROTO ||
   1670 			    db_type == M_PCPROTO) &&
   1671 			    !(lbp->b_flag & (MSGDELIM|MSGMARK|MSGMARKNEXT))) {
   1672 				rmvq_noenab(q, lbp);
   1673 				/*
   1674 				 * The first message in the b_cont list
   1675 				 * tracks MSGMARKNEXT and MSGNOTMARKNEXT.
   1676 				 * We need to handle the case where we
   1677 				 * are appending:
   1678 				 *
   1679 				 * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT.
   1680 				 * 2) a MSGMARKNEXT to a plain message.
   1681 				 * 3) a MSGNOTMARKNEXT to a plain message
   1682 				 * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT
   1683 				 *    message.
   1684 				 *
   1685 				 * Thus we never append a MSGMARKNEXT or
   1686 				 * MSGNOTMARKNEXT to a MSGMARKNEXT message.
   1687 				 */
   1688 				if (bp->b_flag & MSGMARKNEXT) {
   1689 					lbp->b_flag |= MSGMARKNEXT;
   1690 					lbp->b_flag &= ~MSGNOTMARKNEXT;
   1691 					bp->b_flag &= ~MSGMARKNEXT;
   1692 				} else if (bp->b_flag & MSGNOTMARKNEXT) {
   1693 					lbp->b_flag |= MSGNOTMARKNEXT;
   1694 					bp->b_flag &= ~MSGNOTMARKNEXT;
   1695 				}
   1696 
   1697 				linkb(lbp, bp);
   1698 				bp = lbp;
   1699 				/*
   1700 				 * The new message logically isn't the first
   1701 				 * even though the q_first check below thinks
   1702 				 * it is. Clear the firstmsgsigs to make it
   1703 				 * not appear to be first.
   1704 				 */
   1705 				firstmsgsigs = 0;
   1706 			}
   1707 		}
   1708 		break;
   1709 
   1710 	case M_PASSFP:
   1711 		wakeups = RSLEEP;
   1712 		allmsgsigs = 0;
   1713 		if (bp->b_band == 0) {
   1714 			firstmsgsigs = S_INPUT | S_RDNORM;
   1715 			pollwakeups = POLLIN | POLLRDNORM;
   1716 		} else {
   1717 			firstmsgsigs = S_INPUT | S_RDBAND;
   1718 			pollwakeups = POLLIN | POLLRDBAND;
   1719 		}
   1720 		mutex_enter(&stp->sd_lock);
   1721 		break;
   1722 
   1723 	case M_PROTO:
   1724 	case M_PCPROTO:
   1725 		ASSERT(stp->sd_rprotofunc != NULL);
   1726 		bp = (stp->sd_rprotofunc)(stp->sd_vnode, bp,
   1727 		    &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
   1728 #define	ALLSIG	(S_INPUT|S_HIPRI|S_OUTPUT|S_MSG|S_ERROR|S_HANGUP|S_RDNORM|\
   1729 		S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)
   1730 #define	ALLPOLL	(POLLIN|POLLPRI|POLLOUT|POLLRDNORM|POLLWRNORM|POLLRDBAND|\
   1731 		POLLWRBAND)
   1732 
   1733 		ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
   1734 		ASSERT((firstmsgsigs & ~ALLSIG) == 0);
   1735 		ASSERT((allmsgsigs & ~ALLSIG) == 0);
   1736 		ASSERT((pollwakeups & ~ALLPOLL) == 0);
   1737 
   1738 		mutex_enter(&stp->sd_lock);
   1739 		break;
   1740 
   1741 	default:
   1742 		ASSERT(stp->sd_rmiscfunc != NULL);
   1743 		bp = (stp->sd_rmiscfunc)(stp->sd_vnode, bp,
   1744 		    &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
   1745 		ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
   1746 		ASSERT((firstmsgsigs & ~ALLSIG) == 0);
   1747 		ASSERT((allmsgsigs & ~ALLSIG) == 0);
   1748 		ASSERT((pollwakeups & ~ALLPOLL) == 0);
   1749 #undef	ALLSIG
   1750 #undef	ALLPOLL
   1751 		mutex_enter(&stp->sd_lock);
   1752 		break;
   1753 	}
   1754 	ASSERT(MUTEX_HELD(&stp->sd_lock));
   1755 
   1756 	/* By default generate superset of signals */
   1757 	signals = (firstmsgsigs | allmsgsigs);
   1758 
   1759 	/*
   1760 	 * The  proto and misc functions can return multiple messages
   1761 	 * as a b_next chain. Such messages are processed separately.
   1762 	 */
   1763 one_more:
   1764 	hipri_sig = 0;
   1765 	if (bp == NULL) {
   1766 		nextbp = NULL;
   1767 	} else {
   1768 		nextbp = bp->b_next;
   1769 		bp->b_next = NULL;
   1770 
   1771 		switch (bp->b_datap->db_type) {
   1772 		case M_PCPROTO:
   1773 			/*
   1774 			 * Only one priority protocol message is allowed at the
   1775 			 * stream head at a time.
   1776 			 */
   1777 			if (stp->sd_flag & STRPRI) {
   1778 				TRACE_0(TR_FAC_STREAMS_FR, TR_STRRPUT_PROTERR,
   1779 				    "M_PCPROTO already at head");
   1780 				freemsg(bp);
   1781 				mutex_exit(&stp->sd_lock);
   1782 				goto done;
   1783 			}
   1784 			stp->sd_flag |= STRPRI;
   1785 			hipri_sig = 1;
   1786 			/* FALLTHRU */
   1787 		case M_DATA:
   1788 		case M_PROTO:
   1789 		case M_PASSFP:
   1790 			band = bp->b_band;
   1791 			/*
   1792 			 * Marking doesn't work well when messages
   1793 			 * are marked in more than one band.  We only
   1794 			 * remember the last message received, even if
   1795 			 * it is placed on the queue ahead of other
   1796 			 * marked messages.
   1797 			 */
   1798 			if (bp->b_flag & MSGMARK)
   1799 				stp->sd_mark = bp;
   1800 			(void) putq(q, bp);
   1801 
   1802 			/*
   1803 			 * If message is a PCPROTO message, always use
   1804 			 * firstmsgsigs to determine if a signal should be
   1805 			 * sent as strrput is the only place to send
   1806 			 * signals for PCPROTO. Other messages are based on
   1807 			 * the STRGETINPROG flag. The flag determines if
   1808 			 * strrput or (k)strgetmsg will be responsible for
   1809 			 * sending the signals, in the firstmsgsigs case.
   1810 			 */
   1811 			if ((hipri_sig == 1) ||
   1812 			    (((stp->sd_flag & STRGETINPROG) == 0) &&
   1813 			    (q->q_first == bp)))
   1814 				signals = (firstmsgsigs | allmsgsigs);
   1815 			else
   1816 				signals = allmsgsigs;
   1817 			break;
   1818 
   1819 		default:
   1820 			mutex_exit(&stp->sd_lock);
   1821 			(void) strrput_nondata(q, bp);
   1822 			mutex_enter(&stp->sd_lock);
   1823 			break;
   1824 		}
   1825 	}
   1826 	ASSERT(MUTEX_HELD(&stp->sd_lock));
   1827 	/*
   1828 	 * Wake sleeping read/getmsg and cancel deferred wakeup
   1829 	 */
   1830 	if (wakeups & RSLEEP)
   1831 		stp->sd_wakeq &= ~RSLEEP;
   1832 
   1833 	wakeups &= stp->sd_flag;
   1834 	if (wakeups & RSLEEP) {
   1835 		stp->sd_flag &= ~RSLEEP;
   1836 		cv_broadcast(&q->q_wait);
   1837 	}
   1838 	if (wakeups & WSLEEP) {
   1839 		stp->sd_flag &= ~WSLEEP;
   1840 		cv_broadcast(&_WR(q)->q_wait);
   1841 	}
   1842 
   1843 	if (pollwakeups != 0) {
   1844 		if (pollwakeups == (POLLIN | POLLRDNORM)) {
   1845 			/*
   1846 			 * Can't use rput_opt since it was not
   1847 			 * read when sd_lock was held and SR_POLLIN is changed
   1848 			 * by strpoll() under sd_lock.
   1849 			 */
   1850 			if (!(stp->sd_rput_opt & SR_POLLIN))
   1851 				goto no_pollwake;
   1852 			stp->sd_rput_opt &= ~SR_POLLIN;
   1853 		}
   1854 		mutex_exit(&stp->sd_lock);
   1855 		pollwakeup(&stp->sd_pollist, pollwakeups);
   1856 		mutex_enter(&stp->sd_lock);
   1857 	}
   1858 no_pollwake:
   1859 
   1860 	/*
   1861 	 * strsendsig can handle multiple signals with a
   1862 	 * single call.
   1863 	 */
   1864 	if (stp->sd_sigflags & signals)
   1865 		strsendsig(stp->sd_siglist, signals, band, 0);
   1866 	mutex_exit(&stp->sd_lock);
   1867 
   1868 
   1869 done:
   1870 	if (nextbp == NULL)
   1871 		return (0);
   1872 
   1873 	/*
   1874 	 * Any signals were handled the first time.
   1875 	 * Wakeups and pollwakeups are redone to avoid any race
   1876 	 * conditions - all the messages are not queued until the
   1877 	 * last message has been processed by strrput.
   1878 	 */
   1879 	bp = nextbp;
   1880 	signals = firstmsgsigs = allmsgsigs = 0;
   1881 	mutex_enter(&stp->sd_lock);
   1882 	goto one_more;
   1883 }
   1884 
   1885 static void
   1886 log_dupioc(queue_t *rq, mblk_t *bp)
   1887 {
   1888 	queue_t *wq, *qp;
   1889 	char *modnames, *mnp, *dname;
   1890 	size_t maxmodstr;
   1891 	boolean_t islast;
   1892 
   1893 	/*
   1894 	 * Allocate a buffer large enough to hold the names of nstrpush modules
   1895 	 * and one driver, with spaces between and NUL terminator.  If we can't
   1896 	 * get memory, then we'll just log the driver name.
   1897 	 */
   1898 	maxmodstr = nstrpush * (FMNAMESZ + 1);
   1899 	mnp = modnames = kmem_alloc(maxmodstr, KM_NOSLEEP);
   1900 
   1901 	/* march down write side to print log message down to the driver */
   1902 	wq = WR(rq);
   1903 
   1904 	/* make sure q_next doesn't shift around while we're grabbing data */
   1905 	claimstr(wq);
   1906 	qp = wq->q_next;
   1907 	do {
   1908 		dname = Q2NAME(qp);
   1909 		islast = !SAMESTR(qp) || qp->q_next == NULL;
   1910 		if (modnames == NULL) {
   1911 			/*
   1912 			 * If we don't have memory, then get the driver name in
   1913 			 * the log where we can see it.  Note that memory
   1914 			 * pressure is a possible cause of these sorts of bugs.
   1915 			 */
   1916 			if (islast) {
   1917 				modnames = dname;
   1918 				maxmodstr = 0;
   1919 			}
   1920 		} else {
   1921 			mnp += snprintf(mnp, FMNAMESZ + 1, "%s", dname);
   1922 			if (!islast)
   1923 				*mnp++ = ' ';
   1924 		}
   1925 		qp = qp->q_next;
   1926 	} while (!islast);
   1927 	releasestr(wq);
   1928 	/* Cannot happen unless stream head is corrupt. */
   1929 	ASSERT(modnames != NULL);
   1930 	(void) strlog(rq->q_qinfo->qi_minfo->mi_idnum, 0, 1,
   1931 	    SL_CONSOLE|SL_TRACE|SL_ERROR,
   1932 	    "Warning: stream %p received duplicate %X M_IOC%s; module list: %s",
   1933 	    rq->q_ptr, ((struct iocblk *)bp->b_rptr)->ioc_cmd,
   1934 	    (DB_TYPE(bp) == M_IOCACK ? "ACK" : "NAK"), modnames);
   1935 	if (maxmodstr != 0)
   1936 		kmem_free(modnames, maxmodstr);
   1937 }
   1938 
   1939 int
   1940 strrput_nondata(queue_t *q, mblk_t *bp)
   1941 {
   1942 	struct stdata *stp;
   1943 	struct iocblk *iocbp;
   1944 	struct stroptions *sop;
   1945 	struct copyreq *reqp;
   1946 	struct copyresp *resp;
   1947 	unsigned char bpri;
   1948 	unsigned char  flushed_already = 0;
   1949 
   1950 	stp = (struct stdata *)q->q_ptr;
   1951 
   1952 	ASSERT(!(stp->sd_flag & STPLEX));
   1953 	ASSERT(qclaimed(q));
   1954 
   1955 	switch (bp->b_datap->db_type) {
   1956 	case M_ERROR:
   1957 		/*
   1958 		 * An error has occurred downstream, the errno is in the first
   1959 		 * bytes of the message.
   1960 		 */
   1961 		if ((bp->b_wptr - bp->b_rptr) == 2) {	/* New flavor */
   1962 			unsigned char rw = 0;
   1963 
   1964 			mutex_enter(&stp->sd_lock);
   1965 			if (*bp->b_rptr != NOERROR) {	/* read error */
   1966 				if (*bp->b_rptr != 0) {
   1967 					if (stp->sd_flag & STRDERR)
   1968 						flushed_already |= FLUSHR;
   1969 					stp->sd_flag |= STRDERR;
   1970 					rw |= FLUSHR;
   1971 				} else {
   1972 					stp->sd_flag &= ~STRDERR;
   1973 				}
   1974 				stp->sd_rerror = *bp->b_rptr;
   1975 			}
   1976 			bp->b_rptr++;
   1977 			if (*bp->b_rptr != NOERROR) {	/* write error */
   1978 				if (*bp->b_rptr != 0) {
   1979 					if (stp->sd_flag & STWRERR)
   1980 						flushed_already |= FLUSHW;
   1981 					stp->sd_flag |= STWRERR;
   1982 					rw |= FLUSHW;
   1983 				} else {
   1984 					stp->sd_flag &= ~STWRERR;
   1985 				}
   1986 				stp->sd_werror = *bp->b_rptr;
   1987 			}
   1988 			if (rw) {
   1989 				TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_WAKE,
   1990 				    "strrput cv_broadcast:q %p, bp %p",
   1991 				    q, bp);
   1992 				cv_broadcast(&q->q_wait); /* readers */
   1993 				cv_broadcast(&_WR(q)->q_wait); /* writers */
   1994 				cv_broadcast(&stp->sd_monitor); /* ioctllers */
   1995 
   1996 				mutex_exit(&stp->sd_lock);
   1997 				pollwakeup(&stp->sd_pollist, POLLERR);
   1998 				mutex_enter(&stp->sd_lock);
   1999 
   2000 				if (stp->sd_sigflags & S_ERROR)
   2001 					strsendsig(stp->sd_siglist, S_ERROR, 0,
   2002 					    ((rw & FLUSHR) ? stp->sd_rerror :
   2003 					    stp->sd_werror));
   2004 				mutex_exit(&stp->sd_lock);
   2005 				/*
   2006 				 * Send the M_FLUSH only
   2007 				 * for the first M_ERROR
   2008 				 * message on the stream
   2009 				 */
   2010 				if (flushed_already == rw) {
   2011 					freemsg(bp);
   2012 					return (0);
   2013 				}
   2014 
   2015 				bp->b_datap->db_type = M_FLUSH;
   2016 				*bp->b_rptr = rw;
   2017 				bp->b_wptr = bp->b_rptr + 1;
   2018 				/*
   2019 				 * Protect against the driver
   2020 				 * passing up messages after
   2021 				 * it has done a qprocsoff
   2022 				 */
   2023 				if (_OTHERQ(q)->q_next == NULL)
   2024 					freemsg(bp);
   2025 				else
   2026 					qreply(q, bp);
   2027 				return (0);
   2028 			} else
   2029 				mutex_exit(&stp->sd_lock);
   2030 		} else if (*bp->b_rptr != 0) {		/* Old flavor */
   2031 				if (stp->sd_flag & (STRDERR|STWRERR))
   2032 					flushed_already = FLUSHRW;
   2033 				mutex_enter(&stp->sd_lock);
   2034 				stp->sd_flag |= (STRDERR|STWRERR);
   2035 				stp->sd_rerror = *bp->b_rptr;
   2036 				stp->sd_werror = *bp->b_rptr;
   2037 				TRACE_2(TR_FAC_STREAMS_FR,
   2038 				    TR_STRRPUT_WAKE2,
   2039 				    "strrput wakeup #2:q %p, bp %p", q, bp);
   2040 				cv_broadcast(&q->q_wait); /* the readers */
   2041 				cv_broadcast(&_WR(q)->q_wait); /* the writers */
   2042 				cv_broadcast(&stp->sd_monitor); /* ioctllers */
   2043 
   2044 				mutex_exit(&stp->sd_lock);
   2045 				pollwakeup(&stp->sd_pollist, POLLERR);
   2046 				mutex_enter(&stp->sd_lock);
   2047 
   2048 				if (stp->sd_sigflags & S_ERROR)
   2049 					strsendsig(stp->sd_siglist, S_ERROR, 0,
   2050 					    (stp->sd_werror ? stp->sd_werror :
   2051 					    stp->sd_rerror));
   2052 				mutex_exit(&stp->sd_lock);
   2053 
   2054 				/*
   2055 				 * Send the M_FLUSH only
   2056 				 * for the first M_ERROR
   2057 				 * message on the stream
   2058 				 */
   2059 				if (flushed_already != FLUSHRW) {
   2060 					bp->b_datap->db_type = M_FLUSH;
   2061 					*bp->b_rptr = FLUSHRW;
   2062 					/*
   2063 					 * Protect against the driver passing up
   2064 					 * messages after it has done a
   2065 					 * qprocsoff.
   2066 					 */
   2067 				if (_OTHERQ(q)->q_next == NULL)
   2068 					freemsg(bp);
   2069 				else
   2070 					qreply(q, bp);
   2071 				return (0);
   2072 				}
   2073 		}
   2074 		freemsg(bp);
   2075 		return (0);
   2076 
   2077 	case M_HANGUP:
   2078 
   2079 		freemsg(bp);
   2080 		mutex_enter(&stp->sd_lock);
   2081 		stp->sd_werror = ENXIO;
   2082 		stp->sd_flag |= STRHUP;
   2083 		stp->sd_flag &= ~(WSLEEP|RSLEEP);
   2084 
   2085 		/*
   2086 		 * send signal if controlling tty
   2087 		 */
   2088 
   2089 		if (stp->sd_sidp) {
   2090 			prsignal(stp->sd_sidp, SIGHUP);
   2091 			if (stp->sd_sidp != stp->sd_pgidp)
   2092 				pgsignal(stp->sd_pgidp, SIGTSTP);
   2093 		}
   2094 
   2095 		/*
   2096 		 * wake up read, write, and exception pollers and
   2097 		 * reset wakeup mechanism.
   2098 		 */
   2099 		cv_broadcast(&q->q_wait);	/* the readers */
   2100 		cv_broadcast(&_WR(q)->q_wait);	/* the writers */
   2101 		cv_broadcast(&stp->sd_monitor);	/* the ioctllers */
   2102 		strhup(stp);
   2103 		mutex_exit(&stp->sd_lock);
   2104 		return (0);
   2105 
   2106 	case M_UNHANGUP:
   2107 		freemsg(bp);
   2108 		mutex_enter(&stp->sd_lock);
   2109 		stp->sd_werror = 0;
   2110 		stp->sd_flag &= ~STRHUP;
   2111 		mutex_exit(&stp->sd_lock);
   2112 		return (0);
   2113 
   2114 	case M_SIG:
   2115 		/*
   2116 		 * Someone downstream wants to post a signal.  The
   2117 		 * signal to post is contained in the first byte of the
   2118 		 * message.  If the message would go on the front of
   2119 		 * the queue, send a signal to the process group
   2120 		 * (if not SIGPOLL) or to the siglist processes
   2121 		 * (SIGPOLL).  If something is already on the queue,
   2122 		 * OR if we are delivering a delayed suspend (*sigh*
   2123 		 * another "tty" hack) and there's no one sleeping already,
   2124 		 * just enqueue the message.
   2125 		 */
   2126 		mutex_enter(&stp->sd_lock);
   2127 		if (q->q_first || (*bp->b_rptr == SIGTSTP &&
   2128 		    !(stp->sd_flag & RSLEEP))) {
   2129 			(void) putq(q, bp);
   2130 			mutex_exit(&stp->sd_lock);
   2131 			return (0);
   2132 		}
   2133 		mutex_exit(&stp->sd_lock);
   2134 		/* FALLTHRU */
   2135 
   2136 	case M_PCSIG:
   2137 		/*
   2138 		 * Don't enqueue, just post the signal.
   2139 		 */
   2140 		strsignal(stp, *bp->b_rptr, 0L);
   2141 		freemsg(bp);
   2142 		return (0);
   2143 
   2144 	case M_CMD:
   2145 		if (MBLKL(bp) != sizeof (cmdblk_t)) {
   2146 			freemsg(bp);
   2147 			return (0);
   2148 		}
   2149 
   2150 		mutex_enter(&stp->sd_lock);
   2151 		if (stp->sd_flag & STRCMDWAIT) {
   2152 			ASSERT(stp->sd_cmdblk == NULL);
   2153 			stp->sd_cmdblk = bp;
   2154 			cv_broadcast(&stp->sd_monitor);
   2155 			mutex_exit(&stp->sd_lock);
   2156 		} else {
   2157 			mutex_exit(&stp->sd_lock);
   2158 			freemsg(bp);
   2159 		}
   2160 		return (0);
   2161 
   2162 	case M_FLUSH:
   2163 		/*
   2164 		 * Flush queues.  The indication of which queues to flush
   2165 		 * is in the first byte of the message.  If the read queue
   2166 		 * is specified, then flush it.  If FLUSHBAND is set, just
   2167 		 * flush the band specified by the second byte of the message.
   2168 		 *
   2169 		 * If a module has issued a M_SETOPT to not flush hi
   2170 		 * priority messages off of the stream head, then pass this
   2171 		 * flag into the flushq code to preserve such messages.
   2172 		 */
   2173 
   2174 		if (*bp->b_rptr & FLUSHR) {
   2175 			mutex_enter(&stp->sd_lock);
   2176 			if (*bp->b_rptr & FLUSHBAND) {
   2177 				ASSERT((bp->b_wptr - bp->b_rptr) >= 2);
   2178 				flushband(q, *(bp->b_rptr + 1), FLUSHALL);
   2179 			} else
   2180 				flushq_common(q, FLUSHALL,
   2181 				    stp->sd_read_opt & RFLUSHPCPROT);
   2182 			if ((q->q_first == NULL) ||
   2183 			    (q->q_first->b_datap->db_type < QPCTL))
   2184 				stp->sd_flag &= ~STRPRI;
   2185 			else {
   2186 				ASSERT(stp->sd_flag & STRPRI);
   2187 			}
   2188 			mutex_exit(&stp->sd_lock);
   2189 		}
   2190 		if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
   2191 			*bp->b_rptr &= ~FLUSHR;
   2192 			bp->b_flag |= MSGNOLOOP;
   2193 			/*
   2194 			 * Protect against the driver passing up
   2195 			 * messages after it has done a qprocsoff.
   2196 			 */
   2197 			if (_OTHERQ(q)->q_next == NULL)
   2198 				freemsg(bp);
   2199 			else
   2200 				qreply(q, bp);
   2201 			return (0);
   2202 		}
   2203 		freemsg(bp);
   2204 		return (0);
   2205 
   2206 	case M_IOCACK:
   2207 	case M_IOCNAK:
   2208 		iocbp = (struct iocblk *)bp->b_rptr;
   2209 		/*
   2210 		 * If not waiting for ACK or NAK then just free msg.
   2211 		 * If incorrect id sequence number then just free msg.
   2212 		 * If already have ACK or NAK for user then this is a
   2213 		 *    duplicate, display a warning and free the msg.
   2214 		 */
   2215 		mutex_enter(&stp->sd_lock);
   2216 		if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk ||
   2217 		    (stp->sd_iocid != iocbp->ioc_id)) {
   2218 			/*
   2219 			 * If the ACK/NAK is a dup, display a message
   2220 			 * Dup is when sd_iocid == ioc_id, and
   2221 			 * sd_iocblk == <valid ptr> or -1 (the former
   2222 			 * is when an ioctl has been put on the stream
   2223 			 * head, but has not yet been consumed, the
   2224 			 * later is when it has been consumed).
   2225 			 */
   2226 			if ((stp->sd_iocid == iocbp->ioc_id) &&
   2227 			    (stp->sd_iocblk != NULL)) {
   2228 				log_dupioc(q, bp);
   2229 			}
   2230 			freemsg(bp);
   2231 			mutex_exit(&stp->sd_lock);
   2232 			return (0);
   2233 		}
   2234 
   2235 		/*
   2236 		 * Assign ACK or NAK to user and wake up.
   2237 		 */
   2238 		stp->sd_iocblk = bp;
   2239 		cv_broadcast(&stp->sd_monitor);
   2240 		mutex_exit(&stp->sd_lock);
   2241 		return (0);
   2242 
   2243 	case M_COPYIN:
   2244 	case M_COPYOUT:
   2245 		reqp = (struct copyreq *)bp->b_rptr;
   2246 
   2247 		/*
   2248 		 * If not waiting for ACK or NAK then just fail request.
   2249 		 * If already have ACK, NAK, or copy request, then just
   2250 		 * fail request.
   2251 		 * If incorrect id sequence number then just fail request.
   2252 		 */
   2253 		mutex_enter(&stp->sd_lock);
   2254 		if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk ||
   2255 		    (stp->sd_iocid != reqp->cq_id)) {
   2256 			if (bp->b_cont) {
   2257 				freemsg(bp->b_cont);
   2258 				bp->b_cont = NULL;
   2259 			}
   2260 			bp->b_datap->db_type = M_IOCDATA;
   2261 			bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
   2262 			resp = (struct copyresp *)bp->b_rptr;
   2263 			resp->cp_rval = (caddr_t)1;	/* failure */
   2264 			mutex_exit(&stp->sd_lock);
   2265 			putnext(stp->sd_wrq, bp);
   2266 			return (0);
   2267 		}
   2268 
   2269 		/*
   2270 		 * Assign copy request to user and wake up.
   2271 		 */
   2272 		stp->sd_iocblk = bp;
   2273 		cv_broadcast(&stp->sd_monitor);
   2274 		mutex_exit(&stp->sd_lock);
   2275 		return (0);
   2276 
   2277 	case M_SETOPTS:
   2278 		/*
   2279 		 * Set stream head options (read option, write offset,
   2280 		 * min/max packet size, and/or high/low water marks for
   2281 		 * the read side only).
   2282 		 */
   2283 
   2284 		bpri = 0;
   2285 		sop = (struct stroptions *)bp->b_rptr;
   2286 		mutex_enter(&stp->sd_lock);
   2287 		if (sop->so_flags & SO_READOPT) {
   2288 			switch (sop->so_readopt & RMODEMASK) {
   2289 			case RNORM:
   2290 				stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS);
   2291 				break;
   2292 
   2293 			case RMSGD:
   2294 				stp->sd_read_opt =
   2295 				    ((stp->sd_read_opt & ~RD_MSGNODIS) |
   2296 				    RD_MSGDIS);
   2297 				break;
   2298 
   2299 			case RMSGN:
   2300 				stp->sd_read_opt =
   2301 				    ((stp->sd_read_opt & ~RD_MSGDIS) |
   2302 				    RD_MSGNODIS);
   2303 				break;
   2304 			}
   2305 			switch (sop->so_readopt & RPROTMASK) {
   2306 			case RPROTNORM:
   2307 				stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS);
   2308 				break;
   2309 
   2310 			case RPROTDAT:
   2311 				stp->sd_read_opt =
   2312 				    ((stp->sd_read_opt & ~RD_PROTDIS) |
   2313 				    RD_PROTDAT);
   2314 				break;
   2315 
   2316 			case RPROTDIS:
   2317 				stp->sd_read_opt =
   2318 				    ((stp->sd_read_opt & ~RD_PROTDAT) |
   2319 				    RD_PROTDIS);
   2320 				break;
   2321 			}
   2322 			switch (sop->so_readopt & RFLUSHMASK) {
   2323 			case RFLUSHPCPROT:
   2324 				/*
   2325 				 * This sets the stream head to NOT flush
   2326 				 * M_PCPROTO messages.
   2327 				 */
   2328 				stp->sd_read_opt |= RFLUSHPCPROT;
   2329 				break;
   2330 			}
   2331 		}
   2332 		if (sop->so_flags & SO_ERROPT) {
   2333 			switch (sop->so_erropt & RERRMASK) {
   2334 			case RERRNORM:
   2335 				stp->sd_flag &= ~STRDERRNONPERSIST;
   2336 				break;
   2337 			case RERRNONPERSIST:
   2338 				stp->sd_flag |= STRDERRNONPERSIST;
   2339 				break;
   2340 			}
   2341 			switch (sop->so_erropt & WERRMASK) {
   2342 			case WERRNORM:
   2343 				stp->sd_flag &= ~STWRERRNONPERSIST;
   2344 				break;
   2345 			case WERRNONPERSIST:
   2346 				stp->sd_flag |= STWRERRNONPERSIST;
   2347 				break;
   2348 			}
   2349 		}
   2350 		if (sop->so_flags & SO_COPYOPT) {
   2351 			if (sop->so_copyopt & ZCVMSAFE) {
   2352 				stp->sd_copyflag |= STZCVMSAFE;
   2353 				stp->sd_copyflag &= ~STZCVMUNSAFE;
   2354 			} else if (sop->so_copyopt & ZCVMUNSAFE) {
   2355 				stp->sd_copyflag |= STZCVMUNSAFE;
   2356 				stp->sd_copyflag &= ~STZCVMSAFE;
   2357 			}
   2358 
   2359 			if (sop->so_copyopt & COPYCACHED) {
   2360 				stp->sd_copyflag |= STRCOPYCACHED;
   2361 			}
   2362 		}
   2363 		if (sop->so_flags & SO_WROFF)
   2364 			stp->sd_wroff = sop->so_wroff;
   2365 		if (sop->so_flags & SO_TAIL)
   2366 			stp->sd_tail = sop->so_tail;
   2367 		if (sop->so_flags & SO_MINPSZ)
   2368 			q->q_minpsz = sop->so_minpsz;
   2369 		if (sop->so_flags & SO_MAXPSZ)
   2370 			q->q_maxpsz = sop->so_maxpsz;
   2371 		if (sop->so_flags & SO_MAXBLK)
   2372 			stp->sd_maxblk = sop->so_maxblk;
   2373 		if (sop->so_flags & SO_HIWAT) {
   2374 			if (sop->so_flags & SO_BAND) {
   2375 				if (strqset(q, QHIWAT,
   2376 				    sop->so_band, sop->so_hiwat)) {
   2377 					cmn_err(CE_WARN, "strrput: could not "
   2378 					    "allocate qband\n");
   2379 				} else {
   2380 					bpri = sop->so_band;
   2381 				}
   2382 			} else {
   2383 				q->q_hiwat = sop->so_hiwat;
   2384 			}
   2385 		}
   2386 		if (sop->so_flags & SO_LOWAT) {
   2387 			if (sop->so_flags & SO_BAND) {
   2388 				if (strqset(q, QLOWAT,
   2389 				    sop->so_band, sop->so_lowat)) {
   2390 					cmn_err(CE_WARN, "strrput: could not "
   2391 					    "allocate qband\n");
   2392 				} else {
   2393 					bpri = sop->so_band;
   2394 				}
   2395 			} else {
   2396 				q->q_lowat = sop->so_lowat;
   2397 			}
   2398 		}
   2399 		if (sop->so_flags & SO_MREADON)
   2400 			stp->sd_flag |= SNDMREAD;
   2401 		if (sop->so_flags & SO_MREADOFF)
   2402 			stp->sd_flag &= ~SNDMREAD;
   2403 		if (sop->so_flags & SO_NDELON)
   2404 			stp->sd_flag |= OLDNDELAY;
   2405 		if (sop->so_flags & SO_NDELOFF)
   2406 			stp->sd_flag &= ~OLDNDELAY;
   2407 		if (sop->so_flags & SO_ISTTY)
   2408 			stp->sd_flag |= STRISTTY;
   2409 		if (sop->so_flags & SO_ISNTTY)
   2410 			stp->sd_flag &= ~STRISTTY;
   2411 		if (sop->so_flags & SO_TOSTOP)
   2412 			stp->sd_flag |= STRTOSTOP;
   2413 		if (sop->so_flags & SO_TONSTOP)
   2414 			stp->sd_flag &= ~STRTOSTOP;
   2415 		if (sop->so_flags & SO_DELIM)
   2416 			stp->sd_flag |= STRDELIM;
   2417 		if (sop->so_flags & SO_NODELIM)
   2418 			stp->sd_flag &= ~STRDELIM;
   2419 
   2420 		mutex_exit(&stp->sd_lock);
   2421 		freemsg(bp);
   2422 
   2423 		/* Check backenable in case the water marks changed */
   2424 		qbackenable(q, bpri);
   2425 		return (0);
   2426 
   2427 	/*
   2428 	 * The following set of cases deal with situations where two stream
   2429 	 * heads are connected to each other (twisted streams).  These messages
   2430 	 * have no meaning at the stream head.
   2431 	 */
   2432 	case M_BREAK:
   2433 	case M_CTL:
   2434 	case M_DELAY:
   2435 	case M_START:
   2436 	case M_STOP:
   2437 	case M_IOCDATA:
   2438 	case M_STARTI:
   2439 	case M_STOPI:
   2440 		freemsg(bp);
   2441 		return (0);
   2442 
   2443 	case M_IOCTL:
   2444 		/*
   2445 		 * Always NAK this condition
   2446 		 * (makes no sense)
   2447 		 * If there is one or more threads in the read side
   2448 		 * rwnext we have to defer the nacking until that thread
   2449 		 * returns (in strget).
   2450 		 */
   2451 		mutex_enter(&stp->sd_lock);
   2452 		if (stp->sd_struiodnak != 0) {
   2453 			/*
   2454 			 * Defer NAK to the streamhead. Queue at the end
   2455 			 * the list.
   2456 			 */
   2457 			mblk_t *mp = stp->sd_struionak;
   2458 
   2459 			while (mp && mp->b_next)
   2460 				mp = mp->b_next;
   2461 			if (mp)
   2462 				mp->b_next = bp;
   2463 			else
   2464 				stp->sd_struionak = bp;
   2465 			bp->b_next = NULL;
   2466 			mutex_exit(&stp->sd_lock);
   2467 			return (0);
   2468 		}
   2469 		mutex_exit(&stp->sd_lock);
   2470 
   2471 		bp->b_datap->db_type = M_IOCNAK;
   2472 		/*
   2473 		 * Protect against the driver passing up
   2474 		 * messages after it has done a qprocsoff.
   2475 		 */
   2476 		if (_OTHERQ(q)->q_next == NULL)
   2477 			freemsg(bp);
   2478 		else
   2479 			qreply(q, bp);
   2480 		return (0);
   2481 
   2482 	default:
   2483 #ifdef DEBUG
   2484 		cmn_err(CE_WARN,
   2485 		    "bad message type %x received at stream head\n",
   2486 		    bp->b_datap->db_type);
   2487 #endif
   2488 		freemsg(bp);
   2489 		return (0);
   2490 	}
   2491 
   2492 	/* NOTREACHED */
   2493 }
   2494 
   2495 /*
   2496  * Check if the stream pointed to by `stp' can be written to, and return an
   2497  * error code if not.  If `eiohup' is set, then return EIO if STRHUP is set.
   2498  * If `sigpipeok' is set and the SW_SIGPIPE option is enabled on the stream,
   2499  * then always return EPIPE and send a SIGPIPE to the invoking thread.
   2500  */
   2501 static int
   2502 strwriteable(struct stdata *stp, boolean_t eiohup, boolean_t sigpipeok)
   2503 {
   2504 	int error;
   2505 
   2506 	ASSERT(MUTEX_HELD(&stp->sd_lock));
   2507 
   2508 	/*
   2509 	 * For modem support, POSIX states that on writes, EIO should
   2510 	 * be returned if the stream has been hung up.
   2511 	 */
   2512 	if (eiohup && (stp->sd_flag & (STPLEX|STRHUP)) == STRHUP)
   2513 		error = EIO;
   2514 	else
   2515 		error = strgeterr(stp, STRHUP|STPLEX|STWRERR, 0);
   2516 
   2517 	if (error != 0) {
   2518 		if (!(stp->sd_flag & STPLEX) &&
   2519 		    (stp->sd_wput_opt & SW_SIGPIPE) && sigpipeok) {
   2520 			tsignal(curthread, SIGPIPE);
   2521 			error = EPIPE;
   2522 		}
   2523 	}
   2524 
   2525 	return (error);
   2526 }
   2527 
   2528 /*
   2529  * Copyin and send data down a stream.
   2530  * The caller will allocate and copyin any control part that precedes the
   2531  * message and pass that in as mctl.
   2532  *
   2533  * Caller should *not* hold sd_lock.
   2534  * When EWOULDBLOCK is returned the caller has to redo the canputnext
   2535  * under sd_lock in order to avoid missing a backenabling wakeup.
   2536  *
   2537  * Use iosize = -1 to not send any M_DATA. iosize = 0 sends zero-length M_DATA.
   2538  *
   2539  * Set MSG_IGNFLOW in flags to ignore flow control for hipri messages.
   2540  * For sync streams we can only ignore flow control by reverting to using
   2541  * putnext.
   2542  *
   2543  * If sd_maxblk is less than *iosize this routine might return without
   2544  * transferring all of *iosize. In all cases, on return *iosize will contain
   2545  * the amount of data that was transferred.
   2546  */
   2547 static int
   2548 strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize,
   2549     int b_flag, int pri, int flags)
   2550 {
   2551 	struiod_t uiod;
   2552 	mblk_t *mp;
   2553 	queue_t *wqp = stp->sd_wrq;
   2554 	int error = 0;
   2555 	ssize_t count = *iosize;
   2556 
   2557 	ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
   2558 
   2559 	if (uiop != NULL && count >= 0)
   2560 		flags |= stp->sd_struiowrq ? STRUIO_POSTPONE : 0;
   2561 
   2562 	if (!(flags & STRUIO_POSTPONE)) {
   2563 		/*
   2564 		 * Use regular canputnext, strmakedata, putnext sequence.
   2565 		 */
   2566 		if (pri == 0) {
   2567 			if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
   2568 				freemsg(mctl);
   2569 				return (EWOULDBLOCK);
   2570 			}
   2571 		} else {
   2572 			if (!(flags & MSG_IGNFLOW) && !bcanputnext(wqp, pri)) {
   2573 				freemsg(mctl);
   2574 				return (EWOULDBLOCK);
   2575 			}
   2576 		}
   2577 
   2578 		if ((error = strmakedata(iosize, uiop, stp, flags,
   2579 		    &mp)) != 0) {
   2580 			freemsg(mctl);
   2581 			/*
   2582 			 * need to change return code to ENOMEM
   2583 			 * so that this is not confused with
   2584 			 * flow control, EAGAIN.
   2585 			 */
   2586 
   2587 			if (error == EAGAIN)
   2588 				return (ENOMEM);
   2589 			else
   2590 				return (error);
   2591 		}
   2592 		if (mctl != NULL) {
   2593 			if (mctl->b_cont == NULL)
   2594 				mctl->b_cont = mp;
   2595 			else if (mp != NULL)
   2596 				linkb(mctl, mp);
   2597 			mp = mctl;
   2598 		} else if (mp == NULL)
   2599 			return (0);
   2600 
   2601 		mp->b_flag |= b_flag;
   2602 		mp->b_band = (uchar_t)pri;
   2603 
   2604 		if (flags & MSG_IGNFLOW) {
   2605 			/*
   2606 			 * XXX Hack: Don't get stuck running service
   2607 			 * procedures. This is needed for sockfs when
   2608 			 * sending the unbind message out of the rput
   2609 			 * procedure - we don't want a put procedure
   2610 			 * to run service procedures.
   2611 			 */
   2612 			putnext(wqp, mp);
   2613 		} else {
   2614 			stream_willservice(stp);
   2615 			putnext(wqp, mp);
   2616 			stream_runservice(stp);
   2617 		}
   2618 		return (0);
   2619 	}
   2620 	/*
   2621 	 * Stream supports rwnext() for the write side.
   2622 	 */
   2623 	if ((error = strmakedata(iosize, uiop, stp, flags, &mp)) != 0) {
   2624 		freemsg(mctl);
   2625 		/*
   2626 		 * map EAGAIN to ENOMEM since EAGAIN means "flow controlled".
   2627 		 */
   2628 		return (error == EAGAIN ? ENOMEM : error);
   2629 	}
   2630 	if (mctl != NULL) {
   2631 		if (mctl->b_cont == NULL)
   2632 			mctl->b_cont = mp;
   2633 		else if (mp != NULL)
   2634 			linkb(mctl, mp);
   2635 		mp = mctl;
   2636 	} else if (mp == NULL) {
   2637 		return (0);
   2638 	}
   2639 
   2640 	mp->b_flag |= b_flag;
   2641 	mp->b_band = (uchar_t)pri;
   2642 
   2643 	(void) uiodup(uiop, &uiod.d_uio, uiod.d_iov,
   2644 	    sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
   2645 	uiod.d_uio.uio_offset = 0;
   2646 	uiod.d_mp = mp;
   2647 	error = rwnext(wqp, &uiod);
   2648 	if (! uiod.d_mp) {
   2649 		uioskip(uiop, *iosize);
   2650 		return (error);
   2651 	}
   2652 	ASSERT(mp == uiod.d_mp);
   2653 	if (error == EINVAL) {
   2654 		/*
   2655 		 * The stream plumbing must have changed while
   2656 		 * we were away, so just turn off rwnext()s.
   2657 		 */
   2658 		error = 0;
   2659 	} else if (error == EBUSY || error == EWOULDBLOCK) {
   2660 		/*
   2661 		 * Couldn't enter a perimeter or took a page fault,
   2662 		 * so fall-back to putnext().
   2663 		 */
   2664 		error = 0;
   2665 	} else {
   2666 		freemsg(mp);
   2667 		return (error);
   2668 	}
   2669 	/* Have to check canput before consuming data from the uio */
   2670 	if (pri == 0) {
   2671 		if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
   2672 			freemsg(mp);
   2673 			return (EWOULDBLOCK);
   2674 		}
   2675 	} else {
   2676 		if (!bcanputnext(wqp, pri) && !(flags & MSG_IGNFLOW)) {
   2677 			freemsg(mp);
   2678 			return (EWOULDBLOCK);
   2679 		}
   2680 	}
   2681 	ASSERT(mp == uiod.d_mp);
   2682 	/* Copyin data from the uio */
   2683 	if ((error = struioget(wqp, mp, &uiod, 0)) != 0) {
   2684 		freemsg(mp);
   2685 		return (error);
   2686 	}
   2687 	uioskip(uiop, *iosize);
   2688 	if (flags & MSG_IGNFLOW) {
   2689 		/*
   2690 		 * XXX Hack: Don't get stuck running service procedures.
   2691 		 * This is needed for sockfs when sending the unbind message
   2692 		 * out of the rput procedure - we don't want a put procedure
   2693 		 * to run service procedures.
   2694 		 */
   2695 		putnext(wqp, mp);
   2696 	} else {
   2697 		stream_willservice(stp);
   2698 		putnext(wqp, mp);
   2699 		stream_runservice(stp);
   2700 	}
   2701 	return (0);
   2702 }
   2703 
   2704 /*
   2705  * Write attempts to break the write request into messages conforming
   2706  * with the minimum and maximum packet sizes set downstream.
   2707  *
   2708  * Write will not block if downstream queue is full and
   2709  * O_NDELAY is set, otherwise it will block waiting for the queue to get room.
   2710  *
   2711  * A write of zero bytes gets packaged into a zero length message and sent
   2712  * downstream like any other message.
   2713  *
   2714  * If buffers of the requested sizes are not available, the write will
   2715  * sleep until the buffers become available.
   2716  *
   2717  * Write (if specified) will supply a write offset in a message if it
   2718  * makes sense. This can be specified by downstream modules as part of
   2719  * a M_SETOPTS message.  Write will not supply the write offset if it
   2720  * cannot supply any data in a buffer.  In other words, write will never
   2721  * send down an empty packet due to a write offset.
   2722  */
   2723 /* ARGSUSED2 */
   2724 int
   2725 strwrite(struct vnode *vp, struct uio *uiop, cred_t *crp)
   2726 {
   2727 	return (strwrite_common(vp, uiop, crp, 0));
   2728 }
   2729 
   2730 /* ARGSUSED2 */
   2731 int
   2732 strwrite_common(struct vnode *vp, struct uio *uiop, cred_t *crp, int wflag)
   2733 {
   2734 	struct stdata *stp;
   2735 	struct queue *wqp;
   2736 	ssize_t rmin, rmax;
   2737 	ssize_t iosize;
   2738 	int waitflag;
   2739 	int tempmode;
   2740 	int error = 0;
   2741 	int b_flag;
   2742 
   2743 	ASSERT(vp->v_stream);
   2744 	stp = vp->v_stream;
   2745 
   2746 	mutex_enter(&stp->sd_lock);
   2747 
   2748 	if ((error = i_straccess(stp, JCWRITE)) != 0) {
   2749 		mutex_exit(&stp->sd_lock);
   2750 		return (error);
   2751 	}
   2752 
   2753 	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
   2754 		error = strwriteable(stp, B_TRUE, B_TRUE);
   2755 		if (error != 0) {
   2756 			mutex_exit(&stp->sd_lock);
   2757 			return (error);
   2758 		}
   2759 	}
   2760 
   2761 	mutex_exit(&stp->sd_lock);
   2762 
   2763 	wqp = stp->sd_wrq;
   2764 
   2765 	/* get these values from them cached in the stream head */
   2766 	rmin = stp->sd_qn_minpsz;
   2767 	rmax = stp->sd_qn_maxpsz;
   2768 
   2769 	/*
   2770 	 * Check the min/max packet size constraints.  If min packet size
   2771 	 * is non-zero, the write cannot be split into multiple messages
   2772 	 * and still guarantee the size constraints.
   2773 	 */
   2774 	TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_IN, "strwrite in:q %p", wqp);
   2775 
   2776 	ASSERT((rmax >= 0) || (rmax == INFPSZ));
   2777 	if (rmax == 0) {
   2778 		return (0);
   2779 	}
   2780 	if (rmin > 0) {
   2781 		if (uiop->uio_resid < rmin) {
   2782 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
   2783 			    "strwrite out:q %p out %d error %d",
   2784 			    wqp, 0, ERANGE);
   2785 			return (ERANGE);
   2786 		}
   2787 		if ((rmax != INFPSZ) && (uiop->uio_resid > rmax)) {
   2788 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
   2789 			    "strwrite out:q %p out %d error %d",
   2790 			    wqp, 1, ERANGE);
   2791 			return (ERANGE);
   2792 		}
   2793 	}
   2794 
   2795 	/*
   2796 	 * Do until count satisfied or error.
   2797 	 */
   2798 	waitflag = WRITEWAIT | wflag;
   2799 	if (stp->sd_flag & OLDNDELAY)
   2800 		tempmode = uiop->uio_fmode & ~FNDELAY;
   2801 	else
   2802 		tempmode = uiop->uio_fmode;
   2803 
   2804 	if (rmax == INFPSZ)
   2805 		rmax = uiop->uio_resid;
   2806 
   2807 	/*
   2808 	 * Note that tempmode does not get used in strput/strmakedata
   2809 	 * but only in strwaitq. The other routines use uio_fmode
   2810 	 * unmodified.
   2811 	 */
   2812 
   2813 	/* LINTED: constant in conditional context */
   2814 	while (1) {	/* breaks when uio_resid reaches zero */
   2815 		/*
   2816 		 * Determine the size of the next message to be
   2817 		 * packaged.  May have to break write into several
   2818 		 * messages based on max packet size.
   2819 		 */
   2820 		iosize = MIN(uiop->uio_resid, rmax);
   2821 
   2822 		/*
   2823 		 * Put block downstream when flow control allows it.
   2824 		 */
   2825 		if ((stp->sd_flag & STRDELIM) && (uiop->uio_resid == iosize))
   2826 			b_flag = MSGDELIM;
   2827 		else
   2828 			b_flag = 0;
   2829 
   2830 		for (;;) {
   2831 			int done = 0;
   2832 
   2833 			error = strput(stp, NULL, uiop, &iosize, b_flag, 0, 0);
   2834 			if (error == 0)
   2835 				break;
   2836 			if (error != EWOULDBLOCK)
   2837 				goto out;
   2838 
   2839 			mutex_enter(&stp->sd_lock);
   2840 			/*
   2841 			 * Check for a missed wakeup.
   2842 			 * Needed since strput did not hold sd_lock across
   2843 			 * the canputnext.
   2844 			 */
   2845 			if (canputnext(wqp)) {
   2846 				/* Try again */
   2847 				mutex_exit(&stp->sd_lock);
   2848 				continue;
   2849 			}
   2850 			TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAIT,
   2851 			    "strwrite wait:q %p wait", wqp);
   2852 			if ((error = strwaitq(stp, waitflag, (ssize_t)0,
   2853 			    tempmode, -1, &done)) != 0 || done) {
   2854 				mutex_exit(&stp->sd_lock);
   2855 				if ((vp->v_type == VFIFO) &&
   2856 				    (uiop->uio_fmode & FNDELAY) &&
   2857 				    (error == EAGAIN))
   2858 					error = 0;
   2859 				goto out;
   2860 			}
   2861 			TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAKE,
   2862 			    "strwrite wake:q %p awakes", wqp);
   2863 			if ((error = i_straccess(stp, JCWRITE)) != 0) {
   2864 				mutex_exit(&stp->sd_lock);
   2865 				goto out;
   2866 			}
   2867 			mutex_exit(&stp->sd_lock);
   2868 		}
   2869 		waitflag |= NOINTR;
   2870 		TRACE_2(TR_FAC_STREAMS_FR, TR_STRWRITE_RESID,
   2871 		    "strwrite resid:q %p uiop %p", wqp, uiop);
   2872 		if (uiop->uio_resid) {
   2873 			/* Recheck for errors - needed for sockets */
   2874 			if ((stp->sd_wput_opt & SW_RECHECK_ERR) &&
   2875 			    (stp->sd_flag & (STWRERR|STRHUP|STPLEX))) {
   2876 				mutex_enter(&stp->sd_lock);
   2877 				error = strwriteable(stp, B_FALSE, B_TRUE);
   2878 				mutex_exit(&stp->sd_lock);
   2879 				if (error != 0)
   2880 					return (error);
   2881 			}
   2882 			continue;
   2883 		}
   2884 		break;
   2885 	}
   2886 out:
   2887 	/*
   2888 	 * For historical reasons, applications expect EAGAIN when a data
   2889 	 * mblk_t cannot be allocated, so change ENOMEM back to EAGAIN.
   2890 	 */
   2891 	if (error == ENOMEM)
   2892 		error = EAGAIN;
   2893 	TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
   2894 	    "strwrite out:q %p out %d error %d", wqp, 2, error);
   2895 	return (error);
   2896 }
   2897 
   2898 /*
   2899  * Stream head write service routine.
   2900  * Its job is to wake up any sleeping writers when a queue
   2901  * downstream needs data (part of the flow control in putq and getq).
   2902  * It also must wake anyone sleeping on a poll().
   2903  * For stream head right below mux module, it must also invoke put procedure
   2904  * of next downstream module.
   2905  */
   2906 int
   2907 strwsrv(queue_t *q)
   2908 {
   2909 	struct stdata *stp;
   2910 	queue_t *tq;
   2911 	qband_t *qbp;
   2912 	int i;
   2913 	qband_t *myqbp;
   2914 	int isevent;
   2915 	unsigned char	qbf[NBAND];	/* band flushing backenable flags */
   2916 
   2917 	TRACE_1(TR_FAC_STREAMS_FR,
   2918 	    TR_STRWSRV, "strwsrv:q %p", q);
   2919 	stp = (struct stdata *)q->q_ptr;
   2920 	ASSERT(qclaimed(q));
   2921 	mutex_enter(&stp->sd_lock);
   2922 	ASSERT(!(stp->sd_flag & STPLEX));
   2923 
   2924 	if (stp->sd_flag & WSLEEP) {
   2925 		stp->sd_flag &= ~WSLEEP;
   2926 		cv_broadcast(&q->q_wait);
   2927 	}
   2928 	mutex_exit(&stp->sd_lock);
   2929 
   2930 	/* The other end of a stream pipe went away. */
   2931 	if ((tq = q->q_next) == NULL) {
   2932 		return (0);
   2933 	}
   2934 
   2935 	/* Find the next module forward that has a service procedure */
   2936 	claimstr(q);
   2937 	tq = q->q_nfsrv;
   2938 	ASSERT(tq != NULL);
   2939 
   2940 	if ((q->q_flag & QBACK)) {
   2941 		if ((tq->q_flag & QFULL)) {
   2942 			mutex_enter(QLOCK(tq));
   2943 			if (!(tq->q_flag & QFULL)) {
   2944 				mutex_exit(QLOCK(tq));
   2945 				goto wakeup;
   2946 			}
   2947 			/*
   2948 			 * The queue must have become full again. Set QWANTW
   2949 			 * again so strwsrv will be back enabled when
   2950 			 * the queue becomes non-full next time.
   2951 			 */
   2952 			tq->q_flag |= QWANTW;
   2953 			mutex_exit(QLOCK(tq));
   2954 		} else {
   2955 		wakeup:
   2956 			pollwakeup(&stp->sd_pollist, POLLWRNORM);
   2957 			mutex_enter(&stp->sd_lock);
   2958 			if (stp->sd_sigflags & S_WRNORM)
   2959 				strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
   2960 			mutex_exit(&stp->sd_lock);
   2961 		}
   2962 	}
   2963 
   2964 	isevent = 0;
   2965 	i = 1;
   2966 	bzero((caddr_t)qbf, NBAND);
   2967 	mutex_enter(QLOCK(tq));
   2968 	if ((myqbp = q->q_bandp) != NULL)
   2969 		for (qbp = tq->q_bandp; qbp && myqbp; qbp = qbp->qb_next) {
   2970 			ASSERT(myqbp);
   2971 			if ((myqbp->qb_flag & QB_BACK)) {
   2972 				if (qbp->qb_flag & QB_FULL) {
   2973 					/*
   2974 					 * The band must have become full again.
   2975 					 * Set QB_WANTW again so strwsrv will
   2976 					 * be back enabled when the band becomes
   2977 					 * non-full next time.
   2978 					 */
   2979 					qbp->qb_flag |= QB_WANTW;
   2980 				} else {
   2981 					isevent = 1;
   2982 					qbf[i] = 1;
   2983 				}
   2984 			}
   2985 			myqbp = myqbp->qb_next;
   2986 			i++;
   2987 		}
   2988 	mutex_exit(QLOCK(tq));
   2989 
   2990 	if (isevent) {
   2991 		for (i = tq->q_nband; i; i--) {
   2992 			if (qbf[i]) {
   2993 				pollwakeup(&stp->sd_pollist, POLLWRBAND);
   2994 				mutex_enter(&stp->sd_lock);
   2995 				if (stp->sd_sigflags & S_WRBAND)
   2996 					strsendsig(stp->sd_siglist, S_WRBAND,
   2997 					    (uchar_t)i, 0);
   2998 				mutex_exit(&stp->sd_lock);
   2999 			}
   3000 		}
   3001 	}
   3002 
   3003 	releasestr(q);
   3004 	return (0);
   3005 }
   3006 
   3007 /*
   3008  * Special case of strcopyin/strcopyout for copying
   3009  * struct strioctl that can deal with both data
   3010  * models.
   3011  */
   3012 
   3013 #ifdef	_LP64
   3014 
   3015 static int
   3016 strcopyin_strioctl(void *from, void *to, int flag, int copyflag)
   3017 {
   3018 	struct	strioctl32 strioc32;
   3019 	struct	strioctl *striocp;
   3020 
   3021 	if (copyflag & U_TO_K) {
   3022 		ASSERT((copyflag & K_TO_K) == 0);
   3023 
   3024 		if ((flag & FMODELS) == DATAMODEL_ILP32) {
   3025 			if (copyin(from, &strioc32, sizeof (strioc32)))
   3026 				return (EFAULT);
   3027 
   3028 			striocp = (struct strioctl *)to;
   3029 			striocp->ic_cmd	= strioc32.ic_cmd;
   3030 			striocp->ic_timout = strioc32.ic_timout;
   3031 			striocp->ic_len	= strioc32.ic_len;
   3032 			striocp->ic_dp	= (char *)(uintptr_t)strioc32.ic_dp;
   3033 
   3034 		} else { /* NATIVE data model */
   3035 			if (copyin(from, to, sizeof (struct strioctl))) {
   3036 				return (EFAULT);
   3037 			} else {
   3038 				return (0);
   3039 			}
   3040 		}
   3041 	} else {
   3042 		ASSERT(copyflag & K_TO_K);
   3043 		bcopy(from, to, sizeof (struct strioctl));
   3044 	}
   3045 	return (0);
   3046 }
   3047 
   3048 static int
   3049 strcopyout_strioctl(void *from, void *to, int flag, int copyflag)
   3050 {
   3051 	struct	strioctl32 strioc32;
   3052 	struct	strioctl *striocp;
   3053 
   3054 	if (copyflag & U_TO_K) {
   3055 		ASSERT((copyflag & K_TO_K) == 0);
   3056 
   3057 		if ((flag & FMODELS) == DATAMODEL_ILP32) {
   3058 			striocp = (struct strioctl *)from;
   3059 			strioc32.ic_cmd	= striocp->ic_cmd;
   3060 			strioc32.ic_timout = striocp->ic_timout;
   3061 			strioc32.ic_len	= striocp->ic_len;
   3062 			strioc32.ic_dp	= (caddr32_t)(uintptr_t)striocp->ic_dp;
   3063 			ASSERT((char *)(uintptr_t)strioc32.ic_dp ==
   3064 			    striocp->ic_dp);
   3065 
   3066 			if (copyout(&strioc32, to, sizeof (strioc32)))
   3067 				return (EFAULT);
   3068 
   3069 		} else { /* NATIVE data model */
   3070 			if (copyout(from, to, sizeof (struct strioctl))) {
   3071 				return (EFAULT);
   3072 			} else {
   3073 				return (0);
   3074 			}
   3075 		}
   3076 	} else {
   3077 		ASSERT(copyflag & K_TO_K);
   3078 		bcopy(from, to, sizeof (struct strioctl));
   3079 	}
   3080 	return (0);
   3081 }
   3082 
   3083 #else	/* ! _LP64 */
   3084 
   3085 /* ARGSUSED2 */
   3086 static int
   3087 strcopyin_strioctl(void *from, void *to, int flag, int copyflag)
   3088 {
   3089 	return (strcopyin(from, to, sizeof (struct strioctl), copyflag));
   3090 }
   3091 
   3092 /* ARGSUSED2 */
   3093 static int
   3094 strcopyout_strioctl(void *from, void *to, int flag, int copyflag)
   3095 {
   3096 	return (strcopyout(from, to, sizeof (struct strioctl), copyflag));
   3097 }
   3098 
   3099 #endif	/* _LP64 */
   3100 
   3101 /*
   3102  * Determine type of job control semantics expected by user.  The
   3103  * possibilities are:
   3104  *	JCREAD	- Behaves like read() on fd; send SIGTTIN
   3105  *	JCWRITE	- Behaves like write() on fd; send SIGTTOU if TOSTOP set
   3106  *	JCSETP	- Sets a value in the stream; send SIGTTOU, ignore TOSTOP
   3107  *	JCGETP	- Gets a value in the stream; no signals.
   3108  * See straccess in strsubr.c for usage of these values.
   3109  *
   3110  * This routine also returns -1 for I_STR as a special case; the
   3111  * caller must call again with the real ioctl number for
   3112  * classification.
   3113  */
   3114 static int
   3115 job_control_type(int cmd)
   3116 {
   3117 	switch (cmd) {
   3118 	case I_STR:
   3119 		return (-1);
   3120 
   3121 	case I_RECVFD:
   3122 	case I_E_RECVFD:
   3123 		return (JCREAD);
   3124 
   3125 	case I_FDINSERT:
   3126 	case I_SENDFD:
   3127 		return (JCWRITE);
   3128 
   3129 	case TCSETA:
   3130 	case TCSETAW:
   3131 	case TCSETAF:
   3132 	case TCSBRK:
   3133 	case TCXONC:
   3134 	case TCFLSH:
   3135 	case TCDSET:	/* Obsolete */
   3136 	case TIOCSWINSZ:
   3137 	case TCSETS:
   3138 	case TCSETSW:
   3139 	case TCSETSF:
   3140 	case TIOCSETD:
   3141 	case TIOCHPCL:
   3142 	case TIOCSETP:
   3143 	case TIOCSETN:
   3144 	case TIOCEXCL:
   3145 	case TIOCNXCL:
   3146 	case TIOCFLUSH:
   3147 	case TIOCSETC:
   3148 	case TIOCLBIS:
   3149 	case TIOCLBIC:
   3150 	case TIOCLSET:
   3151 	case TIOCSBRK:
   3152 	case TIOCCBRK:
   3153 	case TIOCSDTR:
   3154 	case TIOCCDTR:
   3155 	case TIOCSLTC:
   3156 	case TIOCSTOP:
   3157 	case TIOCSTART:
   3158 	case TIOCSTI:
   3159 	case TIOCSPGRP:
   3160 	case TIOCMSET:
   3161 	case TIOCMBIS:
   3162 	case TIOCMBIC:
   3163 	case TIOCREMOTE:
   3164 	case TIOCSIGNAL:
   3165 	case LDSETT:
   3166 	case LDSMAP:	/* Obsolete */
   3167 	case DIOCSETP:
   3168 	case I_FLUSH:
   3169 	case I_SRDOPT:
   3170 	case I_SETSIG:
   3171 	case I_SWROPT:
   3172 	case I_FLUSHBAND:
   3173 	case I_SETCLTIME:
   3174 	case I_SERROPT:
   3175 	case I_ESETSIG:
   3176 	case FIONBIO:
   3177 	case FIOASYNC:
   3178 	case FIOSETOWN:
   3179 	case JBOOT:	/* Obsolete */
   3180 	case JTERM:	/* Obsolete */
   3181 	case JTIMOM:	/* Obsolete */
   3182 	case JZOMBOOT:	/* Obsolete */
   3183 	case JAGENT:	/* Obsolete */
   3184 	case JTRUN:	/* Obsolete */
   3185 	case JXTPROTO:	/* Obsolete */
   3186 	case TIOCSETLD:
   3187 		return (JCSETP);
   3188 	}
   3189 
   3190 	return (JCGETP);
   3191 }
   3192 
   3193 /*
   3194  * ioctl for streams
   3195  */
   3196 int
   3197 strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
   3198     cred_t *crp, int *rvalp)
   3199 {
   3200 	struct stdata *stp;
   3201 	struct strcmd *scp;
   3202 	struct strioctl strioc;
   3203 	struct uio uio;
   3204 	struct iovec iov;
   3205 	int access;
   3206 	mblk_t *mp;
   3207 	int error = 0;
   3208 	int done = 0;
   3209 	ssize_t	rmin, rmax;
   3210 	queue_t *wrq;
   3211 	queue_t *rdq;
   3212 	boolean_t kioctl = B_FALSE;
   3213 
   3214 	if (flag & FKIOCTL) {
   3215 		copyflag = K_TO_K;
   3216 		kioctl = B_TRUE;
   3217 	}
   3218 	ASSERT(vp->v_stream);
   3219 	ASSERT(copyflag == U_TO_K || copyflag == K_TO_K);
   3220 	stp = vp->v_stream;
   3221 
   3222 	TRACE_3(TR_FAC_STREAMS_FR, TR_IOCTL_ENTER,
   3223 	    "strioctl:stp %p cmd %X arg %lX", stp, cmd, arg);
   3224 
   3225 	if (audit_active)
   3226 		audit_strioctl(vp, cmd, arg, flag, copyflag, crp, rvalp);
   3227 
   3228 	/*
   3229 	 * If the copy is kernel to kernel, make sure that the FNATIVE
   3230 	 * flag is set.  After this it would be a serious error to have
   3231 	 * no model flag.
   3232 	 */
   3233 	if (copyflag == K_TO_K)
   3234 		flag = (flag & ~FMODELS) | FNATIVE;
   3235 
   3236 	ASSERT((flag & FMODELS) != 0);
   3237 
   3238 	wrq = stp->sd_wrq;
   3239 	rdq = _RD(wrq);
   3240 
   3241 	access = job_control_type(cmd);
   3242 
   3243 	/* We should never see these here, should be handled by iwscn */
   3244 	if (cmd == SRIOCSREDIR || cmd == SRIOCISREDIR)
   3245 		return (EINVAL);
   3246 
   3247 	mutex_enter(&stp->sd_lock);
   3248 	if ((access != -1) && ((error = i_straccess(stp, access)) != 0)) {
   3249 		mutex_exit(&stp->sd_lock);
   3250 		return (error);
   3251 	}
   3252 	mutex_exit(&stp->sd_lock);
   3253 
   3254 	/*
   3255 	 * Check for sgttyb-related ioctls first, and complain as
   3256 	 * necessary.
   3257 	 */
   3258 	switch (cmd) {
   3259 	case TIOCGETP:
   3260 	case TIOCSETP:
   3261 	case TIOCSETN:
   3262 		if (sgttyb_handling >= 2 && !sgttyb_complaint) {
   3263 			sgttyb_complaint = B_TRUE;
   3264 			cmn_err(CE_NOTE,
   3265 			    "application used obsolete TIOC[GS]ET");
   3266 		}
   3267 		if (sgttyb_handling >= 3) {
   3268 			tsignal(curthread, SIGSYS);
   3269 			return (EIO);
   3270 		}
   3271 		break;
   3272 	}
   3273 
   3274 	mutex_enter(&stp->sd_lock);
   3275 
   3276 	switch (cmd) {
   3277 	case I_RECVFD:
   3278 	case I_E_RECVFD:
   3279 	case I_PEEK:
   3280 	case I_NREAD:
   3281 	case FIONREAD:
   3282 	case FIORDCHK:
   3283 	case I_ATMARK:
   3284 	case FIONBIO:
   3285 	case FIOASYNC:
   3286 		if (stp->sd_flag & (STRDERR|STPLEX)) {
   3287 			error = strgeterr(stp, STRDERR|STPLEX, 0);
   3288 			if (error != 0) {
   3289 				mutex_exit(&stp->sd_lock);
   3290 				return (error);
   3291 			}
   3292 		}
   3293 		break;
   3294 
   3295 	default:
   3296 		if (stp->sd_flag & (STRDERR|STWRERR|STPLEX)) {
   3297 			error = strgeterr(stp, STRDERR|STWRERR|STPLEX, 0);
   3298 			if (error != 0) {
   3299 				mutex_exit(&stp->sd_lock);
   3300 				return (error);
   3301 			}
   3302 		}
   3303 	}
   3304 
   3305 	mutex_exit(&stp->sd_lock);
   3306 
   3307 	switch (cmd) {
   3308 	default:
   3309 		/*
   3310 		 * The stream head has hardcoded knowledge of a
   3311 		 * miscellaneous collection of terminal-, keyboard- and
   3312 		 * mouse-related ioctls, enumerated below.  This hardcoded
   3313 		 * knowledge allows the stream head to automatically
   3314 		 * convert transparent ioctl requests made by userland
   3315 		 * programs into I_STR ioctls which many old STREAMS
   3316 		 * modules and drivers require.
   3317 		 *
   3318 		 * No new ioctls should ever be added to this list.
   3319 		 * Instead, the STREAMS module or driver should be written
   3320 		 * to either handle transparent ioctls or require any
   3321 		 * userland programs to use I_STR ioctls (by returning
   3322 		 * EINVAL to any transparent ioctl requests).
   3323 		 *
   3324 		 * More importantly, removing ioctls from this list should
   3325 		 * be done with the utmost care, since our STREAMS modules
   3326 		 * and drivers *count* on the stream head performing this
   3327 		 * conversion, and thus may panic while processing
   3328 		 * transparent ioctl request for one of these ioctls (keep
   3329 		 * in mind that third party modules and drivers may have
   3330 		 * similar problems).
   3331 		 */
   3332 		if (((cmd & IOCTYPE) == LDIOC) ||
   3333 		    ((cmd & IOCTYPE) == tIOC) ||
   3334 		    ((cmd & IOCTYPE) == TIOC) ||
   3335 		    ((cmd & IOCTYPE) == KIOC) ||
   3336 		    ((cmd & IOCTYPE) == MSIOC) ||
   3337 		    ((cmd & IOCTYPE) == VUIOC)) {
   3338 			/*
   3339 			 * The ioctl is a tty ioctl - set up strioc buffer
   3340 			 * and call strdoioctl() to do the work.
   3341 			 */
   3342 			if (stp->sd_flag & STRHUP)
   3343 				return (ENXIO);
   3344 			strioc.ic_cmd = cmd;
   3345 			strioc.ic_timout = INFTIM;
   3346 
   3347 			switch (cmd) {
   3348 
   3349 			case TCXONC:
   3350 			case TCSBRK:
   3351 			case TCFLSH:
   3352 			case TCDSET:
   3353 				{
   3354 				int native_arg = (int)arg;
   3355 				strioc.ic_len = sizeof (int);
   3356 				strioc.ic_dp = (char *)&native_arg;
   3357 				return (strdoioctl(stp, &strioc, flag,
   3358 				    K_TO_K, crp, rvalp));
   3359 				}
   3360 
   3361 			case TCSETA:
   3362 			case TCSETAW:
   3363 			case TCSETAF:
   3364 				strioc.ic_len = sizeof (struct termio);
   3365 				strioc.ic_dp = (char *)arg;
   3366 				return (strdoioctl(stp, &strioc, flag,
   3367 				    copyflag, crp, rvalp));
   3368 
   3369 			case TCSETS:
   3370 			case TCSETSW:
   3371 			case TCSETSF:
   3372 				strioc.ic_len = sizeof (struct termios);
   3373 				strioc.ic_dp = (char *)arg;
   3374 				return (strdoioctl(stp, &strioc, flag,
   3375 				    copyflag, crp, rvalp));
   3376 
   3377 			case LDSETT:
   3378 				strioc.ic_len = sizeof (struct termcb);
   3379 				strioc.ic_dp = (char *)arg;
   3380 				return (strdoioctl(stp, &strioc, flag,
   3381 				    copyflag, crp, rvalp));
   3382 
   3383 			case TIOCSETP:
   3384 				strioc.ic_len = sizeof (struct sgttyb);
   3385 				strioc.ic_dp = (char *)arg;
   3386 				return (strdoioctl(stp, &strioc, flag,
   3387 				    copyflag, crp, rvalp));
   3388 
   3389 			case TIOCSTI:
   3390 				if ((flag & FREAD) == 0 &&
   3391 				    secpolicy_sti(crp) != 0) {
   3392 					return (EPERM);
   3393 				}
   3394 				mutex_enter(&stp->sd_lock);
   3395 				mutex_enter(&curproc->p_splock);
   3396 				if (stp->sd_sidp != curproc->p_sessp->s_sidp &&
   3397 				    secpolicy_sti(crp) != 0) {
   3398 					mutex_exit(&curproc->p_splock);
   3399 					mutex_exit(&stp->sd_lock);
   3400 					return (EACCES);
   3401 				}
   3402 				mutex_exit(&curproc->p_splock);
   3403 				mutex_exit(&stp->sd_lock);
   3404 
   3405 				strioc.ic_len = sizeof (char);
   3406 				strioc.ic_dp = (char *)arg;
   3407 				return (strdoioctl(stp, &strioc, flag,
   3408 				    copyflag, crp, rvalp));
   3409 
   3410 			case TIOCSWINSZ:
   3411 				strioc.ic_len = sizeof (struct winsize);
   3412 				strioc.ic_dp = (char *)arg;
   3413 				return (strdoioctl(stp, &strioc, flag,
   3414 				    copyflag, crp, rvalp));
   3415 
   3416 			case TIOCSSIZE:
   3417 				strioc.ic_len = sizeof (struct ttysize);
   3418 				strioc.ic_dp = (char *)arg;
   3419 				return (strdoioctl(stp, &strioc, flag,
   3420 				    copyflag, crp, rvalp));
   3421 
   3422 			case TIOCSSOFTCAR:
   3423 			case KIOCTRANS:
   3424 			case KIOCTRANSABLE:
   3425 			case KIOCCMD:
   3426 			case KIOCSDIRECT:
   3427 			case KIOCSCOMPAT:
   3428 			case KIOCSKABORTEN:
   3429 			case KIOCSRPTDELAY:
   3430 			case KIOCSRPTRATE:
   3431 			case VUIDSFORMAT:
   3432 			case TIOCSPPS:
   3433 				strioc.ic_len = sizeof (int);
   3434 				strioc.ic_dp = (char *)arg;
   3435 				return (strdoioctl(stp, &strioc, flag,
   3436 				    copyflag, crp, rvalp));
   3437 
   3438 			case KIOCSETKEY:
   3439 			case KIOCGETKEY:
   3440 				strioc.ic_len = sizeof (struct kiockey);
   3441 				strioc.ic_dp = (char *)arg;
   3442 				return (strdoioctl(stp, &strioc, flag,
   3443 				    copyflag, crp, rvalp));
   3444 
   3445 			case KIOCSKEY:
   3446 			case KIOCGKEY:
   3447 				strioc.ic_len = sizeof (struct kiockeymap);
   3448 				strioc.ic_dp = (char *)arg;
   3449 				return (strdoioctl(stp, &strioc, flag,
   3450 				    copyflag, crp, rvalp));
   3451 
   3452 			case KIOCSLED:
   3453 				/* arg is a pointer to char */
   3454 				strioc.ic_len = sizeof (char);
   3455 				strioc.ic_dp = (char *)arg;
   3456 				return (strdoioctl(stp, &strioc, flag,
   3457 				    copyflag, crp, rvalp));
   3458 
   3459 			case MSIOSETPARMS:
   3460 				strioc.ic_len = sizeof (Ms_parms);
   3461 				strioc.ic_dp = (char *)arg;
   3462 				return (strdoioctl(stp, &strioc, flag,
   3463 				    copyflag, crp, rvalp));
   3464 
   3465 			case VUIDSADDR:
   3466 			case VUIDGADDR:
   3467 				strioc.ic_len = sizeof (struct vuid_addr_probe);
   3468 				strioc.ic_dp = (char *)arg;
   3469 				return (strdoioctl(stp, &strioc, flag,
   3470 				    copyflag, crp, rvalp));
   3471 
   3472 			/*
   3473 			 * These M_IOCTL's don't require any data to be sent
   3474 			 * downstream, and the driver will allocate and link
   3475 			 * on its own mblk_t upon M_IOCACK -- thus we set
   3476 			 * ic_len to zero and set ic_dp to arg so we know
   3477 			 * where to copyout to later.
   3478 			 */
   3479 			case TIOCGSOFTCAR:
   3480 			case TIOCGWINSZ:
   3481 			case TIOCGSIZE:
   3482 			case KIOCGTRANS:
   3483 			case KIOCGTRANSABLE:
   3484 			case KIOCTYPE:
   3485 			case KIOCGDIRECT:
   3486 			case KIOCGCOMPAT:
   3487 			case KIOCLAYOUT:
   3488 			case KIOCGLED:
   3489 			case MSIOGETPARMS:
   3490 			case MSIOBUTTONS:
   3491 			case VUIDGFORMAT:
   3492 			case TIOCGPPS:
   3493 			case TIOCGPPSEV:
   3494 			case TCGETA:
   3495 			case TCGETS:
   3496 			case LDGETT:
   3497 			case TIOCGETP:
   3498 			case KIOCGRPTDELAY:
   3499 			case KIOCGRPTRATE:
   3500 				strioc.ic_len = 0;
   3501 				strioc.ic_dp = (char *)arg;
   3502 				return (strdoioctl(stp, &strioc, flag,
   3503 				    copyflag, crp, rvalp));
   3504 			}
   3505 		}
   3506 
   3507 		/*
   3508 		 * Unknown cmd - send it down as a transparent ioctl.
   3509 		 */
   3510 		strioc.ic_cmd = cmd;
   3511 		strioc.ic_timout = INFTIM;
   3512 		strioc.ic_len = TRANSPARENT;
   3513 		strioc.ic_dp = (char *)&arg;
   3514 
   3515 		return (strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp));
   3516 
   3517 	case I_STR:
   3518 		/*
   3519 		 * Stream ioctl.  Read in an strioctl buffer from the user
   3520 		 * along with any data specified and send it downstream.
		 * strdoioctl() allows only one ioctl message at a time
		 * and waits for the acknowledgement.
   3523 		 */
   3524 
   3525 		if (stp->sd_flag & STRHUP)
   3526 			return (ENXIO);
   3527 
   3528 		error = strcopyin_strioctl((void *)arg, &strioc, flag,
   3529 		    copyflag);
   3530 		if (error != 0)
   3531 			return (error);
   3532 
   3533 		if ((strioc.ic_len < 0) || (strioc.ic_timout < -1))
   3534 			return (EINVAL);
   3535 
   3536 		access = job_control_type(strioc.ic_cmd);
   3537 		mutex_enter(&stp->sd_lock);
   3538 		if ((access != -1) &&
   3539 		    ((error = i_straccess(stp, access)) != 0)) {
   3540 			mutex_exit(&stp->sd_lock);
   3541 			return (error);
   3542 		}
   3543 		mutex_exit(&stp->sd_lock);
   3544 
   3545 		/*
   3546 		 * The I_STR facility provides a trap door for malicious
   3547 		 * code to send down bogus streamio(7I) ioctl commands to
   3548 		 * unsuspecting STREAMS modules and drivers which expect to
   3549 		 * only get these messages from the stream head.
   3550 		 * Explicitly prohibit any streamio ioctls which can be
   3551 		 * passed downstream by the stream head.  Note that we do
   3552 		 * not block all streamio ioctls because the ioctl
   3553 		 * numberspace is not well managed and thus it's possible
   3554 		 * that a module or driver's ioctl numbers may accidentally
   3555 		 * collide with them.
   3556 		 */
   3557 		switch (strioc.ic_cmd) {
   3558 		case I_LINK:
   3559 		case I_PLINK:
   3560 		case I_UNLINK:
   3561 		case I_PUNLINK:
   3562 		case _I_GETPEERCRED:
   3563 		case _I_PLINK_LH:
   3564 			return (EINVAL);
   3565 		}
   3566 
   3567 		error = strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp);
   3568 		if (error == 0) {
   3569 			error = strcopyout_strioctl(&strioc, (void *)arg,
   3570 			    flag, copyflag);
   3571 		}
   3572 		return (error);
   3573 
   3574 	case _I_CMD:
   3575 		/*
   3576 		 * Like I_STR, but without using M_IOC* messages and without
   3577 		 * copyins/copyouts beyond the passed-in argument.
   3578 		 */
   3579 		if (stp->sd_flag & STRHUP)
   3580 			return (ENXIO);
   3581 
   3582 		if ((scp = kmem_alloc(sizeof (strcmd_t), KM_NOSLEEP)) == NULL)
   3583 			return (ENOMEM);
   3584 
   3585 		if (copyin((void *)arg, scp, sizeof (strcmd_t))) {
   3586 			kmem_free(scp, sizeof (strcmd_t));
   3587 			return (EFAULT);
   3588 		}
   3589 
   3590 		access = job_control_type(scp->sc_cmd);
   3591 		mutex_enter(&stp->sd_lock);
   3592 		if (access != -1 && (error = i_straccess(stp, access)) != 0) {
   3593 			mutex_exit(&stp->sd_lock);
   3594 			kmem_free(scp, sizeof (strcmd_t));
   3595 			return (error);
   3596 		}
   3597 		mutex_exit(&stp->sd_lock);
   3598 
   3599 		*rvalp = 0;
   3600 		if ((error = strdocmd(stp, scp, crp)) == 0) {
   3601 			if (copyout(scp, (void *)arg, sizeof (strcmd_t)))
   3602 				error = EFAULT;
   3603 		}
   3604 		kmem_free(scp, sizeof (strcmd_t));
   3605 		return (error);
   3606 
   3607 	case I_NREAD:
   3608 		/*
   3609 		 * Return number of bytes of data in first message
   3610 		 * in queue in "arg" and return the number of messages
   3611 		 * in queue in return value.
   3612 		 */
   3613 	{
   3614 		size_t	size;
   3615 		int	retval;
   3616 		int	count = 0;
   3617 
   3618 		mutex_enter(QLOCK(rdq));
   3619 
   3620 		size = msgdsize(rdq->q_first);
   3621 		for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
   3622 			count++;
   3623 
   3624 		mutex_exit(QLOCK(rdq));
   3625 		if (stp->sd_struiordq) {
   3626 			infod_t infod;
   3627 
   3628 			infod.d_cmd = INFOD_COUNT;
   3629 			infod.d_count = 0;
   3630 			if (count == 0) {
   3631 				infod.d_cmd |= INFOD_FIRSTBYTES;
   3632 				infod.d_bytes = 0;
   3633 			}
   3634 			infod.d_res = 0;
   3635 			(void) infonext(rdq, &infod);
   3636 			count += infod.d_count;
   3637 			if (infod.d_res & INFOD_FIRSTBYTES)
   3638 				size = infod.d_bytes;
   3639 		}
   3640 
   3641 		/*
   3642 		 * Drop down from size_t to the "int" required by the
   3643 		 * interface.  Cap at INT_MAX.
   3644 		 */
   3645 		retval = MIN(size, INT_MAX);
   3646 		error = strcopyout(&retval, (void *)arg, sizeof (retval),
   3647 		    copyflag);
   3648 		if (!error)
   3649 			*rvalp = count;
   3650 		return (error);
   3651 	}
   3652 
   3653 	case FIONREAD:
   3654 		/*
   3655 		 * Return number of bytes of data in all data messages
   3656 		 * in queue in "arg".
   3657 		 */
   3658 	{
   3659 		size_t	size = 0;
   3660 		int	retval;
   3661 
   3662 		mutex_enter(QLOCK(rdq));
   3663 		for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
   3664 			size += msgdsize(mp);
   3665 		mutex_exit(QLOCK(rdq));
   3666 
   3667 		if (stp->sd_struiordq) {
   3668 			infod_t infod;
   3669 
   3670 			infod.d_cmd = INFOD_BYTES;
   3671 			infod.d_res = 0;
   3672 			infod.d_bytes = 0;
   3673 			(void) infonext(rdq, &infod);
   3674 			size += infod.d_bytes;
   3675 		}
   3676 
   3677 		/*
   3678 		 * Drop down from size_t to the "int" required by the
   3679 		 * interface.  Cap at INT_MAX.
   3680 		 */
   3681 		retval = MIN(size, INT_MAX);
   3682 		error = strcopyout(&retval, (void *)arg, sizeof (retval),
   3683 		    copyflag);
   3684 
   3685 		*rvalp = 0;
   3686 		return (error);
   3687 	}
   3688 	case FIORDCHK:
   3689 		/*
   3690 		 * FIORDCHK does not use arg value (like FIONREAD),
   3691 		 * instead a count is returned. I_NREAD value may
   3692 		 * not be accurate but safe. The real thing to do is
   3693 		 * to add the msgdsizes of all data  messages until
   3694 		 * a non-data message.
   3695 		 */
   3696 	{
   3697 		size_t size = 0;
   3698 
   3699 		mutex_enter(QLOCK(rdq));
   3700 		for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
   3701 			size += msgdsize(mp);
   3702 		mutex_exit(QLOCK(rdq));
   3703 
   3704 		if (stp->sd_struiordq) {
   3705 			infod_t infod;
   3706 
   3707 			infod.d_cmd = INFOD_BYTES;
   3708 			infod.d_res = 0;
   3709 			infod.d_bytes = 0;
   3710 			(void) infonext(rdq, &infod);
   3711 			size += infod.d_bytes;
   3712 		}
   3713 
   3714 		/*
   3715 		 * Since ioctl returns an int, and memory sizes under
   3716 		 * LP64 may not fit, we return INT_MAX if the count was
   3717 		 * actually greater.
   3718 		 */
   3719 		*rvalp = MIN(size, INT_MAX);
   3720 		return (0);
   3721 	}
   3722 
   3723 	case I_FIND:
   3724 		/*
   3725 		 * Get module name.
   3726 		 */
   3727 	{
   3728 		char mname[FMNAMESZ + 1];
   3729 		queue_t *q;
   3730 
   3731 		error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg,
   3732 		    mname, FMNAMESZ + 1, NULL);
   3733 		if (error)
   3734 			return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
   3735 
   3736 		/*
   3737 		 * Return EINVAL if we're handed a bogus module name.
   3738 		 */
   3739 		if (fmodsw_find(mname, FMODSW_LOAD) == NULL) {
   3740 			TRACE_0(TR_FAC_STREAMS_FR,
   3741 			    TR_I_CANT_FIND, "couldn't I_FIND");
   3742 			return (EINVAL);
   3743 		}
   3744 
   3745 		*rvalp = 0;
   3746 
   3747 		/* Look downstream to see if module is there. */
   3748 		claimstr(stp->sd_wrq);
   3749 		for (q = stp->sd_wrq->q_next; q; q = q->q_next) {
   3750 			if (q->q_flag & QREADR) {
   3751 				q = NULL;
   3752 				break;
   3753 			}
   3754 			if (strcmp(mname, Q2NAME(q)) == 0)
   3755 				break;
   3756 		}
   3757 		releasestr(stp->sd_wrq);
   3758 
   3759 		*rvalp = (q ? 1 : 0);
   3760 		return (error);
   3761 	}
   3762 
   3763 	case I_PUSH:
   3764 	case __I_PUSH_NOCTTY:
   3765 		/*
   3766 		 * Push a module.
   3767 		 * For the case __I_PUSH_NOCTTY push a module but
   3768 		 * do not allocate controlling tty. See bugid 4025044
   3769 		 */
   3770 
   3771 	{
   3772 		char mname[FMNAMESZ + 1];
   3773 		fmodsw_impl_t *fp;
   3774 		dev_t dummydev;
   3775 
   3776 		if (stp->sd_flag & STRHUP)
   3777 			return (ENXIO);
   3778 
   3779 		/*
   3780 		 * Get module name and look up in fmodsw.
   3781 		 */
   3782 		error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg,
   3783 		    mname, FMNAMESZ + 1, NULL);
   3784 		if (error)
   3785 			return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
   3786 
   3787 		if ((fp = fmodsw_find(mname, FMODSW_HOLD | FMODSW_LOAD)) ==
   3788 		    NULL)
   3789 			return (EINVAL);
   3790 
   3791 		TRACE_2(TR_FAC_STREAMS_FR, TR_I_PUSH,
   3792 		    "I_PUSH:fp %p stp %p", fp, stp);
   3793 
   3794 		if (error = strstartplumb(stp, flag, cmd)) {
   3795 			fmodsw_rele(fp);
   3796 			return (error);
   3797 		}
   3798 
   3799 		/*
   3800 		 * See if any more modules can be pushed on this stream.
   3801 		 * Note that this check must be done after strstartplumb()
   3802 		 * since otherwise multiple threads issuing I_PUSHes on
   3803 		 * the same stream will be able to exceed nstrpush.
   3804 		 */
   3805 		mutex_enter(&stp->sd_lock);
   3806 		if (stp->sd_pushcnt >= nstrpush) {
   3807 			fmodsw_rele(fp);
   3808 			strendplumb(stp);
   3809 			mutex_exit(&stp->sd_lock);
   3810 			return (EINVAL);
   3811 		}
   3812 		mutex_exit(&stp->sd_lock);
   3813 
   3814 		/*
   3815 		 * Push new module and call its open routine
   3816 		 * via qattach().  Modules don't change device
   3817 		 * numbers, so just ignore dummydev here.
   3818 		 */
   3819 		dummydev = vp->v_rdev;
   3820 		if ((error = qattach(rdq, &dummydev, 0, crp, fp,
   3821 		    B_FALSE)) == 0) {
   3822 			if (vp->v_type == VCHR && /* sorry, no pipes allowed */
   3823 			    (cmd == I_PUSH) && (stp->sd_flag & STRISTTY)) {
   3824 				/*
   3825 				 * try to allocate it as a controlling terminal
   3826 				 */
   3827 				(void) strctty(stp);
   3828 			}
   3829 		}
   3830 
   3831 		mutex_enter(&stp->sd_lock);
   3832 
   3833 		/*
   3834 		 * As a performance concern we are caching the values of
   3835 		 * q_minpsz and q_maxpsz of the module below the stream
   3836 		 * head in the stream head.
   3837 		 */
   3838 		mutex_enter(QLOCK(stp->sd_wrq->q_next));
   3839 		rmin = stp->sd_wrq->q_next->q_minpsz;
   3840 		rmax = stp->sd_wrq->q_next->q_maxpsz;
   3841 		mutex_exit(QLOCK(stp->sd_wrq->q_next));
   3842 
   3843 		/* Do this processing here as a performance concern */
   3844 		if (strmsgsz != 0) {
   3845 			if (rmax == INFPSZ)
   3846 				rmax = strmsgsz;
   3847 			else  {
   3848 				if (vp->v_type == VFIFO)
   3849 					rmax = MIN(PIPE_BUF, rmax);
   3850 				else	rmax = MIN(strmsgsz, rmax);
   3851 			}
   3852 		}
   3853 
   3854 		mutex_enter(QLOCK(wrq));
   3855 		stp->sd_qn_minpsz = rmin;
   3856 		stp->sd_qn_maxpsz = rmax;
   3857 		mutex_exit(QLOCK(wrq));
   3858 
   3859 		strendplumb(stp);
   3860 		mutex_exit(&stp->sd_lock);
   3861 		return (error);
   3862 	}
   3863 
   3864 	case I_POP:
   3865 	{
   3866 		queue_t	*q;
   3867 
   3868 		if (stp->sd_flag & STRHUP)
   3869 			return (ENXIO);
   3870 		if (!wrq->q_next)	/* for broken pipes */
   3871 			return (EINVAL);
   3872 
   3873 		if (error = strstartplumb(stp, flag, cmd))
   3874 			return (error);
   3875 
   3876 		/*
   3877 		 * If there is an anchor on this stream and popping
   3878 		 * the current module would attempt to pop through the
   3879 		 * anchor, then disallow the pop unless we have sufficient
   3880 		 * privileges; take the cheapest (non-locking) check
   3881 		 * first.
   3882 		 */
   3883 		if (secpolicy_ip_config(crp, B_TRUE) != 0 ||
   3884 		    (stp->sd_anchorzone != crgetzoneid(crp))) {
   3885 			mutex_enter(&stp->sd_lock);
   3886 			/*
   3887 			 * Anchors only apply if there's at least one
   3888 			 * module on the stream (sd_pushcnt > 0).
   3889 			 */
   3890 			if (stp->sd_pushcnt > 0 &&
   3891 			    stp->sd_pushcnt == stp->sd_anchor &&
   3892 			    stp->sd_vnode->v_type != VFIFO) {
   3893 				strendplumb(stp);
   3894 				mutex_exit(&stp->sd_lock);
   3895 				if (stp->sd_anchorzone != crgetzoneid(crp))
   3896 					return (EINVAL);
   3897 				/* Audit and report error */
   3898 				return (secpolicy_ip_config(crp, B_FALSE));
   3899 			}
   3900 			mutex_exit(&stp->sd_lock);
   3901 		}
   3902 
   3903 		q = wrq->q_next;
   3904 		TRACE_2(TR_FAC_STREAMS_FR, TR_I_POP,
   3905 		    "I_POP:%p from %p", q, stp);
   3906 		if (q->q_next == NULL || (q->q_flag & (QREADR|QISDRV))) {
   3907 			error = EINVAL;
   3908 		} else {
   3909 			qdetach(_RD(q), 1, flag, crp, B_FALSE);
   3910 			error = 0;
   3911 		}
   3912 		mutex_enter(&stp->sd_lock);
   3913 
   3914 		/*
   3915 		 * As a performance concern we are caching the values of
   3916 		 * q_minpsz and q_maxpsz of the module below the stream
   3917 		 * head in the stream head.
   3918 		 */
   3919 		mutex_enter(QLOCK(wrq->q_next));
   3920 		rmin = wrq->q_next->q_minpsz;
   3921 		rmax = wrq->q_next->q_maxpsz;
   3922 		mutex_exit(QLOCK(wrq->q_next));
   3923 
   3924 		/* Do this processing here as a performance concern */
   3925 		if (strmsgsz != 0) {
   3926 			if (rmax == INFPSZ)
   3927 				rmax = strmsgsz;
   3928 			else  {
   3929 				if (vp->v_type == VFIFO)
   3930 					rmax = MIN(PIPE_BUF, rmax);
   3931 				else	rmax = MIN(strmsgsz, rmax);
   3932 			}
   3933 		}
   3934 
   3935 		mutex_enter(QLOCK(wrq));
   3936 		stp->sd_qn_minpsz = rmin;
   3937 		stp->sd_qn_maxpsz = rmax;
   3938 		mutex_exit(QLOCK(wrq));
   3939 
   3940 		/* If we popped through the anchor, then reset the anchor. */
   3941 		if (stp->sd_pushcnt < stp->sd_anchor) {
   3942 			stp->sd_anchor = 0;
   3943 			stp->sd_anchorzone = 0;
   3944 		}
   3945 		strendplumb(stp);
   3946 		mutex_exit(&stp->sd_lock);
   3947 		return (error);
   3948 	}
   3949 
   3950 	case _I_MUXID2FD:
   3951 	{
   3952 		/*
   3953 		 * Create a fd for a I_PLINK'ed lower stream with a given
   3954 		 * muxid.  With the fd, application can send down ioctls,
   3955 		 * like I_LIST, to the previously I_PLINK'ed stream.  Note
   3956 		 * that after getting the fd, the application has to do an
   3957 		 * I_PUNLINK on the muxid before it can do any operation
   3958 		 * on the lower stream.  This is required by spec1170.
   3959 		 *
   3960 		 * The fd used to do this ioctl should point to the same
   3961 		 * controlling device used to do the I_PLINK.  If it uses
   3962 		 * a different stream or an invalid muxid, I_MUXID2FD will
   3963 		 * fail.  The error code is set to EINVAL.
   3964 		 *
   3965 		 * The intended use of this interface is the following.
   3966 		 * An application I_PLINK'ed a stream and exits.  The fd
   3967 		 * to the lower stream is gone.  Another application
   3968 		 * wants to get a fd to the lower stream, it uses I_MUXID2FD.
   3969 		 */
   3970 		int muxid = (int)arg;
   3971 		int fd;
   3972 		linkinfo_t *linkp;
   3973 		struct file *fp;
   3974 		netstack_t *ns;
   3975 		str_stack_t *ss;
   3976 
   3977 		/*
   3978 		 * Do not allow the wildcard muxid.  This ioctl is not
   3979 		 * intended to find arbitrary link.
   3980 		 */
   3981 		if (muxid == 0) {
   3982 			return (EINVAL);
   3983 		}
   3984 
   3985 		ns = netstack_find_by_cred(crp);
   3986 		ASSERT(ns != NULL);
   3987 		ss = ns->netstack_str;
   3988 		ASSERT(ss != NULL);
   3989 
   3990 		mutex_enter(&muxifier);
   3991 		linkp = findlinks(vp->v_stream, muxid, LINKPERSIST, ss);
   3992 		if (linkp == NULL) {
   3993 			mutex_exit(&muxifier);
   3994 			netstack_rele(ss->ss_netstack);
   3995 			return (EINVAL);
   3996 		}
   3997 
   3998 		if ((fd = ufalloc(0)) == -1) {
   3999 			mutex_exit(&muxifier);
   4000 			netstack_rele(ss->ss_netstack);
   4001 			return (EMFILE);
   4002 		}
   4003 		fp = linkp->li_fpdown;
   4004 		mutex_enter(&fp->f_tlock);
   4005 		fp->f_count++;
   4006 		mutex_exit(&fp->f_tlock);
   4007 		mutex_exit(&muxifier);
   4008 		setf(fd, fp);
   4009 		*rvalp = fd;
   4010 		netstack_rele(ss->ss_netstack);
   4011 		return (0);
   4012 	}
   4013 
   4014 	case _I_INSERT:
   4015 	{
   4016 		/*
   4017 		 * To insert a module to a given position in a stream.
   4018 		 * In the first release, only allow privileged user
   4019 		 * to use this ioctl. Furthermore, the insert is only allowed
   4020 		 * below an anchor if the zoneid is the same as the zoneid
   4021 		 * which created the anchor.
   4022 		 *
   4023 		 * Note that we do not plan to support this ioctl
   4024 		 * on pipes in the first release.  We want to learn more
   4025 		 * about the implications of these ioctls before extending
   4026 		 * their support.  And we do not think these features are
   4027 		 * valuable for pipes.
   4028 		 */
   4029 		STRUCT_DECL(strmodconf, strmodinsert);
   4030 		char mod_name[FMNAMESZ + 1];
   4031 		fmodsw_impl_t *fp;
   4032 		dev_t dummydev;
   4033 		queue_t *tmp_wrq;
   4034 		int pos;
   4035 		boolean_t is_insert;
   4036 
   4037 		STRUCT_INIT(strmodinsert, flag);
   4038 		if (stp->sd_flag & STRHUP)
   4039 			return (ENXIO);
   4040 		if (STRMATED(stp))
   4041 			return (EINVAL);
   4042 		if ((error = secpolicy_net_config(crp, B_FALSE)) != 0)
   4043 			return (error);
   4044 		if (stp->sd_anchor != 0 &&
   4045 		    stp->sd_anchorzone != crgetzoneid(crp))
   4046 			return (EINVAL);
   4047 
   4048 		error = strcopyin((void *)arg, STRUCT_BUF(strmodinsert),
   4049 		    STRUCT_SIZE(strmodinsert), copyflag);
   4050 		if (error)
   4051 			return (error);
   4052 
   4053 		/*
   4054 		 * Get module name and look up in fmodsw.
   4055 		 */
   4056 		error = (copyflag & U_TO_K ? copyinstr :
   4057 		    copystr)(STRUCT_FGETP(strmodinsert, mod_name),
   4058 		    mod_name, FMNAMESZ + 1, NULL);
   4059 		if (error)
   4060 			return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
   4061 
   4062 		if ((fp = fmodsw_find(mod_name, FMODSW_HOLD | FMODSW_LOAD)) ==
   4063 		    NULL)
   4064 			return (EINVAL);
   4065 
   4066 		if (error = strstartplumb(stp, flag, cmd)) {
   4067 			fmodsw_rele(fp);
   4068 			return (error);
   4069 		}
   4070 
   4071 		/*
   4072 		 * Is this _I_INSERT just like an I_PUSH?  We need to know
   4073 		 * this because we do some optimizations if this is a
   4074 		 * module being pushed.
   4075 		 */
   4076 		pos = STRUCT_FGET(strmodinsert, pos);
   4077 		is_insert = (pos != 0);
   4078 
   4079 		/*
   4080 		 * Make sure pos is valid.  Even though it is not an I_PUSH,
   4081 		 * we impose the same limit on the number of modules in a
   4082 		 * stream.
   4083 		 */
   4084 		mutex_enter(&stp->sd_lock);
   4085 		if (stp->sd_pushcnt >= nstrpush || pos < 0 ||
   4086 		    pos > stp->sd_pushcnt) {
   4087 			fmodsw_rele(fp);
   4088 			strendplumb(stp);
   4089 			mutex_exit(&stp->sd_lock);
   4090 			return (EINVAL);
   4091 		}
   4092 		if (stp->sd_anchor != 0) {
   4093 			/*
   4094 			 * Is this insert below the anchor?
   4095 			 * Pushcnt hasn't been increased yet hence
   4096 			 * we test for greater than here, and greater or
   4097 			 * equal after qattach.
   4098 			 */
   4099 			if (pos > (stp->sd_pushcnt - stp->sd_anchor) &&
   4100 			    stp->sd_anchorzone != crgetzoneid(crp)) {
   4101 				fmodsw_rele(fp);
   4102 				strendplumb(stp);
   4103 				mutex_exit(&stp->sd_lock);
   4104 				return (EPERM);
   4105 			}
   4106 		}
   4107 
   4108 		mutex_exit(&stp->sd_lock);
   4109 
   4110 		/*
   4111 		 * First find the position at which this module is to
   4112 		 * be inserted.  We don't need to call claimstr()
   4113 		 * as the stream should not be changing at this point.
   4114 		 *
   4115 		 * Insert new module and call its open routine
   4116 		 * via qattach().  Modules don't change device
   4117 		 * numbers, so just ignore dummydev here.
   4118 		 */
   4119 		for (tmp_wrq = stp->sd_wrq; pos > 0;
   4120 		    tmp_wrq = tmp_wrq->q_next, pos--) {
   4121 			ASSERT(SAMESTR(tmp_wrq));
   4122 		}
   4123 		dummydev = vp->v_rdev;
   4124 		if ((error = qattach(_RD(tmp_wrq), &dummydev, 0, crp,
   4125 		    fp, is_insert)) != 0) {
   4126 			mutex_enter(&stp->sd_lock);
   4127 			strendplumb(stp);
   4128 			mutex_exit(&stp->sd_lock);
   4129 			return (error);
   4130 		}
   4131 
   4132 		mutex_enter(&stp->sd_lock);
   4133 
   4134 		/*
   4135 		 * As a performance concern we are caching the values of
   4136 		 * q_minpsz and q_maxpsz of the module below the stream
   4137 		 * head in the stream head.
   4138 		 */
   4139 		if (!is_insert) {
   4140 			mutex_enter(QLOCK(stp->sd_wrq->q_next));
   4141 			rmin = stp->sd_wrq->q_next->q_minpsz;
   4142 			rmax = stp->sd_wrq->q_next->q_maxpsz;
   4143 			mutex_exit(QLOCK(stp->sd_wrq->q_next));
   4144 
   4145 			/* Do this processing here as a performance concern */
   4146 			if (strmsgsz != 0) {
   4147 				if (rmax == INFPSZ) {
   4148 					rmax = strmsgsz;
   4149 				} else  {
   4150 					rmax = MIN(strmsgsz, rmax);
   4151 				}
   4152 			}
   4153 
   4154 			mutex_enter(QLOCK(wrq));
   4155 			stp->sd_qn_minpsz = rmin;
   4156 			stp->sd_qn_maxpsz = rmax;
   4157 			mutex_exit(QLOCK(wrq));
   4158 		}
   4159 
   4160 		/*
   4161 		 * Need to update the anchor value if this module is
   4162 		 * inserted below the anchor point.
   4163 		 */
   4164 		if (stp->sd_anchor != 0) {
   4165 			pos = STRUCT_FGET(strmodinsert, pos);
   4166 			if (pos >= (stp->sd_pushcnt - stp->sd_anchor))
   4167 				stp->sd_anchor++;
   4168 		}
   4169 
   4170 		strendplumb(stp);
   4171 		mutex_exit(&stp->sd_lock);
   4172 		return (0);
   4173 	}
   4174 
   4175 	case _I_REMOVE:
   4176 	{
   4177 		/*
   4178 		 * To remove a module with a given name in a stream.  The
   4179 		 * caller of this ioctl needs to provide both the name and
   4180 		 * the position of the module to be removed.  This eliminates
   4181 		 * the ambiguity of removal if a module is inserted/pushed
   4182 		 * multiple times in a stream.  In the first release, only
   4183 		 * allow privileged user to use this ioctl.
   4184 		 * Furthermore, the remove is only allowed
   4185 		 * below an anchor if the zoneid is the same as the zoneid
   4186 		 * which created the anchor.
   4187 		 *
   4188 		 * Note that we do not plan to support this ioctl
   4189 		 * on pipes in the first release.  We want to learn more
   4190 		 * about the implications of these ioctls before extending
   4191 		 * their support.  And we do not think these features are
   4192 		 * valuable for pipes.
   4193 		 *
   4194 		 * Also note that _I_REMOVE cannot be used to remove a
   4195 		 * driver or the stream head.
   4196 		 */
   4197 		STRUCT_DECL(strmodconf, strmodremove);
   4198 		queue_t	*q;
   4199 		int pos;
   4200 		char mod_name[FMNAMESZ + 1];
   4201 		boolean_t is_remove;
   4202 
   4203 		STRUCT_INIT(strmodremove, flag);
   4204 		if (stp->sd_flag & STRHUP)
   4205 			return (ENXIO);
   4206 		if (STRMATED(stp))
   4207 			return (EINVAL);
   4208 		if ((error = secpolicy_net_config(crp, B_FALSE)) != 0)
   4209 			return (error);
   4210 		if (stp->sd_anchor != 0 &&
   4211 		    stp->sd_anchorzone != crgetzoneid(crp))
   4212 			return (EINVAL);
   4213 
   4214 		error = strcopyin((void *)arg, STRUCT_BUF(strmodremove),
   4215 		    STRUCT_SIZE(strmodremove), copyflag);
   4216 		if (error)
   4217 			return (error);
   4218 
   4219 		error = (copyflag & U_TO_K ? copyinstr :
   4220 		    copystr)(STRUCT_FGETP(strmodremove, mod_name),
   4221 		    mod_name, FMNAMESZ + 1, NULL);
   4222 		if (error)
   4223 			return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
   4224 
   4225 		if ((error = strstartplumb(stp, flag, cmd)) != 0)
   4226 			return (error);
   4227 
   4228 		/*
   4229 		 * Match the name of given module to the name of module at
   4230 		 * the given position.
   4231 		 */
   4232 		pos = STRUCT_FGET(strmodremove, pos);
   4233 
   4234 		is_remove = (pos != 0);
   4235 		for (q = stp->sd_wrq->q_next; SAMESTR(q) && pos > 0;
   4236 		    q = q->q_next, pos--)
   4237 			;
   4238 		if (pos > 0 || !SAMESTR(q) ||
   4239 		    strcmp(Q2NAME(q), mod_name) != 0) {
   4240 			mutex_enter(&stp->sd_lock);
   4241 			strendplumb(stp);
   4242 			mutex_exit(&stp->sd_lock);
   4243 			return (EINVAL);
   4244 		}
   4245 
   4246 		/*
   4247 		 * If the position is at or below an anchor, then the zoneid
   4248 		 * must match the zoneid that created the anchor.
   4249 		 */
   4250 		if (stp->sd_anchor != 0) {
   4251 			pos = STRUCT_FGET(strmodremove, pos);
   4252 			if (pos >= (stp->sd_pushcnt - stp->sd_anchor) &&
   4253 			    stp->sd_anchorzone != crgetzoneid(crp)) {
   4254 				mutex_enter(&stp->sd_lock);
   4255 				strendplumb(stp);
   4256 				mutex_exit(&stp->sd_lock);
   4257 				return (EPERM);
   4258 			}
   4259 		}
   4260 
   4261 
   4262 		ASSERT(!(q->q_flag & QREADR));
   4263 		qdetach(_RD(q), 1, flag, crp, is_remove);
   4264 
   4265 		mutex_enter(&stp->sd_lock);
   4266 
   4267 		/*
   4268 		 * As a performance concern we are caching the values of
   4269 		 * q_minpsz and q_maxpsz of the module below the stream
   4270 		 * head in the stream head.
   4271 		 */
   4272 		if (!is_remove) {
   4273 			mutex_enter(QLOCK(wrq->q_next));
   4274 			rmin = wrq->q_next->q_minpsz;
   4275 			rmax = wrq->q_next->q_maxpsz;
   4276 			mutex_exit(QLOCK(wrq->q_next));
   4277 
   4278 			/* Do this processing here as a performance concern */
   4279 			if (strmsgsz != 0) {
   4280 				if (rmax == INFPSZ)
   4281 					rmax = strmsgsz;
   4282 				else  {
   4283 					if (vp->v_type == VFIFO)
   4284 						rmax = MIN(PIPE_BUF, rmax);
   4285 					else	rmax = MIN(strmsgsz, rmax);
   4286 				}
   4287 			}
   4288 
   4289 			mutex_enter(QLOCK(wrq));
   4290 			stp->sd_qn_minpsz = rmin;
   4291 			stp->sd_qn_maxpsz = rmax;
   4292 			mutex_exit(QLOCK(wrq));
   4293 		}
   4294 
   4295 		/*
   4296 		 * Need to update the anchor value if this module is removed
   4297 		 * at or below the anchor point.  If the removed module is at
   4298 		 * the anchor point, remove the anchor for this stream if
   4299 		 * there is no module above the anchor point.  Otherwise, if
   4300 		 * the removed module is below the anchor point, decrement the
   4301 		 * anchor point by 1.
   4302 		 */
   4303 		if (stp->sd_anchor != 0) {
   4304 			pos = STRUCT_FGET(strmodremove, pos);
   4305 			if (pos == stp->sd_pushcnt - stp->sd_anchor + 1)
   4306 				stp->sd_anchor = 0;
   4307 			else if (pos > (stp->sd_pushcnt - stp->sd_anchor + 1))
   4308 				stp->sd_anchor--;
   4309 		}
   4310 
   4311 		strendplumb(stp);
   4312 		mutex_exit(&stp->sd_lock);
   4313 		return (0);
   4314 	}
   4315 
   4316 	case I_ANCHOR:
   4317 		/*
   4318 		 * Set the anchor position on the stream to reside at
   4319 		 * the top module (in other words, the top module
   4320 		 * cannot be popped).  Anchors with a FIFO make no
   4321 		 * obvious sense, so they're not allowed.
   4322 		 */
   4323 		mutex_enter(&stp->sd_lock);
   4324 
   4325 		if (stp->sd_vnode->v_type == VFIFO) {
   4326 			mutex_exit(&stp->sd_lock);
   4327 			return (EINVAL);
   4328 		}
   4329 		/* Only allow the same zoneid to update the anchor */
   4330 		if (stp->sd_anchor != 0 &&
   4331 		    stp->sd_anchorzone != crgetzoneid(crp)) {
   4332 			mutex_exit(&stp->sd_lock);
   4333 			return (EINVAL);
   4334 		}
   4335 		stp->sd_anchor = stp->sd_pushcnt;
   4336 		stp->sd_anchorzone = crgetzoneid(crp);
   4337 		mutex_exit(&stp->sd_lock);
   4338 		return (0);
   4339 
   4340 	case I_LOOK:
   4341 		/*
   4342 		 * Get name of first module downstream.
   4343 		 * If no module, return an error.
   4344 		 */
   4345 		claimstr(wrq);
   4346 		if (_SAMESTR(wrq) && wrq->q_next->q_next != NULL) {
   4347 			char *name = Q2NAME(wrq->q_next);
   4348 
   4349 			error = strcopyout(name, (void *)arg, strlen(name) + 1,
   4350 			    copyflag);
   4351 			releasestr(wrq);
   4352 			return (error);
   4353 		}
   4354 		releasestr(wrq);
   4355 		return (EINVAL);
   4356 
   4357 	case I_LINK:
   4358 	case I_PLINK:
   4359 		/*
   4360 		 * Link a multiplexor.
   4361 		 */
   4362 		return (mlink(vp, cmd, (int)arg, crp, rvalp, 0));
   4363 
   4364 	case _I_PLINK_LH:
   4365 		/*
   4366 		 * Link a multiplexor: Call must originate from kernel.
   4367 		 */
   4368 		if (kioctl)
   4369 			return (ldi_mlink_lh(vp, cmd, arg, crp, rvalp));
   4370 
   4371 		return (EINVAL);
   4372 	case I_UNLINK:
   4373 	case I_PUNLINK:
   4374 		/*
   4375 		 * Unlink a multiplexor.
   4376 		 * If arg is -1, unlink all links for which this is the
   4377 		 * controlling stream.  Otherwise, arg is an index number
   4378 		 * for a link to be removed.
   4379 		 */
   4380 	{
   4381 		struct linkinfo *linkp;
   4382 		int native_arg = (int)arg;
   4383 		int type;
   4384 		netstack_t *ns;
   4385 		str_stack_t *ss;
   4386 
   4387 		TRACE_1(TR_FAC_STREAMS_FR,
   4388 		    TR_I_UNLINK, "I_UNLINK/I_PUNLINK:%p", stp);
   4389 		if (vp->v_type == VFIFO) {
   4390 			return (EINVAL);
   4391 		}
   4392 		if (cmd == I_UNLINK)
   4393 			type = LINKNORMAL;
   4394 		else	/* I_PUNLINK */
   4395 			type = LINKPERSIST;
   4396 		if (native_arg == 0) {
   4397 			return (EINVAL);
   4398 		}
   4399 		ns = netstack_find_by_cred(crp);
   4400 		ASSERT(ns != NULL);
   4401 		ss = ns->netstack_str;
   4402 		ASSERT(ss != NULL);
   4403 
   4404 		if (native_arg == MUXID_ALL)
   4405 			error = munlinkall(stp, type, crp, rvalp, ss);
   4406 		else {
   4407 			mutex_enter(&muxifier);
   4408 			if (!(linkp = findlinks(stp, (int)arg, type, ss))) {
   4409 				/* invalid user supplied index number */
   4410 				mutex_exit(&muxifier);
   4411 				netstack_rele(ss->ss_netstack);
   4412 				return (EINVAL);
   4413 			}
   4414 			/* munlink drops the muxifier lock */
   4415 			error = munlink(stp, linkp, type, crp, rvalp, ss);
   4416 		}
   4417 		netstack_rele(ss->ss_netstack);
   4418 		return (error);
   4419 	}
   4420 
   4421 	case I_FLUSH:
   4422 		/*
   4423 		 * send a flush message downstream
   4424 		 * flush message can indicate
   4425 		 * FLUSHR - flush read queue
   4426 		 * FLUSHW - flush write queue
   4427 		 * FLUSHRW - flush read/write queue
   4428 		 */
   4429 		if (stp->sd_flag & STRHUP)
   4430 			return (ENXIO);
   4431 		if (arg & ~FLUSHRW)
   4432 			return (EINVAL);
   4433 
   4434 		for (;;) {
   4435 			if (putnextctl1(stp->sd_wrq, M_FLUSH, (int)arg)) {
   4436 				break;
   4437 			}
   4438 			if (error = strwaitbuf(1, BPRI_HI)) {
   4439 				return (error);
   4440 			}
   4441 		}
   4442 
   4443 		/*
   4444 		 * Send down an unsupported ioctl and wait for the nack
   4445 		 * in order to allow the M_FLUSH to propagate back
   4446 		 * up to the stream head.
   4447 		 * Replaces if (qready()) runqueues();
   4448 		 */
   4449 		strioc.ic_cmd = -1;	/* The unsupported ioctl */
   4450 		strioc.ic_timout = 0;
   4451 		strioc.ic_len = 0;
   4452 		strioc.ic_dp = NULL;
   4453 		(void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp);
   4454 		*rvalp = 0;
   4455 		return (0);
   4456 
   4457 	case I_FLUSHBAND:
   4458 	{
   4459 		struct bandinfo binfo;
   4460 
   4461 		error = strcopyin((void *)arg, &binfo, sizeof (binfo),
   4462 		    copyflag);
   4463 		if (error)
   4464 			return (error);
   4465 		if (stp->sd_flag & STRHUP)
   4466 			return (ENXIO);
   4467 		if (binfo.bi_flag & ~FLUSHRW)
   4468 			return (EINVAL);
   4469 		while (!(mp = allocb(2, BPRI_HI))) {
   4470 			if (error = strwaitbuf(2, BPRI_HI))
   4471 				return (error);
   4472 		}
   4473 		mp->b_datap->db_type = M_FLUSH;
   4474 		*mp->b_wptr++ = binfo.bi_flag | FLUSHBAND;
   4475 		*mp->b_wptr++ = binfo.bi_pri;
   4476 		putnext(stp->sd_wrq, mp);
   4477 		/*
   4478 		 * Send down an unsupported ioctl and wait for the nack
   4479 		 * in order to allow the M_FLUSH to propagate back
   4480 		 * up to the stream head.
   4481 		 * Replaces if (qready()) runqueues();
   4482 		 */
   4483 		strioc.ic_cmd = -1;	/* The unsupported ioctl */
   4484 		strioc.ic_timout = 0;
   4485 		strioc.ic_len = 0;
   4486 		strioc.ic_dp = NULL;
   4487 		(void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp);
   4488 		*rvalp = 0;
   4489 		return (0);
   4490 	}
   4491 
   4492 	case I_SRDOPT:
   4493 		/*
   4494 		 * Set read options
   4495 		 *
   4496 		 * RNORM - default stream mode
   4497 		 * RMSGN - message no discard
   4498 		 * RMSGD - message discard
   4499 		 * RPROTNORM - fail read with EBADMSG for M_[PC]PROTOs
   4500 		 * RPROTDAT - convert M_[PC]PROTOs to M_DATAs
   4501 		 * RPROTDIS - discard M_[PC]PROTOs and retain M_DATAs
   4502 		 */
   4503 		if (arg & ~(RMODEMASK | RPROTMASK))
   4504 			return (EINVAL);
   4505 
   4506 		if ((arg & (RMSGD|RMSGN)) == (RMSGD|RMSGN))
   4507 			return (EINVAL);
   4508 
   4509 		mutex_enter(&stp->sd_lock);
   4510 		switch (arg & RMODEMASK) {
   4511 		case RNORM:
   4512 			stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS);
   4513 			break;
   4514 		case RMSGD:
   4515 			stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGNODIS) |
   4516 			    RD_MSGDIS;
   4517 			break;
   4518 		case RMSGN:
   4519 			stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGDIS) |
   4520 			    RD_MSGNODIS;
   4521 			break;
   4522 		}
   4523 
   4524 		switch (arg & RPROTMASK) {
   4525 		case RPROTNORM:
   4526 			stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS);
   4527 			break;
   4528 
   4529 		case RPROTDAT:
   4530 			stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDIS) |
   4531 			    RD_PROTDAT);
   4532 			break;
   4533 
   4534 		case RPROTDIS:
   4535 			stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDAT) |
   4536 			    RD_PROTDIS);
   4537 			break;
   4538 		}
   4539 		mutex_exit(&stp->sd_lock);
   4540 		return (0);
   4541 
   4542 	case I_GRDOPT:
   4543 		/*
   4544 		 * Get read option and return the value
   4545 		 * to the spot pointed to by arg
   4546 		 */
   4547 	{
   4548 		int rdopt;
   4549 
   4550 		rdopt = ((stp->sd_read_opt & RD_MSGDIS) ? RMSGD :
   4551 		    ((stp->sd_read_opt & RD_MSGNODIS) ? RMSGN : RNORM));
   4552 		rdopt |= ((stp->sd_read_opt & RD_PROTDAT) ? RPROTDAT :
   4553 		    ((stp->sd_read_opt & RD_PROTDIS) ? RPROTDIS : RPROTNORM));
   4554 
   4555 		return (strcopyout(&rdopt, (void *)arg, sizeof (int),
   4556 		    copyflag));
   4557 	}
   4558 
   4559 	case I_SERROPT:
   4560 		/*
   4561 		 * Set error options
   4562 		 *
   4563 		 * RERRNORM - persistent read errors
   4564 		 * RERRNONPERSIST - non-persistent read errors
   4565 		 * WERRNORM - persistent write errors
   4566 		 * WERRNONPERSIST - non-persistent write errors
   4567 		 */
   4568 		if (arg & ~(RERRMASK | WERRMASK))
   4569 			return (EINVAL);
   4570 
   4571 		mutex_enter(&stp->sd_lock);
   4572 		switch (arg & RERRMASK) {
   4573 		case RERRNORM:
   4574 			stp->sd_flag &= ~STRDERRNONPERSIST;
   4575 			break;
   4576 		case RERRNONPERSIST:
   4577 			stp->sd_flag |= STRDERRNONPERSIST;
   4578 			break;
   4579 		}
   4580 		switch (arg & WERRMASK) {
   4581 		case WERRNORM:
   4582 			stp->sd_flag &= ~STWRERRNONPERSIST;
   4583 			break;
   4584 		case WERRNONPERSIST:
   4585 			stp->sd_flag |= STWRERRNONPERSIST;
   4586 			break;
   4587 		}
   4588 		mutex_exit(&stp->sd_lock);
   4589 		return (0);
   4590 
   4591 	case I_GERROPT:
   4592 		/*
   4593 		 * Get error option and return the value
   4594 		 * to the spot pointed to by arg
   4595 		 */
   4596 	{
   4597 		int erropt = 0;
   4598 
   4599 		erropt |= (stp->sd_flag & STRDERRNONPERSIST) ? RERRNONPERSIST :
   4600 		    RERRNORM;
   4601 		erropt |= (stp->sd_flag & STWRERRNONPERSIST) ? WERRNONPERSIST :
   4602 		    WERRNORM;
   4603 		return (strcopyout(&erropt, (void *)arg, sizeof (int),
   4604 		    copyflag));
   4605 	}
   4606 
   4607 	case I_SETSIG:
   4608 		/*
   4609 		 * Register the calling proc to receive the SIGPOLL
   4610 		 * signal based on the events given in arg.  If
   4611 		 * arg is zero, remove the proc from register list.
   4612 		 */
   4613 	{
   4614 		strsig_t *ssp, *pssp;
   4615 		struct pid *pidp;
   4616 
   4617 		pssp = NULL;
   4618 		pidp = curproc->p_pidp;
   4619 		/*
   4620 		 * Hold sd_lock to prevent traversal of sd_siglist while
   4621 		 * it is modified.
   4622 		 */
   4623 		mutex_enter(&stp->sd_lock);
   4624 		for (ssp = stp->sd_siglist; ssp && (ssp->ss_pidp != pidp);
   4625 		    pssp = ssp, ssp = ssp->ss_next)
   4626 			;
   4627 
   4628 		if (arg) {
   4629 			if (arg & ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR|
   4630 			    S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) {
   4631 				mutex_exit(&stp->sd_lock);
   4632 				return (EINVAL);
   4633 			}
   4634 			if ((arg & S_BANDURG) && !(arg & S_RDBAND)) {
   4635 				mutex_exit(&stp->sd_lock);
   4636 				return (EINVAL);
   4637 			}
   4638 
   4639 			/*
   4640 			 * If proc not already registered, add it
   4641 			 * to list.
   4642 			 */
   4643 			if (!ssp) {
   4644 				ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP);
   4645 				ssp->ss_pidp = pidp;
   4646 				ssp->ss_pid = pidp->pid_id;
   4647 				ssp->ss_next = NULL;
   4648 				if (pssp)
   4649 					pssp->ss_next = ssp;
   4650 				else
   4651 					stp->sd_siglist = ssp;
   4652 				mutex_enter(&pidlock);
   4653 				PID_HOLD(pidp);
   4654 				mutex_exit(&pidlock);
   4655 			}
   4656 
   4657 			/*
   4658 			 * Set events.
   4659 			 */
   4660 			ssp->ss_events = (int)arg;
   4661 		} else {
   4662 			/*
   4663 			 * Remove proc from register list.
   4664 			 */
   4665 			if (ssp) {
   4666 				mutex_enter(&pidlock);
   4667 				PID_RELE(pidp);
   4668 				mutex_exit(&pidlock);
   4669 				if (pssp)
   4670 					pssp->ss_next = ssp->ss_next;
   4671 				else
   4672 					stp->sd_siglist = ssp->ss_next;
   4673 				kmem_free(ssp, sizeof (strsig_t));
   4674 			} else {
   4675 				mutex_exit(&stp->sd_lock);
   4676 				return (EINVAL);
   4677 			}
   4678 		}
   4679 
   4680 		/*
   4681 		 * Recalculate OR of sig events.
   4682 		 */
   4683 		stp->sd_sigflags = 0;
   4684 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
   4685 			stp->sd_sigflags |= ssp->ss_events;
   4686 		mutex_exit(&stp->sd_lock);
   4687 		return (0);
   4688 	}
   4689 
   4690 	case I_GETSIG:
   4691 		/*
   4692 		 * Return (in arg) the current registration of events
   4693 		 * for which the calling proc is to be signaled.
   4694 		 */
   4695 	{
   4696 		struct strsig *ssp;
   4697 		struct pid  *pidp;
   4698 
   4699 		pidp = curproc->p_pidp;
   4700 		mutex_enter(&stp->sd_lock);
   4701 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
   4702 			if (ssp->ss_pidp == pidp) {
   4703 				error = strcopyout(&ssp->ss_events, (void *)arg,
   4704 				    sizeof (int), copyflag);
   4705 				mutex_exit(&stp->sd_lock);
   4706 				return (error);
   4707 			}
   4708 		mutex_exit(&stp->sd_lock);
   4709 		return (EINVAL);
   4710 	}
   4711 
   4712 	case I_ESETSIG:
   4713 		/*
   4714 		 * Register the ss_pid to receive the SIGPOLL
   4715 		 * signal based on the events in ss_events arg.  If
   4716 		 * ss_events is zero, remove the proc from register list.
   4717 		 */
   4718 	{
   4719 		struct strsig *ssp, *pssp;
   4720 		struct proc *proc;
   4721 		struct pid  *pidp;
   4722 		pid_t pid;
   4723 		struct strsigset ss;
   4724 
   4725 		error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag);
   4726 		if (error)
   4727 			return (error);
   4728 
   4729 		pid = ss.ss_pid;
   4730 
   4731 		if (ss.ss_events != 0) {
   4732 			/*
   4733 			 * Permissions check by sending signal 0.
   4734 			 * Note that when kill fails it does a set_errno
   4735 			 * causing the system call to fail.
   4736 			 */
   4737 			error = kill(pid, 0);
   4738 			if (error) {
   4739 				return (error);
   4740 			}
   4741 		}
   4742 		mutex_enter(&pidlock);
   4743 		if (pid == 0)
   4744 			proc = curproc;
   4745 		else if (pid < 0)
   4746 			proc = pgfind(-pid);
   4747 		else
   4748 			proc = prfind(pid);
   4749 		if (proc == NULL) {
   4750 			mutex_exit(&pidlock);
   4751 			return (ESRCH);
   4752 		}
   4753 		if (pid < 0)
   4754 			pidp = proc->p_pgidp;
   4755 		else
   4756 			pidp = proc->p_pidp;
   4757 		ASSERT(pidp);
   4758 		/*
   4759 		 * Get a hold on the pid structure while referencing it.
   4760 		 * There is a separate PID_HOLD should it be inserted
   4761 		 * in the list below.
   4762 		 */
   4763 		PID_HOLD(pidp);
   4764 		mutex_exit(&pidlock);
   4765 
   4766 		pssp = NULL;
   4767 		/*
   4768 		 * Hold sd_lock to prevent traversal of sd_siglist while
   4769 		 * it is modified.
   4770 		 */
   4771 		mutex_enter(&stp->sd_lock);
   4772 		for (ssp = stp->sd_siglist; ssp && (ssp->ss_pid != pid);
   4773 		    pssp = ssp, ssp = ssp->ss_next)
   4774 			;
   4775 
   4776 		if (ss.ss_events) {
   4777 			if (ss.ss_events &
   4778 			    ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR|
   4779 			    S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) {
   4780 				mutex_exit(&stp->sd_lock);
   4781 				mutex_enter(&pidlock);
   4782 				PID_RELE(pidp);
   4783 				mutex_exit(&pidlock);
   4784 				return (EINVAL);
   4785 			}
   4786 			if ((ss.ss_events & S_BANDURG) &&
   4787 			    !(ss.ss_events & S_RDBAND)) {
   4788 				mutex_exit(&stp->sd_lock);
   4789 				mutex_enter(&pidlock);
   4790 				PID_RELE(pidp);
   4791 				mutex_exit(&pidlock);
   4792 				return (EINVAL);
   4793 			}
   4794 
   4795 			/*
   4796 			 * If proc not already registered, add it
   4797 			 * to list.
   4798 			 */
   4799 			if (!ssp) {
   4800 				ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP);
   4801 				ssp->ss_pidp = pidp;
   4802 				ssp->ss_pid = pid;
   4803 				ssp->ss_next = NULL;
   4804 				if (pssp)
   4805 					pssp->ss_next = ssp;
   4806 				else
   4807 					stp->sd_siglist = ssp;
   4808 				mutex_enter(&pidlock);
   4809 				PID_HOLD(pidp);
   4810 				mutex_exit(&pidlock);
   4811 			}
   4812 
   4813 			/*
   4814 			 * Set events.
   4815 			 */
   4816 			ssp->ss_events = ss.ss_events;
   4817 		} else {
   4818 			/*
   4819 			 * Remove proc from register list.
   4820 			 */
   4821 			if (ssp) {
   4822 				mutex_enter(&pidlock);
   4823 				PID_RELE(pidp);
   4824 				mutex_exit(&pidlock);
   4825 				if (pssp)
   4826 					pssp->ss_next = ssp->ss_next;
   4827 				else
   4828 					stp->sd_siglist = ssp->ss_next;
   4829 				kmem_free(ssp, sizeof (strsig_t));
   4830 			} else {
   4831 				mutex_exit(&stp->sd_lock);
   4832 				mutex_enter(&pidlock);
   4833 				PID_RELE(pidp);
   4834 				mutex_exit(&pidlock);
   4835 				return (EINVAL);
   4836 			}
   4837 		}
   4838 
   4839 		/*
   4840 		 * Recalculate OR of sig events.
   4841 		 */
   4842 		stp->sd_sigflags = 0;
   4843 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
   4844 			stp->sd_sigflags |= ssp->ss_events;
   4845 		mutex_exit(&stp->sd_lock);
   4846 		mutex_enter(&pidlock);
   4847 		PID_RELE(pidp);
   4848 		mutex_exit(&pidlock);
   4849 		return (0);
   4850 	}
   4851 
   4852 	case I_EGETSIG:
   4853 		/*
   4854 		 * Return (in arg) the current registration of events
   4855 		 * for which the calling proc is to be signaled.
   4856 		 */
   4857 	{
   4858 		struct strsig *ssp;
   4859 		struct proc *proc;
   4860 		pid_t pid;
   4861 		struct pid  *pidp;
   4862 		struct strsigset ss;
   4863 
   4864 		error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag);
   4865 		if (error)
   4866 			return (error);
   4867 
   4868 		pid = ss.ss_pid;
   4869 		mutex_enter(&pidlock);
   4870 		if (pid == 0)
   4871 			proc = curproc;
   4872 		else if (pid < 0)
   4873 			proc = pgfind(-pid);
   4874 		else
   4875 			proc = prfind(pid);
   4876 		if (proc == NULL) {
   4877 			mutex_exit(&pidlock);
   4878 			return (ESRCH);
   4879 		}
   4880 		if (pid < 0)
   4881 			pidp = proc->p_pgidp;
   4882 		else
   4883 			pidp = proc->p_pidp;
   4884 
   4885 		/* Prevent the pidp from being reassigned */
   4886 		PID_HOLD(pidp);
   4887 		mutex_exit(&pidlock);
   4888 
   4889 		mutex_enter(&stp->sd_lock);
   4890 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
   4891 			if (ssp->ss_pid == pid) {
   4892 				ss.ss_pid = ssp->ss_pid;
   4893 				ss.ss_events = ssp->ss_events;
   4894 				error = strcopyout(&ss, (void *)arg,
   4895 				    sizeof (struct strsigset), copyflag);
   4896 				mutex_exit(&stp->sd_lock);
   4897 				mutex_enter(&pidlock);
   4898 				PID_RELE(pidp);
   4899 				mutex_exit(&pidlock);
   4900 				return (error);
   4901 			}
   4902 		mutex_exit(&stp->sd_lock);
   4903 		mutex_enter(&pidlock);
   4904 		PID_RELE(pidp);
   4905 		mutex_exit(&pidlock);
   4906 		return (EINVAL);
   4907 	}
   4908 
   4909 	case I_PEEK:
   4910 	{
   4911 		STRUCT_DECL(strpeek, strpeek);
   4912 		size_t n;
   4913 		mblk_t *fmp, *tmp_mp = NULL;
   4914 
   4915 		STRUCT_INIT(strpeek, flag);
   4916 
   4917 		error = strcopyin((void *)arg, STRUCT_BUF(strpeek),
   4918 		    STRUCT_SIZE(strpeek), copyflag);
   4919 		if (error)
   4920 			return (error);
   4921 
   4922 		mutex_enter(QLOCK(rdq));
   4923 		/*
   4924 		 * Skip the invalid messages
   4925 		 */
   4926 		for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
   4927 			if (mp->b_datap->db_type != M_SIG)
   4928 				break;
   4929 
   4930 		/*
   4931 		 * If user has requested to peek at a high priority message
   4932 		 * and first message is not, return 0
   4933 		 */
   4934 		if (mp != NULL) {
   4935 			if ((STRUCT_FGET(strpeek, flags) & RS_HIPRI) &&
   4936 			    queclass(mp) == QNORM) {
   4937 				*rvalp = 0;
   4938 				mutex_exit(QLOCK(rdq));
   4939 				return (0);
   4940 			}
   4941 		} else if (stp->sd_struiordq == NULL ||
   4942 		    (STRUCT_FGET(strpeek, flags) & RS_HIPRI)) {
   4943 			/*
   4944 			 * No mblks to look at at the streamhead and
   4945 			 * 1). This isn't a synch stream or
   4946 			 * 2). This is a synch stream but caller wants high
   4947 			 *	priority messages which is not supported by
   4948 			 *	the synch stream. (it only supports QNORM)
   4949 			 */
   4950 			*rvalp = 0;
   4951 			mutex_exit(QLOCK(rdq));
   4952 			return (0);
   4953 		}
   4954 
   4955 		fmp = mp;
   4956 
   4957 		if (mp && mp->b_datap->db_type == M_PASSFP) {
   4958 			mutex_exit(QLOCK(rdq));
   4959 			return (EBADMSG);
   4960 		}
   4961 
   4962 		ASSERT(mp == NULL || mp->b_datap->db_type == M_PCPROTO ||
   4963 		    mp->b_datap->db_type == M_PROTO ||
   4964 		    mp->b_datap->db_type == M_DATA);
   4965 
   4966 		if (mp && mp->b_datap->db_type == M_PCPROTO) {
   4967 			STRUCT_FSET(strpeek, flags, RS_HIPRI);
   4968 		} else {
   4969 			STRUCT_FSET(strpeek, flags, 0);
   4970 		}
   4971 
   4972 
   4973 		if (mp && ((tmp_mp = dupmsg(mp)) == NULL)) {
   4974 			mutex_exit(QLOCK(rdq));
   4975 			return (ENOSR);
   4976 		}
   4977 		mutex_exit(QLOCK(rdq));
   4978 
   4979 		/*
   4980 		 * set mp = tmp_mp, so that I_PEEK processing can continue.
   4981 		 * tmp_mp is used to free the dup'd message.
   4982 		 */
   4983 		mp = tmp_mp;
   4984 
   4985 		uio.uio_fmode = 0;
   4986 		uio.uio_extflg = UIO_COPY_CACHED;
   4987 		uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE :
   4988 		    UIO_SYSSPACE;
   4989 		uio.uio_limit = 0;
   4990 		/*
   4991 		 * First process PROTO blocks, if any.
   4992 		 * If user doesn't want to get ctl info by setting maxlen <= 0,
   4993 		 * then set len to -1/0 and skip control blocks part.
   4994 		 */
   4995 		if (STRUCT_FGET(strpeek, ctlbuf.maxlen) < 0)
   4996 			STRUCT_FSET(strpeek, ctlbuf.len, -1);
   4997 		else if (STRUCT_FGET(strpeek, ctlbuf.maxlen) == 0)
   4998 			STRUCT_FSET(strpeek, ctlbuf.len, 0);
   4999 		else {
   5000 			int	ctl_part = 0;
   5001 
   5002 			iov.iov_base = STRUCT_FGETP(strpeek, ctlbuf.buf);
   5003 			iov.iov_len = STRUCT_FGET(strpeek, ctlbuf.maxlen);
   5004 			uio.uio_iov = &iov;
   5005 			uio.uio_resid = iov.iov_len;
   5006 			uio.uio_loffset = 0;
   5007 			uio.uio_iovcnt = 1;
   5008 			while (mp && mp->b_datap->db_type != M_DATA &&
   5009 			    uio.uio_resid >= 0) {
   5010 				ASSERT(STRUCT_FGET(strpeek, flags) == 0 ?
   5011 				    mp->b_datap->db_type == M_PROTO :
   5012 				    mp->b_datap->db_type == M_PCPROTO);
   5013 
   5014 				if ((n = MIN(uio.uio_resid,
   5015 				    mp->b_wptr - mp->b_rptr)) != 0 &&
   5016 				    (error = uiomove((char *)mp->b_rptr, n,
   5017 				    UIO_READ, &uio)) != 0) {
   5018 					freemsg(tmp_mp);
   5019 					return (error);
   5020 				}
   5021 				ctl_part = 1;
   5022 				mp = mp->b_cont;
   5023 			}
   5024 			/* No ctl message */
   5025 			if (ctl_part == 0)
   5026 				STRUCT_FSET(strpeek, ctlbuf.len, -1);
   5027 			else
   5028 				STRUCT_FSET(strpeek, ctlbuf.len,
   5029 				    STRUCT_FGET(strpeek, ctlbuf.maxlen) -
   5030 				    uio.uio_resid);
   5031 		}
   5032 
   5033 		/*
   5034 		 * Now process DATA blocks, if any.
   5035 		 * If user doesn't want to get data info by setting maxlen <= 0,
   5036 		 * then set len to -1/0 and skip data blocks part.
   5037 		 */
   5038 		if (STRUCT_FGET(strpeek, databuf.maxlen) < 0)
   5039 			STRUCT_FSET(strpeek, databuf.len, -1);
   5040 		else if (STRUCT_FGET(strpeek, databuf.maxlen) == 0)
   5041 			STRUCT_FSET(strpeek, databuf.len, 0);
   5042 		else {
   5043 			int	data_part = 0;
   5044 
   5045 			iov.iov_base = STRUCT_FGETP(strpeek, databuf.buf);
   5046 			iov.iov_len = STRUCT_FGET(strpeek, databuf.maxlen);
   5047 			uio.uio_iov = &iov;
   5048 			uio.uio_resid = iov.iov_len;
   5049 			uio.uio_loffset = 0;
   5050 			uio.uio_iovcnt = 1;
   5051 			while (mp && uio.uio_resid) {
   5052 				if (mp->b_datap->db_type == M_DATA) {
   5053 					if ((n = MIN(uio.uio_resid,
   5054 					    mp->b_wptr - mp->b_rptr)) != 0 &&
   5055 					    (error = uiomove((char *)mp->b_rptr,
   5056 					    n, UIO_READ, &uio)) != 0) {
   5057 						freemsg(tmp_mp);
   5058 						return (error);
   5059 					}
   5060 					data_part = 1;
   5061 				}
   5062 				ASSERT(data_part == 0 ||
   5063 				    mp->b_datap->db_type == M_DATA);
   5064 				mp = mp->b_cont;
   5065 			}
   5066 			/* No data message */
   5067 			if (data_part == 0)
   5068 				STRUCT_FSET(strpeek, databuf.len, -1);
   5069 			else
   5070 				STRUCT_FSET(strpeek, databuf.len,
   5071 				    STRUCT_FGET(strpeek, databuf.maxlen) -
   5072 				    uio.uio_resid);
   5073 		}
   5074 		freemsg(tmp_mp);
   5075 
   5076 		/*
   5077 		 * It is a synch stream and user wants to get
   5078 		 * data (maxlen > 0).
   5079 		 * uio setup is done by the codes that process DATA
   5080 		 * blocks above.
   5081 		 */
   5082 		if ((fmp == NULL) && STRUCT_FGET(strpeek, databuf.maxlen) > 0) {
   5083 			infod_t infod;
   5084 
   5085 			infod.d_cmd = INFOD_COPYOUT;
   5086 			infod.d_res = 0;
   5087 			infod.d_uiop = &uio;
   5088 			error = infonext(rdq, &infod);
   5089 			if (error == EINVAL || error == EBUSY)
   5090 				error = 0;
   5091 			if (error)
   5092 				return (error);
   5093 			STRUCT_FSET(strpeek, databuf.len, STRUCT_FGET(strpeek,
   5094 			    databuf.maxlen) - uio.uio_resid);
   5095 			if (STRUCT_FGET(strpeek, databuf.len) == 0) {
   5096 				/*
   5097 				 * No data found by the infonext().
   5098 				 */
   5099 				STRUCT_FSET(strpeek, databuf.len, -1);
   5100 			}
   5101 		}
   5102 		error = strcopyout(STRUCT_BUF(strpeek), (void *)arg,
   5103 		    STRUCT_SIZE(strpeek), copyflag);
   5104 		if (error) {
   5105 			return (error);
   5106 		}
   5107 		/*
   5108 		 * If there is no message retrieved, set return code to 0
   5109 		 * otherwise, set it to 1.
   5110 		 */
   5111 		if (STRUCT_FGET(strpeek, ctlbuf.len) == -1 &&
   5112 		    STRUCT_FGET(strpeek, databuf.len) == -1)
   5113 			*rvalp = 0;
   5114 		else
   5115 			*rvalp = 1;
   5116 		return (0);
   5117 	}
   5118 
   5119 	case I_FDINSERT:
   5120 	{
   5121 		STRUCT_DECL(strfdinsert, strfdinsert);
   5122 		struct file *resftp;
   5123 		struct stdata *resstp;
   5124 		t_uscalar_t	ival;
   5125 		ssize_t msgsize;
   5126 		struct strbuf mctl;
   5127 
   5128 		STRUCT_INIT(strfdinsert, flag);
   5129 		if (stp->sd_flag & STRHUP)
   5130 			return (ENXIO);
   5131 		/*
   5132 		 * STRDERR, STWRERR and STPLEX tested above.
   5133 		 */
   5134 		error = strcopyin((void *)arg, STRUCT_BUF(strfdinsert),
   5135 		    STRUCT_SIZE(strfdinsert), copyflag);
   5136 		if (error)
   5137 			return (error);
   5138 
   5139 		if (STRUCT_FGET(strfdinsert, offset) < 0 ||
   5140 		    (STRUCT_FGET(strfdinsert, offset) %
   5141 		    sizeof (t_uscalar_t)) != 0)
   5142 			return (EINVAL);
   5143 		if ((resftp = getf(STRUCT_FGET(strfdinsert, fildes))) != NULL) {
   5144 			if ((resstp = resftp->f_vnode->v_stream) == NULL) {
   5145 				releasef(STRUCT_FGET(strfdinsert, fildes));
   5146 				return (EINVAL);
   5147 			}
   5148 		} else
   5149 			return (EINVAL);
   5150 
   5151 		mutex_enter(&resstp->sd_lock);
   5152 		if (resstp->sd_flag & (STRDERR|STWRERR|STRHUP|STPLEX)) {
   5153 			error = strgeterr(resstp,
   5154 			    STRDERR|STWRERR|STRHUP|STPLEX, 0);
   5155 			if (error != 0) {
   5156 				mutex_exit(&resstp->sd_lock);
   5157 				releasef(STRUCT_FGET(strfdinsert, fildes));
   5158 				return (error);
   5159 			}
   5160 		}
   5161 		mutex_exit(&resstp->sd_lock);
   5162 
   5163 #ifdef	_ILP32
   5164 		{
   5165 			queue_t	*q;
   5166 			queue_t	*mate = NULL;
   5167 
   5168 			/* get read queue of stream terminus */
   5169 			claimstr(resstp->sd_wrq);
   5170 			for (q = resstp->sd_wrq->q_next; q->q_next != NULL;
   5171 			    q = q->q_next)
   5172 				if (!STRMATED(resstp) && STREAM(q) != resstp &&
   5173 				    mate == NULL) {
   5174 					ASSERT(q->q_qinfo->qi_srvp);
   5175 					ASSERT(_OTHERQ(q)->q_qinfo->qi_srvp);
   5176 					claimstr(q);
   5177 					mate = q;
   5178 				}
   5179 			q = _RD(q);
   5180 			if (mate)
   5181 				releasestr(mate);
   5182 			releasestr(resstp->sd_wrq);
   5183 			ival = (t_uscalar_t)q;
   5184 		}
   5185 #else
   5186 		ival = (t_uscalar_t)getminor(resftp->f_vnode->v_rdev);
   5187 #endif	/* _ILP32 */
   5188 
   5189 		if (STRUCT_FGET(strfdinsert, ctlbuf.len) <
   5190 		    STRUCT_FGET(strfdinsert, offset) + sizeof (t_uscalar_t)) {
   5191 			releasef(STRUCT_FGET(strfdinsert, fildes));
   5192 			return (EINVAL);
   5193 		}
   5194 
   5195 		/*
   5196 		 * Check for legal flag value.
   5197 		 */
   5198 		if (STRUCT_FGET(strfdinsert, flags) & ~RS_HIPRI) {
   5199 			releasef(STRUCT_FGET(strfdinsert, fildes));
   5200 			return (EINVAL);
   5201 		}
   5202 
   5203 		/* get these values from those cached in the stream head */
   5204 		mutex_enter(QLOCK(stp->sd_wrq));
   5205 		rmin = stp->sd_qn_minpsz;
   5206 		rmax = stp->sd_qn_maxpsz;
   5207 		mutex_exit(QLOCK(stp->sd_wrq));
   5208 
   5209 		/*
   5210 		 * Make sure ctl and data sizes together fall within
   5211 		 * the limits of the max and min receive packet sizes
   5212 		 * and do not exceed system limit.  A negative data
   5213 		 * length means that no data part is to be sent.
   5214 		 */
   5215 		ASSERT((rmax >= 0) || (rmax == INFPSZ));
   5216 		if (rmax == 0) {
   5217 			releasef(STRUCT_FGET(strfdinsert, fildes));
   5218 			return (ERANGE);
   5219 		}
   5220 		if ((msgsize = STRUCT_FGET(strfdinsert, databuf.len)) < 0)
   5221 			msgsize = 0;
   5222 		if ((msgsize < rmin) ||
   5223 		    ((msgsize > rmax) && (rmax != INFPSZ)) ||
   5224 		    (STRUCT_FGET(strfdinsert, ctlbuf.len) > strctlsz)) {
   5225 			releasef(STRUCT_FGET(strfdinsert, fildes));
   5226 			return (ERANGE);
   5227 		}
   5228 
   5229 		mutex_enter(&stp->sd_lock);
   5230 		while (!(STRUCT_FGET(strfdinsert, flags) & RS_HIPRI) &&
   5231 		    !canputnext(stp->sd_wrq)) {
   5232 			if ((error = strwaitq(stp, WRITEWAIT, (ssize_t)0,
   5233 			    flag, -1, &done)) != 0 || done) {
   5234 				mutex_exit(&stp->sd_lock);
   5235 				releasef(STRUCT_FGET(strfdinsert, fildes));
   5236 				return (error);
   5237 			}
   5238 			if ((error = i_straccess(stp, access)) != 0) {
   5239 				mutex_exit(&stp->sd_lock);
   5240 				releasef(
   5241 				    STRUCT_FGET(strfdinsert, fildes));
   5242 				return (error);
   5243 			}
   5244 		}
   5245 		mutex_exit(&stp->sd_lock);
   5246 
   5247 		/*
   5248 		 * Copy strfdinsert.ctlbuf into native form of
   5249 		 * ctlbuf to pass down into strmakemsg().
   5250 		 */
   5251 		mctl.maxlen = STRUCT_FGET(strfdinsert, ctlbuf.maxlen);
   5252 		mctl.len = STRUCT_FGET(strfdinsert, ctlbuf.len);
   5253 		mctl.buf = STRUCT_FGETP(strfdinsert, ctlbuf.buf);
   5254 
   5255 		iov.iov_base = STRUCT_FGETP(strfdinsert, databuf.buf);
   5256 		iov.iov_len = STRUCT_FGET(strfdinsert, databuf.len);
   5257 		uio.uio_iov = &iov;
   5258 		uio.uio_iovcnt = 1;
   5259 		uio.uio_loffset = 0;
   5260 		uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE :
   5261 		    UIO_SYSSPACE;
   5262 		uio.uio_fmode = 0;
   5263 		uio.uio_extflg = UIO_COPY_CACHED;
   5264 		uio.uio_resid = iov.iov_len;
   5265 		if ((error = strmakemsg(&mctl,
   5266 		    &msgsize, &uio, stp,
   5267 		    STRUCT_FGET(strfdinsert, flags), &mp)) != 0 || !mp) {
   5268 			STRUCT_FSET(strfdinsert, databuf.len, msgsize);
   5269 			releasef(STRUCT_FGET(strfdinsert, fildes));
   5270 			return (error);
   5271 		}
   5272 
   5273 		STRUCT_FSET(strfdinsert, databuf.len, msgsize);
   5274 
   5275 		/*
   5276 		 * Place the possibly reencoded queue pointer 'offset' bytes
   5277 		 * from the start of the control portion of the message.
   5278 		 */
   5279 		*((t_uscalar_t *)(mp->b_rptr +
   5280 		    STRUCT_FGET(strfdinsert, offset))) = ival;
   5281 
   5282 		/*
   5283 		 * Put message downstream.
   5284 		 */
   5285 		stream_willservice(stp);
   5286 		putnext(stp->sd_wrq, mp);
   5287 		stream_runservice(stp);
   5288 		releasef(STRUCT_FGET(strfdinsert, fildes));
   5289 		return (error);
   5290 	}
   5291 
   5292 	case I_SENDFD:
   5293 	{
   5294 		struct file *fp;
   5295 
   5296 		if ((fp = getf((int)arg)) == NULL)
   5297 			return (EBADF);
   5298 		error = do_sendfp(stp, fp, crp);
   5299 		if (audit_active) {
   5300 			audit_fdsend((int)arg, fp, error);
   5301 		}
   5302 		releasef((int)arg);
   5303 		return (error);
   5304 	}
   5305 
   5306 	case I_RECVFD:
   5307 	case I_E_RECVFD:
   5308 	{
   5309 		struct k_strrecvfd *srf;
   5310 		int i, fd;
   5311 
   5312 		mutex_enter(&stp->sd_lock);
   5313 		while (!(mp = getq(rdq))) {
   5314 			if (stp->sd_flag & (STRHUP|STREOF)) {
   5315 				mutex_exit(&stp->sd_lock);
   5316 				return (ENXIO);
   5317 			}
   5318 			if ((error = strwaitq(stp, GETWAIT, (ssize_t)0,
   5319 			    flag, -1, &done)) != 0 || done) {
   5320 				mutex_exit(&stp->sd_lock);
   5321 				return (error);
   5322 			}
   5323 			if ((error = i_straccess(stp, access)) != 0) {
   5324 				mutex_exit(&stp->sd_lock);
   5325 				return (error);
   5326 			}
   5327 		}
   5328 		if (mp->b_datap->db_type != M_PASSFP) {
   5329 			putback(stp, rdq, mp, mp->b_band);
   5330 			mutex_exit(&stp->sd_lock);
   5331 			return (EBADMSG);
   5332 		}
   5333 		mutex_exit(&stp->sd_lock);
   5334 
   5335 		srf = (struct k_strrecvfd *)mp->b_rptr;
   5336 		if ((fd = ufalloc(0)) == -1) {
   5337 			mutex_enter(&stp->sd_lock);
   5338 			putback(stp, rdq, mp, mp->b_band);
   5339 			mutex_exit(&stp->sd_lock);
   5340 			return (EMFILE);
   5341 		}
   5342 		if (cmd == I_RECVFD) {
   5343 			struct o_strrecvfd	ostrfd;
   5344 
   5345 			/* check to see if uid/gid values are too large. */
   5346 
   5347 			if (srf->uid > (o_uid_t)USHRT_MAX ||
   5348 			    srf->gid > (o_gid_t)USHRT_MAX) {
   5349 				mutex_enter(&stp->sd_lock);
   5350 				putback(stp, rdq, mp, mp->b_band);
   5351 				mutex_exit(&stp->sd_lock);
   5352 				setf(fd, NULL);	/* release fd entry */
   5353 				return (EOVERFLOW);
   5354 			}
   5355 
   5356 			ostrfd.fd = fd;
   5357 			ostrfd.uid = (o_uid_t)srf->uid;
   5358 			ostrfd.gid = (o_gid_t)srf->gid;
   5359 
   5360 			/* Null the filler bits */
   5361 			for (i = 0; i < 8; i++)
   5362 				ostrfd.fill[i] = 0;
   5363 
   5364 			error = strcopyout(&ostrfd, (void *)arg,
   5365 			    sizeof (struct o_strrecvfd), copyflag);
   5366 		} else {		/* I_E_RECVFD */
   5367 			struct strrecvfd	strfd;
   5368 
   5369 			strfd.fd = fd;
   5370 			strfd.uid = srf->uid;
   5371 			strfd.gid = srf->gid;
   5372 
   5373 			/* null the filler bits */
   5374 			for (i = 0; i < 8; i++)
   5375 				strfd.fill[i] = 0;
   5376 
   5377 			error = strcopyout(&strfd, (void *)arg,
   5378 			    sizeof (struct strrecvfd), copyflag);
   5379 		}
   5380 
   5381 		if (error) {
   5382 			setf(fd, NULL);	/* release fd entry */
   5383 			mutex_enter(&stp->sd_lock);
   5384 			putback(stp, rdq, mp, mp->b_band);
   5385 			mutex_exit(&stp->sd_lock);
   5386 			return (error);
   5387 		}
   5388 		if (audit_active) {
   5389 			audit_fdrecv(fd, srf->fp);
   5390 		}
   5391 
   5392 		/*
   5393 		 * Always increment f_count since the freemsg() below will
   5394 		 * always call free_passfp() which performs a closef().
   5395 		 */
   5396 		mutex_enter(&srf->fp->f_tlock);
   5397 		srf->fp->f_count++;
   5398 		mutex_exit(&srf->fp->f_tlock);
   5399 		setf(fd, srf->fp);
   5400 		freemsg(mp);
   5401 		return (0);
   5402 	}
   5403 
   5404 	case I_SWROPT:
   5405 		/*
   5406 		 * Set/clear the write options. arg is a bit
   5407 		 * mask with any of the following bits set...
   5408 		 * 	SNDZERO - send zero length message
   5409 		 *	SNDPIPE - send sigpipe to process if
   5410 		 *		sd_werror is set and process is
   5411 		 *		doing a write or putmsg.
   5412 		 * The new stream head write options should reflect
   5413 		 * what is in arg.
   5414 		 */
   5415 		if (arg & ~(SNDZERO|SNDPIPE))
   5416 			return (EINVAL);
   5417 
   5418 		mutex_enter(&stp->sd_lock);
   5419 		stp->sd_wput_opt &= ~(SW_SIGPIPE|SW_SNDZERO);
   5420 		if (arg & SNDZERO)
   5421 			stp->sd_wput_opt |= SW_SNDZERO;
   5422 		if (arg & SNDPIPE)
   5423 			stp->sd_wput_opt |= SW_SIGPIPE;
   5424 		mutex_exit(&stp->sd_lock);
   5425 		return (0);
   5426 
   5427 	case I_GWROPT:
   5428 	{
   5429 		int wropt = 0;
   5430 
   5431 		if (stp->sd_wput_opt & SW_SNDZERO)
   5432 			wropt |= SNDZERO;
   5433 		if (stp->sd_wput_opt & SW_SIGPIPE)
   5434 			wropt |= SNDPIPE;
   5435 		return (strcopyout(&wropt, (void *)arg, sizeof (wropt),
   5436 		    copyflag));
   5437 	}
   5438 
   5439 	case I_LIST:
   5440 		/*
   5441 		 * Returns all the modules found on this stream,
   5442 		 * upto the driver. If argument is NULL, return the
   5443 		 * number of modules (including driver). If argument
   5444 		 * is not NULL, copy the names into the structure
   5445 		 * provided.
   5446 		 */
   5447 
   5448 	{
   5449 		queue_t *q;
   5450 		char *qname;
   5451 		int i, nmods;
   5452 		struct str_mlist *mlist;
   5453 		STRUCT_DECL(str_list, strlist);
   5454 
   5455 		if (arg == NULL) { /* Return number of modules plus driver */
   5456 			if (stp->sd_vnode->v_type == VFIFO)
   5457 				*rvalp = stp->sd_pushcnt;
   5458 			else
   5459 				*rvalp = stp->sd_pushcnt + 1;
   5460 			return (0);
   5461 		}
   5462 
   5463 		STRUCT_INIT(strlist, flag);
   5464 
   5465 		error = strcopyin((void *)arg, STRUCT_BUF(strlist),
   5466 		    STRUCT_SIZE(strlist), copyflag);
   5467 		if (error != 0)
   5468 			return (error);
   5469 
   5470 		mlist = STRUCT_FGETP(strlist, sl_modlist);
   5471 		nmods = STRUCT_FGET(strlist, sl_nmods);
   5472 		if (nmods <= 0)
   5473 			return (EINVAL);
   5474 
   5475 		claimstr(stp->sd_wrq);
   5476 		q = stp->sd_wrq;
   5477 		for (i = 0; i < nmods && _SAMESTR(q); i++, q = q->q_next) {
   5478 			qname = Q2NAME(q->q_next);
   5479 			error = strcopyout(qname, &mlist[i], strlen(qname) + 1,
   5480 			    copyflag);
   5481 			if (error != 0) {
   5482 				releasestr(stp->sd_wrq);
   5483 				return (error);
   5484 			}
   5485 		}
   5486 		releasestr(stp->sd_wrq);
   5487 		return (strcopyout(&i, (void *)arg, sizeof (int), copyflag));
   5488 	}
   5489 
   5490 	case I_CKBAND:
   5491 	{
   5492 		queue_t *q;
   5493 		qband_t *qbp;
   5494 
   5495 		if ((arg < 0) || (arg >= NBAND))
   5496 			return (EINVAL);
   5497 		q = _RD(stp->sd_wrq);
   5498 		mutex_enter(QLOCK(q));
   5499 		if (arg > (int)q->q_nband) {
   5500 			*rvalp = 0;
   5501 		} else {
   5502 			if (arg == 0) {
   5503 				if (q->q_first)
   5504 					*rvalp = 1;
   5505 				else
   5506 					*rvalp = 0;
   5507 			} else {
   5508 				qbp = q->q_bandp;
   5509 				while (--arg > 0)
   5510 					qbp = qbp->qb_next;
   5511 				if (qbp->qb_first)
   5512 					*rvalp = 1;
   5513 				else
   5514 					*rvalp = 0;
   5515 			}
   5516 		}
   5517 		mutex_exit(QLOCK(q));
   5518 		return (0);
   5519 	}
   5520 
   5521 	case I_GETBAND:
   5522 	{
   5523 		int intpri;
   5524 		queue_t *q;
   5525 
   5526 		q = _RD(stp->sd_wrq);
   5527 		mutex_enter(QLOCK(q));
   5528 		mp = q->q_first;
   5529 		if (!mp) {
   5530 			mutex_exit(QLOCK(q));
   5531 			return (ENODATA);
   5532 		}
   5533 		intpri = (int)mp->b_band;
   5534 		error = strcopyout(&intpri, (void *)arg, sizeof (int),
   5535 		    copyflag);
   5536 		mutex_exit(QLOCK(q));
   5537 		return (error);
   5538 	}
   5539 
   5540 	case I_ATMARK:
   5541 	{
   5542 		queue_t *q;
   5543 
   5544 		if (arg & ~(ANYMARK|LASTMARK))
   5545 			return (EINVAL);
   5546 		q = _RD(stp->sd_wrq);
   5547 		mutex_enter(&stp->sd_lock);
   5548 		if ((stp->sd_flag & STRATMARK) && (arg == ANYMARK)) {
   5549 			*rvalp = 1;
   5550 		} else {
   5551 			mutex_enter(QLOCK(q));
   5552 			mp = q->q_first;
   5553 
   5554 			if (mp == NULL)
   5555 				*rvalp = 0;
   5556 			else if ((arg == ANYMARK) && (mp->b_flag & MSGMARK))
   5557 				*rvalp = 1;
   5558 			else if ((arg == LASTMARK) && (mp == stp->sd_mark))
   5559 				*rvalp = 1;
   5560 			else
   5561 				*rvalp = 0;
   5562 			mutex_exit(QLOCK(q));
   5563 		}
   5564 		mutex_exit(&stp->sd_lock);
   5565 		return (0);
   5566 	}
   5567 
   5568 	case I_CANPUT:
   5569 	{
   5570 		char band;
   5571 
   5572 		if ((arg < 0) || (arg >= NBAND))
   5573 			return (EINVAL);
   5574 		band = (char)arg;
   5575 		*rvalp = bcanputnext(stp->sd_wrq, band);
   5576 		return (0);
   5577 	}
   5578 
   5579 	case I_SETCLTIME:
   5580 	{
   5581 		int closetime;
   5582 
   5583 		error = strcopyin((void *)arg, &closetime, sizeof (int),
   5584 		    copyflag);
   5585 		if (error)
   5586 			return (error);
   5587 		if (closetime < 0)
   5588 			return (EINVAL);
   5589 
   5590 		stp->sd_closetime = closetime;
   5591 		return (0);
   5592 	}
   5593 
   5594 	case I_GETCLTIME:
   5595 	{
   5596 		int closetime;
   5597 
   5598 		closetime = stp->sd_closetime;
   5599 		return (strcopyout(&closetime, (void *)arg, sizeof (int),
   5600 		    copyflag));
   5601 	}
   5602 
   5603 	case TIOCGSID:
   5604 	{
   5605 		pid_t sid;
   5606 
   5607 		mutex_enter(&stp->sd_lock);
   5608 		if (stp->sd_sidp == NULL) {
   5609 			mutex_exit(&stp->sd_lock);
   5610 			return (ENOTTY);
   5611 		}
   5612 		sid = stp->sd_sidp->pid_id;
   5613 		mutex_exit(&stp->sd_lock);
   5614 		return (strcopyout(&sid, (void *)arg, sizeof (pid_t),
   5615 		    copyflag));
   5616 	}
   5617 
   5618 	case TIOCSPGRP:
   5619 	{
   5620 		pid_t pgrp;
   5621 		proc_t *q;
   5622 		pid_t	sid, fg_pgid, bg_pgid;
   5623 
   5624 		if (error = strcopyin((void *)arg, &pgrp, sizeof (pid_t),
   5625 		    copyflag))
   5626 			return (error);
   5627 		mutex_enter(&stp->sd_lock);
   5628 		mutex_enter(&pidlock);
   5629 		if (stp->sd_sidp != ttoproc(curthread)->p_sessp->s_sidp) {
   5630 			mutex_exit(&pidlock);
   5631 			mutex_exit(&stp->sd_lock);
   5632 			return (ENOTTY);
   5633 		}
   5634 		if (pgrp == stp->sd_pgidp->pid_id) {
   5635 			mutex_exit(&pidlock);
   5636 			mutex_exit(&stp->sd_lock);
   5637 			return (0);
   5638 		}
   5639 		if (pgrp <= 0 || pgrp >= maxpid) {
   5640 			mutex_exit(&pidlock);
   5641 			mutex_exit(&stp->sd_lock);
   5642 			return (EINVAL);
   5643 		}
   5644 		if ((q = pgfind(pgrp)) == NULL ||
   5645 		    q->p_sessp != ttoproc(curthread)->p_sessp) {
   5646 			mutex_exit(&pidlock);
   5647 			mutex_exit(&stp->sd_lock);
   5648 			return (EPERM);
   5649 		}
   5650 		sid = stp->sd_sidp->pid_id;
   5651 		fg_pgid = q->p_pgrp;
   5652 		bg_pgid = stp->sd_pgidp->pid_id;
   5653 		CL_SET_PROCESS_GROUP(curthread, sid, bg_pgid, fg_pgid);
   5654 		PID_RELE(stp->sd_pgidp);
   5655 		ctty_clear_sighuped();
   5656 		stp->sd_pgidp = q->p_pgidp;
   5657 		PID_HOLD(stp->sd_pgidp);
   5658 		mutex_exit(&pidlock);
   5659 		mutex_exit(&stp->sd_lock);
   5660 		return (0);
   5661 	}
   5662 
   5663 	case TIOCGPGRP:
   5664 	{
   5665 		pid_t pgrp;
   5666 
   5667 		mutex_enter(&stp->sd_lock);
   5668 		if (stp->sd_sidp == NULL) {
   5669 			mutex_exit(&stp->sd_lock);
   5670 			return (ENOTTY);
   5671 		}
   5672 		pgrp = stp->sd_pgidp->pid_id;
   5673 		mutex_exit(&stp->sd_lock);
   5674 		return (strcopyout(&pgrp, (void *)arg, sizeof (pid_t),
   5675 		    copyflag));
   5676 	}
   5677 
   5678 	case TIOCSCTTY:
   5679 	{
   5680 		return (strctty(stp));
   5681 	}
   5682 
   5683 	case TIOCNOTTY:
   5684 	{
   5685 		/* freectty() always assumes curproc. */
   5686 		if (freectty(B_FALSE) != 0)
   5687 			return (0);
   5688 		return (ENOTTY);
   5689 	}
   5690 
   5691 	case FIONBIO:
   5692 	case FIOASYNC:
   5693 		return (0);	/* handled by the upper layer */
   5694 	}
   5695 }
   5696 
   5697 /*
   5698  * Custom free routine used for M_PASSFP messages.
   5699  */
   5700 static void
   5701 free_passfp(struct k_strrecvfd *srf)
   5702 {
   5703 	(void) closef(srf->fp);
   5704 	kmem_free(srf, sizeof (struct k_strrecvfd) + sizeof (frtn_t));
   5705 }
   5706 
   5707 /* ARGSUSED */
   5708 int
   5709 do_sendfp(struct stdata *stp, struct file *fp, struct cred *cr)
   5710 {
   5711 	queue_t *qp, *nextqp;
   5712 	struct k_strrecvfd *srf;
   5713 	mblk_t *mp;
   5714 	frtn_t *frtnp;
   5715 	size_t bufsize;
   5716 	queue_t	*mate = NULL;
   5717 	syncq_t	*sq = NULL;
   5718 	int retval = 0;
   5719 
   5720 	if (stp->sd_flag & STRHUP)
   5721 		return (ENXIO);
   5722 
   5723 	claimstr(stp->sd_wrq);
   5724 
   5725 	/* Fastpath, we have a pipe, and we are already mated, use it. */
   5726 	if (STRMATED(stp)) {
   5727 		qp = _RD(stp->sd_mate->sd_wrq);
   5728 		claimstr(qp);
   5729 		mate = qp;
   5730 	} else { /* Not already mated. */
   5731 
   5732 		/*
   5733 		 * Walk the stream to the end of this one.
   5734 		 * assumes that the claimstr() will prevent
   5735 		 * plumbing between the stream head and the
   5736 		 * driver from changing
   5737 		 */
   5738 		qp = stp->sd_wrq;
   5739 
   5740 		/*
   5741 		 * Loop until we reach the end of this stream.
   5742 		 * On completion, qp points to the write queue
   5743 		 * at the end of the stream, or the read queue
   5744 		 * at the stream head if this is a fifo.
   5745 		 */
   5746 		while (((qp = qp->q_next) != NULL) && _SAMESTR(qp))
   5747 			;
   5748 
   5749 		/*
   5750 		 * Just in case we get a q_next which is NULL, but
   5751 		 * not at the end of the stream.  This is actually
   5752 		 * broken, so we set an assert to catch it in
   5753 		 * debug, and set an error and return if not debug.
   5754 		 */
   5755 		ASSERT(qp);
   5756 		if (qp == NULL) {
   5757 			releasestr(stp->sd_wrq);
   5758 			return (EINVAL);
   5759 		}
   5760 
   5761 		/*
   5762 		 * Enter the syncq for the driver, so (hopefully)
   5763 		 * the queue values will not change on us.
   5764 		 * XXXX - This will only prevent the race IFF only
   5765 		 *   the write side modifies the q_next member, and
   5766 		 *   the put procedure is protected by at least
   5767 		 *   MT_PERQ.
   5768 		 */
   5769 		if ((sq = qp->q_syncq) != NULL)
   5770 			entersq(sq, SQ_PUT);
   5771 
   5772 		/* Now get the q_next value from this qp. */
   5773 		nextqp = qp->q_next;
   5774 
   5775 		/*
   5776 		 * If nextqp exists and the other stream is different
   5777 		 * from this one claim the stream, set the mate, and
   5778 		 * get the read queue at the stream head of the other
   5779 		 * stream.  Assumes that nextqp was at least valid when
   5780 		 * we got it.  Hopefully the entersq of the driver
   5781 		 * will prevent it from changing on us.
   5782 		 */
   5783 		if ((nextqp != NULL) && (STREAM(nextqp) != stp)) {
   5784 			ASSERT(qp->q_qinfo->qi_srvp);
   5785 			ASSERT(_OTHERQ(qp)->q_qinfo->qi_srvp);
   5786 			ASSERT(_OTHERQ(qp->q_next)->q_qinfo->qi_srvp);
   5787 			claimstr(nextqp);
   5788 
   5789 			/* Make sure we still have a q_next */
   5790 			if (nextqp != qp->q_next) {
   5791 				releasestr(stp->sd_wrq);
   5792 				releasestr(nextqp);
   5793 				return (EINVAL);
   5794 			}
   5795 
   5796 			qp = _RD(STREAM(nextqp)->sd_wrq);
   5797 			mate = qp;
   5798 		}
   5799 		/* If we entered the synq above, leave it. */
   5800 		if (sq != NULL)
   5801 			leavesq(sq, SQ_PUT);
   5802 	} /*  STRMATED(STP)  */
   5803 
   5804 	/* XXX prevents substitution of the ops vector */
   5805 	if (qp->q_qinfo != &strdata && qp->q_qinfo != &fifo_strdata) {
   5806 		retval = EINVAL;
   5807 		goto out;
   5808 	}
   5809 
   5810 	if (qp->q_flag & QFULL) {
   5811 		retval = EAGAIN;
   5812 		goto out;
   5813 	}
   5814 
   5815 	/*
   5816 	 * Since M_PASSFP messages include a file descriptor, we use
   5817 	 * esballoc() and specify a custom free routine (free_passfp()) that
   5818 	 * will close the descriptor as part of freeing the message.  For
   5819 	 * convenience, we stash the frtn_t right after the data block.
   5820 	 */
   5821 	bufsize = sizeof (struct k_strrecvfd) + sizeof (frtn_t);
   5822 	srf = kmem_alloc(bufsize, KM_NOSLEEP);
   5823 	if (srf == NULL) {
   5824 		retval = EAGAIN;
   5825 		goto out;
   5826 	}
   5827 
   5828 	frtnp = (frtn_t *)(srf + 1);
   5829 	frtnp->free_arg = (caddr_t)srf;
   5830 	frtnp->free_func = free_passfp;
   5831 
   5832 	mp = esballoc((uchar_t *)srf, bufsize, BPRI_MED, frtnp);
   5833 	if (mp == NULL) {
   5834 		kmem_free(srf, bufsize);
   5835 		retval = EAGAIN;
   5836 		goto out;
   5837 	}
   5838 	mp->b_wptr += sizeof (struct k_strrecvfd);
   5839 	mp->b_datap->db_type = M_PASSFP;
   5840 
   5841 	srf->fp = fp;
   5842 	srf->uid = crgetuid(curthread->t_cred);
   5843 	srf->gid = crgetgid(curthread->t_cred);
   5844 	mutex_enter(&fp->f_tlock);
   5845 	fp->f_count++;
   5846 	mutex_exit(&fp->f_tlock);
   5847 
   5848 	put(qp, mp);
   5849 out:
   5850 	releasestr(stp->sd_wrq);
   5851 	if (mate)
   5852 		releasestr(mate);
   5853 	return (retval);
   5854 }
   5855 
   5856 /*
   5857  * Send an ioctl message downstream and wait for acknowledgement.
   5858  * flags may be set to either U_TO_K or K_TO_K and a combination
   5859  * of STR_NOERROR or STR_NOSIG
   5860  * STR_NOSIG: Signals are essentially ignored or held and have
   5861  *	no effect for the duration of the call.
   5862  * STR_NOERROR: Ignores stream head read, write and hup errors.
   5863  *	Additionally, if an existing ioctl times out, it is assumed
   5864  *	lost and and this ioctl will continue as if the previous ioctl had
   5865  *	finished.  ETIME may be returned if this ioctl times out (i.e.
   5866  *	ic_timout is not INFTIM).  Non-stream head errors may be returned if
   5867  *	the ioc_error indicates that the driver/module had problems,
   5868  *	an EFAULT was found when accessing user data, a lack of
   5869  * 	resources, etc.
   5870  */
   5871 int
   5872 strdoioctl(
   5873 	struct stdata *stp,
   5874 	struct strioctl *strioc,
   5875 	int fflags,		/* file flags with model info */
   5876 	int flag,
   5877 	cred_t *crp,
   5878 	int *rvalp)
   5879 {
   5880 	mblk_t *bp;
   5881 	struct iocblk *iocbp;
   5882 	struct copyreq *reqp;
   5883 	struct copyresp *resp;
   5884 	int id;
   5885 	int transparent = 0;
   5886 	int error = 0;
   5887 	int len = 0;
   5888 	caddr_t taddr;
   5889 	int copyflag = (flag & (U_TO_K | K_TO_K));
   5890 	int sigflag = (flag & STR_NOSIG);
   5891 	int errs;
   5892 	uint_t waitflags;
   5893 
   5894 	ASSERT(copyflag == U_TO_K || copyflag == K_TO_K);
   5895 	ASSERT((fflags & FMODELS) != 0);
   5896 
   5897 	TRACE_2(TR_FAC_STREAMS_FR,
   5898 	    TR_STRDOIOCTL,
   5899 	    "strdoioctl:stp %p strioc %p", stp, strioc);
   5900 	if (strioc->ic_len == TRANSPARENT) {	/* send arg in M_DATA block */
   5901 		transparent = 1;
   5902 		strioc->ic_len = sizeof (intptr_t);
   5903 	}
   5904 
   5905 	if (strioc->ic_len < 0 || (strmsgsz > 0 && strioc->ic_len > strmsgsz))
   5906 		return (EINVAL);
   5907 
   5908 	if ((bp = allocb_cred_wait(sizeof (union ioctypes), sigflag, &error,
   5909 	    crp, curproc->p_pid)) == NULL)
   5910 			return (error);
   5911 
   5912 	bzero(bp->b_wptr, sizeof (union ioctypes));
   5913 
   5914 	iocbp = (struct iocblk *)bp->b_wptr;
   5915 	iocbp->ioc_count = strioc->ic_len;
   5916 	iocbp->ioc_cmd = strioc->ic_cmd;
   5917 	iocbp->ioc_flag = (fflags & FMODELS);
   5918 
   5919 	crhold(crp);
   5920 	iocbp->ioc_cr = crp;
   5921 	DB_TYPE(bp) = M_IOCTL;
   5922 	bp->b_wptr += sizeof (struct iocblk);
   5923 
   5924 	if (flag & STR_NOERROR)
   5925 		errs = STPLEX;
   5926 	else
   5927 		errs = STRHUP|STRDERR|STWRERR|STPLEX;
   5928 
   5929 	/*
   5930 	 * If there is data to copy into ioctl block, do so.
   5931 	 */
   5932 	if (iocbp->ioc_count > 0) {
   5933 		if (transparent)
   5934 			/*
   5935 			 * Note: STR_NOERROR does not have an effect
   5936 			 * in putiocd()
   5937 			 */
   5938 			id = K_TO_K | sigflag;
   5939 		else
   5940 			id = flag;
   5941 		if ((error = putiocd(bp, strioc->ic_dp, id, crp)) != 0) {
   5942 			freemsg(bp);
   5943 			crfree(crp);
   5944 			return (error);
   5945 		}
   5946 
   5947 		/*
   5948 		 * We could have slept copying in user pages.
   5949 		 * Recheck the stream head state (the other end
   5950 		 * of a pipe could have gone away).
   5951 		 */
   5952 		if (stp->sd_flag & errs) {
   5953 			mutex_enter(&stp->sd_lock);
   5954 			error = strgeterr(stp, errs, 0);
   5955 			mutex_exit(&stp->sd_lock);
   5956 			if (error != 0) {
   5957 				freemsg(bp);
   5958 				crfree(crp);
   5959 				return (error);
   5960 			}
   5961 		}
   5962 	}
   5963 	if (transparent)
   5964 		iocbp->ioc_count = TRANSPARENT;
   5965 
   5966 	/*
   5967 	 * Block for up to STRTIMOUT milliseconds if there is an outstanding
   5968 	 * ioctl for this stream already running.  All processes
   5969 	 * sleeping here will be awakened as a result of an ACK
   5970 	 * or NAK being received for the outstanding ioctl, or
   5971 	 * as a result of the timer expiring on the outstanding
   5972 	 * ioctl (a failure), or as a result of any waiting
   5973 	 * process's timer expiring (also a failure).
   5974 	 */
   5975 
   5976 	error = 0;
   5977 	mutex_enter(&stp->sd_lock);
   5978 	while (stp->sd_flag & (IOCWAIT | IOCWAITNE)) {
   5979 		clock_t cv_rval;
   5980 
   5981 		TRACE_0(TR_FAC_STREAMS_FR,
   5982 		    TR_STRDOIOCTL_WAIT,
   5983 		    "strdoioctl sleeps - IOCWAIT");
   5984 		cv_rval = str_cv_wait(&stp->sd_iocmonitor, &stp->sd_lock,
   5985 		    STRTIMOUT, sigflag);
   5986 		if (cv_rval <= 0) {
   5987 			if (cv_rval == 0) {
   5988 				error = EINTR;
   5989 			} else {
   5990 				if (flag & STR_NOERROR) {
   5991 					/*
   5992 					 * Terminating current ioctl in
   5993 					 * progress -- assume it got lost and
   5994 					 * wake up the other thread so that the
   5995 					 * operation completes.
   5996 					 */
   5997 					if (!(stp->sd_flag & IOCWAITNE)) {
   5998 						stp->sd_flag |= IOCWAITNE;
   5999 						cv_broadcast(&stp->sd_monitor);
   6000 					}
   6001 					/*
   6002 					 * Otherwise, there's a running
   6003 					 * STR_NOERROR -- we have no choice
   6004 					 * here but to wait forever (or until
   6005 					 * interrupted).
   6006 					 */
   6007 				} else {
   6008 					/*
   6009 					 * pending ioctl has caused
   6010 					 * us to time out
   6011 					 */
   6012 					error = ETIME;
   6013 				}
   6014 			}
   6015 		} else if ((stp->sd_flag & errs)) {
   6016 			error = strgeterr(stp, errs, 0);
   6017 		}
   6018 		if (error) {
   6019 			mutex_exit(&stp->sd_lock);
   6020 			freemsg(bp);
   6021 			crfree(crp);
   6022 			return (error);
   6023 		}
   6024 	}
   6025 
   6026 	/*
   6027 	 * Have control of ioctl mechanism.
   6028 	 * Send down ioctl packet and wait for response.
   6029 	 */
   6030 	if (stp->sd_iocblk != (mblk_t *)-1) {
   6031 		freemsg(stp->sd_iocblk);
   6032 	}
   6033 	stp->sd_iocblk = NULL;
   6034 
   6035 	/*
   6036 	 * If this is marked with 'noerror' (internal; mostly
   6037 	 * I_{P,}{UN,}LINK), then make sure nobody else is able to get
   6038 	 * in here by setting IOCWAITNE.
   6039 	 */
   6040 	waitflags = IOCWAIT;
   6041 	if (flag & STR_NOERROR)
   6042 		waitflags |= IOCWAITNE;
   6043 
   6044 	stp->sd_flag |= waitflags;
   6045 
   6046 	/*
   6047 	 * Assign sequence number.
   6048 	 */
   6049 	iocbp->ioc_id = stp->sd_iocid = getiocseqno();
   6050 
   6051 	mutex_exit(&stp->sd_lock);
   6052 
   6053 	TRACE_1(TR_FAC_STREAMS_FR,
   6054 	    TR_STRDOIOCTL_PUT, "strdoioctl put: stp %p", stp);
   6055 	stream_willservice(stp);
   6056 	putnext(stp->sd_wrq, bp);
   6057 	stream_runservice(stp);
   6058 
   6059 	/*
   6060 	 * Timed wait for acknowledgment.  The wait time is limited by the
   6061 	 * timeout value, which must be a positive integer (number of
   6062 	 * milliseconds) to wait, or 0 (use default value of STRTIMOUT
   6063 	 * milliseconds), or -1 (wait forever).  This will be awakened
   6064 	 * either by an ACK/NAK message arriving, the timer expiring, or
   6065 	 * the timer expiring on another ioctl waiting for control of the
   6066 	 * mechanism.
   6067 	 */
   6068 waitioc:
   6069 	mutex_enter(&stp->sd_lock);
   6070 
   6071 
   6072 	/*
   6073 	 * If the reply has already arrived, don't sleep.  If awakened from
   6074 	 * the sleep, fail only if the reply has not arrived by then.
   6075 	 * Otherwise, process the reply.
   6076 	 */
   6077 	while (!stp->sd_iocblk) {
   6078 		clock_t cv_rval;
   6079 
   6080 		if (stp->sd_flag & errs) {
   6081 			error = strgeterr(stp, errs, 0);
   6082 			if (error != 0) {
   6083 				stp->sd_flag &= ~waitflags;
   6084 				cv_broadcast(&stp->sd_iocmonitor);
   6085 				mutex_exit(&stp->sd_lock);
   6086 				crfree(crp);
   6087 				return (error);
   6088 			}
   6089 		}
   6090 
   6091 		TRACE_0(TR_FAC_STREAMS_FR,
   6092 		    TR_STRDOIOCTL_WAIT2,
   6093 		    "strdoioctl sleeps awaiting reply");
   6094 		ASSERT(error == 0);
   6095 
   6096 		cv_rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock,
   6097 		    (strioc->ic_timout ?
   6098 		    strioc->ic_timout * 1000 : STRTIMOUT), sigflag);
   6099 
   6100 		/*
   6101 		 * There are four possible cases here: interrupt, timeout,
   6102 		 * wakeup by IOCWAITNE (above), or wakeup by strrput_nondata (a
   6103 		 * valid M_IOCTL reply).
   6104 		 *
   6105 		 * If we've been awakened by a STR_NOERROR ioctl on some other
   6106 		 * thread, then sd_iocblk will still be NULL, and IOCWAITNE
   6107 		 * will be set.  Pretend as if we just timed out.  Note that
   6108 		 * this other thread waited at least STRTIMOUT before trying to
   6109 		 * awaken our thread, so this is indistinguishable (even for
   6110 		 * INFTIM) from the case where we failed with ETIME waiting on
   6111 		 * IOCWAIT in the prior loop.
   6112 		 */
   6113 		if (cv_rval > 0 && !(flag & STR_NOERROR) &&
   6114 		    stp->sd_iocblk == NULL && (stp->sd_flag & IOCWAITNE)) {
   6115 			cv_rval = -1;
   6116 		}
   6117 
   6118 		/*
   6119 		 * note: STR_NOERROR does not protect
   6120 		 * us here.. use ic_timout < 0
   6121 		 */
   6122 		if (cv_rval <= 0) {
   6123 			if (cv_rval == 0) {
   6124 				error = EINTR;
   6125 			} else {
   6126 				error =  ETIME;
   6127 			}
   6128 			/*
   6129 			 * A message could have come in after we were scheduled
   6130 			 * but before we were actually run.
   6131 			 */
   6132 			bp = stp->sd_iocblk;
   6133 			stp->sd_iocblk = NULL;
   6134 			if (bp != NULL) {
   6135 				if ((bp->b_datap->db_type == M_COPYIN) ||
   6136 				    (bp->b_datap->db_type == M_COPYOUT)) {
   6137 					mutex_exit(&stp->sd_lock);
   6138 					if (bp->b_cont) {
   6139 						freemsg(bp->b_cont);
   6140 						bp->b_cont = NULL;
   6141 					}
   6142 					bp->b_datap->db_type = M_IOCDATA;
   6143 					bp->b_wptr = bp->b_rptr +
   6144 					    sizeof (struct copyresp);
   6145 					resp = (struct copyresp *)bp->b_rptr;
   6146 					resp->cp_rval =
   6147 					    (caddr_t)1; /* failure */
   6148 					stream_willservice(stp);
   6149 					putnext(stp->sd_wrq, bp);
   6150 					stream_runservice(stp);
   6151 					mutex_enter(&stp->sd_lock);
   6152 				} else {
   6153 					freemsg(bp);
   6154 				}
   6155 			}
   6156 			stp->sd_flag &= ~waitflags;
   6157 			cv_broadcast(&stp->sd_iocmonitor);
   6158 			mutex_exit(&stp->sd_lock);
   6159 			crfree(crp);
   6160 			return (error);
   6161 		}
   6162 	}
   6163 	bp = stp->sd_iocblk;
   6164 	/*
   6165 	 * Note: it is strictly impossible to get here with sd_iocblk set to
   6166 	 * -1.  This is because the initial loop above doesn't allow any new
   6167 	 * ioctls into the fray until all others have passed this point.
   6168 	 */
   6169 	ASSERT(bp != NULL && bp != (mblk_t *)-1);
   6170 	TRACE_1(TR_FAC_STREAMS_FR,
   6171 	    TR_STRDOIOCTL_ACK, "strdoioctl got reply: bp %p", bp);
   6172 	if ((bp->b_datap->db_type == M_IOCACK) ||
   6173 	    (bp->b_datap->db_type == M_IOCNAK)) {
   6174 		/* for detection of duplicate ioctl replies */
   6175 		stp->sd_iocblk = (mblk_t *)-1;
   6176 		stp->sd_flag &= ~waitflags;
   6177 		cv_broadcast(&stp->sd_iocmonitor);
   6178 		mutex_exit(&stp->sd_lock);
   6179 	} else {
   6180 		/*
   6181 		 * flags not cleared here because we're still doing
   6182 		 * copy in/out for ioctl.
   6183 		 */
   6184 		stp->sd_iocblk = NULL;
   6185 		mutex_exit(&stp->sd_lock);
   6186 	}
   6187 
   6188 
   6189 	/*
   6190 	 * Have received acknowledgment.
   6191 	 */
   6192 
   6193 	switch (bp->b_datap->db_type) {
   6194 	case M_IOCACK:
   6195 		/*
   6196 		 * Positive ack.
   6197 		 */
   6198 		iocbp = (struct iocblk *)bp->b_rptr;
   6199 
   6200 		/*
   6201 		 * Set error if indicated.
   6202 		 */
   6203 		if (iocbp->ioc_error) {
   6204 			error = iocbp->ioc_error;
   6205 			break;
   6206 		}
   6207 
   6208 		/*
   6209 		 * Set return value.
   6210 		 */
   6211 		*rvalp = iocbp->ioc_rval;
   6212 
   6213 		/*
   6214 		 * Data may have been returned in ACK message (ioc_count > 0).
   6215 		 * If so, copy it out to the user's buffer.
   6216 		 */
   6217 		if (iocbp->ioc_count && !transparent) {
   6218 			if (error = getiocd(bp, strioc->ic_dp, copyflag))
   6219 				break;
   6220 		}
   6221 		if (!transparent) {
   6222 			if (len)	/* an M_COPYOUT was used with I_STR */
   6223 				strioc->ic_len = len;
   6224 			else
   6225 				strioc->ic_len = (int)iocbp->ioc_count;
   6226 		}
   6227 		break;
   6228 
   6229 	case M_IOCNAK:
   6230 		/*
   6231 		 * Negative ack.
   6232 		 *
   6233 		 * The only thing to do is set error as specified
   6234 		 * in neg ack packet.
   6235 		 */
   6236 		iocbp = (struct iocblk *)bp->b_rptr;
   6237 
   6238 		error = (iocbp->ioc_error ? iocbp->ioc_error : EINVAL);
   6239 		break;
   6240 
   6241 	case M_COPYIN:
   6242 		/*
   6243 		 * Driver or module has requested user ioctl data.
   6244 		 */
   6245 		reqp = (struct copyreq *)bp->b_rptr;
   6246 
   6247 		/*
   6248 		 * M_COPYIN should *never* have a message attached, though
   6249 		 * it's harmless if it does -- thus, panic on a DEBUG
   6250 		 * kernel and just free it on a non-DEBUG build.
   6251 		 */
   6252 		ASSERT(bp->b_cont == NULL);
   6253 		if (bp->b_cont != NULL) {
   6254 			freemsg(bp->b_cont);
   6255 			bp->b_cont = NULL;
   6256 		}
   6257 
   6258 		error = putiocd(bp, reqp->cq_addr, flag, crp);
   6259 		if (error && bp->b_cont) {
   6260 			freemsg(bp->b_cont);
   6261 			bp->b_cont = NULL;
   6262 		}
   6263 
   6264 		bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
   6265 		bp->b_datap->db_type = M_IOCDATA;
   6266 
   6267 		mblk_setcred(bp, crp, curproc->p_pid);
   6268 		resp = (struct copyresp *)bp->b_rptr;
   6269 		resp->cp_rval = (caddr_t)(uintptr_t)error;
   6270 		resp->cp_flag = (fflags & FMODELS);
   6271 
   6272 		stream_willservice(stp);
   6273 		putnext(stp->sd_wrq, bp);
   6274 		stream_runservice(stp);
   6275 
   6276 		if (error) {
   6277 			mutex_enter(&stp->sd_lock);
   6278 			stp->sd_flag &= ~waitflags;
   6279 			cv_broadcast(&stp->sd_iocmonitor);
   6280 			mutex_exit(&stp->sd_lock);
   6281 			crfree(crp);
   6282 			return (error);
   6283 		}
   6284 
   6285 		goto waitioc;
   6286 
   6287 	case M_COPYOUT:
   6288 		/*
   6289 		 * Driver or module has ioctl data for a user.
   6290 		 */
   6291 		reqp = (struct copyreq *)bp->b_rptr;
   6292 		ASSERT(bp->b_cont != NULL);
   6293 
   6294 		/*
   6295 		 * Always (transparent or non-transparent )
   6296 		 * use the address specified in the request
   6297 		 */
   6298 		taddr = reqp->cq_addr;
   6299 		if (!transparent)
   6300 			len = (int)reqp->cq_size;
   6301 
   6302 		/* copyout data to the provided address */
   6303 		error = getiocd(bp, taddr, copyflag);
   6304 
   6305 		freemsg(bp->b_cont);
   6306 		bp->b_cont = NULL;
   6307 
   6308 		bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
   6309 		bp->b_datap->db_type = M_IOCDATA;
   6310 
   6311 		mblk_setcred(bp, crp, curproc->p_pid);
   6312 		resp = (struct copyresp *)bp->b_rptr;
   6313 		resp->cp_rval = (caddr_t)(uintptr_t)error;
   6314 		resp->cp_flag = (fflags & FMODELS);
   6315 
   6316 		stream_willservice(stp);
   6317 		putnext(stp->sd_wrq, bp);
   6318 		stream_runservice(stp);
   6319 
   6320 		if (error) {
   6321 			mutex_enter(&stp->sd_lock);
   6322 			stp->sd_flag &= ~waitflags;
   6323 			cv_broadcast(&stp->sd_iocmonitor);
   6324 			mutex_exit(&stp->sd_lock);
   6325 			crfree(crp);
   6326 			return (error);
   6327 		}
   6328 		goto waitioc;
   6329 
   6330 	default:
   6331 		ASSERT(0);
   6332 		mutex_enter(&stp->sd_lock);
   6333 		stp->sd_flag &= ~waitflags;
   6334 		cv_broadcast(&stp->sd_iocmonitor);
   6335 		mutex_exit(&stp->sd_lock);
   6336 		break;
   6337 	}
   6338 
   6339 	freemsg(bp);
   6340 	crfree(crp);
   6341 	return (error);
   6342 }
   6343 
   6344 /*
   6345  * Send an M_CMD message downstream and wait for a reply.  This is a ptools
   6346  * special used to retrieve information from modules/drivers a stream without
   6347  * being subjected to flow control or interfering with pending messages on the
   6348  * stream (e.g. an ioctl in flight).
   6349  */
   6350 int
   6351 strdocmd(struct stdata *stp, struct strcmd *scp, cred_t *crp)
   6352 {
   6353 	mblk_t *mp;
   6354 	struct cmdblk *cmdp;
   6355 	int error = 0;
   6356 	int errs = STRHUP|STRDERR|STWRERR|STPLEX;
   6357 	clock_t rval, timeout = STRTIMOUT;
   6358 
   6359 	if (scp->sc_len < 0 || scp->sc_len > sizeof (scp->sc_buf) ||
   6360 	    scp->sc_timeout < -1)
   6361 		return (EINVAL);
   6362 
   6363 	if (scp->sc_timeout > 0)
   6364 		timeout = scp->sc_timeout * MILLISEC;
   6365 
   6366 	if ((mp = allocb_cred(sizeof (struct cmdblk), crp,
   6367 	    curproc->p_pid)) == NULL)
   6368 		return (ENOMEM);
   6369 
   6370 	crhold(crp);
   6371 
   6372 	cmdp = (struct cmdblk *)mp->b_wptr;
   6373 	cmdp->cb_cr = crp;
   6374 	cmdp->cb_cmd = scp->sc_cmd;
   6375 	cmdp->cb_len = scp->sc_len;
   6376 	cmdp->cb_error = 0;
   6377 	mp->b_wptr += sizeof (struct cmdblk);
   6378 
   6379 	DB_TYPE(mp) = M_CMD;
   6380 	DB_CPID(mp) = curproc->p_pid;
   6381 
   6382 	/*
   6383 	 * Copy in the payload.
   6384 	 */
   6385 	if (cmdp->cb_len > 0) {
   6386 		mp->b_cont = allocb_cred(sizeof (scp->sc_buf), crp,
   6387 		    curproc->p_pid);
   6388 		if (mp->b_cont == NULL) {
   6389 			error = ENOMEM;
   6390 			goto out;
   6391 		}
   6392 
   6393 		/* cb_len comes from sc_len, which has already been checked */
   6394 		ASSERT(cmdp->cb_len <= sizeof (scp->sc_buf));
   6395 		(void) bcopy(scp->sc_buf, mp->b_cont->b_wptr, cmdp->cb_len);
   6396 		mp->b_cont->b_wptr += cmdp->cb_len;
   6397 		DB_CPID(mp->b_cont) = curproc->p_pid;
   6398 	}
   6399 
   6400 	/*
   6401 	 * Since this mechanism is strictly for ptools, and since only one
   6402 	 * process can be grabbed at a time, we simply fail if there's
   6403 	 * currently an operation pending.
   6404 	 */
   6405 	mutex_enter(&stp->sd_lock);
   6406 	if (stp->sd_flag & STRCMDWAIT) {
   6407 		mutex_exit(&stp->sd_lock);
   6408 		error = EBUSY;
   6409 		goto out;
   6410 	}
   6411 	stp->sd_flag |= STRCMDWAIT;
   6412 	ASSERT(stp->sd_cmdblk == NULL);
   6413 	mutex_exit(&stp->sd_lock);
   6414 
   6415 	putnext(stp->sd_wrq, mp);
   6416 	mp = NULL;
   6417 
   6418 	/*
   6419 	 * Timed wait for acknowledgment.  If the reply has already arrived,
   6420 	 * don't sleep.  If awakened from the sleep, fail only if the reply
   6421 	 * has not arrived by then.  Otherwise, process the reply.
   6422 	 */
   6423 	mutex_enter(&stp->sd_lock);
   6424 	while (stp->sd_cmdblk == NULL) {
   6425 		if (stp->sd_flag & errs) {
   6426 			if ((error = strgeterr(stp, errs, 0)) != 0)
   6427 				goto waitout;
   6428 		}
   6429 
   6430 		rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock, timeout, 0);
   6431 		if (stp->sd_cmdblk != NULL)
   6432 			break;
   6433 
   6434 		if (rval <= 0) {
   6435 			error = (rval == 0) ? EINTR : ETIME;
   6436 			goto waitout;
   6437 		}
   6438 	}
   6439 
   6440 	/*
   6441 	 * We received a reply.
   6442 	 */
   6443 	mp = stp->sd_cmdblk;
   6444 	stp->sd_cmdblk = NULL;
   6445 	ASSERT(mp != NULL && DB_TYPE(mp) == M_CMD);
   6446 	ASSERT(stp->sd_flag & STRCMDWAIT);
   6447 	stp->sd_flag &= ~STRCMDWAIT;
   6448 	mutex_exit(&stp->sd_lock);
   6449 
   6450 	cmdp = (struct cmdblk *)mp->b_rptr;
   6451 	if ((error = cmdp->cb_error) != 0)
   6452 		goto out;
   6453 
   6454 	/*
   6455 	 * Data may have been returned in the reply (cb_len > 0).
   6456 	 * If so, copy it out to the user's buffer.
   6457 	 */
   6458 	if (cmdp->cb_len > 0) {
   6459 		if (mp->b_cont == NULL || MBLKL(mp->b_cont) < cmdp->cb_len) {
   6460 			error = EPROTO;
   6461 			goto out;
   6462 		}
   6463 
   6464 		cmdp->cb_len = MIN(cmdp->cb_len, sizeof (scp->sc_buf));
   6465 		(void) bcopy(mp->b_cont->b_rptr, scp->sc_buf, cmdp->cb_len);
   6466 	}
   6467 	scp->sc_len = cmdp->cb_len;
   6468 out:
   6469 	freemsg(mp);
   6470 	crfree(crp);
   6471 	return (error);
   6472 waitout:
   6473 	ASSERT(stp->sd_cmdblk == NULL);
   6474 	stp->sd_flag &= ~STRCMDWAIT;
   6475 	mutex_exit(&stp->sd_lock);
   6476 	crfree(crp);
   6477 	return (error);
   6478 }
   6479 
   6480 /*
   6481  * For the SunOS keyboard driver.
   6482  * Return the next available "ioctl" sequence number.
   6483  * Exported, so that streams modules can send "ioctl" messages
   6484  * downstream from their open routine.
   6485  */
   6486 int
   6487 getiocseqno(void)
   6488 {
   6489 	int	i;
   6490 
   6491 	mutex_enter(&strresources);
   6492 	i = ++ioc_id;
   6493 	mutex_exit(&strresources);
   6494 	return (i);
   6495 }
   6496 
   6497 /*
   6498  * Get the next message from the read queue.  If the message is
   6499  * priority, STRPRI will have been set by strrput().  This flag
   6500  * should be reset only when the entire message at the front of the
   6501  * queue as been consumed.
   6502  *
   6503  * NOTE: strgetmsg and kstrgetmsg have much of the logic in common.
   6504  */
   6505 int
   6506 strgetmsg(
   6507 	struct vnode *vp,
   6508 	struct strbuf *mctl,
   6509 	struct strbuf *mdata,
   6510 	unsigned char *prip,
   6511 	int *flagsp,
   6512 	int fmode,
   6513 	rval_t *rvp)
   6514 {
   6515 	struct stdata *stp;
   6516 	mblk_t *bp, *nbp;
   6517 	mblk_t *savemp = NULL;
   6518 	mblk_t *savemptail = NULL;
   6519 	uint_t old_sd_flag;
   6520 	int flg;
   6521 	int more = 0;
   6522 	int error = 0;
   6523 	char first = 1;
   6524 	uint_t mark;		/* Contains MSG*MARK and _LASTMARK */
   6525 #define	_LASTMARK	0x8000	/* Distinct from MSG*MARK */
   6526 	unsigned char pri = 0;
   6527 	queue_t *q;
   6528 	int	pr = 0;			/* Partial read successful */
   6529 	struct uio uios;
   6530 	struct uio *uiop = &uios;
   6531 	struct iovec iovs;
   6532 	unsigned char type;
   6533 
   6534 	TRACE_1(TR_FAC_STREAMS_FR, TR_STRGETMSG_ENTER,
   6535 	    "strgetmsg:%p", vp);
   6536 
   6537 	ASSERT(vp->v_stream);
   6538 	stp = vp->v_stream;
   6539 	rvp->r_val1 = 0;
   6540 
   6541 	mutex_enter(&stp->sd_lock);
   6542 
   6543 	if ((error = i_straccess(stp, JCREAD)) != 0) {
   6544 		mutex_exit(&stp->sd_lock);
   6545 		return (error);
   6546 	}
   6547 
   6548 	if (stp->sd_flag & (STRDERR|STPLEX)) {
   6549 		error = strgeterr(stp, STRDERR|STPLEX, 0);
   6550 		if (error != 0) {
   6551 			mutex_exit(&stp->sd_lock);
   6552 			return (error);
   6553 		}
   6554 	}
   6555 	mutex_exit(&stp->sd_lock);
   6556 
   6557 	switch (*flagsp) {
   6558 	case MSG_HIPRI:
   6559 		if (*prip != 0)
   6560 			return (EINVAL);
   6561 		break;
   6562 
   6563 	case MSG_ANY:
   6564 	case MSG_BAND:
   6565 		break;
   6566 
   6567 	default:
   6568 		return (EINVAL);
   6569 	}
   6570 	/*
   6571 	 * Setup uio and iov for data part
   6572 	 */
   6573 	iovs.iov_base = mdata->buf;
   6574 	iovs.iov_len = mdata->maxlen;
   6575 	uios.uio_iov = &iovs;
   6576 	uios.uio_iovcnt = 1;
   6577 	uios.uio_loffset = 0;
   6578 	uios.uio_segflg = UIO_USERSPACE;
   6579 	uios.uio_fmode = 0;
   6580 	uios.uio_extflg = UIO_COPY_CACHED;
   6581 	uios.uio_resid = mdata->maxlen;
   6582 	uios.uio_offset = 0;
   6583 
   6584 	q = _RD(stp->sd_wrq);
   6585 	mutex_enter(&stp->sd_lock);
   6586 	old_sd_flag = stp->sd_flag;
   6587 	mark = 0;
   6588 	for (;;) {
   6589 		int done = 0;
   6590 		mblk_t *q_first = q->q_first;
   6591 
   6592 		/*
   6593 		 * Get the next message of appropriate priority
   6594 		 * from the stream head.  If the caller is interested
   6595 		 * in band or hipri messages, then they should already
   6596 		 * be enqueued at the stream head.  On the other hand
   6597 		 * if the caller wants normal (band 0) messages, they
   6598 		 * might be deferred in a synchronous stream and they
   6599 		 * will need to be pulled up.
   6600 		 *
   6601 		 * After we have dequeued a message, we might find that
   6602 		 * it was a deferred M_SIG that was enqueued at the
   6603 		 * stream head.  It must now be posted as part of the
   6604 		 * read by calling strsignal_nolock().
   6605 		 *
   6606 		 * Also note that strrput does not enqueue an M_PCSIG,
   6607 		 * and there cannot be more than one hipri message,
   6608 		 * so there was no need to have the M_PCSIG case.
   6609 		 *
   6610 		 * At some time it might be nice to try and wrap the
   6611 		 * functionality of kstrgetmsg() and strgetmsg() into
   6612 		 * a common routine so to reduce the amount of replicated
   6613 		 * code (since they are extremely similar).
   6614 		 */
   6615 		if (!(*flagsp & (MSG_HIPRI|MSG_BAND))) {
   6616 			/* Asking for normal, band0 data */
   6617 			bp = strget(stp, q, uiop, first, &error);
   6618 			ASSERT(MUTEX_HELD(&stp->sd_lock));
   6619 			if (bp != NULL) {
   6620 				if (DB_TYPE(bp) == M_SIG) {
   6621 					strsignal_nolock(stp, *bp->b_rptr,
   6622 					    bp->b_band);
   6623 					freemsg(bp);
   6624 					continue;
   6625 				} else {
   6626 					break;
   6627 				}
   6628 			}
   6629 			if (error != 0)
   6630 				goto getmout;
   6631 
   6632 		/*
   6633 		 * We can't depend on the value of STRPRI here because
   6634 		 * the stream head may be in transit. Therefore, we
   6635 		 * must look at the type of the first message to
   6636 		 * determine if a high priority messages is waiting
   6637 		 */
   6638 		} else if ((*flagsp & MSG_HIPRI) && q_first != NULL &&
   6639 		    DB_TYPE(q_first) >= QPCTL &&
   6640 		    (bp = getq_noenab(q, 0)) != NULL) {
   6641 			/* Asked for HIPRI and got one */
   6642 			ASSERT(DB_TYPE(bp) >= QPCTL);
   6643 			break;
   6644 		} else if ((*flagsp & MSG_BAND) && q_first != NULL &&
   6645 		    ((q_first->b_band >= *prip) || DB_TYPE(q_first) >= QPCTL) &&
   6646 		    (bp = getq_noenab(q, 0)) != NULL) {
   6647 			/*
   6648 			 * Asked for at least band "prip" and got either at
   6649 			 * least that band or a hipri message.
   6650 			 */
   6651 			ASSERT(bp->b_band >= *prip || DB_TYPE(bp) >= QPCTL);
   6652 			if (DB_TYPE(bp) == M_SIG) {
   6653 				strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
   6654 				freemsg(bp);
   6655 				continue;
   6656 			} else {
   6657 				break;
   6658 			}
   6659 		}
   6660 
   6661 		/* No data. Time to sleep? */
   6662 		qbackenable(q, 0);
   6663 
   6664 		/*
   6665 		 * If STRHUP or STREOF, return 0 length control and data.
   6666 		 * If resid is 0, then a read(fd,buf,0) was done. Do not
   6667 		 * sleep to satisfy this request because by default we have
   6668 		 * zero bytes to return.
   6669 		 */
   6670 		if ((stp->sd_flag & (STRHUP|STREOF)) || (mctl->maxlen == 0 &&
   6671 		    mdata->maxlen == 0)) {
   6672 			mctl->len = mdata->len = 0;
   6673 			*flagsp = 0;
   6674 			mutex_exit(&stp->sd_lock);
   6675 			return (0);
   6676 		}
   6677 		TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_WAIT,
   6678 		    "strgetmsg calls strwaitq:%p, %p",
   6679 		    vp, uiop);
   6680 		if (((error = strwaitq(stp, GETWAIT, (ssize_t)0, fmode, -1,
   6681 		    &done)) != 0) || done) {
   6682 			TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_DONE,
   6683 			    "strgetmsg error or done:%p, %p",
   6684 			    vp, uiop);
   6685 			mutex_exit(&stp->sd_lock);
   6686 			return (error);
   6687 		}
   6688 		TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_AWAKE,
   6689 		    "strgetmsg awakes:%p, %p", vp, uiop);
   6690 		if ((error = i_straccess(stp, JCREAD)) != 0) {
   6691 			mutex_exit(&stp->sd_lock);
   6692 			return (error);
   6693 		}
   6694 		first = 0;
   6695 	}
   6696 	ASSERT(bp != NULL);
   6697 	/*
   6698 	 * Extract any mark information. If the message is not completely
   6699 	 * consumed this information will be put in the mblk
   6700 	 * that is putback.
   6701 	 * If MSGMARKNEXT is set and the message is completely consumed
   6702 	 * the STRATMARK flag will be set below. Likewise, if
   6703 	 * MSGNOTMARKNEXT is set and the message is
   6704 	 * completely consumed STRNOTATMARK will be set.
   6705 	 */
   6706 	mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
   6707 	ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
   6708 	    (MSGMARKNEXT|MSGNOTMARKNEXT));
   6709 	if (mark != 0 && bp == stp->sd_mark) {
   6710 		mark |= _LASTMARK;
   6711 		stp->sd_mark = NULL;
   6712 	}
   6713 	/*
   6714 	 * keep track of the original message type and priority
   6715 	 */
   6716 	pri = bp->b_band;
   6717 	type = bp->b_datap->db_type;
   6718 	if (type == M_PASSFP) {
   6719 		if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
   6720 			stp->sd_mark = bp;
   6721 		bp->b_flag |= mark & ~_LASTMARK;
   6722 		putback(stp, q, bp, pri);
   6723 		qbackenable(q, pri);
   6724 		mutex_exit(&stp->sd_lock);
   6725 		return (EBADMSG);
   6726 	}
   6727 	ASSERT(type != M_SIG);
   6728 
   6729 	/*
   6730 	 * Set this flag so strrput will not generate signals. Need to
   6731 	 * make sure this flag is cleared before leaving this routine
   6732 	 * else signals will stop being sent.
   6733 	 */
   6734 	stp->sd_flag |= STRGETINPROG;
   6735 	mutex_exit(&stp->sd_lock);
   6736 
   6737 	if (STREAM_NEEDSERVICE(stp))
   6738 		stream_runservice(stp);
   6739 
   6740 	/*
   6741 	 * Set HIPRI flag if message is priority.
   6742 	 */
   6743 	if (type >= QPCTL)
   6744 		flg = MSG_HIPRI;
   6745 	else
   6746 		flg = MSG_BAND;
   6747 
   6748 	/*
   6749 	 * First process PROTO or PCPROTO blocks, if any.
   6750 	 */
   6751 	if (mctl->maxlen >= 0 && type != M_DATA) {
   6752 		size_t	n, bcnt;
   6753 		char	*ubuf;
   6754 
   6755 		bcnt = mctl->maxlen;
   6756 		ubuf = mctl->buf;
   6757 		while (bp != NULL && bp->b_datap->db_type != M_DATA) {
   6758 			if ((n = MIN(bcnt, bp->b_wptr - bp->b_rptr)) != 0 &&
   6759 			    copyout(bp->b_rptr, ubuf, n)) {
   6760 				error = EFAULT;
   6761 				mutex_enter(&stp->sd_lock);
   6762 				/*
   6763 				 * clear stream head pri flag based on
   6764 				 * first message type
   6765 				 */
   6766 				if (type >= QPCTL) {
   6767 					ASSERT(type == M_PCPROTO);
   6768 					stp->sd_flag &= ~STRPRI;
   6769 				}
   6770 				more = 0;
   6771 				freemsg(bp);
   6772 				goto getmout;
   6773 			}
   6774 			ubuf += n;
   6775 			bp->b_rptr += n;
   6776 			if (bp->b_rptr >= bp->b_wptr) {
   6777 				nbp = bp;
   6778 				bp = bp->b_cont;
   6779 				freeb(nbp);
   6780 			}
   6781 			ASSERT(n <= bcnt);
   6782 			bcnt -= n;
   6783 			if (bcnt == 0)
   6784 				break;
   6785 		}
   6786 		mctl->len = mctl->maxlen - bcnt;
   6787 	} else
   6788 		mctl->len = -1;
   6789 
   6790 	if (bp && bp->b_datap->db_type != M_DATA) {
   6791 		/*
   6792 		 * More PROTO blocks in msg.
   6793 		 */
   6794 		more |= MORECTL;
   6795 		savemp = bp;
   6796 		while (bp && bp->b_datap->db_type != M_DATA) {
   6797 			savemptail = bp;
   6798 			bp = bp->b_cont;
   6799 		}
   6800 		savemptail->b_cont = NULL;
   6801 	}
   6802 
   6803 	/*
   6804 	 * Now process DATA blocks, if any.
   6805 	 */
   6806 	if (mdata->maxlen >= 0 && bp) {
   6807 		/*
   6808 		 * struiocopyout will consume a potential zero-length
   6809 		 * M_DATA even if uio_resid is zero.
   6810 		 */
   6811 		size_t oldresid = uiop->uio_resid;
   6812 
   6813 		bp = struiocopyout(bp, uiop, &error);
   6814 		if (error != 0) {
   6815 			mutex_enter(&stp->sd_lock);
   6816 			/*
   6817 			 * clear stream head hi pri flag based on
   6818 			 * first message
   6819 			 */
   6820 			if (type >= QPCTL) {
   6821 				ASSERT(type == M_PCPROTO);
   6822 				stp->sd_flag &= ~STRPRI;
   6823 			}
   6824 			more = 0;
   6825 			freemsg(savemp);
   6826 			goto getmout;
   6827 		}
   6828 		/*
   6829 		 * (pr == 1) indicates a partial read.
   6830 		 */
   6831 		if (oldresid > uiop->uio_resid)
   6832 			pr = 1;
   6833 		mdata->len = mdata->maxlen - uiop->uio_resid;
   6834 	} else
   6835 		mdata->len = -1;
   6836 
   6837 	if (bp) {			/* more data blocks in msg */
   6838 		more |= MOREDATA;
   6839 		if (savemp)
   6840 			savemptail->b_cont = bp;
   6841 		else
   6842 			savemp = bp;
   6843 	}
   6844 
   6845 	mutex_enter(&stp->sd_lock);
   6846 	if (savemp) {
   6847 		if (pr && (savemp->b_datap->db_type == M_DATA) &&
   6848 		    msgnodata(savemp)) {
   6849 			/*
   6850 			 * Avoid queuing a zero-length tail part of
   6851 			 * a message. pr=1 indicates that we read some of
   6852 			 * the message.
   6853 			 */
   6854 			freemsg(savemp);
   6855 			more &= ~MOREDATA;
   6856 			/*
   6857 			 * clear stream head hi pri flag based on
   6858 			 * first message
   6859 			 */
   6860 			if (type >= QPCTL) {
   6861 				ASSERT(type == M_PCPROTO);
   6862 				stp->sd_flag &= ~STRPRI;
   6863 			}
   6864 		} else {
   6865 			savemp->b_band = pri;
   6866 			/*
   6867 			 * If the first message was HIPRI and the one we're
   6868 			 * putting back isn't, then clear STRPRI, otherwise
   6869 			 * set STRPRI again.  Note that we must set STRPRI
   6870 			 * again since the flush logic in strrput_nondata()
   6871 			 * may have cleared it while we had sd_lock dropped.
   6872 			 */
   6873 			if (type >= QPCTL) {
   6874 				ASSERT(type == M_PCPROTO);
   6875 				if (queclass(savemp) < QPCTL)
   6876 					stp->sd_flag &= ~STRPRI;
   6877 				else
   6878 					stp->sd_flag |= STRPRI;
   6879 			} else if (queclass(savemp) >= QPCTL) {
   6880 				/*
   6881 				 * The first message was not a HIPRI message,
   6882 				 * but the one we are about to putback is.
   6883 				 * For simplicitly, we do not allow for HIPRI
   6884 				 * messages to be embedded in the message
   6885 				 * body, so just force it to same type as
   6886 				 * first message.
   6887 				 */
   6888 				ASSERT(type == M_DATA || type == M_PROTO);
   6889 				ASSERT(savemp->b_datap->db_type == M_PCPROTO);
   6890 				savemp->b_datap->db_type = type;
   6891 			}
   6892 			if (mark != 0) {
   6893 				savemp->b_flag |= mark & ~_LASTMARK;
   6894 				if ((mark & _LASTMARK) &&
   6895 				    (stp->sd_mark == NULL)) {
   6896 					/*
   6897 					 * If another marked message arrived
   6898 					 * while sd_lock was not held sd_mark
   6899 					 * would be non-NULL.
   6900 					 */
   6901 					stp->sd_mark = savemp;
   6902 				}
   6903 			}
   6904 			putback(stp, q, savemp, pri);
   6905 		}
   6906 	} else {
   6907 		/*
   6908 		 * The complete message was consumed.
   6909 		 *
   6910 		 * If another M_PCPROTO arrived while sd_lock was not held
   6911 		 * it would have been discarded since STRPRI was still set.
   6912 		 *
   6913 		 * Move the MSG*MARKNEXT information
   6914 		 * to the stream head just in case
   6915 		 * the read queue becomes empty.
   6916 		 * clear stream head hi pri flag based on
   6917 		 * first message
   6918 		 *
   6919 		 * If the stream head was at the mark
   6920 		 * (STRATMARK) before we dropped sd_lock above
   6921 		 * and some data was consumed then we have
   6922 		 * moved past the mark thus STRATMARK is
   6923 		 * cleared. However, if a message arrived in
   6924 		 * strrput during the copyout above causing
   6925 		 * STRATMARK to be set we can not clear that
   6926 		 * flag.
   6927 		 */
   6928 		if (type >= QPCTL) {
   6929 			ASSERT(type == M_PCPROTO);
   6930 			stp->sd_flag &= ~STRPRI;
   6931 		}
   6932 		if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
   6933 			if (mark & MSGMARKNEXT) {
   6934 				stp->sd_flag &= ~STRNOTATMARK;
   6935 				stp->sd_flag |= STRATMARK;
   6936 			} else if (mark & MSGNOTMARKNEXT) {
   6937 				stp->sd_flag &= ~STRATMARK;
   6938 				stp->sd_flag |= STRNOTATMARK;
   6939 			} else {
   6940 				stp->sd_flag &= ~(STRATMARK|STRNOTATMARK);
   6941 			}
   6942 		} else if (pr && (old_sd_flag & STRATMARK)) {
   6943 			stp->sd_flag &= ~STRATMARK;
   6944 		}
   6945 	}
   6946 
   6947 	*flagsp = flg;
   6948 	*prip = pri;
   6949 
   6950 	/*
   6951 	 * Getmsg cleanup processing - if the state of the queue has changed
   6952 	 * some signals may need to be sent and/or poll awakened.
   6953 	 */
   6954 getmout:
   6955 	qbackenable(q, pri);
   6956 
   6957 	/*
   6958 	 * We dropped the stream head lock above. Send all M_SIG messages
   6959 	 * before processing stream head for SIGPOLL messages.
   6960 	 */
   6961 	ASSERT(MUTEX_HELD(&stp->sd_lock));
   6962 	while ((bp = q->q_first) != NULL &&
   6963 	    (bp->b_datap->db_type == M_SIG)) {
   6964 		/*
   6965 		 * sd_lock is held so the content of the read queue can not
   6966 		 * change.
   6967 		 */
   6968 		bp = getq(q);
   6969 		ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG);
   6970 
   6971 		strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
   6972 		mutex_exit(&stp->sd_lock);
   6973 		freemsg(bp);
   6974 		if (STREAM_NEEDSERVICE(stp))
   6975 			stream_runservice(stp);
   6976 		mutex_enter(&stp->sd_lock);
   6977 	}
   6978 
   6979 	/*
   6980 	 * stream head cannot change while we make the determination
   6981 	 * whether or not to send a signal. Drop the flag to allow strrput
   6982 	 * to send firstmsgsigs again.
   6983 	 */
   6984 	stp->sd_flag &= ~STRGETINPROG;
   6985 
   6986 	/*
   6987 	 * If the type of message at the front of the queue changed
   6988 	 * due to the receive the appropriate signals and pollwakeup events
   6989 	 * are generated. The type of changes are:
   6990 	 *	Processed a hipri message, q_first is not hipri.
   6991 	 *	Processed a band X message, and q_first is band Y.
   6992 	 * The generated signals and pollwakeups are identical to what
   6993 	 * strrput() generates should the message that is now on q_first
   6994 	 * arrive to an empty read queue.
   6995 	 *
   6996 	 * Note: only strrput will send a signal for a hipri message.
   6997 	 */
   6998 	if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) {
   6999 		strsigset_t signals = 0;
   7000 		strpollset_t pollwakeups = 0;
   7001 
   7002 		if (flg & MSG_HIPRI) {
   7003 			/*
   7004 			 * Removed a hipri message. Regular data at
   7005 			 * the front of  the queue.
   7006 			 */
   7007 			if (bp->b_band == 0) {
   7008 				signals = S_INPUT | S_RDNORM;
   7009 				pollwakeups = POLLIN | POLLRDNORM;
   7010 			} else {
   7011 				signals = S_INPUT | S_RDBAND;
   7012 				pollwakeups = POLLIN | POLLRDBAND;
   7013 			}
   7014 		} else if (pri != bp->b_band) {
   7015 			/*
   7016 			 * The band is different for the new q_first.
   7017 			 */
   7018 			if (bp->b_band == 0) {
   7019 				signals = S_RDNORM;
   7020 				pollwakeups = POLLIN | POLLRDNORM;
   7021 			} else {
   7022 				signals = S_RDBAND;
   7023 				pollwakeups = POLLIN | POLLRDBAND;
   7024 			}
   7025 		}
   7026 
   7027 		if (pollwakeups != 0) {
   7028 			if (pollwakeups == (POLLIN | POLLRDNORM)) {
   7029 				if (!(stp->sd_rput_opt & SR_POLLIN))
   7030 					goto no_pollwake;
   7031 				stp->sd_rput_opt &= ~SR_POLLIN;
   7032 			}
   7033 			mutex_exit(&stp->sd_lock);
   7034 			pollwakeup(&stp->sd_pollist, pollwakeups);
   7035 			mutex_enter(&stp->sd_lock);
   7036 		}
   7037 no_pollwake:
   7038 
   7039 		if (stp->sd_sigflags & signals)
   7040 			strsendsig(stp->sd_siglist, signals, bp->b_band, 0);
   7041 	}
   7042 	mutex_exit(&stp->sd_lock);
   7043 
   7044 	rvp->r_val1 = more;
   7045 	return (error);
   7046 #undef	_LASTMARK
   7047 }
   7048 
   7049 /*
   7050  * Get the next message from the read queue.  If the message is
   7051  * priority, STRPRI will have been set by strrput().  This flag
   7052  * should be reset only when the entire message at the front of the
   7053  * queue has been consumed.
   7054  *
   7055  * If uiop is NULL all data is returned in mctlp.
   7056  * Note that a NULL uiop implies that FNDELAY and FNONBLOCK are assumed
   7057  * not enabled.
   7058  * The timeout parameter is in milliseconds; -1 for infinity.
   7059  * This routine handles the consolidation private flags:
   7060  *	MSG_IGNERROR	Ignore any stream head error except STPLEX.
   7061  *	MSG_DELAYERROR	Defer the error check until the queue is empty.
   7062  *	MSG_HOLDSIG	Hold signals while waiting for data.
   7063  *	MSG_IPEEK	Only peek at messages.
   7064  *	MSG_DISCARDTAIL	Discard the tail M_DATA part of the message
   7065  *			that doesn't fit.
   7066  *	MSG_NOMARK	If the message is marked leave it on the queue.
   7067  *
   7068  * NOTE: strgetmsg and kstrgetmsg have much of the logic in common.
   7069  */
   7070 int
   7071 kstrgetmsg(
   7072 	struct vnode *vp,
   7073 	mblk_t **mctlp,
   7074 	struct uio *uiop,
   7075 	unsigned char *prip,
   7076 	int *flagsp,
   7077 	clock_t timout,
   7078 	rval_t *rvp)
   7079 {
   7080 	struct stdata *stp;
   7081 	mblk_t *bp, *nbp;
   7082 	mblk_t *savemp = NULL;
   7083 	mblk_t *savemptail = NULL;
   7084 	int flags;
   7085 	uint_t old_sd_flag;
   7086 	int flg;
   7087 	int more = 0;
   7088 	int error = 0;
   7089 	char first = 1;
   7090 	uint_t mark;		/* Contains MSG*MARK and _LASTMARK */
   7091 #define	_LASTMARK	0x8000	/* Distinct from MSG*MARK */
   7092 	unsigned char pri = 0;
   7093 	queue_t *q;
   7094 	int	pr = 0;			/* Partial read successful */
   7095 	unsigned char type;
   7096 
   7097 	TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_ENTER,
   7098 	    "kstrgetmsg:%p", vp);
   7099 
   7100 	ASSERT(vp->v_stream);
   7101 	stp = vp->v_stream;
   7102 	rvp->r_val1 = 0;
   7103 
   7104 	mutex_enter(&stp->sd_lock);
   7105 
   7106 	if ((error = i_straccess(stp, JCREAD)) != 0) {
   7107 		mutex_exit(&stp->sd_lock);
   7108 		return (error);
   7109 	}
   7110 
   7111 	flags = *flagsp;
   7112 	if (stp->sd_flag & (STRDERR|STPLEX)) {
   7113 		if ((stp->sd_flag & STPLEX) ||
   7114 		    (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == 0) {
   7115 			error = strgeterr(stp, STRDERR|STPLEX,
   7116 			    (flags & MSG_IPEEK));
   7117 			if (error != 0) {
   7118 				mutex_exit(&stp->sd_lock);
   7119 				return (error);
   7120 			}
   7121 		}
   7122 	}
   7123 	mutex_exit(&stp->sd_lock);
   7124 
   7125 	switch (flags & (MSG_HIPRI|MSG_ANY|MSG_BAND)) {
   7126 	case MSG_HIPRI:
   7127 		if (*prip != 0)
   7128 			return (EINVAL);
   7129 		break;
   7130 
   7131 	case MSG_ANY:
   7132 	case MSG_BAND:
   7133 		break;
   7134 
   7135 	default:
   7136 		return (EINVAL);
   7137 	}
   7138 
   7139 retry:
   7140 	q = _RD(stp->sd_wrq);
   7141 	mutex_enter(&stp->sd_lock);
   7142 	old_sd_flag = stp->sd_flag;
   7143 	mark = 0;
   7144 	for (;;) {
   7145 		int done = 0;
   7146 		int waitflag;
   7147 		int fmode;
   7148 		mblk_t *q_first = q->q_first;
   7149 
   7150 		/*
   7151 		 * This section of the code operates just like the code
   7152 		 * in strgetmsg().  There is a comment there about what
   7153 		 * is going on here.
   7154 		 */
   7155 		if (!(flags & (MSG_HIPRI|MSG_BAND))) {
   7156 			/* Asking for normal, band0 data */
   7157 			bp = strget(stp, q, uiop, first, &error);
   7158 			ASSERT(MUTEX_HELD(&stp->sd_lock));
   7159 			if (bp != NULL) {
   7160 				if (DB_TYPE(bp) == M_SIG) {
   7161 					strsignal_nolock(stp, *bp->b_rptr,
   7162 					    bp->b_band);
   7163 					freemsg(bp);
   7164 					continue;
   7165 				} else {
   7166 					break;
   7167 				}
   7168 			}
   7169 			if (error != 0) {
   7170 				goto getmout;
   7171 			}
   7172 		/*
   7173 		 * We can't depend on the value of STRPRI here because
   7174 		 * the stream head may be in transit. Therefore, we
   7175 		 * must look at the type of the first message to
   7176 		 * determine if a high priority messages is waiting
   7177 		 */
   7178 		} else if ((flags & MSG_HIPRI) && q_first != NULL &&
   7179 		    DB_TYPE(q_first) >= QPCTL &&
   7180 		    (bp = getq_noenab(q, 0)) != NULL) {
   7181 			ASSERT(DB_TYPE(bp) >= QPCTL);
   7182 			break;
   7183 		} else if ((flags & MSG_BAND) && q_first != NULL &&
   7184 		    ((q_first->b_band >= *prip) || DB_TYPE(q_first) >= QPCTL) &&
   7185 		    (bp = getq_noenab(q, 0)) != NULL) {
   7186 			/*
   7187 			 * Asked for at least band "prip" and got either at
   7188 			 * least that band or a hipri message.
   7189 			 */
   7190 			ASSERT(bp->b_band >= *prip || DB_TYPE(bp) >= QPCTL);
   7191 			if (DB_TYPE(bp) == M_SIG) {
   7192 				strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
   7193 				freemsg(bp);
   7194 				continue;
   7195 			} else {
   7196 				break;
   7197 			}
   7198 		}
   7199 
   7200 		/* No data. Time to sleep? */
   7201 		qbackenable(q, 0);
   7202 
   7203 		/*
   7204 		 * Delayed error notification?
   7205 		 */
   7206 		if ((stp->sd_flag & (STRDERR|STPLEX)) &&
   7207 		    (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == MSG_DELAYERROR) {
   7208 			error = strgeterr(stp, STRDERR|STPLEX,
   7209 			    (flags & MSG_IPEEK));
   7210 			if (error != 0) {
   7211 				mutex_exit(&stp->sd_lock);
   7212 				return (error);
   7213 			}
   7214 		}
   7215 
   7216 		/*
   7217 		 * If STRHUP or STREOF, return 0 length control and data.
   7218 		 * If a read(fd,buf,0) has been done, do not sleep, just
   7219 		 * return.
   7220 		 *
   7221 		 * If mctlp == NULL and uiop == NULL, then the code will
   7222 		 * do the strwaitq. This is an understood way of saying
   7223 		 * sleep "polling" until a message is received.
   7224 		 */
   7225 		if ((stp->sd_flag & (STRHUP|STREOF)) ||
   7226 		    (uiop != NULL && uiop->uio_resid == 0)) {
   7227 			if (mctlp != NULL)
   7228 				*mctlp = NULL;
   7229 			*flagsp = 0;
   7230 			mutex_exit(&stp->sd_lock);
   7231 			return (0);
   7232 		}
   7233 
   7234 		waitflag = GETWAIT;
   7235 		if (flags &
   7236 		    (MSG_HOLDSIG|MSG_IGNERROR|MSG_IPEEK|MSG_DELAYERROR)) {
   7237 			if (flags & MSG_HOLDSIG)
   7238 				waitflag |= STR_NOSIG;
   7239 			if (flags & MSG_IGNERROR)
   7240 				waitflag |= STR_NOERROR;
   7241 			if (flags & MSG_IPEEK)
   7242 				waitflag |= STR_PEEK;
   7243 			if (flags & MSG_DELAYERROR)
   7244 				waitflag |= STR_DELAYERR;
   7245 		}
   7246 		if (uiop != NULL)
   7247 			fmode = uiop->uio_fmode;
   7248 		else
   7249 			fmode = 0;
   7250 
   7251 		TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_WAIT,
   7252 		    "kstrgetmsg calls strwaitq:%p, %p",
   7253 		    vp, uiop);
   7254 		if (((error = strwaitq(stp, waitflag, (ssize_t)0,
   7255 		    fmode, timout, &done))) != 0 || done) {
   7256 			TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_DONE,
   7257 			    "kstrgetmsg error or done:%p, %p",
   7258 			    vp, uiop);
   7259 			mutex_exit(&stp->sd_lock);
   7260 			return (error);
   7261 		}
   7262 		TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_AWAKE,
   7263 		    "kstrgetmsg awakes:%p, %p", vp, uiop);
   7264 		if ((error = i_straccess(stp, JCREAD)) != 0) {
   7265 			mutex_exit(&stp->sd_lock);
   7266 			return (error);
   7267 		}
   7268 		first = 0;
   7269 	}
   7270 	ASSERT(bp != NULL);
   7271 	/*
   7272 	 * Extract any mark information. If the message is not completely
   7273 	 * consumed this information will be put in the mblk
   7274 	 * that is putback.
   7275 	 * If MSGMARKNEXT is set and the message is completely consumed
   7276 	 * the STRATMARK flag will be set below. Likewise, if
   7277 	 * MSGNOTMARKNEXT is set and the message is
   7278 	 * completely consumed STRNOTATMARK will be set.
   7279 	 */
   7280 	mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
   7281 	ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
   7282 	    (MSGMARKNEXT|MSGNOTMARKNEXT));
   7283 	pri = bp->b_band;
   7284 	if (mark != 0) {
   7285 		/*
   7286 		 * If the caller doesn't want the mark return.
   7287 		 * Used to implement MSG_WAITALL in sockets.
   7288 		 */
   7289 		if (flags & MSG_NOMARK) {
   7290 			putback(stp, q, bp, pri);
   7291 			qbackenable(q, pri);
   7292 			mutex_exit(&stp->sd_lock);
   7293 			return (EWOULDBLOCK);
   7294 		}
   7295 		if (bp == stp->sd_mark) {
   7296 			mark |= _LASTMARK;
   7297 			stp->sd_mark = NULL;
   7298 		}
   7299 	}
   7300 
   7301 	/*
   7302 	 * keep track of the first message type
   7303 	 */
   7304 	type = bp->b_datap->db_type;
   7305 
   7306 	if (bp->b_datap->db_type == M_PASSFP) {
   7307 		if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
   7308 			stp->sd_mark = bp;
   7309 		bp->b_flag |= mark & ~_LASTMARK;
   7310 		putback(stp, q, bp, pri);
   7311 		qbackenable(q, pri);
   7312 		mutex_exit(&stp->sd_lock);
   7313 		return (EBADMSG);
   7314 	}
   7315 	ASSERT(type != M_SIG);
   7316 
   7317 	if (flags & MSG_IPEEK) {
   7318 		/*
   7319 		 * Clear any struioflag - we do the uiomove over again
   7320 		 * when peeking since it simplifies the code.
   7321 		 *
   7322 		 * Dup the message and put the original back on the queue.
   7323 		 * If dupmsg() fails, try again with copymsg() to see if
   7324 		 * there is indeed a shortage of memory.  dupmsg() may fail
   7325 		 * if db_ref in any of the messages reaches its limit.
   7326 		 */
   7327 
   7328 		if ((nbp = dupmsg(bp)) == NULL && (nbp = copymsg(bp)) == NULL) {
   7329 			/*
   7330 			 * Restore the state of the stream head since we
   7331 			 * need to drop sd_lock (strwaitbuf is sleeping).
   7332 			 */
   7333 			size_t size = msgdsize(bp);
   7334 
   7335 			if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
   7336 				stp->sd_mark = bp;
   7337 			bp->b_flag |= mark & ~_LASTMARK;
   7338 			putback(stp, q, bp, pri);
   7339 			mutex_exit(&stp->sd_lock);
   7340 			error = strwaitbuf(size, BPRI_HI);
   7341 			if (error) {
   7342 				/*
   7343 				 * There is no net change to the queue thus
   7344 				 * no need to qbackenable.
   7345 				 */
   7346 				return (error);
   7347 			}
   7348 			goto retry;
   7349 		}
   7350 
   7351 		if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
   7352 			stp->sd_mark = bp;
   7353 		bp->b_flag |= mark & ~_LASTMARK;
   7354 		putback(stp, q, bp, pri);
   7355 		bp = nbp;
   7356 	}
   7357 
   7358 	/*
   7359 	 * Set this flag so strrput will not generate signals. Need to
   7360 	 * make sure this flag is cleared before leaving this routine
   7361 	 * else signals will stop being sent.
   7362 	 */
   7363 	stp->sd_flag |= STRGETINPROG;
   7364 	mutex_exit(&stp->sd_lock);
   7365 
   7366 	if ((stp->sd_rputdatafunc != NULL) && (DB_TYPE(bp) == M_DATA)) {
   7367 		mblk_t *tmp, *prevmp;
   7368 
   7369 		/*
   7370 		 * Put first non-data mblk back to stream head and
   7371 		 * cut the mblk chain so sd_rputdatafunc only sees
   7372 		 * M_DATA mblks. We can skip the first mblk since it
   7373 		 * is M_DATA according to the condition above.
   7374 		 */
   7375 		for (prevmp = bp, tmp = bp->b_cont; tmp != NULL;
   7376 		    prevmp = tmp, tmp = tmp->b_cont) {
   7377 			if (DB_TYPE(tmp) != M_DATA) {
   7378 				prevmp->b_cont = NULL;
   7379 				mutex_enter(&stp->sd_lock);
   7380 				putback(stp, q, tmp, tmp->b_band);
   7381 				mutex_exit(&stp->sd_lock);
   7382 				break;
   7383 			}
   7384 		}
   7385 
   7386 		bp = (stp->sd_rputdatafunc)(stp->sd_vnode, bp,
   7387 		    NULL, NULL, NULL, NULL);
   7388 
   7389 		if (bp == NULL)
   7390 			goto retry;
   7391 	}
   7392 
   7393 	if (STREAM_NEEDSERVICE(stp))
   7394 		stream_runservice(stp);
   7395 
   7396 	/*
   7397 	 * Set HIPRI flag if message is priority.
   7398 	 */
   7399 	if (type >= QPCTL)
   7400 		flg = MSG_HIPRI;
   7401 	else
   7402 		flg = MSG_BAND;
   7403 
   7404 	/*
   7405 	 * First process PROTO or PCPROTO blocks, if any.
   7406 	 */
   7407 	if (mctlp != NULL && type != M_DATA) {
   7408 		mblk_t *nbp;
   7409 
   7410 		*mctlp = bp;
   7411 		while (bp->b_cont && bp->b_cont->b_datap->db_type != M_DATA)
   7412 			bp = bp->b_cont;
   7413 		nbp = bp->b_cont;
   7414 		bp->b_cont = NULL;
   7415 		bp = nbp;
   7416 	}
   7417 
   7418 	if (bp && bp->b_datap->db_type != M_DATA) {
   7419 		/*
   7420 		 * More PROTO blocks in msg. Will only happen if mctlp is NULL.
   7421 		 */
   7422 		more |= MORECTL;
   7423 		savemp = bp;
   7424 		while (bp && bp->b_datap->db_type != M_DATA) {
   7425 			savemptail = bp;
   7426 			bp = bp->b_cont;
   7427 		}
   7428 		savemptail->b_cont = NULL;
   7429 	}
   7430 
   7431 	/*
   7432 	 * Now process DATA blocks, if any.
   7433 	 */
   7434 	if (uiop == NULL) {
   7435 		/* Append data to tail of mctlp */
   7436 
   7437 		if (mctlp != NULL) {
   7438 			mblk_t **mpp = mctlp;
   7439 
   7440 			while (*mpp != NULL)
   7441 				mpp = &((*mpp)->b_cont);
   7442 			*mpp = bp;
   7443 			bp = NULL;
   7444 		}
   7445 	} else if (uiop->uio_resid >= 0 && bp) {
   7446 		size_t oldresid = uiop->uio_resid;
   7447 
   7448 		/*
   7449 		 * If a streams message is likely to consist
   7450 		 * of many small mblks, it is pulled up into
   7451 		 * one continuous chunk of memory.
   7452 		 * The size of the first mblk may be bogus because
   7453 		 * successive read() calls on the socket reduce
   7454 		 * the size of this mblk until it is exhausted
   7455 		 * and then the code walks on to the next. Thus
   7456 		 * the size of the mblk may not be the original size
   7457 		 * that was passed up, it's simply a remainder
   7458 		 * and hence can be very small without any
   7459 		 * implication that the packet is badly fragmented.
   7460 		 * So the size of the possible second mblk is
   7461 		 * used to spot a badly fragmented packet.
   7462 		 * see longer comment at top of page
   7463 		 * by mblk_pull_len declaration.
   7464 		 */
   7465 
   7466 		if (bp->b_cont != NULL && MBLKL(bp->b_cont) < mblk_pull_len) {
   7467 			(void) pullupmsg(bp, -1);
   7468 		}
   7469 
   7470 		bp = struiocopyout(bp, uiop, &error);
   7471 		if (error != 0) {
   7472 			if (mctlp != NULL) {
   7473 				freemsg(*mctlp);
   7474 				*mctlp = NULL;
   7475 			} else
   7476 				freemsg(savemp);
   7477 			mutex_enter(&stp->sd_lock);
   7478 			/*
   7479 			 * clear stream head hi pri flag based on
   7480 			 * first message
   7481 			 */
   7482 			if (!(flags & MSG_IPEEK) && (type >= QPCTL)) {
   7483 				ASSERT(type == M_PCPROTO);
   7484 				stp->sd_flag &= ~STRPRI;
   7485 			}
   7486 			more = 0;
   7487 			goto getmout;
   7488 		}
   7489 		/*
   7490 		 * (pr == 1) indicates a partial read.
   7491 		 */
   7492 		if (oldresid > uiop->uio_resid)
   7493 			pr = 1;
   7494 	}
   7495 
   7496 	if (bp) {			/* more data blocks in msg */
   7497 		more |= MOREDATA;
   7498 		if (savemp)
   7499 			savemptail->b_cont = bp;
   7500 		else
   7501 			savemp = bp;
   7502 	}
   7503 
   7504 	mutex_enter(&stp->sd_lock);
   7505 	if (savemp) {
   7506 		if (flags & (MSG_IPEEK|MSG_DISCARDTAIL)) {
   7507 			/*
   7508 			 * When MSG_DISCARDTAIL is set or
   7509 			 * when peeking discard any tail. When peeking this
   7510 			 * is the tail of the dup that was copied out - the
   7511 			 * message has already been putback on the queue.
   7512 			 * Return MOREDATA to the caller even though the data
   7513 			 * is discarded. This is used by sockets (to
   7514 			 * set MSG_TRUNC).
   7515 			 */
   7516 			freemsg(savemp);
   7517 			if (!(flags & MSG_IPEEK) && (type >= QPCTL)) {
   7518 				ASSERT(type == M_PCPROTO);