Home | History | Annotate | Download | only in sockfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include <sys/types.h>
     28 #include <sys/inttypes.h>
     29 #include <sys/t_lock.h>
     30 #include <sys/param.h>
     31 #include <sys/systm.h>
     32 #include <sys/buf.h>
     33 #include <sys/conf.h>
     34 #include <sys/cred.h>
     35 #include <sys/kmem.h>
     36 #include <sys/sysmacros.h>
     37 #include <sys/vfs.h>
     38 #include <sys/vnode.h>
     39 #include <sys/debug.h>
     40 #include <sys/errno.h>
     41 #include <sys/time.h>
     42 #include <sys/file.h>
     43 #include <sys/user.h>
     44 #include <sys/stream.h>
     45 #include <sys/strsubr.h>
     46 #include <sys/esunddi.h>
     47 #include <sys/flock.h>
     48 #include <sys/modctl.h>
     49 #include <sys/vtrace.h>
     50 #include <sys/strsun.h>
     51 #include <sys/cmn_err.h>
     52 #include <sys/proc.h>
     53 #include <sys/ddi.h>
     54 
     55 #include <sys/suntpi.h>
     56 #include <sys/socket.h>
     57 #include <sys/sockio.h>
     58 #include <sys/socketvar.h>
     59 #include <netinet/in.h>
     60 #include <inet/common.h>
     61 #include <inet/proto_set.h>
     62 
     63 #include <sys/tiuser.h>
     64 #define	_SUN_TPI_VERSION	2
     65 #include <sys/tihdr.h>
     66 
     67 #include <inet/kssl/ksslapi.h>
     68 
     69 #include <c2/audit.h>
     70 
     71 #include <fs/sockfs/socktpi.h>
     72 #include <fs/sockfs/socktpi_impl.h>
     73 
/*
 * Default socket version; initialized to SOV_SOCKSTREAM.
 * NOTE(review): presumably copied into so_version when a socket is
 * created - confirm against the socket creation path.
 */
int so_default_version = SOV_SOCKSTREAM;

#ifdef DEBUG
/* Set sockdebug to print debug messages when SO_DEBUG is set */
int sockdebug = 0;

/* Set sockprinterr to print error messages when SO_DEBUG is set */
int sockprinterr = 0;

/*
 * Set so_default_options to SO_DEBUG if all sockets should be created
 * with SO_DEBUG set. This is needed to get debug printouts from the
 * socket() call itself.
 */
int so_default_options = 0;
#endif /* DEBUG */

#ifdef SOCK_TEST
/*
 * Set to number of ticks to limit cv_waits for code coverage testing.
 * Set to 1000 when SO_DEBUG is set to 2.
 */
clock_t sock_test_timelimit = 0;
#endif /* SOCK_TEST */

/*
 * For concurrency testing of e.g. opening /dev/ip which does not
 * handle T_INFO_REQ messages.
 * When non-zero, do_tinfo() returns without sending a T_INFO_REQ and
 * do_tcapability() drops TC1_INFO from its request; both set
 * sti_addr_size to 0 instead.
 */
int so_no_tinfo = 0;

/*
 * Timeout for getting a T_CAPABILITY_ACK - it is possible for a provider
 * to simply ignore the T_CAPABILITY_REQ.
 * do_tcapability() multiplies this value by hz before waiting.
 */
clock_t	sock_capability_timeout	= 2;	/* seconds */
    110 
/* Forward declarations of file-local helpers used before their definition. */
static int	do_tcapability(struct sonode *so, t_uscalar_t cap_bits1);
static void	so_removehooks(struct sonode *so);

/* Read-side hooks handed to strsetrputhooks() by so_installhooks(). */
static mblk_t *strsock_proto(vnode_t *vp, mblk_t *mp,
		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
static mblk_t *strsock_misc(vnode_t *vp, mblk_t *mp,
		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
    120 
    121 /*
    122  * Convert a socket to a stream. Invoked when the illusory sockmod
    123  * is popped from the stream.
    124  * Change the stream head back to default operation without losing
    125  * any messages (T_conn_ind's are moved to the stream head queue).
    126  */
    127 int
    128 so_sock2stream(struct sonode *so)
    129 {
    130 	struct vnode		*vp = SOTOV(so);
    131 	queue_t			*rq;
    132 	mblk_t			*mp;
    133 	int			error = 0;
    134 	sotpi_info_t		*sti = SOTOTPI(so);
    135 
    136 	ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));
    137 
    138 	mutex_enter(&so->so_lock);
    139 	so_lock_single(so);
    140 
    141 	ASSERT(so->so_version != SOV_STREAM);
    142 
    143 	if (sti->sti_direct) {
    144 		mblk_t **mpp;
    145 		int rval;
    146 
    147 		/*
    148 		 * Tell the transport below that sockmod is being popped
    149 		 */
    150 		mutex_exit(&so->so_lock);
    151 		error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, CRED(),
    152 		    &rval);
    153 		mutex_enter(&so->so_lock);
    154 		if (error != 0) {
    155 			dprintso(so, 0, ("so_sock2stream(%p): "
    156 			    "_SIOCSOCKFALLBACK failed\n", (void *)so));
    157 			goto exit;
    158 		}
    159 		sti->sti_direct = 0;
    160 
    161 		for (mpp = &sti->sti_conn_ind_head; (mp = *mpp) != NULL;
    162 		    mpp = &mp->b_next) {
    163 			struct T_conn_ind	*conn_ind;
    164 
    165 			/*
    166 			 * strsock_proto() has already verified the length of
    167 			 * this message block.
    168 			 */
    169 			ASSERT(MBLKL(mp) >= sizeof (struct T_conn_ind));
    170 
    171 			conn_ind = (struct T_conn_ind *)mp->b_rptr;
    172 			if (conn_ind->OPT_length == 0 &&
    173 			    conn_ind->OPT_offset == 0)
    174 				continue;
    175 
    176 			if (DB_REF(mp) > 1) {
    177 				mblk_t	*newmp;
    178 				size_t	length;
    179 				cred_t	*cr;
    180 				pid_t	cpid;
    181 				int error;	/* Dummy - error not returned */
    182 
    183 				/*
    184 				 * Copy the message block because it is used
    185 				 * elsewhere, too.
    186 				 * Can't use copyb since we want to wait
    187 				 * yet allow for EINTR.
    188 				 */
    189 				/* Round up size for reuse */
    190 				length = MAX(MBLKL(mp), 64);
    191 				cr = msg_getcred(mp, &cpid);
    192 				if (cr != NULL) {
    193 					newmp = allocb_cred_wait(length, 0,
    194 					    &error, cr, cpid);
    195 				} else {
    196 					newmp = allocb_wait(length, 0, 0,
    197 					    &error);
    198 				}
    199 				if (newmp == NULL) {
    200 					error = EINTR;
    201 					goto exit;
    202 				}
    203 				bcopy(mp->b_rptr, newmp->b_wptr, length);
    204 				newmp->b_wptr += length;
    205 				newmp->b_next = mp->b_next;
    206 
    207 				/*
    208 				 * Link the new message block into the queue
    209 				 * and free the old one.
    210 				 */
    211 				*mpp = newmp;
    212 				mp->b_next = NULL;
    213 				freemsg(mp);
    214 
    215 				mp = newmp;
    216 				conn_ind = (struct T_conn_ind *)mp->b_rptr;
    217 			}
    218 
    219 			/*
    220 			 * Remove options added by TCP for accept fast-path.
    221 			 */
    222 			conn_ind->OPT_length = 0;
    223 			conn_ind->OPT_offset = 0;
    224 		}
    225 	}
    226 
    227 	so->so_version = SOV_STREAM;
    228 	so->so_proto_handle = NULL;
    229 
    230 	/*
    231 	 * Remove the hooks in the stream head to avoid queuing more
    232 	 * packets in sockfs.
    233 	 */
    234 	mutex_exit(&so->so_lock);
    235 	so_removehooks(so);
    236 	mutex_enter(&so->so_lock);
    237 
    238 	/*
    239 	 * Clear any state related to urgent data. Leave any T_EXDATA_IND
    240 	 * on the queue - the behavior of urgent data after a switch is
    241 	 * left undefined.
    242 	 */
    243 	so->so_error = sti->sti_delayed_error = 0;
    244 	freemsg(so->so_oobmsg);
    245 	so->so_oobmsg = NULL;
    246 	sti->sti_oobsigcnt = sti->sti_oobcnt = 0;
    247 
    248 	so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|
    249 	    SS_SAVEDEOR);
    250 	ASSERT(so_verify_oobstate(so));
    251 
    252 	freemsg(sti->sti_ack_mp);
    253 	sti->sti_ack_mp = NULL;
    254 
    255 	/*
    256 	 * Flush the T_DISCON_IND on sti_discon_ind_mp.
    257 	 */
    258 	so_flush_discon_ind(so);
    259 
    260 	/*
    261 	 * Move any queued T_CONN_IND messages to stream head queue.
    262 	 */
    263 	rq = RD(strvp2wq(vp));
    264 	while ((mp = sti->sti_conn_ind_head) != NULL) {
    265 		sti->sti_conn_ind_head = mp->b_next;
    266 		mp->b_next = NULL;
    267 		if (sti->sti_conn_ind_head == NULL) {
    268 			ASSERT(sti->sti_conn_ind_tail == mp);
    269 			sti->sti_conn_ind_tail = NULL;
    270 		}
    271 		dprintso(so, 0,
    272 		    ("so_sock2stream(%p): moving T_CONN_IND\n", (void *)so));
    273 
    274 		/* Drop lock across put() */
    275 		mutex_exit(&so->so_lock);
    276 		put(rq, mp);
    277 		mutex_enter(&so->so_lock);
    278 	}
    279 
    280 exit:
    281 	ASSERT(MUTEX_HELD(&so->so_lock));
    282 	so_unlock_single(so, SOLOCKED);
    283 	mutex_exit(&so->so_lock);
    284 	return (error);
    285 }
    286 
    287 /*
    288  * Covert a stream back to a socket. This is invoked when the illusory
    289  * sockmod is pushed on a stream (where the stream was "created" by
    290  * popping the illusory sockmod).
    291  * This routine can not recreate the socket state (certain aspects of
    292  * it like urgent data state and the bound/connected addresses for AF_UNIX
    293  * sockets can not be recreated by asking the transport for information).
    294  * Thus this routine implicitly assumes that the socket is in an initial
    295  * state (as if it was just created). It flushes any messages queued on the
    296  * read queue to avoid dealing with e.g. TPI acks or T_exdata_ind messages.
    297  */
    298 void
    299 so_stream2sock(struct sonode *so)
    300 {
    301 	struct vnode *vp = SOTOV(so);
    302 	sotpi_info_t *sti = SOTOTPI(so);
    303 
    304 	ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));
    305 
    306 	mutex_enter(&so->so_lock);
    307 	so_lock_single(so);
    308 	ASSERT(so->so_version == SOV_STREAM);
    309 	so->so_version = SOV_SOCKSTREAM;
    310 	sti->sti_pushcnt = 0;
    311 	mutex_exit(&so->so_lock);
    312 
    313 	/*
    314 	 * Set a permenent error to force any thread in sorecvmsg to
    315 	 * return (and drop SOREADLOCKED). Clear the error once
    316 	 * we have SOREADLOCKED.
    317 	 * This makes a read sleeping during the I_PUSH of sockmod return
    318 	 * EIO.
    319 	 */
    320 	strsetrerror(SOTOV(so), EIO, 1, NULL);
    321 
    322 	/*
    323 	 * Get the read lock before flushing data to avoid
    324 	 * problems with the T_EXDATA_IND MSG_PEEK code in sorecvmsg.
    325 	 */
    326 	mutex_enter(&so->so_lock);
    327 	(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
    328 	mutex_exit(&so->so_lock);
    329 
    330 	strsetrerror(SOTOV(so), 0, 0, NULL);
    331 	so_installhooks(so);
    332 
    333 	/*
    334 	 * Flush everything on the read queue.
    335 	 * This ensures that no T_CONN_IND remain and that no T_EXDATA_IND
    336 	 * remain; those types of messages would confuse sockfs.
    337 	 */
    338 	strflushrq(vp, FLUSHALL);
    339 	mutex_enter(&so->so_lock);
    340 
    341 	/*
    342 	 * Flush the T_DISCON_IND on sti_discon_ind_mp.
    343 	 */
    344 	so_flush_discon_ind(so);
    345 	so_unlock_read(so);	/* Clear SOREADLOCKED */
    346 
    347 	so_unlock_single(so, SOLOCKED);
    348 	mutex_exit(&so->so_lock);
    349 }
    350 
    351 /*
    352  * Install the hooks in the stream head.
    353  */
    354 void
    355 so_installhooks(struct sonode *so)
    356 {
    357 	struct vnode *vp = SOTOV(so);
    358 
    359 	strsetrputhooks(vp, SH_SIGALLDATA | SH_IGN_ZEROLEN | SH_CONSOL_DATA,
    360 	    strsock_proto, strsock_misc);
    361 	strsetwputhooks(vp, SH_SIGPIPE | SH_RECHECK_ERR, 0);
    362 }
    363 
    364 /*
    365  * Remove the hooks in the stream head.
    366  */
    367 static void
    368 so_removehooks(struct sonode *so)
    369 {
    370 	struct vnode *vp = SOTOV(so);
    371 
    372 	strsetrputhooks(vp, 0, NULL, NULL);
    373 	strsetwputhooks(vp, 0, STRTIMOUT);
    374 	/*
    375 	 * Leave read behavior as it would have been for a normal
    376 	 * stream i.e. a read of an M_PROTO will fail.
    377 	 */
    378 }
    379 
    380 void
    381 so_basic_strinit(struct sonode *so)
    382 {
    383 	struct vnode *vp = SOTOV(so);
    384 	struct stdata *stp;
    385 	mblk_t *mp;
    386 	sotpi_info_t *sti = SOTOTPI(so);
    387 
    388 	/* Preallocate an unbind_req message */
    389 	mp = soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP, CRED());
    390 	mutex_enter(&so->so_lock);
    391 	sti->sti_unbind_mp = mp;
    392 #ifdef DEBUG
    393 	so->so_options = so_default_options;
    394 #endif /* DEBUG */
    395 	mutex_exit(&so->so_lock);
    396 
    397 	so_installhooks(so);
    398 
    399 	stp = vp->v_stream;
    400 	/*
    401 	 * Have to keep minpsz at zero in order to allow write/send of zero
    402 	 * bytes.
    403 	 */
    404 	mutex_enter(&stp->sd_lock);
    405 	if (stp->sd_qn_minpsz == 1)
    406 		stp->sd_qn_minpsz = 0;
    407 	mutex_exit(&stp->sd_lock);
    408 }
    409 
    410 /*
    411  * Initialize the streams side of a socket including
    412  * T_info_req/ack processing. If tso is not NULL its values are used thereby
    413  * avoiding the T_INFO_REQ.
    414  */
    415 int
    416 so_strinit(struct sonode *so, struct sonode *tso)
    417 {
    418 	sotpi_info_t *sti = SOTOTPI(so);
    419 	sotpi_info_t *tsti;
    420 	int error;
    421 
    422 	so_basic_strinit(so);
    423 
    424 	/*
    425 	 * The T_CAPABILITY_REQ should be the first message sent down because
    426 	 * at least TCP has a fast-path for this which avoids timeouts while
    427 	 * waiting for the T_CAPABILITY_ACK under high system load.
    428 	 */
    429 	if (tso == NULL) {
    430 		error = do_tcapability(so, TC1_ACCEPTOR_ID | TC1_INFO);
    431 		if (error)
    432 			return (error);
    433 	} else {
    434 		tsti = SOTOTPI(tso);
    435 
    436 		mutex_enter(&so->so_lock);
    437 		sti->sti_tsdu_size = tsti->sti_tsdu_size;
    438 		sti->sti_etsdu_size = tsti->sti_etsdu_size;
    439 		sti->sti_addr_size = tsti->sti_addr_size;
    440 		sti->sti_opt_size = tsti->sti_opt_size;
    441 		sti->sti_tidu_size = tsti->sti_tidu_size;
    442 		sti->sti_serv_type = tsti->sti_serv_type;
    443 		so->so_mode = tso->so_mode & ~SM_ACCEPTOR_ID;
    444 		mutex_exit(&so->so_lock);
    445 
    446 		/* the following do_tcapability may update so->so_mode */
    447 		if ((tsti->sti_serv_type != T_CLTS) &&
    448 		    (sti->sti_direct == 0)) {
    449 			error = do_tcapability(so, TC1_ACCEPTOR_ID);
    450 			if (error)
    451 				return (error);
    452 		}
    453 	}
    454 	/*
    455 	 * If the addr_size is 0 we treat it as already bound
    456 	 * and connected. This is used by the routing socket.
    457 	 * We set the addr_size to something to allocate a the address
    458 	 * structures.
    459 	 */
    460 	if (sti->sti_addr_size == 0) {
    461 		so->so_state |= SS_ISBOUND | SS_ISCONNECTED;
    462 		/* Address size can vary with address families. */
    463 		if (so->so_family == AF_INET6)
    464 			sti->sti_addr_size =
    465 			    (t_scalar_t)sizeof (struct sockaddr_in6);
    466 		else
    467 			sti->sti_addr_size =
    468 			    (t_scalar_t)sizeof (struct sockaddr_in);
    469 		ASSERT(sti->sti_unbind_mp);
    470 	}
    471 
    472 	so_alloc_addr(so, sti->sti_addr_size);
    473 
    474 	return (0);
    475 }
    476 
    477 static void
    478 copy_tinfo(struct sonode *so, struct T_info_ack *tia)
    479 {
    480 	sotpi_info_t *sti = SOTOTPI(so);
    481 
    482 	sti->sti_tsdu_size = tia->TSDU_size;
    483 	sti->sti_etsdu_size = tia->ETSDU_size;
    484 	sti->sti_addr_size = tia->ADDR_size;
    485 	sti->sti_opt_size = tia->OPT_size;
    486 	sti->sti_tidu_size = tia->TIDU_size;
    487 	sti->sti_serv_type = tia->SERV_type;
    488 	switch (tia->CURRENT_state) {
    489 	case TS_UNBND:
    490 		break;
    491 	case TS_IDLE:
    492 		so->so_state |= SS_ISBOUND;
    493 		sti->sti_laddr_len = 0;
    494 		sti->sti_laddr_valid = 0;
    495 		break;
    496 	case TS_DATA_XFER:
    497 		so->so_state |= SS_ISBOUND|SS_ISCONNECTED;
    498 		sti->sti_laddr_len = 0;
    499 		sti->sti_faddr_len = 0;
    500 		sti->sti_laddr_valid = 0;
    501 		sti->sti_faddr_valid = 0;
    502 		break;
    503 	}
    504 
    505 	/*
    506 	 * Heuristics for determining the socket mode flags
    507 	 * (SM_ATOMIC, SM_CONNREQUIRED, SM_ADDR, SM_FDPASSING,
    508 	 * and SM_EXDATA, SM_OPTDATA, and SM_BYTESTREAM)
    509 	 * from the info ack.
    510 	 */
    511 	if (sti->sti_serv_type == T_CLTS) {
    512 		so->so_mode |= SM_ATOMIC | SM_ADDR;
    513 	} else {
    514 		so->so_mode |= SM_CONNREQUIRED;
    515 		if (sti->sti_etsdu_size != 0 && sti->sti_etsdu_size != -2)
    516 			so->so_mode |= SM_EXDATA;
    517 	}
    518 	if (so->so_type == SOCK_SEQPACKET || so->so_type == SOCK_RAW) {
    519 		/* Semantics are to discard tail end of messages */
    520 		so->so_mode |= SM_ATOMIC;
    521 	}
    522 	if (so->so_family == AF_UNIX) {
    523 		so->so_mode |= SM_FDPASSING | SM_OPTDATA;
    524 		if (sti->sti_addr_size == -1) {
    525 			/* MAXPATHLEN + soun_family + nul termination */
    526 			sti->sti_addr_size = (t_scalar_t)(MAXPATHLEN +
    527 			    sizeof (short) + 1);
    528 		}
    529 		if (so->so_type == SOCK_STREAM) {
    530 			/*
    531 			 * Make it into a byte-stream transport.
    532 			 * SOCK_SEQPACKET sockets are unchanged.
    533 			 */
    534 			sti->sti_tsdu_size = 0;
    535 		}
    536 	} else if (sti->sti_addr_size == -1) {
    537 		/*
    538 		 * Logic extracted from sockmod - have to pick some max address
    539 		 * length in order to preallocate the addresses.
    540 		 */
    541 		sti->sti_addr_size = SOA_DEFSIZE;
    542 	}
    543 	if (sti->sti_tsdu_size == 0)
    544 		so->so_mode |= SM_BYTESTREAM;
    545 }
    546 
    547 static int
    548 check_tinfo(struct sonode *so)
    549 {
    550 	sotpi_info_t *sti = SOTOTPI(so);
    551 
    552 	/* Consistency checks */
    553 	if (so->so_type == SOCK_DGRAM && sti->sti_serv_type != T_CLTS) {
    554 		eprintso(so, ("service type and socket type mismatch\n"));
    555 		eprintsoline(so, EPROTO);
    556 		return (EPROTO);
    557 	}
    558 	if (so->so_type == SOCK_STREAM && sti->sti_serv_type == T_CLTS) {
    559 		eprintso(so, ("service type and socket type mismatch\n"));
    560 		eprintsoline(so, EPROTO);
    561 		return (EPROTO);
    562 	}
    563 	if (so->so_type == SOCK_SEQPACKET && sti->sti_serv_type == T_CLTS) {
    564 		eprintso(so, ("service type and socket type mismatch\n"));
    565 		eprintsoline(so, EPROTO);
    566 		return (EPROTO);
    567 	}
    568 	if (so->so_family == AF_INET &&
    569 	    sti->sti_addr_size != (t_scalar_t)sizeof (struct sockaddr_in)) {
    570 		eprintso(so,
    571 		    ("AF_INET must have sockaddr_in address length. Got %d\n",
    572 		    sti->sti_addr_size));
    573 		eprintsoline(so, EMSGSIZE);
    574 		return (EMSGSIZE);
    575 	}
    576 	if (so->so_family == AF_INET6 &&
    577 	    sti->sti_addr_size != (t_scalar_t)sizeof (struct sockaddr_in6)) {
    578 		eprintso(so,
    579 		    ("AF_INET6 must have sockaddr_in6 address length. Got %d\n",
    580 		    sti->sti_addr_size));
    581 		eprintsoline(so, EMSGSIZE);
    582 		return (EMSGSIZE);
    583 	}
    584 
    585 	dprintso(so, 1, (
    586 	    "tinfo: serv %d tsdu %d, etsdu %d, addr %d, opt %d, tidu %d\n",
    587 	    sti->sti_serv_type, sti->sti_tsdu_size, sti->sti_etsdu_size,
    588 	    sti->sti_addr_size, sti->sti_opt_size,
    589 	    sti->sti_tidu_size));
    590 	dprintso(so, 1, ("tinfo: so_state %s\n",
    591 	    pr_state(so->so_state, so->so_mode)));
    592 	return (0);
    593 }
    594 
    595 /*
    596  * Send down T_info_req and wait for the ack.
    597  * Record interesting T_info_ack values in the sonode.
    598  */
    599 static int
    600 do_tinfo(struct sonode *so)
    601 {
    602 	struct T_info_req tir;
    603 	mblk_t *mp;
    604 	int error;
    605 
    606 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
    607 
    608 	if (so_no_tinfo) {
    609 		SOTOTPI(so)->sti_addr_size = 0;
    610 		return (0);
    611 	}
    612 
    613 	dprintso(so, 1, ("do_tinfo(%p)\n", (void *)so));
    614 
    615 	/* Send T_INFO_REQ */
    616 	tir.PRIM_type = T_INFO_REQ;
    617 	mp = soallocproto1(&tir, sizeof (tir),
    618 	    sizeof (struct T_info_req) + sizeof (struct T_info_ack),
    619 	    _ALLOC_INTR, CRED());
    620 	if (mp == NULL) {
    621 		eprintsoline(so, ENOBUFS);
    622 		return (ENOBUFS);
    623 	}
    624 	/* T_INFO_REQ has to be M_PCPROTO */
    625 	DB_TYPE(mp) = M_PCPROTO;
    626 
    627 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
    628 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
    629 	if (error) {
    630 		eprintsoline(so, error);
    631 		return (error);
    632 	}
    633 	mutex_enter(&so->so_lock);
    634 	/* Wait for T_INFO_ACK */
    635 	if ((error = sowaitprim(so, T_INFO_REQ, T_INFO_ACK,
    636 	    (t_uscalar_t)sizeof (struct T_info_ack), &mp, 0))) {
    637 		mutex_exit(&so->so_lock);
    638 		eprintsoline(so, error);
    639 		return (error);
    640 	}
    641 
    642 	ASSERT(mp);
    643 	copy_tinfo(so, (struct T_info_ack *)mp->b_rptr);
    644 	mutex_exit(&so->so_lock);
    645 	freemsg(mp);
    646 	return (check_tinfo(so));
    647 }
    648 
    649 /*
    650  * Send down T_capability_req and wait for the ack.
    651  * Record interesting T_capability_ack values in the sonode.
    652  */
    653 static int
    654 do_tcapability(struct sonode *so, t_uscalar_t cap_bits1)
    655 {
    656 	struct T_capability_req tcr;
    657 	struct T_capability_ack *tca;
    658 	mblk_t *mp;
    659 	int error;
    660 	sotpi_info_t *sti = SOTOTPI(so);
    661 
    662 	ASSERT(cap_bits1 != 0);
    663 	ASSERT((cap_bits1 & ~(TC1_ACCEPTOR_ID | TC1_INFO)) == 0);
    664 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
    665 
    666 	if (sti->sti_provinfo->tpi_capability == PI_NO)
    667 		return (do_tinfo(so));
    668 
    669 	if (so_no_tinfo) {
    670 		sti->sti_addr_size = 0;
    671 		if ((cap_bits1 &= ~TC1_INFO) == 0)
    672 			return (0);
    673 	}
    674 
    675 	dprintso(so, 1, ("do_tcapability(%p)\n", (void *)so));
    676 
    677 	/* Send T_CAPABILITY_REQ */
    678 	tcr.PRIM_type = T_CAPABILITY_REQ;
    679 	tcr.CAP_bits1 = cap_bits1;
    680 	mp = soallocproto1(&tcr, sizeof (tcr),
    681 	    sizeof (struct T_capability_req) + sizeof (struct T_capability_ack),
    682 	    _ALLOC_INTR, CRED());
    683 	if (mp == NULL) {
    684 		eprintsoline(so, ENOBUFS);
    685 		return (ENOBUFS);
    686 	}
    687 	/* T_CAPABILITY_REQ should be M_PCPROTO here */
    688 	DB_TYPE(mp) = M_PCPROTO;
    689 
    690 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
    691 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
    692 	if (error) {
    693 		eprintsoline(so, error);
    694 		return (error);
    695 	}
    696 	mutex_enter(&so->so_lock);
    697 	/* Wait for T_CAPABILITY_ACK */
    698 	if ((error = sowaitprim(so, T_CAPABILITY_REQ, T_CAPABILITY_ACK,
    699 	    (t_uscalar_t)sizeof (*tca), &mp, sock_capability_timeout * hz))) {
    700 		mutex_exit(&so->so_lock);
    701 		PI_PROVLOCK(sti->sti_provinfo);
    702 		if (sti->sti_provinfo->tpi_capability == PI_DONTKNOW)
    703 			sti->sti_provinfo->tpi_capability = PI_NO;
    704 		PI_PROVUNLOCK(sti->sti_provinfo);
    705 		ASSERT((so->so_mode & SM_ACCEPTOR_ID) == 0);
    706 		if (cap_bits1 & TC1_INFO) {
    707 			/*
    708 			 * If the T_CAPABILITY_REQ timed out and then a
    709 			 * T_INFO_REQ gets a protocol error, most likely
    710 			 * the capability was slow (vs. unsupported). Return
    711 			 * ENOSR for this case as a best guess.
    712 			 */
    713 			if (error == ETIME) {
    714 				return ((error = do_tinfo(so)) == EPROTO ?
    715 				    ENOSR : error);
    716 			}
    717 			return (do_tinfo(so));
    718 		}
    719 		return (0);
    720 	}
    721 
    722 	ASSERT(mp);
    723 	tca = (struct T_capability_ack *)mp->b_rptr;
    724 
    725 	ASSERT((cap_bits1 & TC1_INFO) == (tca->CAP_bits1 & TC1_INFO));
    726 	so_proc_tcapability_ack(so, tca);
    727 
    728 	cap_bits1 = tca->CAP_bits1;
    729 
    730 	mutex_exit(&so->so_lock);
    731 	freemsg(mp);
    732 
    733 	if (cap_bits1 & TC1_INFO)
    734 		return (check_tinfo(so));
    735 
    736 	return (0);
    737 }
    738 
    739 /*
    740  * Process a T_CAPABILITY_ACK
    741  */
    742 void
    743 so_proc_tcapability_ack(struct sonode *so, struct T_capability_ack *tca)
    744 {
    745 	sotpi_info_t *sti = SOTOTPI(so);
    746 
    747 	if (sti->sti_provinfo->tpi_capability == PI_DONTKNOW) {
    748 		PI_PROVLOCK(sti->sti_provinfo);
    749 		sti->sti_provinfo->tpi_capability = PI_YES;
    750 		PI_PROVUNLOCK(sti->sti_provinfo);
    751 	}
    752 
    753 	if (tca->CAP_bits1 & TC1_ACCEPTOR_ID) {
    754 		sti->sti_acceptor_id = tca->ACCEPTOR_id;
    755 		so->so_mode |= SM_ACCEPTOR_ID;
    756 	}
    757 
    758 	if (tca->CAP_bits1 & TC1_INFO)
    759 		copy_tinfo(so, &tca->INFO_ack);
    760 }
    761 
    762 /*
    763  * Retrieve socket error, clear error if not peek.
    764  */
    765 int
    766 sogeterr(struct sonode *so, boolean_t clear_err)
    767 {
    768 	int error;
    769 
    770 	ASSERT(MUTEX_HELD(&so->so_lock));
    771 
    772 	error = so->so_error;
    773 	if (clear_err)
    774 		so->so_error = 0;
    775 
    776 	return (error);
    777 }
    778 
    779 /*
    780  * This routine is registered with the stream head to retrieve read
    781  * side errors.
    782  * It does not clear the socket error for a peeking read side operation.
    783  * It the error is to be cleared it sets *clearerr.
    784  */
    785 int
    786 sogetrderr(vnode_t *vp, int ispeek, int *clearerr)
    787 {
    788 	struct sonode *so = VTOSO(vp);
    789 	int error;
    790 
    791 	mutex_enter(&so->so_lock);
    792 	if (ispeek) {
    793 		error = so->so_error;
    794 		*clearerr = 0;
    795 	} else {
    796 		error = so->so_error;
    797 		so->so_error = 0;
    798 		*clearerr = 1;
    799 	}
    800 	mutex_exit(&so->so_lock);
    801 	return (error);
    802 }
    803 
    804 /*
    805  * This routine is registered with the stream head to retrieve write
    806  * side errors.
    807  * It does not clear the socket error for a peeking read side operation.
    808  * It the error is to be cleared it sets *clearerr.
    809  */
    810 int
    811 sogetwrerr(vnode_t *vp, int ispeek, int *clearerr)
    812 {
    813 	struct sonode *so = VTOSO(vp);
    814 	int error;
    815 
    816 	mutex_enter(&so->so_lock);
    817 	if (so->so_state & SS_CANTSENDMORE) {
    818 		error = EPIPE;
    819 		*clearerr = 0;
    820 	} else {
    821 		error = so->so_error;
    822 		if (ispeek) {
    823 			*clearerr = 0;
    824 		} else {
    825 			so->so_error = 0;
    826 			*clearerr = 1;
    827 		}
    828 	}
    829 	mutex_exit(&so->so_lock);
    830 	return (error);
    831 }
    832 
    833 /*
    834  * Set a nonpersistent read and write error on the socket.
    835  * Used when there is a T_uderror_ind for a connected socket.
    836  * The caller also needs to call strsetrerror and strsetwerror
    837  * after dropping the lock.
    838  */
    839 void
    840 soseterror(struct sonode *so, int error)
    841 {
    842 	ASSERT(error != 0);
    843 
    844 	ASSERT(MUTEX_HELD(&so->so_lock));
    845 	so->so_error = (ushort_t)error;
    846 }
    847 
    848 void
    849 soisconnecting(struct sonode *so)
    850 {
    851 	ASSERT(MUTEX_HELD(&so->so_lock));
    852 	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
    853 	so->so_state |= SS_ISCONNECTING;
    854 	cv_broadcast(&so->so_state_cv);
    855 }
    856 
    857 void
    858 soisconnected(struct sonode *so)
    859 {
    860 	ASSERT(MUTEX_HELD(&so->so_lock));
    861 	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
    862 	so->so_state |= SS_ISCONNECTED;
    863 	cv_broadcast(&so->so_state_cv);
    864 }
    865 
    866 /*
    867  * The caller also needs to call strsetrerror, strsetwerror and strseteof.
    868  */
    869 void
    870 soisdisconnected(struct sonode *so, int error)
    871 {
    872 	ASSERT(MUTEX_HELD(&so->so_lock));
    873 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
    874 	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
    875 	so->so_error = (ushort_t)error;
    876 	if (so->so_peercred != NULL) {
    877 		crfree(so->so_peercred);
    878 		so->so_peercred = NULL;
    879 	}
    880 	cv_broadcast(&so->so_state_cv);
    881 }
    882 
    883 /*
    884  * For connected AF_UNIX SOCK_DGRAM sockets when the peer closes.
    885  * Does not affect write side.
    886  * The caller also has to call strsetrerror.
    887  */
    888 static void
    889 sobreakconn(struct sonode *so, int error)
    890 {
    891 	ASSERT(MUTEX_HELD(&so->so_lock));
    892 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
    893 	so->so_error = (ushort_t)error;
    894 	cv_broadcast(&so->so_state_cv);
    895 }
    896 
    897 /*
    898  * Can no longer send.
    899  * Caller must also call strsetwerror.
    900  *
    901  * We mark the peer address as no longer valid for getpeername, but
    902  * leave it around for so_unix_close to notify the peer (that
    903  * transport has no addressing held at that layer).
    904  */
    905 void
    906 socantsendmore(struct sonode *so)
    907 {
    908 	ASSERT(MUTEX_HELD(&so->so_lock));
    909 	so->so_state |= SS_CANTSENDMORE;
    910 	cv_broadcast(&so->so_state_cv);
    911 }
    912 
    913 /*
    914  * The caller must call strseteof(,1) as well as this routine
    915  * to change the socket state.
    916  */
    917 void
    918 socantrcvmore(struct sonode *so)
    919 {
    920 	ASSERT(MUTEX_HELD(&so->so_lock));
    921 	so->so_state |= SS_CANTRCVMORE;
    922 	cv_broadcast(&so->so_state_cv);
    923 }
    924 
    925 /*
 * The caller has sent down a "request_prim" primitive and wants to wait for
 * an ack ("ack_prim") or a T_ERROR_ACK for it.
    928  * The specified "ack_prim" can be a T_OK_ACK.
    929  *
    930  * Assumes that all the TPI acks are M_PCPROTO messages.
    931  *
    932  * Note that the socket is single-threaded (using so_lock_single)
    933  * for all operations that generate TPI ack messages. Since
    934  * only TPI ack messages are M_PCPROTO we should never receive
    935  * anything except either the ack we are expecting or a T_ERROR_ACK
    936  * for the same primitive.
    937  */
    938 int
    939 sowaitprim(struct sonode *so, t_scalar_t request_prim, t_scalar_t ack_prim,
    940 	    t_uscalar_t min_size, mblk_t **mpp, clock_t wait)
    941 {
    942 	mblk_t *mp;
    943 	union T_primitives *tpr;
    944 	int error;
    945 
    946 	dprintso(so, 1, ("sowaitprim(%p, %d, %d, %d, %p, %lu)\n",
    947 	    (void *)so, request_prim, ack_prim, min_size, (void *)mpp, wait));
    948 
    949 	ASSERT(MUTEX_HELD(&so->so_lock));
    950 
    951 	error = sowaitack(so, &mp, wait);
    952 	if (error)
    953 		return (error);
    954 
    955 	dprintso(so, 1, ("got msg %p\n", (void *)mp));
    956 	if (DB_TYPE(mp) != M_PCPROTO ||
    957 	    MBLKL(mp) < sizeof (tpr->type)) {
    958 		freemsg(mp);
    959 		eprintsoline(so, EPROTO);
    960 		return (EPROTO);
    961 	}
    962 	tpr = (union T_primitives *)mp->b_rptr;
    963 	/*
    964 	 * Did we get the primitive that we were asking for?
    965 	 * For T_OK_ACK we also check that it matches the request primitive.
    966 	 */
    967 	if (tpr->type == ack_prim &&
    968 	    (ack_prim != T_OK_ACK ||
    969 	    tpr->ok_ack.CORRECT_prim == request_prim)) {
    970 		if (MBLKL(mp) >= (ssize_t)min_size) {
    971 			/* Found what we are looking for */
    972 			*mpp = mp;
    973 			return (0);
    974 		}
    975 		/* Too short */
    976 		freemsg(mp);
    977 		eprintsoline(so, EPROTO);
    978 		return (EPROTO);
    979 	}
    980 
    981 	if (tpr->type == T_ERROR_ACK &&
    982 	    tpr->error_ack.ERROR_prim == request_prim) {
    983 		/* Error to the primitive we were looking for */
    984 		if (tpr->error_ack.TLI_error == TSYSERR) {
    985 			error = tpr->error_ack.UNIX_error;
    986 		} else {
    987 			error = proto_tlitosyserr(tpr->error_ack.TLI_error);
    988 		}
    989 		dprintso(so, 0, ("error_ack for %d: %d/%d ->%d\n",
    990 		    tpr->error_ack.ERROR_prim, tpr->error_ack.TLI_error,
    991 		    tpr->error_ack.UNIX_error, error));
    992 		freemsg(mp);
    993 		return (error);
    994 	}
    995 	/*
    996 	 * Wrong primitive or T_ERROR_ACK for the wrong primitive
    997 	 */
    998 #ifdef DEBUG
    999 	if (tpr->type == T_ERROR_ACK) {
   1000 		dprintso(so, 0, ("error_ack for %d: %d/%d\n",
   1001 		    tpr->error_ack.ERROR_prim, tpr->error_ack.TLI_error,
   1002 		    tpr->error_ack.UNIX_error));
   1003 	} else if (tpr->type == T_OK_ACK) {
   1004 		dprintso(so, 0, ("ok_ack for %d, expected %d for %d\n",
   1005 		    tpr->ok_ack.CORRECT_prim, ack_prim, request_prim));
   1006 	} else {
   1007 		dprintso(so, 0,
   1008 		    ("unexpected primitive %d, expected %d for %d\n",
   1009 		    tpr->type, ack_prim, request_prim));
   1010 	}
   1011 #endif /* DEBUG */
   1012 
   1013 	freemsg(mp);
   1014 	eprintsoline(so, EPROTO);
   1015 	return (EPROTO);
   1016 }
   1017 
   1018 /*
   1019  * Wait for a T_OK_ACK for the specified primitive.
   1020  */
   1021 int
   1022 sowaitokack(struct sonode *so, t_scalar_t request_prim)
   1023 {
   1024 	mblk_t *mp;
   1025 	int error;
   1026 
   1027 	error = sowaitprim(so, request_prim, T_OK_ACK,
   1028 	    (t_uscalar_t)sizeof (struct T_ok_ack), &mp, 0);
   1029 	if (error)
   1030 		return (error);
   1031 	freemsg(mp);
   1032 	return (0);
   1033 }
   1034 
   1035 /*
   1036  * Queue a received TPI ack message on sti_ack_mp.
   1037  */
   1038 void
   1039 soqueueack(struct sonode *so, mblk_t *mp)
   1040 {
   1041 	sotpi_info_t *sti = SOTOTPI(so);
   1042 
   1043 	if (DB_TYPE(mp) != M_PCPROTO) {
   1044 		zcmn_err(getzoneid(), CE_WARN,
   1045 		    "sockfs: received unexpected M_PROTO TPI ack. Prim %d\n",
   1046 		    *(t_scalar_t *)mp->b_rptr);
   1047 		freemsg(mp);
   1048 		return;
   1049 	}
   1050 
   1051 	mutex_enter(&so->so_lock);
   1052 	if (sti->sti_ack_mp != NULL) {
   1053 		dprintso(so, 1, ("sti_ack_mp already set\n"));
   1054 		freemsg(sti->sti_ack_mp);
   1055 		sti->sti_ack_mp = NULL;
   1056 	}
   1057 	sti->sti_ack_mp = mp;
   1058 	cv_broadcast(&sti->sti_ack_cv);
   1059 	mutex_exit(&so->so_lock);
   1060 }
   1061 
   1062 /*
   1063  * Wait for a TPI ack ignoring signals and errors.
   1064  */
   1065 int
   1066 sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait)
   1067 {
   1068 	sotpi_info_t *sti = SOTOTPI(so);
   1069 
   1070 	ASSERT(MUTEX_HELD(&so->so_lock));
   1071 
   1072 	while (sti->sti_ack_mp == NULL) {
   1073 #ifdef SOCK_TEST
   1074 		if (wait == 0 && sock_test_timelimit != 0)
   1075 			wait = sock_test_timelimit;
   1076 #endif
   1077 		if (wait != 0) {
   1078 			/*
   1079 			 * Only wait for the time limit.
   1080 			 */
   1081 			if (cv_reltimedwait(&sti->sti_ack_cv, &so->so_lock,
   1082 			    wait, TR_CLOCK_TICK) == -1) {
   1083 				eprintsoline(so, ETIME);
   1084 				return (ETIME);
   1085 			}
   1086 		}
   1087 		else
   1088 			cv_wait(&sti->sti_ack_cv, &so->so_lock);
   1089 	}
   1090 	*mpp = sti->sti_ack_mp;
   1091 #ifdef DEBUG
   1092 	{
   1093 		union T_primitives *tpr;
   1094 		mblk_t *mp = *mpp;
   1095 
   1096 		tpr = (union T_primitives *)mp->b_rptr;
   1097 		ASSERT(DB_TYPE(mp) == M_PCPROTO);
   1098 		ASSERT(tpr->type == T_OK_ACK ||
   1099 		    tpr->type == T_ERROR_ACK ||
   1100 		    tpr->type == T_BIND_ACK ||
   1101 		    tpr->type == T_CAPABILITY_ACK ||
   1102 		    tpr->type == T_INFO_ACK ||
   1103 		    tpr->type == T_OPTMGMT_ACK);
   1104 	}
   1105 #endif /* DEBUG */
   1106 	sti->sti_ack_mp = NULL;
   1107 	return (0);
   1108 }
   1109 
   1110 /*
   1111  * Queue a received T_CONN_IND message on sti_conn_ind_head/tail.
   1112  */
   1113 void
   1114 soqueueconnind(struct sonode *so, mblk_t *mp)
   1115 {
   1116 	sotpi_info_t *sti = SOTOTPI(so);
   1117 
   1118 	if (DB_TYPE(mp) != M_PROTO) {
   1119 		zcmn_err(getzoneid(), CE_WARN,
   1120 		    "sockfs: received unexpected M_PCPROTO T_CONN_IND\n");
   1121 		freemsg(mp);
   1122 		return;
   1123 	}
   1124 
   1125 	mutex_enter(&so->so_lock);
   1126 	ASSERT(mp->b_next == NULL);
   1127 	if (sti->sti_conn_ind_head == NULL) {
   1128 		sti->sti_conn_ind_head = mp;
   1129 	} else {
   1130 		ASSERT(sti->sti_conn_ind_tail->b_next == NULL);
   1131 		sti->sti_conn_ind_tail->b_next = mp;
   1132 	}
   1133 	sti->sti_conn_ind_tail = mp;
   1134 	/* Wakeup a single consumer of the T_CONN_IND */
   1135 	cv_signal(&so->so_acceptq_cv);
   1136 	mutex_exit(&so->so_lock);
   1137 }
   1138 
   1139 /*
   1140  * Wait for a T_CONN_IND.
   1141  * Don't wait if nonblocking.
   1142  * Accept signals and socket errors.
   1143  */
   1144 int
   1145 sowaitconnind(struct sonode *so, int fmode, mblk_t **mpp)
   1146 {
   1147 	mblk_t *mp;
   1148 	sotpi_info_t *sti = SOTOTPI(so);
   1149 	int error = 0;
   1150 
   1151 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
   1152 	mutex_enter(&so->so_lock);
   1153 check_error:
   1154 	if (so->so_error) {
   1155 		error = sogeterr(so, B_TRUE);
   1156 		if (error) {
   1157 			mutex_exit(&so->so_lock);
   1158 			return (error);
   1159 		}
   1160 	}
   1161 
   1162 	if (sti->sti_conn_ind_head == NULL) {
   1163 		if (fmode & (FNDELAY|FNONBLOCK)) {
   1164 			error = EWOULDBLOCK;
   1165 			goto done;
   1166 		}
   1167 
   1168 		if (so->so_state & SS_CLOSING) {
   1169 			error = EINTR;
   1170 			goto done;
   1171 		}
   1172 
   1173 		if (!cv_wait_sig_swap(&so->so_acceptq_cv, &so->so_lock)) {
   1174 			error = EINTR;
   1175 			goto done;
   1176 		}
   1177 		goto check_error;
   1178 	}
   1179 	mp = sti->sti_conn_ind_head;
   1180 	sti->sti_conn_ind_head = mp->b_next;
   1181 	mp->b_next = NULL;
   1182 	if (sti->sti_conn_ind_head == NULL) {
   1183 		ASSERT(sti->sti_conn_ind_tail == mp);
   1184 		sti->sti_conn_ind_tail = NULL;
   1185 	}
   1186 	*mpp = mp;
   1187 done:
   1188 	mutex_exit(&so->so_lock);
   1189 	return (error);
   1190 }
   1191 
   1192 /*
   1193  * Flush a T_CONN_IND matching the sequence number from the list.
   1194  * Return zero if found; non-zero otherwise.
   1195  * This is called very infrequently thus it is ok to do a linear search.
   1196  */
   1197 int
   1198 soflushconnind(struct sonode *so, t_scalar_t seqno)
   1199 {
   1200 	mblk_t *prevmp, *mp;
   1201 	struct T_conn_ind *tci;
   1202 	sotpi_info_t *sti = SOTOTPI(so);
   1203 
   1204 	mutex_enter(&so->so_lock);
   1205 	for (prevmp = NULL, mp = sti->sti_conn_ind_head; mp != NULL;
   1206 	    prevmp = mp, mp = mp->b_next) {
   1207 		tci = (struct T_conn_ind *)mp->b_rptr;
   1208 		if (tci->SEQ_number == seqno) {
   1209 			dprintso(so, 1,
   1210 			    ("t_discon_ind: found T_CONN_IND %d\n", seqno));
   1211 			/* Deleting last? */
   1212 			if (sti->sti_conn_ind_tail == mp) {
   1213 				sti->sti_conn_ind_tail = prevmp;
   1214 			}
   1215 			if (prevmp == NULL) {
   1216 				/* Deleting first */
   1217 				sti->sti_conn_ind_head = mp->b_next;
   1218 			} else {
   1219 				prevmp->b_next = mp->b_next;
   1220 			}
   1221 			mp->b_next = NULL;
   1222 
   1223 			ASSERT((sti->sti_conn_ind_head == NULL &&
   1224 			    sti->sti_conn_ind_tail == NULL) ||
   1225 			    (sti->sti_conn_ind_head != NULL &&
   1226 			    sti->sti_conn_ind_tail != NULL));
   1227 
   1228 			so->so_error = ECONNABORTED;
   1229 			mutex_exit(&so->so_lock);
   1230 
   1231 			/*
   1232 			 * T_KSSL_PROXY_CONN_IND may carry a handle for
   1233 			 * an SSL context, and needs to be released.
   1234 			 */
   1235 			if ((tci->PRIM_type == T_SSL_PROXY_CONN_IND) &&
   1236 			    (mp->b_cont != NULL)) {
   1237 				kssl_ctx_t kssl_ctx;
   1238 
   1239 				ASSERT(MBLKL(mp->b_cont) ==
   1240 				    sizeof (kssl_ctx_t));
   1241 				kssl_ctx = *((kssl_ctx_t *)mp->b_cont->b_rptr);
   1242 				kssl_release_ctx(kssl_ctx);
   1243 			}
   1244 			freemsg(mp);
   1245 			return (0);
   1246 		}
   1247 	}
   1248 	mutex_exit(&so->so_lock);
   1249 	dprintso(so, 1,	("t_discon_ind: NOT found T_CONN_IND %d\n", seqno));
   1250 	return (-1);
   1251 }
   1252 
   1253 /*
   1254  * Wait until the socket is connected or there is an error.
   1255  * fmode should contain any nonblocking flags. nosig should be
   1256  * set if the caller does not want the wait to be interrupted by a signal.
   1257  */
   1258 int
   1259 sowaitconnected(struct sonode *so, int fmode, int nosig)
   1260 {
   1261 	int error;
   1262 
   1263 	ASSERT(MUTEX_HELD(&so->so_lock));
   1264 
   1265 	while ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) ==
   1266 	    SS_ISCONNECTING && so->so_error == 0) {
   1267 
   1268 		dprintso(so, 1, ("waiting for SS_ISCONNECTED on %p\n",
   1269 		    (void *)so));
   1270 		if (fmode & (FNDELAY|FNONBLOCK))
   1271 			return (EINPROGRESS);
   1272 
   1273 		if (so->so_state & SS_CLOSING)
   1274 			return (EINTR);
   1275 
   1276 		if (nosig)
   1277 			cv_wait(&so->so_state_cv, &so->so_lock);
   1278 		else if (!cv_wait_sig_swap(&so->so_state_cv, &so->so_lock)) {
   1279 			/*
   1280 			 * Return EINTR and let the application use
   1281 			 * nonblocking techniques for detecting when
   1282 			 * the connection has been established.
   1283 			 */
   1284 			return (EINTR);
   1285 		}
   1286 		dprintso(so, 1, ("awoken on %p\n", (void *)so));
   1287 	}
   1288 
   1289 	if (so->so_error != 0) {
   1290 		error = sogeterr(so, B_TRUE);
   1291 		ASSERT(error != 0);
   1292 		dprintso(so, 1, ("sowaitconnected: error %d\n", error));
   1293 		return (error);
   1294 	}
   1295 	if (!(so->so_state & SS_ISCONNECTED)) {
   1296 		/*
   1297 		 * Could have received a T_ORDREL_IND or a T_DISCON_IND with
   1298 		 * zero errno. Or another thread could have consumed so_error
   1299 		 * e.g. by calling read.
   1300 		 */
   1301 		error = ECONNREFUSED;
   1302 		dprintso(so, 1, ("sowaitconnected: error %d\n", error));
   1303 		return (error);
   1304 	}
   1305 	return (0);
   1306 }
   1307 
   1308 
   1309 /*
   1310  * Handle the signal generation aspect of urgent data.
   1311  */
   1312 static void
   1313 so_oob_sig(struct sonode *so, int extrasig,
   1314     strsigset_t *signals, strpollset_t *pollwakeups)
   1315 {
   1316 	sotpi_info_t *sti = SOTOTPI(so);
   1317 
   1318 	ASSERT(MUTEX_HELD(&so->so_lock));
   1319 
   1320 	ASSERT(so_verify_oobstate(so));
   1321 	ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
   1322 	if (sti->sti_oobsigcnt > sti->sti_oobcnt) {
   1323 		/*
   1324 		 * Signal has already been generated once for this
   1325 		 * urgent "event". However, since TCP can receive updated
   1326 		 * urgent pointers we still generate a signal.
   1327 		 */
   1328 		ASSERT(so->so_state & SS_OOBPEND);
   1329 		if (extrasig) {
   1330 			*signals |= S_RDBAND;
   1331 			*pollwakeups |= POLLRDBAND;
   1332 		}
   1333 		return;
   1334 	}
   1335 
   1336 	sti->sti_oobsigcnt++;
   1337 	ASSERT(sti->sti_oobsigcnt > 0);	/* Wraparound */
   1338 	ASSERT(sti->sti_oobsigcnt > sti->sti_oobcnt);
   1339 
   1340 	/*
   1341 	 * Record (for select/poll) that urgent data is pending.
   1342 	 */
   1343 	so->so_state |= SS_OOBPEND;
   1344 	/*
   1345 	 * New urgent data on the way so forget about any old
   1346 	 * urgent data.
   1347 	 */
   1348 	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
   1349 	if (so->so_oobmsg != NULL) {
   1350 		dprintso(so, 1, ("sock: discarding old oob\n"));
   1351 		freemsg(so->so_oobmsg);
   1352 		so->so_oobmsg = NULL;
   1353 	}
   1354 	*signals |= S_RDBAND;
   1355 	*pollwakeups |= POLLRDBAND;
   1356 	ASSERT(so_verify_oobstate(so));
   1357 }
   1358 
   1359 /*
   1360  * Handle the processing of the T_EXDATA_IND with urgent data.
   1361  * Returns the T_EXDATA_IND if it should be queued on the read queue.
   1362  */
   1363 /* ARGSUSED2 */
   1364 static mblk_t *
   1365 so_oob_exdata(struct sonode *so, mblk_t *mp,
   1366 	strsigset_t *signals, strpollset_t *pollwakeups)
   1367 {
   1368 	sotpi_info_t *sti = SOTOTPI(so);
   1369 
   1370 	ASSERT(MUTEX_HELD(&so->so_lock));
   1371 
   1372 	ASSERT(so_verify_oobstate(so));
   1373 
   1374 	ASSERT(sti->sti_oobsigcnt > sti->sti_oobcnt);
   1375 
   1376 	sti->sti_oobcnt++;
   1377 	ASSERT(sti->sti_oobcnt > 0);	/* wraparound? */
   1378 	ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
   1379 
   1380 	/*
   1381 	 * Set MSGMARK for SIOCATMARK.
   1382 	 */
   1383 	mp->b_flag |= MSGMARK;
   1384 
   1385 	ASSERT(so_verify_oobstate(so));
   1386 	return (mp);
   1387 }
   1388 
   1389 /*
   1390  * Handle the processing of the actual urgent data.
   1391  * Returns the data mblk if it should be queued on the read queue.
   1392  */
   1393 static mblk_t *
   1394 so_oob_data(struct sonode *so, mblk_t *mp,
   1395 	strsigset_t *signals, strpollset_t *pollwakeups)
   1396 {
   1397 	sotpi_info_t *sti = SOTOTPI(so);
   1398 
   1399 	ASSERT(MUTEX_HELD(&so->so_lock));
   1400 
   1401 	ASSERT(so_verify_oobstate(so));
   1402 
   1403 	ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
   1404 	ASSERT(mp != NULL);
   1405 	/*
   1406 	 * For OOBINLINE we keep the data in the T_EXDATA_IND.
   1407 	 * Otherwise we store it in so_oobmsg.
   1408 	 */
   1409 	ASSERT(so->so_oobmsg == NULL);
   1410 	if (so->so_options & SO_OOBINLINE) {
   1411 		*pollwakeups |= POLLIN | POLLRDNORM | POLLRDBAND;
   1412 		*signals |= S_INPUT | S_RDNORM;
   1413 	} else {
   1414 		*pollwakeups |= POLLRDBAND;
   1415 		so->so_state |= SS_HAVEOOBDATA;
   1416 		so->so_oobmsg = mp;
   1417 		mp = NULL;
   1418 	}
   1419 	ASSERT(so_verify_oobstate(so));
   1420 	return (mp);
   1421 }
   1422 
   1423 /*
   1424  * Caller must hold the mutex.
   1425  * For delayed processing, save the T_DISCON_IND received
   1426  * from below on sti_discon_ind_mp.
   1427  * When the message is processed the framework will call:
   1428  *      (*func)(so, mp);
   1429  */
   1430 static void
   1431 so_save_discon_ind(struct sonode *so,
   1432 	mblk_t *mp,
   1433 	void (*func)(struct sonode *so, mblk_t *))
   1434 {
   1435 	sotpi_info_t *sti = SOTOTPI(so);
   1436 
   1437 	ASSERT(MUTEX_HELD(&so->so_lock));
   1438 
   1439 	/*
   1440 	 * Discard new T_DISCON_IND if we have already received another.
   1441 	 * Currently the earlier message can either be on sti_discon_ind_mp
   1442 	 * or being processed.
   1443 	 */
   1444 	if (sti->sti_discon_ind_mp != NULL || (so->so_flag & SOASYNC_UNBIND)) {
   1445 		zcmn_err(getzoneid(), CE_WARN,
   1446 		    "sockfs: received unexpected additional T_DISCON_IND\n");
   1447 		freemsg(mp);
   1448 		return;
   1449 	}
   1450 	mp->b_prev = (mblk_t *)func;
   1451 	mp->b_next = NULL;
   1452 	sti->sti_discon_ind_mp = mp;
   1453 }
   1454 
   1455 /*
   1456  * Caller must hold the mutex and make sure that either SOLOCKED
   1457  * or SOASYNC_UNBIND is set. Called from so_unlock_single().
   1458  * Perform delayed processing of T_DISCON_IND message on sti_discon_ind_mp.
   1459  * Need to ensure that strsock_proto() will not end up sleeping for
   1460  * SOASYNC_UNBIND, while executing this function.
   1461  */
   1462 void
   1463 so_drain_discon_ind(struct sonode *so)
   1464 {
   1465 	mblk_t	*bp;
   1466 	void (*func)(struct sonode *so, mblk_t *);
   1467 	sotpi_info_t *sti = SOTOTPI(so);
   1468 
   1469 	ASSERT(MUTEX_HELD(&so->so_lock));
   1470 	ASSERT(so->so_flag & (SOLOCKED|SOASYNC_UNBIND));
   1471 
   1472 	/* Process T_DISCON_IND on sti_discon_ind_mp */
   1473 	if ((bp = sti->sti_discon_ind_mp) != NULL) {
   1474 		sti->sti_discon_ind_mp = NULL;
   1475 		func = (void (*)())bp->b_prev;
   1476 		bp->b_prev = NULL;
   1477 
   1478 		/*
   1479 		 * This (*func) is supposed to generate a message downstream
   1480 		 * and we need to have a flag set until the corresponding
   1481 		 * upstream message reaches stream head.
   1482 		 * When processing T_DISCON_IND in strsock_discon_ind
   1483 		 * we hold SOASYN_UNBIND when sending T_UNBIND_REQ down and
   1484 		 * drop the flag after we get the ACK in strsock_proto.
   1485 		 */
   1486 		(void) (*func)(so, bp);
   1487 	}
   1488 }
   1489 
   1490 /*
   1491  * Caller must hold the mutex.
   1492  * Remove the T_DISCON_IND on sti_discon_ind_mp.
   1493  */
   1494 void
   1495 so_flush_discon_ind(struct sonode *so)
   1496 {
   1497 	mblk_t	*bp;
   1498 	sotpi_info_t *sti = SOTOTPI(so);
   1499 
   1500 	ASSERT(MUTEX_HELD(&so->so_lock));
   1501 
   1502 	/*
   1503 	 * Remove T_DISCON_IND mblk at sti_discon_ind_mp.
   1504 	 */
   1505 	if ((bp = sti->sti_discon_ind_mp) != NULL) {
   1506 		sti->sti_discon_ind_mp = NULL;
   1507 		bp->b_prev = NULL;
   1508 		freemsg(bp);
   1509 	}
   1510 }
   1511 
   1512 /*
   1513  * Caller must hold the mutex.
   1514  *
   1515  * This function is used to process the T_DISCON_IND message. It does
   1516  * immediate processing when called from strsock_proto and delayed
   1517  * processing of discon_ind saved on sti_discon_ind_mp when called from
   1518  * so_drain_discon_ind. When a T_DISCON_IND message is saved in
   1519  * sti_discon_ind_mp for delayed processing, this function is registered
   1520  * as the callback function to process the message.
   1521  *
   1522  * SOASYNC_UNBIND should be held in this function, during the non-blocking
   1523  * unbind operation, and should be released only after we receive the ACK
   1524  * in strsock_proto, for the T_UNBIND_REQ sent here. Since SOLOCKED is not set,
   1525  * no TPI messages would be sent down at this time. This is to prevent M_FLUSH
   1526  * sent from either this function or tcp_unbind(), flushing away any TPI
   1527  * message that is being sent down and stays in a lower module's queue.
   1528  *
   1529  * This function drops so_lock and grabs it again.
   1530  */
   1531 static void
   1532 strsock_discon_ind(struct sonode *so, mblk_t *discon_mp)
   1533 {
   1534 	struct vnode *vp;
   1535 	struct stdata *stp;
   1536 	union T_primitives *tpr;
   1537 	struct T_unbind_req *ubr;
   1538 	mblk_t *mp;
   1539 	int error;
   1540 	sotpi_info_t *sti = SOTOTPI(so);
   1541 
   1542 	ASSERT(MUTEX_HELD(&so->so_lock));
   1543 	ASSERT(discon_mp);
   1544 	ASSERT(discon_mp->b_rptr);
   1545 
   1546 	tpr = (union T_primitives *)discon_mp->b_rptr;
   1547 	ASSERT(tpr->type == T_DISCON_IND);
   1548 
   1549 	vp = SOTOV(so);
   1550 	stp = vp->v_stream;
   1551 	ASSERT(stp);
   1552 
   1553 	/*
   1554 	 * Not a listener
   1555 	 */
   1556 	ASSERT((so->so_state & SS_ACCEPTCONN) == 0);
   1557 
   1558 	/*
   1559 	 * This assumes that the name space for DISCON_reason
   1560 	 * is the errno name space.
   1561 	 */
   1562 	soisdisconnected(so, tpr->discon_ind.DISCON_reason);
   1563 	sti->sti_laddr_valid = 0;
   1564 	sti->sti_faddr_valid = 0;
   1565 
   1566 	/*
   1567 	 * Unbind with the transport without blocking.
   1568 	 * If we've already received a T_DISCON_IND do not unbind.
   1569 	 *
   1570 	 * If there is no preallocated unbind message, we have already
   1571 	 * unbound with the transport
   1572 	 *
   1573 	 * If the socket is not bound, no need to unbind.
   1574 	 */
   1575 	mp = sti->sti_unbind_mp;
   1576 	if (mp == NULL) {
   1577 		ASSERT(!(so->so_state & SS_ISBOUND));
   1578 		mutex_exit(&so->so_lock);
   1579 	} else if (!(so->so_state & SS_ISBOUND))  {
   1580 		mutex_exit(&so->so_lock);
   1581 	} else {
   1582 		sti->sti_unbind_mp = NULL;
   1583 
   1584 		/*
   1585 		 * Is another T_DISCON_IND being processed.
   1586 		 */
   1587 		ASSERT((so->so_flag & SOASYNC_UNBIND) == 0);
   1588 
   1589 		/*
   1590 		 * Make strsock_proto ignore T_OK_ACK and T_ERROR_ACK for
   1591 		 * this unbind. Set SOASYNC_UNBIND. This should be cleared
   1592 		 * only after we receive the ACK in strsock_proto.
   1593 		 */
   1594 		so->so_flag |= SOASYNC_UNBIND;
   1595 		ASSERT(!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)));
   1596 		so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN);
   1597 		sti->sti_laddr_valid = 0;
   1598 		mutex_exit(&so->so_lock);
   1599 
   1600 		/*
   1601 		 * Send down T_UNBIND_REQ ignoring flow control.
   1602 		 * XXX Assumes that MSG_IGNFLOW implies that this thread
   1603 		 * does not run service procedures.
   1604 		 */
   1605 		ASSERT(DB_TYPE(mp) == M_PROTO);
   1606 		ubr = (struct T_unbind_req *)mp->b_rptr;
   1607 		mp->b_wptr += sizeof (*ubr);
   1608 		ubr->PRIM_type = T_UNBIND_REQ;
   1609 
   1610 		/*
   1611 		 * Flush the read and write side (except stream head read queue)
   1612 		 * and send down T_UNBIND_REQ.
   1613 		 */
   1614 		(void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW);
   1615 		error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
   1616 		    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
   1617 		/* LINTED - warning: statement has no consequent: if */
   1618 		if (error) {
   1619 			eprintsoline(so, error);
   1620 		}
   1621 	}
   1622 
   1623 	if (tpr->discon_ind.DISCON_reason != 0)
   1624 		strsetrerror(SOTOV(so), 0, 0, sogetrderr);
   1625 	strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
   1626 	strseteof(SOTOV(so), 1);
   1627 	/*
   1628 	 * strseteof takes care of read side wakeups,
   1629 	 * pollwakeups, and signals.
   1630 	 */
   1631 	dprintso(so, 1, ("T_DISCON_IND: error %d\n", so->so_error));
   1632 	freemsg(discon_mp);
   1633 
   1634 
   1635 	pollwakeup(&stp->sd_pollist, POLLOUT);
   1636 	mutex_enter(&stp->sd_lock);
   1637 
   1638 	/*
   1639 	 * Wake sleeping write
   1640 	 */
   1641 	if (stp->sd_flag & WSLEEP) {
   1642 		stp->sd_flag &= ~WSLEEP;
   1643 		cv_broadcast(&stp->sd_wrq->q_wait);
   1644 	}
   1645 
   1646 	/*
   1647 	 * strsendsig can handle multiple signals with a
   1648 	 * single call.  Send SIGPOLL for S_OUTPUT event.
   1649 	 */
   1650 	if (stp->sd_sigflags & S_OUTPUT)
   1651 		strsendsig(stp->sd_siglist, S_OUTPUT, 0, 0);
   1652 
   1653 	mutex_exit(&stp->sd_lock);
   1654 	mutex_enter(&so->so_lock);
   1655 }
   1656 
   1657 /*
   1658  * This routine is registered with the stream head to receive M_PROTO
   1659  * and M_PCPROTO messages.
   1660  *
   1661  * Returns NULL if the message was consumed.
   1662  * Returns an mblk to make that mblk be processed (and queued) by the stream
   1663  * head.
   1664  *
   1665  * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
   1666  * *pollwakeups) for the stream head to take action on. Note that since
   1667  * sockets always deliver SIGIO for every new piece of data this routine
   1668  * never sets *firstmsgsigs; any signals are returned in *allmsgsigs.
   1669  *
   1670  * This routine handles all data related TPI messages independent of
   1671  * the type of the socket i.e. it doesn't care if T_UNITDATA_IND message
   1672  * arrive on a SOCK_STREAM.
   1673  */
   1674 static mblk_t *
   1675 strsock_proto(vnode_t *vp, mblk_t *mp,
   1676 		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
   1677 		strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
   1678 {
   1679 	union T_primitives *tpr;
   1680 	struct sonode *so;
   1681 	sotpi_info_t *sti;
   1682 
   1683 	so = VTOSO(vp);
   1684 	sti = SOTOTPI(so);
   1685 
   1686 	dprintso(so, 1, ("strsock_proto(%p, %p)\n", (void *)vp, (void *)mp));
   1687 
   1688 	/* Set default return values */
   1689 	*firstmsgsigs = *wakeups = *allmsgsigs = *pollwakeups = 0;
   1690 
   1691 	ASSERT(DB_TYPE(mp) == M_PROTO ||
   1692 	    DB_TYPE(mp) == M_PCPROTO);
   1693 
   1694 	if (MBLKL(mp) < sizeof (tpr->type)) {
   1695 		/* The message is too short to even contain the primitive */
   1696 		zcmn_err(getzoneid(), CE_WARN,
   1697 		    "sockfs: Too short TPI message received. Len = %ld\n",
   1698 		    (ptrdiff_t)(MBLKL(mp)));
   1699 		freemsg(mp);
   1700 		return (NULL);
   1701 	}
   1702 	if (!__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
   1703 		/* The read pointer is not aligned correctly for TPI */
   1704 		zcmn_err(getzoneid(), CE_WARN,
   1705 		    "sockfs: Unaligned TPI message received. rptr = %p\n",
   1706 		    (void *)mp->b_rptr);
   1707 		freemsg(mp);
   1708 		return (NULL);
   1709 	}
   1710 	tpr = (union T_primitives *)mp->b_rptr;
   1711 	dprintso(so, 1, ("strsock_proto: primitive %d\n", tpr->type));
   1712 
   1713 	switch (tpr->type) {
   1714 
   1715 	case T_DATA_IND:
   1716 		if (MBLKL(mp) < sizeof (struct T_data_ind)) {
   1717 			zcmn_err(getzoneid(), CE_WARN,
   1718 			    "sockfs: Too short T_DATA_IND. Len = %ld\n",
   1719 			    (ptrdiff_t)(MBLKL(mp)));
   1720 			freemsg(mp);
   1721 			return (NULL);
   1722 		}
   1723 		/*
   1724 		 * Ignore zero-length T_DATA_IND messages. These might be
   1725 		 * generated by some transports.
   1726 		 * This is needed to prevent read (which skips the M_PROTO
   1727 		 * part) to unexpectedly return 0 (or return EWOULDBLOCK
   1728 		 * on a non-blocking socket after select/poll has indicated
   1729 		 * that data is available).
   1730 		 */
   1731 		if (msgdsize(mp->b_cont) == 0) {
   1732 			dprintso(so, 0,
   1733 			    ("strsock_proto: zero length T_DATA_IND\n"));
   1734 			freemsg(mp);
   1735 			return (NULL);
   1736 		}
   1737 		*allmsgsigs = S_INPUT | S_RDNORM;
   1738 		*pollwakeups = POLLIN | POLLRDNORM;
   1739 		*wakeups = RSLEEP;
   1740 		return (mp);
   1741 
   1742 	case T_UNITDATA_IND: {
   1743 		struct T_unitdata_ind	*tudi = &tpr->unitdata_ind;
   1744 		void			*addr;
   1745 		t_uscalar_t		addrlen;
   1746 
   1747 		if (MBLKL(mp) < sizeof (struct T_unitdata_ind)) {
   1748 			zcmn_err(getzoneid(), CE_WARN,
   1749 			    "sockfs: Too short T_UNITDATA_IND. Len = %ld\n",
   1750 			    (ptrdiff_t)(MBLKL(mp)));
   1751 			freemsg(mp);
   1752 			return (NULL);
   1753 		}
   1754 
   1755 		/* Is this is not a connected datagram socket? */
   1756 		if ((so->so_mode & SM_CONNREQUIRED) ||
   1757 		    !(so->so_state & SS_ISCONNECTED)) {
   1758 			/*
   1759 			 * Not a connected datagram socket. Look for
   1760 			 * the SO_UNIX_CLOSE option. If such an option is found
   1761 			 * discard the message (since it has no meaning
   1762 			 * unless connected).
   1763 			 */
   1764 			if (so->so_family == AF_UNIX && msgdsize(mp) == 0 &&
   1765 			    tudi->OPT_length != 0) {
   1766 				void *opt;
   1767 				t_uscalar_t optlen = tudi->OPT_length;
   1768 
   1769 				opt = sogetoff(mp, tudi->OPT_offset,
   1770 				    optlen, __TPI_ALIGN_SIZE);
   1771 				if (opt == NULL) {
   1772 					/* The len/off falls outside mp */
   1773 					freemsg(mp);
   1774 					mutex_enter(&so->so_lock);
   1775 					soseterror(so, EPROTO);
   1776 					mutex_exit(&so->so_lock);
   1777 					zcmn_err(getzoneid(), CE_WARN,
   1778 					    "sockfs: T_unidata_ind with "
   1779 					    "invalid optlen/offset %u/%d\n",
   1780 					    optlen, tudi->OPT_offset);
   1781 					return (NULL);
   1782 				}
   1783 				if (so_getopt_unix_close(opt, optlen)) {
   1784 					freemsg(mp);
   1785 					return (NULL);
   1786 				}
   1787 			}
   1788 			*allmsgsigs = S_INPUT | S_RDNORM;
   1789 			*pollwakeups = POLLIN | POLLRDNORM;
   1790 			*wakeups = RSLEEP;
   1791 			if (audit_active)
   1792 				audit_sock(T_UNITDATA_IND, strvp2wq(vp),
   1793 				    mp, 0);
   1794 			return (mp);
   1795 		}
   1796 
   1797 		/*
   1798 		 * A connect datagram socket. For AF_INET{,6} we verify that
   1799 		 * the source address matches the "connected to" address.
   1800 		 * The semantics of AF_UNIX sockets is to not verify
   1801 		 * the source address.
   1802 		 * Note that this source address verification is transport
   1803 		 * specific. Thus the real fix would be to extent TPI
   1804 		 * to allow T_CONN_REQ messages to be send to connectionless
   1805 		 * transport providers and always let the transport provider
   1806 		 * do whatever filtering is needed.
   1807 		 *
   1808 		 * The verification/filtering semantics for transports
   1809 		 * other than AF_INET and AF_UNIX are unknown. The choice
   1810 		 * would be to either filter using bcmp or let all messages
   1811 		 * get through. This code does not filter other address
   1812 		 * families since this at least allows the application to
   1813 		 * work around any missing filtering.
   1814 		 *
   1815 		 * XXX Should we move filtering to UDP/ICMP???
   1816 		 * That would require passing e.g. a T_DISCON_REQ to UDP
   1817 		 * when the socket becomes unconnected.
   1818 		 */
   1819 		addrlen = tudi->SRC_length;
   1820 		/*
   1821 		 * The alignment restriction is really to strict but
   1822 		 * we want enough alignment to inspect the fields of
   1823 		 * a sockaddr_in.
   1824 		 */
   1825 		addr = sogetoff(mp, tudi->SRC_offset, addrlen,
   1826 		    __TPI_ALIGN_SIZE);
   1827 		if (addr == NULL) {
   1828 			freemsg(mp);
   1829 			mutex_enter(&so->so_lock);
   1830 			soseterror(so, EPROTO);
   1831 			mutex_exit(&so->so_lock);
   1832 			zcmn_err(getzoneid(), CE_WARN,
   1833 			    "sockfs: T_unidata_ind with invalid "
   1834 			    "addrlen/offset %u/%d\n",
   1835 			    addrlen, tudi->SRC_offset);
   1836 			return (NULL);
   1837 		}
   1838 
   1839 		if (so->so_family == AF_INET) {
   1840 			/*
   1841 			 * For AF_INET we allow wildcarding both sin_addr
   1842 			 * and sin_port.
   1843 			 */
   1844 			struct sockaddr_in *faddr, *sin;
   1845 
   1846 			/* Prevent sti_faddr_sa from changing while accessed */
   1847 			mutex_enter(&so->so_lock);
   1848 			ASSERT(sti->sti_faddr_len ==
   1849 			    (socklen_t)sizeof (struct sockaddr_in));
   1850 			faddr = (struct sockaddr_in *)sti->sti_faddr_sa;
   1851 			sin = (struct sockaddr_in *)addr;
   1852 			if (addrlen !=
   1853 			    (t_uscalar_t)sizeof (struct sockaddr_in) ||
   1854 			    (sin->sin_addr.s_addr != faddr->sin_addr.s_addr &&
   1855 			    faddr->sin_addr.s_addr != INADDR_ANY) ||
   1856 			    (so->so_type != SOCK_RAW &&
   1857 			    sin->sin_port != faddr->sin_port &&
   1858 			    faddr->sin_port != 0)) {
   1859 #ifdef DEBUG
   1860 				dprintso(so, 0,
   1861 				    ("sockfs: T_UNITDATA_IND mismatch: %s",
   1862 				    pr_addr(so->so_family,
   1863 				    (struct sockaddr *)addr, addrlen)));
   1864 				dprintso(so, 0, (" - %s\n",
   1865 				    pr_addr(so->so_family, sti->sti_faddr_sa,
   1866 				    (t_uscalar_t)sti->sti_faddr_len)));
   1867 #endif /* DEBUG */
   1868 				mutex_exit(&so->so_lock);
   1869 				freemsg(mp);
   1870 				return (NULL);
   1871 			}
   1872 			mutex_exit(&so->so_lock);
   1873 		} else if (so->so_family == AF_INET6) {
   1874 			/*
   1875 			 * For AF_INET6 we allow wildcarding both sin6_addr
   1876 			 * and sin6_port.
   1877 			 */
   1878 			struct sockaddr_in6 *faddr6, *sin6;
   1879 			static struct in6_addr zeroes; /* inits to all zeros */
   1880 
   1881 			/* Prevent sti_faddr_sa from changing while accessed */
   1882 			mutex_enter(&so->so_lock);
   1883 			ASSERT(sti->sti_faddr_len ==
   1884 			    (socklen_t)sizeof (struct sockaddr_in6));
   1885 			faddr6 = (struct sockaddr_in6 *)sti->sti_faddr_sa;
   1886 			sin6 = (struct sockaddr_in6 *)addr;
   1887 			/* XXX could we get a mapped address ::ffff:0.0.0.0 ? */
   1888 			if (addrlen !=
   1889 			    (t_uscalar_t)sizeof (struct sockaddr_in6) ||
   1890 			    (!IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
   1891 			    &faddr6->sin6_addr) &&
   1892 			    !IN6_ARE_ADDR_EQUAL(&faddr6->sin6_addr, &zeroes)) ||
   1893 			    (so->so_type != SOCK_RAW &&
   1894 			    sin6->sin6_port != faddr6->sin6_port &&
   1895 			    faddr6->sin6_port != 0)) {
   1896 #ifdef DEBUG
   1897 				dprintso(so, 0,
   1898 				    ("sockfs: T_UNITDATA_IND mismatch: %s",
   1899 				    pr_addr(so->so_family,
   1900 				    (struct sockaddr *)addr, addrlen)));
   1901 				dprintso(so, 0, (" - %s\n",
   1902 				    pr_addr(so->so_family, sti->sti_faddr_sa,
   1903 				    (t_uscalar_t)sti->sti_faddr_len)));
   1904 #endif /* DEBUG */
   1905 				mutex_exit(&so->so_lock);
   1906 				freemsg(mp);
   1907 				return (NULL);
   1908 			}
   1909 			mutex_exit(&so->so_lock);
   1910 		} else if (so->so_family == AF_UNIX &&
   1911 		    msgdsize(mp->b_cont) == 0 &&
   1912 		    tudi->OPT_length != 0) {
   1913 			/*
   1914 			 * Attempt to extract AF_UNIX
   1915 			 * SO_UNIX_CLOSE indication from options.
   1916 			 */
   1917 			void *opt;
   1918 			t_uscalar_t optlen = tudi->OPT_length;
   1919 
   1920 			opt = sogetoff(mp, tudi->OPT_offset,
   1921 			    optlen, __TPI_ALIGN_SIZE);
   1922 			if (opt == NULL) {
   1923 				/* The len/off falls outside mp */
   1924 				freemsg(mp);
   1925 				mutex_enter(&so->so_lock);
   1926 				soseterror(so, EPROTO);
   1927 				mutex_exit(&so->so_lock);
   1928 				zcmn_err(getzoneid(), CE_WARN,
   1929 				    "sockfs: T_unidata_ind with invalid "
   1930 				    "optlen/offset %u/%d\n",
   1931 				    optlen, tudi->OPT_offset);
   1932 				return (NULL);
   1933 			}
   1934 			/*
   1935 			 * If we received a unix close indication mark the
   1936 			 * socket and discard this message.
   1937 			 */
   1938 			if (so_getopt_unix_close(opt, optlen)) {
   1939 				mutex_enter(&so->so_lock);
   1940 				sobreakconn(so, ECONNRESET);
   1941 				mutex_exit(&so->so_lock);
   1942 				strsetrerror(SOTOV(so), 0, 0, sogetrderr);
   1943 				freemsg(mp);
   1944 				*pollwakeups = POLLIN | POLLRDNORM;
   1945 				*allmsgsigs = S_INPUT | S_RDNORM;
   1946 				*wakeups = RSLEEP;
   1947 				return (NULL);
   1948 			}
   1949 		}
   1950 		*allmsgsigs = S_INPUT | S_RDNORM;
   1951 		*pollwakeups = POLLIN | POLLRDNORM;
   1952 		*wakeups = RSLEEP;
   1953 		return (mp);
   1954 	}
   1955 
   1956 	case T_OPTDATA_IND: {
   1957 		struct T_optdata_ind	*tdi = &tpr->optdata_ind;
   1958 
   1959 		if (MBLKL(mp) < sizeof (struct T_optdata_ind)) {
   1960 			zcmn_err(getzoneid(), CE_WARN,
   1961 			    "sockfs: Too short T_OPTDATA_IND. Len = %ld\n",
   1962 			    (ptrdiff_t)(MBLKL(mp)));
   1963 			freemsg(mp);
   1964 			return (NULL);
   1965 		}
   1966 		/*
   1967 		 * Allow zero-length messages carrying options.
   1968 		 * This is used when carrying the SO_UNIX_CLOSE option.
   1969 		 */
   1970 		if (so->so_family == AF_UNIX && msgdsize(mp->b_cont) == 0 &&
   1971 		    tdi->OPT_length != 0) {
   1972 			/*
   1973 			 * Attempt to extract AF_UNIX close indication
   1974 			 * from the options. Ignore any other options -
   1975 			 * those are handled once the message is removed
   1976 			 * from the queue.
   1977 			 * The close indication message should not carry data.
   1978 			 */
   1979 			void *opt;
   1980 			t_uscalar_t optlen = tdi->OPT_length;
   1981 
   1982 			opt = sogetoff(mp, tdi->OPT_offset,
   1983 			    optlen, __TPI_ALIGN_SIZE);
   1984 			if (opt == NULL) {
   1985 				/* The len/off falls outside mp */
   1986 				freemsg(mp);
   1987 				mutex_enter(&so->so_lock);
   1988 				soseterror(so, EPROTO);
   1989 				mutex_exit(&so->so_lock);
   1990 				zcmn_err(getzoneid(), CE_WARN,
   1991 				    "sockfs: T_optdata_ind with invalid "
   1992 				    "optlen/offset %u/%d\n",
   1993 				    optlen, tdi->OPT_offset);
   1994 				return (NULL);
   1995 			}
   1996 			/*
   1997 			 * If we received a close indication mark the
   1998 			 * socket and discard this message.
   1999 			 */
   2000 			if (so_getopt_unix_close(opt, optlen)) {
   2001 				mutex_enter(&so->so_lock);
   2002 				socantsendmore(so);
   2003 				sti->sti_faddr_valid = 0;
   2004 				mutex_exit(&so->so_lock);
   2005 				strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
   2006 				freemsg(mp);
   2007 				return (NULL);
   2008 			}
   2009 		}
   2010 		*allmsgsigs = S_INPUT | S_RDNORM;
   2011 		*pollwakeups = POLLIN | POLLRDNORM;
   2012 		*wakeups = RSLEEP;
   2013 		return (mp);
   2014 	}
   2015 
   2016 	case T_EXDATA_IND: {
   2017 		mblk_t		*mctl, *mdata;
   2018 		mblk_t *lbp;
   2019 		union T_primitives *tprp;
   2020 		struct stdata   *stp;
   2021 		queue_t *qp;
   2022 
   2023 		if (MBLKL(mp) < sizeof (struct T_exdata_ind)) {
   2024 			zcmn_err(getzoneid(), CE_WARN,
   2025 			    "sockfs: Too short T_EXDATA_IND. Len = %ld\n",
   2026 			    (ptrdiff_t)(MBLKL(mp)));
   2027 			freemsg(mp);
   2028 			return (NULL);
   2029 		}
   2030 		/*
   2031 		 * Ignore zero-length T_EXDATA_IND messages. These might be
   2032 		 * generated by some transports.
   2033 		 *
   2034 		 * This is needed to prevent read (which skips the M_PROTO
   2035 		 * part) to unexpectedly return 0 (or return EWOULDBLOCK
   2036 		 * on a non-blocking socket after select/poll has indicated
   2037 		 * that data is available).
   2038 		 */
   2039 		dprintso(so, 1,
   2040 		    ("T_EXDATA_IND(%p): counts %d/%d state %s\n",
   2041 		    (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt,
   2042 		    pr_state(so->so_state, so->so_mode)));
   2043 
   2044 		if (msgdsize(mp->b_cont) == 0) {
   2045 			dprintso(so, 0,
   2046 			    ("strsock_proto: zero length T_EXDATA_IND\n"));
   2047 			freemsg(mp);
   2048 			return (NULL);
   2049 		}
   2050 
   2051 		/*
   2052 		 * Split into the T_EXDATA_IND and the M_DATA part.
   2053 		 * We process these three pieces separately:
   2054 		 *	signal generation
   2055 		 *	handling T_EXDATA_IND
   2056 		 *	handling M_DATA component
   2057 		 */
   2058 		mctl = mp;
   2059 		mdata = mctl->b_cont;
   2060 		mctl->b_cont = NULL;
   2061 		mutex_enter(&so->so_lock);
   2062 		so_oob_sig(so, 0, allmsgsigs, pollwakeups);
   2063 		mctl = so_oob_exdata(so, mctl, allmsgsigs, pollwakeups);
   2064 		mdata = so_oob_data(so, mdata, allmsgsigs, pollwakeups);
   2065 
   2066 		stp = vp->v_stream;
   2067 		ASSERT(stp != NULL);
   2068 		qp = _RD(stp->sd_wrq);
   2069 
   2070 		mutex_enter(QLOCK(qp));
   2071 		lbp = qp->q_last;
   2072 
   2073 		/*
   2074 		 * We want to avoid queueing up a string of T_EXDATA_IND
   2075 		 * messages with no intervening data messages at the stream
   2076 		 * head. These messages contribute to the total message
   2077 		 * count. Eventually this can lead to STREAMS flow control
   2078 		 * and also cause TCP to advertise a zero window condition
   2079 		 * to the peer. This can happen in the degenerate case where
   2080 		 * the sender and receiver exchange only OOB data. The sender
   2081 		 * only sends messages with MSG_OOB flag and the receiver
   2082 		 * receives only MSG_OOB messages and does not use SO_OOBINLINE.
   2083 		 * An example of this scenario has been reported in applications
   2084 		 * that use OOB data to exchange heart beats. Flow control
   2085 		 * relief will never happen if the application only reads OOB
   2086 		 * data which is done directly by sorecvoob() and the
   2087 		 * T_EXDATA_IND messages at the streamhead won't be consumed.
   2088 		 * Note that there is no correctness issue in compressing the
   2089 		 * string of T_EXDATA_IND messages into a single T_EXDATA_IND
   2090 		 * message. A single read that does not specify MSG_OOB will
   2091 		 * read across all the marks in a loop in sotpi_recvmsg().
   2092 		 * Each mark is individually distinguishable only if the
   2093 		 * T_EXDATA_IND messages are separated by data messages.
   2094 		 */
   2095 		if ((qp->q_first != NULL) && (DB_TYPE(lbp) == M_PROTO)) {
   2096 			tprp = (union T_primitives *)lbp->b_rptr;
   2097 			if ((tprp->type == T_EXDATA_IND) &&
   2098 			    !(so->so_options & SO_OOBINLINE)) {
   2099 
   2100 				/*
   2101 				 * free the new M_PROTO message
   2102 				 */
   2103 				freemsg(mctl);
   2104 
   2105 				/*
   2106 				 * adjust the OOB count and OOB	signal count
   2107 				 * just incremented for the new OOB data.
   2108 				 */
   2109 				sti->sti_oobcnt--;
   2110 				sti->sti_oobsigcnt--;
   2111 				mutex_exit(QLOCK(qp));
   2112 				mutex_exit(&so->so_lock);
   2113 				return (NULL);
   2114 			}
   2115 		}
   2116 		mutex_exit(QLOCK(qp));
   2117 
   2118 		/*
   2119 		 * Pass the T_EXDATA_IND and the M_DATA back separately
   2120 		 * by using b_next linkage. (The stream head will queue any
   2121 		 * b_next linked messages separately.) This is needed
   2122 		 * since MSGMARK applies to the last byte of the message
   2123 		 * hence we can not have any M_DATA component attached
   2124 		 * to the marked T_EXDATA_IND. Note that the stream head
   2125 		 * will not consolidate M_DATA messages onto an MSGMARK'ed
   2126 		 * message in order to preserve the constraint that
   2127 		 * the T_EXDATA_IND always is a separate message.
   2128 		 */
   2129 		ASSERT(mctl != NULL);
   2130 		mctl->b_next = mdata;
   2131 		mp = mctl;
   2132 #ifdef DEBUG
   2133 		if (mdata == NULL) {
   2134 			dprintso(so, 1,
   2135 			    ("after outofline T_EXDATA_IND(%p): "
   2136 			    "counts %d/%d  poll 0x%x sig 0x%x state %s\n",
   2137 			    (void *)vp, sti->sti_oobsigcnt,
   2138 			    sti->sti_oobcnt, *pollwakeups, *allmsgsigs,
   2139 			    pr_state(so->so_state, so->so_mode)));
   2140 		} else {
   2141 			dprintso(so, 1,
   2142 			    ("after inline T_EXDATA_IND(%p): "
   2143 			    "counts %d/%d  poll 0x%x sig 0x%x state %s\n",
   2144 			    (void *)vp, sti->sti_oobsigcnt,
   2145 			    sti->sti_oobcnt, *pollwakeups, *allmsgsigs,
   2146 			    pr_state(so->so_state, so->so_mode)));
   2147 		}
   2148 #endif /* DEBUG */
   2149 		mutex_exit(&so->so_lock);
   2150 		*wakeups = RSLEEP;
   2151 		return (mp);
   2152 	}
   2153 
   2154 	case T_CONN_CON: {
   2155 		struct T_conn_con	*conn_con;
   2156 		void			*addr;
   2157 		t_uscalar_t		addrlen;
   2158 
   2159 		/*
   2160 		 * Verify the state, update the state to ISCONNECTED,
   2161 		 * record the potentially new address in the message,
   2162 		 * and drop the message.
   2163 		 */
   2164 		if (MBLKL(mp) < sizeof (struct T_conn_con)) {
   2165 			zcmn_err(getzoneid(), CE_WARN,
   2166 			    "sockfs: Too short T_CONN_CON. Len = %ld\n",
   2167 			    (ptrdiff_t)(MBLKL(mp)));
   2168 			freemsg(mp);
   2169 			return (NULL);
   2170 		}
   2171 
   2172 		mutex_enter(&so->so_lock);
   2173 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) !=
   2174 		    SS_ISCONNECTING) {
   2175 			mutex_exit(&so->so_lock);
   2176 			dprintso(so, 1,
   2177 			    ("T_CONN_CON: state %x\n", so->so_state));
   2178 			freemsg(mp);
   2179 			return (NULL);
   2180 		}
   2181 
   2182 		conn_con = &tpr->conn_con;
   2183 		addrlen = conn_con->RES_length;
   2184 		/*
   2185 		 * Allow the address to be of different size than sent down
   2186 		 * in the T_CONN_REQ as long as it doesn't exceed the maxlen.
   2187 		 * For AF_UNIX require the identical length.
   2188 		 */
   2189 		if (so->so_family == AF_UNIX ?
   2190 		    addrlen != (t_uscalar_t)sizeof (sti->sti_ux_laddr) :
   2191 		    addrlen > (t_uscalar_t)sti->sti_faddr_maxlen) {
   2192 			zcmn_err(getzoneid(), CE_WARN,
   2193 			    "sockfs: T_conn_con with different "
   2194 			    "length %u/%d\n",
   2195 			    addrlen, conn_con->RES_length);
   2196 			soisdisconnected(so, EPROTO);
   2197 			sti->sti_laddr_valid = 0;
   2198 			sti->sti_faddr_valid = 0;
   2199 			mutex_exit(&so->so_lock);
   2200 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
   2201 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
   2202 			strseteof(SOTOV(so), 1);
   2203 			freemsg(mp);
   2204 			/*
   2205 			 * strseteof takes care of read side wakeups,
   2206 			 * pollwakeups, and signals.
   2207 			 */
   2208 			*wakeups = WSLEEP;
   2209 			*allmsgsigs = S_OUTPUT;
   2210 			*pollwakeups = POLLOUT;
   2211 			return (NULL);
   2212 		}
   2213 		addr = sogetoff(mp, conn_con->RES_offset, addrlen, 1);
   2214 		if (addr == NULL) {
   2215 			zcmn_err(getzoneid(), CE_WARN,
   2216 			    "sockfs: T_conn_con with invalid "
   2217 			    "addrlen/offset %u/%d\n",
   2218 			    addrlen, conn_con->RES_offset);
   2219 			mutex_exit(&so->so_lock);
   2220 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
   2221 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
   2222 			strseteof(SOTOV(so), 1);
   2223 			freemsg(mp);
   2224 			/*
   2225 			 * strseteof takes care of read side wakeups,
   2226 			 * pollwakeups, and signals.
   2227 			 */
   2228 			*wakeups = WSLEEP;
   2229 			*allmsgsigs = S_OUTPUT;
   2230 			*pollwakeups = POLLOUT;
   2231 			return (NULL);
   2232 		}
   2233 
   2234 		/*
   2235 		 * Save for getpeername.
   2236 		 */
   2237 		if (so->so_family != AF_UNIX) {
   2238 			sti->sti_faddr_len = (socklen_t)addrlen;
   2239 			ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen);
   2240 			bcopy(addr, sti->sti_faddr_sa, addrlen);
   2241 			sti->sti_faddr_valid = 1;
   2242 		}
   2243 
   2244 		if (so->so_peercred != NULL)
   2245 			crfree(so->so_peercred);
   2246 		so->so_peercred = msg_getcred(mp, &so->so_cpid);
   2247 		if (so->so_peercred != NULL)
   2248 			crhold(so->so_peercred);
   2249 
   2250 		/* Wakeup anybody sleeping in sowaitconnected */
   2251 		soisconnected(so);
   2252 		mutex_exit(&so->so_lock);
   2253 
   2254 		/*
   2255 		 * The socket is now available for sending data.
   2256 		 */
   2257 		*wakeups = WSLEEP;
   2258 		*allmsgsigs = S_OUTPUT;
   2259 		*pollwakeups = POLLOUT;
   2260 		freemsg(mp);
   2261 		return (NULL);
   2262 	}
   2263 
   2264 	/*
   2265 	 * Extra processing in case of an SSL proxy, before queuing or
   2266 	 * forwarding to the fallback endpoint
   2267 	 */
   2268 	case T_SSL_PROXY_CONN_IND:
   2269 	case T_CONN_IND:
   2270 		/*
   2271 		 * Verify the min size and queue the message on
   2272 		 * the sti_conn_ind_head/tail list.
   2273 		 */
   2274 		if (MBLKL(mp) < sizeof (struct T_conn_ind)) {
   2275 			zcmn_err(getzoneid(), CE_WARN,
   2276 			    "sockfs: Too short T_CONN_IND. Len = %ld\n",
   2277 			    (ptrdiff_t)(MBLKL(mp)));
   2278 			freemsg(mp);
   2279 			return (NULL);
   2280 		}
   2281 
   2282 		if (audit_active)
   2283 			audit_sock(T_CONN_IND, strvp2wq(vp), mp, 0);
   2284 		if (!(so->so_state & SS_ACCEPTCONN)) {
   2285 			zcmn_err(getzoneid(), CE_WARN,
   2286 			    "sockfs: T_conn_ind on non-listening socket\n");
   2287 			freemsg(mp);
   2288 			return (NULL);
   2289 		}
   2290 
   2291 		if (tpr->type == T_SSL_PROXY_CONN_IND && mp->b_cont == NULL) {
   2292 			/* No context: need to fall back */
   2293 			struct sonode *fbso;
   2294 			stdata_t *fbstp;
   2295 
   2296 			tpr->type = T_CONN_IND;
   2297 
   2298 			fbso = kssl_find_fallback(sti->sti_kssl_ent);
   2299 
   2300 			/*
   2301 			 * No fallback: the remote will timeout and
   2302 			 * disconnect.
   2303 			 */
   2304 			if (fbso == NULL) {
   2305 				freemsg(mp);
   2306 				return (NULL);
   2307 			}
   2308 			fbstp = SOTOV(fbso)->v_stream;
   2309 			qreply(fbstp->sd_wrq->q_next, mp);
   2310 			return (NULL);
   2311 		}
   2312 		soqueueconnind(so, mp);
   2313 		*allmsgsigs = S_INPUT | S_RDNORM;
   2314 		*pollwakeups = POLLIN | POLLRDNORM;
   2315 		*wakeups = RSLEEP;
   2316 		return (NULL);
   2317 
   2318 	case T_ORDREL_IND:
   2319 		if (MBLKL(mp) < sizeof (struct T_ordrel_ind)) {
   2320 			zcmn_err(getzoneid(), CE_WARN,
   2321 			    "sockfs: Too short T_ORDREL_IND. Len = %ld\n",
   2322 			    (ptrdiff_t)(MBLKL(mp)));
   2323 			freemsg(mp);
   2324 			return (NULL);
   2325 		}
   2326 
   2327 		/*
   2328 		 * Some providers send this when not fully connected.
   2329 		 * SunLink X.25 needs to retrieve disconnect reason after
   2330 		 * disconnect for compatibility. It uses T_ORDREL_IND
   2331 		 * instead of T_DISCON_IND so that it may use the
   2332 		 * endpoint after a connect failure to retrieve the
   2333 		 * reason using an ioctl. Thus we explicitly clear
   2334 		 * SS_ISCONNECTING here for SunLink X.25.
   2335 		 * This is a needed TPI violation.
   2336 		 */
   2337 		mutex_enter(&so->so_lock);
   2338 		so->so_state &= ~SS_ISCONNECTING;
   2339 		socantrcvmore(so);
   2340 		mutex_exit(&so->so_lock);
   2341 		strseteof(SOTOV(so), 1);
   2342 		/*
   2343 		 * strseteof takes care of read side wakeups,
   2344 		 * pollwakeups, and signals.
   2345 		 */
   2346 		freemsg(mp);
   2347 		return (NULL);
   2348 
   2349 	case T_DISCON_IND:
   2350 		if (MBLKL(mp) < sizeof (struct T_discon_ind)) {
   2351 			zcmn_err(getzoneid(), CE_WARN,
   2352 			    "sockfs: Too short T_DISCON_IND. Len = %ld\n",
   2353 			    (ptrdiff_t)(MBLKL(mp)));
   2354 			freemsg(mp);
   2355 			return (NULL);
   2356 		}
   2357 		if (so->so_state & SS_ACCEPTCONN) {
   2358 			/*
   2359 			 * This is a listener. Look for a queued T_CONN_IND
   2360 			 * with a matching sequence number and remove it
   2361 			 * from the list.
   2362 			 * It is normal to not find the sequence number since
   2363 			 * the soaccept might have already dequeued it
   2364 			 * (in which case the T_CONN_RES will fail with
   2365 			 * TBADSEQ).
   2366 			 */
   2367 			(void) soflushconnind(so, tpr->discon_ind.SEQ_number);
   2368 			freemsg(mp);
   2369 			return (0);
   2370 		}
   2371 
   2372 		/*
   2373 		 * Not a listener
   2374 		 *
   2375 		 * If SS_CANTRCVMORE for AF_UNIX ignore the discon_reason.
   2376 		 * Such a discon_ind appears when the peer has first done
   2377 		 * a shutdown() followed by a close() in which case we just
   2378 		 * want to record socantsendmore.
   2379 		 * In this case sockfs first receives a T_ORDREL_IND followed
   2380 		 * by a T_DISCON_IND.
   2381 		 * Note that for other transports (e.g. TCP) we need to handle
   2382 		 * the discon_ind in this case since it signals an error.
   2383 		 */
   2384 		mutex_enter(&so->so_lock);
   2385 		if ((so->so_state & SS_CANTRCVMORE) &&
   2386 		    (so->so_family == AF_UNIX)) {
   2387 			socantsendmore(so);
   2388 			sti->sti_faddr_valid = 0;
   2389 			mutex_exit(&so->so_lock);
   2390 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
   2391 			dprintso(so, 1,
   2392 			    ("T_DISCON_IND: error %d\n", so->so_error));
   2393 			freemsg(mp);
   2394 			/*
   2395 			 * Set these variables for caller to process them.
   2396 			 * For the else part where T_DISCON_IND is processed,
   2397 			 * this will be done in the function being called
   2398 			 * (strsock_discon_ind())
   2399 			 */
   2400 			*wakeups = WSLEEP;
   2401 			*allmsgsigs = S_OUTPUT;
   2402 			*pollwakeups = POLLOUT;
   2403 		} else if (so->so_flag & (SOASYNC_UNBIND | SOLOCKED)) {
   2404 			/*
   2405 			 * Deferred processing of T_DISCON_IND
   2406 			 */
   2407 			so_save_discon_ind(so, mp, strsock_discon_ind);
   2408 			mutex_exit(&so->so_lock);
   2409 		} else {
   2410 			/*
   2411 			 * Process T_DISCON_IND now
   2412 			 */
   2413 			(void) strsock_discon_ind(so, mp);
   2414 			mutex_exit(&so->so_lock);
   2415 		}
   2416 		return (NULL);
   2417 
   2418 	case T_UDERROR_IND: {
   2419 		struct T_uderror_ind	*tudi = &tpr->uderror_ind;
   2420 		void			*addr;
   2421 		t_uscalar_t		addrlen;
   2422 		int			error;
   2423 
   2424 		dprintso(so, 0,
   2425 		    ("T_UDERROR_IND: error %d\n", tudi->ERROR_type));
   2426 
   2427 		if (MBLKL(mp) < sizeof (struct T_uderror_ind)) {
   2428 			zcmn_err(getzoneid(), CE_WARN,
   2429 			    "sockfs: Too short T_UDERROR_IND. Len = %ld\n",
   2430 			    (ptrdiff_t)(MBLKL(mp)));
   2431 			freemsg(mp);
   2432 			return (NULL);
   2433 		}
   2434 		/* Ignore on connection-oriented transports */
   2435 		if (so->so_mode & SM_CONNREQUIRED) {
   2436 			freemsg(mp);
   2437 			eprintsoline(so, 0);
   2438 			zcmn_err(getzoneid(), CE_WARN,
   2439 			    "sockfs: T_uderror_ind on connection-oriented "
   2440 			    "transport\n");
   2441 			return (NULL);
   2442 		}
   2443 		addrlen = tudi->DEST_length;
   2444 		addr = sogetoff(mp, tudi->DEST_offset, addrlen, 1);
   2445 		if (addr == NULL) {
   2446 			zcmn_err(getzoneid(), CE_WARN,
   2447 			    "sockfs: T_uderror_ind with invalid "
   2448 			    "addrlen/offset %u/%d\n",
   2449 			    addrlen, tudi->DEST_offset);
   2450 			freemsg(mp);
   2451 			return (NULL);
   2452 		}
   2453 
   2454 		/* Verify source address for connected socket. */
   2455 		mutex_enter(&so->so_lock);
   2456 		if (so->so_state & SS_ISCONNECTED) {
   2457 			void *faddr;
   2458 			t_uscalar_t faddr_len;
   2459 			boolean_t match = B_FALSE;
   2460 
   2461 			switch (so->so_family) {
   2462 			case AF_INET: {
   2463 				/* Compare just IP address and port */
   2464 				struct sockaddr_in *sin1, *sin2;
   2465 
   2466 				sin1 = (struct sockaddr_in *)sti->sti_faddr_sa;
   2467 				sin2 = (struct sockaddr_in *)addr;
   2468 				if (addrlen == sizeof (struct sockaddr_in) &&
   2469 				    sin1->sin_port == sin2->sin_port &&
   2470 				    sin1->sin_addr.s_addr ==
   2471 				    sin2->sin_addr.s_addr)
   2472 					match = B_TRUE;
   2473 				break;
   2474 			}
   2475 			case AF_INET6: {
   2476 				/* Compare just IP address and port. Not flow */
   2477 				struct sockaddr_in6 *sin1, *sin2;
   2478 
   2479 				sin1 = (struct sockaddr_in6 *)sti->sti_faddr_sa;
   2480 				sin2 = (struct sockaddr_in6 *)addr;
   2481 				if (addrlen == sizeof (struct sockaddr_in6) &&
   2482 				    sin1->sin6_port == sin2->sin6_port &&
   2483 				    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
   2484 				    &sin2->sin6_addr))
   2485 					match = B_TRUE;
   2486 				break;
   2487 			}
   2488 			case AF_UNIX:
   2489 				faddr = &sti->sti_ux_faddr;
   2490 				faddr_len =
   2491 				    (t_uscalar_t)sizeof (sti->sti_ux_faddr);
   2492 				if (faddr_len == addrlen &&
   2493 				    bcmp(addr, faddr, addrlen) == 0)
   2494 					match = B_TRUE;
   2495 				break;
   2496 			default:
   2497 				faddr = sti->sti_faddr_sa;
   2498 				faddr_len = (t_uscalar_t)sti->sti_faddr_len;
   2499 				if (faddr_len == addrlen &&
   2500 				    bcmp(addr, faddr, addrlen) == 0)
   2501 					match = B_TRUE;
   2502 				break;
   2503 			}
   2504 
   2505 			if (!match) {
   2506 #ifdef DEBUG
   2507 				dprintso(so, 0,
   2508 				    ("sockfs: T_UDERR_IND mismatch: %s - ",
   2509 				    pr_addr(so->so_family,
   2510 				    (struct sockaddr *)addr, addrlen)));
   2511 				dprintso(so, 0, ("%s\n",
   2512 				    pr_addr(so->so_family, sti->sti_faddr_sa,
   2513 				    sti->sti_faddr_len)));
   2514 #endif /* DEBUG */
   2515 				mutex_exit(&so->so_lock);
   2516 				freemsg(mp);
   2517 				return (NULL);
   2518 			}
   2519 			/*
   2520 			 * Make the write error nonpersistent. If the error
   2521 			 * is zero we use ECONNRESET.
   2522 			 * This assumes that the name space for ERROR_type
   2523 			 * is the errno name space.
   2524 			 */
   2525 			if (tudi->ERROR_type != 0)
   2526 				error = tudi->ERROR_type;
   2527 			else
   2528 				error = ECONNRESET;
   2529 
   2530 			soseterror(so, error);
   2531 			mutex_exit(&so->so_lock);
   2532 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
   2533 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
   2534 			*wakeups = RSLEEP | WSLEEP;
   2535 			*allmsgsigs = S_INPUT | S_RDNORM | S_OUTPUT;
   2536 			*pollwakeups = POLLIN | POLLRDNORM | POLLOUT;
   2537 			freemsg(mp);
   2538 			return (NULL);
   2539 		}
   2540 		/*
   2541 		 * If the application asked for delayed errors
   2542 		 * record the T_UDERROR_IND sti_eaddr_mp and the reason in
   2543 		 * sti_delayed_error for delayed error posting. If the reason
   2544 		 * is zero use ECONNRESET.
   2545 		 * Note that delayed error indications do not make sense for
   2546 		 * AF_UNIX sockets since sendto checks that the destination
   2547 		 * address is valid at the time of the sendto.
   2548 		 */
   2549 		if (!(so->so_options & SO_DGRAM_ERRIND)) {
   2550 			mutex_exit(&so->so_lock);
   2551 			freemsg(mp);
   2552 			return (NULL);
   2553 		}
   2554 		if (sti->sti_eaddr_mp != NULL)
   2555 			freemsg(sti->sti_eaddr_mp);
   2556 
   2557 		sti->sti_eaddr_mp = mp;
   2558 		if (tudi->ERROR_type != 0)
   2559 			error = tudi->ERROR_type;
   2560 		else
   2561 			error = ECONNRESET;
   2562 		sti->sti_delayed_error = (ushort_t)error;
   2563 		mutex_exit(&so->so_lock);
   2564 		return (NULL);
   2565 	}
   2566 
   2567 	case T_ERROR_ACK:
   2568 		dprintso(so, 0,
   2569 		    ("strsock_proto: T_ERROR_ACK for %d, error %d/%d\n",
   2570 		    tpr->error_ack.ERROR_prim,
   2571 		    tpr->error_ack.TLI_error,
   2572 		    tpr->error_ack.UNIX_error));
   2573 
   2574 		if (MBLKL(mp) < sizeof (struct T_error_ack)) {
   2575 			zcmn_err(getzoneid(), CE_WARN,
   2576 			    "sockfs: Too short T_ERROR_ACK. Len = %ld\n",
   2577 			    (ptrdiff_t)(MBLKL(mp)));
   2578 			freemsg(mp);
   2579 			return (NULL);
   2580 		}
   2581 		/*
   2582 		 * Check if we were waiting for the async message
   2583 		 */
   2584 		mutex_enter(&so->so_lock);
   2585 		if ((so->so_flag & SOASYNC_UNBIND) &&
   2586 		    tpr->error_ack.ERROR_prim == T_UNBIND_REQ) {
   2587 			so_unlock_single(so, SOASYNC_UNBIND);
   2588 			mutex_exit(&so->so_lock);
   2589 			freemsg(mp);
   2590 			return (NULL);
   2591 		}
   2592 		mutex_exit(&so->so_lock);
   2593 		soqueueack(so, mp);
   2594 		return (NULL);
   2595 
   2596 	case T_OK_ACK:
   2597 		if (MBLKL(mp) < sizeof (struct T_ok_ack)) {
   2598 			zcmn_err(getzoneid(), CE_WARN,
   2599 			    "sockfs: Too short T_OK_ACK. Len = %ld\n",
   2600 			    (ptrdiff_t)(MBLKL(mp)));
   2601 			freemsg(mp);
   2602 			return (NULL);
   2603 		}
   2604 		/*
   2605 		 * Check if we were waiting for the async message
   2606 		 */
   2607 		mutex_enter(&so->so_lock);
   2608 		if ((so->so_flag & SOASYNC_UNBIND) &&
   2609 		    tpr->ok_ack.CORRECT_prim == T_UNBIND_REQ) {
   2610 			dprintso(so, 1,
   2611 			    ("strsock_proto: T_OK_ACK async unbind\n"));
   2612 			so_unlock_single(so, SOASYNC_UNBIND);
   2613 			mutex_exit(&so->so_lock);
   2614 			freemsg(mp);
   2615 			return (NULL);
   2616 		}
   2617 		mutex_exit(&so->so_lock);
   2618 		soqueueack(so, mp);
   2619 		return (NULL);
   2620 
   2621 	case T_INFO_ACK:
   2622 		if (MBLKL(mp) < sizeof (struct T_info_ack)) {
   2623 			zcmn_err(getzoneid(), CE_WARN,
   2624 			    "sockfs: Too short T_INFO_ACK. Len = %ld\n",
   2625 			    (ptrdiff_t)(MBLKL(mp)));
   2626 			freemsg(mp);
   2627 			return (NULL);
   2628 		}
   2629 		soqueueack(so, mp);
   2630 		return (NULL);
   2631 
   2632 	case T_CAPABILITY_ACK:
   2633 		/*
   2634 		 * A T_capability_ack need only be large enough to hold
   2635 		 * the PRIM_type and CAP_bits1 fields; checking for anything
   2636 		 * larger might reject a correct response from an older
   2637 		 * provider.
   2638 		 */
   2639 		if (MBLKL(mp) < 2 * sizeof (t_uscalar_t)) {
   2640 			zcmn_err(getzoneid(), CE_WARN,
   2641 			    "sockfs: Too short T_CAPABILITY_ACK. Len = %ld\n",
   2642 			    (ptrdiff_t)(MBLKL(mp)));
   2643 			freemsg(mp);
   2644 			return (NULL);
   2645 		}
   2646 		soqueueack(so, mp);
   2647 		return (NULL);
   2648 
   2649 	case T_BIND_ACK:
   2650 		if (MBLKL(mp) < sizeof (struct T_bind_ack)) {
   2651 			zcmn_err(getzoneid(), CE_WARN,
   2652 			    "sockfs: Too short T_BIND_ACK. Len = %ld\n",
   2653 			    (ptrdiff_t)(MBLKL(mp)));
   2654 			freemsg(mp);
   2655 			return (NULL);
   2656 		}
   2657 		soqueueack(so, mp);
   2658 		return (NULL);
   2659 
   2660 	case T_OPTMGMT_ACK:
   2661 		if (MBLKL(mp) < sizeof (struct T_optmgmt_ack)) {
   2662 			zcmn_err(getzoneid(), CE_WARN,
   2663 			    "sockfs: Too short T_OPTMGMT_ACK. Len = %ld\n",
   2664 			    (ptrdiff_t)(MBLKL(mp)));
   2665 			freemsg(mp);
   2666 			return (NULL);
   2667 		}
   2668 		soqueueack(so, mp);
   2669 		return (NULL);
   2670 	default:
   2671 #ifdef DEBUG
   2672 		zcmn_err(getzoneid(), CE_WARN,
   2673 		    "sockfs: unknown TPI primitive %d received\n",
   2674 		    tpr->type);
   2675 #endif /* DEBUG */
   2676 		freemsg(mp);
   2677 		return (NULL);
   2678 	}
   2679 }
   2680 
   2681 /*
   2682  * This routine is registered with the stream head to receive other
   2683  * (non-data, and non-proto) messages.
   2684  *
   2685  * Returns NULL if the message was consumed.
   2686  * Returns an mblk to make that mblk be processed by the stream head.
   2687  *
   2688  * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
   2689  * *pollwakeups) for the stream head to take action on.
   2690  */
   2691 static mblk_t *
   2692 strsock_misc(vnode_t *vp, mblk_t *mp,
   2693 		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
   2694 		strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
   2695 {
   2696 	struct sonode *so;
   2697 	sotpi_info_t *sti;
   2698 
   2699 	so = VTOSO(vp);
   2700 	sti = SOTOTPI(so);
   2701 
   2702 	dprintso(so, 1, ("strsock_misc(%p, %p, 0x%x)\n",
   2703 	    (void *)vp, (void *)mp, DB_TYPE(mp)));
   2704 
   2705 	/* Set default return values */
   2706 	*wakeups = *allmsgsigs = *firstmsgsigs = *pollwakeups = 0;
   2707 
   2708 	switch (DB_TYPE(mp)) {
   2709 	case M_PCSIG:
   2710 		/*
   2711 		 * This assumes that an M_PCSIG for the urgent data arrives
   2712 		 * before the corresponding T_EXDATA_IND.
   2713 		 *
   2714 		 * Note: Just like in SunOS 4.X and 4.4BSD a poll will be
   2715 		 * awoken before the urgent data shows up.
   2716 		 * For OOBINLINE this can result in select returning
   2717 		 * only exceptions as opposed to except|read.
   2718 		 */
   2719 		if (*mp->b_rptr == SIGURG) {
   2720 			mutex_enter(&so->so_lock);
   2721 			dprintso(so, 1,
   2722 			    ("SIGURG(%p): counts %d/%d state %s\n",
   2723 			    (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt,
   2724 			    pr_state(so->so_state, so->so_mode)));
   2725 			so_oob_sig(so, 1, allmsgsigs, pollwakeups);
   2726 			dprintso(so, 1,
   2727 			    ("after SIGURG(%p): counts %d/%d "
   2728 			    " poll 0x%x sig 0x%x state %s\n",
   2729 			    (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt,
   2730 			    *pollwakeups, *allmsgsigs,
   2731 			    pr_state(so->so_state, so->so_mode)));
   2732 			mutex_exit(&so->so_lock);
   2733 		}
   2734 		freemsg(mp);
   2735 		return (NULL);
   2736 
   2737 	case M_SIG:
   2738 	case M_HANGUP:
   2739 	case M_UNHANGUP:
   2740 	case M_ERROR:
   2741 		/* M_ERRORs etc are ignored */
   2742 		freemsg(mp);
   2743 		return (NULL);
   2744 
   2745 	case M_FLUSH:
   2746 		/*
   2747 		 * Do not flush read queue. If the M_FLUSH
   2748 		 * arrives because of an impending T_discon_ind
   2749 		 * we still have to keep any queued data - this is part of
   2750 		 * socket semantics.
   2751 		 */
   2752 		if (*mp->b_rptr & FLUSHW) {
   2753 			*mp->b_rptr &= ~FLUSHR;
   2754 			return (mp);
   2755 		}
   2756 		freemsg(mp);
   2757 		return (NULL);
   2758 
   2759 	default:
   2760 		return (mp);
   2761 	}
   2762 }
   2763 
   2764 
   2765 /* Register to receive signals for certain events */
   2766 int
   2767 so_set_asyncsigs(vnode_t *vp, pid_t pgrp, int events, int mode, cred_t *cr)
   2768 {
   2769 	struct strsigset ss;
   2770 	int32_t rval;
   2771 
   2772 	/*
   2773 	 * Note that SOLOCKED will be set except for the call from soaccept().
   2774 	 */
   2775 	ASSERT(!mutex_owned(&VTOSO(vp)->so_lock));
   2776 	ss.ss_pid = pgrp;
   2777 	ss.ss_events = events;
   2778 	return (strioctl(vp, I_ESETSIG, (intptr_t)&ss, mode, K_TO_K, cr,
   2779 	    &rval));
   2780 }
   2781 
   2782 
   2783 /* Register for events matching the SS_ASYNC flag */
   2784 int
   2785 so_set_events(struct sonode *so, vnode_t *vp, cred_t *cr)
   2786 {
   2787 	int events = so->so_state & SS_ASYNC ?
   2788 	    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
   2789 	    S_RDBAND | S_BANDURG;
   2790 
   2791 	return (so_set_asyncsigs(vp, so->so_pgrp, events, 0, cr));
   2792 }
   2793 
   2794 
   2795 /* Change the SS_ASYNC flag, and update signal delivery if needed */
   2796 int
   2797 so_flip_async(struct sonode *so, vnode_t *vp, int mode, cred_t *cr)
   2798 {
   2799 	ASSERT(mutex_owned(&so->so_lock));
   2800 	if (so->so_pgrp != 0) {
   2801 		int error;
   2802 		int events = so->so_state & SS_ASYNC ?		/* Old flag */
   2803 		    S_RDBAND | S_BANDURG :			/* New sigs */
   2804 		    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT;
   2805 
   2806 		so_lock_single(so);
   2807 		mutex_exit(&so->so_lock);
   2808 
   2809 		error = so_set_asyncsigs(vp, so->so_pgrp, events, mode, cr);
   2810 
   2811 		mutex_enter(&so->so_lock);
   2812 		so_unlock_single(so, SOLOCKED);
   2813 		if (error)
   2814 			return (error);
   2815 	}
   2816 	so->so_state ^= SS_ASYNC;
   2817 	return (0);
   2818 }
   2819 
   2820 /*
   2821  * Set new pid/pgrp for SIGPOLL (or SIGIO for FIOASYNC mode), replacing
   2822  * any existing one.  If passed zero, just clear the existing one.
   2823  */
   2824 int
   2825 so_set_siggrp(struct sonode *so, vnode_t *vp, pid_t pgrp, int mode, cred_t *cr)
   2826 {
   2827 	int events = so->so_state & SS_ASYNC ?
   2828 	    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
   2829 	    S_RDBAND | S_BANDURG;
   2830 	int error;
   2831 
   2832 	ASSERT(mutex_owned(&so->so_lock));
   2833 
   2834 	/*
   2835 	 * Change socket process (group).
   2836 	 *
   2837 	 * strioctl (via so_set_asyncsigs) will perform permission check and
   2838 	 * also keep a PID_HOLD to prevent the pid from being reused.
   2839 	 */
   2840 	so_lock_single(so);
   2841 	mutex_exit(&so->so_lock);
   2842 
   2843 	if (pgrp != 0) {
   2844 		dprintso(so, 1, ("setown: adding pgrp %d ev 0x%x\n",
   2845 		    pgrp, events));
   2846 		error = so_set_asyncsigs(vp, pgrp, events, mode, cr);
   2847 		if (error != 0) {
   2848 			eprintsoline(so, error);
   2849 			goto bad;
   2850 		}
   2851 	}
   2852 	/* Remove the previously registered process/group */
   2853 	if (so->so_pgrp != 0) {
   2854 		dprintso(so, 1, ("setown: removing pgrp %d\n", so->so_pgrp));
   2855 		error = so_set_asyncsigs(vp, so->so_pgrp, 0, mode, cr);
   2856 		if (error != 0) {
   2857 			eprintsoline(so, error);
   2858 			error = 0;
   2859 		}
   2860 	}
   2861 	mutex_enter(&so->so_lock);
   2862 	so_unlock_single(so, SOLOCKED);
   2863 	so->so_pgrp = pgrp;
   2864 	return (0);
   2865 bad:
   2866 	mutex_enter(&so->so_lock);
   2867 	so_unlock_single(so, SOLOCKED);
   2868 	return (error);
   2869 }
   2870 
   2871 /*
   2872  * Wrapper for getmsg. If the socket has been converted to a stream
   2873  * pass the request to the stream head.
   2874  */
   2875 int
   2876 sock_getmsg(
   2877 	struct vnode *vp,
   2878 	struct strbuf *mctl,
   2879 	struct strbuf *mdata,
   2880 	uchar_t *prip,
   2881 	int *flagsp,
   2882 	int fmode,
   2883 	rval_t *rvp
   2884 )
   2885 {
   2886 	struct sonode *so;
   2887 
   2888 	ASSERT(vp->v_type == VSOCK);
   2889 	/*
   2890 	 * Use the stream head to find the real socket vnode.
   2891 	 * This is needed when namefs sits above sockfs.  Some
   2892 	 * sockets (like SCTP) are not streams.
   2893 	 */
   2894 	if (!vp->v_stream) {
   2895 		return (ENOSTR);
   2896 	}
   2897 	ASSERT(vp->v_stream->sd_vnode);
   2898 	vp = vp->v_stream->sd_vnode;
   2899 	ASSERT(vn_matchops(vp, socket_vnodeops));
   2900 	so = VTOSO(vp);
   2901 
   2902 	dprintso(so, 1, ("sock_getmsg(%p) %s\n",
   2903 	    (void *)so, pr_state(so->so_state, so->so_mode)));
   2904 
   2905 	if (so->so_version == SOV_STREAM) {
   2906 		/* The imaginary "sockmod" has been popped - act as a stream */
   2907 		return (strgetmsg(vp, mctl, mdata, prip, flagsp, fmode, rvp));
   2908 	}
   2909 	eprintsoline(so, ENOSTR);
   2910 	return (ENOSTR);
   2911 }
   2912 
   2913 /*
   2914  * Wrapper for putmsg. If the socket has been converted to a stream
   2915  * pass the request to the stream head.
   2916  *
   2917  * Note that a while a regular socket (SOV_SOCKSTREAM) does support the
   2918  * streams ioctl set it does not support putmsg and getmsg.
   2919  * Allowing putmsg would prevent sockfs from tracking the state of
   2920  * the socket/transport and would also invalidate the locking in sockfs.
   2921  */
   2922 int
   2923 sock_putmsg(
   2924 	struct vnode *vp,
   2925 	struct strbuf *mctl,
   2926 	struct strbuf *mdata,
   2927 	uchar_t pri,
   2928 	int flag,
   2929 	int fmode
   2930 )
   2931 {
   2932 	struct sonode *so;
   2933 
   2934 	ASSERT(vp->v_type == VSOCK);
   2935 	/*
   2936 	 * Use the stream head to find the real socket vnode.
   2937 	 * This is needed when namefs sits above sockfs.
   2938 	 */
   2939 	if (!vp->v_stream) {
   2940 		return (ENOSTR);
   2941 	}
   2942 	ASSERT(vp->v_stream->sd_vnode);
   2943 	vp = vp->v_stream->sd_vnode;
   2944 	ASSERT(vn_matchops(vp, socket_vnodeops));
   2945 	so = VTOSO(vp);
   2946 
   2947 	dprintso(so, 1, ("sock_putmsg(%p) %s\n",
   2948 	    (void *)so, pr_state(so->so_state, so->so_mode)));
   2949 
   2950 	if (so->so_version == SOV_STREAM) {
   2951 		/* The imaginary "sockmod" has been popped - act as a stream */
   2952 		return (strputmsg(vp, mctl, mdata, pri, flag, fmode));
   2953 	}
   2954 	eprintsoline(so, ENOSTR);
   2955 	return (ENOSTR);
   2956 }
   2957 
   2958 /*
   2959  * Special function called only from f_getfl().
   2960  * Returns FASYNC if the SS_ASYNC flag is set on a socket, else 0.
   2961  * No locks are acquired here, so it is safe to use while uf_lock is held.
   2962  * This exists solely for BSD fcntl() FASYNC compatibility.
   2963  */
   2964 int
   2965 sock_getfasync(vnode_t *vp)
   2966 {
   2967 	struct sonode *so;
   2968 
   2969 	ASSERT(vp->v_type == VSOCK);
   2970 	/*
   2971 	 * For stream model, v_stream is used; For non-stream, v_stream always
   2972 	 * equals NULL
   2973 	 */
   2974 	if (vp->v_stream != NULL)
   2975 		so = VTOSO(vp->v_stream->sd_vnode);
   2976 	else
   2977 		so = VTOSO(vp);
   2978 
   2979 	if (so->so_version == SOV_STREAM || !(so->so_state & SS_ASYNC))
   2980 		return (0);
   2981 
   2982 	return (FASYNC);
   2983 }
   2984