Home | History | Annotate | Download | only in tcp
      1    741  masputra /*
      2    741  masputra  * CDDL HEADER START
      3    741  masputra  *
      4    741  masputra  * The contents of this file are subject to the terms of the
      5   2024   krishna  * Common Development and Distribution License (the "License").
      6   2024   krishna  * You may not use this file except in compliance with the License.
      7    741  masputra  *
      8    741  masputra  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9    741  masputra  * or http://www.opensolaris.org/os/licensing.
     10    741  masputra  * See the License for the specific language governing permissions
     11    741  masputra  * and limitations under the License.
     12    741  masputra  *
     13    741  masputra  * When distributing Covered Code, include this CDDL HEADER in each
     14    741  masputra  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15    741  masputra  * If applicable, add the following below this CDDL HEADER, with the
     16    741  masputra  * fields enclosed by brackets "[]" replaced with your own identifying
     17    741  masputra  * information: Portions Copyright [yyyy] [name of copyright owner]
     18    741  masputra  *
     19    741  masputra  * CDDL HEADER END
     20    741  masputra  */
     21    741  masputra /*
     22   8485     Peter  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23    741  masputra  * Use is subject to license terms.
     24    741  masputra  */
     25    741  masputra 
     26    741  masputra #include <sys/types.h>
     27    741  masputra #include <sys/stream.h>
     28    741  masputra #include <sys/strsun.h>
     29    741  masputra #include <sys/strsubr.h>
     30    741  masputra #include <sys/debug.h>
     31   2958  dr146992 #include <sys/sdt.h>
     32    741  masputra #include <sys/cmn_err.h>
     33    741  masputra #include <sys/tihdr.h>
     34    741  masputra 
     35    741  masputra #include <inet/common.h>
     36   5240  nordmark #include <inet/optcom.h>
     37    741  masputra #include <inet/ip.h>
     38   8485     Peter #include <inet/ip_if.h>
     39    741  masputra #include <inet/ip_impl.h>
     40    741  masputra #include <inet/tcp.h>
     41    741  masputra #include <inet/tcp_impl.h>
     42    741  masputra #include <inet/ipsec_impl.h>
     43    741  masputra #include <inet/ipclassifier.h>
     44    741  masputra #include <inet/ipp_common.h>
     45   7828     Brian #include <inet/ip_if.h>
     46    741  masputra 
     47    741  masputra /*
     48    741  masputra  * This file implements TCP fusion - a protocol-less data path for TCP
     49    741  masputra  * loopback connections.  The fusion of two local TCP endpoints occurs
     50    741  masputra  * at connection establishment time.  Various conditions (see details
     51    741  masputra  * in tcp_fuse()) need to be met for fusion to be successful.  If it
     52    741  masputra  * fails, we fall back to the regular TCP data path; if it succeeds,
     53    741  masputra  * both endpoints proceed to use tcp_fuse_output() as the transmit path.
     54    741  masputra  * tcp_fuse_output() enqueues application data directly onto the peer's
     55   9993    Anders  * receive queue; no protocol processing is involved.
     56    741  masputra  *
     57   3429  vi117747  * Synchronization is handled by squeue and the mutex tcp_non_sq_lock.
     58    741  masputra  * One of the requirements for fusion to succeed is that both endpoints
     59    741  masputra  * need to be using the same squeue.  This ensures that neither side
     60   9993    Anders  * can disappear while the other side is still sending data. Flow
     61   9993    Anders  * control information is manipulated outside the squeue, so the
     62   9993    Anders  * tcp_non_sq_lock must be held when touching tcp_flow_stopped.
     63    741  masputra  */
     64    741  masputra 
     65    741  masputra /*
     66    741  masputra  * Global TCP fusion switch (default on).  Setting this to B_FALSE disables
     67    741  masputra  * fusion altogether; loopback connections then take the protocol paths.
     68    741  masputra  */
     69    741  masputra boolean_t do_tcp_fusion = B_TRUE;
     70    741  masputra 
     71    741  masputra /*
     72    741  masputra  * This routine gets called by the eager tcp upon changing state from
     73    741  masputra  * SYN_RCVD to ESTABLISHED.  It fuses a direct path between itself
     74    741  masputra  * and the active connect tcp such that the regular tcp processings
     75    741  masputra  * may be bypassed under allowable circumstances.  Because the fusion
     76    741  masputra  * requires both endpoints to be in the same squeue, it does not work
     77    741  masputra  * for simultaneous active connects because there is no easy way to
     78    741  masputra  * switch from one squeue to another once the connection is created.
     79    741  masputra  * This is different from the eager tcp case where we assign it the
     80    741  masputra  * same squeue as the one given to the active connect tcp during open.
     81    741  masputra  */
     82    741  masputra void
     83  11042      Erik tcp_fuse(tcp_t *tcp, uchar_t *iphdr, tcpha_t *tcpha)
     84    741  masputra {
     85  11042      Erik 	conn_t		*peer_connp, *connp = tcp->tcp_connp;
     86  11042      Erik 	tcp_t		*peer_tcp;
     87   3448  dh155122 	tcp_stack_t	*tcps = tcp->tcp_tcps;
     88   3448  dh155122 	netstack_t	*ns;
     89   3448  dh155122 	ip_stack_t	*ipst = tcps->tcps_netstack->netstack_ip;
     90    741  masputra 
     91    741  masputra 	ASSERT(!tcp->tcp_fused);
     92    741  masputra 	ASSERT(tcp->tcp_loopback);
     93    741  masputra 	ASSERT(tcp->tcp_loopback_peer == NULL);
     94    741  masputra 	/*
     95  11042      Erik 	 * We need to inherit conn_rcvbuf of the listener tcp,
     96  10312       Rao 	 * but we can't really use tcp_listener since we get here after
     97  11042      Erik 	 * sending up T_CONN_IND and tcp_tli_accept() may be called
     98  10312       Rao 	 * independently, at which point tcp_listener is cleared;
     99  10312       Rao 	 * this is why we use tcp_saved_listener. The listener itself
    100  10312       Rao 	 * is guaranteed to be around until tcp_accept_finish() is called
    101  10312       Rao 	 * on this eager -- this won't happen until we're done since we're
    102  10312       Rao 	 * inside the eager's perimeter now.
    103    741  masputra 	 */
    104  11042      Erik 	ASSERT(tcp->tcp_saved_listener != NULL);
    105    741  masputra 	/*
    106    741  masputra 	 * Lookup peer endpoint; search for the remote endpoint having
    107    741  masputra 	 * the reversed address-port quadruplet in ESTABLISHED state,
    108    741  masputra 	 * which is guaranteed to be unique in the system.  Zone check
    109    741  masputra 	 * is applied accordingly for loopback address, but not for
    110    741  masputra 	 * local address since we want fusion to happen across Zones.
    111    741  masputra 	 */
    112  11042      Erik 	if (connp->conn_ipversion == IPV4_VERSION) {
    113    741  masputra 		peer_connp = ipcl_conn_tcp_lookup_reversed_ipv4(connp,
    114  11042      Erik 		    (ipha_t *)iphdr, tcpha, ipst);
    115    741  masputra 	} else {
    116    741  masputra 		peer_connp = ipcl_conn_tcp_lookup_reversed_ipv6(connp,
    117  11042      Erik 		    (ip6_t *)iphdr, tcpha, ipst);
    118    741  masputra 	}
    119    741  masputra 
    120    741  masputra 	/*
    121    741  masputra 	 * We can only proceed if peer exists, resides in the same squeue
    122   9992    Anders 	 * as our conn and is not raw-socket. We also restrict fusion to
    123   9992    Anders 	 * endpoints of the same type (STREAMS or non-STREAMS). The squeue
    124   9992    Anders 	 * assignment of this eager tcp was done earlier at the time of SYN
    125   9992    Anders 	 * processing in ip_fanout_tcp{_v6}.  Note that similar squeues by
    126   9992    Anders 	 * itself doesn't guarantee a safe condition to fuse, hence we perform
    127    741  masputra 	 * additional tests below.
    128    741  masputra 	 */
    129    741  masputra 	ASSERT(peer_connp == NULL || peer_connp != connp);
    130    741  masputra 	if (peer_connp == NULL || peer_connp->conn_sqp != connp->conn_sqp ||
    131   9992    Anders 	    !IPCL_IS_TCP(peer_connp) ||
    132   9992    Anders 	    IPCL_IS_NONSTR(connp) != IPCL_IS_NONSTR(peer_connp)) {
    133    741  masputra 		if (peer_connp != NULL) {
    134   3448  dh155122 			TCP_STAT(tcps, tcp_fusion_unqualified);
    135    741  masputra 			CONN_DEC_REF(peer_connp);
    136    741  masputra 		}
    137    741  masputra 		return;
    138    741  masputra 	}
    139    741  masputra 	peer_tcp = peer_connp->conn_tcp;	/* active connect tcp */
    140    741  masputra 
    141    741  masputra 	ASSERT(peer_tcp != NULL && peer_tcp != tcp && !peer_tcp->tcp_fused);
    142   9532      Erik 	ASSERT(peer_tcp->tcp_loopback_peer == NULL);
    143    741  masputra 	ASSERT(peer_connp->conn_sqp == connp->conn_sqp);
    144    741  masputra 
    145   9532      Erik 	/*
    146   9532      Erik 	 * Due to IRE changes the peer and us might not agree on tcp_loopback.
    147   9532      Erik 	 * We bail in that case.
    148   9532      Erik 	 */
    149   9532      Erik 	if (!peer_tcp->tcp_loopback) {
    150   9532      Erik 		TCP_STAT(tcps, tcp_fusion_unqualified);
    151   9532      Erik 		CONN_DEC_REF(peer_connp);
    152   9532      Erik 		return;
    153   9532      Erik 	}
    154    741  masputra 	/*
    155    741  masputra 	 * Fuse the endpoints; we perform further checks against both
    156    741  masputra 	 * tcp endpoints to ensure that a fusion is allowed to happen.
    157  11042      Erik 	 * In particular we bail out if kernel SSL exists.
    158    741  masputra 	 */
    159   3448  dh155122 	ns = tcps->tcps_netstack;
    160   3448  dh155122 	ipst = ns->netstack_ip;
    161   3448  dh155122 
    162    741  masputra 	if (!tcp->tcp_unfusable && !peer_tcp->tcp_unfusable &&
    163  11042      Erik 	    (tcp->tcp_kssl_ent == NULL) && (tcp->tcp_xmit_head == NULL) &&
    164  11042      Erik 	    (peer_tcp->tcp_xmit_head == NULL)) {
    165    741  masputra 		mblk_t *mp;
    166  11042      Erik 		queue_t *peer_rq = peer_connp->conn_rq;
    167    741  masputra 
    168   8348      Eric 		ASSERT(!TCP_IS_DETACHED(peer_tcp));
    169  11042      Erik 		ASSERT(tcp->tcp_fused_sigurg_mp == NULL);
    170  11042      Erik 		ASSERT(peer_tcp->tcp_fused_sigurg_mp == NULL);
    171   2024   krishna 		ASSERT(tcp->tcp_kssl_ctx == NULL);
    172    741  masputra 
    173    741  masputra 		/*
    174    741  masputra 		 * We need to drain data on both endpoints during unfuse.
    175    741  masputra 		 * If we need to send up SIGURG at the time of draining,
    176    741  masputra 		 * we want to be sure that an mblk is readily available.
    177    741  masputra 		 * This is why we pre-allocate the M_PCSIG mblks for both
    178    741  masputra 		 * endpoints which will only be used during/after unfuse.
    179   9994    Anders 		 * The mblk might already exist if we are doing a re-fuse.
    180    741  masputra 		 */
    181   8348      Eric 		if (!IPCL_IS_NONSTR(tcp->tcp_connp)) {
    182   9992    Anders 			ASSERT(!IPCL_IS_NONSTR(peer_tcp->tcp_connp));
    183   9992    Anders 
    184   9994    Anders 			if (tcp->tcp_fused_sigurg_mp == NULL) {
    185   9994    Anders 				if ((mp = allocb(1, BPRI_HI)) == NULL)
    186   9994    Anders 					goto failed;
    187   9994    Anders 				tcp->tcp_fused_sigurg_mp = mp;
    188   9994    Anders 			}
    189   8348      Eric 
    190   9994    Anders 			if (peer_tcp->tcp_fused_sigurg_mp == NULL) {
    191   9994    Anders 				if ((mp = allocb(1, BPRI_HI)) == NULL)
    192   9994    Anders 					goto failed;
    193   9994    Anders 				peer_tcp->tcp_fused_sigurg_mp = mp;
    194   9994    Anders 			}
    195   8348      Eric 
    196   9992    Anders 			if ((mp = allocb(sizeof (struct stroptions),
    197   9992    Anders 			    BPRI_HI)) == NULL)
    198   9992    Anders 				goto failed;
    199   6707    brutus 		}
    200    741  masputra 
    201    741  masputra 		/* Fuse both endpoints */
    202    741  masputra 		peer_tcp->tcp_loopback_peer = tcp;
    203    741  masputra 		tcp->tcp_loopback_peer = peer_tcp;
    204    741  masputra 		peer_tcp->tcp_fused = tcp->tcp_fused = B_TRUE;
    205    741  masputra 
    206    741  masputra 		/*
    207    741  masputra 		 * We never use regular tcp paths in fusion and should
    208    741  masputra 		 * therefore clear tcp_unsent on both endpoints.  Having
    209    741  masputra 		 * them set to non-zero values means asking for trouble
    210    741  masputra 		 * especially after unfuse, where we may end up sending
    211    741  masputra 		 * through regular tcp paths which expect xmit_list and
    212    741  masputra 		 * friends to be correctly setup.
    213    741  masputra 		 */
    214    741  masputra 		peer_tcp->tcp_unsent = tcp->tcp_unsent = 0;
    215    741  masputra 
    216    741  masputra 		tcp_timers_stop(tcp);
    217    741  masputra 		tcp_timers_stop(peer_tcp);
    218    741  masputra 
    219  11042      Erik 		/*
    220  11042      Erik 		 * Set receive buffer and max packet size for the
    221  11042      Erik 		 * active open tcp.
    222  11042      Erik 		 * eager's values will be set in tcp_accept_finish.
    223  11042      Erik 		 */
    224  11042      Erik 		(void) tcp_rwnd_set(peer_tcp, peer_tcp->tcp_connp->conn_rcvbuf);
    225  10312       Rao 
    226  11042      Erik 		/*
    227  11042      Erik 		 * Set the write offset value to zero since we won't
    228  11042      Erik 		 * be needing any room for TCP/IP headers.
    229  11042      Erik 		 */
    230  11042      Erik 		if (!IPCL_IS_NONSTR(peer_tcp->tcp_connp)) {
    231  11042      Erik 			struct stroptions *stropt;
    232    741  masputra 
    233  11042      Erik 			DB_TYPE(mp) = M_SETOPTS;
    234  11042      Erik 			mp->b_wptr += sizeof (*stropt);
    235    741  masputra 
    236  11042      Erik 			stropt = (struct stroptions *)mp->b_rptr;
    237  11042      Erik 			stropt->so_flags = SO_WROFF;
    238  11042      Erik 			stropt->so_wroff = 0;
    239    741  masputra 
    240  11042      Erik 			/* Send the options up */
    241  11042      Erik 			putnext(peer_rq, mp);
    242  11042      Erik 		} else {
    243  11042      Erik 			struct sock_proto_props sopp;
    244   8348      Eric 
    245  11042      Erik 			/* The peer is a non-STREAMS end point */
    246  11042      Erik 			ASSERT(IPCL_IS_TCP(peer_connp));
    247   8348      Eric 
    248  11042      Erik 			sopp.sopp_flags = SOCKOPT_WROFF;
    249  11042      Erik 			sopp.sopp_wroff = 0;
    250  11042      Erik 			(*peer_connp->conn_upcalls->su_set_proto_props)
    251  11042      Erik 			    (peer_connp->conn_upper_handle, &sopp);
    252   8023      Phil 		}
    253    741  masputra 	} else {
    254   3448  dh155122 		TCP_STAT(tcps, tcp_fusion_unqualified);
    255    741  masputra 	}
    256    741  masputra 	CONN_DEC_REF(peer_connp);
    257    741  masputra 	return;
    258    741  masputra 
    259    741  masputra failed:
    260    741  masputra 	if (tcp->tcp_fused_sigurg_mp != NULL) {
    261    741  masputra 		freeb(tcp->tcp_fused_sigurg_mp);
    262    741  masputra 		tcp->tcp_fused_sigurg_mp = NULL;
    263    741  masputra 	}
    264    741  masputra 	if (peer_tcp->tcp_fused_sigurg_mp != NULL) {
    265    741  masputra 		freeb(peer_tcp->tcp_fused_sigurg_mp);
    266    741  masputra 		peer_tcp->tcp_fused_sigurg_mp = NULL;
    267    741  masputra 	}
    268    741  masputra 	CONN_DEC_REF(peer_connp);
    269    741  masputra }
    270    741  masputra 
    271    741  masputra /*
    272    741  masputra  * Unfuse a previously-fused pair of tcp loopback endpoints.
    273    741  masputra  */
    274    741  masputra void
    275    741  masputra tcp_unfuse(tcp_t *tcp)
    276    741  masputra {
    277    741  masputra 	tcp_t *peer_tcp = tcp->tcp_loopback_peer;
    278   9993    Anders 	tcp_stack_t *tcps = tcp->tcp_tcps;
    279    741  masputra 
    280    741  masputra 	ASSERT(tcp->tcp_fused && peer_tcp != NULL);
    281    741  masputra 	ASSERT(peer_tcp->tcp_fused && peer_tcp->tcp_loopback_peer == tcp);
    282    741  masputra 	ASSERT(tcp->tcp_connp->conn_sqp == peer_tcp->tcp_connp->conn_sqp);
    283    741  masputra 	ASSERT(tcp->tcp_unsent == 0 && peer_tcp->tcp_unsent == 0);
    284    741  masputra 
    285    741  masputra 	/*
    286   9993    Anders 	 * Cancel any pending push timers.
    287    741  masputra 	 */
    288   9993    Anders 	if (tcp->tcp_push_tid != 0) {
    289   9993    Anders 		(void) TCP_TIMER_CANCEL(tcp, tcp->tcp_push_tid);
    290   9993    Anders 		tcp->tcp_push_tid = 0;
    291   9993    Anders 	}
    292   9993    Anders 	if (peer_tcp->tcp_push_tid != 0) {
    293   9993    Anders 		(void) TCP_TIMER_CANCEL(peer_tcp, peer_tcp->tcp_push_tid);
    294   9993    Anders 		peer_tcp->tcp_push_tid = 0;
    295   9993    Anders 	}
    296   9993    Anders 
    297   9993    Anders 	/*
    298   9993    Anders 	 * Drain any pending data; Note that in case of a detached tcp, the
    299   9993    Anders 	 * draining will happen later after the tcp is unfused.  For non-
    300   9993    Anders 	 * urgent data, this can be handled by the regular tcp_rcv_drain().
    301   9993    Anders 	 * If we have urgent data sitting in the receive list, we will
    302   9993    Anders 	 * need to send up a SIGURG signal first before draining the data.
    303   9993    Anders 	 * All of these will be handled by the code in tcp_fuse_rcv_drain()
    304   9993    Anders 	 * when called from tcp_rcv_drain().
    305   9993    Anders 	 */
    306   9993    Anders 	if (!TCP_IS_DETACHED(tcp)) {
    307  11042      Erik 		(void) tcp_fuse_rcv_drain(tcp->tcp_connp->conn_rq, tcp,
    308   9993    Anders 		    &tcp->tcp_fused_sigurg_mp);
    309   9993    Anders 	}
    310   9993    Anders 	if (!TCP_IS_DETACHED(peer_tcp)) {
    311  11042      Erik 		(void) tcp_fuse_rcv_drain(peer_tcp->tcp_connp->conn_rq,
    312  11042      Erik 		    peer_tcp,  &peer_tcp->tcp_fused_sigurg_mp);
    313   9993    Anders 	}
    314   9993    Anders 
    315   9993    Anders 	/* Lift up any flow-control conditions */
    316   9993    Anders 	mutex_enter(&tcp->tcp_non_sq_lock);
    317   9993    Anders 	if (tcp->tcp_flow_stopped) {
    318   9993    Anders 		tcp_clrqfull(tcp);
    319   9993    Anders 		TCP_STAT(tcps, tcp_fusion_backenabled);
    320   9993    Anders 	}
    321   9993    Anders 	mutex_exit(&tcp->tcp_non_sq_lock);
    322   9993    Anders 
    323   9993    Anders 	mutex_enter(&peer_tcp->tcp_non_sq_lock);
    324   9993    Anders 	if (peer_tcp->tcp_flow_stopped) {
    325   9993    Anders 		tcp_clrqfull(peer_tcp);
    326   9993    Anders 		TCP_STAT(tcps, tcp_fusion_backenabled);
    327   9993    Anders 	}
    328   9993    Anders 	mutex_exit(&peer_tcp->tcp_non_sq_lock);
    329    741  masputra 
    330    741  masputra 	/*
    331  11042      Erik 	 * Update tha_seq and tha_ack in the header template
    332    741  masputra 	 */
    333  11042      Erik 	tcp->tcp_tcpha->tha_seq = htonl(tcp->tcp_snxt);
    334  11042      Erik 	tcp->tcp_tcpha->tha_ack = htonl(tcp->tcp_rnxt);
    335  11042      Erik 	peer_tcp->tcp_tcpha->tha_seq = htonl(peer_tcp->tcp_snxt);
    336  11042      Erik 	peer_tcp->tcp_tcpha->tha_ack = htonl(peer_tcp->tcp_rnxt);
    337    741  masputra 
    338    741  masputra 	/* Unfuse the endpoints */
    339    741  masputra 	peer_tcp->tcp_fused = tcp->tcp_fused = B_FALSE;
    340    741  masputra 	peer_tcp->tcp_loopback_peer = tcp->tcp_loopback_peer = NULL;
    341    741  masputra }
    342    741  masputra 
    343    741  masputra /*
    344   9992    Anders  * Fusion output routine used to handle urgent data sent by STREAMS based
    345   9992    Anders  * endpoints. This routine is called by tcp_fuse_output() for handling
    346   9992    Anders  * non-M_DATA mblks.
    347    741  masputra  */
    348    741  masputra void
    349    741  masputra tcp_fuse_output_urg(tcp_t *tcp, mblk_t *mp)
    350    741  masputra {
    351    741  masputra 	mblk_t *mp1;
    352    741  masputra 	struct T_exdata_ind *tei;
    353    741  masputra 	tcp_t *peer_tcp = tcp->tcp_loopback_peer;
    354    741  masputra 	mblk_t *head, *prev_head = NULL;
    355   3448  dh155122 	tcp_stack_t	*tcps = tcp->tcp_tcps;
    356    741  masputra 
    357    741  masputra 	ASSERT(tcp->tcp_fused);
    358   9993    Anders 	ASSERT(peer_tcp != NULL && peer_tcp->tcp_loopback_peer == tcp);
    359   9992    Anders 	ASSERT(!IPCL_IS_NONSTR(tcp->tcp_connp));
    360    741  masputra 	ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO);
    361    741  masputra 	ASSERT(mp->b_cont != NULL && DB_TYPE(mp->b_cont) == M_DATA);
    362    741  masputra 	ASSERT(MBLKL(mp) >= sizeof (*tei) && MBLKL(mp->b_cont) > 0);
    363    741  masputra 
    364    741  masputra 	/*
    365    741  masputra 	 * Urgent data arrives in the form of T_EXDATA_REQ from above.
    366    741  masputra 	 * Each occurence denotes a new urgent pointer.  For each new
    367    741  masputra 	 * urgent pointer we signal (SIGURG) the receiving app to indicate
    368    741  masputra 	 * that it needs to go into urgent mode.  This is similar to the
    369    741  masputra 	 * urgent data handling in the regular tcp.  We don't need to keep
    370    741  masputra 	 * track of where the urgent pointer is, because each T_EXDATA_REQ
    371    741  masputra 	 * "advances" the urgent pointer for us.
    372    741  masputra 	 *
    373    741  masputra 	 * The actual urgent data carried by T_EXDATA_REQ is then prepended
    374    741  masputra 	 * by a T_EXDATA_IND before being enqueued behind any existing data
    375    741  masputra 	 * destined for the receiving app.  There is only a single urgent
    376    741  masputra 	 * pointer (out-of-band mark) for a given tcp.  If the new urgent
    377    741  masputra 	 * data arrives before the receiving app reads some existing urgent
    378    741  masputra 	 * data, the previous marker is lost.  This behavior is emulated
    379    741  masputra 	 * accordingly below, by removing any existing T_EXDATA_IND messages
    380    741  masputra 	 * and essentially converting old urgent data into non-urgent.
    381    741  masputra 	 */
    382    741  masputra 	ASSERT(tcp->tcp_valid_bits & TCP_URG_VALID);
    383    741  masputra 	/* Let sender get out of urgent mode */
    384    741  masputra 	tcp->tcp_valid_bits &= ~TCP_URG_VALID;
    385    741  masputra 
    386    741  masputra 	/*
    387    741  masputra 	 * This flag indicates that a signal needs to be sent up.
    388    741  masputra 	 * This flag will only get cleared once SIGURG is delivered and
    389    741  masputra 	 * is not affected by the tcp_fused flag -- delivery will still
    390    741  masputra 	 * happen even after an endpoint is unfused, to handle the case
    391    741  masputra 	 * where the sending endpoint immediately closes/unfuses after
    392    741  masputra 	 * sending urgent data and the accept is not yet finished.
    393    741  masputra 	 */
    394    741  masputra 	peer_tcp->tcp_fused_sigurg = B_TRUE;
    395    741  masputra 
    396    741  masputra 	/* Reuse T_EXDATA_REQ mblk for T_EXDATA_IND */
    397    741  masputra 	DB_TYPE(mp) = M_PROTO;
    398    741  masputra 	tei = (struct T_exdata_ind *)mp->b_rptr;
    399    741  masputra 	tei->PRIM_type = T_EXDATA_IND;
    400    741  masputra 	tei->MORE_flag = 0;
    401    741  masputra 	mp->b_wptr = (uchar_t *)&tei[1];
    402    741  masputra 
    403   3448  dh155122 	TCP_STAT(tcps, tcp_fusion_urg);
    404   3448  dh155122 	BUMP_MIB(&tcps->tcps_mib, tcpOutUrg);
    405    741  masputra 
    406    741  masputra 	head = peer_tcp->tcp_rcv_list;
    407    741  masputra 	while (head != NULL) {
    408    741  masputra 		/*
    409    741  masputra 		 * Remove existing T_EXDATA_IND, keep the data which follows
    410    741  masputra 		 * it and relink our list.  Note that we don't modify the
    411    741  masputra 		 * tcp_rcv_last_tail since it never points to T_EXDATA_IND.
    412    741  masputra 		 */
    413    741  masputra 		if (DB_TYPE(head) != M_DATA) {
    414    741  masputra 			mp1 = head;
    415    741  masputra 
    416    741  masputra 			ASSERT(DB_TYPE(mp1->b_cont) == M_DATA);
    417    741  masputra 			head = mp1->b_cont;
    418    741  masputra 			mp1->b_cont = NULL;
    419    741  masputra 			head->b_next = mp1->b_next;
    420    741  masputra 			mp1->b_next = NULL;
    421    741  masputra 			if (prev_head != NULL)
    422    741  masputra 				prev_head->b_next = head;
    423    741  masputra 			if (peer_tcp->tcp_rcv_list == mp1)
    424    741  masputra 				peer_tcp->tcp_rcv_list = head;
    425    741  masputra 			if (peer_tcp->tcp_rcv_last_head == mp1)
    426    741  masputra 				peer_tcp->tcp_rcv_last_head = head;
    427    741  masputra 			freeb(mp1);
    428    741  masputra 		}
    429    741  masputra 		prev_head = head;
    430    741  masputra 		head = head->b_next;
    431    741  masputra 	}
    432    741  masputra }
    433    741  masputra 
    434    741  masputra /*
    435    741  masputra  * Fusion output routine, called by tcp_output() and tcp_wput_proto().
    436   3429  vi117747  * If we are modifying any member that can be changed outside the squeue,
    437   3429  vi117747  * like tcp_flow_stopped, we need to take tcp_non_sq_lock.
    438    741  masputra  */
    439    741  masputra boolean_t
    440    741  masputra tcp_fuse_output(tcp_t *tcp, mblk_t *mp, uint32_t send_size)
    441    741  masputra {
    442  11042      Erik 	conn_t		*connp = tcp->tcp_connp;
    443  11042      Erik 	tcp_t		*peer_tcp = tcp->tcp_loopback_peer;
    444  11042      Erik 	conn_t		*peer_connp = peer_tcp->tcp_connp;
    445  11042      Erik 	boolean_t	flow_stopped, peer_data_queued = B_FALSE;
    446  11042      Erik 	boolean_t	urgent = (DB_TYPE(mp) != M_DATA);
    447  11042      Erik 	boolean_t	push = B_TRUE;
    448  11042      Erik 	mblk_t		*mp1 = mp;
    449  11042      Erik 	uint_t		ip_hdr_len;
    450  11042      Erik 	uint32_t	recv_size = send_size;
    451   3448  dh155122 	tcp_stack_t	*tcps = tcp->tcp_tcps;
    452   3448  dh155122 	netstack_t	*ns = tcps->tcps_netstack;
    453   3448  dh155122 	ip_stack_t	*ipst = ns->netstack_ip;
    454  11042      Erik 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
    455  11042      Erik 	iaflags_t	ixaflags = connp->conn_ixa->ixa_flags;
    456  11042      Erik 	boolean_t	do_ipsec, hooks_out, hooks_in, ipobs_enabled;
    457    741  masputra 
    458    741  masputra 	ASSERT(tcp->tcp_fused);
    459    741  masputra 	ASSERT(peer_tcp != NULL && peer_tcp->tcp_loopback_peer == tcp);
    460  11042      Erik 	ASSERT(connp->conn_sqp == peer_connp->conn_sqp);
    461    741  masputra 	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO ||
    462    741  masputra 	    DB_TYPE(mp) == M_PCPROTO);
    463    741  masputra 
    464    741  masputra 	if (send_size == 0) {
    465    741  masputra 		freemsg(mp);
    466    741  masputra 		return (B_TRUE);
    467    741  masputra 	}
    468    741  masputra 
    469    741  masputra 	/*
    470    741  masputra 	 * Handle urgent data; we either send up SIGURG to the peer now
    471    741  masputra 	 * or do it later when we drain, in case the peer is detached
    472    741  masputra 	 * or if we're short of memory for M_PCSIG mblk.
    473    741  masputra 	 */
    474    741  masputra 	if (urgent) {
    475    741  masputra 		tcp_fuse_output_urg(tcp, mp);
    476   2958  dr146992 
    477   2958  dr146992 		mp1 = mp->b_cont;
    478   2958  dr146992 	}
    479   2958  dr146992 
    480  11042      Erik 	/*
    481  11042      Erik 	 * Check that we are still using an IRE_LOCAL or IRE_LOOPBACK before
    482  11042      Erik 	 * further processes.
    483  11042      Erik 	 */
    484  11042      Erik 	if (!ip_output_verify_local(connp->conn_ixa))
    485  11042      Erik 		goto unfuse;
    486  11042      Erik 
    487  11042      Erik 	/*
    488  11042      Erik 	 * Build IP and TCP header in case we have something that needs the
    489  11042      Erik 	 * headers. Those cases are:
    490  11042      Erik 	 * 1. IPsec
    491  11042      Erik 	 * 2. IPobs
    492  11042      Erik 	 * 3. FW_HOOKS
    493  11042      Erik 	 *
    494  11042      Erik 	 * If tcp_xmit_mp() fails to dupb() the message, unfuse the connection
    495  11042      Erik 	 * and back to regular path.
    496  11042      Erik 	 */
    497  11042      Erik 	if (ixaflags & IXAF_IS_IPV4) {
    498  11042      Erik 		do_ipsec = (ixaflags & IXAF_IPSEC_SECURE) ||
    499  11042      Erik 		    CONN_INBOUND_POLICY_PRESENT(peer_connp, ipss);
    500  11042      Erik 
    501  11042      Erik 		hooks_out = HOOKS4_INTERESTED_LOOPBACK_OUT(ipst);
    502  11042      Erik 		hooks_in = HOOKS4_INTERESTED_LOOPBACK_IN(ipst);
    503  11042      Erik 		ipobs_enabled = (ipst->ips_ip4_observe.he_interested != 0);
    504  11042      Erik 	} else {
    505  11042      Erik 		do_ipsec = (ixaflags & IXAF_IPSEC_SECURE) ||
    506  11042      Erik 		    CONN_INBOUND_POLICY_PRESENT_V6(peer_connp, ipss);
    507  11042      Erik 
    508  11042      Erik 		hooks_out = HOOKS6_INTERESTED_LOOPBACK_OUT(ipst);
    509  11042      Erik 		hooks_in = HOOKS6_INTERESTED_LOOPBACK_IN(ipst);
    510  11042      Erik 		ipobs_enabled = (ipst->ips_ip6_observe.he_interested != 0);
    511  11042      Erik 	}
    512  11042      Erik 
    513  11042      Erik 	/* Use bitwise 'or' (no short-circuit branches) for efficiency */
    514  11042      Erik 	if (ipobs_enabled | do_ipsec | hooks_in | hooks_out) {
    515   2958  dr146992 		if ((mp1 = tcp_xmit_mp(tcp, mp1, tcp->tcp_mss, NULL, NULL,
    516   2958  dr146992 		    tcp->tcp_snxt, B_TRUE, NULL, B_FALSE)) == NULL)
    517   2958  dr146992 			/* If tcp_xmit_mp fails, use regular path */
    518   2958  dr146992 			goto unfuse;
    519   2958  dr146992 
    520   7828     Brian 		/*
    521  11042      Erik 		 * Leave all IP relevant processes to ip_output_process_local(),
    522  11042      Erik 		 * which handles IPsec, IPobs, and FW_HOOKS.
    523   7828     Brian 		 */
    524  11042      Erik 		mp1 = ip_output_process_local(mp1, connp->conn_ixa, hooks_out,
    525  11042      Erik 		    hooks_in, do_ipsec ? peer_connp : NULL);
    526   7828     Brian 
    527  11042      Erik 		/* If the message is dropped for any reason. */
    528   2958  dr146992 		if (mp1 == NULL)
    529   2958  dr146992 			goto unfuse;
    530   2958  dr146992 
    531   7828     Brian 		/*
    532  11042      Erik 		 * Data length might have been changed by FW_HOOKS.
    533  11042      Erik 		 * We assume that the first mblk contains the TCP/IP headers.
    534   7828     Brian 		 */
    535  11042      Erik 		if (hooks_in || hooks_out) {
    536  11042      Erik 			tcpha_t *tcpha;
    537  11042      Erik 
    538  11042      Erik 			ip_hdr_len = (ixaflags & IXAF_IS_IPV4) ?
    539  11042      Erik 			    IPH_HDR_LENGTH((ipha_t *)mp1->b_rptr) :
    540  11042      Erik 			    ip_hdr_length_v6(mp1, (ip6_t *)mp1->b_rptr);
    541  11042      Erik 
    542  11042      Erik 			tcpha = (tcpha_t *)&mp1->b_rptr[ip_hdr_len];
    543  11042      Erik 			ASSERT((uchar_t *)tcpha + sizeof (tcpha_t) <=
    544  11042      Erik 			    mp1->b_wptr);
    545  11042      Erik 			recv_size += htonl(tcpha->tha_seq) - tcp->tcp_snxt;
    546  11042      Erik 
    547   7828     Brian 		}
    548   2958  dr146992 
    549   2958  dr146992 		/*
    550   2958  dr146992 		 * The message duplicated by tcp_xmit_mp is freed.
    551   2958  dr146992 		 * Note: the original message passed in remains unchanged.
    552   2958  dr146992 		 */
    553   2958  dr146992 		freemsg(mp1);
    554    741  masputra 	}
    555    741  masputra 
    556    741  masputra 	/*
    557    741  masputra 	 * Enqueue data into the peer's receive list; we may or may not
    558    741  masputra 	 * drain the contents depending on the conditions below.
    559   8682    Anders 	 *
    560   9993    Anders 	 * For non-STREAMS sockets we normally queue data directly in the
    561   9993    Anders 	 * socket by calling the su_recv upcall. However, if the peer is
    562   9993    Anders 	 * detached we use tcp_rcv_enqueue() instead. Queued data will be
    563   9993    Anders 	 * drained when the accept completes (in tcp_accept_finish()).
    564    741  masputra 	 */
    565  11042      Erik 	if (IPCL_IS_NONSTR(peer_connp) &&
    566   9993    Anders 	    !TCP_IS_DETACHED(peer_tcp)) {
    567   8348      Eric 		int error;
    568   8348      Eric 		int flags = 0;
    569   8348      Eric 
    570   8348      Eric 		if ((tcp->tcp_valid_bits & TCP_URG_VALID) &&
    571   8348      Eric 		    (tcp->tcp_urg == tcp->tcp_snxt)) {
    572   8348      Eric 			flags = MSG_OOB;
    573  11042      Erik 			(*peer_connp->conn_upcalls->su_signal_oob)
    574  11042      Erik 			    (peer_connp->conn_upper_handle, 0);
    575   8348      Eric 			tcp->tcp_valid_bits &= ~TCP_URG_VALID;
    576   8348      Eric 		}
    577  11042      Erik 		if ((*peer_connp->conn_upcalls->su_recv)(
    578  11042      Erik 		    peer_connp->conn_upper_handle, mp, recv_size,
    579   9534    Anders 		    flags, &error, &push) < 0) {
    580   9534    Anders 			ASSERT(error != EOPNOTSUPP);
    581   9534    Anders 			peer_data_queued = B_TRUE;
    582   9534    Anders 		}
    583   8348      Eric 	} else {
    584  11042      Erik 		if (IPCL_IS_NONSTR(peer_connp) &&
    585   8348      Eric 		    (tcp->tcp_valid_bits & TCP_URG_VALID) &&
    586   8348      Eric 		    (tcp->tcp_urg == tcp->tcp_snxt)) {
    587   8348      Eric 			/*
    588   8348      Eric 			 * Can not deal with urgent pointers
    589   8348      Eric 			 * that arrive before the connection has been
    590   8348      Eric 			 * accept()ed.
    591   8348      Eric 			 */
    592   8348      Eric 			tcp->tcp_valid_bits &= ~TCP_URG_VALID;
    593   8348      Eric 			freemsg(mp);
    594   8348      Eric 			return (B_TRUE);
    595   8348      Eric 		}
    596   8348      Eric 
    597  11042      Erik 		tcp_rcv_enqueue(peer_tcp, mp, recv_size,
    598  11042      Erik 		    tcp->tcp_connp->conn_cred);
    599   9993    Anders 
    600   9993    Anders 		/* In case it wrapped around and also to keep it constant */
    601   9993    Anders 		peer_tcp->tcp_rwnd += recv_size;
    602   8348      Eric 	}
    603    741  masputra 
    604    741  masputra 	/*
    605    741  masputra 	 * Exercise flow-control when needed; we will get back-enabled
    606   9993    Anders 	 * in either tcp_accept_finish(), tcp_unfuse(), or when data is
    607   9993    Anders 	 * consumed. If peer endpoint is detached, we emulate streams flow
    608   9993    Anders 	 * control by checking the peer's queue size and high water mark;
    609   9993    Anders 	 * otherwise we simply use canputnext() to decide if we need to stop
    610   9993    Anders 	 * our flow.
    611    741  masputra 	 *
    612   9993    Anders 	 * Since we are accessing our tcp_flow_stopped and might modify it,
    613   9993    Anders 	 * we need to take tcp->tcp_non_sq_lock.
    614    741  masputra 	 */
    615   9993    Anders 	mutex_enter(&tcp->tcp_non_sq_lock);
    616    741  masputra 	flow_stopped = tcp->tcp_flow_stopped;
    617   9993    Anders 	if ((TCP_IS_DETACHED(peer_tcp) &&
    618  11042      Erik 	    (peer_tcp->tcp_rcv_cnt >= peer_connp->conn_rcvbuf)) ||
    619   9993    Anders 	    (!TCP_IS_DETACHED(peer_tcp) &&
    620  11042      Erik 	    !IPCL_IS_NONSTR(peer_connp) && !canputnext(peer_connp->conn_rq))) {
    621   4011      udpa 		peer_data_queued = B_TRUE;
    622   4011      udpa 	}
    623   4011      udpa 
    624   4011      udpa 	if (!flow_stopped && (peer_data_queued ||
    625  11042      Erik 	    (TCP_UNSENT_BYTES(tcp) >= connp->conn_sndbuf))) {
    626    741  masputra 		tcp_setqfull(tcp);
    627    741  masputra 		flow_stopped = B_TRUE;
    628   3448  dh155122 		TCP_STAT(tcps, tcp_fusion_flowctl);
    629   9993    Anders 		DTRACE_PROBE3(tcp__fuse__output__flowctl, tcp_t *, tcp,
    630   9993    Anders 		    uint_t, send_size, uint_t, peer_tcp->tcp_rcv_cnt);
    631   4011      udpa 	} else if (flow_stopped && !peer_data_queued &&
    632  11042      Erik 	    (TCP_UNSENT_BYTES(tcp) <= connp->conn_sndlowat)) {
    633    741  masputra 		tcp_clrqfull(tcp);
    634   6970   ja97890 		TCP_STAT(tcps, tcp_fusion_backenabled);
    635   2578      meem 		flow_stopped = B_FALSE;
    636    741  masputra 	}
    637   3429  vi117747 	mutex_exit(&tcp->tcp_non_sq_lock);
    638   6970   ja97890 
    639   3448  dh155122 	ipst->ips_loopback_packets++;
    640    741  masputra 	tcp->tcp_last_sent_len = send_size;
    641    741  masputra 
    642    741  masputra 	/* Need to adjust the following SNMP MIB-related variables */
    643    741  masputra 	tcp->tcp_snxt += send_size;
    644    741  masputra 	tcp->tcp_suna = tcp->tcp_snxt;
    645   2958  dr146992 	peer_tcp->tcp_rnxt += recv_size;
    646    741  masputra 	peer_tcp->tcp_rack = peer_tcp->tcp_rnxt;
    647    741  masputra 
    648   3448  dh155122 	BUMP_MIB(&tcps->tcps_mib, tcpOutDataSegs);
    649   3448  dh155122 	UPDATE_MIB(&tcps->tcps_mib, tcpOutDataBytes, send_size);
    650    741  masputra 
    651   3448  dh155122 	BUMP_MIB(&tcps->tcps_mib, tcpInSegs);
    652   3448  dh155122 	BUMP_MIB(&tcps->tcps_mib, tcpInDataInorderSegs);
    653   3448  dh155122 	UPDATE_MIB(&tcps->tcps_mib, tcpInDataInorderBytes, send_size);
    654    741  masputra 
    655    741  masputra 	BUMP_LOCAL(tcp->tcp_obsegs);
    656    741  masputra 	BUMP_LOCAL(peer_tcp->tcp_ibsegs);
    657    741  masputra 
    658    741  masputra 	DTRACE_PROBE2(tcp__fuse__output, tcp_t *, tcp, uint_t, send_size);
    659    741  masputra 
    660   9992    Anders 	if (!IPCL_IS_NONSTR(peer_tcp->tcp_connp) &&
    661   9992    Anders 	    !TCP_IS_DETACHED(peer_tcp)) {
    662    741  masputra 		/*
    663    741  masputra 		 * Drain the peer's receive queue it has urgent data or if
    664   9993    Anders 		 * we're not flow-controlled.
    665    741  masputra 		 */
    666   9993    Anders 		if (urgent || !flow_stopped) {
    667   9992    Anders 			ASSERT(peer_tcp->tcp_rcv_list != NULL);
    668   2504      meem 			/*
    669   2504      meem 			 * For TLI-based streams, a thread in tcp_accept_swap()
    670   2504      meem 			 * can race with us.  That thread will ensure that the
    671  11042      Erik 			 * correct peer_connp->conn_rq is globally visible
    672  11042      Erik 			 * before peer_tcp->tcp_detached is visible as clear,
    673  11042      Erik 			 * but we must also ensure that the load of conn_rq
    674  11042      Erik 			 * cannot be reordered to be before the tcp_detached
    675  11042      Erik 			 * check.
    676   2504      meem 			 */
    677   2504      meem 			membar_consumer();
    678  11042      Erik 			(void) tcp_fuse_rcv_drain(peer_connp->conn_rq, peer_tcp,
    679   2504      meem 			    NULL);
    680    741  masputra 		}
    681    741  masputra 	}
    682    741  masputra 	return (B_TRUE);
    683   2958  dr146992 unfuse:
    684   2958  dr146992 	tcp_unfuse(tcp);
    685   2958  dr146992 	return (B_FALSE);
    686    741  masputra }
    687    741  masputra 
    688    741  masputra /*
    689    741  masputra  * This routine gets called to deliver data upstream on a fused or
    690    741  masputra  * previously fused tcp loopback endpoint; the latter happens only
    691    741  masputra  * when there is a pending SIGURG signal plus urgent data that can't
    692    741  masputra  * be sent upstream in the past.
    693    741  masputra  */
    694    741  masputra boolean_t
    695    741  masputra tcp_fuse_rcv_drain(queue_t *q, tcp_t *tcp, mblk_t **sigurg_mpp)
    696    741  masputra {
    697    741  masputra 	mblk_t *mp;
    698   8348      Eric 	conn_t	*connp = tcp->tcp_connp;
    699   8348      Eric 
    700    741  masputra #ifdef DEBUG
    701    741  masputra 	uint_t cnt = 0;
    702    741  masputra #endif
    703   3448  dh155122 	tcp_stack_t	*tcps = tcp->tcp_tcps;
    704   6970   ja97890 	tcp_t		*peer_tcp = tcp->tcp_loopback_peer;
    705    741  masputra 
    706    741  masputra 	ASSERT(tcp->tcp_loopback);
    707    741  masputra 	ASSERT(tcp->tcp_fused || tcp->tcp_fused_sigurg);
    708    741  masputra 	ASSERT(!tcp->tcp_fused || tcp->tcp_loopback_peer != NULL);
    709   8348      Eric 	ASSERT(IPCL_IS_NONSTR(connp) || sigurg_mpp != NULL || tcp->tcp_fused);
    710    741  masputra 
    711    741  masputra 	/* No need for the push timer now, in case it was scheduled */
    712    741  masputra 	if (tcp->tcp_push_tid != 0) {
    713    741  masputra 		(void) TCP_TIMER_CANCEL(tcp, tcp->tcp_push_tid);
    714    741  masputra 		tcp->tcp_push_tid = 0;
    715    741  masputra 	}
    716    741  masputra 	/*
    717    741  masputra 	 * If there's urgent data sitting in receive list and we didn't
    718    741  masputra 	 * get a chance to send up a SIGURG signal, make sure we send
    719    741  masputra 	 * it first before draining in order to ensure that SIOCATMARK
    720    741  masputra 	 * works properly.
    721    741  masputra 	 */
    722    741  masputra 	if (tcp->tcp_fused_sigurg) {
    723   9992    Anders 		ASSERT(!IPCL_IS_NONSTR(tcp->tcp_connp));
    724   9992    Anders 
    725   8348      Eric 		tcp->tcp_fused_sigurg = B_FALSE;
    726   9992    Anders 		/*
    727   9992    Anders 		 * sigurg_mpp is normally NULL, i.e. when we're still
    728   9992    Anders 		 * fused and didn't get here because of tcp_unfuse().
    729   9992    Anders 		 * In this case try hard to allocate the M_PCSIG mblk.
    730   9992    Anders 		 */
    731   9992    Anders 		if (sigurg_mpp == NULL &&
    732   9992    Anders 		    (mp = allocb(1, BPRI_HI)) == NULL &&
    733   9992    Anders 		    (mp = allocb_tryhard(1)) == NULL) {
    734   9992    Anders 			/* Alloc failed; try again next time */
    735   9992    Anders 			tcp->tcp_push_tid = TCP_TIMER(tcp,
    736   9992    Anders 			    tcp_push_timer,
    737   9992    Anders 			    MSEC_TO_TICK(
    738   9992    Anders 			    tcps->tcps_push_timer_interval));
    739   9992    Anders 			return (B_TRUE);
    740   9992    Anders 		} else if (sigurg_mpp != NULL) {
    741    741  masputra 			/*
    742   9992    Anders 			 * Use the supplied M_PCSIG mblk; it means we're
    743   9992    Anders 			 * either unfused or in the process of unfusing,
    744   9992    Anders 			 * and the drain must happen now.
    745    741  masputra 			 */
    746   9992    Anders 			mp = *sigurg_mpp;
    747   9992    Anders 			*sigurg_mpp = NULL;
    748   9992    Anders 		}
    749   9992    Anders 		ASSERT(mp != NULL);
    750   8348      Eric 
    751   9992    Anders 		/* Send up the signal */
    752   9992    Anders 		DB_TYPE(mp) = M_PCSIG;
    753   9992    Anders 		*mp->b_wptr++ = (uchar_t)SIGURG;
    754   9992    Anders 		putnext(q, mp);
    755   9992    Anders 
    756    741  masputra 		/*
    757    741  masputra 		 * Let the regular tcp_rcv_drain() path handle
    758    741  masputra 		 * draining the data if we're no longer fused.
    759    741  masputra 		 */
    760    741  masputra 		if (!tcp->tcp_fused)
    761    741  masputra 			return (B_FALSE);
    762    741  masputra 	}
    763    741  masputra 
    764    741  masputra 	/* Drain the data */
    765    741  masputra 	while ((mp = tcp->tcp_rcv_list) != NULL) {
    766    741  masputra 		tcp->tcp_rcv_list = mp->b_next;
    767    741  masputra 		mp->b_next = NULL;
    768    741  masputra #ifdef DEBUG
    769    741  masputra 		cnt += msgdsize(mp);
    770    741  masputra #endif
    771   8348      Eric 		ASSERT(!IPCL_IS_NONSTR(connp));
    772   9993    Anders 		putnext(q, mp);
    773   9993    Anders 		TCP_STAT(tcps, tcp_fusion_putnext);
    774    741  masputra 	}
    775    741  masputra 
    776   8348      Eric #ifdef DEBUG
    777    741  masputra 	ASSERT(cnt == tcp->tcp_rcv_cnt);
    778   8348      Eric #endif
    779    741  masputra 	tcp->tcp_rcv_last_head = NULL;
    780    741  masputra 	tcp->tcp_rcv_last_tail = NULL;
    781    741  masputra 	tcp->tcp_rcv_cnt = 0;
    782  11042      Erik 	tcp->tcp_rwnd = tcp->tcp_connp->conn_rcvbuf;
    783   6970   ja97890 
    784   9993    Anders 	mutex_enter(&peer_tcp->tcp_non_sq_lock);
    785   6970   ja97890 	if (peer_tcp->tcp_flow_stopped && (TCP_UNSENT_BYTES(peer_tcp) <=
    786  11042      Erik 	    peer_tcp->tcp_connp->conn_sndlowat)) {
    787   6970   ja97890 		tcp_clrqfull(peer_tcp);
    788   6970   ja97890 		TCP_STAT(tcps, tcp_fusion_backenabled);
    789   6970   ja97890 	}
    790   9993    Anders 	mutex_exit(&peer_tcp->tcp_non_sq_lock);
    791    741  masputra 
    792    741  masputra 	return (B_TRUE);
    793    741  masputra }
    794    741  masputra 
    795    741  masputra /*
    796    741  masputra  * Calculate the size of receive buffer for a fused tcp endpoint.
    797    741  masputra  */
    798    741  masputra size_t
    799    741  masputra tcp_fuse_set_rcv_hiwat(tcp_t *tcp, size_t rwnd)
    800    741  masputra {
    801   3448  dh155122 	tcp_stack_t	*tcps = tcp->tcp_tcps;
    802   3448  dh155122 
    803    741  masputra 	ASSERT(tcp->tcp_fused);
    804    741  masputra 
    805    741  masputra 	/* Ensure that value is within the maximum upper bound */
    806   3448  dh155122 	if (rwnd > tcps->tcps_max_buf)
    807   3448  dh155122 		rwnd = tcps->tcps_max_buf;
    808    741  masputra 	/*
    809    741  masputra 	 * Round up to system page size in case SO_RCVBUF is modified
    810    741  masputra 	 * after SO_SNDBUF; the latter is also similarly rounded up.
    811    741  masputra 	 */
    812    741  masputra 	rwnd = P2ROUNDUP_TYPED(rwnd, PAGESIZE, size_t);
    813  10312       Rao 
    814  10312       Rao 	/*
    815  10312       Rao 	 * Record high water mark, this is used for flow-control
    816  10312       Rao 	 * purposes in tcp_fuse_output().
    817  10312       Rao 	 */
    818  11042      Erik 	tcp->tcp_connp->conn_rcvbuf = rwnd;
    819  11042      Erik 	tcp->tcp_rwnd = rwnd;
    820    741  masputra 	return (rwnd);
    821    741  masputra }
    822    741  masputra 
    823    741  masputra /*
    824    741  masputra  * Calculate the maximum outstanding unread data block for a fused tcp endpoint.
    825    741  masputra  */
    826    741  masputra int
    827  10312       Rao tcp_fuse_maxpsz(tcp_t *tcp)
    828    741  masputra {
    829    741  masputra 	tcp_t *peer_tcp = tcp->tcp_loopback_peer;
    830  11042      Erik 	conn_t *connp = tcp->tcp_connp;
    831  11042      Erik 	uint_t sndbuf = connp->conn_sndbuf;
    832    741  masputra 	uint_t maxpsz = sndbuf;
    833    741  masputra 
    834    741  masputra 	ASSERT(tcp->tcp_fused);
    835    741  masputra 	ASSERT(peer_tcp != NULL);
    836  11042      Erik 	ASSERT(peer_tcp->tcp_connp->conn_rcvbuf != 0);
    837    741  masputra 	/*
    838    741  masputra 	 * In the fused loopback case, we want the stream head to split
    839    741  masputra 	 * up larger writes into smaller chunks for a more accurate flow-
    840    741  masputra 	 * control accounting.  Our maxpsz is half of the sender's send
    841    741  masputra 	 * buffer or the receiver's receive buffer, whichever is smaller.
    842    741  masputra 	 * We round up the buffer to system page size due to the lack of
    843    741  masputra 	 * TCP MSS concept in Fusion.
    844    741  masputra 	 */
    845  11042      Erik 	if (maxpsz > peer_tcp->tcp_connp->conn_rcvbuf)
    846  11042      Erik 		maxpsz = peer_tcp->tcp_connp->conn_rcvbuf;
    847    741  masputra 	maxpsz = P2ROUNDUP_TYPED(maxpsz, PAGESIZE, uint_t) >> 1;
    848    741  masputra 
    849    741  masputra 	return (maxpsz);
    850    741  masputra }
    851   9534    Anders 
    852   9534    Anders /*
    853   9534    Anders  * Called to release flow control.
    854   9534    Anders  */
    855   9534    Anders void
    856   9534    Anders tcp_fuse_backenable(tcp_t *tcp)
    857   9534    Anders {
    858   9534    Anders 	tcp_t *peer_tcp = tcp->tcp_loopback_peer;
    859   9534    Anders 
    860   9534    Anders 	ASSERT(tcp->tcp_fused);
    861   9534    Anders 	ASSERT(peer_tcp != NULL && peer_tcp->tcp_fused);
    862   9534    Anders 	ASSERT(peer_tcp->tcp_loopback_peer == tcp);
    863   9534    Anders 	ASSERT(!TCP_IS_DETACHED(tcp));
    864   9534    Anders 	ASSERT(tcp->tcp_connp->conn_sqp ==
    865   9534    Anders 	    peer_tcp->tcp_connp->conn_sqp);
    866   9534    Anders 
    867   9534    Anders 	if (tcp->tcp_rcv_list != NULL)
    868  11042      Erik 		(void) tcp_fuse_rcv_drain(tcp->tcp_connp->conn_rq, tcp, NULL);
    869   9534    Anders 
    870   9993    Anders 	mutex_enter(&peer_tcp->tcp_non_sq_lock);
    871   9534    Anders 	if (peer_tcp->tcp_flow_stopped &&
    872   9534    Anders 	    (TCP_UNSENT_BYTES(peer_tcp) <=
    873  11042      Erik 	    peer_tcp->tcp_connp->conn_sndlowat)) {
    874   9534    Anders 		tcp_clrqfull(peer_tcp);
    875   9534    Anders 	}
    876   9534    Anders 	mutex_exit(&peer_tcp->tcp_non_sq_lock);
    877   9534    Anders 
    878   9534    Anders 	TCP_STAT(tcp->tcp_tcps, tcp_fusion_backenabled);
    879   9534    Anders }
    880