Home | History | Annotate | Download | only in inet
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #ifndef	_INET_TCP_IMPL_H
     27 #define	_INET_TCP_IMPL_H
     28 
     29 /*
     30  * TCP implementation private declarations.  These interfaces are
     31  * used to build the IP module and are not meant to be accessed
     32  * by any modules except IP itself.  They are undocumented and are
     33  * subject to change without notice.
     34  */
     35 
     36 #ifdef	__cplusplus
     37 extern "C" {
     38 #endif
     39 
     40 #ifdef _KERNEL
     41 
     42 #include <inet/tcp.h>
     43 
     44 #define	TCP_MOD_ID	5105
     45 
     46 /*
     47  * Was this tcp created via socket() interface?
     48  */
     49 #define	TCP_IS_SOCKET(tcp)	((tcp)->tcp_issocket)
     50 
     51 /*
     52  * Is this tcp not attached to any upper client?
     53  */
     54 #define	TCP_IS_DETACHED(tcp)	((tcp)->tcp_detached)
     55 
     56 #define	TCP_TIMER(tcp, f, tim)		\
     57 	tcp_timeout(tcp->tcp_connp, f, tim)
     58 #define	TCP_TIMER_CANCEL(tcp, id)	\
     59 	tcp_timeout_cancel(tcp->tcp_connp, id)
     60 
     61 /*
     62  * To restart the TCP retransmission timer.
     63  */
     64 #define	TCP_TIMER_RESTART(tcp, intvl) {					\
     65 	if ((tcp)->tcp_timer_tid != 0)					\
     66 		(void) TCP_TIMER_CANCEL((tcp), (tcp)->tcp_timer_tid);	\
     67 	(tcp)->tcp_timer_tid = TCP_TIMER((tcp), tcp_timer,		\
     68 	    MSEC_TO_TICK(intvl));					\
     69 }
     70 
     71 /*
     72  * This stops synchronous streams for a fused tcp endpoint
     73  * and prevents tcp_fuse_rrw() from pulling data from it.
     74  */
     75 #define	TCP_FUSE_SYNCSTR_STOP(tcp) {				\
     76 	if ((tcp)->tcp_direct_sockfs) {				\
     77 		mutex_enter(&(tcp)->tcp_non_sq_lock);		\
     78 		(tcp)->tcp_fuse_syncstr_stopped = B_TRUE;	\
     79 		mutex_exit(&(tcp)->tcp_non_sq_lock);		\
     80 	}							\
     81 }
     82 
     83 /*
     84  * This causes all calls to tcp_fuse_rrw() to block until
     85  * TCP_FUSE_SYNCSTR_UNPLUG_DRAIN() is called.
     86  */
     87 #define	TCP_FUSE_SYNCSTR_PLUG_DRAIN(tcp) {			\
     88 	if ((tcp)->tcp_direct_sockfs) {				\
     89 		mutex_enter(&(tcp)->tcp_non_sq_lock);		\
     90 		ASSERT(!(tcp)->tcp_fuse_syncstr_plugged);	\
     91 		(tcp)->tcp_fuse_syncstr_plugged = B_TRUE;	\
     92 		mutex_exit(&(tcp)->tcp_non_sq_lock);		\
     93 	}							\
     94 }
     95 
     96 /*
     97  * This unplugs the draining of data through tcp_fuse_rrw(); see
     98  * the comments in tcp_fuse_rrw() for how we preserve ordering.
     99  */
    100 #define	TCP_FUSE_SYNCSTR_UNPLUG_DRAIN(tcp) {			\
    101 	if ((tcp)->tcp_direct_sockfs) {				\
    102 		mutex_enter(&(tcp)->tcp_non_sq_lock);		\
    103 		(tcp)->tcp_fuse_syncstr_plugged = B_FALSE;	\
    104 		(void) cv_broadcast(&(tcp)->tcp_fuse_plugcv);	\
    105 		mutex_exit(&(tcp)->tcp_non_sq_lock);		\
    106 	}							\
    107 }
    108 
    109 /*
    110  * Before caching the conn IRE, we need to make sure certain TCP
    111  * states are in sync with the ire. The mismatch could occur if the
    112  * TCP state has been set in tcp_adapt_ire() using a different IRE,
    113  * e.g if an address was not present during an initial connect(),
    114  * tcp_adapt_ire() will set the state using the interface route.
    115  * Subsequently, if the address is added to the local machine, the
    116  * retransmitted SYN will get the correct (loopback) IRE, but the TCP
    117  * state (tcp_loopback and tcp_localnet) will remain out of sync.
    118  * This is especially an issue with TCP fusion which relies on the
    119  * TCP state to be accurate.
    120  *
    121  * This check/change should be made only if the TCP is not yet in
    122  * the established state, else it would lead to inconsistencies.
    123  */
    124 #define	TCP_CHECK_IREINFO(tcp, ire) {					\
    125 	if ((tcp)->tcp_state < TCPS_ESTABLISHED) {			\
    126 		if (((ire)->ire_type & (IRE_LOOPBACK | 			\
    127 		    IRE_LOCAL)) && !(tcp)->tcp_loopback) {		\
    128 			(tcp)->tcp_loopback = B_TRUE;			\
    129 		} else if ((tcp)->tcp_loopback && 			\
    130 		    !((ire)->ire_type & (IRE_LOOPBACK | IRE_LOCAL))) {	\
    131 			(tcp)->tcp_loopback = B_FALSE;			\
    132 		}							\
    133 		if ((tcp)->tcp_ipversion == IPV4_VERSION) {		\
    134 			(tcp)->tcp_localnet =				\
    135 			    ((ire)->ire_gateway_addr == 0);		\
    136 		} else {						\
    137 			(tcp)->tcp_localnet =				\
    138 			    IN6_IS_ADDR_UNSPECIFIED(			\
    139 			    &(ire)->ire_gateway_addr_v6);		\
    140 		}							\
    141 	}								\
    142 }
    143 
    144 /*
    145  * Write-side flow-control is implemented via the per instance STREAMS
    146  * write-side Q by explicitly setting QFULL to stop the flow of mblk_t(s)
    147  * and clearing QFULL and calling qbackenable() to restart the flow based
    148  * on the number of TCP unsent bytes (i.e. those not on the wire waiting
    149  * for a remote ACK).
    150  *
    151  * This is different than a standard STREAMS kmod which when using the
    152  * STREAMS Q the framework would automatictly flow-control based on the
    153  * defined hiwat/lowat values as mblk_t's are enqueued/dequeued.
    154  *
    155  * As of FireEngine TCP write-side flow-control needs to take into account
    156  * both the unsent tcp_xmit list bytes but also any squeue_t enqueued bytes
    157  * (i.e. from tcp_wput() -> tcp_output()).
    158  *
    159  * This is accomplished by adding a new tcp_t fields, tcp_squeue_bytes, to
    160  * count the number of bytes enqueued by tcp_wput() and the number of bytes
    161  * dequeued and processed by tcp_output().
    162  *
    163  * So, the total number of bytes unsent is (squeue_bytes + unsent) with all
    164  * flow-control uses of unsent replaced with the macro TCP_UNSENT_BYTES.
    165  */
    166 extern void	tcp_clrqfull(tcp_t *);
    167 extern void	tcp_setqfull(tcp_t *);
    168 
    169 #define	TCP_UNSENT_BYTES(tcp) \
    170 	((tcp)->tcp_squeue_bytes + (tcp)->tcp_unsent)
    171 
    172 /* Named Dispatch Parameter Management Structure */
    173 typedef struct tcpparam_s {
    174 	uint32_t	tcp_param_min;
    175 	uint32_t	tcp_param_max;
    176 	uint32_t	tcp_param_val;
    177 	char		*tcp_param_name;
    178 } tcpparam_t;
    179 
    180 
    181 #define	tcps_time_wait_interval		tcps_params[0].tcp_param_val
    182 #define	tcps_conn_req_max_q		tcps_params[1].tcp_param_val
    183 #define	tcps_conn_req_max_q0		tcps_params[2].tcp_param_val
    184 #define	tcps_conn_req_min		tcps_params[3].tcp_param_val
    185 #define	tcps_conn_grace_period		tcps_params[4].tcp_param_val
    186 #define	tcps_cwnd_max_			tcps_params[5].tcp_param_val
    187 #define	tcps_dbg			tcps_params[6].tcp_param_val
    188 #define	tcps_smallest_nonpriv_port	tcps_params[7].tcp_param_val
    189 #define	tcps_ip_abort_cinterval		tcps_params[8].tcp_param_val
    190 #define	tcps_ip_abort_linterval		tcps_params[9].tcp_param_val
    191 #define	tcps_ip_abort_interval		tcps_params[10].tcp_param_val
    192 #define	tcps_ip_notify_cinterval	tcps_params[11].tcp_param_val
    193 #define	tcps_ip_notify_interval		tcps_params[12].tcp_param_val
    194 #define	tcps_ipv4_ttl			tcps_params[13].tcp_param_val
    195 #define	tcps_keepalive_interval_high	tcps_params[14].tcp_param_max
    196 #define	tcps_keepalive_interval		tcps_params[14].tcp_param_val
    197 #define	tcps_keepalive_interval_low	tcps_params[14].tcp_param_min
    198 #define	tcps_maxpsz_multiplier		tcps_params[15].tcp_param_val
    199 #define	tcps_mss_def_ipv4		tcps_params[16].tcp_param_val
    200 #define	tcps_mss_max_ipv4		tcps_params[17].tcp_param_val
    201 #define	tcps_mss_min			tcps_params[18].tcp_param_val
    202 #define	tcps_naglim_def			tcps_params[19].tcp_param_val
    203 #define	tcps_rexmit_interval_initial	tcps_params[20].tcp_param_val
    204 #define	tcps_rexmit_interval_max	tcps_params[21].tcp_param_val
    205 #define	tcps_rexmit_interval_min	tcps_params[22].tcp_param_val
    206 #define	tcps_deferred_ack_interval	tcps_params[23].tcp_param_val
    207 #define	tcps_snd_lowat_fraction		tcps_params[24].tcp_param_val
    208 #define	tcps_sth_rcv_hiwat		tcps_params[25].tcp_param_val
    209 #define	__tcps_not_used1		tcps_params[26].tcp_param_val
    210 #define	tcps_dupack_fast_retransmit	tcps_params[27].tcp_param_val
    211 #define	tcps_ignore_path_mtu		tcps_params[28].tcp_param_val
    212 #define	tcps_smallest_anon_port		tcps_params[29].tcp_param_val
    213 #define	tcps_largest_anon_port		tcps_params[30].tcp_param_val
    214 #define	tcps_xmit_hiwat			tcps_params[31].tcp_param_val
    215 #define	tcps_xmit_lowat			tcps_params[32].tcp_param_val
    216 #define	tcps_recv_hiwat			tcps_params[33].tcp_param_val
    217 #define	tcps_recv_hiwat_minmss		tcps_params[34].tcp_param_val
    218 #define	tcps_fin_wait_2_flush_interval	tcps_params[35].tcp_param_val
    219 #define	tcps_max_buf			tcps_params[36].tcp_param_val
    220 #define	tcps_strong_iss			tcps_params[37].tcp_param_val
    221 #define	tcps_rtt_updates		tcps_params[38].tcp_param_val
    222 #define	tcps_wscale_always		tcps_params[39].tcp_param_val
    223 #define	tcps_tstamp_always		tcps_params[40].tcp_param_val
    224 #define	tcps_tstamp_if_wscale		tcps_params[41].tcp_param_val
    225 #define	tcps_rexmit_interval_extra	tcps_params[42].tcp_param_val
    226 #define	tcps_deferred_acks_max		tcps_params[43].tcp_param_val
    227 #define	tcps_slow_start_after_idle	tcps_params[44].tcp_param_val
    228 #define	tcps_slow_start_initial		tcps_params[45].tcp_param_val
    229 #define	tcps_sack_permitted		tcps_params[46].tcp_param_val
    230 #define	__tcps_not_used2		tcps_params[47].tcp_param_val
    231 #define	tcps_ipv6_hoplimit		tcps_params[48].tcp_param_val
    232 #define	tcps_mss_def_ipv6		tcps_params[49].tcp_param_val
    233 #define	tcps_mss_max_ipv6		tcps_params[50].tcp_param_val
    234 #define	tcps_rev_src_routes		tcps_params[51].tcp_param_val
    235 #define	tcps_local_dack_interval	tcps_params[52].tcp_param_val
    236 #define	tcps_ndd_get_info_interval	tcps_params[53].tcp_param_val
    237 #define	tcps_local_dacks_max		tcps_params[54].tcp_param_val
    238 #define	tcps_ecn_permitted		tcps_params[55].tcp_param_val
    239 #define	tcps_rst_sent_rate_enabled	tcps_params[56].tcp_param_val
    240 #define	tcps_rst_sent_rate		tcps_params[57].tcp_param_val
    241 #define	tcps_push_timer_interval	tcps_params[58].tcp_param_val
    242 #define	tcps_use_smss_as_mss_opt	tcps_params[59].tcp_param_val
    243 #define	tcps_keepalive_abort_interval_high	tcps_params[60].tcp_param_max
    244 #define	tcps_keepalive_abort_interval		tcps_params[60].tcp_param_val
    245 #define	tcps_keepalive_abort_interval_low	tcps_params[60].tcp_param_min
    246 
    247 extern struct qinit tcp_loopback_rinit, tcp_rinitv4, tcp_rinitv6;
    248 extern boolean_t do_tcp_fusion;
    249 
    250 extern int	tcp_maxpsz_set(tcp_t *, boolean_t);
    251 extern void	tcp_timers_stop(tcp_t *);
    252 extern void	tcp_rcv_enqueue(tcp_t *, mblk_t *, uint_t);
    253 extern void	tcp_push_timer(void *);
    254 extern timeout_id_t tcp_timeout(conn_t *, void (*)(void *), clock_t);
    255 extern clock_t	tcp_timeout_cancel(conn_t *, timeout_id_t);
    256 
    257 extern void	tcp_fuse(tcp_t *, uchar_t *, tcph_t *);
    258 extern void	tcp_unfuse(tcp_t *);
    259 extern boolean_t tcp_fuse_output(tcp_t *, mblk_t *, uint32_t);
    260 extern void	tcp_fuse_output_urg(tcp_t *, mblk_t *);
    261 extern boolean_t tcp_fuse_rcv_drain(queue_t *, tcp_t *, mblk_t **);
    262 extern void	tcp_fuse_syncstr_enable_pair(tcp_t *);
    263 extern void	tcp_fuse_disable_pair(tcp_t *, boolean_t);
    264 extern int	tcp_fuse_rrw(queue_t *, struiod_t *);
    265 extern int	tcp_fuse_rinfop(queue_t *, infod_t *);
    266 extern size_t	tcp_fuse_set_rcv_hiwat(tcp_t *, size_t);
    267 extern int	tcp_fuse_maxpsz_set(tcp_t *);
    268 
    269 /*
    270  * Object to represent database of options to search passed to
    271  * {sock,tpi}optcom_req() interface routine to take care of option
    272  * management and associated methods.
    273  */
    274 extern optdb_obj_t	tcp_opt_obj;
    275 extern uint_t		tcp_max_optsize;
    276 
    277 #endif	/* _KERNEL */
    278 
    279 #ifdef	__cplusplus
    280 }
    281 #endif
    282 
    283 #endif	/* _INET_TCP_IMPL_H */
    284