/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
     25 
     26 #ifndef	_INET_IP_IMPL_H
     27 #define	_INET_IP_IMPL_H
     28 
/*
 * IP implementation private declarations.  These interfaces are
 * used to build the IP module and are not meant to be accessed
 * by any modules except IP itself.  They are undocumented and are
 * subject to change without notice.
 */
     35 
     36 #ifdef	__cplusplus
     37 extern "C" {
     38 #endif
     39 
     40 #ifdef _KERNEL
     41 
     42 #include <sys/sdt.h>
     43 #include <sys/dld.h>
     44 
     45 #define	IP_MOD_ID		5701
     46 
     47 #define	INET_NAME	"ip"
     48 
     49 #ifdef	_BIG_ENDIAN
     50 #define	IP_HDR_CSUM_TTL_ADJUST	256
     51 #define	IP_TCP_CSUM_COMP	IPPROTO_TCP
     52 #define	IP_UDP_CSUM_COMP	IPPROTO_UDP
     53 #else
     54 #define	IP_HDR_CSUM_TTL_ADJUST	1
     55 #define	IP_TCP_CSUM_COMP	(IPPROTO_TCP << 8)
     56 #define	IP_UDP_CSUM_COMP	(IPPROTO_UDP << 8)
     57 #endif
     58 
     59 #define	TCP_CHECKSUM_OFFSET	16
     60 #define	TCP_CHECKSUM_SIZE	2
     61 
     62 #define	UDP_CHECKSUM_OFFSET	6
     63 #define	UDP_CHECKSUM_SIZE	2
     64 
     65 #define	IPH_TCPH_CHECKSUMP(ipha, hlen)	\
     66 	((uint16_t *)(((uchar_t *)(ipha)) + ((hlen) + TCP_CHECKSUM_OFFSET)))
     67 
     68 #define	IPH_UDPH_CHECKSUMP(ipha, hlen)	\
     69 	((uint16_t *)(((uchar_t *)(ipha)) + ((hlen) + UDP_CHECKSUM_OFFSET)))
     70 
     71 #define	ILL_HCKSUM_CAPABLE(ill)		\
     72 	(((ill)->ill_capabilities & ILL_CAPAB_HCKSUM) != 0)
     73 /*
     74  * Macro that performs software checksum calculation on the IP header.
     75  */
     76 #define	IP_HDR_CKSUM(ipha, sum, v_hlen_tos_len, ttl_protocol) {		\
     77 	(sum) += (ttl_protocol) + (ipha)->ipha_ident +			\
     78 	    ((v_hlen_tos_len) >> 16) +					\
     79 	    ((v_hlen_tos_len) & 0xFFFF) +				\
     80 	    (ipha)->ipha_fragment_offset_and_flags;			\
     81 	(sum) = (((sum) & 0xFFFF) + ((sum) >> 16));			\
     82 	(sum) = ~((sum) + ((sum) >> 16));				\
     83 	(ipha)->ipha_hdr_checksum = (uint16_t)(sum);			\
     84 }
     85 
     86 #define	IS_IP_HDR_HWCKSUM(ipsec, mp, ill)				\
     87 	((!ipsec) && (DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM) &&		\
     88 	ILL_HCKSUM_CAPABLE(ill) && dohwcksum)
     89 
     90 /*
     91  * This macro acts as a wrapper around IP_CKSUM_XMIT_FAST, and it performs
     92  * several checks on the IRE and ILL (among other things) in order to see
     93  * whether or not hardware checksum offload is allowed for the outgoing
     94  * packet.  It assumes that the caller has held a reference to the IRE.
     95  */
     96 #define	IP_CKSUM_XMIT(ill, ire, mp, ihp, up, proto, start, end,		\
     97 	    max_frag, ipsec_len, pseudo) {				\
     98 	uint32_t _hck_flags;						\
     99 	/*								\
    100 	 * We offload checksum calculation to hardware when IPsec isn't	\
    101 	 * present and if fragmentation isn't required.  We also check	\
    102 	 * if M_DATA fastpath is safe to be used on the	corresponding	\
    103 	 * IRE; this check is performed without grabbing ire_lock but	\
    104 	 * instead by holding a reference to it.  This is sufficient	\
    105 	 * for IRE_CACHE; for IRE_BROADCAST on non-Ethernet links, the	\
    106 	 * DL_NOTE_FASTPATH_FLUSH indication could come up from the	\
    107 	 * driver and trigger the IRE (hence fp_mp) deletion.  This is	\
    108 	 * why only IRE_CACHE type is eligible for offload.		\
    109 	 *								\
    110 	 * The presense of IP options also forces the network stack to	\
    111 	 * calculate the checksum in software.  This is because:	\
    112 	 *								\
    113 	 * Wrap around: certain partial-checksum NICs (eri, ce) limit	\
    114 	 * the size of "start offset" width to 6-bit.  This effectively	\
    115 	 * sets the largest value of the offset to 64-bytes, starting	\
    116 	 * from the MAC header.  When the cumulative MAC and IP headers	\
    117 	 * exceed such limit, the offset will wrap around.  This causes	\
    118 	 * the checksum to be calculated at the wrong place.		\
    119 	 *								\
    120 	 * IPv4 source routing: none of the full-checksum capable NICs	\
    121 	 * is capable of correctly handling the	IPv4 source-routing	\
    122 	 * option for purposes of calculating the pseudo-header; the	\
    123 	 * actual destination is different from the destination in the	\
    124 	 * header which is that of the next-hop.  (This case may not be	\
    125 	 * true for NICs which can parse IPv6 extension headers, but	\
    126 	 * we choose to simplify the implementation by not offloading	\
    127 	 * checksum when they are present.)				\
    128 	 *								\
    129 	 */								\
    130 	if ((ill) != NULL && ILL_HCKSUM_CAPABLE(ill) &&			\
    131 	    !((ire)->ire_flags & RTF_MULTIRT) &&			\
    132 	    (!((ire)->ire_type & IRE_BROADCAST) ||			\
    133 	    (ill)->ill_type == IFT_ETHER) &&			\
    134 	    (ipsec_len) == 0 &&						\
    135 	    (((ire)->ire_ipversion == IPV4_VERSION &&			\
    136 	    (start) == IP_SIMPLE_HDR_LENGTH &&				\
    137 	    ((ire)->ire_nce != NULL &&					\
    138 	    (ire)->ire_nce->nce_fp_mp != NULL &&	\
    139 	    MBLKHEAD(mp) >= MBLKL((ire)->ire_nce->nce_fp_mp))) ||	\
    140 	    ((ire)->ire_ipversion == IPV6_VERSION &&			\
    141 	    (start) == IPV6_HDR_LEN &&					\
    142 	    (ire)->ire_nce->nce_fp_mp != NULL &&			\
    143 	    MBLKHEAD(mp) >= MBLKL((ire)->ire_nce->nce_fp_mp))) &&	\
    144 	    (max_frag) >= (uint_t)((end) + (ipsec_len)) &&		\
    145 	    dohwcksum) {						\
    146 		_hck_flags = (ill)->ill_hcksum_capab->ill_hcksum_txflags; \
    147 	} else {							\
    148 		_hck_flags = 0;						\
    149 	}								\
    150 	IP_CKSUM_XMIT_FAST((ire)->ire_ipversion, _hck_flags, mp, ihp,	\
    151 	    up, proto, start, end, pseudo);				\
    152 }
    153 
    154 /*
    155  * Based on the device capabilities, this macro either marks an outgoing
    156  * packet with hardware checksum offload information or calculate the
    157  * checksum in software.  If the latter is performed, the checksum field
    158  * of the dblk is cleared; otherwise it will be non-zero and contain the
    159  * necessary flag(s) for the driver.
    160  */
    161 #define	IP_CKSUM_XMIT_FAST(ipver, hck_flags, mp, ihp, up, proto, start,	\
    162 	    end, pseudo) {						\
    163 	uint32_t _sum;							\
    164 	/*								\
    165 	 * Underlying interface supports hardware checksum offload for	\
    166 	 * the payload; leave the payload checksum for the hardware to	\
    167 	 * calculate.  N.B: We only need to set up checksum info on the	\
    168 	 * first mblk.							\
    169 	 */								\
    170 	DB_CKSUMFLAGS(mp) = 0;						\
    171 	if (((ipver) == IPV4_VERSION &&					\
    172 	    ((hck_flags) & HCKSUM_INET_FULL_V4)) ||			\
    173 	    ((ipver) == IPV6_VERSION &&					\
    174 	    ((hck_flags) & HCKSUM_INET_FULL_V6))) {			\
    175 		/*							\
    176 		 * Hardware calculates pseudo-header, header and the	\
    177 		 * payload checksums, so clear the checksum field in	\
    178 		 * the protocol header.					\
    179 		 */							\
    180 		*(up) = 0;						\
    181 		DB_CKSUMFLAGS(mp) |= HCK_FULLCKSUM;			\
    182 	} else if ((hck_flags) & HCKSUM_INET_PARTIAL)  {		\
    183 		/*							\
    184 		 * Partial checksum offload has been enabled.  Fill	\
    185 		 * the checksum field in the protocl header with the	\
    186 		 * pseudo-header checksum value.			\
    187 		 */							\
    188 		_sum = ((proto) == IPPROTO_UDP) ?			\
    189 		    IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP;		\
    190 		_sum += *(up) + (pseudo);				\
    191 		_sum = (_sum & 0xFFFF) + (_sum >> 16);			\
    192 		*(up) = (_sum & 0xFFFF) + (_sum >> 16);			\
    193 		/*							\
    194 		 * Offsets are relative to beginning of IP header.	\
    195 		 */							\
    196 		DB_CKSUMSTART(mp) = (start);				\
    197 		DB_CKSUMSTUFF(mp) = ((proto) == IPPROTO_UDP) ?		\
    198 		    (start) + UDP_CHECKSUM_OFFSET :			\
    199 		    (start) + TCP_CHECKSUM_OFFSET;			\
    200 		DB_CKSUMEND(mp) = (end);				\
    201 		DB_CKSUMFLAGS(mp) |= HCK_PARTIALCKSUM;			\
    202 	} else {							\
    203 		/*							\
    204 		 * Software checksumming.				\
    205 		 */							\
    206 		_sum = ((proto) == IPPROTO_UDP) ?			\
    207 		    IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP;		\
    208 		_sum += (pseudo);					\
    209 		_sum = IP_CSUM(mp, start, _sum);			\
    210 		*(up) = (uint16_t)(((proto) == IPPROTO_UDP) ?		\
    211 		    (_sum ? _sum : ~_sum) : _sum);			\
    212 	}								\
    213 	/*								\
    214 	 * Hardware supports IP header checksum offload; clear the	\
    215 	 * contents of IP header checksum field as expected by NIC.	\
    216 	 * Do this only if we offloaded either full or partial sum.	\
    217 	 */								\
    218 	if ((ipver) == IPV4_VERSION && DB_CKSUMFLAGS(mp) != 0 &&	\
    219 	    ((hck_flags) & HCKSUM_IPHDRCKSUM)) {			\
    220 		DB_CKSUMFLAGS(mp) |= HCK_IPV4_HDRCKSUM;			\
    221 		((ipha_t *)(ihp))->ipha_hdr_checksum = 0;		\
    222 	}								\
    223 }
    224 
    225 /*
    226  * Macro to inspect the checksum of a fully-reassembled incoming datagram.
    227  */
    228 #define	IP_CKSUM_RECV_REASS(hck_flags, off, pseudo, sum, err) {		\
    229 	(err) = B_FALSE;						\
    230 	if ((hck_flags) & HCK_FULLCKSUM) {				\
    231 		/*							\
    232 		 * The sum of all fragment checksums should		\
    233 		 * result in -0 (0xFFFF) or otherwise invalid.		\
    234 		 */							\
    235 		if ((sum) != 0xFFFF)					\
    236 			(err) = B_TRUE;					\
    237 	} else if ((hck_flags) & HCK_PARTIALCKSUM) {			\
    238 		(sum) += (pseudo);					\
    239 		(sum) = ((sum) & 0xFFFF) + ((sum) >> 16);		\
    240 		(sum) = ((sum) & 0xFFFF) + ((sum) >> 16);		\
    241 		if (~(sum) & 0xFFFF)					\
    242 			(err) = B_TRUE;					\
    243 	} else if (((sum) = IP_CSUM(mp, off, pseudo)) != 0) {		\
    244 		(err) = B_TRUE;						\
    245 	}								\
    246 }
    247 
    248 /*
    249  * This macro inspects an incoming packet to see if the checksum value
    250  * contained in it is valid; if the hardware has provided the information,
    251  * the value is verified, otherwise it performs software checksumming.
    252  * The checksum value is returned to caller.
    253  */
    254 #define	IP_CKSUM_RECV(hck_flags, sum, cksum_start, ulph_off, mp, mp1, err) { \
    255 	int32_t _len;							\
    256 									\
    257 	(err) = B_FALSE;						\
    258 	if ((hck_flags) & HCK_FULLCKSUM) {				\
    259 		/*							\
    260 		 * Full checksum has been computed by the hardware	\
    261 		 * and has been attached.  If the driver wants us to	\
    262 		 * verify the correctness of the attached value, in	\
    263 		 * order to protect against faulty hardware, compare	\
    264 		 * it against -0 (0xFFFF) to see if it's valid.		\
    265 		 */							\
    266 		(sum) = DB_CKSUM16(mp);					\
    267 		if (!((hck_flags) & HCK_FULLCKSUM_OK) && (sum) != 0xFFFF) \
    268 			(err) = B_TRUE;					\
    269 	} else if (((hck_flags) & HCK_PARTIALCKSUM) &&			\
    270 	    ((mp1) == NULL || (mp1)->b_cont == NULL) &&			\
    271 	    (ulph_off) >= DB_CKSUMSTART(mp) &&				\
    272 	    ((_len = (ulph_off) - DB_CKSUMSTART(mp)) & 1) == 0) {	\
    273 		uint32_t _adj;						\
    274 		/*							\
    275 		 * Partial checksum has been calculated by hardware	\
    276 		 * and attached to the packet; in addition, any		\
    277 		 * prepended extraneous data is even byte aligned,	\
    278 		 * and there are at most two mblks associated with	\
    279 		 * the packet.  If any such data exists, we adjust	\
    280 		 * the checksum; also take care any postpended data.	\
    281 		 */							\
    282 		IP_ADJCKSUM_PARTIAL(cksum_start, mp, mp1, _len, _adj);	\
    283 		/*							\
    284 		 * One's complement subtract extraneous checksum	\
    285 		 */							\
    286 		(sum) += DB_CKSUM16(mp);				\
    287 		if (_adj >= (sum))					\
    288 			(sum) = ~(_adj - (sum)) & 0xFFFF;		\
    289 		else							\
    290 			(sum) -= _adj;					\
    291 		(sum) = ((sum) & 0xFFFF) + ((int)(sum) >> 16);		\
    292 		(sum) = ((sum) & 0xFFFF) + ((int)(sum) >> 16);		\
    293 		if (~(sum) & 0xFFFF)					\
    294 			(err) = B_TRUE;					\
    295 	} else if (((sum) = IP_CSUM(mp, ulph_off, sum)) != 0) {		\
    296 		(err) = B_TRUE;						\
    297 	}								\
    298 }
    299 
    300 /*
    301  * Macro to adjust a given checksum value depending on any prepended
    302  * or postpended data on the packet.  It expects the start offset to
    303  * begin at an even boundary and that the packet consists of at most
    304  * two mblks.
    305  */
    306 #define	IP_ADJCKSUM_PARTIAL(cksum_start, mp, mp1, len, adj) {		\
    307 	/*								\
    308 	 * Prepended extraneous data; adjust checksum.			\
    309 	 */								\
    310 	if ((len) > 0)							\
    311 		(adj) = IP_BCSUM_PARTIAL(cksum_start, len, 0);		\
    312 	else								\
    313 		(adj) = 0;						\
    314 	/*								\
    315 	 * len is now the total length of mblk(s)			\
    316 	 */								\
    317 	(len) = MBLKL(mp);						\
    318 	if ((mp1) == NULL)						\
    319 		(mp1) = (mp);						\
    320 	else								\
    321 		(len) += MBLKL(mp1);					\
    322 	/*								\
    323 	 * Postpended extraneous data; adjust checksum.			\
    324 	 */								\
    325 	if (((len) = (DB_CKSUMEND(mp) - len)) > 0) {			\
    326 		uint32_t _pad;						\
    327 									\
    328 		_pad = IP_BCSUM_PARTIAL((mp1)->b_wptr, len, 0);		\
    329 		/*							\
    330 		 * If the postpended extraneous data was odd		\
    331 		 * byte aligned, swap resulting checksum bytes.		\
    332 		 */							\
    333 		if ((uintptr_t)(mp1)->b_wptr & 1)			\
    334 			(adj) += ((_pad << 8) & 0xFFFF) | (_pad >> 8);	\
    335 		else							\
    336 			(adj) += _pad;					\
    337 		(adj) = ((adj) & 0xFFFF) + ((int)(adj) >> 16);		\
    338 	}								\
    339 }
    340 
    341 #define	ILL_MDT_CAPABLE(ill)		\
    342 	(((ill)->ill_capabilities & ILL_CAPAB_MDT) != 0)
    343 
    344 /*
    345  * ioctl identifier and structure for Multidata Transmit update
    346  * private M_CTL communication from IP to ULP.
    347  */
    348 #define	MDT_IOC_INFO_UPDATE	(('M' << 8) + 1020)
    349 
    350 typedef struct ip_mdt_info_s {
    351 	uint_t	mdt_info_id;	/* MDT_IOC_INFO_UPDATE */
    352 	ill_mdt_capab_t	mdt_capab; /* ILL MDT capabilities */
    353 } ip_mdt_info_t;
    354 
    355 /*
    356  * Macro that determines whether or not a given ILL is allowed for MDT.
    357  */
    358 #define	ILL_MDT_USABLE(ill)						\
    359 	(ILL_MDT_CAPABLE(ill) &&					\
    360 	ill->ill_mdt_capab != NULL &&					\
    361 	ill->ill_mdt_capab->ill_mdt_version == MDT_VERSION_2 &&		\
    362 	ill->ill_mdt_capab->ill_mdt_on != 0)
    363 
    364 #define	ILL_LSO_CAPABLE(ill)		\
    365 	(((ill)->ill_capabilities & ILL_CAPAB_DLD_LSO) != 0)
    366 
    367 /*
    368  * ioctl identifier and structure for Large Segment Offload
    369  * private M_CTL communication from IP to ULP.
    370  */
    371 #define	LSO_IOC_INFO_UPDATE	(('L' << 24) + ('S' << 16) + ('O' << 8))
    372 
    373 typedef struct ip_lso_info_s {
    374 	uint_t	lso_info_id;	/* LSO_IOC_INFO_UPDATE */
    375 	ill_lso_capab_t	lso_capab; /* ILL LSO capabilities */
    376 } ip_lso_info_t;
    377 
    378 /*
    379  * Macro that determines whether or not a given ILL is allowed for LSO.
    380  */
    381 #define	ILL_LSO_USABLE(ill)						\
    382 	(ILL_LSO_CAPABLE(ill) &&					\
    383 	ill->ill_lso_capab != NULL &&					\
    384 	ill->ill_lso_capab->ill_lso_on != 0)
    385 
    386 #define	ILL_LSO_TCP_USABLE(ill)						\
    387 	(ILL_LSO_USABLE(ill) &&						\
    388 	ill->ill_lso_capab->ill_lso_flags & DLD_LSO_TX_BASIC_TCP_IPV4)
    389 
    390 /*
    391  * Macro that determines whether or not a given CONN may be considered
    392  * for fast path prior to proceeding further with LSO or Multidata.
    393  */
    394 #define	CONN_IS_LSO_MD_FASTPATH(connp)	\
    395 	((connp)->conn_dontroute == 0 &&	/* SO_DONTROUTE */	\
    396 	!((connp)->conn_nexthop_set) &&		/* IP_NEXTHOP */	\
    397 	(connp)->conn_outgoing_ill == NULL)	/* IP{V6}_BOUND_IF */
    398 
    399 /* Definitions for fragmenting IP packets using MDT. */
    400 
    401 /*
    402  * Smaller and private version of pdescinfo_t used specifically for IP,
    403  * which allows for only a single payload span per packet.
    404  */
    405 typedef struct ip_pdescinfo_s PDESCINFO_STRUCT(2)	ip_pdescinfo_t;
    406 
    407 /*
    408  * Macro version of ip_can_frag_mdt() which avoids the function call if we
    409  * only examine a single message block.
    410  */
    411 #define	IP_CAN_FRAG_MDT(mp, hdr_len, len)			\
    412 	(((mp)->b_cont == NULL) ?				\
    413 	(MBLKL(mp) >= ((hdr_len) + ip_wput_frag_mdt_min)) :	\
    414 	ip_can_frag_mdt((mp), (hdr_len), (len)))
    415 
    416 /*
    417  * Macro that determines whether or not a given IPC requires
    418  * outbound IPSEC processing.
    419  */
    420 #define	CONN_IPSEC_OUT_ENCAPSULATED(connp)	\
    421 	((connp)->conn_out_enforce_policy ||	\
    422 	((connp)->conn_latch != NULL &&		\
    423 	(connp)->conn_latch->ipl_out_policy != NULL))
    424 
    425 /*
    426  * Macro that checks whether or not a particular UDP conn is
    427  * flow-controlling on the read-side.
    428  *
    429  * Note that this check is done after the conn is found in
    430  * the UDP fanout table.
    431  */
    432 #define	CONN_UDP_FLOWCTLD(connp) !canputnext((connp)->conn_rq)
    433 
    434 /* Macro that follows definitions of flags for mac_tx() (see mac_client.h) */
    435 #define	IP_DROP_ON_NO_DESC	0x01	/* Equivalent to MAC_DROP_ON_NO_DESC */
    436 
    437 #define	ILL_DIRECT_CAPABLE(ill)						\
    438 	(((ill)->ill_capabilities & ILL_CAPAB_DLD_DIRECT) != 0)
    439 
    440 #define	ILL_SEND_TX(ill, ire, hint, mp, flag, connp) {			\
    441 	if (ILL_DIRECT_CAPABLE(ill) && DB_TYPE(mp) == M_DATA) {		\
    442 		ill_dld_direct_t *idd;					\
    443 		uintptr_t	cookie;					\
    444 		conn_t		*udp_connp = (conn_t *)connp;		\
    445 									\
    446 		idd = &(ill)->ill_dld_capab->idc_direct;		\
    447 		/*							\
    448 		 * Send the packet directly to DLD, where it		\
    449 		 * may be queued depending on the availability		\
    450 		 * of transmit resources at the media layer.		\
    451 		 * Ignore the returned value for the time being 	\
    452 		 * In future, we may want to take this into		\
    453 		 * account and flow control the TCP.			\
    454 		 */							\
    455 		cookie = idd->idd_tx_df(idd->idd_tx_dh, mp,		\
    456 		    (uintptr_t)(hint), flag);				\
    457 									\
    458 		/*							\
    459 		 * non-NULL cookie indicates flow control situation	\
    460 		 * and the cookie itself identifies this specific	\
    461 		 * Tx ring that is blocked. This cookie is used to	\
    462 		 * block the UDP conn that is sending packets over	\
    463 		 * this specific Tx ring.				\
    464 		 */							\
    465 		if ((cookie != NULL) && (udp_connp != NULL) &&		\
    466 		    (udp_connp->conn_ulp == IPPROTO_UDP)) {		\
    467 			idl_tx_list_t *idl_txl;				\
    468 			ip_stack_t *ipst;				\
    469 									\
    470 			/*						\
    471 			 * Flow controlled.				\
    472 			 */						\
    473 			DTRACE_PROBE2(ill__send__tx__cookie,		\
    474 			    uintptr_t, cookie, conn_t *, udp_connp);	\
    475 			ipst = udp_connp->conn_netstack->netstack_ip;	\
    476 			idl_txl =					\
    477 			    &ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)];\
    478 			mutex_enter(&idl_txl->txl_lock);		\
    479 			if (udp_connp->conn_direct_blocked ||		\
    480 			    (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh,	\
    481 			    cookie) == 0)) {				\
    482 				DTRACE_PROBE1(ill__tx__not__blocked,	\
    483 				    boolean,				\
    484 				    udp_connp->conn_direct_blocked);	\
    485 			} else if (idl_txl->txl_cookie != NULL &&	\
    486 			    idl_txl->txl_cookie != cookie) {		\
    487 				udp_t *udp = udp_connp->conn_udp;	\
    488 				udp_stack_t *us = udp->udp_us;		\
    489 									\
    490 				DTRACE_PROBE2(ill__send__tx__collision,	\
    491 				    uintptr_t, cookie,			\
    492 				    uintptr_t, idl_txl->txl_cookie);	\
    493 				UDP_STAT(us, udp_cookie_coll);		\
    494 			} else {					\
    495 				udp_connp->conn_direct_blocked = B_TRUE;\
    496 				idl_txl->txl_cookie = cookie;		\
    497 				conn_drain_insert(udp_connp, idl_txl);	\
    498 				DTRACE_PROBE1(ill__send__tx__insert,	\
    499 				    conn_t *, udp_connp);		\
    500 			}						\
    501 			mutex_exit(&idl_txl->txl_lock);			\
    502 		}							\
    503 	} else {							\
    504 		putnext((ire)->ire_stq, mp);				\
    505 	}								\
    506 }
    507 
    508 #define	MBLK_RX_FANOUT_SLOWPATH(mp, ipha)				\
    509 	(DB_TYPE(mp) != M_DATA || DB_REF(mp) != 1 || !OK_32PTR(ipha) || \
    510 	(((uchar_t *)ipha + IP_SIMPLE_HDR_LENGTH) >= (mp)->b_wptr))
    511 
    512 /*
    513  * In non-global zone exclusive IP stacks, data structures such as IRE
    514  * entries pretend that they're in the global zone.  The following
    515  * macro evaluates to the real zoneid instead of a pretend
    516  * GLOBAL_ZONEID.
    517  */
    518 #define	IP_REAL_ZONEID(zoneid, ipst)					\
    519 	(((zoneid) == GLOBAL_ZONEID) ?					\
    520 	    netstackid_to_zoneid((ipst)->ips_netstack->netstack_stackid) : \
    521 	    (zoneid))
    522 
    523 extern int	ip_wput_frag_mdt_min;
    524 extern boolean_t ip_can_frag_mdt(mblk_t *, ssize_t, ssize_t);
    525 extern mblk_t   *ip_prepend_zoneid(mblk_t *, zoneid_t, ip_stack_t *);
    526 extern void ill_flow_enable(void *, ip_mac_tx_cookie_t);
    527 extern zoneid_t	ip_get_zoneid_v4(ipaddr_t, mblk_t *, ip_stack_t *, zoneid_t);
    528 extern zoneid_t	ip_get_zoneid_v6(in6_addr_t *, mblk_t *, const ill_t *,
    529     ip_stack_t *, zoneid_t);
    530 
    531 /*
    532  * flag passed in by IP based protocols to get a private ip stream with
    533  * no conn_t. Note this flag has the same value as SO_FALLBACK
    534  */
    535 #define	IP_HELPER_STR	SO_FALLBACK
    536 
    537 #define	IP_MOD_MINPSZ	1
    538 #define	IP_MOD_MAXPSZ	INFPSZ
    539 #define	IP_MOD_HIWAT	65536
    540 #define	IP_MOD_LOWAT	1024
    541 
    542 #define	DEV_IP	"/devices/pseudo/ip@0:ip"
    543 #define	DEV_IP6	"/devices/pseudo/ip6@0:ip6"
    544 
    545 extern struct kmem_cache  *ip_helper_stream_cache;
    546 
    547 #endif	/* _KERNEL */
    548 
    549 #ifdef	__cplusplus
    550 }
    551 #endif
    552 
    553 #endif	/* _INET_IP_IMPL_H */
    554