Home | History | Annotate | Download | only in ip
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * IP PACKET CLASSIFIER
     28  *
     29  * The IP packet classifier provides mapping between IP packets and persistent
     30  * connection state for connection-oriented protocols. It also provides
     31  * interface for managing connection states.
     32  *
     33  * The connection state is kept in conn_t data structure and contains, among
     34  * other things:
     35  *
     36  *	o local/remote address and ports
     37  *	o Transport protocol
     38  *	o squeue for the connection (for TCP only)
     39  *	o reference counter
     40  *	o Connection state
     41  *	o hash table linkage
     42  *	o interface/ire information
     43  *	o credentials
     44  *	o ipsec policy
     45  *	o send and receive functions.
     46  *	o mutex lock.
     47  *
     48  * Connections use a reference counting scheme. They are freed when the
     49  * reference counter drops to zero. A reference is incremented when connection
     50  * is placed in a list or table, when incoming packet for the connection arrives
     51  * and when connection is processed via squeue (squeue processing may be
     52  * asynchronous and the reference protects the connection from being destroyed
     53  * before its processing is finished).
     54  *
     55  * conn_recv is used to pass up packets to the ULP.
     56  * For TCP conn_recv changes. It is tcp_input_listener_unbound initially for
     57  * a listener, and changes to tcp_input_listener as the listener has picked a
     58  * good squeue. For other cases it is set to tcp_input_data.
     59  *
     60  * conn_recvicmp is used to pass up ICMP errors to the ULP.
     61  *
     62  * Classifier uses several hash tables:
     63  *
     64  * 	ipcl_conn_fanout:	contains all TCP connections in CONNECTED state
     65  *	ipcl_bind_fanout:	contains all connections in BOUND state
     66  *	ipcl_proto_fanout:	IPv4 protocol fanout
     67  *	ipcl_proto_fanout_v6:	IPv6 protocol fanout
     68  *	ipcl_udp_fanout:	contains all UDP connections
     69  *	ipcl_iptun_fanout:	contains all IP tunnel connections
     70  *	ipcl_globalhash_fanout:	contains all connections
     71  *
     72  * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
     73  * which need to view all existing connections.
     74  *
     75  * All tables are protected by per-bucket locks. When both per-bucket lock and
     76  * connection lock need to be held, the per-bucket lock should be acquired
     77  * first, followed by the connection lock.
     78  *
     79  * All functions doing search in one of these tables increment a reference
     80  * counter on the connection found (if any). This reference should be dropped
     81  * when the caller has finished processing the connection.
     82  *
     83  *
     84  * INTERFACES:
     85  * ===========
     86  *
     87  * Connection Lookup:
     88  * ------------------
     89  *
     90  * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, ira, ip_stack)
     91  * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, ira, ip_stack)
     92  *
     93  * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
     94  * it can't find any associated connection. If the connection is found, its
     95  * reference counter is incremented.
     96  *
     97  *	mp:	mblock, containing packet header. The full header should fit
     98  *		into a single mblock. It should also contain at least full IP
     99  *		and TCP or UDP header.
    100  *
    101  *	protocol: Either IPPROTO_TCP or IPPROTO_UDP.
    102  *
    103  *	hdr_len: The size of IP header. It is used to find TCP or UDP header in
    104  *		 the packet.
    105  *
    106  * 	ira->ira_zoneid: The zone in which the returned connection must be; the
    107  *		zoneid corresponding to the ire_zoneid on the IRE located for
    108  *		the packet's destination address.
    109  *
    110  *	ira->ira_flags: Contains the IRAF_TX_MAC_EXEMPTABLE and
    111  *		IRAF_TX_SHARED_ADDR flags
    112  *
    113  *	For TCP connections, the lookup order is as follows:
    114  *		5-tuple {src, dst, protocol, local port, remote port}
    115  *			lookup in ipcl_conn_fanout table.
    116  *		3-tuple {dst, remote port, protocol} lookup in
    117  *			ipcl_bind_fanout table.
    118  *
    119  *	For UDP connections, a 5-tuple {src, dst, protocol, local port,
    120  *	remote port} lookup is done on ipcl_udp_fanout. Note that,
 *	these interfaces do not handle cases where a packet belongs
 *	to multiple UDP clients, which is handled in IP itself.
    123  *
    124  * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
    125  * determine which actual zone gets the segment.  This is used only in a
    126  * labeled environment.  The matching rules are:
    127  *
    128  *	- If it's not a multilevel port, then the label on the packet selects
    129  *	  the zone.  Unlabeled packets are delivered to the global zone.
    130  *
    131  *	- If it's a multilevel port, then only the zone registered to receive
    132  *	  packets on that port matches.
    133  *
    134  * Also, in a labeled environment, packet labels need to be checked.  For fully
    135  * bound TCP connections, we can assume that the packet label was checked
    136  * during connection establishment, and doesn't need to be checked on each
    137  * packet.  For others, though, we need to check for strict equality or, for
    138  * multilevel ports, membership in the range or set.  This part currently does
    139  * a tnrh lookup on each packet, but could be optimized to use cached results
    140  * if that were necessary.  (SCTP doesn't come through here, but if it did,
    141  * we would apply the same rules as TCP.)
    142  *
    143  * An implication of the above is that fully-bound TCP sockets must always use
    144  * distinct 4-tuples; they can't be discriminated by label alone.
    145  *
    146  * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
    147  * as there's no connection set-up handshake and no shared state.
    148  *
    149  * Labels on looped-back packets within a single zone do not need to be
    150  * checked, as all processes in the same zone have the same label.
    151  *
    152  * Finally, for unlabeled packets received by a labeled system, special rules
    153  * apply.  We consider only the MLP if there is one.  Otherwise, we prefer a
    154  * socket in the zone whose label matches the default label of the sender, if
    155  * any.  In any event, the receiving socket must have SO_MAC_EXEMPT set and the
    156  * receiver's label must dominate the sender's default label.
    157  *
    158  * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack);
    159  * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
    160  *					 ip_stack);
    161  *
 *	Lookup routine to find an exact match for {src, dst, local port,
 *	remote port} for TCP connections in ipcl_conn_fanout. The address and
 *	ports are read from the IP and TCP header respectively.
    165  *
    166  * conn_t	*ipcl_lookup_listener_v4(lport, laddr, protocol,
    167  *					 zoneid, ip_stack);
    168  * conn_t	*ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
    169  *					 zoneid, ip_stack);
    170  *
    171  * 	Lookup routine to find a listener with the tuple {lport, laddr,
    172  * 	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
    173  * 	parameter interface index is also compared.
    174  *
    175  * void ipcl_walk(func, arg, ip_stack)
    176  *
    177  * 	Apply 'func' to every connection available. The 'func' is called as
    178  *	(*func)(connp, arg). The walk is non-atomic so connections may be
    179  *	created and destroyed during the walk. The CONN_CONDEMNED and
    180  *	CONN_INCIPIENT flags ensure that connections which are newly created
    181  *	or being destroyed are not selected by the walker.
    182  *
    183  * Table Updates
    184  * -------------
    185  *
    186  * int ipcl_conn_insert(connp);
    187  * int ipcl_conn_insert_v4(connp);
    188  * int ipcl_conn_insert_v6(connp);
    189  *
    190  *	Insert 'connp' in the ipcl_conn_fanout.
 *	Arguments:
    192  *		connp		conn_t to be inserted
    193  *
    194  *	Return value :
    195  *		0		if connp was inserted
    196  *		EADDRINUSE	if the connection with the same tuple
    197  *				already exists.
    198  *
    199  * int ipcl_bind_insert(connp);
    200  * int ipcl_bind_insert_v4(connp);
    201  * int ipcl_bind_insert_v6(connp);
    202  *
    203  * 	Insert 'connp' in ipcl_bind_fanout.
 * 	Arguments:
    205  * 		connp		conn_t to be inserted
    206  *
    207  *
    208  * void ipcl_hash_remove(connp);
    209  *
    210  * 	Removes the 'connp' from the connection fanout table.
    211  *
    212  * Connection Creation/Destruction
    213  * -------------------------------
    214  *
    215  * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
    216  *
    217  * 	Creates a new conn based on the type flag, inserts it into
    218  * 	globalhash table.
    219  *
    220  *	type:	This flag determines the type of conn_t which needs to be
    221  *		created i.e., which kmem_cache it comes from.
    222  *		IPCL_TCPCONN	indicates a TCP connection
    223  *		IPCL_SCTPCONN	indicates a SCTP connection
    224  *		IPCL_UDPCONN	indicates a UDP conn_t.
    225  *		IPCL_RAWIPCONN	indicates a RAWIP/ICMP conn_t.
    226  *		IPCL_RTSCONN	indicates a RTS conn_t.
    227  *		IPCL_IPCCONN	indicates all other connections.
    228  *
    229  * void ipcl_conn_destroy(connp)
    230  *
    231  * 	Destroys the connection state, removes it from the global
    232  * 	connection hash table and frees its memory.
    233  */
    234 
    235 #include <sys/types.h>
    236 #include <sys/stream.h>
    237 #include <sys/stropts.h>
    238 #include <sys/sysmacros.h>
    239 #include <sys/strsubr.h>
    240 #include <sys/strsun.h>
    241 #define	_SUN_TPI_VERSION 2
    242 #include <sys/ddi.h>
    243 #include <sys/cmn_err.h>
    244 #include <sys/debug.h>
    245 
    246 #include <sys/systm.h>
    247 #include <sys/param.h>
    248 #include <sys/kmem.h>
    249 #include <sys/isa_defs.h>
    250 #include <inet/common.h>
    251 #include <netinet/ip6.h>
    252 #include <netinet/icmp6.h>
    253 
    254 #include <inet/ip.h>
    255 #include <inet/ip_if.h>
    256 #include <inet/ip_ire.h>
    257 #include <inet/ip6.h>
    258 #include <inet/ip_ndp.h>
    259 #include <inet/ip_impl.h>
    260 #include <inet/udp_impl.h>
    261 #include <inet/sctp_ip.h>
    262 #include <inet/sctp/sctp_impl.h>
    263 #include <inet/rawip_impl.h>
    264 #include <inet/rts_impl.h>
    265 #include <inet/iptun/iptun_impl.h>
    266 
    267 #include <sys/cpuvar.h>
    268 
    269 #include <inet/ipclassifier.h>
    270 #include <inet/tcp.h>
    271 #include <inet/ipsec_impl.h>
    272 
    273 #include <sys/tsol/tnet.h>
    274 #include <sys/sockio.h>
    275 
/*
 * Old value for compatibility. Settable in /etc/system.  ipcl_init() only
 * consults it when ipcl_conn_hash_size is zero.
 */
uint_t tcp_conn_hash_size = 0;

/*
 * New value. Zero means choose automatically.  Settable in /etc/system.
 *
 * When the size is chosen automatically, ipcl_init() asks for one bucket
 * per ipcl_conn_hash_memfactor bytes of free memory, limited to
 * ipcl_conn_hash_maxsize buckets.  Whatever the source of the request,
 * ipcl_init() then rounds it up to one of the primes listed in P2Ps()
 * (never smaller than 383).
 */
uint_t ipcl_conn_hash_size = 0;
uint_t ipcl_conn_hash_memfactor = 8192;
uint_t ipcl_conn_hash_maxsize = 82500;

/*
 * bind/udp fanout table size.  ipcl_init() copies these, unmodified, into
 * each ip_stack_t.
 */
uint_t ipcl_bind_fanout_size = 512;
uint_t ipcl_udp_fanout_size = 16384;

/* Raw socket fanout size.  Must be a power of 2. */
uint_t ipcl_raw_fanout_size = 256;

/*
 * The IPCL_IPTUN_HASH() function works best with a prime table size.  We
 * expect that most large deployments would have hundreds of tunnels, and
 * thousands in the extreme case.
 */
uint_t ipcl_iptun_fanout_size = 6143;
    297 
/*
 * Table of primes useful for hashing, indexed by N.  For N of 3-28 the
 * entry is the nearest prime <= 2^N - 2^(N-2).  Entries 0-2 and the final
 * entry (index 29) are 0; ipcl_init() treats a 0 entry as "out of range".
 *
 * The macro expands to a brace-enclosed initializer list, so it is used as
 * the initializer of an array:  int sizes[] = P2Ps();
 */

#define	P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,	\
		6143, 12281, 24571, 49139, 98299, 196597, 393209,	\
		786431, 1572853, 3145721, 6291449, 12582893, 25165813,	\
		50331599, 100663291, 201326557, 0}
    307 
/*
 * wrapper structure to ensure that conn and what follows it (tcp_t, etc)
 * are aligned on cache lines.
 *
 * The union overlays the conn_t with a filler array whose length is
 * CACHE_ALIGN(conn_s), so sizeof (itc_t) is at least that large.
 * NOTE(review): CACHE_ALIGN() is defined elsewhere; presumably it rounds
 * the size of the named struct up to a cache-line multiple -- confirm.
 * ipcl_g_init() sizes the per-protocol caches as sizeof (itc_t) plus the
 * size of the protocol structure.
 */
typedef union itc_s {
	conn_t	itc_conn;
	char	itcu_filler[CACHE_ALIGN(conn_s)];
} itc_t;
    316 
/*
 * kmem caches that conn_t structures are allocated from.  The ones defined
 * here are created in ipcl_g_init() and destroyed in ipcl_g_destroy().
 * sctp_conn_cache and tcp_sack_info_cache are defined elsewhere; this file
 * only allocates from the former (ipcl_conn_create()) and frees into the
 * latter (ipcl_conn_destroy()).
 */
struct kmem_cache  *tcp_conn_cache;
struct kmem_cache  *ip_conn_cache;
extern struct kmem_cache  *sctp_conn_cache;
extern struct kmem_cache  *tcp_sack_info_cache;
struct kmem_cache  *udp_conn_cache;
struct kmem_cache  *rawip_conn_cache;
struct kmem_cache  *rts_conn_cache;

/*
 * TCP timer mblk helpers, defined outside this file.
 * NOTE(review): presumably used by the TCP conn constructor/destructor
 * further down in this file -- confirm.
 */
extern void	tcp_timermp_free(tcp_t *);
extern mblk_t	*tcp_timermp_alloc(int);

/*
 * Constructor/destructor callbacks handed to kmem_cache_create() in
 * ipcl_g_init(), one pair per conn cache.
 */
static int	ip_conn_constructor(void *, void *, int);
static void	ip_conn_destructor(void *, void *);

static int	tcp_conn_constructor(void *, void *, int);
static void	tcp_conn_destructor(void *, void *);

static int	udp_conn_constructor(void *, void *, int);
static void	udp_conn_destructor(void *, void *);

static int	rawip_conn_constructor(void *, void *, int);
static void	rawip_conn_destructor(void *, void *);

static int	rts_conn_constructor(void *, void *, int);
static void	rts_conn_destructor(void *, void *);
    342 
    343 /*
    344  * Global (for all stack instances) init routine
    345  */
    346 void
    347 ipcl_g_init(void)
    348 {
    349 	ip_conn_cache = kmem_cache_create("ip_conn_cache",
    350 	    sizeof (conn_t), CACHE_ALIGN_SIZE,
    351 	    ip_conn_constructor, ip_conn_destructor,
    352 	    NULL, NULL, NULL, 0);
    353 
    354 	tcp_conn_cache = kmem_cache_create("tcp_conn_cache",
    355 	    sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE,
    356 	    tcp_conn_constructor, tcp_conn_destructor,
    357 	    tcp_conn_reclaim, NULL, NULL, 0);
    358 
    359 	udp_conn_cache = kmem_cache_create("udp_conn_cache",
    360 	    sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE,
    361 	    udp_conn_constructor, udp_conn_destructor,
    362 	    NULL, NULL, NULL, 0);
    363 
    364 	rawip_conn_cache = kmem_cache_create("rawip_conn_cache",
    365 	    sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE,
    366 	    rawip_conn_constructor, rawip_conn_destructor,
    367 	    NULL, NULL, NULL, 0);
    368 
    369 	rts_conn_cache = kmem_cache_create("rts_conn_cache",
    370 	    sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
    371 	    rts_conn_constructor, rts_conn_destructor,
    372 	    NULL, NULL, NULL, 0);
    373 }
    374 
    375 /*
    376  * ipclassifier intialization routine, sets up hash tables.
    377  */
    378 void
    379 ipcl_init(ip_stack_t *ipst)
    380 {
    381 	int i;
    382 	int sizes[] = P2Ps();
    383 
    384 	/*
    385 	 * Calculate size of conn fanout table from /etc/system settings
    386 	 */
    387 	if (ipcl_conn_hash_size != 0) {
    388 		ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size;
    389 	} else if (tcp_conn_hash_size != 0) {
    390 		ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size;
    391 	} else {
    392 		extern pgcnt_t freemem;
    393 
    394 		ipst->ips_ipcl_conn_fanout_size =
    395 		    (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
    396 
    397 		if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) {
    398 			ipst->ips_ipcl_conn_fanout_size =
    399 			    ipcl_conn_hash_maxsize;
    400 		}
    401 	}
    402 
    403 	for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
    404 		if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) {
    405 			break;
    406 		}
    407 	}
    408 	if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) {
    409 		/* Out of range, use the 2^16 value */
    410 		ipst->ips_ipcl_conn_fanout_size = sizes[16];
    411 	}
    412 
    413 	/* Take values from /etc/system */
    414 	ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size;
    415 	ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size;
    416 	ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size;
    417 	ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size;
    418 
    419 	ASSERT(ipst->ips_ipcl_conn_fanout == NULL);
    420 
    421 	ipst->ips_ipcl_conn_fanout = kmem_zalloc(
    422 	    ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
    423 
    424 	for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
    425 		mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL,
    426 		    MUTEX_DEFAULT, NULL);
    427 	}
    428 
    429 	ipst->ips_ipcl_bind_fanout = kmem_zalloc(
    430 	    ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
    431 
    432 	for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
    433 		mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL,
    434 		    MUTEX_DEFAULT, NULL);
    435 	}
    436 
    437 	ipst->ips_ipcl_proto_fanout_v4 = kmem_zalloc(IPPROTO_MAX *
    438 	    sizeof (connf_t), KM_SLEEP);
    439 	for (i = 0; i < IPPROTO_MAX; i++) {
    440 		mutex_init(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock, NULL,
    441 		    MUTEX_DEFAULT, NULL);
    442 	}
    443 
    444 	ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX *
    445 	    sizeof (connf_t), KM_SLEEP);
    446 	for (i = 0; i < IPPROTO_MAX; i++) {
    447 		mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL,
    448 		    MUTEX_DEFAULT, NULL);
    449 	}
    450 
    451 	ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP);
    452 	mutex_init(&ipst->ips_rts_clients->connf_lock,
    453 	    NULL, MUTEX_DEFAULT, NULL);
    454 
    455 	ipst->ips_ipcl_udp_fanout = kmem_zalloc(
    456 	    ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP);
    457 	for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
    458 		mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL,
    459 		    MUTEX_DEFAULT, NULL);
    460 	}
    461 
    462 	ipst->ips_ipcl_iptun_fanout = kmem_zalloc(
    463 	    ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP);
    464 	for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
    465 		mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL,
    466 		    MUTEX_DEFAULT, NULL);
    467 	}
    468 
    469 	ipst->ips_ipcl_raw_fanout = kmem_zalloc(
    470 	    ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP);
    471 	for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
    472 		mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL,
    473 		    MUTEX_DEFAULT, NULL);
    474 	}
    475 
    476 	ipst->ips_ipcl_globalhash_fanout = kmem_zalloc(
    477 	    sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP);
    478 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
    479 		mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock,
    480 		    NULL, MUTEX_DEFAULT, NULL);
    481 	}
    482 }
    483 
    484 void
    485 ipcl_g_destroy(void)
    486 {
    487 	kmem_cache_destroy(ip_conn_cache);
    488 	kmem_cache_destroy(tcp_conn_cache);
    489 	kmem_cache_destroy(udp_conn_cache);
    490 	kmem_cache_destroy(rawip_conn_cache);
    491 	kmem_cache_destroy(rts_conn_cache);
    492 }
    493 
    494 /*
    495  * All user-level and kernel use of the stack must be gone
    496  * by now.
    497  */
    498 void
    499 ipcl_destroy(ip_stack_t *ipst)
    500 {
    501 	int i;
    502 
    503 	for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
    504 		ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL);
    505 		mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock);
    506 	}
    507 	kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size *
    508 	    sizeof (connf_t));
    509 	ipst->ips_ipcl_conn_fanout = NULL;
    510 
    511 	for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
    512 		ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL);
    513 		mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock);
    514 	}
    515 	kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size *
    516 	    sizeof (connf_t));
    517 	ipst->ips_ipcl_bind_fanout = NULL;
    518 
    519 	for (i = 0; i < IPPROTO_MAX; i++) {
    520 		ASSERT(ipst->ips_ipcl_proto_fanout_v4[i].connf_head == NULL);
    521 		mutex_destroy(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock);
    522 	}
    523 	kmem_free(ipst->ips_ipcl_proto_fanout_v4,
    524 	    IPPROTO_MAX * sizeof (connf_t));
    525 	ipst->ips_ipcl_proto_fanout_v4 = NULL;
    526 
    527 	for (i = 0; i < IPPROTO_MAX; i++) {
    528 		ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL);
    529 		mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock);
    530 	}
    531 	kmem_free(ipst->ips_ipcl_proto_fanout_v6,
    532 	    IPPROTO_MAX * sizeof (connf_t));
    533 	ipst->ips_ipcl_proto_fanout_v6 = NULL;
    534 
    535 	for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
    536 		ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL);
    537 		mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock);
    538 	}
    539 	kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size *
    540 	    sizeof (connf_t));
    541 	ipst->ips_ipcl_udp_fanout = NULL;
    542 
    543 	for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
    544 		ASSERT(ipst->ips_ipcl_iptun_fanout[i].connf_head == NULL);
    545 		mutex_destroy(&ipst->ips_ipcl_iptun_fanout[i].connf_lock);
    546 	}
    547 	kmem_free(ipst->ips_ipcl_iptun_fanout,
    548 	    ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t));
    549 	ipst->ips_ipcl_iptun_fanout = NULL;
    550 
    551 	for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
    552 		ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL);
    553 		mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock);
    554 	}
    555 	kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size *
    556 	    sizeof (connf_t));
    557 	ipst->ips_ipcl_raw_fanout = NULL;
    558 
    559 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
    560 		ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL);
    561 		mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
    562 	}
    563 	kmem_free(ipst->ips_ipcl_globalhash_fanout,
    564 	    sizeof (connf_t) * CONN_G_HASH_SIZE);
    565 	ipst->ips_ipcl_globalhash_fanout = NULL;
    566 
    567 	ASSERT(ipst->ips_rts_clients->connf_head == NULL);
    568 	mutex_destroy(&ipst->ips_rts_clients->connf_lock);
    569 	kmem_free(ipst->ips_rts_clients, sizeof (connf_t));
    570 	ipst->ips_rts_clients = NULL;
    571 }
    572 
    573 /*
    574  * conn creation routine. initialize the conn, sets the reference
    575  * and inserts it in the global hash table.
    576  */
    577 conn_t *
    578 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns)
    579 {
    580 	conn_t	*connp;
    581 	struct kmem_cache *conn_cache;
    582 
    583 	switch (type) {
    584 	case IPCL_SCTPCONN:
    585 		if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
    586 			return (NULL);
    587 		sctp_conn_init(connp);
    588 		netstack_hold(ns);
    589 		connp->conn_netstack = ns;
    590 		connp->conn_ixa->ixa_ipst = ns->netstack_ip;
    591 		ipcl_globalhash_insert(connp);
    592 		return (connp);
    593 
    594 	case IPCL_TCPCONN:
    595 		conn_cache = tcp_conn_cache;
    596 		break;
    597 
    598 	case IPCL_UDPCONN:
    599 		conn_cache = udp_conn_cache;
    600 		break;
    601 
    602 	case IPCL_RAWIPCONN:
    603 		conn_cache = rawip_conn_cache;
    604 		break;
    605 
    606 	case IPCL_RTSCONN:
    607 		conn_cache = rts_conn_cache;
    608 		break;
    609 
    610 	case IPCL_IPCCONN:
    611 		conn_cache = ip_conn_cache;
    612 		break;
    613 
    614 	default:
    615 		connp = NULL;
    616 		ASSERT(0);
    617 	}
    618 
    619 	if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL)
    620 		return (NULL);
    621 
    622 	connp->conn_ref = 1;
    623 	netstack_hold(ns);
    624 	connp->conn_netstack = ns;
    625 	connp->conn_ixa->ixa_ipst = ns->netstack_ip;
    626 	ipcl_globalhash_insert(connp);
    627 	return (connp);
    628 }
    629 
    630 void
    631 ipcl_conn_destroy(conn_t *connp)
    632 {
    633 	mblk_t	*mp;
    634 	netstack_t	*ns = connp->conn_netstack;
    635 
    636 	ASSERT(!MUTEX_HELD(&connp->conn_lock));
    637 	ASSERT(connp->conn_ref == 0);
    638 
    639 	DTRACE_PROBE1(conn__destroy, conn_t *, connp);
    640 
    641 	if (connp->conn_cred != NULL) {
    642 		crfree(connp->conn_cred);
    643 		connp->conn_cred = NULL;
    644 	}
    645 
    646 	if (connp->conn_ht_iphc != NULL) {
    647 		kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated);
    648 		connp->conn_ht_iphc = NULL;
    649 		connp->conn_ht_iphc_allocated = 0;
    650 		connp->conn_ht_iphc_len = 0;
    651 		connp->conn_ht_ulp = NULL;
    652 		connp->conn_ht_ulp_len = 0;
    653 	}
    654 	ip_pkt_free(&connp->conn_xmit_ipp);
    655 
    656 	ipcl_globalhash_remove(connp);
    657 
    658 	if (connp->conn_latch != NULL) {
    659 		IPLATCH_REFRELE(connp->conn_latch);
    660 		connp->conn_latch = NULL;
    661 	}
    662 	if (connp->conn_latch_in_policy != NULL) {
    663 		IPPOL_REFRELE(connp->conn_latch_in_policy);
    664 		connp->conn_latch_in_policy = NULL;
    665 	}
    666 	if (connp->conn_latch_in_action != NULL) {
    667 		IPACT_REFRELE(connp->conn_latch_in_action);
    668 		connp->conn_latch_in_action = NULL;
    669 	}
    670 	if (connp->conn_policy != NULL) {
    671 		IPPH_REFRELE(connp->conn_policy, ns);
    672 		connp->conn_policy = NULL;
    673 	}
    674 
    675 	if (connp->conn_ipsec_opt_mp != NULL) {
    676 		freemsg(connp->conn_ipsec_opt_mp);
    677 		connp->conn_ipsec_opt_mp = NULL;
    678 	}
    679 
    680 	if (connp->conn_flags & IPCL_TCPCONN) {
    681 		tcp_t *tcp = connp->conn_tcp;
    682 
    683 		tcp_free(tcp);
    684 		mp = tcp->tcp_timercache;
    685 
    686 		tcp->tcp_tcps = NULL;
    687 
    688 		if (tcp->tcp_sack_info != NULL) {
    689 			bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t));
    690 			kmem_cache_free(tcp_sack_info_cache,
    691 			    tcp->tcp_sack_info);
    692 		}
    693 
    694 		/*
    695 		 * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate
    696 		 * the mblk.
    697 		 */
    698 		if (tcp->tcp_rsrv_mp != NULL) {
    699 			freeb(tcp->tcp_rsrv_mp);
    700 			tcp->tcp_rsrv_mp = NULL;
    701 			mutex_destroy(&tcp->tcp_rsrv_mp_lock);
    702 		}
    703 
    704 		ipcl_conn_cleanup(connp);
    705 		connp->conn_flags = IPCL_TCPCONN;
    706 		if (ns != NULL) {
    707 			ASSERT(tcp->tcp_tcps == NULL);
    708 			connp->conn_netstack = NULL;
    709 			connp->conn_ixa->ixa_ipst = NULL;
    710 			netstack_rele(ns);
    711 		}
    712 
    713 		bzero(tcp, sizeof (tcp_t));
    714 
    715 		tcp->tcp_timercache = mp;
    716 		tcp->tcp_connp = connp;
    717 		kmem_cache_free(tcp_conn_cache, connp);
    718 		return;
    719 	}
    720 
    721 	if (connp->conn_flags & IPCL_SCTPCONN) {
    722 		ASSERT(ns != NULL);
    723 		sctp_free(connp);
    724 		return;
    725 	}
    726 
    727 	ipcl_conn_cleanup(connp);
    728 	if (ns != NULL) {
    729 		connp->conn_netstack = NULL;
    730 		connp->conn_ixa->ixa_ipst = NULL;
    731 		netstack_rele(ns);
    732 	}
    733 
    734 	/* leave conn_priv aka conn_udp, conn_icmp, etc in place. */
    735 	if (connp->conn_flags & IPCL_UDPCONN) {
    736 		connp->conn_flags = IPCL_UDPCONN;
    737 		kmem_cache_free(udp_conn_cache, connp);
    738 	} else if (connp->conn_flags & IPCL_RAWIPCONN) {
    739 		connp->conn_flags = IPCL_RAWIPCONN;
    740 		connp->conn_proto = IPPROTO_ICMP;
    741 		connp->conn_ixa->ixa_protocol = connp->conn_proto;
    742 		kmem_cache_free(rawip_conn_cache, connp);
    743 	} else if (connp->conn_flags & IPCL_RTSCONN) {
    744 		connp->conn_flags = IPCL_RTSCONN;
    745 		kmem_cache_free(rts_conn_cache, connp);
    746 	} else {
    747 		connp->conn_flags = IPCL_IPCCONN;
    748 		ASSERT(connp->conn_flags & IPCL_IPCCONN);
    749 		ASSERT(connp->conn_priv == NULL);
    750 		kmem_cache_free(ip_conn_cache, connp);
    751 	}
    752 }
    753 
    754 /*
    755  * Running in cluster mode - deregister listener information
    756  */
    757 static void
    758 ipcl_conn_unlisten(conn_t *connp)
    759 {
    760 	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0);
    761 	ASSERT(connp->conn_lport != 0);
    762 
    763 	if (cl_inet_unlisten != NULL) {
    764 		sa_family_t	addr_family;
    765 		uint8_t		*laddrp;
    766 
    767 		if (connp->conn_ipversion == IPV6_VERSION) {
    768 			addr_family = AF_INET6;
    769 			laddrp = (uint8_t *)&connp->conn_bound_addr_v6;
    770 		} else {
    771 			addr_family = AF_INET;
    772 			laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
    773 		}
    774 		(*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid,
    775 		    IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL);
    776 	}
    777 	connp->conn_flags &= ~IPCL_CL_LISTENER;
    778 }
    779 
    780 /*
    781  * We set the IPCL_REMOVED flag (instead of clearing the flag indicating
    782  * which table the conn belonged to). So for debugging we can see which hash
    783  * table this connection was in.
    784  */
    785 #define	IPCL_HASH_REMOVE(connp)	{					\
    786 	connf_t	*connfp = (connp)->conn_fanout;				\
    787 	ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));			\
    788 	if (connfp != NULL) {						\
    789 		mutex_enter(&connfp->connf_lock);			\
    790 		if ((connp)->conn_next != NULL)				\
    791 			(connp)->conn_next->conn_prev =			\
    792 			    (connp)->conn_prev;				\
    793 		if ((connp)->conn_prev != NULL)				\
    794 			(connp)->conn_prev->conn_next =			\
    795 			    (connp)->conn_next;				\
    796 		else							\
    797 			connfp->connf_head = (connp)->conn_next;	\
    798 		(connp)->conn_fanout = NULL;				\
    799 		(connp)->conn_next = NULL;				\
    800 		(connp)->conn_prev = NULL;				\
    801 		(connp)->conn_flags |= IPCL_REMOVED;			\
    802 		if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0)	\
    803 			ipcl_conn_unlisten((connp));			\
    804 		CONN_DEC_REF((connp));					\
    805 		mutex_exit(&connfp->connf_lock);			\
    806 	}								\
    807 }
    808 
    809 void
    810 ipcl_hash_remove(conn_t *connp)
    811 {
    812 	uint8_t		protocol = connp->conn_proto;
    813 
    814 	IPCL_HASH_REMOVE(connp);
    815 	if (protocol == IPPROTO_RSVP)
    816 		ill_set_inputfn_all(connp->conn_netstack->netstack_ip);
    817 }
    818 
    819 /*
    820  * The whole purpose of this function is allow removal of
    821  * a conn_t from the connected hash for timewait reclaim.
    822  * This is essentially a TW reclaim fastpath where timewait
    823  * collector checks under fanout lock (so no one else can
    824  * get access to the conn_t) that refcnt is 2 i.e. one for
    825  * TCP and one for the classifier hash list. If ref count
    826  * is indeed 2, we can just remove the conn under lock and
    827  * avoid cleaning up the conn under squeue. This gives us
    828  * improved performance.
    829  */
    830 void
    831 ipcl_hash_remove_locked(conn_t *connp, connf_t	*connfp)
    832 {
    833 	ASSERT(MUTEX_HELD(&connfp->connf_lock));
    834 	ASSERT(MUTEX_HELD(&connp->conn_lock));
    835 	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);
    836 
    837 	if ((connp)->conn_next != NULL) {
    838 		(connp)->conn_next->conn_prev = (connp)->conn_prev;
    839 	}
    840 	if ((connp)->conn_prev != NULL) {
    841 		(connp)->conn_prev->conn_next = (connp)->conn_next;
    842 	} else {
    843 		connfp->connf_head = (connp)->conn_next;
    844 	}
    845 	(connp)->conn_fanout = NULL;
    846 	(connp)->conn_next = NULL;
    847 	(connp)->conn_prev = NULL;
    848 	(connp)->conn_flags |= IPCL_REMOVED;
    849 	ASSERT((connp)->conn_ref == 2);
    850 	(connp)->conn_ref--;
    851 }
    852 
/*
 * Push connp onto the head of connfp's list and mark it IPCL_CONNECTED.
 *
 * This macro does not take connfp->connf_lock itself; as its name indicates,
 * callers in this file invoke it with that lock already held.  connp must
 * not be on any fanout list: conn_fanout, conn_next and conn_prev are all
 * asserted to be NULL.  IPCL_REMOVED is cleared from conn_flags and a
 * reference is taken on connp with CONN_INC_REF().
 */
#define	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) {		\
	ASSERT((connp)->conn_fanout == NULL);				\
	ASSERT((connp)->conn_next == NULL);				\
	ASSERT((connp)->conn_prev == NULL);				\
	if ((connfp)->connf_head != NULL) {				\
		(connfp)->connf_head->conn_prev = (connp);		\
		(connp)->conn_next = (connfp)->connf_head;		\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connfp)->connf_head = (connp);					\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_CONNECTED;						\
	CONN_INC_REF(connp);						\
}
    867 
/*
 * Insert connp at the head of connfp's list as a connected conn, for callers
 * that do not hold connfp->connf_lock.
 *
 * connp is first removed from whatever fanout list it is on (a no-op when it
 * is on none), then connf_lock is taken around
 * IPCL_HASH_INSERT_CONNECTED_LOCKED.
 */
#define	IPCL_HASH_INSERT_CONNECTED(connfp, connp) {			\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);		\
	mutex_exit(&(connfp)->connf_lock);				\
}
    874 
/*
 * Insert connp into connfp's list and mark it IPCL_BOUND.
 *
 * connp is first removed from whatever fanout list it is on.  Then, with
 * connf_lock held, the list is walked until the first entry whose
 * conn_laddr_v6 satisfies _IPCL_V4_MATCH_ANY, or until the end of the list;
 * connp is linked in immediately before that entry (at the tail when there
 * is none, at the head when it is the first entry).  IPCL_REMOVED is cleared
 * and a reference is taken with CONN_INC_REF().
 *
 * NOTE(review): _IPCL_V4_MATCH_ANY is defined outside this section;
 * presumably it identifies wildcard local addresses, so that conns inserted
 * here sort ahead of wildcard binds -- confirm against its definition.
 *
 * The macro declares locals named pconnp and nconnp, so its arguments must
 * not be expressions that use those names.
 */
#define	IPCL_HASH_INSERT_BOUND(connfp, connp) {				\
	conn_t *pconnp = NULL, *nconnp;					\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	nconnp = (connfp)->connf_head;					\
	while (nconnp != NULL &&					\
	    !_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6)) {		\
		pconnp = nconnp;					\
		nconnp = nconnp->conn_next;				\
	}								\
	if (pconnp != NULL) {						\
		pconnp->conn_next = (connp);				\
		(connp)->conn_prev = pconnp;				\
	} else {							\
		(connfp)->connf_head = (connp);				\
	}								\
	if (nconnp != NULL) {						\
		(connp)->conn_next = nconnp;				\
		nconnp->conn_prev = (connp);				\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF(connp);						\
	mutex_exit(&(connfp)->connf_lock);				\
}
    901 
    902 #define	IPCL_HASH_INSERT_WILDCARD(connfp, connp) {			\
    903 	conn_t **list, *prev, *next;					\
    904 	boolean_t isv4mapped =						\
    905 	    IN6_IS_ADDR_V4MAPPED(&(connp)->conn_laddr_v6);		\
    906 	IPCL_HASH_REMOVE((connp));					\
    907 	mutex_enter(&(connfp)->connf_lock);				\
    908 	list = &(connfp)->connf_head;					\
    909 	prev = NULL;							\
    910 	while ((next = *list) != NULL) {				\
    911 		if (isv4mapped &&					\
    912 		    IN6_IS_ADDR_UNSPECIFIED(&next->conn_laddr_v6) &&	\
    913 		    connp->conn_zoneid == next->conn_zoneid) {		\
    914 			(connp)->conn_next = next;			\
    915 			if (prev != NULL)				\
    916 				prev = next->conn_prev;			\
    917 			next->conn_prev = (connp);			\
    918 			break;						\
    919 		}							\
    920 		list = &next->conn_next;				\
    921 		prev = next;						\
    922 	}								\
    923 	(connp)->conn_prev = prev;					\
    924 	*list = (connp);						\
    925 	(connp)->conn_fanout = (connfp);				\
    926 	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
    927 	    IPCL_BOUND;							\
    928 	CONN_INC_REF((connp));						\
    929 	mutex_exit(&(connfp)->connf_lock);				\
    930 }
    931 
/*
 * Function form of IPCL_HASH_INSERT_WILDCARD: inserts connp into the bucket
 * connfp using that macro's placement rules.  The macro takes and releases
 * connfp->connf_lock itself.
 */
void
ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
{
	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
}
    937 
    938 /*
    939  * Because the classifier is used to classify inbound packets, the destination
    940  * address is meant to be our local tunnel address (tunnel source), and the
    941  * source the remote tunnel address (tunnel destination).
    942  *
    943  * Note that conn_proto can't be used for fanout since the upper protocol
    944  * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel.
    945  */
    946 conn_t *
    947 ipcl_iptun_classify_v4(ipaddr_t *src, ipaddr_t *dst, ip_stack_t *ipst)
    948 {
    949 	connf_t	*connfp;
    950 	conn_t	*connp;
    951 
    952 	/* first look for IPv4 tunnel links */
    953 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, *src)];
    954 	mutex_enter(&connfp->connf_lock);
    955 	for (connp = connfp->connf_head; connp != NULL;
    956 	    connp = connp->conn_next) {
    957 		if (IPCL_IPTUN_MATCH(connp, *dst, *src))
    958 			break;
    959 	}
    960 	if (connp != NULL)
    961 		goto done;
    962 
    963 	mutex_exit(&connfp->connf_lock);
    964 
    965 	/* We didn't find an IPv4 tunnel, try a 6to4 tunnel */
    966 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst,
    967 	    INADDR_ANY)];
    968 	mutex_enter(&connfp->connf_lock);
    969 	for (connp = connfp->connf_head; connp != NULL;
    970 	    connp = connp->conn_next) {
    971 		if (IPCL_IPTUN_MATCH(connp, *dst, INADDR_ANY))
    972 			break;
    973 	}
    974 done:
    975 	if (connp != NULL)
    976 		CONN_INC_REF(connp);
    977 	mutex_exit(&connfp->connf_lock);
    978 	return (connp);
    979 }
    980 
    981 conn_t *
    982 ipcl_iptun_classify_v6(in6_addr_t *src, in6_addr_t *dst, ip_stack_t *ipst)
    983 {
    984 	connf_t	*connfp;
    985 	conn_t	*connp;
    986 
    987 	/* Look for an IPv6 tunnel link */
    988 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(dst, src)];
    989 	mutex_enter(&connfp->connf_lock);
    990 	for (connp = connfp->connf_head; connp != NULL;
    991 	    connp = connp->conn_next) {
    992 		if (IPCL_IPTUN_MATCH_V6(connp, dst, src)) {
    993 			CONN_INC_REF(connp);
    994 			break;
    995 		}
    996 	}
    997 	mutex_exit(&connfp->connf_lock);
    998 	return (connp);
    999 }
   1000 
   1001 /*
   1002  * This function is used only for inserting SCTP raw socket now.
   1003  * This may change later.
   1004  *
   1005  * Note that only one raw socket can be bound to a port.  The param
   1006  * lport is in network byte order.
   1007  */
   1008 static int
   1009 ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
   1010 {
   1011 	connf_t	*connfp;
   1012 	conn_t	*oconnp;
   1013 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
   1014 
   1015 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
   1016 
   1017 	/* Check for existing raw socket already bound to the port. */
   1018 	mutex_enter(&connfp->connf_lock);
   1019 	for (oconnp = connfp->connf_head; oconnp != NULL;
   1020 	    oconnp = oconnp->conn_next) {
   1021 		if (oconnp->conn_lport == lport &&
   1022 		    oconnp->conn_zoneid == connp->conn_zoneid &&
   1023 		    oconnp->conn_family == connp->conn_family &&
   1024 		    ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
   1025 		    IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_laddr_v6) ||
   1026 		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6) ||
   1027 		    IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_laddr_v6)) ||
   1028 		    IN6_ARE_ADDR_EQUAL(&oconnp->conn_laddr_v6,
   1029 		    &connp->conn_laddr_v6))) {
   1030 			break;
   1031 		}
   1032 	}
   1033 	mutex_exit(&connfp->connf_lock);
   1034 	if (oconnp != NULL)
   1035 		return (EADDRNOTAVAIL);
   1036 
   1037 	if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) ||
   1038 	    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
   1039 		if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
   1040 		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6)) {
   1041 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
   1042 		} else {
   1043 			IPCL_HASH_INSERT_BOUND(connfp, connp);
   1044 		}
   1045 	} else {
   1046 		IPCL_HASH_INSERT_CONNECTED(connfp, connp);
   1047 	}
   1048 	return (0);
   1049 }
   1050 
   1051 static int
   1052 ipcl_iptun_hash_insert(conn_t *connp, ip_stack_t *ipst)
   1053 {
   1054 	connf_t	*connfp;
   1055 	conn_t	*tconnp;
   1056 	ipaddr_t laddr = connp->conn_laddr_v4;
   1057 	ipaddr_t faddr = connp->conn_faddr_v4;
   1058 
   1059 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(laddr, faddr)];
   1060 	mutex_enter(&connfp->connf_lock);
   1061 	for (tconnp = connfp->connf_head; tconnp != NULL;
   1062 	    tconnp = tconnp->conn_next) {
   1063 		if (IPCL_IPTUN_MATCH(tconnp, laddr, faddr)) {
   1064 			/* A tunnel is already bound to these addresses. */
   1065 			mutex_exit(&connfp->connf_lock);
   1066 			return (EADDRINUSE);
   1067 		}
   1068 	}
   1069 	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
   1070 	mutex_exit(&connfp->connf_lock);
   1071 	return (0);
   1072 }
   1073 
   1074 static int
   1075 ipcl_iptun_hash_insert_v6(conn_t *connp, ip_stack_t *ipst)
   1076 {
   1077 	connf_t	*connfp;
   1078 	conn_t	*tconnp;
   1079 	in6_addr_t *laddr = &connp->conn_laddr_v6;
   1080 	in6_addr_t *faddr = &connp->conn_faddr_v6;
   1081 
   1082 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(laddr, faddr)];
   1083 	mutex_enter(&connfp->connf_lock);
   1084 	for (tconnp = connfp->connf_head; tconnp != NULL;
   1085 	    tconnp = tconnp->conn_next) {
   1086 		if (IPCL_IPTUN_MATCH_V6(tconnp, laddr, faddr)) {
   1087 			/* A tunnel is already bound to these addresses. */
   1088 			mutex_exit(&connfp->connf_lock);
   1089 			return (EADDRINUSE);
   1090 		}
   1091 	}
   1092 	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
   1093 	mutex_exit(&connfp->connf_lock);
   1094 	return (0);
   1095 }
   1096 
   1097 /*
   1098  * Check for a MAC exemption conflict on a labeled system.  Note that for
   1099  * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the
   1100  * transport layer.  This check is for binding all other protocols.
   1101  *
   1102  * Returns true if there's a conflict.
   1103  */
   1104 static boolean_t
   1105 check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst)
   1106 {
   1107 	connf_t	*connfp;
   1108 	conn_t *tconn;
   1109 
   1110 	connfp = &ipst->ips_ipcl_proto_fanout_v4[connp->conn_proto];
   1111 	mutex_enter(&connfp->connf_lock);
   1112 	for (tconn = connfp->connf_head; tconn != NULL;
   1113 	    tconn = tconn->conn_next) {
   1114 		/* We don't allow v4 fallback for v6 raw socket */
   1115 		if (connp->conn_family != tconn->conn_family)
   1116 			continue;
   1117 		/* If neither is exempt, then there's no conflict */
   1118 		if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
   1119 		    (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
   1120 			continue;
   1121 		/* We are only concerned about sockets for a different zone */
   1122 		if (connp->conn_zoneid == tconn->conn_zoneid)
   1123 			continue;
   1124 		/* If both are bound to different specific addrs, ok */
   1125 		if (connp->conn_laddr_v4 != INADDR_ANY &&
   1126 		    tconn->conn_laddr_v4 != INADDR_ANY &&
   1127 		    connp->conn_laddr_v4 != tconn->conn_laddr_v4)
   1128 			continue;
   1129 		/* These two conflict; fail */
   1130 		break;
   1131 	}
   1132 	mutex_exit(&connfp->connf_lock);
   1133 	return (tconn != NULL);
   1134 }
   1135 
   1136 static boolean_t
   1137 check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst)
   1138 {
   1139 	connf_t	*connfp;
   1140 	conn_t *tconn;
   1141 
   1142 	connfp = &ipst->ips_ipcl_proto_fanout_v6[connp->conn_proto];
   1143 	mutex_enter(&connfp->connf_lock);
   1144 	for (tconn = connfp->connf_head; tconn != NULL;
   1145 	    tconn = tconn->conn_next) {
   1146 		/* We don't allow v4 fallback for v6 raw socket */
   1147 		if (connp->conn_family != tconn->conn_family)
   1148 			continue;
   1149 		/* If neither is exempt, then there's no conflict */
   1150 		if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
   1151 		    (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
   1152 			continue;
   1153 		/* We are only concerned about sockets for a different zone */
   1154 		if (connp->conn_zoneid == tconn->conn_zoneid)
   1155 			continue;
   1156 		/* If both are bound to different addrs, ok */
   1157 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) &&
   1158 		    !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_laddr_v6) &&
   1159 		    !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
   1160 		    &tconn->conn_laddr_v6))
   1161 			continue;
   1162 		/* These two conflict; fail */
   1163 		break;
   1164 	}
   1165 	mutex_exit(&connfp->connf_lock);
   1166 	return (tconn != NULL);
   1167 }
   1168 
   1169 /*
   1170  * (v4, v6) bind hash insertion routines
   1171  * The caller has already setup the conn (conn_proto, conn_laddr_v6, conn_lport)
   1172  */
   1173 
   1174 int
   1175 ipcl_bind_insert(conn_t *connp)
   1176 {
   1177 	if (connp->conn_ipversion == IPV6_VERSION)
   1178 		return (ipcl_bind_insert_v6(connp));
   1179 	else
   1180 		return (ipcl_bind_insert_v4(connp));
   1181 }
   1182 
   1183 int
   1184 ipcl_bind_insert_v4(conn_t *connp)
   1185 {
   1186 	connf_t	*connfp;
   1187 	int	ret = 0;
   1188 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
   1189 	uint16_t	lport = connp->conn_lport;
   1190 	uint8_t		protocol = connp->conn_proto;
   1191 
   1192 	if (IPCL_IS_IPTUN(connp))
   1193 		return (ipcl_iptun_hash_insert(connp, ipst));
   1194 
   1195 	switch (protocol) {
   1196 	default:
   1197 		if (is_system_labeled() &&
   1198 		    check_exempt_conflict_v4(connp, ipst))
   1199 			return (EADDRINUSE);
   1200 		/* FALLTHROUGH */
   1201 	case IPPROTO_UDP:
   1202 		if (protocol == IPPROTO_UDP) {
   1203 			connfp = &ipst->ips_ipcl_udp_fanout[
   1204 			    IPCL_UDP_HASH(lport, ipst)];
   1205 		} else {
   1206 			connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
   1207 		}
   1208 
   1209 		if (connp->conn_faddr_v4 != INADDR_ANY) {
   1210 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
   1211 		} else if (connp->conn_laddr_v4 != INADDR_ANY) {
   1212 			IPCL_HASH_INSERT_BOUND(connfp, connp);
   1213 		} else {
   1214 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
   1215 		}
   1216 		if (protocol == IPPROTO_RSVP)
   1217 			ill_set_inputfn_all(ipst);
   1218 		break;
   1219 
   1220 	case IPPROTO_TCP:
   1221 		/* Insert it in the Bind Hash */
   1222 		ASSERT(connp->conn_zoneid != ALL_ZONES);
   1223 		connfp = &ipst->ips_ipcl_bind_fanout[
   1224 		    IPCL_BIND_HASH(lport, ipst)];
   1225 		if (connp->conn_laddr_v4 != INADDR_ANY) {
   1226 			IPCL_HASH_INSERT_BOUND(connfp, connp);
   1227 		} else {
   1228 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
   1229 		}
   1230 		if (cl_inet_listen != NULL) {
   1231 			ASSERT(connp->conn_ipversion == IPV4_VERSION);
   1232 			connp->conn_flags |= IPCL_CL_LISTENER;
   1233 			(*cl_inet_listen)(
   1234 			    connp->conn_netstack->netstack_stackid,
   1235 			    IPPROTO_TCP, AF_INET,
   1236 			    (uint8_t *)&connp->conn_bound_addr_v4, lport, NULL);
   1237 		}
   1238 		break;
   1239 
   1240 	case IPPROTO_SCTP:
   1241 		ret = ipcl_sctp_hash_insert(connp, lport);
   1242 		break;
   1243 	}
   1244 
   1245 	return (ret);
   1246 }
   1247 
   1248 int
   1249 ipcl_bind_insert_v6(conn_t *connp)
   1250 {
   1251 	connf_t		*connfp;
   1252 	int		ret = 0;
   1253 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
   1254 	uint16_t	lport = connp->conn_lport;
   1255 	uint8_t		protocol = connp->conn_proto;
   1256 
   1257 	if (IPCL_IS_IPTUN(connp)) {
   1258 		return (ipcl_iptun_hash_insert_v6(connp, ipst));
   1259 	}
   1260 
   1261 	switch (protocol) {
   1262 	default:
   1263 		if (is_system_labeled() &&
   1264 		    check_exempt_conflict_v6(connp, ipst))
   1265 			return (EADDRINUSE);
   1266 		/* FALLTHROUGH */
   1267 	case IPPROTO_UDP:
   1268 		if (protocol == IPPROTO_UDP) {
   1269 			connfp = &ipst->ips_ipcl_udp_fanout[
   1270 			    IPCL_UDP_HASH(lport, ipst)];
   1271 		} else {
   1272 			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
   1273 		}
   1274 
   1275 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
   1276 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
   1277 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
   1278 			IPCL_HASH_INSERT_BOUND(connfp, connp);
   1279 		} else {
   1280 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
   1281 		}
   1282 		break;
   1283 
   1284 	case IPPROTO_TCP:
   1285 		/* Insert it in the Bind Hash */
   1286 		ASSERT(connp->conn_zoneid != ALL_ZONES);
   1287 		connfp = &ipst->ips_ipcl_bind_fanout[
   1288 		    IPCL_BIND_HASH(lport, ipst)];
   1289 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
   1290 			IPCL_HASH_INSERT_BOUND(connfp, connp);
   1291 		} else {
   1292 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
   1293 		}
   1294 		if (cl_inet_listen != NULL) {
   1295 			sa_family_t	addr_family;
   1296 			uint8_t		*laddrp;
   1297 
   1298 			if (connp->conn_ipversion == IPV6_VERSION) {
   1299 				addr_family = AF_INET6;
   1300 				laddrp =
   1301 				    (uint8_t *)&connp->conn_bound_addr_v6;
   1302 			} else {
   1303 				addr_family = AF_INET;
   1304 				laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
   1305 			}
   1306 			connp->conn_flags |= IPCL_CL_LISTENER;
   1307 			(*cl_inet_listen)(
   1308 			    connp->conn_netstack->netstack_stackid,
   1309 			    IPPROTO_TCP, addr_family, laddrp, lport, NULL);
   1310 		}
   1311 		break;
   1312 
   1313 	case IPPROTO_SCTP:
   1314 		ret = ipcl_sctp_hash_insert(connp, lport);
   1315 		break;
   1316 	}
   1317 
   1318 	return (ret);
   1319 }
   1320 
   1321 /*
   1322  * ipcl_conn_hash insertion routines.
   1323  * The caller has already set conn_proto and the addresses/ports in the conn_t.
   1324  */
   1325 
   1326 int
   1327 ipcl_conn_insert(conn_t *connp)
   1328 {
   1329 	if (connp->conn_ipversion == IPV6_VERSION)
   1330 		return (ipcl_conn_insert_v6(connp));
   1331 	else
   1332 		return (ipcl_conn_insert_v4(connp));
   1333 }
   1334 
   1335 int
   1336 ipcl_conn_insert_v4(conn_t *connp)
   1337 {
   1338 	connf_t		*connfp;
   1339 	conn_t		*tconnp;
   1340 	int		ret = 0;
   1341 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
   1342 	uint16_t	lport = connp->conn_lport;
   1343 	uint8_t		protocol = connp->conn_proto;
   1344 
   1345 	if (IPCL_IS_IPTUN(connp))
   1346 		return (ipcl_iptun_hash_insert(connp, ipst));
   1347 
   1348 	switch (protocol) {
   1349 	case IPPROTO_TCP:
   1350 		/*
   1351 		 * For TCP, we check whether the connection tuple already
   1352 		 * exists before allowing the connection to proceed.  We
   1353 		 * also allow indexing on the zoneid. This is to allow
   1354 		 * multiple shared stack zones to have the same tcp
   1355 		 * connection tuple. In practice this only happens for
   1356 		 * INADDR_LOOPBACK as it's the only local address which
   1357 		 * doesn't have to be unique.
   1358 		 */
   1359 		connfp = &ipst->ips_ipcl_conn_fanout[
   1360 		    IPCL_CONN_HASH(connp->conn_faddr_v4,
   1361 		    connp->conn_ports, ipst)];
   1362 		mutex_enter(&connfp->connf_lock);
   1363 		for (tconnp = connfp->connf_head; tconnp != NULL;
   1364 		    tconnp = tconnp->conn_next) {
   1365 			if (IPCL_CONN_MATCH(tconnp, connp->conn_proto,
   1366 			    connp->conn_faddr_v4, connp->conn_laddr_v4,
   1367 			    connp->conn_ports) &&
   1368 			    IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
   1369 				/* Already have a conn. bail out */
   1370 				mutex_exit(&connfp->connf_lock);
   1371 				return (EADDRINUSE);
   1372 			}
   1373 		}
   1374 		if (connp->conn_fanout != NULL) {
   1375 			/*
   1376 			 * Probably a XTI/TLI application trying to do a
   1377 			 * rebind. Let it happen.
   1378 			 */
   1379 			mutex_exit(&connfp->connf_lock);
   1380 			IPCL_HASH_REMOVE(connp);
   1381 			mutex_enter(&connfp->connf_lock);
   1382 		}
   1383 
   1384 		ASSERT(connp->conn_recv != NULL);
   1385 		ASSERT(connp->conn_recvicmp != NULL);
   1386 
   1387 		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
   1388 		mutex_exit(&connfp->connf_lock);
   1389 		break;
   1390 
   1391 	case IPPROTO_SCTP:
   1392 		/*
   1393 		 * The raw socket may have already been bound, remove it
   1394 		 * from the hash first.
   1395 		 */
   1396 		IPCL_HASH_REMOVE(connp);
   1397 		ret = ipcl_sctp_hash_insert(connp, lport);
   1398 		break;
   1399 
   1400 	default:
   1401 		/*
   1402 		 * Check for conflicts among MAC exempt bindings.  For
   1403 		 * transports with port numbers, this is done by the upper
   1404 		 * level per-transport binding logic.  For all others, it's
   1405 		 * done here.
   1406 		 */
   1407 		if (is_system_labeled() &&
   1408 		    check_exempt_conflict_v4(connp, ipst))
   1409 			return (EADDRINUSE);
   1410 		/* FALLTHROUGH */
   1411 
   1412 	case IPPROTO_UDP:
   1413 		if (protocol == IPPROTO_UDP) {
   1414 			connfp = &ipst->ips_ipcl_udp_fanout[
   1415 			    IPCL_UDP_HASH(lport, ipst)];
   1416 		} else {
   1417 			connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
   1418 		}
   1419 
   1420 		if (connp->conn_faddr_v4 != INADDR_ANY) {
   1421 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
   1422 		} else if (connp->conn_laddr_v4 != INADDR_ANY) {
   1423 			IPCL_HASH_INSERT_BOUND(connfp, connp);
   1424 		} else {
   1425 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
   1426 		}
   1427 		break;
   1428 	}
   1429 
   1430 	return (ret);
   1431 }
   1432 
   1433 int
   1434 ipcl_conn_insert_v6(conn_t *connp)
   1435 {
   1436 	connf_t		*connfp;
   1437 	conn_t		*tconnp;
   1438 	int		ret = 0;
   1439 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
   1440 	uint16_t	lport = connp->conn_lport;
   1441 	uint8_t		protocol = connp->conn_proto;
   1442 	uint_t		ifindex = connp->conn_bound_if;
   1443 
   1444 	if (IPCL_IS_IPTUN(connp))
   1445 		return (ipcl_iptun_hash_insert_v6(connp, ipst));
   1446 
   1447 	switch (protocol) {
   1448 	case IPPROTO_TCP:
   1449 
   1450 		/*
   1451 		 * For tcp, we check whether the connection tuple already
   1452 		 * exists before allowing the connection to proceed.  We
   1453 		 * also allow indexing on the zoneid. This is to allow
   1454 		 * multiple shared stack zones to have the same tcp
   1455 		 * connection tuple. In practice this only happens for
   1456 		 * ipv6_loopback as it's the only local address which
   1457 		 * doesn't have to be unique.
   1458 		 */
   1459 		connfp = &ipst->ips_ipcl_conn_fanout[
   1460 		    IPCL_CONN_HASH_V6(connp->conn_faddr_v6, connp->conn_ports,
   1461 		    ipst)];
   1462 		mutex_enter(&connfp->connf_lock);
   1463 		for (tconnp = connfp->connf_head; tconnp != NULL;
   1464 		    tconnp = tconnp->conn_next) {
   1465 			/* NOTE: need to match zoneid. Bug in onnv-gate */
   1466 			if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto,
   1467 			    connp->conn_faddr_v6, connp->conn_laddr_v6,
   1468 			    connp->conn_ports) &&
   1469 			    (tconnp->conn_bound_if == 0 ||
   1470 			    tconnp->conn_bound_if == ifindex) &&
   1471 			    IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
   1472 				/* Already have a conn. bail out */
   1473 				mutex_exit(&connfp->connf_lock);
   1474 				return (EADDRINUSE);
   1475 			}
   1476 		}
   1477 		if (connp->conn_fanout != NULL) {
   1478 			/*
   1479 			 * Probably a XTI/TLI application trying to do a
   1480 			 * rebind. Let it happen.
   1481 			 */
   1482 			mutex_exit(&connfp->connf_lock);
   1483 			IPCL_HASH_REMOVE(connp);
   1484 			mutex_enter(&connfp->connf_lock);
   1485 		}
   1486 		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
   1487 		mutex_exit(&connfp->connf_lock);
   1488 		break;
   1489 
   1490 	case IPPROTO_SCTP:
   1491 		IPCL_HASH_REMOVE(connp);
   1492 		ret = ipcl_sctp_hash_insert(connp, lport);
   1493 		break;
   1494 
   1495 	default:
   1496 		if (is_system_labeled() &&
   1497 		    check_exempt_conflict_v6(connp, ipst))
   1498 			return (EADDRINUSE);
   1499 		/* FALLTHROUGH */
   1500 	case IPPROTO_UDP:
   1501 		if (protocol == IPPROTO_UDP) {
   1502 			connfp = &ipst->ips_ipcl_udp_fanout[
   1503 			    IPCL_UDP_HASH(lport, ipst)];
   1504 		} else {
   1505 			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
   1506 		}
   1507 
   1508 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
   1509 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
   1510 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
   1511 			IPCL_HASH_INSERT_BOUND(connfp, connp);
   1512 		} else {
   1513 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
   1514 		}
   1515 		break;
   1516 	}
   1517 
   1518 	return (ret);
   1519 }
   1520 
   1521 /*
   1522  * v4 packet classifying function. looks up the fanout table to
   1523  * find the conn, the packet belongs to. returns the conn with
   1524  * the reference held, null otherwise.
   1525  *
   1526  * If zoneid is ALL_ZONES, then the search rules described in the "Connection
   1527  * Lookup" comment block are applied.  Labels are also checked as described
   1528  * above.  If the packet is from the inside (looped back), and is from the same
   1529  * zone, then label checks are omitted.
   1530  */
   1531 conn_t *
   1532 ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
   1533     ip_recv_attr_t *ira, ip_stack_t *ipst)
   1534 {
   1535 	ipha_t	*ipha;
   1536 	connf_t	*connfp, *bind_connfp;
   1537 	uint16_t lport;
   1538 	uint16_t fport;
   1539 	uint32_t ports;
   1540 	conn_t	*connp;
   1541 	uint16_t  *up;
   1542 	zoneid_t	zoneid = ira->ira_zoneid;
   1543 
   1544 	ipha = (ipha_t *)mp->b_rptr;
   1545 	up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);
   1546 
   1547 	switch (protocol) {
   1548 	case IPPROTO_TCP:
   1549 		ports = *(uint32_t *)up;
   1550 		connfp =
   1551 		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src,
   1552 		    ports, ipst)];
   1553 		mutex_enter(&connfp->connf_lock);
   1554 		for (connp = connfp->connf_head; connp != NULL;
   1555 		    connp = connp->conn_next) {
   1556 			if (IPCL_CONN_MATCH(connp, protocol,
   1557 			    ipha->ipha_src, ipha->ipha_dst, ports) &&
   1558 			    (connp->conn_zoneid == zoneid ||
   1559 			    connp->conn_allzones ||
   1560 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
   1561 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
   1562 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
   1563 				break;
   1564 		}
   1565 
   1566 		if (connp != NULL) {
   1567 			/*
   1568 			 * We have a fully-bound TCP connection.
   1569 			 *
   1570 			 * For labeled systems, there's no need to check the
   1571 			 * label here.  It's known to be good as we checked
   1572 			 * before allowing the connection to become bound.
   1573 			 */
   1574 			CONN_INC_REF(connp);
   1575 			mutex_exit(&connfp->connf_lock);
   1576 			return (connp);
   1577 		}
   1578 
   1579 		mutex_exit(&connfp->connf_lock);
   1580 		lport = up[1];
   1581 		bind_connfp =
   1582 		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
   1583 		mutex_enter(&bind_connfp->connf_lock);
   1584 		for (connp = bind_connfp->connf_head; connp != NULL;
   1585 		    connp = connp->conn_next) {
   1586 			if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
   1587 			    lport) &&
   1588 			    (connp->conn_zoneid == zoneid ||
   1589 			    connp->conn_allzones ||
   1590 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
   1591 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
   1592 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
   1593 				break;
   1594 		}
   1595 
   1596 		/*
   1597 		 * If the matching connection is SLP on a private address, then
   1598 		 * the label on the packet must match the local zone's label.
   1599 		 * Otherwise, it must be in the label range defined by tnrh.
   1600 		 * This is ensured by tsol_receive_local.
   1601 		 *
   1602 		 * Note that we don't check tsol_receive_local for
   1603 		 * the connected case.
   1604 		 */
   1605 		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
   1606 		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
   1607 		    ira, connp)) {
   1608 			DTRACE_PROBE3(tx__ip__log__info__classify__tcp,
   1609 			    char *, "connp(1) could not receive mp(2)",
   1610 			    conn_t *, connp, mblk_t *, mp);
   1611 			connp = NULL;
   1612 		}
   1613 
   1614 		if (connp != NULL) {
   1615 			/* Have a listener at least */
   1616 			CONN_INC_REF(connp);
   1617 			mutex_exit(&bind_connfp->connf_lock);
   1618 			return (connp);
   1619 		}
   1620 
   1621 		mutex_exit(&bind_connfp->connf_lock);
   1622 		break;
   1623 
   1624 	case IPPROTO_UDP:
   1625 		lport = up[1];
   1626 		fport = up[0];
   1627 		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
   1628 		mutex_enter(&connfp->connf_lock);
   1629 		for (connp = connfp->connf_head; connp != NULL;
   1630 		    connp = connp->conn_next) {
   1631 			if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
   1632 			    fport, ipha->ipha_src) &&
   1633 			    (connp->conn_zoneid == zoneid ||
   1634 			    connp->conn_allzones ||
   1635 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
   1636 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE))))
   1637 				break;
   1638 		}
   1639 
   1640 		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
   1641 		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
   1642 		    ira, connp)) {
   1643 			DTRACE_PROBE3(tx__ip__log__info__classify__udp,
   1644 			    char *, "connp(1) could not receive mp(2)",
   1645 			    conn_t *, connp, mblk_t *, mp);
   1646 			connp = NULL;
   1647 		}
   1648 
   1649 		if (connp != NULL) {
   1650 			CONN_INC_REF(connp);
   1651 			mutex_exit(&connfp->connf_lock);
   1652 			return (connp);
   1653 		}
   1654 
   1655 		/*
   1656 		 * We shouldn't come here for multicast/broadcast packets
   1657 		 */
   1658 		mutex_exit(&connfp->connf_lock);
   1659 
   1660 		break;
   1661 
   1662 	case IPPROTO_ENCAP:
   1663 	case IPPROTO_IPV6:
   1664 		return (ipcl_iptun_classify_v4(&ipha->ipha_src,
   1665 		    &ipha->ipha_dst, ipst));
   1666 	}
   1667 
   1668 	return (NULL);
   1669 }
   1670 
   1671 conn_t *
   1672 ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
   1673     ip_recv_attr_t *ira, ip_stack_t *ipst)
   1674 {
   1675 	ip6_t		*ip6h;
   1676 	connf_t		*connfp, *bind_connfp;
   1677 	uint16_t	lport;
   1678 	uint16_t	fport;
   1679 	tcpha_t		*tcpha;
   1680 	uint32_t	ports;
   1681 	conn_t		*connp;
   1682 	uint16_t	*up;
   1683 	zoneid_t	zoneid = ira->ira_zoneid;
   1684 
   1685 	ip6h = (ip6_t *)mp->b_rptr;
   1686 
   1687 	switch (protocol) {
   1688 	case IPPROTO_TCP:
   1689 		tcpha = (tcpha_t *)&mp->b_rptr[hdr_len];
   1690 		up = &tcpha->tha_lport;
   1691 		ports = *(uint32_t *)up;
   1692 
   1693 		connfp =
   1694 		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src,
   1695 		    ports, ipst)];
   1696 		mutex_enter(&connfp->connf_lock);
   1697 		for (connp = connfp->connf_head; connp != NULL;
   1698 		    connp = connp->conn_next) {
   1699 			if (IPCL_CONN_MATCH_V6(connp, protocol,
   1700 			    ip6h->ip6_src, ip6h->ip6_dst, ports) &&
   1701 			    (connp->conn_zoneid == zoneid ||
   1702 			    connp->conn_allzones ||
   1703 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
   1704 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
   1705 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
   1706 				break;
   1707 		}
   1708 
   1709 		if (connp != NULL) {
   1710 			/*
   1711 			 * We have a fully-bound TCP connection.
   1712 			 *
   1713 			 * For labeled systems, there's no need to check the
   1714 			 * label here.  It's known to be good as we checked
   1715 			 * before allowing the connection to become bound.
   1716 			 */
   1717 			CONN_INC_REF(connp);
   1718 			mutex_exit(&connfp->connf_lock);
   1719 			return (connp);
   1720 		}
   1721 
   1722 		mutex_exit(&connfp->connf_lock);
   1723 
   1724 		lport = up[1];
   1725 		bind_connfp =
   1726 		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
   1727 		mutex_enter(&bind_connfp->connf_lock);
   1728 		for (connp = bind_connfp->connf_head; connp != NULL;
   1729 		    connp = connp->conn_next) {
   1730 			if (IPCL_BIND_MATCH_V6(connp, protocol,
   1731 			    ip6h->ip6_dst, lport) &&
   1732 			    (connp->conn_zoneid == zoneid ||
   1733 			    connp->conn_allzones ||
   1734 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
   1735 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
   1736 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
   1737 				break;
   1738 		}
   1739 
   1740 		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
   1741 		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
   1742 		    ira, connp)) {
   1743 			DTRACE_PROBE3(tx__ip__log__info__classify__tcp6,
   1744 			    char *, "connp(1) could not receive mp(2)",
   1745 			    conn_t *, connp, mblk_t *, mp);
   1746 			connp = NULL;
   1747 		}
   1748 
   1749 		if (connp != NULL) {
   1750 			/* Have a listner at least */
   1751 			CONN_INC_REF(connp);
   1752 			mutex_exit(&bind_connfp->connf_lock);
   1753 			return (connp);
   1754 		}
   1755 
   1756 		mutex_exit(&bind_connfp->connf_lock);
   1757 		break;
   1758 
   1759 	case IPPROTO_UDP:
   1760 		up = (uint16_t *)&mp->b_rptr[hdr_len];
   1761 		lport = up[1];
   1762 		fport = up[0];
   1763 		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
   1764 		mutex_enter(&connfp->connf_lock);
   1765 		for (connp = connfp->connf_head; connp != NULL;
   1766 		    connp = connp->conn_next) {
   1767 			if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
   1768 			    fport, ip6h->ip6_src) &&
   1769 			    (connp->conn_zoneid == zoneid ||
   1770 			    connp->conn_allzones ||
   1771 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
   1772 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
   1773 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
   1774 				break;
   1775 		}
   1776 
   1777 		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
   1778 		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
   1779 		    ira, connp)) {
   1780 			DTRACE_PROBE3(tx__ip__log__info__classify__udp6,
   1781 			    char *, "connp(1) could not receive mp(2)",
   1782 			    conn_t *, connp, mblk_t *, mp);
   1783 			connp = NULL;
   1784 		}
   1785 
   1786 		if (connp != NULL) {
   1787 			CONN_INC_REF(connp);
   1788 			mutex_exit(&connfp->connf_lock);
   1789 			return (connp);
   1790 		}
   1791 
   1792 		/*
   1793 		 * We shouldn't come here for multicast/broadcast packets
   1794 		 */
   1795 		mutex_exit(&connfp->connf_lock);
   1796 		break;
   1797 	case IPPROTO_ENCAP:
   1798 	case IPPROTO_IPV6:
   1799 		return (ipcl_iptun_classify_v6(&ip6h->ip6_src,
   1800 		    &ip6h->ip6_dst, ipst));
   1801 	}
   1802 
   1803 	return (NULL);
   1804 }
   1805 
   1806 /*
   1807  * wrapper around ipcl_classify_(v4,v6) routines.
   1808  */
   1809 conn_t *
   1810 ipcl_classify(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
   1811 {
   1812 	if (ira->ira_flags & IRAF_IS_IPV4) {
   1813 		return (ipcl_classify_v4(mp, ira->ira_protocol,
   1814 		    ira->ira_ip_hdr_length, ira, ipst));
   1815 	} else {
   1816 		return (ipcl_classify_v6(mp, ira->ira_protocol,
   1817 		    ira->ira_ip_hdr_length, ira, ipst));
   1818 	}
   1819 }
   1820 
   1821 /*
   1822  * Only used to classify SCTP RAW sockets
   1823  */
   1824 conn_t *
   1825 ipcl_classify_raw(mblk_t *mp, uint8_t protocol, uint32_t ports,
   1826     ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira, ip_stack_t *ipst)
   1827 {
   1828 	connf_t		*connfp;
   1829 	conn_t		*connp;
   1830 	in_port_t	lport;
   1831 	int		ipversion;
   1832 	const void	*dst;
   1833 	zoneid_t	zoneid = ira->ira_zoneid;
   1834 
   1835 	lport = ((uint16_t *)&ports)[1];
   1836 	if (ira->ira_flags & IRAF_IS_IPV4) {
   1837 		dst = (const void *)&ipha->ipha_dst;
   1838 		ipversion = IPV4_VERSION;
   1839 	} else {
   1840 		dst = (const void *)&ip6h->ip6_dst;
   1841 		ipversion = IPV6_VERSION;
   1842 	}
   1843 
   1844 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
   1845 	mutex_enter(&connfp->connf_lock);
   1846 	for (connp = connfp->connf_head; connp != NULL;
   1847 	    connp = connp->conn_next) {
   1848 		/* We don't allow v4 fallback for v6 raw socket. */
   1849 		if (ipversion != connp->conn_ipversion)
   1850 			continue;
   1851 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
   1852 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
   1853 			if (ipversion == IPV4_VERSION) {
   1854 				if (!IPCL_CONN_MATCH(connp, protocol,
   1855 				    ipha->ipha_src, ipha->ipha_dst, ports))
   1856 					continue;
   1857 			} else {
   1858 				if (!IPCL_CONN_MATCH_V6(connp, protocol,
   1859 				    ip6h->ip6_src, ip6h->ip6_dst, ports))
   1860 					continue;
   1861 			}
   1862 		} else {
   1863 			if (ipversion == IPV4_VERSION) {
   1864 				if (!IPCL_BIND_MATCH(connp, protocol,
   1865 				    ipha->ipha_dst, lport))
   1866 					continue;
   1867 			} else {
   1868 				if (!IPCL_BIND_MATCH_V6(connp, protocol,
   1869 				    ip6h->ip6_dst, lport))
   1870 					continue;
   1871 			}
   1872 		}
   1873 
   1874 		if (connp->conn_zoneid == zoneid ||
   1875 		    connp->conn_allzones ||
   1876 		    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
   1877 		    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
   1878 		    (ira->ira_flags & IRAF_TX_SHARED_ADDR)))
   1879 			break;
   1880 	}
   1881 
   1882 	if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
   1883 	    !tsol_receive_local(mp, dst, ipversion, ira, connp)) {
   1884 		DTRACE_PROBE3(tx__ip__log__info__classify__rawip,
   1885 		    char *, "connp(1) could not receive mp(2)",
   1886 		    conn_t *, connp, mblk_t *, mp);
   1887 		connp = NULL;
   1888 	}
   1889 
   1890 	if (connp != NULL)
   1891 		goto found;
   1892 	mutex_exit(&connfp->connf_lock);
   1893 
   1894 	/* Try to look for a wildcard SCTP RAW socket match. */
   1895 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)];
   1896 	mutex_enter(&connfp->connf_lock);
   1897 	for (connp = connfp->connf_head; connp != NULL;
   1898 	    connp = connp->conn_next) {
   1899 		/* We don't allow v4 fallback for v6 raw socket. */
   1900 		if (ipversion != connp->conn_ipversion)
   1901 			continue;
   1902 		if (!IPCL_ZONE_MATCH(connp, zoneid))
   1903 			continue;
   1904 
   1905 		if (ipversion == IPV4_VERSION) {
   1906 			if (IPCL_RAW_MATCH(connp, protocol, ipha->ipha_dst))
   1907 				break;
   1908 		} else {
   1909 			if (IPCL_RAW_MATCH_V6(connp, protocol, ip6h->ip6_dst)) {
   1910 				break;
   1911 			}
   1912 		}
   1913 	}
   1914 
   1915 	if (connp != NULL)
   1916 		goto found;
   1917 
   1918 	mutex_exit(&connfp->connf_lock);
   1919 	return (NULL);
   1920 
   1921 found:
   1922 	ASSERT(connp != NULL);
   1923 	CONN_INC_REF(connp);
   1924 	mutex_exit(&connfp->connf_lock);
   1925 	return (connp);
   1926 }
   1927 
   1928 /* ARGSUSED */
   1929 static int
   1930 tcp_conn_constructor(void *buf, void *cdrarg, int kmflags)
   1931 {
   1932 	itc_t	*itc = (itc_t *)buf;
   1933 	conn_t 	*connp = &itc->itc_conn;
   1934 	tcp_t	*tcp = (tcp_t *)&itc[1];
   1935 
   1936 	bzero(connp, sizeof (conn_t));
   1937 	bzero(tcp, sizeof (tcp_t));
   1938 
   1939 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
   1940 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
   1941 	cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL);
   1942 	tcp->tcp_timercache = tcp_timermp_alloc(kmflags);
   1943 	if (tcp->tcp_timercache == NULL)
   1944 		return (ENOMEM);
   1945 	connp->conn_tcp = tcp;
   1946 	connp->conn_flags = IPCL_TCPCONN;
   1947 	connp->conn_proto = IPPROTO_TCP;
   1948 	tcp->tcp_connp = connp;
   1949 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
   1950 
   1951 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
   1952 	if (connp->conn_ixa == NULL) {
   1953 		tcp_timermp_free(tcp);
   1954 		return (ENOMEM);
   1955 	}
   1956 	connp->conn_ixa->ixa_refcnt = 1;
   1957 	connp->conn_ixa->ixa_protocol = connp->conn_proto;
   1958 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
   1959 	return (0);
   1960 }
   1961 
   1962 /* ARGSUSED */
   1963 static void
   1964 tcp_conn_destructor(void *buf, void *cdrarg)
   1965 {
   1966 	itc_t	*itc = (itc_t *)buf;
   1967 	conn_t 	*connp = &itc->itc_conn;
   1968 	tcp_t	*tcp = (tcp_t *)&itc[1];
   1969 
   1970 	ASSERT(connp->conn_flags & IPCL_TCPCONN);
   1971 	ASSERT(tcp->tcp_connp == connp);
   1972 	ASSERT(connp->conn_tcp == tcp);
   1973 	tcp_timermp_free(tcp);
   1974 	mutex_destroy(&connp->conn_lock);
   1975 	cv_destroy(&connp->conn_cv);
   1976 	cv_destroy(&connp->conn_sq_cv);
   1977 	rw_destroy(&connp->conn_ilg_lock);
   1978 
   1979 	/* Can be NULL if constructor failed */
   1980 	if (connp->conn_ixa != NULL) {
   1981 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
   1982 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
   1983 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
   1984 		ixa_refrele(connp->conn_ixa);
   1985 	}
   1986 }
   1987 
   1988 /* ARGSUSED */
   1989 static int
   1990 ip_conn_constructor(void *buf, void *cdrarg, int kmflags)
   1991 {
   1992 	itc_t	*itc = (itc_t *)buf;
   1993 	conn_t 	*connp = &itc->itc_conn;
   1994 
   1995 	bzero(connp, sizeof (conn_t));
   1996 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
   1997 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
   1998 	connp->conn_flags = IPCL_IPCCONN;
   1999 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
   2000 
   2001 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
   2002 	if (connp->conn_ixa == NULL)
   2003 		return (ENOMEM);
   2004 	connp->conn_ixa->ixa_refcnt = 1;
   2005 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
   2006 	return (0);
   2007 }
   2008 
   2009 /* ARGSUSED */
   2010 static void
   2011 ip_conn_destructor(void *buf, void *cdrarg)
   2012 {
   2013 	itc_t	*itc = (itc_t *)buf;
   2014 	conn_t 	*connp = &itc->itc_conn;
   2015 
   2016 	ASSERT(connp->conn_flags & IPCL_IPCCONN);
   2017 	ASSERT(connp->conn_priv == NULL);
   2018 	mutex_destroy(&connp->conn_lock);
   2019 	cv_destroy(&connp->conn_cv);
   2020 	rw_destroy(&connp->conn_ilg_lock);
   2021 
   2022 	/* Can be NULL if constructor failed */
   2023 	if (connp->conn_ixa != NULL) {
   2024 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
   2025 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
   2026 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
   2027 		ixa_refrele(connp->conn_ixa);
   2028 	}
   2029 }
   2030 
   2031 /* ARGSUSED */
   2032 static int
   2033 udp_conn_constructor(void *buf, void *cdrarg, int kmflags)
   2034 {
   2035 	itc_t	*itc = (itc_t *)buf;
   2036 	conn_t 	*connp = &itc->itc_conn;
   2037 	udp_t	*udp = (udp_t *)&itc[1];
   2038 
   2039 	bzero(connp, sizeof (conn_t));
   2040 	bzero(udp, sizeof (udp_t));
   2041 
   2042 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
   2043 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
   2044 	connp->conn_udp = udp;
   2045 	connp->conn_flags = IPCL_UDPCONN;
   2046 	connp->conn_proto = IPPROTO_UDP;
   2047 	udp->udp_connp = connp;
   2048 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
   2049 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
   2050 	if (connp->conn_ixa == NULL)
   2051 		return (ENOMEM);
   2052 	connp->conn_ixa->ixa_refcnt = 1;
   2053 	connp->conn_ixa->ixa_protocol = connp->conn_proto;
   2054 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
   2055 	return (0);
   2056 }
   2057 
   2058 /* ARGSUSED */
   2059 static void
   2060 udp_conn_destructor(void *buf, void *cdrarg)
   2061 {
   2062 	itc_t	*itc = (itc_t *)buf;
   2063 	conn_t 	*connp = &itc->itc_conn;
   2064 	udp_t	*udp = (udp_t *)&itc[1];
   2065 
   2066 	ASSERT(connp->conn_flags & IPCL_UDPCONN);
   2067 	ASSERT(udp->udp_connp == connp);
   2068 	ASSERT(connp->conn_udp == udp);
   2069 	mutex_destroy(&connp->conn_lock);
   2070 	cv_destroy(&connp->conn_cv);
   2071 	rw_destroy(&connp->conn_ilg_lock);
   2072 
   2073 	/* Can be NULL if constructor failed */
   2074 	if (connp->conn_ixa != NULL) {
   2075 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
   2076 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
   2077 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
   2078 		ixa_refrele(connp->conn_ixa);
   2079 	}
   2080 }
   2081 
   2082 /* ARGSUSED */
   2083 static int
   2084 rawip_conn_constructor(void *buf, void *cdrarg, int kmflags)
   2085 {
   2086 	itc_t	*itc = (itc_t *)buf;
   2087 	conn_t 	*connp = &itc->itc_conn;
   2088 	icmp_t	*icmp = (icmp_t *)&itc[1];
   2089 
   2090 	bzero(connp, sizeof (conn_t));
   2091 	bzero(icmp, sizeof (icmp_t));
   2092 
   2093 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
   2094 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
   2095 	connp->conn_icmp = icmp;
   2096 	connp->conn_flags = IPCL_RAWIPCONN;
   2097 	connp->conn_proto = IPPROTO_ICMP;
   2098 	icmp->icmp_connp = connp;
   2099 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
   2100 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
   2101 	if (connp->conn_ixa == NULL)
   2102 		return (ENOMEM);
   2103 	connp->conn_ixa->ixa_refcnt = 1;
   2104 	connp->conn_ixa->ixa_protocol = connp->conn_proto;
   2105 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
   2106 	return (0);
   2107 }
   2108 
   2109 /* ARGSUSED */
   2110 static void
   2111 rawip_conn_destructor(void *buf, void *cdrarg)
   2112 {
   2113 	itc_t	*itc = (itc_t *)buf;
   2114 	conn_t 	*connp = &itc->itc_conn;
   2115 	icmp_t	*icmp = (icmp_t *)&itc[1];
   2116 
   2117 	ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
   2118 	ASSERT(icmp->icmp_connp == connp);
   2119 	ASSERT(connp->conn_icmp == icmp);
   2120 	mutex_destroy(&connp->conn_lock);
   2121 	cv_destroy(&connp->conn_cv);
   2122 	rw_destroy(&connp->conn_ilg_lock);
   2123 
   2124 	/* Can be NULL if constructor failed */
   2125 	if (connp->conn_ixa != NULL) {
   2126 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
   2127 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
   2128 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
   2129 		ixa_refrele(connp->conn_ixa);
   2130 	}
   2131 }
   2132 
   2133 /* ARGSUSED */
   2134 static int
   2135 rts_conn_constructor(void *buf, void *cdrarg, int kmflags)
   2136 {
   2137 	itc_t	*itc = (itc_t *)buf;
   2138 	conn_t 	*connp = &itc->itc_conn;
   2139 	rts_t	*rts = (rts_t *)&itc[1];
   2140 
   2141 	bzero(connp, sizeof (conn_t));
   2142 	bzero(rts, sizeof (rts_t));
   2143 
   2144 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
   2145 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
   2146 	connp->conn_rts = rts;
   2147 	connp->conn_flags = IPCL_RTSCONN;
   2148 	rts->rts_connp = connp;
   2149 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
   2150 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
   2151 	if (connp->conn_ixa == NULL)
   2152 		return (ENOMEM);
   2153 	connp->conn_ixa->ixa_refcnt = 1;
   2154 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
   2155 	return (0);
   2156 }
   2157 
   2158 /* ARGSUSED */
   2159 static void
   2160 rts_conn_destructor(void *buf, void *cdrarg)
   2161 {
   2162 	itc_t	*itc = (itc_t *)buf;
   2163 	conn_t 	*connp = &itc->itc_conn;
   2164 	rts_t	*rts = (rts_t *)&itc[1];
   2165 
   2166 	ASSERT(connp->conn_flags & IPCL_RTSCONN);
   2167 	ASSERT(rts->rts_connp == connp);
   2168 	ASSERT(connp->conn_rts == rts);
   2169 	mutex_destroy(&connp->conn_lock);
   2170 	cv_destroy(&connp->conn_cv);
   2171 	rw_destroy(&connp->conn_ilg_lock);
   2172 
   2173 	/* Can be NULL if constructor failed */
   2174 	if (connp->conn_ixa != NULL) {
   2175 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
   2176 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
   2177 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
   2178 		ixa_refrele(connp->conn_ixa);
   2179 	}
   2180 }
   2181 
   2182 /*
   2183  * Called as part of ipcl_conn_destroy to assert and clear any pointers
   2184  * in the conn_t.
   2185  *
   2186  * Below we list all the pointers in the conn_t as a documentation aid.
   2187  * The ones that we can not ASSERT to be NULL are #ifdef'ed out.
   2188  * If you add any pointers to the conn_t please add an ASSERT here
   2189  * and #ifdef it out if it can't be actually asserted to be NULL.
   2190  * In any case, we bzero most of the conn_t at the end of the function.
   2191  */
   2192 void
   2193 ipcl_conn_cleanup(conn_t *connp)
   2194 {
   2195 	ip_xmit_attr_t	*ixa;
   2196 
   2197 	ASSERT(connp->conn_latch == NULL);
   2198 	ASSERT(connp->conn_latch_in_policy == NULL);
   2199 	ASSERT(connp->conn_latch_in_action == NULL);
   2200 #ifdef notdef
   2201 	ASSERT(connp->conn_rq == NULL);
   2202 	ASSERT(connp->conn_wq == NULL);
   2203 #endif
   2204 	ASSERT(connp->conn_cred == NULL);
   2205 	ASSERT(connp->conn_g_fanout == NULL);
   2206 	ASSERT(connp->conn_g_next == NULL);
   2207 	ASSERT(connp->conn_g_prev == NULL);
   2208 	ASSERT(connp->conn_policy == NULL);
   2209 	ASSERT(connp->conn_fanout == NULL);
   2210 	ASSERT(connp->conn_next == NULL);
   2211 	ASSERT(connp->conn_prev == NULL);
   2212 	ASSERT(connp->conn_oper_pending_ill == NULL);
   2213 	ASSERT(connp->conn_ilg == NULL);
   2214 	ASSERT(connp->conn_drain_next == NULL);
   2215 	ASSERT(connp->conn_drain_prev == NULL);
   2216 #ifdef notdef
   2217 	/* conn_idl is not cleared when removed from idl list */
   2218 	ASSERT(connp->conn_idl == NULL);
   2219 #endif
   2220 	ASSERT(connp->conn_ipsec_opt_mp == NULL);
   2221 #ifdef notdef
   2222 	/* conn_netstack is cleared by the caller; needed by ixa_cleanup */
   2223 	ASSERT(connp->conn_netstack == NULL);
   2224 #endif
   2225 
   2226 	ASSERT(connp->conn_helper_info == NULL);
   2227 	ASSERT(connp->conn_ixa != NULL);
   2228 	ixa = connp->conn_ixa;
   2229 	ASSERT(ixa->ixa_refcnt == 1);
   2230 	/* Need to preserve ixa_protocol */
   2231 	ixa_cleanup(ixa);
   2232 	ixa->ixa_flags = 0;
   2233 
   2234 	/* Clear out the conn_t fields that are not preserved */
   2235 	bzero(&connp->conn_start_clr,
   2236 	    sizeof (conn_t) -
   2237 	    ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp));
   2238 }
   2239 
   2240 /*
   2241  * All conns are inserted in a global multi-list for the benefit of
   2242  * walkers. The walk is guaranteed to walk all open conns at the time
   2243  * of the start of the walk exactly once. This property is needed to
   2244  * achieve some cleanups during unplumb of interfaces. This is achieved
   2245  * as follows.
   2246  *
   2247  * ipcl_conn_create and ipcl_conn_destroy are the only functions that
   2248  * call the insert and delete functions below at creation and deletion
   2249  * time respectively. The conn never moves or changes its position in this
   2250  * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
   2251  * won't increase due to walkers, once the conn deletion has started. Note
   2252  * that we can't remove the conn from the global list and then wait for
   2253  * the refcnt to drop to zero, since walkers would then see a truncated
   2254  * list. CONN_INCIPIENT ensures that walkers don't start looking at
   2255  * conns until ip_open is ready to make them globally visible.
   2256  * The global round robin multi-list locks are held only to get the
   2257  * next member/insertion/deletion and contention should be negligible
   2258  * if the multi-list is much greater than the number of cpus.
   2259  */
   2260 void
   2261 ipcl_globalhash_insert(conn_t *connp)
   2262 {
   2263 	int	index;
   2264 	struct connf_s	*connfp;
   2265 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
   2266 
   2267 	/*
   2268 	 * No need for atomic here. Approximate even distribution
   2269 	 * in the global lists is sufficient.
   2270 	 */
   2271 	ipst->ips_conn_g_index++;
   2272 	index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1);
   2273 
   2274 	connp->conn_g_prev = NULL;
   2275 	/*
   2276 	 * Mark as INCIPIENT, so that walkers will ignore this
   2277 	 * for now, till ip_open is ready to make it visible globally.
   2278 	 */
   2279 	connp->conn_state_flags |= CONN_INCIPIENT;
   2280 
   2281 	connfp = &ipst->ips_ipcl_globalhash_fanout[index];
   2282 	/* Insert at the head of the list */
   2283 	mutex_enter(&connfp->connf_lock);
   2284 	connp->conn_g_next = connfp->connf_head;
   2285 	if (connp->conn_g_next != NULL)
   2286 		connp->conn_g_next->conn_g_prev = connp;
   2287 	connfp->connf_head = connp;
   2288 
   2289 	/* The fanout bucket this conn points to */
   2290 	connp->conn_g_fanout = connfp;
   2291 
   2292 	mutex_exit(&connfp->connf_lock);
   2293 }
   2294 
   2295 void
   2296 ipcl_globalhash_remove(conn_t *connp)
   2297 {
   2298 	struct connf_s	*connfp;
   2299 
   2300 	/*
   2301 	 * We were never inserted in the global multi list.
   2302 	 * IPCL_NONE variety is never inserted in the global multilist
   2303 	 * since it is presumed to not need any cleanup and is transient.
   2304 	 */
   2305 	if (connp->conn_g_fanout == NULL)
   2306 		return;
   2307 
   2308 	connfp = connp->conn_g_fanout;
   2309 	mutex_enter(&connfp->connf_lock);
   2310 	if (connp->conn_g_prev != NULL)
   2311 		connp->conn_g_prev->conn_g_next = connp->conn_g_next;
   2312 	else
   2313 		connfp->connf_head = connp->conn_g_next;
   2314 	if (connp->conn_g_next != NULL)
   2315 		connp->conn_g_next->conn_g_prev = connp->conn_g_prev;
   2316 	mutex_exit(&connfp->connf_lock);
   2317 
   2318 	/* Better to stumble on a null pointer than to corrupt memory */
   2319 	connp->conn_g_next = NULL;
   2320 	connp->conn_g_prev = NULL;
   2321 	connp->conn_g_fanout = NULL;
   2322 }
   2323 
   2324 /*
   2325  * Walk the list of all conn_t's in the system, calling the function provided
   2326  * With the specified argument for each.
   2327  * Applies to both IPv4 and IPv6.
   2328  *
   2329  * CONNs may hold pointers to ills (conn_dhcpinit_ill and
   2330  * conn_oper_pending_ill). To guard against stale pointers
   2331  * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is
   2332  * unplumbed or removed. New conn_t's that are created while we are walking
   2333  * may be missed by this walk, because they are not necessarily inserted
   2334  * at the tail of the list. They are new conn_t's and thus don't have any
   2335  * stale pointers. The CONN_CLOSING flag ensures that no new reference
   2336  * is created to the struct that is going away.
   2337  */
   2338 void
   2339 ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst)
   2340 {
   2341 	int	i;
   2342 	conn_t	*connp;
   2343 	conn_t	*prev_connp;
   2344 
   2345 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
   2346 		mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
   2347 		prev_connp = NULL;
   2348 		connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head;
   2349 		while (connp != NULL) {
   2350 			mutex_enter(&connp->conn_lock);
   2351 			if (connp->conn_state_flags &
   2352 			    (CONN_CONDEMNED | CONN_INCIPIENT)) {
   2353 				mutex_exit(&connp->conn_lock);
   2354 				connp = connp->conn_g_next;
   2355 				continue;
   2356 			}
   2357 			CONN_INC_REF_LOCKED(connp);
   2358 			mutex_exit(&connp->conn_lock);
   2359 			mutex_exit(
   2360 			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
   2361 			(*func)(connp, arg);
   2362 			if (prev_connp != NULL)
   2363 				CONN_DEC_REF(prev_connp);
   2364 			mutex_enter(
   2365 			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
   2366 			prev_connp = connp;
   2367 			connp = connp->conn_g_next;
   2368 		}
   2369 		mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
   2370 		if (prev_connp != NULL)
   2371 			CONN_DEC_REF(prev_connp);
   2372 	}
   2373 }
   2374 
   2375 /*
   2376  * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
   2377  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
   2378  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
   2379  * (peer tcp in ESTABLISHED state).
   2380  */
   2381 conn_t *
   2382 ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcpha_t *tcpha,
   2383     ip_stack_t *ipst)
   2384 {
   2385 	uint32_t ports;
   2386 	uint16_t *pports = (uint16_t *)&ports;
   2387 	connf_t	*connfp;
   2388 	conn_t	*tconnp;
   2389 	boolean_t zone_chk;
   2390 
   2391 	/*
   2392 	 * If either the source of destination address is loopback, then
   2393 	 * both endpoints must be in the same Zone.  Otherwise, both of
   2394 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
   2395 	 * state) and the endpoints may reside in different Zones.
   2396 	 */
   2397 	zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) ||
   2398 	    ipha->ipha_dst == htonl(INADDR_LOOPBACK));
   2399 
   2400 	pports[0] = tcpha->tha_fport;
   2401 	pports[1] = tcpha->tha_lport;
   2402 
   2403 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
   2404 	    ports, ipst)];
   2405 
   2406 	mutex_enter(&connfp->connf_lock);
   2407 	for (tconnp = connfp->connf_head; tconnp != NULL;
   2408 	    tconnp = tconnp->conn_next) {
   2409 
   2410 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
   2411 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
   2412 		    tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
   2413 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
   2414 
   2415 			ASSERT(tconnp != connp);
   2416 			CONN_INC_REF(tconnp);
   2417 			mutex_exit(&connfp->connf_lock);
   2418 			return (tconnp);
   2419 		}
   2420 	}
   2421 	mutex_exit(&connfp->connf_lock);
   2422 	return (NULL);
   2423 }
   2424 
   2425 /*
   2426  * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
   2427  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
   2428  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
   2429  * (peer tcp in ESTABLISHED state).
   2430  */
   2431 conn_t *
   2432 ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcpha_t *tcpha,
   2433     ip_stack_t *ipst)
   2434 {
   2435 	uint32_t ports;
   2436 	uint16_t *pports = (uint16_t *)&ports;
   2437 	connf_t	*connfp;
   2438 	conn_t	*tconnp;
   2439 	boolean_t zone_chk;
   2440 
   2441 	/*
   2442 	 * If either the source of destination address is loopback, then
   2443 	 * both endpoints must be in the same Zone.  Otherwise, both of
   2444 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
   2445 	 * state) and the endpoints may reside in different Zones.  We
   2446 	 * don't do Zone check for link local address(es) because the
   2447 	 * current Zone implementation treats each link local address as
   2448 	 * being unique per system node, i.e. they belong to global Zone.
   2449 	 */
   2450 	zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) ||
   2451 	    IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst));
   2452 
   2453 	pports[0] = tcpha->tha_fport;
   2454 	pports[1] = tcpha->tha_lport;
   2455 
   2456 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
   2457 	    ports, ipst)];
   2458 
   2459 	mutex_enter(&connfp->connf_lock);
   2460 	for (tconnp = connfp->connf_head; tconnp != NULL;
   2461 	    tconnp = tconnp->conn_next) {
   2462 
   2463 		/* We skip conn_bound_if check here as this is loopback tcp */
   2464 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
   2465 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
   2466 		    tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
   2467 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
   2468 
   2469 			ASSERT(tconnp != connp);
   2470 			CONN_INC_REF(tconnp);
   2471 			mutex_exit(&connfp->connf_lock);
   2472 			return (tconnp);
   2473 		}
   2474 	}
   2475 	mutex_exit(&connfp->connf_lock);
   2476 	return (NULL);
   2477 }
   2478 
   2479 /*
   2480  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
   2481  * Returns with conn reference held. Caller must call CONN_DEC_REF.
   2482  * Only checks for connected entries i.e. no INADDR_ANY checks.
   2483  */
   2484 conn_t *
   2485 ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcpha_t *tcpha, int min_state,
   2486     ip_stack_t *ipst)
   2487 {
   2488 	uint32_t ports;
   2489 	uint16_t *pports;
   2490 	connf_t	*connfp;
   2491 	conn_t	*tconnp;
   2492 
   2493 	pports = (uint16_t *)&ports;
   2494 	pports[0] = tcpha->tha_fport;
   2495 	pports[1] = tcpha->tha_lport;
   2496 
   2497 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
   2498 	    ports, ipst)];
   2499 
   2500 	mutex_enter(&connfp->connf_lock);
   2501 	for (tconnp = connfp->connf_head; tconnp != NULL;
   2502 	    tconnp = tconnp->conn_next) {
   2503 
   2504 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
   2505 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
   2506 		    tconnp->conn_tcp->tcp_state >= min_state) {
   2507 
   2508 			CONN_INC_REF(tconnp);
   2509 			mutex_exit(&connfp->connf_lock);
   2510 			return (tconnp);
   2511 		}
   2512 	}
   2513 	mutex_exit(&connfp->connf_lock);
   2514 	return (NULL);
   2515 }
   2516 
   2517 /*
   2518  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
   2519  * Returns with conn reference held. Caller must call CONN_DEC_REF.
   2520  * Only checks for connected entries i.e. no INADDR_ANY checks.
   2521  * Match on ifindex in addition to addresses.
   2522  */
   2523 conn_t *
   2524 ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state,
   2525     uint_t ifindex, ip_stack_t *ipst)
   2526 {
   2527 	tcp_t	*tcp;
   2528 	uint32_t ports;
   2529 	uint16_t *pports;
   2530 	connf_t	*connfp;
   2531 	conn_t	*tconnp;
   2532 
   2533 	pports = (uint16_t *)&ports;
   2534 	pports[0] = tcpha->tha_fport;
   2535 	pports[1] = tcpha->tha_lport;
   2536 
   2537 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
   2538 	    ports, ipst)];
   2539 
   2540 	mutex_enter(&connfp->connf_lock);
   2541 	for (tconnp = connfp->connf_head; tconnp != NULL;
   2542 	    tconnp = tconnp->conn_next) {
   2543 
   2544 		tcp = tconnp->conn_tcp;
   2545 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
   2546 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
   2547 		    tcp->tcp_state >= min_state &&
   2548 		    (tconnp->conn_bound_if == 0 ||
   2549 		    tconnp->conn_bound_if == ifindex)) {
   2550 
   2551 			CONN_INC_REF(tconnp);
   2552 			mutex_exit(&connfp->connf_lock);
   2553 			return (tconnp);
   2554 		}
   2555 	}
   2556 	mutex_exit(&connfp->connf_lock);
   2557 	return (NULL);
   2558 }
   2559 
   2560 /*
   2561  * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate
   2562  * a listener when changing state.
   2563  */
   2564 conn_t *
   2565 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid,
   2566     ip_stack_t *ipst)
   2567 {
   2568 	connf_t		*bind_connfp;
   2569 	conn_t		*connp;
   2570 	tcp_t		*tcp;
   2571 
   2572 	/*
   2573 	 * Avoid false matches for packets sent to an IP destination of
   2574 	 * all zeros.
   2575 	 */
   2576 	if (laddr == 0)
   2577 		return (NULL);
   2578 
   2579 	ASSERT(zoneid != ALL_ZONES);
   2580 
   2581 	bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
   2582 	mutex_enter(&bind_connfp->connf_lock);
   2583 	for (connp = bind_connfp->connf_head; connp != NULL;
   2584 	    connp = connp->conn_next) {
   2585 		tcp = connp->conn_tcp;
   2586 		if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) &&
   2587 		    IPCL_ZONE_MATCH(connp, zoneid) &&
   2588 		    (tcp->tcp_listener == NULL)) {
   2589 			CONN_INC_REF(connp);
   2590 			mutex_exit(&bind_connfp->connf_lock);
   2591 			return (connp);
   2592 		}
   2593 	}
   2594 	mutex_exit(&bind_connfp->connf_lock);
   2595 	return (NULL);
   2596 }
   2597 
   2598 /*
   2599  * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate
   2600  * a listener when changing state.
   2601  */
   2602 conn_t *
   2603 ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex,
   2604     zoneid_t zoneid, ip_stack_t *ipst)
   2605 {
   2606 	connf_t		*bind_connfp;
   2607 	conn_t		*connp = NULL;
   2608 	tcp_t		*tcp;
   2609 
   2610 	/*
   2611 	 * Avoid false matches for packets sent to an IP destination of
   2612 	 * all zeros.
   2613 	 */
   2614 	if (IN6_IS_ADDR_UNSPECIFIED(laddr))
   2615 		return (NULL);
   2616 
   2617 	ASSERT(zoneid != ALL_ZONES);
   2618 
   2619 	bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
   2620 	mutex_enter(&bind_connfp->connf_lock);
   2621 	for (connp = bind_connfp->connf_head; connp != NULL;
   2622 	    connp = connp->conn_next) {
   2623 		tcp = connp->conn_tcp;
   2624 		if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) &&
   2625 		    IPCL_ZONE_MATCH(connp, zoneid) &&
   2626 		    (connp->conn_bound_if == 0 ||
   2627 		    connp->conn_bound_if == ifindex) &&
   2628 		    tcp->tcp_listener == NULL) {
   2629 			CONN_INC_REF(connp);
   2630 			mutex_exit(&bind_connfp->connf_lock);
   2631 			return (connp);
   2632 		}
   2633 	}
   2634 	mutex_exit(&bind_connfp->connf_lock);
   2635 	return (NULL);
   2636 }
   2637 
   2638 /*
   2639  * ipcl_get_next_conn
   2640  *	get the next entry in the conn global list
   2641  *	and put a reference on the next_conn.
   2642  *	decrement the reference on the current conn.
   2643  *
   2644  * This is an iterator based walker function that also provides for
   2645  * some selection by the caller. It walks through the conn_hash bucket
   2646  * searching for the next valid connp in the list, and selects connections
   2647  * that are neither closed nor condemned. It also REFHOLDS the conn
   2648  * thus ensuring that the conn exists when the caller uses the conn.
   2649  */
   2650 conn_t *
   2651 ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags)
   2652 {
   2653 	conn_t	*next_connp;
   2654 
   2655 	if (connfp == NULL)
   2656 		return (NULL);
   2657 
   2658 	mutex_enter(&connfp->connf_lock);
   2659 
   2660 	next_connp = (connp == NULL) ?
   2661 	    connfp->connf_head : connp->conn_g_next;
   2662 
   2663 	while (next_connp != NULL) {
   2664 		mutex_enter(&next_connp->conn_lock);
   2665 		if (!(next_connp->conn_flags & conn_flags) ||
   2666 		    (next_connp->conn_state_flags &
   2667 		    (CONN_CONDEMNED | CONN_INCIPIENT))) {
   2668 			/*
   2669 			 * This conn has been condemned or
   2670 			 * is closing, or the flags don't match
   2671 			 */
   2672 			mutex_exit(&next_connp->conn_lock);
   2673 			next_connp = next_connp->conn_g_next;
   2674 			continue;
   2675 		}
   2676 		CONN_INC_REF_LOCKED(next_connp);
   2677 		mutex_exit(&next_connp->conn_lock);
   2678 		break;
   2679 	}
   2680 
   2681 	mutex_exit(&connfp->connf_lock);
   2682 
   2683 	if (connp != NULL)
   2684 		CONN_DEC_REF(connp);
   2685 
   2686 	return (next_connp);
   2687 }
   2688 
   2689 #ifdef CONN_DEBUG
   2690 /*
   2691  * Trace of the last NBUF refhold/refrele
   2692  */
   2693 int
   2694 conn_trace_ref(conn_t *connp)
   2695 {
   2696 	int	last;
   2697 	conn_trace_t	*ctb;
   2698 
   2699 	ASSERT(MUTEX_HELD(&connp->conn_lock));
   2700 	last = connp->conn_trace_last;
   2701 	last++;
   2702 	if (last == CONN_TRACE_MAX)
   2703 		last = 0;
   2704 
   2705 	ctb = &connp->conn_trace_buf[last];
   2706 	ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
   2707 	connp->conn_trace_last = last;
   2708 	return (1);
   2709 }
   2710 
   2711 int
   2712 conn_untrace_ref(conn_t *connp)
   2713 {
   2714 	int	last;
   2715 	conn_trace_t	*ctb;
   2716 
   2717 	ASSERT(MUTEX_HELD(&connp->conn_lock));
   2718 	last = connp->conn_trace_last;
   2719 	last++;
   2720 	if (last == CONN_TRACE_MAX)
   2721 		last = 0;
   2722 
   2723 	ctb = &connp->conn_trace_buf[last];
   2724 	ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
   2725 	connp->conn_trace_last = last;
   2726 	return (1);
   2727 }
   2728 #endif
   2729