Home | History | Annotate | Download | only in sctp
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include <sys/socket.h>
     28 #include <sys/ddi.h>
     29 #include <sys/sunddi.h>
     30 #include <sys/tsol/tndb.h>
     31 #include <sys/tsol/tnet.h>
     32 
     33 #include <netinet/in.h>
     34 #include <netinet/ip6.h>
     35 
     36 #include <inet/common.h>
     37 #include <inet/ip.h>
     38 #include <inet/ip6.h>
     39 #include <inet/ipclassifier.h>
     40 #include <inet/ipsec_impl.h>
     41 #include <inet/ipp_common.h>
     42 #include <inet/sctp_ip.h>
     43 
     44 #include "sctp_impl.h"
     45 #include "sctp_addr.h"
     46 
/*
 * Default association hash size.  The size must be a power of 2.
 * sctp_hash_init() also treats this value as the minimum size of the
 * connection fanout table.
 */
#define	SCTP_CONN_HASH_SIZE	8192

/*
 * Requested size of the connection fanout table.  sctp_hash_init() copies
 * it into each stack instance, rounds it up to a power of two if needed
 * and raises it to SCTP_CONN_HASH_SIZE when it is smaller than that.
 */
uint_t		sctp_conn_hash_size = SCTP_CONN_HASH_SIZE; /* /etc/system */
     51 
/*
 * Cluster networking hook for traversing current assoc list.
 * This routine is used to extract the current list of live associations
 * which must continue to be dispatched to this node.
 *
 * cl_sctp_walk_list() is the exported entry point; it walks every netstack
 * and hands each SCTP stack instance to cl_sctp_walk_list_stack().
 */
int cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *,
    boolean_t);
static int cl_sctp_walk_list_stack(int (*cl_callback)(cl_sctp_info_t *,
    void *), void *arg, boolean_t cansleep, sctp_stack_t *sctps);
     61 
     62 void
     63 sctp_hash_init(sctp_stack_t *sctps)
     64 {
     65 	int i;
     66 
     67 	/* Start with /etc/system value */
     68 	sctps->sctps_conn_hash_size = sctp_conn_hash_size;
     69 
     70 	if (sctps->sctps_conn_hash_size & (sctps->sctps_conn_hash_size - 1)) {
     71 		/* Not a power of two. Round up to nearest power of two */
     72 		for (i = 0; i < 31; i++) {
     73 			if (sctps->sctps_conn_hash_size < (1 << i))
     74 				break;
     75 		}
     76 		sctps->sctps_conn_hash_size = 1 << i;
     77 	}
     78 	if (sctps->sctps_conn_hash_size < SCTP_CONN_HASH_SIZE) {
     79 		sctps->sctps_conn_hash_size = SCTP_CONN_HASH_SIZE;
     80 		cmn_err(CE_CONT, "using sctp_conn_hash_size = %u\n",
     81 		    sctps->sctps_conn_hash_size);
     82 	}
     83 	sctps->sctps_conn_fanout =
     84 	    (sctp_tf_t *)kmem_zalloc(sctps->sctps_conn_hash_size *
     85 	    sizeof (sctp_tf_t), KM_SLEEP);
     86 	for (i = 0; i < sctps->sctps_conn_hash_size; i++) {
     87 		mutex_init(&sctps->sctps_conn_fanout[i].tf_lock, NULL,
     88 		    MUTEX_DEFAULT, NULL);
     89 	}
     90 	sctps->sctps_listen_fanout = kmem_zalloc(SCTP_LISTEN_FANOUT_SIZE *
     91 	    sizeof (sctp_tf_t),	KM_SLEEP);
     92 	for (i = 0; i < SCTP_LISTEN_FANOUT_SIZE; i++) {
     93 		mutex_init(&sctps->sctps_listen_fanout[i].tf_lock, NULL,
     94 		    MUTEX_DEFAULT, NULL);
     95 	}
     96 	sctps->sctps_bind_fanout = kmem_zalloc(SCTP_BIND_FANOUT_SIZE *
     97 	    sizeof (sctp_tf_t),	KM_SLEEP);
     98 	for (i = 0; i < SCTP_BIND_FANOUT_SIZE; i++) {
     99 		mutex_init(&sctps->sctps_bind_fanout[i].tf_lock, NULL,
    100 		    MUTEX_DEFAULT, NULL);
    101 	}
    102 }
    103 
    104 void
    105 sctp_hash_destroy(sctp_stack_t *sctps)
    106 {
    107 	int i;
    108 
    109 	for (i = 0; i < sctps->sctps_conn_hash_size; i++) {
    110 		mutex_destroy(&sctps->sctps_conn_fanout[i].tf_lock);
    111 	}
    112 	kmem_free(sctps->sctps_conn_fanout, sctps->sctps_conn_hash_size *
    113 	    sizeof (sctp_tf_t));
    114 	sctps->sctps_conn_fanout = NULL;
    115 
    116 	for (i = 0; i < SCTP_LISTEN_FANOUT_SIZE; i++) {
    117 		mutex_destroy(&sctps->sctps_listen_fanout[i].tf_lock);
    118 	}
    119 	kmem_free(sctps->sctps_listen_fanout, SCTP_LISTEN_FANOUT_SIZE *
    120 	    sizeof (sctp_tf_t));
    121 	sctps->sctps_listen_fanout = NULL;
    122 
    123 	for (i = 0; i < SCTP_BIND_FANOUT_SIZE; i++) {
    124 		mutex_destroy(&sctps->sctps_bind_fanout[i].tf_lock);
    125 	}
    126 	kmem_free(sctps->sctps_bind_fanout, SCTP_BIND_FANOUT_SIZE *
    127 	    sizeof (sctp_tf_t));
    128 	sctps->sctps_bind_fanout = NULL;
    129 }
    130 
    131 /*
    132  * Exported routine for extracting active SCTP associations.
    133  * Like TCP, we terminate the walk if the callback returns non-zero.
    134  *
    135  * Need to walk all sctp_stack_t instances since this clustering
    136  * interface is assumed global for all instances
    137  */
    138 int
    139 cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *),
    140     void *arg, boolean_t cansleep)
    141 {
    142 	netstack_handle_t nh;
    143 	netstack_t *ns;
    144 	int ret = 0;
    145 
    146 	netstack_next_init(&nh);
    147 	while ((ns = netstack_next(&nh)) != NULL) {
    148 		ret = cl_sctp_walk_list_stack(cl_callback, arg, cansleep,
    149 		    ns->netstack_sctp);
    150 		netstack_rele(ns);
    151 	}
    152 	netstack_next_fini(&nh);
    153 	return (ret);
    154 }
    155 
    156 static int
    157 cl_sctp_walk_list_stack(int (*cl_callback)(cl_sctp_info_t *, void *),
    158     void *arg, boolean_t cansleep, sctp_stack_t *sctps)
    159 {
    160 	sctp_t		*sctp;
    161 	sctp_t		*sctp_prev;
    162 	cl_sctp_info_t	cl_sctpi;
    163 	uchar_t		*slist;
    164 	uchar_t		*flist;
    165 
    166 	sctp_prev = NULL;
    167 	mutex_enter(&sctps->sctps_g_lock);
    168 	sctp = list_head(&sctps->sctps_g_list);
    169 	while (sctp != NULL) {
    170 		size_t	ssize;
    171 		size_t	fsize;
    172 
    173 		mutex_enter(&sctp->sctp_reflock);
    174 		if (sctp->sctp_condemned || sctp->sctp_state <= SCTPS_LISTEN) {
    175 			mutex_exit(&sctp->sctp_reflock);
    176 			sctp = list_next(&sctps->sctps_g_list, sctp);
    177 			continue;
    178 		}
    179 		sctp->sctp_refcnt++;
    180 		mutex_exit(&sctp->sctp_reflock);
    181 		mutex_exit(&sctps->sctps_g_lock);
    182 		if (sctp_prev != NULL)
    183 			SCTP_REFRELE(sctp_prev);
    184 		RUN_SCTP(sctp);
    185 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
    186 		fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
    187 
    188 		slist = kmem_alloc(ssize, cansleep ? KM_SLEEP : KM_NOSLEEP);
    189 		flist = kmem_alloc(fsize, cansleep ? KM_SLEEP : KM_NOSLEEP);
    190 		if (slist == NULL || flist == NULL) {
    191 			WAKE_SCTP(sctp);
    192 			if (slist != NULL)
    193 				kmem_free(slist, ssize);
    194 			if (flist != NULL)
    195 				kmem_free(flist, fsize);
    196 			SCTP_REFRELE(sctp);
    197 			return (1);
    198 		}
    199 		cl_sctpi.cl_sctpi_version = CL_SCTPI_V1;
    200 		sctp_get_saddr_list(sctp, slist, ssize);
    201 		sctp_get_faddr_list(sctp, flist, fsize);
    202 		cl_sctpi.cl_sctpi_nladdr = sctp->sctp_nsaddrs;
    203 		cl_sctpi.cl_sctpi_nfaddr = sctp->sctp_nfaddrs;
    204 		cl_sctpi.cl_sctpi_family = sctp->sctp_connp->conn_family;
    205 		if (cl_sctpi.cl_sctpi_family == AF_INET)
    206 			cl_sctpi.cl_sctpi_ipversion = IPV4_VERSION;
    207 		else
    208 			cl_sctpi.cl_sctpi_ipversion = IPV6_VERSION;
    209 		cl_sctpi.cl_sctpi_state = sctp->sctp_state;
    210 		cl_sctpi.cl_sctpi_lport = sctp->sctp_connp->conn_lport;
    211 		cl_sctpi.cl_sctpi_fport = sctp->sctp_connp->conn_fport;
    212 		cl_sctpi.cl_sctpi_handle = (cl_sctp_handle_t)sctp;
    213 		WAKE_SCTP(sctp);
    214 		cl_sctpi.cl_sctpi_laddrp = slist;
    215 		cl_sctpi.cl_sctpi_faddrp = flist;
    216 		if ((*cl_callback)(&cl_sctpi, arg) != 0) {
    217 			kmem_free(slist, ssize);
    218 			kmem_free(flist, fsize);
    219 			SCTP_REFRELE(sctp);
    220 			return (1);
    221 		}
    222 		/* list will be freed by cl_callback */
    223 		sctp_prev = sctp;
    224 		mutex_enter(&sctps->sctps_g_lock);
    225 		sctp = list_next(&sctps->sctps_g_list, sctp);
    226 	}
    227 	mutex_exit(&sctps->sctps_g_lock);
    228 	if (sctp_prev != NULL)
    229 		SCTP_REFRELE(sctp_prev);
    230 	return (0);
    231 }
    232 
    233 sctp_t *
    234 sctp_conn_match(in6_addr_t *faddr, in6_addr_t *laddr, uint32_t ports,
    235     zoneid_t zoneid, iaflags_t iraflags, sctp_stack_t *sctps)
    236 {
    237 	sctp_tf_t		*tf;
    238 	sctp_t			*sctp;
    239 	sctp_faddr_t		*fp;
    240 	conn_t			*connp;
    241 
    242 	tf = &(sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps, ports)]);
    243 	mutex_enter(&tf->tf_lock);
    244 
    245 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
    246 		connp = sctp->sctp_connp;
    247 		if (ports != connp->conn_ports)
    248 			continue;
    249 		if (!(connp->conn_zoneid == zoneid ||
    250 		    connp->conn_allzones ||
    251 		    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
    252 		    (iraflags & IRAF_TX_MAC_EXEMPTABLE) &&
    253 		    (iraflags & IRAF_TX_SHARED_ADDR))))
    254 			continue;
    255 
    256 		/* check for faddr match */
    257 		for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
    258 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->faddr)) {
    259 				break;
    260 			}
    261 		}
    262 
    263 		/* no faddr match; keep looking */
    264 		if (fp == NULL)
    265 			continue;
    266 
    267 		/* check for laddr match */
    268 		if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
    269 			SCTP_REFHOLD(sctp);
    270 			goto done;
    271 		}
    272 		/* no match; continue to the next in the chain */
    273 	}
    274 
    275 done:
    276 	mutex_exit(&tf->tf_lock);
    277 	return (sctp);
    278 }
    279 
    280 static sctp_t *
    281 listen_match(in6_addr_t *laddr, uint32_t ports, zoneid_t zoneid,
    282     iaflags_t iraflags, sctp_stack_t *sctps)
    283 {
    284 	sctp_t			*sctp;
    285 	sctp_tf_t		*tf;
    286 	uint16_t		lport;
    287 	conn_t			*connp;
    288 
    289 	lport = ((uint16_t *)&ports)[1];
    290 
    291 	tf = &(sctps->sctps_listen_fanout[SCTP_LISTEN_HASH(ntohs(lport))]);
    292 	mutex_enter(&tf->tf_lock);
    293 
    294 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_listen_hash_next) {
    295 		connp = sctp->sctp_connp;
    296 		if (lport != connp->conn_lport)
    297 			continue;
    298 
    299 		if (!(connp->conn_zoneid == zoneid ||
    300 		    connp->conn_allzones ||
    301 		    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
    302 		    (iraflags & IRAF_TX_MAC_EXEMPTABLE) &&
    303 		    (iraflags & IRAF_TX_SHARED_ADDR))))
    304 			continue;
    305 
    306 		if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
    307 			SCTP_REFHOLD(sctp);
    308 			goto done;
    309 		}
    310 		/* no match; continue to the next in the chain */
    311 	}
    312 
    313 done:
    314 	mutex_exit(&tf->tf_lock);
    315 	return (sctp);
    316 }
    317 
    318 /* called by ipsec_sctp_pol */
    319 conn_t *
    320 sctp_find_conn(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
    321     zoneid_t zoneid, iaflags_t iraflags, sctp_stack_t *sctps)
    322 {
    323 	sctp_t *sctp;
    324 
    325 	sctp = sctp_conn_match(src, dst, ports, zoneid, iraflags, sctps);
    326 	if (sctp == NULL) {
    327 		/* Not in conn fanout; check listen fanout */
    328 		sctp = listen_match(dst, ports, zoneid, iraflags, sctps);
    329 		if (sctp == NULL)
    330 			return (NULL);
    331 	}
    332 	return (sctp->sctp_connp);
    333 }
    334 
    335 /*
    336  * Fanout to a sctp instance.
    337  */
    338 conn_t *
    339 sctp_fanout(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
    340     ip_recv_attr_t *ira, mblk_t *mp, sctp_stack_t *sctps)
    341 {
    342 	zoneid_t zoneid = ira->ira_zoneid;
    343 	iaflags_t iraflags = ira->ira_flags;
    344 	sctp_t *sctp;
    345 
    346 	sctp = sctp_conn_match(src, dst, ports, zoneid, iraflags, sctps);
    347 	if (sctp == NULL) {
    348 		/* Not in conn fanout; check listen fanout */
    349 		sctp = listen_match(dst, ports, zoneid, iraflags, sctps);
    350 		if (sctp == NULL)
    351 			return (NULL);
    352 		/*
    353 		 * On systems running trusted extensions, check if dst
    354 		 * should accept the packet. "IPV6_VERSION" indicates
    355 		 * that dst is in 16 byte AF_INET6 format. IPv4-mapped
    356 		 * IPv6 addresses are supported.
    357 		 */
    358 		if ((iraflags & IRAF_SYSTEM_LABELED) &&
    359 		    !tsol_receive_local(mp, dst, IPV6_VERSION, ira,
    360 		    sctp->sctp_connp)) {
    361 			DTRACE_PROBE3(
    362 			    tx__ip__log__info__classify__sctp,
    363 			    char *,
    364 			    "connp(1) could not receive mp(2)",
    365 			    conn_t *, sctp->sctp_connp, mblk_t *, mp);
    366 			SCTP_REFRELE(sctp);
    367 			return (NULL);
    368 		}
    369 	}
    370 	/*
    371 	 * For labeled systems, there's no need to check the
    372 	 * label here.  It's known to be good as we checked
    373 	 * before allowing the connection to become bound.
    374 	 */
    375 	return (sctp->sctp_connp);
    376 }
    377 
    378 /*
    379  * Fanout for ICMP errors for SCTP
    380  * The caller puts <fport, lport> in the ports parameter.
    381  */
    382 void
    383 ip_fanout_sctp(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h, uint32_t ports,
    384     ip_recv_attr_t *ira)
    385 {
    386 	sctp_t		*sctp;
    387 	conn_t		*connp;
    388 	in6_addr_t	map_src, map_dst;
    389 	in6_addr_t	*src, *dst;
    390 	boolean_t	secure;
    391 	ill_t		*ill = ira->ira_ill;
    392 	ip_stack_t	*ipst = ill->ill_ipst;
    393 	netstack_t	*ns = ipst->ips_netstack;
    394 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
    395 	sctp_stack_t	*sctps = ns->netstack_sctp;
    396 	iaflags_t	iraflags = ira->ira_flags;
    397 	ill_t		*rill = ira->ira_rill;
    398 
    399 	ASSERT(iraflags & IRAF_ICMP_ERROR);
    400 
    401 	secure = iraflags & IRAF_IPSEC_SECURE;
    402 
    403 	/* Assume IP provides aligned packets - otherwise toss */
    404 	if (!OK_32PTR(mp->b_rptr)) {
    405 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
    406 		ip_drop_input("ipIfStatsInDiscards", mp, ill);
    407 		freemsg(mp);
    408 		return;
    409 	}
    410 
    411 	if (!(iraflags & IRAF_IS_IPV4)) {
    412 		src = &ip6h->ip6_src;
    413 		dst = &ip6h->ip6_dst;
    414 	} else {
    415 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src);
    416 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst);
    417 		src = &map_src;
    418 		dst = &map_dst;
    419 	}
    420 	connp = sctp_fanout(src, dst, ports, ira, mp, sctps);
    421 	if (connp == NULL) {
    422 		ip_fanout_sctp_raw(mp, ipha, ip6h, ports, ira);
    423 		return;
    424 	}
    425 	sctp = CONN2SCTP(connp);
    426 
    427 	/*
    428 	 * We check some fields in conn_t without holding a lock.
    429 	 * This should be fine.
    430 	 */
    431 	if (((iraflags & IRAF_IS_IPV4) ?
    432 	    CONN_INBOUND_POLICY_PRESENT(connp, ipss) :
    433 	    CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss)) ||
    434 	    secure) {
    435 		mp = ipsec_check_inbound_policy(mp, connp, ipha,
    436 		    ip6h, ira);
    437 		if (mp == NULL) {
    438 			SCTP_REFRELE(sctp);
    439 			return;
    440 		}
    441 	}
    442 
    443 	ira->ira_ill = ira->ira_rill = NULL;
    444 
    445 	mutex_enter(&sctp->sctp_lock);
    446 	if (sctp->sctp_running) {
    447 		sctp_add_recvq(sctp, mp, B_FALSE, ira);
    448 		mutex_exit(&sctp->sctp_lock);
    449 	} else {
    450 		sctp->sctp_running = B_TRUE;
    451 		mutex_exit(&sctp->sctp_lock);
    452 
    453 		mutex_enter(&sctp->sctp_recvq_lock);
    454 		if (sctp->sctp_recvq != NULL) {
    455 			sctp_add_recvq(sctp, mp, B_TRUE, ira);
    456 			mutex_exit(&sctp->sctp_recvq_lock);
    457 			WAKE_SCTP(sctp);
    458 		} else {
    459 			mutex_exit(&sctp->sctp_recvq_lock);
    460 			if (ira->ira_flags & IRAF_ICMP_ERROR) {
    461 				sctp_icmp_error(sctp, mp);
    462 			} else {
    463 				sctp_input_data(sctp, mp, ira);
    464 			}
    465 			WAKE_SCTP(sctp);
    466 		}
    467 	}
    468 	SCTP_REFRELE(sctp);
    469 	ira->ira_ill = ill;
    470 	ira->ira_rill = rill;
    471 }
    472 
    473 void
    474 sctp_conn_hash_remove(sctp_t *sctp)
    475 {
    476 	sctp_tf_t *tf = sctp->sctp_conn_tfp;
    477 
    478 	if (!tf) {
    479 		return;
    480 	}
    481 	/*
    482 	 * On a clustered note send this notification to the clustering
    483 	 * subsystem.
    484 	 */
    485 	if (cl_sctp_disconnect != NULL) {
    486 		(*cl_sctp_disconnect)(sctp->sctp_connp->conn_family,
    487 		    (cl_sctp_handle_t)sctp);
    488 	}
    489 
    490 	mutex_enter(&tf->tf_lock);
    491 	ASSERT(tf->tf_sctp);
    492 	if (tf->tf_sctp == sctp) {
    493 		tf->tf_sctp = sctp->sctp_conn_hash_next;
    494 		if (sctp->sctp_conn_hash_next) {
    495 			ASSERT(tf->tf_sctp->sctp_conn_hash_prev == sctp);
    496 			tf->tf_sctp->sctp_conn_hash_prev = NULL;
    497 		}
    498 	} else {
    499 		ASSERT(sctp->sctp_conn_hash_prev);
    500 		ASSERT(sctp->sctp_conn_hash_prev->sctp_conn_hash_next == sctp);
    501 		sctp->sctp_conn_hash_prev->sctp_conn_hash_next =
    502 		    sctp->sctp_conn_hash_next;
    503 
    504 		if (sctp->sctp_conn_hash_next) {
    505 			ASSERT(sctp->sctp_conn_hash_next->sctp_conn_hash_prev
    506 			    == sctp);
    507 			sctp->sctp_conn_hash_next->sctp_conn_hash_prev =
    508 			    sctp->sctp_conn_hash_prev;
    509 		}
    510 	}
    511 	sctp->sctp_conn_hash_next = NULL;
    512 	sctp->sctp_conn_hash_prev = NULL;
    513 	sctp->sctp_conn_tfp = NULL;
    514 	mutex_exit(&tf->tf_lock);
    515 }
    516 
    517 void
    518 sctp_conn_hash_insert(sctp_tf_t *tf, sctp_t *sctp, int caller_holds_lock)
    519 {
    520 	if (sctp->sctp_conn_tfp) {
    521 		sctp_conn_hash_remove(sctp);
    522 	}
    523 
    524 	if (!caller_holds_lock) {
    525 		mutex_enter(&tf->tf_lock);
    526 	} else {
    527 		ASSERT(MUTEX_HELD(&tf->tf_lock));
    528 	}
    529 
    530 	sctp->sctp_conn_hash_next = tf->tf_sctp;
    531 	if (tf->tf_sctp) {
    532 		tf->tf_sctp->sctp_conn_hash_prev = sctp;
    533 	}
    534 	sctp->sctp_conn_hash_prev = NULL;
    535 	tf->tf_sctp = sctp;
    536 	sctp->sctp_conn_tfp = tf;
    537 	if (!caller_holds_lock) {
    538 		mutex_exit(&tf->tf_lock);
    539 	}
    540 }
    541 
    542 void
    543 sctp_listen_hash_remove(sctp_t *sctp)
    544 {
    545 	sctp_tf_t *tf = sctp->sctp_listen_tfp;
    546 	conn_t	*connp = sctp->sctp_connp;
    547 
    548 	if (!tf) {
    549 		return;
    550 	}
    551 	/*
    552 	 * On a clustered note send this notification to the clustering
    553 	 * subsystem.
    554 	 */
    555 	if (cl_sctp_unlisten != NULL) {
    556 		uchar_t	*slist;
    557 		ssize_t	ssize;
    558 
    559 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
    560 		slist = kmem_alloc(ssize, KM_SLEEP);
    561 		sctp_get_saddr_list(sctp, slist, ssize);
    562 		(*cl_sctp_unlisten)(connp->conn_family, slist,
    563 		    sctp->sctp_nsaddrs, connp->conn_lport);
    564 		/* list will be freed by the clustering module */
    565 	}
    566 
    567 	mutex_enter(&tf->tf_lock);
    568 	ASSERT(tf->tf_sctp);
    569 	if (tf->tf_sctp == sctp) {
    570 		tf->tf_sctp = sctp->sctp_listen_hash_next;
    571 		if (sctp->sctp_listen_hash_next != NULL) {
    572 			ASSERT(tf->tf_sctp->sctp_listen_hash_prev == sctp);
    573 			tf->tf_sctp->sctp_listen_hash_prev = NULL;
    574 		}
    575 	} else {
    576 		ASSERT(sctp->sctp_listen_hash_prev);
    577 		ASSERT(sctp->sctp_listen_hash_prev->sctp_listen_hash_next ==
    578 		    sctp);
    579 		ASSERT(sctp->sctp_listen_hash_next == NULL ||
    580 		    sctp->sctp_listen_hash_next->sctp_listen_hash_prev == sctp);
    581 
    582 		sctp->sctp_listen_hash_prev->sctp_listen_hash_next =
    583 		    sctp->sctp_listen_hash_next;
    584 
    585 		if (sctp->sctp_listen_hash_next != NULL) {
    586 			sctp_t *next = sctp->sctp_listen_hash_next;
    587 
    588 			ASSERT(next->sctp_listen_hash_prev == sctp);
    589 			next->sctp_listen_hash_prev =
    590 			    sctp->sctp_listen_hash_prev;
    591 		}
    592 	}
    593 	sctp->sctp_listen_hash_next = NULL;
    594 	sctp->sctp_listen_hash_prev = NULL;
    595 	sctp->sctp_listen_tfp = NULL;
    596 	mutex_exit(&tf->tf_lock);
    597 }
    598 
    599 void
    600 sctp_listen_hash_insert(sctp_tf_t *tf, sctp_t *sctp)
    601 {
    602 	conn_t	*connp = sctp->sctp_connp;
    603 
    604 	if (sctp->sctp_listen_tfp) {
    605 		sctp_listen_hash_remove(sctp);
    606 	}
    607 
    608 	mutex_enter(&tf->tf_lock);
    609 	sctp->sctp_listen_hash_next = tf->tf_sctp;
    610 	if (tf->tf_sctp) {
    611 		tf->tf_sctp->sctp_listen_hash_prev = sctp;
    612 	}
    613 	sctp->sctp_listen_hash_prev = NULL;
    614 	tf->tf_sctp = sctp;
    615 	sctp->sctp_listen_tfp = tf;
    616 	mutex_exit(&tf->tf_lock);
    617 	/*
    618 	 * On a clustered note send this notification to the clustering
    619 	 * subsystem.
    620 	 */
    621 	if (cl_sctp_listen != NULL) {
    622 		uchar_t	*slist;
    623 		ssize_t	ssize;
    624 
    625 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
    626 		slist = kmem_alloc(ssize, KM_SLEEP);
    627 		sctp_get_saddr_list(sctp, slist, ssize);
    628 		(*cl_sctp_listen)(connp->conn_family, slist,
    629 		    sctp->sctp_nsaddrs, connp->conn_lport);
    630 		/* list will be freed by the clustering module */
    631 	}
    632 }
    633 
    634 /*
    635  * Hash list insertion routine for sctp_t structures.
    636  * Inserts entries with the ones bound to a specific IP address first
    637  * followed by those bound to INADDR_ANY.
    638  */
    639 void
    640 sctp_bind_hash_insert(sctp_tf_t *tbf, sctp_t *sctp, int caller_holds_lock)
    641 {
    642 	sctp_t	**sctpp;
    643 	sctp_t	*sctpnext;
    644 
    645 	if (sctp->sctp_ptpbhn != NULL) {
    646 		ASSERT(!caller_holds_lock);
    647 		sctp_bind_hash_remove(sctp);
    648 	}
    649 	sctpp = &tbf->tf_sctp;
    650 	if (!caller_holds_lock) {
    651 		mutex_enter(&tbf->tf_lock);
    652 	} else {
    653 		ASSERT(MUTEX_HELD(&tbf->tf_lock));
    654 	}
    655 	sctpnext = sctpp[0];
    656 	if (sctpnext) {
    657 		sctpnext->sctp_ptpbhn = &sctp->sctp_bind_hash;
    658 	}
    659 	sctp->sctp_bind_hash = sctpnext;
    660 	sctp->sctp_ptpbhn = sctpp;
    661 	sctpp[0] = sctp;
    662 	/* For sctp_*_hash_remove */
    663 	sctp->sctp_bind_lockp = &tbf->tf_lock;
    664 	if (!caller_holds_lock)
    665 		mutex_exit(&tbf->tf_lock);
    666 }
    667 
    668 /*
    669  * Hash list removal routine for sctp_t structures.
    670  */
    671 void
    672 sctp_bind_hash_remove(sctp_t *sctp)
    673 {
    674 	sctp_t	*sctpnext;
    675 	kmutex_t *lockp;
    676 
    677 	lockp = sctp->sctp_bind_lockp;
    678 
    679 	if (sctp->sctp_ptpbhn == NULL)
    680 		return;
    681 
    682 	ASSERT(lockp != NULL);
    683 	mutex_enter(lockp);
    684 	if (sctp->sctp_ptpbhn) {
    685 		sctpnext = sctp->sctp_bind_hash;
    686 		if (sctpnext) {
    687 			sctpnext->sctp_ptpbhn = sctp->sctp_ptpbhn;
    688 			sctp->sctp_bind_hash = NULL;
    689 		}
    690 		*sctp->sctp_ptpbhn = sctpnext;
    691 		sctp->sctp_ptpbhn = NULL;
    692 	}
    693 	mutex_exit(lockp);
    694 	sctp->sctp_bind_lockp = NULL;
    695 }
    696 
    697 /*
    698  * Similar to but different from sctp_conn_match().
    699  *
    700  * Matches sets of addresses as follows: if the argument addr set is
    701  * a complete subset of the corresponding addr set in the sctp_t, it
    702  * is a match.
    703  *
    704  * Caller must hold tf->tf_lock.
    705  *
    706  * Returns with a SCTP_REFHOLD sctp structure. Caller must do a SCTP_REFRELE.
    707  */
    708 sctp_t *
    709 sctp_lookup(sctp_t *sctp1, in6_addr_t *faddr, sctp_tf_t *tf, uint32_t *ports,
    710     int min_state)
    711 {
    712 	sctp_t *sctp;
    713 	sctp_faddr_t *fp;
    714 
    715 	ASSERT(MUTEX_HELD(&tf->tf_lock));
    716 
    717 	for (sctp = tf->tf_sctp; sctp != NULL;
    718 	    sctp = sctp->sctp_conn_hash_next) {
    719 		if (*ports != sctp->sctp_connp->conn_ports ||
    720 		    sctp->sctp_state < min_state) {
    721 			continue;
    722 		}
    723 
    724 		/* check for faddr match */
    725 		for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
    726 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->faddr)) {
    727 				break;
    728 			}
    729 		}
    730 
    731 		if (fp == NULL) {
    732 			/* no faddr match; keep looking */
    733 			continue;
    734 		}
    735 
    736 		/*
    737 		 * There is an existing association with the same peer
    738 		 * address.  So now we need to check if our local address
    739 		 * set overlaps with the one of the existing association.
    740 		 * If they overlap, we should return it.
    741 		 */
    742 		if (sctp_compare_saddrs(sctp1, sctp) <= SCTP_ADDR_OVERLAP) {
    743 			goto done;
    744 		}
    745 
    746 		/* no match; continue searching */
    747 	}
    748 
    749 done:
    750 	if (sctp != NULL) {
    751 		SCTP_REFHOLD(sctp);
    752 	}
    753 	return (sctp);
    754 }
    755