Home | History | Annotate | Download | only in sctp
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include <sys/types.h>
     28 #include <sys/systm.h>
     29 #include <sys/stream.h>
     30 #include <sys/cmn_err.h>
     31 #include <sys/kmem.h>
     32 #define	_SUN_TPI_VERSION 2
     33 #include <sys/tihdr.h>
     34 #include <sys/stropts.h>
     35 #include <sys/strsubr.h>
     36 #include <sys/socket.h>
     37 #include <sys/tsol/tndb.h>
     38 
     39 #include <netinet/in.h>
     40 #include <netinet/ip6.h>
     41 
     42 #include <inet/common.h>
     43 #include <inet/ip.h>
     44 #include <inet/ip6.h>
     45 #include <inet/ipclassifier.h>
     46 #include <inet/ipsec_impl.h>
     47 
     48 #include "sctp_impl.h"
     49 #include "sctp_addr.h"
     50 
     51 /*
     52  * Common accept code.  Called by sctp_conn_request.
     53  * cr_pkt is the INIT / INIT ACK packet.
     54  */
     55 static int
     56 sctp_accept_comm(sctp_t *listener, sctp_t *acceptor, mblk_t *cr_pkt,
     57     uint_t ip_hdr_len, sctp_init_chunk_t *iack)
     58 {
     59 
     60 	sctp_hdr_t		*sctph;
     61 	sctp_chunk_hdr_t	*ich;
     62 	sctp_init_chunk_t	*init;
     63 	int			err;
     64 	uint_t			sctp_options;
     65 	conn_t			*aconnp;
     66 	conn_t			*lconnp;
     67 	sctp_stack_t	*sctps = listener->sctp_sctps;
     68 
     69 	sctph = (sctp_hdr_t *)(cr_pkt->b_rptr + ip_hdr_len);
     70 	ASSERT(OK_32PTR(sctph));
     71 
     72 	aconnp = acceptor->sctp_connp;
     73 	lconnp = listener->sctp_connp;
     74 	aconnp->conn_lport = lconnp->conn_lport;
     75 	aconnp->conn_fport = sctph->sh_sport;
     76 
     77 	ich = (sctp_chunk_hdr_t *)(iack + 1);
     78 	init = (sctp_init_chunk_t *)(ich + 1);
     79 
     80 	/* acceptor isn't in any fanouts yet, so don't need to hold locks */
     81 	ASSERT(acceptor->sctp_faddrs == NULL);
     82 	err = sctp_get_addrparams(acceptor, listener, cr_pkt, ich,
     83 	    &sctp_options);
     84 	if (err != 0)
     85 		return (err);
     86 
     87 	if ((err = sctp_set_hdraddrs(acceptor)) != 0)
     88 		return (err);
     89 
     90 	if ((err = sctp_build_hdrs(acceptor, KM_NOSLEEP)) != 0)
     91 		return (err);
     92 
     93 	if ((sctp_options & SCTP_PRSCTP_OPTION) &&
     94 	    listener->sctp_prsctp_aware && sctps->sctps_prsctp_enabled) {
     95 		acceptor->sctp_prsctp_aware = B_TRUE;
     96 	} else {
     97 		acceptor->sctp_prsctp_aware = B_FALSE;
     98 	}
     99 
    100 	/* Get  initial TSNs */
    101 	acceptor->sctp_ltsn = ntohl(iack->sic_inittsn);
    102 	acceptor->sctp_recovery_tsn = acceptor->sctp_lastack_rxd =
    103 	    acceptor->sctp_ltsn - 1;
    104 	acceptor->sctp_adv_pap = acceptor->sctp_lastack_rxd;
    105 	/* Serial numbers are initialized to the same value as the TSNs */
    106 	acceptor->sctp_lcsn = acceptor->sctp_ltsn;
    107 
    108 	if (!sctp_initialize_params(acceptor, init, iack))
    109 		return (ENOMEM);
    110 
    111 	/*
    112 	 * Copy sctp_secret from the listener in case we need to validate
    113 	 * a possibly delayed cookie.
    114 	 */
    115 	bcopy(listener->sctp_secret, acceptor->sctp_secret, SCTP_SECRET_LEN);
    116 	bcopy(listener->sctp_old_secret, acceptor->sctp_old_secret,
    117 	    SCTP_SECRET_LEN);
    118 	acceptor->sctp_last_secret_update = ddi_get_lbolt64();
    119 
    120 	/*
    121 	 * After acceptor is inserted in the hash list, it can be found.
    122 	 * So we need to lock it here.
    123 	 */
    124 	RUN_SCTP(acceptor);
    125 
    126 	sctp_conn_hash_insert(&sctps->sctps_conn_fanout[
    127 	    SCTP_CONN_HASH(sctps, aconnp->conn_ports)], acceptor, 0);
    128 	sctp_bind_hash_insert(&sctps->sctps_bind_fanout[
    129 	    SCTP_BIND_HASH(ntohs(aconnp->conn_lport))], acceptor, 0);
    130 
    131 	/*
    132 	 * No need to check for multicast destination since ip will only pass
    133 	 * up multicasts to those that have expressed interest
    134 	 * TODO: what about rejecting broadcasts?
    135 	 * Also check that source is not a multicast or broadcast address.
    136 	 */
    137 	/* XXXSCTP */
    138 	acceptor->sctp_state = SCTPS_ESTABLISHED;
    139 	acceptor->sctp_assoc_start_time = (uint32_t)ddi_get_lbolt();
    140 	/*
    141 	 * listener->sctp_rwnd should be the default window size or a
    142 	 * window size changed via SO_RCVBUF option.
    143 	 */
    144 	acceptor->sctp_rwnd = listener->sctp_rwnd;
    145 	acceptor->sctp_irwnd = acceptor->sctp_rwnd;
    146 	acceptor->sctp_pd_point = acceptor->sctp_rwnd;
    147 	acceptor->sctp_upcalls = listener->sctp_upcalls;
    148 
    149 	return (0);
    150 }
    151 
    152 /* Process the COOKIE packet, mp, directed at the listener 'sctp' */
    153 sctp_t *
    154 sctp_conn_request(sctp_t *sctp, mblk_t *mp, uint_t ifindex, uint_t ip_hdr_len,
    155     sctp_init_chunk_t *iack, ip_recv_attr_t *ira)
    156 {
    157 	sctp_t	*eager;
    158 	ip6_t	*ip6h;
    159 	int	err;
    160 	conn_t	*connp, *econnp;
    161 	sctp_stack_t	*sctps;
    162 	struct sock_proto_props sopp;
    163 	cred_t		*cr;
    164 	pid_t		cpid;
    165 	in6_addr_t	faddr, laddr;
    166 	ip_xmit_attr_t	*ixa;
    167 
    168 	/*
    169 	 * No need to check for duplicate as this is the listener
    170 	 * and we are holding the lock.  This means that no new
    171 	 * connection can be created out of it.  And since the
    172 	 * fanout already done cannot find a match, it means that
    173 	 * there is no duplicate.
    174 	 */
    175 	ASSERT(OK_32PTR(mp->b_rptr));
    176 
    177 	if ((eager = sctp_create_eager(sctp)) == NULL) {
    178 		return (NULL);
    179 	}
    180 
    181 	connp = sctp->sctp_connp;
    182 	sctps = sctp->sctp_sctps;
    183 	econnp = eager->sctp_connp;
    184 
    185 	if (connp->conn_policy != NULL) {
    186 		/* Inherit the policy from the listener; use actions from ira */
    187 		if (!ip_ipsec_policy_inherit(econnp, connp, ira)) {
    188 			sctp_close_eager(eager);
    189 			BUMP_MIB(&sctps->sctps_mib, sctpListenDrop);
    190 			return (NULL);
    191 		}
    192 	}
    193 
    194 	ip6h = (ip6_t *)mp->b_rptr;
    195 	if (ira->ira_flags & IXAF_IS_IPV4) {
    196 		ipha_t	*ipha;
    197 
    198 		ipha = (ipha_t *)ip6h;
    199 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &laddr);
    200 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &faddr);
    201 	} else {
    202 		laddr = ip6h->ip6_dst;
    203 		faddr = ip6h->ip6_src;
    204 	}
    205 
    206 	if (ira->ira_flags & IRAF_IPSEC_SECURE) {
    207 		/*
    208 		 * XXX need to fix the cached policy issue here.
    209 		 * We temporarily set the conn_laddr/conn_faddr here so
    210 		 * that IPsec can use it for the latched policy
    211 		 * selector.  This is obvioursly wrong as SCTP can
    212 		 * use different addresses...
    213 		 */
    214 		econnp->conn_laddr_v6 = laddr;
    215 		econnp->conn_faddr_v6 = faddr;
    216 		econnp->conn_saddr_v6 = laddr;
    217 	}
    218 	if (ipsec_conn_cache_policy(econnp,
    219 	    (ira->ira_flags & IRAF_IS_IPV4) != 0) != 0) {
    220 		sctp_close_eager(eager);
    221 		BUMP_MIB(&sctps->sctps_mib, sctpListenDrop);
    222 		return (NULL);
    223 	}
    224 
    225 	/* Save for getpeerucred */
    226 	cr = ira->ira_cred;
    227 	cpid = ira->ira_cpid;
    228 
    229 	if (is_system_labeled()) {
    230 		ip_xmit_attr_t *ixa = econnp->conn_ixa;
    231 
    232 		ASSERT(ira->ira_tsl != NULL);
    233 
    234 		/* Discard any old label */
    235 		if (ixa->ixa_free_flags & IXA_FREE_TSL) {
    236 			ASSERT(ixa->ixa_tsl != NULL);
    237 			label_rele(ixa->ixa_tsl);
    238 			ixa->ixa_free_flags &= ~IXA_FREE_TSL;
    239 			ixa->ixa_tsl = NULL;
    240 		}
    241 
    242 		if ((connp->conn_mlp_type != mlptSingle ||
    243 		    connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
    244 		    ira->ira_tsl != NULL) {
    245 			/*
    246 			 * If this is an MLP connection or a MAC-Exempt
    247 			 * connection with an unlabeled node, packets are to be
    248 			 * exchanged using the security label of the received
    249 			 * Cookie packet instead of the server application's
    250 			 * label.
    251 			 * tsol_check_dest called from ip_set_destination
    252 			 * might later update TSF_UNLABELED by replacing
    253 			 * ixa_tsl with a new label.
    254 			 */
    255 			label_hold(ira->ira_tsl);
    256 			ip_xmit_attr_replace_tsl(ixa, ira->ira_tsl);
    257 		} else {
    258 			ixa->ixa_tsl = crgetlabel(econnp->conn_cred);
    259 		}
    260 	}
    261 
    262 	err = sctp_accept_comm(sctp, eager, mp, ip_hdr_len, iack);
    263 	if (err != 0) {
    264 		sctp_close_eager(eager);
    265 		BUMP_MIB(&sctps->sctps_mib, sctpListenDrop);
    266 		return (NULL);
    267 	}
    268 
    269 	ASSERT(eager->sctp_current->ixa != NULL);
    270 
    271 	ixa = eager->sctp_current->ixa;
    272 	if (!(ira->ira_flags & IXAF_IS_IPV4)) {
    273 		ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
    274 
    275 		if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) ||
    276 		    IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) {
    277 			eager->sctp_linklocal = 1;
    278 
    279 			ixa->ixa_flags |= IXAF_SCOPEID_SET;
    280 			ixa->ixa_scopeid = ifindex;
    281 			econnp->conn_incoming_ifindex = ifindex;
    282 		}
    283 	}
    284 
    285 	/*
    286 	 * On a clustered note send this notification to the clustering
    287 	 * subsystem.
    288 	 */
    289 	if (cl_sctp_connect != NULL) {
    290 		uchar_t	*slist;
    291 		uchar_t	*flist;
    292 		size_t	fsize;
    293 		size_t	ssize;
    294 
    295 		fsize = sizeof (in6_addr_t) * eager->sctp_nfaddrs;
    296 		ssize = sizeof (in6_addr_t) * eager->sctp_nsaddrs;
    297 		slist = kmem_alloc(ssize, KM_NOSLEEP);
    298 		flist = kmem_alloc(fsize, KM_NOSLEEP);
    299 		if (slist == NULL || flist == NULL) {
    300 			if (slist != NULL)
    301 				kmem_free(slist, ssize);
    302 			if (flist != NULL)
    303 				kmem_free(flist, fsize);
    304 			sctp_close_eager(eager);
    305 			BUMP_MIB(&sctps->sctps_mib, sctpListenDrop);
    306 			SCTP_KSTAT(sctps, sctp_cl_connect);
    307 			return (NULL);
    308 		}
    309 		/* The clustering module frees these list */
    310 		sctp_get_saddr_list(eager, slist, ssize);
    311 		sctp_get_faddr_list(eager, flist, fsize);
    312 		(*cl_sctp_connect)(econnp->conn_family, slist,
    313 		    eager->sctp_nsaddrs, econnp->conn_lport, flist,
    314 		    eager->sctp_nfaddrs, econnp->conn_fport, B_FALSE,
    315 		    (cl_sctp_handle_t)eager);
    316 	}
    317 
    318 	/* Connection established, so send up the conn_ind */
    319 	if ((eager->sctp_ulpd = sctp->sctp_ulp_newconn(sctp->sctp_ulpd,
    320 	    (sock_lower_handle_t)eager, NULL, cr, cpid,
    321 	    &eager->sctp_upcalls)) == NULL) {
    322 		sctp_close_eager(eager);
    323 		BUMP_MIB(&sctps->sctps_mib, sctpListenDrop);
    324 		return (NULL);
    325 	}
    326 	ASSERT(SCTP_IS_DETACHED(eager));
    327 	eager->sctp_detached = B_FALSE;
    328 	bzero(&sopp, sizeof (sopp));
    329 	sopp.sopp_flags = SOCKOPT_MAXBLK|SOCKOPT_WROFF;
    330 	sopp.sopp_maxblk = strmsgsz;
    331 	if (econnp->conn_family == AF_INET) {
    332 		sopp.sopp_wroff = sctps->sctps_wroff_xtra +
    333 		    sizeof (sctp_data_hdr_t) + sctp->sctp_hdr_len;
    334 	} else {
    335 		sopp.sopp_wroff = sctps->sctps_wroff_xtra +
    336 		    sizeof (sctp_data_hdr_t) + sctp->sctp_hdr6_len;
    337 	}
    338 	eager->sctp_ulp_prop(eager->sctp_ulpd, &sopp);
    339 	return (eager);
    340 }
    341 
    342 /*
    343  * Connect to a peer - this function inserts the sctp in the
    344  * bind and conn fanouts, sends the INIT, and replies to the client
    345  * with an OK ack.
    346  */
    347 int
    348 sctp_connect(sctp_t *sctp, const struct sockaddr *dst, uint32_t addrlen,
    349     cred_t *cr, pid_t pid)
    350 {
    351 	sin_t		*sin;
    352 	sin6_t		*sin6;
    353 	in6_addr_t	dstaddr;
    354 	in_port_t	dstport;
    355 	mblk_t		*initmp;
    356 	sctp_tf_t	*tbf;
    357 	sctp_t		*lsctp;
    358 	char		buf[INET6_ADDRSTRLEN];
    359 	int		sleep = sctp->sctp_cansleep ? KM_SLEEP : KM_NOSLEEP;
    360 	int		err;
    361 	sctp_faddr_t	*cur_fp;
    362 	sctp_stack_t	*sctps = sctp->sctp_sctps;
    363 	conn_t		*connp = sctp->sctp_connp;
    364 	uint_t		scope_id = 0;
    365 	ip_xmit_attr_t	*ixa;
    366 
    367 	/*
    368 	 * Determine packet type based on type of address passed in
    369 	 * the request should contain an IPv4 or IPv6 address.
    370 	 * Make sure that address family matches the type of
    371 	 * family of the address passed down.
    372 	 */
    373 	if (addrlen < sizeof (sin_t)) {
    374 		return (EINVAL);
    375 	}
    376 	switch (dst->sa_family) {
    377 	case AF_INET:
    378 		sin = (sin_t *)dst;
    379 
    380 		/* Check for attempt to connect to non-unicast */
    381 		if (CLASSD(sin->sin_addr.s_addr) ||
    382 		    (sin->sin_addr.s_addr == INADDR_BROADCAST)) {
    383 			ip0dbg(("sctp_connect: non-unicast\n"));
    384 			return (EINVAL);
    385 		}
    386 		if (connp->conn_ipv6_v6only)
    387 			return (EAFNOSUPPORT);
    388 
    389 		/* convert to v6 mapped */
    390 		/* Check for attempt to connect to INADDR_ANY */
    391 		if (sin->sin_addr.s_addr == INADDR_ANY)  {
    392 			struct in_addr v4_addr;
    393 			/*
    394 			 * SunOS 4.x and 4.3 BSD allow an application
    395 			 * to connect a TCP socket to INADDR_ANY.
    396 			 * When they do this, the kernel picks the
    397 			 * address of one interface and uses it
    398 			 * instead.  The kernel usually ends up
    399 			 * picking the address of the loopback
    400 			 * interface.  This is an undocumented feature.
    401 			 * However, we provide the same thing here
    402 			 * in case any TCP apps that use this feature
    403 			 * are being ported to SCTP...
    404 			 */
    405 			v4_addr.s_addr = htonl(INADDR_LOOPBACK);
    406 			IN6_INADDR_TO_V4MAPPED(&v4_addr, &dstaddr);
    407 		} else {
    408 			IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &dstaddr);
    409 		}
    410 		dstport = sin->sin_port;
    411 		break;
    412 	case AF_INET6:
    413 		sin6 = (sin6_t *)dst;
    414 		/* Check for attempt to connect to non-unicast. */
    415 		if ((addrlen < sizeof (sin6_t)) ||
    416 		    IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
    417 			ip0dbg(("sctp_connect: non-unicast\n"));
    418 			return (EINVAL);
    419 		}
    420 		if (connp->conn_ipv6_v6only &&
    421 		    IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
    422 			return (EAFNOSUPPORT);
    423 		}
    424 		/* check for attempt to connect to unspec */
    425 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
    426 			dstaddr = ipv6_loopback;
    427 		} else {
    428 			dstaddr = sin6->sin6_addr;
    429 			if (IN6_IS_ADDR_LINKLOCAL(&dstaddr)) {
    430 				sctp->sctp_linklocal = 1;
    431 				scope_id = sin6->sin6_scope_id;
    432 			}
    433 		}
    434 		dstport = sin6->sin6_port;
    435 		connp->conn_flowinfo = sin6->sin6_flowinfo;
    436 		break;
    437 	default:
    438 		dprint(1, ("sctp_connect: unknown family %d\n",
    439 		    dst->sa_family));
    440 		return (EAFNOSUPPORT);
    441 	}
    442 
    443 	(void) inet_ntop(AF_INET6, &dstaddr, buf, sizeof (buf));
    444 	dprint(1, ("sctp_connect: attempting connect to %s...\n", buf));
    445 
    446 	RUN_SCTP(sctp);
    447 
    448 	if (connp->conn_family != dst->sa_family ||
    449 	    (connp->conn_state_flags & CONN_CLOSING)) {
    450 		WAKE_SCTP(sctp);
    451 		return (EINVAL);
    452 	}
    453 
    454 	/* We update our cred/cpid based on the caller of connect */
    455 	if (connp->conn_cred != cr) {
    456 		crhold(cr);
    457 		crfree(connp->conn_cred);
    458 		connp->conn_cred = cr;
    459 	}
    460 	connp->conn_cpid = pid;
    461 
    462 	/* Cache things in conn_ixa without any refhold */
    463 	ixa = connp->conn_ixa;
    464 	ixa->ixa_cred = cr;
    465 	ixa->ixa_cpid = pid;
    466 	if (is_system_labeled()) {
    467 		/* We need to restart with a label based on the cred */
    468 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
    469 	}
    470 
    471 	switch (sctp->sctp_state) {
    472 	case SCTPS_IDLE: {
    473 		struct sockaddr_storage	ss;
    474 
    475 		/*
    476 		 * We support a quick connect capability here, allowing
    477 		 * clients to transition directly from IDLE to COOKIE_WAIT.
    478 		 * sctp_bindi will pick an unused port, insert the connection
    479 		 * in the bind hash and transition to BOUND state. SCTP
    480 		 * picks and uses what it considers the optimal local address
    481 		 * set (just like specifiying INADDR_ANY to bind()).
    482 		 */
    483 		dprint(1, ("sctp_connect: idle, attempting bind...\n"));
    484 		ASSERT(sctp->sctp_nsaddrs == 0);
    485 
    486 		bzero(&ss, sizeof (ss));
    487 		ss.ss_family = connp->conn_family;
    488 		WAKE_SCTP(sctp);
    489 		if ((err = sctp_bind(sctp, (struct sockaddr *)&ss,
    490 		    sizeof (ss))) != 0) {
    491 			return (err);
    492 		}
    493 		RUN_SCTP(sctp);
    494 		/* FALLTHRU */
    495 	}
    496 
    497 	case SCTPS_BOUND:
    498 		ASSERT(sctp->sctp_nsaddrs > 0);
    499 
    500 		/* do the connect */
    501 		/* XXX check for attempt to connect to self */
    502 		connp->conn_fport = dstport;
    503 
    504 		ASSERT(sctp->sctp_iphc);
    505 		ASSERT(sctp->sctp_iphc6);
    506 
    507 		/*
    508 		 * Don't allow this connection to completely duplicate
    509 		 * an existing connection.
    510 		 *
    511 		 * Ensure that the duplicate check and insertion is atomic.
    512 		 */
    513 		sctp_conn_hash_remove(sctp);
    514 		tbf = &sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps,
    515 		    connp->conn_ports)];
    516 		mutex_enter(&tbf->tf_lock);
    517 		lsctp = sctp_lookup(sctp, &dstaddr, tbf, &connp->conn_ports,
    518 		    SCTPS_COOKIE_WAIT);
    519 		if (lsctp != NULL) {
    520 			/* found a duplicate connection */
    521 			mutex_exit(&tbf->tf_lock);
    522 			SCTP_REFRELE(lsctp);
    523 			WAKE_SCTP(sctp);
    524 			return (EADDRINUSE);
    525 		}
    526 
    527 		/*
    528 		 * OK; set up the peer addr (this may grow after we get
    529 		 * the INIT ACK from the peer with additional addresses).
    530 		 */
    531 		if ((err = sctp_add_faddr(sctp, &dstaddr, sleep,
    532 		    B_FALSE)) != 0) {
    533 			mutex_exit(&tbf->tf_lock);
    534 			WAKE_SCTP(sctp);
    535 			return (err);
    536 		}
    537 		cur_fp = sctp->sctp_faddrs;
    538 		ASSERT(cur_fp->ixa != NULL);
    539 
    540 		/* No valid src addr, return. */
    541 		if (cur_fp->state == SCTP_FADDRS_UNREACH) {
    542 			mutex_exit(&tbf->tf_lock);
    543 			WAKE_SCTP(sctp);
    544 			return (EADDRNOTAVAIL);
    545 		}
    546 
    547 		sctp->sctp_primary = cur_fp;
    548 		sctp->sctp_current = cur_fp;
    549 		sctp->sctp_mss = cur_fp->sfa_pmss;
    550 		sctp_conn_hash_insert(tbf, sctp, 1);
    551 		mutex_exit(&tbf->tf_lock);
    552 
    553 		ixa = cur_fp->ixa;
    554 		ASSERT(ixa->ixa_cred != NULL);
    555 
    556 		if (scope_id != 0) {
    557 			ixa->ixa_flags |= IXAF_SCOPEID_SET;
    558 			ixa->ixa_scopeid = scope_id;
    559 		} else {
    560 			ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
    561 		}
    562 
    563 		/* initialize composite headers */
    564 		if ((err = sctp_set_hdraddrs(sctp)) != 0) {
    565 			sctp_conn_hash_remove(sctp);
    566 			WAKE_SCTP(sctp);
    567 			return (err);
    568 		}
    569 
    570 		if ((err = sctp_build_hdrs(sctp, KM_SLEEP)) != 0) {
    571 			sctp_conn_hash_remove(sctp);
    572 			WAKE_SCTP(sctp);
    573 			return (err);
    574 		}
    575 
    576 		/*
    577 		 * Turn off the don't fragment bit on the (only) faddr,
    578 		 * so that if one of the messages exchanged during the
    579 		 * initialization sequence exceeds the path mtu, it
    580 		 * at least has a chance to get there. SCTP does no
    581 		 * fragmentation of initialization messages.  The DF bit
    582 		 * will be turned on again in sctp_send_cookie_echo()
    583 		 * (but the cookie echo will still be sent with the df bit
    584 		 * off).
    585 		 */
    586 		cur_fp->df = B_FALSE;
    587 
    588 		/* Mark this address as alive */
    589 		cur_fp->state = SCTP_FADDRS_ALIVE;
    590 
    591 		/* Send the INIT to the peer */
    592 		SCTP_FADDR_TIMER_RESTART(sctp, cur_fp, cur_fp->rto);
    593 		sctp->sctp_state = SCTPS_COOKIE_WAIT;
    594 		/*
    595 		 * sctp_init_mp() could result in modifying the source
    596 		 * address list, so take the hash lock.
    597 		 */
    598 		mutex_enter(&tbf->tf_lock);
    599 		initmp = sctp_init_mp(sctp, cur_fp);
    600 		if (initmp == NULL) {
    601 			mutex_exit(&tbf->tf_lock);
    602 			/*
    603 			 * It may happen that all the source addresses
    604 			 * (loopback/link local) are removed.  In that case,
    605 			 * faile the connect.
    606 			 */
    607 			if (sctp->sctp_nsaddrs == 0) {
    608 				sctp_conn_hash_remove(sctp);
    609 				SCTP_FADDR_TIMER_STOP(cur_fp);
    610 				WAKE_SCTP(sctp);
    611 				return (EADDRNOTAVAIL);
    612 			}
    613 
    614 			/* Otherwise, let the retransmission timer retry */
    615 			WAKE_SCTP(sctp);
    616 			goto notify_ulp;
    617 		}
    618 		mutex_exit(&tbf->tf_lock);
    619 
    620 		/*
    621 		 * On a clustered note send this notification to the clustering
    622 		 * subsystem.
    623 		 */
    624 		if (cl_sctp_connect != NULL) {
    625 			uchar_t		*slist;
    626 			uchar_t		*flist;
    627 			size_t		ssize;
    628 			size_t		fsize;
    629 
    630 			fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
    631 			ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
    632 			slist = kmem_alloc(ssize, KM_SLEEP);
    633 			flist = kmem_alloc(fsize, KM_SLEEP);
    634 			/* The clustering module frees the lists */
    635 			sctp_get_saddr_list(sctp, slist, ssize);
    636 			sctp_get_faddr_list(sctp, flist, fsize);
    637 			(*cl_sctp_connect)(connp->conn_family, slist,
    638 			    sctp->sctp_nsaddrs, connp->conn_lport,
    639 			    flist, sctp->sctp_nfaddrs, connp->conn_fport,
    640 			    B_TRUE, (cl_sctp_handle_t)sctp);
    641 		}
    642 		ASSERT(ixa->ixa_cred != NULL);
    643 		ASSERT(ixa->ixa_ire != NULL);
    644 
    645 		(void) conn_ip_output(initmp, ixa);
    646 		BUMP_LOCAL(sctp->sctp_opkts);
    647 		WAKE_SCTP(sctp);
    648 
    649 notify_ulp:
    650 		sctp_set_ulp_prop(sctp);
    651 
    652 		return (0);
    653 	default:
    654 		ip0dbg(("sctp_connect: invalid state. %d\n", sctp->sctp_state));
    655 		WAKE_SCTP(sctp);
    656 		return (EINVAL);
    657 	}
    658 }
    659