Home | History | Annotate | Download | only in sctp
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include <sys/types.h>
     28 #include <sys/systm.h>
     29 #include <sys/stream.h>
     30 #include <sys/cmn_err.h>
     31 #define	_SUN_TPI_VERSION 2
     32 #include <sys/tihdr.h>
     33 #include <sys/socket.h>
     34 #include <sys/stropts.h>
     35 #include <sys/strsun.h>
     36 #include <sys/strsubr.h>
     37 #include <sys/socketvar.h>
     38 #include <inet/common.h>
     39 #include <inet/mi.h>
     40 #include <inet/ip.h>
     41 #include <inet/ip6.h>
     42 #include <inet/sctp_ip.h>
     43 #include <inet/ipclassifier.h>
     44 
     45 /*
     46  * PR-SCTP comments.
     47  *
     48  * A message can expire before it gets to the transmit list (i.e. it is still
     49  * in the unsent list - unchunked), after it gets to the transmit list, but
     50  * before transmission has actually started, or after transmission has begun.
     51  * Accordingly, we check for the status of a message in sctp_chunkify() when
     52  * the message is being transferred from the unsent list to the transmit list;
     53  * in sctp_get_msg_to_send(), when we get the next chunk from the transmit
     54  * list and in sctp_rexmit() when we get the next chunk to be (re)transmitted.
     55  * When we nuke a message in sctp_chunkify(), all we need to do is take it
     56  * out of the unsent list and update sctp_unsent; when a message is deemed
     57  * timed-out in sctp_get_msg_to_send() we can just take it out of the transmit
     58  * list, update sctp_unsent IFF transmission for the message has not yet begun
     59  * (i.e. !SCTP_CHUNK_ISSENT(meta->b_cont)). However, if transmission for the
     60  * message has started, then we cannot just take it out of the list, we need
     61  * to send Forward TSN chunk to the peer so that the peer can clear its
     62  * fragment list for this message. However, we cannot just send the Forward
     63  * TSN in sctp_get_msg_to_send() because there might be unacked chunks for
     64  * messages preceding this abandoned message. So, we send a Forward TSN
     65  * IFF all messages prior to this abandoned message have been SACKed; if not,
     66  * we defer sending the Forward TSN to sctp_cumack(), which will check for
     67  * this condition and send the Forward TSN via sctp_check_abandoned_msg(). In
     68  * sctp_rexmit() when we check for retransmissions, we need to determine if
     69  * the advanced peer ack point can be moved ahead, and if so, send a Forward
     70  * TSN to the peer instead of retransmitting the chunk. Note that when
     71  * we send a Forward TSN for a message, there may be yet unsent chunks for
     72  * this message; we need to mark all such chunks as abandoned, so that
     73  * sctp_cumack() can take the message out of the transmit list, additionally
     74  * sctp_unsent needs to be adjusted. Whenever sctp_unsent is updated (i.e.
     75  * decremented when a message/chunk is deemed abandoned), sockfs needs to
     76  * be notified so that it can adjust its idea of the queued message.
     77  */
     78 
     79 #include "sctp_impl.h"
     80 
/*
 * File-scope state and forward declarations.
 *
 * sctp_kmem_ftsn_set_cache is a kmem cache handle; its users are not in
 * the part of the file visible here.
 */
static struct kmem_cache	*sctp_kmem_ftsn_set_cache;
/* Defined later in this file; moves unsent messages to the transmit list. */
static mblk_t			*sctp_chunkify(sctp_t *, int, int, int);

#ifdef	DEBUG
/* Debug-only chain sanity check, defined later in this file. */
static boolean_t	sctp_verify_chain(mblk_t *, mblk_t *);
#endif
     87 
     88 /*
     89  * Called to allocate a header mblk when sending data to SCTP.
     90  * Data will follow in b_cont of this mblk.
     91  */
     92 mblk_t *
     93 sctp_alloc_hdr(const char *name, int nlen, const char *control, int clen,
     94     int flags)
     95 {
     96 	mblk_t *mp;
     97 	struct T_unitdata_req *tudr;
     98 	size_t size;
     99 	int error;
    100 
    101 	size = sizeof (*tudr) + _TPI_ALIGN_TOPT(nlen) + clen;
    102 	size = MAX(size, sizeof (sctp_msg_hdr_t));
    103 	if (flags & SCTP_CAN_BLOCK) {
    104 		mp = allocb_wait(size, BPRI_MED, 0, &error);
    105 	} else {
    106 		mp = allocb(size, BPRI_MED);
    107 	}
    108 	if (mp) {
    109 		tudr = (struct T_unitdata_req *)mp->b_rptr;
    110 		tudr->PRIM_type = T_UNITDATA_REQ;
    111 		tudr->DEST_length = nlen;
    112 		tudr->DEST_offset = sizeof (*tudr);
    113 		tudr->OPT_length = clen;
    114 		tudr->OPT_offset = (t_scalar_t)(sizeof (*tudr) +
    115 		    _TPI_ALIGN_TOPT(nlen));
    116 		if (nlen > 0)
    117 			bcopy(name, tudr + 1, nlen);
    118 		if (clen > 0)
    119 			bcopy(control, (char *)tudr + tudr->OPT_offset, clen);
    120 		mp->b_wptr += (tudr ->OPT_offset + clen);
    121 		mp->b_datap->db_type = M_PROTO;
    122 	}
    123 	return (mp);
    124 }
    125 
/*
 * Accept one user message for transmission on 'sctp'.
 *
 * 'mp' is an M_PROTO mblk carrying a T_UNITDATA_REQ (the layout built by
 * sctp_alloc_hdr()), with the user data, if any, chained on b_cont.
 * 'flags' is not used by this function.
 *
 * The optional destination address and an optional SCTP_SNDRCV ancillary
 * item override the per-association defaults (stream, flags, ppid,
 * context, time-to-live).  If MSG_ABORT is requested the data is passed to
 * sctp_user_abort() and the header mblk is freed here.  Otherwise the
 * header mblk is overwritten in place with a sctp_msg_hdr_t and the
 * message is appended to the unsent list.
 *
 * Returns 0 on success (including the no-data and abort cases), or
 * EINVAL / EAFNOSUPPORT / ENOMEM / EPIPE.  On the error returns this
 * function does not free 'mp'.
 */
/*ARGSUSED2*/
int
sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags)
{
	sctp_faddr_t	*fp = NULL;
	struct T_unitdata_req	*tudr;
	int		error = 0;
	mblk_t		*mproto = mp;
	in6_addr_t	*addr;
	in6_addr_t	tmpaddr;
	/* Start from the association defaults; ancillary data may override. */
	uint16_t	sid = sctp->sctp_def_stream;
	uint32_t	ppid = sctp->sctp_def_ppid;
	uint32_t	context = sctp->sctp_def_context;
	uint16_t	msg_flags = sctp->sctp_def_flags;
	sctp_msg_hdr_t	*sctp_msg_hdr;
	uint32_t	msg_len = 0;
	uint32_t	timetolive = sctp->sctp_def_timetolive;

	ASSERT(DB_TYPE(mproto) == M_PROTO);

	/* From here on 'mp' is the data chain; 'mproto' is the header. */
	mp = mp->b_cont;
	ASSERT(mp == NULL || DB_TYPE(mp) == M_DATA);

	tudr = (struct T_unitdata_req *)mproto->b_rptr;
	ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);

	/* Get destination address, if specified */
	if (tudr->DEST_length > 0) {
		sin_t *sin;
		sin6_t *sin6;

		sin = (struct sockaddr_in *)
		    (mproto->b_rptr + tudr->DEST_offset);
		switch (sin->sin_family) {
		case AF_INET:
			if (tudr->DEST_length < sizeof (*sin)) {
				return (EINVAL);
			}
			/* Peer addresses are looked up as in6_addr_t. */
			IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &tmpaddr);
			addr = &tmpaddr;
			break;
		case AF_INET6:
			if (tudr->DEST_length < sizeof (*sin6)) {
				return (EINVAL);
			}
			sin6 = (struct sockaddr_in6 *)
			    (mproto->b_rptr + tudr->DEST_offset);
			addr = &sin6->sin6_addr;
			break;
		default:
			return (EAFNOSUPPORT);
		}
		/* The address must be a known peer address. */
		fp = sctp_lookup_faddr(sctp, addr);
		if (fp == NULL) {
			return (EINVAL);
		}
	}
	/* Ancillary Data? */
	if (tudr->OPT_length > 0) {
		struct cmsghdr		*cmsg;
		char			*cend;
		struct sctp_sndrcvinfo	*sndrcv;

		cmsg = (struct cmsghdr *)(mproto->b_rptr + tudr->OPT_offset);
		cend = ((char *)cmsg + tudr->OPT_length);
		ASSERT(cend <= (char *)mproto->b_wptr);

		/*
		 * Walk the control messages until one runs past the end of
		 * the option area, a zero-length item is met, or the first
		 * SCTP_SNDRCV item has been consumed.
		 */
		for (;;) {
			if ((char *)(cmsg + 1) > cend ||
			    ((char *)cmsg + cmsg->cmsg_len) > cend) {
				break;
			}
			if ((cmsg->cmsg_level == IPPROTO_SCTP) &&
			    (cmsg->cmsg_type == SCTP_SNDRCV)) {
				if (cmsg->cmsg_len <
				    (sizeof (*sndrcv) + sizeof (*cmsg))) {
					return (EINVAL);
				}
				sndrcv = (struct sctp_sndrcvinfo *)(cmsg + 1);
				sid = sndrcv->sinfo_stream;
				msg_flags = sndrcv->sinfo_flags;
				ppid = sndrcv->sinfo_ppid;
				context = sndrcv->sinfo_context;
				timetolive = sndrcv->sinfo_timetolive;
				break;
			}
			if (cmsg->cmsg_len > 0)
				cmsg = CMSG_NEXT(cmsg);
			else
				break;
		}
	}
	if (msg_flags & MSG_ABORT) {
		/* Flatten multi-mblk data into one mblk before aborting. */
		if (mp && mp->b_cont) {
			mblk_t *pump = msgpullup(mp, -1);
			if (!pump) {
				return (ENOMEM);
			}
			freemsg(mp);
			mp = pump;
			mproto->b_cont = mp;
		}
		/*
		 * NOTE(review): RUN_SCTP/WAKE_SCTP are defined outside this
		 * view; presumably they serialize access to the sctp_t.
		 */
		RUN_SCTP(sctp);
		sctp_user_abort(sctp, mp);
		freemsg(mproto);
		goto process_sendq;
	}
	/* Header only, no data: nothing to queue; returns 0. */
	if (mp == NULL)
		goto done;

	RUN_SCTP(sctp);

	/* Reject any new data requests if we are shutting down */
	if (sctp->sctp_state > SCTPS_ESTABLISHED ||
	    (sctp->sctp_connp->conn_state_flags & CONN_CLOSING)) {
		error = EPIPE;
		goto unlock_done;
	}

	/* Re-use the mproto to store relevant info. */
	ASSERT(MBLKSIZE(mproto) >= sizeof (*sctp_msg_hdr));

	/* The T_UNITDATA_REQ contents are overwritten from here on. */
	mproto->b_rptr = mproto->b_datap->db_base;
	mproto->b_wptr = mproto->b_rptr + sizeof (*sctp_msg_hdr);

	sctp_msg_hdr = (sctp_msg_hdr_t *)mproto->b_rptr;
	bzero(sctp_msg_hdr, sizeof (*sctp_msg_hdr));
	sctp_msg_hdr->smh_context = context;
	sctp_msg_hdr->smh_sid = sid;
	sctp_msg_hdr->smh_ppid = ppid;
	sctp_msg_hdr->smh_flags = msg_flags;
	sctp_msg_hdr->smh_ttl = MSEC_TO_TICK(timetolive);
	sctp_msg_hdr->smh_tob = lbolt64;
	/* Total payload length; note this walk leaves mp == NULL. */
	for (; mp != NULL; mp = mp->b_cont)
		msg_len += MBLKL(mp);
	sctp_msg_hdr->smh_msglen = msg_len;

	/* User requested specific destination */
	SCTP_SET_CHUNK_DEST(mproto, fp);

	/* The stream id is only range-checked from COOKIE_ECHOED onwards. */
	if (sctp->sctp_state >= SCTPS_COOKIE_ECHOED &&
	    sid >= sctp->sctp_num_ostr) {
		/* Send sendfail event */
		sctp_sendfail_event(sctp, dupmsg(mproto), SCTP_ERR_BAD_SID,
		    B_FALSE);
		error = EINVAL;
		goto unlock_done;
	}

	/* no data */
	if (msg_len == 0) {
		sctp_sendfail_event(sctp, dupmsg(mproto),
		    SCTP_ERR_NO_USR_DATA, B_FALSE);
		error = EINVAL;
		goto unlock_done;
	}

	/* Add it to the unsent list */
	if (sctp->sctp_xmit_unsent == NULL) {
		sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = mproto;
	} else {
		sctp->sctp_xmit_unsent_tail->b_next = mproto;
		sctp->sctp_xmit_unsent_tail = mproto;
	}
	sctp->sctp_unsent += msg_len;
	BUMP_LOCAL(sctp->sctp_msgcount);
	/*
	 * Notify sockfs if the tx queue is full.
	 */
	if (SCTP_TXQ_LEN(sctp) >= sctp->sctp_xmit_hiwater) {
		sctp->sctp_txq_full = 1;
		sctp->sctp_ulp_xmitted(sctp->sctp_ulpd, B_TRUE);
	}
	/* Only an established association transmits right away. */
	if (sctp->sctp_state == SCTPS_ESTABLISHED)
		sctp_output(sctp, UINT_MAX);
process_sendq:
	/* Success exit, shared by the queued-data and abort paths. */
	WAKE_SCTP(sctp);
	sctp_process_sendq(sctp);
	return (0);
unlock_done:
	/* Error exit taken after RUN_SCTP(). */
	WAKE_SCTP(sctp);
done:
	return (error);
}
    310 
/*
 * While there are messages on sctp_xmit_unsent, detach each one. For each:
 * allocate space for the chunk header, fill in the data chunk, and fill in
 * the chunk header. Then append it to sctp_xmit_tail.
 * Return after appending as many bytes as required (bytes_to_send).
 * We also return if we've appended one or more chunks, and find a subsequent
 * unsent message is too big to fit in the segment.
 *
 * Arguments:
 *	mss		size used, together with firstseg_len, to bound the
 *			first chunk (remaining_len = mss - firstseg_len).
 *	firstseg_len	space already used in the current packet; when it is
 *			non-zero a message longer than remaining_len is left
 *			on the unsent list.
 *	bytes_to_send	payload budget; reduced by each chunk's payload and
 *			checked before starting another message.
 *
 * The caller must ensure sctp_xmit_unsent is non-NULL: it is dereferenced
 * without a check.
 *
 * Returns NULL when firstseg_len != 0 and the message at the head of the
 * unsent list does not fit in remaining_len; otherwise returns
 * sctp->sctp_xmit_tail (also on allocation failure, in which case the
 * message stays on the unsent list, possibly marked partially chunked).
 *
 * While a message is being chunked, the finished chunks hang off
 * mdblk->b_cont linked through b_next; each chunk's own mblks are linked
 * through b_cont.
 */
mblk_t *
sctp_chunkify(sctp_t *sctp, int mss, int firstseg_len, int bytes_to_send)
{
	mblk_t			*mp;
	mblk_t			*chunk_mp;
	mblk_t			*chunk_head;
	mblk_t			*chunk_hdr;
	mblk_t			*chunk_tail = NULL;
	int			count;
	int			chunksize;
	sctp_data_hdr_t		*sdc;
	mblk_t			*mdblk = sctp->sctp_xmit_unsent;
	sctp_faddr_t		*fp;
	sctp_faddr_t		*fp1;
	size_t			xtralen;
	sctp_msg_hdr_t		*msg_hdr;
	sctp_stack_t		*sctps = sctp->sctp_sctps;
	sctp_msg_hdr_t		*next_msg_hdr;
	size_t			nextlen;
	int			remaining_len = mss - firstseg_len;

	ASSERT(remaining_len >= 0);

	/* Destination: the one stored with the message, else the current. */
	fp = SCTP_CHUNK_DEST(mdblk);
	if (fp == NULL)
		fp = sctp->sctp_current;
	/* Size of a freshly allocated chunk header mblk for this family. */
	if (fp->isv4)
		xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra +
		    sizeof (*sdc);
	else
		xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra +
		    sizeof (*sdc);
	/* Payload budget of the first chunk: room left minus its header. */
	count = chunksize = remaining_len - sizeof (*sdc);
nextmsg:
	next_msg_hdr = (sctp_msg_hdr_t *)sctp->sctp_xmit_unsent->b_rptr;
	nextlen = next_msg_hdr->smh_msglen;
	/*
	 * Will the entire next message fit in the current packet ?
	 * if not, leave it on the unsent list.
	 */
	if ((firstseg_len != 0) && (nextlen > remaining_len))
		return (NULL);

	chunk_mp = mdblk->b_cont;

	/*
	 * If this partially chunked, we ignore the next one for now and
	 * use the one already present. For the unchunked bits, we use the
	 * length of the last chunk.
	 */
	if (SCTP_IS_MSG_CHUNKED(mdblk)) {
		int	chunk_len;

		/*
		 * The failure paths below leave the unchunked remainder at
		 * the head of b_cont with the finished chunks on its b_next;
		 * undo that arrangement here.
		 */
		ASSERT(chunk_mp->b_next != NULL);
		mdblk->b_cont = chunk_mp->b_next;
		chunk_mp->b_next = NULL;
		SCTP_MSG_CLEAR_CHUNKED(mdblk);
		/* Find the last finished chunk and read its length. */
		mp = mdblk->b_cont;
		while (mp->b_next != NULL)
			mp = mp->b_next;
		chunk_len = ntohs(((sctp_data_hdr_t *)mp->b_rptr)->sdh_len);
		if (fp->sfa_pmss - chunk_len > sizeof (*sdc))
			count = chunksize = fp->sfa_pmss - chunk_len;
		else
			count = chunksize = fp->sfa_pmss;
		count = chunksize = count - sizeof (*sdc);
	} else {
		msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
		if (SCTP_MSG_TO_BE_ABANDONED(mdblk, msg_hdr, sctp)) {
			/* Drop the message before any chunk is built. */
			sctp->sctp_xmit_unsent = mdblk->b_next;
			if (sctp->sctp_xmit_unsent == NULL)
				sctp->sctp_xmit_unsent_tail = NULL;
			ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
			sctp->sctp_unsent -= msg_hdr->smh_msglen;
			mdblk->b_next = NULL;
			BUMP_LOCAL(sctp->sctp_prsctpdrop);
			/*
			 * Update ULP the amount of queued data, which is
			 * sent-unack'ed + unsent.
			 */
			if (!SCTP_IS_DETACHED(sctp))
				SCTP_TXQ_UPDATE(sctp);
			sctp_sendfail_event(sctp, mdblk, 0, B_FALSE);
			goto try_next;
		}
		/* Detach the data; chunks are re-attached as they are built. */
		mdblk->b_cont = NULL;
	}
	msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
nextchunk:
	chunk_head = chunk_mp;
	chunk_tail = NULL;

	/* Skip as many mblk's as we need */
	while (chunk_mp != NULL && ((count - MBLKL(chunk_mp)) >= 0)) {
		count -= MBLKL(chunk_mp);
		chunk_tail = chunk_mp;
		chunk_mp = chunk_mp->b_cont;
	}
	/* Split the chain, if needed */
	if (chunk_mp != NULL) {
		if (count > 0) {
			/*
			 * Part of chunk_mp still fits: share its data block
			 * and trim the two views to either side of the cut.
			 */
			mblk_t	*split_mp = dupb(chunk_mp);

			if (split_mp == NULL) {
				/*
				 * Put the remainder back on the message; if
				 * chunks already exist, park them on b_next
				 * and flag the message as partially chunked.
				 */
				if (mdblk->b_cont == NULL) {
					mdblk->b_cont = chunk_head;
				} else  {
					SCTP_MSG_SET_CHUNKED(mdblk);
					ASSERT(chunk_head->b_next == NULL);
					chunk_head->b_next = mdblk->b_cont;
					mdblk->b_cont = chunk_head;
				}
				return (sctp->sctp_xmit_tail);
			}
			if (chunk_tail != NULL) {
				chunk_tail->b_cont = split_mp;
				chunk_tail = chunk_tail->b_cont;
			} else {
				chunk_head = chunk_tail = split_mp;
			}
			chunk_tail->b_wptr = chunk_tail->b_rptr + count;
			chunk_mp->b_rptr = chunk_tail->b_wptr;
			count = 0;
		} else if (chunk_tail == NULL) {
			/* Nothing fits in this budget; retry with a full one. */
			goto next;
		} else {
			/* Exact fit on an mblk boundary: just cut the chain. */
			chunk_tail->b_cont = NULL;
		}
	}
	/* Alloc chunk hdr, if needed */
	if (DB_REF(chunk_head) > 1 ||
	    ((intptr_t)chunk_head->b_rptr) & (SCTP_ALIGN - 1) ||
	    MBLKHEAD(chunk_head) < sizeof (*sdc)) {
		if ((chunk_hdr = allocb(xtralen, BPRI_MED)) == NULL) {
			/* Same recovery as above, re-joining the cut chain. */
			if (mdblk->b_cont == NULL) {
				if (chunk_mp != NULL)
					linkb(chunk_head, chunk_mp);
				mdblk->b_cont = chunk_head;
			} else {
				SCTP_MSG_SET_CHUNKED(mdblk);
				if (chunk_mp != NULL)
					linkb(chunk_head, chunk_mp);
				ASSERT(chunk_head->b_next == NULL);
				chunk_head->b_next = mdblk->b_cont;
				mdblk->b_cont = chunk_head;
			}
			return (sctp->sctp_xmit_tail);
		}
		/* Place the data header at the very end of the new buffer. */
		chunk_hdr->b_rptr += xtralen - sizeof (*sdc);
		chunk_hdr->b_wptr = chunk_hdr->b_rptr + sizeof (*sdc);
		chunk_hdr->b_cont = chunk_head;
	} else {
		/* Unshared, aligned, enough headroom: prepend in place. */
		chunk_hdr = chunk_head;
		chunk_hdr->b_rptr -= sizeof (*sdc);
	}
	ASSERT(chunk_hdr->b_datap->db_ref == 1);
	sdc = (sctp_data_hdr_t *)chunk_hdr->b_rptr;
	sdc->sdh_id = CHUNK_DATA;
	sdc->sdh_flags = 0;
	/* chunksize - count is the payload actually placed in this chunk. */
	sdc->sdh_len = htons(sizeof (*sdc) + chunksize - count);
	ASSERT(sdc->sdh_len);
	sdc->sdh_sid = htons(msg_hdr->smh_sid);
	/*
	 * We defer assigning the SSN just before sending the chunk, else
	 * if we drop the chunk in sctp_get_msg_to_send(), we would need
	 * to send a Forward TSN to let the peer know. Some more comments
	 * about this in sctp_impl.h for SCTP_CHUNK_SENT.
	 */
	sdc->sdh_payload_id = msg_hdr->smh_ppid;

	/* First chunk of the message gets the B bit; others are appended. */
	if (mdblk->b_cont == NULL) {
		mdblk->b_cont = chunk_hdr;
		SCTP_DATA_SET_BBIT(sdc);
	} else {
		mp = mdblk->b_cont;
		while (mp->b_next != NULL)
			mp = mp->b_next;
		mp->b_next = chunk_hdr;
	}

	bytes_to_send -= (chunksize - count);
	if (chunk_mp != NULL) {
next:
		/* More data in this message: later chunks use the full pmss. */
		count = chunksize = fp->sfa_pmss - sizeof (*sdc);
		goto nextchunk;
	}
	/* Message fully chunked: mark the last chunk and move the message. */
	SCTP_DATA_SET_EBIT(sdc);
	sctp->sctp_xmit_unsent = mdblk->b_next;
	if (mdblk->b_next == NULL) {
		sctp->sctp_xmit_unsent_tail = NULL;
	}
	mdblk->b_next = NULL;

	if (sctp->sctp_xmit_tail == NULL) {
		sctp->sctp_xmit_head = sctp->sctp_xmit_tail = mdblk;
	} else {
		/* Append after the last message reachable from xmit_tail. */
		mp = sctp->sctp_xmit_tail;
		while (mp->b_next != NULL)
			mp = mp->b_next;
		mp->b_next = mdblk;
		mdblk->b_prev = mp;
	}
try_next:
	if (bytes_to_send > 0 && sctp->sctp_xmit_unsent != NULL) {
		mdblk = sctp->sctp_xmit_unsent;
		fp1 = SCTP_CHUNK_DEST(mdblk);
		if (fp1 == NULL)
			fp1 = sctp->sctp_current;
		if (fp == fp1) {
			/*
			 * Same destination: reuse the leftover budget
			 * (less a header, rounded down to a multiple of 4)
			 * when the test below holds, else start from a full
			 * pmss.
			 */
			size_t len = MBLKL(mdblk->b_cont);
			if ((count > 0) &&
			    ((len > fp->sfa_pmss - sizeof (*sdc)) ||
			    (len <= count))) {
				count -= sizeof (*sdc);
				count = chunksize = count - (count & 0x3);
			} else {
				count = chunksize = fp->sfa_pmss -
				    sizeof (*sdc);
			}
		} else {
			/* New destination: recompute header room and budget. */
			if (fp1->isv4)
				xtralen = sctp->sctp_hdr_len;
			else
				xtralen = sctp->sctp_hdr6_len;
			xtralen += sctps->sctps_wroff_xtra + sizeof (*sdc);
			count = chunksize = fp1->sfa_pmss - sizeof (*sdc);
			fp = fp1;
		}
		goto nextmsg;
	}
	return (sctp->sctp_xmit_tail);
}
    551 
    552 void
    553 sctp_free_msg(mblk_t *ump)
    554 {
    555 	mblk_t *mp, *nmp;
    556 
    557 	for (mp = ump->b_cont; mp; mp = nmp) {
    558 		nmp = mp->b_next;
    559 		mp->b_next = mp->b_prev = NULL;
    560 		freemsg(mp);
    561 	}
    562 	ASSERT(!ump->b_prev);
    563 	ump->b_next = NULL;
    564 	freeb(ump);
    565 }
    566 
    567 mblk_t *
    568 sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen,
    569     int *error)
    570 {
    571 	int hdrlen;
    572 	char *hdr;
    573 	int isv4 = fp->isv4;
    574 	sctp_stack_t	*sctps = sctp->sctp_sctps;
    575 
    576 	if (error != NULL)
    577 		*error = 0;
    578 
    579 	if (isv4) {
    580 		hdrlen = sctp->sctp_hdr_len;
    581 		hdr = sctp->sctp_iphc;
    582 	} else {
    583 		hdrlen = sctp->sctp_hdr6_len;
    584 		hdr = sctp->sctp_iphc6;
    585 	}
    586 	/*
    587 	 * A null fp->ire could mean that the address is 'down'. Similarly,
    588 	 * it is possible that the address went down, we tried to send an
    589 	 * heartbeat and ended up setting fp->saddr as unspec because we
    590 	 * didn't have any usable source address.  In either case
    591 	 * sctp_get_ire() will try find an IRE, if available, and set
    592 	 * the source address, if needed.  If we still don't have any
    593 	 * usable source address, fp->state will be SCTP_FADDRS_UNREACH and
    594 	 * we return EHOSTUNREACH.
    595 	 */
    596 	if (fp->ire == NULL || SCTP_IS_ADDR_UNSPEC(fp->isv4, fp->saddr)) {
    597 		sctp_get_ire(sctp, fp);
    598 		if (fp->state == SCTP_FADDRS_UNREACH) {
    599 			if (error != NULL)
    600 				*error = EHOSTUNREACH;
    601 			return (NULL);
    602 		}
    603 	}
    604 	/* Copy in IP header. */
    605 	if ((mp->b_rptr - mp->b_datap->db_base) <
    606 	    (sctps->sctps_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2 ||
    607 	    !IS_P2ALIGNED(DB_BASE(mp), sizeof (ire_t *))) {
    608 		mblk_t *nmp;
    609 
    610 		/*
    611 		 * This can happen if IP headers are adjusted after
    612 		 * data was moved into chunks, or during retransmission,
    613 		 * or things like snoop is running.
    614 		 */
    615 		nmp = allocb_cred(sctps->sctps_wroff_xtra + hdrlen + sacklen,
    616 		    CONN_CRED(sctp->sctp_connp), sctp->sctp_cpid);
    617 		if (nmp == NULL) {
    618 			if (error !=  NULL)
    619 				*error = ENOMEM;
    620 			return (NULL);
    621 		}
    622 		nmp->b_rptr += sctps->sctps_wroff_xtra;
    623 		nmp->b_wptr = nmp->b_rptr + hdrlen + sacklen;
    624 		nmp->b_cont = mp;
    625 		mp = nmp;
    626 	} else {
    627 		mp->b_rptr -= (hdrlen + sacklen);
    628 		mblk_setcred(mp, CONN_CRED(sctp->sctp_connp), sctp->sctp_cpid);
    629 	}
    630 	bcopy(hdr, mp->b_rptr, hdrlen);
    631 	if (sacklen) {
    632 		sctp_fill_sack(sctp, mp->b_rptr + hdrlen, sacklen);
    633 	}
    634 	if (fp != sctp->sctp_current) {
    635 		/* change addresses in header */
    636 		if (isv4) {
    637 			ipha_t *iph = (ipha_t *)mp->b_rptr;
    638 
    639 			IN6_V4MAPPED_TO_IPADDR(&fp->faddr, iph->ipha_dst);
    640 			if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->saddr)) {
    641 				IN6_V4MAPPED_TO_IPADDR(&fp->saddr,
    642 				    iph->ipha_src);
    643 			} else if (sctp->sctp_bound_to_all) {
    644 				iph->ipha_src = INADDR_ANY;
    645 			}
    646 		} else {
    647 			((ip6_t *)(mp->b_rptr))->ip6_dst = fp->faddr;
    648 			if (!IN6_IS_ADDR_UNSPECIFIED(&fp->saddr)) {
    649 				((ip6_t *)(mp->b_rptr))->ip6_src = fp->saddr;
    650 			} else if (sctp->sctp_bound_to_all) {
    651 				V6_SET_ZERO(((ip6_t *)(mp->b_rptr))->ip6_src);
    652 			}
    653 		}
    654 	}
    655 	/*
    656 	 * IP will not free this IRE if it is condemned.  SCTP needs to
    657 	 * free it.
    658 	 */
    659 	if ((fp->ire != NULL) && (fp->ire->ire_marks & IRE_MARK_CONDEMNED)) {
    660 		IRE_REFRELE_NOTR(fp->ire);
    661 		fp->ire = NULL;
    662 	}
    663 
    664 	/* Stash the conn and ire ptr info for IP */
    665 	SCTP_STASH_IPINFO(mp, fp->ire);
    666 
    667 	return (mp);
    668 }
    669 
    670 /*
    671  * SCTP requires every chunk to be padded so that the total length
    672  * is a multiple of SCTP_ALIGN.  This function returns a mblk with
    673  * the specified pad length.
    674  */
    675 static mblk_t *
    676 sctp_get_padding(sctp_t *sctp, int pad)
    677 {
    678 	mblk_t *fill;
    679 
    680 	ASSERT(pad < SCTP_ALIGN);
    681 	ASSERT(sctp->sctp_pad_mp != NULL);
    682 	if ((fill = dupb(sctp->sctp_pad_mp)) != NULL) {
    683 		fill->b_wptr += pad;
    684 		return (fill);
    685 	}
    686 
    687 	/*
    688 	 * The memory saving path of reusing the sctp_pad_mp
    689 	 * fails may be because it has been dupb() too
    690 	 * many times (DBLK_REFMAX).  Use the memory consuming
    691 	 * path of allocating the pad mblk.
    692 	 */
    693 	if ((fill = allocb(SCTP_ALIGN, BPRI_MED)) != NULL) {
    694 		/* Zero it out.  SCTP_ALIGN is sizeof (int32_t) */
    695 		*(int32_t *)fill->b_rptr = 0;
    696 		fill->b_wptr += pad;
    697 	}
    698 	return (fill);
    699 }
    700 
    701 static mblk_t *
    702 sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp)
    703 {
    704 	mblk_t		*meta;
    705 	mblk_t		*start_mp = NULL;
    706 	mblk_t		*end_mp = NULL;
    707 	mblk_t		*mp, *nmp;
    708 	mblk_t		*fill;
    709 	sctp_data_hdr_t	*sdh;
    710 	int		msglen;
    711 	int		extra;
    712 	sctp_msg_hdr_t	*msg_hdr;
    713 	sctp_faddr_t	*old_fp = NULL;
    714 	sctp_faddr_t	*chunk_fp;
    715 	sctp_stack_t	*sctps = sctp->sctp_sctps;
    716 
    717 	for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
    718 		msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
    719 		if (SCTP_IS_MSG_ABANDONED(meta) ||
    720 		    SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
    721 			continue;
    722 		}
    723 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
    724 			if (SCTP_CHUNK_WANT_REXMIT(mp)) {
    725 				/*
    726 				 * Use the same peer address to do fast
    727 				 * retransmission.  If the original peer
    728 				 * address is dead, switch to the current
    729 				 * one.  Record the old one so that we
    730 				 * will pick the chunks sent to the old
    731 				 * one for fast retransmission.
    732 				 */
    733 				chunk_fp = SCTP_CHUNK_DEST(mp);
    734 				if (*fp == NULL) {
    735 					*fp = chunk_fp;
    736 					if ((*fp)->state != SCTP_FADDRS_ALIVE) {
    737 						old_fp = *fp;
    738 						*fp = sctp->sctp_current;
    739 					}
    740 				} else if (old_fp == NULL && *fp != chunk_fp) {
    741 					continue;
    742 				} else if (old_fp != NULL &&
    743 				    old_fp != chunk_fp) {
    744 					continue;
    745 				}
    746 
    747 				sdh = (sctp_data_hdr_t *)mp->b_rptr;
    748 				msglen = ntohs(sdh->sdh_len);
    749 				if ((extra = msglen & (SCTP_ALIGN - 1)) != 0) {
    750 					extra = SCTP_ALIGN - extra;
    751 				}
    752 
    753 				/*
    754 				 * We still return at least the first message
    755 				 * even if that message cannot fit in as
    756 				 * PMTU may have changed.
    757 				 */
    758 				if (*total + msglen + extra >
    759 				    (*fp)->sfa_pmss && start_mp != NULL) {
    760 					return (start_mp);
    761 				}
    762 				if ((nmp = dupmsg(mp)) == NULL)
    763 					return (start_mp);
    764 				if (extra > 0) {
    765 					fill = sctp_get_padding(sctp, extra);
    766 					if (fill != NULL) {
    767 						linkb(nmp, fill);
    768 					} else {
    769 						return (start_mp);
    770 					}
    771 				}
    772 				BUMP_MIB(&sctps->sctps_mib, sctpOutFastRetrans);
    773 				BUMP_LOCAL(sctp->sctp_rxtchunks);
    774 				SCTP_CHUNK_CLEAR_REXMIT(mp);
    775 				if (start_mp == NULL) {
    776 					start_mp = nmp;
    777 				} else {
    778 					linkb(end_mp, nmp);
    779 				}
    780 				end_mp = nmp;
    781 				*total += msglen + extra;
    782 				dprint(2, ("sctp_find_fast_rexmit_mblks: "
    783 				    "tsn %x\n", sdh->sdh_tsn));
    784 			}
    785 		}
    786 	}
    787 	/* Clear the flag as there is no more message to be fast rexmitted. */
    788 	sctp->sctp_chk_fast_rexmit = B_FALSE;
    789 	return (start_mp);
    790 }
    791 
    792 /* A debug function just to make sure that a mblk chain is not broken */
    793 #ifdef	DEBUG
    794 static boolean_t
    795 sctp_verify_chain(mblk_t *head, mblk_t *tail)
    796 {
    797 	mblk_t	*mp = head;
    798 
    799 	if (head == NULL || tail == NULL)
    800 		return (B_TRUE);
    801 	while (mp != NULL) {
    802 		if (mp == tail)
    803 			return (B_TRUE);
    804 		mp = mp->b_next;
    805 	}
    806 	return (B_FALSE);
    807 }
    808 #endif
    809 
    810 /*
    811  * Gets the next unsent chunk to transmit. Messages that are abandoned are
    812  * skipped. A message can be abandoned if it has a non-zero timetolive and
    813  * transmission has not yet started or if it is a partially reliable
    814  * message and its time is up (assuming we are PR-SCTP aware).
    815  * We only return a chunk if it will fit entirely in the current packet.
    816  * 'cansend' is used to determine if need to try and chunkify messages from
    817  * the unsent list, if any, and also as an input to sctp_chunkify() if so.
    818  *
    819  * firstseg_len indicates the space already used, cansend represents remaining
    820  * space in the window, ((sfa_pmss - firstseg_len) can therefore reasonably
    821  * be used to compute the cansend arg).
    822  */
    823 mblk_t *
    824 sctp_get_msg_to_send(sctp_t *sctp, mblk_t **mp, mblk_t *meta, int  *error,
    825     int32_t firstseg_len, uint32_t cansend, sctp_faddr_t *fp)
    826 {
        	/*
        	 * Scan the message list starting at meta for the first chunk that
        	 * SCTP_CHUNK_CANSEND() accepts.  On success *mp is set to that
        	 * chunk and the owning message (meta block) is returned.  NULL is
        	 * returned when no such chunk is found; *error is non-zero only
        	 * when sctp_check_abandoned_msg() reported a failure.  Messages
        	 * that are due to be abandoned and whose first chunk has not been
        	 * sent are unlinked from the transmit list along the way.
        	 */
    827 	mblk_t		*mp1;
    828 	sctp_msg_hdr_t	*msg_hdr;
    829 	mblk_t		*tmp_meta;
    830 	sctp_faddr_t	*fp1;
    831 
    832 	ASSERT(error != NULL && mp != NULL);
    833 	*error = 0;
    834 
    835 	ASSERT(sctp->sctp_current != NULL);
    836 
        	/* Re-entered from below once sctp_chunkify() has produced messages. */
    837 chunkified:
    838 	while (meta != NULL) {
        		/* Remember the successor now; meta may be unlinked below. */
    839 		tmp_meta = meta->b_next;
    840 		msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
    841 		mp1 = meta->b_cont;
        		/* Already abandoned: nothing to send from this message. */
    842 		if (SCTP_IS_MSG_ABANDONED(meta))
    843 			goto next_msg;
    844 		if (!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
        			/* Live message: hand back its first sendable chunk. */
    845 			while (mp1 != NULL) {
    846 				if (SCTP_CHUNK_CANSEND(mp1)) {
    847 					*mp = mp1;
    848 #ifdef	DEBUG
    849 					ASSERT(sctp_verify_chain(
    850 					    sctp->sctp_xmit_head, meta));
    851 #endif
    852 					return (meta);
    853 				}
    854 				mp1 = mp1->b_next;
    855 			}
    856 			goto next_msg;
    857 		}
    858 		/*
    859 		 * If we come here and the first chunk is sent, then we
    860 		 * are PR-SCTP aware, in which case if the cumulative
    861 		 * TSN has moved upto or beyond the first chunk (which
    862 		 * means all the previous messages have been cumulative
    863 		 * SACK'd), then we send a Forward TSN with the last
    864 		 * chunk that was sent in this message. If we can't send
    865 		 * a Forward TSN because previous non-abandoned messages
    866 		 * have not been acked then we will defer the Forward TSN
    867 		 * to sctp_rexmit() or sctp_cumack().
    868 		 */
    869 		if (SCTP_CHUNK_ISSENT(mp1)) {
    870 			*error = sctp_check_abandoned_msg(sctp, meta);
    871 			if (*error != 0) {
    872 #ifdef	DEBUG
    873 				ASSERT(sctp_verify_chain(sctp->sctp_xmit_head,
    874 				    sctp->sctp_xmit_tail));
    875 #endif
    876 				return (NULL);
    877 			}
    878 			goto next_msg;
    879 		}
        		/*
        		 * The first chunk of this to-be-abandoned message was never
        		 * sent: count the drop, unlink meta from the doubly linked
        		 * transmit list (fixing up sctp_xmit_head/sctp_xmit_tail),
        		 * and report the failure.
        		 */
    880 		BUMP_LOCAL(sctp->sctp_prsctpdrop);
    881 		ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
    882 		if (meta->b_prev == NULL) {
        			/* meta is the head of the list. */
    883 			ASSERT(sctp->sctp_xmit_head == meta);
    884 			sctp->sctp_xmit_head = tmp_meta;
    885 			if (sctp->sctp_xmit_tail == meta)
    886 				sctp->sctp_xmit_tail = tmp_meta;
    887 			meta->b_next = NULL;
    888 			if (tmp_meta != NULL)
    889 				tmp_meta->b_prev = NULL;
    890 		} else if (meta->b_next == NULL) {
        			/* meta is the last element of the list. */
    891 			if (sctp->sctp_xmit_tail == meta)
    892 				sctp->sctp_xmit_tail = meta->b_prev;
    893 			meta->b_prev->b_next = NULL;
    894 			meta->b_prev = NULL;
    895 		} else {
        			/* meta has both a predecessor and a successor. */
    896 			meta->b_prev->b_next = tmp_meta;
    897 			tmp_meta->b_prev = meta->b_prev;
    898 			if (sctp->sctp_xmit_tail == meta)
    899 				sctp->sctp_xmit_tail = tmp_meta;
    900 			meta->b_prev = NULL;
    901 			meta->b_next = NULL;
    902 		}
    903 		sctp->sctp_unsent -= msg_hdr->smh_msglen;
    904 		/*
    905 		 * Update ULP the amount of queued data, which is
    906 		 * sent-unack'ed + unsent.
    907 		 */
    908 		if (!SCTP_IS_DETACHED(sctp))
    909 			SCTP_TXQ_UPDATE(sctp);
        		/* meta is fully unlinked here; pass it on for the event. */
    910 		sctp_sendfail_event(sctp, meta, 0, B_TRUE);
    911 next_msg:
    912 		meta = tmp_meta;
    913 	}
    914 	/* chunkify, if needed */
    915 	if (cansend > 0 && sctp->sctp_xmit_unsent != NULL) {
    916 		ASSERT(sctp->sctp_unsent > 0);
    917 		if (fp == NULL) {
        			/*
        			 * No destination supplied: use the one recorded on
        			 * sctp_xmit_unsent if it is alive, else sctp_current.
        			 */
    918 			fp = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
    919 			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
    920 				fp = sctp->sctp_current;
    921 		} else {
    922 			/*
    923 			 * If user specified destination, try to honor that.
        			 * That is, when sctp_xmit_unsent is bound to a live
        			 * destination other than fp, do not chunkify it for
        			 * this caller.
    924 			 */
    925 			fp1 = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
    926 			if (fp1 != NULL && fp1->state == SCTP_FADDRS_ALIVE &&
    927 			    fp1 != fp) {
    928 				goto chunk_done;
    929 			}
    930 		}
    931 		meta = sctp_chunkify(sctp, fp->sfa_pmss, firstseg_len, cansend);
    932 		if (meta == NULL)
    933 			goto chunk_done;
    934 		/*
    935 		 * sctp_chunkify() won't advance sctp_xmit_tail if it adds
    936 		 * new chunk(s) to the tail, so we need to skip the
    937 		 * sctp_xmit_tail, which would have already been processed.
    938 		 * This could happen when there is unacked chunks, but
    939 		 * nothing new to send.
    940 		 * When sctp_chunkify() is called when the transmit queue
    941 		 * is empty then we need to start from sctp_xmit_tail.
    942 		 */
    943 		if (SCTP_CHUNK_ISSENT(sctp->sctp_xmit_tail->b_cont)) {
    944 #ifdef	DEBUG
    945 			mp1 = sctp->sctp_xmit_tail->b_cont;
    946 			while (mp1 != NULL) {
    947 				ASSERT(!SCTP_CHUNK_CANSEND(mp1));
    948 				mp1 = mp1->b_next;
    949 			}
    950 #endif
    951 			if ((meta = sctp->sctp_xmit_tail->b_next) == NULL)
    952 				goto chunk_done;
    953 		}
        		/* Rescan, starting with the freshly chunkified message(s). */
    954 		goto chunkified;
    955 	}
    956 chunk_done:
    957 #ifdef	DEBUG
    958 	ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, sctp->sctp_xmit_tail));
    959 #endif
    960 	return (NULL);
    961 }
    962 
    963 void
    964 sctp_fast_rexmit(sctp_t *sctp)
    965 {
    966 	mblk_t		*mp, *head;
    967 	int		pktlen = 0;
    968 	sctp_faddr_t	*fp = NULL;
    969 	sctp_stack_t	*sctps = sctp->sctp_sctps;
    970 
    971 	ASSERT(sctp->sctp_xmit_head != NULL);
    972 	mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp);
    973 	if (mp == NULL) {
    974 		SCTP_KSTAT(sctps, sctp_fr_not_found);
    975 		return;
    976 	}
    977 	if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) {
    978 		freemsg(mp);
    979 		SCTP_KSTAT(sctps, sctp_fr_add_hdr);
    980 		return;
    981 	}
    982 	if ((pktlen > fp->sfa_pmss) && fp->isv4) {
    983 		ipha_t *iph = (ipha_t *)head->b_rptr;
    984 
    985 		iph->ipha_fragment_offset_and_flags = 0;
    986 	}
    987 
    988 	sctp_set_iplen(sctp, head);
    989 	sctp_add_sendq(sctp, head);
    990 	sctp->sctp_active = fp->lastactive = lbolt64;
    991 }
    992 
    993 void
    994 sctp_output(sctp_t *sctp, uint_t num_pkt)
    995 {
        	/*
        	 * Send at most num_pkt packets of queued DATA chunks.  The amount
        	 * of payload is bounded by cansend (the smaller of sctp_frwnd and
        	 * sctp_unsent) and, per destination, by cwnd - suna.  Each packet
        	 * starts with one chunk (optionally preceded by a pending SACK)
        	 * and further chunks are bundled with linkb() while they fit.
        	 * Whenever sending has to stop early, control goes to unsent_data,
        	 * which arms the timer of the chosen destination.
        	 */
    996 	mblk_t			*mp = NULL;
    997 	mblk_t			*nmp;
    998 	mblk_t			*head;
    999 	mblk_t			*meta = sctp->sctp_xmit_tail;
   1000 	mblk_t			*fill = NULL;
   1001 	uint16_t 		chunklen;
   1002 	uint32_t 		cansend;
   1003 	int32_t			seglen;
   1004 	int32_t			xtralen;
   1005 	int32_t			sacklen;
   1006 	int32_t			pad = 0;
   1007 	int32_t			pathmax;
   1008 	int			extra;
   1009 	int64_t			now = lbolt64;
   1010 	sctp_faddr_t		*fp;
   1011 	sctp_faddr_t		*lfp;
   1012 	sctp_data_hdr_t		*sdc;
   1013 	int			error;
   1014 	boolean_t		notsent = B_TRUE;
   1015 	sctp_stack_t		*sctps = sctp->sctp_sctps;
   1016 
        	/*
        	 * sacklen is non-zero only when a SACK is to be bundled.  Note
        	 * that lfp is assigned only in that case; every use of lfp below
        	 * is guarded by sacklen > 0.
        	 */
   1017 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
   1018 		sacklen = 0;
   1019 	} else {
   1020 		/* send a SACK chunk */
   1021 		sacklen = sizeof (sctp_chunk_hdr_t) +
   1022 		    sizeof (sctp_sack_chunk_t) +
   1023 		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
   1024 		lfp = sctp->sctp_lastdata;
   1025 		ASSERT(lfp != NULL);
   1026 		if (lfp->state != SCTP_FADDRS_ALIVE)
   1027 			lfp = sctp->sctp_current;
   1028 	}
   1029 
        	/* cansend = min(sctp_frwnd, sctp_unsent) */
   1030 	cansend = sctp->sctp_frwnd;
   1031 	if (sctp->sctp_unsent < cansend)
   1032 		cansend = sctp->sctp_unsent;
   1033 
   1034 	/*
   1035 	 * Start persist timer if unable to send or when
   1036 	 * trying to send into a zero window. This timer
   1037 	 * ensures the blocked send attempt is retried.
        	 *
        	 * Since && binds tighter than ||, the test below reads
        	 *   (small cansend && some-but-less-than-pmss unacked &&
        	 *    !sctp_ndelay) || (cansend == 0 && nothing unacked &&
        	 *    something unsent).
   1038 	 */
   1039 	if ((cansend < sctp->sctp_current->sfa_pmss / 2) &&
   1040 	    (sctp->sctp_unacked != 0) &&
   1041 	    (sctp->sctp_unacked < sctp->sctp_current->sfa_pmss) &&
   1042 	    !sctp->sctp_ndelay ||
   1043 	    (cansend == 0 && sctp->sctp_unacked == 0 &&
   1044 	    sctp->sctp_unsent != 0)) {
   1045 		head = NULL;
   1046 		fp = sctp->sctp_current;
   1047 		goto unsent_data;
   1048 	}
   1049 	if (meta != NULL)
   1050 		mp = meta->b_cont;
        	/* One iteration of this loop builds and queues one packet. */
   1051 	while (cansend > 0 && num_pkt-- != 0) {
   1052 		pad = 0;
   1053 
   1054 		/*
   1055 		 * Find first segment eligible for transmit.
   1056 		 */
   1057 		while (mp != NULL) {
   1058 			if (SCTP_CHUNK_CANSEND(mp))
   1059 				break;
   1060 			mp = mp->b_next;
   1061 		}
   1062 		if (mp == NULL) {
        			/*
        			 * Nothing left in the current message; look in the
        			 * following ones (chunkifying unsent data if needed).
        			 */
   1063 			meta = sctp_get_msg_to_send(sctp, &mp,
   1064 			    meta == NULL ? NULL : meta->b_next, &error, sacklen,
   1065 			    cansend, NULL);
   1066 			if (error != 0 || meta == NULL) {
   1067 				head = NULL;
   1068 				fp = sctp->sctp_current;
   1069 				goto unsent_data;
   1070 			}
   1071 			sctp->sctp_xmit_tail =  meta;
   1072 		}
   1073 
        		/*
        		 * seglen is the chunk length from the DATA header, xtralen
        		 * the header part of it, chunklen the payload alone.
        		 */
   1074 		sdc = (sctp_data_hdr_t *)mp->b_rptr;
   1075 		seglen = ntohs(sdc->sdh_len);
   1076 		xtralen = sizeof (*sdc);
   1077 		chunklen = seglen - xtralen;
   1078 
   1079 		/*
   1080 		 * Check rwnd.
   1081 		 */
   1082 		if (chunklen > cansend) {
   1083 			head = NULL;
   1084 			fp = SCTP_CHUNK_DEST(meta);
   1085 			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
   1086 				fp = sctp->sctp_current;
   1087 			goto unsent_data;
   1088 		}
        		/* extra = bytes of padding needed to reach SCTP_ALIGN. */
   1089 		if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
   1090 			extra = SCTP_ALIGN - extra;
   1091 
   1092 		/*
   1093 		 * Pick destination address, and check cwnd.
   1094 		 */
   1095 		if (sacklen > 0 && (seglen + extra <= lfp->cwnd - lfp->suna) &&
   1096 		    (seglen + sacklen + extra <= lfp->sfa_pmss)) {
   1097 			/*
   1098 			 * Only include SACK chunk if it can be bundled
   1099 			 * with a data chunk, and sent to sctp_lastdata.
   1100 			 */
   1101 			pathmax = lfp->cwnd - lfp->suna;
   1102 
   1103 			fp = lfp;
   1104 			if ((nmp = dupmsg(mp)) == NULL) {
   1105 				head = NULL;
   1106 				goto unsent_data;
   1107 			}
   1108 			SCTP_CHUNK_CLEAR_FLAGS(nmp);
   1109 			head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen,
   1110 			    &error);
   1111 			if (head == NULL) {
   1112 				/*
   1113 				 * If none of the source addresses are
   1114 				 * available (i.e error == EHOSTUNREACH),
   1115 				 * pretend we have sent the data. We will
   1116 				 * eventually time out trying to retransmit
   1117 				 * the data if the interface never comes up.
   1118 				 * If we have already sent some stuff (i.e.,
   1119 				 * notsent is B_FALSE) then we are fine, else
   1120 				 * just mark this packet as sent.
   1121 				 */
   1122 				if (notsent && error == EHOSTUNREACH) {
   1123 					SCTP_CHUNK_SENT(sctp, mp, sdc,
   1124 					    fp, chunklen, meta);
   1125 				}
   1126 				freemsg(nmp);
   1127 				SCTP_KSTAT(sctps, sctp_output_failed);
   1128 				goto unsent_data;
   1129 			}
        			/*
        			 * Account for the SACK in this packet and clear
        			 * sacklen so that later packets built by this call
        			 * do not try to include it again.
        			 */
   1130 			seglen += sacklen;
   1131 			xtralen += sacklen;
   1132 			sacklen = 0;
   1133 		} else {
   1134 			fp = SCTP_CHUNK_DEST(meta);
   1135 			if (fp == NULL || fp->state != SCTP_FADDRS_ALIVE)
   1136 				fp = sctp->sctp_current;
   1137 			/*
   1138 			 * If we haven't sent data to this destination for
   1139 			 * a while, do slow start again.
   1140 			 */
   1141 			if (now - fp->lastactive > fp->rto) {
   1142 				SET_CWND(fp, fp->sfa_pmss,
   1143 				    sctps->sctps_slow_start_after_idle);
   1144 			}
   1145 
   1146 			pathmax = fp->cwnd - fp->suna;
   1147 			if (seglen + extra > pathmax) {
   1148 				head = NULL;
   1149 				goto unsent_data;
   1150 			}
   1151 			if ((nmp = dupmsg(mp)) == NULL) {
   1152 				head = NULL;
   1153 				goto unsent_data;
   1154 			}
   1155 			SCTP_CHUNK_CLEAR_FLAGS(nmp);
   1156 			head = sctp_add_proto_hdr(sctp, fp, nmp, 0, &error);
   1157 			if (head == NULL) {
   1158 				/*
   1159 				 * If none of the source addresses are
   1160 				 * available (i.e error == EHOSTUNREACH),
   1161 				 * pretend we have sent the data. We will
   1162 				 * eventually time out trying to retransmit
   1163 				 * the data if the interface never comes up.
   1164 				 * If we have already sent some stuff (i.e.,
   1165 				 * notsent is B_FALSE) then we are fine, else
   1166 				 * just mark this packet as sent.
   1167 				 */
   1168 				if (notsent && error == EHOSTUNREACH) {
   1169 					SCTP_CHUNK_SENT(sctp, mp, sdc,
   1170 					    fp, chunklen, meta);
   1171 				}
   1172 				freemsg(nmp);
   1173 				SCTP_KSTAT(sctps, sctp_output_failed);
   1174 				goto unsent_data;
   1175 			}
   1176 		}
   1177 		fp->lastactive = now;
        		/* A single packet is capped at the path MSS. */
   1178 		if (pathmax > fp->sfa_pmss)
   1179 			pathmax = fp->sfa_pmss;
   1180 		SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
   1181 		mp = mp->b_next;
   1182 
   1183 		/* Use this chunk to measure RTT? */
   1184 		if (sctp->sctp_out_time == 0) {
   1185 			sctp->sctp_out_time = now;
   1186 			sctp->sctp_rtt_tsn = sctp->sctp_ltsn - 1;
   1187 			ASSERT(sctp->sctp_rtt_tsn == ntohl(sdc->sdh_tsn));
   1188 		}
   1189 		if (extra > 0) {
   1190 			fill = sctp_get_padding(sctp, extra);
   1191 			if (fill != NULL) {
   1192 				linkb(head, fill);
   1193 				pad = extra;
   1194 				seglen += extra;
   1195 			} else {
        				/*
        				 * NOTE(review): head is freed at unsent_data
        				 * although the chunk has already gone through
        				 * SCTP_CHUNK_SENT() above; presumably it is
        				 * recovered by retransmission - confirm.
        				 */
   1196 				goto unsent_data;
   1197 			}
   1198 		}
   1199 		/*
   1200 		 * Bundle chunks. We linkb() the chunks together to send
   1201 		 * downstream in a single packet.
   1202 		 * Partial chunks MUST NOT be bundled with full chunks, so we
   1203 		 * rely on sctp_get_msg_to_send() to only return messages that
   1204 		 * will fit entirely in the current packet.
   1205 		 */
   1206 		while (seglen < pathmax) {
   1207 			int32_t		new_len;
   1208 			int32_t		new_xtralen;
   1209 
   1210 			while (mp != NULL) {
   1211 				if (SCTP_CHUNK_CANSEND(mp))
   1212 					break;
   1213 				mp = mp->b_next;
   1214 			}
   1215 			if (mp == NULL) {
        				/*
        				 * NOTE(review): the guard compares
        				 * (seglen - xtralen) with cansend, but the
        				 * value passed is cansend - seglen, which is
        				 * an unsigned subtraction and wraps when
        				 * seglen > cansend - confirm this is intended.
        				 */
   1216 				meta = sctp_get_msg_to_send(sctp, &mp,
   1217 				    meta->b_next, &error, seglen,
   1218 				    (seglen - xtralen) >= cansend ? 0 :
   1219 				    cansend - seglen, fp);
   1220 				if (error != 0)
   1221 					break;
   1222 				/* If no more eligible chunks, cease bundling */
   1223 				if (meta == NULL)
   1224 					break;
   1225 				sctp->sctp_xmit_tail =  meta;
   1226 			}
   1227 			ASSERT(mp != NULL);
        			/*
        			 * Do not bundle a not-yet-sent chunk whose message is
        			 * bound to a destination other than fp.
        			 */
   1228 			if (!SCTP_CHUNK_ISSENT(mp) && SCTP_CHUNK_DEST(meta) &&
   1229 			    fp != SCTP_CHUNK_DEST(meta)) {
   1230 				break;
   1231 			}
   1232 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
   1233 			chunklen = ntohs(sdc->sdh_len);
   1234 			if ((extra = chunklen  & (SCTP_ALIGN - 1)) != 0)
   1235 				extra = SCTP_ALIGN - extra;
   1236 
        			/*
        			 * Tentative totals with this chunk added; chunklen
        			 * is reduced to the payload length afterwards.
        			 */
   1237 			new_len = seglen + chunklen;
   1238 			new_xtralen = xtralen + sizeof (*sdc);
   1239 			chunklen -= sizeof (*sdc);
   1240 
        			/* Stop if payload exceeds cansend or packet pathmax. */
   1241 			if (new_len - new_xtralen > cansend ||
   1242 			    new_len + extra > pathmax) {
   1243 				break;
   1244 			}
   1245 			if ((nmp = dupmsg(mp)) == NULL)
   1246 				break;
   1247 			if (extra > 0) {
   1248 				fill = sctp_get_padding(sctp, extra);
   1249 				if (fill != NULL) {
   1250 					pad += extra;
   1251 					new_len += extra;
   1252 					linkb(nmp, fill);
   1253 				} else {
   1254 					freemsg(nmp);
   1255 					break;
   1256 				}
   1257 			}
        			/* Commit: the chunk is now part of this packet. */
   1258 			seglen = new_len;
   1259 			xtralen = new_xtralen;
   1260 			SCTP_CHUNK_CLEAR_FLAGS(nmp);
   1261 			SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
   1262 			linkb(head, nmp);
   1263 			mp = mp->b_next;
   1264 		}
   1265 		if ((seglen > fp->sfa_pmss) && fp->isv4) {
   1266 			ipha_t *iph = (ipha_t *)head->b_rptr;
   1267 
   1268 			/*
   1269 			 * Path MTU is different from what we thought it would
   1270 			 * be when we created chunks, or IP headers have grown.
   1271 			 * Need to clear the DF bit.
   1272 			 */
   1273 			iph->ipha_fragment_offset_and_flags = 0;
   1274 		}
   1275 		/* xmit segment */
        		/* Only payload bytes (no headers, no padding) use up cansend. */
   1276 		ASSERT(cansend >= seglen - pad - xtralen);
   1277 		cansend -= (seglen - pad - xtralen);
   1278 		dprint(2, ("sctp_output: Sending packet %d bytes, tsn %x "
   1279 		    "ssn %d to %p (rwnd %d, cansend %d, lastack_rxd %x)\n",
   1280 		    seglen - xtralen, ntohl(sdc->sdh_tsn),
   1281 		    ntohs(sdc->sdh_ssn), (void *)fp, sctp->sctp_frwnd,
   1282 		    cansend, sctp->sctp_lastack_rxd));
   1283 		sctp_set_iplen(sctp, head);
   1284 		sctp_add_sendq(sctp, head);
   1285 		/* arm rto timer (if not set) */
   1286 		if (!fp->timer_running)
   1287 			SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
   1288 		notsent = B_FALSE;
   1289 	}
   1290 	sctp->sctp_active = now;
   1291 	return;
        	/*
        	 * Common exit for every path that could not (fully) send.  Callers
        	 * of this label set fp, and set head to either NULL or a packet
        	 * that must be discarded.
        	 */
   1292 unsent_data:
   1293 	/* arm persist timer (if rto timer not set) */
   1294 	if (!fp->timer_running)
   1295 		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
   1296 	if (head != NULL)
   1297 		freemsg(head);
   1298 }
   1299 
   1300 /*
   1301  * The following two functions initialize and destroy the cache
   1302  * associated with the sets used for PR-SCTP.
   1303  */
   1304 void
   1305 sctp_ftsn_sets_init(void)
   1306 {
   1307 	sctp_kmem_ftsn_set_cache = kmem_cache_create("sctp_ftsn_set_cache",
   1308 	    sizeof (sctp_ftsn_set_t), 0, NULL, NULL, NULL, NULL,
   1309 	    NULL, 0);
   1310 }
   1311 
   1312 void
   1313 sctp_ftsn_sets_fini(void)
   1314 {
   1315 	kmem_cache_destroy(sctp_kmem_ftsn_set_cache);
   1316 }
   1317 
   1318 
   1319 /* Free PR-SCTP sets */
   1320 void
   1321 sctp_free_ftsn_set(sctp_ftsn_set_t *s)
   1322 {
   1323 	sctp_ftsn_set_t *p;
   1324 
   1325 	while (s != NULL) {
   1326 		p = s->next;
   1327 		s->next = NULL;
   1328 		kmem_cache_free(sctp_kmem_ftsn_set_cache, s);
   1329 		s = p;
   1330 	}
   1331 }
   1332 
   1333 /*
   1334  * Given a message meta block, meta, this routine creates or modifies
   1335  * the set that will be used to generate a Forward TSN chunk. If the
   1336  * entry for stream id, sid, for this message already exists, the
   1337  * sequence number, ssn, is updated if it is greater than the existing
   1338  * one. If an entry for this sid does not exist, one is created if
   1339  * the size does not exceed fp->sfa_pmss. We return false in case
   1340  * or an error.
   1341  */
   1342 boolean_t
   1343 sctp_add_ftsn_set(sctp_ftsn_set_t **s, sctp_faddr_t *fp, mblk_t *meta,
   1344     uint_t *nsets, uint32_t *slen)
   1345 {
   1346 	sctp_ftsn_set_t		*p;
   1347 	sctp_msg_hdr_t		*msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
   1348 	uint16_t		sid = htons(msg_hdr->smh_sid);
   1349 	/* msg_hdr->smh_ssn is already in NBO */
   1350 	uint16_t		ssn = msg_hdr->smh_ssn;
   1351 
   1352 	ASSERT(s != NULL && nsets != NULL);
   1353 	ASSERT((*nsets == 0 && *s == NULL) || (*nsets > 0 && *s != NULL));
   1354 
   1355 	if (*s == NULL) {
   1356 		ASSERT((*slen + sizeof (uint32_t)) <= fp->sfa_pmss);
   1357 		*s = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, KM_NOSLEEP);
   1358 		if (*s == NULL)
   1359 			return (B_FALSE);
   1360 		(*s)->ftsn_entries.ftsn_sid = sid;
   1361 		(*s)->ftsn_entries.ftsn_ssn = ssn;
   1362 		(*s)->next = NULL;
   1363 		*nsets = 1;
   1364 		*slen += sizeof (uint32_t);
   1365 		return (B_TRUE);
   1366 	}
   1367 	for (p = *s; p->next != NULL; p = p->next) {
   1368 		if (p->ftsn_entries.ftsn_sid == sid) {
   1369 			if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn))
   1370 				p->ftsn_entries.ftsn_ssn = ssn;
   1371 			return (B_TRUE);
   1372 		}
   1373 	}
   1374 	/* the last one */
   1375 	if (p->ftsn_entries.ftsn_sid == sid) {
   1376 		if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn))
   1377 			p->ftsn_entries.ftsn_ssn = ssn;
   1378 	} else {
   1379 		if ((*slen + sizeof (uint32_t)) > fp->sfa_pmss)
   1380 			return (B_FALSE);
   1381 		p->next = kmem_cache_alloc(sctp_kmem_ftsn_set_cache,
   1382 		    KM_NOSLEEP);
   1383 		if (p->next == NULL)
   1384 			return (B_FALSE);
   1385 		p = p->next;
   1386 		p->ftsn_entries.ftsn_sid = sid;
   1387 		p->ftsn_entries.ftsn_ssn = ssn;
   1388 		p->next = NULL;
   1389 		(*nsets)++;
   1390 		*slen += sizeof (uint32_t);
   1391 	}
   1392 	return (B_TRUE);
   1393 }
   1394 
   1395 /*
   1396  * Given a set of stream id - sequence number pairs, this routing creates
   1397  * a Forward TSN chunk. The cumulative TSN (advanced peer ack point)
   1398  * for the chunk is obtained from sctp->sctp_adv_pap. The caller
   1399  * will add the IP/SCTP header.
   1400  */
   1401 mblk_t *
   1402 sctp_make_ftsn_chunk(sctp_t *sctp, sctp_faddr_t *fp, sctp_ftsn_set_t *sets,
   1403     uint_t nsets, uint32_t seglen)
   1404 {
   1405 	mblk_t			*ftsn_mp;
   1406 	sctp_chunk_hdr_t	*ch_hdr;
   1407 	uint32_t		*advtsn;
   1408 	uint16_t		schlen;
   1409 	size_t			xtralen;
   1410 	ftsn_entry_t		*ftsn_entry;
   1411 	sctp_stack_t	*sctps = sctp->sctp_sctps;
   1412 
   1413 	seglen += sizeof (sctp_chunk_hdr_t);
   1414 	if (fp->isv4)
   1415 		xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra;
   1416 	else
   1417 		xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra;
   1418 	ftsn_mp = allocb_cred(xtralen + seglen, CONN_CRED(sctp->sctp_connp),
   1419 	    sctp->sctp_cpid);
   1420 	if (ftsn_mp == NULL)
   1421 		return (NULL);
   1422 	ftsn_mp->b_rptr += xtralen;
   1423 	ftsn_mp->b_wptr = ftsn_mp->b_rptr + seglen;
   1424 
   1425 	ch_hdr = (sctp_chunk_hdr_t *)ftsn_mp->b_rptr;
   1426 	ch_hdr->sch_id = CHUNK_FORWARD_TSN;
   1427 	ch_hdr->sch_flags = 0;
   1428 	/*
   1429 	 * The cast here should not be an issue since seglen is
   1430 	 * the length of the Forward TSN chunk.
   1431 	 */
   1432 	schlen = (uint16_t)seglen;
   1433 	U16_TO_ABE16(schlen, &(ch_hdr->sch_len));
   1434 
   1435 	advtsn = (uint32_t *)(ch_hdr + 1);
   1436 	U32_TO_ABE32(sctp->sctp_adv_pap, advtsn);
   1437 	ftsn_entry = (ftsn_entry_t *)(advtsn + 1);
   1438 	while (nsets > 0) {
   1439 		ASSERT((uchar_t *)&ftsn_entry[1] <= ftsn_mp->b_wptr);
   1440 		ftsn_entry->ftsn_sid = sets->ftsn_entries.ftsn_sid;
   1441 		ftsn_entry->ftsn_ssn = sets->ftsn_entries.ftsn_ssn;
   1442 		ftsn_entry++;
   1443 		sets = sets->next;
   1444 		nsets--;
   1445 	}
   1446 	return (ftsn_mp);
   1447 }
   1448 
   1449 /*
   1450  * Given a starting message, the routine steps through all the
   1451  * messages whose TSN is not greater than sctp->sctp_adv_pap and creates
   1452  * ftsn sets. The ftsn sets are then used to create a Forward TSN
   1453  * chunk. All the messages, that have chunks that are included in the
   1454  * ftsn sets, are flagged abandoned. If a message is partially sent
   1455  * and is deemed abandoned, all remaining unsent chunks are marked
   1456  * abandoned and are deducted from sctp_unsent.
         *
         * On success *nmp is the Forward TSN packet with protocol headers
         * prepended (and a SACK bundled when it fits) and *seglen is its chunk
         * length including any SACK.  *nmp is NULL if the chunk could not be
         * built, and is left untouched when no message qualified.
   1457  */
   1458 void
   1459 sctp_make_ftsns(sctp_t *sctp, mblk_t *meta, mblk_t *mp, mblk_t **nmp,
   1460     sctp_faddr_t *fp, uint32_t *seglen)
   1461 {
   1462 	mblk_t		*mp1 = mp;
   1463 	mblk_t		*mp_head = mp;
   1464 	mblk_t		*meta_head = meta;
   1465 	mblk_t		*head;
   1466 	sctp_ftsn_set_t	*sets = NULL;
   1467 	uint_t		nsets = 0;
   1468 	uint16_t	clen;
   1469 	sctp_data_hdr_t	*sdc;
   1470 	uint32_t	sacklen;
   1471 	uint32_t	adv_pap = sctp->sctp_adv_pap;
   1472 	uint32_t	unsent = 0;
   1473 	boolean_t	ubit;
   1474 	sctp_stack_t	*sctps = sctp->sctp_sctps;
   1475 
        	/* The chunk body starts with the 32-bit cumulative TSN. */
   1476 	*seglen = sizeof (uint32_t);
   1477 
        	/*
        	 * Pass 1: collect one set per ordered message covered by
        	 * sctp_adv_pap.  Nothing is modified on the messages themselves,
        	 * so a failure needs no undo.  The local adv_pap tracks the TSN of
        	 * the last sent chunk walked so far.
        	 */
   1478 	sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
   1479 	while (meta != NULL &&
   1480 	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
   1481 		/*
   1482 		 * Skip adding FTSN sets for un-ordered messages as they do
   1483 		 * not have SSNs.
   1484 		 */
   1485 		ubit = SCTP_DATA_GET_UBIT(sdc);
   1486 		if (!ubit &&
   1487 		    !sctp_add_ftsn_set(&sets, fp, meta, &nsets, seglen)) {
        			/*
        			 * Could not add a set: pull sctp_adv_pap back to
        			 * the last TSN covered so far and build the chunk
        			 * from what has been collected.
        			 */
   1488 			meta = NULL;
   1489 			sctp->sctp_adv_pap = adv_pap;
   1490 			goto ftsn_done;
   1491 		}
   1492 		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
   1493 			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
   1494 			adv_pap = ntohl(sdc->sdh_tsn);
   1495 			mp1 = mp1->b_next;
   1496 		}
   1497 		meta = meta->b_next;
   1498 		if (meta != NULL) {
   1499 			mp1 = meta->b_cont;
        			/* Stop at the first message with nothing sent. */
   1500 			if (!SCTP_CHUNK_ISSENT(mp1))
   1501 				break;
   1502 			sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
   1503 		}
   1504 	}
   1505 ftsn_done:
   1506 	/*
   1507 	 * Can't compare with sets == NULL, since we don't add any
   1508 	 * sets for un-ordered messages.
        	 * meta == meta_head means the loop above did not advance at all.
   1509 	 */
   1510 	if (meta == meta_head)
   1511 		return;
   1512 	*nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, *seglen);
   1513 	sctp_free_ftsn_set(sets);
   1514 	if (*nmp == NULL)
   1515 		return;
        	/*
        	 * Bundle a pending SACK if it fits within the path MSS of
        	 * sctp_lastdata; in that case the packet goes to sctp_lastdata.
        	 */
   1516 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
   1517 		sacklen = 0;
   1518 	} else {
   1519 		sacklen = sizeof (sctp_chunk_hdr_t) +
   1520 		    sizeof (sctp_sack_chunk_t) +
   1521 		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
   1522 		if (*seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) {
   1523 			/* piggybacked SACK doesn't fit */
   1524 			sacklen = 0;
   1525 		} else {
   1526 			fp = sctp->sctp_lastdata;
   1527 		}
   1528 	}
   1529 	head = sctp_add_proto_hdr(sctp, fp, *nmp, sacklen, NULL);
   1530 	if (head == NULL) {
   1531 		freemsg(*nmp);
   1532 		*nmp = NULL;
   1533 		SCTP_KSTAT(sctps, sctp_send_ftsn_failed);
   1534 		return;
   1535 	}
   1536 	*seglen += sacklen;
   1537 	*nmp = head;
   1538 
   1539 	/*
   1540 	 * XXXNeed to optimise this, the reason it is done here is so
   1541 	 * that we don't have to undo in case of failure.
        	 *
        	 * Pass 2: walk the same messages again, this time flagging them
        	 * abandoned now that the chunk has been built successfully.
   1542 	 */
   1543 	mp1 = mp_head;
   1544 	sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
   1545 	while (meta_head != NULL &&
   1546 	    SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
   1547 		if (!SCTP_IS_MSG_ABANDONED(meta_head))
   1548 			SCTP_MSG_SET_ABANDONED(meta_head);
        		/* Sent but not yet acked chunks go through SCTP_CHUNK_SENT(). */
   1549 		while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
   1550 			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
   1551 			if (!SCTP_CHUNK_ISACKED(mp1)) {
   1552 				clen = ntohs(sdc->sdh_len) - sizeof (*sdc);
   1553 				SCTP_CHUNK_SENT(sctp, mp1, sdc, fp, clen,
   1554 				    meta_head);
   1555 			}
   1556 			mp1 = mp1->b_next;
   1557 		}
        		/*
        		 * The rest of the message was never sent: abandon each
        		 * chunk and add up the payload to deduct from sctp_unsent.
        		 */
   1558 		while (mp1 != NULL) {
   1559 			sdc = (sctp_data_hdr_t *)mp1->b_rptr;
   1560 			if (!SCTP_CHUNK_ABANDONED(mp1)) {
   1561 				ASSERT(!SCTP_CHUNK_ISSENT(mp1));
   1562 				unsent += ntohs(sdc->sdh_len) - sizeof (*sdc);
   1563 				SCTP_ABANDON_CHUNK(mp1);
   1564 			}
   1565 			mp1 = mp1->b_next;
   1566 		}
   1567 		meta_head = meta_head->b_next;
   1568 		if (meta_head != NULL) {
   1569 			mp1 = meta_head->b_cont;
   1570 			if (!SCTP_CHUNK_ISSENT(mp1))
   1571 				break;
   1572 			sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
   1573 		}
   1574 	}
   1575 	if (unsent > 0) {
   1576 		ASSERT(sctp->sctp_unsent >= unsent);
   1577 		sctp->sctp_unsent -= unsent;
   1578 		/*
   1579 		 * Update ULP the amount of queued data, which is
   1580 		 * sent-unack'ed + unsent.
   1581 		 */
   1582 		if (!SCTP_IS_DETACHED(sctp))
   1583 			SCTP_TXQ_UPDATE(sctp);
   1584 	}
   1585 }
   1586 
   1587 /*
   1588  * This function steps through messages starting at meta and checks if
   1589  * the message is abandoned. It stops when it hits an unsent chunk or
   1590  * a message that has all its chunk acked. This is the only place
   1591  * where the sctp_adv_pap is moved forward to indicated abandoned
   1592  * messages.
   1593  */
   1594 void
   1595 sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp)
   1596 {
   1597 	uint32_t	tsn = sctp->sctp_adv_pap;
   1598 	sctp_data_hdr_t	*sdc;
   1599 	sctp_msg_hdr_t	*msg_hdr;
   1600 
   1601 	ASSERT(mp != NULL);
   1602 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
   1603 	ASSERT(SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_lastack_rxd));
   1604 	msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
   1605 	if (!SCTP_IS_MSG_ABANDONED(meta) &&
   1606 	    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
   1607 		return;
   1608 	}
   1609 	while (meta != NULL) {
   1610 		while (mp != NULL && SCTP_CHUNK_ISSENT(mp)) {
   1611 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
   1612 			tsn = ntohl(sdc->sdh_tsn);
   1613 			mp = mp->b_next;
   1614 		}
   1615 		if (mp != NULL)
   1616 			break;
   1617 		/*
   1618 		 * We continue checking for successive messages only if there
   1619 		 * is a chunk marked for retransmission. Else, we might
   1620 		 * end up sending FTSN prematurely for chunks that have been
   1621 		 * sent, but not yet acked.
   1622 		 */
   1623 		if ((meta = meta->b_next) != NULL) {
   1624 			msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
   1625 			if (!SCTP_IS_MSG_ABANDONED(meta) &&
   1626 			    !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
   1627 				break;
   1628 			}
   1629 			for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
   1630 				if (!SCTP_CHUNK_ISSENT(mp)) {
   1631 					sctp->sctp_adv_pap = tsn;
   1632 					return;
   1633 				}
   1634 				if (SCTP_CHUNK_WANT_REXMIT(mp))
   1635 					break;
   1636 			}
   1637 			if (mp == NULL)
   1638 				break;
   1639 		}
   1640 	}
   1641 	sctp->sctp_adv_pap = tsn;
   1642 }
   1643 
   1644 
   1645 /*
   1646  * Determine if we should bundle a data chunk with the chunk being
   1647  * retransmitted.  The chunk must not be abandoned, and we bundle if
   1648  *
   1649  * - the chunk is sent to the same destination (fp) and unack'ed.
   1650  *
   1651  * OR
   1652  *
   1653  * - the chunk is unsent, i.e. new data, or it carries
         *   SCTP_CHUNK_FLAG_REXMIT; in other words its b_flag is anything
         *   other than "SENT without REXMIT".
         *
         * Note that mp is evaluated several times, so it must not have side
         * effects.
   1654  */
   1655 #define	SCTP_CHUNK_RX_CANBUNDLE(mp, fp)					\
   1656 	(!SCTP_CHUNK_ABANDONED((mp)) && 				\
   1657 	((SCTP_CHUNK_ISSENT((mp)) && (SCTP_CHUNK_DEST(mp) == (fp) &&	\
   1658 	!SCTP_CHUNK_ISACKED(mp))) ||					\
   1659 	(((mp)->b_flag & (SCTP_CHUNK_FLAG_REXMIT|SCTP_CHUNK_FLAG_SENT)) != \
   1660 	SCTP_CHUNK_FLAG_SENT)))
   1661 
   1662 /*
   1663  * Retransmit first segment which hasn't been acked with cumtsn or send
   1664  * a Forward TSN chunk, if appropriate.
   1665  */
   1666 void
   1667 sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp)
   1668 {
   1669 	mblk_t		*mp;
   1670 	mblk_t		*nmp = NULL;
   1671 	mblk_t		*head;
   1672 	mblk_t		*meta = sctp->sctp_xmit_head;
   1673 	mblk_t		*fill;
   1674 	uint32_t	seglen = 0;
   1675 	uint32_t	sacklen;
   1676 	uint16_t	chunklen;
   1677 	int		extra;
   1678 	sctp_data_hdr_t	*sdc;
   1679 	sctp_faddr_t	*fp;
   1680 	uint32_t	adv_pap = sctp->sctp_adv_pap;
   1681 	boolean_t	do_ftsn = B_FALSE;
   1682 	boolean_t	ftsn_check = B_TRUE;
   1683 	uint32_t	first_ua_tsn;
   1684 	sctp_msg_hdr_t	*mhdr;
   1685 	sctp_stack_t	*sctps = sctp->sctp_sctps;
   1686 	int		error;
   1687 
   1688 	while (meta != NULL) {
   1689 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
   1690 			uint32_t	tsn;
   1691 
   1692 			if (!SCTP_CHUNK_ISSENT(mp))
   1693 				goto window_probe;
   1694 			/*
   1695 			 * We break in the following cases -
   1696 			 *
   1697 			 *	if the advanced peer ack point includes the next
   1698 			 *	chunk to be retransmited - possibly the Forward
   1699 			 * 	TSN was lost.
   1700 			 *
   1701 			 *	if we are PRSCTP aware and the next chunk to be
   1702 			 *	retransmitted is now abandoned
   1703 			 *
   1704 			 *	if the next chunk to be retransmitted is for
   1705 			 *	the dest on which the timer went off. (this
   1706 			 *	message is not abandoned).
   1707 			 *
   1708 			 * We check for Forward TSN only for the first
   1709 			 * eligible chunk to be retransmitted. The reason
   1710 			 * being if the first eligible chunk is skipped (say
   1711 			 * it was sent to a destination other than oldfp)
   1712 			 * then we cannot advance the cum TSN via Forward
   1713 			 * TSN chunk.
   1714 			 *
   1715 			 * Also, ftsn_check is B_TRUE only for the first
   1716 			 * eligible chunk, it  will be B_FALSE for all
   1717 			 * subsequent candidate messages for retransmission.
   1718 			 */
   1719 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
   1720 			tsn = ntohl(sdc->sdh_tsn);
   1721 			if (SEQ_GT(tsn, sctp->sctp_lastack_rxd)) {
   1722 				if (sctp->sctp_prsctp_aware && ftsn_check) {
   1723 					if (SEQ_GEQ(sctp->sctp_adv_pap, tsn)) {
   1724 						ASSERT(sctp->sctp_prsctp_aware);
   1725 						do_ftsn = B_TRUE;
   1726 						goto out;
   1727 					} else {
   1728 						sctp_check_adv_ack_pt(sctp,
   1729 						    meta, mp);
   1730 						if (SEQ_GT(sctp->sctp_adv_pap,
   1731 						    adv_pap)) {
   1732 							do_ftsn = B_TRUE;
   1733 							goto out;
   1734 						}
   1735 					}
   1736 					ftsn_check = B_FALSE;
   1737 				}
   1738 				if (SCTP_CHUNK_DEST(mp) == oldfp)
   1739 					goto out;
   1740 			}
   1741 		}
   1742 		meta = meta->b_next;
   1743 		if (meta != NULL && sctp->sctp_prsctp_aware) {
   1744 			mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
   1745 
   1746 			while (meta != NULL && (SCTP_IS_MSG_ABANDONED(meta) ||
   1747 			    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp))) {
   1748 				meta = meta->b_next;
   1749 			}
   1750 		}
   1751 	}
   1752 window_probe:
   1753 	/*
   1754 	 * Retransmit fired for a destination which didn't have
   1755 	 * any unacked data pending.
   1756 	 */
   1757 	if (sctp->sctp_unacked == 0 && sctp->sctp_unsent != 0) {
   1758 		/*
   1759 		 * Send a window probe. Inflate frwnd to allow
   1760 		 * sending one segment.
   1761 		 */
   1762 		if (sctp->sctp_frwnd < (oldfp->sfa_pmss - sizeof (*sdc)))
   1763 			sctp->sctp_frwnd = oldfp->sfa_pmss - sizeof (*sdc);
   1764 
   1765 		/* next TSN to send */
   1766 		sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
   1767 
   1768 		/*
   1769 		 * The above sctp_frwnd adjustment is coarse.  The "changed"
   1770 		 * sctp_frwnd may allow us to send more than 1 packet.  So
   1771 		 * tell sctp_output() to send only 1 packet.
   1772 		 */
   1773 		sctp_output(sctp, 1);
   1774 
   1775 		/* Last sent TSN */
   1776 		sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1;
   1777 		ASSERT(sctp->sctp_rxt_maxtsn >= sctp->sctp_rxt_nxttsn);
   1778 		sctp->sctp_zero_win_probe = B_TRUE;
   1779 		BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe);
   1780 	}
   1781 	return;
   1782 out:
   1783 	/*
   1784 	 * After a time out, assume that everything has left the network.  So
   1785 	 * we can clear rxt_unacked for the original peer address.
   1786 	 */
   1787 	oldfp->rxt_unacked = 0;
   1788 
   1789 	/*
   1790 	 * If we were probing for zero window, don't adjust retransmission
   1791 	 * variables, but the timer is still backed off.
   1792 	 */
   1793 	if (sctp->sctp_zero_win_probe) {
   1794 		mblk_t	*pkt;
   1795 		uint_t	pkt_len;
   1796 
   1797 		/*
   1798 		 * Get the Zero Win Probe for retransmission, sctp_rxt_nxttsn
   1799 		 * and sctp_rxt_maxtsn will specify the ZWP packet.
   1800 		 */
   1801 		fp = oldfp;
   1802 		if (oldfp->state != SCTP_FADDRS_ALIVE)
   1803 			fp = sctp_rotate_faddr(sctp, oldfp);
   1804 		pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len);
   1805 		if (pkt != NULL) {
   1806 			ASSERT(pkt_len <= fp->sfa_pmss);
   1807 			sctp_set_iplen(sctp, pkt);
   1808 			sctp_add_sendq(sctp, pkt);
   1809 		} else {
   1810 			SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
   1811 		}
   1812 
   1813 		/*
   1814 		 * The strikes will be cleared by sctp_faddr_alive() when the
   1815 		 * other side sends us an ack.
   1816 		 */
   1817 		oldfp->strikes++;
   1818 		sctp->sctp_strikes++;
   1819 
   1820 		SCTP_CALC_RXT(sctp, oldfp);
   1821 		if (oldfp != fp && oldfp->suna != 0)
   1822 			SCTP_FADDR_TIMER_RESTART(sctp, oldfp, fp->rto);
   1823 		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
   1824 		BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe);
   1825 		return;
   1826 	}
   1827 
   1828 	/*
   1829 	 * Enter slowstart for this destination
   1830 	 */
   1831 	oldfp->ssthresh = oldfp->cwnd / 2;
   1832 	if (oldfp->ssthresh < 2 * oldfp->sfa_pmss)
   1833 		oldfp->ssthresh = 2 * oldfp->sfa_pmss;
   1834 	oldfp->cwnd = oldfp->sfa_pmss;
   1835 	oldfp->pba = 0;
   1836 	fp = sctp_rotate_faddr(sctp, oldfp);
   1837 	ASSERT(fp != NULL);
   1838 	sdc = (sctp_data_hdr_t *)mp->b_rptr;
   1839 
   1840 	first_ua_tsn = ntohl(sdc->sdh_tsn);
   1841 	if (do_ftsn) {
   1842 		sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen);
   1843 		if (nmp == NULL) {
   1844 			sctp->sctp_adv_pap = adv_pap;
   1845 			goto restart_timer;
   1846 		}
   1847 		head = nmp;
   1848 		/*
   1849 		 * Move to the next unabandoned chunk. XXXCheck if meta will
   1850 		 * always be marked abandoned.
   1851 		 */
   1852 		while (meta != NULL && SCTP_IS_MSG_ABANDONED(meta))
   1853 			meta = meta->b_next;
   1854 		if (meta != NULL)
   1855 			mp = mp->b_cont;
   1856 		else
   1857 			mp = NULL;
   1858 		goto try_bundle;
   1859 	}
   1860 	seglen = ntohs(sdc->sdh_len);
   1861 	chunklen = seglen - sizeof (*sdc);
   1862 	if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
   1863 		extra = SCTP_ALIGN - extra;
   1864 
   1865 	/* Find out if we need to piggyback SACK. */
   1866 	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
   1867 		sacklen = 0;
   1868 	} else {
   1869 		sacklen = sizeof (sctp_chunk_hdr_t) +
   1870 		    sizeof (sctp_sack_chunk_t) +
   1871 		    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
   1872 		if (seglen + sacklen > sctp->sctp_lastdata->sfa_pmss) {
   1873 			/* piggybacked SACK doesn't fit */
   1874 			sacklen = 0;
   1875 		} else {
   1876 			/*
   1877 			 * OK, we have room to send SACK back.  But we
   1878 			 * should send it back to the last fp where we
   1879 			 * receive data from, unless sctp_lastdata equals
   1880 			 * oldfp, then we should probably not send it
   1881 			 * back to that fp.  Also we should check that
   1882 			 * the fp is alive.
   1883 			 */
   1884 			if (sctp->sctp_lastdata != oldfp &&
   1885 			    sctp->sctp_lastdata->state == SCTP_FADDRS_ALIVE) {
   1886 				fp = sctp->sctp_lastdata;
   1887 			}
   1888 		}
   1889 	}
   1890 
   1891 	/*
   1892 	 * Cancel RTT measurement if the retransmitted TSN is before the
   1893 	 * TSN used for timing.
   1894 	 */
   1895 	if (sctp->sctp_out_time != 0 &&
   1896 	    SEQ_GEQ(sctp->sctp_rtt_tsn, sdc->sdh_tsn)) {
   1897 		sctp->sctp_out_time = 0;
   1898 	}
   1899 	/* Clear the counter as the RTT calculation may be off. */
   1900 	fp->rtt_updates = 0;
   1901 	oldfp->rtt_updates = 0;
   1902 
   1903 	/*
   1904 	 * After a timeout, we should change the current faddr so that
   1905 	 * new chunks will be sent to the alternate address.
   1906 	 */
   1907 	sctp_set_faddr_current(sctp, fp);
   1908 
   1909 	nmp = dupmsg(mp);
   1910 	if (nmp == NULL)
   1911 		goto restart_timer;
   1912 	if (extra > 0) {
   1913 		fill = sctp_get_padding(sctp, extra);
   1914 		if (fill != NULL) {
   1915 			linkb(nmp, fill);
   1916 			seglen += extra;
   1917 		} else {
   1918 			freemsg(nmp);
   1919 			goto restart_timer;
   1920 		}
   1921 	}
   1922 	SCTP_CHUNK_CLEAR_FLAGS(nmp);
   1923 	head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL);
   1924 	if (head == NULL) {
   1925 		freemsg(nmp);
   1926 		SCTP_KSTAT(sctps, sctp_rexmit_failed);
   1927 		goto restart_timer;
   1928 	}
   1929 	seglen += sacklen;
   1930 
   1931 	SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
   1932 
   1933 	mp = mp->b_next;
   1934 
   1935 try_bundle:
   1936 	/* We can at least and at most send 1 packet at timeout. */
   1937 	while (seglen < fp->sfa_pmss) {
   1938 		int32_t new_len;
   1939 
   1940 		/* Go through the list to find more chunks to be bundled. */
   1941 		while (mp != NULL) {
   1942 			/* Check if the chunk can be bundled. */
   1943 			if (SCTP_CHUNK_RX_CANBUNDLE(mp, oldfp))
   1944 				break;
   1945 			mp = mp->b_next;
   1946 		}
   1947 		/* Go to the next message. */
   1948 		if (mp == NULL) {
   1949 			for (meta = meta->b_next; meta != NULL;
   1950 			    meta = meta->b_next) {
   1951 				mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
   1952 
   1953 				if (SCTP_IS_MSG_ABANDONED(meta) ||
   1954 				    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr,
   1955 				    sctp)) {
   1956 					continue;
   1957 				}
   1958 
   1959 				mp = meta->b_cont;
   1960 				goto try_bundle;
   1961 			}
   1962 			/*
   1963 			 * Check if there is a new message which potentially
   1964 			 * could be bundled with this retransmission.
   1965 			 */
   1966 			meta = sctp_get_msg_to_send(sctp, &mp, NULL, &error,
   1967 			    seglen, fp->sfa_pmss - seglen, NULL);
   1968 			if (error != 0 || meta == NULL) {
   1969 				/* No more chunk to be bundled. */
   1970 				break;
   1971 			} else {
   1972 				goto try_bundle;
   1973 			}
   1974 		}
   1975 
   1976 		sdc = (sctp_data_hdr_t *)mp->b_rptr;
   1977 		new_len = ntohs(sdc->sdh_len);
   1978 		chunklen = new_len - sizeof (*sdc);
   1979 
   1980 		if ((extra = new_len & (SCTP_ALIGN - 1)) != 0)
   1981 			extra = SCTP_ALIGN - extra;
   1982 		if ((new_len = seglen + new_len + extra) > fp->sfa_pmss)
   1983 			break;
   1984 		if ((nmp = dupmsg(mp)) == NULL)
   1985 			break;
   1986 
   1987 		if (extra > 0) {
   1988 			fill = sctp_get_padding(sctp, extra);
   1989 			if (fill != NULL) {
   1990 				linkb(nmp, fill);
   1991 			} else {
   1992 				freemsg(nmp);
   1993 				break;
   1994 			}
   1995 		}
   1996 		linkb(head, nmp);
   1997 
   1998 		SCTP_CHUNK_CLEAR_FLAGS(nmp);
   1999 		SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
   2000 
   2001 		seglen = new_len;
   2002 		mp = mp->b_next;
   2003 	}
   2004 done_bundle:
   2005 	if ((seglen > fp->sfa_pmss) && fp->isv4) {
   2006 		ipha_t *iph = (ipha_t *)head->b_rptr;
   2007 
   2008 		/*
   2009 		 * Path MTU is different from path we thought it would
   2010 		 * be when we created chunks, or IP headers have grown.
   2011 		 * Need to clear the DF bit.
   2012 		 */
   2013 		iph->ipha_fragment_offset_and_flags = 0;
   2014 	}
   2015 	fp->rxt_unacked += seglen;
   2016 
   2017 	dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x "
   2018 	    "ssn %d to %p (rwnd %d, lastack_rxd %x)\n",
   2019 	    seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn),
   2020 	    (void *)fp, sctp->sctp_frwnd, sctp->sctp_lastack_rxd));
   2021 
   2022 	sctp->sctp_rexmitting = B_TRUE;
   2023 	sctp->sctp_rxt_nxttsn = first_ua_tsn;
   2024 	sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1;
   2025 	sctp_set_iplen(sctp, head);
   2026 	sctp_add_sendq(sctp, head);
   2027 
   2028 	/*
   2029 	 * Restart the oldfp timer with exponential backoff and
   2030 	 * the new fp timer for the retransmitted chunks.
   2031 	 */
   2032 restart_timer:
   2033 	oldfp->strikes++;
   2034 	sctp->sctp_strikes++;
   2035 	SCTP_CALC_RXT(sctp, oldfp);
   2036 	/*
   2037 	 * If there is still some data in the oldfp, restart the
   2038 	 * retransmission timer.  If there is no data, the heartbeat will
   2039 	 * continue to run so it will do its job in checking the reachability
   2040 	 * of the oldfp.
   2041 	 */
   2042 	if (oldfp != fp && oldfp->suna != 0)
   2043 		SCTP_FADDR_TIMER_RESTART(sctp, oldfp, oldfp->rto);
   2044 
   2045 	/*
   2046 	 * Should we restart the timer of the new fp?  If there is
   2047 	 * outstanding data to the new fp, the timer should be
   2048 	 * running already.  So restarting it means that the timer
   2049 	 * will fire later for those outstanding data.  But if
   2050 	 * we don't restart it, the timer will fire too early for the
   2051 	 * just retransmitted chunks to the new fp.  The reason is that we
   2052 	 * don't keep a timestamp on when a chunk is retransmitted.
   2053 	 * So when the timer fires, it will just search for the
   2054 	 * chunk with the earliest TSN sent to new fp.  This probably
   2055 	 * is the chunk we just retransmitted.  So for now, let's
   2056 	 * be conservative and restart the timer of the new fp.
   2057 	 */
   2058 	SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
   2059 
   2060 	sctp->sctp_active = lbolt64;
   2061 }
   2062 
   2063 /*
   2064  * This function is called by sctp_ss_rexmit() to create a packet
   2065  * to be retransmitted to the given fp.  The given meta and mp
   2066  * parameters are respectively the sctp_msg_hdr_t and the mblk of the
   2067  * first chunk to be retransmitted.  This is also called when we want
   2068  * to retransmit a zero window probe from sctp_rexmit() or when we
   2069  * want to retransmit the zero window probe after the window has
   2070  * opened from sctp_got_sack().
   2071  */
   2072 mblk_t *
   2073 sctp_rexmit_packet(sctp_t *sctp, mblk_t **meta, mblk_t **mp, sctp_faddr_t *fp,
   2074     uint_t *packet_len)
   2075 {
   2076 	uint32_t	seglen = 0;
   2077 	uint16_t	chunklen;
   2078 	int		extra;
   2079 	mblk_t		*nmp;
   2080 	mblk_t		*head;
   2081 	mblk_t		*fill;
   2082 	sctp_data_hdr_t	*sdc;
   2083 	sctp_msg_hdr_t	*mhdr;
   2084 
   2085 	sdc = (sctp_data_hdr_t *)(*mp)->b_rptr;
   2086 	seglen = ntohs(sdc->sdh_len);
   2087 	chunklen = seglen - sizeof (*sdc);
   2088 	if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
   2089 		extra = SCTP_ALIGN - extra;
   2090 
   2091 	nmp = dupmsg(*mp);
   2092 	if (nmp == NULL)
   2093 		return (NULL);
   2094 	if (extra > 0) {
   2095 		fill = sctp_get_padding(sctp, extra);
   2096 		if (fill != NULL) {
   2097 			linkb(nmp, fill);
   2098 			seglen += extra;
   2099 		} else {
   2100 			freemsg(nmp);
   2101 			return (NULL);
   2102 		}
   2103 	}
   2104 	SCTP_CHUNK_CLEAR_FLAGS(nmp);
   2105 	head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL);
   2106 	if (head == NULL) {
   2107 		freemsg(nmp);
   2108 		return (NULL);
   2109 	}
   2110 	SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta);
   2111 	/*
   2112 	 * Don't update the TSN if we are doing a Zero Win Probe.
   2113 	 */
   2114 	if (!sctp->sctp_zero_win_probe)
   2115 		sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn);
   2116 	*mp = (*mp)->b_next;
   2117 
   2118 try_bundle:
   2119 	while (seglen < fp->sfa_pmss) {
   2120 		int32_t new_len;
   2121 
   2122 		/*
   2123 		 * Go through the list to find more chunks to be bundled.
   2124 		 * We should only retransmit sent by unack'ed chunks.  Since
   2125 		 * they were sent before, the peer's receive window should
   2126 		 * be able to receive them.
   2127 		 */
   2128 		while (*mp != NULL) {
   2129 			/* Check if the chunk can be bundled. */
   2130 			if (SCTP_CHUNK_ISSENT(*mp) && !SCTP_CHUNK_ISACKED(*mp))
   2131 				break;
   2132 			*mp = (*mp)->b_next;
   2133 		}
   2134 		/* Go to the next message. */
   2135 		if (*mp == NULL) {
   2136 			for (*meta = (*meta)->b_next; *meta != NULL;
   2137 			    *meta = (*meta)->b_next) {
   2138 				mhdr = (sctp_msg_hdr_t *)(*meta)->b_rptr;
   2139 
   2140 				if (SCTP_IS_MSG_ABANDONED(*meta) ||
   2141 				    SCTP_MSG_TO_BE_ABANDONED(*meta, mhdr,
   2142 				    sctp)) {
   2143 					continue;
   2144 				}
   2145 
   2146 				*mp = (*meta)->b_cont;
   2147 				goto try_bundle;
   2148 			}
   2149 			/* No more chunk to be bundled. */
   2150 			break;
   2151 		}
   2152 
   2153 		sdc = (sctp_data_hdr_t *)(*mp)->b_rptr;
   2154 		/* Don't bundle chunks beyond sctp_rxt_maxtsn. */
   2155 		if (SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_rxt_maxtsn))
   2156 			break;
   2157 		new_len = ntohs(sdc->sdh_len);
   2158 		chunklen = new_len - sizeof (*sdc);
   2159 
   2160 		if ((extra = new_len & (SCTP_ALIGN - 1)) != 0)
   2161 			extra = SCTP_ALIGN - extra;
   2162 		if ((new_len = seglen + new_len + extra) > fp->sfa_pmss)
   2163 			break;
   2164 		if ((nmp = dupmsg(*mp)) == NULL)
   2165 			break;
   2166 
   2167 		if (extra > 0) {
   2168 			fill = sctp_get_padding(sctp, extra);
   2169 			if (fill != NULL) {
   2170 				linkb(nmp, fill);
   2171 			} else {
   2172 				freemsg(nmp);
   2173 				break;
   2174 			}
   2175 		}
   2176 		linkb(head, nmp);
   2177 
   2178 		SCTP_CHUNK_CLEAR_FLAGS(nmp);
   2179 		SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta);
   2180 		/*
   2181 		 * Don't update the TSN if we are doing a Zero Win Probe.
   2182 		 */
   2183 		if (!sctp->sctp_zero_win_probe)
   2184 			sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn);
   2185 
   2186 		seglen = new_len;
   2187 		*mp = (*mp)->b_next;
   2188 	}
   2189 	*packet_len = seglen;
   2190 	fp->rxt_unacked += seglen;
   2191 	return (head);
   2192 }
   2193 
   2194 /*
   2195  * sctp_ss_rexmit() is called when we get a SACK after a timeout which
   2196  * advances the cum_tsn but the cum_tsn is still less than what we have sent
   2197  * (sctp_rxt_maxtsn) at the time of the timeout.  This SACK is a "partial"
   2198  * SACK.  We retransmit unacked chunks without having to wait for another
   2199  * timeout.  The rationale is that the SACK should not be "partial" if all the
   2200  * lost chunks have been retransmitted.  Since the SACK is "partial,"
   2201  * the chunks between the cum_tsn and the sctp_rxt_maxtsn should still
   2202  * be missing.  It is better for us to retransmit them now instead
   2203  * of waiting for a timeout.
   2204  */
   2205 void
   2206 sctp_ss_rexmit(sctp_t *sctp)
   2207 {
   2208 	mblk_t		*meta;
   2209 	mblk_t		*mp;
   2210 	mblk_t		*pkt;
   2211 	sctp_faddr_t	*fp;
   2212 	uint_t		pkt_len;
   2213 	uint32_t	tot_wnd;
   2214 	sctp_data_hdr_t	*sdc;
   2215 	int		burst;
   2216 	sctp_stack_t	*sctps = sctp->sctp_sctps;
   2217 
   2218 	ASSERT(!sctp->sctp_zero_win_probe);
   2219 
   2220 	/*
   2221 	 * If the last cum ack is smaller than what we have just
   2222 	 * retransmitted, simply return.
   2223 	 */
   2224 	if (SEQ_GEQ(sctp->sctp_lastack_rxd, sctp->sctp_rxt_nxttsn))
   2225 		sctp->sctp_rxt_nxttsn = sctp->sctp_lastack_rxd + 1;
   2226 	else
   2227 		return;
   2228 	ASSERT(SEQ_LEQ(sctp->sctp_rxt_nxttsn, sctp->sctp_rxt_maxtsn));
   2229 
   2230 	/*
   2231 	 * After a timer fires, sctp_current should be set to the new
   2232 	 * fp where the retransmitted chunks are sent.
   2233 	 */
   2234 	fp = sctp->sctp_current;
   2235 
   2236 	/*
   2237 	 * Since we are retransmitting, we only need to use cwnd to determine
   2238 	 * how much we can send as we were allowed (by peer's receive window)
   2239 	 * to send those retransmitted chunks previously when they are first
   2240 	 * sent.  If we record how much we have retransmitted but
   2241 	 * unacknowledged using rxt_unacked, then the amount we can now send
   2242 	 * is equal to cwnd minus rxt_unacked.
   2243 	 *
   2244 	 * The field rxt_unacked is incremented when we retransmit a packet
   2245 	 * and decremented when we got a SACK acknowledging something.  And
   2246 	 * it is reset when the retransmission timer fires as we assume that
   2247 	 * all packets have left the network after a timeout.  If this
   2248 	 * assumption is not true, it means that after a timeout, we can
   2249 	 * get a SACK acknowledging more than rxt_unacked (its value only
   2250 	 * contains what is retransmitted when the timer fires).  So
   2251 	 * rxt_unacked will become very big (it is an unsiged int so going
   2252 	 * negative means that the value is huge).  This is the reason we
   2253 	 * always send at least 1 MSS bytes.
   2254 	 *
   2255 	 * The reason why we do not have an accurate count is that we
   2256 	 * only know how many packets are outstanding (using the TSN numbers).
   2257 	 * But we do not know how many bytes those packets contain.  To
   2258 	 * have an accurate count, we need to walk through the send list.
   2259 	 * As it is not really important to have an accurate count during
   2260 	 * retransmission, we skip this walk to save some time.  This should
   2261 	 * not make the retransmission too aggressive to cause congestion.
   2262 	 */
   2263 	if (fp->cwnd <= fp->rxt_unacked)
   2264 		tot_wnd = fp->sfa_pmss;
   2265 	else
   2266 		tot_wnd = fp->cwnd - fp->rxt_unacked;
   2267 
   2268 	/* Find the first unack'ed chunk */
   2269 	for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
   2270 		sctp_msg_hdr_t	*mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
   2271 
   2272 		if (SCTP_IS_MSG_ABANDONED(meta) ||
   2273 		    SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp)) {
   2274 			continue;
   2275 		}
   2276 
   2277 		for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
   2278 			/* Again, this may not be possible */
   2279 			if (!SCTP_CHUNK_ISSENT(mp))
   2280 				return;
   2281 			sdc = (sctp_data_hdr_t *)mp->b_rptr;
   2282 			if (ntohl(sdc->sdh_tsn) == sctp->sctp_rxt_nxttsn)
   2283 				goto found_msg;
   2284 		}
   2285 	}
   2286 
   2287 	/* Everything is abandoned... */
   2288 	return;
   2289 
   2290 found_msg:
   2291 	if (!fp->timer_running)
   2292 		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
   2293 	pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len);
   2294 	if (pkt == NULL) {
   2295 		SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
   2296 		return;
   2297 	}
   2298 	if ((pkt_len > fp->sfa_pmss) && fp->isv4) {
   2299 		ipha_t	*iph = (ipha_t *)pkt->b_rptr;
   2300 
   2301 		/*
   2302 		 * Path MTU is different from path we thought it would
   2303 		 * be when we created chunks, or IP headers have grown.
   2304 		 *  Need to clear the DF bit.
   2305 		 */
   2306 		iph->ipha_fragment_offset_and_flags = 0;
   2307 	}
   2308 	sctp_set_iplen(sctp, pkt);
   2309 	sctp_add_sendq(sctp, pkt);
   2310 
   2311 	/* Check and see if there is more chunk to be retransmitted. */
   2312 	if (tot_wnd <= pkt_len || tot_wnd - pkt_len < fp->sfa_pmss ||
   2313 	    meta == NULL)
   2314 		return;
   2315 	if (mp == NULL)
   2316 		meta = meta->b_next;
   2317 	if (meta == NULL)
   2318 		return;
   2319 
   2320 	/* Retransmit another packet if the window allows. */
   2321 	for (tot_wnd -= pkt_len, burst = sctps->sctps_maxburst - 1;
   2322 	    meta != NULL && burst > 0; meta = meta->b_next, burst--) {
   2323 		if (mp == NULL)
   2324 			mp = meta->b_cont;
   2325 		for (; mp != NULL; mp = mp->b_next) {
   2326 			/* Again, this may not be possible */
   2327 			if (!SCTP_CHUNK_ISSENT(mp))
   2328 				return;
   2329 			if (!SCTP_CHUNK_ISACKED(mp))
   2330 				goto found_msg;
   2331 		}
   2332 	}
   2333 }
   2334