Home | History | Annotate | Download | only in sctp
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     28 
     29 #include <sys/types.h>
     30 #include <sys/systm.h>
     31 #include <sys/stream.h>
     32 #include <sys/cmn_err.h>
     33 #include <sys/kmem.h>
     34 #define	_SUN_TPI_VERSION 2
     35 #include <sys/tihdr.h>
     36 #include <sys/socket.h>
     37 #include <sys/strsun.h>
     38 #include <sys/strsubr.h>
     39 
     40 #include <netinet/in.h>
     41 #include <netinet/ip6.h>
     42 #include <netinet/tcp_seq.h>
     43 #include <netinet/sctp.h>
     44 
     45 #include <inet/common.h>
     46 #include <inet/ip.h>
     47 #include <inet/ip6.h>
     48 #include <inet/mib2.h>
     49 #include <inet/ipclassifier.h>
     50 #include <inet/ipp_common.h>
     51 #include <inet/ipsec_impl.h>
     52 #include <inet/sctp_ip.h>
     53 
     54 #include "sctp_impl.h"
     55 #include "sctp_asconf.h"
     56 #include "sctp_addr.h"
     57 
     58 static struct kmem_cache *sctp_kmem_set_cache;
     59 
     60 /*
     61  * PR-SCTP comments.
     62  *
 * When we get a valid Forward TSN chunk, we check the fragment list for this
 * SSN and all preceding SSNs and free all of them. Further, if this Forward
 * TSN causes the next expected SSN to be present in the stream queue, we
 * deliver any such stranded messages upstream. We also update the SACK info
 * appropriately.
 * When checking for advancing the cumulative ack (in sctp_cumack()) we must
 * check for abandoned chunks and messages. While traversing the transmit
     69  * list if we come across an abandoned chunk, we can skip the message (i.e.
     70  * take it out of the (re)transmit list) since this message, and hence this
     71  * chunk, has been marked abandoned by sctp_rexmit(). If we come across an
     72  * unsent chunk for a message this now abandoned we need to check if a
     73  * Forward TSN needs to be sent, this could be a case where we deferred sending
     74  * a Forward TSN in sctp_get_msg_to_send(). Further, after processing a
     75  * SACK we check if the Advanced peer ack point can be moved ahead, i.e.
     76  * if we can send a Forward TSN via sctp_check_abandoned_data().
     77  */
     78 void
     79 sctp_free_set(sctp_set_t *s)
     80 {
     81 	sctp_set_t *p;
     82 
     83 	while (s) {
     84 		p = s->next;
     85 		kmem_cache_free(sctp_kmem_set_cache, s);
     86 		s = p;
     87 	}
     88 }
     89 
     90 static void
     91 sctp_ack_add(sctp_set_t **head, uint32_t tsn, int *num)
     92 {
     93 	sctp_set_t *p, *t;
     94 
     95 	if (head == NULL || num == NULL)
     96 		return;
     97 
     98 	ASSERT(*num >= 0);
     99 	ASSERT((*num == 0 && *head == NULL) || (*num > 0 && *head != NULL));
    100 
    101 	if (*head == NULL) {
    102 		*head = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
    103 		if (*head == NULL)
    104 			return;
    105 		(*head)->prev = (*head)->next = NULL;
    106 		(*head)->begin = tsn;
    107 		(*head)->end = tsn;
    108 		*num = 1;
    109 		return;
    110 	}
    111 
    112 	ASSERT((*head)->prev == NULL);
    113 
    114 	/*
    115 	 * Handle this special case here so we don't have to check
    116 	 * for it each time in the loop.
    117 	 */
    118 	if (SEQ_LT(tsn + 1, (*head)->begin)) {
    119 		/* add a new set, and move the head pointer */
    120 		t = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
    121 		if (t == NULL)
    122 			return;
    123 		t->next = *head;
    124 		t->prev = NULL;
    125 		(*head)->prev = t;
    126 		t->begin = tsn;
    127 		t->end = tsn;
    128 		(*num)++;
    129 		*head = t;
    130 		return;
    131 	}
    132 
    133 	/*
    134 	 * We need to handle the following cases, where p points to
    135 	 * the current set (as we walk through the loop):
    136 	 *
    137 	 * 1. tsn is entirely less than p; create a new set before p.
    138 	 * 2. tsn borders p from less; coalesce p with tsn.
    139 	 * 3. tsn is withing p; do nothing.
    140 	 * 4. tsn borders p from greater; coalesce p with tsn.
    141 	 * 4a. p may now border p->next from less; if so, coalesce those
    142 	 *    two sets.
    143 	 * 5. tsn is entirely greater then all sets; add a new set at
    144 	 *    the end.
    145 	 */
    146 	for (p = *head; ; p = p->next) {
    147 		if (SEQ_LT(tsn + 1, p->begin)) {
    148 			/* 1: add a new set before p. */
    149 			t = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
    150 			if (t == NULL)
    151 				return;
    152 			t->next = p;
    153 			t->prev = NULL;
    154 			t->begin = tsn;
    155 			t->end = tsn;
    156 			if (p->prev) {
    157 				t->prev = p->prev;
    158 				p->prev->next = t;
    159 			}
    160 			p->prev = t;
    161 			(*num)++;
    162 			return;
    163 		}
    164 
    165 		if ((tsn + 1) == p->begin) {
    166 			/* 2: adjust p->begin */
    167 			p->begin = tsn;
    168 			return;
    169 		}
    170 
    171 		if (SEQ_GEQ(tsn, p->begin) && SEQ_LEQ(tsn, p->end)) {
    172 			/* 3; do nothing */
    173 			return;
    174 		}
    175 
    176 		if ((p->end + 1) == tsn) {
    177 			/* 4; adjust p->end */
    178 			p->end = tsn;
    179 
    180 			if (p->next != NULL && (tsn + 1) == p->next->begin) {
    181 				/* 4a: coalesce p and p->next */
    182 				t = p->next;
    183 				p->end = t->end;
    184 				p->next = t->next;
    185 				if (t->next != NULL)
    186 					t->next->prev = p;
    187 				kmem_cache_free(sctp_kmem_set_cache, t);
    188 				(*num)--;
    189 			}
    190 			return;
    191 		}
    192 
    193 		if (p->next == NULL) {
    194 			/* 5: add new set at the end */
    195 			t = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
    196 			if (t == NULL)
    197 				return;
    198 			t->next = NULL;
    199 			t->prev = p;
    200 			t->begin = tsn;
    201 			t->end = tsn;
    202 			p->next = t;
    203 			(*num)++;
    204 			return;
    205 		}
    206 
    207 		if (SEQ_GT(tsn, p->end + 1))
    208 			continue;
    209 	}
    210 }
    211 
    212 static void
    213 sctp_ack_rem(sctp_set_t **head, uint32_t end, int *num)
    214 {
    215 	sctp_set_t *p, *t;
    216 
    217 	if (head == NULL || *head == NULL || num == NULL)
    218 		return;
    219 
    220 	/* Nothing to remove */
    221 	if (SEQ_LT(end, (*head)->begin))
    222 		return;
    223 
    224 	/* Find out where to start removing sets */
    225 	for (p = *head; p->next; p = p->next) {
    226 		if (SEQ_LEQ(end, p->end))
    227 			break;
    228 	}
    229 
    230 	if (SEQ_LT(end, p->end) && SEQ_GEQ(end, p->begin)) {
    231 		/* adjust p */
    232 		p->begin = end + 1;
    233 		/* all done */
    234 		if (p == *head)
    235 			return;
    236 	} else if (SEQ_GEQ(end, p->end)) {
    237 		/* remove this set too */
    238 		p = p->next;
    239 	}
    240 
    241 	/* unlink everything before this set */
    242 	t = *head;
    243 	*head = p;
    244 	if (p != NULL && p->prev != NULL) {
    245 		p->prev->next = NULL;
    246 		p->prev = NULL;
    247 	}
    248 
    249 	sctp_free_set(t);
    250 
    251 	/* recount the number of sets */
    252 	*num = 0;
    253 
    254 	for (p = *head; p != NULL; p = p->next)
    255 		(*num)++;
    256 }
    257 
    258 void
    259 sctp_sets_init()
    260 {
    261 	sctp_kmem_set_cache = kmem_cache_create("sctp_set_cache",
    262 	    sizeof (sctp_set_t), 0, NULL, NULL, NULL, NULL,
    263 	    NULL, 0);
    264 }
    265 
    266 void
    267 sctp_sets_fini()
    268 {
    269 	kmem_cache_destroy(sctp_kmem_set_cache);
    270 }
    271 
    272 sctp_chunk_hdr_t *
    273 sctp_first_chunk(uchar_t *rptr, ssize_t remaining)
    274 {
    275 	sctp_chunk_hdr_t *ch;
    276 	uint16_t ch_len;
    277 
    278 	if (remaining < sizeof (*ch)) {
    279 		return (NULL);
    280 	}
    281 
    282 	ch = (sctp_chunk_hdr_t *)rptr;
    283 	ch_len = ntohs(ch->sch_len);
    284 
    285 	if (ch_len < sizeof (*ch) || remaining < ch_len) {
    286 		return (NULL);
    287 	}
    288 
    289 	return (ch);
    290 }
    291 
    292 sctp_chunk_hdr_t *
    293 sctp_next_chunk(sctp_chunk_hdr_t *ch, ssize_t *remaining)
    294 {
    295 	int pad;
    296 	uint16_t ch_len;
    297 
    298 	if (!ch) {
    299 		return (NULL);
    300 	}
    301 
    302 	ch_len = ntohs(ch->sch_len);
    303 
    304 	if ((pad = ch_len & (SCTP_ALIGN - 1)) != 0) {
    305 		pad = SCTP_ALIGN - pad;
    306 	}
    307 
    308 	*remaining -= (ch_len + pad);
    309 	ch = (sctp_chunk_hdr_t *)((char *)ch + ch_len + pad);
    310 
    311 	return (sctp_first_chunk((uchar_t *)ch, *remaining));
    312 }
    313 
    314 /*
    315  * Attach ancillary data to a received SCTP segments.
    316  * If the source address (fp) is not the primary, send up a
    317  * unitdata_ind so recvfrom() can populate the msg_name field.
    318  * If ancillary data is also requested, we append it to the
    319  * unitdata_req. Otherwise, we just send up an optdata_ind.
    320  */
    321 static int
    322 sctp_input_add_ancillary(sctp_t *sctp, mblk_t **mp, sctp_data_hdr_t *dcp,
    323     sctp_faddr_t *fp, ip6_pkt_t *ipp)
    324 {
    325 	struct T_unitdata_ind	*tudi;
    326 	int			optlen;
    327 	int			hdrlen;
    328 	uchar_t			*optptr;
    329 	struct cmsghdr		*cmsg;
    330 	mblk_t			*mp1;
    331 	struct sockaddr_in6	sin_buf[1];
    332 	struct sockaddr_in6	*sin6;
    333 	struct sockaddr_in	*sin4;
    334 	uint_t			addflag = 0;
    335 
    336 	sin4 = NULL;
    337 	sin6 = NULL;
    338 
    339 	optlen = hdrlen = 0;
    340 
    341 	/* Figure out address size */
    342 	if (sctp->sctp_ipversion == IPV4_VERSION) {
    343 		sin4 = (struct sockaddr_in *)sin_buf;
    344 		sin4->sin_family = AF_INET;
    345 		sin4->sin_port = sctp->sctp_fport;
    346 		IN6_V4MAPPED_TO_IPADDR(&fp->faddr, sin4->sin_addr.s_addr);
    347 		hdrlen = sizeof (*tudi) + sizeof (*sin4);
    348 	} else {
    349 		sin6 = sin_buf;
    350 		sin6->sin6_family = AF_INET6;
    351 		sin6->sin6_port = sctp->sctp_fport;
    352 		sin6->sin6_addr = fp->faddr;
    353 		hdrlen = sizeof (*tudi) + sizeof (*sin6);
    354 	}
    355 
    356 	/* If app asked to receive send / recv info */
    357 	if (sctp->sctp_recvsndrcvinfo) {
    358 		optlen += sizeof (*cmsg) + sizeof (struct sctp_sndrcvinfo);
    359 		if (hdrlen == 0)
    360 			hdrlen = sizeof (struct T_optdata_ind);
    361 	}
    362 
    363 	if (sctp->sctp_ipv6_recvancillary == 0)
    364 		goto noancillary;
    365 
    366 	if ((ipp->ipp_fields & IPPF_IFINDEX) &&
    367 	    ipp->ipp_ifindex != sctp->sctp_recvifindex &&
    368 	    (sctp->sctp_ipv6_recvancillary & SCTP_IPV6_RECVPKTINFO)) {
    369 		optlen += sizeof (*cmsg) + sizeof (struct in6_pktinfo);
    370 		if (hdrlen == 0)
    371 			hdrlen = sizeof (struct T_unitdata_ind);
    372 		addflag |= SCTP_IPV6_RECVPKTINFO;
    373 	}
    374 	/* If app asked for hoplimit and it has changed ... */
    375 	if ((ipp->ipp_fields & IPPF_HOPLIMIT) &&
    376 	    ipp->ipp_hoplimit != sctp->sctp_recvhops &&
    377 	    (sctp->sctp_ipv6_recvancillary & SCTP_IPV6_RECVHOPLIMIT)) {
    378 		optlen += sizeof (*cmsg) + sizeof (uint_t);
    379 		if (hdrlen == 0)
    380 			hdrlen = sizeof (struct T_unitdata_ind);
    381 		addflag |= SCTP_IPV6_RECVHOPLIMIT;
    382 	}
    383 	/* If app asked for hopbyhop headers and it has changed ... */
    384 	if ((sctp->sctp_ipv6_recvancillary & SCTP_IPV6_RECVHOPOPTS) &&
    385 	    ip_cmpbuf(sctp->sctp_hopopts, sctp->sctp_hopoptslen,
    386 	    (ipp->ipp_fields & IPPF_HOPOPTS),
    387 	    ipp->ipp_hopopts, ipp->ipp_hopoptslen)) {
    388 		optlen += sizeof (*cmsg) + ipp->ipp_hopoptslen -
    389 		    sctp->sctp_v6label_len;
    390 		if (hdrlen == 0)
    391 			hdrlen = sizeof (struct T_unitdata_ind);
    392 		addflag |= SCTP_IPV6_RECVHOPOPTS;
    393 		if (!ip_allocbuf((void **)&sctp->sctp_hopopts,
    394 		    &sctp->sctp_hopoptslen,
    395 		    (ipp->ipp_fields & IPPF_HOPOPTS),
    396 		    ipp->ipp_hopopts, ipp->ipp_hopoptslen))
    397 			return (-1);
    398 	}
    399 	/* If app asked for dst headers before routing headers ... */
    400 	if ((sctp->sctp_ipv6_recvancillary & SCTP_IPV6_RECVRTDSTOPTS) &&
    401 	    ip_cmpbuf(sctp->sctp_rtdstopts, sctp->sctp_rtdstoptslen,
    402 	    (ipp->ipp_fields & IPPF_RTDSTOPTS),
    403 	    ipp->ipp_rtdstopts, ipp->ipp_rtdstoptslen)) {
    404 		optlen += sizeof (*cmsg) + ipp->ipp_rtdstoptslen;
    405 		if (hdrlen == 0)
    406 			hdrlen = sizeof (struct T_unitdata_ind);
    407 		addflag |= SCTP_IPV6_RECVRTDSTOPTS;
    408 		if (!ip_allocbuf((void **)&sctp->sctp_rtdstopts,
    409 		    &sctp->sctp_rtdstoptslen,
    410 		    (ipp->ipp_fields & IPPF_RTDSTOPTS),
    411 		    ipp->ipp_rtdstopts, ipp->ipp_rtdstoptslen))
    412 			return (-1);
    413 	}
    414 	/* If app asked for routing headers and it has changed ... */
    415 	if (sctp->sctp_ipv6_recvancillary & SCTP_IPV6_RECVRTHDR) {
    416 		if (ip_cmpbuf(sctp->sctp_rthdr, sctp->sctp_rthdrlen,
    417 		    (ipp->ipp_fields & IPPF_RTHDR),
    418 		    ipp->ipp_rthdr, ipp->ipp_rthdrlen)) {
    419 			optlen += sizeof (*cmsg) + ipp->ipp_rthdrlen;
    420 			if (hdrlen == 0)
    421 				hdrlen = sizeof (struct T_unitdata_ind);
    422 			addflag |= SCTP_IPV6_RECVRTHDR;
    423 			if (!ip_allocbuf((void **)&sctp->sctp_rthdr,
    424 			    &sctp->sctp_rthdrlen,
    425 			    (ipp->ipp_fields & IPPF_RTHDR),
    426 			    ipp->ipp_rthdr, ipp->ipp_rthdrlen))
    427 				return (-1);
    428 		}
    429 	}
    430 	/* If app asked for dest headers and it has changed ... */
    431 	if ((sctp->sctp_ipv6_recvancillary & SCTP_IPV6_RECVDSTOPTS) &&
    432 	    ip_cmpbuf(sctp->sctp_dstopts, sctp->sctp_dstoptslen,
    433 	    (ipp->ipp_fields & IPPF_DSTOPTS),
    434 	    ipp->ipp_dstopts, ipp->ipp_dstoptslen)) {
    435 		optlen += sizeof (*cmsg) + ipp->ipp_dstoptslen;
    436 		if (hdrlen == 0)
    437 			hdrlen = sizeof (struct T_unitdata_ind);
    438 		addflag |= SCTP_IPV6_RECVDSTOPTS;
    439 		if (!ip_allocbuf((void **)&sctp->sctp_dstopts,
    440 		    &sctp->sctp_dstoptslen,
    441 		    (ipp->ipp_fields & IPPF_DSTOPTS),
    442 		    ipp->ipp_dstopts, ipp->ipp_dstoptslen))
    443 			return (-1);
    444 	}
    445 noancillary:
    446 	/* Nothing to add */
    447 	if (hdrlen == 0)
    448 		return (-1);
    449 
    450 	mp1 = allocb(hdrlen + optlen + sizeof (void *), BPRI_MED);
    451 	if (mp1 == NULL)
    452 		return (-1);
    453 	mp1->b_cont = *mp;
    454 	*mp = mp1;
    455 	mp1->b_rptr += sizeof (void *);  /* pointer worth of padding */
    456 	mp1->b_wptr = mp1->b_rptr + hdrlen + optlen;
    457 	DB_TYPE(mp1) = M_PROTO;
    458 	tudi = (struct T_unitdata_ind *)mp1->b_rptr;
    459 	tudi->PRIM_type = T_UNITDATA_IND;
    460 	tudi->SRC_length = sin4 ? sizeof (*sin4) : sizeof (*sin6);
    461 	tudi->SRC_offset = sizeof (*tudi);
    462 	tudi->OPT_offset = sizeof (*tudi) + tudi->SRC_length;
    463 	tudi->OPT_length = optlen;
    464 	if (sin4) {
    465 		bcopy(sin4, tudi + 1, sizeof (*sin4));
    466 	} else {
    467 		bcopy(sin6, tudi + 1, sizeof (*sin6));
    468 	}
    469 	optptr = (uchar_t *)tudi + tudi->OPT_offset;
    470 
    471 	if (sctp->sctp_recvsndrcvinfo) {
    472 		/* XXX need backout method if memory allocation fails. */
    473 		struct sctp_sndrcvinfo *sri;
    474 
    475 		cmsg = (struct cmsghdr *)optptr;
    476 		cmsg->cmsg_level = IPPROTO_SCTP;
    477 		cmsg->cmsg_type = SCTP_SNDRCV;
    478 		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (*sri);
    479 		optptr += sizeof (*cmsg);
    480 
    481 		sri = (struct sctp_sndrcvinfo *)(cmsg + 1);
    482 		ASSERT(OK_32PTR(sri));
    483 		sri->sinfo_stream = ntohs(dcp->sdh_sid);
    484 		sri->sinfo_ssn = ntohs(dcp->sdh_ssn);
    485 		if (SCTP_DATA_GET_UBIT(dcp)) {
    486 			sri->sinfo_flags = MSG_UNORDERED;
    487 		} else {
    488 			sri->sinfo_flags = 0;
    489 		}
    490 		sri->sinfo_ppid = dcp->sdh_payload_id;
    491 		sri->sinfo_context = 0;
    492 		sri->sinfo_timetolive = 0;
    493 		sri->sinfo_tsn = ntohl(dcp->sdh_tsn);
    494 		sri->sinfo_cumtsn = sctp->sctp_ftsn;
    495 		sri->sinfo_assoc_id = 0;
    496 
    497 		optptr += sizeof (*sri);
    498 	}
    499 
    500 	/*
    501 	 * If app asked for pktinfo and the index has changed ...
    502 	 * Note that the local address never changes for the connection.
    503 	 */
    504 	if (addflag & SCTP_IPV6_RECVPKTINFO) {
    505 		struct in6_pktinfo *pkti;
    506 
    507 		cmsg = (struct cmsghdr *)optptr;
    508 		cmsg->cmsg_level = IPPROTO_IPV6;
    509 		cmsg->cmsg_type = IPV6_PKTINFO;
    510 		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (*pkti);
    511 		optptr += sizeof (*cmsg);
    512 
    513 		pkti = (struct in6_pktinfo *)optptr;
    514 		if (sctp->sctp_ipversion == IPV6_VERSION)
    515 			pkti->ipi6_addr = sctp->sctp_ip6h->ip6_src;
    516 		else
    517 			IN6_IPADDR_TO_V4MAPPED(sctp->sctp_ipha->ipha_src,
    518 			    &pkti->ipi6_addr);
    519 		pkti->ipi6_ifindex = ipp->ipp_ifindex;
    520 		optptr += sizeof (*pkti);
    521 		ASSERT(OK_32PTR(optptr));
    522 		/* Save as "last" value */
    523 		sctp->sctp_recvifindex = ipp->ipp_ifindex;
    524 	}
    525 	/* If app asked for hoplimit and it has changed ... */
    526 	if (addflag & SCTP_IPV6_RECVHOPLIMIT) {
    527 		cmsg = (struct cmsghdr *)optptr;
    528 		cmsg->cmsg_level = IPPROTO_IPV6;
    529 		cmsg->cmsg_type = IPV6_HOPLIMIT;
    530 		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (uint_t);
    531 		optptr += sizeof (*cmsg);
    532 
    533 		*(uint_t *)optptr = ipp->ipp_hoplimit;
    534 		optptr += sizeof (uint_t);
    535 		ASSERT(OK_32PTR(optptr));
    536 		/* Save as "last" value */
    537 		sctp->sctp_recvhops = ipp->ipp_hoplimit;
    538 	}
    539 	if (addflag & SCTP_IPV6_RECVHOPOPTS) {
    540 		cmsg = (struct cmsghdr *)optptr;
    541 		cmsg->cmsg_level = IPPROTO_IPV6;
    542 		cmsg->cmsg_type = IPV6_HOPOPTS;
    543 		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_hopoptslen;
    544 		optptr += sizeof (*cmsg);
    545 
    546 		bcopy(ipp->ipp_hopopts, optptr, ipp->ipp_hopoptslen);
    547 		optptr += ipp->ipp_hopoptslen;
    548 		ASSERT(OK_32PTR(optptr));
    549 		/* Save as last value */
    550 		ip_savebuf((void **)&sctp->sctp_hopopts,
    551 		    &sctp->sctp_hopoptslen,
    552 		    (ipp->ipp_fields & IPPF_HOPOPTS),
    553 		    ipp->ipp_hopopts, ipp->ipp_hopoptslen);
    554 	}
    555 	if (addflag & SCTP_IPV6_RECVRTDSTOPTS) {
    556 		cmsg = (struct cmsghdr *)optptr;
    557 		cmsg->cmsg_level = IPPROTO_IPV6;
    558 		cmsg->cmsg_type = IPV6_RTHDRDSTOPTS;
    559 		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_rtdstoptslen;
    560 		optptr += sizeof (*cmsg);
    561 
    562 		bcopy(ipp->ipp_rtdstopts, optptr, ipp->ipp_rtdstoptslen);
    563 		optptr += ipp->ipp_rtdstoptslen;
    564 		ASSERT(OK_32PTR(optptr));
    565 		/* Save as last value */
    566 		ip_savebuf((void **)&sctp->sctp_rtdstopts,
    567 		    &sctp->sctp_rtdstoptslen,
    568 		    (ipp->ipp_fields & IPPF_RTDSTOPTS),
    569 		    ipp->ipp_rtdstopts, ipp->ipp_rtdstoptslen);
    570 	}
    571 	if (addflag & SCTP_IPV6_RECVRTHDR) {
    572 		cmsg = (struct cmsghdr *)optptr;
    573 		cmsg->cmsg_level = IPPROTO_IPV6;
    574 		cmsg->cmsg_type = IPV6_RTHDR;
    575 		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_rthdrlen;
    576 		optptr += sizeof (*cmsg);
    577 
    578 		bcopy(ipp->ipp_rthdr, optptr, ipp->ipp_rthdrlen);
    579 		optptr += ipp->ipp_rthdrlen;
    580 		ASSERT(OK_32PTR(optptr));
    581 		/* Save as last value */
    582 		ip_savebuf((void **)&sctp->sctp_rthdr,
    583 		    &sctp->sctp_rthdrlen,
    584 		    (ipp->ipp_fields & IPPF_RTHDR),
    585 		    ipp->ipp_rthdr, ipp->ipp_rthdrlen);
    586 	}
    587 	if (addflag & SCTP_IPV6_RECVDSTOPTS) {
    588 		cmsg = (struct cmsghdr *)optptr;
    589 		cmsg->cmsg_level = IPPROTO_IPV6;
    590 		cmsg->cmsg_type = IPV6_DSTOPTS;
    591 		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_dstoptslen;
    592 		optptr += sizeof (*cmsg);
    593 
    594 		bcopy(ipp->ipp_dstopts, optptr, ipp->ipp_dstoptslen);
    595 		optptr += ipp->ipp_dstoptslen;
    596 		ASSERT(OK_32PTR(optptr));
    597 		/* Save as last value */
    598 		ip_savebuf((void **)&sctp->sctp_dstopts,
    599 		    &sctp->sctp_dstoptslen,
    600 		    (ipp->ipp_fields & IPPF_DSTOPTS),
    601 		    ipp->ipp_dstopts, ipp->ipp_dstoptslen);
    602 	}
    603 
    604 	ASSERT(optptr == mp1->b_wptr);
    605 
    606 	return (0);
    607 }
    608 
    609 void
    610 sctp_free_reass(sctp_instr_t *sip)
    611 {
    612 	mblk_t *mp, *mpnext, *mctl;
    613 
    614 	for (mp = sip->istr_reass; mp != NULL; mp = mpnext) {
    615 		mpnext = mp->b_next;
    616 		mp->b_next = NULL;
    617 		mp->b_prev = NULL;
    618 		if (DB_TYPE(mp) == M_CTL) {
    619 			mctl = mp;
    620 			ASSERT(mp->b_cont != NULL);
    621 			mp = mp->b_cont;
    622 			mctl->b_cont = NULL;
    623 			freeb(mctl);
    624 		}
    625 		freemsg(mp);
    626 	}
    627 }
    628 
    629 /*
    630  * If the series of data fragments of which dmp is a part is successfully
    631  * reassembled, the first mblk in the series is returned. dc is adjusted
    632  * to point at the data chunk in the lead mblk, and b_rptr also points to
    633  * the data chunk; the following mblk's b_rptr's point at the actual payload.
    634  *
    635  * If the series is not yet reassembled, NULL is returned. dc is not changed.
    636  * XXX should probably move this up into the state machine.
    637  */
    638 
    639 /* Fragment list for un-ordered messages. Partial delivery is not supported */
    640 static mblk_t *
    641 sctp_uodata_frag(sctp_t *sctp, mblk_t *dmp, sctp_data_hdr_t **dc)
    642 {
    643 	mblk_t		*hmp;
    644 	mblk_t		*begin = NULL;
    645 	mblk_t		*end = NULL;
    646 	sctp_data_hdr_t	*qdc;
    647 	uint32_t	ntsn;
    648 	uint32_t	tsn = ntohl((*dc)->sdh_tsn);
    649 #ifdef	DEBUG
    650 	mblk_t		*mp1;
    651 #endif
    652 
    653 	/* First frag. */
    654 	if (sctp->sctp_uo_frags == NULL) {
    655 		sctp->sctp_uo_frags = dmp;
    656 		return (NULL);
    657 	}
    658 	hmp = sctp->sctp_uo_frags;
    659 	/*
    660 	 * Insert the segment according to the TSN, fragmented unordered
    661 	 * chunks are sequenced by TSN.
    662 	 */
    663 	while (hmp != NULL) {
    664 		qdc = (sctp_data_hdr_t *)hmp->b_rptr;
    665 		ntsn = ntohl(qdc->sdh_tsn);
    666 		if (SEQ_GT(ntsn, tsn)) {
    667 			if (hmp->b_prev == NULL) {
    668 				dmp->b_next = hmp;
    669 				hmp->b_prev = dmp;
    670 				sctp->sctp_uo_frags = dmp;
    671 			} else {
    672 				dmp->b_next = hmp;
    673 				dmp->b_prev = hmp->b_prev;
    674 				hmp->b_prev->b_next = dmp;
    675 				hmp->b_prev = dmp;
    676 			}
    677 			break;
    678 		}
    679 		if (hmp->b_next == NULL) {
    680 			hmp->b_next = dmp;
    681 			dmp->b_prev = hmp;
    682 			break;
    683 		}
    684 		hmp = hmp->b_next;
    685 	}
    686 	/* check if we completed a msg */
    687 	if (SCTP_DATA_GET_BBIT(*dc)) {
    688 		begin = dmp;
    689 	} else if (SCTP_DATA_GET_EBIT(*dc)) {
    690 		end = dmp;
    691 	}
    692 	/*
    693 	 * We walk consecutive TSNs backwards till we get a seg. with
    694 	 * the B bit
    695 	 */
    696 	if (begin == NULL) {
    697 		for (hmp = dmp->b_prev; hmp != NULL; hmp = hmp->b_prev) {
    698 			qdc = (sctp_data_hdr_t *)hmp->b_rptr;
    699 			ntsn = ntohl(qdc->sdh_tsn);
    700 			if ((int32_t)(tsn - ntsn) > 1) {
    701 				return (NULL);
    702 			}
    703 			if (SCTP_DATA_GET_BBIT(qdc)) {
    704 				begin = hmp;
    705 				break;
    706 			}
    707 			tsn = ntsn;
    708 		}
    709 	}
    710 	tsn = ntohl((*dc)->sdh_tsn);
    711 	/*
    712 	 * We walk consecutive TSNs till we get a seg. with the E bit
    713 	 */
    714 	if (end == NULL) {
    715 		for (hmp = dmp->b_next; hmp != NULL; hmp = hmp->b_next) {
    716 			qdc = (sctp_data_hdr_t *)hmp->b_rptr;
    717 			ntsn = ntohl(qdc->sdh_tsn);
    718 			if ((int32_t)(ntsn - tsn) > 1) {
    719 				return (NULL);
    720 			}
    721 			if (SCTP_DATA_GET_EBIT(qdc)) {
    722 				end = hmp;
    723 				break;
    724 			}
    725 			tsn = ntsn;
    726 		}
    727 	}
    728 	if (begin == NULL || end == NULL) {
    729 		return (NULL);
    730 	}
    731 	/* Got one!, Remove the msg from the list */
    732 	if (sctp->sctp_uo_frags == begin) {
    733 		ASSERT(begin->b_prev == NULL);
    734 		sctp->sctp_uo_frags = end->b_next;
    735 		if (end->b_next != NULL)
    736 			end->b_next->b_prev = NULL;
    737 	} else {
    738 		begin->b_prev->b_next = end->b_next;
    739 		if (end->b_next != NULL)
    740 			end->b_next->b_prev = begin->b_prev;
    741 	}
    742 	begin->b_prev = NULL;
    743 	end->b_next = NULL;
    744 
    745 	/*
    746 	 * Null out b_next and b_prev and chain using b_cont.
    747 	 */
    748 	dmp = end = begin;
    749 	hmp = begin->b_next;
    750 	*dc = (sctp_data_hdr_t *)begin->b_rptr;
    751 	begin->b_next = NULL;
    752 	while (hmp != NULL) {
    753 		qdc = (sctp_data_hdr_t *)hmp->b_rptr;
    754 		hmp->b_rptr = (uchar_t *)(qdc + 1);
    755 		end = hmp->b_next;
    756 		dmp->b_cont = hmp;
    757 		dmp = hmp;
    758 
    759 		if (end != NULL)
    760 			hmp->b_next = NULL;
    761 		hmp->b_prev = NULL;
    762 		hmp = end;
    763 	}
    764 	BUMP_LOCAL(sctp->sctp_reassmsgs);
    765 #ifdef	DEBUG
    766 	mp1 = begin;
    767 	while (mp1 != NULL) {
    768 		ASSERT(mp1->b_next == NULL);
    769 		ASSERT(mp1->b_prev == NULL);
    770 		mp1 = mp1->b_cont;
    771 	}
    772 #endif
    773 	return (begin);
    774 }
    775 
    776 /*
    777  * Try partial delivery.
    778  */
    779 static mblk_t *
    780 sctp_try_partial_delivery(sctp_t *sctp, mblk_t *hmp, sctp_reass_t *srp,
    781     sctp_data_hdr_t **dc)
    782 {
    783 	mblk_t		*first_mp;
    784 	mblk_t		*mp;
    785 	mblk_t		*dmp;
    786 	mblk_t		*qmp;
    787 	mblk_t		*prev;
    788 	sctp_data_hdr_t	*qdc;
    789 	uint32_t	tsn;
    790 
    791 	ASSERT(DB_TYPE(hmp) == M_CTL);
    792 
    793 	dprint(4, ("trypartial: got=%d, needed=%d\n",
    794 	    (int)(srp->got), (int)(srp->needed)));
    795 
    796 	first_mp = hmp->b_cont;
    797 	mp = first_mp;
    798 	qdc = (sctp_data_hdr_t *)mp->b_rptr;
    799 
    800 	ASSERT(SCTP_DATA_GET_BBIT(qdc) && srp->hasBchunk);
    801 
    802 	tsn = ntohl(qdc->sdh_tsn) + 1;
    803 
    804 	/*
    805 	 * This loop has two exit conditions: the
    806 	 * end of received chunks has been reached, or
    807 	 * there is a break in the sequence. We want
    808 	 * to chop the reassembly list as follows (the
    809 	 * numbers are TSNs):
    810 	 *   10 -> 11 -> 	(end of chunks)
    811 	 *   10 -> 11 -> | 13   (break in sequence)
    812 	 */
    813 	prev = mp;
    814 	mp = mp->b_cont;
    815 	while (mp != NULL) {
    816 		qdc = (sctp_data_hdr_t *)mp->b_rptr;
    817 		if (ntohl(qdc->sdh_tsn) != tsn)
    818 			break;
    819 		prev = mp;
    820 		mp = mp->b_cont;
    821 		tsn++;
    822 	}
    823 	/*
    824 	 * We are sending all the fragments upstream, we have to retain
    825 	 * the srp info for further fragments.
    826 	 */
    827 	if (mp == NULL) {
    828 		dmp = hmp->b_cont;
    829 		hmp->b_cont = NULL;
    830 		srp->nexttsn = tsn;
    831 		srp->msglen = 0;
    832 		srp->needed = 0;
    833 		srp->got = 0;
    834 		srp->partial_delivered = B_TRUE;
    835 		srp->tail = NULL;
    836 	} else {
    837 		dmp = hmp->b_cont;
    838 		hmp->b_cont = mp;
    839 	}
    840 	srp->hasBchunk = B_FALSE;
    841 	/*
    842 	 * mp now points at the last chunk in the sequence,
    843 	 * and prev points to mp's previous in the list.
    844 	 * We chop the list at prev, and convert mp into the
    845 	 * new list head by setting the B bit. Subsequence
    846 	 * fragment deliveries will follow the normal reassembly
    847 	 * path.
    848 	 */
    849 	prev->b_cont = NULL;
    850 	srp->partial_delivered = B_TRUE;
    851 
    852 	dprint(4, ("trypartial: got some, got=%d, needed=%d\n",
    853 	    (int)(srp->got), (int)(srp->needed)));
    854 
    855 	/*
    856 	 * Adjust all mblk's except the lead so their rptr's point to the
    857 	 * payload. sctp_data_chunk() will need to process the lead's
    858 	 * data chunk section, so leave it's rptr pointing at the data chunk.
    859 	 */
    860 	*dc = (sctp_data_hdr_t *)dmp->b_rptr;
    861 	if (srp->tail != NULL) {
    862 		srp->got--;
    863 		ASSERT(srp->got != 0);
    864 		if (srp->needed != 0) {
    865 			srp->needed--;
    866 			ASSERT(srp->needed != 0);
    867 		}
    868 		srp->msglen -= ntohs((*dc)->sdh_len);
    869 	}
    870 	for (qmp = dmp->b_cont; qmp != NULL; qmp = qmp->b_cont) {
    871 		qdc = (sctp_data_hdr_t *)qmp->b_rptr;
    872 		qmp->b_rptr = (uchar_t *)(qdc + 1);
    873 
    874 		/*
    875 		 * Deduct the balance from got and needed here, now that
    876 		 * we know we are actually delivering these data.
    877 		 */
    878 		if (srp->tail != NULL) {
    879 			srp->got--;
    880 			ASSERT(srp->got != 0);
    881 			if (srp->needed != 0) {
    882 				srp->needed--;
    883 				ASSERT(srp->needed != 0);
    884 			}
    885 			srp->msglen -= ntohs(qdc->sdh_len);
    886 		}
    887 	}
    888 	ASSERT(srp->msglen == 0);
    889 	BUMP_LOCAL(sctp->sctp_reassmsgs);
    890 
    891 	return (dmp);
    892 }
    893 
    894 /*
    895  * Fragment list for ordered messages.
    896  * If no error occures, error is set to 0. If we run out of memory, error
    897  * is set to 1. If the peer commits a fatal error (like using different
    898  * sequence numbers for the same data fragment series), the association is
    899  * aborted and error is set to 2. tpfinished indicates whether we have
    900  * assembled a complete message, this is used in sctp_data_chunk() to
    901  * see if we can try to send any queued message for this stream.
    902  */
    903 static mblk_t *
    904 sctp_data_frag(sctp_t *sctp, mblk_t *dmp, sctp_data_hdr_t **dc, int *error,
    905     sctp_instr_t *sip, boolean_t *tpfinished)
    906 {
    907 	mblk_t		*hmp;
    908 	mblk_t		*pmp;
    909 	mblk_t		*qmp;
    910 	mblk_t		*first_mp;
    911 	sctp_reass_t	*srp;
    912 	sctp_data_hdr_t	*qdc;
    913 	sctp_data_hdr_t	*bdc;
    914 	sctp_data_hdr_t	*edc;
    915 	uint32_t	tsn;
    916 	uint16_t	fraglen = 0;
    917 
    918 	*error = 0;
    919 
    920 	/* find the reassembly queue for this data chunk */
    921 	hmp = qmp = sip->istr_reass;
    922 	for (; hmp != NULL; hmp = hmp->b_next) {
    923 		srp = (sctp_reass_t *)DB_BASE(hmp);
    924 		if (ntohs((*dc)->sdh_ssn) == srp->ssn)
    925 			goto foundit;
    926 		else if (SSN_GT(srp->ssn, ntohs((*dc)->sdh_ssn)))
    927 			break;
    928 		qmp = hmp;
    929 	}
    930 
    931 	/*
    932 	 * Allocate a M_CTL that will contain information about this
    933 	 * fragmented message.
    934 	 */
    935 	if ((pmp = allocb(sizeof (*srp), BPRI_MED)) == NULL) {
    936 		*error = 1;
    937 		return (NULL);
    938 	}
    939 	DB_TYPE(pmp) = M_CTL;
    940 	srp = (sctp_reass_t *)DB_BASE(pmp);
    941 	pmp->b_cont = dmp;
    942 
    943 	if (hmp != NULL) {
    944 		if (sip->istr_reass == hmp) {
    945 			sip->istr_reass = pmp;
    946 			pmp->b_next = hmp;
    947 			pmp->b_prev = NULL;
    948 			hmp->b_prev = pmp;
    949 		} else {
    950 			qmp->b_next = pmp;
    951 			pmp->b_prev = qmp;
    952 			pmp->b_next = hmp;
    953 			hmp->b_prev = pmp;
    954 		}
    955 	} else {
    956 		/* make a new reass head and stick it on the end */
    957 		if (sip->istr_reass == NULL) {
    958 			sip->istr_reass = pmp;
    959 			pmp->b_prev = NULL;
    960 		} else {
    961 			qmp->b_next = pmp;
    962 			pmp->b_prev = qmp;
    963 		}
    964 		pmp->b_next = NULL;
    965 	}
    966 	srp->partial_delivered = B_FALSE;
    967 	srp->ssn = ntohs((*dc)->sdh_ssn);
    968 empty_srp:
    969 	srp->needed = 0;
    970 	srp->got = 1;
    971 	srp->tail = dmp;
    972 	if (SCTP_DATA_GET_BBIT(*dc)) {
    973 		srp->msglen = ntohs((*dc)->sdh_len);
    974 		srp->nexttsn = ntohl((*dc)->sdh_tsn) + 1;
    975 		srp->hasBchunk = B_TRUE;
    976 	} else if (srp->partial_delivered &&
    977 	    srp->nexttsn == ntohl((*dc)->sdh_tsn)) {
    978 		SCTP_DATA_SET_BBIT(*dc);
    979 		/* Last fragment */
    980 		if (SCTP_DATA_GET_EBIT(*dc)) {
    981 			srp->needed = 1;
    982 			goto frag_done;
    983 		}
    984 		srp->hasBchunk = B_TRUE;
    985 		srp->msglen = ntohs((*dc)->sdh_len);
    986 		srp->nexttsn++;
    987 	}
    988 	return (NULL);
    989 foundit:
    990 	/*
    991 	 * else already have a reassembly queue. Insert the new data chunk
    992 	 * in the reassemble queue. Try the tail first, on the assumption
    993 	 * that the fragments are coming in in order.
    994 	 */
    995 	qmp = srp->tail;
    996 
    997 	/*
    998 	 * This means the message was partially delivered.
    999 	 */
   1000 	if (qmp == NULL) {
   1001 		ASSERT(srp->got == 0 && srp->needed == 0 &&
   1002 		    srp->partial_delivered);
   1003 		ASSERT(hmp->b_cont == NULL);
   1004 		hmp->b_cont = dmp;
   1005 		goto empty_srp;
   1006 	}
   1007 	qdc = (sctp_data_hdr_t *)qmp->b_rptr;
   1008 	ASSERT(qmp->b_cont == NULL);
   1009 
   1010 	/* XXXIs it fine to do this just here? */
   1011 	if ((*dc)->sdh_sid != qdc->sdh_sid) {
   1012 		/* our peer is fatally confused; XXX abort the assc */
   1013 		*error = 2;
   1014 		return (NULL);
   1015 	}
   1016 	if (SEQ_GT(ntohl((*dc)->sdh_tsn), ntohl(