Home | History | Annotate | Download | only in idm
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/conf.h>
     27 #include <sys/stat.h>
     28 #include <sys/file.h>
     29 #include <sys/ddi.h>
     30 #include <sys/sunddi.h>
     31 #include <sys/modctl.h>
     32 #include <sys/priv.h>
     33 #include <sys/cpuvar.h>
     34 #include <sys/socket.h>
     35 #include <sys/strsubr.h>
     36 #include <sys/sysmacros.h>
     37 #include <sys/sdt.h>
     38 #include <netinet/tcp.h>
     39 #include <inet/tcp.h>
     40 #include <sys/socketvar.h>
     41 #include <sys/pathname.h>
     42 #include <sys/fs/snode.h>
     43 #include <sys/fs/dv_node.h>
     44 #include <sys/vnode.h>
     45 #include <netinet/in.h>
     46 #include <net/if.h>
     47 #include <sys/sockio.h>
     48 #include <sys/ksocket.h>
     49 #include <sys/filio.h>		/* FIONBIO */
     50 #include <sys/iscsi_protocol.h>
     51 #include <sys/idm/idm.h>
     52 #include <sys/idm/idm_so.h>
     53 #include <sys/idm/idm_text.h>
     54 
     55 #define	IN_PROGRESS_DELAY	1
     56 
     57 /*
     58  * in6addr_any is currently all zeroes, but use the macro in case this
     59  * ever changes.
     60  */
     61 static const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
     62 
     63 static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
     64 static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
     65 static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
     66 
     67 static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so);
     68 static void idm_so_conn_destroy_common(idm_conn_t *ic);
     69 static void idm_so_conn_connect_common(idm_conn_t *ic);
     70 
     71 static void idm_set_ini_preconnect_options(idm_so_conn_t *sc,
     72     boolean_t boot_conn);
     73 static void idm_set_ini_postconnect_options(idm_so_conn_t *sc);
     74 static void idm_set_tgt_connect_options(ksocket_t so);
     75 static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);
     76 
     77 static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
     78 static void idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt,
     79     idm_buf_t *idb, uint32_t offset, uint32_t length);
     80 static void idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb);
     81 static idm_status_t idm_so_send_buf_region(idm_task_t *idt,
     82     idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length);
     83 
     84 static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb,
     85     uint32_t ro, uint32_t dlength);
     86 
     87 static idm_status_t idm_so_handle_digest(idm_conn_t *it,
     88     nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx);
     89 
     90 static void idm_so_socket_set_nonblock(struct sonode *node);
     91 static void idm_so_socket_set_block(struct sonode *node);
     92 
     93 /*
     94  * Transport ops prototypes
     95  */
     96 static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu);
     97 static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
     98 static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
     99 static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu);
    100 static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu);
    101 static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu);
    102 static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt);
    103 static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it,
    104     nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
    105 static void idm_so_notice_key_values(idm_conn_t *it,
    106     nvlist_t *negotiated_nvl);
    107 static kv_status_t idm_so_declare_key_values(idm_conn_t *it,
    108     nvlist_t *config_nvl, nvlist_t *outgoing_nvl);
    109 static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic,
    110     idm_transport_caps_t *caps);
    111 static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen);
    112 static void idm_so_buf_free(idm_buf_t *idb);
    113 static idm_status_t idm_so_buf_setup(idm_buf_t *idb);
    114 static void idm_so_buf_teardown(idm_buf_t *idb);
    115 static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is);
    116 static void idm_so_tgt_svc_destroy(idm_svc_t *is);
    117 static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is);
    118 static void idm_so_tgt_svc_offline(idm_svc_t *is);
    119 static void idm_so_tgt_conn_destroy(idm_conn_t *ic);
    120 static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic);
    121 static void idm_so_conn_disconnect(idm_conn_t *ic);
    122 static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic);
    123 static void idm_so_ini_conn_destroy(idm_conn_t *ic);
    124 static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic);
    125 
    126 /*
    127  * IDM Native Sockets transport operations
    128  */
    129 static
    130 idm_transport_ops_t idm_so_transport_ops = {
    131 	idm_so_tx,			/* it_tx_pdu */
    132 	idm_so_buf_tx_to_ini,		/* it_buf_tx_to_ini */
    133 	idm_so_buf_rx_from_ini,		/* it_buf_rx_from_ini */
    134 	idm_so_rx_datain,		/* it_rx_datain */
    135 	idm_so_rx_rtt,			/* it_rx_rtt */
    136 	idm_so_rx_dataout,		/* it_rx_dataout */
    137 	NULL,				/* it_alloc_conn_rsrc */
    138 	NULL,				/* it_free_conn_rsrc */
    139 	NULL,				/* it_tgt_enable_datamover */
    140 	NULL,				/* it_ini_enable_datamover */
    141 	NULL,				/* it_conn_terminate */
    142 	idm_so_free_task_rsrc,		/* it_free_task_rsrc */
    143 	idm_so_negotiate_key_values,	/* it_negotiate_key_values */
    144 	idm_so_notice_key_values,	/* it_notice_key_values */
    145 	idm_so_conn_is_capable,		/* it_conn_is_capable */
    146 	idm_so_buf_alloc,		/* it_buf_alloc */
    147 	idm_so_buf_free,		/* it_buf_free */
    148 	idm_so_buf_setup,		/* it_buf_setup */
    149 	idm_so_buf_teardown,		/* it_buf_teardown */
    150 	idm_so_tgt_svc_create,		/* it_tgt_svc_create */
    151 	idm_so_tgt_svc_destroy,		/* it_tgt_svc_destroy */
    152 	idm_so_tgt_svc_online,		/* it_tgt_svc_online */
    153 	idm_so_tgt_svc_offline,		/* it_tgt_svc_offline */
    154 	idm_so_tgt_conn_destroy,	/* it_tgt_conn_destroy */
    155 	idm_so_tgt_conn_connect,	/* it_tgt_conn_connect */
    156 	idm_so_conn_disconnect,		/* it_tgt_conn_disconnect */
    157 	idm_so_ini_conn_create,		/* it_ini_conn_create */
    158 	idm_so_ini_conn_destroy,	/* it_ini_conn_destroy */
    159 	idm_so_ini_conn_connect,	/* it_ini_conn_connect */
    160 	idm_so_conn_disconnect,		/* it_ini_conn_disconnect */
    161 	idm_so_declare_key_values	/* it_declare_key_values */
    162 };
    163 
    164 kmutex_t	idm_so_timed_socket_mutex;
    165 /*
    166  * idm_so_init()
    167  * Sockets transport initialization
    168  */
    169 void
    170 idm_so_init(idm_transport_t *it)
    171 {
    172 	/* Cache for IDM Data and R2T Transmit PDU's */
    173 	idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache",
    174 	    sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8,
    175 	    &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
    176 
    177 	/* Cache for IDM Receive PDU's */
    178 	idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache",
    179 	    sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8,
    180 	    &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
    181 
    182 	/* 128k buffer cache */
    183 	idm.idm_so_128k_buf_cache = kmem_cache_create("idm_128k_buf_cache",
    184 	    IDM_SO_BUF_CACHE_UB, 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);
    185 
    186 	/* Set the sockets transport ops */
    187 	it->it_ops = &idm_so_transport_ops;
    188 
    189 	mutex_init(&idm_so_timed_socket_mutex, NULL, MUTEX_DEFAULT, NULL);
    190 
    191 }
    192 
    193 /*
    194  * idm_so_fini()
    195  * Sockets transport teardown
    196  */
    197 void
    198 idm_so_fini(void)
    199 {
    200 	kmem_cache_destroy(idm.idm_so_128k_buf_cache);
    201 	kmem_cache_destroy(idm.idm_sotx_pdu_cache);
    202 	kmem_cache_destroy(idm.idm_sorx_pdu_cache);
    203 	mutex_destroy(&idm_so_timed_socket_mutex);
    204 }
    205 
    206 ksocket_t
    207 idm_socreate(int domain, int type, int protocol)
    208 {
    209 	ksocket_t ks;
    210 
    211 	if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP,
    212 	    CRED())) {
    213 		return (ks);
    214 	} else {
    215 		return (NULL);
    216 	}
    217 }
    218 
    219 /*
    220  * idm_soshutdown will disconnect the socket and prevent subsequent PDU
    221  * reception and transmission.  The sonode still exists but its state
    222  * gets modified to indicate it is no longer connected.  Calls to
    223  * idm_sorecv/idm_iov_sorecv will return so idm_soshutdown can be used
    224  * regain control of a thread stuck in idm_sorecv.
    225  */
    226 void
    227 idm_soshutdown(ksocket_t so)
    228 {
    229 	(void) ksocket_shutdown(so, SHUT_RDWR, CRED());
    230 }
    231 
    232 /*
    233  * idm_sodestroy releases all resources associated with a socket previously
    234  * created with idm_socreate.  The socket must be shutdown using
    235  * idm_soshutdown before the socket is destroyed with idm_sodestroy,
    236  * otherwise undefined behavior will result.
    237  */
    238 void
    239 idm_sodestroy(ksocket_t ks)
    240 {
    241 	(void) ksocket_close(ks, CRED());
    242 }
    243 
    244 /*
    245  * Function to compare two addresses in sockaddr_storage format
    246  */
    247 
    248 int
    249 idm_ss_compare(const struct sockaddr_storage *cmp_ss1,
    250     const struct sockaddr_storage *cmp_ss2,
    251     boolean_t v4_mapped_as_v4,
    252     boolean_t compare_ports)
    253 {
    254 	struct sockaddr_storage			mapped_v4_ss1, mapped_v4_ss2;
    255 	const struct sockaddr_storage		*ss1, *ss2;
    256 	struct in_addr				*in1, *in2;
    257 	struct in6_addr				*in61, *in62;
    258 	int i;
    259 
    260 	/*
    261 	 * Normalize V4-mapped IPv6 addresses into V4 format if
    262 	 * v4_mapped_as_v4 is B_TRUE.
    263 	 */
    264 	ss1 = cmp_ss1;
    265 	ss2 = cmp_ss2;
    266 	if (v4_mapped_as_v4 && (ss1->ss_family == AF_INET6)) {
    267 		in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
    268 		if (IN6_IS_ADDR_V4MAPPED(in61)) {
    269 			bzero(&mapped_v4_ss1, sizeof (mapped_v4_ss1));
    270 			mapped_v4_ss1.ss_family = AF_INET;
    271 			((struct sockaddr_in *)&mapped_v4_ss1)->sin_port =
    272 			    ((struct sockaddr_in *)ss1)->sin_port;
    273 			IN6_V4MAPPED_TO_INADDR(in61,
    274 			    &((struct sockaddr_in *)&mapped_v4_ss1)->sin_addr);
    275 			ss1 = &mapped_v4_ss1;
    276 		}
    277 	}
    278 	ss2 = cmp_ss2;
    279 	if (v4_mapped_as_v4 && (ss2->ss_family == AF_INET6)) {
    280 		in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
    281 		if (IN6_IS_ADDR_V4MAPPED(in62)) {
    282 			bzero(&mapped_v4_ss2, sizeof (mapped_v4_ss2));
    283 			mapped_v4_ss2.ss_family = AF_INET;
    284 			((struct sockaddr_in *)&mapped_v4_ss2)->sin_port =
    285 			    ((struct sockaddr_in *)ss2)->sin_port;
    286 			IN6_V4MAPPED_TO_INADDR(in62,
    287 			    &((struct sockaddr_in *)&mapped_v4_ss2)->sin_addr);
    288 			ss2 = &mapped_v4_ss2;
    289 		}
    290 	}
    291 
    292 	/*
    293 	 * Compare ports, then address family, then ip address
    294 	 */
    295 	if (compare_ports &&
    296 	    (((struct sockaddr_in *)ss1)->sin_port !=
    297 	    ((struct sockaddr_in *)ss2)->sin_port)) {
    298 		if (((struct sockaddr_in *)ss1)->sin_port >
    299 		    ((struct sockaddr_in *)ss2)->sin_port)
    300 			return (1);
    301 		else
    302 			return (-1);
    303 	}
    304 
    305 	/*
    306 	 * ports are the same
    307 	 */
    308 	if (ss1->ss_family != ss2->ss_family) {
    309 		if (ss1->ss_family == AF_INET)
    310 			return (1);
    311 		else
    312 			return (-1);
    313 	}
    314 
    315 	/*
    316 	 * address families are the same
    317 	 */
    318 	if (ss1->ss_family == AF_INET) {
    319 		in1 = &((struct sockaddr_in *)ss1)->sin_addr;
    320 		in2 = &((struct sockaddr_in *)ss2)->sin_addr;
    321 
    322 		if (in1->s_addr > in2->s_addr)
    323 			return (1);
    324 		else if (in1->s_addr < in2->s_addr)
    325 			return (-1);
    326 		else
    327 			return (0);
    328 	} else if (ss1->ss_family == AF_INET6) {
    329 		in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
    330 		in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
    331 
    332 		for (i = 0; i < 4; i++) {
    333 			if (in61->s6_addr32[i] > in62->s6_addr32[i])
    334 				return (1);
    335 			else if (in61->s6_addr32[i] < in62->s6_addr32[i])
    336 				return (-1);
    337 		}
    338 		return (0);
    339 	}
    340 
    341 	return (1);
    342 }
    343 
    344 /*
    345  * IP address filter functions to flag addresses that should not
    346  * go out to initiators through discovery.
    347  */
    348 static boolean_t
    349 idm_v4_addr_okay(struct in_addr *in_addr)
    350 {
    351 	in_addr_t addr = ntohl(in_addr->s_addr);
    352 
    353 	if ((INADDR_NONE == addr) ||
    354 	    (IN_MULTICAST(addr)) ||
    355 	    ((addr >> IN_CLASSA_NSHIFT) == 0) ||
    356 	    ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
    357 		return (B_FALSE);
    358 	}
    359 	return (B_TRUE);
    360 }
    361 
    362 static boolean_t
    363 idm_v6_addr_okay(struct in6_addr *addr6)
    364 {
    365 
    366 	if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) ||
    367 	    (IN6_IS_ADDR_LOOPBACK(addr6)) ||
    368 	    (IN6_IS_ADDR_MULTICAST(addr6)) ||
    369 	    (IN6_IS_ADDR_V4MAPPED(addr6)) ||
    370 	    (IN6_IS_ADDR_V4COMPAT(addr6)) ||
    371 	    (IN6_IS_ADDR_LINKLOCAL(addr6))) {
    372 		return (B_FALSE);
    373 	}
    374 	return (B_TRUE);
    375 }
    376 
    377 /*
    378  * idm_get_ipaddr will retrieve a list of IP Addresses which the host is
    379  * configured with by sending down a sequence of kernel ioctl to IP STREAMS.
    380  */
    381 int
    382 idm_get_ipaddr(idm_addr_list_t **ipaddr_p)
    383 {
    384 	ksocket_t 		so4, so6;
    385 	struct lifnum		lifn;
    386 	struct lifconf		lifc;
    387 	struct lifreq		*lp;
    388 	int			rval;
    389 	int			numifs;
    390 	int			bufsize;
    391 	void			*buf;
    392 	int			i, j, n, rc;
    393 	struct sockaddr_storage	ss;
    394 	struct sockaddr_in	*sin;
    395 	struct sockaddr_in6	*sin6;
    396 	idm_addr_t		*ip;
    397 	idm_addr_list_t		*ipaddr = NULL;
    398 	int			size_ipaddr;
    399 
    400 	*ipaddr_p = NULL;
    401 	size_ipaddr = 0;
    402 	buf = NULL;
    403 
    404 	/* create an ipv4 and ipv6 UDP socket */
    405 	if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL)
    406 		return (0);
    407 	if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) {
    408 		idm_sodestroy(so6);
    409 		return (0);
    410 	}
    411 
    412 
    413 retry_count:
    414 	/* snapshot the current number of interfaces */
    415 	lifn.lifn_family = PF_UNSPEC;
    416 	lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
    417 	lifn.lifn_count = 0;
    418 	/* use vp6 for ioctls with unspecified families by default */
    419 	if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED())
    420 	    != 0) {
    421 		goto cleanup;
    422 	}
    423 
    424 	numifs = lifn.lifn_count;
    425 	if (numifs <= 0) {
    426 		goto cleanup;
    427 	}
    428 
    429 	/* allocate extra room in case more interfaces appear */
    430 	numifs += 10;
    431 
    432 	/* get the interface names and ip addresses */
    433 	bufsize = numifs * sizeof (struct lifreq);
    434 	buf = kmem_alloc(bufsize, KM_SLEEP);
    435 
    436 	lifc.lifc_family = AF_UNSPEC;
    437 	lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
    438 	lifc.lifc_len = bufsize;
    439 	lifc.lifc_buf = buf;
    440 	rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
    441 	if (rc != 0) {
    442 		goto cleanup;
    443 	}
    444 	/* if our extra room is used up, try again */
    445 	if (bufsize <= lifc.lifc_len) {
    446 		kmem_free(buf, bufsize);
    447 		buf = NULL;
    448 		goto retry_count;
    449 	}
    450 	/* calc actual number of ifconfs */
    451 	n = lifc.lifc_len / sizeof (struct lifreq);
    452 
    453 	/* get ip address */
    454 	if (n > 0) {
    455 		size_ipaddr = sizeof (idm_addr_list_t) +
    456 		    (n - 1) * sizeof (idm_addr_t);
    457 		ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP);
    458 	} else {
    459 		goto cleanup;
    460 	}
    461 
    462 	/*
    463 	 * Examine the array of interfaces and filter uninteresting ones
    464 	 */
    465 	for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
    466 
    467 		/*
    468 		 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
    469 		 */
    470 		ss = lp->lifr_addr;
    471 		/*
    472 		 * fetch the flags using the socket of the correct family
    473 		 */
    474 		switch (ss.ss_family) {
    475 		case AF_INET:
    476 			rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp,
    477 			    &rval, CRED());
    478 			break;
    479 		case AF_INET6:
    480 			rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp,
    481 			    &rval, CRED());
    482 			break;
    483 		default:
    484 			continue;
    485 		}
    486 		if (rc == 0) {
    487 			/*
    488 			 * If we got the flags, skip uninteresting
    489 			 * interfaces based on flags
    490 			 */
    491 			if ((lp->lifr_flags & IFF_UP) != IFF_UP)
    492 				continue;
    493 			if (lp->lifr_flags &
    494 			    (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
    495 				continue;
    496 		}
    497 
    498 		/* save ip address */
    499 		ip = &ipaddr->al_addrs[j];
    500 		switch (ss.ss_family) {
    501 		case AF_INET:
    502 			sin = (struct sockaddr_in *)&ss;
    503 			if (!idm_v4_addr_okay(&sin->sin_addr))
    504 				continue;
    505 			ip->a_addr.i_addr.in4 = sin->sin_addr;
    506 			ip->a_addr.i_insize = sizeof (struct in_addr);
    507 			break;
    508 		case AF_INET6:
    509 			sin6 = (struct sockaddr_in6 *)&ss;
    510 			if (!idm_v6_addr_okay(&sin6->sin6_addr))
    511 				continue;
    512 			ip->a_addr.i_addr.in6 = sin6->sin6_addr;
    513 			ip->a_addr.i_insize = sizeof (struct in6_addr);
    514 			break;
    515 		default:
    516 			continue;
    517 		}
    518 		j++;
    519 	}
    520 
    521 	if (j == 0) {
    522 		/* no valid ifaddr */
    523 		kmem_free(ipaddr, size_ipaddr);
    524 		size_ipaddr = 0;
    525 		ipaddr = NULL;
    526 	} else {
    527 		ipaddr->al_out_cnt = j;
    528 	}
    529 
    530 
    531 cleanup:
    532 	idm_sodestroy(so6);
    533 	idm_sodestroy(so4);
    534 
    535 	if (buf != NULL)
    536 		kmem_free(buf, bufsize);
    537 
    538 	*ipaddr_p = ipaddr;
    539 	return (size_ipaddr);
    540 }
    541 
    542 int
    543 idm_sorecv(ksocket_t so, void *msg, size_t len)
    544 {
    545 	iovec_t iov;
    546 
    547 	ASSERT(so != NULL);
    548 	ASSERT(len != 0);
    549 
    550 	/*
    551 	 * Fill in iovec and receive data
    552 	 */
    553 	iov.iov_base = msg;
    554 	iov.iov_len = len;
    555 
    556 	return (idm_iov_sorecv(so, &iov, 1, len));
    557 }
    558 
    559 /*
    560  * idm_sosendto - Sends a buffered data on a non-connected socket.
    561  *
    562  * This function puts the data provided on the wire by calling sosendmsg.
    563  * It will return only when all the data has been sent or if an error
    564  * occurs.
    565  *
    566  * Returns 0 for success, the socket errno value if sosendmsg fails, and
    567  * -1 if sosendmsg returns success but uio_resid != 0
    568  */
    569 int
    570 idm_sosendto(ksocket_t so, void *buff, size_t len,
    571     struct sockaddr *name, socklen_t namelen)
    572 {
    573 	struct msghdr		msg;
    574 	struct iovec		iov[1];
    575 	int			error;
    576 	size_t			sent = 0;
    577 
    578 	iov[0].iov_base	= buff;
    579 	iov[0].iov_len	= len;
    580 
    581 	/* Initialization of the message header. */
    582 	bzero(&msg, sizeof (msg));
    583 	msg.msg_iov	= iov;
    584 	msg.msg_iovlen	= 1;
    585 	msg.msg_name	= name;
    586 	msg.msg_namelen	= namelen;
    587 
    588 	if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) {
    589 		/* Data sent */
    590 		if (sent == len) {
    591 			/* All data sent.  Success. */
    592 			return (0);
    593 		} else {
    594 			/* Not all data was sent.  Failure */
    595 			return (-1);
    596 		}
    597 	}
    598 
    599 	/* Send failed */
    600 	return (error);
    601 }
    602 
    603 /*
    604  * idm_iov_sosend - Sends an iovec on a connection.
    605  *
    606  * This function puts the data provided on the wire by calling sosendmsg.
    607  * It will return only when all the data has been sent or if an error
    608  * occurs.
    609  *
    610  * Returns 0 for success, the socket errno value if sosendmsg fails, and
    611  * -1 if sosendmsg returns success but uio_resid != 0
    612  */
    613 int
    614 idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
    615 {
    616 	struct msghdr		msg;
    617 	int			error;
    618 	size_t 			sent = 0;
    619 
    620 	ASSERT(iop != NULL);
    621 
    622 	/* Initialization of the message header. */
    623 	bzero(&msg, sizeof (msg));
    624 	msg.msg_iov	= iop;
    625 	msg.msg_iovlen	= iovlen;
    626 
    627 	if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED()))
    628 	    == 0) {
    629 		/* Data sent */
    630 		if (sent == total_len) {
    631 			/* All data sent.  Success. */
    632 			return (0);
    633 		} else {
    634 			/* Not all data was sent.  Failure */
    635 			return (-1);
    636 		}
    637 	}
    638 
    639 	/* Send failed */
    640 	return (error);
    641 }
    642 
    643 /*
    644  * idm_iov_sorecv - Receives an iovec from a connection
    645  *
    646  * This function gets the data asked for from the socket.  It will return
    647  * only when all the requested data has been retrieved or if an error
    648  * occurs.
    649  *
    650  * Returns 0 for success, the socket errno value if sorecvmsg fails, and
    651  * -1 if sorecvmsg returns success but uio_resid != 0
    652  */
    653 int
    654 idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
    655 {
    656 	struct msghdr		msg;
    657 	int			error;
    658 	size_t			recv;
    659 	int 			flags;
    660 
    661 	ASSERT(iop != NULL);
    662 
    663 	/* Initialization of the message header. */
    664 	bzero(&msg, sizeof (msg));
    665 	msg.msg_iov	= iop;
    666 	msg.msg_iovlen	= iovlen;
    667 	flags		= MSG_WAITALL;
    668 
    669 	if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED()))
    670 	    == 0) {
    671 		/* Received data */
    672 		if (recv == total_len) {
    673 			/* All requested data received.  Success */
    674 			return (0);
    675 		} else {
    676 			/*
    677 			 * Not all data was received.  The connection has
    678 			 * probably failed.
    679 			 */
    680 			return (-1);
    681 		}
    682 	}
    683 
    684 	/* Receive failed */
    685 	return (error);
    686 }
    687 
    688 static void
    689 idm_set_ini_preconnect_options(idm_so_conn_t *sc, boolean_t boot_conn)
    690 {
    691 	int	conn_abort = 10000;
    692 	int	conn_notify = 2000;
    693 	int	abort = 30000;
    694 
    695 	/* Pre-connect socket options */
    696 	(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
    697 	    TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int),
    698 	    CRED());
    699 	if (boot_conn == B_FALSE) {
    700 		(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
    701 		    TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int),
    702 		    CRED());
    703 		(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
    704 		    TCP_ABORT_THRESHOLD,
    705 		    (char *)&abort, sizeof (int), CRED());
    706 	}
    707 }
    708 
    709 static void
    710 idm_set_ini_postconnect_options(idm_so_conn_t *sc)
    711 {
    712 	int32_t		rcvbuf = IDM_RCVBUF_SIZE;
    713 	int32_t		sndbuf = IDM_SNDBUF_SIZE;
    714 	const int	on = 1;
    715 
    716 	/* Set postconnect options */
    717 	(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, TCP_NODELAY,
    718 	    (char *)&on, sizeof (int), CRED());
    719 	(void) ksocket_setsockopt(sc->ic_so, SOL_SOCKET, SO_RCVBUF,
    720 	    (char *)&rcvbuf, sizeof (int), CRED());
    721 	(void) ksocket_setsockopt(sc->ic_so, SOL_SOCKET, SO_SNDBUF,
    722 	    (char *)&sndbuf, sizeof (int), CRED());
    723 }
    724 
    725 static void
    726 idm_set_tgt_connect_options(ksocket_t ks)
    727 {
    728 	int32_t		rcvbuf = IDM_RCVBUF_SIZE;
    729 	int32_t		sndbuf = IDM_SNDBUF_SIZE;
    730 	const int	on = 1;
    731 
    732 	/* Set connect options */
    733 	(void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF,
    734 	    (char *)&rcvbuf, sizeof (int), CRED());
    735 	(void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF,
    736 	    (char *)&sndbuf, sizeof (int), CRED());
    737 	(void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY,
    738 	    (char *)&on, sizeof (on), CRED());
    739 }
    740 
    741 static uint32_t
    742 n2h24(const uchar_t *ptr)
    743 {
    744 	return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]);
    745 }
    746 
    747 
    748 static idm_status_t
    749 idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
    750 {
    751 	iscsi_hdr_t	*bhs;
    752 	uint32_t	hdr_digest_crc;
    753 	uint32_t	crc_calculated;
    754 	void		*new_hdr;
    755 	int		ahslen = 0;
    756 	int		total_len = 0;
    757 	int		iovlen = 0;
    758 	struct iovec	iov[2];
    759 	idm_so_conn_t	*so_conn;
    760 	int		rc;
    761 
    762 	so_conn = ic->ic_transport_private;
    763 
    764 	/*
    765 	 * Read BHS
    766 	 */
    767 	bhs = pdu->isp_hdr;
    768 	rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t));
    769 	if (rc != IDM_STATUS_SUCCESS) {
    770 		return (IDM_STATUS_FAIL);
    771 	}
    772 
    773 	/*
    774 	 * Check actual AHS length against the amount available in the buffer
    775 	 */
    776 	pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
    777 	    (bhs->hlength * sizeof (uint32_t));
    778 	pdu->isp_datalen = n2h24(bhs->dlength);
    779 	if (ic->ic_conn_type == CONN_TYPE_TGT &&
    780 	    pdu->isp_datalen > ic->ic_conn_params.max_recv_dataseglen) {
    781 		IDM_CONN_LOG(CE_WARN,
    782 		    "idm_sorecvhdr: exceeded the max data segment length");
    783 		return (IDM_STATUS_FAIL);
    784 	}
    785 	if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) {
    786 		/* Allocate a new header segment and change the callback */
    787 		new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP);
    788 		bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t));
    789 		pdu->isp_hdr = new_hdr;
    790 		pdu->isp_flags |= IDM_PDU_ADDL_HDR;
    791 
    792 		/*
    793 		 * This callback will restore the expected values after
    794 		 * the RX PDU has been processed.
    795 		 */
    796 		pdu->isp_callback = idm_sorx_addl_pdu_cb;
    797 	}
    798 
    799 	/*
    800 	 * Setup receipt of additional header and header digest (if enabled).
    801 	 */
    802 	if (bhs->hlength > 0) {
    803 		iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1);
    804 		ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t);
    805 		iov[iovlen].iov_len = ahslen;
    806 		total_len += iov[iovlen].iov_len;
    807 		iovlen++;
    808 	}
    809 
    810 	if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
    811 		iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
    812 		iov[iovlen].iov_len = sizeof (hdr_digest_crc);
    813 		total_len += iov[iovlen].iov_len;
    814 		iovlen++;
    815 	}
    816 
    817 	if ((iovlen != 0) &&
    818 	    (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen,
    819 	    total_len) != 0)) {
    820 		return (IDM_STATUS_FAIL);
    821 	}
    822 
    823 	/*
    824 	 * Validate header digest if enabled
    825 	 */
    826 	if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
    827 		crc_calculated = idm_crc32c(pdu->isp_hdr,
    828 		    sizeof (iscsi_hdr_t) + ahslen);
    829 		if (crc_calculated != hdr_digest_crc) {
    830 			/* Invalid Header Digest */
    831 			return (IDM_STATUS_HEADER_DIGEST);
    832 		}
    833 	}
    834 
    835 	return (0);
    836 }
    837 
    838 /*
    839  * idm_so_ini_conn_create()
    840  * Allocate the sockets transport connection resources.
    841  */
    842 static idm_status_t
    843 idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
    844 {
    845 	ksocket_t	so;
    846 	idm_so_conn_t	*so_conn;
    847 	idm_status_t	idmrc;
    848 
    849 	so = idm_socreate(cr->cr_domain, cr->cr_type,
    850 	    cr->cr_protocol);
    851 	if (so == NULL) {
    852 		return (IDM_STATUS_FAIL);
    853 	}
    854 
    855 	/* Bind the socket if configured to do so */
    856 	if (cr->cr_bound) {
    857 		if (ksocket_bind(so, &cr->cr_bound_addr.sin,
    858 		    SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) {
    859 			idm_sodestroy(so);
    860 			return (IDM_STATUS_FAIL);
    861 		}
    862 	}
    863 
    864 	idmrc = idm_so_conn_create_common(ic, so);
    865 	if (idmrc != IDM_STATUS_SUCCESS) {
    866 		idm_soshutdown(so);
    867 		idm_sodestroy(so);
    868 		return (IDM_STATUS_FAIL);
    869 	}
    870 
    871 	so_conn = ic->ic_transport_private;
    872 	/* Set up socket options */
    873 	idm_set_ini_preconnect_options(so_conn, cr->cr_boot_conn);
    874 
    875 	return (IDM_STATUS_SUCCESS);
    876 }
    877 
    878 /*
    879  * idm_so_ini_conn_destroy()
    880  * Tear down the sockets transport connection resources.
    881  */
    882 static void
    883 idm_so_ini_conn_destroy(idm_conn_t *ic)
    884 {
    885 	idm_so_conn_destroy_common(ic);
    886 }
    887 
    888 /*
    889  * idm_so_ini_conn_connect()
    890  * Establish the connection referred to by the handle previously allocated via
    891  * idm_so_ini_conn_create().
    892  */
    893 static idm_status_t
    894 idm_so_ini_conn_connect(idm_conn_t *ic)
    895 {
    896 	idm_so_conn_t	*so_conn;
    897 	struct sonode	*node = NULL;
    898 	int 		rc;
    899 	clock_t		lbolt, conn_login_max, conn_login_interval;
    900 	boolean_t	nonblock;
    901 
    902 	so_conn = ic->ic_transport_private;
    903 	nonblock = ic->ic_conn_params.nonblock_socket;
    904 	conn_login_max = ic->ic_conn_params.conn_login_max;
    905 	conn_login_interval = ddi_get_lbolt() +
    906 	    SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
    907 
    908 	if (nonblock == B_TRUE) {
    909 		node = ((struct sonode *)(so_conn->ic_so));
    910 		/* Set to none block socket mode */
    911 		idm_so_socket_set_nonblock(node);
    912 		do {
    913 			rc = ksocket_connect(so_conn->ic_so,
    914 			    &ic->ic_ini_dst_addr.sin,
    915 			    (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)),
    916 			    CRED());
    917 			if (rc == 0 || rc == EISCONN) {
    918 				/* socket success or already success */
    919 				rc = IDM_STATUS_SUCCESS;
    920 				break;
    921 			}
    922 			if ((rc == ETIMEDOUT) || (rc == ECONNREFUSED) ||
    923 			    (rc == ECONNRESET)) {
    924 				/* socket connection timeout or refuse */
    925 				break;
    926 			}
    927 			lbolt = ddi_get_lbolt();
    928 			if (lbolt > conn_login_max) {
    929 				/*
    930 				 * Connection retry timeout,
    931 				 * failed connect to target.
    932 				 */
    933 				break;
    934 			}
    935 			if (lbolt < conn_login_interval) {
    936 				if ((rc == EINPROGRESS) || (rc == EALREADY)) {
    937 					/* TCP connect still in progress */
    938 					delay(SEC_TO_TICK(IN_PROGRESS_DELAY));
    939 					continue;
    940 				} else {
    941 					delay(conn_login_interval - lbolt);
    942 				}
    943 			}
    944 			conn_login_interval = ddi_get_lbolt() +
    945 			    SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
    946 		} while (rc != 0);
    947 		/* resume to nonblock mode */
    948 		if (rc == IDM_STATUS_SUCCESS) {
    949 			idm_so_socket_set_block(node);
    950 		}
    951 	} else {
    952 		rc = ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
    953 		    (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED());
    954 	}
    955 
    956 	if (rc != 0) {
    957 		idm_soshutdown(so_conn->ic_so);
    958 		return (IDM_STATUS_FAIL);
    959 	}
    960 
    961 	idm_so_conn_connect_common(ic);
    962 
    963 	idm_set_ini_postconnect_options(so_conn);
    964 
    965 	return (IDM_STATUS_SUCCESS);
    966 }
    967 
    968 idm_status_t
    969 idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so)
    970 {
    971 	idm_status_t	idmrc;
    972 
    973 	idmrc = idm_so_conn_create_common(ic, new_so);
    974 
    975 	return (idmrc);
    976 }
    977 
    978 static void
    979 idm_so_tgt_conn_destroy(idm_conn_t *ic)
    980 {
    981 	idm_so_conn_destroy_common(ic);
    982 }
    983 
    984 /*
    985  * idm_so_tgt_conn_connect()
    986  * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
    987  * is invoked from the SM as a result of an inbound connection request.
    988  */
    989 static idm_status_t
    990 idm_so_tgt_conn_connect(idm_conn_t *ic)
    991 {
    992 	idm_so_conn_connect_common(ic);
    993 
    994 	return (IDM_STATUS_SUCCESS);
    995 }
    996 
    997 static idm_status_t
    998 idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so)
    999 {
   1000 	idm_so_conn_t	*so_conn;
   1001 
   1002 	so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP);
   1003 	so_conn->ic_so = new_so;
   1004 
   1005 	ic->ic_transport_private = so_conn;
   1006 	ic->ic_transport_hdrlen = 0;
   1007 
   1008 	/* Set the scoreboarding flag on this connection */
   1009 	ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD;
   1010 	ic->ic_conn_params.max_recv_dataseglen =
   1011 	    ISCSI_DEFAULT_MAX_RECV_SEG_LEN;
   1012 	ic->ic_conn_params.max_xmit_dataseglen =
   1013 	    ISCSI_DEFAULT_MAX_XMIT_SEG_LEN;
   1014 
   1015 	/*
   1016 	 * Initialize tx thread mutex and list
   1017 	 */
   1018 	mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL);
   1019 	cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL);
   1020 	list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t),
   1021 	    offsetof(idm_pdu_t, idm_tx_link));
   1022 
   1023 	return (IDM_STATUS_SUCCESS);
   1024 }
   1025 
   1026 static void
   1027 idm_so_conn_destroy_common(idm_conn_t *ic)
   1028 {
   1029 	idm_so_conn_t	*so_conn = ic->ic_transport_private;
   1030 
   1031 	ic->ic_transport_private = NULL;
   1032 	idm_sodestroy(so_conn->ic_so);
   1033 	list_destroy(&so_conn->ic_tx_list);
   1034 	mutex_destroy(&so_conn->ic_tx_mutex);
   1035 	cv_destroy(&so_conn->ic_tx_cv);
   1036 
   1037 	kmem_free(so_conn, sizeof (idm_so_conn_t));
   1038 }
   1039 
   1040 static void
   1041 idm_so_conn_connect_common(idm_conn_t *ic)
   1042 {
   1043 	idm_so_conn_t	*so_conn;
   1044 	struct sockaddr_in6	t_addr;
   1045 	socklen_t	t_addrlen = 0;
   1046 
   1047 	so_conn = ic->ic_transport_private;
   1048 	bzero(&t_addr, sizeof (struct sockaddr_in6));
   1049 	t_addrlen = sizeof (struct sockaddr_in6);
   1050 
   1051 	/* Set the local and remote addresses in the idm conn handle */
   1052 	(void) ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr,
   1053 	    &t_addrlen, CRED());
   1054 	bcopy(&t_addr, &ic->ic_laddr, t_addrlen);
   1055 	(void) ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr,
   1056 	    &t_addrlen, CRED());
   1057 	bcopy(&t_addr, &ic->ic_raddr, t_addrlen);
   1058 
   1059 	mutex_enter(&ic->ic_mutex);
   1060 	so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0,
   1061 	    &p0, TS_RUN, minclsyspri);
   1062 	so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0,
   1063 	    &p0, TS_RUN, minclsyspri);
   1064 
   1065 	while (so_conn->ic_rx_thread_did == 0 ||
   1066 	    so_conn->ic_tx_thread_did == 0)
   1067 		cv_wait(&ic->ic_cv, &ic->ic_mutex);
   1068 	mutex_exit(&ic->ic_mutex);
   1069 }
   1070 
   1071 /*
   1072  * idm_so_conn_disconnect()
   1073  * Shutdown the socket connection and stop the thread
   1074  */
   1075 static void
   1076 idm_so_conn_disconnect(idm_conn_t *ic)
   1077 {
   1078 	idm_so_conn_t	*so_conn;
   1079 
   1080 	so_conn = ic->ic_transport_private;
   1081 
   1082 	mutex_enter(&ic->ic_mutex);
   1083 	so_conn->ic_rx_thread_running = B_FALSE;
   1084 	so_conn->ic_tx_thread_running = B_FALSE;
   1085 	/* We need to wakeup the TX thread */
   1086 	mutex_enter(&so_conn->ic_tx_mutex);
   1087 	cv_signal(&so_conn->ic_tx_cv);
   1088 	mutex_exit(&so_conn->ic_tx_mutex);
   1089 	mutex_exit(&ic->ic_mutex);
   1090 
   1091 	/* This should wakeup the RX thread if it is sleeping */
   1092 	idm_soshutdown(so_conn->ic_so);
   1093 
   1094 	thread_join(so_conn->ic_tx_thread_did);
   1095 	thread_join(so_conn->ic_rx_thread_did);
   1096 }
   1097 
   1098 /*
   1099  * idm_so_tgt_svc_create()
   1100  * Establish a service on an IP address and port.  idm_svc_req_t contains
   1101  * the service parameters.
   1102  */
   1103 /*ARGSUSED*/
   1104 static idm_status_t
   1105 idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
   1106 {
   1107 	idm_so_svc_t		*so_svc;
   1108 
   1109 	so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP);
   1110 
   1111 	/* Set the new sockets service in svc handle */
   1112 	is->is_so_svc = (void *)so_svc;
   1113 
   1114 	return (IDM_STATUS_SUCCESS);
   1115 }
   1116 
   1117 /*
   1118  * idm_so_tgt_svc_destroy()
   1119  * Teardown sockets resources allocated in idm_so_tgt_svc_create()
   1120  */
   1121 static void
   1122 idm_so_tgt_svc_destroy(idm_svc_t *is)
   1123 {
   1124 	/* the socket will have been torn down; free the service */
   1125 	kmem_free(is->is_so_svc, sizeof (idm_so_svc_t));
   1126 }
   1127 
   1128 /*
   1129  * idm_so_tgt_svc_online()
   1130  * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create()
   1131  */
   1132 
   1133 static idm_status_t
   1134 idm_so_tgt_svc_online(idm_svc_t *is)
   1135 {
   1136 	idm_so_svc_t		*so_svc;
   1137 	idm_svc_req_t		*sr = &is->is_svc_req;
   1138 	struct sockaddr_in6	sin6_ip;
   1139 	const uint32_t		on = 1;
   1140 	const uint32_t		off = 0;
   1141 
   1142 	mutex_enter(&is->is_mutex);
   1143 	so_svc = (idm_so_svc_t *)is->is_so_svc;
   1144 
   1145 	/*
   1146 	 * Try creating an IPv6 socket first
   1147 	 */
   1148 	if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) {
   1149 		mutex_exit(&is->is_mutex);
   1150 		return (IDM_STATUS_FAIL);
   1151 	} else {
   1152 		bzero(&sin6_ip, sizeof (sin6_ip));
   1153 		sin6_ip.sin6_family = AF_INET6;
   1154 		sin6_ip.sin6_port = htons(sr->sr_port);
   1155 		sin6_ip.sin6_addr = in6addr_any;
   1156 
   1157 		(void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
   1158 		    SO_REUSEADDR, (char *)&on, sizeof (on), CRED());
   1159 		/*
   1160 		 * Turn off SO_MAC_EXEMPT so future sobinds succeed
   1161 		 */
   1162 		(void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
   1163 		    SO_MAC_EXEMPT, (char *)&off, sizeof (off), CRED());
   1164 
   1165 		if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip,
   1166 		    sizeof (sin6_ip), CRED()) != 0) {
   1167 			mutex_exit(&is->is_mutex);
   1168 			idm_sodestroy(so_svc->is_so);
   1169 			return (IDM_STATUS_FAIL);
   1170 		}
   1171 	}
   1172 
   1173 	idm_set_tgt_connect_options(so_svc->is_so);
   1174 
   1175 	if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) {
   1176 		mutex_exit(&is->is_mutex);
   1177 		idm_soshutdown(so_svc->is_so);
   1178 		idm_sodestroy(so_svc->is_so);
   1179 		return (IDM_STATUS_FAIL);
   1180 	}
   1181 
   1182 	/* Launch a watch thread */
   1183 	so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher,
   1184 	    is, 0, &p0, TS_RUN, minclsyspri);
   1185 
   1186 	if (so_svc->is_thread == NULL) {
   1187 		/* Failure to launch; teardown the socket */
   1188 		mutex_exit(&is->is_mutex);
   1189 		idm_soshutdown(so_svc->is_so);
   1190 		idm_sodestroy(so_svc->is_so);
   1191 		return (IDM_STATUS_FAIL);
   1192 	}
   1193 	ksocket_hold(so_svc->is_so);
   1194 	/* Wait for the port watcher thread to start */
   1195 	while (!so_svc->is_thread_running)
   1196 		cv_wait(&is->is_cv, &is->is_mutex);
   1197 	mutex_exit(&is->is_mutex);
   1198 
   1199 	return (IDM_STATUS_SUCCESS);
   1200 }
   1201 
   1202 /*
   1203  * idm_so_tgt_svc_offline
   1204  *
   1205  * Stop listening on the IP address and port identified by idm_svc_t.
   1206  */
   1207 static void
   1208 idm_so_tgt_svc_offline(idm_svc_t *is)
   1209 {
   1210 	idm_so_svc_t		*so_svc;
   1211 	mutex_enter(&is->is_mutex);
   1212 	so_svc = (idm_so_svc_t *)is->is_so_svc;
   1213 	so_svc->is_thread_running = B_FALSE;
   1214 	mutex_exit(&is->is_mutex);
   1215 
   1216 	/*
   1217 	 * Teardown socket
   1218 	 */
   1219 	idm_sodestroy(so_svc->is_so);
   1220 
   1221 	/*
   1222 	 * Now we expect the port watcher thread to terminate
   1223 	 */
   1224 	thread_join(so_svc->is_thread_did);
   1225 }
   1226 
   1227 /*
   1228  * Watch thread for target service connection establishment.
   1229  */
   1230 void
   1231 idm_so_svc_port_watcher(void *arg)
   1232 {
   1233 	idm_svc_t		*svc = arg;
   1234 	ksocket_t		new_so;
   1235 	idm_conn_t		*ic;
   1236 	idm_status_t		idmrc;
   1237 	idm_so_svc_t		*so_svc;
   1238 	int			rc;
   1239 	const uint32_t		off = 0;
   1240 	struct sockaddr_in6 	t_addr;
   1241 	socklen_t		t_addrlen;
   1242 
   1243 	bzero(&t_addr, sizeof (struct sockaddr_in6));
   1244 	t_addrlen = sizeof (struct sockaddr_in6);
   1245 	mutex_enter(&svc->is_mutex);
   1246 
   1247 	so_svc = svc->is_so_svc;
   1248 	so_svc->is_thread_running = B_TRUE;
   1249 	so_svc->is_thread_did = so_svc->is_thread->t_did;
   1250 
   1251 	cv_signal(&svc->is_cv);
   1252 
   1253 	IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc,
   1254 	    svc->is_svc_req.sr_port);
   1255 
   1256 	while (so_svc->is_thread_running) {
   1257 		mutex_exit(&svc->is_mutex);
   1258 
   1259 		if ((rc = ksocket_accept(so_svc->is_so,
   1260 		    (struct sockaddr *)&t_addr, &t_addrlen,
   1261 		    &new_so, CRED())) != 0) {
   1262 			mutex_enter(&svc->is_mutex);
   1263 			if (rc == ECONNABORTED)
   1264 				continue;
   1265 			/* Connection problem */
   1266 			break;
   1267 		}
   1268 		/*
   1269 		 * Turn off SO_MAC_EXEMPT so future sobinds succeed
   1270 		 */
   1271 		(void) ksocket_setsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT,
   1272 		    (char *)&off, sizeof (off), CRED());
   1273 
   1274 		idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
   1275 		    &ic);
   1276 		if (idmrc != IDM_STATUS_SUCCESS) {
   1277 			/* Drop connection */
   1278 			idm_soshutdown(new_so);
   1279 			idm_sodestroy(new_so);
   1280 			mutex_enter(&svc->is_mutex);
   1281 			continue;
   1282 		}
   1283 
   1284 		idmrc = idm_so_tgt_conn_create(ic, new_so);
   1285 		if (idmrc != IDM_STATUS_SUCCESS) {
   1286 			idm_svc_conn_destroy(ic);
   1287 			idm_soshutdown(new_so);
   1288 			idm_sodestroy(new_so);
   1289 			mutex_enter(&svc->is_mutex);
   1290 			continue;
   1291 		}
   1292 
   1293 		/*
   1294 		 * Kick the state machine.  At CS_S3_XPT_UP the state machine
   1295 		 * will notify the client (target) about the new connection.
   1296 		 */
   1297 		idm_conn_event(ic, CE_CONNECT_ACCEPT, NULL);
   1298 
   1299 		mutex_enter(&svc->is_mutex);
   1300 	}
   1301 	ksocket_rele(so_svc->is_so);
   1302 	so_svc->is_thread_running = B_FALSE;
   1303 	mutex_exit(&svc->is_mutex);
   1304 
   1305 	IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc,
   1306 	    svc->is_svc_req.sr_port);
   1307 
   1308 	thread_exit();
   1309 }
   1310 
   1311 /*
   1312  * idm_so_free_task_rsrc() stops any ongoing processing of the task and
   1313  * frees resources associated with the task.
   1314  *
   1315  * It's not clear that this should return idm_status_t.  What do we do
   1316  * if it fails?
   1317  */
   1318 static idm_status_t
   1319 idm_so_free_task_rsrc(idm_task_t *idt)
   1320 {
   1321 	idm_buf_t	*idb, *next_idb;
   1322 
   1323 	/*
   1324 	 * There is nothing to cleanup on initiator connections
   1325 	 */
   1326 	if (IDM_CONN_ISINI(idt->idt_ic))
   1327 		return (IDM_STATUS_SUCCESS);
   1328 
   1329 	/*
   1330 	 * If this is a target connection, call idm_buf_rx_from_ini_done for
   1331 	 * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
   1332 	 *
   1333 	 * In addition, remove any buffers associated with this task from
   1334 	 * the ic_tx_list.  We'll do this by walking the idt_inbufv list, but
   1335 	 * items don't actually get removed from that list (and completion
   1336 	 * routines called) until idm_task_cleanup.
   1337 	 */
   1338 	mutex_enter(&idt->idt_mutex);
   1339 
   1340 	for (idb = list_head(&idt->idt_outbufv); idb != NULL; idb = next_idb) {
   1341 		next_idb = list_next(&idt->idt_outbufv, idb);
   1342 		if (idb->idb_in_transport) {
   1343 			/*
   1344 			 * idm_buf_rx_from_ini_done releases idt->idt_mutex
   1345 			 */
   1346 			DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
   1347 			    uintptr_t, idb->idb_buf,
   1348 			    uint32_t, idb->idb_bufoffset,
   1349 			    uint64_t, 0, uint32_t, 0, uint32_t, 0,
   1350 			    uint32_t, idb->idb_xfer_len,
   1351 			    int, XFER_BUF_RX_FROM_INI);
   1352 			idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
   1353 			mutex_enter(&idt->idt_mutex);
   1354 		}
   1355 	}
   1356 
   1357 	for (idb = list_head(&idt->idt_inbufv); idb != NULL; idb = next_idb) {
   1358 		next_idb = list_next(&idt->idt_inbufv, idb);
   1359 		/*
   1360 		 * We want to remove these items from the tx_list as well,
   1361 		 * but knowing it's in the idt_inbufv list is not a guarantee
   1362 		 * that it's in the tx_list.  If it's on the tx list then
   1363 		 * let idm_sotx_thread() clean it up.
   1364 		 */
   1365 		if (idb->idb_in_transport && !idb->idb_tx_thread) {
   1366 			/*
   1367 			 * idm_buf_tx_to_ini_done releases idt->idt_mutex
   1368 			 */
   1369 			DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
   1370 			    uintptr_t, idb->idb_buf,
   1371 			    uint32_t, idb->idb_bufoffset,
   1372 			    uint64_t, 0, uint32_t, 0, uint32_t, 0,
   1373 			    uint32_t, idb->idb_xfer_len,
   1374 			    int, XFER_BUF_TX_TO_INI);
   1375 			idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
   1376 			mutex_enter(&idt->idt_mutex);
   1377 		}
   1378 	}
   1379 
   1380 	mutex_exit(&idt->idt_mutex);
   1381 
   1382 	return (IDM_STATUS_SUCCESS);
   1383 }
   1384 
   1385 /*
   1386  * idm_so_negotiate_key_values() validates the key values for this connection
   1387  */
   1388 /* ARGSUSED */
   1389 static kv_status_t
   1390 idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl,
   1391     nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
   1392 {
   1393 	/* All parameters are negotiated at the iscsit level */
   1394 	return (KV_HANDLED);
   1395 }
   1396 
   1397 /*
   1398  * idm_so_notice_key_values() activates the negotiated key values for
   1399  * this connection.
   1400  */
   1401 static void
   1402 idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl)
   1403 {
   1404 	char			*nvp_name;
   1405 	nvpair_t		*nvp;
   1406 	nvpair_t		*next_nvp;
   1407 	int			nvrc;
   1408 	idm_status_t		idm_status;
   1409 	const idm_kv_xlate_t	*ikvx;
   1410 	uint64_t		num_val;
   1411 
   1412 	for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL);
   1413 	    nvp != NULL; nvp = next_nvp) {
   1414 		next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp);
   1415 		nvp_name = nvpair_name(nvp);
   1416 
   1417 		ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
   1418 		switch (ikvx->ik_key_id) {
   1419 		case KI_HEADER_DIGEST:
   1420 		case KI_DATA_DIGEST:
   1421 			idm_status = idm_so_handle_digest(it, nvp, ikvx);
   1422 			ASSERT(idm_status == 0);
   1423 
   1424 			/* Remove processed item from negotiated_nvl list */
   1425 			nvrc = nvlist_remove_all(
   1426 			    negotiated_nvl, ikvx->ik_key_name);
   1427 			ASSERT(nvrc == 0);
   1428 			break;
   1429 		case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
   1430 			/*
   1431 			 * Just pass the value down to idm layer.
   1432 			 * No need to remove it from negotiated_nvl list here.
   1433 			 */
   1434 			nvrc = nvpair_value_uint64(nvp, &num_val);
   1435 			ASSERT(nvrc == 0);
   1436 			it->ic_conn_params.max_xmit_dataseglen =
   1437 			    (uint32_t)num_val;
   1438 			break;
   1439 		default:
   1440 			break;
   1441 		}
   1442 	}
   1443 }
   1444 
   1445 /*
   1446  * idm_so_declare_key_values() declares the key values for this connection
   1447  */
   1448 /* ARGSUSED */
   1449 static kv_status_t
   1450 idm_so_declare_key_values(idm_conn_t *it, nvlist_t *config_nvl,
   1451     nvlist_t *outgoing_nvl)
   1452 {
   1453 	char			*nvp_name;
   1454 	nvpair_t		*nvp;
   1455 	nvpair_t		*next_nvp;
   1456 	kv_status_t		kvrc;
   1457 	int			nvrc = 0;
   1458 	const idm_kv_xlate_t	*ikvx;
   1459 	uint64_t		num_val;
   1460 
   1461 	for (nvp = nvlist_next_nvpair(config_nvl, NULL);
   1462 	    nvp != NULL && nvrc == 0; nvp = next_nvp) {
   1463 		next_nvp = nvlist_next_nvpair(config_nvl, nvp);
   1464 		nvp_name = nvpair_name(nvp);
   1465 
   1466 		ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
   1467 		switch (ikvx->ik_key_id) {
   1468 		case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
   1469 			if ((nvrc = nvpair_value_uint64(nvp, &num_val)) != 0) {
   1470 				break;
   1471 			}
   1472 			if (outgoing_nvl &&
   1473 			    (nvrc = nvlist_add_uint64(outgoing_nvl,
   1474 			    nvp_name, num_val)) != 0) {
   1475 				break;
   1476 			}
   1477 			it->ic_conn_params.max_recv_dataseglen =
   1478 			    (uint32_t)num_val;
   1479 			break;
   1480 		default:
   1481 			break;
   1482 		}
   1483 	}
   1484 	kvrc = idm_nvstat_to_kvstat(nvrc);
   1485 	return (kvrc);
   1486 }
   1487 
   1488 static idm_status_t
   1489 idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice,
   1490     const idm_kv_xlate_t *ikvx)
   1491 {
   1492 	int			nvrc;
   1493 	char			*digest_choice_string;
   1494 
   1495 	nvrc = nvpair_value_string(digest_choice,
   1496 	    &digest_choice_string);
   1497 	ASSERT(nvrc == 0);
   1498 	if (strcasecmp(digest_choice_string, "crc32c") == 0) {
   1499 		switch (ikvx->ik_key_id) {
   1500 		case KI_HEADER_DIGEST:
   1501 			it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST;
   1502 			break;
   1503 		case KI_DATA_DIGEST:
   1504 			it->ic_conn_flags |= IDM_CONN_DATA_DIGEST;
   1505 			break;
   1506 		default:
   1507 			ASSERT(0);
   1508 			break;
   1509 		}
   1510 	} else if (strcasecmp(digest_choice_string, "none") == 0) {
   1511 		switch (ikvx->ik_key_id) {
   1512 		case KI_HEADER_DIGEST:
   1513 			it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST;
   1514 			break;
   1515 		case KI_DATA_DIGEST:
   1516 			it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST;
   1517 			break;
   1518 		default:
   1519 			ASSERT(0);
   1520 			break;
   1521 		}
   1522 	} else {
   1523 		ASSERT(0);
   1524 	}
   1525 
   1526 	return (IDM_STATUS_SUCCESS);
   1527 }
   1528 
   1529 
   1530 /*
   1531  * idm_so_conn_is_capable() verifies that the passed connection is provided
   1532  * for by the sockets interface.
   1533  */
   1534 /* ARGSUSED */
   1535 static boolean_t
   1536 idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps)
   1537 {
   1538 	return (B_TRUE);
   1539 }
   1540 
   1541 /*
   1542  * idm_so_rx_datain() validates the Data Sequence number of the PDU. The
   1543  * idm_sorecv_scsidata() function invoked earlier actually reads the data
   1544  * off the socket into the appropriate buffers.
   1545  */
   1546 static void
   1547 idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu)
   1548 {
   1549 	iscsi_data_hdr_t	*bhs;
   1550 	idm_task_t		*idt;
   1551 	idm_buf_t		*idb;
   1552 	uint32_t		datasn;
   1553 	size_t			offset;
   1554 	iscsi_hdr_t		*ihp = (iscsi_hdr_t *)pdu->isp_hdr;
   1555 	iscsi_data_rsp_hdr_t    *idrhp = (iscsi_data_rsp_hdr_t *)ihp;
   1556 
   1557 	ASSERT(ic != NULL);
   1558 	ASSERT(pdu != NULL);
   1559 
   1560 	bhs	= (iscsi_data_hdr_t *)pdu->isp_hdr;
   1561 	datasn	= ntohl(bhs->datasn);
   1562 	offset	= ntohl(bhs->offset);
   1563 
   1564 	ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA_RSP);
   1565 
   1566 	/*
   1567 	 * Look up the task corresponding to the initiator task tag
   1568 	 * to get the buffers affiliated with the task.
   1569 	 */
   1570 	idt = idm_task_find(ic, bhs->itt, bhs->ttt);
   1571 	if (idt == NULL) {
   1572 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task");
   1573 		idm_pdu_rx_protocol_error(ic, pdu);
   1574 		return;
   1575 	}
   1576 
   1577 	idb = pdu->isp_sorx_buf;
   1578 	if (idb == NULL) {
   1579 		IDM_CONN_LOG(CE_WARN,
   1580 		    "idm_so_rx_datain: failed to find buffer");
   1581 		idm_task_rele(idt);
   1582 		idm_pdu_rx_protocol_error(ic, pdu);
   1583 		return;
   1584 	}
   1585 
   1586 	/*
   1587 	 * DataSN values should be sequential and should not have any gaps or
   1588 	 * repetitions. Check the DataSN with the one stored in the task.
   1589 	 */
   1590 	if (datasn == idt->idt_exp_datasn) {
   1591 		idt->idt_exp_datasn++; /* keep track of DataSN received */
   1592 	} else {
   1593 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order");
   1594 		idm_task_rele(idt);
   1595 		idm_pdu_rx_protocol_error(ic, pdu);
   1596 		return;
   1597 	}
   1598 
   1599 	/*
   1600 	 * PDUs in a sequence should be in continuously increasing
   1601 	 * address offset
   1602 	 */
   1603 	if (offset != idb->idb_exp_offset) {
   1604 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset");
   1605 		idm_task_rele(idt);
   1606 		idm_pdu_rx_protocol_error(ic, pdu);
   1607 		return;
   1608 	}
   1609 	/* Expected next relative buffer offset */
   1610 	idb->idb_exp_offset += n2h24(bhs->dlength);
   1611 	idt->idt_rx_bytes += n2h24(bhs->dlength);
   1612 
   1613 	idm_task_rele(idt);
   1614 
   1615 	/*
   1616 	 * For now call scsi_rsp which will process the data rsp
   1617 	 * Revisit, need to provide an explicit client entry point for
   1618 	 * phase collapse completions.
   1619 	 */
   1620 	if (((ihp->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_SCSI_DATA_RSP) &&
   1621 	    (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) {
   1622 		(*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
   1623 	}
   1624 
   1625 	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
   1626 }
   1627 
   1628 /*
   1629  * The idm_so_rx_dataout() function is used by the iSCSI target to read
   1630  * data from the Data-Out PDU sent by the iSCSI initiator.
   1631  *
   1632  * This function gets the Initiator Task Tag from the PDU BHS and looks up the
   1633  * task to get the buffers associated with the PDU. A PDU might span buffers.
   1634  * The data is then read into the respective buffer.
   1635  */
   1636 static void
   1637 idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu)
   1638 {
   1639 
   1640 	iscsi_data_hdr_t	*bhs;
   1641 	idm_task_t		*idt;
   1642 	idm_buf_t		*idb;
   1643 	size_t			offset;
   1644 
   1645 	ASSERT(ic != NULL);
   1646 	ASSERT(pdu != NULL);
   1647 
   1648 	bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
   1649 	offset = ntohl(bhs->offset);
   1650 	ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA);
   1651 
   1652 	/*
   1653 	 * Look up the task corresponding to the initiator task tag
   1654 	 * to get the buffers affiliated with the task.
   1655 	 */
   1656 	idt = idm_task_find(ic, bhs->itt, bhs->ttt);
   1657 	if (idt == NULL) {
   1658 		IDM_CONN_LOG(CE_WARN,
   1659 		    "idm_so_rx_dataout: failed to find task");
   1660 		idm_pdu_rx_protocol_error(ic, pdu);
   1661 		return;
   1662 	}
   1663 
   1664 	idb = pdu->isp_sorx_buf;
   1665 	if (idb == NULL) {
   1666 		IDM_CONN_LOG(CE_WARN,
   1667 		    "idm_so_rx_dataout: failed to find buffer");
   1668 		idm_task_rele(idt);
   1669 		idm_pdu_rx_protocol_error(ic, pdu);
   1670 		return;
   1671 	}
   1672 
   1673 	/* Keep track of data transferred - check data offsets */
   1674 	if (offset != idb->idb_exp_offset) {
   1675 		IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: "
   1676 		    "%ld, %d", offset, idb->idb_exp_offset);
   1677 		idm_task_rele(idt);
   1678 		idm_pdu_rx_protocol_error(ic, pdu);
   1679 		return;
   1680 	}
   1681 	/* Expected next relative offset */
   1682 	idb->idb_exp_offset += ntoh24(bhs->dlength);
   1683 	idt->idt_rx_bytes += n2h24(bhs->dlength);
   1684 
   1685 	/*
   1686 	 * Call the buffer callback when the transfer is complete
   1687 	 *
   1688 	 * The connection state machine should only abort tasks after
   1689 	 * shutting down the connection so we are assured that there
   1690 	 * won't be a simultaneous attempt to abort this task at the
   1691 	 * same time as we are processing this PDU (due to a connection
   1692 	 * state change).
   1693 	 */
   1694 	if (bhs->flags & ISCSI_FLAG_FINAL) {
   1695 		/*
   1696 		 * We only want to call idm_buf_rx_from_ini_done once
   1697 		 * per transfer.  It's possible that this task has
   1698 		 * already been aborted in which case
   1699 		 * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done
   1700 		 * for each buffer with idb_in_transport==B_TRUE.  To
   1701 		 * close this window and ensure that this doesn't happen,
   1702 		 * we'll clear idb->idb_in_transport now while holding
   1703 		 * the task mutex.   This is only really an issue for
   1704 		 * SCSI task abort -- if tasks were being aborted because
   1705 		 * of a connection state change the state machine would
   1706 		 * have already stopped the receive thread.
   1707 		 */
   1708 		mutex_enter(&idt->idt_mutex);
   1709 
   1710 		/*
   1711 		 * Release the task hold here (obtained in idm_task_find)
   1712 		 * because the task may complete synchronously during
   1713 		 * idm_buf_rx_from_ini_done.  Since we still have an active
   1714 		 * buffer we know there is at least one additional hold on idt.
   1715 		 */
   1716 		idm_task_rele(idt);
   1717 
   1718 		/*
   1719 		 * idm_buf_rx_from_ini_done releases idt->idt_mutex
   1720 		 */
   1721 		DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
   1722 		    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
   1723 		    uint64_t, 0, uint32_t, 0, uint32_t, 0,
   1724 		    uint32_t, idb->idb_xfer_len,
   1725 		    int, XFER_BUF_RX_FROM_INI);
   1726 		idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS);
   1727 		idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
   1728 		return;
   1729 	}
   1730 
   1731 	idm_task_rele(idt);
   1732 	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
   1733 }
   1734 
   1735 /*
   1736  * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle
   1737  * the R2T PDU sent by the iSCSI target indicating that it is ready to
   1738  * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS
   1739  * and looks up the task in the task tree using the itt to get the output
   1740  * buffers associated the task. The R2T PDU contains the offset of the
   1741  * requested data and the data length. This function then constructs a
   1742  * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out
   1743  * PDU is associated with the R2T by the Target Transfer Tag  (ttt).
   1744  */
   1745 
   1746 static void
   1747 idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu)
   1748 {
   1749 	idm_task_t		*idt;
   1750 	idm_buf_t		*idb;
   1751 	iscsi_rtt_hdr_t		*rtt_hdr;
   1752 	uint32_t		data_offset;
   1753 	uint32_t		data_length;
   1754 
   1755 	ASSERT(ic != NULL);
   1756 	ASSERT(pdu != NULL);
   1757 
   1758 	rtt_hdr	= (iscsi_rtt_hdr_t *)pdu->isp_hdr;
   1759 	data_offset = ntohl(rtt_hdr->data_offset);
   1760 	data_length = ntohl(rtt_hdr->data_length);
   1761 	idt	= idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt);
   1762 
   1763 	if (idt == NULL) {
   1764 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task");
   1765 		idm_pdu_rx_protocol_error(ic, pdu);
   1766 		return;
   1767 	}
   1768 
   1769 	/* Find the buffer bound to the task by the iSCSI initiator */
   1770 	mutex_enter(&idt->idt_mutex);
   1771 	idb = idm_buf_find(&idt->idt_outbufv, data_offset);
   1772 	if (idb == NULL) {
   1773 		mutex_exit(&idt->idt_mutex);
   1774 		idm_task_rele(idt);
   1775 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer");
   1776 		idm_pdu_rx_protocol_error(ic, pdu);
   1777 		return;
   1778 	}
   1779 
   1780 	/* return buffer contains this data */
   1781 	if (data_offset + data_length > idb->idb_buflen) {
   1782 		/* Overflow */
   1783 		mutex_exit(&idt->idt_mutex);
   1784 		idm_task_rele(idt);
   1785 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: read from outside "
   1786 		    "buffer");
   1787 		idm_pdu_rx_protocol_error(ic, pdu);
   1788 		return;
   1789 	}
   1790 
   1791 	idt->idt_r2t_ttt = rtt_hdr->ttt;
   1792 	idt->idt_exp_datasn = 0;
   1793 
   1794 	idm_so_send_rtt_data(ic, idt, idb, data_offset,
   1795 	    ntohl(rtt_hdr->data_length));
   1796 	mutex_exit(&idt->idt_mutex);
   1797 
   1798 	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
   1799 	idm_task_rele(idt);
   1800 
   1801 }
   1802 
   1803 idm_status_t
   1804 idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu)
   1805 {
   1806 	uint8_t		pad[ISCSI_PAD_WORD_LEN];
   1807 	int		pad_len;
   1808 	uint32_t	data_digest_crc;
   1809 	uint32_t	crc_calculated;
   1810 	int		total_len;
   1811 	idm_so_conn_t	*so_conn;
   1812 
   1813 	so_conn = ic->ic_transport_private;
   1814 
   1815 	pad_len = ((ISCSI_PAD_WORD_LEN -
   1816 	    (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
   1817 	    (ISCSI_PAD_WORD_LEN - 1));
   1818 
   1819 	ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */
   1820 
   1821 	total_len = pdu->isp_datalen;
   1822 
   1823 	if (pad_len) {
   1824 		pdu->isp_iov[pdu->isp_iovlen].iov_base	= (char *)&pad;
   1825 		pdu->isp_iov[pdu->isp_iovlen].iov_len	= pad_len;
   1826 		total_len		+= pad_len;
   1827 		pdu->isp_iovlen++;
   1828 	}
   1829 
   1830 	/* setup data digest */
   1831 	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
   1832 		pdu->isp_iov[pdu->isp_iovlen].iov_base =
   1833 		    (char *)&data_digest_crc;
   1834 		pdu->isp_iov[pdu->isp_iovlen].iov_len =
   1835 		    sizeof (data_digest_crc);
   1836 		total_len		+= sizeof (data_digest_crc);
   1837 		pdu->isp_iovlen++;
   1838 	}
   1839 
   1840 	pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base;
   1841 
   1842 	if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0],
   1843 	    pdu->isp_iovlen, total_len) != 0) {
   1844 		return (IDM_STATUS_IO);
   1845 	}
   1846 
   1847 	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
   1848 		crc_calculated = idm_crc32c(pdu->isp_data,
   1849 		    pdu->isp_datalen);
   1850 		if (pad_len) {
   1851 			crc_calculated = idm_crc32c_continued((char *)&pad,
   1852 			    pad_len, crc_calculated);
   1853 		}
   1854 		if (crc_calculated != data_digest_crc) {
   1855 			IDM_CONN_LOG(CE_WARN,
   1856 			    "idm_sorecvdata: "
   1857 			    "CRC error: actual 0x%x, calc 0x%x",
   1858 			    data_digest_crc, crc_calculated);
   1859 
   1860 			/* Invalid Data Digest */
   1861 			return (IDM_STATUS_DATA_DIGEST);
   1862 		}
   1863 	}
   1864 
   1865 	return (IDM_STATUS_SUCCESS);
   1866 }
   1867 
   1868 /*
   1869  * idm_sorecv_scsidata() is used to receive scsi data from the socket. The
   1870  * Data-type PDU header must be read into the idm_pdu_t structure prior to
   1871  * calling this function.
   1872  */
   1873 idm_status_t
   1874 idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu)
   1875 {
   1876 	iscsi_data_hdr_t	*bhs;
   1877 	idm_task_t		*task;
   1878 	uint32_t		offset;
   1879 	uint8_t			opcode;
   1880 	uint32_t		dlength;
   1881 	list_t			*buflst;
   1882 	uint32_t		xfer_bytes;
   1883 	idm_status_t		status;
   1884 
   1885 	ASSERT(ic != NULL);
   1886 	ASSERT(pdu != NULL);
   1887 
   1888 	bhs	= (iscsi_data_hdr_t *)pdu->isp_hdr;
   1889 
   1890 	offset	= ntohl(bhs->offset);
   1891 	opcode	= bhs->opcode;
   1892 	dlength = n2h24(bhs->dlength);
   1893 
   1894 	ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
   1895 	    (opcode == ISCSI_OP_SCSI_DATA));
   1896 
   1897 	/*
   1898 	 * Successful lookup implicitly gets a "hold" on the task.  This
   1899 	 * hold must be released before leaving this function.  At one
   1900 	 * point we were caching this task context and retaining the hold
   1901 	 * but it turned out to be very difficult to release the hold properly.
   1902 	 * The task can be aborted and the connection shutdown between this
   1903 	 * call and the subsequent expected call to idm_so_rx_datain/
   1904 	 * idm_so_rx_dataout (in which case those functions are not called).
   1905 	 * Releasing the hold in the PDU callback doesn't work well either
   1906 	 * because the whole task may be completed by then at which point
   1907 	 * it is too late to release the hold -- for better or worse this
   1908 	 * code doesn't wait on the refcnts during normal operation.
   1909 	 * idm_task_find() is very fast and it is not a huge burden if we
   1910 	 * have to do it twice.
   1911 	 */
   1912 	task = idm_task_find(ic, bhs->itt, bhs->ttt);
   1913 	if (task == NULL) {
   1914 		IDM_CONN_LOG(CE_WARN,
   1915 		    "idm_sorecv_scsidata: could not find task");
   1916 		return (IDM_STATUS_FAIL);
   1917 	}
   1918 
   1919 	mutex_enter(&task->idt_mutex);
   1920 	buflst	= (opcode == ISCSI_OP_SCSI_DATA_RSP) ?
   1921 	    &task->idt_inbufv : &task->idt_outbufv;
   1922 	pdu->isp_sorx_buf = idm_buf_find(buflst, offset);
   1923 	mutex_exit(&task->idt_mutex);
   1924 
   1925 	if (pdu->isp_sorx_buf == NULL) {
   1926 		idm_task_rele(task);
   1927 		IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find "
   1928 		    "buffer for offset %x opcode=%x",
   1929 		    offset, opcode);
   1930 		return (IDM_STATUS_FAIL);
   1931 	}
   1932 
   1933 	xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength);
   1934 	ASSERT(xfer_bytes != 0);
   1935 	if (xfer_bytes != dlength) {
   1936 		idm_task_rele(task);
   1937 		/*
   1938 		 * Buffer overflow, connection error.  The PDU data is still
   1939 		 * sitting in the socket so we can't use the connection
   1940 		 * again until that data is drained.
   1941 		 */
   1942 		return (IDM_STATUS_FAIL);
   1943 	}
   1944 
   1945 	status = idm_sorecvdata(ic, pdu);
   1946 
   1947 	idm_task_rele(task);
   1948 
   1949 	return (status);
   1950 }
   1951 
   1952 static uint32_t
   1953 idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength)
   1954 {
   1955 	uint32_t	buf_ro = ro - idb->idb_bufoffset;
   1956 	uint32_t	xfer_len = min(dlength, idb->idb_buflen - buf_ro);
   1957 
   1958 	ASSERT(ro >= idb->idb_bufoffset);
   1959 
   1960 	pdu->isp_iov[pdu->isp_iovlen].iov_base	=
   1961 	    (caddr_t)idb->idb_buf + buf_ro;
   1962 	pdu->isp_iov[pdu->isp_iovlen].iov_len	= xfer_len;
   1963 	pdu->isp_iovlen++;
   1964 
   1965 	return (xfer_len);
   1966 }
   1967 
   1968 int
   1969 idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu)
   1970 {
   1971 	pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP);
   1972 	ASSERT(pdu->isp_data != NULL);
   1973 
   1974 	pdu->isp_databuflen = pdu->isp_datalen;
   1975 	pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data;
   1976 	pdu->isp_iov[0].iov_len = pdu->isp_datalen;
   1977 	pdu->isp_iovlen = 1;
   1978 	/*
   1979 	 * Since we are associating a new data buffer with this received
   1980 	 * PDU we need to set a specific callback to free the data
   1981 	 * after the PDU is processed.
   1982 	 */
   1983 	pdu->isp_flags |= IDM_PDU_ADDL_DATA;
   1984 	pdu->isp_callback = idm_sorx_addl_pdu_cb;
   1985 
   1986 	return (idm_sorecvdata(ic, pdu));
   1987 }
   1988 
   1989 void
   1990 idm_sorx_thread(void *arg)
   1991 {
   1992 	boolean_t	conn_failure = B_FALSE;
   1993 	idm_conn_t	*ic = (idm_conn_t *)arg;
   1994 	idm_so_conn_t	*so_conn;
   1995 	idm_pdu_t	*pdu;
   1996 	idm_status_t	rc;
   1997 
   1998 	idm_conn_hold(ic);
   1999 
   2000 	mutex_enter(&ic->ic_mutex);
   2001 
   2002 	so_conn = ic->ic_transport_private;
   2003 	so_conn->ic_rx_thread_running = B_TRUE;
   2004 	so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did;
   2005 	cv_signal(&ic->ic_cv);
   2006 
   2007 	while (so_conn->ic_rx_thread_running) {
   2008 		mutex_exit(&ic->ic_mutex);
   2009 
   2010 		/*
   2011 		 * Get PDU with default header size (large enough for
   2012 		 * BHS plus any anticipated AHS).  PDU from
   2013 		 * the cache will have all values set correctly
   2014 		 * for sockets RX including callback.
   2015 		 */
   2016 		pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP);
   2017 		pdu->isp_ic = ic;
   2018 		pdu->isp_flags = 0;
   2019 		pdu->isp_transport_hdrlen = 0;
   2020 
   2021 		if ((rc = idm_sorecvhdr(ic, pdu)) != 0) {
   2022 			/*
   2023 			 * Call idm_pdu_complete so that we call the callback
   2024 			 * and ensure any memory allocated in idm_sorecvhdr
   2025 			 * gets freed up.
   2026 			 */
   2027 			idm_pdu_complete(pdu, IDM_STATUS_FAIL);
   2028 
   2029 			/*
   2030 			 * If ic_rx_thread_running is still set then
   2031 			 * this is some kind of connection problem
   2032 			 * on the socket.  In this case we want to
   2033 			 * generate an event.  Otherwise some other
   2034 			 * thread closed the socket due to another
   2035 			 * issue in which case we don't need to
   2036 			 * generate an event.
   2037 			 */
   2038 			mutex_enter(&ic->ic_mutex);
   2039 			if (so_conn->ic_rx_thread_running) {
   2040 				conn_failure = B_TRUE;
   2041 				so_conn->ic_rx_thread_running = B_FALSE;
   2042 			}
   2043 
   2044 			continue;
   2045 		}
   2046 
   2047 		/*
   2048 		 * Header has been read and validated.  Now we need
   2049 		 * to read the PDU data payload (if present).  SCSI data
   2050 		 * need to be transferred from the socket directly into
   2051 		 * the associated transfer buffer for the SCSI task.
   2052 		 */
   2053 		if (pdu->isp_datalen != 0) {
   2054 			if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
   2055 			    (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
   2056 				rc = idm_sorecv_scsidata(ic, pdu);
   2057 				/*
   2058 				 * All SCSI errors are fatal to the
   2059 				 * connection right now since we have no
   2060 				 * place to put the data.  What we need
   2061 				 * is some kind of sink to dispose of unwanted
   2062 				 * SCSI data.  For example an invalid task tag
   2063 				 * should not kill the connection (although
   2064 				 * we may want to drop the connection).
   2065 				 */
   2066 			} else {
   2067 				/*
   2068 				 * Not data PDUs so allocate a buffer for the
   2069 				 * data segment and read the remaining data.
   2070 				 */
   2071 				rc = idm_sorecv_nonscsidata(ic, pdu);
   2072 			}
   2073 			if (rc != 0) {
   2074 				/*
   2075 				 * Call idm_pdu_complete so that we call the
   2076 				 * callback and ensure any memory allocated
   2077 				 * in idm_sorecvhdr gets freed up.
   2078 				 */
   2079 				idm_pdu_complete(pdu, IDM_STATUS_FAIL);
   2080 
   2081 				/*
   2082 				 * If ic_rx_thread_running is still set then
   2083 				 * this is some kind of connection problem
   2084 				 * on the socket.  In this case we want to
   2085 				 * generate an event.  Otherwise some other
   2086 				 * thread closed the socket due to another
   2087 				 * issue in which case we don't need to
   2088 				 * generate an event.
   2089 				 */
   2090 				mutex_enter(&ic->ic_mutex);
   2091 				if (so_conn->ic_rx_thread_running) {
   2092 					conn_failure = B_TRUE;
   2093 					so_conn->ic_rx_thread_running = B_FALSE;
   2094 				}
   2095 				continue;
   2096 			}
   2097 		}
   2098 
   2099 		/*
   2100 		 * Process RX PDU
   2101 		 */
   2102 		idm_pdu_rx(ic, pdu);
   2103 
   2104 		mutex_enter(&ic->ic_mutex);
   2105 	}
   2106 
   2107 	mutex_exit(&ic->ic_mutex);
   2108 
   2109 	/*
   2110 	 * If we dropped out of the RX processing loop because of
   2111 	 * a socket problem or other connection failure (including
   2112 	 * digest errors) then we need to generate a state machine
   2113 	 * event to shut the connection down.
   2114 	 * If the state machine is already in, for example, INIT_ERROR, this
   2115 	 * event will get dropped, and the TX thread will never be notified
   2116 	 * to shut down.  To be safe, we'll just notify it here.
   2117 	 */
   2118 	if (conn_failure) {
   2119 		if (so_conn->ic_tx_thread_running) {
   2120 			so_conn->ic_tx_thread_running = B_FALSE;
   2121 			mutex_enter(&so_conn->ic_tx_mutex);
   2122 			cv_signal(&so_conn->ic_tx_cv);
   2123 			mutex_exit(&so_conn->ic_tx_mutex);
   2124 		}
   2125 
   2126 		idm_conn_event(ic, CE_TRANSPORT_FAIL, rc);
   2127 	}
   2128 
   2129 	idm_conn_rele(ic);
   2130 
   2131 	thread_exit();
   2132 }
   2133 
   2134 /*
   2135  * idm_so_tx
   2136  *
   2137  * This is the implementation of idm_transport_ops_t's it_tx_pdu entry
   2138  * point.  By definition, it is supposed to be fast.  So, simply queue
   2139  * the entry and return.  The real work is done by idm_i_so_tx() via
   2140  * idm_sotx_thread().
   2141  */
   2142 
   2143 static void
   2144 idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu)
   2145 {
   2146 	idm_so_conn_t *so_conn = ic->ic_transport_private;
   2147 
   2148 	ASSERT(pdu->isp_ic == ic);
   2149 	mutex_enter(&so_conn->ic_tx_mutex);
   2150 
   2151 	if (!so_conn->ic_tx_thread_running) {
   2152 		mutex_exit(&so_conn->ic_tx_mutex);
   2153 		idm_pdu_complete(pdu, IDM_STATUS_ABORTED);
   2154 		return;
   2155 	}
   2156 
   2157 	list_insert_tail(&so_conn->ic_tx_list, (void *)pdu);
   2158 	cv_signal(&so_conn->ic_tx_cv);
   2159 	mutex_exit(&so_conn->ic_tx_mutex);
   2160 }
   2161 
   2162 static idm_status_t
   2163 idm_i_so_tx(idm_pdu_t *pdu)
   2164 {
   2165 	idm_conn_t	*ic = pdu->isp_ic;
   2166 	idm_status_t	status = IDM_STATUS_SUCCESS;
   2167 	uint8_t		pad[ISCSI_PAD_WORD_LEN];
   2168 	int		pad_len;
   2169 	uint32_t	hdr_digest_crc;
   2170 	uint32_t	data_digest_crc = 0;
   2171 	int		total_len = 0;
   2172 	int		iovlen = 0;
   2173 	struct iovec	iov[6];
   2174 	idm_so_conn_t	*so_conn;
   2175 
   2176 	so_conn = ic->ic_transport_private;
   2177 
   2178 	/* Setup BHS */
   2179 	iov[iovlen].iov_base	= (caddr_t)pdu->isp_hdr;
   2180 	iov[iovlen].iov_len	= pdu->isp_hdrlen;
   2181 	total_len		+= iov[iovlen].iov_len;
   2182 	iovlen++;
   2183 
   2184 	/* Setup header digest */
   2185 	if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
   2186 	    (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) {
   2187 		hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen);
   2188 
   2189 		iov[iovlen].iov_base	= (caddr_t)&hdr_digest_crc;
   2190 		iov[iovlen].iov_len	= sizeof (hdr_digest_crc);
   2191 		total_len		+= iov[iovlen].iov_len;
   2192 		iovlen++;
   2193 	}
   2194 
   2195 	/* Setup the data */
   2196 	if (pdu->isp_datalen) {
   2197 		idm_task_t		*idt;
   2198 		idm_buf_t		*idb;
   2199 		iscsi_data_hdr_t	*ihp;
   2200 		ihp = (iscsi_data_hdr_t *)pdu->isp_hdr;
   2201 		/* Write of immediate data */
   2202 		if (ic->ic_ffp &&
   2203 		    (ihp->opcode == ISCSI_OP_SCSI_CMD ||
   2204 		    ihp->opcode == ISCSI_OP_SCSI_DATA)) {
   2205 			idt = idm_task_find(ic, ihp->itt, ihp->ttt);
   2206 			if (idt) {
   2207 				mutex_enter(&idt->idt_mutex);
   2208 				idb = idm_buf_find(&idt->idt_outbufv, 0);
   2209 				mutex_exit(&idt->idt_mutex);
   2210 				/*
   2211 				 * If the initiator call to idm_buf_alloc
   2212 				 * failed then we can get to this point
   2213 				 * without a bound buffer.  The associated
   2214 				 * connection failure will clean things up
   2215 				 * later.  It would be nice to come up with
   2216 				 * a cleaner way to handle this.  In
   2217 				 * particular it seems absurd to look up
   2218 				 * the task and the buffer just to update
   2219 				 * this counter.
   2220 				 */
   2221 				if (idb)
   2222 					idb->idb_xfer_len += pdu->isp_datalen;
   2223 				idm_task_rele(idt);
   2224 			}
   2225 		}
   2226 
   2227 		iov[iovlen].iov_base = (caddr_t)pdu->isp_data;
   2228 		iov[iovlen].iov_len  = pdu->isp_datalen;
   2229 		total_len += iov[iovlen].iov_len;
   2230 		iovlen++;
   2231 	}
   2232 
   2233 	/* Setup the data pad if necessary */
   2234 	pad_len = ((ISCSI_PAD_WORD_LEN -
   2235 	    (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
   2236 	    (ISCSI_PAD_WORD_LEN - 1));
   2237 
   2238 	if (pad_len) {
   2239 		bzero(pad, sizeof (pad));
   2240 		iov[iovlen].iov_base = (void *)&pad;
   2241 		iov[iovlen].iov_len  = pad_len;
   2242 		total_len		+= iov[iovlen].iov_len;
   2243 		iovlen++;
   2244 	}
   2245 
   2246 	/*
   2247 	 * Setup the data digest if enabled.  Data-digest is not sent
   2248 	 * for login-phase PDUs.
   2249 	 */
   2250 	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) &&
   2251 	    ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
   2252 	    (pdu->isp_datalen || pad_len)) {
   2253 		/*
   2254 		 * RFC3720/10.2.3: A zero-length Data Segment also
   2255 		 * implies a zero-length data digest.
   2256 		 */
   2257 		if (pdu->isp_datalen) {
   2258 			data_digest_crc = idm_crc32c(pdu->isp_data,
   2259 			    pdu->isp_datalen);
   2260 		}
   2261 		if (pad_len) {
   2262 			data_digest_crc = idm_crc32c_continued(&pad,
   2263 			    pad_len, data_digest_crc);
   2264 		}
   2265 
   2266 		iov[iovlen].iov_base	= (caddr_t)&data_digest_crc;
   2267 		iov[iovlen].iov_len	= sizeof (data_digest_crc);
   2268 		total_len		+= iov[iovlen].iov_len;
   2269 		iovlen++;
   2270 	}
   2271 
   2272 	/* Transmit the PDU */
   2273 	if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen,
   2274 	    total_len) != 0) {
   2275 		/* Set error status */
   2276 		IDM_CONN_LOG(CE_WARN,
   2277 		    "idm_so_tx: failed to transmit the PDU, so: %p ic: %p "
   2278 		    "data: %p", (void *) so_conn->ic_so, (void *) ic,
   2279 		    (void *) pdu->isp_data);
   2280 		status = IDM_STATUS_IO;
   2281 	}
   2282 
   2283 	/*
   2284 	 * Success does not mean that the PDU actually reached the
   2285 	 * remote node since it could get dropped along the way.
   2286 	 */
   2287 	idm_pdu_complete(pdu, status);
   2288 
   2289 	return (status);
   2290 }
   2291 
   2292 /*
   2293  * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the
   2294  * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength,
   2295  * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN.
   2296  * A target can invoke this function multiple times for a single read command
   2297  * (identified by the same ITT) to split the input into several sequences.
   2298  *
   2299  * DataSN starts with 0 for the first data PDU of an input command and advances
   2300  * by 1 for each subsequent data PDU. Each sequence will have its own F bit,
   2301  * which is set to 1 for the last data PDU of a sequence.
   2302  * If the initiator supports phase collapse, the status bit must be set along
   2303  * with the F bit to indicate that the status is shipped together with the last
   2304  * Data-In PDU.
   2305  *
   2306  * The data PDUs within a sequence will be sent in order with the buffer offset
   2307  * in increasing order. i.e. initiator and target must have negotiated the
   2308  * "DataPDUInOrder" to "Yes". The order between sequences is not enforced.
   2309  *
   2310  * Caller holds idt->idt_mutex
   2311  */
   2312 static idm_status_t
   2313 idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
   2314 {
   2315 	idm_so_conn_t	*so_conn = idb->idb_ic->ic_transport_private;
   2316 	idm_pdu_t	tmppdu;
   2317 
   2318 	ASSERT(mutex_owned(&idt->idt_mutex));
   2319 
   2320 	/*
   2321 	 * Put the idm_buf_t on the tx queue.  It will be transmitted by
   2322 	 * idm_sotx_thread.
   2323 	 */
   2324 	mutex_enter(&so_conn->ic_tx_mutex);
   2325 
   2326 	DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
   2327 	    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
   2328 	    uint64_t, 0, uint32_t, 0, uint32_t, 0,
   2329 	    uint32_t, idb->idb_xfer_len, int, XFER_BUF_TX_TO_INI);
   2330 
   2331 	if (!so_conn->ic_tx_thread_running) {
   2332 		mutex_exit(&so_conn->ic_tx_mutex);
   2333 		/*
   2334 		 * Don't release idt->idt_mutex since we're supposed to hold
   2335 		 * in when calling idm_buf_tx_to_ini_done
   2336 		 */
   2337 		DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
   2338 		    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
   2339 		    uint64_t, 0, uint32_t, 0, uint32_t, 0,
   2340 		    uint32_t, idb->idb_xfer_len,
   2341 		    int, XFER_BUF_TX_TO_INI);
   2342 		idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
   2343 		return (IDM_STATUS_FAIL);
   2344 	}
   2345 
   2346 	/*
   2347 	 * Build a template for the data PDU headers we will use so that
   2348 	 * the SN values will stay consistent with other PDU's we are
   2349 	 * transmitting like R2T and SCSI status.
   2350 	 */
   2351 	bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
   2352 	tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl;
   2353 	(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
   2354 	    ISCSI_OP_SCSI_DATA_RSP);
   2355 	idb->idb_tx_thread = B_TRUE;
   2356 	list_insert_tail(&so_conn->ic_tx_list, (void *)idb);
   2357 	cv_signal(&so_conn->ic_tx_cv);
   2358 	mutex_exit(&so_conn->ic_tx_mutex);
   2359 	mutex_exit(&idt->idt_mutex);
   2360 
   2361 	/*
   2362 	 * Returning success here indicates the transfer was successfully
   2363 	 * dispatched -- it does not mean that the transfer completed
   2364 	 * successfully.
   2365 	 */
   2366 	return (IDM_STATUS_SUCCESS);
   2367 }
   2368 
   2369 /*
   2370  * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the
   2371  * data blocks it is ready to receive from the initiator in response to a WRITE
   2372  * SCSI command. The target iSCSI layer passes the information about the desired
   2373  * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer
   2374  * offset and datalen are passed via the 'idb' argument.
   2375  *
   2376  * Scope for Prototype build:
   2377  * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have
   2378  * negotiated the "InitialR2T" to "Yes".
   2379  *
   2380  * Caller holds idt->idt_mutex
   2381  */
   2382 static idm_status_t
   2383 idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
   2384 {
   2385 	idm_pdu_t		*pdu;
   2386 	iscsi_rtt_hdr_t		*rtt;
   2387 
   2388 	ASSERT(mutex_owned(&idt->idt_mutex));
   2389 
   2390 	DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
   2391 	    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
   2392 	    uint64_t, 0, uint32_t, 0, uint32_t, 0,
   2393 	    uint32_t, idb->idb_xfer_len, int, XFER_BUF_RX_FROM_INI);
   2394 
   2395 	pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
   2396 	pdu->isp_ic = idt->idt_ic;
   2397 	pdu->isp_flags = IDM_PDU_SET_STATSN;
   2398 	bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t));
   2399 
   2400 	/* iSCSI layer fills the TTT, ITT, ExpCmdSN, MaxCmdSN */
   2401 	(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP);
   2402 
   2403 	/* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */
   2404 	rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr);
   2405 
   2406 	rtt->opcode		= ISCSI_OP_RTT_RSP;
   2407 	rtt->flags		= ISCSI_FLAG_FINAL;
   2408 	rtt->data_offset	= htonl(idb->idb_bufoffset);
   2409 	rtt->data_length	= htonl(idb->idb_xfer_len);
   2410 	rtt->rttsn		= htonl(idt->idt_exp_rttsn++);
   2411 
   2412 	/* Keep track of buffer offsets */
   2413 	idb->idb_exp_offset	= idb->idb_bufoffset;
   2414 	mutex_exit(&idt->idt_mutex);
   2415 
   2416 	/*
   2417 	 * Transmit the PDU.
   2418 	 */
   2419 	idm_pdu_tx(pdu);
   2420 
   2421 	return (IDM_STATUS_SUCCESS);
   2422 }
   2423 
   2424 static idm_status_t
   2425 idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen)
   2426 {
   2427 	if ((buflen > IDM_SO_BUF_CACHE_LB) && (buflen <= IDM_SO_BUF_CACHE_UB)) {
   2428 		idb->idb_buf = kmem_cache_alloc(idm.idm_so_128k_buf_cache,
   2429 		    KM_NOSLEEP);
   2430 		idb->idb_buf_private = idm.idm_so_128k_buf_cache;
   2431 	} else {
   2432 		idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP);
   2433 		idb->idb_buf_private = NULL;
   2434 	}
   2435 
   2436 	if (idb->idb_buf == NULL) {
   2437 		IDM_CONN_LOG(CE_NOTE,
   2438 		    "idm_so_buf_alloc: failed buffer allocation");
   2439 		return (IDM_STATUS_FAIL);
   2440 	}
   2441 
   2442 	return (IDM_STATUS_SUCCESS);
   2443 }
   2444 
   2445 /* ARGSUSED */
   2446 static idm_status_t
   2447 idm_so_buf_setup(idm_buf_t *idb)
   2448 {
   2449 	/* Ensure bufalloc'd flag is unset */
   2450 	idb->idb_bufalloc = B_FALSE;
   2451 
   2452 	return (IDM_STATUS_SUCCESS);
   2453 }
   2454 
   2455 /* ARGSUSED */
   2456 static void
   2457 idm_so_buf_teardown(idm_buf_t *idb)
   2458 {
   2459 	/* nothing to do here */
   2460 }
   2461 
   2462 static void
   2463 idm_so_buf_free(idm_buf_t *idb)
   2464 {
   2465 	if (idb->idb_buf_private == NULL) {
   2466 		kmem_free(idb->idb_buf, idb->idb_buflen);
   2467 	} else {
   2468 		kmem_cache_free(idb->idb_buf_private, idb->idb_buf);
   2469 	}
   2470 }
   2471 
   2472 static void
   2473 idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, idm_buf_t *idb,
   2474     uint32_t offset, uint32_t length)
   2475 {
   2476 	idm_so_conn_t	*so_conn = ic->ic_transport_private;
   2477 	idm_pdu_t	tmppdu;
   2478 	idm_buf_t	*rtt_buf;
   2479 
   2480 	ASSERT(mutex_owned(&idt->idt_mutex));
   2481 
   2482 	/*
   2483 	 * Allocate a buffer to represent the RTT transfer.  We could further
   2484 	 * optimize this by allocating the buffers internally from an rtt
   2485 	 * specific buffer cache since this is socket-specific code but for
   2486 	 * now we will keep it simple.
   2487 	 */
   2488 	rtt_buf = idm_buf_alloc(ic, (uint8_t *)idb->idb_buf + offset, length);
   2489 	if (rtt_buf == NULL) {
   2490 		/*
   2491 		 * If we're in FFP then the failure was likely a resource
   2492 		 * allocation issue and we should close the connection by
   2493 		 * sending a CE_TRANSPORT_FAIL event.
   2494 		 *
   2495 		 * If we're not in FFP then idm_buf_alloc will always
   2496 		 * fail and the state is transitioning to "complete" anyway
   2497 		 * so we won't bother to send an event.
   2498 		 */
   2499 		mutex_enter(&ic->ic_state_mutex);
   2500 		if (ic->ic_ffp)
   2501 			idm_conn_event_locked(ic, CE_TRANSPORT_FAIL,
   2502 			    NULL, CT_NONE);
   2503 		mutex_exit(&ic->ic_state_mutex);
   2504 		return;
   2505 	}
   2506 
   2507 	rtt_buf->idb_buf_cb = NULL;
   2508 	rtt_buf->idb_cb_arg = NULL;
   2509 	rtt_buf->idb_bufoffset = offset;
   2510 	rtt_buf->idb_xfer_len = length;
   2511 	rtt_buf->idb_ic = idt->idt_ic;
   2512 	rtt_buf->idb_task_binding = idt;
   2513 
   2514 	/*
   2515 	 * Put the idm_buf_t on the tx queue.  It will be transmitted by
   2516 	 * idm_sotx_thread.
   2517 	 */
   2518 	mutex_enter(&so_conn->ic_tx_mutex);
   2519 
   2520 	if (!so_conn->ic_tx_thread_running) {
   2521 		idm_buf_free(rtt_buf);
   2522 		mutex_exit(&so_conn->ic_tx_mutex);
   2523 		return;
   2524 	}
   2525 
   2526 	/*
   2527 	 * This new buffer represents an additional reference on the task
   2528 	 */
   2529 	idm_task_hold(idt);
   2530 
   2531 	/*
   2532 	 * Build a template for the data PDU headers we will use so that
   2533 	 * the SN values will stay consistent with other PDU's we are
   2534 	 * transmitting like R2T and SCSI status.
   2535 	 */
   2536 	bzero(&rtt_buf->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
   2537 	tmppdu.isp_hdr = &rtt_buf->idb_data_hdr_tmpl;
   2538 	(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
   2539 	    ISCSI_OP_SCSI_DATA);
   2540 	rtt_buf->idb_tx_thread = B_TRUE;
   2541 	rtt_buf->idb_in_transport = B_TRUE;
   2542 	list_insert_tail(&so_conn->ic_tx_list, (void *)rtt_buf);
   2543 	cv_signal(&so_conn->ic_tx_cv);
   2544 	mutex_exit(&so_conn->ic_tx_mutex);
   2545 }
   2546 
   2547 static void
   2548 idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb)
   2549 {
   2550 	/*
   2551 	 * Don't worry about status -- we assume any error handling
   2552 	 * is performed by the caller (idm_sotx_thread).
   2553 	 */
   2554 	idb->idb_in_transport = B_FALSE;
   2555 	idm_task_rele(idt);
   2556 	idm_buf_free(idb);
   2557 }
   2558 
   2559 static idm_status_t
   2560 idm_so_send_buf_region(idm_task_t *idt, idm_buf_t *idb,
   2561     uint32_t buf_region_offset, uint32_t buf_region_length)
   2562 {
   2563 	idm_conn_t		*ic;
   2564 	uint32_t		max_dataseglen;
   2565 	size_t			remainder, chunk;
   2566 	uint32_t		data_offset = buf_region_offset;
   2567 	iscsi_data_hdr_t	*bhs;
   2568 	idm_pdu_t		*pdu;
   2569 	idm_status_t		tx_status;
   2570 
   2571 	ASSERT(mutex_owned(&idt->idt_mutex));
   2572 
   2573 	ic = idt->idt_ic;
   2574 
   2575 	max_dataseglen = ic->ic_conn_params.max_xmit_dataseglen;
   2576 	remainder = buf_region_length;
   2577 
   2578 	while (remainder) {
   2579 		if (idt->idt_state != TASK_ACTIVE) {
   2580 			ASSERT((idt->idt_state != TASK_IDLE) &&
   2581 			    (idt->idt_state != TASK_COMPLETE));
   2582 			return (IDM_STATUS_ABORTED);
   2583 		}
   2584 
   2585 		/* check to see if we need to chunk the data */
   2586 		if (remainder > max_dataseglen) {
   2587 			chunk = max_dataseglen;
   2588 		} else {
   2589 			chunk = remainder;
   2590 		}
   2591 
   2592 		/* Data PDU headers will always be sizeof (iscsi_hdr_t) */
   2593 		pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
   2594 		pdu->isp_ic = ic;
   2595 		pdu->isp_flags = 0;	/* initialize isp_flags */
   2596 
   2597 		/*
   2598 		 * We've already built a build a header template
   2599 		 * to use during the transfer.  Use this template so that
   2600 		 * the SN values stay consistent with any unrelated PDU's
   2601 		 * being transmitted.
   2602 		 */
   2603 		bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr,
   2604 		    sizeof (iscsi_hdr_t));
   2605 
   2606 		/*
   2607 		 * Set DataSN, data offset, and flags in BHS
   2608 		 * For the prototype build, A = 0, S = 0, U = 0
   2609 		 */
   2610 		bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr);
   2611 
   2612 		bhs->datasn		= htonl(idt->idt_exp_datasn++);
   2613 
   2614 		hton24(bhs->dlength, chunk);
   2615 		bhs->offset = htonl(idb->idb_bufoffset + data_offset);
   2616 
   2617 		/* setup data */
   2618 		pdu->isp_data	=  (uint8_t *)idb->idb_buf + data_offset;
   2619 		pdu->isp_datalen = (uint_t)chunk;
   2620 
   2621 		if (chunk == remainder) {
   2622 			bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */
   2623 			/* Piggyback the status with the last data PDU */
   2624 			if (idt->idt_flags & IDM_TASK_PHASECOLLAPSE_REQ) {
   2625 				pdu->isp_flags |= IDM_PDU_SET_STATSN |
   2626 				    IDM_PDU_ADVANCE_STATSN;
   2627 				(*idt->idt_ic->ic_conn_ops.icb_update_statsn)
   2628 				    (idt, pdu);
   2629 				idt->idt_flags |=
   2630 				    IDM_TASK_PHASECOLLAPSE_SUCCESS;
   2631 
   2632 			}
   2633 		}
   2634 
   2635 		remainder	-= chunk;
   2636 		data_offset	+= chunk;
   2637 
   2638 		/* Instrument the data-send DTrace probe. */
   2639 		if (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) {
   2640 			DTRACE_ISCSI_2(data__send,
   2641 			    idm_conn_t *, idt->idt_ic,
   2642 			    iscsi_data_rsp_hdr_t *,
   2643 			    (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
   2644 		}
   2645 
   2646 		/*
   2647 		 * Now that we're done working with idt_exp_datasn,
   2648 		 * idt->idt_state and idb->idb_bufoffset we can release
   2649 		 * the task lock -- don't want to hold it across the
   2650 		 * call to idm_i_so_tx since we could block.
   2651 		 */
   2652 		mutex_exit(&idt->idt_mutex);
   2653 
   2654 		/*
   2655 		 * Transmit the PDU.  Call the internal routine directly
   2656 		 * as there is already implicit ordering.
   2657 		 */
   2658 		if ((tx_status = idm_i_so_tx(pdu)) != IDM_STATUS_SUCCESS) {
   2659 			mutex_enter(&idt->idt_mutex);
   2660 			return (tx_status);
   2661 		}
   2662 
   2663 		mutex_enter(&idt->idt_mutex);
   2664 		idt->idt_tx_bytes += chunk;
   2665 	}
   2666 
   2667 	return (IDM_STATUS_SUCCESS);
   2668 }
   2669 
   2670 /*
   2671  * TX PDU cache
   2672  */
   2673 /* ARGSUSED */
   2674 int
   2675 idm_sotx_pdu_constructor(void *hdl, void *arg, int flags)
   2676 {
   2677 	idm_pdu_t	*pdu = hdl;
   2678 
   2679 	bzero(pdu, sizeof (idm_pdu_t));
   2680 	pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
   2681 	pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
   2682 	pdu->isp_callback = idm_sotx_cache_pdu_cb;
   2683 	pdu->isp_magic = IDM_PDU_MAGIC;
   2684 	bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
   2685 
   2686 	return (0);
   2687 }
   2688 
   2689 /* ARGSUSED */
   2690 void
   2691 idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
   2692 {
   2693 	/* reset values between use */
   2694 	pdu->isp_datalen = 0;
   2695 
   2696 	kmem_cache_free(idm.idm_sotx_pdu_cache, pdu);
   2697 }
   2698 
   2699 /*
   2700  * RX PDU cache
   2701  */
   2702 /* ARGSUSED */
   2703 int
   2704 idm_sorx_pdu_constructor(void *hdl, void *arg, int flags)
   2705 {
   2706 	idm_pdu_t	*pdu = hdl;
   2707 
   2708 	bzero(pdu, sizeof (idm_pdu_t));
   2709 	pdu->isp_magic = IDM_PDU_MAGIC;
   2710 	pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
   2711 	pdu->isp_callback = idm_sorx_cache_pdu_cb;
   2712 
   2713 	return (0);
   2714 }
   2715 
   2716 /* ARGSUSED */
   2717 static void
   2718 idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
   2719 {
   2720 	pdu->isp_iovlen = 0;
   2721 	pdu->isp_sorx_buf = 0;
   2722 	kmem_cache_free(idm.idm_sorx_pdu_cache, pdu);
   2723 }
   2724 
   2725 static void
   2726 idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
   2727 {
   2728 	/*
   2729 	 * We had to modify our cached RX PDU with a longer header buffer
   2730 	 * and/or a longer data buffer.  Release the new buffers and fix
   2731 	 * the fields back to what we would expect for a cached RX PDU.
   2732 	 */
   2733 	if (pdu->isp_flags & IDM_PDU_ADDL_HDR) {
   2734 		kmem_free(pdu->isp_hdr, pdu->isp_hdrlen);
   2735 	}
   2736 	if (pdu->isp_flags & IDM_PDU_ADDL_DATA) {
   2737 		kmem_free(pdu->isp_data, pdu->isp_datalen);
   2738 	}
   2739 	pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1);
   2740 	pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
   2741 	pdu->isp_data = NULL;
   2742 	pdu->isp_datalen = 0;
   2743 	pdu->isp_sorx_buf = 0;
   2744 	pdu->isp_callback = idm_sorx_cache_pdu_cb;
   2745 	idm_sorx_cache_pdu_cb(pdu, status);
   2746 }
   2747 
   2748 /*
   2749  * This thread is only active when I/O is queued for transmit
   2750  * because the socket is busy.
   2751  */
   2752 void
   2753 idm_sotx_thread(void *arg)
   2754 {
   2755 	idm_conn_t	*ic = arg;
   2756 	idm_tx_obj_t	*object, *next;
   2757 	idm_so_conn_t	*so_conn;
   2758 	idm_status_t	status = IDM_STATUS_SUCCESS;
   2759 
   2760 	idm_conn_hold(ic);
   2761 
   2762 	mutex_enter(&ic->ic_mutex);
   2763 	so_conn = ic->ic_transport_private;
   2764 	so_conn->ic_tx_thread_running = B_TRUE;
   2765 	so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did;
   2766 	cv_signal(&ic->ic_cv);
   2767 	mutex_exit(&ic->ic_mutex);
   2768 
   2769 	mutex_enter(&so_conn->ic_tx_mutex);
   2770 
   2771 	while (so_conn->ic_tx_thread_running) {
   2772 		while (list_is_empty(&so_conn->ic_tx_list)) {
   2773 			DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic);
   2774 			cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex);
   2775 			DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic);
   2776 
   2777 			if (!so_conn->ic_tx_thread_running) {
   2778 				goto tx_bail;
   2779 			}
   2780 		}
   2781 
   2782 		object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
   2783 		list_remove(&so_conn->ic_tx_list, object);
   2784 		mutex_exit(&so_conn->ic_tx_mutex);
   2785 
   2786 		switch (object->idm_tx_obj_magic) {
   2787 		case IDM_PDU_MAGIC: {
   2788 			idm_pdu_t *pdu = (idm_pdu_t *)object;
   2789 			DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic,
   2790 			    idm_pdu_t *, (idm_pdu_t *)object);
   2791 
   2792 			if (pdu->isp_flags & IDM_PDU_SET_STATSN) {
   2793 				/* No IDM task */
   2794 				(ic->ic_conn_ops.icb_update_statsn)(NULL, pdu);
   2795 			}
   2796 			status = idm_i_so_tx((idm_pdu_t *)object);
   2797 			break;
   2798 		}
   2799 		case IDM_BUF_MAGIC: {
   2800 			idm_buf_t *idb = (idm_buf_t *)object;
   2801 			idm_task_t *idt = idb->idb_task_binding;
   2802 
   2803 			DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic,
   2804 			    idm_buf_t *, idb);
   2805 
   2806 			mutex_enter(&idt->idt_mutex);
   2807 			status = idm_so_send_buf_region(idt,
   2808 			    idb, 0, idb->idb_xfer_len);
   2809 
   2810 			/*
   2811 			 * TX thread owns the buffer so we expect it to
   2812 			 * be "in transport"
   2813 			 */
   2814 			ASSERT(idb->idb_in_transport);
   2815 			if (IDM_CONN_ISTGT(ic)) {
   2816 				/*
   2817 				 * idm_buf_tx_to_ini_done releases
   2818 				 * idt->idt_mutex
   2819 				 */
   2820 				DTRACE_ISCSI_8(xfer__done,
   2821 				    idm_conn_t *, idt->idt_ic,
   2822 				    uintptr_t, idb->idb_buf,
   2823 				    uint32_t, idb->idb_bufoffset,
   2824 				    uint64_t, 0, uint32_t, 0, uint32_t, 0,
   2825 				    uint32_t, idb->idb_xfer_len,
   2826 				    int, XFER_BUF_TX_TO_INI);
   2827 				idm_buf_tx_to_ini_done(idt, idb, status);
   2828 			} else {
   2829 				idm_so_send_rtt_data_done(idt, idb);
   2830 				mutex_exit(&idt->idt_mutex);
   2831 			}
   2832 			break;
   2833 		}
   2834 
   2835 		default:
   2836 			IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic "
   2837 			    "(0x%08x)", object->idm_tx_obj_magic);
   2838 			status = IDM_STATUS_FAIL;
   2839 		}
   2840 
   2841 		mutex_enter(&so_conn->ic_tx_mutex);
   2842 
   2843 		if (status != IDM_STATUS_SUCCESS) {
   2844 			so_conn->ic_tx_thread_running = B_FALSE;
   2845 			idm_conn_event(ic, CE_TRANSPORT_FAIL, status);
   2846 		}
   2847 	}
   2848 
   2849 	/*
   2850 	 * Before we leave, we need to abort every item remaining in the
   2851 	 * TX list.
   2852 	 */
   2853 
   2854 tx_bail:
   2855 	object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
   2856 
   2857 	while (object != NULL) {
   2858 		next = list_next(&so_conn->ic_tx_list, object);
   2859 
   2860 		list_remove(&so_conn->ic_tx_list, object);
   2861 		switch (object->idm_tx_obj_magic) {
   2862 		case IDM_PDU_MAGIC:
   2863 			idm_pdu_complete((idm_pdu_t *)object,
   2864 			    IDM_STATUS_ABORTED);
   2865 			break;
   2866 
   2867 		case IDM_BUF_MAGIC: {
   2868 			idm_buf_t *idb = (idm_buf_t *)object;
   2869 			idm_task_t *idt = idb->idb_task_binding;
   2870 			mutex_exit(&so_conn->ic_tx_mutex);
   2871 			mutex_enter(&idt->idt_mutex);
   2872 			/*
   2873 			 * TX thread owns the buffer so we expect it to
   2874 			 * be "in transport"
   2875 			 */
   2876 			ASSERT(idb->idb_in_transport);
   2877 			if (IDM_CONN_ISTGT(ic)) {
   2878 				/*
   2879 				 * idm_buf_tx_to_ini_done releases
   2880 				 * idt->idt_mutex
   2881 				 */
   2882 				DTRACE_ISCSI_8(xfer__done,
   2883 				    idm_conn_t *, idt->idt_ic,
   2884 				    uintptr_t, idb->idb_buf,
   2885 				    uint32_t, idb->idb_bufoffset,
   2886 				    uint64_t, 0, uint32_t, 0, uint32_t, 0,
   2887 				    uint32_t, idb->idb_xfer_len,
   2888 				    int, XFER_BUF_TX_TO_INI);
   2889 				idm_buf_tx_to_ini_done(idt, idb,
   2890 				    IDM_STATUS_ABORTED);
   2891 			} else {
   2892 				idm_so_send_rtt_data_done(idt, idb);
   2893 				mutex_exit(&idt->idt_mutex);
   2894 			}
   2895 			mutex_enter(&so_conn->ic_tx_mutex);
   2896 			break;
   2897 		}
   2898 		default:
   2899 			IDM_CONN_LOG(CE_WARN,
   2900 			    "idm_sotx_thread: Unexpected magic "
   2901 			    "(0x%08x)", object->idm_tx_obj_magic);
   2902 		}
   2903 
   2904 		object = next;
   2905 	}
   2906 
   2907 	mutex_exit(&so_conn->ic_tx_mutex);
   2908 	idm_conn_rele(ic);
   2909 	thread_exit();
   2910 	/*NOTREACHED*/
   2911 }
   2912 
   2913 static void
   2914 idm_so_socket_set_nonblock(struct sonode *node)
   2915 {
   2916 	(void) VOP_SETFL(node->so_vnode, node->so_flag,
   2917 	    (node->so_state | FNONBLOCK), CRED(), NULL);
   2918 }
   2919 
   2920 static void
   2921 idm_so_socket_set_block(struct sonode *node)
   2922 {
   2923 	(void) VOP_SETFL(node->so_vnode, node->so_flag,
   2924 	    (node->so_state & (~FNONBLOCK)), CRED(), NULL);
   2925 }
   2926 
   2927 
   2928 /*
   2929  * Called by kernel sockets when the connection has been accepted or
   2930  * rejected. In early volo, a "disconnect" callback was sent instead of
   2931  * "connectfailed", so we check for both.
   2932  */
   2933 /* ARGSUSED */
   2934 void
   2935 idm_so_timed_socket_connect_cb(ksocket_t ks,
   2936     ksocket_callback_event_t ev, void *arg, uintptr_t info)
   2937 {
   2938 	idm_so_timed_socket_t	*itp = arg;
   2939 	ASSERT(itp != NULL);
   2940 	ASSERT(ev == KSOCKET_EV_CONNECTED ||
   2941 	    ev == KSOCKET_EV_CONNECTFAILED ||
   2942 	    ev == KSOCKET_EV_DISCONNECTED);
   2943 
   2944 	mutex_enter(&idm_so_timed_socket_mutex);
   2945 	itp->it_callback_called = B_TRUE;
   2946 	if (ev == KSOCKET_EV_CONNECTED) {
   2947 		itp->it_socket_error_code = 0;
   2948 	} else {
   2949 		/* Make sure the error code is non-zero on error */
   2950 		if (info == 0)
   2951 			info = ECONNRESET;
   2952 		itp->it_socket_error_code = (int)info;
   2953 	}
   2954 	cv_signal(&itp->it_cv);
   2955 	mutex_exit(&idm_so_timed_socket_mutex);
   2956 }
   2957 
   2958 int
   2959 idm_so_timed_socket_connect(ksocket_t ks,
   2960     struct sockaddr_storage *sa, int sa_sz, int login_max_usec)
   2961 {
   2962 	clock_t			conn_login_max;
   2963 	int			rc, nonblocking, rval;
   2964 	idm_so_timed_socket_t	it;
   2965 	ksocket_callbacks_t	ks_cb;
   2966 
   2967 	conn_login_max = ddi_get_lbolt() + drv_usectohz(login_max_usec);
   2968 
   2969 	/*
   2970 	 * Set to non-block socket mode, with callback on connect
   2971 	 * Early volo used "disconnected" instead of "connectfailed",
   2972 	 * so set callback to look for both.
   2973 	 */
   2974 	bzero(&it, sizeof (it));
   2975 	ks_cb.ksock_cb_flags = KSOCKET_CB_CONNECTED |
   2976 	    KSOCKET_CB_CONNECTFAILED | KSOCKET_CB_DISCONNECTED;
   2977 	ks_cb.ksock_cb_connected = idm_so_timed_socket_connect_cb;
   2978 	ks_cb.ksock_cb_connectfailed = idm_so_timed_socket_connect_cb;
   2979 	ks_cb.ksock_cb_disconnected = idm_so_timed_socket_connect_cb;
   2980 	cv_init(&it.it_cv, NULL, CV_DEFAULT, NULL);
   2981 	rc = ksocket_setcallbacks(ks, &ks_cb, &it, CRED());
   2982 	if (rc != 0)
   2983 		return (rc);
   2984 
   2985 	/* Set to non-blocking mode */
   2986 	nonblocking = 1;
   2987 	rc = ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
   2988 	    CRED());
   2989 	if (rc != 0)
   2990 		goto cleanup;
   2991 
   2992 	bzero(&it, sizeof (it));
   2993 	for (;;) {
   2994 		/*
   2995 		 * Warning -- in a loopback scenario, the call to
   2996 		 * the connect_cb can occur inside the call to
   2997 		 * ksocket_connect. Do not hold the mutex around the
   2998 		 * call to ksocket_connect.
   2999 		 */
   3000 		rc = ksocket_connect(ks, (struct sockaddr *)sa, sa_sz, CRED());
   3001 		if (rc == 0 || rc == EISCONN) {
   3002 			/* socket success or already success */
   3003 			rc = 0;
   3004 			break;
   3005 		}
   3006 		if ((rc != EINPROGRESS) && (rc != EALREADY)) {
   3007 			break;
   3008 		}
   3009 
   3010 		/* TCP connect still in progress. See if out of time. */
   3011 		if (ddi_get_lbolt() > conn_login_max) {
   3012 			/*
   3013 			 * Connection retry timeout,
   3014 			 * failed connect to target.
   3015 			 */
   3016 			rc = ETIMEDOUT;
   3017 			break;
   3018 		}
   3019 
   3020 		/*
   3021 		 * TCP connect still in progress.  Sleep until callback.
   3022 		 * Do NOT go to sleep if the callback already occurred!
   3023 		 */
   3024 		mutex_enter(&idm_so_timed_socket_mutex);
   3025 		if (!it.it_callback_called) {
   3026 			(void) cv_timedwait(&it.it_cv,
   3027 			    &idm_so_timed_socket_mutex, conn_login_max);
   3028 		}
   3029 		if (it.it_callback_called) {
   3030 			rc = it.it_socket_error_code;
   3031 			mutex_exit(&idm_so_timed_socket_mutex);
   3032 			break;
   3033 		}
   3034 		/* If timer expires, go call ksocket_connect one last time. */
   3035 		mutex_exit(&idm_so_timed_socket_mutex);
   3036 	}
   3037 
   3038 	/* resume blocking mode */
   3039 	nonblocking = 0;
   3040 	(void) ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
   3041 	    CRED());
   3042 cleanup:
   3043 	(void) ksocket_setcallbacks(ks, NULL, NULL, CRED());
   3044 	cv_destroy(&it.it_cv);
   3045 	if (rc != 0) {
   3046 		idm_soshutdown(ks);
   3047 	}
   3048 	return (rc);
   3049 }
   3050 
   3051 
   3052 void
   3053 idm_addr_to_sa(idm_addr_t *dportal, struct sockaddr_storage *sa)
   3054 {
   3055 	int			dp_addr_size;
   3056 	struct sockaddr_in	*sin;
   3057 	struct sockaddr_in6	*sin6;
   3058 
   3059 	/* Build sockaddr_storage for this portal (idm_addr_t) */
   3060 	bzero(sa, sizeof (*sa));
   3061 	dp_addr_size = dportal->a_addr.i_insize;
   3062 	if (dp_addr_size == sizeof (struct in_addr)) {
   3063 		/* IPv4 */
   3064 		sa->ss_family = AF_INET;
   3065 		sin = (struct sockaddr_in *)sa;
   3066 		sin->sin_port = htons(dportal->a_port);
   3067 		bcopy(&dportal->a_addr.i_addr.in4,
   3068 		    &sin->sin_addr, sizeof (struct in_addr));
   3069 	} else if (dp_addr_size == sizeof (struct in6_addr)) {
   3070 		/* IPv6 */
   3071 		sa->ss_family = AF_INET6;
   3072 		sin6 = (struct sockaddr_in6 *)sa;
   3073 		sin6->sin6_port = htons(dportal->a_port);
   3074 		bcopy(&dportal->a_addr.i_addr.in6,
   3075 		    &sin6->sin6_addr, sizeof (struct in6_addr));
   3076 	} else {
   3077 		ASSERT(0);
   3078 	}
   3079 }
   3080 
   3081 
   3082 /*
   3083  * return a human-readable form of a sockaddr_storage, in the form
   3084  * [ip-address]:port.  This is used in calls to logging functions.
   3085  * If several calls to idm_sa_ntop are made within the same invocation
   3086  * of a logging function, then each one needs its own buf.
   3087  */
   3088 const char *
   3089 idm_sa_ntop(const struct sockaddr_storage *sa,
   3090     char *buf, size_t size)
   3091 {
   3092 	static const char bogus_ip[] = "[0].-1";
   3093 	char tmp[INET6_ADDRSTRLEN];
   3094 
   3095 	switch (sa->ss_family) {
   3096 	case AF_INET6:
   3097 		{
   3098 			const struct sockaddr_in6 *in6 =
   3099 			    (const struct sockaddr_in6 *) sa;
   3100 
   3101 			if (inet_ntop(in6->sin6_family,
   3102 			    &in6->sin6_addr, tmp, sizeof (tmp)) == NULL) {
   3103 				goto err;
   3104 			}
   3105 			if (strlen(tmp) + sizeof ("[].65535") > size) {
   3106 				goto err;
   3107 			}
   3108 			/* struct sockaddr_storage gets port info from v4 loc */
   3109 			(void) snprintf(buf, size, "[%s].%u", tmp,
   3110 			    ntohs(in6->sin6_port));
   3111 			return (buf);
   3112 		}
   3113 	case AF_INET:
   3114 		{
   3115 			const struct sockaddr_in *in =
   3116 			    (const struct sockaddr_in *) sa;
   3117 
   3118 			if (inet_ntop(in->sin_family, &in->sin_addr,
   3119 			    tmp, sizeof (tmp)) == NULL) {
   3120 				goto err;
   3121 			}
   3122 			if (strlen(tmp) + sizeof ("[].65535") > size) {
   3123 				goto err;
   3124 			}
   3125 			(void) snprintf(buf, size,  "[%s].%u", tmp,
   3126 			    ntohs(in->sin_port));
   3127 			return (buf);
   3128 		}
   3129 	default:
   3130 		break;
   3131 	}
   3132 err:
   3133 	(void) snprintf(buf, size, "%s", bogus_ip);
   3134 	return (buf);
   3135 }
   3136