Home | History | Annotate | Download | only in idm
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/conf.h>
     27 #include <sys/stat.h>
     28 #include <sys/file.h>
     29 #include <sys/ddi.h>
     30 #include <sys/sunddi.h>
     31 #include <sys/modctl.h>
     32 #include <sys/priv.h>
     33 #include <sys/cpuvar.h>
     34 #include <sys/socket.h>
     35 #include <sys/strsubr.h>
     36 #include <sys/sysmacros.h>
     37 #include <sys/sdt.h>
     38 #include <netinet/tcp.h>
     39 #include <inet/tcp.h>
     40 #include <sys/socketvar.h>
     41 #include <sys/pathname.h>
     42 #include <sys/fs/snode.h>
     43 #include <sys/fs/dv_node.h>
     44 #include <sys/vnode.h>
     45 #include <netinet/in.h>
     46 #include <net/if.h>
     47 #include <sys/sockio.h>
     48 #include <sys/ksocket.h>
     49 #include <sys/filio.h>		/* FIONBIO */
     50 #include <sys/iscsi_protocol.h>
     51 #include <sys/idm/idm.h>
     52 #include <sys/idm/idm_so.h>
     53 #include <sys/idm/idm_text.h>
     54 
     55 #define	IN_PROGRESS_DELAY	1
     56 
     57 /*
     58  * in6addr_any is currently all zeroes, but use the macro in case this
     59  * ever changes.
     60  */
     61 static const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
     62 
     63 static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
     64 static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
     65 static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
     66 
     67 static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so);
     68 static void idm_so_conn_destroy_common(idm_conn_t *ic);
     69 static void idm_so_conn_connect_common(idm_conn_t *ic);
     70 
     71 static void idm_set_ini_preconnect_options(idm_so_conn_t *sc,
     72     boolean_t boot_conn);
     73 static void idm_set_ini_postconnect_options(idm_so_conn_t *sc);
     74 static void idm_set_tgt_connect_options(ksocket_t so);
     75 static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);
     76 
     77 static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
     78 static void idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt,
     79     idm_buf_t *idb, uint32_t offset, uint32_t length);
     80 static void idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb);
     81 static idm_status_t idm_so_send_buf_region(idm_task_t *idt,
     82     idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length);
     83 
     84 static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb,
     85     uint32_t ro, uint32_t dlength);
     86 
     87 static idm_status_t idm_so_handle_digest(idm_conn_t *it,
     88     nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx);
     89 
     90 static void idm_so_socket_set_nonblock(struct sonode *node);
     91 static void idm_so_socket_set_block(struct sonode *node);
     92 
     93 /*
     94  * Transport ops prototypes
     95  */
     96 static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu);
     97 static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
     98 static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
     99 static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu);
    100 static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu);
    101 static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu);
    102 static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt);
    103 static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it,
    104     nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
    105 static void idm_so_notice_key_values(idm_conn_t *it,
    106     nvlist_t *negotiated_nvl);
    107 static kv_status_t idm_so_declare_key_values(idm_conn_t *it,
    108     nvlist_t *config_nvl, nvlist_t *outgoing_nvl);
    109 static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic,
    110     idm_transport_caps_t *caps);
    111 static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen);
    112 static void idm_so_buf_free(idm_buf_t *idb);
    113 static idm_status_t idm_so_buf_setup(idm_buf_t *idb);
    114 static void idm_so_buf_teardown(idm_buf_t *idb);
    115 static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is);
    116 static void idm_so_tgt_svc_destroy(idm_svc_t *is);
    117 static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is);
    118 static void idm_so_tgt_svc_offline(idm_svc_t *is);
    119 static void idm_so_tgt_conn_destroy(idm_conn_t *ic);
    120 static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic);
    121 static void idm_so_conn_disconnect(idm_conn_t *ic);
    122 static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic);
    123 static void idm_so_ini_conn_destroy(idm_conn_t *ic);
    124 static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic);
    125 
    126 /*
    127  * IDM Native Sockets transport operations
    128  */
    129 static
    130 idm_transport_ops_t idm_so_transport_ops = {
    131 	idm_so_tx,			/* it_tx_pdu */
    132 	idm_so_buf_tx_to_ini,		/* it_buf_tx_to_ini */
    133 	idm_so_buf_rx_from_ini,		/* it_buf_rx_from_ini */
    134 	idm_so_rx_datain,		/* it_rx_datain */
    135 	idm_so_rx_rtt,			/* it_rx_rtt */
    136 	idm_so_rx_dataout,		/* it_rx_dataout */
    137 	NULL,				/* it_alloc_conn_rsrc */
    138 	NULL,				/* it_free_conn_rsrc */
    139 	NULL,				/* it_tgt_enable_datamover */
    140 	NULL,				/* it_ini_enable_datamover */
    141 	NULL,				/* it_conn_terminate */
    142 	idm_so_free_task_rsrc,		/* it_free_task_rsrc */
    143 	idm_so_negotiate_key_values,	/* it_negotiate_key_values */
    144 	idm_so_notice_key_values,	/* it_notice_key_values */
    145 	idm_so_conn_is_capable,		/* it_conn_is_capable */
    146 	idm_so_buf_alloc,		/* it_buf_alloc */
    147 	idm_so_buf_free,		/* it_buf_free */
    148 	idm_so_buf_setup,		/* it_buf_setup */
    149 	idm_so_buf_teardown,		/* it_buf_teardown */
    150 	idm_so_tgt_svc_create,		/* it_tgt_svc_create */
    151 	idm_so_tgt_svc_destroy,		/* it_tgt_svc_destroy */
    152 	idm_so_tgt_svc_online,		/* it_tgt_svc_online */
    153 	idm_so_tgt_svc_offline,		/* it_tgt_svc_offline */
    154 	idm_so_tgt_conn_destroy,	/* it_tgt_conn_destroy */
    155 	idm_so_tgt_conn_connect,	/* it_tgt_conn_connect */
    156 	idm_so_conn_disconnect,		/* it_tgt_conn_disconnect */
    157 	idm_so_ini_conn_create,		/* it_ini_conn_create */
    158 	idm_so_ini_conn_destroy,	/* it_ini_conn_destroy */
    159 	idm_so_ini_conn_connect,	/* it_ini_conn_connect */
    160 	idm_so_conn_disconnect,		/* it_ini_conn_disconnect */
    161 	idm_so_declare_key_values	/* it_declare_key_values */
    162 };
    163 
    164 kmutex_t	idm_so_timed_socket_mutex;
    165 /*
    166  * idm_so_init()
    167  * Sockets transport initialization
    168  */
    169 void
    170 idm_so_init(idm_transport_t *it)
    171 {
    172 	/* Cache for IDM Data and R2T Transmit PDU's */
    173 	idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache",
    174 	    sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8,
    175 	    &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
    176 
    177 	/* Cache for IDM Receive PDU's */
    178 	idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache",
    179 	    sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8,
    180 	    &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
    181 
    182 	/* 128k buffer cache */
    183 	idm.idm_so_128k_buf_cache = kmem_cache_create("idm_128k_buf_cache",
    184 	    IDM_SO_BUF_CACHE_UB, 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);
    185 
    186 	/* Set the sockets transport ops */
    187 	it->it_ops = &idm_so_transport_ops;
    188 
    189 	mutex_init(&idm_so_timed_socket_mutex, NULL, MUTEX_DEFAULT, NULL);
    190 
    191 }
    192 
    193 /*
    194  * idm_so_fini()
    195  * Sockets transport teardown
    196  */
    197 void
    198 idm_so_fini(void)
    199 {
    200 	kmem_cache_destroy(idm.idm_so_128k_buf_cache);
    201 	kmem_cache_destroy(idm.idm_sotx_pdu_cache);
    202 	kmem_cache_destroy(idm.idm_sorx_pdu_cache);
    203 	mutex_destroy(&idm_so_timed_socket_mutex);
    204 }
    205 
    206 ksocket_t
    207 idm_socreate(int domain, int type, int protocol)
    208 {
    209 	ksocket_t ks;
    210 
    211 	if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP,
    212 	    CRED())) {
    213 		return (ks);
    214 	} else {
    215 		return (NULL);
    216 	}
    217 }
    218 
    219 /*
    220  * idm_soshutdown will disconnect the socket and prevent subsequent PDU
    221  * reception and transmission.  The sonode still exists but its state
    222  * gets modified to indicate it is no longer connected.  Calls to
    223  * idm_sorecv/idm_iov_sorecv will return so idm_soshutdown can be used
    224  * regain control of a thread stuck in idm_sorecv.
    225  */
    226 void
    227 idm_soshutdown(ksocket_t so)
    228 {
    229 	(void) ksocket_shutdown(so, SHUT_RDWR, CRED());
    230 }
    231 
    232 /*
    233  * idm_sodestroy releases all resources associated with a socket previously
    234  * created with idm_socreate.  The socket must be shutdown using
    235  * idm_soshutdown before the socket is destroyed with idm_sodestroy,
    236  * otherwise undefined behavior will result.
    237  */
    238 void
    239 idm_sodestroy(ksocket_t ks)
    240 {
    241 	(void) ksocket_close(ks, CRED());
    242 }
    243 
    244 /*
    245  * Function to compare two addresses in sockaddr_storage format
    246  */
    247 
    248 int
    249 idm_ss_compare(const struct sockaddr_storage *cmp_ss1,
    250     const struct sockaddr_storage *cmp_ss2,
    251     boolean_t v4_mapped_as_v4,
    252     boolean_t compare_ports)
    253 {
    254 	struct sockaddr_storage			mapped_v4_ss1, mapped_v4_ss2;
    255 	const struct sockaddr_storage		*ss1, *ss2;
    256 	struct in_addr				*in1, *in2;
    257 	struct in6_addr				*in61, *in62;
    258 	int i;
    259 
    260 	/*
    261 	 * Normalize V4-mapped IPv6 addresses into V4 format if
    262 	 * v4_mapped_as_v4 is B_TRUE.
    263 	 */
    264 	ss1 = cmp_ss1;
    265 	ss2 = cmp_ss2;
    266 	if (v4_mapped_as_v4 && (ss1->ss_family == AF_INET6)) {
    267 		in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
    268 		if (IN6_IS_ADDR_V4MAPPED(in61)) {
    269 			bzero(&mapped_v4_ss1, sizeof (mapped_v4_ss1));
    270 			mapped_v4_ss1.ss_family = AF_INET;
    271 			((struct sockaddr_in *)&mapped_v4_ss1)->sin_port =
    272 			    ((struct sockaddr_in *)ss1)->sin_port;
    273 			IN6_V4MAPPED_TO_INADDR(in61,
    274 			    &((struct sockaddr_in *)&mapped_v4_ss1)->sin_addr);
    275 			ss1 = &mapped_v4_ss1;
    276 		}
    277 	}
    278 	ss2 = cmp_ss2;
    279 	if (v4_mapped_as_v4 && (ss2->ss_family == AF_INET6)) {
    280 		in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
    281 		if (IN6_IS_ADDR_V4MAPPED(in62)) {
    282 			bzero(&mapped_v4_ss2, sizeof (mapped_v4_ss2));
    283 			mapped_v4_ss2.ss_family = AF_INET;
    284 			((struct sockaddr_in *)&mapped_v4_ss2)->sin_port =
    285 			    ((struct sockaddr_in *)ss2)->sin_port;
    286 			IN6_V4MAPPED_TO_INADDR(in62,
    287 			    &((struct sockaddr_in *)&mapped_v4_ss2)->sin_addr);
    288 			ss2 = &mapped_v4_ss2;
    289 		}
    290 	}
    291 
    292 	/*
    293 	 * Compare ports, then address family, then ip address
    294 	 */
    295 	if (compare_ports &&
    296 	    (((struct sockaddr_in *)ss1)->sin_port !=
    297 	    ((struct sockaddr_in *)ss2)->sin_port)) {
    298 		if (((struct sockaddr_in *)ss1)->sin_port >
    299 		    ((struct sockaddr_in *)ss2)->sin_port)
    300 			return (1);
    301 		else
    302 			return (-1);
    303 	}
    304 
    305 	/*
    306 	 * ports are the same
    307 	 */
    308 	if (ss1->ss_family != ss2->ss_family) {
    309 		if (ss1->ss_family == AF_INET)
    310 			return (1);
    311 		else
    312 			return (-1);
    313 	}
    314 
    315 	/*
    316 	 * address families are the same
    317 	 */
    318 	if (ss1->ss_family == AF_INET) {
    319 		in1 = &((struct sockaddr_in *)ss1)->sin_addr;
    320 		in2 = &((struct sockaddr_in *)ss2)->sin_addr;
    321 
    322 		if (in1->s_addr > in2->s_addr)
    323 			return (1);
    324 		else if (in1->s_addr < in2->s_addr)
    325 			return (-1);
    326 		else
    327 			return (0);
    328 	} else if (ss1->ss_family == AF_INET6) {
    329 		in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
    330 		in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
    331 
    332 		for (i = 0; i < 4; i++) {
    333 			if (in61->s6_addr32[i] > in62->s6_addr32[i])
    334 				return (1);
    335 			else if (in61->s6_addr32[i] < in62->s6_addr32[i])
    336 				return (-1);
    337 		}
    338 		return (0);
    339 	}
    340 
    341 	return (1);
    342 }
    343 
    344 /*
    345  * IP address filter functions to flag addresses that should not
    346  * go out to initiators through discovery.
    347  */
    348 static boolean_t
    349 idm_v4_addr_okay(struct in_addr *in_addr)
    350 {
    351 	in_addr_t addr = ntohl(in_addr->s_addr);
    352 
    353 	if ((INADDR_NONE == addr) ||
    354 	    (IN_MULTICAST(addr)) ||
    355 	    ((addr >> IN_CLASSA_NSHIFT) == 0) ||
    356 	    ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
    357 		return (B_FALSE);
    358 	}
    359 	return (B_TRUE);
    360 }
    361 
    362 static boolean_t
    363 idm_v6_addr_okay(struct in6_addr *addr6)
    364 {
    365 
    366 	if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) ||
    367 	    (IN6_IS_ADDR_LOOPBACK(addr6)) ||
    368 	    (IN6_IS_ADDR_MULTICAST(addr6)) ||
    369 	    (IN6_IS_ADDR_V4MAPPED(addr6)) ||
    370 	    (IN6_IS_ADDR_V4COMPAT(addr6)) ||
    371 	    (IN6_IS_ADDR_LINKLOCAL(addr6))) {
    372 		return (B_FALSE);
    373 	}
    374 	return (B_TRUE);
    375 }
    376 
    377 /*
    378  * idm_get_ipaddr will retrieve a list of IP Addresses which the host is
    379  * configured with by sending down a sequence of kernel ioctl to IP STREAMS.
    380  */
    381 int
    382 idm_get_ipaddr(idm_addr_list_t **ipaddr_p)
    383 {
    384 	ksocket_t 		so4, so6;
    385 	struct lifnum		lifn;
    386 	struct lifconf		lifc;
    387 	struct lifreq		*lp;
    388 	int			rval;
    389 	int			numifs;
    390 	int			bufsize;
    391 	void			*buf;
    392 	int			i, j, n, rc;
    393 	struct sockaddr_storage	ss;
    394 	struct sockaddr_in	*sin;
    395 	struct sockaddr_in6	*sin6;
    396 	idm_addr_t		*ip;
    397 	idm_addr_list_t		*ipaddr = NULL;
    398 	int			size_ipaddr;
    399 
    400 	*ipaddr_p = NULL;
    401 	size_ipaddr = 0;
    402 	buf = NULL;
    403 
    404 	/* create an ipv4 and ipv6 UDP socket */
    405 	if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL)
    406 		return (0);
    407 	if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) {
    408 		idm_sodestroy(so6);
    409 		return (0);
    410 	}
    411 
    412 
    413 retry_count:
    414 	/* snapshot the current number of interfaces */
    415 	lifn.lifn_family = PF_UNSPEC;
    416 	lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
    417 	lifn.lifn_count = 0;
    418 	/* use vp6 for ioctls with unspecified families by default */
    419 	if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED())
    420 	    != 0) {
    421 		goto cleanup;
    422 	}
    423 
    424 	numifs = lifn.lifn_count;
    425 	if (numifs <= 0) {
    426 		goto cleanup;
    427 	}
    428 
    429 	/* allocate extra room in case more interfaces appear */
    430 	numifs += 10;
    431 
    432 	/* get the interface names and ip addresses */
    433 	bufsize = numifs * sizeof (struct lifreq);
    434 	buf = kmem_alloc(bufsize, KM_SLEEP);
    435 
    436 	lifc.lifc_family = AF_UNSPEC;
    437 	lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
    438 	lifc.lifc_len = bufsize;
    439 	lifc.lifc_buf = buf;
    440 	rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
    441 	if (rc != 0) {
    442 		goto cleanup;
    443 	}
    444 	/* if our extra room is used up, try again */
    445 	if (bufsize <= lifc.lifc_len) {
    446 		kmem_free(buf, bufsize);
    447 		buf = NULL;
    448 		goto retry_count;
    449 	}
    450 	/* calc actual number of ifconfs */
    451 	n = lifc.lifc_len / sizeof (struct lifreq);
    452 
    453 	/* get ip address */
    454 	if (n > 0) {
    455 		size_ipaddr = sizeof (idm_addr_list_t) +
    456 		    (n - 1) * sizeof (idm_addr_t);
    457 		ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP);
    458 	} else {
    459 		goto cleanup;
    460 	}
    461 
    462 	/*
    463 	 * Examine the array of interfaces and filter uninteresting ones
    464 	 */
    465 	for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
    466 
    467 		/*
    468 		 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
    469 		 */
    470 		ss = lp->lifr_addr;
    471 		/*
    472 		 * fetch the flags using the socket of the correct family
    473 		 */
    474 		switch (ss.ss_family) {
    475 		case AF_INET:
    476 			rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp,
    477 			    &rval, CRED());
    478 			break;
    479 		case AF_INET6:
    480 			rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp,
    481 			    &rval, CRED());
    482 			break;
    483 		default:
    484 			continue;
    485 		}
    486 		if (rc == 0) {
    487 			/*
    488 			 * If we got the flags, skip uninteresting
    489 			 * interfaces based on flags
    490 			 */
    491 			if ((lp->lifr_flags & IFF_UP) != IFF_UP)
    492 				continue;
    493 			if (lp->lifr_flags &
    494 			    (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
    495 				continue;
    496 		}
    497 
    498 		/* save ip address */
    499 		ip = &ipaddr->al_addrs[j];
    500 		switch (ss.ss_family) {
    501 		case AF_INET:
    502 			sin = (struct sockaddr_in *)&ss;
    503 			if (!idm_v4_addr_okay(&sin->sin_addr))
    504 				continue;
    505 			ip->a_addr.i_addr.in4 = sin->sin_addr;
    506 			ip->a_addr.i_insize = sizeof (struct in_addr);
    507 			break;
    508 		case AF_INET6:
    509 			sin6 = (struct sockaddr_in6 *)&ss;
    510 			if (!idm_v6_addr_okay(&sin6->sin6_addr))
    511 				continue;
    512 			ip->a_addr.i_addr.in6 = sin6->sin6_addr;
    513 			ip->a_addr.i_insize = sizeof (struct in6_addr);
    514 			break;
    515 		default:
    516 			continue;
    517 		}
    518 		j++;
    519 	}
    520 
    521 	if (j == 0) {
    522 		/* no valid ifaddr */
    523 		kmem_free(ipaddr, size_ipaddr);
    524 		size_ipaddr = 0;
    525 		ipaddr = NULL;
    526 	} else {
    527 		ipaddr->al_out_cnt = j;
    528 	}
    529 
    530 
    531 cleanup:
    532 	idm_sodestroy(so6);
    533 	idm_sodestroy(so4);
    534 
    535 	if (buf != NULL)
    536 		kmem_free(buf, bufsize);
    537 
    538 	*ipaddr_p = ipaddr;
    539 	return (size_ipaddr);
    540 }
    541 
    542 int
    543 idm_sorecv(ksocket_t so, void *msg, size_t len)
    544 {
    545 	iovec_t iov;
    546 
    547 	ASSERT(so != NULL);
    548 	ASSERT(len != 0);
    549 
    550 	/*
    551 	 * Fill in iovec and receive data
    552 	 */
    553 	iov.iov_base = msg;
    554 	iov.iov_len = len;
    555 
    556 	return (idm_iov_sorecv(so, &iov, 1, len));
    557 }
    558 
    559 /*
    560  * idm_sosendto - Sends a buffered data on a non-connected socket.
    561  *
    562  * This function puts the data provided on the wire by calling sosendmsg.
    563  * It will return only when all the data has been sent or if an error
    564  * occurs.
    565  *
    566  * Returns 0 for success, the socket errno value if sosendmsg fails, and
    567  * -1 if sosendmsg returns success but uio_resid != 0
    568  */
    569 int
    570 idm_sosendto(ksocket_t so, void *buff, size_t len,
    571     struct sockaddr *name, socklen_t namelen)
    572 {
    573 	struct msghdr		msg;
    574 	struct iovec		iov[1];
    575 	int			error;
    576 	size_t			sent = 0;
    577 
    578 	iov[0].iov_base	= buff;
    579 	iov[0].iov_len	= len;
    580 
    581 	/* Initialization of the message header. */
    582 	bzero(&msg, sizeof (msg));
    583 	msg.msg_iov	= iov;
    584 	msg.msg_iovlen	= 1;
    585 	msg.msg_name	= name;
    586 	msg.msg_namelen	= namelen;
    587 
    588 	if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) {
    589 		/* Data sent */
    590 		if (sent == len) {
    591 			/* All data sent.  Success. */
    592 			return (0);
    593 		} else {
    594 			/* Not all data was sent.  Failure */
    595 			return (-1);
    596 		}
    597 	}
    598 
    599 	/* Send failed */
    600 	return (error);
    601 }
    602 
    603 /*
    604  * idm_iov_sosend - Sends an iovec on a connection.
    605  *
    606  * This function puts the data provided on the wire by calling sosendmsg.
    607  * It will return only when all the data has been sent or if an error
    608  * occurs.
    609  *
    610  * Returns 0 for success, the socket errno value if sosendmsg fails, and
    611  * -1 if sosendmsg returns success but uio_resid != 0
    612  */
    613 int
    614 idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
    615 {
    616 	struct msghdr		msg;
    617 	int			error;
    618 	size_t 			sent = 0;
    619 
    620 	ASSERT(iop != NULL);
    621 
    622 	/* Initialization of the message header. */
    623 	bzero(&msg, sizeof (msg));
    624 	msg.msg_iov	= iop;
    625 	msg.msg_iovlen	= iovlen;
    626 
    627 	if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED()))
    628 	    == 0) {
    629 		/* Data sent */
    630 		if (sent == total_len) {
    631 			/* All data sent.  Success. */
    632 			return (0);
    633 		} else {
    634 			/* Not all data was sent.  Failure */
    635 			return (-1);
    636 		}
    637 	}
    638 
    639 	/* Send failed */
    640 	return (error);
    641 }
    642 
    643 /*
    644  * idm_iov_sorecv - Receives an iovec from a connection
    645  *
    646  * This function gets the data asked for from the socket.  It will return
    647  * only when all the requested data has been retrieved or if an error
    648  * occurs.
    649  *
    650  * Returns 0 for success, the socket errno value if sorecvmsg fails, and
    651  * -1 if sorecvmsg returns success but uio_resid != 0
    652  */
    653 int
    654 idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
    655 {
    656 	struct msghdr		msg;
    657 	int			error;
    658 	size_t			recv;
    659 	int 			flags;
    660 
    661 	ASSERT(iop != NULL);
    662 
    663 	/* Initialization of the message header. */
    664 	bzero(&msg, sizeof (msg));
    665 	msg.msg_iov	= iop;
    666 	msg.msg_iovlen	= iovlen;
    667 	flags		= MSG_WAITALL;
    668 
    669 	if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED()))
    670 	    == 0) {
    671 		/* Received data */
    672 		if (recv == total_len) {
    673 			/* All requested data received.  Success */
    674 			return (0);
    675 		} else {
    676 			/*
    677 			 * Not all data was received.  The connection has
    678 			 * probably failed.
    679 			 */
    680 			return (-1);
    681 		}
    682 	}
    683 
    684 	/* Receive failed */
    685 	return (error);
    686 }
    687 
    688 static void
    689 idm_set_ini_preconnect_options(idm_so_conn_t *sc, boolean_t boot_conn)
    690 {
    691 	int	conn_abort = 10000;
    692 	int	conn_notify = 2000;
    693 	int	abort = 30000;
    694 
    695 	/* Pre-connect socket options */
    696 	(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
    697 	    TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int),
    698 	    CRED());
    699 	if (boot_conn == B_FALSE) {
    700 		(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
    701 		    TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int),
    702 		    CRED());
    703 		(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
    704 		    TCP_ABORT_THRESHOLD,
    705 		    (char *)&abort, sizeof (int), CRED());
    706 	}
    707 }
    708 
    709 static void
    710 idm_set_ini_postconnect_options(idm_so_conn_t *sc)
    711 {
    712 	int32_t		rcvbuf = IDM_RCVBUF_SIZE;
    713 	int32_t		sndbuf = IDM_SNDBUF_SIZE;
    714 	const int	on = 1;
    715 
    716 	/* Set postconnect options */
    717 	(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, TCP_NODELAY,
    718 	    (char *)&on, sizeof (int), CRED());
    719 	(void) ksocket_setsockopt(sc->ic_so, SOL_SOCKET, SO_RCVBUF,
    720 	    (char *)&rcvbuf, sizeof (int), CRED());
    721 	(void) ksocket_setsockopt(sc->ic_so, SOL_SOCKET, SO_SNDBUF,
    722 	    (char *)&sndbuf, sizeof (int), CRED());
    723 }
    724 
    725 static void
    726 idm_set_tgt_connect_options(ksocket_t ks)
    727 {
    728 	int32_t		rcvbuf = IDM_RCVBUF_SIZE;
    729 	int32_t		sndbuf = IDM_SNDBUF_SIZE;
    730 	const int	on = 1;
    731 
    732 	/* Set connect options */
    733 	(void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF,
    734 	    (char *)&rcvbuf, sizeof (int), CRED());
    735 	(void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF,
    736 	    (char *)&sndbuf, sizeof (int), CRED());
    737 	(void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY,
    738 	    (char *)&on, sizeof (on), CRED());
    739 }
    740 
    741 static uint32_t
    742 n2h24(const uchar_t *ptr)
    743 {
    744 	return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]);
    745 }
    746 
    747 
    748 static idm_status_t
    749 idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
    750 {
    751 	iscsi_hdr_t	*bhs;
    752 	uint32_t	hdr_digest_crc;
    753 	uint32_t	crc_calculated;
    754 	void		*new_hdr;
    755 	int		ahslen = 0;
    756 	int		total_len = 0;
    757 	int		iovlen = 0;
    758 	struct iovec	iov[2];
    759 	idm_so_conn_t	*so_conn;
    760 	int		rc;
    761 
    762 	so_conn = ic->ic_transport_private;
    763 
    764 	/*
    765 	 * Read BHS
    766 	 */
    767 	bhs = pdu->isp_hdr;
    768 	rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t));
    769 	if (rc != IDM_STATUS_SUCCESS) {
    770 		return (IDM_STATUS_FAIL);
    771 	}
    772 
    773 	/*
    774 	 * Check actual AHS length against the amount available in the buffer
    775 	 */
    776 	pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
    777 	    (bhs->hlength * sizeof (uint32_t));
    778 	pdu->isp_datalen = n2h24(bhs->dlength);
    779 	if (ic->ic_conn_type == CONN_TYPE_TGT &&
    780 	    pdu->isp_datalen > ic->ic_conn_params.max_recv_dataseglen) {
    781 		IDM_CONN_LOG(CE_WARN,
    782 		    "idm_sorecvhdr: exceeded the max data segment length");
    783 		return (IDM_STATUS_FAIL);
    784 	}
    785 	if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) {
    786 		/* Allocate a new header segment and change the callback */
    787 		new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP);
    788 		bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t));
    789 		pdu->isp_hdr = new_hdr;
    790 		pdu->isp_flags |= IDM_PDU_ADDL_HDR;
    791 
    792 		/*
    793 		 * This callback will restore the expected values after
    794 		 * the RX PDU has been processed.
    795 		 */
    796 		pdu->isp_callback = idm_sorx_addl_pdu_cb;
    797 	}
    798 
    799 	/*
    800 	 * Setup receipt of additional header and header digest (if enabled).
    801 	 */
    802 	if (bhs->hlength > 0) {
    803 		iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1);
    804 		ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t);
    805 		iov[iovlen].iov_len = ahslen;
    806 		total_len += iov[iovlen].iov_len;
    807 		iovlen++;
    808 	}
    809 
    810 	if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
    811 		iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
    812 		iov[iovlen].iov_len = sizeof (hdr_digest_crc);
    813 		total_len += iov[iovlen].iov_len;
    814 		iovlen++;
    815 	}
    816 
    817 	if ((iovlen != 0) &&
    818 	    (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen,
    819 	    total_len) != 0)) {
    820 		return (IDM_STATUS_FAIL);
    821 	}
    822 
    823 	/*
    824 	 * Validate header digest if enabled
    825 	 */
    826 	if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
    827 		crc_calculated = idm_crc32c(pdu->isp_hdr,
    828 		    sizeof (iscsi_hdr_t) + ahslen);
    829 		if (crc_calculated != hdr_digest_crc) {
    830 			/* Invalid Header Digest */
    831 			return (IDM_STATUS_HEADER_DIGEST);
    832 		}
    833 	}
    834 
    835 	return (0);
    836 }
    837 
    838 /*
    839  * idm_so_ini_conn_create()
    840  * Allocate the sockets transport connection resources.
    841  */
    842 static idm_status_t
    843 idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
    844 {
    845 	ksocket_t	so;
    846 	idm_so_conn_t	*so_conn;
    847 	idm_status_t	idmrc;
    848 
    849 	so = idm_socreate(cr->cr_domain, cr->cr_type,
    850 	    cr->cr_protocol);
    851 	if (so == NULL) {
    852 		return (IDM_STATUS_FAIL);
    853 	}
    854 
    855 	/* Bind the socket if configured to do so */
    856 	if (cr->cr_bound) {
    857 		if (ksocket_bind(so, &cr->cr_bound_addr.sin,
    858 		    SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) {
    859 			idm_sodestroy(so);
    860 			return (IDM_STATUS_FAIL);
    861 		}
    862 	}
    863 
    864 	idmrc = idm_so_conn_create_common(ic, so);
    865 	if (idmrc != IDM_STATUS_SUCCESS) {
    866 		idm_soshutdown(so);
    867 		idm_sodestroy(so);
    868 		return (IDM_STATUS_FAIL);
    869 	}
    870 
    871 	so_conn = ic->ic_transport_private;
    872 	/* Set up socket options */
    873 	idm_set_ini_preconnect_options(so_conn, cr->cr_boot_conn);
    874 
    875 	return (IDM_STATUS_SUCCESS);
    876 }
    877 
    878 /*
    879  * idm_so_ini_conn_destroy()
    880  * Tear down the sockets transport connection resources.
    881  */
    882 static void
    883 idm_so_ini_conn_destroy(idm_conn_t *ic)
    884 {
    885 	idm_so_conn_destroy_common(ic);
    886 }
    887 
/*
 * idm_so_ini_conn_connect()
 * Establish the connection referred to by the handle previously allocated via
 * idm_so_ini_conn_create().
 *
 * Two modes are supported, selected by ic_conn_params.nonblock_socket:
 *
 *  - blocking: a single ksocket_connect() call.
 *  - non-blocking: the socket is switched to non-blocking mode and
 *    ksocket_connect() is retried in a loop until it succeeds, fails with a
 *    terminal error, or the current lbolt passes conn_login_max.
 *
 * Returns IDM_STATUS_SUCCESS once the socket is connected and the transport
 * threads have been started; otherwise the socket is shut down and
 * IDM_STATUS_FAIL is returned.
 */
static idm_status_t
idm_so_ini_conn_connect(idm_conn_t *ic)
{
	idm_so_conn_t	*so_conn;
	struct sonode	*node = NULL;
	int 		rc;
	clock_t		lbolt, conn_login_max, conn_login_interval;
	boolean_t	nonblock;

	so_conn = ic->ic_transport_private;
	nonblock = ic->ic_conn_params.nonblock_socket;
	/* Compared directly against ddi_get_lbolt() below (absolute ticks) */
	conn_login_max = ic->ic_conn_params.conn_login_max;
	/* End of the current retry interval, in absolute ticks */
	conn_login_interval = ddi_get_lbolt() +
	    SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);

	if (nonblock == B_TRUE) {
		node = ((struct sonode *)(so_conn->ic_so));
		/* Switch the socket to non-blocking mode */
		idm_so_socket_set_nonblock(node);
		do {
			rc = ksocket_connect(so_conn->ic_so,
			    &ic->ic_ini_dst_addr.sin,
			    (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)),
			    CRED());
			if (rc == 0 || rc == EISCONN) {
				/* Connected now, or already connected */
				rc = IDM_STATUS_SUCCESS;
				break;
			}
			if ((rc == ETIMEDOUT) || (rc == ECONNREFUSED) ||
			    (rc == ECONNRESET)) {
				/* Terminal errors: give up, rc stays non-zero */
				break;
			}
			lbolt = ddi_get_lbolt();
			if (lbolt > conn_login_max) {
				/*
				 * Connection retry timeout,
				 * failed connect to target.
				 */
				break;
			}
			if (lbolt < conn_login_interval) {
				if ((rc == EINPROGRESS) || (rc == EALREADY)) {
					/*
					 * TCP connect still in progress: poll
					 * again after a short delay without
					 * starting a new retry interval.
					 */
					delay(SEC_TO_TICK(IN_PROGRESS_DELAY));
					continue;
				} else {
					/* Sleep out the rest of the interval */
					delay(conn_login_interval - lbolt);
				}
			}
			/* Start a new retry interval */
			conn_login_interval = ddi_get_lbolt() +
			    SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
		} while (rc != 0);
		/*
		 * Restore blocking mode once connected.  On failure the
		 * socket is left non-blocking; it is shut down just below.
		 */
		if (rc == IDM_STATUS_SUCCESS) {
			idm_so_socket_set_block(node);
		}
	} else {
		rc = ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
		    (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED());
	}

	/*
	 * rc is either an errno from ksocket_connect() or
	 * IDM_STATUS_SUCCESS; this test relies on IDM_STATUS_SUCCESS
	 * being 0 -- NOTE(review): confirm against the idm_status_t enum.
	 */
	if (rc != 0) {
		idm_soshutdown(so_conn->ic_so);
		return (IDM_STATUS_FAIL);
	}

	/* Record endpoint addresses and start the TX/RX threads */
	idm_so_conn_connect_common(ic);

	idm_set_ini_postconnect_options(so_conn);

	return (IDM_STATUS_SUCCESS);
}
    967 
    968 idm_status_t
    969 idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so)
    970 {
    971 	idm_status_t	idmrc;
    972 
    973 	idmrc = idm_so_conn_create_common(ic, new_so);
    974 
    975 	return (idmrc);
    976 }
    977 
/*
 * idm_so_tgt_conn_destroy()
 * Target-side teardown of the sockets transport connection resources.
 * Delegates to idm_so_conn_destroy_common(), shared with the initiator side.
 */
static void
idm_so_tgt_conn_destroy(idm_conn_t *ic)
{
	idm_so_conn_destroy_common(ic);
}
    983 
/*
 * idm_so_tgt_conn_connect()
 * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
 * is invoked from the SM as a result of an inbound connection request.
 *
 * The socket is already connected at this point, so the only work is the
 * common post-connect step (idm_so_conn_connect_common()).  Always returns
 * IDM_STATUS_SUCCESS.
 */
static idm_status_t
idm_so_tgt_conn_connect(idm_conn_t *ic)
{
	idm_so_conn_connect_common(ic);

	return (IDM_STATUS_SUCCESS);
}
    996 
    997 static idm_status_t
    998 idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so)
    999 {
   1000 	idm_so_conn_t	*so_conn;
   1001 
   1002 	so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP);
   1003 	so_conn->ic_so = new_so;
   1004 
   1005 	ic->ic_transport_private = so_conn;
   1006 	ic->ic_transport_hdrlen = 0;
   1007 
   1008 	/* Set the scoreboarding flag on this connection */
   1009 	ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD;
   1010 	ic->ic_conn_params.max_recv_dataseglen =
   1011 	    ISCSI_DEFAULT_MAX_RECV_SEG_LEN;
   1012 	ic->ic_conn_params.max_xmit_dataseglen =
   1013 	    ISCSI_DEFAULT_MAX_XMIT_SEG_LEN;
   1014 
   1015 	/*
   1016 	 * Initialize tx thread mutex and list
   1017 	 */
   1018 	mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL);
   1019 	cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL);
   1020 	list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t),
   1021 	    offsetof(idm_pdu_t, idm_tx_link));
   1022 
   1023 	return (IDM_STATUS_SUCCESS);
   1024 }
   1025 
   1026 static void
   1027 idm_so_conn_destroy_common(idm_conn_t *ic)
   1028 {
   1029 	idm_so_conn_t	*so_conn = ic->ic_transport_private;
   1030 
   1031 	ic->ic_transport_private = NULL;
   1032 	idm_sodestroy(so_conn->ic_so);
   1033 	list_destroy(&so_conn->ic_tx_list);
   1034 	mutex_destroy(&so_conn->ic_tx_mutex);
   1035 	cv_destroy(&so_conn->ic_tx_cv);
   1036 
   1037 	kmem_free(so_conn, sizeof (idm_so_conn_t));
   1038 }
   1039 
   1040 static void
   1041 idm_so_conn_connect_common(idm_conn_t *ic)
   1042 {
   1043 	idm_so_conn_t	*so_conn;
   1044 	struct sockaddr_in6	t_addr;
   1045 	socklen_t	t_addrlen = 0;
   1046 
   1047 	so_conn = ic->ic_transport_private;
   1048 	bzero(&t_addr, sizeof (struct sockaddr_in6));
   1049 	t_addrlen = sizeof (struct sockaddr_in6);
   1050 
   1051 	/* Set the local and remote addresses in the idm conn handle */
   1052 	(void) ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr,
   1053 	    &t_addrlen, CRED());
   1054 	bcopy(&t_addr, &ic->ic_laddr, t_addrlen);
   1055 	(void) ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr,
   1056 	    &t_addrlen, CRED());
   1057 	bcopy(&t_addr, &ic->ic_raddr, t_addrlen);
   1058 
   1059 	mutex_enter(&ic->ic_mutex);
   1060 	so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0,
   1061 	    &p0, TS_RUN, minclsyspri);
   1062 	so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0,
   1063 	    &p0, TS_RUN, minclsyspri);
   1064 
   1065 	while (!so_conn->ic_rx_thread_running || !so_conn->ic_tx_thread_running)
   1066 		cv_wait(&ic->ic_cv, &ic->ic_mutex);
   1067 	mutex_exit(&ic->ic_mutex);
   1068 }
   1069 
/*
 * idm_so_conn_disconnect()
 * Shutdown the socket connection and stop the thread
 *
 * Sequence:
 *  1. Under ic_mutex, clear both thread "running" flags and signal the
 *     TX condition variable.
 *  2. Shut the socket down.
 *  3. Join both threads, so this function does not return until they
 *     have exited.
 */
static void
idm_so_conn_disconnect(idm_conn_t *ic)
{
	idm_so_conn_t	*so_conn;

	so_conn = ic->ic_transport_private;

	mutex_enter(&ic->ic_mutex);
	so_conn->ic_rx_thread_running = B_FALSE;
	so_conn->ic_tx_thread_running = B_FALSE;
	/* We need to wakeup the TX thread */
	mutex_enter(&so_conn->ic_tx_mutex);
	cv_signal(&so_conn->ic_tx_cv);
	mutex_exit(&so_conn->ic_tx_mutex);
	mutex_exit(&ic->ic_mutex);

	/* This should wakeup the RX thread if it is sleeping */
	idm_soshutdown(so_conn->ic_so);

	/*
	 * NOTE(review): the *_thread_did fields are not assigned anywhere in
	 * this part of the file; presumably each thread records its own id
	 * at startup -- confirm in idm_sotx_thread()/idm_sorx_thread().
	 */
	thread_join(so_conn->ic_tx_thread_did);
	thread_join(so_conn->ic_rx_thread_did);
}
   1096 
   1097 /*
   1098  * idm_so_tgt_svc_create()
   1099  * Establish a service on an IP address and port.  idm_svc_req_t contains
   1100  * the service parameters.
   1101  */
   1102 /*ARGSUSED*/
   1103 static idm_status_t
   1104 idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
   1105 {
   1106 	idm_so_svc_t		*so_svc;
   1107 
   1108 	so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP);
   1109 
   1110 	/* Set the new sockets service in svc handle */
   1111 	is->is_so_svc = (void *)so_svc;
   1112 
   1113 	return (IDM_STATUS_SUCCESS);
   1114 }
   1115 
/*
 * idm_so_tgt_svc_destroy()
 * Teardown sockets resources allocated in idm_so_tgt_svc_create()
 *
 * Only the idm_so_svc_t itself is freed here; the socket is not touched.
 * is->is_so_svc is left pointing at the freed memory, so the caller must
 * not use it afterwards.
 */
static void
idm_so_tgt_svc_destroy(idm_svc_t *is)
{
	/* the socket will have been torn down; free the service */
	kmem_free(is->is_so_svc, sizeof (idm_so_svc_t));
}
   1126 
   1127 /*
   1128  * idm_so_tgt_svc_online()
   1129  * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create()
   1130  */
   1131 
   1132 static idm_status_t
   1133 idm_so_tgt_svc_online(idm_svc_t *is)
   1134 {
   1135 	idm_so_svc_t		*so_svc;
   1136 	idm_svc_req_t		*sr = &is->is_svc_req;
   1137 	struct sockaddr_in6	sin6_ip;
   1138 	const uint32_t		on = 1;
   1139 	const uint32_t		off = 0;
   1140 
   1141 	mutex_enter(&is->is_mutex);
   1142 	so_svc = (idm_so_svc_t *)is->is_so_svc;
   1143 
   1144 	/*
   1145 	 * Try creating an IPv6 socket first
   1146 	 */
   1147 	if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) {
   1148 		mutex_exit(&is->is_mutex);
   1149 		return (IDM_STATUS_FAIL);
   1150 	} else {
   1151 		bzero(&sin6_ip, sizeof (sin6_ip));
   1152 		sin6_ip.sin6_family = AF_INET6;
   1153 		sin6_ip.sin6_port = htons(sr->sr_port);
   1154 		sin6_ip.sin6_addr = in6addr_any;
   1155 
   1156 		(void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
   1157 		    SO_REUSEADDR, (char *)&on, sizeof (on), CRED());
   1158 		/*
   1159 		 * Turn off SO_MAC_EXEMPT so future sobinds succeed
   1160 		 */
   1161 		(void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
   1162 		    SO_MAC_EXEMPT, (char *)&off, sizeof (off), CRED());
   1163 
   1164 		if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip,
   1165 		    sizeof (sin6_ip), CRED()) != 0) {
   1166 			mutex_exit(&is->is_mutex);
   1167 			idm_sodestroy(so_svc->is_so);
   1168 			return (IDM_STATUS_FAIL);
   1169 		}
   1170 	}
   1171 
   1172 	idm_set_tgt_connect_options(so_svc->is_so);
   1173 
   1174 	if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) {
   1175 		mutex_exit(&is->is_mutex);
   1176 		idm_soshutdown(so_svc->is_so);
   1177 		idm_sodestroy(so_svc->is_so);
   1178 		return (IDM_STATUS_FAIL);
   1179 	}
   1180 
   1181 	/* Launch a watch thread */
   1182 	so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher,
   1183 	    is, 0, &p0, TS_RUN, minclsyspri);
   1184 
   1185 	if (so_svc->is_thread == NULL) {
   1186 		/* Failure to launch; teardown the socket */
   1187 		mutex_exit(&is->is_mutex);
   1188 		idm_soshutdown(so_svc->is_so);
   1189 		idm_sodestroy(so_svc->is_so);
   1190 		return (IDM_STATUS_FAIL);
   1191 	}
   1192 	ksocket_hold(so_svc->is_so);
   1193 	/* Wait for the port watcher thread to start */
   1194 	while (!so_svc->is_thread_running)
   1195 		cv_wait(&is->is_cv, &is->is_mutex);
   1196 	mutex_exit(&is->is_mutex);
   1197 
   1198 	return (IDM_STATUS_SUCCESS);
   1199 }
   1200 
/*
 * idm_so_tgt_svc_offline
 *
 * Stop listening on the IP address and port identified by idm_svc_t.
 *
 * Clears is_thread_running under is_mutex so the port watcher's loop
 * condition fails, destroys the listening socket, and then joins the
 * watcher thread; this function does not return until that thread has
 * exited.
 */
static void
idm_so_tgt_svc_offline(idm_svc_t *is)
{
	idm_so_svc_t		*so_svc;
	mutex_enter(&is->is_mutex);
	so_svc = (idm_so_svc_t *)is->is_so_svc;
	so_svc->is_thread_running = B_FALSE;
	mutex_exit(&is->is_mutex);

	/*
	 * Teardown socket
	 *
	 * NOTE(review): presumably this is also what unblocks a watcher
	 * sleeping in ksocket_accept() -- confirm in idm_sodestroy().
	 */
	idm_sodestroy(so_svc->is_so);

	/*
	 * Now we expect the port watcher thread to terminate
	 */
	thread_join(so_svc->is_thread_did);
}
   1225 
   1226 /*
   1227  * Watch thread for target service connection establishment.
   1228  */
   1229 void
   1230 idm_so_svc_port_watcher(void *arg)
   1231 {
   1232 	idm_svc_t		*svc = arg;
   1233 	ksocket_t		new_so;
   1234 	idm_conn_t		*ic;
   1235 	idm_status_t		idmrc;
   1236 	idm_so_svc_t		*so_svc;
   1237 	int			rc;
   1238 	const uint32_t		off = 0;
   1239 	struct sockaddr_in6 	t_addr;
   1240 	socklen_t		t_addrlen;
   1241 
   1242 	bzero(&t_addr, sizeof (struct sockaddr_in6));
   1243 	t_addrlen = sizeof (struct sockaddr_in6);
   1244 	mutex_enter(&svc->is_mutex);
   1245 
   1246 	so_svc = svc->is_so_svc;
   1247 	so_svc->is_thread_running = B_TRUE;
   1248 	so_svc->is_thread_did = so_svc->is_thread->t_did;
   1249 
   1250 	cv_signal(&svc->is_cv);
   1251 
   1252 	IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc,
   1253 	    svc->is_svc_req.sr_port);
   1254 
   1255 	while (so_svc->is_thread_running) {
   1256 		mutex_exit(&svc->is_mutex);
   1257 
   1258 		if ((rc = ksocket_accept(so_svc->is_so,
   1259 		    (struct sockaddr *)&t_addr, &t_addrlen,
   1260 		    &new_so, CRED())) != 0) {
   1261 			mutex_enter(&svc->is_mutex);
   1262 			if (rc == ECONNABORTED)
   1263 				continue;
   1264 			/* Connection problem */
   1265 			break;
   1266 		}
   1267 		/*
   1268 		 * Turn off SO_MAC_EXEMPT so future sobinds succeed
   1269 		 */
   1270 		(void) ksocket_setsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT,
   1271 		    (char *)&off, sizeof (off), CRED());
   1272 
   1273 		idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
   1274 		    &ic);
   1275 		if (idmrc != IDM_STATUS_SUCCESS) {
   1276 			/* Drop connection */
   1277 			idm_soshutdown(new_so);
   1278 			idm_sodestroy(new_so);
   1279 			mutex_enter(&svc->is_mutex);
   1280 			continue;
   1281 		}
   1282 
   1283 		idmrc = idm_so_tgt_conn_create(ic, new_so);
   1284 		if (idmrc != IDM_STATUS_SUCCESS) {
   1285 			idm_svc_conn_destroy(ic);
   1286 			idm_soshutdown(new_so);
   1287 			idm_sodestroy(new_so);
   1288 			mutex_enter(&svc->is_mutex);
   1289 			continue;
   1290 		}
   1291 
   1292 		/*
   1293 		 * Kick the state machine.  At CS_S3_XPT_UP the state machine
   1294 		 * will notify the client (target) about the new connection.
   1295 		 */
   1296 		idm_conn_event(ic, CE_CONNECT_ACCEPT, NULL);
   1297 
   1298 		mutex_enter(&svc->is_mutex);
   1299 	}
   1300 	ksocket_rele(so_svc->is_so);
   1301 	so_svc->is_thread_running = B_FALSE;
   1302 	mutex_exit(&svc->is_mutex);
   1303 
   1304 	IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc,
   1305 	    svc->is_svc_req.sr_port);
   1306 
   1307 	thread_exit();
   1308 }
   1309 
/*
 * idm_so_free_task_rsrc() stops any ongoing processing of the task and
 * frees resources associated with the task.
 *
 * It's not clear that this should return idm_status_t.  What do we do
 * if it fails?
 *
 * As written, the function always returns IDM_STATUS_SUCCESS.
 */
static idm_status_t
idm_so_free_task_rsrc(idm_task_t *idt)
{
	idm_buf_t	*idb, *next_idb;

	/*
	 * There is nothing to cleanup on initiator connections
	 */
	if (IDM_CONN_ISINI(idt->idt_ic))
		return (IDM_STATUS_SUCCESS);

	/*
	 * If this is a target connection, call idm_buf_rx_from_ini_done for
	 * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
	 *
	 * In addition, complete (as aborted) any buffer on the idt_inbufv
	 * list that is in transport but not owned by the TX thread.  Buffers
	 * flagged idb_tx_thread are left for idm_sotx_thread() to clean up.
	 * Items don't actually get removed from the list (and completion
	 * routines called) until idm_task_cleanup.
	 *
	 * NOTE(review): both loops fetch next_idb before calling a
	 * completion routine that drops idt_mutex; this looks like it
	 * assumes the lists are not modified while the mutex is released
	 * -- confirm.
	 */
	mutex_enter(&idt->idt_mutex);

	for (idb = list_head(&idt->idt_outbufv); idb != NULL; idb = next_idb) {
		next_idb = list_next(&idt->idt_outbufv, idb);
		if (idb->idb_in_transport) {
			/*
			 * idm_buf_rx_from_ini_done releases idt->idt_mutex
			 */
			DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
			    uintptr_t, idb->idb_buf,
			    uint32_t, idb->idb_bufoffset,
			    uint64_t, 0, uint32_t, 0, uint32_t, 0,
			    uint32_t, idb->idb_xfer_len,
			    int, XFER_BUF_RX_FROM_INI);
			idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
			/* Retake the mutex to continue the walk */
			mutex_enter(&idt->idt_mutex);
		}
	}

	for (idb = list_head(&idt->idt_inbufv); idb != NULL; idb = next_idb) {
		next_idb = list_next(&idt->idt_inbufv, idb);
		/*
		 * We want to remove these items from the tx_list as well,
		 * but knowing it's in the idt_inbufv list is not a guarantee
		 * that it's in the tx_list.  If it's on the tx list then
		 * let idm_sotx_thread() clean it up.
		 */
		if (idb->idb_in_transport && !idb->idb_tx_thread) {
			/*
			 * idm_buf_tx_to_ini_done releases idt->idt_mutex
			 */
			DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
			    uintptr_t, idb->idb_buf,
			    uint32_t, idb->idb_bufoffset,
			    uint64_t, 0, uint32_t, 0, uint32_t, 0,
			    uint32_t, idb->idb_xfer_len,
			    int, XFER_BUF_TX_TO_INI);
			idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
			/* Retake the mutex to continue the walk */
			mutex_enter(&idt->idt_mutex);
		}
	}

	mutex_exit(&idt->idt_mutex);

	return (IDM_STATUS_SUCCESS);
}
   1383 
/*
 * idm_so_negotiate_key_values() validates the key values for this connection
 *
 * The sockets transport does no negotiation of its own: none of the
 * arguments are examined and KV_HANDLED is returned unconditionally.
 */
/* ARGSUSED */
static kv_status_t
idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl,
    nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
{
	/* All parameters are negotiated at the iscsit level */
	return (KV_HANDLED);
}
   1395 
   1396 /*
   1397  * idm_so_notice_key_values() activates the negotiated key values for
   1398  * this connection.
   1399  */
   1400 static void
   1401 idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl)
   1402 {
   1403 	char			*nvp_name;
   1404 	nvpair_t		*nvp;
   1405 	nvpair_t		*next_nvp;
   1406 	int			nvrc;
   1407 	idm_status_t		idm_status;
   1408 	const idm_kv_xlate_t	*ikvx;
   1409 	uint64_t		num_val;
   1410 
   1411 	for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL);
   1412 	    nvp != NULL; nvp = next_nvp) {
   1413 		next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp);
   1414 		nvp_name = nvpair_name(nvp);
   1415 
   1416 		ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
   1417 		switch (ikvx->ik_key_id) {
   1418 		case KI_HEADER_DIGEST:
   1419 		case KI_DATA_DIGEST:
   1420 			idm_status = idm_so_handle_digest(it, nvp, ikvx);
   1421 			ASSERT(idm_status == 0);
   1422 
   1423 			/* Remove processed item from negotiated_nvl list */
   1424 			nvrc = nvlist_remove_all(
   1425 			    negotiated_nvl, ikvx->ik_key_name);
   1426 			ASSERT(nvrc == 0);
   1427 			break;
   1428 		case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
   1429 			/*
   1430 			 * Just pass the value down to idm layer.
   1431 			 * No need to remove it from negotiated_nvl list here.
   1432 			 */
   1433 			nvrc = nvpair_value_uint64(nvp, &num_val);
   1434 			ASSERT(nvrc == 0);
   1435 			it->ic_conn_params.max_xmit_dataseglen =
   1436 			    (uint32_t)num_val;
   1437 			break;
   1438 		default:
   1439 			break;
   1440 		}
   1441 	}
   1442 }
   1443 
   1444 /*
   1445  * idm_so_declare_key_values() declares the key values for this connection
   1446  */
   1447 /* ARGSUSED */
   1448 static kv_status_t
   1449 idm_so_declare_key_values(idm_conn_t *it, nvlist_t *config_nvl,
   1450     nvlist_t *outgoing_nvl)
   1451 {
   1452 	char			*nvp_name;
   1453 	nvpair_t		*nvp;
   1454 	nvpair_t		*next_nvp;
   1455 	kv_status_t		kvrc;
   1456 	int			nvrc = 0;
   1457 	const idm_kv_xlate_t	*ikvx;
   1458 	uint64_t		num_val;
   1459 
   1460 	for (nvp = nvlist_next_nvpair(config_nvl, NULL);
   1461 	    nvp != NULL && nvrc == 0; nvp = next_nvp) {
   1462 		next_nvp = nvlist_next_nvpair(config_nvl, nvp);
   1463 		nvp_name = nvpair_name(nvp);
   1464 
   1465 		ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
   1466 		switch (ikvx->ik_key_id) {
   1467 		case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
   1468 			if ((nvrc = nvpair_value_uint64(nvp, &num_val)) != 0) {
   1469 				break;
   1470 			}
   1471 			if (outgoing_nvl &&
   1472 			    (nvrc = nvlist_add_uint64(outgoing_nvl,
   1473 			    nvp_name, num_val)) != 0) {
   1474 				break;
   1475 			}
   1476 			it->ic_conn_params.max_recv_dataseglen =
   1477 			    (uint32_t)num_val;
   1478 			break;
   1479 		default:
   1480 			break;
   1481 		}
   1482 	}
   1483 	kvrc = idm_nvstat_to_kvstat(nvrc);
   1484 	return (kvrc);
   1485 }
   1486 
   1487 static idm_status_t
   1488 idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice,
   1489     const idm_kv_xlate_t *ikvx)
   1490 {
   1491 	int			nvrc;
   1492 	char			*digest_choice_string;
   1493 
   1494 	nvrc = nvpair_value_string(digest_choice,
   1495 	    &digest_choice_string);
   1496 	ASSERT(nvrc == 0);
   1497 	if (strcasecmp(digest_choice_string, "crc32c") == 0) {
   1498 		switch (ikvx->ik_key_id) {
   1499 		case KI_HEADER_DIGEST:
   1500 			it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST;
   1501 			break;
   1502 		case KI_DATA_DIGEST:
   1503 			it->ic_conn_flags |= IDM_CONN_DATA_DIGEST;
   1504 			break;
   1505 		default:
   1506 			ASSERT(0);
   1507 			break;
   1508 		}
   1509 	} else if (strcasecmp(digest_choice_string, "none") == 0) {
   1510 		switch (ikvx->ik_key_id) {
   1511 		case KI_HEADER_DIGEST:
   1512 			it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST;
   1513 			break;
   1514 		case KI_DATA_DIGEST:
   1515 			it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST;
   1516 			break;
   1517 		default:
   1518 			ASSERT(0);
   1519 			break;
   1520 		}
   1521 	} else {
   1522 		ASSERT(0);
   1523 	}
   1524 
   1525 	return (IDM_STATUS_SUCCESS);
   1526 }
   1527 
   1528 
/*
 * idm_so_conn_is_capable() verifies that the passed connection is provided
 * for by the sockets interface.
 *
 * Neither argument is examined; B_TRUE is returned unconditionally.
 */
/* ARGSUSED */
static boolean_t
idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps)
{
	return (B_TRUE);
}
   1539 
   1540 /*
   1541  * idm_so_rx_datain() validates the Data Sequence number of the PDU. The
   1542  * idm_sorecv_scsidata() function invoked earlier actually reads the data
   1543  * off the socket into the appropriate buffers.
   1544  */
   1545 static void
   1546 idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu)
   1547 {
   1548 	iscsi_data_hdr_t	*bhs;
   1549 	idm_task_t		*idt;
   1550 	idm_buf_t		*idb;
   1551 	uint32_t		datasn;
   1552 	size_t			offset;
   1553 	iscsi_hdr_t		*ihp = (iscsi_hdr_t *)pdu->isp_hdr;
   1554 	iscsi_data_rsp_hdr_t    *idrhp = (iscsi_data_rsp_hdr_t *)ihp;
   1555 
   1556 	ASSERT(ic != NULL);
   1557 	ASSERT(pdu != NULL);
   1558 
   1559 	bhs	= (iscsi_data_hdr_t *)pdu->isp_hdr;
   1560 	datasn	= ntohl(bhs->datasn);
   1561 	offset	= ntohl(bhs->offset);
   1562 
   1563 	ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA_RSP);
   1564 
   1565 	/*
   1566 	 * Look up the task corresponding to the initiator task tag
   1567 	 * to get the buffers affiliated with the task.
   1568 	 */
   1569 	idt = idm_task_find(ic, bhs->itt, bhs->ttt);
   1570 	if (idt == NULL) {
   1571 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task");
   1572 		idm_pdu_rx_protocol_error(ic, pdu);
   1573 		return;
   1574 	}
   1575 
   1576 	idb = pdu->isp_sorx_buf;
   1577 	if (idb == NULL) {
   1578 		IDM_CONN_LOG(CE_WARN,
   1579 		    "idm_so_rx_datain: failed to find buffer");
   1580 		idm_task_rele(idt);
   1581 		idm_pdu_rx_protocol_error(ic, pdu);
   1582 		return;
   1583 	}
   1584 
   1585 	/*
   1586 	 * DataSN values should be sequential and should not have any gaps or
   1587 	 * repetitions. Check the DataSN with the one stored in the task.
   1588 	 */
   1589 	if (datasn == idt->idt_exp_datasn) {
   1590 		idt->idt_exp_datasn++; /* keep track of DataSN received */
   1591 	} else {
   1592 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order");
   1593 		idm_task_rele(idt);
   1594 		idm_pdu_rx_protocol_error(ic, pdu);
   1595 		return;
   1596 	}
   1597 
   1598 	/*
   1599 	 * PDUs in a sequence should be in continuously increasing
   1600 	 * address offset
   1601 	 */
   1602 	if (offset != idb->idb_exp_offset) {
   1603 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset");
   1604 		idm_task_rele(idt);
   1605 		idm_pdu_rx_protocol_error(ic, pdu);
   1606 		return;
   1607 	}
   1608 	/* Expected next relative buffer offset */
   1609 	idb->idb_exp_offset += n2h24(bhs->dlength);
   1610 	idt->idt_rx_bytes += n2h24(bhs->dlength);
   1611 
   1612 	idm_task_rele(idt);
   1613 
   1614 	/*
   1615 	 * For now call scsi_rsp which will process the data rsp
   1616 	 * Revisit, need to provide an explicit client entry point for
   1617 	 * phase collapse completions.
   1618 	 */
   1619 	if (((ihp->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_SCSI_DATA_RSP) &&
   1620 	    (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) {
   1621 		(*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
   1622 	}
   1623 
   1624 	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
   1625 }
   1626 
   1627 /*
   1628  * The idm_so_rx_dataout() function is used by the iSCSI target to read
   1629  * data from the Data-Out PDU sent by the iSCSI initiator.
   1630  *
   1631  * This function gets the Initiator Task Tag from the PDU BHS and looks up the
   1632  * task to get the buffers associated with the PDU. A PDU might span buffers.
   1633  * The data is then read into the respective buffer.
   1634  */
   1635 static void
   1636 idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu)
   1637 {
   1638 
   1639 	iscsi_data_hdr_t	*bhs;
   1640 	idm_task_t		*idt;
   1641 	idm_buf_t		*idb;
   1642 	size_t			offset;
   1643 
   1644 	ASSERT(ic != NULL);
   1645 	ASSERT(pdu != NULL);
   1646 
   1647 	bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
   1648 	offset = ntohl(bhs->offset);
   1649 	ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA);
   1650 
   1651 	/*
   1652 	 * Look up the task corresponding to the initiator task tag
   1653 	 * to get the buffers affiliated with the task.
   1654 	 */
   1655 	idt = idm_task_find(ic, bhs->itt, bhs->ttt);
   1656 	if (idt == NULL) {
   1657 		IDM_CONN_LOG(CE_WARN,
   1658 		    "idm_so_rx_dataout: failed to find task");
   1659 		idm_pdu_rx_protocol_error(ic, pdu);
   1660 		return;
   1661 	}
   1662 
   1663 	idb = pdu->isp_sorx_buf;
   1664 	if (idb == NULL) {
   1665 		IDM_CONN_LOG(CE_WARN,
   1666 		    "idm_so_rx_dataout: failed to find buffer");
   1667 		idm_task_rele(idt);
   1668 		idm_pdu_rx_protocol_error(ic, pdu);
   1669 		return;
   1670 	}
   1671 
   1672 	/* Keep track of data transferred - check data offsets */
   1673 	if (offset != idb->idb_exp_offset) {
   1674 		IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: "
   1675 		    "%ld, %d", offset, idb->idb_exp_offset);
   1676 		idm_task_rele(idt);
   1677 		idm_pdu_rx_protocol_error(ic, pdu);
   1678 		return;
   1679 	}
   1680 	/* Expected next relative offset */
   1681 	idb->idb_exp_offset += ntoh24(bhs->dlength);
   1682 	idt->idt_rx_bytes += n2h24(bhs->dlength);
   1683 
   1684 	/*
   1685 	 * Call the buffer callback when the transfer is complete
   1686 	 *
   1687 	 * The connection state machine should only abort tasks after
   1688 	 * shutting down the connection so we are assured that there
   1689 	 * won't be a simultaneous attempt to abort this task at the
   1690 	 * same time as we are processing this PDU (due to a connection
   1691 	 * state change).
   1692 	 */
   1693 	if (bhs->flags & ISCSI_FLAG_FINAL) {
   1694 		/*
   1695 		 * We only want to call idm_buf_rx_from_ini_done once
   1696 		 * per transfer.  It's possible that this task has
   1697 		 * already been aborted in which case
   1698 		 * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done
   1699 		 * for each buffer with idb_in_transport==B_TRUE.  To
   1700 		 * close this window and ensure that this doesn't happen,
   1701 		 * we'll clear idb->idb_in_transport now while holding
   1702 		 * the task mutex.   This is only really an issue for
   1703 		 * SCSI task abort -- if tasks were being aborted because
   1704 		 * of a connection state change the state machine would
   1705 		 * have already stopped the receive thread.
   1706 		 */
   1707 		mutex_enter(&idt->idt_mutex);
   1708 
   1709 		/*
   1710 		 * Release the task hold here (obtained in idm_task_find)
   1711 		 * because the task may complete synchronously during
   1712 		 * idm_buf_rx_from_ini_done.  Since we still have an active
   1713 		 * buffer we know there is at least one additional hold on idt.
   1714 		 */
   1715 		idm_task_rele(idt);
   1716 
   1717 		/*
   1718 		 * idm_buf_rx_from_ini_done releases idt->idt_mutex
   1719 		 */
   1720 		DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
   1721 		    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
   1722 		    uint64_t, 0, uint32_t, 0, uint32_t, 0,
   1723 		    uint32_t, idb->idb_xfer_len,
   1724 		    int, XFER_BUF_RX_FROM_INI);
   1725 		idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS);
   1726 		idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
   1727 		return;
   1728 	}
   1729 
   1730 	idm_task_rele(idt);
   1731 	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
   1732 }
   1733 
   1734 /*
   1735  * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle
   1736  * the R2T PDU sent by the iSCSI target indicating that it is ready to
   1737  * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS
   1738  * and looks up the task in the task tree using the itt to get the output
   1739  * buffers associated the task. The R2T PDU contains the offset of the
   1740  * requested data and the data length. This function then constructs a
   1741  * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out
   1742  * PDU is associated with the R2T by the Target Transfer Tag  (ttt).
   1743  */
   1744 
   1745 static void
   1746 idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu)
   1747 {
   1748 	idm_task_t		*idt;
   1749 	idm_buf_t		*idb;
   1750 	iscsi_rtt_hdr_t		*rtt_hdr;
   1751 	uint32_t		data_offset;
   1752 	uint32_t		data_length;
   1753 
   1754 	ASSERT(ic != NULL);
   1755 	ASSERT(pdu != NULL);
   1756 
   1757 	rtt_hdr	= (iscsi_rtt_hdr_t *)pdu->isp_hdr;
   1758 	data_offset = ntohl(rtt_hdr->data_offset);
   1759 	data_length = ntohl(rtt_hdr->data_length);
   1760 	idt	= idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt);
   1761 
   1762 	if (idt == NULL) {
   1763 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task");
   1764 		idm_pdu_rx_protocol_error(ic, pdu);
   1765 		return;
   1766 	}
   1767 
   1768 	/* Find the buffer bound to the task by the iSCSI initiator */
   1769 	mutex_enter(&idt->idt_mutex);
   1770 	idb = idm_buf_find(&idt->idt_outbufv, data_offset);
   1771 	if (idb == NULL) {
   1772 		mutex_exit(&idt->idt_mutex);
   1773 		idm_task_rele(idt);
   1774 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer");
   1775 		idm_pdu_rx_protocol_error(ic, pdu);
   1776 		return;
   1777 	}
   1778 
   1779 	/* return buffer contains this data */
   1780 	if (data_offset + data_length > idb->idb_buflen) {
   1781 		/* Overflow */
   1782 		mutex_exit(&idt->idt_mutex);
   1783 		idm_task_rele(idt);
   1784 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: read from outside "
   1785 		    "buffer");
   1786 		idm_pdu_rx_protocol_error(ic, pdu);
   1787 		return;
   1788 	}
   1789 
   1790 	idt->idt_r2t_ttt = rtt_hdr->ttt;
   1791 	idt->idt_exp_datasn = 0;
   1792 
   1793 	idm_so_send_rtt_data(ic, idt, idb, data_offset,
   1794 	    ntohl(rtt_hdr->data_length));
   1795 	mutex_exit(&idt->idt_mutex);
   1796 
   1797 	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
   1798 	idm_task_rele(idt);
   1799 
   1800 }
   1801 
/*
 * idm_sorecvdata()
 * Receive the data segment of pdu from the connection's socket.
 *
 * The caller's iovec (pdu->isp_iov / isp_iovlen) describes where the
 * isp_datalen bytes of data go.  This function appends up to two more
 * entries: one for the pad bytes that round the data up to a multiple of
 * ISCSI_PAD_WORD_LEN, and, when data digests are enabled on the
 * connection, one for the received CRC.  Everything is then read with a
 * single idm_iov_sorecv() call.
 *
 * Returns:
 *   IDM_STATUS_SUCCESS      data received (and digest verified, if enabled)
 *   IDM_STATUS_IO           the socket receive failed
 *   IDM_STATUS_DATA_DIGEST  the received CRC does not match the computed one
 *
 * NOTE(review): the appended iovec entries point at this function's stack
 * variables and isp_iovlen stays incremented after return -- callers
 * presumably must not reuse those entries; confirm.
 */
idm_status_t
idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu)
{
	uint8_t		pad[ISCSI_PAD_WORD_LEN];
	int		pad_len;
	uint32_t	data_digest_crc;
	uint32_t	crc_calculated;
	int		total_len;
	idm_so_conn_t	*so_conn;

	so_conn = ic->ic_transport_private;

	/*
	 * Bytes needed to round isp_datalen up to a multiple of
	 * ISCSI_PAD_WORD_LEN; 0 when it is already aligned.
	 */
	pad_len = ((ISCSI_PAD_WORD_LEN -
	    (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
	    (ISCSI_PAD_WORD_LEN - 1));

	/* Up to two iovec entries are appended below */
	ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */

	total_len = pdu->isp_datalen;

	if (pad_len) {
		pdu->isp_iov[pdu->isp_iovlen].iov_base	= (char *)&pad;
		pdu->isp_iov[pdu->isp_iovlen].iov_len	= pad_len;
		total_len		+= pad_len;
		pdu->isp_iovlen++;
	}

	/* setup data digest */
	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
		pdu->isp_iov[pdu->isp_iovlen].iov_base =
		    (char *)&data_digest_crc;
		pdu->isp_iov[pdu->isp_iovlen].iov_len =
		    sizeof (data_digest_crc);
		total_len		+= sizeof (data_digest_crc);
		pdu->isp_iovlen++;
	}

	/* The data itself starts at the first iovec entry */
	pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base;

	if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0],
	    pdu->isp_iovlen, total_len) != 0) {
		return (IDM_STATUS_IO);
	}

	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
		/*
		 * The CRC is computed over isp_datalen bytes starting at
		 * isp_data, continued over the received pad bytes.
		 */
		crc_calculated = idm_crc32c(pdu->isp_data,
		    pdu->isp_datalen);
		if (pad_len) {
			crc_calculated = idm_crc32c_continued((char *)&pad,
			    pad_len, crc_calculated);
		}
		if (crc_calculated != data_digest_crc) {
			IDM_CONN_LOG(CE_WARN,
			    "idm_sorecvdata: "
			    "CRC error: actual 0x%x, calc 0x%x",
			    data_digest_crc, crc_calculated);

			/* Invalid Data Digest */
			return (IDM_STATUS_DATA_DIGEST);
		}
	}

	return (IDM_STATUS_SUCCESS);
}
   1866 
   1867 /*
   1868  * idm_sorecv_scsidata() is used to receive scsi data from the socket. The
   1869  * Data-type PDU header must be read into the idm_pdu_t structure prior to
   1870  * calling this function.
   1871  */
   1872 idm_status_t
   1873 idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu)
   1874 {
   1875 	iscsi_data_hdr_t	*bhs;
   1876 	idm_task_t		*task;
   1877 	uint32_t		offset;
   1878 	uint8_t			opcode;
   1879 	uint32_t		dlength;
   1880 	list_t			*buflst;
   1881 	uint32_t		xfer_bytes;
   1882 	idm_status_t		status;
   1883 
   1884 	ASSERT(ic != NULL);
   1885 	ASSERT(pdu != NULL);
   1886 
   1887 	bhs	= (iscsi_data_hdr_t *)pdu->isp_hdr;
   1888 
   1889 	offset	= ntohl(bhs->offset);
   1890 	opcode	= bhs->opcode;
   1891 	dlength = n2h24(bhs->dlength);
   1892 
   1893 	ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
   1894 	    (opcode == ISCSI_OP_SCSI_DATA));
   1895 
   1896 	/*
   1897 	 * Successful lookup implicitly gets a "hold" on the task.  This
   1898 	 * hold must be released before leaving this function.  At one
   1899 	 * point we were caching this task context and retaining the hold
   1900 	 * but it turned out to be very difficult to release the hold properly.
   1901 	 * The task can be aborted and the connection shutdown between this
   1902 	 * call and the subsequent expected call to idm_so_rx_datain/
   1903 	 * idm_so_rx_dataout (in which case those functions are not called).
   1904 	 * Releasing the hold in the PDU callback doesn't work well either
   1905 	 * because the whole task may be completed by then at which point
   1906 	 * it is too late to release the hold -- for better or worse this
   1907 	 * code doesn't wait on the refcnts during normal operation.
   1908 	 * idm_task_find() is very fast and it is not a huge burden if we
   1909 	 * have to do it twice.
   1910 	 */
   1911 	task = idm_task_find(ic, bhs->itt, bhs->ttt);
   1912 	if (task == NULL) {
   1913 		IDM_CONN_LOG(CE_WARN,
   1914 		    "idm_sorecv_scsidata: could not find task");
   1915 		return (IDM_STATUS_FAIL);
   1916 	}
   1917 
   1918 	mutex_enter(&task->idt_mutex);
   1919 	buflst	= (opcode == ISCSI_OP_SCSI_DATA_RSP) ?
   1920 	    &task->idt_inbufv : &task->idt_outbufv;
   1921 	pdu->isp_sorx_buf = idm_buf_find(buflst, offset);
   1922 	mutex_exit(&task->idt_mutex);
   1923 
   1924 	if (pdu->isp_sorx_buf == NULL) {
   1925 		idm_task_rele(task);
   1926 		IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find "
   1927 		    "buffer for offset %x opcode=%x",
   1928 		    offset, opcode);
   1929 		return (IDM_STATUS_FAIL);
   1930 	}
   1931 
   1932 	xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength);
   1933 	ASSERT(xfer_bytes != 0);
   1934 	if (xfer_bytes != dlength) {
   1935 		idm_task_rele(task);
   1936 		/*
   1937 		 * Buffer overflow, connection error.  The PDU data is still
   1938 		 * sitting in the socket so we can't use the connection
   1939 		 * again until that data is drained.
   1940 		 */
   1941 		return (IDM_STATUS_FAIL);
   1942 	}
   1943 
   1944 	status = idm_sorecvdata(ic, pdu);
   1945 
   1946 	idm_task_rele(task);
   1947 
   1948 	return (status);
   1949 }
   1950 
   1951 static uint32_t
   1952 idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength)
   1953 {
   1954 	uint32_t	buf_ro = ro - idb->idb_bufoffset;
   1955 	uint32_t	xfer_len = min(dlength, idb->idb_buflen - buf_ro);
   1956 
   1957 	ASSERT(ro >= idb->idb_bufoffset);
   1958 
   1959 	pdu->isp_iov[pdu->isp_iovlen].iov_base	=
   1960 	    (caddr_t)idb->idb_buf + buf_ro;
   1961 	pdu->isp_iov[pdu->isp_iovlen].iov_len	= xfer_len;
   1962 	pdu->isp_iovlen++;
   1963 
   1964 	return (xfer_len);
   1965 }
   1966 
   1967 int
   1968 idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu)
   1969 {
   1970 	pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP);
   1971 	ASSERT(pdu->isp_data != NULL);
   1972 
   1973 	pdu->isp_databuflen = pdu->isp_datalen;
   1974 	pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data;
   1975 	pdu->isp_iov[0].iov_len = pdu->isp_datalen;
   1976 	pdu->isp_iovlen = 1;
   1977 	/*
   1978 	 * Since we are associating a new data buffer with this received
   1979 	 * PDU we need to set a specific callback to free the data
   1980 	 * after the PDU is processed.
   1981 	 */
   1982 	pdu->isp_flags |= IDM_PDU_ADDL_DATA;
   1983 	pdu->isp_callback = idm_sorx_addl_pdu_cb;
   1984 
   1985 	return (idm_sorecvdata(ic, pdu));
   1986 }
   1987 
   1988 void
   1989 idm_sorx_thread(void *arg)
   1990 {
   1991 	boolean_t	conn_failure = B_FALSE;
   1992 	idm_conn_t	*ic = (idm_conn_t *)arg;
   1993 	idm_so_conn_t	*so_conn;
   1994 	idm_pdu_t	*pdu;
   1995 	idm_status_t	rc;
   1996 
   1997 	idm_conn_hold(ic);
   1998 
   1999 	mutex_enter(&ic->ic_mutex);
   2000 
   2001 	so_conn = ic->ic_transport_private;
   2002 	so_conn->ic_rx_thread_running = B_TRUE;
   2003 	so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did;
   2004 	cv_signal(&ic->ic_cv);
   2005 
   2006 	while (so_conn->ic_rx_thread_running) {
   2007 		mutex_exit(&ic->ic_mutex);
   2008 
   2009 		/*
   2010 		 * Get PDU with default header size (large enough for
   2011 		 * BHS plus any anticipated AHS).  PDU from
   2012 		 * the cache will have all values set correctly
   2013 		 * for sockets RX including callback.
   2014 		 */
   2015 		pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP);
   2016 		pdu->isp_ic = ic;
   2017 		pdu->isp_flags = 0;
   2018 		pdu->isp_transport_hdrlen = 0;
   2019 
   2020 		if ((rc = idm_sorecvhdr(ic, pdu)) != 0) {
   2021 			/*
   2022 			 * Call idm_pdu_complete so that we call the callback
   2023 			 * and ensure any memory allocated in idm_sorecvhdr
   2024 			 * gets freed up.
   2025 			 */
   2026 			idm_pdu_complete(pdu, IDM_STATUS_FAIL);
   2027 
   2028 			/*
   2029 			 * If ic_rx_thread_running is still set then
   2030 			 * this is some kind of connection problem
   2031 			 * on the socket.  In this case we want to
   2032 			 * generate an event.  Otherwise some other
   2033 			 * thread closed the socket due to another
   2034 			 * issue in which case we don't need to
   2035 			 * generate an event.
   2036 			 */
   2037 			mutex_enter(&ic->ic_mutex);
   2038 			if (so_conn->ic_rx_thread_running) {
   2039 				conn_failure = B_TRUE;
   2040 				so_conn->ic_rx_thread_running = B_FALSE;
   2041 			}
   2042 
   2043 			continue;
   2044 		}
   2045 
   2046 		/*
   2047 		 * Header has been read and validated.  Now we need
   2048 		 * to read the PDU data payload (if present).  SCSI data
   2049 		 * need to be transferred from the socket directly into
   2050 		 * the associated transfer buffer for the SCSI task.
   2051 		 */
   2052 		if (pdu->isp_datalen != 0) {
   2053 			if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
   2054 			    (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
   2055 				rc = idm_sorecv_scsidata(ic, pdu);
   2056 				/*
   2057 				 * All SCSI errors are fatal to the
   2058 				 * connection right now since we have no
   2059 				 * place to put the data.  What we need
   2060 				 * is some kind of sink to dispose of unwanted
   2061 				 * SCSI data.  For example an invalid task tag
   2062 				 * should not kill the connection (although
   2063 				 * we may want to drop the connection).
   2064 				 */
   2065 			} else {
   2066 				/*
   2067 				 * Not data PDUs so allocate a buffer for the
   2068 				 * data segment and read the remaining data.
   2069 				 */
   2070 				rc = idm_sorecv_nonscsidata(ic, pdu);
   2071 			}
   2072 			if (rc != 0) {
   2073 				/*
   2074 				 * Call idm_pdu_complete so that we call the
   2075 				 * callback and ensure any memory allocated
   2076 				 * in idm_sorecvhdr gets freed up.
   2077 				 */
   2078 				idm_pdu_complete(pdu, IDM_STATUS_FAIL);
   2079 
   2080 				/*
   2081 				 * If ic_rx_thread_running is still set then
   2082 				 * this is some kind of connection problem
   2083 				 * on the socket.  In this case we want to
   2084 				 * generate an event.  Otherwise some other
   2085 				 * thread closed the socket due to another
   2086 				 * issue in which case we don't need to
   2087 				 * generate an event.
   2088 				 */
   2089 				mutex_enter(&ic->ic_mutex);
   2090 				if (so_conn->ic_rx_thread_running) {
   2091 					conn_failure = B_TRUE;
   2092 					so_conn->ic_rx_thread_running = B_FALSE;
   2093 				}
   2094 				continue;
   2095 			}
   2096 		}
   2097 
   2098 		/*
   2099 		 * Process RX PDU
   2100 		 */
   2101 		idm_pdu_rx(ic, pdu);
   2102 
   2103 		mutex_enter(&ic->ic_mutex);
   2104 	}
   2105 
   2106 	mutex_exit(&ic->ic_mutex);
   2107 
   2108 	/*
   2109 	 * If we dropped out of the RX processing loop because of
   2110 	 * a socket problem or other connection failure (including
   2111 	 * digest errors) then we need to generate a state machine
   2112 	 * event to shut the connection down.
   2113 	 * If the state machine is already in, for example, INIT_ERROR, this
   2114 	 * event will get dropped, and the TX thread will never be notified
   2115 	 * to shut down.  To be safe, we'll just notify it here.
   2116 	 */
   2117 	if (conn_failure) {
   2118 		if (so_conn->ic_tx_thread_running) {
   2119 			so_conn->ic_tx_thread_running = B_FALSE;
   2120 			mutex_enter(&so_conn->ic_tx_mutex);
   2121 			cv_signal(&so_conn->ic_tx_cv);
   2122 			mutex_exit(&so_conn->ic_tx_mutex);
   2123 		}
   2124 
   2125 		idm_conn_event(ic, CE_TRANSPORT_FAIL, rc);
   2126 	}
   2127 
   2128 	idm_conn_rele(ic);
   2129 
   2130 	thread_exit();
   2131 }
   2132 
   2133 /*
   2134  * idm_so_tx
   2135  *
   2136  * This is the implementation of idm_transport_ops_t's it_tx_pdu entry
   2137  * point.  By definition, it is supposed to be fast.  So, simply queue
   2138  * the entry and return.  The real work is done by idm_i_so_tx() via
   2139  * idm_sotx_thread().
   2140  */
   2141 
   2142 static void
   2143 idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu)
   2144 {
   2145 	idm_so_conn_t *so_conn = ic->ic_transport_private;
   2146 
   2147 	ASSERT(pdu->isp_ic == ic);
   2148 	mutex_enter(&so_conn->ic_tx_mutex);
   2149 
   2150 	if (!so_conn->ic_tx_thread_running) {
   2151 		mutex_exit(&so_conn->ic_tx_mutex);
   2152 		idm_pdu_complete(pdu, IDM_STATUS_ABORTED);
   2153 		return;
   2154 	}
   2155 
   2156 	list_insert_tail(&so_conn->ic_tx_list, (void *)pdu);
   2157 	cv_signal(&so_conn->ic_tx_cv);
   2158 	mutex_exit(&so_conn->ic_tx_mutex);
   2159 }
   2160 
   2161 static idm_status_t
   2162 idm_i_so_tx(idm_pdu_t *pdu)
   2163 {
   2164 	idm_conn_t	*ic = pdu->isp_ic;
   2165 	idm_status_t	status = IDM_STATUS_SUCCESS;
   2166 	uint8_t		pad[ISCSI_PAD_WORD_LEN];
   2167 	int		pad_len;
   2168 	uint32_t	hdr_digest_crc;
   2169 	uint32_t	data_digest_crc = 0;
   2170 	int		total_len = 0;
   2171 	int		iovlen = 0;
   2172 	struct iovec	iov[6];
   2173 	idm_so_conn_t	*so_conn;
   2174 
   2175 	so_conn = ic->ic_transport_private;
   2176 
   2177 	/* Setup BHS */
   2178 	iov[iovlen].iov_base	= (caddr_t)pdu->isp_hdr;
   2179 	iov[iovlen].iov_len	= pdu->isp_hdrlen;
   2180 	total_len		+= iov[iovlen].iov_len;
   2181 	iovlen++;
   2182 
   2183 	/* Setup header digest */
   2184 	if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
   2185 	    (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) {
   2186 		hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen);
   2187 
   2188 		iov[iovlen].iov_base	= (caddr_t)&hdr_digest_crc;
   2189 		iov[iovlen].iov_len	= sizeof (hdr_digest_crc);
   2190 		total_len		+= iov[iovlen].iov_len;
   2191 		iovlen++;
   2192 	}
   2193 
   2194 	/* Setup the data */
   2195 	if (pdu->isp_datalen) {
   2196 		idm_task_t		*idt;
   2197 		idm_buf_t		*idb;
   2198 		iscsi_data_hdr_t	*ihp;
   2199 		ihp = (iscsi_data_hdr_t *)pdu->isp_hdr;
   2200 		/* Write of immediate data */
   2201 		if (ic->ic_ffp &&
   2202 		    (ihp->opcode == ISCSI_OP_SCSI_CMD ||
   2203 		    ihp->opcode == ISCSI_OP_SCSI_DATA)) {
   2204 			idt = idm_task_find(ic, ihp->itt, ihp->ttt);
   2205 			if (idt) {
   2206 				mutex_enter(&idt->idt_mutex);
   2207 				idb = idm_buf_find(&idt->idt_outbufv, 0);
   2208 				mutex_exit(&idt->idt_mutex);
   2209 				/*
   2210 				 * If the initiator call to idm_buf_alloc
   2211 				 * failed then we can get to this point
   2212 				 * without a bound buffer.  The associated
   2213 				 * connection failure will clean things up
   2214 				 * later.  It would be nice to come up with
   2215 				 * a cleaner way to handle this.  In
   2216 				 * particular it seems absurd to look up
   2217 				 * the task and the buffer just to update
   2218 				 * this counter.
   2219 				 */
   2220 				if (idb)
   2221 					idb->idb_xfer_len += pdu->isp_datalen;
   2222 				idm_task_rele(idt);
   2223 			}
   2224 		}
   2225 
   2226 		iov[iovlen].iov_base = (caddr_t)pdu->isp_data;
   2227 		iov[iovlen].iov_len  = pdu->isp_datalen;
   2228 		total_len += iov[iovlen].iov_len;
   2229 		iovlen++;
   2230 	}
   2231 
   2232 	/* Setup the data pad if necessary */
   2233 	pad_len = ((ISCSI_PAD_WORD_LEN -
   2234 	    (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
   2235 	    (ISCSI_PAD_WORD_LEN - 1));
   2236 
   2237 	if (pad_len) {
   2238 		bzero(pad, sizeof (pad));
   2239 		iov[iovlen].iov_base = (void *)&pad;
   2240 		iov[iovlen].iov_len  = pad_len;
   2241 		total_len		+= iov[iovlen].iov_len;
   2242 		iovlen++;
   2243 	}
   2244 
   2245 	/*
   2246 	 * Setup the data digest if enabled.  Data-digest is not sent
   2247 	 * for login-phase PDUs.
   2248 	 */
   2249 	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) &&
   2250 	    ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
   2251 	    (pdu->isp_datalen || pad_len)) {
   2252 		/*
   2253 		 * RFC3720/10.2.3: A zero-length Data Segment also
   2254 		 * implies a zero-length data digest.
   2255 		 */
   2256 		if (pdu->isp_datalen) {
   2257 			data_digest_crc = idm_crc32c(pdu->isp_data,
   2258 			    pdu->isp_datalen);
   2259 		}
   2260 		if (pad_len) {
   2261 			data_digest_crc = idm_crc32c_continued(&pad,
   2262 			    pad_len, data_digest_crc);
   2263 		}
   2264 
   2265 		iov[iovlen].iov_base	= (caddr_t)&data_digest_crc;
   2266 		iov[iovlen].iov_len	= sizeof (data_digest_crc);
   2267 		total_len		+= iov[iovlen].iov_len;
   2268 		iovlen++;
   2269 	}
   2270 
   2271 	/* Transmit the PDU */
   2272 	if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen,
   2273 	    total_len) != 0) {
   2274 		/* Set error status */
   2275 		IDM_CONN_LOG(CE_WARN,
   2276 		    "idm_so_tx: failed to transmit the PDU, so: %p ic: %p "
   2277 		    "data: %p", (void *) so_conn->ic_so, (void *) ic,
   2278 		    (void *) pdu->isp_data);
   2279 		status = IDM_STATUS_IO;
   2280 	}
   2281 
   2282 	/*
   2283 	 * Success does not mean that the PDU actually reached the
   2284 	 * remote node since it could get dropped along the way.
   2285 	 */
   2286 	idm_pdu_complete(pdu, status);
   2287 
   2288 	return (status);
   2289 }
   2290 
   2291 /*
   2292  * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the
   2293  * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength,
   2294  * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN.
   2295  * A target can invoke this function multiple times for a single read command
   2296  * (identified by the same ITT) to split the input into several sequences.
   2297  *
   2298  * DataSN starts with 0 for the first data PDU of an input command and advances
   2299  * by 1 for each subsequent data PDU. Each sequence will have its own F bit,
   2300  * which is set to 1 for the last data PDU of a sequence.
   2301  * If the initiator supports phase collapse, the status bit must be set along
   2302  * with the F bit to indicate that the status is shipped together with the last
   2303  * Data-In PDU.
   2304  *
   2305  * The data PDUs within a sequence will be sent in order with the buffer offset
   2306  * in increasing order. i.e. initiator and target must have negotiated the
   2307  * "DataPDUInOrder" to "Yes". The order between sequences is not enforced.
   2308  *
   2309  * Caller holds idt->idt_mutex
   2310  */
   2311 static idm_status_t
   2312 idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
   2313 {
   2314 	idm_so_conn_t	*so_conn = idb->idb_ic->ic_transport_private;
   2315 	idm_pdu_t	tmppdu;
   2316 
   2317 	ASSERT(mutex_owned(&idt->idt_mutex));
   2318 
   2319 	/*
   2320 	 * Put the idm_buf_t on the tx queue.  It will be transmitted by
   2321 	 * idm_sotx_thread.
   2322 	 */
   2323 	mutex_enter(&so_conn->ic_tx_mutex);
   2324 
   2325 	DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
   2326 	    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
   2327 	    uint64_t, 0, uint32_t, 0, uint32_t, 0,
   2328 	    uint32_t, idb->idb_xfer_len, int, XFER_BUF_TX_TO_INI);
   2329 
   2330 	if (!so_conn->ic_tx_thread_running) {
   2331 		mutex_exit(&so_conn->ic_tx_mutex);
   2332 		/*
   2333 		 * Don't release idt->idt_mutex since we're supposed to hold
   2334 		 * in when calling idm_buf_tx_to_ini_done
   2335 		 */
   2336 		DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
   2337 		    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
   2338 		    uint64_t, 0, uint32_t, 0, uint32_t, 0,
   2339 		    uint32_t, idb->idb_xfer_len,
   2340 		    int, XFER_BUF_TX_TO_INI);
   2341 		idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
   2342 		return (IDM_STATUS_FAIL);
   2343 	}
   2344 
   2345 	/*
   2346 	 * Build a template for the data PDU headers we will use so that
   2347 	 * the SN values will stay consistent with other PDU's we are
   2348 	 * transmitting like R2T and SCSI status.
   2349 	 */
   2350 	bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
   2351 	tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl;
   2352 	(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
   2353 	    ISCSI_OP_SCSI_DATA_RSP);
   2354 	idb->idb_tx_thread = B_TRUE;
   2355 	list_insert_tail(&so_conn->ic_tx_list, (void *)idb);
   2356 	cv_signal(&so_conn->ic_tx_cv);
   2357 	mutex_exit(&so_conn->ic_tx_mutex);
   2358 	mutex_exit(&idt->idt_mutex);
   2359 
   2360 	/*
   2361 	 * Returning success here indicates the transfer was successfully
   2362 	 * dispatched -- it does not mean that the transfer completed
   2363 	 * successfully.
   2364 	 */
   2365 	return (IDM_STATUS_SUCCESS);
   2366 }
   2367 
   2368 /*
   2369  * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the
   2370  * data blocks it is ready to receive from the initiator in response to a WRITE
   2371  * SCSI command. The target iSCSI layer passes the information about the desired
   2372  * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer
   2373  * offset and datalen are passed via the 'idb' argument.
   2374  *
   2375  * Scope for Prototype build:
   2376  * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have
   2377  * negotiated the "InitialR2T" to "Yes".
   2378  *
   2379  * Caller holds idt->idt_mutex
   2380  */
   2381 static idm_status_t
   2382 idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
   2383 {
   2384 	idm_pdu_t		*pdu;
   2385 	iscsi_rtt_hdr_t		*rtt;
   2386 
   2387 	ASSERT(mutex_owned(&idt->idt_mutex));
   2388 
   2389 	DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
   2390 	    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
   2391 	    uint64_t, 0, uint32_t, 0, uint32_t, 0,
   2392 	    uint32_t, idb->idb_xfer_len, int, XFER_BUF_RX_FROM_INI);
   2393 
   2394 	pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
   2395 	pdu->isp_ic = idt->idt_ic;
   2396 	pdu->isp_flags = 0;	/* initialize isp_flags */
   2397 	bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t));
   2398 
   2399 	/* iSCSI layer fills the TTT, ITT, StatSN, ExpCmdSN, MaxCmdSN */
   2400 	(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP);
   2401 
   2402 	/* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */
   2403 	rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr);
   2404 
   2405 	rtt->opcode		= ISCSI_OP_RTT_RSP;
   2406 	rtt->flags		= ISCSI_FLAG_FINAL;
   2407 	rtt->data_offset	= htonl(idb->idb_bufoffset);
   2408 	rtt->data_length	= htonl(idb->idb_xfer_len);
   2409 	rtt->rttsn		= htonl(idt->idt_exp_rttsn++);
   2410 
   2411 	/* Keep track of buffer offsets */
   2412 	idb->idb_exp_offset	= idb->idb_bufoffset;
   2413 	mutex_exit(&idt->idt_mutex);
   2414 
   2415 	/*
   2416 	 * Transmit the PDU.
   2417 	 */
   2418 	idm_pdu_tx(pdu);
   2419 
   2420 	return (IDM_STATUS_SUCCESS);
   2421 }
   2422 
   2423 static idm_status_t
   2424 idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen)
   2425 {
   2426 	if ((buflen > IDM_SO_BUF_CACHE_LB) && (buflen <= IDM_SO_BUF_CACHE_UB)) {
   2427 		idb->idb_buf = kmem_cache_alloc(idm.idm_so_128k_buf_cache,
   2428 		    KM_NOSLEEP);
   2429 		idb->idb_buf_private = idm.idm_so_128k_buf_cache;
   2430 	} else {
   2431 		idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP);
   2432 		idb->idb_buf_private = NULL;
   2433 	}
   2434 
   2435 	if (idb->idb_buf == NULL) {
   2436 		IDM_CONN_LOG(CE_NOTE,
   2437 		    "idm_so_buf_alloc: failed buffer allocation");
   2438 		return (IDM_STATUS_FAIL);
   2439 	}
   2440 
   2441 	return (IDM_STATUS_SUCCESS);
   2442 }
   2443 
   2444 /* ARGSUSED */
   2445 static idm_status_t
   2446 idm_so_buf_setup(idm_buf_t *idb)
   2447 {
   2448 	/* Ensure bufalloc'd flag is unset */
   2449 	idb->idb_bufalloc = B_FALSE;
   2450 
   2451 	return (IDM_STATUS_SUCCESS);
   2452 }
   2453 
   2454 /* ARGSUSED */
   2455 static void
   2456 idm_so_buf_teardown(idm_buf_t *idb)
   2457 {
   2458 	/* nothing to do here */
   2459 }
   2460 
   2461 static void
   2462 idm_so_buf_free(idm_buf_t *idb)
   2463 {
   2464 	if (idb->idb_buf_private == NULL) {
   2465 		kmem_free(idb->idb_buf, idb->idb_buflen);
   2466 	} else {
   2467 		kmem_cache_free(idb->idb_buf_private, idb->idb_buf);
   2468 	}
   2469 }
   2470 
   2471 static void
   2472 idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, idm_buf_t *idb,
   2473     uint32_t offset, uint32_t length)
   2474 {
   2475 	idm_so_conn_t	*so_conn = ic->ic_transport_private;
   2476 	idm_pdu_t	tmppdu;
   2477 	idm_buf_t	*rtt_buf;
   2478 
   2479 	ASSERT(mutex_owned(&idt->idt_mutex));
   2480 
   2481 	/*
   2482 	 * Allocate a buffer to represent the RTT transfer.  We could further
   2483 	 * optimize this by allocating the buffers internally from an rtt
   2484 	 * specific buffer cache since this is socket-specific code but for
   2485 	 * now we will keep it simple.
   2486 	 */
   2487 	rtt_buf = idm_buf_alloc(ic, (uint8_t *)idb->idb_buf + offset, length);
   2488 	if (rtt_buf == NULL) {
   2489 		/*
   2490 		 * If we're in FFP then the failure was likely a resource
   2491 		 * allocation issue and we should close the connection by
   2492 		 * sending a CE_TRANSPORT_FAIL event.
   2493 		 *
   2494 		 * If we're not in FFP then idm_buf_alloc will always
   2495 		 * fail and the state is transitioning to "complete" anyway
   2496 		 * so we won't bother to send an event.
   2497 		 */
   2498 		mutex_enter(&ic->ic_state_mutex);
   2499 		if (ic->ic_ffp)
   2500 			idm_conn_event_locked(ic, CE_TRANSPORT_FAIL,
   2501 			    NULL, CT_NONE);
   2502 		mutex_exit(&ic->ic_state_mutex);
   2503 		return;
   2504 	}
   2505 
   2506 	rtt_buf->idb_buf_cb = NULL;
   2507 	rtt_buf->idb_cb_arg = NULL;
   2508 	rtt_buf->idb_bufoffset = offset;
   2509 	rtt_buf->idb_xfer_len = length;
   2510 	rtt_buf->idb_ic = idt->idt_ic;
   2511 	rtt_buf->idb_task_binding = idt;
   2512 
   2513 	/*
   2514 	 * Put the idm_buf_t on the tx queue.  It will be transmitted by
   2515 	 * idm_sotx_thread.
   2516 	 */
   2517 	mutex_enter(&so_conn->ic_tx_mutex);
   2518 
   2519 	if (!so_conn->ic_tx_thread_running) {
   2520 		idm_buf_free(rtt_buf);
   2521 		mutex_exit(&so_conn->ic_tx_mutex);
   2522 		return;
   2523 	}
   2524 
   2525 	/*
   2526 	 * This new buffer represents an additional reference on the task
   2527 	 */
   2528 	idm_task_hold(idt);
   2529 
   2530 	/*
   2531 	 * Build a template for the data PDU headers we will use so that
   2532 	 * the SN values will stay consistent with other PDU's we are
   2533 	 * transmitting like R2T and SCSI status.
   2534 	 */
   2535 	bzero(&rtt_buf->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
   2536 	tmppdu.isp_hdr = &rtt_buf->idb_data_hdr_tmpl;
   2537 	(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
   2538 	    ISCSI_OP_SCSI_DATA);
   2539 	rtt_buf->idb_tx_thread = B_TRUE;
   2540 	rtt_buf->idb_in_transport = B_TRUE;
   2541 	list_insert_tail(&so_conn->ic_tx_list, (void *)rtt_buf);
   2542 	cv_signal(&so_conn->ic_tx_cv);
   2543 	mutex_exit(&so_conn->ic_tx_mutex);
   2544 }
   2545 
   2546 static void
   2547 idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb)
   2548 {
   2549 	/*
   2550 	 * Don't worry about status -- we assume any error handling
   2551 	 * is performed by the caller (idm_sotx_thread).
   2552 	 */
   2553 	idb->idb_in_transport = B_FALSE;
   2554 	idm_task_rele(idt);
   2555 	idm_buf_free(idb);
   2556 }
   2557 
   2558 static idm_status_t
   2559 idm_so_send_buf_region(idm_task_t *idt, idm_buf_t *idb,
   2560     uint32_t buf_region_offset, uint32_t buf_region_length)
   2561 {
   2562 	idm_conn_t		*ic;
   2563 	uint32_t		max_dataseglen;
   2564 	size_t			remainder, chunk;
   2565 	uint32_t		data_offset = buf_region_offset;
   2566 	iscsi_data_hdr_t	*bhs;
   2567 	idm_pdu_t		*pdu;
   2568 	idm_status_t		tx_status;
   2569 
   2570 	ASSERT(mutex_owned(&idt->idt_mutex));
   2571 
   2572 	ic = idt->idt_ic;
   2573 
   2574 	max_dataseglen = ic->ic_conn_params.max_xmit_dataseglen;
   2575 	remainder = buf_region_length;
   2576 
   2577 	while (remainder) {
   2578 		if (idt->idt_state != TASK_ACTIVE) {
   2579 			ASSERT((idt->idt_state != TASK_IDLE) &&
   2580 			    (idt->idt_state != TASK_COMPLETE));
   2581 			return (IDM_STATUS_ABORTED);
   2582 		}
   2583 
   2584 		/* check to see if we need to chunk the data */
   2585 		if (remainder > max_dataseglen) {
   2586 			chunk = max_dataseglen;
   2587 		} else {
   2588 			chunk = remainder;
   2589 		}
   2590 
   2591 		/* Data PDU headers will always be sizeof (iscsi_hdr_t) */
   2592 		pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
   2593 		pdu->isp_ic = ic;
   2594 		pdu->isp_flags = 0;	/* initialize isp_flags */
   2595 
   2596 		/*
   2597 		 * We've already built a build a header template
   2598 		 * to use during the transfer.  Use this template so that
   2599 		 * the SN values stay consistent with any unrelated PDU's
   2600 		 * being transmitted.
   2601 		 */
   2602 		bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr,
   2603 		    sizeof (iscsi_hdr_t));
   2604 
   2605 		/*
   2606 		 * Set DataSN, data offset, and flags in BHS
   2607 		 * For the prototype build, A = 0, S = 0, U = 0
   2608 		 */
   2609 		bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr);
   2610 
   2611 		bhs->datasn		= htonl(idt->idt_exp_datasn++);
   2612 
   2613 		hton24(bhs->dlength, chunk);
   2614 		bhs->offset = htonl(idb->idb_bufoffset + data_offset);
   2615 
   2616 		/* setup data */
   2617 		pdu->isp_data	=  (uint8_t *)idb->idb_buf + data_offset;
   2618 		pdu->isp_datalen = (uint_t)chunk;
   2619 
   2620 		if (chunk == remainder) {
   2621 			bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */
   2622 			/* Piggyback the status with the last data PDU */
   2623 			if (idt->idt_flags & IDM_TASK_PHASECOLLAPSE_REQ) {
   2624 				pdu->isp_flags |= IDM_PDU_SET_STATSN |
   2625 				    IDM_PDU_ADVANCE_STATSN;
   2626 				(*idt->idt_ic->ic_conn_ops.icb_update_statsn)
   2627 				    (idt, pdu);
   2628 				idt->idt_flags |=
   2629 				    IDM_TASK_PHASECOLLAPSE_SUCCESS;
   2630 
   2631 			}
   2632 		}
   2633 
   2634 		remainder	-= chunk;
   2635 		data_offset	+= chunk;
   2636 
   2637 		/* Instrument the data-send DTrace probe. */
   2638 		if (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) {
   2639 			DTRACE_ISCSI_2(data__send,
   2640 			    idm_conn_t *, idt->idt_ic,
   2641 			    iscsi_data_rsp_hdr_t *,
   2642 			    (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
   2643 		}
   2644 
   2645 		/*
   2646 		 * Now that we're done working with idt_exp_datasn,
   2647 		 * idt->idt_state and idb->idb_bufoffset we can release
   2648 		 * the task lock -- don't want to hold it across the
   2649 		 * call to idm_i_so_tx since we could block.
   2650 		 */
   2651 		mutex_exit(&idt->idt_mutex);
   2652 
   2653 		/*
   2654 		 * Transmit the PDU.  Call the internal routine directly
   2655 		 * as there is already implicit ordering.
   2656 		 */
   2657 		if ((tx_status = idm_i_so_tx(pdu)) != IDM_STATUS_SUCCESS) {
   2658 			mutex_enter(&idt->idt_mutex);
   2659 			return (tx_status);
   2660 		}
   2661 
   2662 		mutex_enter(&idt->idt_mutex);
   2663 		idt->idt_tx_bytes += chunk;
   2664 	}
   2665 
   2666 	return (IDM_STATUS_SUCCESS);
   2667 }
   2668 
   2669 /*
   2670  * TX PDU cache
   2671  */
   2672 /* ARGSUSED */
   2673 int
   2674 idm_sotx_pdu_constructor(void *hdl, void *arg, int flags)
   2675 {
   2676 	idm_pdu_t	*pdu = hdl;
   2677 
   2678 	bzero(pdu, sizeof (idm_pdu_t));
   2679 	pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
   2680 	pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
   2681 	pdu->isp_callback = idm_sotx_cache_pdu_cb;
   2682 	pdu->isp_magic = IDM_PDU_MAGIC;
   2683 	bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
   2684 
   2685 	return (0);
   2686 }
   2687 
   2688 /* ARGSUSED */
   2689 void
   2690 idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
   2691 {
   2692 	/* reset values between use */
   2693 	pdu->isp_datalen = 0;
   2694 
   2695 	kmem_cache_free(idm.idm_sotx_pdu_cache, pdu);
   2696 }
   2697 
   2698 /*
   2699  * RX PDU cache
   2700  */
   2701 /* ARGSUSED */
   2702 int
   2703 idm_sorx_pdu_constructor(void *hdl, void *arg, int flags)
   2704 {
   2705 	idm_pdu_t	*pdu = hdl;
   2706 
   2707 	bzero(pdu, sizeof (idm_pdu_t));
   2708 	pdu->isp_magic = IDM_PDU_MAGIC;
   2709 	pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
   2710 	pdu->isp_callback = idm_sorx_cache_pdu_cb;
   2711 
   2712 	return (0);
   2713 }
   2714 
   2715 /* ARGSUSED */
   2716 static void
   2717 idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
   2718 {
   2719 	pdu->isp_iovlen = 0;
   2720 	pdu->isp_sorx_buf = 0;
   2721 	kmem_cache_free(idm.idm_sorx_pdu_cache, pdu);
   2722 }
   2723 
   2724 static void
   2725 idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
   2726 {
   2727 	/*
   2728 	 * We had to modify our cached RX PDU with a longer header buffer
   2729 	 * and/or a longer data buffer.  Release the new buffers and fix
   2730 	 * the fields back to what we would expect for a cached RX PDU.
   2731 	 */
   2732 	if (pdu->isp_flags & IDM_PDU_ADDL_HDR) {
   2733 		kmem_free(pdu->isp_hdr, pdu->isp_hdrlen);
   2734 	}
   2735 	if (pdu->isp_flags & IDM_PDU_ADDL_DATA) {
   2736 		kmem_free(pdu->isp_data, pdu->isp_datalen);
   2737 	}
   2738 	pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1);
   2739 	pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
   2740 	pdu->isp_data = NULL;
   2741 	pdu->isp_datalen = 0;
   2742 	pdu->isp_sorx_buf = 0;
   2743 	pdu->isp_callback = idm_sorx_cache_pdu_cb;
   2744 	idm_sorx_cache_pdu_cb(pdu, status);
   2745 }
   2746 
   2747 /*
   2748  * This thread is only active when I/O is queued for transmit
   2749  * because the socket is busy.
   2750  */
   2751 void
   2752 idm_sotx_thread(void *arg)
   2753 {
   2754 	idm_conn_t	*ic = arg;
   2755 	idm_tx_obj_t	*object, *next;
   2756 	idm_so_conn_t	*so_conn;
   2757 	idm_status_t	status = IDM_STATUS_SUCCESS;
   2758 
   2759 	idm_conn_hold(ic);
   2760 
   2761 	mutex_enter(&ic->ic_mutex);
   2762 	so_conn = ic->ic_transport_private;
   2763 	so_conn->ic_tx_thread_running = B_TRUE;
   2764 	so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did;
   2765 	cv_signal(&ic->ic_cv);
   2766 	mutex_exit(&ic->ic_mutex);
   2767 
   2768 	mutex_enter(&so_conn->ic_tx_mutex);
   2769 
   2770 	while (so_conn->ic_tx_thread_running) {
   2771 		while (list_is_empty(&so_conn->ic_tx_list)) {
   2772 			DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic);
   2773 			cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex);
   2774 			DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic);
   2775 
   2776 			if (!so_conn->ic_tx_thread_running) {
   2777 				goto tx_bail;
   2778 			}
   2779 		}
   2780 
   2781 		object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
   2782 		list_remove(&so_conn->ic_tx_list, object);
   2783 		mutex_exit(&so_conn->ic_tx_mutex);
   2784 
   2785 		switch (object->idm_tx_obj_magic) {
   2786 		case IDM_PDU_MAGIC: {
   2787 			idm_pdu_t *pdu = (idm_pdu_t *)object;
   2788 			DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic,
   2789 			    idm_pdu_t *, (idm_pdu_t *)object);
   2790 
   2791 			if (pdu->isp_flags & IDM_PDU_SET_STATSN) {
   2792 				/* No IDM task */
   2793 				(ic->ic_conn_ops.icb_update_statsn)(NULL, pdu);
   2794 			}
   2795 			status = idm_i_so_tx((idm_pdu_t *)object);
   2796 			break;
   2797 		}
   2798 		case IDM_BUF_MAGIC: {
   2799 			idm_buf_t *idb = (idm_buf_t *)object;
   2800 			idm_task_t *idt = idb->idb_task_binding;
   2801 
   2802 			DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic,
   2803 			    idm_buf_t *, idb);
   2804 
   2805 			mutex_enter(&idt->idt_mutex);
   2806 			status = idm_so_send_buf_region(idt,
   2807 			    idb, 0, idb->idb_xfer_len);
   2808 
   2809 			/*
   2810 			 * TX thread owns the buffer so we expect it to
   2811 			 * be "in transport"
   2812 			 */
   2813 			ASSERT(idb->idb_in_transport);
   2814 			if (IDM_CONN_ISTGT(ic)) {
   2815 				/*
   2816 				 * idm_buf_tx_to_ini_done releases
   2817 				 * idt->idt_mutex
   2818 				 */
   2819 				DTRACE_ISCSI_8(xfer__done,
   2820 				    idm_conn_t *, idt->idt_ic,
   2821 				    uintptr_t, idb->idb_buf,
   2822 				    uint32_t, idb->idb_bufoffset,
   2823 				    uint64_t, 0, uint32_t, 0, uint32_t, 0,
   2824 				    uint32_t, idb->idb_xfer_len,
   2825 				    int, XFER_BUF_TX_TO_INI);
   2826 				idm_buf_tx_to_ini_done(idt, idb, status);
   2827 			} else {
   2828 				idm_so_send_rtt_data_done(idt, idb);
   2829 				mutex_exit(&idt->idt_mutex);
   2830 			}
   2831 			break;
   2832 		}
   2833 
   2834 		default:
   2835 			IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic "
   2836 			    "(0x%08x)", object->idm_tx_obj_magic);
   2837 			status = IDM_STATUS_FAIL;
   2838 		}
   2839 
   2840 		mutex_enter(&so_conn->ic_tx_mutex);
   2841 
   2842 		if (status != IDM_STATUS_SUCCESS) {
   2843 			so_conn->ic_tx_thread_running = B_FALSE;
   2844 			idm_conn_event(ic, CE_TRANSPORT_FAIL, status);
   2845 		}
   2846 	}
   2847 
   2848 	/*
   2849 	 * Before we leave, we need to abort every item remaining in the
   2850 	 * TX list.
   2851 	 */
   2852 
   2853 tx_bail:
   2854 	object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
   2855 
   2856 	while (object != NULL) {
   2857 		next = list_next(&so_conn->ic_tx_list, object);
   2858 
   2859 		list_remove(&so_conn->ic_tx_list, object);
   2860 		switch (object->idm_tx_obj_magic) {
   2861 		case IDM_PDU_MAGIC:
   2862 			idm_pdu_complete((idm_pdu_t *)object,
   2863 			    IDM_STATUS_ABORTED);
   2864 			break;
   2865 
   2866 		case IDM_BUF_MAGIC: {
   2867 			idm_buf_t *idb = (idm_buf_t *)object;
   2868 			idm_task_t *idt = idb->idb_task_binding;
   2869 			mutex_exit(&so_conn->ic_tx_mutex);
   2870 			mutex_enter(&idt->idt_mutex);
   2871 			/*
   2872 			 * TX thread owns the buffer so we expect it to
   2873 			 * be "in transport"
   2874 			 */
   2875 			ASSERT(idb->idb_in_transport);
   2876 			if (IDM_CONN_ISTGT(ic)) {
   2877 				/*
   2878 				 * idm_buf_tx_to_ini_done releases
   2879 				 * idt->idt_mutex
   2880 				 */
   2881 				DTRACE_ISCSI_8(xfer__done,
   2882 				    idm_conn_t *, idt->idt_ic,
   2883 				    uintptr_t, idb->idb_buf,
   2884 				    uint32_t, idb->idb_bufoffset,
   2885 				    uint64_t, 0, uint32_t, 0, uint32_t, 0,
   2886 				    uint32_t, idb->idb_xfer_len,
   2887 				    int, XFER_BUF_TX_TO_INI);
   2888 				idm_buf_tx_to_ini_done(idt, idb,
   2889 				    IDM_STATUS_ABORTED);
   2890 			} else {
   2891 				idm_so_send_rtt_data_done(idt, idb);
   2892 				mutex_exit(&idt->idt_mutex);
   2893 			}
   2894 			mutex_enter(&so_conn->ic_tx_mutex);
   2895 			break;
   2896 		}
   2897 		default:
   2898 			IDM_CONN_LOG(CE_WARN,
   2899 			    "idm_sotx_thread: Unexpected magic "
   2900 			    "(0x%08x)", object->idm_tx_obj_magic);
   2901 		}
   2902 
   2903 		object = next;
   2904 	}
   2905 
   2906 	mutex_exit(&so_conn->ic_tx_mutex);
   2907 	idm_conn_rele(ic);
   2908 	thread_exit();
   2909 	/*NOTREACHED*/
   2910 }
   2911 
   2912 static void
   2913 idm_so_socket_set_nonblock(struct sonode *node)
   2914 {
   2915 	(void) VOP_SETFL(node->so_vnode, node->so_flag,
   2916 	    (node->so_state | FNONBLOCK), CRED(), NULL);
   2917 }
   2918 
   2919 static void
   2920 idm_so_socket_set_block(struct sonode *node)
   2921 {
   2922 	(void) VOP_SETFL(node->so_vnode, node->so_flag,
   2923 	    (node->so_state & (~FNONBLOCK)), CRED(), NULL);
   2924 }
   2925 
   2926 
   2927 /*
   2928  * Called by kernel sockets when the connection has been accepted or
   2929  * rejected. In early volo, a "disconnect" callback was sent instead of
   2930  * "connectfailed", so we check for both.
   2931  */
   2932 /* ARGSUSED */
   2933 void
   2934 idm_so_timed_socket_connect_cb(ksocket_t ks,
   2935     ksocket_callback_event_t ev, void *arg, uintptr_t info)
   2936 {
   2937 	idm_so_timed_socket_t	*itp = arg;
   2938 	ASSERT(itp != NULL);
   2939 	ASSERT(ev == KSOCKET_EV_CONNECTED ||
   2940 	    ev == KSOCKET_EV_CONNECTFAILED ||
   2941 	    ev == KSOCKET_EV_DISCONNECTED);
   2942 
   2943 	mutex_enter(&idm_so_timed_socket_mutex);
   2944 	itp->it_callback_called = B_TRUE;
   2945 	if (ev == KSOCKET_EV_CONNECTED) {
   2946 		itp->it_socket_error_code = 0;
   2947 	} else {
   2948 		/* Make sure the error code is non-zero on error */
   2949 		if (info == 0)
   2950 			info = ECONNRESET;
   2951 		itp->it_socket_error_code = (int)info;
   2952 	}
   2953 	cv_signal(&itp->it_cv);
   2954 	mutex_exit(&idm_so_timed_socket_mutex);
   2955 }
   2956 
   2957 int
   2958 idm_so_timed_socket_connect(ksocket_t ks,
   2959     struct sockaddr_storage *sa, int sa_sz, int login_max_usec)
   2960 {
   2961 	clock_t			conn_login_max;
   2962 	int			rc, nonblocking, rval;
   2963 	idm_so_timed_socket_t	it;
   2964 	ksocket_callbacks_t	ks_cb;
   2965 
   2966 	conn_login_max = ddi_get_lbolt() + drv_usectohz(login_max_usec);
   2967 
   2968 	/*
   2969 	 * Set to non-block socket mode, with callback on connect
   2970 	 * Early volo used "disconnected" instead of "connectfailed",
   2971 	 * so set callback to look for both.
   2972 	 */
   2973 	bzero(&it, sizeof (it));
   2974 	ks_cb.ksock_cb_flags = KSOCKET_CB_CONNECTED |
   2975 	    KSOCKET_CB_CONNECTFAILED | KSOCKET_CB_DISCONNECTED;
   2976 	ks_cb.ksock_cb_connected = idm_so_timed_socket_connect_cb;
   2977 	ks_cb.ksock_cb_connectfailed = idm_so_timed_socket_connect_cb;
   2978 	ks_cb.ksock_cb_disconnected = idm_so_timed_socket_connect_cb;
   2979 	cv_init(&it.it_cv, NULL, CV_DEFAULT, NULL);
   2980 	rc = ksocket_setcallbacks(ks, &ks_cb, &it, CRED());
   2981 	if (rc != 0)
   2982 		return (rc);
   2983 
   2984 	/* Set to non-blocking mode */
   2985 	nonblocking = 1;
   2986 	rc = ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
   2987 	    CRED());
   2988 	if (rc != 0)
   2989 		goto cleanup;
   2990 
   2991 	bzero(&it, sizeof (it));
   2992 	for (;;) {
   2993 		/*
   2994 		 * Warning -- in a loopback scenario, the call to
   2995 		 * the connect_cb can occur inside the call to
   2996 		 * ksocket_connect. Do not hold the mutex around the
   2997 		 * call to ksocket_connect.
   2998 		 */
   2999 		rc = ksocket_connect(ks, (struct sockaddr *)sa, sa_sz, CRED());
   3000 		if (rc == 0 || rc == EISCONN) {
   3001 			/* socket success or already success */
   3002 			rc = 0;
   3003 			break;
   3004 		}
   3005 		if ((rc != EINPROGRESS) && (rc != EALREADY)) {
   3006 			break;
   3007 		}
   3008 
   3009 		/* TCP connect still in progress. See if out of time. */
   3010 		if (ddi_get_lbolt() > conn_login_max) {
   3011 			/*
   3012 			 * Connection retry timeout,
   3013 			 * failed connect to target.
   3014 			 */
   3015 			rc = ETIMEDOUT;
   3016 			break;
   3017 		}
   3018 
   3019 		/*
   3020 		 * TCP connect still in progress.  Sleep until callback.
   3021 		 * Do NOT go to sleep if the callback already occurred!
   3022 		 */
   3023 		mutex_enter(&idm_so_timed_socket_mutex);
   3024 		if (!it.it_callback_called) {
   3025 			(void) cv_timedwait(&it.it_cv,
   3026 			    &idm_so_timed_socket_mutex, conn_login_max);
   3027 		}
   3028 		if (it.it_callback_called) {
   3029 			rc = it.it_socket_error_code;
   3030 			mutex_exit(&idm_so_timed_socket_mutex);
   3031 			break;
   3032 		}
   3033 		/* If timer expires, go call ksocket_connect one last time. */
   3034 		mutex_exit(&idm_so_timed_socket_mutex);
   3035 	}
   3036 
   3037 	/* resume blocking mode */
   3038 	nonblocking = 0;
   3039 	(void) ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
   3040 	    CRED());
   3041 cleanup:
   3042 	(void) ksocket_setcallbacks(ks, NULL, NULL, CRED());
   3043 	cv_destroy(&it.it_cv);
   3044 	if (rc != 0) {
   3045 		idm_soshutdown(ks);
   3046 	}
   3047 	return (rc);
   3048 }
   3049 
   3050 
   3051 void
   3052 idm_addr_to_sa(idm_addr_t *dportal, struct sockaddr_storage *sa)
   3053 {
   3054 	int			dp_addr_size;
   3055 	struct sockaddr_in	*sin;
   3056 	struct sockaddr_in6	*sin6;
   3057 
   3058 	/* Build sockaddr_storage for this portal (idm_addr_t) */
   3059 	bzero(sa, sizeof (*sa));
   3060 	dp_addr_size = dportal->a_addr.i_insize;
   3061 	if (dp_addr_size == sizeof (struct in_addr)) {
   3062 		/* IPv4 */
   3063 		sa->ss_family = AF_INET;
   3064 		sin = (struct sockaddr_in *)sa;
   3065 		sin->sin_port = htons(dportal->a_port);
   3066 		bcopy(&dportal->a_addr.i_addr.in4,
   3067 		    &sin->sin_addr, sizeof (struct in_addr));
   3068 	} else if (dp_addr_size == sizeof (struct in6_addr)) {
   3069 		/* IPv6 */
   3070 		sa->ss_family = AF_INET6;
   3071 		sin6 = (struct sockaddr_in6 *)sa;
   3072 		sin6->sin6_port = htons(dportal->a_port);
   3073 		bcopy(&dportal->a_addr.i_addr.in6,
   3074 		    &sin6->sin6_addr, sizeof (struct in6_addr));
   3075 	} else {
   3076 		ASSERT(0);
   3077 	}
   3078 }
   3079 
   3080 
   3081 /*
   3082  * return a human-readable form of a sockaddr_storage, in the form
   3083  * [ip-address]:port.  This is used in calls to logging functions.
   3084  * If several calls to idm_sa_ntop are made within the same invocation
   3085  * of a logging function, then each one needs its own buf.
   3086  */
   3087 const char *
   3088 idm_sa_ntop(const struct sockaddr_storage *sa,
   3089     char *buf, size_t size)
   3090 {
   3091 	static const char bogus_ip[] = "[0].-1";
   3092 	char tmp[INET6_ADDRSTRLEN];
   3093 
   3094 	switch (sa->ss_family) {
   3095 	case AF_INET6:
   3096 		{
   3097 			const struct sockaddr_in6 *in6 =
   3098 			    (const struct sockaddr_in6 *) sa;
   3099 
   3100 			if (inet_ntop(in6->sin6_family,
   3101 			    &in6->sin6_addr, tmp, sizeof (tmp)) == NULL) {
   3102 				goto err;
   3103 			}
   3104 			if (strlen(tmp) + sizeof ("[].65535") > size) {
   3105 				goto err;
   3106 			}
   3107 			/* struct sockaddr_storage gets port info from v4 loc */
   3108 			(void) snprintf(buf, size, "[%s].%u", tmp,
   3109 			    ntohs(in6->sin6_port));
   3110 			return (buf);
   3111 		}
   3112 	case AF_INET:
   3113 		{
   3114 			const struct sockaddr_in *in =
   3115 			    (const struct sockaddr_in *) sa;
   3116 
   3117 			if (inet_ntop(in->sin_family, &in->sin_addr,
   3118 			    tmp, sizeof (tmp)) == NULL) {
   3119 				goto err;
   3120 			}
   3121 			if (strlen(tmp) + sizeof ("[].65535") > size) {
   3122 				goto err;
   3123 			}
   3124 			(void) snprintf(buf, size,  "[%s].%u", tmp,
   3125 			    ntohs(in->sin_port));
   3126 			return (buf);
   3127 		}
   3128 	default:
   3129 		break;
   3130 	}
   3131 err:
   3132 	(void) snprintf(buf, size, "%s", bogus_ip);
   3133 	return (buf);
   3134 }
   3135