Home | History | Annotate | Download | only in iscsitgtd
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include <aio.h>
     28 #include <sys/aio.h>
     29 #include <sys/asynch.h>
     30 #include <stdio.h>
     31 #include <stddef.h>
     32 #include <strings.h>
     33 #include <pthread.h>
     34 #include <sys/types.h>
     35 #include <sys/statvfs.h>
     36 #include <sys/avl.h>
     37 #include <sys/param.h>
     38 #include <sys/mman.h>
     39 #include <sys/stat.h>
     40 #include <fcntl.h>
     41 #include <assert.h>
     42 #include <errno.h>
     43 #include <unistd.h>
     44 #include <signal.h>
     45 #include <sys/ucontext.h>
     46 #include <assert.h>
     47 #include <umem.h>
     48 #include <time.h>
     49 #include <syslog.h>
     50 
     51 #include <sys/scsi/generic/sense.h>
     52 #include <sys/scsi/generic/status.h>
     53 #include <sys/scsi/generic/inquiry.h>
     54 
     55 #include "target.h"
     56 #include "queue.h"
     57 #include "t10.h"
     58 #include "t10_spc.h"
     59 #include "utility.h"
     60 #include "mgmt_scf.h"
     61 
/*
 * []------------------------------------------------------------------[]
 * | This file contains methods which isolate a transport from device   |
 * | emulation. The first part of the file contains methods which are	|
 * | called by the transport to start commands or deliver data. The	|
 * | transport does not know anything about what emulation is being	|
 * | done. The emulation layer receives CDBs and knows nothing about	|
 * | the transport. This is how it should be. There are a few special	|
 * | cases to deal with transports which have a notion of immediate	|
 * | data, but we're isolating that from the emulation layer.		|
 * []------------------------------------------------------------------[]
 */
     74 
/* Initial count given to t10_aio_sema in t10_init(). */
#define	MAX_AIO_CNT	256

/*
 * Forward declarations
 */
static Boolean_t t10_find_lun(t10_targ_impl_t *t, int lun, t10_cmd_t *);
static void *lu_runner(void *v);
static Boolean_t t10_lu_initialize(t10_lu_common_t *lu, char *basedir);
static void *t10_aio_done(void *v);
static Boolean_t lu_remove_cmds(msg_t *m, void *v);
static void cmd_common_free(t10_cmd_t *cmd);
static Boolean_t load_params(t10_lu_common_t *lu, char *basedir);
static Boolean_t fallocate(int fd, off64_t len);
static t10_cmd_state_t t10_cmd_state_machine(t10_cmd_t *c, t10_cmd_event_t e);
static void clear_transport(transport_t t, t10_cmd_t *t10c);

#ifdef FULL_DEBUG
static char *state_to_str(t10_cmd_state_t s);
#endif
static char *event_to_str(t10_cmd_event_t e);
/* ---- These are AVL comparison routines ---- */
static int find_lu_by_num(const void *v1, const void *v2);
static int find_lu_by_guid(const void *v1, const void *v2);
static int find_lu_by_targ(const void *v1, const void *v2);
static int find_cmd_by_addr(const void *v1, const void *v2);
static sam_device_table_t sam_emul_table[];

/*
 * Local variables
 */
/*
 * Tree of t10_lu_common_t structures, ordered by find_lu_by_guid() and
 * linked through l_all_luns (see t10_init()).
 */
static avl_tree_t	lu_list;
/* NOTE(review): presumably guards lu_list; confirm against its users. */
static pthread_mutex_t	lu_list_mutex;
static int		lu_id;
/* Management queue handed to t10_init(); destination of queue_prt() output. */
target_queue_t		*mgmtq;
/* Held while t10_num is advanced in t10_handle_create(). */
static pthread_mutex_t	t10_mutex;
/* Next value handed out as a target's s_targ_num. */
static int		t10_num;
/*
 * Posted by the command state machine on a T3 (AIO started) event and
 * waited on by t10_aio_done() before it calls aiowait().
 */
static sema_t		t10_sema;
/*
 * Starts at MAX_AIO_CNT and is posted each time an AIO request completes
 * or a canceled request is reclaimed.
 */
static sema_t		t10_aio_sema;

/*
 * Constants
 */
/* 0 seconds, 1000 nanoseconds: the nanosleep() interval used on retry. */
static const timespec_t usec = {0, 1000};
    118 
    119 /*
    120  * []----
    121  * | t10_init -- called once at the beginning of time to initialize globals
    122  * []----
    123  */
    124 void
    125 t10_init(target_queue_t *q)
    126 {
    127 	pthread_t	junk;
    128 
    129 	mgmtq = q;
    130 	(void) pthread_mutex_init(&lu_list_mutex, NULL);
    131 	(void) pthread_mutex_init(&t10_mutex, NULL);
    132 	(void) sema_init(&t10_sema, 0, USYNC_THREAD, NULL);
    133 	(void) sema_init(&t10_aio_sema, MAX_AIO_CNT, USYNC_THREAD, NULL);
    134 	avl_create(&lu_list, find_lu_by_guid, sizeof (t10_lu_common_t),
    135 	    offsetof(t10_lu_common_t, l_all_luns));
    136 	(void) pthread_create(&junk, NULL, t10_aio_done, NULL);
    137 }
    138 
    139 /*ARGSUSED*/
    140 static void *
    141 t10_aio_done(void *v)
    142 {
    143 	aio_result_t	*result;
    144 	t10_aio_t	*a;
    145 	t10_lu_impl_t	*lu;
    146 
    147 	do {
    148 		if (sema_wait(&t10_sema) != 0) {
    149 			queue_prt(mgmtq, Q_STE_ERRS,
    150 			    "SAM-  sema_wait returned error\n");
    151 			continue;
    152 		}
    153 
    154 		if ((result = aiowait(NULL)) == (aio_result_t *)-1) {
    155 			if (errno == EINVAL) {
    156 				queue_prt(mgmtq, Q_STE_ERRS,
    157 				    "SAM-  aiowait returned EINVAL\n");
    158 				continue;
    159 			} else
    160 				break;
    161 		} else {
    162 			a = (t10_aio_t *)result;
    163 			(void) sema_post(&t10_aio_sema);
    164 		}
    165 		if ((a != NULL) && (a->a_aio_cmplt != NULL)) {
    166 			lu = a->a_cmd->c_lu;
    167 			(void) pthread_mutex_lock(&lu->l_cmd_mutex);
    168 			if (t10_cmd_state_machine(a->a_cmd, T10_Cmd_T4) !=
    169 			    T10_Cmd_S1_Free) {
    170 				(void) pthread_mutex_unlock(&lu->l_cmd_mutex);
    171 				(*a->a_aio_cmplt)(a->a_id);
    172 			} else
    173 				(void) pthread_mutex_unlock(&lu->l_cmd_mutex);
    174 		} else {
    175 			queue_prt(mgmtq, Q_STE_ERRS,
    176 			    "SAM   aiowait returned results, but is NULL\n");
    177 		}
    178 	/*CONSTANTCONDITION*/
    179 	} while (1);
    180 
    181 	return (NULL);
    182 }
    183 
    184 /*
    185  * []------------------------------------------------------------------[]
    186  * | Methods called by transports to interface with SAM-3		|
    187  * []------------------------------------------------------------------[]
    188  */
    189 
    190 /*
    191  * []----
    192  * | t10_handle_create -- Create the I_T nexus
    193  * |
    194  * | NOTES:
    195  * | max_out can be set to 0 if the transport wishes to wait for all of
    196  * | the data before receiving a DATAOUT message. Fibre Channel will most
    197  * | likely set this to 0, whereas iSCSI will set max_out to the value
    198  * | of MaxRecvDataSegment.
    199  * | (*datain_cb)() is called, on the LU thread, when the emulation
    200  * | module needs data *and* t10_send_cmd was called with opt_data_len, but
    201  * | no opt_data.
    202  * []----
    203  */
    204 t10_targ_handle_t
    205 t10_handle_create(char *targ, char *init, int trans_vers, int tpg, int max_out,
    206     target_queue_t *tq, void (*datain_cb)(t10_cmd_t *, char *, size_t *))
    207 {
    208 	t10_targ_impl_t	*t = calloc(1, sizeof (t10_targ_impl_t));
    209 
    210 	if (t == NULL)
    211 		return (NULL);
    212 
    213 	(void) pthread_mutex_lock(&t10_mutex);
    214 	t->s_targ_num		= t10_num++;
    215 	(void) pthread_mutex_unlock(&t10_mutex);
    216 	t->s_targ_base		= strdup(targ);
    217 	t->s_i_name		= strdup(init);
    218 	t->s_trans_vers		= trans_vers;
    219 	t->s_maxout		= max_out;
    220 	t->s_to_transport	= tq;
    221 	t->s_dataout_cb		= datain_cb;
    222 
    223 	/*
    224 	 * Once we actually support two or more transports it would be
    225 	 * possible for a collision between the underlying transports
    226 	 * target port group values since one wouldn't necessarily know
    227 	 * anything about the other. We'll use the upper bits of the
    228 	 * target port group value to separate them.
    229 	 * If we were to support many transports and with one then running
    230 	 * out of bit space we'd need to change the allocation method. Since
    231 	 * these values aren't stored anywhere and just used by initiators
    232 	 * to determine relative path numbering there's no issue with changing
    233 	 * this later if need be.
    234 	 */
    235 	switch (trans_vers) {
    236 	case T10_TRANS_ISCSI:
    237 		t->s_tpgt	= 0x0000 | tpg;
    238 		break;
    239 
    240 	case T10_TRANS_FC:
    241 		t->s_tpgt	= 0x8000 | tpg;
    242 		break;
    243 	}
    244 
    245 	avl_create(&t->s_open_lu, find_lu_by_num, sizeof (t10_lu_impl_t),
    246 	    offsetof(t10_lu_impl_t, l_open_targ_node));
    247 
    248 	(void) pthread_mutex_init(&t->s_mutex, NULL);
    249 	return ((t10_targ_handle_t)t);
    250 }
    251 
    252 void
    253 t10_handle_disable(t10_targ_handle_t tp)
    254 {
    255 	t10_targ_impl_t	*t		= (t10_targ_impl_t *)tp;
    256 	t10_lu_impl_t	*l;
    257 	t10_shutdown_t	s;
    258 	int		lu_per_targ	= 0;
    259 
    260 	(void) pthread_mutex_lock(&t->s_mutex);
    261 	if (avl_numnodes(&t->s_open_lu) != 0) {
    262 		s.t_q = queue_alloc();
    263 		l = avl_first(&t->s_open_lu);
    264 		while (l != NULL) {
    265 
    266 			s.t_lu = l;
    267 			queue_message_set(l->l_common->l_from_transports, 0,
    268 			    msg_shutdown, (void *)&s);
    269 			queue_message_free(queue_message_get(s.t_q));
    270 			lu_per_targ++;
    271 			l = AVL_NEXT(&t->s_open_lu, l);
    272 		}
    273 		queue_prt(mgmtq, Q_STE_NONIO,
    274 		    "SAM%x  Sent %d shutdown requests for %s\n",
    275 		    t->s_targ_num, lu_per_targ, t->s_targ_base);
    276 		queue_free(s.t_q, NULL);
    277 	}
    278 	(void) pthread_mutex_unlock(&t->s_mutex);
    279 }
    280 
    281 int
    282 t10_handle_destroy(t10_targ_handle_t tp, Boolean_t wait)
    283 {
    284 	t10_targ_impl_t	*t		= (t10_targ_impl_t *)tp;
    285 	t10_lu_impl_t	*l;
    286 	t10_cmd_t	*c;
    287 	t10_cmd_t	*c2free;
    288 	int		fast_free	= 0;
    289 
    290 	(void) pthread_mutex_lock(&t->s_mutex);
    291 	if (avl_numnodes(&t->s_open_lu) != 0) {
    292 		while ((l = avl_first(&t->s_open_lu)) != NULL) {
    293 
    294 			(void) pthread_mutex_lock(&l->l_cmd_mutex);
    295 			if (avl_numnodes(&l->l_cmds) != 0) {
    296 				c = avl_first(&l->l_cmds);
    297 				while (c != NULL) {
    298 					c2free = c;
    299 					c = AVL_NEXT(&l->l_cmds, c);
    300 					/*
    301 					 * Remove those commands which
    302 					 * are waiting for a response from
    303 					 * the initiator or have already
    304 					 * been canceled by the transport.
    305 					 * The initiator response won't
    306 					 * arrive since the connection
    307 					 * is shutting down.  If the
    308 					 * backing store is closed, then
    309 					 * all the aio requests are
    310 					 * canceled by libaio, we can
    311 					 * free the t10_cmd in S4 or
    312 					 * S7 state.
    313 					 *
    314 					 * Other commands will be freed as
    315 					 * they are processed by the
    316 					 * transport layer or AIO.
    317 					 */
    318 					if ((c2free->c_state ==
    319 					    T10_Cmd_S5_Wait) ||
    320 					    (c2free->c_state ==
    321 					    T10_Cmd_S6_Freeing_In)) {
    322 						t10_aio_t *a;
    323 
    324 						a = (t10_aio_t *)
    325 						    c2free->c_emul_id;
    326 						if (a != NULL) {
    327 							queue_prt(mgmtq,
    328 							    Q_STE_NONIO,
    329 							    "SAM%x ... "
    330 							    "S5 or S6 Cmd %p, "
    331 							    "errno/ret %d/%d\n",
    332 							    t->s_targ_num,
    333 							    c2free,
    334 							    a->a_aio.aio_errno,
    335 							    /*CSTYLED*/
    336 							    a->a_aio.aio_return);
    337 						}
    338 						fast_free++;
    339 						(void) t10_cmd_state_machine(
    340 						    c2free, T10_Cmd_T8);
    341 					} else if ((c2free->c_state ==
    342 					    T10_Cmd_S4_AIO) ||
    343 					    (c2free->c_state ==
    344 					    T10_Cmd_S7_Freeing_AIO)) {
    345 						t10_aio_t *a;
    346 
    347 						a = (t10_aio_t *)
    348 						    c2free->c_emul_id;
    349 						if (a == NULL) {
    350 							continue;
    351 						} else if (a->a_aio.aio_errno ==
    352 						    ECANCELED) {
    353 							fast_free++;
    354 							/*
    355 							 * Note, using T5 not T8
    356 							 * because S4 + T8 = S7
    357 							 * not S1, S1 is the
    358 							 * desired result.
    359 							 */
    360 							/*CSTYLED*/
    361 							(void) t10_cmd_state_machine(c2free, T10_Cmd_T5);
    362 							/*
    363 							 * Account for this cmd
    364 							 * in aio sema.
    365 							 */
    366 							(void) sema_post(
    367 							    &t10_aio_sema);
    368 						} else {
    369 							queue_prt(mgmtq,
    370 							    Q_STE_NONIO,
    371 							    "SAM%x ... "
    372 							    "S4 or S7 Cmd %p, "
    373 							    "errno/ret %d/%d\n",
    374 							    t->s_targ_num,
    375 							    c2free,
    376 							    a->a_aio.aio_errno,
    377 							    /*CSTYLED*/
    378 							    a->a_aio.aio_return);
    379 						}
    380 					} else if (c2free->c_state ==
    381 					    T10_Cmd_S3_Trans) {
    382 						t10_aio_t *a;
    383 
    384 						a = (t10_aio_t *)
    385 						    c2free->c_emul_id;
    386 						if (a != NULL) {
    387 							queue_prt(mgmtq,
    388 							    Q_STE_NONIO,
    389 							    "SAM%x ... "
    390 							    "S3 Cmd %p, "
    391 							    "errno/ret %d/%d\n",
    392 							    t->s_targ_num,
    393 							    c2free,
    394 							    a->a_aio.aio_errno,
    395 							    /*CSTYLED*/
    396 							    a->a_aio.aio_return);
    397 						}
    398 						fast_free++;
    399 						(void) t10_cmd_state_machine(
    400 						    c2free, T10_Cmd_T8);
    401 					}
    402 				}
    403 				queue_prt(mgmtq, Q_STE_NONIO,
    404 				    "SAM%x  FastFree %d ... "
    405 				    "Waiting for %d cmds to drain\n",
    406 				    t->s_targ_num, fast_free,
    407 				    avl_numnodes(&l->l_cmds));
    408 
    409 				if (avl_numnodes(&l->l_cmds) != 0) {
    410 					l->l_wait_for_drain = True;
    411 					if (wait) {
    412 						while (l->l_wait_for_drain ==
    413 						    True) {
    414 							(void) pthread_cond_wait
    415 							    (&l->l_cmd_cond,
    416 							    &l->l_cmd_mutex);
    417 						}
    418 						assert(
    419 						    avl_numnodes(&l->l_cmds)
    420 						    == 0);
    421 						queue_prt(mgmtq, Q_STE_NONIO,
    422 						    "SAM%x  Commands drained\n",
    423 						    t->s_targ_num);
    424 					} else {
    425 						(void) pthread_mutex_unlock(
    426 						    &l->l_cmd_mutex);
    427 						(void) pthread_mutex_unlock(
    428 						    &t->s_mutex);
    429 						(void) nanosleep(&usec, 0);
    430 						return (1);
    431 					}
    432 				}
    433 			}
    434 			avl_remove(&t->s_open_lu, l);
    435 			avl_destroy(&l->l_cmds);
    436 			(void) pthread_mutex_unlock(&l->l_cmd_mutex);
    437 			free(l);
    438 		}
    439 	}
    440 	avl_destroy(&t->s_open_lu);
    441 	(void) pthread_mutex_unlock(&t->s_mutex);
    442 
    443 	(void) pthread_mutex_destroy(&t->s_mutex);
    444 	free(t->s_targ_base);
    445 	free(t->s_i_name);
    446 	free(t);
    447 	return (0);
    448 }
    449 
    450 /*
    451  * []----
    452  * | t10_cmd_create -- creates a command pointer
    453  * |
    454  * | If an error occurs, a sense condition buffer will be created that can
    455  * | be sent back to the initiator. The only time this should occur is during
    456  * | LU setup and we've run out of resources like not having enough file
    457  * | descriptors to open the backing store. If the cmdp is NULL, then there's
    458  * | not even enough memory to create a command buffer and the transport
    459  * | should shutdown it's connection a cleanly as possible.
    460  * []----
    461  */
    462 Boolean_t
    463 t10_cmd_create(t10_targ_handle_t t, int lun_number, uint8_t *cdb,
    464     size_t cdb_len, transport_t trans_id, t10_cmd_t **cmdp)
    465 {
    466 	t10_cmd_t	*cmd	= NULL;
    467 
    468 	*cmdp = NULL;
    469 	if (t == NULL)
    470 		goto error;
    471 
    472 	if ((cmd = umem_cache_alloc(t10_cmd_cache, UMEM_DEFAULT)) == NULL)
    473 		goto error;
    474 
    475 	bzero(cmd, sizeof (*cmd));
    476 	if ((cmd->c_cdb = malloc(cdb_len)) == NULL)
    477 		goto error;
    478 
    479 	cmd->c_trans_id	= trans_id;
    480 	*cmdp		= cmd;
    481 	if (t10_find_lun((t10_targ_impl_t *)t, lun_number, cmd) == False)
    482 		goto error;
    483 
    484 	(void) pthread_mutex_lock(&cmd->c_lu->l_cmd_mutex);
    485 	avl_add(&cmd->c_lu->l_cmds, (void *)cmd);
    486 	cmd->c_state	= T10_Cmd_S1_Free;
    487 	(void) pthread_mutex_unlock(&cmd->c_lu->l_cmd_mutex);
    488 	bcopy(cdb, cmd->c_cdb, cdb_len);
    489 	cmd->c_cdb_len	= cdb_len;
    490 
    491 	return (True);
    492 
    493 error:
    494 	if (cmd && cmd->c_cdb) {
    495 		free(cmd->c_cdb);
    496 		cmd->c_cdb = NULL;
    497 	}
    498 
    499 	/*
    500 	 * If we haven't set up the argument pointer, then free the memory
    501 	 * that had been allocated to the command.
    502 	 */
    503 	if (*cmdp == NULL)
    504 		umem_cache_free(t10_cmd_cache, cmd);
    505 	return (False);
    506 }
    507 
    508 /*
    509  * []----
    510  * | t10_send_cmd -- send the given command to appropriate LUN emulation
    511  * |
    512  * | NOTE: emul_id is only provided for DATA_OUT commands (write ops)
    513  * | which have multiple phases to complete the request. The emulation
    514  * | module will provide this value when it requests more data to be
    515  * | sent.
    516  * []----
    517  */
    518 /*ARGSUSED*/
    519 Boolean_t
    520 t10_cmd_send(t10_targ_handle_t t, t10_cmd_t *cmd, char *opt_data,
    521     size_t opt_data_len)
    522 {
    523 	if (cmd == NULL)
    524 		return (False);
    525 
    526 	cmd->c_data	= opt_data;
    527 	cmd->c_data_len	= opt_data_len;
    528 
    529 	t10_cmd_shoot_event(cmd, T10_Cmd_T1);
    530 	return (True);
    531 }
    532 
    533 /*ARGSUSED*/
    534 Boolean_t
    535 t10_cmd_data(t10_targ_handle_t t, t10_cmd_t *cmd, size_t offset, char *data,
    536     size_t data_len)
    537 {
    538 	if (cmd == NULL)
    539 		return (False);
    540 	cmd->c_data	= data;
    541 	cmd->c_data_len	= data_len;
    542 	cmd->c_offset	= offset;
    543 
    544 	t10_cmd_shoot_event(cmd, T10_Cmd_T4);
    545 	return (True);
    546 }
    547 
    548 void
    549 t10_cmd_done(t10_cmd_t *cmd)
    550 {
    551 	if (cmd != NULL)
    552 		t10_cmd_shoot_event(cmd, T10_Cmd_T5);
    553 }
    554 
    555 /*
    556  * t10_cmd_state_machine -- State machine for T10 commands
    557  *
    558  *	S1: Free	- State on instantiation, or after successful
    559  *			  completion of command
    560  *	S2: In		- The command is currently being processed
    561  *			  by the lu_runner() thread. Memory associated
    562  *			  with the command must not be freed. Can't
    563  *			  transition directly to Free state from threads
    564  *			  other than lu_runner().
    565  *	S3: Trans	- Command has been handed off to transport layer
    566  *	S4: AIO		- Command has been sent to AIO subsystem for
    567  *			  further processing.
    568  *	S5: Wait	- Waiting for response from Initiator.
    569  *	S6: Freeing_In	- Free command while command in lu_runner.
    570  *	S7: Freeing_AIO - Free command while command is in AIO.
    571  *
    572  * The state transition table is as follows:
    573  *
    574  *	   +----------+---+---+---+---+---+----+
    575  *	   |S1        |S2 |S3 |S4 |S5 |S6 |S7  |
    576  *	---+----------+---+---+---+---+--------+
    577  *	 S1|T4/5/6/8  |T1 | - | - | - | - | -  |
    578  *	---+----------+---+---+---+---+--------+
    579  *	 S2|T5/8      | - |T2 |T3 |T7 |T6 | -  |
    580  *	---+----------+---+---+---+---+--------+
    581  *	 S3|T5/8      |T4 | - | - |T7 |T6 | -  |
    582  *	---+----------+---+---+---+---+--------+
    583  *	 S4|T5        |T4 | - | - | - | - |T6/8|
    584  *	---+----------+---+---+---+---+--------+
    585  *	 S5|T5/8      | - |T4 | - | - |T6 | -  |
    586  *	---+----------+---+---+---+---+--------+
    587  *	 S6|T2/4/5/6/8| - | - | - | - | - |T3  |
    588  *	---+----------+---+---+---+---+--------+
    589  *	 S7|T4/5/8    | - | - | - | - | - |T6  |
    590  *	---+----------+---+---+---+---+--------+
    591  *
    592  * Events definitions:
    593  * -T1: Command has been placed on LU queue for exection.
    594  * -T2: Emulation completed to a point where the transport must
    595  *	take over and send data or CDB response out.
    596  * -T3: Emulation requires data from storage subsystem via asynchronous
    597  *	I/O.
    598  * -T4: One of the following events has caused the transition:
    599  *	- Response from initiator to R2T request.
    600  *	- Transport has data available to complete dataout request from T10.
    601  *	- AIO has completed read/write op.
    602  * -T5: Command complete. Free resources.
    603  * -T6: Cancel command.
    604  * -T7: Transport has sent command to Initiator.
    605  * -T8: Shutting down, cancel or complete as appropriate
    606  */
    607 static t10_cmd_state_t
    608 t10_cmd_state_machine(t10_cmd_t *c, t10_cmd_event_t e)
    609 {
    610 	t10_lu_impl_t	*lu		= c->c_lu;
    611 
    612 	/* ---- Callers must already hold the mutex ---- */
    613 	assert(pthread_mutex_trylock(&lu->l_cmd_mutex) != 0);
    614 
    615 	switch (c->c_state) {
    616 	case T10_Cmd_S1_Free:
    617 		switch (e) {
    618 		case T10_Cmd_T1:
    619 			c->c_state = T10_Cmd_S2_In;
    620 			queue_message_set(c->c_lu->l_common->l_from_transports,
    621 			    0, msg_cmd_send, (void *)c);
    622 			break;
    623 
    624 		case T10_Cmd_T4:
    625 		case T10_Cmd_T5:
    626 		case T10_Cmd_T6: /* warm reset */
    627 		case T10_Cmd_T8: /* shutdown */
    628 			c->c_state = T10_Cmd_S1_Free;
    629 			cmd_common_free(c);
    630 			return (T10_Cmd_S1_Free);
    631 
    632 		default:
    633 			queue_prt(mgmtq, Q_STE_ERRS,
    634 			    "Illegal event %s on %llx\n", event_to_str(e),
    635 			    c->c_trans_id);
    636 			assert(0);
    637 		}
    638 		break;
    639 
    640 	case T10_Cmd_S2_In:
    641 		switch (e) {
    642 		case T10_Cmd_T2:
    643 			c->c_state = T10_Cmd_S3_Trans;
    644 			queue_message_set(c->c_lu->l_to_transport, 0,
    645 			    c->c_msg, (void *)c);
    646 			break;
    647 
    648 		case T10_Cmd_T3:
    649 			c->c_state = T10_Cmd_S4_AIO;
    650 			(void) sema_post(&t10_sema);
    651 			break;
    652 
    653 		case T10_Cmd_T5:
    654 		case T10_Cmd_T8: /* shutdown */
    655 			c->c_state = T10_Cmd_S1_Free;
    656 			cmd_common_free(c);
    657 			return (T10_Cmd_S1_Free);
    658 
    659 		case T10_Cmd_T6:
    660 			c->c_state = T10_Cmd_S6_Freeing_In;
    661 			break;
    662 
    663 		case T10_Cmd_T7:
    664 			c->c_state = T10_Cmd_S5_Wait;
    665 			break;
    666 
    667 		default:
    668 			queue_prt(mgmtq, Q_STE_ERRS,
    669 			    "SAM: Illegal event %s on %llx\n",
    670 			    event_to_str(e), c->c_trans_id);
    671 			assert(0);
    672 		}
    673 		break;
    674 
    675 	case T10_Cmd_S3_Trans:
    676 		switch (e) {
    677 		case T10_Cmd_T4:
    678 			c->c_state = T10_Cmd_S2_In;
    679 			queue_message_set(lu->l_common->l_from_transports, 0,
    680 			    msg_cmd_data_out, (void *)c);
    681 			break;
    682 
    683 		case T10_Cmd_T5:
    684 		case T10_Cmd_T8: /* shutdown */
    685 			c->c_state = T10_Cmd_S1_Free;
    686 			cmd_common_free(c);
    687 			return (T10_Cmd_S1_Free);
    688 
    689 		case T10_Cmd_T6:
    690 			c->c_state = T10_Cmd_S6_Freeing_In;
    691 			break;
    692 
    693 		case T10_Cmd_T7:
    694 			c->c_state = T10_Cmd_S5_Wait;
    695 			break;
    696 
    697 		default:
    698 			queue_prt(mgmtq, Q_STE_ERRS,
    699 			    "Illegal event %s -- %llx\n", event_to_str(e),
    700 			    c->c_trans_id);
    701 			assert(0);
    702 		}
    703 		break;
    704 
    705 	case T10_Cmd_S4_AIO:
    706 		switch (e) {
    707 		case T10_Cmd_T4:
    708 			c->c_state = T10_Cmd_S2_In;
    709 			break;
    710 
    711 		case T10_Cmd_T5:
    712 			c->c_state = T10_Cmd_S1_Free;
    713 			cmd_common_free(c);
    714 			return (T10_Cmd_S1_Free);
    715 
    716 		case T10_Cmd_T6:
    717 		case T10_Cmd_T8: /* shutdown */
    718 			c->c_state = T10_Cmd_S7_Freeing_AIO;
    719 			break;
    720 
    721 		default:
    722 			queue_prt(mgmtq, Q_STE_ERRS,
    723 			    "Illegal event %s -- %llx\n", event_to_str(e),
    724 			    c->c_trans_id);
    725 			assert(0);
    726 		}
    727 		break;
    728 
    729 	case T10_Cmd_S5_Wait:
    730 		switch (e) {
    731 		case T10_Cmd_T4:
    732 			c->c_state = T10_Cmd_S3_Trans;
    733 			break;
    734 
    735 		case T10_Cmd_T5:
    736 		case T10_Cmd_T8: /* shutdown */
    737 			c->c_state = T10_Cmd_S1_Free;
    738 			cmd_common_free(c);
    739 			return (T10_Cmd_S1_Free);
    740 
    741 		case T10_Cmd_T6:
    742 			c->c_state = T10_Cmd_S6_Freeing_In;
    743 			break;
    744 
    745 		default:
    746 			queue_prt(mgmtq, Q_STE_ERRS,
    747 			    "Illegal event %s -- %llx\n", event_to_str(e),
    748 			    c->c_trans_id);
    749 			assert(0);
    750 		}
    751 		break;
    752 
    753 	case T10_Cmd_S6_Freeing_In:
    754 		switch (e) {
    755 		case T10_Cmd_T2:
    756 		case T10_Cmd_T4: /* AIO complete */
    757 		case T10_Cmd_T5: /* command complete */
    758 		case T10_Cmd_T6: /* warm reset */
    759 		case T10_Cmd_T8: /* shutdown */
    760 			c->c_state = T10_Cmd_S1_Free;
    761 			cmd_common_free(c);
    762 			return (T10_Cmd_S1_Free);
    763 
    764 		case T10_Cmd_T3:
    765 			c->c_state = T10_Cmd_S7_Freeing_AIO;
    766 			(void) sema_post(&t10_sema);
    767 			break;
    768 
    769 		default:
    770 			queue_prt(mgmtq, Q_STE_ERRS,
    771 			    "Illegal event %s -- %llx\n", event_to_str(e),
    772 			    c->c_trans_id);
    773 			assert(0);
    774 		}
    775 		break;
    776 
    777 	case T10_Cmd_S7_Freeing_AIO:
    778 		switch (e) {
    779 		case T10_Cmd_T4:	/* AIO complete */
    780 		case T10_Cmd_T5:	/* command complete */
    781 		case T10_Cmd_T8:
    782 			c->c_state = T10_Cmd_S1_Free;
    783 			cmd_common_free(c);
    784 			return (T10_Cmd_S1_Free);
    785 
    786 		case T10_Cmd_T6: /* warm reset */
    787 			queue_prt(mgmtq, Q_GEN_DETAILS,
    788 			    "Event %s in T10_Cmd_S7_Freeing_AIO -- %llx\n",
    789 			    event_to_str(e), c->c_trans_id);
    790 			break;
    791 
    792 		default:
    793 			queue_prt(mgmtq, Q_STE_ERRS,
    794 			    "Illegal event %s -- %llx\n", event_to_str(e),
    795 			    c->c_trans_id);
    796 			assert(0);
    797 		}
    798 		break;
    799 
    800 	default:
    801 		assert(0);
    802 	}
    803 	return (c->c_state);
    804 }
    805 
    806 void
    807 t10_cmd_shoot_event(t10_cmd_t *c, t10_cmd_event_t e)
    808 {
    809 	t10_lu_impl_t	*lu;
    810 
    811 	/*
    812 	 * Since the transport may or may not have called into the T10 layer
    813 	 * to allocate a command it's possible that this will be NULL. Instead
    814 	 * of requiring every caller of this function to first check if the
    815 	 * command pointer is null we'll do the check here.
    816 	 */
    817 	if (c == NULL)
    818 		return;
    819 
    820 	lu = c->c_lu;
    821 
    822 	/*
    823 	 * If t10_cmd_create() fails for some reason other than lack
    824 	 * of memory the extended status will be set for the transport
    825 	 * to send out. There will not be any LU associated with this
    826 	 * command, but the transport will still try to free it.
    827 	 */
    828 	if (!lu) {
    829 		assert(e == T10_Cmd_T5);
    830 		cmd_common_free(c);
    831 		return;
    832 	}
    833 
    834 	(void) pthread_mutex_lock(&lu->l_cmd_mutex);
    835 	(void) t10_cmd_state_machine(c, e);
    836 	(void) pthread_mutex_unlock(&lu->l_cmd_mutex);
    837 }
    838 
    839 /*
    840  * []----
    841  * | t10_task_mgmt -- handle SAM-3 task management needs
    842  * []----
    843  */
    844 /*ARGSUSED*/
    845 Boolean_t
    846 t10_task_mgmt(t10_targ_handle_t t1, TaskOp_t op, int opt_lun, void *tag)
    847 {
    848 	t10_targ_impl_t	*t = (t10_targ_impl_t *)t1;
    849 	t10_lu_impl_t	search;
    850 	t10_lu_impl_t	*lu;
    851 
    852 	switch (op) {
    853 	case InventoryChange:
    854 		(void) pthread_mutex_lock(&t->s_mutex);
    855 		if ((lu = avl_first(&t->s_open_lu)) != NULL) {
    856 			do {
    857 				/*CSTYLED*/
    858 				queue_message_set(lu->l_common->l_from_transports,
    859 				    0, msg_targ_inventory_change, (void *)lu);
    860 			} while ((lu = AVL_NEXT(&t->s_open_lu, lu)) != NULL);
    861 		}
    862 		(void) pthread_mutex_unlock(&t->s_mutex);
    863 		return (True);
    864 
    865 	case ResetTarget:
    866 		(void) pthread_mutex_lock(&t->s_mutex);
    867 		if ((lu = avl_first(&t->s_open_lu)) != NULL) {
    868 			do {
    869 				/*CSTYLED*/
    870 				queue_message_set(lu->l_common->l_from_transports,
    871 				    Q_HIGH, msg_reset_lu, (void *)lu);
    872 			} while ((lu = AVL_NEXT(&t->s_open_lu, lu)) != NULL);
    873 			(void) pthread_mutex_unlock(&t->s_mutex);
    874 			return (True);
    875 		} else {
    876 			(void) pthread_mutex_unlock(&t->s_mutex);
    877 			return (False);
    878 		}
    879 
    880 	case ResetLun:
    881 		search.l_targ_lun = opt_lun;
    882 		(void) pthread_mutex_lock(&t->s_mutex);
    883 		if ((lu = avl_find(&t->s_open_lu, (void *)&search, NULL)) !=
    884 		    NULL) {
    885 			queue_message_set(lu->l_common->l_from_transports,
    886 			    Q_HIGH, msg_reset_lu, (void *)lu);
    887 			(void) pthread_mutex_unlock(&t->s_mutex);
    888 			return (True);
    889 		} else {
    890 			(void) pthread_mutex_unlock(&t->s_mutex);
    891 			return (False);
    892 		}
    893 		break;
    894 
    895 	case CapacityChange:
    896 		search.l_targ_lun = opt_lun;
    897 		(void) pthread_mutex_lock(&t->s_mutex);
    898 		if ((lu = avl_find(&t->s_open_lu, (void *)&search, NULL)) !=
    899 		    NULL) {
    900 			queue_message_set(lu->l_common->l_from_transports,
    901 			    Q_HIGH, msg_lu_capacity_change,
    902 			    (void *)(uintptr_t)opt_lun);
    903 			(void) pthread_mutex_unlock(&t->s_mutex);
    904 			return (True);
    905 		} else {
    906 			(void) pthread_mutex_unlock(&t->s_mutex);
    907 			return (False);
    908 		}
    909 		break;
    910 
    911 	default:
    912 		return (False);
    913 	}
    914 }
    915 
    916 
    917 /*
    918  * []----
    919  * | t10_targ_stat -- Return stats on each LU associated with target.
    920  * []----
    921  */
    922 void
    923 t10_targ_stat(t10_targ_handle_t t1, char **buf)
    924 {
    925 	t10_targ_impl_t	*t = (t10_targ_impl_t *)t1;
    926 	t10_lu_impl_t	*itl;
    927 	char		lb[32];
    928 	char		*p;
    929 
    930 	/*
    931 	 * It's possible for the management interfaces to request stats
    932 	 * even though a connection is not up and running.
    933 	 */
    934 	if (t == NULL)
    935 		return;
    936 
    937 	(void) pthread_mutex_lock(&t->s_mutex);
    938 	itl = avl_first(&t->s_open_lu);
    939 	while (itl) {
    940 		tgt_buf_add_tag(buf, XML_ELEMENT_LUN, Tag_Start);
    941 		(void) snprintf(lb, sizeof (lb), "%d", itl->l_common->l_num);
    942 		tgt_buf_add_tag(buf, lb, Tag_String);
    943 
    944 		(void) snprintf(lb, sizeof (lb), "%lld", itl->l_cmds_read);
    945 		tgt_buf_add(buf, XML_ELEMENT_READCMDS, lb);
    946 		(void) snprintf(lb, sizeof (lb), "%lld", itl->l_cmds_write);
    947 		tgt_buf_add(buf, XML_ELEMENT_WRITECMDS, lb);
    948 		(void) snprintf(lb, sizeof (lb), "%lld", itl->l_sects_read);
    949 		tgt_buf_add(buf, XML_ELEMENT_READBLKS, lb);
    950 		(void) snprintf(lb, sizeof (lb), "%lld", itl->l_sects_write);
    951 		tgt_buf_add(buf, XML_ELEMENT_WRITEBLKS, lb);
    952 
    953 		switch (itl->l_common->l_state) {
    954 		case lu_online:
    955 			p = TGT_STATUS_ONLINE;
    956 			break;
    957 		case lu_offline:
    958 			p = TGT_STATUS_OFFLINE;
    959 			break;
    960 		case lu_errored:
    961 			p = TGT_STATUS_ERRORED;
    962 			break;
    963 		}
    964 		tgt_buf_add(buf, XML_ELEMENT_STATUS, p);
    965 
    966 		tgt_buf_add_tag(buf, XML_ELEMENT_LUN, Tag_End);
    967 		itl = AVL_NEXT(&t->s_open_lu, itl);
    968 	}
    969 	(void) pthread_mutex_unlock(&t->s_mutex);
    970 }
    971 
    972 /*
    973  * []----
    974  * | t10_thick_provision -- fill the backing store with real blocks
    975  * |
    976  * | The backing store is initially created as a hole-y file. The only
    977  * | thing wrong with leaving the files hole-y is that if a system
    978  * | administrator over provisions the storage at some point a client
    979  * | will attempt to write to a block and receive an error unless the
    980  * | administrator adds more backing store before that event. Now, depending
    981  * | on the client a write error isn't fatal. However, for file systems
    982  * | like UFS and ZFS, they can not currently deal with getting a write
    983  * | error when it's their metadata and panic. That's not good. The concept
    984  * | of "Thin Provisioning" is relatively new so we'll normally preallocate
    985  * | the space, but have the option of doing the "Thin Provisioning".
    986  * []----
    987  */
    988 Boolean_t
    989 t10_thick_provision(char *target, int lun, target_queue_t *q)
    990 {
    991 	t10_targ_handle_t	t;
    992 	t10_cmd_t		*cmd		= NULL;
    993 	uint8_t			cdb[16];	/* ---- fake buffer ---- */
    994 	diskaddr_t		offset		= 0;
    995 	size_t			size;
    996 	size_t			sync_size;
    997 	msg_t			*m		= NULL;
    998 	target_queue_t		*rq		= NULL;
    999 	char			path[MAXPATHLEN];
   1000 	char			*local_name;
   1001 	tgt_node_t		*n1;
   1002 	Boolean_t		rval		= False;
   1003 	struct statvfs		fs;
   1004 
   1005 	/*
   1006 	 * To guarantee that everything has been setup correctly
   1007 	 * we'll just use the standard interfaces. Otherwise we'd need
   1008 	 * to duplicate the code and therefore offer the chance of
   1009 	 * having something fixed/change in one location that isn't
   1010 	 * in another. Obvious right?
   1011 	 */
   1012 	if ((t = t10_handle_create(target, "", 0, 0, 0, q, NULL)) == NULL) {
   1013 		queue_prt(mgmtq, Q_STE_ERRS, "STE%x  Failed to create handle\n",
   1014 		    lun);
   1015 		return (False);
   1016 	}
   1017 	if (t10_cmd_create(t, lun, cdb, sizeof (cdb), 0, &cmd) == False) {
   1018 		queue_prt(mgmtq, Q_STE_ERRS, "STE%x  Failed to create cmd\n",
   1019 		    lun);
   1020 		goto error;
   1021 	}
   1022 
   1023 	/*
   1024 	 * Attempt to see if there is enough space currently for the LU.
   1025 	 * The initialization might still fail with out of space because someone
   1026 	 * else is consuming space while the initialization is occuring.
   1027 	 * Nothing we can do about that.
   1028 	 */
   1029 	if (fstatvfs(cmd->c_lu->l_common->l_fd, &fs) != 0) {
   1030 		queue_prt(mgmtq, Q_STE_ERRS, "STE%x  statvfs failed for LU\n",
   1031 		    lun);
   1032 		goto error;
   1033 	} else if ((fs.f_frsize * fs.f_bfree) < cmd->c_lu->l_common->l_size) {
   1034 		queue_prt(mgmtq, Q_STE_ERRS, "STE%x  Not enough space for LU\n",
   1035 		    lun);
   1036 		goto error;
   1037 	}
   1038 
   1039 	if (fallocate(cmd->c_lu->l_common->l_fd, cmd->c_lu->l_common->l_size) ==
   1040 	    False) {
   1041 		/*
   1042 		 * The lu_runner will use this buffer to copy data.
   1043 		 */
   1044 		sync_size = 1024 * 1024;
   1045 		if ((cmd->c_data = malloc(sync_size)) == NULL)
   1046 			goto error;
   1047 
   1048 		while ((offset < cmd->c_lu->l_common->l_size) && (rq == NULL)) {
   1049 			size = min(cmd->c_lu->l_common->l_size - offset,
   1050 			    sync_size);
   1051 			cmd->c_offset	= offset;
   1052 			cmd->c_data_len	= size;
   1053 			/*CSTYLED*/
   1054 			queue_message_set(cmd->c_lu->l_common->l_from_transports, 0,
   1055 			    msg_thick_provo, (void *)cmd);
   1056 			while ((m = queue_message_get(q)) != NULL) {
   1057 				switch (m->msg_type) {
   1058 				case msg_thick_provo:
   1059 					if ((int)(intptr_t)m->msg_data != 0) {
   1060 
   1061 						/*
   1062 						 * An error occurred during
   1063 						 * initialization which mean we
   1064 						 * need to remove this target.
   1065 						 */
   1066 						queue_prt(mgmtq, Q_STE_ERRS,
   1067 						    "STE%x  received data "
   1068 						    "error at 0x%llx\n", lun,
   1069 						    offset);
   1070 						goto error;
   1071 					}
   1072 					break;
   1073 
   1074 				case msg_shutdown:
   1075 					queue_prt(mgmtq, Q_STE_NONIO,
   1076 					    "---- Thick provo got shutdown\n");
   1077 					rq = (target_queue_t *)m->msg_data;
   1078 					queue_message_free(m);
   1079 					continue; /* don't use break */
   1080 
   1081 				default:
   1082 					assert(0);
   1083 				}
   1084 				break;
   1085 			}
   1086 			queue_message_free(m);
   1087 			offset		+= size;
   1088 		}
   1089 	} else {
   1090 		queue_prt(mgmtq, Q_STE_NONIO, "STE%x  fallocate worked\n",
   1091 		    lun);
   1092 	}
   1093 
   1094 	/*
   1095 	 * A forced shutdown is still considered a successful completion.
   1096 	 * Write errors and malloc failures constitute a failure.
   1097 	 */
   1098 	rval = True;
   1099 
   1100 	/* ---- Completed successfully ---- */
   1101 	if (rq == NULL) {
   1102 
   1103 		/*
   1104 		 * Now that the initialization is complete, update the params
   1105 		 * file to indicate the status is online. Once done, send a
   1106 		 * message to the LU thread indicating same.
   1107 		 */
   1108 		(void) snprintf(path, sizeof (path), "%s/%s/%s%d",
   1109 		    target_basedir, cmd->c_lu->l_targ->s_targ_base, PARAMBASE,
   1110 		    lun);
   1111 
   1112 		cmd->c_lu->l_common->l_state = lu_online;
   1113 		if ((n1 = tgt_node_find(cmd->c_lu->l_common->l_root,
   1114 		    XML_ELEMENT_STATUS)) == NULL) {
   1115 			queue_prt(mgmtq, Q_STE_ERRS,
   1116 			    "STE%x  couldn't find <status>\n", lun);
   1117 			goto error;
   1118 		}
   1119 
   1120 		if (tgt_update_value_str(n1, XML_ELEMENT_STATUS,
   1121 		    TGT_STATUS_ONLINE) == False) {
   1122 			queue_prt(mgmtq, Q_STE_ERRS,
   1123 			    "STE%x  Could update <status> to online\n", lun);
   1124 			goto error;
   1125 		}
   1126 
   1127 		local_name = get_local_name(cmd->c_lu->l_targ->s_targ_base);
   1128 		if (local_name == NULL)
   1129 			goto error;
   1130 
   1131 		(void) mgmt_param_save2scf(cmd->c_lu->l_common->l_root,
   1132 		    local_name, lun);
   1133 		free(local_name);
   1134 		queue_message_set(cmd->c_lu->l_common->l_from_transports, 0,
   1135 		    msg_lu_online, 0);
   1136 	}
   1137 
   1138 error:
   1139 	if (cmd != NULL) {
   1140 		if (cmd->c_data != NULL)
   1141 			free(cmd->c_data);
   1142 		t10_cmd_shoot_event(cmd, T10_Cmd_T5);
   1143 	}
   1144 	if (t != NULL) {
   1145 		t10_handle_disable(t);
   1146 		(void) t10_handle_destroy(t, True);
   1147 	}
   1148 	if (rq != NULL) {
   1149 		queue_message_set(rq, 0, msg_shutdown_rsp, 0);
   1150 	}
   1151 
   1152 	return (rval);
   1153 }
   1154 
   1155 /*
   1156  * []------------------------------------------------------------------[]
   1157  * | Methods called by emulation modules to interface with SAM-3	|
   1158  * []------------------------------------------------------------------[]
   1159  */
   1160 
   1161 /*
   1162  * trans_cmd_dup -- Duplicate a T10 command buffer
   1163  *
   1164  * During read operations with transports that restrict transfer sizes the
   1165  * emulation code has two options.
   1166  *    (1) It could transfer a chunk of data and wait until the
   1167  *        transport has sent that out. Notification coming through
   1168  *        the callback mechanism. If the command structure is not
   1169  *        duplicated it would need to wait since the command structure
   1170  *        contains the data pointer and offset values which the transport
   1171  *        needs.
   1172  *    (2) Use this routine to duplicate the command structure such
   1173  *        that the emulation layer can send all of the data in chunks
   1174  *        without waiting.
   1175  * For obvious performance reasons it's best to send all of the chunks
   1176  * without waiting.
   1177  *
   1178  * It's expected that the emulation layer will not call this routine for the
   1179  * last outgoing packet since the command structure will not be of futher
   1180  * use.
   1181  */
   1182 t10_cmd_t *
   1183 trans_cmd_dup(t10_cmd_t *cmd)
   1184 {
   1185 	t10_cmd_t	*c;
   1186 
   1187 	if ((c = umem_cache_alloc(t10_cmd_cache, UMEM_DEFAULT)) == NULL)
   1188 		return (NULL);
   1189 	bcopy(cmd, c, sizeof (*c));
   1190 	c->c_cmd_next = NULL;
   1191 	if ((c->c_cdb = (uint8_t *)malloc(c->c_cdb_len)) == NULL) {
   1192 		umem_cache_free(t10_cmd_cache, c);
   1193 		return (NULL);
   1194 	}
   1195 	bcopy(cmd->c_cdb, c->c_cdb, c->c_cdb_len);
   1196 
   1197 	(void) pthread_mutex_lock(&cmd->c_lu->l_cmd_mutex);
   1198 	c->c_state = T10_Cmd_S2_In;
   1199 	avl_add(&c->c_lu->l_cmds, (void *)c);
   1200 	(void) pthread_mutex_unlock(&cmd->c_lu->l_cmd_mutex);
   1201 
   1202 	return (c);
   1203 }
   1204 
   1205 /*
   1206  * []----
   1207  * | trans_send_datain -- send data to transport
   1208  * |
   1209  * | NOTES:
   1210  * | (1) offset is only valid when a transport has set max_out to a non-zero
   1211  * |     value.
   1212  * | (2) The emulation code must free the memory, if it was allocated, when
   1213  * |     the transport is finished with it. The callback routine is used
   1214  * |     to provide the emulation code the notification. The callback will
   1215  * |     not be run on the same thread as the emulation code so appropriate
   1216  * |     locking may be required by the emulation code.
   1217  * | (3) If the boolean 'last' is True it means that the transport can
   1218  * |     assume the data out is finished with a CMD_SUCCESS and no futher
   1219  * |     communication from the emulation layer will occur.
   1220  * []----
   1221  */
   1222 Boolean_t
   1223 trans_send_datain(t10_cmd_t *c, char *data, size_t data_len, size_t offset,
   1224     void (*callback)(emul_handle_t e), Boolean_t last, emul_handle_t id)
   1225 {
   1226 #ifdef FULL_DEBUG
   1227 	queue_prt(mgmtq, Q_STE_IO,
   1228 	    "SAM%x  LUN%d DataIn 0x%x, offset 0x%x, Last %s\n",
   1229 	    c->c_lu->l_targ->s_targ_num, c->c_lu->l_common->l_num,
   1230 	    data_len, offset, last == True ? "true" : "false");
   1231 #endif
   1232 
   1233 	c->c_emul_complete	= callback;
   1234 	c->c_emul_id		= id;
   1235 	c->c_data		= data;
   1236 	c->c_data_len		= data_len;
   1237 	c->c_offset		= offset;
   1238 	c->c_last		= last;
   1239 	c->c_msg		= msg_cmd_data_in;
   1240 
   1241 	t10_cmd_shoot_event(c, T10_Cmd_T2);
   1242 	return (True);
   1243 }
   1244 
   1245 /*
   1246  * []----
   1247  * | trans_rqst_dataout -- Request data from transport for command
   1248  * |
   1249  * | If the transport has indicated that data is immediately available,
   1250  * | which is common for iSCSI, then we'll copy that data into the buffer
   1251  * | and call the emulation modules datain function directly.
   1252  * []----
   1253  */
   1254 Boolean_t
   1255 trans_rqst_dataout(t10_cmd_t *cmd, char *data, size_t data_len, size_t offset,
   1256     emul_cmd_t emul_id, void (*callback)(emul_handle_t e))
   1257 {
   1258 	size_t	max_xfer;
   1259 
   1260 	cmd->c_emul_complete	= callback;
   1261 	cmd->c_emul_id		= emul_id;
   1262 
   1263 	/*
   1264 	 * Transport supports immediate data on writes. Currently
   1265 	 * on the iSCSI protocol has this feature.
   1266 	 * XXX Should all of this be done in the transport?
   1267 	 */
   1268 	if (cmd->c_data_len) {
   1269 #ifdef FULL_DEBUG
   1270 		queue_prt(mgmtq, Q_STE_IO,
   1271 		    "SAM%x  LUN%d DataOut rqst w/ immed, data_len 0x%x\n",
   1272 		    cmd->c_lu->l_targ->s_targ_num,
   1273 		    cmd->c_lu->l_common->l_num, data_len);
   1274 #endif
   1275 		if (cmd->c_data == NULL) {
   1276 
   1277 			/*
   1278 			 * When there's data available, but no buffer it
   1279 			 * means the transport has decided to leave the
   1280 			 * data on the socket and will read it in
   1281 			 * when called.
   1282 			 */
   1283 			max_xfer = data_len;
   1284 			assert(cmd->c_lu->l_targ->s_dataout_cb != NULL);
   1285 			(*cmd->c_lu->l_targ->s_dataout_cb)(cmd, data,
   1286 			    &max_xfer);
   1287 
   1288 		} else {
   1289 
   1290 			/*
   1291 			 * The data is already in the command buffer so
   1292 			 * we need to copy it out.
   1293 			 */
   1294 			max_xfer = MIN(cmd->c_data_len - cmd->c_resid,
   1295 			    data_len);
   1296 			bcopy(cmd->c_data + cmd->c_resid, data, max_xfer);
   1297 			cmd->c_resid = cmd->c_data_len - max_xfer;
   1298 
   1299 			/*
   1300 			 * It's expected since the transport allocated
   1301 			 * the space, this routine will free the memory
   1302 			 * instead.
   1303 			 */
   1304 			(*cmd->c_lu->l_targ->s_dataout_cb)(cmd, data,
   1305 			    &max_xfer);
   1306 			cmd->c_data = NULL;
   1307 
   1308 		}
   1309 		cmd->c_data_len = 0;
   1310 		(*cmd->c_lu->l_data)(cmd, emul_id, offset, data, max_xfer);
   1311 		return (True);
   1312 	}
   1313 
   1314 #ifdef FULL_DEBUG
   1315 	queue_prt(mgmtq, Q_STE_IO,
   1316 	    "SAM%x  LUN%d DataOut Rqst data_len 0x%x\n",
   1317 	    cmd->c_lu->l_targ->s_targ_num,
   1318 	    cmd->c_lu->l_common->l_num, data_len);
   1319 #endif
   1320 
   1321 	assert(cmd->c_data == NULL);
   1322 
   1323 	cmd->c_data	= data;
   1324 	cmd->c_data_len	= data_len;
   1325 	cmd->c_offset	= offset;
   1326 	cmd->c_resid	= 0;
   1327 
   1328 	/*
   1329 	 * Short cut. There's no reason to call the transport if the
   1330 	 * emulation code hasn't requested any data. If that's the
   1331 	 * case just call the emulation codes data function.
   1332 	 */
   1333 	if (data_len == 0)
   1334 		(*cmd->c_lu->l_data)(cmd, emul_id, offset, data, max_xfer);
   1335 	else {
   1336 		cmd->c_msg = msg_cmd_data_rqst;
   1337 		t10_cmd_shoot_event(cmd, T10_Cmd_T2);
   1338 	}
   1339 	return (True);
   1340 }
   1341 
   1342 /*
   1343  * []----
   1344  * | trans_send_complete -- notify transport command has finished.
   1345  * |
   1346  * | This routine is called either for when the emulation has completed
   1347  * | a command which doesn't have a data in phase so we can't use the 'last'
   1348  * | flag or there's been an error.
   1349  * | The sense data is expected to be created by calling spc_create_sense(),
   1350  * | the memory for that sense data will be freed when the transport calls
   1351  * | t10_destroy_cmd().
   1352  * |
   1353  * | NOTE [1]: If the t10_status equals STATUS_BUSY the command queue for this
   1354  * | ITL will be examined. If there are commands in progress the status will
   1355  * | be changed to STATUS_QFULL
   1356  * |
   1357  * | NOTE [2]: Do not access 'cmd' after calling this function. The transport
   1358  * | may receive the command, act on it, and then call
   1359  * | t10_cmd_shoot_state(cmd, T10_Cmd_T5) before this function returns
   1360  * | thereby allowing 'cmd' to be freed and the space reallocated.
   1361  * []----
   1362  */
   1363 void
   1364 trans_send_complete(t10_cmd_t *cmd, int t10_status)
   1365 {
   1366 #ifdef FULL_DEBUG
   1367 	struct scsi_extended_sense	e;
   1368 #endif
   1369 
   1370 	(void) pthread_mutex_lock(&cmd->c_lu->l_cmd_mutex);
   1371 	/*
   1372 	 * XXX Get the exact chapter and verse from the T10 documents.
   1373 	 * translate a STATUS_BUSY to STATUS_QFULL if there are outstanding
   1374 	 * commands in the queue.
   1375 	 */
   1376 	if ((t10_status == STATUS_BUSY) &&
   1377 	    (avl_numnodes(&cmd->c_lu->l_cmds) != 0)) {
   1378 		t10_status	= STATUS_QFULL;
   1379 	}
   1380 	(void) pthread_mutex_unlock(&cmd->c_lu->l_cmd_mutex);
   1381 
   1382 	cmd->c_cmd_status	= t10_status;
   1383 	cmd->c_last		= True;
   1384 	cmd->c_data_len		= 0;
   1385 	cmd->c_data		= 0;
   1386 	cmd->c_msg		= msg_cmd_cmplt;
   1387 
   1388 #ifdef FULL_DEBUG
   1389 	if (t10_status != STATUS_GOOD) {
   1390 		if (cmd->c_cmd_sense != NULL) {
   1391 			bcopy(&cmd->c_cmd_sense[2], &e, sizeof (e));
   1392 			queue_prt(mgmtq, Q_STE_ERRS,
   1393 			    "SAM%x  LUN%d key_sense=0x%x, "
   1394 			    "ASC=0x%x, ASCQ=0x%x\n",
   1395 			    cmd->c_lu->l_targ->s_targ_num,
   1396 			    cmd->c_lu->l_common->l_num,
   1397 			    e.es_key, e.es_add_code, e.es_qual_code);
   1398 		} else {
   1399 			queue_prt(mgmtq, Q_STE_ERRS,
   1400 			    "SAM%x  LUN%d key_sense=0x%x\n",
   1401 			    cmd->c_lu->l_targ->s_targ_num,
   1402 			    cmd->c_lu->l_common->l_num, t10_status);
   1403 		}
   1404 	}
   1405 #endif
   1406 
   1407 	t10_cmd_shoot_event(cmd, T10_Cmd_T2);
   1408 }
   1409 
   1410 void
   1411 trans_aiowrite(t10_cmd_t *cmd, char *data, size_t data_len, off_t offset,
   1412     t10_aio_t *taio)
   1413 {
   1414 	taio->a_cmd = cmd;
   1415 
   1416 	(void) sema_wait(&t10_aio_sema);
   1417 	(void) pthread_mutex_lock(&cmd->c_lu->l_cmd_mutex);
   1418 	if (aiowrite(cmd->c_lu->l_common->l_fd, data, data_len, offset, 0,
   1419 	    &taio->a_aio) == -1) {
   1420 		(void) pthread_mutex_unlock(&cmd->c_lu->l_cmd_mutex);
   1421 		(void) sema_post(&t10_aio_sema);
   1422 		taio->a_aio.aio_return = -1;
   1423 		(*taio->a_aio_cmplt)(taio->a_id);
   1424 	} else {
   1425 		(void) t10_cmd_state_machine(cmd, T10_Cmd_T3);
   1426 		(void) pthread_mutex_unlock(&cmd->c_lu->l_cmd_mutex);
   1427 	}
   1428 }
   1429 
   1430 void
   1431 trans_aioread(t10_cmd_t *cmd, char *data, size_t data_len, off_t offset,
   1432     t10_aio_t *taio)
   1433 {
   1434 	taio->a_cmd = cmd;
   1435 
   1436 	(void) sema_wait(&t10_aio_sema);
   1437 	(void) pthread_mutex_lock(&cmd->c_lu->l_cmd_mutex);
   1438 	if (aioread(cmd->c_lu->l_common->l_fd, data, data_len, offset, 0,
   1439 	    &taio->a_aio) == -1) {
   1440 		(void) pthread_mutex_unlock(&cmd->c_lu->l_cmd_mutex);
   1441 		(void) sema_post(&t10_aio_sema);
   1442 		taio->a_aio.aio_return = -1;
   1443 		(*taio->a_aio_cmplt)(taio->a_id);
   1444 	} else {
   1445 		(void) t10_cmd_state_machine(cmd, T10_Cmd_T3);
   1446 		(void) pthread_mutex_unlock(&cmd->c_lu->l_cmd_mutex);
   1447 	}
   1448 }
   1449 
   1450 /*
   1451  * []----
   1452  * | trans_params_area -- return dtype params using a command pointer
   1453  * |
   1454  * | Lock down the ITL structure from change so that we can cleanly access
   1455  * | the params area. This is needed to deal with the transport closing
   1456  * | a connection while commands are in flight. When those commands finish
   1457  * | cleanup work needs to be done. Yet, the logical unit common area
   1458  * | can already be released since it doesn't know there's something to wait
   1459  * | for.
   1460  * []----
   1461  */
   1462 void *
   1463 trans_params_area(t10_cmd_t *cmd)
   1464 {
   1465 	void	*p	= NULL;
   1466 
   1467 	(void) pthread_mutex_lock(&cmd->c_lu->l_mutex);
   1468 	if (cmd->c_lu->l_common != NULL)
   1469 		p = cmd->c_lu->l_common->l_dtype_params;
   1470 	(void) pthread_mutex_unlock(&cmd->c_lu->l_mutex);
   1471 	return (p);
   1472 }
   1473 
   1474 /*
   1475  * []------------------------------------------------------------------[]
   1476  * | Support routines for Routing and Task Management			|
   1477  * []------------------------------------------------------------------[]
   1478  */
   1479 
   1480 /*
   1481  * []----
   1482  * | t10_find_lun -- Locate a per target LUN structure
   1483  * |
   1484  * | Finds per I_T_L structure. If this is the first time that this structure
   1485  * | has been accessed we allocate the structure and add it to the global
   1486  * | LUN structure. If that structure has never been accessed before it is
   1487  * | created along with a thread to handle the queue.
   1488  * []----
   1489  */
   1490 /*ARGSUSED*/
   1491 static Boolean_t
   1492 t10_find_lun(t10_targ_impl_t *t, int lun, t10_cmd_t *cmd)
   1493 {
   1494 	t10_lu_impl_t		*l		= NULL;
   1495 	t10_lu_impl_t		search;
   1496 	avl_index_t		wc		= 0; /* where common */
   1497 	avl_index_t		wt		= 0; /* where target */
   1498 	char			*guid		= NULL;
   1499 	char			*str;
   1500 	char			*dataset	= NULL;
   1501 	char			*local_name	= NULL;
   1502 	t10_lu_common_t		lc;
   1503 	t10_lu_common_t		*common		= NULL;
   1504 	tgt_node_t		*n		= NULL;
   1505 	tgt_node_t		*n1;
   1506 	tgt_node_t		*targ;
   1507 	tgt_node_t		*ll;
   1508 	char			path[MAXPATHLEN];
   1509 	Boolean_t		okay_to_free	= True;
   1510 
   1511 	bzero(&lc, sizeof (lc));
   1512 
   1513 	/*
   1514 	 * Only l_num is used by the AVL search routines so that's
   1515 	 * the only thing we'll set.
   1516 	 */
   1517 	search.l_targ_lun = lun;
   1518 
   1519 	(void) pthread_mutex_lock(&t->s_mutex);
   1520 	if ((l = avl_find(&t->s_open_lu, (void *)&search, &wt)) != NULL) {
   1521 
   1522 		/*
   1523 		 * This should be the normal fast path. At some point it
   1524 		 * might be good to look at optimizing this even more.
   1525 		 * If we know for example that the LUN numbers are sequential
   1526 		 * and there's fewer than 64 an array of pointers would be
   1527 		 * even faster than an AVL tree and not take up to much space.
   1528 		 */
   1529 		cmd->c_lu = l;
   1530 		(void) pthread_mutex_unlock(&t->s_mutex);
   1531 		return (True);
   1532 	}
   1533 	(void) pthread_mutex_unlock(&t->s_mutex);
   1534 
   1535 	/*
   1536 	 * First access for this I_T_L so we need to allocate space for it.
   1537 	 */
   1538 	if ((l = calloc(1, sizeof (*l))) == NULL) {
   1539 		cmd->c_cmd_status = STATUS_CHECK;
   1540 		spc_sense_create(cmd, KEY_HARDWARE_ERROR, 0);
   1541 		return (False);
   1542 	}
   1543 
   1544 	/*
   1545 	 * Initialize the various local fields. Certain fields will not be
   1546 	 * initialized until we've got the common LUN pointer.
   1547 	 */
   1548 	(void) pthread_mutex_init(&l->l_cmd_mutex, NULL);
   1549 	(void) pthread_mutex_init(&l->l_mutex, NULL);
   1550 	(void) pthread_cond_init(&l->l_cmd_cond, NULL);
   1551 	avl_create(&l->l_cmds, find_cmd_by_addr, sizeof (t10_cmd_t),
   1552 	    offsetof(t10_cmd_t, c_cmd_avl));
   1553 
   1554 	l->l_wait_for_drain	= False;
   1555 	l->l_to_transport	= t->s_to_transport;
   1556 	l->l_targ		= t;
   1557 	l->l_targ_lun		= lun;
   1558 
   1559 	targ = NULL;
   1560 
   1561 	while ((targ = tgt_node_next_child(targets_config, XML_ELEMENT_TARG,
   1562 	    targ)) != NULL) {
   1563 		if ((tgt_find_value_str(targ, XML_ELEMENT_INAME, &str) ==
   1564 		    True) && (strcmp(str, t->s_targ_base) == 0)) {
   1565 			local_name = strdup(targ->x_value);
   1566 			free(str);
   1567 			break;
   1568 		} else if (str) {
   1569 			free(str);
   1570 			str = NULL;
   1571 		}
   1572 	}
   1573 	if (local_name == NULL)
   1574 		goto error;
   1575 
   1576 	if ((ll = tgt_node_next(targ, XML_ELEMENT_LUNLIST, NULL)) == NULL)
   1577 		goto error;
   1578 	n = NULL;
   1579 	while ((n = tgt_node_next(ll, XML_ELEMENT_LUN, n)) != NULL) {
   1580 		if (strtol(n->x_value, NULL, 0) == lun)
   1581 			break;
   1582 	}
   1583 	if (n == NULL) {
   1584 		spc_sense_create(cmd, KEY_ILLEGAL_REQUEST, 0);
   1585 		/* ---- ACCESS DENIED - INVALID LU IDENTIFIER ---- */
   1586 		spc_sense_ascq(cmd, 0x20, 0x9);
   1587 		goto error;
   1588 	}
   1589 
   1590 	(void) pthread_mutex_lock(&lu_list_mutex);
   1591 
   1592 	if (tgt_find_value_str(n, XML_ELEMENT_GUID, &guid) == False) {
   1593 		/*
   1594 		 * Set the targ variable back to NULL to indicate that we don't
   1595 		 * have an incore copy of the information. If the guid is 0,
   1596 		 * we'll update that value and update the ZFS property if targ
   1597 		 * is not NULL, otherwise will update parameter file.
   1598 		 */
   1599 		targ = NULL;
   1600 
   1601 		/*
   1602 		 * To locate the common LUN structure we need to find the GUID
   1603 		 * for this LUN. That's the only parsing this section of code
   1604 		 * will do to the params file.
   1605 		 */
   1606 
   1607 		if (mgmt_get_param(&n, local_name, lun) == False) {
   1608 			(void) pthread_mutex_unlock(&lu_list_mutex);
   1609 			/* --- LUN no longer exists --- */
   1610 			spc_sense_create(cmd, KEY_HARDWARE_ERROR, 0);
   1611 			goto error;
   1612 		}
   1613 		okay_to_free = True;
   1614 
   1615 		if (tgt_find_value_str(n, XML_ELEMENT_GUID, &guid) == False) {
   1616 			(void) pthread_mutex_unlock(&lu_list_mutex);
   1617 			spc_sense_create(cmd, KEY_HARDWARE_ERROR, 0);
   1618 			goto error;
   1619 		}
   1620 
   1621 	} else
   1622 		okay_to_free = False;
   1623 
   1624 	if ((strcmp(guid, "0") == 0) || (strcmp(guid, "0x0") == 0)) {
   1625 		free(guid);
   1626 		/*
   1627 		 * Create the GUID with NAA IEEE Registered Extended
   1628 		 * designator format.
   1629 		 */
   1630 		if (util_create_guid(&guid, SPC_INQUIRY_ID_TYPE_NAA) == False) {
   1631 			(void) pthread_mutex_unlock(&lu_list_mutex);
   1632 			spc_sense_create(cmd, KEY_HARDWARE_ERROR, 0);
   1633 			goto error;
   1634 		}
   1635 		if ((n1 = tgt_node_find(n, XML_ELEMENT_GUID)) == NULL) {
   1636 			(void) pthread_mutex_unlock(&lu_list_mutex);
   1637 			spc_sense_create(cmd, KEY_HARDWARE_ERROR, 0);
   1638 			goto error;
   1639 		}
   1640 		if (tgt_update_value_str(n1, XML_ELEMENT_GUID, guid) == False) {
   1641 			(void) pthread_mutex_unlock(&lu_list_mutex);
   1642 			spc_sense_create(cmd, KEY_HARDWARE_ERROR, 0);
   1643 			goto error;
   1644 		}
   1645 		if (targ != NULL) {
   1646 			/*
   1647 			 * Get the dataset for this shareiscsi target
   1648 			 */
   1649 			if (tgt_find_value_str(targ, XML_ELEMENT_ALIAS,
   1650 			    &dataset) == False) {
   1651 				(void) pthread_mutex_unlock(&lu_list_mutex);
   1652 				goto error;
   1653 			}
   1654 
   1655 			/*
   1656 			 * Set the ZFS persisted shareiscsi options
   1657 			 */
   1658 			if (put_zfs_shareiscsi(dataset, targ) != ERR_SUCCESS) {
   1659 				(void) pthread_mutex_unlock(&lu_list_mutex);
   1660 				goto error;
   1661 			}
   1662 
   1663 			free(dataset);
   1664 			dataset = NULL;
   1665 
   1666 		} else if (mgmt_param_save2scf(n, local_name, lun) == False) {
   1667 			(void) pthread_mutex_unlock(&lu_list_mutex);
   1668 			spc_sense_create(cmd, KEY_HARDWARE_ERROR, 0);
   1669 			goto error;
   1670 		}
   1671 	}
   1672 
   1673 	if (tgt_xml_decode(guid, &lc.l_guid, &lc.l_guid_len) == False) {
   1674 		(void) pthread_mutex_unlock(&lu_list_mutex);
   1675 		spc_sense_create(cmd, KEY_HARDWARE_ERROR, 0);
   1676 		goto error;
   1677 	}
   1678 
   1679 	/*
   1680 	 * See if the common LUN for this GUID already exists.
   1681 	 */
   1682 	wc = 0;
   1683 	if ((common = avl_find(&lu_list, (void *)&lc, &wc)) == NULL) {
   1684 
   1685 		/*
   1686 		 * The GUID wasn't found, so create a new LUN structure
   1687 		 * and thread.
   1688 		 */
   1689 		if ((common = calloc(1, sizeof (*common))) == NULL) {
   1690 			(void) pthread_mutex_unlock(&lu_list_mutex);
   1691 			spc_sense_create(cmd, KEY_HARDWARE_ERROR, 0);
   1692 			goto error;
   1693 		}
   1694 
   1695 		common->l_from_transports = queue_alloc();
   1696 		common->l_num		= lun;
   1697 		common->l_internal_num	= lu_id++;
   1698 		common->l_guid		= lc.l_guid;
   1699 		common->l_guid_len	= lc.l_guid_len;
   1700 		common->l_fd		= -1; /* not open yet */
   1701 		common->l_mmap		= MAP_FAILED;
   1702 		common->l_root		= n;
   1703 		common->l_root_okay_to_free = okay_to_free;
   1704 		n			= NULL;
   1705 
   1706 		(void) pthread_mutex_init(&common->l_common_mutex, NULL);
   1707 
   1708 		(void) snprintf(path, sizeof (path), "%s/%s", target_basedir,
   1709 		    t->s_targ_base);
   1710 		if (t10_lu_initialize(common, path) == False) {
   1711 			queue_prt(mgmtq, Q_STE_ERRS,
   1712 			    "SAM%x  FAILED to initialize LU %d\n",
   1713 			    t->s_targ_num, lun);
   1714 			(void) pthread_mutex_unlock(&lu_list_mutex);
   1715 			spc_sense_create(cmd, KEY_HARDWARE_ERROR, 0);
   1716 			goto error;
   1717 		}
   1718 
   1719 		avl_create(&common->l_all_open, find_lu_by_targ,
   1720 		    sizeof (t10_lu_impl_t),
   1721 		    offsetof(t10_lu_impl_t, l_open_lu_node));
   1722 
   1723 		avl_insert(&lu_list, (void *)common, wc);
   1724 		(void) pthread_create(&common->l_thr_id, NULL, lu_runner,
   1725 		    (void *)common);
   1726 		queue_prt(mgmtq, Q_STE_NONIO,
   1727 		    "SAM%x  LU[%d.%d] Created new LU thread 0x%x\n",
   1728 		    t->s_targ_num, common->l_internal_num, common->l_num,
   1729 		    common->l_thr_id);
   1730 
   1731 	} else {
   1732 
   1733 		/*
   1734 		 * If there's a common LU structure already we free
   1735 		 * the guid which was created for the search. If an error
   1736 		 * occurs the guid space will be freed in the error handling
   1737 		 * code. If a new LU is created though we don't free the guid
   1738 		 * since the LU needs the information.
   1739 		 */
   1740 		free(lc.l_guid);
   1741 
   1742 		/*
   1743 		 * A similar condition exists with the xml tree. If there's
   1744 		 * already a common LU then this node *may* have been created
   1745 		 * here if it's not a ZVOL. If it is a ZVOL tree then it will
   1746 		 * have the same address as that found in l_root so don't
   1747 		 * free it.
   1748 		 */
   1749 		if (okay_to_free == True) {
   1750 			tgt_node_free(n);
   1751 			n = NULL;
   1752 		}
   1753 		lc.l_guid = NULL;
   1754 		queue_prt(mgmtq, Q_STE_NONIO,
   1755 		    "SAM%x  Found existing LU[%d.%d]\n", t->s_targ_num,
   1756 		    common->l_internal_num, common->l_num);
   1757 	}
   1758 	(void) pthread_mutex_lock(&common->l_common_mutex);
   1759 	(void) avl_find(&common->l_all_open, (void *)l, &wc);
   1760 	avl_insert(&common->l_all_open, (void *)l, wc);
   1761 	(void) pthread_mutex_unlock(&common->l_common_mutex);
   1762 
   1763 	(void) pthread_mutex_unlock(&lu_list_mutex);
   1764 
   1765 	/*
   1766 	 * Now add this I_T_L to the targets list of open LUNs so that
   1767 	 * in the future we can get access through the AVL tree.
   1768 	 * We wait to add the LU to the target list until now so that we don't
   1769 	 * have to delete the node in case an error occurs.
   1770 	 */
   1771 	(void) pthread_mutex_lock(&t->s_mutex);
   1772 	avl_insert(&t->s_open_lu, (void *)l, wt);
   1773 	(void) pthread_mutex_unlock(&t->s_mutex);
   1774 
   1775 	(void) pthread_mutex_lock(&l->l_mutex);
   1776 	l->l_common = common;
   1777 	(void) pthread_mutex_unlock(&l->l_mutex);
   1778 
   1779 	/*
   1780 	 * The common LU thread is responsible for filling in the command
   1781 	 * functions and table.
   1782 	 */
   1783 	queue_message_set(common->l_from_transports, 0, msg_lu_add, (void *)l);
   1784 
   1785 	free(guid);
   1786 	free(local_name);
   1787 
   1788 	cmd->c_lu = l;
   1789 	return (True);
   1790 
   1791 error:
   1792 	cmd->c_cmd_status = STATUS_CHECK;
   1793 	if (guid)
   1794 		free(guid);
   1795 	if (n)
   1796 		tgt_node_free(n);
   1797 	if (l)
   1798 		free(l);
   1799 	if (lc.l_guid)
   1800 		free(lc.l_guid);
   1801 	if (common)
   1802 		free(common);
   1803 	if (dataset)
   1804 		free(dataset);
   1805 	return (False);
   1806 }
   1807 
   1808 static Boolean_t
   1809 t10_lu_initialize(t10_lu_common_t *lu, char *basedir)
   1810 {
   1811 	char	*str	= NULL;
   1812 	int	dtype;
   1813 
   1814 	if (load_params(lu, basedir) == False)
   1815 		return (False);
   1816 
   1817 	if (tgt_find_value_str(lu->l_root, XML_ELEMENT_DTYPE, &str) == True) {
   1818 		for (dtype = 0; sam_emul_table[dtype].t_type_name != NULL;
   1819 		    dtype++) {
   1820 			if (strcmp(sam_emul_table[dtype].t_type_name,
   1821 			    str) == 0) {
   1822 				lu->l_dtype = dtype;
   1823 				if ((*sam_emul_table[dtype].t_common_init)(lu)
   1824 				    == False)
   1825 					goto error;
   1826 				else
   1827 					break;
   1828 			}
   1829 		}
   1830 		free(str);
   1831 	} else
   1832 		goto error;
   1833 
   1834 	return (True);
   1835 error:
   1836 	if (str != NULL)
   1837 		free(str);
   1838 	return (False);
   1839 }
   1840 
   1841 /*
   1842  * []----
   1843  * | lu_runner -- The workhorse for each LU
   1844  * |
   1845  * | This routine is the guts of the Task Router and Task Set for SAM-3.
   1846  * []----
   1847  */
   1848 static void *
   1849 lu_runner(void *v)
   1850 {
   1851 	t10_lu_common_t	*lu = (t10_lu_common_t *)v;
   1852 	msg_t		*m;
   1853 	t10_lu_impl_t	*itl;
   1854 	t10_cmd_t	*cmd;
   1855 	char		*data;
   1856 	char		*path;
   1857 	size_t		data_len;
   1858 	size_t		new_size;
   1859 	size_t		offset;
   1860 	ssize_t		cc;
   1861 	void		*provo_err;
   1862 	t10_shutdown_t	*s;
   1863 	t10_aio_t	*a;
   1864 
   1865 	util_title(mgmtq, Q_STE_NONIO, lu->l_internal_num, "Start LU");
   1866 
   1867 	while ((m = queue_message_get(lu->l_from_transports)) != NULL) {
   1868 
   1869 		switch (m->msg_type) {
   1870 		case msg_cmd_send:
   1871 			cmd = (t10_cmd_t *)m->msg_data;
   1872 
   1873 			if (cmd->c_lu->l_status) {
   1874 				spc_sense_create(cmd, cmd->c_lu->l_status, 0);
   1875 				spc_sense_ascq(cmd, cmd->c_lu->l_asc,
   1876 				    cmd->c_lu->l_ascq);
   1877 				/*
   1878 				 * Clear out the per LU values before
   1879 				 * calling trans_send_complete(). It's
   1880 				 * possible for the transport to handle
   1881 				 * this command and free it before returning.
   1882 				 */
   1883 				cmd->c_lu->l_status	= 0;
   1884 				cmd->c_lu->l_asc	= 0;
   1885 				cmd->c_lu->l_ascq	= 0;
   1886 				trans_send_complete(cmd, STATUS_CHECK);
   1887 			} else {
   1888 				lu->l_curr		= cmd;
   1889 				(*cmd->c_lu->l_cmd)
   1890 				    (cmd, cmd->c_cdb, cmd->c_cdb_len);
   1891 				lu->l_curr		= NULL;
   1892 			}
   1893 			break;
   1894 
   1895 		case msg_cmd_data_out:
   1896 			cmd		= (t10_cmd_t *)m->msg_data;
   1897 			data		= cmd->c_data;
   1898 			data_len	= cmd->c_data_len;
   1899 			offset		= cmd->c_offset;
   1900 
   1901 			/*
   1902 			 * We clear the c_data_len here because if the
   1903 			 * emulation routine processes the data and still
   1904 			 * needs more it will call trans_rqst_datain()
   1905 			 * which will look at c_data_len to see if there
   1906 			 * was immediate data available from the transport.
   1907 			 * In this case we've already processed the data
   1908 			 * and need to request more from the transport.
   1909 			 * c_data is set to NULL because there's an assert
   1910 			 * in trans_rqst_datain() checking that c_data is
   1911 			 * indeed null.
   1912 			 */
   1913 			cmd->c_data_len	= 0;
   1914 			cmd->c_data	= NULL;
   1915 
   1916 			lu->l_curr		= cmd;
   1917 			(*cmd->c_lu->l_data)(cmd, cmd->c_emul_id,
   1918 			    offset, data, data_len);
   1919 			lu->l_curr		= NULL;
   1920 			break;
   1921 
   1922 		case msg_lu_aio_done:
   1923 			a = (t10_aio_t *)m->msg_data;
   1924 			(*a->a_aio_cmplt)(a->a_id);
   1925 			break;
   1926 
   1927 		case msg_lu_add:
   1928 			itl = (t10_lu_impl_t *)m->msg_data;
   1929 			(*sam_emul_table[lu->l_dtype].t_per_init)(itl);
   1930 			break;
   1931 
   1932 		case msg_reset_lu:
   1933 			(void) pthread_mutex_lock(&lu->l_common_mutex);
   1934 			itl = avl_first(&lu->l_all_open);
   1935 			while (itl != NULL) {
   1936 				/*
   1937 				 * The current implementation is that we
   1938 				 * have a shared queue for each LU. That means
   1939 				 * if we reset a LU all I_T nexus' must
   1940 				 * receive a CHECK_CONDITION on their next
   1941 				 * command.
   1942 				 */
   1943 				(*sam_emul_table[lu->l_dtype].t_per_fini)(itl);
   1944 				(*sam_emul_table[lu->l_dtype].t_per_init)(itl);
   1945 
   1946 				itl = AVL_NEXT(&lu->l_all_open, itl);
   1947 			}
   1948 			(void) pthread_mutex_unlock(&lu->l_common_mutex);
   1949 			break;
   1950 
   1951 		case msg_shutdown:
   1952 			s = (t10_shutdown_t *)m->msg_data;
   1953 
   1954 			itl = s->t_lu;
   1955 			(void) pthread_mutex_lock(&lu_list_mutex);
   1956 			(void) pthread_mutex_lock(&lu->l_common_mutex);
   1957 			assert(avl_find(&lu->l_all_open, (void *)itl, NULL) !=
   1958 			    NULL);
   1959 			queue_walker_free(lu->l_from_transports,
   1960 			    lu_remove_cmds, (void *)itl);
   1961 			(*sam_emul_table[lu->l_dtype].t_per_fini)(itl);
   1962 			avl_remove(&lu->l_all_open, (void *)itl);
   1963 
   1964 			if (avl_numnodes(&lu->l_all_open) == 0) {
   1965 				/*
   1966 				 * Close backing store.
   1967 				 */
   1968 				queue_prt(mgmtq, Q_STE_NONIO,
   1969 				    "LU_%x  No remaining targets for LU(%d)\n",
   1970 				    lu->l_internal_num, lu->l_fd);
   1971 				if (lu->l_mmap != MAP_FAILED)
   1972 					(void) munmap(lu->l_mmap,
   1973 					    lu->l_size);
   1974 				if (close(lu->l_fd) != 0)
   1975 					queue_prt(mgmtq, Q_STE_ERRS,
   1976 					    "LU_%x  Failed to close fd, "
   1977 					    "errno=%d\n", lu->l_internal_num,
   1978 					    errno);
   1979 				else
   1980 					lu->l_fd = -1;
   1981 				/*CSTYLED*/
   1982 				(*sam_emul_table[lu->l_dtype].t_common_fini)(lu);
   1983 
   1984 				avl_remove(&lu_list, (void *)lu);
   1985 				util_title(mgmtq, Q_STE_NONIO,
   1986 				    lu->l_internal_num, "End LU");
   1987 				queue_free(lu->l_from_transports, NULL);
   1988 				(void) pthread_mutex_unlock(
   1989 				    &lu->l_common_mutex);
   1990 				(void) pthread_mutex_unlock(&lu_list_mutex);
   1991 				if (lu->l_root_okay_to_free == True)
   1992 					tgt_node_free(lu->l_root);
   1993 				free(lu->l_pid);
   1994 				free(lu->l_vid);
   1995 				free(lu->l_guid);
   1996 				free(lu);
   1997 				queue_message_free(m);
   1998 				queue_message_set(mgmtq, 0, msg_pthread_join,
   1999 				    (void *)(uintptr_t)pthread_self());
   2000 				/*
   2001 				 * Send the response after all the work here
   2002 				 * is done.
   2003 				 */
   2004 				queue_message_set(s->t_q, 0, msg_shutdown_rsp,
   2005 				    (void *)(uintptr_t)itl->l_targ_lun);
   2006 				pthread_exit(NULL);
   2007 			}
   2008 			queue_message_set(s->t_q, 0, msg_shutdown_rsp,
   2009 			    (void *)(uintptr_t)itl->l_targ_lun);
   2010 			(void) pthread_mutex_unlock(&lu->l_common_mutex);
   2011 			(void) pthread_mutex_unlock(&lu_list_mutex);
   2012 			break;
   2013 
   2014 		case msg_targ_inventory_change:
   2015 			itl = (t10_lu_impl_t *)m->msg_data;
   2016 			itl->l_status	= KEY_UNIT_ATTENTION;
   2017 			/*
   2018 			 * SPC-3 revision 21c, section 4.5.6, Table 28
   2019 			 * When LU inventory changes need to report
   2020 			 * a REPORTED LUNS DATA HAS CHANGED event.
   2021 			 */
   2022 			itl->l_asc	= 0x3f;
   2023 			itl->l_ascq	= 0x0e;
   2024 			queue_prt(mgmtq, Q_STE_NONIO,
   2025 			    "LU_%x  Received InventoryChange for %d\n",
   2026 			    lu->l_internal_num, itl->l_common->l_num);
   2027 			break;
   2028 
   2029 		case msg_thick_provo:
   2030 			cmd	= (t10_cmd_t *)m->msg_data;
   2031 			if (lu->l_mmap != MAP_FAILED) {
   2032 
   2033 				/*
   2034 				 * If the file at c_offset is currently
   2035 				 * unallocated we'll read in that buffer
   2036 				 * which will be zeros and then write it
   2037 				 * back out which will force the underlying
   2038 				 * filesystem to allocate the blocks.
   2039 				 * If someone has already issued a write
   2040 				 * to this area we'll then just cause a
   2041 				 * useless, but safe read/write to occur.
   2042 				 */
   2043 				lu->l_curr		= cmd;
   2044 				lu->l_curr_provo	= True;
   2045 				bcopy((char *)lu->l_mmap + cmd->c_offset,
   2046 				    cmd->c_data, cmd->c_data_len);
   2047 				cmd->c_lu->l_cmds_read++;
   2048 				cmd->c_lu->l_sects_read +=
   2049 				    cmd->c_data_len / 512;
   2050 				bcopy(cmd->c_data,
   2051 				    (char *)lu->l_mmap + cmd->c_offset,
   2052 				    cmd->c_data_len);
   2053 				cmd->c_lu->l_cmds_write++;
   2054 				cmd->c_lu->l_sects_write +=
   2055 				    cmd->c_data_len / 512;
   2056 				lu->l_curr		= NULL;
   2057 				lu->l_curr_provo	= False;
   2058 				provo_err		= 0;
   2059 
   2060 			} else {
   2061 				if ((cc = pread(lu->l_fd, cmd->c_data,
   2062 				    cmd->c_data_len, cmd->c_offset)) < 0) {
   2063 					queue_prt(mgmtq, Q_STE_ERRS,
   2064 					    "LU_%x  pread errno=%d\n",
   2065 					    lu->l_num, errno);
   2066 				} else if (pwrite(lu->l_fd, cmd->c_data, cc,
   2067 				    cmd->c_offset) != cc) {
   2068 					queue_prt(mgmtq, Q_STE_ERRS,
   2069 					    "LU_%x  pwrite errno=%d\n",
   2070 					    lu->l_num, errno);
   2071 				}
   2072 				provo_err = (cc == cmd->c_data_len) ?
   2073 				    (void *)0 : (void *)1;
   2074 			}
   2075 			/*
   2076 			 * acknowledge this op and wait for next
   2077 			 */
   2078 			queue_message_set(cmd->c_lu->l_to_transport, 0,
   2079 			    msg_thick_provo, provo_err);
   2080 			break;
   2081 
   2082 		case msg_lu_capacity_change:
   2083 			new_size = lseek(lu->l_fd, 0, SEEK_END);
   2084 			queue_prt(mgmtq, Q_STE_NONIO,
   2085 			    "LU_%x  Capacity Change from 0x%llx to 0x%llx\n",
   2086 			    lu->l_internal_num, lu->l_size, new_size);
   2087 			if ((path = malloc(MAXPATHLEN)) == NULL)
   2088 				break;
   2089 
   2090 			(void) snprintf(path, MAXPATHLEN, "%s/%s",
   2091 			    target_basedir, itl->l_targ->s_targ_base);
   2092 			(void) load_params(lu, path);
   2093 			free(path);
   2094 			(*sam_emul_table[lu->l_dtype].t_task_mgmt)(lu,
   2095 			    CapacityChange);
   2096 			(void) pthread_mutex_lock(&lu->l_common_mutex);
   2097 			itl = avl_first(&lu->l_all_open);
   2098 			while (itl != NULL) {
   2099 				itl->l_status	= KEY_UNIT_ATTENTION;
   2100 				itl->l_asc	= SPC_ASC_CAP_CHANGE;
   2101 				itl->l_ascq	= SPC_ASCQ_CAP_CHANGE;
   2102 				itl = AVL_NEXT(&lu->l_all_open, itl);
   2103 			}
   2104 			(void) pthread_mutex_unlock(&lu->l_common_mutex);
   2105 			break;
   2106 
   2107 		case msg_lu_online:
   2108 			queue_prt(mgmtq, Q_STE_NONIO,
   2109 			    "LU_%x  Received online event\n",
   2110 			    lu->l_internal_num);
   2111 			if ((path = malloc(MAXPATHLEN)) == NULL)
   2112 				break;
   2113 
   2114 			(void) pthread_mutex_lock(&lu->l_common_mutex);
   2115 			itl = avl_first(&lu->l_all_open);
   2116 			(void) pthread_mutex_unlock(&lu->l_common_mutex);
   2117 			(void) snprintf(path, MAXPATHLEN, "%s/%s",
   2118 			    target_basedir, itl->l_targ->s_targ_base);
   2119 			(void) load_params(lu, path);
   2120 			free(path);
   2121 			(*sam_emul_table[lu->l_dtype].t_task_mgmt)(lu,
   2122 			    DeviceOnline);
   2123 			(void) pthread_mutex_lock(&lu->l_common_mutex);
   2124 			itl = avl_first(&lu->l_all_open);
   2125 			while (itl != NULL) {
   2126 				(*sam_emul_table[lu->l_dtype].t_per_init)(itl);
   2127 				itl = AVL_NEXT(&lu->l_all_open, itl);
   2128 			}
   2129 			(void) pthread_mutex_unlock(&lu->l_common_mutex);
   2130 			break;
   2131 
   2132 		}
   2133 		queue_message_free(m);
   2134 	}
   2135 
   2136 	return (NULL);
   2137 }
   2138 
   2139 /*
   2140  * []----
   2141  * | lu_buserr_handler -- deal with SIGBUS on mmap'd files
   2142  * |
   2143  * | Normally SIGBUS's are a real bad thing. With this project, which uses
   2144  * | mmap'd files that start out as hole-y, can represent more space than
   2145  * | the underlying storage has available. This is good and considered a
   2146  * | feature for "Thin Provisioning". However, this means that if the
   2147  * | administrator isn't on the ball the storage can fill up. Because of the
   2148  * | asynchronous nature of writing to a mmap'd file the OS will send a SIGBUS
   2149  * | to the thread which caused the problem. The thread will then locate its
   2150  * | data structure and in turn signal the initiator that a problem occurred.
   2151  * | Since we can't restart we're we left off because the out of space
   2152  * | condition is still present another thread is started to handle other
   2153  * | commands for the logical unit. The current thread will then exit.
   2154  * |
   2155  * | NOTE:
   2156  * | If for any reason this routine doesn't find what's it's expecting to
   2157  * | assert() will be called to create a core. This routine will only recover
   2158  * | from the expected case of a SIGBUS, otherwise something real bad has
   2159  * | happened and we need to see the core.
   2160  * []----
   2161  */
   2162 /*ARGSUSED*/
   2163 void
   2164 lu_buserr_handler(int sig, siginfo_t *sip, void *v)
   2165 {
   2166 	t10_lu_common_t	*lu;
   2167 	pthread_t	id = pthread_self();
   2168 	char		*fa;
   2169 
   2170 	if (pthread_mutex_trylock(&lu_list_mutex) != 0) {
   2171 		assert(0);
   2172 	}
   2173 	lu = avl_first(&lu_list);
   2174 	while (lu != NULL) {
   2175 		if (lu->l_thr_id == id)
   2176 			break;
   2177 		lu = AVL_NEXT(&lu_list, lu);
   2178 	}
   2179 	(void) pthread_mutex_unlock(&lu_list_mutex);
   2180 
   2181 	if ((lu == NULL) || (lu->l_curr == NULL)) {
   2182 		queue_prt(mgmtq, Q_STE_ERRS,
   2183 		    "SAM%x  BUS ERROR and couldn't find logical unit\n",
   2184 		    lu->l_num);
   2185 		assert(0);
   2186 #ifdef NDEBUG
   2187 		return;
   2188 #endif
   2189 	}
   2190 
   2191 	if (lu->l_mmap == MAP_FAILED) {
   2192 		queue_prt(mgmtq, Q_STE_ERRS,
   2193 		    "SAM%x  BUS ERROR and device not mmap'd\n", lu->l_num);
   2194 		assert(0);
   2195 #ifdef NDEBUG
   2196 		return;
   2197 #endif
   2198 	}
   2199 
   2200 	fa = (char *)sip->__data.__fault.__addr;
   2201 	if ((fa < (char *)lu->l_mmap) ||
   2202 	    (fa > ((char *)lu->l_mmap + lu->l_size))) {
   2203 		queue_prt(mgmtq, Q_STE_ERRS,
   2204 		    "SAM%x  BUS ERROR occurred outsize of mmap bounds\n",
   2205 		    lu->l_num);
   2206 		assert(0);
   2207 #ifdef NDEBUG
   2208 		return;
   2209 #endif
   2210 	}
   2211 
   2212 	if (lu->l_curr_provo == True) {
   2213 		lu->l_curr_provo = False;
   2214 		queue_message_set(lu->l_curr->c_lu->l_to_transport, 0,
   2215 		    msg_thick_provo, (void *)1);
   2216 	} else {
   2217 		spc_sense_create(lu->l_curr, KEY_MEDIUM_ERROR, 0);
   2218 		spc_sense_ascq(lu->l_curr, SPC_ASC_WRITE_ERROR,
   2219 		    SPC_ASCQ_WRITE_ERROR);
   2220 		trans_send_complete(lu->l_curr, STATUS_CHECK);
   2221 	}
   2222 
   2223 	queue_prt(mgmtq, Q_STE_ERRS,
   2224 	    "SAM%x  Caught an out-of-space issue\n", lu->l_num);
   2225 
   2226 	/*
   2227 	 * Now restart another thread to pick up where we've left off with
   2228 	 * processing commands for this logical unit.
   2229 	 */
   2230 	(void) pthread_create(&lu->l_thr_id, NULL, lu_runner, (void *)lu);
   2231 	pthread_exit((void *)0);
   2232 }
   2233 
   2234 
   2235 /*
   2236  * []----
   2237  * | lu_remove_cmds -- look for and free commands
   2238  * []----
   2239  */
   2240 static Boolean_t
   2241 lu_remove_cmds(msg_t *m, void *v)
   2242 {
   2243 	t10_lu_impl_t	*lu = (t10_lu_impl_t *)v;
   2244 	t10_cmd_t	*c;
   2245 
   2246 	switch (m->msg_type) {
   2247 	case msg_cmd_send:
   2248 	case msg_cmd_data_out:
   2249 		c = (t10_cmd_t *)m->msg_data;
   2250 		if (lu == NULL) {
   2251 			queue_prt(mgmtq, Q_STE_NONIO,
   2252 			    "SAM%x  canceled command during lu_remove\n",
   2253 			    c->c_lu->l_targ->s_targ_num);
   2254 			t10_cmd_shoot_event(c, T10_Cmd_T6);
   2255 			return (True);
   2256 		}
   2257 		if (c->c_lu == lu) {
   2258 			queue_prt(mgmtq, Q_STE_NONIO,
   2259 			    "SAM%x  LUN %d, removed command during lu_remove\n",
   2260 			    c->c_lu->l_targ->s_targ_num, lu->l_common->l_num);
   2261 			t10_cmd_shoot_event(c, T10_Cmd_T5);
   2262 			return (True);
   2263 		}
   2264 		break;
   2265 	}
   2266 	return (False);
   2267 }
   2268 
   2269 /*
   2270  * []----
   2271  * | load_params -- load parameters and open LU backing store
   2272  * |
   2273  * | This routine can be called multiple times and will free and release
   2274  * | previous resources.
   2275  * []----
   2276  */
   2277 static Boolean_t
   2278 load_params(t10_lu_common_t *lu, char *basedir)
   2279 {
   2280 	char		file[MAXPATHLEN];
   2281 	char		*str;
   2282 	int		oflags		= O_RDWR|O_LARGEFILE|O_NDELAY;
   2283 	Boolean_t	mmap_lun	= False;
   2284 	tgt_node_t	*node		= NULL;
   2285 	int		version_maj	= XML_VERS_LUN_MAJ;
   2286 	int		version_min	= XML_VERS_LUN_MIN;
   2287 
   2288 	/*
   2289 	 * Clean up from previous call to this function. This occurs if
   2290 	 * the LU has grown since it was last opened.
   2291 	 */
   2292 	if (lu->l_mmap != MAP_FAILED)
   2293 		(void) munmap(lu->l_mmap, lu->l_size);
   2294 	if (lu->l_fd != -1) {
   2295 		(void) close(lu->l_fd);
   2296 		lu->l_fd = -1;
   2297 	}
   2298 
   2299 	node = lu->l_root;
   2300 
   2301 	if (validate_version(node, &version_maj, &version_min) == False)
   2302 		(void) fprintf(stderr, "Failed version check\n");
   2303 
   2304 	if (tgt_find_value_str(node, XML_ELEMENT_PID, &lu->l_pid) == False)
   2305 		goto error;
   2306 
   2307 	if (tgt_find_value_str(node, XML_ELEMENT_VID, &lu->l_vid) == False)
   2308 		goto error;
   2309 
   2310 	/*
   2311 	 * If there's no <status> tag it just means this is an older param
   2312 	 * file and there's no need to treat it as an error. Just mark
   2313 	 * the device as online.
   2314 	 */
   2315 	if (tgt_find_value_str(node, XML_ELEMENT_STATUS, &str) == True) {
   2316 		if (strcmp(str, TGT_STATUS_ONLINE) == 0)
   2317 			lu->l_state = lu_online;
   2318 		else if (strcmp(str, TGT_STATUS_OFFLINE) == 0)
   2319 			lu->l_state = lu_offline;
   2320 		else if (strcmp(str, TGT_STATUS_ERRORED) == 0)
   2321 			lu->l_state = lu_errored;
   2322 		free(str);
   2323 	} else
   2324 		lu->l_state = lu_online;
   2325 
   2326 	/*
   2327 	 * If offline, we need to check to see if there's an initialization
   2328 	 * thread running for this lun. If not, start one.
   2329 	 */
   2330 	if ((lu->l_state == lu_offline) &&
   2331 	    (thick_provo_chk_thr(strrchr(basedir, '/') + 1, lu->l_num) ==
   2332 	    False)) {
   2333 		queue_prt(mgmtq, Q_STE_NONIO,
   2334 		    "LU_%d  No initialization thread running\n", lu->l_num);
   2335 		if (thin_provisioning == False) {
   2336 			thick_provo_t	*tp;
   2337 			pthread_t	junk;
   2338 
   2339 			if ((tp = calloc(1, sizeof (*tp))) != NULL) {
   2340 				tp->targ_name = strdup(strrchr(basedir, '/')) +
   2341 				    1;
   2342 				tp->lun	= lu->l_num;
   2343 				tp->q	= queue_alloc();
   2344 				(void) pthread_create(&junk, NULL,
   2345 				    thick_provo_start, tp);
   2346 				/* ---- wait for start message ---- */
   2347 				queue_message_free(queue_message_get(tp->q));
   2348 			}
   2349 		}
   2350 	}
   2351 
   2352 	/*
   2353 	 * The default is to disable the fast write acknowledgement which
   2354 	 * can be overridden in a couple of ways. First, see if the global
   2355 	 * fast-write-ack is enabled, then check the per logical unit flags.
   2356 	 * The per LU bit is settable via a SCSI command.
   2357 	 */
   2358 	lu->l_fast_write_ack = False;
   2359 	(void) tgt_find_value_boolean(main_config, XML_ELEMENT_FAST,
   2360 	    &lu->l_fast_write_ack);
   2361 	(void) tgt_find_value_boolean(node, XML_ELEMENT_FAST,
   2362 	    &lu->l_fast_write_ack);
   2363 	if (lu->l_fast_write_ack == False)
   2364 		oflags |= O_SYNC;
   2365 
   2366 	/*
   2367 	 * Object-based Storage Devices currently use directories to
   2368 	 * represent the partitions and files in those directories to
   2369 	 * represent user objects and collections. Therefore, there's
   2370 	 * not just a single file to be opened, but potentially thousands.
   2371 	 * Therefore, stop here if we've got an OSD dtype.
   2372 	 */
   2373 	if (tgt_find_value_str(node, XML_ELEMENT_DTYPE, &str) == False)
   2374 		goto error;
   2375 	if (strcmp(str, TGT_TYPE_OSD) == 0) {
   2376 		free(str);
   2377 		return (True);
   2378 	} else
   2379 		free(str);
   2380 
   2381 	if (tgt_find_value_str(node, XML_ELEMENT_BACK, &str) == True) {
   2382 		lu->l_fd = open(str, oflags);
   2383 		free(str);
   2384 		if (lu->l_fd == -1)
   2385 			goto error;
   2386 	} else {
   2387 		(void) snprintf(file, sizeof (file), "%s/%s%d", basedir,
   2388 		    LUNBASE, lu->l_num);
   2389 		if ((lu->l_fd = open(file, oflags)) == -1)
   2390 			goto error;
   2391 	}
   2392 
   2393 	(void) tgt_find_value_boolean(node, XML_ELEMENT_MMAP_LUN, &mmap_lun);
   2394 	if (tgt_find_value_str(node, XML_ELEMENT_SIZE, &str) == True) {
   2395 		lu->l_size = strtoll(str, NULL, 0) * 512LL;
   2396 		free(str);
   2397 	} else
   2398 		goto error;
   2399 
   2400 	if (mmap_lun == True) {
   2401 		/*
   2402 		 * st_size will be wrong if the device is a block device
   2403 		 * but that's okay since you can't mmap in a block device.
   2404 		 * A block device will fall back to using AIO operations.
   2405 		 */
   2406 		lu->l_mmap = mmap(0, lu->l_size, PROT_READ|PROT_WRITE,
   2407 		    MAP_SHARED|MAP_ALIGN, lu->l_fd, 0);
   2408 	} else {
   2409 		lu->l_mmap = MAP_FAILED;
   2410 	}
   2411 	return (True);
   2412 error:
   2413 	if (lu->l_pid) {
   2414 		free(lu->l_pid);
   2415 		lu->l_pid = NULL;
   2416 	}
   2417 	if (lu->l_vid) {
   2418 		free(lu->l_vid);
   2419 		lu->l_vid = NULL;
   2420 	}
   2421 	if (lu->l_fd != -1) {
   2422 		(void) close(lu->l_fd);
   2423 		lu->l_fd = -1;
   2424 	}
   2425 	return (False);
   2426 }
   2427 
   2428 /*
   2429  * []----
   2430  * | cmd_common_free -- frees data stored in the cmd
   2431  * |
   2432  * | NOTE: The mutex which protects c_state must be held when this routine
   2433  * | is called if there's a LU associated with this command.
   2434  * []----
   2435  */
   2436 static void
   2437 cmd_common_free(t10_cmd_t *c)
   2438 {
   2439 	t10_lu_impl_t	*lu	= c->c_lu;
   2440 
   2441 	if (lu) {
   2442 		assert(pthread_mutex_trylock(&lu->l_cmd_mutex) != 0);
   2443 		/* command might be removed by t10_handle_destroy */
   2444 		if (avl_find(&lu->l_cmds, c, NULL) == NULL)
   2445 			return;
   2446 		avl_remove(&lu->l_cmds, c);
   2447 	}
   2448 
   2449 	c->c_state	= T10_Cmd_S1_Free;
   2450 	c->c_data	= 0;
   2451 	c->c_data_len	= 0;
   2452 
   2453 	clear_transport(c->c_trans_id, c);
   2454 
   2455 	if (c->c_emul_complete != NULL) {
   2456 		(*c->c_emul_complete)(c->c_emul_id);
   2457 		c->c_emul_complete = NULL;
   2458 	}
   2459 	if (c->c_cdb) {
   2460 		free(c->c_cdb);
   2461 		c->c_cdb = NULL;
   2462 	}
   2463 	if (c->c_cmd_sense) {
   2464 		free(c->c_cmd_sense);
   2465 		c->c_cmd_sense = NULL;
   2466 	}
   2467 	if (lu && (lu->l_wait_for_drain == True) &&
   2468 	    (avl_numnodes(&lu->l_cmds) == 0)) {
   2469 		lu->l_wait_for_drain = False;
   2470 		(void) pthread_cond_signal(&lu->l_cmd_cond);
   2471 	}
   2472 	umem_cache_free(t10_cmd_cache, c);
   2473 }
   2474 
   2475 /*
   2476  * clear_transport -- Remove the transports reference to the T10 command
   2477  *
   2478  * This should be a function pointer stored in the t10_lu_impl structure.
   2479  * The only reason it's not, is I wish to wait until we know a little more
   2480  * about the FC transport. There may be some other callbacks required for that
   2481  * transport and if so, I'll need to define a new method for passing in
   2482  * the callbacks to the t10_create_handle. The easiest way would probably
   2483  * have a structure. I'm concerned about supporting different versions, so
   2484  * wish to think about it some more before implementing.
   2485  *
   2486  * This function can be called on either the transport thread or the t10
   2487  * thread.
   2488  */
   2489 static void
   2490 clear_transport(transport_t t, t10_cmd_t *t10c)
   2491 {
   2492 	iscsi_cmd_t	*c = (iscsi_cmd_t *)t;
   2493 
   2494 	if (c) {
   2495 		if (c->c_t10_dup != 0) {
   2496 			c->c_t10_dup--;
   2497 		}
   2498 		if (c->c_t10_cmd != NULL) {
   2499 			/*
   2500 			 * Find and unlink the cmd to be freed.
   2501 			 * The last entry's next ptr is NULL.
   2502 			 */
   2503 			if (c->c_t10_cmd == t10c) {
   2504 				c->c_t10_cmd = t10c->c_cmd_next;
   2505 			} else {
   2506 				t10_cmd_t *t10cnxt = c->c_t10_cmd;
   2507 				while (t10cnxt->c_cmd_next != NULL) {
   2508 					if (t10cnxt->c_cmd_next == t10c) {
   2509 						t10cnxt->c_cmd_next =
   2510 						    t10c->c_cmd_next;
   2511 						break;
   2512 					}
   2513 					t10cnxt = t10cnxt->c_cmd_next;
   2514 				}
   2515 			}
   2516 		}
   2517 	}
   2518 }
   2519 
   2520 /*
   2521  * []----
   2522  * | fallocate -- allocate blocks for file via file system interface
   2523  * |
   2524  * | This is a faster approach to allocating the blocks for a file.
   2525  * | Instead of reading and then writing each block which will force the
   2526  * | file system to allocate the data we simply ask the file system to
   2527  * | allocate the space. Unfortunately not all file systems support this
   2528  * | feature.
   2529  * []----
   2530  */
   2531 static Boolean_t
   2532 fallocate(int fd, off64_t len)
   2533 {
   2534 #ifdef FALLOCATE_SUPPORTED
   2535 #if defined(_LARGEFILE64_SOURCE) && !defined(_LP64)
   2536 	struct flock64 lck;
   2537 
   2538 	lck.l_whence	= 0;
   2539 	lck.l_start	= 0;
   2540 	lck.l_len	= len;
   2541 	lck.l_type	= F_WRLCK;
   2542 
   2543 	if (fcntl(fd, F_ALLOCSP64, &lck) == -1)
   2544 		return (False);
   2545 	else
   2546 		return (True);
   2547 #else
   2548 	struct flock lck;
   2549 
   2550 	lck.l_whence	= 0;
   2551 	lck.l_start	= 0;
   2552 	lck.l_len	= len;
   2553 	lck.l_type	= F_WRLCK;
   2554 
   2555 	if (fcntl(fd, F_ALLOCSP, &lck) == -1)
   2556 		return (False);
   2557 	else
   2558 		return (True);
   2559 #endif
   2560 #else
   2561 	return (False);
   2562 #endif
   2563 }
   2564 
   2565 /*
   2566  * []----
   2567  * | find_lu_by_num -- AVL comparison which looks at LUN
   2568  * []----
   2569  */
   2570 static int
   2571 find_lu_by_num(const void *v1, const void *v2)
   2572 {
   2573 	t10_lu_impl_t	*l1	= (t10_lu_impl_t *)v1;
   2574 	t10_lu_impl_t	*l2	= (t10_lu_impl_t *)v2;
   2575 
   2576 	if (l1->l_targ_lun < l2->l_targ_lun)
   2577 		return (-1);
   2578 	if (l1->l_targ_lun > l2->l_targ_lun)
   2579 		return (1);
   2580 	return (0);
   2581 }
   2582 
   2583 /*
   2584  * []----
   2585  * | find_lu_by_guid -- AVL comparison which looks at GUID
   2586  * []----
   2587  */
   2588 static int
   2589 find_lu_by_guid(const void *v1, const void *v2)
   2590 {
   2591 	t10_lu_common_t	*l1	= (t10_lu_common_t *)v1;
   2592 	t10_lu_common_t	*l2	= (t10_lu_common_t *)v2;
   2593 	int		i;
   2594 
   2595 	if (l1->l_guid_len != l2->l_guid_len) {
   2596 		return ((l1->l_guid_len < l2->l_guid_len) ? -1 : 1);
   2597 	}
   2598 	for (i = 0; i < l1->l_guid_len; i++) {
   2599 		if (l1->l_guid[i] != l2->l_guid[i]) {
   2600 			return ((l1->l_guid[i] < l2->l_guid[i]) ? -1 : 1);
   2601 		}
   2602 	}
   2603 	return (0);
   2604 }
   2605 
   2606 /*
   2607  * []----
   2608  * | find_lu_by_targ -- AVL comparison which looks at the target
   2609  * |
   2610  * | NOTE:
   2611  * | The target value is the memory address of the per target structure.
   2612  * | Therefore, it's not persistent in any manner, nor can any association
   2613  * | be made between the target value and the initiator. It will be unique
   2614  * | however which is all that we're looking for.
   2615  * []----
   2616  */
   2617 static int
   2618 find_lu_by_targ(const void *v1, const void *v2)
   2619 {
   2620 	t10_lu_impl_t	*l1	= (t10_lu_impl_t *)v1;
   2621 	t10_lu_impl_t	*l2	= (t10_lu_impl_t *)v2;
   2622 
   2623 	if ((uint64_t)(uintptr_t)l1->l_targ < (uint64_t)(uintptr_t)l2->l_targ)
   2624 		return (-1);
   2625 	else if ((uint64_t)(uintptr_t)l1->l_targ >
   2626 	    (uint64_t)(uintptr_t)l2->l_targ)
   2627 		return (1);
   2628 	else
   2629 		return (0);
   2630 }
   2631 
   2632 /*
   2633  * []----
   2634  * | find_cmd_by_addr -- AVL comparison using the simplist of methods
   2635  * []----
   2636  */
   2637 static int
   2638 find_cmd_by_addr(const void *v1, const void *v2)
   2639 {
   2640 	uint64_t	cmd1	= (uint64_t)(uintptr_t)v1;
   2641 	uint64_t	cmd2	= (uint64_t)(uintptr_t)v2;
   2642 
   2643 	if (cmd1 < cmd2)
   2644 		return (-1);
   2645 	else if (cmd1 > cmd2)
   2646 		return (1);
   2647 	else
   2648 		return (0);
   2649 }
   2650 
   2651 /*ARGSUSED*/
   2652 static Boolean_t
   2653 sam_common_init(t10_lu_common_t *t)
   2654 {
   2655 	assert(0);
   2656 	return (False);
   2657 }
   2658 
   2659 /*ARGSUSED*/
   2660 static void
   2661 sam_common_fini(t10_lu_common_t *t)
   2662 {
   2663 	assert(0);
   2664 }
   2665 
   2666 #ifdef FULL_DEBUG
   2667 static char *
   2668 state_to_str(t10_cmd_state_t s)
   2669 {
   2670 	switch (s) {
   2671 	case T10_Cmd_S1_Free:		return ("FREE");
   2672 	case T10_Cmd_S2_In:		return ("IN");
   2673 	case T10_Cmd_S3_Trans:		return ("TRANS");
   2674 	case T10_Cmd_S4_AIO:		return ("AIO");
   2675 	case T10_Cmd_S5_Wait:		return ("WAIT");
   2676 	case T10_Cmd_S6_Freeing_In:	return ("FREEING_IN");
   2677 	case T10_Cmd_S7_Freeing_AIO:	return ("FREEING_AIO");
   2678 	}
   2679 	return ("Invalid State");
   2680 }
   2681 #endif
   2682 
   2683 static char *
   2684 event_to_str(t10_cmd_event_t e)
   2685 {
   2686 	switch (e) {
   2687 	case T10_Cmd_T1:	return ("T1");
   2688 	case T10_Cmd_T2:	return ("T2");
   2689 	case T10_Cmd_T3:	return ("T3");
   2690 	case T10_Cmd_T4:	return ("T4");
   2691 	case T10_Cmd_T5:	return ("T5");
   2692 	case T10_Cmd_T6:	return ("T6");
   2693 	case T10_Cmd_T7:	return ("T7");
   2694 	}
   2695 	return ("Invalid Event");
   2696 }
   2697 
   2698 /*ARGSUSED*/
   2699 static void
   2700 sam_per_init(t10_lu_impl_t *t)
   2701 {
   2702 	assert(0);
   2703 }
   2704 
   2705 /*ARGSUSED*/
   2706 static void
   2707 sam_per_fini(t10_lu_impl_t *t)
   2708 {
   2709 	assert(0);
   2710 }
   2711 
   2712 /*ARGSUSED*/
   2713 static void
   2714 sam_task_mgmt(t10_lu_common_t *t, TaskOp_t op)
   2715 {
   2716 	assert(0);
   2717 }
   2718 
   2719 static sam_device_table_t sam_emul_table[] = {
   2720 	/* 0x00: DTYPE_DIRECT */
   2721 	{ sbc_common_init, sbc_common_fini, sbc_per_init, sbc_per_fini,
   2722 		sbc_task_mgmt, TGT_TYPE_DISK },
   2723 	/* 0x01: DTYPE_SEQUENTIAL */
   2724 	{ ssc_common_init, ssc_common_fini, ssc_per_init, ssc_per_fini,
   2725 		ssc_task_mgmt, TGT_TYPE_TAPE },
   2726 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2727 		sam_task_mgmt, TGT_TYPE_INVALID },
   2728 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2729 		sam_task_mgmt, TGT_TYPE_INVALID },
   2730 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2731 		sam_task_mgmt, TGT_TYPE_INVALID },
   2732 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2733 		sam_task_mgmt, TGT_TYPE_INVALID },
   2734 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2735 		sam_task_mgmt, TGT_TYPE_INVALID },
   2736 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2737 		sam_task_mgmt, TGT_TYPE_INVALID },
   2738 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2739 		sam_task_mgmt, TGT_TYPE_INVALID },
   2740 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2741 		sam_task_mgmt, TGT_TYPE_INVALID },
   2742 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2743 		sam_task_mgmt, TGT_TYPE_INVALID },
   2744 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2745 		sam_task_mgmt, TGT_TYPE_INVALID },
   2746 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2747 		sam_task_mgmt, TGT_TYPE_INVALID },
   2748 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2749 		sam_task_mgmt, TGT_TYPE_INVALID },
   2750 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2751 		sam_task_mgmt, TGT_TYPE_INVALID },
   2752 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2753 		sam_task_mgmt, TGT_TYPE_INVALID },
   2754 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2755 		sam_task_mgmt, TGT_TYPE_INVALID },
   2756 	/* 0x11: DTYPE_OSD */
   2757 	{ osd_common_init, osd_common_fini, osd_per_init, osd_per_fini,
   2758 		osd_task_mgmt, TGT_TYPE_OSD },
   2759 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2760 		sam_task_mgmt, TGT_TYPE_INVALID },
   2761 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2762 		sam_task_mgmt, TGT_TYPE_INVALID },
   2763 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2764 		sam_task_mgmt, TGT_TYPE_INVALID },
   2765 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2766 		sam_task_mgmt, TGT_TYPE_INVALID },
   2767 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2768 		sam_task_mgmt, TGT_TYPE_INVALID },
   2769 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2770 		sam_task_mgmt, TGT_TYPE_INVALID },
   2771 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2772 		sam_task_mgmt, TGT_TYPE_INVALID },
   2773 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2774 		sam_task_mgmt, TGT_TYPE_INVALID },
   2775 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2776 		sam_task_mgmt, TGT_TYPE_INVALID },
   2777 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2778 		sam_task_mgmt, TGT_TYPE_INVALID },
   2779 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2780 		sam_task_mgmt, TGT_TYPE_INVALID },
   2781 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2782 		sam_task_mgmt, TGT_TYPE_INVALID },
   2783 	{ sam_common_init, sam_common_fini, sam_per_init, sam_per_fini,
   2784 		sam_task_mgmt, TGT_TYPE_INVALID },
   2785 	/* 0x1f: DTYPE_UNKNOWN */
   2786 	{ raw_common_init, raw_common_fini, raw_per_init, raw_per_fini,
   2787 		raw_task_mgmt, TGT_TYPE_RAW },
   2788 	/* End-of-Table marker */
   2789 	{ 0, 0, 0, 0, 0, NULL }
   2790 };
   2791