Home | History | Annotate | Download | only in io
      1      0    stevel /*
      2      0    stevel  * CDDL HEADER START
      3      0    stevel  *
      4      0    stevel  * The contents of this file are subject to the terms of the
      5   1657     heppo  * Common Development and Distribution License (the "License").
      6   1657     heppo  * You may not use this file except in compliance with the License.
      7      0    stevel  *
      8      0    stevel  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9      0    stevel  * or http://www.opensolaris.org/os/licensing.
     10      0    stevel  * See the License for the specific language governing permissions
     11      0    stevel  * and limitations under the License.
     12      0    stevel  *
     13      0    stevel  * When distributing Covered Code, include this CDDL HEADER in each
     14      0    stevel  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15      0    stevel  * If applicable, add the following below this CDDL HEADER, with the
     16      0    stevel  * fields enclosed by brackets "[]" replaced with your own identifying
     17      0    stevel  * information: Portions Copyright [yyyy] [name of copyright owner]
     18      0    stevel  *
     19      0    stevel  * CDDL HEADER END
     20      0    stevel  */
     21      0    stevel /*
     22   8669      Dina  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23      0    stevel  * Use is subject to license terms.
     24      0    stevel  */
     25      0    stevel 
     26      0    stevel /*
     27      0    stevel  * lofi (loopback file) driver - allows you to attach a file to a device,
     28      0    stevel  * which can then be accessed through that device. The simple model is that
     29      0    stevel  * you tell lofi to open a file, and then use the block device you get as
     30      0    stevel  * you would any block device. lofi translates access to the block device
     31      0    stevel  * into I/O on the underlying file. This is mostly useful for
     32      0    stevel  * mounting images of filesystems.
     33      0    stevel  *
     34      0    stevel  * lofi is controlled through /dev/lofictl - this is the only device exported
     35      0    stevel  * during attach, and is minor number 0. lofiadm communicates with lofi through
     36      0    stevel  * ioctls on this device. When a file is attached to lofi, block and character
     37      0    stevel  * devices are exported in /dev/lofi and /dev/rlofi. Currently, these devices
     38      0    stevel  * are identified by their minor number, and the minor number is also used
     39      0    stevel  * as the name in /dev/lofi. If we ever decide to support virtual disks,
     40      0    stevel  * we'll have to divide the minor number space to identify fdisk partitions
     41      0    stevel  * and slices, and the name will then be the minor number shifted down a
     42      0    stevel  * few bits. Minor devices are tracked with state structures handled with
     43      0    stevel  * ddi_soft_state(9F) for simplicity.
     44      0    stevel  *
     45      0    stevel  * A file attached to lofi is opened when attached and not closed until
     46      0    stevel  * explicitly detached from lofi. This seems more sensible than deferring
     47      0    stevel  * the open until the /dev/lofi device is opened, for a number of reasons.
     48      0    stevel  * One is that any failure is likely to be noticed by the person (or script)
     49      0    stevel  * running lofiadm. Another is that it would be a security problem if the
     50      0    stevel  * file was replaced by another one after being added but before being opened.
     51      0    stevel  *
     52      0    stevel  * The only hard part about lofi is the ioctls. In order to support things
     53      0    stevel  * like 'newfs' on a lofi device, it needs to support certain disk ioctls.
     54      0    stevel  * So it has to fake disk geometry and partition information. More may need
     55      0    stevel  * to be faked if your favorite utility doesn't work and you think it should
     56      0    stevel  * (fdformat doesn't work because it really wants to know the type of floppy
     57      0    stevel  * controller to talk to, and that didn't seem easy to fake. Or possibly even
     58      0    stevel  * necessary, since we have mkfs_pcfs now).
     59   4451  eschrock  *
     60   4451  eschrock  * Normally, a lofi device cannot be detached if it is open (i.e. busy).  To
     61   4451  eschrock  * support simulation of hotplug events, an optional force flag is provided.
     62   4451  eschrock  * If a lofi device is open when a force detach is requested, then the
     63   4451  eschrock  * underlying file is closed and any subsequent operations return EIO.  When the
     64   4451  eschrock  * device is closed for the last time, it will be cleaned up at that time.  In
     65   4451  eschrock  * addition, the DKIOCSTATE ioctl will return DKIO_DEV_GONE when the device is
     66   4451  eschrock  * detached but not removed.
     67      0    stevel  *
     68      0    stevel  * Known problems:
     69      0    stevel  *
     70      0    stevel  *	UFS logging. Mounting a UFS filesystem image "logging"
     71      0    stevel  *	works for basic copy testing but wedges during a build of ON through
     72      0    stevel  *	that image. Some deadlock in lufs holding the log mutex and then
     73      0    stevel  *	getting stuck on a buf. So for now, don't do that.
     74      0    stevel  *
     75      0    stevel  *	Direct I/O. Since the filesystem data is being cached in the buffer
     76      0    stevel  *	cache, _and_ again in the underlying filesystem, it's tempting to
     77      0    stevel  *	enable direct I/O on the underlying file. Don't, because that deadlocks.
     78      0    stevel  *	I think to fix the cache-twice problem we might need filesystem support.
     79      0    stevel  *
     80      0    stevel  *	lofi on itself. The simple lock strategy (lofi_lock) precludes this
     81      0    stevel  *	because you'll be in lofi_ioctl, holding the lock when you open the
     82      0    stevel  *	file, which, if it's lofi, will grab lofi_lock. We prevent this for
     83      0    stevel  *	now, though not using ddi_soft_state(9F) would make it possible to
     84      0    stevel  *	do. Though it would still be silly.
     85      0    stevel  *
     86      0    stevel  * Interesting things to do:
     87      0    stevel  *
     88      0    stevel  *	Allow multiple files for each device. A poor-man's metadisk, basically.
     89      0    stevel  *
     90      0    stevel  *	Pass-through ioctls on block devices. You can (though it's not
     91      0    stevel  *	documented), give lofi a block device as a file name. Then we shouldn't
     92   8313      Dina  *	need to fake a geometry, however, it may be relevant if you're replacing
     93   8313      Dina  *	metadisk, or using lofi to get crypto.
     94   8313      Dina  *	It makes sense to do lofiadm -c aes -a /dev/dsk/c0t0d0s4 /dev/lofi/1
     95   8313      Dina  *	and then in /etc/vfstab have an entry for /dev/lofi/1 as /export/home.
     96   8313      Dina  *	In fact this even makes sense if you have lofi "above" metadisk.
     97      0    stevel  *
     98   8313      Dina  * Encryption:
     99   8313      Dina  *	Each lofi device can have its own symmetric key and cipher.
    100   8313      Dina  *	They are passed to us by lofiadm(1m) in the correct format for use
    101   8313      Dina  *	with the misc/kcf crypto_* routines.
    102   8313      Dina  *
    103   8313      Dina  *	Each block has its own IV, that is calculated in lofi_blk_mech(), based
    104   8313      Dina  *	on the "master" key held in the lsp and the block number of the buffer.
    105      0    stevel  */
    106      0    stevel 
    107      0    stevel #include <sys/types.h>
    108   5643     aalok #include <netinet/in.h>
    109      0    stevel #include <sys/sysmacros.h>
    110      0    stevel #include <sys/uio.h>
    111      0    stevel #include <sys/kmem.h>
    112      0    stevel #include <sys/cred.h>
    113      0    stevel #include <sys/mman.h>
    114      0    stevel #include <sys/errno.h>
    115      0    stevel #include <sys/aio_req.h>
    116      0    stevel #include <sys/stat.h>
    117      0    stevel #include <sys/file.h>
    118      0    stevel #include <sys/modctl.h>
    119      0    stevel #include <sys/conf.h>
    120      0    stevel #include <sys/debug.h>
    121      0    stevel #include <sys/vnode.h>
    122      0    stevel #include <sys/lofi.h>
    123      0    stevel #include <sys/fcntl.h>
    124      0    stevel #include <sys/pathname.h>
    125      0    stevel #include <sys/filio.h>
    126      0    stevel #include <sys/fdio.h>
    127      0    stevel #include <sys/open.h>
    128      0    stevel #include <sys/disp.h>
    129      0    stevel #include <vm/seg_map.h>
    130      0    stevel #include <sys/ddi.h>
    131      0    stevel #include <sys/sunddi.h>
    132   5643     aalok #include <sys/zmod.h>
    133   8313      Dina #include <sys/crypto/common.h>
    134   8313      Dina #include <sys/crypto/api.h>
    135   8996      Alok #include <LzmaDec.h>
    136   8313      Dina 
    137   8313      Dina /*
    138   8313      Dina  * The basis for CRYOFF is derived from usr/src/uts/common/sys/fs/ufs_fs.h.
    139   8313      Dina  * Crypto metadata, if it exists, is located at the end of the boot block
    140   8313      Dina  * (BBOFF + BBSIZE, which is SBOFF).  The super block and everything after
    141   8313      Dina  * is offset by the size of the crypto metadata which is handled by
    142   8313      Dina  * lsp->ls_crypto_offset.
    143   8313      Dina  */
    144   8313      Dina #define	CRYOFF	((off_t)8192)
    145      0    stevel 
    146      0    stevel #define	NBLOCKS_PROP_NAME	"Nblocks"
    147   5643     aalok #define	SIZE_PROP_NAME		"Size"
    148      0    stevel 
    149   8313      Dina #define	SETUP_C_DATA(cd, buf, len) 		\
    150   8313      Dina 	(cd).cd_format = CRYPTO_DATA_RAW;	\
    151   8313      Dina 	(cd).cd_offset = 0;			\
    152   8313      Dina 	(cd).cd_miscdata = NULL;		\
    153   8313      Dina 	(cd).cd_length = (len);			\
    154   8313      Dina 	(cd).cd_raw.iov_base = (buf);		\
    155   8313      Dina 	(cd).cd_raw.iov_len = (len);
    156   8313      Dina 
    157   8313      Dina #define	UIO_CHECK(uio)	\
    158   8313      Dina 	if (((uio)->uio_loffset % DEV_BSIZE) != 0 || \
    159   8313      Dina 	    ((uio)->uio_resid % DEV_BSIZE) != 0) { \
    160   8313      Dina 		return (EINVAL); \
    161   8313      Dina 	}
    162   8313      Dina 
    163   8313      Dina static dev_info_t *lofi_dip = NULL;
    164   8313      Dina static void *lofi_statep = NULL;
    165      0    stevel static kmutex_t lofi_lock;		/* state lock */
    166      0    stevel 
    167      0    stevel /*
    168      0    stevel  * Because lofi_taskq_nthreads limits the actual swamping of the device, the
    169      0    stevel  * maxalloc parameter (lofi_taskq_maxalloc) should be tuned conservatively
    170      0    stevel  * high.  If we want to be assured that the underlying device is always busy,
    171      0    stevel  * we must be sure that the number of bytes enqueued when the number of
    172      0    stevel  * enqueued tasks exceeds maxalloc is sufficient to keep the device busy for
    173      0    stevel  * the duration of the sleep time in taskq_ent_alloc().  That is, lofi should
    174      0    stevel  * set maxalloc to be the maximum throughput (in bytes per second) of the
    175      0    stevel  * underlying device divided by the minimum I/O size.  We assume a realistic
    176      0    stevel  * maximum throughput of one hundred megabytes per second; we set maxalloc on
    177      0    stevel  * the lofi task queue to be 104857600 divided by DEV_BSIZE.
    178      0    stevel  */
    179      0    stevel static int lofi_taskq_maxalloc = 104857600 / DEV_BSIZE;
    180      0    stevel static int lofi_taskq_nthreads = 4;	/* # of taskq threads per device */
    181      0    stevel 
    182      0    stevel uint32_t lofi_max_files = LOFI_MAX_FILES;
    183   8313      Dina const char lofi_crypto_magic[6] = LOFI_CRYPTO_MAGIC;
    184   5643     aalok 
    185   9048      jrgn /*
    186   9048      jrgn  * To avoid decompressing data in a compressed segment multiple times
    187   9048      jrgn  * when accessing small parts of a segment's data, we cache and reuse
    188   9048      jrgn  * the uncompressed segment's data.
    189   9048      jrgn  *
    190   9048      jrgn  * A single cached segment is sufficient to avoid lots of duplicate
    191   9048      jrgn  * segment decompress operations. A small cache size also reduces the
    192   9048      jrgn  * memory footprint.
    193   9048      jrgn  *
    194   9048      jrgn  * lofi_max_comp_cache is the maximum number of decompressed data segments
    195   9048      jrgn  * cached for each compressed lofi image. It can be set to 0 to disable
    196   9048      jrgn  * caching.
    197   9048      jrgn  */
    198   9048      jrgn 
    199   9048      jrgn uint32_t lofi_max_comp_cache = 1;
    200   9048      jrgn 
    201   5643     aalok static int gzip_decompress(void *src, size_t srclen, void *dst,
    202   5643     aalok 	size_t *destlen, int level);
    203   5643     aalok 
    204   8996      Alok static int lzma_decompress(void *src, size_t srclen, void *dst,
    205   8996      Alok 	size_t *dstlen, int level);
    206   8996      Alok 
    207   5643     aalok lofi_compress_info_t lofi_compress_table[LOFI_COMPRESS_FUNCTIONS] = {
    208   5643     aalok 	{gzip_decompress,	NULL,	6,	"gzip"}, /* default */
    209   5643     aalok 	{gzip_decompress,	NULL,	6,	"gzip-6"},
    210   8996      Alok 	{gzip_decompress,	NULL,	9,	"gzip-9"},
    211   8996      Alok 	{lzma_decompress,	NULL,	0,	"lzma"}
    212   5643     aalok };
    213   8996      Alok 
    214   8996      Alok /*ARGSUSED*/
    215   8996      Alok static void
    216   8996      Alok *SzAlloc(void *p, size_t size)
    217   8996      Alok {
    218   8996      Alok 	return (kmem_alloc(size, KM_SLEEP));
    219   8996      Alok }
    220   8996      Alok 
    221   8996      Alok /*ARGSUSED*/
    222   8996      Alok static void
    223   8996      Alok SzFree(void *p, void *address, size_t size)
    224   8996      Alok {
    225   8996      Alok 	kmem_free(address, size);
    226   8996      Alok }
    227   8996      Alok 
    228   8996      Alok static ISzAlloc g_Alloc = { SzAlloc, SzFree };
    229   9048      jrgn 
    230   9048      jrgn /*
    231   9048      jrgn  * Free data referenced by the linked list of cached uncompressed
    232   9048      jrgn  * segments.
    233   9048      jrgn  */
    234   9048      jrgn static void
    235   9048      jrgn lofi_free_comp_cache(struct lofi_state *lsp)
    236   9048      jrgn {
    237   9048      jrgn 	struct lofi_comp_cache *lc;
    238   9048      jrgn 
    239   9048      jrgn 	while ((lc = list_remove_head(&lsp->ls_comp_cache)) != NULL) {
    240   9048      jrgn 		kmem_free(lc->lc_data, lsp->ls_uncomp_seg_sz);
    241   9048      jrgn 		kmem_free(lc, sizeof (struct lofi_comp_cache));
    242   9048      jrgn 		lsp->ls_comp_cache_count--;
    243   9048      jrgn 	}
    244   9048      jrgn 	ASSERT(lsp->ls_comp_cache_count == 0);
    245   9048      jrgn }
    246      0    stevel 
    247      0    stevel static int
    248      0    stevel lofi_busy(void)
    249      0    stevel {
    250      0    stevel 	minor_t	minor;
    251      0    stevel 
    252      0    stevel 	/*
    253      0    stevel 	 * We need to make sure no mappings exist - mod_remove won't
    254      0    stevel 	 * help because the device isn't open.
    255      0    stevel 	 */
    256      0    stevel 	mutex_enter(&lofi_lock);
    257      0    stevel 	for (minor = 1; minor <= lofi_max_files; minor++) {
    258      0    stevel 		if (ddi_get_soft_state(lofi_statep, minor) != NULL) {
    259      0    stevel 			mutex_exit(&lofi_lock);
    260      0    stevel 			return (EBUSY);
    261      0    stevel 		}
    262      0    stevel 	}
    263      0    stevel 	mutex_exit(&lofi_lock);
    264      0    stevel 	return (0);
    265      0    stevel }
    266      0    stevel 
    267      0    stevel static int
    268      0    stevel is_opened(struct lofi_state *lsp)
    269      0    stevel {
    270      0    stevel 	ASSERT(mutex_owned(&lofi_lock));
    271      0    stevel 	return (lsp->ls_chr_open || lsp->ls_blk_open || lsp->ls_lyr_open_count);
    272      0    stevel }
    273      0    stevel 
    274      0    stevel static int
    275      0    stevel mark_opened(struct lofi_state *lsp, int otyp)
    276      0    stevel {
    277      0    stevel 	ASSERT(mutex_owned(&lofi_lock));
    278      0    stevel 	switch (otyp) {
    279      0    stevel 	case OTYP_CHR:
    280      0    stevel 		lsp->ls_chr_open = 1;
    281      0    stevel 		break;
    282      0    stevel 	case OTYP_BLK:
    283      0    stevel 		lsp->ls_blk_open = 1;
    284      0    stevel 		break;
    285      0    stevel 	case OTYP_LYR:
    286      0    stevel 		lsp->ls_lyr_open_count++;
    287      0    stevel 		break;
    288      0    stevel 	default:
    289      0    stevel 		return (-1);
    290      0    stevel 	}
    291      0    stevel 	return (0);
    292      0    stevel }
    293      0    stevel 
    294      0    stevel static void
    295      0    stevel mark_closed(struct lofi_state *lsp, int otyp)
    296      0    stevel {
    297      0    stevel 	ASSERT(mutex_owned(&lofi_lock));
    298      0    stevel 	switch (otyp) {
    299      0    stevel 	case OTYP_CHR:
    300      0    stevel 		lsp->ls_chr_open = 0;
    301      0    stevel 		break;
    302      0    stevel 	case OTYP_BLK:
    303      0    stevel 		lsp->ls_blk_open = 0;
    304      0    stevel 		break;
    305      0    stevel 	case OTYP_LYR:
    306      0    stevel 		lsp->ls_lyr_open_count--;
    307      0    stevel 		break;
    308      0    stevel 	default:
    309      0    stevel 		break;
    310      0    stevel 	}
    311      0    stevel }
    312      0    stevel 
    313   4451  eschrock static void
    314   8313      Dina lofi_free_crypto(struct lofi_state *lsp)
    315   8313      Dina {
    316   8313      Dina 	ASSERT(mutex_owned(&lofi_lock));
    317   8313      Dina 
    318   8313      Dina 	if (lsp->ls_crypto_enabled) {
    319   8313      Dina 		/*
    320   8313      Dina 		 * Clean up the crypto state so that it doesn't hang around
    321   8313      Dina 		 * in memory after we are done with it.
    322   8313      Dina 		 */
    323   8313      Dina 		bzero(lsp->ls_key.ck_data,
    324   8313      Dina 		    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));
    325   8313      Dina 		kmem_free(lsp->ls_key.ck_data,
    326   8313      Dina 		    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));
    327   8313      Dina 		lsp->ls_key.ck_data = NULL;
    328   8313      Dina 		lsp->ls_key.ck_length = 0;
    329   8313      Dina 
    330   8313      Dina 		if (lsp->ls_mech.cm_param != NULL) {
    331   8313      Dina 			kmem_free(lsp->ls_mech.cm_param,
    332   8313      Dina 			    lsp->ls_mech.cm_param_len);
    333   8313      Dina 			lsp->ls_mech.cm_param = NULL;
    334   8313      Dina 			lsp->ls_mech.cm_param_len = 0;
    335   8313      Dina 		}
    336   8313      Dina 
    337   8313      Dina 		if (lsp->ls_iv_mech.cm_param != NULL) {
    338   8313      Dina 			kmem_free(lsp->ls_iv_mech.cm_param,
    339   8313      Dina 			    lsp->ls_iv_mech.cm_param_len);
    340   8313      Dina 			lsp->ls_iv_mech.cm_param = NULL;
    341   8313      Dina 			lsp->ls_iv_mech.cm_param_len = 0;
    342   8313      Dina 		}
    343   8313      Dina 
    344   8313      Dina 		mutex_destroy(&lsp->ls_crypto_lock);
    345   8313      Dina 	}
    346   8313      Dina }
    347   8313      Dina 
    348   8313      Dina static void
    349   4451  eschrock lofi_free_handle(dev_t dev, minor_t minor, struct lofi_state *lsp,
    350   4451  eschrock     cred_t *credp)
    351   4451  eschrock {
    352   4451  eschrock 	dev_t	newdev;
    353   4451  eschrock 	char	namebuf[50];
    354   8313      Dina 
    355   8313      Dina 	ASSERT(mutex_owned(&lofi_lock));
    356   8313      Dina 
    357   8313      Dina 	lofi_free_crypto(lsp);
    358   4451  eschrock 
    359   4451  eschrock 	if (lsp->ls_vp) {
    360   5331       amw 		(void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag,
    361   5331       amw 		    1, 0, credp, NULL);
    362   4451  eschrock 		VN_RELE(lsp->ls_vp);
    363   4451  eschrock 		lsp->ls_vp = NULL;
    364   4451  eschrock 	}
    365   4451  eschrock 
    366   4451  eschrock 	newdev = makedevice(getmajor(dev), minor);
    367   4451  eschrock 	(void) ddi_prop_remove(newdev, lofi_dip, SIZE_PROP_NAME);
    368   4451  eschrock 	(void) ddi_prop_remove(newdev, lofi_dip, NBLOCKS_PROP_NAME);
    369   4451  eschrock 
    370   4451  eschrock 	(void) snprintf(namebuf, sizeof (namebuf), "%d", minor);
    371   4451  eschrock 	ddi_remove_minor_node(lofi_dip, namebuf);
    372   4451  eschrock 	(void) snprintf(namebuf, sizeof (namebuf), "%d,raw", minor);
    373   4451  eschrock 	ddi_remove_minor_node(lofi_dip, namebuf);
    374   4451  eschrock 
    375   4451  eschrock 	kmem_free(lsp->ls_filename, lsp->ls_filename_sz);
    376   4451  eschrock 	taskq_destroy(lsp->ls_taskq);
    377   4451  eschrock 	if (lsp->ls_kstat) {
    378   4451  eschrock 		kstat_delete(lsp->ls_kstat);
    379   4451  eschrock 		mutex_destroy(&lsp->ls_kstat_lock);
    380   6791     aalok 	}
    381   6791     aalok 
    382   9048      jrgn 	/*
    383   9048      jrgn 	 * Free cached decompressed segment data
    384   9048      jrgn 	 */
    385   9048      jrgn 	lofi_free_comp_cache(lsp);
    386   9048      jrgn 	list_destroy(&lsp->ls_comp_cache);
    387   9048      jrgn 	mutex_destroy(&lsp->ls_comp_cache_lock);
    388   9048      jrgn 
    389   6791     aalok 	if (lsp->ls_uncomp_seg_sz > 0) {
    390   6791     aalok 		kmem_free(lsp->ls_comp_index_data, lsp->ls_comp_index_data_sz);
    391   6791     aalok 		lsp->ls_uncomp_seg_sz = 0;
    392   4451  eschrock 	}
    393   9048      jrgn 
    394   9048      jrgn 	mutex_destroy(&lsp->ls_vp_lock);
    395   9048      jrgn 
    396   4451  eschrock 	ddi_soft_state_free(lofi_statep, minor);
    397   4451  eschrock }
    398   4451  eschrock 
    399   4451  eschrock /*ARGSUSED*/
    400      0    stevel static int
    401      0    stevel lofi_open(dev_t *devp, int flag, int otyp, struct cred *credp)
    402      0    stevel {
    403      0    stevel 	minor_t	minor;
    404      0    stevel 	struct lofi_state *lsp;
    405      0    stevel 
    406      0    stevel 	mutex_enter(&lofi_lock);
    407      0    stevel 	minor = getminor(*devp);
    408      0    stevel 	if (minor == 0) {
    409      0    stevel 		/* master control device */
    410      0    stevel 		/* must be opened exclusively */
    411      0    stevel 		if (((flag & FEXCL) != FEXCL) || (otyp != OTYP_CHR)) {
    412      0    stevel 			mutex_exit(&lofi_lock);
    413      0    stevel 			return (EINVAL);
    414      0    stevel 		}
    415      0    stevel 		lsp = ddi_get_soft_state(lofi_statep, 0);
    416      0    stevel 		if (lsp == NULL) {
    417      0    stevel 			mutex_exit(&lofi_lock);
    418      0    stevel 			return (ENXIO);
    419      0    stevel 		}
    420      0    stevel 		if (is_opened(lsp)) {
    421      0    stevel 			mutex_exit(&lofi_lock);
    422      0    stevel 			return (EBUSY);
    423      0    stevel 		}
    424      0    stevel 		(void) mark_opened(lsp, OTYP_CHR);
    425      0    stevel 		mutex_exit(&lofi_lock);
    426      0    stevel 		return (0);
    427      0    stevel 	}
    428      0    stevel 
    429      0    stevel 	/* otherwise, the mapping should already exist */
    430      0    stevel 	lsp = ddi_get_soft_state(lofi_statep, minor);
    431      0    stevel 	if (lsp == NULL) {
    432      0    stevel 		mutex_exit(&lofi_lock);
    433      0    stevel 		return (EINVAL);
    434      0    stevel 	}
    435      0    stevel 
    436   4451  eschrock 	if (lsp->ls_vp == NULL) {
    437   4451  eschrock 		mutex_exit(&lofi_lock);
    438   4451  eschrock 		return (ENXIO);
    439   4451  eschrock 	}
    440   4451  eschrock 
    441      0    stevel 	if (mark_opened(lsp, otyp) == -1) {
    442      0    stevel 		mutex_exit(&lofi_lock);
    443      0    stevel 		return (EINVAL);
    444      0    stevel 	}
    445      0    stevel 
    446      0    stevel 	mutex_exit(&lofi_lock);
    447      0    stevel 	return (0);
    448      0    stevel }
    449      0    stevel 
    450   4451  eschrock /*ARGSUSED*/
    451      0    stevel static int
    452      0    stevel lofi_close(dev_t dev, int flag, int otyp, struct cred *credp)
    453      0    stevel {
    454      0    stevel 	minor_t	minor;
    455      0    stevel 	struct lofi_state *lsp;
    456      0    stevel 
    457      0    stevel 	mutex_enter(&lofi_lock);
    458      0    stevel 	minor = getminor(dev);
    459      0    stevel 	lsp = ddi_get_soft_state(lofi_statep, minor);
    460      0    stevel 	if (lsp == NULL) {
    461      0    stevel 		mutex_exit(&lofi_lock);
    462      0    stevel 		return (EINVAL);
    463      0    stevel 	}
    464      0    stevel 	mark_closed(lsp, otyp);
    465   4451  eschrock 
    466   4451  eschrock 	/*
    467   6734   johnlev 	 * If we forcibly closed the underlying device (li_force), or
    468   6734   johnlev 	 * asked for cleanup (li_cleanup), finish up if we're the last
    469   6734   johnlev 	 * out of the door.
    470   4451  eschrock 	 */
    471   6734   johnlev 	if (minor != 0 && !is_opened(lsp) &&
    472   6734   johnlev 	    (lsp->ls_cleanup || lsp->ls_vp == NULL))
    473   4451  eschrock 		lofi_free_handle(dev, minor, lsp, credp);
    474   6734   johnlev 
    475      0    stevel 	mutex_exit(&lofi_lock);
    476      0    stevel 	return (0);
    477      0    stevel }
    478      0    stevel 
    479   8313      Dina /*
    480   8313      Dina  * Sets the mechanism's initialization vector (IV) if one is needed.
    481   8313      Dina  * The IV is computed from the data block number.  lsp->ls_mech is
    482   8313      Dina  * altered so that:
    483   8313      Dina  *	lsp->ls_mech.cm_param_len is set to the IV len.
    484   8313      Dina  *	lsp->ls_mech.cm_param is set to the IV.
    485   8313      Dina  */
    486   8313      Dina static int
    487   8313      Dina lofi_blk_mech(struct lofi_state *lsp, longlong_t lblkno)
    488   8313      Dina {
    489   8313      Dina 	int	ret;
    490   8313      Dina 	crypto_data_t cdata;
    491   8313      Dina 	char	*iv;
    492   8313      Dina 	size_t	iv_len;
    493   8313      Dina 	size_t	min;
    494   8313      Dina 	void	*data;
    495   8313      Dina 	size_t	datasz;
    496   8313      Dina 
    497   8313      Dina 	ASSERT(mutex_owned(&lsp->ls_crypto_lock));
    498   8313      Dina 
    499   8313      Dina 	if (lsp == NULL)
    500   8313      Dina 		return (CRYPTO_DEVICE_ERROR);
    501   8313      Dina 
    502   8313      Dina 	/* lsp->ls_mech.cm_param{_len} has already been set for static iv */
    503   8313      Dina 	if (lsp->ls_iv_type == IVM_NONE) {
    504   8313      Dina 		return (CRYPTO_SUCCESS);
    505   8313      Dina 	}
    506   8313      Dina 
    507   8313      Dina 	/*
    508   8313      Dina 	 * if kmem already alloced from previous call and it's the same size
    509   8313      Dina 	 * we need now, just recycle it; allocate new kmem only if we have to
    510   8313      Dina 	 */
    511   8313      Dina 	if (lsp->ls_mech.cm_param == NULL ||
    512   8313      Dina 	    lsp->ls_mech.cm_param_len != lsp->ls_iv_len) {
    513   8313      Dina 		iv_len = lsp->ls_iv_len;
    514   8313      Dina 		iv = kmem_zalloc(iv_len, KM_SLEEP);
    515   8313      Dina 	} else {
    516   8313      Dina 		iv_len = lsp->ls_mech.cm_param_len;
    517   8313      Dina 		iv = lsp->ls_mech.cm_param;
    518   8313      Dina 		bzero(iv, iv_len);
    519   8313      Dina 	}
    520   8313      Dina 
    521   8313      Dina 	switch (lsp->ls_iv_type) {
    522   8313      Dina 	case IVM_ENC_BLKNO:
    523   8313      Dina 		/* iv is not static, lblkno changes each time */
    524   8313      Dina 		data = &lblkno;
    525   8313      Dina 		datasz = sizeof (lblkno);
    526   8313      Dina 		break;
    527   8313      Dina 	default:
    528   8313      Dina 		data = 0;
    529   8313      Dina 		datasz = 0;
    530   8313      Dina 		break;
    531   8313      Dina 	}
    532   8313      Dina 
    533   8313      Dina 	/*
    534   8313      Dina 	 * write blkno into the iv buffer padded on the left in case
    535   8313      Dina 	 * blkno ever grows bigger than its current longlong_t size
    536   8313      Dina 	 * or a variation other than blkno is used for the iv data
    537   8313      Dina 	 */
    538   8313      Dina 	min = MIN(datasz, iv_len);
    539   8313      Dina 	bcopy(data, iv + (iv_len - min), min);
    540   8313      Dina 
    541   8313      Dina 	/* encrypt the data in-place to get the IV */
    542   8313      Dina 	SETUP_C_DATA(cdata, iv, iv_len);
    543   8313      Dina 
    544   8313      Dina 	ret = crypto_encrypt(&lsp->ls_iv_mech, &cdata, &lsp->ls_key,
    545   8313      Dina 	    NULL, NULL, NULL);
    546   8313      Dina 	if (ret != CRYPTO_SUCCESS) {
    547   8313      Dina 		cmn_err(CE_WARN, "failed to create iv for block %lld: (0x%x)",
    548   8313      Dina 		    lblkno, ret);
    549   8313      Dina 		if (lsp->ls_mech.cm_param != iv)
    550   8313      Dina 			kmem_free(iv, iv_len);
    551   8996      Alok 
    552   8313      Dina 		return (ret);
    553   8313      Dina 	}
    554   8313      Dina 
    555   8313      Dina 	/* clean up the iv from the last computation */
    556   8313      Dina 	if (lsp->ls_mech.cm_param != NULL && lsp->ls_mech.cm_param != iv)
    557   8313      Dina 		kmem_free(lsp->ls_mech.cm_param, lsp->ls_mech.cm_param_len);
    558   8996      Alok 
    559   8313      Dina 	lsp->ls_mech.cm_param_len = iv_len;
    560   8313      Dina 	lsp->ls_mech.cm_param = iv;
    561   8313      Dina 
    562   8313      Dina 	return (CRYPTO_SUCCESS);
    563   8313      Dina }
    564   8313      Dina 
    565   8313      Dina /*
    566   8313      Dina  * Performs encryption and decryption of a chunk of data of size "len",
    567   8313      Dina  * one DEV_BSIZE block at a time.  "len" is assumed to be a multiple of
    568   8313      Dina  * DEV_BSIZE.
    569   8313      Dina  */
    570   8313      Dina static int
    571   8313      Dina lofi_crypto(struct lofi_state *lsp, struct buf *bp, caddr_t plaintext,
    572   8313      Dina     caddr_t ciphertext, size_t len, boolean_t op_encrypt)
    573   8313      Dina {
    574   8313      Dina 	crypto_data_t cdata;
    575   8313      Dina 	crypto_data_t wdata;
    576   8313      Dina 	int ret;
    577   8313      Dina 	longlong_t lblkno = bp->b_lblkno;
    578   8313      Dina 
    579   8313      Dina 	mutex_enter(&lsp->ls_crypto_lock);
    580   8313      Dina 
    581   8313      Dina 	/*
    582   8313      Dina 	 * though we could encrypt/decrypt entire "len" chunk of data, we need
    583   8313      Dina 	 * to break it into DEV_BSIZE pieces to capture blkno incrementing
    584   8313      Dina 	 */
    585   8313      Dina 	SETUP_C_DATA(cdata, plaintext, len);
    586   8313      Dina 	cdata.cd_length = DEV_BSIZE;
    587   8313      Dina 	if (ciphertext != NULL) {		/* not in-place crypto */
    588   8313      Dina 		SETUP_C_DATA(wdata, ciphertext, len);
    589   8313      Dina 		wdata.cd_length = DEV_BSIZE;
    590   8313      Dina 	}
    591   8313      Dina 
    592   8313      Dina 	do {
    593   8313      Dina 		ret = lofi_blk_mech(lsp, lblkno);
    594   8313      Dina 		if (ret != CRYPTO_SUCCESS)
    595   8313      Dina 			continue;
    596   8313      Dina 
    597   8313      Dina 		if (op_encrypt) {
    598   8313      Dina 			ret = crypto_encrypt(&lsp->ls_mech, &cdata,
    599   8313      Dina 			    &lsp->ls_key, NULL,
    600   8313      Dina 			    ((ciphertext != NULL) ? &wdata : NULL), NULL);
    601   8313      Dina 		} else {
    602   8313      Dina 			ret = crypto_decrypt(&lsp->ls_mech, &cdata,
    603   8313      Dina 			    &lsp->ls_key, NULL,
    604   8313      Dina 			    ((ciphertext != NULL) ? &wdata : NULL), NULL);
    605   8313      Dina 		}
    606   8313      Dina 
    607   8313      Dina 		cdata.cd_offset += DEV_BSIZE;
    608   8313      Dina 		if (ciphertext != NULL)
    609   8313      Dina 			wdata.cd_offset += DEV_BSIZE;
    610   8313      Dina 		lblkno++;
    611   8313      Dina 	} while (ret == CRYPTO_SUCCESS && cdata.cd_offset < len);
    612   8313      Dina 
    613   8313      Dina 	mutex_exit(&lsp->ls_crypto_lock);
    614   8313      Dina 
    615   8313      Dina 	if (ret != CRYPTO_SUCCESS) {
    616   8313      Dina 		cmn_err(CE_WARN, "%s failed for block %lld:  (0x%x)",
    617   8313      Dina 		    op_encrypt ? "crypto_encrypt()" : "crypto_decrypt()",
    618   8313      Dina 		    lblkno, ret);
    619   8313      Dina 	}
    620   8313      Dina 
    621   8313      Dina 	return (ret);
    622   8313      Dina }
    623   8313      Dina 
    624   8313      Dina #define	RDWR_RAW	1
    625   8313      Dina #define	RDWR_BCOPY	2
    626   8313      Dina 
    627   8313      Dina static int
    628   8313      Dina lofi_rdwr(caddr_t bufaddr, offset_t offset, struct buf *bp,
    629   8313      Dina     struct lofi_state *lsp, size_t len, int method, caddr_t bcopy_locn)
    630   8313      Dina {
    631   8313      Dina 	ssize_t resid;
    632   8313      Dina 	int isread;
    633   8313      Dina 	int error;
    634   8313      Dina 
    635   8313      Dina 	/*
    636   8313      Dina 	 * Handles reads/writes for both plain and encrypted lofi
    637   8313      Dina 	 * Note:  offset is already shifted by lsp->ls_crypto_offset
    638   8313      Dina 	 * when it gets here.
    639   8313      Dina 	 */
    640   8313      Dina 
    641   8313      Dina 	isread = bp->b_flags & B_READ;
    642   8313      Dina 	if (isread) {
    643   8313      Dina 		if (method == RDWR_BCOPY) {
    644   8313      Dina 			/* DO NOT update bp->b_resid for bcopy */
    645   8313      Dina 			bcopy(bcopy_locn, bufaddr, len);
    646   8313      Dina 			error = 0;
    647   8313      Dina 		} else {		/* RDWR_RAW */
    648   8313      Dina 			error = vn_rdwr(UIO_READ, lsp->ls_vp, bufaddr, len,
    649   8313      Dina 			    offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred,
    650   8313      Dina 			    &resid);
    651   8313      Dina 			bp->b_resid = resid;
    652   8313      Dina 		}
    653   8313      Dina 		if (lsp->ls_crypto_enabled && error == 0) {
    654   8313      Dina 			if (lofi_crypto(lsp, bp, bufaddr, NULL, len,
    655   8313      Dina 			    B_FALSE) != CRYPTO_SUCCESS) {
    656   8313      Dina 				/*
    657   8313      Dina 				 * XXX: original code didn't set residual
    658   8313      Dina 				 * back to len because no error was expected
    659   8313      Dina 				 * from bcopy() if encryption is not enabled
    660   8313      Dina 				 */
    661   8313      Dina 				if (method != RDWR_BCOPY)
    662   8313      Dina 					bp->b_resid = len;
    663   8313      Dina 				error = EIO;
    664   8313      Dina 			}
    665   8313      Dina 		}
    666   8313      Dina 		return (error);
    667   8313      Dina 	} else {
    668   8313      Dina 		void *iobuf = bufaddr;
    669   8313      Dina 
    670   8313      Dina 		if (lsp->ls_crypto_enabled) {
    671   8313      Dina 			/* don't do in-place crypto to keep bufaddr intact */
    672   8313      Dina 			iobuf = kmem_alloc(len, KM_SLEEP);
    673   8313      Dina 			if (lofi_crypto(lsp, bp, bufaddr, iobuf, len,
    674   8313      Dina 			    B_TRUE) != CRYPTO_SUCCESS) {
    675   8313      Dina 				kmem_free(iobuf, len);
    676   8313      Dina 				if (method != RDWR_BCOPY)
    677   8313      Dina 					bp->b_resid = len;
    678   8313      Dina 				return (EIO);
    679   8313      Dina 			}
    680   8313      Dina 		}
    681   8313      Dina 		if (method == RDWR_BCOPY) {
    682   8313      Dina 			/* DO NOT update bp->b_resid for bcopy */
    683   8313      Dina 			bcopy(iobuf, bcopy_locn, len);
    684   8313      Dina 			error = 0;
    685   8313      Dina 		} else {		/* RDWR_RAW */
    686   8313      Dina 			error = vn_rdwr(UIO_WRITE, lsp->ls_vp, iobuf, len,
    687   8313      Dina 			    offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred,
    688   8313      Dina 			    &resid);
    689   8313      Dina 			bp->b_resid = resid;
    690   8313      Dina 		}
    691   8313      Dina 		if (lsp->ls_crypto_enabled) {
    692   8313      Dina 			kmem_free(iobuf, len);
    693   8313      Dina 		}
    694   8313      Dina 		return (error);
    695   8313      Dina 	}
    696   8313      Dina }
    697   8313      Dina 
    698   5643     aalok static int
    699   5643     aalok lofi_mapped_rdwr(caddr_t bufaddr, offset_t offset, struct buf *bp,
    700   8313      Dina     struct lofi_state *lsp)
    701   5643     aalok {
    702   5643     aalok 	int error;
    703   5643     aalok 	offset_t alignedoffset, mapoffset;
    704   5643     aalok 	size_t	xfersize;
    705   5643     aalok 	int	isread;
    706   8313      Dina 	int	smflags;
    707   5643     aalok 	caddr_t	mapaddr;
    708   5643     aalok 	size_t	len;
    709   5643     aalok 	enum seg_rw srw;
    710   8313      Dina 	int	save_error;
    711   8313      Dina 
    712   8313      Dina 	/*
    713   8313      Dina 	 * Note:  offset is already shifted by lsp->ls_crypto_offset
    714   8313      Dina 	 * when it gets here.
    715   8313      Dina 	 */
    716   8313      Dina 	if (lsp->ls_crypto_enabled)
    717   8313      Dina 		ASSERT(lsp->ls_vp_comp_size == lsp->ls_vp_size);
    718   5643     aalok 
    719   5643     aalok 	/*
    720   5643     aalok 	 * segmap always gives us an 8K (MAXBSIZE) chunk, aligned on
    721   5643     aalok 	 * an 8K boundary, but the buf transfer address may not be
    722   5643     aalok 	 * aligned on more than a 512-byte boundary (we don't enforce
    723   5643     aalok 	 * that even though we could). This matters since the initial
    724   5643     aalok 	 * part of the transfer may not start at offset 0 within the
    725   5643     aalok 	 * segmap'd chunk. So we have to compensate for that with
    726   5643     aalok 	 * 'mapoffset'. Subsequent chunks always start off at the
    727   5643     aalok 	 * beginning, and the last is capped by b_resid
    728   8313      Dina 	 *
    729   8313      Dina 	 * Visually, where "|" represents page map boundaries:
    730   8313      Dina 	 *   alignedoffset (mapaddr begins at this segmap boundary)
    731   8313      Dina 	 *    |   offset (from beginning of file)
    732   8313      Dina 	 *    |    |	   len
    733   8313      Dina 	 *    v    v	    v
    734   8313      Dina 	 * ===|====X========|====...======|========X====|====
    735   8313      Dina 	 *	   /-------------...---------------/
    736   8313      Dina 	 *		^ bp->b_bcount/bp->b_resid at start
    737   8313      Dina 	 *    /----/--------/----...------/--------/
    738   8313      Dina 	 *	^	^	^   ^		^
    739   8313      Dina 	 *	|	|	|   |		nth xfersize (<= MAXBSIZE)
    740   8313      Dina 	 *	|	|	2nd thru n-1st xfersize (= MAXBSIZE)
    741   8313      Dina 	 *	|	1st xfersize (<= MAXBSIZE)
    742   8313      Dina 	 *    mapoffset (offset into 1st segmap, non-0 1st time, 0 thereafter)
    743   8313      Dina 	 *
    744   8313      Dina 	 * Notes: "alignedoffset" is "offset" rounded down to nearest
    745   8313      Dina 	 * MAXBSIZE boundary.  "len" is next page boundary of size
    746   8719      Dina 	 * PAGESIZE after "alignedoffset".
    747   5643     aalok 	 */
    748   5643     aalok 	mapoffset = offset & MAXBOFFSET;
    749   5643     aalok 	alignedoffset = offset - mapoffset;
    750   5643     aalok 	bp->b_resid = bp->b_bcount;
    751   5643     aalok 	isread = bp->b_flags & B_READ;
    752   5643     aalok 	srw = isread ? S_READ : S_WRITE;
    753   5643     aalok 	do {
    754   5643     aalok 		xfersize = MIN(lsp->ls_vp_comp_size - offset,
    755   5643     aalok 		    MIN(MAXBSIZE - mapoffset, bp->b_resid));
    756   8719      Dina 		len = roundup(mapoffset + xfersize, PAGESIZE);
    757   5643     aalok 		mapaddr = segmap_getmapflt(segkmap, lsp->ls_vp,
    758   5643     aalok 		    alignedoffset, MAXBSIZE, 1, srw);
    759   5643     aalok 		/*
    760   5643     aalok 		 * Now fault in the pages. This lets us check
    761   5643     aalok 		 * for errors before we reference mapaddr and
    762   5643     aalok 		 * try to resolve the fault in bcopy (which would
    763   5643     aalok 		 * panic instead). And this can easily happen,
    764   5643     aalok 		 * particularly if you've lofi'd a file over NFS
    765   5643     aalok 		 * and someone deletes the file on the server.
    766   5643     aalok 		 */
    767   5643     aalok 		error = segmap_fault(kas.a_hat, segkmap, mapaddr,
    768   5643     aalok 		    len, F_SOFTLOCK, srw);
    769   5643     aalok 		if (error) {
    770   5643     aalok 			(void) segmap_release(segkmap, mapaddr, 0);
    771   5643     aalok 			if (FC_CODE(error) == FC_OBJERR)
    772   5643     aalok 				error = FC_ERRNO(error);
    773   5643     aalok 			else
    774   5643     aalok 				error = EIO;
    775   5643     aalok 			break;
    776   5643     aalok 		}
    777   8313      Dina 		/* error may be non-zero for encrypted lofi */
    778   8313      Dina 		error = lofi_rdwr(bufaddr, 0, bp, lsp, xfersize,
    779   8313      Dina 		    RDWR_BCOPY, mapaddr + mapoffset);
    780   8313      Dina 		if (error == 0) {
    781   8313      Dina 			bp->b_resid -= xfersize;
    782   8313      Dina 			bufaddr += xfersize;
    783   8313      Dina 			offset += xfersize;
    784   8313      Dina 		}
    785   5643     aalok 		smflags = 0;
    786   5643     aalok 		if (isread) {
    787   5643     aalok 			smflags |= SM_FREE;
    788   5643     aalok 			/*
    789   5643     aalok 			 * If we're reading an entire page starting
    790   5643     aalok 			 * at a page boundary, there's a good chance
    791   5643     aalok 			 * we won't need it again. Put it on the
    792   5643     aalok 			 * head of the freelist.
    793   5643     aalok 			 */
    794   8056      Dina 			if (mapoffset == 0 && xfersize == MAXBSIZE)
    795   5643     aalok 				smflags |= SM_DONTNEED;
    796   5643     aalok 		} else {
    797   8313      Dina 			if (error == 0)		/* write back good pages */
    798   8313      Dina 				smflags |= SM_WRITE;
    799   5643     aalok 		}
    800   5643     aalok 		(void) segmap_fault(kas.a_hat, segkmap, mapaddr,
    801   5643     aalok 		    len, F_SOFTUNLOCK, srw);
    802   8313      Dina 		save_error = segmap_release(segkmap, mapaddr, smflags);
    803   8313      Dina 		if (error == 0)
    804   8313      Dina 			error = save_error;
    805   5643     aalok 		/* only the first map may start partial */
    806   5643     aalok 		mapoffset = 0;
    807   5643     aalok 		alignedoffset += MAXBSIZE;
    808   5643     aalok 	} while ((error == 0) && (bp->b_resid > 0) &&
    809   5643     aalok 	    (offset < lsp->ls_vp_comp_size));
    810   5643     aalok 
    811   5643     aalok 	return (error);
    812   5643     aalok }
    813   5643     aalok 
    814   9048      jrgn /*
    815   9048      jrgn  * Check if segment seg_index is present in the decompressed segment
    816   9048      jrgn  * data cache.
    817   9048      jrgn  *
    818   9048      jrgn  * Returns a pointer to the decompressed segment data cache entry if
    819   9048      jrgn  * found, and NULL when decompressed data for this segment is not yet
    820   9048      jrgn  * cached.
    821   9048      jrgn  */
    822   9048      jrgn static struct lofi_comp_cache *
    823   9048      jrgn lofi_find_comp_data(struct lofi_state *lsp, uint64_t seg_index)
    824   9048      jrgn {
    825   9048      jrgn 	struct lofi_comp_cache *lc;
    826   9048      jrgn 
    827   9048      jrgn 	ASSERT(mutex_owned(&lsp->ls_comp_cache_lock));
    828   9048      jrgn 
    829   9048      jrgn 	for (lc = list_head(&lsp->ls_comp_cache); lc != NULL;
    830   9048      jrgn 	    lc = list_next(&lsp->ls_comp_cache, lc)) {
    831   9048      jrgn 		if (lc->lc_index == seg_index) {
    832   9048      jrgn 			/*
    833   9048      jrgn 			 * Decompressed segment data was found in the
    834   9048      jrgn 			 * cache.
    835   9048      jrgn 			 *
    836   9048      jrgn 			 * The cache uses an LRU replacement strategy;
    837   9048      jrgn 			 * move the entry to head of list.
    838   9048      jrgn 			 */
    839   9048      jrgn 			list_remove(&lsp->ls_comp_cache, lc);
    840   9048      jrgn 			list_insert_head(&lsp->ls_comp_cache, lc);
    841   9048      jrgn 			return (lc);
    842   9048      jrgn 		}
    843   9048      jrgn 	}
    844   9048      jrgn 	return (NULL);
    845   9048      jrgn }
    846   9048      jrgn 
    847   9048      jrgn /*
    848   9048      jrgn  * Add the data for a decompressed segment at segment index
    849   9048      jrgn  * seg_index to the cache of the decompressed segments.
    850   9048      jrgn  *
    851   9048      jrgn  * Returns a pointer to the cache element structure in case
    852   9048      jrgn  * the data was added to the cache; returns NULL when the data
    853   9048      jrgn  * wasn't cached.
    854   9048      jrgn  */
    855   9048      jrgn static struct lofi_comp_cache *
    856   9048      jrgn lofi_add_comp_data(struct lofi_state *lsp, uint64_t seg_index,
    857   9048      jrgn     uchar_t *data)
    858   9048      jrgn {
    859   9048      jrgn 	struct lofi_comp_cache *lc;
    860   9048      jrgn 
    861   9048      jrgn 	ASSERT(mutex_owned(&lsp->ls_comp_cache_lock));
    862   9048      jrgn 
    863   9048      jrgn 	while (lsp->ls_comp_cache_count > lofi_max_comp_cache) {
    864   9048      jrgn 		lc = list_remove_tail(&lsp->ls_comp_cache);
    865   9048      jrgn 		ASSERT(lc != NULL);
    866   9048      jrgn 		kmem_free(lc->lc_data, lsp->ls_uncomp_seg_sz);
    867   9048      jrgn 		kmem_free(lc, sizeof (struct lofi_comp_cache));
    868   9048      jrgn 		lsp->ls_comp_cache_count--;
    869   9048      jrgn 	}
    870   9048      jrgn 
    871   9048      jrgn 	/*
    872   9048      jrgn 	 * Do not cache when disabled by tunable variable
    873   9048      jrgn 	 */
    874   9048      jrgn 	if (lofi_max_comp_cache == 0)
    875   9048      jrgn 		return (NULL);
    876   9048      jrgn 
    877   9048      jrgn 	/*
    878   9048      jrgn 	 * When the cache has not yet reached the maximum allowed
    879   9048      jrgn 	 * number of segments, allocate a new cache element.
    880   9048      jrgn 	 * Otherwise the cache is full; reuse the last list element
    881   9048      jrgn 	 * (LRU) for caching the decompressed segment data.
    882   9048      jrgn 	 *
    883   9048      jrgn 	 * The cache element for the new decompressed segment data is
    884   9048      jrgn 	 * added to the head of the list.
    885   9048      jrgn 	 */
    886   9048      jrgn 	if (lsp->ls_comp_cache_count < lofi_max_comp_cache) {
    887   9048      jrgn 		lc = kmem_alloc(sizeof (struct lofi_comp_cache), KM_SLEEP);
    888   9048      jrgn 		lc->lc_data = NULL;
    889   9048      jrgn 		list_insert_head(&lsp->ls_comp_cache, lc);
    890   9048      jrgn 		lsp->ls_comp_cache_count++;
    891   9048      jrgn 	} else {
    892   9048      jrgn 		lc = list_remove_tail(&lsp->ls_comp_cache);
    893   9048      jrgn 		if (lc == NULL)
    894   9048      jrgn 			return (NULL);
    895   9048      jrgn 		list_insert_head(&lsp->ls_comp_cache, lc);
    896   9048      jrgn 	}
    897   9048      jrgn 
    898   9048      jrgn 	/*
    899   9048      jrgn 	 * Free old uncompressed segment data when reusing a cache
    900   9048      jrgn 	 * entry.
    901   9048      jrgn 	 */
    902   9048      jrgn 	if (lc->lc_data != NULL)
    903   9048      jrgn 		kmem_free(lc->lc_data, lsp->ls_uncomp_seg_sz);
    904   9048      jrgn 
    905   9048      jrgn 	lc->lc_data = data;
    906   9048      jrgn 	lc->lc_index = seg_index;
    907   9048      jrgn 	return (lc);
    908   9048      jrgn }
    909   9048      jrgn 
    910   9048      jrgn 
    911   5643     aalok /*ARGSUSED*/
    912   8996      Alok static int
    913   8996      Alok gzip_decompress(void *src, size_t srclen, void *dst,
    914   5643     aalok     size_t *dstlen, int level)
    915   5643     aalok {
    916   5643     aalok 	ASSERT(*dstlen >= srclen);
    917   5643     aalok 
    918   5643     aalok 	if (z_uncompress(dst, dstlen, src, srclen) != Z_OK)
    919   5643     aalok 		return (-1);
    920   8996      Alok 	return (0);
    921   8996      Alok }
    922   8996      Alok 
    923   8996      Alok #define	LZMA_HEADER_SIZE	(LZMA_PROPS_SIZE + 8)
    924   8996      Alok /*ARGSUSED*/
    925   8996      Alok static int
    926   8996      Alok lzma_decompress(void *src, size_t srclen, void *dst,
    927   8996      Alok 	size_t *dstlen, int level)
    928   8996      Alok {
    929   8996      Alok 	size_t insizepure;
    930   8996      Alok 	void *actual_src;
    931   8996      Alok 	ELzmaStatus status;
    932   8996      Alok 
    933   8996      Alok 	insizepure = srclen - LZMA_HEADER_SIZE;
    934   8996      Alok 	actual_src = (void *)((Byte *)src + LZMA_HEADER_SIZE);
    935   8996      Alok 
    936   8996      Alok 	if (LzmaDecode((Byte *)dst, (size_t *)dstlen,
    937   8996      Alok 	    (const Byte *)actual_src, &insizepure,
    938   8996      Alok 	    (const Byte *)src, LZMA_PROPS_SIZE, LZMA_FINISH_ANY, &status,
    939   8996      Alok 	    &g_Alloc) != SZ_OK) {
    940   8996      Alok 		return (-1);
    941   8996      Alok 	}
    942   5643     aalok 	return (0);
    943   5643     aalok }
    944   5643     aalok 
    945      0    stevel /*
    946      0    stevel  * This is basically what strategy used to be before we found we
    947      0    stevel  * needed task queues.
    948      0    stevel  */
    949      0    stevel static void
    950      0    stevel lofi_strategy_task(void *arg)
    951      0    stevel {
    952      0    stevel 	struct buf *bp = (struct buf *)arg;
    953      0    stevel 	int error;
    954      0    stevel 	struct lofi_state *lsp;
    955   8313      Dina 	offset_t offset;
    956   8313      Dina 	caddr_t	bufaddr;
    957   8313      Dina 	size_t	len;
    958   8313      Dina 	size_t	xfersize;
    959   8313      Dina 	boolean_t bufinited = B_FALSE;
    960      0    stevel 
    961      0    stevel 	lsp = ddi_get_soft_state(lofi_statep, getminor(bp->b_edev));
    962   8313      Dina 	if (lsp == NULL) {
    963   8313      Dina 		error = ENXIO;
    964   8313      Dina 		goto errout;
    965   8313      Dina 	}
    966      0    stevel 	if (lsp->ls_kstat) {
    967      0    stevel 		mutex_enter(lsp->ls_kstat->ks_lock);
    968      0    stevel 		kstat_waitq_to_runq(KSTAT_IO_PTR(lsp->ls_kstat));
    969      0    stevel 		mutex_exit(lsp->ls_kstat->ks_lock);
    970      0    stevel 	}
    971      0    stevel 	bp_mapin(bp);
    972      0    stevel 	bufaddr = bp->b_un.b_addr;
    973      0    stevel 	offset = bp->b_lblkno * DEV_BSIZE;	/* offset within file */
    974   8313      Dina 	if (lsp->ls_crypto_enabled) {
    975   8313      Dina 		/* encrypted data really begins after crypto header */
    976   8313      Dina 		offset += lsp->ls_crypto_offset;
    977   8313      Dina 	}
    978   8313      Dina 	len = bp->b_bcount;
    979   8313      Dina 	bufinited = B_TRUE;
    980   8313      Dina 
    981   8313      Dina 	if (lsp->ls_vp == NULL || lsp->ls_vp_closereq) {
    982   8313      Dina 		error = EIO;
    983   8313      Dina 		goto errout;
    984   8313      Dina 	}
    985      0    stevel 
    986      0    stevel 	/*
    987      0    stevel 	 * We used to always use vn_rdwr here, but we cannot do that because
    988      0    stevel 	 * we might decide to read or write from the the underlying
    989      0    stevel 	 * file during this call, which would be a deadlock because
    990      0    stevel 	 * we have the rw_lock. So instead we page, unless it's not
    991   8313      Dina 	 * mapable or it's a character device or it's an encrypted lofi.
    992      0    stevel 	 */
    993   8313      Dina 	if ((lsp->ls_vp->v_flag & VNOMAP) || (lsp->ls_vp->v_type == VCHR) ||
    994   8313      Dina 	    lsp->ls_crypto_enabled) {
    995   8313      Dina 		error = lofi_rdwr(bufaddr, offset, bp, lsp, len, RDWR_RAW,
    996   8313      Dina 		    NULL);
    997   8313      Dina 	} else if (lsp->ls_uncomp_seg_sz == 0) {
    998   8313      Dina 		error = lofi_mapped_rdwr(bufaddr, offset, bp, lsp);
    999   8313      Dina 	} else {
   1000   9048      jrgn 		uchar_t *compressed_seg = NULL, *cmpbuf;
   1001   9048      jrgn 		uchar_t *uncompressed_seg = NULL;
   1002   8313      Dina 		lofi_compress_info_t *li;
   1003   8313      Dina 		size_t oblkcount;
   1004   9048      jrgn 		ulong_t seglen;
   1005   8313      Dina 		uint64_t sblkno, eblkno, cmpbytes;
   1006   9048      jrgn 		uint64_t uncompressed_seg_index;
   1007   9048      jrgn 		struct lofi_comp_cache *lc;
   1008   8313      Dina 		offset_t sblkoff, eblkoff;
   1009   8313      Dina 		u_offset_t salign, ealign;
   1010   8313      Dina 		u_offset_t sdiff;
   1011   8313      Dina 		uint32_t comp_data_sz;
   1012   5643     aalok 		uint64_t i;
   1013   5643     aalok 
   1014   5643     aalok 		/*
   1015   5643     aalok 		 * From here on we're dealing primarily with compressed files
   1016   5643     aalok 		 */
   1017   8313      Dina 		ASSERT(!lsp->ls_crypto_enabled);
   1018   5643     aalok 
   1019   5643     aalok 		/*
   1020   5643     aalok 		 * Compressed files can only be read from and
   1021   5643     aalok 		 * not written to
   1022   5643     aalok 		 */
   1023   5643     aalok 		if (!(bp->b_flags & B_READ)) {
   1024   5643     aalok 			bp->b_resid = bp->b_bcount;
   1025   5643     aalok 			error = EROFS;
   1026   5643     aalok 			goto done;
   1027   5643     aalok 		}
   1028   5643     aalok 
   1029   5643     aalok 		ASSERT(lsp->ls_comp_algorithm_index >= 0);
   1030   5643     aalok 		li = &lofi_compress_table[lsp->ls_comp_algorithm_index];
   1031   5643     aalok 		/*
   1032   5643     aalok 		 * Compute starting and ending compressed segment numbers
   1033   5643     aalok 		 * We use only bitwise operations avoiding division and
   1034   5643     aalok 		 * modulus because we enforce the compression segment size
   1035   5643     aalok 		 * to a power of 2
   1036   5643     aalok 		 */
   1037   5643     aalok 		sblkno = offset >> lsp->ls_comp_seg_shift;
   1038   5643     aalok 		sblkoff = offset & (lsp->ls_uncomp_seg_sz - 1);
   1039   5643     aalok 		eblkno = (offset + bp->b_bcount) >> lsp->ls_comp_seg_shift;
   1040   5643     aalok 		eblkoff = (offset + bp->b_bcount) & (lsp->ls_uncomp_seg_sz - 1);
   1041   9048      jrgn 
   1042   9048      jrgn 		/*
   1043   9048      jrgn 		 * Check the decompressed segment cache.
   1044   9048      jrgn 		 *
   1045   9048      jrgn 		 * The cache is used only when the requested data
   1046   9048      jrgn 		 * is within a segment. Requests that cross
   1047   9048      jrgn 		 * segment boundaries bypass the cache.
   1048   9048      jrgn 		 */
   1049   9048      jrgn 		if (sblkno == eblkno ||
   1050   9048      jrgn 		    (sblkno + 1 == eblkno && eblkoff == 0)) {
   1051   9048      jrgn 			/*
   1052   9048      jrgn 			 * Request doesn't cross a segment boundary,
   1053   9048      jrgn 			 * now check the cache.
   1054   9048      jrgn 			 */
   1055   9048      jrgn 			mutex_enter(&lsp->ls_comp_cache_lock);
   1056   9048      jrgn 			lc = lofi_find_comp_data(lsp, sblkno);
   1057   9048      jrgn 			if (lc != NULL) {
   1058   9048      jrgn 				/*
   1059   9048      jrgn 				 * We've found the decompressed segment
   1060   9048      jrgn 				 * data in the cache; reuse it.
   1061   9048      jrgn 				 */
   1062   9048      jrgn 				bcopy(lc->lc_data + sblkoff, bufaddr,
   1063   9048      jrgn 				    bp->b_bcount);
   1064   9048      jrgn 				mutex_exit(&lsp->ls_comp_cache_lock);
   1065   9048      jrgn 				bp->b_resid = 0;
   1066   9048      jrgn 				error = 0;
   1067   9048      jrgn 				goto done;
   1068   9048      jrgn 			}
   1069   9048      jrgn 			mutex_exit(&lsp->ls_comp_cache_lock);
   1070   9048      jrgn 		}
   1071   5643     aalok 
   1072   5643     aalok 		/*
   1073   5643     aalok 		 * Align start offset to block boundary for segmap
   1074   5643     aalok 		 */
   1075   5643     aalok 		salign = lsp->ls_comp_seg_index[sblkno];
   1076   5643     aalok 		sdiff = salign & (DEV_BSIZE - 1);
   1077   5643     aalok 		salign -= sdiff;
   1078   5643     aalok 		if (eblkno >= (lsp->ls_comp_index_sz - 1)) {
   1079      0    stevel 			/*
   1080   5643     aalok 			 * We're dealing with the last segment of
   1081   5643     aalok 			 * the compressed file -- the size of this
   1082   5643     aalok 			 * segment *may not* be the same as the
   1083   5643     aalok 			 * segment size for the file
   1084      0    stevel 			 */
   1085   5643     aalok 			eblkoff = (offset + bp->b_bcount) &
   1086   5643     aalok 			    (lsp->ls_uncomp_last_seg_sz - 1);
   1087   5643     aalok 			ealign = lsp->ls_vp_comp_size;
   1088   5643     aalok 		} else {
   1089   5643     aalok 			ealign = lsp->ls_comp_seg_index[eblkno + 1];
   1090   5643     aalok 		}
   1091   5643     aalok 
   1092   5643     aalok 		/*
   1093   5643     aalok 		 * Preserve original request paramaters
   1094   5643     aalok 		 */
   1095   5643     aalok 		oblkcount = bp->b_bcount;
   1096   5643     aalok 
   1097   5643     aalok 		/*
   1098   5643     aalok 		 * Assign the calculated parameters
   1099   5643     aalok 		 */
   1100   5643     aalok 		comp_data_sz = ealign - salign;
   1101   5643     aalok 		bp->b_bcount = comp_data_sz;
   1102   5643     aalok 
   1103   5643     aalok 		/*
   1104   5643     aalok 		 * Allocate fixed size memory blocks to hold compressed
   1105   5643     aalok 		 * segments and one uncompressed segment since we
   1106   5643     aalok 		 * uncompress segments one at a time
   1107   5643     aalok 		 */
   1108   5643     aalok 		compressed_seg = kmem_alloc(bp->b_bcount, KM_SLEEP);
   1109   5643     aalok 		uncompressed_seg = kmem_alloc(lsp->ls_uncomp_seg_sz, KM_SLEEP);
   1110   5643     aalok 		/*
   1111   5643     aalok 		 * Map in the calculated number of blocks
   1112   5643     aalok 		 */
   1113   5643     aalok 		error = lofi_mapped_rdwr((caddr_t)compressed_seg, salign,
   1114   5643     aalok 		    bp, lsp);
   1115   5643     aalok 
   1116   5643     aalok 		bp->b_bcount = oblkcount;
   1117   5643     aalok 		bp->b_resid = oblkcount;
   1118   5643     aalok 		if (error != 0)
   1119   5643     aalok 			goto done;
   1120   5643     aalok 
   1121   5643     aalok 		/*
   1122   5643     aalok 		 * We have the compressed blocks, now uncompress them
   1123   5643     aalok 		 */
   1124   5643     aalok 		cmpbuf = compressed_seg + sdiff;
   1125   8996      Alok 		for (i = sblkno; i <= eblkno; i++) {
   1126   8996      Alok 			ASSERT(i < lsp->ls_comp_index_sz - 1);
   1127   8996      Alok 
   1128   8996      Alok 			/*
   1129   8996      Alok 			 * The last segment is special in that it is
   1130   8996      Alok 			 * most likely not going to be the same
   1131   8996      Alok 			 * (uncompressed) size as the other segments.
   1132   8996      Alok 			 */
   1133   8996      Alok 			if (i == (lsp->ls_comp_index_sz - 2)) {
   1134   8996      Alok 				seglen = lsp->ls_uncomp_last_seg_sz;
   1135   8996      Alok 			} else {
   1136   8996      Alok 				seglen = lsp->ls_uncomp_seg_sz;
   1137   8996      Alok 			}
   1138   8996      Alok 
   1139   5643     aalok 			/*
   1140   5643     aalok 			 * Each of the segment index entries contains
   1141   5643     aalok 			 * the starting block number for that segment.
   1142   5643     aalok 			 * The number of compressed bytes in a segment
   1143   5643     aalok 			 * is thus the difference between the starting
   1144   5643     aalok 			 * block number of this segment and the starting
   1145   5643     aalok 			 * block number of the next segment.
   1146   5643     aalok 			 */
   1147   8996      Alok 			cmpbytes = lsp->ls_comp_seg_index[i + 1] -
   1148   8996      Alok 			    lsp->ls_comp_seg_index[i];
   1149   5643     aalok 
   1150   5643     aalok 			/*
   1151   5643     aalok 			 * The first byte in a compressed segment is a flag
   1152   5643     aalok 			 * that indicates whether this segment is compressed
   1153   5643     aalok 			 * at all
   1154   5643     aalok 			 */
   1155   5643     aalok 			if (*cmpbuf == UNCOMPRESSED) {
   1156   5643     aalok 				bcopy((cmpbuf + SEGHDR), uncompressed_seg,
   1157   5643     aalok 				    (cmpbytes - SEGHDR));
   1158   5643     aalok 			} else {
   1159   5643     aalok 				if (li->l_decompress((cmpbuf + SEGHDR),
   1160   5643     aalok 				    (cmpbytes - SEGHDR), uncompressed_seg,
   1161   5643     aalok 				    &seglen, li->l_level) != 0) {
   1162   5643     aalok 					error = EIO;
   1163   5643     aalok 					goto done;
   1164   5643     aalok 				}
   1165   5643     aalok 			}
   1166   5643     aalok 
   1167   9048      jrgn 			uncompressed_seg_index = i;
   1168   9048      jrgn 
   1169   5643     aalok 			/*
   1170   5643     aalok 			 * Determine how much uncompressed data we
   1171   5643     aalok 			 * have to copy and copy it
   1172   5643     aalok 			 */
   1173   5643     aalok 			xfersize = lsp->ls_uncomp_seg_sz - sblkoff;
   1174   8996      Alok 			if (i == eblkno)
   1175   8996      Alok 				xfersize -= (lsp->ls_uncomp_seg_sz - eblkoff);
   1176   5643     aalok 
   1177   5643     aalok 			bcopy((uncompressed_seg + sblkoff), bufaddr, xfersize);
   1178   5643     aalok 
   1179   5643     aalok 			cmpbuf += cmpbytes;
   1180   5643     aalok 			bufaddr += xfersize;
   1181   5643     aalok 			bp->b_resid -= xfersize;
   1182   5643     aalok 			sblkoff = 0;
   1183   5643     aalok 
   1184   5643     aalok 			if (bp->b_resid == 0)
   1185      0    stevel 				break;
   1186   5643     aalok 		}
   1187   9048      jrgn 
   1188   9048      jrgn 		/*
   1189   9048      jrgn 		 * Add the data for the last decopressed segment to
   1190   9048      jrgn 		 * the cache.
   1191   9048      jrgn 		 *
   1192   9048      jrgn 		 * In case the uncompressed segment data was added to (and
   1193   9048      jrgn 		 * is referenced by) the cache, make sure we don't free it
   1194   9048      jrgn 		 * here.
   1195   9048      jrgn 		 */
   1196   9048      jrgn 		mutex_enter(&lsp->ls_comp_cache_lock);
   1197   9048      jrgn 		if ((lc = lofi_add_comp_data(lsp, uncompressed_seg_index,
   1198   9048      jrgn 		    uncompressed_seg)) != NULL) {
   1199   9048      jrgn 			uncompressed_seg = NULL;
   1200   9048      jrgn 		}
   1201   9048      jrgn 		mutex_exit(&lsp->ls_comp_cache_lock);
   1202   9048      jrgn 
   1203   8313      Dina done:
   1204   8313      Dina 		if (compressed_seg != NULL)
   1205   8313      Dina 			kmem_free(compressed_seg, comp_data_sz);
   1206   8313      Dina 		if (uncompressed_seg != NULL)
   1207   8313      Dina 			kmem_free(uncompressed_seg, lsp->ls_uncomp_seg_sz);
   1208   8313      Dina 	} /* end of handling compressed files */
   1209      0    stevel 
   1210   8313      Dina errout:
   1211   8313      Dina 	if (bufinited && lsp->ls_kstat) {
   1212      0    stevel 		size_t n_done = bp->b_bcount - bp->b_resid;
   1213      0    stevel 		kstat_io_t *kioptr;
   1214      0    stevel 
   1215      0    stevel 		mutex_enter(lsp->ls_kstat->ks_lock);
   1216      0    stevel 		kioptr = KSTAT_IO_PTR(lsp->ls_kstat);
   1217      0    stevel 		if (bp->b_flags & B_READ) {
   1218      0    stevel 			kioptr->nread += n_done;
   1219      0    stevel 			kioptr->reads++;
   1220      0    stevel 		} else {
   1221      0    stevel 			kioptr->nwritten += n_done;
   1222      0    stevel 			kioptr->writes++;
   1223      0    stevel 		}
   1224      0    stevel 		kstat_runq_exit(kioptr);
   1225      0    stevel 		mutex_exit(lsp->ls_kstat->ks_lock);
   1226      0    stevel 	}
   1227   4451  eschrock 
   1228   4451  eschrock 	mutex_enter(&lsp->ls_vp_lock);
   1229   4451  eschrock 	if (--lsp->ls_vp_iocount == 0)
   1230   4451  eschrock 		cv_broadcast(&lsp->ls_vp_cv);
   1231   4451  eschrock 	mutex_exit(&lsp->ls_vp_lock);
   1232   4451  eschrock 
   1233      0    stevel 	bioerror(bp, error);
   1234      0    stevel 	biodone(bp);
   1235      0    stevel }
   1236      0    stevel 
   1237      0    stevel static int
   1238      0    stevel lofi_strategy(struct buf *bp)
   1239      0    stevel {
   1240      0    stevel 	struct lofi_state *lsp;
   1241      0    stevel 	offset_t	offset;
   1242      0    stevel 
   1243      0    stevel 	/*
   1244      0    stevel 	 * We cannot just do I/O here, because the current thread
   1245      0    stevel 	 * _might_ end up back in here because the underlying filesystem
   1246      0    stevel 	 * wants a buffer, which eventually gets into bio_recycle and
   1247      0    stevel 	 * might call into lofi to write out a delayed-write buffer.
   1248      0    stevel 	 * This is bad if the filesystem above lofi is the same as below.
   1249      0    stevel 	 *
   1250      0    stevel 	 * We could come up with a complex strategy using threads to
   1251      0    stevel 	 * do the I/O asynchronously, or we could use task queues. task
   1252      0    stevel 	 * queues were incredibly easy so they win.
   1253      0    stevel 	 */
   1254      0    stevel 	lsp = ddi_get_soft_state(lofi_statep, getminor(bp->b_edev));
   1255   8313      Dina 	if (lsp == NULL) {
   1256   8313      Dina 		bioerror(bp, ENXIO);
   1257   8313      Dina 		biodone(bp);
   1258   8313      Dina 		return (0);
   1259   8313      Dina 	}
   1260   8313      Dina 
   1261   4451  eschrock 	mutex_enter(&lsp->ls_vp_lock);
   1262   4451  eschrock 	if (lsp->ls_vp == NULL || lsp->ls_vp_closereq) {
   1263   4451  eschrock 		bioerror(bp, EIO);
   1264   4451  eschrock 		biodone(bp);
   1265   4451  eschrock 		mutex_exit(&lsp->ls_vp_lock);
   1266   4451  eschrock 		return (0);
   1267   4451  eschrock 	}
   1268   4451  eschrock 
   1269      0    stevel 	offset = bp->b_lblkno * DEV_BSIZE;	/* offset within file */
   1270   8313      Dina 	if (lsp->ls_crypto_enabled) {
   1271   8313      Dina 		/* encrypted data really begins after crypto header */
   1272   8313      Dina 		offset += lsp->ls_crypto_offset;
   1273   8313      Dina 	}
   1274      0    stevel 	if (offset == lsp->ls_vp_size) {
   1275      0    stevel 		/* EOF */
   1276      0    stevel 		if ((bp->b_flags & B_READ) != 0) {
   1277      0    stevel 			bp->b_resid = bp->b_bcount;
   1278      0    stevel 			bioerror(bp, 0);
   1279      0    stevel 		} else {
   1280      0    stevel 			/* writes should fail */
   1281      0    stevel 			bioerror(bp, ENXIO);
   1282      0    stevel 		}
   1283      0    stevel 		biodone(bp);
   1284   4451  eschrock 		mutex_exit(&lsp->ls_vp_lock);
   1285      0    stevel 		return (0);
   1286      0    stevel 	}
   1287      0    stevel 	if (offset > lsp->ls_vp_size) {
   1288      0    stevel 		bioerror(bp, ENXIO);
   1289      0    stevel 		biodone(bp);
   1290   4451  eschrock 		mutex_exit(&lsp->ls_vp_lock);
   1291      0    stevel 		return (0);
   1292      0    stevel 	}
   1293   4451  eschrock 	lsp->ls_vp_iocount++;
   1294   4451  eschrock 	mutex_exit(&lsp->ls_vp_lock);
   1295   4451  eschrock 
   1296      0    stevel 	if (lsp->ls_kstat) {
   1297      0    stevel 		mutex_enter(lsp->ls_kstat->ks_lock);
   1298      0    stevel 		kstat_waitq_enter(KSTAT_IO_PTR(lsp->ls_kstat));
   1299      0    stevel 		mutex_exit(lsp->ls_kstat->ks_lock);
   1300      0    stevel 	}
   1301      0    stevel 	(void) taskq_dispatch(lsp->ls_taskq, lofi_strategy_task, bp, KM_SLEEP);
   1302      0    stevel 	return (0);
   1303      0    stevel }
   1304      0    stevel 
   1305      0    stevel /*ARGSUSED2*/
   1306      0    stevel static int
   1307      0    stevel lofi_read(dev_t dev, struct uio *uio, struct cred *credp)
   1308      0    stevel {
   1309      0    stevel 	if (getminor(dev) == 0)
   1310      0    stevel 		return (EINVAL);
   1311   8313      Dina 	UIO_CHECK(uio);
   1312      0    stevel 	return (physio(lofi_strategy, NULL, dev, B_READ, minphys, uio));
   1313      0    stevel }
   1314      0    stevel 
   1315      0    stevel /*ARGSUSED2*/
   1316      0    stevel static int
   1317      0    stevel lofi_write(dev_t dev, struct uio *uio, struct cred *credp)
   1318      0    stevel {
   1319      0    stevel 	if (getminor(dev) == 0)
   1320      0    stevel 		return (EINVAL);
   1321   8313      Dina 	UIO_CHECK(uio);
   1322      0    stevel 	return (physio(lofi_strategy, NULL, dev, B_WRITE, minphys, uio));
   1323      0    stevel }
   1324      0    stevel 
   1325      0    stevel /*ARGSUSED2*/
   1326      0    stevel static int
   1327      0    stevel lofi_aread(dev_t dev, struct aio_req *aio, struct cred *credp)
   1328      0    stevel {
   1329      0    stevel 	if (getminor(dev) == 0)
   1330      0    stevel 		return (EINVAL);
   1331   8313      Dina 	UIO_CHECK(aio->aio_uio);
   1332      0    stevel 	return (aphysio(lofi_strategy, anocancel, dev, B_READ, minphys, aio));
   1333      0    stevel }
   1334      0    stevel 
   1335      0    stevel /*ARGSUSED2*/
   1336      0    stevel static int
   1337      0    stevel lofi_awrite(dev_t dev, struct aio_req *aio, struct cred *credp)
   1338      0    stevel {
   1339      0    stevel 	if (getminor(dev) == 0)
   1340      0    stevel 		return (EINVAL);
   1341   8313      Dina 	UIO_CHECK(aio->aio_uio);
   1342      0    stevel 	return (aphysio(lofi_strategy, anocancel, dev, B_WRITE, minphys, aio));
   1343      0    stevel }
   1344      0    stevel 
   1345      0    stevel /*ARGSUSED*/
   1346      0    stevel static int
   1347      0    stevel lofi_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
   1348      0    stevel {
   1349      0    stevel 	switch (infocmd) {
   1350      0    stevel 	case DDI_INFO_DEVT2DEVINFO:
   1351      0    stevel 		*result = lofi_dip;
   1352      0    stevel 		return (DDI_SUCCESS);
   1353      0    stevel 	case DDI_INFO_DEVT2INSTANCE:
   1354      0    stevel 		*result = 0;
   1355      0    stevel 		return (DDI_SUCCESS);
   1356      0    stevel 	}
   1357      0    stevel 	return (DDI_FAILURE);
   1358      0    stevel }
   1359      0    stevel 
   1360      0    stevel static int
   1361      0    stevel lofi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
   1362      0    stevel {
   1363      0    stevel 	int	error;
   1364      0    stevel 
   1365      0    stevel 	if (cmd != DDI_ATTACH)
   1366      0    stevel 		return (DDI_FAILURE);
   1367      0    stevel 	error = ddi_soft_state_zalloc(lofi_statep, 0);
   1368      0    stevel 	if (error == DDI_FAILURE) {
   1369      0    stevel 		return (DDI_FAILURE);
   1370      0    stevel 	}
   1371      0    stevel 	error = ddi_create_minor_node(dip, LOFI_CTL_NODE, S_IFCHR, 0,
   1372      0    stevel 	    DDI_PSEUDO, NULL);
   1373      0    stevel 	if (error == DDI_FAILURE) {
   1374      0    stevel 		ddi_soft_state_free(lofi_statep, 0);
   1375      0    stevel 		return (DDI_FAILURE);
   1376      0    stevel 	}
   1377   5084   johnlev 	/* driver handles kernel-issued IOCTLs */
   1378   5084   johnlev 	if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
   1379   5084   johnlev 	    DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) {
   1380   5084   johnlev 		ddi_remove_minor_node(dip, NULL);
   1381   5084   johnlev 		ddi_soft_state_free(lofi_statep, 0);
   1382   5084   johnlev 		return (DDI_FAILURE);
   1383   5084   johnlev 	}
   1384      0    stevel 	lofi_dip = dip;
   1385      0    stevel 	ddi_report_dev(dip);
   1386      0    stevel 	return (DDI_SUCCESS);
   1387      0    stevel }
   1388      0    stevel 
   1389      0    stevel static int
   1390      0    stevel lofi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
   1391      0    stevel {
   1392      0    stevel 	if (cmd != DDI_DETACH)
   1393      0    stevel 		return (DDI_FAILURE);
   1394      0    stevel 	if (lofi_busy())
   1395      0    stevel 		return (DDI_FAILURE);
   1396      0    stevel 	lofi_dip = NULL;
   1397      0    stevel 	ddi_remove_minor_node(dip, NULL);
   1398   5084   johnlev 	ddi_prop_remove_all(dip);
   1399      0    stevel 	ddi_soft_state_free(lofi_statep, 0);
   1400      0    stevel 	return (DDI_SUCCESS);
   1401      0    stevel }
   1402      0    stevel 
   1403      0    stevel /*
   1404   8313      Dina  * With addition of encryption, be careful that encryption key is wiped before
   1405   8313      Dina  * kernel memory structures are freed, and also that key is not accidentally
   1406   8313      Dina  * passed out into userland structures.
   1407   8313      Dina  */
   1408   8313      Dina static void
   1409   8313      Dina free_lofi_ioctl(struct lofi_ioctl *klip)
   1410   8313      Dina {
   1411   8313      Dina 	/* Make sure this encryption key doesn't stick around */
   1412   8313      Dina 	bzero(klip->li_key, sizeof (klip->li_key));
   1413   8313      Dina 	kmem_free(klip, sizeof (struct lofi_ioctl));
   1414   8313      Dina }
   1415   8313      Dina 
   1416   8313      Dina /*
   1417      0    stevel  * These two just simplify the rest of the ioctls that need to copyin/out
   1418      0    stevel  * the lofi_ioctl structure.
   1419      0    stevel  */
   1420      0    stevel struct lofi_ioctl *
   1421   1657     heppo copy_in_lofi_ioctl(const struct lofi_ioctl *ulip, int flag)
   1422      0    stevel {
   1423      0    stevel 	struct lofi_ioctl *klip;
   1424      0    stevel 	int	error;
   1425      0    stevel 
   1426      0    stevel 	klip = kmem_alloc(sizeof (struct lofi_ioctl), KM_SLEEP);
   1427   1657     heppo 	error = ddi_copyin(ulip, klip, sizeof (struct lofi_ioctl), flag);
   1428      0    stevel 	if (error) {
   1429   8313      Dina 		free_lofi_ioctl(klip);
   1430      0    stevel 		return (NULL);
   1431      0    stevel 	}
   1432      0    stevel 
   1433      0    stevel 	/* make sure filename is always null-terminated */
   1434   8313      Dina 	klip->li_filename[MAXPATHLEN-1] = '\0';
   1435      0    stevel 
   1436      0    stevel 	/* validate minor number */
   1437      0    stevel 	if (klip->li_minor > lofi_max_files) {
   1438   8313      Dina 		free_lofi_ioctl(klip);
   1439   8313      Dina 		cmn_err(CE_WARN, "attempt to map more than lofi_max_files (%d)",
   1440   8313      Dina 		    lofi_max_files);
   1441      0    stevel 		return (NULL);
   1442      0    stevel 	}
   1443      0    stevel 	return (klip);
   1444      0    stevel }
   1445      0    stevel 
   1446      0    stevel int
   1447   1657     heppo copy_out_lofi_ioctl(const struct lofi_ioctl *klip, struct lofi_ioctl *ulip,
   1448   1657     heppo 	int flag)
   1449      0    stevel {
   1450      0    stevel 	int	error;
   1451      0    stevel 
   1452   8313      Dina 	/*
   1453   8313      Dina 	 * NOTE: Do NOT copy the crypto_key_t "back" to userland.
   1454   8313      Dina 	 * This ensures that an attacker can't trivially find the
   1455   8313      Dina 	 * key for a mapping just by issuing the ioctl.
   1456   8313      Dina 	 *
   1457   8313      Dina 	 * It can still be found by poking around in kmem with mdb(1),
   1458   8313      Dina 	 * but there is no point in making it easy when the info isn't
   1459   8313      Dina 	 * of any use in this direction anyway.
   1460   8313      Dina 	 *
   1461   8313      Dina 	 * Either way we don't actually have the raw key stored in
   1462   8313      Dina 	 * a form that we can get it anyway, since we just used it
   1463   8313      Dina 	 * to create a ctx template and didn't keep "the original".
   1464   8313      Dina 	 */
   1465   1657     heppo 	error = ddi_copyout(klip, ulip, sizeof (struct lofi_ioctl), flag);
   1466      0    stevel 	if (error)
   1467      0    stevel 		return (EFAULT);
   1468      0    stevel 	return (0);
   1469      0    stevel }
   1470      0    stevel 
   1471      0    stevel /*
   1472      0    stevel  * Return the minor number 'filename' is mapped to, if it is.
   1473      0    stevel  */
   1474      0    stevel static int
   1475      0    stevel file_to_minor(char *filename)
   1476      0    stevel {
   1477      0    stevel 	minor_t	minor;
   1478      0    stevel 	struct lofi_state *lsp;
   1479      0    stevel 
   1480      0    stevel 	ASSERT(mutex_owned(&lofi_lock));
   1481      0    stevel 	for (minor = 1; minor <= lofi_max_files; minor++) {
   1482      0    stevel 		lsp = ddi_get_soft_state(lofi_statep, minor);
   1483      0    stevel 		if (lsp == NULL)
   1484      0    stevel 			continue;
   1485      0    stevel 		if (strcmp(lsp->ls_filename, filename) == 0)
   1486      0    stevel 			return (minor);
   1487      0    stevel 	}
   1488      0    stevel 	return (0);
   1489      0    stevel }
   1490      0    stevel 
   1491      0    stevel /*
   1492      0    stevel  * lofiadm does some validation, but since Joe Random (or crashme) could
   1493      0    stevel  * do our ioctls, we need to do some validation too.
   1494      0    stevel  */
   1495      0    stevel static int
   1496      0    stevel valid_filename(const char *filename)
   1497      0    stevel {
   1498      0    stevel 	static char *blkprefix = "/dev/" LOFI_BLOCK_NAME "/";
   1499      0    stevel 	static char *charprefix = "/dev/" LOFI_CHAR_NAME "/";
   1500      0    stevel 
   1501      0    stevel 	/* must be absolute path */
   1502      0    stevel 	if (filename[0] != '/')
   1503      0    stevel 		return (0);
   1504      0    stevel 	/* must not be lofi */
   1505      0    stevel 	if (strncmp(filename, blkprefix, strlen(blkprefix)) == 0)
   1506      0    stevel 		return (0);
   1507      0    stevel 	if (strncmp(filename, charprefix, strlen(charprefix)) == 0)
   1508      0    stevel 		return (0);
   1509      0    stevel 	return (1);
   1510      0    stevel }
   1511      0    stevel 
   1512      0    stevel /*
   1513      0    stevel  * Fakes up a disk geometry, and one big partition, based on the size
   1514      0    stevel  * of the file. This is needed because we allow newfs'ing the device,
   1515      0    stevel  * and newfs will do several disk ioctls to figure out the geometry and
   1516      0    stevel  * partition information. It uses that information to determine the parameters
   1517   3517  mp204432  * to pass to mkfs. Geometry is pretty much irrelevant these days, but we
   1518      0    stevel  * have to support it.
   1519      0    stevel  */
   1520      0    stevel static void
   1521      0    stevel fake_disk_geometry(struct lofi_state *lsp)
   1522      0    stevel {
   1523   8313      Dina 	u_offset_t dsize = lsp->ls_vp_size - lsp->ls_crypto_offset;
   1524   8313      Dina 
   1525      0    stevel 	/* dk_geom - see dkio(7I) */
   1526      0    stevel 	/*
   1527      0    stevel 	 * dkg_ncyl _could_ be set to one here (one big cylinder with gobs
   1528      0    stevel 	 * of sectors), but that breaks programs like fdisk which want to
   1529      0    stevel 	 * partition a disk by cylinder. With one cylinder, you can't create
   1530      0    stevel 	 * an fdisk partition and put pcfs on it for testing (hard to pick
   1531      0    stevel 	 * a number between one and one).
   1532      0    stevel 	 *
   1533      0    stevel 	 * The cheezy floppy test is an attempt to not have too few cylinders
   1534      0    stevel 	 * for a small file, or so many on a big file that you waste space
   1535      0    stevel 	 * for backup superblocks or cylinder group structures.
   1536      0    stevel 	 */
   1537   8313      Dina 	if (dsize < (2 * 1024 * 1024)) /* floppy? */
   1538   8313      Dina 		lsp->ls_dkg.dkg_ncyl = dsize / (100 * 1024);
   1539      0    stevel 	else
   1540   8313      Dina 		lsp->ls_dkg.dkg_ncyl = dsize / (300 * 1024);
   1541      0    stevel 	/* in case file file is < 100k */
   1542      0    stevel 	if (lsp->ls_dkg.dkg_ncyl == 0)
   1543      0    stevel 		lsp->ls_dkg.dkg_ncyl = 1;
   1544      0    stevel 	lsp->ls_dkg.dkg_acyl = 0;
   1545      0    stevel 	lsp->ls_dkg.dkg_bcyl = 0;
   1546      0    stevel 	lsp->ls_dkg.dkg_nhead = 1;
   1547      0    stevel 	lsp->ls_dkg.dkg_obs1 = 0;
   1548      0    stevel 	lsp->ls_dkg.dkg_intrlv = 0;
   1549      0    stevel 	lsp->ls_dkg.dkg_obs2 = 0;
   1550      0    stevel 	lsp->ls_dkg.dkg_obs3 = 0;
   1551      0    stevel 	lsp->ls_dkg.dkg_apc = 0;
   1552      0    stevel 	lsp->ls_dkg.dkg_rpm = 7200;
   1553      0    stevel 	lsp->ls_dkg.dkg_pcyl = lsp->ls_dkg.dkg_ncyl + lsp->ls_dkg.dkg_acyl;
   1554   8313      Dina 	lsp->ls_dkg.dkg_nsect = dsize / (DEV_BSIZE * lsp->ls_dkg.dkg_ncyl);
   1555      0    stevel 	lsp->ls_dkg.dkg_write_reinstruct = 0;
   1556      0    stevel 	lsp->ls_dkg.dkg_read_reinstruct = 0;
   1557      0    stevel 
   1558      0    stevel 	/* vtoc - see dkio(7I) */
   1559      0    stevel 	bzero(&lsp->ls_vtoc, sizeof (struct vtoc));
   1560      0    stevel 	lsp->ls_vtoc.v_sanity = VTOC_SANE;
   1561      0    stevel 	lsp->ls_vtoc.v_version = V_VERSION;
   1562   8669      Dina 	(void) strncpy(lsp->ls_vtoc.v_volume, LOFI_DRIVER_NAME,
   1563   8669      Dina 	    sizeof (lsp->ls_vtoc.v_volume));
   1564      0    stevel 	lsp->ls_vtoc.v_sectorsz = DEV_BSIZE;
   1565      0    stevel 	lsp->ls_vtoc.v_nparts = 1;
   1566      0    stevel 	lsp->ls_vtoc.v_part[0].p_tag = V_UNASSIGNED;
   1567   5643     aalok 
   1568   5643     aalok 	/*
   1569   5643     aalok 	 * A compressed file is read-only, other files can
   1570   5643     aalok 	 * be read-write
   1571   5643     aalok 	 */
   1572   5643     aalok 	if (lsp->ls_uncomp_seg_sz > 0) {
   1573   5643     aalok 		lsp->ls_vtoc.v_part[0].p_flag = V_UNMNT | V_RONLY;
   1574   5643     aalok 	} else {
   1575   5643     aalok 		lsp->ls_vtoc.v_part[0].p_flag = V_UNMNT;
   1576   5643     aalok 	}
   1577      0    stevel 	lsp->ls_vtoc.v_part[0].p_start = (daddr_t)0;
   1578      0    stevel 	/*
   1579      0    stevel 	 * The partition size cannot just be the number of sectors, because
   1580      0    stevel 	 * that might not end on a cylinder boundary. And if that's the case,
   1581      0    stevel 	 * newfs/mkfs will print a scary warning. So just figure the size
   1582      0    stevel 	 * based on the number of cylinders and sectors/cylinder.
   1583      0    stevel 	 */
   1584      0    stevel 	lsp->ls_vtoc.v_part[0].p_size = lsp->ls_dkg.dkg_pcyl *
   1585      0    stevel 	    lsp->ls_dkg.dkg_nsect * lsp->ls_dkg.dkg_nhead;
   1586      0    stevel 
   1587      0    stevel 	/* dk_cinfo - see dkio(7I) */
   1588      0    stevel 	bzero(&lsp->ls_ci, sizeof (struct dk_cinfo));
   1589      0    stevel 	(void) strcpy(lsp->ls_ci.dki_cname, LOFI_DRIVER_NAME);
   1590      0    stevel 	lsp->ls_ci.dki_ctype = DKC_MD;
   1591      0    stevel 	lsp->ls_ci.dki_flags = 0;
   1592      0    stevel 	lsp->ls_ci.dki_cnum = 0;
   1593      0    stevel 	lsp->ls_ci.dki_addr = 0;
   1594      0    stevel 	lsp->ls_ci.dki_space = 0;
   1595      0    stevel 	lsp->ls_ci.dki_prio = 0;
   1596      0    stevel 	lsp->ls_ci.dki_vec = 0;
   1597      0    stevel 	(void) strcpy(lsp->ls_ci.dki_dname, LOFI_DRIVER_NAME);
   1598      0    stevel 	lsp->ls_ci.dki_unit = 0;
   1599      0    stevel 	lsp->ls_ci.dki_slave = 0;
   1600      0    stevel 	lsp->ls_ci.dki_partition = 0;
   1601      0    stevel 	/*
   1602      0    stevel 	 * newfs uses this to set maxcontig. Must not be < 16, or it
   1603      0    stevel 	 * will be 0 when newfs multiplies it by DEV_BSIZE and divides
   1604      0    stevel 	 * it by the block size. Then tunefs doesn't work because
   1605      0    stevel 	 * maxcontig is 0.
   1606      0    stevel 	 */
   1607      0    stevel 	lsp->ls_ci.dki_maxtransfer = 16;
   1608      0    stevel }
   1609      0    stevel 
   1610      0    stevel /*
   1611   5643     aalok  * map in a compressed file
   1612   5643     aalok  *
   1613   5643     aalok  * Read in the header and the index that follows.
   1614   5643     aalok  *
   1615   5643     aalok  * The header is as follows -
   1616   5643     aalok  *
   1617   5643     aalok  * Signature (name of the compression algorithm)
   1618   5643     aalok  * Compression segment size (a multiple of 512)
   1619   5643     aalok  * Number of index entries
   1620   5643     aalok  * Size of the last block
   1621   5643     aalok  * The array containing the index entries
   1622   5643     aalok  *
   1623   5643     aalok  * The header information is always stored in
   1624   5643     aalok  * network byte order on disk.
   1625   5643     aalok  */
   1626   5643     aalok static int
   1627   5643     aalok lofi_map_compressed_file(struct lofi_state *lsp, char *buf)
   1628   5643     aalok {
   1629   5643     aalok 	uint32_t index_sz, header_len, i;
   1630   5643     aalok 	ssize_t	resid;
   1631   5643     aalok 	enum uio_rw rw;
   1632   5643     aalok 	char *tbuf = buf;
   1633   5643     aalok 	int error;
   1634   5643     aalok 
   1635   5643     aalok 	/* The signature has already been read */
   1636   5643     aalok 	tbuf += sizeof (lsp->ls_comp_algorithm);
   1637   5643     aalok 	bcopy(tbuf, &(lsp->ls_uncomp_seg_sz), sizeof (lsp->ls_uncomp_seg_sz));
   1638   5643     aalok 	lsp->ls_uncomp_seg_sz = ntohl(lsp->ls_uncomp_seg_sz);
   1639   5643     aalok 
   1640   5643     aalok 	/*
   1641   5643     aalok 	 * The compressed segment size must be a power of 2
   1642   5643     aalok 	 */
   1643   9048      jrgn 	if (lsp->ls_uncomp_seg_sz < DEV_BSIZE ||
   1644   9048      jrgn 	    !ISP2(lsp->ls_uncomp_seg_sz))
   1645   5643     aalok 		return (EINVAL);
   1646   5643     aalok 
   1647   5643     aalok 	for (i = 0; !((lsp->ls_uncomp_seg_sz >> i) & 1); i++)
   1648   5643     aalok 		;
   1649   5643     aalok 
   1650   5643     aalok 	lsp->ls_comp_seg_shift = i;
   1651   5643     aalok 
   1652   5643     aalok 	tbuf += sizeof (lsp->ls_uncomp_seg_sz);
   1653   5643     aalok 	bcopy(tbuf, &(lsp->ls_comp_index_sz), sizeof (lsp->ls_comp_index_sz));
   1654   5643     aalok 	lsp->ls_comp_index_sz = ntohl(lsp->ls_comp_index_sz);
   1655   5643     aalok 
   1656   5643     aalok 	tbuf += sizeof (lsp->ls_comp_index_sz);
   1657   5643     aalok 	bcopy(tbuf, &(lsp->ls_uncomp_last_seg_sz),
   1658   5643     aalok 	    sizeof (lsp->ls_uncomp_last_seg_sz));
   1659   5643     aalok 	lsp->ls_uncomp_last_seg_sz = ntohl(lsp->ls_uncomp_last_seg_sz);
   1660   5643     aalok 
   1661   5643     aalok 	/*
   1662   5643     aalok 	 * Compute the total size of the uncompressed data
   1663   5643     aalok 	 * for use in fake_disk_geometry and other calculations.
   1664   5643     aalok 	 * Disk geometry has to be faked with respect to the
   1665   5643     aalok 	 * actual uncompressed data size rather than the
   1666   5643     aalok 	 * compressed file size.
   1667   5643     aalok 	 */
   1668  10197    dminer 	lsp->ls_vp_size =
   1669  10197    dminer 	    (u_offset_t)(lsp->ls_comp_index_sz - 2) * lsp->ls_uncomp_seg_sz
   1670   5643     aalok 	    + lsp->ls_uncomp_last_seg_sz;
   1671   5643     aalok 
   1672   5643     aalok 	/*
   1673   8996      Alok 	 * Index size is rounded up to DEV_BSIZE for ease
   1674   5643     aalok 	 * of segmapping
   1675   5643     aalok 	 */
   1676   5643     aalok 	index_sz = sizeof (*lsp->ls_comp_seg_index) * lsp->ls_comp_index_sz;
   1677   5643     aalok 	header_len = sizeof (lsp->ls_comp_algorithm) +
   1678   5643     aalok 	    sizeof (lsp->ls_uncomp_seg_sz) +
   1679   5643     aalok 	    sizeof (lsp->ls_comp_index_sz) +
   1680   5643     aalok 	    sizeof (lsp->ls_uncomp_last_seg_sz);
   1681   5643     aalok 	lsp->ls_comp_offbase = header_len + index_sz;
   1682   5643     aalok 
   1683   5643     aalok 	index_sz += header_len;
   1684   5643     aalok 	index_sz = roundup(index_sz, DEV_BSIZE);
   1685   5643     aalok 
   1686   5643     aalok 	lsp->ls_comp_index_data = kmem_alloc(index_sz, KM_SLEEP);
   1687   5643     aalok 	lsp->ls_comp_index_data_sz = index_sz;
   1688   5643     aalok 
   1689   5643     aalok 	/*
   1690   5643     aalok 	 * Read in the index -- this has a side-effect
   1691   5643     aalok 	 * of reading in the header as well
   1692   5643     aalok 	 */
   1693   5643     aalok 	rw = UIO_READ;
   1694   5643     aalok 	error = vn_rdwr(rw, lsp->ls_vp, lsp->ls_comp_index_data, index_sz,
   1695   5643     aalok 	    0, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
   1696   5643     aalok 
   1697   5643     aalok 	if (error != 0)
   1698   5643     aalok 		return (error);
   1699   5643     aalok 
   1700   5643     aalok 	/* Skip the header, this is where the index really begins */
   1701   5643     aalok 	lsp->ls_comp_seg_index =
   1702   5643     aalok 	    /*LINTED*/
   1703   5643     aalok 	    (uint64_t *)(lsp->ls_comp_index_data + header_len);
   1704   5643     aalok 
   1705   5643     aalok 	/*
   1706   5643     aalok 	 * Now recompute offsets in the index to account for
   1707   5643     aalok 	 * the header length
   1708   5643     aalok 	 */
   1709   5643     aalok 	for (i = 0; i < lsp->ls_comp_index_sz; i++) {
   1710   5643     aalok 		lsp->ls_comp_seg_index[i] = lsp->ls_comp_offbase +
   1711   5643     aalok 		    BE_64(lsp->ls_comp_seg_index[i]);
   1712   5643     aalok 	}
   1713   5643     aalok 
   1714   5643     aalok 	return (error);
   1715   5643     aalok }
   1716   5643     aalok 
   1717   5643     aalok /*
   1718   5643     aalok  * Check to see if the passed in signature is a valid
   1719   8313      Dina  * one.  If it is valid, return the index into
   1720   5643     aalok  * lofi_compress_table.
   1721   5643     aalok  *
   1722   5643     aalok  * Return -1 if it is invalid
   1723   5643     aalok  */
   1724   5643     aalok static int lofi_compress_select(char *signature)
   1725   5643     aalok {
   1726   5643     aalok 	int i;
   1727   5643     aalok 
   1728   5643     aalok 	for (i = 0; i < LOFI_COMPRESS_FUNCTIONS; i++) {
   1729   5643     aalok 		if (strcmp(lofi_compress_table[i].l_name, signature) == 0)
   1730   5643     aalok 			return (i);
   1731   5643     aalok 	}
   1732   5643     aalok 
   1733   5643     aalok 	return (-1);
   1734   5643     aalok }
   1735   5643     aalok 
   1736   5643     aalok /*
   1737      0    stevel  * map a file to a minor number. Return the minor number.
   1738      0    stevel  */
   1739      0    stevel static int
   1740      0    stevel lofi_map_file(dev_t dev, struct lofi_ioctl *ulip, int pickminor,
   1741   1657     heppo     int *rvalp, struct cred *credp, int ioctl_flag)
   1742      0    stevel {
   1743      0    stevel 	minor_t	newminor;
   1744      0    stevel 	struct lofi_state *lsp;
   1745      0    stevel 	struct lofi_ioctl *klip;
   1746      0    stevel 	int	error;
   1747      0    stevel 	struct vnode *vp;
   1748      0    stevel 	int64_t	Nblocks_prop_val;
   1749      0    stevel 	int64_t	Size_prop_val;
   1750   5643     aalok 	int	compress_index;
   1751      0    stevel 	vattr_t	vattr;
   1752      0    stevel 	int	flag;
   1753      0    stevel 	enum vtype v_type;
   1754   4451  eschrock 	int zalloced = 0;
   1755      0    stevel 	dev_t	newdev;
   1756   4451  eschrock 	char	namebuf[50];
   1757   8313      Dina 	char	buf[DEV_BSIZE];
   1758   8313      Dina 	char	crybuf[DEV_BSIZE];
   1759   5643     aalok 	ssize_t	resid;
   1760   8313      Dina 	boolean_t need_vn_close = B_FALSE;
   1761   8313      Dina 	boolean_t keycopied = B_FALSE;
   1762   8313      Dina 	boolean_t need_size_update = B_FALSE;
   1763      0    stevel 
   1764   1657     heppo 	klip = copy_in_lofi_ioctl(ulip, ioctl_flag);
   1765      0    stevel 	if (klip == NULL)
   1766      0    stevel 		return (EFAULT);
   1767      0    stevel 
   1768      0    stevel 	mutex_enter(&lofi_lock);
   1769      0    stevel 
   1770      0    stevel 	if (!valid_filename(klip->li_filename)) {
   1771      0    stevel 		error = EINVAL;
   1772      0    stevel 		goto out;
   1773      0    stevel 	}
   1774      0    stevel 
   1775      0    stevel 	if (file_to_minor(klip->li_filename) != 0) {
   1776      0    stevel 		error = EBUSY;
   1777      0    stevel 		goto out;
   1778      0    stevel 	}
   1779      0    stevel 
   1780      0    stevel 	if (pickminor) {
   1781      0    stevel 		/* Find a free one */
   1782      0    stevel 		for (newminor = 1; newminor <= lofi_max_files; newminor++)
   1783      0    stevel 			if (ddi_get_soft_state(lofi_statep, newminor) == NULL)
   1784      0    stevel 				break;
   1785      0    stevel 		if (newminor >= lofi_max_files) {
   1786      0    stevel 			error = EAGAIN;
   1787      0    stevel 			goto out;
   1788      0    stevel 		}
   1789      0    stevel 	} else {
   1790      0    stevel 		newminor = klip->li_minor;
   1791      0    stevel 		if (ddi_get_soft_state(lofi_statep, newminor) != NULL) {
   1792      0    stevel 			error = EEXIST;
   1793      0    stevel 			goto out;
   1794      0    stevel 		}
   1795      0    stevel 	}
   1796      0    stevel 
   1797      0    stevel 	/* make sure it's valid */
   1798      0    stevel 	error = lookupname(klip->li_filename, UIO_SYSSPACE, FOLLOW,
   1799      0    stevel 	    NULLVPP, &vp);
   1800      0    stevel 	if (error) {
   1801      0    stevel 		goto out;
   1802      0    stevel 	}
   1803      0    stevel 	v_type = vp->v_type;
   1804      0    stevel 	VN_RELE(vp);
   1805      0    stevel 	if (!V_ISLOFIABLE(v_type)) {
   1806      0    stevel 		error = EINVAL;
   1807      0    stevel 		goto out;
   1808      0    stevel 	}
   1809      0    stevel 	flag = FREAD | FWRITE | FOFFMAX | FEXCL;
   1810      0    stevel 	error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, &vp, 0, 0);
   1811      0    stevel 	if (error) {
   1812      0    stevel 		/* try read-only */
   1813      0    stevel 		flag &= ~FWRITE;
   1814      0    stevel 		error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0,
   1815      0    stevel 		    &vp, 0, 0);
   1816      0    stevel 		if (error) {
   1817      0    stevel 			goto out;
   1818      0    stevel 		}
   1819      0    stevel 	}
   1820   8313      Dina 	need_vn_close = B_TRUE;
   1821   8313      Dina 
   1822      0    stevel 	vattr.va_mask = AT_SIZE;
   1823   5331       amw 	error = VOP_GETATTR(vp, &vattr, 0, credp, NULL);
   1824      0    stevel 	if (error) {
   1825   8313      Dina 		goto out;
   1826      0    stevel 	}
   1827      0    stevel 	/* the file needs to be a multiple of the block size */
   1828      0    stevel 	if ((vattr.va_size % DEV_BSIZE) != 0) {
   1829      0    stevel 		error = EINVAL;
   1830   8313      Dina 		goto out;
   1831      0    stevel 	}
   1832      0    stevel 	newdev = makedevice(getmajor(dev), newminor);
   1833      0    stevel 	Size_prop_val = vattr.va_size;
   1834      0    stevel 	if ((ddi_prop_update_int64(newdev, lofi_dip,
   1835      0    stevel 	    SIZE_PROP_NAME, Size_prop_val)) != DDI_PROP_SUCCESS) {
   1836      0    stevel 		error = EINVAL;
   1837   8313      Dina 		goto out;
   1838      0    stevel 	}
   1839      0    stevel 	Nblocks_prop_val = vattr.va_size / DEV_BSIZE;
   1840      0    stevel 	if ((ddi_prop_update_int64(newdev, lofi_dip,
   1841      0    stevel 	    NBLOCKS_PROP_NAME, Nblocks_prop_val)) != DDI_PROP_SUCCESS) {
   1842      0    stevel 		error = EINVAL;
   1843      0    stevel 		goto propout;
   1844      0    stevel 	}
   1845      0    stevel 	error = ddi_soft_state_zalloc(lofi_statep, newminor);
   1846      0    stevel 	if (error == DDI_FAILURE) {
   1847      0    stevel 		error = ENOMEM;
   1848      0    stevel 		goto propout;
   1849      0    stevel 	}
   1850      0    stevel 	zalloced = 1;
   1851      0    stevel 	(void) snprintf(namebuf, sizeof (namebuf), "%d", newminor);
   1852   6883   gd78059 	error = ddi_create_minor_node(lofi_dip, namebuf, S_IFBLK, newminor,
   1853      0    stevel 	    DDI_PSEUDO, NULL);
   1854      0    stevel 	if (error != DDI_SUCCESS) {
   1855      0    stevel 		error = ENXIO;
   1856      0    stevel 		goto propout;
   1857      0    stevel 	}
   1858      0    stevel 	(void) snprintf(namebuf, sizeof (namebuf), "%d,raw", newminor);
   1859      0    stevel 	error = ddi_create_minor_node(lofi_dip, namebuf, S_IFCHR, newminor,
   1860      0    stevel 	    DDI_PSEUDO, NULL);
   1861      0    stevel 	if (error != DDI_SUCCESS) {
   1862      0    stevel 		/* remove block node */
   1863      0    stevel 		(void) snprintf(namebuf, sizeof (namebuf), "%d", newminor);
   1864      0    stevel 		ddi_remove_minor_node(lofi_dip, namebuf);
   1865      0    stevel 		error = ENXIO;
   1866      0    stevel 		goto propout;
   1867      0    stevel 	}
   1868      0    stevel 	lsp = ddi_get_soft_state(lofi_statep, newminor);
   1869      0    stevel 	lsp->ls_filename_sz = strlen(klip->li_filename) + 1;
   1870      0    stevel 	lsp->ls_filename = kmem_alloc(lsp->ls_filename_sz, KM_SLEEP);
   1871      0    stevel 	(void) snprintf(namebuf, sizeof (namebuf), "%s_taskq_%d",
   1872      0    stevel 	    LOFI_DRIVER_NAME, newminor);
   1873      0    stevel 	lsp->ls_taskq = taskq_create(namebuf, lofi_taskq_nthreads,
   1874      0    stevel 	    minclsyspri, 1, lofi_taskq_maxalloc, 0);
   1875      0    stevel 	lsp->ls_kstat = kstat_create(LOFI_DRIVER_NAME, newminor,
   1876      0    stevel 	    NULL, "disk", KSTAT_TYPE_IO, 1, 0);
   1877      0    stevel 	if (lsp->ls_kstat) {
   1878      0    stevel 		mutex_init(&lsp->ls_kstat_lock, NULL, MUTEX_DRIVER, NULL);
   1879      0    stevel 		lsp->ls_kstat->ks_lock = &lsp->ls_kstat_lock;
   1880      0    stevel 		kstat_install(lsp->ls_kstat);
   1881      0    stevel 	}
   1882   4451  eschrock 	cv_init(&lsp->ls_vp_cv, NULL, CV_DRIVER, NULL);
   1883   4451  eschrock 	mutex_init(&lsp->ls_vp_lock, NULL, MUTEX_DRIVER, NULL);
   1884   9048      jrgn 
   1885   9048      jrgn 	list_create(&lsp->ls_comp_cache, sizeof (struct lofi_comp_cache),
   1886   9048      jrgn 	    offsetof(struct lofi_comp_cache, lc_list));
   1887   9048      jrgn 	mutex_init(&lsp->ls_comp_cache_lock, NULL, MUTEX_DRIVER, NULL);
   1888   4451  eschrock 
   1889      0    stevel 	/*
   1890      0    stevel 	 * save open mode so file can be closed properly and vnode counts
   1891      0    stevel 	 * updated correctly.
   1892      0    stevel 	 */
   1893      0    stevel 	lsp->ls_openflag = flag;
   1894      0    stevel 
   1895      0    stevel 	/*
   1896      0    stevel 	 * Try to handle stacked lofs vnodes.
   1897      0    stevel 	 */
   1898      0    stevel 	if (vp->v_type == VREG) {
   1899   5331       amw 		if (VOP_REALVP(vp, &lsp->ls_vp, NULL) != 0) {
   1900      0    stevel 			lsp->ls_vp = vp;
   1901      0    stevel 		} else {
   1902      0    stevel 			/*
   1903      0    stevel 			 * Even though vp was obtained via vn_open(), we
   1904      0    stevel 			 * can't call vn_close() on it, since lofs will
   1905      0    stevel 			 * pass the VOP_CLOSE() on down to the realvp
   1906      0    stevel 			 * (which we are about to use). Hence we merely
   1907      0    stevel 			 * drop the reference to the lofs vnode and hold
   1908      0    stevel 			 * the realvp so things behave as if we've
   1909      0    stevel 			 * opened the realvp without any interaction
   1910      0    stevel 			 * with lofs.
   1911      0    stevel 			 */
   1912      0    stevel 			VN_HOLD(lsp->ls_vp);
   1913      0    stevel 			VN_RELE(vp);
   1914      0    stevel 		}
   1915      0    stevel 	} else {
   1916      0    stevel 		lsp->ls_vp = vp;
   1917      0    stevel 	}
   1918      0    stevel 	lsp->ls_vp_size = vattr.va_size;
   1919      0    stevel 	(void) strcpy(lsp->ls_filename, klip->li_filename);
   1920      0    stevel 	if (rvalp)
   1921      0    stevel 		*rvalp = (int)newminor;
   1922      0    stevel 	klip->li_minor = newminor;
   1923      0    stevel 
   1924   5643     aalok 	/*
   1925   8313      Dina 	 * Initialize crypto details for encrypted lofi
   1926   5643     aalok 	 */
   1927   8313      Dina 	if (klip->li_crypto_enabled) {
   1928   8313      Dina 		int ret;
   1929   8313      Dina 
   1930   8313      Dina 		mutex_init(&lsp->ls_crypto_lock, NULL, MUTEX_DRIVER, NULL);
   1931   8313      Dina 
   1932   8313      Dina 		lsp->ls_mech.cm_type = crypto_mech2id(klip->li_cipher);
   1933   8313      Dina 		if (lsp->ls_mech.cm_type == CRYPTO_MECH_INVALID) {
   1934   8313      Dina 			cmn_err(CE_WARN, "invalid cipher %s requested for %s",
   1935   8313      Dina 			    klip->li_cipher, lsp->ls_filename);
   1936   8313      Dina 			error = EINVAL;
   1937   8313      Dina 			goto propout;
   1938   8313      Dina 		}
   1939   8313      Dina 
   1940   8313      Dina 		/* this is just initialization here */
   1941   8313      Dina 		lsp->ls_mech.cm_param = NULL;
   1942   8313      Dina 		lsp->ls_mech.cm_param_len = 0;
   1943   8313      Dina 
   1944   8313      Dina 		lsp->ls_iv_type = klip->li_iv_type;
   1945   8313      Dina 		lsp->ls_iv_mech.cm_type = crypto_mech2id(klip->li_iv_cipher);
   1946   8313      Dina 		if (lsp->ls_iv_mech.cm_type == CRYPTO_MECH_INVALID) {
   1947   8313      Dina 			cmn_err(CE_WARN, "invalid iv cipher %s requested"
   1948   8313      Dina 			    " for %s", klip->li_iv_cipher, lsp->ls_filename);
   1949   8313      Dina 			error = EINVAL;
   1950   8313      Dina 			goto propout;
   1951   8313      Dina 		}
   1952   8313      Dina 
   1953   8313      Dina 		/* iv mech must itself take a null iv */
   1954   8313      Dina 		lsp->ls_iv_mech.cm_param = NULL;
   1955   8313      Dina 		lsp->ls_iv_mech.cm_param_len = 0;
   1956   8313      Dina 		lsp->ls_iv_len = klip->li_iv_len;
   1957   8313      Dina 
   1958   8313      Dina 		/*
   1959   8313      Dina 		 * Create ctx using li_cipher & the raw li_key after checking
   1960   8313      Dina 		 * that it isn't a weak key.
   1961   8313      Dina 		 */
   1962   8313      Dina 		lsp->ls_key.ck_format = CRYPTO_KEY_RAW;
   1963   8313      Dina 		lsp->ls_key.ck_length = klip->li_key_len;
   1964   8313      Dina 		lsp->ls_key.ck_data = kmem_alloc(
   1965   8313      Dina 		    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length), KM_SLEEP);
   1966   8313      Dina 		bcopy(klip->li_key, lsp->ls_key.ck_data,
   1967   8313      Dina 		    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));
   1968   8313      Dina 		keycopied = B_TRUE;
   1969   8313      Dina 
   1970   8313      Dina 		ret = crypto_key_check(&lsp->ls_mech, &lsp->ls_key);
   1971   8313      Dina 		if (ret != CRYPTO_SUCCESS) {
   1972   8313      Dina 			error = EINVAL;
   1973   8313      Dina 			cmn_err(CE_WARN, "weak key check failed for cipher "
   1974   8313      Dina 			    "%s on file %s (0x%x)", klip->li_cipher,
   1975   8313      Dina 			    lsp->ls_filename, ret);
   1976   8313      Dina 			goto propout;
   1977   8313      Dina 		}
   1978   8313      Dina 	}
   1979   8313      Dina 	lsp->ls_crypto_enabled = klip->li_crypto_enabled;
   1980   8313      Dina 
   1981   8313      Dina 	/*
   1982   8313      Dina 	 * Read the file signature to check if it is compressed or encrypted.
   1983   8313      Dina 	 * Crypto signature is in a different location; both areas should
   1984   8313      Dina 	 * read to keep compression and encryption mutually exclusive.
   1985   8313      Dina 	 */
   1986   8313      Dina 	if (lsp->ls_crypto_enabled) {
   1987   8313      Dina 		error = vn_rdwr(UIO_READ, lsp->ls_vp, crybuf, DEV_BSIZE,
   1988   8313      Dina 		    CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
   1989   8313      Dina 		if (error != 0)
   1990   8313      Dina 			goto propout;
   1991   8313      Dina 	}
   1992   8313      Dina 	error = vn_rdwr(UIO_READ, lsp->ls_vp, buf, DEV_BSIZE, 0, UIO_SYSSPACE,
   1993   5643     aalok 	    0, RLIM64_INFINITY, kcred, &resid);
   1994   5643     aalok 	if (error != 0)
   1995   5643     aalok 		goto propout;
   1996   5643     aalok 
   1997   8313      Dina 	/* initialize these variables for all lofi files */
   1998   5643     aalok 	lsp->ls_uncomp_seg_sz = 0;
   1999   5643     aalok 	lsp->ls_vp_comp_size = lsp->ls_vp_size;
   2000   5643     aalok 	lsp->ls_comp_algorithm[0] = '\0';
   2001   5643     aalok 
   2002   8313      Dina 	/* encrypted lofi reads/writes shifted by crypto metadata size */
   2003   8313      Dina 	lsp->ls_crypto_offset = 0;
   2004   8313      Dina 
   2005   8313      Dina 	/* this is a compressed lofi */
   2006   8313      Dina 	if ((compress_index = lofi_compress_select(buf)) != -1) {
   2007   8313      Dina 
   2008   8313      Dina 		/* compression and encryption are mutually exclusive */
   2009   8313      Dina 		if (klip->li_crypto_enabled) {
   2010   8313      Dina 			error = ENOTSUP;
   2011   8313      Dina 			goto propout;
   2012   8313      Dina 		}
   2013   8313      Dina 
   2014   8313      Dina 		/* initialize compression info for compressed lofi */
   2015   5643     aalok 		lsp->ls_comp_algorithm_index = compress_index;
   2016   5643     aalok 		(void) strlcpy(lsp->ls_comp_algorithm,
   2017   5643     aalok 		    lofi_compress_table[compress_index].l_name,
   2018   5643     aalok 		    sizeof (lsp->ls_comp_algorithm));
   2019   8313      Dina 
   2020   5643     aalok 		error = lofi_map_compressed_file(lsp, buf);
   2021   5643     aalok 		if (error != 0)
   2022   5643     aalok 			goto propout;
   2023   8313      Dina 		need_size_update = B_TRUE;
   2024   5643     aalok 
   2025   8313      Dina 	/* this is an encrypted lofi */
   2026   8313      Dina 	} else if (strncmp(crybuf, lofi_crypto_magic,
   2027   8313      Dina 	    sizeof (lofi_crypto_magic)) == 0) {
   2028   8313      Dina 
   2029   8313      Dina 		char *marker = crybuf;
   2030   8313      Dina 
   2031   8313      Dina 		/*
   2032   8313      Dina 		 * This is the case where the header in the lofi image is
   2033   8313      Dina 		 * already initialized to indicate it is encrypted.
   2034   8313      Dina 		 * There is another case (see below) where encryption is
   2035   8313      Dina 		 * requested but the lofi image has never been used yet,
   2036   8313      Dina 		 * so the header needs to be written with encryption magic.
   2037   8313      Dina 		 */
   2038   8313      Dina 
   2039   8313      Dina 		/* indicate this must be an encrypted lofi due to magic */
   2040   8313      Dina 		klip->li_crypto_enabled = B_TRUE;
   2041   8313      Dina 
   2042   8313      Dina 		/*
   2043   8313      Dina 		 * The encryption header information is laid out this way:
   2044   8313      Dina 		 *	6 bytes:	hex "CFLOFI"
   2045   8313      Dina 		 *	2 bytes:	version = 0 ... for now
   2046   8313      Dina 		 *	96 bytes:	reserved1 (not implemented yet)
   2047   8313      Dina 		 *	4 bytes:	data_sector = 2 ... for now
   2048   8313      Dina 		 *	more...		not implemented yet
   2049   8313      Dina 		 */
   2050   8313      Dina 
   2051   8313      Dina 		/* copy the magic */
   2052   8313      Dina 		bcopy(marker, lsp->ls_crypto.magic,
   2053   8313      Dina 		    sizeof (lsp->ls_crypto.magic));
   2054   8313      Dina 		marker += sizeof (lsp->ls_crypto.magic);
   2055   8313      Dina 
   2056   8313      Dina 		/* read the encryption version number */
   2057   8313      Dina 		bcopy(marker, &(lsp->ls_crypto.version),
   2058   8313      Dina 		    sizeof (lsp->ls_crypto.version));
   2059   8313      Dina 		lsp->ls_crypto.version = ntohs(lsp->ls_crypto.version);
   2060   8313      Dina 		marker += sizeof (lsp->ls_crypto.version);
   2061   8313      Dina 
   2062   8313      Dina 		/* read a chunk of reserved data */
   2063   8313      Dina 		bcopy(marker, lsp->ls_crypto.reserved1,
   2064   8313      Dina 		    sizeof (lsp->ls_crypto.reserved1));
   2065   8313      Dina 		marker += sizeof (lsp->ls_crypto.reserved1);
   2066   8313      Dina 
   2067   8313      Dina 		/* read block number where encrypted data begins */
   2068   8313      Dina 		bcopy(marker, &(lsp->ls_crypto.data_sector),
   2069   8313      Dina 		    sizeof (lsp->ls_crypto.data_sector));
   2070   8313      Dina 		lsp->ls_crypto.data_sector = ntohl(lsp->ls_crypto.data_sector);
   2071   8313      Dina 		marker += sizeof (lsp->ls_crypto.data_sector);
   2072   8313      Dina 
   2073   8313      Dina 		/* and ignore the rest until it is implemented */
   2074   8313      Dina 
   2075   8313      Dina 		lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE;
   2076   8313      Dina 		need_size_update = B_TRUE;
   2077   8313      Dina 
   2078   8313      Dina 	/* neither compressed nor encrypted, BUT could be new encrypted lofi */
   2079   8313      Dina 	} else if (klip->li_crypto_enabled) {
   2080   8313      Dina 
   2081   8313      Dina 		/*
   2082   8313      Dina 		 * This is the case where encryption was requested but the
   2083   8313      Dina 		 * appears to be entirely blank where the encryption header
   2084   8313      Dina 		 * would have been in the lofi image.  If it is blank,
   2085   8313      Dina 		 * assume it is a brand new lofi image and initialize the
   2086   8313      Dina 		 * header area with encryption magic and current version
   2087   8313      Dina 		 * header data.  If it is not blank, that's an error.
   2088   8313      Dina 		 */
   2089   8313      Dina 		int	i;
   2090   8313      Dina 		char	*marker;
   2091   8313      Dina 		struct crypto_meta	chead;
   2092   8313      Dina 
   2093   8313      Dina 		for (i = 0; i < sizeof (struct crypto_meta); i++)
   2094   8313      Dina 			if (crybuf[i] != '\0')
   2095   8313      Dina 				break;
   2096   8313      Dina 		if (i != sizeof (struct crypto_meta)) {
   2097   8313      Dina 			error = EINVAL;
   2098   8313      Dina 			goto propout;
   2099   8313      Dina 		}
   2100   8313      Dina 
   2101   8313      Dina 		/* nothing there, initialize as encrypted lofi */
   2102   8313      Dina 		marker = crybuf;
   2103   8313      Dina 		bcopy(lofi_crypto_magic, marker, sizeof (lofi_crypto_magic));
   2104   8313      Dina 		marker += sizeof (lofi_crypto_magic);
   2105   8313      Dina 		chead.version = htons(LOFI_CRYPTO_VERSION);
   2106   8313      Dina 		bcopy(&(chead.version), marker, sizeof (chead.version));
   2107   8313      Dina 		marker += sizeof (chead.version);
   2108   8313      Dina 		marker += sizeof (chead.reserved1);
   2109   8313      Dina 		chead.data_sector = htonl(LOFI_CRYPTO_DATA_SECTOR);
   2110   8313      Dina 		bcopy(&(chead.data_sector), marker, sizeof (chead.data_sector));
   2111   8313      Dina 
   2112   8313      Dina 		/* write the header */
   2113   8313      Dina 		error = vn_rdwr(UIO_WRITE, lsp->ls_vp, crybuf, DEV_BSIZE,
   2114   8313      Dina 		    CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
   2115   8313      Dina 		if (error != 0)
   2116   8313      Dina 			goto propout;
   2117   8313      Dina 
   2118   8313      Dina 		/* fix things up so it looks like we read this info */
   2119   8313      Dina 		bcopy(lofi_crypto_magic, lsp->ls_crypto.magic,
   2120   8313      Dina 		    sizeof (lofi_crypto_magic));
   2121   8313      Dina 		lsp->ls_crypto.version = LOFI_CRYPTO_VERSION;
   2122   8313      Dina 		lsp->ls_crypto.data_sector = LOFI_CRYPTO_DATA_SECTOR;
   2123   8313      Dina 
   2124   8313      Dina 		lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE;
   2125   8313      Dina 		need_size_update = B_TRUE;
   2126   8313      Dina 	}
   2127   8313      Dina 
   2128   8313      Dina 	/*
   2129   8313      Dina 	 * Either lsp->ls_vp_size or lsp->ls_crypto_offset changed;
   2130   8313      Dina 	 * for encrypted lofi, advertise that it is somewhat shorter
   2131   8313      Dina 	 * due to embedded crypto metadata section
   2132   8313      Dina 	 */
   2133   8313      Dina 	if (need_size_update) {
   2134   5643     aalok 		/* update DDI properties */
   2135   8313      Dina 		Size_prop_val = lsp->ls_vp_size - lsp->ls_crypto_offset;
   2136   5643     aalok 		if ((ddi_prop_update_int64(newdev, lofi_dip, SIZE_PROP_NAME,
   2137   5643     aalok 		    Size_prop_val)) != DDI_PROP_SUCCESS) {
   2138   5643     aalok 			error = EINVAL;
   2139   5643     aalok 			goto propout;
   2140   5643     aalok 		}
   2141   8313      Dina 		Nblocks_prop_val =
   2142   8313      Dina 		    (lsp->ls_vp_size - lsp->ls_crypto_offset) / DEV_BSIZE;
   2143   5643     aalok 		if ((ddi_prop_update_int64(newdev, lofi_dip, NBLOCKS_PROP_NAME,
   2144   5643     aalok 		    Nblocks_prop_val)) != DDI_PROP_SUCCESS) {
   2145   5643     aalok 			error = EINVAL;
   2146   5643     aalok 			goto propout;
   2147   5643     aalok 		}
   2148   5643     aalok 	}
   2149   5643     aalok 
   2150      0    stevel 	fake_disk_geometry(lsp);
   2151      0    stevel 	mutex_exit(&lofi_lock);
   2152   1657     heppo 	(void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
   2153      0    stevel 	free_lofi_ioctl(klip);
   2154      0    stevel 	return (0);
   2155      0    stevel 
   2156      0    stevel propout:
   2157   8313      Dina 	if (keycopied) {
   2158   8313      Dina 		bzero(lsp->ls_key.ck_data,
   2159   8313      Dina 		    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));
   2160   8313      Dina 		kmem_free(lsp->ls_key.ck_data,
   2161   8313      Dina 		    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));
   2162   8313      Dina 		lsp->ls_key.ck_data = NULL;
   2163   8313      Dina 		lsp->ls_key.ck_length = 0;
   2164   8313      Dina 	}
   2165   8313      Dina 
   2166   8313      Dina 	if (zalloced)
   2167   8313      Dina 		ddi_soft_state_free(lofi_statep, newminor);
   2168   8313      Dina 
   2169      0    stevel 	(void) ddi_prop_remove(newdev, lofi_dip, SIZE_PROP_NAME);
   2170      0    stevel 	(void) ddi_prop_remove(newdev, lofi_dip, NBLOCKS_PROP_NAME);
   2171   8313      Dina 
   2172      0    stevel out:
   2173   8313      Dina 	if (need_vn_close) {
   2174   8313      Dina 		(void) VOP_CLOSE(vp, flag, 1, 0, credp, NULL);
   2175   8313      Dina 		VN_RELE(vp);
   2176   8313      Dina 	}
   2177   8313      Dina 
   2178      0    stevel 	mutex_exit(&lofi_lock);
   2179      0    stevel 	free_lofi_ioctl(klip);
   2180      0    stevel 	return (error);
   2181      0    stevel }
   2182      0    stevel 
   2183      0    stevel /*
   2184      0    stevel  * unmap a file.
   2185      0    stevel  */
   2186      0    stevel static int
   2187      0    stevel lofi_unmap_file(dev_t dev, struct lofi_ioctl *ulip, int byfilename,
   2188   1657     heppo     struct cred *credp, int ioctl_flag)
   2189      0    stevel {
   2190      0    stevel 	struct lofi_state *lsp;
   2191      0    stevel 	struct lofi_ioctl *klip;
   2192      0    stevel 	minor_t	minor;
   2193      0    stevel 
   2194   1657     heppo 	klip = copy_in_lofi_ioctl(ulip, ioctl_flag);
   2195      0    stevel 	if (klip == NULL)
   2196      0    stevel 		return (EFAULT);
   2197      0    stevel 
   2198      0    stevel 	mutex_enter(&lofi_lock);
   2199      0    stevel 	if (byfilename) {
   2200      0    stevel 		minor = file_to_minor(klip->li_filename);
   2201      0    stevel 	} else {
   2202      0    stevel 		minor = klip->li_minor;
   2203      0    stevel 	}
   2204      0    stevel 	if (minor == 0) {
   2205      0    stevel 		mutex_exit(&lofi_lock);
   2206      0    stevel 		free_lofi_ioctl(klip);
   2207      0    stevel 		return (ENXIO);
   2208      0    stevel 	}
   2209      0    stevel 	lsp = ddi_get_soft_state(lofi_statep, minor);
   2210   4451  eschrock 	if (lsp == NULL || lsp->ls_vp == NULL) {
   2211      0    stevel 		mutex_exit(&lofi_lock);
   2212      0    stevel 		free_lofi_ioctl(klip);
   2213      0    stevel 		return (ENXIO);
   2214      0    stevel 	}
   2215   4451  eschrock 
   2216   6734   johnlev 	/*
   2217   6734   johnlev 	 * If it's still held open, we'll do one of three things:
   2218   6734   johnlev 	 *
   2219   6734   johnlev 	 * If no flag is set, just return EBUSY.
   2220   6734   johnlev 	 *
   2221   6734   johnlev 	 * If the 'cleanup' flag is set, unmap and remove the device when
   2222   6734   johnlev 	 * the last user finishes.
   2223   6734   johnlev 	 *
   2224   6734   johnlev 	 * If the 'force' flag is set, then we forcibly close the underlying
   2225   6734   johnlev 	 * file.  Subsequent operations will fail, and the DKIOCSTATE ioctl
   2226   6734   johnlev 	 * will return DKIO_DEV_GONE.  When the device is last closed, the
   2227   6734   johnlev 	 * device will be cleaned up appropriately.
   2228   6734   johnlev 	 *
   2229   6734   johnlev 	 * This is complicated by the fact that we may have outstanding
   2230   6734   johnlev 	 * dispatched I/Os.  Rather than having a single mutex to serialize all
   2231   8313      Dina 	 * I/O, we keep a count of the number of outstanding I/O requests
   2232   8313      Dina 	 * (ls_vp_iocount), as well as a flag to indicate that no new I/Os
   2233   8313      Dina 	 * should be dispatched (ls_vp_closereq).
   2234   8313      Dina 	 *
   2235   6734   johnlev 	 * We set the flag, wait for the number of outstanding I/Os to reach 0,
   2236   6734   johnlev 	 * and then close the underlying vnode.
   2237   6734   johnlev 	 */
   2238      0    stevel 	if (is_opened(lsp)) {
   2239   4451  eschrock 		if (klip->li_force) {
   2240   4451  eschrock 			mutex_enter(&lsp->ls_vp_lock);
   2241   4451  eschrock 			lsp->ls_vp_closereq = B_TRUE;
   2242  11041      Eric 			/* wake up any threads waiting on dkiocstate */
   2243  11041      Eric 			cv_broadcast(&lsp->ls_vp_cv);
   2244   4451  eschrock 			while (lsp->ls_vp_iocount > 0)
   2245   4451  eschrock 				cv_wait(&lsp->ls_vp_cv, &lsp->ls_vp_lock);
   2246   4451  eschrock 			mutex_exit(&lsp->ls_vp_lock);
   2247  11041      Eric 			lofi_free_handle(dev, minor, lsp, credp);
   2248   8313      Dina 
   2249   8313      Dina 			klip->li_minor = minor;
   2250   4451  eschrock 			mutex_exit(&lofi_lock);
   2251   4451  eschrock 			(void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
   2252   4451  eschrock 			free_lofi_ioctl(klip);
   2253   4451  eschrock 			return (0);
   2254   6734   johnlev 		} else if (klip->li_cleanup) {
   2255   6734   johnlev 			lsp->ls_cleanup = 1;
   2256   6734   johnlev 			mutex_exit(&lofi_lock);
   2257   6734   johnlev 			free_lofi_ioctl(klip);
   2258   6734   johnlev 			return (0);
   2259   4451  eschrock 		}
   2260   6734   johnlev 
   2261      0    stevel 		mutex_exit(&lofi_lock);
   2262      0    stevel 		free_lofi_ioctl(klip);
   2263      0    stevel 		return (EBUSY);
   2264      0    stevel 	}
   2265      0    stevel 
   2266   4451  eschrock 	lofi_free_handle(dev, minor, lsp, credp);
   2267      0    stevel 
   2268      0    stevel 	klip->li_minor = minor;
   2269      0    stevel 	mutex_exit(&lofi_lock);
   2270   1657     heppo 	(void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
   2271      0    stevel 	free_lofi_ioctl(klip);
   2272      0    stevel 	return (0);
   2273      0    stevel }
   2274      0    stevel 
   2275      0    stevel /*
   2276      0    stevel  * get the filename given the minor number, or the minor number given
   2277      0    stevel  * the name.
   2278      0    stevel  */
   2279   4451  eschrock /*ARGSUSED*/
   2280      0    stevel static int
   2281      0    stevel lofi_get_info(dev_t dev, struct lofi_ioctl *ulip, int which,
   2282   1657     heppo     struct cred *credp, int ioctl_flag)
   2283      0    stevel {
   2284      0    stevel 	struct lofi_state *lsp;
   2285      0    stevel 	struct lofi_ioctl *klip;
   2286      0    stevel 	int	error;
   2287      0    stevel 	minor_t	minor;
   2288      0    stevel 
   2289   1657     heppo 	klip = copy_in_lofi_ioctl(ulip, ioctl_flag);
   2290      0    stevel 	if (klip == NULL)
   2291      0    stevel 		return (EFAULT);
   2292      0    stevel 
   2293      0    stevel 	switch (which) {
   2294      0    stevel 	case LOFI_GET_FILENAME:
   2295      0    stevel 		minor = klip->li_minor;
   2296      0    stevel 		if (minor == 0) {
   2297      0    stevel 			free_lofi_ioctl(klip);
   2298      0    stevel 			return (EINVAL);
   2299      0    stevel 		}
   2300      0    stevel 
   2301      0    stevel 		mutex_enter(&lofi_lock);
   2302      0    stevel 		lsp = ddi_get_soft_state(lofi_statep, minor);
   2303      0    stevel 		if (lsp == NULL) {
   2304      0    stevel 			mutex_exit(&lofi_lock);
   2305      0    stevel 			free_lofi_ioctl(klip);
   2306      0    stevel 			return (ENXIO);
   2307      0    stevel 		}
   2308      0    stevel 		(void) strcpy(klip->li_filename, lsp->ls_filename);
   2309   5643     aalok 		(void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm,
   2310   5643     aalok 		    sizeof (klip->li_algorithm));
   2311   8313      Dina 		klip->li_crypto_enabled = lsp->ls_crypto_enabled;
   2312      0    stevel 		mutex_exit(&lofi_lock);
   2313   1657     heppo 		error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
   2314      0    stevel 		free_lofi_ioctl(klip);
   2315      0    stevel 		return (error);
   2316      0    stevel 	case LOFI_GET_MINOR:
   2317      0    stevel 		mutex_enter(&lofi_lock);
   2318      0    stevel 		klip->li_minor = file_to_minor(klip->li_filename);
   2319   8313      Dina 		/* caller should not depend on klip->li_crypto_enabled here */
   2320      0    stevel 		mutex_exit(&lofi_lock);
   2321      0    stevel 		if (klip->li_minor == 0) {
   2322      0    stevel 			free_lofi_ioctl(klip);
   2323      0    stevel 			return (ENOENT);
   2324      0    stevel 		}
   2325   5643     aalok 		error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
   2326   5643     aalok 		free_lofi_ioctl(klip);
   2327   5643     aalok 		return (error);
   2328   5643     aalok 	case LOFI_CHECK_COMPRESSED:
   2329   5643     aalok 		mutex_enter(&lofi_lock);
   2330   5643     aalok 		klip->li_minor = file_to_minor(klip->li_filename);
   2331   5643     aalok 		mutex_exit(&lofi_lock);
   2332   5643     aalok 		if (klip->li_minor == 0) {
   2333   5643     aalok 			free_lofi_ioctl(klip);
   2334   5643     aalok 			return (ENOENT);
   2335   5643     aalok 		}
   2336   5643     aalok 		mutex_enter(&lofi_lock);
   2337   5643     aalok 		lsp = ddi_get_soft_state(lofi_statep, klip->li_minor);
   2338   5643     aalok 		if (lsp == NULL) {
   2339   5643     aalok 			mutex_exit(&lofi_lock);
   2340   5643     aalok 			free_lofi_ioctl(klip);
   2341   5643     aalok 			return (ENXIO);
   2342   5643     aalok 		}
   2343   5643     aalok 		ASSERT(strcmp(klip->li_filename, lsp->ls_filename) == 0);
   2344   5643     aalok 
   2345   5643     aalok 		(void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm,
   2346   5643     aalok 		    sizeof (klip->li_algorithm));
   2347   5643     aalok 		mutex_exit(&lofi_lock);
   2348   1657     heppo 		error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
   2349      0    stevel 		free_lofi_ioctl(klip);
   2350      0    stevel 		return (error);
   2351      0    stevel 	default:
   2352      0    stevel 		free_lofi_ioctl(klip);
   2353      0    stevel 		return (EINVAL);
   2354      0    stevel 	}
   2355      0    stevel 
   2356      0    stevel }
   2357      0    stevel 
   2358      0    stevel static int
   2359      0    stevel lofi_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp,
   2360      0    stevel     int *rvalp)
   2361      0    stevel {
   2362      0    stevel 	int	error;
   2363      0    stevel 	enum dkio_state dkstate;
   2364      0    stevel 	struct lofi_state *lsp;
   2365      0    stevel 	minor_t	minor;
   2366      0    stevel 
   2367      0    stevel 	minor = getminor(dev);
   2368      0    stevel 	/* lofi ioctls only apply to the master device */
   2369      0    stevel 	if (minor == 0) {
   2370      0    stevel 		struct lofi_ioctl *lip = (struct lofi_ioctl *)arg;
   2371      0    stevel 
   2372      0    stevel 		/*
   2373      0    stevel 		 * the query command only need read-access - i.e., normal
   2374      0    stevel 		 * users are allowed to do those on the ctl device as
   2375      0    stevel 		 * long as they can open it read-only.
   2376      0    stevel 		 */
   2377      0    stevel 		switch (cmd) {
   2378      0    stevel 		case LOFI_MAP_FILE:
   2379      0    stevel 			if ((flag & FWRITE) == 0)
   2380      0    stevel 				return (EPERM);
   2381   1657     heppo 			return (lofi_map_file(dev, lip, 1, rvalp, credp, flag));
   2382      0    stevel 		case LOFI_MAP_FILE_MINOR:
   2383      0    stevel 			if ((flag & FWRITE) == 0)
   2384      0    stevel 				return (EPERM);
   2385   1657     heppo 			return (lofi_map_file(dev, lip, 0, rvalp, credp, flag));
   2386      0    stevel 		case LOFI_UNMAP_FILE:
   2387      0    stevel 			if ((flag & FWRITE) == 0)
   2388      0    stevel 				return (EPERM);
   2389   1657     heppo 			return (lofi_unmap_file(dev, lip, 1, credp, flag));
   2390      0    stevel 		case LOFI_UNMAP_FILE_MINOR:
   2391      0    stevel 			if ((flag & FWRITE) == 0)
   2392      0    stevel 				return (EPERM);
   2393   1657     heppo 			return (lofi_unmap_file(dev, lip, 0, credp, flag));
   2394      0    stevel 		case LOFI_GET_FILENAME:
   2395      0    stevel 			return (lofi_get_info(dev, lip, LOFI_GET_FILENAME,
   2396   1657     heppo 			    credp, flag));
   2397      0    stevel 		case LOFI_GET_MINOR:
   2398      0    stevel 			return (lofi_get_info(dev, lip, LOFI_GET_MINOR,
   2399   1657     heppo 			    credp, flag));
   2400      0    stevel 		case LOFI_GET_MAXMINOR:
   2401   1657     heppo 			error = ddi_copyout(&lofi_max_files, &lip->li_minor,
   2402   1657     heppo 			    sizeof (lofi_max_files), flag);
   2403      0    stevel 			if (error)
   2404      0    stevel 				return (EFAULT);
   2405      0    stevel 			return (0);
   2406   5643     aalok 		case LOFI_CHECK_COMPRESSED:
   2407   5643     aalok 			return (lofi_get_info(dev, lip, LOFI_CHECK_COMPRESSED,
   2408   5643     aalok 			    credp, flag));
   2409      0    stevel 		default:
   2410      0    stevel 			break;
   2411      0    stevel 		}
   2412      0    stevel 	}
   2413      0    stevel 
   2414  11041      Eric 	mutex_enter(&lofi_lock);
   2415      0    stevel 	lsp = ddi_get_soft_state(lofi_statep, minor);
   2416  11041      Eric 	if (lsp == NULL || lsp->ls_vp_closereq) {
   2417  11041      Eric 		mutex_exit(&lofi_lock);
   2418      0    stevel 		return (ENXIO);
   2419  11041      Eric 	}
   2420  11041      Eric 	mutex_exit(&lofi_lock);
   2421      0    stevel 
   2422   4451  eschrock 	/*
   2423   4451  eschrock 	 * We explicitly allow DKIOCSTATE, but all other ioctls should fail with
   2424   4451  eschrock 	 * EIO as if the device was no longer present.
   2425   4451  eschrock 	 */
   2426   4451  eschrock 	if (lsp->ls_vp == NULL && cmd != DKIOCSTATE)
   2427   4451  eschrock 		return (EIO);
   2428   4451  eschrock 
   2429      0    stevel 	/* these are for faking out utilities like newfs */
   2430      0    stevel 	switch (cmd) {
   2431      0    stevel 	case DKIOCGVTOC:
   2432      0    stevel 		switch (ddi_model_convert_from(flag & FMODELS)) {
   2433      0    stevel 		case DDI_MODEL_ILP32: {
   2434      0    stevel 			struct vtoc32 vtoc32;
   2435      0    stevel 
   2436      0    stevel 			vtoctovtoc32(lsp->ls_vtoc, vtoc32);
   2437      0    stevel 			if (ddi_copyout(&vtoc32, (void *)arg,
   2438      0    stevel 			    sizeof (struct vtoc32), flag))
   2439      0    stevel 				return (EFAULT);
   2440   8719      Dina 			break;
   2441      0    stevel 			}
   2442      0    stevel 
   2443      0    stevel 		case DDI_MODEL_NONE:
   2444      0    stevel 			if (ddi_copyout(&lsp->ls_vtoc, (void *)arg,
   2445      0    stevel 			    sizeof (struct vtoc), flag))
   2446      0    stevel 				return (EFAULT);
   2447      0    stevel 			break;
   2448      0    stevel 		}
   2449      0    stevel 		return (0);
   2450      0    stevel 	case DKIOCINFO:
   2451   1657     heppo 		error = ddi_copyout(&lsp->ls_ci, (void *)arg,
   2452   1657     heppo 		    sizeof (struct dk_cinfo), flag);
   2453      0    stevel 		if (error)
   2454      0    stevel 			return (EFAULT);
   2455      0    stevel 		return (0);
   2456      0    stevel 	case DKIOCG_VIRTGEOM:
   2457      0    stevel 	case DKIOCG_PHYGEOM:
   2458      0    stevel 	case DKIOCGGEOM:
   2459   1657     heppo 		error = ddi_copyout(&lsp->ls_dkg, (void *)arg,
   2460   1657     heppo 		    sizeof (struct dk_geom), flag);
   2461      0    stevel 		if (error)
   2462      0    stevel 			return (EFAULT);
   2463      0    stevel 		return (0);
   2464      0    stevel 	case DKIOCSTATE:
   2465   4451  eschrock 		/*
   2466   4451  eschrock 		 * Normally, lofi devices are always in the INSERTED state.  If
   2467   4451  eschrock 		 * a device is forcefully unmapped, then the device transitions
   2468   4451  eschrock 		 * to the DKIO_DEV_GONE state.
   2469   4451  eschrock 		 */
   2470   4451  eschrock 		if (ddi_copyin((void *)arg, &dkstate, sizeof (dkstate),
   2471   4451  eschrock 		    flag) != 0)
   2472   4451  eschrock 			return (EFAULT);
   2473   4451  eschrock 
   2474   4451  eschrock 		mutex_enter(&lsp->ls_vp_lock);
   2475  11041      Eric 		lsp->ls_vp_iocount++;
   2476  11041      Eric 		while (((dkstate == DKIO_INSERTED && lsp->ls_vp != NULL) ||
   2477  11041      Eric 		    (dkstate == DKIO_DEV_GONE && lsp->ls_vp == NULL)) &&
   2478  11041      Eric 		    !lsp->ls_vp_closereq) {
   2479   4451  eschrock 			/*
   2480   4451  eschrock 			 * By virtue of having the device open, we know that
   2481   4451  eschrock 			 * 'lsp' will remain valid when we return.
   2482   4451  eschrock 			 */
   2483   4451  eschrock 			if (!cv_wait_sig(&lsp->ls_vp_cv,
   2484   4451  eschrock 			    &lsp->ls_vp_lock)) {
   2485  11041      Eric 				lsp->ls_vp_iocount--;
   2486  11041      Eric 				cv_broadcast(&lsp->ls_vp_cv);
   2487   4451  eschrock 				mutex_exit(&lsp->ls_vp_lock);
   2488   4451  eschrock 				return (EINTR);
   2489   4451  eschrock 			}
   2490   4451  eschrock 		}
   2491   4451  eschrock 
   2492  11041      Eric 		dkstate = (!lsp->ls_vp_closereq && lsp->ls_vp != NULL ?
   2493  11041      Eric 		    DKIO_INSERTED : DKIO_DEV_GONE);
   2494  11041      Eric 		lsp->ls_vp_iocount--;
   2495  11041      Eric 		cv_broadcast(&lsp->ls_vp_cv);
   2496   4451  eschrock 		mutex_exit(&lsp->ls_vp_lock);
   2497   4451  eschrock 
   2498   4451  eschrock 		if (ddi_copyout(&dkstate, (void *)arg,
   2499   4451  eschrock 		    sizeof (dkstate), flag) != 0)
   2500      0    stevel 			return (EFAULT);
   2501      0    stevel 		return (0);
   2502      0    stevel 	default:
   2503      0    stevel 		return (ENOTTY);
   2504      0    stevel 	}
   2505      0    stevel }
   2506      0    stevel 
   2507      0    stevel static struct cb_ops lofi_cb_ops = {
   2508      0    stevel 	lofi_open,		/* open */
   2509      0    stevel 	lofi_close,		/* close */
   2510      0    stevel 	lofi_strategy,		/* strategy */
   2511      0    stevel 	nodev,			/* print */
   2512      0    stevel 	nodev,			/* dump */
   2513      0    stevel 	lofi_read,		/* read */
   2514      0    stevel 	lofi_write,		/* write */
   2515      0    stevel 	lofi_ioctl,		/* ioctl */
   2516      0    stevel 	nodev,			/* devmap */
   2517      0    stevel 	nodev,			/* mmap */
   2518      0    stevel 	nodev,			/* segmap */
   2519      0    stevel 	nochpoll,		/* poll */
   2520      0    stevel 	ddi_prop_op,		/* prop_op */
   2521      0    stevel 	0,			/* streamtab  */
   2522      0    stevel 	D_64BIT | D_NEW | D_MP,	/* Driver compatibility flag */
   2523      0    stevel 	CB_REV,
   2524      0    stevel 	lofi_aread,
   2525      0    stevel 	lofi_awrite
   2526      0    stevel };
   2527      0    stevel 
   2528      0    stevel static struct dev_ops lofi_ops = {
   2529      0    stevel 	DEVO_REV,		/* devo_rev, */
   2530      0    stevel 	0,			/* refcnt  */
   2531      0    stevel 	lofi_info,		/* info */
   2532      0    stevel 	nulldev,		/* identify */
   2533      0    stevel 	nulldev,		/* probe */
   2534      0    stevel 	lofi_attach,		/* attach */
   2535      0    stevel 	lofi_detach,		/* detach */
   2536      0    stevel 	nodev,			/* reset */
   2537      0    stevel 	&lofi_cb_ops,		/* driver operations */
   2538   7656    Sherry 	NULL,			/* no bus operations */
   2539   7656    Sherry 	NULL,			/* power */
   2540   8313      Dina 	ddi_quiesce_not_needed,	/* quiesce */
   2541      0    stevel };
   2542      0    stevel 
   2543      0    stevel static struct modldrv modldrv = {
   2544      0    stevel 	&mod_driverops,
   2545   7656    Sherry 	"loopback file driver",
   2546      0    stevel 	&lofi_ops,
   2547      0    stevel };
   2548      0    stevel 
   2549      0    stevel static struct modlinkage modlinkage = {
   2550      0    stevel 	MODREV_1,
   2551      0    stevel 	&modldrv,
   2552      0    stevel 	NULL
   2553      0    stevel };
   2554      0    stevel 
   2555      0    stevel int
   2556      0    stevel _init(void)
   2557      0    stevel {
   2558      0    stevel 	int error;
   2559      0    stevel 
   2560      0    stevel 	error = ddi_soft_state_init(&lofi_statep,
   2561      0    stevel 	    sizeof (struct lofi_state), 0);
   2562      0    stevel 	if (error)
   2563      0    stevel 		return (error);
   2564      0    stevel 
   2565      0    stevel 	mutex_init(&lofi_lock, NULL, MUTEX_DRIVER, NULL);
   2566      0    stevel 	error = mod_install(&modlinkage);
   2567      0    stevel 	if (error) {
   2568      0    stevel 		mutex_destroy(&lofi_lock);
   2569      0    stevel 		ddi_soft_state_fini(&lofi_statep);
   2570      0    stevel 	}
   2571      0    stevel 
   2572      0    stevel 	return (error);
   2573      0    stevel }
   2574      0    stevel 
   2575      0    stevel int
   2576      0    stevel _fini(void)
   2577      0    stevel {
   2578      0    stevel 	int	error;
   2579      0    stevel 
   2580      0    stevel 	if (lofi_busy())
   2581      0    stevel 		return (EBUSY);
   2582      0    stevel 
   2583      0    stevel 	error = mod_remove(&modlinkage);
   2584      0    stevel 	if (error)
   2585      0    stevel 		return (error);
   2586      0    stevel 
   2587      0    stevel 	mutex_destroy(&lofi_lock);
   2588      0    stevel 	ddi_soft_state_fini(&lofi_statep);
   2589      0    stevel 
   2590      0    stevel 	return (error);
   2591      0    stevel }
   2592      0    stevel 
   2593      0    stevel int
   2594      0    stevel _info(struct modinfo *modinfop)
   2595      0    stevel {
   2596      0    stevel 	return (mod_info(&modlinkage, modinfop));
   2597      0    stevel }
   2598