Home | History | Annotate | Download | only in dcfs
      1  5648       setje 
      2  5648       setje /*
      3  5648       setje  * CDDL HEADER START
      4  5648       setje  *
      5  5648       setje  * The contents of this file are subject to the terms of the
      6  5648       setje  * Common Development and Distribution License (the "License").
      7  5648       setje  * You may not use this file except in compliance with the License.
      8  5648       setje  *
      9  5648       setje  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
     10  5648       setje  * or http://www.opensolaris.org/os/licensing.
     11  5648       setje  * See the License for the specific language governing permissions
     12  5648       setje  * and limitations under the License.
     13  5648       setje  *
     14  5648       setje  * When distributing Covered Code, include this CDDL HEADER in each
     15  5648       setje  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     16  5648       setje  * If applicable, add the following below this CDDL HEADER, with the
     17  5648       setje  * fields enclosed by brackets "[]" replaced with your own identifying
     18  5648       setje  * information: Portions Copyright [yyyy] [name of copyright owner]
     19  5648       setje  *
     20  5648       setje  * CDDL HEADER END
     21  5648       setje  */
     22  5648       setje /*
     23  6754       tomee  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24  5648       setje  * Use is subject to license terms.
     25  5648       setje  */
     26  5648       setje 
     27  5648       setje /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
     28  5648       setje /*	  All Rights Reserved  	*/
     29  5648       setje 
     30  5648       setje /*
     31  5648       setje  * University Copyright- Copyright (c) 1982, 1986, 1988
     32  5648       setje  * The Regents of the University of California
     33  5648       setje  * All Rights Reserved
     34  5648       setje  *
     35  5648       setje  * University Acknowledgment- Portions of this document are derived from
     36  5648       setje  * software developed by the University of California, Berkeley, and its
     37  5648       setje  * contributors.
     38  5648       setje  */
     39  5648       setje 
     40  5648       setje #include <sys/types.h>
     41  5648       setje #include <sys/thread.h>
     42  5648       setje #include <sys/t_lock.h>
     43  5648       setje #include <sys/param.h>
     44  5648       setje #include <sys/systm.h>
     45  5648       setje #include <sys/bitmap.h>
     46  5648       setje #include <sys/buf.h>
     47  5648       setje #include <sys/cmn_err.h>
     48  5648       setje #include <sys/conf.h>
     49  5648       setje #include <sys/ddi.h>
     50  5648       setje #include <sys/debug.h>
     51  5648       setje #include <sys/errno.h>
     52  5648       setje #include <sys/time.h>
     53  5648       setje #include <sys/fcntl.h>
     54  5648       setje #include <sys/flock.h>
     55  5648       setje #include <sys/file.h>
     56  5648       setje #include <sys/kmem.h>
     57  5648       setje #include <sys/mman.h>
     58  5648       setje #include <sys/vmsystm.h>
     59  5648       setje #include <sys/open.h>
     60  5648       setje #include <sys/swap.h>
     61  5648       setje #include <sys/sysmacros.h>
     62  5648       setje #include <sys/uio.h>
     63  5648       setje #include <sys/vfs.h>
     64  5648       setje #include <sys/vfs_opreg.h>
     65  5648       setje #include <sys/vnode.h>
     66  5648       setje #include <sys/stat.h>
     67  5648       setje #include <sys/poll.h>
     68  5648       setje #include <sys/zmod.h>
     69  5648       setje #include <sys/fs/decomp.h>
     70  5648       setje 
     71  5648       setje #include <vm/hat.h>
     72  5648       setje #include <vm/as.h>
     73  5648       setje #include <vm/page.h>
     74  5648       setje #include <vm/pvn.h>
     75  5648       setje #include <vm/seg_vn.h>
     76  5648       setje #include <vm/seg_kmem.h>
     77  5648       setje #include <vm/seg_map.h>
     78  5648       setje 
     79  5648       setje #include <fs/fs_subr.h>
     80  5648       setje 
     81  5648       setje /*
     82  5648       setje  * dcfs - A filesystem for automatic decompressing of fiocompressed files
     83  5648       setje  *
     84  5648       setje  * This filesystem is a layered filesystem that sits on top of a normal
     85  5648       setje  * persistent filesystem and provides automatic decompression of files
     86  5648       setje  * that have been previously compressed and stored on the host file system.
     87  5648       setje  * This is a pseudo filesystem in that it does not persist data, rather it
     88  5648       setje  * intercepts file lookup requests on the host filesystem and provides
     89  5648       setje  * transparent decompression of those files. Currently the only supported
     90  5648       setje  * host filesystem is ufs.
     91  5648       setje  *
     92  5648       setje  * A file is compressed via a userland utility (currently cmd/boot/fiocompress)
     93  5648       setje  * and marked by fiocompress as a compressed file via a flag in the on-disk
     94  5648       setje  * inode (set via a ufs ioctl() - see ufs_vnops.c`ufs_ioctl()`_FIO_COMPRESSED).
     95  5648       setje  * ufs_lookup checks for this flag and, if it is set, passes control to
     96  5648       setje  * decompvp, a function defined in this (dcfs) filesystem. decompvp
     97  5648       setje  * uncompresses the file and returns a dcfs vnode to the VFS layer.
     98  5648       setje  *
     99  5648       setje  * dcfs is layered on top of ufs and passes requests involving persistence
    100  5648       setje  * to the underlying ufs filesystem. The compressed files currently cannot be
    101  5648       setje  * written to.
    102  5648       setje  */
    103  5648       setje 
    104  5648       setje 
    105  5648       setje /*
    106  5648       setje  * Define data structures within this file.
    107  5648       setje  */
    /*
     * Hash parameters.  DCHASH() drops the low DCSHFT bits of a vnode pointer
     * and reduces what is left to an index in the range [0, DCTABLESIZE).
     */
    #define	DCSHFT		5
    #define	DCTABLESIZE	16

    /*
     * A power-of-two table size lets us mask instead of divide; any other
     * size falls back to a modulo.
     */
    #if ((DCTABLESIZE & (DCTABLESIZE - 1)) == 0)
    #define	DCHASH(vp) (((uintptr_t)(vp) >> DCSHFT) & (DCTABLESIZE - 1))
    #else
    #define	DCHASH(vp) (((uintptr_t)(vp) >> DCSHFT) % DCTABLESIZE)
    #endif

    /* NOTE(review): presumably the target length of the dclru list - confirm */
    #define	DCLRUSIZE	16

    /* Number of entries in the dcbuf_cache[] array */
    #define	DCCACHESIZE	4

    /* Round x down to a multiple of y; only correct when y is a power of two */
    #define	rounddown(x, y)	((x) & ~((y) - 1))
    123  5648       setje struct dcnode	*dctable[DCTABLESIZE];
    124  5648       setje 
    125  5648       setje struct dcnode	*dclru;
    126  5648       setje static int	dclru_len;
    127  5648       setje 
    128  5648       setje kmutex_t	dctable_lock;
    129  5648       setje 
    130  5648       setje dev_t		dcdev;
    131  5648       setje struct vfs	dc_vfs;
    132  5648       setje 
    133  5648       setje struct kmem_cache *dcnode_cache;
    134  5648       setje struct kmem_cache *dcbuf_cache[DCCACHESIZE];
    135  5648       setje 
    136  5648       setje kmutex_t	dccache_lock;
    137  5648       setje 
    /* Filesystem initialization routine handed to the VFS through vfw */
    static int		dcinit(int, char *);

    /* dcnode allocation and release */
    static struct dcnode	*dcnode_alloc(void);
    static void		dcnode_free(struct dcnode *);
    static void		dcnode_recycle(struct dcnode *);

    /* dcnode hash table maintenance */
    static void		dcinsert(struct dcnode *);
    static void		dcdelete(struct dcnode *);
    static struct dcnode	*dcfind(struct vnode *);

    /* dcnode LRU list maintenance */
    static void		dclru_add(struct dcnode *);
    static void		dclru_sub(struct dcnode *);
    150  5648       setje 
    151  5648       setje /*
    152  5648       setje  * This is the loadable module wrapper.
    153  5648       setje  */
    154  5648       setje #include <sys/modctl.h>
    155  5648       setje 
    156  5648       setje struct vfsops *dc_vfsops;
    157  5648       setje 
    158  5648       setje static vfsdef_t vfw = {
    159  5648       setje 	VFSDEF_VERSION,
    160  5648       setje 	"dcfs",
    161  5648       setje 	dcinit,
    162  5648       setje 	0,
    163  5648       setje 	NULL
    164  5648       setje };
    165  5648       setje 
    166  5648       setje /*
    167  5648       setje  * Module linkage information for the kernel.
    168  5648       setje  */
    169  5648       setje extern struct mod_ops mod_fsops;
    170  5648       setje 
    171  5648       setje static struct modlfs modlfs = {
    172  5648       setje 	&mod_fsops, "compressed filesystem", &vfw
    173  5648       setje };
    174  5648       setje 
    175  5648       setje static struct modlinkage modlinkage = {
    176  5648       setje 	MODREV_1, (void *)&modlfs, NULL
    177  5648       setje };
    178  5648       setje 
    179  5648       setje int
    180  5648       setje _init()
    181  5648       setje {
    182  5648       setje 	return (mod_install(&modlinkage));
    183  5648       setje }
    184  5648       setje 
    185  5648       setje int
    186  5648       setje _info(struct modinfo *modinfop)
    187  5648       setje {
    188  5648       setje 	return (mod_info(&modlinkage, modinfop));
    189  5648       setje }
    190  5648       setje 
    191  5648       setje 
    192  5648       setje static int dc_open(struct vnode **, int, struct cred *, caller_context_t *);
    193  5648       setje static int dc_close(struct vnode *, int, int, offset_t,
    194  5648       setje     struct cred *, caller_context_t *);
    195  5648       setje static int dc_read(struct vnode *, struct uio *, int, struct cred *,
    196  5648       setje     struct caller_context *);
    197  5648       setje static int dc_getattr(struct vnode *, struct vattr *, int,
    198  5648       setje     struct cred *, caller_context_t *);
    199  5648       setje static int dc_setattr(struct vnode *, struct vattr *, int, struct cred *,
    200  5648       setje     struct caller_context *);
    201  5648       setje static int dc_access(struct vnode *, int, int,
    202  5648       setje     struct cred *, caller_context_t *);
    203  5648       setje static int dc_fsync(struct vnode *, int, struct cred *, caller_context_t *);
    204  5648       setje static void dc_inactive(struct vnode *, struct cred *, caller_context_t *);
    205  5648       setje static int dc_fid(struct vnode *, struct fid *, caller_context_t *);
    206  5648       setje static int dc_seek(struct vnode *, offset_t, offset_t *, caller_context_t *);
    207  5648       setje static int dc_frlock(struct vnode *, int, struct flock64 *, int, offset_t,
    208  5648       setje     struct flk_callback *, struct cred *, caller_context_t *);
    209  5648       setje static int dc_getpage(struct vnode *, offset_t, size_t, uint_t *,
    210  5648       setje     struct page **, size_t, struct seg *, caddr_t, enum seg_rw,
    211  5648       setje     struct cred *, caller_context_t *);
    212  5648       setje static int dc_putpage(struct vnode *, offset_t, size_t, int,
    213  5648       setje     struct cred *, caller_context_t *);
    214  5648       setje static int dc_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t,
    215  5648       setje     uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
    216  5648       setje static int dc_addmap(struct vnode *, offset_t, struct as *, caddr_t, size_t,
    217  5648       setje     uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
    218  5648       setje static int dc_delmap(struct vnode *, offset_t, struct as *, caddr_t, size_t,
    219  5648       setje     uint_t, uint_t, uint_t, struct cred *, caller_context_t *);
    220  5648       setje 
    221  5648       setje struct vnodeops *dc_vnodeops;
    222  5648       setje 
    223  5648       setje const fs_operation_def_t dc_vnodeops_template[] = {
    224  5648       setje 	VOPNAME_OPEN,			{ .vop_open = dc_open },
    225  5648       setje 	VOPNAME_CLOSE,			{ .vop_close = dc_close },
    226  5648       setje 	VOPNAME_READ,			{ .vop_read = dc_read },
    227  5648       setje 	VOPNAME_GETATTR,		{ .vop_getattr =  dc_getattr },
    228  5648       setje 	VOPNAME_SETATTR,		{ .vop_setattr = dc_setattr },
    229  5648       setje 	VOPNAME_ACCESS,			{ .vop_access = dc_access },
    230  5648       setje 	VOPNAME_FSYNC,			{ .vop_fsync = dc_fsync },
    231  5648       setje 	VOPNAME_INACTIVE,		{ .vop_inactive = dc_inactive },
    232  5648       setje 	VOPNAME_FID,			{ .vop_fid = dc_fid },
    233  5648       setje 	VOPNAME_SEEK,			{ .vop_seek = dc_seek },
    234  5648       setje 	VOPNAME_FRLOCK,			{ .vop_frlock = dc_frlock },
    235  5648       setje 	VOPNAME_GETPAGE,		{ .vop_getpage = dc_getpage },
    236  5648       setje 	VOPNAME_PUTPAGE,		{ .vop_putpage = dc_putpage },
    237  5648       setje 	VOPNAME_MAP,			{ .vop_map = dc_map },
    238  5648       setje 	VOPNAME_ADDMAP,			{ .vop_addmap = dc_addmap },
    239  5648       setje 	VOPNAME_DELMAP,			{ .vop_delmap = dc_delmap },
    240  5648       setje 	NULL,				NULL
    241  5648       setje };
    242  5648       setje 
    243  5648       setje /*ARGSUSED*/
    244  5648       setje static int
    245  5648       setje dc_open(struct vnode **vpp, int flag, struct cred *cr, caller_context_t *ctp)
    246  5648       setje {
    247  5648       setje 	return (0);
    248  5648       setje }
    249  5648       setje 
    250  5648       setje /*ARGSUSED*/
    251  5648       setje static int
    252  5648       setje dc_close(struct vnode *vp, int flag, int count, offset_t off,
    253  5648       setje     struct cred *cr, caller_context_t *ctp)
    254  5648       setje {
    255  5648       setje 	(void) cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
    256  5648       setje 	cleanshares(vp, ttoproc(curthread)->p_pid);
    257  5648       setje 	return (0);
    258  5648       setje }
    259  5648       setje 
    260  5648       setje /*ARGSUSED*/
    261  5648       setje static int
    262  5648       setje dc_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr,
    263  5648       setje 	struct caller_context *ct)
    264  5648       setje {
    265  5648       setje 	struct dcnode *dp = VTODC(vp);
    266  5648       setje 	size_t rdsize = MAX(MAXBSIZE, dp->dc_hdr->ch_blksize);
    267  5648       setje 	size_t fsize = dp->dc_hdr->ch_fsize;
    268  5648       setje 	int error;
    269  5648       setje 
    270  5648       setje 	/*
    271  5648       setje 	 * Loop through file with segmap, decompression will occur
    272  5648       setje 	 * in dc_getapage
    273  5648       setje 	 */
    274  5648       setje 	do {
    275  5648       setje 		caddr_t base;
    276  5648       setje 		size_t n;
    277  5648       setje 		offset_t mapon;
    278  5648       setje 
    279  5648       setje 		/*
    280  5648       setje 		 * read to end of block or file
    281  5648       setje 		 */
    282  5648       setje 		mapon = uiop->uio_loffset & (rdsize - 1);
    283  5648       setje 		n = MIN(rdsize - mapon, uiop->uio_resid);
    284  5648       setje 		n = MIN(n, fsize - uiop->uio_loffset);
    285  5648       setje 		if (n == 0)
    286  5648       setje 			return (0);	/* at EOF */
    287  5648       setje 
    288  5648       setje 		base = segmap_getmapflt(segkmap, vp, uiop->uio_loffset, n, 1,
    289  5648       setje 		    S_READ);
    290  5648       setje 		error = uiomove(base + mapon, n, UIO_READ, uiop);
    291  5648       setje 		if (!error) {
    292  5648       setje 			uint_t flags;
    293  5648       setje 
    294  5648       setje 			if (n + mapon == rdsize || uiop->uio_loffset == fsize)
    295  5648       setje 				flags = SM_DONTNEED;
    296  5648       setje 			else
    297  5648       setje 				flags = 0;
    298  5648       setje 			error = segmap_release(segkmap, base, flags);
    299  5648       setje 		} else
    300  5648       setje 			(void) segmap_release(segkmap, base, 0);
    301  5648       setje 	} while (!error && uiop->uio_resid);
    302  5648       setje 
    303  5648       setje 	return (error);
    304  5648       setje }
    305  5648       setje 
    306  5648       setje static int
    307  5648       setje dc_getattr(struct vnode *vp, struct vattr *vap, int flags,
    308  5648       setje     cred_t *cred, caller_context_t *ctp)
    309  5648       setje {
    310  5648       setje 	struct dcnode *dp = VTODC(vp);
    311  5648       setje 	struct vnode *subvp = dp->dc_subvp;
    312  5648       setje 	int error;
    313  5648       setje 
    314  5648       setje 	error = VOP_GETATTR(subvp, vap, flags, cred, ctp);
    315  5648       setje 
    316  5648       setje 	/* substitute uncompressed size */
    317  5648       setje 	vap->va_size = dp->dc_hdr->ch_fsize;
    318  5648       setje 	return (error);
    319  5648       setje }
    320  5648       setje 
    321  5648       setje static int
    322  5648       setje dc_setattr(struct vnode *vp, struct vattr *vap, int flags, cred_t *cred,
    323  5648       setje     caller_context_t *ctp)
    324  5648       setje {
    325  5648       setje 	struct dcnode *dp = VTODC(vp);
    326  5648       setje 	struct vnode *subvp = dp->dc_subvp;
    327  5648       setje 
    328  5648       setje 	return (VOP_SETATTR(subvp, vap, flags, cred, ctp));
    329  5648       setje }
    330  5648       setje 
    331  5648       setje static int
    332  5648       setje dc_access(struct vnode *vp, int mode, int flags,
    333  5648       setje     cred_t *cred, caller_context_t *ctp)
    334  5648       setje {
    335  5648       setje 	struct dcnode *dp = VTODC(vp);
    336  5648       setje 	struct vnode *subvp = dp->dc_subvp;
    337  5648       setje 
    338  5648       setje 	return (VOP_ACCESS(subvp, mode, flags, cred, ctp));
    339  5648       setje }
    340  5648       setje 
    341  5648       setje /*ARGSUSED*/
    342  5648       setje static int
    343  5648       setje dc_fsync(vnode_t *vp, int syncflag, cred_t *cred, caller_context_t *ctp)
    344  5648       setje {
    345  5648       setje 	return (0);
    346  5648       setje }
    347  5648       setje 
    348  5648       setje /*ARGSUSED*/
    349  5648       setje static void
    350  5648       setje dc_inactive(struct vnode *vp, cred_t *cr, caller_context_t *ctp)
    351  5648       setje {
    352  5648       setje 	struct dcnode *dp = VTODC(vp);
    353  5648       setje 
    354  5648       setje 	mutex_enter(&dctable_lock);
    355  5648       setje 	mutex_enter(&vp->v_lock);
    356  5648       setje 	ASSERT(vp->v_count >= 1);
    357  5648       setje 	if (--vp->v_count != 0) {
    358  5648       setje 		/*
    359  5648       setje 		 * Somebody accessed the dcnode before we got a chance to
    360  5648       setje 		 * remove it.  They will remove it when they do a vn_rele.
    361  5648       setje 		 */
    362  5648       setje 		mutex_exit(&vp->v_lock);
    363  5648       setje 		mutex_exit(&dctable_lock);
    364  5648       setje 		return;
    365  5648       setje 	}
    366  5648       setje 	mutex_exit(&vp->v_lock);
    367  5648       setje 
    368  5648       setje 	dcnode_free(dp);
    369  5648       setje 
    370  5648       setje 	mutex_exit(&dctable_lock);
    371  5648       setje }
    372  5648       setje 
    373  5648       setje static int
    374  5648       setje dc_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ctp)
    375  5648       setje {
    376  5648       setje 	struct dcnode *dp = VTODC(vp);
    377  5648       setje 	struct vnode *subvp = dp->dc_subvp;
    378  5648       setje 
    379  5648       setje 	return (VOP_FID(subvp, fidp, ctp));
    380  5648       setje }
    381  5648       setje 
    382  5648       setje static int
    383  5648       setje dc_seek(struct vnode *vp, offset_t oof, offset_t *noffp, caller_context_t *ctp)
    384  5648       setje {
    385  5648       setje 	struct dcnode *dp = VTODC(vp);
    386  5648       setje 	struct vnode *subvp = dp->dc_subvp;
    387  5648       setje 
    388  5648       setje 	return (VOP_SEEK(subvp, oof, noffp, ctp));
    389  5648       setje }
    390  5648       setje 
    391  5648       setje static int
    392  5648       setje dc_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
    393  5648       setje     offset_t offset, struct flk_callback *flk_cbp,
    394  5648       setje     cred_t *cr, caller_context_t *ctp)
    395  5648       setje {
    396  5648       setje 	struct dcnode *dp = VTODC(vp);
    397  5648       setje 
    398  5648       setje 	/*
    399  5648       setje 	 * If file is being mapped, disallow frlock.
    400  5648       setje 	 */
    401  5648       setje 	if (dp->dc_mapcnt > 0)
    402  5648       setje 		return (EAGAIN);
    403  5648       setje 
    404  5648       setje 	return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ctp));
    405  5648       setje }
    406  5648       setje 
    407  5648       setje /*ARGSUSED*/
    408  5648       setje static int
    409  5648       setje dc_getblock_miss(struct vnode *vp, offset_t off, size_t len, struct page **ppp,
    410  5648       setje     struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr)
    411  5648       setje {
    412  5648       setje 	struct dcnode *dp = VTODC(vp);
    413  5648       setje 	struct comphdr *hdr = dp->dc_hdr;
    414  5648       setje 	struct page *pp;
    415  5648       setje 	struct buf *bp;
    416  5648       setje 	caddr_t saddr;
    417  5648       setje 	off_t cblkno;
    418  5648       setje 	size_t rdoff, rdsize, dsize;
    419  5648       setje 	long xlen;
    420  5648       setje 	int error, zerr;
    421  5648       setje 
    422  5648       setje 	ASSERT(len == hdr->ch_blksize);
    423  5648       setje 	/*
    424  5648       setje 	 * Get destination pages and make them addressable
    425  5648       setje 	 */
    426  5648       setje 	pp = page_create_va(vp, off, len, PG_WAIT, seg, addr);
    427  5648       setje 	bp = pageio_setup(pp, len, vp, B_READ);
    428  5648       setje 	bp_mapin(bp);
    429  5648       setje 
    430  5648       setje 	/*
    431  5648       setje 	 * read compressed data from subordinate vnode
    432  5648       setje 	 */
    433  5648       setje 	saddr = kmem_cache_alloc(dp->dc_bufcache, KM_SLEEP);
    434  5648       setje 	cblkno = off / len;
    435  5648       setje 	rdoff = hdr->ch_blkmap[cblkno];
    436  5648       setje 	rdsize = hdr->ch_blkmap[cblkno + 1] - rdoff;
    437  5648       setje 	error = vn_rdwr(UIO_READ, dp->dc_subvp, saddr, rdsize, rdoff,
    438  5648       setje 	    UIO_SYSSPACE, 0, 0, cr, NULL);
    439  5648       setje 	if (error)
    440  5648       setje 		goto cleanup;
    441  5648       setje 
    442  5648       setje 	/*
    443  5648       setje 	 * Uncompress
    444  5648       setje 	 */
    445  5648       setje 	dsize = len;
    446  5648       setje 	zerr = z_uncompress(bp->b_un.b_addr, &dsize, saddr, dp->dc_zmax);
    447  5648       setje 	if (zerr != Z_OK) {
    448  5648       setje 		error = EIO;
    449  5648       setje 		goto cleanup;
    450  5648       setje 	}
    451  5648       setje 
    452  5648       setje 	/*
    453  5648       setje 	 * Handle EOF
    454  5648       setje 	 */
    455  5648       setje 	xlen = hdr->ch_fsize - off;
    456  5648       setje 	if (xlen < len) {
    457  5648       setje 		bzero(bp->b_un.b_addr + xlen, len - xlen);
    458  5648       setje 		if (dsize != xlen)
    459  5648       setje 			error = EIO;
    460  5648       setje 	} else if (dsize != len)
    461  5648       setje 		error = EIO;
    462  5648       setje 
    463  5648       setje 	/*
    464  5648       setje 	 * Clean up
    465  5648       setje 	 */
    466  5648       setje cleanup:
    467  5648       setje 	kmem_cache_free(dp->dc_bufcache, saddr);
    468  5648       setje 	pageio_done(bp);
    469  5648       setje 	*ppp = pp;
    470  5648       setje 	return (error);
    471  5648       setje }
    472  5648       setje 
    473  5648       setje static int
    474  5648       setje dc_getblock(struct vnode *vp, offset_t off, size_t len, struct page **ppp,
    475  5648       setje     struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr)
    476  5648       setje {
    477  5648       setje 	struct page *pp, *plist = NULL;
    478  5648       setje 	offset_t pgoff;
    479  5648       setje 	int rdblk;
    480  5648       setje 
    481  5648       setje 	/*
    482  5648       setje 	 * pvn_read_kluster() doesn't quite do what we want, since it
    483  5648       setje 	 * thinks sub block reads are ok.  Here we always decompress
    484  5648       setje 	 * a full block.
    485  5648       setje 	 */
    486  5648       setje 
    487  5648       setje 	/*
    488  5648       setje 	 * Check page cache
    489  5648       setje 	 */
    490  5648       setje 	rdblk = 0;
    491  5648       setje 	for (pgoff = off; pgoff < off + len; pgoff += PAGESIZE) {
    492  5648       setje 		pp = page_lookup(vp, pgoff, SE_EXCL);
    493  5648       setje 		if (pp == NULL) {
    494  5648       setje 			rdblk = 1;
    495  5648       setje 			break;
    496  5648       setje 		}
    497  5648       setje 		page_io_lock(pp);
    498  5648       setje 		page_add(&plist, pp);
    499  5648       setje 		plist = plist->p_next;
    500  5648       setje 	}
    501  5648       setje 	if (!rdblk) {
    502  5648       setje 		*ppp = plist;
    503  5648       setje 		return (0);	/* all pages in cache */
    504  5648       setje 	}
    505  5648       setje 
    506  5648       setje 	/*
    507  5648       setje 	 * Undo any locks so getblock_miss has an open field
    508  5648       setje 	 */
    509  5648       setje 	if (plist != NULL)
    510  5648       setje 		pvn_io_done(plist);
    511  5648       setje 
    512  5648       setje 	return (dc_getblock_miss(vp, off, len, ppp, seg, addr, rw, cr));
    513  5648       setje }
    514  5648       setje 
    515  5648       setje /*ARGSUSED10*/
    516  5648       setje static int
    517  5648       setje dc_getpage(struct vnode *vp, offset_t off, size_t len, uint_t *protp,
    518  5648       setje     struct page *pl[], size_t plsz, struct seg *seg, caddr_t addr,
    519  5648       setje     enum seg_rw rw, struct cred *cr, caller_context_t *ctp)
    520  5648       setje {
    521  5648       setje 	struct dcnode *dp = VTODC(vp);
    522  5648       setje 	struct comphdr *hdr = dp->dc_hdr;
    523  5648       setje 	struct page *pp, *plist = NULL;
    524  5648       setje 	caddr_t vp_baddr;
    525  5648       setje 	offset_t vp_boff, vp_bend;
    526  5648       setje 	size_t bsize = hdr->ch_blksize;
    527  5648       setje 	int nblks, error;
    528  5648       setje 
    529  5648       setje 	/* does not support write */
    530  5648       setje 	if (rw == S_WRITE) {
    531  5648       setje 		panic("write attempt on compressed file");
    532  5648       setje 		/*NOTREACHED*/
    533  5648       setje 	}
    534  5648       setje 
    535  5648       setje 	if (protp)
    536  5648       setje 		*protp = PROT_ALL;
    537  5648       setje 	/*
    538  5648       setje 	 * We don't support asynchronous operation at the moment, so
    539  5648       setje 	 * just pretend we did it.  If the pages are ever actually
    540  5648       setje 	 * needed, they'll get brought in then.
    541  5648       setje 	 */
    542  5648       setje 	if (pl == NULL)
    543  5648       setje 		return (0);
    544  5648       setje 
    545  5648       setje 	/*
    546  5648       setje 	 * Calc block start and end offsets
    547  5648       setje 	 */
    548  5648       setje 	vp_boff = rounddown(off, bsize);
    549  5648       setje 	vp_bend = roundup(off + len, bsize);
    550  5648       setje 	vp_baddr = (caddr_t)rounddown((uintptr_t)addr, bsize);
    551  5648       setje 
    552  5648       setje 	nblks = (vp_bend - vp_boff) / bsize;
    553  5648       setje 	while (nblks--) {
    554  5648       setje 		error = dc_getblock(vp, vp_boff, bsize, &pp, seg, vp_baddr,
    555  5648       setje 		    rw, cr);
    556  5648       setje 		page_list_concat(&plist, &pp);
    557  5648       setje 		vp_boff += bsize;
    558  5648       setje 		vp_baddr += bsize;
    559  5648       setje 	}
    560  5648       setje 	if (!error)
    561  5648       setje 		pvn_plist_init(plist, pl, plsz, off, len, rw);
    562  5648       setje 	else
    563  5648       setje 		pvn_read_done(plist, B_ERROR);
    564  5648       setje 	return (error);
    565  5648       setje }
    566  5648       setje 
    567  5648       setje /*
    568  5648       setje  * This function should never be called. We need to have it to pass
    569  5648       setje  * it as an argument to other functions.
    570  5648       setje  */
    571  5648       setje /*ARGSUSED*/
    572  5648       setje static int
    573  5648       setje dc_putapage(struct vnode *vp, struct page *pp, u_offset_t *offp, size_t *lenp,
    574  5648       setje     int flags, struct cred *cr)
    575  5648       setje {
    576  5648       setje 	/* should never happen */
    577  5648       setje 	cmn_err(CE_PANIC, "dcfs: dc_putapage: dirty page");
    578  5648       setje 	/*NOTREACHED*/
    579  5648       setje 	return (0);
    580  5648       setje }
    581  5648       setje 
    582  5648       setje 
    583  5648       setje /*
    584  5648       setje  * The only flags we support are B_INVAL, B_FREE and B_DONTNEED.
    585  5648       setje  * B_INVAL is set by:
    586  5648       setje  *
    587  5648       setje  *	1) the MC_SYNC command of memcntl(2) to support the MS_INVALIDATE flag.
    588  5648       setje  *	2) the MC_ADVISE command of memcntl(2) with the MADV_DONTNEED advice
    589  5648       setje  *	   which translates to an MC_SYNC with the MS_INVALIDATE flag.
    590  5648       setje  *
    591  5648       setje  * The B_FREE (as well as the B_DONTNEED) flag is set when the
    592  5648       setje  * MADV_SEQUENTIAL advice has been used. VOP_PUTPAGE is invoked
    593  5648       setje  * from SEGVN to release pages behind a pagefault.
    594  5648       setje  */
    595  5648       setje /*ARGSUSED5*/
    596  5648       setje static int
    597  5648       setje dc_putpage(struct vnode *vp, offset_t off, size_t len, int flags,
    598  5648       setje     struct cred *cr, caller_context_t *ctp)
    599  5648       setje {
    600  5648       setje 	int error = 0;
    601  5648       setje 
    602  5648       setje 	if (vp->v_count == 0) {
    603  5648       setje 		panic("dcfs_putpage: bad v_count");
    604  5648       setje 		/*NOTREACHED*/
    605  5648       setje 	}
    606  5648       setje 
    607  5648       setje 	if (vp->v_flag & VNOMAP)
    608  5648       setje 		return (ENOSYS);
    609  5648       setje 
    610  5648       setje 	if (!vn_has_cached_data(vp))	/* no pages mapped */
    611  5648       setje 		return (0);
    612  5648       setje 
    613  5648       setje 	if (len == 0)		/* from 'off' to EOF */
    614  5648       setje 		error = pvn_vplist_dirty(vp, off, dc_putapage, flags, cr);
    615  5648       setje 	else {
    616  5648       setje 		offset_t io_off;
    617  5648       setje 		se_t se = (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED;
    618  5648       setje 
    619  5648       setje 		for (io_off = off; io_off < off + len; io_off += PAGESIZE) {
    620  5648       setje 			page_t *pp;
    621  5648       setje 
    622  5648       setje 			/*
    623  5648       setje 			 * We insist on getting the page only if we are
    624  5648       setje 			 * about to invalidate, free or write it and
    625  5648       setje 			 * the B_ASYNC flag is not set.
    626  5648       setje 			 */
    627  5648       setje 			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0))
    628  5648       setje 				pp = page_lookup(vp, io_off, se);
    629  5648       setje 			else
    630  5648       setje 				pp = page_lookup_nowait(vp, io_off, se);
    631  5648       setje 
    632  5648       setje 			if (pp == NULL)
    633  5648       setje 				continue;
    634  5648       setje 			/*
    635  5648       setje 			 * Normally pvn_getdirty() should return 0, which
    636  5648       setje 			 * impies that it has done the job for us.
    637  5648       setje 			 * The shouldn't-happen scenario is when it returns 1.
    638  5648       setje 			 * This means that the page has been modified and
    639  5648       setje 			 * needs to be put back.
    640  5648       setje 			 * Since we can't write to a dcfs compressed file,
    641  5648       setje 			 * we fake a failed I/O and force pvn_write_done()
    642  5648       setje 			 * to destroy the page.
    643  5648       setje 			 */
    644  5648       setje 			if (pvn_getdirty(pp, flags) == 1) {
    645  5648       setje 				cmn_err(CE_NOTE, "dc_putpage: dirty page");
    646  5648       setje 				pvn_write_done(pp, flags |
    647  5648       setje 				    B_ERROR | B_WRITE | B_INVAL | B_FORCE);
    648  5648       setje 			}
    649  5648       setje 		}
    650  5648       setje 	}
    651  5648       setje 	return (error);
    652  5648       setje }
    653  5648       setje 
    654  5648       setje static int
    655  5648       setje dc_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
    656  5648       setje     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
    657  5648       setje     struct cred *cred, caller_context_t *ctp)
    658  5648       setje {
    659  5648       setje 	struct vattr vattr;
    660  5648       setje 	struct segvn_crargs vn_a;
    661  5648       setje 	int error;
    662  5648       setje 
    663  5648       setje 	if (vp->v_flag & VNOMAP)
    664  5648       setje 		return (ENOSYS);
    665  5648       setje 
    666  5648       setje 	if (off < (offset_t)0 || (offset_t)(off + len) < (offset_t)0)
    667  5648       setje 		return (ENXIO);
    668  5648       setje 
    669  5648       setje 	/*
    670  5648       setje 	 * If file is being locked, disallow mapping.
    671  5648       setje 	 */
    672  5648       setje 	if (error = VOP_GETATTR(VTODC(vp)->dc_subvp, &vattr, 0, cred, ctp))
    673  5648       setje 		return (error);
    674  5648       setje 	if (vn_has_mandatory_locks(vp, vattr.va_mode))
    675  5648       setje 		return (EAGAIN);
    676  5648       setje 
    677  5648       setje 	as_rangelock(as);
    678  5648       setje 
    679  5648       setje 	if ((flags & MAP_FIXED) == 0) {
    680  5648       setje 		map_addr(addrp, len, off, 1, flags);
    681  5648       setje 		if (*addrp == NULL) {
    682  5648       setje 			as_rangeunlock(as);
    683  5648       setje 			return (ENOMEM);
    684  5648       setje 		}
    685  5648       setje 	} else {
    686  5648       setje 		/*
    687  5648       setje 		 * User specified address - blow away any previous mappings
    688  5648       setje 		 */
    689  5648       setje 		(void) as_unmap(as, *addrp, len);
    690  5648       setje 	}
    691  5648       setje 
    692  5648       setje 	vn_a.vp = vp;
    693  5648       setje 	vn_a.offset = off;
    694  5648       setje 	vn_a.type = flags & MAP_TYPE;
    695  5648       setje 	vn_a.prot = prot;
    696  5648       setje 	vn_a.maxprot = maxprot;
    697  5648       setje 	vn_a.flags = flags & ~MAP_TYPE;
    698  5648       setje 	vn_a.cred = cred;
    699  5648       setje 	vn_a.amp = NULL;
    700  5648       setje 	vn_a.szc = 0;
    701  5648       setje 	vn_a.lgrp_mem_policy_flags = 0;
    702  5648       setje 
    703  5648       setje 	error = as_map(as, *addrp, len, segvn_create, &vn_a);
    704  5648       setje 	as_rangeunlock(as);
    705  5648       setje 	return (error);
    706  5648       setje }
    707  5648       setje 
    708  5648       setje /*ARGSUSED*/
    709  5648       setje static int
    710  5648       setje dc_addmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr,
    711  5648       setje     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
    712  5648       setje     struct cred *cr, caller_context_t *ctp)
    713  5648       setje {
    714  5648       setje 	struct dcnode *dp;
    715  5648       setje 
    716  5648       setje 	if (vp->v_flag & VNOMAP)
    717  5648       setje 		return (ENOSYS);
    718  5648       setje 
    719  5648       setje 	dp = VTODC(vp);
    720  5648       setje 	mutex_enter(&dp->dc_lock);
    721  5648       setje 	dp->dc_mapcnt += btopr(len);
    722  5648       setje 	mutex_exit(&dp->dc_lock);
    723  5648       setje 	return (0);
    724  5648       setje }
    725  5648       setje 
    726  5648       setje /*ARGSUSED*/
    727  5648       setje static int
    728  5648       setje dc_delmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr,
    729  5648       setje     size_t len, uint_t prot, uint_t maxprot, uint_t flags,
    730  5648       setje     struct cred *cr, caller_context_t *ctp)
    731  5648       setje {
    732  5648       setje 	struct dcnode *dp;
    733  5648       setje 
    734  5648       setje 	if (vp->v_flag & VNOMAP)
    735  5648       setje 		return (ENOSYS);
    736  5648       setje 
    737  5648       setje 	dp = VTODC(vp);
    738  5648       setje 	mutex_enter(&dp->dc_lock);
    739  5648       setje 	dp->dc_mapcnt -= btopr(len);
    740  5648       setje 	ASSERT(dp->dc_mapcnt >= 0);
    741  5648       setje 	mutex_exit(&dp->dc_lock);
    742  5648       setje 	return (0);
    743  5648       setje }
    744  5648       setje 
    745  5648       setje /*
    746  5648       setje  * Constructor/destructor routines for dcnodes
    747  5648       setje  */
    748  5648       setje /*ARGSUSED1*/
    749  5648       setje static int
    750  5648       setje dcnode_constructor(void *buf, void *cdrarg, int kmflags)
    751  5648       setje {
    752  5648       setje 	struct dcnode *dp = buf;
    753  5648       setje 	struct vnode *vp;
    754  5648       setje 
    755  6754       tomee 	vp = dp->dc_vp = vn_alloc(kmflags);
    756  6754       tomee 	if (vp == NULL) {
    757  6754       tomee 		return (-1);
    758  6754       tomee 	}
    759  6754       tomee 	vp->v_data = dp;
    760  5648       setje 	vp->v_type = VREG;
    761  5648       setje 	vp->v_flag = VNOSWAP;
    762  5648       setje 	vp->v_vfsp = &dc_vfs;
    763  5648       setje 	vn_setops(vp, dc_vnodeops);
    764  5648       setje 	vn_exists(vp);
    765  5648       setje 
    766  5648       setje 	mutex_init(&dp->dc_lock, NULL, MUTEX_DEFAULT, NULL);
    767  5648       setje 	dp->dc_mapcnt = 0;
    768  5648       setje 	dp->dc_lrunext = dp->dc_lruprev = NULL;
    769  6754       tomee 	dp->dc_hdr = NULL;
    770  6754       tomee 	dp->dc_subvp = NULL;
    771  5648       setje 	return (0);
    772  5648       setje }
    773  5648       setje 
    774  5648       setje /*ARGSUSED*/
    775  5648       setje static void
    776  5648       setje dcnode_destructor(void *buf, void *cdrarg)
    777  5648       setje {
    778  5648       setje 	struct dcnode *dp = buf;
    779  5648       setje 	struct vnode *vp = DCTOV(dp);
    780  5648       setje 
    781  5648       setje 	mutex_destroy(&dp->dc_lock);
    782  5648       setje 
    783  5648       setje 	VERIFY(dp->dc_hdr == NULL);
    784  5648       setje 	VERIFY(dp->dc_subvp == NULL);
    785  5648       setje 	vn_invalid(vp);
    786  5648       setje 	vn_free(vp);
    787  5648       setje }
    788  5648       setje 
    789  5648       setje static struct dcnode *
    790  5648       setje dcnode_alloc(void)
    791  5648       setje {
    792  5648       setje 	struct dcnode *dp;
    793  5648       setje 
    794  5648       setje 	/*
    795  5648       setje 	 * If the free list is above DCLRUSIZE
    796  5648       setje 	 * re-use one from it
    797  5648       setje 	 */
    798  5648       setje 	mutex_enter(&dctable_lock);
    799  5648       setje 	if (dclru_len < DCLRUSIZE) {
    800  5648       setje 		mutex_exit(&dctable_lock);
    801  5648       setje 		dp = kmem_cache_alloc(dcnode_cache, KM_SLEEP);
    802  5648       setje 	} else {
    803  5648       setje 		ASSERT(dclru != NULL);
    804  5648       setje 		dp = dclru;
    805  5648       setje 		dclru_sub(dp);
    806  5648       setje 		dcdelete(dp);
    807  5648       setje 		mutex_exit(&dctable_lock);
    808  5648       setje 		dcnode_recycle(dp);
    809  5648       setje 	}
    810  5648       setje 	return (dp);
    811  5648       setje }
    812  5648       setje 
    813  5648       setje static void
    814  5648       setje dcnode_free(struct dcnode *dp)
    815  5648       setje {
    816  5648       setje 	struct vnode *vp = DCTOV(dp);
    817  5648       setje 
    818  5648       setje 	ASSERT(MUTEX_HELD(&dctable_lock));
    819  5648       setje 
    820  5648       setje 	/*
    821  5648       setje 	 * If no cached pages, no need to put it on lru
    822  5648       setje 	 */
    823  5648       setje 	if (!vn_has_cached_data(vp)) {
    824  5648       setje 		dcdelete(dp);
    825  5648       setje 		dcnode_recycle(dp);
    826  5648       setje 		kmem_cache_free(dcnode_cache, dp);
    827  5648       setje 		return;
    828  5648       setje 	}
    829  5648       setje 
    830  5648       setje 	/*
    831  5648       setje 	 * Add to lru, if it's over the limit, free from head
    832  5648       setje 	 */
    833  5648       setje 	dclru_add(dp);
    834  5648       setje 	if (dclru_len > DCLRUSIZE) {
    835  5648       setje 		dp = dclru;
    836  5648       setje 		dclru_sub(dp);
    837  5648       setje 		dcdelete(dp);
    838  5648       setje 		dcnode_recycle(dp);
    839  5648       setje 		kmem_cache_free(dcnode_cache, dp);
    840  5648       setje 	}
    841  5648       setje }
    842  5648       setje 
    843  5648       setje static void
    844  5648       setje dcnode_recycle(struct dcnode *dp)
    845  5648       setje {
    846  5648       setje 	struct vnode *vp;
    847  5648       setje 
    848  5648       setje 	vp = DCTOV(dp);
    849  5648       setje 
    850  5648       setje 	VN_RELE(dp->dc_subvp);
    851  5648       setje 	dp->dc_subvp = NULL;
    852  5648       setje 	(void) pvn_vplist_dirty(vp, 0, dc_putapage, B_INVAL, NULL);
    853  5648       setje 	kmem_free(dp->dc_hdr, dp->dc_hdrsize);
    854  5648       setje 	dp->dc_hdr = NULL;
    855  5648       setje 	dp->dc_hdrsize = dp->dc_zmax = 0;
    856  5648       setje 	dp->dc_bufcache = NULL;
    857  5648       setje 	dp->dc_mapcnt = 0;
    858  5648       setje 	vn_reinit(vp);
    859  5648       setje 	vp->v_type = VREG;
    860  5648       setje 	vp->v_flag = VNOSWAP;
    861  5648       setje 	vp->v_vfsp = &dc_vfs;
    862  5648       setje }
    863  5648       setje 
    864  5648       setje static int
    865  5648       setje dcinit(int fstype, char *name)
    866  5648       setje {
    867  5648       setje 	static const fs_operation_def_t dc_vfsops_template[] = {
    868  5648       setje 		NULL, NULL
    869  5648       setje 	};
    870  5648       setje 	int error;
    871  5648       setje 	major_t dev;
    872  5648       setje 
    873  5648       setje 	error = vfs_setfsops(fstype, dc_vfsops_template, &dc_vfsops);
    874  5648       setje 	if (error) {
    875  5648       setje 		cmn_err(CE_WARN, "dcinit: bad vfs ops template");
    876  5648       setje 		return (error);
    877  5648       setje 	}
    878  5648       setje 	VFS_INIT(&dc_vfs, dc_vfsops, NULL);
    879  5648       setje 	dc_vfs.vfs_flag = VFS_RDONLY;
    880  5648       setje 	dc_vfs.vfs_fstype = fstype;
    881  5648       setje 	if ((dev = getudev()) == (major_t)-1)
    882  5648       setje 		dev = 0;
    883  5648       setje 	dcdev = makedevice(dev, 0);
    884  5648       setje 	dc_vfs.vfs_dev = dcdev;
    885  5648       setje 
    886  5648       setje 	error = vn_make_ops(name, dc_vnodeops_template, &dc_vnodeops);
    887  5648       setje 	if (error != 0) {
    888  5648       setje 		(void) vfs_freevfsops_by_type(fstype);
    889  5648       setje 		cmn_err(CE_WARN, "dcinit: bad vnode ops template");
    890  5648       setje 		return (error);
    891  5648       setje 	}
    892  5648       setje 
    893  5648       setje 	mutex_init(&dctable_lock, NULL, MUTEX_DEFAULT, NULL);
    894  5648       setje 	mutex_init(&dccache_lock, NULL, MUTEX_DEFAULT, NULL);
    895  5648       setje 	dcnode_cache = kmem_cache_create("dcnode_cache", sizeof (struct dcnode),
    896  5648       setje 	    0, dcnode_constructor, dcnode_destructor, NULL, NULL, NULL, 0);
    897  5648       setje 
    898  5648       setje 	return (0);
    899  5648       setje }
    900  5648       setje 
    901  5648       setje /*
    902  5648       setje  * Return shadow vnode with the given vp as its subordinate
    903  5648       setje  */
    904  5648       setje struct vnode *
    905  5648       setje decompvp(struct vnode *vp, cred_t *cred, caller_context_t *ctp)
    906  5648       setje {
    907  5648       setje 	struct dcnode *dp, *ndp;
    908  5648       setje 	struct comphdr thdr, *hdr;
    909  5648       setje 	struct kmem_cache **cpp;
    910  5648       setje 	struct vattr vattr;
    911  5648       setje 	size_t hdrsize, bsize;
    912  5648       setje 	int error;
    913  5648       setje 
    914  5648       setje 	/*
    915  5648       setje 	 * See if we have an existing shadow
    916  5648       setje 	 * If none, we have to manufacture one
    917  5648       setje 	 */
    918  5648       setje 	mutex_enter(&dctable_lock);
    919  5648       setje 	dp = dcfind(vp);
    920  5648       setje 	mutex_exit(&dctable_lock);
    921  5648       setje 	if (dp != NULL)
    922  5648       setje 		return (DCTOV(dp));
    923  5648       setje 
    924  5648       setje 	/*
    925  5648       setje 	 * Make sure it's a valid compressed file
    926  5648       setje 	 */
    927  5648       setje 	hdr = &thdr;
    928  5648       setje 	error = vn_rdwr(UIO_READ, vp, (caddr_t)hdr, sizeof (struct comphdr), 0,
    929  5648       setje 	    UIO_SYSSPACE, 0, 0, cred, NULL);
    930  7858  Krishnendu 	if (error || hdr->ch_magic != CH_MAGIC_ZLIB ||
    931  5648       setje 	    hdr->ch_version != CH_VERSION || hdr->ch_algorithm != CH_ALG_ZLIB ||
    932  5648       setje 	    hdr->ch_fsize == 0 || hdr->ch_blksize < PAGESIZE ||
    933  5648       setje 	    hdr->ch_blksize > ptob(DCCACHESIZE) ||
    934  5648       setje 	    (hdr->ch_blksize & (hdr->ch_blksize - 1)) != 0)
    935  5648       setje 		return (NULL);
    936  5648       setje 
    937  5648       setje 	/* get underlying file size */
    938  5648       setje 	if (VOP_GETATTR(vp, &vattr, 0, cred, ctp) != 0)
    939  5648       setje 		return (NULL);
    940  5648       setje 
    941  5648       setje 	/*
    942  5648       setje 	 * Re-read entire header
    943  5648       setje 	 */
    944  5648       setje 	hdrsize = hdr->ch_blkmap[0] + sizeof (uint64_t);
    945  5648       setje 	hdr = kmem_alloc(hdrsize, KM_SLEEP);
    946  5648       setje 	error = vn_rdwr(UIO_READ, vp, (caddr_t)hdr, hdrsize, 0, UIO_SYSSPACE,
    947  5648       setje 	    0, 0, cred, NULL);
    948  5648       setje 	if (error) {
    949  5648       setje 		kmem_free(hdr, hdrsize);
    950  5648       setje 		return (NULL);
    951  5648       setje 	}
    952  5648       setje 
    953  5648       setje 	/*
    954  5648       setje 	 * add extra blkmap entry to make dc_getblock()'s
    955  5648       setje 	 * life easier
    956  5648       setje 	 */
    957  5648       setje 	bsize = hdr->ch_blksize;
    958  5648       setje 	hdr->ch_blkmap[((hdr->ch_fsize-1) / bsize) + 1] = vattr.va_size;
    959  5648       setje 
    960  5648       setje 	ndp = dcnode_alloc();
    961  5648       setje 	ndp->dc_subvp = vp;
    962  5648       setje 	VN_HOLD(vp);
    963  5648       setje 	ndp->dc_hdr = hdr;
    964  5648       setje 	ndp->dc_hdrsize = hdrsize;
    965  5648       setje 
    966  5648       setje 	/*
    967  5648       setje 	 * Allocate kmem cache if none there already
    968  5648       setje 	 */
    969  5648       setje 	ndp->dc_zmax = ZMAXBUF(bsize);
    970  5648       setje 	cpp = &dcbuf_cache[btop(bsize)];
    971  5648       setje 	mutex_enter(&dccache_lock);
    972  5648       setje 	if (*cpp == NULL)
    973  5648       setje 		*cpp = kmem_cache_create("dcbuf_cache", ndp->dc_zmax, 0, NULL,
    974  5648       setje 		    NULL, NULL, NULL, NULL, 0);
    975  5648       setje 	mutex_exit(&dccache_lock);
    976  5648       setje 	ndp->dc_bufcache = *cpp;
    977  5648       setje 
    978  5648       setje 	/*
    979  5648       setje 	 * Recheck table in case someone else created shadow
    980  5648       setje 	 * while we were blocked above.
    981  5648       setje 	 */
    982  5648       setje 	mutex_enter(&dctable_lock);
    983  5648       setje 	dp = dcfind(vp);
    984  5648       setje 	if (dp != NULL) {
    985  5648       setje 		mutex_exit(&dctable_lock);
    986  5648       setje 		dcnode_recycle(ndp);
    987  5648       setje 		kmem_cache_free(dcnode_cache, ndp);
    988  5648       setje 		return (DCTOV(dp));
    989  5648       setje 	}
    990  5648       setje 	dcinsert(ndp);
    991  5648       setje 	mutex_exit(&dctable_lock);
    992  5648       setje 
    993  5648       setje 	return (DCTOV(ndp));
    994  5648       setje }
    995  5648       setje 
    996  5648       setje 
    997  5648       setje /*
    998  5648       setje  * dcnode lookup table
    999  5648       setje  * These routines maintain a table of dcnodes hashed by their
   1000  5648       setje  * subordinate vnode so that they can be found if they already
   1001  5648       setje  * exist in the vnode cache
   1002  5648       setje  */
   1003  5648       setje 
   1004  5648       setje /*
   1005  5648       setje  * Put a dcnode in the table.
   1006  5648       setje  */
   1007  5648       setje static void
   1008  5648       setje dcinsert(struct dcnode *newdp)
   1009  5648       setje {
   1010  5648       setje 	int idx = DCHASH(newdp->dc_subvp);
   1011  5648       setje 
   1012  5648       setje 	ASSERT(MUTEX_HELD(&dctable_lock));
   1013  5648       setje 	newdp->dc_hash = dctable[idx];
   1014  5648       setje 	dctable[idx] = newdp;
   1015  5648       setje }
   1016  5648       setje 
   1017  5648       setje /*
   1018  5648       setje  * Remove a dcnode from the hash table.
   1019  5648       setje  */
   1020  5648       setje void
   1021  5648       setje dcdelete(struct dcnode *deldp)
   1022  5648       setje {
   1023  5648       setje 	int idx = DCHASH(deldp->dc_subvp);
   1024  5648       setje 	struct dcnode *dp, *prevdp;
   1025  5648       setje 
   1026  5648       setje 	ASSERT(MUTEX_HELD(&dctable_lock));
   1027  5648       setje 	dp = dctable[idx];
   1028  5648       setje 	if (dp == deldp)
   1029  5648       setje 		dctable[idx] = dp->dc_hash;
   1030  5648       setje 	else {
   1031  5648       setje 		for (prevdp = dp, dp = dp->dc_hash; dp != NULL;
   1032  5648       setje 		    prevdp = dp, dp = dp->dc_hash) {
   1033  5648       setje 			if (dp == deldp) {
   1034  5648       setje 				prevdp->dc_hash = dp->dc_hash;
   1035  5648       setje 				break;
   1036  5648       setje 			}
   1037  5648       setje 		}
   1038  5648       setje 	}
   1039  5648       setje 	ASSERT(dp != NULL);
   1040  5648       setje }
   1041  5648       setje 
   1042  5648       setje /*
   1043  5648       setje  * Find a shadow vnode in the dctable hash list.
   1044  5648       setje  */
   1045  5648       setje static struct dcnode *
   1046  5648       setje dcfind(struct vnode *vp)
   1047  5648       setje {
   1048  5648       setje 	struct dcnode *dp;
   1049  5648       setje 
   1050  5648       setje 	ASSERT(MUTEX_HELD(&dctable_lock));
   1051  5648       setje 	for (dp = dctable[DCHASH(vp)]; dp != NULL; dp = dp->dc_hash)
   1052  5648       setje 		if (dp->dc_subvp == vp) {
   1053  5648       setje 			VN_HOLD(DCTOV(dp));
   1054  5648       setje 			if (dp->dc_lrunext)
   1055  5648       setje 				dclru_sub(dp);
   1056  5648       setje 			return (dp);
   1057  5648       setje 		}
   1058  5648       setje 	return (NULL);
   1059  5648       setje }
   1060  5648       setje 
   1061  5648       setje #ifdef	DEBUG
   1062  5648       setje static int
   1063  5648       setje dclru_count(void)
   1064  5648       setje {
   1065  5648       setje 	struct dcnode *dp;
   1066  5648       setje 	int i = 0;
   1067  5648       setje 
   1068  5648       setje 	if (dclru == NULL)
   1069  5648       setje 		return (0);
   1070  5648       setje 	for (dp = dclru; dp->dc_lrunext != dclru; dp = dp->dc_lrunext)
   1071  5648       setje 		i++;
   1072  5648       setje 	return (i + 1);
   1073  5648       setje }
   1074  5648       setje #endif
   1075  5648       setje 
   1076  5648       setje static void
   1077  5648       setje dclru_add(struct dcnode *dp)
   1078  5648       setje {
   1079  5648       setje 	/*
   1080  5648       setje 	 * Add to dclru as double-link chain
   1081  5648       setje 	 */
   1082  5648       setje 	ASSERT(MUTEX_HELD(&dctable_lock));
   1083  5648       setje 	if (dclru == NULL) {
   1084  5648       setje 		dclru = dp;
   1085  5648       setje 		dp->dc_lruprev = dp->dc_lrunext = dp;
   1086  5648       setje 	} else {
   1087  5648       setje 		struct dcnode *last = dclru->dc_lruprev;
   1088  5648       setje 
   1089  5648       setje 		dclru->dc_lruprev = dp;
   1090  5648       setje 		last->dc_lrunext = dp;
   1091  5648       setje 		dp->dc_lruprev = last;
   1092  5648       setje 		dp->dc_lrunext = dclru;
   1093  5648       setje 	}
   1094  5648       setje 	dclru_len++;
   1095  5648       setje 	ASSERT(dclru_len == dclru_count());
   1096  5648       setje }
   1097  5648       setje 
   1098  5648       setje static void
   1099  5648       setje dclru_sub(struct dcnode *dp)
   1100  5648       setje {
   1101  5648       setje 	ASSERT(MUTEX_HELD(&dctable_lock));
   1102  5648       setje 	dp->dc_lrunext->dc_lruprev = dp->dc_lruprev;
   1103  5648       setje 	dp->dc_lruprev->dc_lrunext = dp->dc_lrunext;
   1104  5648       setje 	if (dp == dclru)
   1105  5648       setje 		dclru = dp->dc_lrunext == dp ? NULL : dp->dc_lrunext;
   1106  5648       setje 	dp->dc_lrunext = dp->dc_lruprev = NULL;
   1107  5648       setje 	dclru_len--;
   1108  5648       setje 	ASSERT(dclru_len == dclru_count());
   1109  5648       setje }
   1110