Home | History | Annotate | Download | only in zfs
      1    789    ahrens /*
      2    789    ahrens  * CDDL HEADER START
      3    789    ahrens  *
      4    789    ahrens  * The contents of this file are subject to the terms of the
      5   2597  nd150628  * Common Development and Distribution License (the "License").
      6   2597  nd150628  * You may not use this file except in compliance with the License.
      7    789    ahrens  *
      8    789    ahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9    789    ahrens  * or http://www.opensolaris.org/os/licensing.
     10    789    ahrens  * See the License for the specific language governing permissions
     11    789    ahrens  * and limitations under the License.
     12    789    ahrens  *
     13    789    ahrens  * When distributing Covered Code, include this CDDL HEADER in each
     14    789    ahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15    789    ahrens  * If applicable, add the following below this CDDL HEADER, with the
     16    789    ahrens  * fields enclosed by brackets "[]" replaced with your own identifying
     17    789    ahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
     18    789    ahrens  *
     19    789    ahrens  * CDDL HEADER END
     20    789    ahrens  */
     21   3886       ahl 
     22    789    ahrens /*
     23  10922      Jeff  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24    789    ahrens  * Use is subject to license terms.
     25    789    ahrens  */
     26    789    ahrens 
     27    789    ahrens /*
     28   2597  nd150628  * We keep our own copy of this algorithm for 2 main reasons:
     29  10922      Jeff  *	1. If we didn't, anyone modifying common/os/compress.c would
     30   2597  nd150628  *         directly break our on disk format
     31  10922      Jeff  *	2. Our version of lzjb does not have a number of checks that the
     32   2597  nd150628  *         common/os version needs and uses
     33  10922      Jeff  *	3. We initialize the lempel to ensure deterministic results,
     34  10922      Jeff  *	   so that identical blocks can always be deduplicated.
     35    789    ahrens  * In particular, we are adding the "feature" that compress() can
     36    789    ahrens  * take a destination buffer size and return -1 if the data will not
     37    789    ahrens  * compress to d_len or less.
     38    789    ahrens  */
     39    789    ahrens 
     40    789    ahrens #include <sys/types.h>
     41    789    ahrens 
     42    789    ahrens #define	MATCH_BITS	6
     43    789    ahrens #define	MATCH_MIN	3
     44    789    ahrens #define	MATCH_MAX	((1 << MATCH_BITS) + (MATCH_MIN - 1))
     45    789    ahrens #define	OFFSET_MASK	((1 << (16 - MATCH_BITS)) - 1)
     46  10922      Jeff #define	LEMPEL_SIZE	1024
     47    789    ahrens 
     48   3886       ahl /*ARGSUSED*/
     49    789    ahrens size_t
     50   3886       ahl lzjb_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
     51    789    ahrens {
     52    789    ahrens 	uchar_t *src = s_start;
     53    789    ahrens 	uchar_t *dst = d_start;
     54    789    ahrens 	uchar_t *cpy, *copymap;
     55    789    ahrens 	int copymask = 1 << (NBBY - 1);
     56  10922      Jeff 	int mlen, offset, hash;
     57    789    ahrens 	uint16_t *hp;
     58  10922      Jeff 	uint16_t lempel[LEMPEL_SIZE] = { 0 };
     59    789    ahrens 
     60    789    ahrens 	while (src < (uchar_t *)s_start + s_len) {
     61    789    ahrens 		if ((copymask <<= 1) == (1 << NBBY)) {
     62  10922      Jeff 			if (dst >= (uchar_t *)d_start + d_len - 1 - 2 * NBBY)
     63    789    ahrens 				return (s_len);
     64    789    ahrens 			copymask = 1;
     65    789    ahrens 			copymap = dst;
     66    789    ahrens 			*dst++ = 0;
     67    789    ahrens 		}
     68    789    ahrens 		if (src > (uchar_t *)s_start + s_len - MATCH_MAX) {
     69    789    ahrens 			*dst++ = *src++;
     70    789    ahrens 			continue;
     71    789    ahrens 		}
     72  10922      Jeff 		hash = (src[0] << 16) + (src[1] << 8) + src[2];
     73  10922      Jeff 		hash += hash >> 9;
     74  10922      Jeff 		hash += hash >> 5;
     75  10922      Jeff 		hp = &lempel[hash & (LEMPEL_SIZE - 1)];
     76    789    ahrens 		offset = (intptr_t)(src - *hp) & OFFSET_MASK;
     77    789    ahrens 		*hp = (uint16_t)(uintptr_t)src;
     78    789    ahrens 		cpy = src - offset;
     79    789    ahrens 		if (cpy >= (uchar_t *)s_start && cpy != src &&
     80    789    ahrens 		    src[0] == cpy[0] && src[1] == cpy[1] && src[2] == cpy[2]) {
     81    789    ahrens 			*copymap |= copymask;
     82    789    ahrens 			for (mlen = MATCH_MIN; mlen < MATCH_MAX; mlen++)
     83    789    ahrens 				if (src[mlen] != cpy[mlen])
     84    789    ahrens 					break;
     85    789    ahrens 			*dst++ = ((mlen - MATCH_MIN) << (NBBY - MATCH_BITS)) |
     86    789    ahrens 			    (offset >> NBBY);
     87    789    ahrens 			*dst++ = (uchar_t)offset;
     88    789    ahrens 			src += mlen;
     89    789    ahrens 		} else {
     90    789    ahrens 			*dst++ = *src++;
     91    789    ahrens 		}
     92    789    ahrens 	}
     93    789    ahrens 	return (dst - (uchar_t *)d_start);
     94    789    ahrens }
     95    789    ahrens 
     96    789    ahrens /*ARGSUSED*/
     97    789    ahrens int
     98   3886       ahl lzjb_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
     99    789    ahrens {
    100    789    ahrens 	uchar_t *src = s_start;
    101    789    ahrens 	uchar_t *dst = d_start;
    102    789    ahrens 	uchar_t *d_end = (uchar_t *)d_start + d_len;
    103    789    ahrens 	uchar_t *cpy, copymap;
    104    789    ahrens 	int copymask = 1 << (NBBY - 1);
    105    789    ahrens 
    106    789    ahrens 	while (dst < d_end) {
    107    789    ahrens 		if ((copymask <<= 1) == (1 << NBBY)) {
    108    789    ahrens 			copymask = 1;
    109    789    ahrens 			copymap = *src++;
    110    789    ahrens 		}
    111    789    ahrens 		if (copymap & copymask) {
    112    789    ahrens 			int mlen = (src[0] >> (NBBY - MATCH_BITS)) + MATCH_MIN;
    113    789    ahrens 			int offset = ((src[0] << NBBY) | src[1]) & OFFSET_MASK;
    114    789    ahrens 			src += 2;
    115    789    ahrens 			if ((cpy = dst - offset) < (uchar_t *)d_start)
    116    789    ahrens 				return (-1);
    117    789    ahrens 			while (--mlen >= 0 && dst < d_end)
    118    789    ahrens 				*dst++ = *cpy++;
    119    789    ahrens 		} else {
    120    789    ahrens 			*dst++ = *src++;
    121    789    ahrens 		}
    122    789    ahrens 	}
    123    789    ahrens 	return (0);
    124    789    ahrens }
    125