Home | History | Annotate | Download | only in common
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include <assert.h>
     28 #include <ctype.h>
     29 #include <errno.h>
     30 #include <libintl.h>
     31 #include <stdio.h>
     32 #include <stdlib.h>
     33 #include <strings.h>
     34 #include <unistd.h>
     35 #include <stddef.h>
     36 #include <fcntl.h>
     37 #include <sys/mount.h>
     38 #include <pthread.h>
     39 #include <umem.h>
     40 
     41 #include <libzfs.h>
     42 
     43 #include "zfs_namecheck.h"
     44 #include "zfs_prop.h"
     45 #include "zfs_fletcher.h"
     46 #include "libzfs_impl.h"
     47 #include <sha2.h>
     48 
     49 /* in libzfs_dataset.c */
     50 extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
     51 
     52 static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t,
     53     int, avl_tree_t *, char **);
     54 
     55 static const zio_cksum_t zero_cksum = { 0 };
     56 
     57 typedef struct dedup_arg {
     58 	int	inputfd;
     59 	int	outputfd;
     60 	libzfs_handle_t  *dedup_hdl;
     61 } dedup_arg_t;
     62 
     63 typedef struct dataref {
     64 	uint64_t ref_guid;
     65 	uint64_t ref_object;
     66 	uint64_t ref_offset;
     67 } dataref_t;
     68 
     69 typedef struct dedup_entry {
     70 	struct dedup_entry	*dde_next;
     71 	zio_cksum_t dde_chksum;
     72 	dataref_t dde_ref;
     73 } dedup_entry_t;
     74 
     75 #define	MAX_DDT_PHYSMEM_PERCENT		20
     76 #define	SMALLEST_POSSIBLE_MAX_DDT_MB		128
     77 
     78 typedef struct dedup_table {
     79 	dedup_entry_t	**dedup_hash_array;
     80 	umem_cache_t	*ddecache;
     81 	uint64_t	max_ddt_size;  /* max dedup table size in bytes */
     82 	uint64_t	cur_ddt_size;  /* current dedup table size in bytes */
     83 	uint64_t	ddt_count;
     84 	int		numhashbits;
     85 	boolean_t	ddt_full;
     86 } dedup_table_t;
     87 
     88 static int
     89 high_order_bit(uint64_t n)
     90 {
     91 	int count;
     92 
     93 	for (count = 0; n != 0; count++)
     94 		n >>= 1;
     95 	return (count);
     96 }
     97 
     98 static size_t
     99 ssread(void *buf, size_t len, FILE *stream)
    100 {
    101 	size_t outlen;
    102 
    103 	if ((outlen = fread(buf, len, 1, stream)) == 0)
    104 		return (0);
    105 
    106 	return (outlen);
    107 }
    108 
    109 static void
    110 ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp,
    111     zio_cksum_t *cs, dataref_t *dr)
    112 {
    113 	dedup_entry_t	*dde;
    114 
    115 	if (ddt->cur_ddt_size >= ddt->max_ddt_size) {
    116 		if (ddt->ddt_full == B_FALSE) {
    117 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
    118 			    "Dedup table full.  Deduplication will continue "
    119 			    "with existing table entries"));
    120 			ddt->ddt_full = B_TRUE;
    121 		}
    122 		return;
    123 	}
    124 
    125 	if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT))
    126 	    != NULL) {
    127 		assert(*ddepp == NULL);
    128 		dde->dde_next = NULL;
    129 		dde->dde_chksum = *cs;
    130 		dde->dde_ref = *dr;
    131 		*ddepp = dde;
    132 		ddt->cur_ddt_size += sizeof (dedup_entry_t);
    133 		ddt->ddt_count++;
    134 	}
    135 }
    136 
    137 /*
    138  * Using the specified dedup table, do a lookup for an entry with
    139  * the checksum cs.  If found, return the block's reference info
    140  * in *dr. Otherwise, insert a new entry in the dedup table, using
    141  * the reference information specified by *dr.
    142  *
    143  * return value:  true - entry was found
    144  *		  false - entry was not found
    145  */
    146 static boolean_t
    147 ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs,
    148     dataref_t *dr)
    149 {
    150 	uint32_t hashcode;
    151 	dedup_entry_t **ddepp;
    152 
    153 	hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits);
    154 
    155 	for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL;
    156 	    ddepp = &((*ddepp)->dde_next)) {
    157 		if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs)) {
    158 			*dr = (*ddepp)->dde_ref;
    159 			return (B_TRUE);
    160 		}
    161 	}
    162 	ddt_hash_append(hdl, ddt, ddepp, cs, dr);
    163 	return (B_FALSE);
    164 }
    165 
    166 static int
    167 cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd)
    168 {
    169 	fletcher_4_incremental_native(buf, len, zc);
    170 	return (write(outfd, buf, len));
    171 }
    172 
    173 /*
    174  * This function is started in a separate thread when the dedup option
    175  * has been requested.  The main send thread determines the list of
    176  * snapshots to be included in the send stream and makes the ioctl calls
    177  * for each one.  But instead of having the ioctl send the output to the
    178  * the output fd specified by the caller of zfs_send()), the
    179  * ioctl is told to direct the output to a pipe, which is read by the
    180  * alternate thread running THIS function.  This function does the
    181  * dedup'ing by:
    182  *  1. building a dedup table (the DDT)
    183  *  2. doing checksums on each data block and inserting a record in the DDT
    184  *  3. looking for matching checksums, and
    185  *  4.  sending a DRR_WRITE_BYREF record instead of a write record whenever
    186  *      a duplicate block is found.
    187  * The output of this function then goes to the output fd requested
    188  * by the caller of zfs_send().
    189  */
    190 static void *
    191 cksummer(void *arg)
    192 {
    193 	dedup_arg_t *dda = arg;
    194 	char *buf = malloc(1<<20);
    195 	dmu_replay_record_t thedrr;
    196 	dmu_replay_record_t *drr = &thedrr;
    197 	struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
    198 	struct drr_end *drre = &thedrr.drr_u.drr_end;
    199 	struct drr_object *drro = &thedrr.drr_u.drr_object;
    200 	struct drr_write *drrw = &thedrr.drr_u.drr_write;
    201 	FILE *ofp;
    202 	int outfd;
    203 	dmu_replay_record_t wbr_drr;
    204 	struct drr_write_byref *wbr_drrr = &wbr_drr.drr_u.drr_write_byref;
    205 	dedup_table_t ddt;
    206 	zio_cksum_t stream_cksum;
    207 	uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
    208 	uint64_t numbuckets;
    209 
    210 	ddt.max_ddt_size =
    211 	    MAX((physmem * MAX_DDT_PHYSMEM_PERCENT)/100,
    212 	    SMALLEST_POSSIBLE_MAX_DDT_MB<<20);
    213 
    214 	numbuckets = ddt.max_ddt_size/(sizeof (dedup_entry_t));
    215 
    216 	/*
    217 	 * numbuckets must be a power of 2.  Increase number to
    218 	 * a power of 2 if necessary.
    219 	 */
    220 	if (!ISP2(numbuckets))
    221 		numbuckets = 1 << high_order_bit(numbuckets);
    222 
    223 	ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *));
    224 	ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0,
    225 	    NULL, NULL, NULL, NULL, NULL, 0);
    226 	ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *);
    227 	ddt.numhashbits = high_order_bit(numbuckets) - 1;
    228 	ddt.ddt_full = B_FALSE;
    229 
    230 	/* Initialize the write-by-reference block. */
    231 	wbr_drr.drr_type = DRR_WRITE_BYREF;
    232 	wbr_drr.drr_payloadlen = 0;
    233 
    234 	outfd = dda->outputfd;
    235 	ofp = fdopen(dda->inputfd, "r");
    236 	while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) {
    237 
    238 		switch (drr->drr_type) {
    239 		case DRR_BEGIN:
    240 		{
    241 			int	fflags;
    242 			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
    243 
    244 			/* set the DEDUP feature flag for this stream */
    245 			fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
    246 			fflags |= DMU_BACKUP_FEATURE_DEDUP;
    247 			DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
    248 
    249 			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
    250 			    &stream_cksum, outfd) == -1)
    251 				goto out;
    252 			if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
    253 			    DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) {
    254 				int sz = drr->drr_payloadlen;
    255 
    256 				if (sz > 1<<20) {
    257 					free(buf);
    258 					buf = malloc(sz);
    259 				}
    260 				(void) ssread(buf, sz, ofp);
    261 				if (ferror(stdin))
    262 					perror("fread");
    263 				if (cksum_and_write(buf, sz, &stream_cksum,
    264 				    outfd) == -1)
    265 					goto out;
    266 			}
    267 			break;
    268 		}
    269 
    270 		case DRR_END:
    271 		{
    272 			/* use the recalculated checksum */
    273 			ZIO_SET_CHECKSUM(&drre->drr_checksum,
    274 			    stream_cksum.zc_word[0], stream_cksum.zc_word[1],
    275 			    stream_cksum.zc_word[2], stream_cksum.zc_word[3]);
    276 			if ((write(outfd, drr,
    277 			    sizeof (dmu_replay_record_t))) == -1)
    278 				goto out;
    279 			break;
    280 		}
    281 
    282 		case DRR_OBJECT:
    283 		{
    284 			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
    285 			    &stream_cksum, outfd) == -1)
    286 				goto out;
    287 			if (drro->drr_bonuslen > 0) {
    288 				(void) ssread(buf,
    289 				    P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
    290 				    ofp);
    291 				if (cksum_and_write(buf,
    292 				    P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
    293 				    &stream_cksum, outfd) == -1)
    294 					goto out;
    295 			}
    296 			break;
    297 		}
    298 
    299 		case DRR_FREEOBJECTS:
    300 		{
    301 			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
    302 			    &stream_cksum, outfd) == -1)
    303 				goto out;
    304 			break;
    305 		}
    306 
    307 		case DRR_WRITE:
    308 		{
    309 			dataref_t	dataref;
    310 
    311 			(void) ssread(buf, drrw->drr_length, ofp);
    312 			/*
    313 			 * If the block doesn't already have a dedup
    314 			 * checksum, calculate one.
    315 			 */
    316 			if (ZIO_CHECKSUM_EQUAL(drrw->drr_blkcksum,
    317 			    zero_cksum)) {
    318 				SHA256_CTX	ctx;
    319 				zio_cksum_t	tmpsha256;
    320 
    321 				SHA256Init(&ctx);
    322 				SHA256Update(&ctx, buf, drrw->drr_length);
    323 				SHA256Final(&tmpsha256, &ctx);
    324 				drrw->drr_blkcksum.zc_word[0] =
    325 				    BE_64(tmpsha256.zc_word[0]);
    326 				drrw->drr_blkcksum.zc_word[1] =
    327 				    BE_64(tmpsha256.zc_word[1]);
    328 				drrw->drr_blkcksum.zc_word[2] =
    329 				    BE_64(tmpsha256.zc_word[2]);
    330 				drrw->drr_blkcksum.zc_word[3] =
    331 				    BE_64(tmpsha256.zc_word[3]);
    332 			}
    333 
    334 			dataref.ref_guid = drrw->drr_toguid;
    335 			dataref.ref_object = drrw->drr_object;
    336 			dataref.ref_offset = drrw->drr_offset;
    337 
    338 			if (ddt_update(dda->dedup_hdl, &ddt,
    339 			    &drrw->drr_blkcksum, &dataref)) {
    340 				/* block already present in stream */
    341 				wbr_drrr->drr_object = drrw->drr_object;
    342 				wbr_drrr->drr_offset = drrw->drr_offset;
    343 				wbr_drrr->drr_length = drrw->drr_length;
    344 				wbr_drrr->drr_toguid = drrw->drr_toguid;
    345 				wbr_drrr->drr_refguid = dataref.ref_guid;
    346 				wbr_drrr->drr_refobject =
    347 				    dataref.ref_object;
    348 				wbr_drrr->drr_refoffset =
    349 				    dataref.ref_offset;
    350 
    351 				wbr_drrr->drr_blkcksum = drrw->drr_blkcksum;
    352 
    353 				if (cksum_and_write(&wbr_drr,
    354 				    sizeof (dmu_replay_record_t), &stream_cksum,
    355 				    outfd) == -1)
    356 					goto out;
    357 			} else {
    358 				/* block not previously seen */
    359 				if (cksum_and_write(drr,
    360 				    sizeof (dmu_replay_record_t), &stream_cksum,
    361 				    outfd) == -1)
    362 					goto out;
    363 				if (cksum_and_write(buf,
    364 				    drrw->drr_length,
    365 				    &stream_cksum, outfd) == -1)
    366 					goto out;
    367 			}
    368 			break;
    369 		}
    370 
    371 		case DRR_FREE:
    372 		{
    373 			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
    374 			    &stream_cksum, outfd) == -1)
    375 				goto out;
    376 			break;
    377 		}
    378 
    379 		default:
    380 			(void) printf("INVALID record type 0x%x\n",
    381 			    drr->drr_type);
    382 			/* should never happen, so assert */
    383 			assert(B_FALSE);
    384 		}
    385 	}
    386 out:
    387 	umem_cache_destroy(ddt.ddecache);
    388 	free(ddt.dedup_hash_array);
    389 	free(buf);
    390 	(void) fclose(ofp);
    391 
    392 	return (NULL);
    393 }
    394 
    395 /*
    396  * Routines for dealing with the AVL tree of fs-nvlists
    397  */
    398 typedef struct fsavl_node {
    399 	avl_node_t fn_node;
    400 	nvlist_t *fn_nvfs;
    401 	char *fn_snapname;
    402 	uint64_t fn_guid;
    403 } fsavl_node_t;
    404 
    405 static int
    406 fsavl_compare(const void *arg1, const void *arg2)
    407 {
    408 	const fsavl_node_t *fn1 = arg1;
    409 	const fsavl_node_t *fn2 = arg2;
    410 
    411 	if (fn1->fn_guid > fn2->fn_guid)
    412 		return (+1);
    413 	else if (fn1->fn_guid < fn2->fn_guid)
    414 		return (-1);
    415 	else
    416 		return (0);
    417 }
    418 
    419 /*
    420  * Given the GUID of a snapshot, find its containing filesystem and
    421  * (optionally) name.
    422  */
    423 static nvlist_t *
    424 fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
    425 {
    426 	fsavl_node_t fn_find;
    427 	fsavl_node_t *fn;
    428 
    429 	fn_find.fn_guid = snapguid;
    430 
    431 	fn = avl_find(avl, &fn_find, NULL);
    432 	if (fn) {
    433 		if (snapname)
    434 			*snapname = fn->fn_snapname;
    435 		return (fn->fn_nvfs);
    436 	}
    437 	return (NULL);
    438 }
    439 
    440 static void
    441 fsavl_destroy(avl_tree_t *avl)
    442 {
    443 	fsavl_node_t *fn;
    444 	void *cookie;
    445 
    446 	if (avl == NULL)
    447 		return;
    448 
    449 	cookie = NULL;
    450 	while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
    451 		free(fn);
    452 	avl_destroy(avl);
    453 	free(avl);
    454 }
    455 
    456 /*
    457  * Given an nvlist, produce an avl tree of snapshots, ordered by guid
    458  */
    459 static avl_tree_t *
    460 fsavl_create(nvlist_t *fss)
    461 {
    462 	avl_tree_t *fsavl;
    463 	nvpair_t *fselem = NULL;
    464 
    465 	if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
    466 		return (NULL);
    467 
    468 	avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
    469 	    offsetof(fsavl_node_t, fn_node));
    470 
    471 	while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
    472 		nvlist_t *nvfs, *snaps;
    473 		nvpair_t *snapelem = NULL;
    474 
    475 		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
    476 		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
    477 
    478 		while ((snapelem =
    479 		    nvlist_next_nvpair(snaps, snapelem)) != NULL) {
    480 			fsavl_node_t *fn;
    481 			uint64_t guid;
    482 
    483 			VERIFY(0 == nvpair_value_uint64(snapelem, &guid));
    484 			if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
    485 				fsavl_destroy(fsavl);
    486 				return (NULL);
    487 			}
    488 			fn->fn_nvfs = nvfs;
    489 			fn->fn_snapname = nvpair_name(snapelem);
    490 			fn->fn_guid = guid;
    491 
    492 			/*
    493 			 * Note: if there are multiple snaps with the
    494 			 * same GUID, we ignore all but one.
    495 			 */
    496 			if (avl_find(fsavl, fn, NULL) == NULL)
    497 				avl_add(fsavl, fn);
    498 			else
    499 				free(fn);
    500 		}
    501 	}
    502 
    503 	return (fsavl);
    504 }
    505 
    506 /*
    507  * Routines for dealing with the giant nvlist of fs-nvlists, etc.
    508  */
    509 typedef struct send_data {
    510 	uint64_t parent_fromsnap_guid;
    511 	nvlist_t *parent_snaps;
    512 	nvlist_t *fss;
    513 	nvlist_t *snapprops;
    514 	const char *fromsnap;
    515 	const char *tosnap;
    516 	boolean_t recursive;
    517 
    518 	/*
    519 	 * The header nvlist is of the following format:
    520 	 * {
    521 	 *   "tosnap" -> string
    522 	 *   "fromsnap" -> string (if incremental)
    523 	 *   "fss" -> {
    524 	 *	id -> {
    525 	 *
    526 	 *	 "name" -> string (full name; for debugging)
    527 	 *	 "parentfromsnap" -> number (guid of fromsnap in parent)
    528 	 *
    529 	 *	 "props" -> { name -> value (only if set here) }
    530 	 *	 "snaps" -> { name (lastname) -> number (guid) }
    531 	 *	 "snapprops" -> { name (lastname) -> { name -> value } }
    532 	 *
    533 	 *	 "origin" -> number (guid) (if clone)
    534 	 *	 "sent" -> boolean (not on-disk)
    535 	 *	}
    536 	 *   }
    537 	 * }
    538 	 *
    539 	 */
    540 } send_data_t;
    541 
    542 static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv);
    543 
    544 static int
    545 send_iterate_snap(zfs_handle_t *zhp, void *arg)
    546 {
    547 	send_data_t *sd = arg;
    548 	uint64_t guid = zhp->zfs_dmustats.dds_guid;
    549 	char *snapname;
    550 	nvlist_t *nv;
    551 
    552 	snapname = strrchr(zhp->zfs_name, '@')+1;
    553 
    554 	VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid));
    555 	/*
    556 	 * NB: if there is no fromsnap here (it's a newly created fs in
    557 	 * an incremental replication), we will substitute the tosnap.
    558 	 */
    559 	if ((sd->fromsnap && strcmp(snapname, sd->fromsnap) == 0) ||
    560 	    (sd->parent_fromsnap_guid == 0 && sd->tosnap &&
    561 	    strcmp(snapname, sd->tosnap) == 0)) {
    562 		sd->parent_fromsnap_guid = guid;
    563 	}
    564 
    565 	VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
    566 	send_iterate_prop(zhp, nv);
    567 	VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv));
    568 	nvlist_free(nv);
    569 
    570 	zfs_close(zhp);
    571 	return (0);
    572 }
    573 
    574 static void
    575 send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
    576 {
    577 	nvpair_t *elem = NULL;
    578 
    579 	while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) {
    580 		char *propname = nvpair_name(elem);
    581 		zfs_prop_t prop = zfs_name_to_prop(propname);
    582 		nvlist_t *propnv;
    583 
    584 		if (!zfs_prop_user(propname)) {
    585 			/*
    586 			 * Realistically, this should never happen.  However,
    587 			 * we want the ability to add DSL properties without
    588 			 * needing to make incompatible version changes.  We
    589 			 * need to ignore unknown properties to allow older
    590 			 * software to still send datasets containing these
    591 			 * properties, with the unknown properties elided.
    592 			 */
    593 			if (prop == ZPROP_INVAL)
    594 				continue;
    595 
    596 			if (zfs_prop_readonly(prop))
    597 				continue;
    598 		}
    599 
    600 		verify(nvpair_value_nvlist(elem, &propnv) == 0);
    601 		if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
    602 		    prop == ZFS_PROP_REFQUOTA ||
    603 		    prop == ZFS_PROP_REFRESERVATION) {
    604 			char *source;
    605 			uint64_t value;
    606 			verify(nvlist_lookup_uint64(propnv,
    607 			    ZPROP_VALUE, &value) == 0);
    608 			if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
    609 				continue;
    610 			/*
    611 			 * May have no source before SPA_VERSION_RECVD_PROPS,
    612 			 * but is still modifiable.
    613 			 */
    614 			if (nvlist_lookup_string(propnv,
    615 			    ZPROP_SOURCE, &source) == 0) {
    616 				if ((strcmp(source, zhp->zfs_name) != 0) &&
    617 				    (strcmp(source,
    618 				    ZPROP_SOURCE_VAL_RECVD) != 0))
    619 					continue;
    620 			}
    621 		} else {
    622 			char *source;
    623 			if (nvlist_lookup_string(propnv,
    624 			    ZPROP_SOURCE, &source) != 0)
    625 				continue;
    626 			if ((strcmp(source, zhp->zfs_name) != 0) &&
    627 			    (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0))
    628 				continue;
    629 		}
    630 
    631 		if (zfs_prop_user(propname) ||
    632 		    zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
    633 			char *value;
    634 			verify(nvlist_lookup_string(propnv,
    635 			    ZPROP_VALUE, &value) == 0);
    636 			VERIFY(0 == nvlist_add_string(nv, propname, value));
    637 		} else {
    638 			uint64_t value;
    639 			verify(nvlist_lookup_uint64(propnv,
    640 			    ZPROP_VALUE, &value) == 0);
    641 			VERIFY(0 == nvlist_add_uint64(nv, propname, value));
    642 		}
    643 	}
    644 }
    645 
    646 /*
    647  * recursively generate nvlists describing datasets.  See comment
    648  * for the data structure send_data_t above for description of contents
    649  * of the nvlist.
    650  */
    651 static int
    652 send_iterate_fs(zfs_handle_t *zhp, void *arg)
    653 {
    654 	send_data_t *sd = arg;
    655 	nvlist_t *nvfs, *nv;
    656 	int rv = 0;
    657 	uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
    658 	uint64_t guid = zhp->zfs_dmustats.dds_guid;
    659 	char guidstring[64];
    660 
    661 	VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0));
    662 	VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name));
    663 	VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap",
    664 	    sd->parent_fromsnap_guid));
    665 
    666 	if (zhp->zfs_dmustats.dds_origin[0]) {
    667 		zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
    668 		    zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
    669 		if (origin == NULL)
    670 			return (-1);
    671 		VERIFY(0 == nvlist_add_uint64(nvfs, "origin",
    672 		    origin->zfs_dmustats.dds_guid));
    673 	}
    674 
    675 	/* iterate over props */
    676 	VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
    677 	send_iterate_prop(zhp, nv);
    678 	VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv));
    679 	nvlist_free(nv);
    680 
    681 	/* iterate over snaps, and set sd->parent_fromsnap_guid */
    682 	sd->parent_fromsnap_guid = 0;
    683 	VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0));
    684 	VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0));
    685 	(void) zfs_iter_snapshots(zhp, send_iterate_snap, sd);
    686 	VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps));
    687 	VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops));
    688 	nvlist_free(sd->parent_snaps);
    689 	nvlist_free(sd->snapprops);
    690 
    691 	/* add this fs to nvlist */
    692 	(void) snprintf(guidstring, sizeof (guidstring),
    693 	    "0x%llx", (longlong_t)guid);
    694 	VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs));
    695 	nvlist_free(nvfs);
    696 
    697 	/* iterate over children */
    698 	if (sd->recursive)
    699 		rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
    700 
    701 	sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
    702 
    703 	zfs_close(zhp);
    704 	return (rv);
    705 }
    706 
    707 static int
    708 gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
    709     const char *tosnap, boolean_t recursive, nvlist_t **nvlp, avl_tree_t **avlp)
    710 {
    711 	zfs_handle_t *zhp;
    712 	send_data_t sd = { 0 };
    713 	int error;
    714 
    715 	zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
    716 	if (zhp == NULL)
    717 		return (EZFS_BADTYPE);
    718 
    719 	VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
    720 	sd.fromsnap = fromsnap;
    721 	sd.tosnap = tosnap;
    722 	sd.recursive = recursive;
    723 
    724 	if ((error = send_iterate_fs(zhp, &sd)) != 0) {
    725 		nvlist_free(sd.fss);
    726 		if (avlp != NULL)
    727 			*avlp = NULL;
    728 		*nvlp = NULL;
    729 		return (error);
    730 	}
    731 
    732 	if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
    733 		nvlist_free(sd.fss);
    734 		*nvlp = NULL;
    735 		return (EZFS_NOMEM);
    736 	}
    737 
    738 	*nvlp = sd.fss;
    739 	return (0);
    740 }
    741 
    742 /*
    743  * Routines for dealing with the sorted snapshot functionality
    744  */
    745 typedef struct zfs_node {
    746 	zfs_handle_t	*zn_handle;
    747 	avl_node_t	zn_avlnode;
    748 } zfs_node_t;
    749 
    750 static int
    751 zfs_sort_snaps(zfs_handle_t *zhp, void *data)
    752 {
    753 	avl_tree_t *avl = data;
    754 	zfs_node_t *node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t));
    755 
    756 	node->zn_handle = zhp;
    757 	avl_add(avl, node);
    758 	return (0);
    759 }
    760 
    761 /* ARGSUSED */
    762 static int
    763 zfs_snapshot_compare(const void *larg, const void *rarg)
    764 {
    765 	zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle;
    766 	zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle;
    767 	uint64_t lcreate, rcreate;
    768 
    769 	/*
    770 	 * Sort them according to creation time.  We use the hidden
    771 	 * CREATETXG property to get an absolute ordering of snapshots.
    772 	 */
    773 	lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG);
    774 	rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG);
    775 
    776 	if (lcreate < rcreate)
    777 		return (-1);
    778 	else if (lcreate > rcreate)
    779 		return (+1);
    780 	else
    781 		return (0);
    782 }
    783 
    784 int
    785 zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data)
    786 {
    787 	int ret = 0;
    788 	zfs_node_t *node;
    789 	avl_tree_t avl;
    790 	void *cookie = NULL;
    791 
    792 	avl_create(&avl, zfs_snapshot_compare,
    793 	    sizeof (zfs_node_t), offsetof(zfs_node_t, zn_avlnode));
    794 
    795 	ret = zfs_iter_snapshots(zhp, zfs_sort_snaps, &avl);
    796 
    797 	for (node = avl_first(&avl); node != NULL; node = AVL_NEXT(&avl, node))
    798 		ret |= callback(node->zn_handle, data);
    799 
    800 	while ((node = avl_destroy_nodes(&avl, &cookie)) != NULL)
    801 		free(node);
    802 
    803 	avl_destroy(&avl);
    804 
    805 	return (ret);
    806 }
    807 
    808 /*
    809  * Routines specific to "zfs send"
    810  */
    811 typedef struct send_dump_data {
    812 	/* these are all just the short snapname (the part after the @) */
    813 	const char *fromsnap;
    814 	const char *tosnap;
    815 	char prevsnap[ZFS_MAXNAMELEN];
    816 	boolean_t seenfrom, seento, replicate, doall, fromorigin;
    817 	boolean_t verbose;
    818 	int outfd;
    819 	boolean_t err;
    820 	nvlist_t *fss;
    821 	avl_tree_t *fsavl;
    822 	snapfilter_cb_t *filter_cb;
    823 	void *filter_cb_arg;
    824 } send_dump_data_t;
    825 
    826 /*
    827  * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
    828  * NULL) to the file descriptor specified by outfd.
    829  */
    830 static int
    831 dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, boolean_t fromorigin,
    832     int outfd)
    833 {
    834 	zfs_cmd_t zc = { 0 };
    835 	libzfs_handle_t *hdl = zhp->zfs_hdl;
    836 
    837 	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
    838 	assert(fromsnap == NULL || fromsnap[0] == '\0' || !fromorigin);
    839 
    840 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
    841 	if (fromsnap)
    842 		(void) strlcpy(zc.zc_value, fromsnap, sizeof (zc.zc_value));
    843 	zc.zc_cookie = outfd;
    844 	zc.zc_obj = fromorigin;
    845 
    846 	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SEND, &zc) != 0) {
    847 		char errbuf[1024];
    848 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
    849 		    "warning: cannot send '%s'"), zhp->zfs_name);
    850 
    851 		switch (errno) {
    852 
    853 		case EXDEV:
    854 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
    855 			    "not an earlier snapshot from the same fs"));
    856 			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
    857 
    858 		case ENOENT:
    859 			if (zfs_dataset_exists(hdl, zc.zc_name,
    860 			    ZFS_TYPE_SNAPSHOT)) {
    861 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
    862 				    "incremental source (@%s) does not exist"),
    863 				    zc.zc_value);
    864 			}
    865 			return (zfs_error(hdl, EZFS_NOENT, errbuf));
    866 
    867 		case EDQUOT:
    868 		case EFBIG:
    869 		case EIO:
    870 		case ENOLINK:
    871 		case ENOSPC:
    872 		case ENOSTR:
    873 		case ENXIO:
    874 		case EPIPE:
    875 		case ERANGE:
    876 		case EFAULT:
    877 		case EROFS:
    878 			zfs_error_aux(hdl, strerror(errno));
    879 			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
    880 
    881 		default:
    882 			return (zfs_standard_error(hdl, errno, errbuf));
    883 		}
    884 	}
    885 
    886 	return (0);
    887 }
    888 
    889 static int
    890 dump_snapshot(zfs_handle_t *zhp, void *arg)
    891 {
    892 	send_dump_data_t *sdd = arg;
    893 	const char *thissnap;
    894 	int err;
    895 
    896 	thissnap = strchr(zhp->zfs_name, '@') + 1;
    897 
    898 	if (sdd->fromsnap && !sdd->seenfrom &&
    899 	    strcmp(sdd->fromsnap, thissnap) == 0) {
    900 		sdd->seenfrom = B_TRUE;
    901 		(void) strcpy(sdd->prevsnap, thissnap);
    902 		zfs_close(zhp);
    903 		return (0);
    904 	}
    905 
    906 	if (sdd->seento || !sdd->seenfrom) {
    907 		zfs_close(zhp);
    908 		return (0);
    909 	}
    910 
    911 	if (strcmp(sdd->tosnap, thissnap) == 0)
    912 		sdd->seento = B_TRUE;
    913 
    914 	/*
    915 	 * If a filter function exists, call it to determine whether
    916 	 * this snapshot will be sent.
    917 	 */
    918 	if (sdd->filter_cb != NULL &&
    919 	    sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE) {
    920 		/*
    921 		 * This snapshot is filtered out.  Don't send it, and don't
    922 		 * set prevsnap, so it will be as if this snapshot didn't
    923 		 * exist, and the next accepted snapshot will be sent as
    924 		 * an incremental from the last accepted one, or as the
    925 		 * first (and full) snapshot in the case of a replication,
    926 		 * non-incremental send.
    927 		 */
    928 		zfs_close(zhp);
    929 		return (0);
    930 	}
    931 
    932 	/* send it */
    933 	if (sdd->verbose) {
    934 		(void) fprintf(stderr, "sending from @%s to %s\n",
    935 		    sdd->prevsnap, zhp->zfs_name);
    936 	}
    937 
    938 	err = dump_ioctl(zhp, sdd->prevsnap,
    939 	    sdd->prevsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate),
    940 	    sdd->outfd);
    941 
    942 	(void) strcpy(sdd->prevsnap, thissnap);
    943 	zfs_close(zhp);
    944 	return (err);
    945 }
    946 
    947 static int
    948 dump_filesystem(zfs_handle_t *zhp, void *arg)
    949 {
    950 	int rv = 0;
    951 	send_dump_data_t *sdd = arg;
    952 	boolean_t missingfrom = B_FALSE;
    953 	zfs_cmd_t zc = { 0 };
    954 
    955 	(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
    956 	    zhp->zfs_name, sdd->tosnap);
    957 	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
    958 		(void) fprintf(stderr, "WARNING: "
    959 		    "could not send %s@%s: does not exist\n",
    960 		    zhp->zfs_name, sdd->tosnap);
    961 		sdd->err = B_TRUE;
    962 		return (0);
    963 	}
    964 
    965 	if (sdd->replicate && sdd->fromsnap) {
    966 		/*
    967 		 * If this fs does not have fromsnap, and we're doing
    968 		 * recursive, we need to send a full stream from the
    969 		 * beginning (or an incremental from the origin if this
    970 		 * is a clone).  If we're doing non-recursive, then let
    971 		 * them get the error.
    972 		 */
    973 		(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
    974 		    zhp->zfs_name, sdd->fromsnap);
    975 		if (ioctl(zhp->zfs_hdl->libzfs_fd,
    976 		    ZFS_IOC_OBJSET_STATS, &zc) != 0) {
    977 			missingfrom = B_TRUE;
    978 		}
    979 	}
    980 
    981 	if (sdd->doall) {
    982 		sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0;
    983 		if (sdd->fromsnap == NULL || missingfrom)
    984 			sdd->seenfrom = B_TRUE;
    985 
    986 		rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
    987 		if (!sdd->seenfrom) {
    988 			(void) fprintf(stderr,
    989 			    "WARNING: could not send %s@%s:\n"
    990 			    "incremental source (%s@%s) does not exist\n",
    991 			    zhp->zfs_name, sdd->tosnap,
    992 			    zhp->zfs_name, sdd->fromsnap);
    993 			sdd->err = B_TRUE;
    994 		} else if (!sdd->seento) {
    995 			if (sdd->fromsnap) {
    996 				(void) fprintf(stderr,
    997 				    "WARNING: could not send %s@%s:\n"
    998 				    "incremental source (%s@%s) "
    999 				    "is not earlier than it\n",
   1000 				    zhp->zfs_name, sdd->tosnap,
   1001 				    zhp->zfs_name, sdd->fromsnap);
   1002 			} else {
   1003 				(void) fprintf(stderr, "WARNING: "
   1004 				    "could not send %s@%s: does not exist\n",
   1005 				    zhp->zfs_name, sdd->tosnap);
   1006 			}
   1007 			sdd->err = B_TRUE;
   1008 		}
   1009 	} else {
   1010 		zfs_handle_t *snapzhp;
   1011 		char snapname[ZFS_MAXNAMELEN];
   1012 
   1013 		(void) snprintf(snapname, sizeof (snapname), "%s@%s",
   1014 		    zfs_get_name(zhp), sdd->tosnap);
   1015 		snapzhp = zfs_open(zhp->zfs_hdl, snapname, ZFS_TYPE_SNAPSHOT);
   1016 		if (snapzhp == NULL) {
   1017 			rv = -1;
   1018 		} else {
   1019 			if (sdd->filter_cb == NULL ||
   1020 			    sdd->filter_cb(snapzhp, sdd->filter_cb_arg) ==
   1021 			    B_TRUE) {
   1022 				rv = dump_ioctl(snapzhp,
   1023 				    missingfrom ? NULL : sdd->fromsnap,
   1024 				    sdd->fromorigin || missingfrom,
   1025 				    sdd->outfd);
   1026 			}
   1027 			sdd->seento = B_TRUE;
   1028 			zfs_close(snapzhp);
   1029 		}
   1030 	}
   1031 
   1032 	return (rv);
   1033 }
   1034 
   1035 static int
   1036 dump_filesystems(zfs_handle_t *rzhp, void *arg)
   1037 {
   1038 	send_dump_data_t *sdd = arg;
   1039 	nvpair_t *fspair;
   1040 	boolean_t needagain, progress;
   1041 
   1042 	if (!sdd->replicate)
   1043 		return (dump_filesystem(rzhp, sdd));
   1044 
   1045 again:
   1046 	needagain = progress = B_FALSE;
   1047 	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
   1048 	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
   1049 		nvlist_t *fslist;
   1050 		char *fsname;
   1051 		zfs_handle_t *zhp;
   1052 		int err;
   1053 		uint64_t origin_guid = 0;
   1054 		nvlist_t *origin_nv;
   1055 
   1056 		VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
   1057 		if (nvlist_lookup_boolean(fslist, "sent") == 0)
   1058 			continue;
   1059 
   1060 		VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
   1061 		(void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
   1062 
   1063 		origin_nv = fsavl_find(sdd->fsavl, origin_guid, NULL);
   1064 		if (origin_nv &&
   1065 		    nvlist_lookup_boolean(origin_nv, "sent") == ENOENT) {
   1066 			/*
   1067 			 * origin has not been sent yet;
   1068 			 * skip this clone.
   1069 			 */
   1070 			needagain = B_TRUE;
   1071 			continue;
   1072 		}
   1073 
   1074 		zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
   1075 		if (zhp == NULL)
   1076 			return (-1);
   1077 		err = dump_filesystem(zhp, sdd);
   1078 		VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
   1079 		progress = B_TRUE;
   1080 		zfs_close(zhp);
   1081 		if (err)
   1082 			return (err);
   1083 	}
   1084 	if (needagain) {
   1085 		assert(progress);
   1086 		goto again;
   1087 	}
   1088 	return (0);
   1089 }
   1090 
   1091 /*
   1092  * Generate a send stream for the dataset identified by the argument zhp.
   1093  *
   1094  * The content of the send stream is the snapshot identified by
   1095  * 'tosnap'.  Incremental streams are requested in two ways:
   1096  *     - from the snapshot identified by "fromsnap" (if non-null) or
   1097  *     - from the origin of the dataset identified by zhp, which must
   1098  *	 be a clone.  In this case, "fromsnap" is null and "fromorigin"
   1099  *	 is TRUE.
   1100  *
   1101  * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
   1102  * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
   1103  * if "replicate" is set.  If "doall" is set, dump all the intermediate
   1104  * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
   1105  * case too. If "props" is set, send properties.
   1106  */
   1107 int
   1108 zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
   1109     sendflags_t flags, int outfd, snapfilter_cb_t filter_func,
   1110     void *cb_arg)
   1111 {
   1112 	char errbuf[1024];
   1113 	send_dump_data_t sdd = { 0 };
   1114 	int err;
   1115 	nvlist_t *fss = NULL;
   1116 	avl_tree_t *fsavl = NULL;
   1117 	char holdtag[128];
   1118 	static uint64_t holdseq;
   1119 	int spa_version;
   1120 	boolean_t holdsnaps = B_FALSE;
   1121 	pthread_t tid;
   1122 	int pipefd[2];
   1123 	dedup_arg_t dda = { 0 };
   1124 	int featureflags = 0;
   1125 
   1126 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
   1127 	    "cannot send '%s'"), zhp->zfs_name);
   1128 
   1129 	if (fromsnap && fromsnap[0] == '\0') {
   1130 		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
   1131 		    "zero-length incremental source"));
   1132 		return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
   1133 	}
   1134 
   1135 	if (zfs_spa_version(zhp, &spa_version) == 0 &&
   1136 	    spa_version >= SPA_VERSION_USERREFS)
   1137 		holdsnaps = B_TRUE;
   1138 
   1139 	if (flags.dedup) {
   1140 		featureflags |= DMU_BACKUP_FEATURE_DEDUP;
   1141 		if (err = pipe(pipefd)) {
   1142 			zfs_error_aux(zhp->zfs_hdl, strerror(errno));
   1143 			return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
   1144 			    errbuf));
   1145 		}
   1146 		dda.outputfd = outfd;
   1147 		dda.inputfd = pipefd[1];
   1148 		dda.dedup_hdl = zhp->zfs_hdl;
   1149 		if (err = pthread_create(&tid, NULL, cksummer, &dda)) {
   1150 			(void) close(pipefd[0]);
   1151 			(void) close(pipefd[1]);
   1152 			zfs_error_aux(zhp->zfs_hdl, strerror(errno));
   1153 			return (zfs_error(zhp->zfs_hdl,
   1154 			    EZFS_THREADCREATEFAILED, errbuf));
   1155 		}
   1156 	}
   1157 
   1158 	if (flags.replicate || flags.doall || flags.props) {
   1159 		dmu_replay_record_t drr = { 0 };
   1160 		char *packbuf = NULL;
   1161 		size_t buflen = 0;
   1162 		zio_cksum_t zc = { 0 };
   1163 
   1164 		if (holdsnaps) {
   1165 			(void) snprintf(holdtag, sizeof (holdtag),
   1166 			    ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
   1167 			++holdseq;
   1168 			err = zfs_hold_range(zhp, fromsnap, tosnap,
   1169 			    holdtag, B_TRUE);
   1170 			if (err)
   1171 				goto err_out;
   1172 		}
   1173 
   1174 		if (flags.replicate || flags.props) {
   1175 			nvlist_t *hdrnv;
   1176 
   1177 			VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
   1178 			if (fromsnap) {
   1179 				VERIFY(0 == nvlist_add_string(hdrnv,
   1180 				    "fromsnap", fromsnap));
   1181 			}
   1182 			VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
   1183 			if (!flags.replicate) {
   1184 				VERIFY(0 == nvlist_add_boolean(hdrnv,
   1185 				    "not_recursive"));
   1186 			}
   1187 
   1188 			err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
   1189 			    fromsnap, tosnap, flags.replicate, &fss, &fsavl);
   1190 			if (err) {
   1191 				if (holdsnaps) {
   1192 					(void) zfs_release_range(zhp, fromsnap,
   1193 					    tosnap, holdtag);
   1194 				}
   1195 				goto err_out;
   1196 			}
   1197 			VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
   1198 			err = nvlist_pack(hdrnv, &packbuf, &buflen,
   1199 			    NV_ENCODE_XDR, 0);
   1200 			nvlist_free(hdrnv);
   1201 			if (err) {
   1202 				fsavl_destroy(fsavl);
   1203 				nvlist_free(fss);
   1204 				if (holdsnaps) {
   1205 					(void) zfs_release_range(zhp, fromsnap,
   1206 					    tosnap, holdtag);
   1207 				}
   1208 				goto stderr_out;
   1209 			}
   1210 		}
   1211 
   1212 		/* write first begin record */
   1213 		drr.drr_type = DRR_BEGIN;
   1214 		drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
   1215 		DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.drr_versioninfo,
   1216 		    DMU_COMPOUNDSTREAM);
   1217 		DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.drr_versioninfo,
   1218 		    featureflags);
   1219 		(void) snprintf(drr.drr_u.drr_begin.drr_toname,
   1220 		    sizeof (drr.drr_u.drr_begin.drr_toname),
   1221 		    "%s@%s", zhp->zfs_name, tosnap);
   1222 		drr.drr_payloadlen = buflen;
   1223 		err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);
   1224 
   1225 		/* write header nvlist */
   1226 		if (err != -1 && packbuf != NULL) {
   1227 			err = cksum_and_write(packbuf, buflen, &zc, outfd);
   1228 		}
   1229 		free(packbuf);
   1230 		if (err == -1) {
   1231 			fsavl_destroy(fsavl);
   1232 			nvlist_free(fss);
   1233 			if (holdsnaps) {
   1234 				(void) zfs_release_range(zhp, fromsnap, tosnap,
   1235 				    holdtag);
   1236 			}
   1237 			err = errno;
   1238 			goto stderr_out;
   1239 		}
   1240 
   1241 		/* write end record */
   1242 		if (err != -1) {
   1243 			bzero(&drr, sizeof (drr));
   1244 			drr.drr_type = DRR_END;
   1245 			drr.drr_u.drr_end.drr_checksum = zc;
   1246 			err = write(outfd, &drr, sizeof (drr));
   1247 			if (err == -1) {
   1248 				fsavl_destroy(fsavl);
   1249 				nvlist_free(fss);
   1250 				if (holdsnaps) {
   1251 					(void) zfs_release_range(zhp, fromsnap,
   1252 					    tosnap, holdtag);
   1253 				}
   1254 				err = errno;
   1255 				goto stderr_out;
   1256 			}
   1257 		}
   1258 	}
   1259 
   1260 	/* dump each stream */
   1261 	sdd.fromsnap = fromsnap;
   1262 	sdd.tosnap = tosnap;
   1263 	if (flags.dedup)
   1264 		sdd.outfd = pipefd[0];
   1265 	else
   1266 		sdd.outfd = outfd;
   1267 	sdd.replicate = flags.replicate;
   1268 	sdd.doall = flags.doall;
   1269 	sdd.fromorigin = flags.fromorigin;
   1270 	sdd.fss = fss;
   1271 	sdd.fsavl = fsavl;
   1272 	sdd.verbose = flags.verbose;
   1273 	sdd.filter_cb = filter_func;
   1274 	sdd.filter_cb_arg = cb_arg;
   1275 	err = dump_filesystems(zhp, &sdd);
   1276 	fsavl_destroy(fsavl);
   1277 	nvlist_free(fss);
   1278 
   1279 	if (flags.dedup) {
   1280 		(void) close(pipefd[0]);
   1281 		(void) pthread_join(tid, NULL);
   1282 	}
   1283 
   1284 	if (flags.replicate || flags.doall || flags.props) {
   1285 		/*
   1286 		 * write final end record.  NB: want to do this even if
   1287 		 * there was some error, because it might not be totally
   1288 		 * failed.
   1289 		 */
   1290 		dmu_replay_record_t drr = { 0 };
   1291 		drr.drr_type = DRR_END;
   1292 		if (holdsnaps) {
   1293 			(void) zfs_release_range(zhp, fromsnap, tosnap,
   1294 			    holdtag);
   1295 		}
   1296 		if (write(outfd, &drr, sizeof (drr)) == -1) {
   1297 			return (zfs_standard_error(zhp->zfs_hdl,
   1298 			    errno, errbuf));
   1299 		}
   1300 	}
   1301 
   1302 	return (err || sdd.err);
   1303 
   1304 stderr_out:
   1305 	err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
   1306 err_out:
   1307 	if (flags.dedup) {
   1308 		(void) pthread_cancel(tid);
   1309 		(void) pthread_join(tid, NULL);
   1310 		(void) close(pipefd[0]);
   1311 	}
   1312 	return (err);
   1313 }
   1314 
   1315 /*
   1316  * Routines specific to "zfs recv"
   1317  */
   1318 
   1319 static int
   1320 recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
   1321     boolean_t byteswap, zio_cksum_t *zc)
   1322 {
   1323 	char *cp = buf;
   1324 	int rv;
   1325 	int len = ilen;
   1326 
   1327 	do {
   1328 		rv = read(fd, cp, len);
   1329 		cp += rv;
   1330 		len -= rv;
   1331 	} while (rv > 0);
   1332 
   1333 	if (rv < 0 || len != 0) {
   1334 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
   1335 		    "failed to read from stream"));
   1336 		return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
   1337 		    "cannot receive")));
   1338 	}
   1339 
   1340 	if (zc) {
   1341 		if (byteswap)
   1342 			fletcher_4_incremental_byteswap(buf, ilen, zc);
   1343 		else
   1344 			fletcher_4_incremental_native(buf, ilen, zc);
   1345 	}
   1346 	return (0);
   1347 }
   1348 
   1349 static int
   1350 recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
   1351     boolean_t byteswap, zio_cksum_t *zc)
   1352 {
   1353 	char *buf;
   1354 	int err;
   1355 
   1356 	buf = zfs_alloc(hdl, len);
   1357 	if (buf == NULL)
   1358 		return (ENOMEM);
   1359 
   1360 	err = recv_read(hdl, fd, buf, len, byteswap, zc);
   1361 	if (err != 0) {
   1362 		free(buf);
   1363 		return (err);
   1364 	}
   1365 
   1366 	err = nvlist_unpack(buf, len, nvp, 0);
   1367 	free(buf);
   1368 	if (err != 0) {
   1369 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
   1370 		    "stream (malformed nvlist)"));
   1371 		return (EINVAL);
   1372 	}
   1373 	return (0);
   1374 }
   1375 
   1376 static int
   1377 recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
   1378     int baselen, char *newname, recvflags_t flags)
   1379 {
   1380 	static int seq;
   1381 	zfs_cmd_t zc = { 0 };
   1382 	int err;
   1383 	prop_changelist_t *clp;
   1384 	zfs_handle_t *zhp;
   1385 
   1386 	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
   1387 	if (zhp == NULL)
   1388 		return (-1);
   1389 	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
   1390 	    flags.force ? MS_FORCE : 0);
   1391 	zfs_close(zhp);
   1392 	if (clp == NULL)
   1393 		return (-1);
   1394 	err = changelist_prefix(clp);
   1395 	if (err)
   1396 		return (err);
   1397 
   1398 	zc.zc_objset_type = DMU_OST_ZFS;
   1399 	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
   1400 
   1401 	if (tryname) {
   1402 		(void) strcpy(newname, tryname);
   1403 
   1404 		(void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value));
   1405 
   1406 		if (flags.verbose) {
   1407 			(void) printf("attempting rename %s to %s\n",
   1408 			    zc.zc_name, zc.zc_value);
   1409 		}
   1410 		err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
   1411 		if (err == 0)
   1412 			changelist_rename(clp, name, tryname);
   1413 	} else {
   1414 		err = ENOENT;
   1415 	}
   1416 
   1417 	if (err != 0 && strncmp(name+baselen, "recv-", 5) != 0) {
   1418 		seq++;
   1419 
   1420 		(void) strncpy(newname, name, baselen);
   1421 		(void) snprintf(newname+baselen, ZFS_MAXNAMELEN-baselen,
   1422 		    "recv-%u-%u", getpid(), seq);
   1423 		(void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
   1424 
   1425 		if (flags.verbose) {
   1426 			(void) printf("failed - trying rename %s to %s\n",
   1427 			    zc.zc_name, zc.zc_value);
   1428 		}
   1429 		err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
   1430 		if (err == 0)
   1431 			changelist_rename(clp, name, newname);
   1432 		if (err && flags.verbose) {
   1433 			(void) printf("failed (%u) - "
   1434 			    "will try again on next pass\n", errno);
   1435 		}
   1436 		err = EAGAIN;
   1437 	} else if (flags.verbose) {
   1438 		if (err == 0)
   1439 			(void) printf("success\n");
   1440 		else
   1441 			(void) printf("failed (%u)\n", errno);
   1442 	}
   1443 
   1444 	(void) changelist_postfix(clp);
   1445 	changelist_free(clp);
   1446 
   1447 	return (err);
   1448 }
   1449 
   1450 static int
   1451 recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
   1452     char *newname, recvflags_t flags)
   1453 {
   1454 	zfs_cmd_t zc = { 0 };
   1455 	int err = 0;
   1456 	prop_changelist_t *clp;
   1457 	zfs_handle_t *zhp;
   1458 	boolean_t defer = B_FALSE;
   1459 	int spa_version;
   1460 
   1461 	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
   1462 	if (zhp == NULL)
   1463 		return (-1);
   1464 	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
   1465 	    flags.force ? MS_FORCE : 0);
   1466 	if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
   1467 	    zfs_spa_version(zhp, &spa_version) == 0 &&
   1468 	    spa_version >= SPA_VERSION_USERREFS)
   1469 		defer = B_TRUE;
   1470 	zfs_close(zhp);
   1471 	if (clp == NULL)
   1472 		return (-1);
   1473 	err = changelist_prefix(clp);
   1474 	if (err)
   1475 		return (err);
   1476 
   1477 	zc.zc_objset_type = DMU_OST_ZFS;
   1478 	zc.zc_defer_destroy = defer;
   1479 	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
   1480 
   1481 	if (flags.verbose)
   1482 		(void) printf("attempting destroy %s\n", zc.zc_name);
   1483 	err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
   1484 	if (err == 0) {
   1485 		if (flags.verbose)
   1486 			(void) printf("success\n");
   1487 		changelist_remove(clp, zc.zc_name);
   1488 	}
   1489 
   1490 	(void) changelist_postfix(clp);
   1491 	changelist_free(clp);
   1492 
   1493 	/*
   1494 	 * Deferred destroy might destroy the snapshot or only mark it to be
   1495 	 * destroyed later, and it returns success in either case.
   1496 	 */
   1497 	if (err != 0 || (defer && zfs_dataset_exists(hdl, name,
   1498 	    ZFS_TYPE_SNAPSHOT))) {
   1499 		err = recv_rename(hdl, name, NULL, baselen, newname, flags);
   1500 	}
   1501 
   1502 	return (err);
   1503 }
   1504 
   1505 typedef struct guid_to_name_data {
   1506 	uint64_t guid;
   1507 	char *name;
   1508 } guid_to_name_data_t;
   1509 
   1510 static int
   1511 guid_to_name_cb(zfs_handle_t *zhp, void *arg)
   1512 {
   1513 	guid_to_name_data_t *gtnd = arg;
   1514 	int err;
   1515 
   1516 	if (zhp->zfs_dmustats.dds_guid == gtnd->guid) {
   1517 		(void) strcpy(gtnd->name, zhp->zfs_name);
   1518 		return (EEXIST);
   1519 	}
   1520 	err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
   1521 	zfs_close(zhp);
   1522 	return (err);
   1523 }
   1524 
   1525 static int
   1526 guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
   1527     char *name)
   1528 {
   1529 	/* exhaustive search all local snapshots */
   1530 	guid_to_name_data_t gtnd;
   1531 	int err = 0;
   1532 	zfs_handle_t *zhp;
   1533 	char *cp;
   1534 
   1535 	gtnd.guid = guid;
   1536 	gtnd.name = name;
   1537 
   1538 	if (strchr(parent, '@') == NULL) {
   1539 		zhp = make_dataset_handle(hdl, parent);
   1540 		if (zhp != NULL) {
   1541 			err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
   1542 			zfs_close(zhp);
   1543 			if (err == EEXIST)
   1544 				return (0);
   1545 		}
   1546 	}
   1547 
   1548 	cp = strchr(parent, '/');
   1549 	if (cp)
   1550 		*cp = '\0';
   1551 	zhp = make_dataset_handle(hdl, parent);
   1552 	if (cp)
   1553 		*cp = '/';
   1554 
   1555 	if (zhp) {
   1556 		err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
   1557 		zfs_close(zhp);
   1558 	}
   1559 
   1560 	return (err == EEXIST ? 0 : ENOENT);
   1561 
   1562 }
   1563 
   1564 /*
   1565  * Return true if dataset guid1 is created before guid2.
   1566  */
   1567 static int
   1568 created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
   1569     uint64_t guid1, uint64_t guid2)
   1570 {
   1571 	nvlist_t *nvfs;
   1572 	char *fsname, *snapname;
   1573 	char buf[ZFS_MAXNAMELEN];
   1574 	int rv;
   1575 	zfs_node_t zn1, zn2;
   1576 
   1577 	if (guid2 == 0)
   1578 		return (0);
   1579 	if (guid1 == 0)
   1580 		return (1);
   1581 
   1582 	nvfs = fsavl_find(avl, guid1, &snapname);
   1583 	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
   1584 	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
   1585 	zn1.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
   1586 	if (zn1.zn_handle == NULL)
   1587 		return (-1);
   1588 
   1589 	nvfs = fsavl_find(avl, guid2, &snapname);
   1590 	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
   1591 	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
   1592 	zn2.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
   1593 	if (zn2.zn_handle == NULL) {
   1594 		zfs_close(zn2.zn_handle);
   1595 		return (-1);
   1596 	}
   1597 
   1598 	rv = (zfs_snapshot_compare(&zn1, &zn2) == -1);
   1599 
   1600 	zfs_close(zn1.zn_handle);
   1601 	zfs_close(zn2.zn_handle);
   1602 
   1603 	return (rv);
   1604 }
   1605 
   1606 static int
   1607 recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
   1608     recvflags_t flags, nvlist_t *stream_nv, avl_tree_t *stream_avl)
   1609 {
   1610 	nvlist_t *local_nv;
   1611 	avl_tree_t *local_avl;
   1612 	nvpair_t *fselem, *nextfselem;
   1613 	char *tosnap, *fromsnap;
   1614 	char newname[ZFS_MAXNAMELEN];
   1615 	int error;
   1616 	boolean_t needagain, progress, recursive;
   1617 	char *s1, *s2;
   1618 
   1619 	VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
   1620 	VERIFY(0 == nvlist_lookup_string(stream_nv, "tosnap", &tosnap));
   1621 
   1622 	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
   1623 	    ENOENT);
   1624 
   1625 	if (flags.dryrun)
   1626 		return (0);
   1627 
   1628 again:
   1629 	needagain = progress = B_FALSE;
   1630 
   1631 	if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
   1632 	    recursive, &local_nv, &local_avl)) != 0)
   1633 		return (error);
   1634 
   1635 	/*
   1636 	 * Process deletes and renames
   1637 	 */
   1638 	for (fselem = nvlist_next_nvpair(local_nv, NULL);
   1639 	    fselem; fselem = nextfselem) {
   1640 		nvlist_t *nvfs, *snaps;
   1641 		nvlist_t *stream_nvfs = NULL;
   1642 		nvpair_t *snapelem, *nextsnapelem;
   1643 		uint64_t fromguid = 0;
   1644 		uint64_t originguid = 0;
   1645 		uint64_t stream_originguid = 0;
   1646 		uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
   1647 		char *fsname, *stream_fsname;
   1648 
   1649 		nextfselem = nvlist_next_nvpair(local_nv, fselem);
   1650 
   1651 		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
   1652 		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
   1653 		VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
   1654 		VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
   1655 		    &parent_fromsnap_guid));
   1656 		(void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
   1657 
   1658 		/*
   1659 		 * First find the stream's fs, so we can check for
   1660 		 * a different origin (due to "zfs promote")
   1661 		 */
   1662 		for (snapelem = nvlist_next_nvpair(snaps, NULL);
   1663 		    snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
   1664 			uint64_t thisguid;
   1665 
   1666 			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
   1667 			stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
   1668 
   1669 			if (stream_nvfs != NULL)
   1670 				break;
   1671 		}
   1672 
   1673 		/* check for promote */
   1674 		(void) nvlist_lookup_uint64(stream_nvfs, "origin",
   1675 		    &stream_originguid);
   1676 		if (stream_nvfs && originguid != stream_originguid) {
   1677 			switch (created_before(hdl, local_avl,
   1678 			    stream_originguid, originguid)) {
   1679 			case 1: {
   1680 				/* promote it! */
   1681 				zfs_cmd_t zc = { 0 };
   1682 				nvlist_t *origin_nvfs;
   1683 				char *origin_fsname;
   1684 
   1685 				if (flags.verbose)
   1686 					(void) printf("promoting %s\n", fsname);
   1687 
   1688 				origin_nvfs = fsavl_find(local_avl, originguid,
   1689 				    NULL);
   1690 				VERIFY(0 == nvlist_lookup_string(origin_nvfs,
   1691 				    "name", &origin_fsname));
   1692 				(void) strlcpy(zc.zc_value, origin_fsname,
   1693 				    sizeof (zc.zc_value));
   1694 				(void) strlcpy(zc.zc_name, fsname,
   1695 				    sizeof (zc.zc_name));
   1696 				error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
   1697 				if (error == 0)
   1698 					progress = B_TRUE;
   1699 				break;
   1700 			}
   1701 			default:
   1702 				break;
   1703 			case -1:
   1704 				fsavl_destroy(local_avl);
   1705 				nvlist_free(local_nv);
   1706 				return (-1);
   1707 			}
   1708 			/*
   1709 			 * We had/have the wrong origin, therefore our
   1710 			 * list of snapshots is wrong.  Need to handle
   1711 			 * them on the next pass.
   1712 			 */
   1713 			needagain = B_TRUE;
   1714 			continue;
   1715 		}
   1716 
   1717 		for (snapelem = nvlist_next_nvpair(snaps, NULL);
   1718 		    snapelem; snapelem = nextsnapelem) {
   1719 			uint64_t thisguid;
   1720 			char *stream_snapname;
   1721 			nvlist_t *found, *props;
   1722 
   1723 			nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
   1724 
   1725 			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
   1726 			found = fsavl_find(stream_avl, thisguid,
   1727 			    &stream_snapname);
   1728 
   1729 			/* check for delete */
   1730 			if (found == NULL) {
   1731 				char name[ZFS_MAXNAMELEN];
   1732 
   1733 				if (!flags.force)
   1734 					continue;
   1735 
   1736 				(void) snprintf(name, sizeof (name), "%s@%s",
   1737 				    fsname, nvpair_name(snapelem));
   1738 
   1739 				error = recv_destroy(hdl, name,
   1740 				    strlen(fsname)+1, newname, flags);
   1741 				if (error)
   1742 					needagain = B_TRUE;
   1743 				else
   1744 					progress = B_TRUE;
   1745 				continue;
   1746 			}
   1747 
   1748 			stream_nvfs = found;
   1749 
   1750 			if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
   1751 			    &props) && 0 == nvlist_lookup_nvlist(props,
   1752 			    stream_snapname, &props)) {
   1753 				zfs_cmd_t zc = { 0 };
   1754 
   1755 				zc.zc_cookie = B_TRUE; /* received */
   1756 				(void) snprintf(zc.zc_name, sizeof (zc.zc_name),
   1757 				    "%s@%s", fsname, nvpair_name(snapelem));
   1758 				if (zcmd_write_src_nvlist(hdl, &zc,
   1759 				    props) == 0) {
   1760 					(void) zfs_ioctl(hdl,
   1761 					    ZFS_IOC_SET_PROP, &zc);
   1762 					zcmd_free_nvlists(&zc);
   1763 				}
   1764 			}
   1765 
   1766 			/* check for different snapname */
   1767 			if (strcmp(nvpair_name(snapelem),
   1768 			    stream_snapname) != 0) {
   1769 				char name[ZFS_MAXNAMELEN];
   1770 				char tryname[ZFS_MAXNAMELEN];
   1771 
   1772 				(void) snprintf(name, sizeof (name), "%s@%s",
   1773 				    fsname, nvpair_name(snapelem));
   1774 				(void) snprintf(tryname, sizeof (name), "%s@%s",
   1775 				    fsname, stream_snapname);
   1776 
   1777 				error = recv_rename(hdl, name, tryname,
   1778 				    strlen(fsname)+1, newname, flags);
   1779 				if (error)
   1780 					needagain = B_TRUE;
   1781 				else
   1782 					progress = B_TRUE;
   1783 			}
   1784 
   1785 			if (strcmp(stream_snapname, fromsnap) == 0)
   1786 				fromguid = thisguid;
   1787 		}
   1788 
   1789 		/* check for delete */
   1790 		if (stream_nvfs == NULL) {
   1791 			if (!flags.force)
   1792 				continue;
   1793 
   1794 			error = recv_destroy(hdl, fsname, strlen(tofs)+1,
   1795 			    newname, flags);
   1796 			if (error)
   1797 				needagain = B_TRUE;
   1798 			else
   1799 				progress = B_TRUE;
   1800 			continue;
   1801 		}
   1802 
   1803 		if (fromguid == 0 && flags.verbose) {
   1804 			(void) printf("local fs %s does not have fromsnap "
   1805 			    "(%s in stream); must have been deleted locally; "
   1806 			    "ignoring\n", fsname, fromsnap);
   1807 			continue;
   1808 		}
   1809 
   1810 		VERIFY(0 == nvlist_lookup_string(stream_nvfs,
   1811 		    "name", &stream_fsname));
   1812 		VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
   1813 		    "parentfromsnap", &stream_parent_fromsnap_guid));
   1814 
   1815 		s1 = strrchr(fsname, '/');
   1816 		s2 = strrchr(stream_fsname, '/');
   1817 
   1818 		/* check for rename */
   1819 		if ((stream_parent_fromsnap_guid != 0 &&
   1820 		    stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
   1821 		    ((s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
   1822 			nvlist_t *parent;
   1823 			char tryname[ZFS_MAXNAMELEN];
   1824 
   1825 			parent = fsavl_find(local_avl,
   1826 			    stream_parent_fromsnap_guid, NULL);
   1827 			/*
   1828 			 * NB: parent might not be found if we used the
   1829 			 * tosnap for stream_parent_fromsnap_guid,
   1830 			 * because the parent is a newly-created fs;
   1831 			 * we'll be able to rename it after we recv the
   1832 			 * new fs.
   1833 			 */
   1834 			if (parent != NULL) {
   1835 				char *pname;
   1836 
   1837 				VERIFY(0 == nvlist_lookup_string(parent, "name",
   1838 				    &pname));
   1839 				(void) snprintf(tryname, sizeof (tryname),
   1840 				    "%s%s", pname, strrchr(stream_fsname, '/'));
   1841 			} else {
   1842 				tryname[0] = '\0';
   1843 				if (flags.verbose) {
   1844 					(void) printf("local fs %s new parent "
   1845 					    "not found\n", fsname);
   1846 				}
   1847 			}
   1848 
   1849 			error = recv_rename(hdl, fsname, tryname,
   1850 			    strlen(tofs)+1, newname, flags);
   1851 			if (error)
   1852 				needagain = B_TRUE;
   1853 			else
   1854 				progress = B_TRUE;
   1855 		}
   1856 	}
   1857 
   1858 	fsavl_destroy(local_avl);
   1859 	nvlist_free(local_nv);
   1860 
   1861 	if (needagain && progress) {
   1862 		/* do another pass to fix up temporary names */
   1863 		if (flags.verbose)
   1864 			(void) printf("another pass:\n");
   1865 		goto again;
   1866 	}
   1867 
   1868 	return (needagain);
   1869 }
   1870 
   1871 static int
   1872 zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
   1873     recvflags_t flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
   1874     char **top_zfs)
   1875 {
   1876 	nvlist_t *stream_nv = NULL;
   1877 	avl_tree_t *stream_avl = NULL;
   1878 	char *fromsnap = NULL;
   1879 	char tofs[ZFS_MAXNAMELEN];
   1880 	char errbuf[1024];
   1881 	dmu_replay_record_t drre;
   1882 	int error;
   1883 	boolean_t anyerr = B_FALSE;
   1884 	boolean_t softerr = B_FALSE;
   1885 
   1886 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
   1887 	    "cannot receive"));
   1888 
   1889 	if (strchr(destname, '@')) {
   1890 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
   1891 		    "can not specify snapshot name for multi-snapshot stream"));
   1892 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
   1893 	}
   1894 
   1895 	assert(drr->drr_type == DRR_BEGIN);
   1896 	assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
   1897 	assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) ==
   1898 	    DMU_COMPOUNDSTREAM);
   1899 
   1900 	/*
   1901 	 * Read in the nvlist from the stream.
   1902 	 */
   1903 	if (drr->drr_payloadlen != 0) {
   1904 		boolean_t recursive;
   1905 
   1906 		error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
   1907 		    &stream_nv, flags.byteswap, zc);
   1908 		if (error) {
   1909 			error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
   1910 			goto out;
   1911 		}
   1912 
   1913 		recursive = (nvlist_lookup_boolean(stream_nv,
   1914 		    "not_recursive") == ENOENT);
   1915 
   1916 		if (recursive && !flags.isprefix) {
   1917 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
   1918 			    "must use -d to receive replication "
   1919 			    "(send -R) stream"));
   1920 			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
   1921 		}
   1922 	}
   1923 
   1924 	/*
   1925 	 * Read in the end record and verify checksum.
   1926 	 */
   1927 	if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
   1928 	    flags.byteswap, NULL)))
   1929 		goto out;
   1930 	if (flags.byteswap) {
   1931 		drre.drr_type = BSWAP_32(drre.drr_type);
   1932 		drre.drr_u.drr_end.drr_checksum.zc_word[0] =
   1933 		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
   1934 		drre.drr_u.drr_end.drr_checksum.zc_word[1] =
   1935 		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
   1936 		drre.drr_u.drr_end.drr_checksum.zc_word[2] =
   1937 		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
   1938 		drre.drr_u.drr_end.drr_checksum.zc_word[3] =
   1939 		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
   1940 	}
   1941 	if (drre.drr_type != DRR_END) {
   1942 		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
   1943 		goto out;
   1944 	}
   1945 	if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
   1946 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
   1947 		    "incorrect header checksum"));
   1948 		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
   1949 		goto out;
   1950 	}
   1951 
   1952 	(void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
   1953 
   1954 	if (drr->drr_payloadlen != 0) {
   1955 		nvlist_t *stream_fss;
   1956 
   1957 		VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
   1958 		    &stream_fss));
   1959 		if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
   1960 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
   1961 			    "couldn't allocate avl tree"));
   1962 			error = zfs_error(hdl, EZFS_NOMEM, errbuf);
   1963 			goto out;
   1964 		}
   1965 
   1966 		if (fromsnap != NULL) {
   1967 			(void) strlcpy(tofs, destname, ZFS_MAXNAMELEN);
   1968 			if (flags.isprefix) {
   1969 				int i = strcspn(drr->drr_u.drr_begin.drr_toname,
   1970 				    "/@");
   1971 				/* zfs_receive_one() will create_parents() */
   1972 				(void) strlcat(tofs,
   1973 				    &drr->drr_u.drr_begin.drr_toname[i],
   1974 				    ZFS_MAXNAMELEN);
   1975 				*strchr(tofs, '@') = '\0';
   1976 			}
   1977 			softerr = recv_incremental_replication(hdl, tofs,
   1978 			    flags, stream_nv, stream_avl);
   1979 		}
   1980 	}
   1981 
   1982 
   1983 	/* Finally, receive each contained stream */
   1984 	do {
   1985 		/*
   1986 		 * we should figure out if it has a recoverable
   1987 		 * error, in which case do a recv_skip() and drive on.
   1988 		 * Note, if we fail due to already having this guid,
   1989 		 * zfs_receive_one() will take care of it (ie,
   1990 		 * recv_skip() and return 0).
   1991 		 */
   1992 		error = zfs_receive_impl(hdl, destname, flags, fd,
   1993 		    stream_avl, top_zfs);
   1994 		if (error == ENODATA) {
   1995 			error = 0;
   1996 			break;
   1997 		}
   1998 		anyerr |= error;
   1999 	} while (error == 0);
   2000 
   2001 	if (drr->drr_payloadlen != 0 && fromsnap != NULL) {
   2002 		/*
   2003 		 * Now that we have the fs's they sent us, try the
   2004 		 * renames again.
   2005 		 */
   2006 		softerr = recv_incremental_replication(hdl, tofs, flags,
   2007 		    stream_nv, stream_avl);
   2008 	}
   2009 
   2010 out:
   2011 	fsavl_destroy(stream_avl);
   2012 	if (stream_nv)
   2013 		nvlist_free(stream_nv);
   2014 	if (softerr)
   2015 		error = -2;
   2016 	if (anyerr)
   2017 		error = -1;
   2018 	return (error);
   2019 }
   2020 
   2021 static void
   2022 trunc_prop_errs(int truncated)
   2023 {
   2024 	ASSERT(truncated != 0);
   2025 
   2026 	if (truncated == 1)
   2027 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   2028 		    "1 more property could not be set\n"));
   2029 	else
   2030 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
   2031 		    "%d more properties could not be set\n"), truncated);
   2032 }
   2033 
   2034 static int
   2035 recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
   2036 {
   2037 	dmu_replay_record_t *drr;
   2038 	void *buf = malloc(1<<20);
   2039 	char errbuf[1024];
   2040 
   2041 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
   2042 	    "cannot receive:"));
   2043 
   2044 	/* XXX would be great to use lseek if possible... */
   2045 	drr = buf;
   2046 
   2047 	while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
   2048 	    byteswap, NULL) == 0) {
   2049 		if (byteswap)
   2050 			drr->drr_type = BSWAP_32(drr->drr_type);
   2051 
   2052 		switch (drr->drr_type) {
   2053 		case DRR_BEGIN:
   2054 			/* NB: not to be used on v2 stream packages */
   2055 			if (drr->drr_payloadlen != 0) {
   2056 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
   2057 				    "invalid substream header"));
   2058 				return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
   2059 			}
   2060 			break;
   2061 
   2062 		case DRR_END:
   2063 			free(buf);
   2064 			return (0);
   2065 
   2066 		case DRR_OBJECT:
   2067 			if (byteswap) {
   2068 				drr->drr_u.drr_object.drr_bonuslen =
   2069 				    BSWAP_32(drr->drr_u.drr_object.
   2070 				    drr_bonuslen);
   2071 			}
   2072 			(void) recv_read(hdl, fd, buf,
   2073 			    P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8),
   2074 			    B_FALSE, NULL);
   2075 			break;
   2076 
   2077 		case DRR_WRITE:
   2078 			if (byteswap) {
   2079 				drr->drr_u.drr_write.drr_length =
   2080 				    BSWAP_64(drr->drr_u.drr_write.drr_length);
   2081 			}
   2082 			(void) recv_read(hdl, fd, buf,
   2083 			    drr->drr_u.drr_write.drr_length, B_FALSE, NULL);
   2084 			break;
   2085 
   2086 		case DRR_WRITE_BYREF:
   2087 		case DRR_FREEOBJECTS:
   2088 		case DRR_FREE:
   2089 			break;
   2090 
   2091 		default:
   2092 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
   2093 			    "invalid record type"));
   2094 			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
   2095 		}
   2096 	}
   2097 
   2098 	free(buf);
   2099 	return (-1);
   2100 }
   2101 
   2102 /*
   2103  * Restores a backup of tosnap from the file descriptor specified by infd.
   2104  */
   2105 static int
   2106 zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
   2107     recvflags_t flags, dmu_replay_record_t *drr,
   2108     dmu_replay_record_t *drr_noswap, avl_tree_t *stream_avl,
   2109     char **top_zfs)
   2110 {
   2111 	zfs_cmd_t zc = { 0 };
   2112 	time_t begin_time;
   2113 	int ioctl_err, ioctl_errno, err, choplen;
   2114 	char *cp;
   2115 	struct drr_begin *drrb = &drr->drr_u.drr_begin;
   2116 	char errbuf[1024];
   2117 	char prop_errbuf[1024];
   2118 	char chopprefix[ZFS_MAXNAMELEN];
   2119 	boolean_t newfs = B_FALSE;
   2120 	boolean_t stream_wantsnewfs;
   2121 	uint64_t parent_snapguid = 0;
   2122 	prop_changelist_t *clp = NULL;
   2123 	nvlist_t *snapprops_nvlist = NULL;
   2124 	zprop_errflags_t prop_errflags;
   2125 
   2126 	begin_time = time(NULL);
   2127 
   2128 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
   2129 	    "cannot receive"));
   2130 
   2131 	if (stream_avl != NULL) {
   2132 		char *snapname;
   2133 		nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
   2134 		    &snapname);
   2135 		nvlist_t *props;
   2136 		int ret;
   2137 
   2138 		(void) nvlist_lookup_uint64(fs, "parentfromsnap",
   2139 		    &parent_snapguid);
   2140 		err = nvlist_lookup_nvlist(fs, "props", &props);
   2141 		if (err)
   2142 			VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
   2143 
   2144 		if (flags.canmountoff) {
   2145 			VERIFY(0 == nvlist_add_uint64(props,
   2146 			    zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
   2147 		}
   2148 		ret = zcmd_write_src_nvlist(hdl, &zc, props);
   2149 		if (err)
   2150 			nvlist_free(props);
   2151 
   2152 		if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) {
   2153 			VERIFY(0 == nvlist_lookup_nvlist(props,
   2154 			    snapname, &snapprops_nvlist));
   2155 		}
   2156 
   2157 		if (ret != 0)
   2158 			return (-1);
   2159 	}
   2160 
   2161 	/*
   2162 	 * Determine how much of the snapshot name stored in the stream
   2163 	 * we are going to tack on to the name they specified on the
   2164 	 * command line, and how much we are going to chop off.
   2165 	 *
   2166 	 * If they specified a snapshot, chop the entire name stored in
   2167 	 * the stream.
   2168 	 */
   2169 	(void) strcpy(chopprefix, drrb->drr_toname);
   2170 	if (flags.isprefix) {
   2171 		/*
   2172 		 * They specified a fs with -d, we want to tack on
   2173 		 * everything but the pool name stored in the stream
   2174 		 */
   2175 		if (strchr(tosnap, '@')) {
   2176 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
   2177 			    "argument - snapshot not allowed with -d"));
   2178 			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
   2179 		}
   2180 		cp = strchr(chopprefix, '/');
   2181 		if (cp == NULL)
   2182 			cp = strchr(chopprefix, '@');
   2183 		*cp = '\0';
   2184 	} else if (strchr(tosnap, '@') == NULL) {
   2185 		/*
   2186 		 * If they specified a filesystem without -d, we want to
   2187 		 * tack on everything after the fs specified in the
   2188 		 * first name from the stream.
   2189 		 */
   2190 		cp = strchr(chopprefix, '@');
   2191 		*cp = '\0';
   2192 	}
   2193 	choplen = strlen(chopprefix);
   2194 
   2195 	/*
   2196 	 * Determine name of destination snapshot, store in zc_value.
   2197 	 */
   2198 	(void) strcpy(zc.zc_top_ds, tosnap);
   2199 	(void) strcpy(zc.zc_value, tosnap);
   2200 	(void) strncat(zc.zc_value, drrb->drr_toname+choplen,
   2201 	    sizeof (zc.zc_value));
   2202 	if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) {
   2203 		zcmd_free_nvlists(&zc);
   2204 		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
   2205 	}
   2206 
   2207 	/*
   2208 	 * Determine the name of the origin snapshot, store in zc_string.
   2209 	 */
   2210 	if (drrb->drr_flags & DRR_FLAG_CLONE) {
   2211 		if (guid_to_name(hdl, tosnap,
   2212 		    drrb->drr_fromguid, zc.zc_string) != 0) {
   2213 			zcmd_free_nvlists(&zc);
   2214 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
   2215 			    "local origin for clone %s does not exist"),
   2216 			    zc.zc_value);
   2217 			return (zfs_error(hdl, EZFS_NOENT, errbuf));
   2218 		}
   2219 		if (flags.verbose)
   2220 			(void) printf("found clone origin %s\n", zc.zc_string);
   2221 	}
   2222 
   2223 	stream_wantsnewfs = (drrb->drr_fromguid == NULL ||
   2224 	    (drrb->drr_flags & DRR_FLAG_CLONE));
   2225 
   2226 	if (stream_wantsnewfs) {
   2227 		/*
   2228 		 * if the parent fs does not exist, look for it based on
   2229 		 * the parent snap GUID
   2230 		 */
   2231 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
   2232 		    "cannot receive new filesystem stream"));
   2233 
   2234 		(void) strcpy(zc.zc_name, zc.zc_value);
   2235 		cp = strrchr(zc.zc_name, '/');
   2236 		if (cp)
   2237 			*cp = '\0';
   2238 		if (cp &&
   2239 		    !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
   2240 			char suffix[ZFS_MAXNAMELEN];
   2241 			(void) strcpy(suffix, strrchr(zc.zc_value, '/'));
   2242 			if (guid_to_name(hdl, tosnap, parent_snapguid,
   2243 			    zc.zc_value) == 0) {
   2244 				*strchr(zc.zc_value, '@') = '\0';
   2245 				(void) strcat(zc.zc_value, suffix);
   2246 			}
   2247 		}
   2248 	} else {
   2249 		/*
   2250 		 * if the fs does not exist, look for it based on the
   2251 		 * fromsnap GUID
   2252 		 */
   2253 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
   2254 		    "cannot receive incremental stream"));
   2255 
   2256 		(void) strcpy(zc.zc_name, zc.zc_value);
   2257 		*strchr(zc.zc_name, '@') = '\0';
   2258 
   2259 		if (!zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
   2260 			char snap[ZFS_MAXNAMELEN];
   2261 			(void) strcpy(snap, strchr(zc.zc_value, '@'));
   2262 			if (guid_to_name(hdl, tosnap, drrb->drr_fromguid,
   2263 			    zc.zc_value) == 0) {
   2264 				*strchr(zc.zc_value, '@') = '\0';
   2265 				(void) strcat(zc.zc_value, snap);
   2266 			}
   2267 		}
   2268 	}
   2269 
   2270 	(void) strcpy(zc.zc_name, zc.zc_value);
   2271 	*strchr(zc.zc_name, '@') = '\0';
   2272 
   2273 	if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
   2274 		zfs_handle_t *zhp;
   2275 		/*
   2276 		 * Destination fs exists.  Therefore this should either
   2277 		 * be an incremental, or the stream specifies a new fs
   2278 		 * (full stream or clone) and they want us to blow it
   2279 		 * away (and have therefore specified -F and removed any
   2280 		 * snapshots).
   2281 		 */
   2282 
   2283 		if (stream_wantsnewfs) {
   2284 			if (!flags.force) {
   2285 				zcmd_free_nvlists(&zc);
   2286 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
   2287 				    "destination '%s' exists\n"
   2288 				    "must specify -F to overwrite it"),
   2289 				    zc.zc_name);
   2290 				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
   2291 			}
   2292 			if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
   2293 			    &zc) == 0) {
   2294 				zcmd_free_nvlists(&zc);
   2295 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
   2296 				    "destination has snapshots (eg. %s)\n"
   2297 				    "must destroy them to overwrite it"),
   2298 				    zc.zc_name);
   2299 				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
   2300 			}
   2301 		}
   2302 
   2303 		if ((zhp = zfs_open(hdl, zc.zc_name,
   2304 		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
   2305 			zcmd_free_nvlists(&zc);
   2306 			return (-1);
   2307 		}
   2308 
   2309 		if (stream_wantsnewfs &&
   2310 		    zhp->zfs_dmustats.dds_origin[0]) {
   2311 			zcmd_free_nvlists(&zc);
   2312 			zfs_close(zhp);
   2313 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
   2314 			    "destination '%s' is a clone\n"
   2315 			    "must destroy it to overwrite it"),
   2316 			    zc.zc_name);
   2317 			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
   2318 		}
   2319 
   2320 		if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
   2321 		    stream_wantsnewfs) {
   2322 			/* We can't do online recv in this case */
   2323 			clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
   2324 			if (clp == NULL) {
   2325 				zfs_close(zhp);
   2326 				zcmd_free_nvlists(&zc);
   2327 				return (-1);
   2328 			}
   2329 			if (changelist_prefix(clp) != 0) {
   2330 				changelist_free(clp);
   2331 				zfs_close(zhp);
   2332 				zcmd_free_nvlists(&zc);
   2333 				return (-1);
   2334 			}
   2335 		}
   2336 		zfs_close(zhp);
   2337 	} else {
   2338 		/*
   2339 		 * Destination filesystem does not exist.  Therefore we better
   2340 		 * be creating a new filesystem (either from a full backup, or
   2341 		 * a clone).  It would therefore be invalid if the user
   2342 		 * specified only the pool name (i.e. if the destination name
   2343 		 * contained no slash character).
   2344 		 */
   2345 		if (!stream_wantsnewfs ||
   2346 		    (cp = strrchr(zc.zc_name, '/')) == NULL) {
   2347 			zcmd_free_nvlists(&zc);
   2348 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
   2349 			    "destination '%s' does not exist"), zc.zc_name);
   2350 			return (zfs_error(hdl, EZFS_NOENT, errbuf));
   2351 		}
   2352 
   2353 		/*
   2354 		 * Trim off the final dataset component so we perform the
   2355 		 * recvbackup ioctl to the filesystems's parent.
   2356 		 */
   2357 		*cp = '\0';
   2358 
   2359 		if (flags.isprefix && !flags.dryrun &&
   2360 		    create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) {
   2361 			zcmd_free_nvlists(&zc);
   2362 			return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
   2363 		}
   2364 
   2365 		newfs = B_TRUE;
   2366 	}
   2367 
   2368 	zc.zc_begin_record = drr_noswap->drr_u.drr_begin;
   2369 	zc.zc_cookie = infd;
   2370 	zc.zc_guid = flags.force;
   2371 	if (flags.verbose) {
   2372 		(void) printf("%s %s stream of %s into %s\n",
   2373 		    flags.dryrun ? "would receive" : "receiving",
   2374 		    drrb->drr_fromguid ? "incremental" : "full",
   2375 		    drrb->drr_toname, zc.zc_value);
   2376 		(void) fflush(stdout);
   2377 	}
   2378 
   2379 	if (flags.dryrun) {
   2380 		zcmd_free_nvlists(&zc);
   2381 		return (recv_skip(hdl, infd, flags.byteswap));
   2382 	}
   2383 
   2384 	zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf;
   2385 	zc.zc_nvlist_dst_size = sizeof (prop_errbuf);
   2386 
   2387 	err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc);
   2388 	ioctl_errno = errno;
   2389 	prop_errflags = (zprop_errflags_t)zc.zc_obj;
   2390 
   2391 	if (err == 0) {
   2392 		nvlist_t *prop_errors;
   2393 		VERIFY(0 == nvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
   2394 		    zc.zc_nvlist_dst_size, &prop_errors, 0));
   2395 
   2396 		nvpair_t *prop_err = NULL;
   2397 
   2398 		while ((prop_err = nvlist_next_nvpair(prop_errors,
   2399 		    prop_err)) != NULL) {
   2400 			char tbuf[1024];
   2401 			zfs_prop_t prop;
   2402 			int intval;
   2403 
   2404 			prop = zfs_name_to_prop(nvpair_name(prop_err));
   2405 			(void) nvpair_value_int32(prop_err, &intval);
   2406 			if (strcmp(nvpair_name(prop_err),
   2407 			    ZPROP_N_MORE_ERRORS) == 0) {
   2408 				trunc_prop_errs(intval);
   2409 				break;
   2410 			} else {
   2411 				(void) snprintf(tbuf, sizeof (tbuf),
   2412 				    dgettext(TEXT_DOMAIN,
   2413 				    "cannot receive %s property on %s"),
   2414 				    nvpair_name(prop_err), zc.zc_name);
   2415 				zfs_setprop_error(hdl, prop, intval, tbuf);
   2416 			}
   2417 		}
   2418 		nvlist_free(prop_errors);
   2419 	}
   2420 
   2421 	zc.zc_nvlist_dst = 0;
   2422 	zc.zc_nvlist_dst_size = 0;
   2423 	zcmd_free_nvlists(&zc);
   2424 
   2425 	if (err == 0 && snapprops_nvlist) {
   2426 		zfs_cmd_t zc2 = { 0 };
   2427 
   2428 		(void) strcpy(zc2.zc_name, zc.zc_value);
   2429 		zc2.zc_cookie = B_TRUE; /* received */
   2430 		if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) {
   2431 			(void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2);
   2432 			zcmd_free_nvlists(&zc2);
   2433 		}
   2434 	}
   2435 
   2436 	if (err && (ioctl_errno == ENOENT || ioctl_errno == ENODEV)) {
   2437 		/*
   2438 		 * It may be that this snapshot already exists,
   2439 		 * in which case we want to consume & ignore it
   2440 		 * rather than failing.
   2441 		 */
   2442 		avl_tree_t *local_avl;
   2443 		nvlist_t *local_nv, *fs;
   2444 		char *cp = strchr(zc.zc_value, '@');
   2445 
   2446 		/*
   2447 		 * XXX Do this faster by just iterating over snaps in
   2448 		 * this fs.  Also if zc_value does not exist, we will
   2449 		 * get a strange "does not exist" error message.
   2450 		 */
   2451 		*cp = '\0';
   2452 		if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE,
   2453 		    &local_nv, &local_avl) == 0) {
   2454 			*cp = '@';
   2455 			fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
   2456 			fsavl_destroy(local_avl);
   2457 			nvlist_free(local_nv);
   2458 
   2459 			if (fs != NULL) {
   2460 				if (flags.verbose) {
   2461 					(void) printf("snap %s already exists; "
   2462 					    "ignoring\n", zc.zc_value);
   2463 				}
   2464 				err = ioctl_err = recv_skip(hdl, infd,
   2465 				    flags.byteswap);
   2466 			}
   2467 		}
   2468 		*cp = '@';
   2469 	}
   2470 
   2471 	if (ioctl_err != 0) {
   2472 		switch (ioctl_errno) {
   2473 		case ENODEV:
   2474 			cp = strchr(zc.zc_value, '@');
   2475 			*cp = '\0';
   2476 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
   2477 			    "most recent snapshot of %s does not\n"
   2478 			    "match incremental source"), zc.zc_value);
   2479 			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
   2480 			*cp = '@';
   2481 			break;
   2482 		case ETXTBSY:
   2483 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
   2484 			    "destination %s has been modified\n"
   2485 			    "since most recent snapshot"), zc.zc_name);
   2486 			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
   2487 			break;
   2488 		case EEXIST:
   2489 			cp = strchr(zc.zc_value, '@');
   2490 			if (newfs) {
   2491 				/* it's the containing fs that exists */
   2492 				*cp = '\0';
   2493 			}
   2494 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
   2495 			    "destination already exists"));
   2496 			(void) zfs_error_fmt(hdl, EZFS_EXISTS,
   2497 			    dgettext(TEXT_DOMAIN, "cannot restore to %s"),
   2498 			    zc.zc_value);
   2499 			*cp = '@';
   2500 			break;
   2501 		case EINVAL:
   2502 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
   2503 			break;
   2504 		case ECKSUM:
   2505 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
   2506 			    "invalid stream (checksum mismatch)"));
   2507 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
   2508 			break;
   2509 		default:
   2510 			(void) zfs_standard_error(hdl, ioctl_errno, errbuf);
   2511 		}
   2512 	}
   2513 
   2514 	/*
   2515 	 * Mount the target filesystem (if created).  Also mount any
   2516 	 * children of the target filesystem if we did a replication
   2517 	 * receive (indicated by stream_avl being non-NULL).
   2518 	 */
   2519 	cp = strchr(zc.zc_value, '@');
   2520 	if (cp && (ioctl_err == 0 || !newfs)) {
   2521 		zfs_handle_t *h;
   2522 
   2523 		*cp = '\0';
   2524 		h = zfs_open(hdl, zc.zc_value,
   2525 		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
   2526 		if (h != NULL) {
   2527 			if (h->zfs_type == ZFS_TYPE_VOLUME) {
   2528 				*cp = '@';
   2529 			} else if (newfs || stream_avl) {
   2530 				/*
   2531 				 * Track the first/top of hierarchy fs,
   2532 				 * for mounting and sharing later.
   2533 				 */
   2534 				if (top_zfs && *top_zfs == NULL)
   2535 					*top_zfs = zfs_strdup(hdl, zc.zc_value);
   2536 			}
   2537 			zfs_close(h);
   2538 		}
   2539 		*cp = '@';
   2540 	}
   2541 
   2542 	if (clp) {
   2543 		err |= changelist_postfix(clp);
   2544 		changelist_free(clp);
   2545 	}
   2546 
   2547 	if (prop_errflags & ZPROP_ERR_NOCLEAR) {
   2548 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
   2549 		    "failed to clear unreceived properties on %s"),
   2550 		    zc.zc_name);
   2551 		(void) fprintf(stderr, "\n");
   2552 	}
   2553 	if (prop_errflags & ZPROP_ERR_NORESTORE) {
   2554 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
   2555 		    "failed to restore original properties on %s"),
   2556 		    zc.zc_name);
   2557 		(void) fprintf(stderr, "\n");
   2558 	}
   2559 
   2560 	if (err || ioctl_err)
   2561 		return (-1);
   2562 
   2563 	if (flags.verbose) {
   2564 		char buf1[64];
   2565 		char buf2[64];
   2566 		uint64_t bytes = zc.zc_cookie;
   2567 		time_t delta = time(NULL) - begin_time;
   2568 		if (delta == 0)
   2569 			delta = 1;
   2570 		zfs_nicenum(bytes, buf1, sizeof (buf1));
   2571 		zfs_nicenum(bytes/delta, buf2, sizeof (buf1));
   2572 
   2573 		(void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
   2574 		    buf1, delta, buf2);
   2575 	}
   2576 
   2577 	return (0);
   2578 }
   2579 
   2580 static int
   2581 zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
   2582     int infd, avl_tree_t *stream_avl, char **top_zfs)
   2583 {
   2584 	int err;
   2585 	dmu_replay_record_t drr, drr_noswap;
   2586 	struct drr_begin *drrb = &drr.drr_u.drr_begin;
   2587 	char errbuf[1024];
   2588 	zio_cksum_t zcksum = { 0 };
   2589 	uint64_t featureflags;
   2590 	int hdrtype;
   2591 
   2592 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
   2593 	    "cannot receive"));
   2594 
   2595 	if (flags.isprefix &&
   2596 	    !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
   2597 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
   2598 		    "(%s) does not exist"), tosnap);
   2599 		return (zfs_error(hdl, EZFS_NOENT, errbuf));
   2600 	}
   2601 
   2602 	/* read in the BEGIN record */
   2603 	if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
   2604 	    &zcksum)))
   2605 		return (err);
   2606 
   2607 	if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
   2608 		/* It's the double end record at the end of a package */
   2609 		return (ENODATA);
   2610 	}
   2611 
   2612 	/* the kernel needs the non-byteswapped begin record */
   2613 	drr_noswap = drr;
   2614 
   2615 	flags.byteswap = B_FALSE;
   2616 	if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
   2617 		/*
   2618 		 * We computed the checksum in the wrong byteorder in
   2619 		 * recv_read() above; do it again correctly.
   2620 		 */
   2621 		bzero(&zcksum, sizeof (zio_cksum_t));
   2622 		fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
   2623 		flags.byteswap = B_TRUE;
   2624 
   2625 		drr.drr_type = BSWAP_32(drr.drr_type);
   2626 		drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
   2627 		drrb->drr_magic = BSWAP_64(drrb->drr_magic);
   2628 		drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
   2629 		drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
   2630 		drrb->drr_type = BSWAP_32(drrb->drr_type);
   2631 		drrb->drr_flags = BSWAP_32(drrb->drr_flags);
   2632 		drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
   2633 		drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
   2634 	}
   2635 
   2636 	if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
   2637 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
   2638 		    "stream (bad magic number)"));
   2639 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
   2640 	}
   2641 
   2642 	featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
   2643 	hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo);
   2644 
   2645 	if (!DMU_STREAM_SUPPORTED(featureflags) ||
   2646 	    (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) {
   2647 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
   2648 		    "stream has unsupported feature, feature flags = %lx"),
   2649 		    featureflags);
   2650 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
   2651 	}
   2652 
   2653 	if (strchr(drrb->drr_toname, '@') == NULL) {
   2654 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
   2655 		    "stream (bad snapshot name)"));
   2656 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
   2657 	}
   2658 
   2659 	if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) {
   2660 		return (zfs_receive_one(hdl, infd, tosnap, flags,
   2661 		    &drr, &drr_noswap, stream_avl, top_zfs));
   2662 	} else {  /* must be DMU_COMPOUNDSTREAM */
   2663 		assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
   2664 		    DMU_COMPOUNDSTREAM);
   2665 		return (zfs_receive_package(hdl, infd, tosnap, flags,
   2666 		    &drr, &zcksum, top_zfs));
   2667 	}
   2668 }
   2669 
   2670 /*
   2671  * Restores a backup of tosnap from the file descriptor specified by infd.
   2672  * Return 0 on total success, -2 if some things couldn't be
   2673  * destroyed/renamed/promoted, -1 if some things couldn't be received.
   2674  * (-1 will override -2).
   2675  */
   2676 int
   2677 zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
   2678     int infd, avl_tree_t *stream_avl)
   2679 {
   2680 	char *top_zfs = NULL;
   2681 	int err;
   2682 
   2683 	err = zfs_receive_impl(hdl, tosnap, flags, infd, stream_avl, &top_zfs);
   2684 
   2685 	if (err == 0 && !flags.nomount && top_zfs) {
   2686 		zfs_handle_t *zhp;
   2687 		prop_changelist_t *clp;
   2688 
   2689 		zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM);
   2690 		if (zhp != NULL) {
   2691 			clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
   2692 			    CL_GATHER_MOUNT_ALWAYS, 0);
   2693 			zfs_close(zhp);
   2694 			if (clp != NULL) {
   2695 				/* mount and share received datasets */
   2696 				err = changelist_postfix(clp);
   2697 				changelist_free(clp);
   2698 			}
   2699 		}
   2700 		if (zhp == NULL || clp == NULL || err)
   2701 			err = -1;
   2702 	}
   2703 	if (top_zfs)
   2704 		free(top_zfs);
   2705 
   2706 	return (err);
   2707 }
   2708