Home | History | Annotate | Download | only in nfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  *
     21  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     22  * Use is subject to license terms.
     23  */
     24 
     25 #include <sys/systm.h>
     26 
     27 #include <nfs/nfs.h>
     28 #include <nfs/export.h>
     29 #include <sys/cmn_err.h>
     30 
     31 #define	PSEUDOFS_SUFFIX		" (pseudo)"
     32 
     33 /*
     34  * A version of VOP_FID that deals with a remote VOP_FID for nfs.
     35  * If vp is an nfs node, nfs4_fid() returns EREMOTE, nfs3_fid() and nfs_fid()
     36  * returns the filehandle of vp as its fid. When nfs uses fid to set the
     37  * exportinfo filehandle template, a remote nfs filehandle would be too big for
     38  * the fid of the exported directory. This routine remaps the value of the
     39  * attribute va_nodeid of vp to be the fid of vp, so that the fid can fit.
     40  *
     41  * We need this fid mainly for setting up NFSv4 server namespace where an
     42  * nfs filesystem is also part of it. Thus, need to be able to setup a pseudo
     43  * exportinfo for an nfs node.
     44  *
     45  * e.g. mount a filesystem on top of a nfs dir, and then share the new mount
     46  *      (like exporting a local disk from a "diskless" client)
     47  */
     48 int
     49 vop_fid_pseudo(vnode_t *vp, fid_t *fidp)
     50 {
     51 	struct vattr va;
     52 	int error;
     53 
     54 	error = VOP_FID(vp, fidp, NULL);
     55 
     56 	/*
     57 	 * XXX nfs4_fid() does nothing and returns EREMOTE.
     58 	 * XXX nfs3_fid()/nfs_fid() returns nfs filehandle as its fid
     59 	 * which has a bigger length than local fid.
     60 	 * NFS_FH4MAXDATA is the size of
     61 	 * fhandle4_t.fh_xdata[NFS_FH4MAXDATA].
     62 	 *
     63 	 * Note: nfs[2,3,4]_fid() only gets called for diskless clients.
     64 	 */
     65 	if (error == EREMOTE ||
     66 	    (error == 0 && fidp->fid_len > NFS_FH4MAXDATA)) {
     67 
     68 		va.va_mask = AT_NODEID;
     69 		error = VOP_GETATTR(vp, &va, 0, CRED(), NULL);
     70 		if (error)
     71 			return (error);
     72 
     73 		fidp->fid_len = sizeof (va.va_nodeid);
     74 		bcopy(&va.va_nodeid, fidp->fid_data, fidp->fid_len);
     75 		return (0);
     76 	}
     77 
     78 	return (error);
     79 }
     80 
     81 /*
     82  * Get an nfsv4 vnode of the given fid from the visible list of an
     83  * nfs filesystem or get the exi_vp if it is the root node.
     84  */
     85 int
     86 nfs4_vget_pseudo(struct exportinfo *exi, vnode_t **vpp, fid_t *fidp)
     87 {
     88 	fid_t exp_fid;
     89 	struct exp_visible *visp;
     90 	int error;
     91 
     92 	/* check if the given fid is in the visible list */
     93 
     94 	for (visp = exi->exi_visible; visp; visp = visp->vis_next) {
     95 		if (EQFID(fidp, &visp->vis_fid)) {
     96 			VN_HOLD(visp->vis_vp);
     97 			*vpp = visp->vis_vp;
     98 			return (0);
     99 		}
    100 	}
    101 
    102 	/* check if the given fid is the same as the exported node */
    103 
    104 	bzero(&exp_fid, sizeof (exp_fid));
    105 	exp_fid.fid_len = MAXFIDSZ;
    106 	error = vop_fid_pseudo(exi->exi_vp, &exp_fid);
    107 	if (error)
    108 		return (error);
    109 
    110 	if (EQFID(fidp, &exp_fid)) {
    111 		VN_HOLD(exi->exi_vp);
    112 		*vpp = exi->exi_vp;
    113 		return (0);
    114 	}
    115 
    116 	return (ENOENT);
    117 }
    118 
    119 /*
    120  * Create a pseudo export entry
    121  *
    122  * This is an export entry that's created as the
    123  * side-effect of a "real" export.  As a part of
    124  * a real export, the pathname to the export is
    125  * checked to see if all the directory components
    126  * are accessible via an NFSv4 client, i.e. are
    127  * exported.  If treeclimb_export() finds an unexported
    128  * mountpoint along the path, then it calls this
    129  * function to export it.
    130  *
    131  * This pseudo export differs from a real export in that
    132  * it only allows read-only access.  A "visible" list of
    133  * directories is added to filter lookup and readdir results
    134  * to only contain dirnames which lead to descendant shares.
    135  *
    136  * A visible list has a per-file-system scope.  Any exportinfo
    137  * struct (real or pseudo) can have a visible list as long as
    138  * a) its export root is VROOT
    139  * b) a descendant of the export root is shared
    140  */
    141 int
    142 pseudo_exportfs(vnode_t *vp, struct exp_visible *vis_head,
    143 	    struct exportdata *exdata, struct exportinfo **exi_retp)
    144 {
    145 	struct exportinfo *exi;
    146 	struct exportdata *kex;
    147 	fid_t fid;
    148 	fsid_t fsid;
    149 	int error, vpathlen;
    150 
    151 	ASSERT(RW_WRITE_HELD(&exported_lock));
    152 
    153 	/*
    154 	 * Get the vfs id
    155 	 */
    156 	bzero(&fid, sizeof (fid));
    157 	fid.fid_len = MAXFIDSZ;
    158 	error = vop_fid_pseudo(vp, &fid);
    159 	if (error) {
    160 		/*
    161 		 * If VOP_FID returns ENOSPC then the fid supplied
    162 		 * is too small.  For now we simply return EREMOTE.
    163 		 */
    164 		if (error == ENOSPC)
    165 			error = EREMOTE;
    166 		return (error);
    167 	}
    168 
    169 	fsid = vp->v_vfsp->vfs_fsid;
    170 	exi = kmem_zalloc(sizeof (*exi), KM_SLEEP);
    171 	exi->exi_fsid = fsid;
    172 	exi->exi_fid = fid;
    173 	exi->exi_vp = vp;
    174 	VN_HOLD(exi->exi_vp);
    175 	exi->exi_visible = vis_head;
    176 	exi->exi_count = 1;
    177 	exi->exi_volatile_dev = (vfssw[vp->v_vfsp->vfs_fstype].vsw_flag &
    178 	    VSW_VOLATILEDEV) ? 1 : 0;
    179 	mutex_init(&exi->exi_lock, NULL, MUTEX_DEFAULT, NULL);
    180 
    181 	/*
    182 	 * Build up the template fhandle
    183 	 */
    184 	exi->exi_fh.fh_fsid = fsid;
    185 	ASSERT(exi->exi_fid.fid_len <= sizeof (exi->exi_fh.fh_xdata));
    186 	exi->exi_fh.fh_xlen = exi->exi_fid.fid_len;
    187 	bcopy(exi->exi_fid.fid_data, exi->exi_fh.fh_xdata,
    188 	    exi->exi_fid.fid_len);
    189 	exi->exi_fh.fh_len = sizeof (exi->exi_fh.fh_data);
    190 
    191 	kex = &exi->exi_export;
    192 	kex->ex_flags = EX_PSEUDO;
    193 
    194 	vpathlen = vp->v_path ? strlen(vp->v_path) : 0;
    195 	kex->ex_pathlen = vpathlen + strlen(PSEUDOFS_SUFFIX);
    196 	kex->ex_path = kmem_alloc(kex->ex_pathlen + 1, KM_SLEEP);
    197 
    198 	if (vpathlen)
    199 		(void) strcpy(kex->ex_path, vp->v_path);
    200 	(void) strcpy(kex->ex_path + vpathlen, PSEUDOFS_SUFFIX);
    201 
    202 	/* Transfer the secinfo data from exdata to this new pseudo node */
    203 	if (exdata)
    204 		srv_secinfo_exp2pseu(&exi->exi_export, exdata);
    205 
    206 	/*
    207 	 * Initialize auth cache lock
    208 	 */
    209 	rw_init(&exi->exi_cache_lock, NULL, RW_DEFAULT, NULL);
    210 
    211 	/*
    212 	 * Insert the new entry at the front of the export list
    213 	 */
    214 	export_link(exi);
    215 
    216 	/*
    217 	 * If exi_retp is non-NULL return a pointer to the new
    218 	 * exportinfo structure.
    219 	 */
    220 	if (exi_retp)
    221 		*exi_retp = exi;
    222 
    223 	return (0);
    224 }
    225 
    226 /*
    227  * Free a list of visible directories
    228  */
    229 void
    230 free_visible(struct exp_visible *head)
    231 {
    232 	struct exp_visible *visp, *next;
    233 
    234 	for (visp = head; visp; visp = next) {
    235 		if (visp->vis_vp != NULL)
    236 			VN_RELE(visp->vis_vp);
    237 
    238 		next = visp->vis_next;
    239 		srv_secinfo_list_free(visp->vis_secinfo, visp->vis_seccnt);
    240 		kmem_free(visp, sizeof (*visp));
    241 	}
    242 }
    243 
    244 /*
    245  * Connects newchild (or subtree with newchild in head)
    246  * to the parent node. We always add it to the beginning
    247  * of sibling list.
    248  */
    249 static void
    250 tree_add_child(treenode_t *parent, treenode_t *newchild)
    251 {
    252 	newchild->tree_parent = parent;
    253 	newchild->tree_sibling = parent->tree_child_first;
    254 	parent->tree_child_first = newchild;
    255 }
    256 
    257 /*
    258  * Add new node to the head of subtree pointed by 'n'. n can be NULL.
    259  * Interconnects the new treenode with exp_visible and exportinfo
    260  * if needed.
    261  */
    262 static treenode_t *
    263 tree_prepend_node(treenode_t *n, exp_visible_t *v, exportinfo_t *e)
    264 {
    265 	treenode_t *tnode = kmem_zalloc(sizeof (*tnode), KM_SLEEP);
    266 
    267 	if (n) {
    268 		tnode->tree_child_first = n;
    269 		n->tree_parent = tnode;
    270 	}
    271 	if (v) {
    272 		tnode->tree_vis = v;
    273 		v->vis_tree = tnode;
    274 	}
    275 	if (e) {
    276 		tnode->tree_exi = e;
    277 		e->exi_tree = tnode;
    278 	}
    279 	return (tnode);
    280 }
    281 
    282 /*
    283  * Removes node from the tree and frees the treenode struct.
    284  * Does not free structures pointed by tree_exi and tree_vis,
    285  * they should be already freed.
    286  */
    287 static void
    288 tree_remove_node(treenode_t *node)
    289 {
    290 	treenode_t *parent = node->tree_parent;
    291 	treenode_t *s; /* s for sibling */
    292 
    293 	if (parent == NULL) {
    294 		kmem_free(node, sizeof (*node));
    295 		ns_root = NULL;
    296 		return;
    297 	}
    298 	/* This node is first child */
    299 	if (parent->tree_child_first == node) {
    300 		parent->tree_child_first = node->tree_sibling;
    301 	/* This node is not first child */
    302 	} else {
    303 		s = parent->tree_child_first;
    304 		while (s->tree_sibling != node)
    305 			s = s->tree_sibling;
    306 		s->tree_sibling = s->tree_sibling->tree_sibling;
    307 	}
    308 	kmem_free(node, sizeof (*node));
    309 }
    310 
    311 /*
    312  * Add a list of visible directories to a pseudo exportfs.
    313  *
    314  * When we export a new directory we need to add a new
    315  * path segment through the pseudofs to reach the new
    316  * directory. This new path is reflected in a list of
    317  * directories added to the "visible" list.
    318  *
    319  * Here there are two lists of visible fids: one hanging off the
    320  * pseudo exportinfo, and the one we want to add.  It's possible
    321  * that the two lists share a common path segment
    322  * and have some common directories.  We need to combine
    323  * the lists so there's no duplicate entries. Where a common
    324  * path component is found, the vis_count field is bumped.
    325  *
    326  * When the addition is complete, the supplied list is freed.
    327  */
    328 
static void
more_visible(struct exportinfo *exi, struct exp_visible *vis_head)
{
	struct exp_visible *vp1, *vp2;
	struct exp_visible *tail, *new;
	treenode_t *subtree_head, *dupl, *dest;
	int found;

	/*
	 * dest is the treenode the not-yet-merged part of the supplied
	 * subtree will be attached to; subtree_head is the top of that
	 * part.  Both start out covering the whole supplied list and are
	 * advanced each time a supplied entry turns out to be a duplicate.
	 */
	dest = exi->exi_tree;
	subtree_head = vis_head->vis_tree;

	/*
	 * If exportinfo doesn't already have a visible
	 * list just assign the entire supplied list.
	 * The supplied entries are adopted as they are (nothing is
	 * freed) and their subtree is hung under the export's treenode.
	 */
	if (exi->exi_visible == NULL) {
		exi->exi_visible = vis_head;
		tree_add_child(dest, subtree_head);
		return;
	}

	/*
	 * The outer loop traverses the supplied list.
	 */
	for (vp1 = vis_head; vp1; vp1 = vp1->vis_next) {

		/*
		 * Given an element from the list to be added,
		 * search the exportinfo visible list looking for a match.
		 * If a match is found, increment the reference count.
		 */
		found = 0;

		for (vp2 = exi->exi_visible; vp2; vp2 = vp2->vis_next) {

			/*
			 * Track the last element visited; when the search
			 * runs to the end without a match this is the
			 * real tail of the export's list.
			 */
			tail = vp2;

			if (EQFID(&vp1->vis_fid, &vp2->vis_fid)) {
				found = 1;
				vp2->vis_count++;
				/*
				 * The existing entry keeps its own vnode
				 * hold, so the supplied one is dropped.
				 * Clearing vis_vp keeps free_visible()
				 * from releasing it a second time.
				 */
				VN_RELE(vp1->vis_vp);
				vp1->vis_vp = NULL;

				/*
				 * If the visible struct we want to add
				 * (vp1) has vis_exported set to 1, then
				 * the matching visible struct we just found
				 * must also have it's vis_exported field
				 * set to 1.
				 *
				 * For example, if /export/home was shared
				 * (and a mountpoint), then "export" and
				 * "home" would each have visible structs in
				 * the root pseudo exportinfo. The vis_exported
				 * for home would be 1, and vis_exported for
				 * export would be 0.  Now, if /export was
				 * also shared, more_visible would find the
				 * existing visible struct for export, and
				 * see that vis_exported was 0.  The code
				 * below will set it to 1.
				 *
				 * vp1 is from vis list passed in (vis_head)
				 * vp2 is from vis list on pseudo exportinfo
				 */
				if (vp1->vis_exported && !vp2->vis_exported)
					vp2->vis_exported = 1;
				/*
				 * Assuming that visibles in vis_head are sorted
				 * in same order as they appear in the shared
				 * path. If /a/b/c/d is being shared we will
				 * see 'a' before 'b' etc.
				 */
				dupl = vp1->vis_tree;
				dest = vp2->vis_tree;
				/* If node is shared, transfer exportinfo ptr */
				if (dupl->tree_exi) {
					dest->tree_exi = dupl->tree_exi;
					dest->tree_exi->exi_tree = dest;
				}
				/*
				 * The duplicate treenode is dropped; whatever
				 * hangs below it is what still has to be
				 * attached, now under the existing node.
				 */
				subtree_head = dupl->tree_child_first;
				kmem_free(dupl, sizeof (*dupl));
				break;
			}
		}

		/* If not found - add to the end of the list */
		if (! found) {
			/*
			 * A structure copy of vp1 is appended, so the vnode
			 * hold moves to the copy and vp1->vis_vp is cleared.
			 *
			 * NOTE(review): the copy also carries vis_secinfo and
			 * vis_seccnt, and free_visible() below passes the
			 * original's vis_secinfo to srv_secinfo_list_free().
			 * That is harmless for entries built by
			 * treeclimb_export(), which sets vis_secinfo to NULL;
			 * confirm no caller supplies entries with secinfo.
			 */
			new = kmem_zalloc(sizeof (*new), KM_SLEEP);
			*new = *vp1;
			tail->vis_next = new;
			new->vis_next = NULL;
			vp1->vis_vp = NULL;
			/* Tell treenode that new visible is kmem_zalloc-ated */
			new->vis_tree->tree_vis = new;
		}
	}

	/*
	 * Throw away the path list. vis_vp pointers in vis_head list
	 * are either VN_RELEed or reassigned, and are set to NULL.
	 * There is no need to VN_RELE in free_visible for this vis_head.
	 */
	free_visible(vis_head);
	/*
	 * Attach what is left of the supplied subtree.  subtree_head is
	 * NULL when the last duplicate's treenode had no child, in which
	 * case there is nothing to attach.
	 */
	if (subtree_head)
		tree_add_child(dest, subtree_head);
}
    435 
    436 /*
    437  * Remove one visible entry from the pseudo exportfs.
    438  *
    439  * When we unexport a directory, we have to remove path
    440  * components from the visible list in the pseudo exportfs
    441  * entry. The supplied visible contains one fid of one path
    442  * component. The visible list of the export
    443  * is checked against provided visible, matching fid has its
    444  * reference count decremented.  If a reference count drops to
    445  * zero, then it means no paths now use this directory, so its
    446  * fid can be removed from the visible list.
    447  *
    448  * When the last path is removed, the visible list will be null.
    449  */
    450 static void
    451 less_visible(struct exportinfo *exi, struct exp_visible *vp1)
    452 {
    453 	struct exp_visible *vp2;
    454 	struct exp_visible *prev, *next;
    455 
    456 	for (vp2 = exi->exi_visible, prev = NULL; vp2; vp2 = next) {
    457 
    458 		next = vp2->vis_next;
    459 
    460 		if (EQFID(&vp1->vis_fid, &vp2->vis_fid)) {
    461 			/*
    462 			 * Decrement the ref count.
    463 			 * Remove the entry if it's zero.
    464 			 */
    465 			if (--vp2->vis_count <= 0) {
    466 				if (prev == NULL)
    467 					exi->exi_visible = next;
    468 				else
    469 					prev->vis_next = next;
    470 				VN_RELE(vp2->vis_vp);
    471 				srv_secinfo_list_free(vp2->vis_secinfo,
    472 				    vp2->vis_seccnt);
    473 				kmem_free(vp2, sizeof (*vp1));
    474 			} else {
    475 				/*
    476 				 * If we're here, then the vp2 will
    477 				 * remain in the vis list.  If the
    478 				 * vis entry corresponds to the object
    479 				 * being unshared, then vis_exported
    480 				 * needs to be set to 0.
    481 				 *
    482 				 * vp1 is a node from caller's list
    483 				 * vp2 is node from exportinfo's list
    484 				 *
    485 				 * Only 1 node in the caller's list
    486 				 * will have vis_exported set to 1,
    487 				 * and it corresponds to the obj being
    488 				 * unshared.  It should always be the
    489 				 * last element of the caller's list.
    490 				 */
    491 				if (vp1->vis_exported &&
    492 				    vp2->vis_exported) {
    493 					vp2->vis_exported = 0;
    494 				}
    495 			}
    496 
    497 			break;
    498 		}
    499 		prev = vp2;
    500 	}
    501 }
    502 
    503 /*
    504  * This function checks the path to a new export to
    505  * check whether all the pathname components are
    506  * exported. It works by climbing the file tree one
    507  * component at a time via "..", crossing mountpoints
    508  * if necessary until an export entry is found, or the
    509  * system root is reached.
    510  *
    511  * If an unexported mountpoint is found, then
    512  * a new pseudo export is added and the pathname from
    513  * the mountpoint down to the export is added to the
    514  * visible list for the new pseudo export.  If an existing
    515  * pseudo export is found, then the pathname is added
    516  * to its visible list.
    517  *
 * Note that there are some tests for exportdir.
 * The exportinfo entry that's passed as a parameter
 * is that of the real export, and exportdir is set
 * only while that export's own vnode is being examined.
    522  *
    523  * Here is an example of a possible setup:
    524  *
    525  * () - a new fs; fs mount point
    526  * EXPORT - a real exported node
    527  * PSEUDO - a pseudo node
    528  * vis - visible list
    529  * f# - security flavor#
    530  * (f#) - security flavor# propagated from its descendents
    531  * "" - covered vnode
    532  *
    533  *
    534  *                 /
    535  *                 |
    536  *                 (a) PSEUDO (f1,f2)
    537  *                 |   vis: b,b,"c","n"
    538  *                 |
    539  *                 b
    540  *        ---------|------------------
    541  *        |                          |
    542  *        (c) EXPORT,f1(f2)          (n) PSEUDO (f1,f2)
    543  *        |   vis: "e","d"           |   vis: m,m,,p,q,"o"
    544  *        |                          |
    545  *  ------------------          -------------------
    546  *  |        |        |         |                  |
    547  *  (d)      (e)      f         m EXPORT,f1(f2)    p
    548  *  EXPORT   EXPORT             |                  |
    549  *  f1       f2                 |                  |
    550  *           |                  |                  |
    551  *           j                 (o) EXPORT,f2       q EXPORT f2
    552  *
    553  */
int
treeclimb_export(struct exportinfo *exip)
{
	vnode_t *dvp, *vp;
	fid_t fid;
	int error;
	int exportdir;		/* 1 only while vp is the export's own vnode */
	struct exportinfo *exi = NULL;
	/* exportinfo not yet attached to a treenode; starts as exip */
	struct exportinfo *new_exi = exip;
	struct exp_visible *visp;
	/* visibles collected so far and not yet owned by an exportinfo */
	struct exp_visible *vis_head = NULL;
	struct vattr va;
	/* top of the treenode chain built during the climb */
	treenode_t *tree_head = NULL;

	ASSERT(RW_WRITE_HELD(&exported_lock));

	/*
	 * vp always carries a hold owned by this function; it is
	 * released after the loop, whichever way the loop ends.
	 */
	vp = exip->exi_vp;
	VN_HOLD(vp);
	exportdir = 1;

	for (;;) {

		bzero(&fid, sizeof (fid));
		fid.fid_len = MAXFIDSZ;
		error = vop_fid_pseudo(vp, &fid);
		if (error)
			break;

		if (! exportdir) {
			/*
			 * Check if this exportroot is a VROOT dir.  If so,
			 * then attach the pseudonodes.  If not, then
			 * continue .. traversal until we hit a VROOT
			 * export (pseudo or real).
			 */
			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
			if (exi != NULL && vp->v_flag & VROOT) {
				/*
				 * Found an export info
				 *
				 * Extend the list of visible
				 * directories whether it's a pseudo
				 * or a real export.
				 */
				more_visible(exi, vis_head);
				break;	/* and climb no further */
			}
		}

		/*
		 * If at the root of the filesystem, need
		 * to traverse across the mountpoint
		 * and continue the climb on the mounted-on
		 * filesystem.
		 */
		if (vp->v_flag & VROOT) {

			if (! exportdir) {
				/*
				 * Found the root directory of a filesystem
				 * that isn't exported.  Need to export
				 * this as a pseudo export so that an NFS v4
				 * client can do lookups in it.
				 */
				error = pseudo_exportfs(vp, vis_head, NULL,
				    &new_exi);
				if (error)
					break;
				/*
				 * The collected visibles now hang off the
				 * new pseudo export (exi_visible); start a
				 * fresh list for the next filesystem up.
				 */
				vis_head = NULL;
			}

			if (VN_CMP(vp, rootdir)) {
				/* at system root */
				/*
				 * If sharing "/", new_exi is shared exportinfo
				 * (exip). Otherwise, new_exi is exportinfo
				 * created in pseudo_exportfs() above.
				 */
				ns_root = tree_prepend_node(tree_head, 0,
				    new_exi);
				break;
			}

			/*
			 * untraverse() releases the vnode passed in and
			 * returns a held vnode, so the single hold on vp
			 * is preserved across the mountpoint crossing.
			 */
			vp = untraverse(vp);
			exportdir = 0;
			continue;
		}

		/*
		 * Do a getattr to obtain the nodeid (inode num)
		 * for this vnode.
		 */
		va.va_mask = AT_NODEID;
		error = VOP_GETATTR(vp, &va, 0, CRED(), NULL);
		if (error)
			break;

		/*
		 *  Add this directory fid to visible list
		 *
		 * The allocation is not zeroed; the fields are assigned
		 * one by one here and vis_tree is set by
		 * tree_prepend_node() below.  The visible takes its own
		 * hold on vp, separate from the climb's hold.
		 */
		visp = kmem_alloc(sizeof (*visp), KM_SLEEP);
		VN_HOLD(vp);
		visp->vis_vp = vp;
		visp->vis_fid = fid;		/* structure copy */
		visp->vis_ino = va.va_nodeid;
		visp->vis_count = 1;
		visp->vis_exported = exportdir;
		visp->vis_secinfo = NULL;
		visp->vis_seccnt = 0;
		visp->vis_next = vis_head;
		vis_head = visp;


		/*
		 * Will set treenode's pointer to exportinfo to
		 * 1. shared exportinfo (exip) - if first visit here
		 * 2. freshly allocated pseudo export (if any)
		 * 3. null otherwise
		 */
		tree_head = tree_prepend_node(tree_head, visp, new_exi);
		new_exi = NULL;

		/*
		 * Now, do a ".." to find parent dir of vp.
		 */
		error = VOP_LOOKUP(vp, "..", &dvp, NULL, 0, NULL, CRED(),
		    NULL, NULL, NULL);

		/*
		 * If the lookup on the exported object itself fails with
		 * ENOTDIR, fall back to the directory vnode recorded in
		 * the exportinfo (exi_dvp) and take a hold on it.
		 */
		if (error == ENOTDIR && exportdir) {
			dvp = exip->exi_dvp;
			ASSERT(dvp != NULL);
			VN_HOLD(dvp);
			error = 0;
		}

		if (error)
			break;

		/* Step up: swap the climb's hold from vp to its parent. */
		exportdir = 0;
		VN_RELE(vp);
		vp = dvp;
	}

	VN_RELE(vp);

	/*
	 * We can have set error due to error in:
	 * 1. vop_fid_pseudo()
	 * 2. pseudo_exportfs() which can fail only in vop_fid_pseudo()
	 * 3. VOP_GETATTR()
	 * 4. VOP_LOOKUP()
	 * We must free pseudo exportinfos, visibles and treenodes.
	 * Visibles are referenced from treenode_t::tree_vis and
	 * exportinfo_t::exi_visible. To avoid double freeing, only
	 * exi_visible pointer is used, via exi_rele(), for the clean-up.
	 */
	if (error) {
		/* Free unconnected visibles, if there are any. */
		if (vis_head)
			free_visible(vis_head);

		/* Connect unconnected exportinfo, if there is any. */
		if (new_exi && new_exi != exip)
			tree_head = tree_prepend_node(tree_head, 0, new_exi);

		/*
		 * Walk the chain from the top down through the first-child
		 * links, undoing every pseudo export found on the way and
		 * freeing each treenode.
		 */
		while (tree_head) {
			treenode_t *t2 = tree_head;
			exportinfo_t *e  = tree_head->tree_exi;
			/* exip will be freed in exportfs() */
			if (e && e != exip) {
				(void) export_unlink(&e->exi_fsid, &e->exi_fid,
				    e->exi_vp, NULL);
				exi_rele(e);
			}
			tree_head = tree_head->tree_child_first;
			kmem_free(t2, sizeof (*t2));
		}
	}

	return (error);
}
    735 
    736 /*
    737  * Walk up the tree and:
    738  * 1. release pseudo exportinfo if it has no child
    739  * 2. release visible in parent's exportinfo
    740  * 3. delete non-exported leaf nodes from tree
    741  *
    742  * Deleting of nodes will start only if the unshared
    743  * node was a leaf node.
    744  * Deleting of nodes will finish when we reach a node which
    745  * has children or is a real export, then we might still need
    746  * to continue releasing visibles, until we reach VROOT node.
    747  */
    748 void
    749 treeclimb_unexport(struct exportinfo *exip)
    750 {
    751 	struct exportinfo *exi;
    752 	treenode_t *tnode, *old_nd;
    753 
    754 	ASSERT(RW_WRITE_HELD(&exported_lock));
    755 
    756 	tnode = exip->exi_tree;
    757 	/*
    758 	 * The unshared exportinfo was unlinked in unexport().
    759 	 * Zeroing tree_exi ensures that we will skip it.
    760 	 */
    761 	tnode->tree_exi = NULL;
    762 
    763 	while (tnode) {
    764 
    765 		/* Stop at VROOT node which is exported or has child */
    766 		if (TREE_ROOT(tnode) &&
    767 		    (TREE_EXPORTED(tnode) || tnode->tree_child_first))
    768 			break;
    769 
    770 		/* Release pseudo export if it has no child */
    771 		if (TREE_ROOT(tnode) && !TREE_EXPORTED(tnode) &&
    772 		    tnode->tree_child_first == 0) {
    773 			exi = tnode->tree_exi;
    774 			(void) export_unlink(&exi->exi_fsid, &exi->exi_fid,
    775 			    exi->exi_vp, NULL);
    776 			exi_rele(tnode->tree_exi);
    777 		}
    778 
    779 		/* Release visible in parent's exportinfo */
    780 		if (tnode->tree_vis) {
    781 			exi = vis2exi(tnode->tree_vis);
    782 			less_visible(exi, tnode->tree_vis);
    783 		}
    784 
    785 		/* Continue with parent */
    786 		old_nd = tnode;
    787 		tnode = tnode->tree_parent;
    788 
    789 		/* Remove itself, if this is a leaf and non-exported node */
    790 		if (old_nd->tree_child_first == NULL && !TREE_EXPORTED(old_nd))
    791 			tree_remove_node(old_nd);
    792 	}
    793 }
    794 
    795 /*
    796  * Traverse backward across mountpoint from the
    797  * root vnode of a filesystem to its mounted-on
    798  * vnode.
    799  */
    800 vnode_t *
    801 untraverse(vnode_t *vp)
    802 {
    803 	vnode_t *tvp, *nextvp;
    804 
    805 	tvp = vp;
    806 	for (;;) {
    807 		if (! (tvp->v_flag & VROOT))
    808 			break;
    809 
    810 		/* lock vfs to prevent unmount of this vfs */
    811 		vfs_lock_wait(tvp->v_vfsp);
    812 
    813 		if ((nextvp = tvp->v_vfsp->vfs_vnodecovered) == NULL) {
    814 			vfs_unlock(tvp->v_vfsp);
    815 			break;
    816 		}
    817 
    818 		/*
    819 		 * Hold nextvp to prevent unmount.  After unlock vfs and
    820 		 * rele tvp, any number of overlays could be unmounted.
    821 		 * Putting a hold on vfs_vnodecovered will only allow
    822 		 * tvp's vfs to be unmounted. Of course if caller placed
    823 		 * extra hold on vp before calling untraverse, the following
    824 		 * hold would not be needed.  Since prev actions of caller
    825 		 * are unknown, we need to hold here just to be safe.
    826 		 */
    827 		VN_HOLD(nextvp);
    828 		vfs_unlock(tvp->v_vfsp);
    829 		VN_RELE(tvp);
    830 		tvp = nextvp;
    831 	}
    832 
    833 	return (tvp);
    834 }
    835 
    836 /*
    837  * Given an exportinfo, climb up to find the exportinfo for the VROOT
    838  * of the filesystem.
    839  *
    840  * e.g.         /
    841  *              |
    842  *              a (VROOT) pseudo-exportinfo
    843  *		|
    844  *		b
    845  *		|
    846  *		c  #share /a/b/c
    847  *		|
    848  *		d
    849  *
    850  * where c is in the same filesystem as a.
    851  * So, get_root_export(*exportinfo_for_c) returns exportinfo_for_a
    852  *
    853  * If d is shared, then c will be put into a's visible list.
    854  * Note: visible list is per filesystem and is attached to the
    855  * VROOT exportinfo.
    856  */
    857 struct exportinfo *
    858 get_root_export(struct exportinfo *exip)
    859 {
    860 	vnode_t *dvp, *vp;
    861 	fid_t fid;
    862 	struct exportinfo *exi = exip;
    863 	int error;
    864 
    865 	vp = exi->exi_vp;
    866 	VN_HOLD(vp);
    867 
    868 	for (;;) {
    869 
    870 		if (vp->v_flag & VROOT) {
    871 			ASSERT(exi != NULL);
    872 			break;
    873 		}
    874 
    875 		/*
    876 		 * Now, do a ".." to find parent dir of vp.
    877 		 */
    878 		error = VOP_LOOKUP(vp, "..", &dvp, NULL, 0, NULL, CRED(),
    879 		    NULL, NULL, NULL);
    880 
    881 		if (error) {
    882 			exi = NULL;
    883 			break;
    884 		}
    885 
    886 		VN_RELE(vp);
    887 		vp = dvp;
    888 
    889 		bzero(&fid, sizeof (fid));
    890 		fid.fid_len = MAXFIDSZ;
    891 		error = vop_fid_pseudo(vp, &fid);
    892 		if (error) {
    893 			exi = NULL;
    894 			break;
    895 		}
    896 
    897 		exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
    898 	}
    899 
    900 	VN_RELE(vp);
    901 	return (exi);
    902 }
    903 
    904 /*
    905  * Return true if the supplied vnode has a sub-directory exported.
    906  */
    907 int
    908 has_visible(struct exportinfo *exi, vnode_t *vp)
    909 {
    910 	struct exp_visible *visp;
    911 	fid_t fid;
    912 	bool_t vp_is_exported;
    913 
    914 	vp_is_exported = VN_CMP(vp,  exi->exi_vp);
    915 
    916 	/*
    917 	 * An exported root vnode has a sub-dir shared if it has a visible list.
    918 	 * i.e. if it does not have a visible list, then there is no node in
    919 	 * this filesystem leads to any other shared node.
    920 	 */
    921 	if (vp_is_exported && (vp->v_flag & VROOT))
    922 		return (exi->exi_visible ? 1 : 0);
    923 
    924 	/*
    925 	 * Only the exportinfo of a fs root node may have a visible list.
    926 	 * Either it is a pseudo root node, or a real exported root node.
    927 	 */
    928 	if ((exi = get_root_export(exi)) == NULL) {
    929 		return (0);
    930 	}
    931 
    932 	if (!exi->exi_visible)
    933 		return (0);
    934 
    935 	/* Get the fid of the vnode */
    936 	bzero(&fid, sizeof (fid));
    937 	fid.fid_len = MAXFIDSZ;
    938 	if (vop_fid_pseudo(vp, &fid) != 0) {
    939 		return (0);
    940 	}
    941 
    942 	/*
    943 	 * See if vp is in the visible list of the root node exportinfo.
    944 	 */
    945 	for (visp = exi->exi_visible; visp; visp = visp->vis_next) {
    946 		if (EQFID(&fid, &visp->vis_fid)) {
    947 			/*
    948 			 * If vp is an exported non-root node with only 1 path
    949 			 * count (for itself), it indicates no sub-dir shared
    950 			 * using this vp as a path.
    951 			 */
    952 			if (vp_is_exported && visp->vis_count < 2)
    953 				break;
    954 
    955 			return (1);
    956 		}
    957 	}
    958 
    959 	return (0);
    960 }
    961 
    962 /*
    963  * Returns true if the supplied vnode is visible
    964  * in this export.  If vnode is visible, return
    965  * vis_exported in expseudo.
    966  */
    967 int
    968 nfs_visible(struct exportinfo *exi, vnode_t *vp, int *expseudo)
    969 {
    970 	struct exp_visible *visp;
    971 	fid_t fid;
    972 
    973 	/*
    974 	 * First check to see if vp is export root.
    975 	 *
    976 	 * A pseudo export root can never be exported
    977 	 * (it would be a real export then); however,
    978 	 * it is always visible.  If a pseudo root object
    979 	 * was exported by server admin, then the entire
    980 	 * pseudo exportinfo (and all visible entries) would
    981 	 * be destroyed.  A pseudo exportinfo only exists
    982 	 * to provide access to real (descendant) export(s).
    983 	 *
    984 	 * Previously, rootdir was special cased here; however,
    985 	 * the export root special case handles the rootdir
    986 	 * case also.
    987 	 */
    988 	if (VN_CMP(vp, exi->exi_vp)) {
    989 		*expseudo = 0;
    990 		return (1);
    991 	}
    992 
    993 	/*
    994 	 * Only a PSEUDO node has a visible list or an exported VROOT
    995 	 * node may have a visible list.
    996 	 */
    997 	if (! PSEUDO(exi) && (exi = get_root_export(exi)) == NULL) {
    998 		*expseudo = 0;
    999 		return (0);
   1000 	}
   1001 
   1002 	/* Get the fid of the vnode */
   1003 
   1004 	bzero(&fid, sizeof (fid));
   1005 	fid.fid_len = MAXFIDSZ;
   1006 	if (vop_fid_pseudo(vp, &fid) != 0) {
   1007 		*expseudo = 0;
   1008 		return (0);
   1009 	}
   1010 
   1011 	/*
   1012 	 * We can't trust VN_CMP() above because of LOFS.
   1013 	 * Even though VOP_CMP will do the right thing for LOFS
   1014 	 * objects, VN_CMP will short circuit out early when the
   1015 	 * vnode ops ptrs are different.  Just in case we're dealing
   1016 	 * with LOFS, compare exi_fid/fsid here.
   1017 	 *
   1018 	 * expseudo is not set because this is not an export
   1019 	 */
   1020 	if (EQFID(&exi->exi_fid, &fid) &&
   1021 	    EQFSID(&exi->exi_fsid, &vp->v_vfsp->vfs_fsid)) {
   1022 		*expseudo = 0;
   1023 		return (1);
   1024 	}
   1025 
   1026 
   1027 	/* See if it matches any fid in the visible list */
   1028 
   1029 	for (visp = exi->exi_visible; visp; visp = visp->vis_next) {
   1030 		if (EQFID(&fid, &visp->vis_fid)) {
   1031 			*expseudo = visp->vis_exported;
   1032 			return (1);
   1033 		}
   1034 	}
   1035 
   1036 	*expseudo = 0;
   1037 
   1038 	return (0);
   1039 }
   1040 
   1041 /*
   1042  * Returns true if the supplied vnode is the
   1043  * directory of an export point.
   1044  */
   1045 int
   1046 nfs_exported(struct exportinfo *exi, vnode_t *vp)
   1047 {
   1048 	struct exp_visible *visp;
   1049 	fid_t fid;
   1050 
   1051 	/*
   1052 	 * First check to see if vp is the export root
   1053 	 * This check required for the case of lookup ..
   1054 	 * where .. is a V_ROOT vnode and a pseudo exportroot.
   1055 	 * Pseudo export root objects do not have an entry
   1056 	 * in the visible list even though every V_ROOT
   1057 	 * pseudonode is visible.  It is safe to compare
   1058 	 * vp here because pseudo_exportfs put a hold on
   1059 	 * it when exi_vp was initialized.
   1060 	 *
   1061 	 * Note: VN_CMP() won't match for LOFS shares, but they're
   1062 	 * handled below w/EQFID/EQFSID.
   1063 	 */
   1064 	if (VN_CMP(vp, exi->exi_vp))
   1065 		return (1);
   1066 
   1067 	/* Get the fid of the vnode */
   1068 
   1069 	bzero(&fid, sizeof (fid));
   1070 	fid.fid_len = MAXFIDSZ;
   1071 	if (vop_fid_pseudo(vp, &fid) != 0)
   1072 		return (0);
   1073 
   1074 	if (EQFID(&fid, &exi->exi_fid) &&
   1075 	    EQFSID(&vp->v_vfsp->vfs_fsid, &exi->exi_fsid)) {
   1076 		return (1);
   1077 	}
   1078 
   1079 	/* See if it matches any fid in the visible list */
   1080 
   1081 	for (visp = exi->exi_visible; visp; visp = visp->vis_next) {
   1082 		if (EQFID(&fid, &visp->vis_fid))
   1083 			return (visp->vis_exported);
   1084 	}
   1085 
   1086 	return (0);
   1087 }
   1088 
   1089 /*
   1090  * Returns true if the supplied inode is visible
   1091  * in this export.  This function is used by
   1092  * readdir which uses inode numbers from the
   1093  * directory.
   1094  *
   1095  * NOTE: this code does not match inode number for ".",
   1096  * but it isn't required because NFS4 server rddir
   1097  * skips . and .. entries.
   1098  */
   1099 int
   1100 nfs_visible_inode(struct exportinfo *exi, ino64_t ino, int *expseudo)
   1101 {
   1102 	struct exp_visible *visp;
   1103 
   1104 	/*
   1105 	 * Only a PSEUDO node has a visible list or an exported VROOT
   1106 	 * node may have a visible list.
   1107 	 */
   1108 	if (! PSEUDO(exi) && (exi = get_root_export(exi)) == NULL) {
   1109 		*expseudo = 0;
   1110 		return (0);
   1111 	}
   1112 
   1113 	for (visp = exi->exi_visible; visp; visp = visp->vis_next)
   1114 		if ((u_longlong_t)ino == visp->vis_ino) {
   1115 			*expseudo = visp->vis_exported;
   1116 			return (1);
   1117 		}
   1118 
   1119 	*expseudo = 0;
   1120 	return (0);
   1121 }
   1122