Home | History | Annotate | Download | only in fs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/param.h>
     27 #include <sys/isa_defs.h>
     28 #include <sys/types.h>
     29 #include <sys/sysmacros.h>
     30 #include <sys/cred.h>
     31 #include <sys/systm.h>
     32 #include <sys/errno.h>
     33 #include <sys/fcntl.h>
     34 #include <sys/pathname.h>
     35 #include <sys/stat.h>
     36 #include <sys/vfs.h>
     37 #include <sys/acl.h>
     38 #include <sys/file.h>
     39 #include <sys/sunddi.h>
     40 #include <sys/debug.h>
     41 #include <sys/cmn_err.h>
     42 #include <sys/vnode.h>
     43 #include <sys/mode.h>
     44 #include <sys/nvpair.h>
     45 #include <sys/attr.h>
     46 #include <sys/gfs.h>
     47 #include <sys/mutex.h>
     48 #include <fs/fs_subr.h>
     49 #include <sys/kidmap.h>
     50 
/*
 * Private data hung off v_data for a sysattr view file.  The GFS file
 * structure comes first, followed by the view this file represents
 * (stored by xattr_mkfile()).
 */
typedef struct {
	gfs_file_t	gfs_private;	/* must stay the first member */
	xattr_view_t	xattr_view;	/* read-only or read-write view */
} xattr_file_t;
     55 
     56 /* ARGSUSED */
     57 static int
     58 xattr_file_open(vnode_t **vpp, int flags, cred_t *cr, caller_context_t *ct)
     59 {
     60 	xattr_file_t *np = (*vpp)->v_data;
     61 
     62 	if ((np->xattr_view == XATTR_VIEW_READONLY) && (flags & FWRITE))
     63 		return (EACCES);
     64 
     65 	return (0);
     66 }
     67 
     68 /* ARGSUSED */
     69 static int
     70 xattr_file_access(vnode_t *vp, int mode, int flags, cred_t *cr,
     71     caller_context_t *ct)
     72 {
     73 	xattr_file_t *np = vp->v_data;
     74 
     75 	if ((np->xattr_view == XATTR_VIEW_READONLY) && (mode & VWRITE))
     76 		return (EACCES);
     77 
     78 	return (0);
     79 }
     80 
     81 /* ARGSUSED */
     82 static int
     83 xattr_file_close(vnode_t *vp, int flags, int count, offset_t off,
     84     cred_t *cr, caller_context_t *ct)
     85 {
     86 	cleanlocks(vp, ddi_get_pid(), 0);
     87 	cleanshares(vp, ddi_get_pid());
     88 	return (0);
     89 }
     90 
     91 static int
     92 xattr_common_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
     93 {
     94 	xattr_fid_t	*xfidp;
     95 	vnode_t		*pvp, *savevp;
     96 	int		error;
     97 	uint16_t	orig_len;
     98 
     99 	if (fidp->fid_len < XATTR_FIDSZ) {
    100 		fidp->fid_len = XATTR_FIDSZ;
    101 		return (ENOSPC);
    102 	}
    103 
    104 	savevp = pvp = gfs_file_parent(vp);
    105 	mutex_enter(&savevp->v_lock);
    106 	if (pvp->v_flag & V_XATTRDIR) {
    107 		pvp = gfs_file_parent(pvp);
    108 	}
    109 	mutex_exit(&savevp->v_lock);
    110 
    111 	xfidp = (xattr_fid_t *)fidp;
    112 	orig_len = fidp->fid_len;
    113 	fidp->fid_len = sizeof (xfidp->parent_fid);
    114 
    115 	error = VOP_FID(pvp, fidp, ct);
    116 	if (error) {
    117 		fidp->fid_len = orig_len;
    118 		return (error);
    119 	}
    120 
    121 	xfidp->parent_len = fidp->fid_len;
    122 	fidp->fid_len = XATTR_FIDSZ;
    123 	xfidp->dir_offset = gfs_file_inode(vp);
    124 
    125 	return (0);
    126 }
    127 
    128 /* ARGSUSED */
    129 static int
    130 xattr_fill_nvlist(vnode_t *vp, xattr_view_t xattr_view, nvlist_t *nvlp,
    131     cred_t *cr, caller_context_t *ct)
    132 {
    133 	int error;
    134 	f_attr_t attr;
    135 	uint64_t fsid;
    136 	xvattr_t xvattr;
    137 	xoptattr_t *xoap;	/* Pointer to optional attributes */
    138 	vnode_t *ppvp;
    139 	const char *domain;
    140 	uint32_t rid;
    141 
    142 	xva_init(&xvattr);
    143 
    144 	if ((xoap = xva_getxoptattr(&xvattr)) == NULL)
    145 		return (EINVAL);
    146 
    147 	/*
    148 	 * For detecting ephemeral uid/gid
    149 	 */
    150 	xvattr.xva_vattr.va_mask |= (AT_UID|AT_GID);
    151 
    152 	/*
    153 	 * We need to access the real fs object.
    154 	 * vp points to a GFS file; ppvp points to the real object.
    155 	 */
    156 	ppvp = gfs_file_parent(gfs_file_parent(vp));
    157 
    158 	/*
    159 	 * Iterate through the attrs associated with this view
    160 	 */
    161 
    162 	for (attr = 0; attr < F_ATTR_ALL; attr++) {
    163 		if (xattr_view != attr_to_xattr_view(attr)) {
    164 			continue;
    165 		}
    166 
    167 		switch (attr) {
    168 		case F_SYSTEM:
    169 			XVA_SET_REQ(&xvattr, XAT_SYSTEM);
    170 			break;
    171 		case F_READONLY:
    172 			XVA_SET_REQ(&xvattr, XAT_READONLY);
    173 			break;
    174 		case F_HIDDEN:
    175 			XVA_SET_REQ(&xvattr, XAT_HIDDEN);
    176 			break;
    177 		case F_ARCHIVE:
    178 			XVA_SET_REQ(&xvattr, XAT_ARCHIVE);
    179 			break;
    180 		case F_IMMUTABLE:
    181 			XVA_SET_REQ(&xvattr, XAT_IMMUTABLE);
    182 			break;
    183 		case F_APPENDONLY:
    184 			XVA_SET_REQ(&xvattr, XAT_APPENDONLY);
    185 			break;
    186 		case F_NOUNLINK:
    187 			XVA_SET_REQ(&xvattr, XAT_NOUNLINK);
    188 			break;
    189 		case F_OPAQUE:
    190 			XVA_SET_REQ(&xvattr, XAT_OPAQUE);
    191 			break;
    192 		case F_NODUMP:
    193 			XVA_SET_REQ(&xvattr, XAT_NODUMP);
    194 			break;
    195 		case F_AV_QUARANTINED:
    196 			XVA_SET_REQ(&xvattr, XAT_AV_QUARANTINED);
    197 			break;
    198 		case F_AV_MODIFIED:
    199 			XVA_SET_REQ(&xvattr, XAT_AV_MODIFIED);
    200 			break;
    201 		case F_AV_SCANSTAMP:
    202 			if (ppvp->v_type == VREG)
    203 				XVA_SET_REQ(&xvattr, XAT_AV_SCANSTAMP);
    204 			break;
    205 		case F_CRTIME:
    206 			XVA_SET_REQ(&xvattr, XAT_CREATETIME);
    207 			break;
    208 		case F_FSID:
    209 			fsid = (((uint64_t)vp->v_vfsp->vfs_fsid.val[0] << 32) |
    210 			    (uint64_t)(vp->v_vfsp->vfs_fsid.val[1] &
    211 			    0xffffffff));
    212 			VERIFY(nvlist_add_uint64(nvlp, attr_to_name(attr),
    213 			    fsid) == 0);
    214 			break;
    215 		case F_REPARSE:
    216 			XVA_SET_REQ(&xvattr, XAT_REPARSE);
    217 			break;
    218 		default:
    219 			break;
    220 		}
    221 	}
    222 
    223 	error = VOP_GETATTR(ppvp, &xvattr.xva_vattr, 0, cr, ct);
    224 	if (error)
    225 		return (error);
    226 
    227 	/*
    228 	 * Process all the optional attributes together here.  Notice that
    229 	 * xoap was set when the optional attribute bits were set above.
    230 	 */
    231 	if ((xvattr.xva_vattr.va_mask & AT_XVATTR) && xoap) {
    232 		if (XVA_ISSET_RTN(&xvattr, XAT_READONLY)) {
    233 			VERIFY(nvlist_add_boolean_value(nvlp,
    234 			    attr_to_name(F_READONLY),
    235 			    xoap->xoa_readonly) == 0);
    236 		}
    237 		if (XVA_ISSET_RTN(&xvattr, XAT_HIDDEN)) {
    238 			VERIFY(nvlist_add_boolean_value(nvlp,
    239 			    attr_to_name(F_HIDDEN),
    240 			    xoap->xoa_hidden) == 0);
    241 		}
    242 		if (XVA_ISSET_RTN(&xvattr, XAT_SYSTEM)) {
    243 			VERIFY(nvlist_add_boolean_value(nvlp,
    244 			    attr_to_name(F_SYSTEM),
    245 			    xoap->xoa_system) == 0);
    246 		}
    247 		if (XVA_ISSET_RTN(&xvattr, XAT_ARCHIVE)) {
    248 			VERIFY(nvlist_add_boolean_value(nvlp,
    249 			    attr_to_name(F_ARCHIVE),
    250 			    xoap->xoa_archive) == 0);
    251 		}
    252 		if (XVA_ISSET_RTN(&xvattr, XAT_IMMUTABLE)) {
    253 			VERIFY(nvlist_add_boolean_value(nvlp,
    254 			    attr_to_name(F_IMMUTABLE),
    255 			    xoap->xoa_immutable) == 0);
    256 		}
    257 		if (XVA_ISSET_RTN(&xvattr, XAT_NOUNLINK)) {
    258 			VERIFY(nvlist_add_boolean_value(nvlp,
    259 			    attr_to_name(F_NOUNLINK),
    260 			    xoap->xoa_nounlink) == 0);
    261 		}
    262 		if (XVA_ISSET_RTN(&xvattr, XAT_APPENDONLY)) {
    263 			VERIFY(nvlist_add_boolean_value(nvlp,
    264 			    attr_to_name(F_APPENDONLY),
    265 			    xoap->xoa_appendonly) == 0);
    266 		}
    267 		if (XVA_ISSET_RTN(&xvattr, XAT_NODUMP)) {
    268 			VERIFY(nvlist_add_boolean_value(nvlp,
    269 			    attr_to_name(F_NODUMP),
    270 			    xoap->xoa_nodump) == 0);
    271 		}
    272 		if (XVA_ISSET_RTN(&xvattr, XAT_OPAQUE)) {
    273 			VERIFY(nvlist_add_boolean_value(nvlp,
    274 			    attr_to_name(F_OPAQUE),
    275 			    xoap->xoa_opaque) == 0);
    276 		}
    277 		if (XVA_ISSET_RTN(&xvattr, XAT_AV_QUARANTINED)) {
    278 			VERIFY(nvlist_add_boolean_value(nvlp,
    279 			    attr_to_name(F_AV_QUARANTINED),
    280 			    xoap->xoa_av_quarantined) == 0);
    281 		}
    282 		if (XVA_ISSET_RTN(&xvattr, XAT_AV_MODIFIED)) {
    283 			VERIFY(nvlist_add_boolean_value(nvlp,
    284 			    attr_to_name(F_AV_MODIFIED),
    285 			    xoap->xoa_av_modified) == 0);
    286 		}
    287 		if (XVA_ISSET_RTN(&xvattr, XAT_AV_SCANSTAMP)) {
    288 			VERIFY(nvlist_add_uint8_array(nvlp,
    289 			    attr_to_name(F_AV_SCANSTAMP),
    290 			    xoap->xoa_av_scanstamp,
    291 			    sizeof (xoap->xoa_av_scanstamp)) == 0);
    292 		}
    293 		if (XVA_ISSET_RTN(&xvattr, XAT_CREATETIME)) {
    294 			VERIFY(nvlist_add_uint64_array(nvlp,
    295 			    attr_to_name(F_CRTIME),
    296 			    (uint64_t *)&(xoap->xoa_createtime),
    297 			    sizeof (xoap->xoa_createtime) /
    298 			    sizeof (uint64_t)) == 0);
    299 		}
    300 		if (XVA_ISSET_RTN(&xvattr, XAT_REPARSE)) {
    301 			VERIFY(nvlist_add_boolean_value(nvlp,
    302 			    attr_to_name(F_REPARSE),
    303 			    xoap->xoa_reparse) == 0);
    304 		}
    305 	}
    306 	/*
    307 	 * Check for optional ownersid/groupsid
    308 	 */
    309 
    310 	if (xvattr.xva_vattr.va_uid > MAXUID) {
    311 		nvlist_t *nvl_sid;
    312 
    313 		if (nvlist_alloc(&nvl_sid, NV_UNIQUE_NAME, KM_SLEEP))
    314 			return (ENOMEM);
    315 
    316 		if (kidmap_getsidbyuid(crgetzone(cr), xvattr.xva_vattr.va_uid,
    317 		    &domain, &rid) == 0) {
    318 			VERIFY(nvlist_add_string(nvl_sid,
    319 			    SID_DOMAIN, domain) == 0);
    320 			VERIFY(nvlist_add_uint32(nvl_sid, SID_RID, rid) == 0);
    321 			VERIFY(nvlist_add_nvlist(nvlp, attr_to_name(F_OWNERSID),
    322 			    nvl_sid) == 0);
    323 		}
    324 		nvlist_free(nvl_sid);
    325 	}
    326 	if (xvattr.xva_vattr.va_gid > MAXUID) {
    327 		nvlist_t *nvl_sid;
    328 
    329 		if (nvlist_alloc(&nvl_sid, NV_UNIQUE_NAME, KM_SLEEP))
    330 			return (ENOMEM);
    331 
    332 		if (kidmap_getsidbygid(crgetzone(cr), xvattr.xva_vattr.va_gid,
    333 		    &domain, &rid) == 0) {
    334 			VERIFY(nvlist_add_string(nvl_sid,
    335 			    SID_DOMAIN, domain) == 0);
    336 			VERIFY(nvlist_add_uint32(nvl_sid, SID_RID, rid) == 0);
    337 			VERIFY(nvlist_add_nvlist(nvlp, attr_to_name(F_GROUPSID),
    338 			    nvl_sid) == 0);
    339 		}
    340 		nvlist_free(nvl_sid);
    341 	}
    342 
    343 	return (0);
    344 }
    345 
    346 /*
    347  * The size of a sysattr file is the size of the nvlist that will be
    348  * returned by xattr_file_read().  A call to xattr_file_write() could
    349  * change the size of that nvlist.  That size is not stored persistently
    350  * so xattr_fill_nvlist() calls VOP_GETATTR so that it can be calculated.
    351  */
    352 static int
    353 xattr_file_size(vnode_t *vp, xattr_view_t xattr_view, size_t *size,
    354     cred_t *cr, caller_context_t *ct)
    355 {
    356 	nvlist_t *nvl;
    357 
    358 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) {
    359 		return (ENOMEM);
    360 	}
    361 
    362 	if (xattr_fill_nvlist(vp, xattr_view, nvl, cr, ct)) {
    363 		nvlist_free(nvl);
    364 		return (EFAULT);
    365 	}
    366 
    367 	VERIFY(nvlist_size(nvl, size, NV_ENCODE_XDR) == 0);
    368 	nvlist_free(nvl);
    369 	return (0);
    370 }
    371 
    372 /* ARGSUSED */
    373 static int
    374 xattr_file_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    375     caller_context_t *ct)
    376 {
    377 	xattr_file_t *np = vp->v_data;
    378 	timestruc_t now;
    379 	size_t size;
    380 	int error;
    381 	vnode_t *pvp;
    382 	vattr_t pvattr;
    383 
    384 	vap->va_type = VREG;
    385 	vap->va_mode = MAKEIMODE(vap->va_type,
    386 	    (np->xattr_view == XATTR_VIEW_READONLY ? 0444 : 0644));
    387 	vap->va_nodeid = gfs_file_inode(vp);
    388 	vap->va_nlink = 1;
    389 	pvp = gfs_file_parent(vp);
    390 	(void) memset(&pvattr, 0, sizeof (pvattr));
    391 	pvattr.va_mask = AT_CTIME|AT_MTIME;
    392 	error = VOP_GETATTR(pvp, &pvattr, flags, cr, ct);
    393 	if (error) {
    394 		return (error);
    395 	}
    396 	vap->va_ctime = pvattr.va_ctime;
    397 	vap->va_mtime = pvattr.va_mtime;
    398 	gethrestime(&now);
    399 	vap->va_atime = now;
    400 	vap->va_uid = 0;
    401 	vap->va_gid = 0;
    402 	vap->va_rdev = 0;
    403 	vap->va_blksize = DEV_BSIZE;
    404 	vap->va_seq = 0;
    405 	vap->va_fsid = vp->v_vfsp->vfs_dev;
    406 	error = xattr_file_size(vp, np->xattr_view, &size, cr, ct);
    407 	vap->va_size = size;
    408 	vap->va_nblocks = howmany(vap->va_size, vap->va_blksize);
    409 	return (error);
    410 }
    411 
    412 /* ARGSUSED */
    413 static int
    414 xattr_file_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
    415     caller_context_t *ct)
    416 {
    417 	xattr_file_t *np = vp->v_data;
    418 	xattr_view_t xattr_view = np->xattr_view;
    419 	char *buf;
    420 	size_t filesize;
    421 	nvlist_t *nvl;
    422 	int error;
    423 
    424 	/*
    425 	 * Validate file offset and fasttrack empty reads
    426 	 */
    427 	if (uiop->uio_loffset < (offset_t)0)
    428 		return (EINVAL);
    429 
    430 	if (uiop->uio_resid == 0)
    431 		return (0);
    432 
    433 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP))
    434 		return (ENOMEM);
    435 
    436 	if (xattr_fill_nvlist(vp, xattr_view, nvl, cr, ct)) {
    437 		nvlist_free(nvl);
    438 		return (EFAULT);
    439 	}
    440 
    441 	VERIFY(nvlist_size(nvl, &filesize, NV_ENCODE_XDR) == 0);
    442 
    443 	if (uiop->uio_loffset >= filesize) {
    444 		nvlist_free(nvl);
    445 		return (0);
    446 	}
    447 
    448 	buf = kmem_alloc(filesize, KM_SLEEP);
    449 	VERIFY(nvlist_pack(nvl, &buf, &filesize, NV_ENCODE_XDR,
    450 	    KM_SLEEP) == 0);
    451 
    452 	error = uiomove((caddr_t)buf, filesize, UIO_READ, uiop);
    453 	kmem_free(buf, filesize);
    454 	nvlist_free(nvl);
    455 	return (error);
    456 }
    457 
    458 /* ARGSUSED */
    459 static int
    460 xattr_file_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
    461     caller_context_t *ct)
    462 {
    463 	int error = 0;
    464 	char *buf;
    465 	char *domain;
    466 	uint32_t rid;
    467 	ssize_t size = uiop->uio_resid;
    468 	nvlist_t *nvp;
    469 	nvpair_t *pair = NULL;
    470 	vnode_t *ppvp;
    471 	xvattr_t xvattr;
    472 	xoptattr_t *xoap = NULL;	/* Pointer to optional attributes */
    473 
    474 	if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0)
    475 		return (EINVAL);
    476 
    477 	/*
    478 	 * Validate file offset and size.
    479 	 */
    480 	if (uiop->uio_loffset < (offset_t)0)
    481 		return (EINVAL);
    482 
    483 	if (size == 0)
    484 		return (EINVAL);
    485 
    486 	xva_init(&xvattr);
    487 
    488 	if ((xoap = xva_getxoptattr(&xvattr)) == NULL) {
    489 		return (EINVAL);
    490 	}
    491 
    492 	/*
    493 	 * Copy and unpack the nvlist
    494 	 */
    495 	buf = kmem_alloc(size, KM_SLEEP);
    496 	if (uiomove((caddr_t)buf, size, UIO_WRITE, uiop)) {
    497 		return (EFAULT);
    498 	}
    499 
    500 	if (nvlist_unpack(buf, size, &nvp, KM_SLEEP) != 0) {
    501 		kmem_free(buf, size);
    502 		uiop->uio_resid = size;
    503 		return (EINVAL);
    504 	}
    505 	kmem_free(buf, size);
    506 
    507 	/*
    508 	 * Fasttrack empty writes (nvlist with no nvpairs)
    509 	 */
    510 	if (nvlist_next_nvpair(nvp, NULL) == 0)
    511 		return (0);
    512 
    513 	ppvp = gfs_file_parent(gfs_file_parent(vp));
    514 
    515 	while (pair = nvlist_next_nvpair(nvp, pair)) {
    516 		data_type_t type;
    517 		f_attr_t attr;
    518 		boolean_t value;
    519 		uint64_t *time, *times;
    520 		uint_t elem, nelems;
    521 		nvlist_t *nvp_sid;
    522 		uint8_t *scanstamp;
    523 
    524 		/*
    525 		 * Validate the name and type of each attribute.
    526 		 * Log any unknown names and continue.  This will
    527 		 * help if additional attributes are added later.
    528 		 */
    529 		type = nvpair_type(pair);
    530 		if ((attr = name_to_attr(nvpair_name(pair))) == F_ATTR_INVAL) {
    531 			cmn_err(CE_WARN, "Unknown attribute %s",
    532 			    nvpair_name(pair));
    533 			continue;
    534 		}
    535 
    536 		/*
    537 		 * Verify nvlist type matches required type and view is OK
    538 		 */
    539 
    540 		if (type != attr_to_data_type(attr) ||
    541 		    (attr_to_xattr_view(attr) == XATTR_VIEW_READONLY)) {
    542 			nvlist_free(nvp);
    543 			return (EINVAL);
    544 		}
    545 
    546 		/*
    547 		 * For OWNERSID/GROUPSID make sure the target
    548 		 * file system support ephemeral ID's
    549 		 */
    550 		if ((attr == F_OWNERSID || attr == F_GROUPSID) &&
    551 		    (!(vp->v_vfsp->vfs_flag & VFS_XID))) {
    552 			nvlist_free(nvp);
    553 			return (EINVAL);
    554 		}
    555 
    556 		/*
    557 		 * Retrieve data from nvpair
    558 		 */
    559 		switch (type) {
    560 		case DATA_TYPE_BOOLEAN_VALUE:
    561 			if (nvpair_value_boolean_value(pair, &value)) {
    562 				nvlist_free(nvp);
    563 				return (EINVAL);
    564 			}
    565 			break;
    566 		case DATA_TYPE_UINT64_ARRAY:
    567 			if (nvpair_value_uint64_array(pair, &times, &nelems)) {
    568 				nvlist_free(nvp);
    569 				return (EINVAL);
    570 			}
    571 			break;
    572 		case DATA_TYPE_NVLIST:
    573 			if (nvpair_value_nvlist(pair, &nvp_sid)) {
    574 				nvlist_free(nvp);
    575 				return (EINVAL);
    576 			}
    577 			break;
    578 		case DATA_TYPE_UINT8_ARRAY:
    579 			if (nvpair_value_uint8_array(pair,
    580 			    &scanstamp, &nelems)) {
    581 				nvlist_free(nvp);
    582 				return (EINVAL);
    583 			}
    584 			break;
    585 		default:
    586 			nvlist_free(nvp);
    587 			return (EINVAL);
    588 		}
    589 
    590 		switch (attr) {
    591 		/*
    592 		 * If we have several similar optional attributes to
    593 		 * process then we should do it all together here so that
    594 		 * xoap and the requested bitmap can be set in one place.
    595 		 */
    596 		case F_READONLY:
    597 			XVA_SET_REQ(&xvattr, XAT_READONLY);
    598 			xoap->xoa_readonly = value;
    599 			break;
    600 		case F_HIDDEN:
    601 			XVA_SET_REQ(&xvattr, XAT_HIDDEN);
    602 			xoap->xoa_hidden = value;
    603 			break;
    604 		case F_SYSTEM:
    605 			XVA_SET_REQ(&xvattr, XAT_SYSTEM);
    606 			xoap->xoa_system = value;
    607 			break;
    608 		case F_ARCHIVE:
    609 			XVA_SET_REQ(&xvattr, XAT_ARCHIVE);
    610 			xoap->xoa_archive = value;
    611 			break;
    612 		case F_IMMUTABLE:
    613 			XVA_SET_REQ(&xvattr, XAT_IMMUTABLE);
    614 			xoap->xoa_immutable = value;
    615 			break;
    616 		case F_NOUNLINK:
    617 			XVA_SET_REQ(&xvattr, XAT_NOUNLINK);
    618 			xoap->xoa_nounlink = value;
    619 			break;
    620 		case F_APPENDONLY:
    621 			XVA_SET_REQ(&xvattr, XAT_APPENDONLY);
    622 			xoap->xoa_appendonly = value;
    623 			break;
    624 		case F_NODUMP:
    625 			XVA_SET_REQ(&xvattr, XAT_NODUMP);
    626 			xoap->xoa_nodump = value;
    627 			break;
    628 		case F_AV_QUARANTINED:
    629 			XVA_SET_REQ(&xvattr, XAT_AV_QUARANTINED);
    630 			xoap->xoa_av_quarantined = value;
    631 			break;
    632 		case F_AV_MODIFIED:
    633 			XVA_SET_REQ(&xvattr, XAT_AV_MODIFIED);
    634 			xoap->xoa_av_modified = value;
    635 			break;
    636 		case F_CRTIME:
    637 			XVA_SET_REQ(&xvattr, XAT_CREATETIME);
    638 			time = (uint64_t *)&(xoap->xoa_createtime);
    639 			for (elem = 0; elem < nelems; elem++)
    640 				*time++ = times[elem];
    641 			break;
    642 		case F_OWNERSID:
    643 		case F_GROUPSID:
    644 			if (nvlist_lookup_string(nvp_sid, SID_DOMAIN,
    645 			    &domain) || nvlist_lookup_uint32(nvp_sid, SID_RID,
    646 			    &rid)) {
    647 				nvlist_free(nvp);
    648 				return (EINVAL);
    649 			}
    650 
    651 			/*
    652 			 * Now map domain+rid to ephemeral id's
    653 			 *
    654 			 * If mapping fails, then the uid/gid will
    655 			 * be set to UID_NOBODY by Winchester.
    656 			 */
    657 
    658 			if (attr == F_OWNERSID) {
    659 				(void) kidmap_getuidbysid(crgetzone(cr), domain,
    660 				    rid, &xvattr.xva_vattr.va_uid);
    661 				xvattr.xva_vattr.va_mask |= AT_UID;
    662 			} else {
    663 				(void) kidmap_getgidbysid(crgetzone(cr), domain,
    664 				    rid, &xvattr.xva_vattr.va_gid);
    665 				xvattr.xva_vattr.va_mask |= AT_GID;
    666 			}
    667 			break;
    668 		case F_AV_SCANSTAMP:
    669 			if (ppvp->v_type == VREG) {
    670 				XVA_SET_REQ(&xvattr, XAT_AV_SCANSTAMP);
    671 				(void) memcpy(xoap->xoa_av_scanstamp,
    672 				    scanstamp, nelems);
    673 			} else {
    674 				nvlist_free(nvp);
    675 				return (EINVAL);
    676 			}
    677 			break;
    678 		case F_REPARSE:
    679 			XVA_SET_REQ(&xvattr, XAT_REPARSE);
    680 			xoap->xoa_reparse = value;
    681 			break;
    682 		default:
    683 			break;
    684 		}
    685 	}
    686 
    687 	ppvp = gfs_file_parent(gfs_file_parent(vp));
    688 	error = VOP_SETATTR(ppvp, &xvattr.xva_vattr, 0, cr, ct);
    689 	if (error)
    690 		uiop->uio_resid = size;
    691 
    692 	nvlist_free(nvp);
    693 	return (error);
    694 }
    695 
    696 static int
    697 xattr_file_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
    698     caller_context_t *ct)
    699 {
    700 	switch (cmd) {
    701 	case _PC_XATTR_EXISTS:
    702 	case _PC_SATTR_ENABLED:
    703 	case _PC_SATTR_EXISTS:
    704 		*valp = 0;
    705 		return (0);
    706 	default:
    707 		return (fs_pathconf(vp, cmd, valp, cr, ct));
    708 	}
    709 }
    710 
/*
 * Vnode operations vector handed to gfs_file_create() by xattr_mkfile().
 * It is not assigned in the portion of the file shown here.
 */
vnodeops_t *xattr_file_ops;

/*
 * Operation template for sysattr view files.  Entries using .error map
 * the operation to a generic fs_* routine; the remaining entries point at
 * the xattr_file_* / GFS implementations.  The list ends with a NULL name.
 */
static const fs_operation_def_t xattr_file_tops[] = {
	{ VOPNAME_OPEN,		{ .vop_open = xattr_file_open }		},
	{ VOPNAME_CLOSE,	{ .vop_close = xattr_file_close }	},
	{ VOPNAME_READ,		{ .vop_read = xattr_file_read }		},
	{ VOPNAME_WRITE,	{ .vop_write = xattr_file_write }	},
	{ VOPNAME_IOCTL,	{ .error = fs_ioctl }			},
	{ VOPNAME_GETATTR,	{ .vop_getattr = xattr_file_getattr }	},
	{ VOPNAME_ACCESS,	{ .vop_access = xattr_file_access }	},
	{ VOPNAME_READDIR,	{ .error = fs_notdir }			},
	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek }			},
	{ VOPNAME_INACTIVE,	{ .vop_inactive = gfs_vop_inactive }	},
	{ VOPNAME_FID,		{ .vop_fid = xattr_common_fid }		},
	{ VOPNAME_PATHCONF,	{ .vop_pathconf = xattr_file_pathconf }	},
	{ VOPNAME_PUTPAGE,	{ .error = fs_putpage }			},
	{ VOPNAME_FSYNC,	{ .error = fs_fsync }			},
	{ NULL }
};
    730 
    731 vnode_t *
    732 xattr_mkfile(vnode_t *pvp, xattr_view_t xattr_view)
    733 {
    734 	vnode_t *vp;
    735 	xattr_file_t *np;
    736 
    737 	vp = gfs_file_create(sizeof (xattr_file_t), pvp, xattr_file_ops);
    738 	np = vp->v_data;
    739 	np->xattr_view = xattr_view;
    740 	vp->v_flag |= V_SYSATTR;
    741 	return (vp);
    742 }
    743 
    744 vnode_t *
    745 xattr_mkfile_ro(vnode_t *pvp)
    746 {
    747 	return (xattr_mkfile(pvp, XATTR_VIEW_READONLY));
    748 }
    749 
    750 vnode_t *
    751 xattr_mkfile_rw(vnode_t *pvp)
    752 {
    753 	return (xattr_mkfile(pvp, XATTR_VIEW_READWRITE));
    754 }
    755 
/*
 * Vnode operations vector for the xattr directory; it is not assigned in
 * the portion of the file shown here.
 */
vnodeops_t *xattr_dir_ops;

/*
 * The sysattr view entries that appear in every xattr directory: one
 * read-only view and one read-write view, each with the constructor used
 * to create its vnode.  The table ends with a NULL entry.
 */
static gfs_dirent_t xattr_dirents[] = {
	{ VIEW_READONLY, xattr_mkfile_ro, GFS_CACHE_VNODE, },
	{ VIEW_READWRITE, xattr_mkfile_rw, GFS_CACHE_VNODE, },
	{ NULL },
};

/* Number of real entries in xattr_dirents, excluding the NULL terminator. */
#define	XATTRDIR_NENTS	((sizeof (xattr_dirents) / sizeof (gfs_dirent_t)) - 1)
    765 
    766 static int
    767 is_sattr_name(char *s)
    768 {
    769 	int i;
    770 
    771 	for (i = 0; i < XATTRDIR_NENTS; ++i) {
    772 		if (strcmp(s, xattr_dirents[i].gfse_name) == 0) {
    773 			return (1);
    774 		}
    775 	}
    776 	return (0);
    777 }
    778 
    779 /*
    780  * Given the name of an extended attribute file, determine if there is a
    781  * normalization conflict with a sysattr view name.
    782  */
    783 int
    784 xattr_sysattr_casechk(char *s)
    785 {
    786 	int i;
    787 
    788 	for (i = 0; i < XATTRDIR_NENTS; ++i) {
    789 		if (strcasecmp(s, xattr_dirents[i].gfse_name) == 0)
    790 			return (1);
    791 	}
    792 	return (0);
    793 }
    794 
    795 static int
    796 xattr_copy(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
    797     cred_t *cr, caller_context_t *ct)
    798 {
    799 	xvattr_t xvattr;
    800 	vnode_t *pdvp;
    801 	int error;
    802 
    803 	/*
    804 	 * Only copy system attrs if the views are the same
    805 	 */
    806 	if (strcmp(snm, tnm) != 0)
    807 		return (EINVAL);
    808 
    809 	xva_init(&xvattr);
    810 
    811 	XVA_SET_REQ(&xvattr, XAT_SYSTEM);
    812 	XVA_SET_REQ(&xvattr, XAT_READONLY);
    813 	XVA_SET_REQ(&xvattr, XAT_HIDDEN);
    814 	XVA_SET_REQ(&xvattr, XAT_ARCHIVE);
    815 	XVA_SET_REQ(&xvattr, XAT_APPENDONLY);
    816 	XVA_SET_REQ(&xvattr, XAT_NOUNLINK);
    817 	XVA_SET_REQ(&xvattr, XAT_IMMUTABLE);
    818 	XVA_SET_REQ(&xvattr, XAT_NODUMP);
    819 	XVA_SET_REQ(&xvattr, XAT_AV_MODIFIED);
    820 	XVA_SET_REQ(&xvattr, XAT_AV_QUARANTINED);
    821 	XVA_SET_REQ(&xvattr, XAT_CREATETIME);
    822 	XVA_SET_REQ(&xvattr, XAT_REPARSE);
    823 
    824 	pdvp = gfs_file_parent(sdvp);
    825 	error = VOP_GETATTR(pdvp, &xvattr.xva_vattr, 0, cr, ct);
    826 	if (error)
    827 		return (error);
    828 
    829 	pdvp = gfs_file_parent(tdvp);
    830 	error = VOP_SETATTR(pdvp, &xvattr.xva_vattr, 0, cr, ct);
    831 	return (error);
    832 }
    833 
    834 static int
    835 xattr_dir_realdir(vnode_t *dvp, vnode_t **realdvp, int lookup_flags,
    836     cred_t *cr, caller_context_t *ct)
    837 {
    838 	vnode_t *pvp;
    839 	int error;
    840 	struct pathname pn;
    841 	char *startnm = "";
    842 
    843 	*realdvp = NULL;
    844 
    845 	pvp = gfs_file_parent(dvp);
    846 
    847 	error = pn_get(startnm, UIO_SYSSPACE, &pn);
    848 	if (error) {
    849 		VN_RELE(pvp);
    850 		return (error);
    851 	}
    852 
    853 	/*
    854 	 * Set the LOOKUP_HAVE_SYSATTR_DIR flag so that we don't get into an
    855 	 * infinite loop with fop_lookup calling back to xattr_dir_lookup.
    856 	 */
    857 	lookup_flags |= LOOKUP_HAVE_SYSATTR_DIR;
    858 	error = VOP_LOOKUP(pvp, startnm, realdvp, &pn, lookup_flags,
    859 	    rootvp, cr, ct, NULL, NULL);
    860 	pn_free(&pn);
    861 
    862 	return (error);
    863 }
    864 
    865 /* ARGSUSED */
    866 static int
    867 xattr_dir_open(vnode_t **vpp, int flags, cred_t *cr, caller_context_t *ct)
    868 {
    869 	if (flags & FWRITE) {
    870 		return (EACCES);
    871 	}
    872 
    873 	return (0);
    874 }
    875 
    876 /* ARGSUSED */
    877 static int
    878 xattr_dir_close(vnode_t *vpp, int flags, int count, offset_t off, cred_t *cr,
    879     caller_context_t *ct)
    880 {
    881 	return (0);
    882 }
    883 
    884 /*
    885  * Retrieve the attributes on an xattr directory.  If there is a "real"
    886  * xattr directory, use that.  Otherwise, get the attributes (represented
    887  * by PARENT_ATTRMASK) from the "parent" node and fill in the rest.  Note
    888  * that VOP_GETATTR() could turn off bits in the va_mask.
    889  */
    890 
    891 #define	PARENT_ATTRMASK	(AT_UID|AT_GID|AT_RDEV|AT_CTIME|AT_MTIME)
    892 
    893 /* ARGSUSED */
    894 static int
    895 xattr_dir_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    896     caller_context_t *ct)
    897 {
    898 	timestruc_t now;
    899 	vnode_t *pvp;
    900 	int error;
    901 
    902 	error = xattr_dir_realdir(vp, &pvp, LOOKUP_XATTR, cr, ct);
    903 	if (error == 0) {
    904 		error = VOP_GETATTR(pvp, vap, 0, cr, ct);
    905 		VN_RELE(pvp);
    906 		if (error) {
    907 			return (error);
    908 		}
    909 		vap->va_nlink += XATTRDIR_NENTS;
    910 		vap->va_size += XATTRDIR_NENTS;
    911 		return (0);
    912 	}
    913 
    914 	/*
    915 	 * There is no real xattr directory.  Cobble together
    916 	 * an entry using info from the parent object (if needed)
    917 	 * plus information common to all xattrs.
    918 	 */
    919 	if (vap->va_mask & PARENT_ATTRMASK) {
    920 		vattr_t pvattr;
    921 		uint_t  off_bits;
    922 
    923 		pvp = gfs_file_parent(vp);
    924 		(void) memset(&pvattr, 0, sizeof (pvattr));
    925 		pvattr.va_mask = PARENT_ATTRMASK;
    926 		error = VOP_GETATTR(pvp, &pvattr, 0, cr, ct);
    927 		if (error) {
    928 			return (error);
    929 		}
    930 
    931 		/*
    932 		 * VOP_GETATTR() might have turned off some bits in
    933 		 * pvattr.va_mask.  This means that the underlying
    934 		 * file system couldn't process those attributes.
    935 		 * We need to make sure those bits get turned off
    936 		 * in the vattr_t structure that gets passed back
    937 		 * to the caller.  Figure out which bits were turned
    938 		 * off (if any) then set pvattr.va_mask before it
    939 		 * gets copied to the vattr_t that the caller sees.
    940 		 */
    941 		off_bits = (pvattr.va_mask ^ PARENT_ATTRMASK) & PARENT_ATTRMASK;
    942 		pvattr.va_mask = vap->va_mask & ~off_bits;
    943 		*vap = pvattr;
    944 	}
    945 
    946 	vap->va_type = VDIR;
    947 	vap->va_mode = MAKEIMODE(vap->va_type, S_ISVTX | 0777);
    948 	vap->va_fsid = vp->v_vfsp->vfs_dev;
    949 	vap->va_nodeid = gfs_file_inode(vp);
    950 	vap->va_nlink = XATTRDIR_NENTS+2;
    951 	vap->va_size = vap->va_nlink;
    952 	gethrestime(&now);
    953 	vap->va_atime = now;
    954 	vap->va_blksize = 0;
    955 	vap->va_nblocks = 0;
    956 	vap->va_seq = 0;
    957 	return (0);
    958 }
    959 
    960 static int
    961 xattr_dir_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    962     caller_context_t *ct)
    963 {
    964 	vnode_t *realvp;
    965 	int error;
    966 
    967 	/*
    968 	 * If there is a real xattr directory, do the setattr there.
    969 	 * Otherwise, just return success.  The GFS directory is transient,
    970 	 * and any setattr changes can disappear anyway.
    971 	 */
    972 	error = xattr_dir_realdir(vp, &realvp, LOOKUP_XATTR, cr, ct);
    973 	if (error == 0) {
    974 		error = VOP_SETATTR(realvp, vap, flags, cr, ct);
    975 		VN_RELE(realvp);
    976 	}
    977 	if (error == ENOENT) {
    978 		error = 0;
    979 	}
    980 	return (error);
    981 }
    982 
    983 /* ARGSUSED */
    984 static int
    985 xattr_dir_access(vnode_t *vp, int mode, int flags, cred_t *cr,
    986     caller_context_t *ct)
    987 {
    988 	int error;
    989 	vnode_t *realvp = NULL;
    990 
    991 	if (mode & VWRITE) {
    992 		return (EACCES);
    993 	}
    994 
    995 	error = xattr_dir_realdir(vp, &realvp, LOOKUP_XATTR, cr, ct);
    996 
    997 	if (realvp)
    998 		VN_RELE(realvp);
    999 
   1000 	/*
   1001 	 * No real xattr dir isn't an error
   1002 	 * an error of EINVAL indicates attributes on attributes
   1003 	 * are not supported.  In that case just allow access to the
   1004 	 * transient directory.
   1005 	 */
   1006 	return ((error == ENOENT || error == EINVAL) ? 0 : error);
   1007 }
   1008 
   1009 static int
   1010 xattr_dir_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl,
   1011     int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
   1012     vsecattr_t *vsecp)
   1013 {
   1014 	vnode_t *pvp;
   1015 	int error;
   1016 
   1017 	*vpp = NULL;
   1018 
   1019 	/*
   1020 	 * Don't allow creation of extended attributes with sysattr names.
   1021 	 */
   1022 	if (is_sattr_name(name)) {
   1023 		return (gfs_dir_lookup(dvp, name, vpp, cr, 0, NULL, NULL));
   1024 	}
   1025 
   1026 	error = xattr_dir_realdir(dvp, &pvp, LOOKUP_XATTR|CREATE_XATTR_DIR,
   1027 	    cr, ct);
   1028 	if (error == 0) {
   1029 		error = VOP_CREATE(pvp, name, vap, excl, mode, vpp, cr, flag,
   1030 		    ct, vsecp);
   1031 		VN_RELE(pvp);
   1032 	}
   1033 	return (error);
   1034 }
   1035 
   1036 static int
   1037 xattr_dir_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct,
   1038     int flags)
   1039 {
   1040 	vnode_t *pvp;
   1041 	int error;
   1042 
   1043 	if (is_sattr_name(name)) {
   1044 		return (EACCES);
   1045 	}
   1046 
   1047 	error = xattr_dir_realdir(dvp, &pvp, LOOKUP_XATTR, cr, ct);
   1048 	if (error == 0) {
   1049 		error = VOP_REMOVE(pvp, name, cr, ct, flags);
   1050 		VN_RELE(pvp);
   1051 	}
   1052 	return (error);
   1053 }
   1054 
   1055 static int
   1056 xattr_dir_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr,
   1057     caller_context_t *ct, int flags)
   1058 {
   1059 	vnode_t *pvp;
   1060 	int error;
   1061 
   1062 	if (svp->v_flag & V_SYSATTR) {
   1063 		return (EINVAL);
   1064 	}
   1065 
   1066 	error = xattr_dir_realdir(tdvp, &pvp, LOOKUP_XATTR, cr, ct);
   1067 	if (error == 0) {
   1068 		error = VOP_LINK(pvp, svp, name, cr, ct, flags);
   1069 		VN_RELE(pvp);
   1070 	}
   1071 	return (error);
   1072 }
   1073 
   1074 static int
   1075 xattr_dir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
   1076     cred_t *cr, caller_context_t *ct, int flags)
   1077 {
   1078 	vnode_t *spvp, *tpvp;
   1079 	int error;
   1080 	int held_tgt;
   1081 
   1082 	if (is_sattr_name(snm) || is_sattr_name(tnm))
   1083 		return (xattr_copy(sdvp, snm, tdvp, tnm, cr, ct));
   1084 	/*
   1085 	 * We know that sdvp is a GFS dir, or we wouldn't be here.
   1086 	 * Get the real unnamed directory.
   1087 	 */
   1088 	error = xattr_dir_realdir(sdvp, &spvp, LOOKUP_XATTR, cr, ct);
   1089 	if (error) {
   1090 		return (error);
   1091 	}
   1092 
   1093 	if (sdvp == tdvp) {
   1094 		/*
   1095 		 * If the source and target are the same GFS directory, the
   1096 		 * underlying unnamed source and target dir will be the same.
   1097 		 */
   1098 		tpvp = spvp;
   1099 		VN_HOLD(tpvp);
   1100 		held_tgt = 1;
   1101 	} else if (tdvp->v_flag & V_SYSATTR) {
   1102 		/*
   1103 		 * If the target dir is a different GFS directory,
   1104 		 * find its underlying unnamed dir.
   1105 		 */
   1106 		error = xattr_dir_realdir(tdvp, &tpvp, LOOKUP_XATTR, cr, ct);
   1107 		if (error) {
   1108 			VN_RELE(spvp);
   1109 			return (error);
   1110 		}
   1111 		held_tgt = 1;
   1112 	} else {
   1113 		/*
   1114 		 * Target dir is outside of GFS, pass it on through.
   1115 		 */
   1116 		tpvp = tdvp;
   1117 		held_tgt = 0;
   1118 	}
   1119 
   1120 	error = VOP_RENAME(spvp, snm, tpvp, tnm, cr, ct, flags);
   1121 
   1122 	if (held_tgt) {
   1123 		VN_RELE(tpvp);
   1124 	}
   1125 	VN_RELE(spvp);
   1126 
   1127 	return (error);
   1128 }
   1129 
   1130 /*
   1131  * readdir_xattr_casecmp: given a system attribute name, see if there
   1132  * is a real xattr with the same normalized name.
   1133  */
   1134 static int
   1135 readdir_xattr_casecmp(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
   1136     int *eflags)
   1137 {
   1138 	int error;
   1139 	vnode_t *vp;
   1140 	struct pathname pn;
   1141 
   1142 	*eflags = 0;
   1143 
   1144 	error = pn_get(nm, UIO_SYSSPACE, &pn);
   1145 	if (error == 0) {
   1146 		error = VOP_LOOKUP(dvp, nm, &vp, &pn,
   1147 		    FIGNORECASE, rootvp, cr, ct, NULL, NULL);
   1148 		if (error == 0) {
   1149 			*eflags = ED_CASE_CONFLICT;
   1150 			VN_RELE(vp);
   1151 		} else if (error == ENOENT) {
   1152 			error = 0;
   1153 		}
   1154 		pn_free(&pn);
   1155 	}
   1156 
   1157 	return (error);
   1158 }
   1159 
   1160 static int
   1161 xattr_dir_readdir(vnode_t *dvp, uio_t *uiop, cred_t *cr, int *eofp,
   1162     caller_context_t *ct, int flags)
   1163 {
   1164 	vnode_t *pvp;
   1165 	int error;
   1166 	int local_eof;
   1167 	int reset_off = 0;
   1168 	int has_xattrs = 0;
   1169 
   1170 	if (eofp == NULL) {
   1171 		eofp = &local_eof;
   1172 	}
   1173 	*eofp = 0;
   1174 
   1175 	/*
   1176 	 * See if there is a real extended attribute directory.
   1177 	 */
   1178 	error = xattr_dir_realdir(dvp, &pvp, LOOKUP_XATTR, cr, ct);
   1179 	if (error == 0) {
   1180 		has_xattrs = 1;
   1181 	}
   1182 
   1183 	/*
   1184 	 * Start by reading up the static entries.
   1185 	 */
   1186 	if (uiop->uio_loffset == 0) {
   1187 		ino64_t pino, ino;
   1188 		offset_t off;
   1189 		gfs_dir_t *dp = dvp->v_data;
   1190 		gfs_readdir_state_t gstate;
   1191 
   1192 		if (has_xattrs) {
   1193 			/*
   1194 			 * If there is a real xattr dir, skip . and ..
   1195 			 * in the GFS dir.  We'll pick them up below
   1196 			 * when we call into the underlying fs.
   1197 			 */
   1198 			uiop->uio_loffset = GFS_STATIC_ENTRY_OFFSET;
   1199 		}
   1200 		error = gfs_get_parent_ino(dvp, cr, ct, &pino, &ino);
   1201 		if (error == 0) {
   1202 			error = gfs_readdir_init(&gstate, dp->gfsd_maxlen, 1,
   1203 			    uiop, pino, ino, flags);
   1204 		}
   1205 		if (error) {
   1206 			if (has_xattrs)
   1207 				VN_RELE(pvp);
   1208 			return (error);
   1209 		}
   1210 
   1211 		while ((error = gfs_readdir_pred(&gstate, uiop, &off)) == 0 &&
   1212 		    !*eofp) {
   1213 			if (off >= 0 && off < dp->gfsd_nstatic) {
   1214 				int eflags;
   1215 
   1216 				/*
   1217 				 * Check to see if this sysattr set name has a
   1218 				 * case-insensitive conflict with a real xattr
   1219 				 * name.
   1220 				 */
   1221 				eflags = 0;
   1222 				if ((flags & V_RDDIR_ENTFLAGS) && has_xattrs) {
   1223 					error = readdir_xattr_casecmp(pvp,
   1224 					    dp->gfsd_static[off].gfse_name,
   1225 					    cr, ct, &eflags);
   1226 					if (error)
   1227 						break;
   1228 				}
   1229 				ino = dp->gfsd_inode(dvp, off);
   1230 
   1231 				error = gfs_readdir_emit(&gstate, uiop, off,
   1232 				    ino, dp->gfsd_static[off].gfse_name,
   1233 				    eflags);
   1234 				if (error)
   1235 					break;
   1236 			} else {
   1237 				*eofp = 1;
   1238 			}
   1239 		}
   1240 
   1241 		error = gfs_readdir_fini(&gstate, error, eofp, *eofp);
   1242 		if (error) {
   1243 			if (has_xattrs)
   1244 				VN_RELE(pvp);
   1245 			return (error);
   1246 		}
   1247 
   1248 		/*
   1249 		 * We must read all of the static entries in the first
   1250 		 * call.  Otherwise we won't know if uio_loffset in a
   1251 		 * subsequent call refers to the static entries or to those
   1252 		 * in an underlying fs.
   1253 		 */
   1254 		if (*eofp == 0)
   1255 			return (EINVAL);
   1256 		reset_off = 1;
   1257 	}
   1258 
   1259 	if (!has_xattrs) {
   1260 		*eofp = 1;
   1261 		return (0);
   1262 	}
   1263 
   1264 	*eofp = 0;
   1265 	if (reset_off) {
   1266 		uiop->uio_loffset = 0;
   1267 	}
   1268 	(void) VOP_RWLOCK(pvp, V_WRITELOCK_FALSE, NULL);
   1269 	error = VOP_READDIR(pvp, uiop, cr, eofp, ct, flags);
   1270 	VOP_RWUNLOCK(pvp, V_WRITELOCK_FALSE, NULL);
   1271 	VN_RELE(pvp);
   1272 
   1273 	return (error);
   1274 }
   1275 
   1276 /* ARGSUSED */
   1277 static void
   1278 xattr_dir_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
   1279 {
   1280 	gfs_file_t *fp;
   1281 
   1282 	fp = gfs_dir_inactive(vp);
   1283 	if (fp != NULL) {
   1284 		kmem_free(fp, fp->gfs_size);
   1285 	}
   1286 }
   1287 
   1288 static int
   1289 xattr_dir_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
   1290     caller_context_t *ct)
   1291 {
   1292 	switch (cmd) {
   1293 	case _PC_XATTR_EXISTS:
   1294 	case _PC_SATTR_ENABLED:
   1295 	case _PC_SATTR_EXISTS:
   1296 		*valp = 0;
   1297 		return (0);
   1298 	default:
   1299 		return (fs_pathconf(vp, cmd, valp, cr, ct));
   1300 	}
   1301 }
   1302 
   1303 static const fs_operation_def_t xattr_dir_tops[] = {
   1304 	{ VOPNAME_OPEN,		{ .vop_open = xattr_dir_open }		},
   1305 	{ VOPNAME_CLOSE,	{ .vop_close = xattr_dir_close }	},
   1306 	{ VOPNAME_IOCTL,	{ .error = fs_inval }			},
   1307 	{ VOPNAME_GETATTR,	{ .vop_getattr = xattr_dir_getattr }	},
   1308 	{ VOPNAME_SETATTR,	{ .vop_setattr = xattr_dir_setattr }	},
   1309 	{ VOPNAME_ACCESS,	{ .vop_access = xattr_dir_access }	},
   1310 	{ VOPNAME_READDIR,	{ .vop_readdir = xattr_dir_readdir }	},
   1311 	{ VOPNAME_LOOKUP,	{ .vop_lookup = gfs_vop_lookup }	},
   1312 	{ VOPNAME_CREATE,	{ .vop_create = xattr_dir_create }	},
   1313 	{ VOPNAME_REMOVE,	{ .vop_remove = xattr_dir_remove }	},
   1314 	{ VOPNAME_LINK,		{ .vop_link = xattr_dir_link }		},
   1315 	{ VOPNAME_RENAME,	{ .vop_rename = xattr_dir_rename }	},
   1316 	{ VOPNAME_MKDIR,	{ .error = fs_inval }			},
   1317 	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek }			},
   1318 	{ VOPNAME_INACTIVE,	{ .vop_inactive = xattr_dir_inactive }	},
   1319 	{ VOPNAME_FID,		{ .vop_fid = xattr_common_fid }		},
   1320 	{ VOPNAME_PATHCONF,	{ .vop_pathconf = xattr_dir_pathconf }	},
   1321 	{ NULL, NULL }
   1322 };
   1323 
   1324 static gfs_opsvec_t xattr_opsvec[] = {
   1325 	{ "xattr dir", xattr_dir_tops, &xattr_dir_ops },
   1326 	{ "system attributes", xattr_file_tops, &xattr_file_ops },
   1327 	{ NULL, NULL, NULL }
   1328 };
   1329 
   1330 static int
   1331 xattr_lookup_cb(vnode_t *vp, const char *nm, vnode_t **vpp, ino64_t *inop,
   1332     cred_t *cr, int flags, int *deflags, pathname_t *rpnp)
   1333 {
   1334 	vnode_t *pvp;
   1335 	struct pathname pn;
   1336 	int error;
   1337 
   1338 	*vpp = NULL;
   1339 	*inop = 0;
   1340 
   1341 	error = xattr_dir_realdir(vp, &pvp, LOOKUP_XATTR|CREATE_XATTR_DIR,
   1342 	    cr, NULL);
   1343 
   1344 	/*
   1345 	 * Return ENOENT for EACCES requests during lookup.  Once an
   1346 	 * attribute create is attempted EACCES will be returned.
   1347 	 */
   1348 	if (error) {
   1349 		if (error == EACCES)
   1350 			return (ENOENT);
   1351 		return (error);
   1352 	}
   1353 
   1354 	error = pn_get((char *)nm, UIO_SYSSPACE, &pn);
   1355 	if (error == 0) {
   1356 		error = VOP_LOOKUP(pvp, (char *)nm, vpp, &pn, flags, rootvp,
   1357 		    cr, NULL, deflags, rpnp);
   1358 		pn_free(&pn);
   1359 	}
   1360 	VN_RELE(pvp);
   1361 
   1362 	return (error);
   1363 }
   1364 
   1365 /* ARGSUSED */
   1366 static ino64_t
   1367 xattrdir_do_ino(vnode_t *vp, int index)
   1368 {
   1369 	/*
   1370 	 * We use index 0 for the directory fid.  Start
   1371 	 * the file numbering at 1.
   1372 	 */
   1373 	return ((ino64_t)index+1);
   1374 }
   1375 
   1376 void
   1377 xattr_init(void)
   1378 {
   1379 	VERIFY(gfs_make_opsvec(xattr_opsvec) == 0);
   1380 }
   1381 
   1382 int
   1383 xattr_dir_lookup(vnode_t *dvp, vnode_t **vpp, int flags, cred_t *cr)
   1384 {
   1385 	int error = 0;
   1386 
   1387 	*vpp = NULL;
   1388 
   1389 	if (dvp->v_type != VDIR && dvp->v_type != VREG)
   1390 		return (EINVAL);
   1391 
   1392 	mutex_enter(&dvp->v_lock);
   1393 
   1394 	/*
   1395 	 * If we're already in sysattr space, don't allow creation
   1396 	 * of another level of sysattrs.
   1397 	 */
   1398 	if (dvp->v_flag & V_SYSATTR) {
   1399 		mutex_exit(&dvp->v_lock);
   1400 		return (EINVAL);
   1401 	}
   1402 
   1403 	if (dvp->v_xattrdir != NULL) {
   1404 		*vpp = dvp->v_xattrdir;
   1405 		VN_HOLD(*vpp);
   1406 	} else {
   1407 		ulong_t val;
   1408 		int xattrs_allowed = dvp->v_vfsp->vfs_flag & VFS_XATTR;
   1409 		int sysattrs_allowed = 1;
   1410 
   1411 		/*
   1412 		 * We have to drop the lock on dvp.  gfs_dir_create will
   1413 		 * grab it for a VN_HOLD.
   1414 		 */
   1415 		mutex_exit(&dvp->v_lock);
   1416 
   1417 		/*
   1418 		 * If dvp allows xattr creation, but not sysattr
   1419 		 * creation, return the real xattr dir vp. We can't
   1420 		 * use the vfs feature mask here because _PC_SATTR_ENABLED
   1421 		 * has vnode-level granularity (e.g. .zfs).
   1422 		 */
   1423 		error = VOP_PATHCONF(dvp, _PC_SATTR_ENABLED, &val, cr, NULL);
   1424 		if (error != 0 || val == 0)
   1425 			sysattrs_allowed = 0;
   1426 
   1427 		if (!xattrs_allowed && !sysattrs_allowed)
   1428 			return (EINVAL);
   1429 
   1430 		if (!sysattrs_allowed) {
   1431 			struct pathname pn;
   1432 			char *nm = "";
   1433 
   1434 			error = pn_get(nm, UIO_SYSSPACE, &pn);
   1435 			if (error)
   1436 				return (error);
   1437 			error = VOP_LOOKUP(dvp, nm, vpp, &pn,
   1438 			    flags|LOOKUP_HAVE_SYSATTR_DIR, rootvp, cr, NULL,
   1439 			    NULL, NULL);
   1440 			pn_free(&pn);
   1441 			return (error);
   1442 		}
   1443 
   1444 		/*
   1445 		 * Note that we act as if we were given CREATE_XATTR_DIR,
   1446 		 * but only for creation of the GFS directory.
   1447 		 */
   1448 		*vpp = gfs_dir_create(
   1449 		    sizeof (gfs_dir_t), dvp, xattr_dir_ops, xattr_dirents,
   1450 		    xattrdir_do_ino, MAXNAMELEN, NULL, xattr_lookup_cb);
   1451 		mutex_enter(&dvp->v_lock);
   1452 		if (dvp->v_xattrdir != NULL) {
   1453 			/*
   1454 			 * We lost the race to create the xattr dir.
   1455 			 * Destroy this one, use the winner.  We can't
   1456 			 * just call VN_RELE(*vpp), because the vnode
   1457 			 * is only partially initialized.
   1458 			 */
   1459 			gfs_dir_t *dp = (*vpp)->v_data;
   1460 
   1461 			ASSERT((*vpp)->v_count == 1);
   1462 			vn_free(*vpp);
   1463 
   1464 			mutex_destroy(&dp->gfsd_lock);
   1465 			kmem_free(dp->gfsd_static,
   1466 			    dp->gfsd_nstatic * sizeof (gfs_dirent_t));
   1467 			kmem_free(dp, dp->gfsd_file.gfs_size);
   1468 
   1469 			/*
   1470 			 * There is an implied VN_HOLD(dvp) here.  We should
   1471 			 * be doing a VN_RELE(dvp) to clean up the reference
   1472 			 * from *vpp, and then a VN_HOLD(dvp) for the new
   1473 			 * reference.  Instead, we just leave the count alone.
   1474 			 */
   1475 
   1476 			*vpp = dvp->v_xattrdir;
   1477 			VN_HOLD(*vpp);
   1478 		} else {
   1479 			(*vpp)->v_flag |= (V_XATTRDIR|V_SYSATTR);
   1480 			dvp->v_xattrdir = *vpp;
   1481 		}
   1482 	}
   1483 	mutex_exit(&dvp->v_lock);
   1484 
   1485 	return (error);
   1486 }
   1487 
   1488 int
   1489 xattr_dir_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
   1490 {
   1491 	int error;
   1492 	vnode_t *pvp, *dvp;
   1493 	xattr_fid_t *xfidp;
   1494 	struct pathname pn;
   1495 	char *nm;
   1496 	uint16_t orig_len;
   1497 
   1498 	*vpp = NULL;
   1499 
   1500 	if (fidp->fid_len < XATTR_FIDSZ)
   1501 		return (EINVAL);
   1502 
   1503 	xfidp = (xattr_fid_t *)fidp;
   1504 	orig_len = fidp->fid_len;
   1505 	fidp->fid_len = xfidp->parent_len;
   1506 
   1507 	error = VFS_VGET(vfsp, &pvp, fidp);
   1508 	fidp->fid_len = orig_len;
   1509 	if (error)
   1510 		return (error);
   1511 
   1512 	/*
   1513 	 * Start by getting the GFS sysattr directory.	We might need
   1514 	 * to recreate it during the VOP_LOOKUP.
   1515 	 */
   1516 	nm = "";
   1517 	error = pn_get(nm, UIO_SYSSPACE, &pn);
   1518 	if (error) {
   1519 		VN_RELE(pvp);
   1520 		return (EINVAL);
   1521 	}
   1522 
   1523 	error = VOP_LOOKUP(pvp, nm, &dvp, &pn, LOOKUP_XATTR|CREATE_XATTR_DIR,
   1524 	    rootvp, CRED(), NULL, NULL, NULL);
   1525 	pn_free(&pn);
   1526 	VN_RELE(pvp);
   1527 	if (error)
   1528 		return (error);
   1529 
   1530 	if (xfidp->dir_offset == 0) {
   1531 		/*
   1532 		 * If we were looking for the directory, we're done.
   1533 		 */
   1534 		*vpp = dvp;
   1535 		return (0);
   1536 	}
   1537 
   1538 	if (xfidp->dir_offset > XATTRDIR_NENTS) {
   1539 		VN_RELE(dvp);
   1540 		return (EINVAL);
   1541 	}
   1542 
   1543 	nm = xattr_dirents[xfidp->dir_offset - 1].gfse_name;
   1544 
   1545 	error = pn_get(nm, UIO_SYSSPACE, &pn);
   1546 	if (error) {
   1547 		VN_RELE(dvp);
   1548 		return (EINVAL);
   1549 	}
   1550 
   1551 	error = VOP_LOOKUP(dvp, nm, vpp, &pn, 0, rootvp, CRED(), NULL,
   1552 	    NULL, NULL);
   1553 
   1554 	pn_free(&pn);
   1555 	VN_RELE(dvp);
   1556 
   1557 	return (error);
   1558 }
   1559