Home | History | Annotate | Download | only in nfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  *  	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
     28  *	All Rights Reserved
     29  */
     30 
     31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     32 
     33 #include <sys/param.h>
     34 #include <sys/types.h>
     35 #include <sys/systm.h>
     36 #include <sys/cmn_err.h>
     37 #include <sys/vtrace.h>
     38 #include <sys/session.h>
     39 #include <sys/thread.h>
     40 #include <sys/dnlc.h>
     41 #include <sys/cred.h>
     42 #include <sys/priv.h>
     43 #include <sys/list.h>
     44 #include <sys/sdt.h>
     45 #include <sys/policy.h>
     46 
     47 #include <rpc/types.h>
     48 #include <rpc/xdr.h>
     49 
     50 #include <nfs/nfs.h>
     51 
     52 #include <nfs/nfs_clnt.h>
     53 
     54 #include <nfs/nfs4.h>
     55 #include <nfs/rnode4.h>
     56 #include <nfs/nfs4_clnt.h>
     57 
/*
 * client side statistics
 *
 * Table of named 64-bit kstat counters for the NFSv4 client.  The first
 * four entries are always present; the remaining four exist only when the
 * kernel is built with DEBUG.  The object is const; clget4() updates the
 * same counter names through nfscl->nfscl_stat.
 */
static const struct clstat4 clstat4_tmpl = {
	{ "calls",	KSTAT_DATA_UINT64 },
	{ "badcalls",	KSTAT_DATA_UINT64 },
	{ "clgets",	KSTAT_DATA_UINT64 },	/* bumped on each clget4() call */
	{ "cltoomany",	KSTAT_DATA_UINT64 },
#ifdef DEBUG
	{ "clalloc",	KSTAT_DATA_UINT64 },
	{ "noresponse",	KSTAT_DATA_UINT64 },
	{ "failover",	KSTAT_DATA_UINT64 },
	{ "remap",	KSTAT_DATA_UINT64 },
#endif
};
     73 
#ifdef DEBUG
/*
 * Additional named 64-bit counters that exist only in DEBUG kernels.
 * Unlike clstat4_tmpl this object is neither static nor const: it is a
 * single global instance that is updated in place (for example,
 * clreclaim4_zone() increments the "clreclaim" counter).
 */
struct clstat4_debug clstat4_debug = {
	{ "nrnode",	KSTAT_DATA_UINT64 },
	{ "access",	KSTAT_DATA_UINT64 },
	{ "dirent",	KSTAT_DATA_UINT64 },
	{ "dirents",	KSTAT_DATA_UINT64 },
	{ "reclaim",	KSTAT_DATA_UINT64 },
	{ "clreclaim",	KSTAT_DATA_UINT64 },
	{ "f_reclaim",	KSTAT_DATA_UINT64 },
	{ "a_reclaim",	KSTAT_DATA_UINT64 },
	{ "r_reclaim",	KSTAT_DATA_UINT64 },
	{ "r_path",	KSTAT_DATA_UINT64 },
};
#endif
     88 
/*
 * We keep a global list of per-zone client data, so we can clean up all zones
 * if we get low on memory.
 */
static list_t nfs4_clnt_list;
/* NOTE(review): presumably protects nfs4_clnt_list -- confirm at the users */
static kmutex_t nfs4_clnt_list_lock;
static zone_key_t nfs4clnt_zone_key;

/* kmem cache from which clget4() allocates struct chtab entries */
static struct kmem_cache *chtab4_cache;

#ifdef DEBUG
/* Debug-only tunables; they do not exist in non-DEBUG kernels. */
static int nfs4_rfscall_debug;
static int nfs4_try_failover_any;
/* When non-zero, utf8_to_fn() logs the names it rejects. */
int nfs4_utf8_debug = 0;
#endif
    104 
/*
 * NFSv4 readdir cache implementation
 *
 * Private wrapper around a rddir4_cache element.  The public element is
 * the first member; the wrapper adds a reference count, the mutex that
 * protects that count, and the AVL tree linkage.
 */
typedef struct rddir4_cache_impl {
	rddir4_cache	rc;		/* readdir cache element */
	kmutex_t	lock;		/* lock protects count */
	uint_t		count;		/* reference count */
	avl_node_t	tree;		/* AVL tree link */
} rddir4_cache_impl;
    114 
/* Forward declarations of file-local (static) helpers. */
static int rddir4_cache_compar(const void *, const void *);
static void rddir4_cache_free(rddir4_cache_impl *);
static rddir4_cache *rddir4_cache_alloc(int);
static void rddir4_cache_hold(rddir4_cache *);
static int try_failover(enum clnt_stat);

/*
 * Readdir cache counters, all starting at zero.
 * NOTE(review): the code that updates them is outside this view; the
 * names suggest hit / wait / miss accounting -- confirm.
 */
static int nfs4_readdir_cache_hits = 0;
static int nfs4_readdir_cache_waits = 0;
static int nfs4_readdir_cache_misses = 0;
    124 
    125 /*
    126  * Shared nfs4 functions
    127  */
    128 
    129 /*
    130  * Copy an nfs_fh4.  The destination storage (to->nfs_fh4_val) must already
    131  * be allocated.
    132  */
    133 
    134 void
    135 nfs_fh4_copy(nfs_fh4 *from, nfs_fh4 *to)
    136 {
    137 	to->nfs_fh4_len = from->nfs_fh4_len;
    138 	bcopy(from->nfs_fh4_val, to->nfs_fh4_val, to->nfs_fh4_len);
    139 }
    140 
    141 /*
    142  * nfs4cmpfh - compare 2 filehandles.
    143  * Returns 0 if the two nfsv4 filehandles are the same, -1 if the first is
    144  * "less" than the second, +1 if the first is "greater" than the second.
    145  */
    146 
    147 int
    148 nfs4cmpfh(const nfs_fh4 *fh4p1, const nfs_fh4 *fh4p2)
    149 {
    150 	const char *c1, *c2;
    151 
    152 	if (fh4p1->nfs_fh4_len < fh4p2->nfs_fh4_len)
    153 		return (-1);
    154 	if (fh4p1->nfs_fh4_len > fh4p2->nfs_fh4_len)
    155 		return (1);
    156 	for (c1 = fh4p1->nfs_fh4_val, c2 = fh4p2->nfs_fh4_val;
    157 	    c1 < fh4p1->nfs_fh4_val + fh4p1->nfs_fh4_len;
    158 	    c1++, c2++) {
    159 		if (*c1 < *c2)
    160 			return (-1);
    161 		if (*c1 > *c2)
    162 			return (1);
    163 	}
    164 
    165 	return (0);
    166 }
    167 
    168 /*
    169  * Compare two v4 filehandles.  Return zero if they're the same, non-zero
    170  * if they're not.  Like nfs4cmpfh(), but different filehandle
    171  * representation, and doesn't provide information about greater than or
    172  * less than.
    173  */
    174 
    175 int
    176 nfs4cmpfhandle(nfs4_fhandle_t *fh1, nfs4_fhandle_t *fh2)
    177 {
    178 	if (fh1->fh_len == fh2->fh_len)
    179 		return (bcmp(fh1->fh_buf, fh2->fh_buf, fh1->fh_len));
    180 
    181 	return (1);
    182 }
    183 
    184 int
    185 stateid4_cmp(stateid4 *s1, stateid4 *s2)
    186 {
    187 	if (bcmp(s1, s2, sizeof (stateid4)) == 0)
    188 		return (1);
    189 	else
    190 		return (0);
    191 }
    192 
    193 nfsstat4
    194 puterrno4(int error)
    195 {
    196 	switch (error) {
    197 	case 0:
    198 		return (NFS4_OK);
    199 	case EPERM:
    200 		return (NFS4ERR_PERM);
    201 	case ENOENT:
    202 		return (NFS4ERR_NOENT);
    203 	case EINTR:
    204 		return (NFS4ERR_IO);
    205 	case EIO:
    206 		return (NFS4ERR_IO);
    207 	case ENXIO:
    208 		return (NFS4ERR_NXIO);
    209 	case ENOMEM:
    210 		return (NFS4ERR_RESOURCE);
    211 	case EACCES:
    212 		return (NFS4ERR_ACCESS);
    213 	case EBUSY:
    214 		return (NFS4ERR_IO);
    215 	case EEXIST:
    216 		return (NFS4ERR_EXIST);
    217 	case EXDEV:
    218 		return (NFS4ERR_XDEV);
    219 	case ENODEV:
    220 		return (NFS4ERR_IO);
    221 	case ENOTDIR:
    222 		return (NFS4ERR_NOTDIR);
    223 	case EISDIR:
    224 		return (NFS4ERR_ISDIR);
    225 	case EINVAL:
    226 		return (NFS4ERR_INVAL);
    227 	case EMFILE:
    228 		return (NFS4ERR_RESOURCE);
    229 	case EFBIG:
    230 		return (NFS4ERR_FBIG);
    231 	case ENOSPC:
    232 		return (NFS4ERR_NOSPC);
    233 	case EROFS:
    234 		return (NFS4ERR_ROFS);
    235 	case EMLINK:
    236 		return (NFS4ERR_MLINK);
    237 	case EDEADLK:
    238 		return (NFS4ERR_DEADLOCK);
    239 	case ENOLCK:
    240 		return (NFS4ERR_DENIED);
    241 	case EREMOTE:
    242 		return (NFS4ERR_SERVERFAULT);
    243 	case ENOTSUP:
    244 		return (NFS4ERR_NOTSUPP);
    245 	case EDQUOT:
    246 		return (NFS4ERR_DQUOT);
    247 	case ENAMETOOLONG:
    248 		return (NFS4ERR_NAMETOOLONG);
    249 	case EOVERFLOW:
    250 		return (NFS4ERR_INVAL);
    251 	case ENOSYS:
    252 		return (NFS4ERR_NOTSUPP);
    253 	case ENOTEMPTY:
    254 		return (NFS4ERR_NOTEMPTY);
    255 	case EOPNOTSUPP:
    256 		return (NFS4ERR_NOTSUPP);
    257 	case ESTALE:
    258 		return (NFS4ERR_STALE);
    259 	case EAGAIN:
    260 		if (curthread->t_flag & T_WOULDBLOCK) {
    261 			curthread->t_flag &= ~T_WOULDBLOCK;
    262 			return (NFS4ERR_DELAY);
    263 		}
    264 		return (NFS4ERR_LOCKED);
    265 	default:
    266 		return ((enum nfsstat4)error);
    267 	}
    268 }
    269 
    270 int
    271 geterrno4(enum nfsstat4 status)
    272 {
    273 	switch (status) {
    274 	case NFS4_OK:
    275 		return (0);
    276 	case NFS4ERR_PERM:
    277 		return (EPERM);
    278 	case NFS4ERR_NOENT:
    279 		return (ENOENT);
    280 	case NFS4ERR_IO:
    281 		return (EIO);
    282 	case NFS4ERR_NXIO:
    283 		return (ENXIO);
    284 	case NFS4ERR_ACCESS:
    285 		return (EACCES);
    286 	case NFS4ERR_EXIST:
    287 		return (EEXIST);
    288 	case NFS4ERR_XDEV:
    289 		return (EXDEV);
    290 	case NFS4ERR_NOTDIR:
    291 		return (ENOTDIR);
    292 	case NFS4ERR_ISDIR:
    293 		return (EISDIR);
    294 	case NFS4ERR_INVAL:
    295 		return (EINVAL);
    296 	case NFS4ERR_FBIG:
    297 		return (EFBIG);
    298 	case NFS4ERR_NOSPC:
    299 		return (ENOSPC);
    300 	case NFS4ERR_ROFS:
    301 		return (EROFS);
    302 	case NFS4ERR_MLINK:
    303 		return (EMLINK);
    304 	case NFS4ERR_NAMETOOLONG:
    305 		return (ENAMETOOLONG);
    306 	case NFS4ERR_NOTEMPTY:
    307 		return (ENOTEMPTY);
    308 	case NFS4ERR_DQUOT:
    309 		return (EDQUOT);
    310 	case NFS4ERR_STALE:
    311 		return (ESTALE);
    312 	case NFS4ERR_BADHANDLE:
    313 		return (ESTALE);
    314 	case NFS4ERR_BAD_COOKIE:
    315 		return (EINVAL);
    316 	case NFS4ERR_NOTSUPP:
    317 		return (EOPNOTSUPP);
    318 	case NFS4ERR_TOOSMALL:
    319 		return (EINVAL);
    320 	case NFS4ERR_SERVERFAULT:
    321 		return (EIO);
    322 	case NFS4ERR_BADTYPE:
    323 		return (EINVAL);
    324 	case NFS4ERR_DELAY:
    325 		return (ENXIO);
    326 	case NFS4ERR_SAME:
    327 		return (EPROTO);
    328 	case NFS4ERR_DENIED:
    329 		return (ENOLCK);
    330 	case NFS4ERR_EXPIRED:
    331 		return (EPROTO);
    332 	case NFS4ERR_LOCKED:
    333 		return (EACCES);
    334 	case NFS4ERR_GRACE:
    335 		return (EAGAIN);
    336 	case NFS4ERR_FHEXPIRED:	/* if got here, failed to get a new fh */
    337 		return (ESTALE);
    338 	case NFS4ERR_SHARE_DENIED:
    339 		return (EACCES);
    340 	case NFS4ERR_WRONGSEC:
    341 		return (EPERM);
    342 	case NFS4ERR_CLID_INUSE:
    343 		return (EAGAIN);
    344 	case NFS4ERR_RESOURCE:
    345 		return (EAGAIN);
    346 	case NFS4ERR_MOVED:
    347 		return (EPROTO);
    348 	case NFS4ERR_NOFILEHANDLE:
    349 		return (EIO);
    350 	case NFS4ERR_MINOR_VERS_MISMATCH:
    351 		return (ENOTSUP);
    352 	case NFS4ERR_STALE_CLIENTID:
    353 		return (EIO);
    354 	case NFS4ERR_STALE_STATEID:
    355 		return (EIO);
    356 	case NFS4ERR_OLD_STATEID:
    357 		return (EIO);
    358 	case NFS4ERR_BAD_STATEID:
    359 		return (EIO);
    360 	case NFS4ERR_BAD_SEQID:
    361 		return (EIO);
    362 	case NFS4ERR_NOT_SAME:
    363 		return (EPROTO);
    364 	case NFS4ERR_LOCK_RANGE:
    365 		return (EPROTO);
    366 	case NFS4ERR_SYMLINK:
    367 		return (EPROTO);
    368 	case NFS4ERR_RESTOREFH:
    369 		return (EPROTO);
    370 	case NFS4ERR_LEASE_MOVED:
    371 		return (EPROTO);
    372 	case NFS4ERR_ATTRNOTSUPP:
    373 		return (ENOTSUP);
    374 	case NFS4ERR_NO_GRACE:
    375 		return (EPROTO);
    376 	case NFS4ERR_RECLAIM_BAD:
    377 		return (EPROTO);
    378 	case NFS4ERR_RECLAIM_CONFLICT:
    379 		return (EPROTO);
    380 	case NFS4ERR_BADXDR:
    381 		return (EINVAL);
    382 	case NFS4ERR_LOCKS_HELD:
    383 		return (EIO);
    384 	case NFS4ERR_OPENMODE:
    385 		return (EACCES);
    386 	case NFS4ERR_BADOWNER:
    387 		/*
    388 		 * Client and server are in different DNS domains
    389 		 * and the NFSMAPID_DOMAIN in /etc/default/nfs
    390 		 * doesn't match.  No good answer here.  Return
    391 		 * EACCESS, which translates to "permission denied".
    392 		 */
    393 		return (EACCES);
    394 	case NFS4ERR_BADCHAR:
    395 		return (EINVAL);
    396 	case NFS4ERR_BADNAME:
    397 		return (EINVAL);
    398 	case NFS4ERR_BAD_RANGE:
    399 		return (EIO);
    400 	case NFS4ERR_LOCK_NOTSUPP:
    401 		return (ENOTSUP);
    402 	case NFS4ERR_OP_ILLEGAL:
    403 		return (EINVAL);
    404 	case NFS4ERR_DEADLOCK:
    405 		return (EDEADLK);
    406 	case NFS4ERR_FILE_OPEN:
    407 		return (EACCES);
    408 	case NFS4ERR_ADMIN_REVOKED:
    409 		return (EPROTO);
    410 	case NFS4ERR_CB_PATH_DOWN:
    411 		return (EPROTO);
    412 	default:
    413 #ifdef DEBUG
    414 		zcmn_err(getzoneid(), CE_WARN, "geterrno4: got status %d",
    415 		    status);
    416 #endif
    417 		return ((int)status);
    418 	}
    419 }
    420 
/*
 * nfs4_log_badowner - record that an NFS4ERR_BADOWNER condition was seen.
 *
 * Two independent "only once" actions are performed:
 *   - a warning is logged once per nfs4_server_t, tracked by
 *     N4S_BADOWNER_DEBUG in server->s_flags;
 *   - an RF_BADOWNER fact is queued once per mntinfo4_t, tracked by
 *     MI4_BADOWNER_DEBUG in mi->mi_flags.
 *
 * mi - the mount on which the error was seen
 * op - the operation, passed through to nfs4_queue_fact()
 *
 * The function gives up without doing anything further if mi_recovlock
 * cannot be taken (nfs_rw_enter_sig() returns non-zero) or if
 * find_nfs4_server() returns NULL.
 */
void
nfs4_log_badowner(mntinfo4_t *mi, nfs_opnum4 op)
{
	nfs4_server_t *server;

	/*
	 * Return if already printed/queued a msg
	 * for this mount point.
	 */
	if (mi->mi_flags & MI4_BADOWNER_DEBUG)
		return;
	/*
	 * Happens once per client <-> server pair.
	 */
	if (nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER,
	    mi->mi_flags & MI4_INT))
		return;

	/*
	 * NOTE(review): find_nfs4_server() presumably returns with
	 * server->s_lock held and a reference taken, since both are
	 * released below without being acquired here -- confirm.
	 */
	server = find_nfs4_server(mi);
	if (server == NULL) {
		nfs_rw_exit(&mi->mi_recovlock);
		return;
	}

	if (!(server->s_flags & N4S_BADOWNER_DEBUG)) {
		zcmn_err(mi->mi_zone->zone_id, CE_WARN,
		    "!NFSMAPID_DOMAIN does not match"
		    " the server: %s domain.\n"
		    "Please check configuration",
		    mi->mi_curr_serv->sv_hostname);
		server->s_flags |= N4S_BADOWNER_DEBUG;
	}
	mutex_exit(&server->s_lock);
	nfs4_server_rele(server);
	nfs_rw_exit(&mi->mi_recovlock);

	/*
	 * Happens once per mntinfo4_t.
	 * This error is deemed as one of the recovery facts "RF_BADOWNER",
	 * so queue it in the message queue for this mount_info.  The message
	 * is not printed, meaning it's absent from id_to_dump_solo_fact(),
	 * but it's there for inspection if the queue is ever dumped/inspected.
	 *
	 * The flag is re-tested under mi_lock because the unlocked test at
	 * the top of the function is only a fast path.
	 */
	mutex_enter(&mi->mi_lock);
	if (!(mi->mi_flags & MI4_BADOWNER_DEBUG)) {
		nfs4_queue_fact(RF_BADOWNER, mi, NFS4ERR_BADOWNER, 0, op,
		    FALSE, NULL, 0, NULL);
		mi->mi_flags |= MI4_BADOWNER_DEBUG;
	}
	mutex_exit(&mi->mi_lock);
}
    472 
    473 int
    474 nfs4_time_ntov(nfstime4 *ntime, timestruc_t *vatime)
    475 {
    476 	int64_t sec;
    477 	int32_t nsec;
    478 
    479 	/*
    480 	 * Here check that the nfsv4 time is valid for the system.
    481 	 * nfsv4 time value is a signed 64-bit, and the system time
    482 	 * may be either int64_t or int32_t (depends on the kernel),
    483 	 * so if the kernel is 32-bit, the nfsv4 time value may not fit.
    484 	 */
    485 #ifndef _LP64
    486 	if (! NFS4_TIME_OK(ntime->seconds)) {
    487 		return (EOVERFLOW);
    488 	}
    489 #endif
    490 
    491 	/* Invalid to specify 1 billion (or more) nsecs */
    492 	if (ntime->nseconds >= 1000000000)
    493 		return (EINVAL);
    494 
    495 	if (ntime->seconds < 0) {
    496 		sec = ntime->seconds + 1;
    497 		nsec = -1000000000 + ntime->nseconds;
    498 	} else {
    499 		sec = ntime->seconds;
    500 		nsec = ntime->nseconds;
    501 	}
    502 
    503 	vatime->tv_sec = sec;
    504 	vatime->tv_nsec = nsec;
    505 
    506 	return (0);
    507 }
    508 
    509 int
    510 nfs4_time_vton(timestruc_t *vatime, nfstime4 *ntime)
    511 {
    512 	int64_t sec;
    513 	uint32_t nsec;
    514 
    515 	/*
    516 	 * nfsv4 time value is a signed 64-bit, and the system time
    517 	 * may be either int64_t or int32_t (depends on the kernel),
    518 	 * so all system time values will fit.
    519 	 */
    520 	if (vatime->tv_nsec >= 0) {
    521 		sec = vatime->tv_sec;
    522 		nsec = vatime->tv_nsec;
    523 	} else {
    524 		sec = vatime->tv_sec - 1;
    525 		nsec = 1000000000 + vatime->tv_nsec;
    526 	}
    527 	ntime->seconds = sec;
    528 	ntime->nseconds = nsec;
    529 
    530 	return (0);
    531 }
    532 
    533 /*
    534  * Converts a utf8 string to a valid null terminated filename string.
    535  *
    536  * XXX - Not actually translating the UTF-8 string as per RFC 2279.
    537  *	 For now, just validate that the UTF-8 string off the wire
    538  *	 does not have characters that will freak out UFS, and leave
    539  *	 it at that.
    540  */
    541 char *
    542 utf8_to_fn(utf8string *u8s, uint_t *lenp, char *s)
    543 {
    544 	ASSERT(lenp != NULL);
    545 
    546 	if (u8s == NULL || u8s->utf8string_len <= 0 ||
    547 	    u8s->utf8string_val == NULL)
    548 		return (NULL);
    549 
    550 	/*
    551 	 * Check for obvious illegal filename chars
    552 	 */
    553 	if (utf8_strchr(u8s, '/') != NULL) {
    554 #ifdef DEBUG
    555 		if (nfs4_utf8_debug) {
    556 			char *path;
    557 			int len = u8s->utf8string_len;
    558 
    559 			path = kmem_alloc(len + 1, KM_SLEEP);
    560 			bcopy(u8s->utf8string_val, path, len);
    561 			path[len] = '\0';
    562 
    563 			zcmn_err(getzoneid(), CE_WARN,
    564 			    "Invalid UTF-8 filename: %s", path);
    565 
    566 			kmem_free(path, len + 1);
    567 		}
    568 #endif
    569 		return (NULL);
    570 	}
    571 
    572 	return (utf8_to_str(u8s, lenp, s));
    573 }
    574 
    575 /*
    576  * Converts a utf8 string to a C string.
    577  * kmem_allocs a new string if not supplied
    578  */
    579 char *
    580 utf8_to_str(utf8string *str, uint_t *lenp, char *s)
    581 {
    582 	char	*sp;
    583 	char	*u8p;
    584 	int	len;
    585 	int	 i;
    586 
    587 	ASSERT(lenp != NULL);
    588 
    589 	if (str == NULL)
    590 		return (NULL);
    591 
    592 	u8p = str->utf8string_val;
    593 	len = str->utf8string_len;
    594 	if (len <= 0 || u8p == NULL) {
    595 		if (s)
    596 			*s = '\0';
    597 		return (NULL);
    598 	}
    599 
    600 	sp = s;
    601 	if (sp == NULL)
    602 		sp = kmem_alloc(len + 1, KM_SLEEP);
    603 
    604 	/*
    605 	 * At least check for embedded nulls
    606 	 */
    607 	for (i = 0; i < len; i++) {
    608 		sp[i] = u8p[i];
    609 		if (u8p[i] == '\0') {
    610 #ifdef	DEBUG
    611 			zcmn_err(getzoneid(), CE_WARN,
    612 			    "Embedded NULL in UTF-8 string");
    613 #endif
    614 			if (s == NULL)
    615 				kmem_free(sp, len + 1);
    616 			return (NULL);
    617 		}
    618 	}
    619 	sp[len] = '\0';
    620 	*lenp = len + 1;
    621 
    622 	return (sp);
    623 }
    624 
    625 /*
    626  * str_to_utf8 - converts a null-terminated C string to a utf8 string
    627  */
    628 utf8string *
    629 str_to_utf8(char *nm, utf8string *str)
    630 {
    631 	int len;
    632 
    633 	if (str == NULL)
    634 		return (NULL);
    635 
    636 	if (nm == NULL || *nm == '\0') {
    637 		str->utf8string_len = 0;
    638 		str->utf8string_val = NULL;
    639 	}
    640 
    641 	len = strlen(nm);
    642 
    643 	str->utf8string_val = kmem_alloc(len, KM_SLEEP);
    644 	str->utf8string_len = len;
    645 	bcopy(nm, str->utf8string_val, len);
    646 
    647 	return (str);
    648 }
    649 
    650 utf8string *
    651 utf8_copy(utf8string *src, utf8string *dest)
    652 {
    653 	if (src == NULL)
    654 		return (NULL);
    655 	if (dest == NULL)
    656 		return (NULL);
    657 
    658 	if (src->utf8string_len > 0) {
    659 		dest->utf8string_val = kmem_alloc(src->utf8string_len,
    660 		    KM_SLEEP);
    661 		bcopy(src->utf8string_val, dest->utf8string_val,
    662 		    src->utf8string_len);
    663 		dest->utf8string_len = src->utf8string_len;
    664 	} else {
    665 		dest->utf8string_val = NULL;
    666 		dest->utf8string_len = 0;
    667 	}
    668 
    669 	return (dest);
    670 }
    671 
    672 int
    673 utf8_compare(const utf8string *a, const utf8string *b)
    674 {
    675 	int mlen, cmp;
    676 	int alen, blen;
    677 	char *aval, *bval;
    678 
    679 	if ((a == NULL) && (b == NULL))
    680 		return (0);
    681 	else if (a == NULL)
    682 		return (-1);
    683 	else if (b == NULL)
    684 		return (1);
    685 
    686 	alen = a->utf8string_len;
    687 	blen = b->utf8string_len;
    688 	aval = a->utf8string_val;
    689 	bval = b->utf8string_val;
    690 
    691 	if (((alen == 0) || (aval == NULL)) &&
    692 	    ((blen == 0) || (bval == NULL)))
    693 		return (0);
    694 	else if ((alen == 0) || (aval == NULL))
    695 		return (-1);
    696 	else if ((blen == 0) || (bval == NULL))
    697 		return (1);
    698 
    699 	mlen = MIN(alen, blen);
    700 	cmp = strncmp(aval, bval, mlen);
    701 
    702 	if ((cmp == 0) && (alen == blen))
    703 		return (0);
    704 	else if ((cmp == 0) && (alen < blen))
    705 		return (-1);
    706 	else if (cmp == 0)
    707 		return (1);
    708 	else if (cmp < 0)
    709 		return (-1);
    710 	return (1);
    711 }
    712 
    713 /*
    714  * utf8_dir_verify - checks that the utf8 string is valid
    715  */
    716 int
    717 utf8_dir_verify(utf8string *str)
    718 {
    719 	char *nm;
    720 	int len;
    721 
    722 	if (str == NULL)
    723 		return (0);
    724 
    725 	nm = str->utf8string_val;
    726 	len = str->utf8string_len;
    727 	if (nm == NULL || len == 0) {
    728 		return (0);
    729 	}
    730 
    731 	if (len == 1 && nm[0] == '.')
    732 		return (0);
    733 	if (len == 2 && nm[0] == '.' && nm[1] == '.')
    734 		return (0);
    735 
    736 	if (utf8_strchr(str, '/') != NULL)
    737 		return (0);
    738 
    739 	if (utf8_strchr(str, '\0') != NULL)
    740 		return (0);
    741 
    742 	return (1);
    743 }
    744 
/*
 * from rpcsec module (common/rpcsec)
 *
 * sec_clnt_geth() is used by authget() to obtain an AUTH handle; it
 * returns zero on success.  sec_clnt_freeh() is used by clfree4() to
 * release such a handle.  sec_clnt_freeinfo() is declared here but not
 * used in the portion of the file visible to this review.
 */
extern int sec_clnt_geth(CLIENT *, struct sec_data *, cred_t *, AUTH **);
extern void sec_clnt_freeh(AUTH *);
extern void sec_clnt_freeinfo(struct sec_data *);
    751 
    752 /*
    753  * authget() gets an auth handle based on the security
    754  * information from the servinfo in mountinfo.
    755  * The auth handle is stored in ch_client->cl_auth.
    756  *
    757  * First security flavor of choice is to use sv_secdata
    758  * which is initiated by the client. If that fails, get
    759  * secinfo from the server and then select one from the
    760  * server secinfo list .
    761  *
    762  * For RPCSEC_GSS flavor, upon success, a secure context is
    763  * established between client and server.
    764  */
    765 int
    766 authget(servinfo4_t *svp, CLIENT *ch_client, cred_t *cr)
    767 {
    768 	int error, i;
    769 
    770 	/*
    771 	 * SV4_TRYSECINFO indicates to try the secinfo list from
    772 	 * sv_secinfo until a successful one is reached. Point
    773 	 * sv_currsec to the selected security mechanism for
    774 	 * later sessions.
    775 	 */
    776 	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
    777 	if ((svp->sv_flags & SV4_TRYSECINFO) && svp->sv_secinfo) {
    778 		for (i = svp->sv_secinfo->index; i < svp->sv_secinfo->count;
    779 		    i++) {
    780 			if (!(error = sec_clnt_geth(ch_client,
    781 			    &svp->sv_secinfo->sdata[i],
    782 			    cr, &ch_client->cl_auth))) {
    783 
    784 				svp->sv_currsec = &svp->sv_secinfo->sdata[i];
    785 				svp->sv_secinfo->index = i;
    786 				/* done */
    787 				svp->sv_flags &= ~SV4_TRYSECINFO;
    788 				break;
    789 			}
    790 
    791 			/*
    792 			 * Allow the caller retry with the security flavor
    793 			 * pointed by svp->sv_secinfo->index when
    794 			 * ETIMEDOUT/ECONNRESET occurs.
    795 			 */
    796 			if (error == ETIMEDOUT || error == ECONNRESET) {
    797 				svp->sv_secinfo->index = i;
    798 				break;
    799 			}
    800 		}
    801 	} else {
    802 		/* sv_currsec points to one of the entries in sv_secinfo */
    803 		if (svp->sv_currsec) {
    804 			error = sec_clnt_geth(ch_client, svp->sv_currsec, cr,
    805 			    &ch_client->cl_auth);
    806 		} else {
    807 			/* If it's null, use sv_secdata. */
    808 			error = sec_clnt_geth(ch_client, svp->sv_secdata, cr,
    809 			    &ch_client->cl_auth);
    810 		}
    811 	}
    812 	nfs_rw_exit(&svp->sv_lock);
    813 
    814 	return (error);
    815 }
    816 
/*
 * Common handle get program for NFS, NFS ACL, and NFS AUTH client.
 *
 * Hands out an RPC client handle, either a cached one or a newly created
 * one, with an auth handle attached via authget().
 *
 * ci    - program, version, read size, retransmit count and flags
 * svp   - server to talk to (knetconfig, address, security info)
 * cr    - credentials
 * newcl - out: the client handle; set to NULL on entry
 * chp   - out: the chtab entry wrapping the handle; set to NULL on entry
 * nfscl - per-zone client data holding the handle cache and statistics
 *
 * Returns 0 on success.  Returns EINVAL if newcl, chp or ci is NULL,
 * the clnt_tli_kcreate() error if a new handle cannot be created, and
 * the authget() error (or EINTR when authget() reported success but left
 * cl_auth NULL) if no auth handle could be obtained.
 *
 * NOTE(review): svp and nfscl are dereferenced without a NULL check.
 */
int
clget4(clinfo_t *ci, servinfo4_t *svp, cred_t *cr, CLIENT **newcl,
    struct chtab **chp, struct nfs4_clnt *nfscl)
{
	struct chhead *ch, *newch;
	struct chhead **plistp;
	struct chtab *cp;
	int error;
	k_sigset_t smask;

	if (newcl == NULL || chp == NULL || ci == NULL)
		return (EINVAL);

	*newcl = NULL;
	*chp = NULL;

	/*
	 * Find an unused handle or create one
	 */
	newch = NULL;
	nfscl->nfscl_stat.clgets.value.ui64++;
top:
	/*
	 * Find the correct entry in the cache to check for free
	 * client handles.  The search is based on the RPC program
	 * number, program version number, dev_t for the transport
	 * device, and the protocol family.
	 */
	mutex_enter(&nfscl->nfscl_chtable4_lock);
	plistp = &nfscl->nfscl_chtable4;
	for (ch = nfscl->nfscl_chtable4; ch != NULL; ch = ch->ch_next) {
		if (ch->ch_prog == ci->cl_prog &&
		    ch->ch_vers == ci->cl_vers &&
		    ch->ch_dev == svp->sv_knconf->knc_rdev &&
		    (strcmp(ch->ch_protofmly,
		    svp->sv_knconf->knc_protofmly) == 0))
			break;
		plistp = &ch->ch_next;
	}

	/*
	 * If we didn't find a cache entry for this quadruple, then
	 * create one.  If we don't have one already preallocated,
	 * then drop the cache lock, create one, and then start over.
	 * If we did have a preallocated entry, then just add it to
	 * the front of the list.
	 */
	if (ch == NULL) {
		if (newch == NULL) {
			/*
			 * KM_SLEEP allocations are made with the cache
			 * lock dropped; the search is then redone from
			 * "top" because the list may have changed.
			 */
			mutex_exit(&nfscl->nfscl_chtable4_lock);
			newch = kmem_alloc(sizeof (*newch), KM_SLEEP);
			newch->ch_timesused = 0;
			newch->ch_prog = ci->cl_prog;
			newch->ch_vers = ci->cl_vers;
			newch->ch_dev = svp->sv_knconf->knc_rdev;
			newch->ch_protofmly = kmem_alloc(
			    strlen(svp->sv_knconf->knc_protofmly) + 1,
			    KM_SLEEP);
			(void) strcpy(newch->ch_protofmly,
			    svp->sv_knconf->knc_protofmly);
			newch->ch_list = NULL;
			goto top;
		}
		ch = newch;
		newch = NULL;
		ch->ch_next = nfscl->nfscl_chtable4;
		nfscl->nfscl_chtable4 = ch;
	/*
	 * We found a cache entry, but if it isn't on the front of the
	 * list, then move it to the front of the list to try to take
	 * advantage of locality of operations.
	 */
	} else if (ch != nfscl->nfscl_chtable4) {
		*plistp = ch->ch_next;
		ch->ch_next = nfscl->nfscl_chtable4;
		nfscl->nfscl_chtable4 = ch;
	}

	/*
	 * If there was a free client handle cached, then remove it
	 * from the list, init it, and use it.
	 */
	if (ch->ch_list != NULL) {
		cp = ch->ch_list;
		ch->ch_list = cp->ch_list;
		mutex_exit(&nfscl->nfscl_chtable4_lock);
		/*
		 * A preallocated head that turned out not to be needed
		 * (a matching entry appeared during the retry) is
		 * discarded here.
		 */
		if (newch != NULL) {
			kmem_free(newch->ch_protofmly,
			    strlen(newch->ch_protofmly) + 1);
			kmem_free(newch, sizeof (*newch));
		}
		(void) clnt_tli_kinit(cp->ch_client, svp->sv_knconf,
		    &svp->sv_addr, ci->cl_readsize, ci->cl_retrans, cr);

		/*
		 * Get an auth handle.
		 */
		error = authget(svp, cp->ch_client, cr);
		if (error || cp->ch_client->cl_auth == NULL) {
			/* The handle is destroyed, not returned to the cache. */
			CLNT_DESTROY(cp->ch_client);
			kmem_cache_free(chtab4_cache, cp);
			return ((error != 0) ? error : EINTR);
		}
		ch->ch_timesused++;
		*newcl = cp->ch_client;
		*chp = cp;
		return (0);
	}

	/*
	 * There weren't any free client handles which fit, so allocate
	 * a new one and use that.
	 */
#ifdef DEBUG
	atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, 1);
#endif
	mutex_exit(&nfscl->nfscl_chtable4_lock);

	/* Counted on every path that has to create a new handle. */
	nfscl->nfscl_stat.cltoomany.value.ui64++;
	if (newch != NULL) {
		kmem_free(newch->ch_protofmly, strlen(newch->ch_protofmly) + 1);
		kmem_free(newch, sizeof (*newch));
	}

	cp = kmem_cache_alloc(chtab4_cache, KM_SLEEP);
	cp->ch_head = ch;

	/*
	 * The handle is created between sigintr() and sigunintr(); the
	 * second sigintr() argument is non-zero only when MI4_INT is set
	 * in the caller-supplied flags.
	 */
	sigintr(&smask, (int)ci->cl_flags & MI4_INT);
	error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, ci->cl_prog,
	    ci->cl_vers, ci->cl_readsize, ci->cl_retrans, cr, &cp->ch_client);
	sigunintr(&smask);

	if (error != 0) {
		kmem_cache_free(chtab4_cache, cp);
#ifdef DEBUG
		/* Undo the optimistic clalloc increment made above. */
		atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -1);
#endif
		/*
		 * Warning is unnecessary if error is EINTR.
		 */
		if (error != EINTR) {
			nfs_cmn_err(error, CE_WARN,
			    "clget: couldn't create handle: %m\n");
		}
		return (error);
	}
	(void) CLNT_CONTROL(cp->ch_client, CLSET_PROGRESS, NULL);
	/* Drop the auth handle the new client came with; authget() sets ours. */
	auth_destroy(cp->ch_client->cl_auth);

	/*
	 * Get an auth handle.
	 */
	error = authget(svp, cp->ch_client, cr);
	if (error || cp->ch_client->cl_auth == NULL) {
		CLNT_DESTROY(cp->ch_client);
		kmem_cache_free(chtab4_cache, cp);
#ifdef DEBUG
		atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -1);
#endif
		return ((error != 0) ? error : EINTR);
	}
	ch->ch_timesused++;
	*newcl = cp->ch_client;
	ASSERT(cp->ch_client->cl_nosignal == FALSE);
	*chp = cp;
	return (0);
}
    987 
    988 static int
    989 nfs_clget4(mntinfo4_t *mi, servinfo4_t *svp, cred_t *cr, CLIENT **newcl,
    990     struct chtab **chp, struct nfs4_clnt *nfscl)
    991 {
    992 	clinfo_t ci;
    993 	bool_t is_recov;
    994 	int firstcall, error = 0;
    995 
    996 	/*
    997 	 * Set read buffer size to rsize
    998 	 * and add room for RPC headers.
    999 	 */
   1000 	ci.cl_readsize = mi->mi_tsize;
   1001 	if (ci.cl_readsize != 0)
   1002 		ci.cl_readsize += (RPC_MAXDATASIZE - NFS_MAXDATA);
   1003 
   1004 	/*
   1005 	 * If soft mount and server is down just try once.
   1006 	 * meaning: do not retransmit.
   1007 	 */
   1008 	if (!(mi->mi_flags & MI4_HARD) && (mi->mi_flags & MI4_DOWN))
   1009 		ci.cl_retrans = 0;
   1010 	else
   1011 		ci.cl_retrans = mi->mi_retrans;
   1012 
   1013 	ci.cl_prog = mi->mi_prog;
   1014 	ci.cl_vers = mi->mi_vers;
   1015 	ci.cl_flags = mi->mi_flags;
   1016 
   1017 	/*
   1018 	 * clget4 calls authget() to get an auth handle. For RPCSEC_GSS
   1019 	 * security flavor, the client tries to establish a security context
   1020 	 * by contacting the server. If the connection is timed out or reset,
   1021 	 * e.g. server reboot, we will try again.
   1022 	 */
   1023 	is_recov = (curthread == mi->mi_recovthread);
   1024 	firstcall = 1;
   1025 
   1026 	do {
   1027 		error = clget4(&ci, svp, cr, newcl, chp, nfscl);
   1028 
   1029 		if (error == 0)
   1030 			break;
   1031 
   1032 		/*
   1033 		 * For forced unmount and zone shutdown, bail out but
   1034 		 * let the recovery thread do one more transmission.
   1035 		 */
   1036 		if ((FS_OR_ZONE_GONE4(mi->mi_vfsp)) &&
   1037 		    (!is_recov || !firstcall)) {
   1038 			error = EIO;
   1039 			break;
   1040 		}
   1041 
   1042 		/* do not retry for soft mount */
   1043 		if (!(mi->mi_flags & MI4_HARD))
   1044 			break;
   1045 
   1046 		/* let the caller deal with the failover case */
   1047 		if (FAILOVER_MOUNT4(mi))
   1048 			break;
   1049 
   1050 		firstcall = 0;
   1051 
   1052 	} while (error == ETIMEDOUT || error == ECONNRESET);
   1053 
   1054 	return (error);
   1055 }
   1056 
   1057 void
   1058 clfree4(CLIENT *cl, struct chtab *cp, struct nfs4_clnt *nfscl)
   1059 {
   1060 	if (cl->cl_auth != NULL) {
   1061 		sec_clnt_freeh(cl->cl_auth);
   1062 		cl->cl_auth = NULL;
   1063 	}
   1064 
   1065 	/*
   1066 	 * Timestamp this cache entry so that we know when it was last
   1067 	 * used.
   1068 	 */
   1069 	cp->ch_freed = gethrestime_sec();
   1070 
   1071 	/*
   1072 	 * Add the free client handle to the front of the list.
   1073 	 * This way, the list will be sorted in youngest to oldest
   1074 	 * order.
   1075 	 */
   1076 	mutex_enter(&nfscl->nfscl_chtable4_lock);
   1077 	cp->ch_list = cp->ch_head->ch_list;
   1078 	cp->ch_head->ch_list = cp;
   1079 	mutex_exit(&nfscl->nfscl_chtable4_lock);
   1080 }
   1081 
/*
 * How long a freed client handle is kept before it may be reclaimed.
 * NOTE(review): presumably in seconds, since clreclaim4_zone() adds its
 * cl_holdtime argument to a gethrestime_sec() timestamp -- confirm that
 * this constant is what is passed there.
 */
#define	CL_HOLDTIME	60	/* time to hold client handles */
   1083 
   1084 static void
   1085 clreclaim4_zone(struct nfs4_clnt *nfscl, uint_t cl_holdtime)
   1086 {
   1087 	struct chhead *ch;
   1088 	struct chtab *cp;	/* list of objects that can be reclaimed */
   1089 	struct chtab *cpe;
   1090 	struct chtab *cpl;
   1091 	struct chtab **cpp;
   1092 #ifdef DEBUG
   1093 	int n = 0;
   1094 	clstat4_debug.clreclaim.value.ui64++;
   1095 #endif
   1096 
   1097 	/*
   1098 	 * Need to reclaim some memory, so step through the cache
   1099 	 * looking through the lists for entries which can be freed.
   1100 	 */
   1101 	cp = NULL;
   1102 
   1103 	mutex_enter(&nfscl->nfscl_chtable4_lock);
   1104 
   1105 	/*
   1106 	 * Here we step through each non-NULL quadruple and start to
   1107 	 * construct the reclaim list pointed to by cp.  Note that
   1108 	 * cp will contain all eligible chtab entries.  When this traversal
   1109 	 * completes, chtab entries from the last quadruple will be at the
   1110 	 * front of cp and entries from previously inspected quadruples have
   1111 	 * been appended to the rear of cp.
   1112 	 */
   1113 	for (ch = nfscl->nfscl_chtable4; ch != NULL; ch = ch->ch_next) {
   1114 		if (ch->ch_list == NULL)
   1115 			continue;
   1116 		/*
   1117 		 * Search each list for entries older then
   1118 		 * cl_holdtime seconds.  The lists are maintained
   1119 		 * in youngest to oldest order so that when the
   1120 		 * first entry is found which is old enough, then
   1121 		 * all of the rest of the entries on the list will
   1122 		 * be old enough as well.
   1123 		 */
   1124 		cpl = ch->ch_list;
   1125 		cpp = &ch->ch_list;
   1126 		while (cpl != NULL &&
   1127 		    cpl->ch_freed + cl_holdtime > gethrestime_sec()) {
   1128 			cpp = &cpl->ch_list;
   1129 			cpl =