Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/types.h>
     27 #include <sys/sysmacros.h>
     28 #include <sys/param.h>
     29 #include <sys/systm.h>
     30 #include <sys/fcntl.h>
     31 #include <sys/vfs.h>
     32 #include <sys/vnode.h>
     33 #include <sys/share.h>
     34 #include <sys/cmn_err.h>
     35 #include <sys/kmem.h>
     36 #include <sys/debug.h>
     37 #include <sys/t_lock.h>
     38 #include <sys/errno.h>
     39 #include <sys/nbmlock.h>
     40 
     41 int share_debug = 0;
     42 
     43 #ifdef DEBUG
     44 static void print_shares(struct vnode *);
     45 static void print_share(struct shrlock *);
     46 #endif
     47 
     48 static int isreadonly(struct vnode *);
     49 
     50 /*
     51  * Add the share reservation shr to vp.
     52  */
     53 int
     54 add_share(struct vnode *vp, struct shrlock *shr)
     55 {
     56 	struct shrlocklist *shrl;
     57 
     58 	/*
     59 	 * An access of zero is not legal, however some older clients
     60 	 * generate it anyways.  Allow the request only if it is
     61 	 * coming from a remote system.  Be generous in what you
     62 	 * accept and strict in what you send.
     63 	 */
     64 	if ((shr->s_access == 0) && (GETSYSID(shr->s_sysid) == 0)) {
     65 		return (EINVAL);
     66 	}
     67 
     68 	/*
     69 	 * Sanity check to make sure we have valid options.
     70 	 * There is known overlap but it doesn't hurt to be careful.
     71 	 */
     72 	if (shr->s_access & ~(F_RDACC|F_WRACC|F_RWACC|F_RMACC|F_MDACC)) {
     73 		return (EINVAL);
     74 	}
     75 	if (shr->s_deny & ~(F_NODNY|F_RDDNY|F_WRDNY|F_RWDNY|F_COMPAT|
     76 	    F_MANDDNY|F_RMDNY)) {
     77 		return (EINVAL);
     78 	}
     79 
     80 	mutex_enter(&vp->v_lock);
     81 	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
     82 		/*
     83 		 * If the share owner matches previous request
     84 		 * do special handling.
     85 		 */
     86 		if ((shrl->shr->s_sysid == shr->s_sysid) &&
     87 		    (shrl->shr->s_pid == shr->s_pid) &&
     88 		    (shrl->shr->s_own_len == shr->s_own_len) &&
     89 		    bcmp(shrl->shr->s_owner, shr->s_owner,
     90 		    shr->s_own_len) == 0) {
     91 
     92 			/*
     93 			 * If the existing request is F_COMPAT and
     94 			 * is the first share then allow any F_COMPAT
     95 			 * from the same process.  Trick:  If the existing
     96 			 * F_COMPAT is write access then it must have
     97 			 * the same owner as the first.
     98 			 */
     99 			if ((shrl->shr->s_deny & F_COMPAT) &&
    100 			    (shr->s_deny & F_COMPAT) &&
    101 			    ((shrl->next == NULL) ||
    102 			    (shrl->shr->s_access & F_WRACC)))
    103 				break;
    104 		}
    105 
    106 		/*
    107 		 * If a first share has been done in compatibility mode
    108 		 * handle the special cases.
    109 		 */
    110 		if ((shrl->shr->s_deny & F_COMPAT) && (shrl->next == NULL)) {
    111 
    112 			if (!(shr->s_deny & F_COMPAT)) {
    113 				/*
    114 				 * If not compat and want write access or
    115 				 * want to deny read or
    116 				 * write exists, fails
    117 				 */
    118 				if ((shr->s_access & F_WRACC) ||
    119 				    (shr->s_deny & F_RDDNY) ||
    120 				    (shrl->shr->s_access & F_WRACC)) {
    121 					mutex_exit(&vp->v_lock);
    122 					return (EAGAIN);
    123 				}
    124 				/*
    125 				 * If read only file allow, this may allow
    126 				 * a deny write but that is meaningless on
    127 				 * a read only file.
    128 				 */
    129 				if (isreadonly(vp))
    130 					break;
    131 				mutex_exit(&vp->v_lock);
    132 				return (EAGAIN);
    133 			}
    134 			/*
    135 			 * This is a compat request and read access
    136 			 * and the first was also read access
    137 			 * we always allow it, otherwise we reject because
    138 			 * we have handled the only valid write case above.
    139 			 */
    140 			if ((shr->s_access == F_RDACC) &&
    141 			    (shrl->shr->s_access == F_RDACC))
    142 				break;
    143 			mutex_exit(&vp->v_lock);
    144 			return (EAGAIN);
    145 		}
    146 
    147 		/*
    148 		 * If we are trying to share in compatibility mode
    149 		 * and the current share is compat (and not the first)
    150 		 * we don't know enough.
    151 		 */
    152 		if ((shrl->shr->s_deny & F_COMPAT) && (shr->s_deny & F_COMPAT))
    153 			continue;
    154 
    155 		/*
    156 		 * If this is a compat we check for what can't succeed.
    157 		 */
    158 		if (shr->s_deny & F_COMPAT) {
    159 			/*
    160 			 * If we want write access or
    161 			 * if anyone is denying read or
    162 			 * if anyone has write access we fail
    163 			 */
    164 			if ((shr->s_access & F_WRACC) ||
    165 			    (shrl->shr->s_deny & F_RDDNY) ||
    166 			    (shrl->shr->s_access & F_WRACC)) {
    167 				mutex_exit(&vp->v_lock);
    168 				return (EAGAIN);
    169 			}
    170 			/*
    171 			 * If the first was opened with only read access
    172 			 * and is a read only file we allow.
    173 			 */
    174 			if (shrl->next == NULL) {
    175 				if ((shrl->shr->s_access == F_RDACC) &&
    176 				    isreadonly(vp)) {
    177 					break;
    178 				}
    179 				mutex_exit(&vp->v_lock);
    180 				return (EAGAIN);
    181 			}
    182 			/*
    183 			 * We still can't determine our fate so continue
    184 			 */
    185 			continue;
    186 		}
    187 
    188 		/*
    189 		 * Simple bitwise test, if we are trying to access what
    190 		 * someone else is denying or we are trying to deny
    191 		 * what someone else is accessing we fail.
    192 		 */
    193 		if ((shr->s_access & shrl->shr->s_deny) ||
    194 		    (shr->s_deny & shrl->shr->s_access)) {
    195 			mutex_exit(&vp->v_lock);
    196 			return (EAGAIN);
    197 		}
    198 	}
    199 
    200 	shrl = kmem_alloc(sizeof (struct shrlocklist), KM_SLEEP);
    201 	shrl->shr = kmem_alloc(sizeof (struct shrlock), KM_SLEEP);
    202 	shrl->shr->s_access = shr->s_access;
    203 	shrl->shr->s_deny = shr->s_deny;
    204 
    205 	/*
    206 	 * Make sure no other deny modes are also set with F_COMPAT
    207 	 */
    208 	if (shrl->shr->s_deny & F_COMPAT)
    209 		shrl->shr->s_deny = F_COMPAT;
    210 	shrl->shr->s_sysid = shr->s_sysid;		/* XXX ref cnt? */
    211 	shrl->shr->s_pid = shr->s_pid;
    212 	shrl->shr->s_own_len = shr->s_own_len;
    213 	shrl->shr->s_owner = kmem_alloc(shr->s_own_len, KM_SLEEP);
    214 	bcopy(shr->s_owner, shrl->shr->s_owner, shr->s_own_len);
    215 	shrl->next = vp->v_shrlocks;
    216 	vp->v_shrlocks = shrl;
    217 #ifdef DEBUG
    218 	if (share_debug)
    219 		print_shares(vp);
    220 #endif
    221 
    222 	mutex_exit(&vp->v_lock);
    223 
    224 	return (0);
    225 }
    226 
    227 /*
    228  *	nlmid	sysid	pid
    229  *	=====	=====	===
    230  *	!=0	!=0	=0	in cluster; NLM lock
    231  *	!=0	=0	=0	in cluster; special case for NLM lock
    232  *	!=0	=0	!=0	in cluster; PXFS local lock
    233  *	!=0	!=0	!=0	cannot happen
    234  *	=0	!=0	=0	not in cluster; NLM lock
    235  *	=0	=0	!=0	not in cluster; local lock
    236  *	=0	=0	=0	cannot happen
    237  *	=0	!=0	!=0	cannot happen
    238  */
    239 static int
    240 is_match_for_del(struct shrlock *shr, struct shrlock *element)
    241 {
    242 	int nlmid1, nlmid2;
    243 	int result = 0;
    244 
    245 	nlmid1 = GETNLMID(shr->s_sysid);
    246 	nlmid2 = GETNLMID(element->s_sysid);
    247 
    248 	if (nlmid1 != 0) {		/* in a cluster */
    249 		if (GETSYSID(shr->s_sysid) != 0 && shr->s_pid == 0) {
    250 			/*
    251 			 * Lock obtained through nlm server.  Just need to
    252 			 * compare whole sysids.  pid will always = 0.
    253 			 */
    254 			result = shr->s_sysid == element->s_sysid;
    255 		} else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid == 0) {
    256 			/*
    257 			 * This is a special case.  The NLM server wishes to
    258 			 * delete all share locks obtained through nlmid1.
    259 			 */
    260 			result = (nlmid1 == nlmid2);
    261 		} else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid != 0) {
    262 			/*
    263 			 * Lock obtained locally through PXFS.  Match nlmids
    264 			 * and pids.
    265 			 */
    266 			result = (nlmid1 == nlmid2 &&
    267 			    shr->s_pid == element->s_pid);
    268 		}
    269 	} else {			/* not in a cluster */
    270 		result = ((shr->s_sysid == 0 &&
    271 		    shr->s_pid == element->s_pid) ||
    272 		    (shr->s_sysid != 0 &&
    273 		    shr->s_sysid == element->s_sysid));
    274 	}
    275 	return (result);
    276 }
    277 
    278 /*
    279  * Delete the given share reservation.  Returns 0 if okay, EINVAL if the
    280  * share could not be found.  If the share reservation is an NBMAND share
    281  * reservation, signal anyone waiting for the share to go away (e.g.,
    282  * blocking lock requests).
    283  */
    284 
    285 int
    286 del_share(struct vnode *vp, struct shrlock *shr)
    287 {
    288 	struct shrlocklist *shrl;
    289 	struct shrlocklist **shrlp;
    290 	int found = 0;
    291 	int is_nbmand = 0;
    292 
    293 	mutex_enter(&vp->v_lock);
    294 	/*
    295 	 * Delete the shares with the matching sysid and owner
    296 	 * But if own_len == 0 and sysid == 0 delete all with matching pid
    297 	 * But if own_len == 0 delete all with matching sysid.
    298 	 */
    299 	shrlp = &vp->v_shrlocks;
    300 	while (*shrlp) {
    301 		if ((shr->s_own_len == (*shrlp)->shr->s_own_len &&
    302 		    (bcmp(shr->s_owner, (*shrlp)->shr->s_owner,
    303 		    shr->s_own_len) == 0)) ||
    304 
    305 		    (shr->s_own_len == 0 &&
    306 		    is_match_for_del(shr, (*shrlp)->shr))) {
    307 
    308 			shrl = *shrlp;
    309 			*shrlp = shrl->next;
    310 
    311 			if (shrl->shr->s_deny & F_MANDDNY)
    312 				is_nbmand = 1;
    313 
    314 			/* XXX deref sysid */
    315 			kmem_free(shrl->shr->s_owner, shrl->shr->s_own_len);
    316 			kmem_free(shrl->shr, sizeof (struct shrlock));
    317 			kmem_free(shrl, sizeof (struct shrlocklist));
    318 			found++;
    319 			continue;
    320 		}
    321 		shrlp = &(*shrlp)->next;
    322 	}
    323 
    324 	if (is_nbmand)
    325 		cv_broadcast(&vp->v_cv);
    326 
    327 	mutex_exit(&vp->v_lock);
    328 	return (found ? 0 : EINVAL);
    329 }
    330 
    331 /*
    332  * Clean up all local share reservations that the given process has with
    333  * the given file.
    334  */
    335 void
    336 cleanshares(struct vnode *vp, pid_t pid)
    337 {
    338 	struct shrlock shr;
    339 
    340 	if (vp->v_shrlocks == NULL)
    341 		return;
    342 
    343 	shr.s_access = 0;
    344 	shr.s_deny = 0;
    345 	shr.s_pid = pid;
    346 	shr.s_sysid = 0;
    347 	shr.s_own_len = 0;
    348 	shr.s_owner = NULL;
    349 
    350 	(void) del_share(vp, &shr);
    351 }
    352 
    353 static int
    354 is_match_for_has_remote(int32_t sysid1, int32_t sysid2)
    355 {
    356 	int result = 0;
    357 
    358 	if (GETNLMID(sysid1) != 0) { /* in a cluster */
    359 		if (GETSYSID(sysid1) != 0) {
    360 			/*
    361 			 * Lock obtained through nlm server.  Just need to
    362 			 * compare whole sysids.
    363 			 */
    364 			result = (sysid1 == sysid2);
    365 		} else if (GETSYSID(sysid1) == 0) {
    366 			/*
    367 			 * This is a special case.  The NLM server identified
    368 			 * by nlmid1 wishes to find out if it has obtained
    369 			 * any share locks on the vnode.
    370 			 */
    371 			result = (GETNLMID(sysid1) == GETNLMID(sysid2));
    372 		}
    373 	} else {			/* not in a cluster */
    374 		result = ((sysid1 != 0 && sysid1 == sysid2) ||
    375 		    (sysid1 == 0 && sysid2 != 0));
    376 	}
    377 	return (result);
    378 }
    379 
    380 
    381 /*
    382  * Determine whether there are any shares for the given vnode
    383  * with a remote sysid. Returns zero if not, non-zero if there are.
    384  * If sysid is non-zero then determine if this sysid has a share.
    385  *
    386  * Note that the return value from this function is potentially invalid
    387  * once it has been returned.  The caller is responsible for providing its
    388  * own synchronization mechanism to ensure that the return value is useful.
    389  */
    390 int
    391 shr_has_remote_shares(vnode_t *vp, int32_t sysid)
    392 {
    393 	struct shrlocklist *shrl;
    394 	int result = 0;
    395 
    396 	mutex_enter(&vp->v_lock);
    397 	shrl = vp->v_shrlocks;
    398 	while (shrl) {
    399 		if (is_match_for_has_remote(sysid, shrl->shr->s_sysid)) {
    400 
    401 			result = 1;
    402 			break;
    403 		}
    404 		shrl = shrl->next;
    405 	}
    406 	mutex_exit(&vp->v_lock);
    407 	return (result);
    408 }
    409 
    410 static int
    411 isreadonly(struct vnode *vp)
    412 {
    413 	return (vp->v_type != VCHR && vp->v_type != VBLK &&
    414 	    vp->v_type != VFIFO && vn_is_readonly(vp));
    415 }
    416 
    417 #ifdef DEBUG
    418 static void
    419 print_shares(struct vnode *vp)
    420 {
    421 	struct shrlocklist *shrl;
    422 
    423 	if (vp->v_shrlocks == NULL) {
    424 		printf("<NULL>\n");
    425 		return;
    426 	}
    427 
    428 	shrl = vp->v_shrlocks;
    429 	while (shrl) {
    430 		print_share(shrl->shr);
    431 		shrl = shrl->next;
    432 	}
    433 }
    434 
    435 static void
    436 print_share(struct shrlock *shr)
    437 {
    438 	int i;
    439 
    440 	if (shr == NULL) {
    441 		printf("<NULL>\n");
    442 		return;
    443 	}
    444 
    445 	printf("    access(%d):	", shr->s_access);
    446 	if (shr->s_access & F_RDACC)
    447 		printf("R");
    448 	if (shr->s_access & F_WRACC)
    449 		printf("W");
    450 	if ((shr->s_access & (F_RDACC|F_WRACC)) == 0)
    451 		printf("N");
    452 	printf("\n");
    453 	printf("    deny:	");
    454 	if (shr->s_deny & F_COMPAT)
    455 		printf("C");
    456 	if (shr->s_deny & F_RDDNY)
    457 		printf("R");
    458 	if (shr->s_deny & F_WRDNY)
    459 		printf("W");
    460 	if (shr->s_deny == F_NODNY)
    461 		printf("N");
    462 	printf("\n");
    463 	printf("    sysid:	%d\n", shr->s_sysid);
    464 	printf("    pid:	%d\n", shr->s_pid);
    465 	printf("    owner:	[%d]", shr->s_own_len);
    466 	printf("'");
    467 	for (i = 0; i < shr->s_own_len; i++)
    468 		printf("%02x", (unsigned)shr->s_owner[i]);
    469 	printf("'\n");
    470 }
    471 #endif
    472 
    473 /*
    474  * Return non-zero if the given I/O request conflicts with a registered
    475  * share reservation.
    476  *
    477  * A process is identified by the tuple (sysid, pid). When the caller
    478  * context is passed to nbl_share_conflict, the sysid and pid in the
    479  * caller context are used. Otherwise the sysid is zero, and the pid is
    480  * taken from the current process.
    481  *
    482  * Conflict Algorithm:
    483  *   1. An op request of NBL_READ will fail if a different
    484  *      process has a mandatory share reservation with deny read.
    485  *
    486  *   2. An op request of NBL_WRITE will fail if a different
    487  *      process has a mandatory share reservation with deny write.
    488  *
    489  *   3. An op request of NBL_READWRITE will fail if a different
    490  *      process has a mandatory share reservation with deny read
    491  *      or deny write.
    492  *
    493  *   4. An op request of NBL_REMOVE will fail if there is
    494  *      a mandatory share reservation with an access of read,
    495  *      write, or remove. (Anything other than meta data access).
    496  *
    497  *   5. An op request of NBL_RENAME will fail if there is
    498  *      a mandatory share reservation with:
    499  *        a) access write or access remove
    500  *      or
    501  *        b) access read and deny remove
    502  *
    503  *   Otherwise there is no conflict and the op request succeeds.
    504  *
    505  * This behavior is required for interoperability between
    506  * the nfs server, cifs server, and local access.
    507  * This behavior can result in non-posix semantics.
    508  *
    509  * When mandatory share reservations are enabled, a process
    510  * should call nbl_share_conflict to determine if the
    511  * desired operation would conflict with an existing share
    512  * reservation.
    513  *
    514  * The call to nbl_share_conflict may be skipped if the
    515  * process has an existing share reservation and the operation
    516  * is being performed in the context of that existing share
    517  * reservation.
    518  */
    519 int
    520 nbl_share_conflict(vnode_t *vp, nbl_op_t op, caller_context_t *ct)
    521 {
    522 	struct shrlocklist *shrl;
    523 	int conflict = 0;
    524 	pid_t pid;
    525 	int sysid;
    526 
    527 	ASSERT(nbl_in_crit(vp));
    528 
    529 	if (ct == NULL) {
    530 		pid = curproc->p_pid;
    531 		sysid = 0;
    532 	} else {
    533 		pid = ct->cc_pid;
    534 		sysid = ct->cc_sysid;
    535 	}
    536 
    537 	mutex_enter(&vp->v_lock);
    538 	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
    539 		if (!(shrl->shr->s_deny & F_MANDDNY))
    540 			continue;
    541 		/*
    542 		 * NBL_READ, NBL_WRITE, and NBL_READWRITE need to
    543 		 * check if the share reservation being examined
    544 		 * belongs to the current process.
    545 		 * NBL_REMOVE and NBL_RENAME do not.
    546 		 * This behavior is required by the conflict
    547 		 * algorithm described above.
    548 		 */
    549 		switch (op) {
    550 		case NBL_READ:
    551 			if ((shrl->shr->s_deny & F_RDDNY) &&
    552 			    (shrl->shr->s_sysid != sysid ||
    553 			    shrl->shr->s_pid != pid))
    554 				conflict = 1;
    555 			break;
    556 		case NBL_WRITE:
    557 			if ((shrl->shr->s_deny & F_WRDNY) &&
    558 			    (shrl->shr->s_sysid != sysid ||
    559 			    shrl->shr->s_pid != pid))
    560 				conflict = 1;
    561 			break;
    562 		case NBL_READWRITE:
    563 			if ((shrl->shr->s_deny & F_RWDNY) &&
    564 			    (shrl->shr->s_sysid != sysid ||
    565 			    shrl->shr->s_pid != pid))
    566 				conflict = 1;
    567 			break;
    568 		case NBL_REMOVE:
    569 			if (shrl->shr->s_access & (F_RWACC|F_RMACC))
    570 				conflict = 1;
    571 			break;
    572 		case NBL_RENAME:
    573 			if (shrl->shr->s_access & (F_WRACC|F_RMACC))
    574 				conflict = 1;
    575 
    576 			else if ((shrl->shr->s_access & F_RDACC) &&
    577 			    (shrl->shr->s_deny & F_RMDNY))
    578 				conflict = 1;
    579 			break;
    580 #ifdef DEBUG
    581 		default:
    582 			cmn_err(CE_PANIC,
    583 			    "nbl_share_conflict: bogus op (%d)",
    584 			    op);
    585 			break;
    586 #endif
    587 		}
    588 		if (conflict)
    589 			break;
    590 	}
    591 
    592 	mutex_exit(&vp->v_lock);
    593 	return (conflict);
    594 }
    595 
    596 /*
    597  * Determine if the given process has a NBMAND share reservation on the
    598  * given vnode. Returns 1 if the process has such a share reservation,
    599  * returns 0 otherwise.
    600  */
    601 int
    602 proc_has_nbmand_share_on_vp(vnode_t *vp, pid_t pid)
    603 {
    604 	struct shrlocklist *shrl;
    605 
    606 	/*
    607 	 * Any NBMAND share reservation on the vp for this process?
    608 	 */
    609 	mutex_enter(&vp->v_lock);
    610 	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
    611 		if (shrl->shr->s_sysid == 0 &&
    612 		    (shrl->shr->s_deny & F_MANDDNY) &&
    613 		    (shrl->shr->s_pid == pid)) {
    614 			mutex_exit(&vp->v_lock);
    615 			return (1);
    616 		}
    617 	}
    618 	mutex_exit(&vp->v_lock);
    619 
    620 	return (0);
    621 }
    622