Home | History | Annotate | Download | only in mount
      1 //
      2 // CDDL HEADER START
      3 //
      4 // The contents of this file are subject to the terms of the
      5 // Common Development and Distribution License (the License).
      6 // You may not use this file except in compliance with the License.
      7 //
      8 // You can obtain a copy of the license at usr/src/CDDL.txt
      9 // or http://www.opensolaris.org/os/licensing.
     10 // See the License for the specific language governing permissions
     11 // and limitations under the License.
     12 //
     13 // When distributing Covered Code, include this CDDL HEADER in each
     14 // file and include the License file at usr/src/CDDL.txt.
     15 // If applicable, add the following below this CDDL HEADER, with the
     16 // fields enclosed by brackets [] replaced with your own identifying
     17 // information: Portions Copyright [yyyy] [name of copyright owner]
     18 //
     19 // CDDL HEADER END
     20 //
     21 
     22 //
     23 // Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24 // Use is subject to license terms.
     25 //
     26 
     27 #pragma ident	"@(#)mount_client_impl.cc	1.106	08/05/20 SMI"
     28 
     29 #include <sys/errno.h>
     30 #include <sys/strerror.h>
     31 
     32 #include <sys/types.h>
     33 #include <sys/thread.h>
     34 #include <sys/file.h>
     35 #include <sys/pathname.h>
     36 #include <sys/sysmacros.h>
     37 #include <sys/vfs.h>
     38 #include <sys/mount.h>
     39 #include <sys/dnlc.h>
     40 
     41 #include <h/naming.h>
     42 #include <sys/cladm_int.h>
     43 #include <sys/cladm_debug.h>
     44 #include <sys/sol_conv.h>
     45 #include <solobj/solobj_impl.h>
     46 #include <nslib/ns.h>
     47 #include <sys/vm_util.h>
     48 
     49 #include <h/repl_pxfs.h>
     50 #include <pxfs/common/pxfslib.h>
     51 #include <pxfs/mount/mount_client_impl.h>
     52 #include <pxfs/mount/mount_debug.h>
     53 #include <pxfs/device/device_service_mgr.h>
     54 #include <pxfs/server/fs_impl.h>
     55 #include <pxfs/server/repl_pxfs_server.h>
     56 #include <pxfs/client/pxvfs.h>
     57 
     58 #ifndef VXFS_DISABLED
     59 #include <pxfs/server/vxfs_dependent_impl.h>
     60 #endif
     61 
     62 //
     63 // For update of the mnttab modification time. The function
     64 // vfs_mnttab_modtimeupd is declared static in vfs.c for
     65 // Solaris 8 and 9. For Solaris 10, it is global.
     66 //
     67 #include <sys/sol_version.h>
     68 #if SOL_VERSION >= __s10
     69 #define	GLOBAL_MNTTAB_MODTIME_INTERFACE
     70 #else
     71 extern timespec_t vfs_mnttab_mtime;
     72 #endif
     73 
     74 //lint -e1512
     75 //
     76 // Warning(1512) destructor for base class is not virtual -- In a
     77 // final pass through all the classes, we have found a class that is
     78 // the base class of a derivation and has a destructor but the
     79 // destructor is not virtual. It is conventional for inherited classes
     80 // to have virtual destructors so that it is safe to 'delete' a
     81 // pointer to a base class.
     82 //
     83 // The classes prov_common_iter and prov_common_set in the file
     84 // prov_common.h have to be changed to have virtual destructors.
     85 //
     86 
     87 
     88 // Static data member initialization.
     89 mount_client_impl	*mount_client_impl::this_mount_client = NULL;
     90 os::mutex_t		mount_client_impl::mount_client_lock;
     91 
     92 mount_client_impl::mount_client_impl() :
     93 	mntvp(NULL),
     94 	unmount_pxvfs_v1_p(NULL)
     95 {
     96 }
     97 
     98 mount_client_impl::~mount_client_impl()
     99 {
    100 	if (mntvp != NULL) {
    101 		VN_RELE(mntvp);
    102 		mntvp = NULL; // for lint
    103 	}
    104 	if (unmount_pxvfs_v1_p != NULL) {
    105 		VFS_RELE(unmount_pxvfs_v1_p->get_vfsp());
    106 		unmount_pxvfs_v1_p = NULL; // for lint
    107 	}
    108 }
    109 
    110 //
    111 // This is called when all CORBA references are released.
    112 //
    113 void
    114 mount_client_impl::_unreferenced(unref_t arg)
    115 {
    116 	if (!_last_unref(arg)) {
    117 		// _last_unref() should always be true since we don't use 0->1.
    118 		ASSERT(0);
    119 		return;
    120 	}
    121 
    122 	//
    123 	// If _unreferenced() is called because activate() failed
    124 	// then delete this object. If for some reason the mount server
    125 	// crashes and we get unreferenced, we don't panic when referencing
    126 	// "this_mount_client".
    127 	//
    128 	if (CORBA::is_nil(keepalive)) {
    129 		delete this;
    130 	}
    131 }
    132 
    133 //
    134 // This is called  to get a new reference.  Doing get_objref() here would
    135 // get the highest reference version that was compiled.  We want the
    136 // highest reference which is currently committed so we use this indirect
    137 // way.
    138 //
    139 void
    140 mount_client_impl::_generic_method(CORBA::octet_seq_t &,
    141     CORBA::object_seq_t &objs, Environment &e)
    142 {
    143 	objs[0] = get_objref();
    144 }
    145 
    146 //
    147 // After a rolling upgrade commit, new mount server references should
    148 // be used.  Currently, this is only called by pxvfs::unmount for
    149 // the new interface which supports forced unmount.
    150 //
    151 void
    152 mount_client_impl::update_mount_server_ref()
    153 {
    154 	Environment e;
    155 	CORBA::Object_var obj;
    156 
    157 	ASSERT(this_mount_client != NULL);
    158 
    159 	mount_client_lock.lock();
    160 	replica::service_admin_var	sa = pxfslib::get_service_admin_ref(
    161 	    "mount_client_impl::startup", "mount", e);
    162 	if (e.exception()) {
    163 		mount_client_lock.unlock();
    164 		e.clear();
    165 		return;
    166 	}
    167 	obj = sa->get_root_obj(e);
    168 	if (e.exception()) {
    169 		mount_client_lock.unlock();
    170 		e.clear();
    171 		return;
    172 	}
    173 	this_mount_client->server = fs::mount_server::_narrow(obj);
    174 	mount_client_lock.unlock();
    175 }
    176 
    177 //
    178 // upgrade_mount_client - use the new version of the mount_server
    179 //
    180 void
    181 mount_client_impl::upgrade_mount_client(fs::mount_server_ptr
    182     mountserver_p, Environment &)
    183 {
    184 	server = fs::mount_server::_duplicate(mountserver_p);
    185 }
    186 
    187 //
    188 // After a rolling upgrade commit, this mount_server reference will
    189 // will be released (in update_mount_server_ref() above).  So we need
    190 // to duplicate here to prevent the caller from using a bogus reference
    191 // after the replacement done above.
    192 //
    193 fs::mount_server_ptr
    194 mount_client_impl::get_server()
    195 {
    196 	fs::mount_server_ptr	mount_server_p;
    197 
    198 	ASSERT(this_mount_client != NULL);
    199 
    200 	mount_server_p = (fs::mount_server::_duplicate(
    201 	    this_mount_client->server));
    202 
    203 	return (mount_server_p);
    204 }
    205 
    206 //
    207 // Return a new reference to the local mount client.
    208 //
    209 fs::mount_client_ptr
    210 mount_client_impl::get_client_ref()
    211 {
    212 	ASSERT(this_mount_client != NULL);
    213 	return (this_mount_client->get_objref());
    214 }
    215 
    216 //
    217 // Return a pointer to the local mount client.
    218 //
    219 mount_client_impl *
    220 mount_client_impl::get_client()
    221 {
    222 	ASSERT(this_mount_client != NULL);
    223 	return (this_mount_client);
    224 }
    225 
    226 //
    227 // Lock the mount point and perform standard mount point checks.
    228 // Return an exception if the lock fails.
    229 // XXX should we locally lock vfs_t if this is for a remount?
    230 //
    231 // Note: this routine can be called multiple times if the primary
    232 // fails over and the new primary retries this operation so
    233 // this operation must be idempotent.
    234 //
    235 void
    236 mount_client_impl::lock_mountpoint(const char *mountpoint, int32_t mntflags,
    237     Environment &_environment)
    238 {
    239 	mount_lock.lock();
    240 	if (mntvp != NULL || _environment.is_orphan()) {
    241 		//
    242 		// The mount point is already locked by a call to us.
    243 		// This call should be a retry from a new primary or
    244 		// the global mount code has allowed two mounts (or remounts)
    245 		// to happen at the same time which shouldn't happen.
    246 		// If we modify the mount server to allow more than one
    247 		// mount point to be locked at the same time, we will
    248 		// need to use the mount point string to hash into a table.
    249 		// XXX We could save the mountpoint string to verify that
    250 		// it is the same as the previous call.
    251 		//
    252 		mount_lock.unlock();
    253 		MOUNT_DBPRINTF(
    254 		    MOUNT_TRACE_CLIENT,
    255 		    MOUNT_AMBER,
    256 		    ("client:lock_mountpoint retry %s\n",
    257 		    mountpoint));
    258 		return;
    259 	}
    260 
    261 	// Lookup the mount point path name to get a vnode pointer.
    262 	vnode_t *vp;
    263 	int	error = lookupname((char *)mountpoint, UIO_SYSSPACE, FOLLOW,
    264 	    NULL, &vp);
    265 	if (error != 0) {
    266 		mount_lock.unlock();
    267 		MOUNT_DBPRINTF(
    268 		    MOUNT_TRACE_CLIENT,
    269 		    MOUNT_RED,
    270 		    ("client:lock_mountpoint lookup %s error %d\n",
    271 		    mountpoint, error));
    272 		os::sc_syslog_msg msg(SC_SYSLOG_GLOBAL_MOUNT_TAG, NULL, NULL);
    273 		//
    274 		// SCMSGS
    275 		// @explanation
    276 		// While mounting a Cluster file system, the directory on
    277 		// which the mount is to take place could not be opened.
    278 		// @user_action
    279 		// Fix the reported error and retry. The most likely problem
    280 		// is that the directory does not exist - in that case, create
    281 		// it with the appropriate permissions and retry.
    282 		//
    283 		(void) msg.log(SC_SYSLOG_WARNING, MESSAGE,
    284 		    "Could not mount '%s' because there was an error (%d) in "
    285 		    "opening the directory.", mountpoint, error);
    286 		pxfslib::throw_exception(_environment, error);
    287 		return;
    288 	}
    289 
    290 	if (vn_vfswlock(vp) != 0) {
    291 		mount_lock.unlock();
    292 		MOUNT_DBPRINTF(
    293 		    MOUNT_TRACE_CLIENT,
    294 		    MOUNT_RED,
    295 		    ("client:lock_mountpoint vn_vfswlock %s\n",
    296 		    mountpoint));
    297 		pxfslib::throw_exception(_environment, EBUSY);
    298 		VN_RELE(vp);
    299 		return;
    300 	}
    301 
    302 	if (vp->v_flag & VNOMOUNT) {
    303 		mount_lock.unlock();
    304 		MOUNT_DBPRINTF(
    305 		    MOUNT_TRACE_CLIENT,
    306 		    MOUNT_RED,
    307 		    ("client:lock_mountpoint VNOMOUNT %s\n",
    308 		    mountpoint));
    309 		pxfslib::throw_exception(_environment, EINVAL);
    310 		vn_vfsunlock(vp);
    311 		VN_RELE(vp);
    312 		return;
    313 	}
    314 
    315 	// Make sure we are the only holder of the mount point.
    316 	dnlc_purge_vp(vp);
    317 
    318 	if (vn_ismntpt(vp) ||
    319 	    ((mntflags & (MS_REMOUNT | MS_OVERLAY)) == 0 &&
    320 	    (vp->v_count != 1 || (vp->v_flag & VROOT) != 0))) {
    321 		mount_lock.unlock();
    322 		MOUNT_DBPRINTF(
    323 		    MOUNT_TRACE_CLIENT,
    324 		    MOUNT_RED,
    325 		    ("client:lock_mountpoint %s mounted %d count %d\n",
    326 		    mountpoint, vn_ismntpt(vp), vp->v_count));
    327 		pxfslib::throw_exception(_environment, EBUSY);
    328 		vn_vfsunlock(vp);
    329 		VN_RELE(vp);
    330 		return;
    331 	}
    332 
    333 	//
    334 	// XXX Note that we don't check that vp->v_type == VDIR.
    335 	// This should be checked for most file systems but not "namefs".
    336 	// Also, the "busy" check above isn't quite right for "namefs".
    337 	//
    338 
    339 	mntvp = vp;
    340 	mount_lock.unlock();
    341 }
    342 
    343 //
    344 // Unlock the mount point.
    345 //
    346 // Note: this routine can be called multiple times if the primary
    347 // fails over and the new primary retries this operation so
    348 // this operation must be idempotent.
    349 //
    350 void
    351 mount_client_impl::unlock_mountpoint(const char *, Environment &_environment)
    352 {
    353 	//
    354 	// If the mount point vnode is not locked, this is a retry from a new
    355 	// primary.
    356 	//
    357 	mount_lock.lock();
    358 	if (mntvp == NULL || _environment.is_orphan()) {
    359 		mount_lock.unlock();
    360 		MOUNT_DBPRINTF(
    361 		    MOUNT_TRACE_CLIENT,
    362 		    MOUNT_AMBER,
    363 		    ("client:unlock_mountpoint retry\n"));
    364 		return;
    365 	}
    366 
    367 	vnode_t *vp = mntvp;
    368 	mntvp = NULL;
    369 	mount_lock.unlock();
    370 
    371 	vn_vfsunlock(vp);
    372 	VN_RELE(vp);
    373 }
    374 
    375 //
    376 // This is called by the mount server to lock the mount
    377 // point and prepare to unmount the file system.
    378 //
    379 // Note: this routine can be called multiple times if the primary
    380 // fails over and the new primary retries this operation so
    381 // this operation must be idempotent.
    382 //
    383 void
    384 mount_client_impl::prepare_unmount(fs::filesystem_ptr fsptr,
    385     solobj::cred_ptr credobj, Environment &_environment)
    386 {
    387 	CL_PANIC(0);
    388 }
    389 
    390 //
    391 // Forced unmount support version
    392 //
    393 // This is called by the mount server in preparation for the unmount.
    394 // The method pxvfsp->purge_caches provides the needed preparation:
    395 // (1)purge the dnlc; (2)release the cached root vnode; (3) empty the
    396 // inactive vnode list; and (4)lock the filesystem (vfs). The unmount_vfsp
    397 // is initialized - this is used when the mount server calls
    398 // ::remove_notify after the underlying filesystem has been unmounted.
    399 // Note: unmounting of filesystems is done serially.
    400 //
    401 // Note: this routine can be called multiple times if the primary
    402 // fails over and the new primary retries this operation so
    403 // this operation must be idempotent.
    404 //
    405 void
    406 mount_client_impl::prepare_unmount_1(fs::filesystem_ptr fsptr, int32_t flags,
    407     solobj::cred_ptr credobj, bool skip_purge, Environment &_environment)
    408 {
    409 	CL_PANIC(0);
    410 }
    411 
    412 //
    413 // prepare_unmount_v1
    414 //
    415 // This is called by the mount server in preparation for the unmount.
    416 // The method pxvfsp->purge_caches provides the needed preparation:
    417 // (1)purge the dnlc; (2)release the cached root vnode; (3) empty the
    418 // inactive vnode list; and (4)lock the filesystem (vfs).
    419 //
    420 // The unmount_vfsp is initialized - this is used when the mount server calls
    421 // ::remove_notify after the underlying filesystem has been unmounted.
    422 //
    423 // Unmounting of filesystems is done serially.
    424 //
    425 // This supports forced unmount.
    426 //
    427 // Note: this routine can be called multiple times if the primary
    428 // fails over and the new primary retries this operation so
    429 // this operation must be idempotent.
    430 //
    431 void
    432 mount_client_impl::prepare_unmount_v1(pxfs_v1::filesystem_ptr fsptr,
    433     int32_t flags, solobj::cred_ptr credobj, bool skip_purge,
    434     Environment &_environment)
    435 {
    436 	unmount_lock.lock();
    437 	if (unmount_pxvfs_v1_p != NULL || _environment.is_orphan()) {
    438 		unmount_lock.unlock();
    439 		//
    440 		// A second call to prepare_unmount has been made without
    441 		// calling unmount_failed() or remove_notify().
    442 		// If the vfs's match, this call is a retry from a new primary.
    443 		// If the vfs's don't match, the global unmount code has
    444 		// allowed two unmounts to happen at the same time which
    445 		// shouldn't happen.
    446 		//
    447 		MOUNT_DBPRINTF(
    448 		    MOUNT_TRACE_CLIENT,
    449 		    MOUNT_AMBER,
    450 		    ("client:prepare_unmount_v1 retry\n"));
    451 		return;
    452 	}
    453 
    454 	//
    455 	// Obtain the local vfs struct acting as a proxy for fs.
    456 	// Note that we can't be sure to find the pxvfs if the mount point
    457 	// is locked by a mount in progress, mount_server_impl::mount has
    458 	// returned but pxvfs::mount() hasn't entered the pxvfs yet.
    459 	//
    460 	pxvfs	*pxvfsp = pxvfs::find_pxvfs(fsptr, NULL);
    461 	if (pxvfsp == NULL) {
    462 		unmount_lock.unlock();
    463 		MOUNT_DBPRINTF(
    464 		    MOUNT_TRACE_CLIENT,
    465 		    MOUNT_RED,
    466 		    ("client:prepare_unmount_v1 couldn't find fs\n"));
    467 		pxfslib::throw_exception(_environment, EINVAL);
    468 		return;
    469 	}
    470 	vfs_t	*vfsp = pxvfsp->get_vfsp();
    471 	ASSERT(vfsp != NULL);
    472 
    473 	if (skip_purge) {
    474 		unmount_pxvfs_v1_p = pxvfsp;
    475 		unmount_lock.unlock();
    476 
    477 		// Release the hold from find_pxvfs().
    478 		VFS_RELE(vfsp);
    479 		return;
    480 	}
    481 
    482 	//
    483 	// The vfs_t keeps the hold on the vnode, we are just using the
    484 	// pointer and thus shouldn't release it.
    485 	//
    486 	vnode_t		*vp = vfsp->vfs_vnodecovered;
    487 
    488 	if (vn_vfswlock(vp) != 0) {
    489 		unmount_lock.unlock();
    490 		MOUNT_DBPRINTF(
    491 		    MOUNT_TRACE_CLIENT,
    492 		    MOUNT_RED,
    493 		    ("client:prepare_unmount_v1 vn_vfswlock pxvfs %p\n",
    494 		    pxvfsp));
    495 		// Release the hold we got from find_pxvfs().
    496 		VFS_RELE(vfsp);
    497 		pxfslib::throw_exception(_environment, EBUSY);
    498 		return;
    499 	}
    500 
    501 	cred_t	*credp = solobj_impl::conv(credobj);
    502 	if (pxvfsp->purge_caches(flags & MS_FORCE ? true : false, credp)) {
    503 		unmount_lock.unlock();
    504 		MOUNT_DBPRINTF(
    505 		    MOUNT_TRACE_CLIENT,
    506 		    MOUNT_RED,
    507 		    ("client:prepare_unmount_v1 busy\n"));
    508 		vn_vfsunlock(vp);
    509 
    510 		// Release the hold we got from find_pxvfs().
    511 		VFS_RELE(vfsp);
    512 		pxfslib::throw_exception(_environment, EBUSY);
    513 		return;
    514 	}
    515 
    516 	// Release the hold from find_pxvfs().
    517 	VFS_RELE(vfsp);
    518 	unmount_pxvfs_v1_p = pxvfsp;
    519 	unmount_lock.unlock();
    520 }
    521 
    522 //
    523 // Undo the locking done by prepare_unmount().
    524 //
    525 // Note: this routine can be called multiple times if the primary
    526 // fails over and the new primary retries this operation so
    527 // this operation must be idempotent.
    528 //
    529 // This method supports only mount versions before 1.1
    530 //
    531 void
    532 mount_client_impl::unmount_failed(Environment &_environment)
    533 {
    534 	//
    535 	// If unmount is not in progress, this is a retry from a new primary.
    536 	//
    537 	unmount_lock.lock();
    538 	if ((unmount_pxvfs_v1_p == NULL) || _environment.is_orphan()) {
    539 		unmount_lock.unlock();
    540 		MOUNT_DBPRINTF(
    541 		    MOUNT_TRACE_CLIENT,
    542 		    MOUNT_AMBER,
    543 		    ("client:unmount_failed retry\n"));
    544 		return;
    545 	}
    546 
    547 }
    548 
    549 //
    550 // Forced unmount support version
    551 //
    552 // Undo the locking done by prepare_unmount().
    553 //
    554 // Note: this routine can be called multiple times if the primary
    555 // fails over and the new primary retries this operation so
    556 // this operation must be idempotent.
    557 //
    558 void
    559 mount_client_impl::unmount_failed_1(bool skip, Environment &_environment)
    560 {
    561 	//
    562 	// If unmount is not in progress, this is a retry from a new primary.
    563 	//
    564 	unmount_lock.lock();
    565 	if ((unmount_pxvfs_v1_p == NULL) || _environment.is_orphan()) {
    566 		unmount_lock.unlock();
    567 		MOUNT_DBPRINTF(
    568 		    MOUNT_TRACE_CLIENT,
    569 		    MOUNT_AMBER,
    570 		    ("client:unmount_failed_1 retry\n"));
    571 		return;
    572 	}
    573 
    574 	unmount_failed_v1(skip);
    575 }
    576 
    577 //
    578 // unmount_failed_v1
    579 //
    580 // Supports forced unmount.
    581 //
    582 // Undo the locking done by prepare_unmount().
    583 //
    584 // Note: this routine can be called multiple times if the primary
    585 // fails over and the new primary retries this operation so
    586 // this operation must be idempotent.
    587 //
    588 void
    589 mount_client_impl::unmount_failed_v1(bool skip)
    590 {
    591 	//
    592 	// Note: if the mount server calls us and then fails before sending
    593 	// a checkpoint to its secondary, the new primary could call us
    594 	// "at the same time". In order to avoid using a mutex here,
    595 	// we set unmount_pxvfsp now to make the race window small.
    596 	//
    597 	pxvfs	*pxvfsp = unmount_pxvfs_v1_p;
    598 	unmount_pxvfs_v1_p = NULL;
    599 	unmount_lock.unlock();
    600 
    601 	if (!skip) {
    602 		vfs_t	*vfsp = pxvfsp->get_vfsp();
    603 		vfs_unlock(vfsp);
    604 		vn_vfsunlock(vfsp->vfs_vnodecovered);
    605 
    606 		pxvfsp->unmount_failed();
    607 	}
    608 }
    609 
    610 //
    611 // Instantiate a non-HA file system on this node.
    612 // The mount point should be locked before calling this.
    613 //
    614 // Note: this routine can be called multiple times if the primary
    615 // fails over and the new primary retries this operation so
    616 // this operation must be idempotent.
    617 // XXX This code will need to change when new file system types
    618 // are supported by PXFS.
    619 //
    620 void
    621 mount_client_impl::instantiate(const sol::mounta &ma, sol::uintptr_t mvp,
    622     solobj::cred_ptr credobj, fs::filesystem_out fs_obj, fs::fs_info &fsinfo,
    623     CORBA::String_out mntoptions, Environment &_environment)
    624 {
    625 	CL_PANIC(0);
    626 }
    627 
    628 //
    629 // Instantiate a non-HA file system on this node.
    630 // The mount point should be locked before calling this.
    631 //
    632 // Note: this routine can be called multiple times if the primary
    633 // fails over and the new primary retries this operation so
    634 // this operation must be idempotent.
    635 // XXX This code will need to change when new file system types
    636 // are supported by PXFS.
    637 //
    638 void
    639 mount_client_impl::instantiate_v1(const sol::mounta &ma, sol::uintptr_t mvp,
    640     solobj::cred_ptr credobj, pxfs_v1::filesystem_out fs_obj,
    641     pxfs_v1::fs_info &fsinfo,
    642     CORBA::String_out mntoptions, Environment &_environment)
    643 {
    644 	ASSERT(ma.flags & MS_SYSSPACE);
    645 
    646 	//
    647 	// If we are on the node doing the mount system call, 'mvp'
    648 	// will be the address of the locked mount point vnode.
    649 	// Otherwise, 'mntvp' should be non-NULL from lock_mountpoint().
    650 	//
    651 	vnode_t		*vp;
    652 	if (mvp != NULL) {
    653 		vp = (vnode_t *)mvp;
    654 	} else {
    655 		vp = mntvp;
    656 		ASSERT(vp != NULL);
    657 	}
    658 
    659 	int datalen;
    660 
    661 #ifndef VXFS_DISABLED
    662 	if (strcmp(ma.fstype, "vxfs") == 0) {
    663 		datalen =
    664 		    vxfs_dependent_impl::vxfs_fixup_args(ma,
    665 			vxfs_dependent_impl::VX_MOUNT);
    666 		if (datalen == -1) {
    667 			pxfslib::throw_exception(_environment, ENOENT);
    668 			return;
    669 		}
    670 	} else {
    671 		datalen = (int)ma.data.length();
    672 	}
    673 #else
    674 	datalen = (int)ma.data.length();
    675 #endif
    676 
    677 	//
    678 	// We set MS_NOSPLICE so that the underlying file system isn't
    679 	// linked into the file system name space.
    680 	//
    681 	// We turn off MS_GLOBAL, as we are mounting the underlying filesystem
    682 	// locally. With Solaris 9 build 58, Solaris disables mount in progress
    683 	// checks if MS_GLOBAL is specified. We have to make sure that
    684 	// MS_GLOBAL is turned off here, as we want these checks to be made.
    685 	// These checks make sure that if a global mount and a local mount
    686 	// happen concurrently, and are trying to mount the same device,
    687 	// on different mount-points, only one of them succeeds.
    688 	//
    689 	char		*options;
    690 	struct mounta	mnta;
    691 	mnta.spec = ((sol::mounta &)ma).spec;
    692 	mnta.dir = ((sol::mounta &)ma).dir;
    693 	mnta.flags = ma.flags | MS_NOSPLICE;
    694 	mnta.flags &= ~MS_GLOBAL;
    695 	mnta.fstype = ((sol::mounta &)ma).fstype;
    696 	mnta.dataptr = (char *)ma.data.buffer();
    697 	mnta.datalen = datalen;
    698 	int	len;
    699 	if (mnta.flags & MS_OPTIONSTR) {
    700 		len = (int)ma.options.length();
    701 		options = new char[(size_t)len];	//lint !e571
    702 		mnta.optptr = os::strcpy(options,
    703 		    (const char *)ma.options.buffer());
    704 		mnta.optlen = len;
    705 	} else {
    706 		len = MAX_MNTOPT_STR;
    707 		options = new char [(size_t)len];	//lint !e571
    708 		mnta.optptr = NULL;
    709 		mnta.optlen = 0;
    710 	}
    711 
    712 	//
    713 	// XXX Need a way to detect if this is a retry.
    714 	// We could lookup ma.spec and use vfs_devsearch()
    715 	// except this won't work for some file system types.
    716 	// It also doesn't work if the device really is busy
    717 	// (i.e., already mounted somewhere else) and this call
    718 	// is not a retry.
    719 	//
    720 
    721 	//
    722 	// Call the wrapped filesystem's mount routine, producing
    723 	// a vfs structure.
    724 	//
    725 	cred_t	*credp = solobj_impl::conv(credobj);
    726 	vfs	*vfsp = NULL;
    727 	int	error = domount(NULL, &mnta, vp, credp, &vfsp);
    728 
    729 	if (error == 0 && (mnta.flags & MS_OPTIONSTR) == 0) {
    730 		error = vfs_buildoptionstr(&vfsp->vfs_mntopts, options, len);
    731 	}
    732 	if (error) {
    733 		delete [] options;
    734 		pxfslib::throw_exception(_environment, error);
    735 		MOUNT_DBPRINTF(
    736 		    MOUNT_TRACE_CLIENT,
    737 		    MOUNT_RED,
    738 		    ("client:instantiate_v1 %s error %d\n",
    739 		    (const char *)ma.dir, error));
    740 		return;
    741 	}
    742 
    743 	ASSERT(vfsp != NULL);
    744 
    745 	//
    746 	// Create a filesystem object.
    747 	//
    748 	fs_norm_impl	*fsp =
    749 	    new fs_norm_impl(vfsp, mnta.fstype, mnta.spec, options);
    750 	fs_obj = fsp->get_objref();
    751 
    752 	// Fill in the fs_info structure.
    753 	(void) os::strcpy(fsinfo.fstype, vfssw[vfsp->vfs_fstype].vsw_name);
    754 	fsinfo.fsbsize = vfsp->vfs_bsize;
    755 	fsinfo.fsdev = vfsp->vfs_dev;
    756 	fsinfo.fsflag = vfsp->vfs_flag;
    757 	// XXX: should be...    fsinfo.fsid = vfs->vfs_fsid;
    758 	fsinfo.fsid.val[0] = vfsp->vfs_fsid.val[0];
    759 	fsinfo.fsid.val[1] = vfsp->vfs_fsid.val[1];
    760 
    761 	mntoptions = options;
    762 }
    763 
    764 //
    765 // Instantiate a HA file system on this node.
    766 // This method is called by mount_server_impl::notify_change(). The
    767 // purpose of this code is similar to instantiate_ha() except that
    768 // a filesystem pointer is passed instead of a vnode. Here we
    769 // extract the vnode from the filesystem pointer.
    770 //
    771 // Note: this routine can be called multiple times if the primary
    772 // fails over and the new primary retries this operation so
    773 // this operation must be idempotent.
    774 // XXX This code will need to change when new file system types
    775 // are supported by PXFS.
    776 //
    777 void
    778 mount_client_impl::reinstantiate_ha(const sol::mounta &ma,
    779     fs::filesystem_ptr fsptr, solobj::cred_ptr credobj,
    780     const char *dev_name, Environment &_environment)
    781 {
    782 	CL_PANIC(0);
    783 }
    784 
    785 //
    786 // Instantiate a HA file system on this node.
    787 // This method is called by mount_server_impl::notify_change(). The
    788 // purpose of this code is similar to instantiate_ha() except that
    789 // a filesystem pointer is passed instead of a vnode. Here we
    790 // extract the vnode from the filesystem pointer.
    791 //
    792 // Note: this routine can be called multiple times if the primary
    793 // fails over and the new primary retries this operation so
    794 // this operation must be idempotent.
    795 // XXX This code will need to change when new file system types
    796 // are supported by PXFS.
    797 //
    798 void
    799 mount_client_impl::reinstantiate_ha_v1(const sol::mounta &ma,
    800     pxfs_v1::filesystem_ptr fsptr, solobj::cred_ptr credobj,
    801     const char *dev_name, Environment &_environment)
    802 {
    803 	pxvfs	*pxvfsp = pxvfs::find_pxvfs(fsptr, NULL);
    804 	// XXX Same bug possible as prepare_unmount().
    805 	ASSERT(pxvfsp != NULL);
    806 
    807 	vfs_t	*vfsp = pxvfsp->get_vfsp();
    808 	ASSERT(vfsp != NULL);
    809 
    810 	vnode_t	*vp = vfsp->vfs_vnodecovered;
    811 	ASSERT(vp != NULL);
    812 
    813 	VFS_RELE(pxvfsp->get_vfsp());
    814 
    815 	instantiate_ha_common(ma, vp, credobj, dev_name,
    816 	    VERSION_1, _environment);
    817 }
    818 
    819 //
    820 // Instantiate a HA file system on this node.
    821 // The mount point should be locked before calling this.
    822 //
    823 // Note: this routine can be called multiple times if the primary
    824 // fails over and the new primary retries this operation so
    825 // this operation must be idempotent.
    826 // XXX This code will need to change when new file system types
    827 // are supported by PXFS.
    828 //
    829 void
    830 mount_client_impl::instantiate_ha(const sol::mounta &ma, sol::uintptr_t mvp,
    831     solobj::cred_ptr credobj, const char *dev_name, Environment &_environment)
    832 {
    833 	//
    834 	// If we are on the node doing the mount system call, 'mvp'
    835 	// will be the address of the locked mount point vnode.
    836 	// Otherwise, 'mntvp' should be non-NULL from lock_mountpoint().
    837 	//
    838 	vnode_t	*vp;
    839 	if (mvp != NULL) {
    840 		vp = (vnode_t *)mvp;
    841 	} else {
    842 		vp = mntvp;
    843 		ASSERT(vp != NULL);
    844 	}
    845 
    846 	instantiate_ha_common(ma, vp, credobj, dev_name,
    847 	    VERSION_0, _environment);
    848 }
    849 
    850 //
    851 // Instantiate a HA file system on this node.
    852 // The mount point should be locked before calling this.
    853 //
    854 // Note: this routine can be called multiple times if the primary
    855 // fails over and the new primary retries this operation so
    856 // this operation must be idempotent.
    857 // XXX This code will need to change when new file system types
    858 // are supported by PXFS.
    859 //
    860 void
    861 mount_client_impl::instantiate_ha_v1(const sol::mounta &ma, sol::uintptr_t mvp,
    862     solobj::cred_ptr credobj, const char *dev_name, Environment &_environment)
    863 {
    864 	//
    865 	// If we are on the node doing the mount system call, 'mvp'
    866 	// will be the address of the locked mount point vnode.
    867 	// Otherwise, 'mntvp' should be non-NULL from lock_mountpoint().
    868 	//
    869 	vnode_t		*vp;
    870 	if (mvp != NULL) {
    871 		vp = (vnode_t *)mvp;
    872 	} else {
    873 		vp = mntvp;
    874 		ASSERT(vp != NULL);
    875 	}
    876 
    877 	instantiate_ha_common(ma, vp, credobj, dev_name,
    878 	    VERSION_1, _environment);
    879 }
    880 
    881 //
    882 // Common code for both instantiate_ha() and reinstantiate_ha().
    883 //
    884 // Create a file system replica and register with the
    885 // replica manager. Also register with the cluster version manager for
    886 // callbacks in support of rolling upgrade.
    887 //
    888 void
    889 mount_client_impl::instantiate_ha_common(const sol::mounta &ma, vnode_t *vp,
    890     solobj::cred_ptr credobj, const char *dev_name, mount_ver_t mount_ver,
    891     Environment &)
    892 {
    893 	// Nested invocations need their own Environment
    894 	Environment	e;
    895 
    896 	char	id[20];
    897 	os::sprintf(id, "%u", orb_conf::node_number());
    898 
    899 	// Create a file system replica for this node
    900 	repl_pxfs_server	*repl_srvr_v1 = NULL;
    901 	ASSERT(mount_ver == VERSION_1);
    902 	repl_srvr_v1 = new repl_pxfs_server(vp, ma,
    903 	    solobj_impl::conv(credobj), id);
    904 
    905 	//
    906 	// Have the filesystem replica register with the Version Manager
    907 	// for upgrade callbacks.
    908 	//
    909 	repl_srvr_v1->upgrade_callback_register(ma);
    910 
    911 	replica::service_dependencies	serv_deps(1, 1);
    912 	serv_deps[0] = dev_name;
    913 	replica::prov_dependencies	prov_deps(1, 1);
    914 	prov_deps[0].service = dev_name;
    915 	//
    916 	// Note that we assume the "provider ID" is the same for both
    917 	// the file system and the device service.
    918 	// Also note that the (const char *) is needed so a copy of
    919 	// the string is made instead of just a pointer assignment.
    920 	//
    921 	prov_deps[0].repl_prov_desc = (const char *)id;
    922 
    923 	repl_srvr_v1->register_with_rm(1, &serv_deps, &prov_deps, true, e);
    924 	if (e.exception()) {
    925 		//
    926 		// Clean up callback registration
    927 		//
    928 		repl_srvr_v1->upgrade_callback_unregister();
    929 
    930 		//
    931 		// Check for service or provider already registered.
    932 		// If it is, this is either a retry after a failover
    933 		// or an attempt to mount a mounted file system.
    934 		//
    935 		if (!(replica::service_already_exists::_exnarrow(
    936 		    e.exception()) != NULL ||
    937 		    replica::repl_prov_already_exists::_exnarrow(
    938 		    e.exception()) != NULL)) {
    939 			MOUNT_DBPRINTF(
    940 			    MOUNT_TRACE_CLIENT,
    941 			    MOUNT_RED,
    942 			    ("client:instantiate_ha - Error FS replica %s\n",
    943 			    dev_name));
    944 		}
    945 		e.clear();
    946 	}
    947 }
    948 
    949 //
    950 // Respond to notification from the VFS list server that there's a new file
    951 // system to be added to this node's list.
    952 // The mount point should have previously been locked with lock_mountpoint().
    953 //
    954 // The basic work to be done is to construct a proxy for the filesystem object
    955 // given as argument, initialize it, set its flags field, and cross-link it
    956 // with a local mountpoint vnode.
    957 //
    958 // Note: this routine can be called multiple times if the primary
    959 // fails over and the new primary retries this operation so
    960 // this operation must be idempotent.
    961 //
    962 void
    963 mount_client_impl::add_notify_locked(const sol::mounta &ma,
    964     const char *mntoptions, fs::filesystem_ptr fsptr, const fs::fs_info &fsinfo,
    965     Environment &_environment)
    966 {
    967 	CL_PANIC(0);
    968 }
    969 
    970 //
    971 // Respond to notification from the VFS list server that there's a new file
    972 // system to be added to this node's list.
    973 // The mount point should have previously been locked with lock_mountpoint().
    974 //
    975 // The basic work to be done is to construct a proxy for the filesystem object
    976 // given as argument, initialize it, set its flags field, and cross-link it
    977 // with a local mountpoint vnode.
    978 //
    979 // Note: this routine can be called multiple times if the primary
    980 // fails over and the new primary retries this operation so
    981 // this operation must be idempotent.
    982 //
    983 void
    984 mount_client_impl::add_notify_locked_v1(const sol::mounta &ma,
    985     const char *mntoptions,
    986     pxfs_v1::filesystem_ptr fsptr, const pxfs_v1::fs_info &fsinfo,
    987     Environment &_environment)
    988 {
    989 	//
    990 	// If the mount point vnode is not set, this is a retry from a new
    991 	// primary.
    992 	//
    993 	mount_lock.lock();
    994 	if (mntvp == NULL || _environment.is_orphan()) {
    995 		mount_lock.unlock();
    996 		MOUNT_DBPRINTF(
    997 		    MOUNT_TRACE_CLIENT,
    998 		    MOUNT_AMBER,
    999 		    ("client:add_notify_locked_v1 %s retry\n",
   1000 		    (const char *)ma.dir));
   1001 		return;
   1002 	}
   1003 
   1004 	vnode_t		*vp = mntvp;
   1005 	mntvp = NULL;
   1006 	mount_lock.unlock();
   1007 
   1008 	ASSERT(!vn_ismntpt(vp));
   1009 
   1010 	//
   1011 	// Now that we've obtained the mount point vnode,
   1012 	// the rest of the work is common.
   1013 	// Note that we transfer our hold on mntvp to the vfs_t.
   1014 	//
   1015 	add_notify_common_v1(ma, mntoptions, fsptr, fsinfo, vp);
   1016 }
   1017 
   1018 //
   1019 // Respond to notification from the VFS list server that there's a new file
   1020 // system to be added to this node's list.
   1021 //
   1022 // The basic work to be done is to lock the mount point, construct a proxy
   1023 // for the filesystem object, initialize it, set its flags field,
   1024 // and link it into the file system name space.
   1025 //
   1026 // Note: this routine can be called multiple times if the primary
   1027 // fails over and the new primary retries this operation. The required
   1028 // idempotence is problematic because of a retry racing with an initial
   1029 // attempt.  This is resolved by serializing calls using the add_notify_lock
   1030 // and checking for an already existing mount.
   1031 //
   1032 void
   1033 mount_client_impl::add_notify(const sol::mounta &ma, const char *mntoptions,
   1034     bool is_ha_repl, const char *dev_name, fs::filesystem_ptr fsptr,
   1035     const fs::fs_info &fsinfo, Environment &_environment)
   1036 {
   1037 	CL_PANIC(0);
   1038 }
   1039 
   1040 //
   1041 // Respond to notification from the VFS list server that there's a new file
   1042 // system to be added to this node's list.
   1043 //
   1044 // The basic work to be done is to lock the mount point, construct a proxy
   1045 // for the filesystem object, initialize it, set its flags field,
   1046 // and link it into the file system name space.
   1047 //
   1048 // Note: this routine can be called multiple times if the primary
   1049 // fails over and the new primary retries this operation so
   1050 // this operation must be idempotent.
   1051 //
   1052 void
   1053 mount_client_impl::add_notify_v1(const sol::mounta &ma, const char *mntoptions,
   1054     bool is_ha_repl, const char *dev_name, pxfs_v1::filesystem_ptr fsptr,
   1055     const pxfs_v1::fs_info &fsinfo, Environment &_environment)
   1056 {
   1057 	ASSERT(ma.flags & MS_SYSSPACE);
   1058 
   1059 	vnode_t		*vp;
   1060 	int		error = lookupname(((sol::mounta &)ma).dir,
   1061 	    UIO_SYSSPACE, FOLLOW, NULL, &vp);
   1062 	if (error != 0) {
   1063 		MOUNT_DBPRINTF(
   1064 		    MOUNT_TRACE_CLIENT,
   1065 		    MOUNT_RED,
   1066 		    ("client:add_notify_v1 lookup %s error %d\n",
   1067 		    (const char *)ma.dir, error));
   1068 		os::sc_syslog_msg msg(SC_SYSLOG_GLOBAL_MOUNT_TAG, NULL, NULL);
   1069 		(void) msg.log(SC_SYSLOG_WARNING, MESSAGE,
   1070 		    "Could not mount '%s' because there was an error (%d) in "
   1071 		    "opening the directory.", (const char *)ma.dir,
   1072 		    error);
   1073 		pxfslib::throw_exception(_environment, error);
   1074 		return;
   1075 	}
   1076 
   1077 	//
   1078 	// If the mount point is a PXFS vnode for the file system we
   1079 	// are trying to add, then this is a retry after a failover.
   1080 	// XXX This condition might not be met for a while, the race
   1081 	// mentioned in lock_mountpoint() for mntvp could apply here.
   1082 	//
   1083 	vfs_t	*vfsp = vp->v_vfsp;
   1084 	if ((vfsp->vfs_flag & VFS_PXFS) &&
   1085 	    fsptr->_equiv(pxvfs::VFSTOPXFS(vfsp)->get_fsobj())) {
   1086 		MOUNT_DBPRINTF(
   1087 		    MOUNT_TRACE_CLIENT,
   1088 		    MOUNT_GREEN,
   1089 		    ("client:add_notify_v1 %s retry\n",
   1090 		    (const char *)ma.dir));
   1091 		VN_RELE(vp);
   1092 		return;
   1093 	}
   1094 
   1095 	if (vn_vfswlock(vp) != 0) {
   1096 		MOUNT_DBPRINTF(
   1097 		    MOUNT_TRACE_CLIENT,
   1098 		    MOUNT_RED,
   1099 		    ("client:add_notify_v1 %s vn_vfswlock\n",
   1100 		    (const char *)ma.dir));
   1101 		pxfslib::throw_exception(_environment, EBUSY);
   1102 		VN_RELE(vp);
   1103 		return;
   1104 	}
   1105 
   1106 	if (vp->v_flag & VNOMOUNT) {
   1107 		MOUNT_DBPRINTF(
   1108 		    MOUNT_TRACE_CLIENT,
   1109 		    MOUNT_RED,
   1110 		    ("client:add_notify_v1 %s NVOMOUNT\n",
   1111 		    (const char *)ma.dir));
   1112 		pxfslib::throw_exception(_environment, EINVAL);
   1113 		vn_vfsunlock(vp);
   1114 		VN_RELE(vp);
   1115 		return;
   1116 	}
   1117 
   1118 	// Make sure we are the only holder of the mount point.
   1119 	dnlc_purge_vp(vp);
   1120 
   1121 	if (vn_ismntpt(vp) ||
   1122 	    (ma.flags & (MS_REMOUNT | MS_OVERLAY)) == 0 &&
   1123 	    (vp->v_count != 1 || (vp->v_flag & VROOT) != 0)) {
   1124 		MOUNT_DBPRINTF(
   1125 		    MOUNT_TRACE_CLIENT,
   1126 		    MOUNT_RED,
   1127 		    ("client:add_notify_v1 %s mounted %d count %d\n",
   1128 		    (const char *)ma.dir,
   1129 		    vn_ismntpt(vp), vp->v_count));
   1130 		pxfslib::throw_exception(_environment, EBUSY);
   1131 		vn_vfsunlock(vp);
   1132 		VN_RELE(vp);
   1133 		return;
   1134 	}
   1135 
   1136 	//
   1137 	// XXX Note that we don't check that vp->v_type == VDIR.
   1138 	// This should be checked for most file systems but not "namefs".
   1139 	// Also, the "busy" check above isn't quite right for "namefs".
   1140 	//
   1141 
   1142 	//
   1143 	// Check to see if we need to start a file system replica.
   1144 	//
   1145 	if (is_ha_repl) {
   1146 		//
   1147 		// Create the file system service and register with the
   1148 		// replica manager.
   1149 		// XXX kcred.
   1150 		//
   1151 		char	id[20];
   1152 		os::sprintf(id, "%u", orb_conf::node_number());
   1153 		repl_pxfs_server	*repl_srvr =
   1154 		    new repl_pxfs_server(vp, ma, kcred, id);
   1155 		//
   1156 		// Have the filesystem replica register with the Version Manager
   1157 		// for upgrade callbacks.
   1158 		//
   1159 		repl_srvr->upgrade_callback_register(ma);
   1160 
   1161 		replica::service_dependencies	serv_deps(1, 1);
   1162 		serv_deps[0] = dev_name;
   1163 		replica::prov_dependencies	prov_deps(1, 1);
   1164 		prov_deps[0].service = dev_name;
   1165 		//
   1166 		// Note that we assume the "provider ID" is the same for both
   1167 		// the file system and the device service.
   1168 		// Also note that the (const char *) is needed so a copy of
   1169 		// the string is made instead of just a pointer assignment.
   1170 		//
   1171 		prov_deps[0].repl_prov_desc = (const char *)id;
   1172 
   1173 		// Nest invocation requires own Environment
   1174 		Environment	e;
   1175 
   1176 #ifdef _FAULT_INJECTION
   1177 	//
   1178 	// When this fault is triggered, repl_srvr->register_with_rm is
   1179 	// called with a bad paramter.  The result of this is:
   1180 	// UserException: replica::invalid_dependency.
   1181 	//
   1182 	if (fault_triggered(FAULTNUM_PXFS_ADD_NOTIFY, NULL, NULL)) {
   1183 		repl_srvr->register_with_rm(1, NULL, &prov_deps, true, e);
   1184 	} else
   1185 #endif
   1186 		repl_srvr->register_with_rm(1, &serv_deps, &prov_deps, true, e);
   1187 		if (e.exception()) {
   1188 			//
   1189 			// Clean up callback registration
   1190 			//
   1191 			repl_srvr->upgrade_callback_unregister();
   1192 
   1193 			//
   1194 			// Rather than impact node startup by throwing an
   1195 			// exception and returning, we just continue on
   1196 			// with mounting the filesystem.  Just because
   1197 			// a server replica for this filesystem didn't
   1198 			// start here is no reason to interfere with the
   1199 			// global mount.
   1200 			//
   1201 #ifdef DEBUG
   1202 			e.exception()->print_exception(
   1203 			    "failed to register:");
   1204 #endif
   1205 			e.clear();
   1206 
   1207 			MOUNT_DBPRINTF(
   1208 			    MOUNT_TRACE_CLIENT,
   1209 			    MOUNT_RED,
   1210 			    ("client:add_notify_v1 failed RM reg"
   1211 			    " %s mount point %s nodeid %s\n",
   1212 			    (const char *)ma.spec, (const char *)ma.dir, id));
   1213 
   1214 			os::sc_syslog_msg msg(SC_SYSLOG_GLOBAL_MOUNT_TAG,
   1215 			    NULL, NULL);
   1216 			//
   1217 			// SCMSGS
   1218 			// @explanation
   1219 			// Filesystem availability may be lessened due to
   1220 			// reduced component redundancy.
   1221 			// @user_action
   1222 			// Check the device.
   1223 			//
   1224 			(void) msg.log(SC_SYSLOG_WARNING, MESSAGE,
   1225 			    "mount_client_impl::add_notify() "
   1226 			    "failed to start filesystem replica for "
   1227 			    "%s at mount point %s    nodeid %s",
   1228 			    (const char *)ma.spec, (const char *)ma.dir, id);
   1229 		}
   1230 	}
   1231 
   1232 	//
   1233 	// Now that we have the locked mount point vnode,
   1234 	// the rest of the work is common.
   1235 	// Note that we transfer our hold on vp to the vfs_t.
   1236 	//
   1237 	add_notify_common_v1(ma, mntoptions, fsptr, fsinfo, vp);
   1238 }
   1239 
   1240 //
   1241 // Respond to a request from the VFS list server to remove a file system from
   1242 // this node's list.
   1243 // Note: we assume that the caller has globally locked the relevant mountpoint.
   1244 //
   1245 // Note: this routine can be called multiple times if the primary
   1246 // fails over and the new primary retries this operation so
   1247 // this operation must be idempotent.
   1248 //
   1249 void
   1250 mount_client_impl::remove_notify(const char *mountpoint,
   1251     const char *, bool, Environment &_environment)
   1252 {
   1253 	CL_PANIC(0);
   1254 }
   1255 
   1256 //
   1257 // Respond to a request from the VFS list server to remove a file system from
   1258 // this node's list.
   1259 // Note: we assume that the caller has globally locked the relevant mountpoint.
   1260 //
   1261 // Note: this routine can be called multiple times if the primary
   1262 // fails over and the new primary retries this operation so
   1263 // this operation must be idempotent.
   1264 //
   1265 // This supports forced unmount.
   1266 //
   1267 void
   1268 mount_client_impl::remove_notify_1(const char *mountpoint, bool unlink_vfs,
   1269     Environment &_environment)
   1270 {
   1271 	unmount_lock.lock();
   1272 	if ((unmount_pxvfs_v1_p == NULL) || _environment.is_orphan()) {
   1273 		unmount_lock.unlock();
   1274 		//
   1275 		// This should be a retry after the mount server failed
   1276 		// over to another node.
   1277 		//
   1278 		return;
   1279 	}
   1280 
   1281 	remove_notify_v1(mountpoint, unlink_vfs);
   1282 }
   1283 
   1284 //
   1285 // remove_notify_v1
   1286 //
   1287 // Respond to a request from the VFS list server to remove a file system from
   1288 // this node's list.
   1289 //
   1290 // Note: we assume that the caller has globally locked the relevant mountpoint.
   1291 //
   1292 // Note: this routine can be called multiple times if the primary
   1293 // fails over and the new primary retries this operation so
   1294 // this operation must be idempotent.
   1295 //
   1296 void
   1297 mount_client_impl::remove_notify_v1(const char *mountpoint, bool unlink_vfs)
   1298 {
   1299 	//
   1300 	// Note: if the mount server calls us and then fails before sending
   1301 	// a checkpoint to its secondary, the new primary could call us
   1302 	// "at the same time". In order to avoid using a mutex here,
   1303 	// we set unmount_pxvfsp now to make the race window small.
   1304 	//
   1305 	pxvfs	*pxvfsp = unmount_pxvfs_v1_p;
   1306 	unmount_pxvfs_v1_p = NULL;
   1307 	unmount_lock.unlock();
   1308 
   1309 	vfs_t	*vfsp = pxvfsp->get_vfsp();
   1310 	ASSERT(vfsp != NULL);
   1311 
   1312 	MOUNT_DBPRINTF(
   1313 	    MOUNT_TRACE_CLIENT,
   1314 	    MOUNT_AMBER,
   1315 	    ("client:remove_notify_v1 %s pxvfsp %p\n",
   1316 	    mountpoint, pxvfsp));
   1317 
   1318 	//
   1319 	// Release the unmount lock and wake up any sleepers waiting to create
   1320 	// pxfobjs.
   1321 	//
   1322 	pxvfsp->unmount_succeeded();
   1323 
   1324 	//
   1325 	// Remove vfsp from the vfs list and release it.
   1326 	//
   1327 	if (unlink_vfs) {
   1328 		vnode_t		*coveredvp = vfsp->vfs_vnodecovered;
   1329 		ASSERT(coveredvp != NULL);
   1330 		VN_HOLD(coveredvp);
   1331 		vfs_remove(vfsp);
   1332 		vn_vfsunlock(coveredvp);
   1333 		VN_RELE(coveredvp);
   1334 	}
   1335 }
   1336 
   1337 //
   1338 // Remove a file system from the name space.
   1339 // This should be called if this node is not the server for the file
   1340 // system being removed or it is not the last replica (is_ha_repl is true).
   1341 //
   1342 // Note: this routine can be called multiple times if the primary
   1343 // fails over and the new primary retries this operation so
   1344 // this operation must be idempotent.
   1345 //
   1346 void
   1347 mount_client_impl::remove_client(const char *, const char *spec,
   1348     bool is_ha_repl, const char *, fs::filesystem_ptr fsptr,
   1349     solobj::cred_ptr credobj, Environment &_environment)
   1350 {
   1351 	CL_PANIC(0);
   1352 }
   1353 
   1354 //
   1355 // Remove a file system from the name space.
   1356 // This should be called if this node is not the server for the file
   1357 // system being removed or it is not the last replica (is_ha_repl is true).
   1358 //
   1359 // Note: this routine can be called multiple times if the primary
   1360 // fails over and the new primary retries this operation so
   1361 // this operation must be idempotent.
   1362 //
   1363 void
   1364 mount_client_impl::remove_client_v1(const char *, const char *spec,
   1365     bool is_ha_repl, const char *, pxfs_v1::filesystem_ptr fsptr,
   1366     solobj::cred_ptr credobj, Environment &_environment)
   1367 {
   1368 	//
   1369 	// Obtain the proxy vfs struct for fs.
   1370 	//
   1371 	pxvfs	*pxvfsp =
   1372 	    pxvfs::find_pxvfs(fsptr, NULL);
   1373 	if (pxvfsp == NULL) {
   1374 		// Must be a retry or was unmounted before we got here.
   1375 		return;
   1376 	}
   1377 
   1378 	vfs_t	*vfsp = pxvfsp->get_vfsp();
   1379 	ASSERT(vfsp != NULL);
   1380 
   1381 	if (vn_vfswlock(vfsp->vfs_vnodecovered)) {
   1382 		// Release the hold we got from find_pxvfs().
   1383 		VFS_RELE(vfsp);
   1384 
   1385 		// Couldn't get the covered mount point lock.
   1386 		pxfslib::throw_exception(_environment, EBUSY);
   1387 		return;
   1388 	}
   1389 
   1390 	cred_t	*credp = solobj_impl::conv(credobj);
   1391 	if (pxvfsp->purge_caches(true, credp)) {
   1392 		vn_vfsunlock(vfsp->vfs_vnodecovered);
   1393 
   1394 		// Release the hold we got from find_pxvfs().
   1395 		VFS_RELE(vfsp);
   1396 
   1397 		pxfslib::throw_exception(_environment, EBUSY);
   1398 		return;
   1399 	}
   1400 
   1401 	//
   1402 	// Release the fsmgr_client/pxvfs binding.
   1403 	//
   1404 	pxvfsp->unmount_succeeded();
   1405 
   1406 	//
   1407 	// Remove vfsp from the vfs list and release it.
   1408 	//
   1409 	vnode_t		*coveredvp = vfsp->vfs_vnodecovered;
   1410 	ASSERT(coveredvp != NULL);
   1411 	VN_HOLD(coveredvp);
   1412 	vfs_remove(vfsp);
   1413 	vn_vfsunlock(coveredvp);
   1414 	VN_RELE(coveredvp);
   1415 
   1416 	// Release the hold for unmount_pxvfsp we got from find_pxvfs().
   1417 	VFS_RELE(vfsp);
   1418 
   1419 	char name[20];
   1420 
   1421 	//
   1422 	// Shutdown replica if needed.
   1423 	//
   1424 	if (is_ha_repl) {
   1425 		replica::service_admin_var	sa =
   1426 		    pxfslib::get_service_admin_ref(
   1427 			"mount_client_impl::remove_client", spec,
   1428 			_environment);
   1429 		if (_environment.exception()) {
   1430 			//
   1431 			// Need to shut down this replica but
   1432 			// can't get the service_admin object to do it.
   1433 			//
   1434 			// The only reason get_service_admin_ref()
   1435 			// should fail is if service
   1436 			// registration failed/never occurred,
   1437 			// or if the RM is in the process
   1438 			// of shutting down the service.
   1439 			//
   1440 #ifdef DEBUG
   1441 			_environment.exception()->print_exception(
   1442 			    "mount_client_impl::remove_client "
   1443 			    "get_service_admin_ref()"); // XXX
   1444 #endif
   1445 			    MOUNT_DBPRINTF(
   1446 			    MOUNT_TRACE_CLIENT,
   1447 			    MOUNT_AMBER,
   1448 			    ("client:remove_client_v1 failed get_service_admin "
   1449 			    "spec %s\n",
   1450 			    spec));
   1451 			_environment.clear();
   1452 		} else {
   1453 			os::sprintf(name, "%d", orb_conf::node_number());
   1454 			sa->change_repl_prov_status(name,
   1455 			    replica::SC_REMOVE_REPL_PROV, true, _environment);
   1456 			if (_environment.exception()) {
   1457 				os::sc_syslog_msg msg(
   1458 				    SC_SYSLOG_GLOBAL_MOUNT_TAG,
   1459 				    NULL, NULL);
   1460 				//
   1461 				// SCMSGS
   1462 				// @explanation
   1463 				// The system was unable to remove a PXFS
   1464 				// replica on the node that this message was
   1465 				// seen.
   1466 				// @user_action
   1467 				// Contact your authorized Sun service
   1468 				// provider to determine whether a workaround
   1469 				// or patch is available.
   1470 				//
   1471 				(void) msg.log(SC_SYSLOG_WARNING, MESSAGE,
   1472 				    "mount_client_impl::remove_client()"
   1473 				    " failed attempted"
   1474 				    " RM change_repl_prov_status() to"
   1475 				    " remove client,"
   1476 				    " spec %s, name %s",
   1477 				    spec, name);
   1478 				_environment.clear();
   1479 			}
   1480 		}
   1481 	}
   1482 }
   1483 
   1484 //
   1485 // Receive notification from server that proxy file system
   1486 // flags need to be set; used to implement remounts.
   1487 // It also unlocks the mount point and updates /etc/mnttab.
   1488 //
   1489 // Note: this routine can be called multiple times if the primary
   1490 // fails over and the new primary retries this operation so
   1491 // this operation must be idempotent.
   1492 //
   1493 void
   1494 mount_client_impl::set_flags(const sol::mounta &, const char *mntoptions,
   1495     fs::filesystem_ptr fsptr, uint32_t vfsflags, Environment &_environment)
   1496 {
   1497 	CL_PANIC(0);
   1498 }
   1499 
   1500 //
   1501 // Receive notification from server that proxy file system
   1502 // flags need to be set; used to implement remounts.
   1503 // It also unlocks the mount point and updates /etc/mnttab.
   1504 //
   1505 // Note: this routine can be called multiple times if the primary
   1506 // fails over and the new primary retries this operation so
   1507 // this operation must be idempotent.
   1508 //
   1509 void
   1510 mount_client_impl::set_flags_v1(const sol::mounta &, const char *mntoptions,
   1511     pxfs_v1::filesystem_ptr fsptr, uint32_t vfsflags, Environment &_environment)
   1512 {
   1513 	//
   1514 	// If the mount point vnode is not set, this is a retry from a new
   1515 	// primary.
   1516 	//
   1517 	mount_lock.lock();
   1518 	if (mntvp == NULL || _environment.is_orphan()) {
   1519 		mount_lock.unlock();
   1520 		return;
   1521 	}
   1522 
   1523 	vnode_t		*vp = mntvp;
   1524 	mntvp = NULL;
   1525 	mount_lock.unlock();
   1526 
   1527 	pxvfs	*pxvfsp = pxvfs::find_pxvfs(fsptr, NULL);
   1528 	// XXX Same bug possible as prepare_unmount().
   1529 	ASSERT(pxvfsp != NULL);
   1530 	pxvfsp->set_mntoptions(mntoptions);
   1531 	vfs_t	*vfsp = pxvfsp->get_vfsp();
   1532 
   1533 	// Release the hold we got from find_pxvfs()
   1534 	VFS_RELE(vfsp);
   1535 	ASSERT(vfsp != NULL);
   1536 
   1537 	vfsp->vfs_flag = vfsflags;
   1538 	vfsp->vfs_mtime = ddi_get_time();
   1539 
   1540 	//
   1541 	// Free the old mount options table and add a new one - then
   1542 	// fill the new table with the options.
   1543 	// Update the mnttab modification time. The function
   1544 	// vfs_mnttab_modtimeupd is declared static in vfs.c for
   1545 	// Solaris 8 and 9. For Solaris 10, it is global.
   1546 	//
   1547 #ifdef GLOBAL_MNTTAB_MODTIME_INTERFACE
   1548 	vfs_list_lock();
   1549 	vfs_createopttbl(&vfsp->vfs_mntopts, mntoptions);
   1550 	vfs_parsemntopts(&vfsp->vfs_mntopts, (char *)mntoptions, 1);
   1551 	vfs_mnttab_modtimeupd();
   1552 	vfs_list_unlock();
   1553 #else
   1554 	vfs_createopttbl(&vfsp->vfs_mntopts, mntoptions);
   1555 	vfs_parsemntopts(&vfsp->vfs_mntopts, (char *)mntoptions, 1);
   1556 	gethrestime(&vfs_mnttab_mtime);
   1557 #endif
   1558 
   1559 	// Unlock the mount point.
   1560 	vn_vfsunlock(vp);
   1561 	VN_RELE(vp);
   1562 }
   1563 
   1564 //
   1565 // Register the mount client with the mount server.
   1566 // This is called by _cladm(CL_INITIALIZE, CL_GBLMNT_ENABLE).
   1567 // The return value is the errno value for the system call.
   1568 //
   1569 int
   1570 mount_client_impl::activate()
   1571 {
   1572 	//
   1573 	// Check to make sure clexecd is running.
   1574 	//
   1575 	char				name[20];
   1576 	Environment			e;
   1577 
   1578 	repl_pxfs::ha_mounter_var	mounter;
   1579 	naming::naming_context_var	ctxp = ns::root_nameserver();
   1580 
   1581 	os::sprintf(name, "ha_mounter.%d", orb_conf::node_number());
   1582 
   1583 	CORBA::Object_var		obj = ctxp->resolve(name, e);
   1584 	if (e.exception() == NULL) {
   1585 		mounter = repl_pxfs::ha_mounter::_narrow(obj);
   1586 	}
   1587 	if ((e.exception() != NULL) || CORBA::is_nil(mounter)) {
   1588 		if (e.exception() != NULL) {
   1589 			CLEXEC_EXCEPTION(e, "mount_client_impl::activate",
   1590 			    "ha_mounter");
   1591 			e.clear();
   1592 		} else {
   1593 			CLADM_DBPRINTF(CLADM_TRACE_CLEXEC, CLADM_RED,
   1594 			    ("mount_client_impl::activate:%s"
   1595 			    " CORBA::is_nil(mounter)\n", name));
   1596 		}
   1597 		MOUNT_DBPRINTF(
   1598 		    MOUNT_TRACE_CLIENT,
   1599 		    MOUNT_RED,
   1600 		    ("client:activate: resolve %s failed\n",
   1601 		    name));
   1602 		return (EAGAIN);
   1603 	}
   1604 
   1605 	bool alive = mounter->is_alive(e);
   1606 
   1607 	CLADM_DBPRINTF(CLADM_TRACE_CLEXEC, CLADM_GREEN,
   1608 	    ("mount_client_impl::activate:%s alive %d except %p\n",
   1609 	    name, (int)alive, e.exception()));
   1610 
   1611 	if (e.exception() != NULL || !alive) {
   1612 		//
   1613 		// XXX There is currently a bug where low memory can return
   1614 		// EAGAIN from the xdoor upcall. We attempt recovery here
   1615 		// for now.
   1616 		//
   1617 		CORBA::SystemException *exp =
   1618 		    CORBA::SystemException::_exnarrow(e.exception());
   1619 		CLEXEC_EXCEPTION(e, "mount_client_impl::activate", "is_alive");
   1620 		if (exp != NULL && exp->_minor() == EAGAIN) {
   1621 			e.clear();
   1622 			alive = mounter->is_alive(e);
   1623 			CLADM_DBPRINTF(CLADM_TRACE_CLEXEC, CLADM_AMBER,
   1624 			    ("mount_client_impl::activate:%s alive %d exp %p\n",
   1625 			    name, (int)alive, e.exception()));
   1626 			if (e.exception() != NULL || !alive) {
   1627 				CLEXEC_EXCEPTION(e,
   1628 				    "mount_client_impl::activate", "is_alive");
   1629 				e.clear();
   1630 				MOUNT_DBPRINTF(
   1631 				    MOUNT_TRACE_CLIENT,
   1632 				    MOUNT_RED,
   1633 				    ("client:activate: 1: "
   1634 				    "clexecd does not appear to be running\n"));
   1635 				return (EAGAIN);
   1636 			}
   1637 		} else {
   1638 			if (e.exception() != NULL) {
   1639 #ifdef DEBUG
   1640 				// XXX
   1641 				e.exception()->print_exception(
   1642 				    "mount_client_impl::activate: ");
   1643 #endif
   1644 				MOUNT_DBPRINTF(
   1645 				    MOUNT_TRACE_CLIENT,
   1646 				    MOUNT_RED,
   1647 				    ("client:activate "
   1648 				    "failed mounter::is_alive\n"));
   1649 			}
   1650 			e.clear();
   1651 			MOUNT_DBPRINTF(
   1652 			    MOUNT_TRACE_CLIENT,
   1653 			    MOUNT_RED,
   1654 			    ("client:activate: "
   1655 			    "clexecd does not appear to be running\n"));
   1656 			CLADM_DBPRINTF(CLADM_TRACE_CLEXEC, CLADM_RED,
   1657 			    ("mount_client_impl::activate %s"
   1658 			    " clexecd does not appear to be running\n",
   1659 			    name));
   1660 			return (EAGAIN);
   1661 		}
   1662 	}
   1663 
   1664 	//
   1665 	// Ideally, there should be checks here that verify that the
   1666 	// prerequisites for establishing the global name space have been
   1667 	// satisfied.  However, this would amount to checking that the mount
   1668 	// point(s) into which the base global mount(s) would be done exist,
   1669 	// which is infeasible to do here.
   1670 	//
   1671 
   1672 	//
   1673 	// Ensure idempotence.
   1674 	//
   1675 	mount_client_lock.lock();
   1676 	if (this_mount_client != NULL) {
   1677 		mount_client_lock.unlock();
   1678 		return (0);
   1679 	}
   1680 
   1681 	//
   1682 	// Get the global mount server object.
   1683 	//
   1684 	replica::service_admin_var	sa =
   1685 	    pxfslib::get_service_admin_ref(
   1686 	    "mount_client_impl::startup", "mount", e);
   1687 	if (e.exception()) {
   1688 		mount_client_lock.unlock();
   1689 		e.clear();
   1690 		return (EIO);
   1691 	}
   1692 	obj = sa->get_root_obj(e);
   1693 	if (e.exception()) {
   1694 		mount_client_lock.unlock();
   1695 		e.clear();
   1696 		return (EIO);
   1697 	}
   1698 
   1699 	mount_client_impl	*mcp = new mount_client_impl();
   1700 	mcp->server = fs::mount_server::_narrow(obj);
   1701 	ASSERT(!CORBA::is_nil(mcp->server));
   1702 
   1703 	//
   1704 	// Register with mount_server.
   1705 	//
   1706 	fs::mount_client_var	clientv = mcp->get_objref();
   1707 	mcp->server->add_client(clientv, orb_conf::node_number(),
   1708 	    mcp->keepalive, e);
   1709 	CORBA::Exception	*ex = e.exception();
   1710 	if (ex != NULL) {
   1711 		int	error;
   1712 
   1713 		//
   1714 		// Note: mcp will be deleted when
   1715 		// mount_client_impl::_unreferenced() is called.
   1716 		//
   1717 		mount_client_lock.unlock();
   1718 
   1719 		fs::mount_server::mount_err	*merrp =
   1720 		    fs::mount_server::mount_err::_exnarrow(ex);
   1721 		if (merrp != NULL) {
   1722 			error = merrp->error;
   1723 			MOUNT_DBPRINTF(
   1724 			    MOUNT_TRACE_CLIENT,
   1725 			    MOUNT_RED,
   1726 			    ("client:activate add_client failed %s\n",
   1727 			    (const char *)merrp->mntpnt));
   1728 		} else {
   1729 			error = pxfslib::get_err(e);
   1730 			MOUNT_DBPRINTF(
   1731 			    MOUNT_TRACE_CLIENT,
   1732 			    MOUNT_RED,
   1733 			    ("client:activate add_client failed\n"));
   1734 			e.exception()->print_exception(
   1735 			    "mount_client_impl::activate"); // XXX
   1736 		}
   1737 		e.clear();
   1738 		return (error);
   1739 	}
   1740 
   1741 	this_mount_client = mcp;
   1742 	mount_client_lock.unlock();
   1743 
   1744 	return (0);
   1745 }
   1746 
   1747 
   1748 //
   1749 // Return true if activate() has been called.
   1750 //
   1751 bool
   1752 mount_client_impl::is_activated()
   1753 {
   1754 	return (this_mount_client != NULL);
   1755 }
   1756 
   1757 //
   1758 // Common code for add_notify_locked_v1() and add_notify_v1().
   1759 //
   1760 void
   1761 mount_client_impl::add_notify_common_v1(const sol::mounta &ma,
   1762     const char *mntoptions,
   1763     pxfs_v1::filesystem_ptr fsptr, const pxfs_v1::fs_info &fsinfo,
   1764     vnode_t *coveredvp)
   1765 {
   1766 	//
   1767 	// Find or create a local pxvfs struct to act as a proxy for fs.
   1768 	//
   1769 	pxvfs	*pxvfsp = pxvfs::find_pxvfs(fsptr, &fsinfo);
   1770 	ASSERT(pxvfsp != NULL);
   1771 	pxvfsp->set_mntoptions(mntoptions);
   1772 	vfs_t	*vfsp = pxvfsp->get_vfsp();
   1773 	ASSERT(vfsp != NULL);
   1774 
   1775 	MOUNT_DBPRINTF(
   1776 	    MOUNT_TRACE_CLIENT,
   1777 	    MOUNT_GREEN,
   1778 	    ("client:add_notify_common_v1 add %s pxvfsp %p\n",
   1779 	    (const char *)ma.dir, pxvfsp));
   1780 
   1781 	//
   1782 	// The vfs-specific fields should be initialized unless we
   1783 	// switch to a lazy propagation of vfs list changes scheme.
   1784 	// Our current algorithm immediately ("eagerly") pushes vfs list
   1785 	// changes to the client, so we require
   1786 	// find_pxvfs() to fully initialize the vfs struct.
   1787 	//
   1788 	ASSERT(vfsp->vfs_fstype != 0);
   1789 
   1790 	//
   1791 	// Lock the vfs_t similar to domount().
   1792 	// We should always get the lock since this vfs_t is newly created.
   1793 	//
   1794 	int	error = vfs_lock(vfsp);
   1795 	ASSERT(error == 0);
   1796 
   1797 	// Check if this mount should not be visible via /etc/mnttab
   1798 	if (ma.flags & MS_NOMNTTAB) {
   1799 		vfsp->vfs_flag |= VFS_NOMNTTAB;
   1800 	} else {
   1801 		vfsp->vfs_flag &= ~VFS_NOMNTTAB;
   1802 	}
   1803 
   1804 	//
   1805 	// Initialize values for /etc/mnttab on this node.
   1806 	//
   1807 	if ((const char *)ma.spec == NULL || *(const char *)ma.spec == '\0') {
   1808 		vfs_setresource(vfsp, VFS_NORESOURCE);
   1809 	} else {
   1810 		vfs_setresource(vfsp, (const char *)ma.spec);
   1811 	}
   1812 	vfs_setmntpoint(vfsp, (const char *)ma.dir);
   1813 	// XXX Use global time?
   1814 	vfsp->vfs_mtime = ddi_get_time();
   1815 	vfs_createopttbl(&vfsp->vfs_mntopts, mntoptions);
   1816 	vfs_parsemntopts(&vfsp->vfs_mntopts, (char *)mntoptions, 1);
   1817 	//
   1818 	// Hook vfsp into the local vfs list.
   1819 	// Note: we don't need to have a cluster-wide lock held
   1820 	// on the vfs list because we expect the global mount point
   1821 	// locking to prevent any mount races on this mount point.
   1822 	//
   1823 	VFS_HOLD(vfsp);
   1824 	vfs_list_add(vfsp);
   1825 
   1826 	//
   1827 	// Splice the mount into the name space by setting v_vfsmountedhere
   1828 	// in the covered vnode.
   1829 	// Note that we transfer the hold on coveredvp to v_vfsmountedhere.
   1830 	//
   1831 	ASSERT(!vn_ismntpt(coveredvp));
   1832 	coveredvp->v_vfsmountedhere = vfsp;
   1833 	vfsp->vfs_vnodecovered = coveredvp;
   1834 
   1835 	// Release the hold we got from find_pxvfs().
   1836 	VFS_RELE(vfsp);
   1837 
   1838 	vfs_unlock(vfsp);
   1839 	vn_vfsunlock(coveredvp);
   1840 }
   1841 
   1842 //
   1843 // Notify intent to mount a device.
   1844 // Device is "locked" until it is either mounted or the requesting node dies.
   1845 //
   1846 int
   1847 mount_client_impl::devlock(int cmd, struct pathname *devpnp)
   1848 {
   1849 	//
   1850 	// Verify that the mount client is already active, returning
   1851 	// failure if it isn't.
   1852 	//
   1853 	if (!is_activated()) {
   1854 		return (ENODEV);
   1855 	}
   1856 
   1857 	Environment	e;
   1858 
   1859 	//
   1860 	// If this is a request to unlock the device, unlock it.
   1861 	//
   1862 	if (cmd == CL_GBLMNT_UNLOCK) {
   1863 		get_server()->devunlock(devpnp->pn_path, e);
   1864 		return (pxfslib::get_err(e));
   1865 	}
   1866 
   1867 	//
   1868 	// We have to save a copy of the path name since lookuppn() will
   1869 	// clobber it.
   1870 	//
   1871 	char	*spec = os::strcpy(new char [devpnp->pn_pathlen + 1],
   1872 	    devpnp->pn_path);
   1873 
   1874 	//
   1875 	// We try to determine if this node has a local connection to the device
   1876 	// (i.e., it can be a device replica). If we determine its not
   1877 	// local, return an error without trying to get the lock since
   1878 	// there is no point in attempting to start the service until one
   1879 	// of the nodes that can be a replica boots.
   1880 	//
   1881 	vnode_t		*vp;
   1882 	int		error = lookuppn(devpnp, NULL, FOLLOW, NULL, &vp);
   1883 	if (error == 0) {
   1884 		// Check for a PXFS special file.
   1885 		if (vp->v_flag & VPXFS) {
   1886 			//
   1887 			// Contact DCS to get the list of nodes that this
   1888 			// device is attached to and whether or not its an
   1889 			// HA device.
   1890 			//
   1891 			bool			dev_is_ha;
   1892 			CORBA::String_var	dev_name;
   1893 			sol::nodeid_seq_t_var	dev_nids;
   1894 
   1895 			error = dcs_get_configured_nodes(vp->v_rdev,
   1896 			    fs::dc_callback::_nil(),
   1897 			    dev_is_ha, dev_name, dev_nids);
   1898 			if (error == 0) {
   1899 				uint32_t i, n = dev_nids->length();
   1900 				for (i = 0; i < n; i++) {
   1901 					if (dev_nids[i] ==
   1902 					    orb_conf::node_number()) {
   1903 						break;
   1904 					}
   1905 				}
   1906 				if (i == n) {
   1907 					//
   1908 					// We didn't find a local connection.
   1909 					//
   1910 					VN_RELE(vp);
   1911 					delete [] spec;
   1912 					return (ENXIO);
   1913 				}
   1914 			}
   1915 		}
   1916 		VN_RELE(vp);
   1917 	}
   1918 
   1919 	// Try to get the lock.
   1920 	fs::mount_client_var	clientv = get_client_ref();
   1921 
   1922 	bool log_message = B_TRUE;
   1923 	sol::nodeid_t lock_owner = 0;
   1924 	os::sc_syslog_msg msg(SC_SYSLOG_GLOBAL_MOUNT_TAG, NULL, NULL);
   1925 	do {
   1926 		get_server()->devlock(clientv, orb_conf::node_number(),
   1927 		    spec, e);
   1928 		error = pxfslib::get_err(e);
   1929 		e.clear();
   1930 
   1931 		if (error == ETIMEDOUT && log_message) {
   1932 			//
   1933 			// Try to get the id of the node holding the lock
   1934 			// If we don't find it, it would be because it was
   1935 			// released. We ignore this.
   1936 			//
   1937 			get_server()->get_devlock_owner(spec, lock_owner, e);
   1938 			if (lock_owner) {
   1939 				// Log an error message once.
   1940 				char nodename[CL_MAX_LEN + 1];
   1941 				clconf_get_nodename(lock_owner, nodename);
   1942 				//
   1943 				// SCMSGS
   1944 				// @explanation
   1945 				// Sun Cluster boot is waiting for the
   1946 				// mentioned node to complete fsck/mount of a
   1947 				// global filesystem and release the lock on a
   1948 				// device.
   1949 				// @user_action
   1950 				// Check the console of the specified cluster
   1951 				// node to see if any of the nodes are waiting
   1952 				// for a manual fsck to be done. If this is
   1953 				// so, exiting the shell after performing the
   1954 				// fsck will allow the boot of the other nodes
   1955 				// to continue.
   1956 				//
   1957 				(void) msg.log(SC_SYSLOG_NOTICE, MESSAGE,
   1958 				    "Sun Cluster is waiting for lock on device "
   1959 				    "%s. Lock is currently held by %s for "
   1960 				    "fsck/mount.",
   1961 				    spec, nodename);
   1962 				log_message = B_FALSE;
   1963 			}
   1964 		}
   1965 	} while (error == EAGAIN || error == ETIMEDOUT);
   1966 
   1967 	if (log_message == B_FALSE) {
   1968 		//
   1969 		// Log a message that the wait is over.
   1970 		//
   1971 		if (error == 0) {
   1972 			//
   1973 			// SCMSGS
   1974 			// @explanation
   1975 			// Sun Cluster successfully obtained a lock on a
   1976 			// device to perform fsck/mount.
   1977 			// @user_action
   1978 			// This is an informational message, no user action is
   1979 			// needed.
   1980 			//
   1981 			(void) msg.log(SC_SYSLOG_NOTICE, MESSAGE,
   1982 			    "Lock on device %s obtained. Proceeding.",
   1983 			    spec);
   1984 		} else {
   1985 			//
   1986 			// SCMSGS
   1987 			// @explanation
   1988 			// Sun Cluster was unable to lock a device.
   1989 			// @user_action
   1990 			// Check the error returned for why this happened. In
   1991 			// cases like an interrupted system call, no user
   1992 			// action is required.
   1993 			//
   1994 			(void) msg.log(SC_SYSLOG_NOTICE, MESSAGE,
   1995 			    "Unable to lock device %s. Error (%s).",
   1996 			    spec, strerror(error));
   1997 		}
   1998 	}
   1999 
   2000 	delete [] spec;
   2001 	return (error);
   2002 }
   2003 
   2004 //
   2005 // Called from cladmin() to import all global mounts
   2006 // (i.e., "/usr/cluster/lib/sc/clconfig -g").
   2007 //
   2008 extern "C" int
   2009 pxfs_mount_client_enable(int cmd, int onoff)
   2010 {
   2011 	int	error;
   2012 
   2013 	switch (cmd) {
   2014 	case CL_GBLMNT_ENABLE:
   2015 		if (onoff) {
   2016 			error = device_service_mgr::activate();
   2017 			if (error == 0)
   2018 				error = mount_client_impl::activate();
   2019 			return (error);
   2020 		} else {
   2021 			// cladmin returns ENOTSUP
   2022 			ASSERT(0);
   2023 			return (0);
   2024 		}
   2025 	case CL_SWITCHBACK_ENABLE:
   2026 		return (device_service_mgr::do_switchbacks());
   2027 
   2028 	default:
   2029 		return (EINVAL);
   2030 	}
   2031 }
   2032 
   2033 //
   2034 // Called from cladmin() to lock devices so they aren't fsck'ed by different
   2035 // nodes at the same time (i.e., "/usr/cluster/lib/sc/clconfig -m devname").
   2036 //
   2037 extern "C" int
   2038 pxfs_mount_client_lock(int cmd, struct pathname *devpnp)
   2039 {
   2040 	return (mount_client_impl::devlock(cmd, devpnp));
   2041 }
   2042 
   2043 //
   2044 // Called after the pxfs loadable module has been loaded.
   2045 //
   2046 sol::error_t
   2047 pxfs_mount_client_startup()
   2048 {
   2049 	extern int (*pxfs_mount_client_enable_ptr)(int cmd, int onoff);
   2050 	extern int (*pxfs_mount_client_lock_ptr)(int cmd,
   2051 	    struct pathname *devpnp);
   2052 	pxfs_mount_client_enable_ptr = pxfs_mount_client_enable;
   2053 	pxfs_mount_client_lock_ptr = pxfs_mount_client_lock;
   2054 	return (0);
   2055 }
   2056 
   2057 //
   2058 // Called before the pxfs loadable module is unloaded.
   2059 //
   2060 sol::error_t
   2061 pxfs_mount_client_shutdown()
   2062 {
   2063 	return (mount_client_impl::is_activated() ? EBUSY : 0);
   2064 }
   2065