1 // 2 // CDDL HEADER START 3 // 4 // The contents of this file are subject to the terms of the 5 // Common Development and Distribution License (the License). 6 // You may not use this file except in compliance with the License. 7 // 8 // You can obtain a copy of the license at usr/src/CDDL.txt 9 // or http://www.opensolaris.org/os/licensing. 10 // See the License for the specific language governing permissions 11 // and limitations under the License. 12 // 13 // When distributing Covered Code, include this CDDL HEADER in each 14 // file and include the License file at usr/src/CDDL.txt. 15 // If applicable, add the following below this CDDL HEADER, with the 16 // fields enclosed by brackets [] replaced with your own identifying 17 // information: Portions Copyright [yyyy] [name of copyright owner] 18 // 19 // CDDL HEADER END 20 // 21 22 // 23 // Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 // Use is subject to license terms. 25 // 26 27 #pragma ident "@(#)mount_client_impl.cc 1.106 08/05/20 SMI" 28 29 #include <sys/errno.h> 30 #include <sys/strerror.h> 31 32 #include <sys/types.h> 33 #include <sys/thread.h> 34 #include <sys/file.h> 35 #include <sys/pathname.h> 36 #include <sys/sysmacros.h> 37 #include <sys/vfs.h> 38 #include <sys/mount.h> 39 #include <sys/dnlc.h> 40 41 #include <h/naming.h> 42 #include <sys/cladm_int.h> 43 #include <sys/cladm_debug.h> 44 #include <sys/sol_conv.h> 45 #include <solobj/solobj_impl.h> 46 #include <nslib/ns.h> 47 #include <sys/vm_util.h> 48 49 #include <h/repl_pxfs.h> 50 #include <pxfs/common/pxfslib.h> 51 #include <pxfs/mount/mount_client_impl.h> 52 #include <pxfs/mount/mount_debug.h> 53 #include <pxfs/device/device_service_mgr.h> 54 #include <pxfs/server/fs_impl.h> 55 #include <pxfs/server/repl_pxfs_server.h> 56 #include <pxfs/client/pxvfs.h> 57 58 #ifndef VXFS_DISABLED 59 #include <pxfs/server/vxfs_dependent_impl.h> 60 #endif 61 62 // 63 // For update of the mnttab modification time. 
The function 64 // vfs_mnttab_modtimeupd is declared static in vfs.c for 65 // Solaris 8 and 9. For Solaris 10, it is global. 66 // 67 #include <sys/sol_version.h> 68 #if SOL_VERSION >= __s10 69 #define GLOBAL_MNTTAB_MODTIME_INTERFACE 70 #else 71 extern timespec_t vfs_mnttab_mtime; 72 #endif 73 74 //lint -e1512 75 // 76 // Warning(1512) destructor for base class is not virtual -- In a 77 // final pass through all the classes, we have found a class that is 78 // the base class of a derivation and has a destructor but the 79 // destructor is not virtual. It is conventional for inherited classes 80 // to have virtual destructors so that is it safe to 'delete' a 81 // pointer to a base class. 82 // 83 // Ths classes prov_common_iter and prov_common_setin the file 84 // prov_common.h have to be changed to have virtual destructors. 85 // 86 87 88 // Static data member initialization. 89 mount_client_impl *mount_client_impl::this_mount_client = NULL; 90 os::mutex_t mount_client_impl::mount_client_lock; 91 92 mount_client_impl::mount_client_impl() : 93 mntvp(NULL), 94 unmount_pxvfs_v1_p(NULL) 95 { 96 } 97 98 mount_client_impl::~mount_client_impl() 99 { 100 if (mntvp != NULL) { 101 VN_RELE(mntvp); 102 mntvp = NULL; // for lint 103 } 104 if (unmount_pxvfs_v1_p != NULL) { 105 VFS_RELE(unmount_pxvfs_v1_p->get_vfsp()); 106 unmount_pxvfs_v1_p = NULL; // for lint 107 } 108 } 109 110 // 111 // This is called when all CORBA references are released. 112 // 113 void 114 mount_client_impl::_unreferenced(unref_t arg) 115 { 116 if (!_last_unref(arg)) { 117 // _last_unref() should always be true since we don't use 0->1. 118 ASSERT(0); 119 return; 120 } 121 122 // 123 // If _unreferenced() is called because activate() failed 124 // then delete this object. If for some reason the mount server 125 // crashes and we get unreferenced, we don't panic when referencing 126 // "this_mount_client". 
127 // 128 if (CORBA::is_nil(keepalive)) { 129 delete this; 130 } 131 } 132 133 // 134 // This is called to get a new reference. Doing get_objref() here would 135 // get the highest reference version that was compiled. We want the 136 // highest reference which is currently committed so we use this indirect 137 // way. 138 // 139 void 140 mount_client_impl::_generic_method(CORBA::octet_seq_t &, 141 CORBA::object_seq_t &objs, Environment &e) 142 { 143 objs[0] = get_objref(); 144 } 145 146 // 147 // After a rolling upgrade commit, new mount server references should 148 // be used. Currently, this is only called by pxvfs::unmount for 149 // the new interface which supports forced unmount. 150 // 151 void 152 mount_client_impl::update_mount_server_ref() 153 { 154 Environment e; 155 CORBA::Object_var obj; 156 157 ASSERT(this_mount_client != NULL); 158 159 mount_client_lock.lock(); 160 replica::service_admin_var sa = pxfslib::get_service_admin_ref( 161 "mount_client_impl::startup", "mount", e); 162 if (e.exception()) { 163 mount_client_lock.unlock(); 164 e.clear(); 165 return; 166 } 167 obj = sa->get_root_obj(e); 168 if (e.exception()) { 169 mount_client_lock.unlock(); 170 e.clear(); 171 return; 172 } 173 this_mount_client->server = fs::mount_server::_narrow(obj); 174 mount_client_lock.unlock(); 175 } 176 177 // 178 // upgrade_mount_client - use the new version of the mount_server 179 // 180 void 181 mount_client_impl::upgrade_mount_client(fs::mount_server_ptr 182 mountserver_p, Environment &) 183 { 184 server = fs::mount_server::_duplicate(mountserver_p); 185 } 186 187 // 188 // After a rolling upgrade commit, this mount_server reference will 189 // will be released (in update_mount_server_ref() above). So we need 190 // to duplicate here to prevent the caller from using a bogus reference 191 // after the replacement done above. 
192 // 193 fs::mount_server_ptr 194 mount_client_impl::get_server() 195 { 196 fs::mount_server_ptr mount_server_p; 197 198 ASSERT(this_mount_client != NULL); 199 200 mount_server_p = (fs::mount_server::_duplicate( 201 this_mount_client->server)); 202 203 return (mount_server_p); 204 } 205 206 // 207 // Return a new reference to the local mount client. 208 // 209 fs::mount_client_ptr 210 mount_client_impl::get_client_ref() 211 { 212 ASSERT(this_mount_client != NULL); 213 return (this_mount_client->get_objref()); 214 } 215 216 // 217 // Return a pointer to the local mount client. 218 // 219 mount_client_impl * 220 mount_client_impl::get_client() 221 { 222 ASSERT(this_mount_client != NULL); 223 return (this_mount_client); 224 } 225 226 // 227 // Lock the mount point and perform standard mount point checks. 228 // Return an exception if the lock fails. 229 // XXX should we locally lock vfs_t if this is for a remount? 230 // 231 // Note: this routine can be called multiple times if the primary 232 // fails over and the new primary retries this operation so 233 // this operation must be idempotent. 234 // 235 void 236 mount_client_impl::lock_mountpoint(const char *mountpoint, int32_t mntflags, 237 Environment &_environment) 238 { 239 mount_lock.lock(); 240 if (mntvp != NULL || _environment.is_orphan()) { 241 // 242 // The mount point is already locked by a call to us. 243 // This call should be a retry from a new primary or 244 // the global mount code has allowed two mounts (or remounts) 245 // to happen at the same time which shouldn't happen. 246 // If we modify the mount server to allow more than one 247 // mount point to be locked at the same time, we will 248 // need to use the mount point string to hash into a table. 249 // XXX We could save the mountpoint string to verify that 250 // it is the same as the previous call. 
251 // 252 mount_lock.unlock(); 253 MOUNT_DBPRINTF( 254 MOUNT_TRACE_CLIENT, 255 MOUNT_AMBER, 256 ("client:lock_mountpoint retry %s\n", 257 mountpoint)); 258 return; 259 } 260 261 // Lookup the mount point path name to get a vnode pointer. 262 vnode_t *vp; 263 int error = lookupname((char *)mountpoint, UIO_SYSSPACE, FOLLOW, 264 NULL, &vp); 265 if (error != 0) { 266 mount_lock.unlock(); 267 MOUNT_DBPRINTF( 268 MOUNT_TRACE_CLIENT, 269 MOUNT_RED, 270 ("client:lock_mountpoint lookup %s error %d\n", 271 mountpoint, error)); 272 os::sc_syslog_msg msg(SC_SYSLOG_GLOBAL_MOUNT_TAG, NULL, NULL); 273 // 274 // SCMSGS 275 // @explanation 276 // While mounting a Cluster file system, the directory on 277 // which the mount is to take place could not be opened. 278 // @user_action 279 // Fix the reported error and retry. The most likely problem 280 // is that the directory does not exist - in that case, create 281 // it with the appropriate permissions and retry. 282 // 283 (void) msg.log(SC_SYSLOG_WARNING, MESSAGE, 284 "Could not mount '%s' because there was an error (%d) in " 285 "opening the directory.", mountpoint, error); 286 pxfslib::throw_exception(_environment, error); 287 return; 288 } 289 290 if (vn_vfswlock(vp) != 0) { 291 mount_lock.unlock(); 292 MOUNT_DBPRINTF( 293 MOUNT_TRACE_CLIENT, 294 MOUNT_RED, 295 ("client:lock_mountpoint vn_vfswlock %s\n", 296 mountpoint)); 297 pxfslib::throw_exception(_environment, EBUSY); 298 VN_RELE(vp); 299 return; 300 } 301 302 if (vp->v_flag & VNOMOUNT) { 303 mount_lock.unlock(); 304 MOUNT_DBPRINTF( 305 MOUNT_TRACE_CLIENT, 306 MOUNT_RED, 307 ("client:lock_mountpoint VNOMOUNT %s\n", 308 mountpoint)); 309 pxfslib::throw_exception(_environment, EINVAL); 310 vn_vfsunlock(vp); 311 VN_RELE(vp); 312 return; 313 } 314 315 // Make sure we are the only holder of the mount point. 
316 dnlc_purge_vp(vp); 317 318 if (vn_ismntpt(vp) || 319 ((mntflags & (MS_REMOUNT | MS_OVERLAY)) == 0 && 320 (vp->v_count != 1 || (vp->v_flag & VROOT) != 0))) { 321 mount_lock.unlock(); 322 MOUNT_DBPRINTF( 323 MOUNT_TRACE_CLIENT, 324 MOUNT_RED, 325 ("client:lock_mountpoint %s mounted %d count %d\n", 326 mountpoint, vn_ismntpt(vp), vp->v_count)); 327 pxfslib::throw_exception(_environment, EBUSY); 328 vn_vfsunlock(vp); 329 VN_RELE(vp); 330 return; 331 } 332 333 // 334 // XXX Note that we don't check that vp->v_type == VDIR. 335 // This should be checked for most file systems but not "namefs". 336 // Also, the "busy" check above isn't quite right for "namefs". 337 // 338 339 mntvp = vp; 340 mount_lock.unlock(); 341 } 342 343 // 344 // Unlock the mount point. 345 // 346 // Note: this routine can be called multiple times if the primary 347 // fails over and the new primary retries this operation so 348 // this operation must be idempotent. 349 // 350 void 351 mount_client_impl::unlock_mountpoint(const char *, Environment &_environment) 352 { 353 // 354 // If the mount point vnode is not locked, this is a retry from a new 355 // primary. 356 // 357 mount_lock.lock(); 358 if (mntvp == NULL || _environment.is_orphan()) { 359 mount_lock.unlock(); 360 MOUNT_DBPRINTF( 361 MOUNT_TRACE_CLIENT, 362 MOUNT_AMBER, 363 ("client:unlock_mountpoint retry\n")); 364 return; 365 } 366 367 vnode_t *vp = mntvp; 368 mntvp = NULL; 369 mount_lock.unlock(); 370 371 vn_vfsunlock(vp); 372 VN_RELE(vp); 373 } 374 375 // 376 // This is called by the mount server to lock the mount 377 // point and prepare to unmount the file system. 378 // 379 // Note: this routine can be called multiple times if the primary 380 // fails over and the new primary retries this operation so 381 // this operation must be idempotent. 
382 // 383 void 384 mount_client_impl::prepare_unmount(fs::filesystem_ptr fsptr, 385 solobj::cred_ptr credobj, Environment &_environment) 386 { 387 CL_PANIC(0); 388 } 389 390 // 391 // Forced unmount support version 392 // 393 // This is called by the mount server in preparation for the unmount. 394 // The method pxvfsp->purge_caches provides the needed preparation: 395 // (1)purge the dnlc; (2)release the cached root vnode; (3) empty the 396 // inactive vnode list; and (4)lock the filesystem (vfs). The unmount_vfsp 397 // is initialized - this is used when the mount server calls 398 // ::remove_notify after the underlying filesystem has been unmounted. 399 // Note: unmounting of filesystems is done serially. 400 // 401 // Note: this routine can be called multiple times if the primary 402 // fails over and the new primary retries this operation so 403 // this operation must be idempotent. 404 // 405 void 406 mount_client_impl::prepare_unmount_1(fs::filesystem_ptr fsptr, int32_t flags, 407 solobj::cred_ptr credobj, bool skip_purge, Environment &_environment) 408 { 409 CL_PANIC(0); 410 } 411 412 // 413 // prepare_unmount_v1 414 // 415 // This is called by the mount server in preparation for the unmount. 416 // The method pxvfsp->purge_caches provides the needed preparation: 417 // (1)purge the dnlc; (2)release the cached root vnode; (3) empty the 418 // inactive vnode list; and (4)lock the filesystem (vfs). 419 // 420 // The unmount_vfsp is initialized - this is used when the mount server calls 421 // ::remove_notify after the underlying filesystem has been unmounted. 422 // 423 // Unmounting of filesystems is done serially. 424 // 425 // This supports forced unmount. 426 // 427 // Note: this routine can be called multiple times if the primary 428 // fails over and the new primary retries this operation so 429 // this operation must be idempotent. 
430 // 431 void 432 mount_client_impl::prepare_unmount_v1(pxfs_v1::filesystem_ptr fsptr, 433 int32_t flags, solobj::cred_ptr credobj, bool skip_purge, 434 Environment &_environment) 435 { 436 unmount_lock.lock(); 437 if (unmount_pxvfs_v1_p != NULL || _environment.is_orphan()) { 438 unmount_lock.unlock(); 439 // 440 // A second call to prepare_unmount has been made without 441 // calling unmount_failed() or remove_notify(). 442 // If the vfs's match, this call is a retry from a new primary. 443 // If the vfs's don't match, the global unmount code has 444 // allowed two unmounts to happen at the same time which 445 // shouldn't happen. 446 // 447 MOUNT_DBPRINTF( 448 MOUNT_TRACE_CLIENT, 449 MOUNT_AMBER, 450 ("client:prepare_unmount_v1 retry\n")); 451 return; 452 } 453 454 // 455 // Obtain the local vfs struct acting as a proxy for fs. 456 // Note that we can't be sure to find the pxvfs if the mount point 457 // is locked by a mount in progress, mount_server_impl::mount has 458 // returned but pxvfs::mount() hasn't entered the pxvfs yet. 459 // 460 pxvfs *pxvfsp = pxvfs::find_pxvfs(fsptr, NULL); 461 if (pxvfsp == NULL) { 462 unmount_lock.unlock(); 463 MOUNT_DBPRINTF( 464 MOUNT_TRACE_CLIENT, 465 MOUNT_RED, 466 ("client:prepare_unmount_v1 couldn't find fs\n")); 467 pxfslib::throw_exception(_environment, EINVAL); 468 return; 469 } 470 vfs_t *vfsp = pxvfsp->get_vfsp(); 471 ASSERT(vfsp != NULL); 472 473 if (skip_purge) { 474 unmount_pxvfs_v1_p = pxvfsp; 475 unmount_lock.unlock(); 476 477 // Release the hold from find_pxvfs(). 478 VFS_RELE(vfsp); 479 return; 480 } 481 482 // 483 // The vfs_t keeps the hold on the vnode, we are just using the 484 // pointer and thus shouldn't release it. 485 // 486 vnode_t *vp = vfsp->vfs_vnodecovered; 487 488 if (vn_vfswlock(vp) != 0) { 489 unmount_lock.unlock(); 490 MOUNT_DBPRINTF( 491 MOUNT_TRACE_CLIENT, 492 MOUNT_RED, 493 ("client:prepare_unmount_v1 vn_vfswlock pxvfs %p\n", 494 pxvfsp)); 495 // Release the hold we got from find_pxvfs(). 
496 VFS_RELE(vfsp); 497 pxfslib::throw_exception(_environment, EBUSY); 498 return; 499 } 500 501 cred_t *credp = solobj_impl::conv(credobj); 502 if (pxvfsp->purge_caches(flags & MS_FORCE ? true : false, credp)) { 503 unmount_lock.unlock(); 504 MOUNT_DBPRINTF( 505 MOUNT_TRACE_CLIENT, 506 MOUNT_RED, 507 ("client:prepare_unmount_v1 busy\n")); 508 vn_vfsunlock(vp); 509 510 // Release the hold we got from find_pxvfs(). 511 VFS_RELE(vfsp); 512 pxfslib::throw_exception(_environment, EBUSY); 513 return; 514 } 515 516 // Release the hold from find_pxvfs(). 517 VFS_RELE(vfsp); 518 unmount_pxvfs_v1_p = pxvfsp; 519 unmount_lock.unlock(); 520 } 521 522 // 523 // Undo the locking done by prepare_unmount(). 524 // 525 // Note: this routine can be called multiple times if the primary 526 // fails over and the new primary retries this operation so 527 // this operation must be idempotent. 528 // 529 // This method supports only mount versions before 1.1 530 // 531 void 532 mount_client_impl::unmount_failed(Environment &_environment) 533 { 534 // 535 // If unmount is not in progress, this is a retry from a new primary. 536 // 537 unmount_lock.lock(); 538 if ((unmount_pxvfs_v1_p == NULL) || _environment.is_orphan()) { 539 unmount_lock.unlock(); 540 MOUNT_DBPRINTF( 541 MOUNT_TRACE_CLIENT, 542 MOUNT_AMBER, 543 ("client:unmount_failed retry\n")); 544 return; 545 } 546 547 } 548 549 // 550 // Forced unmount support version 551 // 552 // Undo the locking done by prepare_unmount(). 553 // 554 // Note: this routine can be called multiple times if the primary 555 // fails over and the new primary retries this operation so 556 // this operation must be idempotent. 557 // 558 void 559 mount_client_impl::unmount_failed_1(bool skip, Environment &_environment) 560 { 561 // 562 // If unmount is not in progress, this is a retry from a new primary. 
563 // 564 unmount_lock.lock(); 565 if ((unmount_pxvfs_v1_p == NULL) || _environment.is_orphan()) { 566 unmount_lock.unlock(); 567 MOUNT_DBPRINTF( 568 MOUNT_TRACE_CLIENT, 569 MOUNT_AMBER, 570 ("client:unmount_failed_1 retry\n")); 571 return; 572 } 573 574 unmount_failed_v1(skip); 575 } 576 577 // 578 // unmount_failed_v1 579 // 580 // Supports forced unmount. 581 // 582 // Undo the locking done by prepare_unmount(). 583 // 584 // Note: this routine can be called multiple times if the primary 585 // fails over and the new primary retries this operation so 586 // this operation must be idempotent. 587 // 588 void 589 mount_client_impl::unmount_failed_v1(bool skip) 590 { 591 // 592 // Note: if the mount server calls us and then fails before sending 593 // a checkpoint to its secondary, the new primary could call us 594 // "at the same time". In order to avoid using a mutex here, 595 // we set unmount_pxvfsp now to make the race window small. 596 // 597 pxvfs *pxvfsp = unmount_pxvfs_v1_p; 598 unmount_pxvfs_v1_p = NULL; 599 unmount_lock.unlock(); 600 601 if (!skip) { 602 vfs_t *vfsp = pxvfsp->get_vfsp(); 603 vfs_unlock(vfsp); 604 vn_vfsunlock(vfsp->vfs_vnodecovered); 605 606 pxvfsp->unmount_failed(); 607 } 608 } 609 610 // 611 // Instantiate a non-HA file system on this node. 612 // The mount point should be locked before calling this. 613 // 614 // Note: this routine can be called multiple times if the primary 615 // fails over and the new primary retries this operation so 616 // this operation must be idempotent. 617 // XXX This code will need to change when new file system types 618 // are supported by PXFS. 619 // 620 void 621 mount_client_impl::instantiate(const sol::mounta &ma, sol::uintptr_t mvp, 622 solobj::cred_ptr credobj, fs::filesystem_out fs_obj, fs::fs_info &fsinfo, 623 CORBA::String_out mntoptions, Environment &_environment) 624 { 625 CL_PANIC(0); 626 } 627 628 // 629 // Instantiate a non-HA file system on this node. 
630 // The mount point should be locked before calling this. 631 // 632 // Note: this routine can be called multiple times if the primary 633 // fails over and the new primary retries this operation so 634 // this operation must be idempotent. 635 // XXX This code will need to change when new file system types 636 // are supported by PXFS. 637 // 638 void 639 mount_client_impl::instantiate_v1(const sol::mounta &ma, sol::uintptr_t mvp, 640 solobj::cred_ptr credobj, pxfs_v1::filesystem_out fs_obj, 641 pxfs_v1::fs_info &fsinfo, 642 CORBA::String_out mntoptions, Environment &_environment) 643 { 644 ASSERT(ma.flags & MS_SYSSPACE); 645 646 // 647 // If we are on the node doing the mount system call, 'mvp' 648 // will be the address of the locked mount point vnode. 649 // Otherwise, 'mntvp' should be non-NULL from lock_mountpoint(). 650 // 651 vnode_t *vp; 652 if (mvp != NULL) { 653 vp = (vnode_t *)mvp; 654 } else { 655 vp = mntvp; 656 ASSERT(vp != NULL); 657 } 658 659 int datalen; 660 661 #ifndef VXFS_DISABLED 662 if (strcmp(ma.fstype, "vxfs") == 0) { 663 datalen = 664 vxfs_dependent_impl::vxfs_fixup_args(ma, 665 vxfs_dependent_impl::VX_MOUNT); 666 if (datalen == -1) { 667 pxfslib::throw_exception(_environment, ENOENT); 668 return; 669 } 670 } else { 671 datalen = (int)ma.data.length(); 672 } 673 #else 674 datalen = (int)ma.data.length(); 675 #endif 676 677 // 678 // We set MS_NOSPLICE so that the underlying file system isn't 679 // linked into the file system name space. 680 // 681 // We turn off MS_GLOBAL, as we are mounting the underlying filesystem 682 // locally. With Solaris 9 build 58, Solaris disables mount in progress 683 // checks if MS_GLOBAL is specified. We have to make sure that 684 // MS_GLOBAL is turned off here, as we want these checks to be made. 685 // These checks make sure that if a global mount and a local mount 686 // happen concurrently, and are trying to mount the same device, 687 // on different mount-points, only one of them succeeds. 
688 // 689 char *options; 690 struct mounta mnta; 691 mnta.spec = ((sol::mounta &)ma).spec; 692 mnta.dir = ((sol::mounta &)ma).dir; 693 mnta.flags = ma.flags | MS_NOSPLICE; 694 mnta.flags &= ~MS_GLOBAL; 695 mnta.fstype = ((sol::mounta &)ma).fstype; 696 mnta.dataptr = (char *)ma.data.buffer(); 697 mnta.datalen = datalen; 698 int len; 699 if (mnta.flags & MS_OPTIONSTR) { 700 len = (int)ma.options.length(); 701 options = new char[(size_t)len]; //lint !e571 702 mnta.optptr = os::strcpy(options, 703 (const char *)ma.options.buffer()); 704 mnta.optlen = len; 705 } else { 706 len = MAX_MNTOPT_STR; 707 options = new char [(size_t)len]; //lint !e571 708 mnta.optptr = NULL; 709 mnta.optlen = 0; 710 } 711 712 // 713 // XXX Need a way to detect if this is a retry. 714 // We could lookup ma.spec and use vfs_devsearch() 715 // except this won't work for some file system types. 716 // It also doesn't work if the device really is busy 717 // (i.e., already mounted somewhere else) and this call 718 // is not a retry. 719 // 720 721 // 722 // Call the wrapped filesystem's mount routine, producing 723 // a vfs structure. 724 // 725 cred_t *credp = solobj_impl::conv(credobj); 726 vfs *vfsp = NULL; 727 int error = domount(NULL, &mnta, vp, credp, &vfsp); 728 729 if (error == 0 && (mnta.flags & MS_OPTIONSTR) == 0) { 730 error = vfs_buildoptionstr(&vfsp->vfs_mntopts, options, len); 731 } 732 if (error) { 733 delete [] options; 734 pxfslib::throw_exception(_environment, error); 735 MOUNT_DBPRINTF( 736 MOUNT_TRACE_CLIENT, 737 MOUNT_RED, 738 ("client:instantiate_v1 %s error %d\n", 739 (const char *)ma.dir, error)); 740 return; 741 } 742 743 ASSERT(vfsp != NULL); 744 745 // 746 // Create a filesystem object. 747 // 748 fs_norm_impl *fsp = 749 new fs_norm_impl(vfsp, mnta.fstype, mnta.spec, options); 750 fs_obj = fsp->get_objref(); 751 752 // Fill in the fs_info structure. 
753 (void) os::strcpy(fsinfo.fstype, vfssw[vfsp->vfs_fstype].vsw_name); 754 fsinfo.fsbsize = vfsp->vfs_bsize; 755 fsinfo.fsdev = vfsp->vfs_dev; 756 fsinfo.fsflag = vfsp->vfs_flag; 757 // XXX: should be... fsinfo.fsid = vfs->vfs_fsid; 758 fsinfo.fsid.val[0] = vfsp->vfs_fsid.val[0]; 759 fsinfo.fsid.val[1] = vfsp->vfs_fsid.val[1]; 760 761 mntoptions = options; 762 } 763 764 // 765 // Instantiate a HA file system on this node. 766 // This method is called by mount_server_impl::notify_change(). The 767 // purpose of this code is similar to instantiate_ha() except that 768 // a filesystem pointer is passed instead of a vnode. Here we 769 // extract the vnode from the filesystem pointer. 770 // 771 // Note: this routine can be called multiple times if the primary 772 // fails over and the new primary retries this operation so 773 // this operation must be idempotent. 774 // XXX This code will need to change when new file system types 775 // are supported by PXFS. 776 // 777 void 778 mount_client_impl::reinstantiate_ha(const sol::mounta &ma, 779 fs::filesystem_ptr fsptr, solobj::cred_ptr credobj, 780 const char *dev_name, Environment &_environment) 781 { 782 CL_PANIC(0); 783 } 784 785 // 786 // Instantiate a HA file system on this node. 787 // This method is called by mount_server_impl::notify_change(). The 788 // purpose of this code is similar to instantiate_ha() except that 789 // a filesystem pointer is passed instead of a vnode. Here we 790 // extract the vnode from the filesystem pointer. 791 // 792 // Note: this routine can be called multiple times if the primary 793 // fails over and the new primary retries this operation so 794 // this operation must be idempotent. 795 // XXX This code will need to change when new file system types 796 // are supported by PXFS. 
797 // 798 void 799 mount_client_impl::reinstantiate_ha_v1(const sol::mounta &ma, 800 pxfs_v1::filesystem_ptr fsptr, solobj::cred_ptr credobj, 801 const char *dev_name, Environment &_environment) 802 { 803 pxvfs *pxvfsp = pxvfs::find_pxvfs(fsptr, NULL); 804 // XXX Same bug possible as prepare_unmount(). 805 ASSERT(pxvfsp != NULL); 806 807 vfs_t *vfsp = pxvfsp->get_vfsp(); 808 ASSERT(vfsp != NULL); 809 810 vnode_t *vp = vfsp->vfs_vnodecovered; 811 ASSERT(vp != NULL); 812 813 VFS_RELE(pxvfsp->get_vfsp()); 814 815 instantiate_ha_common(ma, vp, credobj, dev_name, 816 VERSION_1, _environment); 817 } 818 819 // 820 // Instantiate a HA file system on this node. 821 // The mount point should be locked before calling this. 822 // 823 // Note: this routine can be called multiple times if the primary 824 // fails over and the new primary retries this operation so 825 // this operation must be idempotent. 826 // XXX This code will need to change when new file system types 827 // are supported by PXFS. 828 // 829 void 830 mount_client_impl::instantiate_ha(const sol::mounta &ma, sol::uintptr_t mvp, 831 solobj::cred_ptr credobj, const char *dev_name, Environment &_environment) 832 { 833 // 834 // If we are on the node doing the mount system call, 'mvp' 835 // will be the address of the locked mount point vnode. 836 // Otherwise, 'mntvp' should be non-NULL from lock_mountpoint(). 837 // 838 vnode_t *vp; 839 if (mvp != NULL) { 840 vp = (vnode_t *)mvp; 841 } else { 842 vp = mntvp; 843 ASSERT(vp != NULL); 844 } 845 846 instantiate_ha_common(ma, vp, credobj, dev_name, 847 VERSION_0, _environment); 848 } 849 850 // 851 // Instantiate a HA file system on this node. 852 // The mount point should be locked before calling this. 853 // 854 // Note: this routine can be called multiple times if the primary 855 // fails over and the new primary retries this operation so 856 // this operation must be idempotent. 
857 // XXX This code will need to change when new file system types 858 // are supported by PXFS. 859 // 860 void 861 mount_client_impl::instantiate_ha_v1(const sol::mounta &ma, sol::uintptr_t mvp, 862 solobj::cred_ptr credobj, const char *dev_name, Environment &_environment) 863 { 864 // 865 // If we are on the node doing the mount system call, 'mvp' 866 // will be the address of the locked mount point vnode. 867 // Otherwise, 'mntvp' should be non-NULL from lock_mountpoint(). 868 // 869 vnode_t *vp; 870 if (mvp != NULL) { 871 vp = (vnode_t *)mvp; 872 } else { 873 vp = mntvp; 874 ASSERT(vp != NULL); 875 } 876 877 instantiate_ha_common(ma, vp, credobj, dev_name, 878 VERSION_1, _environment); 879 } 880 881 // 882 // Common code for both instantiate_ha() and reinstantiate_ha(). 883 // 884 // Create a file system replica and register with the 885 // replica manager. Also register with the cluster version manager for 886 // callbacks in support of rolling upgrade. 887 // 888 void 889 mount_client_impl::instantiate_ha_common(const sol::mounta &ma, vnode_t *vp, 890 solobj::cred_ptr credobj, const char *dev_name, mount_ver_t mount_ver, 891 Environment &) 892 { 893 // Nested invocations need their own Environment 894 Environment e; 895 896 char id[20]; 897 os::sprintf(id, "%u", orb_conf::node_number()); 898 899 // Create a file system replica for this node 900 repl_pxfs_server *repl_srvr_v1 = NULL; 901 ASSERT(mount_ver == VERSION_1); 902 repl_srvr_v1 = new repl_pxfs_server(vp, ma, 903 solobj_impl::conv(credobj), id); 904 905 // 906 // Have the filesystem replica register with the Version Manager 907 // for upgrade callbacks. 908 // 909 repl_srvr_v1->upgrade_callback_register(ma); 910 911 replica::service_dependencies serv_deps(1, 1); 912 serv_deps[0] = dev_name; 913 replica::prov_dependencies prov_deps(1, 1); 914 prov_deps[0].service = dev_name; 915 // 916 // Note that we assume the "provider ID" is the same for both 917 // the file system and the device service. 
918 // Also note that the (const char *) is needed so a copy of 919 // the string is made instead of just a pointer assignment. 920 // 921 prov_deps[0].repl_prov_desc = (const char *)id; 922 923 repl_srvr_v1->register_with_rm(1, &serv_deps, &prov_deps, true, e); 924 if (e.exception()) { 925 // 926 // Clean up callback registration 927 // 928 repl_srvr_v1->upgrade_callback_unregister(); 929 930 // 931 // Check for service or provider already registered. 932 // If it is, this is either a retry after a failover 933 // or an attempt to mount a mounted file system. 934 // 935 if (!(replica::service_already_exists::_exnarrow( 936 e.exception()) != NULL || 937 replica::repl_prov_already_exists::_exnarrow( 938 e.exception()) != NULL)) { 939 MOUNT_DBPRINTF( 940 MOUNT_TRACE_CLIENT, 941 MOUNT_RED, 942 ("client:instantiate_ha - Error FS replica %s\n", 943 dev_name)); 944 } 945 e.clear(); 946 } 947 } 948 949 // 950 // Respond to notification from the VFS list server that there's a new file 951 // system to be added to this node's list. 952 // The moint point should have previously been locked with lock_mountpoint(). 953 // 954 // The basic work to be done is to construct a proxy for the filesystem object 955 // given as argument, initialize it, set its flags field, and cross-link it 956 // with a local mountpoint vnode. 957 // 958 // Note: this routine can be called multiple times if the primary 959 // fails over and the new primary retries this operation so 960 // this operation must be idempotent. 961 // 962 void 963 mount_client_impl::add_notify_locked(const sol::mounta &ma, 964 const char *mntoptions, fs::filesystem_ptr fsptr, const fs::fs_info &fsinfo, 965 Environment &_environment) 966 { 967 CL_PANIC(0); 968 } 969 970 // 971 // Respond to notification from the VFS list server that there's a new file 972 // system to be added to this node's list. 973 // The moint point should have previously been locked with lock_mountpoint(). 
974 // 975 // The basic work to be done is to construct a proxy for the filesystem object 976 // given as argument, initialize it, set its flags field, and cross-link it 977 // with a local mountpoint vnode. 978 // 979 // Note: this routine can be called multiple times if the primary 980 // fails over and the new primary retries this operation so 981 // this operation must be idempotent. 982 // 983 void 984 mount_client_impl::add_notify_locked_v1(const sol::mounta &ma, 985 const char *mntoptions, 986 pxfs_v1::filesystem_ptr fsptr, const pxfs_v1::fs_info &fsinfo, 987 Environment &_environment) 988 { 989 // 990 // If the mount point vnode is not set, this is a retry from a new 991 // primary. 992 // 993 mount_lock.lock(); 994 if (mntvp == NULL || _environment.is_orphan()) { 995 mount_lock.unlock(); 996 MOUNT_DBPRINTF( 997 MOUNT_TRACE_CLIENT, 998 MOUNT_AMBER, 999 ("client:add_notify_locked_v1 %s retry\n", 1000 (const char *)ma.dir)); 1001 return; 1002 } 1003 1004 vnode_t *vp = mntvp; 1005 mntvp = NULL; 1006 mount_lock.unlock(); 1007 1008 ASSERT(!vn_ismntpt(vp)); 1009 1010 // 1011 // Now that we've obtained the mount point vnode, 1012 // the rest of the work is common. 1013 // Note that we transfer our hold on mntvp to the vfs_t. 1014 // 1015 add_notify_common_v1(ma, mntoptions, fsptr, fsinfo, vp); 1016 } 1017 1018 // 1019 // Respond to notification from the VFS list server that there's a new file 1020 // system to be added to this node's list. 1021 // 1022 // The basic work to be done is to lock the mount point, construct a proxy 1023 // for the filesystem object, initialize it, set its flags field, 1024 // and link it into the file system name space. 1025 // 1026 // Note: this routine can be called multiple times if the primary 1027 // fails over and the new primary retries this operation. The required 1028 // idempotence is problematic because of a retry racing with an initial 1029 // attempt. 
This is resolved by serializing calls using the add_notify_lock 1030 // and checking for an already existing mount. 1031 // 1032 void 1033 mount_client_impl::add_notify(const sol::mounta &ma, const char *mntoptions, 1034 bool is_ha_repl, const char *dev_name, fs::filesystem_ptr fsptr, 1035 const fs::fs_info &fsinfo, Environment &_environment) 1036 { 1037 CL_PANIC(0); 1038 } 1039 1040 // 1041 // Respond to notification from the VFS list server that there's a new file 1042 // system to be added to this node's list. 1043 // 1044 // The basic work to be done is to lock the mount point, construct a proxy 1045 // for the filesystem object, initialize it, set its flags field, 1046 // and link it into the file system name space. 1047 // 1048 // Note: this routine can be called multiple times if the primary 1049 // fails over and the new primary retries this operation so 1050 // this operation must be idempotent. 1051 // 1052 void 1053 mount_client_impl::add_notify_v1(const sol::mounta &ma, const char *mntoptions, 1054 bool is_ha_repl, const char *dev_name, pxfs_v1::filesystem_ptr fsptr, 1055 const pxfs_v1::fs_info &fsinfo, Environment &_environment) 1056 { 1057 ASSERT(ma.flags & MS_SYSSPACE); 1058 1059 vnode_t *vp; 1060 int error = lookupname(((sol::mounta &)ma).dir, 1061 UIO_SYSSPACE, FOLLOW, NULL, &vp); 1062 if (error != 0) { 1063 MOUNT_DBPRINTF( 1064 MOUNT_TRACE_CLIENT, 1065 MOUNT_RED, 1066 ("client:add_notify_v1 lookup %s error %d\n", 1067 (const char *)ma.dir, error)); 1068 os::sc_syslog_msg msg(SC_SYSLOG_GLOBAL_MOUNT_TAG, NULL, NULL); 1069 (void) msg.log(SC_SYSLOG_WARNING, MESSAGE, 1070 "Could not mount '%s' because there was an error (%d) in " 1071 "opening the directory.", (const char *)ma.dir, 1072 error); 1073 pxfslib::throw_exception(_environment, error); 1074 return; 1075 } 1076 1077 // 1078 // If the mount point is a PXFS vnode for the file system we 1079 // are trying to add, then this is a retry after a failover. 
1080 // XXX This condition might not be met for a while, the race 1081 // mentioned in lock_mountpoint() for mntvp could apply here. 1082 // 1083 vfs_t *vfsp = vp->v_vfsp; 1084 if ((vfsp->vfs_flag & VFS_PXFS) && 1085 fsptr->_equiv(pxvfs::VFSTOPXFS(vfsp)->get_fsobj())) { 1086 MOUNT_DBPRINTF( 1087 MOUNT_TRACE_CLIENT, 1088 MOUNT_GREEN, 1089 ("client:add_notify_v1 %s retry\n", 1090 (const char *)ma.dir)); 1091 VN_RELE(vp); 1092 return; 1093 } 1094 1095 if (vn_vfswlock(vp) != 0) { 1096 MOUNT_DBPRINTF( 1097 MOUNT_TRACE_CLIENT, 1098 MOUNT_RED, 1099 ("client:add_notify_v1 %s vn_vfswlock\n", 1100 (const char *)ma.dir)); 1101 pxfslib::throw_exception(_environment, EBUSY); 1102 VN_RELE(vp); 1103 return; 1104 } 1105 1106 if (vp->v_flag & VNOMOUNT) { 1107 MOUNT_DBPRINTF( 1108 MOUNT_TRACE_CLIENT, 1109 MOUNT_RED, 1110 ("client:add_notify_v1 %s NVOMOUNT\n", 1111 (const char *)ma.dir)); 1112 pxfslib::throw_exception(_environment, EINVAL); 1113 vn_vfsunlock(vp); 1114 VN_RELE(vp); 1115 return; 1116 } 1117 1118 // Make sure we are the only holder of the mount point. 1119 dnlc_purge_vp(vp); 1120 1121 if (vn_ismntpt(vp) || 1122 (ma.flags & (MS_REMOUNT | MS_OVERLAY)) == 0 && 1123 (vp->v_count != 1 || (vp->v_flag & VROOT) != 0)) { 1124 MOUNT_DBPRINTF( 1125 MOUNT_TRACE_CLIENT, 1126 MOUNT_RED, 1127 ("client:add_notify_v1 %s mounted %d count %d\n", 1128 (const char *)ma.dir, 1129 vn_ismntpt(vp), vp->v_count)); 1130 pxfslib::throw_exception(_environment, EBUSY); 1131 vn_vfsunlock(vp); 1132 VN_RELE(vp); 1133 return; 1134 } 1135 1136 // 1137 // XXX Note that we don't check that vp->v_type == VDIR. 1138 // This should be checked for most file systems but not "namefs". 1139 // Also, the "busy" check above isn't quite right for "namefs". 1140 // 1141 1142 // 1143 // Check to see if we need to start a file system replica. 1144 // 1145 if (is_ha_repl) { 1146 // 1147 // Create the file system service and register with the 1148 // replica manager. 1149 // XXX kcred. 
1150 // 1151 char id[20]; 1152 os::sprintf(id, "%u", orb_conf::node_number()); 1153 repl_pxfs_server *repl_srvr = 1154 new repl_pxfs_server(vp, ma, kcred, id); 1155 // 1156 // Have the filesystem replica register with the Version Manager 1157 // for upgrade callbacks. 1158 // 1159 repl_srvr->upgrade_callback_register(ma); 1160 1161 replica::service_dependencies serv_deps(1, 1); 1162 serv_deps[0] = dev_name; 1163 replica::prov_dependencies prov_deps(1, 1); 1164 prov_deps[0].service = dev_name; 1165 // 1166 // Note that we assume the "provider ID" is the same for both 1167 // the file system and the device service. 1168 // Also note that the (const char *) is needed so a copy of 1169 // the string is made instead of just a pointer assignment. 1170 // 1171 prov_deps[0].repl_prov_desc = (const char *)id; 1172 1173 // Nest invocation requires own Environment 1174 Environment e; 1175 1176 #ifdef _FAULT_INJECTION 1177 // 1178 // When this fault is triggered, repl_srvr->register_with_rm is 1179 // called with a bad paramter. The result of this is: 1180 // UserException: replica::invalid_dependency. 1181 // 1182 if (fault_triggered(FAULTNUM_PXFS_ADD_NOTIFY, NULL, NULL)) { 1183 repl_srvr->register_with_rm(1, NULL, &prov_deps, true, e); 1184 } else 1185 #endif 1186 repl_srvr->register_with_rm(1, &serv_deps, &prov_deps, true, e); 1187 if (e.exception()) { 1188 // 1189 // Clean up callback registration 1190 // 1191 repl_srvr->upgrade_callback_unregister(); 1192 1193 // 1194 // Rather than impact node startup by throwing an 1195 // exception and returning, we just continue on 1196 // with mounting the filesystem. Just because 1197 // a server replica for this filesystem didn't 1198 // start here is no reason to interfere with the 1199 // global mount. 
1200 // 1201 #ifdef DEBUG 1202 e.exception()->print_exception( 1203 "failed to register:"); 1204 #endif 1205 e.clear(); 1206 1207 MOUNT_DBPRINTF( 1208 MOUNT_TRACE_CLIENT, 1209 MOUNT_RED, 1210 ("client:add_notify_v1 failed RM reg" 1211 " %s mount point %s nodeid %s\n", 1212 (const char *)ma.spec, (const char *)ma.dir, id)); 1213 1214 os::sc_syslog_msg msg(SC_SYSLOG_GLOBAL_MOUNT_TAG, 1215 NULL, NULL); 1216 // 1217 // SCMSGS 1218 // @explanation 1219 // Filesystem availability may be lessened due to 1220 // reduced component redundancy. 1221 // @user_action 1222 // Check the device. 1223 // 1224 (void) msg.log(SC_SYSLOG_WARNING, MESSAGE, 1225 "mount_client_impl::add_notify() " 1226 "failed to start filesystem replica for " 1227 "%s at mount point %s nodeid %s", 1228 (const char *)ma.spec, (const char *)ma.dir, id); 1229 } 1230 } 1231 1232 // 1233 // Now that we have the locked mount point vnode, 1234 // the rest of the work is common. 1235 // Note that we transfer our hold on vp to the vfs_t. 1236 // 1237 add_notify_common_v1(ma, mntoptions, fsptr, fsinfo, vp); 1238 } 1239 1240 // 1241 // Respond to a request from the VFS list server to remove a file system from 1242 // this node's list. 1243 // Note: we assume that the caller has globally locked the relevant mountpoint. 1244 // 1245 // Note: this routine can be called multiple times if the primary 1246 // fails over and the new primary retries this operation so 1247 // this operation must be idempotent. 1248 // 1249 void 1250 mount_client_impl::remove_notify(const char *mountpoint, 1251 const char *, bool, Environment &_environment) 1252 { 1253 CL_PANIC(0); 1254 } 1255 1256 // 1257 // Respond to a request from the VFS list server to remove a file system from 1258 // this node's list. 1259 // Note: we assume that the caller has globally locked the relevant mountpoint. 
//
// Note: this routine can be called multiple times if the primary
// fails over and the new primary retries this operation so
// this operation must be idempotent.
//
// This supports forced unmount.
//
// Locking: unmount_lock is acquired here.  On the retry path it is
// released before returning; otherwise it is still held on entry to
// remove_notify_v1(), which releases it.
//
void
mount_client_impl::remove_notify_1(const char *mountpoint, bool unlink_vfs,
    Environment &_environment)
{
	unmount_lock.lock();
	if ((unmount_pxvfs_v1_p == NULL) || _environment.is_orphan()) {
		unmount_lock.unlock();
		//
		// This should be a retry after the mount server failed
		// over to another node.
		//
		return;
	}

	// unmount_lock is intentionally still held across this call.
	remove_notify_v1(mountpoint, unlink_vfs);
}

//
// remove_notify_v1
//
// Respond to a request from the VFS list server to remove a file system from
// this node's list.
//
// Must be entered with unmount_lock held and unmount_pxvfs_v1_p non-NULL
// (see remove_notify_1()); the lock is released here.
//
// If unlink_vfs is true the vfs is also removed from the vfs list and the
// covered vnode is unlocked.
//
// Note: we assume that the caller has globally locked the relevant mountpoint.
//
// Note: this routine can be called multiple times if the primary
// fails over and the new primary retries this operation so
// this operation must be idempotent.
//
void
mount_client_impl::remove_notify_v1(const char *mountpoint, bool unlink_vfs)
{
	//
	// Note: if the mount server calls us and then fails before sending
	// a checkpoint to its secondary, the new primary could call us
	// "at the same time".  We claim unmount_pxvfs_v1_p and clear it
	// while unmount_lock (taken by our caller) is still held, so a
	// racing retry sees NULL and returns early.
	// NOTE(review): the earlier wording of this comment spoke of
	// avoiding a mutex; the code now relies on unmount_lock.
	//
	pxvfs *pxvfsp = unmount_pxvfs_v1_p;
	unmount_pxvfs_v1_p = NULL;
	unmount_lock.unlock();

	vfs_t *vfsp = pxvfsp->get_vfsp();
	ASSERT(vfsp != NULL);

	MOUNT_DBPRINTF(
	    MOUNT_TRACE_CLIENT,
	    MOUNT_AMBER,
	    ("client:remove_notify_v1 %s pxvfsp %p\n",
	    mountpoint, pxvfsp));

	//
	// Release the unmount lock and wake up any sleepers waiting to create
	// pxfobjs.
	//
	pxvfsp->unmount_succeeded();

	//
	// Remove vfsp from the vfs list and release it.
	// The covered vnode is held across vfs_remove() so that it can
	// still be unlocked afterwards.
	//
	if (unlink_vfs) {
		vnode_t *coveredvp = vfsp->vfs_vnodecovered;
		ASSERT(coveredvp != NULL);
		VN_HOLD(coveredvp);
		vfs_remove(vfsp);
		vn_vfsunlock(coveredvp);
		VN_RELE(coveredvp);
	}
}

//
// Remove a file system from the name space.
// This should be called if this node is not the server for the file
// system being removed or it is not the last replica (is_ha_repl is true).
//
// Note: this routine can be called multiple times if the primary
// fails over and the new primary retries this operation so
// this operation must be idempotent.
//
// This (non-v1) entry point is not expected to be invoked; it panics
// unconditionally.
//
void
mount_client_impl::remove_client(const char *, const char *spec,
    bool is_ha_repl, const char *, fs::filesystem_ptr fsptr,
    solobj::cred_ptr credobj, Environment &_environment)
{
	CL_PANIC(0);
}

//
// Remove a file system from the name space.
// This should be called if this node is not the server for the file
// system being removed or it is not the last replica (is_ha_repl is true).
//
// Parameters:
//	spec		- name used to look up the replica service when
//			  is_ha_repl is true.
//	is_ha_repl	- if true, this node's replica provider is removed
//			  from the service after the unmount.
//	fsptr		- file system object whose local proxy is removed.
//	credobj		- credentials used when purging caches.
//	_environment	- receives EBUSY if the covered vnode cannot be
//			  locked or the caches cannot be purged.  Failures
//			  while shutting down the replica are logged and
//			  cleared, not returned.
//
// Note: this routine can be called multiple times if the primary
// fails over and the new primary retries this operation so
// this operation must be idempotent.
//
void
mount_client_impl::remove_client_v1(const char *, const char *spec,
    bool is_ha_repl, const char *, pxfs_v1::filesystem_ptr fsptr,
    solobj::cred_ptr credobj, Environment &_environment)
{
	//
	// Obtain the proxy vfs struct for fs.
	//
	pxvfs *pxvfsp =
	    pxvfs::find_pxvfs(fsptr, NULL);
	if (pxvfsp == NULL) {
		// Must be a retry or was unmounted before we got here.
		return;
	}

	vfs_t *vfsp = pxvfsp->get_vfsp();
	ASSERT(vfsp != NULL);

	if (vn_vfswlock(vfsp->vfs_vnodecovered)) {
		// Release the hold we got from find_pxvfs().
		VFS_RELE(vfsp);

		// Couldn't get the covered mount point lock.
		pxfslib::throw_exception(_environment, EBUSY);
		return;
	}

	// A non-zero result from purge_caches() is reported as EBUSY.
	cred_t *credp = solobj_impl::conv(credobj);
	if (pxvfsp->purge_caches(true, credp)) {
		vn_vfsunlock(vfsp->vfs_vnodecovered);

		// Release the hold we got from find_pxvfs().
		VFS_RELE(vfsp);

		pxfslib::throw_exception(_environment, EBUSY);
		return;
	}

	//
	// Release the fsmgr_client/pxvfs binding.
	//
	pxvfsp->unmount_succeeded();

	//
	// Remove vfsp from the vfs list and release it.
	// The covered vnode is held across vfs_remove() so that it can
	// still be unlocked afterwards.
	//
	vnode_t *coveredvp = vfsp->vfs_vnodecovered;
	ASSERT(coveredvp != NULL);
	VN_HOLD(coveredvp);
	vfs_remove(vfsp);
	vn_vfsunlock(coveredvp);
	VN_RELE(coveredvp);

	// Release the hold for unmount_pxvfsp we got from find_pxvfs().
	VFS_RELE(vfsp);

	// Holds this node's number as a string (the replica provider name).
	char name[20];

	//
	// Shutdown replica if needed.
	//
	if (is_ha_repl) {
		replica::service_admin_var sa =
		    pxfslib::get_service_admin_ref(
		    "mount_client_impl::remove_client", spec,
		    _environment);
		if (_environment.exception()) {
			//
			// Need to shut down this replica but
			// can't get the service_admin object to do it.
			//
			// The only reason get_service_admin_ref()
			// should fail is if service
			// registration failed/never occurred,
			// or if the RM is in the process
			// of shutting down the service.
			//
#ifdef DEBUG
			_environment.exception()->print_exception(
			    "mount_client_impl::remove_client "
			    "get_service_admin_ref()"); // XXX
#endif
			MOUNT_DBPRINTF(
			    MOUNT_TRACE_CLIENT,
			    MOUNT_AMBER,
			    ("client:remove_client_v1 failed get_service_admin "
			    "spec %s\n",
			    spec));
			_environment.clear();
		} else {
			os::sprintf(name, "%d", orb_conf::node_number());
			sa->change_repl_prov_status(name,
			    replica::SC_REMOVE_REPL_PROV, true, _environment);
			if (_environment.exception()) {
				os::sc_syslog_msg msg(
				    SC_SYSLOG_GLOBAL_MOUNT_TAG,
				    NULL, NULL);
				//
				// SCMSGS
				// @explanation
				// The system was unable to remove a PXFS
				// replica on the node that this message was
				// seen.
				// @user_action
				// Contact your authorized Sun service
				// provider to determine whether a workaround
				// or patch is available.
				//
				(void) msg.log(SC_SYSLOG_WARNING, MESSAGE,
				    "mount_client_impl::remove_client()"
				    " failed attempted"
				    " RM change_repl_prov_status() to"
				    " remove client,"
				    " spec %s, name %s",
				    spec, name);
				_environment.clear();
			}
		}
	}
}

//
// Receive notification from server that proxy file system
// flags need to be set; used to implement remounts.
// It also unlocks the mount point and updates /etc/mnttab.
//
// Note: this routine can be called multiple times if the primary
// fails over and the new primary retries this operation so
// this operation must be idempotent.
//
// This (non-v1) entry point is not expected to be invoked; it panics
// unconditionally.
//
void
mount_client_impl::set_flags(const sol::mounta &, const char *mntoptions,
    fs::filesystem_ptr fsptr, uint32_t vfsflags, Environment &_environment)
{
	CL_PANIC(0);
}

//
// Receive notification from server that proxy file system
// flags need to be set; used to implement remounts.
1503 // It also unlocks the mount point and updates /etc/mnttab. 1504 // 1505 // Note: this routine can be called multiple times if the primary 1506 // fails over and the new primary retries this operation so 1507 // this operation must be idempotent. 1508 // 1509 void 1510 mount_client_impl::set_flags_v1(const sol::mounta &, const char *mntoptions, 1511 pxfs_v1::filesystem_ptr fsptr, uint32_t vfsflags, Environment &_environment) 1512 { 1513 // 1514 // If the mount point vnode is not set, this is a retry from a new 1515 // primary. 1516 // 1517 mount_lock.lock(); 1518 if (mntvp == NULL || _environment.is_orphan()) { 1519 mount_lock.unlock(); 1520 return; 1521 } 1522 1523 vnode_t *vp = mntvp; 1524 mntvp = NULL; 1525 mount_lock.unlock(); 1526 1527 pxvfs *pxvfsp = pxvfs::find_pxvfs(fsptr, NULL); 1528 // XXX Same bug possible as prepare_unmount(). 1529 ASSERT(pxvfsp != NULL); 1530 pxvfsp->set_mntoptions(mntoptions); 1531 vfs_t *vfsp = pxvfsp->get_vfsp(); 1532 1533 // Release the hold we got from find_pxvfs() 1534 VFS_RELE(vfsp); 1535 ASSERT(vfsp != NULL); 1536 1537 vfsp->vfs_flag = vfsflags; 1538 vfsp->vfs_mtime = ddi_get_time(); 1539 1540 // 1541 // Free the old mount options table and add a new one - then 1542 // fill the new table with the options. 1543 // Update the mnttab modification time. The function 1544 // vfs_mnttab_modtimeupd is declared static in vfs.c for 1545 // Solaris 8 and 9. For Solaris 10, it is global. 1546 // 1547 #ifdef GLOBAL_MNTTAB_MODTIME_INTERFACE 1548 vfs_list_lock(); 1549 vfs_createopttbl(&vfsp->vfs_mntopts, mntoptions); 1550 vfs_parsemntopts(&vfsp->vfs_mntopts, (char *)mntoptions, 1); 1551 vfs_mnttab_modtimeupd(); 1552 vfs_list_unlock(); 1553 #else 1554 vfs_createopttbl(&vfsp->vfs_mntopts, mntoptions); 1555 vfs_parsemntopts(&vfsp->vfs_mntopts, (char *)mntoptions, 1); 1556 gethrestime(&vfs_mnttab_mtime); 1557 #endif 1558 1559 // Unlock the mount point. 
1560 vn_vfsunlock(vp); 1561 VN_RELE(vp); 1562 } 1563 1564 // 1565 // Register the mount client with the mount server. 1566 // This is called by _cladm(CL_INITIALIZE, CL_GBLMNT_ENABLE). 1567 // The return value is the errno value for the system call. 1568 // 1569 int 1570 mount_client_impl::activate() 1571 { 1572 // 1573 // Check to make sure clexecd is running. 1574 // 1575 char name[20]; 1576 Environment e; 1577 1578 repl_pxfs::ha_mounter_var mounter; 1579 naming::naming_context_var ctxp = ns::root_nameserver(); 1580 1581 os::sprintf(name, "ha_mounter.%d", orb_conf::node_number()); 1582 1583 CORBA::Object_var obj = ctxp->resolve(name, e); 1584 if (e.exception() == NULL) { 1585 mounter = repl_pxfs::ha_mounter::_narrow(obj); 1586 } 1587 if ((e.exception() != NULL) || CORBA::is_nil(mounter)) { 1588 if (e.exception() != NULL) { 1589 CLEXEC_EXCEPTION(e, "mount_client_impl::activate", 1590 "ha_mounter"); 1591 e.clear(); 1592 } else { 1593 CLADM_DBPRINTF(CLADM_TRACE_CLEXEC, CLADM_RED, 1594 ("mount_client_impl::activate:%s" 1595 " CORBA::is_nil(mounter)\n", name)); 1596 } 1597 MOUNT_DBPRINTF( 1598 MOUNT_TRACE_CLIENT, 1599 MOUNT_RED, 1600 ("client:activate: resolve %s failed\n", 1601 name)); 1602 return (EAGAIN); 1603 } 1604 1605 bool alive = mounter->is_alive(e); 1606 1607 CLADM_DBPRINTF(CLADM_TRACE_CLEXEC, CLADM_GREEN, 1608 ("mount_client_impl::activate:%s alive %d except %p\n", 1609 name, (int)alive, e.exception())); 1610 1611 if (e.exception() != NULL || !alive) { 1612 // 1613 // XXX There is currently a bug where low memory can return 1614 // EAGAIN from the xdoor upcall. We attempt recovery here 1615 // for now. 
1616 // 1617 CORBA::SystemException *exp = 1618 CORBA::SystemException::_exnarrow(e.exception()); 1619 CLEXEC_EXCEPTION(e, "mount_client_impl::activate", "is_alive"); 1620 if (exp != NULL && exp->_minor() == EAGAIN) { 1621 e.clear(); 1622 alive = mounter->is_alive(e); 1623 CLADM_DBPRINTF(CLADM_TRACE_CLEXEC, CLADM_AMBER, 1624 ("mount_client_impl::activate:%s alive %d exp %p\n", 1625 name, (int)alive, e.exception())); 1626 if (e.exception() != NULL || !alive) { 1627 CLEXEC_EXCEPTION(e, 1628 "mount_client_impl::activate", "is_alive"); 1629 e.clear(); 1630 MOUNT_DBPRINTF( 1631 MOUNT_TRACE_CLIENT, 1632 MOUNT_RED, 1633 ("client:activate: 1: " 1634 "clexecd does not appear to be running\n")); 1635 return (EAGAIN); 1636 } 1637 } else { 1638 if (e.exception() != NULL) { 1639 #ifdef DEBUG 1640 // XXX 1641 e.exception()->print_exception( 1642 "mount_client_impl::activate: "); 1643 #endif 1644 MOUNT_DBPRINTF( 1645 MOUNT_TRACE_CLIENT, 1646 MOUNT_RED, 1647 ("client:activate " 1648 "failed mounter::is_alive\n")); 1649 } 1650 e.clear(); 1651 MOUNT_DBPRINTF( 1652 MOUNT_TRACE_CLIENT, 1653 MOUNT_RED, 1654 ("client:activate: " 1655 "clexecd does not appear to be running\n")); 1656 CLADM_DBPRINTF(CLADM_TRACE_CLEXEC, CLADM_RED, 1657 ("mount_client_impl::activate %s" 1658 " clexecd does not appear to be running\n", 1659 name)); 1660 return (EAGAIN); 1661 } 1662 } 1663 1664 // 1665 // Ideally, there should be checks here that verify that the 1666 // prerequisites for establishing the global name space have been 1667 // satisfied. However, this would amount to checking that the mount 1668 // point(s) into which the base global mount(s) would be done exist, 1669 // which is infeasible to do here. 1670 // 1671 1672 // 1673 // Ensure idempotence. 1674 // 1675 mount_client_lock.lock(); 1676 if (this_mount_client != NULL) { 1677 mount_client_lock.unlock(); 1678 return (0); 1679 } 1680 1681 // 1682 // Get the global mount server object. 
1683 // 1684 replica::service_admin_var sa = 1685 pxfslib::get_service_admin_ref( 1686 "mount_client_impl::startup", "mount", e); 1687 if (e.exception()) { 1688 mount_client_lock.unlock(); 1689 e.clear(); 1690 return (EIO); 1691 } 1692 obj = sa->get_root_obj(e); 1693 if (e.exception()) { 1694 mount_client_lock.unlock(); 1695 e.clear(); 1696 return (EIO); 1697 } 1698 1699 mount_client_impl *mcp = new mount_client_impl(); 1700 mcp->server = fs::mount_server::_narrow(obj); 1701 ASSERT(!CORBA::is_nil(mcp->server)); 1702 1703 // 1704 // Register with mount_server. 1705 // 1706 fs::mount_client_var clientv = mcp->get_objref(); 1707 mcp->server->add_client(clientv, orb_conf::node_number(), 1708 mcp->keepalive, e); 1709 CORBA::Exception *ex = e.exception(); 1710 if (ex != NULL) { 1711 int error; 1712 1713 // 1714 // Note: mcp will be deleted when 1715 // mount_client_impl::_unreferenced() is called. 1716 // 1717 mount_client_lock.unlock(); 1718 1719 fs::mount_server::mount_err *merrp = 1720 fs::mount_server::mount_err::_exnarrow(ex); 1721 if (merrp != NULL) { 1722 error = merrp->error; 1723 MOUNT_DBPRINTF( 1724 MOUNT_TRACE_CLIENT, 1725 MOUNT_RED, 1726 ("client:activate add_client failed %s\n", 1727 (const char *)merrp->mntpnt)); 1728 } else { 1729 error = pxfslib::get_err(e); 1730 MOUNT_DBPRINTF( 1731 MOUNT_TRACE_CLIENT, 1732 MOUNT_RED, 1733 ("client:activate add_client failed\n")); 1734 e.exception()->print_exception( 1735 "mount_client_impl::activate"); // XXX 1736 } 1737 e.clear(); 1738 return (error); 1739 } 1740 1741 this_mount_client = mcp; 1742 mount_client_lock.unlock(); 1743 1744 return (0); 1745 } 1746 1747 1748 // 1749 // Return true if activate() has been called. 1750 // 1751 bool 1752 mount_client_impl::is_activated() 1753 { 1754 return (this_mount_client != NULL); 1755 } 1756 1757 // 1758 // Common code for add_notify_locked_v1() and add_notify_v1(). 
1759 // 1760 void 1761 mount_client_impl::add_notify_common_v1(const sol::mounta &ma, 1762 const char *mntoptions, 1763 pxfs_v1::filesystem_ptr fsptr, const pxfs_v1::fs_info &fsinfo, 1764 vnode_t *coveredvp) 1765 { 1766 // 1767 // Find or create a local pxvfs struct to act as a proxy for fs. 1768 // 1769 pxvfs *pxvfsp = pxvfs::find_pxvfs(fsptr, &fsinfo); 1770 ASSERT(pxvfsp != NULL); 1771 pxvfsp->set_mntoptions(mntoptions); 1772 vfs_t *vfsp = pxvfsp->get_vfsp(); 1773 ASSERT(vfsp != NULL); 1774 1775 MOUNT_DBPRINTF( 1776 MOUNT_TRACE_CLIENT, 1777 MOUNT_GREEN, 1778 ("client:add_notify_common_v1 add %s pxvfsp %p\n", 1779 (const char *)ma.dir, pxvfsp)); 1780 1781 // 1782 // The vfs-specific fields should be initialized unless we 1783 // switch to a lazy propagation of vfs list changes scheme. 1784 // Our current algorithm immediately ("eagerly") pushes vfs list 1785 // changes to the client, so we require 1786 // find_pxvfs() to fully initialize the vfs struct. 1787 // 1788 ASSERT(vfsp->vfs_fstype != 0); 1789 1790 // 1791 // Lock the vfs_t similar to domount(). 1792 // We should always get the lock since this vfs_t is newly created. 1793 // 1794 int error = vfs_lock(vfsp); 1795 ASSERT(error == 0); 1796 1797 // Check if this mount should not be visible via /etc/mnttab 1798 if (ma.flags & MS_NOMNTTAB) { 1799 vfsp->vfs_flag |= VFS_NOMNTTAB; 1800 } else { 1801 vfsp->vfs_flag &= ~VFS_NOMNTTAB; 1802 } 1803 1804 // 1805 // Initialize values for /etc/mnttab on this node. 1806 // 1807 if ((const char *)ma.spec == NULL || *(const char *)ma.spec == '\0') { 1808 vfs_setresource(vfsp, VFS_NORESOURCE); 1809 } else { 1810 vfs_setresource(vfsp, (const char *)ma.spec); 1811 } 1812 vfs_setmntpoint(vfsp, (const char *)ma.dir); 1813 // XXX Use global time? 1814 vfsp->vfs_mtime = ddi_get_time(); 1815 vfs_createopttbl(&vfsp->vfs_mntopts, mntoptions); 1816 vfs_parsemntopts(&vfsp->vfs_mntopts, (char *)mntoptions, 1); 1817 // 1818 // Hook vfsp into the local vfs list. 
1819 // Note: we don't need to have a cluster-wide lock held 1820 // on the vfs list because we expect the global mount point 1821 // locking to prevent any mount races on this mount point. 1822 // 1823 VFS_HOLD(vfsp); 1824 vfs_list_add(vfsp); 1825 1826 // 1827 // Splice the mount into the name space by setting v_vfsmountedhere 1828 // in the covered vnode. 1829 // Note that we transfer the hold on coveredvp to v_vfsmountedhere. 1830 // 1831 ASSERT(!vn_ismntpt(coveredvp)); 1832 coveredvp->v_vfsmountedhere = vfsp; 1833 vfsp->vfs_vnodecovered = coveredvp; 1834 1835 // Release the hold we got from find_pxvfs(). 1836 VFS_RELE(vfsp); 1837 1838 vfs_unlock(vfsp); 1839 vn_vfsunlock(coveredvp); 1840 } 1841 1842 // 1843 // Notify intent to mount a device. 1844 // Device is "locked" until it is either mounted or the requesting node dies. 1845 // 1846 int 1847 mount_client_impl::devlock(int cmd, struct pathname *devpnp) 1848 { 1849 // 1850 // Verify that the mount client is already active, returning 1851 // failure if it isn't. 1852 // 1853 if (!is_activated()) { 1854 return (ENODEV); 1855 } 1856 1857 Environment e; 1858 1859 // 1860 // If this is a request to unlock the device, unlock it. 1861 // 1862 if (cmd == CL_GBLMNT_UNLOCK) { 1863 get_server()->devunlock(devpnp->pn_path, e); 1864 return (pxfslib::get_err(e)); 1865 } 1866 1867 // 1868 // We have to save a copy of the path name since lookuppn() will 1869 // clobber it. 1870 // 1871 char *spec = os::strcpy(new char [devpnp->pn_pathlen + 1], 1872 devpnp->pn_path); 1873 1874 // 1875 // We try to determine if this node has a local connection to the device 1876 // (i.e., it can be a device replica). If we determine its not 1877 // local, return an error without trying to get the lock since 1878 // there is no point in attempting to start the service until one 1879 // of the nodes that can be a replica boots. 
1880 // 1881 vnode_t *vp; 1882 int error = lookuppn(devpnp, NULL, FOLLOW, NULL, &vp); 1883 if (error == 0) { 1884 // Check for a PXFS special file. 1885 if (vp->v_flag & VPXFS) { 1886 // 1887 // Contact DCS to get the list of nodes that this 1888 // device is attached to and whether or not its an 1889 // HA device. 1890 // 1891 bool dev_is_ha; 1892 CORBA::String_var dev_name; 1893 sol::nodeid_seq_t_var dev_nids; 1894 1895 error = dcs_get_configured_nodes(vp->v_rdev, 1896 fs::dc_callback::_nil(), 1897 dev_is_ha, dev_name, dev_nids); 1898 if (error == 0) { 1899 uint32_t i, n = dev_nids->length(); 1900 for (i = 0; i < n; i++) { 1901 if (dev_nids[i] == 1902 orb_conf::node_number()) { 1903 break; 1904 } 1905 } 1906 if (i == n) { 1907 // 1908 // We didn't find a local connection. 1909 // 1910 VN_RELE(vp); 1911 delete [] spec; 1912 return (ENXIO); 1913 } 1914 } 1915 } 1916 VN_RELE(vp); 1917 } 1918 1919 // Try to get the lock. 1920 fs::mount_client_var clientv = get_client_ref(); 1921 1922 bool log_message = B_TRUE; 1923 sol::nodeid_t lock_owner = 0; 1924 os::sc_syslog_msg msg(SC_SYSLOG_GLOBAL_MOUNT_TAG, NULL, NULL); 1925 do { 1926 get_server()->devlock(clientv, orb_conf::node_number(), 1927 spec, e); 1928 error = pxfslib::get_err(e); 1929 e.clear(); 1930 1931 if (error == ETIMEDOUT && log_message) { 1932 // 1933 // Try to get the id of the node holding the lock 1934 // If we don't find it, it would be because it was 1935 // released. We ignore this. 1936 // 1937 get_server()->get_devlock_owner(spec, lock_owner, e); 1938 if (lock_owner) { 1939 // Log an error message once. 1940 char nodename[CL_MAX_LEN + 1]; 1941 clconf_get_nodename(lock_owner, nodename); 1942 // 1943 // SCMSGS 1944 // @explanation 1945 // Sun Cluster boot is waiting for the 1946 // mentioned node to complete fsck/mount of a 1947 // global filesystem and release the lock on a 1948 // device. 
1949 // @user_action 1950 // Check the console of the specified cluster 1951 // node to see if any of the nodes are waiting 1952 // for a manual fsck to be done. If this is 1953 // so, exiting the shell after performing the 1954 // fsck will allow the boot of the other nodes 1955 // to continue. 1956 // 1957 (void) msg.log(SC_SYSLOG_NOTICE, MESSAGE, 1958 "Sun Cluster is waiting for lock on device " 1959 "%s. Lock is currently held by %s for " 1960 "fsck/mount.", 1961 spec, nodename); 1962 log_message = B_FALSE; 1963 } 1964 } 1965 } while (error == EAGAIN || error == ETIMEDOUT); 1966 1967 if (log_message == B_FALSE) { 1968 // 1969 // Log a message that the wait is over. 1970 // 1971 if (error == 0) { 1972 // 1973 // SCMSGS 1974 // @explanation 1975 // Sun Cluster successfully obtained a lock on a 1976 // device to perform fsck/mount. 1977 // @user_action 1978 // This is an informational message, no user action is 1979 // needed. 1980 // 1981 (void) msg.log(SC_SYSLOG_NOTICE, MESSAGE, 1982 "Lock on device %s obtained. Proceeding.", 1983 spec); 1984 } else { 1985 // 1986 // SCMSGS 1987 // @explanation 1988 // Sun Cluster was unable to lock a device. 1989 // @user_action 1990 // Check the error returned for why this happened. In 1991 // cases like an interrupted system call, no user 1992 // action is required. 1993 // 1994 (void) msg.log(SC_SYSLOG_NOTICE, MESSAGE, 1995 "Unable to lock device %s. Error (%s).", 1996 spec, strerror(error)); 1997 } 1998 } 1999 2000 delete [] spec; 2001 return (error); 2002 } 2003 2004 // 2005 // Called from cladmin() to import all global mounts 2006 // (i.e., "/usr/cluster/lib/sc/clconfig -g"). 
2007 // 2008 extern "C" int 2009 pxfs_mount_client_enable(int cmd, int onoff) 2010 { 2011 int error; 2012 2013 switch (cmd) { 2014 case CL_GBLMNT_ENABLE: 2015 if (onoff) { 2016 error = device_service_mgr::activate(); 2017 if (error == 0) 2018 error = mount_client_impl::activate(); 2019 return (error); 2020 } else { 2021 // cladmin returns ENOTSUP 2022 ASSERT(0); 2023 return (0); 2024 } 2025 case CL_SWITCHBACK_ENABLE: 2026 return (device_service_mgr::do_switchbacks()); 2027 2028 default: 2029 return (EINVAL); 2030 } 2031 } 2032 2033 // 2034 // Called from cladmin() to lock devices so they aren't fsck'ed by different 2035 // nodes at the same time (i.e., "/usr/cluster/lib/sc/clconfig -m devname"). 2036 // 2037 extern "C" int 2038 pxfs_mount_client_lock(int cmd, struct pathname *devpnp) 2039 { 2040 return (mount_client_impl::devlock(cmd, devpnp)); 2041 } 2042 2043 // 2044 // Called after the pxfs loadable module has been loaded. 2045 // 2046 sol::error_t 2047 pxfs_mount_client_startup() 2048 { 2049 extern int (*pxfs_mount_client_enable_ptr)(int cmd, int onoff); 2050 extern int (*pxfs_mount_client_lock_ptr)(int cmd, 2051 struct pathname *devpnp); 2052 pxfs_mount_client_enable_ptr = pxfs_mount_client_enable; 2053 pxfs_mount_client_lock_ptr = pxfs_mount_client_lock; 2054 return (0); 2055 } 2056 2057 // 2058 // Called before the pxfs loadable module is unloaded. 2059 // 2060 sol::error_t 2061 pxfs_mount_client_shutdown() 2062 { 2063 return (mount_client_impl::is_activated() ? EBUSY : 0); 2064 } 2065