1 // 2 // CDDL HEADER START 3 // 4 // The contents of this file are subject to the terms of the 5 // Common Development and Distribution License (the License). 6 // You may not use this file except in compliance with the License. 7 // 8 // You can obtain a copy of the license at usr/src/CDDL.txt 9 // or http://www.opensolaris.org/os/licensing. 10 // See the License for the specific language governing permissions 11 // and limitations under the License. 12 // 13 // When distributing Covered Code, include this CDDL HEADER in each 14 // file and include the License file at usr/src/CDDL.txt. 15 // If applicable, add the following below this CDDL HEADER, with the 16 // fields enclosed by brackets [] replaced with your own identifying 17 // information: Portions Copyright [yyyy] [name of copyright owner] 18 // 19 // CDDL HEADER END 20 // 21 22 // 23 // Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 // Use is subject to license terms. 25 // 26 27 #ifndef REPL_PXFS_SERVER_H 28 #define REPL_PXFS_SERVER_H 29 30 #pragma ident "@(#)repl_pxfs_server.h 1.10 08/05/20 SMI" 31 32 #include <sys/vfs.h> 33 34 #include <sys/os.h> 35 #include <repl/service/replica_tmpl.h> 36 37 #include <pxfs/common/pxfslib.h> 38 #include "../version.h" 39 #include PXFS_IDL(pxfs) 40 #include PXFS_IDL(repl_pxfs) 41 42 // Forward declarations. 43 class fs_repl_impl; 44 class fobj_ii; 45 46 // 47 // HA service class for a single mounted pxfs file system. 48 // There is one of these for each mounted file system so that we can 49 // fail over file systems independently of eachother. 50 // 51 class repl_pxfs_server : public repl_server<REPL_PXFS_VER::fs_replica> { 52 public: 53 repl_pxfs_server(vnode_t *mvp, const sol::mounta &ma, cred_t *cr, 54 const char *id); 55 ~repl_pxfs_server(); 56 57 // Helper function to get the mount error (if any). 58 int get_mount_error() const; 59 60 // Update the mount arguments after a remount (see fs_ii::remount()). 61 void set_mountargs(const sol::mounta &ma); 62 63 // 64 // Helper function to mark this file system as unmounted. This 65 // method is used to manipulate the current replica only. In 66 // secondaries the check point function will set the flag. 67 // 68 void mark_fs_unmounted(); 69 70 // 71 // Tell whether this file system has been unmounted. 72 // 73 bool is_fs_unmounted(); 74 75 // Handle new invocations after service freeze has started. 76 bool check_freeze(Environment &env); 77 78 // Required functions for replica framework. 79 // replica::repl_prov::= 80 void become_secondary(Environment &_environment); 81 82 void add_secondary(replica::checkpoint_ptr sec_chkpt, 83 const char *secondary_name, Environment &_environment); 84 85 void remove_secondary(const char *secondary_name, 86 Environment &_environment); 87 88 void freeze_primary_prepare(Environment &_environment); 89 90 void freeze_primary(Environment &_environment); 91 92 void unfreeze_primary(Environment &_environment); 93 94 void become_primary(const replica::repl_name_seq &secondary_names, 95 Environment &_environment); 96 97 void become_spare(Environment &_environment); 98 99 void shutdown(Environment &_environment); 100 101 uint32_t forced_shutdown(Environment &_environment); 102 103 void shutdown_spare(replica::repl_prov_shutdown_type, 104 Environment &_environment); 105 106 CORBA::Object_ptr get_root_obj(Environment &_environment); 107 // 108 109 void ckpt_blocks_allocated( 110 const repl_pxfs_v1::blocks_allocated_t ¤t_allocations, 111 PXFS_VER::blkcnt_t blocks_free, Environment &_environment); 112 113 void ckpt_server_status(PXFS_VER::server_status_t status, 114 Environment &_environment); 115 116 // fs_replica::= 117 void ckpt_locks(PXFS_VER::fobj_ptr obj, 118 const REPL_PXFS_VER::lock_info_seq_t &locks, 119 Environment &_environment); 120 121 void ckpt_new_fsobj(PXFS_VER::filesystem_ptr fs_obj, 122 const char *mntoptions, Environment &_environment); 123 124 void ckpt_mnt_error(sol::error_t error, Environment &_environment); 125 126 void ckpt_entry_state(bool exists, Environment &_environment); 127 128 void ckpt_target(PXFS_VER::fobj_ptr obj, 129 const PXFS_VER::fobj_info &fobjinfo, 130 Environment &_environment); 131 132 void ckpt_target_remove(PXFS_VER::fobj_ptr obj, 133 const PXFS_VER::fobj_info &fobjinfo, 134 uint64_t delete_id, Environment &_environment); 135 136 void ckpt_fobj_return(PXFS_VER::fobj_ptr ret_obj, 137 const PXFS_VER::fobj_info &ret_info, Environment &_environment); 138 139 void ckpt_error_return(sol::error_t error, Environment &_environment); 140 141 void ckpt_new_fobj(PXFS_VER::fobj_ptr obj, 142 const PXFS_VER::fobjid_t &fid, 143 PXFS_VER::fobj_type_t type, 144 Environment &_environment); 145 146 void ckpt_delete_fobj(uint64_t delete_id, Environment &_environment); 147 148 void ckpt_deletecnt(uint64_t delete_id, Environment &_environment); 149 150 void ckpt_fobj_state(PXFS_VER::fobj_ptr obj, uint64_t delete_id, 151 Environment &_environment); 152 153 void ckpt_new_fsmgr(PXFS_VER::fsmgr_server_ptr servermgr, 154 PXFS_VER::fsmgr_client_ptr clientmgr, sol::nodeid_t nodeid, 155 Environment &_environment); 156 157 void ckpt_remount(const sol::mounta &ma, const char *mntoptions, 158 Environment &_environment); 159 160 void ckpt_lockfs_info(uint64_t lf_lock, uint64_t lf_flags, 161 uint64_t lf_key, const char *lf_comment, 162 Environment &_environment); 163 164 void ckpt_lockfs_start(uint64_t lf_lock, uint64_t lf_flags, 165 uint64_t lf_key, const char *lf_comment, 166 Environment &_environment); 167 168 void ckpt_lockfs_failure(sol::error_t err, Environment &_environment); 169 170 void ckpt_cachedata_flag(PXFS_VER::file_ptr obj, bool flag, 171 Environment &_environment); 172 173 void ckpt_vx_tunefs(const REPL_PXFS_VER::vx_tunefs_t &tunefs, 174 Environment &_environment); 175 176 void ckpt_remove_file_locks_by_sysid(int32_t sysid, 177 Environment &_environment); 178 179 void ckpt_remove_file_locks_by_nlmid(int32_t nlmid, 180 Environment &_environment); 181 182 void ckpt_service_version(unsigned short new_major, 183 unsigned short new_minor, Environment &); 184 185 void ckpt_fs_is_unmounted(Environment &_environment); 186 // 187 // Checkpoint accessor function. 188 REPL_PXFS_VER::fs_replica_ptr get_checkpoint_fs_replica(); 189 190 // 191 // Register with the Version Manager for upgrade callbacks. 192 // Called by the mount_client when instantiating a filesytem replica. 193 // 194 void upgrade_callback_register(const sol::mounta &); 195 196 // Unregister with the Version Manager 197 void upgrade_callback_unregister(); 198 199 // 200 // fs_version_callback_impl::do_callback is called by the 201 // version manager. That method calls here to pass on the version 202 // update. 203 // 204 void upgrade_callback(const version_manager::vp_version_t &, 205 Environment &); 206 207 // Set the initial protocol version number. 208 void set_version(const version_manager::vp_version_t &); 209 210 version_manager::upgrade_callback_var callback_object_v; 211 212 // 213 // These are both protected by version_lock. 214 // current_version is the versioned protocol number the version 215 // manager has told us we should be running as. 216 // pending_version is set when a secondary gets a callback before 217 // the primary sends a checkpoint so a failover during upgrade 218 // commit is processed correctly. 219 // 220 version_manager::vp_version_t current_version; 221 version_manager::vp_version_t pending_version; 222 223 // 224 // This lock protects 'current_version', _ckpt_proxy and 225 // provides locking between upgrade callbacks and become_primary(). 226 // It needs to be a rwlock since we make checkpoint calls while 227 // holding the lock. 228 // 229 os::rwlock_t version_lock; 230 231 // Helper function to track outstanding invocations. 232 void decrement_invo_count(); 233 234 private: 235 // 236 // Pointer to the fs that is being replicated. 237 // If the mount fails, mnt_error is set to the errno. 238 // We hold a reference to the file system so that _unreferenced() 239 // doesn't delete the file system object while we have a pointer 240 // to it. 241 // 242 PXFS_VER::filesystem_var fs_v; 243 fs_repl_impl *fsp; 244 int mnt_error; // saved errno from VFS_MOUNT() 245 246 // 247 // Our copy of the mountdata and mount point vnode. 248 // This is used to mount/remount the underlying file system. 249 // 250 sol::mounta mountdata; 251 vnode_t *mnt_vp; 252 cred_t *crp; 253 bool fs_is_unmounted; 254 255 // Checkpoint proxy. 256 REPL_PXFS_VER::fs_replica_ptr _ckpt_proxy; 257 258 // 259 // used for creating unique ucc names when registering with the 260 // version manager for callbacks to a filesystem replica. 261 // 262 static int unique_integer; 263 264 // 265 // A PxFS client issuing cascaded invocations results in the server 266 // issuing further invocations to other PxFS clients. Those clients 267 // in turn may need to issue further invocations to service the 268 // request. If the relevant PxFS service is frozen right after the 269 // server checked for service freeze and before a client issued an 270 // invocation to service the cascaded invocation, the client's new 271 // invocation will be frozen. The cascaded invocation will not 272 // complete until the client can complete it's invocation. That 273 // will not complete until the service is unfrozen. We have a 274 // deadlock. 275 // 276 // To address this deadlock we use check_freeze() method of PxFS 277 // replicated service. check_freeze() decided how to handle new 278 // requests depending on the state of the service. If the state is 279 // 'PRIMARY' new invocations will be allowed and the invocation 280 // count in incremented. If the current state is 'FREEZING' the 281 // thread blocks until all active invocations from the server have 282 // been serviced and the state of the this file system replicated 283 // service changes to 'FROZEN'. At this point blocked invocations 284 // are un-blocked and they return a 'PRIMARY_FROZEN' exception. The 285 // invocation will be retried by the client after the service is 286 // unfrozen. If current state is 'FROZEN' the thread will return a 287 // 'PRIMARY_FROZEN' exception immediately. 288 // 289 enum primary_state_t { 290 NOT_PRIMARY, // this is not the primary 291 PRIMARY, // Primary active and not frozen 292 FREEZING, // Primary blocks just deeply nested invos 293 FROZEN // Invos to primary return SERVICE_FROZEN 294 }; 295 296 primary_state_t replica_state; // Current state of provider 297 298 // 299 // Variables to track active invocations from this service. 300 // 301 int64_t active_invo_count; 302 os::mutex_t active_invo_lock; 303 os::condvar_t active_invo_cv; 304 }; 305 306 class fs_version_callback_impl : 307 public McServerof<version_manager::upgrade_callback> { 308 public: 309 fs_version_callback_impl(replica::repl_prov_ptr replica_p); 310 ~fs_version_callback_impl(); 311 312 void _unreferenced(unref_t); 313 314 // IDL methods. 315 void do_callback(const char *ucc_name, 316 const version_manager::vp_version_t &new_version, 317 Environment &_environment); 318 319 private: 320 replica::repl_prov_var prov_v; 321 }; 322 323 #include <pxfs/server/repl_pxfs_server_in.h> 324 325 #endif // REPL_PXFS_SERVER_H 326