Home | History | Annotate | Download | only in server
      1 //
      2 // CDDL HEADER START
      3 //
      4 // The contents of this file are subject to the terms of the
      5 // Common Development and Distribution License (the License).
      6 // You may not use this file except in compliance with the License.
      7 //
      8 // You can obtain a copy of the license at usr/src/CDDL.txt
      9 // or http://www.opensolaris.org/os/licensing.
     10 // See the License for the specific language governing permissions
     11 // and limitations under the License.
     12 //
     13 // When distributing Covered Code, include this CDDL HEADER in each
     14 // file and include the License file at usr/src/CDDL.txt.
     15 // If applicable, add the following below this CDDL HEADER, with the
     16 // fields enclosed by brackets [] replaced with your own identifying
     17 // information: Portions Copyright [yyyy] [name of copyright owner]
     18 //
     19 // CDDL HEADER END
     20 //
     21 
     22 //
     23 // Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24 // Use is subject to license terms.
     25 //
     26 
     27 #ifndef	REPL_PXFS_SERVER_H
     28 #define	REPL_PXFS_SERVER_H
     29 
     30 #pragma ident	"@(#)repl_pxfs_server.h	1.10	08/05/20 SMI"
     31 
     32 #include <sys/vfs.h>
     33 
     34 #include <sys/os.h>
     35 #include <repl/service/replica_tmpl.h>
     36 
     37 #include <pxfs/common/pxfslib.h>
     38 #include "../version.h"
     39 #include PXFS_IDL(pxfs)
     40 #include PXFS_IDL(repl_pxfs)
     41 
     42 // Forward declarations.
        // fs_repl_impl is only used below as a pointer type (the 'fsp' member of
        // repl_pxfs_server), so a forward declaration is sufficient here.
     43 class fs_repl_impl;
        // NOTE(review): fobj_ii is not named anywhere else in this header;
        // presumably it is needed by repl_pxfs_server_in.h, which is included
        // at the bottom of this file -- confirm before removing.
     44 class fobj_ii;
     45 
     46 //
     47 // HA service class for a single mounted pxfs file system.
     48 // There is one of these for each mounted file system so that we can
     49 // fail over file systems independently of each other.
     50 //
     51 class repl_pxfs_server : public repl_server<REPL_PXFS_VER::fs_replica> {
     52 public:
        	//
        	// NOTE(review): the constructor arguments presumably supply the
        	// mount point vnode, mount arguments and credentials that are
        	// kept in mnt_vp, mountdata and crp below; the meaning of 'id'
        	// is not described in this header. Confirm against the
        	// constructor definition.
        	//
     53 	repl_pxfs_server(vnode_t *mvp, const sol::mounta &ma, cred_t *cr,
     54 	    const char *id);
     55 	~repl_pxfs_server();
     56 
     57 	// Helper function to get the mount error (if any); see mnt_error.
     58 	int get_mount_error() const;
     59 
     60 	// Update the mount arguments after a remount (see fs_ii::remount()).
     61 	void set_mountargs(const sol::mounta &ma);
     62 
     63 	//
     64 	// Helper function to mark this file system as unmounted. This
     65 	// method is used to manipulate the current replica only. On
     66 	// secondaries the checkpoint function sets the flag instead.
     67 	//
     68 	void mark_fs_unmounted();
     69 
     70 	//
     71 	// Tell whether this file system has been unmounted.
     72 	//
     73 	bool is_fs_unmounted();
     74 
     75 	// Handle new invocations after service freeze has started.
        	// The per-state behavior is described in the comment above
        	// primary_state_t in the private section of this class.
     76 	bool	check_freeze(Environment &env);
     77 
     78 	// Required functions for replica framework.
     79 	// replica::repl_prov::=
     80 	void become_secondary(Environment &_environment);
     81 
     82 	void add_secondary(replica::checkpoint_ptr sec_chkpt,
     83 	    const char *secondary_name, Environment &_environment);
     84 
     85 	void remove_secondary(const char *secondary_name,
     86 	    Environment &_environment);
     87 
     88 	void freeze_primary_prepare(Environment &_environment);
     89 
     90 	void freeze_primary(Environment &_environment);
     91 
     92 	void unfreeze_primary(Environment &_environment);
     93 
     94 	void become_primary(const replica::repl_name_seq &secondary_names,
     95 	    Environment &_environment);
     96 
     97 	void become_spare(Environment &_environment);
     98 
     99 	void shutdown(Environment &_environment);
    100 
    101 	uint32_t forced_shutdown(Environment &_environment);
    102 
    103 	void shutdown_spare(replica::repl_prov_shutdown_type,
    104 		Environment &_environment);
    105 
    106 	CORBA::Object_ptr get_root_obj(Environment &_environment);
    107 	//
    108 
        	//
        	// NOTE(review): the next two checkpoint methods are declared
        	// ahead of the "fs_replica::=" marker below, and
        	// ckpt_blocks_allocated() names the repl_pxfs_v1 namespace
        	// directly instead of going through REPL_PXFS_VER. Confirm
        	// which interface declares them and whether that is intended.
        	//
    109 	void ckpt_blocks_allocated(
    110 	    const repl_pxfs_v1::blocks_allocated_t &current_allocations,
    111 	    PXFS_VER::blkcnt_t blocks_free, Environment &_environment);
    112 
    113 	void ckpt_server_status(PXFS_VER::server_status_t status,
    114 	    Environment &_environment);
    115 
    116 	// fs_replica::=
    117 	void ckpt_locks(PXFS_VER::fobj_ptr obj,
    118 	    const REPL_PXFS_VER::lock_info_seq_t &locks,
    119 	    Environment &_environment);
    120 
    121 	void ckpt_new_fsobj(PXFS_VER::filesystem_ptr fs_obj,
    122 	    const char *mntoptions, Environment &_environment);
    123 
    124 	void ckpt_mnt_error(sol::error_t error, Environment &_environment);
    125 
    126 	void ckpt_entry_state(bool exists, Environment &_environment);
    127 
    128 	void ckpt_target(PXFS_VER::fobj_ptr obj,
    129 	    const PXFS_VER::fobj_info &fobjinfo,
    130 	    Environment &_environment);
    131 
    132 	void ckpt_target_remove(PXFS_VER::fobj_ptr obj,
    133 	    const PXFS_VER::fobj_info &fobjinfo,
    134 	    uint64_t delete_id, Environment &_environment);
    135 
    136 	void ckpt_fobj_return(PXFS_VER::fobj_ptr ret_obj,
    137 	    const PXFS_VER::fobj_info &ret_info, Environment &_environment);
    138 
    139 	void ckpt_error_return(sol::error_t error, Environment &_environment);
    140 
    141 	void ckpt_new_fobj(PXFS_VER::fobj_ptr obj,
    142 	    const PXFS_VER::fobjid_t &fid,
    143 	    PXFS_VER::fobj_type_t type,
    144 	    Environment &_environment);
    145 
    146 	void ckpt_delete_fobj(uint64_t delete_id, Environment &_environment);
    147 
    148 	void ckpt_deletecnt(uint64_t delete_id, Environment &_environment);
    149 
    150 	void ckpt_fobj_state(PXFS_VER::fobj_ptr obj, uint64_t delete_id,
    151 	    Environment &_environment);
    152 
    153 	void ckpt_new_fsmgr(PXFS_VER::fsmgr_server_ptr servermgr,
    154 	    PXFS_VER::fsmgr_client_ptr clientmgr, sol::nodeid_t nodeid,
    155 	    Environment &_environment);
    156 
    157 	void ckpt_remount(const sol::mounta &ma, const char *mntoptions,
    158 	    Environment &_environment);
    159 
    160 	void ckpt_lockfs_info(uint64_t lf_lock, uint64_t lf_flags,
    161 	    uint64_t lf_key, const char *lf_comment,
    162 	    Environment &_environment);
    163 
    164 	void ckpt_lockfs_start(uint64_t lf_lock, uint64_t lf_flags,
    165 	    uint64_t lf_key, const char *lf_comment,
    166 	    Environment &_environment);
    167 
    168 	void ckpt_lockfs_failure(sol::error_t err, Environment &_environment);
    169 
    170 	void ckpt_cachedata_flag(PXFS_VER::file_ptr obj, bool flag,
    171 	    Environment &_environment);
    172 
    173 	void ckpt_vx_tunefs(const REPL_PXFS_VER::vx_tunefs_t &tunefs,
    174 	    Environment &_environment);
    175 
    176 	void ckpt_remove_file_locks_by_sysid(int32_t sysid,
    177 	    Environment &_environment);
    178 
    179 	void ckpt_remove_file_locks_by_nlmid(int32_t nlmid,
    180 	    Environment &_environment);
    181 
    182 	void ckpt_service_version(unsigned short new_major,
    183 	    unsigned short new_minor, Environment &);
    184 
    185 	void ckpt_fs_is_unmounted(Environment &_environment);
    186 	//
    187 	// Checkpoint accessor function.
        	// NOTE(review): presumably returns the _ckpt_proxy member
        	// declared below -- confirm against the inline definition.
    188 	REPL_PXFS_VER::fs_replica_ptr	get_checkpoint_fs_replica();
    189 
    190 	//
    191 	// Register with the Version Manager for upgrade callbacks.
    192 	// Called by the mount_client when instantiating a file system replica.
    193 	//
    194 	void upgrade_callback_register(const sol::mounta &);
    195 
    196 	// Unregister with the Version Manager.
    197 	void upgrade_callback_unregister();
    198 
    199 	//
    200 	// fs_version_callback_impl::do_callback is called by the
    201 	// version manager.  That method calls here to pass on the version
    202 	// update.
    203 	//
    204 	void upgrade_callback(const version_manager::vp_version_t &,
    205 	    Environment &);
    206 
    207 	// Set the initial protocol version number.
    208 	void set_version(const version_manager::vp_version_t &);
    209 
        	//
        	// Upgrade callback object reference.
        	// NOTE(review): presumably the object handed to the version
        	// manager by upgrade_callback_register() -- confirm. This and
        	// the data members below are public, so code outside this
        	// class may access them directly.
        	//
    210 	version_manager::upgrade_callback_var callback_object_v;
    211 
    212 	//
    213 	// These are both protected by version_lock.
    214 	// current_version is the versioned protocol number the version
    215 	// manager has told us we should be running as.
    216 	// pending_version is set when a secondary gets a callback before
    217 	// the primary sends a checkpoint so a failover during upgrade
    218 	// commit is processed correctly.
    219 	//
    220 	version_manager::vp_version_t current_version;
    221 	version_manager::vp_version_t pending_version;
    222 
    223 	//
    224 	// This lock protects 'current_version' and _ckpt_proxy, and
    225 	// provides locking between upgrade callbacks and become_primary().
    226 	// It needs to be a rwlock since we make checkpoint calls while
    227 	// holding the lock.
    228 	//
    229 	os::rwlock_t version_lock;
    230 
    231 	// Helper function to track outstanding invocations.
        	// NOTE(review): presumably undoes the invocation count
        	// increment that check_freeze() performs in the PRIMARY
        	// state (see active_invo_count) -- confirm.
    232 	void	decrement_invo_count();
    233 
    234 private:
    235 	//
    236 	// Pointer to the fs that is being replicated.
    237 	// If the mount fails, mnt_error is set to the errno.
    238 	// We hold a reference to the file system so that _unreferenced()
    239 	// doesn't delete the file system object while we have a pointer
    240 	// to it.
    241 	//
    242 	PXFS_VER::filesystem_var	fs_v;
    243 	fs_repl_impl	*fsp;
    244 	int		mnt_error;	// saved errno from VFS_MOUNT()
    245 
    246 	//
    247 	// Our copy of the mountdata and mount point vnode.
    248 	// This is used to mount/remount the underlying file system.
    249 	//
    250 	sol::mounta	mountdata;
    251 	vnode_t		*mnt_vp;
    252 	cred_t		*crp;
        	// NOTE(review): presumably the flag referred to by
        	// mark_fs_unmounted() / is_fs_unmounted() above -- confirm.
    253 	bool		fs_is_unmounted;
    254 
    255 	// Checkpoint proxy (protected by version_lock, see above).
    256 	REPL_PXFS_VER::fs_replica_ptr	_ckpt_proxy;
    257 
    258 	//
    259 	// Used for creating unique ucc names when registering with the
    260 	// version manager for callbacks to a file system replica.
    261 	//
    262 	static int	unique_integer;
    263 
    264 	//
    265 	// A PxFS client issuing cascaded invocations results in the server
    266 	// issuing further invocations to other PxFS clients. Those clients
    267 	// in turn may need to issue further invocations to service the
    268 	// request. If the relevant PxFS service is frozen right after the
    269 	// server checked for service freeze and before a client issued an
    270 	// invocation to service the cascaded invocation, the client's new
    271 	// invocation will be frozen. The cascaded invocation will not
    272 	// complete until the client can complete its invocation. That
    273 	// will not complete until the service is unfrozen. We have a
    274 	// deadlock.
    275 	//
    276 	// To address this deadlock we use the check_freeze() method of the
    277 	// PxFS replicated service. check_freeze() decides how to handle new
    278 	// requests depending on the state of the service. If the state is
    279 	// 'PRIMARY' new invocations will be allowed and the invocation
    280 	// count is incremented. If the current state is 'FREEZING' the
    281 	// thread blocks until all active invocations from the server have
    282 	// been serviced and the state of this file system replicated
    283 	// service changes to 'FROZEN'. At this point blocked invocations
    284 	// are un-blocked and they return a 'PRIMARY_FROZEN' exception. The
    285 	// invocation will be retried by the client after the service is
    286 	// unfrozen. If current state is 'FROZEN' the thread will return a
    287 	// 'PRIMARY_FROZEN' exception immediately.
    288 	//
        	// NOTE(review): the text above names the exception
        	// 'PRIMARY_FROZEN' while the comment on the FROZEN enumerator
        	// below says SERVICE_FROZEN; confirm which name is correct.
        	//
    289 	enum primary_state_t {
    290 		NOT_PRIMARY,	// this is not the primary
    291 		PRIMARY,	// Primary active and not frozen
    292 		FREEZING,	// Primary blocks just deeply nested invos
    293 		FROZEN		// Invos to primary return SERVICE_FROZEN
    294 	};
    295 
    296 	primary_state_t	replica_state;	// Current state of provider
    297 
    298 	//
    299 	// Variables to track active invocations from this service.
        	// NOTE(review): active_invo_lock and active_invo_cv presumably
        	// guard, and signal changes to, active_invo_count and are what
        	// a thread in the FREEZING state waits on -- confirm.
    300 	//
    301 	int64_t		active_invo_count;
    302 	os::mutex_t	active_invo_lock;
    303 	os::condvar_t	active_invo_cv;
    304 };
    305 
    306 class fs_version_callback_impl :
    307     public McServerof<version_manager::upgrade_callback> {
        	//
        	// Implementation of the version_manager::upgrade_callback
        	// interface for a file system replica. Per the comment on
        	// repl_pxfs_server::upgrade_callback(), do_callback() is called
        	// by the version manager and passes the version update on to
        	// repl_pxfs_server::upgrade_callback().
        	//
    308 public:
        	// NOTE(review): replica_p is presumably retained in prov_v so
        	// that do_callback() can reach the provider -- confirm.
    309 	fs_version_callback_impl(replica::repl_prov_ptr replica_p);
    310 	~fs_version_callback_impl();
    311 
        	// NOTE(review): presumably the framework's notification that no
        	// references to this object remain -- confirm what cleanup the
        	// definition performs.
    312 	void _unreferenced(unref_t);
    313 
    314 	// IDL methods.
        	// do_callback() receives the ucc name the callback was
        	// registered under and the new version from the version manager.
    315 	void do_callback(const char *ucc_name,
    316 	    const version_manager::vp_version_t &new_version,
    317 	    Environment &_environment);
    318 
    319 private:
        	// Reference to a replica provider (replica::repl_prov).
    320 	replica::repl_prov_var prov_v;
    321 };
    322 
    323 #include <pxfs/server/repl_pxfs_server_in.h>
    324 
    325 #endif	// REPL_PXFS_SERVER_H
    326