Home | History | Annotate | Download | only in client
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the License).
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/CDDL.txt
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/CDDL.txt.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets [] replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #ifndef _PXVFS_H
     28 #define	_PXVFS_H
     29 
     30 #pragma ident	"@(#)pxvfs.h	1.20	08/05/20 SMI"
     31 
     32 #include <vm/page.h>
     33 #include <sys/vnode.h>
     34 #include <sys/flock.h>
     35 #include <sys/fs/pxfs_ki.h>
     36 
     37 #include <sys/refcnt.h>
     38 #include <sys/list_def.h>
     39 #include <sys/threadpool.h>
     40 #include <orb/monitor/monitor.h>
     41 
     42 #include "../version.h"
     43 #include <pxfs/common/pxfslib.h>
     44 #include PXFS_IDL(pxfs)
     45 #include <pxfs/client/pxfobj.h>
     46 #include <pxfs/client/pxfs_llm_callback_impl.h>
     47 
     48 #define	PXVFS_STATS_NUM_OPEN_FILES	0	// NOTE(review): PXVFS_STATS_* ids presumably index pxvfs::stats - confirm
     49 #define	PXVFS_STATS_ACCESS_TOKEN_HITS	1
     50 #define	PXVFS_STATS_ACCESS_TOKEN_MISSES	2
     51 #define	PXVFS_STATS_ACCESS_TOKEN_INVALS	3
     52 #define	PXVFS_STATS_ATTR_TOKEN_HITS	4
     53 #define	PXVFS_STATS_ATTR_TOKEN_MISSES	5
     54 #define	PXVFS_STATS_ATTR_TOKEN_INVALS	6
     55 #define	PXVFS_STATS_DATA_TOKEN_HITS	7
     56 #define	PXVFS_STATS_DATA_TOKEN_MISSES	8
     57 #define	PXVFS_STATS_DATA_TOKEN_INVALS	9
     58 #define	PXVFS_STATS_DATA_ALLOC		10
     59 #define	PXVFS_STATS_DATA_TOKEN_RETRIES	11
     60 #define	PXVFS_STATS_THROTTLING_HITS	12
     61 #define	PXVFS_STATS_MAX_NUM		13	// count of the PXVFS_STATS_* ids above (0..12)
     62 
        // NOTE(review): the PXVFS_NODE_STATS_* ids presumably index the
        // per-node statistics (pxvfs::node_stats) - confirm in the kstat setup.
     63 #define	PXVFS_NODE_STATS_NUM_OPEN_FILES	0
     64 #define	PXVFS_NODE_STATS_MAX_NUM	1	// count of the PXVFS_NODE_STATS_* ids above
     65 
     66 //
     67 // Used only when mounting without syncdir.
     68 // We are using a portion of the flag space from /usr/include/sys/file.h.
     69 // This isn't nice but memcache_impl::fsync() will treat the following
     70 // definition as an int instead of a flag to avoid collisions with
     71 // traditional fsync flags.
     72 //
     73 #define	PXFS_DESTROY_PAGES		0xffff
     74 
     75 extern struct vfsops *pxfs_vfsopsp;	// declared here, defined outside this header
     76 
     77 // Forward declarations. These classes are only named below (pointer
        // types, friend/template arguments), so their full definitions are
        // not required at this point.
     78 class pxvfs;
     79 class fsmgr_client_impl;
     80 class fobj_client_impl;
     81 class pxfobjplus;
     82 
     83 // Type for list of all pxvfs objects (used for pxvfs::all_pxvfs).
     84 typedef IntrList<pxvfs, _SList> pxvfs_list_t;
     85 
     86 //
     87 // pxvfs_inactive_task - this task represents a need to process inactive
     88 // proxy vnodes for a specific proxy file system (pxvfs).
     89 //
     90 // The proxy file system object (pxvfs) inherits from this class,
     91 // and is the only user of this task. This approach avoids
     92 // memory allocations when reaping inactive proxy vnodes.
     93 //
     94 class pxvfs_inactive_task : public defer_task {
     95 public:
        	// NOTE(review): execute() and task_done() are presumably the
        	// defer_task hooks run by the threadpool - confirm against
        	// <sys/threadpool.h>.
     96 	virtual void		execute();
     97 	virtual void		task_done();
     98 protected:
     99 	// A pxvfs_inactive_task should not be created by itself; the
        	// constructor is protected so only a derived class can build one.
    100 	pxvfs_inactive_task();
    101 
        	// Pure virtual: the derived class (pxvfs overrides this) supplies
        	// the pxvfs object associated with this task.
    102 	virtual pxvfs		*get_pxvfs() = 0;
    103 };
    104 
    105 //
    106 // pxvfs_inactive_threadpool - This threadpool processes all of the requests
    107 // to reap inactive proxy vnodes for individual proxy file systems.
    108 // There is one object of this type per client node.
    109 //
    110 class pxvfs_inactive_threadpool : public threadpool {
    111 public:
        	// Accessor for the threadpool object.
        	// NOTE(review): presumably dereferences
        	// the_pxvfs_inactive_threadpool, so startup() must have run
        	// first - confirm in the implementation.
    112 	static pxvfs_inactive_threadpool	&the();
    113 
        	// NOTE(review): presumably create/destroy the single instance
        	// held in the_pxvfs_inactive_threadpool - confirm.
    114 	static void		startup();
    115 	static void		shutdown();
    116 
    117 	virtual ~pxvfs_inactive_threadpool();
    118 
    119 private:
        	// Private constructor: code outside this class cannot create
        	// additional instances.
    120 	pxvfs_inactive_threadpool();
    121 
        	// Pointer to the instance handed out through the().
    122 	static pxvfs_inactive_threadpool	*the_pxvfs_inactive_threadpool;
    123 
    124 	// Disallow assignments and pass by value
        	// NOTE(review): operator= takes a non-const reference, unlike the
        	// copy constructor; being private it still blocks assignment
        	// from outside the class.
    125 	pxvfs_inactive_threadpool(const pxvfs_inactive_threadpool &);
    126 	pxvfs_inactive_threadpool &operator = (pxvfs_inactive_threadpool &);
    127 };
    128 
    129 
    130 //
    131 // pxvfs_list_elem - this class provides a wrapper for _SList::ListElem
    132 // in order to eliminate ambiguity between the multiple _SList::ListElem
    133 // in the class pxvfs. This particular class supports the ability to
    134 // place the proxy vfs object on a list of all proxy file systems.
    135 //
    136 class pxvfs_list_elem : public _SList::ListElem {
    137 protected:
        	// Protected: only derived classes (pxvfs) construct this wrapper.
        	// NOTE(review): the void * is presumably the owning object that
        	// is forwarded to _SList::ListElem - confirm in pxvfs_in.h.
    138 	pxvfs_list_elem(void *);
    139 };
    140 
    141 //
    142 // pxvfs - this is the client side proxy for the file system "vfs" structure.
    143 //	There is one of these objects per PXFS file system that is accessible
    144 //	on this client node.
    145 //
    146 // N.B.	Because pxvfs instances map 1-1 to vfs structs, and because we rely on
    147 //	the fact that there is at most one proxy on a given node per
    148 //	filesystem, this design does not allow us to mount more than one
    149 //	instance of a filesystem at a time.
    150 //
    151 // Note that proxy vnodes don't change the reference count on the pxvfs
    152 // since they all share the pointer stored in the v_vfsp->vfs_data pointer.
    153 // The reference count exists to fix the race where find_pxvfs()
    154 // returns a pointer, unmount deletes the pxvfs, and then an attempt
    155 // is made to use the (stale) pointer. Also, fsmgr_client_impl
    156 // holds a reference to the pxvfs since _unreferenced() is asynchronous.
    157 // XXX We should change this to use VFS_HOLD()/VFS_RELE() when
    158 // we implement forced unmount.
    159 //
    160 class pxvfs :
    161 	public pxvfs_list_elem,
    162 	public refcnt,
    163 	public pxvfs_inactive_task
    164 {
        	// Grants pxvfs_inactive_task access to the non-public members of
        	// this class.
    165 	friend class pxvfs_inactive_task;
    166 public:
    167 	//
    168 	// Functions to support Fastwrites.
    169 	//
    170 	bool		fastwrite_enabled();
    171 
    172 	uint32_t	get_fs_bsize(); // Returns filesystem block size
    173 
    174 	PXFS_VER::blkcnt_t	reserve_blocks(PXFS_VER::blkcnt_t want,
    175 					    bool no_redzone_wait);
    176 
    177 	void		revoke_blocks();
    178 
    179 	void		set_server_status(PXFS_VER::server_status_t);
    180 
    181 	// Note: constructor is protected.
    182 
    183 	virtual ~pxvfs();
    184 
    185 	// This method supports the pxvfs_inactive_task
        	// (it overrides the pure virtual declared there).
    186 	virtual pxvfs		*get_pxvfs();
    187 
    188 	//
    189 	// Accessor functions.
    190 	// Note that get_fsobj() does not do a _duplicate() and the pointer
    191 	// returned should not be released (pxvfs retains ownership).
    192 	// It can be used to get a pointer for doing remote invocations and
    193 	// to compare for equivalence.
    194 	//
    195 	PXFS_VER::filesystem_ptr	get_fsobj() const;
    196 	vfs_t				*get_vfsp() const;
    197 	uint32_t			get_server_incn();
    198 
    199 	//
    200 	// Called by mount_client_impl when the initial mount or a remount
    201 	// occur.
    202 	//
    203 	void	set_mntoptions(const char *mntoptions);
    204 
    205 	//
    206 	// This is called to unlink a proxy vnode from the list of all
    207 	// proxy vnodes for this file system.
    208 	//
    209 	void	pxfobj_inactive(pxfobj *pxfobjp);
    210 
    211 	// Remove a pxfobjplus from the hash table
    212 	bool	pxfobjplus_inactive(pxfobjplus *pxfobjplusp);
    213 
    214 	// Insert a new pxfobj into the hash table or return existing pxfobj.
    215 	pxfobj	*fobjhash_insert(pxfobj *new_pxfobjp);
    216 
    217 	// Add a pxfobj to the inactive list for cleanup processing
    218 	void	add_inactivelist(pxfobjplus *pxfobjplusp);
    219 
    220 	// function to sync all pxfs file systems.
    221 	static int	sync_all(short flag, cred *credp);
    222 
    223 	//
    224 	// Calls made to the cluster framework by the NLM when handling
    225 	// deaths/restarts of lockd/statd.  These calls arrive via calls in
    226 	// pxfs/server/nlm_pxfs.cc, which are exported directly to the Solaris
    227 	// NLM.
    228 	//
    229 	static void	set_nlm_status(int32_t nlmid,
    230 	    PXFS_VER::nlm_status status);
    231 
    232 	static void	remove_file_locks(int32_t sysid);
    233 
    234 	// Returns true if the underlying filesystem is a UFS filesystem.
    235 	bool	is_ufs();
    236 
    237 	void	new_file_system_primary(uint32_t server_incn,
    238 	    Environment &_environment);
    239 
    240 	// Replay all the sleeping locks that originated from this node.
    241 	void	replay_sleeping_locks();
    242 
    243 	//
    244 	// Purge all caches before unmounting/removing a PXFS file system.
    245 	// In case of a normal unmount, return true if file system
    246 	// is still in use (active vnodes present), and false otherwise.
    247 	// In case of a forced unmount, return false always.
    248 	//
    249 	bool	purge_caches(bool forced_unmount, cred *credp);
    250 
    251 	// This should be called when the global unmount has succeeded.
    252 	void	unmount_succeeded();
    253 
    254 	// This should be called when the global unmount has failed.
    255 	void	unmount_failed();
    256 
    257 	// This is called to clean up if the file system server crashes.
    258 	void	cleanup();
    259 
    260 	//
    261 	// Routines to insert and remove sleeping lock callback objects from
    262 	// the stored list.
    263 	//
    264 	void	insert_llm_cbobj(pxfs_llm_callback_impl *llmp);
    265 	void	remove_llm_cbobj(pxfs_llm_callback_impl *llmp);
    266 
    267 	//
    268 	// Function to find or create proxy vnodes for this pxfs file system.
    269 	// The vnode is returned held and the caller should call VN_RELE()
    270 	// when finished using the vnode.
    271 	//
    272 	vnode	*get_pxfobj(PXFS_VER::fobj_ptr fobjp,
    273 	    const PXFS_VER::fobj_info &fobjinfo,
    274 	    PXFS_VER::bind_info *binfop,
    275 	    fobj_client_impl *clientp);
    276 
    277 	//
    278 	// Find the proxy file object for the specified FID
    279 	//
    280 	pxfobj	*fid_to_proxy_file(fid_t *fidp);
    281 
    282 	//
    283 	// VFS operations dispatched from px_vfsops.cc
    284 	//
    285 	// XXX: Do these methods really need to be virtual?
    286 	// It doesn't seem like it, but we need to carefully
    287 	// consider the procfs case before changing them.
    288 	//
    289 	virtual int	mountroot(enum whymountroot why);
    290 	virtual int	unmount(int, cred *credp);
    291 	virtual int	root(vnode **vpp);
    292 	virtual int	statvfs(struct statvfs64 *sp);
    293 	virtual int	sync(short flag, cred *credp);
    294 	virtual int	vget(vnode **vpp, struct fid *fidp);
    295 	virtual int	swapvp(vnode **vpp, char *nm);
    296 
    297 	// Static mount method: used by "mount -g" mounts.
    298 	static int	mount(vfs *vfsp, vnode *mvp, mounta *uap, cred *credp);
    299 
    300 	//
    301 	// Return the pxvfs structure for a given PXFS file system
    302 	// object. If 'fsinfop' is not NULL and 'fsobj' does not already
    303 	// have a proxy, then use the file system info to create a new proxy.
    304 	// Return NULL if the proxy could not be found or created.
    305 	// Otherwise, the pointer is returned held() and the caller should
    306 	// call rele() when finished using the pointer.
    307 	//
    308 	static pxvfs	*find_pxvfs(PXFS_VER::filesystem_ptr fsobj,
    309 	    const PXFS_VER::fs_info *fsinfop);
    310 
    311 	// Convert a vfs structure pointer to a pxvfs object pointer.
    312 	static pxvfs	*VFSTOPXFS(vfs_t *vfsp);
    313 
    314 	//
    315 	// Set the fstype field; called at boot time by Solaris.
    316 	//
    317 	static void	setfstype(int fstype);
    318 
    319 	static int	startup();
    320 	static int	shutdown();
    321 
    322 	static void	disable_unmounts();
    323 
    324 	//
    325 	// Query functions to retrieve the state of the 'syncdir' and 'nocto'
    326 	// mount options.
    327 	//
    328 	bool	is_syncdir_on() const;
    329 	bool	is_nocto_on() const;
    330 	bool	is_forcedirectio_on() const;
    331 
    332 	// Query function for force unmount operation.
    333 	bool is_unmounted();
    334 
        	// Values reported by get_underlying_fs_type(); UNKNOWN is 0.
    335 	enum underlying_fs_t {
    336 		UNKNOWN = 0,
    337 		UFS,
    338 		VXFS,
    339 		HSFS
    340 	};
    341 
    342 	underlying_fs_t	get_underlying_fs_type();
    343 
    344 	//
    345 	// Function to flush out all dirty CFS attributes when fs_flush calls
    346 	// vfs_sync(SYNC_ATTR)
    347 	//
    348 	static void	sync_all_attr(void *);
    349 
    350 	// Called to flush the filesystem's dirty data.
    351 	int sync_filesystem(cred *credp, bool revoke);
    352 
        	// One bucket of the proxy vnode hash table (see pxfobj_hash below).
    353 	class pxfobj_hash_bkt {
    354 	public:
    355 		pxfobj_list_t	hlist;		// linked list for this bucket
    356 		os::mutex_t	hlock;		// lock for this bucket.
    357 		uint_t 		hlist_cnt;	// Number files in this bucket
    358 
        		// A new bucket starts with a zero count.
    359 		pxfobj_hash_bkt() : hlist_cnt(0) {}
    360 	};
    361 
    362 	static void memory_callback(monitor::system_state_t);
    363 
    364 	//
    365 	// These methods are common to the node. Throttling and bandwidth
    366 	// calculations are common for all PxFS filesystems in the node.
    367 	// Hence the methods for doing the same are static.
    368 	//
    369 
    370 	// Method to implement throttling logic.
    371 	static void update_throughput(int bytes_xfrd);
    372 
    373 	// Allocate bandwidth from per-second quota.
    374 	static int wait_for_bandwidth(int bytes_needed, int &bytes_allocated);
    375 
    376 	// helper functions to track pending ios on the server due to this client
    377 	static void increment_io_pending(bool wait_for_slot);
    378 	static void decrement_io_pending();
    379 
    380 protected:
    381 
    382 	//
    383 	// Constructor: instantiated in mount() or find_pxvfs()
    384 	// after we have an fs.
    385 	//
    386 	pxvfs(PXFS_VER::filesystem_ptr fs, fsmgr_client_impl *clientmgrp,
    387 	    const PXFS_VER::fs_info *fsinfop, int fstype, vfs_t *vfsp,
    388 	    uint32_t server_incarn);
    389 
    390 	//
    391 	// Create a proxy fobj. This is to be called by get_pxfobj()
    392 	// when a new pxfobj actually needs to be created. It is
    393 	// declared protected and virtual so other file system
    394 	// implementations can override the default implementation.
    395 	//
    396 	// Note: this is virtual so that pxprocvfs can
    397 	// construct pxprocdir and pxprocreg proxy vnodes.
    398 	//
    399 	virtual pxfobj	*make_pxfobj(PXFS_VER::fobj_ptr fobjp,
    400 	    const PXFS_VER::fobj_info &fobjinfo, fobj_client_impl *clientp);
    401 
    402 private:
    403 	// Clean up inactive proxy vnodes
    404 	void		empty_inactive_list();
    405 
    406 	//
    407 	// Wait for the cleanup of inactive proxy vnodes.
    408 	// Return true if file system is busy.
    409 	//
    410 	bool		wait_empty_inactive_list(bool forced_unmount);
    411 
    412 	int		connect_again(vnode **vnodepp,
    413 	    PXFS_VER::fobj_ptr fobj_p,
    414 	    PXFS_VER::fobj_info &fobjinfo,
    415 	    PXFS_VER::bind_info &binfo,
    416 	    fobj_client_impl *client1p,
    417 	    PXFS_VER::fobj_client_ptr client1_p,
    418 	    Environment &e);
    419 
    420 	// Search the all_pxvfs list for the given file system object.
    421 	static pxvfs	*search(PXFS_VER::filesystem_ptr fsobj);
    422 
    423 	// Helper function to get configured node parameters.
    424 	static int	get_configured_nodes(bool &dev_is_ha,
    425 	    CORBA::String_out dev_name, dev_t devid,
    426 	    sol::nodeid_seq_t_out nodes, Environment &e);
    427 
    428 	// Return true if underlying file system type is supported by pxfs.
    429 	static bool	supported_bdev_fs(char *name);
    430 
    431 #ifndef VXFS_DISABLED
    432 	// Copy in all mount specific data and fix up mounta structure.
    433 	static int	vxfs_copyinargs(sol::mounta &ma);
    434 
    435 	//
    436 	// The following functions are to handle the different
    437 	// versions of the mount argument structures used by VxFS.
    438 	//
    439 	static int	vxfs_copyinargs_vxfs34(sol::mounta &ma);
    440 	static int	vxfs_copyinargs_vxfs35(sol::mounta &ma);
    441 	static int	vxfs_copyinargs_vxfs41(sol::mounta &ma);
    442 #endif
    443 
    444 	// Method to create the thread that monitors throttle values.
    445 	static int	launch_throttle_monitor_thread();
    446 
    447 	// Throttle monitor thread entry point.
    448 	static void	throttle_monitor_thread(void *);
    449 
    450 	//
    451 	// Calculate difference between timespec_t 'start' and 'end'
    452 	// in approximate milli-seconds.
    453 	//
    454 	static int64_t	diff_timespec(timespec_t start, timespec_t end);
    455 
    456 public:
    457 	//
    458 	// These variables make sure that only one instance of 'sync_all_attr'
    459 	// above is running at a time.
    460 	//
    461 	static bool		sync_all_attr_thread_running;
    462 	static os::mutex_t	sync_all_attr_lock;
    463 
    464 	//
    465 	// These variables control the flushing of attributes
    466 	//
    467 	static int		sync_all_attr_throttle;
    468 	static os::usec_t	sync_all_attr_interval[];
    469 
    470 	// Throttle for controlling filesystem sync.
    471 	static int		sync_filesystem_throttle;
    472 
    473 	// Per-proxy file system pxvfs statistics.
    474 	kstat_t			*stats;
    475 
    476 	// Per-node pxvfs statistics.
    477 	static kstat_t		*node_stats;
    478 
    479 	//
    480 	// Variables to track pending ios on server due to this client
    481 	//
    482 	// i/o requests to the server are complete only when the
    483 	// aio_callback routine signals completion. If we don't track
    484 	// the number of i/o requests pending in the server we can
    485 	// overload the server's i/o queues and cause the server to
    486 	// thrash.
    487 	//
    488 	// The members below keep track of pending ios due to page_out.
    489 	// Before issuing any new asynchronous i/o request we make sure
    490 	// that there aren't more than 'max_permitted_ios' pending.
    491 	// Synchronous requests will always increment and decrement pending
    492 	// count without waiting. This way synchronous requests get
    493 	// priority over asynchronous ones.
    494 	//
    495 	static os::mutex_t	io_pending_lock;
    496 	static os::condvar_t	io_pending_cv;
    497 	static int64_t		io_pending;
    498 
    499 	//
    500 	// This section declares write and throughput throttling
    501 	// variables. All of these are tunable.
    502 	//
    503 	// XXX: Default values for these variables should be tuned
    504 	// automatically based on total pages available in the node.
    505 	//
    506 	// Async task queue length throttling is separate from throughput
    507 	// throttling and is described and implemented in pxreg_v1.cc
    508 	//
    509 
    510 	//
    511 	// 'data_rate' is the perceived transfer rate to the server in
    512 	// bytes per second. This is re-calculated every second. On seconds
    513 	// without activity, data rate is set to the pre-configured default
    514 	// of 20MB/s. The lowest rate we allow by default is 2MB per
    515 	// second.
    516 	//
    517 	static int64_t data_rate;
    518 	static int64_t data_rate_minimum;
    519 	static int64_t data_rate_default;
    520 	static os::mutex_t data_rate_lock;
    521 
    522 	//
    523 	// For every page out, i.e. when the pages are about to be freed, we
    524 	// add the bytes transferred in that page_out to a global variable
    525 	// 'bytes_sent_in_second'. This is reset every second. The possible
    526 	// data rate in the next second is derived from this variable. We
    527 	// start off with 20MB.
    528 	//
    529 	static int64_t bytes_sent_in_second;
    530 
    531 	//
    532 	// 'bytes_written_in_second' is used to identify cases when writers
    533 	// have been throttled even when there is plenty of free memory.
    534 	// Low throughput should not result in throttling unless we are
    535 	// beginning to clog this node with dirty pages.
    536 	static int64_t bytes_written_in_second;
    537 
    538 	//
    539 	// From 'data_rate' we know what bytes per-second is possible.  To
    540 	// implement throttling of writers we use a static window of one
    541 	// second and a quota of bytes available for consumption in that
    542 	// window. This value is set to 'data_rate' every time throttle
    543 	// monitor thread runs.
    544 	//
    545 	// If a write call finds that there are not enough bytes in the
    546 	// quota to cover that write, then we throttle the writer by making
    547 	// it sleep till the per second quota is updated. After the quota
    548 	// update all waiting threads will be signaled. If the current
    549 	// write size is bigger than the maximum quota (i.e. quota of 20MB
    550 	// and the write is 30MB), the write waits for one quota update and
    551 	// proceeds.
    552 	//
    553 	static timespec_t	window_start;
    554 	static int64_t		bytes_in_window;
    555 
    556 	// Lock and cv to protect bandwidth allocation variables
    557 	static os::condvar_t	bandwidth_cv;
    558 	static os::mutex_t	bandwidth_lock;
    559 	static int		bandwidth_chunk;
    560 
    561 	//
    562 	// Frequency in microseconds at which to wakeup writers
    563 	// waiting for bandwidth.
    564 	//
    565 	static int throttle_monitor_interval;
    566 
    567 	//
    568 	// Maximum IO queue length allowed in server. This is an artificial
    569 	// calculation and need not reflect the real i/o queue lengths in
    570 	// the server.
    571 	//
    572 	static int max_permitted_ios;
    573 
    574 protected:
    575 	// PXFS Server file system object reference
    576 	PXFS_VER::filesystem_ptr	fs_fsobj;
    577 
    578 	// Proxy file system structure
    579 	vfs_t			*fs_vfs;
    580 
    581 	//
    582 	// Proxy file system root directory vnode
    583 	//
    584 	// XXX:	If we choose to support file systems that change their root
    585 	//	objects on the fly, this check will have to change.
    586 	//
    587 	vnode_t			*fs_rootvp;
    588 
    589 private:
    590 	bool		fastwrite;	// Indicates whether fastwrites are
    591 					// enabled/disabled.
    592 	uint32_t	pxfs_bsize;	// Fundamental filesystem block size
    593 	PXFS_VER::blkcnt_t	blocks_available;	// Number of blocks
    594 							// reserved for this
    595 							// client.
    596 	os::mutex_t	blocks_reservation;
    597 	os::condvar_t	blocks_reservation_cv;
    598 	PXFS_VER::server_status	pxvfs_status;
    599 
    600 	//
    601 	// This flag indicates whether an invocation to request more storage
    602 	// blocks is in progress. If the flag is true, threads needing blocks
    603 	// will sleep on blocks_reservation_cv and the blocks reservation
    604 	// mutex will be released. Those threads will be woken up when the
    605 	// block reservation invocation returns.
    606 	//
    607 	bool		blk_reserve_invo_in_progress;
    608 
    609 	//
    610 	// Identifies file system primary incarnation.
    611 	// Used to identify orphaned client registrations.
    612 	// Protected by server_incn_lock
    613 	//
    614 	uint32_t		server_incn;
    615 public:
        	// Protects 'server_incn' above.
    616 	os::rwlock_t		server_incn_lock;
    617 
    618 	// Count of async tasks queued up via all pxvfs's
    619 	static uint64_t		async_task_count;
    620 
    621 	// Threadpool to service async page operations on this file system.
    622 	threadpool	*mem_async_threadpool;
    623 private:
    624 
    625 	// Definitions for bits in 'flags' below.
    626 	enum {
    627 		//
    628 		// Set when beginning the process of unmounting
    629 		// this proxy file system. The unmount may succeed or fail.
    630 		// During this time attempts to activate files
    631 		// will be blocked until the unmount succeeds or fails.
    632 		//
    633 		PXFS_UNMOUNTING	= 0x01,
    634 
    635 		//
    636 		// Set if unmount() succeeded globally.
    637 		//
    638 		PXFS_UNMOUNTED	= 0x02,
    639 
    640 		//
    641 		// There are file activations waiting for unmount to finish.
    642 		//
    643 		PXFS_FILE_ACTIVATE	= 0x04,
    644 
    645 		//
    646 		// Set if sync(SYNC_CLOSE) is called.
    647 		//
    648 		PXFS_SHUTDOWN	= 0x08,
    649 
    650 		//
    651 		// Someone wants to know when all inactive proxy vnodes
    652 		// have been cleaned up.
    653 		//
    654 		PXFS_INACTIVE_WAIT	= 0x10,
    655 
    656 		//
    657 		// The task to clean up inactive proxy vnodes has been queued.
    658 		//
    659 		PXFS_TASK_QUEUED	= 0x20,
    660 
    661 		// The unmounting is forced.
    662 		PXFS_FORCE_UNMOUNTING = 0x40
    663 	};
    664 
    665 	//
    666 	// Start of the list of fields protected by the flags_lock
    667 	// The flags_lock cannot be held when performing a VN_RELE.
    668 	//
    669 	os::mutex_t	flags_lock;
    670 	os::condvar_t	flags_cv;		// signal changes to 'flags'
    671 	uint_t		flags;			// see enum above for values
    672 
    673 	//
    674 	// Number of active proxy vnodes.
    675 	// This includes all proxy vnodes that have not been
    676 	// deactivated, in other words destroyed.
    677 	//
    678 	uint_t		active_cnt;
    679 
    680 	//
    681 	// List of proxy vnodes awaiting possible deactivation.
    682 	// A proxy vnode can become active again while on this list.
    683 	// The system uses this list in order to allow a separate thread
    684 	// to process proxy vnode deactivation and avoid any possible deadlock.
    685 	//
    686 	pxfobj_list_t	inactive_list;
    687 
    688 	//
    689 	// Count of proxy vnodes queued on the inactive list
    690 	// plus the count of proxy vnodes that are being processed
    691 	// for deactivation.
    692 	//
    693 	// It takes a while to deactivate a proxy vnode,
    694 	// because dirty information must be flushed back to the server.
    695 	//
    696 	uint_t		inactive_list_cnt;
    697 
    698 	// End of the list of fields protected by the flags_lock
    699 
    700 	//
    701 	// Note: we keep a pointer to our local fsmgr_client rather
    702 	// than a CORBA pointer so we can detect when the server crashes.
    703 	// When fsmgr_client gets _unreferenced(), it will release its
    704 	// hold to the pxvfs.
    705 	//
    706 	fsmgr_client_impl	*fsmgr_client_implp;	// our fs manager
    707 
    708 	//
    709 	// Hash of proxy vnodes for all the pxfs file systems on this
    710 	// node. This is similar to the in-core inode hash table used
    711 	// by ufs.
    712 	//
    713 	static pxfobj_hash_bkt	*pxfobj_hash;
    714 	static uint_t	pxfobjhsz;	// Size of the hash table
    715 	static uint_t	pxfobjhsz_max;	// Maximum value for pxfobjhsz
    716 	static uint_t	pxfobjh_len;	// Desired size of each hash chain
    717 
    718 	pxfs_llm_callback_list_t llm_cb_list;	// list of all callback objects
    719 	os::mutex_t	llm_cb_list_lock;	// protects 'llm_cb_list'
    720 
    721 	static pxvfs_list_t all_pxvfs;		// list of all pxfs file systems
    722 	static os::rwlock_t all_pxvfs_lock;	// protects 'all_pxvfs'
    723 
    724 	static int	pxfstype;		// number assigned by Solaris
    725 	static bool	unmounts_disabled;
    726 
    727 	// Flags that indicate mount options.
    728 	bool	_syncdir_on;
    729 	bool	_nocto_on;
    730 	bool	_forcedirectio_on;
    731 
        	// Value returned by get_underlying_fs_type() (presumably; the
        	// accessor body is not in this header - confirm in pxvfs_in.h).
    732 	underlying_fs_t	underlying_fs;
    733 };
    734 
    735 #include <pxfs/client/pxvfs_in.h>
    736 
    737 #endif	/* _PXVFS_H */
    738