Home | History | Annotate | Download | only in nfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #ifndef _NFS4_PNFS_H
     27 #define	_NFS4_PNFS_H
     28 
     29 /*
     30  * Generic and file layout specific pNFS support.
     31  */
     32 
     33 #ifdef __cplusplus
     34 extern "C" {
     35 #endif
     36 
     37 #include <sys/types.h>
     38 #include <sys/avl.h>
     39 #include <nfs/nfs4.h>
     40 #include <nfs/nfs4_clnt.h>
     41 #include <nfs/rnode4.h>
     42 #include <nfs/nfs4_kprot.h>
     43 #include <nfs/nfssys.h>
     44 #include <sys/systm.h>
     45 #include <sys/taskq.h>
     46 #include <sys/disp.h>
     47 #include <sys/time.h>
     48 #include <rpc/xdr.h>
     49 #include <inet/ip.h>
     50 #include <inet/ip6.h>
     51 #include <sys/cmn_err.h>
     52 
     53 typedef struct {
     54 	/*
     55 	 * This structure mimics the mi_servers and mi_curr_serv
     56 	 * in the mntinfo4_t.  ds_servers is the list of servinfo4s
     57 	 * which refer to the same data server entity, typically, a
     58 	 * multi-homed data server.
     59 	 */
     60 	servinfo4_t	*ds_servers;
     61 	servinfo4_t	*ds_curr_serv;
     62 } ds_info_t;
     63 
     64 typedef struct devnode {
     65 	/* key */
     66 	deviceid4	dn_devid;
     67 	avl_node_t	dn_avl;
     68 	uint32_t	dn_count;
     69 	int		dn_flags;
     70 	kcondvar_t	dn_cv[1];
     71 
     72 	/* data servers, indexed indentically to ds_addrs */
     73 	ds_info_t	*dn_server_list;
     74 
     75 	/* xdr decoded information about the data servers */
     76 	nfsv4_1_file_layout_ds_addr4 dn_ds_addrs;
     77 } devnode_t;
     78 
     79 /*
     80  * Definitions for dn_flags
     81  *
     82  * DN_GDI_INFLIGHT	GETDEVICEINFO is currently OTW
     83  * DN_GDI_FAILED	GETDEVICEINFO has failed
     84  * DN_ORPHAN		The devnode is orphaned from the tree
     85  * DN_INSERTED		the devnode is inserted into the tree
     86  */
     87 #define	DN_GDI_INFLIGHT	0x01
     88 #define	DN_GDI_FAILED	0x02
     89 #define	DN_ORPHAN	0x04
     90 #define	DN_INSERTED	0x08
     91 
     92 /*
     93  * GETDEVICE OTW and NO_OTW
     94  */
     95 #define	PGD_OTW		0x01
     96 #define	PGD_NO_OTW	0x02
     97 
     98 /* per-rnode file layout */
     99 typedef struct {
    100 	uint32_t		std_refcount;
    101 
    102 	nfs4_sharedfh_t		*std_fh;
    103 	deviceid4		std_devid;
    104 
    105 	kmutex_t		std_lock;
    106 	verifier4		std_writeverf;
    107 	uint32_t		std_flags;
    108 	servinfo4_t		*std_svp;
    109 } stripe_dev_t;
    110 #define	STRIPE_DEV_HAVE_VERIFIER	(0x01)
    111 
    112 enum stripetype4 {
    113 	STRIPE4_SPARSE = 0,
    114 	STRIPE4_DENSE = 1
    115 };
    116 
    117 #define	PNFS_LAYOUTEND	0xffffffffffffffff
    118 
    119 /* per-rnode generic layout */
    120 typedef struct pnfs_layout {
    121 	list_node_t		plo_list;
    122 	layoutiomode4		plo_iomode;
    123 	int			plo_flags;
    124 	offset4			plo_offset;
    125 	length4			plo_length;
    126 	offset4			plo_pattern_offset;
    127 	uint32_t		plo_inusecnt;
    128 	kcondvar_t		plo_wait;
    129 	deviceid4		plo_deviceid;
    130 	uint32_t		plo_stripe_type;
    131 	length4			plo_stripe_unit;
    132 	uint32_t		plo_first_stripe_index;
    133 	uint32_t		plo_stripe_count;
    134 	stripe_dev_t		**plo_stripe_dev;
    135 	kmutex_t		plo_lock;
    136 	uint32_t		plo_refcount;
    137 	int64_t			plo_creation_sec;
    138 	int64_t			plo_creation_musec;
    139 } pnfs_layout_t;
    140 
    141 /*
    142  * Layout Flag Fields
    143  * NOTE: PLO_RETURN, PLO_GET and PLO_RECALL can only be set or cleared
    144  * when the code path "owns" the R4OTWLO bit in the rnode.  However
    145  * PLO_RETURN, PLO_GET and PLO_RECALL, can still be checked by only
    146  * having to hold the rnode's r_statelock.
    147  */
    148 #define	PLO_ROC		0x1	/* Return Layout On Close */
    149 #define	PLO_RETURN	0x02	/* Layout Being Returned. */
    150 #define	PLO_GET		0x04	/* Layoutget in Progress */
    151 #define	PLO_RECALL	0x08	/* Layout Being Recalled */
    152 #define	PLO_BAD		0x10	/* Layout is Bad */
    153 #define	PLO_UNAVAIL	0x20	/* Layout Unavailable from MDS */
    154 #define	PLO_COM2MDS	0x40	/* Commit To MDS */
    155 #define	PLO_TRYLATER	0x80	/* RETRY from MDS on LAYOUTGET, try later */
    156 #define	PLO_COMMIT_MDS	0x100	/* Commit to MDS */
    157 #define	PLO_LOWAITER	0x200	/* Thread waiting for this layout */
    158 #define	PLO_PROCESSED	0x400	/* LAYOUTGET processed this layout */
    159 
    160 typedef struct pnfs_lo_matches {
    161 	list_t		lm_layouts;
    162 	offset4 	lm_offset;
    163 	length4 	lm_length;
    164 	uint_t		lm_status;
    165 	uint_t		lm_flags;
    166 	layoutiomode4	lm_mode;
    167 } pnfs_lo_matches_t;
    168 
    169 /*
    170  * Status Flags For lm_status field of pnfs_lo_matches
    171  */
    172 #define	LOMSTAT_MATCHFOUND	0x1
    173 #define	LOMSTAT_NEEDSWAIT	0x02
    174 #define	LOMSTAT_DELAY		0x04
    175 
    176 /*
    177  * Use bits passed to pnfs_find_layouts() identifying why the layout list
    178  * is to be acquired.
    179  */
    180 #define	LOM_USE		0x2
    181 #define	LOM_RETURN	0x4
    182 #define	LOM_RECALL	0x8
    183 #define	LOM_COMMIT	0x10
    184 
    185 /*
    186  * LOM status bits, indicating status of the layout list returned, if any.
    187  */
    188 #define	LOM_STAT_SUCCESS	0x0
    189 #define	LOM_STAT_RECALLED	0x01    /* Layout(s) recalled */
    190 
    191 typedef struct pnfs_lol {
    192 	list_node_t	l_node;
    193 	pnfs_layout_t	*l_layout;
    194 	offset4		l_offset;
    195 	length4		l_length;
    196 	int		l_flags;
    197 } pnfs_lol_t;
    198 
    199 
    200 /*
    201  * Flag bits telling the layoutreturn code what type of return
    202  * it is doing and if it is from a return, or initiated by a recall.
    203  */
    204 #define	PNFS_LAYOUTRECALL_FILE	0x01
    205 #define	PNFS_LAYOUTRECALL_FSID	0x02
    206 #define	PNFS_LAYOUTRECALL_ALL	0x04
    207 #define	PNFS_LAYOUTRETURN_FILE	0x08
    208 
    209 /* a batch of read i/o work requested of pNFS */
    210 typedef struct {
    211 	kmutex_t	fir_lock;
    212 	kcondvar_t	fir_cv;
    213 	int32_t		fir_remaining;
    214 	int		fir_error;
    215 	int		fir_eof;
    216 	offset4		fir_eof_offset;
    217 	int		fir_count;
    218 	stateid4	fir_stateid;
    219 	list_t		fir_task_list;
    220 } file_io_read_t;
    221 
    222 /* units of read i/o work (part of a batch) */
    223 typedef struct {
    224 	file_io_read_t *rt_job;
    225 	stripe_dev_t *rt_dev;
    226 	nfs4_call_t *rt_call;
    227 	nfs4_recov_state_t rt_recov_state;
    228 	cred_t *rt_cred;
    229 	offset4 rt_offset;
    230 	count4 rt_count;
    231 	char *rt_base;
    232 	int rt_have_uio;
    233 	uint32_t rt_free_uio;
    234 	uio_t rt_uio;
    235 	list_node_t rt_next;
    236 } read_task_t;
    237 
    238 /* a batch of write i/o work requested of pNFS */
    239 typedef struct {
    240 	kmutex_t	fiw_lock;
    241 	kcondvar_t	fiw_cv;
    242 	uint32_t	fiw_flags;
    243 	int32_t		fiw_remaining;
    244 	int		fiw_error;
    245 	stable_how4	fiw_stable_how;
    246 	stable_how4	fiw_stable_result;
    247 	stateid4	fiw_stateid;
    248 	vnode_t		*fiw_vp;
    249 	list_t		fiw_task_list;
    250 } file_io_write_t;
    251 
    252 /* units of write i/o work (part of a batch) */
    253 typedef struct {
    254 	file_io_write_t *wt_job;
    255 	stripe_dev_t *wt_dev;
    256 	nfs4_call_t *wt_call;
    257 	nfs4_recov_state_t wt_recov_state;
    258 	cred_t *wt_cred;
    259 	pnfs_layout_t *wt_layout;
    260 	caddr_t wt_base;
    261 	offset4 wt_offset;
    262 	offset4 wt_voff;
    263 	count4 wt_count;
    264 	uint32_t wt_sui;
    265 	list_node_t wt_next;
    266 } write_task_t;
    267 
    268 typedef struct {
    269 	kmutex_t	fic_lock;
    270 	kcondvar_t	fic_cv;
    271 	int32_t		fic_remaining;
    272 	int		fic_error;
    273 	vnode_t		*fic_vp;
    274 	page_t		*fic_plist;
    275 } file_io_commit_t;
    276 
    277 typedef struct {
    278 	file_io_commit_t *cm_job;
    279 	stripe_dev_t *cm_dev;
    280 	nfs4_call_t *cm_call;
    281 	nfs4_recov_state_t cm_recov_state;
    282 	cred_t *cm_cred;
    283 	pnfs_layout_t *cm_layout;
    284 	offset4 cm_offset;
    285 	count4 cm_count;
    286 	uint32_t cm_sui;
    287 } commit_task_t;
    288 
    289 typedef struct {
    290 	offset4 	ce_offset;
    291 	length4 	ce_length;
    292 	pnfs_layout_t	*ce_lo;
    293 } commit_extent_t;
    294 
    295 typedef struct {
    296 	mntinfo4_t	*tgd_mi;
    297 	cred_t		*tgd_cred;
    298 } task_get_devicelist_t;
    299 
    300 typedef struct {
    301 	mntinfo4_t 		*tlg_mi;
    302 	vnode_t 		*tlg_vp;
    303 	cred_t 			*tlg_cred;
    304 	layoutiomode4 		tlg_iomode;
    305 	uint32_t 		tlg_flags;
    306 	offset4			tlg_offset;
    307 } task_layoutget_t;
    308 
    309 #define	TLG_NOFREE (0x01)
    310 #define	TLG_USE		LOM_USE
    311 #define	TLG_RETURN	LOM_RETURN
    312 #define	TLG_RECALL	LOM_RECALL
    313 
    314 typedef struct {
    315 	mntinfo4_t 		*tlr_mi;
    316 	vnode_t 		*tlr_vp;
    317 	cred_t 			*tlr_cr;
    318 	offset4 		tlr_offset;
    319 	length4 		tlr_length;
    320 	bool_t 			tlr_reclaim;
    321 	layoutiomode4 		tlr_iomode;
    322 	layouttype4 		tlr_layout_type;
    323 	pnfs_lo_matches_t	*tlr_lom;
    324 	layoutreturn_type4 	tlr_return_type;
    325 	int			tlr_aflag;
    326 	nfs4_server_t		*tlr_np;
    327 	nfs4_fsidlt_t		*tlr_lt;
    328 } task_layoutreturn_t;
    329 
    330 extern void	pnfs_layout_return(vnode_t *, cred_t *, int,
    331 	pnfs_lo_matches_t *, int);
    332 
    333 extern pnfs_lo_matches_t *
    334 pnfs_find_layouts(nfs4_server_t *, struct rnode4 *, cred_t *,
    335 layoutiomode4, offset4, length4, int);
    336 
    337 extern	int	pnfs_rnode_holds_layouts(struct rnode4 *);
    338 extern void	pnfs_layoutget(vnode_t *, cred_t *, offset4, layoutiomode4);
    339 extern void	pnfs_layout_hold(struct rnode4 *, struct pnfs_layout *);
    340 extern void	pnfs_layout_rele(struct rnode4 *, struct pnfs_layout *);
    341 extern void	pnfs_decr_layout_refcnt(struct rnode4 *, struct pnfs_layout *);
    342 extern void	pnfs_trim_fsid_tree(struct rnode4 *, struct nfs4_fsidlt *, int);
    343 extern void	pnfs_release_layouts(nfs4_server_t *np, struct rnode4 *,
    344 	struct pnfs_lo_matches *, int);
    345 extern void    	pnfs_insert_layout(pnfs_layout_t *, struct rnode4 *,
    346 	struct pnfs_layout *);
    347 
    348 
    349 /*
    350  * Layout data structures that get XDR encoded/decoded into the buffer
    351  * passed via the system call for getting layout information.
    352  */
    353 typedef struct stripe_info {
    354 	uint32_t 	stripe_index;
    355 	struct {
    356 		uint_t multipath_list_len;
    357 		struct netaddr4 *multipath_list_val;
    358 	} multipath_list;
    359 } stripe_info_t;
    360 
    361 
    362 typedef struct layoutspecs {
    363 	uint32_t 	plo_stripe_count;
    364 	uint32_t 	plo_stripe_unit;
    365 	uint32_t 	plo_status;
    366 	layoutiomode4 	iomode;
    367 	offset4 	plo_offset;
    368 	length4 	plo_length;
    369 	int64_t 	plo_creation_sec;
    370 	int64_t 	plo_creation_musec;
    371 	devnode_t	*plo_devnode;
    372 	struct {
    373 		uint_t plo_stripe_info_list_len;
    374 		stripe_info_t *plo_stripe_info_list_val;
    375 	} plo_stripe_info_list;
    376 } layoutspecs_t;
    377 
    378 typedef struct layoutstats {
    379 	uint64_t	proxy_iocount;
    380 	uint64_t	ds_iocount;
    381 	struct {
    382 		uint_t		total_layouts;
    383 		layoutspecs_t	*lo_specs;
    384 	} plo_data;
    385 } layoutstats_t;
    386 
    387 /*
    388  * Error codes to report conditions to the userland. The fields must have the
    389  * same value as the fields in the user-space file named nfsstat_layout.h.
    390  */
    391 typedef enum nfsstat_layout_errcodes {
    392 	ENOLAYOUT = 	-1,
    393 	ENOTAFILE = 	-2,
    394 	ENOPNFSSERV = 	-3,
    395 	ESYSCALL = 	-4,
    396 	ENONFS = 	-5
    397 } nfsstat_lo_errcodes_t;
    398 
    399 #ifdef __cplusplus
    400 }
    401 #endif
    402 
    403 #endif /* _NFS4_PNFS_H */
    404