1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #ifndef _NFS4_PNFS_H 27 #define _NFS4_PNFS_H 28 29 /* 30 * Generic and file layout specific pNFS support. 31 */ 32 33 #ifdef __cplusplus 34 extern "C" { 35 #endif 36 37 #include <sys/types.h> 38 #include <sys/avl.h> 39 #include <nfs/nfs4.h> 40 #include <nfs/nfs4_clnt.h> 41 #include <nfs/rnode4.h> 42 #include <nfs/nfs4_kprot.h> 43 #include <nfs/nfssys.h> 44 #include <sys/systm.h> 45 #include <sys/taskq.h> 46 #include <sys/disp.h> 47 #include <sys/time.h> 48 #include <rpc/xdr.h> 49 #include <inet/ip.h> 50 #include <inet/ip6.h> 51 #include <sys/cmn_err.h> 52 53 typedef struct { 54 /* 55 * This structure mimics the mi_servers and mi_curr_serv 56 * in the mntinfo4_t. ds_servers is the list of servinfo4s 57 * which refer to the same data server entity, typically, a 58 * multi-homed data server. 59 */ 60 servinfo4_t *ds_servers; 61 servinfo4_t *ds_curr_serv; 62 } ds_info_t; 63 64 typedef struct devnode { 65 /* key */ 66 deviceid4 dn_devid; 67 avl_node_t dn_avl; 68 uint32_t dn_count; 69 int dn_flags; 70 kcondvar_t dn_cv[1]; 71 72 /* data servers, indexed indentically to ds_addrs */ 73 ds_info_t *dn_server_list; 74 75 /* xdr decoded information about the data servers */ 76 nfsv4_1_file_layout_ds_addr4 dn_ds_addrs; 77 } devnode_t; 78 79 /* 80 * Definitions for dn_flags 81 * 82 * DN_GDI_INFLIGHT GETDEVICEINFO is currently OTW 83 * DN_GDI_FAILED GETDEVICEINFO has failed 84 * DN_ORPHAN The devnode is orphaned from the tree 85 * DN_INSERTED the devnode is inserted into the tree 86 */ 87 #define DN_GDI_INFLIGHT 0x01 88 #define DN_GDI_FAILED 0x02 89 #define DN_ORPHAN 0x04 90 #define DN_INSERTED 0x08 91 92 /* 93 * GETDEVICE OTW and NO_OTW 94 */ 95 #define PGD_OTW 0x01 96 #define PGD_NO_OTW 0x02 97 98 /* per-rnode file layout */ 99 typedef struct { 100 uint32_t std_refcount; 101 102 nfs4_sharedfh_t *std_fh; 103 deviceid4 std_devid; 104 105 kmutex_t std_lock; 106 verifier4 std_writeverf; 107 uint32_t std_flags; 108 servinfo4_t *std_svp; 109 } stripe_dev_t; 110 #define STRIPE_DEV_HAVE_VERIFIER (0x01) 111 112 enum stripetype4 { 113 STRIPE4_SPARSE = 0, 114 STRIPE4_DENSE = 1 115 }; 116 117 #define PNFS_LAYOUTEND 0xffffffffffffffff 118 119 /* per-rnode generic layout */ 120 typedef struct pnfs_layout { 121 list_node_t plo_list; 122 layoutiomode4 plo_iomode; 123 int plo_flags; 124 offset4 plo_offset; 125 length4 plo_length; 126 offset4 plo_pattern_offset; 127 uint32_t plo_inusecnt; 128 kcondvar_t plo_wait; 129 deviceid4 plo_deviceid; 130 uint32_t plo_stripe_type; 131 length4 plo_stripe_unit; 132 uint32_t plo_first_stripe_index; 133 uint32_t plo_stripe_count; 134 stripe_dev_t **plo_stripe_dev; 135 kmutex_t plo_lock; 136 uint32_t plo_refcount; 137 int64_t plo_creation_sec; 138 int64_t plo_creation_musec; 139 } pnfs_layout_t; 140 141 /* 142 * Layout Flag Fields 143 * NOTE: PLO_RETURN, PLO_GET and PLO_RECALL can only be set or cleared 144 * when the code path "owns" the R4OTWLO bit in the rnode. However 145 * PLO_RETURN, PLO_GET and PLO_RECALL, can still be checked by only 146 * having to hold the rnode's r_statelock. 147 */ 148 #define PLO_ROC 0x1 /* Return Layout On Close */ 149 #define PLO_RETURN 0x02 /* Layout Being Returned. */ 150 #define PLO_GET 0x04 /* Layoutget in Progress */ 151 #define PLO_RECALL 0x08 /* Layout Being Recalled */ 152 #define PLO_BAD 0x10 /* Layout is Bad */ 153 #define PLO_UNAVAIL 0x20 /* Layout Unavailable from MDS */ 154 #define PLO_COM2MDS 0x40 /* Commit To MDS */ 155 #define PLO_TRYLATER 0x80 /* RETRY from MDS on LAYOUTGET, try later */ 156 #define PLO_COMMIT_MDS 0x100 /* Commit to MDS */ 157 #define PLO_LOWAITER 0x200 /* Thread waiting for this layout */ 158 #define PLO_PROCESSED 0x400 /* LAYOUTGET processed this layout */ 159 160 typedef struct pnfs_lo_matches { 161 list_t lm_layouts; 162 offset4 lm_offset; 163 length4 lm_length; 164 uint_t lm_status; 165 uint_t lm_flags; 166 layoutiomode4 lm_mode; 167 } pnfs_lo_matches_t; 168 169 /* 170 * Status Flags For lm_status field of pnfs_lo_matches 171 */ 172 #define LOMSTAT_MATCHFOUND 0x1 173 #define LOMSTAT_NEEDSWAIT 0x02 174 #define LOMSTAT_DELAY 0x04 175 176 /* 177 * Use bits passed to pnfs_find_layouts() identifying why the layout list 178 * is to be acquired. 179 */ 180 #define LOM_USE 0x2 181 #define LOM_RETURN 0x4 182 #define LOM_RECALL 0x8 183 #define LOM_COMMIT 0x10 184 185 /* 186 * LOM status bits, indicating status of the layout list returned, if any. 187 */ 188 #define LOM_STAT_SUCCESS 0x0 189 #define LOM_STAT_RECALLED 0x01 /* Layout(s) recalled */ 190 191 typedef struct pnfs_lol { 192 list_node_t l_node; 193 pnfs_layout_t *l_layout; 194 offset4 l_offset; 195 length4 l_length; 196 int l_flags; 197 } pnfs_lol_t; 198 199 200 /* 201 * Flag bits telling the layoutreturn code what type of return 202 * it is doing and if it is from a return, or initiated by a recall. 203 */ 204 #define PNFS_LAYOUTRECALL_FILE 0x01 205 #define PNFS_LAYOUTRECALL_FSID 0x02 206 #define PNFS_LAYOUTRECALL_ALL 0x04 207 #define PNFS_LAYOUTRETURN_FILE 0x08 208 209 /* a batch of read i/o work requested of pNFS */ 210 typedef struct { 211 kmutex_t fir_lock; 212 kcondvar_t fir_cv; 213 int32_t fir_remaining; 214 int fir_error; 215 int fir_eof; 216 offset4 fir_eof_offset; 217 int fir_count; 218 stateid4 fir_stateid; 219 list_t fir_task_list; 220 } file_io_read_t; 221 222 /* units of read i/o work (part of a batch) */ 223 typedef struct { 224 file_io_read_t *rt_job; 225 stripe_dev_t *rt_dev; 226 nfs4_call_t *rt_call; 227 nfs4_recov_state_t rt_recov_state; 228 cred_t *rt_cred; 229 offset4 rt_offset; 230 count4 rt_count; 231 char *rt_base; 232 int rt_have_uio; 233 uint32_t rt_free_uio; 234 uio_t rt_uio; 235 list_node_t rt_next; 236 } read_task_t; 237 238 /* a batch of write i/o work requested of pNFS */ 239 typedef struct { 240 kmutex_t fiw_lock; 241 kcondvar_t fiw_cv; 242 uint32_t fiw_flags; 243 int32_t fiw_remaining; 244 int fiw_error; 245 stable_how4 fiw_stable_how; 246 stable_how4 fiw_stable_result; 247 stateid4 fiw_stateid; 248 vnode_t *fiw_vp; 249 list_t fiw_task_list; 250 } file_io_write_t; 251 252 /* units of write i/o work (part of a batch) */ 253 typedef struct { 254 file_io_write_t *wt_job; 255 stripe_dev_t *wt_dev; 256 nfs4_call_t *wt_call; 257 nfs4_recov_state_t wt_recov_state; 258 cred_t *wt_cred; 259 pnfs_layout_t *wt_layout; 260 caddr_t wt_base; 261 offset4 wt_offset; 262 offset4 wt_voff; 263 count4 wt_count; 264 uint32_t wt_sui; 265 list_node_t wt_next; 266 } write_task_t; 267 268 typedef struct { 269 kmutex_t fic_lock; 270 kcondvar_t fic_cv; 271 int32_t fic_remaining; 272 int fic_error; 273 vnode_t *fic_vp; 274 page_t *fic_plist; 275 } file_io_commit_t; 276 277 typedef struct { 278 file_io_commit_t *cm_job; 279 stripe_dev_t *cm_dev; 280 nfs4_call_t *cm_call; 281 nfs4_recov_state_t cm_recov_state; 282 cred_t *cm_cred; 283 pnfs_layout_t *cm_layout; 284 offset4 cm_offset; 285 count4 cm_count; 286 uint32_t cm_sui; 287 } commit_task_t; 288 289 typedef struct { 290 offset4 ce_offset; 291 length4 ce_length; 292 pnfs_layout_t *ce_lo; 293 } commit_extent_t; 294 295 typedef struct { 296 mntinfo4_t *tgd_mi; 297 cred_t *tgd_cred; 298 } task_get_devicelist_t; 299 300 typedef struct { 301 mntinfo4_t *tlg_mi; 302 vnode_t *tlg_vp; 303 cred_t *tlg_cred; 304 layoutiomode4 tlg_iomode; 305 uint32_t tlg_flags; 306 offset4 tlg_offset; 307 } task_layoutget_t; 308 309 #define TLG_NOFREE (0x01) 310 #define TLG_USE LOM_USE 311 #define TLG_RETURN LOM_RETURN 312 #define TLG_RECALL LOM_RECALL 313 314 typedef struct { 315 mntinfo4_t *tlr_mi; 316 vnode_t *tlr_vp; 317 cred_t *tlr_cr; 318 offset4 tlr_offset; 319 length4 tlr_length; 320 bool_t tlr_reclaim; 321 layoutiomode4 tlr_iomode; 322 layouttype4 tlr_layout_type; 323 pnfs_lo_matches_t *tlr_lom; 324 layoutreturn_type4 tlr_return_type; 325 int tlr_aflag; 326 nfs4_server_t *tlr_np; 327 nfs4_fsidlt_t *tlr_lt; 328 } task_layoutreturn_t; 329 330 extern void pnfs_layout_return(vnode_t *, cred_t *, int, 331 pnfs_lo_matches_t *, int); 332 333 extern pnfs_lo_matches_t * 334 pnfs_find_layouts(nfs4_server_t *, struct rnode4 *, cred_t *, 335 layoutiomode4, offset4, length4, int); 336 337 extern int pnfs_rnode_holds_layouts(struct rnode4 *); 338 extern void pnfs_layoutget(vnode_t *, cred_t *, offset4, layoutiomode4); 339 extern void pnfs_layout_hold(struct rnode4 *, struct pnfs_layout *); 340 extern void pnfs_layout_rele(struct rnode4 *, struct pnfs_layout *); 341 extern void pnfs_decr_layout_refcnt(struct rnode4 *, struct pnfs_layout *); 342 extern void pnfs_trim_fsid_tree(struct rnode4 *, struct nfs4_fsidlt *, int); 343 extern void pnfs_release_layouts(nfs4_server_t *np, struct rnode4 *, 344 struct pnfs_lo_matches *, int); 345 extern void pnfs_insert_layout(pnfs_layout_t *, struct rnode4 *, 346 struct pnfs_layout *); 347 348 349 /* 350 * Layout data structures that get XDR encoded/decoded into the buffer 351 * passed via the system call for getting layout information. 352 */ 353 typedef struct stripe_info { 354 uint32_t stripe_index; 355 struct { 356 uint_t multipath_list_len; 357 struct netaddr4 *multipath_list_val; 358 } multipath_list; 359 } stripe_info_t; 360 361 362 typedef struct layoutspecs { 363 uint32_t plo_stripe_count; 364 uint32_t plo_stripe_unit; 365 uint32_t plo_status; 366 layoutiomode4 iomode; 367 offset4 plo_offset; 368 length4 plo_length; 369 int64_t plo_creation_sec; 370 int64_t plo_creation_musec; 371 devnode_t *plo_devnode; 372 struct { 373 uint_t plo_stripe_info_list_len; 374 stripe_info_t *plo_stripe_info_list_val; 375 } plo_stripe_info_list; 376 } layoutspecs_t; 377 378 typedef struct layoutstats { 379 uint64_t proxy_iocount; 380 uint64_t ds_iocount; 381 struct { 382 uint_t total_layouts; 383 layoutspecs_t *lo_specs; 384 } plo_data; 385 } layoutstats_t; 386 387 /* 388 * Error codes to report conditions to the userland. The fields must have the 389 * same value as the fields in the user-space file named nfsstat_layout.h. 390 */ 391 typedef enum nfsstat_layout_errcodes { 392 ENOLAYOUT = -1, 393 ENOTAFILE = -2, 394 ENOPNFSSERV = -3, 395 ESYSCALL = -4, 396 ENONFS = -5 397 } nfsstat_lo_errcodes_t; 398 399 #ifdef __cplusplus 400 } 401 #endif 402 403 #endif /* _NFS4_PNFS_H */ 404