1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 
28 * All Rights Reserved 29 */ 30 31 #include <sys/param.h> 32 #include <sys/types.h> 33 #include <sys/systm.h> 34 #include <sys/cred.h> 35 #include <sys/buf.h> 36 #include <sys/vfs.h> 37 #include <sys/vnode.h> 38 #include <sys/uio.h> 39 #include <sys/errno.h> 40 #include <sys/sysmacros.h> 41 #include <sys/statvfs.h> 42 #include <sys/kmem.h> 43 #include <sys/dirent.h> 44 #include <sys/cmn_err.h> 45 #include <sys/disp.h> 46 #include <sys/debug.h> 47 #include <sys/systeminfo.h> 48 #include <sys/flock.h> 49 #include <sys/pathname.h> 50 #include <sys/nbmlock.h> 51 #include <sys/share.h> 52 #include <sys/atomic.h> 53 #include <sys/policy.h> 54 #include <sys/fem.h> 55 #include <sys/sdt.h> 56 #include <sys/ddi.h> 57 #include <sys/modctl.h> 58 #include <sys/timod.h> 59 #include <sys/id_space.h> 60 61 #include <rpc/types.h> 62 #include <rpc/auth.h> 63 #include <rpc/rpcsec_gss.h> 64 #include <rpc/svc.h> 65 66 #include <nfs/nfs.h> 67 #include <nfs/export.h> 68 #include <nfs/lm.h> 69 #include <nfs/nfs4.h> 70 71 #include <sys/strsubr.h> 72 #include <sys/strsun.h> 73 74 #include <inet/common.h> 75 #include <inet/ip.h> 76 #include <inet/ip6.h> 77 78 #include <sys/tsol/label.h> 79 #include <sys/tsol/tndb.h> 80 81 #include <nfs/nfs4_attrmap.h> 82 #include <nfs/nfs4_srv_attr.h> 83 #include <nfs/mds_state.h> 84 #include <nfs/mds_odl.h> 85 86 #include <nfs/nfs41_filehandle.h> 87 #include <nfs/ctl_mds_clnt.h> 88 89 #include <nfs/spe_impl.h> 90 91 #define RFS4_MAXLOCK_TRIES 4 /* Try to get the lock this many times */ 92 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES; 93 #define RFS4_LOCK_DELAY 10 /* Milliseconds */ 94 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY; 95 96 int mds_strict_seqid = 0; 97 98 static void ping_cb_null_thr(mds_session_t *); 99 100 /* End of Tunables */ 101 102 /* 103 * Used to bump the stateid4.seqid value and show changes in the stateid 104 */ 105 #define next_stateid(sp) (++(sp)->v41_bits.chgseq) 106 107 /* 108 * RFS4_MINLEN_ENTRY4: XDR-encoded size of 
smallest possible dirent. 109 * This is used to return NFS4ERR_TOOSMALL when clients specify 110 * maxcount that isn't large enough to hold the smallest possible 111 * XDR encoded dirent. 112 * 113 * sizeof cookie (8 bytes) + 114 * sizeof name_len (4 bytes) + 115 * sizeof smallest (padded) name (4 bytes) + 116 * sizeof bitmap4_len (12 bytes) + NOTE: we always encode len=2 bm4 117 * sizeof attrlist4_len (4 bytes) + 118 * sizeof next boolean (4 bytes) 119 * 120 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing 121 * the smallest possible entry4 (assumes no attrs requested). 122 * sizeof nfsstat4 (4 bytes) + 123 * sizeof verifier4 (8 bytes) + 124 * sizeof entry4list bool (4 bytes) + 125 * sizeof entry4 (36 bytes) + 126 * sizeof eof bool (4 bytes) 127 * 128 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to 129 * VOP_READDIR. Its value is the size of the maximum possible dirent 130 * for solaris. The DIRENT64_RECLEN macro returns the size of dirent 131 * required for a given name length. MAXNAMELEN is the maximum 132 * filename length allowed in Solaris. The first two DIRENT64_RECLEN() 133 * macros are to allow for . and .. entries -- just a minor tweak to try 134 * and guarantee that buffer we give to VOP_READDIR will be large enough 135 * to hold ., .., and the largest possible solaris dirent64. 136 */ 137 #define RFS4_MINLEN_ENTRY4 36 138 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4) 139 #define RFS4_MINLEN_RDDIR_BUF \ 140 (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN)) 141 142 /* 143 * It would be better to pad to 4 bytes since that's what XDR would do, 144 * but the dirents UFS gives us are already padded to 8, so just take 145 * what we're given. Dircount is only a hint anyway. Currently the 146 * solaris kernel is ASCII only, so there's no point in calling the 147 * UTF8 functions. 
148 * 149 * dirent64: named padded to provide 8 byte struct alignment 150 * d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad) 151 * 152 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes 153 * 154 */ 155 #define DIRENT64_TO_DIRCOUNT(dp) \ 156 (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen)) 157 158 /* 159 * types of label comparison 160 */ 161 #define EQUALITY_CHECK 0 162 #define DOMINANCE_CHECK 1 163 164 static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */ 165 166 void rfs4_init_compound_state(struct compound_state *); 167 168 static void nullfree(nfs_resop4 *, compound_state_t *); 169 static void mds_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 170 compound_state_t *); 171 static void mds_op_notsup(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 172 compound_state_t *); 173 static void mds_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 174 compound_state_t *); 175 static void mds_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 176 compound_state_t *); 177 static void mds_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 178 compound_state_t *); 179 static void mds_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 180 compound_state_t *); 181 static void mds_op_create_free(nfs_resop4 *resop); 182 static void mds_op_delegreturn(nfs_argop4 *, nfs_resop4 *, 183 struct svc_req *, compound_state_t *); 184 static void mds_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 185 compound_state_t *); 186 static void mds_op_getattr_free(nfs_resop4 *, compound_state_t *); 187 static void mds_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 188 compound_state_t *); 189 static void mds_op_getfh_free(nfs_resop4 *, compound_state_t *); 190 static void mds_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 191 compound_state_t *); 192 static void mds_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 193 compound_state_t *); 194 static void 
mds_lock_denied_free(nfs_resop4 *, compound_state_t *); 195 static void mds_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 196 compound_state_t *); 197 static void mds_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 198 compound_state_t *); 199 static void mds_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 200 compound_state_t *); 201 static void mds_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 202 compound_state_t *); 203 static void mds_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, 204 struct svc_req *req, compound_state_t *); 205 static void mds_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 206 compound_state_t *); 207 static void mds_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 208 compound_state_t *); 209 static void mds_op_open_downgrade(nfs_argop4 *, nfs_resop4 *, 210 struct svc_req *, compound_state_t *); 211 static void mds_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 212 compound_state_t *); 213 static void mds_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 214 compound_state_t *); 215 static void mds_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 216 compound_state_t *); 217 static void mds_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 218 compound_state_t *); 219 static void mds_op_read_free(nfs_resop4 *, compound_state_t *); 220 void mds_op_readdir(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 221 compound_state_t *); 222 static void mds_op_readdir_free(nfs_resop4 *, compound_state_t *); 223 static void mds_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 224 compound_state_t *); 225 static void mds_op_readlink_free(nfs_resop4 *, compound_state_t *); 226 static void mds_op_release_lockowner(nfs_argop4 *, nfs_resop4 *, 227 struct svc_req *, compound_state_t *); 228 static void mds_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 229 compound_state_t *); 230 static void mds_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 231 
compound_state_t *); 232 static void mds_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 233 compound_state_t *); 234 static void mds_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 235 compound_state_t *); 236 static void mds_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 237 compound_state_t *); 238 static void mds_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 239 compound_state_t *); 240 static void mds_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 241 compound_state_t *); 242 static void mds_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 243 compound_state_t *); 244 static void mds_op_exchange_id(nfs_argop4 *, nfs_resop4 *, 245 struct svc_req *, compound_state_t *); 246 static void mds_op_exid_free(nfs_resop4 *, compound_state_t *); 247 static void mds_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 248 compound_state_t *); 249 static void mds_op_secinfonn(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 250 compound_state_t *); 251 nfsstat4 do_rfs4_op_secinfo(struct compound_state *, char *, int, 252 SECINFO4res *); 253 254 static void mds_op_secinfo_free(nfs_resop4 *, compound_state_t *); 255 256 static void mds_op_backchannel_ctl(nfs_argop4 *, nfs_resop4 *, 257 struct svc_req *, compound_state_t *); 258 static void mds_op_bind_conn_to_session(nfs_argop4 *, nfs_resop4 *, 259 struct svc_req *, compound_state_t *); 260 static void mds_op_create_clientid(nfs_argop4 *, nfs_resop4 *, 261 struct svc_req *, compound_state_t *); 262 static void mds_op_create_session(nfs_argop4 *, nfs_resop4 *, 263 struct svc_req *, compound_state_t *); 264 static void mds_op_destroy_session(nfs_argop4 *, nfs_resop4 *, 265 struct svc_req *, compound_state_t *); 266 static void mds_op_sequence(nfs_argop4 *, nfs_resop4 *, 267 struct svc_req *, compound_state_t *); 268 269 static void mds_op_get_devlist(nfs_argop4 *, nfs_resop4 *, 270 struct svc_req *, compound_state_t *); 271 272 static void mds_op_get_devinfo(nfs_argop4 *, 
nfs_resop4 *, 273 struct svc_req *, compound_state_t *); 274 275 static void mds_op_layout_get(nfs_argop4 *, nfs_resop4 *, 276 struct svc_req *, compound_state_t *); 277 static void mds_op_layout_get_free(nfs_resop4 *, compound_state_t *); 278 279 static void mds_op_layout_commit(nfs_argop4 *, nfs_resop4 *, 280 struct svc_req *, compound_state_t *); 281 282 static void mds_op_layout_return(nfs_argop4 *, nfs_resop4 *, 283 struct svc_req *, compound_state_t *); 284 285 static void mds_op_reclaim_complete(nfs_argop4 *, nfs_resop4 *, 286 struct svc_req *, compound_state_t *); 287 288 static int seq_chk_limits(nfs_argop4 *, nfs_resop4 *, compound_state_t *); 289 290 nfsstat4 check_open_access(uint32_t, 291 struct compound_state *, struct svc_req *); 292 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *); 293 294 static void mds_free_reply(nfs_resop4 *, compound_state_t *); 295 296 vnode_t *do_rfs4_op_mknod(CREATE4args *, CREATE4res *, struct svc_req *, 297 struct compound_state *, vattr_t *, char *); 298 299 nfsstat4 rfs4_do_lock(rfs4_lo_state_t *, nfs_lock_type4, seqid4, 300 offset4, length4, cred_t *, nfs_resop4 *); 301 302 rfs4_lo_state_t *mds_findlo_state_by_owner(rfs4_lockowner_t *, 303 rfs4_state_t *, bool_t *); 304 305 bool_t in_flavor_list(int, int *, int); 306 307 nfsstat4 attrmap4_to_vattrmask(attrmap4 *, struct nfs4_svgetit_arg *); 308 309 nfsstat4 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *); 310 311 nfsstat4 do_rfs4_op_getattr(attrmap4 *, fattr4 *, struct nfs4_svgetit_arg *); 312 313 nfsstat4 do_rfs4_op_lookup(char *, uint_t, struct svc_req *, 314 struct compound_state *); 315 316 rfs4_lockowner_t *mds_findlockowner_by_pid(nfs_server_instance_t *, pid_t); 317 318 mds_session_t *mds_findsession_by_id(nfs_server_instance_t *, sessionid4); 319 320 rfs4_openowner_t *mds_findopenowner(nfs_server_instance_t *, open_owner4 *, 321 bool_t *); 322 323 static void mds_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 324 compound_state_t *); 325 326 
extern mds_mpd_t *mds_find_mpd(nfs_server_instance_t *, id_t); 327 extern void rfs41_lo_seqid(stateid_t *); 328 extern void mds_delete_layout(vnode_t *); 329 extern void mds_clean_grants_by_fsid(rfs4_client_t *, vnode_t *); 330 extern mds_layout_t *mds_add_layout(layout_core_t *lc); 331 332 nfsstat4 333 create_vnode(vnode_t *, char *, vattr_t *, createmode4, timespec32_t *, 334 cred_t *, vnode_t **, bool_t *); 335 336 337 /* HACKERY */ 338 nfsstat4 rfs4_get_all_state(struct compound_state *, stateid4 *, 339 rfs4_state_t **, rfs4_deleg_state_t **, rfs4_lo_state_t **); 340 341 void rfs4_ss_clid(struct compound_state *, rfs4_client_t *, struct svc_req *); 342 void rfs4_ss_chkclid(struct compound_state *, rfs4_client_t *); 343 344 int layout_match(stateid_t, stateid4, nfsstat4 *); 345 346 extern stateid4 special0; 347 extern stateid4 special1; 348 349 #define ISSPECIAL(id) (stateid4_cmp(id, &special0) || \ 350 stateid4_cmp(id, &special1)) 351 352 void rfs4_cn_release(compound_state_t *); 353 354 mds_layout_grant_t *rfs41_findlogrant(struct compound_state *, 355 rfs4_file_t *, rfs4_client_t *, bool_t *); 356 void rfs41_lo_grant_rele(mds_layout_grant_t *); 357 mds_ever_grant_t *rfs41_findevergrant(rfs4_client_t *, vnode_t *, bool_t *); 358 void rfs41_ever_grant_rele(mds_ever_grant_t *); 359 360 static uint32_t compute_use_pnfs_flags(uint32_t); 361 362 /* ARGSUSED */ 363 static void 364 mds_op_notsup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 365 compound_state_t *cs) 366 { 367 DTRACE_NFSV4_1(op__notsup__start, 368 strcut compound_state *, cs); 369 370 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_NOTSUPP; 371 372 DTRACE_NFSV4_1(op__notsup__done, 373 struct compound_state *, cs); 374 } 375 376 /* ARGSUSED */ 377 static void 378 mds_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 379 compound_state_t *cs) 380 { 381 DTRACE_NFSV4_1(op__illegal__start, 382 struct compound_state *, cs); 383 384 *cs->statusp = 385 
*((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_OP_ILLEGAL; 386 387 DTRACE_NFSV4_1(op__illegal__done, 388 struct compound_state *, cs); 389 } 390 391 /* ARGSUSED */ 392 static void 393 mds_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 394 compound_state_t *cs) 395 { 396 DTRACE_NFSV4_1(op__inval__start, 397 struct compound_state *, cs); 398 399 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL; 400 401 DTRACE_NFSV4_1(op__inval__done, 402 struct compound_state *, cs); 403 } 404 405 /*ARGSUSED*/ 406 static void 407 nullfree(nfs_resop4 *resop, compound_state_t *cs) 408 { 409 } 410 411 static op_disp_tbl_t mds_disptab[] = { 412 {mds_op_illegal, nullfree, DISP_OP_BAD, "BAD Op 0"}, 413 {mds_op_illegal, nullfree, DISP_OP_BAD, "BAD Op 1"}, 414 {mds_op_illegal, nullfree, DISP_OP_BAD, "BAD Op 2"}, 415 {mds_op_access, nullfree, DISP_OP_MDS, "ACCESS"}, 416 {mds_op_close, nullfree, DISP_OP_MDS, "CLOSE"}, 417 {mds_op_commit, nullfree, DISP_OP_BOTH, "COMMIT"}, 418 {mds_op_create, nullfree, DISP_OP_MDS, "CREATE"}, 419 {mds_op_inval, nullfree, DISP_OP_BAD, "BAD Op 7"}, 420 {mds_op_delegreturn, nullfree, DISP_OP_MDS, "DELEGRETURN"}, 421 {mds_op_getattr, mds_op_getattr_free, DISP_OP_MDS, "GETATTR"}, 422 {mds_op_getfh, mds_op_getfh_free, DISP_OP_MDS, "GETFH"}, 423 {mds_op_link, nullfree, DISP_OP_MDS, "LINK"}, 424 {mds_op_lock, mds_lock_denied_free, DISP_OP_MDS, "LOCK"}, 425 {mds_op_lockt, mds_lock_denied_free, DISP_OP_MDS, "LOCKT"}, 426 {mds_op_locku, nullfree, DISP_OP_MDS, "LOCKU"}, 427 {mds_op_lookup, nullfree, DISP_OP_MDS, "LOOKUP"}, 428 {mds_op_lookupp, nullfree, DISP_OP_MDS, "LOOKUPP"}, 429 {mds_op_nverify, nullfree, DISP_OP_MDS, "NVERIFY"}, 430 {mds_op_open, mds_free_reply, DISP_OP_MDS, "OPEN"}, 431 {mds_op_openattr, nullfree, DISP_OP_MDS, "OPENATTR"}, 432 {mds_op_notsup, nullfree, DISP_OP_BAD, "BAD Op 20"}, 433 {mds_op_open_downgrade, nullfree, DISP_OP_MDS, "OPEN_DOWNGRADE"}, 434 {mds_op_putfh, nullfree, DISP_OP_BOTH, "PUTFH"}, 435 
{mds_op_putpubfh, nullfree, DISP_OP_MDS, "PUTPUBFH"}, 436 {mds_op_putrootfh, nullfree, DISP_OP_MDS, "PUTROOTFH"}, 437 {mds_op_read, mds_op_read_free, DISP_OP_BOTH, "READ"}, 438 {mds_op_readdir, mds_op_readdir_free, DISP_OP_MDS, "READDIR"}, 439 {mds_op_readlink, mds_op_readlink_free, DISP_OP_MDS, "READLINK"}, 440 {mds_op_remove, nullfree, DISP_OP_MDS, "REMOVE"}, 441 {mds_op_rename, nullfree, DISP_OP_MDS, "RENAME"}, 442 {mds_op_notsup, nullfree, DISP_OP_BAD, "BAD Op 30"}, 443 {mds_op_restorefh, nullfree, DISP_OP_MDS, "RESTOREFH"}, 444 {mds_op_savefh, nullfree, DISP_OP_MDS, "SAVEFH"}, 445 {mds_op_secinfo, mds_op_secinfo_free, DISP_OP_MDS, "SECINFO"}, 446 {mds_op_setattr, nullfree, DISP_OP_MDS, "SETATTR"}, 447 {mds_op_notsup, nullfree, DISP_OP_BAD, "BAD Op 35"}, 448 {mds_op_notsup, nullfree, DISP_OP_BAD, "BAD Op 36"}, 449 {mds_op_verify, nullfree, DISP_OP_MDS, "VERIFY"}, 450 {mds_op_write, nullfree, DISP_OP_BOTH, "WRITE"}, 451 {mds_op_notsup, nullfree, DISP_OP_BAD, "BAD Op 39"}, 452 {mds_op_backchannel_ctl, nullfree, DISP_OP_BOTH, "BACKCHANNEL_CTL"}, 453 {mds_op_bind_conn_to_session, nullfree, 454 DISP_OP_BOTH, "BIND_CONN_TO_SESS"}, 455 {mds_op_exchange_id, mds_op_exid_free, DISP_OP_BOTH, "EXCHANGE_ID"}, 456 {mds_op_create_session, nullfree, DISP_OP_BOTH, "CREATE_SESS"}, 457 {mds_op_destroy_session, nullfree, DISP_OP_BOTH, "DESTROY_SESS"}, 458 {mds_op_illegal, nullfree, DISP_OP_MDS, "FREE_STATEID"}, 459 {mds_op_illegal, nullfree, DISP_OP_MDS, "GET_DIR_DELEG"}, 460 {mds_op_get_devinfo, nullfree, DISP_OP_MDS, "GET_DEVINFO"}, 461 {mds_op_get_devlist, nullfree, DISP_OP_MDS, "GET_DEVLIST"}, 462 {mds_op_layout_commit, nullfree, DISP_OP_MDS, "LAYOUT_COMMIT"}, 463 {mds_op_layout_get, mds_op_layout_get_free, DISP_OP_MDS, "LAYOUT_GET"}, 464 {mds_op_layout_return, nullfree, DISP_OP_MDS, "LAYOUT_RETURN"}, 465 {mds_op_secinfonn, nullfree, 466 DISP_OP_BOTH, "SECINFO_NONAME"}, 467 {mds_op_sequence, nullfree, DISP_OP_BOTH, "SEQUENCE"}, 468 {mds_op_notsup, nullfree, DISP_OP_BOTH, 
"SET_SSV"}, 469 {mds_op_notsup, nullfree, DISP_OP_MDS, "TEST_STATEID"}, 470 {mds_op_notsup, nullfree, DISP_OP_MDS, "WANT_DELEG"}, 471 {mds_op_notsup, nullfree, DISP_OP_BOTH, "DESTROY_CLIENTID"}, 472 {mds_op_reclaim_complete, nullfree, DISP_OP_MDS, "RECLAIM_COMPLETE"} 473 }; 474 475 static uint_t mds_disp_cnt = sizeof (mds_disptab) / sizeof (mds_disptab[0]); 476 477 #define OP_ILLEGAL_IDX (mds_disp_cnt) 478 479 extern size_t strlcpy(char *dst, const char *src, size_t dstsize); 480 481 #ifdef nextdp 482 #undef nextdp 483 #endif 484 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen)) 485 486 /*ARGSUSED*/ 487 static void 488 mds_op_readdir_free(nfs_resop4 *resop, compound_state_t *cs) 489 { 490 /* Common function used for NFSv4.0 and NFSv4.1 */ 491 rfs4_op_readdir_free(resop); 492 } 493 494 /*ARGSUSED*/ 495 static void 496 mds_op_secinfo_free(nfs_resop4 *resop, compound_state_t *cs) 497 { 498 /* Common function used for NFSv4.0 and NFSv4.1 */ 499 rfs4_op_secinfo_free(resop); 500 } 501 502 /* 503 */ 504 void 505 mds_srvrfini(void) 506 { 507 /* some shutdown stuff for the minor verson 1 server */ 508 } 509 510 nfsstat4 rfs4_state_has_access(rfs4_state_t *, int, vnode_t *); 511 int rfs4_verify_attr(struct nfs4_svgetit_arg *, attrmap4 *, 512 struct nfs4_ntov_table *); 513 514 515 /* 516 * Given the I/O mode (FREAD or FWRITE), the vnode, the stateid and whether 517 * the file is being truncated, return NFS4_OK if allowed or approriate 518 * V4 error if not. Note NFS4ERR_DELAY will be returned and a recall on 519 * the associated file will be done if the I/O is not consistent with any 520 * delegation in effect on the file. Should be holding VOP_RWLOCK, either 521 * as reader or writer as appropriate. rfs4_op_open will accquire the 522 * VOP_RWLOCK as writer when setting up delegation. If the stateid is bad 523 * this routine will return NFS4ERR_BAD_STATEID. 
 * In addition, through the
 * deleg parameter, we will return whether a write delegation is held by
 * the client associated with this stateid.
 * If the server instance associated with the relevant client is in its
 * grace period, return NFS4ERR_GRACE.
 *
 * Notes on the parameters as used below:
 *   - For the special stateids (ISSPECIAL) there is no state to validate;
 *     only a delegation conflict on the file is checked.
 *   - do_access: when non-zero, the result of rfs4_state_has_access() on
 *     the open state becomes the return value; it is not consulted when the
 *     stateid resolves to a delegation.
 *   - *deleg is only ever set to TRUE here; it is never cleared, so the
 *     caller's initial value is otherwise left untouched.  deleg may be NULL.
 */
nfsstat4
mds_validate_stateid(int mode, struct compound_state *cs, vnode_t *vp,
    stateid4 *stateid, bool_t trunc, bool_t *deleg, bool_t do_access)
{
	rfs4_file_t *fp;
	bool_t create = FALSE;
	rfs4_state_t *sp;
	rfs4_deleg_state_t *dsp;
	rfs4_lo_state_t *lsp;
	stateid_t *id = (stateid_t *)stateid;
	nfsstat4 stat = NFS4_OK;

	if (ISSPECIAL(stateid)) {
		/* create is FALSE: presumably lookup-only, no new entry */
		fp = rfs4_findfile(cs->instp, vp, NULL, &create);
		if (fp == NULL)
			return (NFS4_OK);
		if (fp->rf_dinfo->rd_dtype == OPEN_DELEGATE_NONE) {
			rfs4_file_rele(fp);
			return (NFS4_OK);
		}
		/*
		 * A delegation exists.  Any write, or any I/O against a
		 * write delegation, triggers a recall and asks the client
		 * to retry later.
		 */
		if (mode == FWRITE ||
		    fp->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE) {
			rfs4_recall_deleg(fp, trunc, NULL);
			rfs4_file_rele(fp);
			return (NFS4ERR_DELAY);
		}
		rfs4_file_rele(fp);
		return (NFS4_OK);
	}

	stat = rfs4_get_all_state(cs, stateid, &sp, &dsp, &lsp);
	if (stat != NFS4_OK)
		return (stat);

	/*
	 * Ordering of the following 'if' statements is specific
	 * since rfs4_get_all_state() may return a value for sp and
	 * lsp. First we check lsp, then 'fall' through to sp.
	 */
	if (lsp != NULL) {
		/*
		 * Every failure path in this branch releases lsp and, when
		 * present, sp before returning.
		 */
		/* Is associated server instance in its grace period? */
		if (rfs4_clnt_in_grace(lsp->rls_locker->rl_client)) {
			rfs4_lo_state_rele(lsp, FALSE);
			if (sp != NULL)
				rfs4_dbe_rele(sp->rs_dbe);
			return (NFS4ERR_GRACE);
		}

		/* The seqid comparison is skipped when the stored one is 0 */
		if (lsp->rls_lockid.v41_bits.chgseq != 0) {
			/* Seqid in the future? - that's bad */
			if (lsp->rls_lockid.v41_bits.chgseq <
			    id->v41_bits.chgseq) {
				rfs4_lo_state_rele(lsp, FALSE);
				if (sp != NULL)
					rfs4_dbe_rele(sp->rs_dbe);
				return (NFS4ERR_BAD_STATEID);
			}
			/* Seqid in the past? - that's old */
			if (lsp->rls_lockid.v41_bits.chgseq >
			    id->v41_bits.chgseq) {
				rfs4_lo_state_rele(lsp, FALSE);
				if (sp != NULL)
					rfs4_dbe_rele(sp->rs_dbe);
				return (NFS4ERR_OLD_STATEID);
			}
		}

		/* Ensure specified filehandle matches */
		if (lsp->rls_state->rs_finfo->rf_vp != vp) {
			rfs4_lo_state_rele(lsp, FALSE);
			if (sp != NULL)
				rfs4_dbe_rele(sp->rs_dbe);
			return (NFS4ERR_BAD_STATEID);
		}
		/* Lock state checks passed; continue with sp (if any) */
		rfs4_lo_state_rele(lsp, FALSE);
	}

	/*
	 * Stateid provided was an "open" or via the lock stateid
	 */
	if (sp != NULL) {
		/*
		 * only check if the passed in stateid was an OPENID,
		 * ie. Skip if we got here via the LOCKID.
		 */
		if (id->v41_bits.type == OPENID) {
			/* Is associated server instance in its grace period? */
			if (rfs4_clnt_in_grace(sp->rs_owner->ro_client)) {
				rfs4_dbe_rele(sp->rs_dbe);
				return (NFS4ERR_GRACE);
			}

			if (sp->rs_stateid.v41_bits.chgseq != 0) {
				/* Seqid in the future? - that's bad */
				if (sp->rs_stateid.v41_bits.chgseq <
				    id->v41_bits.chgseq) {
					rfs4_dbe_rele(sp->rs_dbe);
					return (NFS4ERR_BAD_STATEID);
				}
				/* Seqid in the past - that's old */
				if (sp->rs_stateid.v41_bits.chgseq >
				    id->v41_bits.chgseq) {
					rfs4_dbe_rele(sp->rs_dbe);
					return (NFS4ERR_OLD_STATEID);
				}
			}

			/* Ensure specified filehandle matches */
			if (sp->rs_finfo->rf_vp != vp) {
				rfs4_dbe_rele(sp->rs_dbe);
				return (NFS4ERR_BAD_STATEID);
			}
		}
		/* The remaining checks apply to both OPENID and LOCKID */
		if (sp->rs_owner->ro_need_confirm) {
			rfs4_dbe_rele(sp->rs_dbe);
			return (NFS4ERR_BAD_STATEID);
		}

		if (sp->rs_closed == TRUE) {
			rfs4_dbe_rele(sp->rs_dbe);
			return (NFS4ERR_OLD_STATEID);
		}

		if (do_access)
			stat = rfs4_state_has_access(sp, mode, vp);
		else
			stat = NFS4_OK;

		/*
		 * Return whether this state has write
		 * delegation if desired
		 */
		if (deleg &&
		    (sp->rs_finfo->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE))
			*deleg = TRUE;

		/*
		 * We got a valid stateid, so we update the
		 * lease on the client. Ideally we would like
		 * to do this after the calling op succeeds,
		 * but for now this will be good
		 * enough. Callers of this routine are
		 * currently insulated from the state stuff.
		 */
		rfs4_update_lease(sp->rs_owner->ro_client);

		/*
		 * If a delegation is present on this file and
		 * this is a WRITE, then update the lastwrite
		 * time to indicate that activity is present.
		 */
		if (sp->rs_finfo->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE &&
		    mode == FWRITE) {
			sp->rs_finfo->rf_dinfo->rd_time_lastwrite =
			    gethrestime_sec();
		}

		rfs4_dbe_rele(sp->rs_dbe);
		return (stat);
	}

	if (dsp != NULL) {
		/* Is associated server instance in its grace period? */
		if (rfs4_clnt_in_grace(dsp->rds_client)) {
			rfs4_deleg_state_rele(dsp);
			return (NFS4ERR_GRACE);
		}

		/*
		 * Unlike the open/lock cases, any seqid mismatch on a
		 * delegation stateid is reported as NFS4ERR_BAD_STATEID.
		 */
		if ((dsp->rds_delegid.v41_bits.chgseq != 0) &&
		    (dsp->rds_delegid.v41_bits.chgseq != id->v41_bits.chgseq)) {
			rfs4_deleg_state_rele(dsp);
			return (NFS4ERR_BAD_STATEID);
		}

		/* Ensure specified filehandle matches */
		if (dsp->rds_finfo->rf_vp != vp) {
			rfs4_deleg_state_rele(dsp);
			return (NFS4ERR_BAD_STATEID);
		}
		/*
		 * Return whether this state has write
		 * delegation if desired
		 */
		if (deleg &&
		    (dsp->rds_finfo->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE))
			*deleg = TRUE;

		rfs4_update_lease(dsp->rds_client);

		/*
		 * If a delegation is present on this file and
		 * this is a WRITE, then update the lastwrite
		 * time to indicate that activity is present.
		 */
		if (dsp->rds_finfo->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE &&
		    mode == FWRITE) {
			dsp->rds_finfo->rf_dinfo->rd_time_lastwrite =
			    gethrestime_sec();
		}

		/*
		 * XXX - what happens if this is a WRITE and the
		 * delegation type is for READ.
		 */
		rfs4_deleg_state_rele(dsp);

		/* stat is still the NFS4_OK from rfs4_get_all_state() */
		return (stat);
	}
	/*
	 * If we got this far, something bad happened
	 */
	return (NFS4ERR_BAD_STATEID);
}

/*
 * Apply the attributes in fattrp to the vnode of the current filehandle
 * (cs->vp).
 *
 *   resp    - out: map of attributes reported as set.  Initialised to the
 *             empty map for the compound's attribute version and masked
 *             against fattrp->attrmask before returning.
 *   fattrp  - attributes requested by the client.
 *   cs      - compound state; supplies the credential, vnode and server
 *             instance.
 *   stateid - validated with mds_validate_stateid() only when the request
 *             changes the file size (AT_SIZE).
 *
 * Returns NFS4_OK or an NFSv4 error status.  All exits go through the
 * done: label so that the NBMAND critical region (if entered) is left and
 * the ntov table is freed.
 */
nfsstat4
mds_setattr(attrmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
    stateid4 *stateid)
{
	int error = 0;
	struct nfs4_svgetit_arg sarg;
	bool_t trunc;

	nfsstat4 status = NFS4_OK;
	cred_t *cr = cs->cr;
	vnode_t *vp = cs->vp;
	struct nfs4_ntov_table ntov;
	struct statvfs64 sb;
	struct vattr bva;
	struct flock64 bf;
	int in_crit = 0;
	uint_t saved_mask = 0;
	caller_context_t ct;
	attrvers_t avers;
	struct nfs4_ntov_map *nvmap;

	avers = RFS4_ATTRVERS(cs);
	nvmap = NFS4_NTOV_MAP(avers);
	*resp = NFS4_EMPTY_ATTRMAP(avers);
	sarg.sbp = &sb;
	nfs4_ntov_table_init(&ntov, avers);
	status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
	    NFS4ATTR_SETIT);
	if (status != NFS4_OK) {
		/*
		 * failed set attrs
		 */
		goto done;
	}

	if (sarg.vap->va_mask == 0 && ! ATTR_ISSET(fattrp->attrmask, ACL) &&
	    ! ATTR_ISSET(fattrp->attrmask, LAYOUT_HINT)) {
		/*
		 * no further work to be done
		 */
		goto done;
	}

	/*
	 * Caller context for the VOP calls below.  CC_DONTBLOCK is set here
	 * and CC_WOULDBLOCK is tested in the error path further down.
	 */
	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = cs->instp->caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * If we got a request to set the ACL and the MODE, only
	 * allow changing VSUID, VSGID, and VSVTX.  Attempting
	 * to change any other bits, along with setting an ACL,
	 * gives NFS4ERR_INVAL.
	 */
	if (ATTR_ISSET(fattrp->attrmask, ACL) &&
	    ATTR_ISSET(fattrp->attrmask, MODE)) {
		vattr_t va;

		va.va_mask = AT_MODE;
		error = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
		if (error) {
			status = puterrno4(error);
			goto done;
		}
		/* XOR isolates the mode bits that would actually change */
		if ((sarg.vap->va_mode ^ va.va_mode) &
		    ~(VSUID | VSGID | VSVTX)) {
			status = NFS4ERR_INVAL;
			goto done;
		}
	}

	/* Check stateid only if size has been set */
	if (sarg.vap->va_mask & AT_SIZE) {
		trunc = (sarg.vap->va_size == 0);
		status = mds_validate_stateid(FWRITE,
		    cs, cs->vp, stateid, trunc,
		    &cs->deleg, sarg.vap->va_mask & AT_SIZE);
		if (status != NFS4_OK)
			goto done;
	}

	/* XXX start of possible race with delegations */

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with read-only
	 * modes, but with the file opened for writing. If the client
	 * then tries to set the file size, e.g. ftruncate(3C),
	 * fcntl(F_FREESP), the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing it even though
	 * it should be allowed to do so.  To get around this, we do the
	 * access checking for ourselves and use VOP_SPACE which doesn't
	 * do the access checking.
	 * Also the client should not be allowed to change the file
	 * size if there is a conflicting non-blocking mandatory lock in
	 * the region of the change.
	 */
	if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
		u_offset_t offset;
		ssize_t length;

		/*
		 * ufs_setattr clears AT_SIZE from vap->va_mask, but
		 * before returning, sarg.vap->va_mask is used to
		 * generate the setattr reply bitmap.  We also clear
		 * AT_SIZE below before calling VOP_SPACE.  For both
		 * of these cases, the va_mask needs to be saved here
		 * and restored after calling VOP_SETATTR.
		 */
		saved_mask = sarg.vap->va_mask;

		/*
		 * Check any possible conflict due to NBMAND locks.
		 * Get into critical region before VOP_GETATTR, so the
		 * size attribute is valid when checking conflicts.
		 */
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		bva.va_mask = AT_UID|AT_SIZE;
		if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
			status = puterrno4(error);
			goto done;
		}

		if (in_crit) {
			/*
			 * The region to test is the span between the old and
			 * new sizes, whichever direction the file is moving.
			 */
			if (sarg.vap->va_size < bva.va_size) {
				offset = sarg.vap->va_size;
				length = bva.va_size - sarg.vap->va_size;
			} else {
				offset = bva.va_size;
				length = sarg.vap->va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    &ct)) {
				status = NFS4ERR_LOCKED;
				goto done;
			}
		}

		/*
		 * Only the file's owner takes the VOP_SPACE path; AT_SIZE is
		 * removed from the mask so VOP_SETATTR below does not also
		 * try to change the size.
		 */
		if (crgetuid(cr) == bva.va_uid) {
			sarg.vap->va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)sarg.vap->va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;
			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)sarg.vap->va_size, cr, &ct);
		}
	}

	if (!error && sarg.vap->va_mask != 0)
		error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);

	/* restore va_mask -- ufs_setattr clears AT_SIZE */
	if (saved_mask & AT_SIZE)
		sarg.vap->va_mask |= AT_SIZE;

	/*
	 * If an ACL was being set, it has been delayed until now,
	 * in order to set the mode (via the VOP_SETATTR() above) first.
	 */
	if (! error && ATTR_ISSET(fattrp->attrmask, ACL)) {
		int i;

		/* Locate the ACL entry among the decoded attributes */
		for (i = 0; i < ntov.attrcnt; i++)
			if (ntov.amap[i] == FATTR4_ACL)
				break;
		if (i < ntov.attrcnt) {
			error = (*nvmap[FATTR4_ACL].sv_getit)(NFS4ATTR_SETIT,
			    &sarg, &ntov.na[i]);
			if (error == 0) {
				ATTR_SET(*resp, ACL);
			} else if (error == ENOTSUP) {
				(void) rfs4_verify_attr(&sarg, resp, &ntov);
				status = NFS4ERR_ATTRNOTSUPP;
				goto done;
			}
		} else {
			/* ACL bit requested but no decoded ACL entry found */
			error = EINVAL;
		}
	}

	if (! error && ATTR_ISSET(fattrp->attrmask, LAYOUT_HINT)) {
		/*
		 * Store layout hint.  Layout hint will be stored
		 * in file struct (which means it can only be set
		 * when the file is open).  If layout hint is allowed
		 * for files not open, then it must be stored
		 * persistently.
		 *
		 * status assignment placates lint.  it will
		 * be replaced with code to store the layout
		 * hint.
		 */
		status = NFS4_OK;
	}

	if (error) {
		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			status = NFS4ERR_DELAY;
		else
			status = puterrno4(error);

		/*
		 * Set the response bitmap when setattr failed.
		 * If VOP_SETATTR partially succeeded, test by doing a
		 * VOP_GETATTR on the object and comparing the data
		 * to the setattr arguments.
		 */
		(void) rfs4_verify_attr(&sarg, resp, &ntov);
	} else {
		/*
		 * Force modified metadata out to stable storage.
		 */
		(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
		/*
		 * Set response bitmap
		 */
		nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
	}

/* Return early and already have a NFSv4 error */
done:
	/*
	 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
	 * conversion sets both readable and writeable NFS4 attrs
	 * for AT_MTIME and AT_ATIME.  The line below masks out
	 * unrequested attrs from the setattr result bitmap.  This
	 * is placed after the done: label to catch the ATTRNOTSUP
	 * case.
	 */
	ATTRMAP_MASK(*resp, fattrp->attrmask);

	if (in_crit)
		nbl_end_crit(vp);

	nfs4_ntov_table_free(&ntov, &sarg);

	return (status);
}

/*
 * SECINFO_NO_NAME operation handler.
 *
 * Requires a current filehandle (otherwise NFS4ERR_NOFILEHANDLE).  The
 * lookup itself is done by do_rfs4_op_secinfo() with a NULL name; dotdot is
 * non-zero when the requested style is SECINFO_STYLE4_PARENT.  The resulting
 * status is stored in both the compound status and the op result.
 */
/* ARGSUSED */
void
mds_op_secinfonn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    compound_state_t *cs)
{
	SECINFO_NO_NAME4res *respnn;
	int dotdot;

	DTRACE_NFSV4_1(op__secinfo__no__name__start,
	    struct compound_state *, cs);

	respnn = &resop->nfs_resop4_u.opsecinfo_no_name;

	/*
	 * Current file handle (cfh) should have been set before
	 * getting into this function. If not, return error.
	 */
	if (cs->vp == NULL) {
		*cs->statusp = respnn->status = NFS4ERR_NOFILEHANDLE;
		goto final;
	}

	dotdot =
	    (argop->nfs_argop4_u.opsecinfo_no_name == SECINFO_STYLE4_PARENT);

	/* The result is handed to the shared SECINFO code via a cast */
	*cs->statusp = respnn->status = do_rfs4_op_secinfo(cs, NULL,
	    dotdot, (SECINFO4res *)respnn);

final:
	DTRACE_NFSV4_2(op__secinfo__no__name__done,
	    struct compound_state *, cs,
	    SECINFO_NO_NAME4res *, respnn);
}

/*
 * SECINFO operation handler.
 *
 * The current filehandle must be set (NFS4ERR_NOFILEHANDLE) and must be a
 * directory (NFS4ERR_NOTDIR).  The component name is validated and converted
 * to a C string, then passed to do_rfs4_op_secinfo(); the converted name is
 * freed before returning.  The resulting status is stored in both the
 * compound status and the op result.
 */
/* ARGSUSED */
void
mds_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    compound_state_t *cs)
{
	SECINFO4res *resp;
	utf8string *utfnm;
	uint_t len, dotdot;
	char *nm;

	SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;

	DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
	    SECINFO4args *, args);

	resp = &resop->nfs_resop4_u.opsecinfo;

	/*
	 * Current file handle (cfh) should have been set before
	 * getting into this function. If not, return error.
	 */
	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto final;
	}
	if (cs->vp->v_type != VDIR) {
		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
		goto final;
	}

	/*
	 * Verify the component name. If failed, error out, but
	 * do not error out if the component name is a "..".
	 * SECINFO will return its parents secinfo data for SECINFO "..".
	 */
	utfnm = &argop->nfs_argop4_u.opsecinfo.name;
	if (!utf8_dir_verify(utfnm)) {
		if (utfnm->utf8string_len != 2 ||
		    utfnm->utf8string_val[0] != '.' ||
		    utfnm->utf8string_val[1] != '.') {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
			goto final;
		}
		dotdot = 1;
	} else
		dotdot = 0;

	/* nm is freed with kmem_free(nm, len) on every path below */
	nm = utf8_to_str(utfnm, &len, NULL);
	if (nm == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto final;
	}

	if (len > MAXNAMELEN) {
		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
		kmem_free(nm, len);
		goto final;
	}

	*cs->statusp = resp->status = do_rfs4_op_secinfo(cs, nm, dotdot, resp);

	kmem_free(nm, len);

final:
	DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
	    SECINFO4res *, resp);
}

/*
 * verify and nverify are exactly the same, except that nverify
 * succeeds when some argument changed, and verify succeeds when
 * when none changed.
1096 */ 1097 1098 /* ARGSUSED */ 1099 void 1100 mds_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1101 compound_state_t *cs) 1102 { 1103 VERIFY4args *args = &argop->nfs_argop4_u.opverify; 1104 VERIFY4res *resp = &resop->nfs_resop4_u.opverify; 1105 int error; 1106 struct nfs4_svgetit_arg sarg; 1107 struct statvfs64 sb; 1108 struct nfs4_ntov_table ntov; 1109 1110 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs, 1111 VERIFY4args *, args); 1112 1113 if (cs->vp == NULL) { 1114 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1115 goto final; 1116 } 1117 1118 sarg.sbp = &sb; 1119 nfs4_ntov_table_init(&ntov, RFS4_ATTRVERS(cs)); 1120 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs, 1121 &sarg, &ntov, NFS4ATTR_VERIT); 1122 if (resp->status != NFS4_OK) { 1123 /* 1124 * do_rfs4_set_attrs will try to verify systemwide attrs, 1125 * so could return -1 for "no match". 1126 */ 1127 if (resp->status == -1) 1128 resp->status = NFS4ERR_NOT_SAME; 1129 goto done; 1130 } 1131 error = rfs4_verify_attr(&sarg, NULL, &ntov); 1132 switch (error) { 1133 case 0: 1134 resp->status = NFS4_OK; 1135 break; 1136 case -1: 1137 resp->status = NFS4ERR_NOT_SAME; 1138 break; 1139 default: 1140 resp->status = puterrno4(error); 1141 break; 1142 } 1143 done: 1144 *cs->statusp = resp->status; 1145 nfs4_ntov_table_free(&ntov, &sarg); 1146 1147 final: 1148 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs, 1149 VERIFY4res *, resp); 1150 } 1151 1152 /* ARGSUSED */ 1153 void 1154 mds_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1155 compound_state_t *cs) 1156 { 1157 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify; 1158 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify; 1159 int error; 1160 struct nfs4_svgetit_arg sarg; 1161 struct statvfs64 sb; 1162 struct nfs4_ntov_table ntov; 1163 1164 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs, 1165 NVERIFY4args *, args); 1166 1167 if (cs->vp == NULL) { 1168 
*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1169 goto final; 1170 } 1171 sarg.sbp = &sb; 1172 nfs4_ntov_table_init(&ntov, RFS4_ATTRVERS(cs)); 1173 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs, 1174 &sarg, &ntov, NFS4ATTR_VERIT); 1175 if (resp->status != NFS4_OK) { 1176 /* 1177 * do_rfs4_set_attrs will try to verify systemwide attrs, 1178 * so could return -1 for "no match". 1179 */ 1180 if (resp->status == -1) 1181 resp->status = NFS4_OK; 1182 goto done; 1183 } 1184 error = rfs4_verify_attr(&sarg, NULL, &ntov); 1185 switch (error) { 1186 case 0: 1187 resp->status = NFS4ERR_SAME; 1188 break; 1189 case -1: 1190 resp->status = NFS4_OK; 1191 break; 1192 default: 1193 resp->status = puterrno4(error); 1194 break; 1195 } 1196 done: 1197 *cs->statusp = resp->status; 1198 nfs4_ntov_table_free(&ntov, &sarg); 1199 1200 final: 1201 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs, 1202 NVERIFY4res *, resp); 1203 1204 } 1205 1206 /* ARGSUSED */ 1207 void 1208 mds_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1209 compound_state_t *cs) 1210 { 1211 ACCESS4args *args = &argop->nfs_argop4_u.opaccess; 1212 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess; 1213 int error; 1214 vnode_t *vp; 1215 struct vattr va; 1216 int checkwriteperm; 1217 cred_t *cr = cs->cr; 1218 bslabel_t *clabel, *slabel; 1219 ts_label_t *tslabel; 1220 boolean_t admin_low_client; 1221 1222 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs, 1223 ACCESS4args *, args); 1224 1225 if (cs->vp == NULL) { 1226 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1227 goto final; 1228 } 1229 1230 ASSERT(cr != NULL); 1231 1232 vp = cs->vp; 1233 1234 /* 1235 * If the file system is exported read only, it is not appropriate 1236 * to check write permissions for regular files and directories. 1237 * Special files are interpreted by the client, so the underlying 1238 * permissions are sent back to the client for interpretation. 
1239 */ 1240 if (rdonly4(cs->exi, cs->vp, req) && 1241 (vp->v_type == VREG || vp->v_type == VDIR)) 1242 checkwriteperm = 0; 1243 else 1244 checkwriteperm = 1; 1245 1246 /* 1247 * XXX 1248 * We need the mode so that we can correctly determine access 1249 * permissions relative to a mandatory lock file. Access to 1250 * mandatory lock files is denied on the server, so it might 1251 * as well be reflected to the server during the open. 1252 */ 1253 va.va_mask = AT_MODE; 1254 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 1255 if (error) { 1256 *cs->statusp = resp->status = puterrno4(error); 1257 goto final; 1258 } 1259 resp->access = 0; 1260 resp->supported = 0; 1261 1262 if (is_system_labeled()) { 1263 ASSERT(req->rq_label != NULL); 1264 clabel = req->rq_label; 1265 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *, 1266 "got client label from request(1)", 1267 struct svc_req *, req); 1268 if (!blequal(&l_admin_low->tsl_label, clabel)) { 1269 if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) { 1270 *cs->statusp = resp->status = puterrno4(EACCES); 1271 goto final; 1272 } 1273 slabel = label2bslabel(tslabel); 1274 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel, 1275 char *, "got server label(1) for vp(2)", 1276 bslabel_t *, slabel, vnode_t *, vp); 1277 1278 admin_low_client = B_FALSE; 1279 } else 1280 admin_low_client = B_TRUE; 1281 } 1282 1283 if (args->access & ACCESS4_READ) { 1284 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL); 1285 if (!error && !MANDLOCK(vp, va.va_mode) && 1286 (!is_system_labeled() || admin_low_client || 1287 bldominates(clabel, slabel))) 1288 resp->access |= ACCESS4_READ; 1289 resp->supported |= ACCESS4_READ; 1290 } 1291 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) { 1292 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL); 1293 if (!error && (!is_system_labeled() || admin_low_client || 1294 bldominates(clabel, slabel))) 1295 resp->access |= ACCESS4_LOOKUP; 1296 resp->supported |= ACCESS4_LOOKUP; 1297 } 1298 if (checkwriteperm && 
1299 (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) { 1300 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL); 1301 if (!error && !MANDLOCK(vp, va.va_mode) && 1302 (!is_system_labeled() || admin_low_client || 1303 blequal(clabel, slabel))) 1304 resp->access |= 1305 (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND)); 1306 resp->supported |= (ACCESS4_MODIFY|ACCESS4_EXTEND); 1307 } 1308 1309 if (checkwriteperm && 1310 (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) { 1311 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL); 1312 if (!error && (!is_system_labeled() || admin_low_client || 1313 blequal(clabel, slabel))) 1314 resp->access |= ACCESS4_DELETE; 1315 resp->supported |= ACCESS4_DELETE; 1316 } 1317 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) { 1318 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL); 1319 if (!error && !MANDLOCK(vp, va.va_mode) && 1320 (!is_system_labeled() || admin_low_client || 1321 bldominates(clabel, slabel))) 1322 resp->access |= ACCESS4_EXECUTE; 1323 resp->supported |= ACCESS4_EXECUTE; 1324 } 1325 1326 if (is_system_labeled() && !admin_low_client) 1327 label_rele(tslabel); 1328 1329 *cs->statusp = resp->status = NFS4_OK; 1330 1331 final: 1332 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs, 1333 ACCESS4res *, resp); 1334 } 1335 1336 /* ARGSUSED */ 1337 static void 1338 mds_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1339 compound_state_t *cs) 1340 { 1341 COMMIT4args *args = &argop->nfs_argop4_u.opcommit; 1342 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit; 1343 int error; 1344 vnode_t *vp = cs->vp; 1345 cred_t *cr = cs->cr; 1346 vattr_t va; 1347 caller_context_t ct; 1348 1349 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs, 1350 COMMIT4args *, args); 1351 1352 if (vp == NULL) { 1353 /* 1354 * XXX kludge: fake the commit if we are a data server 1355 * This will be replaced once we have nnop_commit(). 
1356 */ 1357 if (cs->nn != NULL) { 1358 *cs->statusp = resp->status = NFS4_OK; 1359 resp->writeverf = cs->instp->Write4verf; 1360 } else { 1361 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1362 } 1363 goto final; 1364 } 1365 if (cs->access == CS_ACCESS_DENIED) { 1366 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1367 goto final; 1368 } 1369 1370 if (args->offset + args->count < args->offset) { 1371 *cs->statusp = resp->status = NFS4ERR_INVAL; 1372 goto final; 1373 } 1374 1375 ct.cc_sysid = 0; 1376 ct.cc_pid = 0; 1377 ct.cc_caller_id = cs->instp->caller_id; 1378 ct.cc_flags = CC_DONTBLOCK; 1379 1380 va.va_mask = AT_UID; 1381 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 1382 1383 /* 1384 * If we can't get the attributes, then we can't do the 1385 * right access checking. So, we'll fail the request. 1386 */ 1387 if (error) { 1388 *cs->statusp = resp->status = puterrno4(error); 1389 goto final; 1390 } 1391 if (rdonly4(cs->exi, cs->vp, req)) { 1392 *cs->statusp = resp->status = NFS4ERR_ROFS; 1393 goto final; 1394 } 1395 1396 if (vp->v_type != VREG) { 1397 if (vp->v_type == VDIR) 1398 resp->status = NFS4ERR_ISDIR; 1399 else 1400 resp->status = NFS4ERR_INVAL; 1401 *cs->statusp = resp->status; 1402 goto final; 1403 } 1404 1405 if (crgetuid(cr) != va.va_uid && 1406 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, &ct))) { 1407 *cs->statusp = resp->status = puterrno4(error); 1408 goto final; 1409 } 1410 1411 error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr, &ct); 1412 if (!error) 1413 error = VOP_FSYNC(vp, FNODSYNC, cr, &ct); 1414 1415 if (error) { 1416 *cs->statusp = resp->status = puterrno4(error); 1417 goto final; 1418 } 1419 1420 *cs->statusp = resp->status = NFS4_OK; 1421 resp->writeverf = cs->instp->Write4verf; 1422 1423 final: 1424 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs, 1425 COMMIT4res *, resp); 1426 } 1427 1428 /* 1429 * rfs4_op_mknod is called from rfs4_op_create after all initial verification 1430 * was completed. 
It does the nfsv4 create for special files. 1431 * 1432 * nfsv4 create is used to create non-regular files. For regular files, 1433 * use nfsv4 open. 1434 */ 1435 /* ARGSUSED */ 1436 static void 1437 mds_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1438 compound_state_t *cs) 1439 { 1440 CREATE4args *args = &argop->nfs_argop4_u.opcreate; 1441 CREATE4res *resp = &resop->nfs_resop4_u.opcreate; 1442 int error; 1443 struct vattr bva, iva, iva2, ava, *vap; 1444 cred_t *cr = cs->cr; 1445 vnode_t *dvp = cs->vp; 1446 vnode_t *vp = NULL; 1447 vnode_t *realvp; 1448 char *nm, *lnm; 1449 uint_t len, llen; 1450 int syncval = 0; 1451 struct nfs4_svgetit_arg sarg; 1452 struct nfs4_ntov_table ntov; 1453 struct statvfs64 sb; 1454 nfsstat4 status; 1455 caller_context_t ct; 1456 1457 DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs, 1458 CREATE4args *, args); 1459 1460 resp->attrset = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs)); 1461 1462 if (dvp == NULL) { 1463 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1464 goto final; 1465 } 1466 1467 /* 1468 * If there is an unshared filesystem mounted on this vnode, 1469 * do not allow to create an object in this directory. 
1470 */ 1471 if (vn_ismntpt(dvp)) { 1472 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1473 goto final; 1474 } 1475 1476 ct.cc_sysid = 0; 1477 ct.cc_pid = 0; 1478 ct.cc_caller_id = cs->instp->caller_id; 1479 ct.cc_flags = CC_DONTBLOCK; 1480 1481 /* Verify that type is correct */ 1482 switch (args->type) { 1483 case NF4LNK: 1484 case NF4BLK: 1485 case NF4CHR: 1486 case NF4SOCK: 1487 case NF4FIFO: 1488 case NF4DIR: 1489 break; 1490 default: 1491 *cs->statusp = resp->status = NFS4ERR_BADTYPE; 1492 goto final; 1493 }; 1494 1495 if (cs->access == CS_ACCESS_DENIED) { 1496 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1497 goto final; 1498 } 1499 if (dvp->v_type != VDIR) { 1500 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 1501 goto final; 1502 } 1503 if (!utf8_dir_verify(&args->objname)) { 1504 *cs->statusp = resp->status = NFS4ERR_INVAL; 1505 goto final; 1506 } 1507 1508 if (rdonly4(cs->exi, cs->vp, req)) { 1509 *cs->statusp = resp->status = NFS4ERR_ROFS; 1510 goto final; 1511 } 1512 1513 /* 1514 * Name of newly created object 1515 */ 1516 nm = utf8_to_fn(&args->objname, &len, NULL); 1517 if (nm == NULL) { 1518 *cs->statusp = resp->status = NFS4ERR_INVAL; 1519 goto final; 1520 } 1521 1522 if (len > MAXNAMELEN) { 1523 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 1524 kmem_free(nm, len); 1525 goto final; 1526 } 1527 1528 sarg.sbp = &sb; 1529 nfs4_ntov_table_init(&ntov, RFS4_ATTRVERS(cs)); 1530 1531 status = do_rfs4_set_attrs(&resp->attrset, 1532 &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT); 1533 1534 if (sarg.vap->va_mask == 0 && status == NFS4_OK) 1535 status = NFS4ERR_INVAL; 1536 1537 if (status != NFS4_OK) { 1538 *cs->statusp = resp->status = status; 1539 kmem_free(nm, len); 1540 nfs4_ntov_table_free(&ntov, &sarg); 1541 1542 resp->attrset = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs)); 1543 goto final; 1544 } 1545 1546 /* Get "before" change value */ 1547 bva.va_mask = AT_CTIME|AT_SEQ; 1548 error = VOP_GETATTR(dvp, &bva, 0, cr, &ct); 1549 if (error) { 1550 
*cs->statusp = resp->status = puterrno4(error); 1551 kmem_free(nm, len); 1552 nfs4_ntov_table_free(&ntov, &sarg); 1553 1554 resp->attrset = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs)); 1555 goto final; 1556 } 1557 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime) 1558 1559 vap = sarg.vap; 1560 1561 /* 1562 * Set default initial values for attributes when not specified 1563 * in createattrs. 1564 */ 1565 if ((vap->va_mask & AT_UID) == 0) { 1566 vap->va_uid = crgetuid(cr); 1567 vap->va_mask |= AT_UID; 1568 } 1569 if ((vap->va_mask & AT_GID) == 0) { 1570 vap->va_gid = crgetgid(cr); 1571 vap->va_mask |= AT_GID; 1572 } 1573 1574 vap->va_mask |= AT_TYPE; 1575 switch (args->type) { 1576 case NF4DIR: 1577 vap->va_type = VDIR; 1578 if ((vap->va_mask & AT_MODE) == 0) { 1579 vap->va_mode = 0700; /* default: owner rwx only */ 1580 vap->va_mask |= AT_MODE; 1581 } 1582 error = VOP_MKDIR(dvp, nm, vap, &vp, cr, &ct, 0, NULL); 1583 if (error) 1584 break; 1585 1586 /* 1587 * Get the initial "after" sequence number, if it fails, 1588 * set to zero 1589 */ 1590 iva.va_mask = AT_SEQ; 1591 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, &ct)) 1592 iva.va_seq = 0; 1593 break; 1594 case NF4LNK: 1595 vap->va_type = VLNK; 1596 if ((vap->va_mask & AT_MODE) == 0) { 1597 vap->va_mode = 0700; /* default: owner rwx only */ 1598 vap->va_mask |= AT_MODE; 1599 } 1600 1601 /* 1602 * symlink names must be treated as data 1603 */ 1604 lnm = utf8_to_str(&args->ftype4_u.linkdata, &llen, NULL); 1605 1606 if (lnm == NULL) { 1607 *cs->statusp = resp->status = NFS4ERR_INVAL; 1608 kmem_free(nm, len); 1609 nfs4_ntov_table_free(&ntov, &sarg); 1610 resp->attrset = 1611 NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs)); 1612 goto final; 1613 } 1614 1615 if (llen > MAXPATHLEN) { 1616 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 1617 kmem_free(nm, len); 1618 kmem_free(lnm, llen); 1619 nfs4_ntov_table_free(&ntov, &sarg); 1620 resp->attrset = 1621 NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs)); 1622 goto final; 1623 } 1624 1625 error = 
VOP_SYMLINK(dvp, nm, vap, lnm, cr, &ct, 0); 1626 if (lnm != NULL) 1627 kmem_free(lnm, llen); 1628 if (error) 1629 break; 1630 1631 /* 1632 * Get the initial "after" sequence number, if it fails, 1633 * set to zero 1634 */ 1635 iva.va_mask = AT_SEQ; 1636 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, &ct)) 1637 iva.va_seq = 0; 1638 1639 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, 1640 &ct, 0, NULL); 1641 if (error) 1642 break; 1643 1644 /* 1645 * va_seq is not safe over VOP calls, check it again 1646 * if it has changed zero out iva to force atomic = FALSE. 1647 */ 1648 iva2.va_mask = AT_SEQ; 1649 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, &ct) || 1650 iva2.va_seq != iva.va_seq) 1651 iva.va_seq = 0; 1652 break; 1653 default: 1654 /* 1655 * probably a special file. 1656 */ 1657 if ((vap->va_mask & AT_MODE) == 0) { 1658 vap->va_mode = 0600; /* default: owner rw only */ 1659 vap->va_mask |= AT_MODE; 1660 } 1661 syncval = FNODSYNC; 1662 /* 1663 * We know this will only generate one VOP call 1664 */ 1665 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, nm); 1666 1667 if (vp == NULL) { 1668 kmem_free(nm, len); 1669 nfs4_ntov_table_free(&ntov, &sarg); 1670 resp->attrset = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs)); 1671 goto final; 1672 } 1673 1674 /* 1675 * Get the initial "after" sequence number, if it fails, 1676 * set to zero 1677 */ 1678 iva.va_mask = AT_SEQ; 1679 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, &ct)) 1680 iva.va_seq = 0; 1681 1682 break; 1683 } 1684 kmem_free(nm, len); 1685 1686 if (error) { 1687 *cs->statusp = resp->status = puterrno4(error); 1688 } 1689 1690 /* 1691 * Force modified data and metadata out to stable storage. 1692 */ 1693 (void) VOP_FSYNC(dvp, 0, cr, &ct); 1694 1695 if (resp->status != NFS4_OK) { 1696 if (vp != NULL) 1697 VN_RELE(vp); 1698 nfs4_ntov_table_free(&ntov, &sarg); 1699 resp->attrset = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs)); 1700 goto final; 1701 } 1702 1703 /* 1704 * Finish setup of cinfo response, "before" value already set. 
1705 * Get "after" change value, if it fails, simply return the 1706 * before value. 1707 */ 1708 ava.va_mask = AT_CTIME|AT_SEQ; 1709 if (VOP_GETATTR(dvp, &ava, 0, cr, &ct)) { 1710 ava.va_ctime = bva.va_ctime; 1711 ava.va_seq = 0; 1712 } 1713 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime); 1714 1715 /* 1716 * True verification that object was created with correct 1717 * attrs is impossible. The attrs could have been changed 1718 * immediately after object creation. If attributes did 1719 * not verify, the only recourse for the server is to 1720 * destroy the object. Maybe if some attrs (like gid) 1721 * are set incorrectly, the object should be destroyed; 1722 * however, seems bad as a default policy. Do we really 1723 * want to destroy an object over one of the times not 1724 * verifying correctly? For these reasons, the server 1725 * currently sets bits in attrset for createattrs 1726 * that were set; however, no verification is done. 1727 * 1728 * vmask_to_nmask accounts for vattr bits set on create 1729 * [do_rfs4_set_attrs() only sets resp bits for 1730 * non-vattr/vfs bits.] 1731 * Mask off any bits set by default so as not to return 1732 * more attrset bits than were requested in createattrs 1733 */ 1734 nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset, 1735 RFS4_ATTRVERS(cs)); 1736 ATTRMAP_MASK(resp->attrset, args->createattrs.attrmask); 1737 nfs4_ntov_table_free(&ntov, &sarg); 1738 1739 error = mknfs41_fh(&cs->fh, vp, cs->exi); 1740 if (error) { 1741 *cs->statusp = resp->status = puterrno4(error); 1742 } 1743 1744 /* 1745 * The cinfo.atomic = TRUE only if we got no errors, we have 1746 * non-zero va_seq's, and it has incremented by exactly one 1747 * during the creation and it didn't change during the VOP_LOOKUP 1748 * or VOP_FSYNC. 
1749 */ 1750 if (!error && bva.va_seq && iva.va_seq && ava.va_seq && 1751 iva.va_seq == (bva.va_seq + 1) && 1752 iva.va_seq == ava.va_seq) 1753 resp->cinfo.atomic = TRUE; 1754 else 1755 resp->cinfo.atomic = FALSE; 1756 1757 /* 1758 * Force modified metadata out to stable storage. 1759 * 1760 * if a underlying vp exists, pass it to VOP_FSYNC 1761 */ 1762 if (VOP_REALVP(vp, &realvp, &ct) == 0) 1763 (void) VOP_FSYNC(realvp, syncval, cr, &ct); 1764 else 1765 (void) VOP_FSYNC(vp, syncval, cr, &ct); 1766 1767 if (resp->status != NFS4_OK) { 1768 VN_RELE(vp); 1769 goto final; 1770 } 1771 if (cs->vp) 1772 VN_RELE(cs->vp); 1773 1774 cs->vp = vp; 1775 *cs->statusp = resp->status = NFS4_OK; 1776 1777 final: 1778 DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs, 1779 CREATE4res *, resp); 1780 } 1781 1782 1783 /*ARGSUSED*/ 1784 static void 1785 mds_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1786 compound_state_t *cs) 1787 { 1788 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn; 1789 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn; 1790 rfs4_deleg_state_t *dsp; 1791 nfsstat4 status; 1792 1793 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs, 1794 DELEGRETURN4args *, args); 1795 1796 status = rfs4_get_deleg_state(cs, &args->deleg_stateid, &dsp); 1797 resp->status = *cs->statusp = status; 1798 if (status != NFS4_OK) 1799 goto final; 1800 1801 /* Ensure specified filehandle matches */ 1802 if (cs->vp != dsp->rds_finfo->rf_vp) { 1803 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID; 1804 } else 1805 rfs4_return_deleg(dsp, FALSE); 1806 1807 rfs4_update_lease(dsp->rds_client); 1808 1809 rfs4_deleg_state_rele(dsp); 1810 1811 final: 1812 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs, 1813 DELEGRETURN4res *, resp); 1814 } 1815 1816 1817 1818 /* ARGSUSED */ 1819 static void 1820 mds_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1821 compound_state_t *cs) 1822 { 
1823 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr; 1824 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr; 1825 struct nfs4_svgetit_arg sarg; 1826 struct statvfs64 sb; 1827 nfsstat4 status; 1828 1829 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs, 1830 GETATTR4args *, args); 1831 1832 if (cs->vp == NULL) { 1833 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1834 goto final; 1835 } 1836 1837 if (cs->access == CS_ACCESS_DENIED) { 1838 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1839 goto final; 1840 } 1841 1842 sarg.sbp = &sb; 1843 sarg.cs = cs; 1844 1845 status = attrmap4_to_vattrmask(&args->attr_request, &sarg); 1846 if (status == NFS4_OK) { 1847 status = bitmap4_get_sysattrs(&sarg); 1848 if (status == NFS4_OK) 1849 status = do_rfs4_op_getattr(&args->attr_request, 1850 &resp->obj_attributes, &sarg); 1851 } 1852 *cs->statusp = resp->status = status; 1853 1854 final: 1855 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs, 1856 GETATTR4res *, resp); 1857 } 1858 1859 /*ARGSUSED*/ 1860 void 1861 mds_op_getattr_free(nfs_resop4 *resop, compound_state_t *cs) 1862 { 1863 /* Common function for NFSv4.0 and NFSv4.1 */ 1864 rfs4_op_getattr_free(resop); 1865 } 1866 1867 /* ARGSUSED */ 1868 static void 1869 mds_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1870 compound_state_t *cs) 1871 { 1872 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh; 1873 1874 DTRACE_NFSV4_1(op__getfh__start, 1875 struct compound_state *, cs); 1876 1877 if (cs->vp == NULL) { 1878 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1879 goto final; 1880 } 1881 if (cs->access == CS_ACCESS_DENIED) { 1882 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1883 goto final; 1884 } 1885 1886 resp->object.nfs_fh4_val = 1887 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP); 1888 nfs_fh4_copy(&cs->fh, &resp->object); 1889 *cs->statusp = resp->status = NFS4_OK; 1890 1891 final: 1892 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs, 1893 GETFH4res *, 
resp); 1894 } 1895 1896 /*ARGSUSED*/ 1897 static void 1898 mds_op_getfh_free(nfs_resop4 *resop, compound_state_t *cs) 1899 { 1900 /* Common function for NFSv4.0 and NFSv4.1 */ 1901 rfs4_op_getfh_free(resop); 1902 } 1903 1904 /* 1905 * link: args: SAVED_FH: file, CURRENT_FH: target directory 1906 * res: status. If success - CURRENT_FH unchanged, return change_info 1907 */ 1908 /* ARGSUSED */ 1909 static void 1910 mds_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1911 compound_state_t *cs) 1912 { 1913 LINK4args *args = &argop->nfs_argop4_u.oplink; 1914 LINK4res *resp = &resop->nfs_resop4_u.oplink; 1915 int error; 1916 vnode_t *vp; 1917 vnode_t *dvp; 1918 struct vattr bdva, idva, adva; 1919 char *nm; 1920 uint_t len; 1921 caller_context_t ct; 1922 1923 DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs, 1924 LINK4args *, args); 1925 1926 /* SAVED_FH: source object */ 1927 vp = cs->saved_vp; 1928 if (vp == NULL) { 1929 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1930 goto final; 1931 } 1932 1933 /* CURRENT_FH: target directory */ 1934 dvp = cs->vp; 1935 if (dvp == NULL) { 1936 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1937 goto final; 1938 } 1939 1940 /* 1941 * If there is a non-shared filesystem mounted on this vnode, 1942 * do not allow to link any file in this directory. 
1943 */ 1944 if (vn_ismntpt(dvp)) { 1945 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1946 goto final; 1947 } 1948 1949 if (cs->access == CS_ACCESS_DENIED) { 1950 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1951 goto final; 1952 } 1953 1954 /* Check source object's type validity */ 1955 if (vp->v_type == VDIR) { 1956 *cs->statusp = resp->status = NFS4ERR_ISDIR; 1957 goto final; 1958 } 1959 1960 /* Check target directory's type */ 1961 if (dvp->v_type != VDIR) { 1962 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 1963 goto final; 1964 } 1965 1966 if (cs->saved_exi != cs->exi) { 1967 *cs->statusp = resp->status = NFS4ERR_XDEV; 1968 goto final; 1969 } 1970 1971 if (!utf8_dir_verify(&args->newname)) { 1972 *cs->statusp = resp->status = NFS4ERR_INVAL; 1973 goto final; 1974 } 1975 1976 nm = utf8_to_fn(&args->newname, &len, NULL); 1977 if (nm == NULL) { 1978 *cs->statusp = resp->status = NFS4ERR_INVAL; 1979 goto final; 1980 } 1981 1982 if (len > MAXNAMELEN) { 1983 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 1984 kmem_free(nm, len); 1985 goto final; 1986 } 1987 1988 if (rdonly4(cs->exi, cs->vp, req)) { 1989 *cs->statusp = resp->status = NFS4ERR_ROFS; 1990 kmem_free(nm, len); 1991 goto final; 1992 } 1993 1994 ct.cc_sysid = 0; 1995 ct.cc_pid = 0; 1996 ct.cc_caller_id = cs->instp->caller_id; 1997 ct.cc_flags = CC_DONTBLOCK; 1998 1999 /* Get "before" change value */ 2000 bdva.va_mask = AT_CTIME|AT_SEQ; 2001 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, &ct); 2002 if (error) { 2003 *cs->statusp = resp->status = puterrno4(error); 2004 kmem_free(nm, len); 2005 goto final; 2006 } 2007 2008 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime) 2009 2010 error = VOP_LINK(dvp, vp, nm, cs->cr, &ct, 0); 2011 2012 kmem_free(nm, len); 2013 2014 /* 2015 * Get the initial "after" sequence number, if it fails, set to zero 2016 */ 2017 idva.va_mask = AT_SEQ; 2018 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, &ct)) 2019 idva.va_seq = 0; 2020 2021 /* 2022 * Force modified data and metadata 
out to stable storage. 2023 */ 2024 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, &ct); 2025 (void) VOP_FSYNC(dvp, 0, cs->cr, &ct); 2026 2027 if (error) { 2028 *cs->statusp = resp->status = puterrno4(error); 2029 goto final; 2030 } 2031 2032 /* 2033 * Get "after" change value, if it fails, simply return the 2034 * before value. 2035 */ 2036 adva.va_mask = AT_CTIME|AT_SEQ; 2037 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, &ct)) { 2038 adva.va_ctime = bdva.va_ctime; 2039 adva.va_seq = 0; 2040 } 2041 2042 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime) 2043 2044 /* 2045 * The cinfo.atomic = TRUE only if we have 2046 * non-zero va_seq's, and it has incremented by exactly one 2047 * during the VOP_LINK and it didn't change during the VOP_FSYNC. 2048 */ 2049 if (bdva.va_seq && idva.va_seq && adva.va_seq && 2050 idva.va_seq == (bdva.va_seq + 1) && 2051 idva.va_seq == adva.va_seq) 2052 resp->cinfo.atomic = TRUE; 2053 else 2054 resp->cinfo.atomic = FALSE; 2055 2056 *cs->statusp = resp->status = NFS4_OK; 2057 2058 final: 2059 DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs, 2060 LINK4res *, resp); 2061 } 2062 2063 /* 2064 * Used by mds_op_lookup and mds_op_lookupp to do the actual work. 2065 */ 2066 2067 /* ARGSUSED */ 2068 static nfsstat4 2069 mds_do_lookup(char *nm, uint_t buflen, struct svc_req *req, 2070 struct compound_state *cs) 2071 { 2072 int error; 2073 int different_export = 0; 2074 vnode_t *vp, *tvp, *pre_tvp = NULL, *oldvp = NULL; 2075 struct exportinfo *exi = NULL, *pre_exi = NULL; 2076 nfsstat4 stat; 2077 fid_t fid; 2078 int attrdir, dotdot, walk; 2079 bool_t is_newvp = FALSE; 2080 caller_context_t ct; 2081 nfs41_fh_fmt_t *fhp; 2082 2083 fhp = (nfs41_fh_fmt_t *)cs->fh.nfs_fh4_val; 2084 2085 attrdir = ((cs->vp->v_flag & V_XATTRDIR) == V_XATTRDIR) 2086 ? FH41_ATTRDIR : 0; 2087 2088 ASSERT(FH41_GET_FLAG(fhp, FH41_ATTRDIR) == attrdir); 2089 2090 dotdot = (nm[0] == '.' && nm[1] == '.' 
&& nm[2] == '\0'); 2091 2092 /* 2093 * If dotdotting, then need to check whether it's 2094 * above the root of a filesystem, or above an 2095 * export point. 2096 */ 2097 if (dotdot) { 2098 2099 /* 2100 * If dotdotting at the root of a filesystem, then 2101 * need to traverse back to the mounted-on filesystem 2102 * and do the dotdot lookup there. 2103 */ 2104 if (cs->vp->v_flag & VROOT) { 2105 2106 /* 2107 * If at the system root, then can 2108 * go up no further. 2109 */ 2110 if (VN_CMP(cs->vp, rootdir)) 2111 return (puterrno4(ENOENT)); 2112 2113 /* 2114 * Traverse back to the mounted-on filesystem 2115 */ 2116 cs->vp = untraverse(cs->vp); 2117 2118 /* 2119 * Set the different_export flag so we remember 2120 * to pick up a new exportinfo entry for 2121 * this new filesystem. 2122 */ 2123 different_export = 1; 2124 } else { 2125 2126 /* 2127 * If dotdotting above an export point then set 2128 * the different_export to get new export info. 2129 */ 2130 different_export = nfs_exported(cs->exi, cs->vp); 2131 } 2132 } 2133 2134 ct.cc_sysid = 0; 2135 ct.cc_pid = 0; 2136 ct.cc_caller_id = cs->instp->caller_id; 2137 ct.cc_flags = CC_DONTBLOCK; 2138 2139 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr, 2140 &ct, 0, NULL); 2141 if (error) 2142 return (puterrno4(error)); 2143 2144 /* 2145 * If the vnode is in a pseudo filesystem, check whether it is visible. 2146 * 2147 * XXX if the vnode is a symlink and it is not visible in 2148 * a pseudo filesystem, return ENOENT (not following symlink). 2149 * V4 client can not mount such symlink. 2150 * 2151 * In the same exported filesystem, if the security flavor used 2152 * is not an explicitly shared flavor, limit the view to the visible 2153 * list entries only. This is not a WRONGSEC case because it's already 2154 * checked via PUTROOTFH/PUTPUBFH or PUTFH. 2155 */ 2156 if (!different_export && 2157 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) || 2158 cs->access & CS_ACCESS_LIMITED)) { 2159 if (! 
nfs_visible(cs->exi, vp, &different_export)) { 2160 VN_RELE(vp); 2161 return (puterrno4(ENOENT)); 2162 } 2163 } 2164 2165 /* 2166 * If it's a mountpoint, then traverse it. 2167 */ 2168 if (vn_ismntpt(vp)) { 2169 pre_exi = cs->exi; /* save pre-traversed exportinfo */ 2170 pre_tvp = vp; /* save pre-traversed vnode */ 2171 2172 /* 2173 * hold pre_tvp to counteract rele by traverse. We will 2174 * need pre_tvp below if checkexport4 fails 2175 */ 2176 VN_HOLD(pre_tvp); 2177 tvp = vp; 2178 if ((error = traverse(&tvp)) != 0) { 2179 VN_RELE(vp); 2180 VN_RELE(pre_tvp); 2181 return (puterrno4(error)); 2182 } 2183 vp = tvp; 2184 different_export = 1; 2185 2186 } else if (vp->v_vfsp != cs->vp->v_vfsp) { 2187 /* 2188 * The vfsp comparison is to handle the case where 2189 * a LOFS mount is shared. lo_lookup traverses mount points, 2190 * and NFS is unaware of local fs transistions because 2191 * v_vfsmountedhere isn't set. For this special LOFS case, 2192 * the dir and the obj returned by lookup will have different 2193 * vfs ptrs. 2194 */ 2195 different_export = 1; 2196 } 2197 2198 if (different_export) { 2199 bzero(&fid, sizeof (fid)); 2200 fid.fid_len = MAXFIDSZ; 2201 error = vop_fid_pseudo(vp, &fid); 2202 if (error) { 2203 VN_RELE(vp); 2204 if (pre_tvp) 2205 VN_RELE(pre_tvp); 2206 return (puterrno4(error)); 2207 } 2208 2209 if (dotdot) 2210 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE); 2211 else 2212 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp); 2213 2214 if (exi == NULL) { 2215 if (pre_tvp) { 2216 /* 2217 * If this vnode is a mounted-on vnode, 2218 * but the mounted-on file system is not 2219 * exported, send back the filehandle for 2220 * the mounted-on vnode, not the root of 2221 * the mounted-on file system. 2222 */ 2223 VN_RELE(vp); 2224 vp = pre_tvp; 2225 exi = pre_exi; 2226 } else { 2227 VN_RELE(vp); 2228 return (puterrno4(EACCES)); 2229 } 2230 } else if (pre_tvp) { 2231 /* we're done with pre_tvp now. 
release extra hold */ 2232 VN_RELE(pre_tvp); 2233 } 2234 2235 cs->exi = exi; 2236 2237 /* 2238 * Now do a checkauth4. 2239 * 2240 * Checking here since the client/principle may not have 2241 * access to the cs->exi exported file system. 2242 * 2243 * If the client has access we also need to validate 2244 * the principle since it may have been re-mapped. 2245 * 2246 * We start with a new credential as a previous call to 2247 * checkauth4(), via a PUT*FH operation, wrote over cs->cr. 2248 */ 2249 crfree(cs->cr); 2250 cs->cr = crdup(cs->basecr); 2251 2252 if (cs->vp) 2253 oldvp = cs->vp; 2254 cs->vp = vp; 2255 is_newvp = TRUE; 2256 2257 stat = call_checkauth4(cs, req); 2258 if (stat != NFS4_OK) { 2259 VN_RELE(cs->vp); 2260 cs->vp = oldvp; 2261 return (stat); 2262 } 2263 } 2264 2265 /* 2266 * After various NFS checks, do a label check on the path 2267 * component. The label on this path should either be the 2268 * global zone's label or a zone's label. We are only 2269 * interested in the zone's label because exported files 2270 * in global zone is accessible (though read-only) to 2271 * clients. The exportability/visibility check is already 2272 * done before reaching this code. 2273 */ 2274 if (is_system_labeled()) { 2275 bslabel_t *clabel; 2276 2277 ASSERT(req->rq_label != NULL); 2278 clabel = req->rq_label; 2279 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *, 2280 "got client label from request(1)", struct svc_req *, req); 2281 2282 if (!blequal(&l_admin_low->tsl_label, clabel)) { 2283 if (!do_rfs_label_check(clabel, vp, 2284 DOMINANCE_CHECK, cs->exi)) { 2285 error = EACCES; 2286 goto err_out; 2287 } 2288 } else { 2289 /* 2290 * We grant access to admin_low label clients 2291 * only if the client is trusted, i.e. also 2292 * running Solaris Trusted Extension. 
2293 */ 2294 struct sockaddr *ca; 2295 int addr_type; 2296 void *ipaddr; 2297 tsol_tpc_t *tp; 2298 2299 ca = (struct sockaddr *)svc_getrpccaller( 2300 req->rq_xprt)->buf; 2301 if (ca->sa_family == AF_INET) { 2302 addr_type = IPV4_VERSION; 2303 ipaddr = &((struct sockaddr_in *)ca)->sin_addr; 2304 } else if (ca->sa_family == AF_INET6) { 2305 addr_type = IPV6_VERSION; 2306 ipaddr = &((struct sockaddr_in6 *) 2307 ca)->sin6_addr; 2308 } 2309 tp = find_tpc(ipaddr, addr_type, B_FALSE); 2310 if (tp == NULL || tp->tpc_tp.tp_doi != 2311 l_admin_low->tsl_doi || tp->tpc_tp.host_type != 2312 SUN_CIPSO) { 2313 error = EACCES; 2314 goto err_out; 2315 } 2316 } 2317 } 2318 2319 error = mknfs41_fh(&cs->fh, vp, cs->exi); 2320 2321 err_out: 2322 if (error) { 2323 if (is_newvp) { 2324 VN_RELE(cs->vp); 2325 cs->vp = oldvp; 2326 } else 2327 VN_RELE(vp); 2328 return (puterrno4(error)); 2329 } 2330 2331 if (!is_newvp) { 2332 if (cs->vp) 2333 VN_RELE(cs->vp); 2334 cs->vp = vp; 2335 } else if (oldvp) 2336 VN_RELE(oldvp); 2337 2338 /* 2339 * if did lookup on attrdir and didn't lookup .., set named 2340 * attr fh flag 2341 */ 2342 if (attrdir && ! 
dotdot) 2343 FH41_SET_FLAG(fhp, FH41_NAMEDATTR); 2344 2345 /* Assume false for now, open proc will set this */ 2346 cs->mandlock = FALSE; 2347 2348 return (NFS4_OK); 2349 } 2350 2351 /* ARGSUSED */ 2352 static void 2353 mds_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2354 compound_state_t *cs) 2355 { 2356 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup; 2357 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup; 2358 char *nm; 2359 uint_t len; 2360 2361 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs, 2362 LOOKUP4args *, args); 2363 2364 if (cs->vp == NULL) { 2365 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2366 goto final; 2367 } 2368 2369 if (cs->vp->v_type == VLNK) { 2370 *cs->statusp = resp->status = NFS4ERR_SYMLINK; 2371 goto final; 2372 } 2373 2374 if (cs->vp->v_type != VDIR) { 2375 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 2376 goto final; 2377 } 2378 2379 if (!utf8_dir_verify(&args->objname)) { 2380 *cs->statusp = resp->status = NFS4ERR_INVAL; 2381 goto final; 2382 } 2383 2384 nm = utf8_to_str(&args->objname, &len, NULL); 2385 if (nm == NULL) { 2386 *cs->statusp = resp->status = NFS4ERR_INVAL; 2387 goto final; 2388 } 2389 2390 if (len > MAXNAMELEN) { 2391 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 2392 kmem_free(nm, len); 2393 goto final; 2394 } 2395 2396 *cs->statusp = resp->status = mds_do_lookup(nm, len, req, cs); 2397 2398 kmem_free(nm, len); 2399 2400 final: 2401 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs, 2402 LOOKUP4res *, resp); 2403 } 2404 2405 /* ARGSUSED */ 2406 static void 2407 mds_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req, 2408 compound_state_t *cs) 2409 { 2410 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp; 2411 2412 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs); 2413 2414 if (cs->vp == NULL) { 2415 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2416 goto final; 2417 } 2418 2419 if (cs->vp->v_type != VDIR) { 2420 
*cs->statusp = resp->status = NFS4ERR_NOTDIR;
		goto final;
	}

	/* LOOKUPP is a lookup of ".." relative to the current filehandle */
	*cs->statusp = resp->status = mds_do_lookup("..", 3, req, cs);

	/*
	 * From NFSV4 Specification, LOOKUPP should not check for
	 * NFS4ERR_WRONGSEC. Return NFS4_OK instead.
	 */
	if (resp->status == NFS4ERR_WRONGSEC) {
		*cs->statusp = resp->status = NFS4_OK;
	}

final:
	DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
	    LOOKUPP4res *, resp);
}


/*
 * OPENATTR: args: CURRENT_FH: object; createdir.
 *	res: status.  On success the current filehandle and cs->vp are
 *	replaced by the object's extended attribute directory and the
 *	FH41_ATTRDIR flag is set in the new filehandle.  On failure the
 *	compound state is left unchanged.
 */
/*ARGSUSED2*/
static void
mds_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    compound_state_t *cs)
{
	OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr;
	OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr;
	vnode_t *avp = NULL;
	int lookup_flags = LOOKUP_XATTR, error;
	int exp_ro = 0;
	caller_context_t ct;

	DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
	    OPENATTR4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto final;
	}

	/*
	 * Make a couple of checks made by copen()
	 *
	 * Check to make sure underlying fs supports xattrs.  This
	 * is required because solaris filesystem implementations
	 * (UFS/TMPFS) don't enforce the noxattr mount option
	 * in VOP_LOOKUP(LOOKUP_XATTR).  If fs doesn't support this
	 * pathconf cmd or if fs supports cmd but doesn't claim
	 * support for xattr, return NOTSUPP.  It would be better
	 * to use VOP_PATHCONF( _PC_XATTR_ENABLED) for this; however,
	 * that cmd is not available to VOP_PATHCONF interface
	 * (it's only implemented inside pathconf syscall)...
	 *
	 * Verify permission to put attributes on files (access
	 * checks from copen).
	 */

	if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0) {
		error = ENOTSUP;
		goto error_out;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = cs->instp->caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/* denied only when the caller has none of read, write or execute */
	if ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, &ct) != 0) &&
	    (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, &ct) != 0) &&
	    (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, &ct) != 0)) {
		error = EACCES;
		goto error_out;
	}

	/*
	 * The CREATE_XATTR_DIR VOP flag cannot be specified if
	 * the file system is exported read-only -- regardless of
	 * createdir flag.  Otherwise the attrdir would be created
	 * (assuming server fs isn't mounted readonly locally).  If
	 * VOP_LOOKUP returns ENOENT in this case, the error will
	 * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
	 * because specfs has no VOP_LOOKUP op, so the macro would
	 * return ENOSYS.  EINVAL is returned by all (current)
	 * Solaris file system implementations when any of their
	 * restrictions are violated (xattr(dir) can't have xattrdir).
	 * Returning NOTSUPP is more appropriate in this case
	 * because the object will never be able to have an attrdir.
	 */
	if (args->createdir && ! (exp_ro = rdonly4(cs->exi, cs->vp, req)))
		lookup_flags |= CREATE_XATTR_DIR;

	error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL,
	    cs->cr, &ct, 0, NULL);

	if (error) {
		if (error == ENOENT && args->createdir && exp_ro)
			error = EROFS;
		else if (error == EINVAL || error == ENOSYS)
			error = ENOTSUP;
		goto error_out;
	}

	ASSERT(avp->v_flag & V_XATTRDIR);

	error = mknfs41_fh(&cs->fh, avp, cs->exi);

	if (error) {
		VN_RELE(avp);
		goto error_out;
	}

	/* the attribute directory becomes the current vnode */
	VN_RELE(cs->vp);
	cs->vp = avp;

	/*
	 * There is no requirement for an attrdir fh flag
	 * because the attrdir has a vnode flag to distinguish
	 * it from regular (non-xattr) directories.  The
	 * FH41_ATTRDIR flag is set for future sanity checks.
	 */
	FH41_SET_FLAG((nfs41_fh_fmt_t *)cs->fh.nfs_fh4_val, FH41_ATTRDIR);
	*cs->statusp = resp->status = NFS4_OK;
	goto final;

error_out:

	*cs->statusp = resp->status = puterrno4(error);

final:
	DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
	    OPENATTR4res *, resp);
}

/*
 * Perform a read (direction == FREAD) or a write (any other value) of
 * 'uio' on 'vp' under the vnode's rwlock.  The operation is retried
 * while it returns EAGAIN, up to rfs4_maxlock_tries attempts, sleeping
 * between attempts for a delay that starts at rfs4_lock_delay
 * milliseconds and doubles each time.  Returns the error from the last
 * attempt.
 */
static int
do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
    caller_context_t *ct)
{
	int error;
	int i;
	clock_t delaytime;

	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);

	/*
	 * Don't block on mandatory locks. If this routine returns
	 * EAGAIN, the caller should return NFS4ERR_LOCKED.
2566 */ 2567 uio->uio_fmode = FNONBLOCK; 2568 2569 for (i = 0; i < rfs4_maxlock_tries; i++) { 2570 if (direction == FREAD) { 2571 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct); 2572 error = VOP_READ(vp, uio, ioflag, cred, ct); 2573 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct); 2574 } else { 2575 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct); 2576 error = VOP_WRITE(vp, uio, ioflag, cred, ct); 2577 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct); 2578 } 2579 2580 if (error != EAGAIN) 2581 break; 2582 2583 if (i < rfs4_maxlock_tries - 1) { 2584 delay(delaytime); 2585 delaytime *= 2; 2586 } 2587 } 2588 2589 return (error); 2590 } 2591 2592 /* ARGSUSED */ 2593 static void 2594 mds_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2595 compound_state_t *cs) 2596 { 2597 READ4args *args = &argop->nfs_argop4_u.opread; 2598 READ4res *resp = &resop->nfs_resop4_u.opread; 2599 int error; 2600 nnode_t *nn = NULL; 2601 struct iovec iov; 2602 struct uio uio; 2603 bool_t *deleg = &cs->deleg; 2604 nfsstat4 stat; 2605 mblk_t *mp; 2606 int alloc_err = 0; 2607 caller_context_t ct; 2608 uint32_t nnioflags = 0; 2609 2610 DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs, 2611 READ4args, args); 2612 2613 nn = cs->nn; 2614 if (nn == NULL) { 2615 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2616 goto final; 2617 } 2618 if (cs->access == CS_ACCESS_DENIED) { 2619 *cs->statusp = resp->status = NFS4ERR_ACCESS; 2620 goto final; 2621 } 2622 2623 if ((stat = nnop_check_stateid(nn, cs, FREAD, &args->stateid, 2624 FALSE, deleg, TRUE, &ct, NULL)) != NFS4_OK) { 2625 *cs->statusp = resp->status = stat; 2626 goto final; 2627 } 2628 2629 error = nnop_io_prep(nn, &nnioflags, cs->cr, &ct, args->offset, 2630 args->count, NULL); 2631 if (error != 0) { 2632 *cs->statusp = resp->status = nnode_stat4(error, 1); 2633 goto out; 2634 } 2635 2636 if (nnioflags & NNODE_IO_FLAG_PAST_EOF) { 2637 *cs->statusp = resp->status = NFS4_OK; 2638 resp->eof = TRUE; 2639 resp->data_len = 0; 2640 resp->data_val 
= NULL; 2641 resp->mblk = NULL; 2642 *cs->statusp = resp->status = NFS4_OK; 2643 goto out; 2644 } 2645 2646 if (args->count == 0) { 2647 *cs->statusp = resp->status = NFS4_OK; 2648 resp->eof = FALSE; 2649 resp->data_len = 0; 2650 resp->data_val = NULL; 2651 resp->mblk = NULL; 2652 goto out; 2653 } 2654 2655 /* 2656 * Do not allocate memory more than maximum allowed 2657 * transfer size 2658 */ 2659 if (args->count > rfs4_tsize(req)) 2660 args->count = rfs4_tsize(req); 2661 2662 if (args->wlist) { 2663 mp = NULL; 2664 (void) rdma_get_wchunk(req, &iov, args->wlist); 2665 } else { 2666 /* 2667 * mp will contain the data to be sent out in the read reply. 2668 * It will be freed after the reply has been sent. 2669 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, 2670 * so that the call to xdrmblk_putmblk() never fails. 2671 * If the first alloc of the requested size fails, then 2672 * decrease the size to something more reasonable and wait 2673 * for the allocation to occur. 2674 */ 2675 mp = allocb(RNDUP(args->count), BPRI_MED); 2676 if (mp == NULL) { 2677 if (args->count > MAXBSIZE) 2678 args->count = MAXBSIZE; 2679 mp = allocb_wait(RNDUP(args->count), BPRI_MED, 2680 STR_NOSIG, &alloc_err); 2681 } 2682 ASSERT(mp != NULL); 2683 ASSERT(alloc_err == 0); 2684 2685 iov.iov_base = (caddr_t)mp->b_datap->db_base; 2686 iov.iov_len = args->count; 2687 } 2688 2689 uio.uio_iov = &iov; 2690 uio.uio_iovcnt = 1; 2691 uio.uio_segflg = UIO_SYSSPACE; 2692 uio.uio_extflg = UIO_COPY_CACHED; 2693 uio.uio_loffset = args->offset; 2694 uio.uio_resid = args->count; 2695 2696 error = nnop_read(nn, &nnioflags, cs->cr, &ct, &uio, 0); 2697 if (error) { 2698 if (mp != NULL) 2699 freeb(mp); 2700 *cs->statusp = resp->status = nnode_stat4(error, 1); 2701 goto out; 2702 } 2703 2704 *cs->statusp = resp->status = NFS4_OK; 2705 2706 ASSERT(uio.uio_resid >= 0); 2707 resp->data_len = args->count - uio.uio_resid; 2708 resp->data_val = (char *)mp->b_datap->db_base; 2709 resp->mblk = mp; 2710 2711 
resp->eof = (nnioflags & NNODE_IO_FLAG_EOF) ? TRUE : FALSE; 2712 2713 out: 2714 nnop_io_release(nn, nnioflags, &ct); 2715 2716 final: 2717 DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs, 2718 READ4res *, resp); 2719 } 2720 2721 /*ARGSUSED*/ 2722 static void 2723 mds_op_read_free(nfs_resop4 *resop, compound_state_t *cs) 2724 { 2725 /* Common function for NFSv4.0 and NFSv4.1 */ 2726 rfs4_op_read_free(resop); 2727 } 2728 2729 /* ARGSUSED */ 2730 static void 2731 mds_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req, 2732 compound_state_t *cs) 2733 { 2734 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh; 2735 int error; 2736 vnode_t *vp; 2737 struct exportinfo *exi, *sav_exi; 2738 nfs41_fh_fmt_t *fhp; 2739 fid_t exp_fid; 2740 2741 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs); 2742 2743 if (cs->vp) { 2744 VN_RELE(cs->vp); 2745 cs->vp = NULL; 2746 } 2747 2748 if (cs->cr) 2749 crfree(cs->cr); 2750 2751 cs->cr = crdup(cs->basecr); 2752 2753 vp = exi_public->exi_vp; 2754 if (vp == NULL) { 2755 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 2756 goto final; 2757 } 2758 2759 error = mknfs41_fh(&cs->fh, vp, exi_public); 2760 if (error != 0) { 2761 *cs->statusp = resp->status = puterrno4(error); 2762 goto final; 2763 } 2764 sav_exi = cs->exi; 2765 if (exi_public == exi_root) { 2766 /* 2767 * No filesystem is actually shared public, so we default 2768 * to exi_root. In this case, we must check whether root 2769 * is exported. 2770 */ 2771 fhp = (nfs41_fh_fmt_t *)cs->fh.nfs_fh4_val; 2772 2773 exp_fid.fid_len = fhp->fh.v1.export_fid.len; 2774 2775 bcopy(fhp->fh.v1.export_fid.val, exp_fid.fid_data, 2776 exp_fid.fid_len); 2777 2778 /* 2779 * if root filesystem is exported, the exportinfo struct that we 2780 * should use is what checkexport4 returns, because root_exi is 2781 * actually a mostly empty struct. 2782 */ 2783 exi = checkexport4(&fhp->fh.v1.export_fsid, &exp_fid, NULL); 2784 cs->exi = ((exi != NULL) ? 
exi : exi_public); 2785 } else { 2786 /* 2787 * it's a properly shared filesystem 2788 */ 2789 cs->exi = exi_public; 2790 } 2791 2792 VN_HOLD(vp); 2793 cs->vp = vp; 2794 2795 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) { 2796 VN_RELE(cs->vp); 2797 cs->vp = NULL; 2798 cs->exi = sav_exi; 2799 goto final; 2800 } 2801 2802 *cs->statusp = resp->status = NFS4_OK; 2803 2804 final: 2805 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs, 2806 PUTPUBFH4res *, resp); 2807 } 2808 2809 /* 2810 * XXX - issue with put*fh operations. 2811 * 2812 * let us assume that /export/home is shared via NFS and a NFS client 2813 * wishes to mount /export/home/joe. 2814 * 2815 * If /export, home, or joe have restrictive search permissions, then 2816 * the NFS Server should not return a filehandle to the client. 2817 * 2818 * This case is easy to enforce. However, the NFS Client does not know 2819 * which security flavor should be used until the pathname has been 2820 * fully resolved. In addition there is another complication for uid 2821 * mapping. If the credential being used is root, the default behaviour 2822 * will be to map it to the anonymous user. However the NFS Server can not 2823 * map it until the pathname has been fully resolved. 2824 * 2825 * XXX: JEFF: Proposed solution. 2826 * 2827 * Luckily, SECINFO uses a full pathname. So what we will 2828 * have to do in mds_op_lookup is check that flavor of 2829 * the target object matches that of the request, and if root was the 2830 * caller, check for the root= and anon= options, and if necessary, 2831 * repeat the lookup using the right cred_t. 2832 * 2833 * But that's not done yet. 
2834 */ 2835 /* ARGSUSED */ 2836 static void 2837 mds_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2838 compound_state_t *cs) 2839 { 2840 PUTFH4args *args = &argop->nfs_argop4_u.opputfh; 2841 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh; 2842 nfs41_fh_fmt_t *fhp = NULL; 2843 fid_t exp_fid; 2844 int error; 2845 2846 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs, 2847 PUTFH4args *, args); 2848 2849 /* 2850 * release the old nnode, vnode and cred. 2851 */ 2852 if (cs->nn) 2853 nnode_rele(&cs->nn); 2854 if (cs->vp) { 2855 VN_RELE(cs->vp); 2856 cs->vp = NULL; 2857 } 2858 if (cs->cr) { 2859 crfree(cs->cr); 2860 cs->cr = NULL; 2861 } 2862 2863 2864 /* 2865 * Check exportinfo only if it's a FH41_TYPE_NFS filehandle. 2866 * If the filehandle is otherwise incorrect, 2867 * nnode_from_fh_v41() will return an error. 2868 */ 2869 fhp = (nfs41_fh_fmt_t *)args->object.nfs_fh4_val; 2870 if (fhp->type == FH41_TYPE_NFS) { 2871 exp_fid.fid_len = fhp->fh.v1.export_fid.len; 2872 bcopy(fhp->fh.v1.export_fid.val, exp_fid.fid_data, 2873 exp_fid.fid_len); 2874 cs->exi = checkexport4(&fhp->fh.v1.export_fsid, &exp_fid, NULL); 2875 if (cs->exi == NULL) { 2876 *cs->statusp = resp->status = NFS4ERR_STALE; 2877 DTRACE_PROBE(nfss41__e__chkexp); 2878 goto final; 2879 } 2880 } 2881 2882 error = nnode_from_fh_v41(&cs->nn, &args->object); 2883 if (error != 0) { 2884 resp->status = *cs->statusp = nnode_stat4(error, 1); 2885 goto final; 2886 } 2887 ASSERT(cs->nn != NULL); 2888 2889 cs->vp = nnop_io_getvp(cs->nn); 2890 2891 cs->cr = crdup(cs->basecr); 2892 ASSERT(cs->cr != NULL); 2893 2894 if (fhp->type == FH41_TYPE_NFS) { 2895 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) { 2896 nnode_rele(&cs->nn); 2897 VN_RELE(cs->vp); 2898 cs->vp = NULL; 2899 crfree(cs->cr); 2900 cs->cr = NULL; 2901 *cs->statusp = resp->status; 2902 DTRACE_PROBE(nfss41__e__fail_auth); 2903 goto final; 2904 } 2905 } 2906 2907 nfs_fh4_copy(&args->object, &cs->fh); 2908 *cs->statusp = 
resp->status = NFS4_OK; 2909 cs->deleg = FALSE; 2910 2911 final: 2912 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs, 2913 PUTFH4res *, resp); 2914 } 2915 2916 /* ARGSUSED */ 2917 static void 2918 mds_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2919 compound_state_t *cs) 2920 2921 { 2922 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh; 2923 int error; 2924 fid_t fid; 2925 struct exportinfo *exi, *sav_exi; 2926 2927 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs); 2928 2929 if (cs->vp) { 2930 VN_RELE(cs->vp); 2931 cs->vp = NULL; 2932 } 2933 2934 if (cs->cr) 2935 crfree(cs->cr); 2936 2937 cs->cr = crdup(cs->basecr); 2938 2939 /* 2940 * Using rootdir, the system root vnode, 2941 * get its fid. 2942 */ 2943 bzero(&fid, sizeof (fid)); 2944 fid.fid_len = MAXFIDSZ; 2945 error = vop_fid_pseudo(rootdir, &fid); 2946 if (error != 0) { 2947 *cs->statusp = resp->status = puterrno4(error); 2948 goto final; 2949 } 2950 2951 /* 2952 * Then use the root fsid & fid it to find out if it's exported 2953 * 2954 * If the server root isn't exported directly, then 2955 * it should at least be a pseudo export based on 2956 * one or more exports further down in the server's 2957 * file tree. 2958 */ 2959 exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL); 2960 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) { 2961 DTRACE_PROBE(nfss41__e__chkexp); 2962 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 2963 goto final; 2964 } 2965 2966 /* 2967 * Now make a filehandle based on the root 2968 * export and root vnode. 
2969 */ 2970 error = mknfs41_fh(&cs->fh, rootdir, exi); 2971 if (error != 0) { 2972 *cs->statusp = resp->status = puterrno4(error); 2973 goto final; 2974 } 2975 2976 sav_exi = cs->exi; 2977 cs->exi = exi; 2978 2979 VN_HOLD(rootdir); 2980 cs->vp = rootdir; 2981 2982 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) { 2983 VN_RELE(rootdir); 2984 cs->vp = NULL; 2985 cs->exi = sav_exi; 2986 goto final; 2987 } 2988 2989 *cs->statusp = resp->status = NFS4_OK; 2990 cs->deleg = FALSE; 2991 2992 final: 2993 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs, 2994 PUTROOTFH4res *, resp); 2995 } 2996 2997 /* 2998 * A directory entry is a valid nfsv4 entry if 2999 * - it has a non-zero ino 3000 * - it is not a dot or dotdot name 3001 * - it is visible in a pseudo export or in a real export that can 3002 * only have a limited view. 3003 */ 3004 static bool_t 3005 valid_nfs4_entry(struct exportinfo *exi, struct dirent64 *dp, 3006 int *expseudo, int check_visible) 3007 { 3008 if (dp->d_ino == 0 || NFS_IS_DOTNAME(dp->d_name)) { 3009 *expseudo = 0; 3010 return (FALSE); 3011 } 3012 3013 if (! check_visible) { 3014 *expseudo = 0; 3015 return (TRUE); 3016 } 3017 3018 return (nfs_visible_inode(exi, dp->d_ino, expseudo)); 3019 } 3020 3021 3022 /* 3023 * readlink: args: CURRENT_FH. 3024 * res: status. If success - CURRENT_FH unchanged, return linktext. 
*/

/* ARGSUSED */
static void
mds_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    compound_state_t *cs)
{
	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
	int error;
	vnode_t *vp;
	struct iovec iov;
	struct vattr va;
	struct uio uio;
	char *data;
	caller_context_t ct;

	DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);

	/* CURRENT_FH: directory */
	vp = cs->vp;
	if (vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto final;
	}

	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto final;
	}

	/* only a symbolic link can be read: directories get ISDIR */
	if (vp->v_type == VDIR) {
		*cs->statusp = resp->status = NFS4ERR_ISDIR;
		goto final;
	}

	/* ... and every other non-link type gets INVAL */
	if (vp->v_type != VLNK) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto final;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = cs->instp->caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/* the mode is needed for the mandatory-lock check below */
	va.va_mask = AT_MODE;
	error = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		goto final;
	}

	if (MANDLOCK(vp, va.va_mode)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto final;
	}

	/* one byte more than is read, leaving room for the terminator */
	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);

	iov.iov_base = data;
	iov.iov_len = MAXPATHLEN;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = 0;
	uio.uio_resid = MAXPATHLEN;

	error = VOP_READLINK(vp, &uio, cs->cr, &ct);

	if (error) {
		kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
		*cs->statusp = resp->status = puterrno4(error);
		goto final;
	}

	/* NUL-terminate after the MAXPATHLEN - uio_resid bytes read */
	*(data + MAXPATHLEN - uio.uio_resid) = '\0';

	/*
	 * treat link name as data
	 */
	(void) str_to_utf8(data, &resp->link);

	kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
	*cs->statusp = resp->status = NFS4_OK;

final:
	DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
	    READLINK4res *, resp);
}

/*
 * Release the resources held by a READLINK result.
 */
/*ARGSUSED*/
static void
mds_op_readlink_free(nfs_resop4 *resop, compound_state_t *cs)
{
	/* Common function used for NFSv4.0 and NFSv4.1 */
	rfs4_op_readlink_free(resop);
}

/*
 * RECLAIM_COMPLETE: args: rca_one_fs.
 *	res: rcr_status.  Marks the client (cs->cp) as having finished
 *	reclaiming state.  A repeated call is answered with
 *	NFS4ERR_COMPLETE_ALREADY; a per-filesystem request (rca_one_fs)
 *	is accepted but not tracked.
 */
/* ARGSUSED */
static void
mds_op_reclaim_complete(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, compound_state_t *cs)
{
	RECLAIM_COMPLETE4args *args = &argop->nfs_argop4_u.opreclaim_complete;
	RECLAIM_COMPLETE4res *resp = &resop->nfs_resop4_u.opreclaim_complete;
	rfs4_client_t *cp;

	cp = cs->cp;

	if (cp->rc_reclaim_completed) {
		*cs->statusp = resp->rcr_status = NFS4ERR_COMPLETE_ALREADY;
		return;
	}

	if (args->rca_one_fs) {
		/* do what?  we don't track this */
		*cs->statusp = resp->rcr_status = NFS4_OK;
		return;
	}

	cp->rc_reclaim_completed = 1;

	/* did we have reclaimable state stored for this client?
*/
	if (cp->rc_can_reclaim)
		atomic_add_32(&(cs->instp->reclaim_cnt), -1);

	*cs->statusp = resp->rcr_status = NFS4_OK;
}

/*
 * short utility function to lookup a file and recall the delegation
 *
 *	dvp		directory to search.
 *	nm		component name to look up.
 *	vpp		if non-NULL, receives the held vnode found (NULL on
 *			lookup failure); if NULL the vnode is released here.
 *	lkup_error	if non-NULL, receives the VOP_LOOKUP error code.
 *
 * Returns the result of rfs4_findfile() when the object found is a
 * regular file, otherwise NULL.
 */
static rfs4_file_t *
mds_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
    int *lkup_error, struct compound_state *cs)
{
	vnode_t *vp;
	rfs4_file_t *fp = NULL;
	bool_t fcreate = FALSE;
	int error;

	if (vpp)
		*vpp = NULL;

	if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
	    NULL, 0, NULL)) == 0) {
		if (vp->v_type == VREG)
			fp = rfs4_findfile(cs->instp, vp, NULL, &fcreate);
		/* hand the hold to the caller, or drop it if not wanted */
		if (vpp)
			*vpp = vp;
		else
			VN_RELE(vp);
	}

	if (lkup_error)
		*lkup_error = error;

	return (fp);
}

/*
 * Ask the data servers to remove the storage backing 'vp', using the
 * layout cached in fp->rf_mlo.  If no layout is cached nothing is sent
 * and 0 is returned.  Otherwise returns the error from vop_fid_pseudo()
 * or from ctl_mds_clnt_remove_file().
 */
static int
do_ctl_mds_remove(vnode_t *vp, rfs4_file_t *fp, compound_state_t *cs)
{
	fid_t fid;
	nfs41_fid_t nfs41_fid;
	int error = 0;

	/*
	 * Use the file layout to determine which data servers to
	 * send DS_REMOVEs to.  If the layout is not cached in the
	 * rfs4_file_t either this means that we do not have a layout
	 * or it needs to be read in from disk.  Right now, we do not
	 * attempt to read the layout in from disk, but future phases
	 * of REMOVE handling will take this into consideration.
	 *
	 * Known Problems with this implementation of REMOVE:
	 * 1. Not attempting to read a layout from disk could mean
	 * that if an on-disk layout did exist, storage on the data
	 * servers will not be freed.
	 *
	 * 2. The server populates the layout stored in the rfs4_file_t
	 * when it receives a LAYOUTGET.  If the file has been written
	 * (perhaps in a past server instance), but no clients have
	 * issued new LAYOUTGETs, we will not have a cached layout and
	 * we will not free space on the data servers.
	 *
	 * 3. If any of the DS_REMOVE calls to the data servers fail
	 * the errors are ignored and will not be retried.  This may
	 * cause leaked space on the data server.
	 */
	if (fp->rf_mlo != NULL) {
		bzero(&fid, sizeof (fid));
		fid.fid_len = MAXFIDSZ;

		error = vop_fid_pseudo(vp, &fid);
		if (error) {
			DTRACE_NFSV4_1(nfss__e__vop_fid_pseudo_failed,
			    int, error);
			return (error);
		} else {
			/* repackage the fid in the form the control call takes */
			nfs41_fid.len = fid.fid_len;
			bcopy(fid.fid_data, nfs41_fid.val, nfs41_fid.len);
		}

		error = ctl_mds_clnt_remove_file(cs->instp, cs->exi->exi_fsid,
		    nfs41_fid, fp->rf_mlo);
	} else
		DTRACE_PROBE(nfss__i__layout_is_null_cannot_remove);

	return (error);
}

/*
 * remove: args: CURRENT_FH: directory; name.
 *	res: status. If success - CURRENT_FH unchanged, return change_info
 *		for directory.
 */
/* ARGSUSED */
static void
mds_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    compound_state_t *cs)
{
	REMOVE4args *args = &argop->nfs_argop4_u.opremove;
	REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
	int error;
	vnode_t *dvp, *vp;
	struct vattr bdva, idva, adva;
	char *nm;
	uint_t len;
	rfs4_file_t *fp;
	int in_crit = 0;
	bslabel_t *clabel;
	caller_context_t ct;

	DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
	    REMOVE4args *, args);

	/* CURRENT_FH: directory */
	dvp = cs->vp;
	if (dvp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto final;
	}

	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto final;
	}

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * Do not allow to remove anything in this directory.
3278 */ 3279 if (vn_ismntpt(dvp)) { 3280 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3281 goto final; 3282 } 3283 3284 if (dvp->v_type != VDIR) { 3285 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 3286 goto final; 3287 } 3288 3289 if (!utf8_dir_verify(&args->target)) { 3290 *cs->statusp = resp->status = NFS4ERR_INVAL; 3291 goto final; 3292 } 3293 3294 /* 3295 * Lookup the file so that we can check if it's a directory 3296 */ 3297 nm = utf8_to_fn(&args->target, &len, NULL); 3298 if (nm == NULL) { 3299 *cs->statusp = resp->status = NFS4ERR_INVAL; 3300 goto final; 3301 } 3302 3303 if (len > MAXNAMELEN) { 3304 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 3305 kmem_free(nm, len); 3306 goto final; 3307 } 3308 3309 if (rdonly4(cs->exi, cs->vp, req)) { 3310 *cs->statusp = resp->status = NFS4ERR_ROFS; 3311 kmem_free(nm, len); 3312 goto final; 3313 } 3314 3315 /* 3316 * Lookup the file to determine type and while we are see if 3317 * there is a file struct around and check for delegation. 3318 * We don't need to acquire va_seq before this lookup, if 3319 * it causes an update, cinfo.before will not match, which will 3320 * trigger a cache flush even if atomic is TRUE. 
3321 */ 3322 fp = mds_lookup_and_findfile(dvp, nm, &vp, &error, cs); 3323 if (vp != NULL) { 3324 if (rfs4_check_delegated(FWRITE, vp, TRUE, TRUE, TRUE, NULL)) { 3325 VN_RELE(vp); 3326 rfs4_file_rele(fp); 3327 *cs->statusp = resp->status = NFS4ERR_DELAY; 3328 kmem_free(nm, len); 3329 goto final; 3330 } 3331 } else { /* Didn't find anything to remove */ 3332 *cs->statusp = resp->status = error; 3333 kmem_free(nm, len); 3334 goto final; 3335 } 3336 3337 if (nbl_need_check(vp)) { 3338 nbl_start_crit(vp, RW_READER); 3339 in_crit = 1; 3340 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, &ct)) { 3341 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN; 3342 kmem_free(nm, len); 3343 nbl_end_crit(vp); 3344 VN_RELE(vp); 3345 if (fp) { 3346 rfs4_clear_dont_grant(cs->instp, fp); 3347 rfs4_file_rele(fp); 3348 } 3349 goto final; 3350 } 3351 } 3352 3353 /* check label before allowing removal */ 3354 if (is_system_labeled()) { 3355 ASSERT(req->rq_label != NULL); 3356 clabel = req->rq_label; 3357 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *, 3358 "got client label from request(1)", 3359 struct svc_req *, req); 3360 if (!blequal(&l_admin_low->tsl_label, clabel)) { 3361 if (!do_rfs_label_check(clabel, vp, 3362 EQUALITY_CHECK, cs->exi)) { 3363 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3364 kmem_free(nm, len); 3365 if (in_crit) 3366 nbl_end_crit(vp); 3367 VN_RELE(vp); 3368 if (fp) { 3369 rfs4_clear_dont_grant(cs->instp, fp); 3370 rfs4_file_rele(fp); 3371 } 3372 goto final; 3373 } 3374 } 3375 } 3376 3377 ct.cc_sysid = 0; 3378 ct.cc_pid = 0; 3379 ct.cc_caller_id = cs->instp->caller_id; 3380 ct.cc_flags = CC_DONTBLOCK; 3381 3382 /* Get dir "before" change value */ 3383 bdva.va_mask = AT_CTIME|AT_SEQ; 3384 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, &ct); 3385 if (error) { 3386 *cs->statusp = resp->status = puterrno4(error); 3387 kmem_free(nm, len); 3388 if (in_crit) 3389 nbl_end_crit(vp); 3390 VN_RELE(vp); 3391 if (fp) { 3392 rfs4_clear_dont_grant(cs->instp, fp); 3393 
rfs4_file_rele(fp); 3394 } 3395 goto final; 3396 } 3397 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime) 3398 3399 /* Actually do the REMOVE operation */ 3400 if (vp->v_type == VDIR) { 3401 /* 3402 * Can't remove a directory that has a mounted-on filesystem. 3403 */ 3404 if (vn_ismntpt(vp)) { 3405 error = EACCES; 3406 } else { 3407 /* 3408 * System V defines rmdir to return EEXIST, 3409 * not * ENOTEMPTY, if the directory is not 3410 * empty. A System V NFS server needs to map 3411 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to 3412 * transmit over the wire. 3413 */ 3414 if ((error = VOP_RMDIR(dvp, nm, rootdir, 3415 cs->cr, &ct, 0)) == EEXIST) 3416 error = ENOTEMPTY; 3417 } 3418 } else { 3419 if ((error = VOP_REMOVE(dvp, nm, cs->cr, &ct, 0)) == 0 && 3420 fp != NULL) { 3421 struct vattr va; 3422 vnode_t *tvp; 3423 3424 rfs4_dbe_lock(fp->rf_dbe); 3425 tvp = fp->rf_vp; 3426 if (tvp) 3427 VN_HOLD(tvp); 3428 rfs4_dbe_unlock(fp->rf_dbe); 3429 3430 if (tvp) { 3431 /* 3432 * This is va_seq safe because we are not 3433 * manipulating dvp. 3434 */ 3435 va.va_mask = AT_NLINK; 3436 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, 3437 &ct) && va.va_nlink == 0) { 3438 if (in_crit) { 3439 nbl_end_crit(vp); 3440 in_crit = 0; 3441 } 3442 3443 /* Remove the layout */ 3444 mds_delete_layout(tvp); 3445 3446 /* 3447 * Remove objects on data servers. 3448 * Ignore errors for now.. 
3449 */ 3450 (void) do_ctl_mds_remove(tvp, fp, cs); 3451 3452 /* Remove state on file remove */ 3453 rfs4_close_all_state(fp); 3454 } 3455 VN_RELE(tvp); 3456 } 3457 } 3458 } 3459 3460 if (in_crit) 3461 nbl_end_crit(vp); 3462 VN_RELE(vp); 3463 3464 if (fp) { 3465 rfs4_clear_dont_grant(cs->instp, fp); 3466 rfs4_file_rele(fp); 3467 fp = NULL; 3468 } 3469 kmem_free(nm, len); 3470 3471 if (error) { 3472 *cs->statusp = resp->status = puterrno4(error); 3473 goto final; 3474 } 3475 3476 /* 3477 * Get the initial "after" sequence number, if it fails, set to zero 3478 */ 3479 idva.va_mask = AT_SEQ; 3480 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, &ct)) 3481 idva.va_seq = 0; 3482 3483 /* 3484 * Force modified data and metadata out to stable storage. 3485 */ 3486 (void) VOP_FSYNC(dvp, 0, cs->cr, &ct); 3487 3488 /* 3489 * Get "after" change value, if it fails, simply return the 3490 * before value. 3491 */ 3492 adva.va_mask = AT_CTIME|AT_SEQ; 3493 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, &ct)) { 3494 adva.va_ctime = bdva.va_ctime; 3495 adva.va_seq = 0; 3496 } 3497 3498 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime) 3499 3500 /* 3501 * The cinfo.atomic = TRUE only if we have 3502 * non-zero va_seq's, and it has incremented by exactly one 3503 * during the VOP_REMOVE/RMDIR and it didn't change during 3504 * the VOP_FSYNC. 3505 */ 3506 if (bdva.va_seq && idva.va_seq && adva.va_seq && 3507 idva.va_seq == (bdva.va_seq + 1) && 3508 idva.va_seq == adva.va_seq) 3509 resp->cinfo.atomic = TRUE; 3510 else 3511 resp->cinfo.atomic = FALSE; 3512 3513 *cs->statusp = resp->status = NFS4_OK; 3514 3515 final: 3516 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs, 3517 REMOVE4res *, resp); 3518 } 3519 3520 /* 3521 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory, 3522 * oldname and newname. 3523 * res: status. If success - CURRENT_FH unchanged, return change_info 3524 * for both from and target directories. 
3525 */ 3526 /* ARGSUSED */ 3527 static void 3528 mds_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3529 compound_state_t *cs) 3530 { 3531 RENAME4args *args = &argop->nfs_argop4_u.oprename; 3532 RENAME4res *resp = &resop->nfs_resop4_u.oprename; 3533 int error; 3534 vnode_t *odvp; 3535 vnode_t *ndvp; 3536 vnode_t *srcvp, *targvp; 3537 struct vattr obdva, oidva, oadva; 3538 struct vattr nbdva, nidva, nadva; 3539 char *onm, *nnm; 3540 uint_t olen, nlen; 3541 rfs4_file_t *fp, *sfp; 3542 int in_crit_src, in_crit_targ; 3543 int fp_rele_grant_hold, sfp_rele_grant_hold; 3544 bslabel_t *clabel; 3545 caller_context_t ct; 3546 3547 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs, 3548 RENAME4args *, args); 3549 3550 fp = sfp = NULL; 3551 srcvp = targvp = NULL; 3552 in_crit_src = in_crit_targ = 0; 3553 fp_rele_grant_hold = sfp_rele_grant_hold = 0; 3554 3555 /* CURRENT_FH: target directory */ 3556 ndvp = cs->vp; 3557 if (ndvp == NULL) { 3558 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 3559 goto final; 3560 } 3561 3562 /* SAVED_FH: from directory */ 3563 odvp = cs->saved_vp; 3564 if (odvp == NULL) { 3565 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 3566 goto final; 3567 } 3568 3569 if (cs->access == CS_ACCESS_DENIED) { 3570 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3571 goto final; 3572 } 3573 3574 /* 3575 * If there is an unshared filesystem mounted on this vnode, 3576 * do not allow to rename objects in this directory. 3577 */ 3578 if (vn_ismntpt(odvp)) { 3579 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3580 goto final; 3581 } 3582 3583 /* 3584 * If there is an unshared filesystem mounted on this vnode, 3585 * do not allow to rename to this directory. 
3586 */ 3587 if (vn_ismntpt(ndvp)) { 3588 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3589 goto final; 3590 } 3591 3592 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) { 3593 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 3594 goto final; 3595 } 3596 3597 if (cs->saved_exi != cs->exi) { 3598 *cs->statusp = resp->status = NFS4ERR_XDEV; 3599 goto final; 3600 } 3601 3602 if (!utf8_dir_verify(&args->oldname)) { 3603 *cs->statusp = resp->status = NFS4ERR_INVAL; 3604 goto final; 3605 } 3606 3607 if (!utf8_dir_verify(&args->newname)) { 3608 *cs->statusp = resp->status = NFS4ERR_INVAL; 3609 goto final; 3610 } 3611 3612 onm = utf8_to_fn(&args->oldname, &olen, NULL); 3613 if (onm == NULL) { 3614 *cs->statusp = resp->status = NFS4ERR_INVAL; 3615 goto final; 3616 } 3617 3618 nnm = utf8_to_fn(&args->newname, &nlen, NULL); 3619 if (nnm == NULL) { 3620 *cs->statusp = resp->status = NFS4ERR_INVAL; 3621 kmem_free(onm, olen); 3622 goto final; 3623 } 3624 3625 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) { 3626 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 3627 kmem_free(onm, olen); 3628 kmem_free(nnm, nlen); 3629 goto final; 3630 } 3631 3632 3633 if (rdonly4(cs->exi, cs->vp, req)) { 3634 *cs->statusp = resp->status = NFS4ERR_ROFS; 3635 kmem_free(onm, olen); 3636 kmem_free(nnm, nlen); 3637 goto final; 3638 } 3639 3640 /* check label of the target dir */ 3641 if (is_system_labeled()) { 3642 ASSERT(req->rq_label != NULL); 3643 clabel = req->rq_label; 3644 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *, 3645 "got client label from request(1)", 3646 struct svc_req *, req); 3647 if (!blequal(&l_admin_low->tsl_label, clabel)) { 3648 if (!do_rfs_label_check(clabel, ndvp, 3649 EQUALITY_CHECK, cs->exi)) { 3650 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3651 goto final; 3652 } 3653 } 3654 } 3655 3656 /* 3657 * Is the source a file and have a delegation? 
3658 * We don't need to acquire va_seq before these lookups, if 3659 * it causes an update, cinfo.before will not match, which will 3660 * trigger a cache flush even if atomic is TRUE. 3661 */ 3662 sfp = mds_lookup_and_findfile(odvp, onm, &srcvp, &error, cs); 3663 if (srcvp != NULL) { 3664 if (rfs4_check_delegated(FWRITE, srcvp, TRUE, TRUE, TRUE, 3665 NULL)) { 3666 *cs->statusp = resp->status = NFS4ERR_DELAY; 3667 goto err_out; 3668 } 3669 } else { 3670 *cs->statusp = resp->status = puterrno4(error); 3671 kmem_free(onm, olen); 3672 kmem_free(nnm, nlen); 3673 goto final; 3674 } 3675 3676 sfp_rele_grant_hold = 1; 3677 3678 /* Does the destination exist and a file and have a delegation? */ 3679 fp = mds_lookup_and_findfile(ndvp, nnm, &targvp, NULL, cs); 3680 if (targvp != NULL) { 3681 if (rfs4_check_delegated(FWRITE, targvp, TRUE, TRUE, TRUE, 3682 NULL)) { 3683 *cs->statusp = resp->status = NFS4ERR_DELAY; 3684 goto err_out; 3685 } 3686 } 3687 3688 fp_rele_grant_hold = 1; 3689 3690 /* Check for NBMAND lock on both source and target */ 3691 if (nbl_need_check(srcvp)) { 3692 nbl_start_crit(srcvp, RW_READER); 3693 in_crit_src = 1; 3694 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, &ct)) { 3695 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN; 3696 goto err_out; 3697 } 3698 } 3699 3700 if (targvp && nbl_need_check(targvp)) { 3701 nbl_start_crit(targvp, RW_READER); 3702 in_crit_targ = 1; 3703 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, &ct)) { 3704 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN; 3705 goto err_out; 3706 } 3707 } 3708 3709 ct.cc_sysid = 0; 3710 ct.cc_pid = 0; 3711 ct.cc_caller_id = cs->instp->caller_id; 3712 ct.cc_flags = CC_DONTBLOCK; 3713 3714 /* Get source "before" change value */ 3715 obdva.va_mask = AT_CTIME|AT_SEQ; 3716 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, &ct); 3717 if (!error) { 3718 nbdva.va_mask = AT_CTIME|AT_SEQ; 3719 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, &ct); 3720 } 3721 if (error) { 3722 *cs->statusp = resp->status = 
puterrno4(error); 3723 goto err_out; 3724 } 3725 3726 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime) 3727 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime) 3728 3729 if ((error = VOP_RENAME(odvp, onm, ndvp, nnm, cs->cr, &ct, 0)) == 3730 0 && fp != NULL) { 3731 struct vattr va; 3732 vnode_t *tvp; 3733 3734 rfs4_dbe_lock(fp->rf_dbe); 3735 tvp = fp->rf_vp; 3736 if (tvp) 3737 VN_HOLD(tvp); 3738 rfs4_dbe_unlock(fp->rf_dbe); 3739 3740 if (tvp) { 3741 va.va_mask = AT_NLINK; 3742 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, &ct) && 3743 va.va_nlink == 0) { 3744 /* The file is gone and so should the state */ 3745 if (in_crit_targ) { 3746 nbl_end_crit(targvp); 3747 in_crit_targ = 0; 3748 } 3749 rfs4_close_all_state(fp); 3750 } 3751 VN_RELE(tvp); 3752 } 3753 } 3754 if (error == 0) { 3755 char *tmp; 3756 3757 /* fix the path name for the renamed file */ 3758 mutex_enter(&srcvp->v_lock); 3759 tmp = srcvp->v_path; 3760 srcvp->v_path = NULL; 3761 mutex_exit(&srcvp->v_lock); 3762 vn_setpath(rootdir, ndvp, srcvp, nnm, nlen - 1); 3763 if (tmp != NULL) 3764 kmem_free(tmp, strlen(tmp) + 1); 3765 } 3766 3767 if (in_crit_src) 3768 nbl_end_crit(srcvp); 3769 if (srcvp) 3770 VN_RELE(srcvp); 3771 if (in_crit_targ) 3772 nbl_end_crit(targvp); 3773 if (targvp) 3774 VN_RELE(targvp); 3775 3776 if (sfp) { 3777 rfs4_clear_dont_grant(cs->instp, sfp); 3778 rfs4_file_rele(sfp); 3779 sfp = NULL; 3780 } 3781 if (fp) { 3782 rfs4_clear_dont_grant(cs->instp, fp); 3783 rfs4_file_rele(fp); 3784 fp = NULL; 3785 } 3786 3787 kmem_free(onm, olen); 3788 kmem_free(nnm, nlen); 3789 3790 /* 3791 * Get the initial "after" sequence number, if it fails, set to zero 3792 */ 3793 oidva.va_mask = AT_SEQ; 3794 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, &ct)) 3795 oidva.va_seq = 0; 3796 3797 nidva.va_mask = AT_SEQ; 3798 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, &ct)) 3799 nidva.va_seq = 0; 3800 3801 /* 3802 * Force modified data and metadata out to stable storage. 
3803 */ 3804 (void) VOP_FSYNC(odvp, 0, cs->cr, &ct); 3805 (void) VOP_FSYNC(ndvp, 0, cs->cr, &ct); 3806 3807 if (error) { 3808 *cs->statusp = resp->status = puterrno4(error); 3809 goto final; 3810 } 3811 3812 /* 3813 * Get "after" change values, if it fails, simply return the 3814 * before value. 3815 */ 3816 oadva.va_mask = AT_CTIME|AT_SEQ; 3817 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, &ct)) { 3818 oadva.va_ctime = obdva.va_ctime; 3819 oadva.va_seq = 0; 3820 } 3821 3822 nadva.va_mask = AT_CTIME|AT_SEQ; 3823 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, &ct)) { 3824 nadva.va_ctime = nbdva.va_ctime; 3825 nadva.va_seq = 0; 3826 } 3827 3828 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime) 3829 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime) 3830 3831 /* 3832 * The cinfo.atomic = TRUE only if we have 3833 * non-zero va_seq's, and it has incremented by exactly one 3834 * during the VOP_RENAME and it didn't change during the VOP_FSYNC. 3835 */ 3836 if (obdva.va_seq && oidva.va_seq && oadva.va_seq && 3837 oidva.va_seq == (obdva.va_seq + 1) && 3838 oidva.va_seq == oadva.va_seq) 3839 resp->source_cinfo.atomic = TRUE; 3840 else 3841 resp->source_cinfo.atomic = FALSE; 3842 3843 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq && 3844 nidva.va_seq == (nbdva.va_seq + 1) && 3845 nidva.va_seq == nadva.va_seq) 3846 resp->target_cinfo.atomic = TRUE; 3847 else 3848 resp->target_cinfo.atomic = FALSE; 3849 3850 *cs->statusp = resp->status = NFS4_OK; 3851 goto final; 3852 3853 err_out: 3854 kmem_free(onm, olen); 3855 kmem_free(nnm, nlen); 3856 3857 if (in_crit_src) nbl_end_crit(srcvp); 3858 if (in_crit_targ) nbl_end_crit(targvp); 3859 if (targvp) VN_RELE(targvp); 3860 if (srcvp) VN_RELE(srcvp); 3861 if (sfp) { 3862 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(cs->instp, sfp); 3863 rfs4_file_rele(sfp); 3864 } 3865 if (fp) { 3866 if (fp_rele_grant_hold) rfs4_clear_dont_grant(cs->instp, fp); 3867 rfs4_file_rele(fp); 3868 } 3869 3870 final: 3871 
DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs, 3872 RENAME4res *, resp); 3873 } 3874 3875 3876 /* ARGSUSED */ 3877 static void 3878 mds_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req, 3879 compound_state_t *cs) 3880 { 3881 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh; 3882 3883 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs); 3884 3885 /* No need to check cs->access - we are not accessing any object */ 3886 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) { 3887 *cs->statusp = resp->status = NFS4ERR_RESTOREFH; 3888 goto final; 3889 } 3890 if (cs->vp != NULL) { 3891 VN_RELE(cs->vp); 3892 } 3893 cs->vp = cs->saved_vp; 3894 cs->saved_vp = NULL; 3895 cs->exi = cs->saved_exi; 3896 nfs_fh4_copy(&cs->saved_fh, &cs->fh); 3897 *cs->statusp = resp->status = NFS4_OK; 3898 cs->deleg = FALSE; 3899 3900 final: 3901 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs, 3902 RESTOREFH4res *, resp); 3903 } 3904 3905 /* ARGSUSED */ 3906 static void 3907 mds_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3908 compound_state_t *cs) 3909 { 3910 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh; 3911 3912 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs); 3913 3914 /* No need to check cs->access - we are not accessing any object */ 3915 if (cs->vp == NULL) { 3916 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 3917 goto final; 3918 } 3919 if (cs->saved_vp != NULL) { 3920 VN_RELE(cs->saved_vp); 3921 } 3922 cs->saved_vp = cs->vp; 3923 VN_HOLD(cs->saved_vp); 3924 cs->saved_exi = cs->exi; 3925 /* 3926 * since SAVEFH is fairly rare, don't alloc space for its fh 3927 * unless necessary. 
3928 */ 3929 if (cs->saved_fh.nfs_fh4_val == NULL) { 3930 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP); 3931 } 3932 nfs_fh4_copy(&cs->fh, &cs->saved_fh); 3933 *cs->statusp = resp->status = NFS4_OK; 3934 3935 final: 3936 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs, 3937 SAVEFH4res *, resp); 3938 } 3939 3940 /* ARGSUSED */ 3941 static void 3942 mds_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3943 compound_state_t *cs) 3944 { 3945 SETATTR4args *args = &argop->nfs_argop4_u.opsetattr; 3946 SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr; 3947 bslabel_t *clabel; 3948 3949 DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs, 3950 SETATTR4args *, args); 3951 3952 if (cs->vp == NULL) { 3953 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 3954 goto final; 3955 } 3956 3957 /* 3958 * If there is an unshared filesystem mounted on this vnode, 3959 * do not allow to setattr on this vnode. 3960 */ 3961 if (vn_ismntpt(cs->vp)) { 3962 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3963 goto final; 3964 } 3965 3966 resp->attrsset = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs)); 3967 3968 if (rdonly4(cs->exi, cs->vp, req)) { 3969 *cs->statusp = resp->status = NFS4ERR_ROFS; 3970 goto final; 3971 } 3972 3973 /* check label before setting attributes */ 3974 if (is_system_labeled()) { 3975 ASSERT(req->rq_label != NULL); 3976 clabel = req->rq_label; 3977 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *, 3978 "got client label from request(1)", 3979 struct svc_req *, req); 3980 if (!blequal(&l_admin_low->tsl_label, clabel)) { 3981 if (!do_rfs_label_check(clabel, cs->vp, 3982 EQUALITY_CHECK, cs->exi)) { 3983 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3984 goto final; 3985 } 3986 } 3987 } 3988 3989 *cs->statusp = resp->status = 3990 mds_setattr(&resp->attrsset, &args->obj_attributes, cs, 3991 &args->stateid); 3992 3993 final: 3994 DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs, 3995 SETATTR4res 
*, resp); 3996 } 3997 3998 /* ARGSUSED */ 3999 static void 4000 mds_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 4001 compound_state_t *cs) 4002 { 4003 WRITE4args *args = &argop->nfs_argop4_u.opwrite; 4004 WRITE4res *resp = &resop->nfs_resop4_u.opwrite; 4005 nnode_io_flags_t nnioflags = NNODE_IO_FLAG_WRITE; 4006 int error; 4007 nnode_t *nn; 4008 u_offset_t rlimit; 4009 struct uio uio; 4010 struct iovec iov[NFS_MAX_IOVECS]; 4011 struct iovec *iovp = iov; 4012 int iovcnt; 4013 int ioflag; 4014 cred_t *savecred, *cr; 4015 bool_t *deleg = &cs->deleg; 4016 nfsstat4 stat; 4017 caller_context_t ct; 4018 4019 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs, 4020 WRITE4args *, args); 4021 4022 nn = cs->nn; 4023 if (nn == NULL) { 4024 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 4025 goto final; 4026 } 4027 /* 4028 * cs->access is set in call_checkauth4 called in putfh code. The 4029 * current putfh code will not invoke these security functions on the 4030 * DS codepath since it goes by the filehandle, not by nnodes per se. 
4031 */ 4032 if (cs->access == CS_ACCESS_DENIED) { 4033 *cs->statusp = resp->status = NFS4ERR_ACCESS; 4034 goto final; 4035 } 4036 4037 cr = cs->cr; 4038 if ((cs->vp != NULL) && (rdonly4(cs->exi, cs->vp, req))) { 4039 *cs->statusp = resp->status = NFS4ERR_ROFS; 4040 goto final; 4041 } 4042 4043 if ((stat = nnop_check_stateid(nn, cs, FWRITE, &args->stateid, FALSE, 4044 deleg, TRUE, &ct, NULL)) != NFS4_OK) { 4045 *cs->statusp = resp->status = stat; 4046 goto out; 4047 } 4048 4049 error = nnop_io_prep(nn, &nnioflags, cr, &ct, args->offset, 4050 args->data_len, NULL); 4051 if (error != 0) 4052 goto err; 4053 4054 if (args->data_len == 0) { 4055 *cs->statusp = resp->status = NFS4_OK; 4056 resp->count = 0; 4057 resp->committed = args->stable; 4058 resp->writeverf = cs->instp->Write4verf; 4059 goto out; 4060 } 4061 4062 if (args->mblk != NULL) { 4063 mblk_t *m; 4064 uint_t bytes, round_len; 4065 4066 iovcnt = 0; 4067 bytes = 0; 4068 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT); 4069 for (m = args->mblk; 4070 m != NULL && bytes < round_len; 4071 m = m->b_cont) { 4072 iovcnt++; 4073 bytes += MBLKL(m); 4074 } 4075 #ifdef DEBUG 4076 /* should have ended on an mblk boundary */ 4077 if (bytes != round_len) { 4078 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n", 4079 bytes, round_len, args->data_len); 4080 printf("args=%p, args->mblk=%p, m=%p", (void *)args, 4081 (void *)args->mblk, (void *)m); 4082 ASSERT(bytes == round_len); 4083 } 4084 #endif 4085 if (iovcnt <= NFS_MAX_IOVECS) { 4086 iovp = iov; 4087 } else { 4088 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP); 4089 } 4090 mblk_to_iov(args->mblk, iovcnt, iovp); 4091 } else { 4092 iovcnt = 1; 4093 iovp = iov; 4094 iovp->iov_base = args->data_val; 4095 iovp->iov_len = args->data_len; 4096 } 4097 4098 uio.uio_iov = iovp; 4099 uio.uio_iovcnt = iovcnt; 4100 4101 uio.uio_segflg = UIO_SYSSPACE; 4102 uio.uio_extflg = UIO_COPY_DEFAULT; 4103 uio.uio_loffset = args->offset; 4104 uio.uio_resid = args->data_len; 4105 
uio.uio_llimit = curproc->p_fsz_ctl; 4106 rlimit = uio.uio_llimit - args->offset; 4107 if (rlimit < (u_offset_t)uio.uio_resid) 4108 uio.uio_resid = (int)rlimit; 4109 4110 if (args->stable == UNSTABLE4) 4111 ioflag = 0; 4112 else if (args->stable == FILE_SYNC4) 4113 ioflag = FSYNC; 4114 else if (args->stable == DATA_SYNC4) 4115 ioflag = FDSYNC; 4116 else { 4117 if (iovp != iov) 4118 kmem_free(iovp, sizeof (*iovp) * iovcnt); 4119 *cs->statusp = resp->status = NFS4ERR_INVAL; 4120 goto out; 4121 } 4122 4123 /* 4124 * We're changing creds because VM may fault and we need 4125 * the cred of the current thread to be used if quota 4126 * checking is enabled. 4127 */ 4128 savecred = curthread->t_cred; 4129 curthread->t_cred = cr; 4130 error = nnop_write(nn, &nnioflags, &uio, ioflag, cr, &ct, NULL); 4131 curthread->t_cred = savecred; 4132 4133 if (iovp != iov) 4134 kmem_free(iovp, sizeof (*iovp) * iovcnt); 4135 4136 err: 4137 if (error) { 4138 *cs->statusp = resp->status = nnode_stat4(error, 1); 4139 goto out; 4140 } 4141 4142 *cs->statusp = resp->status = NFS4_OK; 4143 resp->count = args->data_len - uio.uio_resid; 4144 4145 if (ioflag == 0) 4146 resp->committed = UNSTABLE4; 4147 else 4148 resp->committed = FILE_SYNC4; 4149 4150 resp->writeverf = cs->instp->Write4verf; 4151 4152 nnop_update(nn, nnioflags, cr, &ct, args->offset + resp->count); 4153 out: 4154 nnop_io_release(nn, nnioflags, &ct); 4155 4156 final: 4157 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs, 4158 WRITE4res *, resp); 4159 } 4160 4161 static void 4162 rfs41_op_dispatch(compound_state_t *cs, 4163 COMPOUND4args *args, COMPOUND4res *resp, struct svc_req *req) 4164 { 4165 nfs_argop4 *argop; 4166 nfs_resop4 *resop; 4167 uint_t op; 4168 4169 argop = &args->array[cs->op_ndx]; 4170 resop = &resp->array[cs->op_ndx]; 4171 4172 op = (uint_t)argop->argop; 4173 resop->resop = op; 4174 4175 if (op >= OP_ILLEGAL_IDX) { 4176 /* 4177 * This is effectively dead code since XDR code 4178 * will have already 
returned BADXDR if op doesn't 4179 * decode to legal value. This only done for a 4180 * day when XDR code doesn't verify v4 opcodes. 4181 * or some bozo didn't update the operation dispatch 4182 * table. 4183 */ 4184 rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++; 4185 4186 mds_op_illegal(argop, resop, req, cs); 4187 DTRACE_PROBE(nfss41__e__operation_tilt); 4188 goto bail; 4189 } 4190 4191 /* 4192 * First if this is a bad operation stop 4193 * the compound processing right now! 4194 */ 4195 if (mds_disptab[op].op_flag == DISP_OP_BAD) { 4196 mds_op_illegal(argop, resop, req, cs); 4197 DTRACE_PROBE1(nfss41__e__disp_op_inval, int, op); 4198 goto bail; 4199 } 4200 4201 if (seq_chk_limits(argop, resop, cs)) { 4202 DTRACE_PROBE2(nfss41__i__scl_error, 4203 char *, nfs4_op_to_str(op), 4204 char *, nfs41_strerror(*cs->statusp)); 4205 } else { 4206 (*mds_disptab[op].dis_op)(argop, resop, req, cs); 4207 } 4208 4209 bail: 4210 if (*cs->statusp != NFS4_OK) 4211 cs->cont = FALSE; 4212 4213 /* 4214 * If not at last op, and if we are to stop, then 4215 * compact the results array. 
4216 */ 4217 if ((cs->op_ndx + 1) < cs->op_len && !cs->cont) { 4218 nfs_resop4 *new_res = kmem_alloc( 4219 (cs->op_ndx+1) * sizeof (nfs_resop4), KM_SLEEP); 4220 bcopy(resp->array, 4221 new_res, (cs->op_ndx+1) * sizeof (nfs_resop4)); 4222 kmem_free(resp->array, 4223 cs->op_len * sizeof (nfs_resop4)); 4224 4225 resp->array_len = cs->op_ndx + 1; 4226 resp->array = new_res; 4227 } 4228 } 4229 4230 void 4231 rfs41_err_resp(COMPOUND4args *args, COMPOUND4res *resp, nfsstat4 err) 4232 { 4233 size_t sz; 4234 4235 resp->array_len = 1; 4236 sz = resp->array_len * sizeof (nfs_resop4); 4237 resp->array = kmem_zalloc(sz, KM_SLEEP); 4238 4239 resp->array[0].resop = args->array[0].argop; 4240 resp->array[0].nfs_resop4_u.opillegal.status = err; 4241 } 4242 4243 4244 /* ARGSUSED */ 4245 void 4246 mds_compound(compound_state_t *cs, 4247 COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi, 4248 struct svc_req *req, int *rv) 4249 { 4250 cred_t *cr; 4251 size_t reslen; 4252 4253 if (rv != NULL) 4254 *rv = 0; 4255 /* 4256 * Form a reply tag by copying over the reqeuest tag. 
4257 */ 4258 resp->tag.utf8string_val = 4259 kmem_alloc(args->tag.utf8string_len, KM_SLEEP); 4260 4261 resp->tag.utf8string_len = args->tag.utf8string_len; 4262 4263 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val, 4264 resp->tag.utf8string_len); 4265 4266 ASSERT(exi == NULL); 4267 4268 cr = crget(); 4269 ASSERT(cr != NULL); 4270 4271 if (sec_svc_getcred(req, cr, &cs->principal, &cs->nfsflavor) == 0) { 4272 4273 DTRACE_NFSV4_2(compound__start, 4274 struct compound_state *, 4275 &cs, COMPOUND4args *, args); 4276 4277 crfree(cr); 4278 4279 DTRACE_NFSV4_2(compound__done, 4280 struct compound_state *, 4281 &cs, COMPOUND4res *, resp); 4282 4283 svcerr_badcred(req->rq_xprt); 4284 if (rv != NULL) 4285 *rv = 1; 4286 return; 4287 } 4288 if (cs->basecr != NULL) 4289 crfree(cs->basecr); 4290 cs->basecr = cr; 4291 cs->req = req; 4292 4293 DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs, 4294 COMPOUND4args *, args); 4295 4296 /* 4297 * For now, NFS4 compound processing must be protected by 4298 * exported_lock because it can access more than one exportinfo 4299 * per compound and share/unshare can now change multiple 4300 * exinfo structs. The NFS2/3 code only refs 1 exportinfo 4301 * per proc (excluding public exinfo), and exi_count design 4302 * is sufficient to protect concurrent execution of NFS2/3 4303 * ops along with unexport. 4304 */ 4305 rw_enter(&exported_lock, RW_READER); 4306 4307 /* 4308 * If this is the first compound we've seen, we need to start 4309 * the instances' grace period. 4310 */ 4311 if (cs->instp->seen_first_compound == 0) { 4312 rfs4_grace_start_new(cs->instp); 4313 cs->instp->seen_first_compound = 1; 4314 } 4315 4316 /* 4317 * Any operations _other_ than the ones listed below, should _not_ 4318 * appear as the first operation in a compound. If so we will 4319 * error out. 
We use the opilleagal.status without regard to 4320 * the actual operation since we know that status always appears 4321 * as the first element for all the operations. 4322 */ 4323 switch (args->array[0].argop) { 4324 case OP_SEQUENCE: 4325 case OP_EXCHANGE_ID: 4326 case OP_CREATE_SESSION: 4327 case OP_DESTROY_SESSION: 4328 break; 4329 4330 case OP_BIND_CONN_TO_SESSION: 4331 /* 4332 * Should be the _only_ op in compound 4333 */ 4334 if (args->array_len != 1) { 4335 *cs->statusp = NFS4ERR_NOT_ONLY_OP; 4336 rfs41_err_resp(args, resp, *cs->statusp); 4337 goto out; 4338 } 4339 break; 4340 4341 default: 4342 *cs->statusp = NFS4ERR_OP_NOT_IN_SESSION; 4343 rfs41_err_resp(args, resp, *cs->statusp); 4344 goto out; 4345 } 4346 4347 /* 4348 * Everything kosher; allocate results array 4349 */ 4350 reslen = cs->op_len = resp->array_len = args->array_len; 4351 resp->array = kmem_zalloc(reslen * sizeof (nfs_resop4), KM_SLEEP); 4352 4353 /* 4354 * Iterate over the compound until we have exhausted the operations 4355 * or the compound state indicates that we should terminate. 
4356 */ 4357 for (cs->op_ndx = 0; 4358 cs->op_ndx < cs->op_len && cs->cont == TRUE; cs->op_ndx++) 4359 rfs41_op_dispatch(cs, args, resp, req); 4360 4361 out: 4362 rw_exit(&exported_lock); 4363 4364 /* 4365 * done with this compound request, free the label 4366 */ 4367 if (req->rq_label != NULL) { 4368 kmem_free(req->rq_label, sizeof (bslabel_t)); 4369 req->rq_label = NULL; 4370 } 4371 4372 DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs, 4373 COMPOUND4res *, resp); 4374 } 4375 4376 void 4377 rfs41_compound_free(COMPOUND4res *resp, compound_state_t *cs) 4378 { 4379 uint_t i; 4380 4381 if (resp->tag.utf8string_val) { 4382 UTF8STRING_FREE(resp->tag) 4383 } 4384 4385 for (i = 0; i < resp->array_len; i++) { 4386 nfs_resop4 *resop; 4387 uint_t op; 4388 4389 resop = &resp->array[i]; 4390 op = (uint_t)resop->resop; 4391 if (op < OP_ILLEGAL_IDX) { 4392 (*mds_disptab[op].dis_resfree)(resop, cs); 4393 } 4394 } 4395 4396 if (resp->array != NULL) { 4397 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4)); 4398 resp->array = NULL; 4399 resp->array_len = 0; 4400 } 4401 } 4402 4403 delegreq_t 4404 do_41_deleg_hack(int osa) 4405 { 4406 int want_deleg; 4407 4408 want_deleg = (osa & OPEN4_SHARE_ACCESS_WANT_DELEG_MASK); 4409 4410 switch (want_deleg) { 4411 case OPEN4_SHARE_ACCESS_WANT_READ_DELEG: 4412 return (DELEG_READ); 4413 4414 case OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG: 4415 return (DELEG_WRITE); 4416 4417 case OPEN4_SHARE_ACCESS_WANT_ANY_DELEG: 4418 return (DELEG_ANY); 4419 4420 case OPEN4_SHARE_ACCESS_WANT_NO_DELEG: 4421 return (DELEG_NONE); 4422 } 4423 return (DELEG_ANY); 4424 } 4425 4426 /* 4427 * XXX: This will go away with the SMF work for npools. 4428 */ 4429 extern mds_layout_t *mds_gen_default_layout(nfs_server_instance_t *); 4430 4431 /* 4432 * We are going to create the file, so we need to get 4433 * a layout in play for it. 
4434 */ 4435 static nfsstat4 4436 mds_createfile_get_layout(struct svc_req *req, vnode_t *vp, 4437 struct compound_state *cs, caller_context_t *ct, mds_layout_t **plo) 4438 { 4439 vattr_t spe_va; 4440 4441 int i; 4442 4443 layout_core_t lc; 4444 4445 int error; 4446 struct netbuf *claddr; 4447 4448 nfsstat4 status = NFS4_OK; 4449 4450 spe_va.va_mask = AT_GID|AT_UID; 4451 error = VOP_GETATTR(vp, &spe_va, 0, cs->cr, ct); 4452 if (error) 4453 return (puterrno4(error)); 4454 4455 /* 4456 * Taken from nfsauth_cache_get(): 4457 */ 4458 claddr = svc_getrpccaller(req->rq_xprt); 4459 4460 lc.lc_mds_sids = NULL; 4461 4462 /* 4463 * XXX: We may not be able to trust vp->v_path, 4464 * but if it is filled in, we will use it. Otherwise 4465 * we will evaluate polices ignoring the path components. 4466 */ 4467 error = nfs41_spe_allocate(&spe_va, claddr, 4468 vp->v_path, &lc, TRUE); 4469 if (error) { 4470 /* 4471 * XXX: Until we get the SMF code 4472 * in place, we handle all errors by 4473 * using the default layout of the 4474 * old prototype code 4475 * 4476 * At that point, we should return the 4477 * given error. 4478 */ 4479 *plo = mds_gen_default_layout(cs->instp); 4480 if (*plo == NULL) { 4481 status = NFS4ERR_LAYOUTUNAVAILABLE; 4482 } else { 4483 /* 4484 * Record the layout, don't get 4485 * bent out of shape if it fails, 4486 * we'll try again at checkstate time. 4487 */ 4488 (void) mds_put_layout(*plo, vp); 4489 } 4490 4491 return (status); 4492 } 4493 4494 *plo = mds_add_layout(&lc); 4495 4496 if (lc.lc_mds_sids) { 4497 for (i = 0; i < lc.lc_stripe_count; i++) { 4498 kmem_free(lc.lc_mds_sids[i].val, 4499 lc.lc_mds_sids[i].len); 4500 } 4501 4502 kmem_free(lc.lc_mds_sids, 4503 lc.lc_stripe_count * sizeof (mds_sid)); 4504 } 4505 4506 if (*plo == NULL) { 4507 status = NFS4ERR_LAYOUTUNAVAILABLE; 4508 } else { 4509 /* 4510 * Record the layout, don't get bent out of shape 4511 * if it fails, we'll try again at checkstate time. 
4512 */ 4513 (void) mds_put_layout(*plo, vp); 4514 } 4515 4516 return (status); 4517 } 4518 4519 /* 4520 * If we call the spe in here, we return the new layout in *plo. 4521 */ 4522 static nfsstat4 4523 mds_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs, 4524 change_info4 *cinfo, attrmap4 *attrset, mds_layout_t **plo) 4525 { 4526 struct nfs4_svgetit_arg sarg; 4527 struct nfs4_ntov_table ntov; 4528 4529 bool_t ntov_table_init = FALSE; 4530 struct statvfs64 sb; 4531 nfsstat4 status = NFS4_OK; 4532 vnode_t *vp; 4533 vattr_t bva, ava, iva, cva, *vap; 4534 vnode_t *dvp; 4535 timespec32_t *mtime; 4536 char *nm = NULL; 4537 uint_t buflen; 4538 bool_t created; 4539 bool_t setsize = FALSE; 4540 len_t reqsize; 4541 int error; 4542 bool_t trunc; 4543 caller_context_t ct; 4544 component4 *component; 4545 bslabel_t *clabel; 4546 attrvers_t avers; 4547 4548 avers = RFS4_ATTRVERS(cs); 4549 sarg.sbp = &sb; 4550 dvp = cs->vp; 4551 4552 /* Check if the file system is read only */ 4553 if (rdonly4(cs->exi, dvp, req)) 4554 return (NFS4ERR_ROFS); 4555 4556 /* check the label of including directory */ 4557 if (is_system_labeled()) { 4558 ASSERT(req->rq_label != NULL); 4559 clabel = req->rq_label; 4560 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *, 4561 "got client label from request(1)", 4562 struct svc_req *, req); 4563 if (!blequal(&l_admin_low->tsl_label, clabel)) { 4564 if (!do_rfs_label_check(clabel, dvp, 4565 EQUALITY_CHECK, cs->exi)) { 4566 return (NFS4ERR_ACCESS); 4567 } 4568 } 4569 } 4570 4571 /* 4572 * Get the last component of path name in nm. cs will reference 4573 * the including directory on success. 
4574 */ 4575 component = &args->open_claim4_u.file; 4576 if (!utf8_dir_verify(component)) 4577 return (NFS4ERR_INVAL); 4578 4579 nm = utf8_to_fn(component, &buflen, NULL); 4580 4581 if (nm == NULL) 4582 return (NFS4ERR_RESOURCE); 4583 4584 if (buflen > MAXNAMELEN) { 4585 kmem_free(nm, buflen); 4586 return (NFS4ERR_NAMETOOLONG); 4587 } 4588 4589 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ; 4590 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL); 4591 if (error) { 4592 kmem_free(nm, buflen); 4593 return (puterrno4(error)); 4594 } 4595 4596 if (bva.va_type != VDIR) { 4597 kmem_free(nm, buflen); 4598 return (NFS4ERR_NOTDIR); 4599 } 4600 4601 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime) 4602 4603 switch (args->mode) { 4604 case GUARDED4: 4605 /*FALLTHROUGH*/ 4606 case UNCHECKED4: 4607 nfs4_ntov_table_init(&ntov, avers); 4608 ntov_table_init = TRUE; 4609 4610 *attrset = NFS4_EMPTY_ATTRMAP(avers); 4611 status = do_rfs4_set_attrs(attrset, 4612 &args->createhow4_u.createattrs, 4613 cs, &sarg, &ntov, NFS4ATTR_SETIT); 4614 4615 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) && 4616 sarg.vap->va_type != VREG) { 4617 if (sarg.vap->va_type == VDIR) 4618 status = NFS4ERR_ISDIR; 4619 else if (sarg.vap->va_type == VLNK) 4620 status = NFS4ERR_SYMLINK; 4621 else 4622 status = NFS4ERR_INVAL; 4623 } 4624 4625 if (status != NFS4_OK) { 4626 kmem_free(nm, buflen); 4627 nfs4_ntov_table_free(&ntov, &sarg); 4628 *attrset = NFS4_EMPTY_ATTRMAP(avers); 4629 return (status); 4630 } 4631 4632 vap = sarg.vap; 4633 vap->va_type = VREG; 4634 vap->va_mask |= AT_TYPE; 4635 4636 if ((vap->va_mask & AT_MODE) == 0) { 4637 vap->va_mask |= AT_MODE; 4638 vap->va_mode = (mode_t)0600; 4639 } 4640 4641 if (vap->va_mask & AT_SIZE) { 4642 4643 /* Disallow create with a non-zero size */ 4644 4645 if ((reqsize = sarg.vap->va_size) != 0) { 4646 kmem_free(nm, buflen); 4647 nfs4_ntov_table_free(&ntov, &sarg); 4648 *attrset = NFS4_EMPTY_ATTRMAP(avers); 4649 return (NFS4ERR_INVAL); 4650 } 4651 setsize = TRUE; 
4652 } 4653 break; 4654 4655 case EXCLUSIVE4: 4656 /* prohibit EXCL create of named attributes */ 4657 if (dvp->v_flag & V_XATTRDIR) { 4658 kmem_free(nm, buflen); 4659 *attrset = NFS4_EMPTY_ATTRMAP(avers); 4660 return (NFS4ERR_INVAL); 4661 } 4662 4663 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE; 4664 cva.va_type = VREG; 4665 /* 4666 * Ensure no time overflows. Assumes underlying 4667 * filesystem supports at least 32 bits. 4668 * Truncate nsec to usec resolution to allow valid 4669 * compares even if the underlying filesystem truncates. 4670 */ 4671 mtime = (timespec32_t *)&args->createhow4_u.createverf; 4672 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX; 4673 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000; 4674 cva.va_mode = (mode_t)0; 4675 vap = &cva; 4676 break; 4677 4678 case EXCLUSIVE4_1: 4679 kmem_free(nm, buflen); 4680 *attrset = NFS4_EMPTY_ATTRMAP(avers); 4681 return (NFS4ERR_INVAL); 4682 } 4683 4684 status = create_vnode(dvp, nm, vap, args->mode, mtime, 4685 cs->cr, &vp, &created); 4686 kmem_free(nm, buflen); 4687 4688 if (status != NFS4_OK) { 4689 if (ntov_table_init) 4690 nfs4_ntov_table_free(&ntov, &sarg); 4691 *attrset = NFS4_EMPTY_ATTRMAP(avers); 4692 return (status); 4693 } 4694 4695 trunc = (setsize && !created); 4696 4697 if (args->mode != EXCLUSIVE4) { 4698 attrmap4 createmask = args->createhow4_u.createattrs.attrmask; 4699 4700 /* 4701 * True verification that object was created with correct 4702 * attrs is impossible. The attrs could have been changed 4703 * immediately after object creation. If attributes did 4704 * not verify, the only recourse for the server is to 4705 * destroy the object. Maybe if some attrs (like gid) 4706 * are set incorrectly, the object should be destroyed; 4707 * however, seems bad as a default policy. Do we really 4708 * want to destroy an object over one of the times not 4709 * verifying correctly? 
For these reasons, the server 4710 * currently sets bits in attrset for createattrs 4711 * that were set; however, no verification is done. 4712 * 4713 * vmask_to_nmask accounts for vattr bits set on create 4714 * [do_rfs4_set_attrs() only sets resp bits for 4715 * non-vattr/vfs bits.] 4716 * Mask off any bits we set by default so as not to return 4717 * more attrset bits than were requested in createattrs 4718 */ 4719 if (created) { 4720 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset, avers); 4721 ATTRMAP_MASK(*attrset, createmask); 4722 } else { 4723 /* 4724 * We did not create the vnode (we tried but it 4725 * already existed). In this case, the only createattr 4726 * that the spec allows the server to set is size, 4727 * and even then, it can only be set if it is 0. 4728 */ 4729 *attrset = NFS4_EMPTY_ATTRMAP(avers); 4730 if (trunc) 4731 ATTR_SET(*attrset, SIZE); 4732 } 4733 } 4734 if (ntov_table_init) 4735 nfs4_ntov_table_free(&ntov, &sarg); 4736 4737 /* 4738 * Get the initial "after" sequence number, if it fails, 4739 * set to zero, time to before. 4740 */ 4741 iva.va_mask = AT_CTIME|AT_SEQ; 4742 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) { 4743 iva.va_seq = 0; 4744 iva.va_ctime = bva.va_ctime; 4745 } 4746 4747 /* 4748 * create_vnode attempts to create the file exclusive, 4749 * if it already exists the VOP_CREATE will fail and 4750 * may not increase va_seq. It is atomic if 4751 * we haven't changed the directory, but if it has changed 4752 * we don't know what changed it. 4753 */ 4754 if (!created) { 4755 if (bva.va_seq && iva.va_seq && 4756 bva.va_seq == iva.va_seq) 4757 cinfo->atomic = TRUE; 4758 else 4759 cinfo->atomic = FALSE; 4760 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime); 4761 } else { 4762 /* 4763 * The entry was created, we need to sync the 4764 * directory metadata. 4765 */ 4766 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL); 4767 4768 /* 4769 * Get "after" change value, if it fails, simply return the 4770 * before value. 
4771 */ 4772 ava.va_mask = AT_CTIME|AT_SEQ; 4773 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) { 4774 ava.va_ctime = bva.va_ctime; 4775 ava.va_seq = 0; 4776 } 4777 4778 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime); 4779 4780 /* 4781 * The cinfo->atomic = TRUE only if we have 4782 * non-zero va_seq's, and it has incremented by exactly one 4783 * during the create_vnode and it didn't 4784 * change during the VOP_FSYNC. 4785 */ 4786 if (bva.va_seq && iva.va_seq && ava.va_seq && 4787 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq) 4788 cinfo->atomic = TRUE; 4789 else 4790 cinfo->atomic = FALSE; 4791 } 4792 4793 /* Check for mandatory locking and that the size gets set. */ 4794 cva.va_mask = AT_MODE; 4795 if (setsize) 4796 cva.va_mask |= AT_SIZE; 4797 4798 /* Assume the worst */ 4799 cs->mandlock = TRUE; 4800 4801 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) { 4802 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode); 4803 4804 /* 4805 * Truncate the file if necessary; this would be 4806 * the case for create over an existing file. 4807 */ 4808 4809 if (trunc) { 4810 int in_crit = 0; 4811 rfs4_file_t *fp; 4812 bool_t create = FALSE; 4813 4814 /* 4815 * We are writing over an existing file. 4816 * Check to see if we need to recall a delegation. 
4817 */ 4818 rfs4_hold_deleg_policy(cs->instp); 4819 if ((fp = rfs4_findfile(cs->instp, vp, NULL, &create)) 4820 != NULL) { 4821 if (rfs4_check_delegated_byfp(cs->instp, 4822 FWRITE, fp, (reqsize == 0), FALSE, FALSE, 4823 &cs->cp->rc_clientid)) { 4824 4825 rfs4_file_rele(fp); 4826 rfs4_rele_deleg_policy(cs->instp); 4827 VN_RELE(vp); 4828 *attrset = NFS4_EMPTY_ATTRMAP(avers); 4829 return (NFS4ERR_DELAY); 4830 } 4831 rfs4_file_rele(fp); 4832 } 4833 rfs4_rele_deleg_policy(cs->instp); 4834 4835 if (nbl_need_check(vp)) { 4836 in_crit = 1; 4837 4838 ASSERT(reqsize == 0); 4839 4840 nbl_start_crit(vp, RW_READER); 4841 if (nbl_conflict(vp, NBL_WRITE, 0, 4842 cva.va_size, 0, NULL)) { 4843 in_crit = 0; 4844 nbl_end_crit(vp); 4845 VN_RELE(vp); 4846 *attrset = NFS4_EMPTY_ATTRMAP(avers); 4847 return (NFS4ERR_ACCESS); 4848 } 4849 } 4850 ct.cc_sysid = 0; 4851 ct.cc_pid = 0; 4852 ct.cc_caller_id = cs->instp->caller_id; 4853 4854 cva.va_mask = AT_SIZE; 4855 cva.va_size = reqsize; 4856 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct); 4857 if (in_crit) 4858 nbl_end_crit(vp); 4859 } 4860 } 4861 4862 error = mknfs41_fh(&cs->fh, vp, cs->exi); 4863 /* 4864 * Force modified data and metadata out to stable storage. 
4865 */ 4866 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL); 4867 4868 if (error) { 4869 VN_RELE(vp); 4870 *attrset = NFS4_EMPTY_ATTRMAP(avers); 4871 return (puterrno4(error)); 4872 } 4873 4874 /* if parent dir is attrdir, set namedattr fh flag */ 4875 if (dvp->v_flag & V_XATTRDIR) 4876 FH41_SET_FLAG((nfs41_fh_fmt_t *)cs->fh.nfs_fh4_val, 4877 FH41_NAMEDATTR); 4878 4879 if (cs->vp) 4880 VN_RELE(cs->vp); 4881 4882 cs->vp = vp; 4883 4884 /* 4885 * if we did not create the file, we will need to check 4886 * the access bits on the file 4887 */ 4888 4889 if (!created) { 4890 if (setsize) 4891 args->share_access |= OPEN4_SHARE_ACCESS_WRITE; 4892 status = check_open_access(args->share_access, cs, req); 4893 if (status != NFS4_OK) 4894 *attrset = NFS4_EMPTY_ATTRMAP(avers); 4895 } else { 4896 status = mds_createfile_get_layout(req, vp, cs, &ct, plo); 4897 4898 /* 4899 * Allow mds_createfile_get_layout() to be verbose 4900 * in what it presents as a status, but be aware 4901 * that it is permissible to not generate a 4902 * layout. 4903 */ 4904 if (status == NFS4ERR_LAYOUTUNAVAILABLE) { 4905 status = NFS4_OK; 4906 } 4907 } 4908 4909 return (status); 4910 } 4911 4912 /* 4913 * 1) CB RACE <kill stored rs record> [done] 4914 * 2) slot reuse <kill stored rs record> [done] 4915 * 3) CB_RECALL <(typical/normal case) use new sessid in deleg_state 4916 * to find session originally granted the delegation to 4917 * issue recall over _that_ session's back channel> 4918 * XXX - <<< check spec >>> 4919 */ 4920 void 4921 rfs41_rs_record(struct compound_state *cs, stateid_type_t type, void *p) 4922 { 4923 rfs4_deleg_state_t *dsp; 4924 slot_ent_t *slotent; 4925 4926 #ifdef DEBUG_VERBOSE 4927 /* 4928 * XXX - Do not change this to a static D probe; 4929 * this is not intended for production !!! 
4930 */ 4931 ulong_t offset; 4932 char *who; 4933 who = modgetsymname((uintptr_t)caller(), &offset); 4934 #endif /* DEBUG_VERBOSE */ 4935 4936 switch (type) { 4937 case DELEGID: /* sessid/slot/seqid + rsid */ 4938 ASSERT(cs != NULL && cs->sp != NULL); 4939 4940 dsp = (rfs4_deleg_state_t *)p; 4941 ASSERT(dsp != NULL); 4942 #ifdef DEBUG_VERBOSE 4943 cmn_err(CE_NOTE, "rfs41_rs_record: (%s, dsp = 0x%p)", who, dsp); 4944 #endif /* DEBUG_VERBOSE */ 4945 4946 /* delegation state id stored in rfs4_deleg_state_t */ 4947 bcopy(cs->sp->sn_sessid, dsp->rds_rs.sessid, 4948 sizeof (sessionid4)); 4949 dsp->rds_rs.seqid = cs->seqid; 4950 dsp->rds_rs.slotno = cs->slotno; 4951 rfs41_deleg_rs_hold(dsp); 4952 4953 /* add it to slrc slot to track slot-reuse case */ 4954 slotent = slrc_slot_get(cs->sp->sn_replay, cs->slotno); 4955 ASSERT(slotent != NULL); 4956 ASSERT(slotent->se_p == NULL); 4957 mutex_enter(&slotent->se_lock); 4958 slotent->se_p = (rfs4_deleg_state_t *)dsp; 4959 mutex_exit(&slotent->se_lock); 4960 4961 rfs4_dbe_hold(dsp->rds_dbe); /* added ref to deleg_state */ 4962 break; 4963 4964 case LAYOUTID: 4965 /* 4966 * Layout stateid race detection will be done 4967 * using the stateid's embedded seqid field. 4968 */ 4969 /* FALLTHROUGH */ 4970 default: 4971 break; 4972 } 4973 } 4974 4975 void 4976 rfs41_rs_erase(void *p) 4977 { 4978 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)p; 4979 #ifdef DEBUG_VERBOSE 4980 /* 4981 * XXX - Do not change this to a static D probe; 4982 * this is not intended for production !!! 
4983 */ 4984 ulong_t offset; 4985 char *who; 4986 who = modgetsymname((uintptr_t)caller(), &offset); 4987 cmn_err(CE_NOTE, "rfs41_rs_erase: (%s, dsp = 0x%p)", who, dsp); 4988 #endif /* DEBUG_VERBOSE */ 4989 4990 ASSERT(dsp != NULL); 4991 if (dsp->rds_rs.refcnt > 0) { 4992 rfs41_deleg_rs_rele(dsp); 4993 rfs4_deleg_state_rele(dsp); 4994 } 4995 } 4996 4997 #ifdef DEBUG 4998 /* 4999 * XXX - This is a handy way to force the server to "wait" before 5000 * granting a delegation to the requesting client (thereby 5001 * forcing the CB_RACE condition). rsec == # of secs to wait. 5002 */ 5003 int rsec = 0; 5004 #endif 5005 5006 /*ARGSUSED*/ 5007 static void 5008 mds_do_open(struct compound_state *cs, struct svc_req *req, 5009 rfs4_openowner_t *oo, delegreq_t deleg, uint32_t access, uint32_t deny, 5010 OPEN4res *resp, int deleg_cur, mds_layout_t *plo) 5011 { 5012 rfs4_state_t *sp; 5013 rfs4_file_t *fp; 5014 bool_t screate = TRUE; 5015 bool_t fcreate = TRUE; 5016 uint32_t amodes; 5017 uint32_t dmodes; 5018 rfs4_deleg_state_t *dsp; 5019 sysid_t sysid; 5020 nfsstat4 status; 5021 caller_context_t ct; 5022 int fflags = 0; 5023 int recall = 0; 5024 int err; 5025 int first_open; 5026 5027 /* get the file struct and hold a lock on it during initial open */ 5028 fp = rfs4_findfile_withlock(cs->instp, cs->vp, &cs->fh, &fcreate); 5029 if (fp == NULL) { 5030 DTRACE_PROBE(nfss__e__no_file); 5031 resp->status = NFS4ERR_SERVERFAULT; 5032 return; 5033 } 5034 5035 sp = rfs4_findstate_by_owner_file(cs, oo, fp, &screate); 5036 if (sp == NULL) { 5037 DTRACE_PROBE(nfss__e__no_state); 5038 resp->status = NFS4ERR_RESOURCE; 5039 /* No need to keep any reference */ 5040 rfs4_file_rele_withunlock(fp); 5041 return; 5042 } 5043 5044 /* try to get the sysid before continuing */ 5045 if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) { 5046 resp->status = status; 5047 rfs4_file_rele(fp); 5048 /* Not a fully formed open; "close" it */ 5049 if (screate == TRUE) 5050 rfs4_state_close(sp, 
FALSE, FALSE, cs->cr); 5051 rfs4_state_rele(sp); 5052 return; 5053 } 5054 5055 /* 5056 * Assign the layout if there is one 5057 * Note that this means the file was just created. 5058 */ 5059 if (plo) { 5060 ASSERT(fp->rf_mlo == NULL); 5061 if (fp->rf_mlo) { 5062 rfs4_dbe_rele(fp->rf_mlo->mlo_dbe); 5063 } 5064 5065 fp->rf_mlo = plo; 5066 } 5067 5068 /* Calculate the fflags for this OPEN */ 5069 if (access & OPEN4_SHARE_ACCESS_READ) 5070 fflags |= FREAD; 5071 if (access & OPEN4_SHARE_ACCESS_WRITE) 5072 fflags |= FWRITE; 5073 5074 rfs4_dbe_lock(sp->rs_dbe); 5075 5076 /* 5077 * Calculate the new deny and access mode that this open is adding to 5078 * the file for this open owner; 5079 */ 5080 dmodes = (deny & ~sp->rs_share_deny); 5081 amodes = (access & ~sp->rs_share_access); 5082 5083 first_open = (sp->rs_share_access & OPEN4_SHARE_ACCESS_BOTH) == 0; 5084 5085 /* 5086 * Check to see the client has already sent an open for this 5087 * open owner on this file with the same share/deny modes. 5088 * If so, we don't need to check for a conflict and we don't 5089 * need to add another shrlock. If not, then we need to 5090 * check for conflicts in deny and access before checking for 5091 * conflicts in delegation. We don't want to recall a 5092 * delegation based on an open that will eventually fail based 5093 * on shares modes. 5094 */ 5095 5096 if (dmodes || amodes) { 5097 if ((err = rfs4_share(sp, access, deny)) != 0) { 5098 rfs4_dbe_unlock(sp->rs_dbe); 5099 resp->status = err; 5100 5101 rfs4_file_rele(fp); 5102 /* Not a fully formed open; "close" it */ 5103 if (screate == TRUE) 5104 rfs4_state_close(sp, FALSE, FALSE, cs->cr); 5105 rfs4_state_rele(sp); 5106 return; 5107 } 5108 } 5109 5110 rfs4_dbe_lock(fp->rf_dbe); 5111 5112 /* 5113 * Check to see if this file is delegated and if so, if a 5114 * recall needs to be done. 5115 * This only checke the delegations for this instance. 
If another 5116 * instance has a delegation for this file, then the conflict 5117 * detection will be done in the monitor on OPEN. We just need to 5118 * check if we have a delegation and if the calling client is the 5119 * owner. The monitor doesn't have enough info to determine if the 5120 * caller is the owner of the delegation or not. 5121 */ 5122 if (rfs4_check_recall(sp, access)) { 5123 rfs4_dbe_unlock(fp->rf_dbe); 5124 rfs4_dbe_unlock(sp->rs_dbe); 5125 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client); 5126 delay(NFS4_DELEGATION_CONFLICT_DELAY); 5127 rfs4_dbe_lock(sp->rs_dbe); 5128 5129 /* if state closed while lock was dropped */ 5130 if (sp->rs_closed) { 5131 if (dmodes || amodes) 5132 (void) rfs4_unshare(sp); 5133 rfs4_dbe_unlock(sp->rs_dbe); 5134 rfs4_file_rele(fp); 5135 /* Not a fully formed open; "close" it */ 5136 if (screate == TRUE) 5137 rfs4_state_close(sp, FALSE, FALSE, cs->cr); 5138 rfs4_state_rele(sp); 5139 resp->status = NFS4ERR_OLD_STATEID; 5140 return; 5141 } 5142 5143 rfs4_dbe_lock(fp->rf_dbe); 5144 /* Let's see if the delegation was returned */ 5145 if (rfs4_check_recall(sp, access)) { 5146 rfs4_dbe_unlock(fp->rf_dbe); 5147 if (dmodes || amodes) 5148 (void) rfs4_unshare(sp); 5149 rfs4_dbe_unlock(sp->rs_dbe); 5150 rfs4_file_rele(fp); 5151 rfs4_update_lease(sp->rs_owner->ro_client); 5152 5153 /* Not a fully formed open; "close" it */ 5154 if (screate == TRUE) 5155 rfs4_state_close(sp, FALSE, FALSE, cs->cr); 5156 rfs4_state_rele(sp); 5157 resp->status = NFS4ERR_DELAY; 5158 return; 5159 } 5160 } 5161 5162 /* 5163 * the share check passed and any delegation conflict has been 5164 * taken care of, now call vop_open. 5165 * if this is the first open then call vop_open with fflags. 5166 * if not, call vn_open_upgrade with just the upgrade flags. 5167 * 5168 * if the file has been opened already, it will have the current 5169 * access mode in the state struct. if it has no share access, then 5170 * this is a new open. 
5171 * 5172 * However, if this is open with CLAIM_DELEGATE_CUR, then don't 5173 * call VOP_OPEN(), just do the open upgrade. 5174 */ 5175 if (first_open && !deleg_cur) { 5176 ct.cc_sysid = sysid; 5177 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe); 5178 ct.cc_caller_id = cs->instp->caller_id; 5179 ct.cc_flags = CC_DONTBLOCK; 5180 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct); 5181 if (err) { 5182 rfs4_dbe_unlock(fp->rf_dbe); 5183 if (dmodes || amodes) 5184 (void) rfs4_unshare(sp); 5185 rfs4_dbe_unlock(sp->rs_dbe); 5186 rfs4_file_rele(fp); 5187 5188 /* Not a fully formed open; "close" it */ 5189 if (screate == TRUE) 5190 rfs4_state_close(sp, FALSE, FALSE, cs->cr); 5191 rfs4_state_rele(sp); 5192 if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 5193 resp->status = NFS4ERR_DELAY; 5194 else 5195 resp->status = NFS4ERR_SERVERFAULT; 5196 return; 5197 } 5198 } else { /* open upgrade */ 5199 /* 5200 * calculate the fflags for the new mode that is being added 5201 * by this upgrade. 5202 */ 5203 fflags = 0; 5204 if (amodes & OPEN4_SHARE_ACCESS_READ) 5205 fflags |= FREAD; 5206 if (amodes & OPEN4_SHARE_ACCESS_WRITE) 5207 fflags |= FWRITE; 5208 vn_open_upgrade(cs->vp, fflags); 5209 } 5210 sp->rs_opened = TRUE; 5211 5212 if (dmodes & OPEN4_SHARE_DENY_READ) 5213 fp->rf_deny_read++; 5214 if (dmodes & OPEN4_SHARE_DENY_WRITE) 5215 fp->rf_deny_write++; 5216 fp->rf_share_deny |= deny; 5217 5218 if (amodes & OPEN4_SHARE_ACCESS_READ) 5219 fp->rf_access_read++; 5220 if (amodes & OPEN4_SHARE_ACCESS_WRITE) 5221 fp->rf_access_write++; 5222 fp->rf_share_access |= access; 5223 5224 /* 5225 * Check for delegation here. if the deleg argument is not 5226 * DELEG_ANY, then this is a reclaim from a client and 5227 * we must honor the delegation requested. If necessary we can 5228 * set the recall flag. 
5229 */ 5230 dsp = rfs4_grant_delegation(cs, deleg, sp, &recall); 5231 5232 cs->deleg = (fp->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE); 5233 5234 next_stateid(&sp->rs_stateid); 5235 5236 resp->stateid = sp->rs_stateid.stateid; 5237 5238 rfs4_dbe_unlock(fp->rf_dbe); 5239 rfs4_dbe_unlock(sp->rs_dbe); 5240 5241 if (dsp) { 5242 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall); 5243 rfs41_rs_record(cs, DELEGID, dsp); 5244 rfs4_deleg_state_rele(dsp); 5245 #ifdef DEBUG 5246 if (rsec) { 5247 /* add delay here to force CB_RACE; rick */ 5248 delay(SEC_TO_TICK(rsec)); 5249 } 5250 #endif 5251 } 5252 5253 rfs4_file_rele(fp); 5254 rfs4_state_rele(sp); 5255 5256 resp->status = NFS4_OK; 5257 } 5258 5259 nfsstat4 5260 mds_lookupfile(component4 *component, struct svc_req *req, 5261 struct compound_state *cs, uint32_t access, 5262 change_info4 *cinfo) 5263 { 5264 nfsstat4 status; 5265 char *nm; 5266 uint32_t len; 5267 vnode_t *dvp = cs->vp; 5268 vattr_t bva, ava, fva; 5269 int error; 5270 5271 if (dvp == NULL) { 5272 return (NFS4ERR_NOFILEHANDLE); 5273 } 5274 5275 if (dvp->v_type != VDIR) { 5276 return (NFS4ERR_NOTDIR); 5277 } 5278 5279 if (!utf8_dir_verify(component)) 5280 return (NFS4ERR_INVAL); 5281 5282 nm = utf8_to_fn(component, &len, NULL); 5283 if (nm == NULL) { 5284 return (NFS4ERR_INVAL); 5285 } 5286 5287 if (len > MAXNAMELEN) { 5288 kmem_free(nm, len); 5289 return (NFS4ERR_NAMETOOLONG); 5290 } 5291 5292 /* Get "before" change value */ 5293 bva.va_mask = AT_CTIME|AT_SEQ; 5294 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL); 5295 if (error) 5296 return (puterrno4(error)); 5297 5298 /* mds_lookup may VN_RELE directory */ 5299 VN_HOLD(dvp); 5300 5301 status = mds_do_lookup(nm, len, req, cs); 5302 5303 kmem_free(nm, len); 5304 5305 if (status != NFS4_OK) { 5306 VN_RELE(dvp); 5307 return (status); 5308 } 5309 5310 /* 5311 * Get "after" change value, if it fails, simply return the 5312 * before value. 
5313 */ 5314 ava.va_mask = AT_CTIME|AT_SEQ; 5315 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) { 5316 ava.va_ctime = bva.va_ctime; 5317 ava.va_seq = 0; 5318 } 5319 VN_RELE(dvp); 5320 5321 /* 5322 * Validate the file is a file 5323 */ 5324 fva.va_mask = AT_TYPE|AT_MODE; 5325 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL); 5326 if (error) 5327 return (puterrno4(error)); 5328 5329 if (fva.va_type != VREG) { 5330 if (fva.va_type == VDIR) 5331 return (NFS4ERR_ISDIR); 5332 if (fva.va_type == VLNK) 5333 return (NFS4ERR_SYMLINK); 5334 return (NFS4ERR_INVAL); 5335 } 5336 5337 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime); 5338 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime); 5339 5340 /* 5341 * It is undefined if VOP_LOOKUP will change va_seq, so 5342 * cinfo.atomic = TRUE only if we have 5343 * non-zero va_seq's, and they have not changed. 5344 */ 5345 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq) 5346 cinfo->atomic = TRUE; 5347 else 5348 cinfo->atomic = FALSE; 5349 5350 /* Check for mandatory locking */ 5351 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode); 5352 return (check_open_access(access, cs, req)); 5353 } 5354 5355 /*ARGSUSED*/ 5356 static void 5357 mds_do_opennull(struct compound_state *cs, 5358 struct svc_req *req, 5359 OPEN4args *args, 5360 rfs4_openowner_t *oo, 5361 OPEN4res *resp) 5362 { 5363 change_info4 *cinfo = &resp->cinfo; 5364 attrmap4 *attrset = &resp->attrset; 5365 5366 mds_layout_t *plo = NULL; 5367 5368 if (args->opentype == OPEN4_NOCREATE) 5369 resp->status = mds_lookupfile(&args->open_claim4_u.file, 5370 req, cs, (args->share_access & 0xff), cinfo); 5371 else { 5372 /* inhibit delegation grants during exclusive create */ 5373 5374 if (args->mode == EXCLUSIVE4) 5375 rfs4_disable_delegation(cs->instp); 5376 5377 /* 5378 * Create the file and get the layout. 
5379 */ 5380 resp->status = mds_createfile(args, req, cs, cinfo, 5381 attrset, &plo); 5382 } 5383 5384 if (resp->status == NFS4_OK) { 5385 5386 /* cs->vp and cs->fh now references the desired file */ 5387 mds_do_open(cs, req, oo, do_41_deleg_hack(args->share_access), 5388 (args->share_access & 0xff), args->share_deny, resp, 5389 0, plo); 5390 5391 /* 5392 * If rfs4_createfile set attrset, we must 5393 * clear this attrset before the response is copied. 5394 */ 5395 if (resp->status != NFS4_OK) 5396 resp->attrset = 5397 NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs)); 5398 } else 5399 *cs->statusp = resp->status; 5400 5401 if (args->mode == EXCLUSIVE4) 5402 rfs4_enable_delegation(cs->instp); 5403 } 5404 5405 /*ARGSUSED*/ 5406 static void 5407 mds_do_openprev(struct compound_state *cs, struct svc_req *req, 5408 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp) 5409 { 5410 change_info4 *cinfo = &resp->cinfo; 5411 vattr_t va; 5412 vtype_t v_type = cs->vp->v_type; 5413 int error = 0; 5414 caller_context_t ct; 5415 5416 /* Verify that we have a regular file */ 5417 if (v_type != VREG) { 5418 if (v_type == VDIR) 5419 resp->status = NFS4ERR_ISDIR; 5420 else if (v_type == VLNK) 5421 resp->status = NFS4ERR_SYMLINK; 5422 else 5423 resp->status = NFS4ERR_INVAL; 5424 return; 5425 } 5426 5427 ct.cc_sysid = 0; 5428 ct.cc_pid = 0; 5429 ct.cc_caller_id = cs->instp->caller_id; 5430 ct.cc_flags = CC_DONTBLOCK; 5431 5432 va.va_mask = AT_MODE|AT_UID; 5433 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, &ct); 5434 if (error) { 5435 resp->status = puterrno4(error); 5436 return; 5437 } 5438 5439 cs->mandlock = MANDLOCK(cs->vp, va.va_mode); 5440 5441 /* 5442 * Check if we have access to the file, Note the the file 5443 * could have originally been open UNCHECKED or GUARDED 5444 * with mode bits that will now fail, but there is nothing 5445 * we can really do about that except in the case that the 5446 * owner of the file is the one requesting the open. 
5447 */ 5448 if (crgetuid(cs->cr) != va.va_uid) { 5449 resp->status = check_open_access((args->share_access & 0xff), 5450 cs, req); 5451 if (resp->status != NFS4_OK) { 5452 return; 5453 } 5454 } 5455 5456 /* 5457 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero 5458 */ 5459 cinfo->before = 0; 5460 cinfo->after = 0; 5461 cinfo->atomic = FALSE; 5462 5463 mds_do_open(cs, req, oo, 5464 NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type), 5465 (args->share_access && 0xff), args->share_deny, resp, 0, NULL); 5466 } 5467 5468 static void 5469 mds_do_opendelcur(struct compound_state *cs, struct svc_req *req, 5470 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp) 5471 { 5472 int error; 5473 nfsstat4 status; 5474 stateid4 stateid = 5475 args->open_claim4_u.delegate_cur_info.delegate_stateid; 5476 rfs4_deleg_state_t *dsp; 5477 5478 /* 5479 * Find the state info from the stateid and confirm that the 5480 * file is delegated. If the state openowner is the same as 5481 * the supplied openowner we're done. If not, get the file 5482 * info from the found state info. Use that file info to 5483 * create the state for this lock owner. Note solaris doen't 5484 * really need the pathname to find the file. We may want to 5485 * lookup the pathname and make sure that the vp exist and 5486 * matches the vp in the file structure. However it is 5487 * possible that the pathname nolonger exists (local process 5488 * unlinks the file), so this may not be that useful. 5489 */ 5490 5491 status = rfs4_get_deleg_state(cs, &stateid, &dsp); 5492 if (status != NFS4_OK) { 5493 resp->status = status; 5494 return; 5495 } 5496 5497 ASSERT(dsp->rds_finfo->rf_dinfo->rd_dtype != OPEN_DELEGATE_NONE); 5498 5499 /* 5500 * New lock owner, create state. 
Since this was probably called 5501 * in response to a CB_RECALL we set deleg to DELEG_NONE 5502 */ 5503 5504 ASSERT(cs->vp != NULL); 5505 VN_RELE(cs->vp); 5506 VN_HOLD(dsp->rds_finfo->rf_vp); 5507 cs->vp = dsp->rds_finfo->rf_vp; 5508 5509 if (error = mknfs41_fh(&cs->fh, cs->vp, cs->exi)) { 5510 rfs4_deleg_state_rele(dsp); 5511 *cs->statusp = resp->status = puterrno4(error); 5512 return; 5513 } 5514 5515 /* Mark progress for delegation returns */ 5516 dsp->rds_finfo->rf_dinfo->rd_time_lastwrite = gethrestime_sec(); 5517 rfs4_deleg_state_rele(dsp); 5518 mds_do_open(cs, req, oo, DELEG_NONE, 5519 (args->share_access & 0xff), 5520 args->share_deny, resp, 1, NULL); 5521 } 5522 5523 /*ARGSUSED*/ 5524 static void 5525 mds_do_opendelprev(struct compound_state *cs, struct svc_req *req, 5526 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp) 5527 { 5528 /* 5529 * Lookup the pathname, it must already exist since this file 5530 * was delegated. 5531 * 5532 * Find the file and state info for this vp and open owner pair. 5533 * check that they are in fact delegated. 5534 * check that the state access and deny modes are the same. 5535 * 5536 * Return the delgation possibly seting the recall flag. 
5537 */ 5538 rfs4_file_t *fp; 5539 rfs4_state_t *sp; 5540 bool_t create = FALSE; 5541 bool_t dcreate = FALSE; 5542 rfs4_deleg_state_t *dsp; 5543 nfsace4 *ace; 5544 5545 5546 /* Note we ignore oflags */ 5547 resp->status = mds_lookupfile(&args->open_claim4_u.file_delegate_prev, 5548 req, cs, (args->share_access & 0xff), &resp->cinfo); 5549 if (resp->status != NFS4_OK) { 5550 return; 5551 } 5552 5553 /* get the file struct and hold a lock on it during initial open */ 5554 fp = rfs4_findfile_withlock(cs->instp, cs->vp, NULL, &create); 5555 if (fp == NULL) { 5556 DTRACE_PROBE(nfss__e__no_file); 5557 resp->status = NFS4ERR_SERVERFAULT; 5558 return; 5559 } 5560 5561 sp = rfs4_findstate_by_owner_file(cs, oo, fp, &create); 5562 if (sp == NULL) { 5563 DTRACE_PROBE(nfss__e__no_state); 5564 resp->status = NFS4ERR_SERVERFAULT; 5565 rfs4_file_rele_withunlock(fp); 5566 return; 5567 } 5568 5569 rfs4_dbe_lock(sp->rs_dbe); 5570 rfs4_dbe_lock(fp->rf_dbe); 5571 if ((args->share_access & 0xff) != sp->rs_share_access || 5572 args->share_deny != sp->rs_share_deny || 5573 sp->rs_finfo->rf_dinfo->rd_dtype == OPEN_DELEGATE_NONE) { 5574 DTRACE_PROBE2(nfss__e__state_mixup, rfs4_state_t *, sp, 5575 OPEN4args *, args); 5576 rfs4_dbe_unlock(fp->rf_dbe); 5577 rfs4_dbe_unlock(sp->rs_dbe); 5578 rfs4_file_rele(fp); 5579 rfs4_state_rele(sp); 5580 resp->status = NFS4ERR_SERVERFAULT; 5581 return; 5582 } 5583 rfs4_dbe_unlock(fp->rf_dbe); 5584 rfs4_dbe_unlock(sp->rs_dbe); 5585 5586 dsp = rfs4_finddeleg(cs, sp, &dcreate); 5587 if (dsp == NULL) { 5588 DTRACE_PROBE(nfss__e__no_deleg); 5589 rfs4_state_rele(sp); 5590 rfs4_file_rele(fp); 5591 resp->status = NFS4ERR_SERVERFAULT; 5592 return; 5593 } 5594 5595 next_stateid(&sp->rs_stateid); 5596 5597 resp->stateid = sp->rs_stateid.stateid; 5598 5599 resp->delegation.delegation_type = dsp->rds_dtype; 5600 5601 if (dsp->rds_dtype == OPEN_DELEGATE_READ) { 5602 open_read_delegation4 *rv = 5603 &resp->delegation.open_delegation4_u.read; 5604 5605 rv->stateid = 
dsp->rds_delegid.stateid; 5606 rv->recall = FALSE; /* no policy in place to set to TRUE */ 5607 ace = &rv->permissions; 5608 } else { 5609 open_write_delegation4 *rv = 5610 &resp->delegation.open_delegation4_u.write; 5611 5612 rv->stateid = dsp->rds_delegid.stateid; 5613 rv->recall = FALSE; /* no policy in place to set to TRUE */ 5614 ace = &rv->permissions; 5615 rv->space_limit.limitby = NFS_LIMIT_SIZE; 5616 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX; 5617 } 5618 5619 /* XXX For now */ 5620 ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE; 5621 ace->flag = 0; 5622 ace->access_mask = 0; 5623 ace->who.utf8string_len = 0; 5624 ace->who.utf8string_val = 0; 5625 5626 rfs4_deleg_state_rele(dsp); 5627 rfs4_state_rele(sp); 5628 rfs4_file_rele(fp); 5629 } 5630 5631 static void 5632 mds_op_open(nfs_argop4 *argop, nfs_resop4 *resop, 5633 struct svc_req *req, compound_state_t *cs) 5634 { 5635 OPEN4args *args = &argop->nfs_argop4_u.opopen; 5636 OPEN4res *resp = &resop->nfs_resop4_u.opopen; 5637 open_owner4 *owner = &args->owner; 5638 open_claim_type4 claim = args->claim; 5639 rfs4_client_t *cp; 5640 rfs4_openowner_t *oo; 5641 bool_t create; 5642 int can_reclaim; 5643 int share_access; 5644 5645 DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs, 5646 OPEN4args *, args); 5647 5648 if (cs->vp == NULL) { 5649 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 5650 goto final; 5651 } 5652 5653 cp = cs->cp; 5654 owner->clientid = cp->rc_clientid; 5655 can_reclaim = cp->rc_can_reclaim; 5656 5657 retry: 5658 create = TRUE; 5659 oo = mds_findopenowner(cs->instp, owner, &create); 5660 if (oo == NULL) { 5661 /* XXX: this seems a little fishy... 
*/ 5662 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID; 5663 goto final; 5664 } 5665 5666 /* Need to serialize access to the stateid space */ 5667 rfs4_sw_enter(&oo->ro_sw); 5668 5669 /* Grace only applies to regular-type OPENs */ 5670 if (rfs4_clnt_in_grace(cp) && 5671 (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) { 5672 *cs->statusp = resp->status = NFS4ERR_GRACE; 5673 goto out; 5674 } 5675 5676 /* 5677 * If previous state at the server existed then can_reclaim 5678 * will be set. If not reply NFS4ERR_NO_GRACE to the 5679 * client. 5680 */ 5681 if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) { 5682 *cs->statusp = resp->status = NFS4ERR_NO_GRACE; 5683 goto out; 5684 } 5685 5686 /* 5687 * Reject the open if the client has missed the grace period 5688 */ 5689 if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) { 5690 *cs->statusp = resp->status = NFS4ERR_NO_GRACE; 5691 goto out; 5692 } 5693 5694 /* 5695 * OPEN_CONFIRM is mandatory not to impl in 4.1. 5696 */ 5697 oo->ro_need_confirm = FALSE; 5698 resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX; 5699 5700 /* 5701 * If there is an unshared filesystem mounted on this vnode, 5702 * do not allow to open/create in this directory. 5703 */ 5704 if (vn_ismntpt(cs->vp)) { 5705 *cs->statusp = resp->status = NFS4ERR_ACCESS; 5706 goto out; 5707 } 5708 5709 share_access = (args->share_access && 0xff); 5710 5711 /* 5712 * access must READ, WRITE, or BOTH. No access is invalid. 5713 * deny can be READ, WRITE, BOTH, or NONE. 5714 * bits not defined for access/deny are invalid. 5715 */ 5716 if (! (share_access & OPEN4_SHARE_ACCESS_BOTH) || 5717 (share_access & ~OPEN4_SHARE_ACCESS_BOTH) || 5718 (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) { 5719 *cs->statusp = resp->status = NFS4ERR_INVAL; 5720 goto out; 5721 } 5722 5723 /* 5724 * make sure attrset is zero before response is built. 
5725 */ 5726 resp->attrset = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs)); 5727 5728 switch (claim) { 5729 case CLAIM_NULL: 5730 mds_do_opennull(cs, req, args, oo, resp); 5731 break; 5732 case CLAIM_PREVIOUS: 5733 mds_do_openprev(cs, req, args, oo, resp); 5734 break; 5735 case CLAIM_DELEGATE_CUR: 5736 mds_do_opendelcur(cs, req, args, oo, resp); 5737 break; 5738 case CLAIM_DELEGATE_PREV: 5739 mds_do_opendelprev(cs, req, args, oo, resp); 5740 break; 5741 /* OTHER CLAIM TYPES !!! */ 5742 default: 5743 resp->status = NFS4ERR_INVAL; 5744 break; 5745 } 5746 5747 out: 5748 switch (resp->status) { 5749 case NFS4ERR_BADXDR: 5750 case NFS4ERR_BAD_SEQID: 5751 case NFS4ERR_BAD_STATEID: 5752 case NFS4ERR_NOFILEHANDLE: 5753 case NFS4ERR_RESOURCE: 5754 case NFS4ERR_STALE_CLIENTID: 5755 case NFS4ERR_STALE_STATEID: 5756 /* 5757 * The protocol states that if any of these errors are 5758 * being returned, the sequence id should not be 5759 * incremented. Any other return requires an 5760 * increment. 5761 */ 5762 break; 5763 } 5764 *cs->statusp = resp->status; 5765 rfs4_sw_exit(&oo->ro_sw); 5766 rfs4_openowner_rele(oo); 5767 5768 final: 5769 DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs, 5770 OPEN4res *, resp); 5771 } 5772 5773 /*ARGSUSED*/ 5774 static void 5775 mds_free_reply(nfs_resop4 *resop, compound_state_t *cs) 5776 { 5777 /* Common function for NFSv4.0 and NFSv4.1 */ 5778 rfs4_free_reply(resop); 5779 } 5780 5781 /*ARGSUSED*/ 5782 void 5783 mds_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop, 5784 struct svc_req *req, compound_state_t *cs) 5785 { 5786 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade; 5787 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade; 5788 uint32_t access = (args->share_access & 0xff); 5789 uint32_t deny = args->share_deny; 5790 nfsstat4 status; 5791 rfs4_state_t *sp; 5792 rfs4_file_t *fp; 5793 int fflags = 0; 5794 int rc; 5795 5796 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs, 5797 
OPEN_DOWNGRADE4args *, args); 5798 5799 if (cs->vp == NULL) { 5800 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 5801 goto final; 5802 } 5803 5804 if (mds_strict_seqid && args->seqid) { 5805 *cs->statusp = resp->status = NFS4ERR_INVAL; 5806 goto final; 5807 } 5808 5809 status = rfs4_get_state(cs, &args->open_stateid, &sp, RFS4_DBS_VALID); 5810 if (status != NFS4_OK) { 5811 *cs->statusp = resp->status = status; 5812 goto final; 5813 } 5814 5815 /* Ensure specified filehandle matches */ 5816 if (cs->vp != sp->rs_finfo->rf_vp) { 5817 rfs4_state_rele(sp); 5818 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 5819 goto final; 5820 } 5821 5822 /* hold off other access to open_owner while we tinker */ 5823 rfs4_sw_enter(&sp->rs_owner->ro_sw); 5824 5825 rc = mds_check_stateid_seqid(sp, &args->open_stateid); 5826 switch (rc) { 5827 case NFS4_CHECK_STATEID_OKAY: 5828 break; 5829 case NFS4_CHECK_STATEID_OLD: 5830 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 5831 goto end; 5832 case NFS4_CHECK_STATEID_BAD: 5833 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 5834 goto end; 5835 case NFS4_CHECK_STATEID_EXPIRED: 5836 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 5837 goto end; 5838 case NFS4_CHECK_STATEID_CLOSED: 5839 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 5840 goto end; 5841 case NFS4_CHECK_STATEID_UNCONFIRMED: 5842 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 5843 goto end; 5844 case NFS4_CHECK_STATEID_REPLAY: 5845 ASSERT(0); 5846 break; 5847 default: 5848 ASSERT(FALSE); 5849 break; 5850 } 5851 5852 rfs4_dbe_lock(sp->rs_dbe); 5853 /* 5854 * Check that the new access modes and deny modes are valid. 5855 * Check that no invalid bits are set. 
5856 */ 5857 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) || 5858 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) { 5859 *cs->statusp = resp->status = NFS4ERR_INVAL; 5860 rfs4_dbe_unlock(sp->rs_dbe); 5861 goto end; 5862 } 5863 5864 /* 5865 * The new modes must be a subset of the current modes and 5866 * the access must specify at least one mode. To test that 5867 * the new mode is a subset of the current modes we bitwise 5868 * AND them together and check that the result equals the new 5869 * mode. For example: 5870 * New mode, access == R and current mode, sp->share_access == RW 5871 * access & sp->share_access == R == access, so the new access mode 5872 * is valid. Consider access == RW, sp->share_access = R 5873 * access & sp->share_access == R != access, so the new access mode 5874 * is invalid. 5875 */ 5876 if ((access & sp->rs_share_access) != access || 5877 (deny & sp->rs_share_deny) != deny || 5878 (access & 5879 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) { 5880 *cs->statusp = resp->status = NFS4ERR_INVAL; 5881 rfs4_dbe_unlock(sp->rs_dbe); 5882 goto end; 5883 } 5884 5885 /* 5886 * Release any share locks associated with this stateID. 5887 * Strictly speaking, this violates the spec because the 5888 * spec effectively requires that open downgrade be atomic. 5889 * At present, fs_shrlock does not have this capability. 5890 */ 5891 rfs4_unshare(sp); 5892 5893 fp = sp->rs_finfo; 5894 rfs4_dbe_lock(fp->rf_dbe); 5895 5896 /* 5897 * If the current mode has deny read and the new mode 5898 * does not, decrement the number of deny read mode bits 5899 * and if it goes to zero turn off the deny read bit 5900 * on the file. 
5901 */ 5902 if ((sp->rs_share_deny & OPEN4_SHARE_DENY_READ) && 5903 (deny & OPEN4_SHARE_DENY_READ) == 0) { 5904 fp->rf_deny_read--; 5905 if (fp->rf_deny_read == 0) 5906 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ; 5907 } 5908 5909 /* 5910 * If the current mode has deny write and the new mode 5911 * does not, decrement the number of deny write mode bits 5912 * and if it goes to zero turn off the deny write bit 5913 * on the file. 5914 */ 5915 if ((sp->rs_share_deny & OPEN4_SHARE_DENY_WRITE) && 5916 (deny & OPEN4_SHARE_DENY_WRITE) == 0) { 5917 fp->rf_deny_write--; 5918 if (fp->rf_deny_write == 0) 5919 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE; 5920 } 5921 5922 /* 5923 * If the current mode has access read and the new mode 5924 * does not, decrement the number of access read mode bits 5925 * and if it goes to zero turn off the access read bit 5926 * on the file. set fflags to FREAD for the call to 5927 * vn_open_downgrade(). 5928 */ 5929 if ((sp->rs_share_access & OPEN4_SHARE_ACCESS_READ) && 5930 (access & OPEN4_SHARE_ACCESS_READ) == 0) { 5931 fp->rf_access_read--; 5932 if (fp->rf_access_read == 0) 5933 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ; 5934 fflags |= FREAD; 5935 } 5936 5937 /* 5938 * If the current mode has access write and the new mode 5939 * does not, decrement the number of access write mode bits 5940 * and if it goes to zero turn off the access write bit 5941 * on the file. set fflags to FWRITE for the call to 5942 * vn_open_downgrade(). 
5943 */ 5944 if ((sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE) && 5945 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) { 5946 fp->rf_access_write--; 5947 if (fp->rf_access_write == 0) 5948 fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE; 5949 fflags |= FWRITE; 5950 } 5951 5952 /* Check that the file is still accessible */ 5953 ASSERT(fp->rf_share_access); 5954 5955 rfs4_dbe_unlock(fp->rf_dbe); 5956 5957 status = rfs4_share(sp, access, deny); 5958 rfs4_dbe_unlock(sp->rs_dbe); 5959 5960 if (status != NFS4_OK) { 5961 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 5962 goto end; 5963 } 5964 5965 /* 5966 * we successfully downgraded the share lock, now we need to downgrade 5967 * the open. it is possible that the downgrade was only for a deny 5968 * mode and we have nothing else to do. 5969 */ 5970 if ((fflags & (FREAD|FWRITE)) != 0) 5971 vn_open_downgrade(cs->vp, fflags); 5972 5973 rfs4_dbe_lock(sp->rs_dbe); 5974 5975 /* Update the stateid */ 5976 next_stateid(&sp->rs_stateid); 5977 resp->open_stateid = sp->rs_stateid.stateid; 5978 5979 rfs4_dbe_unlock(sp->rs_dbe); 5980 5981 *cs->statusp = resp->status = NFS4_OK; 5982 /* Update the lease */ 5983 rfs4_update_lease(sp->rs_owner->ro_client); 5984 end: 5985 rfs4_sw_exit(&sp->rs_owner->ro_sw); 5986 rfs4_state_rele(sp); 5987 5988 final: 5989 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs, 5990 OPEN_DOWNGRADE4res *, resp); 5991 5992 } 5993 5994 /*ARGSUSED*/ 5995 void 5996 mds_op_close(nfs_argop4 *argop, nfs_resop4 *resop, 5997 struct svc_req *req, compound_state_t *cs) 5998 { 5999 /* XXX Currently not using req arg */ 6000 CLOSE4args *args = &argop->nfs_argop4_u.opclose; 6001 CLOSE4res *resp = &resop->nfs_resop4_u.opclose; 6002 rfs4_state_t *sp; 6003 nfsstat4 status; 6004 int rc; 6005 6006 DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs, 6007 CLOSE4args *, args); 6008 6009 if (cs->vp == NULL) { 6010 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 6011 goto final; 6012 } 6013 6014 if 
(mds_strict_seqid && args->seqid) { 6015 *cs->statusp = resp->status = NFS4ERR_INVAL; 6016 goto final; 6017 } 6018 6019 status = rfs4_get_state(cs, &args->open_stateid, &sp, RFS4_DBS_INVALID); 6020 if (status != NFS4_OK) { 6021 *cs->statusp = resp->status = status; 6022 goto final; 6023 } 6024 6025 /* Ensure specified filehandle matches */ 6026 if (cs->vp != sp->rs_finfo->rf_vp) { 6027 rfs4_state_rele(sp); 6028 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 6029 goto final; 6030 } 6031 6032 /* hold off other access to open_owner while we tinker */ 6033 rfs4_sw_enter(&sp->rs_owner->ro_sw); 6034 6035 rc = mds_check_stateid_seqid(sp, &args->open_stateid); 6036 switch (rc) { 6037 case NFS4_CHECK_STATEID_OKAY: 6038 break; 6039 case NFS4_CHECK_STATEID_OLD: 6040 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 6041 goto end; 6042 case NFS4_CHECK_STATEID_BAD: 6043 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 6044 goto end; 6045 case NFS4_CHECK_STATEID_EXPIRED: 6046 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 6047 goto end; 6048 case NFS4_CHECK_STATEID_CLOSED: 6049 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 6050 goto end; 6051 case NFS4_CHECK_STATEID_UNCONFIRMED: 6052 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 6053 goto end; 6054 case NFS4_CHECK_STATEID_REPLAY: 6055 ASSERT(0); 6056 break; 6057 default: 6058 ASSERT(FALSE); 6059 break; 6060 } 6061 6062 rfs4_dbe_lock(sp->rs_dbe); 6063 6064 /* Update the stateid. 
*/ 6065 next_stateid(&sp->rs_stateid); 6066 resp->open_stateid = sp->rs_stateid.stateid; 6067 6068 rfs4_dbe_unlock(sp->rs_dbe); 6069 6070 rfs4_update_lease(sp->rs_owner->ro_client); 6071 rfs4_state_close(sp, FALSE, FALSE, cs->cr); 6072 6073 *cs->statusp = resp->status = status; 6074 6075 end: 6076 rfs4_sw_exit(&sp->rs_owner->ro_sw); 6077 rfs4_state_rele(sp); 6078 6079 final: 6080 DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs, 6081 CLOSE4res *, resp); 6082 6083 } 6084 6085 /* 6086 * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure. 6087 */ 6088 static nfsstat4 6089 mds_lock_denied(nfs_server_instance_t *instp, LOCK4denied *dp, 6090 struct flock64 *flk) 6091 { 6092 rfs4_lockowner_t *lo; 6093 rfs4_client_t *cp; 6094 uint32_t len; 6095 6096 lo = findlockowner_by_pid(instp, flk->l_pid); 6097 if (lo != NULL) { 6098 cp = lo->rl_client; 6099 if (rfs4_lease_expired(cp)) { 6100 rfs4_lockowner_rele(lo); 6101 rfs4_dbe_hold(cp->rc_dbe); 6102 rfs4_client_close(cp); 6103 return (NFS4ERR_EXPIRED); 6104 } 6105 dp->owner.clientid = lo->rl_owner.clientid; 6106 len = lo->rl_owner.owner_len; 6107 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP); 6108 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len); 6109 dp->owner.owner_len = len; 6110 rfs4_lockowner_rele(lo); 6111 goto finish; 6112 } 6113 6114 /* 6115 * Its not a NFS4 lock. We take advantage that the upper 32 bits 6116 * of the client id contain the boot time for a NFS4 lock. So we 6117 * fabricate and identity by setting clientid to the sysid, and 6118 * the lock owner to the pid. 
/*
 * mds_op_lock: service the LOCK operation.
 *
 * Two request shapes are handled:
 *
 *  - new_lock_owner set: the request carries an open stateid.  The open
 *    state is looked up and checked, the open owner is serialized, a lock
 *    owner is looked up through findlockowner() and the lock state for
 *    that (lock owner, open state) pair through
 *    mds_findlo_state_by_owner().
 *
 *  - new_lock_owner clear: the request carries a lock stateid, which is
 *    resolved directly to the lock state with rfs4_get_lo_state().
 *
 * In both cases the lock state's rls_sw is entered, the object type and
 * the grace-period rules are checked, and rfs4_do_lock() does the work.
 *
 *	argop	- operation arguments (LOCK4args)
 *	resop	- operation result (LOCK4res); also handed to rfs4_do_lock()
 *	req	- unused
 *	cs	- compound state; cs->vp must be set
 *
 * Exit labels: "final" only fires the done probe, "end" releases whatever
 * of lsp/sp was acquired (status already stored), and "out" first stores
 * the local status into the response.
 */
/*ARGSUSED*/
void
mds_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, compound_state_t *cs)
{
	/* XXX Currently not using req arg */
	LOCK4args *args = &argop->nfs_argop4_u.oplock;
	LOCK4res *resp = &resop->nfs_resop4_u.oplock;
	nfsstat4 status;
	stateid4 *stateid;
	rfs4_lockowner_t *lo;
	rfs4_client_t *cp;
	rfs4_state_t *sp = NULL;	/* set only on the new_lock_owner path */
	rfs4_lo_state_t *lsp = NULL;
	bool_t ls_sw_held = FALSE;	/* TRUE once lsp->rls_sw is entered */
	bool_t create = TRUE;
	bool_t lcreate = TRUE;
	int rc;

	DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
	    LOCK4args *, args);


	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto final;
	}

	if (args->locker.new_lock_owner) {
		/* Create a new lockowner for this instance */
		open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;

		/*
		 * validate that open_seqid, lock_seqid and the
		 * clientid in lock_owner are all zero.
		 */
		if (mds_strict_seqid && (olo->open_seqid ||
		    olo->lock_seqid ||
		    olo->lock_owner.clientid)) {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
			goto final;
		}

		/*
		 * get/validate the open stateid
		 */
		stateid = &olo->open_stateid;
		status = rfs4_get_state(cs, stateid, &sp, RFS4_DBS_VALID);
		if (status != NFS4_OK) {
			*cs->statusp = resp->status = status;
			goto final;
		}

		/* Ensure specified filehandle matches */
		if (cs->vp != sp->rs_finfo->rf_vp) {
			/* released here because "final" does no cleanup */
			rfs4_state_rele(sp);
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto final;
		}

		/* hold off other access to open_owner while we tinker */
		rfs4_sw_enter(&sp->rs_owner->ro_sw);

		rc = mds_check_stateid_seqid(sp, stateid);
		switch (rc) {
		case NFS4_CHECK_STATEID_OLD:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_BAD:
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_EXPIRED:
			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
			goto end;
		case NFS4_CHECK_STATEID_UNCONFIRMED:
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_CLOSED:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_OKAY:
		case NFS4_CHECK_STATEID_REPLAY:
			break;
		}

		/*
		 * Use the clientid4 from the session.
		 *
		 * XXX a quick hack is to plop the clientid4 from the
		 * XXX compound state into the lock_owner structure,
		 * XXX since the _hash and _compare functions use
		 * XXX that field.
		 */
		olo->lock_owner.clientid = cs->cp->rc_clientid;

		lo = findlockowner(cs->instp, &olo->lock_owner, &lcreate);
		if (lo == NULL) {
			*cs->statusp = resp->status = NFS4ERR_RESOURCE;
			goto end;
		}

		lsp = mds_findlo_state_by_owner(lo, sp, &create);
		if (lsp == NULL) {
			rfs4_update_lease(sp->rs_owner->ro_client);
			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
			rfs4_lockowner_rele(lo);
			goto end;
		}

		/*
		 * This is the new_lock_owner branch and the client is
		 * supposed to be associating a new lock_owner with
		 * the open file at this point.  If we find that a
		 * lock_owner/state association already exists and a
		 * successful LOCK request was returned to the client,
		 * an error is returned to the client since this is
		 * not appropriate.  The client should be using the
		 * existing lock_owner branch.
		 */
		if (create == FALSE) {
			if (lsp->rls_lock_completed == TRUE) {
				*cs->statusp =
				    resp->status = NFS4ERR_BAD_SEQID;
				rfs4_lockowner_rele(lo);
				goto end;
			}
		}

		rfs4_update_lease(sp->rs_owner->ro_client);
		rfs4_dbe_lock(lsp->rls_dbe);

		/* hold off other access to lsp while we tinker */
		rfs4_sw_enter(&lsp->rls_sw);
		ls_sw_held = TRUE;

		rfs4_dbe_unlock(lsp->rls_dbe);

		rfs4_lockowner_rele(lo);
	} else {
		/*
		 * validate lock_seqid is zero.
		 */
		if (mds_strict_seqid &&
		    args->locker.locker4_u.lock_owner.lock_seqid) {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
			goto final;
		}

		stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
		/* get lsp and hold the lock on the underlying file struct */
		if ((status = rfs4_get_lo_state(cs, stateid, &lsp, TRUE))
		    != NFS4_OK) {
			*cs->statusp = resp->status = status;
			goto final;
		}
		create = FALSE;	/* We didn't create lsp */

		/* Ensure specified filehandle matches */
		if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
			/* released here because "final" does no cleanup */
			rfs4_lo_state_rele(lsp, TRUE);
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto final;
		}

		/* hold off other access to lsp while we tinker */
		rfs4_sw_enter(&lsp->rls_sw);
		ls_sw_held = TRUE;

		switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
		/*
		 * The stateid looks like it was okay (expected to be
		 * the next one)
		 */
		case NFS4_CHECK_STATEID_OKAY:
			/*
			 * The sequence id is now checked.  Determine
			 * if this is a replay or if it is in the
			 * expected (next) sequence.  In the case of a
			 * replay, there are two replay conditions
			 * that may occur.  The first is the normal
			 * condition where a LOCK is done with a
			 * NFS4_OK response and the stateid is
			 * updated.  That case is handled below when
			 * the stateid is identified as a REPLAY.  The
			 * second is the case where an error is
			 * returned, like NFS4ERR_DENIED, and the
			 * sequence number is updated but the stateid
			 * is not updated.  This second case is dealt
			 * with here.  So it may seem odd that the
			 * stateid is okay but the sequence id is a
			 * replay but it is okay.
			 */
			/* XXX: rbg -- missing code ? :-) */
			break;
		case NFS4_CHECK_STATEID_OLD:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_BAD:
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_EXPIRED:
			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
			goto end;
		case NFS4_CHECK_STATEID_CLOSED:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_REPLAY:
			ASSERT(0);
			break;
		default:
			ASSERT(FALSE);
			break;
		}

		rfs4_update_lease(lsp->rls_locker->rl_client);
	}

	/*
	 * NFS4 only allows locking on regular files, so
	 * verify type of object.
	 */
	if (cs->vp->v_type != VREG) {
		if (cs->vp->v_type == VDIR)
			status = NFS4ERR_ISDIR;
		else
			status = NFS4ERR_INVAL;
		goto out;
	}

	cp = lsp->rls_state->rs_owner->ro_client;

	/* During grace only reclaims are allowed ... */
	if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
		status = NFS4ERR_GRACE;
		goto out;
	}

	/* ... and only for clients marked as able to reclaim. */
	if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
		status = NFS4ERR_NO_GRACE;
		goto out;
	}

	/* Outside of grace a reclaim is refused. */
	if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
		status = NFS4ERR_NO_GRACE;
		goto out;
	}

	/* Flag the compound when the file has a write delegation. */
	if (lsp->rls_state->rs_finfo->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE)
		cs->deleg = TRUE;

	/*
	 * NOTE(review): the seqid is read through the lock_owner arm of
	 * the locker4_u union even when the request used the open_owner
	 * arm (new_lock_owner set) -- confirm this is intended.
	 */
	status = rfs4_do_lock(lsp, args->locktype,
	    args->locker.locker4_u.lock_owner.lock_seqid, args->offset,
	    args->length, cs->cr, resop);

out:
	*cs->statusp = resp->status = status;

	if (status == NFS4_OK) {
		resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
		/* remembered so a later new_lock_owner request is refused */
		lsp->rls_lock_completed = TRUE;
	}

end:
	if (lsp) {
		if (ls_sw_held)
			rfs4_sw_exit(&lsp->rls_sw);
		/*
		 * If an sp obtained, then the lsp does not represent
		 * a lock on the file struct.
		 */
		if (sp != NULL)
			rfs4_lo_state_rele(lsp, FALSE);
		else
			rfs4_lo_state_rele(lsp, TRUE);
	}
	if (sp) {
		rfs4_sw_exit(&sp->rs_owner->ro_sw);
		rfs4_state_rele(sp);
	}

final:
	DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
	    LOCK4res *, resp);

}
6407 */ 6408 if (sp != NULL) 6409 rfs4_lo_state_rele(lsp, FALSE); 6410 else 6411 rfs4_lo_state_rele(lsp, TRUE); 6412 } 6413 if (sp) { 6414 rfs4_sw_exit(&sp->rs_owner->ro_sw); 6415 rfs4_state_rele(sp); 6416 } 6417 6418 final: 6419 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs, 6420 LOCK4res *, resp); 6421 6422 } 6423 6424 /* free function for LOCK/LOCKT */ 6425 /*ARGSUSED*/ 6426 static void 6427 mds_lock_denied_free(nfs_resop4 *resop, compound_state_t *cs) 6428 { 6429 /* Common function for NFSv4.0 and NFSv4.1 */ 6430 lock_denied_free(resop); 6431 } 6432 6433 /*ARGSUSED*/ 6434 void 6435 mds_op_locku(nfs_argop4 *argop, nfs_resop4 *resop, 6436 struct svc_req *req, compound_state_t *cs) 6437 { 6438 /* XXX Currently not using req arg */ 6439 LOCKU4args *args = &argop->nfs_argop4_u.oplocku; 6440 LOCKU4res *resp = &resop->nfs_resop4_u.oplocku; 6441 nfsstat4 status; 6442 stateid4 *stateid = &args->lock_stateid; 6443 rfs4_lo_state_t *lsp; 6444 6445 DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs, 6446 LOCKU4args *, args); 6447 6448 6449 if (cs->vp == NULL) { 6450 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 6451 goto final; 6452 } 6453 6454 if (mds_strict_seqid && args->seqid) { 6455 *cs->statusp = resp->status = NFS4ERR_INVAL; 6456 goto final; 6457 } 6458 6459 if ((status = rfs4_get_lo_state(cs, stateid, &lsp, TRUE)) != NFS4_OK) { 6460 *cs->statusp = resp->status = status; 6461 goto final; 6462 } 6463 6464 /* Ensure specified filehandle matches */ 6465 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) { 6466 rfs4_lo_state_rele(lsp, TRUE); 6467 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 6468 goto final; 6469 } 6470 6471 /* hold off other access to lsp while we tinker */ 6472 rfs4_sw_enter(&lsp->rls_sw); 6473 6474 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) { 6475 case NFS4_CHECK_STATEID_OKAY: 6476 break; 6477 case NFS4_CHECK_STATEID_OLD: 6478 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 6479 goto end; 6480 case 
NFS4_CHECK_STATEID_BAD: 6481 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID; 6482 goto end; 6483 case NFS4_CHECK_STATEID_EXPIRED: 6484 *cs->statusp = resp->status = NFS4ERR_EXPIRED; 6485 goto end; 6486 case NFS4_CHECK_STATEID_CLOSED: 6487 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID; 6488 goto end; 6489 case NFS4_CHECK_STATEID_REPLAY: 6490 ASSERT(0); 6491 break; 6492 default: 6493 ASSERT(FALSE); 6494 break; 6495 } 6496 6497 rfs4_update_lock_sequence(lsp); 6498 rfs4_update_lease(lsp->rls_locker->rl_client); 6499 6500 /* 6501 * NFS4 only allows locking on regular files, so 6502 * verify type of object. 6503 */ 6504 if (cs->vp->v_type != VREG) { 6505 if (cs->vp->v_type == VDIR) 6506 status = NFS4ERR_ISDIR; 6507 else 6508 status = NFS4ERR_INVAL; 6509 goto out; 6510 } 6511 6512 if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) { 6513 status = NFS4ERR_GRACE; 6514 goto out; 6515 } 6516 6517 status = rfs4_do_lock(lsp, args->locktype, 6518 args->seqid, args->offset, args->length, cs->cr, resop); 6519 6520 out: 6521 *cs->statusp = resp->status = status; 6522 6523 if (status == NFS4_OK) 6524 resp->lock_stateid = lsp->rls_lockid.stateid; 6525 6526 end: 6527 rfs4_sw_exit(&lsp->rls_sw); 6528 rfs4_lo_state_rele(lsp, TRUE); 6529 6530 final: 6531 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs, 6532 LOCKU4res *, resp); 6533 6534 } 6535 6536 /* 6537 * LOCKT is a best effort routine, the client can not be guaranteed that 6538 * the status return is still in effect by the time the reply is received. 6539 * They are numerous race conditions in this routine, but we are not required 6540 * and can not be accurate. 
6541 */ 6542 /*ARGSUSED*/ 6543 void 6544 mds_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop, 6545 struct svc_req *req, compound_state_t *cs) 6546 { 6547 LOCKT4args *args = &argop->nfs_argop4_u.oplockt; 6548 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt; 6549 rfs4_lockowner_t *lo; 6550 bool_t create = FALSE; 6551 struct flock64 flk; 6552 int error; 6553 int flag = FREAD | FWRITE; 6554 int ltype; 6555 length4 posix_length; 6556 sysid_t sysid; 6557 pid_t pid; 6558 caller_context_t ct; 6559 6560 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs, 6561 LOCKT4args *, args); 6562 6563 if (cs->vp == NULL) { 6564 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 6565 goto final; 6566 } 6567 6568 args->owner.clientid = cs->cp->rc_clientid; 6569 6570 ct.cc_sysid = 0; 6571 ct.cc_pid = 0; 6572 ct.cc_caller_id = cs->instp->caller_id; 6573 ct.cc_flags = CC_DONTBLOCK; 6574 6575 /* 6576 * NFS4 only allows locking on regular files, so 6577 * verify type of object. 6578 */ 6579 if (cs->vp->v_type != VREG) { 6580 if (cs->vp->v_type == VDIR) 6581 *cs->statusp = resp->status = NFS4ERR_ISDIR; 6582 else 6583 *cs->statusp = resp->status = NFS4ERR_INVAL; 6584 goto final; 6585 } 6586 6587 resp->status = NFS4_OK; 6588 6589 switch (args->locktype) { 6590 case READ_LT: 6591 case READW_LT: 6592 ltype = F_RDLCK; 6593 break; 6594 case WRITE_LT: 6595 case WRITEW_LT: 6596 ltype = F_WRLCK; 6597 break; 6598 } 6599 6600 posix_length = args->length; 6601 /* Check for zero length. 
To lock to end of file use all ones for V4 */ 6602 if (posix_length == 0) { 6603 *cs->statusp = resp->status = NFS4ERR_INVAL; 6604 goto final; 6605 } else if (posix_length == (length4)(~0)) { 6606 posix_length = 0; /* Posix to end of file */ 6607 } 6608 6609 /* Find or create a lockowner */ 6610 lo = findlockowner(cs->instp, &args->owner, &create); 6611 6612 if (lo) { 6613 pid = lo->rl_pid; 6614 if ((resp->status = 6615 rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK) 6616 goto out; 6617 } else { 6618 pid = 0; 6619 sysid = cs->instp->lockt_sysid; 6620 } 6621 retry: 6622 flk.l_type = ltype; 6623 flk.l_whence = 0; /* SEEK_SET */ 6624 flk.l_start = args->offset; 6625 flk.l_len = posix_length; 6626 flk.l_sysid = sysid; 6627 flk.l_pid = pid; 6628 flag |= F_REMOTELOCK; 6629 6630 /* Note that length4 is uint64_t but l_len and l_start are off64_t */ 6631 if (flk.l_len < 0 || flk.l_start < 0) { 6632 resp->status = NFS4ERR_INVAL; 6633 goto out; 6634 } 6635 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0, 6636 NULL, cs->cr, &ct); 6637 6638 /* 6639 * N.B. We map error values to nfsv4 errors. This is differrent 6640 * than puterrno4 routine. 
6641 */ 6642 switch (error) { 6643 case 0: 6644 if (flk.l_type == F_UNLCK) 6645 resp->status = NFS4_OK; 6646 else { 6647 if (mds_lock_denied(cs->instp, &resp->denied, &flk) 6648 == NFS4ERR_EXPIRED) 6649 goto retry; 6650 resp->status = NFS4ERR_DENIED; 6651 } 6652 break; 6653 case EOVERFLOW: 6654 resp->status = NFS4ERR_INVAL; 6655 break; 6656 case EINVAL: 6657 resp->status = NFS4ERR_NOTSUPP; 6658 break; 6659 default: 6660 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)", 6661 error); 6662 resp->status = NFS4ERR_SERVERFAULT; 6663 break; 6664 } 6665 6666 out: 6667 if (lo) 6668 rfs4_lockowner_rele(lo); 6669 *cs->statusp = resp->status; 6670 6671 final: 6672 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs, 6673 LOCKT4res *, resp); 6674 } 6675 6676 /* 6677 * NFSv4.1 Server Sessions 6678 */ 6679 6680 /* Renew Lease */ 6681 void 6682 mds_refresh(mds_session_t *sp) 6683 { 6684 rfs4_client_t *cp; 6685 6686 ASSERT(sp != NULL && sp->sn_clnt != NULL); 6687 rfs4_dbe_lock(sp->sn_dbe); 6688 cp = sp->sn_clnt; 6689 sp->sn_laccess = gethrestime_sec(); 6690 rfs4_dbe_unlock(sp->sn_dbe); 6691 6692 rfs4_dbe_hold(cp->rc_dbe); 6693 rfs4_update_lease(cp); 6694 rfs4_client_rele(cp); 6695 } 6696 6697 6698 nfsstat4 6699 mds_lease_chk(mds_session_t *sp) 6700 { 6701 rfs4_client_t *cp; 6702 nfsstat4 error = NFS4_OK; 6703 6704 /* 6705 * If the client lease expired, go ahead and invalidate 6706 * all the sessions associated with this clientid. 
6707 */ 6708 ASSERT(sp != NULL && sp->sn_clnt != NULL); 6709 cp = sp->sn_clnt; 6710 6711 if (rfs4_lease_expired(cp)) { 6712 error = NFS4ERR_BADSESSION; 6713 } 6714 return (error); 6715 } 6716 6717 /* 6718 * Rudimentary server implementation (XXX - for now) 6719 */ 6720 void 6721 mds_get_server_impl_id(EXCHANGE_ID4resok *resp) 6722 { 6723 timestruc_t currtime; 6724 char *sol_impl = "Solaris NFSv4.1 Server Implementation"; 6725 char *sol_idom = "nfsv41.ietf.org"; 6726 void *p; 6727 uint_t len = 0; 6728 nfs_impl_id4 *nip; 6729 6730 resp->eir_server_impl_id.eir_server_impl_id_len = 1; 6731 nip = kmem_zalloc(sizeof (nfs_impl_id4), KM_SLEEP); 6732 resp->eir_server_impl_id.eir_server_impl_id_val = nip; 6733 6734 /* Domain */ 6735 nip->nii_domain.utf8string_len = len = strlen(sol_idom); 6736 p = kmem_zalloc(len * sizeof (char), KM_SLEEP); 6737 nip->nii_domain.utf8string_val = p; 6738 bcopy(sol_idom, p, len); 6739 6740 /* Implementation */ 6741 nip->nii_name.utf8string_len = len = strlen(sol_impl); 6742 p = kmem_zalloc(len * sizeof (char), KM_SLEEP); 6743 nip->nii_name.utf8string_val = p; 6744 bcopy(sol_impl, p, len); 6745 6746 /* Time */ 6747 gethrestime(&currtime); 6748 (void) nfs4_time_vton(&currtime, &nip->nii_date); 6749 } 6750 6751 /* 6752 * Principal handling routines 6753 */ 6754 void 6755 rfs4_free_cred_princ(rfs4_client_t *cp) 6756 { 6757 cred_princ_t *p; 6758 rpc_gss_principal_t ppl; 6759 6760 ASSERT(cp != NULL); 6761 if ((p = cp->rc_cr_set) == NULL) 6762 return; 6763 6764 switch (p->cp_aflavor) { 6765 case AUTH_DES: 6766 kmem_free(p->cp_princ, strlen(p->cp_princ) + 1); 6767 break; 6768 6769 case RPCSEC_GSS: 6770 ppl = (rpc_gss_principal_t)p->cp_princ; 6771 kmem_free(ppl, ppl->len + sizeof (int)); 6772 break; 6773 } 6774 kmem_free(p, sizeof (cred_princ_t)); 6775 cp->rc_cr_set = NULL; 6776 } 6777 6778 static rpc_gss_principal_t 6779 rfs4_dup_princ(rpc_gss_principal_t ppl) 6780 { 6781 rpc_gss_principal_t pdup; 6782 int len; 6783 6784 if (ppl == NULL) 6785 return 
(NULL); 6786 6787 len = sizeof (int) + ppl->len; 6788 pdup = (rpc_gss_principal_t)kmem_alloc(len, KM_SLEEP); 6789 bcopy(ppl, pdup, len); 6790 return (pdup); 6791 } 6792 6793 void 6794 rfs4_set_cred_princ(cred_princ_t **pp, struct compound_state *cs) 6795 { 6796 cred_princ_t *p; 6797 caddr_t t; 6798 6799 ASSERT(pp != NULL); 6800 6801 6802 if (*pp == NULL) 6803 *pp = kmem_zalloc(sizeof (cred_princ_t), KM_SLEEP); 6804 6805 p = *pp; 6806 6807 p->cp_cr = crdup(cs->basecr); 6808 p->cp_aflavor = cs->req->rq_cred.oa_flavor; 6809 p->cp_secmod = cs->nfsflavor; /* secmod != flavor for RPCSEC_GSS */ 6810 6811 /* 6812 * Set principal as per security flavor 6813 */ 6814 switch (p->cp_aflavor) { 6815 case AUTH_DES: 6816 p->cp_princ = kstrdup(cs->principal); 6817 break; 6818 6819 case RPCSEC_GSS: 6820 t = (caddr_t)rfs4_dup_princ((rpc_gss_principal_t)cs->principal); 6821 p->cp_princ = (caddr_t)t; 6822 break; 6823 6824 case AUTH_SYS: 6825 case AUTH_NONE: 6826 default: 6827 break; 6828 } 6829 } 6830 6831 /* returns 0 if no match; or 1 for a match */ 6832 int 6833 rfs4_cmp_cred_princ(cred_princ_t *p, struct compound_state *cs) 6834 { 6835 int rc = 0; 6836 rpc_gss_principal_t recp; /* cached clnt princ */ 6837 rpc_gss_principal_t ibrp; /* inbound req princ */ 6838 6839 6840 if (p == NULL) 6841 return (rc); /* nothing to compare with */ 6842 6843 if (p->cp_aflavor != cs->req->rq_cred.oa_flavor) 6844 return (rc); 6845 6846 if (p->cp_secmod != cs->nfsflavor) 6847 return (rc); 6848 6849 if (crcmp(p->cp_cr, cs->basecr)) 6850 return (rc); 6851 6852 switch (p->cp_aflavor) { 6853 case AUTH_DES: 6854 rc = (strcmp(p->cp_princ, cs->principal) == 0); 6855 break; 6856 6857 case RPCSEC_GSS: 6858 recp = (rpc_gss_principal_t)p->cp_princ; 6859 ibrp = (rpc_gss_principal_t)cs->principal; 6860 6861 if (recp->len != ibrp->len) 6862 break; 6863 rc = (bcmp(recp->name, ibrp->name, ibrp->len) == 0); 6864 break; 6865 6866 case AUTH_SYS: 6867 case AUTH_NONE: 6868 default: 6869 rc = 1; 6870 break; 6871 } 6872 
return (rc); 6873 } 6874 6875 /* { co_ownerid, co_verifier, principal, clientid, confirmed } */ 6876 rfs4_client_t * 6877 client_record(nfs_client_id4 *cip, struct compound_state *cs) 6878 { 6879 rfs4_client_t *cp; 6880 bool_t create = TRUE; 6881 6882 /* 6883 * 1. co_ownerid 6884 * 2. co_verifier 6885 */ 6886 cp = findclient(cs->instp, cip, &create, NULL); 6887 6888 /* 3. principal */ 6889 if (cp != NULL) 6890 rfs4_set_cred_princ(&cp->rc_cr_set, cs); 6891 6892 /* 6893 * Both of the following items of the 5-tuple are built as 6894 * part of creating the rfs4_client_t. 6895 * 6896 * 4. clientid; created as part of findclient() 6897 * 5. confirmed; cp->need_confirmed is initialized to TRUE 6898 */ 6899 return (cp); 6900 } 6901 6902 rfs4_client_t * 6903 client_lookup(nfs_client_id4 *cip, struct compound_state *cs) 6904 { 6905 bool_t create = FALSE; 6906 6907 return (findclient(cs->instp, cip, &create, NULL)); 6908 } 6909 6910 bool_t 6911 nfs_clid4_cmp(nfs_client_id4 *s1, nfs_client_id4 *s2) 6912 { 6913 if (s1->verifier != s2->verifier) 6914 return (FALSE); 6915 if (bcmp(s1->id_val, s2->id_val, s2->id_len)) 6916 return (FALSE); 6917 return (TRUE); 6918 } 6919 6920 /* 6921 * Compute the "use bits", i.e. the flags specifying the permissible 6922 * regular, MDS, and data server ops for the returned clientid. 6923 * 6924 * The minorversion1 specification allows a server implementor two 6925 * alternatives: allow PNFS_MDS and PNFS_DS on the same clientid, or 6926 * force the client to create separate clientids to distinguish 6927 * MDS versus DS operations. 6928 * 6929 * Our design distinguishes operations based upon filehandle, and thus 6930 * there is no reason to force the client to create separate clientids. 6931 * Thus, we give the client as much as possible, while keeping the result 6932 * within the allowed combinations as specified in the specification. 
6933 * 6934 * Our constraints are: use a subset of the client's request, unless 6935 * the client requested nothing, in which case we may return any 6936 * legal combination; and, the combination of NON_PNFS and PNFS_MDS 6937 * may not both be set in the results. These constraints are reflected 6938 * in the ASSERT()s at the end. 6939 */ 6940 6941 static uint32_t 6942 compute_use_pnfs_flags(uint32_t request) 6943 { 6944 uint32_t rc; 6945 6946 /* Start with the client's initial request */ 6947 rc = request & EXCHGID4_FLAG_MASK_PNFS; 6948 6949 /* If the client requested nothing, return the most permissive. */ 6950 if (rc == 0) { 6951 rc = (EXCHGID4_FLAG_USE_PNFS_MDS | EXCHGID4_FLAG_USE_PNFS_DS); 6952 goto done; 6953 } 6954 6955 /* Don't permit the illegal combination of MDS and NON_PNFS */ 6956 if ((rc & 6957 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS)) == 6958 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS)) 6959 rc &= ~EXCHGID4_FLAG_USE_NON_PNFS; 6960 6961 done: 6962 ASSERT(((request & EXCHGID4_FLAG_MASK_PNFS) == 0) || 6963 ((rc & ~(request & EXCHGID4_FLAG_MASK_PNFS)) == 0)); 6964 ASSERT((rc & (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS)) 6965 != (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS)); 6966 ASSERT(rc != 0); 6967 6968 return (rc); 6969 } 6970 6971 /* 6972 * Session Trunking Support 6973 */ 6974 static struct netbuf * 6975 netbuf_dup(struct netbuf *obp) 6976 { 6977 struct netbuf *np = NULL; 6978 6979 np = (struct netbuf *)kmem_zalloc(sizeof (struct netbuf), KM_SLEEP); 6980 np->maxlen = np->len = obp->len; 6981 np->buf = (char *)kmem_zalloc(obp->len, KM_SLEEP); 6982 bcopy(obp->buf, np->buf, obp->len); 6983 6984 return (np); 6985 } 6986 6987 static void 6988 netbuf_destroy(struct netbuf *np) 6989 { 6990 kmem_free((char *)np->buf, np->len); 6991 kmem_free((struct netbuf *)np, sizeof (struct netbuf)); 6992 } 6993 6994 static t_scalar_t 6995 svc_get_type(SVCXPRT *xprt) 6996 { 6997 t_scalar_t xtype; 6998 6999 xtype = 
svc_gettype(xprt); 7000 switch (xtype) { 7001 case T_RDMA: 7002 break; 7003 7004 case T_COTS: 7005 case T_COTS_ORD: 7006 xtype = T_COTS_ORD; 7007 break; 7008 7009 case T_CLTS: 7010 default: 7011 cmn_err(CE_WARN, "svc_get_type: Bad service type %d\n", xtype); 7012 xtype = 0; 7013 } 7014 return (xtype); 7015 } 7016 7017 static rfs41_tie_t * 7018 rfs41_tie_init(SVCXPRT *xprt) 7019 { 7020 rfs41_tie_t *tip = NULL; 7021 struct sockaddr *sa; 7022 struct sockaddr_in *sa4; 7023 struct sockaddr_in6 *sa6; 7024 7025 tip = kmem_zalloc(sizeof (rfs41_tie_t), KM_SLEEP); 7026 7027 sa = (struct sockaddr *)svc_getendpoint(xprt); 7028 tip->t_famly = sa->sa_family; 7029 tip->t_xtype = svc_get_type(xprt); 7030 tip->t_netbf = netbuf_dup(svc_getlocaladdr(xprt)); 7031 7032 switch (tip->t_famly) { 7033 case AF_INET: 7034 sa4 = (struct sockaddr_in *)(tip->t_netbf->buf); 7035 bcopy(&sa4->sin_addr, &tip->t_ipaddr_u.ip4, 7036 sizeof (struct in_addr)); 7037 break; 7038 7039 case AF_INET6: 7040 sa6 = (struct sockaddr_in6 *)(tip->t_netbf->buf); 7041 bcopy(&sa6->sin6_addr, &tip->t_ipaddr_u.ip6, 7042 sizeof (struct in6_addr)); 7043 break; 7044 7045 default: 7046 cmn_err(CE_WARN, "rfs41_tie_init: Bad family (%d)\n", 7047 tip->t_famly); 7048 netbuf_destroy(tip->t_netbf); 7049 kmem_free(tip, sizeof (rfs41_tie_t)); 7050 tip = NULL; 7051 break; 7052 } 7053 return (tip); 7054 } 7055 7056 static void 7057 rfs41_exid_so_major(struct server_owner4 *sop, struct compound_state *cs) 7058 { 7059 int len = sizeof (void *) / sizeof (char); 7060 7061 sop->so_major_id.so_major_id_len = (len * 2) + 1; 7062 sop->so_major_id.so_major_id_val = tohex(cs->instp, len); 7063 } 7064 7065 /* 7066 * XXX - rfs4_srv_trunk_test is disabled by default; enabling it will 7067 * cause ip_dump() to spew addresses of inbound EXCHANGE_ID's 7068 * to the console. rfs4_srv_trunk_test and ip_dump() will go 7069 * away after client trunking is done. This is handy info to 7070 * have for debugging. 
7071 */ 7072 int rfs4_srv_trunk_test = 0; 7073 7074 static void 7075 ip_dump(struct netbuf *np, char *msg) 7076 { 7077 struct sockaddr_in *sa4; 7078 struct sockaddr_in6 *sa6; 7079 7080 if (np == NULL || np->buf == NULL || !rfs4_srv_trunk_test) 7081 return; 7082 7083 sa4 = (struct sockaddr_in *)(np->buf); 7084 switch (sa4->sin_family) { 7085 case AF_INET: 7086 cmn_err(CE_WARN, "\n%s ip: %d.%d.%d.%d", msg, 7087 sa4->sin_addr.S_un.S_un_b.s_b1, 7088 sa4->sin_addr.S_un.S_un_b.s_b2, 7089 sa4->sin_addr.S_un.S_un_b.s_b3, 7090 sa4->sin_addr.S_un.S_un_b.s_b4); 7091 break; 7092 7093 case AF_INET6: 7094 sa6 = (struct sockaddr_in6 *)(np->buf); 7095 cmn_err(CE_WARN, "\n%s ip6: " 7096 "%2x%2x:%0x%0x:%0x%0x:%0x%0x:%2x%2x:%2x%2x:%2x%2x:%2x%2x", 7097 msg, 7098 sa6->sin6_addr._S6_un._S6_u8[0], 7099 sa6->sin6_addr._S6_un._S6_u8[1], 7100 sa6->sin6_addr._S6_un._S6_u8[2], 7101 sa6->sin6_addr._S6_un._S6_u8[3], 7102 sa6->sin6_addr._S6_un._S6_u8[4], 7103 sa6->sin6_addr._S6_un._S6_u8[5], 7104 sa6->sin6_addr._S6_un._S6_u8[6], 7105 sa6->sin6_addr._S6_un._S6_u8[7], 7106 sa6->sin6_addr._S6_un._S6_u8[8], 7107 sa6->sin6_addr._S6_un._S6_u8[9], 7108 sa6->sin6_addr._S6_un._S6_u8[10], 7109 sa6->sin6_addr._S6_un._S6_u8[11], 7110 sa6->sin6_addr._S6_un._S6_u8[12], 7111 sa6->sin6_addr._S6_un._S6_u8[13], 7112 sa6->sin6_addr._S6_un._S6_u8[14], 7113 sa6->sin6_addr._S6_un._S6_u8[15]); 7114 break; 7115 7116 default: 7117 cmn_err(CE_WARN, "%s <cannot translate ip>", msg); 7118 break; 7119 } 7120 } 7121 7122 static int 7123 ip_addr_cmp(rfs41_tie_t *tip, rfs41_tie_t *p) 7124 { 7125 int match = 0; 7126 7127 ASSERT(tip != NULL); 7128 ASSERT(p != NULL); 7129 7130 if (tip->t_famly != p->t_famly) 7131 return (0); 7132 7133 if (tip->t_famly == AF_INET) { 7134 if (bcmp(&tip->t_ipaddr_u.ip4, &p->t_ipaddr_u.ip4, 7135 sizeof (struct in_addr)) == 0) { 7136 match = 1; /* IPv4 addr match */ 7137 } 7138 } else if (tip->t_famly == AF_INET6) { 7139 if (bcmp(&tip->t_ipaddr_u.ip6, &p->t_ipaddr_u.ip6, 7140 sizeof (struct in6_addr)) 
== 0) { 7141 match = 1; /* IPv6 addr match */ 7142 } 7143 } 7144 7145 return (match); 7146 } 7147 7148 static void 7149 rfs41_set_trunkinfo(SVCXPRT *xprt, struct compound_state *cs, rfs4_client_t *cp, 7150 EXCHANGE_ID4resok *rok) 7151 { 7152 rfs41_tie_t *tip; 7153 rfs41_tie_t *p; 7154 7155 ASSERT(cs != NULL && cp != NULL && rok != NULL); 7156 if (cs == NULL || cp == NULL || rok == NULL) 7157 return; 7158 7159 /* 7160 * start out w/some sane defaults 7161 * XXX - scope needs to be revisited. 7162 */ 7163 rok->eir_clientid = cp->rc_clientid; 7164 rfs41_exid_so_major(&rok->eir_server_owner, cs); 7165 7166 ip_dump(svc_getlocaladdr(xprt), "inbound"); 7167 7168 /* build trunkinfo entry */ 7169 if (xprt == NULL || (tip = rfs41_tie_init(xprt)) == NULL) 7170 return; 7171 7172 /* fastpath for 1st exid */ 7173 rfs4_dbe_lock(cp->rc_dbe); 7174 if (list_is_empty(&cp->rc_trunkinfo)) { 7175 list_insert_head(&cp->rc_trunkinfo, tip); 7176 ip_dump(tip->t_netbf, "first-in-list"); 7177 rok->eir_server_owner.so_minor_id = 7178 (uint64_t)(uintptr_t)&cp->rc_trunkinfo; 7179 rfs4_dbe_unlock(cp->rc_dbe); 7180 return; 7181 } 7182 7183 /* run thru trunkinfo list to see if IP has been seen */ 7184 for (p = list_head(&cp->rc_trunkinfo); p != NULL; 7185 p = list_next(&cp->rc_trunkinfo, p)) { 7186 7187 /* 7188 * Is the IP already in list ? 7189 */ 7190 if (ip_addr_cmp(tip, p)) { 7191 ip_dump(p->t_netbf, "already-in-list"); 7192 rok->eir_server_owner.so_minor_id = 7193 (uint64_t)(uintptr_t)&cp->rc_trunkinfo; 7194 rfs4_dbe_unlock(cp->rc_dbe); 7195 return; 7196 } 7197 } 7198 7199 /* IP hasn't been seen; rerun list to see if equivalent exists */ 7200 for (p = list_head(&cp->rc_trunkinfo); p != NULL; 7201 p = list_next(&cp->rc_trunkinfo, p)) { 7202 7203 /* 7204 * Do we have an equivalent (ie. 
transport) entry 7205 */ 7206 if (p->t_xtype == tip->t_xtype) { 7207 list_insert_head(&cp->rc_trunkinfo, tip); 7208 ip_dump(tip->t_netbf, "Equiv FOUND: inserted-in-list"); 7209 rok->eir_server_owner.so_minor_id = 7210 (uint64_t)(uintptr_t)&cp->rc_trunkinfo; 7211 rfs4_dbe_unlock(cp->rc_dbe); 7212 return; 7213 } 7214 } 7215 7216 /* nothing in list has same IP addr or is equivalent to tip */ 7217 list_insert_head(&cp->rc_trunkinfo, tip); 7218 ip_dump(tip->t_netbf, "No IP or Equiv FOUND: inserted-in-list"); 7219 rfs4_dbe_unlock(cp->rc_dbe); 7220 rok->eir_server_owner.so_minor_id = (uint64_t)(uintptr_t)&tip; 7221 } 7222 7223 void 7224 mds_clean_up_trunkinfo(rfs4_client_t *cp) 7225 { 7226 rfs41_tie_t *p; 7227 7228 ASSERT(cp != NULL); 7229 if (cp == NULL) 7230 return; 7231 7232 rfs4_dbe_lock(cp->rc_dbe); 7233 while (p = list_remove_head(&cp->rc_trunkinfo)) { 7234 netbuf_destroy(p->t_netbf); 7235 kmem_free(p, sizeof (rfs41_tie_t)); 7236 } 7237 list_destroy(&cp->rc_trunkinfo); 7238 rfs4_dbe_unlock(cp->rc_dbe); 7239 } 7240 7241 /*ARGSUSED*/ 7242 static void 7243 mds_op_exid_free(nfs_resop4 *resop, compound_state_t *cs) 7244 { 7245 EXCHANGE_ID4res *resp = &resop->nfs_resop4_u.opexchange_id; 7246 EXCHANGE_ID4resok *rok = &resp->EXCHANGE_ID4res_u.eir_resok4; 7247 struct server_owner4 *sop = &rok->eir_server_owner; 7248 nfs_impl_id4 *nip; 7249 int len = 0; 7250 7251 /* Server Owner: major */ 7252 if ((len = sop->so_major_id.so_major_id_len) != 0) 7253 kmem_free(sop->so_major_id.so_major_id_val, len); 7254 7255 if ((nip = rok->eir_server_impl_id.eir_server_impl_id_val) != NULL) { 7256 /* Immplementation */ 7257 len = nip->nii_name.utf8string_len; 7258 kmem_free(nip->nii_name.utf8string_val, len * sizeof (char)); 7259 7260 /* Domain */ 7261 len = nip->nii_domain.utf8string_len; 7262 kmem_free(nip->nii_domain.utf8string_val, len * sizeof (char)); 7263 7264 /* Server Impl */ 7265 kmem_free(nip, sizeof (nfs_impl_id4)); 7266 } 7267 } 7268 7269 /* XXX - NOTE: EXCHANGE_ID conforms to 
draft-19 behavior */ 7270 7271 /*ARGSUSED*/ 7272 void 7273 mds_op_exchange_id(nfs_argop4 *argop, nfs_resop4 *resop, 7274 struct svc_req *req, compound_state_t *cs) 7275 { 7276 EXCHANGE_ID4args *args = &argop->nfs_argop4_u.opexchange_id; 7277 EXCHANGE_ID4res *resp = &resop->nfs_resop4_u.opexchange_id; 7278 EXCHANGE_ID4resok *rok = &resp->EXCHANGE_ID4res_u.eir_resok4; 7279 rfs4_client_t *cp; 7280 rfs4_client_t *ocp; 7281 bool_t update; 7282 client_owner4 *cop; 7283 nfs_client_id4 *cip; 7284 verifier4 old_verifier_arg; 7285 7286 DTRACE_NFSV4_2(op__exchange__id__start, 7287 struct compound_state *, cs, 7288 EXCHANGE_ID4args *, args); 7289 7290 /* 7291 * EXCHANGE_ID's may be preceded by SEQUENCE 7292 * 7293 * Check that eia_flags only has "valid" spec bits 7294 * and that no 'eir_flag' ONLY bits are specified. 7295 */ 7296 if (args->eia_flags & ~EXID4_FLAG_MASK || 7297 args->eia_flags & EXID4_FLAG_INVALID_ARGS) { 7298 *cs->statusp = resp->eir_status = NFS4ERR_INVAL; 7299 goto final; 7300 } 7301 7302 update = (args->eia_flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A); 7303 cop = &args->eia_clientowner; 7304 cip = (nfs_client_id4 *)cop; 7305 7306 /* 7307 * Refer to Section 18.35.4 of draft 19 7308 */ 7309 cp = client_lookup(cip, cs); 7310 if (cp == NULL) { /* no record exists */ 7311 if (!update) { 7312 case1: /* case 1 - utok */ 7313 cp = client_record(cip, cs); 7314 ASSERT(cp != NULL); 7315 *cs->statusp = resp->eir_status = NFS4_OK; 7316 rok->eir_clientid = cp->rc_clientid; 7317 rok->eir_sequenceid = cp->rc_contrived.xi_sid; 7318 goto out; 7319 } 7320 /* no record and trying to update */ 7321 *cs->statusp = resp->eir_status = NFS4ERR_NOENT; 7322 goto final; 7323 } 7324 7325 /* record exists */ 7326 old_verifier_arg = cp->rc_nfs_client.verifier; 7327 if (CLID_REC_CONFIRMED(cp)) { 7328 if (!update) { 7329 if (!rfs4_cmp_cred_princ(cp->rc_cr_set, cs)) { 7330 /* case 3 */ 7331 if (rfs4_lease_expired(cp)) { 7332 rfs4_client_close(cp); 7333 goto case1; 7334 } 7335 /* 7336 * case 3: 
clid_in_use - utok 7337 * old_client_ret has unexpired lease w/state. 7338 */ 7339 *cs->statusp = NFS4ERR_CLID_INUSE; 7340 resp->eir_status = NFS4ERR_CLID_INUSE; 7341 rfs4_client_rele(cp); 7342 goto final; 7343 7344 } else if (nfs_clid4_cmp(&cp->rc_nfs_client, cip)) { 7345 /* case 2 - utok */ 7346 *cs->statusp = NFS4_OK; 7347 resp->eir_status = NFS4_OK; 7348 rok->eir_clientid = cp->rc_clientid; 7349 rok->eir_sequenceid = cp->rc_contrived.xi_sid; 7350 /* trickle down to "out" */ 7351 7352 } else if (old_verifier_arg != cip->verifier) { 7353 /* case 5 - utok */ 7354 /* 7355 * previous incarnation of clientid is first 7356 * hidden such that any subsequent lookups 7357 * will not find it in DB, then the current 7358 * reference to it is dropped; this will 7359 * force the reaper thread to clean it up. 7360 */ 7361 ocp = cp; 7362 mds_clean_up_sessions(ocp); 7363 rfs4_dbe_hide(ocp->rc_dbe); 7364 rfs4_client_rele(ocp); 7365 7366 cp = client_record(cip, cs); 7367 ASSERT(cp != NULL); 7368 *cs->statusp = resp->eir_status = NFS4_OK; 7369 rok->eir_clientid = cp->rc_clientid; 7370 rok->