1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 28 * All Rights Reserved 29 */ 30 31 #include <sys/param.h> 32 #include <sys/types.h> 33 #include <sys/systm.h> 34 #include <sys/cred.h> 35 #include <sys/buf.h> 36 #include <sys/vfs.h> 37 #include <sys/vnode.h> 38 #include <sys/uio.h> 39 #include <sys/errno.h> 40 #include <sys/sysmacros.h> 41 #include <sys/statvfs.h> 42 #include <sys/kmem.h> 43 #include <sys/dirent.h> 44 #include <sys/cmn_err.h> 45 #include <sys/disp.h> 46 #include <sys/debug.h> 47 #include <sys/systeminfo.h> 48 #include <sys/flock.h> 49 #include <sys/pathname.h> 50 #include <sys/nbmlock.h> 51 #include <sys/share.h> 52 #include <sys/atomic.h> 53 #include <sys/policy.h> 54 #include <sys/fem.h> 55 #include <sys/sdt.h> 56 #include <sys/ddi.h> 57 #include <sys/modctl.h> 58 #include <sys/timod.h> 59 #include <sys/id_space.h> 60 61 #include <rpc/types.h> 62 #include <rpc/auth.h> 63 #include <rpc/rpcsec_gss.h> 64 #include <rpc/svc.h> 65 66 #include <nfs/nfs.h> 67 #include <nfs/export.h> 68 #include <nfs/lm.h> 69 #include <nfs/nfs4.h> 70 71 #include <sys/strsubr.h> 72 #include <sys/strsun.h> 73 74 #include <inet/common.h> 75 #include <inet/ip.h> 76 #include <inet/ip6.h> 77 78 #include <sys/tsol/label.h> 79 #include <sys/tsol/tndb.h> 80 81 #include <nfs/nfs4_attrmap.h> 82 #include <nfs/nfs4_srv_attr.h> 83 #include <nfs/mds_state.h> 84 #include <nfs/mds_odl.h> 85 86 #include <nfs/nfs41_filehandle.h> 87 #include <nfs/ctl_mds_clnt.h> 88 89 #include <nfs/spe_impl.h> 90 91 #define RFS4_MAXLOCK_TRIES 4 /* Try to get the lock this many times */ 92 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES; 93 #define RFS4_LOCK_DELAY 10 /* Milliseconds */ 94 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY; 95 96 int mds_strict_seqid = 0; 97 98 static void ping_cb_null_thr(mds_session_t *); 99 100 /* End of Tunables */ 101 102 /* 103 * Used to bump the stateid4.seqid value and show changes in the stateid 104 */ 105 #define next_stateid(sp) (++(sp)->v41_bits.chgseq) 106 107 /* 108 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent. 109 * This is used to return NFS4ERR_TOOSMALL when clients specify 110 * maxcount that isn't large enough to hold the smallest possible 111 * XDR encoded dirent. 112 * 113 * sizeof cookie (8 bytes) + 114 * sizeof name_len (4 bytes) + 115 * sizeof smallest (padded) name (4 bytes) + 116 * sizeof bitmap4_len (12 bytes) + NOTE: we always encode len=2 bm4 117 * sizeof attrlist4_len (4 bytes) + 118 * sizeof next boolean (4 bytes) 119 * 120 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing 121 * the smallest possible entry4 (assumes no attrs requested). 122 * sizeof nfsstat4 (4 bytes) + 123 * sizeof verifier4 (8 bytes) + 124 * sizeof entry4list bool (4 bytes) + 125 * sizeof entry4 (36 bytes) + 126 * sizeof eof bool (4 bytes) 127 * 128 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to 129 * VOP_READDIR. Its value is the size of the maximum possible dirent 130 * for solaris. The DIRENT64_RECLEN macro returns the size of dirent 131 * required for a given name length. MAXNAMELEN is the maximum 132 * filename length allowed in Solaris. The first two DIRENT64_RECLEN() 133 * macros are to allow for . and .. entries -- just a minor tweak to try 134 * and guarantee that buffer we give to VOP_READDIR will be large enough 135 * to hold ., .., and the largest possible solaris dirent64. 136 */ 137 #define RFS4_MINLEN_ENTRY4 36 138 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4) 139 #define RFS4_MINLEN_RDDIR_BUF \ 140 (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN)) 141 142 /* 143 * It would be better to pad to 4 bytes since that's what XDR would do, 144 * but the dirents UFS gives us are already padded to 8, so just take 145 * what we're given. Dircount is only a hint anyway. Currently the 146 * solaris kernel is ASCII only, so there's no point in calling the 147 * UTF8 functions. 148 * 149 * dirent64: named padded to provide 8 byte struct alignment 150 * d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad) 151 * 152 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes 153 * 154 */ 155 #define DIRENT64_TO_DIRCOUNT(dp) \ 156 (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen)) 157 158 /* 159 * types of label comparison 160 */ 161 #define EQUALITY_CHECK 0 162 #define DOMINANCE_CHECK 1 163 164 static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */ 165 166 void rfs4_init_compound_state(struct compound_state *); 167 168 static void nullfree(nfs_resop4 *, compound_state_t *); 169 static void mds_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 170 compound_state_t *); 171 static void mds_op_notsup(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 172 compound_state_t *); 173 static void mds_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 174 compound_state_t *); 175 static void mds_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 176 compound_state_t *); 177 static void mds_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 178 compound_state_t *); 179 static void mds_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 180 compound_state_t *); 181 static void mds_op_create_free(nfs_resop4 *resop); 182 static void mds_op_delegreturn(nfs_argop4 *, nfs_resop4 *, 183 struct svc_req *, compound_state_t *); 184 static void mds_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 185 compound_state_t *); 186 static void mds_op_getattr_free(nfs_resop4 *, compound_state_t *); 187 static void mds_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 188 compound_state_t *); 189 static void mds_op_getfh_free(nfs_resop4 *, compound_state_t *); 190 static void mds_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 191 compound_state_t *); 192 static void mds_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 193 compound_state_t *); 194 static void mds_lock_denied_free(nfs_resop4 *, compound_state_t *); 195 static void mds_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 196 compound_state_t *); 197 static void mds_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 198 compound_state_t *); 199 static void mds_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 200 compound_state_t *); 201 static void mds_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 202 compound_state_t *); 203 static void mds_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, 204 struct svc_req *req, compound_state_t *); 205 static void mds_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 206 compound_state_t *); 207 static void mds_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 208 compound_state_t *); 209 static void mds_op_open_downgrade(nfs_argop4 *, nfs_resop4 *, 210 struct svc_req *, compound_state_t *); 211 static void mds_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 212 compound_state_t *); 213 static void mds_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 214 compound_state_t *); 215 static void mds_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 216 compound_state_t *); 217 static void mds_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 218 compound_state_t *); 219 static void mds_op_read_free(nfs_resop4 *, compound_state_t *); 220 void mds_op_readdir(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 221 compound_state_t *); 222 static void mds_op_readdir_free(nfs_resop4 *, compound_state_t *); 223 static void mds_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 224 compound_state_t *); 225 static void mds_op_readlink_free(nfs_resop4 *, compound_state_t *); 226 static void mds_op_release_lockowner(nfs_argop4 *, nfs_resop4 *, 227 struct svc_req *, compound_state_t *); 228 static void mds_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 229 compound_state_t *); 230 static void mds_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 231 compound_state_t *); 232 static void mds_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 233 compound_state_t *); 234 static void mds_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 235 compound_state_t *); 236 static void mds_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 237 compound_state_t *); 238 static void mds_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 239 compound_state_t *); 240 static void mds_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 241 compound_state_t *); 242 static void mds_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 243 compound_state_t *); 244 static void mds_op_exchange_id(nfs_argop4 *, nfs_resop4 *, 245 struct svc_req *, compound_state_t *); 246 static void mds_op_exid_free(nfs_resop4 *, compound_state_t *); 247 static void mds_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 248 compound_state_t *); 249 static void mds_op_secinfonn(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 250 compound_state_t *); 251 nfsstat4 do_rfs4_op_secinfo(struct compound_state *, char *, int, 252 SECINFO4res *); 253 254 static void mds_op_secinfo_free(nfs_resop4 *, compound_state_t *); 255 256 static void mds_op_backchannel_ctl(nfs_argop4 *, nfs_resop4 *, 257 struct svc_req *, compound_state_t *); 258 static void mds_op_bind_conn_to_session(nfs_argop4 *, nfs_resop4 *, 259 struct svc_req *, compound_state_t *); 260 static void mds_op_create_clientid(nfs_argop4 *, nfs_resop4 *, 261 struct svc_req *, compound_state_t *); 262 static void mds_op_create_session(nfs_argop4 *, nfs_resop4 *, 263 struct svc_req *, compound_state_t *); 264 static void mds_op_destroy_session(nfs_argop4 *, nfs_resop4 *, 265 struct svc_req *, compound_state_t *); 266 static void mds_op_sequence(nfs_argop4 *, nfs_resop4 *, 267 struct svc_req *, compound_state_t *); 268 269 static void mds_op_get_devlist(nfs_argop4 *, nfs_resop4 *, 270 struct svc_req *, compound_state_t *); 271 272 static void mds_op_get_devinfo(nfs_argop4 *, nfs_resop4 *, 273 struct svc_req *, compound_state_t *); 274 275 static void mds_op_layout_get(nfs_argop4 *, nfs_resop4 *, 276 struct svc_req *, compound_state_t *); 277 static void mds_op_layout_get_free(nfs_resop4 *, compound_state_t *); 278 279 static void mds_op_layout_commit(nfs_argop4 *, nfs_resop4 *, 280 struct svc_req *, compound_state_t *); 281 282 static void mds_op_layout_return(nfs_argop4 *, nfs_resop4 *, 283 struct svc_req *, compound_state_t *); 284 285 static void mds_op_reclaim_complete(nfs_argop4 *, nfs_resop4 *, 286 struct svc_req *, compound_state_t *); 287 288 static int seq_chk_limits(nfs_argop4 *, nfs_resop4 *, compound_state_t *); 289 290 nfsstat4 check_open_access(uint32_t, 291 struct compound_state *, struct svc_req *); 292 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *); 293 294 static void mds_free_reply(nfs_resop4 *, compound_state_t *); 295 296 vnode_t *do_rfs4_op_mknod(CREATE4args *, CREATE4res *, struct svc_req *, 297 struct compound_state *, vattr_t *, char *); 298 299 nfsstat4 rfs4_do_lock(rfs4_lo_state_t *, nfs_lock_type4, seqid4, 300 offset4, length4, cred_t *, nfs_resop4 *); 301 302 rfs4_lo_state_t *mds_findlo_state_by_owner(rfs4_lockowner_t *, 303 rfs4_state_t *, bool_t *); 304 305 bool_t in_flavor_list(int, int *, int); 306 307 nfsstat4 attrmap4_to_vattrmask(attrmap4 *, struct nfs4_svgetit_arg *); 308 309 nfsstat4 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *); 310 311 nfsstat4 do_rfs4_op_getattr(attrmap4 *, fattr4 *, struct nfs4_svgetit_arg *); 312 313 nfsstat4 do_rfs4_op_lookup(char *, uint_t, struct svc_req *, 314 struct compound_state *); 315 316 rfs4_lockowner_t *mds_findlockowner_by_pid(nfs_server_instance_t *, pid_t); 317 318 mds_session_t *mds_findsession_by_id(nfs_server_instance_t *, sessionid4); 319 320 rfs4_openowner_t *mds_findopenowner(nfs_server_instance_t *, open_owner4 *, 321 bool_t *); 322 323 static void mds_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *, 324 compound_state_t *); 325 326 extern mds_mpd_t *mds_find_mpd(nfs_server_instance_t *, id_t); 327 extern void rfs41_lo_seqid(stateid_t *); 328 extern void mds_delete_layout(vnode_t *); 329 extern void mds_clean_grants_by_fsid(rfs4_client_t *, vnode_t *); 330 extern mds_layout_t *mds_add_layout(layout_core_t *lc); 331 332 nfsstat4 333 create_vnode(vnode_t *, char *, vattr_t *, createmode4, timespec32_t *, 334 cred_t *, vnode_t **, bool_t *); 335 336 337 /* HACKERY */ 338 nfsstat4 rfs4_get_all_state(struct compound_state *, stateid4 *, 339 rfs4_state_t **, rfs4_deleg_state_t **, rfs4_lo_state_t **); 340 341 void rfs4_ss_clid(struct compound_state *, rfs4_client_t *, struct svc_req *); 342 void rfs4_ss_chkclid(struct compound_state *, rfs4_client_t *); 343 344 int layout_match(stateid_t, stateid4, nfsstat4 *); 345 346 extern stateid4 special0; 347 extern stateid4 special1; 348 349 #define ISSPECIAL(id) (stateid4_cmp(id, &special0) || \ 350 stateid4_cmp(id, &special1)) 351 352 void rfs4_cn_release(compound_state_t *); 353 354 mds_layout_grant_t *rfs41_findlogrant(struct compound_state *, 355 rfs4_file_t *, rfs4_client_t *, bool_t *); 356 void rfs41_lo_grant_rele(mds_layout_grant_t *); 357 mds_ever_grant_t *rfs41_findevergrant(rfs4_client_t *, vnode_t *, bool_t *); 358 void rfs41_ever_grant_rele(mds_ever_grant_t *); 359 360 static uint32_t compute_use_pnfs_flags(uint32_t); 361 362 /* ARGSUSED */ 363 static void 364 mds_op_notsup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 365 compound_state_t *cs) 366 { 367 DTRACE_NFSV4_1(op__notsup__start, 368 strcut compound_state *, cs); 369 370 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_NOTSUPP; 371 372 DTRACE_NFSV4_1(op__notsup__done, 373 struct compound_state *, cs); 374 } 375 376 /* ARGSUSED */ 377 static void 378 mds_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 379 compound_state_t *cs) 380 { 381 DTRACE_NFSV4_1(op__illegal__start, 382 struct compound_state *, cs); 383 384 *cs->statusp = 385 *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_OP_ILLEGAL; 386 387 DTRACE_NFSV4_1(op__illegal__done, 388 struct compound_state *, cs); 389 } 390 391 /* ARGSUSED */ 392 static void 393 mds_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 394 compound_state_t *cs) 395 { 396 DTRACE_NFSV4_1(op__inval__start, 397 struct compound_state *, cs); 398 399 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL; 400 401 DTRACE_NFSV4_1(op__inval__done, 402 struct compound_state *, cs); 403 } 404 405 /*ARGSUSED*/ 406 static void 407 nullfree(nfs_resop4 *resop, compound_state_t *cs) 408 { 409 } 410 411 static op_disp_tbl_t mds_disptab[] = { 412 {mds_op_illegal, nullfree, DISP_OP_BAD, "BAD Op 0"}, 413 {mds_op_illegal, nullfree, DISP_OP_BAD, "BAD Op 1"}, 414 {mds_op_illegal, nullfree, DISP_OP_BAD, "BAD Op 2"}, 415 {mds_op_access, nullfree, DISP_OP_MDS, "ACCESS"}, 416 {mds_op_close, nullfree, DISP_OP_MDS, "CLOSE"}, 417 {mds_op_commit, nullfree, DISP_OP_BOTH, "COMMIT"}, 418 {mds_op_create, nullfree, DISP_OP_MDS, "CREATE"}, 419 {mds_op_inval, nullfree, DISP_OP_BAD, "BAD Op 7"}, 420 {mds_op_delegreturn, nullfree, DISP_OP_MDS, "DELEGRETURN"}, 421 {mds_op_getattr, mds_op_getattr_free, DISP_OP_MDS, "GETATTR"}, 422 {mds_op_getfh, mds_op_getfh_free, DISP_OP_MDS, "GETFH"}, 423 {mds_op_link, nullfree, DISP_OP_MDS, "LINK"}, 424 {mds_op_lock, mds_lock_denied_free, DISP_OP_MDS, "LOCK"}, 425 {mds_op_lockt, mds_lock_denied_free, DISP_OP_MDS, "LOCKT"}, 426 {mds_op_locku, nullfree, DISP_OP_MDS, "LOCKU"}, 427 {mds_op_lookup, nullfree, DISP_OP_MDS, "LOOKUP"}, 428 {mds_op_lookupp, nullfree, DISP_OP_MDS, "LOOKUPP"}, 429 {mds_op_nverify, nullfree, DISP_OP_MDS, "NVERIFY"}, 430 {mds_op_open, mds_free_reply, DISP_OP_MDS, "OPEN"}, 431 {mds_op_openattr, nullfree, DISP_OP_MDS, "OPENATTR"}, 432 {mds_op_notsup, nullfree, DISP_OP_BAD, "BAD Op 20"}, 433 {mds_op_open_downgrade, nullfree, DISP_OP_MDS, "OPEN_DOWNGRADE"}, 434 {mds_op_putfh, nullfree, DISP_OP_BOTH, "PUTFH"}, 435 {mds_op_putpubfh, nullfree, DISP_OP_MDS, "PUTPUBFH"}, 436 {mds_op_putrootfh, nullfree, DISP_OP_MDS, "PUTROOTFH"}, 437 {mds_op_read, mds_op_read_free, DISP_OP_BOTH, "READ"}, 438 {mds_op_readdir, mds_op_readdir_free, DISP_OP_MDS, "READDIR"}, 439 {mds_op_readlink, mds_op_readlink_free, DISP_OP_MDS, "READLINK"}, 440 {mds_op_remove, nullfree, DISP_OP_MDS, "REMOVE"}, 441 {mds_op_rename, nullfree, DISP_OP_MDS, "RENAME"}, 442 {mds_op_notsup, nullfree, DISP_OP_BAD, "BAD Op 30"}, 443 {mds_op_restorefh, nullfree, DISP_OP_MDS, "RESTOREFH"}, 444 {mds_op_savefh, nullfree, DISP_OP_MDS, "SAVEFH"}, 445 {mds_op_secinfo, mds_op_secinfo_free, DISP_OP_MDS, "SECINFO"}, 446 {mds_op_setattr, nullfree, DISP_OP_MDS, "SETATTR"}, 447 {mds_op_notsup, nullfree, DISP_OP_BAD, "BAD Op 35"}, 448 {mds_op_notsup, nullfree, DISP_OP_BAD, "BAD Op 36"}, 449 {mds_op_verify, nullfree, DISP_OP_MDS, "VERIFY"}, 450 {mds_op_write, nullfree, DISP_OP_BOTH, "WRITE"}, 451 {mds_op_notsup, nullfree, DISP_OP_BAD, "BAD Op 39"}, 452 {mds_op_backchannel_ctl, nullfree, DISP_OP_BOTH, "BACKCHANNEL_CTL"}, 453 {mds_op_bind_conn_to_session, nullfree, 454 DISP_OP_BOTH, "BIND_CONN_TO_SESS"}, 455 {mds_op_exchange_id, mds_op_exid_free, DISP_OP_BOTH, "EXCHANGE_ID"}, 456 {mds_op_create_session, nullfree, DISP_OP_BOTH, "CREATE_SESS"}, 457 {mds_op_destroy_session, nullfree, DISP_OP_BOTH, "DESTROY_SESS"}, 458 {mds_op_illegal, nullfree, DISP_OP_MDS, "FREE_STATEID"}, 459 {mds_op_illegal, nullfree, DISP_OP_MDS, "GET_DIR_DELEG"}, 460 {mds_op_get_devinfo, nullfree, DISP_OP_MDS, "GET_DEVINFO"}, 461 {mds_op_get_devlist, nullfree, DISP_OP_MDS, "GET_DEVLIST"}, 462 {mds_op_layout_commit, nullfree, DISP_OP_MDS, "LAYOUT_COMMIT"}, 463 {mds_op_layout_get, mds_op_layout_get_free, DISP_OP_MDS, "LAYOUT_GET"}, 464 {mds_op_layout_return, nullfree, DISP_OP_MDS, "LAYOUT_RETURN"}, 465 {mds_op_secinfonn, nullfree, 466 DISP_OP_BOTH, "SECINFO_NONAME"}, 467 {mds_op_sequence, nullfree, DISP_OP_BOTH, "SEQUENCE"}, 468 {mds_op_notsup, nullfree, DISP_OP_BOTH, "SET_SSV"}, 469 {mds_op_notsup, nullfree, DISP_OP_MDS, "TEST_STATEID"}, 470 {mds_op_notsup, nullfree, DISP_OP_MDS, "WANT_DELEG"}, 471 {mds_op_notsup, nullfree, DISP_OP_BOTH, "DESTROY_CLIENTID"}, 472 {mds_op_reclaim_complete, nullfree, DISP_OP_MDS, "RECLAIM_COMPLETE"} 473 }; 474 475 static uint_t mds_disp_cnt = sizeof (mds_disptab) / sizeof (mds_disptab[0]); 476 477 #define OP_ILLEGAL_IDX (mds_disp_cnt) 478 479 extern size_t strlcpy(char *dst, const char *src, size_t dstsize); 480 481 #ifdef nextdp 482 #undef nextdp 483 #endif 484 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen)) 485 486 /*ARGSUSED*/ 487 static void 488 mds_op_readdir_free(nfs_resop4 *resop, compound_state_t *cs) 489 { 490 /* Common function used for NFSv4.0 and NFSv4.1 */ 491 rfs4_op_readdir_free(resop); 492 } 493 494 /*ARGSUSED*/ 495 static void 496 mds_op_secinfo_free(nfs_resop4 *resop, compound_state_t *cs) 497 { 498 /* Common function used for NFSv4.0 and NFSv4.1 */ 499 rfs4_op_secinfo_free(resop); 500 } 501 502 /* 503 */ 504 void 505 mds_srvrfini(void) 506 { 507 /* some shutdown stuff for the minor verson 1 server */ 508 } 509 510 nfsstat4 rfs4_state_has_access(rfs4_state_t *, int, vnode_t *); 511 int rfs4_verify_attr(struct nfs4_svgetit_arg *, attrmap4 *, 512 struct nfs4_ntov_table *); 513 514 515 /* 516 * Given the I/O mode (FREAD or FWRITE), the vnode, the stateid and whether 517 * the file is being truncated, return NFS4_OK if allowed or approriate 518 * V4 error if not. Note NFS4ERR_DELAY will be returned and a recall on 519 * the associated file will be done if the I/O is not consistent with any 520 * delegation in effect on the file. Should be holding VOP_RWLOCK, either 521 * as reader or writer as appropriate. rfs4_op_open will accquire the 522 * VOP_RWLOCK as writer when setting up delegation. If the stateid is bad 523 * this routine will return NFS4ERR_BAD_STATEID. In addition, through the 524 * deleg parameter, we will return whether a write delegation is held by 525 * the client associated with this stateid. 526 * If the server instance associated with the relevant client is in its 527 * grace period, return NFS4ERR_GRACE. 528 */ 529 nfsstat4 530 mds_validate_stateid(int mode, struct compound_state *cs, vnode_t *vp, 531 stateid4 *stateid, bool_t trunc, bool_t *deleg, bool_t do_access) 532 { 533 rfs4_file_t *fp; 534 bool_t create = FALSE; 535 rfs4_state_t *sp; 536 rfs4_deleg_state_t *dsp; 537 rfs4_lo_state_t *lsp; 538 stateid_t *id = (stateid_t *)stateid; 539 nfsstat4 stat = NFS4_OK; 540 541 if (ISSPECIAL(stateid)) { 542 fp = rfs4_findfile(cs->instp, vp, NULL, &create); 543 if (fp == NULL) 544 return (NFS4_OK); 545 if (fp->rf_dinfo->rd_dtype == OPEN_DELEGATE_NONE) { 546 rfs4_file_rele(fp); 547 return (NFS4_OK); 548 } 549 if (mode == FWRITE || 550 fp->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE) { 551 rfs4_recall_deleg(fp, trunc, NULL); 552 rfs4_file_rele(fp); 553 return (NFS4ERR_DELAY); 554 } 555 rfs4_file_rele(fp); 556 return (NFS4_OK); 557 } 558 559 stat = rfs4_get_all_state(cs, stateid, &sp, &dsp, &lsp); 560 if (stat != NFS4_OK) 561 return (stat); 562 563 /* 564 * Ordering of the following 'if' statements is specific 565 * since rfs4_get_all_state() may return a value for sp and 566 * lsp. First we check lsp, then 'fall' through to sp. 567 */ 568 if (lsp != NULL) { 569 /* Is associated server instance in its grace period? */ 570 if (rfs4_clnt_in_grace(lsp->rls_locker->rl_client)) { 571 rfs4_lo_state_rele(lsp, FALSE); 572 if (sp != NULL) 573 rfs4_dbe_rele(sp->rs_dbe); 574 return (NFS4ERR_GRACE); 575 } 576 577 if (lsp->rls_lockid.v41_bits.chgseq != 0) { 578 /* Seqid in the future? - that's bad */ 579 if (lsp->rls_lockid.v41_bits.chgseq < 580 id->v41_bits.chgseq) { 581 rfs4_lo_state_rele(lsp, FALSE); 582 if (sp != NULL) 583 rfs4_dbe_rele(sp->rs_dbe); 584 return (NFS4ERR_BAD_STATEID); 585 } 586 /* Seqid in the past? - that's old */ 587 if (lsp->rls_lockid.v41_bits.chgseq > 588 id->v41_bits.chgseq) { 589 rfs4_lo_state_rele(lsp, FALSE); 590 if (sp != NULL) 591 rfs4_dbe_rele(sp->rs_dbe); 592 return (NFS4ERR_OLD_STATEID); 593 } 594 } 595 596 /* Ensure specified filehandle matches */ 597 if (lsp->rls_state->rs_finfo->rf_vp != vp) { 598 rfs4_lo_state_rele(lsp, FALSE); 599 if (sp != NULL) 600 rfs4_dbe_rele(sp->rs_dbe); 601 return (NFS4ERR_BAD_STATEID); 602 } 603 rfs4_lo_state_rele(lsp, FALSE); 604 } 605 606 /* 607 * Stateid provided was an "open" or via the lock stateid 608 */ 609 if (sp != NULL) { 610 /* 611 * only check if the passed in stateid was an OPENID, 612 * ie. Skip if we got here via the LOCKID. 613 */ 614 if (id->v41_bits.type == OPENID) { 615 /* Is associated server instance in its grace period? */ 616 if (rfs4_clnt_in_grace(sp->rs_owner->ro_client)) { 617 rfs4_dbe_rele(sp->rs_dbe); 618 return (NFS4ERR_GRACE); 619 } 620 621 if (sp->rs_stateid.v41_bits.chgseq != 0) { 622 /* Seqid in the future? - that's bad */ 623 if (sp->rs_stateid.v41_bits.chgseq < 624 id->v41_bits.chgseq) { 625 rfs4_dbe_rele(sp->rs_dbe); 626 return (NFS4ERR_BAD_STATEID); 627 } 628 /* Seqid in the past - that's old */ 629 if (sp->rs_stateid.v41_bits.chgseq > 630 id->v41_bits.chgseq) { 631 rfs4_dbe_rele(sp->rs_dbe); 632 return (NFS4ERR_OLD_STATEID); 633 } 634 } 635 636 /* Ensure specified filehandle matches */ 637 if (sp->rs_finfo->rf_vp != vp) { 638 rfs4_dbe_rele(sp->rs_dbe); 639 return (NFS4ERR_BAD_STATEID); 640 } 641 } 642 if (sp->rs_owner->ro_need_confirm) { 643 rfs4_dbe_rele(sp->rs_dbe); 644 return (NFS4ERR_BAD_STATEID); 645 } 646 647 if (sp->rs_closed == TRUE) { 648 rfs4_dbe_rele(sp->rs_dbe); 649 return (NFS4ERR_OLD_STATEID); 650 } 651 652 if (do_access) 653 stat = rfs4_state_has_access(sp, mode, vp); 654 else 655 stat = NFS4_OK; 656 657 /* 658 * Return whether this state has write 659 * delegation if desired 660 */ 661 if (deleg && 662 (sp->rs_finfo->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE)) 663 *deleg = TRUE; 664 665 /* 666 * We got a valid stateid, so we update the 667 * lease on the client. Ideally we would like 668 * to do this after the calling op succeeds, 669 * but for now this will be good 670 * enough. Callers of this routine are 671 * currently insulated from the state stuff. 672 */ 673 rfs4_update_lease(sp->rs_owner->ro_client); 674 675 /* 676 * If a delegation is present on this file and 677 * this is a WRITE, then update the lastwrite 678 * time to indicate that activity is present. 679 */ 680 if (sp->rs_finfo->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE && 681 mode == FWRITE) { 682 sp->rs_finfo->rf_dinfo->rd_time_lastwrite = 683 gethrestime_sec(); 684 } 685 686 rfs4_dbe_rele(sp->rs_dbe); 687 return (stat); 688 } 689 690 if (dsp != NULL) { 691 /* Is associated server instance in its grace period? */ 692 if (rfs4_clnt_in_grace(dsp->rds_client)) { 693 rfs4_deleg_state_rele(dsp); 694 return (NFS4ERR_GRACE); 695 } 696 697 if ((dsp->rds_delegid.v41_bits.chgseq != 0) && 698 (dsp->rds_delegid.v41_bits.chgseq != id->v41_bits.chgseq)) { 699 rfs4_deleg_state_rele(dsp); 700 return (NFS4ERR_BAD_STATEID); 701 } 702 703 /* Ensure specified filehandle matches */ 704 if (dsp->rds_finfo->rf_vp != vp) { 705 rfs4_deleg_state_rele(dsp); 706 return (NFS4ERR_BAD_STATEID); 707 } 708 /* 709 * Return whether this state has write 710 * delegation if desired 711 */ 712 if (deleg && 713 (dsp->rds_finfo->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE)) 714 *deleg = TRUE; 715 716 rfs4_update_lease(dsp->rds_client); 717 718 /* 719 * If a delegation is present on this file and 720 * this is a WRITE, then update the lastwrite 721 * time to indicate that activity is present. 722 */ 723 if (dsp->rds_finfo->rf_dinfo->rd_dtype == OPEN_DELEGATE_WRITE && 724 mode == FWRITE) { 725 dsp->rds_finfo->rf_dinfo->rd_time_lastwrite = 726 gethrestime_sec(); 727 } 728 729 /* 730 * XXX - what happens if this is a WRITE and the 731 * delegation type of for READ. 732 */ 733 rfs4_deleg_state_rele(dsp); 734 735 return (stat); 736 } 737 /* 738 * If we got this far, something bad happened 739 */ 740 return (NFS4ERR_BAD_STATEID); 741 } 742 743 nfsstat4 744 mds_setattr(attrmap4 *resp, fattr4 *fattrp, struct compound_state *cs, 745 stateid4 *stateid) 746 { 747 int error = 0; 748 struct nfs4_svgetit_arg sarg; 749 bool_t trunc; 750 751 nfsstat4 status = NFS4_OK; 752 cred_t *cr = cs->cr; 753 vnode_t *vp = cs->vp; 754 struct nfs4_ntov_table ntov; 755 struct statvfs64 sb; 756 struct vattr bva; 757 struct flock64 bf; 758 int in_crit = 0; 759 uint_t saved_mask = 0; 760 caller_context_t ct; 761 attrvers_t avers; 762 struct nfs4_ntov_map *nvmap; 763 764 avers = RFS4_ATTRVERS(cs); 765 nvmap = NFS4_NTOV_MAP(avers); 766 *resp = NFS4_EMPTY_ATTRMAP(avers); 767 sarg.sbp = &sb; 768 nfs4_ntov_table_init(&ntov, avers); 769 status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov, 770 NFS4ATTR_SETIT); 771 if (status != NFS4_OK) { 772 /* 773 * failed set attrs 774 */ 775 goto done; 776 } 777 778 if (sarg.vap->va_mask == 0 && ! ATTR_ISSET(fattrp->attrmask, ACL) && 779 ! ATTR_ISSET(fattrp->attrmask, LAYOUT_HINT)) { 780 /* 781 * no further work to be done 782 */ 783 goto done; 784 } 785 786 ct.cc_sysid = 0; 787 ct.cc_pid = 0; 788 ct.cc_caller_id = cs->instp->caller_id; 789 ct.cc_flags = CC_DONTBLOCK; 790 791 /* 792 * If we got a request to set the ACL and the MODE, only 793 * allow changing VSUID, VSGID, and VSVTX. Attempting 794 * to change any other bits, along with setting an ACL, 795 * gives NFS4ERR_INVAL. 796 */ 797 if (ATTR_ISSET(fattrp->attrmask, ACL) && 798 ATTR_ISSET(fattrp->attrmask, MODE)) { 799 vattr_t va; 800 801 va.va_mask = AT_MODE; 802 error = VOP_GETATTR(vp, &va, 0, cs->cr, &ct); 803 if (error) { 804 status = puterrno4(error); 805 goto done; 806 } 807 if ((sarg.vap->va_mode ^ va.va_mode) & 808 ~(VSUID | VSGID | VSVTX)) { 809 status = NFS4ERR_INVAL; 810 goto done; 811 } 812 } 813 814 /* Check stateid only if size has been set */ 815 if (sarg.vap->va_mask & AT_SIZE) { 816 trunc = (sarg.vap->va_size == 0); 817 status = mds_validate_stateid(FWRITE, 818 cs, cs->vp, stateid, trunc, 819 &cs->deleg, sarg.vap->va_mask & AT_SIZE); 820 if (status != NFS4_OK) 821 goto done; 822 } 823 824 /* XXX start of possible race with delegations */ 825 826 /* 827 * We need to specially handle size changes because it is 828 * possible for the client to create a file with read-only 829 * modes, but with the file opened for writing. If the client 830 * then tries to set the file size, e.g. ftruncate(3C), 831 * fcntl(F_FREESP), the normal access checking done in 832 * VOP_SETATTR would prevent the client from doing it even though 833 * it should be allowed to do so. To get around this, we do the 834 * access checking for ourselves and use VOP_SPACE which doesn't 835 * do the access checking. 836 * Also the client should not be allowed to change the file 837 * size if there is a conflicting non-blocking mandatory lock in 838 * the region of the change. 839 */ 840 if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) { 841 u_offset_t offset; 842 ssize_t length; 843 844 /* 845 * ufs_setattr clears AT_SIZE from vap->va_mask, but 846 * before returning, sarg.vap->va_mask is used to 847 * generate the setattr reply bitmap. We also clear 848 * AT_SIZE below before calling VOP_SPACE. For both 849 * of these cases, the va_mask needs to be saved here 850 * and restored after calling VOP_SETATTR. 851 */ 852 saved_mask = sarg.vap->va_mask; 853 854 /* 855 * Check any possible conflict due to NBMAND locks. 856 * Get into critical region before VOP_GETATTR, so the 857 * size attribute is valid when checking conflicts. 858 */ 859 if (nbl_need_check(vp)) { 860 nbl_start_crit(vp, RW_READER); 861 in_crit = 1; 862 } 863 864 bva.va_mask = AT_UID|AT_SIZE; 865 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) { 866 status = puterrno4(error); 867 goto done; 868 } 869 870 if (in_crit) { 871 if (sarg.vap->va_size < bva.va_size) { 872 offset = sarg.vap->va_size; 873 length = bva.va_size - sarg.vap->va_size; 874 } else { 875 offset = bva.va_size; 876 length = sarg.vap->va_size - bva.va_size; 877 } 878 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0, 879 &ct)) { 880 status = NFS4ERR_LOCKED; 881 goto done; 882 } 883 } 884 885 if (crgetuid(cr) == bva.va_uid) { 886 sarg.vap->va_mask &= ~AT_SIZE; 887 bf.l_type = F_WRLCK; 888 bf.l_whence = 0; 889 bf.l_start = (off64_t)sarg.vap->va_size; 890 bf.l_len = 0; 891 bf.l_sysid = 0; 892 bf.l_pid = 0; 893 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE, 894 (offset_t)sarg.vap->va_size, cr, &ct); 895 } 896 } 897 898 if (!error && sarg.vap->va_mask != 0) 899 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct); 900 901 /* restore va_mask -- ufs_setattr clears AT_SIZE */ 902 if (saved_mask & AT_SIZE) 903 sarg.vap->va_mask |= AT_SIZE; 904 905 /* 906 * If an ACL was being set, it has been delayed until now, 907 * in order to set the mode (via the VOP_SETATTR() above) first. 908 */ 909 if (! error && ATTR_ISSET(fattrp->attrmask, ACL)) { 910 int i; 911 912 for (i = 0; i < ntov.attrcnt; i++) 913 if (ntov.amap[i] == FATTR4_ACL) 914 break; 915 if (i < ntov.attrcnt) { 916 error = (*nvmap[FATTR4_ACL].sv_getit)(NFS4ATTR_SETIT, 917 &sarg, &ntov.na[i]); 918 if (error == 0) { 919 ATTR_SET(*resp, ACL); 920 } else if (error == ENOTSUP) { 921 (void) rfs4_verify_attr(&sarg, resp, &ntov); 922 status = NFS4ERR_ATTRNOTSUPP; 923 goto done; 924 } 925 } else { 926 error = EINVAL; 927 } 928 } 929 930 if (! error && ATTR_ISSET(fattrp->attrmask, LAYOUT_HINT)) { 931 /* 932 * Store layout hint. Layout hint will be stored 933 * in file struct (which means it can only be set 934 * when the file is open). If layout hint is allowed 935 * for files not open, then it must be stored 936 * persistently. 937 * 938 * status assignment placates lint. it will 939 * be replaced with code to store the layout 940 * hint. 941 */ 942 status = NFS4_OK; 943 } 944 945 if (error) { 946 /* check if a monitor detected a delegation conflict */ 947 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) 948 status = NFS4ERR_DELAY; 949 else 950 status = puterrno4(error); 951 952 /* 953 * Set the response bitmap when setattr failed. 954 * If VOP_SETATTR partially succeeded, test by doing a 955 * VOP_GETATTR on the object and comparing the data 956 * to the setattr arguments. 957 */ 958 (void) rfs4_verify_attr(&sarg, resp, &ntov); 959 } else { 960 /* 961 * Force modified metadata out to stable storage. 962 */ 963 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct); 964 /* 965 * Set response bitmap 966 */ 967 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp); 968 } 969 970 /* Return early and already have a NFSv4 error */ 971 done: 972 /* 973 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr 974 * conversion sets both readable and writeable NFS4 attrs 975 * for AT_MTIME and AT_ATIME. The line below masks out 976 * unrequested attrs from the setattr result bitmap. This 977 * is placed after the done: label to catch the ATTRNOTSUP 978 * case. 979 */ 980 ATTRMAP_MASK(*resp, fattrp->attrmask); 981 982 if (in_crit) 983 nbl_end_crit(vp); 984 985 nfs4_ntov_table_free(&ntov, &sarg); 986 987 return (status); 988 } 989 990 /* ARGSUSED */ 991 void 992 mds_op_secinfonn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 993 compound_state_t *cs) 994 { 995 SECINFO_NO_NAME4res *respnn; 996 int dotdot; 997 998 DTRACE_NFSV4_1(op__secinfo__no__name__start, 999 struct compound_state *, cs); 1000 1001 respnn = &resop->nfs_resop4_u.opsecinfo_no_name; 1002 1003 /* 1004 * Current file handle (cfh) should have been set before 1005 * getting into this function. If not, return error. 1006 */ 1007 if (cs->vp == NULL) { 1008 *cs->statusp = respnn->status = NFS4ERR_NOFILEHANDLE; 1009 goto final; 1010 } 1011 1012 dotdot = 1013 (argop->nfs_argop4_u.opsecinfo_no_name == SECINFO_STYLE4_PARENT); 1014 1015 *cs->statusp = respnn->status = do_rfs4_op_secinfo(cs, NULL, 1016 dotdot, (SECINFO4res *)respnn); 1017 1018 final: 1019 DTRACE_NFSV4_2(op__secinfo__no__name__done, 1020 struct compound_state *, cs, 1021 SECINFO_NO_NAME4res *, respnn); 1022 } 1023 1024 /* ARGSUSED */ 1025 void 1026 mds_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1027 compound_state_t *cs) 1028 { 1029 SECINFO4res *resp; 1030 utf8string *utfnm; 1031 uint_t len, dotdot; 1032 char *nm; 1033 1034 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo; 1035 1036 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs, 1037 SECINFO4args *, args); 1038 1039 resp = &resop->nfs_resop4_u.opsecinfo; 1040 1041 /* 1042 * Current file handle (cfh) should have been set before 1043 * getting into this function. If not, return error. 1044 */ 1045 if (cs->vp == NULL) { 1046 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1047 goto final; 1048 } 1049 if (cs->vp->v_type != VDIR) { 1050 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 1051 goto final; 1052 } 1053 1054 /* 1055 * Verify the component name. If failed, error out, but 1056 * do not error out if the component name is a "..". 1057 * SECINFO will return its parents secinfo data for SECINFO "..". 1058 */ 1059 utfnm = &argop->nfs_argop4_u.opsecinfo.name; 1060 if (!utf8_dir_verify(utfnm)) { 1061 if (utfnm->utf8string_len != 2 || 1062 utfnm->utf8string_val[0] != '.' || 1063 utfnm->utf8string_val[1] != '.') { 1064 *cs->statusp = resp->status = NFS4ERR_INVAL; 1065 goto final; 1066 } 1067 dotdot = 1; 1068 } else 1069 dotdot = 0; 1070 1071 nm = utf8_to_str(utfnm, &len, NULL); 1072 if (nm == NULL) { 1073 *cs->statusp = resp->status = NFS4ERR_INVAL; 1074 goto final; 1075 } 1076 1077 if (len > MAXNAMELEN) { 1078 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 1079 kmem_free(nm, len); 1080 goto final; 1081 } 1082 1083 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, nm, dotdot, resp); 1084 1085 kmem_free(nm, len); 1086 1087 final: 1088 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs, 1089 SECINFO4res *, resp); 1090 } 1091 1092 /* 1093 * verify and nverify are exactly the same, except that nverify 1094 * succeeds when some argument changed, and verify succeeds when 1095 * when none changed. 1096 */ 1097 1098 /* ARGSUSED */ 1099 void 1100 mds_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1101 compound_state_t *cs) 1102 { 1103 VERIFY4args *args = &argop->nfs_argop4_u.opverify; 1104 VERIFY4res *resp = &resop->nfs_resop4_u.opverify; 1105 int error; 1106 struct nfs4_svgetit_arg sarg; 1107 struct statvfs64 sb; 1108 struct nfs4_ntov_table ntov; 1109 1110 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs, 1111 VERIFY4args *, args); 1112 1113 if (cs->vp == NULL) { 1114 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1115 goto final; 1116 } 1117 1118 sarg.sbp = &sb; 1119 nfs4_ntov_table_init(&ntov, RFS4_ATTRVERS(cs)); 1120 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs, 1121 &sarg, &ntov, NFS4ATTR_VERIT); 1122 if (resp->status != NFS4_OK) { 1123 /* 1124 * do_rfs4_set_attrs will try to verify systemwide attrs, 1125 * so could return -1 for "no match". 1126 */ 1127 if (resp->status == -1) 1128 resp->status = NFS4ERR_NOT_SAME; 1129 goto done; 1130 } 1131 error = rfs4_verify_attr(&sarg, NULL, &ntov); 1132 switch (error) { 1133 case 0: 1134 resp->status = NFS4_OK; 1135 break; 1136 case -1: 1137 resp->status = NFS4ERR_NOT_SAME; 1138 break; 1139 default: 1140 resp->status = puterrno4(error); 1141 break; 1142 } 1143 done: 1144 *cs->statusp = resp->status; 1145 nfs4_ntov_table_free(&ntov, &sarg); 1146 1147 final: 1148 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs, 1149 VERIFY4res *, resp); 1150 } 1151 1152 /* ARGSUSED */ 1153 void 1154 mds_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1155 compound_state_t *cs) 1156 { 1157 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify; 1158 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify; 1159 int error; 1160 struct nfs4_svgetit_arg sarg; 1161 struct statvfs64 sb; 1162 struct nfs4_ntov_table ntov; 1163 1164 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs, 1165 NVERIFY4args *, args); 1166 1167 if (cs->vp == NULL) { 1168 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1169 goto final; 1170 } 1171 sarg.sbp = &sb; 1172 nfs4_ntov_table_init(&ntov, RFS4_ATTRVERS(cs)); 1173 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs, 1174 &sarg, &ntov, NFS4ATTR_VERIT); 1175 if (resp->status != NFS4_OK) { 1176 /* 1177 * do_rfs4_set_attrs will try to verify systemwide attrs, 1178 * so could return -1 for "no match". 1179 */ 1180 if (resp->status == -1) 1181 resp->status = NFS4_OK; 1182 goto done; 1183 } 1184 error = rfs4_verify_attr(&sarg, NULL, &ntov); 1185 switch (error) { 1186 case 0: 1187 resp->status = NFS4ERR_SAME; 1188 break; 1189 case -1: 1190 resp->status = NFS4_OK; 1191 break; 1192 default: 1193 resp->status = puterrno4(error); 1194 break; 1195 } 1196 done: 1197 *cs->statusp = resp->status; 1198 nfs4_ntov_table_free(&ntov, &sarg); 1199 1200 final: 1201 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs, 1202 NVERIFY4res *, resp); 1203 1204 } 1205 1206 /* ARGSUSED */ 1207 void 1208 mds_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1209 compound_state_t *cs) 1210 { 1211 ACCESS4args *args = &argop->nfs_argop4_u.opaccess; 1212 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess; 1213 int error; 1214 vnode_t *vp; 1215 struct vattr va; 1216 int checkwriteperm; 1217 cred_t *cr = cs->cr; 1218 bslabel_t *clabel, *slabel; 1219 ts_label_t *tslabel; 1220 boolean_t admin_low_client; 1221 1222 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs, 1223 ACCESS4args *, args); 1224 1225 if (cs->vp == NULL) { 1226 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1227 goto final; 1228 } 1229 1230 ASSERT(cr != NULL); 1231 1232 vp = cs->vp; 1233 1234 /* 1235 * If the file system is exported read only, it is not appropriate 1236 * to check write permissions for regular files and directories. 1237 * Special files are interpreted by the client, so the underlying 1238 * permissions are sent back to the client for interpretation. 1239 */ 1240 if (rdonly4(cs->exi, cs->vp, req) && 1241 (vp->v_type == VREG || vp->v_type == VDIR)) 1242 checkwriteperm = 0; 1243 else 1244 checkwriteperm = 1; 1245 1246 /* 1247 * XXX 1248 * We need the mode so that we can correctly determine access 1249 * permissions relative to a mandatory lock file. Access to 1250 * mandatory lock files is denied on the server, so it might 1251 * as well be reflected to the server during the open. 1252 */ 1253 va.va_mask = AT_MODE; 1254 error = VOP_GETATTR(vp, &va, 0, cr, NULL); 1255 if (error) { 1256 *cs->statusp = resp->status = puterrno4(error); 1257 goto final; 1258 } 1259 resp->access = 0; 1260 resp->supported = 0; 1261 1262 if (is_system_labeled()) { 1263 ASSERT(req->rq_label != NULL); 1264 clabel = req->rq_label; 1265 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *, 1266 "got client label from request(1)", 1267 struct svc_req *, req); 1268 if (!blequal(&l_admin_low->tsl_label, clabel)) { 1269 if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) { 1270 *cs->statusp = resp->status = puterrno4(EACCES); 1271 goto final; 1272 } 1273 slabel = label2bslabel(tslabel); 1274 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel, 1275 char *, "got server label(1) for vp(2)", 1276 bslabel_t *, slabel, vnode_t *, vp); 1277 1278 admin_low_client = B_FALSE; 1279 } else 1280 admin_low_client = B_TRUE; 1281 } 1282 1283 if (args->access & ACCESS4_READ) { 1284 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL); 1285 if (!error && !MANDLOCK(vp, va.va_mode) && 1286 (!is_system_labeled() || admin_low_client || 1287 bldominates(clabel, slabel))) 1288 resp->access |= ACCESS4_READ; 1289 resp->supported |= ACCESS4_READ; 1290 } 1291 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) { 1292 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL); 1293 if (!error && (!is_system_labeled() || admin_low_client || 1294 bldominates(clabel, slabel))) 1295 resp->access |= ACCESS4_LOOKUP; 1296 resp->supported |= ACCESS4_LOOKUP; 1297 } 1298 if (checkwriteperm && 1299 (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) { 1300 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL); 1301 if (!error && !MANDLOCK(vp, va.va_mode) && 1302 (!is_system_labeled() || admin_low_client || 1303 blequal(clabel, slabel))) 1304 resp->access |= 1305 (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND)); 1306 resp->supported |= (ACCESS4_MODIFY|ACCESS4_EXTEND); 1307 } 1308 1309 if (checkwriteperm && 1310 (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) { 1311 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL); 1312 if (!error && (!is_system_labeled() || admin_low_client || 1313 blequal(clabel, slabel))) 1314 resp->access |= ACCESS4_DELETE; 1315 resp->supported |= ACCESS4_DELETE; 1316 } 1317 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) { 1318 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL); 1319 if (!error && !MANDLOCK(vp, va.va_mode) && 1320 (!is_system_labeled() || admin_low_client || 1321 bldominates(clabel, slabel))) 1322 resp->access |= ACCESS4_EXECUTE; 1323 resp->supported |= ACCESS4_EXECUTE; 1324 } 1325 1326 if (is_system_labeled() && !admin_low_client) 1327 label_rele(tslabel); 1328 1329 *cs->statusp = resp->status = NFS4_OK; 1330 1331 final: 1332 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs, 1333 ACCESS4res *, resp); 1334 } 1335 1336 /* ARGSUSED */ 1337 static void 1338 mds_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1339 compound_state_t *cs) 1340 { 1341 COMMIT4args *args = &argop->nfs_argop4_u.opcommit; 1342 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit; 1343 int error; 1344 vnode_t *vp = cs->vp; 1345 cred_t *cr = cs->cr; 1346 vattr_t va; 1347 caller_context_t ct; 1348 1349 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs, 1350 COMMIT4args *, args); 1351 1352 if (vp == NULL) { 1353 /* 1354 * XXX kludge: fake the commit if we are a data server 1355 * This will be replaced once we have nnop_commit(). 1356 */ 1357 if (cs->nn != NULL) { 1358 *cs->statusp = resp->status = NFS4_OK; 1359 resp->writeverf = cs->instp->Write4verf; 1360 } else { 1361 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1362 } 1363 goto final; 1364 } 1365 if (cs->access == CS_ACCESS_DENIED) { 1366 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1367 goto final; 1368 } 1369 1370 if (args->offset + args->count < args->offset) { 1371 *cs->statusp = resp->status = NFS4ERR_INVAL; 1372 goto final; 1373 } 1374 1375 ct.cc_sysid = 0; 1376 ct.cc_pid = 0; 1377 ct.cc_caller_id = cs->instp->caller_id; 1378 ct.cc_flags = CC_DONTBLOCK; 1379 1380 va.va_mask = AT_UID; 1381 error = VOP_GETATTR(vp, &va, 0, cr, &ct); 1382 1383 /* 1384 * If we can't get the attributes, then we can't do the 1385 * right access checking. So, we'll fail the request. 1386 */ 1387 if (error) { 1388 *cs->statusp = resp->status = puterrno4(error); 1389 goto final; 1390 } 1391 if (rdonly4(cs->exi, cs->vp, req)) { 1392 *cs->statusp = resp->status = NFS4ERR_ROFS; 1393 goto final; 1394 } 1395 1396 if (vp->v_type != VREG) { 1397 if (vp->v_type == VDIR) 1398 resp->status = NFS4ERR_ISDIR; 1399 else 1400 resp->status = NFS4ERR_INVAL; 1401 *cs->statusp = resp->status; 1402 goto final; 1403 } 1404 1405 if (crgetuid(cr) != va.va_uid && 1406 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, &ct))) { 1407 *cs->statusp = resp->status = puterrno4(error); 1408 goto final; 1409 } 1410 1411 error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr, &ct); 1412 if (!error) 1413 error = VOP_FSYNC(vp, FNODSYNC, cr, &ct); 1414 1415 if (error) { 1416 *cs->statusp = resp->status = puterrno4(error); 1417 goto final; 1418 } 1419 1420 *cs->statusp = resp->status = NFS4_OK; 1421 resp->writeverf = cs->instp->Write4verf; 1422 1423 final: 1424 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs, 1425 COMMIT4res *, resp); 1426 } 1427 1428 /* 1429 * rfs4_op_mknod is called from rfs4_op_create after all initial verification 1430 * was completed. It does the nfsv4 create for special files. 1431 * 1432 * nfsv4 create is used to create non-regular files. For regular files, 1433 * use nfsv4 open. 1434 */ 1435 /* ARGSUSED */ 1436 static void 1437 mds_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1438 compound_state_t *cs) 1439 { 1440 CREATE4args *args = &argop->nfs_argop4_u.opcreate; 1441 CREATE4res *resp = &resop->nfs_resop4_u.opcreate; 1442 int error; 1443 struct vattr bva, iva, iva2, ava, *vap; 1444 cred_t *cr = cs->cr; 1445 vnode_t *dvp = cs->vp; 1446 vnode_t *vp = NULL; 1447 vnode_t *realvp; 1448 char *nm, *lnm; 1449 uint_t len, llen; 1450 int syncval = 0; 1451 struct nfs4_svgetit_arg sarg; 1452 struct nfs4_ntov_table ntov; 1453 struct statvfs64 sb; 1454 nfsstat4 status; 1455 caller_context_t ct; 1456 1457 DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs, 1458 CREATE4args *, args); 1459 1460 resp->attrset = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs)); 1461 1462 if (dvp == NULL) { 1463 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1464 goto final; 1465 } 1466 1467 /* 1468 * If there is an unshared filesystem mounted on this vnode, 1469 * do not allow to create an object in this directory. 1470 */ 1471 if (vn_ismntpt(dvp)) { 1472 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1473 goto final; 1474 } 1475 1476 ct.cc_sysid = 0; 1477 ct.cc_pid = 0; 1478 ct.cc_caller_id = cs->instp->caller_id; 1479 ct.cc_flags = CC_DONTBLOCK; 1480 1481 /* Verify that type is correct */ 1482 switch (args->type) { 1483 case NF4LNK: 1484 case NF4BLK: 1485 case NF4CHR: 1486 case NF4SOCK: 1487 case NF4FIFO: 1488 case NF4DIR: 1489 break; 1490 default: 1491 *cs->statusp = resp->status = NFS4ERR_BADTYPE; 1492 goto final; 1493 }; 1494 1495 if (cs->access == CS_ACCESS_DENIED) { 1496 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1497 goto final; 1498 } 1499 if (dvp->v_type != VDIR) { 1500 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 1501 goto final; 1502 } 1503 if (!utf8_dir_verify(&args->objname)) { 1504 *cs->statusp = resp->status = NFS4ERR_INVAL; 1505 goto final; 1506 } 1507 1508 if (rdonly4(cs->exi, cs->vp, req)) { 1509 *cs->statusp = resp->status = NFS4ERR_ROFS; 1510 goto final; 1511 } 1512 1513 /* 1514 * Name of newly created object 1515 */ 1516 nm = utf8_to_fn(&args->objname, &len, NULL); 1517 if (nm == NULL) { 1518 *cs->statusp = resp->status = NFS4ERR_INVAL; 1519 goto final; 1520 } 1521 1522 if (len > MAXNAMELEN) { 1523 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 1524 kmem_free(nm, len); 1525 goto final; 1526 } 1527 1528 sarg.sbp = &sb; 1529 nfs4_ntov_table_init(&ntov, RFS4_ATTRVERS(cs)); 1530 1531 status = do_rfs4_set_attrs(&resp->attrset, 1532 &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT); 1533 1534 if (sarg.vap->va_mask == 0 && status == NFS4_OK) 1535 status = NFS4ERR_INVAL; 1536 1537 if (status != NFS4_OK) { 1538 *cs->statusp = resp->status = status; 1539 kmem_free(nm, len); 1540 nfs4_ntov_table_free(&ntov, &sarg); 1541 1542 resp->attrset = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs)); 1543 goto final; 1544 } 1545 1546 /* Get "before" change value */ 1547 bva.va_mask = AT_CTIME|AT_SEQ; 1548 error = VOP_GETATTR(dvp, &bva, 0, cr, &ct); 1549 if (error) { 1550 *cs->statusp = resp->status = puterrno4(error); 1551 kmem_free(nm, len); 1552 nfs4_ntov_table_free(&ntov, &sarg); 1553 1554 resp->attrset = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs)); 1555 goto final; 1556 } 1557 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime) 1558 1559 vap = sarg.vap; 1560 1561 /* 1562 * Set default initial values for attributes when not specified 1563 * in createattrs. 1564 */ 1565 if ((vap->va_mask & AT_UID) == 0) { 1566 vap->va_uid = crgetuid(cr); 1567 vap->va_mask |= AT_UID; 1568 } 1569 if ((vap->va_mask & AT_GID) == 0) { 1570 vap->va_gid = crgetgid(cr); 1571 vap->va_mask |= AT_GID; 1572 } 1573 1574 vap->va_mask |= AT_TYPE; 1575 switch (args->type) { 1576 case NF4DIR: 1577 vap->va_type = VDIR; 1578 if ((vap->va_mask & AT_MODE) == 0) { 1579 vap->va_mode = 0700; /* default: owner rwx only */ 1580 vap->va_mask |= AT_MODE; 1581 } 1582 error = VOP_MKDIR(dvp, nm, vap, &vp, cr, &ct, 0, NULL); 1583 if (error) 1584 break; 1585 1586 /* 1587 * Get the initial "after" sequence number, if it fails, 1588 * set to zero 1589 */ 1590 iva.va_mask = AT_SEQ; 1591 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, &ct)) 1592 iva.va_seq = 0; 1593 break; 1594 case NF4LNK: 1595 vap->va_type = VLNK; 1596 if ((vap->va_mask & AT_MODE) == 0) { 1597 vap->va_mode = 0700; /* default: owner rwx only */ 1598 vap->va_mask |= AT_MODE; 1599 } 1600 1601 /* 1602 * symlink names must be treated as data 1603 */ 1604 lnm = utf8_to_str(&args->ftype4_u.linkdata, &llen, NULL); 1605 1606 if (lnm == NULL) { 1607 *cs->statusp = resp->status = NFS4ERR_INVAL; 1608 kmem_free(nm, len); 1609 nfs4_ntov_table_free(&ntov, &sarg); 1610 resp->attrset = 1611 NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs)); 1612 goto final; 1613 } 1614 1615 if (llen > MAXPATHLEN) { 1616 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 1617 kmem_free(nm, len); 1618 kmem_free(lnm, llen); 1619 nfs4_ntov_table_free(&ntov, &sarg); 1620 resp->attrset = 1621 NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs)); 1622 goto final; 1623 } 1624 1625 error = VOP_SYMLINK(dvp, nm, vap, lnm, cr, &ct, 0); 1626 if (lnm != NULL) 1627 kmem_free(lnm, llen); 1628 if (error) 1629 break; 1630 1631 /* 1632 * Get the initial "after" sequence number, if it fails, 1633 * set to zero 1634 */ 1635 iva.va_mask = AT_SEQ; 1636 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, &ct)) 1637 iva.va_seq = 0; 1638 1639 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, 1640 &ct, 0, NULL); 1641 if (error) 1642 break; 1643 1644 /* 1645 * va_seq is not safe over VOP calls, check it again 1646 * if it has changed zero out iva to force atomic = FALSE. 1647 */ 1648 iva2.va_mask = AT_SEQ; 1649 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, &ct) || 1650 iva2.va_seq != iva.va_seq) 1651 iva.va_seq = 0; 1652 break; 1653 default: 1654 /* 1655 * probably a special file. 1656 */ 1657 if ((vap->va_mask & AT_MODE) == 0) { 1658 vap->va_mode = 0600; /* default: owner rw only */ 1659 vap->va_mask |= AT_MODE; 1660 } 1661 syncval = FNODSYNC; 1662 /* 1663 * We know this will only generate one VOP call 1664 */ 1665 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, nm); 1666 1667 if (vp == NULL) { 1668 kmem_free(nm, len); 1669 nfs4_ntov_table_free(&ntov, &sarg); 1670 resp->attrset = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs)); 1671 goto final; 1672 } 1673 1674 /* 1675 * Get the initial "after" sequence number, if it fails, 1676 * set to zero 1677 */ 1678 iva.va_mask = AT_SEQ; 1679 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, &ct)) 1680 iva.va_seq = 0; 1681 1682 break; 1683 } 1684 kmem_free(nm, len); 1685 1686 if (error) { 1687 *cs->statusp = resp->status = puterrno4(error); 1688 } 1689 1690 /* 1691 * Force modified data and metadata out to stable storage. 1692 */ 1693 (void) VOP_FSYNC(dvp, 0, cr, &ct); 1694 1695 if (resp->status != NFS4_OK) { 1696 if (vp != NULL) 1697 VN_RELE(vp); 1698 nfs4_ntov_table_free(&ntov, &sarg); 1699 resp->attrset = NFS4_EMPTY_ATTRMAP(RFS4_ATTRVERS(cs)); 1700 goto final; 1701 } 1702 1703 /* 1704 * Finish setup of cinfo response, "before" value already set. 1705 * Get "after" change value, if it fails, simply return the 1706 * before value. 1707 */ 1708 ava.va_mask = AT_CTIME|AT_SEQ; 1709 if (VOP_GETATTR(dvp, &ava, 0, cr, &ct)) { 1710 ava.va_ctime = bva.va_ctime; 1711 ava.va_seq = 0; 1712 } 1713 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime); 1714 1715 /* 1716 * True verification that object was created with correct 1717 * attrs is impossible. The attrs could have been changed 1718 * immediately after object creation. If attributes did 1719 * not verify, the only recourse for the server is to 1720 * destroy the object. Maybe if some attrs (like gid) 1721 * are set incorrectly, the object should be destroyed; 1722 * however, seems bad as a default policy. Do we really 1723 * want to destroy an object over one of the times not 1724 * verifying correctly? For these reasons, the server 1725 * currently sets bits in attrset for createattrs 1726 * that were set; however, no verification is done. 1727 * 1728 * vmask_to_nmask accounts for vattr bits set on create 1729 * [do_rfs4_set_attrs() only sets resp bits for 1730 * non-vattr/vfs bits.] 1731 * Mask off any bits set by default so as not to return 1732 * more attrset bits than were requested in createattrs 1733 */ 1734 nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset, 1735 RFS4_ATTRVERS(cs)); 1736 ATTRMAP_MASK(resp->attrset, args->createattrs.attrmask); 1737 nfs4_ntov_table_free(&ntov, &sarg); 1738 1739 error = mknfs41_fh(&cs->fh, vp, cs->exi); 1740 if (error) { 1741 *cs->statusp = resp->status = puterrno4(error); 1742 } 1743 1744 /* 1745 * The cinfo.atomic = TRUE only if we got no errors, we have 1746 * non-zero va_seq's, and it has incremented by exactly one 1747 * during the creation and it didn't change during the VOP_LOOKUP 1748 * or VOP_FSYNC. 1749 */ 1750 if (!error && bva.va_seq && iva.va_seq && ava.va_seq && 1751 iva.va_seq == (bva.va_seq + 1) && 1752 iva.va_seq == ava.va_seq) 1753 resp->cinfo.atomic = TRUE; 1754 else 1755 resp->cinfo.atomic = FALSE; 1756 1757 /* 1758 * Force modified metadata out to stable storage. 1759 * 1760 * if a underlying vp exists, pass it to VOP_FSYNC 1761 */ 1762 if (VOP_REALVP(vp, &realvp, &ct) == 0) 1763 (void) VOP_FSYNC(realvp, syncval, cr, &ct); 1764 else 1765 (void) VOP_FSYNC(vp, syncval, cr, &ct); 1766 1767 if (resp->status != NFS4_OK) { 1768 VN_RELE(vp); 1769 goto final; 1770 } 1771 if (cs->vp) 1772 VN_RELE(cs->vp); 1773 1774 cs->vp = vp; 1775 *cs->statusp = resp->status = NFS4_OK; 1776 1777 final: 1778 DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs, 1779 CREATE4res *, resp); 1780 } 1781 1782 1783 /*ARGSUSED*/ 1784 static void 1785 mds_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1786 compound_state_t *cs) 1787 { 1788 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn; 1789 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn; 1790 rfs4_deleg_state_t *dsp; 1791 nfsstat4 status; 1792 1793 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs, 1794 DELEGRETURN4args *, args); 1795 1796 status = rfs4_get_deleg_state(cs, &args->deleg_stateid, &dsp); 1797 resp->status = *cs->statusp = status; 1798 if (status != NFS4_OK) 1799 goto final; 1800 1801 /* Ensure specified filehandle matches */ 1802 if (cs->vp != dsp->rds_finfo->rf_vp) { 1803 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID; 1804 } else 1805 rfs4_return_deleg(dsp, FALSE); 1806 1807 rfs4_update_lease(dsp->rds_client); 1808 1809 rfs4_deleg_state_rele(dsp); 1810 1811 final: 1812 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs, 1813 DELEGRETURN4res *, resp); 1814 } 1815 1816 1817 1818 /* ARGSUSED */ 1819 static void 1820 mds_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1821 compound_state_t *cs) 1822 { 1823 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr; 1824 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr; 1825 struct nfs4_svgetit_arg sarg; 1826 struct statvfs64 sb; 1827 nfsstat4 status; 1828 1829 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs, 1830 GETATTR4args *, args); 1831 1832 if (cs->vp == NULL) { 1833 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1834 goto final; 1835 } 1836 1837 if (cs->access == CS_ACCESS_DENIED) { 1838 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1839 goto final; 1840 } 1841 1842 sarg.sbp = &sb; 1843 sarg.cs = cs; 1844 1845 status = attrmap4_to_vattrmask(&args->attr_request, &sarg); 1846 if (status == NFS4_OK) { 1847 status = bitmap4_get_sysattrs(&sarg); 1848 if (status == NFS4_OK) 1849 status = do_rfs4_op_getattr(&args->attr_request, 1850 &resp->obj_attributes, &sarg); 1851 } 1852 *cs->statusp = resp->status = status; 1853 1854 final: 1855 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs, 1856 GETATTR4res *, resp); 1857 } 1858 1859 /*ARGSUSED*/ 1860 void 1861 mds_op_getattr_free(nfs_resop4 *resop, compound_state_t *cs) 1862 { 1863 /* Common function for NFSv4.0 and NFSv4.1 */ 1864 rfs4_op_getattr_free(resop); 1865 } 1866 1867 /* ARGSUSED */ 1868 static void 1869 mds_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1870 compound_state_t *cs) 1871 { 1872 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh; 1873 1874 DTRACE_NFSV4_1(op__getfh__start, 1875 struct compound_state *, cs); 1876 1877 if (cs->vp == NULL) { 1878 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1879 goto final; 1880 } 1881 if (cs->access == CS_ACCESS_DENIED) { 1882 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1883 goto final; 1884 } 1885 1886 resp->object.nfs_fh4_val = 1887 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP); 1888 nfs_fh4_copy(&cs->fh, &resp->object); 1889 *cs->statusp = resp->status = NFS4_OK; 1890 1891 final: 1892 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs, 1893 GETFH4res *, resp); 1894 } 1895 1896 /*ARGSUSED*/ 1897 static void 1898 mds_op_getfh_free(nfs_resop4 *resop, compound_state_t *cs) 1899 { 1900 /* Common function for NFSv4.0 and NFSv4.1 */ 1901 rfs4_op_getfh_free(resop); 1902 } 1903 1904 /* 1905 * link: args: SAVED_FH: file, CURRENT_FH: target directory 1906 * res: status. If success - CURRENT_FH unchanged, return change_info 1907 */ 1908 /* ARGSUSED */ 1909 static void 1910 mds_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 1911 compound_state_t *cs) 1912 { 1913 LINK4args *args = &argop->nfs_argop4_u.oplink; 1914 LINK4res *resp = &resop->nfs_resop4_u.oplink; 1915 int error; 1916 vnode_t *vp; 1917 vnode_t *dvp; 1918 struct vattr bdva, idva, adva; 1919 char *nm; 1920 uint_t len; 1921 caller_context_t ct; 1922 1923 DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs, 1924 LINK4args *, args); 1925 1926 /* SAVED_FH: source object */ 1927 vp = cs->saved_vp; 1928 if (vp == NULL) { 1929 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1930 goto final; 1931 } 1932 1933 /* CURRENT_FH: target directory */ 1934 dvp = cs->vp; 1935 if (dvp == NULL) { 1936 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 1937 goto final; 1938 } 1939 1940 /* 1941 * If there is a non-shared filesystem mounted on this vnode, 1942 * do not allow to link any file in this directory. 1943 */ 1944 if (vn_ismntpt(dvp)) { 1945 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1946 goto final; 1947 } 1948 1949 if (cs->access == CS_ACCESS_DENIED) { 1950 *cs->statusp = resp->status = NFS4ERR_ACCESS; 1951 goto final; 1952 } 1953 1954 /* Check source object's type validity */ 1955 if (vp->v_type == VDIR) { 1956 *cs->statusp = resp->status = NFS4ERR_ISDIR; 1957 goto final; 1958 } 1959 1960 /* Check target directory's type */ 1961 if (dvp->v_type != VDIR) { 1962 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 1963 goto final; 1964 } 1965 1966 if (cs->saved_exi != cs->exi) { 1967 *cs->statusp = resp->status = NFS4ERR_XDEV; 1968 goto final; 1969 } 1970 1971 if (!utf8_dir_verify(&args->newname)) { 1972 *cs->statusp = resp->status = NFS4ERR_INVAL; 1973 goto final; 1974 } 1975 1976 nm = utf8_to_fn(&args->newname, &len, NULL); 1977 if (nm == NULL) { 1978 *cs->statusp = resp->status = NFS4ERR_INVAL; 1979 goto final; 1980 } 1981 1982 if (len > MAXNAMELEN) { 1983 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 1984 kmem_free(nm, len); 1985 goto final; 1986 } 1987 1988 if (rdonly4(cs->exi, cs->vp, req)) { 1989 *cs->statusp = resp->status = NFS4ERR_ROFS; 1990 kmem_free(nm, len); 1991 goto final; 1992 } 1993 1994 ct.cc_sysid = 0; 1995 ct.cc_pid = 0; 1996 ct.cc_caller_id = cs->instp->caller_id; 1997 ct.cc_flags = CC_DONTBLOCK; 1998 1999 /* Get "before" change value */ 2000 bdva.va_mask = AT_CTIME|AT_SEQ; 2001 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, &ct); 2002 if (error) { 2003 *cs->statusp = resp->status = puterrno4(error); 2004 kmem_free(nm, len); 2005 goto final; 2006 } 2007 2008 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime) 2009 2010 error = VOP_LINK(dvp, vp, nm, cs->cr, &ct, 0); 2011 2012 kmem_free(nm, len); 2013 2014 /* 2015 * Get the initial "after" sequence number, if it fails, set to zero 2016 */ 2017 idva.va_mask = AT_SEQ; 2018 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, &ct)) 2019 idva.va_seq = 0; 2020 2021 /* 2022 * Force modified data and metadata out to stable storage. 2023 */ 2024 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, &ct); 2025 (void) VOP_FSYNC(dvp, 0, cs->cr, &ct); 2026 2027 if (error) { 2028 *cs->statusp = resp->status = puterrno4(error); 2029 goto final; 2030 } 2031 2032 /* 2033 * Get "after" change value, if it fails, simply return the 2034 * before value. 2035 */ 2036 adva.va_mask = AT_CTIME|AT_SEQ; 2037 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, &ct)) { 2038 adva.va_ctime = bdva.va_ctime; 2039 adva.va_seq = 0; 2040 } 2041 2042 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime) 2043 2044 /* 2045 * The cinfo.atomic = TRUE only if we have 2046 * non-zero va_seq's, and it has incremented by exactly one 2047 * during the VOP_LINK and it didn't change during the VOP_FSYNC. 2048 */ 2049 if (bdva.va_seq && idva.va_seq && adva.va_seq && 2050 idva.va_seq == (bdva.va_seq + 1) && 2051 idva.va_seq == adva.va_seq) 2052 resp->cinfo.atomic = TRUE; 2053 else 2054 resp->cinfo.atomic = FALSE; 2055 2056 *cs->statusp = resp->status = NFS4_OK; 2057 2058 final: 2059 DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs, 2060 LINK4res *, resp); 2061 } 2062 2063 /* 2064 * Used by mds_op_lookup and mds_op_lookupp to do the actual work. 2065 */ 2066 2067 /* ARGSUSED */ 2068 static nfsstat4 2069 mds_do_lookup(char *nm, uint_t buflen, struct svc_req *req, 2070 struct compound_state *cs) 2071 { 2072 int error; 2073 int different_export = 0; 2074 vnode_t *vp, *tvp, *pre_tvp = NULL, *oldvp = NULL; 2075 struct exportinfo *exi = NULL, *pre_exi = NULL; 2076 nfsstat4 stat; 2077 fid_t fid; 2078 int attrdir, dotdot, walk; 2079 bool_t is_newvp = FALSE; 2080 caller_context_t ct; 2081 nfs41_fh_fmt_t *fhp; 2082 2083 fhp = (nfs41_fh_fmt_t *)cs->fh.nfs_fh4_val; 2084 2085 attrdir = ((cs->vp->v_flag & V_XATTRDIR) == V_XATTRDIR) 2086 ? FH41_ATTRDIR : 0; 2087 2088 ASSERT(FH41_GET_FLAG(fhp, FH41_ATTRDIR) == attrdir); 2089 2090 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0'); 2091 2092 /* 2093 * If dotdotting, then need to check whether it's 2094 * above the root of a filesystem, or above an 2095 * export point. 2096 */ 2097 if (dotdot) { 2098 2099 /* 2100 * If dotdotting at the root of a filesystem, then 2101 * need to traverse back to the mounted-on filesystem 2102 * and do the dotdot lookup there. 2103 */ 2104 if (cs->vp->v_flag & VROOT) { 2105 2106 /* 2107 * If at the system root, then can 2108 * go up no further. 2109 */ 2110 if (VN_CMP(cs->vp, rootdir)) 2111 return (puterrno4(ENOENT)); 2112 2113 /* 2114 * Traverse back to the mounted-on filesystem 2115 */ 2116 cs->vp = untraverse(cs->vp); 2117 2118 /* 2119 * Set the different_export flag so we remember 2120 * to pick up a new exportinfo entry for 2121 * this new filesystem. 2122 */ 2123 different_export = 1; 2124 } else { 2125 2126 /* 2127 * If dotdotting above an export point then set 2128 * the different_export to get new export info. 2129 */ 2130 different_export = nfs_exported(cs->exi, cs->vp); 2131 } 2132 } 2133 2134 ct.cc_sysid = 0; 2135 ct.cc_pid = 0; 2136 ct.cc_caller_id = cs->instp->caller_id; 2137 ct.cc_flags = CC_DONTBLOCK; 2138 2139 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr, 2140 &ct, 0, NULL); 2141 if (error) 2142 return (puterrno4(error)); 2143 2144 /* 2145 * If the vnode is in a pseudo filesystem, check whether it is visible. 2146 * 2147 * XXX if the vnode is a symlink and it is not visible in 2148 * a pseudo filesystem, return ENOENT (not following symlink). 2149 * V4 client can not mount such symlink. 2150 * 2151 * In the same exported filesystem, if the security flavor used 2152 * is not an explicitly shared flavor, limit the view to the visible 2153 * list entries only. This is not a WRONGSEC case because it's already 2154 * checked via PUTROOTFH/PUTPUBFH or PUTFH. 2155 */ 2156 if (!different_export && 2157 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) || 2158 cs->access & CS_ACCESS_LIMITED)) { 2159 if (! nfs_visible(cs->exi, vp, &different_export)) { 2160 VN_RELE(vp); 2161 return (puterrno4(ENOENT)); 2162 } 2163 } 2164 2165 /* 2166 * If it's a mountpoint, then traverse it. 2167 */ 2168 if (vn_ismntpt(vp)) { 2169 pre_exi = cs->exi; /* save pre-traversed exportinfo */ 2170 pre_tvp = vp; /* save pre-traversed vnode */ 2171 2172 /* 2173 * hold pre_tvp to counteract rele by traverse. We will 2174 * need pre_tvp below if checkexport4 fails 2175 */ 2176 VN_HOLD(pre_tvp); 2177 tvp = vp; 2178 if ((error = traverse(&tvp)) != 0) { 2179 VN_RELE(vp); 2180 VN_RELE(pre_tvp); 2181 return (puterrno4(error)); 2182 } 2183 vp = tvp; 2184 different_export = 1; 2185 2186 } else if (vp->v_vfsp != cs->vp->v_vfsp) { 2187 /* 2188 * The vfsp comparison is to handle the case where 2189 * a LOFS mount is shared. lo_lookup traverses mount points, 2190 * and NFS is unaware of local fs transistions because 2191 * v_vfsmountedhere isn't set. For this special LOFS case, 2192 * the dir and the obj returned by lookup will have different 2193 * vfs ptrs. 2194 */ 2195 different_export = 1; 2196 } 2197 2198 if (different_export) { 2199 bzero(&fid, sizeof (fid)); 2200 fid.fid_len = MAXFIDSZ; 2201 error = vop_fid_pseudo(vp, &fid); 2202 if (error) { 2203 VN_RELE(vp); 2204 if (pre_tvp) 2205 VN_RELE(pre_tvp); 2206 return (puterrno4(error)); 2207 } 2208 2209 if (dotdot) 2210 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE); 2211 else 2212 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp); 2213 2214 if (exi == NULL) { 2215 if (pre_tvp) { 2216 /* 2217 * If this vnode is a mounted-on vnode, 2218 * but the mounted-on file system is not 2219 * exported, send back the filehandle for 2220 * the mounted-on vnode, not the root of 2221 * the mounted-on file system. 2222 */ 2223 VN_RELE(vp); 2224 vp = pre_tvp; 2225 exi = pre_exi; 2226 } else { 2227 VN_RELE(vp); 2228 return (puterrno4(EACCES)); 2229 } 2230 } else if (pre_tvp) { 2231 /* we're done with pre_tvp now. release extra hold */ 2232 VN_RELE(pre_tvp); 2233 } 2234 2235 cs->exi = exi; 2236 2237 /* 2238 * Now do a checkauth4. 2239 * 2240 * Checking here since the client/principle may not have 2241 * access to the cs->exi exported file system. 2242 * 2243 * If the client has access we also need to validate 2244 * the principle since it may have been re-mapped. 2245 * 2246 * We start with a new credential as a previous call to 2247 * checkauth4(), via a PUT*FH operation, wrote over cs->cr. 2248 */ 2249 crfree(cs->cr); 2250 cs->cr = crdup(cs->basecr); 2251 2252 if (cs->vp) 2253 oldvp = cs->vp; 2254 cs->vp = vp; 2255 is_newvp = TRUE; 2256 2257 stat = call_checkauth4(cs, req); 2258 if (stat != NFS4_OK) { 2259 VN_RELE(cs->vp); 2260 cs->vp = oldvp; 2261 return (stat); 2262 } 2263 } 2264 2265 /* 2266 * After various NFS checks, do a label check on the path 2267 * component. The label on this path should either be the 2268 * global zone's label or a zone's label. We are only 2269 * interested in the zone's label because exported files 2270 * in global zone is accessible (though read-only) to 2271 * clients. The exportability/visibility check is already 2272 * done before reaching this code. 2273 */ 2274 if (is_system_labeled()) { 2275 bslabel_t *clabel; 2276 2277 ASSERT(req->rq_label != NULL); 2278 clabel = req->rq_label; 2279 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *, 2280 "got client label from request(1)", struct svc_req *, req); 2281 2282 if (!blequal(&l_admin_low->tsl_label, clabel)) { 2283 if (!do_rfs_label_check(clabel, vp, 2284 DOMINANCE_CHECK, cs->exi)) { 2285 error = EACCES; 2286 goto err_out; 2287 } 2288 } else { 2289 /* 2290 * We grant access to admin_low label clients 2291 * only if the client is trusted, i.e. also 2292 * running Solaris Trusted Extension. 2293 */ 2294 struct sockaddr *ca; 2295 int addr_type; 2296 void *ipaddr; 2297 tsol_tpc_t *tp; 2298 2299 ca = (struct sockaddr *)svc_getrpccaller( 2300 req->rq_xprt)->buf; 2301 if (ca->sa_family == AF_INET) { 2302 addr_type = IPV4_VERSION; 2303 ipaddr = &((struct sockaddr_in *)ca)->sin_addr; 2304 } else if (ca->sa_family == AF_INET6) { 2305 addr_type = IPV6_VERSION; 2306 ipaddr = &((struct sockaddr_in6 *) 2307 ca)->sin6_addr; 2308 } 2309 tp = find_tpc(ipaddr, addr_type, B_FALSE); 2310 if (tp == NULL || tp->tpc_tp.tp_doi != 2311 l_admin_low->tsl_doi || tp->tpc_tp.host_type != 2312 SUN_CIPSO) { 2313 error = EACCES; 2314 goto err_out; 2315 } 2316 } 2317 } 2318 2319 error = mknfs41_fh(&cs->fh, vp, cs->exi); 2320 2321 err_out: 2322 if (error) { 2323 if (is_newvp) { 2324 VN_RELE(cs->vp); 2325 cs->vp = oldvp; 2326 } else 2327 VN_RELE(vp); 2328 return (puterrno4(error)); 2329 } 2330 2331 if (!is_newvp) { 2332 if (cs->vp) 2333 VN_RELE(cs->vp); 2334 cs->vp = vp; 2335 } else if (oldvp) 2336 VN_RELE(oldvp); 2337 2338 /* 2339 * if did lookup on attrdir and didn't lookup .., set named 2340 * attr fh flag 2341 */ 2342 if (attrdir && ! dotdot) 2343 FH41_SET_FLAG(fhp, FH41_NAMEDATTR); 2344 2345 /* Assume false for now, open proc will set this */ 2346 cs->mandlock = FALSE; 2347 2348 return (NFS4_OK); 2349 } 2350 2351 /* ARGSUSED */ 2352 static void 2353 mds_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2354 compound_state_t *cs) 2355 { 2356 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup; 2357 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup; 2358 char *nm; 2359 uint_t len; 2360 2361 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs, 2362 LOOKUP4args *, args); 2363 2364 if (cs->vp == NULL) { 2365 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2366 goto final; 2367 } 2368 2369 if (cs->vp->v_type == VLNK) { 2370 *cs->statusp = resp->status = NFS4ERR_SYMLINK; 2371 goto final; 2372 } 2373 2374 if (cs->vp->v_type != VDIR) { 2375 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 2376 goto final; 2377 } 2378 2379 if (!utf8_dir_verify(&args->objname)) { 2380 *cs->statusp = resp->status = NFS4ERR_INVAL; 2381 goto final; 2382 } 2383 2384 nm = utf8_to_str(&args->objname, &len, NULL); 2385 if (nm == NULL) { 2386 *cs->statusp = resp->status = NFS4ERR_INVAL; 2387 goto final; 2388 } 2389 2390 if (len > MAXNAMELEN) { 2391 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 2392 kmem_free(nm, len); 2393 goto final; 2394 } 2395 2396 *cs->statusp = resp->status = mds_do_lookup(nm, len, req, cs); 2397 2398 kmem_free(nm, len); 2399 2400 final: 2401 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs, 2402 LOOKUP4res *, resp); 2403 } 2404 2405 /* ARGSUSED */ 2406 static void 2407 mds_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req, 2408 compound_state_t *cs) 2409 { 2410 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp; 2411 2412 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs); 2413 2414 if (cs->vp == NULL) { 2415 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2416 goto final; 2417 } 2418 2419 if (cs->vp->v_type != VDIR) { 2420 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 2421 goto final; 2422 } 2423 2424 *cs->statusp = resp->status = mds_do_lookup("..", 3, req, cs); 2425 2426 /* 2427 * From NFSV4 Specification, LOOKUPP should not check for 2428 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead. 2429 */ 2430 if (resp->status == NFS4ERR_WRONGSEC) { 2431 *cs->statusp = resp->status = NFS4_OK; 2432 } 2433 2434 final: 2435 DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs, 2436 LOOKUPP4res *, resp); 2437 } 2438 2439 2440 /*ARGSUSED2*/ 2441 static void 2442 mds_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2443 compound_state_t *cs) 2444 { 2445 OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr; 2446 OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr; 2447 vnode_t *avp = NULL; 2448 int lookup_flags = LOOKUP_XATTR, error; 2449 int exp_ro = 0; 2450 caller_context_t ct; 2451 2452 DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs, 2453 OPENATTR4args *, args); 2454 2455 if (cs->vp == NULL) { 2456 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2457 goto final; 2458 } 2459 2460 /* 2461 * Make a couple of checks made by copen() 2462 * 2463 * Check to make sure underlying fs supports xattrs. This 2464 * is required because solaris filesystem implementations 2465 * (UFS/TMPFS) don't enforce the noxattr mount option 2466 * in VOP_LOOKUP(LOOKUP_XATTR). If fs doesn't support this 2467 * pathconf cmd or if fs supports cmd but doesn't claim 2468 * support for xattr, return NOTSUPP. It would be better 2469 * to use VOP_PATHCONF( _PC_XATTR_ENABLED) for this; however, 2470 * that cmd is not available to VOP_PATHCONF interface 2471 * (it's only implemented inside pathconf syscall)... 2472 * 2473 * Verify permission to put attributes on files (access 2474 * checks from copen). 2475 */ 2476 2477 if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0) { 2478 error = ENOTSUP; 2479 goto error_out; 2480 } 2481 2482 ct.cc_sysid = 0; 2483 ct.cc_pid = 0; 2484 ct.cc_caller_id = cs->instp->caller_id; 2485 ct.cc_flags = CC_DONTBLOCK; 2486 2487 if ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, &ct) != 0) && 2488 (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, &ct) != 0) && 2489 (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, &ct) != 0)) { 2490 error = EACCES; 2491 goto error_out; 2492 } 2493 2494 /* 2495 * The CREATE_XATTR_DIR VOP flag cannot be specified if 2496 * the file system is exported read-only -- regardless of 2497 * createdir flag. Otherwise the attrdir would be created 2498 * (assuming server fs isn't mounted readonly locally). If 2499 * VOP_LOOKUP returns ENOENT in this case, the error will 2500 * be translated into EROFS. ENOSYS is mapped to ENOTSUP 2501 * because specfs has no VOP_LOOKUP op, so the macro would 2502 * return ENOSYS. EINVAL is returned by all (current) 2503 * Solaris file system implementations when any of their 2504 * restrictions are violated (xattr(dir) can't have xattrdir). 2505 * Returning NOTSUPP is more appropriate in this case 2506 * because the object will never be able to have an attrdir. 2507 */ 2508 if (args->createdir && ! (exp_ro = rdonly4(cs->exi, cs->vp, req))) 2509 lookup_flags |= CREATE_XATTR_DIR; 2510 2511 error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, 2512 cs->cr, &ct, 0, NULL); 2513 2514 if (error) { 2515 if (error == ENOENT && args->createdir && exp_ro) 2516 error = EROFS; 2517 else if (error == EINVAL || error == ENOSYS) 2518 error = ENOTSUP; 2519 goto error_out; 2520 } 2521 2522 ASSERT(avp->v_flag & V_XATTRDIR); 2523 2524 error = mknfs41_fh(&cs->fh, avp, cs->exi); 2525 2526 if (error) { 2527 VN_RELE(avp); 2528 goto error_out; 2529 } 2530 2531 VN_RELE(cs->vp); 2532 cs->vp = avp; 2533 2534 /* 2535 * There is no requirement for an attrdir fh flag 2536 * because the attrdir has a vnode flag to distinguish 2537 * it from regular (non-xattr) directories. The 2538 * FH41_ATTRDIR flag is set for future sanity checks. 2539 */ 2540 FH41_SET_FLAG((nfs41_fh_fmt_t *)cs->fh.nfs_fh4_val, FH41_ATTRDIR); 2541 *cs->statusp = resp->status = NFS4_OK; 2542 goto final; 2543 2544 error_out: 2545 2546 *cs->statusp = resp->status = puterrno4(error); 2547 2548 final: 2549 DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs, 2550 OPENATTR4res *, resp); 2551 } 2552 2553 static int 2554 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred, 2555 caller_context_t *ct) 2556 { 2557 int error; 2558 int i; 2559 clock_t delaytime; 2560 2561 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay); 2562 2563 /* 2564 * Don't block on mandatory locks. If this routine returns 2565 * EAGAIN, the caller should return NFS4ERR_LOCKED. 2566 */ 2567 uio->uio_fmode = FNONBLOCK; 2568 2569 for (i = 0; i < rfs4_maxlock_tries; i++) { 2570 if (direction == FREAD) { 2571 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct); 2572 error = VOP_READ(vp, uio, ioflag, cred, ct); 2573 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct); 2574 } else { 2575 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct); 2576 error = VOP_WRITE(vp, uio, ioflag, cred, ct); 2577 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct); 2578 } 2579 2580 if (error != EAGAIN) 2581 break; 2582 2583 if (i < rfs4_maxlock_tries - 1) { 2584 delay(delaytime); 2585 delaytime *= 2; 2586 } 2587 } 2588 2589 return (error); 2590 } 2591 2592 /* ARGSUSED */ 2593 static void 2594 mds_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2595 compound_state_t *cs) 2596 { 2597 READ4args *args = &argop->nfs_argop4_u.opread; 2598 READ4res *resp = &resop->nfs_resop4_u.opread; 2599 int error; 2600 nnode_t *nn = NULL; 2601 struct iovec iov; 2602 struct uio uio; 2603 bool_t *deleg = &cs->deleg; 2604 nfsstat4 stat; 2605 mblk_t *mp; 2606 int alloc_err = 0; 2607 caller_context_t ct; 2608 uint32_t nnioflags = 0; 2609 2610 DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs, 2611 READ4args, args); 2612 2613 nn = cs->nn; 2614 if (nn == NULL) { 2615 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 2616 goto final; 2617 } 2618 if (cs->access == CS_ACCESS_DENIED) { 2619 *cs->statusp = resp->status = NFS4ERR_ACCESS; 2620 goto final; 2621 } 2622 2623 if ((stat = nnop_check_stateid(nn, cs, FREAD, &args->stateid, 2624 FALSE, deleg, TRUE, &ct, NULL)) != NFS4_OK) { 2625 *cs->statusp = resp->status = stat; 2626 goto final; 2627 } 2628 2629 error = nnop_io_prep(nn, &nnioflags, cs->cr, &ct, args->offset, 2630 args->count, NULL); 2631 if (error != 0) { 2632 *cs->statusp = resp->status = nnode_stat4(error, 1); 2633 goto out; 2634 } 2635 2636 if (nnioflags & NNODE_IO_FLAG_PAST_EOF) { 2637 *cs->statusp = resp->status = NFS4_OK; 2638 resp->eof = TRUE; 2639 resp->data_len = 0; 2640 resp->data_val = NULL; 2641 resp->mblk = NULL; 2642 *cs->statusp = resp->status = NFS4_OK; 2643 goto out; 2644 } 2645 2646 if (args->count == 0) { 2647 *cs->statusp = resp->status = NFS4_OK; 2648 resp->eof = FALSE; 2649 resp->data_len = 0; 2650 resp->data_val = NULL; 2651 resp->mblk = NULL; 2652 goto out; 2653 } 2654 2655 /* 2656 * Do not allocate memory more than maximum allowed 2657 * transfer size 2658 */ 2659 if (args->count > rfs4_tsize(req)) 2660 args->count = rfs4_tsize(req); 2661 2662 if (args->wlist) { 2663 mp = NULL; 2664 (void) rdma_get_wchunk(req, &iov, args->wlist); 2665 } else { 2666 /* 2667 * mp will contain the data to be sent out in the read reply. 2668 * It will be freed after the reply has been sent. 2669 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, 2670 * so that the call to xdrmblk_putmblk() never fails. 2671 * If the first alloc of the requested size fails, then 2672 * decrease the size to something more reasonable and wait 2673 * for the allocation to occur. 2674 */ 2675 mp = allocb(RNDUP(args->count), BPRI_MED); 2676 if (mp == NULL) { 2677 if (args->count > MAXBSIZE) 2678 args->count = MAXBSIZE; 2679 mp = allocb_wait(RNDUP(args->count), BPRI_MED, 2680 STR_NOSIG, &alloc_err); 2681 } 2682 ASSERT(mp != NULL); 2683 ASSERT(alloc_err == 0); 2684 2685 iov.iov_base = (caddr_t)mp->b_datap->db_base; 2686 iov.iov_len = args->count; 2687 } 2688 2689 uio.uio_iov = &iov; 2690 uio.uio_iovcnt = 1; 2691 uio.uio_segflg = UIO_SYSSPACE; 2692 uio.uio_extflg = UIO_COPY_CACHED; 2693 uio.uio_loffset = args->offset; 2694 uio.uio_resid = args->count; 2695 2696 error = nnop_read(nn, &nnioflags, cs->cr, &ct, &uio, 0); 2697 if (error) { 2698 if (mp != NULL) 2699 freeb(mp); 2700 *cs->statusp = resp->status = nnode_stat4(error, 1); 2701 goto out; 2702 } 2703 2704 *cs->statusp = resp->status = NFS4_OK; 2705 2706 ASSERT(uio.uio_resid >= 0); 2707 resp->data_len = args->count - uio.uio_resid; 2708 resp->data_val = (char *)mp->b_datap->db_base; 2709 resp->mblk = mp; 2710 2711 resp->eof = (nnioflags & NNODE_IO_FLAG_EOF) ? TRUE : FALSE; 2712 2713 out: 2714 nnop_io_release(nn, nnioflags, &ct); 2715 2716 final: 2717 DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs, 2718 READ4res *, resp); 2719 } 2720 2721 /*ARGSUSED*/ 2722 static void 2723 mds_op_read_free(nfs_resop4 *resop, compound_state_t *cs) 2724 { 2725 /* Common function for NFSv4.0 and NFSv4.1 */ 2726 rfs4_op_read_free(resop); 2727 } 2728 2729 /* ARGSUSED */ 2730 static void 2731 mds_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req, 2732 compound_state_t *cs) 2733 { 2734 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh; 2735 int error; 2736 vnode_t *vp; 2737 struct exportinfo *exi, *sav_exi; 2738 nfs41_fh_fmt_t *fhp; 2739 fid_t exp_fid; 2740 2741 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs); 2742 2743 if (cs->vp) { 2744 VN_RELE(cs->vp); 2745 cs->vp = NULL; 2746 } 2747 2748 if (cs->cr) 2749 crfree(cs->cr); 2750 2751 cs->cr = crdup(cs->basecr); 2752 2753 vp = exi_public->exi_vp; 2754 if (vp == NULL) { 2755 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 2756 goto final; 2757 } 2758 2759 error = mknfs41_fh(&cs->fh, vp, exi_public); 2760 if (error != 0) { 2761 *cs->statusp = resp->status = puterrno4(error); 2762 goto final; 2763 } 2764 sav_exi = cs->exi; 2765 if (exi_public == exi_root) { 2766 /* 2767 * No filesystem is actually shared public, so we default 2768 * to exi_root. In this case, we must check whether root 2769 * is exported. 2770 */ 2771 fhp = (nfs41_fh_fmt_t *)cs->fh.nfs_fh4_val; 2772 2773 exp_fid.fid_len = fhp->fh.v1.export_fid.len; 2774 2775 bcopy(fhp->fh.v1.export_fid.val, exp_fid.fid_data, 2776 exp_fid.fid_len); 2777 2778 /* 2779 * if root filesystem is exported, the exportinfo struct that we 2780 * should use is what checkexport4 returns, because root_exi is 2781 * actually a mostly empty struct. 2782 */ 2783 exi = checkexport4(&fhp->fh.v1.export_fsid, &exp_fid, NULL); 2784 cs->exi = ((exi != NULL) ? exi : exi_public); 2785 } else { 2786 /* 2787 * it's a properly shared filesystem 2788 */ 2789 cs->exi = exi_public; 2790 } 2791 2792 VN_HOLD(vp); 2793 cs->vp = vp; 2794 2795 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) { 2796 VN_RELE(cs->vp); 2797 cs->vp = NULL; 2798 cs->exi = sav_exi; 2799 goto final; 2800 } 2801 2802 *cs->statusp = resp->status = NFS4_OK; 2803 2804 final: 2805 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs, 2806 PUTPUBFH4res *, resp); 2807 } 2808 2809 /* 2810 * XXX - issue with put*fh operations. 2811 * 2812 * let us assume that /export/home is shared via NFS and a NFS client 2813 * wishes to mount /export/home/joe. 2814 * 2815 * If /export, home, or joe have restrictive search permissions, then 2816 * the NFS Server should not return a filehandle to the client. 2817 * 2818 * This case is easy to enforce. However, the NFS Client does not know 2819 * which security flavor should be used until the pathname has been 2820 * fully resolved. In addition there is another complication for uid 2821 * mapping. If the credential being used is root, the default behaviour 2822 * will be to map it to the anonymous user. However the NFS Server can not 2823 * map it until the pathname has been fully resolved. 2824 * 2825 * XXX: JEFF: Proposed solution. 2826 * 2827 * Luckily, SECINFO uses a full pathname. So what we will 2828 * have to do in mds_op_lookup is check that flavor of 2829 * the target object matches that of the request, and if root was the 2830 * caller, check for the root= and anon= options, and if necessary, 2831 * repeat the lookup using the right cred_t. 2832 * 2833 * But that's not done yet. 2834 */ 2835 /* ARGSUSED */ 2836 static void 2837 mds_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2838 compound_state_t *cs) 2839 { 2840 PUTFH4args *args = &argop->nfs_argop4_u.opputfh; 2841 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh; 2842 nfs41_fh_fmt_t *fhp = NULL; 2843 fid_t exp_fid; 2844 int error; 2845 2846 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs, 2847 PUTFH4args *, args); 2848 2849 /* 2850 * release the old nnode, vnode and cred. 2851 */ 2852 if (cs->nn) 2853 nnode_rele(&cs->nn); 2854 if (cs->vp) { 2855 VN_RELE(cs->vp); 2856 cs->vp = NULL; 2857 } 2858 if (cs->cr) { 2859 crfree(cs->cr); 2860 cs->cr = NULL; 2861 } 2862 2863 2864 /* 2865 * Check exportinfo only if it's a FH41_TYPE_NFS filehandle. 2866 * If the filehandle is otherwise incorrect, 2867 * nnode_from_fh_v41() will return an error. 2868 */ 2869 fhp = (nfs41_fh_fmt_t *)args->object.nfs_fh4_val; 2870 if (fhp->type == FH41_TYPE_NFS) { 2871 exp_fid.fid_len = fhp->fh.v1.export_fid.len; 2872 bcopy(fhp->fh.v1.export_fid.val, exp_fid.fid_data, 2873 exp_fid.fid_len); 2874 cs->exi = checkexport4(&fhp->fh.v1.export_fsid, &exp_fid, NULL); 2875 if (cs->exi == NULL) { 2876 *cs->statusp = resp->status = NFS4ERR_STALE; 2877 DTRACE_PROBE(nfss41__e__chkexp); 2878 goto final; 2879 } 2880 } 2881 2882 error = nnode_from_fh_v41(&cs->nn, &args->object); 2883 if (error != 0) { 2884 resp->status = *cs->statusp = nnode_stat4(error, 1); 2885 goto final; 2886 } 2887 ASSERT(cs->nn != NULL); 2888 2889 cs->vp = nnop_io_getvp(cs->nn); 2890 2891 cs->cr = crdup(cs->basecr); 2892 ASSERT(cs->cr != NULL); 2893 2894 if (fhp->type == FH41_TYPE_NFS) { 2895 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) { 2896 nnode_rele(&cs->nn); 2897 VN_RELE(cs->vp); 2898 cs->vp = NULL; 2899 crfree(cs->cr); 2900 cs->cr = NULL; 2901 *cs->statusp = resp->status; 2902 DTRACE_PROBE(nfss41__e__fail_auth); 2903 goto final; 2904 } 2905 } 2906 2907 nfs_fh4_copy(&args->object, &cs->fh); 2908 *cs->statusp = resp->status = NFS4_OK; 2909 cs->deleg = FALSE; 2910 2911 final: 2912 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs, 2913 PUTFH4res *, resp); 2914 } 2915 2916 /* ARGSUSED */ 2917 static void 2918 mds_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 2919 compound_state_t *cs) 2920 2921 { 2922 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh; 2923 int error; 2924 fid_t fid; 2925 struct exportinfo *exi, *sav_exi; 2926 2927 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs); 2928 2929 if (cs->vp) { 2930 VN_RELE(cs->vp); 2931 cs->vp = NULL; 2932 } 2933 2934 if (cs->cr) 2935 crfree(cs->cr); 2936 2937 cs->cr = crdup(cs->basecr); 2938 2939 /* 2940 * Using rootdir, the system root vnode, 2941 * get its fid. 2942 */ 2943 bzero(&fid, sizeof (fid)); 2944 fid.fid_len = MAXFIDSZ; 2945 error = vop_fid_pseudo(rootdir, &fid); 2946 if (error != 0) { 2947 *cs->statusp = resp->status = puterrno4(error); 2948 goto final; 2949 } 2950 2951 /* 2952 * Then use the root fsid & fid it to find out if it's exported 2953 * 2954 * If the server root isn't exported directly, then 2955 * it should at least be a pseudo export based on 2956 * one or more exports further down in the server's 2957 * file tree. 2958 */ 2959 exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL); 2960 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) { 2961 DTRACE_PROBE(nfss41__e__chkexp); 2962 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; 2963 goto final; 2964 } 2965 2966 /* 2967 * Now make a filehandle based on the root 2968 * export and root vnode. 2969 */ 2970 error = mknfs41_fh(&cs->fh, rootdir, exi); 2971 if (error != 0) { 2972 *cs->statusp = resp->status = puterrno4(error); 2973 goto final; 2974 } 2975 2976 sav_exi = cs->exi; 2977 cs->exi = exi; 2978 2979 VN_HOLD(rootdir); 2980 cs->vp = rootdir; 2981 2982 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) { 2983 VN_RELE(rootdir); 2984 cs->vp = NULL; 2985 cs->exi = sav_exi; 2986 goto final; 2987 } 2988 2989 *cs->statusp = resp->status = NFS4_OK; 2990 cs->deleg = FALSE; 2991 2992 final: 2993 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs, 2994 PUTROOTFH4res *, resp); 2995 } 2996 2997 /* 2998 * A directory entry is a valid nfsv4 entry if 2999 * - it has a non-zero ino 3000 * - it is not a dot or dotdot name 3001 * - it is visible in a pseudo export or in a real export that can 3002 * only have a limited view. 3003 */ 3004 static bool_t 3005 valid_nfs4_entry(struct exportinfo *exi, struct dirent64 *dp, 3006 int *expseudo, int check_visible) 3007 { 3008 if (dp->d_ino == 0 || NFS_IS_DOTNAME(dp->d_name)) { 3009 *expseudo = 0; 3010 return (FALSE); 3011 } 3012 3013 if (! check_visible) { 3014 *expseudo = 0; 3015 return (TRUE); 3016 } 3017 3018 return (nfs_visible_inode(exi, dp->d_ino, expseudo)); 3019 } 3020 3021 3022 /* 3023 * readlink: args: CURRENT_FH. 3024 * res: status. If success - CURRENT_FH unchanged, return linktext. 3025 */ 3026 3027 /* ARGSUSED */ 3028 static void 3029 mds_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3030 compound_state_t *cs) 3031 { 3032 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink; 3033 int error; 3034 vnode_t *vp; 3035 struct iovec iov; 3036 struct vattr va; 3037 struct uio uio; 3038 char *data; 3039 caller_context_t ct; 3040 3041 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs); 3042 3043 /* CURRENT_FH: directory */ 3044 vp = cs->vp; 3045 if (vp == NULL) { 3046 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 3047 goto final; 3048 } 3049 3050 if (cs->access == CS_ACCESS_DENIED) { 3051 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3052 goto final; 3053 } 3054 3055 if (vp->v_type == VDIR) { 3056 *cs->statusp = resp->status = NFS4ERR_ISDIR; 3057 goto final; 3058 } 3059 3060 if (vp->v_type != VLNK) { 3061 *cs->statusp = resp->status = NFS4ERR_INVAL; 3062 goto final; 3063 } 3064 3065 ct.cc_sysid = 0; 3066 ct.cc_pid = 0; 3067 ct.cc_caller_id = cs->instp->caller_id; 3068 ct.cc_flags = CC_DONTBLOCK; 3069 3070 va.va_mask = AT_MODE; 3071 error = VOP_GETATTR(vp, &va, 0, cs->cr, &ct); 3072 if (error) { 3073 *cs->statusp = resp->status = puterrno4(error); 3074 goto final; 3075 } 3076 3077 if (MANDLOCK(vp, va.va_mode)) { 3078 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3079 goto final; 3080 } 3081 3082 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP); 3083 3084 iov.iov_base = data; 3085 iov.iov_len = MAXPATHLEN; 3086 uio.uio_iov = &iov; 3087 uio.uio_iovcnt = 1; 3088 uio.uio_segflg = UIO_SYSSPACE; 3089 uio.uio_extflg = UIO_COPY_CACHED; 3090 uio.uio_loffset = 0; 3091 uio.uio_resid = MAXPATHLEN; 3092 3093 error = VOP_READLINK(vp, &uio, cs->cr, &ct); 3094 3095 if (error) { 3096 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1); 3097 *cs->statusp = resp->status = puterrno4(error); 3098 goto final; 3099 } 3100 3101 *(data + MAXPATHLEN - uio.uio_resid) = '\0'; 3102 3103 /* 3104 * treat link name as data 3105 */ 3106 (void) str_to_utf8(data, &resp->link); 3107 3108 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1); 3109 *cs->statusp = resp->status = NFS4_OK; 3110 3111 final: 3112 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs, 3113 READLINK4res *, resp); 3114 } 3115 3116 /*ARGSUSED*/ 3117 static void 3118 mds_op_readlink_free(nfs_resop4 *resop, compound_state_t *cs) 3119 { 3120 /* Common function used for NFSv4.0 and NFSv4.1 */ 3121 rfs4_op_readlink_free(resop); 3122 } 3123 3124 /* ARGSUSED */ 3125 static void 3126 mds_op_reclaim_complete(nfs_argop4 *argop, nfs_resop4 *resop, 3127 struct svc_req *req, compound_state_t *cs) 3128 { 3129 RECLAIM_COMPLETE4args *args = &argop->nfs_argop4_u.opreclaim_complete; 3130 RECLAIM_COMPLETE4res *resp = &resop->nfs_resop4_u.opreclaim_complete; 3131 rfs4_client_t *cp; 3132 3133 cp = cs->cp; 3134 3135 if (cp->rc_reclaim_completed) { 3136 *cs->statusp = resp->rcr_status = NFS4ERR_COMPLETE_ALREADY; 3137 return; 3138 } 3139 3140 if (args->rca_one_fs) { 3141 /* do what? we don't track this */ 3142 *cs->statusp = resp->rcr_status = NFS4_OK; 3143 return; 3144 } 3145 3146 cp->rc_reclaim_completed = 1; 3147 3148 /* did we have reclaimable state stored for this client? */ 3149 if (cp->rc_can_reclaim) 3150 atomic_add_32(&(cs->instp->reclaim_cnt), -1); 3151 3152 *cs->statusp = resp->rcr_status = NFS4_OK; 3153 } 3154 3155 /* 3156 * short utility function to lookup a file and recall the delegation 3157 */ 3158 static rfs4_file_t * 3159 mds_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp, 3160 int *lkup_error, struct compound_state *cs) 3161 { 3162 vnode_t *vp; 3163 rfs4_file_t *fp = NULL; 3164 bool_t fcreate = FALSE; 3165 int error; 3166 3167 if (vpp) 3168 *vpp = NULL; 3169 3170 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr, 3171 NULL, 0, NULL)) == 0) { 3172 if (vp->v_type == VREG) 3173 fp = rfs4_findfile(cs->instp, vp, NULL, &fcreate); 3174 if (vpp) 3175 *vpp = vp; 3176 else 3177 VN_RELE(vp); 3178 } 3179 3180 if (lkup_error) 3181 *lkup_error = error; 3182 3183 return (fp); 3184 } 3185 3186 static int 3187 do_ctl_mds_remove(vnode_t *vp, rfs4_file_t *fp, compound_state_t *cs) 3188 { 3189 fid_t fid; 3190 nfs41_fid_t nfs41_fid; 3191 int error = 0; 3192 3193 /* 3194 * Use the file layout to determine which data servers to 3195 * send DS_REMOVEs to. If the layout is not cached in the 3196 * rfs4_file_t either this means that we do not have a layout 3197 * or it needs to be read in from disk. Right now, we do not 3198 * attempt to read the layout in from disk, but future phases 3199 * of REMOVE handling will take this into consideration. 3200 * 3201 * Known Problems with this implementation of REMOVE: 3202 * 1. Not attempting to read a layout from disk could mean 3203 * that if an on-disk layout did exist, storage on the data 3204 * servers will not be freed. 3205 * 3206 * 2. The server populates the layout stored in the rfs4_file_t 3207 * when it receives a LAYOUTGET. If the file has been written 3208 * (perhaps in a past server instance), but no clients have 3209 * issued new LAYOUTGETs, we will not have a cached layout and 3210 * we will not free space on the data servers. 3211 * 3212 * 3. If any of the DS_REMOVE calls to the data servers fail 3213 * the errors are ignored and will not be retried. This may 3214 * cause leaked space on the the data server. 3215 */ 3216 if (fp->rf_mlo != NULL) { 3217 bzero(&fid, sizeof (fid)); 3218 fid.fid_len = MAXFIDSZ; 3219 3220 error = vop_fid_pseudo(vp, &fid); 3221 if (error) { 3222 DTRACE_NFSV4_1(nfss__e__vop_fid_pseudo_failed, 3223 int, error); 3224 return (error); 3225 } else { 3226 nfs41_fid.len = fid.fid_len; 3227 bcopy(fid.fid_data, nfs41_fid.val, nfs41_fid.len); 3228 } 3229 3230 error = ctl_mds_clnt_remove_file(cs->instp, cs->exi->exi_fsid, 3231 nfs41_fid, fp->rf_mlo); 3232 } else 3233 DTRACE_PROBE(nfss__i__layout_is_null_cannot_remove); 3234 3235 return (error); 3236 } 3237 3238 /* 3239 * remove: args: CURRENT_FH: directory; name. 3240 * res: status. If success - CURRENT_FH unchanged, return change_info 3241 * for directory. 3242 */ 3243 /* ARGSUSED */ 3244 static void 3245 mds_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3246 compound_state_t *cs) 3247 { 3248 REMOVE4args *args = &argop->nfs_argop4_u.opremove; 3249 REMOVE4res *resp = &resop->nfs_resop4_u.opremove; 3250 int error; 3251 vnode_t *dvp, *vp; 3252 struct vattr bdva, idva, adva; 3253 char *nm; 3254 uint_t len; 3255 rfs4_file_t *fp; 3256 int in_crit = 0; 3257 bslabel_t *clabel; 3258 caller_context_t ct; 3259 3260 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs, 3261 REMOVE4args *, args); 3262 3263 /* CURRENT_FH: directory */ 3264 dvp = cs->vp; 3265 if (dvp == NULL) { 3266 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 3267 goto final; 3268 } 3269 3270 if (cs->access == CS_ACCESS_DENIED) { 3271 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3272 goto final; 3273 } 3274 3275 /* 3276 * If there is an unshared filesystem mounted on this vnode, 3277 * Do not allow to remove anything in this directory. 3278 */ 3279 if (vn_ismntpt(dvp)) { 3280 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3281 goto final; 3282 } 3283 3284 if (dvp->v_type != VDIR) { 3285 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 3286 goto final; 3287 } 3288 3289 if (!utf8_dir_verify(&args->target)) { 3290 *cs->statusp = resp->status = NFS4ERR_INVAL; 3291 goto final; 3292 } 3293 3294 /* 3295 * Lookup the file so that we can check if it's a directory 3296 */ 3297 nm = utf8_to_fn(&args->target, &len, NULL); 3298 if (nm == NULL) { 3299 *cs->statusp = resp->status = NFS4ERR_INVAL; 3300 goto final; 3301 } 3302 3303 if (len > MAXNAMELEN) { 3304 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 3305 kmem_free(nm, len); 3306 goto final; 3307 } 3308 3309 if (rdonly4(cs->exi, cs->vp, req)) { 3310 *cs->statusp = resp->status = NFS4ERR_ROFS; 3311 kmem_free(nm, len); 3312 goto final; 3313 } 3314 3315 /* 3316 * Lookup the file to determine type and while we are see if 3317 * there is a file struct around and check for delegation. 3318 * We don't need to acquire va_seq before this lookup, if 3319 * it causes an update, cinfo.before will not match, which will 3320 * trigger a cache flush even if atomic is TRUE. 3321 */ 3322 fp = mds_lookup_and_findfile(dvp, nm, &vp, &error, cs); 3323 if (vp != NULL) { 3324 if (rfs4_check_delegated(FWRITE, vp, TRUE, TRUE, TRUE, NULL)) { 3325 VN_RELE(vp); 3326 rfs4_file_rele(fp); 3327 *cs->statusp = resp->status = NFS4ERR_DELAY; 3328 kmem_free(nm, len); 3329 goto final; 3330 } 3331 } else { /* Didn't find anything to remove */ 3332 *cs->statusp = resp->status = error; 3333 kmem_free(nm, len); 3334 goto final; 3335 } 3336 3337 if (nbl_need_check(vp)) { 3338 nbl_start_crit(vp, RW_READER); 3339 in_crit = 1; 3340 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, &ct)) { 3341 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN; 3342 kmem_free(nm, len); 3343 nbl_end_crit(vp); 3344 VN_RELE(vp); 3345 if (fp) { 3346 rfs4_clear_dont_grant(cs->instp, fp); 3347 rfs4_file_rele(fp); 3348 } 3349 goto final; 3350 } 3351 } 3352 3353 /* check label before allowing removal */ 3354 if (is_system_labeled()) { 3355 ASSERT(req->rq_label != NULL); 3356 clabel = req->rq_label; 3357 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *, 3358 "got client label from request(1)", 3359 struct svc_req *, req); 3360 if (!blequal(&l_admin_low->tsl_label, clabel)) { 3361 if (!do_rfs_label_check(clabel, vp, 3362 EQUALITY_CHECK, cs->exi)) { 3363 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3364 kmem_free(nm, len); 3365 if (in_crit) 3366 nbl_end_crit(vp); 3367 VN_RELE(vp); 3368 if (fp) { 3369 rfs4_clear_dont_grant(cs->instp, fp); 3370 rfs4_file_rele(fp); 3371 } 3372 goto final; 3373 } 3374 } 3375 } 3376 3377 ct.cc_sysid = 0; 3378 ct.cc_pid = 0; 3379 ct.cc_caller_id = cs->instp->caller_id; 3380 ct.cc_flags = CC_DONTBLOCK; 3381 3382 /* Get dir "before" change value */ 3383 bdva.va_mask = AT_CTIME|AT_SEQ; 3384 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, &ct); 3385 if (error) { 3386 *cs->statusp = resp->status = puterrno4(error); 3387 kmem_free(nm, len); 3388 if (in_crit) 3389 nbl_end_crit(vp); 3390 VN_RELE(vp); 3391 if (fp) { 3392 rfs4_clear_dont_grant(cs->instp, fp); 3393 rfs4_file_rele(fp); 3394 } 3395 goto final; 3396 } 3397 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime) 3398 3399 /* Actually do the REMOVE operation */ 3400 if (vp->v_type == VDIR) { 3401 /* 3402 * Can't remove a directory that has a mounted-on filesystem. 3403 */ 3404 if (vn_ismntpt(vp)) { 3405 error = EACCES; 3406 } else { 3407 /* 3408 * System V defines rmdir to return EEXIST, 3409 * not * ENOTEMPTY, if the directory is not 3410 * empty. A System V NFS server needs to map 3411 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to 3412 * transmit over the wire. 3413 */ 3414 if ((error = VOP_RMDIR(dvp, nm, rootdir, 3415 cs->cr, &ct, 0)) == EEXIST) 3416 error = ENOTEMPTY; 3417 } 3418 } else { 3419 if ((error = VOP_REMOVE(dvp, nm, cs->cr, &ct, 0)) == 0 && 3420 fp != NULL) { 3421 struct vattr va; 3422 vnode_t *tvp; 3423 3424 rfs4_dbe_lock(fp->rf_dbe); 3425 tvp = fp->rf_vp; 3426 if (tvp) 3427 VN_HOLD(tvp); 3428 rfs4_dbe_unlock(fp->rf_dbe); 3429 3430 if (tvp) { 3431 /* 3432 * This is va_seq safe because we are not 3433 * manipulating dvp. 3434 */ 3435 va.va_mask = AT_NLINK; 3436 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, 3437 &ct) && va.va_nlink == 0) { 3438 if (in_crit) { 3439 nbl_end_crit(vp); 3440 in_crit = 0; 3441 } 3442 3443 /* Remove the layout */ 3444 mds_delete_layout(tvp); 3445 3446 /* 3447 * Remove objects on data servers. 3448 * Ignore errors for now.. 3449 */ 3450 (void) do_ctl_mds_remove(tvp, fp, cs); 3451 3452 /* Remove state on file remove */ 3453 rfs4_close_all_state(fp); 3454 } 3455 VN_RELE(tvp); 3456 } 3457 } 3458 } 3459 3460 if (in_crit) 3461 nbl_end_crit(vp); 3462 VN_RELE(vp); 3463 3464 if (fp) { 3465 rfs4_clear_dont_grant(cs->instp, fp); 3466 rfs4_file_rele(fp); 3467 fp = NULL; 3468 } 3469 kmem_free(nm, len); 3470 3471 if (error) { 3472 *cs->statusp = resp->status = puterrno4(error); 3473 goto final; 3474 } 3475 3476 /* 3477 * Get the initial "after" sequence number, if it fails, set to zero 3478 */ 3479 idva.va_mask = AT_SEQ; 3480 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, &ct)) 3481 idva.va_seq = 0; 3482 3483 /* 3484 * Force modified data and metadata out to stable storage. 3485 */ 3486 (void) VOP_FSYNC(dvp, 0, cs->cr, &ct); 3487 3488 /* 3489 * Get "after" change value, if it fails, simply return the 3490 * before value. 3491 */ 3492 adva.va_mask = AT_CTIME|AT_SEQ; 3493 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, &ct)) { 3494 adva.va_ctime = bdva.va_ctime; 3495 adva.va_seq = 0; 3496 } 3497 3498 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime) 3499 3500 /* 3501 * The cinfo.atomic = TRUE only if we have 3502 * non-zero va_seq's, and it has incremented by exactly one 3503 * during the VOP_REMOVE/RMDIR and it didn't change during 3504 * the VOP_FSYNC. 3505 */ 3506 if (bdva.va_seq && idva.va_seq && adva.va_seq && 3507 idva.va_seq == (bdva.va_seq + 1) && 3508 idva.va_seq == adva.va_seq) 3509 resp->cinfo.atomic = TRUE; 3510 else 3511 resp->cinfo.atomic = FALSE; 3512 3513 *cs->statusp = resp->status = NFS4_OK; 3514 3515 final: 3516 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs, 3517 REMOVE4res *, resp); 3518 } 3519 3520 /* 3521 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory, 3522 * oldname and newname. 3523 * res: status. If success - CURRENT_FH unchanged, return change_info 3524 * for both from and target directories. 3525 */ 3526 /* ARGSUSED */ 3527 static void 3528 mds_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, 3529 compound_state_t *cs) 3530 { 3531 RENAME4args *args = &argop->nfs_argop4_u.oprename; 3532 RENAME4res *resp = &resop->nfs_resop4_u.oprename; 3533 int error; 3534 vnode_t *odvp; 3535 vnode_t *ndvp; 3536 vnode_t *srcvp, *targvp; 3537 struct vattr obdva, oidva, oadva; 3538 struct vattr nbdva, nidva, nadva; 3539 char *onm, *nnm; 3540 uint_t olen, nlen; 3541 rfs4_file_t *fp, *sfp; 3542 int in_crit_src, in_crit_targ; 3543 int fp_rele_grant_hold, sfp_rele_grant_hold; 3544 bslabel_t *clabel; 3545 caller_context_t ct; 3546 3547 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs, 3548 RENAME4args *, args); 3549 3550 fp = sfp = NULL; 3551 srcvp = targvp = NULL; 3552 in_crit_src = in_crit_targ = 0; 3553 fp_rele_grant_hold = sfp_rele_grant_hold = 0; 3554 3555 /* CURRENT_FH: target directory */ 3556 ndvp = cs->vp; 3557 if (ndvp == NULL) { 3558 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 3559 goto final; 3560 } 3561 3562 /* SAVED_FH: from directory */ 3563 odvp = cs->saved_vp; 3564 if (odvp == NULL) { 3565 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE; 3566 goto final; 3567 } 3568 3569 if (cs->access == CS_ACCESS_DENIED) { 3570 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3571 goto final; 3572 } 3573 3574 /* 3575 * If there is an unshared filesystem mounted on this vnode, 3576 * do not allow to rename objects in this directory. 3577 */ 3578 if (vn_ismntpt(odvp)) { 3579 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3580 goto final; 3581 } 3582 3583 /* 3584 * If there is an unshared filesystem mounted on this vnode, 3585 * do not allow to rename to this directory. 3586 */ 3587 if (vn_ismntpt(ndvp)) { 3588 *cs->statusp = resp->status = NFS4ERR_ACCESS; 3589 goto final; 3590 } 3591 3592 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) { 3593 *cs->statusp = resp->status = NFS4ERR_NOTDIR; 3594 goto final; 3595 } 3596 3597 if (cs->saved_exi != cs->exi) { 3598 *cs->statusp = resp->status = NFS4ERR_XDEV; 3599 goto final; 3600 } 3601 3602 if (!utf8_dir_verify(&args->oldname)) { 3603 *cs->statusp = resp->status = NFS4ERR_INVAL; 3604 goto final; 3605 } 3606 3607 if (!utf8_dir_verify(&args->newname)) { 3608 *cs->statusp = resp->status = NFS4ERR_INVAL; 3609 goto final; 3610 } 3611 3612 onm = utf8_to_fn(&args->oldname, &olen, NULL); 3613 if (onm == NULL) { 3614 *cs->statusp = resp->status = NFS4ERR_INVAL; 3615 goto final; 3616 } 3617 3618 nnm = utf8_to_fn(&args->newname, &nlen, NULL); 3619 if (nnm == NULL) { 3620 *cs->statusp = resp->status = NFS4ERR_INVAL; 3621 kmem_free(onm, olen); 3622 goto final; 3623 } 3624 3625 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) { 3626 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG; 3627 kmem_free(onm, olen); 3628 kmem_free(nnm, nlen); 3629 goto final; 3630 } 3631 3632 3633 if (rdonly4(cs->exi, cs->vp, req)) { 3634 *cs->statusp = resp->status = NFS4ERR_ROFS; 3635 kmem_free(onm,