1 789 ahrens /* 2 789 ahrens * CDDL HEADER START 3 789 ahrens * 4 789 ahrens * The contents of this file are subject to the terms of the 5 1460 marks * Common Development and Distribution License (the "License"). 6 1460 marks * You may not use this file except in compliance with the License. 7 789 ahrens * 8 789 ahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 789 ahrens * or http://www.opensolaris.org/os/licensing. 10 789 ahrens * See the License for the specific language governing permissions 11 789 ahrens * and limitations under the License. 12 789 ahrens * 13 789 ahrens * When distributing Covered Code, include this CDDL HEADER in each 14 789 ahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 789 ahrens * If applicable, add the following below this CDDL HEADER, with the 16 789 ahrens * fields enclosed by brackets "[]" replaced with your own identifying 17 789 ahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18 789 ahrens * 19 789 ahrens * CDDL HEADER END 20 789 ahrens */ 21 789 ahrens /* 22 8636 Mark * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 789 ahrens * Use is subject to license terms. 
24 789 ahrens */ 25 4144 peteh 26 4144 peteh /* Portions Copyright 2007 Jeremy Teo */ 27 789 ahrens 28 789 ahrens #include <sys/types.h> 29 789 ahrens #include <sys/param.h> 30 789 ahrens #include <sys/time.h> 31 789 ahrens #include <sys/systm.h> 32 789 ahrens #include <sys/sysmacros.h> 33 789 ahrens #include <sys/resource.h> 34 789 ahrens #include <sys/vfs.h> 35 3898 rsb #include <sys/vfs_opreg.h> 36 789 ahrens #include <sys/vnode.h> 37 789 ahrens #include <sys/file.h> 38 789 ahrens #include <sys/stat.h> 39 789 ahrens #include <sys/kmem.h> 40 789 ahrens #include <sys/taskq.h> 41 789 ahrens #include <sys/uio.h> 42 789 ahrens #include <sys/vmsystm.h> 43 789 ahrens #include <sys/atomic.h> 44 2688 maybee #include <sys/vm.h> 45 789 ahrens #include <vm/seg_vn.h> 46 789 ahrens #include <vm/pvn.h> 47 789 ahrens #include <vm/as.h> 48 7315 Jonathan #include <vm/kpm.h> 49 7315 Jonathan #include <vm/seg_kpm.h> 50 789 ahrens #include <sys/mman.h> 51 789 ahrens #include <sys/pathname.h> 52 789 ahrens #include <sys/cmn_err.h> 53 789 ahrens #include <sys/errno.h> 54 789 ahrens #include <sys/unistd.h> 55 789 ahrens #include <sys/zfs_dir.h> 56 789 ahrens #include <sys/zfs_acl.h> 57 789 ahrens #include <sys/zfs_ioctl.h> 58 789 ahrens #include <sys/fs/zfs.h> 59 789 ahrens #include <sys/dmu.h> 60 789 ahrens #include <sys/spa.h> 61 789 ahrens #include <sys/txg.h> 62 789 ahrens #include <sys/dbuf.h> 63 789 ahrens #include <sys/zap.h> 64 789 ahrens #include <sys/dirent.h> 65 789 ahrens #include <sys/policy.h> 66 789 ahrens #include <sys/sunddi.h> 67 789 ahrens #include <sys/filio.h> 68 7847 Mark #include <sys/sid.h> 69 789 ahrens #include "fs/fs_subr.h" 70 789 ahrens #include <sys/zfs_ctldir.h> 71 5331 amw #include <sys/zfs_fuid.h> 72 1484 ek110237 #include <sys/dnlc.h> 73 1669 perrin #include <sys/zfs_rlock.h> 74 5331 amw #include <sys/extdirent.h> 75 5331 amw #include <sys/kidmap.h> 76 11134 Casper #include <sys/cred.h> 77 5663 ck153898 #include <sys/attr.h> 78 789 ahrens 79 789 ahrens 
/* 80 789 ahrens * Programming rules. 81 789 ahrens * 82 789 ahrens * Each vnode op performs some logical unit of work. To do this, the ZPL must 83 789 ahrens * properly lock its in-core state, create a DMU transaction, do the work, 84 789 ahrens * record this work in the intent log (ZIL), commit the DMU transaction, 85 5331 amw * and wait for the intent log to commit if it is a synchronous operation. 86 5331 amw * Moreover, the vnode ops must work in both normal and log replay context. 87 789 ahrens * The ordering of events is important to avoid deadlocks and references 88 789 ahrens * to freed memory. The example below illustrates the following Big Rules: 89 789 ahrens * 90 789 ahrens * (1) A check must be made in each zfs thread for a mounted file system. 91 5367 ahrens * This is done avoiding races using ZFS_ENTER(zfsvfs). 92 5367 ahrens * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 93 5367 ahrens * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 94 5367 ahrens * can return EIO from the calling function. 95 789 ahrens * 96 789 ahrens * (2) VN_RELE() should always be the last thing except for zil_commit() 97 2638 perrin * (if necessary) and ZFS_EXIT(). This is for 3 reasons: 98 789 ahrens * First, if it's the last reference, the vnode/znode 99 789 ahrens * can be freed, so the zp may point to freed memory. Second, the last 100 789 ahrens * reference will call zfs_zinactive(), which may induce a lot of work -- 101 1669 perrin * pushing cached pages (which acquires range locks) and syncing out 102 789 ahrens * cached atime changes. Third, zfs_zinactive() may require a new tx, 103 789 ahrens * which could deadlock the system if you were already holding one. 104 9321 Neil * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). 105 789 ahrens * 106 1757 perrin * (3) All range locks must be grabbed before calling dmu_tx_assign(), 107 1757 perrin * as they can span dmu_tx_assign() calls. 
108 1757 perrin * 109 8227 Neil * (4) Always pass TXG_NOWAIT as the second argument to dmu_tx_assign(). 110 789 ahrens * This is critical because we don't want to block while holding locks. 111 789 ahrens * Note, in particular, that if a lock is sometimes acquired before 112 789 ahrens * the tx assigns, and sometimes after (e.g. z_lock), then failing to 113 789 ahrens * use a non-blocking assign can deadlock the system. The scenario: 114 789 ahrens * 115 789 ahrens * Thread A has grabbed a lock before calling dmu_tx_assign(). 116 789 ahrens * Thread B is in an already-assigned tx, and blocks for this lock. 117 789 ahrens * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() 118 789 ahrens * forever, because the previous txg can't quiesce until B's tx commits. 119 789 ahrens * 120 789 ahrens * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, 121 2113 ahrens * then drop all locks, call dmu_tx_wait(), and try again. 122 789 ahrens * 123 1757 perrin * (5) If the operation succeeded, generate the intent log entry for it 124 789 ahrens * before dropping locks. This ensures that the ordering of events 125 789 ahrens * in the intent log matches the order in which they actually occurred. 126 8227 Neil * During ZIL replay the zfs_log_* functions will update the sequence 127 8227 Neil * number to indicate the zil transaction has replayed. 128 789 ahrens * 129 1757 perrin * (6) At the end of each vnode op, the DMU tx must always commit, 130 789 ahrens * regardless of whether there were any errors. 131 789 ahrens * 132 2638 perrin * (7) After dropping all locks, invoke zil_commit(zilog, seq, foid) 133 789 ahrens * to ensure that synchronous semantics are provided when necessary. 134 789 ahrens * 135 789 ahrens * In general, this is how things should be ordered in each vnode op: 136 789 ahrens * 137 789 ahrens * ZFS_ENTER(zfsvfs); // exit if unmounted 138 789 ahrens * top: 139 789 ahrens * zfs_dirent_lock(&dl, ...) 
// lock directory entry (may VN_HOLD()) 140 789 ahrens * rw_enter(...); // grab any other locks you need 141 789 ahrens * tx = dmu_tx_create(...); // get DMU tx 142 789 ahrens * dmu_tx_hold_*(); // hold each object you might modify 143 8227 Neil * error = dmu_tx_assign(tx, TXG_NOWAIT); // try to assign 144 789 ahrens * if (error) { 145 789 ahrens * rw_exit(...); // drop locks 146 789 ahrens * zfs_dirent_unlock(dl); // unlock directory entry 147 789 ahrens * VN_RELE(...); // release held vnodes 148 8227 Neil * if (error == ERESTART) { 149 2113 ahrens * dmu_tx_wait(tx); 150 2113 ahrens * dmu_tx_abort(tx); 151 789 ahrens * goto top; 152 789 ahrens * } 153 2113 ahrens * dmu_tx_abort(tx); // abort DMU tx 154 789 ahrens * ZFS_EXIT(zfsvfs); // finished in zfs 155 789 ahrens * return (error); // really out of space 156 789 ahrens * } 157 789 ahrens * error = do_real_work(); // do whatever this VOP does 158 789 ahrens * if (error == 0) 159 2638 perrin * zfs_log_*(...); // on success, make ZIL entry 160 789 ahrens * dmu_tx_commit(tx); // commit DMU tx -- error or not 161 789 ahrens * rw_exit(...); // drop locks 162 789 ahrens * zfs_dirent_unlock(dl); // unlock directory entry 163 789 ahrens * VN_RELE(...); // release held vnodes 164 2638 perrin * zil_commit(zilog, seq, foid); // synchronous when necessary 165 789 ahrens * ZFS_EXIT(zfsvfs); // finished in zfs 166 789 ahrens * return (error); // done, report error 167 789 ahrens */ 168 5367 ahrens 169 789 ahrens /* ARGSUSED */ 170 789 ahrens static int 171 5331 amw zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 172 789 ahrens { 173 3063 perrin znode_t *zp = VTOZ(*vpp); 174 7844 Mark zfsvfs_t *zfsvfs = zp->z_zfsvfs; 175 7844 Mark 176 7844 Mark ZFS_ENTER(zfsvfs); 177 7844 Mark ZFS_VERIFY_ZP(zp); 178 5331 amw 179 5331 amw if ((flag & FWRITE) && (zp->z_phys->zp_flags & ZFS_APPENDONLY) && 180 5331 amw ((flag & FAPPEND) == 0)) { 181 7844 Mark ZFS_EXIT(zfsvfs); 182 5331 amw return (EPERM); 183 5331 amw } 184 5331 
amw 185 5331 amw if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 186 5331 amw ZTOV(zp)->v_type == VREG && 187 5331 amw !(zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) && 188 7844 Mark zp->z_phys->zp_size > 0) { 189 7844 Mark if (fs_vscan(*vpp, cr, 0) != 0) { 190 7844 Mark ZFS_EXIT(zfsvfs); 191 5331 amw return (EACCES); 192 7844 Mark } 193 7844 Mark } 194 3063 perrin 195 3063 perrin /* Keep a count of the synchronous opens in the znode */ 196 3063 perrin if (flag & (FSYNC | FDSYNC)) 197 3063 perrin atomic_inc_32(&zp->z_sync_cnt); 198 5331 amw 199 7844 Mark ZFS_EXIT(zfsvfs); 200 5331 amw return (0); 201 5331 amw } 202 5331 amw 203 5331 amw /* ARGSUSED */ 204 5331 amw static int 205 5331 amw zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 206 5331 amw caller_context_t *ct) 207 789 ahrens { 208 3063 perrin znode_t *zp = VTOZ(vp); 209 7844 Mark zfsvfs_t *zfsvfs = zp->z_zfsvfs; 210 7844 Mark 211 9909 chris /* 212 9909 chris * Clean up any locks held by this process on the vp. 213 9909 chris */ 214 9909 chris cleanlocks(vp, ddi_get_pid(), 0); 215 9909 chris cleanshares(vp, ddi_get_pid()); 216 9909 chris 217 7844 Mark ZFS_ENTER(zfsvfs); 218 7844 Mark ZFS_VERIFY_ZP(zp); 219 3063 perrin 220 3063 perrin /* Decrement the synchronous opens in the znode */ 221 4339 perrin if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 222 3063 perrin atomic_dec_32(&zp->z_sync_cnt); 223 5331 amw 224 5331 amw if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 225 5331 amw ZTOV(zp)->v_type == VREG && 226 5331 amw !(zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) && 227 5331 amw zp->z_phys->zp_size > 0) 228 5331 amw VERIFY(fs_vscan(vp, cr, 1) == 0); 229 789 ahrens 230 7844 Mark ZFS_EXIT(zfsvfs); 231 789 ahrens return (0); 232 789 ahrens } 233 789 ahrens 234 789 ahrens /* 235 789 ahrens * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 236 789 ahrens * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 
237 789 ahrens */ 238 789 ahrens static int 239 789 ahrens zfs_holey(vnode_t *vp, int cmd, offset_t *off) 240 789 ahrens { 241 789 ahrens znode_t *zp = VTOZ(vp); 242 789 ahrens uint64_t noff = (uint64_t)*off; /* new offset */ 243 789 ahrens uint64_t file_sz; 244 789 ahrens int error; 245 789 ahrens boolean_t hole; 246 789 ahrens 247 789 ahrens file_sz = zp->z_phys->zp_size; 248 789 ahrens if (noff >= file_sz) { 249 789 ahrens return (ENXIO); 250 789 ahrens } 251 789 ahrens 252 789 ahrens if (cmd == _FIO_SEEK_HOLE) 253 789 ahrens hole = B_TRUE; 254 789 ahrens else 255 789 ahrens hole = B_FALSE; 256 789 ahrens 257 789 ahrens error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 258 789 ahrens 259 789 ahrens /* end of file? */ 260 789 ahrens if ((error == ESRCH) || (noff > file_sz)) { 261 789 ahrens /* 262 789 ahrens * Handle the virtual hole at the end of file. 263 789 ahrens */ 264 789 ahrens if (hole) { 265 789 ahrens *off = file_sz; 266 789 ahrens return (0); 267 789 ahrens } 268 789 ahrens return (ENXIO); 269 789 ahrens } 270 789 ahrens 271 789 ahrens if (noff < *off) 272 789 ahrens return (error); 273 789 ahrens *off = noff; 274 789 ahrens return (error); 275 789 ahrens } 276 789 ahrens 277 789 ahrens /* ARGSUSED */ 278 789 ahrens static int 279 789 ahrens zfs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred, 280 5331 amw int *rvalp, caller_context_t *ct) 281 789 ahrens { 282 789 ahrens offset_t off; 283 789 ahrens int error; 284 789 ahrens zfsvfs_t *zfsvfs; 285 5326 ek110237 znode_t *zp; 286 789 ahrens 287 789 ahrens switch (com) { 288 4339 perrin case _FIOFFS: 289 789 ahrens return (zfs_sync(vp->v_vfsp, 0, cred)); 290 789 ahrens 291 1544 eschrock /* 292 1544 eschrock * The following two ioctls are used by bfu. Faking out, 293 1544 eschrock * necessary to avoid bfu errors. 
294 1544 eschrock */ 295 4339 perrin case _FIOGDIO: 296 4339 perrin case _FIOSDIO: 297 1544 eschrock return (0); 298 1544 eschrock 299 4339 perrin case _FIO_SEEK_DATA: 300 4339 perrin case _FIO_SEEK_HOLE: 301 789 ahrens if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 302 789 ahrens return (EFAULT); 303 789 ahrens 304 5326 ek110237 zp = VTOZ(vp); 305 5326 ek110237 zfsvfs = zp->z_zfsvfs; 306 5367 ahrens ZFS_ENTER(zfsvfs); 307 5367 ahrens ZFS_VERIFY_ZP(zp); 308 789 ahrens 309 789 ahrens /* offset parameter is in/out */ 310 789 ahrens error = zfs_holey(vp, com, &off); 311 789 ahrens ZFS_EXIT(zfsvfs); 312 789 ahrens if (error) 313 789 ahrens return (error); 314 789 ahrens if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 315 789 ahrens return (EFAULT); 316 789 ahrens return (0); 317 789 ahrens } 318 789 ahrens return (ENOTTY); 319 789 ahrens } 320 789 ahrens 321 789 ahrens /* 322 7315 Jonathan * Utility functions to map and unmap a single physical page. These 323 7315 Jonathan * are used to manage the mappable copies of ZFS file data, and therefore 324 7315 Jonathan * do not update ref/mod bits. 325 7315 Jonathan */ 326 7315 Jonathan caddr_t 327 7315 Jonathan zfs_map_page(page_t *pp, enum seg_rw rw) 328 7315 Jonathan { 329 7315 Jonathan if (kpm_enable) 330 7315 Jonathan return (hat_kpm_mapin(pp, 0)); 331 7315 Jonathan ASSERT(rw == S_READ || rw == S_WRITE); 332 7315 Jonathan return (ppmapin(pp, PROT_READ | ((rw == S_WRITE) ? 
PROT_WRITE : 0), 333 7315 Jonathan (caddr_t)-1)); 334 7315 Jonathan } 335 7315 Jonathan 336 7315 Jonathan void 337 7315 Jonathan zfs_unmap_page(page_t *pp, caddr_t addr) 338 7315 Jonathan { 339 7315 Jonathan if (kpm_enable) { 340 7315 Jonathan hat_kpm_mapout(pp, 0, addr); 341 7315 Jonathan } else { 342 7315 Jonathan ppmapout(addr); 343 7315 Jonathan } 344 7315 Jonathan } 345 7315 Jonathan 346 7315 Jonathan /* 347 789 ahrens * When a file is memory mapped, we must keep the IO data synchronized 348 789 ahrens * between the DMU cache and the memory mapped pages. What this means: 349 789 ahrens * 350 789 ahrens * On Write: If we find a memory mapped page, we write to *both* 351 789 ahrens * the page and the dmu buffer. 352 8636 Mark */ 353 8636 Mark static void 354 8636 Mark update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid) 355 8636 Mark { 356 8636 Mark int64_t off; 357 8636 Mark 358 789 ahrens off = start & PAGEOFFSET; 359 789 ahrens for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 360 789 ahrens page_t *pp; 361 8636 Mark uint64_t nbytes = MIN(PAGESIZE - off, len); 362 8636 Mark 363 789 ahrens if (pp = page_lookup(vp, start, SE_SHARED)) { 364 789 ahrens caddr_t va; 365 789 ahrens 366 7315 Jonathan va = zfs_map_page(pp, S_WRITE); 367 9512 Neil (void) dmu_read(os, oid, start+off, nbytes, va+off, 368 9512 Neil DMU_READ_PREFETCH); 369 7315 Jonathan zfs_unmap_page(pp, va); 370 789 ahrens page_unlock(pp); 371 8636 Mark } 372 8636 Mark len -= nbytes; 373 789 ahrens off = 0; 374 8636 Mark } 375 789 ahrens } 376 789 ahrens 377 789 ahrens /* 378 789 ahrens * When a file is memory mapped, we must keep the IO data synchronized 379 789 ahrens * between the DMU cache and the memory mapped pages. What this means: 380 789 ahrens * 381 789 ahrens * On Read: We "read" preferentially from memory mapped pages, 382 789 ahrens * else we default from the dmu buffer. 
383 789 ahrens * 384 789 ahrens * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 385 789 ahrens * the file is memory mapped. 386 789 ahrens */ 387 789 ahrens static int 388 3638 billm mappedread(vnode_t *vp, int nbytes, uio_t *uio) 389 789 ahrens { 390 3638 billm znode_t *zp = VTOZ(vp); 391 3638 billm objset_t *os = zp->z_zfsvfs->z_os; 392 3638 billm int64_t start, off; 393 789 ahrens int len = nbytes; 394 789 ahrens int error = 0; 395 789 ahrens 396 789 ahrens start = uio->uio_loffset; 397 789 ahrens off = start & PAGEOFFSET; 398 789 ahrens for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 399 789 ahrens page_t *pp; 400 3638 billm uint64_t bytes = MIN(PAGESIZE - off, len); 401 789 ahrens 402 789 ahrens if (pp = page_lookup(vp, start, SE_SHARED)) { 403 789 ahrens caddr_t va; 404 789 ahrens 405 7315 Jonathan va = zfs_map_page(pp, S_READ); 406 789 ahrens error = uiomove(va + off, bytes, UIO_READ, uio); 407 7315 Jonathan zfs_unmap_page(pp, va); 408 789 ahrens page_unlock(pp); 409 789 ahrens } else { 410 3638 billm error = dmu_read_uio(os, zp->z_id, uio, bytes); 411 789 ahrens } 412 789 ahrens len -= bytes; 413 789 ahrens off = 0; 414 789 ahrens if (error) 415 789 ahrens break; 416 789 ahrens } 417 789 ahrens return (error); 418 789 ahrens } 419 789 ahrens 420 3638 billm offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 421 789 ahrens 422 789 ahrens /* 423 789 ahrens * Read bytes from specified file into supplied buffer. 424 789 ahrens * 425 789 ahrens * IN: vp - vnode of file to be read from. 426 789 ahrens * uio - structure supplying read location, range info, 427 789 ahrens * and return buffer. 428 789 ahrens * ioflag - SYNC flags; used to provide FRSYNC semantics. 429 789 ahrens * cr - credentials of caller. 430 5331 amw * ct - caller context 431 789 ahrens * 432 789 ahrens * OUT: uio - updated offset and range, buffer filled. 
433 789 ahrens * 434 789 ahrens * RETURN: 0 if success 435 789 ahrens * error code if failure 436 789 ahrens * 437 789 ahrens * Side Effects: 438 789 ahrens * vp - atime updated if byte count > 0 439 789 ahrens */ 440 789 ahrens /* ARGSUSED */ 441 789 ahrens static int 442 789 ahrens zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 443 789 ahrens { 444 789 ahrens znode_t *zp = VTOZ(vp); 445 789 ahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 446 5326 ek110237 objset_t *os; 447 3638 billm ssize_t n, nbytes; 448 3638 billm int error; 449 1669 perrin rl_t *rl; 450 789 ahrens 451 5367 ahrens ZFS_ENTER(zfsvfs); 452 5367 ahrens ZFS_VERIFY_ZP(zp); 453 5326 ek110237 os = zfsvfs->z_os; 454 789 ahrens 455 5929 marks if (zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) { 456 5929 marks ZFS_EXIT(zfsvfs); 457 5929 marks return (EACCES); 458 5929 marks } 459 5929 marks 460 789 ahrens /* 461 789 ahrens * Validate file offset 462 789 ahrens */ 463 789 ahrens if (uio->uio_loffset < (offset_t)0) { 464 789 ahrens ZFS_EXIT(zfsvfs); 465 789 ahrens return (EINVAL); 466 789 ahrens } 467 789 ahrens 468 789 ahrens /* 469 789 ahrens * Fasttrack empty reads 470 789 ahrens */ 471 789 ahrens if (uio->uio_resid == 0) { 472 789 ahrens ZFS_EXIT(zfsvfs); 473 789 ahrens return (0); 474 789 ahrens } 475 789 ahrens 476 789 ahrens /* 477 1669 perrin * Check for mandatory locks 478 789 ahrens */ 479 789 ahrens if (MANDMODE((mode_t)zp->z_phys->zp_mode)) { 480 789 ahrens if (error = chklock(vp, FREAD, 481 789 ahrens uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 482 789 ahrens ZFS_EXIT(zfsvfs); 483 789 ahrens return (error); 484 789 ahrens } 485 789 ahrens } 486 789 ahrens 487 789 ahrens /* 488 789 ahrens * If we're in FRSYNC mode, sync out this znode before reading it. 489 789 ahrens */ 490 2638 perrin if (ioflag & FRSYNC) 491 2638 perrin zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id); 492 789 ahrens 493 789 ahrens /* 494 1669 perrin * Lock the range against changes. 
495 789 ahrens */ 496 1669 perrin rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 497 1669 perrin 498 789 ahrens /* 499 789 ahrens * If we are reading past end-of-file we can skip 500 789 ahrens * to the end; but we might still need to set atime. 501 789 ahrens */ 502 789 ahrens if (uio->uio_loffset >= zp->z_phys->zp_size) { 503 789 ahrens error = 0; 504 789 ahrens goto out; 505 789 ahrens } 506 789 ahrens 507 3638 billm ASSERT(uio->uio_loffset < zp->z_phys->zp_size); 508 3638 billm n = MIN(uio->uio_resid, zp->z_phys->zp_size - uio->uio_loffset); 509 789 ahrens 510 3638 billm while (n > 0) { 511 3638 billm nbytes = MIN(n, zfs_read_chunk_size - 512 3638 billm P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 513 3638 billm 514 3638 billm if (vn_has_cached_data(vp)) 515 3638 billm error = mappedread(vp, nbytes, uio); 516 3638 billm else 517 3638 billm error = dmu_read_uio(os, zp->z_id, uio, nbytes); 518 7294 perrin if (error) { 519 7294 perrin /* convert checksum errors into IO errors */ 520 7294 perrin if (error == ECKSUM) 521 7294 perrin error = EIO; 522 7294 perrin break; 523 7294 perrin } 524 789 ahrens 525 3638 billm n -= nbytes; 526 789 ahrens } 527 3638 billm 528 789 ahrens out: 529 2237 maybee zfs_range_unlock(rl); 530 789 ahrens 531 789 ahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 532 789 ahrens ZFS_EXIT(zfsvfs); 533 789 ahrens return (error); 534 789 ahrens } 535 789 ahrens 536 789 ahrens /* 537 789 ahrens * Write the bytes to a file. 538 789 ahrens * 539 789 ahrens * IN: vp - vnode of file to be written to. 540 789 ahrens * uio - structure supplying write location, range info, 541 789 ahrens * and data buffer. 542 789 ahrens * ioflag - FAPPEND flag set if in append mode. 543 789 ahrens * cr - credentials of caller. 544 5331 amw * ct - caller context (NFS/CIFS fem monitor only) 545 789 ahrens * 546 789 ahrens * OUT: uio - updated offset and range. 
547 789 ahrens * 548 789 ahrens * RETURN: 0 if success 549 789 ahrens * error code if failure 550 789 ahrens * 551 789 ahrens * Timestamps: 552 789 ahrens * vp - ctime|mtime updated if byte count > 0 553 789 ahrens */ 554 789 ahrens /* ARGSUSED */ 555 789 ahrens static int 556 789 ahrens zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 557 789 ahrens { 558 789 ahrens znode_t *zp = VTOZ(vp); 559 789 ahrens rlim64_t limit = uio->uio_llimit; 560 789 ahrens ssize_t start_resid = uio->uio_resid; 561 789 ahrens ssize_t tx_bytes; 562 789 ahrens uint64_t end_size; 563 789 ahrens dmu_tx_t *tx; 564 789 ahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 565 5326 ek110237 zilog_t *zilog; 566 789 ahrens offset_t woff; 567 789 ahrens ssize_t n, nbytes; 568 1669 perrin rl_t *rl; 569 789 ahrens int max_blksz = zfsvfs->z_max_blksz; 570 6743 marks uint64_t pflags; 571 6743 marks int error; 572 9412 Aleksandr arc_buf_t *abuf; 573 6743 marks 574 6743 marks /* 575 6743 marks * Fasttrack empty write 576 6743 marks */ 577 6743 marks n = start_resid; 578 6743 marks if (n == 0) 579 6743 marks return (0); 580 6743 marks 581 6743 marks if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 582 6743 marks limit = MAXOFFSET_T; 583 6743 marks 584 6743 marks ZFS_ENTER(zfsvfs); 585 6743 marks ZFS_VERIFY_ZP(zp); 586 5331 amw 587 5331 amw /* 588 5331 amw * If immutable or not appending then return EPERM 589 5331 amw */ 590 6743 marks pflags = zp->z_phys->zp_flags; 591 5331 amw if ((pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || 592 5331 amw ((pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 593 6743 marks (uio->uio_loffset < zp->z_phys->zp_size))) { 594 6743 marks ZFS_EXIT(zfsvfs); 595 6743 marks return (EPERM); 596 6743 marks } 597 6743 marks 598 5326 ek110237 zilog = zfsvfs->z_log; 599 789 ahrens 600 789 ahrens /* 601 11083 william * Validate file offset 602 11083 william */ 603 11083 william woff = ioflag & FAPPEND ? 
zp->z_phys->zp_size : uio->uio_loffset; 604 11083 william if (woff < 0) { 605 11083 william ZFS_EXIT(zfsvfs); 606 11083 william return (EINVAL); 607 11083 william } 608 11083 william 609 11083 william /* 610 11083 william * Check for mandatory locks before calling zfs_range_lock() 611 11083 william * in order to prevent a deadlock with locks set via fcntl(). 612 11083 william */ 613 11083 william if (MANDMODE((mode_t)zp->z_phys->zp_mode) && 614 11083 william (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 615 11083 william ZFS_EXIT(zfsvfs); 616 11083 william return (error); 617 11083 william } 618 11083 william 619 11083 william /* 620 2237 maybee * Pre-fault the pages to ensure slow (eg NFS) pages 621 1669 perrin * don't hold up txg. 622 789 ahrens */ 623 8059 Donghai uio_prefaultpages(n, uio); 624 789 ahrens 625 789 ahrens /* 626 789 ahrens * If in append mode, set the io offset pointer to eof. 627 789 ahrens */ 628 1669 perrin if (ioflag & FAPPEND) { 629 1669 perrin /* 630 11083 william * Obtain an appending range lock to guarantee file append 631 11083 william * semantics. We reset the write offset once we have the lock. 632 1669 perrin */ 633 1669 perrin rl = zfs_range_lock(zp, 0, n, RL_APPEND); 634 11083 william woff = rl->r_off; 635 1669 perrin if (rl->r_len == UINT64_MAX) { 636 11083 william /* 637 11083 william * We overlocked the file because this write will cause 638 11083 william * the file block size to increase. 639 11083 william * Note that zp_size cannot change with this lock held. 640 11083 william */ 641 11083 william woff = zp->z_phys->zp_size; 642 11083 william } 643 11083 william uio->uio_loffset = woff; 644 11083 william } else { 645 11083 william /* 646 11083 william * Note that if the file block size will change as a result of 647 11083 william * this write, then this range lock will lock the entire file 648 11083 william * so that we can re-write the block safely. 
649 789 ahrens */ 650 1669 perrin rl = zfs_range_lock(zp, woff, n, RL_WRITER); 651 789 ahrens } 652 789 ahrens 653 789 ahrens if (woff >= limit) { 654 3638 billm zfs_range_unlock(rl); 655 3638 billm ZFS_EXIT(zfsvfs); 656 3638 billm return (EFBIG); 657 789 ahrens } 658 789 ahrens 659 789 ahrens if ((woff + n) > limit || woff > (limit - n)) 660 789 ahrens n = limit - woff; 661 789 ahrens 662 1669 perrin end_size = MAX(zp->z_phys->zp_size, woff + n); 663 789 ahrens 664 1669 perrin /* 665 3638 billm * Write the file in reasonable size chunks. Each chunk is written 666 3638 billm * in a separate transaction; this keeps the intent log records small 667 3638 billm * and allows us to do more fine-grained space accounting. 668 789 ahrens */ 669 789 ahrens while (n > 0) { 670 9412 Aleksandr abuf = NULL; 671 9412 Aleksandr woff = uio->uio_loffset; 672 9412 Aleksandr 673 9412 Aleksandr again: 674 9396 Matthew if (zfs_usergroup_overquota(zfsvfs, 675 9396 Matthew B_FALSE, zp->z_phys->zp_uid) || 676 9396 Matthew zfs_usergroup_overquota(zfsvfs, 677 9396 Matthew B_TRUE, zp->z_phys->zp_gid)) { 678 9412 Aleksandr if (abuf != NULL) 679 9412 Aleksandr dmu_return_arcbuf(abuf); 680 9396 Matthew error = EDQUOT; 681 9396 Matthew break; 682 9396 Matthew } 683 9412 Aleksandr 684 9412 Aleksandr /* 685 9412 Aleksandr * If dmu_assign_arcbuf() is expected to execute with minimum 686 9412 Aleksandr * overhead loan an arc buffer and copy user data to it before 687 9412 Aleksandr * we enter a txg. This avoids holding a txg forever while we 688 9412 Aleksandr * pagefault on a hanging NFS server mapping. 
689 9412 Aleksandr */ 690 9412 Aleksandr if (abuf == NULL && n >= max_blksz && 691 9412 Aleksandr woff >= zp->z_phys->zp_size && 692 9412 Aleksandr P2PHASE(woff, max_blksz) == 0 && 693 9412 Aleksandr zp->z_blksz == max_blksz) { 694 9412 Aleksandr size_t cbytes; 695 9412 Aleksandr 696 9412 Aleksandr abuf = dmu_request_arcbuf(zp->z_dbuf, max_blksz); 697 9412 Aleksandr ASSERT(abuf != NULL); 698 9412 Aleksandr ASSERT(arc_buf_size(abuf) == max_blksz); 699 9412 Aleksandr if (error = uiocopy(abuf->b_data, max_blksz, 700 9412 Aleksandr UIO_WRITE, uio, &cbytes)) { 701 9412 Aleksandr dmu_return_arcbuf(abuf); 702 9412 Aleksandr break; 703 9412 Aleksandr } 704 9412 Aleksandr ASSERT(cbytes == max_blksz); 705 9412 Aleksandr } 706 9412 Aleksandr 707 9412 Aleksandr /* 708 9412 Aleksandr * Start a transaction. 709 9412 Aleksandr */ 710 3638 billm tx = dmu_tx_create(zfsvfs->z_os); 711 3638 billm dmu_tx_hold_bonus(tx, zp->z_id); 712 3638 billm dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 713 8227 Neil error = dmu_tx_assign(tx, TXG_NOWAIT); 714 8227 Neil if (error) { 715 8227 Neil if (error == ERESTART) { 716 3638 billm dmu_tx_wait(tx); 717 3638 billm dmu_tx_abort(tx); 718 9412 Aleksandr goto again; 719 9412 Aleksandr } 720 9412 Aleksandr dmu_tx_abort(tx); 721 9412 Aleksandr if (abuf != NULL) 722 9412 Aleksandr dmu_return_arcbuf(abuf); 723 3638 billm break; 724 3638 billm } 725 3638 billm 726 3638 billm /* 727 3638 billm * If zfs_range_lock() over-locked we grow the blocksize 728 3638 billm * and then reduce the lock range. This will only happen 729 3638 billm * on the first iteration since zfs_range_reduce() will 730 3638 billm * shrink down r_len to the appropriate size. 
731 3638 billm */ 732 3638 billm if (rl->r_len == UINT64_MAX) { 733 3638 billm uint64_t new_blksz; 734 3638 billm 735 3638 billm if (zp->z_blksz > max_blksz) { 736 3638 billm ASSERT(!ISP2(zp->z_blksz)); 737 3638 billm new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE); 738 3638 billm } else { 739 3638 billm new_blksz = MIN(end_size, max_blksz); 740 3638 billm } 741 3638 billm zfs_grow_blocksize(zp, new_blksz, tx); 742 3638 billm zfs_range_reduce(rl, woff, n); 743 3638 billm } 744 3638 billm 745 789 ahrens /* 746 789 ahrens * XXX - should we really limit each write to z_max_blksz? 747 789 ahrens * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 748 789 ahrens */ 749 789 ahrens nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 750 789 ahrens 751 9412 Aleksandr if (abuf == NULL) { 752 9412 Aleksandr tx_bytes = uio->uio_resid; 753 9412 Aleksandr error = dmu_write_uio(zfsvfs->z_os, zp->z_id, uio, 754 9412 Aleksandr nbytes, tx); 755 9412 Aleksandr tx_bytes -= uio->uio_resid; 756 9412 Aleksandr } else { 757 9412 Aleksandr tx_bytes = nbytes; 758 9412 Aleksandr ASSERT(tx_bytes == max_blksz); 759 9412 Aleksandr dmu_assign_arcbuf(zp->z_dbuf, woff, abuf, tx); 760 9412 Aleksandr ASSERT(tx_bytes <= uio->uio_resid); 761 9412 Aleksandr uioskip(uio, tx_bytes); 762 9412 Aleksandr } 763 9412 Aleksandr if (tx_bytes && vn_has_cached_data(vp)) { 764 8636 Mark update_pages(vp, woff, 765 8636 Mark tx_bytes, zfsvfs->z_os, zp->z_id); 766 9412 Aleksandr } 767 789 ahrens 768 3638 billm /* 769 3638 billm * If we made no progress, we're done. If we made even 770 3638 billm * partial progress, update the znode and ZIL accordingly. 771 3638 billm */ 772 3638 billm if (tx_bytes == 0) { 773 3897 maybee dmu_tx_commit(tx); 774 3638 billm ASSERT(error != 0); 775 789 ahrens break; 776 789 ahrens } 777 1576 marks 778 1576 marks /* 779 1576 marks * Clear Set-UID/Set-GID bits on successful write if not 780 1576 marks * privileged and at least one of the excute bits is set. 
781 1576 marks * 782 1576 marks * It would be nice to to this after all writes have 783 1576 marks * been done, but that would still expose the ISUID/ISGID 784 1576 marks * to another app after the partial write is committed. 785 5331 amw * 786 5331 amw * Note: we don't call zfs_fuid_map_id() here because 787 5331 amw * user 0 is not an ephemeral uid. 788 1576 marks */ 789 1576 marks mutex_enter(&zp->z_acl_lock); 790 1576 marks if ((zp->z_phys->zp_mode & (S_IXUSR | (S_IXUSR >> 3) | 791 1576 marks (S_IXUSR >> 6))) != 0 && 792 1576 marks (zp->z_phys->zp_mode & (S_ISUID | S_ISGID)) != 0 && 793 1576 marks secpolicy_vnode_setid_retain(cr, 794 1576 marks (zp->z_phys->zp_mode & S_ISUID) != 0 && 795 1576 marks zp->z_phys->zp_uid == 0) != 0) { 796 4339 perrin zp->z_phys->zp_mode &= ~(S_ISUID | S_ISGID); 797 1576 marks } 798 1576 marks mutex_exit(&zp->z_acl_lock); 799 789 ahrens 800 3638 billm /* 801 3638 billm * Update time stamp. NOTE: This marks the bonus buffer as 802 3638 billm * dirty, so we don't have to do it again for zp_size. 803 3638 billm */ 804 3638 billm zfs_time_stamper(zp, CONTENT_MODIFIED, tx); 805 789 ahrens 806 789 ahrens /* 807 3638 billm * Update the file size (zp_size) if it has changed; 808 3638 billm * account for possible concurrent updates. 809 789 ahrens */ 810 3638 billm while ((end_size = zp->z_phys->zp_size) < uio->uio_loffset) 811 789 ahrens (void) atomic_cas_64(&zp->z_phys->zp_size, end_size, 812 789 ahrens uio->uio_loffset); 813 3638 billm zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 814 789 ahrens dmu_tx_commit(tx); 815 789 ahrens 816 3638 billm if (error != 0) 817 3638 billm break; 818 3638 billm ASSERT(tx_bytes == nbytes); 819 3638 billm n -= nbytes; 820 789 ahrens } 821 789 ahrens 822 2237 maybee zfs_range_unlock(rl); 823 789 ahrens 824 789 ahrens /* 825 789 ahrens * If we're in replay mode, or we made no progress, return error. 826 789 ahrens * Otherwise, it's at least a partial write, so it's successful. 
827 789 ahrens */ 828 8227 Neil if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 829 789 ahrens ZFS_EXIT(zfsvfs); 830 789 ahrens return (error); 831 789 ahrens } 832 789 ahrens 833 2638 perrin if (ioflag & (FSYNC | FDSYNC)) 834 2638 perrin zil_commit(zilog, zp->z_last_itx, zp->z_id); 835 789 ahrens 836 789 ahrens ZFS_EXIT(zfsvfs); 837 789 ahrens return (0); 838 789 ahrens } 839 789 ahrens 840 2237 maybee void 841 10922 Jeff zfs_get_done(zgd_t *zgd, int error) 842 10922 Jeff { 843 10922 Jeff znode_t *zp = zgd->zgd_private; 844 10922 Jeff objset_t *os = zp->z_zfsvfs->z_os; 845 10922 Jeff 846 10922 Jeff if (zgd->zgd_db) 847 10922 Jeff dmu_buf_rele(zgd->zgd_db, zgd); 848 10922 Jeff 849 10922 Jeff zfs_range_unlock(zgd->zgd_rl); 850 10922 Jeff 851 9321 Neil /* 852 9321 Neil * Release the vnode asynchronously as we currently have the 853 9321 Neil * txg stopped from syncing. 854 9321 Neil */ 855 10922 Jeff VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 856 10922 Jeff 857 10922 Jeff if (error == 0 && zgd->zgd_bp) 858 10922 Jeff zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 859 10922 Jeff 860 3063 perrin kmem_free(zgd, sizeof (zgd_t)); 861 2237 maybee } 862 2237 maybee 863 10209 Mark #ifdef DEBUG 864 10209 Mark static int zil_fault_io = 0; 865 10209 Mark #endif 866 10209 Mark 867 789 ahrens /* 868 789 ahrens * Get data to generate a TX_WRITE intent log record. 
*/
/*
 *	IN:	arg	- the zfsvfs_t of the file system being logged.
 *		lr	- the write record; supplies the object id
 *			  (lr_foid), offset, length and the block pointer
 *			  slot (lr_blkptr) filled in for indirect writes.
 *		buf	- destination for the data of an immediate write,
 *			  or NULL to request an indirect write.
 *		zio	- passed to dmu_sync() for indirect writes;
 *			  asserted non-NULL.
 *
 *	RETURN:	0 if success
 *		ENOENT if the object cannot be obtained, is unlinked, or
 *		the record's offset is at or beyond the current file size
 *		otherwise the error from dmu_read(), dmu_buf_hold() or
 *		dmu_sync() (EALREADY from dmu_sync() is turned into 0)
 *
 * When dmu_sync() returns 0 the zgd_t, range lock, dbuf hold and vnode
 * hold are NOT released here; zfs_get_done() releases them when it is
 * invoked as the dmu_sync() callback.  On every other path past the
 * zgd allocation, zfs_get_done() is called directly before returning.
 */
int
zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
{
	zfsvfs_t *zfsvfs = arg;
	objset_t *os = zfsvfs->z_os;
	znode_t *zp;
	uint64_t object = lr->lr_foid;
	uint64_t offset = lr->lr_offset;
	uint64_t size = lr->lr_length;
	blkptr_t *bp = &lr->lr_blkptr;
	dmu_buf_t *db;
	zgd_t *zgd;
	int error = 0;

	ASSERT(zio != NULL);
	ASSERT(size != 0);

	/*
	 * Nothing to do if the file has been removed
	 */
	if (zfs_zget(zfsvfs, object, &zp) != 0)
		return (ENOENT);
	if (zp->z_unlinked) {
		/*
		 * Release the vnode asynchronously as we currently have the
		 * txg stopped from syncing.
		 */
		VN_RELE_ASYNC(ZTOV(zp),
		    dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
		return (ENOENT);
	}

	/* From here on, all cleanup goes through zfs_get_done(zgd, ...). */
	zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
	zgd->zgd_zilog = zfsvfs->z_log;
	zgd->zgd_private = zp;

	/*
	 * Write records come in two flavors: immediate and indirect.
	 * For small writes it's cheaper to store the data with the
	 * log record (immediate); for large writes it's cheaper to
	 * sync the data and get a pointer to it (indirect) so that
	 * we don't have to write the data twice.
	 */
	if (buf != NULL) { /* immediate write */
		zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER);
		/* test for truncation needs to be done while range locked */
		if (offset >= zp->z_phys->zp_size) {
			error = ENOENT;
		} else {
			error = dmu_read(os, object, offset, size, buf,
			    DMU_READ_NO_PREFETCH);
		}
		ASSERT(error == 0 || error == ENOENT);
	} else { /* indirect write */
		/*
		 * Have to lock the whole block to ensure when it's
		 * written out and its checksum is being calculated
		 * that no one can change the data. We need to re-check
		 * blocksize after we get the lock in case it's changed!
		 */
		for (;;) {
			uint64_t blkoff;
			size = zp->z_blksz;
			/*
			 * Round offset down to the start of its block; for
			 * a non-power-of-2 block size the start is taken
			 * to be offset 0.
			 */
			blkoff = ISP2(size) ? P2PHASE(offset, size) : offset;
			offset -= blkoff;
			zgd->zgd_rl = zfs_range_lock(zp, offset, size,
			    RL_READER);
			if (zp->z_blksz == size)
				break;
			/* block size changed: restore offset and retry */
			offset += blkoff;
			zfs_range_unlock(zgd->zgd_rl);
		}
		/* test for truncation needs to be done while range locked */
		if (lr->lr_offset >= zp->z_phys->zp_size)
			error = ENOENT;
#ifdef DEBUG
		/* one-shot EIO injection, armed by setting zil_fault_io */
		if (zil_fault_io) {
			error = EIO;
			zil_fault_io = 0;
		}
#endif
		if (error == 0)
			error = dmu_buf_hold(os, object, offset, zgd, &db);

		if (error == 0) {
			zgd->zgd_db = db;
			zgd->zgd_bp = bp;

			ASSERT(db->db_offset == offset);
			ASSERT(db->db_size == size);

			error = dmu_sync(zio, lr->lr_common.lrc_txg,
			    zfs_get_done, zgd);
			ASSERT(error || lr->lr_length <= zp->z_blksz);

			/*
			 * On success, we need to wait for the write I/O
			 * initiated by dmu_sync() to complete before we can
			 * release this dbuf.  We will finish everything up
			 * in the zfs_get_done() callback.
			 */
			if (error == 0)
				return (0);

			/*
			 * EALREADY is not reported as a failure: the record
			 * is retyped to TX_WRITE2 and 0 is returned.
			 */
			if (error == EALREADY) {
				lr->lr_common.lrc_txtype = TX_WRITE2;
				error = 0;
			}
		}
	}

	zfs_get_done(zgd, error);

	return (error);
}

/*
 * Check whether the caller may access the vnode.
 *
 *	IN:	vp	- vnode to check.
 *		mode	- requested access; handed unchanged to
 *			  zfs_zaccess() or zfs_zaccess_rwx().
 *		flag	- if any V_ACE_MASK bit is set the check is done
 *			  by zfs_zaccess(), otherwise by zfs_zaccess_rwx().
 *		cr	- credentials of caller.
 *		ct	- caller context [UNUSED].
 *
 *	RETURN:	the result of the selected access check.
 */
/*ARGSUSED*/
static int
zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr,
    caller_context_t *ct)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	if (flag & V_ACE_MASK)
		error = zfs_zaccess(zp, mode, flag, B_FALSE, cr);
	else
		error = zfs_zaccess_rwx(zp, mode, flag, cr);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * If vnode is for a device return a specfs vnode instead.
1009 9981 Tim */ 1010 9981 Tim static int 1011 9981 Tim specvp_check(vnode_t **vpp, cred_t *cr) 1012 9981 Tim { 1013 9981 Tim int error = 0; 1014 9981 Tim 1015 9981 Tim if (IS_DEVVP(*vpp)) { 1016 9981 Tim struct vnode *svp; 1017 9981 Tim 1018 9981 Tim svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1019 9981 Tim VN_RELE(*vpp); 1020 9981 Tim if (svp == NULL) 1021 9981 Tim error = ENOSYS; 1022 9981 Tim *vpp = svp; 1023 9981 Tim } 1024 9981 Tim return (error); 1025 9981 Tim } 1026 9981 Tim 1027 9981 Tim 1028 9981 Tim /* 1029 789 ahrens * Lookup an entry in a directory, or an extended attribute directory. 1030 789 ahrens * If it exists, return a held vnode reference for it. 1031 789 ahrens * 1032 789 ahrens * IN: dvp - vnode of directory to search. 1033 789 ahrens * nm - name of entry to lookup. 1034 789 ahrens * pnp - full pathname to lookup [UNUSED]. 1035 789 ahrens * flags - LOOKUP_XATTR set if looking for an attribute. 1036 789 ahrens * rdir - root directory vnode [UNUSED]. 1037 789 ahrens * cr - credentials of caller. 1038 5331 amw * ct - caller context 1039 5331 amw * direntflags - directory lookup flags 1040 5331 amw * realpnp - returned pathname. 1041 789 ahrens * 1042 789 ahrens * OUT: vpp - vnode of located entry, NULL if not found. 
1043 789 ahrens * 1044 789 ahrens * RETURN: 0 if success 1045 789 ahrens * error code if failure 1046 789 ahrens * 1047 789 ahrens * Timestamps: 1048 789 ahrens * NA 1049 789 ahrens */ 1050 789 ahrens /* ARGSUSED */ 1051 789 ahrens static int 1052 789 ahrens zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, 1053 5331 amw int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, 1054 5331 amw int *direntflags, pathname_t *realpnp) 1055 5331 amw { 1056 789 ahrens znode_t *zdp = VTOZ(dvp); 1057 789 ahrens zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1058 9981 Tim int error = 0; 1059 9981 Tim 1060 9981 Tim /* fast path */ 1061 9981 Tim if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { 1062 9981 Tim 1063 9981 Tim if (dvp->v_type != VDIR) { 1064 9981 Tim return (ENOTDIR); 1065 9981 Tim } else if (zdp->z_dbuf == NULL) { 1066 9981 Tim return (EIO); 1067 9981 Tim } 1068 9981 Tim 1069 9981 Tim if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) { 1070 9981 Tim error = zfs_fastaccesschk_execute(zdp, cr); 1071 9981 Tim if (!error) { 1072 9981 Tim *vpp = dvp; 1073 9981 Tim VN_HOLD(*vpp); 1074 9981 Tim return (0); 1075 9981 Tim } 1076 9981 Tim return (error); 1077 9981 Tim } else { 1078 9981 Tim vnode_t *tvp = dnlc_lookup(dvp, nm); 1079 9981 Tim 1080 9981 Tim if (tvp) { 1081 9981 Tim error = zfs_fastaccesschk_execute(zdp, cr); 1082 9981 Tim if (error) { 1083 9981 Tim VN_RELE(tvp); 1084 9981 Tim return (error); 1085 9981 Tim } 1086 9981 Tim if (tvp == DNLC_NO_VNODE) { 1087 9981 Tim VN_RELE(tvp); 1088 9981 Tim return (ENOENT); 1089 9981 Tim } else { 1090 9981 Tim *vpp = tvp; 1091 9981 Tim return (specvp_check(vpp, cr)); 1092 9981 Tim } 1093 9981 Tim } 1094 9981 Tim } 1095 9981 Tim } 1096 9981 Tim 1097 9981 Tim DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1098 789 ahrens 1099 5367 ahrens ZFS_ENTER(zfsvfs); 1100 5367 ahrens ZFS_VERIFY_ZP(zdp); 1101 789 ahrens 1102 789 ahrens *vpp = NULL; 1103 789 ahrens 1104 789 ahrens if (flags & LOOKUP_XATTR) { 
1105 3234 ck153898 /* 1106 3234 ck153898 * If the xattr property is off, refuse the lookup request. 1107 3234 ck153898 */ 1108 3234 ck153898 if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 1109 3234 ck153898 ZFS_EXIT(zfsvfs); 1110 3234 ck153898 return (EINVAL); 1111 3234 ck153898 } 1112 3234 ck153898 1113 789 ahrens /* 1114 789 ahrens * We don't allow recursive attributes.. 1115 789 ahrens * Maybe someday we will. 1116 789 ahrens */ 1117 789 ahrens if (zdp->z_phys->zp_flags & ZFS_XATTR) { 1118 789 ahrens ZFS_EXIT(zfsvfs); 1119 789 ahrens return (EINVAL); 1120 789 ahrens } 1121 789 ahrens 1122 3280 ck153898 if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1123 789 ahrens ZFS_EXIT(zfsvfs); 1124 789 ahrens return (error); 1125 789 ahrens } 1126 789 ahrens 1127 789 ahrens /* 1128 789 ahrens * Do we have permission to get into attribute directory? 1129 789 ahrens */ 1130 789 ahrens 1131 5331 amw if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1132 5331 amw B_FALSE, cr)) { 1133 789 ahrens VN_RELE(*vpp); 1134 5331 amw *vpp = NULL; 1135 789 ahrens } 1136 789 ahrens 1137 789 ahrens ZFS_EXIT(zfsvfs); 1138 789 ahrens return (error); 1139 789 ahrens } 1140 1460 marks 1141 1512 ek110237 if (dvp->v_type != VDIR) { 1142 1512 ek110237 ZFS_EXIT(zfsvfs); 1143 1460 marks return (ENOTDIR); 1144 1512 ek110237 } 1145 789 ahrens 1146 789 ahrens /* 1147 789 ahrens * Check accessibility of directory. 
1148 789 ahrens */ 1149 789 ahrens 1150 5331 amw if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1151 5331 amw ZFS_EXIT(zfsvfs); 1152 5331 amw return (error); 1153 5331 amw } 1154 5331 amw 1155 5498 timh if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1156 5331 amw NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1157 5331 amw ZFS_EXIT(zfsvfs); 1158 5331 amw return (EILSEQ); 1159 5331 amw } 1160 5331 amw 1161 5331 amw error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp); 1162 9981 Tim if (error == 0) 1163 9981 Tim error = specvp_check(vpp, cr); 1164 789 ahrens 1165 789 ahrens ZFS_EXIT(zfsvfs); 1166 789 ahrens return (error); 1167 789 ahrens } 1168 789 ahrens 1169 789 ahrens /* 1170 789 ahrens * Attempt to create a new entry in a directory. If the entry 1171 789 ahrens * already exists, truncate the file if permissible, else return 1172 789 ahrens * an error. Return the vp of the created or trunc'd file. 1173 789 ahrens * 1174 789 ahrens * IN: dvp - vnode of directory to put new file entry in. 1175 789 ahrens * name - name of new file entry. 1176 789 ahrens * vap - attributes of new file. 1177 789 ahrens * excl - flag indicating exclusive or non-exclusive mode. 1178 789 ahrens * mode - mode to open file with. 1179 789 ahrens * cr - credentials of caller. 1180 789 ahrens * flag - large file flag [UNUSED]. 1181 5331 amw * ct - caller context 1182 5331 amw * vsecp - ACL to be set 1183 789 ahrens * 1184 789 ahrens * OUT: vpp - vnode of created or trunc'd entry. 
*
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	dvp - ctime|mtime updated if new entry created
 *	 vp - ctime|mtime always, atime if new
 *
 * The flag argument is consulted for FIGNORECASE (case-insensitive
 * directory lookup and TX_CI logging) and FAPPEND (access check on an
 * existing file).
 */

/* ARGSUSED */
static int
zfs_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl,
    int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
    vsecattr_t *vsecp)
{
	znode_t		*zp, *dzp = VTOZ(dvp);
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	objset_t	*os;
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	int		error;
	ksid_t		*ksid;
	uid_t		uid;
	gid_t		gid = crgetgid(cr);
	zfs_acl_ids_t	acl_ids;
	boolean_t	fuid_dirtied;

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */

	ksid = crgetsid(cr, KSID_OWNER);
	if (ksid)
		uid = ksid_getid(ksid);
	else
		uid = crgetuid(cr);

	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (vsecp || (vap->va_mask & AT_XVATTR) ||
	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
		return (EINVAL);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	os = zfsvfs->z_os;
	zilog = zfsvfs->z_log;

	/* On a utf8-only file system the name must validate as UTF-8. */
	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}

	if (vap->va_mask & AT_XVATTR) {
		if ((error = secpolicy_xvattr((xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_type)) != 0) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}
	/*
	 * Retry point: re-entered, with the ACL ids freed and the dirent
	 * lock dropped, when dmu_tx_assign() below returns ERESTART.
	 */
top:
	*vpp = NULL;

	/* Strip the sticky bit when the policy check returns non-zero. */
	if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr))
		vap->va_mode &= ~VSVTX;

	if (*name == '\0') {
		/*
		 * Null component name refers to the directory itself.
		 */
		VN_HOLD(dvp);
		zp = dzp;
		dl = NULL;
		error = 0;
	} else {
		/* possible VN_HOLD(zp) */
		int zflg = 0;

		if (flag & FIGNORECASE)
			zflg |= ZCILOOK;

		error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
		    NULL, NULL);
		if (error) {
			/* a failed lock on ".." is reported as EISDIR */
			if (strcmp(name, "..") == 0)
				error = EISDIR;
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}
	if (zp == NULL) {
		uint64_t txtype;

		/*
		 * Create a new file object and update the directory
		 * to reference it.
		 */
		if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
			goto out;
		}

		/*
		 * We only support the creation of regular files in
		 * extended attribute directories.
		 */
		if ((dzp->z_phys->zp_flags & ZFS_XATTR) &&
		    (vap->va_type != VREG)) {
			error = EINVAL;
			goto out;
		}

		if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp,
		    &acl_ids)) != 0)
			goto out;
		/* acl_ids is live from here and must be freed on exit */
		if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
			zfs_acl_ids_free(&acl_ids);
			error = EDQUOT;
			goto out;
		}

		tx = dmu_tx_create(os);
		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
		fuid_dirtied = zfsvfs->z_fuid_dirty;
		if (fuid_dirtied)
			zfs_fuid_txhold(zfsvfs, tx);
		dmu_tx_hold_bonus(tx, dzp->z_id);
		dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
		/* an ACL larger than ZFS_ACE_SPACE needs a write hold too */
		if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
			    0, SPA_MAXBLOCKSIZE);
		}
		error = dmu_tx_assign(tx, TXG_NOWAIT);
		if (error) {
			/*
			 * Release the ACL ids and the dirent lock before
			 * either waiting and retrying (ERESTART) or
			 * failing outright.
			 */
			zfs_acl_ids_free(&acl_ids);
			zfs_dirent_unlock(dl);
			if (error == ERESTART) {
				dmu_tx_wait(tx);
				dmu_tx_abort(tx);
				goto top;
			}
			dmu_tx_abort(tx);
			ZFS_EXIT(zfsvfs);
			return (error);
		}
		zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids);

		if (fuid_dirtied)
			zfs_fuid_sync(zfsvfs, tx);

		(void) zfs_link_create(dl, zp, tx, ZNEW);

		txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
		if (flag & FIGNORECASE)
			txtype |= TX_CI;
		zfs_log_create(zilog, tx, txtype, dzp, zp, name,
		    vsecp, acl_ids.z_fuidp, vap);
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_commit(tx);
	} else {
		int aflags = (flag & FAPPEND) ? V_APPEND : 0;

		/*
		 * A directory entry already exists for this name.
		 */
		/*
		 * Can't truncate an existing file if in exclusive mode.
		 */
		if (excl == EXCL) {
			error = EEXIST;
			goto out;
		}
		/*
		 * Can't open a directory for writing.
		 */
		if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) {
			error = EISDIR;
			goto out;
		}
		/*
		 * Verify requested access to file.
		 */
		if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) {
			goto out;
		}

		mutex_enter(&dzp->z_lock);
		dzp->z_seq++;
		mutex_exit(&dzp->z_lock);

		/*
		 * Truncate regular files if requested.
		 */
		if ((ZTOV(zp)->v_type == VREG) &&
		    (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) {
			/* we can't hold any locks when calling zfs_freesp() */
			zfs_dirent_unlock(dl);
			dl = NULL;
			error = zfs_freesp(zp, 0, 0, mode, TRUE);
			if (error == 0) {
				vnevent_create(ZTOV(zp), ct);
			}
		}
	}
	/*
	 * Common exit: drop the dirent lock if still held, then either
	 * release the znode's hold (failure) or hand its vnode back
	 * through vpp, substituted by specvp_check() if it is a device.
	 */
out:

	if (dl)
		zfs_dirent_unlock(dl);

	if (error) {
		if (zp)
			VN_RELE(ZTOV(zp));
	} else {
		*vpp = ZTOV(zp);
		error = specvp_check(vpp, cr);
	}

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Remove an entry from a directory.
 *
 *	IN:	dvp	- vnode of directory to remove entry from.
 *		name	- name of entry to remove.
 *		cr	- credentials of caller.
*		ct	- caller context
 *		flags	- case flags
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	dvp - ctime|mtime
 *	 vp - ctime (if nlink > 0)
 */
/*ARGSUSED*/
static int
zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct,
    int flags)
{
	znode_t		*zp, *dzp = VTOZ(dvp);
	znode_t		*xzp = NULL;
	vnode_t		*vp;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	uint64_t	acl_obj, xattr_obj;
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	boolean_t	may_delete_now, delete_now = FALSE;
	boolean_t	unlinked, toobig = FALSE;
	uint64_t	txtype;
	pathname_t	*realnmp = NULL;
	pathname_t	realnm;
	int		error;
	int		zflg = ZEXISTS;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	/*
	 * For a case-insensitive remove, allocate a pathname buffer that
	 * zfs_dirent_lock() is given through realnmp; its pn_buf is what
	 * gets purged from the DNLC below.  It is freed on every return
	 * path.
	 */
	if (flags & FIGNORECASE) {
		zflg |= ZCILOOK;
		pn_alloc(&realnm);
		realnmp = &realnm;
	}

	/*
	 * Retry point: re-entered, with the dirent lock and vnode hold
	 * dropped, when dmu_tx_assign() below returns ERESTART.
	 */
top:
	/*
	 * Attempt to lock directory; fail if entry doesn't exist.
	 */
	if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
	    NULL, realnmp)) {
		if (realnmp)
			pn_free(realnmp);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	vp = ZTOV(zp);

	if (error = zfs_zaccess_delete(dzp, zp, cr)) {
		goto out;
	}

	/*
	 * Need to use rmdir for removing directories.
	 */
	if (vp->v_type == VDIR) {
		error = EPERM;
		goto out;
	}

	vnevent_remove(vp, dvp, name, ct);

	if (realnmp)
		dnlc_remove(dvp, realnmp->pn_buf);
	else
		dnlc_remove(dvp, name);

	/* First, tentative check; repeated once the link is destroyed. */
	mutex_enter(&vp->v_lock);
	may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp);
	mutex_exit(&vp->v_lock);

	/*
	 * We may delete the znode now, or we may put it in the unlinked set;
	 * it depends on whether we're the last link, and on whether there are
	 * other holds on the vnode.  So we dmu_tx_hold() the right things to
	 * allow for either case.
	 */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
	dmu_tx_hold_bonus(tx, zp->z_id);
	if (may_delete_now) {
		toobig =
		    zp->z_phys->zp_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT;
		/* if the file is too big, only hold_free a token amount */
		dmu_tx_hold_free(tx, zp->z_id, 0,
		    (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END));
	}

	/* are there any extended attributes? */
	if ((xattr_obj = zp->z_phys->zp_xattr) != 0) {
		/* XXX - do we need this if we are deleting? */
		dmu_tx_hold_bonus(tx, xattr_obj);
	}

	/* are there any additional acls */
	if ((acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj) != 0 &&
	    may_delete_now)
		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);

	/* charge as an update -- would be nice not to charge at all */
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);

	error = dmu_tx_assign(tx, TXG_NOWAIT);
	if (error) {
		zfs_dirent_unlock(dl);
		VN_RELE(vp);
		if (error == ERESTART) {
			/* realnmp is kept and reused by the retry */
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		if (realnmp)
			pn_free(realnmp);
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Remove the directory entry.
	 */
	error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked);

	if (error) {
		dmu_tx_commit(tx);
		goto out;
	}

	/*
	 * Immediate deletion is only chosen if the earlier answer still
	 * holds and the xattr/ACL object ids sampled while building the
	 * transaction are unchanged (the tx holds were sized for them).
	 */
	if (unlinked) {
		mutex_enter(&vp->v_lock);
		delete_now = may_delete_now && !toobig &&
		    vp->v_count == 1 && !vn_has_cached_data(vp) &&
		    zp->z_phys->zp_xattr == xattr_obj &&
		    zp->z_phys->zp_acl.z_acl_extern_obj == acl_obj;
		mutex_exit(&vp->v_lock);
	}

	if (delete_now) {
		if (zp->z_phys->zp_xattr) {
			/* mark the xattr object unlinked along with zp */
			error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xzp);
			ASSERT3U(error, ==, 0);
			ASSERT3U(xzp->z_phys->zp_links, ==, 2);
			dmu_buf_will_dirty(xzp->z_dbuf, tx);
			mutex_enter(&xzp->z_lock);
			xzp->z_unlinked = 1;
			xzp->z_phys->zp_links = 0;
			mutex_exit(&xzp->z_lock);
			zfs_unlinked_add(xzp, tx);
			zp->z_phys->zp_xattr = 0; /* probably unnecessary */
		}
		/*
		 * Drop our hold by hand instead of with VN_RELE(); the
		 * znode is deleted directly just below.
		 */
		mutex_enter(&zp->z_lock);
		mutex_enter(&vp->v_lock);
		vp->v_count--;
		ASSERT3U(vp->v_count, ==, 0);
		mutex_exit(&vp->v_lock);
		mutex_exit(&zp->z_lock);
		zfs_znode_delete(zp, tx);
	} else if (unlinked) {
		zfs_unlinked_add(zp, tx);
	}

	txtype = TX_REMOVE;
	if (flags & FIGNORECASE)
		txtype |= TX_CI;
	zfs_log_remove(zilog, tx, txtype, dzp, name);

	dmu_tx_commit(tx);
out:
	if (realnmp)
		pn_free(realnmp);

	zfs_dirent_unlock(dl);

	/* when delete_now is set, vp's hold was already dropped above */
	if (!delete_now) {
		VN_RELE(vp);
	} else if (xzp) {
		/* this rele is delayed to prevent nesting transactions */
		VN_RELE(ZTOV(xzp));
	}

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Create a new directory and insert it into dvp using the name
 * provided.  Return a pointer to the inserted directory.
 *
 *	IN:	dvp	- vnode of directory to add subdir to.
 *		dirname	- name of new directory.
 *		vap	- attributes of new directory.
 *		cr	- credentials of caller.
 *		ct	- caller context
 *		flags	- case flags (FIGNORECASE is examined)
 *		vsecp	- ACL to be set
 *
 *	OUT:	vpp	- vnode of created directory.
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 *	 vp - ctime|mtime|atime updated
 */
/*ARGSUSED*/
static int
zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr,
    caller_context_t *ct, int flags, vsecattr_t *vsecp)
{
	znode_t		*zp, *dzp = VTOZ(dvp);
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	zfs_dirlock_t	*dl;
	uint64_t	txtype;
	dmu_tx_t	*tx;
	int		error;
	int		zf = ZNEW;
	ksid_t		*ksid;
	uid_t		uid;
	gid_t		gid = crgetgid(cr);
	zfs_acl_ids_t	acl_ids;
	boolean_t	fuid_dirtied;

	ASSERT(vap->va_type == VDIR);

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */

	ksid = crgetsid(cr, KSID_OWNER);
	if (ksid)
		uid = ksid_getid(ksid);
	else
		uid = crgetuid(cr);
	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (vsecp || (vap->va_mask & AT_XVATTR) ||
	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
		return (EINVAL);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	/* directories cannot be created inside an xattr directory */
	if (dzp->z_phys->zp_flags & ZFS_XATTR) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/* On a utf8-only file system the name must validate as UTF-8. */
	if (zfsvfs->z_utf8 && u8_validate(dirname,
	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}
	if (flags & FIGNORECASE)
		zf |= ZCILOOK;

	if (vap->va_mask & AT_XVATTR)
		if ((error = secpolicy_xvattr((xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_type)) != 0) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}

	/*
	 * First make sure the new directory doesn't exist.
	 *
	 * This is also the retry point, re-entered with the ACL ids
	 * freed and the dirent lock dropped when dmu_tx_assign() below
	 * returns ERESTART.
	 */
top:
	*vpp = NULL;

	if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf,
	    NULL, NULL)) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) {
		zfs_dirent_unlock(dl);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp,
	    &acl_ids)) != 0) {
		zfs_dirent_unlock(dl);
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	/* acl_ids is live from here and must be freed on every exit */
	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
		zfs_acl_ids_free(&acl_ids);
		zfs_dirent_unlock(dl);
		ZFS_EXIT(zfsvfs);
		return (EDQUOT);
	}

	/*
	 * Add a new entry to the directory.
	 */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	fuid_dirtied = zfsvfs->z_fuid_dirty;
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	/* an ACL larger than ZFS_ACE_SPACE needs a write hold too */
	if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE)
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
		    0, SPA_MAXBLOCKSIZE);
	error = dmu_tx_assign(tx, TXG_NOWAIT);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		zfs_dirent_unlock(dl);
		if (error == ERESTART) {
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Create new node.
	 */
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids);

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);
	/*
	 * Now put new name in parent dir.
	 */
	(void) zfs_link_create(dl, zp, tx, ZNEW);

	*vpp = ZTOV(zp);

	txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap);
	if (flags & FIGNORECASE)
		txtype |= TX_CI;
	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp,
	    acl_ids.z_fuidp, vap);

	zfs_acl_ids_free(&acl_ids);
	dmu_tx_commit(tx);

	zfs_dirent_unlock(dl);

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Remove a directory subdir entry.
If the current working 1772 789 ahrens * directory is the same as the subdir to be removed, the 1773 789 ahrens * remove will fail. 1774 789 ahrens * 1775 789 ahrens * IN: dvp - vnode of directory to remove from. 1776 789 ahrens * name - name of directory to be removed. 1777 789 ahrens * cwd - vnode of current working directory. 1778 789 ahrens * cr - credentials of caller. 1779 5331 amw * ct - caller context 1780 5331 amw * flags - case flags 1781 789 ahrens * 1782 789 ahrens * RETURN: 0 if success 1783 789 ahrens * error code if failure 1784 789 ahrens * 1785 789 ahrens * Timestamps: 1786 789 ahrens * dvp - ctime|mtime updated 1787 789 ahrens */ 1788 5331 amw /*ARGSUSED*/ 1789 5331 amw static int 1790 5331 amw zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, 1791 5331 amw caller_context_t *ct, int flags) 1792 789 ahrens { 1793 789 ahrens znode_t *dzp = VTOZ(dvp); 1794 789 ahrens znode_t *zp; 1795 789 ahrens vnode_t *vp; 1796 789 ahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1797 5326 ek110237 zilog_t *zilog; 1798 789 ahrens zfs_dirlock_t *dl; 1799 789 ahrens dmu_tx_t *tx; 1800 789 ahrens int error; 1801 5331 amw int zflg = ZEXISTS; 1802 5331 amw 1803 5367 ahrens ZFS_ENTER(zfsvfs); 1804 5367 ahrens ZFS_VERIFY_ZP(dzp); 1805 5331 amw zilog = zfsvfs->z_log; 1806 5331 amw 1807 5331 amw if (flags & FIGNORECASE) 1808 5331 amw zflg |= ZCILOOK; 1809 789 ahrens top: 1810 789 ahrens zp = NULL; 1811 789 ahrens 1812 789 ahrens /* 1813 789 ahrens * Attempt to lock directory; fail if entry doesn't exist. 
1814 789 ahrens */ 1815 5331 amw if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1816 5331 amw NULL, NULL)) { 1817 789 ahrens ZFS_EXIT(zfsvfs); 1818 789 ahrens return (error); 1819 789 ahrens } 1820 789 ahrens 1821 789 ahrens vp = ZTOV(zp); 1822 789 ahrens 1823 789 ahrens if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1824 789 ahrens goto out; 1825 789 ahrens } 1826 789 ahrens 1827 789 ahrens if (vp->v_type != VDIR) { 1828 789 ahrens error = ENOTDIR; 1829 789 ahrens goto out; 1830 789 ahrens } 1831 789 ahrens 1832 789 ahrens if (vp == cwd) { 1833 789 ahrens error = EINVAL; 1834 789 ahrens goto out; 1835 789 ahrens } 1836 789 ahrens 1837 5331 amw vnevent_rmdir(vp, dvp, name, ct); 1838 789 ahrens 1839 789 ahrens /* 1840 3897 maybee * Grab a lock on the directory to make sure that noone is 1841 3897 maybee * trying to add (or lookup) entries while we are removing it. 1842 3897 maybee */ 1843 3897 maybee rw_enter(&zp->z_name_lock, RW_WRITER); 1844 3897 maybee 1845 3897 maybee /* 1846 3897 maybee * Grab a lock on the parent pointer to make sure we play well 1847 789 ahrens * with the treewalk and directory rename code. 
1848 789 ahrens */ 1849 789 ahrens rw_enter(&zp->z_parent_lock, RW_WRITER); 1850 789 ahrens 1851 789 ahrens tx = dmu_tx_create(zfsvfs->z_os); 1852 1544 eschrock dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 1853 789 ahrens dmu_tx_hold_bonus(tx, zp->z_id); 1854 3461 ahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1855 8227 Neil error = dmu_tx_assign(tx, TXG_NOWAIT); 1856 789 ahrens if (error) { 1857 789 ahrens rw_exit(&zp->z_parent_lock); 1858 3897 maybee rw_exit(&zp->z_name_lock); 1859 789 ahrens zfs_dirent_unlock(dl); 1860 789 ahrens VN_RELE(vp); 1861 8227 Neil if (error == ERESTART) { 1862 2113 ahrens dmu_tx_wait(tx); 1863 2113 ahrens dmu_tx_abort(tx); 1864 789 ahrens goto top; 1865 789 ahrens } 1866 2113 ahrens dmu_tx_abort(tx); 1867 789 ahrens ZFS_EXIT(zfsvfs); 1868 789 ahrens return (error); 1869 789 ahrens } 1870 789 ahrens 1871 5331 amw error = zfs_link_destroy(dl, zp, tx, zflg, NULL); 1872 5331 amw 1873 5331 amw if (error == 0) { 1874 5331 amw uint64_t txtype = TX_RMDIR; 1875 5331 amw if (flags & FIGNORECASE) 1876 5331 amw txtype |= TX_CI; 1877 5331 amw zfs_log_remove(zilog, tx, txtype, dzp, name); 1878 5331 amw } 1879 789 ahrens 1880 789 ahrens dmu_tx_commit(tx); 1881 789 ahrens 1882 789 ahrens rw_exit(&zp->z_parent_lock); 1883 3897 maybee rw_exit(&zp->z_name_lock); 1884 789 ahrens out: 1885 789 ahrens zfs_dirent_unlock(dl); 1886 789 ahrens 1887 789 ahrens VN_RELE(vp); 1888 789 ahrens 1889 789 ahrens ZFS_EXIT(zfsvfs); 1890 789 ahrens return (error); 1891 789 ahrens } 1892 789 ahrens 1893 789 ahrens /* 1894 789 ahrens * Read as many directory entries as will fit into the provided 1895 789 ahrens * buffer from the given directory cursor position (specified in 1896 789 ahrens * the uio structure. 1897 789 ahrens * 1898 789 ahrens * IN: vp - vnode of directory to read. 1899 789 ahrens * uio - structure supplying read location, range info, 1900 789 ahrens * and return buffer. 1901 789 ahrens * cr - credentials of caller. 
1902 5331 amw * ct - caller context 1903 5331 amw * flags - case flags 1904 789 ahrens * 1905 789 ahrens * OUT: uio - updated offset and range, buffer filled. 1906 789 ahrens * eofp - set to true if end-of-file detected. 1907 789 ahrens * 1908 789 ahrens * RETURN: 0 if success 1909 789 ahrens * error code if failure 1910 789 ahrens * 1911 789 ahrens * Timestamps: 1912 789 ahrens * vp - atime updated 1913 789 ahrens * 1914 789 ahrens * Note that the low 4 bits of the cookie returned by zap is always zero. 1915 789 ahrens * This allows us to use the low range for "special" directory entries: 1916 789 ahrens * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 1917 789 ahrens * we use the offset 2 for the '.zfs' directory. 1918 789 ahrens */ 1919 789 ahrens /* ARGSUSED */ 1920 789 ahrens static int 1921 5331 amw zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, 1922 5331 amw caller_context_t *ct, int flags) 1923 789 ahrens { 1924 789 ahrens znode_t *zp = VTOZ(vp); 1925 789 ahrens iovec_t *iovp; 1926 5331 amw edirent_t *eodp; 1927 789 ahrens dirent64_t *odp; 1928 789 ahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1929 869 perrin objset_t *os; 1930 789 ahrens caddr_t outbuf; 1931 789 ahrens size_t bufsize; 1932 789 ahrens zap_cursor_t zc; 1933 789 ahrens zap_attribute_t zap; 1934 789 ahrens uint_t bytes_wanted; 1935 789 ahrens uint64_t offset; /* must be unsigned; checks for < 1 */ 1936 789 ahrens int local_eof; 1937 869 perrin int outcount; 1938 869 perrin int error; 1939 869 perrin uint8_t prefetch; 1940 5663 ck153898 boolean_t check_sysattrs; 1941 789 ahrens 1942 5367 ahrens ZFS_ENTER(zfsvfs); 1943 5367 ahrens ZFS_VERIFY_ZP(zp); 1944 789 ahrens 1945 789 ahrens /* 1946 789 ahrens * If we are not given an eof variable, 1947 789 ahrens * use a local one. 1948 789 ahrens */ 1949 789 ahrens if (eofp == NULL) 1950 789 ahrens eofp = &local_eof; 1951 789 ahrens 1952 789 ahrens /* 1953 789 ahrens * Check for valid iov_len. 
1954 789 ahrens */ 1955 789 ahrens if (uio->uio_iov->iov_len <= 0) { 1956 789 ahrens ZFS_EXIT(zfsvfs); 1957 789 ahrens return (EINVAL); 1958 789 ahrens } 1959 789 ahrens 1960 789 ahrens /* 1961 789 ahrens * Quit if directory has been removed (posix) 1962 789 ahrens */ 1963 3461 ahrens if ((*eofp = zp->z_unlinked) != 0) { 1964 789 ahrens ZFS_EXIT(zfsvfs); 1965 789 ahrens return (0); 1966 789 ahrens } 1967 789 ahrens 1968 869 perrin error = 0; 1969 869 perrin os = zfsvfs->z_os; 1970 869 perrin offset = uio->uio_loffset; 1971 869 perrin prefetch = zp->z_zn_prefetch; 1972 869 perrin 1973 789 ahrens /* 1974 789 ahrens * Initialize the iterator cursor. 1975 789 ahrens */ 1976 789 ahrens if (offset <= 3) { 1977 789 ahrens /* 1978 789 ahrens * Start iteration from the beginning of the directory. 1979 789 ahrens */ 1980 869 perrin zap_cursor_init(&zc, os, zp->z_id); 1981 789 ahrens } else { 1982 789 ahrens /* 1983 789 ahrens * The offset is a serialized cursor. 1984 789 ahrens */ 1985 869 perrin zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 1986 789 ahrens } 1987 789 ahrens 1988 789 ahrens /* 1989 789 ahrens * Get space to change directory entries into fs independent format. 
1990 789 ahrens */ 1991 789 ahrens iovp = uio->uio_iov; 1992 789 ahrens bytes_wanted = iovp->iov_len; 1993 789 ahrens if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 1994 789 ahrens bufsize = bytes_wanted; 1995 789 ahrens outbuf = kmem_alloc(bufsize, KM_SLEEP); 1996 789 ahrens odp = (struct dirent64 *)outbuf; 1997 789 ahrens } else { 1998 789 ahrens bufsize = bytes_wanted; 1999 789 ahrens odp = (struct dirent64 *)iovp->iov_base; 2000 789 ahrens } 2001 5331 amw eodp = (struct edirent *)odp; 2002 5663 ck153898 2003 5663 ck153898 /* 2004 7757 Janice * If this VFS supports the system attribute view interface; and 2005 7757 Janice * we're looking at an extended attribute directory; and we care 2006 7757 Janice * about normalization conflicts on this vfs; then we must check 2007 7757 Janice * for normalization conflicts with the sysattr name space. 2008 7757 Janice */ 2009 7757 Janice check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2010 5663 ck153898 (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2011 5663 ck153898 (flags & V_RDDIR_ENTFLAGS); 2012 789 ahrens 2013 789 ahrens /* 2014 789 ahrens * Transform to file-system independent format 2015 789 ahrens */ 2016 789 ahrens outcount = 0; 2017 789 ahrens while (outcount < bytes_wanted) { 2018 3912 lling ino64_t objnum; 2019 3912 lling ushort_t reclen; 2020 3912 lling off64_t *next; 2021 3912 lling 2022 789 ahrens /* 2023 789 ahrens * Special case `.', `..', and `.zfs'. 
2024 789 ahrens */ 2025 789 ahrens if (offset == 0) { 2026 789 ahrens (void) strcpy(zap.za_name, "."); 2027 5331 amw zap.za_normalization_conflict = 0; 2028 3912 lling objnum = zp->z_id; 2029 789 ahrens } else if (offset == 1) { 2030 789 ahrens (void) strcpy(zap.za_name, ".."); 2031 5331 amw zap.za_normalization_conflict = 0; 2032 3912 lling objnum = zp->z_phys->zp_parent; 2033 789 ahrens } else if (offset == 2 && zfs_show_ctldir(zp)) { 2034 789 ahrens (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2035 5331 amw zap.za_normalization_conflict = 0; 2036 3912 lling objnum = ZFSCTL_INO_ROOT; 2037 789 ahrens } else { 2038 789 ahrens /* 2039 789 ahrens * Grab next entry. 2040 789 ahrens */ 2041 789 ahrens if (error = zap_cursor_retrieve(&zc, &zap)) { 2042 789 ahrens if ((*eofp = (error == ENOENT)) != 0) 2043 789 ahrens break; 2044 789 ahrens else 2045 789 ahrens goto update; 2046 789 ahrens } 2047 789 ahrens 2048 789 ahrens if (zap.za_integer_length != 8 || 2049 789 ahrens zap.za_num_integers != 1) { 2050 789 ahrens cmn_err(CE_WARN, "zap_readdir: bad directory " 2051 789 ahrens "entry, obj = %lld, offset = %lld\n", 2052 789 ahrens (u_longlong_t)zp->z_id, 2053 789 ahrens (u_longlong_t)offset); 2054 789 ahrens error = ENXIO; 2055 789 ahrens goto update; 2056 789 ahrens } 2057 3912 lling 2058 3912 lling objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2059 3912 lling /* 2060 3912 lling * MacOS X can extract the object type here such as: 2061 3912 lling * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2062 3912 lling */ 2063 5663 ck153898 2064 5663 ck153898 if (check_sysattrs && !zap.za_normalization_conflict) { 2065 5663 ck153898 zap.za_normalization_conflict = 2066 5663 ck153898 xattr_sysattr_casechk(zap.za_name); 2067 5663 ck153898 } 2068 789 ahrens } 2069 5331 amw 2070 9749 Tim if (flags & V_RDDIR_ACCFILTER) { 2071 9749 Tim /* 2072 9749 Tim * If we have no access at all, don't include 2073 9749 Tim * this entry in the returned information 2074 9749 Tim */ 2075 9749 
Tim znode_t *ezp; 2076 9749 Tim if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2077 9749 Tim goto skip_entry; 2078 9749 Tim if (!zfs_has_access(ezp, cr)) { 2079 9749 Tim VN_RELE(ZTOV(ezp)); 2080 9749 Tim goto skip_entry; 2081 9749 Tim } 2082 9749 Tim VN_RELE(ZTOV(ezp)); 2083 9749 Tim } 2084 9749 Tim 2085 5331 amw if (flags & V_RDDIR_ENTFLAGS) 2086 5331 amw reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2087 5331 amw else 2088 5331 amw reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2089 789 ahrens 2090 789 ahrens /* 2091 789 ahrens * Will this entry fit in the buffer? 2092 789 ahrens */ 2093 3912 lling if (outcount + reclen > bufsize) { 2094 789 ahrens /* 2095 789 ahrens * Did we manage to fit anything in the buffer? 2096 789 ahrens */ 2097 789 ahrens if (!outcount) { 2098 789 ahrens error = EINVAL; 2099 789 ahrens goto update; 2100 789 ahrens } 2101 789 ahrens break; 2102 789 ahrens } 2103 5331 amw if (flags & V_RDDIR_ENTFLAGS) { 2104 5331 amw /* 2105 5331 amw * Add extended flag entry: 2106 5331 amw */ 2107 5331 amw eodp->ed_ino = objnum; 2108 5331 amw eodp->ed_reclen = reclen; 2109 5331 amw /* NOTE: ed_off is the offset for the *next* entry */ 2110 5331 amw next = &(eodp->ed_off); 2111 5331 amw eodp->ed_eflags = zap.za_normalization_conflict ? 
2112 5331 amw ED_CASE_CONFLICT : 0; 2113 5331 amw (void) strncpy(eodp->ed_name, zap.za_name, 2114 5331 amw EDIRENT_NAMELEN(reclen)); 2115 5331 amw eodp = (edirent_t *)((intptr_t)eodp + reclen); 2116 5331 amw } else { 2117 5331 amw /* 2118 5331 amw * Add normal entry: 2119 5331 amw */ 2120 5331 amw odp->d_ino = objnum; 2121 5331 amw odp->d_reclen = reclen; 2122 5331 amw /* NOTE: d_off is the offset for the *next* entry */ 2123 5331 amw next = &(odp->d_off); 2124 5331 amw (void) strncpy(odp->d_name, zap.za_name, 2125 5331 amw DIRENT64_NAMELEN(reclen)); 2126 5331 amw odp = (dirent64_t *)((intptr_t)odp + reclen); 2127 5331 amw } 2128 3912 lling outcount += reclen; 2129 789 ahrens 2130 789 ahrens ASSERT(outcount <= bufsize); 2131 789 ahrens 2132 789 ahrens /* Prefetch znode */ 2133 869 perrin if (prefetch) 2134 3912 lling dmu_prefetch(os, objnum, 0, 0); 2135 789 ahrens 2136 9749 Tim skip_entry: 2137 789 ahrens /* 2138 789 ahrens * Move to the next entry, fill in the previous offset. 2139 789 ahrens */ 2140 789 ahrens if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2141 789 ahrens zap_cursor_advance(&zc); 2142 789 ahrens offset = zap_cursor_serialize(&zc); 2143 789 ahrens } else { 2144 789 ahrens offset += 1; 2145 789 ahrens } 2146 789 ahrens *next = offset; 2147 789 ahrens } 2148 869 perrin zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2149 789 ahrens 2150 789 ahrens if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2151 789 ahrens iovp->iov_base += outcount; 2152 789 ahrens iovp->iov_len -= outcount; 2153 789 ahrens uio->uio_resid -= outcount; 2154 789 ahrens } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2155 789 ahrens /* 2156 789 ahrens * Reset the pointer. 
2157 789 ahrens */ 2158 789 ahrens offset = uio->uio_loffset; 2159 789 ahrens } 2160 789 ahrens 2161 789 ahrens update: 2162 885 ahrens zap_cursor_fini(&zc); 2163 789 ahrens if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2164 789 ahrens kmem_free(outbuf, bufsize); 2165 789 ahrens 2166 789 ahrens if (error == ENOENT) 2167 789 ahrens error = 0; 2168 789 ahrens 2169 789 ahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2170 789 ahrens 2171 789 ahrens uio->uio_loffset = offset; 2172 789 ahrens ZFS_EXIT(zfsvfs); 2173 789 ahrens return (error); 2174 789 ahrens } 2175 789 ahrens 2176 4720 fr157268 ulong_t zfs_fsync_sync_cnt = 4; 2177 4720 fr157268 2178 789 ahrens static int 2179 5331 amw zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2180 789 ahrens { 2181 789 ahrens znode_t *zp = VTOZ(vp); 2182 789 ahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2183 1773 eschrock 2184 1773 eschrock /* 2185 1773 eschrock * Regardless of whether this is required for standards conformance, 2186 1773 eschrock * this is the logical behavior when fsync() is called on a file with 2187 1773 eschrock * dirty pages. We use B_ASYNC since the ZIL transactions are already 2188 1773 eschrock * going to be pushed out as part of the zil_commit(). 2189 1773 eschrock */ 2190 1773 eschrock if (vn_has_cached_data(vp) && !(syncflag & FNODSYNC) && 2191 1773 eschrock (vp->v_type == VREG) && !(IS_SWAPVP(vp))) 2192 5331 amw (void) VOP_PUTPAGE(vp, (offset_t)0, (size_t)0, B_ASYNC, cr, ct); 2193 4720 fr157268 2194 4720 fr157268 (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2195 789 ahrens 2196 5367 ahrens ZFS_ENTER(zfsvfs); 2197 5367 ahrens ZFS_VERIFY_ZP(zp); 2198 2638 perrin zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id); 2199 789 ahrens ZFS_EXIT(zfsvfs); 2200 789 ahrens return (0); 2201 789 ahrens } 2202 5331 amw 2203 789 ahrens 2204 789 ahrens /* 2205 789 ahrens * Get the requested file attributes and place them in the provided 2206 789 ahrens * vattr structure. 
2207 789 ahrens * 2208 789 ahrens * IN: vp - vnode of file. 2209 789 ahrens * vap - va_mask identifies requested attributes. 2210 5331 amw * If AT_XVATTR set, then optional attrs are requested 2211 5331 amw * flags - ATTR_NOACLCHECK (CIFS server context) 2212 5331 amw * cr - credentials of caller. 2213 5331 amw * ct - caller context 2214 789 ahrens * 2215 789 ahrens * OUT: vap - attribute values. 2216 789 ahrens * 2217 789 ahrens * RETURN: 0 (always succeeds) 2218 789 ahrens */ 2219 789 ahrens /* ARGSUSED */ 2220 789 ahrens static int 2221 5331 amw zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2222 5331 amw caller_context_t *ct) 2223 789 ahrens { 2224 789 ahrens znode_t *zp = VTOZ(vp); 2225 789 ahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2226 5326 ek110237 znode_phys_t *pzp; 2227 5331 amw int error = 0; 2228 4543 marks uint64_t links; 2229 5331 amw xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2230 5331 amw xoptattr_t *xoap = NULL; 2231 5331 amw boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 2232 789 ahrens 2233 5367 ahrens ZFS_ENTER(zfsvfs); 2234 5367 ahrens ZFS_VERIFY_ZP(zp); 2235 5326 ek110237 pzp = zp->z_phys; 2236 5331 amw 2237 5331 amw /* 2238 5331 amw * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2239 5331 amw * Also, if we are the owner don't bother, since owner should 2240 5331 amw * always be allowed to read basic attributes of file. 2241 5331 amw */ 2242 5331 amw if (!(pzp->zp_flags & ZFS_ACL_TRIVIAL) && 2243 5331 amw (pzp->zp_uid != crgetuid(cr))) { 2244 5331 amw if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2245 5331 amw skipaclchk, cr)) { 2246 5331 amw ZFS_EXIT(zfsvfs); 2247 5331 amw return (error); 2248 5331 amw } 2249 5331 amw } 2250 789 ahrens 2251 789 ahrens /* 2252 789 ahrens * Return all attributes. It's cheaper to provide the answer 2253 789 ahrens * than to determine whether we were asked the question. 
2254 789 ahrens */ 2255 789 ahrens 2256 9774 Ray mutex_enter(&zp->z_lock); 2257 789 ahrens vap->va_type = vp->v_type; 2258 789 ahrens vap->va_mode = pzp->zp_mode & MODEMASK; 2259 5771 jp151216 zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2260 789 ahrens vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2261 789 ahrens vap->va_nodeid = zp->z_id; 2262 4543 marks if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 2263 4543 marks links = pzp->zp_links + 1; 2264 4543 marks else 2265 4543 marks links = pzp->zp_links; 2266 4543 marks vap->va_nlink = MIN(links, UINT32_MAX); /* nlink_t limit! */ 2267 789 ahrens vap->va_size = pzp->zp_size; 2268 1816 marks vap->va_rdev = vp->v_rdev; 2269 789 ahrens vap->va_seq = zp->z_seq; 2270 789 ahrens 2271 5331 amw /* 2272 5331 amw * Add in any requested optional attributes and the create time. 2273 5331 amw * Also set the corresponding bits in the returned attribute bitmap. 2274 5331 amw */ 2275 5331 amw if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2276 5331 amw if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2277 5331 amw xoap->xoa_archive = 2278 5331 amw ((pzp->zp_flags & ZFS_ARCHIVE) != 0); 2279 5331 amw XVA_SET_RTN(xvap, XAT_ARCHIVE); 2280 5331 amw } 2281 5331 amw 2282 5331 amw if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2283 5331 amw xoap->xoa_readonly = 2284 5331 amw ((pzp->zp_flags & ZFS_READONLY) != 0); 2285 5331 amw XVA_SET_RTN(xvap, XAT_READONLY); 2286 5331 amw } 2287 5331 amw 2288 5331 amw if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2289 5331 amw xoap->xoa_system = 2290 5331 amw ((pzp->zp_flags & ZFS_SYSTEM) != 0); 2291 5331 amw XVA_SET_RTN(xvap, XAT_SYSTEM); 2292 5331 amw } 2293 5331 amw 2294 5331 amw if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2295 5331 amw xoap->xoa_hidden = 2296 5331 amw ((pzp->zp_flags & ZFS_HIDDEN) != 0); 2297 5331 amw XVA_SET_RTN(xvap, XAT_HIDDEN); 2298 5331 amw } 2299 5331 amw 2300 5331 amw if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2301 5331 amw xoap->xoa_nounlink = 2302 5331 amw ((pzp->zp_flags & 
ZFS_NOUNLINK) != 0); 2303 5331 amw XVA_SET_RTN(xvap, XAT_NOUNLINK); 2304 5331 amw } 2305 5331 amw 2306 5331 amw if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2307 5331 amw xoap->xoa_immutable = 2308 5331 amw ((pzp->zp_flags & ZFS_IMMUTABLE) != 0); 2309 5331 amw XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2310 5331 amw } 2311 5331 amw 2312 5331 amw if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2313 5331 amw xoap->xoa_appendonly = 2314 5331 amw ((pzp->zp_flags & ZFS_APPENDONLY) != 0); 2315 5331 amw XVA_SET_RTN(xvap, XAT_APPENDONLY); 2316 5331 amw } 2317 5331 amw 2318 5331 amw if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2319 5331 amw xoap->xoa_nodump = 2320 5331 amw ((pzp->zp_flags & ZFS_NODUMP) != 0); 2321 5331 amw XVA_SET_RTN(xvap, XAT_NODUMP); 2322 5331 amw } 2323 5331 amw 2324 5331 amw if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2325 5331 amw xoap->xoa_opaque = 2326 5331 amw ((pzp->zp_flags & ZFS_OPAQUE) != 0); 2327 5331 amw XVA_SET_RTN(xvap, XAT_OPAQUE); 2328 5331 amw } 2329 5331 amw 2330 5331 amw if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2331 5331 amw xoap->xoa_av_quarantined = 2332 5331 amw ((pzp->zp_flags & ZFS_AV_QUARANTINED) != 0); 2333 5331 amw XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2334 5331 amw } 2335 5331 amw 2336 5331 amw if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2337 5331 amw xoap->xoa_av_modified = 2338 5331 amw ((pzp->zp_flags & ZFS_AV_MODIFIED) != 0); 2339 5331 amw XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2340 5331 amw } 2341 5331 amw 2342 5331 amw if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 2343 5331 amw vp->v_type == VREG && 2344 5331 amw (pzp->zp_flags & ZFS_BONUS_SCANSTAMP)) { 2345 5331 amw size_t len; 2346 5331 amw dmu_object_info_t doi; 2347 5331 amw 2348 5331 amw /* 2349 5331 amw * Only VREG files have anti-virus scanstamps, so we 2350 5331 amw * won't conflict with symlinks in the bonus buffer. 
2351 5331 amw */ 2352 5331 amw dmu_object_info_from_db(zp->z_dbuf, &doi); 2353 5331 amw len = sizeof (xoap->xoa_av_scanstamp) + 2354 5331 amw sizeof (znode_phys_t); 2355 5331 amw if (len <= doi.doi_bonus_size) { 2356 5331 amw /* 2357 5331 amw * pzp points to the start of the 2358 5331 amw * znode_phys_t. pzp + 1 points to the 2359 5331 amw * first byte after the znode_phys_t. 2360 5331 amw */ 2361 5331 amw (void) memcpy(xoap->xoa_av_scanstamp, 2362 5331 amw pzp + 1, 2363 5331 amw sizeof (xoap->xoa_av_scanstamp)); 2364 5331 amw XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP); 2365 5331 amw } 2366 5331 amw } 2367 5331 amw 2368 5331 amw if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 2369 5331 amw ZFS_TIME_DECODE(&xoap->xoa_createtime, pzp->zp_crtime); 2370 5331 amw XVA_SET_RTN(xvap, XAT_CREATETIME); 2371 5331 amw } 2372 10793 dai 2373 10793 dai if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 2374 10793 dai xoap->xoa_reparse = 2375 10793 dai ((pzp->zp_flags & ZFS_REPARSE) != 0); 2376 10793 dai XVA_SET_RTN(xvap, XAT_REPARSE); 2377 10793 dai } 2378 5331 amw } 2379 5331 amw 2380 789 ahrens ZFS_TIME_DECODE(&vap->va_atime, pzp->zp_atime); 2381 789 ahrens ZFS_TIME_DECODE(&vap->va_mtime, pzp->zp_mtime); 2382 789 ahrens ZFS_TIME_DECODE(&vap->va_ctime, pzp->zp_ctime); 2383 789 ahrens 2384 789 ahrens mutex_exit(&zp->z_lock); 2385 789 ahrens 2386 789 ahrens dmu_object_size_from_db(zp->z_dbuf, &vap->va_blksize, &vap->va_nblocks); 2387 789 ahrens 2388 789 ahrens if (zp->z_blksz == 0) { 2389 789 ahrens /* 2390 789 ahrens * Block size hasn't been set; suggest maximal I/O transfers. 2391 789 ahrens */ 2392 789 ahrens vap->va_blksize = zfsvfs->z_max_blksz; 2393 789 ahrens } 2394 789 ahrens 2395 789 ahrens ZFS_EXIT(zfsvfs); 2396 789 ahrens return (0); 2397 789 ahrens } 2398 789 ahrens 2399 789 ahrens /* 2400 789 ahrens * Set the file attributes to the values contained in the 2401 789 ahrens * vattr structure. 2402 789 ahrens * 2403 789 ahrens * IN: vp - vnode of file to be modified. 
2404 789 ahrens * vap - new attribute values. 2405 5331 amw * If AT_XVATTR set, then optional attrs are being set 2406 789 ahrens * flags - ATTR_UTIME set if non-default time values provided. 2407 5331 amw * - ATTR_NOACLCHECK (CIFS context only). 2408 5331 amw * cr - credentials of caller. 2409 5331 amw * ct - caller context 2410 789 ahrens * 2411 789 ahrens * RETURN: 0 if success 2412 789 ahrens * error code if failure 2413 789 ahrens * 2414 789 ahrens * Timestamps: 2415 789 ahrens * vp - ctime updated, mtime updated if size changed. 2416 789 ahrens */ 2417 789 ahrens /* ARGSUSED */ 2418 789 ahrens static int 2419 789 ahrens zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2420 789 ahrens caller_context_t *ct) 2421 789 ahrens { 2422 5326 ek110237 znode_t *zp = VTOZ(vp); 2423 5326 ek110237 znode_phys_t *pzp; 2424 789 ahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2425 5326 ek110237 zilog_t *zilog; 2426 789 ahrens dmu_tx_t *tx; 2427 1878 maybee vattr_t oldva; 2428 8190 Mark xvattr_t tmpxvattr; 2429 789 ahrens uint_t mask = vap->va_mask; 2430 1878 maybee uint_t saved_mask; 2431 2796 marks int trim_mask = 0; 2432 789 ahrens uint64_t new_mode; 2433 9179 Mark uint64_t new_uid, new_gid; 2434 1231 marks znode_t *attrzp; 2435 789 ahrens int need_policy = FALSE; 2436 789 ahrens int err; 2437 5331 amw zfs_fuid_info_t *fuidp = NULL; 2438 5331 amw xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2439 5331 amw xoptattr_t *xoap; 2440 5824 marks zfs_acl_t *aclp = NULL; 2441 5331 amw boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 2442 9179 Mark boolean_t fuid_dirtied = B_FALSE; 2443 789 ahrens 2444 789 ahrens if (mask == 0) 2445 789 ahrens return (0); 2446 789 ahrens 2447 789 ahrens if (mask & AT_NOSET) 2448 5331 amw return (EINVAL); 2449 5331 amw 2450 5367 ahrens ZFS_ENTER(zfsvfs); 2451 5367 ahrens ZFS_VERIFY_ZP(zp); 2452 5331 amw 2453 5331 amw pzp = zp->z_phys; 2454 5331 amw zilog = zfsvfs->z_log; 2455 5331 amw 2456 5331 amw /* 2457 5331 amw * Make sure that if we have ephemeral uid/gid or xvattr specified 2458 5331 amw * that file system is at proper version level 2459 5331 amw */ 2460 5331 amw 2461 5331 amw if (zfsvfs->z_use_fuids == B_FALSE && 2462 5331 amw (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 2463 5331 amw ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 2464 5386 timh (mask & AT_XVATTR))) { 2465 5386 timh ZFS_EXIT(zfsvfs); 2466 5386 timh return (EINVAL); 2467 5386 timh } 2468 5386 timh 2469 5386 timh if (mask & AT_SIZE && vp->v_type == VDIR) { 2470 5386 timh ZFS_EXIT(zfsvfs); 2471 789 ahrens return (EISDIR); 2472 5386 timh } 2473 5386 timh 2474 5386 timh if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 2475 5386 timh ZFS_EXIT(zfsvfs); 2476 5386 timh return (EINVAL); 2477 5386 timh } 2478 789 ahrens 2479 5331 amw /* 2480 5331 amw * If this is an xvattr_t, then get a pointer to the structure of 2481 5331 amw * optional attributes. If this is NULL, then we have a vattr_t. 
2482 5331 amw */ 2483 5331 amw xoap = xva_getxoptattr(xvap); 2484 5331 amw 2485 8190 Mark xva_init(&tmpxvattr); 2486 8190 Mark 2487 5331 amw /* 2488 5331 amw * Immutable files can only alter immutable bit and atime 2489 5331 amw */ 2490 5331 amw if ((pzp->zp_flags & ZFS_IMMUTABLE) && 2491 5331 amw ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 2492 5386 timh ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 2493 5386 timh ZFS_EXIT(zfsvfs); 2494 5386 timh return (EPERM); 2495 5386 timh } 2496 5386 timh 2497 5386 timh if ((mask & AT_SIZE) && (pzp->zp_flags & ZFS_READONLY)) { 2498 5386 timh ZFS_EXIT(zfsvfs); 2499 5386 timh return (EPERM); 2500 5386 timh } 2501 789 ahrens 2502 6064 marks /* 2503 6064 marks * Verify timestamps doesn't overflow 32 bits. 2504 6064 marks * ZFS can handle large timestamps, but 32bit syscalls can't 2505 6064 marks * handle times greater than 2039. This check should be removed 2506 6064 marks * once large timestamps are fully supported. 2507 6064 marks */ 2508 6064 marks if (mask & (AT_ATIME | AT_MTIME)) { 2509 6064 marks if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 2510 6064 marks ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 2511 6064 marks ZFS_EXIT(zfsvfs); 2512 6064 marks return (EOVERFLOW); 2513 6064 marks } 2514 6064 marks } 2515 6064 marks 2516 789 ahrens top: 2517 1231 marks attrzp = NULL; 2518 789 ahrens 2519 9981 Tim /* Can this be moved to before the top label? 
*/ 2520 789 ahrens if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 2521 789 ahrens ZFS_EXIT(zfsvfs); 2522 789 ahrens return (EROFS); 2523 789 ahrens } 2524 789 ahrens 2525 789 ahrens /* 2526 789 ahrens * First validate permissions 2527 789 ahrens */ 2528 789 ahrens 2529 789 ahrens if (mask & AT_SIZE) { 2530 5331 amw err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr); 2531 1878 maybee if (err) { 2532 1878 maybee ZFS_EXIT(zfsvfs); 2533 1878 maybee return (err); 2534 1878 maybee } 2535 1878 maybee /* 2536 1878 maybee * XXX - Note, we are not providing any open 2537 1878 maybee * mode flags here (like FNDELAY), so we may 2538 1878 maybee * block if there are locks present... this 2539 1878 maybee * should be addressed in openat(). 2540 1878 maybee */ 2541 6992 maybee /* XXX - would it be OK to generate a log record here? */ 2542 6992 maybee err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 2543 789 ahrens if (err) { 2544 789 ahrens ZFS_EXIT(zfsvfs); 2545 789 ahrens return (err); 2546 789 ahrens } 2547 789 ahrens } 2548 789 ahrens 2549 5331 amw if (mask & (AT_ATIME|AT_MTIME) || 2550 5331 amw ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 2551 5331 amw XVA_ISSET_REQ(xvap, XAT_READONLY) || 2552 5331 amw XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 2553 5331 amw XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 2554 5331 amw XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) 2555 5331 amw need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 2556 5331 amw skipaclchk, cr); 2557 789 ahrens 2558 789 ahrens if (mask & (AT_UID|AT_GID)) { 2559 789 ahrens int idmask = (mask & (AT_UID|AT_GID)); 2560 789 ahrens int take_owner; 2561 789 ahrens int take_group; 2562 789 ahrens 2563 789 ahrens /* 2564 913 marks * NOTE: even if a new mode is being set, 2565 913 marks * we may clear S_ISUID/S_ISGID bits. 
2566 913 marks */ 2567 913 marks 2568 913 marks if (!(mask & AT_MODE)) 2569 913 marks vap->va_mode = pzp->zp_mode; 2570 913 marks 2571 913 marks /* 2572 789 ahrens * Take ownership or chgrp to group we are a member of 2573 789 ahrens */ 2574 789 ahrens 2575 789 ahrens take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 2576 5331 amw take_group = (mask & AT_GID) && 2577 5331 amw zfs_groupmember(zfsvfs, vap->va_gid, cr); 2578 789 ahrens 2579 789 ahrens /* 2580 789 ahrens * If both AT_UID and AT_GID are set then take_owner and 2581 789 ahrens * take_group must both be set in order to allow taking 2582 789 ahrens * ownership. 2583 789 ahrens * 2584 789 ahrens * Otherwise, send the check through secpolicy_vnode_setattr() 2585 789 ahrens * 2586 789 ahrens */ 2587 789 ahrens 2588 789 ahrens if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 2589 789 ahrens ((idmask == AT_UID) && take_owner) || 2590 789 ahrens ((idmask == AT_GID) && take_group)) { 2591 5331 amw if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 2592 5331 amw skipaclchk, cr) == 0) { 2593 789 ahrens /* 2594 789 ahrens * Remove setuid/setgid for non-privileged users 2595 789 ahrens */ 2596 1115 marks secpolicy_setid_clear(vap, cr); 2597 2796 marks trim_mask = (mask & (AT_UID|AT_GID)); 2598 789 ahrens } else { 2599 789 ahrens need_policy = TRUE; 2600 789 ahrens } 2601 789 ahrens } else { 2602 789 ahrens need_policy = TRUE; 2603 789 ahrens } 2604 789 ahrens } 2605 789 ahrens 2606 2796 marks mutex_enter(&zp->z_lock); 2607 2796 marks oldva.va_mode = pzp->zp_mode; 2608 5771 jp151216 zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 2609 5331 amw if (mask & AT_XVATTR) { 2610 8190 Mark /* 2611 8190 Mark * Update xvattr mask to include only those attributes 2612 8190 Mark * that are actually changing. 2613 8190 Mark * 2614 8190 Mark * the bits will be restored prior to actually setting 2615 8190 Mark * the attributes so the caller thinks they were set. 
2616 8190 Mark */ 2617 8190 Mark if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2618 8190 Mark if (xoap->xoa_appendonly != 2619 8190 Mark ((pzp->zp_flags & ZFS_APPENDONLY) != 0)) { 2620 8190 Mark need_policy = TRUE; 2621 8190 Mark } else { 2622 8190 Mark XVA_CLR_REQ(xvap, XAT_APPENDONLY); 2623 8190 Mark XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 2624 8190 Mark } 2625 8190 Mark } 2626 8190 Mark 2627 8190 Mark if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2628 8190 Mark if (xoap->xoa_nounlink != 2629 8190 Mark ((pzp->zp_flags & ZFS_NOUNLINK) != 0)) { 2630 8190 Mark need_policy = TRUE; 2631 8190 Mark } else { 2632 8190 Mark XVA_CLR_REQ(xvap, XAT_NOUNLINK); 2633 8190 Mark XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 2634 8190 Mark } 2635 8190 Mark } 2636 8190 Mark 2637 8190 Mark if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2638 8190 Mark if (xoap->xoa_immutable != 2639 8190 Mark ((pzp->zp_flags & ZFS_IMMUTABLE) != 0)) { 2640 8190 Mark need_policy = TRUE; 2641 8190 Mark } else { 2642 8190 Mark XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 2643 8190 Mark XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 2644 8190 Mark } 2645 8190 Mark } 2646 8190 Mark 2647 8190 Mark if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2648 8190 Mark if (xoap->xoa_nodump != 2649 8190 Mark ((pzp->zp_flags & ZFS_NODUMP) != 0)) { 2650 8190 Mark need_policy = TRUE; 2651 8190 Mark } else { 2652 8190 Mark XVA_CLR_REQ(xvap, XAT_NODUMP); 2653 8190 Mark XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 2654 8190 Mark } 2655 8190 Mark } 2656 8190 Mark 2657 8190 Mark if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2658 8190 Mark if (xoap->xoa_av_modified != 2659 8190 Mark ((pzp->zp_flags & ZFS_AV_MODIFIED) != 0)) { 2660 8190 Mark need_policy = TRUE; 2661 8190 Mark } else { 2662 8190 Mark XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 2663 8190 Mark XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 2664 8190 Mark } 2665 8190 Mark } 2666 8190 Mark 2667 8190 Mark if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2668 8190 Mark if ((vp->v_type != VREG && 2669 8190 Mark xoap->xoa_av_quarantined) 
|| 2670 8190 Mark xoap->xoa_av_quarantined != 2671 8190 Mark ((pzp->zp_flags & ZFS_AV_QUARANTINED) != 0)) { 2672 8190 Mark need_policy = TRUE; 2673 8190 Mark } else { 2674 8190 Mark XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 2675 8190 Mark XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 2676 8190 Mark } 2677 8190 Mark } 2678 8190 Mark 2679 10793 dai if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 2680 10793 dai mutex_exit(&zp->z_lock); 2681 10793 dai ZFS_EXIT(zfsvfs); 2682 10793 dai return (EPERM); 2683 10793 dai } 2684 10793 dai 2685 8190 Mark if (need_policy == FALSE && 2686 8190 Mark (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 2687 8190 Mark XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 2688 5331 amw need_policy = TRUE; 2689 5331 amw } 2690 5331 amw } 2691 5331 amw 2692 2796 marks mutex_exit(&zp->z_lock); 2693 2796 marks 2694 2796 marks if (mask & AT_MODE) { 2695 5331 amw if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 2696 2796 marks err = secpolicy_setid_setsticky_clear(vp, vap, 2697 2796 marks &oldva, cr); 2698 2796 marks if (err) { 2699 2796 marks ZFS_EXIT(zfsvfs); 2700 2796 marks return (err); 2701 2796 marks } 2702 2796 marks trim_mask |= AT_MODE; 2703 2796 marks } else { 2704 2796 marks need_policy = TRUE; 2705 2796 marks } 2706 2796 marks } 2707 789 ahrens 2708 789 ahrens if (need_policy) { 2709 1115 marks /* 2710 1115 marks * If trim_mask is set then take ownership 2711 2796 marks * has been granted or write_acl is present and user 2712 2796 marks * has the ability to modify mode. In that case remove 2713 2796 marks * UID|GID and or MODE from mask so that 2714 1115 marks * secpolicy_vnode_setattr() doesn't revoke it. 
2715 1115 marks */ 2716 1115 marks 2717 2796 marks if (trim_mask) { 2718 2796 marks saved_mask = vap->va_mask; 2719 2796 marks vap->va_mask &= ~trim_mask; 2720 2796 marks } 2721 789 ahrens err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 2722 5331 amw (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 2723 789 ahrens if (err) { 2724 789 ahrens ZFS_EXIT(zfsvfs); 2725 789 ahrens return (err); 2726 789 ahrens } 2727 1115 marks 2728 1115 marks if (trim_mask) 2729 2796 marks vap->va_mask |= saved_mask; 2730 789 ahrens } 2731 789 ahrens 2732 789 ahrens /* 2733 789 ahrens * secpolicy_vnode_setattr, or take ownership may have 2734 789 ahrens * changed va_mask 2735 789 ahrens */ 2736 789 ahrens mask = vap->va_mask; 2737 789 ahrens 2738 789 ahrens tx = dmu_tx_create(zfsvfs->z_os); 2739 789 ahrens dmu_tx_hold_bonus(tx, zp->z_id); 2740 789 ahrens 2741 789 ahrens if (mask & AT_MODE) { 2742 1576 marks uint64_t pmode = pzp->zp_mode; 2743 789 ahrens 2744 1576 marks new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 2745 789 ahrens 2746 9396 Matthew if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 2747 9396 Matthew goto out; 2748 5331 amw if (pzp->zp_acl.z_acl_extern_obj) { 2749 5331 amw /* Are we upgrading ACL from old V0 format to new V1 */ 2750 5331 amw if (zfsvfs->z_version <= ZPL_VERSION_FUID && 2751 5331 amw pzp->zp_acl.z_acl_version == 2752 5331 amw ZFS_ACL_VERSION_INITIAL) { 2753 5331 amw dmu_tx_hold_free(tx, 2754 5331 amw pzp->zp_acl.z_acl_extern_obj, 0, 2755 5331 amw DMU_OBJECT_END); 2756 5331 amw dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 2757 5824 marks 0, aclp->z_acl_bytes); 2758 5331 amw } else { 2759 5331 amw dmu_tx_hold_write(tx, 2760 5331 amw pzp->zp_acl.z_acl_extern_obj, 0, 2761 5824 marks aclp->z_acl_bytes); 2762 5824 marks } 2763 6180 marks } else if (aclp->z_acl_bytes > ZFS_ACE_SPACE) { 2764 6180 marks dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 2765 6180 marks 0, aclp->z_acl_bytes); 2766 5331 amw } 2767 5331 amw } 2768 5331 amw 2769 9179 Mark if 
(mask & (AT_UID | AT_GID)) { 2770 9179 Mark if (pzp->zp_xattr) { 2771 9179 Mark err = zfs_zget(zp->z_zfsvfs, pzp->zp_xattr, &attrzp); 2772 9396 Matthew if (err) 2773 9396 Matthew goto out; 2774 9179 Mark dmu_tx_hold_bonus(tx, attrzp->z_id); 2775 9179 Mark } 2776 9179 Mark if (mask & AT_UID) { 2777 9179 Mark new_uid = zfs_fuid_create(zfsvfs, 2778 9179 Mark (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 2779 9396 Matthew if (new_uid != pzp->zp_uid && 2780 9396 Matthew zfs_usergroup_overquota(zfsvfs, B_FALSE, new_uid)) { 2781 9396 Matthew err = EDQUOT; 2782 9396 Matthew goto out; 2783 9396 Matthew } 2784 9396 Matthew } 2785 9396 Matthew 2786 9179 Mark if (mask & AT_GID) { 2787 9179 Mark new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 2788 9179 Mark cr, ZFS_GROUP, &fuidp); 2789 9396 Matthew if (new_gid != pzp->zp_gid && 2790 9396 Matthew zfs_usergroup_overquota(zfsvfs, B_TRUE, new_gid)) { 2791 9396 Matthew err = EDQUOT; 2792 9396 Matthew goto out; 2793 9396 Matthew } 2794 9179 Mark } 2795 9179 Mark fuid_dirtied = zfsvfs->z_fuid_dirty; 2796 9179 Mark if (fuid_dirtied) { 2797 9179 Mark if (zfsvfs->z_fuid_obj == 0) { 2798 9179 Mark dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 2799 9179 Mark dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 2800 9179 Mark FUID_SIZE_ESTIMATE(zfsvfs)); 2801 9179 Mark dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 2802 9179 Mark FALSE, NULL); 2803 9179 Mark } else { 2804 9179 Mark dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj); 2805 9179 Mark dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0, 2806 9179 Mark FUID_SIZE_ESTIMATE(zfsvfs)); 2807 9179 Mark } 2808 9179 Mark } 2809 1231 marks } 2810 1231 marks 2811 8227 Neil err = dmu_tx_assign(tx, TXG_NOWAIT); 2812 789 ahrens if (err) { 2813 9396 Matthew if (err == ERESTART) 2814 9396 Matthew dmu_tx_wait(tx); 2815 9396 Matthew goto out; 2816 789 ahrens } 2817 789 ahrens 2818 789 ahrens dmu_buf_will_dirty(zp->z_dbuf, tx); 2819 789 ahrens 2820 789 ahrens /* 2821 789 ahrens * Set each attribute requested. 
2822 789 ahrens * We group settings according to the locks they need to acquire. 2823 789 ahrens * 2824 789 ahrens * Note: you cannot set ctime directly, although it will be 2825 789 ahrens * updated as a side-effect of calling this function. 2826 789 ahrens */ 2827 789 ahrens 2828 789 ahrens mutex_enter(&zp->z_lock); 2829 789 ahrens 2830 789 ahrens if (mask & AT_MODE) { 2831 5824 marks mutex_enter(&zp->z_acl_lock); 2832 5824 marks zp->z_phys->zp_mode = new_mode; 2833 9179 Mark err = zfs_aclset_common(zp, aclp, cr, tx); 2834 789 ahrens ASSERT3U(err, ==, 0); 2835 10143 Tim zp->z_acl_cached = aclp; 2836 10143 Tim aclp = NULL; 2837 5824 marks mutex_exit(&zp->z_acl_lock); 2838 789 ahrens } 2839 789 ahrens 2840 1231 marks if (attrzp) 2841 1231 marks mutex_enter(&attrzp->z_lock); 2842 1231 marks 2843 1231 marks if (mask & AT_UID) { 2844 9179 Mark pzp->zp_uid = new_uid; 2845 9179 Mark if (attrzp) 2846 9179 Mark attrzp->z_phys->zp_uid = new_uid; 2847 1231 marks } 2848 789 ahrens 2849 1231 marks if (mask & AT_GID) { 2850 9179 Mark pzp->zp_gid = new_gid; 2851 1231 marks if (attrzp) 2852 9179 Mark attrzp->z_phys->zp_gid = new_gid; 2853 1231 marks } 2854 5824 marks 2855 1231 marks if (attrzp) 2856 1231 marks mutex_exit(&attrzp->z_lock); 2857 789 ahrens 2858 789 ahrens if (mask & AT_ATIME) 2859 789 ahrens ZFS_TIME_ENCODE(&vap->va_atime, pzp->zp_atime); 2860 789 ahrens 2861 789 ahrens if (mask & AT_MTIME) 2862 789 ahrens ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime); 2863 789 ahrens 2864 6992 maybee /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? 
*/ 2865 1878 maybee if (mask & AT_SIZE) 2866 789 ahrens zfs_time_stamper_locked(zp, CONTENT_MODIFIED, tx); 2867 1878 maybee else if (mask != 0) 2868 789 ahrens zfs_time_stamper_locked(zp, STATE_CHANGED, tx); 2869 5331 amw /* 2870 5331 amw * Do this after setting timestamps to prevent timestamp 2871 5331 amw * update from toggling bit 2872 5331 amw */ 2873 5331 amw 2874 5331 amw if (xoap && (mask & AT_XVATTR)) { 2875 8190 Mark 2876 8190 Mark /* 2877 8190 Mark * restore trimmed off masks 2878 8190 Mark * so that return masks can be set for caller. 2879 8190 Mark */ 2880 8190 Mark 2881 8190 Mark if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 2882 8190 Mark XVA_SET_REQ(xvap, XAT_APPENDONLY); 2883 8190 Mark } 2884 8190 Mark if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 2885 8190 Mark XVA_SET_REQ(xvap, XAT_NOUNLINK); 2886 8190 Mark } 2887 8190 Mark if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 2888 8190 Mark XVA_SET_REQ(xvap, XAT_IMMUTABLE); 2889 8190 Mark } 2890 8190 Mark if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 2891 8190 Mark XVA_SET_REQ(xvap, XAT_NODUMP); 2892 8190 Mark } 2893 8190 Mark if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 2894 8190 Mark XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 2895 8190 Mark } 2896 8190 Mark if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 2897 8190 Mark XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 2898 8190 Mark } 2899 8190 Mark 2900 5331 amw if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) { 2901 5331 amw size_t len; 2902 5331 amw dmu_object_info_t doi; 2903 5331 amw 2904 5331 amw ASSERT(vp->v_type == VREG); 2905 5331 amw 2906 5331 amw /* Grow the bonus buffer if necessary. 
*/ 2907 5331 amw dmu_object_info_from_db(zp->z_dbuf, &doi); 2908 5331 amw len = sizeof (xoap->xoa_av_scanstamp) + 2909 5331 amw sizeof (znode_phys_t); 2910 5331 amw if (len > doi.doi_bonus_size) 2911 5331 amw VERIFY(dmu_set_bonus(zp->z_dbuf, len, tx) == 0); 2912 5331 amw } 2913 5331 amw zfs_xvattr_set(zp, xvap); 2914 5331 amw } 2915 789 ahrens 2916 9179 Mark if (fuid_dirtied) 2917 9179 Mark zfs_fuid_sync(zfsvfs, tx); 2918 9179 Mark 2919 1878 maybee if (mask != 0) 2920 5331 amw zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 2921 5331 amw 2922 789 ahrens mutex_exit(&zp->z_lock); 2923 1231 marks 2924 9396 Matthew out: 2925 1231 marks if (attrzp) 2926 1231 marks VN_RELE(ZTOV(attrzp)); 2927 10143 Tim 2928 10143 Tim if (aclp) 2929 10143 Tim zfs_acl_free(aclp); 2930 9396 Matthew 2931 9396 Matthew if (fuidp) { 2932 9396 Matthew zfs_fuid_info_free(fuidp); 2933 9396 Matthew fuidp = NULL; 2934 9396 Matthew } 2935 9396 Matthew 2936 9396 Matthew if (err) 2937 9396 Matthew dmu_tx_abort(tx); 2938 9396 Matthew else 2939 9396 Matthew dmu_tx_commit(tx); 2940 9396 Matthew 2941 9396 Matthew if (err == ERESTART) 2942 9396 Matthew goto top; 2943 789 ahrens 2944 789 ahrens ZFS_EXIT(zfsvfs); 2945 789 ahrens return (err); 2946 789 ahrens } 2947 789 ahrens 2948 3271 maybee typedef struct zfs_zlock { 2949 3271 maybee krwlock_t *zl_rwlock; /* lock we acquired */ 2950 3271 maybee znode_t *zl_znode; /* znode we held */ 2951 3271 maybee struct zfs_zlock *zl_next; /* next in list */ 2952 3271 maybee } zfs_zlock_t; 2953 3271 maybee 2954 3271 maybee /* 2955 3271 maybee * Drop locks and release vnodes that were held by zfs_rename_lock(). 
2956 3271 maybee */ 2957 3271 maybee static void 2958 3271 maybee zfs_rename_unlock(zfs_zlock_t **zlpp) 2959 3271 maybee { 2960 3271 maybee zfs_zlock_t *zl; 2961 3271 maybee 2962 3271 maybee while ((zl = *zlpp) != NULL) { 2963 3271 maybee if (zl->zl_znode != NULL) 2964 3271 maybee VN_RELE(ZTOV(zl->zl_znode)); 2965 3271 maybee rw_exit(zl->zl_rwlock); 2966 3271 maybee *zlpp = zl->zl_next; 2967 3271 maybee kmem_free(zl, sizeof (*zl)); 2968 3271 maybee } 2969 3271 maybee } 2970 3271 maybee 2971 789 ahrens /* 2972 789 ahrens * Search back through the directory tree, using the ".." entries. 2973 789 ahrens * Lock each directory in the chain to prevent concurrent renames. 2974 789 ahrens * Fail any attempt to move a directory into one of its own descendants. 2975 789 ahrens * XXX - z_parent_lock can overlap with map or grow locks 2976 789 ahrens */ 2977 789 ahrens static int 2978 789 ahrens zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) 2979 789 ahrens { 2980 789 ahrens zfs_zlock_t *zl; 2981 3638 billm znode_t *zp = tdzp; 2982 789 ahrens uint64_t rootid = zp->z_zfsvfs->z_root; 2983 789 ahrens uint64_t *oidp = &zp->z_id; 2984 789 ahrens krwlock_t *rwlp = &szp->z_parent_lock; 2985 789 ahrens krw_t rw = RW_WRITER; 2986 789 ahrens 2987 789 ahrens /* 2988 789 ahrens * First pass write-locks szp and compares to zp->z_id. 2989 789 ahrens * Later passes read-lock zp and compare to zp->z_parent. 2990 789 ahrens */ 2991 789 ahrens do { 2992 3271 maybee if (!rw_tryenter(rwlp, rw)) { 2993 3271 maybee /* 2994 3271 maybee * Another thread is renaming in this path. 2995 3271 maybee * Note that if we are a WRITER, we don't have any 2996 3271 maybee * parent_locks held yet. 
2997 3271 maybee */ 2998 3271 maybee if (rw == RW_READER && zp->z_id > szp->z_id) { 2999 3271 maybee /* 3000 3271 maybee * Drop our locks and restart 3001 3271 maybee */ 3002 3271 maybee zfs_rename_unlock(&zl); 3003 3271 maybee *zlpp = NULL; 3004 3271 maybee zp = tdzp; 3005 3271 maybee oidp = &zp->z_id; 3006 3271 maybee rwlp = &szp->z_parent_lock; 3007 3271 maybee rw = RW_WRITER; 3008 3271 maybee continue; 3009 3271 maybee } else { 3010 3271 maybee /* 3011 3271 maybee * Wait for other thread to drop its locks 3012 3271 maybee */ 3013 3271 maybee rw_enter(rwlp, rw); 3014 3271 maybee } 3015 3271 maybee } 3016 3271 maybee 3017 789 ahrens zl = kmem_alloc(sizeof (*zl), KM_SLEEP); 3018 789 ahrens zl->zl_rwlock = rwlp; 3019 789 ahrens zl->zl_znode = NULL; 3020 789 ahrens zl->zl_next = *zlpp; 3021 789 ahrens *zlpp = zl; 3022 789 ahrens 3023 789 ahrens if (*oidp == szp->z_id) /* We're a descendant of szp */ 3024 789 ahrens return (EINVAL); 3025 789 ahrens 3026 789 ahrens if (*oidp == rootid) /* We've hit the top */ 3027 789 ahrens return (0); 3028 789 ahrens 3029 789 ahrens if (rw == RW_READER) { /* i.e. not the first pass */ 3030 789 ahrens int error = zfs_zget(zp->z_zfsvfs, *oidp, &zp); 3031 789 ahrens if (error) 3032 789 ahrens return (error); 3033 789 ahrens zl->zl_znode = zp; 3034 789 ahrens } 3035 789 ahrens oidp = &zp->z_phys->zp_parent; 3036 789 ahrens rwlp = &zp->z_parent_lock; 3037 789 ahrens rw = RW_READER; 3038 789 ahrens 3039 789 ahrens } while (zp->z_id != sdzp->z_id); 3040 789 ahrens 3041 789 ahrens return (0); 3042 789 ahrens } 3043 789 ahrens 3044 789 ahrens /* 3045 789 ahrens * Move an entry from the provided source directory to the target 3046 789 ahrens * directory. Change the entry name as indicated. 3047 789 ahrens * 3048 789 ahrens * IN: sdvp - Source directory containing the "old entry". 3049 789 ahrens * snm - Old entry name. 3050 789 ahrens * tdvp - Target directory to contain the "new entry". 3051 789 ahrens * tnm - New entry name. 
3052 789 ahrens * cr - credentials of caller. 3053 5331 amw * ct - caller context 3054 5331 amw * flags - case flags 3055 <