1 789 ahrens /* 2 789 ahrens * CDDL HEADER START 3 789 ahrens * 4 789 ahrens * The contents of this file are subject to the terms of the 5 1544 eschrock * Common Development and Distribution License (the "License"). 6 1544 eschrock * You may not use this file except in compliance with the License. 7 789 ahrens * 8 789 ahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 789 ahrens * or http://www.opensolaris.org/os/licensing. 10 789 ahrens * See the License for the specific language governing permissions 11 789 ahrens * and limitations under the License. 12 789 ahrens * 13 789 ahrens * When distributing Covered Code, include this CDDL HEADER in each 14 789 ahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 789 ahrens * If applicable, add the following below this CDDL HEADER, with the 16 789 ahrens * fields enclosed by brackets "[]" replaced with your own identifying 17 789 ahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18 789 ahrens * 19 789 ahrens * CDDL HEADER END 20 789 ahrens */ 21 789 ahrens /* 22 8636 Mark * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 789 ahrens * Use is subject to license terms. 
24 789 ahrens */ 25 4144 peteh 26 4144 peteh /* Portions Copyright 2007 Jeremy Teo */ 27 789 ahrens 28 3444 ek110237 #ifdef _KERNEL 29 789 ahrens #include <sys/types.h> 30 789 ahrens #include <sys/param.h> 31 789 ahrens #include <sys/time.h> 32 789 ahrens #include <sys/systm.h> 33 789 ahrens #include <sys/sysmacros.h> 34 789 ahrens #include <sys/resource.h> 35 789 ahrens #include <sys/mntent.h> 36 1816 marks #include <sys/mkdev.h> 37 5498 timh #include <sys/u8_textprep.h> 38 6492 timh #include <sys/dsl_dataset.h> 39 789 ahrens #include <sys/vfs.h> 40 3898 rsb #include <sys/vfs_opreg.h> 41 789 ahrens #include <sys/vnode.h> 42 789 ahrens #include <sys/file.h> 43 789 ahrens #include <sys/kmem.h> 44 789 ahrens #include <sys/errno.h> 45 789 ahrens #include <sys/unistd.h> 46 789 ahrens #include <sys/mode.h> 47 789 ahrens #include <sys/atomic.h> 48 789 ahrens #include <vm/pvn.h> 49 789 ahrens #include "fs/fs_subr.h" 50 789 ahrens #include <sys/zfs_dir.h> 51 789 ahrens #include <sys/zfs_acl.h> 52 789 ahrens #include <sys/zfs_ioctl.h> 53 3444 ek110237 #include <sys/zfs_rlock.h> 54 5331 amw #include <sys/zfs_fuid.h> 55 3444 ek110237 #include <sys/fs/zfs.h> 56 5331 amw #include <sys/kidmap.h> 57 3444 ek110237 #endif /* _KERNEL */ 58 3444 ek110237 59 3444 ek110237 #include <sys/dmu.h> 60 3444 ek110237 #include <sys/refcount.h> 61 3444 ek110237 #include <sys/stat.h> 62 3444 ek110237 #include <sys/zap.h> 63 789 ahrens #include <sys/zfs_znode.h> 64 5498 timh 65 5498 timh #include "zfs_prop.h" 66 789 ahrens 67 3444 ek110237 /* 68 6712 tomee * Define ZNODE_STATS to turn on statistic gathering. By default, it is only 69 6712 tomee * turned on when DEBUG is also defined. 
70 6712 tomee */ 71 6712 tomee #ifdef DEBUG 72 6712 tomee #define ZNODE_STATS 73 6712 tomee #endif /* DEBUG */ 74 6712 tomee 75 6712 tomee #ifdef ZNODE_STATS 76 6712 tomee #define ZNODE_STAT_ADD(stat) ((stat)++) 77 6712 tomee #else 78 6712 tomee #define ZNODE_STAT_ADD(stat) /* nothing */ 79 6712 tomee #endif /* ZNODE_STATS */ 80 6712 tomee 81 6712 tomee #define POINTER_IS_VALID(p) (!((uintptr_t)(p) & 0x3)) 82 6712 tomee #define POINTER_INVALIDATE(pp) (*(pp) = (void *)((uintptr_t)(*(pp)) | 0x1)) 83 6712 tomee 84 6712 tomee /* 85 3444 ek110237 * Functions needed for userland (ie: libzpool) are not put under 86 3444 ek110237 * #ifdef_KERNEL; the rest of the functions have dependencies 87 3444 ek110237 * (such as VFS logic) that will not compile easily in userland. 88 3444 ek110237 */ 89 3444 ek110237 #ifdef _KERNEL 90 9788 Tom /* 91 9788 Tom * Needed to close a small window in zfs_znode_move() that allows the zfsvfs to 92 9788 Tom * be freed before it can be safely accessed. 93 9788 Tom */ 94 9788 Tom krwlock_t zfsvfs_lock; 95 9788 Tom 96 6712 tomee static kmem_cache_t *znode_cache = NULL; 97 789 ahrens 98 789 ahrens /*ARGSUSED*/ 99 789 ahrens static void 100 5642 maybee znode_evict_error(dmu_buf_t *dbuf, void *user_ptr) 101 789 ahrens { 102 5642 maybee /* 103 5642 maybee * We should never drop all dbuf refs without first clearing 104 5642 maybee * the eviction callback. 
105 5642 maybee */ 106 5642 maybee panic("evicting znode %p\n", user_ptr); 107 789 ahrens } 108 789 ahrens 109 789 ahrens /*ARGSUSED*/ 110 789 ahrens static int 111 6712 tomee zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) 112 789 ahrens { 113 789 ahrens znode_t *zp = buf; 114 789 ahrens 115 6712 tomee ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); 116 6712 tomee 117 6712 tomee zp->z_vnode = vn_alloc(kmflags); 118 6712 tomee if (zp->z_vnode == NULL) { 119 6712 tomee return (-1); 120 6712 tomee } 121 6712 tomee ZTOV(zp)->v_data = zp; 122 6712 tomee 123 6712 tomee list_link_init(&zp->z_link_node); 124 6712 tomee 125 789 ahrens mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL); 126 1669 perrin rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL); 127 3897 maybee rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL); 128 789 ahrens mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL); 129 1669 perrin 130 1669 perrin mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL); 131 1669 perrin avl_create(&zp->z_range_avl, zfs_range_compare, 132 1669 perrin sizeof (rl_t), offsetof(rl_t, r_node)); 133 1669 perrin 134 5446 ahrens zp->z_dbuf = NULL; 135 6712 tomee zp->z_dirlocks = NULL; 136 9981 Tim zp->z_acl_cached = NULL; 137 789 ahrens return (0); 138 789 ahrens } 139 789 ahrens 140 789 ahrens /*ARGSUSED*/ 141 789 ahrens static void 142 6712 tomee zfs_znode_cache_destructor(void *buf, void *arg) 143 789 ahrens { 144 789 ahrens znode_t *zp = buf; 145 789 ahrens 146 6712 tomee ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); 147 6712 tomee ASSERT(ZTOV(zp)->v_data == zp); 148 6712 tomee vn_free(ZTOV(zp)); 149 6712 tomee ASSERT(!list_link_active(&zp->z_link_node)); 150 789 ahrens mutex_destroy(&zp->z_lock); 151 1669 perrin rw_destroy(&zp->z_parent_lock); 152 3897 maybee rw_destroy(&zp->z_name_lock); 153 789 ahrens mutex_destroy(&zp->z_acl_lock); 154 1669 perrin avl_destroy(&zp->z_range_avl); 155 4831 gw25295 mutex_destroy(&zp->z_range_lock); 156 789 ahrens 157 5446 ahrens 
ASSERT(zp->z_dbuf == NULL); 158 6712 tomee ASSERT(zp->z_dirlocks == NULL); 159 10143 Tim ASSERT(zp->z_acl_cached == NULL); 160 6712 tomee } 161 6712 tomee 162 6712 tomee #ifdef ZNODE_STATS 163 6712 tomee static struct { 164 6712 tomee uint64_t zms_zfsvfs_invalid; 165 9788 Tom uint64_t zms_zfsvfs_recheck1; 166 6712 tomee uint64_t zms_zfsvfs_unmounted; 167 9788 Tom uint64_t zms_zfsvfs_recheck2; 168 7579 Tom uint64_t zms_obj_held; 169 6712 tomee uint64_t zms_vnode_locked; 170 7579 Tom uint64_t zms_not_only_dnlc; 171 6712 tomee } znode_move_stats; 172 6712 tomee #endif /* ZNODE_STATS */ 173 6712 tomee 174 6712 tomee static void 175 6712 tomee zfs_znode_move_impl(znode_t *ozp, znode_t *nzp) 176 6712 tomee { 177 6712 tomee vnode_t *vp; 178 6712 tomee 179 6712 tomee /* Copy fields. */ 180 6712 tomee nzp->z_zfsvfs = ozp->z_zfsvfs; 181 6712 tomee 182 6712 tomee /* Swap vnodes. */ 183 6712 tomee vp = nzp->z_vnode; 184 6712 tomee nzp->z_vnode = ozp->z_vnode; 185 6712 tomee ozp->z_vnode = vp; /* let destructor free the overwritten vnode */ 186 6712 tomee ZTOV(ozp)->v_data = ozp; 187 6712 tomee ZTOV(nzp)->v_data = nzp; 188 6712 tomee 189 6712 tomee nzp->z_id = ozp->z_id; 190 6712 tomee ASSERT(ozp->z_dirlocks == NULL); /* znode not in use */ 191 6712 tomee ASSERT(avl_numnodes(&ozp->z_range_avl) == 0); 192 6712 tomee nzp->z_unlinked = ozp->z_unlinked; 193 6712 tomee nzp->z_atime_dirty = ozp->z_atime_dirty; 194 6712 tomee nzp->z_zn_prefetch = ozp->z_zn_prefetch; 195 6712 tomee nzp->z_blksz = ozp->z_blksz; 196 6712 tomee nzp->z_seq = ozp->z_seq; 197 6712 tomee nzp->z_mapcnt = ozp->z_mapcnt; 198 6712 tomee nzp->z_last_itx = ozp->z_last_itx; 199 6712 tomee nzp->z_gen = ozp->z_gen; 200 6712 tomee nzp->z_sync_cnt = ozp->z_sync_cnt; 201 6712 tomee nzp->z_phys = ozp->z_phys; 202 6712 tomee nzp->z_dbuf = ozp->z_dbuf; 203 10250 Mark 204 10250 Mark /* 205 10269 Mark * Since this is just an idle znode and kmem is already dealing with 206 10269 Mark * memory pressure, release any cached ACL. 
207 10250 Mark */ 208 10250 Mark if (ozp->z_acl_cached) { 209 10250 Mark zfs_acl_free(ozp->z_acl_cached); 210 10250 Mark ozp->z_acl_cached = NULL; 211 10250 Mark } 212 6712 tomee 213 6712 tomee /* Update back pointers. */ 214 6712 tomee (void) dmu_buf_update_user(nzp->z_dbuf, ozp, nzp, &nzp->z_phys, 215 6712 tomee znode_evict_error); 216 6712 tomee 217 6712 tomee /* 218 6712 tomee * Invalidate the original znode by clearing fields that provide a 219 6712 tomee * pointer back to the znode. Set the low bit of the vfs pointer to 220 6712 tomee * ensure that zfs_znode_move() recognizes the znode as invalid in any 221 6712 tomee * subsequent callback. 222 6712 tomee */ 223 6712 tomee ozp->z_dbuf = NULL; 224 6712 tomee POINTER_INVALIDATE(&ozp->z_zfsvfs); 225 6712 tomee } 226 6712 tomee 227 6712 tomee /*ARGSUSED*/ 228 6712 tomee static kmem_cbrc_t 229 6712 tomee zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg) 230 6712 tomee { 231 6712 tomee znode_t *ozp = buf, *nzp = newbuf; 232 6712 tomee zfsvfs_t *zfsvfs; 233 6712 tomee vnode_t *vp; 234 6712 tomee 235 6712 tomee /* 236 6712 tomee * The znode is on the file system's list of known znodes if the vfs 237 6712 tomee * pointer is valid. We set the low bit of the vfs pointer when freeing 238 6712 tomee * the znode to invalidate it, and the memory patterns written by kmem 239 6712 tomee * (baddcafe and deadbeef) set at least one of the two low bits. A newly 240 6712 tomee * created znode sets the vfs pointer last of all to indicate that the 241 6712 tomee * znode is known and in a valid state to be moved by this function. 
242 6712 tomee */ 243 6712 tomee zfsvfs = ozp->z_zfsvfs; 244 6712 tomee if (!POINTER_IS_VALID(zfsvfs)) { 245 6712 tomee ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid); 246 6712 tomee return (KMEM_CBRC_DONT_KNOW); 247 6712 tomee } 248 6712 tomee 249 6712 tomee /* 250 9788 Tom * Close a small window in which it's possible that the filesystem could 251 9788 Tom * be unmounted and freed, and zfsvfs, though valid in the previous 252 9788 Tom * statement, could point to unrelated memory by the time we try to 253 9788 Tom * prevent the filesystem from being unmounted. 254 9788 Tom */ 255 9788 Tom rw_enter(&zfsvfs_lock, RW_WRITER); 256 9788 Tom if (zfsvfs != ozp->z_zfsvfs) { 257 9788 Tom rw_exit(&zfsvfs_lock); 258 9788 Tom ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1); 259 9788 Tom return (KMEM_CBRC_DONT_KNOW); 260 9788 Tom } 261 9788 Tom 262 9788 Tom /* 263 9788 Tom * If the znode is still valid, then so is the file system. We know that 264 9788 Tom * no valid file system can be freed while we hold zfsvfs_lock, so we 265 9788 Tom * can safely ensure that the filesystem is not and will not be 266 9788 Tom * unmounted. The next statement is equivalent to ZFS_ENTER(). 267 6712 tomee */ 268 9396 Matthew rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG); 269 9396 Matthew if (zfsvfs->z_unmounted) { 270 9396 Matthew ZFS_EXIT(zfsvfs); 271 9788 Tom rw_exit(&zfsvfs_lock); 272 6712 tomee ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted); 273 6712 tomee return (KMEM_CBRC_DONT_KNOW); 274 6712 tomee } 275 9788 Tom rw_exit(&zfsvfs_lock); 276 6712 tomee 277 6712 tomee mutex_enter(&zfsvfs->z_znodes_lock); 278 6712 tomee /* 279 6712 tomee * Recheck the vfs pointer in case the znode was removed just before 280 6712 tomee * acquiring the lock. 
281 6712 tomee */ 282 6712 tomee if (zfsvfs != ozp->z_zfsvfs) { 283 6712 tomee mutex_exit(&zfsvfs->z_znodes_lock); 284 6712 tomee ZFS_EXIT(zfsvfs); 285 9788 Tom ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2); 286 6712 tomee return (KMEM_CBRC_DONT_KNOW); 287 6712 tomee } 288 6712 tomee 289 6712 tomee /* 290 6712 tomee * At this point we know that as long as we hold z_znodes_lock, the 291 6712 tomee * znode cannot be freed and fields within the znode can be safely 292 7579 Tom * accessed. Now, prevent a race with zfs_zget(). 293 6712 tomee */ 294 7579 Tom if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) { 295 7579 Tom mutex_exit(&zfsvfs->z_znodes_lock); 296 7579 Tom ZFS_EXIT(zfsvfs); 297 7579 Tom ZNODE_STAT_ADD(znode_move_stats.zms_obj_held); 298 7579 Tom return (KMEM_CBRC_LATER); 299 7579 Tom } 300 7579 Tom 301 6712 tomee vp = ZTOV(ozp); 302 6712 tomee if (mutex_tryenter(&vp->v_lock) == 0) { 303 7579 Tom ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); 304 6712 tomee mutex_exit(&zfsvfs->z_znodes_lock); 305 6712 tomee ZFS_EXIT(zfsvfs); 306 6712 tomee ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked); 307 6712 tomee return (KMEM_CBRC_LATER); 308 6712 tomee } 309 7579 Tom 310 6712 tomee /* Only move znodes that are referenced _only_ by the DNLC. */ 311 6712 tomee if (vp->v_count != 1 || !vn_in_dnlc(vp)) { 312 6712 tomee mutex_exit(&vp->v_lock); 313 7579 Tom ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); 314 6712 tomee mutex_exit(&zfsvfs->z_znodes_lock); 315 6712 tomee ZFS_EXIT(zfsvfs); 316 7579 Tom ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc); 317 6712 tomee return (KMEM_CBRC_LATER); 318 6712 tomee } 319 6712 tomee 320 6712 tomee /* 321 6712 tomee * The znode is known and in a valid state to move. We're holding the 322 6712 tomee * locks needed to execute the critical section. 
323 6712 tomee */ 324 6712 tomee zfs_znode_move_impl(ozp, nzp); 325 6712 tomee mutex_exit(&vp->v_lock); 326 7579 Tom ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); 327 6712 tomee 328 6712 tomee list_link_replace(&ozp->z_link_node, &nzp->z_link_node); 329 6712 tomee mutex_exit(&zfsvfs->z_znodes_lock); 330 6712 tomee ZFS_EXIT(zfsvfs); 331 6712 tomee 332 6712 tomee return (KMEM_CBRC_YES); 333 789 ahrens } 334 789 ahrens 335 789 ahrens void 336 789 ahrens zfs_znode_init(void) 337 789 ahrens { 338 789 ahrens /* 339 789 ahrens * Initialize zcache 340 789 ahrens */ 341 9788 Tom rw_init(&zfsvfs_lock, NULL, RW_DEFAULT, NULL); 342 789 ahrens ASSERT(znode_cache == NULL); 343 789 ahrens znode_cache = kmem_cache_create("zfs_znode_cache", 344 789 ahrens sizeof (znode_t), 0, zfs_znode_cache_constructor, 345 789 ahrens zfs_znode_cache_destructor, NULL, NULL, NULL, 0); 346 6712 tomee kmem_cache_set_move(znode_cache, zfs_znode_move); 347 789 ahrens } 348 789 ahrens 349 789 ahrens void 350 789 ahrens zfs_znode_fini(void) 351 789 ahrens { 352 789 ahrens /* 353 789 ahrens * Cleanup vfs & vnode ops 354 789 ahrens */ 355 789 ahrens zfs_remove_op_tables(); 356 789 ahrens 357 789 ahrens /* 358 789 ahrens * Cleanup zcache 359 789 ahrens */ 360 789 ahrens if (znode_cache) 361 789 ahrens kmem_cache_destroy(znode_cache); 362 789 ahrens znode_cache = NULL; 363 9788 Tom rw_destroy(&zfsvfs_lock); 364 789 ahrens } 365 789 ahrens 366 789 ahrens struct vnodeops *zfs_dvnodeops; 367 789 ahrens struct vnodeops *zfs_fvnodeops; 368 789 ahrens struct vnodeops *zfs_symvnodeops; 369 789 ahrens struct vnodeops *zfs_xdvnodeops; 370 789 ahrens struct vnodeops *zfs_evnodeops; 371 8845 amw struct vnodeops *zfs_sharevnodeops; 372 789 ahrens 373 789 ahrens void 374 789 ahrens zfs_remove_op_tables() 375 789 ahrens { 376 789 ahrens /* 377 789 ahrens * Remove vfs ops 378 789 ahrens */ 379 789 ahrens ASSERT(zfsfstype); 380 789 ahrens (void) vfs_freevfsops_by_type(zfsfstype); 381 789 ahrens zfsfstype = 0; 382 789 ahrens 383 789 
ahrens /* 384 789 ahrens * Remove vnode ops 385 789 ahrens */ 386 789 ahrens if (zfs_dvnodeops) 387 789 ahrens vn_freevnodeops(zfs_dvnodeops); 388 789 ahrens if (zfs_fvnodeops) 389 789 ahrens vn_freevnodeops(zfs_fvnodeops); 390 789 ahrens if (zfs_symvnodeops) 391 789 ahrens vn_freevnodeops(zfs_symvnodeops); 392 789 ahrens if (zfs_xdvnodeops) 393 789 ahrens vn_freevnodeops(zfs_xdvnodeops); 394 789 ahrens if (zfs_evnodeops) 395 789 ahrens vn_freevnodeops(zfs_evnodeops); 396 8845 amw if (zfs_sharevnodeops) 397 8845 amw vn_freevnodeops(zfs_sharevnodeops); 398 789 ahrens 399 789 ahrens zfs_dvnodeops = NULL; 400 789 ahrens zfs_fvnodeops = NULL; 401 789 ahrens zfs_symvnodeops = NULL; 402 789 ahrens zfs_xdvnodeops = NULL; 403 789 ahrens zfs_evnodeops = NULL; 404 8845 amw zfs_sharevnodeops = NULL; 405 789 ahrens } 406 789 ahrens 407 789 ahrens extern const fs_operation_def_t zfs_dvnodeops_template[]; 408 789 ahrens extern const fs_operation_def_t zfs_fvnodeops_template[]; 409 789 ahrens extern const fs_operation_def_t zfs_xdvnodeops_template[]; 410 789 ahrens extern const fs_operation_def_t zfs_symvnodeops_template[]; 411 789 ahrens extern const fs_operation_def_t zfs_evnodeops_template[]; 412 8845 amw extern const fs_operation_def_t zfs_sharevnodeops_template[]; 413 789 ahrens 414 789 ahrens int 415 789 ahrens zfs_create_op_tables() 416 789 ahrens { 417 789 ahrens int error; 418 789 ahrens 419 789 ahrens /* 420 789 ahrens * zfs_dvnodeops can be set if mod_remove() calls mod_installfs() 421 789 ahrens * due to a failure to remove the the 2nd modlinkage (zfs_modldrv). 422 789 ahrens * In this case we just return as the ops vectors are already set up. 
423 789 ahrens */ 424 789 ahrens if (zfs_dvnodeops) 425 789 ahrens return (0); 426 789 ahrens 427 789 ahrens error = vn_make_ops(MNTTYPE_ZFS, zfs_dvnodeops_template, 428 789 ahrens &zfs_dvnodeops); 429 789 ahrens if (error) 430 789 ahrens return (error); 431 789 ahrens 432 789 ahrens error = vn_make_ops(MNTTYPE_ZFS, zfs_fvnodeops_template, 433 789 ahrens &zfs_fvnodeops); 434 789 ahrens if (error) 435 789 ahrens return (error); 436 789 ahrens 437 789 ahrens error = vn_make_ops(MNTTYPE_ZFS, zfs_symvnodeops_template, 438 789 ahrens &zfs_symvnodeops); 439 789 ahrens if (error) 440 789 ahrens return (error); 441 789 ahrens 442 789 ahrens error = vn_make_ops(MNTTYPE_ZFS, zfs_xdvnodeops_template, 443 789 ahrens &zfs_xdvnodeops); 444 789 ahrens if (error) 445 789 ahrens return (error); 446 789 ahrens 447 789 ahrens error = vn_make_ops(MNTTYPE_ZFS, zfs_evnodeops_template, 448 789 ahrens &zfs_evnodeops); 449 8845 amw if (error) 450 8845 amw return (error); 451 8845 amw 452 8845 amw error = vn_make_ops(MNTTYPE_ZFS, zfs_sharevnodeops_template, 453 8845 amw &zfs_sharevnodeops); 454 8845 amw 455 8845 amw return (error); 456 8845 amw } 457 8845 amw 458 9030 Mark int 459 8845 amw zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx) 460 8845 amw { 461 9179 Mark zfs_acl_ids_t acl_ids; 462 8845 amw vattr_t vattr; 463 8845 amw znode_t *sharezp; 464 8845 amw vnode_t *vp; 465 8845 amw znode_t *zp; 466 8845 amw int error; 467 8845 amw 468 8845 amw vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; 469 8845 amw vattr.va_type = VDIR; 470 8845 amw vattr.va_mode = S_IFDIR|0555; 471 8845 amw vattr.va_uid = crgetuid(kcred); 472 8845 amw vattr.va_gid = crgetgid(kcred); 473 8845 amw 474 8845 amw sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP); 475 8845 amw sharezp->z_unlinked = 0; 476 8845 amw sharezp->z_atime_dirty = 0; 477 8845 amw sharezp->z_zfsvfs = zfsvfs; 478 8845 amw 479 8845 amw vp = ZTOV(sharezp); 480 8845 amw vn_reinit(vp); 481 8845 amw vp->v_type = VDIR; 482 8845 amw 483 9179 Mark 
VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr, 484 9179 Mark kcred, NULL, &acl_ids)); 485 8845 amw zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, 486 9179 Mark &zp, 0, &acl_ids); 487 8845 amw ASSERT3P(zp, ==, sharezp); 488 8845 amw ASSERT(!vn_in_dnlc(ZTOV(sharezp))); /* not valid to move */ 489 8845 amw POINTER_INVALIDATE(&sharezp->z_zfsvfs); 490 8845 amw error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, 491 8845 amw ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx); 492 8845 amw zfsvfs->z_shares_dir = sharezp->z_id; 493 8845 amw 494 9179 Mark zfs_acl_ids_free(&acl_ids); 495 8845 amw ZTOV(sharezp)->v_count = 0; 496 8845 amw dmu_buf_rele(sharezp->z_dbuf, NULL); 497 8845 amw sharezp->z_dbuf = NULL; 498 8845 amw kmem_cache_free(znode_cache, sharezp); 499 5331 amw 500 8845 amw return (error); 501 789 ahrens } 502 789 ahrens 503 789 ahrens /* 504 1816 marks * define a couple of values we need available 505 1816 marks * for both 64 and 32 bit environments. 506 1816 marks */ 507 1816 marks #ifndef NBITSMINOR64 508 1816 marks #define NBITSMINOR64 32 509 1816 marks #endif 510 1816 marks #ifndef MAXMAJ64 511 1816 marks #define MAXMAJ64 0xffffffffUL 512 1816 marks #endif 513 1816 marks #ifndef MAXMIN64 514 1816 marks #define MAXMIN64 0xffffffffUL 515 1816 marks #endif 516 1816 marks 517 1816 marks /* 518 1816 marks * Create special expldev for ZFS private use. 519 1816 marks * Can't use standard expldev since it doesn't do 520 1816 marks * what we want. The standard expldev() takes a 521 1816 marks * dev32_t in LP64 and expands it to a long dev_t. 522 1816 marks * We need an interface that takes a dev32_t in ILP32 523 1816 marks * and expands it to a long dev_t. 
524 1816 marks */ 525 1816 marks static uint64_t 526 1816 marks zfs_expldev(dev_t dev) 527 1816 marks { 528 1816 marks #ifndef _LP64 529 1816 marks major_t major = (major_t)dev >> NBITSMINOR32 & MAXMAJ32; 530 1816 marks return (((uint64_t)major << NBITSMINOR64) | 531 1816 marks ((minor_t)dev & MAXMIN32)); 532 1816 marks #else 533 1816 marks return (dev); 534 1816 marks #endif 535 1816 marks } 536 1816 marks 537 1816 marks /* 538 1816 marks * Special cmpldev for ZFS private use. 539 1816 marks * Can't use standard cmpldev since it takes 540 1816 marks * a long dev_t and compresses it to dev32_t in 541 1816 marks * LP64. We need to do a compaction of a long dev_t 542 1816 marks * to a dev32_t in ILP32. 543 1816 marks */ 544 1816 marks dev_t 545 1816 marks zfs_cmpldev(uint64_t dev) 546 1816 marks { 547 1816 marks #ifndef _LP64 548 1816 marks minor_t minor = (minor_t)dev & MAXMIN64; 549 1816 marks major_t major = (major_t)(dev >> NBITSMINOR64) & MAXMAJ64; 550 1816 marks 551 1816 marks if (major > MAXMAJ32 || minor > MAXMIN32) 552 1816 marks return (NODEV32); 553 1816 marks 554 1816 marks return (((dev32_t)major << NBITSMINOR32) | minor); 555 1816 marks #else 556 1816 marks return (dev); 557 1816 marks #endif 558 1816 marks } 559 1816 marks 560 5446 ahrens static void 561 6712 tomee zfs_znode_dmu_init(zfsvfs_t *zfsvfs, znode_t *zp, dmu_buf_t *db) 562 5446 ahrens { 563 5446 ahrens znode_t *nzp; 564 5446 ahrens 565 6712 tomee ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs)); 566 6712 tomee ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id))); 567 5446 ahrens 568 5446 ahrens mutex_enter(&zp->z_lock); 569 5446 ahrens 570 5446 ahrens ASSERT(zp->z_dbuf == NULL); 571 10269 Mark ASSERT(zp->z_acl_cached == NULL); 572 5446 ahrens zp->z_dbuf = db; 573 5642 maybee nzp = dmu_buf_set_user_ie(db, zp, &zp->z_phys, znode_evict_error); 574 5446 ahrens 575 5446 ahrens /* 576 5446 ahrens * there should be no 577 5446 ahrens * concurrent zgets on this object. 
578 5446 ahrens */ 579 5446 ahrens if (nzp != NULL) 580 7240 rh87107 panic("existing znode %p for dbuf %p", (void *)nzp, (void *)db); 581 5446 ahrens 582 5446 ahrens /* 583 5446 ahrens * Slap on VROOT if we are the root znode 584 5446 ahrens */ 585 5446 ahrens if (zp->z_id == zfsvfs->z_root) 586 5446 ahrens ZTOV(zp)->v_flag |= VROOT; 587 5446 ahrens 588 5446 ahrens mutex_exit(&zp->z_lock); 589 5446 ahrens vn_exists(ZTOV(zp)); 590 5446 ahrens } 591 5446 ahrens 592 5642 maybee void 593 5446 ahrens zfs_znode_dmu_fini(znode_t *zp) 594 5446 ahrens { 595 5446 ahrens dmu_buf_t *db = zp->z_dbuf; 596 6712 tomee ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) || 597 6712 tomee zp->z_unlinked || 598 5642 maybee RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock)); 599 5446 ahrens ASSERT(zp->z_dbuf != NULL); 600 5446 ahrens zp->z_dbuf = NULL; 601 5642 maybee VERIFY(zp == dmu_buf_update_user(db, zp, NULL, NULL, NULL)); 602 5446 ahrens dmu_buf_rele(db, NULL); 603 5446 ahrens } 604 5446 ahrens 605 1816 marks /* 606 789 ahrens * Construct a new znode/vnode and intialize. 607 789 ahrens * 608 789 ahrens * This does not do a call to dmu_set_user() that is 609 789 ahrens * up to the caller to do, in case you don't want to 610 789 ahrens * return the znode 611 789 ahrens */ 612 1544 eschrock static znode_t * 613 5446 ahrens zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz) 614 789 ahrens { 615 789 ahrens znode_t *zp; 616 789 ahrens vnode_t *vp; 617 789 ahrens 618 789 ahrens zp = kmem_cache_alloc(znode_cache, KM_SLEEP); 619 789 ahrens 620 789 ahrens ASSERT(zp->z_dirlocks == NULL); 621 5446 ahrens ASSERT(zp->z_dbuf == NULL); 622 6712 tomee ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); 623 789 ahrens 624 6712 tomee /* 625 6712 tomee * Defer setting z_zfsvfs until the znode is ready to be a candidate for 626 6712 tomee * the zfs_znode_move() callback. 
627 6712 tomee */ 628 5446 ahrens zp->z_phys = NULL; 629 3461 ahrens zp->z_unlinked = 0; 630 789 ahrens zp->z_atime_dirty = 0; 631 789 ahrens zp->z_mapcnt = 0; 632 789 ahrens zp->z_last_itx = 0; 633 5446 ahrens zp->z_id = db->db_object; 634 789 ahrens zp->z_blksz = blksz; 635 789 ahrens zp->z_seq = 0x7A4653; 636 3063 perrin zp->z_sync_cnt = 0; 637 5446 ahrens 638 5446 ahrens vp = ZTOV(zp); 639 5446 ahrens vn_reinit(vp); 640 5446 ahrens 641 6712 tomee zfs_znode_dmu_init(zfsvfs, zp, db); 642 5446 ahrens 643 5326 ek110237 zp->z_gen = zp->z_phys->zp_gen; 644 789 ahrens 645 789 ahrens vp->v_vfsp = zfsvfs->z_parent->z_vfs; 646 789 ahrens vp->v_type = IFTOVT((mode_t)zp->z_phys->zp_mode); 647 789 ahrens 648 789 ahrens switch (vp->v_type) { 649 789 ahrens case VDIR: 650 789 ahrens if (zp->z_phys->zp_flags & ZFS_XATTR) { 651 789 ahrens vn_setops(vp, zfs_xdvnodeops); 652 789 ahrens vp->v_flag |= V_XATTRDIR; 653 5446 ahrens } else { 654 789 ahrens vn_setops(vp, zfs_dvnodeops); 655 5446 ahrens } 656 869 perrin zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */ 657 789 ahrens break; 658 789 ahrens case VBLK: 659 789 ahrens case VCHR: 660 1816 marks vp->v_rdev = zfs_cmpldev(zp->z_phys->zp_rdev); 661 789 ahrens /*FALLTHROUGH*/ 662 789 ahrens case VFIFO: 663 789 ahrens case VSOCK: 664 789 ahrens case VDOOR: 665 789 ahrens vn_setops(vp, zfs_fvnodeops); 666 789 ahrens break; 667 789 ahrens case VREG: 668 789 ahrens vp->v_flag |= VMODSORT; 669 8845 amw if (zp->z_phys->zp_parent == zfsvfs->z_shares_dir) 670 8845 amw vn_setops(vp, zfs_sharevnodeops); 671 8845 amw else 672 8845 amw vn_setops(vp, zfs_fvnodeops); 673 789 ahrens break; 674 789 ahrens case VLNK: 675 789 ahrens vn_setops(vp, zfs_symvnodeops); 676 789 ahrens break; 677 789 ahrens default: 678 789 ahrens vn_setops(vp, zfs_evnodeops); 679 789 ahrens break; 680 789 ahrens } 681 6712 tomee 682 6712 tomee mutex_enter(&zfsvfs->z_znodes_lock); 683 6712 tomee list_insert_tail(&zfsvfs->z_all_znodes, zp); 684 6712 tomee 
membar_producer(); 685 6712 tomee /* 686 6712 tomee * Everything else must be valid before assigning z_zfsvfs makes the 687 6712 tomee * znode eligible for zfs_znode_move(). 688 6712 tomee */ 689 6712 tomee zp->z_zfsvfs = zfsvfs; 690 6712 tomee mutex_exit(&zfsvfs->z_znodes_lock); 691 789 ahrens 692 5642 maybee VFS_HOLD(zfsvfs->z_vfs); 693 789 ahrens return (zp); 694 789 ahrens } 695 789 ahrens 696 789 ahrens /* 697 789 ahrens * Create a new DMU object to hold a zfs znode. 698 789 ahrens * 699 789 ahrens * IN: dzp - parent directory for new znode 700 789 ahrens * vap - file attributes for new znode 701 789 ahrens * tx - dmu transaction id for zap operations 702 789 ahrens * cr - credentials of caller 703 789 ahrens * flag - flags: 704 789 ahrens * IS_ROOT_NODE - new object will be root 705 789 ahrens * IS_XATTR - new object is an attribute 706 789 ahrens * IS_REPLAY - intent log replay 707 5331 amw * bonuslen - length of bonus buffer 708 5331 amw * setaclp - File/Dir initial ACL 709 5331 amw * fuidp - Tracks fuid allocation. 
710 789 ahrens * 711 5446 ahrens * OUT: zpp - allocated znode 712 789 ahrens * 713 789 ahrens */ 714 789 ahrens void 715 5446 ahrens zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, 716 9179 Mark uint_t flag, znode_t **zpp, int bonuslen, zfs_acl_ids_t *acl_ids) 717 789 ahrens { 718 5446 ahrens dmu_buf_t *db; 719 789 ahrens znode_phys_t *pzp; 720 789 ahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 721 789 ahrens timestruc_t now; 722 5446 ahrens uint64_t gen, obj; 723 789 ahrens int err; 724 789 ahrens 725 789 ahrens ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE)); 726 789 ahrens 727 8227 Neil if (zfsvfs->z_replay) { 728 5446 ahrens obj = vap->va_nodeid; 729 789 ahrens flag |= IS_REPLAY; 730 789 ahrens now = vap->va_ctime; /* see zfs_replay_create() */ 731 789 ahrens gen = vap->va_nblocks; /* ditto */ 732 789 ahrens } else { 733 5446 ahrens obj = 0; 734 789 ahrens gethrestime(&now); 735 789 ahrens gen = dmu_tx_get_txg(tx); 736 789 ahrens } 737 789 ahrens 738 789 ahrens /* 739 789 ahrens * Create a new DMU object. 740 789 ahrens */ 741 1544 eschrock /* 742 1544 eschrock * There's currently no mechanism for pre-reading the blocks that will 743 1544 eschrock * be to needed allocate a new object, so we accept the small chance 744 1544 eschrock * that there will be an i/o error and we will fail one of the 745 1544 eschrock * assertions below. 
746 1544 eschrock */ 747 789 ahrens if (vap->va_type == VDIR) { 748 789 ahrens if (flag & IS_REPLAY) { 749 5446 ahrens err = zap_create_claim_norm(zfsvfs->z_os, obj, 750 5331 amw zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS, 751 789 ahrens DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); 752 789 ahrens ASSERT3U(err, ==, 0); 753 789 ahrens } else { 754 5446 ahrens obj = zap_create_norm(zfsvfs->z_os, 755 5331 amw zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS, 756 789 ahrens DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); 757 789 ahrens } 758 789 ahrens } else { 759 789 ahrens if (flag & IS_REPLAY) { 760 5446 ahrens err = dmu_object_claim(zfsvfs->z_os, obj, 761 789 ahrens DMU_OT_PLAIN_FILE_CONTENTS, 0, 762 789 ahrens DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); 763 789 ahrens ASSERT3U(err, ==, 0); 764 789 ahrens } else { 765 5446 ahrens obj = dmu_object_alloc(zfsvfs->z_os, 766 789 ahrens DMU_OT_PLAIN_FILE_CONTENTS, 0, 767 789 ahrens DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); 768 789 ahrens } 769 789 ahrens } 770 10938 Mark 771 10938 Mark ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); 772 5446 ahrens VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, obj, NULL, &db)); 773 5446 ahrens dmu_buf_will_dirty(db, tx); 774 789 ahrens 775 789 ahrens /* 776 789 ahrens * Initialize the znode physical data to zero. 777 789 ahrens */ 778 5446 ahrens ASSERT(db->db_size >= sizeof (znode_phys_t)); 779 5446 ahrens bzero(db->db_data, db->db_size); 780 5446 ahrens pzp = db->db_data; 781 789 ahrens 782 789 ahrens /* 783 789 ahrens * If this is the root, fix up the half-initialized parent pointer 784 789 ahrens * to reference the just-allocated physical data area. 785 789 ahrens */ 786 789 ahrens if (flag & IS_ROOT_NODE) { 787 5642 maybee dzp->z_dbuf = db; 788 789 ahrens dzp->z_phys = pzp; 789 5446 ahrens dzp->z_id = obj; 790 789 ahrens } 791 789 ahrens 792 789 ahrens /* 793 789 ahrens * If parent is an xattr, so am I. 
794 789 ahrens */ 795 789 ahrens if (dzp->z_phys->zp_flags & ZFS_XATTR) 796 789 ahrens flag |= IS_XATTR; 797 789 ahrens 798 789 ahrens if (vap->va_type == VBLK || vap->va_type == VCHR) { 799 1816 marks pzp->zp_rdev = zfs_expldev(vap->va_rdev); 800 789 ahrens } 801 789 ahrens 802 5331 amw if (zfsvfs->z_use_fuids) 803 5331 amw pzp->zp_flags = ZFS_ARCHIVE | ZFS_AV_MODIFIED; 804 5331 amw 805 789 ahrens if (vap->va_type == VDIR) { 806 789 ahrens pzp->zp_size = 2; /* contents ("." and "..") */ 807 789 ahrens pzp->zp_links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1; 808 789 ahrens } 809 789 ahrens 810 789 ahrens pzp->zp_parent = dzp->z_id; 811 789 ahrens if (flag & IS_XATTR) 812 789 ahrens pzp->zp_flags |= ZFS_XATTR; 813 789 ahrens 814 789 ahrens pzp->zp_gen = gen; 815 789 ahrens 816 789 ahrens ZFS_TIME_ENCODE(&now, pzp->zp_crtime); 817 789 ahrens ZFS_TIME_ENCODE(&now, pzp->zp_ctime); 818 789 ahrens 819 789 ahrens if (vap->va_mask & AT_ATIME) { 820 789 ahrens ZFS_TIME_ENCODE(&vap->va_atime, pzp->zp_atime); 821 789 ahrens } else { 822 789 ahrens ZFS_TIME_ENCODE(&now, pzp->zp_atime); 823 789 ahrens } 824 789 ahrens 825 789 ahrens if (vap->va_mask & AT_MTIME) { 826 789 ahrens ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime); 827 789 ahrens } else { 828 789 ahrens ZFS_TIME_ENCODE(&now, pzp->zp_mtime); 829 789 ahrens } 830 10938 Mark pzp->zp_uid = acl_ids->z_fuid; 831 10938 Mark pzp->zp_gid = acl_ids->z_fgid; 832 10938 Mark pzp->zp_mode = acl_ids->z_mode; 833 5642 maybee if (!(flag & IS_ROOT_NODE)) { 834 5642 maybee *zpp = zfs_znode_alloc(zfsvfs, db, 0); 835 5642 maybee } else { 836 5642 maybee /* 837 5642 maybee * If we are creating the root node, the "parent" we 838 5642 maybee * passed in is the znode for the root. 
839 5642 maybee */ 840 5642 maybee *zpp = dzp; 841 5642 maybee } 842 9179 Mark VERIFY(0 == zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx)); 843 9179 Mark if (vap->va_mask & AT_XVATTR) 844 9179 Mark zfs_xvattr_set(*zpp, (xvattr_t *)vap); 845 10938 Mark 846 10938 Mark ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); 847 5331 amw } 848 5331 amw /* Apply the optional attributes requested in xvap to the znode zp: for every attribute for which XVA_ISSET_REQ() is true, store the value taken from the xoptattr into the znode and acknowledge it with XVA_SET_RTN(). */ 849 5331 amw void 850 5331 amw zfs_xvattr_set(znode_t *zp, xvattr_t *xvap) 851 5331 amw { 852 5331 amw xoptattr_t *xoap; 853 5331 amw 854 5331 amw xoap = xva_getxoptattr(xvap); 855 5331 amw ASSERT(xoap); 856 5331 amw 857 5331 amw if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 858 5331 amw ZFS_TIME_ENCODE(&xoap->xoa_createtime, zp->z_phys->zp_crtime); 859 5331 amw XVA_SET_RTN(xvap, XAT_CREATETIME); 860 5331 amw } 861 5331 amw if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 862 5331 amw ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly); 863 5331 amw XVA_SET_RTN(xvap, XAT_READONLY); 864 5331 amw } 865 5331 amw if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 866 5331 amw ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden); 867 5331 amw XVA_SET_RTN(xvap, XAT_HIDDEN); 868 5331 amw } 869 5331 amw if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 870 5331 amw ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system); 871 5331 amw XVA_SET_RTN(xvap, XAT_SYSTEM); 872 5331 amw } 873 5331 amw if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 874 5331 amw ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive); 875 5331 amw XVA_SET_RTN(xvap, XAT_ARCHIVE); 876 5331 amw } 877 5331 amw if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 878 5331 amw ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable); 879 5331 amw XVA_SET_RTN(xvap, XAT_IMMUTABLE); 880 5331 amw } 881 5331 amw if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 882 5331 amw ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink); 883 5331 amw XVA_SET_RTN(xvap, XAT_NOUNLINK); 884 5331 amw } 885 5331 amw if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 886 5331 amw ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly); 887 5331 amw XVA_SET_RTN(xvap, XAT_APPENDONLY); 888 5331 amw } 889
5331 amw if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 890 5331 amw ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump); 891 5331 amw XVA_SET_RTN(xvap, XAT_NODUMP); 892 5331 amw } 893 5331 amw if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 894 5331 amw ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque); 895 5331 amw XVA_SET_RTN(xvap, XAT_OPAQUE); 896 5331 amw } 897 5331 amw if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 898 5331 amw ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED, 899 5331 amw xoap->xoa_av_quarantined); 900 5331 amw XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 901 5331 amw } 902 5331 amw if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 903 5331 amw ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified); 904 5331 amw XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 905 5331 amw } 906 5331 amw if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) { /* the scanstamp is copied to the bytes immediately following the znode_phys_t; NOTE(review): this presumes the bonus buffer was sized to hold it - confirm against the caller's bonuslen */ 907 5331 amw (void) memcpy(zp->z_phys + 1, xoap->xoa_av_scanstamp, 908 5331 amw sizeof (xoap->xoa_av_scanstamp)); 909 5331 amw zp->z_phys->zp_flags |= ZFS_BONUS_SCANSTAMP; 910 5331 amw XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP); 911 789 ahrens } 912 10793 dai if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 913 10793 dai ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse); 914 10793 dai XVA_SET_RTN(xvap, XAT_REPARSE); 915 10793 dai } 916 789 ahrens } 917 789 ahrens /* Look up object obj_num and, on success, return a held znode in *zpp (which is set to NULL on entry). The whole lookup runs under ZFS_OBJ_HOLD_ENTER/EXIT for obj_num. Returns 0 on success, the dmu_bonus_hold() error if the bonus buffer cannot be held, EINVAL if the bonus buffer is not a DMU_OT_ZNODE of at least sizeof (znode_phys_t), and ENOENT if the existing znode is unlinked or the on-disk zp_gen is zero. */ 918 789 ahrens int 919 789 ahrens zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) 920 789 ahrens { 921 789 ahrens dmu_object_info_t doi; 922 789 ahrens dmu_buf_t *db; 923 789 ahrens znode_t *zp; 924 1544 eschrock int err; 925 789 ahrens 926 789 ahrens *zpp = NULL; 927 789 ahrens 928 789 ahrens ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); 929 789 ahrens 930 1544 eschrock err = dmu_bonus_hold(zfsvfs->z_os, obj_num, NULL, &db); 931 1544 eschrock if (err) { 932 789 ahrens ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 933 1544 eschrock return (err); 934 789 ahrens } 935 789 ahrens 936 789 ahrens dmu_object_info_from_db(db, &doi); 937 789 ahrens if (doi.doi_bonus_type != DMU_OT_ZNODE || 938 789 ahrens doi.doi_bonus_size < sizeof
(znode_phys_t)) { 939 1544 eschrock dmu_buf_rele(db, NULL); 940 789 ahrens ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 941 789 ahrens return (EINVAL); 942 789 ahrens } 943 789 ahrens /* a non-NULL dbuf user is the znode already associated with this bonus buffer */ 944 789 ahrens zp = dmu_buf_get_user(db); 945 789 ahrens if (zp != NULL) { 946 789 ahrens mutex_enter(&zp->z_lock); 947 789 ahrens 948 5446 ahrens /* 949 5446 ahrens * Since we do immediate eviction of the z_dbuf, we 950 5446 ahrens * should never find a dbuf with a znode that doesn't 951 5446 ahrens * know about the dbuf. 952 5446 ahrens */ 953 5446 ahrens ASSERT3P(zp->z_dbuf, ==, db); 954 789 ahrens ASSERT3U(zp->z_id, ==, obj_num); 955 3461 ahrens if (zp->z_unlinked) { 956 5446 ahrens err = ENOENT; 957 789 ahrens } else { 958 5446 ahrens VN_HOLD(ZTOV(zp)); 959 5446 ahrens *zpp = zp; 960 5446 ahrens err = 0; 961 789 ahrens } /* drop the hold taken by dmu_bonus_hold() above on both outcomes */ 962 5446 ahrens dmu_buf_rele(db, NULL); 963 789 ahrens mutex_exit(&zp->z_lock); 964 1544 eschrock ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 965 5446 ahrens return (err); 966 789 ahrens } 967 789 ahrens 968 789 ahrens /* 969 789 ahrens * Not found create new znode/vnode 970 10938 Mark * but only if file exists. 971 10938 Mark * 972 10938 Mark * There is a small window where zfs_vget() could 973 10938 Mark * find this object while a file create is still in 974 10938 Mark * progress. Since a gen number can never be zero 975 10938 Mark * we will check that to determine if it's an allocated 976 10938 Mark * file.
977 789 ahrens */ 978 10938 Mark 979 10938 Mark if (((znode_phys_t *)db->db_data)->zp_gen != 0) { 980 10938 Mark zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size); 981 10938 Mark *zpp = zp; 982 10938 Mark err = 0; 983 10938 Mark } else { 984 10938 Mark dmu_buf_rele(db, NULL); 985 10938 Mark err = ENOENT; 986 10938 Mark } 987 1544 eschrock ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 988 10938 Mark return (err); 989 5326 ek110237 } 990 5326 ek110237 /* Re-attach the in-core znode zp to its on-disk object zp->z_id: re-hold the bonus buffer, validate its type and size (EINVAL) and that the on-disk zp_gen still equals zp->z_gen (EIO), discard any cached ACL, re-initialize the znode/dbuf association with zfs_znode_dmu_init(), and refresh z_unlinked and z_blksz. Returns 0 on success or the dmu_bonus_hold() error. */ 991 5326 ek110237 int 992 5326 ek110237 zfs_rezget(znode_t *zp) 993 5326 ek110237 { 994 5326 ek110237 zfsvfs_t *zfsvfs = zp->z_zfsvfs; 995 5326 ek110237 dmu_object_info_t doi; 996 5326 ek110237 dmu_buf_t *db; 997 5326 ek110237 uint64_t obj_num = zp->z_id; 998 5326 ek110237 int err; 999 5326 ek110237 1000 5326 ek110237 ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); 1001 5326 ek110237 1002 5326 ek110237 err = dmu_bonus_hold(zfsvfs->z_os, obj_num, NULL, &db); 1003 5326 ek110237 if (err) { 1004 5326 ek110237 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 1005 5326 ek110237 return (err); 1006 5326 ek110237 } 1007 5326 ek110237 1008 5326 ek110237 dmu_object_info_from_db(db, &doi); 1009 5326 ek110237 if (doi.doi_bonus_type != DMU_OT_ZNODE || 1010 5326 ek110237 doi.doi_bonus_size < sizeof (znode_phys_t)) { 1011 5326 ek110237 dmu_buf_rele(db, NULL); 1012 5326 ek110237 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 1013 5326 ek110237 return (EINVAL); 1014 5326 ek110237 } 1015 5326 ek110237 /* a generation mismatch means the object number no longer refers to the file this znode was created for */ 1016 5326 ek110237 if (((znode_phys_t *)db->db_data)->zp_gen != zp->z_gen) { 1017 5326 ek110237 dmu_buf_rele(db, NULL); 1018 5326 ek110237 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 1019 5326 ek110237 return (EIO); 1020 5326 ek110237 } 1021 10269 Mark 1022 10269 Mark mutex_enter(&zp->z_acl_lock); 1023 10269 Mark if (zp->z_acl_cached) { 1024 10269 Mark zfs_acl_free(zp->z_acl_cached); 1025 10269 Mark zp->z_acl_cached = NULL; 1026 10269 Mark } 1027 10269 Mark mutex_exit(&zp->z_acl_lock); 1028 5326 ek110237 1029 6712 tomee zfs_znode_dmu_init(zfsvfs, zp, db); 1030 5326 ek110237
zp->z_unlinked = (zp->z_phys->zp_links == 0); 1031 5844 ek110237 zp->z_blksz = doi.doi_data_block_size; 1032 5326 ek110237 1033 5326 ek110237 ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 1034 5326 ek110237 1035 789 ahrens return (0); 1036 789 ahrens } 1037 789 ahrens /* Free the on-disk object backing zp in transaction tx, together with its external ACL object when zp_acl.z_acl_extern_obj is non-zero, then detach the znode from its dbuf and free the in-core znode. The object frees are VERIFY()ed to succeed. */ 1038 789 ahrens void 1039 789 ahrens zfs_znode_delete(znode_t *zp, dmu_tx_t *tx) 1040 789 ahrens { 1041 789 ahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1042 6992 maybee objset_t *os = zfsvfs->z_os; 1043 5446 ahrens uint64_t obj = zp->z_id; 1044 6992 maybee uint64_t acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj; 1045 789 ahrens 1046 5446 ahrens ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); 1047 6992 maybee if (acl_obj) 1048 6992 maybee VERIFY(0 == dmu_object_free(os, acl_obj, tx)); 1049 6992 maybee VERIFY(0 == dmu_object_free(os, obj, tx)); 1050 5446 ahrens zfs_znode_dmu_fini(zp); 1051 5446 ahrens ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); 1052 5642 maybee zfs_znode_free(zp); 1053 789 ahrens } 1054 789 ahrens /* Drop one reference on the vnode of zp. If references or cached pages remain, nothing else happens. Otherwise an unlinked znode is handed to zfs_rmnode(), and any other znode is detached from its dbuf and freed. */ 1055 789 ahrens void 1056 789 ahrens zfs_zinactive(znode_t *zp) 1057 789 ahrens { 1058 789 ahrens vnode_t *vp = ZTOV(zp); 1059 789 ahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1060 789 ahrens uint64_t z_id = zp->z_id; 1061 789 ahrens 1062 5446 ahrens ASSERT(zp->z_dbuf && zp->z_phys); 1063 789 ahrens 1064 789 ahrens /* 1065 789 ahrens * Don't allow a zfs_zget() while we're trying to release this znode 1066 789 ahrens */ 1067 789 ahrens ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id); 1068 789 ahrens 1069 789 ahrens mutex_enter(&zp->z_lock); 1070 789 ahrens mutex_enter(&vp->v_lock); 1071 789 ahrens vp->v_count--; 1072 789 ahrens if (vp->v_count > 0 || vn_has_cached_data(vp)) { 1073 789 ahrens /* 1074 789 ahrens * If the hold count is greater than zero, somebody has 1075 789 ahrens * obtained a new reference on this znode while we were 1076 789 ahrens * processing it here, so we are done. If we still have 1077 789 ahrens * mapped pages then we are also done, since we don't 1078 789 ahrens * want to inactivate the znode until the pages get pushed.
1079 789 ahrens * 1080 789 ahrens * XXX - if vn_has_cached_data(vp) is true, but count == 0, 1081 789 ahrens * this seems like it would leave the znode hanging with 1082 789 ahrens * no chance to go inactive... 1083 789 ahrens */ 1084 789 ahrens mutex_exit(&vp->v_lock); 1085 789 ahrens mutex_exit(&zp->z_lock); 1086 789 ahrens ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); 1087 789 ahrens return; 1088 789 ahrens } 1089 789 ahrens mutex_exit(&vp->v_lock); 1090 789 ahrens 1091 789 ahrens /* 1092 789 ahrens * If this was the last reference to a file with no links, 1093 789 ahrens * remove the file from the file system. 1094 789 ahrens */ 1095 3461 ahrens if (zp->z_unlinked) { 1096 789 ahrens mutex_exit(&zp->z_lock); 1097 789 ahrens ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); 1098 3461 ahrens zfs_rmnode(zp); 1099 789 ahrens return; 1100 789 ahrens } 1101 789 ahrens mutex_exit(&zp->z_lock); 1102 5446 ahrens zfs_znode_dmu_fini(zp); 1103 789 ahrens ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); 1104 5642 maybee zfs_znode_free(zp); 1105 789 ahrens } 1106 789 ahrens /* Release the in-core znode zp: invalidate its vnode, unlink it from zfsvfs->z_all_znodes under z_znodes_lock, free any cached ACL, return the znode to znode_cache and drop a hold on the VFS with VFS_RELE(). The vnode's v_count is asserted to be 0. */ 1107 789 ahrens void 1108 789 ahrens zfs_znode_free(znode_t *zp) 1109 789 ahrens { 1110 789 ahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1111 789 ahrens 1112 5642 maybee vn_invalid(ZTOV(zp)); 1113 5642 maybee 1114 6712 tomee ASSERT(ZTOV(zp)->v_count == 0); 1115 6712 tomee 1116 789 ahrens mutex_enter(&zfsvfs->z_znodes_lock); 1117 6712 tomee POINTER_INVALIDATE(&zp->z_zfsvfs); 1118 789 ahrens list_remove(&zfsvfs->z_all_znodes, zp); 1119 789 ahrens mutex_exit(&zfsvfs->z_znodes_lock); 1120 789 ahrens 1121 9981 Tim if (zp->z_acl_cached) { 1122 9981 Tim zfs_acl_free(zp->z_acl_cached); 1123 9981 Tim zp->z_acl_cached = NULL; 1124 9981 Tim } 1125 9981 Tim 1126 789 ahrens kmem_cache_free(znode_cache, zp); 1127 5642 maybee /* zfsvfs was read into a local at the top because zp is no longer usable here */ 1128 5642 maybee VFS_RELE(zfsvfs->z_vfs); 1129 789 ahrens } 1130 789 ahrens /* Set the timestamps selected by flag (AT_ATIME, AT_MTIME, AT_CTIME) to the current time. The caller must hold zp->z_lock (asserted). With a non-NULL tx the bonus dbuf is dirtied in that transaction, z_atime_dirty is cleared and z_seq is incremented; with a NULL tx only z_atime_dirty is set. On file systems with z_use_fuids, an mtime update also sets ZFS_ARCHIVE and ZFS_AV_MODIFIED, and a ctime update sets ZFS_ARCHIVE. */ 1131 789 ahrens void 1132 789 ahrens zfs_time_stamper_locked(znode_t *zp, uint_t flag, dmu_tx_t *tx) 1133 789 ahrens { 1134 789 ahrens timestruc_t now; 1135 789 ahrens 1136
789 ahrens ASSERT(MUTEX_HELD(&zp->z_lock)); 1137 789 ahrens 1138 789 ahrens gethrestime(&now); 1139 789 ahrens 1140 789 ahrens if (tx) { 1141 789 ahrens dmu_buf_will_dirty(zp->z_dbuf, tx); 1142 789 ahrens zp->z_atime_dirty = 0; 1143 789 ahrens zp->z_seq++; 1144 789 ahrens } else { 1145 789 ahrens zp->z_atime_dirty = 1; 1146 789 ahrens } 1147 789 ahrens 1148 789 ahrens if (flag & AT_ATIME) 1149 789 ahrens ZFS_TIME_ENCODE(&now, zp->z_phys->zp_atime); 1150 789 ahrens 1151 5331 amw if (flag & AT_MTIME) { 1152 789 ahrens ZFS_TIME_ENCODE(&now, zp->z_phys->zp_mtime); 1153 5331 amw if (zp->z_zfsvfs->z_use_fuids) 1154 5331 amw zp->z_phys->zp_flags |= (ZFS_ARCHIVE | ZFS_AV_MODIFIED); 1155 5331 amw } 1156 789 ahrens 1157 5331 amw if (flag & AT_CTIME) { 1158 789 ahrens ZFS_TIME_ENCODE(&now, zp->z_phys->zp_ctime); 1159 5331 amw if (zp->z_zfsvfs->z_use_fuids) 1160 5331 amw zp->z_phys->zp_flags |= ZFS_ARCHIVE; 1161 5331 amw } 1162 789 ahrens } 1163 789 ahrens 1164 789 ahrens /* 1165 789 ahrens * Update the requested znode timestamps with the current time. 1166 789 ahrens * If we are in a transaction, then go ahead and mark the znode 1167 789 ahrens * dirty in the transaction so the timestamps will go to disk. 1168 789 ahrens * Otherwise, we will get pushed next time the znode is updated 1169 789 ahrens * in a transaction, or when this znode eventually goes inactive. 1170 789 ahrens * 1171 789 ahrens * Why is this OK? 1172 789 ahrens * 1 - Only the ACCESS time is ever updated outside of a transaction. 1173 789 ahrens * 2 - Multiple consecutive updates will be collapsed into a single 1174 789 ahrens * znode update by the transaction grouping semantics of the DMU.
1175 789 ahrens */ 1176 789 ahrens void 1177 789 ahrens zfs_time_stamper(znode_t *zp, uint_t flag, dmu_tx_t *tx) 1178 789 ahrens { 1179 789 ahrens mutex_enter(&zp->z_lock); 1180 789 ahrens zfs_time_stamper_locked(zp, flag, tx); 1181 789 ahrens mutex_exit(&zp->z_lock); 1182 789 ahrens } 1183 789 ahrens 1184 789 ahrens /* 1185 1669 perrin * Grow the block size for a file. 1186 789 ahrens * 1187 789 ahrens * IN: zp - znode of file to free data in. 1188 789 ahrens * size - requested block size 1189 789 ahrens * tx - open transaction. 1190 789 ahrens * 1191 789 ahrens * NOTE: this function assumes that the znode is write locked. 1192 789 ahrens */ 1193 1669 perrin void 1194 789 ahrens zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) 1195 789 ahrens { 1196 789 ahrens int error; 1197 789 ahrens u_longlong_t dummy; 1198 789 ahrens 1199 789 ahrens if (size <= zp->z_blksz) 1200 1669 perrin return; 1201 789 ahrens /* 1202 789 ahrens * If the file size is already greater than the current blocksize, 1203 789 ahrens * we will not grow. If there is more than one block in a file, 1204 789 ahrens * the blocksize cannot change. 1205 789 ahrens */ 1206 789 ahrens if (zp->z_blksz && zp->z_phys->zp_size > zp->z_blksz) 1207 1669 perrin return; 1208 789 ahrens 1209 789 ahrens error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id, 1210 789 ahrens size, 0, tx); 1211 789 ahrens if (error == ENOTSUP) 1212 1669 perrin return; 1213 789 ahrens ASSERT3U(error, ==, 0); 1214 789 ahrens 1215 789 ahrens /* What blocksize did we actually get? */ 1216 789 ahrens dmu_object_size_from_db(zp->z_dbuf, &zp->z_blksz, &dummy); 1217 789 ahrens } 1218 789 ahrens 1219 789 ahrens /* 1220 789 ahrens * This is a dummy interface used when pvn_vplist_dirty() should *not* 1221 789 ahrens * be calling back into the fs for a putpage(). E.g.: when truncating 1222 789 ahrens * a file, the pages being "thrown away* don't need to be written out. 
1223 789 ahrens */ 1224 789 ahrens /* ARGSUSED */ 1225 789 ahrens static int 1226 789 ahrens zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp, 1227 789 ahrens int flags, cred_t *cr) 1228 789 ahrens { 1229 789 ahrens ASSERT(0); 1230 789 ahrens return (0); 1231 789 ahrens } 1232 789 ahrens 1233 789 ahrens /* 1234 6992 maybee * Increase the file length 1235 789 ahrens * 1236 789 ahrens * IN: zp - znode of file to free data in. 1237 6992 maybee * end - new end-of-file 1238 789 ahrens * 1239 789 ahrens * RETURN: 0 if success 1240 789 ahrens * error code if failure 1241 789 ahrens */ 1242 6992 maybee static int 1243 6992 maybee zfs_extend(znode_t *zp, uint64_t end) 1244 789 ahrens { 1245 6992 maybee zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1246 1878 maybee dmu_tx_t *tx; 1247 1878 maybee rl_t *rl; 1248 6992 maybee uint64_t newblksz; 1249 1669 perrin int error; 1250 5331 amw 1251 789 ahrens /* 1252 6992 maybee * We will change zp_size, lock the whole file. 1253 1878 maybee */ 1254 6992 maybee rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); 1255 1878 maybee 1256 1878 maybee /* 1257 789 ahrens * Nothing to do if file already at desired length. 1258 789 ahrens */ 1259 6992 maybee if (end <= zp->z_phys->zp_size) { 1260 2237 maybee zfs_range_unlock(rl); 1261 789 ahrens return (0); 1262 789 ahrens } 1263 6992 maybee top: 1264 1878 maybee tx = dmu_tx_create(zfsvfs->z_os); 1265 1878 maybee dmu_tx_hold_bonus(tx, zp->z_id); 1266 6992 maybee if (end > zp->z_blksz && 1267 1878 maybee (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) { 1268 789 ahrens /* 1269 789 ahrens * We are growing the file past the current block size. 
1270 789 ahrens */ 1271 789 ahrens if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) { 1272 789 ahrens ASSERT(!ISP2(zp->z_blksz)); 1273 6992 maybee newblksz = MIN(end, SPA_MAXBLOCKSIZE); 1274 789 ahrens } else { 1275 6992 maybee newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz); 1276 789 ahrens } 1277 6992 maybee dmu_tx_hold_write(tx, zp->z_id, 0, newblksz); 1278 6992 maybee } else { 1279 6992 maybee newblksz = 0; 1280 1878 maybee } 1281 1878 maybee 1282 8227 Neil error = dmu_tx_assign(tx, TXG_NOWAIT); 1283 1878 maybee if (error) { 1284 8227 Neil if (error == ERESTART) { 1285 2113 ahrens dmu_tx_wait(tx); 1286 6992 maybee dmu_tx_abort(tx); 1287 6992 maybee goto top; 1288 6992 maybee } 1289 1878 maybee dmu_tx_abort(tx); 1290 2237 maybee zfs_range_unlock(rl); 1291 1878 maybee return (error); 1292 1878 maybee } 1293 6992 maybee dmu_buf_will_dirty(zp->z_dbuf, tx); 1294 1878 maybee 1295 6992 maybee if (newblksz) 1296 6992 maybee zfs_grow_blocksize(zp, newblksz, tx); 1297 1878 maybee 1298 6992 maybee zp->z_phys->zp_size = end; 1299 1878 maybee 1300 2237 maybee zfs_range_unlock(rl); 1301 1878 maybee 1302 1878 maybee dmu_tx_commit(tx); 1303 6992 maybee 1304 6992 maybee return (0); 1305 6992 maybee } 1306 6992 maybee 1307 6992 maybee /* 1308 6992 maybee * Free space in a file. 1309 6992 maybee * 1310 6992 maybee * IN: zp - znode of file to free data in. 1311 6992 maybee * off - start of section to free. 1312 6992 maybee * len - length of section to free. 1313 6992 maybee * 1314 6992 maybee * RETURN: 0 if success 1315 6992 maybee * error code if failure 1316 6992 maybee */ 1317 6992 maybee static int 1318 6992 maybee zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) 1319 6992 maybee { 1320 6992 maybee zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1321 6992 maybee rl_t *rl; 1322 6992 maybee int error; 1323 6992 maybee 1324 6992 maybee /* 1325 6992 maybee * Lock the range being freed. 
1326 6992 maybee */ 1327 6992 maybee rl = zfs_range_lock(zp, off, len, RL_WRITER); 1328 6992 maybee 1329 6992 maybee /* 1330 6992 maybee * Nothing to do if file already at desired length. 1331 6992 maybee */ 1332 6992 maybee if (off >= zp->z_phys->zp_size) { 1333 6992 maybee zfs_range_unlock(rl); 1334 6992 maybee return (0); 1335 6992 maybee } 1336 6992 maybee 1337 6992 maybee if (off + len > zp->z_phys->zp_size) 1338 6992 maybee len = zp->z_phys->zp_size - off; 1339 6992 maybee 1340 6992 maybee error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len); 1341 6992 maybee 1342 6992 maybee zfs_range_unlock(rl); 1343 6992 maybee 1344 6992 maybee return (error); 1345 6992 maybee } 1346 6992 maybee 1347 6992 maybee /* 1348 6992 maybee * Truncate a file 1349 6992 maybee * 1350 6992 maybee * IN: zp - znode of file to free data in. 1351 6992 maybee * end - new end-of-file. 1352 6992 maybee * 1353 6992 maybee * RETURN: 0 if success 1354 6992 maybee * error code if failure 1355 6992 maybee */ 1356 6992 maybee static int 1357 6992 maybee zfs_trunc(znode_t *zp, uint64_t end) 1358 6992 maybee { 1359 6992 maybee zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1360 6992 maybee vnode_t *vp = ZTOV(zp); 1361 6992 maybee dmu_tx_t *tx; 1362 6992 maybee rl_t *rl; 1363 6992 maybee int error; 1364 6992 maybee 1365 6992 maybee /* 1366 6992 maybee * We will change zp_size, lock the whole file. 1367 6992 maybee */ 1368 6992 maybee rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); 1369 6992 maybee 1370 6992 maybee /* 1371 6992 maybee * Nothing to do if file already at desired length. 
1372 6992 maybee */ 1373 6992 maybee if (end >= zp->z_phys->zp_size) { 1374 6992 maybee zfs_range_unlock(rl); 1375 6992 maybee return (0); 1376 6992 maybee } 1377 6992 maybee 1378 6992 maybee error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end, -1); 1379 6992 maybee if (error) { 1380 6992 maybee zfs_range_unlock(rl); 1381 6992 maybee return (error); 1382 6992 maybee } 1383 6992 maybee top: 1384 6992 maybee tx = dmu_tx_create(zfsvfs->z_os); 1385 6992 maybee dmu_tx_hold_bonus(tx, zp->z_id); 1386 8227 Neil error = dmu_tx_assign(tx, TXG_NOWAIT); 1387 6992 maybee if (error) { 1388 8227 Neil if (error == ERESTART) { 1389 6992 maybee dmu_tx_wait(tx); 1390 6992 maybee dmu_tx_abort(tx); 1391 6992 maybee goto top; 1392 6992 maybee } 1393 6992 maybee dmu_tx_abort(tx); 1394 6992 maybee zfs_range_unlock(rl); 1395 6992 maybee return (error); 1396 6992 maybee } 1397 6992 maybee dmu_buf_will_dirty(zp->z_dbuf, tx); 1398 6992 maybee 1399 6992 maybee zp->z_phys->zp_size = end; 1400 6992 maybee 1401 6992 maybee dmu_tx_commit(tx); 1402 6992 maybee 1403 789 ahrens /* 1404 1878 maybee * Clear any mapped pages in the truncated region. This has to 1405 1878 maybee * happen outside of the transaction to avoid the possibility of 1406 1878 maybee * a deadlock with someone trying to push a page that we are 1407 1878 maybee * about to invalidate. 1408 789 ahrens */ 1409 6992 maybee if (vn_has_cached_data(vp)) { 1410 789 ahrens page_t *pp; 1411 6992 maybee uint64_t start = end & PAGEMASK; 1412 6992 maybee int poff = end & PAGEOFFSET; 1413 789 ahrens 1414 1878 maybee if (poff != 0 && (pp = page_lookup(vp, start, SE_SHARED))) { 1415 789 ahrens /* 1416 789 ahrens * We need to zero a partial page. 
1417 789 ahrens */ 1418 1878 maybee pagezero(pp, poff, PAGESIZE - poff); 1419 789 ahrens start += PAGESIZE; 1420 789 ahrens page_unlock(pp); 1421 789 ahrens } 1422 789 ahrens error = pvn_vplist_dirty(vp, start, zfs_no_putpage, 1423 1878 maybee B_INVAL | B_TRUNC, NULL); 1424 789 ahrens ASSERT(error == 0); 1425 789 ahrens } 1426 8636 Mark 1427 8636 Mark zfs_range_unlock(rl); 1428 789 ahrens 1429 6992 maybee return (0); 1430 6992 maybee } 1431 6992 maybee 1432 6992 maybee /* 1433 6992 maybee * Free space in a file 1434 6992 maybee * 1435 6992 maybee * IN: zp - znode of file to free data in. 1436 6992 maybee * off - start of range 1437 6992 maybee * len - end of range (0 => EOF) 1438 6992 maybee * flag - current file open mode flags. 1439 6992 maybee * log - TRUE if this action should be logged 1440 6992 maybee * 1441 6992 maybee * RETURN: 0 if success 1442 6992 maybee * error code if failure 1443 6992 maybee */ 1444 6992 maybee int 1445 6992 maybee zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) 1446 6992 maybee { 1447 6992 maybee vnode_t *vp = ZTOV(zp); 1448 6992 maybee dmu_tx_t *tx; 1449 6992 maybee zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1450 6992 maybee zilog_t *zilog = zfsvfs->z_log; 1451 6992 maybee int error; 1452 6992 maybee 1453 6992 maybee if (off > zp->z_phys->zp_size) { 1454 6992 maybee error = zfs_extend(zp, off+len); 1455 6992 maybee if (error == 0 && log) 1456 6992 maybee goto log; 1457 6992 maybee else 1458 6992 maybee return (error); 1459 6992 maybee } 1460 6992 maybee 1461 6992 maybee /* 1462 6992 maybee * Check for any locks in the region to be freed. 1463 6992 maybee */ 1464 6992 maybee if (MANDLOCK(vp, (mode_t)zp->z_phys->zp_mode)) { 1465 6992 maybee uint64_t length = (len ? 
len : zp->z_phys->zp_size - off); 1466 6992 maybee if (error = chklock(vp, FWRITE, off, length, flag, NULL)) 1467 6992 maybee return (error); 1468 6992 maybee } 1469 6992 maybee 1470 6992 maybee if (len == 0) { 1471 6992 maybee error = zfs_trunc(zp, off); 1472 6992 maybee } else { 1473 6992 maybee if ((error = zfs_free_range(zp, off, len)) == 0 && 1474 6992 maybee off + len > zp->z_phys->zp_size) 1475 6992 maybee error = zfs_extend(zp, off+len); 1476 6992 maybee } 1477 6992 maybee if (error || !log) 1478 6992 maybee return (error); 1479 6992 maybee log: 1480 6992 maybee tx = dmu_tx_create(zfsvfs->z_os); 1481 6992 maybee dmu_tx_hold_bonus(tx, zp->z_id); 1482 8227 Neil error = dmu_tx_assign(tx, TXG_NOWAIT); 1483 6992 maybee if (error) { 1484 8227 Neil if (error == ERESTART) { 1485 6992 maybee dmu_tx_wait(tx); 1486 6992 maybee dmu_tx_abort(tx); 1487 6992 maybee goto log; 1488 6992 maybee } 1489 6992 maybee dmu_tx_abort(tx); 1490 6992 maybee return (error); 1491 6992 maybee } 1492 6992 maybee 1493 6992 maybee zfs_time_stamper(zp, CONTENT_MODIFIED, tx); 1494 6992 maybee zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len); 1495 6992 maybee 1496 6992 maybee dmu_tx_commit(tx); 1497 789 ahrens return (0); 1498 789 ahrens } 1499 789 ahrens 1500 789 ahrens void 1501 5498 timh zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) 1502 789 ahrens { 1503 789 ahrens zfsvfs_t zfsvfs; 1504 9396 Matthew uint64_t moid, obj, version; 1505 5498 timh uint64_t sense = ZFS_CASE_SENSITIVE; 1506 5498 timh uint64_t norm = 0; 1507 5498 timh nvpair_t *elem; 1508 789 ahrens int error; 1509 10938 Mark int i; 1510 789 ahrens znode_t *rootzp = NULL; 1511 789 ahrens vnode_t *vp; 1512 789 ahrens vattr_t vattr; 1513 5446 ahrens znode_t *zp; 1514 9179 Mark zfs_acl_ids_t acl_ids; 1515 789 ahrens 1516 789 ahrens /* 1517 789 ahrens * First attempt to create master node. 
1518 1544 eschrock */ 1519 1544 eschrock /* 1520 1544 eschrock * In an empty objset, there are no blocks to read and thus 1521 1544 eschrock * there can be no i/o errors (which we assert below). 1522 789 ahrens */ 1523 789 ahrens moid = MASTER_NODE_OBJ; 1524 789 ahrens error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE, 1525 789 ahrens DMU_OT_NONE, 0, tx); 1526 789 ahrens ASSERT(error == 0); 1527 789 ahrens 1528 789 ahrens /* 1529 789 ahrens * Set starting attributes. 1530 789 ahrens */ 1531 9396 Matthew if (spa_version(dmu_objset_spa(os)) >= SPA_VERSION_USERSPACE) 1532 7046 ahrens version = ZPL_VERSION; 1533 9396 Matthew else if (spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID) 1534 9396 Matthew version = ZPL_VERSION_USERSPACE - 1; 1535 7046 ahrens else 1536 7046 ahrens version = ZPL_VERSION_FUID - 1; 1537 5498 timh elem = NULL; 1538 5498 timh while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) { 1539 5498 timh /* For the moment we expect all zpl props to be uint64_ts */ 1540 5498 timh uint64_t val; 1541 5498 timh char *name; 1542 789 ahrens 1543 5498 timh ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64); 1544 5520 timh VERIFY(nvpair_value_uint64(elem, &val) == 0); 1545 5498 timh name = nvpair_name(elem); 1546 5498 timh if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) { 1547 9396 Matthew if (val < version) 1548 9396 Matthew version = val; 1549 5498 timh } else { 1550 5498 timh error = zap_update(os, moid, name, 8, 1, &val, tx); 1551 5498 timh } 1552 5498 timh ASSERT(error == 0); 1553 5498 timh if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0) 1554 5498 timh norm = val; 1555 5498 timh else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0) 1556 5498 timh sense = val; 1557 5498 timh } 1558 5498 timh ASSERT(version != 0); 1559 9396 Matthew error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx); 1560 789 ahrens 1561 789 ahrens /* 1562 789 ahrens * Create a delete queue. 
1563 789 ahrens */ 1564 9396 Matthew obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx); 1565 789 ahrens 1566 9396 Matthew error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx); 1567 789 ahrens ASSERT(error == 0); 1568 789 ahrens 1569 789 ahrens /* 1570 789 ahrens * Create root znode. Create minimal znode/vnode/zfsvfs 1571 789 ahrens * to allow zfs_mknode to work. 1572 789 ahrens */ 1573 789 ahrens vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; 1574 789 ahrens vattr.va_type = VDIR; 1575 789 ahrens vattr.va_mode = S_IFDIR|0755; 1576 4543 marks vattr.va_uid = crgetuid(cr); 1577 4543 marks vattr.va_gid = crgetgid(cr); 1578 789 ahrens 1579 789 ahrens rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP); 1580 3461 ahrens rootzp->z_unlinked = 0; 1581 789 ahrens rootzp->z_atime_dirty = 0; 1582 789 ahrens 1583 789 ahrens vp = ZTOV(rootzp); 1584 789 ahrens vn_reinit(vp); 1585 789 ahrens vp->v_type = VDIR; 1586 789 ahrens 1587 789 ahrens bzero(&zfsvfs, sizeof (zfsvfs_t)); 1588 789 ahrens 1589 789 ahrens zfsvfs.z_os = os; 1590 789 ahrens zfsvfs.z_parent = &zfsvfs; 1591 5331 amw zfsvfs.z_version = version; 1592 5331 amw zfsvfs.z_use_fuids = USE_FUIDS(version, os); 1593 5331 amw zfsvfs.z_norm = norm; 1594 5498 timh /* 1595 5498 timh * Fold case on file systems that are always or sometimes case 1596 5498 timh * insensitive. 
1597 5498 timh */ 1598 5498 timh if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED) 1599 5498 timh zfsvfs.z_norm |= U8_TEXTPREP_TOUPPER; 1600 789 ahrens 1601 789 ahrens mutex_init(&zfsvfs.z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 1602 789 ahrens list_create(&zfsvfs.z_all_znodes, sizeof (znode_t), 1603 789 ahrens offsetof(znode_t, z_link_node)); 1604 789 ahrens 1605 10938 Mark for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1606 10938 Mark mutex_init(&zfsvfs.z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); 1607 10938 Mark 1608 6712 tomee ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs)); 1609 6712 tomee rootzp->z_zfsvfs = &zfsvfs; 1610 9179 Mark VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr, 1611 9179 Mark cr, NULL, &acl_ids)); 1612 9179 Mark zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, 0, &acl_ids); 1613 5642 maybee ASSERT3P(zp, ==, rootzp); 1614 6712 tomee ASSERT(!vn_in_dnlc(ZTOV(rootzp))); /* not valid to move */ 1615 5446 ahrens error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx); 1616 789 ahrens ASSERT(error == 0); 1617 9179 Mark zfs_acl_ids_free(&acl_ids); 1618 6712 tomee POINTER_INVALIDATE(&rootzp->z_zfsvfs); 1619 789 ahrens 1620 789 ahrens ZTOV(rootzp)->v_count = 0; 1621 5642 maybee dmu_buf_rele(rootzp->z_dbuf, NULL); 1622 5642 maybee rootzp->z_dbuf = NULL; 1623 789 ahrens kmem_cache_free(znode_cache, rootzp); 1624 8845 amw 1625 8845 amw /* 1626 8845 amw * Create shares directory 1627 8845 amw */ 1628 8845 amw 1629 8845 amw error = zfs_create_share_dir(&zfsvfs, tx); 1630 9179 Mark 1631 8845 amw ASSERT(error == 0); 1632 10938 Mark 1633 10938 Mark for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1634 10938 Mark mutex_destroy(&zfsvfs.z_hold_mtx[i]); 1635 789 ahrens } 1636 5331 amw 1637 3444 ek110237 #endif /* _KERNEL */ 1638 3444 ek110237 /* 1639 3444 ek110237 * Given an object number, return its parent object number and whether 1640 3444 ek110237 * or not the object is an extended attribute directory. 
1641 3444 ek110237 */ 1642 3444 ek110237 static int 1643 3444 ek110237 zfs_obj_to_pobj(objset_t *osp, uint64_t obj, uint64_t *pobjp, int *is_xattrdir) 1644 3444 ek110237 { 1645 3444 ek110237 dmu_buf_t *db; 1646 3444 ek110237 dmu_object_info_t doi; 1647 3444 ek110237 znode_phys_t *zp; 1648 3444 ek110237 int error; 1649 3444 ek110237 1650 3444 ek110237 if ((error = dmu_bonus_hold(osp, obj, FTAG, &db)) != 0) 1651 3444 ek110237 return (error); 1652 3444 ek110237 1653 3444 ek110237 dmu_object_info_from_db(db, &doi); 1654 3444 ek110237 if (doi.doi_bonus_type != DMU_OT_ZNODE || 1655 3444 ek110237 doi.doi_bonus_size < sizeof (znode_phys_t)) { 1656 3444 ek110237 dmu_buf_rele(db, FTAG); 1657 3444 ek110237 return (EINVAL); 1658 3444 ek110237 } 1659 3444 ek110237 1660 3444 ek110237 zp = db->db_data; 1661 3444 ek110237 *pobjp = zp->zp_parent; 1662 3444 ek110237 *is_xattrdir = ((zp->zp_flags & ZFS_XATTR) != 0) && 1663 3444 ek110237 S_ISDIR(zp->zp_mode); 1664 3444 ek110237 dmu_buf_rele(db, FTAG); 1665 3444 ek110237 1666 3444 ek110237 return (0); 1667 3444 ek110237 } 1668 3444 ek110237 1669 3444 ek110237 int 1670 3444 ek110237 zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) 1671 3444 ek110237 { 1672 3444 ek110237 char *path = buf + len - 1; 1673 3444 ek110237 int error; 1674 3444 ek110237 1675 3444 ek110237 *path = '\0'; 1676 3444 ek110237 1677 3444 ek110237 for (;;) { 1678 3444 ek110237 uint64_t pobj; 1679 3444 ek110237 char component[MAXNAMELEN + 2]; 1680 3444 ek110237 size_t complen; 1681 3444 ek110237 int is_xattrdir; 1682 3444 ek110237 1683 3444 ek110237 if ((error = zfs_obj_to_pobj(osp, obj, &pobj, 1684 3444 ek110237 &is_xattrdir)) != 0) 1685 3444 ek110237 break; 1686 3444 ek110237 1687 3444 ek110237 if (pobj == obj) { 1688 3444 ek110237 if (path[0] != '/') 1689 3444 ek110237 *--path = '/'; 1690 3444 ek110237 break; 1691 3444 ek110237 } 1692 3444 ek110237 1693 3444 ek110237 component[0] = '/'; 1694 3444 ek110237 if (is_xattrdir) { 1695 3444 ek110237 (void) 
sprintf(component + 1, "<xattrdir>"); 1696 3444 ek110237 } else { 1697 4577 ahrens error = zap_value_search(osp, pobj, obj, 1698 4577 ahrens ZFS_DIRENT_OBJ(-1ULL), component + 1); 1699 3444 ek110237 if (error != 0) 1700 3444 ek110237 break; 1701 3444 ek110237 } 1702 3444 ek110237 1703 3444 ek110237 complen = strlen(component); 1704 3444 ek110237 path -= complen; 1705 3444 ek110237 ASSERT(path >= buf); 1706 3444 ek110237 bcopy(component, path, complen); 1707 3444 ek110237 obj = pobj; 1708 3444 ek110237 } 1709 3444 ek110237 1710 3444 ek110237 if (error == 0) 1711 3444 ek110237 (void) memmove(buf, path, buf + len - path); 1712 3444 ek110237 return (error); 1713 3444 ek110237 } 1714