1 789 ahrens /* 2 789 ahrens * CDDL HEADER START 3 789 ahrens * 4 789 ahrens * The contents of this file are subject to the terms of the 5 1484 ek110237 * Common Development and Distribution License (the "License"). 6 1484 ek110237 * You may not use this file except in compliance with the License. 7 789 ahrens * 8 789 ahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 789 ahrens * or http://www.opensolaris.org/os/licensing. 10 789 ahrens * See the License for the specific language governing permissions 11 789 ahrens * and limitations under the License. 12 789 ahrens * 13 789 ahrens * When distributing Covered Code, include this CDDL HEADER in each 14 789 ahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 789 ahrens * If applicable, add the following below this CDDL HEADER, with the 16 789 ahrens * fields enclosed by brackets "[]" replaced with your own identifying 17 789 ahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18 789 ahrens * 19 789 ahrens * CDDL HEADER END 20 789 ahrens */ 21 789 ahrens /* 22 9030 Mark * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 789 ahrens * Use is subject to license terms. 24 789 ahrens */ 25 789 ahrens 26 789 ahrens #include <sys/types.h> 27 789 ahrens #include <sys/param.h> 28 789 ahrens #include <sys/systm.h> 29 789 ahrens #include <sys/sysmacros.h> 30 789 ahrens #include <sys/kmem.h> 31 789 ahrens #include <sys/pathname.h> 32 789 ahrens #include <sys/vnode.h> 33 789 ahrens #include <sys/vfs.h> 34 3898 rsb #include <sys/vfs_opreg.h> 35 789 ahrens #include <sys/mntent.h> 36 789 ahrens #include <sys/mount.h> 37 789 ahrens #include <sys/cmn_err.h> 38 789 ahrens #include "fs/fs_subr.h" 39 789 ahrens #include <sys/zfs_znode.h> 40 3461 ahrens #include <sys/zfs_dir.h> 41 789 ahrens #include <sys/zil.h> 42 789 ahrens #include <sys/fs/zfs.h> 43 789 ahrens #include <sys/dmu.h> 44 789 ahrens #include <sys/dsl_prop.h> 45 3912 lling #include <sys/dsl_dataset.h> 46 4543 marks #include <sys/dsl_deleg.h> 47 789 ahrens #include <sys/spa.h> 48 789 ahrens #include <sys/zap.h> 49 789 ahrens #include <sys/varargs.h> 50 789 ahrens #include <sys/policy.h> 51 789 ahrens #include <sys/atomic.h> 52 789 ahrens #include <sys/mkdev.h> 53 789 ahrens #include <sys/modctl.h> 54 4543 marks #include <sys/refstr.h> 55 789 ahrens #include <sys/zfs_ioctl.h> 56 789 ahrens #include <sys/zfs_ctldir.h> 57 5331 amw #include <sys/zfs_fuid.h> 58 1544 eschrock #include <sys/bootconf.h> 59 849 bonwick #include <sys/sunddi.h> 60 1484 ek110237 #include <sys/dnlc.h> 61 5326 ek110237 #include <sys/dmu_objset.h> 62 6423 gw25295 #include <sys/spa_boot.h> 63 789 ahrens 64 789 ahrens int zfsfstype; 65 789 ahrens vfsops_t *zfs_vfsops = NULL; 66 849 bonwick static major_t zfs_major; 67 789 ahrens static minor_t zfs_minor; 68 789 ahrens static kmutex_t zfs_dev_mtx; 69 9234 George 70 9234 George extern int sys_shutdown; 71 1544 eschrock 72 789 ahrens static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr); 73 789 ahrens static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr); 74 1544 eschrock static int zfs_mountroot(vfs_t *vfsp, enum whymountroot); 75 789 ahrens static int zfs_root(vfs_t *vfsp, vnode_t **vpp); 76 789 ahrens static int zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp); 77 789 ahrens static int zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp); 78 789 ahrens static void zfs_freevfs(vfs_t *vfsp); 79 789 ahrens 80 789 ahrens static const fs_operation_def_t zfs_vfsops_template[] = { 81 3898 rsb VFSNAME_MOUNT, { .vfs_mount = zfs_mount }, 82 3898 rsb VFSNAME_MOUNTROOT, { .vfs_mountroot = zfs_mountroot }, 83 3898 rsb VFSNAME_UNMOUNT, { .vfs_unmount = zfs_umount }, 84 3898 rsb VFSNAME_ROOT, { .vfs_root = zfs_root }, 85 3898 rsb VFSNAME_STATVFS, { .vfs_statvfs = zfs_statvfs }, 86 3898 rsb VFSNAME_SYNC, { .vfs_sync = zfs_sync }, 87 3898 rsb VFSNAME_VGET, { .vfs_vget = zfs_vget }, 88 3898 rsb VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, 89 3898 rsb NULL, NULL 90 789 ahrens }; 91 789 ahrens 92 789 ahrens static const fs_operation_def_t zfs_vfsops_eio_template[] = { 93 3898 rsb VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, 94 3898 rsb NULL, NULL 95 789 ahrens }; 96 789 ahrens 97 789 ahrens /* 98 789 ahrens * We need to keep a count of active fs's. 99 789 ahrens * This is necessary to prevent our module 100 789 ahrens * from being unloaded after a umount -f 101 789 ahrens */ 102 789 ahrens static uint32_t zfs_active_fs_count = 0; 103 789 ahrens 104 789 ahrens static char *noatime_cancel[] = { MNTOPT_ATIME, NULL }; 105 789 ahrens static char *atime_cancel[] = { MNTOPT_NOATIME, NULL }; 106 3234 ck153898 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; 107 3234 ck153898 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; 108 789 ahrens 109 3234 ck153898 /* 110 4596 lling * MO_DEFAULT is not used since the default value is determined 111 4596 lling * by the equivalent property. 112 3234 ck153898 */ 113 789 ahrens static mntopt_t mntopts[] = { 114 3234 ck153898 { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, NULL }, 115 3234 ck153898 { MNTOPT_XATTR, xattr_cancel, NULL, 0, NULL }, 116 4596 lling { MNTOPT_NOATIME, noatime_cancel, NULL, 0, NULL }, 117 789 ahrens { MNTOPT_ATIME, atime_cancel, NULL, 0, NULL } 118 789 ahrens }; 119 789 ahrens 120 789 ahrens static mntopts_t zfs_mntopts = { 121 789 ahrens sizeof (mntopts) / sizeof (mntopt_t), 122 789 ahrens mntopts 123 789 ahrens }; 124 789 ahrens 125 789 ahrens /*ARGSUSED*/ 126 789 ahrens int 127 789 ahrens zfs_sync(vfs_t *vfsp, short flag, cred_t *cr) 128 789 ahrens { 129 789 ahrens /* 130 789 ahrens * Data integrity is job one. We don't want a compromised kernel 131 789 ahrens * writing to the storage pool, so we never sync during panic. 132 789 ahrens */ 133 789 ahrens if (panicstr) 134 789 ahrens return (0); 135 789 ahrens 136 789 ahrens /* 137 789 ahrens * SYNC_ATTR is used by fsflush() to force old filesystems like UFS 138 789 ahrens * to sync metadata, which they would otherwise cache indefinitely. 139 789 ahrens * Semantically, the only requirement is that the sync be initiated. 140 789 ahrens * The DMU syncs out txgs frequently, so there's nothing to do. 141 789 ahrens */ 142 789 ahrens if (flag & SYNC_ATTR) 143 789 ahrens return (0); 144 789 ahrens 145 789 ahrens if (vfsp != NULL) { 146 789 ahrens /* 147 789 ahrens * Sync a specific filesystem. 148 789 ahrens */ 149 789 ahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 150 9234 George dsl_pool_t *dp; 151 789 ahrens 152 789 ahrens ZFS_ENTER(zfsvfs); 153 9234 George dp = dmu_objset_pool(zfsvfs->z_os); 154 9234 George 155 9234 George /* 156 9234 George * If the system is shutting down, then skip any 157 9234 George * filesystems which may exist on a suspended pool. 158 9234 George */ 159 9234 George if (sys_shutdown && spa_suspended(dp->dp_spa)) { 160 9234 George ZFS_EXIT(zfsvfs); 161 9234 George return (0); 162 9234 George } 163 9234 George 164 789 ahrens if (zfsvfs->z_log != NULL) 165 2638 perrin zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 166 789 ahrens else 167 9234 George txg_wait_synced(dp, 0); 168 789 ahrens ZFS_EXIT(zfsvfs); 169 789 ahrens } else { 170 789 ahrens /* 171 789 ahrens * Sync all ZFS filesystems. This is what happens when you 172 789 ahrens * run sync(1M). Unlike other filesystems, ZFS honors the 173 789 ahrens * request by waiting for all pools to commit all dirty data. 174 789 ahrens */ 175 789 ahrens spa_sync_allpools(); 176 789 ahrens } 177 1544 eschrock 178 1544 eschrock return (0); 179 1544 eschrock } 180 1544 eschrock 181 1544 eschrock static int 182 1544 eschrock zfs_create_unique_device(dev_t *dev) 183 1544 eschrock { 184 1544 eschrock major_t new_major; 185 1544 eschrock 186 1544 eschrock do { 187 1544 eschrock ASSERT3U(zfs_minor, <=, MAXMIN32); 188 1544 eschrock minor_t start = zfs_minor; 189 1544 eschrock do { 190 1544 eschrock mutex_enter(&zfs_dev_mtx); 191 1544 eschrock if (zfs_minor >= MAXMIN32) { 192 1544 eschrock /* 193 1544 eschrock * If we're still using the real major 194 1544 eschrock * keep out of /dev/zfs and /dev/zvol minor 195 1544 eschrock * number space. If we're using a getudev()'ed 196 1544 eschrock * major number, we can use all of its minors. 197 1544 eschrock */ 198 1544 eschrock if (zfs_major == ddi_name_to_major(ZFS_DRIVER)) 199 1544 eschrock zfs_minor = ZFS_MIN_MINOR; 200 1544 eschrock else 201 1544 eschrock zfs_minor = 0; 202 1544 eschrock } else { 203 1544 eschrock zfs_minor++; 204 1544 eschrock } 205 1544 eschrock *dev = makedevice(zfs_major, zfs_minor); 206 1544 eschrock mutex_exit(&zfs_dev_mtx); 207 1544 eschrock } while (vfs_devismounted(*dev) && zfs_minor != start); 208 1544 eschrock if (zfs_minor == start) { 209 1544 eschrock /* 210 1544 eschrock * We are using all ~262,000 minor numbers for the 211 1544 eschrock * current major number. Create a new major number. 212 1544 eschrock */ 213 1544 eschrock if ((new_major = getudev()) == (major_t)-1) { 214 1544 eschrock cmn_err(CE_WARN, 215 1544 eschrock "zfs_mount: Can't get unique major " 216 1544 eschrock "device number."); 217 1544 eschrock return (-1); 218 1544 eschrock } 219 1544 eschrock mutex_enter(&zfs_dev_mtx); 220 1544 eschrock zfs_major = new_major; 221 1544 eschrock zfs_minor = 0; 222 1544 eschrock 223 1544 eschrock mutex_exit(&zfs_dev_mtx); 224 1544 eschrock } else { 225 1544 eschrock break; 226 1544 eschrock } 227 1544 eschrock /* CONSTANTCONDITION */ 228 1544 eschrock } while (1); 229 789 ahrens 230 789 ahrens return (0); 231 789 ahrens } 232 789 ahrens 233 789 ahrens static void 234 789 ahrens atime_changed_cb(void *arg, uint64_t newval) 235 789 ahrens { 236 789 ahrens zfsvfs_t *zfsvfs = arg; 237 789 ahrens 238 789 ahrens if (newval == TRUE) { 239 789 ahrens zfsvfs->z_atime = TRUE; 240 789 ahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 241 789 ahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 242 789 ahrens } else { 243 789 ahrens zfsvfs->z_atime = FALSE; 244 789 ahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 245 789 ahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 246 3234 ck153898 } 247 3234 ck153898 } 248 3234 ck153898 249 3234 ck153898 static void 250 3234 ck153898 xattr_changed_cb(void *arg, uint64_t newval) 251 3234 ck153898 { 252 3234 ck153898 zfsvfs_t *zfsvfs = arg; 253 3234 ck153898 254 3234 ck153898 if (newval == TRUE) { 255 3234 ck153898 /* XXX locking on vfs_flag? */ 256 3234 ck153898 zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 257 3234 ck153898 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 258 3234 ck153898 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 259 3234 ck153898 } else { 260 3234 ck153898 /* XXX locking on vfs_flag? */ 261 3234 ck153898 zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 262 3234 ck153898 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 263 3234 ck153898 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 264 789 ahrens } 265 789 ahrens } 266 789 ahrens 267 789 ahrens static void 268 789 ahrens blksz_changed_cb(void *arg, uint64_t newval) 269 789 ahrens { 270 789 ahrens zfsvfs_t *zfsvfs = arg; 271 789 ahrens 272 789 ahrens if (newval < SPA_MINBLOCKSIZE || 273 789 ahrens newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 274 789 ahrens newval = SPA_MAXBLOCKSIZE; 275 789 ahrens 276 789 ahrens zfsvfs->z_max_blksz = newval; 277 789 ahrens zfsvfs->z_vfs->vfs_bsize = newval; 278 789 ahrens } 279 789 ahrens 280 789 ahrens static void 281 789 ahrens readonly_changed_cb(void *arg, uint64_t newval) 282 789 ahrens { 283 789 ahrens zfsvfs_t *zfsvfs = arg; 284 789 ahrens 285 789 ahrens if (newval) { 286 789 ahrens /* XXX locking on vfs_flag? */ 287 789 ahrens zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 288 789 ahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 289 789 ahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 290 789 ahrens } else { 291 789 ahrens /* XXX locking on vfs_flag? */ 292 789 ahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 293 789 ahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 294 789 ahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 295 789 ahrens } 296 789 ahrens } 297 789 ahrens 298 789 ahrens static void 299 789 ahrens devices_changed_cb(void *arg, uint64_t newval) 300 789 ahrens { 301 789 ahrens zfsvfs_t *zfsvfs = arg; 302 789 ahrens 303 789 ahrens if (newval == FALSE) { 304 789 ahrens zfsvfs->z_vfs->vfs_flag |= VFS_NODEVICES; 305 789 ahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES); 306 789 ahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES, NULL, 0); 307 789 ahrens } else { 308 789 ahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NODEVICES; 309 789 ahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES); 310 789 ahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES, NULL, 0); 311 789 ahrens } 312 789 ahrens } 313 789 ahrens 314 789 ahrens static void 315 789 ahrens setuid_changed_cb(void *arg, uint64_t newval) 316 789 ahrens { 317 789 ahrens zfsvfs_t *zfsvfs = arg; 318 789 ahrens 319 789 ahrens if (newval == FALSE) { 320 789 ahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 321 789 ahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 322 789 ahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 323 789 ahrens } else { 324 789 ahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 325 789 ahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 326 789 ahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 327 789 ahrens } 328 789 ahrens } 329 789 ahrens 330 789 ahrens static void 331 789 ahrens exec_changed_cb(void *arg, uint64_t newval) 332 789 ahrens { 333 789 ahrens zfsvfs_t *zfsvfs = arg; 334 789 ahrens 335 789 ahrens if (newval == FALSE) { 336 789 ahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 337 789 ahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 338 789 ahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 339 789 ahrens } else { 340 789 ahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 341 789 ahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 342 789 ahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 343 789 ahrens } 344 789 ahrens } 345 789 ahrens 346 5331 amw /* 347 5331 amw * The nbmand mount option can be changed at mount time. 348 5331 amw * We can't allow it to be toggled on live file systems or incorrect 349 5331 amw * behavior may be seen from cifs clients 350 5331 amw * 351 5331 amw * This property isn't registered via dsl_prop_register(), but this callback 352 5331 amw * will be called when a file system is first mounted 353 5331 amw */ 354 5331 amw static void 355 5331 amw nbmand_changed_cb(void *arg, uint64_t newval) 356 5331 amw { 357 5331 amw zfsvfs_t *zfsvfs = arg; 358 5331 amw if (newval == FALSE) { 359 5331 amw vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); 360 5331 amw vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); 361 5331 amw } else { 362 5331 amw vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); 363 5331 amw vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); 364 5331 amw } 365 5331 amw } 366 5331 amw 367 789 ahrens static void 368 789 ahrens snapdir_changed_cb(void *arg, uint64_t newval) 369 789 ahrens { 370 789 ahrens zfsvfs_t *zfsvfs = arg; 371 789 ahrens 372 789 ahrens zfsvfs->z_show_ctldir = newval; 373 5331 amw } 374 5331 amw 375 5331 amw static void 376 5331 amw vscan_changed_cb(void *arg, uint64_t newval) 377 5331 amw { 378 5331 amw zfsvfs_t *zfsvfs = arg; 379 5331 amw 380 5331 amw zfsvfs->z_vscan = newval; 381 789 ahrens } 382 789 ahrens 383 789 ahrens static void 384 789 ahrens acl_mode_changed_cb(void *arg, uint64_t newval) 385 789 ahrens { 386 789 ahrens zfsvfs_t *zfsvfs = arg; 387 789 ahrens 388 789 ahrens zfsvfs->z_acl_mode = newval; 389 789 ahrens } 390 789 ahrens 391 789 ahrens static void 392 789 ahrens acl_inherit_changed_cb(void *arg, uint64_t newval) 393 789 ahrens { 394 789 ahrens zfsvfs_t *zfsvfs = arg; 395 789 ahrens 396 789 ahrens zfsvfs->z_acl_inherit = newval; 397 789 ahrens } 398 789 ahrens 399 1544 eschrock static int 400 1544 eschrock zfs_register_callbacks(vfs_t *vfsp) 401 1544 eschrock { 402 1544 eschrock struct dsl_dataset *ds = NULL; 403 1544 eschrock objset_t *os = NULL; 404 1544 eschrock zfsvfs_t *zfsvfs = NULL; 405 5331 amw uint64_t nbmand; 406 5331 amw int readonly, do_readonly = B_FALSE; 407 5331 amw int setuid, do_setuid = B_FALSE; 408 5331 amw int exec, do_exec = B_FALSE; 409 5331 amw int devices, do_devices = B_FALSE; 410 5331 amw int xattr, do_xattr = B_FALSE; 411 5331 amw int atime, do_atime = B_FALSE; 412 1544 eschrock int error = 0; 413 1544 eschrock 414 1544 eschrock ASSERT(vfsp); 415 1544 eschrock zfsvfs = vfsp->vfs_data; 416 1544 eschrock ASSERT(zfsvfs); 417 1544 eschrock os = zfsvfs->z_os; 418 1544 eschrock 419 1544 eschrock /* 420 1544 eschrock * The act of registering our callbacks will destroy any mount 421 1544 eschrock * options we may have. In order to enable temporary overrides 422 3234 ck153898 * of mount options, we stash away the current values and 423 1544 eschrock * restore them after we register the callbacks. 424 1544 eschrock */ 425 1544 eschrock if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 426 1544 eschrock readonly = B_TRUE; 427 1544 eschrock do_readonly = B_TRUE; 428 1544 eschrock } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 429 1544 eschrock readonly = B_FALSE; 430 1544 eschrock do_readonly = B_TRUE; 431 1544 eschrock } 432 1544 eschrock if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 433 1544 eschrock devices = B_FALSE; 434 1544 eschrock setuid = B_FALSE; 435 1544 eschrock do_devices = B_TRUE; 436 1544 eschrock do_setuid = B_TRUE; 437 1544 eschrock } else { 438 1544 eschrock if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { 439 1544 eschrock devices = B_FALSE; 440 1544 eschrock do_devices = B_TRUE; 441 3912 lling } else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) { 442 1544 eschrock devices = B_TRUE; 443 1544 eschrock do_devices = B_TRUE; 444 1544 eschrock } 445 1544 eschrock 446 1544 eschrock if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 447 1544 eschrock setuid = B_FALSE; 448 1544 eschrock do_setuid = B_TRUE; 449 1544 eschrock } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 450 1544 eschrock setuid = B_TRUE; 451 1544 eschrock do_setuid = B_TRUE; 452 1544 eschrock } 453 1544 eschrock } 454 1544 eschrock if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 455 1544 eschrock exec = B_FALSE; 456 1544 eschrock do_exec = B_TRUE; 457 1544 eschrock } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 458 1544 eschrock exec = B_TRUE; 459 1544 eschrock do_exec = B_TRUE; 460 1544 eschrock } 461 3234 ck153898 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 462 3234 ck153898 xattr = B_FALSE; 463 3234 ck153898 do_xattr = B_TRUE; 464 3234 ck153898 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 465 3234 ck153898 xattr = B_TRUE; 466 3234 ck153898 do_xattr = B_TRUE; 467 3234 ck153898 } 468 4596 lling if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 469 4596 lling atime = B_FALSE; 470 4596 lling do_atime = B_TRUE; 471 4596 lling } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 472 4596 lling atime = B_TRUE; 473 4596 lling do_atime = B_TRUE; 474 4596 lling } 475 1544 eschrock 476 1544 eschrock /* 477 5331 amw * nbmand is a special property. It can only be changed at 478 5331 amw * mount time. 479 5331 amw * 480 5331 amw * This is weird, but it is documented to only be changeable 481 5331 amw * at mount time. 482 5331 amw */ 483 5331 amw if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 484 5331 amw nbmand = B_FALSE; 485 5331 amw } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { 486 5331 amw nbmand = B_TRUE; 487 5331 amw } else { 488 5331 amw char osname[MAXNAMELEN]; 489 5331 amw 490 5331 amw dmu_objset_name(os, osname); 491 5331 amw if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand, 492 7265 ahrens NULL)) { 493 7265 ahrens return (error); 494 7265 ahrens } 495 5331 amw } 496 5331 amw 497 5331 amw /* 498 1544 eschrock * Register property callbacks. 499 1544 eschrock * 500 1544 eschrock * It would probably be fine to just check for i/o error from 501 1544 eschrock * the first prop_register(), but I guess I like to go 502 1544 eschrock * overboard... 503 1544 eschrock */ 504 1544 eschrock ds = dmu_objset_ds(os); 505 1544 eschrock error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 506 3234 ck153898 error = error ? error : dsl_prop_register(ds, 507 3234 ck153898 "xattr", xattr_changed_cb, zfsvfs); 508 1544 eschrock error = error ? error : dsl_prop_register(ds, 509 1544 eschrock "recordsize", blksz_changed_cb, zfsvfs); 510 1544 eschrock error = error ? error : dsl_prop_register(ds, 511 1544 eschrock "readonly", readonly_changed_cb, zfsvfs); 512 1544 eschrock error = error ? error : dsl_prop_register(ds, 513 1544 eschrock "devices", devices_changed_cb, zfsvfs); 514 1544 eschrock error = error ? error : dsl_prop_register(ds, 515 1544 eschrock "setuid", setuid_changed_cb, zfsvfs); 516 1544 eschrock error = error ? error : dsl_prop_register(ds, 517 1544 eschrock "exec", exec_changed_cb, zfsvfs); 518 1544 eschrock error = error ? error : dsl_prop_register(ds, 519 1544 eschrock "snapdir", snapdir_changed_cb, zfsvfs); 520 1544 eschrock error = error ? error : dsl_prop_register(ds, 521 1544 eschrock "aclmode", acl_mode_changed_cb, zfsvfs); 522 1544 eschrock error = error ? error : dsl_prop_register(ds, 523 1544 eschrock "aclinherit", acl_inherit_changed_cb, zfsvfs); 524 5331 amw error = error ? error : dsl_prop_register(ds, 525 5331 amw "vscan", vscan_changed_cb, zfsvfs); 526 1544 eschrock if (error) 527 1544 eschrock goto unregister; 528 1544 eschrock 529 1544 eschrock /* 530 1544 eschrock * Invoke our callbacks to restore temporary mount options. 531 1544 eschrock */ 532 1544 eschrock if (do_readonly) 533 1544 eschrock readonly_changed_cb(zfsvfs, readonly); 534 1544 eschrock if (do_setuid) 535 1544 eschrock setuid_changed_cb(zfsvfs, setuid); 536 1544 eschrock if (do_exec) 537 1544 eschrock exec_changed_cb(zfsvfs, exec); 538 1544 eschrock if (do_devices) 539 1544 eschrock devices_changed_cb(zfsvfs, devices); 540 3234 ck153898 if (do_xattr) 541 3234 ck153898 xattr_changed_cb(zfsvfs, xattr); 542 4596 lling if (do_atime) 543 4596 lling atime_changed_cb(zfsvfs, atime); 544 5331 amw 545 5331 amw nbmand_changed_cb(zfsvfs, nbmand); 546 1544 eschrock 547 1544 eschrock return (0); 548 1544 eschrock 549 1544 eschrock unregister: 550 1544 eschrock /* 551 1544 eschrock * We may attempt to unregister some callbacks that are not 552 1544 eschrock * registered, but this is OK; it will simply return ENOMSG, 553 1544 eschrock * which we will ignore. 554 1544 eschrock */ 555 1544 eschrock (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 556 3234 ck153898 (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 557 1544 eschrock (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 558 1544 eschrock (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 559 1544 eschrock (void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zfsvfs); 560 1544 eschrock (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 561 1544 eschrock (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 562 1544 eschrock (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 563 1544 eschrock (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 564 1544 eschrock (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 565 1544 eschrock zfsvfs); 566 5331 amw (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs); 567 1544 eschrock return (error); 568 1544 eschrock 569 1544 eschrock } 570 1544 eschrock 571 9396 Matthew static void 572 9396 Matthew uidacct(objset_t *os, boolean_t isgroup, uint64_t fuid, 573 9396 Matthew int64_t delta, dmu_tx_t *tx) 574 9396 Matthew { 575 9396 Matthew uint64_t used = 0; 576 9396 Matthew char buf[32]; 577 9396 Matthew int err; 578 9396 Matthew uint64_t obj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; 579 9396 Matthew 580 9396 Matthew if (delta == 0) 581 9396 Matthew return; 582 9396 Matthew 583 9396 Matthew (void) snprintf(buf, sizeof (buf), "%llx", (longlong_t)fuid); 584 9396 Matthew err = zap_lookup(os, obj, buf, 8, 1, &used); 585 9396 Matthew ASSERT(err == 0 || err == ENOENT); 586 9396 Matthew /* no underflow/overflow */ 587 9396 Matthew ASSERT(delta > 0 || used >= -delta); 588 9396 Matthew ASSERT(delta < 0 || used + delta > used); 589 9396 Matthew used += delta; 590 9396 Matthew if (used == 0) 591 9396 Matthew err = zap_remove(os, obj, buf, tx); 592 9396 Matthew else 593 9396 Matthew err = zap_update(os, obj, buf, 8, 1, &used, tx); 594 9396 Matthew ASSERT(err == 0); 595 9396 Matthew } 596 9396 Matthew 597 10407 Matthew static int 598 10407 Matthew zfs_space_delta_cb(dmu_object_type_t bonustype, void *bonus, 599 10407 Matthew uint64_t *userp, uint64_t *groupp) 600 9396 Matthew { 601 10407 Matthew znode_phys_t *znp = bonus; 602 9396 Matthew 603 9396 Matthew if (bonustype != DMU_OT_ZNODE) 604 10407 Matthew return (ENOENT); 605 9396 Matthew 606 10407 Matthew *userp = znp->zp_uid; 607 10407 Matthew *groupp = znp->zp_gid; 608 10407 Matthew return (0); 609 9396 Matthew } 610 9396 Matthew 611 9396 Matthew static void 612 9396 Matthew fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr, 613 9396 Matthew char *domainbuf, int buflen, uid_t *ridp) 614 9396 Matthew { 615 9396 Matthew uint64_t fuid; 616 9396 Matthew const char *domain; 617 9396 Matthew 618 9396 Matthew fuid = strtonum(fuidstr, NULL); 619 9396 Matthew 620 9396 Matthew domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid)); 621 9396 Matthew if (domain) 622 9396 Matthew (void) strlcpy(domainbuf, domain, buflen); 623 9396 Matthew else 624 9396 Matthew domainbuf[0] = '\0'; 625 9396 Matthew *ridp = FUID_RID(fuid); 626 9396 Matthew } 627 9396 Matthew 628 9396 Matthew static uint64_t 629 9396 Matthew zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type) 630 9396 Matthew { 631 9396 Matthew switch (type) { 632 9396 Matthew case ZFS_PROP_USERUSED: 633 9396 Matthew return (DMU_USERUSED_OBJECT); 634 9396 Matthew case ZFS_PROP_GROUPUSED: 635 9396 Matthew return (DMU_GROUPUSED_OBJECT); 636 9396 Matthew case ZFS_PROP_USERQUOTA: 637 9396 Matthew return (zfsvfs->z_userquota_obj); 638 9396 Matthew case ZFS_PROP_GROUPQUOTA: 639 9396 Matthew return (zfsvfs->z_groupquota_obj); 640 9396 Matthew } 641 9396 Matthew return (0); 642 9396 Matthew } 643 9396 Matthew 644 9396 Matthew int 645 9396 Matthew zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 646 9396 Matthew uint64_t *cookiep, void *vbuf, uint64_t *bufsizep) 647 9396 Matthew { 648 9396 Matthew int error; 649 9396 Matthew zap_cursor_t zc; 650 9396 Matthew zap_attribute_t za; 651 9396 Matthew zfs_useracct_t *buf = vbuf; 652 9396 Matthew uint64_t obj; 653 9396 Matthew 654 9396 Matthew if (!dmu_objset_userspace_present(zfsvfs->z_os)) 655 9396 Matthew return (ENOTSUP); 656 9396 Matthew 657 9396 Matthew obj = zfs_userquota_prop_to_obj(zfsvfs, type); 658 9396 Matthew if (obj == 0) { 659 9396 Matthew *bufsizep = 0; 660 9396 Matthew return (0); 661 9396 Matthew } 662 9396 Matthew 663 9396 Matthew for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep); 664 9396 Matthew (error = zap_cursor_retrieve(&zc, &za)) == 0; 665 9396 Matthew zap_cursor_advance(&zc)) { 666 9396 Matthew if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) > 667 9396 Matthew *bufsizep) 668 9396 Matthew break; 669 9396 Matthew 670 9396 Matthew fuidstr_to_sid(zfsvfs, za.za_name, 671 9396 Matthew buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid); 672 9396 Matthew 673 9396 Matthew buf->zu_space = za.za_first_integer; 674 9396 Matthew buf++; 675 9396 Matthew } 676 9396 Matthew if (error == ENOENT) 677 9396 Matthew error = 0; 678 9396 Matthew 679 9396 Matthew ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep); 680 9396 Matthew *bufsizep = (uintptr_t)buf - (uintptr_t)vbuf; 681 9396 Matthew *cookiep = zap_cursor_serialize(&zc); 682 9396 Matthew zap_cursor_fini(&zc); 683 9396 Matthew return (error); 684 9396 Matthew } 685 9396 Matthew 686 9396 Matthew /* 687 9396 Matthew * buf must be big enough (eg, 32 bytes) 688 9396 Matthew */ 689 9396 Matthew static int 690 9396 Matthew id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid, 691 9396 Matthew char *buf, boolean_t addok) 692 9396 Matthew { 693 9396 Matthew uint64_t fuid; 694 9396 Matthew int domainid = 0; 695 9396 Matthew 696 9396 Matthew if (domain && domain[0]) { 697 9396 Matthew domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok); 698 9396 Matthew if (domainid == -1) 699 9396 Matthew return (ENOENT); 700 9396 Matthew } 701 9396 Matthew fuid = FUID_ENCODE(domainid, rid); 702 9396 Matthew (void) sprintf(buf, "%llx", (longlong_t)fuid); 703 9396 Matthew return (0); 704 9396 Matthew } 705 9396 Matthew 706 9396 Matthew int 707 9396 Matthew zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 708 9396 Matthew const char *domain, uint64_t rid, uint64_t *valp) 709 9396 Matthew { 710 9396 Matthew char buf[32]; 711 9396 Matthew int err; 712 9396 Matthew uint64_t obj; 713 9396 Matthew 714 9396 Matthew *valp = 0; 715 9396 Matthew 716 9396 Matthew if (!dmu_objset_userspace_present(zfsvfs->z_os)) 717 9396 Matthew return (ENOTSUP); 718 9396 Matthew 719 9396 Matthew obj = zfs_userquota_prop_to_obj(zfsvfs, type); 720 9396 Matthew if (obj == 0) 721 9396 Matthew return (0); 722 9396 Matthew 723 9396 Matthew err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_FALSE); 724 9396 Matthew if (err) 725 9396 Matthew return (err); 726 9396 Matthew 727 9396 Matthew err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp); 728 9396 Matthew if (err == ENOENT) 729 9396 Matthew err = 0; 730 9396 Matthew return (err); 731 9396 Matthew } 732 9396 Matthew 733 9396 Matthew int 734 9396 Matthew zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, 735 9396 Matthew const char *domain, uint64_t rid, uint64_t quota) 736 9396 Matthew { 737 9396 Matthew char buf[32]; 738 9396 Matthew int err; 739 9396 Matthew dmu_tx_t *tx; 740 9396 Matthew uint64_t *objp; 741 9396 Matthew boolean_t fuid_dirtied; 742 9396 Matthew 743 9396 Matthew if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA) 744 9396 Matthew return (EINVAL); 745 9396 Matthew 746 9396 Matthew if (zfsvfs->z_version < ZPL_VERSION_USERSPACE) 747 9396 Matthew return (ENOTSUP); 748 9396 Matthew 749 9396 Matthew objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj : 750 9396 Matthew &zfsvfs->z_groupquota_obj; 751 9396 Matthew 752 9396 Matthew err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE); 753 9396 Matthew if (err) 754 9396 Matthew return (err); 755 9396 Matthew fuid_dirtied = zfsvfs->z_fuid_dirty; 756 9396 Matthew 757 9396 Matthew tx = dmu_tx_create(zfsvfs->z_os); 758 9396 Matthew dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL); 759 9396 Matthew if (*objp == 0) { 760 9396 Matthew dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, 761 9396 Matthew zfs_userquota_prop_prefixes[type]); 762 9396 Matthew } 763 9396 Matthew if (fuid_dirtied) 764 9396 Matthew zfs_fuid_txhold(zfsvfs, tx); 765 9396 Matthew err = dmu_tx_assign(tx, TXG_WAIT); 766 9396 Matthew if (err) { 767 9396 Matthew dmu_tx_abort(tx); 768 9396 Matthew return (err); 769 9396 Matthew } 770 9396 Matthew 771 9396 Matthew mutex_enter(&zfsvfs->z_lock); 772 9396 Matthew if (*objp == 0) { 773 9396 Matthew *objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA, 774 9396 Matthew DMU_OT_NONE, 0, tx); 775 9396 Matthew VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, 776 9396 Matthew zfs_userquota_prop_prefixes[type], 8, 1, objp, tx)); 777 9396 Matthew } 778 9396 Matthew mutex_exit(&zfsvfs->z_lock); 779 9396 Matthew 780 9396 Matthew if (quota == 0) { 781 9396 Matthew err = zap_remove(zfsvfs->z_os, *objp, buf, tx); 782 9396 Matthew if (err == ENOENT) 783 9396 Matthew err = 0; 784 9396 Matthew } else { 785 9396 Matthew err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, "a, tx); 786 9396 Matthew } 787 9396 Matthew ASSERT(err == 0); 788 9396 Matthew if (fuid_dirtied) 789 9396 Matthew zfs_fuid_sync(zfsvfs, tx); 790 9396 Matthew dmu_tx_commit(tx); 791 9396 Matthew return (err); 792 9396 Matthew } 793 9396 Matthew 794 9396 Matthew boolean_t 795 9396 Matthew zfs_usergroup_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid) 796 9396 Matthew { 797 9396 Matthew char buf[32]; 798 9396 Matthew uint64_t used, quota, usedobj, quotaobj; 799 9396 Matthew int err; 800 9396 Matthew 801 9396 Matthew usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; 802 9396 Matthew quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; 803 9396 Matthew 804 9396 Matthew if (quotaobj == 0 || zfsvfs->z_replay) 805 9396 Matthew return (B_FALSE); 806 9396 Matthew 807 9396 Matthew (void) sprintf(buf, "%llx", (longlong_t)fuid); 808 9396 Matthew err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, "a); 809 9396 Matthew if (err != 0) 810 9396 Matthew return (B_FALSE); 811 9396 Matthew 812 9396 Matthew err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used); 813 9396 Matthew if (err != 0) 814 9396 Matthew return (B_FALSE); 815 9396 Matthew return (used >= quota); 816 9396 Matthew } 817 9396 Matthew 818 9396 Matthew int 819 11185 Sean zfsvfs_create(const char *osname, zfsvfs_t **zfvp) 820 9396 Matthew { 821 9396 Matthew objset_t *os; 822 9396 Matthew zfsvfs_t *zfsvfs; 823 9396 Matthew uint64_t zval; 824 9396 Matthew int i, error; 825 9396 Matthew 826 10298 Matthew zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 827 10298 Matthew 828 10298 Matthew /* 829 10298 Matthew * We claim to always be readonly so we can open snapshots; 830 10298 Matthew * other ZPL code will prevent us from writing to snapshots. 831 10298 Matthew */ 832 10298 Matthew error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os); 833 10298 Matthew if (error) { 834 10298 Matthew kmem_free(zfsvfs, sizeof (zfsvfs_t)); 835 9396 Matthew return (error); 836 9396 Matthew } 837 9396 Matthew 838 9396 Matthew /* 839 9396 Matthew * Initialize the zfs-specific filesystem structure. 840 9396 Matthew * Should probably make this a kmem cache, shuffle fields, 841 9396 Matthew * and just bzero up to z_hold_mtx[]. 842 9396 Matthew */ 843 9396 Matthew zfsvfs->z_vfs = NULL; 844 9396 Matthew zfsvfs->z_parent = zfsvfs; 845 9396 Matthew zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 846 9396 Matthew zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 847 9396 Matthew zfsvfs->z_os = os; 848 9396 Matthew 849 9396 Matthew error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); 850 9396 Matthew if (error) { 851 9396 Matthew goto out; 852 9396 Matthew } else if (zfsvfs->z_version > ZPL_VERSION) { 853 9396 Matthew (void) printf("Mismatched versions: File system " 854 9396 Matthew "is version %llu on-disk format, which is " 855 9396 Matthew "incompatible with this software version %lld!", 856 9396 Matthew (u_longlong_t)zfsvfs->z_version, ZPL_VERSION); 857 9396 Matthew error = ENOTSUP; 858 9396 Matthew goto out; 859 9396 Matthew } 860 9396 Matthew 861 9396 Matthew if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0) 862 9396 Matthew goto out; 863 9396 Matthew zfsvfs->z_norm = (int)zval; 864 9396 Matthew 865 9396 Matthew if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0) 866 9396 Matthew goto out; 867 9396 Matthew zfsvfs->z_utf8 = (zval != 0); 868 9396 Matthew 869 9396 Matthew if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0) 870 9396 Matthew goto out; 871 9396 Matthew zfsvfs->z_case = (uint_t)zval; 872 9396 Matthew 873 9396 Matthew /* 874 9396 Matthew * Fold case on file systems that are always or sometimes case 875 9396 Matthew * insensitive. 876 9396 Matthew */ 877 9396 Matthew if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 878 9396 Matthew zfsvfs->z_case == ZFS_CASE_MIXED) 879 9396 Matthew zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; 880 9396 Matthew 881 9396 Matthew zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 882 9396 Matthew 883 9396 Matthew error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, 884 9396 Matthew &zfsvfs->z_root); 885 9396 Matthew if (error) 886 9396 Matthew goto out; 887 9396 Matthew ASSERT(zfsvfs->z_root != 0); 888 9396 Matthew 889 9396 Matthew error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, 890 9396 Matthew &zfsvfs->z_unlinkedobj); 891 9396 Matthew if (error) 892 9396 Matthew goto out; 893 9396 Matthew 894 9396 Matthew error = zap_lookup(os, MASTER_NODE_OBJ, 895 9396 Matthew zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], 896 9396 Matthew 8, 1, &zfsvfs->z_userquota_obj); 897 9396 Matthew if (error && error != ENOENT) 898 9396 Matthew goto out; 899 9396 Matthew 900 9396 Matthew error = zap_lookup(os, MASTER_NODE_OBJ, 901 9396 Matthew zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], 902 9396 Matthew 8, 1, &zfsvfs->z_groupquota_obj); 903 9396 Matthew if (error && error != ENOENT) 904 9396 Matthew goto out; 905 9396 Matthew 906 9396 Matthew error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, 907 9396 Matthew &zfsvfs->z_fuid_obj); 908 9396 Matthew if (error && error != ENOENT) 909 9396 Matthew goto out; 910 9396 Matthew 911 9396 Matthew error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, 912 9396 Matthew &zfsvfs->z_shares_dir); 913 9396 Matthew if (error && error != ENOENT) 914 9396 Matthew goto out; 915 9396 Matthew 916 9396 Matthew mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 917 9396 Matthew mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); 918 9396 Matthew list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 919 9396 Matthew offsetof(znode_t, z_link_node)); 920 9396 Matthew rrw_init(&zfsvfs->z_teardown_lock); 921 9396 Matthew rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); 922 9396 Matthew rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); 923 9396 Matthew for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 924 9396 Matthew mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); 925 9396 Matthew 926 11185 Sean *zfvp = zfsvfs; 927 9396 Matthew return (0); 928 9396 Matthew 929 9396 Matthew out: 930 10298 Matthew dmu_objset_disown(os, zfsvfs); 931 11185 Sean *zfvp = NULL; 932 9396 Matthew kmem_free(zfsvfs, sizeof (zfsvfs_t)); 933 9396 Matthew return (error); 934 9396 Matthew } 935 9396 Matthew 936 1544 eschrock static int 937 5326 ek110237 zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) 938 5326 ek110237 { 939 5326 ek110237 int error; 940 5326 ek110237 941 5326 ek110237 error = zfs_register_callbacks(zfsvfs->z_vfs); 942 5326 ek110237 if (error) 943 5326 ek110237 return (error); 944 5326 ek110237 945 5326 ek110237 /* 946 5326 ek110237 * Set the objset user_ptr to track its zfsvfs. 947 5326 ek110237 */ 948 10298 Matthew mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); 949 5326 ek110237 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 950 10298 Matthew mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); 951 5326 ek110237 952 9292 Neil zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 953 9292 Neil if (zil_disable) { 954 10685 George zil_destroy(zfsvfs->z_log, B_FALSE); 955 9292 Neil zfsvfs->z_log = NULL; 956 9292 Neil } 957 9292 Neil 958 5326 ek110237 /* 959 5326 ek110237 * If we are not mounting (ie: online recv), then we don't 960 5326 ek110237 * have to worry about replaying the log as we blocked all 961 5326 ek110237 * operations out since we closed the ZIL. 962 5326 ek110237 */ 963 5326 ek110237 if (mounting) { 964 7638 Neil boolean_t readonly; 965 7638 Neil 966 5326 ek110237 /* 967 5326 ek110237 * During replay we remove the read only flag to 968 5326 ek110237 * allow replays to succeed. 969 5326 ek110237 */ 970 5326 ek110237 readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; 971 8227 Neil if (readonly != 0) 972 8227 Neil zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 973 8227 Neil else 974 8227 Neil zfs_unlinked_drain(zfsvfs); 975 5326 ek110237 976 9292 Neil if (zfsvfs->z_log) { 977 8227 Neil /* 978 8227 Neil * Parse and replay the intent log. 979 8227 Neil * 980 8227 Neil * Because of ziltest, this must be done after 981 8227 Neil * zfs_unlinked_drain(). (Further note: ziltest 982 8227 Neil * doesn't use readonly mounts, where 983 8227 Neil * zfs_unlinked_drain() isn't called.) This is because 984 8227 Neil * ziltest causes spa_sync() to think it's committed, 985 8227 Neil * but actually it is not, so the intent log contains 986 8227 Neil * many txg's worth of changes. 987 8227 Neil * 988 8227 Neil * In particular, if object N is in the unlinked set in 989 8227 Neil * the last txg to actually sync, then it could be 990 8227 Neil * actually freed in a later txg and then reallocated 991 8227 Neil * in a yet later txg. This would write a "create 992 8227 Neil * object N" record to the intent log. Normally, this 993 8227 Neil * would be fine because the spa_sync() would have 994 8227 Neil * written out the fact that object N is free, before 995 8227 Neil * we could write the "create object N" intent log 996 8227 Neil * record. 997 8227 Neil * 998 8227 Neil * But when we are in ziltest mode, we advance the "open 999 8227 Neil * txg" without actually spa_sync()-ing the changes to 1000 8227 Neil * disk. So we would see that object N is still 1001 8227 Neil * allocated and in the unlinked set, and there is an 1002 8227 Neil * intent log record saying to allocate it. 1003 8227 Neil */ 1004 8227 Neil zfsvfs->z_replay = B_TRUE; 1005 8227 Neil zil_replay(zfsvfs->z_os, zfsvfs, zfs_replay_vector); 1006 8227 Neil zfsvfs->z_replay = B_FALSE; 1007 8227 Neil } 1008 5326 ek110237 zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */ 1009 5326 ek110237 } 1010 5326 ek110237 1011 5326 ek110237 return (0); 1012 5326 ek110237 } 1013 5326 ek110237 1014 9396 Matthew void 1015 9396 Matthew zfsvfs_free(zfsvfs_t *zfsvfs) 1016 6083 ek110237 { 1017 9396 Matthew int i; 1018 9788 Tom extern krwlock_t zfsvfs_lock; /* in zfs_znode.c */ 1019 9788 Tom 1020 9788 Tom /* 1021 9788 Tom * This is a barrier to prevent the filesystem from going away in 1022 9788 Tom * zfs_znode_move() until we can safely ensure that the filesystem is 1023 9788 Tom * not unmounted. We consider the filesystem valid before the barrier 1024 9788 Tom * and invalid after the barrier. 1025 9788 Tom */ 1026 9788 Tom rw_enter(&zfsvfs_lock, RW_READER); 1027 9788 Tom rw_exit(&zfsvfs_lock); 1028 9396 Matthew 1029 9396 Matthew zfs_fuid_destroy(zfsvfs); 1030 9396 Matthew 1031 6083 ek110237 mutex_destroy(&zfsvfs->z_znodes_lock); 1032 9030 Mark mutex_destroy(&zfsvfs->z_lock); 1033 6083 ek110237 list_destroy(&zfsvfs->z_all_znodes); 1034 6083 ek110237 rrw_destroy(&zfsvfs->z_teardown_lock); 1035 6083 ek110237 rw_destroy(&zfsvfs->z_teardown_inactive_lock); 1036 6083 ek110237 rw_destroy(&zfsvfs->z_fuid_lock); 1037 9396 Matthew for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1038 9396 Matthew mutex_destroy(&zfsvfs->z_hold_mtx[i]); 1039 6083 ek110237 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1040 9396 Matthew } 1041 9396 Matthew 1042 9396 Matthew static void 1043 9396 Matthew zfs_set_fuid_feature(zfsvfs_t *zfsvfs) 1044 9396 Matthew { 1045 9396 Matthew zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 1046 9396 Matthew if (zfsvfs->z_use_fuids && zfsvfs->z_vfs) { 1047 9396 Matthew vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR); 1048 9396 Matthew vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS); 1049 9396 Matthew vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS); 1050 9396 Matthew vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE); 1051 9749 Tim vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER); 1052 10793 dai vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE); 1053 9396 Matthew } 1054 6083 ek110237 } 1055 6083 ek110237 1056 5326 ek110237 static int 1057 7046 ahrens zfs_domount(vfs_t *vfsp, char *osname) 1058 1544 eschrock { 1059 1544 eschrock dev_t mount_dev; 1060 9396 Matthew uint64_t recordsize, fsid_guid; 1061 1544 eschrock int error = 0; 1062 1544 eschrock zfsvfs_t *zfsvfs; 1063 1544 eschrock 1064 1544 eschrock ASSERT(vfsp); 1065 1544 eschrock ASSERT(osname); 1066 1544 eschrock 1067 10298 Matthew error = zfsvfs_create(osname, &zfsvfs); 1068 9396 Matthew if (error) 1069 9396 Matthew return (error); 1070 1544 eschrock zfsvfs->z_vfs = vfsp; 1071 1544 eschrock 1072 1544 eschrock /* Initialize the generic filesystem structure. */ 1073 1544 eschrock vfsp->vfs_bcount = 0; 1074 1544 eschrock vfsp->vfs_data = NULL; 1075 1544 eschrock 1076 1544 eschrock if (zfs_create_unique_device(&mount_dev) == -1) { 1077 1544 eschrock error = ENODEV; 1078 1544 eschrock goto out; 1079 1544 eschrock } 1080 1544 eschrock ASSERT(vfs_devismounted(mount_dev) == 0); 1081 1544 eschrock 1082 1544 eschrock if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 1083 1544 eschrock NULL)) 1084 1544 eschrock goto out; 1085 1544 eschrock 1086 1544 eschrock vfsp->vfs_dev = mount_dev; 1087 1544 eschrock vfsp->vfs_fstype = zfsfstype; 1088 1544 eschrock vfsp->vfs_bsize = recordsize; 1089 1544 eschrock vfsp->vfs_flag |= VFS_NOTRUNC; 1090 1544 eschrock vfsp->vfs_data = zfsvfs; 1091 1544 eschrock 1092 9396 Matthew /* 1093 9396 Matthew * The fsid is 64 bits, composed of an 8-bit fs type, which 1094 9396 Matthew * separates our fsid from any other filesystem types, and a 1095 9396 Matthew * 56-bit objset unique ID. The objset unique ID is unique to 1096 9396 Matthew * all objsets open on this system, provided by unique_create(). 1097 9396 Matthew * The 8-bit fs type must be put in the low bits of fsid[1] 1098 9396 Matthew * because that's where other Solaris filesystems put it. 1099 9396 Matthew */ 1100 9396 Matthew fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os); 1101 9396 Matthew ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0); 1102 9396 Matthew vfsp->vfs_fsid.val[0] = fsid_guid; 1103 9396 Matthew vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) | 1104 9396 Matthew zfsfstype & 0xFF; 1105 1544 eschrock 1106 5331 amw /* 1107 5331 amw * Set features for file system. 1108 5331 amw */ 1109 9396 Matthew zfs_set_fuid_feature(zfsvfs); 1110 5498 timh if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { 1111 5498 timh vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 1112 5498 timh vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 1113 5498 timh vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE); 1114 5498 timh } else if (zfsvfs->z_case == ZFS_CASE_MIXED) { 1115 5498 timh vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 1116 5498 timh vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 1117 5498 timh } 1118 5331 amw 1119 1544 eschrock if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 1120 5331 amw uint64_t pval; 1121 3234 ck153898 1122 1544 eschrock atime_changed_cb(zfsvfs, B_FALSE); 1123 1544 eschrock readonly_changed_cb(zfsvfs, B_TRUE); 1124 5331 amw if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL)) 1125 3234 ck153898 goto out; 1126 5331 amw xattr_changed_cb(zfsvfs, pval); 1127 1544 eschrock zfsvfs->z_issnap = B_TRUE; 1128 9688 Matthew 1129 10298 Matthew mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); 1130 9688 Matthew dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 1131 10298 Matthew mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); 1132 1544 eschrock } else { 1133 5326 ek110237 error = zfsvfs_setup(zfsvfs, B_TRUE); 1134 1544 eschrock } 1135 1544 eschrock 1136 1544 eschrock if (!zfsvfs->z_issnap) 1137 1544 eschrock zfsctl_create(zfsvfs); 1138 1544 eschrock out: 1139 1544 eschrock if (error) { 1140 10298 Matthew dmu_objset_disown(zfsvfs->z_os, zfsvfs); 1141 9396 Matthew zfsvfs_free(zfsvfs); 1142 1544 eschrock } else { 1143 1544 eschrock atomic_add_32(&zfs_active_fs_count, 1); 1144 1544 eschrock } 1145 1544 eschrock 1146 1544 eschrock return (error); 1147 1544 eschrock } 1148 1544 eschrock 1149 1544 eschrock void 1150 1544 eschrock zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 1151 1544 eschrock { 1152 1544 eschrock objset_t *os = zfsvfs->z_os; 1153 1544 eschrock struct dsl_dataset *ds; 1154 1544 eschrock 1155 1544 eschrock /* 1156 1544 eschrock * Unregister properties. 1157 1544 eschrock */ 1158 1544 eschrock if (!dmu_objset_is_snapshot(os)) { 1159 1544 eschrock ds = dmu_objset_ds(os); 1160 1544 eschrock VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 1161 3234 ck153898 zfsvfs) == 0); 1162 3234 ck153898 1163 3234 ck153898 VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 1164 1544 eschrock zfsvfs) == 0); 1165 1544 eschrock 1166 1544 eschrock VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 1167 1544 eschrock zfsvfs) == 0); 1168 1544 eschrock 1169 1544 eschrock VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 1170 1544 eschrock zfsvfs) == 0); 1171 1544 eschrock 1172 1544 eschrock VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb, 1173 1544 eschrock zfsvfs) == 0); 1174 1544 eschrock 1175 1544 eschrock VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 1176 1544 eschrock zfsvfs) == 0); 1177 1544 eschrock 1178 1544 eschrock VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 1179 1544 eschrock zfsvfs) == 0); 1180 1544 eschrock 1181 1544 eschrock VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 1182 1544 eschrock zfsvfs) == 0); 1183 1544 eschrock 1184 1544 eschrock VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 1185 1544 eschrock zfsvfs) == 0); 1186 1544 eschrock 1187 1544 eschrock VERIFY(dsl_prop_unregister(ds, "aclinherit", 1188 1544 eschrock acl_inherit_changed_cb, zfsvfs) == 0); 1189 5331 amw 1190 5331 amw VERIFY(dsl_prop_unregister(ds, "vscan", 1191 5331 amw vscan_changed_cb, zfsvfs) == 0); 1192 1544 eschrock } 1193 1544 eschrock } 1194 1544 eschrock 1195 3912 lling /* 1196 3912 lling * Convert a decimal digit string to a uint64_t integer. 1197 3912 lling */ 1198 3912 lling static int 1199 3912 lling str_to_uint64(char *str, uint64_t *objnum) 1200 3912 lling { 1201 3912 lling uint64_t num = 0; 1202 3912 lling 1203 3912 lling while (*str) { 1204 3912 lling if (*str < '0' || *str > '9') 1205 3912 lling return (EINVAL); 1206 3912 lling 1207 3912 lling num = num*10 + *str++ - '0'; 1208 3912 lling } 1209 3912 lling 1210 3912 lling *objnum = num; 1211 3912 lling return (0); 1212 3912 lling } 1213 3912 lling 1214 3912 lling /* 1215 3912 lling * The boot path passed from the boot loader is in the form of 1216 3912 lling * "rootpool-name/root-filesystem-object-number'. Convert this 1217 3912 lling * string to a dataset name: "rootpool-name/root-filesystem-name". 1218 3912 lling */ 1219 3912 lling static int 1220 6423 gw25295 zfs_parse_bootfs(char *bpath, char *outpath) 1221 3912 lling { 1222 3912 lling char *slashp; 1223 3912 lling uint64_t objnum; 1224 3912 lling int error; 1225 3912 lling 1226 3912 lling if (*bpath == 0 || *bpath == '/') 1227 3912 lling return (EINVAL); 1228 3912 lling 1229 7656 Sherry (void) strcpy(outpath, bpath); 1230 7656 Sherry 1231 3912 lling slashp = strchr(bpath, '/'); 1232 3912 lling 1233 3912 lling /* if no '/', just return the pool name */ 1234 3912 lling if (slashp == NULL) { 1235 3912 lling return (0); 1236 3912 lling } 1237 3912 lling 1238 7656 Sherry /* if not a number, just return the root dataset name */ 1239 7656 Sherry if (str_to_uint64(slashp+1, &objnum)) { 1240 7656 Sherry return (0); 1241 7656 Sherry } 1242 3912 lling 1243 3912 lling *slashp = '\0'; 1244 3912 lling error = dsl_dsobj_to_dsname(bpath, objnum, outpath); 1245 3912 lling *slashp = '/'; 1246 3912 lling 1247 3912 lling return (error); 1248 3912 lling } 1249 3912 lling 1250 10972 Ric /* 1251 10972 Ric * zfs_check_global_label: 1252 10972 Ric * Check that the hex label string is appropriate for the dataset 1253 10972 Ric * being mounted into the global_zone proper. 1254 10972 Ric * 1255 10972 Ric * Return an error if the hex label string is not default or 1256 10972 Ric * admin_low/admin_high. For admin_low labels, the corresponding 1257 10972 Ric * dataset must be readonly. 1258 10972 Ric */ 1259 10972 Ric int 1260 10972 Ric zfs_check_global_label(const char *dsname, const char *hexsl) 1261 10972 Ric { 1262 10972 Ric if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0) 1263 10972 Ric return (0); 1264 10972 Ric if (strcasecmp(hexsl, ADMIN_HIGH) == 0) 1265 10972 Ric return (0); 1266 10972 Ric if (strcasecmp(hexsl, ADMIN_LOW) == 0) { 1267 10972 Ric /* must be readonly */ 1268 10972 Ric uint64_t rdonly; 1269 10972 Ric 1270 10972 Ric if (dsl_prop_get_integer(dsname, 1271 10972 Ric zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL)) 1272 10972 Ric return (EACCES); 1273 10972 Ric return (rdonly ? 0 : EACCES); 1274 10972 Ric } 1275 10972 Ric return (EACCES); 1276 10972 Ric } 1277 10972 Ric 1278 10972 Ric /* 1279 10972 Ric * zfs_mount_label_policy: 1280 10972 Ric * Determine whether the mount is allowed according to MAC check. 1281 10972 Ric * by comparing (where appropriate) label of the dataset against 1282 10972 Ric * the label of the zone being mounted into. If the dataset has 1283 10972 Ric * no label, create one. 1284 10972 Ric * 1285 10972 Ric * Returns: 1286 10972 Ric * 0 : access allowed 1287 10972 Ric * >0 : error code, such as EACCES 1288 10972 Ric */ 1289 10972 Ric static int 1290 10972 Ric zfs_mount_label_policy(vfs_t *vfsp, char *osname) 1291 10972 Ric { 1292 10972 Ric int error, retv; 1293 10972 Ric zone_t *mntzone = NULL; 1294 10972 Ric ts_label_t *mnt_tsl; 1295 10972 Ric bslabel_t *mnt_sl; 1296 10972 Ric bslabel_t ds_sl; 1297 10972 Ric char ds_hexsl[MAXNAMELEN]; 1298 10972 Ric 1299 10972 Ric retv = EACCES; /* assume the worst */ 1300 10972 Ric 1301 10972 Ric /* 1302 10972 Ric * Start by getting the dataset label if it exists. 1303 10972 Ric */ 1304 10972 Ric error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL), 1305 10972 Ric 1, sizeof (ds_hexsl), &ds_hexsl, NULL); 1306 10972 Ric if (error) 1307 10972 Ric return (EACCES); 1308 10972 Ric 1309 10972 Ric /* 1310 10972 Ric * If labeling is NOT enabled, then disallow the mount of datasets 1311 10972 Ric * which have a non-default label already. No other label checks 1312 10972 Ric * are needed. 1313 10972 Ric */ 1314 10972 Ric if (!is_system_labeled()) { 1315 10972 Ric if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) 1316 10972 Ric return (0); 1317 10972 Ric return (EACCES); 1318 10972 Ric } 1319 10972 Ric 1320 10972 Ric /* 1321 10972 Ric * Get the label of the mountpoint. If mounting into the global 1322 10972 Ric * zone (i.e. mountpoint is not within an active zone and the 1323 10972 Ric * zoned property is off), the label must be default or 1324 10972 Ric * admin_low/admin_high only; no other checks are needed. 1325 10972 Ric */ 1326 10972 Ric mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE); 1327 10972 Ric if (mntzone->zone_id == GLOBAL_ZONEID) { 1328 10972 Ric uint64_t zoned; 1329 10972 Ric 1330 10972 Ric zone_rele(mntzone); 1331 10972 Ric 1332 10972 Ric if (dsl_prop_get_integer(osname, 1333 10972 Ric zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL)) 1334 10972 Ric return (EACCES); 1335 10972 Ric if (!zoned) 1336 10972 Ric return (zfs_check_global_label(osname, ds_hexsl)); 1337 10972 Ric else 1338 10972 Ric /* 1339 10972 Ric * This is the case of a zone dataset being mounted 1340 10972 Ric * initially, before the zone has been fully created; 1341 10972 Ric * allow this mount into global zone. 1342 10972 Ric */ 1343 10972 Ric return (0); 1344 10972 Ric } 1345 10972 Ric 1346 10972 Ric mnt_tsl = mntzone->zone_slabel; 1347 10972 Ric ASSERT(mnt_tsl != NULL); 1348 10972 Ric label_hold(mnt_tsl); 1349 10972 Ric mnt_sl = label2bslabel(mnt_tsl); 1350 10972 Ric 1351 10972 Ric if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) { 1352 10972 Ric /* 1353 10972 Ric * The dataset doesn't have a real label, so fabricate one. 1354 10972 Ric */ 1355 10972 Ric char *str = NULL; 1356 10972 Ric 1357 10972 Ric if (l_to_str_internal(mnt_sl, &str) == 0 && 1358 10972 Ric dsl_prop_set(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL), 1359 11022 Tom ZPROP_SRC_LOCAL, 1, strlen(str) + 1, str) == 0) 1360 10972 Ric retv = 0; 1361 10972 Ric if (str != NULL) 1362 10972 Ric kmem_free(str, strlen(str) + 1); 1363 10972 Ric } else if (hexstr_to_label(ds_hexsl, &ds_sl) == 0) { 1364 10972 Ric /* 1365 10972 Ric * Now compare labels to complete the MAC check. If the 1366 10972 Ric * labels are equal then allow access. If the mountpoint 1367 10972 Ric * label dominates the dataset label, allow readonly access. 1368 10972 Ric * Otherwise, access is denied. 1369 10972 Ric */ 1370 10972 Ric if (blequal(mnt_sl, &ds_sl)) 1371 10972 Ric retv = 0; 1372 10972 Ric else if (bldominates(mnt_sl, &ds_sl)) { 1373 10972 Ric vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 1374 10972 Ric retv = 0; 1375 10972 Ric } 1376 10972 Ric } 1377 10972 Ric 1378 10972 Ric label_rele(mnt_tsl); 1379 10972 Ric zone_rele(mntzone); 1380 10972 Ric return (retv); 1381 10972 Ric } 1382 10972 Ric 1383 1544 eschrock static int 1384 1544 eschrock zfs_mountroot(vfs_t *vfsp, enum whymountroot why) 1385 1544 eschrock { 1386 1544 eschrock int error = 0; 1387 1544 eschrock static int zfsrootdone = 0; 1388 1544 eschrock zfsvfs_t *zfsvfs = NULL; 1389 1544 eschrock znode_t *zp = NULL; 1390 1544 eschrock vnode_t *vp = NULL; 1391 6423 gw25295 char *zfs_bootfs; 1392 7147 taylor char *zfs_devid; 1393 1544 eschrock 1394 1544 eschrock ASSERT(vfsp); 1395 1544 eschrock 1396 1544 eschrock /* 1397 3912 lling * The filesystem that we mount as root is defined in the 1398 6423 gw25295 * boot property "zfs-bootfs" with a format of 1399 6423 gw25295 * "poolname/root-dataset-objnum". 1400 1544 eschrock */ 1401 1544 eschrock if (why == ROOT_INIT) { 1402 1544 eschrock if (zfsrootdone++) 1403 1544 eschrock return (EBUSY); 1404 6423 gw25295 /* 1405 6423 gw25295 * the process of doing a spa_load will require the 1406 6423 gw25295 * clock to be set before we could (for example) do 1407 6423 gw25295 * something better by looking at the timestamp on 1408 6423 gw25295 * an uberblock, so just set it to -1. 1409 6423 gw25295 */ 1410 6423 gw25295 clkset(-1); 1411 1544 eschrock 1412 7147 taylor if ((zfs_bootfs = spa_get_bootprop("zfs-bootfs")) == NULL) { 1413 7147 taylor cmn_err(CE_NOTE, "spa_get_bootfs: can not get " 1414 7147 taylor "bootfs name"); 1415 6423 gw25295 return (EINVAL); 1416 5648 setje } 1417 7147 taylor zfs_devid = spa_get_bootprop("diskdevid"); 1418 7147 taylor error = spa_import_rootpool(rootfs.bo_name, zfs_devid); 1419 7147 taylor if (zfs_devid) 1420 7147 taylor spa_free_bootprop(zfs_devid); 1421 7147 taylor if (error) { 1422 7147 taylor spa_free_bootprop(zfs_bootfs); 1423 7147 taylor cmn_err(CE_NOTE, "spa_import_rootpool: error %d", 1424 7147 taylor error); 1425 7147 taylor return (error); 1426 7147 taylor } 1427 7147 taylor if (error = zfs_parse_bootfs(zfs_bootfs, rootfs.bo_name)) { 1428 7147 taylor spa_free_bootprop(zfs_bootfs); 1429 7147 taylor cmn_err(CE_NOTE, "zfs_parse_bootfs: error %d", 1430 6423 gw25295 error); 1431 6423 gw25295 return (error); 1432 6423 gw25295 } 1433 3912 lling 1434 7147 taylor spa_free_bootprop(zfs_bootfs); 1435 1544 eschrock 1436 1544 eschrock if (error = vfs_lock(vfsp)) 1437 1544 eschrock return (error); 1438 1544 eschrock 1439 7046 ahrens if (error = zfs_domount(vfsp, rootfs.bo_name)) { 1440 7147 taylor cmn_err(CE_NOTE, "zfs_domount: error %d", error); 1441 1544 eschrock goto out; 1442 6423 gw25295 } 1443 1544 eschrock 1444 1544 eschrock zfsvfs = (zfsvfs_t *)vfsp->vfs_data; 1445 1544 eschrock ASSERT(zfsvfs); 1446 6423 gw25295 if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) { 1447 7147 taylor cmn_err(CE_NOTE, "zfs_zget: error %d", error); 1448 1544 eschrock goto out; 1449 6423 gw25295 } 1450 1544 eschrock 1451 1544 eschrock vp = ZTOV(zp); 1452 1544 eschrock mutex_enter(&vp->v_lock); 1453 1544 eschrock vp->v_flag |= VROOT; 1454 1544 eschrock mutex_exit(&vp->v_lock); 1455 1544 eschrock rootvp = vp; 1456 1544 eschrock 1457 1544 eschrock /* 1458 6570 marks * Leave rootvp held. The root file system is never unmounted. 1459 1544 eschrock */ 1460 1544 eschrock 1461 1544 eschrock vfs_add((struct vnode *)0, vfsp, 1462 1544 eschrock (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0); 1463 1544 eschrock out: 1464 1544 eschrock vfs_unlock(vfsp); 1465 6423 gw25295 return (error); 1466 1544 eschrock } else if (why == ROOT_REMOUNT) { 1467 1544 eschrock readonly_changed_cb(vfsp->vfs_data, B_FALSE); 1468 1544 eschrock vfsp->vfs_flag |= VFS_REMOUNT; 1469 4596 lling 1470 4596 lling /* refresh mount options */ 1471 4596 lling zfs_unregister_callbacks(vfsp->vfs_data); 1472 4596 lling return (zfs_register_callbacks(vfsp)); 1473 4596 lling 1474 1544 eschrock } else if (why == ROOT_UNMOUNT) { 1475 1544 eschrock zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data); 1476 1544 eschrock (void) zfs_sync(vfsp, 0, 0); 1477 1544 eschrock return (0); 1478 1544 eschrock } 1479 1544 eschrock 1480 1544 eschrock /* 1481 1544 eschrock * if "why" is equal to anything else other than ROOT_INIT, 1482 1544 eschrock * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it. 1483 1544 eschrock */ 1484 1544 eschrock return (ENOTSUP); 1485 1544 eschrock } 1486 1544 eschrock 1487 789 ahrens /*ARGSUSED*/ 1488 789 ahrens static int 1489 789 ahrens zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 1490 789 ahrens { 1491 789 ahrens char *osname; 1492 789 ahrens pathname_t spn; 1493 789 ahrens int error = 0; 1494 789 ahrens uio_seg_t fromspace = (uap->flags & MS_SYSSPACE) ? 1495 3912 lling UIO_SYSSPACE : UIO_USERSPACE; 1496 789 ahrens int canwrite; 1497 789 ahrens 1498 789 ahrens if (mvp->v_type != VDIR) 1499 789 ahrens return (ENOTDIR); 1500 789 ahrens 1501 789 ahrens mutex_enter(&mvp->v_lock); 1502 789 ahrens if ((uap->flags & MS_REMOUNT) == 0 && 1503 789 ahrens (uap->flags & MS_OVERLAY) == 0 && 1504 789 ahrens (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 1505 789 ahrens mutex_exit(&mvp->v_lock); 1506 789 ahrens return (EBUSY); 1507 789 ahrens } 1508 789 ahrens mutex_exit(&mvp->v_lock); 1509 789 ahrens 1510 789 ahrens /* 1511 789 ahrens * ZFS does not support passing unparsed data in via MS_DATA. 1512 789 ahrens * Users should use the MS_OPTIONSTR interface; this means 1513 789 ahrens * that all option parsing is already done and the options struct 1514 789 ahrens * can be interrogated. 1515 789 ahrens */ 1516 789 ahrens if ((uap->flags & MS_DATA) && uap->datalen > 0) 1517 789 ahrens return (EINVAL); 1518 789 ahrens 1519 789 ahrens /* 1520 789 ahrens * Get the objset name (the "special" mount argument). 1521 789 ahrens */ 1522 789 ahrens if (error = pn_get(uap->spec, fromspace, &spn)) 1523 789 ahrens return (error); 1524 789 ahrens 1525 789 ahrens osname = spn.pn_path; 1526 789 ahrens 1527 4543 marks /* 1528 4543 marks * Check for mount privilege? 1529 4543 marks * 1530 4543 marks * If we don't have privilege then see if 1531 4543 marks * we have local permission to allow it 1532 4543 marks */ 1533 4543 marks error = secpolicy_fs_mount(cr, mvp, vfsp); 1534 4543 marks if (error) { 1535 4543 marks error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr); 1536 4543 marks if (error == 0) { 1537 4543 marks vattr_t vattr; 1538 4543 marks 1539 4543 marks /* 1540 4543 marks * Make sure user is the owner of the mount point 1541 4543 marks * or has sufficient privileges. 1542 4543 marks */ 1543 4543 marks 1544 4543 marks vattr.va_mask = AT_UID; 1545 4543 marks 1546 5331 amw if (error = VOP_GETATTR(mvp, &vattr, 0, cr, NULL)) { 1547 4543 marks goto out; 1548 4543 marks } 1549 4543 marks 1550 5489 marks if (secpolicy_vnode_owner(cr, vattr.va_uid) != 0 && 1551 5489 marks VOP_ACCESS(mvp, VWRITE, 0, cr, NULL) != 0) { 1552 5489 marks error = EPERM; 1553 4543 marks goto out; 1554 4543 marks } 1555 4543 marks 1556 4543 marks secpolicy_fs_mount_clearopts(cr, vfsp); 1557 4543 marks } else { 1558 4543 marks goto out; 1559 4543 marks } 1560 4543 marks } 1561 789 ahrens 1562 789 ahrens /* 1563 789 ahrens * Refuse to mount a filesystem if we are in a local zone and the 1564 789 ahrens * dataset is not visible. 1565 789 ahrens */ 1566 789 ahrens if (!INGLOBALZONE(curproc) && 1567 789 ahrens (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 1568 789 ahrens error = EPERM; 1569 4596 lling goto out; 1570 4596 lling } 1571 10972 Ric 1572 10972 Ric error = zfs_mount_label_policy(vfsp, osname); 1573 10972 Ric if (error) 1574 10972 Ric goto out; 1575 4596 lling 1576 4596 lling /* 1577 4596 lling * When doing a remount, we simply refresh our temporary properties 1578 4596 lling * according to those options set in the current VFS options. 1579 4596 lling */ 1580 4596 lling if (uap->flags & MS_REMOUNT) { 1581 4596 lling /* refresh mount options */ 1582 4596 lling zfs_unregister_callbacks(vfsp->vfs_data); 1583 4596 lling error = zfs_register_callbacks(vfsp); 1584 789 ahrens goto out; 1585 789 ahrens } 1586 789 ahrens 1587 7046 ahrens error = zfs_domount(vfsp, osname); 1588 789 ahrens 1589 9214 chris /* 1590 9214 chris * Add an extra VFS_HOLD on our parent vfs so that it can't 1591 9214 chris * disappear due to a forced unmount. 1592 9214 chris */ 1593 9246 chris if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap) 1594 9214 chris VFS_HOLD(mvp->v_vfsp); 1595 9214 chris 1596 789 ahrens out: 1597 789 ahrens pn_free(&spn); 1598 789 ahrens return (error); 1599 789 ahrens } 1600 789 ahrens 1601 789 ahrens static int 1602 789 ahrens zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp) 1603 789 ahrens { 1604 789 ahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1605 789 ahrens dev32_t d32; 1606 2885 ahrens uint64_t refdbytes, availbytes, usedobjs, availobjs; 1607 789 ahrens 1608 789 ahrens ZFS_ENTER(zfsvfs); 1609 789 ahrens 1610 2885 ahrens dmu_objset_space(zfsvfs->z_os, 1611 2885 ahrens &refdbytes, &availbytes, &usedobjs, &availobjs); 1612 789 ahrens 1613 789 ahrens /* 1614 789 ahrens * The underlying storage pool actually uses multiple block sizes. 1615 789 ahrens * We report the fragsize as the smallest block size we support, 1616 789 ahrens * and we report our blocksize as the filesystem's maximum blocksize. 1617 789 ahrens */ 1618 789 ahrens statp->f_frsize = 1UL << SPA_MINBLOCKSHIFT; 1619 789 ahrens statp->f_bsize = zfsvfs->z_max_blksz; 1620 789 ahrens 1621 789 ahrens /* 1622 789 ahrens * The following report "total" blocks of various kinds in the 1623 789 ahrens * file system, but reported in terms of f_frsize - the 1624 789 ahrens * "fragment" size. 1625 789 ahrens */ 1626 789 ahrens 1627 2885 ahrens statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 1628 2885 ahrens statp->f_bfree = availbytes >> SPA_MINBLOCKSHIFT; 1629 789 ahrens statp->f_bavail = statp->f_bfree; /* no root reservation */ 1630 789 ahrens 1631 789 ahrens /* 1632 789 ahrens * statvfs() should really be called statufs(), because it assumes 1633 789 ahrens * static metadata. ZFS doesn't preallocate files, so the best 1634 789 ahrens * we can do is report the max that could possibly fit in f_files, 1635 789 ahrens * and that minus the number actually used in f_ffree. 1636 789 ahrens * For f_ffree, report the smaller of the number of object available 1637 789 ahrens * and the number of blocks (each object will take at least a block). 1638 789 ahrens */ 1639 2885 ahrens statp->f_ffree = MIN(availobjs, statp->f_bfree); 1640 789 ahrens statp->f_favail = statp->f_ffree; /* no "root reservation" */ 1641 2885 ahrens statp->f_files = statp->f_ffree + usedobjs; 1642 789 ahrens 1643 789 ahrens (void) cmpldev(&d32, vfsp->vfs_dev); 1644 789 ahrens statp->f_fsid = d32; 1645 789 ahrens 1646 789 ahrens /* 1647 789 ahrens * We're a zfs filesystem. 1648 789 ahrens */ 1649 789 ahrens (void) strcpy(statp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name); 1650 789 ahrens 1651 1123 marks statp->f_flag = vf_to_stf(vfsp->vfs_flag); 1652 789 ahrens 1653 789 ahrens statp->f_namemax = ZFS_MAXNAMELEN; 1654 789 ahrens 1655 789 ahrens /* 1656 789 ahrens * We have all of 32 characters to stuff a string here. 1657 789 ahrens * Is there anything useful we could/should provide? 1658 789 ahrens */ 1659 789 ahrens bzero(statp->f_fstr, sizeof (statp->f_fstr)); 1660 789 ahrens 1661 789 ahrens ZFS_EXIT(zfsvfs); 1662 789 ahrens return (0); 1663 789 ahrens } 1664 789 ahrens 1665 789 ahrens static int 1666 789 ahrens zfs_root(vfs_t *vfsp, vnode_t **vpp) 1667 789 ahrens { 1668 789 ahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1669 789 ahrens znode_t *rootzp; 1670 789 ahrens int error; 1671 789 ahrens 1672 789 ahrens ZFS_ENTER(zfsvfs); 1673 789 ahrens 1674 789 ahrens error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 1675 789 ahrens if (error == 0) 1676 789 ahrens *vpp = ZTOV(rootzp); 1677 789 ahrens 1678 789 ahrens ZFS_EXIT(zfsvfs); 1679 789 ahrens return (error); 1680 789 ahrens } 1681 789 ahrens 1682 5326 ek110237 /* 1683 5326 ek110237 * Teardown the zfsvfs::z_os. 1684 5326 ek110237 * 1685 5326 ek110237 * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock' 1686 5326 ek110237 * and 'z_teardown_inactive_lock' held. 1687 5326 ek110237 */ 1688 5326 ek110237 static int 1689 5326 ek110237 zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) 1690 5326 ek110237 { 1691 5642 maybee znode_t *zp; 1692 5326 ek110237 1693 5326 ek110237 rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); 1694 5326 ek110237 1695 5326 ek110237 if (!unmounting) { 1696 5326 ek110237 /* 1697 5326 ek110237 * We purge the parent filesystem's vfsp as the parent 1698 5326 ek110237 * filesystem and all of its snapshots have their vnode's 1699 5326 ek110237 * v_vfsp set to the parent's filesystem's vfsp. Note, 1700 5326 ek110237 * 'z_parent' is self referential for non-snapshots. 1701 5326 ek110237 */ 1702 5326 ek110237 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 1703 5326 ek110237 } 1704 5326 ek110237 1705 5326 ek110237 /* 1706 5326 ek110237 * Close the zil. NB: Can't close the zil while zfs_inactive 1707 5326 ek110237 * threads are blocked as zil_close can call zfs_inactive. 1708 5326 ek110237 */ 1709 5326 ek110237 if (zfsvfs->z_log) { 1710 5326 ek110237 zil_close(zfsvfs->z_log); 1711 5326 ek110237 zfsvfs->z_log = NULL; 1712 5326 ek110237 } 1713 5326 ek110237 1714 5326 ek110237 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER); 1715 5326 ek110237 1716 5326 ek110237 /* 1717 5326 ek110237 * If we are not unmounting (ie: online recv) and someone already 1718 5326 ek110237 * unmounted this file system while we were doing the switcheroo, 1719 5326 ek110237 * or a reopen of z_os failed then just bail out now. 1720 5326 ek110237 */ 1721 5326 ek110237 if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { 1722 5326 ek110237 rw_exit(&zfsvfs->z_teardown_inactive_lock); 1723 5326 ek110237 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1724 5326 ek110237 return (EIO); 1725 5326 ek110237 } 1726 5326 ek110237 1727 5326 ek110237 /* 1728 5326 ek110237 * At this point there are no vops active, and any new vops will 1729 5326 ek110237 * fail with EIO since we have z_teardown_lock for writer (only 1730 5326 ek110237 * relavent for forced unmount). 1731 5326 ek110237 * 1732 5326 ek110237 * Release all holds on dbufs. 1733 5326 ek110237 */ 1734 5326 ek110237 mutex_enter(&zfsvfs->z_znodes_lock); 1735 5642 maybee for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; 1736 5642 maybee zp = list_next(&zfsvfs->z_all_znodes, zp)) 1737 5446 ahrens if (zp->z_dbuf) { 1738 5642 maybee ASSERT(ZTOV(zp)->v_count > 0); 1739 5642 maybee zfs_znode_dmu_fini(zp); 1740 5326 ek110237 } 1741 5326 ek110237 mutex_exit(&zfsvfs->z_znodes_lock); 1742 5326 ek110237 1743 5326 ek110237 /* 1744 5326 ek110237 * If we are unmounting, set the unmounted flag and let new vops 1745 5326 ek110237 * unblock. zfs_inactive will have the unmounted behavior, and all 1746 5326 ek110237 * other vops will fail with EIO. 1747 5326 ek110237 */ 1748 5326 ek110237 if (unmounting) { 1749 5326 ek110237 zfsvfs->z_unmounted = B_TRUE; 1750 5326 ek110237 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1751 5326 ek110237 rw_exit(&zfsvfs->z_teardown_inactive_lock); 1752 5326 ek110237 } 1753 5326 ek110237 1754 5326 ek110237 /* 1755 5326 ek110237 * z_os will be NULL if there was an error in attempting to reopen 1756 5326 ek110237 * zfsvfs, so just return as the properties had already been 1757 5326 ek110237 * unregistered and cached data had been evicted before. 1758 5326 ek110237 */ 1759 5326 ek110237 if (zfsvfs->z_os == NULL) 1760 5326 ek110237 return (0); 1761 5326 ek110237 1762 5326 ek110237 /* 1763 5326 ek110237 * Unregister properties. 1764 5326 ek110237 */ 1765 5326 ek110237 zfs_unregister_callbacks(zfsvfs); 1766 5326 ek110237 1767 5326 ek110237 /* 1768 5326 ek110237 * Evict cached data 1769 5326 ek110237 */ 1770 6083 ek110237 if (dmu_objset_evict_dbufs(zfsvfs->z_os)) { 1771 5429 maybee txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 1772 6083 ek110237 (void) dmu_objset_evict_dbufs(zfsvfs->z_os); 1773 5429 maybee } 1774 5326 ek110237 1775 5326 ek110237 return (0); 1776 5326 ek110237 } 1777 5326 ek110237 1778 789 ahrens /*ARGSUSED*/ 1779 789 ahrens static int 1780 789 ahrens zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) 1781 789 ahrens { 1782 789 ahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1783 5326 ek110237 objset_t *os; 1784 789 ahrens int ret; 1785 789 ahrens 1786 4543 marks ret = secpolicy_fs_unmount(cr, vfsp); 1787 4543 marks if (ret) { 1788 4543 marks ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), 1789 4543 marks ZFS_DELEG_PERM_MOUNT, cr); 1790 4543 marks if (ret) 1791 4543 marks return (ret); 1792 4543 marks } 1793 1484 ek110237 1794 4736 ek110237 /* 1795 4736 ek110237 * We purge the parent filesystem's vfsp as the parent filesystem 1796 4736 ek110237 * and all of its snapshots have their vnode's v_vfsp set to the 1797 4736 ek110237 * parent's filesystem's vfsp. Note, 'z_parent' is self 1798 4736 ek110237 * referential for non-snapshots. 1799 4736 ek110237 */ 1800 4736 ek110237 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 1801 1484 ek110237 1802 789 ahrens /* 1803 789 ahrens * Unmount any snapshots mounted under .zfs before unmounting the 1804 789 ahrens * dataset itself. 1805 789 ahrens */ 1806 789 ahrens if (zfsvfs->z_ctldir != NULL && 1807 4543 marks (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) { 1808 789 ahrens return (ret); 1809 4543 marks } 1810 789 ahrens 1811 4787 ahrens if (!(fflag & MS_FORCE)) { 1812 789 ahrens /* 1813 4787 ahrens * Check the number of active vnodes in the file system. 1814 4787 ahrens * Our count is maintained in the vfs structure, but the 1815 4787 ahrens * number is off by 1 to indicate a hold on the vfs 1816 4787 ahrens * structure itself. 1817 4787 ahrens * 1818 4787 ahrens * The '.zfs' directory maintains a reference of its 1819 4787 ahrens * own, and any active references underneath are 1820 4787 ahrens * reflected in the vnode count. 1821 4480 gw25295 */ 1822 4787 ahrens if (zfsvfs->z_ctldir == NULL) { 1823 4787 ahrens if (vfsp->vfs_count > 1) 1824 4787 ahrens return (EBUSY); 1825 4787 ahrens } else { 1826 4787 ahrens if (vfsp->vfs_count > 2 || 1827 5326 ek110237 zfsvfs->z_ctldir->v_count > 1) 1828 4787 ahrens return (EBUSY); 1829 789 ahrens } 1830 789 ahrens } 1831 789 ahrens 1832 789 ahrens vfsp->vfs_flag |= VFS_UNMOUNTED; 1833 4787 ahrens 1834 5326 ek110237 VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); 1835 5326 ek110237 os = zfsvfs->z_os; 1836 4787 ahrens 1837 4787 ahrens /* 1838 5326 ek110237 * z_os will be NULL if there was an error in 1839 5326 ek110237 * attempting to reopen zfsvfs. 1840 4787 ahrens */ 1841 5326 ek110237 if (os != NULL) { 1842 5326 ek110237 /* 1843 5326 ek110237 * Unset the objset user_ptr. 1844 5326 ek110237 */ 1845 10298 Matthew mutex_enter(&os->os_user_ptr_lock); 1846 5326 ek110237 dmu_objset_set_user(os, NULL); 1847 10298 Matthew mutex_exit(&os->os_user_ptr_lock); 1848 5326 ek110237 1849 5326 ek110237 /* 1850 6689 maybee * Finally release the objset 1851 5326 ek110237 */ 1852 10298 Matthew dmu_objset_disown(os, zfsvfs); 1853 4787 ahrens } 1854 4787 ahrens 1855 4787 ahrens /* 1856 4787 ahrens * We can now safely destroy the '.zfs' directory node. 1857 4787 ahrens */ 1858 4787 ahrens if (zfsvfs->z_ctldir != NULL) 1859 4787 ahrens zfsctl_destroy(zfsvfs); 1860 789 ahrens 1861 789 ahrens return (0); 1862 789 ahrens } 1863 789 ahrens 1864 789 ahrens static int 1865 789 ahrens zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 1866 789 ahrens { 1867 789 ahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1868 789 ahrens znode_t *zp; 1869 789 ahrens uint64_t object = 0; 1870 789 ahrens uint64_t fid_gen = 0; 1871 789 ahrens uint64_t gen_mask; 1872 789 ahrens uint64_t zp_gen; 1873 789 ahrens int i, err; 1874 789 ahrens 1875 789 ahrens *vpp = NULL; 1876 789 ahrens 1877 789 ahrens ZFS_ENTER(zfsvfs); 1878 789 ahrens 1879 789 ahrens if (fidp->fid_len == LONG_FID_LEN) { 1880 789 ahrens zfid_long_t *zlfid = (zfid_long_t *)fidp; 1881 789 ahrens uint64_t objsetid = 0; 1882 789 ahrens uint64_t setgen = 0; 1883 789 ahrens 1884 789 ahrens for (i = 0; i < sizeof (zlfid->zf_setid); i++) 1885 789 ahrens objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 1886 789 ahrens 1887 789 ahrens for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 1888 789 ahrens setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 1889 789 ahrens 1890 789 ahrens ZFS_EXIT(zfsvfs); 1891 789 ahrens 1892 789 ahrens err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 1893 789 ahrens if (err) 1894 789 ahrens return (EINVAL); 1895 789 ahrens ZFS_ENTER(zfsvfs); 1896 789 ahrens } 1897 789 ahrens 1898 789 ahrens if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 1899 789 ahrens zfid_short_t *zfid = (zfid_short_t *)fidp; 1900 789 ahrens 1901 789 ahrens for (i = 0; i < sizeof (zfid->zf_object); i++) 1902 789 ahrens object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 1903 789 ahrens 1904 789 ahrens for (i = 0; i < sizeof (zfid->zf_gen); i++) 1905 789 ahrens fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 1906 789 ahrens } else { 1907 789 ahrens ZFS_EXIT(zfsvfs); 1908 789 ahrens return (EINVAL); 1909 789 ahrens } 1910 789 ahrens 1911 789 ahrens /* A zero fid_gen means we are in the .zfs control directories */ 1912 789 ahrens if (fid_gen == 0 && 1913 789 ahrens (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { 1914 789 ahrens *vpp = zfsvfs->z_ctldir; 1915 789 ahrens ASSERT(*vpp != NULL); 1916 789 ahrens if (object == ZFSCTL_INO_SNAPDIR) { 1917 789 ahrens VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, 1918 5331 amw 0, NULL, NULL, NULL, NULL, NULL) == 0); 1919 789 ahrens } else { 1920 789 ahrens VN_HOLD(*vpp); 1921 789 ahrens } 1922 789 ahrens ZFS_EXIT(zfsvfs); 1923 789 ahrens return (0); 1924 789 ahrens } 1925 789 ahrens 1926 789 ahrens gen_mask = -1ULL >> (64 - 8 * i); 1927 789 ahrens 1928 789 ahrens dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 1929 789 ahrens if (err = zfs_zget(zfsvfs, object, &zp)) { 1930 789 ahrens ZFS_EXIT(zfsvfs); 1931 789 ahrens return (err); 1932 789 ahrens } 1933 789 ahrens zp_gen = zp->z_phys->zp_gen & gen_mask; 1934 789 ahrens if (zp_gen == 0) 1935 789 ahrens zp_gen = 1; 1936 3461 ahrens if (zp->z_unlinked || zp_gen != fid_gen) { 1937 789 ahrens dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 1938 789 ahrens VN_RELE(ZTOV(zp)); 1939 789 ahrens ZFS_EXIT(zfsvfs); 1940 789 ahrens return (EINVAL); 1941 789 ahrens } 1942 789 ahrens 1943 789 ahrens *vpp = ZTOV(zp); 1944 789 ahrens ZFS_EXIT(zfsvfs); 1945 789 ahrens return (0); 1946 789 ahrens } 1947 789 ahrens 1948 5326 ek110237 /* 1949 5326 ek110237 * Block out VOPs and close zfsvfs_t::z_os 1950 5326 ek110237 * 1951 5326 ek110237 * Note, if successful, then we return with the 'z_teardown_lock' and 1952 5326 ek110237 * 'z_teardown_inactive_lock' write held. 1953 5326 ek110237 */ 1954 5326 ek110237 int 1955 10298 Matthew zfs_suspend_fs(zfsvfs_t *zfsvfs) 1956 5326 ek110237 { 1957 5326 ek110237 int error; 1958 5326 ek110237 1959 5326 ek110237 if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) 1960 5326 ek110237 return (error); 1961 10298 Matthew dmu_objset_disown(zfsvfs->z_os, zfsvfs); 1962 5326 ek110237 1963 5326 ek110237 return (0); 1964 5326 ek110237 } 1965 5326 ek110237 1966 5326 ek110237 /* 1967 5326 ek110237 * Reopen zfsvfs_t::z_os and release VOPs. 1968 5326 ek110237 */ 1969 5326 ek110237 int 1970 10298 Matthew zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname) 1971 5326 ek110237 { 1972 5326 ek110237 int err; 1973 5326 ek110237 1974 5326 ek110237 ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock)); 1975 5326 ek110237 ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); 1976 5326 ek110237 1977 10298 Matthew err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zfsvfs, 1978 10298 Matthew &zfsvfs->z_os); 1979 5326 ek110237 if (err) { 1980 5326 ek110237 zfsvfs->z_os = NULL; 1981 5326 ek110237 } else { 1982 5326 ek110237 znode_t *zp; 1983 5326 ek110237 1984 5326 ek110237 VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); 1985 5326 ek110237 1986 5326 ek110237 /* 1987 5326 ek110237 * Attempt to re-establish all the active znodes with 1988 5326 ek110237 * their dbufs. If a zfs_rezget() fails, then we'll let 1989 5326 ek110237 * any potential callers discover that via ZFS_ENTER_VERIFY_VP 1990 5326 ek110237 * when they try to use their znode. 1991 5326 ek110237 */ 1992 5326 ek110237 mutex_enter(&zfsvfs->z_znodes_lock); 1993 5326 ek110237 for (zp = list_head(&zfsvfs->z_all_znodes); zp; 1994 5326 ek110237 zp = list_next(&zfsvfs->z_all_znodes, zp)) { 1995 5326 ek110237 (void) zfs_rezget(zp); 1996 5326 ek110237 } 1997 5326 ek110237 mutex_exit(&zfsvfs->z_znodes_lock); 1998 5326 ek110237 1999 5326 ek110237 } 2000 5326 ek110237 2001 5326 ek110237 /* release the VOPs */ 2002 5326 ek110237 rw_exit(&zfsvfs->z_teardown_inactive_lock); 2003 5326 ek110237 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 2004 5326 ek110237 2005 5326 ek110237 if (err) { 2006 5326 ek110237 /* 2007 5326 ek110237 * Since we couldn't reopen zfsvfs::z_os, force 2008 5326 ek110237 * unmount this file system. 2009 5326 ek110237 */ 2010 5326 ek110237 if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) 2011 5326 ek110237 (void) dounmount(zfsvfs->z_vfs, MS_FORCE, CRED()); 2012 5326 ek110237 } 2013 5326 ek110237 return (err); 2014 5326 ek110237 } 2015 5326 ek110237 2016 789 ahrens static void 2017 789 ahrens zfs_freevfs(vfs_t *vfsp) 2018 789 ahrens { 2019 789 ahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 2020 9214 chris 2021 9214 chris /* 2022 9214 chris * If this is a snapshot, we have an extra VFS_HOLD on our parent 2023 9214 chris * from zfs_mount(). Release it here. 2024 9214 chris */ 2025 9214 chris if (zfsvfs->z_issnap) 2026 9214 chris VFS_RELE(zfsvfs->z_parent->z_vfs); 2027 9214 chris 2028 9396 Matthew zfsvfs_free(zfsvfs); 2029 789 ahrens 2030 789 ahrens atomic_add_32(&zfs_active_fs_count, -1); 2031 789 ahrens } 2032 789 ahrens 2033 789 ahrens /* 2034 789 ahrens * VFS_INIT() initialization. Note that there is no VFS_FINI(), 2035 789 ahrens * so we can't safely do any non-idempotent initialization here. 2036 789 ahrens * Leave that to zfs_init() and zfs_fini(), which are called 2037 789 ahrens * from the module's _init() and _fini() entry points. 2038 789 ahrens */ 2039 789 ahrens /*ARGSUSED*/ 2040 789 ahrens static int 2041 789 ahrens zfs_vfsinit(int fstype, char *name) 2042 789 ahrens { 2043 789 ahrens int error; 2044 789 ahrens 2045 789 ahrens zfsfstype = fstype; 2046 789 ahrens 2047 789 ahrens /* 2048 789 ahrens * Setup vfsops and vnodeops tables. 2049 789 ahrens */ 2050 789 ahrens error = vfs_setfsops(fstype, zfs_vfsops_template, &zfs_vfsops); 2051 789 ahrens if (error != 0) { 2052 789 ahrens cmn_err(CE_WARN, "zfs: bad vfs ops template"); 2053 789 ahrens } 2054 789 ahrens 2055 789 ahrens error = zfs_create_op_tables(); 2056 789 ahrens if (error) { 2057 789 ahrens zfs_remove_op_tables(); 2058 789 ahrens cmn_err(CE_WARN, "zfs: bad vnode ops template"); 2059 789 ahrens (void) vfs_freevfsops_by_type(zfsfstype); 2060 789 ahrens return (error); 2061 789 ahrens } 2062 789 ahrens 2063 789 ahrens mutex_init(&zfs_dev_mtx, NULL, MUTEX_DEFAULT, NULL); 2064 789 ahrens 2065 789 ahrens /* 2066 849 bonwick * Unique major number for all zfs mounts. 2067 849 bonwick * If we run out of 32-bit minors, we'll getudev() another major. 2068 789 ahrens */ 2069 849 bonwick zfs_major = ddi_name_to_major(ZFS_DRIVER); 2070 849 bonwick zfs_minor = ZFS_MIN_MINOR; 2071 789 ahrens 2072 789 ahrens return (0); 2073 789 ahrens } 2074 789 ahrens 2075 789 ahrens void 2076 789 ahrens zfs_init(void) 2077 789 ahrens { 2078 789 ahrens /* 2079 789 ahrens * Initialize .zfs directory structures 2080 789 ahrens */ 2081 789 ahrens zfsctl_init(); 2082 789 ahrens 2083 789 ahrens /* 2084 789 ahrens * Initialize znode cache, vnode ops, etc... 2085 789 ahrens */ 2086 789 ahrens zfs_znode_init(); 2087 9396 Matthew 2088 9396 Matthew dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb); 2089 789 ahrens } 2090 789 ahrens 2091 789 ahrens void 2092 789 ahrens zfs_fini(void) 2093 789 ahrens { 2094 789 ahrens zfsctl_fini(); 2095 789 ahrens zfs_znode_fini(); 2096 789 ahrens } 2097 789 ahrens 2098 789 ahrens int 2099 789 ahrens zfs_busy(void) 2100 789 ahrens { 2101 789 ahrens return (zfs_active_fs_count != 0); 2102 789 ahrens } 2103 789 ahrens 2104 4577 ahrens int 2105 9396 Matthew zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) 2106 4577 ahrens { 2107 4577 ahrens int error; 2108 9396 Matthew objset_t *os = zfsvfs->z_os; 2109 4577 ahrens dmu_tx_t *tx; 2110 4577 ahrens 2111 4577 ahrens if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) 2112 4577 ahrens return (EINVAL); 2113 4577 ahrens 2114 9396 Matthew if (newvers < zfsvfs->z_version) 2115 9396 Matthew return (EINVAL); 2116 4577 ahrens 2117 4577 ahrens tx = dmu_tx_create(os); 2118 9396 Matthew dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR); 2119 4577 ahrens error = dmu_tx_assign(tx, TXG_WAIT); 2120 4577 ahrens if (error) { 2121 4577 ahrens dmu_tx_abort(tx); 2122 9396 Matthew return (error); 2123 4577 ahrens } 2124 9396 Matthew error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 2125 9396 Matthew 8, 1, &newvers, tx); 2126 9396 Matthew 2127 9396 Matthew if (error) { 2128 9396 Matthew dmu_tx_commit(tx); 2129 9396 Matthew return (error); 2130 9396 Matthew } 2131 4577 ahrens 2132 4577 ahrens spa_history_internal_log(LOG_DS_UPGRADE, 2133 4577 ahrens dmu_objset_spa(os), tx, CRED(), 2134 9396 Matthew "oldver=%llu newver=%llu dataset = %llu", 2135 9396 Matthew zfsvfs->z_version, newvers, dmu_objset_id(os)); 2136 9396 Matthew 2137 4577 ahrens dmu_tx_commit(tx); 2138 4577 ahrens 2139 9396 Matthew zfsvfs->z_version = newvers; 2140 9396 Matthew 2141 9396 Matthew if (zfsvfs->z_version >= ZPL_VERSION_FUID) 2142 9396 Matthew zfs_set_fuid_feature(zfsvfs); 2143 9396 Matthew 2144 9396 Matthew return (0); 2145 4577 ahrens } 2146 4577 ahrens 2147 5498 timh /* 2148 5498 timh * Read a property stored within the master node. 2149 5498 timh */ 2150 5498 timh int 2151 5498 timh zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) 2152 5498 timh { 2153 5498 timh const char *pname; 2154 7184 timh int error = ENOENT; 2155 5498 timh 2156 5498 timh /* 2157 5498 timh * Look up the file system's value for the property. For the 2158 5498 timh * version property, we look up a slightly different string. 2159 5498 timh */ 2160 5498 timh if (prop == ZFS_PROP_VERSION) 2161 5498 timh pname = ZPL_VERSION_STR; 2162 5498 timh else 2163 5498 timh pname = zfs_prop_to_name(prop); 2164 5498 timh 2165 7184 timh if (os != NULL) 2166 7184 timh error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); 2167 5498 timh 2168 6404 maybee if (error == ENOENT) { 2169 5498 timh /* No value set, use the default value */ 2170 5498 timh switch (prop) { 2171 6404 maybee case ZFS_PROP_VERSION: 2172 6404 maybee *value = ZPL_VERSION; 2173 6404 maybee break; 2174 5498 timh case ZFS_PROP_NORMALIZE: 2175 5498 timh case ZFS_PROP_UTF8ONLY: 2176 5498 timh *value = 0; 2177 5498 timh break; 2178 5498 timh case ZFS_PROP_CASE: 2179 5498 timh *value = ZFS_CASE_SENSITIVE; 2180 5498 timh break; 2181 5498 timh default: 2182 6404 maybee return (error); 2183 5498 timh } 2184 6404 maybee error = 0; 2185 5498 timh } 2186 6404 maybee return (error); 2187 5498 timh } 2188 5498 timh 2189 789 ahrens static vfsdef_t vfw = { 2190 789 ahrens VFSDEF_VERSION, 2191 789 ahrens MNTTYPE_ZFS, 2192 789 ahrens zfs_vfsinit, 2193 5331 amw VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_VOLATILEDEV|VSW_STATS| 2194 5331 amw VSW_XID, 2195 789 ahrens &zfs_mntopts 2196 789 ahrens }; 2197 789 ahrens 2198 789 ahrens struct modlfs zfs_modlfs = { 2199 4577 ahrens &mod_fsops, "ZFS filesystem version " SPA_VERSION_STRING, &vfw 2200 789 ahrens }; 2201