1 789 ahrens /* 2 789 ahrens * CDDL HEADER START 3 789 ahrens * 4 789 ahrens * The contents of this file are subject to the terms of the 5 1544 eschrock * Common Development and Distribution License (the "License"). 6 1544 eschrock * You may not use this file except in compliance with the License. 7 789 ahrens * 8 789 ahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 789 ahrens * or http://www.opensolaris.org/os/licensing. 10 789 ahrens * See the License for the specific language governing permissions 11 789 ahrens * and limitations under the License. 12 789 ahrens * 13 789 ahrens * When distributing Covered Code, include this CDDL HEADER in each 14 789 ahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 789 ahrens * If applicable, add the following below this CDDL HEADER, with the 16 789 ahrens * fields enclosed by brackets "[]" replaced with your own identifying 17 789 ahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18 789 ahrens * 19 789 ahrens * CDDL HEADER END 20 789 ahrens */ 21 789 ahrens /* 22 8524 Eric * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 789 ahrens * Use is subject to license terms. 24 789 ahrens */ 25 789 ahrens 26 789 ahrens /* 27 789 ahrens * ZFS volume emulation driver. 28 789 ahrens * 29 789 ahrens * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes. 30 789 ahrens * Volumes are accessed through the symbolic links named: 31 789 ahrens * 32 789 ahrens * /dev/zvol/dsk/<pool_name>/<dataset_name> 33 789 ahrens * /dev/zvol/rdsk/<pool_name>/<dataset_name> 34 789 ahrens * 35 10588 Eric * These links are created by the /dev filesystem (sdev_zvolops.c). 36 789 ahrens * Volumes are persistent through reboot. No user command needs to be 37 789 ahrens * run before opening and using a device. 38 789 ahrens */ 39 789 ahrens 40 789 ahrens #include <sys/types.h> 41 789 ahrens #include <sys/param.h> 42 789 ahrens #include <sys/errno.h> 43 789 ahrens #include <sys/uio.h> 44 789 ahrens #include <sys/buf.h> 45 789 ahrens #include <sys/modctl.h> 46 789 ahrens #include <sys/open.h> 47 789 ahrens #include <sys/kmem.h> 48 789 ahrens #include <sys/conf.h> 49 789 ahrens #include <sys/cmn_err.h> 50 789 ahrens #include <sys/stat.h> 51 789 ahrens #include <sys/zap.h> 52 789 ahrens #include <sys/spa.h> 53 789 ahrens #include <sys/zio.h> 54 6423 gw25295 #include <sys/dmu_traverse.h> 55 6423 gw25295 #include <sys/dnode.h> 56 6423 gw25295 #include <sys/dsl_dataset.h> 57 789 ahrens #include <sys/dsl_prop.h> 58 789 ahrens #include <sys/dkio.h> 59 789 ahrens #include <sys/efi_partition.h> 60 789 ahrens #include <sys/byteorder.h> 61 789 ahrens #include <sys/pathname.h> 62 789 ahrens #include <sys/ddi.h> 63 789 ahrens #include <sys/sunddi.h> 64 789 ahrens #include <sys/crc32.h> 65 789 ahrens #include <sys/dirent.h> 66 789 ahrens #include <sys/policy.h> 67 789 ahrens #include <sys/fs/zfs.h> 68 789 ahrens #include <sys/zfs_ioctl.h> 69 789 ahrens #include <sys/mkdev.h> 70 1141 perrin #include <sys/zil.h> 71 2237 maybee #include <sys/refcount.h> 72 3755 perrin #include <sys/zfs_znode.h> 73 3755 perrin #include <sys/zfs_rlock.h> 74 6423 gw25295 #include <sys/vdev_disk.h> 75 6423 gw25295 #include <sys/vdev_impl.h> 76 6423 gw25295 #include <sys/zvol.h> 77 6423 gw25295 #include <sys/dumphdr.h> 78 8227 Neil #include <sys/zil_impl.h> 79 789 ahrens 80 789 ahrens #include "zfs_namecheck.h" 81 789 ahrens 82 6423 gw25295 static void *zvol_state; 83 10298 Matthew static char *zvol_tag = "zvol_tag"; 84 789 ahrens 85 6423 gw25295 #define ZVOL_DUMPSIZE "dumpsize" 86 789 ahrens 87 789 ahrens /* 88 789 ahrens * This lock protects the zvol_state structure from being modified 89 789 ahrens * while it's being used, e.g. an open that comes in before a create 90 789 ahrens * finishes. It also protects temporary opens of the dataset so that, 91 789 ahrens * e.g., an open doesn't get a spurious EBUSY. 92 789 ahrens */ 93 789 ahrens static kmutex_t zvol_state_lock; 94 789 ahrens static uint32_t zvol_minors; 95 789 ahrens 96 6423 gw25295 typedef struct zvol_extent { 97 7837 Matthew list_node_t ze_node; 98 6423 gw25295 dva_t ze_dva; /* dva associated with this extent */ 99 7837 Matthew uint64_t ze_nblks; /* number of blocks in extent */ 100 6423 gw25295 } zvol_extent_t; 101 6423 gw25295 102 789 ahrens /* 103 789 ahrens * The in-core state of each volume. 104 789 ahrens */ 105 789 ahrens typedef struct zvol_state { 106 789 ahrens char zv_name[MAXPATHLEN]; /* pool/dd name */ 107 789 ahrens uint64_t zv_volsize; /* amount of space we advertise */ 108 3063 perrin uint64_t zv_volblocksize; /* volume block size */ 109 789 ahrens minor_t zv_minor; /* minor number */ 110 789 ahrens uint8_t zv_min_bs; /* minimum addressable block shift */ 111 9303 Eric uint8_t zv_flags; /* readonly, dumpified, etc. */ 112 789 ahrens objset_t *zv_objset; /* objset handle */ 113 789 ahrens uint32_t zv_open_count[OTYPCNT]; /* open counts */ 114 789 ahrens uint32_t zv_total_opens; /* total open count */ 115 1141 perrin zilog_t *zv_zilog; /* ZIL handle */ 116 7837 Matthew list_t zv_extents; /* List of extents for dump */ 117 3755 perrin znode_t zv_znode; /* for range locking */ 118 789 ahrens } zvol_state_t; 119 6423 gw25295 120 6423 gw25295 /* 121 6423 gw25295 * zvol specific flags 122 6423 gw25295 */ 123 6423 gw25295 #define ZVOL_RDONLY 0x1 124 6423 gw25295 #define ZVOL_DUMPIFIED 0x2 125 7405 Eric #define ZVOL_EXCL 0x4 126 9303 Eric #define ZVOL_WCE 0x8 127 3063 perrin 128 3063 perrin /* 129 3063 perrin * zvol maximum transfer in one DMU tx. 130 3063 perrin */ 131 3063 perrin int zvol_maxphys = DMU_MAX_ACCESS/2; 132 3063 perrin 133 11022 Tom extern int zfs_set_prop_nvlist(const char *, zprop_source_t, 134 11022 Tom nvlist_t *, nvlist_t **); 135 10588 Eric static int zvol_remove_zv(zvol_state_t *); 136 3638 billm static int zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio); 137 6423 gw25295 static int zvol_dumpify(zvol_state_t *zv); 138 6423 gw25295 static int zvol_dump_fini(zvol_state_t *zv); 139 6423 gw25295 static int zvol_dump_init(zvol_state_t *zv, boolean_t resize); 140 789 ahrens 141 789 ahrens static void 142 10588 Eric zvol_size_changed(uint64_t volsize, major_t maj, minor_t min) 143 789 ahrens { 144 10588 Eric dev_t dev = makedevice(maj, min); 145 789 ahrens 146 789 ahrens VERIFY(ddi_prop_update_int64(dev, zfs_dip, 147 10588 Eric "Size", volsize) == DDI_SUCCESS); 148 789 ahrens VERIFY(ddi_prop_update_int64(dev, zfs_dip, 149 10588 Eric "Nblocks", lbtodb(volsize)) == DDI_SUCCESS); 150 6423 gw25295 151 6423 gw25295 /* Notify specfs to invalidate the cached size */ 152 6423 gw25295 spec_size_invalidate(dev, VBLK); 153 6423 gw25295 spec_size_invalidate(dev, VCHR); 154 789 ahrens } 155 789 ahrens 156 789 ahrens int 157 2676 eschrock zvol_check_volsize(uint64_t volsize, uint64_t blocksize) 158 789 ahrens { 159 2676 eschrock if (volsize == 0) 160 789 ahrens return (EINVAL); 161 789 ahrens 162 2676 eschrock if (volsize % blocksize != 0) 163 1133 eschrock return (EINVAL); 164 1133 eschrock 165 789 ahrens #ifdef _ILP32 166 2676 eschrock if (volsize - 1 > SPEC_MAXOFFSET_T) 167 789 ahrens return (EOVERFLOW); 168 789 ahrens #endif 169 789 ahrens return (0); 170 789 ahrens } 171 789 ahrens 172 789 ahrens int 173 2676 eschrock zvol_check_volblocksize(uint64_t volblocksize) 174 789 ahrens { 175 2676 eschrock if (volblocksize < SPA_MINBLOCKSIZE || 176 2676 eschrock volblocksize > SPA_MAXBLOCKSIZE || 177 2676 eschrock !ISP2(volblocksize)) 178 789 ahrens return (EDOM); 179 789 ahrens 180 789 ahrens return (0); 181 789 ahrens } 182 789 ahrens 183 789 ahrens int 184 2885 ahrens zvol_get_stats(objset_t *os, nvlist_t *nv) 185 789 ahrens { 186 789 ahrens int error; 187 789 ahrens dmu_object_info_t doi; 188 2885 ahrens uint64_t val; 189 789 ahrens 190 2885 ahrens error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val); 191 789 ahrens if (error) 192 789 ahrens return (error); 193 789 ahrens 194 2885 ahrens dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val); 195 2885 ahrens 196 789 ahrens error = dmu_object_info(os, ZVOL_OBJ, &doi); 197 789 ahrens 198 2885 ahrens if (error == 0) { 199 2885 ahrens dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLBLOCKSIZE, 200 2885 ahrens doi.doi_data_block_size); 201 2885 ahrens } 202 789 ahrens 203 789 ahrens return (error); 204 789 ahrens } 205 789 ahrens 206 789 ahrens /* 207 789 ahrens * Find a free minor number. 208 789 ahrens */ 209 789 ahrens static minor_t 210 789 ahrens zvol_minor_alloc(void) 211 789 ahrens { 212 789 ahrens minor_t minor; 213 789 ahrens 214 789 ahrens ASSERT(MUTEX_HELD(&zvol_state_lock)); 215 789 ahrens 216 789 ahrens for (minor = 1; minor <= ZVOL_MAX_MINOR; minor++) 217 789 ahrens if (ddi_get_soft_state(zvol_state, minor) == NULL) 218 789 ahrens return (minor); 219 789 ahrens 220 789 ahrens return (0); 221 789 ahrens } 222 789 ahrens 223 789 ahrens static zvol_state_t * 224 2676 eschrock zvol_minor_lookup(const char *name) 225 789 ahrens { 226 789 ahrens minor_t minor; 227 789 ahrens zvol_state_t *zv; 228 789 ahrens 229 789 ahrens ASSERT(MUTEX_HELD(&zvol_state_lock)); 230 789 ahrens 231 789 ahrens for (minor = 1; minor <= ZVOL_MAX_MINOR; minor++) { 232 789 ahrens zv = ddi_get_soft_state(zvol_state, minor); 233 789 ahrens if (zv == NULL) 234 789 ahrens continue; 235 789 ahrens if (strcmp(zv->zv_name, name) == 0) 236 789 ahrens break; 237 789 ahrens } 238 789 ahrens 239 789 ahrens return (zv); 240 789 ahrens } 241 789 ahrens 242 6423 gw25295 /* extent mapping arg */ 243 6423 gw25295 struct maparg { 244 7837 Matthew zvol_state_t *ma_zv; 245 7837 Matthew uint64_t ma_blks; 246 6423 gw25295 }; 247 6423 gw25295 248 6423 gw25295 /*ARGSUSED*/ 249 6423 gw25295 static int 250 10922 Jeff zvol_map_block(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 251 10922 Jeff const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) 252 6423 gw25295 { 253 7837 Matthew struct maparg *ma = arg; 254 7837 Matthew zvol_extent_t *ze; 255 7837 Matthew int bs = ma->ma_zv->zv_volblocksize; 256 6423 gw25295 257 7837 Matthew if (bp == NULL || zb->zb_object != ZVOL_OBJ || zb->zb_level != 0) 258 6423 gw25295 return (0); 259 6423 gw25295 260 7837 Matthew VERIFY3U(ma->ma_blks, ==, zb->zb_blkid); 261 7837 Matthew ma->ma_blks++; 262 7837 Matthew 263 6423 gw25295 /* Abort immediately if we have encountered gang blocks */ 264 7837 Matthew if (BP_IS_GANG(bp)) 265 7837 Matthew return (EFRAGS); 266 6423 gw25295 267 7837 Matthew /* 268 7837 Matthew * See if the block is at the end of the previous extent. 269 7837 Matthew */ 270 7837 Matthew ze = list_tail(&ma->ma_zv->zv_extents); 271 7837 Matthew if (ze && 272 7837 Matthew DVA_GET_VDEV(BP_IDENTITY(bp)) == DVA_GET_VDEV(&ze->ze_dva) && 273 7837 Matthew DVA_GET_OFFSET(BP_IDENTITY(bp)) == 274 7837 Matthew DVA_GET_OFFSET(&ze->ze_dva) + ze->ze_nblks * bs) { 275 7837 Matthew ze->ze_nblks++; 276 6423 gw25295 return (0); 277 6423 gw25295 } 278 6423 gw25295 279 7837 Matthew dprintf_bp(bp, "%s", "next blkptr:"); 280 7837 Matthew 281 7837 Matthew /* start a new extent */ 282 7837 Matthew ze = kmem_zalloc(sizeof (zvol_extent_t), KM_SLEEP); 283 7837 Matthew ze->ze_dva = bp->blk_dva[0]; /* structure assignment */ 284 7837 Matthew ze->ze_nblks = 1; 285 7837 Matthew list_insert_tail(&ma->ma_zv->zv_extents, ze); 286 7837 Matthew return (0); 287 7837 Matthew } 288 7837 Matthew 289 7837 Matthew static void 290 7837 Matthew zvol_free_extents(zvol_state_t *zv) 291 7837 Matthew { 292 7837 Matthew zvol_extent_t *ze; 293 7837 Matthew 294 7837 Matthew while (ze = list_head(&zv->zv_extents)) { 295 7837 Matthew list_remove(&zv->zv_extents, ze); 296 7837 Matthew kmem_free(ze, sizeof (zvol_extent_t)); 297 7837 Matthew } 298 7837 Matthew } 299 7837 Matthew 300 7837 Matthew static int 301 7837 Matthew zvol_get_lbas(zvol_state_t *zv) 302 7837 Matthew { 303 7837 Matthew struct maparg ma; 304 7837 Matthew int err; 305 7837 Matthew 306 7837 Matthew ma.ma_zv = zv; 307 7837 Matthew ma.ma_blks = 0; 308 7837 Matthew zvol_free_extents(zv); 309 7837 Matthew 310 7837 Matthew err = traverse_dataset(dmu_objset_ds(zv->zv_objset), 0, 311 7837 Matthew TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, zvol_map_block, &ma); 312 7837 Matthew if (err || ma.ma_blks != (zv->zv_volsize / zv->zv_volblocksize)) { 313 7837 Matthew zvol_free_extents(zv); 314 7837 Matthew return (err ? err : EIO); 315 6423 gw25295 } 316 6423 gw25295 317 6423 gw25295 return (0); 318 6423 gw25295 } 319 6423 gw25295 320 4543 marks /* ARGSUSED */ 321 789 ahrens void 322 4543 marks zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) 323 789 ahrens { 324 5331 amw zfs_creat_t *zct = arg; 325 5331 amw nvlist_t *nvprops = zct->zct_props; 326 789 ahrens int error; 327 2676 eschrock uint64_t volblocksize, volsize; 328 789 ahrens 329 4543 marks VERIFY(nvlist_lookup_uint64(nvprops, 330 2676 eschrock zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) == 0); 331 4543 marks if (nvlist_lookup_uint64(nvprops, 332 2676 eschrock zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0) 333 2676 eschrock volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); 334 2676 eschrock 335 2676 eschrock /* 336 6423 gw25295 * These properties must be removed from the list so the generic 337 2676 eschrock * property setting step won't apply to them. 338 2676 eschrock */ 339 4543 marks VERIFY(nvlist_remove_all(nvprops, 340 2676 eschrock zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0); 341 4543 marks (void) nvlist_remove_all(nvprops, 342 2676 eschrock zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE)); 343 2676 eschrock 344 2676 eschrock error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize, 345 789 ahrens DMU_OT_NONE, 0, tx); 346 789 ahrens ASSERT(error == 0); 347 789 ahrens 348 789 ahrens error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP, 349 789 ahrens DMU_OT_NONE, 0, tx); 350 789 ahrens ASSERT(error == 0); 351 789 ahrens 352 2676 eschrock error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx); 353 789 ahrens ASSERT(error == 0); 354 789 ahrens } 355 1141 perrin 356 1141 perrin /* 357 1141 perrin * Replay a TX_WRITE ZIL transaction that didn't get committed 358 1141 perrin * after a system failure 359 1141 perrin */ 360 1141 perrin static int 361 1141 perrin zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap) 362 1141 perrin { 363 1141 perrin objset_t *os = zv->zv_objset; 364 1141 perrin char *data = (char *)(lr + 1); /* data follows lr_write_t */ 365 10922 Jeff uint64_t offset, length; 366 1141 perrin dmu_tx_t *tx; 367 1141 perrin int error; 368 1141 perrin 369 1141 perrin if (byteswap) 370 1141 perrin byteswap_uint64_array(lr, sizeof (*lr)); 371 1141 perrin 372 10922 Jeff offset = lr->lr_offset; 373 10922 Jeff length = lr->lr_length; 374 10922 Jeff 375 10922 Jeff /* If it's a dmu_sync() block, write the whole block */ 376 10922 Jeff if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { 377 10922 Jeff uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr); 378 10922 Jeff if (length < blocksize) { 379 10922 Jeff offset -= offset % blocksize; 380 10922 Jeff length = blocksize; 381 10922 Jeff } 382 10922 Jeff } 383 10800 Neil 384 1141 perrin tx = dmu_tx_create(os); 385 10922 Jeff dmu_tx_hold_write(tx, ZVOL_OBJ, offset, length); 386 8227 Neil error = dmu_tx_assign(tx, TXG_WAIT); 387 1141 perrin if (error) { 388 1141 perrin dmu_tx_abort(tx); 389 1141 perrin } else { 390 10922 Jeff dmu_write(os, ZVOL_OBJ, offset, length, data, tx); 391 1141 perrin dmu_tx_commit(tx); 392 1141 perrin } 393 1141 perrin 394 1141 perrin return (error); 395 1141 perrin } 396 1141 perrin 397 1141 perrin /* ARGSUSED */ 398 1141 perrin static int 399 1141 perrin zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap) 400 1141 perrin { 401 1141 perrin return (ENOTSUP); 402 1141 perrin } 403 1141 perrin 404 1141 perrin /* 405 1141 perrin * Callback vectors for replaying records. 406 1141 perrin * Only TX_WRITE is needed for zvol. 407 1141 perrin */ 408 1141 perrin zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = { 409 1141 perrin zvol_replay_err, /* 0 no such transaction type */ 410 1141 perrin zvol_replay_err, /* TX_CREATE */ 411 1141 perrin zvol_replay_err, /* TX_MKDIR */ 412 1141 perrin zvol_replay_err, /* TX_MKXATTR */ 413 1141 perrin zvol_replay_err, /* TX_SYMLINK */ 414 1141 perrin zvol_replay_err, /* TX_REMOVE */ 415 1141 perrin zvol_replay_err, /* TX_RMDIR */ 416 1141 perrin zvol_replay_err, /* TX_LINK */ 417 1141 perrin zvol_replay_err, /* TX_RENAME */ 418 1141 perrin zvol_replay_write, /* TX_WRITE */ 419 1141 perrin zvol_replay_err, /* TX_TRUNCATE */ 420 1141 perrin zvol_replay_err, /* TX_SETATTR */ 421 1141 perrin zvol_replay_err, /* TX_ACL */ 422 10800 Neil zvol_replay_err, /* TX_CREATE_ACL */ 423 10800 Neil zvol_replay_err, /* TX_CREATE_ATTR */ 424 10800 Neil zvol_replay_err, /* TX_CREATE_ACL_ATTR */ 425 10800 Neil zvol_replay_err, /* TX_MKDIR_ACL */ 426 10800 Neil zvol_replay_err, /* TX_MKDIR_ATTR */ 427 10800 Neil zvol_replay_err, /* TX_MKDIR_ACL_ATTR */ 428 10800 Neil zvol_replay_err, /* TX_WRITE2 */ 429 1141 perrin }; 430 6423 gw25295 431 10588 Eric int 432 10588 Eric zvol_name2minor(const char *name, minor_t *minor) 433 10588 Eric { 434 10588 Eric zvol_state_t *zv; 435 10588 Eric 436 10588 Eric mutex_enter(&zvol_state_lock); 437 10588 Eric zv = zvol_minor_lookup(name); 438 10588 Eric if (minor && zv) 439 10588 Eric *minor = zv->zv_minor; 440 10588 Eric mutex_exit(&zvol_state_lock); 441 10588 Eric return (zv ? 0 : -1); 442 10588 Eric } 443 10588 Eric 444 6423 gw25295 /* 445 6423 gw25295 * Create a minor node (plus a whole lot more) for the specified volume. 446 789 ahrens */ 447 789 ahrens int 448 10588 Eric zvol_create_minor(const char *name) 449 789 ahrens { 450 789 ahrens zvol_state_t *zv; 451 789 ahrens objset_t *os; 452 3063 perrin dmu_object_info_t doi; 453 789 ahrens minor_t minor = 0; 454 789 ahrens char chrbuf[30], blkbuf[30]; 455 789 ahrens int error; 456 789 ahrens 457 789 ahrens mutex_enter(&zvol_state_lock); 458 789 ahrens 459 789 ahrens if ((zv = zvol_minor_lookup(name)) != NULL) { 460 789 ahrens mutex_exit(&zvol_state_lock); 461 789 ahrens return (EEXIST); 462 789 ahrens } 463 789 ahrens 464 10298 Matthew /* lie and say we're read-only */ 465 10298 Matthew error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os); 466 789 ahrens 467 789 ahrens if (error) { 468 789 ahrens mutex_exit(&zvol_state_lock); 469 789 ahrens return (error); 470 789 ahrens } 471 789 ahrens 472 10588 Eric if ((minor = zvol_minor_alloc()) == 0) { 473 10298 Matthew dmu_objset_disown(os, zvol_tag); 474 789 ahrens mutex_exit(&zvol_state_lock); 475 789 ahrens return (ENXIO); 476 789 ahrens } 477 789 ahrens 478 789 ahrens if (ddi_soft_state_zalloc(zvol_state, minor) != DDI_SUCCESS) { 479 10298 Matthew dmu_objset_disown(os, zvol_tag); 480 789 ahrens mutex_exit(&zvol_state_lock); 481 789 ahrens return (EAGAIN); 482 789 ahrens } 483 2676 eschrock (void) ddi_prop_update_string(minor, zfs_dip, ZVOL_PROP_NAME, 484 2676 eschrock (char *)name); 485 789 ahrens 486 10588 Eric (void) snprintf(chrbuf, sizeof (chrbuf), "%u,raw", minor); 487 789 ahrens 488 789 ahrens if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR, 489 789 ahrens minor, DDI_PSEUDO, 0) == DDI_FAILURE) { 490 789 ahrens ddi_soft_state_free(zvol_state, minor); 491 10298 Matthew dmu_objset_disown(os, zvol_tag); 492 789 ahrens mutex_exit(&zvol_state_lock); 493 789 ahrens return (EAGAIN); 494 789 ahrens } 495 789 ahrens 496 10588 Eric (void) snprintf(blkbuf, sizeof (blkbuf), "%u", minor); 497 789 ahrens 498 789 ahrens if (ddi_create_minor_node(zfs_dip, blkbuf, S_IFBLK, 499 789 ahrens minor, DDI_PSEUDO, 0) == DDI_FAILURE) { 500 789 ahrens ddi_remove_minor_node(zfs_dip, chrbuf); 501 789 ahrens ddi_soft_state_free(zvol_state, minor); 502 10298 Matthew dmu_objset_disown(os, zvol_tag); 503 789 ahrens mutex_exit(&zvol_state_lock); 504 789 ahrens return (EAGAIN); 505 789 ahrens } 506 789 ahrens 507 789 ahrens zv = ddi_get_soft_state(zvol_state, minor); 508 789 ahrens 509 10588 Eric (void) strlcpy(zv->zv_name, name, MAXPATHLEN); 510 789 ahrens zv->zv_min_bs = DEV_BSHIFT; 511 789 ahrens zv->zv_minor = minor; 512 789 ahrens zv->zv_objset = os; 513 10588 Eric if (dmu_objset_is_snapshot(os)) 514 10588 Eric zv->zv_flags |= ZVOL_RDONLY; 515 3755 perrin mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL); 516 3755 perrin avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare, 517 3755 perrin sizeof (rl_t), offsetof(rl_t, r_node)); 518 7837 Matthew list_create(&zv->zv_extents, sizeof (zvol_extent_t), 519 7837 Matthew offsetof(zvol_extent_t, ze_node)); 520 3063 perrin /* get and cache the blocksize */ 521 3063 perrin error = dmu_object_info(os, ZVOL_OBJ, &doi); 522 3063 perrin ASSERT(error == 0); 523 3063 perrin zv->zv_volblocksize = doi.doi_data_block_size; 524 1861 perrin 525 8227 Neil zil_replay(os, zv, zvol_replay_vector); 526 10588 Eric dmu_objset_disown(os, zvol_tag); 527 10588 Eric zv->zv_objset = NULL; 528 789 ahrens 529 789 ahrens zvol_minors++; 530 789 ahrens 531 789 ahrens mutex_exit(&zvol_state_lock); 532 789 ahrens 533 789 ahrens return (0); 534 789 ahrens } 535 789 ahrens 536 789 ahrens /* 537 789 ahrens * Remove minor node for the specified volume. 538 789 ahrens */ 539 10588 Eric static int 540 10588 Eric zvol_remove_zv(zvol_state_t *zv) 541 789 ahrens { 542 10588 Eric char nmbuf[20]; 543 789 ahrens 544 10588 Eric ASSERT(MUTEX_HELD(&zvol_state_lock)); 545 10588 Eric if (zv->zv_total_opens != 0) 546 10588 Eric return (EBUSY); 547 789 ahrens 548 10588 Eric (void) snprintf(nmbuf, sizeof (nmbuf), "%u,raw", zv->zv_minor); 549 10588 Eric ddi_remove_minor_node(zfs_dip, nmbuf); 550 789 ahrens 551 10588 Eric (void) snprintf(nmbuf, sizeof (nmbuf), "%u", zv->zv_minor); 552 10588 Eric ddi_remove_minor_node(zfs_dip, nmbuf); 553 789 ahrens 554 3755 perrin avl_destroy(&zv->zv_znode.z_range_avl); 555 3755 perrin mutex_destroy(&zv->zv_znode.z_range_lock); 556 789 ahrens 557 789 ahrens ddi_soft_state_free(zvol_state, zv->zv_minor); 558 789 ahrens 559 789 ahrens zvol_minors--; 560 10588 Eric return (0); 561 10588 Eric } 562 789 ahrens 563 10588 Eric int 564 10588 Eric zvol_remove_minor(const char *name) 565 10588 Eric { 566 10588 Eric zvol_state_t *zv; 567 10588 Eric int rc; 568 10588 Eric 569 10588 Eric mutex_enter(&zvol_state_lock); 570 10588 Eric if ((zv = zvol_minor_lookup(name)) == NULL) { 571 10588 Eric mutex_exit(&zvol_state_lock); 572 10588 Eric return (ENXIO); 573 10588 Eric } 574 10588 Eric rc = zvol_remove_zv(zv); 575 789 ahrens mutex_exit(&zvol_state_lock); 576 10588 Eric return (rc); 577 10588 Eric } 578 789 ahrens 579 10588 Eric int 580 10588 Eric zvol_first_open(zvol_state_t *zv) 581 10588 Eric { 582 10588 Eric objset_t *os; 583 10588 Eric uint64_t volsize; 584 10588 Eric int error; 585 10588 Eric uint64_t readonly; 586 10588 Eric 587 10588 Eric /* lie and say we're read-only */ 588 10588 Eric error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, B_TRUE, 589 10588 Eric zvol_tag, &os); 590 10588 Eric if (error) 591 10588 Eric return (error); 592 10588 Eric 593 10588 Eric error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); 594 10588 Eric if (error) { 595 10588 Eric ASSERT(error == 0); 596 10588 Eric dmu_objset_disown(os, zvol_tag); 597 10588 Eric return (error); 598 10588 Eric } 599 10588 Eric zv->zv_objset = os; 600 10588 Eric zv->zv_volsize = volsize; 601 10588 Eric zv->zv_zilog = zil_open(os, zvol_get_data); 602 10588 Eric zvol_size_changed(zv->zv_volsize, ddi_driver_major(zfs_dip), 603 10588 Eric zv->zv_minor); 604 10588 Eric 605 10588 Eric VERIFY(dsl_prop_get_integer(zv->zv_name, "readonly", &readonly, 606 10588 Eric NULL) == 0); 607 10588 Eric if (readonly || dmu_objset_is_snapshot(os)) 608 10588 Eric zv->zv_flags |= ZVOL_RDONLY; 609 10588 Eric else 610 10588 Eric zv->zv_flags &= ~ZVOL_RDONLY; 611 10588 Eric return (error); 612 10588 Eric } 613 10588 Eric 614 10588 Eric void 615 10588 Eric zvol_last_close(zvol_state_t *zv) 616 10588 Eric { 617 10588 Eric zil_close(zv->zv_zilog); 618 10588 Eric zv->zv_zilog = NULL; 619 10588 Eric dmu_objset_disown(zv->zv_objset, zvol_tag); 620 10588 Eric zv->zv_objset = NULL; 621 789 ahrens } 622 789 ahrens 623 6423 gw25295 int 624 6423 gw25295 zvol_prealloc(zvol_state_t *zv) 625 6423 gw25295 { 626 6423 gw25295 objset_t *os = zv->zv_objset; 627 6423 gw25295 dmu_tx_t *tx; 628 6423 gw25295 uint64_t refd, avail, usedobjs, availobjs; 629 6423 gw25295 uint64_t resid = zv->zv_volsize; 630 6423 gw25295 uint64_t off = 0; 631 6423 gw25295 632 6423 gw25295 /* Check the space usage before attempting to allocate the space */ 633 6423 gw25295 dmu_objset_space(os, &refd, &avail, &usedobjs, &availobjs); 634 6423 gw25295 if (avail < zv->zv_volsize) 635 6423 gw25295 return (ENOSPC); 636 6423 gw25295 637 6423 gw25295 /* Free old extents if they exist */ 638 6423 gw25295 zvol_free_extents(zv); 639 6423 gw25295 640 6423 gw25295 while (resid != 0) { 641 6423 gw25295 int error; 642 6423 gw25295 uint64_t bytes = MIN(resid, SPA_MAXBLOCKSIZE); 643 6423 gw25295 644 6423 gw25295 tx = dmu_tx_create(os); 645 6423 gw25295 dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes); 646 6423 gw25295 error = dmu_tx_assign(tx, TXG_WAIT); 647 6423 gw25295 if (error) { 648 6423 gw25295 dmu_tx_abort(tx); 649 6992 maybee (void) dmu_free_long_range(os, ZVOL_OBJ, 0, off); 650 6423 gw25295 return (error); 651 6423 gw25295 } 652 7872 Tim dmu_prealloc(os, ZVOL_OBJ, off, bytes, tx); 653 6423 gw25295 dmu_tx_commit(tx); 654 6423 gw25295 off += bytes; 655 6423 gw25295 resid -= bytes; 656 6423 gw25295 } 657 6423 gw25295 txg_wait_synced(dmu_objset_pool(os), 0); 658 6423 gw25295 659 6423 gw25295 return (0); 660 6423 gw25295 } 661 6423 gw25295 662 6423 gw25295 int 663 10588 Eric zvol_update_volsize(objset_t *os, uint64_t volsize) 664 6423 gw25295 { 665 6423 gw25295 dmu_tx_t *tx; 666 6423 gw25295 int error; 667 6423 gw25295 668 6423 gw25295 ASSERT(MUTEX_HELD(&zvol_state_lock)); 669 6423 gw25295 670 10588 Eric tx = dmu_tx_create(os); 671 6423 gw25295 dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); 672 6423 gw25295 error = dmu_tx_assign(tx, TXG_WAIT); 673 6423 gw25295 if (error) { 674 6423 gw25295 dmu_tx_abort(tx); 675 6423 gw25295 return (error); 676 6423 gw25295 } 677 6423 gw25295 678 10588 Eric error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, 679 6423 gw25295 &volsize, tx); 680 6423 gw25295 dmu_tx_commit(tx); 681 6423 gw25295 682 6423 gw25295 if (error == 0) 683 10588 Eric error = dmu_free_long_range(os, 684 6992 maybee ZVOL_OBJ, volsize, DMU_OBJECT_END); 685 10588 Eric return (error); 686 10588 Eric } 687 6423 gw25295 688 10588 Eric void 689 10588 Eric zvol_remove_minors(const char *name) 690 10588 Eric { 691 10588 Eric zvol_state_t *zv; 692 10588 Eric char *namebuf; 693 10588 Eric minor_t minor; 694 10588 Eric 695 10588 Eric namebuf = kmem_zalloc(strlen(name) + 2, KM_SLEEP); 696 10588 Eric (void) strncpy(namebuf, name, strlen(name)); 697 10588 Eric (void) strcat(namebuf, "/"); 698 10588 Eric mutex_enter(&zvol_state_lock); 699 10588 Eric for (minor = 1; minor <= ZVOL_MAX_MINOR; minor++) { 700 10588 Eric 701 10588 Eric zv = ddi_get_soft_state(zvol_state, minor); 702 10588 Eric if (zv == NULL) 703 10588 Eric continue; 704 10588 Eric if (strncmp(namebuf, zv->zv_name, strlen(namebuf)) == 0) 705 10588 Eric (void) zvol_remove_zv(zv); 706 6423 gw25295 } 707 10588 Eric kmem_free(namebuf, strlen(name) + 2); 708 10588 Eric 709 10588 Eric mutex_exit(&zvol_state_lock); 710 6423 gw25295 } 711 6423 gw25295 712 789 ahrens int 713 4787 ahrens zvol_set_volsize(const char *name, major_t maj, uint64_t volsize) 714 789 ahrens { 715 10588 Eric zvol_state_t *zv = NULL; 716 10588 Eric objset_t *os; 717 789 ahrens int error; 718 1133 eschrock dmu_object_info_t doi; 719 6423 gw25295 uint64_t old_volsize = 0ULL; 720 10588 Eric uint64_t readonly; 721 789 ahrens 722 789 ahrens mutex_enter(&zvol_state_lock); 723 10588 Eric zv = zvol_minor_lookup(name); 724 10588 Eric if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) { 725 10588 Eric mutex_exit(&zvol_state_lock); 726 10588 Eric return (error); 727 10588 Eric } 728 789 ahrens 729 10588 Eric if ((error = dmu_object_info(os, ZVOL_OBJ, &doi)) != 0 || 730 2676 eschrock (error = zvol_check_volsize(volsize, 731 7265 ahrens doi.doi_data_block_size)) != 0) 732 7265 ahrens goto out; 733 789 ahrens 734 10588 Eric VERIFY(dsl_prop_get_integer(name, "readonly", &readonly, 735 10588 Eric NULL) == 0); 736 10588 Eric if (readonly) { 737 7265 ahrens error = EROFS; 738 7265 ahrens goto out; 739 789 ahrens } 740 789 ahrens 741 10588 Eric error = zvol_update_volsize(os, volsize); 742 6423 gw25295 /* 743 6423 gw25295 * Reinitialize the dump area to the new size. If we 744 10588 Eric * failed to resize the dump area then restore it back to 745 10588 Eric * its original size. 746 6423 gw25295 */ 747 10588 Eric if (zv && error == 0) { 748 10588 Eric if (zv->zv_flags & ZVOL_DUMPIFIED) { 749 10588 Eric old_volsize = zv->zv_volsize; 750 10588 Eric zv->zv_volsize = volsize; 751 10588 Eric if ((error = zvol_dumpify(zv)) != 0 || 752 10588 Eric (error = dumpvp_resize()) != 0) { 753 10588 Eric (void) zvol_update_volsize(os, old_volsize); 754 10588 Eric zv->zv_volsize = old_volsize; 755 10588 Eric error = zvol_dumpify(zv); 756 10588 Eric } 757 10588 Eric } 758 10588 Eric if (error == 0) { 759 10588 Eric zv->zv_volsize = volsize; 760 10588 Eric zvol_size_changed(volsize, maj, zv->zv_minor); 761 6423 gw25295 } 762 9816 George } 763 9816 George 764 9816 George /* 765 9816 George * Generate a LUN expansion event. 766 9816 George */ 767 10588 Eric if (zv && error == 0) { 768 9816 George sysevent_id_t eid; 769 9816 George nvlist_t *attr; 770 9816 George char *physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 771 9816 George 772 10588 Eric (void) snprintf(physpath, MAXPATHLEN, "%s%u", ZVOL_PSEUDO_DEV, 773 9816 George zv->zv_minor); 774 9816 George 775 9816 George VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0); 776 9816 George VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0); 777 9816 George 778 9816 George (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS, 779 9816 George ESC_DEV_DLE, attr, &eid, DDI_SLEEP); 780 9816 George 781 9816 George nvlist_free(attr); 782 9816 George kmem_free(physpath, MAXPATHLEN); 783 789 ahrens } 784 7265 ahrens 785 7265 ahrens out: 786 10588 Eric dmu_objset_rele(os, FTAG); 787 789 ahrens 788 789 ahrens mutex_exit(&zvol_state_lock); 789 789 ahrens 790 789 ahrens return (error); 791 789 ahrens } 792 789 ahrens 793 789 ahrens /*ARGSUSED*/ 794 789 ahrens int 795 789 ahrens zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr) 796 789 ahrens { 797 789 ahrens minor_t minor = getminor(*devp); 798 789 ahrens zvol_state_t *zv; 799 10588 Eric int err = 0; 800 789 ahrens 801 789 ahrens if (minor == 0) /* This is the control device */ 802 789 ahrens return (0); 803 789 ahrens 804 789 ahrens mutex_enter(&zvol_state_lock); 805 789 ahrens 806 789 ahrens zv = ddi_get_soft_state(zvol_state, minor); 807 789 ahrens if (zv == NULL) { 808 789 ahrens mutex_exit(&zvol_state_lock); 809 789 ahrens return (ENXIO); 810 789 ahrens } 811 789 ahrens 812 10588 Eric if (zv->zv_total_opens == 0) 813 10588 Eric err = zvol_first_open(zv); 814 10588 Eric if (err) { 815 789 ahrens mutex_exit(&zvol_state_lock); 816 10588 Eric return (err); 817 10588 Eric } 818 10588 Eric if ((flag & FWRITE) && (zv->zv_flags & ZVOL_RDONLY)) { 819 10588 Eric err = EROFS; 820 10588 Eric goto out; 821 789 ahrens } 822 7405 Eric if (zv->zv_flags & ZVOL_EXCL) { 823 10588 Eric err = EBUSY; 824 10588 Eric goto out; 825 7405 Eric } 826 7405 Eric if (flag & FEXCL) { 827 7405 Eric if (zv->zv_total_opens != 0) { 828 10588 Eric err = EBUSY; 829 10588 Eric goto out; 830 7405 Eric } 831 7405 Eric zv->zv_flags |= ZVOL_EXCL; 832 7405 Eric } 833 789 ahrens 834 789 ahrens if (zv->zv_open_count[otyp] == 0 || otyp == OTYP_LYR) { 835 789 ahrens zv->zv_open_count[otyp]++; 836 789 ahrens zv->zv_total_opens++; 837 789 ahrens } 838 789 ahrens mutex_exit(&zvol_state_lock); 839 789 ahrens 840 10588 Eric return (err); 841 10588 Eric out: 842 10588 Eric if (zv->zv_total_opens == 0) 843 10588 Eric zvol_last_close(zv); 844 10588 Eric mutex_exit(&zvol_state_lock); 845 10588 Eric return (err); 846 789 ahrens } 847 789 ahrens 848 789 ahrens /*ARGSUSED*/ 849 789 ahrens int 850 789 ahrens zvol_close(dev_t dev, int flag, int otyp, cred_t *cr) 851 789 ahrens { 852 789 ahrens minor_t minor = getminor(dev); 853 789 ahrens zvol_state_t *zv; 854 10588 Eric int error = 0; 855 789 ahrens 856 789 ahrens if (minor == 0) /* This is the control device */ 857 789 ahrens return (0); 858 789 ahrens 859 789 ahrens mutex_enter(&zvol_state_lock); 860 789 ahrens 861 789 ahrens zv = ddi_get_soft_state(zvol_state, minor); 862 789 ahrens if (zv == NULL) { 863 789 ahrens mutex_exit(&zvol_state_lock); 864 789 ahrens return (ENXIO); 865 789 ahrens } 866 789 ahrens 867 7405 Eric if (zv->zv_flags & ZVOL_EXCL) { 868 7405 Eric ASSERT(zv->zv_total_opens == 1); 869 7405 Eric zv->zv_flags &= ~ZVOL_EXCL; 870 789 ahrens } 871 789 ahrens 872 789 ahrens /* 873 789 ahrens * If the open count is zero, this is a spurious close. 874 789 ahrens * That indicates a bug in the kernel / DDI framework. 875 789 ahrens */ 876 789 ahrens ASSERT(zv->zv_open_count[otyp] != 0); 877 789 ahrens ASSERT(zv->zv_total_opens != 0); 878 789 ahrens 879 789 ahrens /* 880 789 ahrens * You may get multiple opens, but only one close. 881 789 ahrens */ 882 789 ahrens zv->zv_open_count[otyp]--; 883 789 ahrens zv->zv_total_opens--; 884 789 ahrens 885 10588 Eric if (zv->zv_total_opens == 0) 886 10588 Eric zvol_last_close(zv); 887 10588 Eric 888 789 ahrens mutex_exit(&zvol_state_lock); 889 10588 Eric return (error); 890 789 ahrens } 891 789 ahrens 892 3638 billm static void 893 10922 Jeff zvol_get_done(zgd_t *zgd, int error) 894 3063 perrin { 895 10922 Jeff if (zgd->zgd_db) 896 10922 Jeff dmu_buf_rele(zgd->zgd_db, zgd); 897 3063 perrin 898 10922 Jeff zfs_range_unlock(zgd->zgd_rl); 899 10922 Jeff 900 10922 Jeff if (error == 0 && zgd->zgd_bp) 901 10922 Jeff zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 902 10922 Jeff 903 3063 perrin kmem_free(zgd, sizeof (zgd_t)); 904 3063 perrin } 905 3063 perrin 906 3063 perrin /* 907 3063 perrin * Get data to generate a TX_WRITE intent log record. 908 3063 perrin */ 909 3638 billm static int 910 3063 perrin zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 911 3063 perrin { 912 3063 perrin zvol_state_t *zv = arg; 913 3063 perrin objset_t *os = zv->zv_objset; 914 10922 Jeff uint64_t object = ZVOL_OBJ; 915 10922 Jeff uint64_t offset = lr->lr_offset; 916 10922 Jeff uint64_t size = lr->lr_length; /* length of user data */ 917 10922 Jeff blkptr_t *bp = &lr->lr_blkptr; 918 3063 perrin dmu_buf_t *db; 919 3063 perrin zgd_t *zgd; 920 3063 perrin int error; 921 3063 perrin 922 10922 Jeff ASSERT(zio != NULL); 923 10922 Jeff ASSERT(size != 0); 924 10922 Jeff 925 10922 Jeff zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 926 10922 Jeff zgd->zgd_zilog = zv->zv_zilog; 927 10922 Jeff zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER); 928 3638 billm 929 3755 perrin /* 930 3755 perrin * Write records come in two flavors: immediate and indirect. 931 3755 perrin * For small writes it's cheaper to store the data with the 932 3755 perrin * log record (immediate); for large writes it's cheaper to 933 3755 perrin * sync the data and get a pointer to it (indirect) so that 934 3755 perrin * we don't have to write the data twice. 935 3755 perrin */ 936 10922 Jeff if (buf != NULL) { /* immediate write */ 937 10922 Jeff error = dmu_read(os, object, offset, size, buf, 938 10922 Jeff DMU_READ_NO_PREFETCH); 939 10922 Jeff } else { 940 10922 Jeff size = zv->zv_volblocksize; 941 10922 Jeff offset = P2ALIGN(offset, size); 942 10922 Jeff error = dmu_buf_hold(os, object, offset, zgd, &db); 943 10922 Jeff if (error == 0) { 944 10922 Jeff zgd->zgd_db = db; 945 10922 Jeff zgd->zgd_bp = bp; 946 3063 perrin 947 10922 Jeff ASSERT(db->db_offset == offset); 948 10922 Jeff ASSERT(db->db_size == size); 949 3063 perrin 950 10922 Jeff error = dmu_sync(zio, lr->lr_common.lrc_txg, 951 10922 Jeff zvol_get_done, zgd); 952 3755 perrin 953 10922 Jeff if (error == 0) 954 10922 Jeff return (0); 955 10922 Jeff } 956 10800 Neil } 957 10800 Neil 958 10922 Jeff zvol_get_done(zgd, error); 959 10922 Jeff 960 3063 perrin return (error); 961 3063 perrin } 962 3063 perrin 963 1861 perrin /* 964 1861 perrin * zvol_log_write() handles synchronous writes using TX_WRITE ZIL transactions. 965 1141 perrin * 966 1141 perrin * We store data in the log buffers if it's small enough. 967 3063 perrin * Otherwise we will later flush the data out via dmu_sync(). 968 1141 perrin */ 969 3063 perrin ssize_t zvol_immediate_write_sz = 32768; 970 1141 perrin 971 3638 billm static void 972 9401 Neil zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid, 973 9401 Neil boolean_t sync) 974 1141 perrin { 975 3638 billm uint32_t blocksize = zv->zv_volblocksize; 976 8227 Neil zilog_t *zilog = zv->zv_zilog; 977 9401 Neil boolean_t slogging; 978 10310 Neil ssize_t immediate_write_sz; 979 9401 Neil 980 9401 Neil if (zil_disable) 981 9401 Neil return; 982 8227 Neil 983 10922 Jeff if (zil_replaying(zilog, tx)) 984 8227 Neil return; 985 1141 perrin 986 10310 Neil immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT) 987 10310 Neil ? 0 : zvol_immediate_write_sz; 988 10310 Neil 989 10310 Neil slogging = spa_has_slogs(zilog->zl_spa) && 990 10310 Neil (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY); 991 3638 billm 992 9401 Neil while (resid) { 993 9401 Neil itx_t *itx; 994 9401 Neil lr_write_t *lr; 995 9401 Neil ssize_t len; 996 9401 Neil itx_wr_state_t write_state; 997 9401 Neil 998 9401 Neil /* 999 9401 Neil * Unlike zfs_log_write() we can be called with 1000 9401 Neil * upto DMU_MAX_ACCESS/2 (5MB) writes. 1001 9401 Neil */ 1002 10310 Neil if (blocksize > immediate_write_sz && !slogging && 1003 9401 Neil resid >= blocksize && off % blocksize == 0) { 1004 9401 Neil write_state = WR_INDIRECT; /* uses dmu_sync */ 1005 9401 Neil len = blocksize; 1006 9401 Neil } else if (sync) { 1007 9401 Neil write_state = WR_COPIED; 1008 9401 Neil len = MIN(ZIL_MAX_LOG_DATA, resid); 1009 9401 Neil } else { 1010 9401 Neil write_state = WR_NEED_COPY; 1011 9401 Neil len = MIN(ZIL_MAX_LOG_DATA, resid); 1012 9401 Neil } 1013 9401 Neil 1014 9401 Neil itx = zil_itx_create(TX_WRITE, sizeof (*lr) + 1015 9401 Neil (write_state == WR_COPIED ? len : 0)); 1016 3638 billm lr = (lr_write_t *)&itx->itx_lr; 1017 9401 Neil if (write_state == WR_COPIED && dmu_read(zv->zv_objset, 1018 9512 Neil ZVOL_OBJ, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) { 1019 10922 Jeff zil_itx_destroy(itx); 1020 9401 Neil itx = zil_itx_create(TX_WRITE, sizeof (*lr)); 1021 9401 Neil lr = (lr_write_t *)&itx->itx_lr; 1022 9401 Neil write_state = WR_NEED_COPY; 1023 9401 Neil } 1024 9401 Neil 1025 9401 Neil itx->itx_wr_state = write_state; 1026 9401 Neil if (write_state == WR_NEED_COPY) 1027 9401 Neil itx->itx_sod += len; 1028 3638 billm lr->lr_foid = ZVOL_OBJ; 1029 3638 billm lr->lr_offset = off; 1030 9401 Neil lr->lr_length = len; 1031 10922 Jeff lr->lr_blkoff = 0; 1032 3638 billm BP_ZERO(&lr->lr_blkptr); 1033 3638 billm 1034 9401 Neil itx->itx_private = zv; 1035 9401 Neil itx->itx_sync = sync; 1036 9401 Neil 1037 8227 Neil (void) zil_itx_assign(zilog, itx, tx); 1038 9401 Neil 1039 9401 Neil off += len; 1040 9401 Neil resid -= len; 1041 1141 perrin } 1042 1141 perrin } 1043 1141 perrin 1044 7837 Matthew static int 1045 7837 Matthew zvol_dumpio_vdev(vdev_t *vd, void *addr, uint64_t offset, uint64_t size, 1046 7837 Matthew boolean_t doread, boolean_t isdump) 1047 6423 gw25295 { 1048 6423 gw25295 vdev_disk_t *dvd; 1049 6423 gw25295 int c; 1050 6423 gw25295 int numerrors = 0; 1051 6423 gw25295 1052 6423 gw25295 for (c = 0; c < vd->vdev_children; c++) { 1053 9790 Lin ASSERT(vd->vdev_ops == &vdev_mirror_ops || 1054 9790 Lin vd->vdev_ops == &vdev_replacing_ops || 1055 9790 Lin vd->vdev_ops == &vdev_spare_ops); 1056 7837 Matthew int err = zvol_dumpio_vdev(vd->vdev_child[c], 1057 7837 Matthew addr, offset, size, doread, isdump); 1058 7837 Matthew if (err != 0) { 1059 6423 gw25295 numerrors++; 1060 7837 Matthew } else if (doread) { 1061 6423 gw25295 break; 1062 6423 gw25295 } 1063 6423 gw25295 } 1064 6423 gw25295 1065 6423 gw25295 if (!vd->vdev_ops->vdev_op_leaf) 1066 6423 gw25295 return (numerrors < vd->vdev_children ? 0 : EIO); 1067 6423 gw25295 1068 7903 Eric if (doread && !vdev_readable(vd)) 1069 7903 Eric return (EIO); 1070 7903 Eric else if (!doread && !vdev_writeable(vd)) 1071 6423 gw25295 return (EIO); 1072 6423 gw25295 1073 6423 gw25295 dvd = vd->vdev_tsd; 1074 6423 gw25295 ASSERT3P(dvd, !=, NULL); 1075 6423 gw25295 offset += VDEV_LABEL_START_SIZE; 1076 6423 gw25295 1077 6423 gw25295 if (ddi_in_panic() || isdump) { 1078 7837 Matthew ASSERT(!doread); 1079 7837 Matthew if (doread) 1080 6423 gw25295 return (EIO); 1081 6423 gw25295 return (ldi_dump(dvd->vd_lh, addr, lbtodb(offset), 1082 6423 gw25295 lbtodb(size))); 1083 6423 gw25295 } else { 1084 6423 gw25295 return (vdev_disk_physio(dvd->vd_lh, addr, size, offset, 1085 7837 Matthew doread ? B_READ : B_WRITE)); 1086 6423 gw25295 } 1087 6423 gw25295 } 1088 6423 gw25295 1089 7837 Matthew static int 1090 7837 Matthew zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size, 1091 7837 Matthew boolean_t doread, boolean_t isdump) 1092 6423 gw25295 { 1093 6423 gw25295 vdev_t *vd; 1094 6423 gw25295 int error; 1095 7837 Matthew zvol_extent_t *ze; 1096 6423 gw25295 spa_t *spa = dmu_objset_spa(zv->zv_objset); 1097 6423 gw25295 1098 7837 Matthew /* Must be sector aligned, and not stradle a block boundary. */ 1099 7837 Matthew if (P2PHASE(offset, DEV_BSIZE) || P2PHASE(size, DEV_BSIZE) || 1100 7837 Matthew P2BOUNDARY(offset, size, zv->zv_volblocksize)) { 1101 7837 Matthew return (EINVAL); 1102 7837 Matthew } 1103 6423 gw25295 ASSERT(size <= zv->zv_volblocksize); 1104 6423 gw25295 1105 7837 Matthew /* Locate the extent this belongs to */ 1106 7837 Matthew ze = list_head(&zv->zv_extents); 1107 7837 Matthew while (offset >= ze->ze_nblks * zv->zv_volblocksize) { 1108 7837 Matthew offset -= ze->ze_nblks * zv->zv_volblocksize; 1109 7837 Matthew ze = list_next(&zv->zv_extents, ze); 1110 7837 Matthew } 1111 7754 Jeff spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 1112 7837 Matthew vd = vdev_lookup_top(spa, DVA_GET_VDEV(&ze->ze_dva)); 1113 7837 Matthew offset += DVA_GET_OFFSET(&ze->ze_dva); 1114 7837 Matthew error = zvol_dumpio_vdev(vd, addr, offset, size, doread, isdump); 1115 7754 Jeff spa_config_exit(spa, SCL_STATE, FTAG); 1116 6423 gw25295 return (error); 1117 6423 gw25295 } 1118 6423 gw25295 1119 6423 gw25295 int 1120 789 ahrens zvol_strategy(buf_t *bp) 1121 789 ahrens { 1122 789 ahrens zvol_state_t *zv = ddi_get_soft_state(zvol_state, getminor(bp->b_edev)); 1123 789 ahrens uint64_t off, volsize; 1124 7837 Matthew size_t resid; 1125 789 ahrens char *addr; 1126 1141 perrin objset_t *os; 1127 3755 perrin rl_t *rl; 1128 789 ahrens int error = 0; 1129 7837 Matthew boolean_t doread = bp->b_flags & B_READ; 1130 7837 Matthew boolean_t is_dump = zv->zv_flags & ZVOL_DUMPIFIED; 1131 9401 Neil boolean_t sync; 1132 789 ahrens 1133 789 ahrens if (zv == NULL) { 1134 789 ahrens bioerror(bp, ENXIO); 1135 789 ahrens biodone(bp); 1136 789 ahrens return (0); 1137 789 ahrens } 1138 789 ahrens 1139 789 ahrens if (getminor(bp->b_edev) == 0) { 1140 789 ahrens bioerror(bp, EINVAL); 1141 789 ahrens biodone(bp); 1142 789 ahrens return (0); 1143 789 ahrens } 1144 789 ahrens 1145 10588 Eric if (!(bp->b_flags & B_READ) && (zv->zv_flags & ZVOL_RDONLY)) { 1146 789 ahrens bioerror(bp, EROFS); 1147 789 ahrens biodone(bp); 1148 789 ahrens return (0); 1149 789 ahrens } 1150 789 ahrens 1151 789 ahrens off = ldbtob(bp->b_blkno); 1152 789 ahrens volsize = zv->zv_volsize; 1153 789 ahrens 1154 1141 perrin os = zv->zv_objset; 1155 1141 perrin ASSERT(os != NULL); 1156 789 ahrens 1157 789 ahrens bp_mapin(bp); 1158 789 ahrens addr = bp->b_un.b_addr; 1159 789 ahrens resid = bp->b_bcount; 1160 789 ahrens 1161 7837 Matthew if (resid > 0 && (off < 0 || off >= volsize)) { 1162 7837 Matthew bioerror(bp, EIO); 1163 7837 Matthew biodone(bp); 1164 7837 Matthew return (0); 1165 7837 Matthew } 1166 7013 gw25295 1167 9401 Neil sync = !(bp->b_flags & B_ASYNC) && !doread && !is_dump && 1168 9401 Neil !(zv->zv_flags & ZVOL_WCE) && !zil_disable; 1169 9401 Neil 1170 1861 perrin /* 1171 1861 perrin * There must be no buffer changes when doing a dmu_sync() because 1172 1861 perrin * we can't change the data whilst calculating the checksum. 1173 1861 perrin */ 1174 3755 perrin rl = zfs_range_lock(&zv->zv_znode, off, resid, 1175 7837 Matthew doread ? RL_READER : RL_WRITER); 1176 6423 gw25295 1177 789 ahrens while (resid != 0 && off < volsize) { 1178 7837 Matthew size_t size = MIN(resid, zvol_maxphys); 1179 6423 gw25295 if (is_dump) { 1180 6423 gw25295 size = MIN(size, P2END(off, zv->zv_volblocksize) - off); 1181 7837 Matthew error = zvol_dumpio(zv, addr, off, size, 1182 7837 Matthew doread, B_FALSE); 1183 7837 Matthew } else if (doread) { 1184 9512 Neil error = dmu_read(os, ZVOL_OBJ, off, size, addr, 1185 9512 Neil DMU_READ_PREFETCH); 1186 789 ahrens } else { 1187 1141 perrin dmu_tx_t *tx = dmu_tx_create(os); 1188 789 ahrens dmu_tx_hold_write(tx, ZVOL_OBJ, off, size); 1189 789 ahrens error = dmu_tx_assign(tx, TXG_WAIT); 1190 789 ahrens if (error) { 1191 789 ahrens dmu_tx_abort(tx); 1192 789 ahrens } else { 1193 1141 perrin dmu_write(os, ZVOL_OBJ, off, size, addr, tx); 1194 9401 Neil zvol_log_write(zv, tx, off, size, sync); 1195 789 ahrens dmu_tx_commit(tx); 1196 789 ahrens } 1197 789 ahrens } 1198 7294 perrin if (error) { 1199 7294 perrin /* convert checksum errors into IO errors */ 1200 7294 perrin if (error == ECKSUM) 1201 7294 perrin error = EIO; 1202 789 ahrens break; 1203 7294 perrin } 1204 789 ahrens off += size; 1205 789 ahrens addr += size; 1206 789 ahrens resid -= size; 1207 789 ahrens } 1208 3755 perrin zfs_range_unlock(rl); 1209 789 ahrens 1210 789 ahrens if ((bp->b_resid = resid) == bp->b_bcount) 1211 789 ahrens bioerror(bp, off > volsize ? EINVAL : error); 1212 789 ahrens 1213 9401 Neil if (sync) 1214 3638 billm zil_commit(zv->zv_zilog, UINT64_MAX, ZVOL_OBJ); 1215 789 ahrens biodone(bp); 1216 1141 perrin 1217 789 ahrens return (0); 1218 3063 perrin } 1219 3063 perrin 1220 3063 perrin /* 1221 3063 perrin * Set the buffer count to the zvol maximum transfer. 1222 3063 perrin * Using our own routine instead of the default minphys() 1223 3063 perrin * means that for larger writes we write bigger buffers on X86 1224 3063 perrin * (128K instead of 56K) and flush the disk write cache less often 1225 3063 perrin * (every zvol_maxphys - currently 1MB) instead of minphys (currently 1226 3063 perrin * 56K on X86 and 128K on sparc). 1227 3063 perrin */ 1228 3063 perrin void 1229 3063 perrin zvol_minphys(struct buf *bp) 1230 3063 perrin { 1231 3063 perrin if (bp->b_bcount > zvol_maxphys) 1232 3063 perrin bp->b_bcount = zvol_maxphys; 1233 6423 gw25295 } 1234 6423 gw25295 1235 6423 gw25295 int 1236 6423 gw25295 zvol_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblocks) 1237 6423 gw25295 { 1238 6423 gw25295 minor_t minor = getminor(dev); 1239 6423 gw25295 zvol_state_t *zv; 1240 6423 gw25295 int error = 0; 1241 6423 gw25295 uint64_t size; 1242 6423 gw25295 uint64_t boff; 1243 6423 gw25295 uint64_t resid; 1244 6423 gw25295 1245 6423 gw25295 if (minor == 0) /* This is the control device */ 1246 6423 gw25295 return (ENXIO); 1247 6423 gw25295 1248 6423 gw25295 zv = ddi_get_soft_state(zvol_state, minor); 1249 6423 gw25295 if (zv == NULL) 1250 6423 gw25295 return (ENXIO); 1251 6423 gw25295 1252 6423 gw25295 boff = ldbtob(blkno); 1253 6423 gw25295 resid = ldbtob(nblocks); 1254 7837 Matthew 1255 7837 Matthew VERIFY3U(boff + resid, <=, zv->zv_volsize); 1256 7837 Matthew 1257 6423 gw25295 while (resid) { 1258 6423 gw25295 size = MIN(resid, P2END(boff, zv->zv_volblocksize) - boff); 1259 7837 Matthew error = zvol_dumpio(zv, addr, boff, size, B_FALSE, B_TRUE); 1260 6423 gw25295 if (error) 1261 6423 gw25295 break; 1262 6423 gw25295 boff += size; 1263 6423 gw25295 addr += size; 1264 6423 gw25295 resid -= size; 1265 6423 gw25295 } 1266 6423 gw25295 1267 6423 gw25295 return (error); 1268 789 ahrens } 1269 789 ahrens 1270 789 ahrens /*ARGSUSED*/ 1271 789 ahrens int 1272 3638 billm zvol_read(dev_t dev, uio_t *uio, cred_t *cr) 1273 789 ahrens { 1274 4107 gw25295 minor_t minor = getminor(dev); 1275 4107 gw25295 zvol_state_t *zv; 1276 7013 gw25295 uint64_t volsize; 1277 3755 perrin rl_t *rl; 1278 3638 billm int error = 0; 1279 4107 gw25295 1280 4107 gw25295 if (minor == 0) /* This is the control device */ 1281 4107 gw25295 return (ENXIO); 1282 4107 gw25295 1283 4107 gw25295 zv = ddi_get_soft_state(zvol_state, minor); 1284 4107 gw25295 if (zv == NULL) 1285 4107 gw25295 return (ENXIO); 1286 3638 billm 1287 7013 gw25295 volsize = zv->zv_volsize; 1288 7013 gw25295 if (uio->uio_resid > 0 && 1289 7013 gw25295 (uio->uio_loffset < 0 || uio->uio_loffset >= volsize)) 1290 7013 gw25295 return (EIO); 1291 7837 Matthew 1292 7837 Matthew if (zv->zv_flags & ZVOL_DUMPIFIED) { 1293 7837 Matthew error = physio(zvol_strategy, NULL, dev, B_READ, 1294 7837 Matthew zvol_minphys, uio); 1295 7837 Matthew return (error); 1296 7837 Matthew } 1297 7013 gw25295 1298 3755 perrin rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid, 1299 3755 perrin RL_READER); 1300 7013 gw25295 while (uio->uio_resid > 0 && uio->uio_loffset < volsize) { 1301 3638 billm uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1); 1302 7013 gw25295 1303 7013 gw25295 /* don't read past the end */ 1304 7013 gw25295 if (bytes > volsize - uio->uio_loffset) 1305 7013 gw25295 bytes = volsize - uio->uio_loffset; 1306 3638 billm 1307 3638 billm error = dmu_read_uio(zv->zv_objset, ZVOL_OBJ, uio, bytes); 1308 7294 perrin if (error) { 1309 7294 perrin /* convert checksum errors into IO errors */ 1310 7294 perrin if (error == ECKSUM) 1311 7294 perrin error = EIO; 1312 3638 billm break; 1313 7294 perrin } 1314 3638 billm } 1315 3755 perrin zfs_range_unlock(rl); 1316 3638 billm return (error); 1317 789 ahrens } 1318 789 ahrens 1319 789 ahrens /*ARGSUSED*/ 1320 789 ahrens int 1321 3638 billm zvol_write(dev_t dev, uio_t *uio, cred_t *cr) 1322 789 ahrens { 1323 4107 gw25295 minor_t minor = getminor(dev); 1324 4107 gw25295 zvol_state_t *zv; 1325 7013 gw25295 uint64_t volsize; 1326 3755 perrin rl_t *rl; 1327 3638 billm int error = 0; 1328 9401 Neil boolean_t sync; 1329 4107 gw25295 1330 4107 gw25295 if (minor == 0) /* This is the control device */ 1331 4107 gw25295 return (ENXIO); 1332 4107 gw25295 1333 4107 gw25295 zv = ddi_get_soft_state(zvol_state, minor); 1334 4107 gw25295 if (zv == NULL) 1335 4107 gw25295 return (ENXIO); 1336 789 ahrens 1337 7013 gw25295 volsize = zv->zv_volsize; 1338 7013 gw25295 if (uio->uio_resid > 0 && 1339 7013 gw25295 (uio->uio_loffset < 0 || uio->uio_loffset >= volsize)) 1340 7013 gw25295 return (EIO); 1341 7013 gw25295 1342 6423 gw25295 if (zv->zv_flags & ZVOL_DUMPIFIED) { 1343 6423 gw25295 error = physio(zvol_strategy, NULL, dev, B_WRITE, 1344 6423 gw25295 zvol_minphys, uio); 1345 6423 gw25295 return (error); 1346 6423 gw25295 } 1347 6423 gw25295 1348 9401 Neil sync = !(zv->zv_flags & ZVOL_WCE) && !zil_disable; 1349 9401 Neil 1350 3755 perrin rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid, 1351 3755 perrin RL_WRITER); 1352 7013 gw25295 while (uio->uio_resid > 0 && uio->uio_loffset < volsize) { 1353 3638 billm uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1); 1354 3638 billm uint64_t off = uio->uio_loffset; 1355 7013 gw25295 dmu_tx_t *tx = dmu_tx_create(zv->zv_objset); 1356 789 ahrens 1357 7013 gw25295 if (bytes > volsize - off) /* don't write past the end */ 1358 7013 gw25295 bytes = volsize - off; 1359 7013 gw25295 1360 3638 billm dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes); 1361 3638 billm error = dmu_tx_assign(tx, TXG_WAIT); 1362 3638 billm if (error) { 1363 3638 billm dmu_tx_abort(tx); 1364 3638 billm break; 1365 3638 billm } 1366 3638 billm error = dmu_write_uio(zv->zv_objset, ZVOL_OBJ, uio, bytes, tx); 1367 3638 billm if (error == 0) 1368 9401 Neil zvol_log_write(zv, tx, off, bytes, sync); 1369 3638 billm dmu_tx_commit(tx); 1370 3638 billm 1371 3638 billm if (error) 1372 3638 billm break; 1373 3638 billm } 1374 3755 perrin zfs_range_unlock(rl); 1375 9401 Neil if (sync) 1376 8524 Eric zil_commit(zv->zv_zilog, UINT64_MAX, ZVOL_OBJ); 1377 3638 billm return (error); 1378 789 ahrens } 1379 789 ahrens 1380 7405 Eric int 1381 7405 Eric zvol_getefi(void *arg, int flag, uint64_t vs, uint8_t bs) 1382 7405 Eric { 1383 7405 Eric struct uuid uuid = EFI_RESERVED; 1384 7405 Eric efi_gpe_t gpe = { 0 }; 1385 7405 Eric uint32_t crc; 1386 7405 Eric dk_efi_t efi; 1387 7405 Eric int length; 1388 7405 Eric char *ptr; 1389 7405 Eric 1390 7405 Eric if (ddi_copyin(arg, &efi, sizeof (dk_efi_t), flag)) 1391 7405 Eric return (EFAULT); 1392 7405 Eric ptr = (char *)(uintptr_t)efi.dki_data_64; 1393 7405 Eric length = efi.dki_length; 1394 7405 Eric /* 1395 7405 Eric * Some clients may attempt to request a PMBR for the 1396 7405 Eric * zvol. Currently this interface will return EINVAL to 1397 7405 Eric * such requests. These requests could be supported by 1398 7405 Eric * adding a check for lba == 0 and consing up an appropriate 1399 7405 Eric * PMBR. 1400 7405 Eric */ 1401 7405 Eric if (efi.dki_lba < 1 || efi.dki_lba > 2 || length <= 0) 1402 7405 Eric return (EINVAL); 1403 7405 Eric 1404 7405 Eric gpe.efi_gpe_StartingLBA = LE_64(34ULL); 1405 7405 Eric gpe.efi_gpe_EndingLBA = LE_64((vs >> bs) - 1); 1406 7405 Eric UUID_LE_CONVERT(gpe.efi_gpe_PartitionTypeGUID, uuid); 1407 7405 Eric 1408 7405 Eric if (efi.dki_lba == 1) { 1409 7405 Eric efi_gpt_t gpt = { 0 }; 1410 7405 Eric 1411 7405 Eric gpt.efi_gpt_Signature = LE_64(EFI_SIGNATURE); 1412 7405 Eric gpt.efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT); 1413 7405 Eric gpt.efi_gpt_HeaderSize = LE_32(sizeof (gpt)); 1414 7405 Eric gpt.efi_gpt_MyLBA = LE_64(1ULL); 1415 7405 Eric gpt.efi_gpt_FirstUsableLBA = LE_64(34ULL); 1416 7405 Eric gpt.efi_gpt_LastUsableLBA = LE_64((vs >> bs) - 1); 1417 7405 Eric gpt.efi_gpt_PartitionEntryLBA = LE_64(2ULL); 1418 7405 Eric gpt.efi_gpt_NumberOfPartitionEntries = LE_32(1); 1419 7405 Eric gpt.efi_gpt_SizeOfPartitionEntry = 1420 7405 Eric LE_32(sizeof (efi_gpe_t)); 1421 7405 Eric CRC32(crc, &gpe, sizeof (gpe), -1U, crc32_table); 1422 7405 Eric gpt.efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc); 1423 7405 Eric CRC32(crc, &gpt, sizeof (gpt), -1U, crc32_table); 1424 7405 Eric gpt.efi_gpt_HeaderCRC32 = LE_32(~crc); 1425 7405 Eric if (ddi_copyout(&gpt, ptr, MIN(sizeof (gpt), length), 1426 7405 Eric flag)) 1427 7405 Eric return (EFAULT); 1428 7405 Eric ptr += sizeof (gpt); 1429 7405 Eric length -= sizeof (gpt); 1430 7405 Eric } 1431 7405 Eric if (length > 0 && ddi_copyout(&gpe, ptr, MIN(sizeof (gpe), 1432 7405 Eric length), flag)) 1433 7405 Eric return (EFAULT); 1434 7405 Eric return (0); 1435 7405 Eric } 1436 7405 Eric 1437 789 ahrens /* 1438 789 ahrens * Dirtbag ioctls to support mkfs(1M) for UFS filesystems. See dkio(7I). 1439 789 ahrens */ 1440 789 ahrens /*ARGSUSED*/ 1441 789 ahrens int 1442 789 ahrens zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) 1443 789 ahrens { 1444 789 ahrens zvol_state_t *zv; 1445 3897 maybee struct dk_cinfo dki; 1446 789 ahrens struct dk_minfo dkm; 1447 3897 maybee struct dk_callback *dkc; 1448 789 ahrens int error = 0; 1449 6423 gw25295 rl_t *rl; 1450 789 ahrens 1451 789 ahrens mutex_enter(&zvol_state_lock); 1452 789 ahrens 1453 789 ahrens zv = ddi_get_soft_state(zvol_state, getminor(dev)); 1454 789 ahrens 1455 789 ahrens if (zv == NULL) { 1456 789 ahrens mutex_exit(&zvol_state_lock); 1457 789 ahrens return (ENXIO); 1458 789 ahrens } 1459 9303 Eric ASSERT(zv->zv_total_opens > 0); 1460 789 ahrens 1461 789 ahrens switch (cmd) { 1462 789 ahrens 1463 789 ahrens case DKIOCINFO: 1464 3897 maybee bzero(&dki, sizeof (dki)); 1465 3897 maybee (void) strcpy(dki.dki_cname, "zvol"); 1466 3897 maybee (void) strcpy(dki.dki_dname, "zvol"); 1467 3897 maybee dki.dki_ctype = DKC_UNKNOWN; 1468 3897 maybee dki.dki_maxtransfer = 1 << (SPA_MAXBLOCKSHIFT - zv->zv_min_bs); 1469 789 ahrens mutex_exit(&zvol_state_lock); 1470 3897 maybee if (ddi_copyout(&dki, (void *)arg, sizeof (dki), flag)) 1471 789 ahrens error = EFAULT; 1472 789 ahrens return (error); 1473 789 ahrens 1474 789 ahrens case DKIOCGMEDIAINFO: 1475 789 ahrens bzero(&dkm, sizeof (dkm)); 1476 789 ahrens dkm.dki_lbsize = 1U << zv->zv_min_bs; 1477 789 ahrens dkm.dki_capacity = zv->zv_volsize >> zv->zv_min_bs; 1478 789 ahrens dkm.dki_media_type = DK_UNKNOWN; 1479 789 ahrens mutex_exit(&zvol_state_lock); 1480 789 ahrens if (ddi_copyout(&dkm, (void *)arg, sizeof (dkm), flag)) 1481 789 ahrens error = EFAULT; 1482 789 ahrens return (error); 1483 789 ahrens 1484 789 ahrens case DKIOCGETEFI: 1485 7405 Eric { 1486 7405 Eric uint64_t vs = zv->zv_volsize; 1487 7405 Eric uint8_t bs = zv->zv_min_bs; 1488 3016 maybee 1489 789 ahrens mutex_exit(&zvol_state_lock); 1490 7405 Eric error = zvol_getefi((void *)arg, flag, vs, bs); 1491 7405 Eric return (error); 1492 789 ahrens } 1493 789 ahrens 1494 3638 billm case DKIOCFLUSHWRITECACHE: 1495 3897 maybee dkc = (struct dk_callback *)arg; 1496 9303 Eric mutex_exit(&zvol_state_lock); 1497 3638 billm zil_commit(zv->zv_zilog, UINT64_MAX, ZVOL_OBJ); 1498 3897 maybee if ((flag & FKIOCTL) && dkc != NULL && dkc->dkc_callback) { 1499 3897 maybee (*dkc->dkc_callback)(dkc->dkc_cookie, error); 1500 3897 maybee error = 0; 1501 3897 maybee } 1502 9303 Eric return (error); 1503 9303 Eric 1504 9303 Eric case DKIOCGETWCE: 1505 9303 Eric { 1506 9303 Eric int wce = (zv->zv_flags & ZVOL_WCE) ? 1 : 0; 1507 9303 Eric if (ddi_copyout(&wce, (void *)arg, sizeof (int), 1508 9303 Eric flag)) 1509 9303 Eric error = EFAULT; 1510 9303 Eric break; 1511 9303 Eric } 1512 9303 Eric case DKIOCSETWCE: 1513 9303 Eric { 1514 9303 Eric int wce; 1515 9303 Eric if (ddi_copyin((void *)arg, &wce, sizeof (int), 1516 9303 Eric flag)) { 1517 9303 Eric error = EFAULT; 1518 9303 Eric break; 1519 9303 Eric } 1520 9303 Eric if (wce) { 1521 9303 Eric zv->zv_flags |= ZVOL_WCE; 1522 9303 Eric mutex_exit(&zvol_state_lock); 1523 9303 Eric } else { 1524 9303 Eric zv->zv_flags &= ~ZVOL_WCE; 1525 9303 Eric mutex_exit(&zvol_state_lock); 1526 9303 Eric zil_commit(zv->zv_zilog, UINT64_MAX, ZVOL_OBJ); 1527 9303 Eric } 1528 9303 Eric return (0); 1529 9303 Eric } 1530 3638 billm 1531 3245 maybee case DKIOCGGEOM: 1532 3245 maybee case DKIOCGVTOC: 1533 6423 gw25295 /* 1534 6423 gw25295 * commands using these (like prtvtoc) expect ENOTSUP 1535 6423 gw25295 * since we're emulating an EFI label 1536 6423 gw25295 */ 1537 3245 maybee error = ENOTSUP; 1538 6423 gw25295 break; 1539 6423 gw25295 1540 6423 gw25295 case DKIOCDUMPINIT: 1541 6423 gw25295 rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize, 1542 6423 gw25295 RL_WRITER); 1543 6423 gw25295 error = zvol_dumpify(zv); 1544 6423 gw25295 zfs_range_unlock(rl); 1545 6423 gw25295 break; 1546 6423 gw25295 1547 6423 gw25295 case DKIOCDUMPFINI: 1548 9277 Eric if (!(zv->zv_flags & ZVOL_DUMPIFIED)) 1549 9277 Eric break; 1550 6423 gw25295 rl = zfs_range_lock(&zv->zv_znode, 0, zv->zv_volsize, 1551 6423 gw25295 RL_WRITER); 1552 6423 gw25295 error = zvol_dump_fini(zv); 1553 6423 gw25295 zfs_range_unlock(rl); 1554 3245 maybee break; 1555 3245 maybee 1556 789 ahrens default: 1557 3016 maybee error = ENOTTY; 1558 789 ahrens break; 1559 789 ahrens 1560 789 ahrens } 1561 789 ahrens mutex_exit(&zvol_state_lock); 1562 789 ahrens return (error); 1563 789 ahrens } 1564 789 ahrens 1565 789 ahrens int 1566 789 ahrens zvol_busy(void) 1567 789 ahrens { 1568 789 ahrens return (zvol_minors != 0); 1569 789 ahrens } 1570 789 ahrens 1571 789 ahrens void 1572 789 ahrens zvol_init(void) 1573 789 ahrens { 1574 789 ahrens VERIFY(ddi_soft_state_init(&zvol_state, sizeof (zvol_state_t), 1) == 0); 1575 789 ahrens mutex_init(&zvol_state_lock, NULL, MUTEX_DEFAULT, NULL); 1576 789 ahrens } 1577 789 ahrens 1578 789 ahrens void 1579 789 ahrens zvol_fini(void) 1580 789 ahrens { 1581 789 ahrens mutex_destroy(&zvol_state_lock); 1582 789 ahrens ddi_soft_state_fini(&zvol_state); 1583 789 ahrens } 1584 6423 gw25295 1585 6423 gw25295 static int 1586 6423 gw25295 zvol_dump_init(zvol_state_t *zv, boolean_t resize) 1587 6423 gw25295 { 1588 6423 gw25295 dmu_tx_t *tx; 1589 6423 gw25295 int error = 0; 1590 6423 gw25295 objset_t *os = zv->zv_objset; 1591 6423 gw25295 nvlist_t *nv = NULL; 1592 6423 gw25295 1593 6423 gw25295 ASSERT(MUTEX_HELD(&zvol_state_lock)); 1594 10588 Eric error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 0, 1595 10588 Eric DMU_OBJECT_END); 1596 10588 Eric /* wait for dmu_free_long_range to actually free the blocks */ 1597 10588 Eric txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); 1598 6423 gw25295 1599 6423 gw25295 tx = dmu_tx_create(os); 1600 6423 gw25295 dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); 1601 10588 Eric dmu_tx_hold_bonus(tx, ZVOL_OBJ); 1602 6423 gw25295 error = dmu_tx_assign(tx, TXG_WAIT); 1603 6423 gw25295 if (error) { 1604 6423 gw25295 dmu_tx_abort(tx); 1605 6423 gw25295 return (error); 1606 6423 gw25295 } 1607 6423 gw25295 1608 6423 gw25295 /* 1609 6423 gw25295 * If we are resizing the dump device then we only need to 1610 6423 gw25295 * update the refreservation to match the newly updated 1611 6423 gw25295 * zvolsize. Otherwise, we save off the original state of the 1612 6423 gw25295 * zvol so that we can restore them if the zvol is ever undumpified. 1613 6423 gw25295 */ 1614 6423 gw25295 if (resize) { 1615 6423 gw25295 error = zap_update(os, ZVOL_ZAP_OBJ, 1616 6423 gw25295 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, 1617 6423 gw25295 &zv->zv_volsize, tx); 1618 6423 gw25295 } else { 1619 7837 Matthew uint64_t checksum, compress, refresrv, vbs; 1620 7837 Matthew 1621 6423 gw25295 error = dsl_prop_get_integer(zv->zv_name, 1622 6423 gw25295 zfs_prop_to_name(ZFS_PROP_COMPRESSION), &compress, NULL); 1623 6423 gw25295 error = error ? error : dsl_prop_get_integer(zv->zv_name, 1624 6423 gw25295 zfs_prop_to_name(ZFS_PROP_CHECKSUM), &checksum, NULL); 1625 6423 gw25295 error = error ? error : dsl_prop_get_integer(zv->zv_name, 1626 6423 gw25295 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &refresrv, NULL); 1627 7837 Matthew error = error ? error : dsl_prop_get_integer(zv->zv_name, 1628 7837 Matthew zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &vbs, NULL); 1629 6423 gw25295 1630 6423 gw25295 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, 1631 6423 gw25295 zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1, 1632 6423 gw25295 &compress, tx); 1633 6423 gw25295 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, 1634 6423 gw25295 zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum, tx); 1635 6423 gw25295 error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, 1636 6423 gw25295 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, 1637 6423 gw25295 &refresrv, tx); 1638 7837 Matthew error = error ? error : zap_update(os, ZVOL_ZAP_OBJ, 1639 7837 Matthew zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1, 1640 7837 Matthew &vbs, tx); 1641 10588 Eric error = error ? error : dmu_object_set_blocksize( 1642 10588 Eric os, ZVOL_OBJ, SPA_MAXBLOCKSIZE, 0, tx); 1643 10588 Eric if (error == 0) 1644 10588 Eric zv->zv_volblocksize = SPA_MAXBLOCKSIZE; 1645 6423 gw25295 } 1646 6423 gw25295 dmu_tx_commit(tx); 1647 6423 gw25295 1648 6423 gw25295 /* 1649 6423 gw25295 * We only need update the zvol's property if we are initializing 1650 6423 gw25295 * the dump area for the first time. 1651 6423 gw25295 */ 1652 6423 gw25295 if (!resize) { 1653 6423 gw25295 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1654 6423 gw25295 VERIFY(nvlist_add_uint64(nv, 1655 6423 gw25295 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 0) == 0); 1656 6423 gw25295 VERIFY(nvlist_add_uint64(nv, 1657 6423 gw25295 zfs_prop_to_name(ZFS_PROP_COMPRESSION), 1658 6423 gw25295 ZIO_COMPRESS_OFF) == 0); 1659 6423 gw25295 VERIFY(nvlist_add_uint64(nv, 1660 6423 gw25295 zfs_prop_to_name(ZFS_PROP_CHECKSUM), 1661 6423 gw25295 ZIO_CHECKSUM_OFF) == 0); 1662 6423 gw25295 1663 11022 Tom error = zfs_set_prop_nvlist(zv->zv_name, ZPROP_SRC_LOCAL, 1664 11022 Tom nv, NULL); 1665 6423 gw25295 nvlist_free(nv); 1666 6423 gw25295 1667 6423 gw25295 if (error) 1668 6423 gw25295 return (error); 1669 6423 gw25295 } 1670 6423 gw25295 1671 6423 gw25295 /* Allocate the space for the dump */ 1672 6423 gw25295 error = zvol_prealloc(zv); 1673 6423 gw25295 return (error); 1674 6423 gw25295 } 1675 6423 gw25295 1676 6423 gw25295 static int 1677 6423 gw25295 zvol_dumpify(zvol_state_t *zv) 1678 6423 gw25295 { 1679 6423 gw25295 int error = 0; 1680 6423 gw25295 uint64_t dumpsize = 0; 1681 6423 gw25295 dmu_tx_t *tx; 1682 6423 gw25295 objset_t *os = zv->zv_objset; 1683 6423 gw25295 1684 10588 Eric if (zv->zv_flags & ZVOL_RDONLY) 1685 6423 gw25295 return (EROFS); 1686 6423 gw25295 1687 6423 gw25295 if (zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, 1688 6423 gw25295 8, 1, &dumpsize) != 0 || dumpsize != zv->zv_volsize) { 1689 6423 gw25295 boolean_t resize = (dumpsize > 0) ? B_TRUE : B_FALSE; 1690 6423 gw25295 1691 6423 gw25295 if ((error = zvol_dump_init(zv, resize)) != 0) { 1692 6423 gw25295 (void) zvol_dump_fini(zv); 1693 6423 gw25295 return (error); 1694 6423 gw25295 } 1695 6423 gw25295 } 1696 6423 gw25295 1697 6423 gw25295 /* 1698 6423 gw25295 * Build up our lba mapping. 1699 6423 gw25295 */ 1700 6423 gw25295 error = zvol_get_lbas(zv); 1701 6423 gw25295 if (error) { 1702 6423 gw25295 (void) zvol_dump_fini(zv); 1703 6423 gw25295 return (error); 1704 6423 gw25295 } 1705 6423 gw25295 1706 6423 gw25295 tx = dmu_tx_create(os); 1707 6423 gw25295 dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); 1708 6423 gw25295 error = dmu_tx_assign(tx, TXG_WAIT); 1709 6423 gw25295 if (error) { 1710 6423 gw25295 dmu_tx_abort(tx); 1711 6423 gw25295 (void) zvol_dump_fini(zv); 1712 6423 gw25295 return (error); 1713 6423 gw25295 } 1714 6423 gw25295 1715 6423 gw25295 zv->zv_flags |= ZVOL_DUMPIFIED; 1716 6423 gw25295 error = zap_update(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, 8, 1, 1717 6423 gw25295 &zv->zv_volsize, tx); 1718 6423 gw25295 dmu_tx_commit(tx); 1719 6423 gw25295 1720 6423 gw25295 if (error) { 1721 6423 gw25295 (void) zvol_dump_fini(zv); 1722 6423 gw25295 return (error); 1723 6423 gw25295 } 1724 6423 gw25295 1725 6423 gw25295 txg_wait_synced(dmu_objset_pool(os), 0); 1726 6423 gw25295 return (0); 1727 6423 gw25295 } 1728 6423 gw25295 1729 6423 gw25295 static int 1730 6423 gw25295 zvol_dump_fini(zvol_state_t *zv) 1731 6423 gw25295 { 1732 6423 gw25295 dmu_tx_t *tx; 1733 6423 gw25295 objset_t *os = zv->zv_objset; 1734 6423 gw25295 nvlist_t *nv; 1735 6423 gw25295 int error = 0; 1736 7837 Matthew uint64_t checksum, compress, refresrv, vbs; 1737 6423 gw25295 1738 7080 maybee /* 1739 7080 maybee * Attempt to restore the zvol back to its pre-dumpified state. 1740 7080 maybee * This is a best-effort attempt as it's possible that not all 1741 7080 maybee * of these properties were initialized during the dumpify process 1742 7080 maybee * (i.e. error during zvol_dump_init). 1743 7080 maybee */ 1744 7080 maybee 1745 6423 gw25295 tx = dmu_tx_create(os); 1746 6423 gw25295 dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); 1747 6423 gw25295 error = dmu_tx_assign(tx, TXG_WAIT); 1748 6423 gw25295 if (error) { 1749 6423 gw25295 dmu_tx_abort(tx); 1750 6423 gw25295 return (error); 1751 6423 gw25295 } 1752 7080 maybee (void) zap_remove(os, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, tx); 1753 7080 maybee dmu_tx_commit(tx); 1754 6423 gw25295 1755 6423 gw25295 (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 1756 6423 gw25295 zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum); 1757 6423 gw25295 (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 1758 6423 gw25295 zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1, &compress); 1759 6423 gw25295 (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 1760 6423 gw25295 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, &refresrv); 1761 7837 Matthew (void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, 1762 7837 Matthew zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1, &vbs); 1763 6423 gw25295 1764 6423 gw25295 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1765 6423 gw25295 (void) nvlist_add_uint64(nv, 1766 6423 gw25295 zfs_prop_to_name(ZFS_PROP_CHECKSUM), checksum); 1767 6423 gw25295 (void) nvlist_add_uint64(nv, 1768 6423 gw25295 zfs_prop_to_name(ZFS_PROP_COMPRESSION), compress); 1769 6423 gw25295 (void) nvlist_add_uint64(nv, 1770 6423 gw25295 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), refresrv); 1771 11022 Tom (void) zfs_set_prop_nvlist(zv->zv_name, ZPROP_SRC_LOCAL, 1772 11022 Tom nv, NULL); 1773 6423 gw25295 nvlist_free(nv); 1774 6423 gw25295 1775 7080 maybee zvol_free_extents(zv); 1776 7080 maybee zv->zv_flags &= ~ZVOL_DUMPIFIED; 1777 7080 maybee (void) dmu_free_long_range(os, ZVOL_OBJ, 0, DMU_OBJECT_END); 1778 10588 Eric /* wait for dmu_free_long_range to actually free the blocks */ 1779 10588 Eric txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); 1780 10588 Eric tx = dmu_tx_create(os); 1781 10588 Eric dmu_tx_hold_bonus(tx, ZVOL_OBJ); 1782 10588 Eric error = dmu_tx_assign(tx, TXG_WAIT); 1783 10588 Eric if (error) { 1784 10588 Eric dmu_tx_abort(tx); 1785 10588 Eric return (error); 1786 10588 Eric } 1787 10922 Jeff if (dmu_object_set_blocksize(os, ZVOL_OBJ, vbs, 0, tx) == 0) 1788 10922 Jeff zv->zv_volblocksize = vbs; 1789 10588 Eric dmu_tx_commit(tx); 1790 7080 maybee 1791 6423 gw25295 return (0); 1792 6423 gw25295 } 1793