1 789 ahrens /* 2 789 ahrens * CDDL HEADER START 3 789 ahrens * 4 789 ahrens * The contents of this file are subject to the terms of the 5 2391 maybee * Common Development and Distribution License (the "License"). 6 2391 maybee * You may not use this file except in compliance with the License. 7 789 ahrens * 8 789 ahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 789 ahrens * or http://www.opensolaris.org/os/licensing. 10 789 ahrens * See the License for the specific language governing permissions 11 789 ahrens * and limitations under the License. 12 789 ahrens * 13 789 ahrens * When distributing Covered Code, include this CDDL HEADER in each 14 789 ahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 789 ahrens * If applicable, add the following below this CDDL HEADER, with the 16 789 ahrens * fields enclosed by brackets "[]" replaced with your own identifying 17 789 ahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18 789 ahrens * 19 789 ahrens * CDDL HEADER END 20 789 ahrens */ 21 789 ahrens /* 22 10474 Richard * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 789 ahrens * Use is subject to license terms. 24 789 ahrens */ 25 789 ahrens 26 789 ahrens #include <sys/zfs_context.h> 27 789 ahrens #include <sys/dnode.h> 28 789 ahrens #include <sys/dmu_objset.h> 29 789 ahrens #include <sys/dmu_zfetch.h> 30 789 ahrens #include <sys/dmu.h> 31 789 ahrens #include <sys/dbuf.h> 32 10474 Richard #include <sys/kstat.h> 33 789 ahrens 34 789 ahrens /* 35 789 ahrens * I'm against tune-ables, but these should probably exist as tweakable globals 36 789 ahrens * until we can get this working the way we want it to. 37 789 ahrens */ 38 2885 ahrens 39 2986 ek110237 int zfs_prefetch_disable = 0; 40 789 ahrens 41 789 ahrens /* max # of streams per zfetch */ 42 789 ahrens uint32_t zfetch_max_streams = 8; 43 789 ahrens /* min time before stream reclaim */ 44 789 ahrens uint32_t zfetch_min_sec_reap = 2; 45 789 ahrens /* max number of blocks to fetch at a time */ 46 2391 maybee uint32_t zfetch_block_cap = 256; 47 789 ahrens /* number of bytes in a array_read at which we stop prefetching (1Mb) */ 48 789 ahrens uint64_t zfetch_array_rd_sz = 1024 * 1024; 49 789 ahrens 50 789 ahrens /* forward decls for static routines */ 51 789 ahrens static int dmu_zfetch_colinear(zfetch_t *, zstream_t *); 52 789 ahrens static void dmu_zfetch_dofetch(zfetch_t *, zstream_t *); 53 789 ahrens static uint64_t dmu_zfetch_fetch(dnode_t *, uint64_t, uint64_t); 54 789 ahrens static uint64_t dmu_zfetch_fetchsz(dnode_t *, uint64_t, uint64_t); 55 2391 maybee static int dmu_zfetch_find(zfetch_t *, zstream_t *, int); 56 789 ahrens static int dmu_zfetch_stream_insert(zfetch_t *, zstream_t *); 57 789 ahrens static zstream_t *dmu_zfetch_stream_reclaim(zfetch_t *); 58 789 ahrens static void dmu_zfetch_stream_remove(zfetch_t *, zstream_t *); 59 789 ahrens static int dmu_zfetch_streams_equal(zstream_t *, zstream_t *); 60 10474 Richard 61 10474 Richard typedef struct zfetch_stats { 62 10474 Richard kstat_named_t zfetchstat_hits; 63 10474 Richard kstat_named_t zfetchstat_misses; 64 10474 Richard kstat_named_t zfetchstat_colinear_hits; 65 10474 Richard kstat_named_t zfetchstat_colinear_misses; 66 10474 Richard kstat_named_t zfetchstat_stride_hits; 67 10474 Richard kstat_named_t zfetchstat_stride_misses; 68 10474 Richard kstat_named_t zfetchstat_reclaim_successes; 69 10474 Richard kstat_named_t zfetchstat_reclaim_failures; 70 10474 Richard kstat_named_t zfetchstat_stream_resets; 71 10474 Richard kstat_named_t zfetchstat_stream_noresets; 72 10474 Richard kstat_named_t zfetchstat_bogus_streams; 73 10474 Richard } zfetch_stats_t; 74 10474 Richard 75 10474 Richard static zfetch_stats_t zfetch_stats = { 76 10474 Richard { "hits", KSTAT_DATA_UINT64 }, 77 10474 Richard { "misses", KSTAT_DATA_UINT64 }, 78 10474 Richard { "colinear_hits", KSTAT_DATA_UINT64 }, 79 10474 Richard { "colinear_misses", KSTAT_DATA_UINT64 }, 80 10474 Richard { "stride_hits", KSTAT_DATA_UINT64 }, 81 10474 Richard { "stride_misses", KSTAT_DATA_UINT64 }, 82 10474 Richard { "reclaim_successes", KSTAT_DATA_UINT64 }, 83 10474 Richard { "reclaim_failures", KSTAT_DATA_UINT64 }, 84 10474 Richard { "streams_resets", KSTAT_DATA_UINT64 }, 85 10474 Richard { "streams_noresets", KSTAT_DATA_UINT64 }, 86 10474 Richard { "bogus_streams", KSTAT_DATA_UINT64 }, 87 10474 Richard }; 88 10474 Richard 89 10474 Richard #define ZFETCHSTAT_INCR(stat, val) \ 90 10474 Richard atomic_add_64(&zfetch_stats.stat.value.ui64, (val)); 91 10474 Richard 92 10474 Richard #define ZFETCHSTAT_BUMP(stat) ZFETCHSTAT_INCR(stat, 1); 93 10474 Richard 94 10474 Richard kstat_t *zfetch_ksp; 95 789 ahrens 96 789 ahrens /* 97 789 ahrens * Given a zfetch structure and a zstream structure, determine whether the 98 2391 maybee * blocks to be read are part of a co-linear pair of existing prefetch 99 789 ahrens * streams. If a set is found, coalesce the streams, removing one, and 100 789 ahrens * configure the prefetch so it looks for a strided access pattern. 101 2391 maybee * 102 2391 maybee * In other words: if we find two sequential access streams that are 103 2391 maybee * the same length and distance N appart, and this read is N from the 104 2391 maybee * last stream, then we are probably in a strided access pattern. So 105 2391 maybee * combine the two sequential streams into a single strided stream. 106 789 ahrens * 107 789 ahrens * If no co-linear streams are found, return NULL. 108 789 ahrens */ 109 789 ahrens static int 110 789 ahrens dmu_zfetch_colinear(zfetch_t *zf, zstream_t *zh) 111 789 ahrens { 112 789 ahrens zstream_t *z_walk; 113 789 ahrens zstream_t *z_comp; 114 789 ahrens 115 1380 rbourbon if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER)) 116 1380 rbourbon return (0); 117 789 ahrens 118 789 ahrens if (zh == NULL) { 119 789 ahrens rw_exit(&zf->zf_rwlock); 120 789 ahrens return (0); 121 789 ahrens } 122 789 ahrens 123 789 ahrens for (z_walk = list_head(&zf->zf_stream); z_walk; 124 789 ahrens z_walk = list_next(&zf->zf_stream, z_walk)) { 125 789 ahrens for (z_comp = list_next(&zf->zf_stream, z_walk); z_comp; 126 789 ahrens z_comp = list_next(&zf->zf_stream, z_comp)) { 127 789 ahrens int64_t diff; 128 789 ahrens 129 789 ahrens if (z_walk->zst_len != z_walk->zst_stride || 130 789 ahrens z_comp->zst_len != z_comp->zst_stride) { 131 789 ahrens continue; 132 789 ahrens } 133 789 ahrens 134 789 ahrens diff = z_comp->zst_offset - z_walk->zst_offset; 135 789 ahrens if (z_comp->zst_offset + diff == zh->zst_offset) { 136 789 ahrens z_walk->zst_offset = zh->zst_offset; 137 789 ahrens z_walk->zst_direction = diff < 0 ? -1 : 1; 138 789 ahrens z_walk->zst_stride = 139 789 ahrens diff * z_walk->zst_direction; 140 789 ahrens z_walk->zst_ph_offset = 141 789 ahrens zh->zst_offset + z_walk->zst_stride; 142 789 ahrens dmu_zfetch_stream_remove(zf, z_comp); 143 789 ahrens mutex_destroy(&z_comp->zst_lock); 144 789 ahrens kmem_free(z_comp, sizeof (zstream_t)); 145 789 ahrens 146 789 ahrens dmu_zfetch_dofetch(zf, z_walk); 147 789 ahrens 148 789 ahrens rw_exit(&zf->zf_rwlock); 149 789 ahrens return (1); 150 789 ahrens } 151 789 ahrens 152 789 ahrens diff = z_walk->zst_offset - z_comp->zst_offset; 153 789 ahrens if (z_walk->zst_offset + diff == zh->zst_offset) { 154 789 ahrens z_walk->zst_offset = zh->zst_offset; 155 789 ahrens z_walk->zst_direction = diff < 0 ? -1 : 1; 156 789 ahrens z_walk->zst_stride = 157 789 ahrens diff * z_walk->zst_direction; 158 789 ahrens z_walk->zst_ph_offset = 159 789 ahrens zh->zst_offset + z_walk->zst_stride; 160 789 ahrens dmu_zfetch_stream_remove(zf, z_comp); 161 789 ahrens mutex_destroy(&z_comp->zst_lock); 162 789 ahrens kmem_free(z_comp, sizeof (zstream_t)); 163 789 ahrens 164 789 ahrens dmu_zfetch_dofetch(zf, z_walk); 165 789 ahrens 166 789 ahrens rw_exit(&zf->zf_rwlock); 167 789 ahrens return (1); 168 789 ahrens } 169 789 ahrens } 170 789 ahrens } 171 789 ahrens 172 789 ahrens rw_exit(&zf->zf_rwlock); 173 789 ahrens return (0); 174 789 ahrens } 175 789 ahrens 176 789 ahrens /* 177 789 ahrens * Given a zstream_t, determine the bounds of the prefetch. Then call the 178 789 ahrens * routine that actually prefetches the individual blocks. 179 789 ahrens */ 180 789 ahrens static void 181 789 ahrens dmu_zfetch_dofetch(zfetch_t *zf, zstream_t *zs) 182 789 ahrens { 183 789 ahrens uint64_t prefetch_tail; 184 789 ahrens uint64_t prefetch_limit; 185 789 ahrens uint64_t prefetch_ofst; 186 789 ahrens uint64_t prefetch_len; 187 789 ahrens uint64_t blocks_fetched; 188 789 ahrens 189 789 ahrens zs->zst_stride = MAX((int64_t)zs->zst_stride, zs->zst_len); 190 789 ahrens zs->zst_cap = MIN(zfetch_block_cap, 2 * zs->zst_cap); 191 789 ahrens 192 789 ahrens prefetch_tail = MAX((int64_t)zs->zst_ph_offset, 193 789 ahrens (int64_t)(zs->zst_offset + zs->zst_stride)); 194 789 ahrens /* 195 789 ahrens * XXX: use a faster division method? 196 789 ahrens */ 197 789 ahrens prefetch_limit = zs->zst_offset + zs->zst_len + 198 789 ahrens (zs->zst_cap * zs->zst_stride) / zs->zst_len; 199 789 ahrens 200 789 ahrens while (prefetch_tail < prefetch_limit) { 201 789 ahrens prefetch_ofst = zs->zst_offset + zs->zst_direction * 202 789 ahrens (prefetch_tail - zs->zst_offset); 203 789 ahrens 204 789 ahrens prefetch_len = zs->zst_len; 205 789 ahrens 206 789 ahrens /* 207 789 ahrens * Don't prefetch beyond the end of the file, if working 208 789 ahrens * backwards. 209 789 ahrens */ 210 789 ahrens if ((zs->zst_direction == ZFETCH_BACKWARD) && 211 789 ahrens (prefetch_ofst > prefetch_tail)) { 212 789 ahrens prefetch_len += prefetch_ofst; 213 789 ahrens prefetch_ofst = 0; 214 789 ahrens } 215 789 ahrens 216 789 ahrens /* don't prefetch more than we're supposed to */ 217 789 ahrens if (prefetch_len > zs->zst_len) 218 789 ahrens break; 219 789 ahrens 220 789 ahrens blocks_fetched = dmu_zfetch_fetch(zf->zf_dnode, 221 789 ahrens prefetch_ofst, zs->zst_len); 222 789 ahrens 223 789 ahrens prefetch_tail += zs->zst_stride; 224 789 ahrens /* stop if we've run out of stuff to prefetch */ 225 789 ahrens if (blocks_fetched < zs->zst_len) 226 789 ahrens break; 227 789 ahrens } 228 789 ahrens zs->zst_ph_offset = prefetch_tail; 229 11066 rafael zs->zst_last = ddi_get_lbolt(); 230 789 ahrens } 231 789 ahrens 232 10474 Richard void 233 10474 Richard zfetch_init(void) 234 10474 Richard { 235 10474 Richard 236 10474 Richard zfetch_ksp = kstat_create("zfs", 0, "zfetchstats", "misc", 237 10474 Richard KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t), 238 10474 Richard KSTAT_FLAG_VIRTUAL); 239 10474 Richard 240 10474 Richard if (zfetch_ksp != NULL) { 241 10474 Richard zfetch_ksp->ks_data = &zfetch_stats; 242 10474 Richard kstat_install(zfetch_ksp); 243 10474 Richard } 244 10474 Richard } 245 10474 Richard 246 10474 Richard void 247 10474 Richard zfetch_fini(void) 248 10474 Richard { 249 10474 Richard if (zfetch_ksp != NULL) { 250 10474 Richard kstat_delete(zfetch_ksp); 251 10474 Richard zfetch_ksp = NULL; 252 10474 Richard } 253 10474 Richard } 254 10474 Richard 255 789 ahrens /* 256 789 ahrens * This takes a pointer to a zfetch structure and a dnode. It performs the 257 789 ahrens * necessary setup for the zfetch structure, grokking data from the 258 789 ahrens * associated dnode. 259 789 ahrens */ 260 789 ahrens void 261 789 ahrens dmu_zfetch_init(zfetch_t *zf, dnode_t *dno) 262 789 ahrens { 263 789 ahrens if (zf == NULL) { 264 789 ahrens return; 265 789 ahrens } 266 789 ahrens 267 789 ahrens zf->zf_dnode = dno; 268 789 ahrens zf->zf_stream_cnt = 0; 269 789 ahrens zf->zf_alloc_fail = 0; 270 789 ahrens 271 789 ahrens list_create(&zf->zf_stream, sizeof (zstream_t), 272 789 ahrens offsetof(zstream_t, zst_node)); 273 789 ahrens 274 789 ahrens rw_init(&zf->zf_rwlock, NULL, RW_DEFAULT, NULL); 275 789 ahrens } 276 789 ahrens 277 789 ahrens /* 278 789 ahrens * This function computes the actual size, in blocks, that can be prefetched, 279 789 ahrens * and fetches it. 280 789 ahrens */ 281 789 ahrens static uint64_t 282 789 ahrens dmu_zfetch_fetch(dnode_t *dn, uint64_t blkid, uint64_t nblks) 283 789 ahrens { 284 789 ahrens uint64_t fetchsz; 285 789 ahrens uint64_t i; 286 789 ahrens 287 789 ahrens fetchsz = dmu_zfetch_fetchsz(dn, blkid, nblks); 288 789 ahrens 289 789 ahrens for (i = 0; i < fetchsz; i++) { 290 789 ahrens dbuf_prefetch(dn, blkid + i); 291 789 ahrens } 292 789 ahrens 293 789 ahrens return (fetchsz); 294 789 ahrens } 295 789 ahrens 296 789 ahrens /* 297 789 ahrens * this function returns the number of blocks that would be prefetched, based 298 789 ahrens * upon the supplied dnode, blockid, and nblks. This is used so that we can 299 789 ahrens * update streams in place, and then prefetch with their old value after the 300 789 ahrens * fact. This way, we can delay the prefetch, but subsequent accesses to the 301 789 ahrens * stream won't result in the same data being prefetched multiple times. 302 789 ahrens */ 303 789 ahrens static uint64_t 304 789 ahrens dmu_zfetch_fetchsz(dnode_t *dn, uint64_t blkid, uint64_t nblks) 305 789 ahrens { 306 789 ahrens uint64_t fetchsz; 307 789 ahrens 308 789 ahrens if (blkid > dn->dn_maxblkid) { 309 789 ahrens return (0); 310 789 ahrens } 311 789 ahrens 312 789 ahrens /* compute fetch size */ 313 2391 maybee if (blkid + nblks + 1 > dn->dn_maxblkid) { 314 2391 maybee fetchsz = (dn->dn_maxblkid - blkid) + 1; 315 2391 maybee ASSERT(blkid + fetchsz - 1 <= dn->dn_maxblkid); 316 789 ahrens } else { 317 789 ahrens fetchsz = nblks; 318 789 ahrens } 319 789 ahrens 320 789 ahrens 321 789 ahrens return (fetchsz); 322 789 ahrens } 323 789 ahrens 324 789 ahrens /* 325 10474 Richard * given a zfetch and a zstream structure, see if there is an associated zstream 326 789 ahrens * for this block read. If so, it starts a prefetch for the stream it 327 789 ahrens * located and returns true, otherwise it returns false 328 789 ahrens */ 329 789 ahrens static int 330 2391 maybee dmu_zfetch_find(zfetch_t *zf, zstream_t *zh, int prefetched) 331 789 ahrens { 332 789 ahrens zstream_t *zs; 333 789 ahrens int64_t diff; 334 2391 maybee int reset = !prefetched; 335 789 ahrens int rc = 0; 336 789 ahrens 337 789 ahrens if (zh == NULL) 338 789 ahrens return (0); 339 789 ahrens 340 789 ahrens /* 341 789 ahrens * XXX: This locking strategy is a bit coarse; however, it's impact has 342 789 ahrens * yet to be tested. If this turns out to be an issue, it can be 343 789 ahrens * modified in a number of different ways. 344 789 ahrens */ 345 789 ahrens 346 789 ahrens rw_enter(&zf->zf_rwlock, RW_READER); 347 789 ahrens top: 348 789 ahrens 349 789 ahrens for (zs = list_head(&zf->zf_stream); zs; 350 789 ahrens zs = list_next(&zf->zf_stream, zs)) { 351 789 ahrens 352 2391 maybee /* 353 2391 maybee * XXX - should this be an assert? 354 2391 maybee */ 355 789 ahrens if (zs->zst_len == 0) { 356 789 ahrens /* bogus stream */ 357 10474 Richard ZFETCHSTAT_BUMP(zfetchstat_bogus_streams); 358 789 ahrens continue; 359 789 ahrens } 360 789 ahrens 361 2391 maybee /* 362 2391 maybee * We hit this case when we are in a strided prefetch stream: 363 2391 maybee * we will read "len" blocks before "striding". 364 2391 maybee */ 365 2391 maybee if (zh->zst_offset >= zs->zst_offset && 366 2391 maybee zh->zst_offset < zs->zst_offset + zs->zst_len) { 367 10474 Richard if (prefetched) { 368 10474 Richard /* already fetched */ 369 10474 Richard ZFETCHSTAT_BUMP(zfetchstat_stride_hits); 370 10474 Richard rc = 1; 371 10474 Richard goto out; 372 10474 Richard } else { 373 10474 Richard ZFETCHSTAT_BUMP(zfetchstat_stride_misses); 374 10474 Richard } 375 789 ahrens } 376 789 ahrens 377 2391 maybee /* 378 2391 maybee * This is the forward sequential read case: we increment 379 2391 maybee * len by one each time we hit here, so we will enter this 380 2391 maybee * case on every read. 381 2391 maybee */ 382 789 ahrens if (zh->zst_offset == zs->zst_offset + zs->zst_len) { 383 2391 maybee 384 2391 maybee reset = !prefetched && zs->zst_len > 1; 385 789 ahrens 386 789 ahrens mutex_enter(&zs->zst_lock); 387 789 ahrens 388 789 ahrens if (zh->zst_offset != zs->zst_offset + zs->zst_len) { 389 789 ahrens mutex_exit(&zs->zst_lock); 390 789 ahrens goto top; 391 789 ahrens } 392 789 ahrens zs->zst_len += zh->zst_len; 393 789 ahrens diff = zs->zst_len - zfetch_block_cap; 394 789 ahrens if (diff > 0) { 395 789 ahrens zs->zst_offset += diff; 396 789 ahrens zs->zst_len = zs->zst_len > diff ? 397 789 ahrens zs->zst_len - diff : 0; 398 789 ahrens } 399 789 ahrens zs->zst_direction = ZFETCH_FORWARD; 400 789 ahrens 401 789 ahrens break; 402 789 ahrens 403 2391 maybee /* 404 2391 maybee * Same as above, but reading backwards through the file. 405 2391 maybee */ 406 789 ahrens } else if (zh->zst_offset == zs->zst_offset - zh->zst_len) { 407 789 ahrens /* backwards sequential access */ 408 2391 maybee 409 2391 maybee reset = !prefetched && zs->zst_len > 1; 410 789 ahrens 411 789 ahrens mutex_enter(&zs->zst_lock); 412 789 ahrens 413 789 ahrens if (zh->zst_offset != zs->zst_offset - zh->zst_len) { 414 789 ahrens mutex_exit(&zs->zst_lock); 415 789 ahrens goto top; 416 789 ahrens } 417 789 ahrens 418 789 ahrens zs->zst_offset = zs->zst_offset > zh->zst_len ? 419 789 ahrens zs->zst_offset - zh->zst_len : 0; 420 789 ahrens zs->zst_ph_offset = zs->zst_ph_offset > zh->zst_len ? 421 789 ahrens zs->zst_ph_offset - zh->zst_len : 0; 422 789 ahrens zs->zst_len += zh->zst_len; 423 789 ahrens 424 789 ahrens diff = zs->zst_len - zfetch_block_cap; 425 789 ahrens if (diff > 0) { 426 789 ahrens zs->zst_ph_offset = zs->zst_ph_offset > diff ? 427 789 ahrens zs->zst_ph_offset - diff : 0; 428 789 ahrens zs->zst_len = zs->zst_len > diff ? 429 789 ahrens zs->zst_len - diff : zs->zst_len; 430 789 ahrens } 431 789 ahrens zs->zst_direction = ZFETCH_BACKWARD; 432 789 ahrens 433 789 ahrens break; 434 789 ahrens 435 789 ahrens } else if ((zh->zst_offset - zs->zst_offset - zs->zst_stride < 436 789 ahrens zs->zst_len) && (zs->zst_len != zs->zst_stride)) { 437 789 ahrens /* strided forward access */ 438 789 ahrens 439 789 ahrens mutex_enter(&zs->zst_lock); 440 789 ahrens 441 789 ahrens if ((zh->zst_offset - zs->zst_offset - zs->zst_stride >= 442 789 ahrens zs->zst_len) || (zs->zst_len == zs->zst_stride)) { 443 789 ahrens mutex_exit(&zs->zst_lock); 444 789 ahrens goto top; 445 789 ahrens } 446 789 ahrens 447 789 ahrens zs->zst_offset += zs->zst_stride; 448 789 ahrens zs->zst_direction = ZFETCH_FORWARD; 449 789 ahrens 450 789 ahrens break; 451 789 ahrens 452 789 ahrens } else if ((zh->zst_offset - zs->zst_offset + zs->zst_stride < 453 789 ahrens zs->zst_len) && (zs->zst_len != zs->zst_stride)) { 454 789 ahrens /* strided reverse access */ 455 789 ahrens 456 789 ahrens mutex_enter(&zs->zst_lock); 457 789 ahrens 458 789 ahrens if ((zh->zst_offset - zs->zst_offset + zs->zst_stride >= 459 789 ahrens zs->zst_len) || (zs->zst_len == zs->zst_stride)) { 460 789 ahrens mutex_exit(&zs->zst_lock); 461 789 ahrens goto top; 462 789 ahrens } 463 789 ahrens 464 789 ahrens zs->zst_offset = zs->zst_offset > zs->zst_stride ? 465 789 ahrens zs->zst_offset - zs->zst_stride : 0; 466 789 ahrens zs->zst_ph_offset = (zs->zst_ph_offset > 467 789 ahrens (2 * zs->zst_stride)) ? 468 789 ahrens (zs->zst_ph_offset - (2 * zs->zst_stride)) : 0; 469 789 ahrens zs->zst_direction = ZFETCH_BACKWARD; 470 789 ahrens 471 789 ahrens break; 472 789 ahrens } 473 789 ahrens } 474 789 ahrens 475 789 ahrens if (zs) { 476 2391 maybee if (reset) { 477 2391 maybee zstream_t *remove = zs; 478 2391 maybee 479 10474 Richard ZFETCHSTAT_BUMP(zfetchstat_stream_resets); 480 2391 maybee rc = 0; 481 2391 maybee mutex_exit(&zs->zst_lock); 482 2391 maybee rw_exit(&zf->zf_rwlock); 483 2391 maybee rw_enter(&zf->zf_rwlock, RW_WRITER); 484 2391 maybee /* 485 2391 maybee * Relocate the stream, in case someone removes 486 2391 maybee * it while we were acquiring the WRITER lock. 487 2391 maybee */ 488 2391 maybee for (zs = list_head(&zf->zf_stream); zs; 489 2391 maybee zs = list_next(&zf->zf_stream, zs)) { 490 2391 maybee if (zs == remove) { 491 2391 maybee dmu_zfetch_stream_remove(zf, zs); 492 2391 maybee mutex_destroy(&zs->zst_lock); 493 2391 maybee kmem_free(zs, sizeof (zstream_t)); 494 2391 maybee break; 495 2391 maybee } 496 2391 maybee } 497 2391 maybee } else { 498 10474 Richard ZFETCHSTAT_BUMP(zfetchstat_stream_noresets); 499 2391 maybee rc = 1; 500 2391 maybee dmu_zfetch_dofetch(zf, zs); 501 2391 maybee mutex_exit(&zs->zst_lock); 502 2391 maybee } 503 789 ahrens } 504 2391 maybee out: 505 789 ahrens rw_exit(&zf->zf_rwlock); 506 789 ahrens return (rc); 507 789 ahrens } 508 789 ahrens 509 789 ahrens /* 510 789 ahrens * Clean-up state associated with a zfetch structure. This frees allocated 511 789 ahrens * structure members, empties the zf_stream tree, and generally makes things 512 789 ahrens * nice. This doesn't free the zfetch_t itself, that's left to the caller. 513 789 ahrens */ 514 789 ahrens void 515 789 ahrens dmu_zfetch_rele(zfetch_t *zf) 516 789 ahrens { 517 789 ahrens zstream_t *zs; 518 789 ahrens zstream_t *zs_next; 519 789 ahrens 520 789 ahrens ASSERT(!RW_LOCK_HELD(&zf->zf_rwlock)); 521 789 ahrens 522 789 ahrens for (zs = list_head(&zf->zf_stream); zs; zs = zs_next) { 523 789 ahrens zs_next = list_next(&zf->zf_stream, zs); 524 789 ahrens 525 789 ahrens list_remove(&zf->zf_stream, zs); 526 789 ahrens mutex_destroy(&zs->zst_lock); 527 789 ahrens kmem_free(zs, sizeof (zstream_t)); 528 789 ahrens } 529 789 ahrens list_destroy(&zf->zf_stream); 530 789 ahrens rw_destroy(&zf->zf_rwlock); 531 789 ahrens 532 789 ahrens zf->zf_dnode = NULL; 533 789 ahrens } 534 789 ahrens 535 789 ahrens /* 536 789 ahrens * Given a zfetch and zstream structure, insert the zstream structure into the 537 789 ahrens * AVL tree contained within the zfetch structure. Peform the appropriate 538 789 ahrens * book-keeping. It is possible that another thread has inserted a stream which 539 789 ahrens * matches one that we are about to insert, so we must be sure to check for this 540 789 ahrens * case. If one is found, return failure, and let the caller cleanup the 541 789 ahrens * duplicates. 542 789 ahrens */ 543 789 ahrens static int 544 789 ahrens dmu_zfetch_stream_insert(zfetch_t *zf, zstream_t *zs) 545 789 ahrens { 546 789 ahrens zstream_t *zs_walk; 547 789 ahrens zstream_t *zs_next; 548 789 ahrens 549 789 ahrens ASSERT(RW_WRITE_HELD(&zf->zf_rwlock)); 550 789 ahrens 551 789 ahrens for (zs_walk = list_head(&zf->zf_stream); zs_walk; zs_walk = zs_next) { 552 789 ahrens zs_next = list_next(&zf->zf_stream, zs_walk); 553 789 ahrens 554 789 ahrens if (dmu_zfetch_streams_equal(zs_walk, zs)) { 555 10474 Richard return (0); 556 789 ahrens } 557 789 ahrens } 558 789 ahrens 559 789 ahrens list_insert_head(&zf->zf_stream, zs); 560 789 ahrens zf->zf_stream_cnt++; 561 789 ahrens return (1); 562 789 ahrens } 563 789 ahrens 564 789 ahrens 565 789 ahrens /* 566 789 ahrens * Walk the list of zstreams in the given zfetch, find an old one (by time), and 567 789 ahrens * reclaim it for use by the caller. 568 789 ahrens */ 569 789 ahrens static zstream_t * 570 789 ahrens dmu_zfetch_stream_reclaim(zfetch_t *zf) 571 789 ahrens { 572 789 ahrens zstream_t *zs; 573 789 ahrens 574 1380 rbourbon if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER)) 575 1380 rbourbon return (0); 576 789 ahrens 577 789 ahrens for (zs = list_head(&zf->zf_stream); zs; 578 789 ahrens zs = list_next(&zf->zf_stream, zs)) { 579 789 ahrens 580 11066 rafael if (((ddi_get_lbolt() - zs->zst_last)/hz) > zfetch_min_sec_reap) 581 789 ahrens break; 582 789 ahrens } 583 789 ahrens 584 789 ahrens if (zs) { 585 789 ahrens dmu_zfetch_stream_remove(zf, zs); 586 789 ahrens mutex_destroy(&zs->zst_lock); 587 789 ahrens bzero(zs, sizeof (zstream_t)); 588 789 ahrens } else { 589 789 ahrens zf->zf_alloc_fail++; 590 789 ahrens } 591 789 ahrens rw_exit(&zf->zf_rwlock); 592 789 ahrens 593 789 ahrens return (zs); 594 789 ahrens } 595 789 ahrens 596 789 ahrens /* 597 789 ahrens * Given a zfetch and zstream structure, remove the zstream structure from its 598 789 ahrens * container in the zfetch structure. Perform the appropriate book-keeping. 599 789 ahrens */ 600 789 ahrens static void 601 789 ahrens dmu_zfetch_stream_remove(zfetch_t *zf, zstream_t *zs) 602 789 ahrens { 603 789 ahrens ASSERT(RW_WRITE_HELD(&zf->zf_rwlock)); 604 789 ahrens 605 789 ahrens list_remove(&zf->zf_stream, zs); 606 789 ahrens zf->zf_stream_cnt--; 607 789 ahrens } 608 789 ahrens 609 789 ahrens static int 610 789 ahrens dmu_zfetch_streams_equal(zstream_t *zs1, zstream_t *zs2) 611 789 ahrens { 612 789 ahrens if (zs1->zst_offset != zs2->zst_offset) 613 789 ahrens return (0); 614 789 ahrens 615 789 ahrens if (zs1->zst_len != zs2->zst_len) 616 789 ahrens return (0); 617 789 ahrens 618 789 ahrens if (zs1->zst_stride != zs2->zst_stride) 619 789 ahrens return (0); 620 789 ahrens 621 789 ahrens if (zs1->zst_ph_offset != zs2->zst_ph_offset) 622 789 ahrens return (0); 623 789 ahrens 624 789 ahrens if (zs1->zst_cap != zs2->zst_cap) 625 789 ahrens return (0); 626 789 ahrens 627 789 ahrens if (zs1->zst_direction != zs2->zst_direction) 628 789 ahrens return (0); 629 789 ahrens 630 789 ahrens return (1); 631 789 ahrens } 632 789 ahrens 633 789 ahrens /* 634 789 ahrens * This is the prefetch entry point. It calls all of the other dmu_zfetch 635 789 ahrens * routines to create, delete, find, or operate upon prefetch streams. 636 789 ahrens */ 637 789 ahrens void 638 2391 maybee dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched) 639 789 ahrens { 640 789 ahrens zstream_t zst; 641 789 ahrens zstream_t *newstream; 642 789 ahrens int fetched; 643 789 ahrens int inserted; 644 789 ahrens unsigned int blkshft; 645 789 ahrens uint64_t blksz; 646 789 ahrens 647 2885 ahrens if (zfs_prefetch_disable) 648 2885 ahrens return; 649 2885 ahrens 650 789 ahrens /* files that aren't ln2 blocksz are only one block -- nothing to do */ 651 2885 ahrens if (!zf->zf_dnode->dn_datablkshift) 652 789 ahrens return; 653 789 ahrens 654 789 ahrens /* convert offset and size, into blockid and nblocks */ 655 789 ahrens blkshft = zf->zf_dnode->dn_datablkshift; 656 789 ahrens blksz = (1 << blkshft); 657 789 ahrens 658 789 ahrens bzero(&zst, sizeof (zstream_t)); 659 789 ahrens zst.zst_offset = offset >> blkshft; 660 789 ahrens zst.zst_len = (P2ROUNDUP(offset + size, blksz) - 661 789 ahrens P2ALIGN(offset, blksz)) >> blkshft; 662 789 ahrens 663 2391 maybee fetched = dmu_zfetch_find(zf, &zst, prefetched); 664 10474 Richard if (fetched) { 665 10474 Richard ZFETCHSTAT_BUMP(zfetchstat_hits); 666 10474 Richard } else { 667 10474 Richard ZFETCHSTAT_BUMP(zfetchstat_misses); 668 10474 Richard if (fetched = dmu_zfetch_colinear(zf, &zst)) { 669 10474 Richard ZFETCHSTAT_BUMP(zfetchstat_colinear_hits); 670 10474 Richard } else { 671 10474 Richard ZFETCHSTAT_BUMP(zfetchstat_colinear_misses); 672 10474 Richard } 673 789 ahrens } 674 789 ahrens 675 789 ahrens if (!fetched) { 676 789 ahrens newstream = dmu_zfetch_stream_reclaim(zf); 677 789 ahrens 678 789 ahrens /* 679 789 ahrens * we still couldn't find a stream, drop the lock, and allocate 680 789 ahrens * one if possible. Otherwise, give up and go home. 681 789 ahrens */ 682 10474 Richard if (newstream) { 683 10474 Richard ZFETCHSTAT_BUMP(zfetchstat_reclaim_successes); 684 10474 Richard } else { 685 789 ahrens uint64_t maxblocks; 686 789 ahrens uint32_t max_streams; 687 789 ahrens uint32_t cur_streams; 688 789 ahrens 689 10474 Richard ZFETCHSTAT_BUMP(zfetchstat_reclaim_failures); 690 789 ahrens cur_streams = zf->zf_stream_cnt; 691 789 ahrens maxblocks = zf->zf_dnode->dn_maxblkid; 692 789 ahrens 693 789 ahrens max_streams = MIN(zfetch_max_streams, 694 789 ahrens (maxblocks / zfetch_block_cap)); 695 789 ahrens if (max_streams == 0) { 696 789 ahrens max_streams++; 697 789 ahrens } 698 789 ahrens 699 789 ahrens if (cur_streams >= max_streams) { 700 789 ahrens return; 701 789 ahrens } 702 789 ahrens newstream = kmem_zalloc(sizeof (zstream_t), KM_SLEEP); 703 789 ahrens } 704 789 ahrens 705 789 ahrens newstream->zst_offset = zst.zst_offset; 706 789 ahrens newstream->zst_len = zst.zst_len; 707 789 ahrens newstream->zst_stride = zst.zst_len; 708 789 ahrens newstream->zst_ph_offset = zst.zst_len + zst.zst_offset; 709 789 ahrens newstream->zst_cap = zst.zst_len; 710 789 ahrens newstream->zst_direction = ZFETCH_FORWARD; 711 11066 rafael newstream->zst_last = ddi_get_lbolt(); 712 789 ahrens 713 789 ahrens mutex_init(&newstream->zst_lock, NULL, MUTEX_DEFAULT, NULL); 714 789 ahrens 715 789 ahrens rw_enter(&zf->zf_rwlock, RW_WRITER); 716 789 ahrens inserted = dmu_zfetch_stream_insert(zf, newstream); 717 789 ahrens rw_exit(&zf->zf_rwlock); 718 789 ahrens 719 789 ahrens if (!inserted) { 720 789 ahrens mutex_destroy(&newstream->zst_lock); 721 789 ahrens kmem_free(newstream, sizeof (zstream_t)); 722 789 ahrens } 723 789 ahrens } 724 789 ahrens } 725