Home | History | Annotate | Download | only in zfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * ZFS fault injection
     28  *
     29  * To handle fault injection, we keep track of a series of zinject_record_t
     30  * structures which describe which logical block(s) should be injected with a
     31  * fault.  These are kept in a global list.  Each record corresponds to a given
     32  * spa_t and maintains a special hold on the spa_t so that it cannot be deleted
     33  * or exported while the injection record exists.
     34  *
     35  * Device level injection is done using the 'zi_guid' field.  If this is set, it
     36  * means that the error is destined for a particular device, not a piece of
     37  * data.
     38  *
     39  * This is a rather poor data structure and algorithm, but we don't expect more
     40  * than a few faults at any one time, so it should be sufficient for our needs.
     41  */
     42 
     43 #include <sys/arc.h>
     44 #include <sys/zio_impl.h>
     45 #include <sys/zfs_ioctl.h>
     46 #include <sys/vdev_impl.h>
     47 #include <sys/dmu_objset.h>
     48 #include <sys/fs/zfs.h>
     49 
     50 uint32_t zio_injection_enabled;	/* number of handlers currently registered */
     51 
        /*
         * One registered injection: the caller's record plus the bookkeeping
         * needed to look it up by id and to associate it with a pool.
         */
     52 typedef struct inject_handler {
     53 	int			zi_id;		/* id assigned in zio_inject_fault() */
     54 	spa_t			*zi_spa;	/* pool the handler applies to */
     55 	zinject_record_t	zi_record;	/* copy of the caller's record */
     56 	list_node_t		zi_link;	/* linkage on inject_handlers */
     57 } inject_handler_t;
     58 
     59 static list_t inject_handlers;		/* every registered handler */
     60 static krwlock_t inject_lock;		/* held for all inject_handlers access */
     61 static int inject_next_id = 1;		/* next value handed out as zi_id */
     62 
     63 /*
     64  * Returns true if the given record matches the I/O in progress.
     65  */
     66 static boolean_t
     67 zio_match_handler(zbookmark_t *zb, uint64_t type,
     68     zinject_record_t *record, int error)
     69 {
     70 	/*
     71 	 * Check for a match against the MOS, which is based on type
     72 	 */
     73 	if (zb->zb_objset == DMU_META_OBJSET &&
     74 	    record->zi_objset == DMU_META_OBJSET &&
     75 	    record->zi_object == DMU_META_DNODE_OBJECT) {
     76 		if (record->zi_type == DMU_OT_NONE ||
     77 		    type == record->zi_type)
     78 			return (record->zi_freq == 0 ||
     79 			    spa_get_random(100) < record->zi_freq);
     80 		else
     81 			return (B_FALSE);
     82 	}
     83 
     84 	/*
     85 	 * Check for an exact match.
     86 	 */
     87 	if (zb->zb_objset == record->zi_objset &&
     88 	    zb->zb_object == record->zi_object &&
     89 	    zb->zb_level == record->zi_level &&
     90 	    zb->zb_blkid >= record->zi_start &&
     91 	    zb->zb_blkid <= record->zi_end &&
     92 	    error == record->zi_error)
     93 		return (record->zi_freq == 0 ||
     94 		    spa_get_random(100) < record->zi_freq);
     95 
     96 	return (B_FALSE);
     97 }
     98 
     99 /*
    100  * Panic the system when a config change happens in the function
    101  * specified by tag.
    102  */
    103 void
    104 zio_handle_panic_injection(spa_t *spa, char *tag)
    105 {
    106 	inject_handler_t *handler;
    107 
    108 	rw_enter(&inject_lock, RW_READER);
    109 
    110 	for (handler = list_head(&inject_handlers); handler != NULL;
    111 	    handler = list_next(&inject_handlers, handler)) {
    112 
    113 		if (spa != handler->zi_spa)
    114 			continue;
    115 
    116 		if (strcmp(tag, handler->zi_record.zi_func) == 0)
    117 			panic("Panic requested in function %s\n", tag);
    118 	}
    119 
    120 	rw_exit(&inject_lock);
    121 }
    122 
    123 /*
    124  * Determine if the I/O in question should return failure.  Returns the errno
    125  * to be returned to the caller.
    126  */
    127 int
    128 zio_handle_fault_injection(zio_t *zio, int error)
    129 {
    130 	int ret = 0;
    131 	inject_handler_t *handler;
    132 
    133 	/*
    134 	 * Ignore I/O not associated with any logical data.
    135 	 */
    136 	if (zio->io_logical == NULL)
    137 		return (0);
    138 
    139 	/*
    140 	 * Currently, we only support fault injection on reads.
    141 	 */
    142 	if (zio->io_type != ZIO_TYPE_READ)
    143 		return (0);
    144 
    145 	rw_enter(&inject_lock, RW_READER);
    146 
    147 	for (handler = list_head(&inject_handlers); handler != NULL;
    148 	    handler = list_next(&inject_handlers, handler)) {
    149 
    150 		/* Ignore errors not destined for this pool */
    151 		if (zio->io_spa != handler->zi_spa)
    152 			continue;
    153 
    154 		/* Ignore device errors and panic injection */
    155 		if (handler->zi_record.zi_guid != 0 ||
    156 		    handler->zi_record.zi_func[0] != '\0' ||
    157 		    handler->zi_record.zi_duration != 0)
    158 			continue;
    159 
    160 		/* If this handler matches, return EIO */
    161 		if (zio_match_handler(&zio->io_logical->io_bookmark,
    162 		    zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE,
    163 		    &handler->zi_record, error)) {
    164 			ret = error;
    165 			break;
    166 		}
    167 	}
    168 
    169 	rw_exit(&inject_lock);
    170 
    171 	return (ret);
    172 }
    173 
    174 /*
    175  * Determine if the zio is part of a label update and has an injection
    176  * handler associated with that portion of the label. Currently, we
    177  * allow error injection in either the nvlist or the uberblock region of
    178  * of the vdev label.
    179  */
    180 int
    181 zio_handle_label_injection(zio_t *zio, int error)
    182 {
    183 	inject_handler_t *handler;
    184 	vdev_t *vd = zio->io_vd;
    185 	uint64_t offset = zio->io_offset;
    186 	int label;
    187 	int ret = 0;
    188 
    189 	if (offset >= VDEV_LABEL_START_SIZE &&
    190 	    offset < vd->vdev_psize - VDEV_LABEL_END_SIZE)
    191 		return (0);
    192 
    193 	rw_enter(&inject_lock, RW_READER);
    194 
    195 	for (handler = list_head(&inject_handlers); handler != NULL;
    196 	    handler = list_next(&inject_handlers, handler)) {
    197 		uint64_t start = handler->zi_record.zi_start;
    198 		uint64_t end = handler->zi_record.zi_end;
    199 
    200 		/* Ignore device only faults or panic injection */
    201 		if (handler->zi_record.zi_start == 0 ||
    202 		    handler->zi_record.zi_func[0] != '\0' ||
    203 		    handler->zi_record.zi_duration != 0)
    204 			continue;
    205 
    206 		/*
    207 		 * The injection region is the relative offsets within a
    208 		 * vdev label. We must determine the label which is being
    209 		 * updated and adjust our region accordingly.
    210 		 */
    211 		label = vdev_label_number(vd->vdev_psize, offset);
    212 		start = vdev_label_offset(vd->vdev_psize, label, start);
    213 		end = vdev_label_offset(vd->vdev_psize, label, end);
    214 
    215 		if (zio->io_vd->vdev_guid == handler->zi_record.zi_guid &&
    216 		    (offset >= start && offset <= end)) {
    217 			ret = error;
    218 			break;
    219 		}
    220 	}
    221 	rw_exit(&inject_lock);
    222 	return (ret);
    223 }
    224 
    225 
    226 int
    227 zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
    228 {
    229 	inject_handler_t *handler;
    230 	int ret = 0;
    231 
    232 	/*
    233 	 * We skip over faults in the labels unless it's during
    234 	 * device open (i.e. zio == NULL).
    235 	 */
    236 	if (zio != NULL) {
    237 		uint64_t offset = zio->io_offset;
    238 
    239 		if (offset < VDEV_LABEL_START_SIZE ||
    240 		    offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE)
    241 		return (0);
    242 	}
    243 
    244 	rw_enter(&inject_lock, RW_READER);
    245 
    246 	for (handler = list_head(&inject_handlers); handler != NULL;
    247 	    handler = list_next(&inject_handlers, handler)) {
    248 
    249 		/*
    250 		 * Ignore label specific faults, panic injection
    251 		 * or fake writes
    252 		 */
    253 		if (handler->zi_record.zi_start != 0 ||
    254 		    handler->zi_record.zi_func[0] != '\0' ||
    255 		    handler->zi_record.zi_duration != 0)
    256 			continue;
    257 
    258 		if (vd->vdev_guid == handler->zi_record.zi_guid) {
    259 			if (handler->zi_record.zi_failfast &&
    260 			    (zio == NULL || (zio->io_flags &
    261 			    (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))) {
    262 				continue;
    263 			}
    264 
    265 			/* Handle type specific I/O failures */
    266 			if (zio != NULL &&
    267 			    handler->zi_record.zi_iotype != ZIO_TYPES &&
    268 			    handler->zi_record.zi_iotype != zio->io_type)
    269 				continue;
    270 
    271 			if (handler->zi_record.zi_error == error) {
    272 				/*
    273 				 * For a failed open, pretend like the device
    274 				 * has gone away.
    275 				 */
    276 				if (error == ENXIO)
    277 					vd->vdev_stat.vs_aux =
    278 					    VDEV_AUX_OPEN_FAILED;
    279 				ret = error;
    280 				break;
    281 			}
    282 			if (handler->zi_record.zi_error == ENXIO) {
    283 				ret = EIO;
    284 				break;
    285 			}
    286 		}
    287 	}
    288 
    289 	rw_exit(&inject_lock);
    290 
    291 	return (ret);
    292 }
    293 
    294 /*
    295  * Simulate hardware that ignores cache flushes.  For requested number
    296  * of seconds nix the actual writing to disk.
    297  */
    298 void
    299 zio_handle_ignored_writes(zio_t *zio)
    300 {
    301 	inject_handler_t *handler;
    302 
    303 	rw_enter(&inject_lock, RW_READER);
    304 
    305 	for (handler = list_head(&inject_handlers); handler != NULL;
    306 	    handler = list_next(&inject_handlers, handler)) {
    307 
    308 		/* Ignore errors not destined for this pool */
    309 		if (zio->io_spa != handler->zi_spa)
    310 			continue;
    311 
    312 		if (handler->zi_record.zi_duration == 0)
    313 			continue;
    314 
    315 		/*
    316 		 * Positive duration implies # of seconds, negative
    317 		 * a number of txgs
    318 		 */
    319 		if (handler->zi_record.zi_timer == 0) {
    320 			if (handler->zi_record.zi_duration > 0)
    321 				handler->zi_record.zi_timer = lbolt64;
    322 			else
    323 				handler->zi_record.zi_timer = zio->io_txg;
    324 		}
    325 		zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES;
    326 		break;
    327 	}
    328 
    329 	rw_exit(&inject_lock);
    330 }
    331 
    332 void
    333 spa_handle_ignored_writes(spa_t *spa)
    334 {
    335 	inject_handler_t *handler;
    336 
    337 	if (zio_injection_enabled == 0)
    338 		return;
    339 
    340 	rw_enter(&inject_lock, RW_READER);
    341 
    342 	for (handler = list_head(&inject_handlers); handler != NULL;
    343 	    handler = list_next(&inject_handlers, handler)) {
    344 
    345 		/* Ignore errors not destined for this pool */
    346 		if (spa != handler->zi_spa)
    347 			continue;
    348 
    349 		if (handler->zi_record.zi_duration == 0)
    350 			continue;
    351 
    352 		if (handler->zi_record.zi_duration > 0) {
    353 			VERIFY(handler->zi_record.zi_timer == 0 ||
    354 			    handler->zi_record.zi_timer +
    355 			    handler->zi_record.zi_duration * hz > lbolt64);
    356 		} else {
    357 			/* duration is negative so the subtraction here adds */
    358 			VERIFY(handler->zi_record.zi_timer == 0 ||
    359 			    handler->zi_record.zi_timer -
    360 			    handler->zi_record.zi_duration >=
    361 			    spa_syncing_txg(spa));
    362 		}
    363 	}
    364 
    365 	rw_exit(&inject_lock);
    366 }
    367 
    368 /*
    369  * Create a new handler for the given record.  We add it to the list, adding
    370  * a reference to the spa_t in the process.  We increment zio_injection_enabled,
    371  * which is the switch to trigger all fault injection.
    372  */
    373 int
    374 zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
    375 {
    376 	inject_handler_t *handler;
    377 	int error;
    378 	spa_t *spa;
    379 
    380 	/*
    381 	 * If this is pool-wide metadata, make sure we unload the corresponding
    382 	 * spa_t, so that the next attempt to load it will trigger the fault.
    383 	 * We call spa_reset() to unload the pool appropriately.
    384 	 */
    385 	if (flags & ZINJECT_UNLOAD_SPA)
    386 		if ((error = spa_reset(name)) != 0)
    387 			return (error);
    388 
    389 	if (!(flags & ZINJECT_NULL)) {
    390 		/*
    391 		 * spa_inject_ref() will add an injection reference, which will
    392 		 * prevent the pool from being removed from the namespace while
    393 		 * still allowing it to be unloaded.
    394 		 */
    395 		if ((spa = spa_inject_addref(name)) == NULL)
    396 			return (ENOENT);
    397 
    398 		handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP);
    399 
    400 		rw_enter(&inject_lock, RW_WRITER);
    401 
    402 		*id = handler->zi_id = inject_next_id++;
    403 		handler->zi_spa = spa;
    404 		handler->zi_record = *record;
    405 		list_insert_tail(&inject_handlers, handler);
    406 		atomic_add_32(&zio_injection_enabled, 1);
    407 
    408 		rw_exit(&inject_lock);
    409 	}
    410 
    411 	/*
    412 	 * Flush the ARC, so that any attempts to read this data will end up
    413 	 * going to the ZIO layer.  Note that this is a little overkill, but
    414 	 * we don't have the necessary ARC interfaces to do anything else, and
    415 	 * fault injection isn't a performance critical path.
    416 	 */
    417 	if (flags & ZINJECT_FLUSH_ARC)
    418 		arc_flush(NULL);
    419 
    420 	return (0);
    421 }
    422 
    423 /*
    424  * Returns the next record with an ID greater than that supplied to the
    425  * function.  Used to iterate over all handlers in the system.
    426  */
    427 int
    428 zio_inject_list_next(int *id, char *name, size_t buflen,
    429     zinject_record_t *record)
    430 {
    431 	inject_handler_t *handler;
    432 	int ret;
    433 
    434 	mutex_enter(&spa_namespace_lock);
    435 	rw_enter(&inject_lock, RW_READER);
    436 
    437 	for (handler = list_head(&inject_handlers); handler != NULL;
    438 	    handler = list_next(&inject_handlers, handler))
    439 		if (handler->zi_id > *id)
    440 			break;
    441 
    442 	if (handler) {
    443 		*record = handler->zi_record;
    444 		*id = handler->zi_id;
    445 		(void) strncpy(name, spa_name(handler->zi_spa), buflen);
    446 		ret = 0;
    447 	} else {
    448 		ret = ENOENT;
    449 	}
    450 
    451 	rw_exit(&inject_lock);
    452 	mutex_exit(&spa_namespace_lock);
    453 
    454 	return (ret);
    455 }
    456 
    457 /*
    458  * Clear the fault handler with the given identifier, or return ENOENT if none
    459  * exists.
    460  */
    461 int
    462 zio_clear_fault(int id)
    463 {
    464 	inject_handler_t *handler;
    465 	int ret;
    466 
    467 	rw_enter(&inject_lock, RW_WRITER);
    468 
    469 	for (handler = list_head(&inject_handlers); handler != NULL;
    470 	    handler = list_next(&inject_handlers, handler))
    471 		if (handler->zi_id == id)
    472 			break;
    473 
    474 	if (handler == NULL) {
    475 		ret = ENOENT;
    476 	} else {
    477 		list_remove(&inject_handlers, handler);
    478 		spa_inject_delref(handler->zi_spa);
    479 		kmem_free(handler, sizeof (inject_handler_t));
    480 		atomic_add_32(&zio_injection_enabled, -1);
    481 		ret = 0;
    482 	}
    483 
    484 	rw_exit(&inject_lock);
    485 
    486 	return (ret);
    487 }
    488 
    489 void
    490 zio_inject_init(void)
    491 {
    492 	rw_init(&inject_lock, NULL, RW_DEFAULT, NULL);
    493 	list_create(&inject_handlers, sizeof (inject_handler_t),
    494 	    offsetof(inject_handler_t, zi_link));
    495 }
    496 
    497 void
    498 zio_inject_fini(void)
    499 {
    500 	list_destroy(&inject_handlers);
    501 	rw_destroy(&inject_lock);
    502 }
    503