OpenGrok

Cross Reference: zio_inject.c
xref: /onnv/onnv-gate/usr/src/uts/common/fs/zfs/zio_inject.c
Home | History | Annotate | Line # | Download | only in zfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
     23  */
     24 
     25 /*
     26  * ZFS fault injection
     27  *
     28  * To handle fault injection, we keep track of a series of zinject_record_t
     29  * structures which describe which logical block(s) should be injected with a
     30  * fault.  These are kept in a global list.  Each record corresponds to a given
     31  * spa_t and maintains a special hold on the spa_t so that it cannot be deleted
     32  * or exported while the injection record exists.
     33  *
     34  * Device level injection is done using the 'zi_guid' field.  If this is set, it
     35  * means that the error is destined for a particular device, not a piece of
     36  * data.
     37  *
     38  * This is a rather poor data structure and algorithm, but we don't expect more
     39  * than a few faults at any one time, so it should be sufficient for our needs.
     40  */
     41 
     42 #include <sys/arc.h>
     43 #include <sys/zio_impl.h>
     44 #include <sys/zfs_ioctl.h>
     45 #include <sys/vdev_impl.h>
     46 #include <sys/dmu_objset.h>
     47 #include <sys/fs/zfs.h>
     48 
     49 uint32_t zio_injection_enabled;
     50 
     51 typedef struct inject_handler {
     52 	int			zi_id;
     53 	spa_t			*zi_spa;
     54 	zinject_record_t	zi_record;
     55 	list_node_t		zi_link;
     56 } inject_handler_t;
     57 
     58 static list_t inject_handlers;
     59 static krwlock_t inject_lock;
     60 static int inject_next_id = 1;
     61 
     62 /*
     63  * Returns true if the given record matches the I/O in progress.
     64  */
     65 static boolean_t
     66 zio_match_handler(zbookmark_t *zb, uint64_t type,
     67     zinject_record_t *record, int error)
     68 {
     69 	/*
     70 	 * Check for a match against the MOS, which is based on type
     71 	 */
     72 	if (zb->zb_objset == DMU_META_OBJSET &&
     73 	    record->zi_objset == DMU_META_OBJSET &&
     74 	    record->zi_object == DMU_META_DNODE_OBJECT) {
     75 		if (record->zi_type == DMU_OT_NONE ||
     76 		    type == record->zi_type)
     77 			return (record->zi_freq == 0 ||
     78 			    spa_get_random(100) < record->zi_freq);
     79 		else
     80 			return (B_FALSE);
     81 	}
     82 
     83 	/*
     84 	 * Check for an exact match.
     85 	 */
     86 	if (zb->zb_objset == record->zi_objset &&
     87 	    zb->zb_object == record->zi_object &&
     88 	    zb->zb_level == record->zi_level &&
     89 	    zb->zb_blkid >= record->zi_start &&
     90 	    zb->zb_blkid <= record->zi_end &&
     91 	    error == record->zi_error)
     92 		return (record->zi_freq == 0 ||
     93 		    spa_get_random(100) < record->zi_freq);
     94 
     95 	return (B_FALSE);
     96 }
     97 
     98 /*
     99  * Panic the system when a config change happens in the function
    100  * specified by tag.
    101  */
    102 void
    103 zio_handle_panic_injection(spa_t *spa, char *tag, uint64_t type)
    104 {
    105 	inject_handler_t *handler;
    106 
    107 	rw_enter(&inject_lock, RW_READER);
    108 
    109 	for (handler = list_head(&inject_handlers); handler != NULL;
    110 	    handler = list_next(&inject_handlers, handler)) {
    111 
    112 		if (spa != handler->zi_spa)
    113 			continue;
    114 
    115 		if (handler->zi_record.zi_type == type &&
    116 		    strcmp(tag, handler->zi_record.zi_func) == 0)
    117 			panic("Panic requested in function %s\n", tag);
    118 	}
    119 
    120 	rw_exit(&inject_lock);
    121 }
    122 
    123 /*
    124  * Determine if the I/O in question should return failure.  Returns the errno
    125  * to be returned to the caller.
    126  */
    127 int
    128 zio_handle_fault_injection(zio_t *zio, int error)
    129 {
    130 	int ret = 0;
    131 	inject_handler_t *handler;
    132 
    133 	/*
    134 	 * Ignore I/O not associated with any logical data.
    135 	 */
    136 	if (zio->io_logical == NULL)
    137 		return (0);
    138 
    139 	/*
    140 	 * Currently, we only support fault injection on reads.
    141 	 */
    142 	if (zio->io_type != ZIO_TYPE_READ)
    143 		return (0);
    144 
    145 	rw_enter(&inject_lock, RW_READER);
    146 
    147 	for (handler = list_head(&inject_handlers); handler != NULL;
    148 	    handler = list_next(&inject_handlers, handler)) {
    149 
    150 		/* Ignore errors not destined for this pool */
    151 		if (zio->io_spa != handler->zi_spa)
    152 			continue;
    153 
    154 		/* Ignore device errors and panic injection */
    155 		if (handler->zi_record.zi_guid != 0 ||
    156 		    handler->zi_record.zi_func[0] != '\0' ||
    157 		    handler->zi_record.zi_duration != 0)
    158 			continue;
    159 
    160 		/* If this handler matches, return EIO */
    161 		if (zio_match_handler(&zio->io_logical->io_bookmark,
    162 		    zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE,
    163 		    &handler->zi_record, error)) {
    164 			ret = error;
    165 			break;
    166 		}
    167 	}
    168 
    169 	rw_exit(&inject_lock);
    170 
    171 	return (ret);
    172 }
    173 
    174 /*
    175  * Determine if the zio is part of a label update and has an injection
    176  * handler associated with that portion of the label. Currently, we
    177  * allow error injection in either the nvlist or the uberblock region of
    178  * of the vdev label.
    179  */
    180 int
    181 zio_handle_label_injection(zio_t *zio, int error)
    182 {
    183 	inject_handler_t *handler;
    184 	vdev_t *vd = zio->io_vd;
    185 	uint64_t offset = zio->io_offset;
    186 	int label;
    187 	int ret = 0;
    188 
    189 	if (offset >= VDEV_LABEL_START_SIZE &&
    190 	    offset < vd->vdev_psize - VDEV_LABEL_END_SIZE)
    191 		return (0);
    192 
    193 	rw_enter(&inject_lock, RW_READER);
    194 
    195 	for (handler = list_head(&inject_handlers); handler != NULL;
    196 	    handler = list_next(&inject_handlers, handler)) {
    197 		uint64_t start = handler->zi_record.zi_start;
    198 		uint64_t end = handler->zi_record.zi_end;
    199 
    200 		/* Ignore device only faults or panic injection */
    201 		if (handler->zi_record.zi_start == 0 ||
    202 		    handler->zi_record.zi_func[0] != '\0' ||
    203 		    handler->zi_record.zi_duration != 0)
    204 			continue;
    205 
    206 		/*
    207 		 * The injection region is the relative offsets within a
    208 		 * vdev label. We must determine the label which is being
    209 		 * updated and adjust our region accordingly.
    210 		 */
    211 		label = vdev_label_number(vd->vdev_psize, offset);
    212 		start = vdev_label_offset(vd->vdev_psize, label, start);
    213 		end = vdev_label_offset(vd->vdev_psize, label, end);
    214 
    215 		if (zio->io_vd->vdev_guid == handler->zi_record.zi_guid &&
    216 		    (offset >= start && offset <= end)) {
    217 			ret = error;
    218 			break;
    219 		}
    220 	}
    221 	rw_exit(&inject_lock);
    222 	return (ret);
    223 }
    224 
    225 
    226 int
    227 zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
    228 {
    229 	inject_handler_t *handler;
    230 	int ret = 0;
    231 
    232 	/*
    233 	 * We skip over faults in the labels unless it's during
    234 	 * device open (i.e. zio == NULL).
    235 	 */
    236 	if (zio != NULL) {
    237 		uint64_t offset = zio->io_offset;
    238 
    239 		if (offset < VDEV_LABEL_START_SIZE ||
    240 		    offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE)
    241 			return (0);
    242 	}
    243 
    244 	rw_enter(&inject_lock, RW_READER);
    245 
    246 	for (handler = list_head(&inject_handlers); handler != NULL;
    247 	    handler = list_next(&inject_handlers, handler)) {
    248 
    249 		/*
    250 		 * Ignore label specific faults, panic injection
    251 		 * or fake writes
    252 		 */
    253 		if (handler->zi_record.zi_start != 0 ||
    254 		    handler->zi_record.zi_func[0] != '\0' ||
    255 		    handler->zi_record.zi_duration != 0)
    256 			continue;
    257 
    258 		if (vd->vdev_guid == handler->zi_record.zi_guid) {
    259 			if (handler->zi_record.zi_failfast &&
    260 			    (zio == NULL || (zio->io_flags &
    261 			    (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))) {
    262 				continue;
    263 			}
    264 
    265 			/* Handle type specific I/O failures */
    266 			if (zio != NULL &&
    267 			    handler->zi_record.zi_iotype != ZIO_TYPES &&
    268 			    handler->zi_record.zi_iotype != zio->io_type)
    269 				continue;
    270 
    271 			if (handler->zi_record.zi_error == error) {
    272 				/*
    273 				 * For a failed open, pretend like the device
    274 				 * has gone away.
    275 				 */
    276 				if (error == ENXIO)
    277 					vd->vdev_stat.vs_aux =
    278 					    VDEV_AUX_OPEN_FAILED;
    279 
    280 				/*
    281 				 * Treat these errors as if they had been
    282 				 * retried so that all the appropriate stats
    283 				 * and FMA events are generated.
    284 				 */
    285 				if (!handler->zi_record.zi_failfast &&
    286 				    zio != NULL)
    287 					zio->io_flags |= ZIO_FLAG_IO_RETRY;
    288 
    289 				ret = error;
    290 				break;
    291 			}
    292 			if (handler->zi_record.zi_error == ENXIO) {
    293 				ret = EIO;
    294 				break;
    295 			}
    296 		}
    297 	}
    298 
    299 	rw_exit(&inject_lock);
    300 
    301 	return (ret);
    302 }
    303 
    304 /*
    305  * Simulate hardware that ignores cache flushes.  For requested number
    306  * of seconds nix the actual writing to disk.
    307  */
    308 void
    309 zio_handle_ignored_writes(zio_t *zio)
    310 {
    311 	inject_handler_t *handler;
    312 
    313 	rw_enter(&inject_lock, RW_READER);
    314 
    315 	for (handler = list_head(&inject_handlers); handler != NULL;
    316 	    handler = list_next(&inject_handlers, handler)) {
    317 
    318 		/* Ignore errors not destined for this pool */
    319 		if (zio->io_spa != handler->zi_spa)
    320 			continue;
    321 
    322 		if (handler->zi_record.zi_duration == 0)
    323 			continue;
    324 
    325 		/*
    326 		 * Positive duration implies # of seconds, negative
    327 		 * a number of txgs
    328 		 */
    329 		if (handler->zi_record.zi_timer == 0) {
    330 			if (handler->zi_record.zi_duration > 0)
    331 				handler->zi_record.zi_timer = ddi_get_lbolt64();
    332 			else
    333 				handler->zi_record.zi_timer = zio->io_txg;
    334 		}
    335 
    336 		/* Have a "problem" writing 60% of the time */
    337 		if (spa_get_random(100) < 60)
    338 			zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES;
    339 		break;
    340 	}
    341 
    342 	rw_exit(&inject_lock);
    343 }
    344 
    345 void
    346 spa_handle_ignored_writes(spa_t *spa)
    347 {
    348 	inject_handler_t *handler;
    349 
    350 	if (zio_injection_enabled == 0)
    351 		return;
    352 
    353 	rw_enter(&inject_lock, RW_READER);
    354 
    355 	for (handler = list_head(&inject_handlers); handler != NULL;
    356 	    handler = list_next(&inject_handlers, handler)) {
    357 
    358 		/* Ignore errors not destined for this pool */
    359 		if (spa != handler->zi_spa)
    360 			continue;
    361 
    362 		if (handler->zi_record.zi_duration == 0)
    363 			continue;
    364 
    365 		if (handler->zi_record.zi_duration > 0) {
    366 			VERIFY(handler->zi_record.zi_timer == 0 ||
    367 			    handler->zi_record.zi_timer +
    368 			    handler->zi_record.zi_duration * hz >
    369 			    ddi_get_lbolt64());
    370 		} else {
    371 			/* duration is negative so the subtraction here adds */
    372 			VERIFY(handler->zi_record.zi_timer == 0 ||
    373 			    handler->zi_record.zi_timer -
    374 			    handler->zi_record.zi_duration >=
    375 			    spa_syncing_txg(spa));
    376 		}
    377 	}
    378 
    379 	rw_exit(&inject_lock);
    380 }
    381 
    382 /*
    383  * Create a new handler for the given record.  We add it to the list, adding
    384  * a reference to the spa_t in the process.  We increment zio_injection_enabled,
    385  * which is the switch to trigger all fault injection.
    386  */
    387 int
    388 zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
    389 {
    390 	inject_handler_t *handler;
    391 	int error;
    392 	spa_t *spa;
    393 
    394 	/*
    395 	 * If this is pool-wide metadata, make sure we unload the corresponding
    396 	 * spa_t, so that the next attempt to load it will trigger the fault.
    397 	 * We call spa_reset() to unload the pool appropriately.
    398 	 */
    399 	if (flags & ZINJECT_UNLOAD_SPA)
    400 		if ((error = spa_reset(name)) != 0)
    401 			return (error);
    402 
    403 	if (!(flags & ZINJECT_NULL)) {
    404 		/*
    405 		 * spa_inject_ref() will add an injection reference, which will
    406 		 * prevent the pool from being removed from the namespace while
    407 		 * still allowing it to be unloaded.
    408 		 */
    409 		if ((spa = spa_inject_addref(name)) == NULL)
    410 			return (ENOENT);
    411 
    412 		handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP);
    413 
    414 		rw_enter(&inject_lock, RW_WRITER);
    415 
    416 		*id = handler->zi_id = inject_next_id++;
    417 		handler->zi_spa = spa;
    418 		handler->zi_record = *record;
    419 		list_insert_tail(&inject_handlers, handler);
    420 		atomic_add_32(&zio_injection_enabled, 1);
    421 
    422 		rw_exit(&inject_lock);
    423 	}
    424 
    425 	/*
    426 	 * Flush the ARC, so that any attempts to read this data will end up
    427 	 * going to the ZIO layer.  Note that this is a little overkill, but
    428 	 * we don't have the necessary ARC interfaces to do anything else, and
    429 	 * fault injection isn't a performance critical path.
    430 	 */
    431 	if (flags & ZINJECT_FLUSH_ARC)
    432 		arc_flush(NULL);
    433 
    434 	return (0);
    435 }
    436 
    437 /*
    438  * Returns the next record with an ID greater than that supplied to the
    439  * function.  Used to iterate over all handlers in the system.
    440  */
    441 int
    442 zio_inject_list_next(int *id, char *name, size_t buflen,
    443     zinject_record_t *record)
    444 {
    445 	inject_handler_t *handler;
    446 	int ret;
    447 
    448 	mutex_enter(&spa_namespace_lock);
    449 	rw_enter(&inject_lock, RW_READER);
    450 
    451 	for (handler = list_head(&inject_handlers); handler != NULL;
    452 	    handler = list_next(&inject_handlers, handler))
    453 		if (handler->zi_id > *id)
    454 			break;
    455 
    456 	if (handler) {
    457 		*record = handler->zi_record;
    458 		*id = handler->zi_id;
    459 		(void) strncpy(name, spa_name(handler->zi_spa), buflen);
    460 		ret = 0;
    461 	} else {
    462 		ret = ENOENT;
    463 	}
    464 
    465 	rw_exit(&inject_lock);
    466 	mutex_exit(&spa_namespace_lock);
    467 
    468 	return (ret);
    469 }
    470 
    471 /*
    472  * Clear the fault handler with the given identifier, or return ENOENT if none
    473  * exists.
    474  */
    475 int
    476 zio_clear_fault(int id)
    477 {
    478 	inject_handler_t *handler;
    479 
    480 	rw_enter(&inject_lock, RW_WRITER);
    481 
    482 	for (handler = list_head(&inject_handlers); handler != NULL;
    483 	    handler = list_next(&inject_handlers, handler))
    484 		if (handler->zi_id == id)
    485 			break;
    486 
    487 	if (handler == NULL) {
    488 		rw_exit(&inject_lock);
    489 		return (ENOENT);
    490 	}
    491 
    492 	list_remove(&inject_handlers, handler);
    493 	rw_exit(&inject_lock);
    494 
    495 	spa_inject_delref(handler->zi_spa);
    496 	kmem_free(handler, sizeof (inject_handler_t));
    497 	atomic_add_32(&zio_injection_enabled, -1);
    498 
    499 	return (0);
    500 }
    501 
    502 void
    503 zio_inject_init(void)
    504 {
    505 	rw_init(&inject_lock, NULL, RW_DEFAULT, NULL);
    506 	list_create(&inject_handlers, sizeof (inject_handler_t),
    507 	    offsetof(inject_handler_t, zi_link));
    508 }
    509 
    510 void
    511 zio_inject_fini(void)
    512 {
    513 	list_destroy(&inject_handlers);
    514 	rw_destroy(&inject_lock);
    515 }
    516