xref: /onnv-gate/usr/src/uts/common/fs/zfs/zio_inject.c (revision 5642:504c84876fda)
11544Seschrock /*
21544Seschrock  * CDDL HEADER START
31544Seschrock  *
41544Seschrock  * The contents of this file are subject to the terms of the
51544Seschrock  * Common Development and Distribution License (the "License").
61544Seschrock  * You may not use this file except in compliance with the License.
71544Seschrock  *
81544Seschrock  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
91544Seschrock  * or http://www.opensolaris.org/os/licensing.
101544Seschrock  * See the License for the specific language governing permissions
111544Seschrock  * and limitations under the License.
121544Seschrock  *
131544Seschrock  * When distributing Covered Code, include this CDDL HEADER in each
141544Seschrock  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
151544Seschrock  * If applicable, add the following below this CDDL HEADER, with the
161544Seschrock  * fields enclosed by brackets "[]" replaced with your own identifying
171544Seschrock  * information: Portions Copyright [yyyy] [name of copyright owner]
181544Seschrock  *
191544Seschrock  * CDDL HEADER END
201544Seschrock  */
211544Seschrock /*
22*5642Smaybee  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
231544Seschrock  * Use is subject to license terms.
241544Seschrock  */
251544Seschrock 
261544Seschrock #pragma ident	"%Z%%M%	%I%	%E% SMI"
271544Seschrock 
281544Seschrock /*
291544Seschrock  * ZFS fault injection
301544Seschrock  *
311544Seschrock  * To handle fault injection, we keep track of a series of zinject_record_t
321544Seschrock  * structures which describe which logical block(s) should be injected with a
331544Seschrock  * fault.  These are kept in a global list.  Each record corresponds to a given
341544Seschrock  * spa_t and maintains a special hold on the spa_t so that it cannot be deleted
351544Seschrock  * or exported while the injection record exists.
361544Seschrock  *
371544Seschrock  * Device level injection is done using the 'zi_guid' field.  If this is set, it
381544Seschrock  * means that the error is destined for a particular device, not a piece of
391544Seschrock  * data.
401544Seschrock  *
411544Seschrock  * This is a rather poor data structure and algorithm, but we don't expect more
421544Seschrock  * than a few faults at any one time, so it should be sufficient for our needs.
431544Seschrock  */
441544Seschrock 
451544Seschrock #include <sys/arc.h>
461544Seschrock #include <sys/zio_impl.h>
471544Seschrock #include <sys/zfs_ioctl.h>
481544Seschrock #include <sys/spa_impl.h>
491544Seschrock #include <sys/vdev_impl.h>
501544Seschrock 
511544Seschrock uint32_t zio_injection_enabled;
521544Seschrock 
531544Seschrock typedef struct inject_handler {
541544Seschrock 	int			zi_id;
551544Seschrock 	spa_t			*zi_spa;
561544Seschrock 	zinject_record_t	zi_record;
571544Seschrock 	list_node_t		zi_link;
581544Seschrock } inject_handler_t;
591544Seschrock 
601544Seschrock static list_t inject_handlers;
611544Seschrock static krwlock_t inject_lock;
621544Seschrock static int inject_next_id = 1;
631544Seschrock 
641544Seschrock /*
651544Seschrock  * Returns true if the given record matches the I/O in progress.
661544Seschrock  */
671544Seschrock static boolean_t
681544Seschrock zio_match_handler(zbookmark_t *zb, uint64_t type,
691544Seschrock     zinject_record_t *record, int error)
701544Seschrock {
711544Seschrock 	/*
721544Seschrock 	 * Check for a match against the MOS, which is based on type
731544Seschrock 	 */
741544Seschrock 	if (zb->zb_objset == 0 && record->zi_objset == 0 &&
751544Seschrock 	    record->zi_object == 0) {
761544Seschrock 		if (record->zi_type == DMU_OT_NONE ||
771544Seschrock 		    type == record->zi_type)
781544Seschrock 			return (record->zi_freq == 0 ||
791544Seschrock 			    spa_get_random(100) < record->zi_freq);
801544Seschrock 		else
811544Seschrock 			return (B_FALSE);
821544Seschrock 	}
831544Seschrock 
841544Seschrock 	/*
851544Seschrock 	 * Check for an exact match.
861544Seschrock 	 */
871544Seschrock 	if (zb->zb_objset == record->zi_objset &&
881544Seschrock 	    zb->zb_object == record->zi_object &&
891544Seschrock 	    zb->zb_level == record->zi_level &&
901544Seschrock 	    zb->zb_blkid >= record->zi_start &&
911544Seschrock 	    zb->zb_blkid <= record->zi_end &&
921544Seschrock 	    error == record->zi_error)
931544Seschrock 		return (record->zi_freq == 0 ||
941544Seschrock 		    spa_get_random(100) < record->zi_freq);
951544Seschrock 
961544Seschrock 	return (B_FALSE);
971544Seschrock }
981544Seschrock 
991544Seschrock /*
1001544Seschrock  * Determine if the I/O in question should return failure.  Returns the errno
1011544Seschrock  * to be returned to the caller.
1021544Seschrock  */
1031544Seschrock int
1041544Seschrock zio_handle_fault_injection(zio_t *zio, int error)
1051544Seschrock {
1061544Seschrock 	int ret = 0;
1071544Seschrock 	inject_handler_t *handler;
1081544Seschrock 
1091544Seschrock 	/*
1101544Seschrock 	 * Ignore I/O not associated with any logical data.
1111544Seschrock 	 */
1121544Seschrock 	if (zio->io_logical == NULL)
1131544Seschrock 		return (0);
1141544Seschrock 
1151544Seschrock 	/*
1161544Seschrock 	 * Currently, we only support fault injection on reads.
1171544Seschrock 	 */
1181544Seschrock 	if (zio->io_type != ZIO_TYPE_READ)
1191544Seschrock 		return (0);
1201544Seschrock 
1211544Seschrock 	rw_enter(&inject_lock, RW_READER);
1221544Seschrock 
1231544Seschrock 	for (handler = list_head(&inject_handlers); handler != NULL;
1241544Seschrock 	    handler = list_next(&inject_handlers, handler)) {
1251544Seschrock 
1261544Seschrock 		/* Ignore errors not destined for this pool */
1271544Seschrock 		if (zio->io_spa != handler->zi_spa)
1281544Seschrock 			continue;
1291544Seschrock 
1301544Seschrock 		/* Ignore device errors */
1311544Seschrock 		if (handler->zi_record.zi_guid != 0)
1321544Seschrock 			continue;
1331544Seschrock 
1341544Seschrock 		/* If this handler matches, return EIO */
1351544Seschrock 		if (zio_match_handler(&zio->io_logical->io_bookmark,
1361544Seschrock 		    zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE,
1371544Seschrock 		    &handler->zi_record, error)) {
1381544Seschrock 			ret = error;
1391544Seschrock 			break;
1401544Seschrock 		}
1411544Seschrock 	}
1421544Seschrock 
1431544Seschrock 	rw_exit(&inject_lock);
1441544Seschrock 
1451544Seschrock 	return (ret);
1461544Seschrock }
1471544Seschrock 
1481544Seschrock int
1491544Seschrock zio_handle_device_injection(vdev_t *vd, int error)
1501544Seschrock {
1511544Seschrock 	inject_handler_t *handler;
1521544Seschrock 	int ret = 0;
1531544Seschrock 
1541544Seschrock 	rw_enter(&inject_lock, RW_READER);
1551544Seschrock 
1561544Seschrock 	for (handler = list_head(&inject_handlers); handler != NULL;
1571544Seschrock 	    handler = list_next(&inject_handlers, handler)) {
1581544Seschrock 
1591544Seschrock 		if (vd->vdev_guid == handler->zi_record.zi_guid) {
1601544Seschrock 			if (handler->zi_record.zi_error == error) {
1611544Seschrock 				/*
1621544Seschrock 				 * For a failed open, pretend like the device
1631544Seschrock 				 * has gone away.
1641544Seschrock 				 */
1651544Seschrock 				if (error == ENXIO)
1661544Seschrock 					vd->vdev_stat.vs_aux =
1671544Seschrock 					    VDEV_AUX_OPEN_FAILED;
1681544Seschrock 				ret = error;
1691544Seschrock 				break;
1701544Seschrock 			}
1711544Seschrock 			if (handler->zi_record.zi_error == ENXIO) {
1721544Seschrock 				ret = EIO;
1731544Seschrock 				break;
1741544Seschrock 			}
1751544Seschrock 		}
1761544Seschrock 	}
1771544Seschrock 
1781544Seschrock 	rw_exit(&inject_lock);
1791544Seschrock 
1801544Seschrock 	return (ret);
1811544Seschrock }
1821544Seschrock 
1831544Seschrock /*
1841544Seschrock  * Create a new handler for the given record.  We add it to the list, adding
1851544Seschrock  * a reference to the spa_t in the process.  We increment zio_injection_enabled,
1861544Seschrock  * which is the switch to trigger all fault injection.
1871544Seschrock  */
1881544Seschrock int
1891544Seschrock zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
1901544Seschrock {
1911544Seschrock 	inject_handler_t *handler;
1921544Seschrock 	int error;
1931544Seschrock 	spa_t *spa;
1941544Seschrock 
1951544Seschrock 	/*
1961544Seschrock 	 * If this is pool-wide metadata, make sure we unload the corresponding
1971544Seschrock 	 * spa_t, so that the next attempt to load it will trigger the fault.
1981544Seschrock 	 * We call spa_reset() to unload the pool appropriately.
1991544Seschrock 	 */
2001544Seschrock 	if (flags & ZINJECT_UNLOAD_SPA)
2011544Seschrock 		if ((error = spa_reset(name)) != 0)
2021544Seschrock 			return (error);
2031544Seschrock 
2041544Seschrock 	if (!(flags & ZINJECT_NULL)) {
2051544Seschrock 		/*
2061544Seschrock 		 * spa_inject_ref() will add an injection reference, which will
2071544Seschrock 		 * prevent the pool from being removed from the namespace while
2081544Seschrock 		 * still allowing it to be unloaded.
2091544Seschrock 		 */
2101544Seschrock 		if ((spa = spa_inject_addref(name)) == NULL)
2111544Seschrock 			return (ENOENT);
2121544Seschrock 
2131544Seschrock 		handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP);
2141544Seschrock 
2151544Seschrock 		rw_enter(&inject_lock, RW_WRITER);
2161544Seschrock 
2171544Seschrock 		*id = handler->zi_id = inject_next_id++;
2181544Seschrock 		handler->zi_spa = spa;
2191544Seschrock 		handler->zi_record = *record;
2201544Seschrock 		list_insert_tail(&inject_handlers, handler);
2211544Seschrock 		atomic_add_32(&zio_injection_enabled, 1);
2221544Seschrock 
2231544Seschrock 		rw_exit(&inject_lock);
2241544Seschrock 	}
2251544Seschrock 
2261544Seschrock 	/*
2271544Seschrock 	 * Flush the ARC, so that any attempts to read this data will end up
2281544Seschrock 	 * going to the ZIO layer.  Note that this is a little overkill, but
2291544Seschrock 	 * we don't have the necessary ARC interfaces to do anything else, and
2301544Seschrock 	 * fault injection isn't a performance critical path.
2311544Seschrock 	 */
2321544Seschrock 	if (flags & ZINJECT_FLUSH_ARC)
233*5642Smaybee 		arc_flush(NULL);
2341544Seschrock 
2351544Seschrock 	return (0);
2361544Seschrock }
2371544Seschrock 
2381544Seschrock /*
2391544Seschrock  * Returns the next record with an ID greater than that supplied to the
2401544Seschrock  * function.  Used to iterate over all handlers in the system.
2411544Seschrock  */
2421544Seschrock int
2431544Seschrock zio_inject_list_next(int *id, char *name, size_t buflen,
2441544Seschrock     zinject_record_t *record)
2451544Seschrock {
2461544Seschrock 	inject_handler_t *handler;
2471544Seschrock 	int ret;
2481544Seschrock 
2491544Seschrock 	mutex_enter(&spa_namespace_lock);
2501544Seschrock 	rw_enter(&inject_lock, RW_READER);
2511544Seschrock 
2521544Seschrock 	for (handler = list_head(&inject_handlers); handler != NULL;
2531544Seschrock 	    handler = list_next(&inject_handlers, handler))
2541544Seschrock 		if (handler->zi_id > *id)
2551544Seschrock 			break;
2561544Seschrock 
2571544Seschrock 	if (handler) {
2581544Seschrock 		*record = handler->zi_record;
2591544Seschrock 		*id = handler->zi_id;
2601544Seschrock 		(void) strncpy(name, spa_name(handler->zi_spa), buflen);
2611544Seschrock 		ret = 0;
2621544Seschrock 	} else {
2631544Seschrock 		ret = ENOENT;
2641544Seschrock 	}
2651544Seschrock 
2661544Seschrock 	rw_exit(&inject_lock);
2671544Seschrock 	mutex_exit(&spa_namespace_lock);
2681544Seschrock 
2691544Seschrock 	return (ret);
2701544Seschrock }
2711544Seschrock 
2721544Seschrock /*
2731544Seschrock  * Clear the fault handler with the given identifier, or return ENOENT if none
2741544Seschrock  * exists.
2751544Seschrock  */
2761544Seschrock int
2771544Seschrock zio_clear_fault(int id)
2781544Seschrock {
2791544Seschrock 	inject_handler_t *handler;
2801544Seschrock 	int ret;
2811544Seschrock 
2821544Seschrock 	rw_enter(&inject_lock, RW_WRITER);
2831544Seschrock 
2841544Seschrock 	for (handler = list_head(&inject_handlers); handler != NULL;
2851544Seschrock 	    handler = list_next(&inject_handlers, handler))
2861544Seschrock 		if (handler->zi_id == id)
2871544Seschrock 			break;
2881544Seschrock 
2891544Seschrock 	if (handler == NULL) {
2901544Seschrock 		ret = ENOENT;
2911544Seschrock 	} else {
2921544Seschrock 		list_remove(&inject_handlers, handler);
2931544Seschrock 		spa_inject_delref(handler->zi_spa);
2941544Seschrock 		kmem_free(handler, sizeof (inject_handler_t));
2951544Seschrock 		atomic_add_32(&zio_injection_enabled, -1);
2961544Seschrock 		ret = 0;
2971544Seschrock 	}
2981544Seschrock 
2991544Seschrock 	rw_exit(&inject_lock);
3001544Seschrock 
3011544Seschrock 	return (ret);
3021544Seschrock }
3031544Seschrock 
3041544Seschrock void
3051544Seschrock zio_inject_init(void)
3061544Seschrock {
3071544Seschrock 	list_create(&inject_handlers, sizeof (inject_handler_t),
3081544Seschrock 	    offsetof(inject_handler_t, zi_link));
3091544Seschrock }
3101544Seschrock 
3111544Seschrock void
3121544Seschrock zio_inject_fini(void)
3131544Seschrock {
3141544Seschrock 	list_destroy(&inject_handlers);
3151544Seschrock }
316