xref: /onnv-gate/usr/src/uts/common/fs/zfs/zio_inject.c (revision 12662:b0fe99c06d14)
11544Seschrock /*
21544Seschrock  * CDDL HEADER START
31544Seschrock  *
41544Seschrock  * The contents of this file are subject to the terms of the
51544Seschrock  * Common Development and Distribution License (the "License").
61544Seschrock  * You may not use this file except in compliance with the License.
71544Seschrock  *
81544Seschrock  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
91544Seschrock  * or http://www.opensolaris.org/os/licensing.
101544Seschrock  * See the License for the specific language governing permissions
111544Seschrock  * and limitations under the License.
121544Seschrock  *
131544Seschrock  * When distributing Covered Code, include this CDDL HEADER in each
141544Seschrock  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
151544Seschrock  * If applicable, add the following below this CDDL HEADER, with the
161544Seschrock  * fields enclosed by brackets "[]" replaced with your own identifying
171544Seschrock  * information: Portions Copyright [yyyy] [name of copyright owner]
181544Seschrock  *
191544Seschrock  * CDDL HEADER END
201544Seschrock  */
211544Seschrock /*
2212247SGeorge.Wilson@Sun.COM  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
231544Seschrock  */
241544Seschrock 
251544Seschrock /*
261544Seschrock  * ZFS fault injection
271544Seschrock  *
281544Seschrock  * To handle fault injection, we keep track of a series of zinject_record_t
291544Seschrock  * structures which describe which logical block(s) should be injected with a
301544Seschrock  * fault.  These are kept in a global list.  Each record corresponds to a given
311544Seschrock  * spa_t and maintains a special hold on the spa_t so that it cannot be deleted
321544Seschrock  * or exported while the injection record exists.
331544Seschrock  *
341544Seschrock  * Device level injection is done using the 'zi_guid' field.  If this is set, it
351544Seschrock  * means that the error is destined for a particular device, not a piece of
361544Seschrock  * data.
371544Seschrock  *
381544Seschrock  * This is a rather poor data structure and algorithm, but we don't expect more
391544Seschrock  * than a few faults at any one time, so it should be sufficient for our needs.
401544Seschrock  */
411544Seschrock 
421544Seschrock #include <sys/arc.h>
431544Seschrock #include <sys/zio_impl.h>
441544Seschrock #include <sys/zfs_ioctl.h>
451544Seschrock #include <sys/vdev_impl.h>
4610922SJeff.Bonwick@Sun.COM #include <sys/dmu_objset.h>
476615Sgw25295 #include <sys/fs/zfs.h>
481544Seschrock 
491544Seschrock uint32_t zio_injection_enabled;
501544Seschrock 
511544Seschrock typedef struct inject_handler {
521544Seschrock 	int			zi_id;
531544Seschrock 	spa_t			*zi_spa;
541544Seschrock 	zinject_record_t	zi_record;
551544Seschrock 	list_node_t		zi_link;
561544Seschrock } inject_handler_t;
571544Seschrock 
581544Seschrock static list_t inject_handlers;
591544Seschrock static krwlock_t inject_lock;
601544Seschrock static int inject_next_id = 1;
611544Seschrock 
621544Seschrock /*
631544Seschrock  * Returns true if the given record matches the I/O in progress.
641544Seschrock  */
651544Seschrock static boolean_t
zio_match_handler(zbookmark_t * zb,uint64_t type,zinject_record_t * record,int error)661544Seschrock zio_match_handler(zbookmark_t *zb, uint64_t type,
671544Seschrock     zinject_record_t *record, int error)
681544Seschrock {
691544Seschrock 	/*
701544Seschrock 	 * Check for a match against the MOS, which is based on type
711544Seschrock 	 */
7210922SJeff.Bonwick@Sun.COM 	if (zb->zb_objset == DMU_META_OBJSET &&
7310922SJeff.Bonwick@Sun.COM 	    record->zi_objset == DMU_META_OBJSET &&
7410922SJeff.Bonwick@Sun.COM 	    record->zi_object == DMU_META_DNODE_OBJECT) {
751544Seschrock 		if (record->zi_type == DMU_OT_NONE ||
761544Seschrock 		    type == record->zi_type)
771544Seschrock 			return (record->zi_freq == 0 ||
781544Seschrock 			    spa_get_random(100) < record->zi_freq);
791544Seschrock 		else
801544Seschrock 			return (B_FALSE);
811544Seschrock 	}
821544Seschrock 
831544Seschrock 	/*
841544Seschrock 	 * Check for an exact match.
851544Seschrock 	 */
861544Seschrock 	if (zb->zb_objset == record->zi_objset &&
871544Seschrock 	    zb->zb_object == record->zi_object &&
881544Seschrock 	    zb->zb_level == record->zi_level &&
891544Seschrock 	    zb->zb_blkid >= record->zi_start &&
901544Seschrock 	    zb->zb_blkid <= record->zi_end &&
911544Seschrock 	    error == record->zi_error)
921544Seschrock 		return (record->zi_freq == 0 ||
931544Seschrock 		    spa_get_random(100) < record->zi_freq);
941544Seschrock 
951544Seschrock 	return (B_FALSE);
961544Seschrock }
971544Seschrock 
981544Seschrock /*
9910594SGeorge.Wilson@Sun.COM  * Panic the system when a config change happens in the function
10010594SGeorge.Wilson@Sun.COM  * specified by tag.
10110594SGeorge.Wilson@Sun.COM  */
10210594SGeorge.Wilson@Sun.COM void
zio_handle_panic_injection(spa_t * spa,char * tag,uint64_t type)10311422SMark.Musante@Sun.COM zio_handle_panic_injection(spa_t *spa, char *tag, uint64_t type)
10410594SGeorge.Wilson@Sun.COM {
10510594SGeorge.Wilson@Sun.COM 	inject_handler_t *handler;
10610594SGeorge.Wilson@Sun.COM 
10710594SGeorge.Wilson@Sun.COM 	rw_enter(&inject_lock, RW_READER);
10810594SGeorge.Wilson@Sun.COM 
10910594SGeorge.Wilson@Sun.COM 	for (handler = list_head(&inject_handlers); handler != NULL;
11010594SGeorge.Wilson@Sun.COM 	    handler = list_next(&inject_handlers, handler)) {
11110594SGeorge.Wilson@Sun.COM 
11210594SGeorge.Wilson@Sun.COM 		if (spa != handler->zi_spa)
11310594SGeorge.Wilson@Sun.COM 			continue;
11410594SGeorge.Wilson@Sun.COM 
11511422SMark.Musante@Sun.COM 		if (handler->zi_record.zi_type == type &&
11611422SMark.Musante@Sun.COM 		    strcmp(tag, handler->zi_record.zi_func) == 0)
11710594SGeorge.Wilson@Sun.COM 			panic("Panic requested in function %s\n", tag);
11810594SGeorge.Wilson@Sun.COM 	}
11910594SGeorge.Wilson@Sun.COM 
12010594SGeorge.Wilson@Sun.COM 	rw_exit(&inject_lock);
12110594SGeorge.Wilson@Sun.COM }
12210594SGeorge.Wilson@Sun.COM 
12310594SGeorge.Wilson@Sun.COM /*
1241544Seschrock  * Determine if the I/O in question should return failure.  Returns the errno
1251544Seschrock  * to be returned to the caller.
1261544Seschrock  */
1271544Seschrock int
zio_handle_fault_injection(zio_t * zio,int error)1281544Seschrock zio_handle_fault_injection(zio_t *zio, int error)
1291544Seschrock {
1301544Seschrock 	int ret = 0;
1311544Seschrock 	inject_handler_t *handler;
1321544Seschrock 
1331544Seschrock 	/*
1341544Seschrock 	 * Ignore I/O not associated with any logical data.
1351544Seschrock 	 */
1361544Seschrock 	if (zio->io_logical == NULL)
1371544Seschrock 		return (0);
1381544Seschrock 
1391544Seschrock 	/*
1401544Seschrock 	 * Currently, we only support fault injection on reads.
1411544Seschrock 	 */
1421544Seschrock 	if (zio->io_type != ZIO_TYPE_READ)
1431544Seschrock 		return (0);
1441544Seschrock 
1451544Seschrock 	rw_enter(&inject_lock, RW_READER);
1461544Seschrock 
1471544Seschrock 	for (handler = list_head(&inject_handlers); handler != NULL;
1481544Seschrock 	    handler = list_next(&inject_handlers, handler)) {
1491544Seschrock 
1501544Seschrock 		/* Ignore errors not destined for this pool */
1511544Seschrock 		if (zio->io_spa != handler->zi_spa)
1521544Seschrock 			continue;
1531544Seschrock 
15410594SGeorge.Wilson@Sun.COM 		/* Ignore device errors and panic injection */
15510594SGeorge.Wilson@Sun.COM 		if (handler->zi_record.zi_guid != 0 ||
15610921STim.Haley@Sun.COM 		    handler->zi_record.zi_func[0] != '\0' ||
15710921STim.Haley@Sun.COM 		    handler->zi_record.zi_duration != 0)
1581544Seschrock 			continue;
1591544Seschrock 
1601544Seschrock 		/* If this handler matches, return EIO */
1611544Seschrock 		if (zio_match_handler(&zio->io_logical->io_bookmark,
1621544Seschrock 		    zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE,
1631544Seschrock 		    &handler->zi_record, error)) {
1641544Seschrock 			ret = error;
1651544Seschrock 			break;
1661544Seschrock 		}
1671544Seschrock 	}
1681544Seschrock 
1691544Seschrock 	rw_exit(&inject_lock);
1701544Seschrock 
1711544Seschrock 	return (ret);
1721544Seschrock }
1731544Seschrock 
1746615Sgw25295 /*
1756615Sgw25295  * Determine if the zio is part of a label update and has an injection
1766615Sgw25295  * handler associated with that portion of the label. Currently, we
1776615Sgw25295  * allow error injection in either the nvlist or the uberblock region of
1786615Sgw25295  * of the vdev label.
1796615Sgw25295  */
1806615Sgw25295 int
zio_handle_label_injection(zio_t * zio,int error)1816615Sgw25295 zio_handle_label_injection(zio_t *zio, int error)
1826615Sgw25295 {
1836615Sgw25295 	inject_handler_t *handler;
1846615Sgw25295 	vdev_t *vd = zio->io_vd;
1856615Sgw25295 	uint64_t offset = zio->io_offset;
1866615Sgw25295 	int label;
1876615Sgw25295 	int ret = 0;
1886615Sgw25295 
18910685SGeorge.Wilson@Sun.COM 	if (offset >= VDEV_LABEL_START_SIZE &&
1906615Sgw25295 	    offset < vd->vdev_psize - VDEV_LABEL_END_SIZE)
1916615Sgw25295 		return (0);
1926615Sgw25295 
1936615Sgw25295 	rw_enter(&inject_lock, RW_READER);
1946615Sgw25295 
1956615Sgw25295 	for (handler = list_head(&inject_handlers); handler != NULL;
1966615Sgw25295 	    handler = list_next(&inject_handlers, handler)) {
1976615Sgw25295 		uint64_t start = handler->zi_record.zi_start;
1986615Sgw25295 		uint64_t end = handler->zi_record.zi_end;
1996615Sgw25295 
20010594SGeorge.Wilson@Sun.COM 		/* Ignore device only faults or panic injection */
20110594SGeorge.Wilson@Sun.COM 		if (handler->zi_record.zi_start == 0 ||
20210921STim.Haley@Sun.COM 		    handler->zi_record.zi_func[0] != '\0' ||
20310921STim.Haley@Sun.COM 		    handler->zi_record.zi_duration != 0)
2046615Sgw25295 			continue;
2056615Sgw25295 
2066615Sgw25295 		/*
2076615Sgw25295 		 * The injection region is the relative offsets within a
2086615Sgw25295 		 * vdev label. We must determine the label which is being
2096615Sgw25295 		 * updated and adjust our region accordingly.
2106615Sgw25295 		 */
2116615Sgw25295 		label = vdev_label_number(vd->vdev_psize, offset);
2126615Sgw25295 		start = vdev_label_offset(vd->vdev_psize, label, start);
2136615Sgw25295 		end = vdev_label_offset(vd->vdev_psize, label, end);
2146615Sgw25295 
2156615Sgw25295 		if (zio->io_vd->vdev_guid == handler->zi_record.zi_guid &&
2166615Sgw25295 		    (offset >= start && offset <= end)) {
2176615Sgw25295 			ret = error;
2186615Sgw25295 			break;
2196615Sgw25295 		}
2206615Sgw25295 	}
2216615Sgw25295 	rw_exit(&inject_lock);
2226615Sgw25295 	return (ret);
2236615Sgw25295 }
2246615Sgw25295 
2256615Sgw25295 
2261544Seschrock int
zio_handle_device_injection(vdev_t * vd,zio_t * zio,int error)2279725SEric.Schrock@Sun.COM zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
2281544Seschrock {
2291544Seschrock 	inject_handler_t *handler;
2301544Seschrock 	int ret = 0;
2311544Seschrock 
23210685SGeorge.Wilson@Sun.COM 	/*
23310685SGeorge.Wilson@Sun.COM 	 * We skip over faults in the labels unless it's during
23410685SGeorge.Wilson@Sun.COM 	 * device open (i.e. zio == NULL).
23510685SGeorge.Wilson@Sun.COM 	 */
23610685SGeorge.Wilson@Sun.COM 	if (zio != NULL) {
23710685SGeorge.Wilson@Sun.COM 		uint64_t offset = zio->io_offset;
23810685SGeorge.Wilson@Sun.COM 
23910685SGeorge.Wilson@Sun.COM 		if (offset < VDEV_LABEL_START_SIZE ||
24010685SGeorge.Wilson@Sun.COM 		    offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE)
24111937SMark.Musante@Sun.COM 			return (0);
24210685SGeorge.Wilson@Sun.COM 	}
24310685SGeorge.Wilson@Sun.COM 
2441544Seschrock 	rw_enter(&inject_lock, RW_READER);
2451544Seschrock 
2461544Seschrock 	for (handler = list_head(&inject_handlers); handler != NULL;
2471544Seschrock 	    handler = list_next(&inject_handlers, handler)) {
2481544Seschrock 
24910921STim.Haley@Sun.COM 		/*
25010921STim.Haley@Sun.COM 		 * Ignore label specific faults, panic injection
25110921STim.Haley@Sun.COM 		 * or fake writes
25210921STim.Haley@Sun.COM 		 */
25310594SGeorge.Wilson@Sun.COM 		if (handler->zi_record.zi_start != 0 ||
25410921STim.Haley@Sun.COM 		    handler->zi_record.zi_func[0] != '\0' ||
25510921STim.Haley@Sun.COM 		    handler->zi_record.zi_duration != 0)
2566615Sgw25295 			continue;
2576615Sgw25295 
2581544Seschrock 		if (vd->vdev_guid == handler->zi_record.zi_guid) {
2599725SEric.Schrock@Sun.COM 			if (handler->zi_record.zi_failfast &&
2609725SEric.Schrock@Sun.COM 			    (zio == NULL || (zio->io_flags &
2619725SEric.Schrock@Sun.COM 			    (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))) {
2629725SEric.Schrock@Sun.COM 				continue;
2639725SEric.Schrock@Sun.COM 			}
2649725SEric.Schrock@Sun.COM 
26510685SGeorge.Wilson@Sun.COM 			/* Handle type specific I/O failures */
26610685SGeorge.Wilson@Sun.COM 			if (zio != NULL &&
26710685SGeorge.Wilson@Sun.COM 			    handler->zi_record.zi_iotype != ZIO_TYPES &&
26810685SGeorge.Wilson@Sun.COM 			    handler->zi_record.zi_iotype != zio->io_type)
26910685SGeorge.Wilson@Sun.COM 				continue;
27010685SGeorge.Wilson@Sun.COM 
2711544Seschrock 			if (handler->zi_record.zi_error == error) {
2721544Seschrock 				/*
2731544Seschrock 				 * For a failed open, pretend like the device
2741544Seschrock 				 * has gone away.
2751544Seschrock 				 */
2761544Seschrock 				if (error == ENXIO)
2771544Seschrock 					vd->vdev_stat.vs_aux =
2781544Seschrock 					    VDEV_AUX_OPEN_FAILED;
27912247SGeorge.Wilson@Sun.COM 
28012247SGeorge.Wilson@Sun.COM 				/*
28112247SGeorge.Wilson@Sun.COM 				 * Treat these errors as if they had been
28212247SGeorge.Wilson@Sun.COM 				 * retried so that all the appropriate stats
28312247SGeorge.Wilson@Sun.COM 				 * and FMA events are generated.
28412247SGeorge.Wilson@Sun.COM 				 */
28512247SGeorge.Wilson@Sun.COM 				if (!handler->zi_record.zi_failfast &&
28612247SGeorge.Wilson@Sun.COM 				    zio != NULL)
28712247SGeorge.Wilson@Sun.COM 					zio->io_flags |= ZIO_FLAG_IO_RETRY;
28812247SGeorge.Wilson@Sun.COM 
2891544Seschrock 				ret = error;
2901544Seschrock 				break;
2911544Seschrock 			}
2921544Seschrock 			if (handler->zi_record.zi_error == ENXIO) {
2931544Seschrock 				ret = EIO;
2941544Seschrock 				break;
2951544Seschrock 			}
2961544Seschrock 		}
2971544Seschrock 	}
2981544Seschrock 
2991544Seschrock 	rw_exit(&inject_lock);
3001544Seschrock 
3011544Seschrock 	return (ret);
3021544Seschrock }
3031544Seschrock 
3041544Seschrock /*
30510921STim.Haley@Sun.COM  * Simulate hardware that ignores cache flushes.  For requested number
30610921STim.Haley@Sun.COM  * of seconds nix the actual writing to disk.
30710921STim.Haley@Sun.COM  */
30810921STim.Haley@Sun.COM void
zio_handle_ignored_writes(zio_t * zio)30910921STim.Haley@Sun.COM zio_handle_ignored_writes(zio_t *zio)
31010921STim.Haley@Sun.COM {
31110921STim.Haley@Sun.COM 	inject_handler_t *handler;
31210921STim.Haley@Sun.COM 
31310921STim.Haley@Sun.COM 	rw_enter(&inject_lock, RW_READER);
31410921STim.Haley@Sun.COM 
31510921STim.Haley@Sun.COM 	for (handler = list_head(&inject_handlers); handler != NULL;
31610921STim.Haley@Sun.COM 	    handler = list_next(&inject_handlers, handler)) {
31710921STim.Haley@Sun.COM 
31810921STim.Haley@Sun.COM 		/* Ignore errors not destined for this pool */
31910921STim.Haley@Sun.COM 		if (zio->io_spa != handler->zi_spa)
32010921STim.Haley@Sun.COM 			continue;
32110921STim.Haley@Sun.COM 
32210921STim.Haley@Sun.COM 		if (handler->zi_record.zi_duration == 0)
32310921STim.Haley@Sun.COM 			continue;
32410921STim.Haley@Sun.COM 
32510921STim.Haley@Sun.COM 		/*
32610921STim.Haley@Sun.COM 		 * Positive duration implies # of seconds, negative
32710921STim.Haley@Sun.COM 		 * a number of txgs
32810921STim.Haley@Sun.COM 		 */
32910921STim.Haley@Sun.COM 		if (handler->zi_record.zi_timer == 0) {
33010921STim.Haley@Sun.COM 			if (handler->zi_record.zi_duration > 0)
33111066Srafael.vanoni@sun.com 				handler->zi_record.zi_timer = ddi_get_lbolt64();
33210921STim.Haley@Sun.COM 			else
33310921STim.Haley@Sun.COM 				handler->zi_record.zi_timer = zio->io_txg;
33410921STim.Haley@Sun.COM 		}
33511026STim.Haley@Sun.COM 
33611026STim.Haley@Sun.COM 		/* Have a "problem" writing 60% of the time */
33711026STim.Haley@Sun.COM 		if (spa_get_random(100) < 60)
33811026STim.Haley@Sun.COM 			zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES;
33910921STim.Haley@Sun.COM 		break;
34010921STim.Haley@Sun.COM 	}
34110921STim.Haley@Sun.COM 
34210921STim.Haley@Sun.COM 	rw_exit(&inject_lock);
34310921STim.Haley@Sun.COM }
34410921STim.Haley@Sun.COM 
34510921STim.Haley@Sun.COM void
spa_handle_ignored_writes(spa_t * spa)34610921STim.Haley@Sun.COM spa_handle_ignored_writes(spa_t *spa)
34710921STim.Haley@Sun.COM {
34810921STim.Haley@Sun.COM 	inject_handler_t *handler;
34910921STim.Haley@Sun.COM 
35010921STim.Haley@Sun.COM 	if (zio_injection_enabled == 0)
35110921STim.Haley@Sun.COM 		return;
35210921STim.Haley@Sun.COM 
35310921STim.Haley@Sun.COM 	rw_enter(&inject_lock, RW_READER);
35410921STim.Haley@Sun.COM 
35510921STim.Haley@Sun.COM 	for (handler = list_head(&inject_handlers); handler != NULL;
35610921STim.Haley@Sun.COM 	    handler = list_next(&inject_handlers, handler)) {
35710921STim.Haley@Sun.COM 
35810921STim.Haley@Sun.COM 		/* Ignore errors not destined for this pool */
35910921STim.Haley@Sun.COM 		if (spa != handler->zi_spa)
36010921STim.Haley@Sun.COM 			continue;
36110921STim.Haley@Sun.COM 
36210921STim.Haley@Sun.COM 		if (handler->zi_record.zi_duration == 0)
36310921STim.Haley@Sun.COM 			continue;
36410921STim.Haley@Sun.COM 
36510921STim.Haley@Sun.COM 		if (handler->zi_record.zi_duration > 0) {
36610921STim.Haley@Sun.COM 			VERIFY(handler->zi_record.zi_timer == 0 ||
36710921STim.Haley@Sun.COM 			    handler->zi_record.zi_timer +
36811066Srafael.vanoni@sun.com 			    handler->zi_record.zi_duration * hz >
36911066Srafael.vanoni@sun.com 			    ddi_get_lbolt64());
37010921STim.Haley@Sun.COM 		} else {
37110921STim.Haley@Sun.COM 			/* duration is negative so the subtraction here adds */
37210921STim.Haley@Sun.COM 			VERIFY(handler->zi_record.zi_timer == 0 ||
37310921STim.Haley@Sun.COM 			    handler->zi_record.zi_timer -
37410921STim.Haley@Sun.COM 			    handler->zi_record.zi_duration >=
37510922SJeff.Bonwick@Sun.COM 			    spa_syncing_txg(spa));
37610921STim.Haley@Sun.COM 		}
37710921STim.Haley@Sun.COM 	}
37810921STim.Haley@Sun.COM 
37910921STim.Haley@Sun.COM 	rw_exit(&inject_lock);
38010921STim.Haley@Sun.COM }
38110921STim.Haley@Sun.COM 
38210921STim.Haley@Sun.COM /*
3831544Seschrock  * Create a new handler for the given record.  We add it to the list, adding
3841544Seschrock  * a reference to the spa_t in the process.  We increment zio_injection_enabled,
3851544Seschrock  * which is the switch to trigger all fault injection.
3861544Seschrock  */
3871544Seschrock int
zio_inject_fault(char * name,int flags,int * id,zinject_record_t * record)3881544Seschrock zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
3891544Seschrock {
3901544Seschrock 	inject_handler_t *handler;
3911544Seschrock 	int error;
3921544Seschrock 	spa_t *spa;
3931544Seschrock 
3941544Seschrock 	/*
3951544Seschrock 	 * If this is pool-wide metadata, make sure we unload the corresponding
3961544Seschrock 	 * spa_t, so that the next attempt to load it will trigger the fault.
3971544Seschrock 	 * We call spa_reset() to unload the pool appropriately.
3981544Seschrock 	 */
3991544Seschrock 	if (flags & ZINJECT_UNLOAD_SPA)
4001544Seschrock 		if ((error = spa_reset(name)) != 0)
4011544Seschrock 			return (error);
4021544Seschrock 
4031544Seschrock 	if (!(flags & ZINJECT_NULL)) {
4041544Seschrock 		/*
4051544Seschrock 		 * spa_inject_ref() will add an injection reference, which will
4061544Seschrock 		 * prevent the pool from being removed from the namespace while
4071544Seschrock 		 * still allowing it to be unloaded.
4081544Seschrock 		 */
4091544Seschrock 		if ((spa = spa_inject_addref(name)) == NULL)
4101544Seschrock 			return (ENOENT);
4111544Seschrock 
4121544Seschrock 		handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP);
4131544Seschrock 
4141544Seschrock 		rw_enter(&inject_lock, RW_WRITER);
4151544Seschrock 
4161544Seschrock 		*id = handler->zi_id = inject_next_id++;
4171544Seschrock 		handler->zi_spa = spa;
4181544Seschrock 		handler->zi_record = *record;
4191544Seschrock 		list_insert_tail(&inject_handlers, handler);
4201544Seschrock 		atomic_add_32(&zio_injection_enabled, 1);
4211544Seschrock 
4221544Seschrock 		rw_exit(&inject_lock);
4231544Seschrock 	}
4241544Seschrock 
4251544Seschrock 	/*
4261544Seschrock 	 * Flush the ARC, so that any attempts to read this data will end up
4271544Seschrock 	 * going to the ZIO layer.  Note that this is a little overkill, but
4281544Seschrock 	 * we don't have the necessary ARC interfaces to do anything else, and
4291544Seschrock 	 * fault injection isn't a performance critical path.
4301544Seschrock 	 */
4311544Seschrock 	if (flags & ZINJECT_FLUSH_ARC)
4325642Smaybee 		arc_flush(NULL);
4331544Seschrock 
4341544Seschrock 	return (0);
4351544Seschrock }
4361544Seschrock 
4371544Seschrock /*
4381544Seschrock  * Returns the next record with an ID greater than that supplied to the
4391544Seschrock  * function.  Used to iterate over all handlers in the system.
4401544Seschrock  */
4411544Seschrock int
zio_inject_list_next(int * id,char * name,size_t buflen,zinject_record_t * record)4421544Seschrock zio_inject_list_next(int *id, char *name, size_t buflen,
4431544Seschrock     zinject_record_t *record)
4441544Seschrock {
4451544Seschrock 	inject_handler_t *handler;
4461544Seschrock 	int ret;
4471544Seschrock 
4481544Seschrock 	mutex_enter(&spa_namespace_lock);
4491544Seschrock 	rw_enter(&inject_lock, RW_READER);
4501544Seschrock 
4511544Seschrock 	for (handler = list_head(&inject_handlers); handler != NULL;
4521544Seschrock 	    handler = list_next(&inject_handlers, handler))
4531544Seschrock 		if (handler->zi_id > *id)
4541544Seschrock 			break;
4551544Seschrock 
4561544Seschrock 	if (handler) {
4571544Seschrock 		*record = handler->zi_record;
4581544Seschrock 		*id = handler->zi_id;
4591544Seschrock 		(void) strncpy(name, spa_name(handler->zi_spa), buflen);
4601544Seschrock 		ret = 0;
4611544Seschrock 	} else {
4621544Seschrock 		ret = ENOENT;
4631544Seschrock 	}
4641544Seschrock 
4651544Seschrock 	rw_exit(&inject_lock);
4661544Seschrock 	mutex_exit(&spa_namespace_lock);
4671544Seschrock 
4681544Seschrock 	return (ret);
4691544Seschrock }
4701544Seschrock 
4711544Seschrock /*
4721544Seschrock  * Clear the fault handler with the given identifier, or return ENOENT if none
4731544Seschrock  * exists.
4741544Seschrock  */
4751544Seschrock int
zio_clear_fault(int id)4761544Seschrock zio_clear_fault(int id)
4771544Seschrock {
4781544Seschrock 	inject_handler_t *handler;
4791544Seschrock 
4801544Seschrock 	rw_enter(&inject_lock, RW_WRITER);
4811544Seschrock 
4821544Seschrock 	for (handler = list_head(&inject_handlers); handler != NULL;
4831544Seschrock 	    handler = list_next(&inject_handlers, handler))
4841544Seschrock 		if (handler->zi_id == id)
4851544Seschrock 			break;
4861544Seschrock 
4871544Seschrock 	if (handler == NULL) {
488*12662SMark.Musante@Sun.COM 		rw_exit(&inject_lock);
489*12662SMark.Musante@Sun.COM 		return (ENOENT);
4901544Seschrock 	}
4911544Seschrock 
492*12662SMark.Musante@Sun.COM 	list_remove(&inject_handlers, handler);
4931544Seschrock 	rw_exit(&inject_lock);
4941544Seschrock 
495*12662SMark.Musante@Sun.COM 	spa_inject_delref(handler->zi_spa);
496*12662SMark.Musante@Sun.COM 	kmem_free(handler, sizeof (inject_handler_t));
497*12662SMark.Musante@Sun.COM 	atomic_add_32(&zio_injection_enabled, -1);
498*12662SMark.Musante@Sun.COM 
499*12662SMark.Musante@Sun.COM 	return (0);
5001544Seschrock }
5011544Seschrock 
5021544Seschrock void
zio_inject_init(void)5031544Seschrock zio_inject_init(void)
5041544Seschrock {
5057313SEric.Kustarz@Sun.COM 	rw_init(&inject_lock, NULL, RW_DEFAULT, NULL);
5061544Seschrock 	list_create(&inject_handlers, sizeof (inject_handler_t),
5071544Seschrock 	    offsetof(inject_handler_t, zi_link));
5081544Seschrock }
5091544Seschrock 
5101544Seschrock void
zio_inject_fini(void)5111544Seschrock zio_inject_fini(void)
5121544Seschrock {
5131544Seschrock 	list_destroy(&inject_handlers);
5147313SEric.Kustarz@Sun.COM 	rw_destroy(&inject_lock);
5151544Seschrock }
516