11544Seschrock /* 21544Seschrock * CDDL HEADER START 31544Seschrock * 41544Seschrock * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 71544Seschrock * 81544Seschrock * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 91544Seschrock * or http://www.opensolaris.org/os/licensing. 101544Seschrock * See the License for the specific language governing permissions 111544Seschrock * and limitations under the License. 121544Seschrock * 131544Seschrock * When distributing Covered Code, include this CDDL HEADER in each 141544Seschrock * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 151544Seschrock * If applicable, add the following below this CDDL HEADER, with the 161544Seschrock * fields enclosed by brackets "[]" replaced with your own identifying 171544Seschrock * information: Portions Copyright [yyyy] [name of copyright owner] 181544Seschrock * 191544Seschrock * CDDL HEADER END 201544Seschrock */ 211544Seschrock /* 229725SEric.Schrock@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 231544Seschrock * Use is subject to license terms. 241544Seschrock */ 251544Seschrock 261544Seschrock /* 271544Seschrock * ZFS fault injection 281544Seschrock * 291544Seschrock * To handle fault injection, we keep track of a series of zinject_record_t 301544Seschrock * structures which describe which logical block(s) should be injected with a 311544Seschrock * fault. These are kept in a global list. Each record corresponds to a given 321544Seschrock * spa_t and maintains a special hold on the spa_t so that it cannot be deleted 331544Seschrock * or exported while the injection record exists. 341544Seschrock * 351544Seschrock * Device level injection is done using the 'zi_guid' field. If this is set, it 361544Seschrock * means that the error is destined for a particular device, not a piece of 371544Seschrock * data. 381544Seschrock * 391544Seschrock * This is a rather poor data structure and algorithm, but we don't expect more 401544Seschrock * than a few faults at any one time, so it should be sufficient for our needs. 411544Seschrock */ 421544Seschrock 431544Seschrock #include <sys/arc.h> 441544Seschrock #include <sys/zio_impl.h> 451544Seschrock #include <sys/zfs_ioctl.h> 461544Seschrock #include <sys/spa_impl.h> 471544Seschrock #include <sys/vdev_impl.h> 486615Sgw25295 #include <sys/fs/zfs.h> 491544Seschrock 501544Seschrock uint32_t zio_injection_enabled; 511544Seschrock 521544Seschrock typedef struct inject_handler { 531544Seschrock int zi_id; 541544Seschrock spa_t *zi_spa; 551544Seschrock zinject_record_t zi_record; 561544Seschrock list_node_t zi_link; 571544Seschrock } inject_handler_t; 581544Seschrock 591544Seschrock static list_t inject_handlers; 601544Seschrock static krwlock_t inject_lock; 611544Seschrock static int inject_next_id = 1; 621544Seschrock 631544Seschrock /* 641544Seschrock * Returns true if the given record matches the I/O in progress. 651544Seschrock */ 661544Seschrock static boolean_t 671544Seschrock zio_match_handler(zbookmark_t *zb, uint64_t type, 681544Seschrock zinject_record_t *record, int error) 691544Seschrock { 701544Seschrock /* 711544Seschrock * Check for a match against the MOS, which is based on type 721544Seschrock */ 731544Seschrock if (zb->zb_objset == 0 && record->zi_objset == 0 && 741544Seschrock record->zi_object == 0) { 751544Seschrock if (record->zi_type == DMU_OT_NONE || 761544Seschrock type == record->zi_type) 771544Seschrock return (record->zi_freq == 0 || 781544Seschrock spa_get_random(100) < record->zi_freq); 791544Seschrock else 801544Seschrock return (B_FALSE); 811544Seschrock } 821544Seschrock 831544Seschrock /* 841544Seschrock * Check for an exact match. 851544Seschrock */ 861544Seschrock if (zb->zb_objset == record->zi_objset && 871544Seschrock zb->zb_object == record->zi_object && 881544Seschrock zb->zb_level == record->zi_level && 891544Seschrock zb->zb_blkid >= record->zi_start && 901544Seschrock zb->zb_blkid <= record->zi_end && 911544Seschrock error == record->zi_error) 921544Seschrock return (record->zi_freq == 0 || 931544Seschrock spa_get_random(100) < record->zi_freq); 941544Seschrock 951544Seschrock return (B_FALSE); 961544Seschrock } 971544Seschrock 981544Seschrock /* 9910594SGeorge.Wilson@Sun.COM * Panic the system when a config change happens in the function 10010594SGeorge.Wilson@Sun.COM * specified by tag. 10110594SGeorge.Wilson@Sun.COM */ 10210594SGeorge.Wilson@Sun.COM void 10310594SGeorge.Wilson@Sun.COM zio_handle_panic_injection(spa_t *spa, char *tag) 10410594SGeorge.Wilson@Sun.COM { 10510594SGeorge.Wilson@Sun.COM inject_handler_t *handler; 10610594SGeorge.Wilson@Sun.COM 10710594SGeorge.Wilson@Sun.COM rw_enter(&inject_lock, RW_READER); 10810594SGeorge.Wilson@Sun.COM 10910594SGeorge.Wilson@Sun.COM for (handler = list_head(&inject_handlers); handler != NULL; 11010594SGeorge.Wilson@Sun.COM handler = list_next(&inject_handlers, handler)) { 11110594SGeorge.Wilson@Sun.COM 11210594SGeorge.Wilson@Sun.COM if (spa != handler->zi_spa) 11310594SGeorge.Wilson@Sun.COM continue; 11410594SGeorge.Wilson@Sun.COM 11510594SGeorge.Wilson@Sun.COM if (strcmp(tag, handler->zi_record.zi_func) == 0) 11610594SGeorge.Wilson@Sun.COM panic("Panic requested in function %s\n", tag); 11710594SGeorge.Wilson@Sun.COM } 11810594SGeorge.Wilson@Sun.COM 11910594SGeorge.Wilson@Sun.COM rw_exit(&inject_lock); 12010594SGeorge.Wilson@Sun.COM } 12110594SGeorge.Wilson@Sun.COM 12210594SGeorge.Wilson@Sun.COM /* 1231544Seschrock * Determine if the I/O in question should return failure. Returns the errno 1241544Seschrock * to be returned to the caller. 1251544Seschrock */ 1261544Seschrock int 1271544Seschrock zio_handle_fault_injection(zio_t *zio, int error) 1281544Seschrock { 1291544Seschrock int ret = 0; 1301544Seschrock inject_handler_t *handler; 1311544Seschrock 1321544Seschrock /* 1331544Seschrock * Ignore I/O not associated with any logical data. 1341544Seschrock */ 1351544Seschrock if (zio->io_logical == NULL) 1361544Seschrock return (0); 1371544Seschrock 1381544Seschrock /* 1391544Seschrock * Currently, we only support fault injection on reads. 1401544Seschrock */ 1411544Seschrock if (zio->io_type != ZIO_TYPE_READ) 1421544Seschrock return (0); 1431544Seschrock 1441544Seschrock rw_enter(&inject_lock, RW_READER); 1451544Seschrock 1461544Seschrock for (handler = list_head(&inject_handlers); handler != NULL; 1471544Seschrock handler = list_next(&inject_handlers, handler)) { 1481544Seschrock 1491544Seschrock /* Ignore errors not destined for this pool */ 1501544Seschrock if (zio->io_spa != handler->zi_spa) 1511544Seschrock continue; 1521544Seschrock 15310594SGeorge.Wilson@Sun.COM /* Ignore device errors and panic injection */ 15410594SGeorge.Wilson@Sun.COM if (handler->zi_record.zi_guid != 0 || 15510594SGeorge.Wilson@Sun.COM handler->zi_record.zi_func[0] != '\0') 1561544Seschrock continue; 1571544Seschrock 1581544Seschrock /* If this handler matches, return EIO */ 1591544Seschrock if (zio_match_handler(&zio->io_logical->io_bookmark, 1601544Seschrock zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE, 1611544Seschrock &handler->zi_record, error)) { 1621544Seschrock ret = error; 1631544Seschrock break; 1641544Seschrock } 1651544Seschrock } 1661544Seschrock 1671544Seschrock rw_exit(&inject_lock); 1681544Seschrock 1691544Seschrock return (ret); 1701544Seschrock } 1711544Seschrock 1726615Sgw25295 /* 1736615Sgw25295 * Determine if the zio is part of a label update and has an injection 1746615Sgw25295 * handler associated with that portion of the label. Currently, we 1756615Sgw25295 * allow error injection in either the nvlist or the uberblock region of 1766615Sgw25295 * of the vdev label. 1776615Sgw25295 */ 1786615Sgw25295 int 1796615Sgw25295 zio_handle_label_injection(zio_t *zio, int error) 1806615Sgw25295 { 1816615Sgw25295 inject_handler_t *handler; 1826615Sgw25295 vdev_t *vd = zio->io_vd; 1836615Sgw25295 uint64_t offset = zio->io_offset; 1846615Sgw25295 int label; 1856615Sgw25295 int ret = 0; 1866615Sgw25295 187*10685SGeorge.Wilson@Sun.COM if (offset >= VDEV_LABEL_START_SIZE && 1886615Sgw25295 offset < vd->vdev_psize - VDEV_LABEL_END_SIZE) 1896615Sgw25295 return (0); 1906615Sgw25295 1916615Sgw25295 rw_enter(&inject_lock, RW_READER); 1926615Sgw25295 1936615Sgw25295 for (handler = list_head(&inject_handlers); handler != NULL; 1946615Sgw25295 handler = list_next(&inject_handlers, handler)) { 1956615Sgw25295 uint64_t start = handler->zi_record.zi_start; 1966615Sgw25295 uint64_t end = handler->zi_record.zi_end; 1976615Sgw25295 19810594SGeorge.Wilson@Sun.COM /* Ignore device only faults or panic injection */ 19910594SGeorge.Wilson@Sun.COM if (handler->zi_record.zi_start == 0 || 20010594SGeorge.Wilson@Sun.COM handler->zi_record.zi_func[0] != '\0') 2016615Sgw25295 continue; 2026615Sgw25295 2036615Sgw25295 /* 2046615Sgw25295 * The injection region is the relative offsets within a 2056615Sgw25295 * vdev label. We must determine the label which is being 2066615Sgw25295 * updated and adjust our region accordingly. 2076615Sgw25295 */ 2086615Sgw25295 label = vdev_label_number(vd->vdev_psize, offset); 2096615Sgw25295 start = vdev_label_offset(vd->vdev_psize, label, start); 2106615Sgw25295 end = vdev_label_offset(vd->vdev_psize, label, end); 2116615Sgw25295 2126615Sgw25295 if (zio->io_vd->vdev_guid == handler->zi_record.zi_guid && 2136615Sgw25295 (offset >= start && offset <= end)) { 2146615Sgw25295 ret = error; 2156615Sgw25295 break; 2166615Sgw25295 } 2176615Sgw25295 } 2186615Sgw25295 rw_exit(&inject_lock); 2196615Sgw25295 return (ret); 2206615Sgw25295 } 2216615Sgw25295 2226615Sgw25295 2231544Seschrock int 2249725SEric.Schrock@Sun.COM zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error) 2251544Seschrock { 2261544Seschrock inject_handler_t *handler; 2271544Seschrock int ret = 0; 2281544Seschrock 229*10685SGeorge.Wilson@Sun.COM /* 230*10685SGeorge.Wilson@Sun.COM * We skip over faults in the labels unless it's during 231*10685SGeorge.Wilson@Sun.COM * device open (i.e. zio == NULL). 232*10685SGeorge.Wilson@Sun.COM */ 233*10685SGeorge.Wilson@Sun.COM if (zio != NULL) { 234*10685SGeorge.Wilson@Sun.COM uint64_t offset = zio->io_offset; 235*10685SGeorge.Wilson@Sun.COM 236*10685SGeorge.Wilson@Sun.COM if (offset < VDEV_LABEL_START_SIZE || 237*10685SGeorge.Wilson@Sun.COM offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE) 238*10685SGeorge.Wilson@Sun.COM return (0); 239*10685SGeorge.Wilson@Sun.COM } 240*10685SGeorge.Wilson@Sun.COM 2411544Seschrock rw_enter(&inject_lock, RW_READER); 2421544Seschrock 2431544Seschrock for (handler = list_head(&inject_handlers); handler != NULL; 2441544Seschrock handler = list_next(&inject_handlers, handler)) { 2451544Seschrock 24610594SGeorge.Wilson@Sun.COM /* Ignore label specific faults or panic injection */ 24710594SGeorge.Wilson@Sun.COM if (handler->zi_record.zi_start != 0 || 24810594SGeorge.Wilson@Sun.COM handler->zi_record.zi_func[0] != '\0') 2496615Sgw25295 continue; 2506615Sgw25295 2511544Seschrock if (vd->vdev_guid == handler->zi_record.zi_guid) { 2529725SEric.Schrock@Sun.COM if (handler->zi_record.zi_failfast && 2539725SEric.Schrock@Sun.COM (zio == NULL || (zio->io_flags & 2549725SEric.Schrock@Sun.COM (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))) { 2559725SEric.Schrock@Sun.COM continue; 2569725SEric.Schrock@Sun.COM } 2579725SEric.Schrock@Sun.COM 258*10685SGeorge.Wilson@Sun.COM /* Handle type specific I/O failures */ 259*10685SGeorge.Wilson@Sun.COM if (zio != NULL && 260*10685SGeorge.Wilson@Sun.COM handler->zi_record.zi_iotype != ZIO_TYPES && 261*10685SGeorge.Wilson@Sun.COM handler->zi_record.zi_iotype != zio->io_type) 262*10685SGeorge.Wilson@Sun.COM continue; 263*10685SGeorge.Wilson@Sun.COM 2641544Seschrock if (handler->zi_record.zi_error == error) { 2651544Seschrock /* 2661544Seschrock * For a failed open, pretend like the device 2671544Seschrock * has gone away. 2681544Seschrock */ 2691544Seschrock if (error == ENXIO) 2701544Seschrock vd->vdev_stat.vs_aux = 2711544Seschrock VDEV_AUX_OPEN_FAILED; 2721544Seschrock ret = error; 2731544Seschrock break; 2741544Seschrock } 2751544Seschrock if (handler->zi_record.zi_error == ENXIO) { 2761544Seschrock ret = EIO; 2771544Seschrock break; 2781544Seschrock } 2791544Seschrock } 2801544Seschrock } 2811544Seschrock 2821544Seschrock rw_exit(&inject_lock); 2831544Seschrock 2841544Seschrock return (ret); 2851544Seschrock } 2861544Seschrock 2871544Seschrock /* 2881544Seschrock * Create a new handler for the given record. We add it to the list, adding 2891544Seschrock * a reference to the spa_t in the process. We increment zio_injection_enabled, 2901544Seschrock * which is the switch to trigger all fault injection. 2911544Seschrock */ 2921544Seschrock int 2931544Seschrock zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record) 2941544Seschrock { 2951544Seschrock inject_handler_t *handler; 2961544Seschrock int error; 2971544Seschrock spa_t *spa; 2981544Seschrock 2991544Seschrock /* 3001544Seschrock * If this is pool-wide metadata, make sure we unload the corresponding 3011544Seschrock * spa_t, so that the next attempt to load it will trigger the fault. 3021544Seschrock * We call spa_reset() to unload the pool appropriately. 3031544Seschrock */ 3041544Seschrock if (flags & ZINJECT_UNLOAD_SPA) 3051544Seschrock if ((error = spa_reset(name)) != 0) 3061544Seschrock return (error); 3071544Seschrock 3081544Seschrock if (!(flags & ZINJECT_NULL)) { 3091544Seschrock /* 3101544Seschrock * spa_inject_ref() will add an injection reference, which will 3111544Seschrock * prevent the pool from being removed from the namespace while 3121544Seschrock * still allowing it to be unloaded. 3131544Seschrock */ 3141544Seschrock if ((spa = spa_inject_addref(name)) == NULL) 3151544Seschrock return (ENOENT); 3161544Seschrock 3171544Seschrock handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP); 3181544Seschrock 3191544Seschrock rw_enter(&inject_lock, RW_WRITER); 3201544Seschrock 3211544Seschrock *id = handler->zi_id = inject_next_id++; 3221544Seschrock handler->zi_spa = spa; 3231544Seschrock handler->zi_record = *record; 3241544Seschrock list_insert_tail(&inject_handlers, handler); 3251544Seschrock atomic_add_32(&zio_injection_enabled, 1); 3261544Seschrock 3271544Seschrock rw_exit(&inject_lock); 3281544Seschrock } 3291544Seschrock 3301544Seschrock /* 3311544Seschrock * Flush the ARC, so that any attempts to read this data will end up 3321544Seschrock * going to the ZIO layer. Note that this is a little overkill, but 3331544Seschrock * we don't have the necessary ARC interfaces to do anything else, and 3341544Seschrock * fault injection isn't a performance critical path. 3351544Seschrock */ 3361544Seschrock if (flags & ZINJECT_FLUSH_ARC) 3375642Smaybee arc_flush(NULL); 3381544Seschrock 3391544Seschrock return (0); 3401544Seschrock } 3411544Seschrock 3421544Seschrock /* 3431544Seschrock * Returns the next record with an ID greater than that supplied to the 3441544Seschrock * function. Used to iterate over all handlers in the system. 3451544Seschrock */ 3461544Seschrock int 3471544Seschrock zio_inject_list_next(int *id, char *name, size_t buflen, 3481544Seschrock zinject_record_t *record) 3491544Seschrock { 3501544Seschrock inject_handler_t *handler; 3511544Seschrock int ret; 3521544Seschrock 3531544Seschrock mutex_enter(&spa_namespace_lock); 3541544Seschrock rw_enter(&inject_lock, RW_READER); 3551544Seschrock 3561544Seschrock for (handler = list_head(&inject_handlers); handler != NULL; 3571544Seschrock handler = list_next(&inject_handlers, handler)) 3581544Seschrock if (handler->zi_id > *id) 3591544Seschrock break; 3601544Seschrock 3611544Seschrock if (handler) { 3621544Seschrock *record = handler->zi_record; 3631544Seschrock *id = handler->zi_id; 3641544Seschrock (void) strncpy(name, spa_name(handler->zi_spa), buflen); 3651544Seschrock ret = 0; 3661544Seschrock } else { 3671544Seschrock ret = ENOENT; 3681544Seschrock } 3691544Seschrock 3701544Seschrock rw_exit(&inject_lock); 3711544Seschrock mutex_exit(&spa_namespace_lock); 3721544Seschrock 3731544Seschrock return (ret); 3741544Seschrock } 3751544Seschrock 3761544Seschrock /* 3771544Seschrock * Clear the fault handler with the given identifier, or return ENOENT if none 3781544Seschrock * exists. 3791544Seschrock */ 3801544Seschrock int 3811544Seschrock zio_clear_fault(int id) 3821544Seschrock { 3831544Seschrock inject_handler_t *handler; 3841544Seschrock int ret; 3851544Seschrock 3861544Seschrock rw_enter(&inject_lock, RW_WRITER); 3871544Seschrock 3881544Seschrock for (handler = list_head(&inject_handlers); handler != NULL; 3891544Seschrock handler = list_next(&inject_handlers, handler)) 3901544Seschrock if (handler->zi_id == id) 3911544Seschrock break; 3921544Seschrock 3931544Seschrock if (handler == NULL) { 3941544Seschrock ret = ENOENT; 3951544Seschrock } else { 3961544Seschrock list_remove(&inject_handlers, handler); 3971544Seschrock spa_inject_delref(handler->zi_spa); 3981544Seschrock kmem_free(handler, sizeof (inject_handler_t)); 3991544Seschrock atomic_add_32(&zio_injection_enabled, -1); 4001544Seschrock ret = 0; 4011544Seschrock } 4021544Seschrock 4031544Seschrock rw_exit(&inject_lock); 4041544Seschrock 4051544Seschrock return (ret); 4061544Seschrock } 4071544Seschrock 4081544Seschrock void 4091544Seschrock zio_inject_init(void) 4101544Seschrock { 4117313SEric.Kustarz@Sun.COM rw_init(&inject_lock, NULL, RW_DEFAULT, NULL); 4121544Seschrock list_create(&inject_handlers, sizeof (inject_handler_t), 4131544Seschrock offsetof(inject_handler_t, zi_link)); 4141544Seschrock } 4151544Seschrock 4161544Seschrock void 4171544Seschrock zio_inject_fini(void) 4181544Seschrock { 4191544Seschrock list_destroy(&inject_handlers); 4207313SEric.Kustarz@Sun.COM rw_destroy(&inject_lock); 4211544Seschrock } 422