11544Seschrock /* 21544Seschrock * CDDL HEADER START 31544Seschrock * 41544Seschrock * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 71544Seschrock * 81544Seschrock * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 91544Seschrock * or http://www.opensolaris.org/os/licensing. 101544Seschrock * See the License for the specific language governing permissions 111544Seschrock * and limitations under the License. 121544Seschrock * 131544Seschrock * When distributing Covered Code, include this CDDL HEADER in each 141544Seschrock * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 151544Seschrock * If applicable, add the following below this CDDL HEADER, with the 161544Seschrock * fields enclosed by brackets "[]" replaced with your own identifying 171544Seschrock * information: Portions Copyright [yyyy] [name of copyright owner] 181544Seschrock * 191544Seschrock * CDDL HEADER END 201544Seschrock */ 211544Seschrock /* 229725SEric.Schrock@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 231544Seschrock * Use is subject to license terms. 241544Seschrock */ 251544Seschrock 261544Seschrock /* 271544Seschrock * ZFS fault injection 281544Seschrock * 291544Seschrock * To handle fault injection, we keep track of a series of zinject_record_t 301544Seschrock * structures which describe which logical block(s) should be injected with a 311544Seschrock * fault. These are kept in a global list. Each record corresponds to a given 321544Seschrock * spa_t and maintains a special hold on the spa_t so that it cannot be deleted 331544Seschrock * or exported while the injection record exists. 341544Seschrock * 351544Seschrock * Device level injection is done using the 'zi_guid' field. If this is set, it 361544Seschrock * means that the error is destined for a particular device, not a piece of 371544Seschrock * data. 381544Seschrock * 391544Seschrock * This is a rather poor data structure and algorithm, but we don't expect more 401544Seschrock * than a few faults at any one time, so it should be sufficient for our needs. 411544Seschrock */ 421544Seschrock 431544Seschrock #include <sys/arc.h> 441544Seschrock #include <sys/zio_impl.h> 451544Seschrock #include <sys/zfs_ioctl.h> 461544Seschrock #include <sys/spa_impl.h> 471544Seschrock #include <sys/vdev_impl.h> 486615Sgw25295 #include <sys/fs/zfs.h> 491544Seschrock 501544Seschrock uint32_t zio_injection_enabled; 511544Seschrock 521544Seschrock typedef struct inject_handler { 531544Seschrock int zi_id; 541544Seschrock spa_t *zi_spa; 551544Seschrock zinject_record_t zi_record; 561544Seschrock list_node_t zi_link; 571544Seschrock } inject_handler_t; 581544Seschrock 591544Seschrock static list_t inject_handlers; 601544Seschrock static krwlock_t inject_lock; 611544Seschrock static int inject_next_id = 1; 621544Seschrock 631544Seschrock /* 641544Seschrock * Returns true if the given record matches the I/O in progress. 651544Seschrock */ 661544Seschrock static boolean_t 671544Seschrock zio_match_handler(zbookmark_t *zb, uint64_t type, 681544Seschrock zinject_record_t *record, int error) 691544Seschrock { 701544Seschrock /* 711544Seschrock * Check for a match against the MOS, which is based on type 721544Seschrock */ 731544Seschrock if (zb->zb_objset == 0 && record->zi_objset == 0 && 741544Seschrock record->zi_object == 0) { 751544Seschrock if (record->zi_type == DMU_OT_NONE || 761544Seschrock type == record->zi_type) 771544Seschrock return (record->zi_freq == 0 || 781544Seschrock spa_get_random(100) < record->zi_freq); 791544Seschrock else 801544Seschrock return (B_FALSE); 811544Seschrock } 821544Seschrock 831544Seschrock /* 841544Seschrock * Check for an exact match. 851544Seschrock */ 861544Seschrock if (zb->zb_objset == record->zi_objset && 871544Seschrock zb->zb_object == record->zi_object && 881544Seschrock zb->zb_level == record->zi_level && 891544Seschrock zb->zb_blkid >= record->zi_start && 901544Seschrock zb->zb_blkid <= record->zi_end && 911544Seschrock error == record->zi_error) 921544Seschrock return (record->zi_freq == 0 || 931544Seschrock spa_get_random(100) < record->zi_freq); 941544Seschrock 951544Seschrock return (B_FALSE); 961544Seschrock } 971544Seschrock 981544Seschrock /* 99*10594SGeorge.Wilson@Sun.COM * Panic the system when a config change happens in the function 100*10594SGeorge.Wilson@Sun.COM * specified by tag. 101*10594SGeorge.Wilson@Sun.COM */ 102*10594SGeorge.Wilson@Sun.COM void 103*10594SGeorge.Wilson@Sun.COM zio_handle_panic_injection(spa_t *spa, char *tag) 104*10594SGeorge.Wilson@Sun.COM { 105*10594SGeorge.Wilson@Sun.COM inject_handler_t *handler; 106*10594SGeorge.Wilson@Sun.COM 107*10594SGeorge.Wilson@Sun.COM rw_enter(&inject_lock, RW_READER); 108*10594SGeorge.Wilson@Sun.COM 109*10594SGeorge.Wilson@Sun.COM for (handler = list_head(&inject_handlers); handler != NULL; 110*10594SGeorge.Wilson@Sun.COM handler = list_next(&inject_handlers, handler)) { 111*10594SGeorge.Wilson@Sun.COM 112*10594SGeorge.Wilson@Sun.COM if (spa != handler->zi_spa) 113*10594SGeorge.Wilson@Sun.COM continue; 114*10594SGeorge.Wilson@Sun.COM 115*10594SGeorge.Wilson@Sun.COM if (strcmp(tag, handler->zi_record.zi_func) == 0) 116*10594SGeorge.Wilson@Sun.COM panic("Panic requested in function %s\n", tag); 117*10594SGeorge.Wilson@Sun.COM } 118*10594SGeorge.Wilson@Sun.COM 119*10594SGeorge.Wilson@Sun.COM rw_exit(&inject_lock); 120*10594SGeorge.Wilson@Sun.COM } 121*10594SGeorge.Wilson@Sun.COM 122*10594SGeorge.Wilson@Sun.COM /* 1231544Seschrock * Determine if the I/O in question should return failure. Returns the errno 1241544Seschrock * to be returned to the caller. 1251544Seschrock */ 1261544Seschrock int 1271544Seschrock zio_handle_fault_injection(zio_t *zio, int error) 1281544Seschrock { 1291544Seschrock int ret = 0; 1301544Seschrock inject_handler_t *handler; 1311544Seschrock 1321544Seschrock /* 1331544Seschrock * Ignore I/O not associated with any logical data. 1341544Seschrock */ 1351544Seschrock if (zio->io_logical == NULL) 1361544Seschrock return (0); 1371544Seschrock 1381544Seschrock /* 1391544Seschrock * Currently, we only support fault injection on reads. 1401544Seschrock */ 1411544Seschrock if (zio->io_type != ZIO_TYPE_READ) 1421544Seschrock return (0); 1431544Seschrock 1441544Seschrock rw_enter(&inject_lock, RW_READER); 1451544Seschrock 1461544Seschrock for (handler = list_head(&inject_handlers); handler != NULL; 1471544Seschrock handler = list_next(&inject_handlers, handler)) { 1481544Seschrock 1491544Seschrock /* Ignore errors not destined for this pool */ 1501544Seschrock if (zio->io_spa != handler->zi_spa) 1511544Seschrock continue; 1521544Seschrock 153*10594SGeorge.Wilson@Sun.COM /* Ignore device errors and panic injection */ 154*10594SGeorge.Wilson@Sun.COM if (handler->zi_record.zi_guid != 0 || 155*10594SGeorge.Wilson@Sun.COM handler->zi_record.zi_func[0] != '\0') 1561544Seschrock continue; 1571544Seschrock 1581544Seschrock /* If this handler matches, return EIO */ 1591544Seschrock if (zio_match_handler(&zio->io_logical->io_bookmark, 1601544Seschrock zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE, 1611544Seschrock &handler->zi_record, error)) { 1621544Seschrock ret = error; 1631544Seschrock break; 1641544Seschrock } 1651544Seschrock } 1661544Seschrock 1671544Seschrock rw_exit(&inject_lock); 1681544Seschrock 1691544Seschrock return (ret); 1701544Seschrock } 1711544Seschrock 1726615Sgw25295 /* 1736615Sgw25295 * Determine if the zio is part of a label update and has an injection 1746615Sgw25295 * handler associated with that portion of the label. Currently, we 1756615Sgw25295 * allow error injection in either the nvlist or the uberblock region of 1766615Sgw25295 * of the vdev label. 1776615Sgw25295 */ 1786615Sgw25295 int 1796615Sgw25295 zio_handle_label_injection(zio_t *zio, int error) 1806615Sgw25295 { 1816615Sgw25295 inject_handler_t *handler; 1826615Sgw25295 vdev_t *vd = zio->io_vd; 1836615Sgw25295 uint64_t offset = zio->io_offset; 1846615Sgw25295 int label; 1856615Sgw25295 int ret = 0; 1866615Sgw25295 1876615Sgw25295 if (offset + zio->io_size > VDEV_LABEL_START_SIZE && 1886615Sgw25295 offset < vd->vdev_psize - VDEV_LABEL_END_SIZE) 1896615Sgw25295 return (0); 1906615Sgw25295 1916615Sgw25295 rw_enter(&inject_lock, RW_READER); 1926615Sgw25295 1936615Sgw25295 for (handler = list_head(&inject_handlers); handler != NULL; 1946615Sgw25295 handler = list_next(&inject_handlers, handler)) { 1956615Sgw25295 uint64_t start = handler->zi_record.zi_start; 1966615Sgw25295 uint64_t end = handler->zi_record.zi_end; 1976615Sgw25295 198*10594SGeorge.Wilson@Sun.COM /* Ignore device only faults or panic injection */ 199*10594SGeorge.Wilson@Sun.COM if (handler->zi_record.zi_start == 0 || 200*10594SGeorge.Wilson@Sun.COM handler->zi_record.zi_func[0] != '\0') 2016615Sgw25295 continue; 2026615Sgw25295 2036615Sgw25295 /* 2046615Sgw25295 * The injection region is the relative offsets within a 2056615Sgw25295 * vdev label. We must determine the label which is being 2066615Sgw25295 * updated and adjust our region accordingly. 2076615Sgw25295 */ 2086615Sgw25295 label = vdev_label_number(vd->vdev_psize, offset); 2096615Sgw25295 start = vdev_label_offset(vd->vdev_psize, label, start); 2106615Sgw25295 end = vdev_label_offset(vd->vdev_psize, label, end); 2116615Sgw25295 2126615Sgw25295 if (zio->io_vd->vdev_guid == handler->zi_record.zi_guid && 2136615Sgw25295 (offset >= start && offset <= end)) { 2146615Sgw25295 ret = error; 2156615Sgw25295 break; 2166615Sgw25295 } 2176615Sgw25295 } 2186615Sgw25295 rw_exit(&inject_lock); 2196615Sgw25295 return (ret); 2206615Sgw25295 } 2216615Sgw25295 2226615Sgw25295 2231544Seschrock int 2249725SEric.Schrock@Sun.COM zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error) 2251544Seschrock { 2261544Seschrock inject_handler_t *handler; 2271544Seschrock int ret = 0; 2281544Seschrock 2291544Seschrock rw_enter(&inject_lock, RW_READER); 2301544Seschrock 2311544Seschrock for (handler = list_head(&inject_handlers); handler != NULL; 2321544Seschrock handler = list_next(&inject_handlers, handler)) { 2331544Seschrock 234*10594SGeorge.Wilson@Sun.COM /* Ignore label specific faults or panic injection */ 235*10594SGeorge.Wilson@Sun.COM if (handler->zi_record.zi_start != 0 || 236*10594SGeorge.Wilson@Sun.COM handler->zi_record.zi_func[0] != '\0') 2376615Sgw25295 continue; 2386615Sgw25295 2391544Seschrock if (vd->vdev_guid == handler->zi_record.zi_guid) { 2409725SEric.Schrock@Sun.COM if (handler->zi_record.zi_failfast && 2419725SEric.Schrock@Sun.COM (zio == NULL || (zio->io_flags & 2429725SEric.Schrock@Sun.COM (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))) { 2439725SEric.Schrock@Sun.COM continue; 2449725SEric.Schrock@Sun.COM } 2459725SEric.Schrock@Sun.COM 2461544Seschrock if (handler->zi_record.zi_error == error) { 2471544Seschrock /* 2481544Seschrock * For a failed open, pretend like the device 2491544Seschrock * has gone away. 2501544Seschrock */ 2511544Seschrock if (error == ENXIO) 2521544Seschrock vd->vdev_stat.vs_aux = 2531544Seschrock VDEV_AUX_OPEN_FAILED; 2541544Seschrock ret = error; 2551544Seschrock break; 2561544Seschrock } 2571544Seschrock if (handler->zi_record.zi_error == ENXIO) { 2581544Seschrock ret = EIO; 2591544Seschrock break; 2601544Seschrock } 2611544Seschrock } 2621544Seschrock } 2631544Seschrock 2641544Seschrock rw_exit(&inject_lock); 2651544Seschrock 2661544Seschrock return (ret); 2671544Seschrock } 2681544Seschrock 2691544Seschrock /* 2701544Seschrock * Create a new handler for the given record. We add it to the list, adding 2711544Seschrock * a reference to the spa_t in the process. We increment zio_injection_enabled, 2721544Seschrock * which is the switch to trigger all fault injection. 2731544Seschrock */ 2741544Seschrock int 2751544Seschrock zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record) 2761544Seschrock { 2771544Seschrock inject_handler_t *handler; 2781544Seschrock int error; 2791544Seschrock spa_t *spa; 2801544Seschrock 2811544Seschrock /* 2821544Seschrock * If this is pool-wide metadata, make sure we unload the corresponding 2831544Seschrock * spa_t, so that the next attempt to load it will trigger the fault. 2841544Seschrock * We call spa_reset() to unload the pool appropriately. 2851544Seschrock */ 2861544Seschrock if (flags & ZINJECT_UNLOAD_SPA) 2871544Seschrock if ((error = spa_reset(name)) != 0) 2881544Seschrock return (error); 2891544Seschrock 2901544Seschrock if (!(flags & ZINJECT_NULL)) { 2911544Seschrock /* 2921544Seschrock * spa_inject_ref() will add an injection reference, which will 2931544Seschrock * prevent the pool from being removed from the namespace while 2941544Seschrock * still allowing it to be unloaded. 2951544Seschrock */ 2961544Seschrock if ((spa = spa_inject_addref(name)) == NULL) 2971544Seschrock return (ENOENT); 2981544Seschrock 2991544Seschrock handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP); 3001544Seschrock 3011544Seschrock rw_enter(&inject_lock, RW_WRITER); 3021544Seschrock 3031544Seschrock *id = handler->zi_id = inject_next_id++; 3041544Seschrock handler->zi_spa = spa; 3051544Seschrock handler->zi_record = *record; 3061544Seschrock list_insert_tail(&inject_handlers, handler); 3071544Seschrock atomic_add_32(&zio_injection_enabled, 1); 3081544Seschrock 3091544Seschrock rw_exit(&inject_lock); 3101544Seschrock } 3111544Seschrock 3121544Seschrock /* 3131544Seschrock * Flush the ARC, so that any attempts to read this data will end up 3141544Seschrock * going to the ZIO layer. Note that this is a little overkill, but 3151544Seschrock * we don't have the necessary ARC interfaces to do anything else, and 3161544Seschrock * fault injection isn't a performance critical path. 3171544Seschrock */ 3181544Seschrock if (flags & ZINJECT_FLUSH_ARC) 3195642Smaybee arc_flush(NULL); 3201544Seschrock 3211544Seschrock return (0); 3221544Seschrock } 3231544Seschrock 3241544Seschrock /* 3251544Seschrock * Returns the next record with an ID greater than that supplied to the 3261544Seschrock * function. Used to iterate over all handlers in the system. 3271544Seschrock */ 3281544Seschrock int 3291544Seschrock zio_inject_list_next(int *id, char *name, size_t buflen, 3301544Seschrock zinject_record_t *record) 3311544Seschrock { 3321544Seschrock inject_handler_t *handler; 3331544Seschrock int ret; 3341544Seschrock 3351544Seschrock mutex_enter(&spa_namespace_lock); 3361544Seschrock rw_enter(&inject_lock, RW_READER); 3371544Seschrock 3381544Seschrock for (handler = list_head(&inject_handlers); handler != NULL; 3391544Seschrock handler = list_next(&inject_handlers, handler)) 3401544Seschrock if (handler->zi_id > *id) 3411544Seschrock break; 3421544Seschrock 3431544Seschrock if (handler) { 3441544Seschrock *record = handler->zi_record; 3451544Seschrock *id = handler->zi_id; 3461544Seschrock (void) strncpy(name, spa_name(handler->zi_spa), buflen); 3471544Seschrock ret = 0; 3481544Seschrock } else { 3491544Seschrock ret = ENOENT; 3501544Seschrock } 3511544Seschrock 3521544Seschrock rw_exit(&inject_lock); 3531544Seschrock mutex_exit(&spa_namespace_lock); 3541544Seschrock 3551544Seschrock return (ret); 3561544Seschrock } 3571544Seschrock 3581544Seschrock /* 3591544Seschrock * Clear the fault handler with the given identifier, or return ENOENT if none 3601544Seschrock * exists. 3611544Seschrock */ 3621544Seschrock int 3631544Seschrock zio_clear_fault(int id) 3641544Seschrock { 3651544Seschrock inject_handler_t *handler; 3661544Seschrock int ret; 3671544Seschrock 3681544Seschrock rw_enter(&inject_lock, RW_WRITER); 3691544Seschrock 3701544Seschrock for (handler = list_head(&inject_handlers); handler != NULL; 3711544Seschrock handler = list_next(&inject_handlers, handler)) 3721544Seschrock if (handler->zi_id == id) 3731544Seschrock break; 3741544Seschrock 3751544Seschrock if (handler == NULL) { 3761544Seschrock ret = ENOENT; 3771544Seschrock } else { 3781544Seschrock list_remove(&inject_handlers, handler); 3791544Seschrock spa_inject_delref(handler->zi_spa); 3801544Seschrock kmem_free(handler, sizeof (inject_handler_t)); 3811544Seschrock atomic_add_32(&zio_injection_enabled, -1); 3821544Seschrock ret = 0; 3831544Seschrock } 3841544Seschrock 3851544Seschrock rw_exit(&inject_lock); 3861544Seschrock 3871544Seschrock return (ret); 3881544Seschrock } 3891544Seschrock 3901544Seschrock void 3911544Seschrock zio_inject_init(void) 3921544Seschrock { 3937313SEric.Kustarz@Sun.COM rw_init(&inject_lock, NULL, RW_DEFAULT, NULL); 3941544Seschrock list_create(&inject_handlers, sizeof (inject_handler_t), 3951544Seschrock offsetof(inject_handler_t, zi_link)); 3961544Seschrock } 3971544Seschrock 3981544Seschrock void 3991544Seschrock zio_inject_fini(void) 4001544Seschrock { 4011544Seschrock list_destroy(&inject_handlers); 4027313SEric.Kustarz@Sun.COM rw_destroy(&inject_lock); 4031544Seschrock } 404