11544Seschrock /* 21544Seschrock * CDDL HEADER START 31544Seschrock * 41544Seschrock * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 71544Seschrock * 81544Seschrock * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 91544Seschrock * or http://www.opensolaris.org/os/licensing. 101544Seschrock * See the License for the specific language governing permissions 111544Seschrock * and limitations under the License. 121544Seschrock * 131544Seschrock * When distributing Covered Code, include this CDDL HEADER in each 141544Seschrock * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 151544Seschrock * If applicable, add the following below this CDDL HEADER, with the 161544Seschrock * fields enclosed by brackets "[]" replaced with your own identifying 171544Seschrock * information: Portions Copyright [yyyy] [name of copyright owner] 181544Seschrock * 191544Seschrock * CDDL HEADER END 201544Seschrock */ 211544Seschrock /* 22*9725SEric.Schrock@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 231544Seschrock * Use is subject to license terms. 241544Seschrock */ 251544Seschrock 261544Seschrock /* 271544Seschrock * ZFS fault injection 281544Seschrock * 291544Seschrock * To handle fault injection, we keep track of a series of zinject_record_t 301544Seschrock * structures which describe which logical block(s) should be injected with a 311544Seschrock * fault. These are kept in a global list. Each record corresponds to a given 321544Seschrock * spa_t and maintains a special hold on the spa_t so that it cannot be deleted 331544Seschrock * or exported while the injection record exists. 341544Seschrock * 351544Seschrock * Device level injection is done using the 'zi_guid' field. If this is set, it 361544Seschrock * means that the error is destined for a particular device, not a piece of 371544Seschrock * data. 381544Seschrock * 391544Seschrock * This is a rather poor data structure and algorithm, but we don't expect more 401544Seschrock * than a few faults at any one time, so it should be sufficient for our needs. 411544Seschrock */ 421544Seschrock 431544Seschrock #include <sys/arc.h> 441544Seschrock #include <sys/zio_impl.h> 451544Seschrock #include <sys/zfs_ioctl.h> 461544Seschrock #include <sys/spa_impl.h> 471544Seschrock #include <sys/vdev_impl.h> 486615Sgw25295 #include <sys/fs/zfs.h> 491544Seschrock 501544Seschrock uint32_t zio_injection_enabled; 511544Seschrock 521544Seschrock typedef struct inject_handler { 531544Seschrock int zi_id; 541544Seschrock spa_t *zi_spa; 551544Seschrock zinject_record_t zi_record; 561544Seschrock list_node_t zi_link; 571544Seschrock } inject_handler_t; 581544Seschrock 591544Seschrock static list_t inject_handlers; 601544Seschrock static krwlock_t inject_lock; 611544Seschrock static int inject_next_id = 1; 621544Seschrock 631544Seschrock /* 641544Seschrock * Returns true if the given record matches the I/O in progress. 651544Seschrock */ 661544Seschrock static boolean_t 671544Seschrock zio_match_handler(zbookmark_t *zb, uint64_t type, 681544Seschrock zinject_record_t *record, int error) 691544Seschrock { 701544Seschrock /* 711544Seschrock * Check for a match against the MOS, which is based on type 721544Seschrock */ 731544Seschrock if (zb->zb_objset == 0 && record->zi_objset == 0 && 741544Seschrock record->zi_object == 0) { 751544Seschrock if (record->zi_type == DMU_OT_NONE || 761544Seschrock type == record->zi_type) 771544Seschrock return (record->zi_freq == 0 || 781544Seschrock spa_get_random(100) < record->zi_freq); 791544Seschrock else 801544Seschrock return (B_FALSE); 811544Seschrock } 821544Seschrock 831544Seschrock /* 841544Seschrock * Check for an exact match. 851544Seschrock */ 861544Seschrock if (zb->zb_objset == record->zi_objset && 871544Seschrock zb->zb_object == record->zi_object && 881544Seschrock zb->zb_level == record->zi_level && 891544Seschrock zb->zb_blkid >= record->zi_start && 901544Seschrock zb->zb_blkid <= record->zi_end && 911544Seschrock error == record->zi_error) 921544Seschrock return (record->zi_freq == 0 || 931544Seschrock spa_get_random(100) < record->zi_freq); 941544Seschrock 951544Seschrock return (B_FALSE); 961544Seschrock } 971544Seschrock 981544Seschrock /* 991544Seschrock * Determine if the I/O in question should return failure. Returns the errno 1001544Seschrock * to be returned to the caller. 1011544Seschrock */ 1021544Seschrock int 1031544Seschrock zio_handle_fault_injection(zio_t *zio, int error) 1041544Seschrock { 1051544Seschrock int ret = 0; 1061544Seschrock inject_handler_t *handler; 1071544Seschrock 1081544Seschrock /* 1091544Seschrock * Ignore I/O not associated with any logical data. 1101544Seschrock */ 1111544Seschrock if (zio->io_logical == NULL) 1121544Seschrock return (0); 1131544Seschrock 1141544Seschrock /* 1151544Seschrock * Currently, we only support fault injection on reads. 1161544Seschrock */ 1171544Seschrock if (zio->io_type != ZIO_TYPE_READ) 1181544Seschrock return (0); 1191544Seschrock 1201544Seschrock rw_enter(&inject_lock, RW_READER); 1211544Seschrock 1221544Seschrock for (handler = list_head(&inject_handlers); handler != NULL; 1231544Seschrock handler = list_next(&inject_handlers, handler)) { 1241544Seschrock 1251544Seschrock /* Ignore errors not destined for this pool */ 1261544Seschrock if (zio->io_spa != handler->zi_spa) 1271544Seschrock continue; 1281544Seschrock 1291544Seschrock /* Ignore device errors */ 1301544Seschrock if (handler->zi_record.zi_guid != 0) 1311544Seschrock continue; 1321544Seschrock 1331544Seschrock /* If this handler matches, return EIO */ 1341544Seschrock if (zio_match_handler(&zio->io_logical->io_bookmark, 1351544Seschrock zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE, 1361544Seschrock &handler->zi_record, error)) { 1371544Seschrock ret = error; 1381544Seschrock break; 1391544Seschrock } 1401544Seschrock } 1411544Seschrock 1421544Seschrock rw_exit(&inject_lock); 1431544Seschrock 1441544Seschrock return (ret); 1451544Seschrock } 1461544Seschrock 1476615Sgw25295 /* 1486615Sgw25295 * Determine if the zio is part of a label update and has an injection 1496615Sgw25295 * handler associated with that portion of the label. Currently, we 1506615Sgw25295 * allow error injection in either the nvlist or the uberblock region of 1516615Sgw25295 * of the vdev label. 1526615Sgw25295 */ 1536615Sgw25295 int 1546615Sgw25295 zio_handle_label_injection(zio_t *zio, int error) 1556615Sgw25295 { 1566615Sgw25295 inject_handler_t *handler; 1576615Sgw25295 vdev_t *vd = zio->io_vd; 1586615Sgw25295 uint64_t offset = zio->io_offset; 1596615Sgw25295 int label; 1606615Sgw25295 int ret = 0; 1616615Sgw25295 1626615Sgw25295 if (offset + zio->io_size > VDEV_LABEL_START_SIZE && 1636615Sgw25295 offset < vd->vdev_psize - VDEV_LABEL_END_SIZE) 1646615Sgw25295 return (0); 1656615Sgw25295 1666615Sgw25295 rw_enter(&inject_lock, RW_READER); 1676615Sgw25295 1686615Sgw25295 for (handler = list_head(&inject_handlers); handler != NULL; 1696615Sgw25295 handler = list_next(&inject_handlers, handler)) { 1706615Sgw25295 uint64_t start = handler->zi_record.zi_start; 1716615Sgw25295 uint64_t end = handler->zi_record.zi_end; 1726615Sgw25295 1736615Sgw25295 /* Ignore device only faults */ 1746615Sgw25295 if (handler->zi_record.zi_start == 0) 1756615Sgw25295 continue; 1766615Sgw25295 1776615Sgw25295 /* 1786615Sgw25295 * The injection region is the relative offsets within a 1796615Sgw25295 * vdev label. We must determine the label which is being 1806615Sgw25295 * updated and adjust our region accordingly. 1816615Sgw25295 */ 1826615Sgw25295 label = vdev_label_number(vd->vdev_psize, offset); 1836615Sgw25295 start = vdev_label_offset(vd->vdev_psize, label, start); 1846615Sgw25295 end = vdev_label_offset(vd->vdev_psize, label, end); 1856615Sgw25295 1866615Sgw25295 if (zio->io_vd->vdev_guid == handler->zi_record.zi_guid && 1876615Sgw25295 (offset >= start && offset <= end)) { 1886615Sgw25295 ret = error; 1896615Sgw25295 break; 1906615Sgw25295 } 1916615Sgw25295 } 1926615Sgw25295 rw_exit(&inject_lock); 1936615Sgw25295 return (ret); 1946615Sgw25295 } 1956615Sgw25295 1966615Sgw25295 1971544Seschrock int 198*9725SEric.Schrock@Sun.COM zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error) 1991544Seschrock { 2001544Seschrock inject_handler_t *handler; 2011544Seschrock int ret = 0; 2021544Seschrock 2031544Seschrock rw_enter(&inject_lock, RW_READER); 2041544Seschrock 2051544Seschrock for (handler = list_head(&inject_handlers); handler != NULL; 2061544Seschrock handler = list_next(&inject_handlers, handler)) { 2071544Seschrock 2086615Sgw25295 /* Ignore label specific faults */ 2096615Sgw25295 if (handler->zi_record.zi_start != 0) 2106615Sgw25295 continue; 2116615Sgw25295 2121544Seschrock if (vd->vdev_guid == handler->zi_record.zi_guid) { 213*9725SEric.Schrock@Sun.COM if (handler->zi_record.zi_failfast && 214*9725SEric.Schrock@Sun.COM (zio == NULL || (zio->io_flags & 215*9725SEric.Schrock@Sun.COM (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))) { 216*9725SEric.Schrock@Sun.COM continue; 217*9725SEric.Schrock@Sun.COM } 218*9725SEric.Schrock@Sun.COM 2191544Seschrock if (handler->zi_record.zi_error == error) { 2201544Seschrock /* 2211544Seschrock * For a failed open, pretend like the device 2221544Seschrock * has gone away. 2231544Seschrock */ 2241544Seschrock if (error == ENXIO) 2251544Seschrock vd->vdev_stat.vs_aux = 2261544Seschrock VDEV_AUX_OPEN_FAILED; 2271544Seschrock ret = error; 2281544Seschrock break; 2291544Seschrock } 2301544Seschrock if (handler->zi_record.zi_error == ENXIO) { 2311544Seschrock ret = EIO; 2321544Seschrock break; 2331544Seschrock } 2341544Seschrock } 2351544Seschrock } 2361544Seschrock 2371544Seschrock rw_exit(&inject_lock); 2381544Seschrock 2391544Seschrock return (ret); 2401544Seschrock } 2411544Seschrock 2421544Seschrock /* 2431544Seschrock * Create a new handler for the given record. We add it to the list, adding 2441544Seschrock * a reference to the spa_t in the process. We increment zio_injection_enabled, 2451544Seschrock * which is the switch to trigger all fault injection. 2461544Seschrock */ 2471544Seschrock int 2481544Seschrock zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record) 2491544Seschrock { 2501544Seschrock inject_handler_t *handler; 2511544Seschrock int error; 2521544Seschrock spa_t *spa; 2531544Seschrock 2541544Seschrock /* 2551544Seschrock * If this is pool-wide metadata, make sure we unload the corresponding 2561544Seschrock * spa_t, so that the next attempt to load it will trigger the fault. 2571544Seschrock * We call spa_reset() to unload the pool appropriately. 2581544Seschrock */ 2591544Seschrock if (flags & ZINJECT_UNLOAD_SPA) 2601544Seschrock if ((error = spa_reset(name)) != 0) 2611544Seschrock return (error); 2621544Seschrock 2631544Seschrock if (!(flags & ZINJECT_NULL)) { 2641544Seschrock /* 2651544Seschrock * spa_inject_ref() will add an injection reference, which will 2661544Seschrock * prevent the pool from being removed from the namespace while 2671544Seschrock * still allowing it to be unloaded. 2681544Seschrock */ 2691544Seschrock if ((spa = spa_inject_addref(name)) == NULL) 2701544Seschrock return (ENOENT); 2711544Seschrock 2721544Seschrock handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP); 2731544Seschrock 2741544Seschrock rw_enter(&inject_lock, RW_WRITER); 2751544Seschrock 2761544Seschrock *id = handler->zi_id = inject_next_id++; 2771544Seschrock handler->zi_spa = spa; 2781544Seschrock handler->zi_record = *record; 2791544Seschrock list_insert_tail(&inject_handlers, handler); 2801544Seschrock atomic_add_32(&zio_injection_enabled, 1); 2811544Seschrock 2821544Seschrock rw_exit(&inject_lock); 2831544Seschrock } 2841544Seschrock 2851544Seschrock /* 2861544Seschrock * Flush the ARC, so that any attempts to read this data will end up 2871544Seschrock * going to the ZIO layer. Note that this is a little overkill, but 2881544Seschrock * we don't have the necessary ARC interfaces to do anything else, and 2891544Seschrock * fault injection isn't a performance critical path. 2901544Seschrock */ 2911544Seschrock if (flags & ZINJECT_FLUSH_ARC) 2925642Smaybee arc_flush(NULL); 2931544Seschrock 2941544Seschrock return (0); 2951544Seschrock } 2961544Seschrock 2971544Seschrock /* 2981544Seschrock * Returns the next record with an ID greater than that supplied to the 2991544Seschrock * function. Used to iterate over all handlers in the system. 3001544Seschrock */ 3011544Seschrock int 3021544Seschrock zio_inject_list_next(int *id, char *name, size_t buflen, 3031544Seschrock zinject_record_t *record) 3041544Seschrock { 3051544Seschrock inject_handler_t *handler; 3061544Seschrock int ret; 3071544Seschrock 3081544Seschrock mutex_enter(&spa_namespace_lock); 3091544Seschrock rw_enter(&inject_lock, RW_READER); 3101544Seschrock 3111544Seschrock for (handler = list_head(&inject_handlers); handler != NULL; 3121544Seschrock handler = list_next(&inject_handlers, handler)) 3131544Seschrock if (handler->zi_id > *id) 3141544Seschrock break; 3151544Seschrock 3161544Seschrock if (handler) { 3171544Seschrock *record = handler->zi_record; 3181544Seschrock *id = handler->zi_id; 3191544Seschrock (void) strncpy(name, spa_name(handler->zi_spa), buflen); 3201544Seschrock ret = 0; 3211544Seschrock } else { 3221544Seschrock ret = ENOENT; 3231544Seschrock } 3241544Seschrock 3251544Seschrock rw_exit(&inject_lock); 3261544Seschrock mutex_exit(&spa_namespace_lock); 3271544Seschrock 3281544Seschrock return (ret); 3291544Seschrock } 3301544Seschrock 3311544Seschrock /* 3321544Seschrock * Clear the fault handler with the given identifier, or return ENOENT if none 3331544Seschrock * exists. 3341544Seschrock */ 3351544Seschrock int 3361544Seschrock zio_clear_fault(int id) 3371544Seschrock { 3381544Seschrock inject_handler_t *handler; 3391544Seschrock int ret; 3401544Seschrock 3411544Seschrock rw_enter(&inject_lock, RW_WRITER); 3421544Seschrock 3431544Seschrock for (handler = list_head(&inject_handlers); handler != NULL; 3441544Seschrock handler = list_next(&inject_handlers, handler)) 3451544Seschrock if (handler->zi_id == id) 3461544Seschrock break; 3471544Seschrock 3481544Seschrock if (handler == NULL) { 3491544Seschrock ret = ENOENT; 3501544Seschrock } else { 3511544Seschrock list_remove(&inject_handlers, handler); 3521544Seschrock spa_inject_delref(handler->zi_spa); 3531544Seschrock kmem_free(handler, sizeof (inject_handler_t)); 3541544Seschrock atomic_add_32(&zio_injection_enabled, -1); 3551544Seschrock ret = 0; 3561544Seschrock } 3571544Seschrock 3581544Seschrock rw_exit(&inject_lock); 3591544Seschrock 3601544Seschrock return (ret); 3611544Seschrock } 3621544Seschrock 3631544Seschrock void 3641544Seschrock zio_inject_init(void) 3651544Seschrock { 3667313SEric.Kustarz@Sun.COM rw_init(&inject_lock, NULL, RW_DEFAULT, NULL); 3671544Seschrock list_create(&inject_handlers, sizeof (inject_handler_t), 3681544Seschrock offsetof(inject_handler_t, zi_link)); 3691544Seschrock } 3701544Seschrock 3711544Seschrock void 3721544Seschrock zio_inject_fini(void) 3731544Seschrock { 3741544Seschrock list_destroy(&inject_handlers); 3757313SEric.Kustarz@Sun.COM rw_destroy(&inject_lock); 3761544Seschrock } 377