11544Seschrock /* 21544Seschrock * CDDL HEADER START 31544Seschrock * 41544Seschrock * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 71544Seschrock * 81544Seschrock * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 91544Seschrock * or http://www.opensolaris.org/os/licensing. 101544Seschrock * See the License for the specific language governing permissions 111544Seschrock * and limitations under the License. 121544Seschrock * 131544Seschrock * When distributing Covered Code, include this CDDL HEADER in each 141544Seschrock * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 151544Seschrock * If applicable, add the following below this CDDL HEADER, with the 161544Seschrock * fields enclosed by brackets "[]" replaced with your own identifying 171544Seschrock * information: Portions Copyright [yyyy] [name of copyright owner] 181544Seschrock * 191544Seschrock * CDDL HEADER END 201544Seschrock */ 211544Seschrock /* 22*6615Sgw25295 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 231544Seschrock * Use is subject to license terms. 241544Seschrock */ 251544Seschrock 261544Seschrock #pragma ident "%Z%%M% %I% %E% SMI" 271544Seschrock 281544Seschrock /* 291544Seschrock * ZFS fault injection 301544Seschrock * 311544Seschrock * To handle fault injection, we keep track of a series of zinject_record_t 321544Seschrock * structures which describe which logical block(s) should be injected with a 331544Seschrock * fault. These are kept in a global list. Each record corresponds to a given 341544Seschrock * spa_t and maintains a special hold on the spa_t so that it cannot be deleted 351544Seschrock * or exported while the injection record exists. 361544Seschrock * 371544Seschrock * Device level injection is done using the 'zi_guid' field. If this is set, it 381544Seschrock * means that the error is destined for a particular device, not a piece of 391544Seschrock * data. 401544Seschrock * 411544Seschrock * This is a rather poor data structure and algorithm, but we don't expect more 421544Seschrock * than a few faults at any one time, so it should be sufficient for our needs. 431544Seschrock */ 441544Seschrock 451544Seschrock #include <sys/arc.h> 461544Seschrock #include <sys/zio_impl.h> 471544Seschrock #include <sys/zfs_ioctl.h> 481544Seschrock #include <sys/spa_impl.h> 491544Seschrock #include <sys/vdev_impl.h> 50*6615Sgw25295 #include <sys/fs/zfs.h> 511544Seschrock 521544Seschrock uint32_t zio_injection_enabled; 531544Seschrock 541544Seschrock typedef struct inject_handler { 551544Seschrock int zi_id; 561544Seschrock spa_t *zi_spa; 571544Seschrock zinject_record_t zi_record; 581544Seschrock list_node_t zi_link; 591544Seschrock } inject_handler_t; 601544Seschrock 611544Seschrock static list_t inject_handlers; 621544Seschrock static krwlock_t inject_lock; 631544Seschrock static int inject_next_id = 1; 641544Seschrock 651544Seschrock /* 661544Seschrock * Returns true if the given record matches the I/O in progress. 671544Seschrock */ 681544Seschrock static boolean_t 691544Seschrock zio_match_handler(zbookmark_t *zb, uint64_t type, 701544Seschrock zinject_record_t *record, int error) 711544Seschrock { 721544Seschrock /* 731544Seschrock * Check for a match against the MOS, which is based on type 741544Seschrock */ 751544Seschrock if (zb->zb_objset == 0 && record->zi_objset == 0 && 761544Seschrock record->zi_object == 0) { 771544Seschrock if (record->zi_type == DMU_OT_NONE || 781544Seschrock type == record->zi_type) 791544Seschrock return (record->zi_freq == 0 || 801544Seschrock spa_get_random(100) < record->zi_freq); 811544Seschrock else 821544Seschrock return (B_FALSE); 831544Seschrock } 841544Seschrock 851544Seschrock /* 861544Seschrock * Check for an exact match. 871544Seschrock */ 881544Seschrock if (zb->zb_objset == record->zi_objset && 891544Seschrock zb->zb_object == record->zi_object && 901544Seschrock zb->zb_level == record->zi_level && 911544Seschrock zb->zb_blkid >= record->zi_start && 921544Seschrock zb->zb_blkid <= record->zi_end && 931544Seschrock error == record->zi_error) 941544Seschrock return (record->zi_freq == 0 || 951544Seschrock spa_get_random(100) < record->zi_freq); 961544Seschrock 971544Seschrock return (B_FALSE); 981544Seschrock } 991544Seschrock 1001544Seschrock /* 1011544Seschrock * Determine if the I/O in question should return failure. Returns the errno 1021544Seschrock * to be returned to the caller. 1031544Seschrock */ 1041544Seschrock int 1051544Seschrock zio_handle_fault_injection(zio_t *zio, int error) 1061544Seschrock { 1071544Seschrock int ret = 0; 1081544Seschrock inject_handler_t *handler; 1091544Seschrock 1101544Seschrock /* 1111544Seschrock * Ignore I/O not associated with any logical data. 1121544Seschrock */ 1131544Seschrock if (zio->io_logical == NULL) 1141544Seschrock return (0); 1151544Seschrock 1161544Seschrock /* 1171544Seschrock * Currently, we only support fault injection on reads. 1181544Seschrock */ 1191544Seschrock if (zio->io_type != ZIO_TYPE_READ) 1201544Seschrock return (0); 1211544Seschrock 1221544Seschrock rw_enter(&inject_lock, RW_READER); 1231544Seschrock 1241544Seschrock for (handler = list_head(&inject_handlers); handler != NULL; 1251544Seschrock handler = list_next(&inject_handlers, handler)) { 1261544Seschrock 1271544Seschrock /* Ignore errors not destined for this pool */ 1281544Seschrock if (zio->io_spa != handler->zi_spa) 1291544Seschrock continue; 1301544Seschrock 1311544Seschrock /* Ignore device errors */ 1321544Seschrock if (handler->zi_record.zi_guid != 0) 1331544Seschrock continue; 1341544Seschrock 1351544Seschrock /* If this handler matches, return EIO */ 1361544Seschrock if (zio_match_handler(&zio->io_logical->io_bookmark, 1371544Seschrock zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE, 1381544Seschrock &handler->zi_record, error)) { 1391544Seschrock ret = error; 1401544Seschrock break; 1411544Seschrock } 1421544Seschrock } 1431544Seschrock 1441544Seschrock rw_exit(&inject_lock); 1451544Seschrock 1461544Seschrock return (ret); 1471544Seschrock } 1481544Seschrock 149*6615Sgw25295 /* 150*6615Sgw25295 * Determine if the zio is part of a label update and has an injection 151*6615Sgw25295 * handler associated with that portion of the label. Currently, we 152*6615Sgw25295 * allow error injection in either the nvlist or the uberblock region of 153*6615Sgw25295 * of the vdev label. 154*6615Sgw25295 */ 155*6615Sgw25295 int 156*6615Sgw25295 zio_handle_label_injection(zio_t *zio, int error) 157*6615Sgw25295 { 158*6615Sgw25295 inject_handler_t *handler; 159*6615Sgw25295 vdev_t *vd = zio->io_vd; 160*6615Sgw25295 uint64_t offset = zio->io_offset; 161*6615Sgw25295 int label; 162*6615Sgw25295 int ret = 0; 163*6615Sgw25295 164*6615Sgw25295 /* 165*6615Sgw25295 * We only care about physical I/Os. 166*6615Sgw25295 */ 167*6615Sgw25295 if (!(zio->io_flags & ZIO_FLAG_PHYSICAL)) 168*6615Sgw25295 return (0); 169*6615Sgw25295 170*6615Sgw25295 if (offset + zio->io_size > VDEV_LABEL_START_SIZE && 171*6615Sgw25295 offset < vd->vdev_psize - VDEV_LABEL_END_SIZE) 172*6615Sgw25295 return (0); 173*6615Sgw25295 174*6615Sgw25295 rw_enter(&inject_lock, RW_READER); 175*6615Sgw25295 176*6615Sgw25295 for (handler = list_head(&inject_handlers); handler != NULL; 177*6615Sgw25295 handler = list_next(&inject_handlers, handler)) { 178*6615Sgw25295 uint64_t start = handler->zi_record.zi_start; 179*6615Sgw25295 uint64_t end = handler->zi_record.zi_end; 180*6615Sgw25295 181*6615Sgw25295 /* Ignore device only faults */ 182*6615Sgw25295 if (handler->zi_record.zi_start == 0) 183*6615Sgw25295 continue; 184*6615Sgw25295 185*6615Sgw25295 /* 186*6615Sgw25295 * The injection region is the relative offsets within a 187*6615Sgw25295 * vdev label. We must determine the label which is being 188*6615Sgw25295 * updated and adjust our region accordingly. 189*6615Sgw25295 */ 190*6615Sgw25295 label = vdev_label_number(vd->vdev_psize, offset); 191*6615Sgw25295 start = vdev_label_offset(vd->vdev_psize, label, start); 192*6615Sgw25295 end = vdev_label_offset(vd->vdev_psize, label, end); 193*6615Sgw25295 194*6615Sgw25295 if (zio->io_vd->vdev_guid == handler->zi_record.zi_guid && 195*6615Sgw25295 (offset >= start && offset <= end)) { 196*6615Sgw25295 ret = error; 197*6615Sgw25295 break; 198*6615Sgw25295 } 199*6615Sgw25295 } 200*6615Sgw25295 rw_exit(&inject_lock); 201*6615Sgw25295 return (ret); 202*6615Sgw25295 } 203*6615Sgw25295 204*6615Sgw25295 2051544Seschrock int 2061544Seschrock zio_handle_device_injection(vdev_t *vd, int error) 2071544Seschrock { 2081544Seschrock inject_handler_t *handler; 2091544Seschrock int ret = 0; 2101544Seschrock 2111544Seschrock rw_enter(&inject_lock, RW_READER); 2121544Seschrock 2131544Seschrock for (handler = list_head(&inject_handlers); handler != NULL; 2141544Seschrock handler = list_next(&inject_handlers, handler)) { 2151544Seschrock 216*6615Sgw25295 /* Ignore label specific faults */ 217*6615Sgw25295 if (handler->zi_record.zi_start != 0) 218*6615Sgw25295 continue; 219*6615Sgw25295 2201544Seschrock if (vd->vdev_guid == handler->zi_record.zi_guid) { 2211544Seschrock if (handler->zi_record.zi_error == error) { 2221544Seschrock /* 2231544Seschrock * For a failed open, pretend like the device 2241544Seschrock * has gone away. 2251544Seschrock */ 2261544Seschrock if (error == ENXIO) 2271544Seschrock vd->vdev_stat.vs_aux = 2281544Seschrock VDEV_AUX_OPEN_FAILED; 2291544Seschrock ret = error; 2301544Seschrock break; 2311544Seschrock } 2321544Seschrock if (handler->zi_record.zi_error == ENXIO) { 2331544Seschrock ret = EIO; 2341544Seschrock break; 2351544Seschrock } 2361544Seschrock } 2371544Seschrock } 2381544Seschrock 2391544Seschrock rw_exit(&inject_lock); 2401544Seschrock 2411544Seschrock return (ret); 2421544Seschrock } 2431544Seschrock 2441544Seschrock /* 2451544Seschrock * Create a new handler for the given record. We add it to the list, adding 2461544Seschrock * a reference to the spa_t in the process. We increment zio_injection_enabled, 2471544Seschrock * which is the switch to trigger all fault injection. 2481544Seschrock */ 2491544Seschrock int 2501544Seschrock zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record) 2511544Seschrock { 2521544Seschrock inject_handler_t *handler; 2531544Seschrock int error; 2541544Seschrock spa_t *spa; 2551544Seschrock 2561544Seschrock /* 2571544Seschrock * If this is pool-wide metadata, make sure we unload the corresponding 2581544Seschrock * spa_t, so that the next attempt to load it will trigger the fault. 2591544Seschrock * We call spa_reset() to unload the pool appropriately. 2601544Seschrock */ 2611544Seschrock if (flags & ZINJECT_UNLOAD_SPA) 2621544Seschrock if ((error = spa_reset(name)) != 0) 2631544Seschrock return (error); 2641544Seschrock 2651544Seschrock if (!(flags & ZINJECT_NULL)) { 2661544Seschrock /* 2671544Seschrock * spa_inject_ref() will add an injection reference, which will 2681544Seschrock * prevent the pool from being removed from the namespace while 2691544Seschrock * still allowing it to be unloaded. 2701544Seschrock */ 2711544Seschrock if ((spa = spa_inject_addref(name)) == NULL) 2721544Seschrock return (ENOENT); 2731544Seschrock 2741544Seschrock handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP); 2751544Seschrock 2761544Seschrock rw_enter(&inject_lock, RW_WRITER); 2771544Seschrock 2781544Seschrock *id = handler->zi_id = inject_next_id++; 2791544Seschrock handler->zi_spa = spa; 2801544Seschrock handler->zi_record = *record; 2811544Seschrock list_insert_tail(&inject_handlers, handler); 2821544Seschrock atomic_add_32(&zio_injection_enabled, 1); 2831544Seschrock 2841544Seschrock rw_exit(&inject_lock); 2851544Seschrock } 2861544Seschrock 2871544Seschrock /* 2881544Seschrock * Flush the ARC, so that any attempts to read this data will end up 2891544Seschrock * going to the ZIO layer. Note that this is a little overkill, but 2901544Seschrock * we don't have the necessary ARC interfaces to do anything else, and 2911544Seschrock * fault injection isn't a performance critical path. 2921544Seschrock */ 2931544Seschrock if (flags & ZINJECT_FLUSH_ARC) 2945642Smaybee arc_flush(NULL); 2951544Seschrock 2961544Seschrock return (0); 2971544Seschrock } 2981544Seschrock 2991544Seschrock /* 3001544Seschrock * Returns the next record with an ID greater than that supplied to the 3011544Seschrock * function. Used to iterate over all handlers in the system. 3021544Seschrock */ 3031544Seschrock int 3041544Seschrock zio_inject_list_next(int *id, char *name, size_t buflen, 3051544Seschrock zinject_record_t *record) 3061544Seschrock { 3071544Seschrock inject_handler_t *handler; 3081544Seschrock int ret; 3091544Seschrock 3101544Seschrock mutex_enter(&spa_namespace_lock); 3111544Seschrock rw_enter(&inject_lock, RW_READER); 3121544Seschrock 3131544Seschrock for (handler = list_head(&inject_handlers); handler != NULL; 3141544Seschrock handler = list_next(&inject_handlers, handler)) 3151544Seschrock if (handler->zi_id > *id) 3161544Seschrock break; 3171544Seschrock 3181544Seschrock if (handler) { 3191544Seschrock *record = handler->zi_record; 3201544Seschrock *id = handler->zi_id; 3211544Seschrock (void) strncpy(name, spa_name(handler->zi_spa), buflen); 3221544Seschrock ret = 0; 3231544Seschrock } else { 3241544Seschrock ret = ENOENT; 3251544Seschrock } 3261544Seschrock 3271544Seschrock rw_exit(&inject_lock); 3281544Seschrock mutex_exit(&spa_namespace_lock); 3291544Seschrock 3301544Seschrock return (ret); 3311544Seschrock } 3321544Seschrock 3331544Seschrock /* 3341544Seschrock * Clear the fault handler with the given identifier, or return ENOENT if none 3351544Seschrock * exists. 3361544Seschrock */ 3371544Seschrock int 3381544Seschrock zio_clear_fault(int id) 3391544Seschrock { 3401544Seschrock inject_handler_t *handler; 3411544Seschrock int ret; 3421544Seschrock 3431544Seschrock rw_enter(&inject_lock, RW_WRITER); 3441544Seschrock 3451544Seschrock for (handler = list_head(&inject_handlers); handler != NULL; 3461544Seschrock handler = list_next(&inject_handlers, handler)) 3471544Seschrock if (handler->zi_id == id) 3481544Seschrock break; 3491544Seschrock 3501544Seschrock if (handler == NULL) { 3511544Seschrock ret = ENOENT; 3521544Seschrock } else { 3531544Seschrock list_remove(&inject_handlers, handler); 3541544Seschrock spa_inject_delref(handler->zi_spa); 3551544Seschrock kmem_free(handler, sizeof (inject_handler_t)); 3561544Seschrock atomic_add_32(&zio_injection_enabled, -1); 3571544Seschrock ret = 0; 3581544Seschrock } 3591544Seschrock 3601544Seschrock rw_exit(&inject_lock); 3611544Seschrock 3621544Seschrock return (ret); 3631544Seschrock } 3641544Seschrock 3651544Seschrock void 3661544Seschrock zio_inject_init(void) 3671544Seschrock { 3681544Seschrock list_create(&inject_handlers, sizeof (inject_handler_t), 3691544Seschrock offsetof(inject_handler_t, zi_link)); 3701544Seschrock } 3711544Seschrock 3721544Seschrock void 3731544Seschrock zio_inject_fini(void) 3741544Seschrock { 3751544Seschrock list_destroy(&inject_handlers); 3761544Seschrock } 377