xref: /onnv-gate/usr/src/cmd/syseventd/modules/zfs_mod/zfs_mod.c (revision 12768:0e29ce2277d6)
14451Seschrock /*
24451Seschrock  * CDDL HEADER START
34451Seschrock  *
44451Seschrock  * The contents of this file are subject to the terms of the
54451Seschrock  * Common Development and Distribution License (the "License").
64451Seschrock  * You may not use this file except in compliance with the License.
74451Seschrock  *
84451Seschrock  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
94451Seschrock  * or http://www.opensolaris.org/os/licensing.
104451Seschrock  * See the License for the specific language governing permissions
114451Seschrock  * and limitations under the License.
124451Seschrock  *
134451Seschrock  * When distributing Covered Code, include this CDDL HEADER in each
144451Seschrock  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
154451Seschrock  * If applicable, add the following below this CDDL HEADER, with the
164451Seschrock  * fields enclosed by brackets "[]" replaced with your own identifying
174451Seschrock  * information: Portions Copyright [yyyy] [name of copyright owner]
184451Seschrock  *
194451Seschrock  * CDDL HEADER END
204451Seschrock  */
214451Seschrock /*
2212318SEric.Taylor@Sun.COM  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
234451Seschrock  */
244451Seschrock 
254451Seschrock /*
264451Seschrock  * ZFS syseventd module.
274451Seschrock  *
284451Seschrock  * The purpose of this module is to identify when devices are added to the
294451Seschrock  * system, and appropriately online or replace the affected vdevs.
304451Seschrock  *
314451Seschrock  * When a device is added to the system:
324451Seschrock  *
334451Seschrock  * 	1. Search for any vdevs whose devid matches that of the newly added
344451Seschrock  *	   device.
354451Seschrock  *
364451Seschrock  * 	2. If no vdevs are found, then search for any vdevs whose devfs path
374451Seschrock  *	   matches that of the new device.
384451Seschrock  *
394451Seschrock  *	3. If no vdevs match by either method, then ignore the event.
404451Seschrock  *
414451Seschrock  * 	4. Attempt to online the device with a flag to indicate that it should
424451Seschrock  *	   be unspared when resilvering completes.  If this succeeds, then the
434451Seschrock  *	   same device was inserted and we should continue normally.
444451Seschrock  *
454451Seschrock  *	5. If the pool does not have the 'autoreplace' property set, attempt to
464451Seschrock  *	   online the device again without the unspare flag, which will
474451Seschrock  *	   generate a FMA fault.
484451Seschrock  *
494451Seschrock  *	6. If the pool has the 'autoreplace' property set, and the matching vdev
504451Seschrock  *	   is a whole disk, then label the new disk and attempt a 'zpool
514451Seschrock  *	   replace'.
524451Seschrock  *
534451Seschrock  * The module responds to EC_DEV_ADD events for both disks and lofi devices,
544451Seschrock  * with the latter used for testing.  The special ESC_ZFS_VDEV_CHECK event
554451Seschrock  * indicates that a device failed to open during pool load, but the autoreplace
564451Seschrock  * property was set.  In this case, we deferred the associated FMA fault until
574451Seschrock  * our module had a chance to process the autoreplace logic.  If the device
584451Seschrock  * could not be replaced, then the second online attempt will trigger the FMA
594451Seschrock  * fault that we skipped earlier.
604451Seschrock  */
614451Seschrock 
624451Seschrock #include <alloca.h>
634451Seschrock #include <devid.h>
644451Seschrock #include <fcntl.h>
654451Seschrock #include <libnvpair.h>
664451Seschrock #include <libsysevent.h>
674451Seschrock #include <libzfs.h>
684451Seschrock #include <limits.h>
694451Seschrock #include <stdlib.h>
704451Seschrock #include <string.h>
714451Seschrock #include <syslog.h>
7212710SEric.Taylor@Sun.COM #include <sys/list.h>
734451Seschrock #include <sys/sunddi.h>
744451Seschrock #include <sys/sysevent/eventdefs.h>
754451Seschrock #include <sys/sysevent/dev.h>
7612710SEric.Taylor@Sun.COM #include <thread_pool.h>
774451Seschrock #include <unistd.h>
7812318SEric.Taylor@Sun.COM #include "syseventd.h"
794451Seschrock 
/*
 * Architecture-specific device-path suffixes.  PHYS_PATH is the minor-node
 * suffix appended to a /devices path when probing for a known minor node
 * (see devid_iter()); RAW_SLICE names the whole-disk "backup" slice.
 */
#if defined(__i386) || defined(__amd64)
#define	PHYS_PATH	":q"
#define	RAW_SLICE	"p0"
#elif defined(__sparc)
#define	PHYS_PATH	":c"
#define	RAW_SLICE	"s2"
#else
#error Unknown architecture
#endif
894451Seschrock 
/* Callback invoked on each (pool, vdev) match; see zfs_process_add(). */
typedef void (*zfs_process_func_t)(zpool_handle_t *, nvlist_t *, boolean_t);

libzfs_handle_t *g_zfshdl;	/* global libzfs handle, opened in slm_init() */
list_t g_pool_list;		/* pools found unavailable during enumeration */
tpool_t *g_tpool;		/* thread pool used to re-enable datasets */
boolean_t g_enumeration_done;	/* set once zfs_enum_pools() completes */
thread_t g_zfs_tid;		/* thread id of the zfs_enum_pools() thread */

/*
 * Entry on g_pool_list: a pool whose top-level vdev state was below
 * DEGRADED at module load.  The zpool handle stays open and is owned by
 * this entry until the pool becomes available or the module unloads.
 */
typedef struct unavailpool {
	zpool_handle_t	*uap_zhp;	/* open handle to the pool */
	list_node_t	uap_node;	/* linkage on g_pool_list */
} unavailpool_t;
10212710SEric.Taylor@Sun.COM 
10312710SEric.Taylor@Sun.COM int
zfs_toplevel_state(zpool_handle_t * zhp)10412710SEric.Taylor@Sun.COM zfs_toplevel_state(zpool_handle_t *zhp)
10512710SEric.Taylor@Sun.COM {
10612710SEric.Taylor@Sun.COM 	nvlist_t *nvroot;
10712710SEric.Taylor@Sun.COM 	vdev_stat_t *vs;
10812710SEric.Taylor@Sun.COM 	unsigned int c;
10912710SEric.Taylor@Sun.COM 
11012710SEric.Taylor@Sun.COM 	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
11112710SEric.Taylor@Sun.COM 	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
11212710SEric.Taylor@Sun.COM 	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
11312710SEric.Taylor@Sun.COM 	    (uint64_t **)&vs, &c) == 0);
11412710SEric.Taylor@Sun.COM 	return (vs->vs_state);
11512710SEric.Taylor@Sun.COM }
11612710SEric.Taylor@Sun.COM 
11712710SEric.Taylor@Sun.COM static int
zfs_unavail_pool(zpool_handle_t * zhp,void * data)11812710SEric.Taylor@Sun.COM zfs_unavail_pool(zpool_handle_t *zhp, void *data)
11912710SEric.Taylor@Sun.COM {
12012710SEric.Taylor@Sun.COM 	if (zfs_toplevel_state(zhp) < VDEV_STATE_DEGRADED) {
12112710SEric.Taylor@Sun.COM 		unavailpool_t *uap;
12212710SEric.Taylor@Sun.COM 		uap = malloc(sizeof (unavailpool_t));
12312710SEric.Taylor@Sun.COM 		uap->uap_zhp = zhp;
12412710SEric.Taylor@Sun.COM 		list_insert_tail((list_t *)data, uap);
12512710SEric.Taylor@Sun.COM 	} else {
12612710SEric.Taylor@Sun.COM 		zpool_close(zhp);
12712710SEric.Taylor@Sun.COM 	}
12812710SEric.Taylor@Sun.COM 	return (0);
12912710SEric.Taylor@Sun.COM }
1304451Seschrock 
1314451Seschrock /*
1324451Seschrock  * The device associated with the given vdev (either by devid or physical path)
1334451Seschrock  * has been added to the system.  If 'isdisk' is set, then we only attempt a
1344451Seschrock  * replacement if it's a whole disk.  This also implies that we should label the
1354451Seschrock  * disk first.
1364451Seschrock  *
1374451Seschrock  * First, we attempt to online the device (making sure to undo any spare
1384451Seschrock  * operation when finished).  If this succeeds, then we're done.  If it fails,
1394451Seschrock  * and the new state is VDEV_CANT_OPEN, it indicates that the device was opened,
1404451Seschrock  * but that the label was not what we expected.  If the 'autoreplace' property
1414451Seschrock  * is not set, then we relabel the disk (if specified), and attempt a 'zpool
1424451Seschrock  * replace'.  If the online is successful, but the new state is something else
1434451Seschrock  * (REMOVED or FAULTED), it indicates that we're out of sync or in some sort of
1444451Seschrock  * race, and we should avoid attempting to relabel the disk.
1454451Seschrock  */
static void
zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t isdisk)
{
	char *path;
	vdev_state_t newstate;
	nvlist_t *nvroot, *newvd;
	uint64_t wholedisk = 0ULL;
	char *physpath = NULL;
	char rawpath[PATH_MAX], fullpath[PATH_MAX];
	size_t len;

	/* without a path there is nothing we can online or attach */
	if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0)
		return;

	/* physpath and wholedisk are optional; the defaults above apply */
	(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &physpath);
	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);

	/*
	 * We should have a way to online a device by guid.  With the current
	 * interface, we are forced to chop off the 's0' for whole disks.
	 */
	(void) strlcpy(fullpath, path, sizeof (fullpath));
	if (wholedisk)
		fullpath[strlen(fullpath) - 2] = '\0';

	/*
	 * Attempt to online the device.  It would be nice to online this by
	 * GUID, but the current interface only supports lookup by path.
	 */
	if (zpool_vdev_online(zhp, fullpath,
	    ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &newstate) == 0 &&
	    (newstate == VDEV_STATE_HEALTHY || newstate == VDEV_STATE_DEGRADED))
		return;

	/*
	 * If the pool doesn't have the autoreplace property set, then attempt a
	 * true online (without the unspare flag), which will trigger a FMA
	 * fault.  The same applies for a disk vdev that isn't a whole disk,
	 * since we can't relabel a single slice.
	 */
	if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) ||
	    (isdisk && !wholedisk)) {
		(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
		    &newstate);
		return;
	}

	if (isdisk) {
		/*
		 * If this is a request to label a whole disk, then attempt to
		 * write out the label.  Before we can label the disk, we need
		 * access to a raw node.  Ideally, we'd like to walk the devinfo
		 * tree and find a raw node from the corresponding parent node.
		 * This is overly complicated, and since we know how we labeled
		 * this device in the first place, we know it's safe to switch
		 * from /dev/dsk to /dev/rdsk and append the backup slice.
		 *
		 * If any part of this process fails, then do a force online to
		 * trigger a ZFS fault for the device (and any hot spare
		 * replacement).
		 */
		if (strncmp(path, "/dev/dsk/", 9) != 0) {
			(void) zpool_vdev_online(zhp, fullpath,
			    ZFS_ONLINE_FORCEFAULT, &newstate);
			return;
		}

		/* strip "/dev/dsk/" prefix and the trailing slice number */
		(void) strlcpy(rawpath, path + 9, sizeof (rawpath));
		len = strlen(rawpath);
		rawpath[len - 2] = '\0';

		if (zpool_label_disk(g_zfshdl, zhp, rawpath) != 0) {
			(void) zpool_vdev_online(zhp, fullpath,
			    ZFS_ONLINE_FORCEFAULT, &newstate);
			return;
		}
	}

	/*
	 * Construct the root vdev to pass to zpool_vdev_attach().  While adding
	 * the entire vdev structure is harmless, we construct a reduced set of
	 * path/physpath/wholedisk to keep it simple.
	 */
	if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
		return;

	if (nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
		nvlist_free(nvroot);
		return;
	}

	if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK) != 0 ||
	    nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 ||
	    (physpath != NULL && nvlist_add_string(newvd,
	    ZPOOL_CONFIG_PHYS_PATH, physpath) != 0) ||
	    nvlist_add_uint64(newvd, ZPOOL_CONFIG_WHOLE_DISK, wholedisk) != 0 ||
	    nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 ||
	    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &newvd,
	    1) != 0) {
		nvlist_free(newvd);
		nvlist_free(nvroot);
		return;
	}

	/* nvroot holds its own copy of newvd after the array add */
	nvlist_free(newvd);

	(void) zpool_vdev_attach(zhp, fullpath, path, nvroot, B_TRUE);

	nvlist_free(nvroot);

}
2564451Seschrock 
2574451Seschrock /*
2584451Seschrock  * Utility functions to find a vdev matching given criteria.
2594451Seschrock  */
/*
 * Search criteria handed to zfs_iter_pool()/zfs_iter_vdev() through the
 * zpool_iter() callback.  Matching is by vdev GUID when dd_vdev_guid is
 * nonzero, otherwise by prefix comparison of property dd_prop against
 * dd_compare.
 */
typedef struct dev_data {
	const char		*dd_compare;	/* devid or physpath to match */
	const char		*dd_prop;	/* nvlist property to compare */
	zfs_process_func_t	dd_func;	/* invoked on each match */
	boolean_t		dd_found;	/* set when a match was seen */
	boolean_t		dd_isdisk;	/* passed through to dd_func */
	uint64_t		dd_pool_guid;	/* restrict to this pool, or 0 */
	uint64_t		dd_vdev_guid;	/* match this vdev GUID, or 0 */
} dev_data_t;
2694451Seschrock 
/*
 * Recursively walk a vdev tree looking for leaves that match the criteria
 * in the dev_data_t pointed to by 'data'; dd_func is invoked on each match.
 */
static void
zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
{
	dev_data_t *dp = data;
	char *path;
	uint_t c, children;
	nvlist_t **child;
	size_t len;
	uint64_t guid;

	/*
	 * First iterate over any children.  Interior vdevs are never
	 * compared directly; only leaves reach the matching code below.
	 */
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++)
			zfs_iter_vdev(zhp, child[c], data);
		return;
	}

	if (dp->dd_vdev_guid != 0) {
		/* GUID search: dd_compare/dd_prop are not consulted */
		if (nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
		    &guid) != 0 || guid != dp->dd_vdev_guid)
			return;
	} else {
		len = strlen(dp->dd_compare);

		if (nvlist_lookup_string(nvl, dp->dd_prop, &path) != 0 ||
		    strncmp(dp->dd_compare, path, len) != 0)
			return;

		/*
		 * Normally, we want to have an exact match for the comparison
		 * string.  However, we allow substring matches in the following
		 * cases:
		 *
		 * 	<path>:		This is a devpath, and the target is one
		 * 			of its children.
		 *
		 * 	<path/>		This is a devid for a whole disk, and
		 * 			the target is one of its children.
		 */
		if (path[len] != '\0' && path[len] != ':' &&
		    path[len - 1] != '/')
			return;
	}

	(dp->dd_func)(zhp, nvl, dp->dd_isdisk);
}
3194451Seschrock 
32012710SEric.Taylor@Sun.COM void
zfs_enable_ds(void * arg)32112710SEric.Taylor@Sun.COM zfs_enable_ds(void *arg)
32212710SEric.Taylor@Sun.COM {
32312710SEric.Taylor@Sun.COM 	unavailpool_t *pool = (unavailpool_t *)arg;
32412710SEric.Taylor@Sun.COM 
32512710SEric.Taylor@Sun.COM 	(void) zpool_enable_datasets(pool->uap_zhp, NULL, 0);
32612710SEric.Taylor@Sun.COM 	zpool_close(pool->uap_zhp);
32712710SEric.Taylor@Sun.COM 	free(pool);
32812710SEric.Taylor@Sun.COM }
32912710SEric.Taylor@Sun.COM 
/*
 * zpool_iter() callback used for all device-event searches.  Walks this
 * pool's vdev tree via zfs_iter_vdev() (optionally restricted to a single
 * pool GUID), then — once the initial enumeration has finished — checks
 * whether this pool was on the unavailable list and has since recovered,
 * in which case dataset enablement is dispatched to the thread pool.
 */
static int
zfs_iter_pool(zpool_handle_t *zhp, void *data)
{
	nvlist_t *config, *nvl;
	dev_data_t *dp = data;
	uint64_t pool_guid;
	unavailpool_t *pool;

	if ((config = zpool_get_config(zhp, NULL)) != NULL) {
		/* dd_pool_guid == 0 means "search every pool" */
		if (dp->dd_pool_guid == 0 ||
		    (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &pool_guid) == 0 && pool_guid == dp->dd_pool_guid)) {
			(void) nvlist_lookup_nvlist(config,
			    ZPOOL_CONFIG_VDEV_TREE, &nvl);
			zfs_iter_vdev(zhp, nvl, data);
		}
	}
	/*
	 * NOTE(review): g_pool_list/g_tpool are only touched after
	 * g_enumeration_done is set by zfs_enum_pools(); this also assumes
	 * g_tpool was created successfully there — confirm (tpool_create's
	 * return value is not checked).
	 */
	if (g_enumeration_done)  {
		for (pool = list_head(&g_pool_list); pool != NULL;
		    pool = list_next(&g_pool_list, pool)) {

			if (strcmp(zpool_get_name(zhp),
			    zpool_get_name(pool->uap_zhp)))
				continue;
			if (zfs_toplevel_state(zhp) >= VDEV_STATE_DEGRADED) {
				/* pool recovered: hand off to zfs_enable_ds */
				list_remove(&g_pool_list, pool);
				(void) tpool_dispatch(g_tpool, zfs_enable_ds,
				    pool);
				break;
			}
		}
	}

	zpool_close(zhp);
	return (0);
}
3664451Seschrock 
3674451Seschrock /*
3684451Seschrock  * Given a physical device path, iterate over all (pool, vdev) pairs which
3694451Seschrock  * correspond to the given path.
3704451Seschrock  */
3714451Seschrock static boolean_t
devpath_iter(const char * devpath,zfs_process_func_t func,boolean_t wholedisk)3724451Seschrock devpath_iter(const char *devpath, zfs_process_func_t func, boolean_t wholedisk)
3734451Seschrock {
3744451Seschrock 	dev_data_t data = { 0 };
3754451Seschrock 
3764451Seschrock 	data.dd_compare = devpath;
3774451Seschrock 	data.dd_func = func;
3784451Seschrock 	data.dd_prop = ZPOOL_CONFIG_PHYS_PATH;
3794451Seschrock 	data.dd_found = B_FALSE;
3804451Seschrock 	data.dd_isdisk = wholedisk;
3814451Seschrock 
3824451Seschrock 	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
3834451Seschrock 
3844451Seschrock 	return (data.dd_found);
3854451Seschrock }
3864451Seschrock 
3874451Seschrock /*
3884451Seschrock  * Given a /devices path, lookup the corresponding devid for each minor node,
3894451Seschrock  * and find any vdevs with matching devids.  Doing this straight up would be
3904451Seschrock  * rather inefficient, O(minor nodes * vdevs in system), so we take advantage of
3914451Seschrock  * the fact that each devid ends with "/<minornode>".  Once we find any valid
3924451Seschrock  * minor node, we chop off the portion after the last slash, and then search for
3934451Seschrock  * matching vdevs, which is O(vdevs in system).
3944451Seschrock  */
/*
 * Given a /devices path, lookup the corresponding devid for each minor node,
 * and find any vdevs with matching devids.  Doing this straight up would be
 * rather inefficient, O(minor nodes * vdevs in system), so we take advantage of
 * the fact that each devid ends with "/<minornode>".  Once we find any valid
 * minor node, we chop off the portion after the last slash, and then search for
 * matching vdevs, which is O(vdevs in system).
 *
 * Returns B_TRUE if at least one vdev matched and was processed by 'func'.
 */
static boolean_t
devid_iter(const char *devpath, zfs_process_func_t func, boolean_t wholedisk)
{
	size_t len = strlen(devpath) + sizeof ("/devices") +
	    sizeof (PHYS_PATH) - 1;
	char *fullpath;
	int fd;
	ddi_devid_t devid;
	char *devidstr, *fulldevid;
	dev_data_t data = { 0 };

	/*
	 * Try to open a known minor node.  PHYS_PATH is the arch-specific
	 * minor suffix defined at the top of this file.
	 */
	fullpath = alloca(len);
	(void) snprintf(fullpath, len, "/devices%s%s", devpath, PHYS_PATH);
	if ((fd = open(fullpath, O_RDONLY)) < 0)
		return (B_FALSE);

	/*
	 * Determine the devid as a string, with no trailing slash for the minor
	 * node.
	 */
	if (devid_get(fd, &devid) != 0) {
		(void) close(fd);
		return (B_FALSE);
	}
	(void) close(fd);

	if ((devidstr = devid_str_encode(devid, NULL)) == NULL) {
		devid_free(devid);
		return (B_FALSE);
	}

	/* append the '/' so zfs_iter_vdev() prefix-matches child minors */
	len = strlen(devidstr) + 2;
	fulldevid = alloca(len);
	(void) snprintf(fulldevid, len, "%s/", devidstr);

	data.dd_compare = fulldevid;
	data.dd_func = func;
	data.dd_prop = ZPOOL_CONFIG_DEVID;
	data.dd_found = B_FALSE;
	data.dd_isdisk = wholedisk;

	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);

	devid_str_free(devidstr);
	devid_free(devid);

	return (data.dd_found);
}
4464451Seschrock 
4474451Seschrock /*
4484451Seschrock  * This function is called when we receive a devfs add event.  This can be
4494451Seschrock  * either a disk event or a lofi event, and the behavior is slightly different
4504451Seschrock  * depending on which it is.
4514451Seschrock  */
static int
zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
{
	char *devpath, *devname;
	char path[PATH_MAX], realpath[PATH_MAX];
	char *colon, *raw;
	int ret;

	/*
	 * The main unit of operation is the physical device path.  For disks,
	 * this is the device node, as all minor nodes are affected.  For lofi
	 * devices, this includes the minor path.  Unfortunately, this isn't
	 * represented in the DEV_PHYS_PATH for various reasons.
	 */
	if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devpath) != 0)
		return (-1);

	/*
	 * If this is a lofi device, then also get the minor instance name.
	 * Unfortunately, the current payload doesn't include an easy way to get
	 * this information.  So we cheat by resolving the 'dev_name' (which
	 * refers to the raw device) and taking the portion between ':(*),raw'.
	 *
	 * Note: 'raw' is only read behind the short-circuiting
	 * 'colon != NULL' test below, so it is never used uninitialized.
	 */
	(void) strlcpy(realpath, devpath, sizeof (realpath));
	if (is_lofi) {
		if (nvlist_lookup_string(nvl, DEV_NAME,
		    &devname) == 0 &&
		    (ret = resolvepath(devname, path,
		    sizeof (path))) > 0) {
			path[ret] = '\0';
			colon = strchr(path, ':');
			if (colon != NULL)
				raw = strstr(colon + 1, ",raw");
			if (colon != NULL && raw != NULL) {
				/* splice the ':<minor>' part onto devpath */
				*raw = '\0';
				(void) snprintf(realpath,
				    sizeof (realpath), "%s%s",
				    devpath, colon);
				*raw = ',';
			}
		}
	}

	/*
	 * Iterate over all vdevs with a matching devid, and then those with a
	 * matching /devices path.  For disks, we only want to pay attention to
	 * vdevs marked as whole disks.  For lofi, we don't care (because we're
	 * matching an exact minor name).
	 */
	if (!devid_iter(realpath, zfs_process_add, !is_lofi))
		(void) devpath_iter(realpath, zfs_process_add, !is_lofi);

	return (0);
}
5064451Seschrock 
5074451Seschrock /*
5084451Seschrock  * Called when we receive a VDEV_CHECK event, which indicates a device could not
5094451Seschrock  * be opened during initial pool open, but the autoreplace property was set on
5104451Seschrock  * the pool.  In this case, we treat it as if it were an add event.
5114451Seschrock  */
5124451Seschrock static int
zfs_deliver_check(nvlist_t * nvl)5134451Seschrock zfs_deliver_check(nvlist_t *nvl)
5144451Seschrock {
5154451Seschrock 	dev_data_t data = { 0 };
5164451Seschrock 
5174451Seschrock 	if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID,
5184451Seschrock 	    &data.dd_pool_guid) != 0 ||
5194451Seschrock 	    nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID,
5204451Seschrock 	    &data.dd_vdev_guid) != 0)
5214451Seschrock 		return (0);
5224451Seschrock 
5234451Seschrock 	data.dd_isdisk = B_TRUE;
5244451Seschrock 	data.dd_func = zfs_process_add;
5254451Seschrock 
5264451Seschrock 	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
5274451Seschrock 
5284451Seschrock 	return (0);
5294451Seschrock }
5304451Seschrock 
53112318SEric.Taylor@Sun.COM #define	DEVICE_PREFIX	"/devices"
53212318SEric.Taylor@Sun.COM 
/*
 * zpool_iter() callback for dynamic LUN expansion (DLE) events.  Searches
 * this pool for a vdev whose physical path matches 'data'; if found and the
 * pool has autoexpand set, re-onlines the device so the new capacity is
 * picked up.  Returns 1 (stop iteration) when the vdev was found in this
 * pool, 0 otherwise.  The pool handle is closed on both paths.
 */
static int
zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
{
	char *devname = data;
	boolean_t avail_spare, l2cache;
	vdev_state_t newstate;
	nvlist_t *tgt;

	syseventd_print(9, "zfsdle_vdev_online: searching for %s in pool %s\n",
	    devname, zpool_get_name(zhp));

	if ((tgt = zpool_find_vdev_by_physpath(zhp, devname,
	    &avail_spare, &l2cache, NULL)) != NULL) {
		char *path, fullpath[MAXPATHLEN];
		uint64_t wholedisk = 0ULL;

		verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH,
		    &path) == 0);
		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
		    &wholedisk) == 0);

		/* whole disks carry a trailing slice suffix; strip it */
		(void) strlcpy(fullpath, path, sizeof (fullpath));
		if (wholedisk)
			fullpath[strlen(fullpath) - 2] = '\0';

		if (zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
			syseventd_print(9, "zfsdle_vdev_online: setting device"
			    " device %s to ONLINE state in pool %s.\n",
			    fullpath, zpool_get_name(zhp));
			if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL)
				(void) zpool_vdev_online(zhp, fullpath, 0,
				    &newstate);
		}
		zpool_close(zhp);
		return (1);
	}
	zpool_close(zhp);
	return (0);
}
57212318SEric.Taylor@Sun.COM 
57312318SEric.Taylor@Sun.COM int
zfs_deliver_dle(nvlist_t * nvl)57412318SEric.Taylor@Sun.COM zfs_deliver_dle(nvlist_t *nvl)
57512318SEric.Taylor@Sun.COM {
57612318SEric.Taylor@Sun.COM 	char *devname;
57712318SEric.Taylor@Sun.COM 	if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devname) != 0) {
57812318SEric.Taylor@Sun.COM 		syseventd_print(9, "zfs_deliver_event: no physpath\n");
57912318SEric.Taylor@Sun.COM 		return (-1);
58012318SEric.Taylor@Sun.COM 	}
58112318SEric.Taylor@Sun.COM 	if (strncmp(devname, DEVICE_PREFIX, strlen(DEVICE_PREFIX)) != 0) {
58212318SEric.Taylor@Sun.COM 		syseventd_print(9, "zfs_deliver_event: invalid "
58312318SEric.Taylor@Sun.COM 		    "device '%s'", devname);
58412318SEric.Taylor@Sun.COM 		return (-1);
58512318SEric.Taylor@Sun.COM 	}
58612318SEric.Taylor@Sun.COM 
58712318SEric.Taylor@Sun.COM 	/*
58812318SEric.Taylor@Sun.COM 	 * We try to find the device using the physical
58912318SEric.Taylor@Sun.COM 	 * path that has been supplied. We need to strip off
59012318SEric.Taylor@Sun.COM 	 * the /devices prefix before starting our search.
59112318SEric.Taylor@Sun.COM 	 */
59212318SEric.Taylor@Sun.COM 	devname += strlen(DEVICE_PREFIX);
59312318SEric.Taylor@Sun.COM 	if (zpool_iter(g_zfshdl, zfsdle_vdev_online, devname) != 1) {
59412318SEric.Taylor@Sun.COM 		syseventd_print(9, "zfs_deliver_event: device '%s' not"
59512318SEric.Taylor@Sun.COM 		    " found\n", devname);
59612318SEric.Taylor@Sun.COM 		return (1);
59712318SEric.Taylor@Sun.COM 	}
59812318SEric.Taylor@Sun.COM 	return (0);
59912318SEric.Taylor@Sun.COM }
60012318SEric.Taylor@Sun.COM 
60112318SEric.Taylor@Sun.COM 
6024451Seschrock /*ARGSUSED*/
6034451Seschrock static int
zfs_deliver_event(sysevent_t * ev,int unused)6044451Seschrock zfs_deliver_event(sysevent_t *ev, int unused)
6054451Seschrock {
6064451Seschrock 	const char *class = sysevent_get_class_name(ev);
6074451Seschrock 	const char *subclass = sysevent_get_subclass_name(ev);
6084451Seschrock 	nvlist_t *nvl;
6094451Seschrock 	int ret;
61012318SEric.Taylor@Sun.COM 	boolean_t is_lofi, is_check, is_dle = B_FALSE;
6114451Seschrock 
6124451Seschrock 	if (strcmp(class, EC_DEV_ADD) == 0) {
6134451Seschrock 		/*
6144451Seschrock 		 * We're mainly interested in disk additions, but we also listen
6154451Seschrock 		 * for new lofi devices, to allow for simplified testing.
6164451Seschrock 		 */
6174451Seschrock 		if (strcmp(subclass, ESC_DISK) == 0)
6184451Seschrock 			is_lofi = B_FALSE;
6194451Seschrock 		else if (strcmp(subclass, ESC_LOFI) == 0)
6204451Seschrock 			is_lofi = B_TRUE;
6214451Seschrock 		else
6224451Seschrock 			return (0);
6234451Seschrock 
6244451Seschrock 		is_check = B_FALSE;
6254451Seschrock 	} else if (strcmp(class, EC_ZFS) == 0 &&
6264451Seschrock 	    strcmp(subclass, ESC_ZFS_VDEV_CHECK) == 0) {
6274451Seschrock 		/*
6284451Seschrock 		 * This event signifies that a device failed to open during pool
6294451Seschrock 		 * load, but the 'autoreplace' property was set, so we should
6304451Seschrock 		 * pretend it's just been added.
6314451Seschrock 		 */
6324451Seschrock 		is_check = B_TRUE;
63312318SEric.Taylor@Sun.COM 	} else if (strcmp(class, EC_DEV_STATUS) == 0 &&
63412318SEric.Taylor@Sun.COM 	    strcmp(subclass, ESC_DEV_DLE) == 0) {
63512318SEric.Taylor@Sun.COM 		is_dle = B_TRUE;
6364451Seschrock 	} else {
6374451Seschrock 		return (0);
6384451Seschrock 	}
6394451Seschrock 
6404451Seschrock 	if (sysevent_get_attr_list(ev, &nvl) != 0)
6414451Seschrock 		return (-1);
6424451Seschrock 
64312318SEric.Taylor@Sun.COM 	if (is_dle)
64412318SEric.Taylor@Sun.COM 		ret = zfs_deliver_dle(nvl);
64512318SEric.Taylor@Sun.COM 	else if (is_check)
6464451Seschrock 		ret = zfs_deliver_check(nvl);
6474451Seschrock 	else
6484451Seschrock 		ret = zfs_deliver_add(nvl, is_lofi);
6494451Seschrock 
6504451Seschrock 	nvlist_free(nvl);
6514451Seschrock 	return (ret);
6524451Seschrock }
6534451Seschrock 
/*
 * Body of the enumeration thread started by slm_init(): collect the
 * currently-unavailable pools onto g_pool_list, create the thread pool used
 * to re-enable their datasets, and publish completion via
 * g_enumeration_done (checked in zfs_iter_pool()).
 */
/*ARGSUSED*/
void *
zfs_enum_pools(void *arg)
{
	(void) zpool_iter(g_zfshdl, zfs_unavail_pool, (void *)&g_pool_list);
	/*
	 * NOTE(review): tpool_create() can fail and its return value is not
	 * checked; a later tpool_dispatch(g_tpool, ...) in zfs_iter_pool()
	 * would then dereference NULL — confirm and harden.
	 */
	if (!list_is_empty(&g_pool_list))
		g_tpool = tpool_create(1, sysconf(_SC_NPROCESSORS_ONLN),
		    0, NULL);
	g_enumeration_done = B_TRUE;
	return (NULL);
}
665*12768SEric.Taylor@oracle.com 
/*
 * syseventd module operations vector: interface version, a retry limit of
 * 10 for failed deliveries, and the event handler entry point.
 */
static struct slm_mod_ops zfs_mod_ops = {
	SE_MAJOR_VERSION, SE_MINOR_VERSION, 10, zfs_deliver_event
};
6694451Seschrock 
6704451Seschrock struct slm_mod_ops *
slm_init()6714451Seschrock slm_init()
6724451Seschrock {
6734451Seschrock 	if ((g_zfshdl = libzfs_init()) == NULL)
6744451Seschrock 		return (NULL);
675*12768SEric.Taylor@oracle.com 	/*
676*12768SEric.Taylor@oracle.com 	 * collect a list of unavailable pools (asynchronously,
677*12768SEric.Taylor@oracle.com 	 * since this can take a while)
678*12768SEric.Taylor@oracle.com 	 */
67912710SEric.Taylor@Sun.COM 	list_create(&g_pool_list, sizeof (struct unavailpool),
68012710SEric.Taylor@Sun.COM 	    offsetof(struct unavailpool, uap_node));
681*12768SEric.Taylor@oracle.com 	if (thr_create(NULL, 0, zfs_enum_pools, NULL, 0, &g_zfs_tid) != 0)
682*12768SEric.Taylor@oracle.com 		return (NULL);
6834451Seschrock 	return (&zfs_mod_ops);
6844451Seschrock }
6854451Seschrock 
6864451Seschrock void
slm_fini()6874451Seschrock slm_fini()
6884451Seschrock {
68912710SEric.Taylor@Sun.COM 	unavailpool_t *pool;
69012710SEric.Taylor@Sun.COM 
691*12768SEric.Taylor@oracle.com 	if (g_tpool != NULL) {
69212710SEric.Taylor@Sun.COM 		tpool_wait(g_tpool);
69312710SEric.Taylor@Sun.COM 		tpool_destroy(g_tpool);
69412710SEric.Taylor@Sun.COM 	}
69512710SEric.Taylor@Sun.COM 	while ((pool = (list_head(&g_pool_list))) != NULL) {
69612710SEric.Taylor@Sun.COM 		list_remove(&g_pool_list, pool);
69712710SEric.Taylor@Sun.COM 		zpool_close(pool->uap_zhp);
69812710SEric.Taylor@Sun.COM 		free(pool);
69912710SEric.Taylor@Sun.COM 	}
700*12768SEric.Taylor@oracle.com 	(void) thr_join(g_zfs_tid, NULL, NULL);
70112710SEric.Taylor@Sun.COM 	list_destroy(&g_pool_list);
70212456SEric.Taylor@Sun.COM 	libzfs_fini(g_zfshdl);
7034451Seschrock }
704