12621Sllai1 /* 22621Sllai1 * CDDL HEADER START 32621Sllai1 * 42621Sllai1 * The contents of this file are subject to the terms of the 52621Sllai1 * Common Development and Distribution License (the "License"). 62621Sllai1 * You may not use this file except in compliance with the License. 72621Sllai1 * 82621Sllai1 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 92621Sllai1 * or http://www.opensolaris.org/os/licensing. 102621Sllai1 * See the License for the specific language governing permissions 112621Sllai1 * and limitations under the License. 122621Sllai1 * 132621Sllai1 * When distributing Covered Code, include this CDDL HEADER in each 142621Sllai1 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 152621Sllai1 * If applicable, add the following below this CDDL HEADER, with the 162621Sllai1 * fields enclosed by brackets "[]" replaced with your own identifying 172621Sllai1 * information: Portions Copyright [yyyy] [name of copyright owner] 182621Sllai1 * 192621Sllai1 * CDDL HEADER END 202621Sllai1 */ 212621Sllai1 /* 22*10097SEric.Taylor@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 232621Sllai1 * Use is subject to license terms. 
242621Sllai1 */ 252621Sllai1 262621Sllai1 /* 272621Sllai1 * negative cache handling for the /dev fs 282621Sllai1 */ 292621Sllai1 302621Sllai1 #include <sys/types.h> 312621Sllai1 #include <sys/param.h> 322621Sllai1 #include <sys/t_lock.h> 332621Sllai1 #include <sys/systm.h> 342621Sllai1 #include <sys/sysmacros.h> 352621Sllai1 #include <sys/user.h> 362621Sllai1 #include <sys/time.h> 372621Sllai1 #include <sys/vfs.h> 382621Sllai1 #include <sys/vnode.h> 392621Sllai1 #include <sys/file.h> 402621Sllai1 #include <sys/fcntl.h> 412621Sllai1 #include <sys/flock.h> 422621Sllai1 #include <sys/kmem.h> 432621Sllai1 #include <sys/uio.h> 442621Sllai1 #include <sys/errno.h> 452621Sllai1 #include <sys/stat.h> 462621Sllai1 #include <sys/cred.h> 472621Sllai1 #include <sys/cmn_err.h> 482621Sllai1 #include <sys/debug.h> 492621Sllai1 #include <sys/mode.h> 502621Sllai1 #include <sys/policy.h> 512621Sllai1 #include <fs/fs_subr.h> 522621Sllai1 #include <sys/mount.h> 532621Sllai1 #include <sys/fs/snode.h> 542621Sllai1 #include <sys/fs/dv_node.h> 55*10097SEric.Taylor@Sun.COM #include <sys/fs/sdev_impl.h> 562621Sllai1 #include <sys/sunndi.h> 572621Sllai1 #include <sys/sunmdi.h> 582621Sllai1 #include <sys/ddi.h> 592621Sllai1 #include <sys/modctl.h> 602797Sjg #include <sys/devcache.h> 612621Sllai1 622621Sllai1 632621Sllai1 /* 642621Sllai1 * ncache is a negative cache of failed lookups. An entry 652621Sllai1 * is added after an attempt to configure a device by that 662621Sllai1 * name failed. An accumulation of these entries over time 672621Sllai1 * gives us a set of device name for which implicit reconfiguration 682621Sllai1 * does not need to be attempted. If a name is created matching 692621Sllai1 * an entry in ncache, that entry is removed, with the 702621Sllai1 * persistent store updated. 
 *
 * Implicit reconfig is initiated for any name during lookup that
 * can't be resolved from the backing store and that isn't
 * present in the negative cache.  This functionality is
 * enabled during system startup once communication with devfsadm
 * can be achieved.  Since readdir is more general, implicit
 * reconfig initiated by reading a directory isn't enabled until
 * the system is more fully booted, at the time of the multi-user
 * milestone, corresponding to init state 2.
 *
 * A maximum is imposed on the number of entries in the cache
 * to limit some script going wild and as a defense against attack.
 * The default limit is 64 and can be adjusted via sdev_nc_max_entries.
 *
 * Each entry also has an expiration count.  When a name in the cache
 * is looked up, its count is reset to the default.  Subsequent boots
 * will decrement the count if a name isn't referenced.  This permits
 * a once-only entry to eventually be removed over time.
 *
 * sdev_reconfig_delay implements a "debounce" of the timing beyond
 * system available indication, providing what the filesystem considers
 * to be the system-is-fully-booted state.  This is provided to adjust
 * the timing if some application startup is performing a readdir
 * in /dev that initiates a troublesome implicit reconfig on every boot.
 *
 * sdev_nc_disable_reset can be used to disable clearing the negative cache
 * on reconfig boot.  The default is to clear the cache on reconfig boot.
 * sdev_nc_disable can be used to disable the negative cache itself.
 *
 * sdev_reconfig_disable can be used to disable implicit reconfig.
1012621Sllai1 * The default is that implicit reconfig is enabled. 1022621Sllai1 */ 1032621Sllai1 1042621Sllai1 /* tunables and defaults */ 1052621Sllai1 #define SDEV_NC_EXPIRECNT 4 1062621Sllai1 #define SDEV_NC_MAX_ENTRIES 64 1072621Sllai1 #define SEV_RECONFIG_DELAY 6 /* seconds */ 1082621Sllai1 1092797Sjg /* tunables */ 1102797Sjg int sdev_nc_expirecnt = SDEV_NC_EXPIRECNT; 1112797Sjg int sdev_nc_max_entries = SDEV_NC_MAX_ENTRIES; 1122797Sjg int sdev_reconfig_delay = SEV_RECONFIG_DELAY; 1132797Sjg int sdev_reconfig_verbose = 0; 1142797Sjg int sdev_reconfig_disable = 0; 1152797Sjg int sdev_nc_disable = 0; 1162797Sjg int sdev_nc_disable_reset = 0; 1172797Sjg int sdev_nc_verbose = 0; 1182797Sjg int sdev_cache_read_disable = 0; 1192797Sjg int sdev_cache_write_disable = 0; 1202621Sllai1 1212621Sllai1 /* globals */ 1222797Sjg int sdev_boot_state = SDEV_BOOT_STATE_INITIAL; 1232797Sjg int sdev_reconfig_boot = 0; 1242797Sjg sdev_nc_list_t *sdev_ncache; 1252797Sjg static nvf_handle_t sdevfd_handle; 1262621Sllai1 1272621Sllai1 /* static prototypes */ 1282797Sjg static void sdev_ncache_write_complete(nvf_handle_t); 1292621Sllai1 static void sdev_ncache_write(void); 1302621Sllai1 static void sdev_ncache_process_store(void); 1312621Sllai1 static sdev_nc_list_t *sdev_nc_newlist(void); 1322621Sllai1 static void sdev_nc_free_unlinked_node(sdev_nc_node_t *); 1332621Sllai1 static sdev_nc_node_t *sdev_nc_findpath(sdev_nc_list_t *, char *); 1342621Sllai1 static void sdev_nc_insertnode(sdev_nc_list_t *, sdev_nc_node_t *); 1352621Sllai1 static void sdev_nc_free_bootonly(void); 1362797Sjg static int sdev_ncache_unpack_nvlist(nvf_handle_t, nvlist_t *, char *); 1372797Sjg static int sdev_ncache_pack_list(nvf_handle_t, nvlist_t **); 1382797Sjg static void sdev_ncache_list_free(nvf_handle_t); 1392797Sjg static void sdev_nvp_free(nvp_devname_t *); 1402621Sllai1 1412797Sjg /* 1422797Sjg * Registration for /etc/devices/devname_cache 1432797Sjg */ 1442797Sjg static nvf_ops_t sdev_cache_ops = { 
1452797Sjg "/etc/devices/devname_cache", /* path to cache */ 1462797Sjg sdev_ncache_unpack_nvlist, /* read: unpack nvlist */ 1472797Sjg sdev_ncache_pack_list, /* write: pack list */ 1482797Sjg sdev_ncache_list_free, /* free data list */ 1492797Sjg sdev_ncache_write_complete /* write complete callback */ 1502797Sjg }; 1512621Sllai1 1522621Sllai1 /* 1532621Sllai1 * called once at filesystem initialization 1542621Sllai1 */ 1552621Sllai1 void 1562621Sllai1 sdev_ncache_init(void) 1572621Sllai1 { 1582621Sllai1 sdev_ncache = sdev_nc_newlist(); 1592621Sllai1 } 1602621Sllai1 1612621Sllai1 /* 1622621Sllai1 * called at mount of the global instance 1632621Sllai1 * currently the global instance is never unmounted 1642621Sllai1 */ 1652621Sllai1 void 1662621Sllai1 sdev_ncache_setup(void) 1672621Sllai1 { 1682797Sjg sdevfd_handle = nvf_register_file(&sdev_cache_ops); 1692797Sjg ASSERT(sdevfd_handle); 1702797Sjg 1712797Sjg list_create(nvf_list(sdevfd_handle), sizeof (nvp_devname_t), 1722797Sjg offsetof(nvp_devname_t, nvp_link)); 1732621Sllai1 1742797Sjg rw_enter(nvf_lock(sdevfd_handle), RW_WRITER); 1752797Sjg if (!sdev_cache_read_disable) { 1762797Sjg (void) nvf_read_file(sdevfd_handle); 1772797Sjg } 1782797Sjg sdev_ncache_process_store(); 1792797Sjg rw_exit(nvf_lock(sdevfd_handle)); 1802621Sllai1 1812621Sllai1 sdev_devstate_change(); 1822621Sllai1 } 1832621Sllai1 1842621Sllai1 static void 1852797Sjg sdev_nvp_free(nvp_devname_t *dp) 1862621Sllai1 { 1872797Sjg int i; 1882797Sjg char **p; 1892797Sjg 1902797Sjg if (dp->nvp_npaths > 0) { 1912797Sjg p = dp->nvp_paths; 1922797Sjg for (i = 0; i < dp->nvp_npaths; i++, p++) { 1932797Sjg kmem_free(*p, strlen(*p)+1); 1942797Sjg } 1952797Sjg kmem_free(dp->nvp_paths, 196*10097SEric.Taylor@Sun.COM dp->nvp_npaths * sizeof (char *)); 1972797Sjg kmem_free(dp->nvp_expirecnts, 198*10097SEric.Taylor@Sun.COM dp->nvp_npaths * sizeof (int)); 1992797Sjg } 2002621Sllai1 2012797Sjg kmem_free(dp, sizeof (nvp_devname_t)); 2022797Sjg } 2032797Sjg 2042797Sjg 
static void 2052797Sjg sdev_ncache_list_free(nvf_handle_t fd) 2062797Sjg { 2072797Sjg list_t *listp; 2082797Sjg nvp_devname_t *dp; 2092797Sjg 2102797Sjg ASSERT(fd == sdevfd_handle); 2112797Sjg ASSERT(RW_WRITE_HELD(nvf_lock(fd))); 2122797Sjg 2132797Sjg listp = nvf_list(fd); 2142797Sjg if ((dp = list_head(listp)) != NULL) { 2152797Sjg list_remove(listp, dp); 2162797Sjg sdev_nvp_free(dp); 2172621Sllai1 } 2182621Sllai1 } 2192621Sllai1 2202797Sjg /* 2212797Sjg * Unpack a device path/nvlist pair to internal data list format. 2222797Sjg * Used to decode the nvlist format into the internal representation 2232797Sjg * when reading /etc/devices/devname_cache. 2242797Sjg * Note that the expiration counts are optional, for compatibility 2252797Sjg * with earlier instances of the cache. If not present, the 2262797Sjg * expire counts are initialized to defaults. 2272797Sjg */ 2282797Sjg static int 2292797Sjg sdev_ncache_unpack_nvlist(nvf_handle_t fd, nvlist_t *nvl, char *name) 2302797Sjg { 2312797Sjg nvp_devname_t *np; 2322797Sjg char **strs; 2332797Sjg int *cnts; 2342797Sjg uint_t nstrs, ncnts; 2352797Sjg int rval, i; 2362797Sjg 2372797Sjg ASSERT(fd == sdevfd_handle); 2382797Sjg ASSERT(RW_WRITE_HELD(nvf_lock(fd))); 2392797Sjg 2402797Sjg /* name of the sublist must match what we created */ 2412797Sjg if (strcmp(name, DP_DEVNAME_ID) != 0) { 2422797Sjg return (-1); 2432797Sjg } 2442797Sjg 2452797Sjg np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP); 2462797Sjg 2472797Sjg rval = nvlist_lookup_string_array(nvl, 2482797Sjg DP_DEVNAME_NCACHE_ID, &strs, &nstrs); 2492797Sjg if (rval) { 2502797Sjg kmem_free(np, sizeof (nvp_devname_t)); 2512797Sjg return (-1); 2522797Sjg } 2532797Sjg 2542797Sjg np->nvp_npaths = nstrs; 2552797Sjg np->nvp_paths = kmem_zalloc(nstrs * sizeof (char *), KM_SLEEP); 2562797Sjg for (i = 0; i < nstrs; i++) { 2572797Sjg np->nvp_paths[i] = i_ddi_strdup(strs[i], KM_SLEEP); 2582797Sjg } 2592797Sjg np->nvp_expirecnts = kmem_zalloc(nstrs * sizeof (int), KM_SLEEP); 
2602797Sjg for (i = 0; i < nstrs; i++) { 2612797Sjg np->nvp_expirecnts[i] = sdev_nc_expirecnt; 2622797Sjg } 2632797Sjg 2642797Sjg rval = nvlist_lookup_int32_array(nvl, 2652797Sjg DP_DEVNAME_NC_EXPIRECNT_ID, &cnts, &ncnts); 2662797Sjg if (rval == 0) { 2672797Sjg ASSERT(ncnts == nstrs); 2682797Sjg ncnts = min(ncnts, nstrs); 2692797Sjg for (i = 0; i < nstrs; i++) { 2702797Sjg np->nvp_expirecnts[i] = cnts[i]; 2712797Sjg } 2722797Sjg } 2732797Sjg 2742797Sjg list_insert_tail(nvf_list(sdevfd_handle), np); 2752797Sjg 2762797Sjg return (0); 2772797Sjg } 2782797Sjg 2792797Sjg /* 2802797Sjg * Pack internal format cache data to a single nvlist. 2812797Sjg * Used when writing the nvlist file. 2822797Sjg * Note this is called indirectly by the nvpflush daemon. 2832797Sjg */ 2842797Sjg static int 2852797Sjg sdev_ncache_pack_list(nvf_handle_t fd, nvlist_t **ret_nvl) 2862797Sjg { 2872797Sjg nvlist_t *nvl, *sub_nvl; 2882797Sjg nvp_devname_t *np; 2892797Sjg int rval; 2902797Sjg list_t *listp; 2912797Sjg 2922797Sjg ASSERT(fd == sdevfd_handle); 2932797Sjg ASSERT(RW_WRITE_HELD(nvf_lock(fd))); 2942797Sjg 2952797Sjg rval = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP); 2962797Sjg if (rval != 0) { 2972797Sjg nvf_error("%s: nvlist alloc error %d\n", 298*10097SEric.Taylor@Sun.COM nvf_cache_name(fd), rval); 2992797Sjg return (DDI_FAILURE); 3002797Sjg } 3012797Sjg 3022797Sjg listp = nvf_list(sdevfd_handle); 3032797Sjg if ((np = list_head(listp)) != NULL) { 3042797Sjg ASSERT(list_next(listp, np) == NULL); 3052797Sjg 3062797Sjg rval = nvlist_alloc(&sub_nvl, NV_UNIQUE_NAME, KM_SLEEP); 3072797Sjg if (rval != 0) { 3082797Sjg nvf_error("%s: nvlist alloc error %d\n", 309*10097SEric.Taylor@Sun.COM nvf_cache_name(fd), rval); 3102797Sjg sub_nvl = NULL; 3112797Sjg goto err; 3122797Sjg } 3132797Sjg 3142797Sjg rval = nvlist_add_string_array(sub_nvl, 3152797Sjg DP_DEVNAME_NCACHE_ID, np->nvp_paths, np->nvp_npaths); 3162797Sjg if (rval != 0) { 3172797Sjg nvf_error("%s: nvlist add error %d (sdev)\n", 3182797Sjg 
nvf_cache_name(fd), rval); 3192797Sjg goto err; 3202797Sjg } 3212797Sjg 3222797Sjg rval = nvlist_add_int32_array(sub_nvl, 3232797Sjg DP_DEVNAME_NC_EXPIRECNT_ID, 3242797Sjg np->nvp_expirecnts, np->nvp_npaths); 3252797Sjg if (rval != 0) { 3262797Sjg nvf_error("%s: nvlist add error %d (sdev)\n", 3272797Sjg nvf_cache_name(fd), rval); 3282797Sjg goto err; 3292797Sjg } 3302797Sjg 3312797Sjg rval = nvlist_add_nvlist(nvl, DP_DEVNAME_ID, sub_nvl); 3322797Sjg if (rval != 0) { 3332797Sjg nvf_error("%s: nvlist add error %d (sublist)\n", 3342797Sjg nvf_cache_name(fd), rval); 3352797Sjg goto err; 3362797Sjg } 3372797Sjg nvlist_free(sub_nvl); 3382797Sjg } 3392797Sjg 3402797Sjg *ret_nvl = nvl; 3412797Sjg return (DDI_SUCCESS); 3422797Sjg 3432797Sjg err: 3442797Sjg if (sub_nvl) 3452797Sjg nvlist_free(sub_nvl); 3462797Sjg nvlist_free(nvl); 3472797Sjg *ret_nvl = NULL; 3482797Sjg return (DDI_FAILURE); 3492797Sjg } 3502797Sjg 3512797Sjg /* 3522797Sjg * Run through the data read from the backing cache store 3532797Sjg * to establish the initial state of the neg. cache. 
3542797Sjg */ 3552621Sllai1 static void 3562621Sllai1 sdev_ncache_process_store(void) 3572621Sllai1 { 3582621Sllai1 sdev_nc_list_t *ncl = sdev_ncache; 3592621Sllai1 nvp_devname_t *np; 3602621Sllai1 sdev_nc_node_t *lp; 3612621Sllai1 char *path; 3622621Sllai1 int i, n; 3632797Sjg list_t *listp; 3642621Sllai1 3652621Sllai1 if (sdev_nc_disable) 3662621Sllai1 return; 3672621Sllai1 3682797Sjg ASSERT(RW_WRITE_HELD(nvf_lock(sdevfd_handle))); 3692797Sjg 3702797Sjg listp = nvf_list(sdevfd_handle); 3712797Sjg for (np = list_head(listp); np; np = list_next(listp, np)) { 3722621Sllai1 for (i = 0; i < np->nvp_npaths; i++) { 3732621Sllai1 sdcmn_err5((" %s %d\n", 3742621Sllai1 np->nvp_paths[i], np->nvp_expirecnts[i])); 3752621Sllai1 if (ncl->ncl_nentries < sdev_nc_max_entries) { 3762621Sllai1 path = np->nvp_paths[i]; 3772621Sllai1 n = strlen(path) + 1; 3782621Sllai1 lp = kmem_alloc(sizeof (sdev_nc_node_t), 3792621Sllai1 KM_SLEEP); 3802621Sllai1 lp->ncn_name = kmem_alloc(n, KM_SLEEP); 3812621Sllai1 bcopy(path, lp->ncn_name, n); 3822621Sllai1 lp->ncn_flags = NCN_SRC_STORE; 3832621Sllai1 lp->ncn_expirecnt = np->nvp_expirecnts[i]; 3842621Sllai1 sdev_nc_insertnode(ncl, lp); 3852621Sllai1 } else if (sdev_nc_verbose) { 3862621Sllai1 cmn_err(CE_CONT, 3872621Sllai1 "?%s: truncating from ncache (max %d)\n", 3882621Sllai1 np->nvp_paths[i], sdev_nc_max_entries); 3892621Sllai1 } 3902621Sllai1 } 3912621Sllai1 } 3922621Sllai1 } 3932621Sllai1 3942797Sjg /* 3952797Sjg * called by nvpflush daemon to inform us that an update of 3962797Sjg * the cache file has been completed. 
3972797Sjg */ 3982621Sllai1 static void 3992797Sjg sdev_ncache_write_complete(nvf_handle_t fd) 4002621Sllai1 { 4012621Sllai1 sdev_nc_list_t *ncl = sdev_ncache; 4022621Sllai1 4032797Sjg ASSERT(fd == sdevfd_handle); 4042797Sjg 4052621Sllai1 mutex_enter(&ncl->ncl_mutex); 4062621Sllai1 4072621Sllai1 ASSERT(ncl->ncl_flags & NCL_LIST_WRITING); 4082621Sllai1 4092621Sllai1 if (ncl->ncl_flags & NCL_LIST_DIRTY) { 4102621Sllai1 sdcmn_err5(("ncache write complete but dirty again\n")); 4112621Sllai1 ncl->ncl_flags &= ~NCL_LIST_DIRTY; 4122621Sllai1 mutex_exit(&ncl->ncl_mutex); 4132621Sllai1 sdev_ncache_write(); 4142621Sllai1 } else { 4152621Sllai1 sdcmn_err5(("ncache write complete\n")); 4162621Sllai1 ncl->ncl_flags &= ~NCL_LIST_WRITING; 4172621Sllai1 mutex_exit(&ncl->ncl_mutex); 4182797Sjg rw_enter(nvf_lock(fd), RW_WRITER); 4192797Sjg sdev_ncache_list_free(fd); 4202797Sjg rw_exit(nvf_lock(fd)); 4212621Sllai1 } 4222621Sllai1 } 4232621Sllai1 4242797Sjg /* 4252797Sjg * Prepare to perform an update of the neg. cache backing store. 
4262797Sjg */ 4272621Sllai1 static void 4282621Sllai1 sdev_ncache_write(void) 4292621Sllai1 { 4302621Sllai1 sdev_nc_list_t *ncl = sdev_ncache; 4312621Sllai1 nvp_devname_t *np; 4322621Sllai1 sdev_nc_node_t *lp; 4332621Sllai1 int n, i; 4342621Sllai1 4352621Sllai1 if (sdev_cache_write_disable) { 4362621Sllai1 mutex_enter(&ncl->ncl_mutex); 4372621Sllai1 ncl->ncl_flags &= ~NCL_LIST_WRITING; 4382621Sllai1 mutex_exit(&ncl->ncl_mutex); 4392621Sllai1 return; 4402621Sllai1 } 4412621Sllai1 4422621Sllai1 /* proper lock ordering here is essential */ 4432797Sjg rw_enter(nvf_lock(sdevfd_handle), RW_WRITER); 4442797Sjg sdev_ncache_list_free(sdevfd_handle); 4452621Sllai1 4462621Sllai1 rw_enter(&ncl->ncl_lock, RW_READER); 4472621Sllai1 n = ncl->ncl_nentries; 4482621Sllai1 ASSERT(n <= sdev_nc_max_entries); 4492621Sllai1 4502621Sllai1 np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP); 4512621Sllai1 np->nvp_npaths = n; 4522621Sllai1 np->nvp_paths = kmem_zalloc(n * sizeof (char *), KM_SLEEP); 4532621Sllai1 np->nvp_expirecnts = kmem_zalloc(n * sizeof (int), KM_SLEEP); 4542621Sllai1 4552621Sllai1 i = 0; 4562621Sllai1 for (lp = list_head(&ncl->ncl_list); lp; 4572621Sllai1 lp = list_next(&ncl->ncl_list, lp)) { 4582621Sllai1 np->nvp_paths[i] = i_ddi_strdup(lp->ncn_name, KM_SLEEP); 4592621Sllai1 np->nvp_expirecnts[i] = lp->ncn_expirecnt; 4602621Sllai1 sdcmn_err5((" %s %d\n", 4612621Sllai1 np->nvp_paths[i], np->nvp_expirecnts[i])); 4622621Sllai1 i++; 4632621Sllai1 } 4642621Sllai1 4652621Sllai1 rw_exit(&ncl->ncl_lock); 4662621Sllai1 4672797Sjg nvf_mark_dirty(sdevfd_handle); 4682797Sjg list_insert_tail(nvf_list(sdevfd_handle), np); 4692797Sjg rw_exit(nvf_lock(sdevfd_handle)); 4702621Sllai1 4712797Sjg nvf_wake_daemon(); 4722621Sllai1 } 4732621Sllai1 4742621Sllai1 static void 4752621Sllai1 sdev_nc_flush_updates(void) 4762621Sllai1 { 4772621Sllai1 sdev_nc_list_t *ncl = sdev_ncache; 4782621Sllai1 4792621Sllai1 if (sdev_nc_disable || sdev_cache_write_disable) 4802621Sllai1 return; 4812621Sllai1 
4822621Sllai1 mutex_enter(&ncl->ncl_mutex); 4832621Sllai1 if (((ncl->ncl_flags & 4842621Sllai1 (NCL_LIST_DIRTY | NCL_LIST_WENABLE | NCL_LIST_WRITING)) == 4852621Sllai1 (NCL_LIST_DIRTY | NCL_LIST_WENABLE))) { 4862621Sllai1 ncl->ncl_flags &= ~NCL_LIST_DIRTY; 4872621Sllai1 ncl->ncl_flags |= NCL_LIST_WRITING; 4882621Sllai1 mutex_exit(&ncl->ncl_mutex); 4892621Sllai1 sdev_ncache_write(); 4902621Sllai1 } else { 4912621Sllai1 mutex_exit(&ncl->ncl_mutex); 4922621Sllai1 } 4932621Sllai1 } 4942621Sllai1 4952621Sllai1 static void 4962621Sllai1 sdev_nc_flush_boot_update(void) 4972621Sllai1 { 4982621Sllai1 sdev_nc_list_t *ncl = sdev_ncache; 4992621Sllai1 5002621Sllai1 if (sdev_nc_disable || sdev_cache_write_disable || 5012621Sllai1 (sdev_boot_state == SDEV_BOOT_STATE_INITIAL)) { 5022621Sllai1 return; 5032621Sllai1 } 5042621Sllai1 mutex_enter(&ncl->ncl_mutex); 5052621Sllai1 if (ncl->ncl_flags & NCL_LIST_WENABLE) { 5062621Sllai1 mutex_exit(&ncl->ncl_mutex); 5072621Sllai1 sdev_nc_flush_updates(); 5082621Sllai1 } else { 5092621Sllai1 mutex_exit(&ncl->ncl_mutex); 5102621Sllai1 } 5112621Sllai1 5122621Sllai1 } 5132621Sllai1 5142621Sllai1 static void 5152621Sllai1 sdev_state_boot_complete() 5162621Sllai1 { 5172621Sllai1 sdev_nc_list_t *ncl = sdev_ncache; 5182621Sllai1 sdev_nc_node_t *lp, *next; 5192621Sllai1 5202621Sllai1 /* 5212621Sllai1 * Once boot is complete, decrement the expire count of each entry 5222621Sllai1 * in the cache not touched by a reference. Remove any that 5232621Sllai1 * goes to zero. This effectively removes random entries over 5242621Sllai1 * time. 
5252621Sllai1 */ 5262621Sllai1 rw_enter(&ncl->ncl_lock, RW_WRITER); 5272621Sllai1 mutex_enter(&ncl->ncl_mutex); 5282621Sllai1 5292621Sllai1 for (lp = list_head(&ncl->ncl_list); lp; lp = next) { 5302621Sllai1 next = list_next(&ncl->ncl_list, lp); 5312621Sllai1 if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0) { 5322621Sllai1 if (lp->ncn_flags & NCN_ACTIVE) { 5332621Sllai1 if (lp->ncn_expirecnt != sdev_nc_expirecnt) { 5342621Sllai1 lp->ncn_expirecnt = sdev_nc_expirecnt; 5352621Sllai1 ncl->ncl_flags |= NCL_LIST_DIRTY; 5362621Sllai1 } 5372621Sllai1 } else { 5382621Sllai1 if (--lp->ncn_expirecnt == 0) { 5392621Sllai1 list_remove(&ncl->ncl_list, lp); 5402621Sllai1 sdev_nc_free_unlinked_node(lp); 5412621Sllai1 ncl->ncl_nentries--; 5422621Sllai1 } 5432621Sllai1 ncl->ncl_flags |= NCL_LIST_DIRTY; 5442621Sllai1 } 5452621Sllai1 } 5462621Sllai1 } 5472621Sllai1 5482621Sllai1 mutex_exit(&ncl->ncl_mutex); 5492621Sllai1 rw_exit(&ncl->ncl_lock); 5502621Sllai1 5512621Sllai1 sdev_nc_flush_boot_update(); 5522621Sllai1 sdev_boot_state = SDEV_BOOT_STATE_COMPLETE; 5532621Sllai1 } 5542621Sllai1 5552621Sllai1 /* 5562621Sllai1 * Upon transition to the login state on a reconfigure boot, 5572621Sllai1 * a debounce timer is set up so that we cache all the nonsense 5582621Sllai1 * lookups we're hit with by the windowing system startup. 
5592621Sllai1 */ 5602621Sllai1 5612621Sllai1 /*ARGSUSED*/ 5622621Sllai1 static void 5632621Sllai1 sdev_state_timeout(void *arg) 5642621Sllai1 { 5652621Sllai1 sdev_state_boot_complete(); 5662621Sllai1 } 5672621Sllai1 5682621Sllai1 static void 5692621Sllai1 sdev_state_sysavail() 5702621Sllai1 { 5712621Sllai1 sdev_nc_list_t *ncl = sdev_ncache; 5722621Sllai1 clock_t nticks; 5732621Sllai1 int nsecs; 5742621Sllai1 5752621Sllai1 mutex_enter(&ncl->ncl_mutex); 5762621Sllai1 ncl->ncl_flags |= NCL_LIST_WENABLE; 5772621Sllai1 mutex_exit(&ncl->ncl_mutex); 5782621Sllai1 5792621Sllai1 nsecs = sdev_reconfig_delay; 5802621Sllai1 if (nsecs == 0) { 5812621Sllai1 sdev_state_boot_complete(); 5822621Sllai1 } else { 5832621Sllai1 nticks = drv_usectohz(1000000 * nsecs); 5842621Sllai1 sdcmn_err5(("timeout initiated %ld\n", nticks)); 5853133Sjg (void) timeout(sdev_state_timeout, NULL, nticks); 5862621Sllai1 sdev_nc_flush_boot_update(); 5872621Sllai1 } 5882621Sllai1 } 5892621Sllai1 5902621Sllai1 /* 5912621Sllai1 * Called to inform the filesystem of progress during boot, 5922621Sllai1 * either a notice of reconfiguration boot or an indication of 5932621Sllai1 * system boot complete. At system boot complete, set up a 5942621Sllai1 * timer at the expiration of which no further failed lookups 5952621Sllai1 * will be added to the negative cache. 5962621Sllai1 * 5972621Sllai1 * The dev filesystem infers from reconfig boot that implicit 5982621Sllai1 * reconfig need not be invoked at all as all available devices 5992621Sllai1 * will have already been named. 6002621Sllai1 * 6012621Sllai1 * The dev filesystem infers from "system available" that devfsadmd 6022621Sllai1 * can now be run and hence implicit reconfiguration may be initiated. 6032621Sllai1 * During early stages of system startup, implicit reconfig is 6042621Sllai1 * not done to avoid impacting boot performance. 
6052621Sllai1 */ 6062621Sllai1 void 6072621Sllai1 sdev_devstate_change(void) 6082621Sllai1 { 6092621Sllai1 int new_state; 6102621Sllai1 6112621Sllai1 /* 6122621Sllai1 * Track system state and manage interesting transitions 6132621Sllai1 */ 6142621Sllai1 new_state = SDEV_BOOT_STATE_INITIAL; 6152621Sllai1 if (i_ddi_reconfig()) 6162621Sllai1 new_state = SDEV_BOOT_STATE_RECONFIG; 6172621Sllai1 if (i_ddi_sysavail()) 6182621Sllai1 new_state = SDEV_BOOT_STATE_SYSAVAIL; 6192621Sllai1 6202621Sllai1 if (sdev_boot_state < new_state) { 6212621Sllai1 switch (new_state) { 6222621Sllai1 case SDEV_BOOT_STATE_RECONFIG: 6232621Sllai1 sdcmn_err5(("state change: reconfigure boot\n")); 6242621Sllai1 sdev_boot_state = new_state; 6252621Sllai1 sdev_reconfig_boot = 1; 6262621Sllai1 if (!sdev_nc_disable_reset) 6272621Sllai1 sdev_nc_free_bootonly(); 6282621Sllai1 break; 6292621Sllai1 case SDEV_BOOT_STATE_SYSAVAIL: 6302621Sllai1 sdcmn_err5(("system available\n")); 6312621Sllai1 sdev_boot_state = new_state; 6322621Sllai1 sdev_state_sysavail(); 6332621Sllai1 break; 6342621Sllai1 } 6352621Sllai1 } 6362621Sllai1 } 6372621Sllai1 6382621Sllai1 /* 6392621Sllai1 * Lookup: filter out entries in the negative cache 6402621Sllai1 * Return 1 if the lookup should not cause a reconfig. 
6412621Sllai1 */ 6422621Sllai1 int 6432621Sllai1 sdev_lookup_filter(sdev_node_t *dv, char *nm) 6442621Sllai1 { 6452621Sllai1 int n; 6462621Sllai1 sdev_nc_list_t *ncl = sdev_ncache; 6472621Sllai1 sdev_nc_node_t *lp; 6482621Sllai1 char *path; 6492621Sllai1 int rval = 0; 6502621Sllai1 int changed = 0; 6512621Sllai1 6522621Sllai1 ASSERT(i_ddi_io_initialized()); 6532621Sllai1 ASSERT(SDEVTOV(dv)->v_type == VDIR); 6542621Sllai1 6552621Sllai1 if (sdev_nc_disable) 6562621Sllai1 return (0); 6572621Sllai1 6582621Sllai1 n = strlen(dv->sdev_path) + strlen(nm) + 2; 6592621Sllai1 path = kmem_alloc(n, KM_SLEEP); 6602621Sllai1 (void) sprintf(path, "%s/%s", dv->sdev_path, nm); 6612621Sllai1 6622621Sllai1 rw_enter(&ncl->ncl_lock, RW_READER); 6632621Sllai1 if ((lp = sdev_nc_findpath(ncl, path)) != NULL) { 6642621Sllai1 sdcmn_err5(("%s/%s: lookup by %s cached, no reconfig\n", 6652621Sllai1 dv->sdev_name, nm, curproc->p_user.u_comm)); 6662621Sllai1 if (sdev_nc_verbose) { 6672621Sllai1 cmn_err(CE_CONT, 6682621Sllai1 "?%s/%s: lookup by %s cached, no reconfig\n", 6692621Sllai1 dv->sdev_name, nm, curproc->p_user.u_comm); 6702621Sllai1 } 6712621Sllai1 mutex_enter(&ncl->ncl_mutex); 6722621Sllai1 lp->ncn_flags |= NCN_ACTIVE; 6732621Sllai1 if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0 && 6742621Sllai1 lp->ncn_expirecnt < sdev_nc_expirecnt) { 6752621Sllai1 lp->ncn_expirecnt = sdev_nc_expirecnt; 6762621Sllai1 ncl->ncl_flags |= NCL_LIST_DIRTY; 6772621Sllai1 changed = 1; 6782621Sllai1 } 6792621Sllai1 mutex_exit(&ncl->ncl_mutex); 6802621Sllai1 rval = 1; 6812621Sllai1 } 6822621Sllai1 rw_exit(&ncl->ncl_lock); 6832621Sllai1 kmem_free(path, n); 6842621Sllai1 if (changed) 6852621Sllai1 sdev_nc_flush_boot_update(); 6862621Sllai1 return (rval); 6872621Sllai1 } 6882621Sllai1 6892621Sllai1 void 6902621Sllai1 sdev_lookup_failed(sdev_node_t *dv, char *nm, int failed_flags) 6912621Sllai1 { 6922621Sllai1 if (sdev_nc_disable) 6932621Sllai1 return; 6942621Sllai1 6952621Sllai1 /* 6962621Sllai1 * If we're still 
in the initial boot stage, always update 6972621Sllai1 * the cache - we may not have received notice of the 6982621Sllai1 * reconfig boot state yet. On a reconfigure boot, entries 6992621Sllai1 * from the backing store are not re-persisted on update, 7002621Sllai1 * but new entries are marked as needing an update. 7012621Sllai1 * Never cache dynamic or non-global nodes. 7022621Sllai1 */ 7032621Sllai1 if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) && 7042621Sllai1 !SDEV_IS_NO_NCACHE(dv) && 7052621Sllai1 ((failed_flags & SLF_NO_NCACHE) == 0) && 7062621Sllai1 ((sdev_reconfig_boot && 707*10097SEric.Taylor@Sun.COM (sdev_boot_state != SDEV_BOOT_STATE_COMPLETE)) || 7082621Sllai1 (!sdev_reconfig_boot && ((failed_flags & SLF_REBUILT))))) { 7092621Sllai1 sdev_nc_addname(sdev_ncache, 7102621Sllai1 dv, nm, NCN_SRC_CURRENT|NCN_ACTIVE); 7112621Sllai1 } 7122621Sllai1 } 7132621Sllai1 7142621Sllai1 static sdev_nc_list_t * 7152621Sllai1 sdev_nc_newlist(void) 7162621Sllai1 { 7172621Sllai1 sdev_nc_list_t *ncl; 7182621Sllai1 7192621Sllai1 ncl = kmem_zalloc(sizeof (sdev_nc_list_t), KM_SLEEP); 7202621Sllai1 7212621Sllai1 rw_init(&ncl->ncl_lock, NULL, RW_DEFAULT, NULL); 7222621Sllai1 mutex_init(&ncl->ncl_mutex, NULL, MUTEX_DEFAULT, NULL); 7232621Sllai1 list_create(&ncl->ncl_list, sizeof (sdev_nc_node_t), 7242621Sllai1 offsetof(sdev_nc_node_t, ncn_link)); 7252621Sllai1 7262621Sllai1 return (ncl); 7272621Sllai1 } 7282621Sllai1 7292621Sllai1 static void 7302621Sllai1 sdev_nc_free_unlinked_node(sdev_nc_node_t *lp) 7312621Sllai1 { 7322621Sllai1 kmem_free(lp->ncn_name, strlen(lp->ncn_name) + 1); 7332621Sllai1 kmem_free(lp, sizeof (sdev_nc_node_t)); 7342621Sllai1 } 7352621Sllai1 7362621Sllai1 static sdev_nc_node_t * 7372621Sllai1 sdev_nc_findpath(sdev_nc_list_t *ncl, char *path) 7382621Sllai1 { 7392621Sllai1 sdev_nc_node_t *lp; 7402621Sllai1 7412621Sllai1 ASSERT(RW_LOCK_HELD(&ncl->ncl_lock)); 7422621Sllai1 7432621Sllai1 for (lp = list_head(&ncl->ncl_list); lp; 7442621Sllai1 lp = 
list_next(&ncl->ncl_list, lp)) { 7452621Sllai1 if (strcmp(path, lp->ncn_name) == 0) 7462621Sllai1 return (lp); 7472621Sllai1 } 7482621Sllai1 7492621Sllai1 return (NULL); 7502621Sllai1 } 7512621Sllai1 7522621Sllai1 static void 7532621Sllai1 sdev_nc_insertnode(sdev_nc_list_t *ncl, sdev_nc_node_t *new) 7542621Sllai1 { 7552621Sllai1 sdev_nc_node_t *lp; 7562621Sllai1 7572621Sllai1 rw_enter(&ncl->ncl_lock, RW_WRITER); 7582621Sllai1 7592621Sllai1 lp = sdev_nc_findpath(ncl, new->ncn_name); 7602621Sllai1 if (lp == NULL) { 7612621Sllai1 if (ncl->ncl_nentries == sdev_nc_max_entries) { 7622621Sllai1 sdcmn_err5(( 7632621Sllai1 "%s by %s: not adding to ncache (max %d)\n", 7642621Sllai1 new->ncn_name, curproc->p_user.u_comm, 7652621Sllai1 ncl->ncl_nentries)); 7662621Sllai1 if (sdev_nc_verbose) { 7672621Sllai1 cmn_err(CE_CONT, "?%s by %s: " 7682621Sllai1 "not adding to ncache (max %d)\n", 7692621Sllai1 new->ncn_name, curproc->p_user.u_comm, 7702621Sllai1 ncl->ncl_nentries); 7712621Sllai1 } 7722621Sllai1 rw_exit(&ncl->ncl_lock); 7732621Sllai1 sdev_nc_free_unlinked_node(new); 7742621Sllai1 } else { 7752621Sllai1 7762621Sllai1 list_insert_tail(&ncl->ncl_list, new); 7772621Sllai1 ncl->ncl_nentries++; 7782621Sllai1 7792621Sllai1 /* don't mark list dirty for nodes from store */ 7802621Sllai1 mutex_enter(&ncl->ncl_mutex); 7812621Sllai1 if ((new->ncn_flags & NCN_SRC_STORE) == 0) { 7822621Sllai1 sdcmn_err5(("%s by %s: add to ncache\n", 7832621Sllai1 new->ncn_name, curproc->p_user.u_comm)); 7842621Sllai1 if (sdev_nc_verbose) { 7852621Sllai1 cmn_err(CE_CONT, 7862621Sllai1 "?%s by %s: add to ncache\n", 7872621Sllai1 new->ncn_name, 7882621Sllai1 curproc->p_user.u_comm); 7892621Sllai1 } 7902621Sllai1 ncl->ncl_flags |= NCL_LIST_DIRTY; 7912621Sllai1 } 7922621Sllai1 mutex_exit(&ncl->ncl_mutex); 7932621Sllai1 rw_exit(&ncl->ncl_lock); 7942621Sllai1 lp = new; 7952621Sllai1 sdev_nc_flush_boot_update(); 7962621Sllai1 } 7972621Sllai1 } else { 7982621Sllai1 mutex_enter(&ncl->ncl_mutex); 7992621Sllai1 
lp->ncn_flags |= new->ncn_flags; 8002621Sllai1 mutex_exit(&ncl->ncl_mutex); 8012621Sllai1 rw_exit(&ncl->ncl_lock); 8022621Sllai1 sdev_nc_free_unlinked_node(new); 8032621Sllai1 } 8042621Sllai1 } 8052621Sllai1 8062621Sllai1 void 8072621Sllai1 sdev_nc_addname(sdev_nc_list_t *ncl, sdev_node_t *dv, char *nm, int flags) 8082621Sllai1 { 8092621Sllai1 int n; 8102621Sllai1 sdev_nc_node_t *lp; 8112621Sllai1 8122621Sllai1 ASSERT(SDEVTOV(dv)->v_type == VDIR); 8132621Sllai1 8142621Sllai1 lp = kmem_zalloc(sizeof (sdev_nc_node_t), KM_SLEEP); 8152621Sllai1 8162621Sllai1 n = strlen(dv->sdev_path) + strlen(nm) + 2; 8172621Sllai1 lp->ncn_name = kmem_alloc(n, KM_SLEEP); 8182621Sllai1 (void) sprintf(lp->ncn_name, "%s/%s", 819*10097SEric.Taylor@Sun.COM dv->sdev_path, nm); 8202621Sllai1 lp->ncn_flags = flags; 8212621Sllai1 lp->ncn_expirecnt = sdev_nc_expirecnt; 8222621Sllai1 sdev_nc_insertnode(ncl, lp); 8232621Sllai1 } 8242621Sllai1 8252621Sllai1 void 8262621Sllai1 sdev_nc_node_exists(sdev_node_t *dv) 8272621Sllai1 { 8282621Sllai1 /* dynamic and non-global nodes are never cached */ 8292621Sllai1 if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) && 8302621Sllai1 !SDEV_IS_NO_NCACHE(dv)) { 8312621Sllai1 sdev_nc_path_exists(sdev_ncache, dv->sdev_path); 8322621Sllai1 } 8332621Sllai1 } 8342621Sllai1 8352621Sllai1 void 8362621Sllai1 sdev_nc_path_exists(sdev_nc_list_t *ncl, char *path) 8372621Sllai1 { 8382621Sllai1 sdev_nc_node_t *lp; 8392621Sllai1 8402621Sllai1 if (sdev_nc_disable) 8412621Sllai1 return; 8422621Sllai1 8432621Sllai1 rw_enter(&ncl->ncl_lock, RW_READER); 8442621Sllai1 if ((lp = sdev_nc_findpath(ncl, path)) == NULL) { 8452621Sllai1 rw_exit(&ncl->ncl_lock); 8462621Sllai1 return; 8472621Sllai1 } 8482621Sllai1 if (rw_tryupgrade(&ncl->ncl_lock) == 0) { 8492621Sllai1 rw_exit(&ncl->ncl_lock); 8502621Sllai1 rw_enter(&ncl->ncl_lock, RW_WRITER); 8512621Sllai1 lp = sdev_nc_findpath(ncl, path); 8522621Sllai1 } 8532621Sllai1 if (lp) { 8542621Sllai1 list_remove(&ncl->ncl_list, lp); 8552621Sllai1 
ncl->ncl_nentries--; 8562621Sllai1 mutex_enter(&ncl->ncl_mutex); 8572621Sllai1 ncl->ncl_flags |= NCL_LIST_DIRTY; 8582621Sllai1 if (ncl->ncl_flags & NCL_LIST_WENABLE) { 8592621Sllai1 mutex_exit(&ncl->ncl_mutex); 8602621Sllai1 rw_exit(&ncl->ncl_lock); 8612621Sllai1 sdev_nc_flush_updates(); 8622621Sllai1 } else { 8632621Sllai1 mutex_exit(&ncl->ncl_mutex); 8642621Sllai1 rw_exit(&ncl->ncl_lock); 8652621Sllai1 } 8662621Sllai1 sdev_nc_free_unlinked_node(lp); 8672621Sllai1 sdcmn_err5(("%s by %s: removed from ncache\n", 8682621Sllai1 path, curproc->p_user.u_comm)); 8692621Sllai1 if (sdev_nc_verbose) { 8702621Sllai1 cmn_err(CE_CONT, "?%s by %s: removed from ncache\n", 8712621Sllai1 path, curproc->p_user.u_comm); 8722621Sllai1 } 8732621Sllai1 } else 8742621Sllai1 rw_exit(&ncl->ncl_lock); 8752621Sllai1 } 8762621Sllai1 8772621Sllai1 static void 8782621Sllai1 sdev_nc_free_bootonly(void) 8792621Sllai1 { 8802621Sllai1 sdev_nc_list_t *ncl = sdev_ncache; 8812621Sllai1 sdev_nc_node_t *lp; 8822621Sllai1 sdev_nc_node_t *next; 8832621Sllai1 8842621Sllai1 ASSERT(sdev_reconfig_boot); 8852621Sllai1 8862621Sllai1 rw_enter(&ncl->ncl_lock, RW_WRITER); 8872621Sllai1 8882621Sllai1 for (lp = list_head(&ncl->ncl_list); lp; lp = next) { 8892621Sllai1 next = list_next(&ncl->ncl_list, lp); 8902621Sllai1 if ((lp->ncn_flags & NCN_SRC_CURRENT) == 0) { 8912621Sllai1 sdcmn_err5(("freeing %s\n", lp->ncn_name)); 8922621Sllai1 mutex_enter(&ncl->ncl_mutex); 8932621Sllai1 ncl->ncl_flags |= NCL_LIST_DIRTY; 8942621Sllai1 mutex_exit(&ncl->ncl_mutex); 8952621Sllai1 list_remove(&ncl->ncl_list, lp); 8962621Sllai1 sdev_nc_free_unlinked_node(lp); 8972621Sllai1 ncl->ncl_nentries--; 8982621Sllai1 } 8992621Sllai1 } 9002621Sllai1 9012621Sllai1 rw_exit(&ncl->ncl_lock); 9022621Sllai1 } 903