1*2621Sllai1 /* 2*2621Sllai1 * CDDL HEADER START 3*2621Sllai1 * 4*2621Sllai1 * The contents of this file are subject to the terms of the 5*2621Sllai1 * Common Development and Distribution License (the "License"). 6*2621Sllai1 * You may not use this file except in compliance with the License. 7*2621Sllai1 * 8*2621Sllai1 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*2621Sllai1 * or http://www.opensolaris.org/os/licensing. 10*2621Sllai1 * See the License for the specific language governing permissions 11*2621Sllai1 * and limitations under the License. 12*2621Sllai1 * 13*2621Sllai1 * When distributing Covered Code, include this CDDL HEADER in each 14*2621Sllai1 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*2621Sllai1 * If applicable, add the following below this CDDL HEADER, with the 16*2621Sllai1 * fields enclosed by brackets "[]" replaced with your own identifying 17*2621Sllai1 * information: Portions Copyright [yyyy] [name of copyright owner] 18*2621Sllai1 * 19*2621Sllai1 * CDDL HEADER END 20*2621Sllai1 */ 21*2621Sllai1 /* 22*2621Sllai1 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23*2621Sllai1 * Use is subject to license terms. 
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * negative cache handling for the /dev fs
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/user.h>
#include <sys/time.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/kmem.h>
#include <sys/uio.h>
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/cred.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/mode.h>
#include <sys/policy.h>
#include <fs/fs_subr.h>
#include <sys/mount.h>
#include <sys/fs/snode.h>
#include <sys/fs/dv_node.h>
#include <sys/fs/sdev_node.h>
#include <sys/sunndi.h>
#include <sys/sunmdi.h>
#include <sys/ddi.h>
#include <sys/modctl.h>
#include <sys/devctl_impl.h>


/*
 * ncache is a negative cache of failed lookups.  An entry
 * is added after an attempt to configure a device by that
 * name failed.  An accumulation of these entries over time
 * gives us a set of device names for which implicit reconfiguration
 * does not need to be attempted.  If a name is created matching
 * an entry in ncache, that entry is removed, with the
 * persistent store updated.
 *
 * Implicit reconfig is initiated for any name during lookup that
 * can't be resolved from the backing store and that isn't
 * present in the negative cache.  This functionality is
 * enabled during system startup once communication with devfsadm
 * can be achieved.  Since readdir is more general, implicit
 * reconfig initiated by reading a directory isn't enabled until
 * the system is more fully booted, at the time of the multi-user
 * milestone, corresponding to init state 2.
 *
 * A maximum is imposed on the number of entries in the cache
 * to limit some script going wild and as a defense against attack.
 * The default limit is 64 and can be adjusted via sdev_nc_max_entries.
 *
 * Each entry also has an expiration count.  When a name in the cache
 * is looked up, its count is reset to the default.  Subsequent boots
 * will decrement the count if a name isn't referenced.  This permits
 * a once-only entry to eventually be removed over time.
 *
 * sdev_reconfig_delay implements a "debounce" of the timing beyond
 * the system-available indication, providing what the filesystem
 * considers to be the system-is-fully-booted state.  This is provided
 * to adjust the timing if some application startup is performing a
 * readdir in /dev that initiates a troublesome implicit reconfig on
 * every boot.
 *
 * sdev_nc_disable_reset can be used to disable clearing the negative cache
 * on reconfig boot.  The default is to clear the cache on reconfig boot.
 * sdev_nc_disable can be used to disable the negative cache itself.
 *
 * sdev_reconfig_disable can be used to disable implicit reconfig.
103*2621Sllai1 * The default is that implicit reconfig is enabled. 104*2621Sllai1 */ 105*2621Sllai1 106*2621Sllai1 /* tunables and defaults */ 107*2621Sllai1 #define SDEV_NC_EXPIRECNT 4 108*2621Sllai1 #define SDEV_NC_MAX_ENTRIES 64 109*2621Sllai1 #define SEV_RECONFIG_DELAY 6 /* seconds */ 110*2621Sllai1 111*2621Sllai1 int sdev_nc_expirecnt = SDEV_NC_EXPIRECNT; 112*2621Sllai1 int sdev_nc_max_entries = SDEV_NC_MAX_ENTRIES; 113*2621Sllai1 int sdev_reconfig_delay = SEV_RECONFIG_DELAY; 114*2621Sllai1 int sdev_reconfig_verbose = 0; 115*2621Sllai1 int sdev_reconfig_disable = 0; 116*2621Sllai1 int sdev_nc_disable = 0; 117*2621Sllai1 int sdev_nc_disable_reset = 0; 118*2621Sllai1 int sdev_nc_verbose = 0; 119*2621Sllai1 120*2621Sllai1 /* globals */ 121*2621Sllai1 sdev_nc_list_t *sdev_ncache; 122*2621Sllai1 int sdev_boot_state = SDEV_BOOT_STATE_INITIAL; 123*2621Sllai1 int sdev_reconfig_boot = 0; 124*2621Sllai1 static timeout_id_t sdev_timeout_id = 0; 125*2621Sllai1 126*2621Sllai1 /* static prototypes */ 127*2621Sllai1 static void sdev_ncache_write_complete(nvfd_t *); 128*2621Sllai1 static void sdev_ncache_write(void); 129*2621Sllai1 static void sdev_ncache_process_store(void); 130*2621Sllai1 static sdev_nc_list_t *sdev_nc_newlist(void); 131*2621Sllai1 static void sdev_nc_free_unlinked_node(sdev_nc_node_t *); 132*2621Sllai1 static void sdev_nc_free_all_nodes(sdev_nc_list_t *); 133*2621Sllai1 static void sdev_nc_freelist(sdev_nc_list_t *); 134*2621Sllai1 static sdev_nc_node_t *sdev_nc_findpath(sdev_nc_list_t *, char *); 135*2621Sllai1 static void sdev_nc_insertnode(sdev_nc_list_t *, sdev_nc_node_t *); 136*2621Sllai1 static void sdev_nc_free_bootonly(void); 137*2621Sllai1 138*2621Sllai1 139*2621Sllai1 /* 140*2621Sllai1 * called once at filesystem initialization 141*2621Sllai1 */ 142*2621Sllai1 void 143*2621Sllai1 sdev_ncache_init(void) 144*2621Sllai1 { 145*2621Sllai1 sdev_ncache = sdev_nc_newlist(); 146*2621Sllai1 } 147*2621Sllai1 148*2621Sllai1 /* 149*2621Sllai1 * called at 
mount of the global instance 150*2621Sllai1 * currently the global instance is never unmounted 151*2621Sllai1 */ 152*2621Sllai1 void 153*2621Sllai1 sdev_ncache_setup(void) 154*2621Sllai1 { 155*2621Sllai1 nvfd_t *nvf = sdevfd; 156*2621Sllai1 157*2621Sllai1 nvf_register_write_complete(nvf, sdev_ncache_write_complete); 158*2621Sllai1 159*2621Sllai1 i_ddi_read_devname_file(); 160*2621Sllai1 sdev_ncache_process_store(); 161*2621Sllai1 sdev_devstate_change(); 162*2621Sllai1 } 163*2621Sllai1 164*2621Sllai1 static void 165*2621Sllai1 sdev_nvp_cache_free(nvfd_t *nvf) 166*2621Sllai1 { 167*2621Sllai1 nvp_devname_t *np; 168*2621Sllai1 nvp_devname_t *next; 169*2621Sllai1 170*2621Sllai1 for (np = NVF_DEVNAME_LIST(nvf); np; np = next) { 171*2621Sllai1 next = NVP_DEVNAME_NEXT(np); 172*2621Sllai1 nfd_nvp_free_and_unlink(nvf, NVPLIST(np)); 173*2621Sllai1 } 174*2621Sllai1 } 175*2621Sllai1 176*2621Sllai1 static void 177*2621Sllai1 sdev_ncache_process_store(void) 178*2621Sllai1 { 179*2621Sllai1 nvfd_t *nvf = sdevfd; 180*2621Sllai1 sdev_nc_list_t *ncl = sdev_ncache; 181*2621Sllai1 nvp_devname_t *np; 182*2621Sllai1 sdev_nc_node_t *lp; 183*2621Sllai1 char *path; 184*2621Sllai1 int i, n; 185*2621Sllai1 186*2621Sllai1 if (sdev_nc_disable) 187*2621Sllai1 return; 188*2621Sllai1 189*2621Sllai1 for (np = NVF_DEVNAME_LIST(nvf); np; np = NVP_DEVNAME_NEXT(np)) { 190*2621Sllai1 for (i = 0; i < np->nvp_npaths; i++) { 191*2621Sllai1 sdcmn_err5((" %s %d\n", 192*2621Sllai1 np->nvp_paths[i], np->nvp_expirecnts[i])); 193*2621Sllai1 if (ncl->ncl_nentries < sdev_nc_max_entries) { 194*2621Sllai1 path = np->nvp_paths[i]; 195*2621Sllai1 n = strlen(path) + 1; 196*2621Sllai1 lp = kmem_alloc(sizeof (sdev_nc_node_t), 197*2621Sllai1 KM_SLEEP); 198*2621Sllai1 lp->ncn_name = kmem_alloc(n, KM_SLEEP); 199*2621Sllai1 bcopy(path, lp->ncn_name, n); 200*2621Sllai1 lp->ncn_flags = NCN_SRC_STORE; 201*2621Sllai1 lp->ncn_expirecnt = np->nvp_expirecnts[i]; 202*2621Sllai1 sdev_nc_insertnode(ncl, lp); 203*2621Sllai1 } else if 
(sdev_nc_verbose) { 204*2621Sllai1 cmn_err(CE_CONT, 205*2621Sllai1 "?%s: truncating from ncache (max %d)\n", 206*2621Sllai1 np->nvp_paths[i], sdev_nc_max_entries); 207*2621Sllai1 } 208*2621Sllai1 } 209*2621Sllai1 } 210*2621Sllai1 } 211*2621Sllai1 212*2621Sllai1 static void 213*2621Sllai1 sdev_ncache_write_complete(nvfd_t *nvf) 214*2621Sllai1 { 215*2621Sllai1 sdev_nc_list_t *ncl = sdev_ncache; 216*2621Sllai1 217*2621Sllai1 mutex_enter(&ncl->ncl_mutex); 218*2621Sllai1 219*2621Sllai1 ASSERT(ncl->ncl_flags & NCL_LIST_WRITING); 220*2621Sllai1 221*2621Sllai1 if (ncl->ncl_flags & NCL_LIST_DIRTY) { 222*2621Sllai1 sdcmn_err5(("ncache write complete but dirty again\n")); 223*2621Sllai1 ncl->ncl_flags &= ~NCL_LIST_DIRTY; 224*2621Sllai1 mutex_exit(&ncl->ncl_mutex); 225*2621Sllai1 sdev_ncache_write(); 226*2621Sllai1 } else { 227*2621Sllai1 sdcmn_err5(("ncache write complete\n")); 228*2621Sllai1 ncl->ncl_flags &= ~NCL_LIST_WRITING; 229*2621Sllai1 mutex_exit(&ncl->ncl_mutex); 230*2621Sllai1 rw_enter(&nvf->nvf_lock, RW_WRITER); 231*2621Sllai1 sdev_nvp_cache_free(nvf); 232*2621Sllai1 rw_exit(&nvf->nvf_lock); 233*2621Sllai1 } 234*2621Sllai1 } 235*2621Sllai1 236*2621Sllai1 static void 237*2621Sllai1 sdev_ncache_write(void) 238*2621Sllai1 { 239*2621Sllai1 nvfd_t *nvf = sdevfd; 240*2621Sllai1 sdev_nc_list_t *ncl = sdev_ncache; 241*2621Sllai1 nvp_devname_t *np; 242*2621Sllai1 sdev_nc_node_t *lp; 243*2621Sllai1 int n, i; 244*2621Sllai1 245*2621Sllai1 if (sdev_cache_write_disable) { 246*2621Sllai1 mutex_enter(&ncl->ncl_mutex); 247*2621Sllai1 ncl->ncl_flags &= ~NCL_LIST_WRITING; 248*2621Sllai1 mutex_exit(&ncl->ncl_mutex); 249*2621Sllai1 return; 250*2621Sllai1 } 251*2621Sllai1 252*2621Sllai1 /* proper lock ordering here is essential */ 253*2621Sllai1 rw_enter(&nvf->nvf_lock, RW_WRITER); 254*2621Sllai1 sdev_nvp_cache_free(nvf); 255*2621Sllai1 256*2621Sllai1 rw_enter(&ncl->ncl_lock, RW_READER); 257*2621Sllai1 n = ncl->ncl_nentries; 258*2621Sllai1 ASSERT(n <= sdev_nc_max_entries); 
259*2621Sllai1 260*2621Sllai1 np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP); 261*2621Sllai1 np->nvp_npaths = n; 262*2621Sllai1 np->nvp_paths = kmem_zalloc(n * sizeof (char *), KM_SLEEP); 263*2621Sllai1 np->nvp_expirecnts = kmem_zalloc(n * sizeof (int), KM_SLEEP); 264*2621Sllai1 265*2621Sllai1 i = 0; 266*2621Sllai1 for (lp = list_head(&ncl->ncl_list); lp; 267*2621Sllai1 lp = list_next(&ncl->ncl_list, lp)) { 268*2621Sllai1 np->nvp_paths[i] = i_ddi_strdup(lp->ncn_name, KM_SLEEP); 269*2621Sllai1 np->nvp_expirecnts[i] = lp->ncn_expirecnt; 270*2621Sllai1 sdcmn_err5((" %s %d\n", 271*2621Sllai1 np->nvp_paths[i], np->nvp_expirecnts[i])); 272*2621Sllai1 i++; 273*2621Sllai1 } 274*2621Sllai1 275*2621Sllai1 rw_exit(&ncl->ncl_lock); 276*2621Sllai1 277*2621Sllai1 NVF_MARK_DIRTY(nvf); 278*2621Sllai1 nfd_nvp_link(nvf, NVPLIST(np)); 279*2621Sllai1 rw_exit(&nvf->nvf_lock); 280*2621Sllai1 281*2621Sllai1 wake_nvpflush_daemon(); 282*2621Sllai1 } 283*2621Sllai1 284*2621Sllai1 static void 285*2621Sllai1 sdev_nc_flush_updates(void) 286*2621Sllai1 { 287*2621Sllai1 sdev_nc_list_t *ncl = sdev_ncache; 288*2621Sllai1 289*2621Sllai1 if (sdev_nc_disable || sdev_cache_write_disable) 290*2621Sllai1 return; 291*2621Sllai1 292*2621Sllai1 mutex_enter(&ncl->ncl_mutex); 293*2621Sllai1 if (((ncl->ncl_flags & 294*2621Sllai1 (NCL_LIST_DIRTY | NCL_LIST_WENABLE | NCL_LIST_WRITING)) == 295*2621Sllai1 (NCL_LIST_DIRTY | NCL_LIST_WENABLE))) { 296*2621Sllai1 ncl->ncl_flags &= ~NCL_LIST_DIRTY; 297*2621Sllai1 ncl->ncl_flags |= NCL_LIST_WRITING; 298*2621Sllai1 mutex_exit(&ncl->ncl_mutex); 299*2621Sllai1 sdev_ncache_write(); 300*2621Sllai1 } else { 301*2621Sllai1 mutex_exit(&ncl->ncl_mutex); 302*2621Sllai1 } 303*2621Sllai1 } 304*2621Sllai1 305*2621Sllai1 static void 306*2621Sllai1 sdev_nc_flush_boot_update(void) 307*2621Sllai1 { 308*2621Sllai1 sdev_nc_list_t *ncl = sdev_ncache; 309*2621Sllai1 310*2621Sllai1 if (sdev_nc_disable || sdev_cache_write_disable || 311*2621Sllai1 (sdev_boot_state == 
SDEV_BOOT_STATE_INITIAL)) { 312*2621Sllai1 return; 313*2621Sllai1 } 314*2621Sllai1 mutex_enter(&ncl->ncl_mutex); 315*2621Sllai1 if (ncl->ncl_flags & NCL_LIST_WENABLE) { 316*2621Sllai1 mutex_exit(&ncl->ncl_mutex); 317*2621Sllai1 sdev_nc_flush_updates(); 318*2621Sllai1 } else { 319*2621Sllai1 mutex_exit(&ncl->ncl_mutex); 320*2621Sllai1 } 321*2621Sllai1 322*2621Sllai1 } 323*2621Sllai1 324*2621Sllai1 static void 325*2621Sllai1 sdev_state_boot_complete() 326*2621Sllai1 { 327*2621Sllai1 sdev_nc_list_t *ncl = sdev_ncache; 328*2621Sllai1 sdev_nc_node_t *lp, *next; 329*2621Sllai1 330*2621Sllai1 /* 331*2621Sllai1 * Once boot is complete, decrement the expire count of each entry 332*2621Sllai1 * in the cache not touched by a reference. Remove any that 333*2621Sllai1 * goes to zero. This effectively removes random entries over 334*2621Sllai1 * time. 335*2621Sllai1 */ 336*2621Sllai1 rw_enter(&ncl->ncl_lock, RW_WRITER); 337*2621Sllai1 mutex_enter(&ncl->ncl_mutex); 338*2621Sllai1 339*2621Sllai1 for (lp = list_head(&ncl->ncl_list); lp; lp = next) { 340*2621Sllai1 next = list_next(&ncl->ncl_list, lp); 341*2621Sllai1 if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0) { 342*2621Sllai1 if (lp->ncn_flags & NCN_ACTIVE) { 343*2621Sllai1 if (lp->ncn_expirecnt != sdev_nc_expirecnt) { 344*2621Sllai1 lp->ncn_expirecnt = sdev_nc_expirecnt; 345*2621Sllai1 ncl->ncl_flags |= NCL_LIST_DIRTY; 346*2621Sllai1 } 347*2621Sllai1 } else { 348*2621Sllai1 if (--lp->ncn_expirecnt == 0) { 349*2621Sllai1 list_remove(&ncl->ncl_list, lp); 350*2621Sllai1 sdev_nc_free_unlinked_node(lp); 351*2621Sllai1 ncl->ncl_nentries--; 352*2621Sllai1 } 353*2621Sllai1 ncl->ncl_flags |= NCL_LIST_DIRTY; 354*2621Sllai1 } 355*2621Sllai1 } 356*2621Sllai1 } 357*2621Sllai1 358*2621Sllai1 mutex_exit(&ncl->ncl_mutex); 359*2621Sllai1 rw_exit(&ncl->ncl_lock); 360*2621Sllai1 361*2621Sllai1 sdev_nc_flush_boot_update(); 362*2621Sllai1 sdev_boot_state = SDEV_BOOT_STATE_COMPLETE; 363*2621Sllai1 } 364*2621Sllai1 365*2621Sllai1 /* 
366*2621Sllai1 * Upon transition to the login state on a reconfigure boot, 367*2621Sllai1 * a debounce timer is set up so that we cache all the nonsense 368*2621Sllai1 * lookups we're hit with by the windowing system startup. 369*2621Sllai1 */ 370*2621Sllai1 371*2621Sllai1 /*ARGSUSED*/ 372*2621Sllai1 static void 373*2621Sllai1 sdev_state_timeout(void *arg) 374*2621Sllai1 { 375*2621Sllai1 sdev_timeout_id = 0; 376*2621Sllai1 sdev_state_boot_complete(); 377*2621Sllai1 } 378*2621Sllai1 379*2621Sllai1 static void 380*2621Sllai1 sdev_state_sysavail() 381*2621Sllai1 { 382*2621Sllai1 sdev_nc_list_t *ncl = sdev_ncache; 383*2621Sllai1 clock_t nticks; 384*2621Sllai1 int nsecs; 385*2621Sllai1 386*2621Sllai1 mutex_enter(&ncl->ncl_mutex); 387*2621Sllai1 ncl->ncl_flags |= NCL_LIST_WENABLE; 388*2621Sllai1 mutex_exit(&ncl->ncl_mutex); 389*2621Sllai1 390*2621Sllai1 nsecs = sdev_reconfig_delay; 391*2621Sllai1 if (nsecs == 0) { 392*2621Sllai1 sdev_state_boot_complete(); 393*2621Sllai1 } else { 394*2621Sllai1 nticks = drv_usectohz(1000000 * nsecs); 395*2621Sllai1 sdcmn_err5(("timeout initiated %ld\n", nticks)); 396*2621Sllai1 sdev_timeout_id = timeout(sdev_state_timeout, NULL, nticks); 397*2621Sllai1 sdev_nc_flush_boot_update(); 398*2621Sllai1 } 399*2621Sllai1 } 400*2621Sllai1 401*2621Sllai1 /* 402*2621Sllai1 * Called to inform the filesystem of progress during boot, 403*2621Sllai1 * either a notice of reconfiguration boot or an indication of 404*2621Sllai1 * system boot complete. At system boot complete, set up a 405*2621Sllai1 * timer at the expiration of which no further failed lookups 406*2621Sllai1 * will be added to the negative cache. 407*2621Sllai1 * 408*2621Sllai1 * The dev filesystem infers from reconfig boot that implicit 409*2621Sllai1 * reconfig need not be invoked at all as all available devices 410*2621Sllai1 * will have already been named. 
411*2621Sllai1 * 412*2621Sllai1 * The dev filesystem infers from "system available" that devfsadmd 413*2621Sllai1 * can now be run and hence implicit reconfiguration may be initiated. 414*2621Sllai1 * During early stages of system startup, implicit reconfig is 415*2621Sllai1 * not done to avoid impacting boot performance. 416*2621Sllai1 */ 417*2621Sllai1 void 418*2621Sllai1 sdev_devstate_change(void) 419*2621Sllai1 { 420*2621Sllai1 int new_state; 421*2621Sllai1 422*2621Sllai1 /* 423*2621Sllai1 * Track system state and manage interesting transitions 424*2621Sllai1 */ 425*2621Sllai1 new_state = SDEV_BOOT_STATE_INITIAL; 426*2621Sllai1 if (i_ddi_reconfig()) 427*2621Sllai1 new_state = SDEV_BOOT_STATE_RECONFIG; 428*2621Sllai1 if (i_ddi_sysavail()) 429*2621Sllai1 new_state = SDEV_BOOT_STATE_SYSAVAIL; 430*2621Sllai1 431*2621Sllai1 if (sdev_boot_state < new_state) { 432*2621Sllai1 switch (new_state) { 433*2621Sllai1 case SDEV_BOOT_STATE_RECONFIG: 434*2621Sllai1 sdcmn_err5(("state change: reconfigure boot\n")); 435*2621Sllai1 sdev_boot_state = new_state; 436*2621Sllai1 sdev_reconfig_boot = 1; 437*2621Sllai1 if (!sdev_nc_disable_reset) 438*2621Sllai1 sdev_nc_free_bootonly(); 439*2621Sllai1 break; 440*2621Sllai1 case SDEV_BOOT_STATE_SYSAVAIL: 441*2621Sllai1 sdcmn_err5(("system available\n")); 442*2621Sllai1 sdev_boot_state = new_state; 443*2621Sllai1 sdev_state_sysavail(); 444*2621Sllai1 break; 445*2621Sllai1 } 446*2621Sllai1 } 447*2621Sllai1 } 448*2621Sllai1 449*2621Sllai1 /* 450*2621Sllai1 * Lookup: filter out entries in the negative cache 451*2621Sllai1 * Return 1 if the lookup should not cause a reconfig. 
452*2621Sllai1 */ 453*2621Sllai1 int 454*2621Sllai1 sdev_lookup_filter(sdev_node_t *dv, char *nm) 455*2621Sllai1 { 456*2621Sllai1 int n; 457*2621Sllai1 sdev_nc_list_t *ncl = sdev_ncache; 458*2621Sllai1 sdev_nc_node_t *lp; 459*2621Sllai1 char *path; 460*2621Sllai1 int rval = 0; 461*2621Sllai1 int changed = 0; 462*2621Sllai1 463*2621Sllai1 ASSERT(i_ddi_io_initialized()); 464*2621Sllai1 ASSERT(SDEVTOV(dv)->v_type == VDIR); 465*2621Sllai1 466*2621Sllai1 if (sdev_nc_disable) 467*2621Sllai1 return (0); 468*2621Sllai1 469*2621Sllai1 n = strlen(dv->sdev_path) + strlen(nm) + 2; 470*2621Sllai1 path = kmem_alloc(n, KM_SLEEP); 471*2621Sllai1 (void) sprintf(path, "%s/%s", dv->sdev_path, nm); 472*2621Sllai1 473*2621Sllai1 rw_enter(&ncl->ncl_lock, RW_READER); 474*2621Sllai1 if ((lp = sdev_nc_findpath(ncl, path)) != NULL) { 475*2621Sllai1 sdcmn_err5(("%s/%s: lookup by %s cached, no reconfig\n", 476*2621Sllai1 dv->sdev_name, nm, curproc->p_user.u_comm)); 477*2621Sllai1 if (sdev_nc_verbose) { 478*2621Sllai1 cmn_err(CE_CONT, 479*2621Sllai1 "?%s/%s: lookup by %s cached, no reconfig\n", 480*2621Sllai1 dv->sdev_name, nm, curproc->p_user.u_comm); 481*2621Sllai1 } 482*2621Sllai1 mutex_enter(&ncl->ncl_mutex); 483*2621Sllai1 lp->ncn_flags |= NCN_ACTIVE; 484*2621Sllai1 if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0 && 485*2621Sllai1 lp->ncn_expirecnt < sdev_nc_expirecnt) { 486*2621Sllai1 lp->ncn_expirecnt = sdev_nc_expirecnt; 487*2621Sllai1 ncl->ncl_flags |= NCL_LIST_DIRTY; 488*2621Sllai1 changed = 1; 489*2621Sllai1 } 490*2621Sllai1 mutex_exit(&ncl->ncl_mutex); 491*2621Sllai1 rval = 1; 492*2621Sllai1 } 493*2621Sllai1 rw_exit(&ncl->ncl_lock); 494*2621Sllai1 kmem_free(path, n); 495*2621Sllai1 if (changed) 496*2621Sllai1 sdev_nc_flush_boot_update(); 497*2621Sllai1 return (rval); 498*2621Sllai1 } 499*2621Sllai1 500*2621Sllai1 void 501*2621Sllai1 sdev_lookup_failed(sdev_node_t *dv, char *nm, int failed_flags) 502*2621Sllai1 { 503*2621Sllai1 if (sdev_nc_disable) 504*2621Sllai1 return; 
505*2621Sllai1 506*2621Sllai1 /* 507*2621Sllai1 * If we're still in the initial boot stage, always update 508*2621Sllai1 * the cache - we may not have received notice of the 509*2621Sllai1 * reconfig boot state yet. On a reconfigure boot, entries 510*2621Sllai1 * from the backing store are not re-persisted on update, 511*2621Sllai1 * but new entries are marked as needing an update. 512*2621Sllai1 * Never cache dynamic or non-global nodes. 513*2621Sllai1 */ 514*2621Sllai1 if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) && 515*2621Sllai1 !SDEV_IS_NO_NCACHE(dv) && 516*2621Sllai1 ((failed_flags & SLF_NO_NCACHE) == 0) && 517*2621Sllai1 ((sdev_reconfig_boot && 518*2621Sllai1 (sdev_boot_state != SDEV_BOOT_STATE_COMPLETE)) || 519*2621Sllai1 (!sdev_reconfig_boot && ((failed_flags & SLF_REBUILT))))) { 520*2621Sllai1 sdev_nc_addname(sdev_ncache, 521*2621Sllai1 dv, nm, NCN_SRC_CURRENT|NCN_ACTIVE); 522*2621Sllai1 } 523*2621Sllai1 } 524*2621Sllai1 525*2621Sllai1 static sdev_nc_list_t * 526*2621Sllai1 sdev_nc_newlist(void) 527*2621Sllai1 { 528*2621Sllai1 sdev_nc_list_t *ncl; 529*2621Sllai1 530*2621Sllai1 ncl = kmem_zalloc(sizeof (sdev_nc_list_t), KM_SLEEP); 531*2621Sllai1 532*2621Sllai1 rw_init(&ncl->ncl_lock, NULL, RW_DEFAULT, NULL); 533*2621Sllai1 mutex_init(&ncl->ncl_mutex, NULL, MUTEX_DEFAULT, NULL); 534*2621Sllai1 list_create(&ncl->ncl_list, sizeof (sdev_nc_node_t), 535*2621Sllai1 offsetof(sdev_nc_node_t, ncn_link)); 536*2621Sllai1 537*2621Sllai1 return (ncl); 538*2621Sllai1 } 539*2621Sllai1 540*2621Sllai1 static void 541*2621Sllai1 sdev_nc_free_unlinked_node(sdev_nc_node_t *lp) 542*2621Sllai1 { 543*2621Sllai1 kmem_free(lp->ncn_name, strlen(lp->ncn_name) + 1); 544*2621Sllai1 kmem_free(lp, sizeof (sdev_nc_node_t)); 545*2621Sllai1 } 546*2621Sllai1 547*2621Sllai1 static void 548*2621Sllai1 sdev_nc_free_all_nodes(sdev_nc_list_t *ncl) 549*2621Sllai1 { 550*2621Sllai1 sdev_nc_node_t *lp; 551*2621Sllai1 552*2621Sllai1 while ((lp = list_head(&ncl->ncl_list)) != NULL) { 553*2621Sllai1 
list_remove(&ncl->ncl_list, lp); 554*2621Sllai1 sdev_nc_free_unlinked_node(lp); 555*2621Sllai1 ncl->ncl_nentries--; 556*2621Sllai1 } 557*2621Sllai1 ASSERT(ncl->ncl_nentries == 0); 558*2621Sllai1 } 559*2621Sllai1 560*2621Sllai1 static void 561*2621Sllai1 sdev_nc_freelist(sdev_nc_list_t *ncl) 562*2621Sllai1 { 563*2621Sllai1 if (!list_is_empty(&ncl->ncl_list)) 564*2621Sllai1 sdev_nc_free_all_nodes(ncl); 565*2621Sllai1 ASSERT(list_is_empty(&ncl->ncl_list)); 566*2621Sllai1 ASSERT(ncl->ncl_nentries == 0); 567*2621Sllai1 568*2621Sllai1 mutex_destroy(&ncl->ncl_mutex); 569*2621Sllai1 rw_destroy(&ncl->ncl_lock); 570*2621Sllai1 list_destroy(&ncl->ncl_list); 571*2621Sllai1 kmem_free(ncl, sizeof (sdev_nc_list_t)); 572*2621Sllai1 } 573*2621Sllai1 574*2621Sllai1 static sdev_nc_node_t * 575*2621Sllai1 sdev_nc_findpath(sdev_nc_list_t *ncl, char *path) 576*2621Sllai1 { 577*2621Sllai1 sdev_nc_node_t *lp; 578*2621Sllai1 579*2621Sllai1 ASSERT(RW_LOCK_HELD(&ncl->ncl_lock)); 580*2621Sllai1 581*2621Sllai1 for (lp = list_head(&ncl->ncl_list); lp; 582*2621Sllai1 lp = list_next(&ncl->ncl_list, lp)) { 583*2621Sllai1 if (strcmp(path, lp->ncn_name) == 0) 584*2621Sllai1 return (lp); 585*2621Sllai1 } 586*2621Sllai1 587*2621Sllai1 return (NULL); 588*2621Sllai1 } 589*2621Sllai1 590*2621Sllai1 static void 591*2621Sllai1 sdev_nc_insertnode(sdev_nc_list_t *ncl, sdev_nc_node_t *new) 592*2621Sllai1 { 593*2621Sllai1 sdev_nc_node_t *lp; 594*2621Sllai1 595*2621Sllai1 rw_enter(&ncl->ncl_lock, RW_WRITER); 596*2621Sllai1 597*2621Sllai1 lp = sdev_nc_findpath(ncl, new->ncn_name); 598*2621Sllai1 if (lp == NULL) { 599*2621Sllai1 if (ncl->ncl_nentries == sdev_nc_max_entries) { 600*2621Sllai1 sdcmn_err5(( 601*2621Sllai1 "%s by %s: not adding to ncache (max %d)\n", 602*2621Sllai1 new->ncn_name, curproc->p_user.u_comm, 603*2621Sllai1 ncl->ncl_nentries)); 604*2621Sllai1 if (sdev_nc_verbose) { 605*2621Sllai1 cmn_err(CE_CONT, "?%s by %s: " 606*2621Sllai1 "not adding to ncache (max %d)\n", 607*2621Sllai1 new->ncn_name, 
curproc->p_user.u_comm, 608*2621Sllai1 ncl->ncl_nentries); 609*2621Sllai1 } 610*2621Sllai1 rw_exit(&ncl->ncl_lock); 611*2621Sllai1 sdev_nc_free_unlinked_node(new); 612*2621Sllai1 } else { 613*2621Sllai1 614*2621Sllai1 list_insert_tail(&ncl->ncl_list, new); 615*2621Sllai1 ncl->ncl_nentries++; 616*2621Sllai1 617*2621Sllai1 /* don't mark list dirty for nodes from store */ 618*2621Sllai1 mutex_enter(&ncl->ncl_mutex); 619*2621Sllai1 if ((new->ncn_flags & NCN_SRC_STORE) == 0) { 620*2621Sllai1 sdcmn_err5(("%s by %s: add to ncache\n", 621*2621Sllai1 new->ncn_name, curproc->p_user.u_comm)); 622*2621Sllai1 if (sdev_nc_verbose) { 623*2621Sllai1 cmn_err(CE_CONT, 624*2621Sllai1 "?%s by %s: add to ncache\n", 625*2621Sllai1 new->ncn_name, 626*2621Sllai1 curproc->p_user.u_comm); 627*2621Sllai1 } 628*2621Sllai1 ncl->ncl_flags |= NCL_LIST_DIRTY; 629*2621Sllai1 } 630*2621Sllai1 mutex_exit(&ncl->ncl_mutex); 631*2621Sllai1 rw_exit(&ncl->ncl_lock); 632*2621Sllai1 lp = new; 633*2621Sllai1 sdev_nc_flush_boot_update(); 634*2621Sllai1 } 635*2621Sllai1 } else { 636*2621Sllai1 mutex_enter(&ncl->ncl_mutex); 637*2621Sllai1 lp->ncn_flags |= new->ncn_flags; 638*2621Sllai1 mutex_exit(&ncl->ncl_mutex); 639*2621Sllai1 rw_exit(&ncl->ncl_lock); 640*2621Sllai1 sdev_nc_free_unlinked_node(new); 641*2621Sllai1 } 642*2621Sllai1 } 643*2621Sllai1 644*2621Sllai1 void 645*2621Sllai1 sdev_nc_addname(sdev_nc_list_t *ncl, sdev_node_t *dv, char *nm, int flags) 646*2621Sllai1 { 647*2621Sllai1 int n; 648*2621Sllai1 sdev_nc_node_t *lp; 649*2621Sllai1 650*2621Sllai1 ASSERT(SDEVTOV(dv)->v_type == VDIR); 651*2621Sllai1 652*2621Sllai1 lp = kmem_zalloc(sizeof (sdev_nc_node_t), KM_SLEEP); 653*2621Sllai1 654*2621Sllai1 n = strlen(dv->sdev_path) + strlen(nm) + 2; 655*2621Sllai1 lp->ncn_name = kmem_alloc(n, KM_SLEEP); 656*2621Sllai1 (void) sprintf(lp->ncn_name, "%s/%s", 657*2621Sllai1 dv->sdev_path, nm); 658*2621Sllai1 lp->ncn_flags = flags; 659*2621Sllai1 lp->ncn_expirecnt = sdev_nc_expirecnt; 660*2621Sllai1 
sdev_nc_insertnode(ncl, lp); 661*2621Sllai1 } 662*2621Sllai1 663*2621Sllai1 void 664*2621Sllai1 sdev_nc_node_exists(sdev_node_t *dv) 665*2621Sllai1 { 666*2621Sllai1 /* dynamic and non-global nodes are never cached */ 667*2621Sllai1 if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) && 668*2621Sllai1 !SDEV_IS_NO_NCACHE(dv)) { 669*2621Sllai1 sdev_nc_path_exists(sdev_ncache, dv->sdev_path); 670*2621Sllai1 } 671*2621Sllai1 } 672*2621Sllai1 673*2621Sllai1 void 674*2621Sllai1 sdev_nc_path_exists(sdev_nc_list_t *ncl, char *path) 675*2621Sllai1 { 676*2621Sllai1 sdev_nc_node_t *lp; 677*2621Sllai1 678*2621Sllai1 if (sdev_nc_disable) 679*2621Sllai1 return; 680*2621Sllai1 681*2621Sllai1 rw_enter(&ncl->ncl_lock, RW_READER); 682*2621Sllai1 if ((lp = sdev_nc_findpath(ncl, path)) == NULL) { 683*2621Sllai1 rw_exit(&ncl->ncl_lock); 684*2621Sllai1 return; 685*2621Sllai1 } 686*2621Sllai1 if (rw_tryupgrade(&ncl->ncl_lock) == 0) { 687*2621Sllai1 rw_exit(&ncl->ncl_lock); 688*2621Sllai1 rw_enter(&ncl->ncl_lock, RW_WRITER); 689*2621Sllai1 lp = sdev_nc_findpath(ncl, path); 690*2621Sllai1 } 691*2621Sllai1 if (lp) { 692*2621Sllai1 list_remove(&ncl->ncl_list, lp); 693*2621Sllai1 ncl->ncl_nentries--; 694*2621Sllai1 mutex_enter(&ncl->ncl_mutex); 695*2621Sllai1 ncl->ncl_flags |= NCL_LIST_DIRTY; 696*2621Sllai1 if (ncl->ncl_flags & NCL_LIST_WENABLE) { 697*2621Sllai1 mutex_exit(&ncl->ncl_mutex); 698*2621Sllai1 rw_exit(&ncl->ncl_lock); 699*2621Sllai1 sdev_nc_flush_updates(); 700*2621Sllai1 } else { 701*2621Sllai1 mutex_exit(&ncl->ncl_mutex); 702*2621Sllai1 rw_exit(&ncl->ncl_lock); 703*2621Sllai1 } 704*2621Sllai1 sdev_nc_free_unlinked_node(lp); 705*2621Sllai1 sdcmn_err5(("%s by %s: removed from ncache\n", 706*2621Sllai1 path, curproc->p_user.u_comm)); 707*2621Sllai1 if (sdev_nc_verbose) { 708*2621Sllai1 cmn_err(CE_CONT, "?%s by %s: removed from ncache\n", 709*2621Sllai1 path, curproc->p_user.u_comm); 710*2621Sllai1 } 711*2621Sllai1 } else 712*2621Sllai1 rw_exit(&ncl->ncl_lock); 713*2621Sllai1 } 
714*2621Sllai1 715*2621Sllai1 static void 716*2621Sllai1 sdev_nc_free_bootonly(void) 717*2621Sllai1 { 718*2621Sllai1 sdev_nc_list_t *ncl = sdev_ncache; 719*2621Sllai1 sdev_nc_node_t *lp; 720*2621Sllai1 sdev_nc_node_t *next; 721*2621Sllai1 722*2621Sllai1 ASSERT(sdev_reconfig_boot); 723*2621Sllai1 724*2621Sllai1 rw_enter(&ncl->ncl_lock, RW_WRITER); 725*2621Sllai1 726*2621Sllai1 for (lp = list_head(&ncl->ncl_list); lp; lp = next) { 727*2621Sllai1 next = list_next(&ncl->ncl_list, lp); 728*2621Sllai1 if ((lp->ncn_flags & NCN_SRC_CURRENT) == 0) { 729*2621Sllai1 sdcmn_err5(("freeing %s\n", lp->ncn_name)); 730*2621Sllai1 mutex_enter(&ncl->ncl_mutex); 731*2621Sllai1 ncl->ncl_flags |= NCL_LIST_DIRTY; 732*2621Sllai1 mutex_exit(&ncl->ncl_mutex); 733*2621Sllai1 list_remove(&ncl->ncl_list, lp); 734*2621Sllai1 sdev_nc_free_unlinked_node(lp); 735*2621Sllai1 ncl->ncl_nentries--; 736*2621Sllai1 } 737*2621Sllai1 } 738*2621Sllai1 739*2621Sllai1 rw_exit(&ncl->ncl_lock); 740*2621Sllai1 } 741