1*2797Sjg /* 2*2797Sjg * CDDL HEADER START 3*2797Sjg * 4*2797Sjg * The contents of this file are subject to the terms of the 5*2797Sjg * Common Development and Distribution License (the "License"). 6*2797Sjg * You may not use this file except in compliance with the License. 7*2797Sjg * 8*2797Sjg * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*2797Sjg * or http://www.opensolaris.org/os/licensing. 10*2797Sjg * See the License for the specific language governing permissions 11*2797Sjg * and limitations under the License. 12*2797Sjg * 13*2797Sjg * When distributing Covered Code, include this CDDL HEADER in each 14*2797Sjg * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*2797Sjg * If applicable, add the following below this CDDL HEADER, with the 16*2797Sjg * fields enclosed by brackets "[]" replaced with your own identifying 17*2797Sjg * information: Portions Copyright [yyyy] [name of copyright owner] 18*2797Sjg * 19*2797Sjg * CDDL HEADER END 20*2797Sjg */ 21*2797Sjg /* 22*2797Sjg * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23*2797Sjg * Use is subject to license terms. 
24*2797Sjg */ 25*2797Sjg 26*2797Sjg #pragma ident "%Z%%M% %I% %E% SMI" 27*2797Sjg 28*2797Sjg #include <sys/note.h> 29*2797Sjg #include <sys/t_lock.h> 30*2797Sjg #include <sys/cmn_err.h> 31*2797Sjg #include <sys/instance.h> 32*2797Sjg #include <sys/conf.h> 33*2797Sjg #include <sys/stat.h> 34*2797Sjg #include <sys/ddi.h> 35*2797Sjg #include <sys/hwconf.h> 36*2797Sjg #include <sys/sunddi.h> 37*2797Sjg #include <sys/sunndi.h> 38*2797Sjg #include <sys/ddi_impldefs.h> 39*2797Sjg #include <sys/ndi_impldefs.h> 40*2797Sjg #include <sys/modctl.h> 41*2797Sjg #include <sys/dacf.h> 42*2797Sjg #include <sys/promif.h> 43*2797Sjg #include <sys/cpuvar.h> 44*2797Sjg #include <sys/pathname.h> 45*2797Sjg #include <sys/kobj.h> 46*2797Sjg #include <sys/devcache.h> 47*2797Sjg #include <sys/devcache_impl.h> 48*2797Sjg #include <sys/sysmacros.h> 49*2797Sjg #include <sys/varargs.h> 50*2797Sjg #include <sys/callb.h> 51*2797Sjg 52*2797Sjg /* 53*2797Sjg * This facility provides interfaces to clients to register, 54*2797Sjg * read and update cache data in persisted backing store files, 55*2797Sjg * usually in /etc/devices. The data persisted through this 56*2797Sjg * mechanism should be stateless data, functioning in the sense 57*2797Sjg * of a cache. Writes are performed by a background daemon 58*2797Sjg * thread, permitting a client to schedule an update without 59*2797Sjg * blocking, then continue updating the data state in 60*2797Sjg * parallel. The data is only locked by the daemon thread 61*2797Sjg * to pack the data in preparation for the write. 62*2797Sjg * 63*2797Sjg * Data persisted through this mechanism should be capable 64*2797Sjg * of being regenerated through normal system operation, 65*2797Sjg * for example attaching all disk devices would cause all 66*2797Sjg * devids to be registered for those devices. 
 * By caching
 * a devid-device tuple, the system can operate in a
 * more optimal way, directly attaching the device mapped
 * to a devid, rather than burdensomely driving attach of
 * the entire device tree to discover a single device.
 *
 * Note that a client should only need to include
 * <sys/devcache.h> for the supported interfaces.
 *
 * The data per client is entirely within the control of
 * the client.  When reading, data unpacked from the backing
 * store should be inserted in the list.  The pointer to
 * the list can be retrieved via nvf_list().  When writing,
 * the data on the list is to be packed and returned to the
 * nvpdaemon as an nvlist.
 *
 * Obvious restrictions are imposed by the limits of the
 * nvlist format.  The data cannot be read or written
 * piecemeal, and large amounts of data aren't recommended.
 * However, nvlists do allow that data be named and typed
 * and can be size-of-int invariant, and the cached data
 * can be versioned conveniently.
 *
 * The registration involves two steps: a handle is
 * allocated by calling the registration function.
 * This sets up the data referenced by the handle and
 * initializes the lock.  Following registration, the
 * client must initialize the data list.  The list
 * interfaces require that the list element with offset
 * to the node link be provided.  The format of the
 * list element is under the control of the client.
 *
 * Locking: the address of the data list r/w lock provided
 * can be accessed with nvf_lock().  The lock must be held
 * as reader when traversing the list or checking state,
 * such as nvf_is_dirty().
The lock must be held as 102*2797Sjg * writer when updating the list or marking it dirty. 103*2797Sjg * The lock must not be held when waking the daemon. 104*2797Sjg * 105*2797Sjg * The data r/w lock is held as writer when the pack, 106*2797Sjg * unpack and free list handlers are called. The 107*2797Sjg * lock should not be dropped and must be still held 108*2797Sjg * upon return. The client should also hold the lock 109*2797Sjg * as reader when checking if the list is dirty, and 110*2797Sjg * as writer when marking the list dirty or initiating 111*2797Sjg * a read. 112*2797Sjg * 113*2797Sjg * The asynchronous nature of updates allows for the 114*2797Sjg * possibility that the data may continue to be updated 115*2797Sjg * once the daemon has been notified that an update is 116*2797Sjg * desired. The data only needs to be locked against 117*2797Sjg * updates when packing the data into the form to be 118*2797Sjg * written. When the write of the packed data has 119*2797Sjg * completed, the daemon will automatically reschedule 120*2797Sjg * an update if the data was marked dirty after the 121*2797Sjg * point at which it was packed. Before beginning an 122*2797Sjg * update, the daemon attempts to lock the data as 123*2797Sjg * writer; if the writer lock is already held, it 124*2797Sjg * backs off and retries later. The model is to give 125*2797Sjg * priority to the kernel processes generating the 126*2797Sjg * data, and that the nature of the data is that 127*2797Sjg * it does not change often, can be re-generated when 128*2797Sjg * needed, so updates should not happen often and 129*2797Sjg * can be delayed until the data stops changing. 130*2797Sjg * The client may update the list or mark it dirty 131*2797Sjg * any time it is able to acquire the lock as 132*2797Sjg * writer first. 
133*2797Sjg * 134*2797Sjg * A failed write will be retried after some delay, 135*2797Sjg * in the hope that the cause of the error will be 136*2797Sjg * transient, for example a filesystem with no space 137*2797Sjg * available. An update on a read-only filesystem 138*2797Sjg * is failed silently and not retried; this would be 139*2797Sjg * the case when booted off install media. 140*2797Sjg * 141*2797Sjg * There is no unregister mechanism as of yet, as it 142*2797Sjg * hasn't been needed so far. 143*2797Sjg */ 144*2797Sjg 145*2797Sjg /* 146*2797Sjg * Global list of files registered and updated by the nvpflush 147*2797Sjg * daemon, protected by the nvf_cache_mutex. While an 148*2797Sjg * update is taking place, a file is temporarily moved to 149*2797Sjg * the dirty list to avoid locking the primary list for 150*2797Sjg * the duration of the update. 151*2797Sjg */ 152*2797Sjg list_t nvf_cache_files; 153*2797Sjg list_t nvf_dirty_files; 154*2797Sjg kmutex_t nvf_cache_mutex; 155*2797Sjg 156*2797Sjg 157*2797Sjg /* 158*2797Sjg * Allow some delay from an update of the data before flushing 159*2797Sjg * to permit simultaneous updates of multiple changes. 160*2797Sjg * Changes in the data are expected to be bursty, ie 161*2797Sjg * reconfig or hot-plug of a new adapter. 162*2797Sjg * 163*2797Sjg * kfio_report_error (default 0) 164*2797Sjg * Set to 1 to enable some error messages related to low-level 165*2797Sjg * kernel file i/o operations. 166*2797Sjg * 167*2797Sjg * nvpflush_delay (default 10) 168*2797Sjg * The number of seconds after data is marked dirty before the 169*2797Sjg * flush daemon is triggered to flush the data. A longer period 170*2797Sjg * of time permits more data updates per write. Note that 171*2797Sjg * every update resets the timer so no repository write will 172*2797Sjg * occur while data is being updated continuously. 
173*2797Sjg * 174*2797Sjg * nvpdaemon_idle_time (default 60) 175*2797Sjg * The number of seconds the daemon will sleep idle before exiting. 176*2797Sjg * 177*2797Sjg */ 178*2797Sjg #define NVPFLUSH_DELAY 10 179*2797Sjg #define NVPDAEMON_IDLE_TIME 60 180*2797Sjg 181*2797Sjg #define TICKS_PER_SECOND (drv_usectohz(1000000)) 182*2797Sjg 183*2797Sjg /* 184*2797Sjg * Tunables 185*2797Sjg */ 186*2797Sjg int kfio_report_error = 0; /* kernel file i/o operations */ 187*2797Sjg int kfio_disable_read = 0; /* disable all reads */ 188*2797Sjg int kfio_disable_write = 0; /* disable all writes */ 189*2797Sjg 190*2797Sjg int nvpflush_delay = NVPFLUSH_DELAY; 191*2797Sjg int nvpdaemon_idle_time = NVPDAEMON_IDLE_TIME; 192*2797Sjg 193*2797Sjg static timeout_id_t nvpflush_id = 0; 194*2797Sjg static int nvpflush_timer_busy = 0; 195*2797Sjg static int nvpflush_daemon_active = 0; 196*2797Sjg static kthread_t *nvpflush_thr_id = 0; 197*2797Sjg 198*2797Sjg static int do_nvpflush = 0; 199*2797Sjg static int nvpbusy = 0; 200*2797Sjg static kmutex_t nvpflush_lock; 201*2797Sjg static kcondvar_t nvpflush_cv; 202*2797Sjg static kthread_id_t nvpflush_thread; 203*2797Sjg static clock_t nvpticks; 204*2797Sjg 205*2797Sjg static void nvpflush_daemon(void); 206*2797Sjg 207*2797Sjg #ifdef DEBUG 208*2797Sjg int nvpdaemon_debug = 0; 209*2797Sjg int kfio_debug = 0; 210*2797Sjg #endif /* DEBUG */ 211*2797Sjg 212*2797Sjg extern int modrootloaded; 213*2797Sjg extern void mdi_read_devices_files(void); 214*2797Sjg extern void mdi_clean_vhcache(void); 215*2797Sjg 216*2797Sjg /* 217*2797Sjg * Initialize the overall cache file management 218*2797Sjg */ 219*2797Sjg void 220*2797Sjg i_ddi_devices_init(void) 221*2797Sjg { 222*2797Sjg list_create(&nvf_cache_files, sizeof (nvfd_t), 223*2797Sjg offsetof(nvfd_t, nvf_link)); 224*2797Sjg list_create(&nvf_dirty_files, sizeof (nvfd_t), 225*2797Sjg offsetof(nvfd_t, nvf_link)); 226*2797Sjg mutex_init(&nvf_cache_mutex, NULL, MUTEX_DEFAULT, NULL); 227*2797Sjg devid_cache_init(); 
228*2797Sjg } 229*2797Sjg 230*2797Sjg /* 231*2797Sjg * Read cache files 232*2797Sjg * The files read here should be restricted to those 233*2797Sjg * that may be required to mount root. 234*2797Sjg */ 235*2797Sjg void 236*2797Sjg i_ddi_read_devices_files(void) 237*2797Sjg { 238*2797Sjg if (!kfio_disable_read) { 239*2797Sjg mdi_read_devices_files(); 240*2797Sjg devid_cache_read(); 241*2797Sjg } 242*2797Sjg } 243*2797Sjg 244*2797Sjg void 245*2797Sjg i_ddi_start_flush_daemon(void) 246*2797Sjg { 247*2797Sjg nvfd_t *nvfdp; 248*2797Sjg 249*2797Sjg ASSERT(i_ddi_io_initialized()); 250*2797Sjg 251*2797Sjg mutex_init(&nvpflush_lock, NULL, MUTEX_DRIVER, NULL); 252*2797Sjg cv_init(&nvpflush_cv, NULL, CV_DRIVER, NULL); 253*2797Sjg 254*2797Sjg mutex_enter(&nvf_cache_mutex); 255*2797Sjg for (nvfdp = list_head(&nvf_cache_files); nvfdp; 256*2797Sjg nvfdp = list_next(&nvf_cache_files, nvfdp)) { 257*2797Sjg if (NVF_IS_DIRTY(nvfdp)) { 258*2797Sjg nvf_wake_daemon(); 259*2797Sjg break; 260*2797Sjg } 261*2797Sjg } 262*2797Sjg mutex_exit(&nvf_cache_mutex); 263*2797Sjg } 264*2797Sjg 265*2797Sjg void 266*2797Sjg i_ddi_clean_devices_files(void) 267*2797Sjg { 268*2797Sjg devid_cache_cleanup(); 269*2797Sjg mdi_clean_vhcache(); 270*2797Sjg } 271*2797Sjg 272*2797Sjg /* 273*2797Sjg * Register a cache file to be managed and updated by the nvpflush daemon. 274*2797Sjg * All operations are performed through the returned handle. 275*2797Sjg * There is no unregister mechanism for now. 
276*2797Sjg */ 277*2797Sjg nvf_handle_t 278*2797Sjg nvf_register_file(nvf_ops_t *ops) 279*2797Sjg { 280*2797Sjg nvfd_t *nvfdp; 281*2797Sjg 282*2797Sjg nvfdp = kmem_zalloc(sizeof (*nvfdp), KM_SLEEP); 283*2797Sjg 284*2797Sjg nvfdp->nvf_ops = ops; 285*2797Sjg nvfdp->nvf_flags = 0; 286*2797Sjg rw_init(&nvfdp->nvf_lock, NULL, RW_DRIVER, NULL); 287*2797Sjg 288*2797Sjg mutex_enter(&nvf_cache_mutex); 289*2797Sjg list_insert_tail(&nvf_cache_files, nvfdp); 290*2797Sjg mutex_exit(&nvf_cache_mutex); 291*2797Sjg 292*2797Sjg return ((nvf_handle_t)nvfdp); 293*2797Sjg } 294*2797Sjg 295*2797Sjg /*PRINTFLIKE1*/ 296*2797Sjg void 297*2797Sjg nvf_error(const char *fmt, ...) 298*2797Sjg { 299*2797Sjg va_list ap; 300*2797Sjg 301*2797Sjg if (kfio_report_error) { 302*2797Sjg va_start(ap, fmt); 303*2797Sjg vcmn_err(CE_NOTE, fmt, ap); 304*2797Sjg va_end(ap); 305*2797Sjg } 306*2797Sjg } 307*2797Sjg 308*2797Sjg /* 309*2797Sjg * Some operations clients may use to manage the data 310*2797Sjg * to be persisted in a cache file. 
311*2797Sjg */ 312*2797Sjg char * 313*2797Sjg nvf_cache_name(nvf_handle_t handle) 314*2797Sjg { 315*2797Sjg return (((nvfd_t *)handle)->nvf_cache_path); 316*2797Sjg } 317*2797Sjg 318*2797Sjg krwlock_t * 319*2797Sjg nvf_lock(nvf_handle_t handle) 320*2797Sjg { 321*2797Sjg return (&(((nvfd_t *)handle)->nvf_lock)); 322*2797Sjg } 323*2797Sjg 324*2797Sjg list_t * 325*2797Sjg nvf_list(nvf_handle_t handle) 326*2797Sjg { 327*2797Sjg return (&(((nvfd_t *)handle)->nvf_data_list)); 328*2797Sjg } 329*2797Sjg 330*2797Sjg void 331*2797Sjg nvf_mark_dirty(nvf_handle_t handle) 332*2797Sjg { 333*2797Sjg ASSERT(RW_WRITE_HELD(&(((nvfd_t *)handle)->nvf_lock))); 334*2797Sjg NVF_MARK_DIRTY((nvfd_t *)handle); 335*2797Sjg } 336*2797Sjg 337*2797Sjg int 338*2797Sjg nvf_is_dirty(nvf_handle_t handle) 339*2797Sjg { 340*2797Sjg ASSERT(RW_LOCK_HELD(&(((nvfd_t *)handle)->nvf_lock))); 341*2797Sjg return (NVF_IS_DIRTY((nvfd_t *)handle)); 342*2797Sjg } 343*2797Sjg 344*2797Sjg static uint16_t 345*2797Sjg nvp_cksum(uchar_t *buf, int64_t buflen) 346*2797Sjg { 347*2797Sjg uint16_t cksum = 0; 348*2797Sjg uint16_t *p = (uint16_t *)buf; 349*2797Sjg int64_t n; 350*2797Sjg 351*2797Sjg if ((buflen & 0x01) != 0) { 352*2797Sjg buflen--; 353*2797Sjg cksum = buf[buflen]; 354*2797Sjg } 355*2797Sjg n = buflen / 2; 356*2797Sjg while (n-- > 0) 357*2797Sjg cksum ^= *p++; 358*2797Sjg return (cksum); 359*2797Sjg } 360*2797Sjg 361*2797Sjg int 362*2797Sjg fread_nvlist(char *filename, nvlist_t **ret_nvlist) 363*2797Sjg { 364*2797Sjg struct _buf *file; 365*2797Sjg nvpf_hdr_t hdr; 366*2797Sjg char *buf; 367*2797Sjg nvlist_t *nvl; 368*2797Sjg int rval; 369*2797Sjg uint_t offset; 370*2797Sjg int n; 371*2797Sjg char c; 372*2797Sjg uint16_t cksum, hdrsum; 373*2797Sjg 374*2797Sjg *ret_nvlist = NULL; 375*2797Sjg 376*2797Sjg file = kobj_open_file(filename); 377*2797Sjg if (file == (struct _buf *)-1) { 378*2797Sjg KFDEBUG((CE_CONT, "cannot open file: %s\n", filename)); 379*2797Sjg return (ENOENT); 380*2797Sjg } 381*2797Sjg 382*2797Sjg 
offset = 0; 383*2797Sjg n = kobj_read_file(file, (char *)&hdr, sizeof (hdr), offset); 384*2797Sjg if (n != sizeof (hdr)) { 385*2797Sjg kobj_close_file(file); 386*2797Sjg if (n < 0) { 387*2797Sjg nvf_error("error reading header: %s\n", filename); 388*2797Sjg return (EIO); 389*2797Sjg } else if (n == 0) { 390*2797Sjg KFDEBUG((CE_CONT, "file empty: %s\n", filename)); 391*2797Sjg } else { 392*2797Sjg nvf_error("header size incorrect: %s\n", filename); 393*2797Sjg } 394*2797Sjg return (EINVAL); 395*2797Sjg } 396*2797Sjg offset += n; 397*2797Sjg 398*2797Sjg KFDEBUG2((CE_CONT, "nvpf_magic: 0x%x\n", hdr.nvpf_magic)); 399*2797Sjg KFDEBUG2((CE_CONT, "nvpf_version: %d\n", hdr.nvpf_version)); 400*2797Sjg KFDEBUG2((CE_CONT, "nvpf_size: %lld\n", 401*2797Sjg (longlong_t)hdr.nvpf_size)); 402*2797Sjg KFDEBUG2((CE_CONT, "nvpf_hdr_chksum: 0x%x\n", 403*2797Sjg hdr.nvpf_hdr_chksum)); 404*2797Sjg KFDEBUG2((CE_CONT, "nvpf_chksum: 0x%x\n", hdr.nvpf_chksum)); 405*2797Sjg 406*2797Sjg cksum = hdr.nvpf_hdr_chksum; 407*2797Sjg hdr.nvpf_hdr_chksum = 0; 408*2797Sjg hdrsum = nvp_cksum((uchar_t *)&hdr, sizeof (hdr)); 409*2797Sjg 410*2797Sjg if (hdr.nvpf_magic != NVPF_HDR_MAGIC || 411*2797Sjg hdr.nvpf_version != NVPF_HDR_VERSION || hdrsum != cksum) { 412*2797Sjg kobj_close_file(file); 413*2797Sjg if (hdrsum != cksum) { 414*2797Sjg nvf_error("%s: checksum error " 415*2797Sjg "(actual 0x%x, expected 0x%x)\n", 416*2797Sjg filename, hdrsum, cksum); 417*2797Sjg } 418*2797Sjg nvf_error("%s: header information incorrect", filename); 419*2797Sjg return (EINVAL); 420*2797Sjg } 421*2797Sjg 422*2797Sjg ASSERT(hdr.nvpf_size >= 0); 423*2797Sjg 424*2797Sjg buf = kmem_alloc(hdr.nvpf_size, KM_SLEEP); 425*2797Sjg n = kobj_read_file(file, buf, hdr.nvpf_size, offset); 426*2797Sjg if (n != hdr.nvpf_size) { 427*2797Sjg kmem_free(buf, hdr.nvpf_size); 428*2797Sjg kobj_close_file(file); 429*2797Sjg if (n < 0) { 430*2797Sjg nvf_error("%s: read error %d", filename, n); 431*2797Sjg } else { 432*2797Sjg nvf_error("%s: 
incomplete read %d/%lld", 433*2797Sjg filename, n, (longlong_t)hdr.nvpf_size); 434*2797Sjg } 435*2797Sjg return (EINVAL); 436*2797Sjg } 437*2797Sjg offset += n; 438*2797Sjg 439*2797Sjg rval = kobj_read_file(file, &c, 1, offset); 440*2797Sjg kobj_close_file(file); 441*2797Sjg if (rval > 0) { 442*2797Sjg nvf_error("%s is larger than %lld\n", 443*2797Sjg filename, (longlong_t)hdr.nvpf_size); 444*2797Sjg kmem_free(buf, hdr.nvpf_size); 445*2797Sjg return (EINVAL); 446*2797Sjg } 447*2797Sjg 448*2797Sjg cksum = nvp_cksum((uchar_t *)buf, hdr.nvpf_size); 449*2797Sjg if (hdr.nvpf_chksum != cksum) { 450*2797Sjg nvf_error("%s: checksum error (actual 0x%x, expected 0x%x)\n", 451*2797Sjg filename, hdr.nvpf_chksum, cksum); 452*2797Sjg kmem_free(buf, hdr.nvpf_size); 453*2797Sjg return (EINVAL); 454*2797Sjg } 455*2797Sjg 456*2797Sjg nvl = NULL; 457*2797Sjg rval = nvlist_unpack(buf, hdr.nvpf_size, &nvl, 0); 458*2797Sjg if (rval != 0) { 459*2797Sjg nvf_error("%s: error %d unpacking nvlist\n", 460*2797Sjg filename, rval); 461*2797Sjg kmem_free(buf, hdr.nvpf_size); 462*2797Sjg return (EINVAL); 463*2797Sjg } 464*2797Sjg 465*2797Sjg kmem_free(buf, hdr.nvpf_size); 466*2797Sjg *ret_nvlist = nvl; 467*2797Sjg return (0); 468*2797Sjg } 469*2797Sjg 470*2797Sjg static int 471*2797Sjg kfcreate(char *filename, kfile_t **kfilep) 472*2797Sjg { 473*2797Sjg kfile_t *fp; 474*2797Sjg int rval; 475*2797Sjg 476*2797Sjg ASSERT(modrootloaded); 477*2797Sjg 478*2797Sjg fp = kmem_alloc(sizeof (kfile_t), KM_SLEEP); 479*2797Sjg 480*2797Sjg fp->kf_vnflags = FCREAT | FWRITE | FTRUNC; 481*2797Sjg fp->kf_fname = filename; 482*2797Sjg fp->kf_fpos = 0; 483*2797Sjg fp->kf_state = 0; 484*2797Sjg 485*2797Sjg KFDEBUG((CE_CONT, "create: %s flags 0x%x\n", 486*2797Sjg filename, fp->kf_vnflags)); 487*2797Sjg rval = vn_open(filename, UIO_SYSSPACE, fp->kf_vnflags, 488*2797Sjg 0444, &fp->kf_vp, CRCREAT, 0); 489*2797Sjg if (rval != 0) { 490*2797Sjg kmem_free(fp, sizeof (kfile_t)); 491*2797Sjg KFDEBUG((CE_CONT, "%s: create error 
%d\n", 492*2797Sjg filename, rval)); 493*2797Sjg return (rval); 494*2797Sjg } 495*2797Sjg 496*2797Sjg *kfilep = fp; 497*2797Sjg return (0); 498*2797Sjg } 499*2797Sjg 500*2797Sjg static int 501*2797Sjg kfremove(char *filename) 502*2797Sjg { 503*2797Sjg int rval; 504*2797Sjg 505*2797Sjg KFDEBUG((CE_CONT, "remove: %s\n", filename)); 506*2797Sjg rval = vn_remove(filename, UIO_SYSSPACE, RMFILE); 507*2797Sjg if (rval != 0) { 508*2797Sjg KFDEBUG((CE_CONT, "%s: remove error %d\n", 509*2797Sjg filename, rval)); 510*2797Sjg } 511*2797Sjg return (rval); 512*2797Sjg } 513*2797Sjg 514*2797Sjg static int 515*2797Sjg kfread(kfile_t *fp, char *buf, ssize_t bufsiz, ssize_t *ret_n) 516*2797Sjg { 517*2797Sjg ssize_t resid; 518*2797Sjg int err; 519*2797Sjg ssize_t n; 520*2797Sjg 521*2797Sjg ASSERT(modrootloaded); 522*2797Sjg 523*2797Sjg if (fp->kf_state != 0) 524*2797Sjg return (fp->kf_state); 525*2797Sjg 526*2797Sjg err = vn_rdwr(UIO_READ, fp->kf_vp, buf, bufsiz, fp->kf_fpos, 527*2797Sjg UIO_SYSSPACE, 0, (rlim64_t)0, kcred, &resid); 528*2797Sjg if (err != 0) { 529*2797Sjg KFDEBUG((CE_CONT, "%s: read error %d\n", 530*2797Sjg fp->kf_fname, err)); 531*2797Sjg fp->kf_state = err; 532*2797Sjg return (err); 533*2797Sjg } 534*2797Sjg 535*2797Sjg ASSERT(resid >= 0 && resid <= bufsiz); 536*2797Sjg n = bufsiz - resid; 537*2797Sjg 538*2797Sjg KFDEBUG1((CE_CONT, "%s: read %ld bytes ok %ld bufsiz, %ld resid\n", 539*2797Sjg fp->kf_fname, n, bufsiz, resid)); 540*2797Sjg 541*2797Sjg fp->kf_fpos += n; 542*2797Sjg *ret_n = n; 543*2797Sjg return (0); 544*2797Sjg } 545*2797Sjg 546*2797Sjg static int 547*2797Sjg kfwrite(kfile_t *fp, char *buf, ssize_t bufsiz, ssize_t *ret_n) 548*2797Sjg { 549*2797Sjg rlim64_t rlimit; 550*2797Sjg ssize_t resid; 551*2797Sjg int err; 552*2797Sjg ssize_t len; 553*2797Sjg ssize_t n = 0; 554*2797Sjg 555*2797Sjg ASSERT(modrootloaded); 556*2797Sjg 557*2797Sjg if (fp->kf_state != 0) 558*2797Sjg return (fp->kf_state); 559*2797Sjg 560*2797Sjg len = bufsiz; 561*2797Sjg rlimit = 
bufsiz + 1; 562*2797Sjg for (;;) { 563*2797Sjg err = vn_rdwr(UIO_WRITE, fp->kf_vp, buf, len, fp->kf_fpos, 564*2797Sjg UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid); 565*2797Sjg if (err) { 566*2797Sjg KFDEBUG((CE_CONT, "%s: write error %d\n", 567*2797Sjg fp->kf_fname, err)); 568*2797Sjg fp->kf_state = err; 569*2797Sjg return (err); 570*2797Sjg } 571*2797Sjg 572*2797Sjg KFDEBUG1((CE_CONT, "%s: write %ld bytes ok %ld resid\n", 573*2797Sjg fp->kf_fname, len-resid, resid)); 574*2797Sjg 575*2797Sjg ASSERT(resid >= 0 && resid <= len); 576*2797Sjg 577*2797Sjg n += (len - resid); 578*2797Sjg if (resid == 0) 579*2797Sjg break; 580*2797Sjg 581*2797Sjg if (resid == len) { 582*2797Sjg KFDEBUG((CE_CONT, "%s: filesystem full?\n", 583*2797Sjg fp->kf_fname)); 584*2797Sjg fp->kf_state = ENOSPC; 585*2797Sjg return (ENOSPC); 586*2797Sjg } 587*2797Sjg 588*2797Sjg len -= resid; 589*2797Sjg buf += len; 590*2797Sjg fp->kf_fpos += len; 591*2797Sjg len = resid; 592*2797Sjg } 593*2797Sjg 594*2797Sjg ASSERT(n == bufsiz); 595*2797Sjg KFDEBUG1((CE_CONT, "%s: wrote %ld bytes ok\n", fp->kf_fname, n)); 596*2797Sjg 597*2797Sjg *ret_n = n; 598*2797Sjg return (0); 599*2797Sjg } 600*2797Sjg 601*2797Sjg 602*2797Sjg static int 603*2797Sjg kfclose(kfile_t *fp) 604*2797Sjg { 605*2797Sjg int rval; 606*2797Sjg 607*2797Sjg KFDEBUG((CE_CONT, "close: %s\n", fp->kf_fname)); 608*2797Sjg 609*2797Sjg if ((fp->kf_vnflags & FWRITE) && fp->kf_state == 0) { 610*2797Sjg rval = VOP_FSYNC(fp->kf_vp, FSYNC, kcred); 611*2797Sjg if (rval != 0) { 612*2797Sjg nvf_error("%s: sync error %d\n", 613*2797Sjg fp->kf_fname, rval); 614*2797Sjg } 615*2797Sjg KFDEBUG((CE_CONT, "%s: sync ok\n", fp->kf_fname)); 616*2797Sjg } 617*2797Sjg 618*2797Sjg rval = VOP_CLOSE(fp->kf_vp, fp->kf_vnflags, 1, (offset_t)0, kcred); 619*2797Sjg if (rval != 0) { 620*2797Sjg if (fp->kf_state == 0) { 621*2797Sjg nvf_error("%s: close error %d\n", 622*2797Sjg fp->kf_fname, rval); 623*2797Sjg } 624*2797Sjg } else { 625*2797Sjg if (fp->kf_state == 0) 626*2797Sjg 
KFDEBUG((CE_CONT, "%s: close ok\n", fp->kf_fname)); 627*2797Sjg } 628*2797Sjg 629*2797Sjg VN_RELE(fp->kf_vp); 630*2797Sjg kmem_free(fp, sizeof (kfile_t)); 631*2797Sjg return (rval); 632*2797Sjg } 633*2797Sjg 634*2797Sjg static int 635*2797Sjg kfrename(char *oldname, char *newname) 636*2797Sjg { 637*2797Sjg int rval; 638*2797Sjg 639*2797Sjg ASSERT(modrootloaded); 640*2797Sjg 641*2797Sjg KFDEBUG((CE_CONT, "renaming %s to %s\n", oldname, newname)); 642*2797Sjg 643*2797Sjg if ((rval = vn_rename(oldname, newname, UIO_SYSSPACE)) != 0) { 644*2797Sjg KFDEBUG((CE_CONT, "rename %s to %s: %d\n", 645*2797Sjg oldname, newname, rval)); 646*2797Sjg } 647*2797Sjg 648*2797Sjg return (rval); 649*2797Sjg } 650*2797Sjg 651*2797Sjg int 652*2797Sjg fwrite_nvlist(char *filename, nvlist_t *nvl) 653*2797Sjg { 654*2797Sjg char *buf; 655*2797Sjg char *nvbuf; 656*2797Sjg kfile_t *fp; 657*2797Sjg char *newname; 658*2797Sjg int len, err, err1; 659*2797Sjg size_t buflen; 660*2797Sjg ssize_t n; 661*2797Sjg 662*2797Sjg ASSERT(modrootloaded); 663*2797Sjg 664*2797Sjg nvbuf = NULL; 665*2797Sjg err = nvlist_pack(nvl, &nvbuf, &buflen, NV_ENCODE_NATIVE, 0); 666*2797Sjg if (err != 0) { 667*2797Sjg nvf_error("%s: error %d packing nvlist\n", 668*2797Sjg filename, err); 669*2797Sjg return (err); 670*2797Sjg } 671*2797Sjg 672*2797Sjg buf = kmem_alloc(sizeof (nvpf_hdr_t) + buflen, KM_SLEEP); 673*2797Sjg bzero(buf, sizeof (nvpf_hdr_t)); 674*2797Sjg 675*2797Sjg ((nvpf_hdr_t *)buf)->nvpf_magic = NVPF_HDR_MAGIC; 676*2797Sjg ((nvpf_hdr_t *)buf)->nvpf_version = NVPF_HDR_VERSION; 677*2797Sjg ((nvpf_hdr_t *)buf)->nvpf_size = buflen; 678*2797Sjg ((nvpf_hdr_t *)buf)->nvpf_chksum = nvp_cksum((uchar_t *)nvbuf, buflen); 679*2797Sjg ((nvpf_hdr_t *)buf)->nvpf_hdr_chksum = 680*2797Sjg nvp_cksum((uchar_t *)buf, sizeof (nvpf_hdr_t)); 681*2797Sjg 682*2797Sjg bcopy(nvbuf, buf + sizeof (nvpf_hdr_t), buflen); 683*2797Sjg kmem_free(nvbuf, buflen); 684*2797Sjg buflen += sizeof (nvpf_hdr_t); 685*2797Sjg 686*2797Sjg len = 
strlen(filename) + MAX_SUFFIX_LEN + 2; 687*2797Sjg newname = kmem_alloc(len, KM_SLEEP); 688*2797Sjg 689*2797Sjg 690*2797Sjg (void) sprintf(newname, "%s.%s", 691*2797Sjg filename, NEW_FILENAME_SUFFIX); 692*2797Sjg 693*2797Sjg /* 694*2797Sjg * To make it unlikely we suffer data loss, write 695*2797Sjg * data to the new temporary file. Once successful 696*2797Sjg * complete the transaction by renaming the new file 697*2797Sjg * to replace the previous. 698*2797Sjg */ 699*2797Sjg 700*2797Sjg if ((err = kfcreate(newname, &fp)) == 0) { 701*2797Sjg err = kfwrite(fp, buf, buflen, &n); 702*2797Sjg if (err) { 703*2797Sjg nvf_error("%s: write error - %d\n", 704*2797Sjg newname, err); 705*2797Sjg } else { 706*2797Sjg if (n != buflen) { 707*2797Sjg nvf_error( 708*2797Sjg "%s: partial write %ld of %ld bytes\n", 709*2797Sjg newname, n, buflen); 710*2797Sjg nvf_error("%s: filesystem may be full?\n", 711*2797Sjg newname); 712*2797Sjg err = EIO; 713*2797Sjg } 714*2797Sjg } 715*2797Sjg if ((err1 = kfclose(fp)) != 0) { 716*2797Sjg nvf_error("%s: close error\n", newname); 717*2797Sjg if (err == 0) 718*2797Sjg err = err1; 719*2797Sjg } 720*2797Sjg if (err != 0) { 721*2797Sjg if (kfremove(newname) != 0) { 722*2797Sjg nvf_error("%s: remove failed\n", 723*2797Sjg newname); 724*2797Sjg } 725*2797Sjg } 726*2797Sjg } else { 727*2797Sjg nvf_error("%s: create failed - %d\n", filename, err); 728*2797Sjg } 729*2797Sjg 730*2797Sjg if (err == 0) { 731*2797Sjg if ((err = kfrename(newname, filename)) != 0) { 732*2797Sjg nvf_error("%s: rename from %s failed\n", 733*2797Sjg newname, filename); 734*2797Sjg } 735*2797Sjg } 736*2797Sjg 737*2797Sjg kmem_free(newname, len); 738*2797Sjg kmem_free(buf, buflen); 739*2797Sjg 740*2797Sjg return (err); 741*2797Sjg } 742*2797Sjg 743*2797Sjg static int 744*2797Sjg e_fwrite_nvlist(nvfd_t *nvfd, nvlist_t *nvl) 745*2797Sjg { 746*2797Sjg int err; 747*2797Sjg 748*2797Sjg if ((err = fwrite_nvlist(nvfd->nvf_cache_path, nvl)) == 0) 749*2797Sjg return (DDI_SUCCESS); 
750*2797Sjg else { 751*2797Sjg if (err == EROFS) 752*2797Sjg NVF_MARK_READONLY(nvfd); 753*2797Sjg return (DDI_FAILURE); 754*2797Sjg } 755*2797Sjg } 756*2797Sjg 757*2797Sjg static void 758*2797Sjg nvp_list_free(nvfd_t *nvf) 759*2797Sjg { 760*2797Sjg ASSERT(RW_WRITE_HELD(&nvf->nvf_lock)); 761*2797Sjg (nvf->nvf_list_free)((nvf_handle_t)nvf); 762*2797Sjg ASSERT(RW_WRITE_HELD(&nvf->nvf_lock)); 763*2797Sjg } 764*2797Sjg 765*2797Sjg /* 766*2797Sjg * Read a file in the nvlist format 767*2797Sjg * EIO - i/o error during read 768*2797Sjg * ENOENT - file not found 769*2797Sjg * EINVAL - file contents corrupted 770*2797Sjg */ 771*2797Sjg static int 772*2797Sjg fread_nvp_list(nvfd_t *nvfd) 773*2797Sjg { 774*2797Sjg nvlist_t *nvl; 775*2797Sjg nvpair_t *nvp; 776*2797Sjg char *name; 777*2797Sjg nvlist_t *sublist; 778*2797Sjg int rval; 779*2797Sjg int rv; 780*2797Sjg 781*2797Sjg ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock))); 782*2797Sjg 783*2797Sjg rval = fread_nvlist(nvfd->nvf_cache_path, &nvl); 784*2797Sjg if (rval != 0) 785*2797Sjg return (rval); 786*2797Sjg ASSERT(nvl != NULL); 787*2797Sjg 788*2797Sjg nvp = NULL; 789*2797Sjg while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 790*2797Sjg name = nvpair_name(nvp); 791*2797Sjg ASSERT(strlen(name) > 0); 792*2797Sjg 793*2797Sjg switch (nvpair_type(nvp)) { 794*2797Sjg case DATA_TYPE_NVLIST: 795*2797Sjg rval = nvpair_value_nvlist(nvp, &sublist); 796*2797Sjg if (rval != 0) { 797*2797Sjg nvf_error( 798*2797Sjg "nvpair_value_nvlist error %s %d\n", 799*2797Sjg name, rval); 800*2797Sjg goto error; 801*2797Sjg } 802*2797Sjg 803*2797Sjg /* 804*2797Sjg * unpack nvlist for this device and 805*2797Sjg * add elements to data list. 
806*2797Sjg */ 807*2797Sjg ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock))); 808*2797Sjg rv = (nvfd->nvf_unpack_nvlist) 809*2797Sjg ((nvf_handle_t)nvfd, sublist, name); 810*2797Sjg ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock))); 811*2797Sjg if (rv != 0) { 812*2797Sjg nvf_error( 813*2797Sjg "%s: %s invalid list element\n", 814*2797Sjg nvfd->nvf_cache_path, name); 815*2797Sjg rval = EINVAL; 816*2797Sjg goto error; 817*2797Sjg } 818*2797Sjg break; 819*2797Sjg 820*2797Sjg default: 821*2797Sjg nvf_error("%s: %s unsupported data type %d\n", 822*2797Sjg nvfd->nvf_cache_path, name, nvpair_type(nvp)); 823*2797Sjg rval = EINVAL; 824*2797Sjg goto error; 825*2797Sjg } 826*2797Sjg } 827*2797Sjg 828*2797Sjg nvlist_free(nvl); 829*2797Sjg 830*2797Sjg return (0); 831*2797Sjg 832*2797Sjg error: 833*2797Sjg nvlist_free(nvl); 834*2797Sjg nvp_list_free(nvfd); 835*2797Sjg return (rval); 836*2797Sjg } 837*2797Sjg 838*2797Sjg 839*2797Sjg int 840*2797Sjg nvf_read_file(nvf_handle_t nvf_handle) 841*2797Sjg { 842*2797Sjg nvfd_t *nvfd = (nvfd_t *)nvf_handle; 843*2797Sjg int rval; 844*2797Sjg 845*2797Sjg ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock)); 846*2797Sjg 847*2797Sjg if (kfio_disable_read) 848*2797Sjg return (0); 849*2797Sjg 850*2797Sjg KFDEBUG((CE_CONT, "reading %s\n", nvfd->nvf_cache_path)); 851*2797Sjg 852*2797Sjg rval = fread_nvp_list(nvfd); 853*2797Sjg if (rval) { 854*2797Sjg switch (rval) { 855*2797Sjg case EIO: 856*2797Sjg nvfd->nvf_flags |= NVF_F_REBUILD_MSG; 857*2797Sjg cmn_err(CE_WARN, "%s: I/O error", 858*2797Sjg nvfd->nvf_cache_path); 859*2797Sjg break; 860*2797Sjg case ENOENT: 861*2797Sjg nvfd->nvf_flags |= NVF_F_CREATE_MSG; 862*2797Sjg nvf_error("%s: not found\n", 863*2797Sjg nvfd->nvf_cache_path); 864*2797Sjg break; 865*2797Sjg case EINVAL: 866*2797Sjg default: 867*2797Sjg nvfd->nvf_flags |= NVF_F_REBUILD_MSG; 868*2797Sjg cmn_err(CE_WARN, "%s: data file corrupted", 869*2797Sjg nvfd->nvf_cache_path); 870*2797Sjg break; 871*2797Sjg } 872*2797Sjg } 873*2797Sjg return (rval); 874*2797Sjg } 
875*2797Sjg 876*2797Sjg static void 877*2797Sjg nvf_write_is_complete(nvfd_t *fd) 878*2797Sjg { 879*2797Sjg if (fd->nvf_write_complete) { 880*2797Sjg (fd->nvf_write_complete)((nvf_handle_t)fd); 881*2797Sjg } 882*2797Sjg } 883*2797Sjg 884*2797Sjg /*ARGSUSED*/ 885*2797Sjg static void 886*2797Sjg nvpflush_timeout(void *arg) 887*2797Sjg { 888*2797Sjg clock_t nticks; 889*2797Sjg 890*2797Sjg mutex_enter(&nvpflush_lock); 891*2797Sjg nticks = nvpticks - ddi_get_lbolt(); 892*2797Sjg if (nticks > 4) { 893*2797Sjg nvpflush_timer_busy = 1; 894*2797Sjg mutex_exit(&nvpflush_lock); 895*2797Sjg nvpflush_id = timeout(nvpflush_timeout, NULL, nticks); 896*2797Sjg } else { 897*2797Sjg do_nvpflush = 1; 898*2797Sjg NVPDAEMON_DEBUG((CE_CONT, "signal nvpdaemon\n")); 899*2797Sjg cv_signal(&nvpflush_cv); 900*2797Sjg nvpflush_id = 0; 901*2797Sjg nvpflush_timer_busy = 0; 902*2797Sjg mutex_exit(&nvpflush_lock); 903*2797Sjg } 904*2797Sjg } 905*2797Sjg 906*2797Sjg /* 907*2797Sjg * After marking a list as dirty, wake the nvpflush daemon 908*2797Sjg * to perform the update. 909*2797Sjg */ 910*2797Sjg void 911*2797Sjg nvf_wake_daemon(void) 912*2797Sjg { 913*2797Sjg clock_t nticks; 914*2797Sjg 915*2797Sjg /* 916*2797Sjg * If the system isn't up yet 917*2797Sjg * don't even think about starting a flush. 
918*2797Sjg */ 919*2797Sjg if (!i_ddi_io_initialized()) 920*2797Sjg return; 921*2797Sjg 922*2797Sjg mutex_enter(&nvpflush_lock); 923*2797Sjg 924*2797Sjg if (nvpflush_daemon_active == 0) { 925*2797Sjg nvpflush_daemon_active = 1; 926*2797Sjg mutex_exit(&nvpflush_lock); 927*2797Sjg NVPDAEMON_DEBUG((CE_CONT, "starting nvpdaemon thread\n")); 928*2797Sjg nvpflush_thr_id = thread_create(NULL, 0, 929*2797Sjg (void (*)())nvpflush_daemon, 930*2797Sjg NULL, 0, &p0, TS_RUN, minclsyspri); 931*2797Sjg mutex_enter(&nvpflush_lock); 932*2797Sjg } 933*2797Sjg 934*2797Sjg nticks = nvpflush_delay * TICKS_PER_SECOND; 935*2797Sjg nvpticks = ddi_get_lbolt() + nticks; 936*2797Sjg if (nvpflush_timer_busy == 0) { 937*2797Sjg nvpflush_timer_busy = 1; 938*2797Sjg mutex_exit(&nvpflush_lock); 939*2797Sjg nvpflush_id = timeout(nvpflush_timeout, NULL, nticks + 4); 940*2797Sjg } else 941*2797Sjg mutex_exit(&nvpflush_lock); 942*2797Sjg } 943*2797Sjg 944*2797Sjg static int 945*2797Sjg nvpflush_one(nvfd_t *nvfd) 946*2797Sjg { 947*2797Sjg int rval = DDI_SUCCESS; 948*2797Sjg nvlist_t *nvl; 949*2797Sjg 950*2797Sjg rw_enter(&nvfd->nvf_lock, RW_READER); 951*2797Sjg 952*2797Sjg ASSERT((nvfd->nvf_flags & NVF_F_FLUSHING) == 0); 953*2797Sjg 954*2797Sjg if (!NVF_IS_DIRTY(nvfd) || 955*2797Sjg NVF_IS_READONLY(nvfd) || kfio_disable_write) { 956*2797Sjg NVF_CLEAR_DIRTY(nvfd); 957*2797Sjg rw_exit(&nvfd->nvf_lock); 958*2797Sjg return (DDI_SUCCESS); 959*2797Sjg } 960*2797Sjg 961*2797Sjg if (rw_tryupgrade(&nvfd->nvf_lock) == 0) { 962*2797Sjg nvf_error("nvpflush: " 963*2797Sjg "%s rw upgrade failed\n", nvfd->nvf_cache_path); 964*2797Sjg rw_exit(&nvfd->nvf_lock); 965*2797Sjg return (DDI_FAILURE); 966*2797Sjg } 967*2797Sjg if (((nvfd->nvf_pack_list) 968*2797Sjg ((nvf_handle_t)nvfd, &nvl)) != DDI_SUCCESS) { 969*2797Sjg nvf_error("nvpflush: " 970*2797Sjg "%s nvlist construction failed\n", nvfd->nvf_cache_path); 971*2797Sjg ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock)); 972*2797Sjg rw_exit(&nvfd->nvf_lock); 973*2797Sjg return 
(DDI_FAILURE); 974*2797Sjg } 975*2797Sjg ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock)); 976*2797Sjg 977*2797Sjg NVF_CLEAR_DIRTY(nvfd); 978*2797Sjg nvfd->nvf_flags |= NVF_F_FLUSHING; 979*2797Sjg rw_exit(&nvfd->nvf_lock); 980*2797Sjg 981*2797Sjg rval = e_fwrite_nvlist(nvfd, nvl); 982*2797Sjg nvlist_free(nvl); 983*2797Sjg 984*2797Sjg rw_enter(&nvfd->nvf_lock, RW_WRITER); 985*2797Sjg nvfd->nvf_flags &= ~NVF_F_FLUSHING; 986*2797Sjg if (rval == DDI_FAILURE) { 987*2797Sjg if (NVF_IS_READONLY(nvfd)) { 988*2797Sjg rval = DDI_SUCCESS; 989*2797Sjg nvfd->nvf_flags &= ~(NVF_F_ERROR | NVF_F_DIRTY); 990*2797Sjg } else if ((nvfd->nvf_flags & NVF_F_ERROR) == 0) { 991*2797Sjg cmn_err(CE_CONT, 992*2797Sjg "%s: updated failed\n", nvfd->nvf_cache_path); 993*2797Sjg nvfd->nvf_flags |= NVF_F_ERROR | NVF_F_DIRTY; 994*2797Sjg } 995*2797Sjg } else { 996*2797Sjg if (nvfd->nvf_flags & NVF_F_CREATE_MSG) { 997*2797Sjg cmn_err(CE_CONT, 998*2797Sjg "!Creating %s\n", nvfd->nvf_cache_path); 999*2797Sjg nvfd->nvf_flags &= ~NVF_F_CREATE_MSG; 1000*2797Sjg } 1001*2797Sjg if (nvfd->nvf_flags & NVF_F_REBUILD_MSG) { 1002*2797Sjg cmn_err(CE_CONT, 1003*2797Sjg "!Rebuilding %s\n", nvfd->nvf_cache_path); 1004*2797Sjg nvfd->nvf_flags &= ~NVF_F_REBUILD_MSG; 1005*2797Sjg } 1006*2797Sjg if (nvfd->nvf_flags & NVF_F_ERROR) { 1007*2797Sjg cmn_err(CE_CONT, 1008*2797Sjg "%s: update now ok\n", nvfd->nvf_cache_path); 1009*2797Sjg nvfd->nvf_flags &= ~NVF_F_ERROR; 1010*2797Sjg } 1011*2797Sjg /* 1012*2797Sjg * The file may need to be flushed again if the cached 1013*2797Sjg * data was touched while writing the earlier contents. 
1014*2797Sjg */ 1015*2797Sjg if (NVF_IS_DIRTY(nvfd)) 1016*2797Sjg rval = DDI_FAILURE; 1017*2797Sjg } 1018*2797Sjg 1019*2797Sjg rw_exit(&nvfd->nvf_lock); 1020*2797Sjg return (rval); 1021*2797Sjg } 1022*2797Sjg 1023*2797Sjg 1024*2797Sjg static void 1025*2797Sjg nvpflush_daemon(void) 1026*2797Sjg { 1027*2797Sjg callb_cpr_t cprinfo; 1028*2797Sjg nvfd_t *nvfdp, *nextfdp; 1029*2797Sjg clock_t clk; 1030*2797Sjg int rval; 1031*2797Sjg int want_wakeup; 1032*2797Sjg int is_now_clean; 1033*2797Sjg 1034*2797Sjg ASSERT(modrootloaded); 1035*2797Sjg 1036*2797Sjg nvpflush_thread = curthread; 1037*2797Sjg NVPDAEMON_DEBUG((CE_CONT, "nvpdaemon: init\n")); 1038*2797Sjg 1039*2797Sjg CALLB_CPR_INIT(&cprinfo, &nvpflush_lock, callb_generic_cpr, "nvp"); 1040*2797Sjg mutex_enter(&nvpflush_lock); 1041*2797Sjg for (;;) { 1042*2797Sjg 1043*2797Sjg CALLB_CPR_SAFE_BEGIN(&cprinfo); 1044*2797Sjg while (do_nvpflush == 0) { 1045*2797Sjg clk = cv_timedwait(&nvpflush_cv, &nvpflush_lock, 1046*2797Sjg ddi_get_lbolt() + 1047*2797Sjg (nvpdaemon_idle_time * TICKS_PER_SECOND)); 1048*2797Sjg if (clk == -1 && 1049*2797Sjg do_nvpflush == 0 && nvpflush_timer_busy == 0) { 1050*2797Sjg /* 1051*2797Sjg * Note that CALLB_CPR_EXIT calls mutex_exit() 1052*2797Sjg * on the lock passed in to CALLB_CPR_INIT, 1053*2797Sjg * so the lock must be held when invoking it. 
1054*2797Sjg */ 1055*2797Sjg CALLB_CPR_SAFE_END(&cprinfo, &nvpflush_lock); 1056*2797Sjg NVPDAEMON_DEBUG((CE_CONT, "nvpdaemon: exit\n")); 1057*2797Sjg ASSERT(mutex_owned(&nvpflush_lock)); 1058*2797Sjg nvpflush_thr_id = NULL; 1059*2797Sjg nvpflush_daemon_active = 0; 1060*2797Sjg CALLB_CPR_EXIT(&cprinfo); 1061*2797Sjg thread_exit(); 1062*2797Sjg } 1063*2797Sjg } 1064*2797Sjg CALLB_CPR_SAFE_END(&cprinfo, &nvpflush_lock); 1065*2797Sjg 1066*2797Sjg nvpbusy = 1; 1067*2797Sjg want_wakeup = 0; 1068*2797Sjg do_nvpflush = 0; 1069*2797Sjg mutex_exit(&nvpflush_lock); 1070*2797Sjg 1071*2797Sjg /* 1072*2797Sjg * Try flushing what's dirty, reschedule if there's 1073*2797Sjg * a failure or data gets marked as dirty again. 1074*2797Sjg * First move each file marked dirty to the dirty 1075*2797Sjg * list to avoid locking the list across the write. 1076*2797Sjg */ 1077*2797Sjg mutex_enter(&nvf_cache_mutex); 1078*2797Sjg for (nvfdp = list_head(&nvf_cache_files); 1079*2797Sjg nvfdp; nvfdp = nextfdp) { 1080*2797Sjg nextfdp = list_next(&nvf_cache_files, nvfdp); 1081*2797Sjg rw_enter(&nvfdp->nvf_lock, RW_READER); 1082*2797Sjg if (NVF_IS_DIRTY(nvfdp)) { 1083*2797Sjg list_remove(&nvf_cache_files, nvfdp); 1084*2797Sjg list_insert_tail(&nvf_dirty_files, nvfdp); 1085*2797Sjg rw_exit(&nvfdp->nvf_lock); 1086*2797Sjg } else { 1087*2797Sjg NVPDAEMON_DEBUG((CE_CONT, 1088*2797Sjg "nvpdaemon: not dirty %s\n", 1089*2797Sjg nvfdp->nvf_cache_path)); 1090*2797Sjg rw_exit(&nvfdp->nvf_lock); 1091*2797Sjg } 1092*2797Sjg } 1093*2797Sjg mutex_exit(&nvf_cache_mutex); 1094*2797Sjg 1095*2797Sjg /* 1096*2797Sjg * Now go through the dirty list 1097*2797Sjg */ 1098*2797Sjg for (nvfdp = list_head(&nvf_dirty_files); 1099*2797Sjg nvfdp; nvfdp = nextfdp) { 1100*2797Sjg nextfdp = list_next(&nvf_dirty_files, nvfdp); 1101*2797Sjg 1102*2797Sjg is_now_clean = 0; 1103*2797Sjg rw_enter(&nvfdp->nvf_lock, RW_READER); 1104*2797Sjg if (NVF_IS_DIRTY(nvfdp)) { 1105*2797Sjg NVPDAEMON_DEBUG((CE_CONT, 1106*2797Sjg "nvpdaemon: flush 
%s\n", 1107*2797Sjg nvfdp->nvf_cache_path)); 1108*2797Sjg rw_exit(&nvfdp->nvf_lock); 1109*2797Sjg rval = nvpflush_one(nvfdp); 1110*2797Sjg rw_enter(&nvfdp->nvf_lock, RW_READER); 1111*2797Sjg if (rval != DDI_SUCCESS || 1112*2797Sjg NVF_IS_DIRTY(nvfdp)) { 1113*2797Sjg rw_exit(&nvfdp->nvf_lock); 1114*2797Sjg NVPDAEMON_DEBUG((CE_CONT, 1115*2797Sjg "nvpdaemon: %s dirty again\n", 1116*2797Sjg nvfdp->nvf_cache_path)); 1117*2797Sjg want_wakeup = 1; 1118*2797Sjg } else { 1119*2797Sjg rw_exit(&nvfdp->nvf_lock); 1120*2797Sjg nvf_write_is_complete(nvfdp); 1121*2797Sjg is_now_clean = 1; 1122*2797Sjg } 1123*2797Sjg } else { 1124*2797Sjg NVPDAEMON_DEBUG((CE_CONT, 1125*2797Sjg "nvpdaemon: not dirty %s\n", 1126*2797Sjg nvfdp->nvf_cache_path)); 1127*2797Sjg rw_exit(&nvfdp->nvf_lock); 1128*2797Sjg is_now_clean = 1; 1129*2797Sjg } 1130*2797Sjg 1131*2797Sjg if (is_now_clean) { 1132*2797Sjg mutex_enter(&nvf_cache_mutex); 1133*2797Sjg list_remove(&nvf_dirty_files, nvfdp); 1134*2797Sjg list_insert_tail(&nvf_cache_files, 1135*2797Sjg nvfdp); 1136*2797Sjg mutex_exit(&nvf_cache_mutex); 1137*2797Sjg } 1138*2797Sjg } 1139*2797Sjg 1140*2797Sjg if (want_wakeup) 1141*2797Sjg nvf_wake_daemon(); 1142*2797Sjg 1143*2797Sjg mutex_enter(&nvpflush_lock); 1144*2797Sjg nvpbusy = 0; 1145*2797Sjg } 1146*2797Sjg } 1147