12797Sjg /* 22797Sjg * CDDL HEADER START 32797Sjg * 42797Sjg * The contents of this file are subject to the terms of the 52797Sjg * Common Development and Distribution License (the "License"). 62797Sjg * You may not use this file except in compliance with the License. 72797Sjg * 82797Sjg * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 92797Sjg * or http://www.opensolaris.org/os/licensing. 102797Sjg * See the License for the specific language governing permissions 112797Sjg * and limitations under the License. 122797Sjg * 132797Sjg * When distributing Covered Code, include this CDDL HEADER in each 142797Sjg * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 152797Sjg * If applicable, add the following below this CDDL HEADER, with the 162797Sjg * fields enclosed by brackets "[]" replaced with your own identifying 172797Sjg * information: Portions Copyright [yyyy] [name of copyright owner] 182797Sjg * 192797Sjg * CDDL HEADER END 202797Sjg */ 212797Sjg /* 224845Svikram * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 232797Sjg * Use is subject to license terms. 
242797Sjg */ 252797Sjg 262797Sjg #pragma ident "%Z%%M% %I% %E% SMI" 272797Sjg 282797Sjg #include <sys/note.h> 292797Sjg #include <sys/t_lock.h> 302797Sjg #include <sys/cmn_err.h> 312797Sjg #include <sys/instance.h> 322797Sjg #include <sys/conf.h> 332797Sjg #include <sys/stat.h> 342797Sjg #include <sys/ddi.h> 352797Sjg #include <sys/hwconf.h> 362797Sjg #include <sys/sunddi.h> 372797Sjg #include <sys/sunndi.h> 382797Sjg #include <sys/ddi_impldefs.h> 392797Sjg #include <sys/ndi_impldefs.h> 402797Sjg #include <sys/modctl.h> 412797Sjg #include <sys/dacf.h> 422797Sjg #include <sys/promif.h> 432797Sjg #include <sys/cpuvar.h> 442797Sjg #include <sys/pathname.h> 452797Sjg #include <sys/kobj.h> 462797Sjg #include <sys/devcache.h> 472797Sjg #include <sys/devcache_impl.h> 482797Sjg #include <sys/sysmacros.h> 492797Sjg #include <sys/varargs.h> 502797Sjg #include <sys/callb.h> 512797Sjg 522797Sjg /* 532797Sjg * This facility provides interfaces to clients to register, 542797Sjg * read and update cache data in persisted backing store files, 552797Sjg * usually in /etc/devices. The data persisted through this 562797Sjg * mechanism should be stateless data, functioning in the sense 572797Sjg * of a cache. Writes are performed by a background daemon 582797Sjg * thread, permitting a client to schedule an update without 592797Sjg * blocking, then continue updating the data state in 602797Sjg * parallel. The data is only locked by the daemon thread 612797Sjg * to pack the data in preparation for the write. 622797Sjg * 632797Sjg * Data persisted through this mechanism should be capable 642797Sjg * of being regenerated through normal system operation, 652797Sjg * for example attaching all disk devices would cause all 662797Sjg * devids to be registered for those devices. 
By caching
 * a devid-device tuple, the system can operate in a
 * more optimal way, directly attaching the device mapped
 * to a devid, rather than burdensomely driving attach of
 * the entire device tree to discover a single device.
 *
 * Note that a client should only need to include
 * <sys/devcache.h> for the supported interfaces.
 *
 * The data per client is entirely within the control of
 * the client.  When reading, data unpacked from the backing
 * store should be inserted in the list.  The pointer to
 * the list can be retrieved via nvf_list().  When writing,
 * the data on the list is to be packed and returned to the
 * nvpdaemon as an nvlist.
 *
 * Obvious restrictions are imposed by the limits of the
 * nvlist format.  The data cannot be read or written
 * piecemeal, and large amounts of data aren't recommended.
 * However, nvlists do allow that data be named and typed
 * and can be size-of-int invariant, and the cached data
 * can be versioned conveniently.
 *
 * The registration involves two steps: a handle is
 * allocated by calling the registration function.
 * This sets up the data referenced by the handle and
 * initializes the lock.  Following registration, the
 * client must initialize the data list.  The list
 * interfaces require that the list element with offset
 * to the node link be provided.  The format of the
 * list element is under the control of the client.
 *
 * Locking: the address of the data list r/w lock provided
 * can be accessed with nvf_lock().  The lock must be held
 * as reader when traversing the list or checking state,
 * such as nvf_is_dirty().  The lock must be held as
 * writer when updating the list or marking it dirty.
 * The lock must not be held when waking the daemon.
 *
 * The data r/w lock is held as writer when the pack,
 * unpack and free list handlers are called.  The
 * lock should not be dropped and must still be held
 * upon return.  The client should also hold the lock
 * as reader when checking if the list is dirty, and
 * as writer when marking the list dirty or initiating
 * a read.
 *
 * The asynchronous nature of updates allows for the
 * possibility that the data may continue to be updated
 * once the daemon has been notified that an update is
 * desired.  The data only needs to be locked against
 * updates when packing the data into the form to be
 * written.  When the write of the packed data has
 * completed, the daemon will automatically reschedule
 * an update if the data was marked dirty after the
 * point at which it was packed.  Before beginning an
 * update, the daemon attempts to lock the data as
 * writer; if the writer lock is already held, it
 * backs off and retries later.  The model is to give
 * priority to the kernel processes generating the
 * data, and that the nature of the data is that
 * it does not change often, can be re-generated when
 * needed, so updates should not happen often and
 * can be delayed until the data stops changing.
 * The client may update the list or mark it dirty
 * any time it is able to acquire the lock as
 * writer first.
 *
 * A failed write will be retried after some delay,
 * in the hope that the cause of the error will be
 * transient, for example a filesystem with no space
 * available.
An update on a read-only filesystem 1382797Sjg * is failed silently and not retried; this would be 1392797Sjg * the case when booted off install media. 1402797Sjg * 1412797Sjg * There is no unregister mechanism as of yet, as it 1422797Sjg * hasn't been needed so far. 1432797Sjg */ 1442797Sjg 1452797Sjg /* 1462797Sjg * Global list of files registered and updated by the nvpflush 1472797Sjg * daemon, protected by the nvf_cache_mutex. While an 1482797Sjg * update is taking place, a file is temporarily moved to 1492797Sjg * the dirty list to avoid locking the primary list for 1502797Sjg * the duration of the update. 1512797Sjg */ 1522797Sjg list_t nvf_cache_files; 1532797Sjg list_t nvf_dirty_files; 1542797Sjg kmutex_t nvf_cache_mutex; 1552797Sjg 1562797Sjg 1572797Sjg /* 1582797Sjg * Allow some delay from an update of the data before flushing 1592797Sjg * to permit simultaneous updates of multiple changes. 1602797Sjg * Changes in the data are expected to be bursty, ie 1612797Sjg * reconfig or hot-plug of a new adapter. 1622797Sjg * 1632797Sjg * kfio_report_error (default 0) 1642797Sjg * Set to 1 to enable some error messages related to low-level 1652797Sjg * kernel file i/o operations. 1662797Sjg * 1672797Sjg * nvpflush_delay (default 10) 1682797Sjg * The number of seconds after data is marked dirty before the 1692797Sjg * flush daemon is triggered to flush the data. A longer period 1702797Sjg * of time permits more data updates per write. Note that 1712797Sjg * every update resets the timer so no repository write will 1722797Sjg * occur while data is being updated continuously. 1732797Sjg * 1742797Sjg * nvpdaemon_idle_time (default 60) 1752797Sjg * The number of seconds the daemon will sleep idle before exiting. 
1762797Sjg * 1772797Sjg */ 1782797Sjg #define NVPFLUSH_DELAY 10 1792797Sjg #define NVPDAEMON_IDLE_TIME 60 1802797Sjg 1812797Sjg #define TICKS_PER_SECOND (drv_usectohz(1000000)) 1822797Sjg 1832797Sjg /* 1842797Sjg * Tunables 1852797Sjg */ 1862797Sjg int kfio_report_error = 0; /* kernel file i/o operations */ 1872797Sjg int kfio_disable_read = 0; /* disable all reads */ 1882797Sjg int kfio_disable_write = 0; /* disable all writes */ 1892797Sjg 1902797Sjg int nvpflush_delay = NVPFLUSH_DELAY; 1912797Sjg int nvpdaemon_idle_time = NVPDAEMON_IDLE_TIME; 1922797Sjg 1932797Sjg static timeout_id_t nvpflush_id = 0; 1942797Sjg static int nvpflush_timer_busy = 0; 1952797Sjg static int nvpflush_daemon_active = 0; 1962797Sjg static kthread_t *nvpflush_thr_id = 0; 1972797Sjg 1982797Sjg static int do_nvpflush = 0; 1992797Sjg static int nvpbusy = 0; 2002797Sjg static kmutex_t nvpflush_lock; 2012797Sjg static kcondvar_t nvpflush_cv; 2022797Sjg static kthread_id_t nvpflush_thread; 2032797Sjg static clock_t nvpticks; 2042797Sjg 2052797Sjg static void nvpflush_daemon(void); 2062797Sjg 2072797Sjg #ifdef DEBUG 2082797Sjg int nvpdaemon_debug = 0; 2092797Sjg int kfio_debug = 0; 2102797Sjg #endif /* DEBUG */ 2112797Sjg 2122797Sjg extern int modrootloaded; 2132797Sjg extern void mdi_read_devices_files(void); 2142797Sjg extern void mdi_clean_vhcache(void); 2152797Sjg 2162797Sjg /* 2172797Sjg * Initialize the overall cache file management 2182797Sjg */ 2192797Sjg void 2202797Sjg i_ddi_devices_init(void) 2212797Sjg { 2222797Sjg list_create(&nvf_cache_files, sizeof (nvfd_t), 2232797Sjg offsetof(nvfd_t, nvf_link)); 2242797Sjg list_create(&nvf_dirty_files, sizeof (nvfd_t), 2252797Sjg offsetof(nvfd_t, nvf_link)); 2262797Sjg mutex_init(&nvf_cache_mutex, NULL, MUTEX_DEFAULT, NULL); 2274845Svikram retire_store_init(); 2282797Sjg devid_cache_init(); 2292797Sjg } 2302797Sjg 2312797Sjg /* 2322797Sjg * Read cache files 2332797Sjg * The files read here should be restricted to those 2342797Sjg * that may be 
required to mount root. 2352797Sjg */ 2362797Sjg void 2372797Sjg i_ddi_read_devices_files(void) 2382797Sjg { 2394845Svikram /* 2404845Svikram * The retire store should be the first file read as it 2414845Svikram * may need to offline devices. kfio_disable_read is not 2424845Svikram * used for retire. For the rationale see the tunable 2434845Svikram * ddi_retire_store_bypass and comments in: 2444845Svikram * uts/common/os/retire_store.c 2454845Svikram */ 2464845Svikram 2474845Svikram retire_store_read(); 2484845Svikram 2492797Sjg if (!kfio_disable_read) { 2502797Sjg mdi_read_devices_files(); 2512797Sjg devid_cache_read(); 2522797Sjg } 2532797Sjg } 2542797Sjg 2552797Sjg void 2562797Sjg i_ddi_start_flush_daemon(void) 2572797Sjg { 2582797Sjg nvfd_t *nvfdp; 2592797Sjg 2602797Sjg ASSERT(i_ddi_io_initialized()); 2612797Sjg 2622797Sjg mutex_init(&nvpflush_lock, NULL, MUTEX_DRIVER, NULL); 2632797Sjg cv_init(&nvpflush_cv, NULL, CV_DRIVER, NULL); 2642797Sjg 2652797Sjg mutex_enter(&nvf_cache_mutex); 2662797Sjg for (nvfdp = list_head(&nvf_cache_files); nvfdp; 2672797Sjg nvfdp = list_next(&nvf_cache_files, nvfdp)) { 2682797Sjg if (NVF_IS_DIRTY(nvfdp)) { 2692797Sjg nvf_wake_daemon(); 2702797Sjg break; 2712797Sjg } 2722797Sjg } 2732797Sjg mutex_exit(&nvf_cache_mutex); 2742797Sjg } 2752797Sjg 2762797Sjg void 2772797Sjg i_ddi_clean_devices_files(void) 2782797Sjg { 2792797Sjg devid_cache_cleanup(); 2802797Sjg mdi_clean_vhcache(); 2812797Sjg } 2822797Sjg 2832797Sjg /* 2842797Sjg * Register a cache file to be managed and updated by the nvpflush daemon. 2852797Sjg * All operations are performed through the returned handle. 2862797Sjg * There is no unregister mechanism for now. 
2872797Sjg */ 2882797Sjg nvf_handle_t 2892797Sjg nvf_register_file(nvf_ops_t *ops) 2902797Sjg { 2912797Sjg nvfd_t *nvfdp; 2922797Sjg 2932797Sjg nvfdp = kmem_zalloc(sizeof (*nvfdp), KM_SLEEP); 2942797Sjg 2952797Sjg nvfdp->nvf_ops = ops; 2962797Sjg nvfdp->nvf_flags = 0; 2972797Sjg rw_init(&nvfdp->nvf_lock, NULL, RW_DRIVER, NULL); 2982797Sjg 2992797Sjg mutex_enter(&nvf_cache_mutex); 3002797Sjg list_insert_tail(&nvf_cache_files, nvfdp); 3012797Sjg mutex_exit(&nvf_cache_mutex); 3022797Sjg 3032797Sjg return ((nvf_handle_t)nvfdp); 3042797Sjg } 3052797Sjg 3062797Sjg /*PRINTFLIKE1*/ 3072797Sjg void 3082797Sjg nvf_error(const char *fmt, ...) 3092797Sjg { 3102797Sjg va_list ap; 3112797Sjg 3122797Sjg if (kfio_report_error) { 3132797Sjg va_start(ap, fmt); 3142797Sjg vcmn_err(CE_NOTE, fmt, ap); 3152797Sjg va_end(ap); 3162797Sjg } 3172797Sjg } 3182797Sjg 3192797Sjg /* 3202797Sjg * Some operations clients may use to manage the data 3212797Sjg * to be persisted in a cache file. 3222797Sjg */ 3232797Sjg char * 3242797Sjg nvf_cache_name(nvf_handle_t handle) 3252797Sjg { 3262797Sjg return (((nvfd_t *)handle)->nvf_cache_path); 3272797Sjg } 3282797Sjg 3292797Sjg krwlock_t * 3302797Sjg nvf_lock(nvf_handle_t handle) 3312797Sjg { 3322797Sjg return (&(((nvfd_t *)handle)->nvf_lock)); 3332797Sjg } 3342797Sjg 3352797Sjg list_t * 3362797Sjg nvf_list(nvf_handle_t handle) 3372797Sjg { 3382797Sjg return (&(((nvfd_t *)handle)->nvf_data_list)); 3392797Sjg } 3402797Sjg 3412797Sjg void 3422797Sjg nvf_mark_dirty(nvf_handle_t handle) 3432797Sjg { 3442797Sjg ASSERT(RW_WRITE_HELD(&(((nvfd_t *)handle)->nvf_lock))); 3452797Sjg NVF_MARK_DIRTY((nvfd_t *)handle); 3462797Sjg } 3472797Sjg 3482797Sjg int 3492797Sjg nvf_is_dirty(nvf_handle_t handle) 3502797Sjg { 3512797Sjg ASSERT(RW_LOCK_HELD(&(((nvfd_t *)handle)->nvf_lock))); 3522797Sjg return (NVF_IS_DIRTY((nvfd_t *)handle)); 3532797Sjg } 3542797Sjg 3552797Sjg static uint16_t 3562797Sjg nvp_cksum(uchar_t *buf, int64_t buflen) 3572797Sjg { 3582797Sjg uint16_t 
cksum = 0; 3592797Sjg uint16_t *p = (uint16_t *)buf; 3602797Sjg int64_t n; 3612797Sjg 3622797Sjg if ((buflen & 0x01) != 0) { 3632797Sjg buflen--; 3642797Sjg cksum = buf[buflen]; 3652797Sjg } 3662797Sjg n = buflen / 2; 3672797Sjg while (n-- > 0) 3682797Sjg cksum ^= *p++; 3692797Sjg return (cksum); 3702797Sjg } 3712797Sjg 3722797Sjg int 3732797Sjg fread_nvlist(char *filename, nvlist_t **ret_nvlist) 3742797Sjg { 3752797Sjg struct _buf *file; 3762797Sjg nvpf_hdr_t hdr; 3772797Sjg char *buf; 3782797Sjg nvlist_t *nvl; 3792797Sjg int rval; 3802797Sjg uint_t offset; 3812797Sjg int n; 3822797Sjg char c; 3832797Sjg uint16_t cksum, hdrsum; 3842797Sjg 3852797Sjg *ret_nvlist = NULL; 3862797Sjg 3872797Sjg file = kobj_open_file(filename); 3882797Sjg if (file == (struct _buf *)-1) { 3892797Sjg KFDEBUG((CE_CONT, "cannot open file: %s\n", filename)); 3902797Sjg return (ENOENT); 3912797Sjg } 3922797Sjg 3932797Sjg offset = 0; 3942797Sjg n = kobj_read_file(file, (char *)&hdr, sizeof (hdr), offset); 3952797Sjg if (n != sizeof (hdr)) { 3962797Sjg kobj_close_file(file); 3972797Sjg if (n < 0) { 3982797Sjg nvf_error("error reading header: %s\n", filename); 3992797Sjg return (EIO); 4002797Sjg } else if (n == 0) { 4012797Sjg KFDEBUG((CE_CONT, "file empty: %s\n", filename)); 4022797Sjg } else { 4032797Sjg nvf_error("header size incorrect: %s\n", filename); 4042797Sjg } 4052797Sjg return (EINVAL); 4062797Sjg } 4072797Sjg offset += n; 4082797Sjg 4092797Sjg KFDEBUG2((CE_CONT, "nvpf_magic: 0x%x\n", hdr.nvpf_magic)); 4102797Sjg KFDEBUG2((CE_CONT, "nvpf_version: %d\n", hdr.nvpf_version)); 4112797Sjg KFDEBUG2((CE_CONT, "nvpf_size: %lld\n", 4122797Sjg (longlong_t)hdr.nvpf_size)); 4132797Sjg KFDEBUG2((CE_CONT, "nvpf_hdr_chksum: 0x%x\n", 4142797Sjg hdr.nvpf_hdr_chksum)); 4152797Sjg KFDEBUG2((CE_CONT, "nvpf_chksum: 0x%x\n", hdr.nvpf_chksum)); 4162797Sjg 4172797Sjg cksum = hdr.nvpf_hdr_chksum; 4182797Sjg hdr.nvpf_hdr_chksum = 0; 4192797Sjg hdrsum = nvp_cksum((uchar_t *)&hdr, sizeof (hdr)); 4202797Sjg 
4212797Sjg if (hdr.nvpf_magic != NVPF_HDR_MAGIC || 4222797Sjg hdr.nvpf_version != NVPF_HDR_VERSION || hdrsum != cksum) { 4232797Sjg kobj_close_file(file); 4242797Sjg if (hdrsum != cksum) { 4252797Sjg nvf_error("%s: checksum error " 4262797Sjg "(actual 0x%x, expected 0x%x)\n", 4272797Sjg filename, hdrsum, cksum); 4282797Sjg } 4292797Sjg nvf_error("%s: header information incorrect", filename); 4302797Sjg return (EINVAL); 4312797Sjg } 4322797Sjg 4332797Sjg ASSERT(hdr.nvpf_size >= 0); 4342797Sjg 4352797Sjg buf = kmem_alloc(hdr.nvpf_size, KM_SLEEP); 4362797Sjg n = kobj_read_file(file, buf, hdr.nvpf_size, offset); 4372797Sjg if (n != hdr.nvpf_size) { 4382797Sjg kmem_free(buf, hdr.nvpf_size); 4392797Sjg kobj_close_file(file); 4402797Sjg if (n < 0) { 4412797Sjg nvf_error("%s: read error %d", filename, n); 4422797Sjg } else { 4432797Sjg nvf_error("%s: incomplete read %d/%lld", 4442797Sjg filename, n, (longlong_t)hdr.nvpf_size); 4452797Sjg } 4462797Sjg return (EINVAL); 4472797Sjg } 4482797Sjg offset += n; 4492797Sjg 4502797Sjg rval = kobj_read_file(file, &c, 1, offset); 4512797Sjg kobj_close_file(file); 4522797Sjg if (rval > 0) { 4532797Sjg nvf_error("%s is larger than %lld\n", 4542797Sjg filename, (longlong_t)hdr.nvpf_size); 4552797Sjg kmem_free(buf, hdr.nvpf_size); 4562797Sjg return (EINVAL); 4572797Sjg } 4582797Sjg 4592797Sjg cksum = nvp_cksum((uchar_t *)buf, hdr.nvpf_size); 4602797Sjg if (hdr.nvpf_chksum != cksum) { 4612797Sjg nvf_error("%s: checksum error (actual 0x%x, expected 0x%x)\n", 4622797Sjg filename, hdr.nvpf_chksum, cksum); 4632797Sjg kmem_free(buf, hdr.nvpf_size); 4642797Sjg return (EINVAL); 4652797Sjg } 4662797Sjg 4672797Sjg nvl = NULL; 4682797Sjg rval = nvlist_unpack(buf, hdr.nvpf_size, &nvl, 0); 4692797Sjg if (rval != 0) { 4702797Sjg nvf_error("%s: error %d unpacking nvlist\n", 4712797Sjg filename, rval); 4722797Sjg kmem_free(buf, hdr.nvpf_size); 4732797Sjg return (EINVAL); 4742797Sjg } 4752797Sjg 4762797Sjg kmem_free(buf, hdr.nvpf_size); 4772797Sjg 
*ret_nvlist = nvl; 4782797Sjg return (0); 4792797Sjg } 4802797Sjg 4812797Sjg static int 4822797Sjg kfcreate(char *filename, kfile_t **kfilep) 4832797Sjg { 4842797Sjg kfile_t *fp; 4852797Sjg int rval; 4862797Sjg 4872797Sjg ASSERT(modrootloaded); 4882797Sjg 4892797Sjg fp = kmem_alloc(sizeof (kfile_t), KM_SLEEP); 4902797Sjg 4912797Sjg fp->kf_vnflags = FCREAT | FWRITE | FTRUNC; 4922797Sjg fp->kf_fname = filename; 4932797Sjg fp->kf_fpos = 0; 4942797Sjg fp->kf_state = 0; 4952797Sjg 4962797Sjg KFDEBUG((CE_CONT, "create: %s flags 0x%x\n", 4972797Sjg filename, fp->kf_vnflags)); 4982797Sjg rval = vn_open(filename, UIO_SYSSPACE, fp->kf_vnflags, 4992797Sjg 0444, &fp->kf_vp, CRCREAT, 0); 5002797Sjg if (rval != 0) { 5012797Sjg kmem_free(fp, sizeof (kfile_t)); 5022797Sjg KFDEBUG((CE_CONT, "%s: create error %d\n", 5032797Sjg filename, rval)); 5042797Sjg return (rval); 5052797Sjg } 5062797Sjg 5072797Sjg *kfilep = fp; 5082797Sjg return (0); 5092797Sjg } 5102797Sjg 5112797Sjg static int 5122797Sjg kfremove(char *filename) 5132797Sjg { 5142797Sjg int rval; 5152797Sjg 5162797Sjg KFDEBUG((CE_CONT, "remove: %s\n", filename)); 5172797Sjg rval = vn_remove(filename, UIO_SYSSPACE, RMFILE); 5182797Sjg if (rval != 0) { 5192797Sjg KFDEBUG((CE_CONT, "%s: remove error %d\n", 5202797Sjg filename, rval)); 5212797Sjg } 5222797Sjg return (rval); 5232797Sjg } 5242797Sjg 5252797Sjg static int 5262797Sjg kfread(kfile_t *fp, char *buf, ssize_t bufsiz, ssize_t *ret_n) 5272797Sjg { 5282797Sjg ssize_t resid; 5292797Sjg int err; 5302797Sjg ssize_t n; 5312797Sjg 5322797Sjg ASSERT(modrootloaded); 5332797Sjg 5342797Sjg if (fp->kf_state != 0) 5352797Sjg return (fp->kf_state); 5362797Sjg 5372797Sjg err = vn_rdwr(UIO_READ, fp->kf_vp, buf, bufsiz, fp->kf_fpos, 5382797Sjg UIO_SYSSPACE, 0, (rlim64_t)0, kcred, &resid); 5392797Sjg if (err != 0) { 5402797Sjg KFDEBUG((CE_CONT, "%s: read error %d\n", 5412797Sjg fp->kf_fname, err)); 5422797Sjg fp->kf_state = err; 5432797Sjg return (err); 5442797Sjg } 5452797Sjg 5462797Sjg 
ASSERT(resid >= 0 && resid <= bufsiz); 5472797Sjg n = bufsiz - resid; 5482797Sjg 5492797Sjg KFDEBUG1((CE_CONT, "%s: read %ld bytes ok %ld bufsiz, %ld resid\n", 5502797Sjg fp->kf_fname, n, bufsiz, resid)); 5512797Sjg 5522797Sjg fp->kf_fpos += n; 5532797Sjg *ret_n = n; 5542797Sjg return (0); 5552797Sjg } 5562797Sjg 5572797Sjg static int 5582797Sjg kfwrite(kfile_t *fp, char *buf, ssize_t bufsiz, ssize_t *ret_n) 5592797Sjg { 5602797Sjg rlim64_t rlimit; 5612797Sjg ssize_t resid; 5622797Sjg int err; 5632797Sjg ssize_t len; 5642797Sjg ssize_t n = 0; 5652797Sjg 5662797Sjg ASSERT(modrootloaded); 5672797Sjg 5682797Sjg if (fp->kf_state != 0) 5692797Sjg return (fp->kf_state); 5702797Sjg 5712797Sjg len = bufsiz; 5722797Sjg rlimit = bufsiz + 1; 5732797Sjg for (;;) { 5742797Sjg err = vn_rdwr(UIO_WRITE, fp->kf_vp, buf, len, fp->kf_fpos, 5752797Sjg UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid); 5762797Sjg if (err) { 5772797Sjg KFDEBUG((CE_CONT, "%s: write error %d\n", 5782797Sjg fp->kf_fname, err)); 5792797Sjg fp->kf_state = err; 5802797Sjg return (err); 5812797Sjg } 5822797Sjg 5832797Sjg KFDEBUG1((CE_CONT, "%s: write %ld bytes ok %ld resid\n", 5842797Sjg fp->kf_fname, len-resid, resid)); 5852797Sjg 5862797Sjg ASSERT(resid >= 0 && resid <= len); 5872797Sjg 5882797Sjg n += (len - resid); 5892797Sjg if (resid == 0) 5902797Sjg break; 5912797Sjg 5922797Sjg if (resid == len) { 5932797Sjg KFDEBUG((CE_CONT, "%s: filesystem full?\n", 5942797Sjg fp->kf_fname)); 5952797Sjg fp->kf_state = ENOSPC; 5962797Sjg return (ENOSPC); 5972797Sjg } 5982797Sjg 5992797Sjg len -= resid; 6002797Sjg buf += len; 6012797Sjg fp->kf_fpos += len; 6022797Sjg len = resid; 6032797Sjg } 6042797Sjg 6052797Sjg ASSERT(n == bufsiz); 6062797Sjg KFDEBUG1((CE_CONT, "%s: wrote %ld bytes ok\n", fp->kf_fname, n)); 6072797Sjg 6082797Sjg *ret_n = n; 6092797Sjg return (0); 6102797Sjg } 6112797Sjg 6122797Sjg 6132797Sjg static int 6142797Sjg kfclose(kfile_t *fp) 6152797Sjg { 6162797Sjg int rval; 6172797Sjg 6182797Sjg KFDEBUG((CE_CONT, 
"close: %s\n", fp->kf_fname)); 6192797Sjg 6202797Sjg if ((fp->kf_vnflags & FWRITE) && fp->kf_state == 0) { 621*5331Samw rval = VOP_FSYNC(fp->kf_vp, FSYNC, kcred, NULL); 6222797Sjg if (rval != 0) { 6232797Sjg nvf_error("%s: sync error %d\n", 6242797Sjg fp->kf_fname, rval); 6252797Sjg } 6262797Sjg KFDEBUG((CE_CONT, "%s: sync ok\n", fp->kf_fname)); 6272797Sjg } 6282797Sjg 629*5331Samw rval = VOP_CLOSE(fp->kf_vp, fp->kf_vnflags, 1, (offset_t)0, kcred, 630*5331Samw NULL); 6312797Sjg if (rval != 0) { 6322797Sjg if (fp->kf_state == 0) { 6332797Sjg nvf_error("%s: close error %d\n", 6342797Sjg fp->kf_fname, rval); 6352797Sjg } 6362797Sjg } else { 6372797Sjg if (fp->kf_state == 0) 6382797Sjg KFDEBUG((CE_CONT, "%s: close ok\n", fp->kf_fname)); 6392797Sjg } 6402797Sjg 6412797Sjg VN_RELE(fp->kf_vp); 6422797Sjg kmem_free(fp, sizeof (kfile_t)); 6432797Sjg return (rval); 6442797Sjg } 6452797Sjg 6462797Sjg static int 6472797Sjg kfrename(char *oldname, char *newname) 6482797Sjg { 6492797Sjg int rval; 6502797Sjg 6512797Sjg ASSERT(modrootloaded); 6522797Sjg 6532797Sjg KFDEBUG((CE_CONT, "renaming %s to %s\n", oldname, newname)); 6542797Sjg 6552797Sjg if ((rval = vn_rename(oldname, newname, UIO_SYSSPACE)) != 0) { 6562797Sjg KFDEBUG((CE_CONT, "rename %s to %s: %d\n", 6572797Sjg oldname, newname, rval)); 6582797Sjg } 6592797Sjg 6602797Sjg return (rval); 6612797Sjg } 6622797Sjg 6632797Sjg int 6642797Sjg fwrite_nvlist(char *filename, nvlist_t *nvl) 6652797Sjg { 6662797Sjg char *buf; 6672797Sjg char *nvbuf; 6682797Sjg kfile_t *fp; 6692797Sjg char *newname; 6702797Sjg int len, err, err1; 6712797Sjg size_t buflen; 6722797Sjg ssize_t n; 6732797Sjg 6742797Sjg ASSERT(modrootloaded); 6752797Sjg 6762797Sjg nvbuf = NULL; 6772797Sjg err = nvlist_pack(nvl, &nvbuf, &buflen, NV_ENCODE_NATIVE, 0); 6782797Sjg if (err != 0) { 6792797Sjg nvf_error("%s: error %d packing nvlist\n", 6802797Sjg filename, err); 6812797Sjg return (err); 6822797Sjg } 6832797Sjg 6842797Sjg buf = kmem_alloc(sizeof (nvpf_hdr_t) + 
buflen, KM_SLEEP); 6852797Sjg bzero(buf, sizeof (nvpf_hdr_t)); 6862797Sjg 6872797Sjg ((nvpf_hdr_t *)buf)->nvpf_magic = NVPF_HDR_MAGIC; 6882797Sjg ((nvpf_hdr_t *)buf)->nvpf_version = NVPF_HDR_VERSION; 6892797Sjg ((nvpf_hdr_t *)buf)->nvpf_size = buflen; 6902797Sjg ((nvpf_hdr_t *)buf)->nvpf_chksum = nvp_cksum((uchar_t *)nvbuf, buflen); 6912797Sjg ((nvpf_hdr_t *)buf)->nvpf_hdr_chksum = 6922797Sjg nvp_cksum((uchar_t *)buf, sizeof (nvpf_hdr_t)); 6932797Sjg 6942797Sjg bcopy(nvbuf, buf + sizeof (nvpf_hdr_t), buflen); 6952797Sjg kmem_free(nvbuf, buflen); 6962797Sjg buflen += sizeof (nvpf_hdr_t); 6972797Sjg 6982797Sjg len = strlen(filename) + MAX_SUFFIX_LEN + 2; 6992797Sjg newname = kmem_alloc(len, KM_SLEEP); 7002797Sjg 7012797Sjg 7022797Sjg (void) sprintf(newname, "%s.%s", 7032797Sjg filename, NEW_FILENAME_SUFFIX); 7042797Sjg 7052797Sjg /* 7062797Sjg * To make it unlikely we suffer data loss, write 7072797Sjg * data to the new temporary file. Once successful 7082797Sjg * complete the transaction by renaming the new file 7092797Sjg * to replace the previous. 
7102797Sjg */ 7112797Sjg 7122797Sjg if ((err = kfcreate(newname, &fp)) == 0) { 7132797Sjg err = kfwrite(fp, buf, buflen, &n); 7142797Sjg if (err) { 7152797Sjg nvf_error("%s: write error - %d\n", 7162797Sjg newname, err); 7172797Sjg } else { 7182797Sjg if (n != buflen) { 7192797Sjg nvf_error( 7202797Sjg "%s: partial write %ld of %ld bytes\n", 7212797Sjg newname, n, buflen); 7222797Sjg nvf_error("%s: filesystem may be full?\n", 7232797Sjg newname); 7242797Sjg err = EIO; 7252797Sjg } 7262797Sjg } 7272797Sjg if ((err1 = kfclose(fp)) != 0) { 7282797Sjg nvf_error("%s: close error\n", newname); 7292797Sjg if (err == 0) 7302797Sjg err = err1; 7312797Sjg } 7322797Sjg if (err != 0) { 7332797Sjg if (kfremove(newname) != 0) { 7342797Sjg nvf_error("%s: remove failed\n", 7352797Sjg newname); 7362797Sjg } 7372797Sjg } 7382797Sjg } else { 7392797Sjg nvf_error("%s: create failed - %d\n", filename, err); 7402797Sjg } 7412797Sjg 7422797Sjg if (err == 0) { 7432797Sjg if ((err = kfrename(newname, filename)) != 0) { 7442797Sjg nvf_error("%s: rename from %s failed\n", 7452797Sjg newname, filename); 7462797Sjg } 7472797Sjg } 7482797Sjg 7492797Sjg kmem_free(newname, len); 7502797Sjg kmem_free(buf, buflen); 7512797Sjg 7522797Sjg return (err); 7532797Sjg } 7542797Sjg 7552797Sjg static int 7562797Sjg e_fwrite_nvlist(nvfd_t *nvfd, nvlist_t *nvl) 7572797Sjg { 7582797Sjg int err; 7592797Sjg 7602797Sjg if ((err = fwrite_nvlist(nvfd->nvf_cache_path, nvl)) == 0) 7612797Sjg return (DDI_SUCCESS); 7622797Sjg else { 7632797Sjg if (err == EROFS) 7642797Sjg NVF_MARK_READONLY(nvfd); 7652797Sjg return (DDI_FAILURE); 7662797Sjg } 7672797Sjg } 7682797Sjg 7692797Sjg static void 7702797Sjg nvp_list_free(nvfd_t *nvf) 7712797Sjg { 7722797Sjg ASSERT(RW_WRITE_HELD(&nvf->nvf_lock)); 7732797Sjg (nvf->nvf_list_free)((nvf_handle_t)nvf); 7742797Sjg ASSERT(RW_WRITE_HELD(&nvf->nvf_lock)); 7752797Sjg } 7762797Sjg 7772797Sjg /* 7782797Sjg * Read a file in the nvlist format 7792797Sjg * EIO - i/o error during read 
7802797Sjg * ENOENT - file not found 7812797Sjg * EINVAL - file contents corrupted 7822797Sjg */ 7832797Sjg static int 7842797Sjg fread_nvp_list(nvfd_t *nvfd) 7852797Sjg { 7862797Sjg nvlist_t *nvl; 7872797Sjg nvpair_t *nvp; 7882797Sjg char *name; 7892797Sjg nvlist_t *sublist; 7902797Sjg int rval; 7912797Sjg int rv; 7922797Sjg 7932797Sjg ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock))); 7942797Sjg 7952797Sjg rval = fread_nvlist(nvfd->nvf_cache_path, &nvl); 7962797Sjg if (rval != 0) 7972797Sjg return (rval); 7982797Sjg ASSERT(nvl != NULL); 7992797Sjg 8002797Sjg nvp = NULL; 8012797Sjg while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 8022797Sjg name = nvpair_name(nvp); 8032797Sjg ASSERT(strlen(name) > 0); 8042797Sjg 8052797Sjg switch (nvpair_type(nvp)) { 8062797Sjg case DATA_TYPE_NVLIST: 8072797Sjg rval = nvpair_value_nvlist(nvp, &sublist); 8082797Sjg if (rval != 0) { 8092797Sjg nvf_error( 8102797Sjg "nvpair_value_nvlist error %s %d\n", 8112797Sjg name, rval); 8122797Sjg goto error; 8132797Sjg } 8142797Sjg 8152797Sjg /* 8162797Sjg * unpack nvlist for this device and 8172797Sjg * add elements to data list. 
8182797Sjg */ 8192797Sjg ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock))); 8202797Sjg rv = (nvfd->nvf_unpack_nvlist) 8212797Sjg ((nvf_handle_t)nvfd, sublist, name); 8222797Sjg ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock))); 8232797Sjg if (rv != 0) { 8242797Sjg nvf_error( 8252797Sjg "%s: %s invalid list element\n", 8262797Sjg nvfd->nvf_cache_path, name); 8272797Sjg rval = EINVAL; 8282797Sjg goto error; 8292797Sjg } 8302797Sjg break; 8312797Sjg 8322797Sjg default: 8332797Sjg nvf_error("%s: %s unsupported data type %d\n", 8342797Sjg nvfd->nvf_cache_path, name, nvpair_type(nvp)); 8352797Sjg rval = EINVAL; 8362797Sjg goto error; 8372797Sjg } 8382797Sjg } 8392797Sjg 8402797Sjg nvlist_free(nvl); 8412797Sjg 8422797Sjg return (0); 8432797Sjg 8442797Sjg error: 8452797Sjg nvlist_free(nvl); 8462797Sjg nvp_list_free(nvfd); 8472797Sjg return (rval); 8482797Sjg } 8492797Sjg 8502797Sjg 8512797Sjg int 8522797Sjg nvf_read_file(nvf_handle_t nvf_handle) 8532797Sjg { 8542797Sjg nvfd_t *nvfd = (nvfd_t *)nvf_handle; 8552797Sjg int rval; 8562797Sjg 8572797Sjg ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock)); 8582797Sjg 8592797Sjg if (kfio_disable_read) 8602797Sjg return (0); 8612797Sjg 8622797Sjg KFDEBUG((CE_CONT, "reading %s\n", nvfd->nvf_cache_path)); 8632797Sjg 8642797Sjg rval = fread_nvp_list(nvfd); 8652797Sjg if (rval) { 8662797Sjg switch (rval) { 8672797Sjg case EIO: 8682797Sjg nvfd->nvf_flags |= NVF_F_REBUILD_MSG; 8692797Sjg cmn_err(CE_WARN, "%s: I/O error", 8702797Sjg nvfd->nvf_cache_path); 8712797Sjg break; 8722797Sjg case ENOENT: 8732797Sjg nvfd->nvf_flags |= NVF_F_CREATE_MSG; 8742797Sjg nvf_error("%s: not found\n", 8752797Sjg nvfd->nvf_cache_path); 8762797Sjg break; 8772797Sjg case EINVAL: 8782797Sjg default: 8792797Sjg nvfd->nvf_flags |= NVF_F_REBUILD_MSG; 8802797Sjg cmn_err(CE_WARN, "%s: data file corrupted", 8812797Sjg nvfd->nvf_cache_path); 8822797Sjg break; 8832797Sjg } 8842797Sjg } 8852797Sjg return (rval); 8862797Sjg } 8872797Sjg 8882797Sjg static void 8892797Sjg nvf_write_is_complete(nvfd_t 
*fd) 8902797Sjg { 8912797Sjg if (fd->nvf_write_complete) { 8922797Sjg (fd->nvf_write_complete)((nvf_handle_t)fd); 8932797Sjg } 8942797Sjg } 8952797Sjg 8962797Sjg /*ARGSUSED*/ 8972797Sjg static void 8982797Sjg nvpflush_timeout(void *arg) 8992797Sjg { 9002797Sjg clock_t nticks; 9012797Sjg 9022797Sjg mutex_enter(&nvpflush_lock); 9032797Sjg nticks = nvpticks - ddi_get_lbolt(); 9042797Sjg if (nticks > 4) { 9052797Sjg nvpflush_timer_busy = 1; 9062797Sjg mutex_exit(&nvpflush_lock); 9072797Sjg nvpflush_id = timeout(nvpflush_timeout, NULL, nticks); 9082797Sjg } else { 9092797Sjg do_nvpflush = 1; 9102797Sjg NVPDAEMON_DEBUG((CE_CONT, "signal nvpdaemon\n")); 9112797Sjg cv_signal(&nvpflush_cv); 9122797Sjg nvpflush_id = 0; 9132797Sjg nvpflush_timer_busy = 0; 9142797Sjg mutex_exit(&nvpflush_lock); 9152797Sjg } 9162797Sjg } 9172797Sjg 9182797Sjg /* 9192797Sjg * After marking a list as dirty, wake the nvpflush daemon 9202797Sjg * to perform the update. 9212797Sjg */ 9222797Sjg void 9232797Sjg nvf_wake_daemon(void) 9242797Sjg { 9252797Sjg clock_t nticks; 9262797Sjg 9272797Sjg /* 9282797Sjg * If the system isn't up yet 9292797Sjg * don't even think about starting a flush. 
9302797Sjg */ 9312797Sjg if (!i_ddi_io_initialized()) 9322797Sjg return; 9332797Sjg 9342797Sjg mutex_enter(&nvpflush_lock); 9352797Sjg 9362797Sjg if (nvpflush_daemon_active == 0) { 9372797Sjg nvpflush_daemon_active = 1; 9382797Sjg mutex_exit(&nvpflush_lock); 9392797Sjg NVPDAEMON_DEBUG((CE_CONT, "starting nvpdaemon thread\n")); 9402797Sjg nvpflush_thr_id = thread_create(NULL, 0, 9412797Sjg (void (*)())nvpflush_daemon, 9422797Sjg NULL, 0, &p0, TS_RUN, minclsyspri); 9432797Sjg mutex_enter(&nvpflush_lock); 9442797Sjg } 9452797Sjg 9462797Sjg nticks = nvpflush_delay * TICKS_PER_SECOND; 9472797Sjg nvpticks = ddi_get_lbolt() + nticks; 9482797Sjg if (nvpflush_timer_busy == 0) { 9492797Sjg nvpflush_timer_busy = 1; 9502797Sjg mutex_exit(&nvpflush_lock); 9512797Sjg nvpflush_id = timeout(nvpflush_timeout, NULL, nticks + 4); 9522797Sjg } else 9532797Sjg mutex_exit(&nvpflush_lock); 9542797Sjg } 9552797Sjg 9562797Sjg static int 9572797Sjg nvpflush_one(nvfd_t *nvfd) 9582797Sjg { 9592797Sjg int rval = DDI_SUCCESS; 9602797Sjg nvlist_t *nvl; 9612797Sjg 9622797Sjg rw_enter(&nvfd->nvf_lock, RW_READER); 9632797Sjg 9642797Sjg ASSERT((nvfd->nvf_flags & NVF_F_FLUSHING) == 0); 9652797Sjg 9662797Sjg if (!NVF_IS_DIRTY(nvfd) || 9672797Sjg NVF_IS_READONLY(nvfd) || kfio_disable_write) { 9682797Sjg NVF_CLEAR_DIRTY(nvfd); 9692797Sjg rw_exit(&nvfd->nvf_lock); 9702797Sjg return (DDI_SUCCESS); 9712797Sjg } 9722797Sjg 9732797Sjg if (rw_tryupgrade(&nvfd->nvf_lock) == 0) { 9742797Sjg nvf_error("nvpflush: " 9752797Sjg "%s rw upgrade failed\n", nvfd->nvf_cache_path); 9762797Sjg rw_exit(&nvfd->nvf_lock); 9772797Sjg return (DDI_FAILURE); 9782797Sjg } 9792797Sjg if (((nvfd->nvf_pack_list) 9802797Sjg ((nvf_handle_t)nvfd, &nvl)) != DDI_SUCCESS) { 9812797Sjg nvf_error("nvpflush: " 9822797Sjg "%s nvlist construction failed\n", nvfd->nvf_cache_path); 9832797Sjg ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock)); 9842797Sjg rw_exit(&nvfd->nvf_lock); 9852797Sjg return (DDI_FAILURE); 9862797Sjg } 9872797Sjg 
ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock)); 9882797Sjg 9892797Sjg NVF_CLEAR_DIRTY(nvfd); 9902797Sjg nvfd->nvf_flags |= NVF_F_FLUSHING; 9912797Sjg rw_exit(&nvfd->nvf_lock); 9922797Sjg 9932797Sjg rval = e_fwrite_nvlist(nvfd, nvl); 9942797Sjg nvlist_free(nvl); 9952797Sjg 9962797Sjg rw_enter(&nvfd->nvf_lock, RW_WRITER); 9972797Sjg nvfd->nvf_flags &= ~NVF_F_FLUSHING; 9982797Sjg if (rval == DDI_FAILURE) { 9992797Sjg if (NVF_IS_READONLY(nvfd)) { 10002797Sjg rval = DDI_SUCCESS; 10012797Sjg nvfd->nvf_flags &= ~(NVF_F_ERROR | NVF_F_DIRTY); 10022797Sjg } else if ((nvfd->nvf_flags & NVF_F_ERROR) == 0) { 10032797Sjg cmn_err(CE_CONT, 10042797Sjg "%s: updated failed\n", nvfd->nvf_cache_path); 10052797Sjg nvfd->nvf_flags |= NVF_F_ERROR | NVF_F_DIRTY; 10062797Sjg } 10072797Sjg } else { 10082797Sjg if (nvfd->nvf_flags & NVF_F_CREATE_MSG) { 10092797Sjg cmn_err(CE_CONT, 10102797Sjg "!Creating %s\n", nvfd->nvf_cache_path); 10112797Sjg nvfd->nvf_flags &= ~NVF_F_CREATE_MSG; 10122797Sjg } 10132797Sjg if (nvfd->nvf_flags & NVF_F_REBUILD_MSG) { 10142797Sjg cmn_err(CE_CONT, 10152797Sjg "!Rebuilding %s\n", nvfd->nvf_cache_path); 10162797Sjg nvfd->nvf_flags &= ~NVF_F_REBUILD_MSG; 10172797Sjg } 10182797Sjg if (nvfd->nvf_flags & NVF_F_ERROR) { 10192797Sjg cmn_err(CE_CONT, 10202797Sjg "%s: update now ok\n", nvfd->nvf_cache_path); 10212797Sjg nvfd->nvf_flags &= ~NVF_F_ERROR; 10222797Sjg } 10232797Sjg /* 10242797Sjg * The file may need to be flushed again if the cached 10252797Sjg * data was touched while writing the earlier contents. 
10262797Sjg */ 10272797Sjg if (NVF_IS_DIRTY(nvfd)) 10282797Sjg rval = DDI_FAILURE; 10292797Sjg } 10302797Sjg 10312797Sjg rw_exit(&nvfd->nvf_lock); 10322797Sjg return (rval); 10332797Sjg } 10342797Sjg 10352797Sjg 10362797Sjg static void 10372797Sjg nvpflush_daemon(void) 10382797Sjg { 10392797Sjg callb_cpr_t cprinfo; 10402797Sjg nvfd_t *nvfdp, *nextfdp; 10412797Sjg clock_t clk; 10422797Sjg int rval; 10432797Sjg int want_wakeup; 10442797Sjg int is_now_clean; 10452797Sjg 10462797Sjg ASSERT(modrootloaded); 10472797Sjg 10482797Sjg nvpflush_thread = curthread; 10492797Sjg NVPDAEMON_DEBUG((CE_CONT, "nvpdaemon: init\n")); 10502797Sjg 10512797Sjg CALLB_CPR_INIT(&cprinfo, &nvpflush_lock, callb_generic_cpr, "nvp"); 10522797Sjg mutex_enter(&nvpflush_lock); 10532797Sjg for (;;) { 10542797Sjg 10552797Sjg CALLB_CPR_SAFE_BEGIN(&cprinfo); 10562797Sjg while (do_nvpflush == 0) { 10572797Sjg clk = cv_timedwait(&nvpflush_cv, &nvpflush_lock, 10582797Sjg ddi_get_lbolt() + 10592797Sjg (nvpdaemon_idle_time * TICKS_PER_SECOND)); 10602797Sjg if (clk == -1 && 10612797Sjg do_nvpflush == 0 && nvpflush_timer_busy == 0) { 10622797Sjg /* 10632797Sjg * Note that CALLB_CPR_EXIT calls mutex_exit() 10642797Sjg * on the lock passed in to CALLB_CPR_INIT, 10652797Sjg * so the lock must be held when invoking it. 10662797Sjg */ 10672797Sjg CALLB_CPR_SAFE_END(&cprinfo, &nvpflush_lock); 10682797Sjg NVPDAEMON_DEBUG((CE_CONT, "nvpdaemon: exit\n")); 10692797Sjg ASSERT(mutex_owned(&nvpflush_lock)); 10702797Sjg nvpflush_thr_id = NULL; 10712797Sjg nvpflush_daemon_active = 0; 10722797Sjg CALLB_CPR_EXIT(&cprinfo); 10732797Sjg thread_exit(); 10742797Sjg } 10752797Sjg } 10762797Sjg CALLB_CPR_SAFE_END(&cprinfo, &nvpflush_lock); 10772797Sjg 10782797Sjg nvpbusy = 1; 10792797Sjg want_wakeup = 0; 10802797Sjg do_nvpflush = 0; 10812797Sjg mutex_exit(&nvpflush_lock); 10822797Sjg 10832797Sjg /* 10842797Sjg * Try flushing what's dirty, reschedule if there's 10852797Sjg * a failure or data gets marked as dirty again. 
10862797Sjg * First move each file marked dirty to the dirty 10872797Sjg * list to avoid locking the list across the write. 10882797Sjg */ 10892797Sjg mutex_enter(&nvf_cache_mutex); 10902797Sjg for (nvfdp = list_head(&nvf_cache_files); 10912797Sjg nvfdp; nvfdp = nextfdp) { 10922797Sjg nextfdp = list_next(&nvf_cache_files, nvfdp); 10932797Sjg rw_enter(&nvfdp->nvf_lock, RW_READER); 10942797Sjg if (NVF_IS_DIRTY(nvfdp)) { 10952797Sjg list_remove(&nvf_cache_files, nvfdp); 10962797Sjg list_insert_tail(&nvf_dirty_files, nvfdp); 10972797Sjg rw_exit(&nvfdp->nvf_lock); 10982797Sjg } else { 10992797Sjg NVPDAEMON_DEBUG((CE_CONT, 11002797Sjg "nvpdaemon: not dirty %s\n", 11012797Sjg nvfdp->nvf_cache_path)); 11022797Sjg rw_exit(&nvfdp->nvf_lock); 11032797Sjg } 11042797Sjg } 11052797Sjg mutex_exit(&nvf_cache_mutex); 11062797Sjg 11072797Sjg /* 11082797Sjg * Now go through the dirty list 11092797Sjg */ 11102797Sjg for (nvfdp = list_head(&nvf_dirty_files); 11112797Sjg nvfdp; nvfdp = nextfdp) { 11122797Sjg nextfdp = list_next(&nvf_dirty_files, nvfdp); 11132797Sjg 11142797Sjg is_now_clean = 0; 11152797Sjg rw_enter(&nvfdp->nvf_lock, RW_READER); 11162797Sjg if (NVF_IS_DIRTY(nvfdp)) { 11172797Sjg NVPDAEMON_DEBUG((CE_CONT, 11182797Sjg "nvpdaemon: flush %s\n", 11192797Sjg nvfdp->nvf_cache_path)); 11202797Sjg rw_exit(&nvfdp->nvf_lock); 11212797Sjg rval = nvpflush_one(nvfdp); 11222797Sjg rw_enter(&nvfdp->nvf_lock, RW_READER); 11232797Sjg if (rval != DDI_SUCCESS || 11242797Sjg NVF_IS_DIRTY(nvfdp)) { 11252797Sjg rw_exit(&nvfdp->nvf_lock); 11262797Sjg NVPDAEMON_DEBUG((CE_CONT, 11272797Sjg "nvpdaemon: %s dirty again\n", 11282797Sjg nvfdp->nvf_cache_path)); 11292797Sjg want_wakeup = 1; 11302797Sjg } else { 11312797Sjg rw_exit(&nvfdp->nvf_lock); 11322797Sjg nvf_write_is_complete(nvfdp); 11332797Sjg is_now_clean = 1; 11342797Sjg } 11352797Sjg } else { 11362797Sjg NVPDAEMON_DEBUG((CE_CONT, 11372797Sjg "nvpdaemon: not dirty %s\n", 11382797Sjg nvfdp->nvf_cache_path)); 11392797Sjg 
rw_exit(&nvfdp->nvf_lock); 11402797Sjg is_now_clean = 1; 11412797Sjg } 11422797Sjg 11432797Sjg if (is_now_clean) { 11442797Sjg mutex_enter(&nvf_cache_mutex); 11452797Sjg list_remove(&nvf_dirty_files, nvfdp); 11462797Sjg list_insert_tail(&nvf_cache_files, 11472797Sjg nvfdp); 11482797Sjg mutex_exit(&nvf_cache_mutex); 11492797Sjg } 11502797Sjg } 11512797Sjg 11522797Sjg if (want_wakeup) 11532797Sjg nvf_wake_daemon(); 11542797Sjg 11552797Sjg mutex_enter(&nvpflush_lock); 11562797Sjg nvpbusy = 0; 11572797Sjg } 11582797Sjg } 1159