/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/note.h>
#include <sys/t_lock.h>
#include <sys/cmn_err.h>
#include <sys/instance.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/ddi.h>
#include <sys/hwconf.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/ddi_impldefs.h>
#include <sys/ndi_impldefs.h>
#include <sys/modctl.h>
#include <sys/dacf.h>
#include <sys/promif.h>
#include <sys/cpuvar.h>
#include <sys/pathname.h>
#include <sys/kobj.h>
#include <sys/devcache.h>
#include <sys/devcache_impl.h>
#include <sys/sysmacros.h>
#include <sys/varargs.h>
#include <sys/callb.h>

/*
 * This facility provides interfaces to clients to register,
 * read and update cache data in persisted backing store files,
 * usually in /etc/devices.  The data persisted through this
 * mechanism should be stateless data, functioning in the sense
 * of a cache.  Writes are performed by a background daemon
 * thread, permitting a client to schedule an update without
 * blocking, then continue updating the data state in
 * parallel.  The data is only locked by the daemon thread
 * to pack the data in preparation for the write.
 *
 * Data persisted through this mechanism should be capable
 * of being regenerated through normal system operation,
 * for example attaching all disk devices would cause all
 * devids to be registered for those devices.  By caching
 * a devid-device tuple, the system can operate in a
 * more optimal way, directly attaching the device mapped
 * to a devid, rather than burdensomely driving attach of
 * the entire device tree to discover a single device.
 *
 * Note that a client should only need to include
 * <sys/devcache.h> for the supported interfaces.
 *
 * The data per client is entirely within the control of
 * the client.  When reading, data unpacked from the backing
 * store should be inserted in the list.  The pointer to
 * the list can be retrieved via nvf_list().  When writing,
 * the data on the list is to be packed and returned to the
 * nvpdaemon as an nvlist.
 *
 * Obvious restrictions are imposed by the limits of the
 * nvlist format.  The data cannot be read or written
 * piecemeal, and large amounts of data aren't recommended.
 * However, nvlists do allow that data be named and typed
 * and can be size-of-int invariant, and the cached data
 * can be versioned conveniently.
 *
 * The registration involves two steps: a handle is
 * allocated by calling the registration function.
 * This sets up the data referenced by the handle and
 * initializes the lock.  Following registration, the
 * client must initialize the data list.  The list
 * interfaces require that the list element with offset
 * to the node link be provided.  The format of the
 * list element is under the control of the client.
 *
 * Locking: the address of the data list r/w lock provided
 * can be accessed with nvf_lock().  The lock must be held
 * as reader when traversing the list or checking state,
 * such as nvf_is_dirty().  The lock must be held as
 * writer when updating the list or marking it dirty.
 * The lock must not be held when waking the daemon.
 *
 * The data r/w lock is held as writer when the pack,
 * unpack and free list handlers are called.  The
 * lock should not be dropped and must be still held
 * upon return.  The client should also hold the lock
 * as reader when checking if the list is dirty, and
 * as writer when marking the list dirty or initiating
 * a read.
 *
 * The asynchronous nature of updates allows for the
 * possibility that the data may continue to be updated
 * once the daemon has been notified that an update is
 * desired.  The data only needs to be locked against
 * updates when packing the data into the form to be
 * written.  When the write of the packed data has
 * completed, the daemon will automatically reschedule
 * an update if the data was marked dirty after the
 * point at which it was packed.  Before beginning an
 * update, the daemon attempts to lock the data as
 * writer; if the writer lock is already held, it
 * backs off and retries later.  The model is to give
 * priority to the kernel processes generating the
 * data, and that the nature of the data is that
 * it does not change often, can be re-generated when
 * needed, so updates should not happen often and
 * can be delayed until the data stops changing.
 * The client may update the list or mark it dirty
 * any time it is able to acquire the lock as
 * writer first.
 *
 * A failed write will be retried after some delay,
 * in the hope that the cause of the error will be
 * transient, for example a filesystem with no space
 * available.  An update on a read-only filesystem
 * is failed silently and not retried; this would be
 * the case when booted off install media.
 *
 * There is no unregister mechanism as of yet, as it
 * hasn't been needed so far.
1412797Sjg */ 1422797Sjg 1432797Sjg /* 1442797Sjg * Global list of files registered and updated by the nvpflush 1452797Sjg * daemon, protected by the nvf_cache_mutex. While an 1462797Sjg * update is taking place, a file is temporarily moved to 1472797Sjg * the dirty list to avoid locking the primary list for 1482797Sjg * the duration of the update. 1492797Sjg */ 1502797Sjg list_t nvf_cache_files; 1512797Sjg list_t nvf_dirty_files; 1522797Sjg kmutex_t nvf_cache_mutex; 1532797Sjg 1542797Sjg 1552797Sjg /* 1562797Sjg * Allow some delay from an update of the data before flushing 1572797Sjg * to permit simultaneous updates of multiple changes. 1582797Sjg * Changes in the data are expected to be bursty, ie 1592797Sjg * reconfig or hot-plug of a new adapter. 1602797Sjg * 1612797Sjg * kfio_report_error (default 0) 1622797Sjg * Set to 1 to enable some error messages related to low-level 1632797Sjg * kernel file i/o operations. 1642797Sjg * 1652797Sjg * nvpflush_delay (default 10) 1662797Sjg * The number of seconds after data is marked dirty before the 1672797Sjg * flush daemon is triggered to flush the data. A longer period 1682797Sjg * of time permits more data updates per write. Note that 1692797Sjg * every update resets the timer so no repository write will 1702797Sjg * occur while data is being updated continuously. 1712797Sjg * 1722797Sjg * nvpdaemon_idle_time (default 60) 1732797Sjg * The number of seconds the daemon will sleep idle before exiting. 
1742797Sjg * 1752797Sjg */ 1762797Sjg #define NVPFLUSH_DELAY 10 1772797Sjg #define NVPDAEMON_IDLE_TIME 60 1782797Sjg 1792797Sjg #define TICKS_PER_SECOND (drv_usectohz(1000000)) 1802797Sjg 1812797Sjg /* 1822797Sjg * Tunables 1832797Sjg */ 1842797Sjg int kfio_report_error = 0; /* kernel file i/o operations */ 1852797Sjg int kfio_disable_read = 0; /* disable all reads */ 1862797Sjg int kfio_disable_write = 0; /* disable all writes */ 1872797Sjg 1882797Sjg int nvpflush_delay = NVPFLUSH_DELAY; 1892797Sjg int nvpdaemon_idle_time = NVPDAEMON_IDLE_TIME; 1902797Sjg 1912797Sjg static timeout_id_t nvpflush_id = 0; 1922797Sjg static int nvpflush_timer_busy = 0; 1932797Sjg static int nvpflush_daemon_active = 0; 1942797Sjg static kthread_t *nvpflush_thr_id = 0; 1952797Sjg 1962797Sjg static int do_nvpflush = 0; 1972797Sjg static int nvpbusy = 0; 1982797Sjg static kmutex_t nvpflush_lock; 1992797Sjg static kcondvar_t nvpflush_cv; 2002797Sjg static kthread_id_t nvpflush_thread; 2012797Sjg static clock_t nvpticks; 2022797Sjg 2032797Sjg static void nvpflush_daemon(void); 2042797Sjg 2052797Sjg #ifdef DEBUG 2062797Sjg int nvpdaemon_debug = 0; 2072797Sjg int kfio_debug = 0; 2082797Sjg #endif /* DEBUG */ 2092797Sjg 2102797Sjg extern int modrootloaded; 2112797Sjg extern void mdi_read_devices_files(void); 2122797Sjg extern void mdi_clean_vhcache(void); 213*7576SJerry.Gilliam@Sun.COM extern int sys_shutdown; 2142797Sjg 2152797Sjg /* 2162797Sjg * Initialize the overall cache file management 2172797Sjg */ 2182797Sjg void 2192797Sjg i_ddi_devices_init(void) 2202797Sjg { 2212797Sjg list_create(&nvf_cache_files, sizeof (nvfd_t), 2222797Sjg offsetof(nvfd_t, nvf_link)); 2232797Sjg list_create(&nvf_dirty_files, sizeof (nvfd_t), 2242797Sjg offsetof(nvfd_t, nvf_link)); 2252797Sjg mutex_init(&nvf_cache_mutex, NULL, MUTEX_DEFAULT, NULL); 2264845Svikram retire_store_init(); 2272797Sjg devid_cache_init(); 2282797Sjg } 2292797Sjg 2302797Sjg /* 2312797Sjg * Read cache files 2322797Sjg * The files read here 
should be restricted to those 2332797Sjg * that may be required to mount root. 2342797Sjg */ 2352797Sjg void 2362797Sjg i_ddi_read_devices_files(void) 2372797Sjg { 2384845Svikram /* 2394845Svikram * The retire store should be the first file read as it 2404845Svikram * may need to offline devices. kfio_disable_read is not 2414845Svikram * used for retire. For the rationale see the tunable 2424845Svikram * ddi_retire_store_bypass and comments in: 2434845Svikram * uts/common/os/retire_store.c 2444845Svikram */ 2454845Svikram 2464845Svikram retire_store_read(); 2474845Svikram 2482797Sjg if (!kfio_disable_read) { 2492797Sjg mdi_read_devices_files(); 2502797Sjg devid_cache_read(); 2512797Sjg } 2522797Sjg } 2532797Sjg 2542797Sjg void 2552797Sjg i_ddi_start_flush_daemon(void) 2562797Sjg { 2572797Sjg nvfd_t *nvfdp; 2582797Sjg 2592797Sjg ASSERT(i_ddi_io_initialized()); 2602797Sjg 2612797Sjg mutex_init(&nvpflush_lock, NULL, MUTEX_DRIVER, NULL); 2622797Sjg cv_init(&nvpflush_cv, NULL, CV_DRIVER, NULL); 2632797Sjg 2642797Sjg mutex_enter(&nvf_cache_mutex); 2652797Sjg for (nvfdp = list_head(&nvf_cache_files); nvfdp; 2662797Sjg nvfdp = list_next(&nvf_cache_files, nvfdp)) { 2672797Sjg if (NVF_IS_DIRTY(nvfdp)) { 2682797Sjg nvf_wake_daemon(); 2692797Sjg break; 2702797Sjg } 2712797Sjg } 2722797Sjg mutex_exit(&nvf_cache_mutex); 2732797Sjg } 2742797Sjg 2752797Sjg void 2762797Sjg i_ddi_clean_devices_files(void) 2772797Sjg { 2782797Sjg devid_cache_cleanup(); 2792797Sjg mdi_clean_vhcache(); 2802797Sjg } 2812797Sjg 2822797Sjg /* 2832797Sjg * Register a cache file to be managed and updated by the nvpflush daemon. 2842797Sjg * All operations are performed through the returned handle. 2852797Sjg * There is no unregister mechanism for now. 
2862797Sjg */ 2872797Sjg nvf_handle_t 2882797Sjg nvf_register_file(nvf_ops_t *ops) 2892797Sjg { 2902797Sjg nvfd_t *nvfdp; 2912797Sjg 2922797Sjg nvfdp = kmem_zalloc(sizeof (*nvfdp), KM_SLEEP); 2932797Sjg 2942797Sjg nvfdp->nvf_ops = ops; 2952797Sjg nvfdp->nvf_flags = 0; 2962797Sjg rw_init(&nvfdp->nvf_lock, NULL, RW_DRIVER, NULL); 2972797Sjg 2982797Sjg mutex_enter(&nvf_cache_mutex); 2992797Sjg list_insert_tail(&nvf_cache_files, nvfdp); 3002797Sjg mutex_exit(&nvf_cache_mutex); 3012797Sjg 3022797Sjg return ((nvf_handle_t)nvfdp); 3032797Sjg } 3042797Sjg 3052797Sjg /*PRINTFLIKE1*/ 3062797Sjg void 3072797Sjg nvf_error(const char *fmt, ...) 3082797Sjg { 3092797Sjg va_list ap; 3102797Sjg 3112797Sjg if (kfio_report_error) { 3122797Sjg va_start(ap, fmt); 3132797Sjg vcmn_err(CE_NOTE, fmt, ap); 3142797Sjg va_end(ap); 3152797Sjg } 3162797Sjg } 3172797Sjg 3182797Sjg /* 3192797Sjg * Some operations clients may use to manage the data 3202797Sjg * to be persisted in a cache file. 3212797Sjg */ 3222797Sjg char * 3232797Sjg nvf_cache_name(nvf_handle_t handle) 3242797Sjg { 3252797Sjg return (((nvfd_t *)handle)->nvf_cache_path); 3262797Sjg } 3272797Sjg 3282797Sjg krwlock_t * 3292797Sjg nvf_lock(nvf_handle_t handle) 3302797Sjg { 3312797Sjg return (&(((nvfd_t *)handle)->nvf_lock)); 3322797Sjg } 3332797Sjg 3342797Sjg list_t * 3352797Sjg nvf_list(nvf_handle_t handle) 3362797Sjg { 3372797Sjg return (&(((nvfd_t *)handle)->nvf_data_list)); 3382797Sjg } 3392797Sjg 3402797Sjg void 3412797Sjg nvf_mark_dirty(nvf_handle_t handle) 3422797Sjg { 3432797Sjg ASSERT(RW_WRITE_HELD(&(((nvfd_t *)handle)->nvf_lock))); 3442797Sjg NVF_MARK_DIRTY((nvfd_t *)handle); 3452797Sjg } 3462797Sjg 3472797Sjg int 3482797Sjg nvf_is_dirty(nvf_handle_t handle) 3492797Sjg { 3502797Sjg ASSERT(RW_LOCK_HELD(&(((nvfd_t *)handle)->nvf_lock))); 3512797Sjg return (NVF_IS_DIRTY((nvfd_t *)handle)); 3522797Sjg } 3532797Sjg 3542797Sjg static uint16_t 3552797Sjg nvp_cksum(uchar_t *buf, int64_t buflen) 3562797Sjg { 3572797Sjg uint16_t 
cksum = 0; 3582797Sjg uint16_t *p = (uint16_t *)buf; 3592797Sjg int64_t n; 3602797Sjg 3612797Sjg if ((buflen & 0x01) != 0) { 3622797Sjg buflen--; 3632797Sjg cksum = buf[buflen]; 3642797Sjg } 3652797Sjg n = buflen / 2; 3662797Sjg while (n-- > 0) 3672797Sjg cksum ^= *p++; 3682797Sjg return (cksum); 3692797Sjg } 3702797Sjg 3712797Sjg int 3722797Sjg fread_nvlist(char *filename, nvlist_t **ret_nvlist) 3732797Sjg { 3742797Sjg struct _buf *file; 3752797Sjg nvpf_hdr_t hdr; 3762797Sjg char *buf; 3772797Sjg nvlist_t *nvl; 3782797Sjg int rval; 3792797Sjg uint_t offset; 3802797Sjg int n; 3812797Sjg char c; 3822797Sjg uint16_t cksum, hdrsum; 3832797Sjg 3842797Sjg *ret_nvlist = NULL; 3852797Sjg 3862797Sjg file = kobj_open_file(filename); 3872797Sjg if (file == (struct _buf *)-1) { 3882797Sjg KFDEBUG((CE_CONT, "cannot open file: %s\n", filename)); 3892797Sjg return (ENOENT); 3902797Sjg } 3912797Sjg 3922797Sjg offset = 0; 3932797Sjg n = kobj_read_file(file, (char *)&hdr, sizeof (hdr), offset); 3942797Sjg if (n != sizeof (hdr)) { 3952797Sjg kobj_close_file(file); 3962797Sjg if (n < 0) { 3972797Sjg nvf_error("error reading header: %s\n", filename); 3982797Sjg return (EIO); 3992797Sjg } else if (n == 0) { 4002797Sjg KFDEBUG((CE_CONT, "file empty: %s\n", filename)); 4012797Sjg } else { 4022797Sjg nvf_error("header size incorrect: %s\n", filename); 4032797Sjg } 4042797Sjg return (EINVAL); 4052797Sjg } 4062797Sjg offset += n; 4072797Sjg 4082797Sjg KFDEBUG2((CE_CONT, "nvpf_magic: 0x%x\n", hdr.nvpf_magic)); 4092797Sjg KFDEBUG2((CE_CONT, "nvpf_version: %d\n", hdr.nvpf_version)); 4102797Sjg KFDEBUG2((CE_CONT, "nvpf_size: %lld\n", 411*7576SJerry.Gilliam@Sun.COM (longlong_t)hdr.nvpf_size)); 4122797Sjg KFDEBUG2((CE_CONT, "nvpf_hdr_chksum: 0x%x\n", 413*7576SJerry.Gilliam@Sun.COM hdr.nvpf_hdr_chksum)); 4142797Sjg KFDEBUG2((CE_CONT, "nvpf_chksum: 0x%x\n", hdr.nvpf_chksum)); 4152797Sjg 4162797Sjg cksum = hdr.nvpf_hdr_chksum; 4172797Sjg hdr.nvpf_hdr_chksum = 0; 4182797Sjg hdrsum = 
nvp_cksum((uchar_t *)&hdr, sizeof (hdr)); 4192797Sjg 4202797Sjg if (hdr.nvpf_magic != NVPF_HDR_MAGIC || 4212797Sjg hdr.nvpf_version != NVPF_HDR_VERSION || hdrsum != cksum) { 4222797Sjg kobj_close_file(file); 4232797Sjg if (hdrsum != cksum) { 4242797Sjg nvf_error("%s: checksum error " 4252797Sjg "(actual 0x%x, expected 0x%x)\n", 4262797Sjg filename, hdrsum, cksum); 4272797Sjg } 4282797Sjg nvf_error("%s: header information incorrect", filename); 4292797Sjg return (EINVAL); 4302797Sjg } 4312797Sjg 4322797Sjg ASSERT(hdr.nvpf_size >= 0); 4332797Sjg 4342797Sjg buf = kmem_alloc(hdr.nvpf_size, KM_SLEEP); 4352797Sjg n = kobj_read_file(file, buf, hdr.nvpf_size, offset); 4362797Sjg if (n != hdr.nvpf_size) { 4372797Sjg kmem_free(buf, hdr.nvpf_size); 4382797Sjg kobj_close_file(file); 4392797Sjg if (n < 0) { 4402797Sjg nvf_error("%s: read error %d", filename, n); 4412797Sjg } else { 4422797Sjg nvf_error("%s: incomplete read %d/%lld", 443*7576SJerry.Gilliam@Sun.COM filename, n, (longlong_t)hdr.nvpf_size); 4442797Sjg } 4452797Sjg return (EINVAL); 4462797Sjg } 4472797Sjg offset += n; 4482797Sjg 4492797Sjg rval = kobj_read_file(file, &c, 1, offset); 4502797Sjg kobj_close_file(file); 4512797Sjg if (rval > 0) { 4522797Sjg nvf_error("%s is larger than %lld\n", 453*7576SJerry.Gilliam@Sun.COM filename, (longlong_t)hdr.nvpf_size); 4542797Sjg kmem_free(buf, hdr.nvpf_size); 4552797Sjg return (EINVAL); 4562797Sjg } 4572797Sjg 4582797Sjg cksum = nvp_cksum((uchar_t *)buf, hdr.nvpf_size); 4592797Sjg if (hdr.nvpf_chksum != cksum) { 4602797Sjg nvf_error("%s: checksum error (actual 0x%x, expected 0x%x)\n", 4612797Sjg filename, hdr.nvpf_chksum, cksum); 4622797Sjg kmem_free(buf, hdr.nvpf_size); 4632797Sjg return (EINVAL); 4642797Sjg } 4652797Sjg 4662797Sjg nvl = NULL; 4672797Sjg rval = nvlist_unpack(buf, hdr.nvpf_size, &nvl, 0); 4682797Sjg if (rval != 0) { 4692797Sjg nvf_error("%s: error %d unpacking nvlist\n", 470*7576SJerry.Gilliam@Sun.COM filename, rval); 4712797Sjg kmem_free(buf, hdr.nvpf_size); 
4722797Sjg return (EINVAL); 4732797Sjg } 4742797Sjg 4752797Sjg kmem_free(buf, hdr.nvpf_size); 4762797Sjg *ret_nvlist = nvl; 4772797Sjg return (0); 4782797Sjg } 4792797Sjg 4802797Sjg static int 4812797Sjg kfcreate(char *filename, kfile_t **kfilep) 4822797Sjg { 4832797Sjg kfile_t *fp; 4842797Sjg int rval; 4852797Sjg 4862797Sjg ASSERT(modrootloaded); 4872797Sjg 4882797Sjg fp = kmem_alloc(sizeof (kfile_t), KM_SLEEP); 4892797Sjg 4902797Sjg fp->kf_vnflags = FCREAT | FWRITE | FTRUNC; 4912797Sjg fp->kf_fname = filename; 4922797Sjg fp->kf_fpos = 0; 4932797Sjg fp->kf_state = 0; 4942797Sjg 4952797Sjg KFDEBUG((CE_CONT, "create: %s flags 0x%x\n", 496*7576SJerry.Gilliam@Sun.COM filename, fp->kf_vnflags)); 4972797Sjg rval = vn_open(filename, UIO_SYSSPACE, fp->kf_vnflags, 4982797Sjg 0444, &fp->kf_vp, CRCREAT, 0); 4992797Sjg if (rval != 0) { 5002797Sjg kmem_free(fp, sizeof (kfile_t)); 5012797Sjg KFDEBUG((CE_CONT, "%s: create error %d\n", 502*7576SJerry.Gilliam@Sun.COM filename, rval)); 5032797Sjg return (rval); 5042797Sjg } 5052797Sjg 5062797Sjg *kfilep = fp; 5072797Sjg return (0); 5082797Sjg } 5092797Sjg 5102797Sjg static int 5112797Sjg kfremove(char *filename) 5122797Sjg { 5132797Sjg int rval; 5142797Sjg 5152797Sjg KFDEBUG((CE_CONT, "remove: %s\n", filename)); 5162797Sjg rval = vn_remove(filename, UIO_SYSSPACE, RMFILE); 5172797Sjg if (rval != 0) { 5182797Sjg KFDEBUG((CE_CONT, "%s: remove error %d\n", 519*7576SJerry.Gilliam@Sun.COM filename, rval)); 5202797Sjg } 5212797Sjg return (rval); 5222797Sjg } 5232797Sjg 5242797Sjg static int 5252797Sjg kfread(kfile_t *fp, char *buf, ssize_t bufsiz, ssize_t *ret_n) 5262797Sjg { 5272797Sjg ssize_t resid; 5282797Sjg int err; 5292797Sjg ssize_t n; 5302797Sjg 5312797Sjg ASSERT(modrootloaded); 5322797Sjg 5332797Sjg if (fp->kf_state != 0) 5342797Sjg return (fp->kf_state); 5352797Sjg 5362797Sjg err = vn_rdwr(UIO_READ, fp->kf_vp, buf, bufsiz, fp->kf_fpos, 537*7576SJerry.Gilliam@Sun.COM UIO_SYSSPACE, 0, (rlim64_t)0, kcred, &resid); 5382797Sjg if 
(err != 0) { 5392797Sjg KFDEBUG((CE_CONT, "%s: read error %d\n", 540*7576SJerry.Gilliam@Sun.COM fp->kf_fname, err)); 5412797Sjg fp->kf_state = err; 5422797Sjg return (err); 5432797Sjg } 5442797Sjg 5452797Sjg ASSERT(resid >= 0 && resid <= bufsiz); 5462797Sjg n = bufsiz - resid; 5472797Sjg 5482797Sjg KFDEBUG1((CE_CONT, "%s: read %ld bytes ok %ld bufsiz, %ld resid\n", 549*7576SJerry.Gilliam@Sun.COM fp->kf_fname, n, bufsiz, resid)); 5502797Sjg 5512797Sjg fp->kf_fpos += n; 5522797Sjg *ret_n = n; 5532797Sjg return (0); 5542797Sjg } 5552797Sjg 5562797Sjg static int 5572797Sjg kfwrite(kfile_t *fp, char *buf, ssize_t bufsiz, ssize_t *ret_n) 5582797Sjg { 5592797Sjg rlim64_t rlimit; 5602797Sjg ssize_t resid; 5612797Sjg int err; 5622797Sjg ssize_t len; 5632797Sjg ssize_t n = 0; 5642797Sjg 5652797Sjg ASSERT(modrootloaded); 5662797Sjg 5672797Sjg if (fp->kf_state != 0) 5682797Sjg return (fp->kf_state); 5692797Sjg 5702797Sjg len = bufsiz; 5712797Sjg rlimit = bufsiz + 1; 5722797Sjg for (;;) { 5732797Sjg err = vn_rdwr(UIO_WRITE, fp->kf_vp, buf, len, fp->kf_fpos, 574*7576SJerry.Gilliam@Sun.COM UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid); 5752797Sjg if (err) { 5762797Sjg KFDEBUG((CE_CONT, "%s: write error %d\n", 577*7576SJerry.Gilliam@Sun.COM fp->kf_fname, err)); 5782797Sjg fp->kf_state = err; 5792797Sjg return (err); 5802797Sjg } 5812797Sjg 5822797Sjg KFDEBUG1((CE_CONT, "%s: write %ld bytes ok %ld resid\n", 583*7576SJerry.Gilliam@Sun.COM fp->kf_fname, len-resid, resid)); 5842797Sjg 5852797Sjg ASSERT(resid >= 0 && resid <= len); 5862797Sjg 5872797Sjg n += (len - resid); 5882797Sjg if (resid == 0) 5892797Sjg break; 5902797Sjg 5912797Sjg if (resid == len) { 5922797Sjg KFDEBUG((CE_CONT, "%s: filesystem full?\n", 593*7576SJerry.Gilliam@Sun.COM fp->kf_fname)); 5942797Sjg fp->kf_state = ENOSPC; 5952797Sjg return (ENOSPC); 5962797Sjg } 5972797Sjg 5982797Sjg len -= resid; 5992797Sjg buf += len; 6002797Sjg fp->kf_fpos += len; 6012797Sjg len = resid; 6022797Sjg } 6032797Sjg 6042797Sjg ASSERT(n 
== bufsiz); 6052797Sjg KFDEBUG1((CE_CONT, "%s: wrote %ld bytes ok\n", fp->kf_fname, n)); 6062797Sjg 6072797Sjg *ret_n = n; 6082797Sjg return (0); 6092797Sjg } 6102797Sjg 6112797Sjg 6122797Sjg static int 6132797Sjg kfclose(kfile_t *fp) 6142797Sjg { 6152797Sjg int rval; 6162797Sjg 6172797Sjg KFDEBUG((CE_CONT, "close: %s\n", fp->kf_fname)); 6182797Sjg 6192797Sjg if ((fp->kf_vnflags & FWRITE) && fp->kf_state == 0) { 6205331Samw rval = VOP_FSYNC(fp->kf_vp, FSYNC, kcred, NULL); 6212797Sjg if (rval != 0) { 6222797Sjg nvf_error("%s: sync error %d\n", 623*7576SJerry.Gilliam@Sun.COM fp->kf_fname, rval); 6242797Sjg } 6252797Sjg KFDEBUG((CE_CONT, "%s: sync ok\n", fp->kf_fname)); 6262797Sjg } 6272797Sjg 628*7576SJerry.Gilliam@Sun.COM rval = VOP_CLOSE(fp->kf_vp, fp->kf_vnflags, 1, 629*7576SJerry.Gilliam@Sun.COM (offset_t)0, kcred, NULL); 6302797Sjg if (rval != 0) { 6312797Sjg if (fp->kf_state == 0) { 6322797Sjg nvf_error("%s: close error %d\n", 633*7576SJerry.Gilliam@Sun.COM fp->kf_fname, rval); 6342797Sjg } 6352797Sjg } else { 6362797Sjg if (fp->kf_state == 0) 6372797Sjg KFDEBUG((CE_CONT, "%s: close ok\n", fp->kf_fname)); 6382797Sjg } 6392797Sjg 6402797Sjg VN_RELE(fp->kf_vp); 6412797Sjg kmem_free(fp, sizeof (kfile_t)); 6422797Sjg return (rval); 6432797Sjg } 6442797Sjg 6452797Sjg static int 6462797Sjg kfrename(char *oldname, char *newname) 6472797Sjg { 6482797Sjg int rval; 6492797Sjg 6502797Sjg ASSERT(modrootloaded); 6512797Sjg 6522797Sjg KFDEBUG((CE_CONT, "renaming %s to %s\n", oldname, newname)); 6532797Sjg 6542797Sjg if ((rval = vn_rename(oldname, newname, UIO_SYSSPACE)) != 0) { 6552797Sjg KFDEBUG((CE_CONT, "rename %s to %s: %d\n", 656*7576SJerry.Gilliam@Sun.COM oldname, newname, rval)); 6572797Sjg } 6582797Sjg 6592797Sjg return (rval); 6602797Sjg } 6612797Sjg 6622797Sjg int 6632797Sjg fwrite_nvlist(char *filename, nvlist_t *nvl) 6642797Sjg { 6652797Sjg char *buf; 6662797Sjg char *nvbuf; 6672797Sjg kfile_t *fp; 6682797Sjg char *newname; 6692797Sjg int len, err, err1; 
6702797Sjg size_t buflen; 6712797Sjg ssize_t n; 6722797Sjg 6732797Sjg ASSERT(modrootloaded); 6742797Sjg 6752797Sjg nvbuf = NULL; 6762797Sjg err = nvlist_pack(nvl, &nvbuf, &buflen, NV_ENCODE_NATIVE, 0); 6772797Sjg if (err != 0) { 6782797Sjg nvf_error("%s: error %d packing nvlist\n", 679*7576SJerry.Gilliam@Sun.COM filename, err); 6802797Sjg return (err); 6812797Sjg } 6822797Sjg 6832797Sjg buf = kmem_alloc(sizeof (nvpf_hdr_t) + buflen, KM_SLEEP); 6842797Sjg bzero(buf, sizeof (nvpf_hdr_t)); 6852797Sjg 6862797Sjg ((nvpf_hdr_t *)buf)->nvpf_magic = NVPF_HDR_MAGIC; 6872797Sjg ((nvpf_hdr_t *)buf)->nvpf_version = NVPF_HDR_VERSION; 6882797Sjg ((nvpf_hdr_t *)buf)->nvpf_size = buflen; 6892797Sjg ((nvpf_hdr_t *)buf)->nvpf_chksum = nvp_cksum((uchar_t *)nvbuf, buflen); 6902797Sjg ((nvpf_hdr_t *)buf)->nvpf_hdr_chksum = 691*7576SJerry.Gilliam@Sun.COM nvp_cksum((uchar_t *)buf, sizeof (nvpf_hdr_t)); 6922797Sjg 6932797Sjg bcopy(nvbuf, buf + sizeof (nvpf_hdr_t), buflen); 6942797Sjg kmem_free(nvbuf, buflen); 6952797Sjg buflen += sizeof (nvpf_hdr_t); 6962797Sjg 6972797Sjg len = strlen(filename) + MAX_SUFFIX_LEN + 2; 6982797Sjg newname = kmem_alloc(len, KM_SLEEP); 6992797Sjg 7002797Sjg 701*7576SJerry.Gilliam@Sun.COM (void) sprintf(newname, "%s.%s", filename, NEW_FILENAME_SUFFIX); 7022797Sjg 7032797Sjg /* 7042797Sjg * To make it unlikely we suffer data loss, write 7052797Sjg * data to the new temporary file. Once successful 7062797Sjg * complete the transaction by renaming the new file 7072797Sjg * to replace the previous. 
7082797Sjg */ 7092797Sjg 7102797Sjg if ((err = kfcreate(newname, &fp)) == 0) { 7112797Sjg err = kfwrite(fp, buf, buflen, &n); 7122797Sjg if (err) { 7132797Sjg nvf_error("%s: write error - %d\n", 714*7576SJerry.Gilliam@Sun.COM newname, err); 7152797Sjg } else { 7162797Sjg if (n != buflen) { 7172797Sjg nvf_error( 7182797Sjg "%s: partial write %ld of %ld bytes\n", 7192797Sjg newname, n, buflen); 7202797Sjg nvf_error("%s: filesystem may be full?\n", 7212797Sjg newname); 7222797Sjg err = EIO; 7232797Sjg } 7242797Sjg } 7252797Sjg if ((err1 = kfclose(fp)) != 0) { 7262797Sjg nvf_error("%s: close error\n", newname); 7272797Sjg if (err == 0) 7282797Sjg err = err1; 7292797Sjg } 7302797Sjg if (err != 0) { 7312797Sjg if (kfremove(newname) != 0) { 7322797Sjg nvf_error("%s: remove failed\n", 7332797Sjg newname); 7342797Sjg } 7352797Sjg } 7362797Sjg } else { 7372797Sjg nvf_error("%s: create failed - %d\n", filename, err); 7382797Sjg } 7392797Sjg 7402797Sjg if (err == 0) { 7412797Sjg if ((err = kfrename(newname, filename)) != 0) { 7422797Sjg nvf_error("%s: rename from %s failed\n", 743*7576SJerry.Gilliam@Sun.COM newname, filename); 7442797Sjg } 7452797Sjg } 7462797Sjg 7472797Sjg kmem_free(newname, len); 7482797Sjg kmem_free(buf, buflen); 7492797Sjg 7502797Sjg return (err); 7512797Sjg } 7522797Sjg 7532797Sjg static int 7542797Sjg e_fwrite_nvlist(nvfd_t *nvfd, nvlist_t *nvl) 7552797Sjg { 7562797Sjg int err; 7572797Sjg 7582797Sjg if ((err = fwrite_nvlist(nvfd->nvf_cache_path, nvl)) == 0) 7592797Sjg return (DDI_SUCCESS); 7602797Sjg else { 7612797Sjg if (err == EROFS) 7622797Sjg NVF_MARK_READONLY(nvfd); 7632797Sjg return (DDI_FAILURE); 7642797Sjg } 7652797Sjg } 7662797Sjg 7672797Sjg static void 7682797Sjg nvp_list_free(nvfd_t *nvf) 7692797Sjg { 7702797Sjg ASSERT(RW_WRITE_HELD(&nvf->nvf_lock)); 7712797Sjg (nvf->nvf_list_free)((nvf_handle_t)nvf); 7722797Sjg ASSERT(RW_WRITE_HELD(&nvf->nvf_lock)); 7732797Sjg } 7742797Sjg 7752797Sjg /* 7762797Sjg * Read a file in the nvlist format 7772797Sjg 
* EIO - i/o error during read 7782797Sjg * ENOENT - file not found 7792797Sjg * EINVAL - file contents corrupted 7802797Sjg */ 7812797Sjg static int 7822797Sjg fread_nvp_list(nvfd_t *nvfd) 7832797Sjg { 7842797Sjg nvlist_t *nvl; 7852797Sjg nvpair_t *nvp; 7862797Sjg char *name; 7872797Sjg nvlist_t *sublist; 7882797Sjg int rval; 7892797Sjg int rv; 7902797Sjg 7912797Sjg ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock))); 7922797Sjg 7932797Sjg rval = fread_nvlist(nvfd->nvf_cache_path, &nvl); 7942797Sjg if (rval != 0) 7952797Sjg return (rval); 7962797Sjg ASSERT(nvl != NULL); 7972797Sjg 7982797Sjg nvp = NULL; 7992797Sjg while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 8002797Sjg name = nvpair_name(nvp); 8012797Sjg ASSERT(strlen(name) > 0); 8022797Sjg 8032797Sjg switch (nvpair_type(nvp)) { 8042797Sjg case DATA_TYPE_NVLIST: 8052797Sjg rval = nvpair_value_nvlist(nvp, &sublist); 8062797Sjg if (rval != 0) { 8072797Sjg nvf_error( 8082797Sjg "nvpair_value_nvlist error %s %d\n", 8092797Sjg name, rval); 8102797Sjg goto error; 8112797Sjg } 8122797Sjg 8132797Sjg /* 8142797Sjg * unpack nvlist for this device and 8152797Sjg * add elements to data list. 
8162797Sjg */ 8172797Sjg ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock))); 8182797Sjg rv = (nvfd->nvf_unpack_nvlist) 8192797Sjg ((nvf_handle_t)nvfd, sublist, name); 8202797Sjg ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock))); 8212797Sjg if (rv != 0) { 8222797Sjg nvf_error( 8232797Sjg "%s: %s invalid list element\n", 8242797Sjg nvfd->nvf_cache_path, name); 8252797Sjg rval = EINVAL; 8262797Sjg goto error; 8272797Sjg } 8282797Sjg break; 8292797Sjg 8302797Sjg default: 8312797Sjg nvf_error("%s: %s unsupported data type %d\n", 832*7576SJerry.Gilliam@Sun.COM nvfd->nvf_cache_path, name, nvpair_type(nvp)); 8332797Sjg rval = EINVAL; 8342797Sjg goto error; 8352797Sjg } 8362797Sjg } 8372797Sjg 8382797Sjg nvlist_free(nvl); 8392797Sjg 8402797Sjg return (0); 8412797Sjg 8422797Sjg error: 8432797Sjg nvlist_free(nvl); 8442797Sjg nvp_list_free(nvfd); 8452797Sjg return (rval); 8462797Sjg } 8472797Sjg 8482797Sjg 8492797Sjg int 8502797Sjg nvf_read_file(nvf_handle_t nvf_handle) 8512797Sjg { 8522797Sjg nvfd_t *nvfd = (nvfd_t *)nvf_handle; 8532797Sjg int rval; 8542797Sjg 8552797Sjg ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock)); 8562797Sjg 8572797Sjg if (kfio_disable_read) 8582797Sjg return (0); 8592797Sjg 8602797Sjg KFDEBUG((CE_CONT, "reading %s\n", nvfd->nvf_cache_path)); 8612797Sjg 8622797Sjg rval = fread_nvp_list(nvfd); 8632797Sjg if (rval) { 8642797Sjg switch (rval) { 8652797Sjg case EIO: 8662797Sjg nvfd->nvf_flags |= NVF_F_REBUILD_MSG; 8672797Sjg cmn_err(CE_WARN, "%s: I/O error", 868*7576SJerry.Gilliam@Sun.COM nvfd->nvf_cache_path); 8692797Sjg break; 8702797Sjg case ENOENT: 8712797Sjg nvfd->nvf_flags |= NVF_F_CREATE_MSG; 8722797Sjg nvf_error("%s: not found\n", 873*7576SJerry.Gilliam@Sun.COM nvfd->nvf_cache_path); 8742797Sjg break; 8752797Sjg case EINVAL: 8762797Sjg default: 8772797Sjg nvfd->nvf_flags |= NVF_F_REBUILD_MSG; 8782797Sjg cmn_err(CE_WARN, "%s: data file corrupted", 879*7576SJerry.Gilliam@Sun.COM nvfd->nvf_cache_path); 8802797Sjg break; 8812797Sjg } 8822797Sjg } 8832797Sjg return (rval); 
8842797Sjg } 8852797Sjg 8862797Sjg static void 8872797Sjg nvf_write_is_complete(nvfd_t *fd) 8882797Sjg { 8892797Sjg if (fd->nvf_write_complete) { 8902797Sjg (fd->nvf_write_complete)((nvf_handle_t)fd); 8912797Sjg } 8922797Sjg } 8932797Sjg 8942797Sjg /*ARGSUSED*/ 8952797Sjg static void 8962797Sjg nvpflush_timeout(void *arg) 8972797Sjg { 8982797Sjg clock_t nticks; 8992797Sjg 9002797Sjg mutex_enter(&nvpflush_lock); 9012797Sjg nticks = nvpticks - ddi_get_lbolt(); 9022797Sjg if (nticks > 4) { 9032797Sjg nvpflush_timer_busy = 1; 9042797Sjg mutex_exit(&nvpflush_lock); 9052797Sjg nvpflush_id = timeout(nvpflush_timeout, NULL, nticks); 9062797Sjg } else { 9072797Sjg do_nvpflush = 1; 9082797Sjg NVPDAEMON_DEBUG((CE_CONT, "signal nvpdaemon\n")); 9092797Sjg cv_signal(&nvpflush_cv); 9102797Sjg nvpflush_id = 0; 9112797Sjg nvpflush_timer_busy = 0; 9122797Sjg mutex_exit(&nvpflush_lock); 9132797Sjg } 9142797Sjg } 9152797Sjg 9162797Sjg /* 9172797Sjg * After marking a list as dirty, wake the nvpflush daemon 9182797Sjg * to perform the update. 9192797Sjg */ 9202797Sjg void 9212797Sjg nvf_wake_daemon(void) 9222797Sjg { 9232797Sjg clock_t nticks; 9242797Sjg 9252797Sjg /* 926*7576SJerry.Gilliam@Sun.COM * If the system isn't up yet or is shutting down, 9272797Sjg * don't even think about starting a flush. 
9282797Sjg */ 929*7576SJerry.Gilliam@Sun.COM if (!i_ddi_io_initialized() || sys_shutdown) 9302797Sjg return; 9312797Sjg 9322797Sjg mutex_enter(&nvpflush_lock); 9332797Sjg 9342797Sjg if (nvpflush_daemon_active == 0) { 9352797Sjg nvpflush_daemon_active = 1; 9362797Sjg mutex_exit(&nvpflush_lock); 9372797Sjg NVPDAEMON_DEBUG((CE_CONT, "starting nvpdaemon thread\n")); 9382797Sjg nvpflush_thr_id = thread_create(NULL, 0, 9392797Sjg (void (*)())nvpflush_daemon, 9402797Sjg NULL, 0, &p0, TS_RUN, minclsyspri); 9412797Sjg mutex_enter(&nvpflush_lock); 9422797Sjg } 9432797Sjg 9442797Sjg nticks = nvpflush_delay * TICKS_PER_SECOND; 9452797Sjg nvpticks = ddi_get_lbolt() + nticks; 9462797Sjg if (nvpflush_timer_busy == 0) { 9472797Sjg nvpflush_timer_busy = 1; 9482797Sjg mutex_exit(&nvpflush_lock); 9492797Sjg nvpflush_id = timeout(nvpflush_timeout, NULL, nticks + 4); 9502797Sjg } else 9512797Sjg mutex_exit(&nvpflush_lock); 9522797Sjg } 9532797Sjg 9542797Sjg static int 9552797Sjg nvpflush_one(nvfd_t *nvfd) 9562797Sjg { 9572797Sjg int rval = DDI_SUCCESS; 9582797Sjg nvlist_t *nvl; 9592797Sjg 9602797Sjg rw_enter(&nvfd->nvf_lock, RW_READER); 9612797Sjg 9622797Sjg ASSERT((nvfd->nvf_flags & NVF_F_FLUSHING) == 0); 9632797Sjg 9642797Sjg if (!NVF_IS_DIRTY(nvfd) || 965*7576SJerry.Gilliam@Sun.COM NVF_IS_READONLY(nvfd) || kfio_disable_write || sys_shutdown) { 9662797Sjg NVF_CLEAR_DIRTY(nvfd); 9672797Sjg rw_exit(&nvfd->nvf_lock); 9682797Sjg return (DDI_SUCCESS); 9692797Sjg } 9702797Sjg 9712797Sjg if (rw_tryupgrade(&nvfd->nvf_lock) == 0) { 9722797Sjg nvf_error("nvpflush: " 9732797Sjg "%s rw upgrade failed\n", nvfd->nvf_cache_path); 9742797Sjg rw_exit(&nvfd->nvf_lock); 9752797Sjg return (DDI_FAILURE); 9762797Sjg } 9772797Sjg if (((nvfd->nvf_pack_list) 9782797Sjg ((nvf_handle_t)nvfd, &nvl)) != DDI_SUCCESS) { 9792797Sjg nvf_error("nvpflush: " 9802797Sjg "%s nvlist construction failed\n", nvfd->nvf_cache_path); 9812797Sjg ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock)); 9822797Sjg rw_exit(&nvfd->nvf_lock); 
9832797Sjg return (DDI_FAILURE); 9842797Sjg } 9852797Sjg ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock)); 9862797Sjg 9872797Sjg NVF_CLEAR_DIRTY(nvfd); 9882797Sjg nvfd->nvf_flags |= NVF_F_FLUSHING; 9892797Sjg rw_exit(&nvfd->nvf_lock); 9902797Sjg 9912797Sjg rval = e_fwrite_nvlist(nvfd, nvl); 9922797Sjg nvlist_free(nvl); 9932797Sjg 9942797Sjg rw_enter(&nvfd->nvf_lock, RW_WRITER); 9952797Sjg nvfd->nvf_flags &= ~NVF_F_FLUSHING; 9962797Sjg if (rval == DDI_FAILURE) { 9972797Sjg if (NVF_IS_READONLY(nvfd)) { 9982797Sjg rval = DDI_SUCCESS; 9992797Sjg nvfd->nvf_flags &= ~(NVF_F_ERROR | NVF_F_DIRTY); 10002797Sjg } else if ((nvfd->nvf_flags & NVF_F_ERROR) == 0) { 10012797Sjg cmn_err(CE_CONT, 1002*7576SJerry.Gilliam@Sun.COM "%s: update failed\n", nvfd->nvf_cache_path); 10032797Sjg nvfd->nvf_flags |= NVF_F_ERROR | NVF_F_DIRTY; 10042797Sjg } 10052797Sjg } else { 10062797Sjg if (nvfd->nvf_flags & NVF_F_CREATE_MSG) { 10072797Sjg cmn_err(CE_CONT, 10082797Sjg "!Creating %s\n", nvfd->nvf_cache_path); 10092797Sjg nvfd->nvf_flags &= ~NVF_F_CREATE_MSG; 10102797Sjg } 10112797Sjg if (nvfd->nvf_flags & NVF_F_REBUILD_MSG) { 10122797Sjg cmn_err(CE_CONT, 10132797Sjg "!Rebuilding %s\n", nvfd->nvf_cache_path); 10142797Sjg nvfd->nvf_flags &= ~NVF_F_REBUILD_MSG; 10152797Sjg } 10162797Sjg if (nvfd->nvf_flags & NVF_F_ERROR) { 10172797Sjg cmn_err(CE_CONT, 10182797Sjg "%s: update now ok\n", nvfd->nvf_cache_path); 10192797Sjg nvfd->nvf_flags &= ~NVF_F_ERROR; 10202797Sjg } 10212797Sjg /* 10222797Sjg * The file may need to be flushed again if the cached 10232797Sjg * data was touched while writing the earlier contents. 
10242797Sjg */ 10252797Sjg if (NVF_IS_DIRTY(nvfd)) 10262797Sjg rval = DDI_FAILURE; 10272797Sjg } 10282797Sjg 10292797Sjg rw_exit(&nvfd->nvf_lock); 10302797Sjg return (rval); 10312797Sjg } 10322797Sjg 10332797Sjg 10342797Sjg static void 10352797Sjg nvpflush_daemon(void) 10362797Sjg { 10372797Sjg callb_cpr_t cprinfo; 10382797Sjg nvfd_t *nvfdp, *nextfdp; 10392797Sjg clock_t clk; 10402797Sjg int rval; 10412797Sjg int want_wakeup; 10422797Sjg int is_now_clean; 10432797Sjg 10442797Sjg ASSERT(modrootloaded); 10452797Sjg 10462797Sjg nvpflush_thread = curthread; 10472797Sjg NVPDAEMON_DEBUG((CE_CONT, "nvpdaemon: init\n")); 10482797Sjg 10492797Sjg CALLB_CPR_INIT(&cprinfo, &nvpflush_lock, callb_generic_cpr, "nvp"); 10502797Sjg mutex_enter(&nvpflush_lock); 10512797Sjg for (;;) { 10522797Sjg CALLB_CPR_SAFE_BEGIN(&cprinfo); 10532797Sjg while (do_nvpflush == 0) { 10542797Sjg clk = cv_timedwait(&nvpflush_cv, &nvpflush_lock, 10552797Sjg ddi_get_lbolt() + 1056*7576SJerry.Gilliam@Sun.COM (nvpdaemon_idle_time * TICKS_PER_SECOND)); 1057*7576SJerry.Gilliam@Sun.COM if ((clk == -1 && do_nvpflush == 0 && 1058*7576SJerry.Gilliam@Sun.COM nvpflush_timer_busy == 0) || sys_shutdown) { 10592797Sjg /* 10602797Sjg * Note that CALLB_CPR_EXIT calls mutex_exit() 10612797Sjg * on the lock passed in to CALLB_CPR_INIT, 10622797Sjg * so the lock must be held when invoking it. 
10632797Sjg */ 10642797Sjg CALLB_CPR_SAFE_END(&cprinfo, &nvpflush_lock); 10652797Sjg NVPDAEMON_DEBUG((CE_CONT, "nvpdaemon: exit\n")); 10662797Sjg ASSERT(mutex_owned(&nvpflush_lock)); 10672797Sjg nvpflush_thr_id = NULL; 10682797Sjg nvpflush_daemon_active = 0; 10692797Sjg CALLB_CPR_EXIT(&cprinfo); 10702797Sjg thread_exit(); 10712797Sjg } 10722797Sjg } 10732797Sjg CALLB_CPR_SAFE_END(&cprinfo, &nvpflush_lock); 10742797Sjg 10752797Sjg nvpbusy = 1; 10762797Sjg want_wakeup = 0; 10772797Sjg do_nvpflush = 0; 10782797Sjg mutex_exit(&nvpflush_lock); 10792797Sjg 10802797Sjg /* 10812797Sjg * Try flushing what's dirty, reschedule if there's 10822797Sjg * a failure or data gets marked as dirty again. 10832797Sjg * First move each file marked dirty to the dirty 10842797Sjg * list to avoid locking the list across the write. 10852797Sjg */ 10862797Sjg mutex_enter(&nvf_cache_mutex); 10872797Sjg for (nvfdp = list_head(&nvf_cache_files); 10882797Sjg nvfdp; nvfdp = nextfdp) { 10892797Sjg nextfdp = list_next(&nvf_cache_files, nvfdp); 10902797Sjg rw_enter(&nvfdp->nvf_lock, RW_READER); 10912797Sjg if (NVF_IS_DIRTY(nvfdp)) { 10922797Sjg list_remove(&nvf_cache_files, nvfdp); 10932797Sjg list_insert_tail(&nvf_dirty_files, nvfdp); 10942797Sjg rw_exit(&nvfdp->nvf_lock); 10952797Sjg } else { 10962797Sjg NVPDAEMON_DEBUG((CE_CONT, 10972797Sjg "nvpdaemon: not dirty %s\n", 10982797Sjg nvfdp->nvf_cache_path)); 10992797Sjg rw_exit(&nvfdp->nvf_lock); 11002797Sjg } 11012797Sjg } 11022797Sjg mutex_exit(&nvf_cache_mutex); 11032797Sjg 11042797Sjg /* 11052797Sjg * Now go through the dirty list 11062797Sjg */ 11072797Sjg for (nvfdp = list_head(&nvf_dirty_files); 11082797Sjg nvfdp; nvfdp = nextfdp) { 11092797Sjg nextfdp = list_next(&nvf_dirty_files, nvfdp); 11102797Sjg 11112797Sjg is_now_clean = 0; 11122797Sjg rw_enter(&nvfdp->nvf_lock, RW_READER); 11132797Sjg if (NVF_IS_DIRTY(nvfdp)) { 11142797Sjg NVPDAEMON_DEBUG((CE_CONT, 11152797Sjg "nvpdaemon: flush %s\n", 11162797Sjg nvfdp->nvf_cache_path)); 11172797Sjg 
rw_exit(&nvfdp->nvf_lock); 11182797Sjg rval = nvpflush_one(nvfdp); 11192797Sjg rw_enter(&nvfdp->nvf_lock, RW_READER); 11202797Sjg if (rval != DDI_SUCCESS || 11212797Sjg NVF_IS_DIRTY(nvfdp)) { 11222797Sjg rw_exit(&nvfdp->nvf_lock); 11232797Sjg NVPDAEMON_DEBUG((CE_CONT, 11242797Sjg "nvpdaemon: %s dirty again\n", 11252797Sjg nvfdp->nvf_cache_path)); 11262797Sjg want_wakeup = 1; 11272797Sjg } else { 11282797Sjg rw_exit(&nvfdp->nvf_lock); 11292797Sjg nvf_write_is_complete(nvfdp); 11302797Sjg is_now_clean = 1; 11312797Sjg } 11322797Sjg } else { 11332797Sjg NVPDAEMON_DEBUG((CE_CONT, 11342797Sjg "nvpdaemon: not dirty %s\n", 11352797Sjg nvfdp->nvf_cache_path)); 11362797Sjg rw_exit(&nvfdp->nvf_lock); 11372797Sjg is_now_clean = 1; 11382797Sjg } 11392797Sjg 11402797Sjg if (is_now_clean) { 11412797Sjg mutex_enter(&nvf_cache_mutex); 11422797Sjg list_remove(&nvf_dirty_files, nvfdp); 11432797Sjg list_insert_tail(&nvf_cache_files, 11442797Sjg nvfdp); 11452797Sjg mutex_exit(&nvf_cache_mutex); 11462797Sjg } 11472797Sjg } 11482797Sjg 11492797Sjg if (want_wakeup) 11502797Sjg nvf_wake_daemon(); 11512797Sjg 11522797Sjg mutex_enter(&nvpflush_lock); 11532797Sjg nvpbusy = 0; 11542797Sjg } 11552797Sjg } 1156