12797Sjg /*
22797Sjg * CDDL HEADER START
32797Sjg *
42797Sjg * The contents of this file are subject to the terms of the
52797Sjg * Common Development and Distribution License (the "License").
62797Sjg * You may not use this file except in compliance with the License.
72797Sjg *
82797Sjg * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
92797Sjg * or http://www.opensolaris.org/os/licensing.
102797Sjg * See the License for the specific language governing permissions
112797Sjg * and limitations under the License.
122797Sjg *
132797Sjg * When distributing Covered Code, include this CDDL HEADER in each
142797Sjg * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
152797Sjg * If applicable, add the following below this CDDL HEADER, with the
162797Sjg * fields enclosed by brackets "[]" replaced with your own identifying
172797Sjg * information: Portions Copyright [yyyy] [name of copyright owner]
182797Sjg *
192797Sjg * CDDL HEADER END
202797Sjg */
212797Sjg /*
22*11066Srafael.vanoni@sun.com * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
232797Sjg * Use is subject to license terms.
242797Sjg */
252797Sjg
262797Sjg #include <sys/note.h>
272797Sjg #include <sys/t_lock.h>
282797Sjg #include <sys/cmn_err.h>
292797Sjg #include <sys/instance.h>
302797Sjg #include <sys/conf.h>
312797Sjg #include <sys/stat.h>
322797Sjg #include <sys/ddi.h>
332797Sjg #include <sys/hwconf.h>
342797Sjg #include <sys/sunddi.h>
352797Sjg #include <sys/sunndi.h>
362797Sjg #include <sys/ddi_impldefs.h>
372797Sjg #include <sys/ndi_impldefs.h>
382797Sjg #include <sys/modctl.h>
392797Sjg #include <sys/dacf.h>
402797Sjg #include <sys/promif.h>
412797Sjg #include <sys/cpuvar.h>
422797Sjg #include <sys/pathname.h>
432797Sjg #include <sys/kobj.h>
442797Sjg #include <sys/devcache.h>
452797Sjg #include <sys/devcache_impl.h>
462797Sjg #include <sys/sysmacros.h>
472797Sjg #include <sys/varargs.h>
482797Sjg #include <sys/callb.h>
492797Sjg
502797Sjg /*
512797Sjg * This facility provides interfaces to clients to register,
522797Sjg * read and update cache data in persisted backing store files,
532797Sjg * usually in /etc/devices. The data persisted through this
542797Sjg * mechanism should be stateless data, functioning in the sense
552797Sjg * of a cache. Writes are performed by a background daemon
562797Sjg * thread, permitting a client to schedule an update without
572797Sjg * blocking, then continue updating the data state in
582797Sjg * parallel. The data is only locked by the daemon thread
592797Sjg * to pack the data in preparation for the write.
602797Sjg *
612797Sjg * Data persisted through this mechanism should be capable
622797Sjg * of being regenerated through normal system operation,
632797Sjg * for example attaching all disk devices would cause all
642797Sjg * devids to be registered for those devices. By caching
652797Sjg * a devid-device tuple, the system can operate in a
662797Sjg * more optimal way, directly attaching the device mapped
672797Sjg * to a devid, rather than burdensomely driving attach of
682797Sjg * the entire device tree to discover a single device.
692797Sjg *
702797Sjg * Note that a client should only need to include
712797Sjg * <sys/devcache.h> for the supported interfaces.
722797Sjg *
732797Sjg * The data per client is entirely within the control of
742797Sjg * the client. When reading, data unpacked from the backing
752797Sjg * store should be inserted in the list. The pointer to
765331Samw * the list can be retrieved via nvf_list(). When writing,
772797Sjg * the data on the list is to be packed and returned to the
782797Sjg * nvpdaemon as an nvlist.
792797Sjg *
802797Sjg * Obvious restrictions are imposed by the limits of the
812797Sjg * nvlist format. The data cannot be read or written
822797Sjg * piecemeal, and large amounts of data aren't recommended.
832797Sjg * However, nvlists do allow that data be named and typed
842797Sjg * and can be size-of-int invariant, and the cached data
852797Sjg * can be versioned conveniently.
862797Sjg *
872797Sjg * The registration involves two steps: a handle is
882797Sjg * allocated by calling the registration function.
892797Sjg * This sets up the data referenced by the handle and
902797Sjg * initializes the lock. Following registration, the
912797Sjg * client must initialize the data list. The list
922797Sjg * interfaces require that the list element with offset
932797Sjg * to the node link be provided. The format of the
942797Sjg * list element is under the control of the client.
952797Sjg *
962797Sjg * Locking: the address of the data list r/w lock provided
972797Sjg * can be accessed with nvf_lock(). The lock must be held
982797Sjg * as reader when traversing the list or checking state,
992797Sjg * such as nvf_is_dirty(). The lock must be held as
1002797Sjg * writer when updating the list or marking it dirty.
1012797Sjg * The lock must not be held when waking the daemon.
1022797Sjg *
1032797Sjg * The data r/w lock is held as writer when the pack,
1042797Sjg * unpack and free list handlers are called. The
1052797Sjg * lock should not be dropped and must be still held
1062797Sjg * upon return. The client should also hold the lock
1072797Sjg * as reader when checking if the list is dirty, and
1082797Sjg * as writer when marking the list dirty or initiating
1092797Sjg * a read.
1102797Sjg *
1112797Sjg * The asynchronous nature of updates allows for the
1122797Sjg * possibility that the data may continue to be updated
1132797Sjg * once the daemon has been notified that an update is
1142797Sjg * desired. The data only needs to be locked against
1152797Sjg * updates when packing the data into the form to be
1162797Sjg * written. When the write of the packed data has
1172797Sjg * completed, the daemon will automatically reschedule
1182797Sjg * an update if the data was marked dirty after the
1192797Sjg * point at which it was packed. Before beginning an
1202797Sjg * update, the daemon attempts to lock the data as
1212797Sjg * writer; if the writer lock is already held, it
1222797Sjg * backs off and retries later. The model is to give
1232797Sjg * priority to the kernel processes generating the
1242797Sjg * data, and that the nature of the data is that
1252797Sjg * it does not change often, can be re-generated when
1262797Sjg * needed, so updates should not happen often and
1272797Sjg * can be delayed until the data stops changing.
1282797Sjg * The client may update the list or mark it dirty
1292797Sjg * any time it is able to acquire the lock as
1302797Sjg * writer first.
1312797Sjg *
1322797Sjg * A failed write will be retried after some delay,
1332797Sjg * in the hope that the cause of the error will be
1342797Sjg * transient, for example a filesystem with no space
1352797Sjg * available. An update on a read-only filesystem
1362797Sjg * is failed silently and not retried; this would be
1372797Sjg * the case when booted off install media.
1382797Sjg *
1392797Sjg * There is no unregister mechanism as of yet, as it
1402797Sjg * hasn't been needed so far.
1412797Sjg */
1422797Sjg
1432797Sjg /*
1442797Sjg * Global list of files registered and updated by the nvpflush
1452797Sjg * daemon, protected by the nvf_cache_mutex. While an
1462797Sjg * update is taking place, a file is temporarily moved to
1472797Sjg * the dirty list to avoid locking the primary list for
1482797Sjg * the duration of the update.
1492797Sjg */
1502797Sjg list_t nvf_cache_files;
1512797Sjg list_t nvf_dirty_files;
1522797Sjg kmutex_t nvf_cache_mutex;
1532797Sjg
1542797Sjg
1552797Sjg /*
1562797Sjg * Allow some delay from an update of the data before flushing
1572797Sjg * to permit simultaneous updates of multiple changes.
1582797Sjg * Changes in the data are expected to be bursty, ie
1592797Sjg * reconfig or hot-plug of a new adapter.
1602797Sjg *
1612797Sjg * kfio_report_error (default 0)
1622797Sjg * Set to 1 to enable some error messages related to low-level
1632797Sjg * kernel file i/o operations.
1642797Sjg *
1652797Sjg * nvpflush_delay (default 10)
1662797Sjg * The number of seconds after data is marked dirty before the
1672797Sjg * flush daemon is triggered to flush the data. A longer period
1682797Sjg * of time permits more data updates per write. Note that
1692797Sjg * every update resets the timer so no repository write will
1702797Sjg * occur while data is being updated continuously.
1712797Sjg *
1722797Sjg * nvpdaemon_idle_time (default 60)
1732797Sjg * The number of seconds the daemon will sleep idle before exiting.
1742797Sjg *
1752797Sjg */
1762797Sjg #define NVPFLUSH_DELAY 10
1772797Sjg #define NVPDAEMON_IDLE_TIME 60
1782797Sjg
1792797Sjg #define TICKS_PER_SECOND (drv_usectohz(1000000))
1802797Sjg
1812797Sjg /*
1822797Sjg * Tunables
1832797Sjg */
1842797Sjg int kfio_report_error = 0; /* kernel file i/o operations */
1852797Sjg int kfio_disable_read = 0; /* disable all reads */
1862797Sjg int kfio_disable_write = 0; /* disable all writes */
1872797Sjg
1882797Sjg int nvpflush_delay = NVPFLUSH_DELAY;
1892797Sjg int nvpdaemon_idle_time = NVPDAEMON_IDLE_TIME;
1902797Sjg
1912797Sjg static timeout_id_t nvpflush_id = 0;
1922797Sjg static int nvpflush_timer_busy = 0;
1932797Sjg static int nvpflush_daemon_active = 0;
1942797Sjg static kthread_t *nvpflush_thr_id = 0;
1952797Sjg
1962797Sjg static int do_nvpflush = 0;
1972797Sjg static int nvpbusy = 0;
1982797Sjg static kmutex_t nvpflush_lock;
1992797Sjg static kcondvar_t nvpflush_cv;
2002797Sjg static kthread_id_t nvpflush_thread;
2012797Sjg static clock_t nvpticks;
2022797Sjg
2032797Sjg static void nvpflush_daemon(void);
2042797Sjg
2052797Sjg #ifdef DEBUG
2062797Sjg int nvpdaemon_debug = 0;
2072797Sjg int kfio_debug = 0;
2082797Sjg #endif /* DEBUG */
2092797Sjg
2102797Sjg extern int modrootloaded;
2112797Sjg extern void mdi_read_devices_files(void);
2122797Sjg extern void mdi_clean_vhcache(void);
2137576SJerry.Gilliam@Sun.COM extern int sys_shutdown;
2142797Sjg
2152797Sjg /*
2162797Sjg * Initialize the overall cache file management
2172797Sjg */
2182797Sjg void
i_ddi_devices_init(void)2192797Sjg i_ddi_devices_init(void)
2202797Sjg {
2212797Sjg list_create(&nvf_cache_files, sizeof (nvfd_t),
2222797Sjg offsetof(nvfd_t, nvf_link));
2232797Sjg list_create(&nvf_dirty_files, sizeof (nvfd_t),
2242797Sjg offsetof(nvfd_t, nvf_link));
2252797Sjg mutex_init(&nvf_cache_mutex, NULL, MUTEX_DEFAULT, NULL);
2264845Svikram retire_store_init();
2272797Sjg devid_cache_init();
2282797Sjg }
2292797Sjg
2302797Sjg /*
2312797Sjg * Read cache files
2322797Sjg * The files read here should be restricted to those
2332797Sjg * that may be required to mount root.
2342797Sjg */
2352797Sjg void
i_ddi_read_devices_files(void)2362797Sjg i_ddi_read_devices_files(void)
2372797Sjg {
2384845Svikram /*
2394845Svikram * The retire store should be the first file read as it
2404845Svikram * may need to offline devices. kfio_disable_read is not
2414845Svikram * used for retire. For the rationale see the tunable
2424845Svikram * ddi_retire_store_bypass and comments in:
2434845Svikram * uts/common/os/retire_store.c
2444845Svikram */
2454845Svikram
2464845Svikram retire_store_read();
2474845Svikram
2482797Sjg if (!kfio_disable_read) {
2492797Sjg mdi_read_devices_files();
2502797Sjg devid_cache_read();
2512797Sjg }
2522797Sjg }
2532797Sjg
2542797Sjg void
i_ddi_start_flush_daemon(void)2552797Sjg i_ddi_start_flush_daemon(void)
2562797Sjg {
2572797Sjg nvfd_t *nvfdp;
2582797Sjg
2592797Sjg ASSERT(i_ddi_io_initialized());
2602797Sjg
2612797Sjg mutex_init(&nvpflush_lock, NULL, MUTEX_DRIVER, NULL);
2622797Sjg cv_init(&nvpflush_cv, NULL, CV_DRIVER, NULL);
2632797Sjg
2642797Sjg mutex_enter(&nvf_cache_mutex);
2652797Sjg for (nvfdp = list_head(&nvf_cache_files); nvfdp;
2662797Sjg nvfdp = list_next(&nvf_cache_files, nvfdp)) {
2672797Sjg if (NVF_IS_DIRTY(nvfdp)) {
2682797Sjg nvf_wake_daemon();
2692797Sjg break;
2702797Sjg }
2712797Sjg }
2722797Sjg mutex_exit(&nvf_cache_mutex);
2732797Sjg }
2742797Sjg
/*
 * Invoke the cleanup routines of the devid cache and of the
 * mdi vhci cache, in that order.
 */
void
i_ddi_clean_devices_files(void)
{
	devid_cache_cleanup();
	mdi_clean_vhcache();
}
2812797Sjg
2822797Sjg /*
2832797Sjg * Register a cache file to be managed and updated by the nvpflush daemon.
2842797Sjg * All operations are performed through the returned handle.
2852797Sjg * There is no unregister mechanism for now.
2862797Sjg */
2872797Sjg nvf_handle_t
nvf_register_file(nvf_ops_t * ops)2882797Sjg nvf_register_file(nvf_ops_t *ops)
2892797Sjg {
2902797Sjg nvfd_t *nvfdp;
2912797Sjg
2922797Sjg nvfdp = kmem_zalloc(sizeof (*nvfdp), KM_SLEEP);
2932797Sjg
2942797Sjg nvfdp->nvf_ops = ops;
2952797Sjg nvfdp->nvf_flags = 0;
2962797Sjg rw_init(&nvfdp->nvf_lock, NULL, RW_DRIVER, NULL);
2972797Sjg
2982797Sjg mutex_enter(&nvf_cache_mutex);
2992797Sjg list_insert_tail(&nvf_cache_files, nvfdp);
3002797Sjg mutex_exit(&nvf_cache_mutex);
3012797Sjg
3022797Sjg return ((nvf_handle_t)nvfdp);
3032797Sjg }
3042797Sjg
3052797Sjg /*PRINTFLIKE1*/
3062797Sjg void
nvf_error(const char * fmt,...)3072797Sjg nvf_error(const char *fmt, ...)
3082797Sjg {
3092797Sjg va_list ap;
3102797Sjg
3112797Sjg if (kfio_report_error) {
3122797Sjg va_start(ap, fmt);
3132797Sjg vcmn_err(CE_NOTE, fmt, ap);
3142797Sjg va_end(ap);
3152797Sjg }
3162797Sjg }
3172797Sjg
3182797Sjg /*
3192797Sjg * Some operations clients may use to manage the data
3202797Sjg * to be persisted in a cache file.
3212797Sjg */
3222797Sjg char *
nvf_cache_name(nvf_handle_t handle)3232797Sjg nvf_cache_name(nvf_handle_t handle)
3242797Sjg {
3252797Sjg return (((nvfd_t *)handle)->nvf_cache_path);
3262797Sjg }
3272797Sjg
3282797Sjg krwlock_t *
nvf_lock(nvf_handle_t handle)3292797Sjg nvf_lock(nvf_handle_t handle)
3302797Sjg {
3312797Sjg return (&(((nvfd_t *)handle)->nvf_lock));
3322797Sjg }
3332797Sjg
3342797Sjg list_t *
nvf_list(nvf_handle_t handle)3352797Sjg nvf_list(nvf_handle_t handle)
3362797Sjg {
3372797Sjg return (&(((nvfd_t *)handle)->nvf_data_list));
3382797Sjg }
3392797Sjg
3402797Sjg void
nvf_mark_dirty(nvf_handle_t handle)3412797Sjg nvf_mark_dirty(nvf_handle_t handle)
3422797Sjg {
3432797Sjg ASSERT(RW_WRITE_HELD(&(((nvfd_t *)handle)->nvf_lock)));
3442797Sjg NVF_MARK_DIRTY((nvfd_t *)handle);
3452797Sjg }
3462797Sjg
3472797Sjg int
nvf_is_dirty(nvf_handle_t handle)3482797Sjg nvf_is_dirty(nvf_handle_t handle)
3492797Sjg {
3502797Sjg ASSERT(RW_LOCK_HELD(&(((nvfd_t *)handle)->nvf_lock)));
3512797Sjg return (NVF_IS_DIRTY((nvfd_t *)handle));
3522797Sjg }
3532797Sjg
/*
 * Compute the 16-bit checksum of a buffer: the XOR of every
 * native-byte-order 16-bit word in the buffer.  If the length is odd,
 * the value of the trailing byte is XORed into the result as well.
 *
 * The words are accumulated one byte lane at a time rather than by
 * loading through a uint16_t pointer, so the buffer does not need to
 * be 2-byte aligned.  The result is identical to XORing the words
 * as they lie in memory.
 *
 *	buf	- data to checksum
 *	buflen	- number of bytes at buf
 *
 * Returns the checksum, or 0 for a NULL or empty buffer.
 */
static uint16_t
nvp_cksum(uchar_t *buf, int64_t buflen)
{
	union {
		uint16_t	word;
		uchar_t		byte[2];
	} acc;
	uint16_t	tail = 0;
	int64_t		i;

	if (buf == NULL || buflen <= 0)
		return (0);

	/* An odd trailing byte contributes its plain value. */
	if ((buflen & 0x01) != 0) {
		buflen--;
		tail = buf[buflen];
	}

	/*
	 * byte[0] collects the first byte of every word and byte[1] the
	 * second, so acc.word ends up as the XOR of all the words in
	 * whatever byte order this machine uses.
	 */
	acc.byte[0] = 0;
	acc.byte[1] = 0;
	for (i = 0; i < buflen; i += 2) {
		acc.byte[0] ^= buf[i];
		acc.byte[1] ^= buf[i + 1];
	}

	return ((uint16_t)(acc.word ^ tail));
}
3702797Sjg
3712797Sjg int
fread_nvlist(char * filename,nvlist_t ** ret_nvlist)3722797Sjg fread_nvlist(char *filename, nvlist_t **ret_nvlist)
3732797Sjg {
3742797Sjg struct _buf *file;
3752797Sjg nvpf_hdr_t hdr;
3762797Sjg char *buf;
3772797Sjg nvlist_t *nvl;
3782797Sjg int rval;
3792797Sjg uint_t offset;
3802797Sjg int n;
3812797Sjg char c;
3822797Sjg uint16_t cksum, hdrsum;
3832797Sjg
3842797Sjg *ret_nvlist = NULL;
3852797Sjg
3862797Sjg file = kobj_open_file(filename);
3872797Sjg if (file == (struct _buf *)-1) {
3882797Sjg KFDEBUG((CE_CONT, "cannot open file: %s\n", filename));
3892797Sjg return (ENOENT);
3902797Sjg }
3912797Sjg
3922797Sjg offset = 0;
3932797Sjg n = kobj_read_file(file, (char *)&hdr, sizeof (hdr), offset);
3942797Sjg if (n != sizeof (hdr)) {
3952797Sjg kobj_close_file(file);
3962797Sjg if (n < 0) {
3972797Sjg nvf_error("error reading header: %s\n", filename);
3982797Sjg return (EIO);
3992797Sjg } else if (n == 0) {
4002797Sjg KFDEBUG((CE_CONT, "file empty: %s\n", filename));
4012797Sjg } else {
4022797Sjg nvf_error("header size incorrect: %s\n", filename);
4032797Sjg }
4042797Sjg return (EINVAL);
4052797Sjg }
4062797Sjg offset += n;
4072797Sjg
4082797Sjg KFDEBUG2((CE_CONT, "nvpf_magic: 0x%x\n", hdr.nvpf_magic));
4092797Sjg KFDEBUG2((CE_CONT, "nvpf_version: %d\n", hdr.nvpf_version));
4102797Sjg KFDEBUG2((CE_CONT, "nvpf_size: %lld\n",
4117576SJerry.Gilliam@Sun.COM (longlong_t)hdr.nvpf_size));
4122797Sjg KFDEBUG2((CE_CONT, "nvpf_hdr_chksum: 0x%x\n",
4137576SJerry.Gilliam@Sun.COM hdr.nvpf_hdr_chksum));
4142797Sjg KFDEBUG2((CE_CONT, "nvpf_chksum: 0x%x\n", hdr.nvpf_chksum));
4152797Sjg
4162797Sjg cksum = hdr.nvpf_hdr_chksum;
4172797Sjg hdr.nvpf_hdr_chksum = 0;
4182797Sjg hdrsum = nvp_cksum((uchar_t *)&hdr, sizeof (hdr));
4192797Sjg
4202797Sjg if (hdr.nvpf_magic != NVPF_HDR_MAGIC ||
4212797Sjg hdr.nvpf_version != NVPF_HDR_VERSION || hdrsum != cksum) {
4222797Sjg kobj_close_file(file);
4232797Sjg if (hdrsum != cksum) {
4242797Sjg nvf_error("%s: checksum error "
4252797Sjg "(actual 0x%x, expected 0x%x)\n",
4262797Sjg filename, hdrsum, cksum);
4272797Sjg }
4282797Sjg nvf_error("%s: header information incorrect", filename);
4292797Sjg return (EINVAL);
4302797Sjg }
4312797Sjg
4322797Sjg ASSERT(hdr.nvpf_size >= 0);
4332797Sjg
4342797Sjg buf = kmem_alloc(hdr.nvpf_size, KM_SLEEP);
4352797Sjg n = kobj_read_file(file, buf, hdr.nvpf_size, offset);
4362797Sjg if (n != hdr.nvpf_size) {
4372797Sjg kmem_free(buf, hdr.nvpf_size);
4382797Sjg kobj_close_file(file);
4392797Sjg if (n < 0) {
4402797Sjg nvf_error("%s: read error %d", filename, n);
4412797Sjg } else {
4422797Sjg nvf_error("%s: incomplete read %d/%lld",
4437576SJerry.Gilliam@Sun.COM filename, n, (longlong_t)hdr.nvpf_size);
4442797Sjg }
4452797Sjg return (EINVAL);
4462797Sjg }
4472797Sjg offset += n;
4482797Sjg
4492797Sjg rval = kobj_read_file(file, &c, 1, offset);
4502797Sjg kobj_close_file(file);
4512797Sjg if (rval > 0) {
4522797Sjg nvf_error("%s is larger than %lld\n",
4537576SJerry.Gilliam@Sun.COM filename, (longlong_t)hdr.nvpf_size);
4542797Sjg kmem_free(buf, hdr.nvpf_size);
4552797Sjg return (EINVAL);
4562797Sjg }
4572797Sjg
4582797Sjg cksum = nvp_cksum((uchar_t *)buf, hdr.nvpf_size);
4592797Sjg if (hdr.nvpf_chksum != cksum) {
4602797Sjg nvf_error("%s: checksum error (actual 0x%x, expected 0x%x)\n",
4612797Sjg filename, hdr.nvpf_chksum, cksum);
4622797Sjg kmem_free(buf, hdr.nvpf_size);
4632797Sjg return (EINVAL);
4642797Sjg }
4652797Sjg
4662797Sjg nvl = NULL;
4672797Sjg rval = nvlist_unpack(buf, hdr.nvpf_size, &nvl, 0);
4682797Sjg if (rval != 0) {
4692797Sjg nvf_error("%s: error %d unpacking nvlist\n",
4707576SJerry.Gilliam@Sun.COM filename, rval);
4712797Sjg kmem_free(buf, hdr.nvpf_size);
4722797Sjg return (EINVAL);
4732797Sjg }
4742797Sjg
4752797Sjg kmem_free(buf, hdr.nvpf_size);
4762797Sjg *ret_nvlist = nvl;
4772797Sjg return (0);
4782797Sjg }
4792797Sjg
4802797Sjg static int
kfcreate(char * filename,kfile_t ** kfilep)4812797Sjg kfcreate(char *filename, kfile_t **kfilep)
4822797Sjg {
4832797Sjg kfile_t *fp;
4842797Sjg int rval;
4852797Sjg
4862797Sjg ASSERT(modrootloaded);
4872797Sjg
4882797Sjg fp = kmem_alloc(sizeof (kfile_t), KM_SLEEP);
4892797Sjg
4902797Sjg fp->kf_vnflags = FCREAT | FWRITE | FTRUNC;
4912797Sjg fp->kf_fname = filename;
4922797Sjg fp->kf_fpos = 0;
4932797Sjg fp->kf_state = 0;
4942797Sjg
4952797Sjg KFDEBUG((CE_CONT, "create: %s flags 0x%x\n",
4967576SJerry.Gilliam@Sun.COM filename, fp->kf_vnflags));
4972797Sjg rval = vn_open(filename, UIO_SYSSPACE, fp->kf_vnflags,
4982797Sjg 0444, &fp->kf_vp, CRCREAT, 0);
4992797Sjg if (rval != 0) {
5002797Sjg kmem_free(fp, sizeof (kfile_t));
5012797Sjg KFDEBUG((CE_CONT, "%s: create error %d\n",
5027576SJerry.Gilliam@Sun.COM filename, rval));
5032797Sjg return (rval);
5042797Sjg }
5052797Sjg
5062797Sjg *kfilep = fp;
5072797Sjg return (0);
5082797Sjg }
5092797Sjg
5102797Sjg static int
kfremove(char * filename)5112797Sjg kfremove(char *filename)
5122797Sjg {
5132797Sjg int rval;
5142797Sjg
5152797Sjg KFDEBUG((CE_CONT, "remove: %s\n", filename));
5162797Sjg rval = vn_remove(filename, UIO_SYSSPACE, RMFILE);
5172797Sjg if (rval != 0) {
5182797Sjg KFDEBUG((CE_CONT, "%s: remove error %d\n",
5197576SJerry.Gilliam@Sun.COM filename, rval));
5202797Sjg }
5212797Sjg return (rval);
5222797Sjg }
5232797Sjg
5242797Sjg static int
kfread(kfile_t * fp,char * buf,ssize_t bufsiz,ssize_t * ret_n)5252797Sjg kfread(kfile_t *fp, char *buf, ssize_t bufsiz, ssize_t *ret_n)
5262797Sjg {
5272797Sjg ssize_t resid;
5282797Sjg int err;
5292797Sjg ssize_t n;
5302797Sjg
5312797Sjg ASSERT(modrootloaded);
5322797Sjg
5332797Sjg if (fp->kf_state != 0)
5342797Sjg return (fp->kf_state);
5352797Sjg
5362797Sjg err = vn_rdwr(UIO_READ, fp->kf_vp, buf, bufsiz, fp->kf_fpos,
5377576SJerry.Gilliam@Sun.COM UIO_SYSSPACE, 0, (rlim64_t)0, kcred, &resid);
5382797Sjg if (err != 0) {
5392797Sjg KFDEBUG((CE_CONT, "%s: read error %d\n",
5407576SJerry.Gilliam@Sun.COM fp->kf_fname, err));
5412797Sjg fp->kf_state = err;
5422797Sjg return (err);
5432797Sjg }
5442797Sjg
5452797Sjg ASSERT(resid >= 0 && resid <= bufsiz);
5462797Sjg n = bufsiz - resid;
5472797Sjg
5482797Sjg KFDEBUG1((CE_CONT, "%s: read %ld bytes ok %ld bufsiz, %ld resid\n",
5497576SJerry.Gilliam@Sun.COM fp->kf_fname, n, bufsiz, resid));
5502797Sjg
5512797Sjg fp->kf_fpos += n;
5522797Sjg *ret_n = n;
5532797Sjg return (0);
5542797Sjg }
5552797Sjg
5562797Sjg static int
kfwrite(kfile_t * fp,char * buf,ssize_t bufsiz,ssize_t * ret_n)5572797Sjg kfwrite(kfile_t *fp, char *buf, ssize_t bufsiz, ssize_t *ret_n)
5582797Sjg {
5592797Sjg rlim64_t rlimit;
5602797Sjg ssize_t resid;
5612797Sjg int err;
5622797Sjg ssize_t len;
5632797Sjg ssize_t n = 0;
5642797Sjg
5652797Sjg ASSERT(modrootloaded);
5662797Sjg
5672797Sjg if (fp->kf_state != 0)
5682797Sjg return (fp->kf_state);
5692797Sjg
5702797Sjg len = bufsiz;
5712797Sjg rlimit = bufsiz + 1;
5722797Sjg for (;;) {
5732797Sjg err = vn_rdwr(UIO_WRITE, fp->kf_vp, buf, len, fp->kf_fpos,
5747576SJerry.Gilliam@Sun.COM UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid);
5752797Sjg if (err) {
5762797Sjg KFDEBUG((CE_CONT, "%s: write error %d\n",
5777576SJerry.Gilliam@Sun.COM fp->kf_fname, err));
5782797Sjg fp->kf_state = err;
5792797Sjg return (err);
5802797Sjg }
5812797Sjg
5822797Sjg KFDEBUG1((CE_CONT, "%s: write %ld bytes ok %ld resid\n",
5837576SJerry.Gilliam@Sun.COM fp->kf_fname, len-resid, resid));
5842797Sjg
5852797Sjg ASSERT(resid >= 0 && resid <= len);
5862797Sjg
5872797Sjg n += (len - resid);
5882797Sjg if (resid == 0)
5892797Sjg break;
5902797Sjg
5912797Sjg if (resid == len) {
5922797Sjg KFDEBUG((CE_CONT, "%s: filesystem full?\n",
5937576SJerry.Gilliam@Sun.COM fp->kf_fname));
5942797Sjg fp->kf_state = ENOSPC;
5952797Sjg return (ENOSPC);
5962797Sjg }
5972797Sjg
5982797Sjg len -= resid;
5992797Sjg buf += len;
6002797Sjg fp->kf_fpos += len;
6012797Sjg len = resid;
6022797Sjg }
6032797Sjg
6042797Sjg ASSERT(n == bufsiz);
6052797Sjg KFDEBUG1((CE_CONT, "%s: wrote %ld bytes ok\n", fp->kf_fname, n));
6062797Sjg
6072797Sjg *ret_n = n;
6082797Sjg return (0);
6092797Sjg }
6102797Sjg
6112797Sjg
6122797Sjg static int
kfclose(kfile_t * fp)6132797Sjg kfclose(kfile_t *fp)
6142797Sjg {
6152797Sjg int rval;
6162797Sjg
6172797Sjg KFDEBUG((CE_CONT, "close: %s\n", fp->kf_fname));
6182797Sjg
6192797Sjg if ((fp->kf_vnflags & FWRITE) && fp->kf_state == 0) {
6205331Samw rval = VOP_FSYNC(fp->kf_vp, FSYNC, kcred, NULL);
6212797Sjg if (rval != 0) {
6222797Sjg nvf_error("%s: sync error %d\n",
6237576SJerry.Gilliam@Sun.COM fp->kf_fname, rval);
6242797Sjg }
6252797Sjg KFDEBUG((CE_CONT, "%s: sync ok\n", fp->kf_fname));
6262797Sjg }
6272797Sjg
6287576SJerry.Gilliam@Sun.COM rval = VOP_CLOSE(fp->kf_vp, fp->kf_vnflags, 1,
6297576SJerry.Gilliam@Sun.COM (offset_t)0, kcred, NULL);
6302797Sjg if (rval != 0) {
6312797Sjg if (fp->kf_state == 0) {
6322797Sjg nvf_error("%s: close error %d\n",
6337576SJerry.Gilliam@Sun.COM fp->kf_fname, rval);
6342797Sjg }
6352797Sjg } else {
6362797Sjg if (fp->kf_state == 0)
6372797Sjg KFDEBUG((CE_CONT, "%s: close ok\n", fp->kf_fname));
6382797Sjg }
6392797Sjg
6402797Sjg VN_RELE(fp->kf_vp);
6412797Sjg kmem_free(fp, sizeof (kfile_t));
6422797Sjg return (rval);
6432797Sjg }
6442797Sjg
6452797Sjg static int
kfrename(char * oldname,char * newname)6462797Sjg kfrename(char *oldname, char *newname)
6472797Sjg {
6482797Sjg int rval;
6492797Sjg
6502797Sjg ASSERT(modrootloaded);
6512797Sjg
6522797Sjg KFDEBUG((CE_CONT, "renaming %s to %s\n", oldname, newname));
6532797Sjg
6542797Sjg if ((rval = vn_rename(oldname, newname, UIO_SYSSPACE)) != 0) {
6552797Sjg KFDEBUG((CE_CONT, "rename %s to %s: %d\n",
6567576SJerry.Gilliam@Sun.COM oldname, newname, rval));
6572797Sjg }
6582797Sjg
6592797Sjg return (rval);
6602797Sjg }
6612797Sjg
6622797Sjg int
fwrite_nvlist(char * filename,nvlist_t * nvl)6632797Sjg fwrite_nvlist(char *filename, nvlist_t *nvl)
6642797Sjg {
6652797Sjg char *buf;
6662797Sjg char *nvbuf;
6672797Sjg kfile_t *fp;
6682797Sjg char *newname;
6692797Sjg int len, err, err1;
6702797Sjg size_t buflen;
6712797Sjg ssize_t n;
6722797Sjg
6732797Sjg ASSERT(modrootloaded);
6742797Sjg
6752797Sjg nvbuf = NULL;
6762797Sjg err = nvlist_pack(nvl, &nvbuf, &buflen, NV_ENCODE_NATIVE, 0);
6772797Sjg if (err != 0) {
6782797Sjg nvf_error("%s: error %d packing nvlist\n",
6797576SJerry.Gilliam@Sun.COM filename, err);
6802797Sjg return (err);
6812797Sjg }
6822797Sjg
6832797Sjg buf = kmem_alloc(sizeof (nvpf_hdr_t) + buflen, KM_SLEEP);
6842797Sjg bzero(buf, sizeof (nvpf_hdr_t));
6852797Sjg
6862797Sjg ((nvpf_hdr_t *)buf)->nvpf_magic = NVPF_HDR_MAGIC;
6872797Sjg ((nvpf_hdr_t *)buf)->nvpf_version = NVPF_HDR_VERSION;
6882797Sjg ((nvpf_hdr_t *)buf)->nvpf_size = buflen;
6892797Sjg ((nvpf_hdr_t *)buf)->nvpf_chksum = nvp_cksum((uchar_t *)nvbuf, buflen);
6902797Sjg ((nvpf_hdr_t *)buf)->nvpf_hdr_chksum =
6917576SJerry.Gilliam@Sun.COM nvp_cksum((uchar_t *)buf, sizeof (nvpf_hdr_t));
6922797Sjg
6932797Sjg bcopy(nvbuf, buf + sizeof (nvpf_hdr_t), buflen);
6942797Sjg kmem_free(nvbuf, buflen);
6952797Sjg buflen += sizeof (nvpf_hdr_t);
6962797Sjg
6972797Sjg len = strlen(filename) + MAX_SUFFIX_LEN + 2;
6982797Sjg newname = kmem_alloc(len, KM_SLEEP);
6992797Sjg
7002797Sjg
7017576SJerry.Gilliam@Sun.COM (void) sprintf(newname, "%s.%s", filename, NEW_FILENAME_SUFFIX);
7022797Sjg
7032797Sjg /*
7042797Sjg * To make it unlikely we suffer data loss, write
7052797Sjg * data to the new temporary file. Once successful
7062797Sjg * complete the transaction by renaming the new file
7072797Sjg * to replace the previous.
7082797Sjg */
7092797Sjg
7102797Sjg if ((err = kfcreate(newname, &fp)) == 0) {
7112797Sjg err = kfwrite(fp, buf, buflen, &n);
7122797Sjg if (err) {
7132797Sjg nvf_error("%s: write error - %d\n",
7147576SJerry.Gilliam@Sun.COM newname, err);
7152797Sjg } else {
7162797Sjg if (n != buflen) {
7172797Sjg nvf_error(
7182797Sjg "%s: partial write %ld of %ld bytes\n",
7192797Sjg newname, n, buflen);
7202797Sjg nvf_error("%s: filesystem may be full?\n",
7212797Sjg newname);
7222797Sjg err = EIO;
7232797Sjg }
7242797Sjg }
7252797Sjg if ((err1 = kfclose(fp)) != 0) {
7262797Sjg nvf_error("%s: close error\n", newname);
7272797Sjg if (err == 0)
7282797Sjg err = err1;
7292797Sjg }
7302797Sjg if (err != 0) {
7312797Sjg if (kfremove(newname) != 0) {
7322797Sjg nvf_error("%s: remove failed\n",
7332797Sjg newname);
7342797Sjg }
7352797Sjg }
7362797Sjg } else {
7372797Sjg nvf_error("%s: create failed - %d\n", filename, err);
7382797Sjg }
7392797Sjg
7402797Sjg if (err == 0) {
7412797Sjg if ((err = kfrename(newname, filename)) != 0) {
7422797Sjg nvf_error("%s: rename from %s failed\n",
7437576SJerry.Gilliam@Sun.COM newname, filename);
7442797Sjg }
7452797Sjg }
7462797Sjg
7472797Sjg kmem_free(newname, len);
7482797Sjg kmem_free(buf, buflen);
7492797Sjg
7502797Sjg return (err);
7512797Sjg }
7522797Sjg
7532797Sjg static int
e_fwrite_nvlist(nvfd_t * nvfd,nvlist_t * nvl)7542797Sjg e_fwrite_nvlist(nvfd_t *nvfd, nvlist_t *nvl)
7552797Sjg {
7562797Sjg int err;
7572797Sjg
7582797Sjg if ((err = fwrite_nvlist(nvfd->nvf_cache_path, nvl)) == 0)
7592797Sjg return (DDI_SUCCESS);
7602797Sjg else {
7612797Sjg if (err == EROFS)
7622797Sjg NVF_MARK_READONLY(nvfd);
7632797Sjg return (DDI_FAILURE);
7642797Sjg }
7652797Sjg }
7662797Sjg
7672797Sjg static void
nvp_list_free(nvfd_t * nvf)7682797Sjg nvp_list_free(nvfd_t *nvf)
7692797Sjg {
7702797Sjg ASSERT(RW_WRITE_HELD(&nvf->nvf_lock));
7712797Sjg (nvf->nvf_list_free)((nvf_handle_t)nvf);
7722797Sjg ASSERT(RW_WRITE_HELD(&nvf->nvf_lock));
7732797Sjg }
7742797Sjg
7752797Sjg /*
7762797Sjg * Read a file in the nvlist format
7772797Sjg * EIO - i/o error during read
7782797Sjg * ENOENT - file not found
7792797Sjg * EINVAL - file contents corrupted
7802797Sjg */
7812797Sjg static int
fread_nvp_list(nvfd_t * nvfd)7822797Sjg fread_nvp_list(nvfd_t *nvfd)
7832797Sjg {
7842797Sjg nvlist_t *nvl;
7852797Sjg nvpair_t *nvp;
7862797Sjg char *name;
7872797Sjg nvlist_t *sublist;
7882797Sjg int rval;
7892797Sjg int rv;
7902797Sjg
7912797Sjg ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock)));
7922797Sjg
7932797Sjg rval = fread_nvlist(nvfd->nvf_cache_path, &nvl);
7942797Sjg if (rval != 0)
7952797Sjg return (rval);
7962797Sjg ASSERT(nvl != NULL);
7972797Sjg
7982797Sjg nvp = NULL;
7992797Sjg while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
8002797Sjg name = nvpair_name(nvp);
8012797Sjg ASSERT(strlen(name) > 0);
8022797Sjg
8032797Sjg switch (nvpair_type(nvp)) {
8042797Sjg case DATA_TYPE_NVLIST:
8052797Sjg rval = nvpair_value_nvlist(nvp, &sublist);
8062797Sjg if (rval != 0) {
8072797Sjg nvf_error(
8082797Sjg "nvpair_value_nvlist error %s %d\n",
8092797Sjg name, rval);
8102797Sjg goto error;
8112797Sjg }
8122797Sjg
8132797Sjg /*
8142797Sjg * unpack nvlist for this device and
8152797Sjg * add elements to data list.
8162797Sjg */
8172797Sjg ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock)));
8182797Sjg rv = (nvfd->nvf_unpack_nvlist)
8192797Sjg ((nvf_handle_t)nvfd, sublist, name);
8202797Sjg ASSERT(RW_WRITE_HELD(&(nvfd->nvf_lock)));
8212797Sjg if (rv != 0) {
8222797Sjg nvf_error(
8232797Sjg "%s: %s invalid list element\n",
8242797Sjg nvfd->nvf_cache_path, name);
8252797Sjg rval = EINVAL;
8262797Sjg goto error;
8272797Sjg }
8282797Sjg break;
8292797Sjg
8302797Sjg default:
8312797Sjg nvf_error("%s: %s unsupported data type %d\n",
8327576SJerry.Gilliam@Sun.COM nvfd->nvf_cache_path, name, nvpair_type(nvp));
8332797Sjg rval = EINVAL;
8342797Sjg goto error;
8352797Sjg }
8362797Sjg }
8372797Sjg
8382797Sjg nvlist_free(nvl);
8392797Sjg
8402797Sjg return (0);
8412797Sjg
8422797Sjg error:
8432797Sjg nvlist_free(nvl);
8442797Sjg nvp_list_free(nvfd);
8452797Sjg return (rval);
8462797Sjg }
8472797Sjg
8482797Sjg
8492797Sjg int
nvf_read_file(nvf_handle_t nvf_handle)8502797Sjg nvf_read_file(nvf_handle_t nvf_handle)
8512797Sjg {
8522797Sjg nvfd_t *nvfd = (nvfd_t *)nvf_handle;
8532797Sjg int rval;
8542797Sjg
8552797Sjg ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock));
8562797Sjg
8572797Sjg if (kfio_disable_read)
8582797Sjg return (0);
8592797Sjg
8602797Sjg KFDEBUG((CE_CONT, "reading %s\n", nvfd->nvf_cache_path));
8612797Sjg
8622797Sjg rval = fread_nvp_list(nvfd);
8632797Sjg if (rval) {
8642797Sjg switch (rval) {
8652797Sjg case EIO:
8662797Sjg nvfd->nvf_flags |= NVF_F_REBUILD_MSG;
8672797Sjg cmn_err(CE_WARN, "%s: I/O error",
8687576SJerry.Gilliam@Sun.COM nvfd->nvf_cache_path);
8692797Sjg break;
8702797Sjg case ENOENT:
8712797Sjg nvfd->nvf_flags |= NVF_F_CREATE_MSG;
8722797Sjg nvf_error("%s: not found\n",
8737576SJerry.Gilliam@Sun.COM nvfd->nvf_cache_path);
8742797Sjg break;
8752797Sjg case EINVAL:
8762797Sjg default:
8772797Sjg nvfd->nvf_flags |= NVF_F_REBUILD_MSG;
8782797Sjg cmn_err(CE_WARN, "%s: data file corrupted",
8797576SJerry.Gilliam@Sun.COM nvfd->nvf_cache_path);
8802797Sjg break;
8812797Sjg }
8822797Sjg }
8832797Sjg return (rval);
8842797Sjg }
8852797Sjg
8862797Sjg static void
nvf_write_is_complete(nvfd_t * fd)8872797Sjg nvf_write_is_complete(nvfd_t *fd)
8882797Sjg {
8892797Sjg if (fd->nvf_write_complete) {
8902797Sjg (fd->nvf_write_complete)((nvf_handle_t)fd);
8912797Sjg }
8922797Sjg }
8932797Sjg
8942797Sjg /*ARGSUSED*/
8952797Sjg static void
nvpflush_timeout(void * arg)8962797Sjg nvpflush_timeout(void *arg)
8972797Sjg {
8982797Sjg clock_t nticks;
8992797Sjg
9002797Sjg mutex_enter(&nvpflush_lock);
9012797Sjg nticks = nvpticks - ddi_get_lbolt();
9022797Sjg if (nticks > 4) {
9032797Sjg nvpflush_timer_busy = 1;
9042797Sjg mutex_exit(&nvpflush_lock);
9052797Sjg nvpflush_id = timeout(nvpflush_timeout, NULL, nticks);
9062797Sjg } else {
9072797Sjg do_nvpflush = 1;
9082797Sjg NVPDAEMON_DEBUG((CE_CONT, "signal nvpdaemon\n"));
9092797Sjg cv_signal(&nvpflush_cv);
9102797Sjg nvpflush_id = 0;
9112797Sjg nvpflush_timer_busy = 0;
9122797Sjg mutex_exit(&nvpflush_lock);
9132797Sjg }
9142797Sjg }
9152797Sjg
9162797Sjg /*
9172797Sjg * After marking a list as dirty, wake the nvpflush daemon
9182797Sjg * to perform the update.
9192797Sjg */
9202797Sjg void
nvf_wake_daemon(void)9212797Sjg nvf_wake_daemon(void)
9222797Sjg {
9232797Sjg clock_t nticks;
9242797Sjg
9252797Sjg /*
9267576SJerry.Gilliam@Sun.COM * If the system isn't up yet or is shutting down,
9272797Sjg * don't even think about starting a flush.
9282797Sjg */
9297576SJerry.Gilliam@Sun.COM if (!i_ddi_io_initialized() || sys_shutdown)
9302797Sjg return;
9312797Sjg
9322797Sjg mutex_enter(&nvpflush_lock);
9332797Sjg
9342797Sjg if (nvpflush_daemon_active == 0) {
9352797Sjg nvpflush_daemon_active = 1;
9362797Sjg mutex_exit(&nvpflush_lock);
9372797Sjg NVPDAEMON_DEBUG((CE_CONT, "starting nvpdaemon thread\n"));
9382797Sjg nvpflush_thr_id = thread_create(NULL, 0,
9392797Sjg (void (*)())nvpflush_daemon,
9402797Sjg NULL, 0, &p0, TS_RUN, minclsyspri);
9412797Sjg mutex_enter(&nvpflush_lock);
9422797Sjg }
9432797Sjg
9442797Sjg nticks = nvpflush_delay * TICKS_PER_SECOND;
9452797Sjg nvpticks = ddi_get_lbolt() + nticks;
9462797Sjg if (nvpflush_timer_busy == 0) {
9472797Sjg nvpflush_timer_busy = 1;
9482797Sjg mutex_exit(&nvpflush_lock);
9492797Sjg nvpflush_id = timeout(nvpflush_timeout, NULL, nticks + 4);
9502797Sjg } else
9512797Sjg mutex_exit(&nvpflush_lock);
9522797Sjg }
9532797Sjg
9542797Sjg static int
nvpflush_one(nvfd_t * nvfd)9552797Sjg nvpflush_one(nvfd_t *nvfd)
9562797Sjg {
9572797Sjg int rval = DDI_SUCCESS;
9582797Sjg nvlist_t *nvl;
9592797Sjg
9602797Sjg rw_enter(&nvfd->nvf_lock, RW_READER);
9612797Sjg
9622797Sjg ASSERT((nvfd->nvf_flags & NVF_F_FLUSHING) == 0);
9632797Sjg
9642797Sjg if (!NVF_IS_DIRTY(nvfd) ||
9657576SJerry.Gilliam@Sun.COM NVF_IS_READONLY(nvfd) || kfio_disable_write || sys_shutdown) {
9662797Sjg NVF_CLEAR_DIRTY(nvfd);
9672797Sjg rw_exit(&nvfd->nvf_lock);
9682797Sjg return (DDI_SUCCESS);
9692797Sjg }
9702797Sjg
9712797Sjg if (rw_tryupgrade(&nvfd->nvf_lock) == 0) {
9722797Sjg nvf_error("nvpflush: "
9732797Sjg "%s rw upgrade failed\n", nvfd->nvf_cache_path);
9742797Sjg rw_exit(&nvfd->nvf_lock);
9752797Sjg return (DDI_FAILURE);
9762797Sjg }
9772797Sjg if (((nvfd->nvf_pack_list)
9782797Sjg ((nvf_handle_t)nvfd, &nvl)) != DDI_SUCCESS) {
9792797Sjg nvf_error("nvpflush: "
9802797Sjg "%s nvlist construction failed\n", nvfd->nvf_cache_path);
9812797Sjg ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock));
9822797Sjg rw_exit(&nvfd->nvf_lock);
9832797Sjg return (DDI_FAILURE);
9842797Sjg }
9852797Sjg ASSERT(RW_WRITE_HELD(&nvfd->nvf_lock));
9862797Sjg
9872797Sjg NVF_CLEAR_DIRTY(nvfd);
9882797Sjg nvfd->nvf_flags |= NVF_F_FLUSHING;
9892797Sjg rw_exit(&nvfd->nvf_lock);
9902797Sjg
9912797Sjg rval = e_fwrite_nvlist(nvfd, nvl);
9922797Sjg nvlist_free(nvl);
9932797Sjg
9942797Sjg rw_enter(&nvfd->nvf_lock, RW_WRITER);
9952797Sjg nvfd->nvf_flags &= ~NVF_F_FLUSHING;
9962797Sjg if (rval == DDI_FAILURE) {
9972797Sjg if (NVF_IS_READONLY(nvfd)) {
9982797Sjg rval = DDI_SUCCESS;
9992797Sjg nvfd->nvf_flags &= ~(NVF_F_ERROR | NVF_F_DIRTY);
10002797Sjg } else if ((nvfd->nvf_flags & NVF_F_ERROR) == 0) {
10012797Sjg cmn_err(CE_CONT,
10027576SJerry.Gilliam@Sun.COM "%s: update failed\n", nvfd->nvf_cache_path);
10032797Sjg nvfd->nvf_flags |= NVF_F_ERROR | NVF_F_DIRTY;
10042797Sjg }
10052797Sjg } else {
10062797Sjg if (nvfd->nvf_flags & NVF_F_CREATE_MSG) {
10072797Sjg cmn_err(CE_CONT,
10082797Sjg "!Creating %s\n", nvfd->nvf_cache_path);
10092797Sjg nvfd->nvf_flags &= ~NVF_F_CREATE_MSG;
10102797Sjg }
10112797Sjg if (nvfd->nvf_flags & NVF_F_REBUILD_MSG) {
10122797Sjg cmn_err(CE_CONT,
10132797Sjg "!Rebuilding %s\n", nvfd->nvf_cache_path);
10142797Sjg nvfd->nvf_flags &= ~NVF_F_REBUILD_MSG;
10152797Sjg }
10162797Sjg if (nvfd->nvf_flags & NVF_F_ERROR) {
10172797Sjg cmn_err(CE_CONT,
10182797Sjg "%s: update now ok\n", nvfd->nvf_cache_path);
10192797Sjg nvfd->nvf_flags &= ~NVF_F_ERROR;
10202797Sjg }
10212797Sjg /*
10222797Sjg * The file may need to be flushed again if the cached
10232797Sjg * data was touched while writing the earlier contents.
10242797Sjg */
10252797Sjg if (NVF_IS_DIRTY(nvfd))
10262797Sjg rval = DDI_FAILURE;
10272797Sjg }
10282797Sjg
10292797Sjg rw_exit(&nvfd->nvf_lock);
10302797Sjg return (rval);
10312797Sjg }
10322797Sjg
10332797Sjg
10342797Sjg static void
nvpflush_daemon(void)10352797Sjg nvpflush_daemon(void)
10362797Sjg {
10372797Sjg callb_cpr_t cprinfo;
10382797Sjg nvfd_t *nvfdp, *nextfdp;
10392797Sjg clock_t clk;
10402797Sjg int rval;
10412797Sjg int want_wakeup;
10422797Sjg int is_now_clean;
10432797Sjg
10442797Sjg ASSERT(modrootloaded);
10452797Sjg
10462797Sjg nvpflush_thread = curthread;
10472797Sjg NVPDAEMON_DEBUG((CE_CONT, "nvpdaemon: init\n"));
10482797Sjg
10492797Sjg CALLB_CPR_INIT(&cprinfo, &nvpflush_lock, callb_generic_cpr, "nvp");
10502797Sjg mutex_enter(&nvpflush_lock);
10512797Sjg for (;;) {
10522797Sjg CALLB_CPR_SAFE_BEGIN(&cprinfo);
10532797Sjg while (do_nvpflush == 0) {
1054*11066Srafael.vanoni@sun.com clk = cv_reltimedwait(&nvpflush_cv, &nvpflush_lock,
1055*11066Srafael.vanoni@sun.com (nvpdaemon_idle_time * TICKS_PER_SECOND),
1056*11066Srafael.vanoni@sun.com TR_CLOCK_TICK);
10577576SJerry.Gilliam@Sun.COM if ((clk == -1 && do_nvpflush == 0 &&
10587576SJerry.Gilliam@Sun.COM nvpflush_timer_busy == 0) || sys_shutdown) {
10592797Sjg /*
10602797Sjg * Note that CALLB_CPR_EXIT calls mutex_exit()
10612797Sjg * on the lock passed in to CALLB_CPR_INIT,
10622797Sjg * so the lock must be held when invoking it.
10632797Sjg */
10642797Sjg CALLB_CPR_SAFE_END(&cprinfo, &nvpflush_lock);
10652797Sjg NVPDAEMON_DEBUG((CE_CONT, "nvpdaemon: exit\n"));
10662797Sjg ASSERT(mutex_owned(&nvpflush_lock));
10672797Sjg nvpflush_thr_id = NULL;
10682797Sjg nvpflush_daemon_active = 0;
10692797Sjg CALLB_CPR_EXIT(&cprinfo);
10702797Sjg thread_exit();
10712797Sjg }
10722797Sjg }
10732797Sjg CALLB_CPR_SAFE_END(&cprinfo, &nvpflush_lock);
10742797Sjg
10752797Sjg nvpbusy = 1;
10762797Sjg want_wakeup = 0;
10772797Sjg do_nvpflush = 0;
10782797Sjg mutex_exit(&nvpflush_lock);
10792797Sjg
10802797Sjg /*
10812797Sjg * Try flushing what's dirty, reschedule if there's
10822797Sjg * a failure or data gets marked as dirty again.
10832797Sjg * First move each file marked dirty to the dirty
10842797Sjg * list to avoid locking the list across the write.
10852797Sjg */
10862797Sjg mutex_enter(&nvf_cache_mutex);
10872797Sjg for (nvfdp = list_head(&nvf_cache_files);
10882797Sjg nvfdp; nvfdp = nextfdp) {
10892797Sjg nextfdp = list_next(&nvf_cache_files, nvfdp);
10902797Sjg rw_enter(&nvfdp->nvf_lock, RW_READER);
10912797Sjg if (NVF_IS_DIRTY(nvfdp)) {
10922797Sjg list_remove(&nvf_cache_files, nvfdp);
10932797Sjg list_insert_tail(&nvf_dirty_files, nvfdp);
10942797Sjg rw_exit(&nvfdp->nvf_lock);
10952797Sjg } else {
10962797Sjg NVPDAEMON_DEBUG((CE_CONT,
10972797Sjg "nvpdaemon: not dirty %s\n",
10982797Sjg nvfdp->nvf_cache_path));
10992797Sjg rw_exit(&nvfdp->nvf_lock);
11002797Sjg }
11012797Sjg }
11022797Sjg mutex_exit(&nvf_cache_mutex);
11032797Sjg
11042797Sjg /*
11052797Sjg * Now go through the dirty list
11062797Sjg */
11072797Sjg for (nvfdp = list_head(&nvf_dirty_files);
11082797Sjg nvfdp; nvfdp = nextfdp) {
11092797Sjg nextfdp = list_next(&nvf_dirty_files, nvfdp);
11102797Sjg
11112797Sjg is_now_clean = 0;
11122797Sjg rw_enter(&nvfdp->nvf_lock, RW_READER);
11132797Sjg if (NVF_IS_DIRTY(nvfdp)) {
11142797Sjg NVPDAEMON_DEBUG((CE_CONT,
11152797Sjg "nvpdaemon: flush %s\n",
11162797Sjg nvfdp->nvf_cache_path));
11172797Sjg rw_exit(&nvfdp->nvf_lock);
11182797Sjg rval = nvpflush_one(nvfdp);
11192797Sjg rw_enter(&nvfdp->nvf_lock, RW_READER);
11202797Sjg if (rval != DDI_SUCCESS ||
11212797Sjg NVF_IS_DIRTY(nvfdp)) {
11222797Sjg rw_exit(&nvfdp->nvf_lock);
11232797Sjg NVPDAEMON_DEBUG((CE_CONT,
11242797Sjg "nvpdaemon: %s dirty again\n",
11252797Sjg nvfdp->nvf_cache_path));
11262797Sjg want_wakeup = 1;
11272797Sjg } else {
11282797Sjg rw_exit(&nvfdp->nvf_lock);
11292797Sjg nvf_write_is_complete(nvfdp);
11302797Sjg is_now_clean = 1;
11312797Sjg }
11322797Sjg } else {
11332797Sjg NVPDAEMON_DEBUG((CE_CONT,
11342797Sjg "nvpdaemon: not dirty %s\n",
11352797Sjg nvfdp->nvf_cache_path));
11362797Sjg rw_exit(&nvfdp->nvf_lock);
11372797Sjg is_now_clean = 1;
11382797Sjg }
11392797Sjg
11402797Sjg if (is_now_clean) {
11412797Sjg mutex_enter(&nvf_cache_mutex);
11422797Sjg list_remove(&nvf_dirty_files, nvfdp);
11432797Sjg list_insert_tail(&nvf_cache_files,
11442797Sjg nvfdp);
11452797Sjg mutex_exit(&nvf_cache_mutex);
11462797Sjg }
11472797Sjg }
11482797Sjg
11492797Sjg if (want_wakeup)
11502797Sjg nvf_wake_daemon();
11512797Sjg
11522797Sjg mutex_enter(&nvpflush_lock);
11532797Sjg nvpbusy = 0;
11542797Sjg }
11552797Sjg }
1156