1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 5789Sahrens * Common Development and Distribution License, Version 1.0 only 6789Sahrens * (the "License"). You may not use this file except in compliance 7789Sahrens * with the License. 8789Sahrens * 9789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10789Sahrens * or http://www.opensolaris.org/os/licensing. 11789Sahrens * See the License for the specific language governing permissions 12789Sahrens * and limitations under the License. 13789Sahrens * 14789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 15789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16789Sahrens * If applicable, add the following below this CDDL HEADER, with the 17789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 18789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 19789Sahrens * 20789Sahrens * CDDL HEADER END 21789Sahrens */ 22789Sahrens /* 23789Sahrens * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24789Sahrens * Use is subject to license terms. 25789Sahrens */ 26789Sahrens 27789Sahrens #ifndef _SYS_DBUF_H 28789Sahrens #define _SYS_DBUF_H 29789Sahrens 30789Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 31789Sahrens 32789Sahrens #include <sys/dmu.h> 33789Sahrens #include <sys/spa.h> 34789Sahrens #include <sys/txg.h> 35789Sahrens #include <sys/zio.h> 36789Sahrens #include <sys/arc.h> 37789Sahrens #include <sys/zfs_context.h> 38789Sahrens #include <sys/refcount.h> 39789Sahrens 40789Sahrens #ifdef __cplusplus 41789Sahrens extern "C" { 42789Sahrens #endif 43789Sahrens 44789Sahrens #define DB_BONUS_BLKID (-1ULL) 45789Sahrens #define IN_DMU_SYNC ((blkptr_t *)-1) 46789Sahrens 47789Sahrens /* 48789Sahrens * define flags for dbuf_read and friends 49789Sahrens */ 50789Sahrens 51789Sahrens #define DB_RF_MUST_SUCCEED 0 52789Sahrens #define DB_RF_CANFAIL (1 << 1) 53789Sahrens #define DB_RF_HAVESTRUCT (1 << 2) 54789Sahrens #define DB_RF_NOPREFETCH (1 << 3) 55789Sahrens 56789Sahrens /* 57789Sahrens * The state transition diagram for dbufs looks like: 58789Sahrens * 59789Sahrens * +----> READ ----+ 60789Sahrens * | | 61789Sahrens * | V 62789Sahrens * (alloc)-->UNCACHED CACHED-->(free) 63789Sahrens * | ^ 64789Sahrens * | | 65789Sahrens * +----> FILL ----+ 66789Sahrens */ 67789Sahrens typedef enum dbuf_states { 68789Sahrens DB_UNCACHED, 69789Sahrens DB_FILL, 70789Sahrens DB_READ, 71789Sahrens DB_CACHED 72789Sahrens } dbuf_states_t; 73789Sahrens 74789Sahrens struct objset_impl; 75789Sahrens struct dnode; 76789Sahrens struct dmu_tx; 77789Sahrens 78789Sahrens /* 79789Sahrens * level = 0 means the user data 80789Sahrens * level = 1 means the single indirect block 81789Sahrens * etc. 82789Sahrens */ 83789Sahrens 84789Sahrens #define LIST_LINK_INACTIVE(link) \ 85789Sahrens ((link)->list_next == NULL && (link)->list_prev == NULL) 86789Sahrens 87789Sahrens typedef struct dmu_buf_impl { 88789Sahrens /* 89789Sahrens * The following members are immutable, with the exception of 90789Sahrens * db.db_data, which is protected by db_mtx. 91789Sahrens */ 92789Sahrens 93789Sahrens /* the publicly visible structure */ 94789Sahrens dmu_buf_t db; 95789Sahrens 96789Sahrens /* the objset we belong to */ 97789Sahrens struct objset_impl *db_objset; 98789Sahrens 99789Sahrens /* 100789Sahrens * the dnode we belong to (NULL when evicted) 101789Sahrens */ 102789Sahrens struct dnode *db_dnode; 103789Sahrens 104789Sahrens /* 105789Sahrens * our parent buffer; if the dnode points to us directly, 106789Sahrens * db_parent == db_dnode->dn_dbuf 107789Sahrens * only accessed by sync thread ??? 108789Sahrens * (NULL when evicted) 109789Sahrens */ 110789Sahrens struct dmu_buf_impl *db_parent; 111789Sahrens 112789Sahrens /* 113789Sahrens * link for hash table of all dmu_buf_impl_t's 114789Sahrens */ 115789Sahrens struct dmu_buf_impl *db_hash_next; 116789Sahrens 117789Sahrens /* our block number */ 118789Sahrens uint64_t db_blkid; 119789Sahrens 120789Sahrens /* 121789Sahrens * Pointer to the blkptr_t which points to us. May be NULL if we 122789Sahrens * don't have one yet. (NULL when evicted) 123789Sahrens */ 124789Sahrens blkptr_t *db_blkptr; 125789Sahrens 126789Sahrens /* 127789Sahrens * Our indirection level. Data buffers have db_level==0. 128789Sahrens * Indirect buffers which point to data buffers have 129789Sahrens * db_level==1. etc. Buffers which contain dnodes have 130789Sahrens * db_level==0, since the dnodes are stored in a file. 131789Sahrens */ 132789Sahrens uint8_t db_level; 133789Sahrens 134789Sahrens /* db_mtx protects the members below */ 135789Sahrens kmutex_t db_mtx; 136789Sahrens 137789Sahrens /* 138789Sahrens * Current state of the buffer 139789Sahrens */ 140789Sahrens dbuf_states_t db_state; 141789Sahrens 142789Sahrens /* 143789Sahrens * Refcount accessed by dmu_buf_{hold,rele}. 144789Sahrens * If nonzero, the buffer can't be destroyed. 145789Sahrens * Protected by db_mtx. 146789Sahrens */ 147789Sahrens refcount_t db_holds; 148789Sahrens 149789Sahrens /* buffer holding our data */ 150789Sahrens arc_buf_t *db_buf; 151789Sahrens 152789Sahrens kcondvar_t db_changed; 153789Sahrens arc_buf_t *db_data_pending; 154789Sahrens 155789Sahrens /* 156789Sahrens * Last time (transaction group) this buffer was dirtied. 157789Sahrens */ 158789Sahrens uint64_t db_dirtied; 159789Sahrens 160789Sahrens /* 161789Sahrens * If dd_dnode != NULL, our link on the owner dnodes's dn_dbufs list. 162789Sahrens * Protected by its dn_mtx. 163789Sahrens */ 164789Sahrens list_node_t db_link; 165789Sahrens 166789Sahrens /* Our link on dn_dirty_dbufs[txg] */ 167789Sahrens list_node_t db_dirty_node[TXG_SIZE]; 168789Sahrens uint8_t db_dirtycnt; 169789Sahrens 170789Sahrens /* 171789Sahrens * Data which is unique to data (leaf) blocks: 172789Sahrens */ 173789Sahrens struct { 174789Sahrens /* stuff we store for the user (see dmu_buf_set_user) */ 175789Sahrens void *db_user_ptr; 176789Sahrens void **db_user_data_ptr_ptr; 177789Sahrens dmu_buf_evict_func_t *db_evict_func; 178789Sahrens uint8_t db_immediate_evict; 179789Sahrens uint8_t db_freed_in_flight; 180789Sahrens 181789Sahrens /* 182789Sahrens * db_data_old[txg&TXG_MASK] is set when we 183789Sahrens * dirty the buffer, so that we can retain the 184789Sahrens * pointer even if it gets COW'd in a subsequent 185789Sahrens * transaction group. 186789Sahrens * 187789Sahrens * If the buffer is dirty in any txg, it can't 188789Sahrens * be destroyed. 189789Sahrens */ 190789Sahrens /* 191789Sahrens * XXX Protected by db_mtx and dn_dirty_mtx. 192789Sahrens * db_mtx must be held to read db_dirty[], and 193789Sahrens * both db_mtx and dn_dirty_mtx must be held to 194789Sahrens * modify (dirty or clean). db_mtx must be held 195789Sahrens * before dn_dirty_mtx. 196789Sahrens */ 197789Sahrens arc_buf_t *db_data_old[TXG_SIZE]; 198789Sahrens blkptr_t *db_overridden_by[TXG_SIZE]; 199789Sahrens } db_d; 200789Sahrens } dmu_buf_impl_t; 201789Sahrens 202789Sahrens /* Note: the dbuf hash table is exposed only for the mdb module */ 203789Sahrens #define DBUF_MUTEXES 256 204789Sahrens #define DBUF_HASH_MUTEX(h, idx) (&(h)->hash_mutexes[(idx) & (DBUF_MUTEXES-1)]) 205789Sahrens typedef struct dbuf_hash_table { 206789Sahrens uint64_t hash_table_mask; 207789Sahrens dmu_buf_impl_t **hash_table; 208789Sahrens kmutex_t hash_mutexes[DBUF_MUTEXES]; 209789Sahrens } dbuf_hash_table_t; 210789Sahrens 211789Sahrens 212789Sahrens uint64_t dbuf_whichblock(struct dnode *di, uint64_t offset); 213789Sahrens 214789Sahrens dmu_buf_impl_t *dbuf_create_tlib(struct dnode *dn, char *data); 215789Sahrens 216789Sahrens dmu_buf_impl_t *dbuf_hold(struct dnode *dn, uint64_t blkid); 217789Sahrens dmu_buf_impl_t *dbuf_hold_level(struct dnode *dn, int level, uint64_t blkid, 218789Sahrens void *tag); 219789Sahrens dmu_buf_impl_t *dbuf_hold_bonus(struct dnode *dn, void *tag); 220789Sahrens int dbuf_hold_impl(struct dnode *dn, uint8_t level, uint64_t blkid, int create, 221789Sahrens void *tag, dmu_buf_impl_t **dbp); 222789Sahrens 223789Sahrens void dbuf_prefetch(struct dnode *dn, uint64_t blkid); 224789Sahrens 225789Sahrens void dbuf_add_ref(dmu_buf_impl_t *db, void *tag); 226789Sahrens void dbuf_remove_ref(dmu_buf_impl_t *db, void *tag); 227789Sahrens uint64_t dbuf_refcount(dmu_buf_impl_t *db); 228789Sahrens 229789Sahrens void dbuf_rele(dmu_buf_impl_t *db); 230789Sahrens 231789Sahrens dmu_buf_impl_t *dbuf_find(struct dnode *dn, uint8_t level, uint64_t blkid); 232789Sahrens 233789Sahrens void dbuf_read(dmu_buf_impl_t *db); 234789Sahrens int dbuf_read_canfail(dmu_buf_impl_t *db); 235789Sahrens void dbuf_read_havestruct(dmu_buf_impl_t *db); 236789Sahrens void dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags); 237789Sahrens void dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx); 238789Sahrens void dbuf_will_fill(dmu_buf_impl_t *db, dmu_tx_t *tx); 239789Sahrens void dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx); 240789Sahrens void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx); 241789Sahrens void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx); 242789Sahrens void dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx); 243789Sahrens 244789Sahrens void dbuf_evict(dmu_buf_impl_t *db); 245789Sahrens 246789Sahrens void dbuf_setdirty(dmu_buf_impl_t *db, dmu_tx_t *tx); 247789Sahrens void dbuf_sync(dmu_buf_impl_t *db, zio_t *zio, dmu_tx_t *tx); 248789Sahrens void dbuf_unoverride(dmu_buf_impl_t *db, uint64_t txg); 249789Sahrens 250789Sahrens void dbuf_free_range(struct dnode *dn, uint64_t blkid, uint64_t nblks, 251789Sahrens struct dmu_tx *); 252789Sahrens 253789Sahrens void dbuf_downgrade(dmu_buf_impl_t *db, int evicting); 254789Sahrens void dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx); 255789Sahrens 256789Sahrens void dbuf_init(void); 257789Sahrens void dbuf_fini(void); 258789Sahrens 259789Sahrens #ifdef ZFS_DEBUG 260789Sahrens 261789Sahrens /* 262789Sahrens * There should be a ## between the string literal and fmt, to make it 263*896Smaybee * clear that we're joining two strings together, but gcc does not 264*896Smaybee * support that preprocessor token. 265789Sahrens */ 266789Sahrens #define dprintf_dbuf(dbuf, fmt, ...) do { \ 267789Sahrens if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ 268789Sahrens char __db_buf[32]; \ 269789Sahrens uint64_t __db_obj = (dbuf)->db.db_object; \ 270789Sahrens if (__db_obj == DMU_META_DNODE_OBJECT) \ 271789Sahrens (void) strcpy(__db_buf, "mdn"); \ 272789Sahrens else \ 273789Sahrens (void) snprintf(__db_buf, sizeof (__db_buf), "%lld", \ 274789Sahrens (u_longlong_t)__db_obj); \ 275789Sahrens dprintf_ds((dbuf)->db_objset->os_dsl_dataset, \ 276789Sahrens "obj=%s lvl=%u blkid=%lld " fmt, \ 277789Sahrens __db_buf, (dbuf)->db_level, \ 278789Sahrens (u_longlong_t)(dbuf)->db_blkid, __VA_ARGS__); \ 279789Sahrens } \ 280789Sahrens _NOTE(CONSTCOND) } while (0) 281789Sahrens 282789Sahrens #define dprintf_dbuf_bp(db, bp, fmt, ...) do { \ 283*896Smaybee if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ 284*896Smaybee char __blkbuf[BP_SPRINTF_LEN]; \ 285*896Smaybee sprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, bp); \ 286789Sahrens dprintf_dbuf(db, fmt " %s\n", __VA_ARGS__, __blkbuf); \ 287789Sahrens } \ 288789Sahrens _NOTE(CONSTCOND) } while (0) 289789Sahrens 290873Sek110237 #define DBUF_VERIFY(db) dbuf_verify(db) 291873Sek110237 292789Sahrens #else 293789Sahrens 294789Sahrens #define dprintf_dbuf(db, fmt, ...) 295789Sahrens #define dprintf_dbuf_bp(db, bp, fmt, ...) 296873Sek110237 #define DBUF_VERIFY(db) 297789Sahrens 298789Sahrens #endif 299789Sahrens 300789Sahrens 301789Sahrens #ifdef __cplusplus 302789Sahrens } 303789Sahrens #endif 304789Sahrens 305789Sahrens #endif /* _SYS_DBUF_H */ 306