1 typedef struct Arch Arch; 2 typedef struct BList BList; 3 typedef struct Block Block; 4 typedef struct Cache Cache; 5 typedef struct Disk Disk; 6 typedef struct Entry Entry; 7 typedef struct Header Header; 8 typedef struct Label Label; 9 typedef struct Periodic Periodic; 10 typedef struct Snap Snap; 11 typedef struct Source Source; 12 typedef struct Super Super; 13 typedef struct WalkPtr WalkPtr; 14 15 /* tuneable parameters - probably should not be constants */ 16 enum { 17 BytesPerEntry = 100, /* estimate of bytes per dir entries - determines number of index entries in the block */ 18 FullPercentage = 80, /* don't allocate in block if more than this percentage full */ 19 FlushSize = 200, /* number of blocks to flush */ 20 DirtyPercentage = 50, /* maximum percentage of dirty blocks */ 21 }; 22 23 enum { 24 NilBlock = (~0UL), 25 MaxBlock = (1UL<<31), 26 }; 27 28 enum { 29 HeaderMagic = 0x3776ae89, 30 HeaderVersion = 1, 31 HeaderOffset = 128*1024, 32 HeaderSize = 512, 33 SuperMagic = 0x2340a3b1, 34 SuperSize = 512, 35 SuperVersion = 1, 36 LabelSize = 14, 37 }; 38 39 /* well known tags */ 40 enum { 41 BadTag = 0, /* this tag should not be used */ 42 RootTag = 1, /* root of fs */ 43 EnumTag, /* root of a dir listing */ 44 UserTag = 32, /* all other tags should be >= UserTag */ 45 }; 46 47 struct Super { 48 u16int version; 49 u32int epochLow; 50 u32int epochHigh; 51 u64int qid; /* next qid */ 52 u32int active; /* root of active file system */ 53 u32int next; /* root of next snapshot to archive */ 54 u32int current; /* root of snapshot currently archiving */ 55 uchar last[VtScoreSize]; /* last snapshot successfully archived */ 56 char name[128]; /* label */ 57 }; 58 59 60 struct Fs { 61 Arch *arch; /* immutable */ 62 Cache *cache; /* immutable */ 63 int mode; /* immutable */ 64 int blockSize; /* immutable */ 65 VtSession *z; /* immutable */ 66 Snap *snap; /* immutable */ 67 68 Periodic *metaFlush; /* periodically flushes meta data cached in files */ 69 70 /* 71 * epoch lock. 72 * Most operations on the fs require a read lock of elk, ensuring that 73 * the current high and low epochs do not change under foot. 74 * This lock is mostly acquired via a call to fileLock or fileRlock. 75 * Deletion and creation of snapshots occurs under a write lock of elk, 76 * ensuring no file operations are occurring concurrently. 77 */ 78 VtLock *elk; /* epoch lock */ 79 u32int ehi; /* epoch high */ 80 u32int elo; /* epoch low */ 81 82 int halted; /* epoch lock is held to halt (console initiated) */ 83 84 Source *source; /* immutable: root of sources */ 85 File *file; /* immutable: root of files */ 86 }; 87 88 /* 89 * variant on VtEntry 90 * there are extra fields when stored locally 91 */ 92 struct Entry { 93 u32int gen; /* generation number */ 94 ushort psize; /* pointer block size */ 95 ushort dsize; /* data block size */ 96 uchar depth; /* unpacked from flags */ 97 uchar flags; 98 uvlong size; 99 uchar score[VtScoreSize]; 100 u32int tag; /* tag for local blocks: zero if stored on Venti */ 101 u32int snap; /* non zero -> entering snapshot of given epoch */ 102 uchar archive; /* archive this snapshot: only valid for snap != 0 */ 103 }; 104 105 struct Source { 106 Fs *fs; /* immutable */ 107 int mode; /* immutable */ 108 u32int gen; /* immutable */ 109 int dsize; /* immutable */ 110 int dir; /* immutable */ 111 112 Source *parent; /* immutable */ 113 114 VtLock *lk; 115 int ref; 116 /* 117 * epoch for the source 118 * for ReadWrite sources, epoch is used to lazily notice 119 * sources that must be split from the snapshots. 120 * for ReadOnly sources, the epoch represents the minimum epoch 121 * along the chain from the root, and is used to lazily notice 122 * sources that have become invalid because they belong to an old 123 * snapshot. 124 */ 125 u32int epoch; 126 Block *b; /* block containing this source */ 127 uchar score[VtScoreSize]; /* score of block containing this source */ 128 u32int scoreEpoch; /* epoch of block containing this source */ 129 int epb; /* immutable: entries per block in parent */ 130 u32int tag; /* immutable: tag of parent */ 131 u32int offset; /* immutable: entry offset in parent */ 132 }; 133 134 135 struct Header { 136 ushort version; 137 ushort blockSize; 138 ulong super; /* super blocks */ 139 ulong label; /* start of labels */ 140 ulong data; /* end of labels - start of data blocks */ 141 ulong end; /* end of data blocks */ 142 }; 143 144 /* 145 * contains a one block buffer 146 * to avoid problems of the block changing underfoot 147 * and to enable an interface that supports unget. 148 */ 149 struct DirEntryEnum { 150 File *file; 151 152 u32int boff; /* block offset */ 153 154 int i, n; 155 DirEntry *buf; 156 }; 157 158 /* Block states; two orthogonal fields, Bv* and Ba* */ 159 enum { 160 BsFree = 0, /* available for allocation */ 161 BsBad = 0xFF, /* something is wrong with this block */ 162 163 /* bit fields */ 164 BsAlloc = 1<<0, /* block is in use */ 165 BsCopied = 1<<1, /* block has been copied */ 166 BsVenti = 1<<2, /* block has been stored on Venti */ 167 BsClosed = 1<<3, /* block has been unlinked from active file system */ 168 BsMask = BsAlloc|BsCopied|BsVenti|BsClosed, 169 }; 170 171 /* 172 * Each block has a state and generation 173 * The following invariants are maintained 174 * Each block has no more than than one parent per generation 175 * For Active*, no child has a parent of a greater generation 176 * For Snap*, there is a snap parent of given generation and there are 177 * no parents of greater gen - implies no children snaps 178 * of a lesser gen 179 * For *RO, the block is fixed - no change can be made - all pointers 180 * are valid venti addresses 181 * For *A, the block is on the venti server 182 * There are no pointers to Zombie blocks 183 * 184 * Transitions 185 * Archiver at generation g 186 * Mutator at generation h 187 * 188 * Want to modify a block 189 * Venti: create new Active(h) 190 * Active(x): x == h: do nothing 191 * Active(x): x < h: change to Snap(h-1) + add Active(h) 192 * ActiveRO(x): change to SnapRO(h-1) + add Active(h) 193 * ActiveA(x): add Active(h) 194 * Snap*(x): should not occur 195 * Zombie(x): should not occur 196 * Want to archive 197 * Active(x): x != g: should never happen 198 * Active(x): x == g fix children and free them: move to ActiveRO(g); 199 * ActiveRO(x): x != g: should never happen 200 * ActiveRO(x): x == g: wait until it hits ActiveA or SnapA 201 * ActiveA(x): done 202 * Snap(x): x < g: should never happen 203 * Snap(x): x >= g: fix children, freeing all SnapA(y) x == y; 204 * SnapRO(x): wait until it hits SnapA 205 * 206 */ 207 208 /* 209 * block types 210 * more regular than Venti block types 211 * bit 3 -> block or data block 212 * bits 2-0 -> level of block 213 */ 214 enum { 215 BtData, 216 BtDir = 1<<3, 217 BtLevelMask = 7, 218 BtMax = 1<<4, 219 }; 220 221 /* io states */ 222 enum { 223 BioEmpty, /* label & data are not valid */ 224 BioLabel, /* label is good */ 225 BioClean, /* data is on the disk */ 226 BioDirty, /* data is not yet on the disk */ 227 BioReading, /* in process of reading data */ 228 BioWriting, /* in process of writing data */ 229 BioReadError, /* error reading: assume disk always handles write errors */ 230 BioVentiError, /* error reading from venti (probably disconnected) */ 231 BioMax 232 }; 233 234 struct Label { 235 uchar type; 236 uchar state; 237 u32int tag; 238 u32int epoch; 239 u32int epochClose; 240 }; 241 242 struct Block { 243 Cache *c; 244 int ref; 245 int nlock; 246 ulong pc; /* pc that fetched this block from the cache */ 247 248 VtLock *lk; 249 250 int part; 251 u32int addr; 252 uchar score[VtScoreSize]; /* score */ 253 Label l; 254 255 uchar *dmap; 256 257 uchar *data; 258 259 /* the following is private; used by cache */ 260 261 Block *next; /* doubly linked hash chains */ 262 Block **prev; 263 u32int heap; /* index in heap table */ 264 u32int used; /* last reference times */ 265 266 u32int vers; /* version of dirty flag */ 267 268 BList *uhead; /* blocks to unlink when this block is written */ 269 BList *utail; 270 271 /* block ordering for cache -> disk */ 272 BList *prior; /* list of blocks before this one */ 273 274 Block *ionext; 275 int iostate; 276 VtRendez *ioready; 277 }; 278 279 /* tree walker, for gc and archiver */ 280 struct WalkPtr 281 { 282 uchar *data; 283 int isEntry; 284 int n; 285 int m; 286 Entry e; 287 uchar type; 288 u32int tag; 289 }; 290 291 /* disk partitions */ 292 enum { 293 PartError, 294 PartSuper, 295 PartLabel, 296 PartData, 297 PartVenti, /* fake partition */ 298 }; 299 300 extern vtType[BtMax]; 301