1 /* $NetBSD: lfs.h,v 1.160 2013/07/28 01:22:55 dholland Exp $ */ 2 3 /* from NetBSD: dinode.h,v 1.22 2013/01/22 09:39:18 dholland Exp */ 4 /* from NetBSD: dir.h,v 1.21 2009/07/22 04:49:19 dholland Exp */ 5 6 /*- 7 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 8 * All rights reserved. 9 * 10 * This code is derived from software contributed to The NetBSD Foundation 11 * by Konrad E. Schroder <perseant@hhhh.org>. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 * POSSIBILITY OF SUCH DAMAGE. 33 */ 34 /*- 35 * Copyright (c) 1991, 1993 36 * The Regents of the University of California. All rights reserved. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 * 62 * @(#)lfs.h 8.9 (Berkeley) 5/8/95 63 */ 64 /* 65 * Copyright (c) 2002 Networks Associates Technology, Inc. 66 * All rights reserved. 67 * 68 * This software was developed for the FreeBSD Project by Marshall 69 * Kirk McKusick and Network Associates Laboratories, the Security 70 * Research Division of Network Associates, Inc. under DARPA/SPAWAR 71 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS 72 * research program 73 * 74 * Copyright (c) 1982, 1989, 1993 75 * The Regents of the University of California. All rights reserved. 76 * (c) UNIX System Laboratories, Inc. 77 * All or some portions of this file are derived from material licensed 78 * to the University of California by American Telephone and Telegraph 79 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 80 * the permission of UNIX System Laboratories, Inc. 81 * 82 * Redistribution and use in source and binary forms, with or without 83 * modification, are permitted provided that the following conditions 84 * are met: 85 * 1. Redistributions of source code must retain the above copyright 86 * notice, this list of conditions and the following disclaimer. 87 * 2. Redistributions in binary form must reproduce the above copyright 88 * notice, this list of conditions and the following disclaimer in the 89 * documentation and/or other materials provided with the distribution. 90 * 3. Neither the name of the University nor the names of its contributors 91 * may be used to endorse or promote products derived from this software 92 * without specific prior written permission. 93 * 94 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 95 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 96 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 97 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 98 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 99 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 100 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 101 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 102 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 103 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 104 * SUCH DAMAGE. 105 * 106 * @(#)dinode.h 8.9 (Berkeley) 3/29/95 107 */ 108 /* 109 * Copyright (c) 1982, 1986, 1989, 1993 110 * The Regents of the University of California. All rights reserved. 111 * (c) UNIX System Laboratories, Inc. 112 * All or some portions of this file are derived from material licensed 113 * to the University of California by American Telephone and Telegraph 114 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 115 * the permission of UNIX System Laboratories, Inc. 116 * 117 * Redistribution and use in source and binary forms, with or without 118 * modification, are permitted provided that the following conditions 119 * are met: 120 * 1. Redistributions of source code must retain the above copyright 121 * notice, this list of conditions and the following disclaimer. 122 * 2. Redistributions in binary form must reproduce the above copyright 123 * notice, this list of conditions and the following disclaimer in the 124 * documentation and/or other materials provided with the distribution. 125 * 3. Neither the name of the University nor the names of its contributors 126 * may be used to endorse or promote products derived from this software 127 * without specific prior written permission. 128 * 129 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 130 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 131 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 132 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 133 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 134 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 135 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 136 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 137 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 138 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 139 * SUCH DAMAGE. 140 * 141 * @(#)dir.h 8.5 (Berkeley) 4/27/95 142 */ 143 144 /* 145 * NOTE: COORDINATE ON-DISK FORMAT CHANGES WITH THE FREEBSD PROJECT. 146 */ 147 148 #ifndef _UFS_LFS_LFS_H_ 149 #define _UFS_LFS_LFS_H_ 150 151 #include <sys/rwlock.h> 152 #include <sys/mutex.h> 153 #include <sys/queue.h> 154 #include <sys/condvar.h> 155 #include <sys/mount.h> 156 #include <sys/pool.h> 157 158 /* 159 * Compile-time options for LFS. 160 */ 161 #define LFS_IFIND_RETRIES 16 162 #define LFS_LOGLENGTH 1024 /* size of debugging log */ 163 #define LFS_MAX_ACTIVE 10 /* Dirty segments before ckp forced */ 164 165 /* 166 * Fixed filesystem layout parameters 167 */ 168 #define LFS_LABELPAD 8192 /* LFS label size */ 169 #define LFS_SBPAD 8192 /* LFS superblock size */ 170 171 #define LFS_UNUSED_INUM 0 /* 0: out of band inode number */ 172 #define LFS_IFILE_INUM 1 /* 1: IFILE inode number */ 173 /* 2: Root inode number */ 174 #define LFS_LOSTFOUNDINO 3 /* 3: lost+found inode number */ 175 #define LFS_FIRST_INUM 4 /* 4: first free inode number */ 176 177 /* 178 * The root inode is the root of the file system. Inode 0 can't be used for 179 * normal purposes and historically bad blocks were linked to inode 1, thus 180 * the root inode is 2. (Inode 1 is no longer used for this purpose, however 181 * numerous dump tapes make this assumption, so we are stuck with it). 182 */ 183 #define ULFS_ROOTINO ((ino_t)2) 184 185 /* 186 * The Whiteout inode# is a dummy non-zero inode number which will 187 * never be allocated to a real file. It is used as a place holder 188 * in the directory entry which has been tagged as a LFS_DT_WHT entry. 189 * See the comments about ULFS_ROOTINO above. 190 */ 191 #define ULFS_WINO ((ino_t)1) 192 193 194 #define LFS_V1_SUMMARY_SIZE 512 /* V1 fixed summary size */ 195 #define LFS_DFL_SUMMARY_SIZE 512 /* Default summary size */ 196 197 #define LFS_MAX_DADDR 0x7fffffff /* Highest addressable fsb */ 198 199 #define LFS_MAXNAMLEN 255 /* maximum name length in a dir */ 200 201 #define ULFS_NXADDR 2 202 #define ULFS_NDADDR 12 /* Direct addresses in inode. */ 203 #define ULFS_NIADDR 3 /* Indirect addresses in inode. */ 204 205 /* 206 * Adjustable filesystem parameters 207 */ 208 #ifndef LFS_ATIME_IFILE 209 # define LFS_ATIME_IFILE 0 /* Store atime info in ifile (optional in LFSv1) */ 210 #endif 211 #define LFS_MARKV_MAXBLKCNT 65536 /* Max block count for lfs_markv() */ 212 213 /* 214 * Directories 215 */ 216 217 /* 218 * A directory consists of some number of blocks of LFS_DIRBLKSIZ 219 * bytes, where LFS_DIRBLKSIZ is chosen such that it can be transferred 220 * to disk in a single atomic operation (e.g. 512 bytes on most machines). 221 * 222 * Each LFS_DIRBLKSIZ byte block contains some number of directory entry 223 * structures, which are of variable length. Each directory entry has 224 * a struct lfs_direct at the front of it, containing its inode number, 225 * the length of the entry, and the length of the name contained in 226 * the entry. These are followed by the name padded to a 4 byte boundary. 227 * All names are guaranteed null terminated. 228 * The maximum length of a name in a directory is LFS_MAXNAMLEN. 229 * 230 * The macro DIRSIZ(fmt, dp) gives the amount of space required to represent 231 * a directory entry. Free space in a directory is represented by 232 * entries which have dp->d_reclen > DIRSIZ(fmt, dp). All LFS_DIRBLKSIZ bytes 233 * in a directory block are claimed by the directory entries. This 234 * usually results in the last entry in a directory having a large 235 * dp->d_reclen. When entries are deleted from a directory, the 236 * space is returned to the previous entry in the same directory 237 * block by increasing its dp->d_reclen. If the first entry of 238 * a directory block is free, then its dp->d_ino is set to 0. 239 * Entries other than the first in a directory do not normally have 240 * dp->d_ino set to 0. 241 */ 242 243 /* 244 * Directory block size. 245 */ 246 #undef LFS_DIRBLKSIZ 247 #define LFS_DIRBLKSIZ DEV_BSIZE 248 249 /* 250 * Convert between stat structure types and directory types. 251 */ 252 #define LFS_IFTODT(mode) (((mode) & 0170000) >> 12) 253 #define LFS_DTTOIF(dirtype) ((dirtype) << 12) 254 255 /* 256 * The LFS_DIRSIZ macro gives the minimum record length which will hold 257 * the directory entry. This requires the amount of space in struct lfs_direct 258 * without the d_name field, plus enough space for the name with a terminating 259 * null byte (dp->d_namlen+1), rounded up to a 4 byte boundary. 260 */ 261 #define LFS_DIRECTSIZ(namlen) \ 262 ((sizeof(struct lfs_direct) - (LFS_MAXNAMLEN+1)) + (((namlen)+1 + 3) &~ 3)) 263 264 #if (BYTE_ORDER == LITTLE_ENDIAN) 265 #define LFS_DIRSIZ(oldfmt, dp, needswap) \ 266 (((oldfmt) && !(needswap)) ? \ 267 LFS_DIRECTSIZ((dp)->d_type) : LFS_DIRECTSIZ((dp)->d_namlen)) 268 #else 269 #define LFS_DIRSIZ(oldfmt, dp, needswap) \ 270 (((oldfmt) && (needswap)) ? \ 271 LFS_DIRECTSIZ((dp)->d_type) : LFS_DIRECTSIZ((dp)->d_namlen)) 272 #endif 273 274 /* Constants for the first argument of LFS_DIRSIZ */ 275 #define LFS_OLDDIRFMT 1 276 #define LFS_NEWDIRFMT 0 277 278 /* 279 * Theoretically, directories can be more than 2Gb in length; however, in 280 * practice this seems unlikely. So, we define the type doff_t as a 32-bit 281 * quantity to keep down the cost of doing lookup on a 32-bit machine. 282 */ 283 #define doff_t int32_t 284 #define lfs_doff_t int32_t 285 #define LFS_MAXDIRSIZE (0x7fffffff) 286 287 /* 288 * File types for d_type 289 */ 290 #define LFS_DT_UNKNOWN 0 291 #define LFS_DT_FIFO 1 292 #define LFS_DT_CHR 2 293 #define LFS_DT_DIR 4 294 #define LFS_DT_BLK 6 295 #define LFS_DT_REG 8 296 #define LFS_DT_LNK 10 297 #define LFS_DT_SOCK 12 298 #define LFS_DT_WHT 14 299 300 /* 301 * (See notes above) 302 */ 303 #define d_ino d_fileno 304 struct lfs_direct { 305 u_int32_t d_fileno; /* inode number of entry */ 306 u_int16_t d_reclen; /* length of this record */ 307 u_int8_t d_type; /* file type, see below */ 308 u_int8_t d_namlen; /* length of string in d_name */ 309 char d_name[LFS_MAXNAMLEN + 1];/* name with length <= LFS_MAXNAMLEN */ 310 }; 311 312 /* 313 * Template for manipulating directories. Should use struct lfs_direct's, 314 * but the name field is LFS_MAXNAMLEN - 1, and this just won't do. 315 */ 316 struct lfs_dirtemplate { 317 u_int32_t dot_ino; 318 int16_t dot_reclen; 319 u_int8_t dot_type; 320 u_int8_t dot_namlen; 321 char dot_name[4]; /* must be multiple of 4 */ 322 u_int32_t dotdot_ino; 323 int16_t dotdot_reclen; 324 u_int8_t dotdot_type; 325 u_int8_t dotdot_namlen; 326 char dotdot_name[4]; /* ditto */ 327 }; 328 329 /* 330 * This is the old format of directories, sans type element. 331 */ 332 struct lfs_odirtemplate { 333 u_int32_t dot_ino; 334 int16_t dot_reclen; 335 u_int16_t dot_namlen; 336 char dot_name[4]; /* must be multiple of 4 */ 337 u_int32_t dotdot_ino; 338 int16_t dotdot_reclen; 339 u_int16_t dotdot_namlen; 340 char dotdot_name[4]; /* ditto */ 341 }; 342 343 /* 344 * Inodes 345 */ 346 347 /* 348 * A dinode contains all the meta-data associated with a LFS file. 349 * This structure defines the on-disk format of a dinode. Since 350 * this structure describes an on-disk structure, all its fields 351 * are defined by types with precise widths. 352 */ 353 354 struct ulfs1_dinode { 355 u_int16_t di_mode; /* 0: IFMT, permissions; see below. */ 356 int16_t di_nlink; /* 2: File link count. */ 357 u_int32_t di_inumber; /* 4: Inode number. */ 358 u_int64_t di_size; /* 8: File byte count. */ 359 int32_t di_atime; /* 16: Last access time. */ 360 int32_t di_atimensec; /* 20: Last access time. */ 361 int32_t di_mtime; /* 24: Last modified time. */ 362 int32_t di_mtimensec; /* 28: Last modified time. */ 363 int32_t di_ctime; /* 32: Last inode change time. */ 364 int32_t di_ctimensec; /* 36: Last inode change time. */ 365 int32_t di_db[ULFS_NDADDR]; /* 40: Direct disk blocks. */ 366 int32_t di_ib[ULFS_NIADDR]; /* 88: Indirect disk blocks. */ 367 u_int32_t di_flags; /* 100: Status flags (chflags). */ 368 u_int32_t di_blocks; /* 104: Blocks actually held. */ 369 int32_t di_gen; /* 108: Generation number. */ 370 u_int32_t di_uid; /* 112: File owner. */ 371 u_int32_t di_gid; /* 116: File group. */ 372 u_int64_t di_modrev; /* 120: i_modrev for NFSv4 */ 373 }; 374 375 struct ulfs2_dinode { 376 u_int16_t di_mode; /* 0: IFMT, permissions; see below. */ 377 int16_t di_nlink; /* 2: File link count. */ 378 u_int32_t di_uid; /* 4: File owner. */ 379 u_int32_t di_gid; /* 8: File group. */ 380 u_int32_t di_blksize; /* 12: Inode blocksize. */ 381 u_int64_t di_size; /* 16: File byte count. */ 382 u_int64_t di_blocks; /* 24: Bytes actually held. */ 383 int64_t di_atime; /* 32: Last access time. */ 384 int64_t di_mtime; /* 40: Last modified time. */ 385 int64_t di_ctime; /* 48: Last inode change time. */ 386 int64_t di_birthtime; /* 56: Inode creation time. */ 387 int32_t di_mtimensec; /* 64: Last modified time. */ 388 int32_t di_atimensec; /* 68: Last access time. */ 389 int32_t di_ctimensec; /* 72: Last inode change time. */ 390 int32_t di_birthnsec; /* 76: Inode creation time. */ 391 int32_t di_gen; /* 80: Generation number. */ 392 u_int32_t di_kernflags; /* 84: Kernel flags. */ 393 u_int32_t di_flags; /* 88: Status flags (chflags). */ 394 int32_t di_extsize; /* 92: External attributes block. */ 395 int64_t di_extb[ULFS_NXADDR];/* 96: External attributes block. */ 396 int64_t di_db[ULFS_NDADDR]; /* 112: Direct disk blocks. */ 397 int64_t di_ib[ULFS_NIADDR]; /* 208: Indirect disk blocks. */ 398 u_int64_t di_modrev; /* 232: i_modrev for NFSv4 */ 399 int64_t di_spare[2]; /* 240: Reserved; currently unused */ 400 }; 401 402 /* 403 * The di_db fields may be overlaid with other information for 404 * file types that do not have associated disk storage. Block 405 * and character devices overlay the first data block with their 406 * dev_t value. Short symbolic links place their path in the 407 * di_db area. 408 */ 409 #define di_rdev di_db[0] 410 411 /* Size of the on-disk inode. */ 412 #define LFS_DINODE1_SIZE (sizeof(struct ulfs1_dinode)) /* 128 */ 413 #define LFS_DINODE2_SIZE (sizeof(struct ulfs2_dinode)) 414 415 /* File types, found in the upper bits of di_mode. */ 416 #define LFS_IFMT 0170000 /* Mask of file type. */ 417 #define LFS_IFIFO 0010000 /* Named pipe (fifo). */ 418 #define LFS_IFCHR 0020000 /* Character device. */ 419 #define LFS_IFDIR 0040000 /* Directory file. */ 420 #define LFS_IFBLK 0060000 /* Block device. */ 421 #define LFS_IFREG 0100000 /* Regular file. */ 422 #define LFS_IFLNK 0120000 /* Symbolic link. */ 423 #define LFS_IFSOCK 0140000 /* UNIX domain socket. */ 424 #define LFS_IFWHT 0160000 /* Whiteout. */ 425 426 /* 427 * Maximum length of a symlink that can be stored within the inode. 428 */ 429 #define ULFS1_MAXSYMLINKLEN ((ULFS_NDADDR + ULFS_NIADDR) * sizeof(int32_t)) 430 #define ULFS2_MAXSYMLINKLEN ((ULFS_NDADDR + ULFS_NIADDR) * sizeof(int64_t)) 431 432 #define ULFS_MAXSYMLINKLEN(ip) \ 433 ((ip)->i_ump->um_fstype == ULFS1) ? \ 434 ULFS1_MAXSYMLINKLEN : ULFS2_MAXSYMLINKLEN 435 436 /* 437 * "struct buf" associated definitions 438 */ 439 440 /* Unassigned disk addresses. */ 441 #define UNASSIGNED -1 442 #define UNWRITTEN -2 443 444 /* Unused logical block number */ 445 #define LFS_UNUSED_LBN -1 446 447 # define LFS_LOCK_BUF(bp) do { \ 448 if (((bp)->b_flags & B_LOCKED) == 0 && bp->b_iodone == NULL) { \ 449 mutex_enter(&lfs_lock); \ 450 ++locked_queue_count; \ 451 locked_queue_bytes += bp->b_bufsize; \ 452 mutex_exit(&lfs_lock); \ 453 } \ 454 (bp)->b_flags |= B_LOCKED; \ 455 } while (0) 456 457 # define LFS_UNLOCK_BUF(bp) do { \ 458 if (((bp)->b_flags & B_LOCKED) != 0 && bp->b_iodone == NULL) { \ 459 mutex_enter(&lfs_lock); \ 460 --locked_queue_count; \ 461 locked_queue_bytes -= bp->b_bufsize; \ 462 if (locked_queue_count < LFS_WAIT_BUFS && \ 463 locked_queue_bytes < LFS_WAIT_BYTES) \ 464 cv_broadcast(&locked_queue_cv); \ 465 mutex_exit(&lfs_lock); \ 466 } \ 467 (bp)->b_flags &= ~B_LOCKED; \ 468 } while (0) 469 470 /* 471 * "struct inode" associated definitions 472 */ 473 474 /* For convenience */ 475 #define IN_ALLMOD (IN_MODIFIED|IN_ACCESS|IN_CHANGE|IN_UPDATE|IN_MODIFY|IN_ACCESSED|IN_CLEANING) 476 477 #define LFS_SET_UINO(ip, flags) do { \ 478 if (((flags) & IN_ACCESSED) && !((ip)->i_flag & IN_ACCESSED)) \ 479 ++(ip)->i_lfs->lfs_uinodes; \ 480 if (((flags) & IN_CLEANING) && !((ip)->i_flag & IN_CLEANING)) \ 481 ++(ip)->i_lfs->lfs_uinodes; \ 482 if (((flags) & IN_MODIFIED) && !((ip)->i_flag & IN_MODIFIED)) \ 483 ++(ip)->i_lfs->lfs_uinodes; \ 484 (ip)->i_flag |= (flags); \ 485 } while (0) 486 487 #define LFS_CLR_UINO(ip, flags) do { \ 488 if (((flags) & IN_ACCESSED) && ((ip)->i_flag & IN_ACCESSED)) \ 489 --(ip)->i_lfs->lfs_uinodes; \ 490 if (((flags) & IN_CLEANING) && ((ip)->i_flag & IN_CLEANING)) \ 491 --(ip)->i_lfs->lfs_uinodes; \ 492 if (((flags) & IN_MODIFIED) && ((ip)->i_flag & IN_MODIFIED)) \ 493 --(ip)->i_lfs->lfs_uinodes; \ 494 (ip)->i_flag &= ~(flags); \ 495 if ((ip)->i_lfs->lfs_uinodes < 0) { \ 496 panic("lfs_uinodes < 0"); \ 497 } \ 498 } while (0) 499 500 #define LFS_ITIMES(ip, acc, mod, cre) \ 501 while ((ip)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE | IN_MODIFY)) \ 502 lfs_itimes(ip, acc, mod, cre) 503 504 /* 505 * On-disk and in-memory checkpoint segment usage structure. 506 */ 507 typedef struct segusage SEGUSE; 508 struct segusage { 509 u_int32_t su_nbytes; /* 0: number of live bytes */ 510 u_int32_t su_olastmod; /* 4: SEGUSE last modified timestamp */ 511 u_int16_t su_nsums; /* 8: number of summaries in segment */ 512 u_int16_t su_ninos; /* 10: number of inode blocks in seg */ 513 514 #define SEGUSE_ACTIVE 0x01 /* segment currently being written */ 515 #define SEGUSE_DIRTY 0x02 /* segment has data in it */ 516 #define SEGUSE_SUPERBLOCK 0x04 /* segment contains a superblock */ 517 #define SEGUSE_ERROR 0x08 /* cleaner: do not clean segment */ 518 #define SEGUSE_EMPTY 0x10 /* segment is empty */ 519 #define SEGUSE_INVAL 0x20 /* segment is invalid */ 520 u_int32_t su_flags; /* 12: segment flags */ 521 u_int64_t su_lastmod; /* 16: last modified timestamp */ 522 }; 523 524 typedef struct segusage_v1 SEGUSE_V1; 525 struct segusage_v1 { 526 u_int32_t su_nbytes; /* 0: number of live bytes */ 527 u_int32_t su_lastmod; /* 4: SEGUSE last modified timestamp */ 528 u_int16_t su_nsums; /* 8: number of summaries in segment */ 529 u_int16_t su_ninos; /* 10: number of inode blocks in seg */ 530 u_int32_t su_flags; /* 12: segment flags */ 531 }; 532 533 #define SEGUPB(fs) (fs->lfs_sepb) 534 #define SEGTABSIZE_SU(fs) \ 535 (((fs)->lfs_nseg + SEGUPB(fs) - 1) / (fs)->lfs_sepb) 536 537 #ifdef _KERNEL 538 # define SHARE_IFLOCK(F) \ 539 do { \ 540 rw_enter(&(F)->lfs_iflock, RW_READER); \ 541 } while(0) 542 # define UNSHARE_IFLOCK(F) \ 543 do { \ 544 rw_exit(&(F)->lfs_iflock); \ 545 } while(0) 546 #else /* ! _KERNEL */ 547 # define SHARE_IFLOCK(F) 548 # define UNSHARE_IFLOCK(F) 549 #endif /* ! _KERNEL */ 550 551 /* Read in the block with a specific segment usage entry from the ifile. */ 552 #define LFS_SEGENTRY(SP, F, IN, BP) do { \ 553 int _e; \ 554 SHARE_IFLOCK(F); \ 555 VTOI((F)->lfs_ivnode)->i_flag |= IN_ACCESS; \ 556 if ((_e = bread((F)->lfs_ivnode, \ 557 ((IN) / (F)->lfs_sepb) + (F)->lfs_cleansz, \ 558 (F)->lfs_bsize, NOCRED, 0, &(BP))) != 0) \ 559 panic("lfs: ifile read: %d", _e); \ 560 if ((F)->lfs_version == 1) \ 561 (SP) = (SEGUSE *)((SEGUSE_V1 *)(BP)->b_data + \ 562 ((IN) & ((F)->lfs_sepb - 1))); \ 563 else \ 564 (SP) = (SEGUSE *)(BP)->b_data + ((IN) % (F)->lfs_sepb); \ 565 UNSHARE_IFLOCK(F); \ 566 } while (0) 567 568 #define LFS_WRITESEGENTRY(SP, F, IN, BP) do { \ 569 if ((SP)->su_nbytes == 0) \ 570 (SP)->su_flags |= SEGUSE_EMPTY; \ 571 else \ 572 (SP)->su_flags &= ~SEGUSE_EMPTY; \ 573 (F)->lfs_suflags[(F)->lfs_activesb][(IN)] = (SP)->su_flags; \ 574 LFS_BWRITE_LOG(BP); \ 575 } while (0) 576 577 /* 578 * On-disk file information. One per file with data blocks in the segment. 579 */ 580 typedef struct finfo FINFO; 581 struct finfo { 582 u_int32_t fi_nblocks; /* number of blocks */ 583 u_int32_t fi_version; /* version number */ 584 u_int32_t fi_ino; /* inode number */ 585 u_int32_t fi_lastlength; /* length of last block in array */ 586 int32_t fi_blocks[1]; /* array of logical block numbers */ 587 }; 588 /* sizeof FINFO except fi_blocks */ 589 #define FINFOSIZE (sizeof(FINFO) - sizeof(int32_t)) 590 591 /* 592 * Index file inode entries. 593 */ 594 typedef struct ifile IFILE; 595 struct ifile { 596 u_int32_t if_version; /* inode version number */ 597 #define LFS_UNUSED_DADDR 0 /* out-of-band daddr */ 598 int32_t if_daddr; /* inode disk address */ 599 #define LFS_ORPHAN_NEXTFREE (~(u_int32_t)0) /* indicate orphaned file */ 600 u_int32_t if_nextfree; /* next-unallocated inode */ 601 u_int32_t if_atime_sec; /* Last access time, seconds */ 602 u_int32_t if_atime_nsec; /* and nanoseconds */ 603 }; 604 605 typedef struct ifile_v1 IFILE_V1; 606 struct ifile_v1 { 607 u_int32_t if_version; /* inode version number */ 608 int32_t if_daddr; /* inode disk address */ 609 u_int32_t if_nextfree; /* next-unallocated inode */ 610 #if LFS_ATIME_IFILE 611 struct timespec if_atime; /* Last access time */ 612 #endif 613 }; 614 615 /* 616 * LFSv1 compatibility code is not allowed to touch if_atime, since it 617 * may not be mapped! 618 */ 619 /* Read in the block with a specific inode from the ifile. */ 620 #define LFS_IENTRY(IP, F, IN, BP) do { \ 621 int _e; \ 622 SHARE_IFLOCK(F); \ 623 VTOI((F)->lfs_ivnode)->i_flag |= IN_ACCESS; \ 624 if ((_e = bread((F)->lfs_ivnode, \ 625 (IN) / (F)->lfs_ifpb + (F)->lfs_cleansz + (F)->lfs_segtabsz, \ 626 (F)->lfs_bsize, NOCRED, 0, &(BP))) != 0) \ 627 panic("lfs: ifile ino %d read %d", (int)(IN), _e); \ 628 if ((F)->lfs_version == 1) \ 629 (IP) = (IFILE *)((IFILE_V1 *)(BP)->b_data + \ 630 (IN) % (F)->lfs_ifpb); \ 631 else \ 632 (IP) = (IFILE *)(BP)->b_data + (IN) % (F)->lfs_ifpb; \ 633 UNSHARE_IFLOCK(F); \ 634 } while (0) 635 636 /* 637 * Cleaner information structure. This resides in the ifile and is used 638 * to pass information from the kernel to the cleaner. 639 */ 640 typedef struct _cleanerinfo { 641 u_int32_t clean; /* number of clean segments */ 642 u_int32_t dirty; /* number of dirty segments */ 643 int32_t bfree; /* disk blocks free */ 644 int32_t avail; /* disk blocks available */ 645 u_int32_t free_head; /* head of the inode free list */ 646 u_int32_t free_tail; /* tail of the inode free list */ 647 #define LFS_CLEANER_MUST_CLEAN 0x01 648 u_int32_t flags; /* status word from the kernel */ 649 } CLEANERINFO; 650 651 #define CLEANSIZE_SU(fs) \ 652 ((sizeof(CLEANERINFO) + (fs)->lfs_bsize - 1) >> (fs)->lfs_bshift) 653 654 /* Read in the block with the cleaner info from the ifile. */ 655 #define LFS_CLEANERINFO(CP, F, BP) do { \ 656 SHARE_IFLOCK(F); \ 657 VTOI((F)->lfs_ivnode)->i_flag |= IN_ACCESS; \ 658 if (bread((F)->lfs_ivnode, \ 659 (daddr_t)0, (F)->lfs_bsize, NOCRED, 0, &(BP))) \ 660 panic("lfs: ifile read"); \ 661 (CP) = (CLEANERINFO *)(BP)->b_data; \ 662 UNSHARE_IFLOCK(F); \ 663 } while (0) 664 665 /* 666 * Synchronize the Ifile cleaner info with current avail and bfree. 667 */ 668 #define LFS_SYNC_CLEANERINFO(cip, fs, bp, w) do { \ 669 mutex_enter(&lfs_lock); \ 670 if ((w) || (cip)->bfree != (fs)->lfs_bfree || \ 671 (cip)->avail != (fs)->lfs_avail - (fs)->lfs_ravail - \ 672 (fs)->lfs_favail) { \ 673 (cip)->bfree = (fs)->lfs_bfree; \ 674 (cip)->avail = (fs)->lfs_avail - (fs)->lfs_ravail - \ 675 (fs)->lfs_favail; \ 676 if (((bp)->b_flags & B_GATHERED) == 0) { \ 677 (fs)->lfs_flags |= LFS_IFDIRTY; \ 678 } \ 679 mutex_exit(&lfs_lock); \ 680 (void) LFS_BWRITE_LOG(bp); /* Ifile */ \ 681 } else { \ 682 mutex_exit(&lfs_lock); \ 683 brelse(bp, 0); \ 684 } \ 685 } while (0) 686 687 /* 688 * Get the head of the inode free list. 689 * Always called with the segment lock held. 690 */ 691 #define LFS_GET_HEADFREE(FS, CIP, BP, FREEP) do { \ 692 if ((FS)->lfs_version > 1) { \ 693 LFS_CLEANERINFO((CIP), (FS), (BP)); \ 694 (FS)->lfs_freehd = (CIP)->free_head; \ 695 brelse(BP, 0); \ 696 } \ 697 *(FREEP) = (FS)->lfs_freehd; \ 698 } while (0) 699 700 #define LFS_PUT_HEADFREE(FS, CIP, BP, VAL) do { \ 701 (FS)->lfs_freehd = (VAL); \ 702 if ((FS)->lfs_version > 1) { \ 703 LFS_CLEANERINFO((CIP), (FS), (BP)); \ 704 (CIP)->free_head = (VAL); \ 705 LFS_BWRITE_LOG(BP); \ 706 mutex_enter(&lfs_lock); \ 707 (FS)->lfs_flags |= LFS_IFDIRTY; \ 708 mutex_exit(&lfs_lock); \ 709 } \ 710 } while (0) 711 712 #define LFS_GET_TAILFREE(FS, CIP, BP, FREEP) do { \ 713 LFS_CLEANERINFO((CIP), (FS), (BP)); \ 714 *(FREEP) = (CIP)->free_tail; \ 715 brelse(BP, 0); \ 716 } while (0) 717 718 #define LFS_PUT_TAILFREE(FS, CIP, BP, VAL) do { \ 719 LFS_CLEANERINFO((CIP), (FS), (BP)); \ 720 (CIP)->free_tail = (VAL); \ 721 LFS_BWRITE_LOG(BP); \ 722 mutex_enter(&lfs_lock); \ 723 (FS)->lfs_flags |= LFS_IFDIRTY; \ 724 mutex_exit(&lfs_lock); \ 725 } while (0) 726 727 /* 728 * On-disk segment summary information 729 */ 730 typedef struct segsum_v1 SEGSUM_V1; 731 struct segsum_v1 { 732 u_int32_t ss_sumsum; /* 0: check sum of summary block */ 733 u_int32_t ss_datasum; /* 4: check sum of data */ 734 u_int32_t ss_magic; /* 8: segment summary magic number */ 735 #define SS_MAGIC 0x061561 736 int32_t ss_next; /* 12: next segment */ 737 u_int32_t ss_create; /* 16: creation time stamp */ 738 u_int16_t ss_nfinfo; /* 20: number of file info structures */ 739 u_int16_t ss_ninos; /* 22: number of inodes in summary */ 740 741 #define SS_DIROP 0x01 /* segment begins a dirop */ 742 #define SS_CONT 0x02 /* more partials to finish this write*/ 743 #define SS_CLEAN 0x04 /* written by the cleaner */ 744 #define SS_RFW 0x08 /* written by the roll-forward agent */ 745 #define SS_RECLAIM 0x10 /* written by the roll-forward agent */ 746 u_int16_t ss_flags; /* 24: used for directory operations */ 747 u_int16_t ss_pad; /* 26: extra space */ 748 /* FINFO's and inode daddr's... */ 749 }; 750 751 typedef struct segsum SEGSUM; 752 struct segsum { 753 u_int32_t ss_sumsum; /* 0: check sum of summary block */ 754 u_int32_t ss_datasum; /* 4: check sum of data */ 755 u_int32_t ss_magic; /* 8: segment summary magic number */ 756 int32_t ss_next; /* 12: next segment */ 757 u_int32_t ss_ident; /* 16: roll-forward fsid */ 758 #define ss_ocreate ss_ident /* ident is where create was in v1 */ 759 u_int16_t ss_nfinfo; /* 20: number of file info structures */ 760 u_int16_t ss_ninos; /* 22: number of inodes in summary */ 761 u_int16_t ss_flags; /* 24: used for directory operations */ 762 u_int8_t ss_pad[2]; /* 26: extra space */ 763 u_int32_t ss_reclino; /* 28: inode being reclaimed */ 764 u_int64_t ss_serial; /* 32: serial number */ 765 u_int64_t ss_create; /* 40: time stamp */ 766 /* FINFO's and inode daddr's... */ 767 }; 768 769 #define SEGSUM_SIZE(fs) ((fs)->lfs_version == 1 ? sizeof(SEGSUM_V1) : sizeof(SEGSUM)) 770 771 772 /* 773 * On-disk super block. 774 */ 775 struct dlfs { 776 #define LFS_MAGIC 0x070162 777 u_int32_t dlfs_magic; /* 0: magic number */ 778 #define LFS_VERSION 2 779 u_int32_t dlfs_version; /* 4: version number */ 780 781 u_int32_t dlfs_size; /* 8: number of blocks in fs (v1) */ 782 /* number of frags in fs (v2) */ 783 u_int32_t dlfs_ssize; /* 12: number of blocks per segment (v1) */ 784 /* number of bytes per segment (v2) */ 785 u_int32_t dlfs_dsize; /* 16: number of disk blocks in fs */ 786 u_int32_t dlfs_bsize; /* 20: file system block size */ 787 u_int32_t dlfs_fsize; /* 24: size of frag blocks in fs */ 788 u_int32_t dlfs_frag; /* 28: number of frags in a block in fs */ 789 790 /* Checkpoint region. */ 791 u_int32_t dlfs_freehd; /* 32: start of the free list */ 792 int32_t dlfs_bfree; /* 36: number of free disk blocks */ 793 u_int32_t dlfs_nfiles; /* 40: number of allocated inodes */ 794 int32_t dlfs_avail; /* 44: blocks available for writing */ 795 int32_t dlfs_uinodes; /* 48: inodes in cache not yet on disk */ 796 int32_t dlfs_idaddr; /* 52: inode file disk address */ 797 u_int32_t dlfs_ifile; /* 56: inode file inode number */ 798 int32_t dlfs_lastseg; /* 60: address of last segment written */ 799 int32_t dlfs_nextseg; /* 64: address of next segment to write */ 800 int32_t dlfs_curseg; /* 68: current segment being written */ 801 int32_t dlfs_offset; /* 72: offset in curseg for next partial */ 802 int32_t dlfs_lastpseg; /* 76: address of last partial written */ 803 u_int32_t dlfs_inopf; /* 80: v1: time stamp; v2: inodes per frag */ 804 #define dlfs_otstamp dlfs_inopf 805 806 /* These are configuration parameters. */ 807 u_int32_t dlfs_minfree; /* 84: minimum percentage of free blocks */ 808 809 /* These fields can be computed from the others. */ 810 u_int64_t dlfs_maxfilesize; /* 88: maximum representable file size */ 811 u_int32_t dlfs_fsbpseg; /* 96: fsb per segment */ 812 u_int32_t dlfs_inopb; /* 100: inodes per block */ 813 u_int32_t dlfs_ifpb; /* 104: IFILE entries per block */ 814 u_int32_t dlfs_sepb; /* 108: SEGUSE entries per block */ 815 u_int32_t dlfs_nindir; /* 112: indirect pointers per block */ 816 u_int32_t dlfs_nseg; /* 116: number of segments */ 817 u_int32_t dlfs_nspf; /* 120: number of sectors per fragment */ 818 u_int32_t dlfs_cleansz; /* 124: cleaner info size in blocks */ 819 u_int32_t dlfs_segtabsz; /* 128: segment table size in blocks */ 820 u_int32_t dlfs_segmask; /* 132: calculate offset within a segment */ 821 u_int32_t dlfs_segshift; /* 136: fast mult/div for segments */ 822 u_int32_t dlfs_bshift; /* 140: calc block number from file offset */ 823 u_int32_t dlfs_ffshift; /* 144: fast mult/div for frag from file */ 824 u_int32_t dlfs_fbshift; /* 148: fast mult/div for frag from block */ 825 u_int64_t dlfs_bmask; /* 152: calc block offset from file offset */ 826 u_int64_t dlfs_ffmask; /* 160: calc frag offset from file offset */ 827 u_int64_t dlfs_fbmask; /* 168: calc frag offset from block offset */ 828 u_int32_t dlfs_blktodb; /* 176: blktodb and dbtoblk shift constant */ 829 u_int32_t dlfs_sushift; /* 180: fast mult/div for segusage table */ 830 831 int32_t dlfs_maxsymlinklen; /* 184: max length of an internal symlink */ 832 #define LFS_MIN_SBINTERVAL 5 /* minimum superblock segment spacing */ 833 #define LFS_MAXNUMSB 10 /* 188: superblock disk offsets */ 834 int32_t dlfs_sboffs[LFS_MAXNUMSB]; 835 836 u_int32_t dlfs_nclean; /* 228: Number of clean segments */ 837 u_char dlfs_fsmnt[MNAMELEN]; /* 232: name mounted on */ 838 #define LFS_PF_CLEAN 0x1 839 u_int16_t dlfs_pflags; /* 322: file system persistent flags */ 840 int32_t dlfs_dmeta; /* 324: total number of dirty summaries */ 841 u_int32_t dlfs_minfreeseg; /* 328: segments not counted in bfree */ 842 u_int32_t dlfs_sumsize; /* 332: size of summary blocks */ 843 u_int64_t dlfs_serial; /* 336: serial number */ 844 u_int32_t dlfs_ibsize; /* 344: size of inode blocks */ 845 int32_t dlfs_start; /* 348: start of segment 0 */ 846 u_int64_t dlfs_tstamp; /* 352: time stamp */ 847 #define LFS_44INODEFMT 0 848 #define LFS_MAXINODEFMT 0 849 u_int32_t dlfs_inodefmt; /* 360: inode format version */ 850 u_int32_t dlfs_interleave; /* 364: segment interleave */ 851 u_int32_t dlfs_ident; /* 368: per-fs identifier */ 852 u_int32_t dlfs_fsbtodb; /* 372: fsbtodb and dbtodsb shift constant */ 853 u_int32_t dlfs_resvseg; /* 376: segments reserved for the cleaner */ 854 int8_t dlfs_pad[128]; /* 380: round to 512 bytes */ 855 /* Checksum -- last valid disk field. */ 856 u_int32_t dlfs_cksum; /* 508: checksum for superblock checking */ 857 }; 858 859 /* Type used for the inode bitmap */ 860 typedef u_int32_t lfs_bm_t; 861 862 /* 863 * Linked list of segments whose byte count needs updating following a 864 * file truncation. 865 */ 866 struct segdelta { 867 long segnum; 868 size_t num; 869 LIST_ENTRY(segdelta) list; 870 }; 871 872 /* 873 * In-memory super block. 874 */ 875 struct lfs { 876 struct dlfs lfs_dlfs; /* on-disk parameters */ 877 #define lfs_magic lfs_dlfs.dlfs_magic 878 #define lfs_version lfs_dlfs.dlfs_version 879 #define lfs_size lfs_dlfs.dlfs_size 880 #define lfs_ssize lfs_dlfs.dlfs_ssize 881 #define lfs_dsize lfs_dlfs.dlfs_dsize 882 #define lfs_bsize lfs_dlfs.dlfs_bsize 883 #define lfs_fsize lfs_dlfs.dlfs_fsize 884 #define lfs_frag lfs_dlfs.dlfs_frag 885 #define lfs_freehd lfs_dlfs.dlfs_freehd 886 #define lfs_bfree lfs_dlfs.dlfs_bfree 887 #define lfs_nfiles lfs_dlfs.dlfs_nfiles 888 #define lfs_avail lfs_dlfs.dlfs_avail 889 #define lfs_uinodes lfs_dlfs.dlfs_uinodes 890 #define lfs_idaddr lfs_dlfs.dlfs_idaddr 891 #define lfs_ifile lfs_dlfs.dlfs_ifile 892 #define lfs_lastseg lfs_dlfs.dlfs_lastseg 893 #define lfs_nextseg lfs_dlfs.dlfs_nextseg 894 #define lfs_curseg lfs_dlfs.dlfs_curseg 895 #define lfs_offset lfs_dlfs.dlfs_offset 896 #define lfs_lastpseg lfs_dlfs.dlfs_lastpseg 897 #define lfs_otstamp lfs_dlfs.dlfs_inopf 898 #define lfs_inopf lfs_dlfs.dlfs_inopf 899 #define lfs_minfree lfs_dlfs.dlfs_minfree 900 #define lfs_maxfilesize lfs_dlfs.dlfs_maxfilesize 901 #define lfs_fsbpseg lfs_dlfs.dlfs_fsbpseg 902 #define lfs_inopb lfs_dlfs.dlfs_inopb 903 #define lfs_ifpb lfs_dlfs.dlfs_ifpb 904 #define lfs_sepb lfs_dlfs.dlfs_sepb 905 #define lfs_nindir lfs_dlfs.dlfs_nindir 906 #define lfs_nseg lfs_dlfs.dlfs_nseg 907 #define lfs_nspf lfs_dlfs.dlfs_nspf 908 #define lfs_cleansz lfs_dlfs.dlfs_cleansz 909 #define lfs_segtabsz lfs_dlfs.dlfs_segtabsz 910 #define lfs_segmask lfs_dlfs.dlfs_segmask 911 #define lfs_segshift lfs_dlfs.dlfs_segshift 912 #define lfs_bmask lfs_dlfs.dlfs_bmask 913 #define lfs_bshift lfs_dlfs.dlfs_bshift 914 #define lfs_ffmask lfs_dlfs.dlfs_ffmask 915 #define lfs_ffshift lfs_dlfs.dlfs_ffshift 916 #define lfs_fbmask lfs_dlfs.dlfs_fbmask 917 #define lfs_fbshift lfs_dlfs.dlfs_fbshift 918 #define lfs_blktodb lfs_dlfs.dlfs_blktodb 919 #define lfs_fsbtodb lfs_dlfs.dlfs_fsbtodb 920 #define lfs_sushift lfs_dlfs.dlfs_sushift 921 #define lfs_maxsymlinklen lfs_dlfs.dlfs_maxsymlinklen 922 #define lfs_sboffs lfs_dlfs.dlfs_sboffs 923 #define lfs_cksum lfs_dlfs.dlfs_cksum 924 #define lfs_pflags lfs_dlfs.dlfs_pflags 925 #define lfs_fsmnt lfs_dlfs.dlfs_fsmnt 926 #define lfs_nclean lfs_dlfs.dlfs_nclean 927 #define lfs_dmeta lfs_dlfs.dlfs_dmeta 928 #define lfs_minfreeseg lfs_dlfs.dlfs_minfreeseg 929 #define lfs_sumsize lfs_dlfs.dlfs_sumsize 930 #define lfs_serial lfs_dlfs.dlfs_serial 931 #define lfs_ibsize lfs_dlfs.dlfs_ibsize 932 #define lfs_start lfs_dlfs.dlfs_start 933 #define lfs_tstamp lfs_dlfs.dlfs_tstamp 934 #define lfs_inodefmt lfs_dlfs.dlfs_inodefmt 935 #define lfs_interleave lfs_dlfs.dlfs_interleave 936 #define lfs_ident lfs_dlfs.dlfs_ident 937 #define lfs_resvseg lfs_dlfs.dlfs_resvseg 938 939 /* These fields are set at mount time and are meaningless on disk. */ 940 struct segment *lfs_sp; /* current segment being written */ 941 struct vnode *lfs_ivnode; /* vnode for the ifile */ 942 u_int32_t lfs_seglock; /* single-thread the segment writer */ 943 pid_t lfs_lockpid; /* pid of lock holder */ 944 lwpid_t lfs_locklwp; /* lwp of lock holder */ 945 u_int32_t lfs_iocount; /* number of ios pending */ 946 u_int32_t lfs_writer; /* don't allow any dirops to start */ 947 u_int32_t lfs_dirops; /* count of active directory ops */ 948 u_int32_t lfs_dirvcount; /* count of VDIROP nodes in this fs */ 949 u_int32_t lfs_doifile; /* Write ifile blocks on next write */ 950 u_int32_t lfs_nactive; /* Number of segments since last ckp */ 951 int8_t lfs_fmod; /* super block modified flag */ 952 int8_t lfs_ronly; /* mounted read-only flag */ 953 #define LFS_NOTYET 0x01 954 #define LFS_IFDIRTY 0x02 955 #define LFS_WARNED 0x04 956 #define LFS_UNDIROP 0x08 957 int8_t lfs_flags; /* currently unused flag */ 958 u_int16_t lfs_activesb; /* toggle between superblocks */ 959 daddr_t lfs_sbactive; /* disk address of current sb write */ 960 struct vnode *lfs_flushvp; /* vnode being flushed */ 961 int lfs_flushvp_fakevref; /* fake vref count for flushvp */ 962 struct vnode *lfs_unlockvp; /* being inactivated in lfs_segunlock */ 963 u_int32_t lfs_diropwait; /* # procs waiting on dirop flush */ 964 size_t lfs_devbsize; /* Device block size */ 965 size_t lfs_devbshift; /* Device block shift */ 966 krwlock_t lfs_fraglock; 967 krwlock_t lfs_iflock; /* Ifile lock */ 968 kcondvar_t lfs_stopcv; /* Wrap lock */ 969 struct lwp *lfs_stoplwp; 970 pid_t lfs_rfpid; /* Process ID of roll-forward agent */ 971 int lfs_nadirop; /* number of active dirop nodes */ 972 long lfs_ravail; /* blocks pre-reserved for writing */ 973 long lfs_favail; /* blocks pre-reserved for writing */ 974 struct lfs_res_blk *lfs_resblk; /* Reserved memory for pageout */ 975 TAILQ_HEAD(, inode) lfs_dchainhd; /* dirop vnodes */ 976 TAILQ_HEAD(, inode) lfs_pchainhd; /* paging vnodes */ 977 #define LFS_RESHASH_WIDTH 17 978 LIST_HEAD(, lfs_res_blk) lfs_reshash[LFS_RESHASH_WIDTH]; 979 int lfs_pdflush; /* pagedaemon wants us to flush */ 980 u_int32_t **lfs_suflags; /* Segment use flags */ 981 #ifdef _KERNEL 982 struct pool lfs_clpool; /* Pool for struct lfs_cluster */ 983 struct pool lfs_bpppool; /* Pool for bpp */ 984 struct pool lfs_segpool; /* Pool for struct segment */ 985 #endif /* _KERNEL */ 986 #define LFS_MAX_CLEANIND 64 987 int32_t lfs_cleanint[LFS_MAX_CLEANIND]; /* Active cleaning intervals */ 988 int lfs_cleanind; /* Index into intervals */ 989 int lfs_sleepers; /* # procs sleeping this fs */ 990 int lfs_pages; /* dirty pages blaming this fs */ 991 lfs_bm_t *lfs_ino_bitmap; /* Inuse inodes bitmap */ 992 int lfs_nowrap; /* Suspend log wrap */ 993 int lfs_wrappass; /* Allow first log wrap requester to pass */ 994 int lfs_wrapstatus; /* Wrap status */ 995 int lfs_reclino; /* Inode being reclaimed */ 996 int lfs_startseg; /* Segment we started writing at */ 997 LIST_HEAD(, segdelta) lfs_segdhd; /* List of pending trunc accounting events */ 998 999 #ifdef _KERNEL 1000 /* ULFS-level information */ 1001 u_int32_t um_flags; /* ULFS flags (below) */ 1002 u_long um_nindir; /* indirect ptrs per block */ 1003 u_long um_lognindir; /* log2 of um_nindir */ 1004 u_long um_bptrtodb; /* indir ptr to disk block */ 1005 u_long um_seqinc; /* inc between seq blocks */ 1006 int um_maxsymlinklen; 1007 int um_dirblksiz; 1008 u_int64_t um_maxfilesize; 1009 1010 /* Stuff used by quota2 code, not currently operable */ 1011 unsigned lfs_use_quota2 : 1; 1012 uint32_t lfs_quota_magic; 1013 uint8_t lfs_quota_flags; 1014 uint64_t lfs_quotaino[2]; 1015 #endif 1016 }; 1017 1018 /* LFS_NINDIR is the number of indirects in a file system block. */ 1019 #define LFS_NINDIR(fs) ((fs)->lfs_nindir) 1020 1021 /* LFS_INOPB is the number of inodes in a secondary storage block. */ 1022 #define LFS_INOPB(fs) ((fs)->lfs_inopb) 1023 /* LFS_INOPF is the number of inodes in a fragment. */ 1024 #define LFS_INOPF(fs) ((fs)->lfs_inopf) 1025 1026 #define lfs_blksize(fs, ip, lbn) \ 1027 (((lbn) >= ULFS_NDADDR || (ip)->i_ffs1_size >= ((lbn) + 1) << (fs)->lfs_bshift) \ 1028 ? (fs)->lfs_bsize \ 1029 : (lfs_fragroundup(fs, lfs_blkoff(fs, (ip)->i_ffs1_size)))) 1030 #define lfs_blkoff(fs, loc) ((int)((loc) & (fs)->lfs_bmask)) 1031 #define lfs_fragoff(fs, loc) /* calculates (loc % fs->lfs_fsize) */ \ 1032 ((int)((loc) & (fs)->lfs_ffmask)) 1033 1034 #if defined(_KERNEL) 1035 #define LFS_FSBTODB(fs, b) ((b) << ((fs)->lfs_ffshift - DEV_BSHIFT)) 1036 #define LFS_DBTOFSB(fs, b) ((b) >> ((fs)->lfs_ffshift - DEV_BSHIFT)) 1037 #else 1038 #define LFS_FSBTODB(fs, b) ((b) << (fs)->lfs_fsbtodb) 1039 #define LFS_DBTOFSB(fs, b) ((b) >> (fs)->lfs_fsbtodb) 1040 #endif 1041 1042 #define lfs_lblkno(fs, loc) ((loc) >> (fs)->lfs_bshift) 1043 #define lfs_lblktosize(fs, blk) ((blk) << (fs)->lfs_bshift) 1044 1045 #define lfs_fsbtob(fs, b) ((b) << (fs)->lfs_ffshift) 1046 #define lfs_btofsb(fs, b) ((b) >> (fs)->lfs_ffshift) 1047 1048 #define lfs_numfrags(fs, loc) /* calculates (loc / fs->lfs_fsize) */ \ 1049 ((loc) >> (fs)->lfs_ffshift) 1050 #define lfs_blkroundup(fs, size)/* calculates roundup(size, fs->lfs_bsize) */ \ 1051 ((off_t)(((size) + (fs)->lfs_bmask) & (~(fs)->lfs_bmask))) 1052 #define lfs_fragroundup(fs, size)/* calculates roundup(size, fs->lfs_fsize) */ \ 1053 ((off_t)(((size) + (fs)->lfs_ffmask) & (~(fs)->lfs_ffmask))) 1054 #define lfs_fragstoblks(fs, frags)/* calculates (frags / fs->fs_frag) */ \ 1055 ((frags) >> (fs)->lfs_fbshift) 1056 #define lfs_blkstofrags(fs, blks)/* calculates (blks * fs->fs_frag) */ \ 1057 ((blks) << (fs)->lfs_fbshift) 1058 #define lfs_fragnum(fs, fsb) /* calculates (fsb % fs->lfs_frag) */ \ 1059 ((fsb) & ((fs)->lfs_frag - 1)) 1060 #define lfs_blknum(fs, fsb) /* calculates rounddown(fsb, fs->lfs_frag) */ \ 1061 ((fsb) &~ ((fs)->lfs_frag - 1)) 1062 #define lfs_dblksize(fs, dp, lbn) \ 1063 (((lbn) >= ULFS_NDADDR || (dp)->di_size >= ((lbn) + 1) << (fs)->lfs_bshift)\ 1064 ? (fs)->lfs_bsize \ 1065 : (lfs_fragroundup(fs, lfs_blkoff(fs, (dp)->di_size)))) 1066 1067 #define lfs_segsize(fs) ((fs)->lfs_version == 1 ? \ 1068 lfs_lblktosize((fs), (fs)->lfs_ssize) : \ 1069 (fs)->lfs_ssize) 1070 #define lfs_segtod(fs, seg) (((fs)->lfs_version == 1 ? \ 1071 (fs)->lfs_ssize << (fs)->lfs_blktodb : \ 1072 lfs_btofsb((fs), (fs)->lfs_ssize)) * (seg)) 1073 #define lfs_dtosn(fs, daddr) /* block address to segment number */ \ 1074 ((uint32_t)(((daddr) - (fs)->lfs_start) / lfs_segtod((fs), 1))) 1075 #define lfs_sntod(fs, sn) /* segment number to disk address */ \ 1076 ((daddr_t)(lfs_segtod((fs), (sn)) + (fs)->lfs_start)) 1077 1078 /* 1079 * Structures used by lfs_bmapv and lfs_markv to communicate information 1080 * about inodes and data blocks. 1081 */ 1082 typedef struct block_info { 1083 u_int32_t bi_inode; /* inode # */ 1084 int32_t bi_lbn; /* logical block w/in file */ 1085 int32_t bi_daddr; /* disk address of block */ 1086 u_int64_t bi_segcreate; /* origin segment create time */ 1087 int bi_version; /* file version number */ 1088 void *bi_bp; /* data buffer */ 1089 int bi_size; /* size of the block (if fragment) */ 1090 } BLOCK_INFO; 1091 1092 /* Compatibility for 1.5 binaries */ 1093 typedef struct block_info_15 { 1094 u_int32_t bi_inode; /* inode # */ 1095 int32_t bi_lbn; /* logical block w/in file */ 1096 int32_t bi_daddr; /* disk address of block */ 1097 u_int32_t bi_segcreate; /* origin segment create time */ 1098 int bi_version; /* file version number */ 1099 void *bi_bp; /* data buffer */ 1100 int bi_size; /* size of the block (if fragment) */ 1101 } BLOCK_INFO_15; 1102 1103 /* In-memory description of a segment about to be written. */ 1104 struct segment { 1105 struct lfs *fs; /* file system pointer */ 1106 struct buf **bpp; /* pointer to buffer array */ 1107 struct buf **cbpp; /* pointer to next available bp */ 1108 struct buf **start_bpp; /* pointer to first bp in this set */ 1109 struct buf *ibp; /* buffer pointer to inode page */ 1110 struct ulfs1_dinode *idp; /* pointer to ifile dinode */ 1111 struct finfo *fip; /* current fileinfo pointer */ 1112 struct vnode *vp; /* vnode being gathered */ 1113 void *segsum; /* segment summary info */ 1114 u_int32_t ninodes; /* number of inodes in this segment */ 1115 int32_t seg_bytes_left; /* bytes left in segment */ 1116 int32_t sum_bytes_left; /* bytes left in summary block */ 1117 u_int32_t seg_number; /* number of this segment */ 1118 int32_t *start_lbp; /* beginning lbn for this set */ 1119 1120 #define SEGM_CKP 0x0001 /* doing a checkpoint */ 1121 #define SEGM_CLEAN 0x0002 /* cleaner call; don't sort */ 1122 #define SEGM_SYNC 0x0004 /* wait for segment */ 1123 #define SEGM_PROT 0x0008 /* don't inactivate at segunlock */ 1124 #define SEGM_PAGEDAEMON 0x0010 /* pagedaemon called us */ 1125 #define SEGM_WRITERD 0x0020 /* LFS writed called us */ 1126 #define SEGM_FORCE_CKP 0x0040 /* Force checkpoint right away */ 1127 #define SEGM_RECLAIM 0x0080 /* Writing to reclaim vnode */ 1128 #define SEGM_SINGLE 0x0100 /* Opportunistic writevnodes */ 1129 u_int16_t seg_flags; /* run-time flags for this segment */ 1130 u_int32_t seg_iocount; /* number of ios pending */ 1131 int ndupino; /* number of duplicate inodes */ 1132 }; 1133 1134 /* 1135 * Macros for determining free space on the disk, with the variable metadata 1136 * of segment summaries and inode blocks taken into account. 1137 */ 1138 /* 1139 * Estimate number of clean blocks not available for writing because 1140 * they will contain metadata or overhead. This is calculated as 1141 * 1142 * E = ((C * M / D) * D + (0) * (T - D)) / T 1143 * or more simply 1144 * E = (C * M) / T 1145 * 1146 * where 1147 * C is the clean space, 1148 * D is the dirty space, 1149 * M is the dirty metadata, and 1150 * T = C + D is the total space on disk. 1151 * 1152 * This approximates the old formula of E = C * M / D when D is close to T, 1153 * but avoids falsely reporting "disk full" when the sample size (D) is small. 1154 */ 1155 #define LFS_EST_CMETA(F) (int32_t)(( \ 1156 ((F)->lfs_dmeta * (int64_t)(F)->lfs_nclean) / \ 1157 ((F)->lfs_nseg))) 1158 1159 /* Estimate total size of the disk not including metadata */ 1160 #define LFS_EST_NONMETA(F) ((F)->lfs_dsize - (F)->lfs_dmeta - LFS_EST_CMETA(F)) 1161 1162 /* Estimate number of blocks actually available for writing */ 1163 #define LFS_EST_BFREE(F) ((F)->lfs_bfree > LFS_EST_CMETA(F) ? \ 1164 (F)->lfs_bfree - LFS_EST_CMETA(F) : 0) 1165 1166 /* Amount of non-meta space not available to mortal man */ 1167 #define LFS_EST_RSVD(F) (int32_t)((LFS_EST_NONMETA(F) * \ 1168 (u_int64_t)(F)->lfs_minfree) / \ 1169 100) 1170 1171 /* Can credential C write BB blocks */ 1172 #define ISSPACE(F, BB, C) \ 1173 ((((C) == NOCRED || kauth_cred_geteuid(C) == 0) && \ 1174 LFS_EST_BFREE(F) >= (BB)) || \ 1175 (kauth_cred_geteuid(C) != 0 && IS_FREESPACE(F, BB))) 1176 1177 /* Can an ordinary user write BB blocks */ 1178 #define IS_FREESPACE(F, BB) \ 1179 (LFS_EST_BFREE(F) >= (BB) + LFS_EST_RSVD(F)) 1180 1181 /* 1182 * The minimum number of blocks to create a new inode. This is: 1183 * directory direct block (1) + ULFS_NIADDR indirect blocks + inode block (1) + 1184 * ifile direct block (1) + ULFS_NIADDR indirect blocks = 3 + 2 * ULFS_NIADDR blocks. 1185 */ 1186 #define LFS_NRESERVE(F) (lfs_btofsb((F), (2 * ULFS_NIADDR + 3) << (F)->lfs_bshift)) 1187 1188 /* Statistics Counters */ 1189 struct lfs_stats { /* Must match sysctl list in lfs_vfsops.h ! */ 1190 u_int segsused; 1191 u_int psegwrites; 1192 u_int psyncwrites; 1193 u_int pcleanwrites; 1194 u_int blocktot; 1195 u_int cleanblocks; 1196 u_int ncheckpoints; 1197 u_int nwrites; 1198 u_int nsync_writes; 1199 u_int wait_exceeded; 1200 u_int write_exceeded; 1201 u_int flush_invoked; 1202 u_int vflush_invoked; 1203 u_int clean_inlocked; 1204 u_int clean_vnlocked; 1205 u_int segs_reclaimed; 1206 }; 1207 1208 /* Fcntls to take the place of the lfs syscalls */ 1209 struct lfs_fcntl_markv { 1210 BLOCK_INFO *blkiov; /* blocks to relocate */ 1211 int blkcnt; /* number of blocks */ 1212 }; 1213 1214 #define LFCNSEGWAITALL _FCNR_FSPRIV('L', 14, struct timeval) 1215 #define LFCNSEGWAIT _FCNR_FSPRIV('L', 15, struct timeval) 1216 #define LFCNBMAPV _FCNRW_FSPRIV('L', 2, struct lfs_fcntl_markv) 1217 #define LFCNMARKV _FCNRW_FSPRIV('L', 3, struct lfs_fcntl_markv) 1218 #define LFCNRECLAIM _FCNO_FSPRIV('L', 4) 1219 1220 struct lfs_fhandle { 1221 char space[28]; /* FHANDLE_SIZE_COMPAT (but used from userland too) */ 1222 }; 1223 #define LFCNREWIND _FCNR_FSPRIV('L', 6, int) 1224 #define LFCNINVAL _FCNR_FSPRIV('L', 7, int) 1225 #define LFCNRESIZE _FCNR_FSPRIV('L', 8, int) 1226 #define LFCNWRAPSTOP _FCNR_FSPRIV('L', 9, int) 1227 #define LFCNWRAPGO _FCNR_FSPRIV('L', 10, int) 1228 #define LFCNIFILEFH _FCNW_FSPRIV('L', 11, struct lfs_fhandle) 1229 #define LFCNWRAPPASS _FCNR_FSPRIV('L', 12, int) 1230 # define LFS_WRAP_GOING 0x0 1231 # define LFS_WRAP_WAITING 0x1 1232 #define LFCNWRAPSTATUS _FCNW_FSPRIV('L', 13, int) 1233 1234 /* Debug segment lock */ 1235 #ifdef notyet 1236 # define ASSERT_SEGLOCK(fs) KASSERT(LFS_SEGLOCK_HELD(fs)) 1237 # define ASSERT_NO_SEGLOCK(fs) KASSERT(!LFS_SEGLOCK_HELD(fs)) 1238 # define ASSERT_DUNNO_SEGLOCK(fs) 1239 # define ASSERT_MAYBE_SEGLOCK(fs) 1240 #else /* !notyet */ 1241 # define ASSERT_DUNNO_SEGLOCK(fs) \ 1242 DLOG((DLOG_SEG, "lfs func %s seglock wrong (%d)\n", __func__, \ 1243 LFS_SEGLOCK_HELD(fs))) 1244 # define ASSERT_SEGLOCK(fs) do { \ 1245 if (!LFS_SEGLOCK_HELD(fs)) { \ 1246 DLOG((DLOG_SEG, "lfs func %s seglock wrong (0)\n", __func__)); \ 1247 } \ 1248 } while(0) 1249 # define ASSERT_NO_SEGLOCK(fs) do { \ 1250 if (LFS_SEGLOCK_HELD(fs)) { \ 1251 DLOG((DLOG_SEG, "lfs func %s seglock wrong (1)\n", __func__)); \ 1252 } \ 1253 } while(0) 1254 # define ASSERT_MAYBE_SEGLOCK(x) 1255 #endif /* !notyet */ 1256 1257 /* 1258 * Arguments to mount LFS filesystems 1259 */ 1260 struct ulfs_args { 1261 char *fspec; /* block special device to mount */ 1262 }; 1263 1264 __BEGIN_DECLS 1265 void lfs_itimes(struct inode *, const struct timespec *, 1266 const struct timespec *, const struct timespec *); 1267 __END_DECLS 1268 1269 #endif /* !_UFS_LFS_LFS_H_ */ 1270