161609Sbostic.\" Copyright (c) 1983, 1991, 1993 261609Sbostic.\" The Regents of the University of California. All rights reserved. 320801Smckusick.\" 448830Scael.\" %sccs.include.redist.roff% 520802Smckusick.\" 6*66972Sbostic.\" @(#)fs.5 8.2 (Berkeley) 04/19/94 748830Scael.\" 848830Scael.Dd 948830Scael.Dt FS 5 1048830Scael.Os BSD 4.2 1148830Scael.Sh NAME 1248830Scael.Nm fs , 1348830Scael.Nm inode 1448830Scael.Nd format of file system volume 1548830Scael.Sh SYNOPSIS 1648830Scael.Fd #include <sys/types.h> 1748830Scael.Fd #include <ufs/fs.h> 1848830Scael.Fd #include <ufs/inode.h> 1948830Scael.Sh DESCRIPTION 2048830ScaelThe files 2148830Scael.Aq Pa fs.h 2248830Scaeland 2348830Scael.Aq Pa inode.h 2448830Scaeldeclare several structures, defined variables and macros 2548830Scaelwhich are used to create and manage the underlying format of 2648830Scaelfile system objects on random access devices (disks). 2748830Scael.Pp 2848830ScaelThe block size and number of blocks which 2948830Scaelcomprise a file system are parameters of the file system. 3048830ScaelSectors beginning at 3148830Scael.Dv BBLOCK 3248830Scaeland continuing for 3348830Scael.Dv BBSIZE 3448830Scaelare used 3548830Scaelfor a disklabel and for some hardware primary 3648830Scaeland secondary bootstrapping programs. 3748830Scael.Pp 3848830ScaelThe actual file system begins at sector 3948830Scael.Dv SBLOCK 4048830Scaelwith the 4148830Scael.Em super-block 4248830Scaelthat is of size 4348830Scael.Dv SBSIZE . 
4448830ScaelThe following structure describes the super-block and is 4548830Scaelfrom the file 4648830Scael.Aq Pa ufs/fs.h : 4748830Scael.Bd -literal 4834152Smckusick#define FS_MAGIC 0x011954 4934152Smckusickstruct fs { 5034152Smckusick struct fs *fs_link; /* linked list of file systems */ 5134152Smckusick struct fs *fs_rlink; /* used for incore super blocks */ 5234152Smckusick daddr_t fs_sblkno; /* addr of super-block in filesys */ 5334152Smckusick daddr_t fs_cblkno; /* offset of cyl-block in filesys */ 5434152Smckusick daddr_t fs_iblkno; /* offset of inode-blocks in filesys */ 5534152Smckusick daddr_t fs_dblkno; /* offset of first data after cg */ 5634152Smckusick long fs_cgoffset; /* cylinder group offset in cylinder */ 5734152Smckusick long fs_cgmask; /* used to calc mod fs_ntrak */ 5834152Smckusick time_t fs_time; /* last time written */ 5934152Smckusick long fs_size; /* number of blocks in fs */ 6034152Smckusick long fs_dsize; /* number of data blocks in fs */ 6134152Smckusick long fs_ncg; /* number of cylinder groups */ 6234152Smckusick long fs_bsize; /* size of basic blocks in fs */ 6334152Smckusick long fs_fsize; /* size of frag blocks in fs */ 6434152Smckusick long fs_frag; /* number of frags in a block in fs */ 6520802Smckusick/* these are configuration parameters */ 6634152Smckusick long fs_minfree; /* minimum percentage of free blocks */ 6734152Smckusick long fs_rotdelay; /* num of ms for optimal next block */ 6834152Smckusick long fs_rps; /* disk revolutions per second */ 6920802Smckusick/* these fields can be computed from the others */ 7034152Smckusick long fs_bmask; /* ``blkoff'' calc of blk offsets */ 7134152Smckusick long fs_fmask; /* ``fragoff'' calc of frag offsets */ 7234152Smckusick long fs_bshift; /* ``lblkno'' calc of logical blkno */ 7334152Smckusick long fs_fshift; /* ``numfrags'' calc number of frags */ 7420802Smckusick/* these are configuration parameters */ 7534152Smckusick long fs_maxcontig; /* max number of contiguous blks */ 
7634152Smckusick long fs_maxbpg; /* max number of blks per cyl group */ 7720802Smckusick/* these fields can be computed from the others */ 7834152Smckusick long fs_fragshift; /* block to frag shift */ 7934152Smckusick long fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */ 8034152Smckusick long fs_sbsize; /* actual size of super block */ 8134152Smckusick long fs_csmask; /* csum block offset */ 8234152Smckusick long fs_csshift; /* csum block number */ 8334152Smckusick long fs_nindir; /* value of NINDIR */ 8434152Smckusick long fs_inopb; /* value of INOPB */ 8534152Smckusick long fs_nspf; /* value of NSPF */ 8634152Smckusick/* yet another configuration parameter */ 8734152Smckusick long fs_optim; /* optimization preference, see below */ 8834152Smckusick/* these fields are derived from the hardware */ 8934152Smckusick long fs_npsect; /* # sectors/track including spares */ 9034152Smckusick long fs_interleave; /* hardware sector interleave */ 9134152Smckusick long fs_trackskew; /* sector 0 skew, per track */ 9234152Smckusick long fs_headswitch; /* head switch time, usec */ 9334152Smckusick long fs_trkseek; /* track-to-track seek, usec */ 9420802Smckusick/* sizes determined by number of cylinder groups and their sizes */ 9534152Smckusick daddr_t fs_csaddr; /* blk addr of cyl grp summary area */ 9634152Smckusick long fs_cssize; /* size of cyl grp summary area */ 9734152Smckusick long fs_cgsize; /* cylinder group size */ 9834152Smckusick/* these fields are derived from the hardware */ 9934152Smckusick long fs_ntrak; /* tracks per cylinder */ 10034152Smckusick long fs_nsect; /* sectors per track */ 10134152Smckusick long fs_spc; /* sectors per cylinder */ 10220802Smckusick/* this comes from the disk driver partitioning */ 10334152Smckusick long fs_ncyl; /* cylinders in file system */ 10420802Smckusick/* these fields can be computed from the others */ 10534152Smckusick long fs_cpg; /* cylinders per group */ 10634152Smckusick long fs_ipg; /* inodes per group */ 
10734152Smckusick long fs_fpg; /* blocks per group * fs_frag */ 10820802Smckusick/* this data must be re-computed after crashes */ 10920802Smckusick struct csum fs_cstotal; /* cylinder summary information */ 11020802Smckusick/* these fields are cleared at mount time */ 11134152Smckusick char fs_fmod; /* super block modified flag */ 11234152Smckusick char fs_clean; /* file system is clean flag */ 11334152Smckusick char fs_ronly; /* mounted read-only flag */ 11434152Smckusick char fs_flags; /* currently unused flag */ 11520802Smckusick char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ 11620802Smckusick/* these fields retain the current block allocation info */ 11734152Smckusick long fs_cgrotor; /* last cg searched */ 11834152Smckusick struct csum *fs_csp[MAXCSBUFS]; /* list of fs_cs info buffers */ 11934152Smckusick long fs_cpc; /* cyl per cycle in postbl */ 12034152Smckusick short fs_opostbl[16][8]; /* old rotation block list head */ 12134152Smckusick long fs_sparecon[56]; /* reserved for future constants */ 12234152Smckusick quad fs_qbmask; /* ~fs_bmask - for use with quad size */ 12334152Smckusick quad fs_qfmask; /* ~fs_fmask - for use with quad size */ 12448830Scael long fs_postblformat; /* format of positional layout tables */ 125*66972Sbostic long fs_nrpos; /* number of rotational positions */ 12634152Smckusick long fs_postbloff; /* (short) rotation block list head */ 12734152Smckusick long fs_rotbloff; /* (u_char) blocks for each rotation */ 12834152Smckusick long fs_magic; /* magic number */ 12934152Smckusick u_char fs_space[1]; /* list of blocks for each rotation */ 13020802Smckusick/* actually longer */ 13120802Smckusick}; 13248830Scael.Ed 13348830Scael.Pp 13420802SmckusickEach disk drive contains some number of file systems. 13520802SmckusickA file system consists of a number of cylinder groups. 13620802SmckusickEach cylinder group has inodes and data. 
13748830Scael.Pp 13820802SmckusickA file system is described by its super-block, which in turn 13920802Smckusickdescribes the cylinder groups. The super-block is critical 14020802Smckusickdata and is replicated in each cylinder group to protect against 14120802Smckusickcatastrophic loss. This is done at file system creation 14220802Smckusicktime and the critical 14320802Smckusicksuper-block data does not change, so the copies need not be 14420802Smckusickreferenced further unless disaster strikes. 14548830Scael.Pp 14620802SmckusickAddresses stored in inodes are capable of addressing fragments 14748830Scaelof `blocks'. File system blocks of at most size 14848830Scael.Dv MAXBSIZE 14948830Scaelcan 15020802Smckusickbe optionally broken into 2, 4, or 8 pieces, each of which is 15148830Scaeladdressable; these pieces may be 15248830Scael.Dv DEV_BSIZE , 15348830Scaelor some multiple of 15448830Scaela 15548830Scael.Dv DEV_BSIZE 15648830Scaelunit. 15748830Scael.Pp 15820802SmckusickLarge files consist of exclusively large data blocks. To avoid 15920802Smckusickundue wasted disk space, the last data block of a small file is 16020802Smckusickallocated as only as many fragments of a large block as are 16120802Smckusicknecessary. The file system format retains only a single pointer 16220802Smckusickto such a fragment, which is a piece of a single large block that 16320802Smckusickhas been divided. The size of such a fragment is determinable from 16448830Scaelinformation in the inode, using the 16548830Scael.Fn blksize fs ip lbn 16648830Scaelmacro. 16748830Scael.Pp 16820802SmckusickThe file system records space availability at the fragment level; 16920802Smckusickto determine block availability, aligned fragments are examined. 17048830Scael.Pp 17120802SmckusickThe root inode is the root of the file system. 
17220802SmckusickInode 0 can't be used for normal purposes and 17320802Smckusickhistorically bad blocks were linked to inode 1, 17420802Smckusickthus the root inode is 2 (inode 1 is no longer used for 17520802Smckusickthis purpose, however numerous dump tapes make this 17620802Smckusickassumption, so we are stuck with it). 17748830Scael.Pp 17848830ScaelThe 17948830Scael.Fa fs_minfree 18048830Scaelelement gives the minimum acceptable percentage of file system 18128244Smckusickblocks that may be free. If the freelist drops below this level 18234152Smckusickonly the super-user may continue to allocate blocks. 18348830ScaelThe 18448830Scael.Fa fs_minfree 18548830Scaelelement 18634152Smckusickmay be set to 0 if no reserve of free blocks is deemed necessary, 18720802Smckusickhowever severe performance degradations will be observed if the 18820802Smckusickfile system is run at greater than 90% full; thus the default 18920802Smckusickvalue of 19048830Scael.Fa fs_minfree 19120802Smckusickis 10%. 19248830Scael.Pp 19320802SmckusickEmpirically the best trade-off between block fragmentation and 19420802Smckusickoverall disk utilization at a loading of 90% comes with a 19534152Smckusickfragmentation of 8, thus the default fragment size is an eighth 19620802Smckusickof the block size. 19748830Scael.Pp 19848830ScaelThe element 19948830Scael.Fa fs_optim 20028244Smckusickspecifies whether the file system should try to minimize the time spent 20128244Smckusickallocating blocks, or if it should attempt to minimize the space 20228244Smckusickfragmentation on the disk. 20328244SmckusickIf the value of fs_minfree (see above) is less than 10%, 20428244Smckusickthen the file system defaults to optimizing for space to avoid 20528244Smckusickrunning out of full sized blocks. 20628244SmckusickIf the value of fs_minfree is greater than or equal to 10%, 20728244Smckusickfragmentation is unlikely to be problematical, and 20828244Smckusickthe file system defaults to optimizing for time. 
20948830Scael.Pp 21048830Scael.Em Cylinder group related limits : 21120802SmckusickEach cylinder keeps track of the availability of blocks at different 21220802Smckusickrotational positions, so that sequential blocks can be laid out 21334152Smckusickwith minimum rotational latency. With the default of 8 distinguished 21434152Smckusickrotational positions, the resolution of the 21520802Smckusicksummary information is 2ms for a typical 3600 rpm drive. 21648830Scael.Pp 21748830ScaelThe element 21848830Scael.Fa fs_rotdelay 21920802Smckusickgives the minimum number of milliseconds to initiate 22048830Scaelanother disk transfer on the same cylinder. 22148830ScaelIt is used in determining the rotationally optimal 22248830Scaellayout for disk blocks within a file; 22348830Scaelthe default value for 22448830Scael.Fa fs_rotdelay 22520802Smckusickis 2ms. 22648830Scael.Pp 22720802SmckusickEach file system has a statically allocated number of inodes. 22848830ScaelAn inode is allocated for each 22948830Scael.Dv NBPI 23048830Scaelbytes of disk space. 23120802SmckusickThe inode allocation strategy is extremely conservative. 23248830Scael.Pp 23348830Scael.Dv MINBSIZE 23448830Scaelis the smallest allowable block size. 23548830ScaelWith a 23648830Scael.Dv MINBSIZE 23748830Scaelof 4096 23820802Smckusickit is possible to create files of size 23920802Smckusick2^32 with only two levels of indirection. 24048830Scael.Dv MINBSIZE 24148830Scaelmust be big enough to hold a cylinder group block, 24248830Scaelthus changes to 24348830Scael.Pq Fa struct cg 24448830Scaelmust keep its size within 24548830Scael.Dv MINBSIZE . 24648830ScaelNote that super-blocks are never more than size 24748830Scael.Dv SBSIZE . 24848830Scael.Pp 24928244SmckusickThe path name on which the file system is mounted is maintained in 25048830Scael.Fa fs_fsmnt . 25148830Scael.Dv MAXMNTLEN 25248830Scaeldefines the amount of space allocated in 25348830Scaelthe super-block for this name. 
25420802SmckusickThe limit on the amount of summary information per file system 25548830Scaelis defined by 25648830Scael.Dv MAXCSBUFS . 25734152SmckusickFor a 4096 byte block size, it is currently parameterized for a 25820802Smckusickmaximum of two million cylinders. 25948830Scael.Pp 26020802SmckusickPer cylinder group information is summarized in blocks allocated 26120802Smckusickfrom the first cylinder group's data blocks. 26220802SmckusickThese blocks are read in from 26348830Scael.Fa fs_csaddr 26420802Smckusick(size 26548830Scael.Fa fs_cssize ) 26648830Scaelin addition to the super-block. 26748830Scael.Pp 26848830Scael.Sy N.B.: 26948830Scael.Xr sizeof Pq Fa struct csum 27048830Scaelmust be a power of two in order for 27148830Scaelthe 27248830Scael.Fn fs_cs 27348830Scaelmacro to work. 27448830Scael.Pp 27548830ScaelThe 27648830Scael.Em "Super-block for a file system" : 27734152SmckusickThe size of the rotational layout tables 27848830Scaelis limited by the fact that the super-block is of size 27948830Scael.Dv SBSIZE . 28020802SmckusickThe size of these tables is 28148830Scael.Em inversely 28220802Smckusickproportional to the block 28320802Smckusicksize of the file system. The size of the tables is 28420802Smckusickincreased when sector sizes are not powers of two, 28520802Smckusickas this increases the number of cylinders 28648830Scaelincluded before the rotational pattern repeats 28748830Scael.Pq Fa fs_cpc . 28820802SmckusickThe size of the rotational layout 28948830Scaeltables is derived from the number of bytes remaining in 29048830Scael.Pq Fa struct fs . 29148830Scael.Pp 29234152SmckusickThe number of blocks of data per cylinder group 29334152Smckusickis limited because cylinder groups are at most one block. 29434152SmckusickThe inode and free block tables 29534152Smckusickmust fit into a single block after deducting space for 29648830Scaelthe cylinder group structure 29748830Scael.Pq Fa struct cg . 
29848830Scael.Pp 29948830ScaelThe 30048830Scael.Em Inode : 30120802SmckusickThe inode is the focus of all file activity in the 30248830Scael.Tn UNIX 30348830Scaelfile system. 30448830ScaelThere is a unique inode allocated 30520802Smckusickfor each active file, 30620802Smckusickeach current directory, each mounted-on file, 30720802Smckusicktext file, and the root. 30820802SmckusickAn inode is `named' by its device/i-number pair. 30920802SmckusickFor further information, see the include file 31048830Scael.Aq Pa sys/inode.h . 31148830Scael.Sh HISTORY 31248830ScaelA super-block structure named filsys appeared in 31348830Scael.At v6 . 31448830ScaelThe file system described in this manual appeared 31548830Scaelin 31648830Scael.Bx 4.2 . 317