1*48830Scael.\" Copyright (c) 1983, 1991 Regents of the University of California. 2*48830Scael.\" All rights reserved. 320801Smckusick.\" 4*48830Scael.\" %sccs.include.redist.roff% 520802Smckusick.\" 6*48830Scael.\" @(#)fs.5 6.4 (Berkeley) 04/29/91 7*48830Scael.\" 8*48830Scael.Dd 9*48830Scael.Dt FS 5 10*48830Scael.Os BSD 4.2 11*48830Scael.Sh NAME 12*48830Scael.Nm fs , 13*48830Scael.Nm inode 14*48830Scael.Nd format of file system volume 15*48830Scael.Sh SYNOPSIS 16*48830Scael.Fd #include <sys/types.h> 17*48830Scael.Fd #include <ufs/fs.h> 18*48830Scael.Fd #include <ufs/inode.h> 19*48830Scael.Sh DESCRIPTION 20*48830ScaelThe files 21*48830Scael.Aq Pa fs.h 22*48830Scaeland 23*48830Scael.Aq Pa inode.h 24*48830Scaeldeclare several structures, defined variables and macros 25*48830Scaelwhich are used to create and manage the underlying format of 26*48830Scaelfile system objects on random access devices (disks). 27*48830Scael.Pp 28*48830ScaelThe block size and number of blocks which 29*48830Scaelcomprise a file system are parameters of the file system. 30*48830ScaelSectors beginning at 31*48830Scael.Dv BBLOCK 32*48830Scaeland continuing for 33*48830Scael.Dv BBSIZE 34*48830Scaelare used 35*48830Scaelfor a disklabel and for some hardware primary 36*48830Scaeland secondary bootstrapping programs. 37*48830Scael.Pp 38*48830ScaelThe actual file system begins at sector 39*48830Scael.Dv SBLOCK 40*48830Scaelwith the 41*48830Scael.Em super-block 42*48830Scaelthat is of size 43*48830Scael.Dv SBSIZE . 
44*48830ScaelThe following structure describes the super-block and is 45*48830Scaelfrom the file 46*48830Scael.Aq Pa ufs/fs.h : 47*48830Scael.Bd -literal 4834152Smckusick#define FS_MAGIC 0x011954 4934152Smckusickstruct fs { 5034152Smckusick struct fs *fs_link; /* linked list of file systems */ 5134152Smckusick struct fs *fs_rlink; /* used for incore super blocks */ 5234152Smckusick daddr_t fs_sblkno; /* addr of super-block in filesys */ 5334152Smckusick daddr_t fs_cblkno; /* offset of cyl-block in filesys */ 5434152Smckusick daddr_t fs_iblkno; /* offset of inode-blocks in filesys */ 5534152Smckusick daddr_t fs_dblkno; /* offset of first data after cg */ 5634152Smckusick long fs_cgoffset; /* cylinder group offset in cylinder */ 5734152Smckusick long fs_cgmask; /* used to calc mod fs_ntrak */ 5834152Smckusick time_t fs_time; /* last time written */ 5934152Smckusick long fs_size; /* number of blocks in fs */ 6034152Smckusick long fs_dsize; /* number of data blocks in fs */ 6134152Smckusick long fs_ncg; /* number of cylinder groups */ 6234152Smckusick long fs_bsize; /* size of basic blocks in fs */ 6334152Smckusick long fs_fsize; /* size of frag blocks in fs */ 6434152Smckusick long fs_frag; /* number of frags in a block in fs */ 6520802Smckusick/* these are configuration parameters */ 6634152Smckusick long fs_minfree; /* minimum percentage of free blocks */ 6734152Smckusick long fs_rotdelay; /* num of ms for optimal next block */ 6834152Smckusick long fs_rps; /* disk revolutions per second */ 6920802Smckusick/* these fields can be computed from the others */ 7034152Smckusick long fs_bmask; /* ``blkoff'' calc of blk offsets */ 7134152Smckusick long fs_fmask; /* ``fragoff'' calc of frag offsets */ 7234152Smckusick long fs_bshift; /* ``lblkno'' calc of logical blkno */ 7334152Smckusick long fs_fshift; /* ``numfrags'' calc number of frags */ 7420802Smckusick/* these are configuration parameters */ 7534152Smckusick long fs_maxcontig; /* max number of contiguous blks */ 
7634152Smckusick long fs_maxbpg; /* max number of blks per cyl group */ 7720802Smckusick/* these fields can be computed from the others */ 7834152Smckusick long fs_fragshift; /* block to frag shift */ 7934152Smckusick long fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */ 8034152Smckusick long fs_sbsize; /* actual size of super block */ 8134152Smckusick long fs_csmask; /* csum block offset */ 8234152Smckusick long fs_csshift; /* csum block number */ 8334152Smckusick long fs_nindir; /* value of NINDIR */ 8434152Smckusick long fs_inopb; /* value of INOPB */ 8534152Smckusick long fs_nspf; /* value of NSPF */ 8634152Smckusick/* yet another configuration parameter */ 8734152Smckusick long fs_optim; /* optimization preference, see below */ 8834152Smckusick/* these fields are derived from the hardware */ 8934152Smckusick long fs_npsect; /* # sectors/track including spares */ 9034152Smckusick long fs_interleave; /* hardware sector interleave */ 9134152Smckusick long fs_trackskew; /* sector 0 skew, per track */ 9234152Smckusick long fs_headswitch; /* head switch time, usec */ 9334152Smckusick long fs_trkseek; /* track-to-track seek, usec */ 9420802Smckusick/* sizes determined by number of cylinder groups and their sizes */ 9534152Smckusick daddr_t fs_csaddr; /* blk addr of cyl grp summary area */ 9634152Smckusick long fs_cssize; /* size of cyl grp summary area */ 9734152Smckusick long fs_cgsize; /* cylinder group size */ 9834152Smckusick/* these fields are derived from the hardware */ 9934152Smckusick long fs_ntrak; /* tracks per cylinder */ 10034152Smckusick long fs_nsect; /* sectors per track */ 10134152Smckusick long fs_spc; /* sectors per cylinder */ 10220802Smckusick/* this comes from the disk driver partitioning */ 10334152Smckusick long fs_ncyl; /* cylinders in file system */ 10420802Smckusick/* these fields can be computed from the others */ 10534152Smckusick long fs_cpg; /* cylinders per group */ 10634152Smckusick long fs_ipg; /* inodes per group */ 
10734152Smckusick long fs_fpg; /* blocks per group * fs_frag */ 10820802Smckusick/* this data must be re-computed after crashes */ 10920802Smckusick struct csum fs_cstotal; /* cylinder summary information */ 11020802Smckusick/* these fields are cleared at mount time */ 11134152Smckusick char fs_fmod; /* super block modified flag */ 11234152Smckusick char fs_clean; /* file system is clean flag */ 11334152Smckusick char fs_ronly; /* mounted read-only flag */ 11434152Smckusick char fs_flags; /* currently unused flag */ 11520802Smckusick char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ 11620802Smckusick/* these fields retain the current block allocation info */ 11734152Smckusick long fs_cgrotor; /* last cg searched */ 11834152Smckusick struct csum *fs_csp[MAXCSBUFS]; /* list of fs_cs info buffers */ 11934152Smckusick long fs_cpc; /* cyl per cycle in postbl */ 12034152Smckusick short fs_opostbl[16][8]; /* old rotation block list head */ 12134152Smckusick long fs_sparecon[56]; /* reserved for future constants */ 12234152Smckusick quad fs_qbmask; /* ~fs_bmask - for use with quad size */ 12334152Smckusick quad fs_qfmask; /* ~fs_fmask - for use with quad size */ 124*48830Scael long fs_postblformat; /* format of positional layout tables */ 12534152Smckusick long fs_nrpos; /* number of rotational positions */ 12634152Smckusick long fs_postbloff; /* (short) rotation block list head */ 12734152Smckusick long fs_rotbloff; /* (u_char) blocks for each rotation */ 12834152Smckusick long fs_magic; /* magic number */ 12934152Smckusick u_char fs_space[1]; /* list of blocks for each rotation */ 13020802Smckusick/* actually longer */ 13120802Smckusick}; 132*48830Scael.Ed 133*48830Scael.Pp 13420802SmckusickEach disk drive contains some number of file systems. 13520802SmckusickA file system consists of a number of cylinder groups. 13620802SmckusickEach cylinder group has inodes and data. 
137*48830Scael.Pp 13820802SmckusickA file system is described by its super-block, which in turn 13920802Smckusickdescribes the cylinder groups. The super-block is critical 14020802Smckusickdata and is replicated in each cylinder group to protect against 14120802Smckusickcatastrophic loss. This is done at file system creation 14220802Smckusicktime and the critical 14320802Smckusicksuper-block data does not change, so the copies need not be 14420802Smckusickreferenced further unless disaster strikes. 145*48830Scael.Pp 14620802SmckusickAddresses stored in inodes are capable of addressing fragments 147*48830Scaelof `blocks'. File system blocks of at most size 148*48830Scael.Dv MAXBSIZE 149*48830Scaelcan 15020802Smckusickbe optionally broken into 2, 4, or 8 pieces, each of which is 151*48830Scaeladdressable; these pieces may be 152*48830Scael.Dv DEV_BSIZE , 153*48830Scaelor some multiple of 154*48830Scaela 155*48830Scael.Dv DEV_BSIZE 156*48830Scaelunit. 157*48830Scael.Pp 15820802SmckusickLarge files consist of exclusively large data blocks. To avoid 15920802Smckusickundue wasted disk space, the last data block of a small file is 16020802Smckusickallocated as only as many fragments of a large block as are 16120802Smckusicknecessary. The file system format retains only a single pointer 16220802Smckusickto such a fragment, which is a piece of a single large block that 16320802Smckusickhas been divided. The size of such a fragment is determinable from 164*48830Scaelinformation in the inode, using the 165*48830Scael.Fn blksize fs ip lbn 166*48830Scaelmacro. 167*48830Scael.Pp 16820802SmckusickThe file system records space availability at the fragment level; 16920802Smckusickto determine block availability, aligned fragments are examined. 170*48830Scael.Pp 17120802SmckusickThe root inode is the root of the file system. 
17220802SmckusickInode 0 can't be used for normal purposes and 17320802Smckusickhistorically bad blocks were linked to inode 1, 17420802Smckusickthus the root inode is 2 (inode 1 is no longer used for 17520802Smckusickthis purpose, however numerous dump tapes make this 17620802Smckusickassumption, so we are stuck with it). 177*48830Scael.Pp 178*48830ScaelThe 179*48830Scael.Fa fs_minfree 180*48830Scaelelement gives the minimum acceptable percentage of file system 18128244Smckusickblocks that may be free. If the freelist drops below this level 18234152Smckusickonly the super-user may continue to allocate blocks. 183*48830ScaelThe 184*48830Scael.Fa fs_minfree 185*48830Scaelelement 18634152Smckusickmay be set to 0 if no reserve of free blocks is deemed necessary, 18720802Smckusickhowever severe performance degradations will be observed if the 18820802Smckusickfile system is run at greater than 90% full; thus the default 18920802Smckusickvalue of 190*48830Scael.Fa fs_minfree 19120802Smckusickis 10%. 192*48830Scael.Pp 19320802SmckusickEmpirically the best trade-off between block fragmentation and 19420802Smckusickoverall disk utilization at a loading of 90% comes with a 19534152Smckusickfragmentation of 8, thus the default fragment size is an eighth 19620802Smckusickof the block size. 197*48830Scael.Pp 198*48830ScaelThe element 199*48830Scael.Fa fs_optim 20028244Smckusickspecifies whether the file system should try to minimize the time spent 20128244Smckusickallocating blocks, or if it should attempt to minimize the space 20228244Smckusickfragmentation on the disk. 20328244SmckusickIf the value of fs_minfree (see above) is less than 10%, 20428244Smckusickthen the file system defaults to optimizing for space to avoid 20528244Smckusickrunning out of full sized blocks. 20628244SmckusickIf the value of minfree is greater than or equal to 10%, 20728244Smckusickfragmentation is unlikely to be problematical, and 20828244Smckusickthe file system defaults to optimizing for time. 
209*48830Scael.Pp 210*48830Scael.Em Cylinder group related limits : 21120802SmckusickEach cylinder keeps track of the availability of blocks at different 21220802Smckusickrotational positions, so that sequential blocks can be laid out 21334152Smckusickwith minimum rotational latency. With the default of 8 distinguished 21434152Smckusickrotational positions, the resolution of the 21520802Smckusicksummary information is 2ms for a typical 3600 rpm drive. 216*48830Scael.Pp 217*48830ScaelThe element 218*48830Scael.Fa fs_rotdelay 21920802Smckusickgives the minimum number of milliseconds to initiate 220*48830Scaelanother disk transfer on the same cylinder. 221*48830ScaelIt is used in determining the rotationally optimal 222*48830Scaellayout for disk blocks within a file; 223*48830Scaelthe default value for 224*48830Scael.Fa fs_rotdelay 22520802Smckusickis 2ms. 226*48830Scael.Pp 22720802SmckusickEach file system has a statically allocated number of inodes. 228*48830ScaelAn inode is allocated for each 229*48830Scael.Dv NBPI 230*48830Scaelbytes of disk space. 23120802SmckusickThe inode allocation strategy is extremely conservative. 232*48830Scael.Pp 233*48830Scael.Dv MINBSIZE 234*48830Scaelis the smallest allowable block size. 235*48830ScaelWith a 236*48830Scael.Dv MINBSIZE 237*48830Scaelof 4096 23820802Smckusickit is possible to create files of size 23920802Smckusick2^32 with only two levels of indirection. 240*48830Scael.Dv MINBSIZE 241*48830Scaelmust be big enough to hold a cylinder group block, 242*48830Scaelthus changes to 243*48830Scael.Pq Fa struct cg 244*48830Scaelmust keep its size within 245*48830Scael.Dv MINBSIZE . 246*48830ScaelNote that super-blocks are never more than size 247*48830Scael.Dv SBSIZE . 248*48830Scael.Pp 24928244SmckusickThe path name on which the file system is mounted is maintained in 250*48830Scael.Fa fs_fsmnt . 251*48830Scael.Dv MAXMNTLEN 252*48830Scaeldefines the amount of space allocated in 253*48830Scaelthe super-block for this name. 
25420802SmckusickThe limit on the amount of summary information per file system 255*48830Scaelis defined by 256*48830Scael.Dv MAXCSBUFS . 25734152SmckusickFor a 4096 byte block size, it is currently parameterized for a 25820802Smckusickmaximum of two million cylinders. 259*48830Scael.Pp 26020802SmckusickPer cylinder group information is summarized in blocks allocated 26120802Smckusickfrom the first cylinder group's data blocks. 26220802SmckusickThese blocks are read in from 263*48830Scael.Fa fs_csaddr 26420802Smckusick(size 265*48830Scael.Fa fs_cssize ) 266*48830Scaelin addition to the super-block. 267*48830Scael.Pp 268*48830Scael.Sy N.B.: 269*48830Scael.Xr sizeof Pq Fa struct csum 270*48830Scaelmust be a power of two in order for 271*48830Scaelthe 272*48830Scael.Fn fs_cs 273*48830Scaelmacro to work. 274*48830Scael.Pp 275*48830ScaelThe 276*48830Scael.Em "Super-block for a file system" : 27734152SmckusickThe size of the rotational layout tables 278*48830Scaelis limited by the fact that the super-block is of size 279*48830Scael.Dv SBSIZE . 28020802SmckusickThe size of these tables is 281*48830Scael.Em inversely 28220802Smckusickproportional to the block 28320802Smckusicksize of the file system. The size of the tables is 28420802Smckusickincreased when sector sizes are not powers of two, 28520802Smckusickas this increases the number of cylinders 286*48830Scaelincluded before the rotational pattern repeats 287*48830Scael.Pq Fa fs_cpc . 28820802SmckusickThe size of the rotational layout 289*48830Scaeltables is derived from the number of bytes remaining in 290*48830Scael.Pq Fa struct fs . 291*48830Scael.Pp 29234152SmckusickThe number of blocks of data per cylinder group 29334152Smckusickis limited because cylinder groups are at most one block. 29434152SmckusickThe inode and free block tables 29534152Smckusickmust fit into a single block after deducting space for 296*48830Scaelthe cylinder group structure 297*48830Scael.Pq Fa struct cg . 
298*48830Scael.Pp 299*48830ScaelThe 300*48830Scael.Em Inode : 30120802SmckusickThe inode is the focus of all file activity in the 302*48830Scael.Tn UNIX 303*48830Scaelfile system. 304*48830ScaelThere is a unique inode allocated 30520802Smckusickfor each active file, 30620802Smckusickeach current directory, each mounted-on file, 30720802Smckusicktext file, and the root. 30820802SmckusickAn inode is `named' by its device/i-number pair. 30920802SmckusickFor further information, see the include file 310*48830Scael.Aq Pa sys/inode.h . 311*48830Scael.Sh HISTORY 312*48830ScaelA super-block structure named filsys appeared in 313*48830Scael.At v6 . 314*48830ScaelThe file system described in this manual appeared 315*48830Scaelin 316*48830Scael.Bx 4.2 . 317