xref: /csrg-svn/share/man/man5/fs.5 (revision 66972)
161609Sbostic.\" Copyright (c) 1983, 1991, 1993
261609Sbostic.\"	The Regents of the University of California.  All rights reserved.
320801Smckusick.\"
448830Scael.\" %sccs.include.redist.roff%
520802Smckusick.\"
6*66972Sbostic.\"     @(#)fs.5	8.2 (Berkeley) 04/19/94
748830Scael.\"
848830Scael.Dd
948830Scael.Dt FS 5
1048830Scael.Os BSD 4.2
1148830Scael.Sh NAME
1248830Scael.Nm fs ,
1348830Scael.Nm inode
1448830Scael.Nd format of file system volume
1548830Scael.Sh SYNOPSIS
1648830Scael.Fd #include <sys/types.h>
1748830Scael.Fd #include <ufs/fs.h>
1848830Scael.Fd #include <ufs/inode.h>
1948830Scael.Sh DESCRIPTION
2048830ScaelThe files
2148830Scael.Aq Pa fs.h
2248830Scaeland
2348830Scael.Aq Pa inode.h
2448830Scaeldeclare several structures, defined variables and macros
2548830Scaelwhich are used to create and manage the underlying format of
2648830Scaelfile system objects on random access devices (disks).
2748830Scael.Pp
2848830ScaelThe block size and number of blocks which
2948830Scaelcomprise a file system are parameters of the file system.
3048830ScaelSectors beginning at
3148830Scael.Dv BBLOCK
3248830Scaeland continuing for
3348830Scael.Dv BBSIZE
3448830Scaelare used
3548830Scaelfor a disklabel and for some hardware primary
3648830Scaeland secondary bootstrapping programs.
3748830Scael.Pp
3848830ScaelThe actual file system begins at sector
3948830Scael.Dv SBLOCK
4048830Scaelwith the
4148830Scael.Em super-block
4248830Scaelthat is of size
4348830Scael.Dv SBSIZE .
4448830ScaelThe following structure describes the super-block and is
4548830Scaelfrom the file
4648830Scael.Aq Pa ufs/fs.h :
4748830Scael.Bd -literal
4834152Smckusick#define	FS_MAGIC 0x011954
4934152Smckusickstruct fs {
5034152Smckusick	struct	fs *fs_link;	/* linked list of file systems */
5134152Smckusick	struct	fs *fs_rlink;	/*     used for incore super blocks */
5234152Smckusick	daddr_t	fs_sblkno;	/* addr of super-block in filesys */
5334152Smckusick	daddr_t	fs_cblkno;	/* offset of cyl-block in filesys */
5434152Smckusick	daddr_t	fs_iblkno;	/* offset of inode-blocks in filesys */
5534152Smckusick	daddr_t	fs_dblkno;	/* offset of first data after cg */
5634152Smckusick	long	fs_cgoffset;	/* cylinder group offset in cylinder */
5734152Smckusick	long	fs_cgmask;	/* used to calc mod fs_ntrak */
5834152Smckusick	time_t 	fs_time;    	/* last time written */
5934152Smckusick	long	fs_size;	/* number of blocks in fs */
6034152Smckusick	long	fs_dsize;	/* number of data blocks in fs */
6134152Smckusick	long	fs_ncg;	/* number of cylinder groups */
6234152Smckusick	long	fs_bsize;	/* size of basic blocks in fs */
6334152Smckusick	long	fs_fsize;	/* size of frag blocks in fs */
6434152Smckusick	long	fs_frag;	/* number of frags in a block in fs */
6520802Smckusick/* these are configuration parameters */
6634152Smckusick	long	fs_minfree;	/* minimum percentage of free blocks */
6734152Smckusick	long	fs_rotdelay;	/* num of ms for optimal next block */
6834152Smckusick	long	fs_rps;	/* disk revolutions per second */
6920802Smckusick/* these fields can be computed from the others */
7034152Smckusick	long	fs_bmask;	/* ``blkoff'' calc of blk offsets */
7134152Smckusick	long	fs_fmask;	/* ``fragoff'' calc of frag offsets */
7234152Smckusick	long	fs_bshift;	/* ``lblkno'' calc of logical blkno */
7334152Smckusick	long	fs_fshift;	/* ``numfrags'' calc number of frags */
7420802Smckusick/* these are configuration parameters */
7534152Smckusick	long	fs_maxcontig;	/* max number of contiguous blks */
7634152Smckusick	long	fs_maxbpg;	/* max number of blks per cyl group */
7720802Smckusick/* these fields can be computed from the others */
7834152Smckusick	long	fs_fragshift;	/* block to frag shift */
7934152Smckusick	long	fs_fsbtodb;	/* fsbtodb and dbtofsb shift constant */
8034152Smckusick	long	fs_sbsize;	/* actual size of super block */
8134152Smckusick	long	fs_csmask;	/* csum block offset */
8234152Smckusick	long	fs_csshift;	/* csum block number */
8334152Smckusick	long	fs_nindir;	/* value of NINDIR */
8434152Smckusick	long	fs_inopb;	/* value of INOPB */
8534152Smckusick	long	fs_nspf;	/* value of NSPF */
8634152Smckusick/* yet another configuration parameter */
8734152Smckusick	long	fs_optim;	/* optimization preference, see below */
8834152Smckusick/* these fields are derived from the hardware */
8934152Smckusick	long	fs_npsect;	/* # sectors/track including spares */
9034152Smckusick	long	fs_interleave;	/* hardware sector interleave */
9134152Smckusick	long	fs_trackskew;	/* sector 0 skew, per track */
9234152Smckusick	long	fs_headswitch;	/* head switch time, usec */
9334152Smckusick	long	fs_trkseek;	/* track-to-track seek, usec */
9420802Smckusick/* sizes determined by number of cylinder groups and their sizes */
9534152Smckusick	daddr_t fs_csaddr;	/* blk addr of cyl grp summary area */
9634152Smckusick	long	fs_cssize;	/* size of cyl grp summary area */
9734152Smckusick	long	fs_cgsize;	/* cylinder group size */
9834152Smckusick/* these fields are derived from the hardware */
9934152Smckusick	long	fs_ntrak;	/* tracks per cylinder */
10034152Smckusick	long	fs_nsect;	/* sectors per track */
10134152Smckusick	long  	fs_spc;   	/* sectors per cylinder */
10220802Smckusick/* this comes from the disk driver partitioning */
10334152Smckusick	long	fs_ncyl;   	/* cylinders in file system */
10420802Smckusick/* these fields can be computed from the others */
10534152Smckusick	long	fs_cpg;	/* cylinders per group */
10634152Smckusick	long	fs_ipg;	/* inodes per group */
10734152Smckusick	long	fs_fpg;	/* blocks per group * fs_frag */
10820802Smckusick/* this data must be re-computed after crashes */
10920802Smckusick	struct	csum fs_cstotal;	/* cylinder summary information */
11020802Smckusick/* these fields are cleared at mount time */
11134152Smckusick	char   	fs_fmod;    	/* super block modified flag */
11234152Smckusick	char   	fs_clean;    	/* file system is clean flag */
11334152Smckusick	char   	fs_ronly;   	/* mounted read-only flag */
11434152Smckusick	char   	fs_flags;   	/* currently unused flag */
11520802Smckusick	char	fs_fsmnt[MAXMNTLEN];	/* name mounted on */
11620802Smckusick/* these fields retain the current block allocation info */
11734152Smckusick	long	fs_cgrotor;	/* last cg searched */
11834152Smckusick	struct	csum *fs_csp[MAXCSBUFS]; /* list of fs_cs info buffers */
11934152Smckusick	long	fs_cpc;	/* cyl per cycle in postbl */
12034152Smckusick	short	fs_opostbl[16][8];	/* old rotation block list head */
12134152Smckusick	long	fs_sparecon[56];	/* reserved for future constants */
12234152Smckusick	quad	fs_qbmask;	/* ~fs_bmask - for use with quad size */
12334152Smckusick	quad	fs_qfmask;	/* ~fs_fmask - for use with quad size */
12448830Scael	long	fs_postblformat; /* format of positional layout tables */
125*66972Sbostic	long	fs_nrpos;	/* number of rotational positions */
12634152Smckusick	long	fs_postbloff;	/* (short) rotation block list head */
12734152Smckusick	long	fs_rotbloff;	/* (u_char) blocks for each rotation */
12834152Smckusick	long	fs_magic;	/* magic number */
12934152Smckusick	u_char	fs_space[1];	/* list of blocks for each rotation */
13020802Smckusick/* actually longer */
13120802Smckusick};
13248830Scael.Ed
13348830Scael.Pp
13420802SmckusickEach disk drive contains some number of file systems.
13520802SmckusickA file system consists of a number of cylinder groups.
13620802SmckusickEach cylinder group has inodes and data.
13748830Scael.Pp
13820802SmckusickA file system is described by its super-block, which in turn
13920802Smckusickdescribes the cylinder groups.  The super-block is critical
14020802Smckusickdata and is replicated in each cylinder group to protect against
14120802Smckusickcatastrophic loss.  This is done at file system creation
14220802Smckusicktime and the critical
14320802Smckusicksuper-block data does not change, so the copies need not be
14420802Smckusickreferenced further unless disaster strikes.
14548830Scael.Pp
14620802SmckusickAddresses stored in inodes are capable of addressing fragments
14748830Scaelof `blocks'. File system blocks of at most size
14848830Scael.Dv MAXBSIZE
14948830Scaelcan
15020802Smckusickbe optionally broken into 2, 4, or 8 pieces, each of which is
15148830Scaeladdressable; these pieces may be
15248830Scael.Dv DEV_BSIZE ,
15348830Scaelor some multiple of
15448830Scaela
15548830Scael.Dv DEV_BSIZE
15648830Scaelunit.
15748830Scael.Pp
15820802SmckusickLarge files consist of exclusively large data blocks.  To avoid
15920802Smckusickundue wasted disk space, the last data block of a small file is
16020802Smckusickallocated as only as many fragments of a large block as are
16120802Smckusicknecessary.  The file system format retains only a single pointer
16220802Smckusickto such a fragment, which is a piece of a single large block that
16320802Smckusickhas been divided.  The size of such a fragment is determinable from
16448830Scaelinformation in the inode, using the
16548830Scael.Fn blksize fs ip lbn
16648830Scaelmacro.
16748830Scael.Pp
16820802SmckusickThe file system records space availability at the fragment level;
16920802Smckusickto determine block availability, aligned fragments are examined.
17048830Scael.Pp
17120802SmckusickThe root inode is the root of the file system.
17220802SmckusickInode 0 can't be used for normal purposes and
17320802Smckusickhistorically bad blocks were linked to inode 1,
17420802Smckusickthus the root inode is 2 (inode 1 is no longer used for
17520802Smckusickthis purpose, however numerous dump tapes make this
17620802Smckusickassumption, so we are stuck with it).
17748830Scael.Pp
17848830ScaelThe
17948830Scael.Fa fs_minfree
18048830Scaelelement gives the minimum acceptable percentage of file system
18128244Smckusickblocks that may be free. If the freelist drops below this level
18234152Smckusickonly the super-user may continue to allocate blocks.
18348830ScaelThe
18448830Scael.Fa fs_minfree
18548830Scaelelement
18634152Smckusickmay be set to 0 if no reserve of free blocks is deemed necessary,
18720802Smckusickhowever severe performance degradations will be observed if the
18820802Smckusickfile system is run at greater than 90% full; thus the default
18920802Smckusickvalue of
19048830Scael.Fa fs_minfree
19120802Smckusickis 10%.
19248830Scael.Pp
19320802SmckusickEmpirically the best trade-off between block fragmentation and
19420802Smckusickoverall disk utilization at a loading of 90% comes with a
19534152Smckusickfragmentation of 8, thus the default fragment size is an eighth
19620802Smckusickof the block size.
19748830Scael.Pp
19848830ScaelThe element
19948830Scael.Fa fs_optim
20028244Smckusickspecifies whether the file system should try to minimize the time spent
20128244Smckusickallocating blocks, or if it should attempt to minimize the space
20228244Smckusickfragmentation on the disk.
20328244SmckusickIf the value of fs_minfree (see above) is less than 10%,
20428244Smckusickthen the file system defaults to optimizing for space to avoid
20528244Smckusickrunning out of full sized blocks.
20628244SmckusickIf the value of fs_minfree is greater than or equal to 10%,
20728244Smckusickfragmentation is unlikely to be problematical, and
20828244Smckusickthe file system defaults to optimizing for time.
20948830Scael.Pp
21048830Scael.Em Cylinder group related limits :
21120802SmckusickEach cylinder keeps track of the availability of blocks at different
21220802Smckusickrotational positions, so that sequential blocks can be laid out
21334152Smckusickwith minimum rotational latency. With the default of 8 distinguished
21434152Smckusickrotational positions, the resolution of the
21520802Smckusicksummary information is 2ms for a typical 3600 rpm drive.
21648830Scael.Pp
21748830ScaelThe element
21848830Scael.Fa fs_rotdelay
21920802Smckusickgives the minimum number of milliseconds to initiate
22048830Scaelanother disk transfer on the same cylinder.
22148830ScaelIt is used in determining the rotationally optimal
22248830Scaellayout for disk blocks within a file;
22348830Scaelthe default value for
22448830Scael.Fa fs_rotdelay
22520802Smckusickis 2ms.
22648830Scael.Pp
22720802SmckusickEach file system has a statically allocated number of inodes.
22848830ScaelAn inode is allocated for each
22948830Scael.Dv NBPI
23048830Scaelbytes of disk space.
23120802SmckusickThe inode allocation strategy is extremely conservative.
23248830Scael.Pp
23348830Scael.Dv MINBSIZE
23448830Scaelis the smallest allowable block size.
23548830ScaelWith a
23648830Scael.Dv MINBSIZE
23748830Scaelof 4096
23820802Smckusickit is possible to create files of size
23920802Smckusick2^32 with only two levels of indirection.
24048830Scael.Dv MINBSIZE
24148830Scaelmust be big enough to hold a cylinder group block,
24248830Scaelthus changes to
24348830Scael.Pq Fa struct cg
24448830Scaelmust keep its size within
24548830Scael.Dv MINBSIZE .
24648830ScaelNote that super-blocks are never more than size
24748830Scael.Dv SBSIZE .
24848830Scael.Pp
24928244SmckusickThe path name on which the file system is mounted is maintained in
25048830Scael.Fa fs_fsmnt .
25148830Scael.Dv MAXMNTLEN
25248830Scaeldefines the amount of space allocated in
25348830Scaelthe super-block for this name.
25420802SmckusickThe limit on the amount of summary information per file system
25548830Scaelis defined by
25648830Scael.Dv MAXCSBUFS .
25734152SmckusickFor a 4096 byte block size, it is currently parameterized for a
25820802Smckusickmaximum of two million cylinders.
25948830Scael.Pp
26020802SmckusickPer cylinder group information is summarized in blocks allocated
26120802Smckusickfrom the first cylinder group's data blocks.
26220802SmckusickThese blocks are read in from
26348830Scael.Fa fs_csaddr
26420802Smckusick(size
26548830Scael.Fa fs_cssize )
26648830Scaelin addition to the super-block.
26748830Scael.Pp
26848830Scael.Sy N.B.:
26948830Scael.Fn sizeof "struct csum"
27048830Scaelmust be a power of two in order for
27148830Scaelthe
27248830Scael.Fn fs_cs
27348830Scaelmacro to work.
27448830Scael.Pp
27548830ScaelThe
27648830Scael.Em "Super-block for a file system" :
27734152SmckusickThe size of the rotational layout tables
27848830Scaelis limited by the fact that the super-block is of size
27948830Scael.Dv SBSIZE .
28020802SmckusickThe size of these tables is
28148830Scael.Em inversely
28220802Smckusickproportional to the block
28320802Smckusicksize of the file system. The size of the tables is
28420802Smckusickincreased when sector sizes are not powers of two,
28520802Smckusickas this increases the number of cylinders
28648830Scaelincluded before the rotational pattern repeats
28748830Scael.Pq Fa fs_cpc .
28820802SmckusickThe size of the rotational layout
28948830Scaeltables is derived from the number of bytes remaining in
29048830Scael.Pq Fa struct fs .
29148830Scael.Pp
29234152SmckusickThe number of blocks of data per cylinder group
29334152Smckusickis limited because cylinder groups are at most one block.
29434152SmckusickThe inode and free block tables
29534152Smckusickmust fit into a single block after deducting space for
29648830Scaelthe cylinder group structure
29748830Scael.Pq Fa struct cg .
29848830Scael.Pp
29948830ScaelThe
30048830Scael.Em Inode :
30120802SmckusickThe inode is the focus of all file activity in the
30248830Scael.Tn UNIX
30348830Scaelfile system.
30448830ScaelThere is a unique inode allocated
30520802Smckusickfor each active file,
30620802Smckusickeach current directory, each mounted-on file,
30720802Smckusicktext file, and the root.
30820802SmckusickAn inode is `named' by its device/i-number pair.
30920802SmckusickFor further information, see the include file
31048830Scael.Aq Pa ufs/inode.h .
31148830Scael.Sh HISTORY
31248830ScaelA super-block structure named filsys appeared in
31348830Scael.At v6 .
31448830ScaelThe file system described in this manual appeared
31548830Scaelin
31648830Scael.Bx 4.2 .
317