xref: /onnv-gate/usr/src/cmd/fs.d/ufs/fsck/inode.c (revision 12286:2ba5ef27b57b)
10Sstevel@tonic-gate /*
2*12286SAndrew.Balfour@Sun.COM  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
30Sstevel@tonic-gate  */
40Sstevel@tonic-gate 
50Sstevel@tonic-gate /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
60Sstevel@tonic-gate /*	  All Rights Reserved  	*/
70Sstevel@tonic-gate 
80Sstevel@tonic-gate /*
90Sstevel@tonic-gate  * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
100Sstevel@tonic-gate  * All rights reserved.
110Sstevel@tonic-gate  *
120Sstevel@tonic-gate  * Redistribution and use in source and binary forms are permitted
130Sstevel@tonic-gate  * provided that: (1) source distributions retain this entire copyright
140Sstevel@tonic-gate  * notice and comment, and (2) distributions including binaries display
150Sstevel@tonic-gate  * the following acknowledgement:  ``This product includes software
160Sstevel@tonic-gate  * developed by the University of California, Berkeley and its contributors''
170Sstevel@tonic-gate  * in the documentation or other materials provided with the distribution
180Sstevel@tonic-gate  * and in all advertising materials mentioning features or use of this
190Sstevel@tonic-gate  * software. Neither the name of the University nor the names of its
200Sstevel@tonic-gate  * contributors may be used to endorse or promote products derived
210Sstevel@tonic-gate  * from this software without specific prior written permission.
220Sstevel@tonic-gate  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
230Sstevel@tonic-gate  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
240Sstevel@tonic-gate  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate 
280Sstevel@tonic-gate #include <stdio.h>
290Sstevel@tonic-gate #include <string.h>
300Sstevel@tonic-gate #include <stdlib.h>
310Sstevel@tonic-gate #include <unistd.h>
320Sstevel@tonic-gate #include <time.h>
33392Sswilcox #include <limits.h>
340Sstevel@tonic-gate #include <sys/param.h>
350Sstevel@tonic-gate #include <sys/types.h>
360Sstevel@tonic-gate #include <sys/sysmacros.h>
370Sstevel@tonic-gate #include <sys/mntent.h>
380Sstevel@tonic-gate #include <sys/vnode.h>
390Sstevel@tonic-gate #include <sys/fs/ufs_inode.h>
400Sstevel@tonic-gate #include <sys/fs/ufs_fs.h>
41392Sswilcox #define	_KERNEL
420Sstevel@tonic-gate #include <sys/fs/ufs_fsdir.h>
43392Sswilcox #undef _KERNEL
440Sstevel@tonic-gate #include <pwd.h>
450Sstevel@tonic-gate #include "fsck.h"
460Sstevel@tonic-gate 
47392Sswilcox static int get_indir_offsets(int, daddr_t, int *, int *);
48392Sswilcox static int clearanentry(struct inodesc *);
49392Sswilcox static void pdinode(struct dinode *);
50392Sswilcox static void inoflush(void);
51392Sswilcox static void mark_delayed_inodes(fsck_ino_t, daddr32_t);
52392Sswilcox static int iblock(struct inodesc *, int, u_offset_t, enum cki_action);
53392Sswilcox static struct inoinfo *search_cache(struct inoinfo *, fsck_ino_t);
54392Sswilcox static int ckinode_common(struct dinode *, struct inodesc *, enum cki_action);
55392Sswilcox static int lookup_dotdot_ino(fsck_ino_t);
560Sstevel@tonic-gate 
57392Sswilcox /*
58392Sswilcox  * ckinode() essentially traverses the blocklist of the provided
59392Sswilcox  * inode.  For each block either the caller-supplied callback (id_func
60392Sswilcox  * in the provided struct inodesc) or dirscan() is invoked.  Which is
61392Sswilcox  * chosen is controlled by what type of traversal was requested
62392Sswilcox  * (id_type) - if it was for an ADDR or ACL, use the callback,
63392Sswilcox  * otherwise it is assumed to be DATA (i.e., a directory) whose
64392Sswilcox  * contents need to be scanned.
65392Sswilcox  *
66392Sswilcox  * Note that a directory inode can get passed in with a type of ADDR;
67392Sswilcox  * the type field is orthogonal to the IFMT value.  This is so that
68392Sswilcox  * the file aspects (no duplicate blocks, etc) of a directory can be
69392Sswilcox  * verified just like is done for any other file, or the actual
70392Sswilcox  * contents can be scanned so that connectivity and such can be
71392Sswilcox  * investigated.
72392Sswilcox  *
73392Sswilcox  * The traversal is controlled by flags in the return value of
74392Sswilcox  * dirscan() or the callback.  Five flags are defined, STOP, SKIP,
75392Sswilcox  * KEEPON, ALTERED, and FOUND.  Their semantics are:
76392Sswilcox  *
77392Sswilcox  *     STOP -    no further processing of this inode is desired/possible/
78392Sswilcox  *               feasible/etc.  This can mean that whatever the scan
79392Sswilcox  *               was searching for was found, or a serious
80392Sswilcox  *               inconsistency was encountered, or anything else
81392Sswilcox  *               appropriate.
82392Sswilcox  *
83392Sswilcox  *     SKIP -    something that made it impossible to continue was
84392Sswilcox  *               encountered, and the caller should go on to the next
85392Sswilcox  *               inode.  This is more for i/o failures than for
86392Sswilcox  *               logical inconsistencies.  Nothing actually looks for
87392Sswilcox  *               this.
88392Sswilcox  *
89392Sswilcox  *     KEEPON -  no more blocks of this inode need to be scanned, but
90392Sswilcox  *               nothing's wrong, so keep on going with the next
91392Sswilcox  *               inode.  It is similar to STOP, except that
92392Sswilcox  *               ckinode()'s caller will typically advance to the next
93392Sswilcox  *               inode for KEEPON, whereas it ceases scanning through
94392Sswilcox  *               the inodes completely for STOP.
95392Sswilcox  *
96392Sswilcox  *     ALTERED - a change was made to the inode.  If the caller sees
97392Sswilcox  *               this set, it should make sure to flush out the
98392Sswilcox  *               changes.  Note that any data blocks read in by the
99392Sswilcox  *               function need to be marked dirty by it directly;
100392Sswilcox  *               flushing of those will happen automatically later.
101392Sswilcox  *
102392Sswilcox  *     FOUND -   whatever was being searched for was located.
103392Sswilcox  *               Typically combined with STOP to avoid wasting time
104392Sswilcox  *               doing additional looking.
105392Sswilcox  *
106392Sswilcox  * During a traversal, some state needs to be carried around.  At the
107392Sswilcox  * least, the callback functions need to know what inode they're
108392Sswilcox  * working on, which logical block, and whether or not fixing problems
109392Sswilcox  * when they're encountered is desired.  Rather than try to guess what
110392Sswilcox  * else might be needed (and thus end up passing way more arguments
111392Sswilcox  * than is reasonable), all the possibilities have been bundled in
112392Sswilcox  * struct inodesc.  About half of the fields are specific to directory
113392Sswilcox  * traversals, and the rest are pretty much generic to any traversal.
114392Sswilcox  *
115392Sswilcox  * The general fields are:
116392Sswilcox  *
117392Sswilcox  *     id_fix        What to do when an error is found.  Generally, this
118392Sswilcox  *                   is set to DONTKNOW before a traversal.  If a
119392Sswilcox  *                   problem is encountered, it is changed to either FIX
120392Sswilcox  *                   or NOFIX by the dofix() query function.  If id_fix
121392Sswilcox  *                   has already been set to FIX when dofix() is called, then
122392Sswilcox  *                   it includes the ALTERED flag (see above) in its return
123392Sswilcox  *                   value; the net effect is that the inode's buffer
124392Sswilcox  *                   will get marked dirty and written to disk at some
125392Sswilcox  *                   point.  If id_fix is DONTKNOW, then dofix() will
126392Sswilcox  *                   query the user.  If it is NOFIX, then dofix()
127392Sswilcox  *                   essentially does nothing.  A few routines set NOFIX
128392Sswilcox  *                   as the initial value, as they are performing a best-
129392Sswilcox  *                   effort informational task, rather than an actual
130392Sswilcox  *                   repair operation.
131392Sswilcox  *
132392Sswilcox  *     id_func       This is the function that will be called for every
133392Sswilcox  *                   logical block in the file (assuming id_type is not
134392Sswilcox  *                   DATA).  The logical block may represent a hole, so
135392Sswilcox  *                   the callback needs to be prepared to handle that
136392Sswilcox  *                   case.  Its return value is a combination of the flags
137392Sswilcox  *                   described above (SKIP, ALTERED, etc).
138392Sswilcox  *
139392Sswilcox  *     id_number     The inode number whose block list or data is being
140392Sswilcox  *                   scanned.
141392Sswilcox  *
142392Sswilcox  *     id_parent     When id_type is DATA, this is the inode number for
143392Sswilcox  *                   the parent of id_number.  Otherwise, it is
144392Sswilcox  *                   available for use as an extra parameter or return
145392Sswilcox  *                   value between the callback and ckinode()'s caller.
146392Sswilcox  *                   Which, if either, of those is left completely up to
147392Sswilcox  *                   the two routines involved, so nothing can generally
148392Sswilcox  *                   be assumed about the id_parent value for non-DATA
149392Sswilcox  *                   traversals.
150392Sswilcox  *
151392Sswilcox  *     id_lbn        This is the current logical block (not fragment)
152392Sswilcox  *                   number being visited by the traversal.
153392Sswilcox  *
154392Sswilcox  *     id_blkno      This is the physical block corresponding to id_lbn.
155392Sswilcox  *
156392Sswilcox  *     id_numfrags   This defines how large a block is being processed in
157392Sswilcox  *                   this particular invocation of the callback.
158392Sswilcox  *                   Usually, it will be the same as sblock.fs_frag.
159392Sswilcox  *                   However, if a direct block is being processed and
160392Sswilcox  *                   it is less than a full filesystem block,
161392Sswilcox  *                   id_numfrags will indicate just how many fragments
162392Sswilcox  *                   (starting from id_lbn) are actually part of the
163392Sswilcox  *                   file.
164392Sswilcox  *
165392Sswilcox  *     id_truncto    The pass 4 callback is used in several places to
166392Sswilcox  *                   free the blocks of a file (the `FILE HAS PROBLEM
167392Sswilcox  *                   FOO; CLEAR?' scenario).  This has been generalized
168392Sswilcox  *                   to allow truncating a file to a particular length
169392Sswilcox  *                   rather than always completely discarding it.  If
170392Sswilcox  *                   id_truncto is -1, then the entire file is released,
171392Sswilcox  *                   otherwise it is logical block number to truncate
172392Sswilcox  *                   to.  This generalized interface was motivated by a
173392Sswilcox  *                   desire to be able to discard everything after a
174392Sswilcox  *                   hole in a directory, rather than the entire
175392Sswilcox  *                   directory.
176392Sswilcox  *
177392Sswilcox  *     id_type       Selects the type of traversal.  DATA for dirscan(),
178392Sswilcox  *                   ADDR or ACL for using the provided callback.
179392Sswilcox  *
180392Sswilcox  * There are several more fields used just for dirscan() traversals:
181392Sswilcox  *
182392Sswilcox  *     id_filesize   The number of bytes in the overall directory left to
183392Sswilcox  *                   process.
184392Sswilcox  *
185392Sswilcox  *     id_loc        Byte position within the directory block.  Should always
186392Sswilcox  *                   point to the start of a directory entry.
187392Sswilcox  *
188392Sswilcox  *     id_entryno    Which logical directory entry is being processed (0
189392Sswilcox  *                   is `.', 1 is `..', 2 and on are normal entries).
190392Sswilcox  *                   This field is primarily used to enable special
191392Sswilcox  *                   checks when looking at the first two entries.
192392Sswilcox  *
193392Sswilcox  *                   The exception (there's always an exception in fsck)
194392Sswilcox  *                   is that in pass 1, it tracks how many fragments are
195392Sswilcox  *                   being used by a particular inode.
196392Sswilcox  *
197392Sswilcox  *     id_firsthole  The first logical block number that was found to
198392Sswilcox  *                   be zero.  As directories are not supposed to have
199392Sswilcox  *                   holes, this marks where a directory should be
200392Sswilcox  *                   truncated down to.  A value of -1 indicates that
201392Sswilcox  *                   no holes were found.
202392Sswilcox  *
203392Sswilcox  *     id_dirp       A pointer to the in-memory copy of the current
204392Sswilcox  *                   directory entry (as identified by id_loc).
205392Sswilcox  *
206392Sswilcox  *     id_name       This is a directory entry name to either create
207392Sswilcox  *                   (callback is mkentry) or locate (callback is
208392Sswilcox  *                   chgino, findino, or findname).
209392Sswilcox  */
210392Sswilcox int
ckinode(struct dinode * dp,struct inodesc * idesc,enum cki_action action)211392Sswilcox ckinode(struct dinode *dp, struct inodesc *idesc, enum cki_action action)
2120Sstevel@tonic-gate {
213392Sswilcox 	struct inodesc cleardesc;
214392Sswilcox 	mode_t	mode;
215392Sswilcox 
216392Sswilcox 	if (idesc->id_filesize == 0)
217392Sswilcox 		idesc->id_filesize = (offset_t)dp->di_size;
2180Sstevel@tonic-gate 
219392Sswilcox 	/*
220392Sswilcox 	 * Our caller should be filtering out completely-free inodes
221392Sswilcox 	 * (mode == zero), so we'll work on the assumption that what
222392Sswilcox 	 * we're given has some basic validity.
223392Sswilcox 	 *
224392Sswilcox 	 * The kernel is inconsistent about MAXPATHLEN including the
225392Sswilcox 	 * trailing \0, so allow the more-generous length for symlinks.
226392Sswilcox 	 */
227392Sswilcox 	mode = dp->di_mode & IFMT;
228392Sswilcox 	if (mode == IFBLK || mode == IFCHR)
2290Sstevel@tonic-gate 		return (KEEPON);
230392Sswilcox 	if (mode == IFLNK && dp->di_size > MAXPATHLEN) {
231*12286SAndrew.Balfour@Sun.COM 		pwarn("I=%d  Symlink longer than supported maximum\n",
232392Sswilcox 		    idesc->id_number);
233392Sswilcox 		init_inodesc(&cleardesc);
234392Sswilcox 		cleardesc.id_type = ADDR;
235392Sswilcox 		cleardesc.id_number = idesc->id_number;
236392Sswilcox 		cleardesc.id_fix = DONTKNOW;
237392Sswilcox 		clri(&cleardesc, "BAD", CLRI_VERBOSE, CLRI_NOP_CORRUPT);
238392Sswilcox 		return (STOP);
239392Sswilcox 	}
240392Sswilcox 	return (ckinode_common(dp, idesc, action));
241392Sswilcox }
242392Sswilcox 
243392Sswilcox /*
244392Sswilcox  * This was split out from ckinode() to allow it to be used
245392Sswilcox  * without having to pass in kludge flags to suppress the
246392Sswilcox  * wrong-for-deletion initialization and irrelevant checks.
247392Sswilcox  * This feature is no longer needed, but is being kept in case
248392Sswilcox  * the need comes back.
249392Sswilcox  */
250392Sswilcox static int
ckinode_common(struct dinode * dp,struct inodesc * idesc,enum cki_action action)251392Sswilcox ckinode_common(struct dinode *dp, struct inodesc *idesc,
252392Sswilcox 	enum cki_action action)
253392Sswilcox {
254392Sswilcox 	offset_t offset;
255392Sswilcox 	struct dinode dino;
256392Sswilcox 	daddr_t ndb;
257392Sswilcox 	int indir_data_blks, last_indir_blk;
258392Sswilcox 	int ret, i, frags;
259392Sswilcox 
260392Sswilcox 	(void) memmove(&dino, dp, sizeof (struct dinode));
2610Sstevel@tonic-gate 	ndb = howmany(dino.di_size, (u_offset_t)sblock.fs_bsize);
262392Sswilcox 
263392Sswilcox 	for (i = 0; i < NDADDR; i++) {
264392Sswilcox 		idesc->id_lbn++;
265392Sswilcox 		offset = blkoff(&sblock, dino.di_size);
266392Sswilcox 		if ((--ndb == 0) && (offset != 0)) {
2670Sstevel@tonic-gate 			idesc->id_numfrags =
2680Sstevel@tonic-gate 			    numfrags(&sblock, fragroundup(&sblock, offset));
269392Sswilcox 		} else {
2700Sstevel@tonic-gate 			idesc->id_numfrags = sblock.fs_frag;
271392Sswilcox 		}
272392Sswilcox 		if (dino.di_db[i] == 0) {
273392Sswilcox 			if ((ndb > 0) && (idesc->id_firsthole < 0)) {
274392Sswilcox 				idesc->id_firsthole = i;
275392Sswilcox 			}
2760Sstevel@tonic-gate 			continue;
277392Sswilcox 		}
278392Sswilcox 		idesc->id_blkno = dino.di_db[i];
2790Sstevel@tonic-gate 		if (idesc->id_type == ADDR || idesc->id_type == ACL)
2800Sstevel@tonic-gate 			ret = (*idesc->id_func)(idesc);
2810Sstevel@tonic-gate 		else
2820Sstevel@tonic-gate 			ret = dirscan(idesc);
283392Sswilcox 
284392Sswilcox 		/*
285392Sswilcox 		 * Need to clear the entry, now that we're done with
286392Sswilcox 		 * it.  We depend on freeblk() ignoring a request to
287392Sswilcox 		 * free already-free fragments to handle the problem of
288392Sswilcox 		 * a partial block.
289392Sswilcox 		 */
290392Sswilcox 		if ((action == CKI_TRUNCATE) &&
291392Sswilcox 		    (idesc->id_truncto >= 0) &&
292392Sswilcox 		    (idesc->id_lbn >= idesc->id_truncto)) {
293392Sswilcox 			dp = ginode(idesc->id_number);
294392Sswilcox 			/*
295392Sswilcox 			 * The (int) cast is safe, in that if di_size won't
296392Sswilcox 			 * fit, it'll be a multiple of any legal fs_frag,
297392Sswilcox 			 * thus giving a zero result.  That value, in turn
298392Sswilcox 			 * means we're doing an entire block.
299392Sswilcox 			 */
300392Sswilcox 			frags = howmany((int)dp->di_size, sblock.fs_fsize) %
301392Sswilcox 			    sblock.fs_frag;
302392Sswilcox 			if (frags == 0)
303392Sswilcox 				frags = sblock.fs_frag;
304392Sswilcox 			freeblk(idesc->id_number, dp->di_db[i],
305392Sswilcox 			    frags);
306392Sswilcox 			dp = ginode(idesc->id_number);
307392Sswilcox 			dp->di_db[i] = 0;
308392Sswilcox 			inodirty();
309392Sswilcox 			ret |= ALTERED;
310392Sswilcox 		}
311392Sswilcox 
3120Sstevel@tonic-gate 		if (ret & STOP)
3130Sstevel@tonic-gate 			return (ret);
3140Sstevel@tonic-gate 	}
3150Sstevel@tonic-gate 
316392Sswilcox #ifdef lint
317392Sswilcox 	/*
318392Sswilcox 	 * Cure a lint complaint of ``possible use before set''.
319392Sswilcox 	 * Apparently it can't quite figure out the switch statement.
320392Sswilcox 	 */
321392Sswilcox 	indir_data_blks = 0;
322392Sswilcox #endif
3230Sstevel@tonic-gate 	/*
324392Sswilcox 	 * indir_data_blks contains the number of data blocks in all
325392Sswilcox 	 * the previous levels for this iteration.  E.g., for the
326392Sswilcox 	 * single indirect case (i = 0, di_ib[i] != 0), NDADDR's worth
327392Sswilcox 	 * of blocks have already been covered by the direct blocks
328392Sswilcox 	 * (di_db[]).  At the triple indirect level (i = NIADDR - 1),
329392Sswilcox 	 * it is all of the number of data blocks that were covered
330392Sswilcox 	 * by the second indirect, single indirect, and direct block
331392Sswilcox 	 * levels.
3320Sstevel@tonic-gate 	 */
333392Sswilcox 	idesc->id_numfrags = sblock.fs_frag;
334392Sswilcox 	ndb = howmany(dino.di_size, (u_offset_t)sblock.fs_bsize);
335392Sswilcox 	for (i = 0; i < NIADDR; i++) {
336392Sswilcox 		(void) get_indir_offsets(i, ndb, &indir_data_blks,
337392Sswilcox 		    &last_indir_blk);
338392Sswilcox 		if (dino.di_ib[i] != 0) {
339392Sswilcox 			/*
340392Sswilcox 			 * We'll only clear di_ib[i] if the first entry (and
341392Sswilcox 			 * therefore all of them) is to be cleared, since we
342392Sswilcox 			 * only go through this code on the first entry of
343392Sswilcox 			 * each level of indirection.  The +1 is to account
344392Sswilcox 			 * for the fact that we don't modify id_lbn until
345392Sswilcox 			 * we actually start processing on a data block.
346392Sswilcox 			 */
347392Sswilcox 			idesc->id_blkno = dino.di_ib[i];
348392Sswilcox 			ret = iblock(idesc, i + 1,
3490Sstevel@tonic-gate 			    (u_offset_t)howmany(dino.di_size,
3506179Smc208700 			    (u_offset_t)sblock.fs_bsize) - indir_data_blks,
3516179Smc208700 			    action);
352392Sswilcox 			if ((action == CKI_TRUNCATE) &&
353392Sswilcox 			    (idesc->id_truncto <= indir_data_blks) &&
354392Sswilcox 			    ((idesc->id_lbn + 1) >= indir_data_blks) &&
355392Sswilcox 			    ((idesc->id_lbn + 1) <= last_indir_blk)) {
356392Sswilcox 				dp = ginode(idesc->id_number);
357392Sswilcox 				if (dp->di_ib[i] != 0) {
358392Sswilcox 					freeblk(idesc->id_number, dp->di_ib[i],
359392Sswilcox 					    sblock.fs_frag);
360392Sswilcox 				}
361392Sswilcox 			}
3620Sstevel@tonic-gate 			if (ret & STOP)
3630Sstevel@tonic-gate 				return (ret);
3640Sstevel@tonic-gate 		} else {
365392Sswilcox 			/*
366392Sswilcox 			 * Need to know which of the file's logical blocks
367392Sswilcox 			 * reside in the missing indirect block.  However, the
368392Sswilcox 			 * precise location is only needed for truncating
369392Sswilcox 			 * directories, and level-of-indirection precision is
370392Sswilcox 			 * sufficient for that.
371392Sswilcox 			 */
372392Sswilcox 			if ((indir_data_blks < ndb) &&
373392Sswilcox 			    (idesc->id_firsthole < 0)) {
374392Sswilcox 				idesc->id_firsthole = indir_data_blks;
375392Sswilcox 			}
3760Sstevel@tonic-gate 		}
3770Sstevel@tonic-gate 	}
3780Sstevel@tonic-gate 	return (KEEPON);
3790Sstevel@tonic-gate }
3800Sstevel@tonic-gate 
381392Sswilcox static int
get_indir_offsets(int ilevel_wanted,daddr_t ndb,int * data_blks,int * last_blk)382392Sswilcox get_indir_offsets(int ilevel_wanted, daddr_t ndb, int *data_blks,
383392Sswilcox 	int *last_blk)
3840Sstevel@tonic-gate {
385392Sswilcox 	int ndb_ilevel = -1;
386392Sswilcox 	int ilevel;
387392Sswilcox 	int dblks, lblk;
388392Sswilcox 
389392Sswilcox 	for (ilevel = 0; ilevel < NIADDR; ilevel++) {
390392Sswilcox 		switch (ilevel) {
391392Sswilcox 		case 0:	/* SINGLE */
392392Sswilcox 			dblks = NDADDR;
393392Sswilcox 			lblk = dblks + NINDIR(&sblock) - 1;
394392Sswilcox 			break;
395392Sswilcox 		case 1:	/* DOUBLE */
396392Sswilcox 			dblks = NDADDR + NINDIR(&sblock);
397392Sswilcox 			lblk = dblks + (NINDIR(&sblock) * NINDIR(&sblock)) - 1;
398392Sswilcox 			break;
399392Sswilcox 		case 2:	/* TRIPLE */
400392Sswilcox 			dblks = NDADDR + NINDIR(&sblock) +
401392Sswilcox 			    (NINDIR(&sblock) * NINDIR(&sblock));
402392Sswilcox 			lblk = dblks + (NINDIR(&sblock) * NINDIR(&sblock) *
403392Sswilcox 			    NINDIR(&sblock)) - 1;
404392Sswilcox 			break;
405392Sswilcox 		default:
406392Sswilcox 			exitstat = EXERRFATAL;
407392Sswilcox 			/*
408392Sswilcox 			 * Translate from zero-based array to
409392Sswilcox 			 * one-based human-style counting.
410392Sswilcox 			 */
411392Sswilcox 			errexit("panic: indirection level %d not 1, 2, or 3",
412392Sswilcox 			    ilevel + 1);
413392Sswilcox 			/* NOTREACHED */
414392Sswilcox 		}
415392Sswilcox 
416392Sswilcox 		if (dblks < ndb && ndb <= lblk)
417392Sswilcox 			ndb_ilevel = ilevel;
418392Sswilcox 
419392Sswilcox 		if (ilevel == ilevel_wanted) {
420392Sswilcox 			if (data_blks != NULL)
421392Sswilcox 				*data_blks = dblks;
422392Sswilcox 			if (last_blk != NULL)
423392Sswilcox 				*last_blk = lblk;
424392Sswilcox 		}
425392Sswilcox 	}
426392Sswilcox 
427392Sswilcox 	return (ndb_ilevel);
428392Sswilcox }
429392Sswilcox 
430392Sswilcox static int
iblock(struct inodesc * idesc,int ilevel,u_offset_t iblks,enum cki_action action)431392Sswilcox iblock(struct inodesc *idesc, int ilevel, u_offset_t iblks,
432392Sswilcox 	enum cki_action action)
433392Sswilcox {
434392Sswilcox 	struct bufarea *bp;
435392Sswilcox 	int i, n;
436392Sswilcox 	int (*func)(struct inodesc *) = NULL;
4370Sstevel@tonic-gate 	u_offset_t fsbperindirb;
438392Sswilcox 	daddr32_t last_lbn;
439392Sswilcox 	int nif;
4400Sstevel@tonic-gate 	char buf[BUFSIZ];
441392Sswilcox 
442392Sswilcox 	n = KEEPON;
4430Sstevel@tonic-gate 
444392Sswilcox 	switch (idesc->id_type) {
445392Sswilcox 	case ADDR:
4460Sstevel@tonic-gate 		func = idesc->id_func;
4470Sstevel@tonic-gate 		if (((n = (*func)(idesc)) & KEEPON) == 0)
448392Sswilcox 				return (n);
449392Sswilcox 		break;
450392Sswilcox 	case ACL:
4510Sstevel@tonic-gate 		func = idesc->id_func;
452392Sswilcox 		break;
453392Sswilcox 	case DATA:
4540Sstevel@tonic-gate 		func = dirscan;
455392Sswilcox 		break;
456392Sswilcox 	default:
457392Sswilcox 		errexit("unknown inodesc type %d in iblock()", idesc->id_type);
458392Sswilcox 		/* NOTREACHED */
459392Sswilcox 	}
460392Sswilcox 	if (chkrange(idesc->id_blkno, idesc->id_numfrags)) {
461392Sswilcox 		return ((idesc->id_type == ACL) ? STOP : SKIP);
4620Sstevel@tonic-gate 	}
463392Sswilcox 
464392Sswilcox 	bp = getdatablk(idesc->id_blkno, (size_t)sblock.fs_bsize);
465392Sswilcox 	if (bp->b_errs != 0) {
466392Sswilcox 		brelse(bp);
467392Sswilcox 		return (SKIP);
468392Sswilcox 	}
469392Sswilcox 
4700Sstevel@tonic-gate 	ilevel--;
471392Sswilcox 	/*
472392Sswilcox 	 * Trivia note: the BSD fsck has the number of bytes remaining
473392Sswilcox 	 * as the third argument to iblock(), so the equivalent of
474392Sswilcox 	 * fsbperindirb starts at fs_bsize instead of one.  We're
475392Sswilcox 	 * working in units of filesystem blocks here, not bytes or
476392Sswilcox 	 * fragments.
477392Sswilcox 	 */
4780Sstevel@tonic-gate 	for (fsbperindirb = 1, i = 0; i < ilevel; i++) {
4790Sstevel@tonic-gate 		fsbperindirb *= (u_offset_t)NINDIR(&sblock);
4800Sstevel@tonic-gate 	}
4810Sstevel@tonic-gate 	/*
4820Sstevel@tonic-gate 	 * nif indicates the next "free" pointer (as an array index) in this
4830Sstevel@tonic-gate 	 * indirect block, based on counting the blocks remaining in the
4840Sstevel@tonic-gate 	 * file after subtracting all previously processed blocks.
4850Sstevel@tonic-gate 	 * This figure is based on the size field of the inode.
4860Sstevel@tonic-gate 	 *
487392Sswilcox 	 * Note that in normal operation, nif may initially be calculated
488392Sswilcox 	 * as larger than the number of pointers in this block (as when
489392Sswilcox 	 * there are more indirect blocks following); if that is
4900Sstevel@tonic-gate 	 * the case, nif is limited to the max number of pointers per
4910Sstevel@tonic-gate 	 * indirect block.
4920Sstevel@tonic-gate 	 *
493392Sswilcox 	 * Also note that if an inode is inconsistent (has more blocks
4940Sstevel@tonic-gate 	 * allocated to it than the size field would indicate), the sweep
4950Sstevel@tonic-gate 	 * through any indirect blocks directly pointed at by the inode
4960Sstevel@tonic-gate 	 * continues. Since the block offset of any data blocks referenced
4970Sstevel@tonic-gate 	 * by these indirect blocks is greater than the size of the file,
4980Sstevel@tonic-gate 	 * the index nif may be computed as a negative value.
4990Sstevel@tonic-gate 	 * In this case, we reset nif to indicate that all pointers in
5000Sstevel@tonic-gate 	 * this retrieval block should be zeroed and the resulting
501392Sswilcox 	 * unreferenced data and/or retrieval blocks will be recovered
5020Sstevel@tonic-gate 	 * through garbage collection later.
5030Sstevel@tonic-gate 	 */
5040Sstevel@tonic-gate 	nif = (offset_t)howmany(iblks, fsbperindirb);
5050Sstevel@tonic-gate 	if (nif > NINDIR(&sblock))
5060Sstevel@tonic-gate 		nif = NINDIR(&sblock);
5070Sstevel@tonic-gate 	else if (nif < 0)
5080Sstevel@tonic-gate 		nif = 0;
5090Sstevel@tonic-gate 	/*
5100Sstevel@tonic-gate 	 * first pass: all "free" retrieval pointers (from [nif] thru
5110Sstevel@tonic-gate 	 * 	the end of the indirect block) should be zero. (This
5120Sstevel@tonic-gate 	 *	assertion does not hold for directories, which may be
5130Sstevel@tonic-gate 	 *	truncated without releasing their allocated space)
5140Sstevel@tonic-gate 	 */
515392Sswilcox 	if (nif < NINDIR(&sblock) && (idesc->id_func == pass1check ||
516392Sswilcox 	    idesc->id_func == pass3bcheck)) {
517392Sswilcox 		for (i = nif; i < NINDIR(&sblock); i++) {
518392Sswilcox 			if (bp->b_un.b_indir[i] == 0)
5190Sstevel@tonic-gate 				continue;
520392Sswilcox 			(void) sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu",
521392Sswilcox 			    (ulong_t)idesc->id_number);
522392Sswilcox 			if (preen) {
523392Sswilcox 				pfatal(buf);
524392Sswilcox 			} else if (dofix(idesc, buf)) {
525392Sswilcox 				freeblk(idesc->id_number,
526392Sswilcox 				    bp->b_un.b_indir[i],
527392Sswilcox 				    sblock.fs_frag);
528392Sswilcox 				bp->b_un.b_indir[i] = 0;
5290Sstevel@tonic-gate 				dirty(bp);
5300Sstevel@tonic-gate 			}
5310Sstevel@tonic-gate 		}
5320Sstevel@tonic-gate 		flush(fswritefd, bp);
5330Sstevel@tonic-gate 	}
5340Sstevel@tonic-gate 	/*
535392Sswilcox 	 * second pass: all retrieval pointers referring to blocks within
5360Sstevel@tonic-gate 	 *	a valid range [0..filesize] (both indirect and data blocks)
537392Sswilcox 	 *	are examined in the same manner as ckinode() checks the
538392Sswilcox 	 *	direct blocks in the inode.  Sweep through from
5390Sstevel@tonic-gate 	 *	the first pointer in this retrieval block to [nif-1].
5400Sstevel@tonic-gate 	 */
541392Sswilcox 	last_lbn = howmany(idesc->id_filesize, sblock.fs_bsize);
542392Sswilcox 	for (i = 0; i < nif; i++) {
543392Sswilcox 		if (ilevel == 0)
544392Sswilcox 			idesc->id_lbn++;
545392Sswilcox 		if (bp->b_un.b_indir[i] != 0) {
546392Sswilcox 			idesc->id_blkno = bp->b_un.b_indir[i];
5470Sstevel@tonic-gate 			if (ilevel > 0) {
548392Sswilcox 				n = iblock(idesc, ilevel, iblks, action);
5490Sstevel@tonic-gate 				/*
550392Sswilcox 				 * Each iteration decreases "remaining block
551392Sswilcox 				 * count" by the number of blocks accessible
5520Sstevel@tonic-gate 				 * by a pointer at this indirect block level.
5530Sstevel@tonic-gate 				 */
5540Sstevel@tonic-gate 				iblks -= fsbperindirb;
5550Sstevel@tonic-gate 			} else {
556392Sswilcox 				/*
557392Sswilcox 				 * If we're truncating, func will discard
558392Sswilcox 				 * the data block for us.
559392Sswilcox 				 */
5600Sstevel@tonic-gate 				n = (*func)(idesc);
5610Sstevel@tonic-gate 			}
562392Sswilcox 
563392Sswilcox 			if ((action == CKI_TRUNCATE) &&
564392Sswilcox 			    (idesc->id_truncto >= 0) &&
565392Sswilcox 			    (idesc->id_lbn >= idesc->id_truncto)) {
566392Sswilcox 				freeblk(idesc->id_number,  bp->b_un.b_indir[i],
567392Sswilcox 				    sblock.fs_frag);
568392Sswilcox 			}
569392Sswilcox 
570392Sswilcox 			/*
571392Sswilcox 			 * Note that truncation never gets STOP back
572392Sswilcox 			 * under normal circumstances.  Abnormal would
573392Sswilcox 			 * be a bad acl short-circuit in iblock() or
574392Sswilcox 			 * an out-of-range failure in pass4check().
575392Sswilcox 			 * We still want to keep going when truncating
576392Sswilcox 			 * under those circumstances, since the whole
577392Sswilcox 			 * point of truncating is to get rid of all
578392Sswilcox 			 * that.
579392Sswilcox 			 */
580392Sswilcox 			if ((n & STOP) && (action != CKI_TRUNCATE)) {
5810Sstevel@tonic-gate 				brelse(bp);
5820Sstevel@tonic-gate 				return (n);
5830Sstevel@tonic-gate 			}
5840Sstevel@tonic-gate 		} else {
585392Sswilcox 			if ((idesc->id_lbn < last_lbn) &&
586392Sswilcox 			    (idesc->id_firsthole < 0)) {
587392Sswilcox 				idesc->id_firsthole = idesc->id_lbn;
588392Sswilcox 			}
589392Sswilcox 			if (idesc->id_type == DATA) {
590392Sswilcox 				/*
591392Sswilcox 				 * No point in continuing in the indirect
592392Sswilcox 				 * blocks of a directory, since they'll just
593392Sswilcox 				 * get freed anyway.
594392Sswilcox 				 */
595392Sswilcox 				brelse(bp);
596392Sswilcox 				return ((n & ~KEEPON) | STOP);
597392Sswilcox 			}
5980Sstevel@tonic-gate 		}
5990Sstevel@tonic-gate 	}
600392Sswilcox 
6010Sstevel@tonic-gate 	brelse(bp);
6020Sstevel@tonic-gate 	return (KEEPON);
6030Sstevel@tonic-gate }
6040Sstevel@tonic-gate 
6050Sstevel@tonic-gate /*
6060Sstevel@tonic-gate  * Check that a block is a legal block number.
6070Sstevel@tonic-gate  * Return 0 if in range, 1 if out of range.
6080Sstevel@tonic-gate  */
609392Sswilcox int
chkrange(daddr32_t blk,int cnt)610392Sswilcox chkrange(daddr32_t blk, int cnt)
6110Sstevel@tonic-gate {
6120Sstevel@tonic-gate 	int c;
6130Sstevel@tonic-gate 
614392Sswilcox 	if (cnt <= 0 || blk <= 0 || ((unsigned)blk >= (unsigned)maxfsblock) ||
615392Sswilcox 	    ((cnt - 1) > (maxfsblock - blk))) {
616392Sswilcox 		if (debug)
617392Sswilcox 			(void) printf(
618392Sswilcox 			    "Bad fragment range: should be 1 <= %d..%d < %d\n",
619392Sswilcox 			    blk, blk + cnt, maxfsblock);
6200Sstevel@tonic-gate 		return (1);
621392Sswilcox 	}
622392Sswilcox 	if ((cnt > sblock.fs_frag) ||
623392Sswilcox 	    ((fragnum(&sblock, blk) + cnt) > sblock.fs_frag)) {
624392Sswilcox 		if (debug)
625392Sswilcox 			(void) printf("Bad fragment size: size %d\n", cnt);
626392Sswilcox 		return (1);
627392Sswilcox 	}
6280Sstevel@tonic-gate 	c = dtog(&sblock, blk);
6290Sstevel@tonic-gate 	if (blk < cgdmin(&sblock, c)) {
6300Sstevel@tonic-gate 		if ((unsigned)(blk + cnt) > (unsigned)cgsblock(&sblock, c)) {
631392Sswilcox 			if (debug)
632392Sswilcox 				(void) printf(
633392Sswilcox 	    "Bad fragment position: %d..%d spans start of cg metadata\n",
634392Sswilcox 				    blk, blk + cnt);
6350Sstevel@tonic-gate 			return (1);
6360Sstevel@tonic-gate 		}
6370Sstevel@tonic-gate 	} else {
6380Sstevel@tonic-gate 		if ((unsigned)(blk + cnt) > (unsigned)cgbase(&sblock, c+1)) {
639392Sswilcox 			if (debug)
640392Sswilcox 				(void) printf(
641392Sswilcox 				    "Bad frag pos: %d..%d crosses end of cg\n",
642392Sswilcox 				    blk, blk + cnt);
6430Sstevel@tonic-gate 			return (1);
6440Sstevel@tonic-gate 		}
6450Sstevel@tonic-gate 	}
6460Sstevel@tonic-gate 	return (0);
6470Sstevel@tonic-gate }
6480Sstevel@tonic-gate 
6490Sstevel@tonic-gate /*
6500Sstevel@tonic-gate  * General purpose interface for reading inodes.
6510Sstevel@tonic-gate  */
652392Sswilcox 
653392Sswilcox /*
654392Sswilcox  * Note that any call to ginode() can potentially invalidate any
655392Sswilcox  * dinode pointers previously acquired from it.  To avoid pain,
656392Sswilcox  * make sure to always call inodirty() immediately after modifying
657392Sswilcox  * an inode, if there's any chance of ginode() being called after
658392Sswilcox  * that.  Also, always call ginode() right before you need to access
659392Sswilcox  * an inode, so that there won't be any surprises from functions
660392Sswilcox  * called between the previous ginode() invocation and the dinode
661392Sswilcox  * use.
662392Sswilcox  *
663392Sswilcox  * Despite all that, we aren't doing the amount of i/o that's implied,
664392Sswilcox  * as we use the buffer cache that getdatablk() and friends maintain.
665392Sswilcox  */
666392Sswilcox static fsck_ino_t startinum = -1;
667392Sswilcox 
6680Sstevel@tonic-gate struct dinode *
ginode(fsck_ino_t inum)669392Sswilcox ginode(fsck_ino_t inum)
6700Sstevel@tonic-gate {
6710Sstevel@tonic-gate 	daddr32_t iblk;
6720Sstevel@tonic-gate 	struct dinode *dp;
6730Sstevel@tonic-gate 
674392Sswilcox 	if (inum < UFSROOTINO || inum > maxino) {
675392Sswilcox 		errexit("bad inode number %d to ginode\n", inum);
676392Sswilcox 	}
677392Sswilcox 	if (startinum == -1 ||
678392Sswilcox 	    pbp == NULL ||
679392Sswilcox 	    inum < startinum ||
680392Sswilcox 	    inum >= (fsck_ino_t)(startinum + (fsck_ino_t)INOPB(&sblock))) {
681392Sswilcox 		iblk = itod(&sblock, inum);
682392Sswilcox 		if (pbp != NULL) {
6830Sstevel@tonic-gate 			brelse(pbp);
6840Sstevel@tonic-gate 		}
685392Sswilcox 		/*
686392Sswilcox 		 * We don't check for errors here, because we can't
687392Sswilcox 		 * tell our caller about it, and the zeros that will
688392Sswilcox 		 * be in the buffer are just as good as anything we
689392Sswilcox 		 * could fake.
690392Sswilcox 		 */
691392Sswilcox 		pbp = getdatablk(iblk, (size_t)sblock.fs_bsize);
6920Sstevel@tonic-gate 		startinum =
693392Sswilcox 		    (fsck_ino_t)((inum / INOPB(&sblock)) * INOPB(&sblock));
6940Sstevel@tonic-gate 	}
695392Sswilcox 	dp = &pbp->b_un.b_dinode[inum % INOPB(&sblock)];
696392Sswilcox 	if (dp->di_suid != UID_LONG)
697392Sswilcox 		dp->di_uid = dp->di_suid;
698392Sswilcox 	if (dp->di_sgid != GID_LONG)
699392Sswilcox 		dp->di_gid = dp->di_sgid;
7000Sstevel@tonic-gate 	return (dp);
7010Sstevel@tonic-gate }
7020Sstevel@tonic-gate 
7030Sstevel@tonic-gate /*
7040Sstevel@tonic-gate  * Special purpose version of ginode used to optimize first pass
705392Sswilcox  * over all the inodes in numerical order.  It bypasses the buffer
706392Sswilcox  * system used by ginode(), etc in favour of reading the bulk of a
707392Sswilcox  * cg's inodes at one time.
7080Sstevel@tonic-gate  */
709392Sswilcox static fsck_ino_t nextino, lastinum;
710392Sswilcox static int64_t readcnt, readpercg, fullcnt, inobufsize;
711392Sswilcox static int64_t partialcnt, partialsize;
712392Sswilcox static size_t lastsize;
713392Sswilcox static struct dinode *inodebuf;
714392Sswilcox static diskaddr_t currentdblk;
715392Sswilcox static struct dinode *currentinode;
7160Sstevel@tonic-gate 
7170Sstevel@tonic-gate struct dinode *
getnextinode(fsck_ino_t inum)718392Sswilcox getnextinode(fsck_ino_t inum)
7190Sstevel@tonic-gate {
720392Sswilcox 	size_t size;
7210Sstevel@tonic-gate 	diskaddr_t dblk;
7220Sstevel@tonic-gate 	static struct dinode *dp;
7230Sstevel@tonic-gate 
724392Sswilcox 	if (inum != nextino++ || inum > maxino)
725392Sswilcox 		errexit("bad inode number %d to nextinode\n", inum);
726392Sswilcox 
727392Sswilcox 	/*
728392Sswilcox 	 * Will always go into the if() the first time we're called,
729392Sswilcox 	 * so dp will always be valid.
730392Sswilcox 	 */
731392Sswilcox 	if (inum >= lastinum) {
7320Sstevel@tonic-gate 		readcnt++;
7330Sstevel@tonic-gate 		dblk = fsbtodb(&sblock, itod(&sblock, lastinum));
734392Sswilcox 		currentdblk = dblk;
7350Sstevel@tonic-gate 		if (readcnt % readpercg == 0) {
736392Sswilcox 			if (partialsize > SIZE_MAX)
737392Sswilcox 				errexit(
738392Sswilcox 				    "Internal error: partialsize overflow");
739392Sswilcox 			size = (size_t)partialsize;
7400Sstevel@tonic-gate 			lastinum += partialcnt;
7410Sstevel@tonic-gate 		} else {
742392Sswilcox 			if (inobufsize > SIZE_MAX)
743392Sswilcox 				errexit("Internal error: inobufsize overflow");
744392Sswilcox 			size = (size_t)inobufsize;
7450Sstevel@tonic-gate 			lastinum += fullcnt;
7460Sstevel@tonic-gate 		}
747392Sswilcox 		/*
748392Sswilcox 		 * If fsck_bread() returns an error, it will already have
749392Sswilcox 		 * zeroed out the buffer, so we do not need to do so here.
750392Sswilcox 		 */
751392Sswilcox 		(void) fsck_bread(fsreadfd, (caddr_t)inodebuf, dblk, size);
752392Sswilcox 		lastsize = size;
7530Sstevel@tonic-gate 		dp = inodebuf;
7540Sstevel@tonic-gate 	}
755392Sswilcox 	currentinode = dp;
7560Sstevel@tonic-gate 	return (dp++);
7570Sstevel@tonic-gate }
7580Sstevel@tonic-gate 
759392Sswilcox /*
760392Sswilcox  * Reread the current getnext() buffer.  This allows for changing inodes
761392Sswilcox  * other than the current one via ginode()/inodirty()/inoflush().
762392Sswilcox  *
763392Sswilcox  * Just reuses all the interesting variables that getnextinode() set up
764392Sswilcox  * last time it was called.  This shouldn't get called often, so we don't
765392Sswilcox  * try to figure out if the caller's actually touched an inode in the
766392Sswilcox  * range we have cached.  There could have been an arbitrary number of
767392Sswilcox  * them, after all.
768392Sswilcox  */
769392Sswilcox struct dinode *
getnextrefresh(void)770392Sswilcox getnextrefresh(void)
7710Sstevel@tonic-gate {
772392Sswilcox 	if (inodebuf == NULL) {
773392Sswilcox 		return (NULL);
774392Sswilcox 	}
7750Sstevel@tonic-gate 
776392Sswilcox 	inoflush();
777392Sswilcox 	(void) fsck_bread(fsreadfd, (caddr_t)inodebuf, currentdblk, lastsize);
778392Sswilcox 	return (currentinode);
779392Sswilcox }
780392Sswilcox 
781392Sswilcox void
resetinodebuf(void)782392Sswilcox resetinodebuf(void)
783392Sswilcox {
7840Sstevel@tonic-gate 	startinum = 0;
7850Sstevel@tonic-gate 	nextino = 0;
7860Sstevel@tonic-gate 	lastinum = 0;
7870Sstevel@tonic-gate 	readcnt = 0;
7880Sstevel@tonic-gate 	inobufsize = blkroundup(&sblock, INOBUFSIZE);
7890Sstevel@tonic-gate 	fullcnt = inobufsize / sizeof (struct dinode);
7900Sstevel@tonic-gate 	readpercg = sblock.fs_ipg / fullcnt;
7910Sstevel@tonic-gate 	partialcnt = sblock.fs_ipg % fullcnt;
7920Sstevel@tonic-gate 	partialsize = partialcnt * sizeof (struct dinode);
7930Sstevel@tonic-gate 	if (partialcnt != 0) {
7940Sstevel@tonic-gate 		readpercg++;
7950Sstevel@tonic-gate 	} else {
7960Sstevel@tonic-gate 		partialcnt = fullcnt;
7970Sstevel@tonic-gate 		partialsize = inobufsize;
7980Sstevel@tonic-gate 	}
7990Sstevel@tonic-gate 	if (inodebuf == NULL &&
8000Sstevel@tonic-gate 	    (inodebuf = (struct dinode *)malloc((unsigned)inobufsize)) == NULL)
8010Sstevel@tonic-gate 		errexit("Cannot allocate space for inode buffer\n");
8020Sstevel@tonic-gate 	while (nextino < UFSROOTINO)
8030Sstevel@tonic-gate 		(void) getnextinode(nextino);
8040Sstevel@tonic-gate }
8050Sstevel@tonic-gate 
806392Sswilcox void
freeinodebuf(void)807392Sswilcox freeinodebuf(void)
8080Sstevel@tonic-gate {
809392Sswilcox 	if (inodebuf != NULL) {
810392Sswilcox 		free((void *)inodebuf);
811392Sswilcox 	}
8120Sstevel@tonic-gate 	inodebuf = NULL;
8130Sstevel@tonic-gate }
8140Sstevel@tonic-gate 
8150Sstevel@tonic-gate /*
8160Sstevel@tonic-gate  * Routines to maintain information about directory inodes.
8170Sstevel@tonic-gate  * This is built during the first pass and used during the
8180Sstevel@tonic-gate  * second and third passes.
8190Sstevel@tonic-gate  *
8200Sstevel@tonic-gate  * Enter inodes into the cache.
8210Sstevel@tonic-gate  */
822392Sswilcox void
cacheino(struct dinode * dp,fsck_ino_t inum)823392Sswilcox cacheino(struct dinode *dp, fsck_ino_t inum)
8240Sstevel@tonic-gate {
8250Sstevel@tonic-gate 	struct inoinfo *inp;
8260Sstevel@tonic-gate 	struct inoinfo **inpp;
8270Sstevel@tonic-gate 	uint_t blks;
8280Sstevel@tonic-gate 
8290Sstevel@tonic-gate 	blks = NDADDR + NIADDR;
8300Sstevel@tonic-gate 	inp = (struct inoinfo *)
8316179Smc208700 	    malloc(sizeof (*inp) + (blks - 1) * sizeof (daddr32_t));
8320Sstevel@tonic-gate 	if (inp == NULL)
833392Sswilcox 		errexit("Cannot increase directory list\n");
834392Sswilcox 	init_inoinfo(inp, dp, inum); /* doesn't touch i_nextlist or i_number */
835392Sswilcox 	inpp = &inphead[inum % numdirs];
836392Sswilcox 	inp->i_nextlist = *inpp;
8370Sstevel@tonic-gate 	*inpp = inp;
838392Sswilcox 	inp->i_number = inum;
8390Sstevel@tonic-gate 	if (inplast == listmax) {
8400Sstevel@tonic-gate 		listmax += 100;
841392Sswilcox 		inpsort = (struct inoinfo **)realloc((void *)inpsort,
8420Sstevel@tonic-gate 		    (unsigned)listmax * sizeof (struct inoinfo *));
8430Sstevel@tonic-gate 		if (inpsort == NULL)
8440Sstevel@tonic-gate 			errexit("cannot increase directory list");
8450Sstevel@tonic-gate 	}
8460Sstevel@tonic-gate 	inpsort[inplast++] = inp;
8470Sstevel@tonic-gate }
8480Sstevel@tonic-gate 
8490Sstevel@tonic-gate /*
8500Sstevel@tonic-gate  * Look up an inode cache structure.
8510Sstevel@tonic-gate  */
8520Sstevel@tonic-gate struct inoinfo *
getinoinfo(fsck_ino_t inum)853392Sswilcox getinoinfo(fsck_ino_t inum)
8540Sstevel@tonic-gate {
8550Sstevel@tonic-gate 	struct inoinfo *inp;
8560Sstevel@tonic-gate 
857392Sswilcox 	inp = search_cache(inphead[inum % numdirs], inum);
858392Sswilcox 	return (inp);
8590Sstevel@tonic-gate }
8600Sstevel@tonic-gate 
8610Sstevel@tonic-gate /*
8620Sstevel@tonic-gate  * Determine whether inode is in cache.
8630Sstevel@tonic-gate  */
864392Sswilcox int
inocached(fsck_ino_t inum)865392Sswilcox inocached(fsck_ino_t inum)
8660Sstevel@tonic-gate {
867392Sswilcox 	return (search_cache(inphead[inum % numdirs], inum) != NULL);
8680Sstevel@tonic-gate }
8690Sstevel@tonic-gate 
8700Sstevel@tonic-gate /*
8710Sstevel@tonic-gate  * Clean up all the inode cache structure.
8720Sstevel@tonic-gate  */
873392Sswilcox void
inocleanup(void)874392Sswilcox inocleanup(void)
8750Sstevel@tonic-gate {
8760Sstevel@tonic-gate 	struct inoinfo **inpp;
8770Sstevel@tonic-gate 
8780Sstevel@tonic-gate 	if (inphead == NULL)
8790Sstevel@tonic-gate 		return;
880392Sswilcox 	for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) {
881392Sswilcox 		free((void *)(*inpp));
882392Sswilcox 	}
883392Sswilcox 	free((void *)inphead);
884392Sswilcox 	free((void *)inpsort);
8850Sstevel@tonic-gate 	inphead = inpsort = NULL;
8860Sstevel@tonic-gate }
8870Sstevel@tonic-gate 
8880Sstevel@tonic-gate /*
8890Sstevel@tonic-gate  * Routines to maintain information about acl inodes.
8900Sstevel@tonic-gate  * This is built during the first pass and used during the
8910Sstevel@tonic-gate  * second and third passes.
8920Sstevel@tonic-gate  *
8930Sstevel@tonic-gate  * Enter acl inodes into the cache.
8940Sstevel@tonic-gate  */
895392Sswilcox void
cacheacl(struct dinode * dp,fsck_ino_t inum)896392Sswilcox cacheacl(struct dinode *dp, fsck_ino_t inum)
8970Sstevel@tonic-gate {
898392Sswilcox 	struct inoinfo *aclp;
899392Sswilcox 	struct inoinfo **aclpp;
9000Sstevel@tonic-gate 	uint_t blks;
9010Sstevel@tonic-gate 
9020Sstevel@tonic-gate 	blks = NDADDR + NIADDR;
903392Sswilcox 	aclp = (struct inoinfo *)
9046179Smc208700 	    malloc(sizeof (*aclp) + (blks - 1) * sizeof (daddr32_t));
9050Sstevel@tonic-gate 	if (aclp == NULL)
9060Sstevel@tonic-gate 		return;
907392Sswilcox 	aclpp = &aclphead[inum % numacls];
908392Sswilcox 	aclp->i_nextlist = *aclpp;
9090Sstevel@tonic-gate 	*aclpp = aclp;
910392Sswilcox 	aclp->i_number = inum;
9110Sstevel@tonic-gate 	aclp->i_isize = (offset_t)dp->di_size;
912392Sswilcox 	aclp->i_blkssize = (size_t)(blks * sizeof (daddr32_t));
913392Sswilcox 	(void) memmove(&aclp->i_blks[0], &dp->di_db[0], aclp->i_blkssize);
9140Sstevel@tonic-gate 	if (aclplast == aclmax) {
9150Sstevel@tonic-gate 		aclmax += 100;
916392Sswilcox 		aclpsort = (struct inoinfo **)realloc((char *)aclpsort,
917392Sswilcox 		    (unsigned)aclmax * sizeof (struct inoinfo *));
9180Sstevel@tonic-gate 		if (aclpsort == NULL)
9190Sstevel@tonic-gate 			errexit("cannot increase acl list");
9200Sstevel@tonic-gate 	}
9210Sstevel@tonic-gate 	aclpsort[aclplast++] = aclp;
9220Sstevel@tonic-gate }
9230Sstevel@tonic-gate 
924392Sswilcox 
9250Sstevel@tonic-gate /*
926392Sswilcox  * Generic cache search function.
927392Sswilcox  * ROOT is the first entry in a hash chain (the caller is expected
928392Sswilcox  * to have done the initial bucket lookup).  KEY is what's being
929392Sswilcox  * searched for.
930392Sswilcox  *
931392Sswilcox  * Returns a pointer to the entry if it is found, NULL otherwise.
9320Sstevel@tonic-gate  */
933392Sswilcox static struct inoinfo *
search_cache(struct inoinfo * element,fsck_ino_t key)934392Sswilcox search_cache(struct inoinfo *element, fsck_ino_t key)
9350Sstevel@tonic-gate {
936392Sswilcox 	while (element != NULL) {
937392Sswilcox 		if (element->i_number == key)
938392Sswilcox 			break;
939392Sswilcox 		element = element->i_nextlist;
940392Sswilcox 	}
941392Sswilcox 
942392Sswilcox 	return (element);
943392Sswilcox }
9440Sstevel@tonic-gate 
945392Sswilcox void
inodirty(void)946392Sswilcox inodirty(void)
947392Sswilcox {
948392Sswilcox 	dirty(pbp);
949392Sswilcox }
950392Sswilcox 
951392Sswilcox static void
inoflush(void)952392Sswilcox inoflush(void)
953392Sswilcox {
954392Sswilcox 	if (pbp != NULL)
955392Sswilcox 		flush(fswritefd, pbp);
9560Sstevel@tonic-gate }
9570Sstevel@tonic-gate 
9580Sstevel@tonic-gate /*
959392Sswilcox  * Interactive wrapper for freeino(), for those times when we're
960392Sswilcox  * not sure if we should throw something away.
9610Sstevel@tonic-gate  */
962392Sswilcox void
clri(struct inodesc * idesc,char * type,int verbose,int corrupting)963392Sswilcox clri(struct inodesc *idesc, char *type, int verbose, int corrupting)
9640Sstevel@tonic-gate {
965392Sswilcox 	int need_parent;
966392Sswilcox 	struct dinode *dp;
967392Sswilcox 
968392Sswilcox 	if (statemap[idesc->id_number] == USTATE)
969392Sswilcox 		return;
9700Sstevel@tonic-gate 
971392Sswilcox 	dp = ginode(idesc->id_number);
972392Sswilcox 	if (verbose == CLRI_VERBOSE) {
973392Sswilcox 		pwarn("%s %s", type, file_id(idesc->id_number, dp->di_mode));
974392Sswilcox 		pinode(idesc->id_number);
9750Sstevel@tonic-gate 	}
976392Sswilcox 	if (preen || (reply("CLEAR") == 1)) {
977392Sswilcox 		need_parent = (corrupting == CLRI_NOP_OK) ?
9786179Smc208700 		    TI_NOPARENT : TI_PARENT;
979392Sswilcox 		freeino(idesc->id_number, need_parent);
980392Sswilcox 		if (preen)
981392Sswilcox 			(void) printf(" (CLEARED)\n");
982392Sswilcox 		remove_orphan_dir(idesc->id_number);
983392Sswilcox 	} else if (corrupting == CLRI_NOP_CORRUPT) {
984392Sswilcox 		iscorrupt = 1;
985392Sswilcox 	}
986392Sswilcox 	(void) printf("\n");
9870Sstevel@tonic-gate }
9880Sstevel@tonic-gate 
989392Sswilcox /*
990392Sswilcox  * Find the directory entry for the inode noted in id_parent (which is
991392Sswilcox  * not necessarily the parent of anything, we're just using a convenient
992392Sswilcox  * field.
993392Sswilcox  */
994392Sswilcox int
findname(struct inodesc * idesc)995392Sswilcox findname(struct inodesc *idesc)
9960Sstevel@tonic-gate {
9970Sstevel@tonic-gate 	struct direct *dirp = idesc->id_dirp;
9980Sstevel@tonic-gate 
9990Sstevel@tonic-gate 	if (dirp->d_ino != idesc->id_parent)
10000Sstevel@tonic-gate 		return (KEEPON);
1001392Sswilcox 	(void) memmove(idesc->id_name, dirp->d_name,
10020Sstevel@tonic-gate 	    MIN(dirp->d_namlen, MAXNAMLEN) + 1);
10030Sstevel@tonic-gate 	return (STOP|FOUND);
10040Sstevel@tonic-gate }
10050Sstevel@tonic-gate 
1006392Sswilcox /*
1007392Sswilcox  * Find the inode number associated with the given name.
1008392Sswilcox  */
1009392Sswilcox int
findino(struct inodesc * idesc)1010392Sswilcox findino(struct inodesc *idesc)
10110Sstevel@tonic-gate {
10120Sstevel@tonic-gate 	struct direct *dirp = idesc->id_dirp;
10130Sstevel@tonic-gate 
10140Sstevel@tonic-gate 	if (dirp->d_ino == 0)
10150Sstevel@tonic-gate 		return (KEEPON);
10160Sstevel@tonic-gate 	if (strcmp(dirp->d_name, idesc->id_name) == 0 &&
10170Sstevel@tonic-gate 	    dirp->d_ino >= UFSROOTINO && dirp->d_ino <= maxino) {
10180Sstevel@tonic-gate 		idesc->id_parent = dirp->d_ino;
10190Sstevel@tonic-gate 		return (STOP|FOUND);
10200Sstevel@tonic-gate 	}
10210Sstevel@tonic-gate 	return (KEEPON);
10220Sstevel@tonic-gate }
10230Sstevel@tonic-gate 
1024392Sswilcox int
cleardirentry(fsck_ino_t parentdir,fsck_ino_t target)1025392Sswilcox cleardirentry(fsck_ino_t parentdir, fsck_ino_t target)
1026392Sswilcox {
1027392Sswilcox 	struct inodesc idesc;
1028392Sswilcox 	struct dinode *dp;
1029392Sswilcox 
1030392Sswilcox 	dp = ginode(parentdir);
1031392Sswilcox 	init_inodesc(&idesc);
1032392Sswilcox 	idesc.id_func = clearanentry;
1033392Sswilcox 	idesc.id_parent = target;
1034392Sswilcox 	idesc.id_type = DATA;
1035392Sswilcox 	idesc.id_fix = NOFIX;
1036392Sswilcox 	return (ckinode(dp, &idesc, CKI_TRAVERSE));
1037392Sswilcox }
1038392Sswilcox 
1039392Sswilcox static int
clearanentry(struct inodesc * idesc)1040392Sswilcox clearanentry(struct inodesc *idesc)
1041392Sswilcox {
1042392Sswilcox 	struct direct *dirp = idesc->id_dirp;
1043392Sswilcox 
1044392Sswilcox 	if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) {
1045392Sswilcox 		idesc->id_entryno++;
1046392Sswilcox 		return (KEEPON);
1047392Sswilcox 	}
1048392Sswilcox 	dirp->d_ino = 0;
1049392Sswilcox 	return (STOP|FOUND|ALTERED);
1050392Sswilcox }
1051392Sswilcox 
1052392Sswilcox void
pinode(fsck_ino_t ino)1053392Sswilcox pinode(fsck_ino_t ino)
10540Sstevel@tonic-gate {
10550Sstevel@tonic-gate 	struct dinode *dp;
1056392Sswilcox 
1057392Sswilcox 	(void) printf(" I=%lu ", (ulong_t)ino);
1058392Sswilcox 	if (ino < UFSROOTINO || ino > maxino)
1059392Sswilcox 		return;
1060392Sswilcox 	dp = ginode(ino);
1061392Sswilcox 	pdinode(dp);
1062392Sswilcox }
1063392Sswilcox 
1064392Sswilcox static void
pdinode(struct dinode * dp)1065392Sswilcox pdinode(struct dinode *dp)
1066392Sswilcox {
10670Sstevel@tonic-gate 	char *p;
10680Sstevel@tonic-gate 	struct passwd *pw;
10690Sstevel@tonic-gate 	time_t t;
10700Sstevel@tonic-gate 
1071392Sswilcox 	(void) printf(" OWNER=");
10720Sstevel@tonic-gate 	if ((pw = getpwuid((int)dp->di_uid)) != 0)
1073392Sswilcox 		(void) printf("%s ", pw->pw_name);
10740Sstevel@tonic-gate 	else
1075392Sswilcox 		(void) printf("%lu ", (ulong_t)dp->di_uid);
1076392Sswilcox 	(void) printf("MODE=%o\n", dp->di_mode);
10770Sstevel@tonic-gate 	if (preen)
1078392Sswilcox 		(void) printf("%s: ", devname);
1079392Sswilcox 	(void) printf("SIZE=%lld ", (longlong_t)dp->di_size);
1080392Sswilcox 
1081392Sswilcox 	/* ctime() ignores LOCALE, so this is safe */
10820Sstevel@tonic-gate 	t = (time_t)dp->di_mtime;
10830Sstevel@tonic-gate 	p = ctime(&t);
1084392Sswilcox 	(void) printf("MTIME=%12.12s %4.4s ", p + 4, p + 20);
10850Sstevel@tonic-gate }
10860Sstevel@tonic-gate 
1087392Sswilcox void
blkerror(fsck_ino_t ino,char * type,daddr32_t blk,daddr32_t lbn)1088392Sswilcox blkerror(fsck_ino_t ino, char *type, daddr32_t blk, daddr32_t lbn)
10890Sstevel@tonic-gate {
1090392Sswilcox 	pfatal("FRAGMENT %d %s I=%u LFN %d", blk, type, ino, lbn);
1091392Sswilcox 	(void) printf("\n");
10920Sstevel@tonic-gate 
1093392Sswilcox 	switch (statemap[ino] & ~INDELAYD) {
10940Sstevel@tonic-gate 
10950Sstevel@tonic-gate 	case FSTATE:
1096392Sswilcox 	case FZLINK:
10970Sstevel@tonic-gate 		statemap[ino] = FCLEAR;
10980Sstevel@tonic-gate 		return;
10990Sstevel@tonic-gate 
1100392Sswilcox 	case DFOUND:
11010Sstevel@tonic-gate 	case DSTATE:
1102392Sswilcox 	case DZLINK:
11030Sstevel@tonic-gate 		statemap[ino] = DCLEAR;
1104392Sswilcox 		add_orphan_dir(ino);
11050Sstevel@tonic-gate 		return;
11060Sstevel@tonic-gate 
11070Sstevel@tonic-gate 	case SSTATE:
11080Sstevel@tonic-gate 		statemap[ino] = SCLEAR;
11090Sstevel@tonic-gate 		return;
11100Sstevel@tonic-gate 
11110Sstevel@tonic-gate 	case FCLEAR:
11120Sstevel@tonic-gate 	case DCLEAR:
11130Sstevel@tonic-gate 	case SCLEAR:
11140Sstevel@tonic-gate 		return;
11150Sstevel@tonic-gate 
11160Sstevel@tonic-gate 	default:
1117392Sswilcox 		errexit("BAD STATE 0x%x TO BLKERR\n", statemap[ino]);
11180Sstevel@tonic-gate 		/* NOTREACHED */
11190Sstevel@tonic-gate 	}
11200Sstevel@tonic-gate }
11210Sstevel@tonic-gate 
11220Sstevel@tonic-gate /*
11230Sstevel@tonic-gate  * allocate an unused inode
11240Sstevel@tonic-gate  */
1125392Sswilcox fsck_ino_t
allocino(fsck_ino_t request,int type)1126392Sswilcox allocino(fsck_ino_t request, int type)
11270Sstevel@tonic-gate {
1128392Sswilcox 	fsck_ino_t ino;
11290Sstevel@tonic-gate 	struct dinode *dp;
1130392Sswilcox 	struct cg *cgp = &cgrp;
11313219Sabalfour 	int cg;
11320Sstevel@tonic-gate 	time_t t;
1133392Sswilcox 	caddr_t err;
11340Sstevel@tonic-gate 
1135392Sswilcox 	if (debug && (request != 0) && (request != UFSROOTINO))
1136392Sswilcox 		errexit("assertion failed: allocino() asked for "
11376179Smc208700 		    "inode %d instead of 0 or %d",
11386179Smc208700 		    (int)request, (int)UFSROOTINO);
1139392Sswilcox 
1140392Sswilcox 	/*
1141392Sswilcox 	 * We know that we're only going to get requests for UFSROOTINO
1142392Sswilcox 	 * or 0.  If UFSROOTINO is wanted, then it better be available
1143392Sswilcox 	 * because our caller is trying to recreate the root directory.
1144392Sswilcox 	 * If we're asked for 0, then which one we return doesn't matter.
1145392Sswilcox 	 * We know that inodes 0 and 1 are never valid to return, so we
1146392Sswilcox 	 * the start at the lowest-legal inode number.
1147392Sswilcox 	 *
1148392Sswilcox 	 * If we got a request for UFSROOTINO, then request != 0, and
1149392Sswilcox 	 * this pair of conditionals is the only place that treats
1150392Sswilcox 	 * UFSROOTINO specially.
1151392Sswilcox 	 */
11520Sstevel@tonic-gate 	if (request == 0)
11530Sstevel@tonic-gate 		request = UFSROOTINO;
11540Sstevel@tonic-gate 	else if (statemap[request] != USTATE)
11550Sstevel@tonic-gate 		return (0);
1156392Sswilcox 
1157392Sswilcox 	/*
1158392Sswilcox 	 * Doesn't do wrapping, since we know we started at
1159392Sswilcox 	 * the smallest inode.
1160392Sswilcox 	 */
11610Sstevel@tonic-gate 	for (ino = request; ino < maxino; ino++)
11620Sstevel@tonic-gate 		if (statemap[ino] == USTATE)
11630Sstevel@tonic-gate 			break;
11640Sstevel@tonic-gate 	if (ino == maxino)
11650Sstevel@tonic-gate 		return (0);
1166392Sswilcox 
1167392Sswilcox 	/*
1168392Sswilcox 	 * In pass5, we'll calculate the bitmaps and counts all again from
1169392Sswilcox 	 * scratch and do a comparison, but for that to work the cg has
1170392Sswilcox 	 * to know what in-memory changes we've made to it.  If we have
1171392Sswilcox 	 * trouble reading the cg, cg_sanity() should kick it out so
1172392Sswilcox 	 * we can skip explicit i/o error checking here.
1173392Sswilcox 	 */
1174392Sswilcox 	cg = itog(&sblock, ino);
1175392Sswilcox 	(void) getblk(&cgblk, cgtod(&sblock, cg), (size_t)sblock.fs_cgsize);
11763219Sabalfour 	err = cg_sanity(cgp, cg);
1177392Sswilcox 	if (err != NULL) {
1178392Sswilcox 		pfatal("CG %d: %s\n", cg, err);
1179392Sswilcox 		free((void *)err);
1180392Sswilcox 		if (reply("REPAIR") == 0)
1181392Sswilcox 			errexit("Program terminated.");
1182392Sswilcox 		fix_cg(cgp, cg);
1183392Sswilcox 	}
1184392Sswilcox 	setbit(cg_inosused(cgp), ino % sblock.fs_ipg);
1185392Sswilcox 	cgp->cg_cs.cs_nifree--;
1186392Sswilcox 	cgdirty();
1187392Sswilcox 
1188392Sswilcox 	if (lastino < ino)
1189392Sswilcox 		lastino = ino;
1190392Sswilcox 
1191392Sswilcox 	/*
1192392Sswilcox 	 * Don't currently support IFATTRDIR or any of the other
1193392Sswilcox 	 * types, as they aren't needed.
1194392Sswilcox 	 */
11950Sstevel@tonic-gate 	switch (type & IFMT) {
11960Sstevel@tonic-gate 	case IFDIR:
11970Sstevel@tonic-gate 		statemap[ino] = DSTATE;
1198392Sswilcox 		cgp->cg_cs.cs_ndir++;
11990Sstevel@tonic-gate 		break;
12000Sstevel@tonic-gate 	case IFREG:
12010Sstevel@tonic-gate 	case IFLNK:
12020Sstevel@tonic-gate 		statemap[ino] = FSTATE;
12030Sstevel@tonic-gate 		break;
12040Sstevel@tonic-gate 	default:
1205392Sswilcox 		/*
1206392Sswilcox 		 * Pretend nothing ever happened.  This clears the
1207392Sswilcox 		 * dirty flag, among other things.
1208392Sswilcox 		 */
1209392Sswilcox 		initbarea(&cgblk);
1210392Sswilcox 		if (debug)
1211392Sswilcox 			(void) printf("allocino: unknown type 0%o\n",
1212392Sswilcox 			    type & IFMT);
12130Sstevel@tonic-gate 		return (0);
12140Sstevel@tonic-gate 	}
1215392Sswilcox 
1216392Sswilcox 	/*
1217392Sswilcox 	 * We're allocating what should be a completely-unused inode,
1218392Sswilcox 	 * so make sure we don't inherit anything from any previous
1219392Sswilcox 	 * incarnations.
1220392Sswilcox 	 */
12210Sstevel@tonic-gate 	dp = ginode(ino);
1222392Sswilcox 	(void) memset((void *)dp, 0, sizeof (struct dinode));
12230Sstevel@tonic-gate 	dp->di_db[0] = allocblk(1);
12240Sstevel@tonic-gate 	if (dp->di_db[0] == 0) {
12250Sstevel@tonic-gate 		statemap[ino] = USTATE;
12260Sstevel@tonic-gate 		return (0);
12270Sstevel@tonic-gate 	}
1228392Sswilcox 	dp->di_mode = (mode_t)type;
1229392Sswilcox 	(void) time(&t);
12300Sstevel@tonic-gate 	dp->di_atime = (time32_t)t;
1231392Sswilcox 	dp->di_ctime = dp->di_atime;
1232392Sswilcox 	dp->di_mtime = dp->di_ctime;
12330Sstevel@tonic-gate 	dp->di_size = (u_offset_t)sblock.fs_fsize;
12340Sstevel@tonic-gate 	dp->di_blocks = btodb(sblock.fs_fsize);
12350Sstevel@tonic-gate 	n_files++;
12360Sstevel@tonic-gate 	inodirty();
12370Sstevel@tonic-gate 	return (ino);
12380Sstevel@tonic-gate }
12390Sstevel@tonic-gate 
12400Sstevel@tonic-gate /*
1241392Sswilcox  * Release some or all of the blocks of an inode.
1242392Sswilcox  * Only truncates down.  Assumes new_length is appropriately aligned
1243392Sswilcox  * to a block boundary (or a directory block boundary, if it's a
1244392Sswilcox  * directory).
1245392Sswilcox  *
1246392Sswilcox  * If this is a directory, discard all of its contents first, so
1247392Sswilcox  * we don't create a bunch of orphans that would need another fsck
1248392Sswilcox  * run to clean up.
1249392Sswilcox  *
1250392Sswilcox  * Even if truncating to zero length, the inode remains allocated.
12510Sstevel@tonic-gate  */
1252392Sswilcox void
truncino(fsck_ino_t ino,offset_t new_length,int update)1253392Sswilcox truncino(fsck_ino_t ino, offset_t new_length, int update)
12540Sstevel@tonic-gate {
12550Sstevel@tonic-gate 	struct inodesc idesc;
1256392Sswilcox 	struct inoinfo *iip;
12570Sstevel@tonic-gate 	struct dinode *dp;
1258392Sswilcox 	fsck_ino_t parent;
1259392Sswilcox 	mode_t mode;
1260392Sswilcox 	caddr_t message;
1261*12286SAndrew.Balfour@Sun.COM 	int isdir, islink;
1262392Sswilcox 	int ilevel, dblk;
12630Sstevel@tonic-gate 
1264392Sswilcox 	dp = ginode(ino);
1265392Sswilcox 	mode = (dp->di_mode & IFMT);
1266392Sswilcox 	isdir = (mode == IFDIR) || (mode == IFATTRDIR);
1267*12286SAndrew.Balfour@Sun.COM 	islink = (mode == IFLNK);
1268392Sswilcox 
1269392Sswilcox 	if (isdir) {
1270392Sswilcox 		/*
1271392Sswilcox 		 * Go with the parent we found by chasing references,
1272392Sswilcox 		 * if we've gotten that far.  Otherwise, use what the
1273392Sswilcox 		 * directory itself claims.  If there's no ``..'' entry
1274392Sswilcox 		 * in it, give up trying to get the link counts right.
1275392Sswilcox 		 */
1276392Sswilcox 		if (update == TI_NOPARENT) {
1277392Sswilcox 			parent = -1;
1278392Sswilcox 		} else {
1279392Sswilcox 			iip = getinoinfo(ino);
1280392Sswilcox 			if (iip != NULL) {
1281392Sswilcox 				parent = iip->i_parent;
1282392Sswilcox 			} else {
1283392Sswilcox 				parent = lookup_dotdot_ino(ino);
1284392Sswilcox 				if (parent != 0) {
1285392Sswilcox 					/*
1286392Sswilcox 					 * Make sure that the claimed
1287392Sswilcox 					 * parent actually has a
1288392Sswilcox 					 * reference to us.
1289392Sswilcox 					 */
1290392Sswilcox 					dp = ginode(parent);
1291392Sswilcox 					idesc.id_name = lfname;
1292392Sswilcox 					idesc.id_type = DATA;
1293392Sswilcox 					idesc.id_func = findino;
1294392Sswilcox 					idesc.id_number = ino;
1295392Sswilcox 					idesc.id_fix = DONTKNOW;
1296392Sswilcox 					if ((ckinode(dp, &idesc,
1297392Sswilcox 					    CKI_TRAVERSE) & FOUND) == 0)
1298392Sswilcox 						parent = 0;
1299392Sswilcox 				}
1300392Sswilcox 			}
1301392Sswilcox 		}
1302392Sswilcox 
1303392Sswilcox 		mark_delayed_inodes(ino, numfrags(&sblock, new_length));
1304392Sswilcox 		if (parent > 0) {
1305392Sswilcox 			dp = ginode(parent);
1306392Sswilcox 			LINK_RANGE(message, dp->di_nlink, -1);
1307392Sswilcox 			if (message != NULL) {
1308392Sswilcox 				LINK_CLEAR(message, parent, dp->di_mode,
1309392Sswilcox 				    &idesc);
1310392Sswilcox 				if (statemap[parent] == USTATE)
1311392Sswilcox 					goto no_parent_update;
1312392Sswilcox 			}
1313392Sswilcox 			TRACK_LNCNTP(parent, lncntp[parent]--);
1314392Sswilcox 		} else if ((mode == IFDIR) && (parent == 0)) {
1315392Sswilcox 			/*
1316392Sswilcox 			 * Currently don't have a good way to
1317392Sswilcox 			 * handle this, so throw up our hands.
1318392Sswilcox 			 * However, we know that we can still
1319392Sswilcox 			 * do some good if we continue, so
1320392Sswilcox 			 * don't actually exit yet.
1321392Sswilcox 			 *
1322392Sswilcox 			 * We don't do it for attrdirs,
1323392Sswilcox 			 * because there aren't link counts
1324392Sswilcox 			 * between them and their parents.
1325392Sswilcox 			 */
1326392Sswilcox 			pwarn("Could not determine former parent of "
1327392Sswilcox 			    "inode %d, link counts are possibly\n"
1328392Sswilcox 			    "incorrect.  Please rerun fsck(1M) to "
1329392Sswilcox 			    "correct this.\n",
1330392Sswilcox 			    ino);
1331392Sswilcox 			iscorrupt = 1;
1332392Sswilcox 		}
1333392Sswilcox 		/*
1334392Sswilcox 		 * ...else if it's a directory with parent == -1, then
1335392Sswilcox 		 * we've not gotten far enough to know connectivity,
1336392Sswilcox 		 * and it'll get handled automatically later.
1337392Sswilcox 		 */
1338392Sswilcox 	}
1339392Sswilcox 
1340392Sswilcox no_parent_update:
1341392Sswilcox 	init_inodesc(&idesc);
13420Sstevel@tonic-gate 	idesc.id_type = ADDR;
13430Sstevel@tonic-gate 	idesc.id_func = pass4check;
13440Sstevel@tonic-gate 	idesc.id_number = ino;
13450Sstevel@tonic-gate 	idesc.id_fix = DONTKNOW;
1346392Sswilcox 	idesc.id_truncto = howmany(new_length, sblock.fs_bsize);
13470Sstevel@tonic-gate 	dp = ginode(ino);
1348*12286SAndrew.Balfour@Sun.COM 	if (!islink && ckinode(dp, &idesc, CKI_TRUNCATE) & ALTERED)
1349392Sswilcox 		inodirty();
1350392Sswilcox 
1351392Sswilcox 	/*
1352392Sswilcox 	 * This has to be done after ckinode(), so that all of
1353392Sswilcox 	 * the fragments get visited.  Note that we assume we're
1354392Sswilcox 	 * always truncating to a block boundary, rather than a
1355392Sswilcox 	 * fragment boundary.
1356392Sswilcox 	 */
1357392Sswilcox 	dp = ginode(ino);
1358392Sswilcox 	dp->di_size = new_length;
1359392Sswilcox 
1360392Sswilcox 	/*
1361392Sswilcox 	 * Clear now-obsolete pointers.
1362392Sswilcox 	 */
1363392Sswilcox 	for (dblk = idesc.id_truncto + 1; dblk < NDADDR; dblk++) {
1364392Sswilcox 		dp->di_db[dblk] = 0;
1365392Sswilcox 	}
1366392Sswilcox 
1367392Sswilcox 	ilevel = get_indir_offsets(-1, idesc.id_truncto, NULL, NULL);
1368392Sswilcox 	for (ilevel++; ilevel < NIADDR; ilevel++) {
1369392Sswilcox 		dp->di_ib[ilevel] = 0;
1370392Sswilcox 	}
1371392Sswilcox 
1372392Sswilcox 	inodirty();
1373392Sswilcox }
1374392Sswilcox 
1375392Sswilcox /*
1376392Sswilcox  * Release an inode's resources, then release the inode itself.
1377392Sswilcox  */
1378392Sswilcox void
freeino(fsck_ino_t ino,int update_parent)1379392Sswilcox freeino(fsck_ino_t ino, int update_parent)
1380392Sswilcox {
1381392Sswilcox 	int cg;
1382392Sswilcox 	struct dinode *dp;
1383392Sswilcox 	struct cg *cgp;
1384392Sswilcox 
1385392Sswilcox 	n_files--;
1386392Sswilcox 	dp = ginode(ino);
13873960Sjk201079 	/*
13883960Sjk201079 	 * We need to make sure that the file is really a large file.
13893960Sjk201079 	 * Everything bigger than UFS_MAXOFFSET_T is treated as a file with
13903960Sjk201079 	 * negative size, which shall be cleared. (see verify_inode() in
13913960Sjk201079 	 * pass1.c)
13923960Sjk201079 	 */
13933960Sjk201079 	if (dp->di_size > (u_offset_t)MAXOFF_T &&
13946179Smc208700 	    dp->di_size <= (u_offset_t)UFS_MAXOFFSET_T &&
13956179Smc208700 	    ftypeok(dp) &&
13966179Smc208700 	    (dp->di_mode & IFMT) != IFBLK &&
13976179Smc208700 	    (dp->di_mode & IFMT) != IFCHR) {
1398392Sswilcox 		largefile_count--;
1399392Sswilcox 	}
1400392Sswilcox 	truncino(ino, 0, update_parent);
1401392Sswilcox 
1402392Sswilcox 	dp = ginode(ino);
1403392Sswilcox 	if ((dp->di_mode & IFMT) == IFATTRDIR) {
1404392Sswilcox 		clearshadow(ino, &attrclientinfo);
1405392Sswilcox 		dp = ginode(ino);
1406392Sswilcox 	}
1407392Sswilcox 
14080Sstevel@tonic-gate 	clearinode(dp);
14090Sstevel@tonic-gate 	inodirty();
14100Sstevel@tonic-gate 	statemap[ino] = USTATE;
1411392Sswilcox 
1412392Sswilcox 	/*
1413392Sswilcox 	 * Keep the disk in sync with us so that pass5 doesn't get
1414392Sswilcox 	 * upset about spurious inconsistencies.
1415392Sswilcox 	 */
1416392Sswilcox 	cg = itog(&sblock, ino);
1417392Sswilcox 	(void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cg),
1418392Sswilcox 	    (size_t)sblock.fs_cgsize);
1419392Sswilcox 	cgp = cgblk.b_un.b_cg;
1420392Sswilcox 	clrbit(cg_inosused(cgp), ino % sblock.fs_ipg);
1421392Sswilcox 	cgp->cg_cs.cs_nifree += 1;
1422392Sswilcox 	cgdirty();
1423392Sswilcox 	sblock.fs_cstotal.cs_nifree += 1;
1424392Sswilcox 	sbdirty();
1425392Sswilcox }
1426392Sswilcox 
1427392Sswilcox void
init_inoinfo(struct inoinfo * inp,struct dinode * dp,fsck_ino_t inum)1428392Sswilcox init_inoinfo(struct inoinfo *inp, struct dinode *dp, fsck_ino_t inum)
1429392Sswilcox {
1430392Sswilcox 	inp->i_parent = ((inum == UFSROOTINO) ? UFSROOTINO : (fsck_ino_t)0);
1431392Sswilcox 	inp->i_dotdot = (fsck_ino_t)0;
1432392Sswilcox 	inp->i_isize = (offset_t)dp->di_size;
1433392Sswilcox 	inp->i_blkssize = (NDADDR + NIADDR) * sizeof (daddr32_t);
1434392Sswilcox 	inp->i_extattr = dp->di_oeftflag;
1435392Sswilcox 	(void) memmove((void *)&inp->i_blks[0], (void *)&dp->di_db[0],
1436392Sswilcox 	    inp->i_blkssize);
1437392Sswilcox }
1438392Sswilcox 
1439392Sswilcox /*
1440392Sswilcox  * Return the inode number in the ".." entry of the provided
1441392Sswilcox  * directory inode.
1442392Sswilcox  */
1443392Sswilcox static int
lookup_dotdot_ino(fsck_ino_t ino)1444392Sswilcox lookup_dotdot_ino(fsck_ino_t ino)
1445392Sswilcox {
1446392Sswilcox 	struct inodesc idesc;
1447392Sswilcox 
1448392Sswilcox 	init_inodesc(&idesc);
1449392Sswilcox 	idesc.id_type = DATA;
1450392Sswilcox 	idesc.id_func = findino;
1451392Sswilcox 	idesc.id_name = "..";
1452392Sswilcox 	idesc.id_number = ino;
1453392Sswilcox 	idesc.id_fix = NOFIX;
1454392Sswilcox 
1455392Sswilcox 	if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) != 0) {
1456392Sswilcox 		return (idesc.id_parent);
1457392Sswilcox 	}
1458392Sswilcox 
1459392Sswilcox 	return (0);
14600Sstevel@tonic-gate }
1461392Sswilcox 
1462392Sswilcox /*
1463392Sswilcox  * Convenience wrapper around ckinode(findino()).
1464392Sswilcox  */
1465392Sswilcox int
lookup_named_ino(fsck_ino_t dir,caddr_t name)1466392Sswilcox lookup_named_ino(fsck_ino_t dir, caddr_t name)
1467392Sswilcox {
1468392Sswilcox 	struct inodesc idesc;
1469392Sswilcox 
1470392Sswilcox 	init_inodesc(&idesc);
1471392Sswilcox 	idesc.id_type = DATA;
1472392Sswilcox 	idesc.id_func = findino;
1473392Sswilcox 	idesc.id_name = name;
1474392Sswilcox 	idesc.id_number = dir;
1475392Sswilcox 	idesc.id_fix = NOFIX;
1476392Sswilcox 
1477392Sswilcox 	if ((ckinode(ginode(dir), &idesc, CKI_TRAVERSE) & FOUND) != 0) {
1478392Sswilcox 		return (idesc.id_parent);
1479392Sswilcox 	}
1480392Sswilcox 
1481392Sswilcox 	return (0);
1482392Sswilcox }
1483392Sswilcox 
1484392Sswilcox /*
1485392Sswilcox  * Marks inodes that are being orphaned and might need to be reconnected
1486392Sswilcox  * by pass4().  The inode we're traversing is the directory whose
1487392Sswilcox  * contents will be reconnected later.  id_parent is the lfn at which
1488392Sswilcox  * to start looking at said contents.
1489392Sswilcox  */
1490392Sswilcox static int
mark_a_delayed_inode(struct inodesc * idesc)1491392Sswilcox mark_a_delayed_inode(struct inodesc *idesc)
1492392Sswilcox {
1493392Sswilcox 	struct direct *dirp = idesc->id_dirp;
1494392Sswilcox 
1495392Sswilcox 	if (idesc->id_lbn < idesc->id_parent) {
1496392Sswilcox 		return (KEEPON);
1497392Sswilcox 	}
1498392Sswilcox 
1499392Sswilcox 	if (dirp->d_ino != 0 &&
1500392Sswilcox 	    strcmp(dirp->d_name, ".") != 0 &&
1501392Sswilcox 	    strcmp(dirp->d_name, "..") != 0) {
1502392Sswilcox 		statemap[dirp->d_ino] &= ~INFOUND;
1503392Sswilcox 		statemap[dirp->d_ino] |= INDELAYD;
1504392Sswilcox 	}
1505392Sswilcox 
1506392Sswilcox 	return (KEEPON);
1507392Sswilcox }
1508392Sswilcox 
1509392Sswilcox static void
mark_delayed_inodes(fsck_ino_t ino,daddr32_t first_lfn)1510392Sswilcox mark_delayed_inodes(fsck_ino_t ino, daddr32_t first_lfn)
1511392Sswilcox {
1512392Sswilcox 	struct dinode *dp;
1513392Sswilcox 	struct inodesc idelayed;
1514392Sswilcox 
1515392Sswilcox 	init_inodesc(&idelayed);
1516392Sswilcox 	idelayed.id_number = ino;
1517392Sswilcox 	idelayed.id_type = DATA;
1518392Sswilcox 	idelayed.id_fix = NOFIX;
1519392Sswilcox 	idelayed.id_func = mark_a_delayed_inode;
1520392Sswilcox 	idelayed.id_parent = first_lfn;
1521392Sswilcox 	idelayed.id_entryno = 2;
1522392Sswilcox 
1523392Sswilcox 	dp = ginode(ino);
1524392Sswilcox 	(void) ckinode(dp, &idelayed, CKI_TRAVERSE);
1525392Sswilcox }
1526392Sswilcox 
1527392Sswilcox /*
1528392Sswilcox  * Clear the i_oeftflag/extended attribute pointer from INO.
1529392Sswilcox  */
1530392Sswilcox void
clearattrref(fsck_ino_t ino)1531392Sswilcox clearattrref(fsck_ino_t ino)
1532392Sswilcox {
1533392Sswilcox 	struct dinode *dp;
1534392Sswilcox 
1535392Sswilcox 	dp = ginode(ino);
1536392Sswilcox 	if (debug) {
1537392Sswilcox 		if (dp->di_oeftflag == 0)
1538392Sswilcox 			(void) printf("clearattref: no attr to clear on %d\n",
1539392Sswilcox 			    ino);
1540392Sswilcox 	}
1541392Sswilcox 
1542392Sswilcox 	dp->di_oeftflag = 0;
1543392Sswilcox 	inodirty();
1544392Sswilcox }
1545