xref: /csrg-svn/sys/ufs/ffs/ufs_lookup.c (revision 38776)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms are permitted
6  * provided that the above copyright notice and this paragraph are
7  * duplicated in all such forms and that any documentation,
8  * advertising materials, and other materials related to such
9  * distribution and use acknowledge that the software was developed
10  * by the University of California, Berkeley.  The name of the
11  * University may not be used to endorse or promote products derived
12  * from this software without specific prior written permission.
13  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
14  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
15  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16  *
17  *	@(#)ufs_lookup.c	7.13 (Berkeley) 08/26/89
18  */
19 
20 #include "param.h"
21 #include "user.h"
22 #include "buf.h"
23 #include "file.h"
24 #include "vnode.h"
25 #include "../ufs/inode.h"
26 #include "../ufs/fs.h"
27 
28 struct	nchstats nchstats;
29 int	dirchk = 1;
30 
31 /*
32  * Convert a component of a pathname into a pointer to a locked inode.
33  * This is a very central and rather complicated routine.
34  * If the file system is not maintained in a strict tree hierarchy,
35  * this can result in a deadlock situation (see comments in code below).
36  *
37  * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
38  * whether the name is to be looked up, created, renamed, or deleted.
39  * When CREATE, RENAME, or DELETE is specified, information usable in
40  * creating, renaming, or deleting a directory entry may be calculated.
41  * If flag has LOCKPARENT or'ed into it and the target of the pathname
42  * exists, lookup returns both the target and its parent directory locked.
43  * When creating or renaming and LOCKPARENT is specified, the target may
44  * not be ".".  When deleting and LOCKPARENT is specified, the target may
45  * be "."., but the caller must check to ensure it does an vrele and iput
46  * instead of two iputs.
47  *
48  * Overall outline of ufs_lookup:
49  *
50  *	check accessibility of directory
51  *	look for name in cache, if found, then if at end of path
52  *	  and deleting or creating, drop it, else return name
53  *	search for name in directory, to found or notfound
54  * notfound:
55  *	if creating, return locked directory, leaving info on available slots
56  *	else return error
57  * found:
58  *	if at end of path and deleting, return information to allow delete
59  *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
60  *	  inode and return info to allow rewrite
61  *	if not at end, add name to cache; if at end and neither creating
62  *	  nor deleting, add name to cache
63  *
64  * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode unlocked.
65  */
66 ufs_lookup(vp, ndp)
67 	struct vnode *vp;
68 	register struct nameidata *ndp;
69 {
70 	register struct vnode *vdp;	/* vnode copy of dp */
71 	register struct inode *dp = 0;	/* the directory we are searching */
72 	register struct fs *fs;		/* file system that directory is in */
73 	struct buf *bp = 0;		/* a buffer of directory entries */
74 	register struct direct *ep;	/* the current directory entry */
75 	int entryoffsetinblock;		/* offset of ep in bp's buffer */
76 	enum {NONE, COMPACT, FOUND} slotstatus;
77 	int slotoffset = -1;		/* offset of area with free space */
78 	int slotsize;			/* size of area at slotoffset */
79 	int slotfreespace;		/* amount of space free in slot */
80 	int slotneeded;			/* size of the entry we're seeking */
81 	int numdirpasses;		/* strategy for directory search */
82 	int endsearch;			/* offset to end directory search */
83 	int prevoff;			/* ndp->ni_offset of previous entry */
84 	struct inode *pdp;		/* saved dp during symlink work */
85 	struct inode *tdp;		/* returned by iget */
86 	off_t enduseful;		/* pointer past last used dir slot */
87 	int flag;			/* LOOKUP, CREATE, RENAME, or DELETE */
88 	int lockparent;			/* 1 => lockparent flag is set */
89 	int wantparent;			/* 1 => wantparent or lockparent flag */
90 	int error;
91 
92 	ndp->ni_dvp = vp;
93 	ndp->ni_vp = NULL;
94 	dp = VTOI(vp);
95 	fs = dp->i_fs;
96 	lockparent = ndp->ni_nameiop & LOCKPARENT;
97 	flag = ndp->ni_nameiop & OPFLAG;
98 	wantparent = ndp->ni_nameiop & (LOCKPARENT|WANTPARENT);
99 
100 	/*
101 	 * Check accessiblity of directory.
102 	 */
103 	if ((dp->i_mode&IFMT) != IFDIR)
104 		return (ENOTDIR);
105 	if (error = iaccess(dp, IEXEC, ndp->ni_cred))
106 		return (error);
107 
108 	/*
109 	 * We now have a segment name to search for, and a directory to search.
110 	 *
111 	 * Before tediously performing a linear scan of the directory,
112 	 * check the name cache to see if the directory/name pair
113 	 * we are looking for is known already.
114 	 */
115 	if (error = cache_lookup(ndp)) {
116 		int vpid;	/* capability number of vnode */
117 
118 		if (error == ENOENT)
119 			return (error);
120 		/*
121 		 * Get the next vnode in the path.
122 		 * See comment below starting `Step through' for
123 		 * an explaination of the locking protocol.
124 		 */
125 		pdp = dp;
126 		if (!(ndp->ni_vp == ndp->ni_rdir && ndp->ni_isdotdot))
127 			dp = VTOI(ndp->ni_vp);
128 		vdp = ITOV(dp);
129 		vpid = vdp->v_id;
130 		if (pdp == dp) {
131 			VREF(vdp);
132 		} else if (ndp->ni_isdotdot) {
133 			IUNLOCK(pdp);
134 			igrab(dp);
135 		} else {
136 			igrab(dp);
137 			IUNLOCK(pdp);
138 		}
139 		/*
140 		 * Check that the capability number did not change
141 		 * while we were waiting for the lock.
142 		 */
143 		if (vpid == vdp->v_id)
144 			return (0);
145 		iput(dp);
146 		ILOCK(pdp);
147 		dp = pdp;
148 		ndp->ni_vp = NULL;
149 	}
150 
151 	/*
152 	 * Suppress search for slots unless creating
153 	 * file and at end of pathname, in which case
154 	 * we watch for a place to put the new file in
155 	 * case it doesn't already exist.
156 	 */
157 	slotstatus = FOUND;
158 	if ((flag == CREATE || flag == RENAME) && *ndp->ni_next == 0) {
159 		slotstatus = NONE;
160 		slotfreespace = 0;
161 		slotneeded = DIRSIZ(&ndp->ni_dent);
162 	}
163 
164 	/*
165 	 * If there is cached information on a previous search of
166 	 * this directory, pick up where we last left off.
167 	 * We cache only lookups as these are the most common
168 	 * and have the greatest payoff. Caching CREATE has little
169 	 * benefit as it usually must search the entire directory
170 	 * to determine that the entry does not exist. Caching the
171 	 * location of the last DELETE or RENAME has not reduced
172 	 * profiling time and hence has been removed in the interest
173 	 * of simplicity.
174 	 */
175 	if (flag != LOOKUP || dp->i_diroff == 0 || dp->i_diroff > dp->i_size) {
176 		ndp->ni_offset = 0;
177 		numdirpasses = 1;
178 	} else {
179 		ndp->ni_offset = dp->i_diroff;
180 		entryoffsetinblock = blkoff(fs, ndp->ni_offset);
181 		if (entryoffsetinblock != 0) {
182 			error = blkatoff(dp, ndp->ni_offset, (char **)0, &bp);
183 			if (error)
184 				return (error);
185 		}
186 		numdirpasses = 2;
187 		nchstats.ncs_2passes++;
188 	}
189 	endsearch = roundup(dp->i_size, DIRBLKSIZ);
190 	enduseful = 0;
191 
192 searchloop:
193 	while (ndp->ni_offset < endsearch) {
194 		/*
195 		 * If offset is on a block boundary,
196 		 * read the next directory block.
197 		 * Release previous if it exists.
198 		 */
199 		if (blkoff(fs, ndp->ni_offset) == 0) {
200 			if (bp != NULL)
201 				brelse(bp);
202 			error = blkatoff(dp, ndp->ni_offset, (char **)0, &bp);
203 			if (error)
204 				return (error);
205 			entryoffsetinblock = 0;
206 		}
207 		/*
208 		 * If still looking for a slot, and at a DIRBLKSIZE
209 		 * boundary, have to start looking for free space again.
210 		 */
211 		if (slotstatus == NONE &&
212 		    (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) {
213 			slotoffset = -1;
214 			slotfreespace = 0;
215 		}
216 		/*
217 		 * Get pointer to next entry.
218 		 * Full validation checks are slow, so we only check
219 		 * enough to insure forward progress through the
220 		 * directory. Complete checks can be run by patching
221 		 * "dirchk" to be true.
222 		 */
223 		ep = (struct direct *)(bp->b_un.b_addr + entryoffsetinblock);
224 		if (ep->d_reclen == 0 ||
225 		    dirchk && dirbadentry(ep, entryoffsetinblock)) {
226 			int i;
227 
228 			dirbad(dp, ndp->ni_offset, "mangled entry");
229 			i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
230 			ndp->ni_offset += i;
231 			entryoffsetinblock += i;
232 			continue;
233 		}
234 
235 		/*
236 		 * If an appropriate sized slot has not yet been found,
237 		 * check to see if one is available. Also accumulate space
238 		 * in the current block so that we can determine if
239 		 * compaction is viable.
240 		 */
241 		if (slotstatus != FOUND) {
242 			int size = ep->d_reclen;
243 
244 			if (ep->d_ino != 0)
245 				size -= DIRSIZ(ep);
246 			if (size > 0) {
247 				if (size >= slotneeded) {
248 					slotstatus = FOUND;
249 					slotoffset = ndp->ni_offset;
250 					slotsize = ep->d_reclen;
251 				} else if (slotstatus == NONE) {
252 					slotfreespace += size;
253 					if (slotoffset == -1)
254 						slotoffset = ndp->ni_offset;
255 					if (slotfreespace >= slotneeded) {
256 						slotstatus = COMPACT;
257 						slotsize = ndp->ni_offset +
258 						      ep->d_reclen - slotoffset;
259 					}
260 				}
261 			}
262 		}
263 
264 		/*
265 		 * Check for a name match.
266 		 */
267 		if (ep->d_ino) {
268 			if (ep->d_namlen == ndp->ni_dent.d_namlen &&
269 			    !bcmp(ndp->ni_ptr, ep->d_name,
270 				(unsigned)ep->d_namlen)) {
271 				/*
272 				 * Save directory entry's inode number and
273 				 * reclen in ndp->ni_dent, and release
274 				 * directory buffer.
275 				 */
276 				ndp->ni_dent.d_ino = ep->d_ino;
277 				ndp->ni_dent.d_reclen = ep->d_reclen;
278 				brelse(bp);
279 				goto found;
280 			}
281 		}
282 		prevoff = ndp->ni_offset;
283 		ndp->ni_offset += ep->d_reclen;
284 		entryoffsetinblock += ep->d_reclen;
285 		if (ep->d_ino)
286 			enduseful = ndp->ni_offset;
287 	}
288 /* notfound: */
289 	/*
290 	 * If we started in the middle of the directory and failed
291 	 * to find our target, we must check the beginning as well.
292 	 */
293 	if (numdirpasses == 2) {
294 		numdirpasses--;
295 		ndp->ni_offset = 0;
296 		endsearch = dp->i_diroff;
297 		goto searchloop;
298 	}
299 	if (bp != NULL)
300 		brelse(bp);
301 	/*
302 	 * If creating, and at end of pathname and current
303 	 * directory has not been removed, then can consider
304 	 * allowing file to be created.
305 	 */
306 	if ((flag == CREATE || flag == RENAME) &&
307 	    *ndp->ni_next == 0 && dp->i_nlink != 0) {
308 		/*
309 		 * Access for write is interpreted as allowing
310 		 * creation of files in the directory.
311 		 */
312 		if (error = iaccess(dp, IWRITE, ndp->ni_cred))
313 			return (error);
314 		/*
315 		 * Return an indication of where the new directory
316 		 * entry should be put.  If we didn't find a slot,
317 		 * then set ndp->ni_count to 0 indicating that the new
318 		 * slot belongs at the end of the directory. If we found
319 		 * a slot, then the new entry can be put in the range
320 		 * [ndp->ni_offset .. ndp->ni_offset + ndp->ni_count)
321 		 */
322 		if (slotstatus == NONE) {
323 			ndp->ni_offset = roundup(dp->i_size, DIRBLKSIZ);
324 			ndp->ni_count = 0;
325 			enduseful = ndp->ni_offset;
326 		} else {
327 			ndp->ni_offset = slotoffset;
328 			ndp->ni_count = slotsize;
329 			if (enduseful < slotoffset + slotsize)
330 				enduseful = slotoffset + slotsize;
331 		}
332 		ndp->ni_endoff = roundup(enduseful, DIRBLKSIZ);
333 		dp->i_flag |= IUPD|ICHG;
334 		/*
335 		 * We return with the directory locked, so that
336 		 * the parameters we set up above will still be
337 		 * valid if we actually decide to do a direnter().
338 		 * We return ni_vp == NULL to indicate that the entry
339 		 * does not currently exist; we leave a pointer to
340 		 * the (locked) directory inode in ndp->ni_dvp.
341 		 *
342 		 * NB - if the directory is unlocked, then this
343 		 * information cannot be used.
344 		 */
345 		if (!lockparent)
346 			IUNLOCK(dp);
347 	}
348 	/*
349 	 * Insert name into cache (as non-existent) if appropriate.
350 	 */
351 	if (ndp->ni_makeentry)
352 		cache_enter(ndp);
353 	return (ENOENT);
354 
355 found:
356 	if (numdirpasses == 2)
357 		nchstats.ncs_pass2++;
358 	/*
359 	 * Check that directory length properly reflects presence
360 	 * of this entry.
361 	 */
362 	if (entryoffsetinblock + DIRSIZ(ep) > dp->i_size) {
363 		dirbad(dp, ndp->ni_offset, "i_size too small");
364 		dp->i_size = entryoffsetinblock + DIRSIZ(ep);
365 		dp->i_flag |= IUPD|ICHG;
366 	}
367 
368 	/*
369 	 * Found component in pathname.
370 	 * If the final component of path name, save information
371 	 * in the cache as to where the entry was found.
372 	 */
373 	if (*ndp->ni_next == '\0' && flag == LOOKUP)
374 		dp->i_diroff = ndp->ni_offset &~ (DIRBLKSIZ - 1);
375 
376 	/*
377 	 * If deleting, and at end of pathname, return
378 	 * parameters which can be used to remove file.
379 	 * If the wantparent flag isn't set, we return only
380 	 * the directory (in ndp->ni_dvp), otherwise we go
381 	 * on and lock the inode, being careful with ".".
382 	 */
383 	if (flag == DELETE && *ndp->ni_next == 0) {
384 		/*
385 		 * Write access to directory required to delete files.
386 		 */
387 		if (error = iaccess(dp, IWRITE, ndp->ni_cred))
388 			return (error);
389 		/*
390 		 * Return pointer to current entry in ndp->ni_offset,
391 		 * and distance past previous entry (if there
392 		 * is a previous entry in this block) in ndp->ni_count.
393 		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
394 		 */
395 		if ((ndp->ni_offset&(DIRBLKSIZ-1)) == 0)
396 			ndp->ni_count = 0;
397 		else
398 			ndp->ni_count = ndp->ni_offset - prevoff;
399 		vdp = ITOV(dp);
400 		if (dp->i_number == ndp->ni_dent.d_ino) {
401 			VREF(vdp);
402 		} else {
403 			pdp = dp;
404 			if (error = iget(dp, ndp->ni_dent.d_ino, &tdp))
405 				return (error);
406 			vdp = ITOV(tdp);
407 			/*
408 			 * If directory is "sticky", then user must own
409 			 * the directory, or the file in it, else he
410 			 * may not delete it (unless he's root). This
411 			 * implements append-only directories.
412 			 */
413 			if ((pdp->i_mode & ISVTX) &&
414 			    ndp->ni_cred->cr_uid != 0 &&
415 			    ndp->ni_cred->cr_uid != pdp->i_uid &&
416 			    tdp->i_uid != ndp->ni_cred->cr_uid) {
417 				iput(tdp);
418 				return (EPERM);
419 			}
420 		}
421 		ndp->ni_vp = vdp;
422 		if (!lockparent)
423 			IUNLOCK(pdp);
424 		return (0);
425 	}
426 
427 	/*
428 	 * If rewriting (RENAME), return the inode and the
429 	 * information required to rewrite the present directory
430 	 * Must get inode of directory entry to verify it's a
431 	 * regular file, or empty directory.
432 	 */
433 	if (flag == RENAME && wantparent && *ndp->ni_next == 0) {
434 		if (error = iaccess(dp, IWRITE, ndp->ni_cred))
435 			return (error);
436 		/*
437 		 * Careful about locking second inode.
438 		 * This can only occur if the target is ".".
439 		 */
440 		if (dp->i_number == ndp->ni_dent.d_ino)
441 			return (EISDIR);
442 		if (error = iget(dp, ndp->ni_dent.d_ino, &tdp))
443 			return (error);
444 		ndp->ni_vp = ITOV(tdp);
445 		if (!lockparent)
446 			IUNLOCK(dp);
447 		return (0);
448 	}
449 
450 	/*
451 	 * Step through the translation in the name.  We do not `iput' the
452 	 * directory because we may need it again if a symbolic link
453 	 * is relative to the current directory.  Instead we save it
454 	 * unlocked as "pdp".  We must get the target inode before unlocking
455 	 * the directory to insure that the inode will not be removed
456 	 * before we get it.  We prevent deadlock by always fetching
457 	 * inodes from the root, moving down the directory tree. Thus
458 	 * when following backward pointers ".." we must unlock the
459 	 * parent directory before getting the requested directory.
460 	 * There is a potential race condition here if both the current
461 	 * and parent directories are removed before the `iget' for the
462 	 * inode associated with ".." returns.  We hope that this occurs
463 	 * infrequently since we cannot avoid this race condition without
464 	 * implementing a sophisticated deadlock detection algorithm.
465 	 * Note also that this simple deadlock detection scheme will not
466 	 * work if the file system has any hard links other than ".."
467 	 * that point backwards in the directory structure.
468 	 */
469 	pdp = dp;
470 	if (ndp->ni_isdotdot) {
471 		IUNLOCK(pdp);	/* race to get the inode */
472 		if (error = iget(dp, ndp->ni_dent.d_ino, &tdp)) {
473 			ILOCK(pdp);
474 			return (error);
475 		}
476 		if (lockparent && *ndp->ni_next == '\0')
477 			ILOCK(pdp);
478 		ndp->ni_vp = ITOV(tdp);
479 	} else if (dp->i_number == ndp->ni_dent.d_ino) {
480 		vdp = ITOV(dp);
481 		VREF(vdp);	/* we want ourself, ie "." */
482 		ndp->ni_vp = vdp;
483 	} else {
484 		if (error = iget(dp, ndp->ni_dent.d_ino, &tdp))
485 			return (error);
486 		if (!lockparent || *ndp->ni_next != '\0')
487 			IUNLOCK(pdp);
488 		ndp->ni_vp = ITOV(tdp);
489 	}
490 
491 	/*
492 	 * Insert name into cache if appropriate.
493 	 */
494 	if (ndp->ni_makeentry)
495 		cache_enter(ndp);
496 	return (0);
497 }
498 
499 
500 dirbad(ip, offset, how)
501 	struct inode *ip;
502 	off_t offset;
503 	char *how;
504 {
505 
506 	printf("%s: bad dir ino %d at offset %d: %s\n",
507 	    ip->i_fs->fs_fsmnt, ip->i_number, offset, how);
508 }
509 
510 /*
511  * Do consistency checking on a directory entry:
512  *	record length must be multiple of 4
513  *	entry must fit in rest of its DIRBLKSIZ block
514  *	record must be large enough to contain entry
515  *	name is not longer than MAXNAMLEN
516  *	name must be as long as advertised, and null terminated
517  */
518 dirbadentry(ep, entryoffsetinblock)
519 	register struct direct *ep;
520 	int entryoffsetinblock;
521 {
522 	register int i;
523 
524 	if ((ep->d_reclen & 0x3) != 0 ||
525 	    ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
526 	    ep->d_reclen < DIRSIZ(ep) || ep->d_namlen > MAXNAMLEN)
527 		return (1);
528 	for (i = 0; i < ep->d_namlen; i++)
529 		if (ep->d_name[i] == '\0')
530 			return (1);
531 	return (ep->d_name[i]);
532 }
533 
534 /*
535  * Write a directory entry after a call to namei, using the parameters
536  * which it left in nameidata.  The argument ip is the inode which the
537  * new directory entry will refer to.  The nameidata field ndp->ni_dvp
538  * is a pointer to the directory to be written, which was left locked by
539  * namei.  Remaining parameters (ndp->ni_offset, ndp->ni_count) indicate
540  * how the space for the new entry is to be gotten.
541  */
542 direnter(ip, ndp)
543 	struct inode *ip;
544 	register struct nameidata *ndp;
545 {
546 	register struct direct *ep, *nep;
547 	register struct inode *dp = VTOI(ndp->ni_dvp);
548 	struct buf *bp;
549 	int loc, spacefree, error = 0;
550 	u_int dsize;
551 	int newentrysize;
552 	char *dirbuf;
553 
554 	ndp->ni_dent.d_ino = ip->i_number;
555 	newentrysize = DIRSIZ(&ndp->ni_dent);
556 	if (ndp->ni_count == 0) {
557 		/*
558 		 * If ndp->ni_count is 0, then namei could find no space in the
559 		 * directory. In this case ndp->ni_offset will be on a directory
560 		 * block boundary and we will write the new entry into a fresh
561 		 * block.
562 		 */
563 		if (ndp->ni_offset&(DIRBLKSIZ-1))
564 			panic("wdir: newblk");
565 		ndp->ni_dent.d_reclen = DIRBLKSIZ;
566 		ndp->ni_count = newentrysize;
567 		ndp->ni_resid = newentrysize;
568 		ndp->ni_base = (caddr_t)&ndp->ni_dent;
569 		error = writeip(dp, &ndp->ni_uio, ndp->ni_cred);
570 		if (DIRBLKSIZ > dp->i_fs->fs_fsize)
571 			panic("wdir: blksize"); /* XXX - should grow w/balloc */
572 		else
573 			dp->i_size = roundup(dp->i_size, DIRBLKSIZ);
574 		iput(dp);
575 		return (error);
576 	}
577 
578 	/*
579 	 * If ndp->ni_count is non-zero, then namei found space for the new
580 	 * entry in the range ndp->ni_offset to ndp->ni_offset + ndp->ni_count.
581 	 * in the directory.  To use this space, we may have to compact
582 	 * the entries located there, by copying them together towards
583 	 * the beginning of the block, leaving the free space in
584 	 * one usable chunk at the end.
585 	 */
586 
587 	/*
588 	 * Increase size of directory if entry eats into new space.
589 	 * This should never push the size past a new multiple of
590 	 * DIRBLKSIZE.
591 	 *
592 	 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
593 	 */
594 	if (ndp->ni_offset + ndp->ni_count > dp->i_size)
595 		dp->i_size = ndp->ni_offset + ndp->ni_count;
596 	/*
597 	 * Get the block containing the space for the new directory entry.
598 	 */
599 	if (error = blkatoff(dp, ndp->ni_offset, (char **)&dirbuf, &bp)) {
600 		iput(dp);
601 		return (error);
602 	}
603 	/*
604 	 * Find space for the new entry.  In the simple case, the
605 	 * entry at offset base will have the space.  If it does
606 	 * not, then namei arranged that compacting the region
607 	 * ndp->ni_offset to ndp->ni_offset+ndp->ni_count would yield the space.
608 	 */
609 	ep = (struct direct *)dirbuf;
610 	dsize = DIRSIZ(ep);
611 	spacefree = ep->d_reclen - dsize;
612 	for (loc = ep->d_reclen; loc < ndp->ni_count; ) {
613 		nep = (struct direct *)(dirbuf + loc);
614 		if (ep->d_ino) {
615 			/* trim the existing slot */
616 			ep->d_reclen = dsize;
617 			ep = (struct direct *)((char *)ep + dsize);
618 		} else {
619 			/* overwrite; nothing there; header is ours */
620 			spacefree += dsize;
621 		}
622 		dsize = DIRSIZ(nep);
623 		spacefree += nep->d_reclen - dsize;
624 		loc += nep->d_reclen;
625 		bcopy((caddr_t)nep, (caddr_t)ep, dsize);
626 	}
627 	/*
628 	 * Update the pointer fields in the previous entry (if any),
629 	 * copy in the new entry, and write out the block.
630 	 */
631 	if (ep->d_ino == 0) {
632 		if (spacefree + dsize < newentrysize)
633 			panic("wdir: compact1");
634 		ndp->ni_dent.d_reclen = spacefree + dsize;
635 	} else {
636 		if (spacefree < newentrysize)
637 			panic("wdir: compact2");
638 		ndp->ni_dent.d_reclen = spacefree;
639 		ep->d_reclen = dsize;
640 		ep = (struct direct *)((char *)ep + dsize);
641 	}
642 	bcopy((caddr_t)&ndp->ni_dent, (caddr_t)ep, (u_int)newentrysize);
643 	error = bwrite(bp);
644 	dp->i_flag |= IUPD|ICHG;
645 	if (ndp->ni_endoff && ndp->ni_endoff < dp->i_size)
646 		error = itrunc(dp, (u_long)ndp->ni_endoff);
647 	iput(dp);
648 	return (error);
649 }
650 
651 /*
652  * Remove a directory entry after a call to namei, using
653  * the parameters which it left in nameidata. The entry
654  * ni_offset contains the offset into the directory of the
655  * entry to be eliminated.  The ni_count field contains the
656  * size of the previous record in the directory.  If this
657  * is 0, the first entry is being deleted, so we need only
658  * zero the inode number to mark the entry as free.  If the
659  * entry isn't the first in the directory, we must reclaim
660  * the space of the now empty record by adding the record size
661  * to the size of the previous entry.
662  */
663 dirremove(ndp)
664 	register struct nameidata *ndp;
665 {
666 	register struct inode *dp = VTOI(ndp->ni_dvp);
667 	struct direct *ep;
668 	struct buf *bp;
669 	int error;
670 
671 	if (ndp->ni_count == 0) {
672 		/*
673 		 * First entry in block: set d_ino to zero.
674 		 */
675 		ndp->ni_dent.d_ino = 0;
676 		ndp->ni_count = ndp->ni_resid = DIRSIZ(&ndp->ni_dent);
677 		ndp->ni_base = (caddr_t)&ndp->ni_dent;
678 		error = writeip(dp, &ndp->ni_uio, ndp->ni_cred);
679 	} else {
680 		/*
681 		 * Collapse new free space into previous entry.
682 		 */
683 		if (error = blkatoff(dp, ndp->ni_offset - ndp->ni_count,
684 		    (char **)&ep, &bp)) {
685 			return (error);
686 		}
687 		ep->d_reclen += ndp->ni_dent.d_reclen;
688 		error = bwrite(bp);
689 		dp->i_flag |= IUPD|ICHG;
690 	}
691 	return (error);
692 }
693 
694 /*
695  * Rewrite an existing directory entry to point at the inode
696  * supplied.  The parameters describing the directory entry are
697  * set up by a call to namei.
698  */
699 dirrewrite(dp, ip, ndp)
700 	struct inode *dp, *ip;
701 	struct nameidata *ndp;
702 {
703 
704 	ndp->ni_dent.d_ino = ip->i_number;
705 	ndp->ni_count = ndp->ni_resid = DIRSIZ(&ndp->ni_dent);
706 	ndp->ni_base = (caddr_t)&ndp->ni_dent;
707 	return (writeip(dp, &ndp->ni_uio, ndp->ni_cred));
708 }
709 
710 /*
711  * Return buffer with contents of block "offset"
712  * from the beginning of directory "ip".  If "res"
713  * is non-zero, fill it in with a pointer to the
714  * remaining space in the directory.
715  */
716 blkatoff(ip, offset, res, bpp)
717 	struct inode *ip;
718 	off_t offset;
719 	char **res;
720 	struct buf **bpp;
721 {
722 	register struct fs *fs = ip->i_fs;
723 	daddr_t lbn = lblkno(fs, offset);
724 	int bsize = blksize(fs, ip, lbn);
725 	struct buf *bp;
726 	daddr_t bn;
727 	int error;
728 
729 	*bpp = 0;
730 	if (error = bmap(ip, lbn, &bn, (daddr_t *)0, (int *)0))
731 		return (error);
732 	if (bn == (daddr_t)-1) {
733 		dirbad(ip, offset, "hole in dir");
734 		return (EIO);
735 	}
736 	error = bread(ip->i_devvp, bn, bsize, NOCRED, &bp);
737 	if (error) {
738 		brelse(bp);
739 		return (error);
740 	}
741 	if (res)
742 		*res = bp->b_un.b_addr + blkoff(fs, offset);
743 	*bpp = bp;
744 	return (0);
745 }
746 
747 /*
748  * Check if a directory is empty or not.
749  * Inode supplied must be locked.
750  *
751  * Using a struct dirtemplate here is not precisely
752  * what we want, but better than using a struct direct.
753  *
754  * NB: does not handle corrupted directories.
755  */
756 dirempty(ip, parentino, cred)
757 	register struct inode *ip;
758 	ino_t parentino;
759 	struct ucred *cred;
760 {
761 	register off_t off;
762 	struct dirtemplate dbuf;
763 	register struct direct *dp = (struct direct *)&dbuf;
764 	int error, count;
765 #define	MINDIRSIZ (sizeof (struct dirtemplate) / 2)
766 
767 	for (off = 0; off < ip->i_size; off += dp->d_reclen) {
768 		error = rdwri(UIO_READ, ip, (caddr_t)dp, MINDIRSIZ, off,
769 		    UIO_SYSSPACE, cred, &count);
770 		/*
771 		 * Since we read MINDIRSIZ, residual must
772 		 * be 0 unless we're at end of file.
773 		 */
774 		if (error || count != 0)
775 			return (0);
776 		/* avoid infinite loops */
777 		if (dp->d_reclen == 0)
778 			return (0);
779 		/* skip empty entries */
780 		if (dp->d_ino == 0)
781 			continue;
782 		/* accept only "." and ".." */
783 		if (dp->d_namlen > 2)
784 			return (0);
785 		if (dp->d_name[0] != '.')
786 			return (0);
787 		/*
788 		 * At this point d_namlen must be 1 or 2.
789 		 * 1 implies ".", 2 implies ".." if second
790 		 * char is also "."
791 		 */
792 		if (dp->d_namlen == 1)
793 			continue;
794 		if (dp->d_name[1] == '.' && dp->d_ino == parentino)
795 			continue;
796 		return (0);
797 	}
798 	return (1);
799 }
800 
801 /*
802  * Check if source directory is in the path of the target directory.
803  * Target is supplied locked, source is unlocked.
804  * The target is always iput() before returning.
805  */
806 checkpath(source, target, cred)
807 	struct inode *source, *target;
808 	struct ucred *cred;
809 {
810 	struct dirtemplate dirbuf;
811 	struct inode *ip;
812 	int error = 0;
813 
814 	ip = target;
815 	if (ip->i_number == source->i_number) {
816 		error = EEXIST;
817 		goto out;
818 	}
819 	if (ip->i_number == ROOTINO)
820 		goto out;
821 
822 	for (;;) {
823 		if ((ip->i_mode&IFMT) != IFDIR) {
824 			error = ENOTDIR;
825 			break;
826 		}
827 		error = rdwri(UIO_READ, ip, (caddr_t)&dirbuf,
828 			sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE,
829 			cred, (int *)0);
830 		if (error != 0)
831 			break;
832 		if (dirbuf.dotdot_namlen != 2 ||
833 		    dirbuf.dotdot_name[0] != '.' ||
834 		    dirbuf.dotdot_name[1] != '.') {
835 			error = ENOTDIR;
836 			break;
837 		}
838 		if (dirbuf.dotdot_ino == source->i_number) {
839 			error = EINVAL;
840 			break;
841 		}
842 		if (dirbuf.dotdot_ino == ROOTINO)
843 			break;
844 		iput(ip);
845 		if (error = iget(ip, dirbuf.dotdot_ino, &ip))
846 			break;
847 	}
848 
849 out:
850 	if (error == ENOTDIR)
851 		printf("checkpath: .. not a directory\n");
852 	if (ip != NULL)
853 		iput(ip);
854 	return (error);
855 }
856