xref: /netbsd-src/sys/ufs/ext2fs/ext2fs_lookup.c (revision fdecd6a253f999ae92b139670d9e15cc9df4497c)
1 /*	$NetBSD: ext2fs_lookup.c,v 1.1 1997/06/11 09:33:59 bouyer Exp $	*/
2 
3 /*
4  * Modified for NetBSD 1.2E
5  * May 1997, Manuel Bouyer
6  * Laboratoire d'informatique de Paris VI
7  */
8 /*
9  *  modified for Lites 1.1
10  *
11  *  Aug 1995, Godmar Back (gback@cs.utah.edu)
12  *  University of Utah, Department of Computer Science
13  */
14 /*
15  * Copyright (c) 1989, 1993
16  *	The Regents of the University of California.  All rights reserved.
17  * (c) UNIX System Laboratories, Inc.
18  * All or some portions of this file are derived from material licensed
19  * to the University of California by American Telephone and Telegraph
20  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
21  * the permission of UNIX System Laboratories, Inc.
22  *
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  * 1. Redistributions of source code must retain the above copyright
27  *	notice, this list of conditions and the following disclaimer.
28  * 2. Redistributions in binary form must reproduce the above copyright
29  *	notice, this list of conditions and the following disclaimer in the
30  *	documentation and/or other materials provided with the distribution.
31  * 3. All advertising materials mentioning features or use of this software
32  *	must display the following acknowledgement:
33  *	This product includes software developed by the University of
34  *	California, Berkeley and its contributors.
35  * 4. Neither the name of the University nor the names of its contributors
36  *	may be used to endorse or promote products derived from this software
37  *	without specific prior written permission.
38  *
39  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
40  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
42  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
43  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
44  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
45  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
46  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
47  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
48  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
49  * SUCH DAMAGE.
50  *
51  *	@(#)ufs_lookup.c	8.6 (Berkeley) 4/1/94
52  */
53 
54 #include <sys/param.h>
55 #include <sys/systm.h>
56 #include <sys/namei.h>
57 #include <sys/buf.h>
58 #include <sys/file.h>
59 #include <sys/mount.h>
60 #include <sys/vnode.h>
61 #include <sys/malloc.h>
62 #include <sys/dirent.h>
63 
64 #include <ufs/ufs/quota.h>
65 #include <ufs/ufs/inode.h>
66 #include <ufs/ufs/ufsmount.h>
67 #include <ufs/ufs/ufs_extern.h>
68 
69 #include <ufs/ext2fs/ext2fs_extern.h>
70 #include <ufs/ext2fs/ext2fs_dir.h>
71 #include <ufs/ext2fs/ext2fs.h>
72 
73 extern	int dirchk;
74 
75 static void	ext2fs_dirconv2ffs __P((struct ext2fs_direct *e2dir,
76 					  struct dirent *ffsdir));
77 static int	ext2fs_dirbadentry __P((struct vnode *dp,
78 					  struct ext2fs_direct *de,
79 					  int entryoffsetinblock));
80 
81 /*
82  * the problem that is tackled below is the fact that FFS
83  * includes the terminating zero on disk while EXT2FS doesn't
84  * this implies that we need to introduce some padding.
85  * For instance, a filename "sbin" has normally a reclen 12
86  * in EXT2, but 16 in FFS.
87  * This reminds me of that Pepsi commercial: 'Kid saved a lousy nine cents...'
88  * If it wasn't for that, the complete ufs code for directories would
89  * have worked w/o changes (except for the difference in DIRBLKSIZ)
90  */
91 static void
92 ext2fs_dirconv2ffs( e2dir, ffsdir)
93 	struct ext2fs_direct	*e2dir;
94 	struct dirent 		*ffsdir;
95 {
96 	bzero(ffsdir, sizeof(struct dirent));
97 	ffsdir->d_fileno = e2dir->e2d_ino;
98 	ffsdir->d_namlen = e2dir->e2d_namlen;
99 
100 	ffsdir->d_type = DT_UNKNOWN;		/* don't know more here */
101 #ifdef DIAGNOSTIC
102 	/*
103 	 * XXX Rigth now this can't happen, but if one day
104 	 * MAXNAMLEN != E2FS_MAXNAMLEN we should handle this more gracefully !
105 	 */
106 	if (e2dir->e2d_namlen > MAXNAMLEN) panic("ext2fs: e2dir->e2d_namlen\n");
107 #endif
108 	strncpy(ffsdir->d_name, e2dir->e2d_name, ffsdir->d_namlen);
109 
110 	/* Godmar thinks: since e2dir->e2d_reclen can be big and means
111 	   nothing anyway, we compute our own reclen according to what
112 	   we think is right
113 	 */
114 	ffsdir->d_reclen = DIRENT_SIZE(ffsdir);
115 }
116 
117 /*
118  * Vnode op for reading directories.
119  *
120  * Convert the on-disk entries to <sys/dirent.h> entries.
121  * the problem is that the conversion will blow up some entries by four bytes,
122  * so it can't be done in place. This is too bad. Right now the conversion is
123  * done entry by entry, the converted entry is sent via uiomove.
124  *
125  * XXX allocate a buffer, convert as many entries as possible, then send
126  * the whole buffer to uiomove
127  */
128 int
129 ext2fs_readdir(v)
130 	void *v;
131 {
132 	struct vop_readdir_args /* {
133 		struct vnode *a_vp;
134 		struct uio *a_uio;
135 		struct ucred *a_cred;
136 		int *a_eofflag;
137 		u_long *a_cookies;
138 		int ncookies;
139 	} */ *ap = v;
140 	register struct uio *uio = ap->a_uio;
141 	int error;
142 	size_t count, readcnt, lost;
143 	struct m_ext2fs *fs = VTOI(ap->a_vp)->i_e2fs;
144 
145 	struct ext2fs_direct *dp;
146 	struct dirent dstd;
147 	struct uio auio;
148 	struct iovec aiov;
149 	caddr_t dirbuf;
150 	off_t off = uio->uio_offset;
151 	u_long *cookies = ap->a_cookies;
152 	int ncookies = ap->a_ncookies;
153 
154 	count = uio->uio_resid;
155 	/* Make sure we don't return partial entries. */
156 	count -= (uio->uio_offset + count) & (fs->e2fs_bsize -1);
157 	if (count <= 0)
158 		return (EINVAL);
159 	lost = uio->uio_resid - count;
160 	uio->uio_resid = count;
161 	uio->uio_iov->iov_len = count;
162 
163 	auio = *uio;
164 	auio.uio_iov = &aiov;
165 	auio.uio_iovcnt = 1;
166 	auio.uio_segflg = UIO_SYSSPACE;
167 	aiov.iov_len = count;
168 	MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK);
169 	bzero(dirbuf, count);
170 	aiov.iov_base = dirbuf;
171 
172 	error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred);
173 	if (error == 0) {
174 		readcnt = count - auio.uio_resid;
175 		for (dp = (struct ext2fs_direct *)dirbuf;
176 			(char *)dp < (char *)dirbuf + readcnt; ) {
177 			if (dp->e2d_reclen <= 0) {
178 				error = EIO;
179 				break;
180 			}
181 			ext2fs_dirconv2ffs(dp, &dstd);
182 			if(dstd.d_reclen > uio->uio_resid) {
183 				break;
184 			}
185 			if ((error = uiomove((caddr_t)&dstd, dstd.d_reclen, uio)) != 0) {
186 				break;
187 			}
188 			off = off + dp->e2d_reclen;
189 			if (cookies != NULL) {
190 				*cookies++ = off;
191 				if (--ncookies <= 0){
192 					break;  /* out of cookies */
193 				}
194 			}
195 			/* advance dp */
196 			dp = (struct ext2fs_direct *) ((char *)dp + dp->e2d_reclen);
197 		}
198 		/* we need to correct uio_offset */
199 		uio->uio_offset = off;
200 	}
201 	FREE(dirbuf, M_TEMP);
202 	*ap->a_eofflag = VTOI(ap->a_vp)->i_e2fs_size <= uio->uio_offset;
203 	uio->uio_resid += lost;
204 	return (error);
205 }
206 
207 /*
208  * Convert a component of a pathname into a pointer to a locked inode.
209  * This is a very central and rather complicated routine.
210  * If the file system is not maintained in a strict tree hierarchy,
211  * this can result in a deadlock situation (see comments in code below).
212  *
213  * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
214  * on whether the name is to be looked up, created, renamed, or deleted.
215  * When CREATE, RENAME, or DELETE is specified, information usable in
216  * creating, renaming, or deleting a directory entry may be calculated.
217  * If flag has LOCKPARENT or'ed into it and the target of the pathname
218  * exists, lookup returns both the target and its parent directory locked.
219  * When creating or renaming and LOCKPARENT is specified, the target may
220  * not be ".".  When deleting and LOCKPARENT is specified, the target may
221  * be "."., but the caller must check to ensure it does an vrele and vput
222  * instead of two vputs.
223  *
224  * Overall outline of ext2fs_lookup:
225  *
226  *	check accessibility of directory
227  *	look for name in cache, if found, then if at end of path
228  *	  and deleting or creating, drop it, else return name
229  *	search for name in directory, to found or notfound
230  * notfound:
231  *	if creating, return locked directory, leaving info on available slots
232  *	else return error
233  * found:
234  *	if at end of path and deleting, return information to allow delete
235  *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
236  *	  inode and return info to allow rewrite
237  *	if not at end, add name to cache; if at end and neither creating
238  *	  nor deleting, add name to cache
239  */
240 int
241 ext2fs_lookup(v)
242 	void *v;
243 {
244 	struct vop_lookup_args /* {
245 		struct vnode *a_dvp;
246 		struct vnode **a_vpp;
247 		struct componentname *a_cnp;
248 	} */ *ap = v;
249 	register struct vnode *vdp;	/* vnode for directory being searched */
250 	register struct inode *dp;	/* inode for directory being searched */
251 	struct buf *bp;			/* a buffer of directory entries */
252 	register struct ext2fs_direct *ep; /* the current directory entry */
253 	int entryoffsetinblock;		/* offset of ep in bp's buffer */
254 	enum {NONE, COMPACT, FOUND} slotstatus;
255 	doff_t slotoffset;		/* offset of area with free space */
256 	int slotsize;			/* size of area at slotoffset */
257 	int slotfreespace;		/* amount of space free in slot */
258 	int slotneeded;			/* size of the entry we're seeking */
259 	int numdirpasses;		/* strategy for directory search */
260 	doff_t endsearch;		/* offset to end directory search */
261 	doff_t prevoff;			/* prev entry dp->i_offset */
262 	struct vnode *pdp;		/* saved dp during symlink work */
263 	struct vnode *tdp;		/* returned by VFS_VGET */
264 	doff_t enduseful;		/* pointer past last used dir slot */
265 	u_long bmask;			/* block offset mask */
266 	int lockparent;			/* 1 => lockparent flag is set */
267 	int wantparent;			/* 1 => wantparent or lockparent flag */
268 	int namlen, error;
269 	struct vnode **vpp = ap->a_vpp;
270 	struct componentname *cnp = ap->a_cnp;
271 	struct ucred *cred = cnp->cn_cred;
272 	int flags = cnp->cn_flags;
273 	int nameiop = cnp->cn_nameiop;
274 
275 	int	dirblksize = VTOI(ap->a_dvp)->i_e2fs->e2fs_bsize;
276 
277 	bp = NULL;
278 	slotoffset = -1;
279 	*vpp = NULL;
280 	vdp = ap->a_dvp;
281 	dp = VTOI(vdp);
282 	lockparent = flags & LOCKPARENT;
283 	wantparent = flags & (LOCKPARENT|WANTPARENT);
284 	/*
285 	 * Check accessiblity of directory.
286 	 */
287 	if ((error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc)) != 0)
288 		return (error);
289 
290 	/*
291 	 * We now have a segment name to search for, and a directory to search.
292 	 *
293 	 * Before tediously performing a linear scan of the directory,
294 	 * check the name cache to see if the directory/name pair
295 	 * we are looking for is known already.
296 	 */
297 	if ((error = cache_lookup(vdp, vpp, cnp)) != 0) {
298 		int vpid;	/* capability number of vnode */
299 
300 		if (error == ENOENT)
301 			return (error);
302 		/*
303 		 * Get the next vnode in the path.
304 		 * See comment below starting `Step through' for
305 		 * an explaination of the locking protocol.
306 		 */
307 		pdp = vdp;
308 		dp = VTOI(*vpp);
309 		vdp = *vpp;
310 		vpid = vdp->v_id;
311 		if (pdp == vdp) {   /* lookup on "." */
312 			VREF(vdp);
313 			error = 0;
314 		} else if (flags & ISDOTDOT) {
315 			VOP_UNLOCK(pdp);
316 			error = vget(vdp, 1);
317 			if (!error && lockparent && (flags & ISLASTCN))
318 				error = VOP_LOCK(pdp);
319 		} else {
320 			error = vget(vdp, 1);
321 			if (!lockparent || error || !(flags & ISLASTCN))
322 				VOP_UNLOCK(pdp);
323 		}
324 		/*
325 		 * Check that the capability number did not change
326 		 * while we were waiting for the lock.
327 		 */
328 		if (!error) {
329 			if (vpid == vdp->v_id)
330 				return (0);
331 			vput(vdp);
332 			if (lockparent && pdp != vdp && (flags & ISLASTCN))
333 				VOP_UNLOCK(pdp);
334 		}
335 		if ((error = VOP_LOCK(pdp)) != 0)
336 			return (error);
337 		vdp = pdp;
338 		dp = VTOI(pdp);
339 		*vpp = NULL;
340 	}
341 
342 	/*
343 	 * Suppress search for slots unless creating
344 	 * file and at end of pathname, in which case
345 	 * we watch for a place to put the new file in
346 	 * case it doesn't already exist.
347 	 */
348 	slotstatus = FOUND;
349 	slotfreespace = slotsize = slotneeded = 0;
350 	if ((nameiop == CREATE || nameiop == RENAME) &&
351 		(flags & ISLASTCN)) {
352 		slotstatus = NONE;
353 		slotneeded = EXT2FS_DIRSIZ(cnp->cn_namelen);
354 	}
355 
356 	/*
357 	 * If there is cached information on a previous search of
358 	 * this directory, pick up where we last left off.
359 	 * We cache only lookups as these are the most common
360 	 * and have the greatest payoff. Caching CREATE has little
361 	 * benefit as it usually must search the entire directory
362 	 * to determine that the entry does not exist. Caching the
363 	 * location of the last DELETE or RENAME has not reduced
364 	 * profiling time and hence has been removed in the interest
365 	 * of simplicity.
366 	 */
367 	bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
368 	if (nameiop != LOOKUP || dp->i_diroff == 0 ||
369 		dp->i_diroff > dp->i_e2fs_size) {
370 		entryoffsetinblock = 0;
371 		dp->i_offset = 0;
372 		numdirpasses = 1;
373 	} else {
374 		dp->i_offset = dp->i_diroff;
375 		if ((entryoffsetinblock = dp->i_offset & bmask) &&
376 			(error = VOP_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp)))
377 			return (error);
378 		numdirpasses = 2;
379 	}
380 	prevoff = dp->i_offset;
381 	endsearch = roundup(dp->i_e2fs_size, dirblksize);
382 	enduseful = 0;
383 
384 searchloop:
385 	while (dp->i_offset < endsearch) {
386 		/*
387 		 * If necessary, get the next directory block.
388 		 */
389 		if ((dp->i_offset & bmask) == 0) {
390 			if (bp != NULL)
391 				brelse(bp);
392 			error = VOP_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp);
393 			if (error != 0)
394 				return (error);
395 			entryoffsetinblock = 0;
396 		}
397 		/*
398 		 * If still looking for a slot, and at a dirblksize
399 		 * boundary, have to start looking for free space again.
400 		 */
401 		if (slotstatus == NONE &&
402 			(entryoffsetinblock & (dirblksize - 1)) == 0) {
403 			slotoffset = -1;
404 			slotfreespace = 0;
405 		}
406 		/*
407 		 * Get pointer to next entry.
408 		 * Full validation checks are slow, so we only check
409 		 * enough to insure forward progress through the
410 		 * directory. Complete checks can be run by patching
411 		 * "dirchk" to be true.
412 		 */
413 		ep = (struct ext2fs_direct *)
414 			((char *)bp->b_data + entryoffsetinblock);
415 		if (ep->e2d_reclen == 0 ||
416 			(dirchk && ext2fs_dirbadentry(vdp, ep, entryoffsetinblock))) {
417 			int i;
418 			ufs_dirbad(dp, dp->i_offset, "mangled entry");
419 			i = dirblksize - (entryoffsetinblock & (dirblksize - 1));
420 			dp->i_offset += i;
421 			entryoffsetinblock += i;
422 			continue;
423 		}
424 
425 		/*
426 		 * If an appropriate sized slot has not yet been found,
427 		 * check to see if one is available. Also accumulate space
428 		 * in the current block so that we can determine if
429 		 * compaction is viable.
430 		 */
431 		if (slotstatus != FOUND) {
432 			int size = ep->e2d_reclen;
433 
434 			if (ep->e2d_ino != 0)
435 				size -= EXT2FS_DIRSIZ(ep->e2d_namlen);
436 			if (size > 0) {
437 				if (size >= slotneeded) {
438 					slotstatus = FOUND;
439 					slotoffset = dp->i_offset;
440 					slotsize = ep->e2d_reclen;
441 				} else if (slotstatus == NONE) {
442 					slotfreespace += size;
443 					if (slotoffset == -1)
444 						slotoffset = dp->i_offset;
445 					if (slotfreespace >= slotneeded) {
446 						slotstatus = COMPACT;
447 						slotsize = dp->i_offset +
448 							  ep->e2d_reclen - slotoffset;
449 					}
450 				}
451 			}
452 		}
453 
454 		/*
455 		 * Check for a name match.
456 		 */
457 		if (ep->e2d_ino) {
458 			namlen = ep->e2d_namlen;
459 			if (namlen == cnp->cn_namelen &&
460 				!bcmp(cnp->cn_nameptr, ep->e2d_name,
461 				(unsigned)namlen)) {
462 				/*
463 				 * Save directory entry's inode number and
464 				 * reclen in ndp->ni_ufs area, and release
465 				 * directory buffer.
466 				 */
467 				dp->i_ino = ep->e2d_ino;
468 				dp->i_reclen = ep->e2d_reclen;
469 				brelse(bp);
470 				goto found;
471 			}
472 		}
473 		prevoff = dp->i_offset;
474 		dp->i_offset += ep->e2d_reclen;
475 		entryoffsetinblock += ep->e2d_reclen;
476 		if (ep->e2d_ino)
477 			enduseful = dp->i_offset;
478 	}
479 /* notfound: */
480 	/*
481 	 * If we started in the middle of the directory and failed
482 	 * to find our target, we must check the beginning as well.
483 	 */
484 	if (numdirpasses == 2) {
485 		numdirpasses--;
486 		dp->i_offset = 0;
487 		endsearch = dp->i_diroff;
488 		goto searchloop;
489 	}
490 	if (bp != NULL)
491 		brelse(bp);
492 	/*
493 	 * If creating, and at end of pathname and current
494 	 * directory has not been removed, then can consider
495 	 * allowing file to be created.
496 	 */
497 	if ((nameiop == CREATE || nameiop == RENAME) &&
498 		(flags & ISLASTCN) && dp->i_e2fs_nlink != 0) {
499 		/*
500 		 * Creation of files on a read-only mounted file system
501 		 * is pointless, so don't proceed any further.
502 		 */
503 		if (vdp->v_mount->mnt_flag & MNT_RDONLY)
504 					return (EROFS);
505 		/*
506 		 * Access for write is interpreted as allowing
507 		 * creation of files in the directory.
508 		 */
509 		if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) != 0)
510 			return (error);
511 		/*
512 		 * Return an indication of where the new directory
513 		 * entry should be put.  If we didn't find a slot,
514 		 * then set dp->i_count to 0 indicating
515 		 * that the new slot belongs at the end of the
516 		 * directory. If we found a slot, then the new entry
517 		 * can be put in the range from dp->i_offset to
518 		 * dp->i_offset + dp->i_count.
519 		 */
520 		if (slotstatus == NONE) {
521 			dp->i_offset = roundup(dp->i_e2fs_size, dirblksize);
522 			dp->i_count = 0;
523 			enduseful = dp->i_offset;
524 		} else {
525 			dp->i_offset = slotoffset;
526 			dp->i_count = slotsize;
527 			if (enduseful < slotoffset + slotsize)
528 				enduseful = slotoffset + slotsize;
529 		}
530 		dp->i_endoff = roundup(enduseful, dirblksize);
531 		dp->i_flag |= IN_CHANGE | IN_UPDATE;
532 		/*
533 		 * We return with the directory locked, so that
534 		 * the parameters we set up above will still be
535 		 * valid if we actually decide to do a direnter().
536 		 * We return ni_vp == NULL to indicate that the entry
537 		 * does not currently exist; we leave a pointer to
538 		 * the (locked) directory inode in ndp->ni_dvp.
539 		 * The pathname buffer is saved so that the name
540 		 * can be obtained later.
541 		 *
542 		 * NB - if the directory is unlocked, then this
543 		 * information cannot be used.
544 		 */
545 		cnp->cn_flags |= SAVENAME;
546 		if (!lockparent)
547 			VOP_UNLOCK(vdp);
548 		return (EJUSTRETURN);
549 	}
550 	/*
551 	 * Insert name into cache (as non-existent) if appropriate.
552 	 */
553 	if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
554 		cache_enter(vdp, *vpp, cnp);
555 	return (ENOENT);
556 
557 found:
558 	/*
559 	 * Check that directory length properly reflects presence
560 	 * of this entry.
561 	 */
562 	if (entryoffsetinblock + EXT2FS_DIRSIZ(ep->e2d_namlen)
563 		> dp->i_e2fs_size) {
564 		ufs_dirbad(dp, dp->i_offset, "i_size too small");
565 		dp->i_e2fs_size = entryoffsetinblock+EXT2FS_DIRSIZ(ep->e2d_namlen);
566 		dp->i_flag |= IN_CHANGE | IN_UPDATE;
567 	}
568 
569 	/*
570 	 * Found component in pathname.
571 	 * If the final component of path name, save information
572 	 * in the cache as to where the entry was found.
573 	 */
574 	if ((flags & ISLASTCN) && nameiop == LOOKUP)
575 		dp->i_diroff = dp->i_offset &~ (dirblksize - 1);
576 
577 	/*
578 	 * If deleting, and at end of pathname, return
579 	 * parameters which can be used to remove file.
580 	 * If the wantparent flag isn't set, we return only
581 	 * the directory (in ndp->ni_dvp), otherwise we go
582 	 * on and lock the inode, being careful with ".".
583 	 */
584 	if (nameiop == DELETE && (flags & ISLASTCN)) {
585 		/*
586 		 * Write access to directory required to delete files.
587 		 */
588 		if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) != 0)
589 			return (error);
590 		/*
591 		 * Return pointer to current entry in dp->i_offset,
592 		 * and distance past previous entry (if there
593 		 * is a previous entry in this block) in dp->i_count.
594 		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
595 		 */
596 		if ((dp->i_offset & (dirblksize - 1)) == 0)
597 			dp->i_count = 0;
598 		else
599 			dp->i_count = dp->i_offset - prevoff;
600 		if (dp->i_number == dp->i_ino) {
601 			VREF(vdp);
602 			*vpp = vdp;
603 			return (0);
604 		}
605 		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) != 0)
606 			return (error);
607 		/*
608 		 * If directory is "sticky", then user must own
609 		 * the directory, or the file in it, else she
610 		 * may not delete it (unless she's root). This
611 		 * implements append-only directories.
612 		 */
613 		if ((dp->i_e2fs_mode & ISVTX) &&
614 			cred->cr_uid != 0 &&
615 			cred->cr_uid != dp->i_e2fs_uid &&
616 			VTOI(tdp)->i_e2fs_uid != cred->cr_uid) {
617 			vput(tdp);
618 			return (EPERM);
619 		}
620 		*vpp = tdp;
621 		if (!lockparent)
622 			VOP_UNLOCK(vdp);
623 		return (0);
624 	}
625 
626 	/*
627 	 * If rewriting (RENAME), return the inode and the
628 	 * information required to rewrite the present directory
629 	 * Must get inode of directory entry to verify it's a
630 	 * regular file, or empty directory.
631 	 */
632 	if (nameiop == RENAME && wantparent &&
633 		(flags & ISLASTCN)) {
634 		if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) != 0)
635 			return (error);
636 		/*
637 		 * Careful about locking second inode.
638 		 * This can only occur if the target is ".".
639 		 */
640 		if (dp->i_number == dp->i_ino)
641 			return (EISDIR);
642 		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) != 0)
643 			return (error);
644 		*vpp = tdp;
645 		cnp->cn_flags |= SAVENAME;
646 		if (!lockparent)
647 			VOP_UNLOCK(vdp);
648 		return (0);
649 	}
650 
651 	/*
652 	 * Step through the translation in the name.  We do not `vput' the
653 	 * directory because we may need it again if a symbolic link
654 	 * is relative to the current directory.  Instead we save it
655 	 * unlocked as "pdp".  We must get the target inode before unlocking
656 	 * the directory to insure that the inode will not be removed
657 	 * before we get it.  We prevent deadlock by always fetching
658 	 * inodes from the root, moving down the directory tree. Thus
659 	 * when following backward pointers ".." we must unlock the
660 	 * parent directory before getting the requested directory.
661 	 * There is a potential race condition here if both the current
662 	 * and parent directories are removed before the VFS_VGET for the
663 	 * inode associated with ".." returns.  We hope that this occurs
664 	 * infrequently since we cannot avoid this race condition without
665 	 * implementing a sophisticated deadlock detection algorithm.
666 	 * Note also that this simple deadlock detection scheme will not
667 	 * work if the file system has any hard links other than ".."
668 	 * that point backwards in the directory structure.
669 	 */
670 	pdp = vdp;
671 	if (flags & ISDOTDOT) {
672 		VOP_UNLOCK(pdp);	/* race to get the inode */
673 		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) != 0) {
674 			VOP_LOCK(pdp);
675 			return (error);
676 		}
677 		if (lockparent && (flags & ISLASTCN) &&
678 			(error = VOP_LOCK(pdp)) != 0) {
679 			vput(tdp);
680 			return (error);
681 		}
682 		*vpp = tdp;
683 	} else if (dp->i_number == dp->i_ino) {
684 		VREF(vdp);	/* we want ourself, ie "." */
685 		*vpp = vdp;
686 	} else {
687 		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) != 0)
688 			return (error);
689 		if (!lockparent || !(flags & ISLASTCN))
690 			VOP_UNLOCK(pdp);
691 		*vpp = tdp;
692 	}
693 
694 	/*
695 	 * Insert name into cache if appropriate.
696 	 */
697 	if (cnp->cn_flags & MAKEENTRY)
698 		cache_enter(vdp, *vpp, cnp);
699 	return (0);
700 }
701 
702 /*
703  * Do consistency checking on a directory entry:
704  *	record length must be multiple of 4
705  *	entry must fit in rest of its dirblksize block
706  *	record must be large enough to contain entry
707  *	name is not longer than MAXNAMLEN
708  *	name must be as long as advertised, and null terminated
709  */
710 /*
711  *	changed so that it confirms to ext2fs_check_dir_entry
712  */
713 static int
714 ext2fs_dirbadentry(dp, de, entryoffsetinblock)
715 	struct vnode *dp;
716 	register struct ext2fs_direct *de;
717 	int entryoffsetinblock;
718 {
719 	int	dirblksize = VTOI(dp)->i_e2fs->e2fs_bsize;
720 
721 		char * error_msg = NULL;
722 
723 		if (de->e2d_reclen < EXT2FS_DIRSIZ(1)) /* e2d_namlen = 1 */
724 				error_msg = "rec_len is smaller than minimal";
725 		else if (de->e2d_reclen % 4 != 0)
726 				error_msg = "rec_len % 4 != 0";
727 		else if (de->e2d_reclen < EXT2FS_DIRSIZ(de->e2d_namlen))
728 				error_msg = "reclen is too small for name_len";
729 		else if (entryoffsetinblock + de->e2d_reclen > dirblksize)
730 				error_msg = "directory entry across blocks";
731 		else if (de->e2d_ino > VTOI(dp)->i_e2fs->e2fs.e2fs_icount)
732 				error_msg = "inode out of bounds";
733 
734 		if (error_msg != NULL) {
735 			printf( "bad directory entry: %s\n"
736 						"offset=%d, inode=%lu, rec_len=%d, name_len=%d \n",
737 						error_msg, entryoffsetinblock,
738 			(unsigned long) de->e2d_ino, de->e2d_reclen, de->e2d_namlen);
739 			panic("ext2fs_dirbadentry");
740 		}
741 		return error_msg == NULL ? 0 : 1;
742 }
743 
744 /*
745  * Write a directory entry after a call to namei, using the parameters
746  * that it left in nameidata.  The argument ip is the inode which the new
747  * directory entry will refer to.  Dvp is a pointer to the directory to
748  * be written, which was left locked by namei. Remaining parameters
749  * (dp->i_offset, dp->i_count) indicate how the space for the new
750  * entry is to be obtained.
751  */
752 int
753 ext2fs_direnter(ip, dvp, cnp)
754 	struct inode *ip;
755 	struct vnode *dvp;
756 	register struct componentname *cnp;
757 {
758 	register struct ext2fs_direct *ep, *nep;
759 	register struct inode *dp;
760 	struct buf *bp;
761 	struct ext2fs_direct newdir;
762 	struct iovec aiov;
763 	struct uio auio;
764 	u_int dsize;
765 	int error, loc, newentrysize, spacefree;
766 	char *dirbuf;
767 	int	 dirblksize = ip->i_e2fs->e2fs_bsize;
768 
769 
770 #ifdef DIAGNOSTIC
771 	if ((cnp->cn_flags & SAVENAME) == 0)
772 		panic("direnter: missing name");
773 #endif
774 	dp = VTOI(dvp);
775 	newdir.e2d_ino = ip->i_number;
776 	newdir.e2d_namlen = cnp->cn_namelen;
777 	bcopy(cnp->cn_nameptr, newdir.e2d_name, (unsigned)cnp->cn_namelen + 1);
778 	newentrysize = EXT2FS_DIRSIZ(newdir.e2d_namlen);
779 	if (dp->i_count == 0) {
780 		/*
781 		 * If dp->i_count is 0, then namei could find no
782 		 * space in the directory. Here, dp->i_offset will
783 		 * be on a directory block boundary and we will write the
784 		 * new entry into a fresh block.
785 		 */
786 		if (dp->i_offset & (dirblksize - 1))
787 			panic("ext2fs_direnter: newblk");
788 		auio.uio_offset = dp->i_offset;
789 		newdir.e2d_reclen = dirblksize;
790 		auio.uio_resid = newentrysize;
791 		aiov.iov_len = newentrysize;
792 		aiov.iov_base = (caddr_t)&newdir;
793 		auio.uio_iov = &aiov;
794 		auio.uio_iovcnt = 1;
795 		auio.uio_rw = UIO_WRITE;
796 		auio.uio_segflg = UIO_SYSSPACE;
797 		auio.uio_procp = (struct proc *)0;
798 		error = VOP_WRITE(dvp, &auio, IO_SYNC, cnp->cn_cred);
799 		if (dirblksize >
800 			VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
801 			/* XXX should grow with balloc() */
802 			panic("ext2fs_direnter: frag size");
803 		else if (!error) {
804 			dp->i_e2fs_size = roundup(dp->i_e2fs_size, dirblksize);
805 			dp->i_flag |= IN_CHANGE;
806 		}
807 		return (error);
808 	}
809 
810 	/*
811 	 * If dp->i_count is non-zero, then namei found space
812 	 * for the new entry in the range dp->i_offset to
813 	 * dp->i_offset + dp->i_count in the directory.
814 	 * To use this space, we may have to compact the entries located
815 	 * there, by copying them together towards the beginning of the
816 	 * block, leaving the free space in one usable chunk at the end.
817 	 */
818 
819 	/*
820 	 * Get the block containing the space for the new directory entry.
821 	 */
822 	if ((error = VOP_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp)) != 0)
823 		return (error);
824 	/*
825 	 * Find space for the new entry. In the simple case, the entry at
826 	 * offset base will have the space. If it does not, then namei
827 	 * arranged that compacting the region dp->i_offset to
828 	 * dp->i_offset + dp->i_count would yield the
829 	 * space.
830 	 */
831 	ep = (struct ext2fs_direct *)dirbuf;
832 	dsize = EXT2FS_DIRSIZ(ep->e2d_namlen);
833 	spacefree = ep->e2d_reclen - dsize;
834 	for (loc = ep->e2d_reclen; loc < dp->i_count; ) {
835 		nep = (struct ext2fs_direct *)(dirbuf + loc);
836 		if (ep->e2d_ino) {
837 			/* trim the existing slot */
838 			ep->e2d_reclen = dsize;
839 			ep = (struct ext2fs_direct *)((char *)ep + dsize);
840 		} else {
841 			/* overwrite; nothing there; header is ours */
842 			spacefree += dsize;
843 		}
844 		dsize = EXT2FS_DIRSIZ(nep->e2d_namlen);
845 		spacefree += nep->e2d_reclen - dsize;
846 		loc += nep->e2d_reclen;
847 		bcopy((caddr_t)nep, (caddr_t)ep, dsize);
848 	}
849 	/*
850 	 * Update the pointer fields in the previous entry (if any),
851 	 * copy in the new entry, and write out the block.
852 	 */
853 	if (ep->e2d_ino == 0) {
854 #ifdef DIAGNOSTIC
855 		if (spacefree + dsize < newentrysize)
856 			panic("ext2fs_direnter: compact1");
857 #endif
858 		newdir.e2d_reclen = spacefree + dsize;
859 	} else {
860 #ifdef DIAGNOSTIC
861 		if (spacefree < newentrysize) {
862 			printf("ext2fs_direnter: compact2 %u %u",
863 						(u_int)spacefree, (u_int)newentrysize);
864 			panic("ext2fs_direnter: compact2");
865 		}
866 #endif
867 		newdir.e2d_reclen = spacefree;
868 		ep->e2d_reclen = dsize;
869 		ep = (struct ext2fs_direct *)((char *)ep + dsize);
870 	}
871 	bcopy((caddr_t)&newdir, (caddr_t)ep, (u_int)newentrysize);
872 	error = VOP_BWRITE(bp);
873 	dp->i_flag |= IN_CHANGE | IN_UPDATE;
874 	if (!error && dp->i_endoff && dp->i_endoff < dp->i_e2fs_size)
875 		error = VOP_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC,
876 			cnp->cn_cred, cnp->cn_proc);
877 	return (error);
878 }
879 
880 /*
881  * Remove a directory entry after a call to namei, using
882  * the parameters which it left in nameidata. The entry
883  * dp->i_offset contains the offset into the directory of the
884  * entry to be eliminated.  The dp->i_count field contains the
885  * size of the previous record in the directory.  If this
886  * is 0, the first entry is being deleted, so we need only
887  * zero the inode number to mark the entry as free.  If the
888  * entry is not the first in the directory, we must reclaim
889  * the space of the now empty record by adding the record size
890  * to the size of the previous entry.
891  */
892 int
893 ext2fs_dirremove(dvp, cnp)
894 	struct vnode *dvp;
895 	struct componentname *cnp;
896 {
897 	register struct inode *dp;
898 	struct ext2fs_direct *ep;
899 	struct buf *bp;
900 	int error;
901 
902 	dp = VTOI(dvp);
903 	if (dp->i_count == 0) {
904 		/*
905 		 * First entry in block: set d_ino to zero.
906 		 */
907 		error = VOP_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, &bp);
908 		if (error != 0)
909 			return (error);
910 		ep->e2d_ino = 0;
911 		error = VOP_BWRITE(bp);
912 		dp->i_flag |= IN_CHANGE | IN_UPDATE;
913 		return (error);
914 	}
915 	/*
916 	 * Collapse new free space into previous entry.
917 	 */
918 	error = VOP_BLKATOFF(dvp, (off_t)(dp->i_offset - dp->i_count),
919 			(char **)&ep, &bp);
920 	if (error != 0)
921 		return (error);
922 	ep->e2d_reclen += dp->i_reclen;
923 	error = VOP_BWRITE(bp);
924 	dp->i_flag |= IN_CHANGE | IN_UPDATE;
925 	return (error);
926 }
927 
928 /*
929  * Rewrite an existing directory entry to point at the inode
930  * supplied.  The parameters describing the directory entry are
931  * set up by a call to namei.
932  */
933 int
934 ext2fs_dirrewrite(dp, ip, cnp)
935 	struct inode *dp, *ip;
936 	struct componentname *cnp;
937 {
938 	struct buf *bp;
939 	struct ext2fs_direct *ep;
940 	struct vnode *vdp = ITOV(dp);
941 	int error;
942 
943 	error = VOP_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp);
944 	if (error != 0)
945 		return (error);
946 	ep->e2d_ino = ip->i_number;
947 	error = VOP_BWRITE(bp);
948 	dp->i_flag |= IN_CHANGE | IN_UPDATE;
949 	return (error);
950 }
951 
952 /*
953  * Check if a directory is empty or not.
954  * Inode supplied must be locked.
955  *
956  * Using a struct dirtemplate here is not precisely
957  * what we want, but better than using a struct ext2fs_direct.
958  *
959  * NB: does not handle corrupted directories.
960  */
961 int
962 ext2fs_dirempty(ip, parentino, cred)
963 	register struct inode *ip;
964 	ino_t parentino;
965 	struct ucred *cred;
966 {
967 	register off_t off;
968 	struct ext2fs_dirtemplate dbuf;
969 	register struct ext2fs_direct *dp = (struct ext2fs_direct *)&dbuf;
970 	int error, count, namlen;
971 
972 #define	MINDIRSIZ (sizeof (struct ext2fs_dirtemplate) / 2)
973 
974 	for (off = 0; off < ip->i_e2fs_size; off += dp->e2d_reclen) {
975 		error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off,
976 		   UIO_SYSSPACE, IO_NODELOCKED, cred, &count, (struct proc *)0);
977 		/*
978 		 * Since we read MINDIRSIZ, residual must
979 		 * be 0 unless we're at end of file.
980 		 */
981 		if (error || count != 0)
982 			return (0);
983 		/* avoid infinite loops */
984 		if (dp->e2d_reclen == 0)
985 			return (0);
986 		/* skip empty entries */
987 		if (dp->e2d_ino == 0)
988 			continue;
989 		/* accept only "." and ".." */
990 		namlen = dp->e2d_namlen;
991 		if (namlen > 2)
992 			return (0);
993 		if (dp->e2d_name[0] != '.')
994 			return (0);
995 		/*
996 		 * At this point namlen must be 1 or 2.
997 		 * 1 implies ".", 2 implies ".." if second
998 		 * char is also "."
999 		 */
1000 		if (namlen == 1)
1001 			continue;
1002 		if (dp->e2d_name[1] == '.' && dp->e2d_ino == parentino)
1003 			continue;
1004 		return (0);
1005 	}
1006 	return (1);
1007 }
1008 
1009 /*
1010  * Check if source directory is in the path of the target directory.
1011  * Target is supplied locked, source is unlocked.
1012  * The target is always vput before returning.
1013  */
1014 int
1015 ext2fs_checkpath(source, target, cred)
1016 	struct inode *source, *target;
1017 	struct ucred *cred;
1018 {
1019 	struct vnode *vp;
1020 	int error, rootino, namlen;
1021 	struct ext2fs_dirtemplate dirbuf;
1022 
1023 	vp = ITOV(target);
1024 	if (target->i_number == source->i_number) {
1025 		error = EEXIST;
1026 		goto out;
1027 	}
1028 	rootino = ROOTINO;
1029 	error = 0;
1030 	if (target->i_number == rootino)
1031 		goto out;
1032 
1033 	for (;;) {
1034 		if (vp->v_type != VDIR) {
1035 			error = ENOTDIR;
1036 			break;
1037 		}
1038 		error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf,
1039 			sizeof (struct ext2fs_dirtemplate), (off_t)0, UIO_SYSSPACE,
1040 			IO_NODELOCKED, cred, (int *)0, (struct proc *)0);
1041 		if (error != 0)
1042 			break;
1043 		namlen = dirbuf.dotdot_namlen;
1044 		if (namlen != 2 ||
1045 			dirbuf.dotdot_name[0] != '.' ||
1046 			dirbuf.dotdot_name[1] != '.') {
1047 			error = ENOTDIR;
1048 			break;
1049 		}
1050 		if (dirbuf.dotdot_ino == source->i_number) {
1051 			error = EINVAL;
1052 			break;
1053 		}
1054 		if (dirbuf.dotdot_ino == rootino)
1055 			break;
1056 		vput(vp);
1057 		error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino, &vp);
1058 		if (error != 0) {
1059 			vp = NULL;
1060 			break;
1061 		}
1062 	}
1063 
1064 out:
1065 	if (error == ENOTDIR) {
1066 		printf("checkpath: .. not a directory\n");
1067 		panic("checkpath");
1068 	}
1069 	if (vp != NULL)
1070 		vput(vp);
1071 	return (error);
1072 }
1073 
1074