xref: /csrg-svn/sys/kern/vfs_syscalls.c (revision 24433)
1 /*
2  * Copyright (c) 1982 Regents of the University of California.
3  * All rights reserved.  The Berkeley software License Agreement
4  * specifies the terms and conditions for redistribution.
5  *
6  *	@(#)vfs_syscalls.c	6.20 (Berkeley) 08/26/85
7  */
8 
9 #include "param.h"
10 #include "systm.h"
11 #include "dir.h"
12 #include "user.h"
13 #include "kernel.h"
14 #include "file.h"
15 #include "stat.h"
16 #include "inode.h"
17 #include "fs.h"
18 #include "buf.h"
19 #include "proc.h"
20 #include "quota.h"
21 #include "uio.h"
22 #include "socket.h"
23 #include "socketvar.h"
24 #include "mount.h"
25 
26 extern	struct fileops inodeops;
27 struct	file *getinode();
28 
29 /*
30  * Change current working directory (``.'').
31  */
32 chdir()
33 {
34 
35 	chdirec(&u.u_cdir);
36 }
37 
38 /*
39  * Change notion of root (``/'') directory.
40  */
41 chroot()
42 {
43 
44 	if (suser())
45 		chdirec(&u.u_rdir);
46 }
47 
48 /*
49  * Common routine for chroot and chdir.
50  */
51 chdirec(ipp)
52 	register struct inode **ipp;
53 {
54 	register struct inode *ip;
55 	struct a {
56 		char	*fname;
57 	} *uap = (struct a *)u.u_ap;
58 	register struct nameidata *ndp = &u.u_nd;
59 
60 	ndp->ni_nameiop = LOOKUP | FOLLOW;
61 	ndp->ni_segflg = UIO_USERSPACE;
62 	ndp->ni_dirp = uap->fname;
63 	ip = namei(ndp);
64 	if (ip == NULL)
65 		return;
66 	if ((ip->i_mode&IFMT) != IFDIR) {
67 		u.u_error = ENOTDIR;
68 		goto bad;
69 	}
70 	if (access(ip, IEXEC))
71 		goto bad;
72 	IUNLOCK(ip);
73 	if (*ipp)
74 		irele(*ipp);
75 	*ipp = ip;
76 	return;
77 
78 bad:
79 	iput(ip);
80 }
81 
82 /*
83  * Open system call.
84  */
85 open()
86 {
87 	struct a {
88 		char	*fname;
89 		int	mode;
90 		int	crtmode;
91 	} *uap = (struct a *) u.u_ap;
92 
93 	copen(uap->mode-FOPEN, uap->crtmode, uap->fname);
94 }
95 
96 /*
97  * Creat system call.
98  */
99 creat()
100 {
101 	struct a {
102 		char	*fname;
103 		int	fmode;
104 	} *uap = (struct a *)u.u_ap;
105 
106 	copen(FWRITE|FCREAT|FTRUNC, uap->fmode, uap->fname);
107 }
108 
109 /*
110  * Common code for open and creat.
111  * Check permissions, allocate an open file structure,
112  * and call the device open routine if any.
113  */
114 copen(mode, arg, fname)
115 	register int mode;
116 	int arg;
117 	caddr_t fname;
118 {
119 	register struct inode *ip;
120 	register struct file *fp;
121 	register struct nameidata *ndp = &u.u_nd;
122 	int i;
123 
124 #ifdef notdef
125 	if ((mode&(FREAD|FWRITE)) == 0) {
126 		u.u_error = EINVAL;
127 		return;
128 	}
129 #endif
130 	ndp->ni_segflg = UIO_USERSPACE;
131 	ndp->ni_dirp = fname;
132 	if (mode&FCREAT) {
133 		if (mode & FEXCL)
134 			ndp->ni_nameiop = CREATE;
135 		else
136 			ndp->ni_nameiop = CREATE | FOLLOW;
137 		ip = namei(ndp);
138 		if (ip == NULL) {
139 			if (u.u_error)
140 				return;
141 			ip = maknode(arg&07777&(~ISVTX), ndp);
142 			if (ip == NULL)
143 				return;
144 			mode &= ~FTRUNC;
145 		} else {
146 			if (mode&FEXCL) {
147 				u.u_error = EEXIST;
148 				iput(ip);
149 				return;
150 			}
151 			mode &= ~FCREAT;
152 		}
153 	} else {
154 		ndp->ni_nameiop = LOOKUP | FOLLOW;
155 		ip = namei(ndp);
156 		if (ip == NULL)
157 			return;
158 	}
159 	if ((ip->i_mode & IFMT) == IFSOCK) {
160 		u.u_error = EOPNOTSUPP;
161 		goto bad;
162 	}
163 	if ((mode&FCREAT) == 0) {
164 		if (mode&FREAD)
165 			if (access(ip, IREAD))
166 				goto bad;
167 		if (mode&(FWRITE|FTRUNC)) {
168 			if (access(ip, IWRITE))
169 				goto bad;
170 			if ((ip->i_mode&IFMT) == IFDIR) {
171 				u.u_error = EISDIR;
172 				goto bad;
173 			}
174 		}
175 	}
176 	fp = falloc();
177 	if (fp == NULL)
178 		goto bad;
179 	if (mode&FTRUNC)
180 		itrunc(ip, (u_long)0);
181 	IUNLOCK(ip);
182 	fp->f_flag = mode&FMASK;
183 	fp->f_type = DTYPE_INODE;
184 	fp->f_ops = &inodeops;
185 	fp->f_data = (caddr_t)ip;
186 	i = u.u_r.r_val1;
187 	if (setjmp(&u.u_qsave)) {
188 		if (u.u_error == 0)
189 			u.u_error = EINTR;
190 		u.u_ofile[i] = NULL;
191 		closef(fp);
192 		return;
193 	}
194 	u.u_error = openi(ip, mode);
195 	if (u.u_error == 0)
196 		return;
197 	u.u_ofile[i] = NULL;
198 	fp->f_count--;
199 	irele(ip);
200 	return;
201 bad:
202 	iput(ip);
203 }
204 
205 /*
206  * Mknod system call
207  */
208 mknod()
209 {
210 	register struct inode *ip;
211 	register struct a {
212 		char	*fname;
213 		int	fmode;
214 		int	dev;
215 	} *uap = (struct a *)u.u_ap;
216 	register struct nameidata *ndp = &u.u_nd;
217 
218 	if (!suser())
219 		return;
220 	ndp->ni_nameiop = CREATE;
221 	ndp->ni_segflg = UIO_USERSPACE;
222 	ndp->ni_dirp = uap->fname;
223 	ip = namei(ndp);
224 	if (ip != NULL) {
225 		u.u_error = EEXIST;
226 		goto out;
227 	}
228 	if (u.u_error)
229 		return;
230 	ip = maknode(uap->fmode, ndp);
231 	if (ip == NULL)
232 		return;
233 	switch (ip->i_mode & IFMT) {
234 
235 	case IFMT:	/* used by badsect to flag bad sectors */
236 	case IFCHR:
237 	case IFBLK:
238 		if (uap->dev) {
239 			/*
240 			 * Want to be able to use this to make badblock
241 			 * inodes, so don't truncate the dev number.
242 			 */
243 			ip->i_rdev = uap->dev;
244 			ip->i_flag |= IACC|IUPD|ICHG;
245 		}
246 	}
247 
248 out:
249 	iput(ip);
250 }
251 
252 /*
253  * link system call
254  */
255 link()
256 {
257 	register struct inode *ip, *xp;
258 	register struct a {
259 		char	*target;
260 		char	*linkname;
261 	} *uap = (struct a *)u.u_ap;
262 	register struct nameidata *ndp = &u.u_nd;
263 
264 	ndp->ni_nameiop = LOOKUP | FOLLOW;
265 	ndp->ni_segflg = UIO_USERSPACE;
266 	ndp->ni_dirp = uap->target;
267 	ip = namei(ndp);	/* well, this routine is doomed anyhow */
268 	if (ip == NULL)
269 		return;
270 	if ((ip->i_mode&IFMT) == IFDIR && !suser()) {
271 		iput(ip);
272 		return;
273 	}
274 	ip->i_nlink++;
275 	ip->i_flag |= ICHG;
276 	iupdat(ip, &time, &time, 1);
277 	IUNLOCK(ip);
278 	ndp->ni_nameiop = CREATE;
279 	ndp->ni_segflg = UIO_USERSPACE;
280 	ndp->ni_dirp = (caddr_t)uap->linkname;
281 	xp = namei(ndp);
282 	if (xp != NULL) {
283 		u.u_error = EEXIST;
284 		iput(xp);
285 		goto out;
286 	}
287 	if (u.u_error)
288 		goto out;
289 	if (ndp->ni_pdir->i_dev != ip->i_dev) {
290 		iput(ndp->ni_pdir);
291 		u.u_error = EXDEV;
292 		goto out;
293 	}
294 	u.u_error = direnter(ip, ndp);
295 out:
296 	if (u.u_error) {
297 		ip->i_nlink--;
298 		ip->i_flag |= ICHG;
299 	}
300 	irele(ip);
301 }
302 
303 /*
304  * symlink -- make a symbolic link
305  */
306 symlink()
307 {
308 	register struct a {
309 		char	*target;
310 		char	*linkname;
311 	} *uap = (struct a *)u.u_ap;
312 	register struct inode *ip;
313 	register char *tp;
314 	register c, nc;
315 	register struct nameidata *ndp = &u.u_nd;
316 
317 	tp = uap->target;
318 	nc = 0;
319 	while (c = fubyte(tp)) {
320 		if (c < 0) {
321 			u.u_error = EFAULT;
322 			return;
323 		}
324 		tp++;
325 		nc++;
326 	}
327 	ndp->ni_nameiop = CREATE;
328 	ndp->ni_segflg = UIO_USERSPACE;
329 	ndp->ni_dirp = uap->linkname;
330 	ip = namei(ndp);
331 	if (ip) {
332 		iput(ip);
333 		u.u_error = EEXIST;
334 		return;
335 	}
336 	if (u.u_error)
337 		return;
338 	ip = maknode(IFLNK | 0777, ndp);
339 	if (ip == NULL)
340 		return;
341 	u.u_error = rdwri(UIO_WRITE, ip, uap->target, nc, 0, 0, (int *)0);
342 	/* handle u.u_error != 0 */
343 	iput(ip);
344 }
345 
346 /*
347  * Unlink system call.
348  * Hard to avoid races here, especially
349  * in unlinking directories.
350  */
351 unlink()
352 {
353 	struct a {
354 		char	*fname;
355 	} *uap = (struct a *)u.u_ap;
356 	register struct inode *ip, *dp;
357 	register struct nameidata *ndp = &u.u_nd;
358 
359 	ndp->ni_nameiop = DELETE | LOCKPARENT;
360 	ndp->ni_segflg = UIO_USERSPACE;
361 	ndp->ni_dirp = uap->fname;
362 	ip = namei(ndp);
363 	if (ip == NULL)
364 		return;
365 	dp = ndp->ni_pdir;
366 	if ((ip->i_mode&IFMT) == IFDIR && !suser())
367 		goto out;
368 	/*
369 	 * Don't unlink a mounted file.
370 	 */
371 	if (ip->i_dev != dp->i_dev) {
372 		u.u_error = EBUSY;
373 		goto out;
374 	}
375 	if (ip->i_flag&ITEXT)
376 		xrele(ip);	/* try once to free text */
377 	if (dirremove(ndp)) {
378 		ip->i_nlink--;
379 		ip->i_flag |= ICHG;
380 	}
381 out:
382 	if (dp == ip)
383 		irele(ip);
384 	else
385 		iput(ip);
386 	iput(dp);
387 }
388 
389 /*
390  * Seek system call
391  */
392 lseek()
393 {
394 	register struct file *fp;
395 	register struct a {
396 		int	fd;
397 		off_t	off;
398 		int	sbase;
399 	} *uap = (struct a *)u.u_ap;
400 
401 	GETF(fp, uap->fd);
402 	if (fp->f_type != DTYPE_INODE) {
403 		u.u_error = ESPIPE;
404 		return;
405 	}
406 	switch (uap->sbase) {
407 
408 	case L_INCR:
409 		fp->f_offset += uap->off;
410 		break;
411 
412 	case L_XTND:
413 		fp->f_offset = uap->off + ((struct inode *)fp->f_data)->i_size;
414 		break;
415 
416 	case L_SET:
417 		fp->f_offset = uap->off;
418 		break;
419 
420 	default:
421 		u.u_error = EINVAL;
422 		return;
423 	}
424 	u.u_r.r_off = fp->f_offset;
425 }
426 
427 /*
428  * Access system call
429  */
430 saccess()
431 {
432 	register svuid, svgid;
433 	register struct inode *ip;
434 	register struct a {
435 		char	*fname;
436 		int	fmode;
437 	} *uap = (struct a *)u.u_ap;
438 	register struct nameidata *ndp = &u.u_nd;
439 
440 	svuid = u.u_uid;
441 	svgid = u.u_gid;
442 	u.u_uid = u.u_ruid;
443 	u.u_gid = u.u_rgid;
444 	ndp->ni_nameiop = LOOKUP | FOLLOW;
445 	ndp->ni_segflg = UIO_USERSPACE;
446 	ndp->ni_dirp = uap->fname;
447 	ip = namei(ndp);
448 	if (ip != NULL) {
449 		if ((uap->fmode&R_OK) && access(ip, IREAD))
450 			goto done;
451 		if ((uap->fmode&W_OK) && access(ip, IWRITE))
452 			goto done;
453 		if ((uap->fmode&X_OK) && access(ip, IEXEC))
454 			goto done;
455 done:
456 		iput(ip);
457 	}
458 	u.u_uid = svuid;
459 	u.u_gid = svgid;
460 }
461 
462 /*
463  * Stat system call.  This version follows links.
464  */
465 stat()
466 {
467 
468 	stat1(FOLLOW);
469 }
470 
471 /*
472  * Lstat system call.  This version does not follow links.
473  */
474 lstat()
475 {
476 
477 	stat1(NOFOLLOW);
478 }
479 
480 stat1(follow)
481 	int follow;
482 {
483 	register struct inode *ip;
484 	register struct a {
485 		char	*fname;
486 		struct stat *ub;
487 	} *uap = (struct a *)u.u_ap;
488 	struct stat sb;
489 	register struct nameidata *ndp = &u.u_nd;
490 
491 	ndp->ni_nameiop = LOOKUP | follow;
492 	ndp->ni_segflg = UIO_USERSPACE;
493 	ndp->ni_dirp = uap->fname;
494 	ip = namei(ndp);
495 	if (ip == NULL)
496 		return;
497 	(void) ino_stat(ip, &sb);
498 	iput(ip);
499 	u.u_error = copyout((caddr_t)&sb, (caddr_t)uap->ub, sizeof (sb));
500 }
501 
502 /*
503  * Return target name of a symbolic link
504  */
505 readlink()
506 {
507 	register struct inode *ip;
508 	register struct a {
509 		char	*name;
510 		char	*buf;
511 		int	count;
512 	} *uap = (struct a *)u.u_ap;
513 	register struct nameidata *ndp = &u.u_nd;
514 	int resid;
515 
516 	ndp->ni_nameiop = LOOKUP;
517 	ndp->ni_segflg = UIO_USERSPACE;
518 	ndp->ni_dirp = uap->name;
519 	ip = namei(ndp);
520 	if (ip == NULL)
521 		return;
522 	if ((ip->i_mode&IFMT) != IFLNK) {
523 		u.u_error = EINVAL;
524 		goto out;
525 	}
526 	u.u_error = rdwri(UIO_READ, ip, uap->buf, uap->count, 0, 0, &resid);
527 out:
528 	iput(ip);
529 	u.u_r.r_val1 = uap->count - resid;
530 }
531 
532 /*
533  * Change mode of a file given path name.
534  */
535 chmod()
536 {
537 	struct inode *ip;
538 	struct a {
539 		char	*fname;
540 		int	fmode;
541 	} *uap = (struct a *)u.u_ap;
542 
543 	if ((ip = owner(uap->fname, FOLLOW)) == NULL)
544 		return;
545 	u.u_error = chmod1(ip, uap->fmode);
546 	iput(ip);
547 }
548 
549 /*
550  * Change mode of a file given a file descriptor.
551  */
552 fchmod()
553 {
554 	struct a {
555 		int	fd;
556 		int	fmode;
557 	} *uap = (struct a *)u.u_ap;
558 	register struct inode *ip;
559 	register struct file *fp;
560 
561 	fp = getinode(uap->fd);
562 	if (fp == NULL)
563 		return;
564 	ip = (struct inode *)fp->f_data;
565 	if (u.u_uid != ip->i_uid && !suser())
566 		return;
567 	ILOCK(ip);
568 	u.u_error = chmod1(ip, uap->fmode);
569 	IUNLOCK(ip);
570 }
571 
572 /*
573  * Change the mode on a file.
574  * Inode must be locked before calling.
575  */
576 chmod1(ip, mode)
577 	register struct inode *ip;
578 	register int mode;
579 {
580 
581 	if (ip->i_fs->fs_ronly)
582 		return (EROFS);
583 	ip->i_mode &= ~07777;
584 	if (u.u_uid) {
585 		if ((ip->i_mode & IFMT) != IFDIR)
586 			mode &= ~ISVTX;
587 		if (!groupmember(ip->i_gid))
588 			mode &= ~ISGID;
589 	}
590 	ip->i_mode |= mode&07777;
591 	ip->i_flag |= ICHG;
592 	if (ip->i_flag&ITEXT && (ip->i_mode&ISVTX)==0)
593 		xrele(ip);
594 	return (0);
595 }
596 
597 /*
598  * Set ownership given a path name.
599  */
600 chown()
601 {
602 	struct inode *ip;
603 	struct a {
604 		char	*fname;
605 		int	uid;
606 		int	gid;
607 	} *uap = (struct a *)u.u_ap;
608 
609 	if (!suser() || (ip = owner(uap->fname, NOFOLLOW)) == NULL)
610 		return;
611 	u.u_error = chown1(ip, uap->uid, uap->gid);
612 	iput(ip);
613 }
614 
615 /*
616  * Set ownership given a file descriptor.
617  */
618 fchown()
619 {
620 	struct a {
621 		int	fd;
622 		int	uid;
623 		int	gid;
624 	} *uap = (struct a *)u.u_ap;
625 	register struct inode *ip;
626 	register struct file *fp;
627 
628 	fp = getinode(uap->fd);
629 	if (fp == NULL)
630 		return;
631 	ip = (struct inode *)fp->f_data;
632 	if (!suser())
633 		return;
634 	ILOCK(ip);
635 	u.u_error = chown1(ip, uap->uid, uap->gid);
636 	IUNLOCK(ip);
637 }
638 
639 /*
640  * Perform chown operation on inode ip;
641  * inode must be locked prior to call.
642  */
643 chown1(ip, uid, gid)
644 	register struct inode *ip;
645 	int uid, gid;
646 {
647 #ifdef QUOTA
648 	register long change;
649 #endif
650 
651 	if (ip->i_fs->fs_ronly)
652 		return (EROFS);
653 	if (uid == -1)
654 		uid = ip->i_uid;
655 	if (gid == -1)
656 		gid = ip->i_gid;
657 #ifdef QUOTA
658 	if (ip->i_uid == uid)		/* this just speeds things a little */
659 		change = 0;
660 	else
661 		change = ip->i_blocks;
662 	(void) chkdq(ip, -change, 1);
663 	(void) chkiq(ip->i_dev, ip, ip->i_uid, 1);
664 	dqrele(ip->i_dquot);
665 #endif
666 	ip->i_uid = uid;
667 	ip->i_gid = gid;
668 	ip->i_flag |= ICHG;
669 	if (u.u_ruid != 0)
670 		ip->i_mode &= ~(ISUID|ISGID);
671 #ifdef QUOTA
672 	ip->i_dquot = inoquota(ip);
673 	(void) chkdq(ip, change, 1);
674 	(void) chkiq(ip->i_dev, (struct inode *)NULL, uid, 1);
675 	return (u.u_error);		/* should == 0 ALWAYS !! */
676 #else
677 	return (0);
678 #endif
679 }
680 
681 utimes()
682 {
683 	register struct a {
684 		char	*fname;
685 		struct	timeval *tptr;
686 	} *uap = (struct a *)u.u_ap;
687 	register struct inode *ip;
688 	struct timeval tv[2];
689 
690 	if ((ip = owner(uap->fname, FOLLOW)) == NULL)
691 		return;
692 	if (ip->i_fs->fs_ronly) {
693 		u.u_error = EROFS;
694 		iput(ip);
695 		return;
696 	}
697 	u.u_error = copyin((caddr_t)uap->tptr, (caddr_t)tv, sizeof (tv));
698 	if (u.u_error == 0) {
699 		ip->i_flag |= IACC|IUPD|ICHG;
700 		iupdat(ip, &tv[0], &tv[1], 0);
701 	}
702 	iput(ip);
703 }
704 
/*
 * sync system call: flush any pending I/O.
 * Simply hands the job to update().
 */
sync()
{

	(void) update();
}
713 
714 /*
715  * Truncate a file given its path name.
716  */
717 truncate()
718 {
719 	struct a {
720 		char	*fname;
721 		u_long	length;
722 	} *uap = (struct a *)u.u_ap;
723 	struct inode *ip;
724 	register struct nameidata *ndp = &u.u_nd;
725 
726 	ndp->ni_nameiop = LOOKUP | FOLLOW;
727 	ndp->ni_segflg = UIO_USERSPACE;
728 	ndp->ni_dirp = uap->fname;
729 	ip = namei(ndp);
730 	if (ip == NULL)
731 		return;
732 	if (access(ip, IWRITE))
733 		goto bad;
734 	if ((ip->i_mode&IFMT) == IFDIR) {
735 		u.u_error = EISDIR;
736 		goto bad;
737 	}
738 	itrunc(ip, uap->length);
739 bad:
740 	iput(ip);
741 }
742 
743 /*
744  * Truncate a file given a file descriptor.
745  */
746 ftruncate()
747 {
748 	struct a {
749 		int	fd;
750 		u_long	length;
751 	} *uap = (struct a *)u.u_ap;
752 	struct inode *ip;
753 	struct file *fp;
754 
755 	fp = getinode(uap->fd);
756 	if (fp == NULL)
757 		return;
758 	if ((fp->f_flag&FWRITE) == 0) {
759 		u.u_error = EINVAL;
760 		return;
761 	}
762 	ip = (struct inode *)fp->f_data;
763 	ILOCK(ip);
764 	itrunc(ip, uap->length);
765 	IUNLOCK(ip);
766 }
767 
768 /*
769  * Synch an open file.
770  */
771 fsync()
772 {
773 	struct a {
774 		int	fd;
775 	} *uap = (struct a *)u.u_ap;
776 	struct inode *ip;
777 	struct file *fp;
778 
779 	fp = getinode(uap->fd);
780 	if (fp == NULL)
781 		return;
782 	ip = (struct inode *)fp->f_data;
783 	ILOCK(ip);
784 	syncip(ip);
785 	IUNLOCK(ip);
786 }
787 
788 /*
789  * Rename system call.
790  * 	rename("foo", "bar");
791  * is essentially
792  *	unlink("bar");
793  *	link("foo", "bar");
794  *	unlink("foo");
795  * but ``atomically''.  Can't do full commit without saving state in the
796  * inode on disk which isn't feasible at this time.  Best we can do is
797  * always guarantee the target exists.
798  *
799  * Basic algorithm is:
800  *
801  * 1) Bump link count on source while we're linking it to the
802  *    target.  This also insure the inode won't be deleted out
803  *    from underneath us while we work (it may be truncated by
804  *    a concurrent `trunc' or `open' for creation).
805  * 2) Link source to destination.  If destination already exists,
806  *    delete it first.
807  * 3) Unlink source reference to inode if still around. If a
808  *    directory was moved and the parent of the destination
809  *    is different from the source, patch the ".." entry in the
810  *    directory.
811  *
812  * Source and destination must either both be directories, or both
813  * not be directories.  If target is a directory, it must be empty.
814  */
815 rename()
816 {
817 	struct a {
818 		char	*from;
819 		char	*to;
820 	} *uap = (struct a *)u.u_ap;
821 	register struct inode *ip, *xp, *dp;
822 	struct dirtemplate dirbuf;
823 	int doingdirectory = 0, oldparent = 0, newparent = 0;
824 	register struct nameidata *ndp = &u.u_nd;
825 	int error = 0;
826 
827 	ndp->ni_nameiop = DELETE | LOCKPARENT;
828 	ndp->ni_segflg = UIO_USERSPACE;
829 	ndp->ni_dirp = uap->from;
830 	ip = namei(ndp);
831 	if (ip == NULL)
832 		return;
833 	dp = ndp->ni_pdir;
834 	if ((ip->i_mode&IFMT) == IFDIR) {
835 		register struct direct *d;
836 
837 		d = &ndp->ni_dent;
838 		/*
839 		 * Avoid ".", "..", and aliases of "." for obvious reasons.
840 		 */
841 		if ((d->d_namlen == 1 && d->d_name[0] == '.') ||
842 		    (d->d_namlen == 2 && bcmp(d->d_name, "..", 2) == 0) ||
843 		    (dp == ip) || (ip->i_flag & IRENAME)) {
844 			iput(dp);
845 			if (dp == ip)
846 				irele(ip);
847 			else
848 				iput(ip);
849 			u.u_error = EINVAL;
850 			return;
851 		}
852 		ip->i_flag |= IRENAME;
853 		oldparent = dp->i_number;
854 		doingdirectory++;
855 	}
856 	iput(dp);
857 
858 	/*
859 	 * 1) Bump link count while we're moving stuff
860 	 *    around.  If we crash somewhere before
861 	 *    completing our work, the link count
862 	 *    may be wrong, but correctable.
863 	 */
864 	ip->i_nlink++;
865 	ip->i_flag |= ICHG;
866 	iupdat(ip, &time, &time, 1);
867 	IUNLOCK(ip);
868 
869 	/*
870 	 * When the target exists, both the directory
871 	 * and target inodes are returned locked.
872 	 */
873 	ndp->ni_nameiop = CREATE | LOCKPARENT | NOCACHE;
874 	ndp->ni_dirp = (caddr_t)uap->to;
875 	xp = namei(ndp);
876 	if (u.u_error) {
877 		error = u.u_error;
878 		goto out;
879 	}
880 	dp = ndp->ni_pdir;
881 	/*
882 	 * If ".." must be changed (ie the directory gets a new
883 	 * parent) then the source directory must not be in the
884 	 * directory heirarchy above the target, as this would
885 	 * orphan everything below the source directory. Also
886 	 * the user must have write permission in the source so
887 	 * as to be able to change "..". We must repeat the call
888 	 * to namei, as the parent directory is unlocked by the
889 	 * call to checkpath().
890 	 */
891 	if (oldparent != dp->i_number)
892 		newparent = dp->i_number;
893 	if (doingdirectory && newparent) {
894 		if (access(ip, IWRITE))
895 			goto bad;
896 		do {
897 			dp = ndp->ni_pdir;
898 			if (xp != NULL)
899 				iput(xp);
900 			u.u_error = checkpath(ip, dp);
901 			if (u.u_error)
902 				goto out;
903 			xp = namei(ndp);
904 			if (u.u_error) {
905 				error = u.u_error;
906 				goto out;
907 			}
908 		} while (dp != ndp->ni_pdir);
909 	}
910 	/*
911 	 * 2) If target doesn't exist, link the target
912 	 *    to the source and unlink the source.
913 	 *    Otherwise, rewrite the target directory
914 	 *    entry to reference the source inode and
915 	 *    expunge the original entry's existence.
916 	 */
917 	if (xp == NULL) {
918 		if (dp->i_dev != ip->i_dev) {
919 			error = EXDEV;
920 			goto bad;
921 		}
922 		/*
923 		 * Account for ".." in new directory.
924 		 * When source and destination have the same
925 		 * parent we don't fool with the link count.
926 		 */
927 		if (doingdirectory && newparent) {
928 			dp->i_nlink++;
929 			dp->i_flag |= ICHG;
930 			iupdat(dp, &time, &time, 1);
931 		}
932 		error = direnter(ip, ndp);
933 		if (error)
934 			goto out;
935 	} else {
936 		if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev) {
937 			error = EXDEV;
938 			goto bad;
939 		}
940 		/*
941 		 * Short circuit rename(foo, foo).
942 		 */
943 		if (xp->i_number == ip->i_number)
944 			goto bad;
945 		/*
946 		 * If the parent directory is "sticky", then the user must
947 		 * own the parent directory, or the destination of the rename,
948 		 * otherwise the destination may not be changed (except by
949 		 * root). This implements append-only directories.
950 		 */
951 		if ((dp->i_mode & ISVTX) && u.u_uid != 0 &&
952 		    u.u_uid != dp->i_uid && xp->i_uid != u.u_uid) {
953 			error = EPERM;
954 			goto bad;
955 		}
956 		/*
957 		 * Target must be empty if a directory
958 		 * and have no links to it.
959 		 * Also, insure source and target are
960 		 * compatible (both directories, or both
961 		 * not directories).
962 		 */
963 		if ((xp->i_mode&IFMT) == IFDIR) {
964 			if (!dirempty(xp, dp->i_number) || xp->i_nlink > 2) {
965 				error = ENOTEMPTY;
966 				goto bad;
967 			}
968 			if (!doingdirectory) {
969 				error = ENOTDIR;
970 				goto bad;
971 			}
972 			cacheinval(dp);
973 		} else if (doingdirectory) {
974 			error = EISDIR;
975 			goto bad;
976 		}
977 		dirrewrite(dp, ip, ndp);
978 		if (u.u_error) {
979 			error = u.u_error;
980 			goto bad1;
981 		}
982 		/*
983 		 * Adjust the link count of the target to
984 		 * reflect the dirrewrite above.  If this is
985 		 * a directory it is empty and there are
986 		 * no links to it, so we can squash the inode and
987 		 * any space associated with it.  We disallowed
988 		 * renaming over top of a directory with links to
989 		 * it above, as the remaining link would point to
990 		 * a directory without "." or ".." entries.
991 		 */
992 		xp->i_nlink--;
993 		if (doingdirectory) {
994 			if (--xp->i_nlink != 0)
995 				panic("rename: linked directory");
996 			itrunc(xp, (u_long)0);
997 		}
998 		xp->i_flag |= ICHG;
999 		iput(xp);
1000 		xp = NULL;
1001 	}
1002 
1003 	/*
1004 	 * 3) Unlink the source.
1005 	 */
1006 	ndp->ni_nameiop = DELETE | LOCKPARENT;
1007 	ndp->ni_segflg = UIO_USERSPACE;
1008 	ndp->ni_dirp = uap->from;
1009 	xp = namei(ndp);
1010 	if (xp != NULL)
1011 		dp = ndp->ni_pdir;
1012 	else
1013 		dp = NULL;
1014 	/*
1015 	 * Insure that the directory entry still exists and has not
1016 	 * changed while the new name has been entered. If the source is
1017 	 * a file then the entry may have been unlinked or renamed. In
1018 	 * either case there is no further work to be done. If the source
1019 	 * is a directory then it cannot have been rmdir'ed; its link
1020 	 * count of three would cause a rmdir to fail with ENOTEMPTY.
1021 	 * The IRENAME flag insures that it cannot be moved by another
1022 	 * rename.
1023 	 */
1024 	if (xp != ip) {
1025 		if (doingdirectory)
1026 			panic("rename: lost dir entry");
1027 	} else {
1028 		/*
1029 		 * If the source is a directory with a
1030 		 * new parent, the link count of the old
1031 		 * parent directory must be decremented
1032 		 * and ".." set to point to the new parent.
1033 		 */
1034 		if (doingdirectory && newparent) {
1035 			dp->i_nlink--;
1036 			dp->i_flag |= ICHG;
1037 			error = rdwri(UIO_READ, xp, (caddr_t)&dirbuf,
1038 				sizeof (struct dirtemplate), (off_t)0, 1,
1039 				(int *)0);
1040 			if (error == 0) {
1041 				if (dirbuf.dotdot_namlen != 2 ||
1042 				    dirbuf.dotdot_name[0] != '.' ||
1043 				    dirbuf.dotdot_name[1] != '.') {
1044 					printf("rename: mangled dir\n");
1045 				} else {
1046 					dirbuf.dotdot_ino = newparent;
1047 					(void) rdwri(UIO_WRITE, xp,
1048 					    (caddr_t)&dirbuf,
1049 					    sizeof (struct dirtemplate),
1050 					    (off_t)0, 1, (int *)0);
1051 					cacheinval(dp);
1052 				}
1053 			}
1054 		}
1055 		if (dirremove(ndp)) {
1056 			xp->i_nlink--;
1057 			xp->i_flag |= ICHG;
1058 		}
1059 		xp->i_flag &= ~IRENAME;
1060 		if (error == 0)		/* XXX conservative */
1061 			error = u.u_error;
1062 	}
1063 	if (dp)
1064 		iput(dp);
1065 	if (xp)
1066 		iput(xp);
1067 	irele(ip);
1068 	if (error)
1069 		u.u_error = error;
1070 	return;
1071 
1072 bad:
1073 	iput(dp);
1074 bad1:
1075 	if (xp)
1076 		iput(xp);
1077 out:
1078 	ip->i_nlink--;
1079 	ip->i_flag |= ICHG;
1080 	irele(ip);
1081 	if (error)
1082 		u.u_error = error;
1083 }
1084 
1085 /*
1086  * Make a new file.
1087  */
1088 struct inode *
1089 maknode(mode, ndp)
1090 	int mode;
1091 	register struct nameidata *ndp;
1092 {
1093 	register struct inode *ip;
1094 	register struct inode *pdir = ndp->ni_pdir;
1095 	ino_t ipref;
1096 
1097 	if ((mode & IFMT) == IFDIR)
1098 		ipref = dirpref(pdir->i_fs);
1099 	else
1100 		ipref = pdir->i_number;
1101 	ip = ialloc(pdir, ipref, mode);
1102 	if (ip == NULL) {
1103 		iput(pdir);
1104 		return (NULL);
1105 	}
1106 #ifdef QUOTA
1107 	if (ip->i_dquot != NODQUOT)
1108 		panic("maknode: dquot");
1109 #endif
1110 	ip->i_flag |= IACC|IUPD|ICHG;
1111 	if ((mode & IFMT) == 0)
1112 		mode |= IFREG;
1113 	ip->i_mode = mode & ~u.u_cmask;
1114 	ip->i_nlink = 1;
1115 	ip->i_uid = u.u_uid;
1116 	ip->i_gid = pdir->i_gid;
1117 	if (ip->i_mode & ISGID && !groupmember(ip->i_gid))
1118 		ip->i_mode &= ~ISGID;
1119 #ifdef QUOTA
1120 	ip->i_dquot = inoquota(ip);
1121 #endif
1122 
1123 	/*
1124 	 * Make sure inode goes to disk before directory entry.
1125 	 */
1126 	iupdat(ip, &time, &time, 1);
1127 	u.u_error = direnter(ip, ndp);
1128 	if (u.u_error) {
1129 		/*
1130 		 * Write error occurred trying to update directory
1131 		 * so must deallocate the inode.
1132 		 */
1133 		ip->i_nlink = 0;
1134 		ip->i_flag |= ICHG;
1135 		iput(ip);
1136 		return (NULL);
1137 	}
1138 	return (ip);
1139 }
1140 
1141 /*
1142  * A virgin directory (no blushing please).
1143  */
1144 struct dirtemplate mastertemplate = {
1145 	0, 12, 1, ".",
1146 	0, DIRBLKSIZ - 12, 2, ".."
1147 };
1148 
1149 /*
1150  * Mkdir system call
1151  */
1152 mkdir()
1153 {
1154 	struct a {
1155 		char	*name;
1156 		int	dmode;
1157 	} *uap = (struct a *)u.u_ap;
1158 	register struct inode *ip, *dp;
1159 	struct dirtemplate dirtemplate;
1160 	register struct nameidata *ndp = &u.u_nd;
1161 
1162 	ndp->ni_nameiop = CREATE;
1163 	ndp->ni_segflg = UIO_USERSPACE;
1164 	ndp->ni_dirp = uap->name;
1165 	ip = namei(ndp);
1166 	if (u.u_error)
1167 		return;
1168 	if (ip != NULL) {
1169 		iput(ip);
1170 		u.u_error = EEXIST;
1171 		return;
1172 	}
1173 	dp = ndp->ni_pdir;
1174 	uap->dmode &= 0777;
1175 	uap->dmode |= IFDIR;
1176 	/*
1177 	 * Must simulate part of maknode here
1178 	 * in order to acquire the inode, but
1179 	 * not have it entered in the parent
1180 	 * directory.  The entry is made later
1181 	 * after writing "." and ".." entries out.
1182 	 */
1183 	ip = ialloc(dp, dirpref(dp->i_fs), uap->dmode);
1184 	if (ip == NULL) {
1185 		iput(dp);
1186 		return;
1187 	}
1188 #ifdef QUOTA
1189 	if (ip->i_dquot != NODQUOT)
1190 		panic("mkdir: dquot");
1191 #endif
1192 	ip->i_flag |= IACC|IUPD|ICHG;
1193 	ip->i_mode = uap->dmode & ~u.u_cmask;
1194 	ip->i_nlink = 2;
1195 	ip->i_uid = u.u_uid;
1196 	ip->i_gid = dp->i_gid;
1197 #ifdef QUOTA
1198 	ip->i_dquot = inoquota(ip);
1199 #endif
1200 	iupdat(ip, &time, &time, 1);
1201 
1202 	/*
1203 	 * Bump link count in parent directory
1204 	 * to reflect work done below.  Should
1205 	 * be done before reference is created
1206 	 * so reparation is possible if we crash.
1207 	 */
1208 	dp->i_nlink++;
1209 	dp->i_flag |= ICHG;
1210 	iupdat(dp, &time, &time, 1);
1211 
1212 	/*
1213 	 * Initialize directory with "."
1214 	 * and ".." from static template.
1215 	 */
1216 	dirtemplate = mastertemplate;
1217 	dirtemplate.dot_ino = ip->i_number;
1218 	dirtemplate.dotdot_ino = dp->i_number;
1219 	u.u_error = rdwri(UIO_WRITE, ip, (caddr_t)&dirtemplate,
1220 		sizeof (dirtemplate), (off_t)0, 1, (int *)0);
1221 	if (u.u_error) {
1222 		dp->i_nlink--;
1223 		dp->i_flag |= ICHG;
1224 		goto bad;
1225 	}
1226 	if (DIRBLKSIZ > ip->i_fs->fs_fsize)
1227 		panic("mkdir: blksize");     /* XXX - should grow with bmap() */
1228 	else
1229 		ip->i_size = DIRBLKSIZ;
1230 	/*
1231 	 * Directory all set up, now
1232 	 * install the entry for it in
1233 	 * the parent directory.
1234 	 */
1235 	u.u_error = direnter(ip, ndp);
1236 	dp = NULL;
1237 	if (u.u_error) {
1238 		ndp->ni_nameiop = LOOKUP | NOCACHE;
1239 		ndp->ni_segflg = UIO_USERSPACE;
1240 		ndp->ni_dirp = uap->name;
1241 		dp = namei(ndp);
1242 		if (dp) {
1243 			dp->i_nlink--;
1244 			dp->i_flag |= ICHG;
1245 		}
1246 	}
1247 bad:
1248 	/*
1249 	 * No need to do an explicit itrunc here,
1250 	 * irele will do this for us because we set
1251 	 * the link count to 0.
1252 	 */
1253 	if (u.u_error) {
1254 		ip->i_nlink = 0;
1255 		ip->i_flag |= ICHG;
1256 	}
1257 	if (dp)
1258 		iput(dp);
1259 	iput(ip);
1260 }
1261 
1262 /*
1263  * Rmdir system call.
1264  */
1265 rmdir()
1266 {
1267 	struct a {
1268 		char	*name;
1269 	} *uap = (struct a *)u.u_ap;
1270 	register struct inode *ip, *dp;
1271 	register struct nameidata *ndp = &u.u_nd;
1272 
1273 	ndp->ni_nameiop = DELETE | LOCKPARENT;
1274 	ndp->ni_segflg = UIO_USERSPACE;
1275 	ndp->ni_dirp = uap->name;
1276 	ip = namei(ndp);
1277 	if (ip == NULL)
1278 		return;
1279 	dp = ndp->ni_pdir;
1280 	/*
1281 	 * No rmdir "." please.
1282 	 */
1283 	if (dp == ip) {
1284 		irele(dp);
1285 		iput(ip);
1286 		u.u_error = EINVAL;
1287 		return;
1288 	}
1289 	if ((ip->i_mode&IFMT) != IFDIR) {
1290 		u.u_error = ENOTDIR;
1291 		goto out;
1292 	}
1293 	/*
1294 	 * Don't remove a mounted on directory.
1295 	 */
1296 	if (ip->i_dev != dp->i_dev) {
1297 		u.u_error = EBUSY;
1298 		goto out;
1299 	}
1300 	/*
1301 	 * Verify the directory is empty (and valid).
1302 	 * (Rmdir ".." won't be valid since
1303 	 *  ".." will contain a reference to
1304 	 *  the current directory and thus be
1305 	 *  non-empty.)
1306 	 */
1307 	if (ip->i_nlink != 2 || !dirempty(ip, dp->i_number)) {
1308 		u.u_error = ENOTEMPTY;
1309 		goto out;
1310 	}
1311 	/*
1312 	 * Delete reference to directory before purging
1313 	 * inode.  If we crash in between, the directory
1314 	 * will be reattached to lost+found,
1315 	 */
1316 	if (dirremove(ndp) == 0)
1317 		goto out;
1318 	dp->i_nlink--;
1319 	dp->i_flag |= ICHG;
1320 	cacheinval(dp);
1321 	iput(dp);
1322 	dp = NULL;
1323 	/*
1324 	 * Truncate inode.  The only stuff left
1325 	 * in the directory is "." and "..".  The
1326 	 * "." reference is inconsequential since
1327 	 * we're quashing it.  The ".." reference
1328 	 * has already been adjusted above.  We've
1329 	 * removed the "." reference and the reference
1330 	 * in the parent directory, but there may be
1331 	 * other hard links so decrement by 2 and
1332 	 * worry about them later.
1333 	 */
1334 	ip->i_nlink -= 2;
1335 	itrunc(ip, (u_long)0);
1336 	cacheinval(ip);
1337 out:
1338 	if (dp)
1339 		iput(dp);
1340 	iput(ip);
1341 }
1342 
1343 struct file *
1344 getinode(fdes)
1345 	int fdes;
1346 {
1347 	struct file *fp;
1348 
1349 	if ((unsigned)fdes >= NOFILE || (fp = u.u_ofile[fdes]) == NULL) {
1350 		u.u_error = EBADF;
1351 		return ((struct file *)0);
1352 	}
1353 	if (fp->f_type != DTYPE_INODE) {
1354 		u.u_error = EINVAL;
1355 		return ((struct file *)0);
1356 	}
1357 	return (fp);
1358 }
1359 
1360 /*
1361  * mode mask for creation of files
1362  */
1363 umask()
1364 {
1365 	register struct a {
1366 		int	mask;
1367 	} *uap = (struct a *)u.u_ap;
1368 
1369 	u.u_r.r_val1 = u.u_cmask;
1370 	u.u_cmask = uap->mask & 07777;
1371 }
1372