xref: /dflybsd-src/sys/vfs/procfs/procfs_vnops.c (revision e90a7c45c3303ed54c0fde732b2ba32dc80ffd9b)
1 /*
2  * Copyright (c) 1993, 1995 Jan-Simon Pendry
3  * Copyright (c) 1993, 1995
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Jan-Simon Pendry.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *	This product includes software developed by the University of
20  *	California, Berkeley and its contributors.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)procfs_vnops.c	8.18 (Berkeley) 5/21/95
38  *
39  * $FreeBSD: src/sys/miscfs/procfs/procfs_vnops.c,v 1.76.2.7 2002/01/22 17:22:59 nectar Exp $
40  */
41 
42 /*
43  * procfs vnode interface
44  */
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/time.h>
49 #include <sys/kernel.h>
50 #include <sys/lock.h>
51 #include <sys/fcntl.h>
52 #include <sys/proc.h>
53 #include <sys/priv.h>
54 #include <sys/signalvar.h>
55 #include <sys/vnode.h>
56 #include <sys/uio.h>
57 #include <sys/mount.h>
58 #include <sys/namei.h>
59 #include <sys/dirent.h>
60 #include <sys/malloc.h>
61 #include <sys/reg.h>
62 #include <vm/vm_zone.h>
63 #include <vfs/procfs/procfs.h>
64 #include <sys/pioctl.h>
65 
66 #include <sys/spinlock2.h>
67 
68 #include <machine/limits.h>
69 
/*
 * Forward declarations of the vnode operation handlers defined in
 * this file.
 */
static int	procfs_access(struct vop_access_args *ap);
static int	procfs_badop(struct vop_generic_args *ap);
static int	procfs_bmap(struct vop_bmap_args *ap);
static int	procfs_close(struct vop_close_args *ap);
static int	procfs_getattr(struct vop_getattr_args *ap);
static int	procfs_inactive(struct vop_inactive_args *ap);
static int	procfs_ioctl(struct vop_ioctl_args *ap);
static int	procfs_lookup(struct vop_old_lookup_args *ap);
static int	procfs_open(struct vop_open_args *ap);
static int	procfs_print(struct vop_print_args *ap);
static int	procfs_readdir(struct vop_readdir_args *ap);
static int	procfs_readlink(struct vop_readlink_args *ap);
static int	procfs_reclaim(struct vop_reclaim_args *ap);
static int	procfs_setattr(struct vop_setattr_args *ap);

/*
 * Workers called by procfs_readdir(), one per directory node type.
 */
static int	procfs_readdir_proc(struct vop_readdir_args *ap);
static int	procfs_readdir_root(struct vop_readdir_args *ap);
87 
88 /*
89  * procfs vnode operations.
90  */
91 struct vop_ops procfs_vnode_vops = {
92 	.vop_default =		vop_defaultop,
93 	.vop_access =		procfs_access,
94 	.vop_advlock =		(void *)procfs_badop,
95 	.vop_bmap =		procfs_bmap,
96 	.vop_close =		procfs_close,
97 	.vop_old_create =	(void *)procfs_badop,
98 	.vop_getattr =		procfs_getattr,
99 	.vop_inactive =		procfs_inactive,
100 	.vop_old_link =		(void *)procfs_badop,
101 	.vop_old_lookup =	procfs_lookup,
102 	.vop_old_mkdir =	(void *)procfs_badop,
103 	.vop_old_mknod =	(void *)procfs_badop,
104 	.vop_open =		procfs_open,
105 	.vop_pathconf =		vop_stdpathconf,
106 	.vop_print =		procfs_print,
107 	.vop_read =		procfs_rw,
108 	.vop_readdir =		procfs_readdir,
109 	.vop_readlink =		procfs_readlink,
110 	.vop_reclaim =		procfs_reclaim,
111 	.vop_old_remove =	(void *)procfs_badop,
112 	.vop_old_rename =	(void *)procfs_badop,
113 	.vop_old_rmdir =	(void *)procfs_badop,
114 	.vop_setattr =		procfs_setattr,
115 	.vop_old_symlink =	(void *)procfs_badop,
116 	.vop_write =		(void *)procfs_rw,
117 	.vop_ioctl =		procfs_ioctl
118 };
119 
120 
121 /*
122  * This is a list of the valid names in the
123  * process-specific sub-directories.  It is
124  * used in procfs_lookup and procfs_readdir
125  */
126 static struct proc_target {
127 	u_char	pt_type;
128 	u_char	pt_namlen;
129 	char	*pt_name;
130 	pfstype	pt_pfstype;
131 	int	(*pt_valid) (struct lwp *p);
132 } proc_targets[] = {
133 #define N(s) sizeof(s)-1, s
134 	/*	  name		type		validp */
135 	{ DT_DIR, N("."),	Pproc,		NULL },
136 	{ DT_DIR, N(".."),	Proot,		NULL },
137 	{ DT_REG, N("mem"),	Pmem,		NULL },
138 	{ DT_REG, N("regs"),	Pregs,		procfs_validregs },
139 	{ DT_REG, N("fpregs"),	Pfpregs,	procfs_validfpregs },
140 	{ DT_REG, N("dbregs"),	Pdbregs,	procfs_validdbregs },
141 	{ DT_REG, N("ctl"),	Pctl,		NULL },
142 	{ DT_REG, N("status"),	Pstatus,	NULL },
143 	{ DT_REG, N("note"),	Pnote,		NULL },
144 	{ DT_REG, N("notepg"),	Pnotepg,	NULL },
145 	{ DT_REG, N("map"), 	Pmap,		procfs_validmap },
146 	{ DT_REG, N("etype"),	Ptype,		procfs_validtype },
147 	{ DT_REG, N("cmdline"),	Pcmdline,	NULL },
148 	{ DT_REG, N("rlimit"),	Prlimit,	NULL },
149 	{ DT_LNK, N("file"),	Pfile,		NULL },
150 #undef N
151 };
152 static const int nproc_targets = NELEM(proc_targets);
153 
154 static pid_t atopid (const char *, u_int);
155 
156 /*
157  * set things up for doing i/o on
158  * the pfsnode (vp).  (vp) is locked
159  * on entry, and should be left locked
160  * on exit.
161  *
162  * for procfs we don't need to do anything
163  * in particular for i/o.  all that is done
164  * is to support exclusive open on process
165  * memory images.
166  *
167  * procfs_open(struct vnode *a_vp, int a_mode, struct ucred *a_cred,
168  *	       struct file *a_fp)
169  */
170 static int
171 procfs_open(struct vop_open_args *ap)
172 {
173 	struct pfsnode *pfs = VTOPFS(ap->a_vp);
174 	struct proc *p1, *p2;
175 
176 	p2 = PFIND(pfs->pfs_pid);
177 	if (p2 == NULL)
178 		return (ENOENT);
179 	if (pfs->pfs_pid && !PRISON_CHECK(ap->a_cred, p2->p_ucred))
180 		return (ENOENT);
181 
182 	switch (pfs->pfs_type) {
183 	case Pmem:
184 		if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) ||
185 		    ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE)))
186 			return (EBUSY);
187 
188 		p1 = curproc;
189 		KKASSERT(p1);
190 		/* Can't trace a process that's currently exec'ing. */
191 		if ((p2->p_flag & P_INEXEC) != 0)
192 			return EAGAIN;
193 		if (!CHECKIO(p1, p2) || p_trespass(ap->a_cred, p2->p_ucred))
194 			return (EPERM);
195 
196 		if (ap->a_mode & FWRITE)
197 			pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
198 
199 		break;
200 
201 	default:
202 		break;
203 	}
204 
205 	return (vop_stdopen(ap));
206 }
207 
208 /*
209  * close the pfsnode (vp) after doing i/o.
210  * (vp) is not locked on entry or exit.
211  *
212  * nothing to do for procfs other than undo
213  * any exclusive open flag (see _open above).
214  *
215  * procfs_close(struct vnode *a_vp, int a_fflag, struct ucred *a_cred)
216  */
217 static int
218 procfs_close(struct vop_close_args *ap)
219 {
220 	struct pfsnode *pfs = VTOPFS(ap->a_vp);
221 	struct proc *p;
222 
223 	switch (pfs->pfs_type) {
224 	case Pmem:
225 		if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
226 			pfs->pfs_flags &= ~(FWRITE|O_EXCL);
227 		/*
228 		 * v_opencount determines the last real close on the vnode.
229 		 *
230 		 * If this is the last close, then it checks to see if
231 		 * the target process has PF_LINGER set in p_pfsflags,
232 		 * if this is *not* the case, then the process' stop flags
233 		 * are cleared, and the process is woken up.  This is
234 		 * to help prevent the case where a process has been
235 		 * told to stop on an event, but then the requesting process
236 		 * has gone away or forgotten about it.
237 		 */
238 		if ((ap->a_vp->v_opencount < 2)
239 		    && (p = pfind(pfs->pfs_pid))
240 		    && !(p->p_pfsflags & PF_LINGER)) {
241 			spin_lock(&p->p_spin);
242 			p->p_stops = 0;
243 			p->p_step = 0;
244 			spin_unlock(&p->p_spin);
245 			wakeup(&p->p_step);
246 		}
247 		break;
248 	default:
249 		break;
250 	}
251 
252 	return (vop_stdclose(ap));
253 }
254 
255 /*
256  * do an ioctl operation on a pfsnode (vp).
257  * (vp) is not locked on entry or exit.
258  */
259 static int
260 procfs_ioctl(struct vop_ioctl_args *ap)
261 {
262 	struct pfsnode *pfs = VTOPFS(ap->a_vp);
263 	struct proc *procp;
264 	struct proc *p;
265 	int error;
266 	int signo;
267 	struct procfs_status *psp;
268 	unsigned char flags;
269 
270 	procp = pfind(pfs->pfs_pid);
271 	if (procp == NULL)
272 		return ENOTTY;
273 	p = curproc;
274 	if (p == NULL)
275 		return EINVAL;
276 
277 	/* Can't trace a process that's currently exec'ing. */
278 	if ((procp->p_flag & P_INEXEC) != 0)
279 		return EAGAIN;
280 	if (!CHECKIO(p, procp) || p_trespass(ap->a_cred, procp->p_ucred))
281 		return EPERM;
282 
283 	switch (ap->a_command) {
284 	case PIOCBIS:
285 	  procp->p_stops |= *(unsigned int*)ap->a_data;
286 	  break;
287 	case PIOCBIC:
288 	  procp->p_stops &= ~*(unsigned int*)ap->a_data;
289 	  break;
290 	case PIOCSFL:
291 	  /*
292 	   * NFLAGS is "non-suser_xxx flags" -- currently, only
293 	   * PFS_ISUGID ("ignore set u/g id");
294 	   */
295 #define NFLAGS	(PF_ISUGID)
296 	  flags = (unsigned char)*(unsigned int*)ap->a_data;
297 	  if (flags & NFLAGS && (error = priv_check_cred(ap->a_cred, PRIV_ROOT, 0)))
298 	    return error;
299 	  procp->p_pfsflags = flags;
300 	  break;
301 	case PIOCGFL:
302 	  *(unsigned int*)ap->a_data = (unsigned int)procp->p_pfsflags;
303 	  break;
304 	case PIOCSTATUS:
305 	  /*
306 	   * NOTE: syscall entry deals with stopevents and may run without
307 	   *	   the MP lock.
308 	   */
309 	  psp = (struct procfs_status *)ap->a_data;
310 	  psp->flags = procp->p_pfsflags;
311 	  psp->events = procp->p_stops;
312 	  spin_lock(&procp->p_spin);
313 	  if (procp->p_step) {
314 	    psp->state = 0;
315 	    psp->why = procp->p_stype;
316 	    psp->val = procp->p_xstat;
317 	    spin_unlock(&procp->p_spin);
318 	  } else {
319 	    psp->state = 1;
320 	    spin_unlock(&procp->p_spin);
321 	    psp->why = 0;	/* Not defined values */
322 	    psp->val = 0;	/* Not defined values */
323 	  }
324 	  break;
325 	case PIOCWAIT:
326 	  /*
327 	   * NOTE: syscall entry deals with stopevents and may run without
328 	   *	   the MP lock.
329 	   */
330 	  psp = (struct procfs_status *)ap->a_data;
331 	  spin_lock(&procp->p_spin);
332 	  while (procp->p_step == 0) {
333 	    tsleep_interlock(&procp->p_stype, PCATCH);
334 	    spin_unlock(&procp->p_spin);
335 	    error = tsleep(&procp->p_stype, PCATCH | PINTERLOCKED, "piocwait", 0);
336 	    if (error)
337 	      return error;
338 	    spin_lock(&procp->p_spin);
339 	  }
340 	  spin_unlock(&procp->p_spin);
341 	  psp->state = 1;	/* It stopped */
342 	  psp->flags = procp->p_pfsflags;
343 	  psp->events = procp->p_stops;
344 	  psp->why = procp->p_stype;	/* why it stopped */
345 	  psp->val = procp->p_xstat;	/* any extra info */
346 	  break;
347 	case PIOCCONT:	/* Restart a proc */
348 	  /*
349 	   * NOTE: syscall entry deals with stopevents and may run without
350 	   *	   the MP lock.  However, the caller is presumably interlocked
351 	   *	   by having waited.
352 	   */
353 	  if (procp->p_step == 0)
354 	    return EINVAL;	/* Can only start a stopped process */
355 	  if ((signo = *(int*)ap->a_data) != 0) {
356 	    if (signo >= NSIG || signo <= 0)
357 	      return EINVAL;
358 	    ksignal(procp, signo);
359 	  }
360 	  procp->p_step = 0;
361 	  wakeup(&procp->p_step);
362 	  break;
363 	default:
364 	  return (ENOTTY);
365 	}
366 	return 0;
367 }
368 
369 /*
370  * do block mapping for pfsnode (vp).
371  * since we don't use the buffer cache
372  * for procfs this function should never
373  * be called.  in any case, it's not clear
374  * what part of the kernel ever makes use
375  * of this function.  for sanity, this is the
376  * usual no-op bmap, although returning
377  * (EIO) would be a reasonable alternative.
378  *
379  * XXX mmap assumes buffer cache operation
380  *
381  * procfs_bmap(struct vnode *a_vp, off_t a_loffset,
382  *		off_t *a_doffsetp, int *a_runp, int *a_runb)
383  */
384 static int
385 procfs_bmap(struct vop_bmap_args *ap)
386 {
387 	if (ap->a_doffsetp != NULL)
388 		*ap->a_doffsetp = ap->a_loffset;
389 	if (ap->a_runp != NULL)
390 		*ap->a_runp = 0;
391 	if (ap->a_runb != NULL)
392 		*ap->a_runb = 0;
393 	return (0);
394 }
395 
396 /*
397  * procfs_inactive is called when the pfsnode
398  * is vrele'd and the reference count goes
399  * to zero.  (vp) will be on the vnode free
400  * list, so to get it back vget() must be
401  * used.
402  *
403  * (vp) is locked on entry, but must be unlocked on exit.
404  *
405  * procfs_inactive(struct vnode *a_vp)
406  */
407 static int
408 procfs_inactive(struct vop_inactive_args *ap)
409 {
410 	struct pfsnode *pfs = VTOPFS(ap->a_vp);
411 
412 	if (pfs->pfs_pid & PFS_DEAD)
413 		vrecycle(ap->a_vp);
414 	return (0);
415 }
416 
417 /*
418  * _reclaim is called when getnewvnode()
419  * wants to make use of an entry on the vnode
420  * free list.  at this time the filesystem needs
421  * to free any private data and remove the node
422  * from any private lists.
423  *
424  * procfs_reclaim(struct vnode *a_vp)
425  */
426 static int
427 procfs_reclaim(struct vop_reclaim_args *ap)
428 {
429 	return (procfs_freevp(ap->a_vp));
430 }
431 
432 /*
433  * _print is used for debugging.
434  * just print a readable description
435  * of (vp).
436  *
437  * procfs_print(struct vnode *a_vp)
438  */
439 static int
440 procfs_print(struct vop_print_args *ap)
441 {
442 	struct pfsnode *pfs = VTOPFS(ap->a_vp);
443 
444 	kprintf("tag VT_PROCFS, type %d, pid %ld, mode %x, flags %lx\n",
445 	    pfs->pfs_type, (long)pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags);
446 	return (0);
447 }
448 
449 /*
450  * generic entry point for unsupported operations
451  */
452 static int
453 procfs_badop(struct vop_generic_args *ap)
454 {
455 	return (EIO);
456 }
457 
458 /*
459  * Invent attributes for pfsnode (vp) and store
460  * them in (vap).
461  * Directories lengths are returned as zero since
462  * any real length would require the genuine size
463  * to be computed, and nothing cares anyway.
464  *
465  * this is relatively minimal for procfs.
466  *
467  * procfs_getattr(struct vnode *a_vp, struct vattr *a_vap)
468  */
469 static int
470 procfs_getattr(struct vop_getattr_args *ap)
471 {
472 	struct pfsnode *pfs = VTOPFS(ap->a_vp);
473 	struct vattr *vap = ap->a_vap;
474 	struct proc *procp;
475 	int error;
476 
477 	/*
478 	 * First make sure that the process and its credentials
479 	 * still exist.
480 	 */
481 	switch (pfs->pfs_type) {
482 	case Proot:
483 	case Pcurproc:
484 		procp = 0;
485 		break;
486 
487 	default:
488 		procp = PFIND(pfs->pfs_pid);
489 		if (procp == NULL || procp->p_ucred == NULL)
490 			return (ENOENT);
491 	}
492 
493 	error = 0;
494 
495 	/* start by zeroing out the attributes */
496 	VATTR_NULL(vap);
497 
498 	/* next do all the common fields */
499 	vap->va_type = ap->a_vp->v_type;
500 	vap->va_mode = pfs->pfs_mode;
501 	vap->va_fileid = pfs->pfs_fileno;
502 	vap->va_flags = 0;
503 	vap->va_blocksize = PAGE_SIZE;
504 	vap->va_bytes = vap->va_size = 0;
505 	vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
506 
507 	/*
508 	 * Make all times be current TOD.
509 	 * It would be possible to get the process start
510 	 * time from the p_stat structure, but there's
511 	 * no "file creation" time stamp anyway, and the
512 	 * p_stat structure is not addressible if u. gets
513 	 * swapped out for that process.
514 	 */
515 	nanotime(&vap->va_ctime);
516 	vap->va_atime = vap->va_mtime = vap->va_ctime;
517 
518 	/*
519 	 * If the process has exercised some setuid or setgid
520 	 * privilege, then rip away read/write permission so
521 	 * that only root can gain access.
522 	 */
523 	switch (pfs->pfs_type) {
524 	case Pctl:
525 	case Pregs:
526 	case Pfpregs:
527 	case Pdbregs:
528 	case Pmem:
529 		if (procp->p_flag & P_SUGID)
530 			vap->va_mode &= ~((VREAD|VWRITE)|
531 					  ((VREAD|VWRITE)>>3)|
532 					  ((VREAD|VWRITE)>>6));
533 		break;
534 	default:
535 		break;
536 	}
537 
538 	/*
539 	 * now do the object specific fields
540 	 *
541 	 * The size could be set from struct reg, but it's hardly
542 	 * worth the trouble, and it puts some (potentially) machine
543 	 * dependent data into this machine-independent code.  If it
544 	 * becomes important then this function should break out into
545 	 * a per-file stat function in the corresponding .c file.
546 	 */
547 
548 	vap->va_nlink = 1;
549 	if (procp) {
550 		vap->va_uid = procp->p_ucred->cr_uid;
551 		vap->va_gid = procp->p_ucred->cr_gid;
552 	}
553 
554 	switch (pfs->pfs_type) {
555 	case Proot:
556 		/*
557 		 * Set nlink to 1 to tell fts(3) we don't actually know.
558 		 */
559 		vap->va_nlink = 1;
560 		vap->va_uid = 0;
561 		vap->va_gid = 0;
562 		vap->va_size = vap->va_bytes = DEV_BSIZE;
563 		break;
564 
565 	case Pcurproc: {
566 		char buf[16];		/* should be enough */
567 		vap->va_uid = 0;
568 		vap->va_gid = 0;
569 		vap->va_size = vap->va_bytes =
570 		    ksnprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid);
571 		break;
572 	}
573 
574 	case Pproc:
575 		vap->va_nlink = nproc_targets;
576 		vap->va_size = vap->va_bytes = DEV_BSIZE;
577 		break;
578 
579 	case Pfile: {
580 		char *fullpath, *freepath;
581 		error = cache_fullpath(procp, &procp->p_textnch, &fullpath, &freepath, 0);
582 		if (error == 0) {
583 			vap->va_size = strlen(fullpath);
584 			kfree(freepath, M_TEMP);
585 		} else {
586 			vap->va_size = sizeof("unknown") - 1;
587 			error = 0;
588 		}
589 		vap->va_bytes = vap->va_size;
590 		break;
591 	}
592 
593 	case Pmem:
594 		/*
595 		 * If we denied owner access earlier, then we have to
596 		 * change the owner to root - otherwise 'ps' and friends
597 		 * will break even though they are setgid kmem. *SIGH*
598 		 */
599 		if (procp->p_flag & P_SUGID)
600 			vap->va_uid = 0;
601 		else
602 			vap->va_uid = procp->p_ucred->cr_uid;
603 		break;
604 
605 	case Pregs:
606 		vap->va_bytes = vap->va_size = sizeof(struct reg);
607 		break;
608 
609 	case Pfpregs:
610 		vap->va_bytes = vap->va_size = sizeof(struct fpreg);
611 		break;
612 
613         case Pdbregs:
614                 vap->va_bytes = vap->va_size = sizeof(struct dbreg);
615                 break;
616 
617 	case Ptype:
618 	case Pmap:
619 	case Pctl:
620 	case Pstatus:
621 	case Pnote:
622 	case Pnotepg:
623 	case Pcmdline:
624 	case Prlimit:
625 		break;
626 
627 	default:
628 		panic("procfs_getattr");
629 	}
630 
631 	return (error);
632 }
633 
634 /*
635  * procfs_setattr(struct vnode *a_vp, struct vattr *a_vap,
636  *		  struct ucred *a_cred)
637  */
638 static int
639 procfs_setattr(struct vop_setattr_args *ap)
640 {
641 	if (ap->a_vap->va_flags != VNOVAL)
642 		return (EOPNOTSUPP);
643 
644 	/*
645 	 * just fake out attribute setting
646 	 * it's not good to generate an error
647 	 * return, otherwise things like creat()
648 	 * will fail when they try to set the
649 	 * file length to 0.  worse, this means
650 	 * that echo $note > /proc/$pid/note will fail.
651 	 */
652 
653 	return (0);
654 }
655 
656 /*
657  * implement access checking.
658  *
659  * procfs_access(struct vnode *a_vp, int a_mode, struct ucred *a_cred)
660  */
661 static int
662 procfs_access(struct vop_access_args *ap)
663 {
664 	struct vattr vattr;
665 	int error;
666 
667 	error = VOP_GETATTR(ap->a_vp, &vattr);
668 	if (!error)
669 		error = vop_helper_access(ap, vattr.va_uid, vattr.va_gid,
670 				vattr.va_mode, 0);
671 	return (error);
672 }
673 
674 /*
675  * lookup.  this is incredibly complicated in the general case, however
676  * for most pseudo-filesystems very little needs to be done.
677  *
678  * procfs_lookup(struct vnode *a_dvp, struct vnode **a_vpp,
679  *		 struct componentname *a_cnp)
680  */
681 static int
682 procfs_lookup(struct vop_old_lookup_args *ap)
683 {
684 	struct componentname *cnp = ap->a_cnp;
685 	struct vnode **vpp = ap->a_vpp;
686 	struct vnode *dvp = ap->a_dvp;
687 	char *pname = cnp->cn_nameptr;
688 	/* struct proc *curp = cnp->cn_proc; */
689 	struct proc_target *pt;
690 	pid_t pid;
691 	struct pfsnode *pfs;
692 	struct proc *p;
693 	struct lwp *lp;
694 	int i;
695 	int error;
696 
697 	*vpp = NULL;
698 
699 	if (cnp->cn_nameiop == NAMEI_DELETE || cnp->cn_nameiop == NAMEI_RENAME)
700 		return (EROFS);
701 
702 	error = 0;
703 	if (cnp->cn_namelen == 1 && *pname == '.') {
704 		*vpp = dvp;
705 		vref(*vpp);
706 		goto out;
707 	}
708 
709 	pfs = VTOPFS(dvp);
710 	switch (pfs->pfs_type) {
711 	case Proot:
712 		if (cnp->cn_flags & CNP_ISDOTDOT)
713 			return (EIO);
714 
715 		if (CNEQ(cnp, "curproc", 7)) {
716 			error = procfs_allocvp(dvp->v_mount, vpp, 0, Pcurproc);
717 			goto out;
718 		}
719 
720 		pid = atopid(pname, cnp->cn_namelen);
721 		if (pid == NO_PID)
722 			break;
723 
724 		p = PFIND(pid);
725 		if (p == NULL)
726 			break;
727 
728 		if (!PRISON_CHECK(ap->a_cnp->cn_cred, p->p_ucred))
729 			break;
730 
731 		if (ps_showallprocs == 0 && ap->a_cnp->cn_cred->cr_uid != 0 &&
732 		    ap->a_cnp->cn_cred->cr_uid != p->p_ucred->cr_uid)
733 			break;
734 
735 		error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc);
736 		goto out;
737 
738 	case Pproc:
739 		if (cnp->cn_flags & CNP_ISDOTDOT) {
740 			error = procfs_root(dvp->v_mount, vpp);
741 			goto out;
742 		}
743 
744 		p = PFIND(pfs->pfs_pid);
745 		if (p == NULL)
746 			break;
747 		/* XXX lwp */
748 		lp = FIRST_LWP_IN_PROC(p);
749 
750 		if (!PRISON_CHECK(ap->a_cnp->cn_cred, p->p_ucred))
751 			break;
752 
753 		if (ps_showallprocs == 0 && ap->a_cnp->cn_cred->cr_uid != 0 &&
754 		    ap->a_cnp->cn_cred->cr_uid != p->p_ucred->cr_uid)
755 			break;
756 
757 		for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) {
758 			if (cnp->cn_namelen == pt->pt_namlen &&
759 			    bcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
760 			    (pt->pt_valid == NULL || (*pt->pt_valid)(lp)))
761 				goto found;
762 		}
763 		break;
764 	found:
765 		error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
766 					pt->pt_pfstype);
767 		goto out;
768 
769 	default:
770 		error = ENOTDIR;
771 		goto out;
772 	}
773 	if (cnp->cn_nameiop == NAMEI_LOOKUP)
774 		error = ENOENT;
775 	else
776 		error = EROFS;
777 	/*
778 	 * If no error occured *vpp will hold a referenced locked vnode.
779 	 * dvp was passed to us locked and *vpp must be returned locked.
780 	 * If *vpp != dvp then we should unlock dvp if (1) this is not the
781 	 * last component or (2) CNP_LOCKPARENT is not set.
782 	 */
783 out:
784 	if (error == 0 && *vpp != dvp) {
785 		if ((cnp->cn_flags & CNP_LOCKPARENT) == 0) {
786 			cnp->cn_flags |= CNP_PDIRUNLOCK;
787 			vn_unlock(dvp);
788 		}
789 	}
790 	return (error);
791 }
792 
793 /*
794  * Does this process have a text file?
795  */
796 int
797 procfs_validfile(struct lwp *lp)
798 {
799 	return (procfs_findtextvp(lp->lwp_proc) != NULLVP);
800 }
801 
802 /*
803  * readdir() returns directory entries from pfsnode (vp).
804  *
805  * We generate just one directory entry at a time, as it would probably
806  * not pay off to buffer several entries locally to save uiomove calls.
807  *
808  * procfs_readdir(struct vnode *a_vp, struct uio *a_uio, struct ucred *a_cred,
809  *		  int *a_eofflag, int *a_ncookies, off_t **a_cookies)
810  */
811 static int
812 procfs_readdir(struct vop_readdir_args *ap)
813 {
814 	struct pfsnode *pfs;
815 	int error;
816 
817 	if (ap->a_uio->uio_offset < 0 || ap->a_uio->uio_offset > INT_MAX)
818 		return (EINVAL);
819 	if ((error = vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY)) != 0)
820 		return (error);
821 	pfs = VTOPFS(ap->a_vp);
822 
823 	switch (pfs->pfs_type) {
824 	case Pproc:
825 		/*
826 		 * this is for the process-specific sub-directories.
827 		 * all that is needed to is copy out all the entries
828 		 * from the procent[] table (top of this file).
829 		 */
830 		error = procfs_readdir_proc(ap);
831 		break;
832 	case Proot:
833 		/*
834 		 * this is for the root of the procfs filesystem
835 		 * what is needed is a special entry for "curproc"
836 		 * followed by an entry for each process on allproc
837 		 */
838 		error = procfs_readdir_root(ap);
839 		break;
840 	default:
841 		error = ENOTDIR;
842 		break;
843 	}
844 
845 	vn_unlock(ap->a_vp);
846 	return (error);
847 }
848 
849 static int
850 procfs_readdir_proc(struct vop_readdir_args *ap)
851 {
852 	struct pfsnode *pfs;
853 	int error, i, retval;
854 	struct proc *p;
855 	struct lwp *lp;
856 	struct proc_target *pt;
857 	struct uio *uio = ap->a_uio;
858 
859 	pfs = VTOPFS(ap->a_vp);
860 	p = PFIND(pfs->pfs_pid);
861 	if (p == NULL)
862 		return(0);
863 	if (!PRISON_CHECK(ap->a_cred, p->p_ucred))
864 		return(0);
865 	/* XXX lwp */
866 	lp = FIRST_LWP_IN_PROC(p);
867 
868 	error = 0;
869 	i = (int)uio->uio_offset;
870 	if (i < 0)
871 		return (EINVAL);
872 
873 	for (pt = &proc_targets[i];
874 	     !error && uio->uio_resid > 0 && i < nproc_targets; pt++, i++) {
875 		if (pt->pt_valid && (*pt->pt_valid)(lp) == 0)
876 			continue;
877 
878 		retval = vop_write_dirent(&error, uio,
879 		    PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype), pt->pt_type,
880 		    pt->pt_namlen, pt->pt_name);
881 		if (retval)
882 			break;
883 	}
884 
885 	uio->uio_offset = (off_t)i;
886 
887 	return(0);
888 }
889 
/*
 * State shared between procfs_readdir_root() and its per-entry
 * callback.
 */
struct procfs_readdir_root_info {
	int error;		/* error stored by vop_write_dirent() */
	int i;			/* directory offset: entries to skip, then
				 * advanced for every entry written */
	int pcnt;		/* entries visited so far; values 0..2 select
				 * ".", ".." and "curproc" */
	struct uio *uio;	/* destination of the directory entries */
	struct ucred *cred;	/* caller's credentials for visibility checks */
};

static int	procfs_readdir_root_callback(struct proc *p, void *data);
899 
900 static int
901 procfs_readdir_root(struct vop_readdir_args *ap)
902 {
903 	struct procfs_readdir_root_info info;
904 	struct uio *uio = ap->a_uio;
905 	int res;
906 
907 	info.error = 0;
908 	info.i = (int)uio->uio_offset;
909 
910 	if (info.i < 0)
911 		return (EINVAL);
912 
913 	info.pcnt = 0;
914 	info.uio = uio;
915 	info.cred = ap->a_cred;
916 	while (info.pcnt < 3) {
917 		res = procfs_readdir_root_callback(NULL, &info);
918 		if (res < 0)
919 			break;
920 	}
921 	if (res >= 0)
922 		allproc_scan(procfs_readdir_root_callback, &info);
923 	uio->uio_offset = (off_t)info.i;
924 
925 	return (info.error);
926 }
927 
928 static int
929 procfs_readdir_root_callback(struct proc *p, void *data)
930 {
931 	struct procfs_readdir_root_info *info = data;
932 	struct uio *uio;
933 	int retval;
934 	ino_t d_ino;
935 	const char *d_name;
936 	char d_name_pid[20];
937 	size_t d_namlen;
938 	uint8_t d_type;
939 
940 	uio = info->uio;
941 
942 	if (uio->uio_resid <= 0 || info->error)
943 		return(-1);
944 
945 	switch (info->pcnt) {
946 	case 0:		/* `.' */
947 		d_ino = PROCFS_FILENO(0, Proot);
948 		d_name = ".";
949 		d_namlen = 1;
950 		d_type = DT_DIR;
951 		break;
952 	case 1:		/* `..' */
953 		d_ino = PROCFS_FILENO(0, Proot);
954 		d_name = "..";
955 		d_namlen = 2;
956 		d_type = DT_DIR;
957 		break;
958 
959 	case 2:
960 		d_ino = PROCFS_FILENO(0, Pcurproc);
961 		d_namlen = 7;
962 		d_name = "curproc";
963 		d_type = DT_LNK;
964 		break;
965 
966 
967 	default:
968 		if (!PRISON_CHECK(info->cred, p->p_ucred))
969 			return(0);
970 		if (ps_showallprocs == 0 &&
971 		    info->cred->cr_uid != 0 &&
972 		    info->cred->cr_uid != p->p_ucred->cr_uid) {
973 			return(0);
974 		}
975 
976 		/*
977 		 * Skip entries we have already returned (optimization)
978 		 */
979 		if (info->pcnt < info->i) {
980 			++info->pcnt;
981 			return(0);
982 		}
983 
984 		d_ino = PROCFS_FILENO(p->p_pid, Pproc);
985 		d_namlen = ksnprintf(d_name_pid, sizeof(d_name_pid),
986 		    "%ld", (long)p->p_pid);
987 		d_name = d_name_pid;
988 		d_type = DT_DIR;
989 		break;
990 	}
991 
992 	/*
993 	 * Skip entries we have already returned (optimization)
994 	 */
995 	if (info->pcnt < info->i) {
996 		++info->pcnt;
997 		return(0);
998 	}
999 
1000 	retval = vop_write_dirent(&info->error, uio,
1001 				  d_ino, d_type, d_namlen, d_name);
1002 	if (retval)
1003 		return(-1);
1004 	++info->pcnt;
1005 	++info->i;
1006 	return(0);
1007 }
1008 
1009 /*
1010  * readlink reads the link of `curproc' or `file'
1011  */
1012 static int
1013 procfs_readlink(struct vop_readlink_args *ap)
1014 {
1015 	char buf[16];		/* should be enough */
1016 	struct proc *procp;
1017 	struct vnode *vp = ap->a_vp;
1018 	struct pfsnode *pfs = VTOPFS(vp);
1019 	char *fullpath, *freepath;
1020 	int error, len;
1021 
1022 	switch (pfs->pfs_type) {
1023 	case Pcurproc:
1024 		if (pfs->pfs_fileno != PROCFS_FILENO(0, Pcurproc))
1025 			return (EINVAL);
1026 
1027 		len = ksnprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid);
1028 
1029 		return (uiomove(buf, len, ap->a_uio));
1030 	/*
1031 	 * There _should_ be no way for an entire process to disappear
1032 	 * from under us...
1033 	 */
1034 	case Pfile:
1035 		procp = PFIND(pfs->pfs_pid);
1036 		if (procp == NULL || procp->p_ucred == NULL) {
1037 			kprintf("procfs_readlink: pid %d disappeared\n",
1038 			    pfs->pfs_pid);
1039 			return (uiomove("unknown", sizeof("unknown") - 1,
1040 			    ap->a_uio));
1041 		}
1042 		error = cache_fullpath(procp, &procp->p_textnch, &fullpath, &freepath, 0);
1043 		if (error != 0)
1044 			return (uiomove("unknown", sizeof("unknown") - 1,
1045 			    ap->a_uio));
1046 		error = uiomove(fullpath, strlen(fullpath), ap->a_uio);
1047 		kfree(freepath, M_TEMP);
1048 		return (error);
1049 	default:
1050 		return (EINVAL);
1051 	}
1052 }
1053 
1054 /*
1055  * convert decimal ascii to pid_t
1056  */
1057 static pid_t
1058 atopid(const char *b, u_int len)
1059 {
1060 	pid_t p = 0;
1061 
1062 	while (len--) {
1063 		char c = *b++;
1064 		if (c < '0' || c > '9')
1065 			return (NO_PID);
1066 		p = 10 * p + (c - '0');
1067 		if (p > PID_MAX)
1068 			return (NO_PID);
1069 	}
1070 
1071 	return (p);
1072 }
1073 
1074