1 /*
2 * Copyright (c) 1993, 1995 Jan-Simon Pendry
3 * Copyright (c) 1993, 1995
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Jan-Simon Pendry.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95
34 *
35 * $FreeBSD: src/sys/miscfs/procfs/procfs_vnops.c,v 1.76.2.7 2002/01/22 17:22:59 nectar Exp $
36 */
37
38 /*
39 * procfs vnode interface
40 */
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/time.h>
45 #include <sys/kernel.h>
46 #include <sys/lock.h>
47 #include <sys/fcntl.h>
48 #include <sys/proc.h>
49 #include <sys/caps.h>
50 #include <sys/signalvar.h>
51 #include <sys/vnode.h>
52 #include <sys/uio.h>
53 #include <sys/mount.h>
54 #include <sys/namei.h>
55 #include <sys/dirent.h>
56 #include <sys/malloc.h>
57 #include <sys/reg.h>
58 #include <vm/vm_zone.h>
59 #include <vfs/procfs/procfs.h>
60 #include <sys/pioctl.h>
61
62 #include <sys/spinlock2.h>
63
64 #include <machine/limits.h>
65
/*
 * Forward declarations: the vnode operations implemented in this file,
 * followed by the two helpers that procfs_readdir() dispatches to.
 */
static int	procfs_access(struct vop_access_args *ap);
static int	procfs_badop(struct vop_generic_args *ap);
static int	procfs_bmap(struct vop_bmap_args *ap);
static int	procfs_close(struct vop_close_args *ap);
static int	procfs_getattr(struct vop_getattr_args *ap);
static int	procfs_inactive(struct vop_inactive_args *ap);
static int	procfs_ioctl(struct vop_ioctl_args *ap);
static int	procfs_kqfilter(struct vop_kqfilter_args *ap);
static int	procfs_lookup(struct vop_old_lookup_args *ap);
static int	procfs_open(struct vop_open_args *ap);
static int	procfs_print(struct vop_print_args *ap);
static int	procfs_readdir(struct vop_readdir_args *ap);
static int	procfs_readlink(struct vop_readlink_args *ap);
static int	procfs_reclaim(struct vop_reclaim_args *ap);
static int	procfs_setattr(struct vop_setattr_args *ap);

static int	procfs_readdir_proc(struct vop_readdir_args *ap);
static int	procfs_readdir_root(struct vop_readdir_args *ap);
84
85 /*
86 * procfs vnode operations.
87 */
88 struct vop_ops procfs_vnode_vops = {
89 .vop_default = vop_defaultop,
90 .vop_access = procfs_access,
91 .vop_advlock = (void *)procfs_badop,
92 .vop_bmap = procfs_bmap,
93 .vop_close = procfs_close,
94 .vop_old_create = (void *)procfs_badop,
95 .vop_getattr = procfs_getattr,
96 .vop_inactive = procfs_inactive,
97 .vop_old_link = (void *)procfs_badop,
98 .vop_old_lookup = procfs_lookup,
99 .vop_old_mkdir = (void *)procfs_badop,
100 .vop_old_mknod = (void *)procfs_badop,
101 .vop_open = procfs_open,
102 .vop_pathconf = vop_stdpathconf,
103 .vop_print = procfs_print,
104 .vop_read = procfs_rw,
105 .vop_readdir = procfs_readdir,
106 .vop_readlink = procfs_readlink,
107 .vop_reclaim = procfs_reclaim,
108 .vop_old_remove = (void *)procfs_badop,
109 .vop_old_rename = (void *)procfs_badop,
110 .vop_old_rmdir = (void *)procfs_badop,
111 .vop_setattr = procfs_setattr,
112 .vop_old_symlink = (void *)procfs_badop,
113 .vop_write = (void *)procfs_rw,
114 .vop_ioctl = procfs_ioctl,
115 .vop_kqfilter = procfs_kqfilter,
116 };
117
118
119 /*
120 * This is a list of the valid names in the
121 * process-specific sub-directories. It is
122 * used in procfs_lookup and procfs_readdir
123 */
124 static struct proc_target {
125 u_char pt_type;
126 u_char pt_namlen;
127 char *pt_name;
128 pfstype pt_pfstype;
129 int (*pt_valid) (struct lwp *p);
130 } proc_targets[] = {
131 #define N(s) sizeof(s)-1, s
132 /* name type validp */
133 { DT_DIR, N("."), Pproc, NULL },
134 { DT_DIR, N(".."), Proot, NULL },
135 { DT_REG, N("mem"), Pmem, NULL },
136 { DT_REG, N("regs"), Pregs, procfs_validregs },
137 { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs },
138 { DT_REG, N("dbregs"), Pdbregs, procfs_validdbregs },
139 { DT_REG, N("ctl"), Pctl, NULL },
140 { DT_REG, N("status"), Pstatus, NULL },
141 { DT_REG, N("note"), Pnote, NULL },
142 { DT_REG, N("notepg"), Pnotepg, NULL },
143 { DT_REG, N("map"), Pmap, procfs_validmap },
144 { DT_REG, N("etype"), Ptype, procfs_validtype },
145 { DT_REG, N("cmdline"), Pcmdline, NULL },
146 { DT_REG, N("rlimit"), Prlimit, NULL },
147 { DT_LNK, N("file"), Pfile, NULL },
148 { DT_LNK, N("exe"), Pfile, NULL },
149 #undef N
150 };
151 static const int nproc_targets = NELEM(proc_targets);
152
153 static pid_t atopid (const char *, u_int);
154
155 /*
156 * set things up for doing i/o on
157 * the pfsnode (vp). (vp) is locked
158 * on entry, and should be left locked
159 * on exit.
160 *
161 * for procfs we don't need to do anything
162 * in particular for i/o. all that is done
163 * is to support exclusive open on process
164 * memory images.
165 *
166 * procfs_open(struct vnode *a_vp, int a_mode, struct ucred *a_cred,
167 * struct file *a_fp)
168 */
169 static int
procfs_open(struct vop_open_args * ap)170 procfs_open(struct vop_open_args *ap)
171 {
172 struct pfsnode *pfs = VTOPFS(ap->a_vp);
173 struct proc *p1, *p2;
174 int error;
175
176 p2 = pfs_pfind(pfs->pfs_pid);
177 if (p2 == NULL)
178 return (ENOENT);
179 if (pfs->pfs_pid && !PRISON_CHECK(ap->a_cred, p2->p_ucred)) {
180 error = ENOENT;
181 goto done;
182 }
183
184 switch (pfs->pfs_type) {
185 case Pmem:
186 if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) ||
187 ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))) {
188 error = EBUSY;
189 goto done;
190 }
191
192 p1 = curproc;
193 KKASSERT(p1);
194 /* Can't trace a process that's currently exec'ing. */
195 if ((p2->p_flags & P_INEXEC) != 0) {
196 error = EAGAIN;
197 goto done;
198 }
199 if (!CHECKIO(p1, p2) || p_trespass(ap->a_cred, p2->p_ucred)) {
200 error = EPERM;
201 goto done;
202 }
203
204 if (ap->a_mode & FWRITE)
205 pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
206
207 break;
208
209 default:
210 break;
211 }
212 error = vop_stdopen(ap);
213 done:
214 pfs_pdone(p2);
215 return error;
216 }
217
218 /*
219 * close the pfsnode (vp) after doing i/o.
220 * (vp) is not locked on entry or exit.
221 *
222 * nothing to do for procfs other than undo
223 * any exclusive open flag (see _open above).
224 *
225 * procfs_close(struct vnode *a_vp, int a_fflag, struct ucred *a_cred)
226 */
227 static int
procfs_close(struct vop_close_args * ap)228 procfs_close(struct vop_close_args *ap)
229 {
230 struct pfsnode *pfs = VTOPFS(ap->a_vp);
231 struct proc *p;
232
233 /*
234 * Make sure the lock is exclusive for opencount tests
235 */
236 vn_lock(ap->a_vp, LK_UPGRADE | LK_RETRY);
237
238 switch (pfs->pfs_type) {
239 case Pmem:
240 if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
241 pfs->pfs_flags &= ~(FWRITE|O_EXCL);
242 /*
243 * v_opencount determines the last real close on the vnode.
244 *
245 * If this is the last close, then it checks to see if
246 * the target process has PF_LINGER set in p_pfsflags,
247 * if this is *not* the case, then the process' stop flags
248 * are cleared, and the process is woken up. This is
249 * to help prevent the case where a process has been
250 * told to stop on an event, but then the requesting process
251 * has gone away or forgotten about it.
252 */
253 p = NULL;
254 if ((ap->a_vp->v_opencount < 2)
255 && ((p = pfs_pfind(pfs->pfs_pid)) != NULL ||
256 (p = pfs_zpfind(pfs->pfs_pid)) != NULL)
257 && !(p->p_pfsflags & PF_LINGER)) {
258 spin_lock(&p->p_spin);
259 p->p_stops = 0;
260 p->p_step = 0;
261 spin_unlock(&p->p_spin);
262 wakeup(&p->p_stype);
263 wakeup(&p->p_step);
264 }
265 pfs_pdone(p);
266 break;
267 default:
268 break;
269 }
270
271 return (vop_stdclose(ap));
272 }
273
274 /*
275 * do an ioctl operation on a pfsnode (vp).
276 * (vp) is not locked on entry or exit.
277 */
278 static int
procfs_ioctl(struct vop_ioctl_args * ap)279 procfs_ioctl(struct vop_ioctl_args *ap)
280 {
281 struct pfsnode *pfs = VTOPFS(ap->a_vp);
282 struct proc *procp;
283 struct proc *p;
284 int error;
285 int signo;
286 struct procfs_status *psp;
287 unsigned char flags;
288
289 procp = pfs_pfind(pfs->pfs_pid);
290 if (procp == NULL)
291 return ENOTTY;
292 p = curproc;
293 if (p == NULL) {
294 error = EINVAL;
295 goto done;
296 }
297
298 /* Can't trace a process that's currently exec'ing. */
299 if ((procp->p_flags & P_INEXEC) != 0) {
300 error = EAGAIN;
301 goto done;
302 }
303 if (!CHECKIO(p, procp) || p_trespass(ap->a_cred, procp->p_ucred)) {
304 error = EPERM;
305 goto done;
306 }
307
308 switch (ap->a_command) {
309 case PIOCBIS:
310 spin_lock(&procp->p_spin);
311 procp->p_stops |= *(unsigned int*)ap->a_data;
312 spin_unlock(&procp->p_spin);
313 break;
314 case PIOCBIC:
315 spin_lock(&procp->p_spin);
316 procp->p_stops &= ~*(unsigned int*)ap->a_data;
317 spin_unlock(&procp->p_spin);
318 break;
319 case PIOCSFL:
320 /*
321 * NFLAGS is "non-suser_xxx flags" -- currently, only
322 * PFS_ISUGID ("ignore set u/g id");
323 */
324 #define NFLAGS (PF_ISUGID)
325 flags = (unsigned char)*(unsigned int*)ap->a_data;
326 if (flags & NFLAGS &&
327 (error = caps_priv_check(ap->a_cred,
328 SYSCAP_RESTRICTEDROOT))) {
329 goto done;
330 }
331 procp->p_pfsflags = flags;
332 break;
333 case PIOCGFL:
334 *(unsigned int*)ap->a_data = (unsigned int)procp->p_pfsflags;
335 break;
336 case PIOCSTATUS:
337 /*
338 * NOTE: syscall entry deals with stopevents and may run
339 * without the MP lock.
340 */
341 psp = (struct procfs_status *)ap->a_data;
342 psp->flags = procp->p_pfsflags;
343 psp->events = procp->p_stops;
344 spin_lock(&procp->p_spin);
345 if (procp->p_step) {
346 psp->state = 0;
347 psp->why = procp->p_stype;
348 psp->val = procp->p_xstat;
349 spin_unlock(&procp->p_spin);
350 } else {
351 psp->state = 1;
352 spin_unlock(&procp->p_spin);
353 psp->why = 0; /* Not defined values */
354 psp->val = 0; /* Not defined values */
355 }
356 break;
357 case PIOCWAIT:
358 /*
359 * NOTE: syscall entry deals with stopevents and may run
360 * without the MP lock.
361 */
362 psp = (struct procfs_status *)ap->a_data;
363 spin_lock(&procp->p_spin);
364 while (procp->p_step == 0) {
365 tsleep_interlock(&procp->p_stype, PCATCH);
366 spin_unlock(&procp->p_spin);
367 if (procp->p_stops == 0) {
368 error = 0;
369 goto done;
370 }
371 if (procp->p_flags & P_POSTEXIT) {
372 error = EINVAL;
373 goto done;
374 }
375 if (procp->p_flags & P_INEXEC) {
376 error = EAGAIN;
377 goto done;
378 }
379 error = tsleep(&procp->p_stype, PCATCH | PINTERLOCKED,
380 "piocwait", 0);
381 if (error)
382 goto done;
383 spin_lock(&procp->p_spin);
384 }
385 spin_unlock(&procp->p_spin);
386 psp->state = 1; /* It stopped */
387 psp->flags = procp->p_pfsflags;
388 psp->events = procp->p_stops;
389 psp->why = procp->p_stype; /* why it stopped */
390 psp->val = procp->p_xstat; /* any extra info */
391 break;
392 case PIOCCONT: /* Restart a proc */
393 /*
394 * NOTE: syscall entry deals with stopevents and may run
395 * without the MP lock. However, the caller is
396 * presumably interlocked by having waited.
397 */
398 if (procp->p_step == 0) {
399 error = EINVAL; /* Can only start a stopped process */
400 goto done;
401 }
402 if ((signo = *(int*)ap->a_data) != 0) {
403 if (signo >= NSIG || signo <= 0) {
404 error = EINVAL;
405 goto done;
406 }
407 ksignal(procp, signo);
408 }
409 procp->p_step = 0;
410 wakeup(&procp->p_step);
411 break;
412 default:
413 error = ENOTTY;
414 goto done;
415 }
416 error = 0;
417 done:
418 pfs_pdone(procp);
419 return error;
420 }
421
422 /*
423 * do block mapping for pfsnode (vp).
424 * since we don't use the buffer cache
425 * for procfs this function should never
426 * be called. in any case, it's not clear
427 * what part of the kernel ever makes use
428 * of this function. for sanity, this is the
429 * usual no-op bmap, although returning
430 * (EIO) would be a reasonable alternative.
431 *
432 * XXX mmap assumes buffer cache operation
433 *
434 * procfs_bmap(struct vnode *a_vp, off_t a_loffset,
435 * off_t *a_doffsetp, int *a_runp, int *a_runb)
436 */
437 static int
procfs_bmap(struct vop_bmap_args * ap)438 procfs_bmap(struct vop_bmap_args *ap)
439 {
440 if (ap->a_doffsetp != NULL)
441 *ap->a_doffsetp = ap->a_loffset;
442 if (ap->a_runp != NULL)
443 *ap->a_runp = 0;
444 if (ap->a_runb != NULL)
445 *ap->a_runb = 0;
446 return (0);
447 }
448
449 /*
450 * procfs_inactive is called when the pfsnode
451 * is vrele'd and the reference count goes
452 * to zero. (vp) will be on the vnode free
453 * list, so to get it back vget() must be
454 * used.
455 *
456 * (vp) is locked on entry, but must be unlocked on exit.
457 *
458 * procfs_inactive(struct vnode *a_vp)
459 */
460 static int
procfs_inactive(struct vop_inactive_args * ap)461 procfs_inactive(struct vop_inactive_args *ap)
462 {
463 struct pfsnode *pfs = VTOPFS(ap->a_vp);
464
465 if (pfs->pfs_pid & PFS_DEAD)
466 vrecycle(ap->a_vp);
467 return (0);
468 }
469
470 /*
471 * _reclaim is called when getnewvnode()
472 * wants to make use of an entry on the vnode
473 * free list. at this time the filesystem needs
474 * to free any private data and remove the node
475 * from any private lists.
476 *
477 * procfs_reclaim(struct vnode *a_vp)
478 */
479 static int
procfs_reclaim(struct vop_reclaim_args * ap)480 procfs_reclaim(struct vop_reclaim_args *ap)
481 {
482 return (procfs_freevp(ap->a_vp));
483 }
484
485 /*
486 * _print is used for debugging.
487 * just print a readable description
488 * of (vp).
489 *
490 * procfs_print(struct vnode *a_vp)
491 */
492 static int
procfs_print(struct vop_print_args * ap)493 procfs_print(struct vop_print_args *ap)
494 {
495 struct pfsnode *pfs = VTOPFS(ap->a_vp);
496
497 kprintf("tag VT_PROCFS, type %d, pid %ld, mode %x, flags %lx\n",
498 pfs->pfs_type, (long)pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags);
499 return (0);
500 }
501
/*
 * Catch-all for operations procfs does not support.  The argument is
 * ignored and EIO is always returned.
 */
static int
procfs_badop(struct vop_generic_args *ap)
{
	(void)ap;

	return (EIO);
}
510
511 /*
512 * Invent attributes for pfsnode (vp) and store
513 * them in (vap).
514 * Directories lengths are returned as zero since
515 * any real length would require the genuine size
516 * to be computed, and nothing cares anyway.
517 *
518 * this is relatively minimal for procfs.
519 *
520 * procfs_getattr(struct vnode *a_vp, struct vattr *a_vap)
521 */
522 static int
procfs_getattr(struct vop_getattr_args * ap)523 procfs_getattr(struct vop_getattr_args *ap)
524 {
525 struct pfsnode *pfs = VTOPFS(ap->a_vp);
526 struct vattr *vap = ap->a_vap;
527 struct proc *procp;
528 int error;
529
530 /*
531 * First make sure that the process and its credentials
532 * still exist.
533 */
534 switch (pfs->pfs_type) {
535 case Proot:
536 case Pcurproc:
537 procp = NULL;
538 break;
539 default:
540 procp = pfs_pfind(pfs->pfs_pid);
541 if (procp == NULL || procp->p_ucred == NULL) {
542 error = ENOENT;
543 goto done;
544 }
545 break;
546 }
547
548 error = 0;
549
550 /* start by zeroing out the attributes */
551 VATTR_NULL(vap);
552
553 /* next do all the common fields */
554 vap->va_type = ap->a_vp->v_type;
555 vap->va_mode = pfs->pfs_mode;
556 vap->va_fileid = pfs->pfs_fileno;
557 vap->va_flags = 0;
558 vap->va_blocksize = PAGE_SIZE;
559 vap->va_bytes = vap->va_size = 0;
560 vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
561
562 /*
563 * Make all times be current TOD.
564 * It would be possible to get the process start
565 * time from the p_stat structure, but there's
566 * no "file creation" time stamp anyway, and the
567 * p_stat structure is not addressible if u. gets
568 * swapped out for that process.
569 */
570 vfs_timestamp(&vap->va_ctime);
571 vap->va_atime = vap->va_mtime = vap->va_ctime;
572
573 /*
574 * If the process has exercised some setuid or setgid
575 * privilege, then rip away read/write permission so
576 * that only root can gain access.
577 */
578 switch (pfs->pfs_type) {
579 case Pctl:
580 case Pregs:
581 case Pfpregs:
582 case Pdbregs:
583 case Pmem:
584 if (procp->p_flags & P_SUGID) {
585 vap->va_mode &= ~((VREAD|VWRITE)|
586 ((VREAD|VWRITE)>>3)|
587 ((VREAD|VWRITE)>>6));
588 }
589 break;
590 default:
591 break;
592 }
593
594 /*
595 * now do the object specific fields
596 *
597 * The size could be set from struct reg, but it's hardly
598 * worth the trouble, and it puts some (potentially) machine
599 * dependent data into this machine-independent code. If it
600 * becomes important then this function should break out into
601 * a per-file stat function in the corresponding .c file.
602 */
603
604 vap->va_nlink = 1;
605 if (procp) {
606 if (procp->p_ucred) {
607 vap->va_uid = procp->p_ucred->cr_uid;
608 vap->va_gid = procp->p_ucred->cr_gid;
609 } else {
610 vap->va_uid = -1;
611 vap->va_gid = -1;
612 }
613 }
614
615 switch (pfs->pfs_type) {
616 case Proot:
617 /*
618 * Set nlink to 1 to tell fts(3) we don't actually know.
619 */
620 vap->va_nlink = 1;
621 vap->va_uid = 0;
622 vap->va_gid = 0;
623 vap->va_size = vap->va_bytes = DEV_BSIZE;
624 break;
625
626 case Pcurproc: {
627 char buf[16]; /* should be enough */
628
629 vap->va_uid = 0;
630 vap->va_gid = 0;
631 vap->va_size = ksnprintf(buf, sizeof(buf),
632 "%ld", (long)curproc->p_pid);
633 vap->va_bytes = vap->va_size;
634 break;
635 }
636
637 case Pproc:
638 vap->va_nlink = nproc_targets;
639 vap->va_size = vap->va_bytes = DEV_BSIZE;
640 break;
641
642 case Pfile: {
643 char *fullpath, *freepath;
644
645 if (procp->p_textnch.ncp) {
646 struct nchandle nch;
647
648 cache_copy(&procp->p_textnch, &nch);
649 error = cache_fullpath(procp, &nch, NULL,
650 &fullpath, &freepath, 0);
651 cache_drop(&nch);
652 } else {
653 error = EINVAL;
654 }
655
656 if (error == 0) {
657 vap->va_size = strlen(fullpath);
658 kfree(freepath, M_TEMP);
659 } else {
660 vap->va_size = sizeof("unknown") - 1;
661 error = 0;
662 }
663 vap->va_bytes = vap->va_size;
664 break;
665 }
666
667 case Pmem:
668 /*
669 * If we denied owner access earlier, then we have to
670 * change the owner to root - otherwise 'ps' and friends
671 * will break even though they are setgid kmem. *SIGH*
672 */
673 if (procp->p_flags & P_SUGID)
674 vap->va_uid = 0;
675 else if (procp->p_ucred)
676 vap->va_uid = procp->p_ucred->cr_uid;
677 else
678 vap->va_uid = -1;
679 break;
680
681 case Pregs:
682 vap->va_bytes = vap->va_size = sizeof(struct reg);
683 break;
684
685 case Pfpregs:
686 vap->va_bytes = vap->va_size = sizeof(struct fpreg);
687 break;
688
689 case Pdbregs:
690 vap->va_bytes = vap->va_size = sizeof(struct dbreg);
691 break;
692
693 case Ptype:
694 case Pmap:
695 case Pctl:
696 case Pstatus:
697 case Pnote:
698 case Pnotepg:
699 case Pcmdline:
700 case Prlimit:
701 break;
702
703 default:
704 panic("procfs_getattr");
705 }
706 done:
707 pfs_pdone(procp);
708 return (error);
709 }
710
711 /*
712 * procfs_setattr(struct vnode *a_vp, struct vattr *a_vap,
713 * struct ucred *a_cred)
714 */
715 static int
procfs_setattr(struct vop_setattr_args * ap)716 procfs_setattr(struct vop_setattr_args *ap)
717 {
718 if (ap->a_vap->va_flags != VNOVAL)
719 return (EOPNOTSUPP);
720
721 /*
722 * just fake out attribute setting
723 * it's not good to generate an error
724 * return, otherwise things like creat()
725 * will fail when they try to set the
726 * file length to 0. worse, this means
727 * that echo $note > /proc/$pid/note will fail.
728 */
729
730 return (0);
731 }
732
733 /*
734 * implement access checking.
735 *
736 * procfs_access(struct vnode *a_vp, int a_mode, struct ucred *a_cred)
737 */
738 static int
procfs_access(struct vop_access_args * ap)739 procfs_access(struct vop_access_args *ap)
740 {
741 struct vattr vattr;
742 int error;
743
744 error = VOP_GETATTR(ap->a_vp, &vattr);
745 if (!error)
746 error = vop_helper_access(ap, vattr.va_uid, vattr.va_gid,
747 vattr.va_mode, 0);
748 return (error);
749 }
750
751 /*
752 * lookup. this is incredibly complicated in the general case, however
753 * for most pseudo-filesystems very little needs to be done.
754 *
755 * procfs_lookup(struct vnode *a_dvp, struct vnode **a_vpp,
756 * struct componentname *a_cnp)
757 */
758 static int
procfs_lookup(struct vop_old_lookup_args * ap)759 procfs_lookup(struct vop_old_lookup_args *ap)
760 {
761 struct componentname *cnp = ap->a_cnp;
762 struct vnode **vpp = ap->a_vpp;
763 struct vnode *dvp = ap->a_dvp;
764 char *pname = cnp->cn_nameptr;
765 /* struct proc *curp = cnp->cn_proc; */
766 struct proc_target *pt;
767 pid_t pid;
768 struct pfsnode *pfs;
769 struct proc *p;
770 struct lwp *lp;
771 int i;
772 int error;
773
774 *vpp = NULL;
775
776 if (cnp->cn_nameiop == NAMEI_DELETE || cnp->cn_nameiop == NAMEI_RENAME)
777 return (EROFS);
778
779 p = NULL;
780 error = 0;
781 if (cnp->cn_namelen == 1 && *pname == '.') {
782 *vpp = dvp;
783 vref(*vpp);
784 goto out;
785 }
786
787 pfs = VTOPFS(dvp);
788 switch (pfs->pfs_type) {
789 case Proot:
790 if (cnp->cn_flags & CNP_ISDOTDOT)
791 return (EIO);
792
793 if (CNEQ(cnp, "curproc", 7) || CNEQ(cnp, "self", 4)) {
794 error = procfs_allocvp(dvp->v_mount, vpp, 0, Pcurproc);
795 goto out;
796 }
797
798 pid = atopid(pname, cnp->cn_namelen);
799 if (pid == NO_PID)
800 break;
801
802 p = pfs_pfind(pid);
803 if (p == NULL)
804 break;
805
806 if (!PRISON_CHECK(ap->a_cnp->cn_cred, p->p_ucred))
807 break;
808
809 if (ps_showallprocs == 0 && ap->a_cnp->cn_cred->cr_uid != 0 &&
810 ap->a_cnp->cn_cred->cr_uid != p->p_ucred->cr_uid)
811 break;
812
813 error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc);
814 goto out;
815
816 case Pproc:
817 if (cnp->cn_flags & CNP_ISDOTDOT) {
818 error = procfs_root(dvp->v_mount, vpp);
819 goto out;
820 }
821
822 p = pfs_pfind(pfs->pfs_pid);
823 if (p == NULL)
824 break;
825 /* XXX lwp */
826 lp = FIRST_LWP_IN_PROC(p);
827 if (lp == NULL)
828 break;
829
830 if (!PRISON_CHECK(ap->a_cnp->cn_cred, p->p_ucred))
831 break;
832
833 if (ps_showallprocs == 0 && ap->a_cnp->cn_cred->cr_uid != 0 &&
834 ap->a_cnp->cn_cred->cr_uid != p->p_ucred->cr_uid)
835 break;
836
837 for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) {
838 if (cnp->cn_namelen == pt->pt_namlen &&
839 bcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
840 (pt->pt_valid == NULL || (*pt->pt_valid)(lp)))
841 goto found;
842 }
843 break;
844 found:
845 error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
846 pt->pt_pfstype);
847 goto out;
848
849 default:
850 error = ENOTDIR;
851 goto out;
852 }
853 if (cnp->cn_nameiop == NAMEI_LOOKUP)
854 error = ENOENT;
855 else
856 error = EROFS;
857 /*
858 * If no error occured *vpp will hold a referenced locked vnode.
859 * dvp was passed to us locked and *vpp must be returned locked.
860 * If *vpp != dvp then we should unlock dvp if (1) this is not the
861 * last component or (2) CNP_LOCKPARENT is not set.
862 */
863 out:
864 if (error == 0 && *vpp != dvp) {
865 if ((cnp->cn_flags & CNP_LOCKPARENT) == 0) {
866 cnp->cn_flags |= CNP_PDIRUNLOCK;
867 vn_unlock(dvp);
868 }
869 }
870 pfs_pdone(p);
871 return (error);
872 }
873
874 /*
875 * Does this process have a text file?
876 */
877 int
procfs_validfile(struct lwp * lp)878 procfs_validfile(struct lwp *lp)
879 {
880 return (procfs_findtextvp(lp->lwp_proc) != NULLVP);
881 }
882
883 /*
884 * readdir() returns directory entries from pfsnode (vp).
885 *
886 * We generate just one directory entry at a time, as it would probably
887 * not pay off to buffer several entries locally to save uiomove calls.
888 *
889 * procfs_readdir(struct vnode *a_vp, struct uio *a_uio, struct ucred *a_cred,
890 * int *a_eofflag, int *a_ncookies, off_t **a_cookies)
891 */
892 static int
procfs_readdir(struct vop_readdir_args * ap)893 procfs_readdir(struct vop_readdir_args *ap)
894 {
895 struct pfsnode *pfs;
896 int error;
897
898 if (ap->a_uio->uio_offset < 0 || ap->a_uio->uio_offset > INT_MAX)
899 return (EINVAL);
900 error = vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM);
901 if (error)
902 return (error);
903
904 pfs = VTOPFS(ap->a_vp);
905 switch (pfs->pfs_type) {
906 case Pproc:
907 /*
908 * this is for the process-specific sub-directories.
909 * all that is needed to is copy out all the entries
910 * from the procent[] table (top of this file).
911 */
912 error = procfs_readdir_proc(ap);
913 break;
914 case Proot:
915 /*
916 * this is for the root of the procfs filesystem
917 * what is needed is a special entry for "curproc"
918 * followed by an entry for each process on allproc
919 */
920 error = procfs_readdir_root(ap);
921 break;
922 default:
923 error = ENOTDIR;
924 break;
925 }
926
927 vn_unlock(ap->a_vp);
928 return (error);
929 }
930
931 static int
procfs_readdir_proc(struct vop_readdir_args * ap)932 procfs_readdir_proc(struct vop_readdir_args *ap)
933 {
934 struct pfsnode *pfs;
935 int error, i, retval;
936 struct proc *p;
937 struct lwp *lp;
938 struct proc_target *pt;
939 struct uio *uio = ap->a_uio;
940
941 pfs = VTOPFS(ap->a_vp);
942 p = pfs_pfind(pfs->pfs_pid);
943 if (p == NULL)
944 return(0);
945 if (!PRISON_CHECK(ap->a_cred, p->p_ucred)) {
946 error = 0;
947 goto done;
948 }
949 /* XXX lwp, not MPSAFE */
950 lp = FIRST_LWP_IN_PROC(p);
951 if (lp == NULL) {
952 error = EINVAL;
953 goto done;
954 }
955
956 error = 0;
957 i = (int)uio->uio_offset;
958 if (i < 0) {
959 error = EINVAL;
960 goto done;
961 }
962
963 for (pt = &proc_targets[i];
964 !error && uio->uio_resid > 0 && i < nproc_targets; pt++, i++) {
965 if (pt->pt_valid && (*pt->pt_valid)(lp) == 0)
966 continue;
967
968 retval = vop_write_dirent(&error, uio,
969 PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype), pt->pt_type,
970 pt->pt_namlen, pt->pt_name);
971 if (retval)
972 break;
973 }
974
975 uio->uio_offset = (off_t)i;
976 error = 0;
977 done:
978 pfs_pdone(p);
979 return error;
980 }
981
982 struct procfs_readdir_root_info {
983 int error;
984 int i;
985 int pcnt;
986 struct uio *uio;
987 struct ucred *cred;
988 };
989
990 static int procfs_readdir_root_callback(struct proc *p, void *data);
991
992 static int
procfs_readdir_root(struct vop_readdir_args * ap)993 procfs_readdir_root(struct vop_readdir_args *ap)
994 {
995 struct procfs_readdir_root_info info;
996 struct uio *uio = ap->a_uio;
997 int res;
998
999 res = 0;
1000 info.error = 0;
1001 info.i = (int)uio->uio_offset;
1002
1003 if (info.i < 0)
1004 return (EINVAL);
1005
1006 info.pcnt = 0;
1007 info.uio = uio;
1008 info.cred = ap->a_cred;
1009 while (info.pcnt < 4) {
1010 res = procfs_readdir_root_callback(NULL, &info);
1011 if (res < 0)
1012 break;
1013 }
1014 if (res >= 0)
1015 allproc_scan(procfs_readdir_root_callback, &info, 0);
1016 uio->uio_offset = (off_t)info.i;
1017
1018 return (info.error);
1019 }
1020
1021 static int
procfs_readdir_root_callback(struct proc * p,void * data)1022 procfs_readdir_root_callback(struct proc *p, void *data)
1023 {
1024 struct procfs_readdir_root_info *info = data;
1025 struct uio *uio;
1026 int retval;
1027 ino_t d_ino;
1028 const char *d_name;
1029 char d_name_pid[20];
1030 size_t d_namlen;
1031 uint8_t d_type;
1032
1033 uio = info->uio;
1034
1035 if (uio->uio_resid <= 0 || info->error)
1036 return(-1);
1037
1038 switch (info->pcnt) {
1039 case 0: /* `.' */
1040 d_ino = PROCFS_FILENO(0, Proot);
1041 d_name = ".";
1042 d_namlen = 1;
1043 d_type = DT_DIR;
1044 break;
1045 case 1: /* `..' */
1046 d_ino = PROCFS_FILENO(0, Proot);
1047 d_name = "..";
1048 d_namlen = 2;
1049 d_type = DT_DIR;
1050 break;
1051
1052 case 2:
1053 d_ino = PROCFS_FILENO(0, Pcurproc);
1054 d_namlen = 7;
1055 d_name = "curproc";
1056 d_type = DT_LNK;
1057 break;
1058
1059 case 3:
1060 d_ino = PROCFS_FILENO(0, Pcurproc);
1061 d_namlen = 4;
1062 d_name = "self";
1063 d_type = DT_LNK;
1064 break;
1065
1066 default:
1067 if (!PRISON_CHECK(info->cred, p->p_ucred))
1068 return(0);
1069 if (ps_showallprocs == 0 &&
1070 info->cred->cr_uid != 0 &&
1071 info->cred->cr_uid != p->p_ucred->cr_uid) {
1072 return(0);
1073 }
1074
1075 /*
1076 * Skip entries we have already returned (optimization)
1077 */
1078 if (info->pcnt < info->i) {
1079 ++info->pcnt;
1080 return(0);
1081 }
1082
1083 d_ino = PROCFS_FILENO(p->p_pid, Pproc);
1084 d_namlen = ksnprintf(d_name_pid, sizeof(d_name_pid),
1085 "%ld", (long)p->p_pid);
1086 d_name = d_name_pid;
1087 d_type = DT_DIR;
1088 break;
1089 }
1090
1091 /*
1092 * Skip entries we have already returned (optimization)
1093 */
1094 if (info->pcnt < info->i) {
1095 ++info->pcnt;
1096 return(0);
1097 }
1098
1099 retval = vop_write_dirent(&info->error, uio,
1100 d_ino, d_type, d_namlen, d_name);
1101 if (retval)
1102 return(-1);
1103 ++info->pcnt;
1104 ++info->i;
1105 return(0);
1106 }
1107
1108 /*
1109 * readlink reads the link of `curproc' or `file'
1110 */
1111 static int
procfs_readlink(struct vop_readlink_args * ap)1112 procfs_readlink(struct vop_readlink_args *ap)
1113 {
1114 char buf[16]; /* should be enough */
1115 struct proc *procp;
1116 struct vnode *vp = ap->a_vp;
1117 struct pfsnode *pfs = VTOPFS(vp);
1118 char *fullpath, *freepath;
1119 int error, len;
1120
1121 switch (pfs->pfs_type) {
1122 case Pcurproc:
1123 if (pfs->pfs_fileno != PROCFS_FILENO(0, Pcurproc))
1124 return (EINVAL);
1125
1126 len = ksnprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid);
1127 return (uiomove(buf, len, ap->a_uio));
1128 case Pfile:
1129 /*
1130 * procfs's directory topology is somewhat asynchronous from
1131 * reality so it is possible for pid requests to race exiting
1132 * processes. In this situation, bit 31 is set in
1133 * pfs->pfs_pid which guarantees that pfs_pfind() will return
1134 * NULL.
1135 *
1136 * It is also possible to catch a process in the middle of
1137 * an exit sequence so various fields might wind up being
1138 * NULL that are not normally NULL.
1139 */
1140 procp = pfs_pfind(pfs->pfs_pid);
1141 if (procp == NULL || procp->p_ucred == NULL) {
1142 pfs_pdone(procp);
1143 return (uiomove("unknown", sizeof("unknown") - 1,
1144 ap->a_uio));
1145 }
1146 if (procp->p_textnch.ncp) {
1147 struct nchandle nch;
1148
1149 cache_copy(&procp->p_textnch, &nch);
1150 error = cache_fullpath(procp, &nch, NULL,
1151 &fullpath, &freepath, 0);
1152 cache_drop(&nch);
1153 } else {
1154 error = EINVAL;
1155 }
1156
1157 if (error != 0) {
1158 pfs_pdone(procp);
1159 return (uiomove("unknown", sizeof("unknown") - 1,
1160 ap->a_uio));
1161 }
1162 error = uiomove(fullpath, strlen(fullpath), ap->a_uio);
1163 kfree(freepath, M_TEMP);
1164 pfs_pdone(procp);
1165 return (error);
1166 default:
1167 return (EINVAL);
1168 }
1169 }
1170
1171 /*
1172 * convert decimal ascii to pid_t
1173 */
1174 static pid_t
atopid(const char * b,u_int len)1175 atopid(const char *b, u_int len)
1176 {
1177 pid_t p = 0;
1178
1179 while (len--) {
1180 char c = *b++;
1181 if (c < '0' || c > '9')
1182 return (NO_PID);
1183 p = 10 * p + (c - '0');
1184 if (p > PID_MAX)
1185 return (NO_PID);
1186 }
1187
1188 return (p);
1189 }
1190
1191 /*
1192 * kqfilter operations
1193 */
1194 static void
procfs_filt_detach(struct knote * kn)1195 procfs_filt_detach(struct knote *kn)
1196 {
1197 struct vnode *vp = (void *)kn->kn_hook;
1198
1199 knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
1200 }
1201
1202 static int
procfs_filt_read(struct knote * kn,long hint)1203 procfs_filt_read(struct knote *kn, long hint)
1204 {
1205 if (hint == NOTE_REVOKE) {
1206 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
1207 return (1);
1208 }
1209
1210 /* Files on procfs have a size of 0. */
1211 kn->kn_data = 0;
1212 if (kn->kn_sfflags & NOTE_OLDAPI)
1213 return (1);
1214 return (kn->kn_data != 0);
1215 }
1216
1217 static int
procfs_filt_write(struct knote * kn,long hint)1218 procfs_filt_write(struct knote *kn, long hint)
1219 {
1220 if (hint == NOTE_REVOKE)
1221 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
1222 kn->kn_data = 0;
1223 return (1);
1224 }
1225
1226 static int
procfs_filt_vnode(struct knote * kn,long hint)1227 procfs_filt_vnode(struct knote *kn, long hint)
1228 {
1229 if (kn->kn_sfflags & hint)
1230 kn->kn_fflags |= hint;
1231 if (hint == NOTE_REVOKE) {
1232 kn->kn_flags |= (EV_EOF | EV_NODATA);
1233 return (1);
1234 }
1235 return (kn->kn_fflags != 0);
1236 }
1237
1238 static struct filterops procfs_read_filtops = {
1239 FILTEROP_ISFD | FILTEROP_MPSAFE, NULL,
1240 procfs_filt_detach, procfs_filt_read,
1241 };
1242 static struct filterops procfs_write_filtops = {
1243 FILTEROP_ISFD | FILTEROP_MPSAFE, NULL,
1244 procfs_filt_detach, procfs_filt_write,
1245 };
1246 static struct filterops procfs_vnode_filtops = {
1247 FILTEROP_ISFD | FILTEROP_MPSAFE, NULL,
1248 procfs_filt_detach, procfs_filt_vnode,
1249 };
1250
1251 static int
procfs_kqfilter(struct vop_kqfilter_args * ap)1252 procfs_kqfilter(struct vop_kqfilter_args *ap)
1253 {
1254 struct vnode *vp = ap->a_vp;
1255 struct knote *kn = ap->a_kn;
1256
1257 switch (kn->kn_filter) {
1258 case EVFILT_READ:
1259 kn->kn_fop = &procfs_read_filtops;
1260 break;
1261 case EVFILT_WRITE:
1262 kn->kn_fop = &procfs_write_filtops;
1263 break;
1264 case EVFILT_VNODE:
1265 kn->kn_fop = &procfs_vnode_filtops;
1266 break;
1267 default:
1268 return (EOPNOTSUPP);
1269 }
1270
1271 kn->kn_hook = (caddr_t)vp;
1272 knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
1273
1274 return (0);
1275 }
1276