xref: /minix3/minix/servers/vfs/main.c (revision b5e2faaaaf60a8b9a02f8d72f64caa56a87eb312)
1 /*
 * This file contains the main program of the Virtual File System.  It is
 * a loop that gets messages requesting work, carries out the work, and sends
4  *
5  * The entry points into this file are:
6  *   main:	main program of the Virtual File System
7  *   reply:	send a reply to a process after the requested work is done
8  *
9  */
10 
11 #include "fs.h"
12 #include <fcntl.h>
13 #include <string.h>
14 #include <stdio.h>
15 #include <signal.h>
16 #include <assert.h>
17 #include <stdlib.h>
18 #include <sys/ioc_memory.h>
19 #include <sys/svrctl.h>
20 #include <sys/select.h>
21 #include <minix/callnr.h>
22 #include <minix/com.h>
23 #include <minix/const.h>
24 #include <minix/endpoint.h>
25 #include <minix/safecopies.h>
26 #include <minix/debug.h>
27 #include <minix/vfsif.h>
28 #include "file.h"
29 #include "vmnt.h"
30 #include "vnode.h"
31 
32 #if ENABLE_SYSCALL_STATS
33 EXTERN unsigned long calls_stats[NR_VFS_CALLS];
34 #endif
35 
36 /* Thread related prototypes */
37 static void do_reply(struct worker_thread *wp);
38 static void do_work(void);
39 static void do_init_root(void);
40 static void handle_work(void (*func)(void));
41 static void reply(message *m_out, endpoint_t whom, int result);
42 
43 static int get_work(void);
44 static void service_pm(void);
45 static int unblock(struct fproc *rfp);
46 
47 /* SEF functions and variables. */
48 static void sef_local_startup(void);
49 static int sef_cb_init_fresh(int type, sef_init_info_t *info);
50 
/*===========================================================================*
 *				main					     *
 *===========================================================================*/
int main(void)
{
/* This is the main program of the file system.  The main loop consists of
 * three major activities: getting new work, processing the work, and sending
 * the reply.  This loop never terminates as long as the file system runs.
 */
  int transid;
  struct worker_thread *wp;

  /* SEF local startup. */
  sef_local_startup();

  printf("Started VFS: %d worker thread(s)\n", NR_WTHREADS);

  if (OK != (sys_getkinfo(&kinfo)))
	panic("couldn't get kernel kinfo");

  /* This is the main loop that gets work, processes it, and sends replies. */
  while (TRUE) {
	yield_all();	/* let other threads run */
	self = NULL;	/* the main loop itself runs outside any worker */
	send_work();

	/* The get_work() function returns TRUE if we have a new message to
	 * process. It returns FALSE if it spawned other thread activities.
	 */
	if (!get_work())
		continue;

	/* A transaction ID embedded in the message type marks this message
	 * as an FS process's reply to a request from one of our workers.
	 */
	transid = TRNS_GET_ID(m_in.m_type);
	if (IS_VFS_FS_TRANSID(transid)) {
		wp = worker_get((thread_t) transid - VFS_TRANSID);
		if (wp == NULL || wp->w_fp == NULL) {
			printf("VFS: spurious message %d from endpoint %d\n",
				m_in.m_type, m_in.m_source);
			continue;
		}
		m_in.m_type = TRNS_DEL_ID(m_in.m_type); /* strip the ID */
		do_reply(wp);	/* wake the worker waiting for this reply */
		continue;
	} else if (who_e == PM_PROC_NR) { /* Calls from PM */
		/* Special control messages from PM */
		service_pm();
		continue;
	} else if (is_notify(call_nr)) {
		/* A task ipc_notify()ed us */
		switch (who_e) {
		case DS_PROC_NR:
			/* Start a thread to handle DS events, if no thread
			 * is pending or active for it already. DS is not
			 * supposed to issue calls to VFS or be the subject of
			 * postponed PM requests, so this should be no problem.
			 */
			if (worker_can_start(fp))
				handle_work(ds_event);
			break;
		case KERNEL:
			mthread_stacktraces();
			break;
		case CLOCK:
			/* Timer expired. Used only for select(). Check it. */
			expire_timers(m_in.m_notify.timestamp);
			break;
		default:
			printf("VFS: ignoring notification from %d\n", who_e);
		}
		continue;
	} else if (who_p < 0) { /* i.e., message comes from a task */
		/* We're going to ignore this message. Tasks should
		 * send ipc_notify()s only.
		 */
		 printf("VFS: ignoring message from %d (%d)\n", who_e, call_nr);
		 continue;
	}

	if (IS_BDEV_RS(call_nr)) {
		/* We've got results for a block device request. */
		bdev_reply();
	} else if (IS_CDEV_RS(call_nr)) {
		/* We've got results for a character device request. */
		cdev_reply();
	} else {
		/* Normal syscall. This spawns a new thread. */
		handle_work(do_work);
	}
  }
  return(OK);				/* shouldn't come here */
}
142 
143 /*===========================================================================*
144  *			       handle_work				     *
145  *===========================================================================*/
146 static void handle_work(void (*func)(void))
147 {
148 /* Handle asynchronous device replies and new system calls. If the originating
149  * endpoint is an FS endpoint, take extra care not to get in deadlock. */
150   struct vmnt *vmp = NULL;
151   endpoint_t proc_e;
152   int use_spare = FALSE;
153 
154   proc_e = m_in.m_source;
155 
156   if (fp->fp_flags & FP_SRV_PROC) {
157 	vmp = find_vmnt(proc_e);
158 	if (vmp != NULL) {
159 		/* A callback from an FS endpoint. Can do only one at once. */
160 		if (vmp->m_flags & VMNT_CALLBACK) {
161 			replycode(proc_e, EAGAIN);
162 			return;
163 		}
164 		/* Already trying to resolve a deadlock? Can't handle more. */
165 		if (worker_available() == 0) {
166 			replycode(proc_e, EAGAIN);
167 			return;
168 		}
169 		/* A thread is available. Set callback flag. */
170 		vmp->m_flags |= VMNT_CALLBACK;
171 		if (vmp->m_flags & VMNT_MOUNTING) {
172 			vmp->m_flags |= VMNT_FORCEROOTBSF;
173 		}
174 	}
175 
176 	/* Use the spare thread to handle this request if needed. */
177 	use_spare = TRUE;
178   }
179 
180   worker_start(fp, func, &m_in, use_spare);
181 }
182 
183 
/*===========================================================================*
 *			       do_reply				             *
 *===========================================================================*/
static void do_reply(struct worker_thread *wp)
{
/* Deliver an FS (or VM) reply held in m_in to the worker thread that issued
 * the corresponding request, and wake that worker up.
 */
  struct vmnt *vmp = NULL;

  /* Only VM may reply without a corresponding mounted file system entry. */
  if(who_e != VM_PROC_NR && (vmp = find_vmnt(who_e)) == NULL)
	panic("Couldn't find vmnt for endpoint %d", who_e);

  /* The worker must actually be waiting on this endpoint. */
  if (wp->w_task != who_e) {
	printf("VFS: tid %d: expected %d to reply, not %d\n",
		wp->w_tid, wp->w_task, who_e);
	return;
  }
  /* It should be impossible to trigger the following case, but it is here for
   * consistency reasons: worker_stop() resets w_sendrec but not w_task.
   */
  if (wp->w_sendrec == NULL) {
	printf("VFS: tid %d: late reply from %d ignored\n", wp->w_tid, who_e);
	return;
  }
  *wp->w_sendrec = m_in;	/* hand the reply message to the worker */
  wp->w_sendrec = NULL;
  wp->w_task = NONE;
  if(vmp) vmp->m_comm.c_cur_reqs--; /* We've got our reply, make room for others */
  worker_signal(wp); /* Continue this thread */
}
212 
213 /*===========================================================================*
214  *			       do_pending_pipe				     *
215  *===========================================================================*/
216 static void do_pending_pipe(void)
217 {
218   int r, op;
219   struct filp *f;
220   tll_access_t locktype;
221 
222   f = fp->fp_filp[fp->fp_fd];
223   assert(f != NULL);
224 
225   locktype = (job_call_nr == VFS_READ) ? VNODE_READ : VNODE_WRITE;
226   op = (job_call_nr == VFS_READ) ? READING : WRITING;
227   lock_filp(f, locktype);
228 
229   r = rw_pipe(op, who_e, f, fp->fp_io_buffer, fp->fp_io_nbytes);
230 
231   if (r != SUSPEND) { /* Do we have results to report? */
232 	/* Process is writing, but there is no reader. Send a SIGPIPE signal.
233 	 * This should match the corresponding code in read_write().
234 	 */
235 	if (r == EPIPE && op == WRITING) {
236 		if (!(f->filp_flags & O_NOSIGPIPE))
237 			sys_kill(fp->fp_endpoint, SIGPIPE);
238 	}
239 
240 	replycode(fp->fp_endpoint, r);
241   }
242 
243   unlock_filp(f);
244 }
245 
/*===========================================================================*
 *			       do_work					     *
 *===========================================================================*/
static void do_work(void)
{
/* Dispatch a single VFS system call to its handler and send the reply.
 * Runs in a worker thread; the request is in job_m_in / job_call_nr.
 */
  unsigned int call_index;
  int error;

  if (fp->fp_pid == PID_FREE) {
	/* Process vanished before we were able to handle request.
	 * Replying has no use. Just drop it.
	 */
	return;
  }

  memset(&job_m_out, 0, sizeof(job_m_out));

  /* At this point we assume that we're dealing with a call that has been
   * made specifically to VFS. Typically it will be a POSIX call from a
   * normal process, but we also handle a few calls made by drivers
   * such as UDS and VND through here. Call the internal function that
   * does the work.
   */
  if (IS_VFS_CALL(job_call_nr)) {
	call_index = (unsigned int) (job_call_nr - VFS_BASE);

	/* Bounds-check the call number before indexing the call table. */
	if (call_index < NR_VFS_CALLS && call_vec[call_index] != NULL) {
#if ENABLE_SYSCALL_STATS
		calls_stats[call_index]++;
#endif
		error = (*call_vec[call_index])();
	} else
		error = ENOSYS;
  } else
	error = ENOSYS;

  /* Copy the results back to the user and send reply. */
  if (error != SUSPEND) reply(&job_m_out, fp->fp_endpoint, error);
}
285 
286 /*===========================================================================*
287  *			       sef_local_startup			     *
288  *===========================================================================*/
289 static void sef_local_startup()
290 {
291   /* Register init callbacks. */
292   sef_setcb_init_fresh(sef_cb_init_fresh);
293   sef_setcb_init_restart(SEF_CB_INIT_RESTART_STATEFUL);
294 
295   /* Let SEF perform startup. */
296   sef_startup();
297 }
298 
299 /*===========================================================================*
300  *				sef_cb_init_fresh			     *
301  *===========================================================================*/
302 static int sef_cb_init_fresh(int UNUSED(type), sef_init_info_t *info)
303 {
304 /* Initialize the virtual file server. */
305   int s, i;
306   struct fproc *rfp;
307   message mess;
308   struct rprocpub rprocpub[NR_BOOT_PROCS];
309 
310   self = NULL;
311   verbose = 0;
312 
313   /* Initialize proc endpoints to NONE */
314   for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
315 	rfp->fp_endpoint = NONE;
316 	rfp->fp_pid = PID_FREE;
317   }
318 
319   /* Initialize the process table with help of the process manager messages.
320    * Expect one message for each system process with its slot number and pid.
321    * When no more processes follow, the magic process number NONE is sent.
322    * Then, stop and synchronize with the PM.
323    */
324   do {
325 	if ((s = sef_receive(PM_PROC_NR, &mess)) != OK)
326 		panic("VFS: couldn't receive from PM: %d", s);
327 
328 	if (mess.m_type != VFS_PM_INIT)
329 		panic("unexpected message from PM: %d", mess.m_type);
330 
331 	if (NONE == mess.VFS_PM_ENDPT) break;
332 
333 	rfp = &fproc[mess.VFS_PM_SLOT];
334 	rfp->fp_flags = FP_NOFLAGS;
335 	rfp->fp_pid = mess.VFS_PM_PID;
336 	rfp->fp_endpoint = mess.VFS_PM_ENDPT;
337 	rfp->fp_grant = GRANT_INVALID;
338 	rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
339 	rfp->fp_realuid = (uid_t) SYS_UID;
340 	rfp->fp_effuid = (uid_t) SYS_UID;
341 	rfp->fp_realgid = (gid_t) SYS_GID;
342 	rfp->fp_effgid = (gid_t) SYS_GID;
343 	rfp->fp_umask = ~0;
344   } while (TRUE);			/* continue until process NONE */
345   mess.m_type = OK;			/* tell PM that we succeeded */
346   s = ipc_send(PM_PROC_NR, &mess);		/* send synchronization message */
347 
348   system_hz = sys_hz();
349 
350   /* Subscribe to block and character driver events. */
351   s = ds_subscribe("drv\\.[bc]..\\..*", DSF_INITIAL | DSF_OVERWRITE);
352   if (s != OK) panic("VFS: can't subscribe to driver events (%d)", s);
353 
354   /* Initialize worker threads */
355   worker_init();
356 
357   /* Initialize global locks */
358   if (mthread_mutex_init(&bsf_lock, NULL) != 0)
359 	panic("VFS: couldn't initialize block special file lock");
360 
361   init_dmap();			/* Initialize device table. */
362 
363   /* Map all the services in the boot image. */
364   if ((s = sys_safecopyfrom(RS_PROC_NR, info->rproctab_gid, 0,
365 			    (vir_bytes) rprocpub, sizeof(rprocpub))) != OK){
366 	panic("sys_safecopyfrom failed: %d", s);
367   }
368   for (i = 0; i < NR_BOOT_PROCS; i++) {
369 	if (rprocpub[i].in_use) {
370 		if ((s = map_service(&rprocpub[i])) != OK) {
371 			panic("VFS: unable to map service: %d", s);
372 		}
373 	}
374   }
375 
376   /* Initialize locks and initial values for all processes. */
377   for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
378 	if (mutex_init(&rfp->fp_lock, NULL) != 0)
379 		panic("unable to initialize fproc lock");
380 	rfp->fp_worker = NULL;
381 #if LOCK_DEBUG
382 	rfp->fp_vp_rdlocks = 0;
383 	rfp->fp_vmnt_rdlocks = 0;
384 #endif
385 
386 	/* Initialize process directories. mount_fs will set them to the
387 	 * correct values.
388 	 */
389 	for (i = 0; i < OPEN_MAX; i++)
390 		rfp->fp_filp[i] = NULL;
391 	rfp->fp_rd = NULL;
392 	rfp->fp_wd = NULL;
393   }
394 
395   init_vnodes();		/* init vnodes */
396   init_vmnts();			/* init vmnt structures */
397   init_select();		/* init select() structures */
398   init_filps();			/* Init filp structures */
399 
400   /* Mount PFS and initial file system root. */
401   worker_start(fproc_addr(VFS_PROC_NR), do_init_root, &mess /*unused*/,
402 	FALSE /*use_spare*/);
403 
404   return(OK);
405 }
406 
407 /*===========================================================================*
408  *			       do_init_root				     *
409  *===========================================================================*/
410 static void do_init_root(void)
411 {
412   char *mount_type, *mount_label;
413   int r;
414 
415   /* Disallow requests from e.g. init(8) while doing the initial mounting. */
416   worker_allow(FALSE);
417 
418   /* Mount the pipe file server. */
419   mount_pfs();
420 
421   /* Mount the root file system. */
422   mount_type = "mfs";       /* FIXME: use boot image process name instead */
423   mount_label = "fs_imgrd"; /* FIXME: obtain this from RS */
424 
425   r = mount_fs(DEV_IMGRD, "bootramdisk", "/", MFS_PROC_NR, 0, mount_type,
426 	mount_label);
427   if (r != OK)
428 	panic("Failed to initialize root");
429 
430   /* All done with mounting, allow requests now. */
431   worker_allow(TRUE);
432 }
433 
434 /*===========================================================================*
435  *				lock_proc				     *
436  *===========================================================================*/
437 void lock_proc(struct fproc *rfp)
438 {
439   int r;
440   struct worker_thread *org_self;
441 
442   r = mutex_trylock(&rfp->fp_lock);
443   if (r == 0) return;
444 
445   org_self = worker_suspend();
446 
447   if ((r = mutex_lock(&rfp->fp_lock)) != 0)
448 	panic("unable to lock fproc lock: %d", r);
449 
450   worker_resume(org_self);
451 }
452 
453 /*===========================================================================*
454  *				unlock_proc				     *
455  *===========================================================================*/
456 void unlock_proc(struct fproc *rfp)
457 {
458   int r;
459 
460   if ((r = mutex_unlock(&rfp->fp_lock)) != 0)
461 	panic("Failed to unlock: %d", r);
462 }
463 
464 /*===========================================================================*
465  *				thread_cleanup				     *
466  *===========================================================================*/
467 void thread_cleanup(void)
468 {
469 /* Perform cleanup actions for a worker thread. */
470 
471 #if LOCK_DEBUG
472   check_filp_locks_by_me();
473   check_vnode_locks_by_me(fp);
474   check_vmnt_locks_by_me(fp);
475 #endif
476 
477   if (fp->fp_flags & FP_SRV_PROC) {
478 	struct vmnt *vmp;
479 
480 	if ((vmp = find_vmnt(fp->fp_endpoint)) != NULL) {
481 		vmp->m_flags &= ~VMNT_CALLBACK;
482 	}
483   }
484 }
485 
/*===========================================================================*
 *				get_work				     *
 *===========================================================================*/
static int get_work(void)
{
  /* Normally wait for new input.  However, if 'reviving' is nonzero, a
   * suspended process must be awakened.  Return TRUE if there is a message to
   * process (usually newly received, but possibly a resumed request), or FALSE
   * if a thread for other activities has been spawned instead.
   * As a side effect, sets the global 'fp' to the sender's fproc slot (or
   * NULL for kernel tasks) and fills the global 'm_in' with the message.
   */
  int r, proc_p;
  register struct fproc *rp;

  if (reviving != 0) {
	/* Find a suspended process. */
	for (rp = &fproc[0]; rp < &fproc[NR_PROCS]; rp++)
		if (rp->fp_pid != PID_FREE && (rp->fp_flags & FP_REVIVED))
			return unblock(rp); /* So main loop can process job */

	panic("VFS: get_work couldn't revive anyone");
  }

  for(;;) {
	/* Normal case.  No one to revive. Get a useful request. */
	if ((r = sef_receive(ANY, &m_in)) != OK) {
		panic("VFS: sef_receive error: %d", r);
	}

	/* Map the sender's endpoint to an fproc slot, if it has one. */
	proc_p = _ENDPOINT_P(m_in.m_source);
	if (proc_p < 0 || proc_p >= NR_PROCS) fp = NULL;
	else fp = &fproc[proc_p];

	/* Negative who_p is never used to access the fproc array. Negative
	 * numbers (kernel tasks) are treated in a special way.
	 */
	if (fp && fp->fp_endpoint == NONE) {
		printf("VFS: ignoring request from %d: NONE endpoint %d (%d)\n",
			m_in.m_source, who_p, m_in.m_type);
		continue;
	}

	/* Internal consistency check; our mental image of process numbers and
	 * endpoints must match with how the rest of the system thinks of them.
	 */
	if (fp && fp->fp_endpoint != who_e) {
		if (fproc[who_p].fp_endpoint == NONE)
			printf("slot unknown even\n");

		panic("VFS: receive endpoint inconsistent (source %d, who_p "
			"%d, stored ep %d, who_e %d).\n", m_in.m_source, who_p,
			fproc[who_p].fp_endpoint, who_e);
	}

	return TRUE;
  }
  /* NOTREACHED */
}
543 
544 /*===========================================================================*
545  *				reply					     *
546  *===========================================================================*/
547 static void reply(message *m_out, endpoint_t whom, int result)
548 {
549 /* Send a reply to a user process.  If the send fails, just ignore it. */
550   int r;
551 
552   m_out->m_type = result;
553   r = ipc_sendnb(whom, m_out);
554   if (r != OK) {
555 	printf("VFS: %d couldn't send reply %d to %d: %d\n", mthread_self(),
556 		result, whom, r);
557 	util_stacktrace();
558   }
559 }
560 
561 /*===========================================================================*
562  *				replycode				     *
563  *===========================================================================*/
564 void replycode(endpoint_t whom, int result)
565 {
566 /* Send a reply to a user process.  If the send fails, just ignore it. */
567   message m_out;
568 
569   memset(&m_out, 0, sizeof(m_out));
570 
571   reply(&m_out, whom, result);
572 }
573 
/*===========================================================================*
 *				service_pm_postponed			     *
 *===========================================================================*/
void service_pm_postponed(void)
{
/* Handle a PM request that was postponed until the target process became
 * idle.  Runs in a worker thread bound to the target process, so blocking
 * is allowed here.  The request is in job_m_in; a reply is always sent
 * back to PM.
 */
  int r, term_signal;
  vir_bytes core_path;
  vir_bytes exec_path, stack_frame, pc, newsp, ps_str;
  size_t exec_path_len, stack_frame_len;
  endpoint_t proc_e;
  message m_out;

  memset(&m_out, 0, sizeof(m_out));

  switch(job_call_nr) {
  case VFS_PM_EXEC:
	proc_e = job_m_in.VFS_PM_ENDPT;
	exec_path = (vir_bytes) job_m_in.VFS_PM_PATH;
	exec_path_len = (size_t) job_m_in.VFS_PM_PATH_LEN;
	stack_frame = (vir_bytes) job_m_in.VFS_PM_FRAME;
	stack_frame_len = (size_t) job_m_in.VFS_PM_FRAME_LEN;
	ps_str = (vir_bytes) job_m_in.VFS_PM_PS_STR;

	/* The worker is associated with the target process. */
	assert(proc_e == fp->fp_endpoint);

	r = pm_exec(exec_path, exec_path_len, stack_frame, stack_frame_len,
		&pc, &newsp, &ps_str);

	/* Reply status to PM */
	m_out.m_type = VFS_PM_EXEC_REPLY;
	m_out.VFS_PM_ENDPT = proc_e;
	m_out.VFS_PM_PC = (void *) pc;
	m_out.VFS_PM_STATUS = r;
	m_out.VFS_PM_NEWSP = (void *) newsp;
	m_out.VFS_PM_NEWPS_STR = ps_str;

	break;

  case VFS_PM_EXIT:
	proc_e = job_m_in.VFS_PM_ENDPT;

	assert(proc_e == fp->fp_endpoint);

	pm_exit();

	/* Reply dummy status to PM for synchronization */
	m_out.m_type = VFS_PM_EXIT_REPLY;
	m_out.VFS_PM_ENDPT = proc_e;

	break;

  case VFS_PM_DUMPCORE:
	proc_e = job_m_in.VFS_PM_ENDPT;
	term_signal = job_m_in.VFS_PM_TERM_SIG;
	core_path = (vir_bytes) job_m_in.VFS_PM_PATH;

	/* A zero signal used to indicate that a coredump should be generated
	 * without terminating the target process, but this was broken in so
	 * many ways that we no longer support this. Userland should implement
	 * this functionality itself, for example through ptrace(2).
	 */
	if (term_signal == 0)
		panic("no termination signal given for coredump!");

	assert(proc_e == fp->fp_endpoint);

	r = pm_dumpcore(term_signal, core_path);

	/* Reply status to PM */
	m_out.m_type = VFS_PM_CORE_REPLY;
	m_out.VFS_PM_ENDPT = proc_e;
	m_out.VFS_PM_STATUS = r;

	break;

  case VFS_PM_UNPAUSE:
	proc_e = job_m_in.VFS_PM_ENDPT;

	assert(proc_e == fp->fp_endpoint);

	unpause();

	m_out.m_type = VFS_PM_UNPAUSE_REPLY;
	m_out.VFS_PM_ENDPT = proc_e;

	break;

  default:
	panic("Unhandled postponed PM call %d", job_m_in.m_type);
  }

  r = ipc_send(PM_PROC_NR, &m_out);
  if (r != OK)
	panic("service_pm_postponed: ipc_send failed: %d", r);
}
669 
/*===========================================================================*
 *				service_pm				     *
 *===========================================================================*/
static void service_pm(void)
{
/* Process a request from PM. This function is called from the main thread, and
 * may therefore not block. Any requests that may require blocking the calling
 * thread must be executed in a separate thread. Aside from VFS_PM_REBOOT, all
 * requests from PM involve another, target process: for example, PM tells VFS
 * that a process is performing a setuid() call. For some requests however,
 * that other process may not be idle, and in that case VFS must serialize the
 * PM request handling with any operation is it handling for that target
 * process. As it happens, the requests that may require blocking are also the
 * ones where the target process may not be idle. For both these reasons, such
 * requests are run in worker threads associated to the target process.
 */
  struct fproc *rfp;
  int r, slot;
  message m_out;

  memset(&m_out, 0, sizeof(m_out));

  switch (call_nr) {
  case VFS_PM_SETUID:
	{
		endpoint_t proc_e;
		uid_t euid, ruid;

		proc_e = m_in.VFS_PM_ENDPT;
		euid = m_in.VFS_PM_EID;
		ruid = m_in.VFS_PM_RID;

		pm_setuid(proc_e, euid, ruid);

		m_out.m_type = VFS_PM_SETUID_REPLY;
		m_out.VFS_PM_ENDPT = proc_e;
	}
	break;

  case VFS_PM_SETGID:
	{
		endpoint_t proc_e;
		gid_t egid, rgid;

		proc_e = m_in.VFS_PM_ENDPT;
		egid = m_in.VFS_PM_EID;
		rgid = m_in.VFS_PM_RID;

		pm_setgid(proc_e, egid, rgid);

		m_out.m_type = VFS_PM_SETGID_REPLY;
		m_out.VFS_PM_ENDPT = proc_e;
	}
	break;

  case VFS_PM_SETSID:
	{
		endpoint_t proc_e;

		proc_e = m_in.VFS_PM_ENDPT;
		pm_setsid(proc_e);

		m_out.m_type = VFS_PM_SETSID_REPLY;
		m_out.VFS_PM_ENDPT = proc_e;
	}
	break;

  /* These four may block and/or target a busy process: hand them to a
   * worker thread bound to the target process (see service_pm_postponed).
   */
  case VFS_PM_EXEC:
  case VFS_PM_EXIT:
  case VFS_PM_DUMPCORE:
  case VFS_PM_UNPAUSE:
	{
		endpoint_t proc_e = m_in.VFS_PM_ENDPT;

		if(isokendpt(proc_e, &slot) != OK) {
			printf("VFS: proc ep %d not ok\n", proc_e);
			return;
		}

		rfp = &fproc[slot];

		/* PM requests on behalf of a proc are handled after the
		 * system call that might be in progress for that proc has
		 * finished. If the proc is not busy, we start a new thread.
		 */
		worker_start(rfp, NULL, &m_in, FALSE /*use_spare*/);

		return;
	}
  case VFS_PM_FORK:
  case VFS_PM_SRV_FORK:
	{
		endpoint_t pproc_e, proc_e;
		pid_t child_pid;
		uid_t reuid;
		gid_t regid;

		pproc_e = m_in.VFS_PM_PENDPT;
		proc_e = m_in.VFS_PM_ENDPT;
		child_pid = m_in.VFS_PM_CPID;
		reuid = m_in.VFS_PM_REUID;
		regid = m_in.VFS_PM_REGID;

		pm_fork(pproc_e, proc_e, child_pid);
		m_out.m_type = VFS_PM_FORK_REPLY;

		/* A server fork additionally sets the child's credentials. */
		if (call_nr == VFS_PM_SRV_FORK) {
			m_out.m_type = VFS_PM_SRV_FORK_REPLY;
			pm_setuid(proc_e, reuid, reuid);
			pm_setgid(proc_e, regid, regid);
		}

		m_out.VFS_PM_ENDPT = proc_e;
	}
	break;
  case VFS_PM_SETGROUPS:
	{
		endpoint_t proc_e;
		int group_no;
		gid_t *group_addr;

		proc_e = m_in.VFS_PM_ENDPT;
		group_no = m_in.VFS_PM_GROUP_NO;
		group_addr = (gid_t *) m_in.VFS_PM_GROUP_ADDR;

		pm_setgroups(proc_e, group_no, group_addr);

		m_out.m_type = VFS_PM_SETGROUPS_REPLY;
		m_out.VFS_PM_ENDPT = proc_e;
	}
	break;

  case VFS_PM_REBOOT:
	/* Reboot requests are not considered postponed PM work and are instead
	 * handled from a separate worker thread that is associated with PM's
	 * process. PM makes no regular VFS calls, and thus, from VFS's
	 * perspective, PM is always idle. Therefore, we can safely do this.
	 * We do assume that PM sends us only one VFS_PM_REBOOT message at
	 * once, or ever for that matter. :)
	 */
	worker_start(fproc_addr(PM_PROC_NR), pm_reboot, &m_in,
		FALSE /*use_spare*/);

	return;

    default:
	printf("VFS: don't know how to handle PM request %d\n", call_nr);

	return;
  }

  r = ipc_send(PM_PROC_NR, &m_out);
  if (r != OK)
	panic("service_pm: ipc_send failed: %d", r);
}
825 
826 
827 /*===========================================================================*
828  *				unblock					     *
829  *===========================================================================*/
830 static int unblock(rfp)
831 struct fproc *rfp;
832 {
833 /* Unblock a process that was previously blocked on a pipe or a lock.  This is
834  * done by reconstructing the original request and continuing/repeating it.
835  * This function returns TRUE when it has restored a request for execution, and
836  * FALSE if the caller should continue looking for work to do.
837  */
838   int blocked_on;
839 
840   blocked_on = rfp->fp_blocked_on;
841 
842   /* Reconstruct the original request from the saved data. */
843   memset(&m_in, 0, sizeof(m_in));
844   m_in.m_source = rfp->fp_endpoint;
845   m_in.m_type = rfp->fp_block_callnr;
846   switch (m_in.m_type) {
847   case VFS_READ:
848   case VFS_WRITE:
849 	assert(blocked_on == FP_BLOCKED_ON_PIPE);
850 	m_in.m_lc_vfs_readwrite.fd = rfp->fp_fd;
851 	m_in.m_lc_vfs_readwrite.buf = rfp->fp_io_buffer;
852 	m_in.m_lc_vfs_readwrite.len = rfp->fp_io_nbytes;
853 	break;
854   case VFS_FCNTL:
855 	assert(blocked_on == FP_BLOCKED_ON_LOCK);
856 	m_in.m_lc_vfs_fcntl.fd = rfp->fp_fd;
857 	m_in.m_lc_vfs_fcntl.cmd = rfp->fp_io_nbytes;
858 	m_in.m_lc_vfs_fcntl.arg_ptr = rfp->fp_io_buffer;
859 	assert(m_in.m_lc_vfs_fcntl.cmd == F_SETLKW);
860 	break;
861   default:
862 	panic("unblocking call %d blocked on %d ??", m_in.m_type, blocked_on);
863   }
864 
865   rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;	/* no longer blocked */
866   rfp->fp_flags &= ~FP_REVIVED;
867   reviving--;
868   assert(reviving >= 0);
869 
870   /* This should not be device I/O. If it is, it'll 'leak' grants. */
871   assert(!GRANT_VALID(rfp->fp_grant));
872 
873   /* Pending pipe reads/writes cannot be repeated as is, and thus require a
874    * special resumption procedure.
875    */
876   if (blocked_on == FP_BLOCKED_ON_PIPE) {
877 	worker_start(rfp, do_pending_pipe, &m_in, FALSE /*use_spare*/);
878 	return(FALSE);	/* Retrieve more work */
879   }
880 
881   /* A lock request. Repeat the original request as though it just came in. */
882   fp = rfp;
883   return(TRUE);	/* We've unblocked a process */
884 }
885