xref: /minix3/minix/servers/vfs/main.c (revision dd41186aac5f9c05e657f127b7e5d33f375d1686)
1 /*
2  * a loop that gets messages requesting work, carries out the work, and sends
3  * replies.
4  *
5  * The entry points into this file are:
6  *   main:	main program of the Virtual File System
7  *   reply:	send a reply to a process after the requested work is done
8  *
9  */
10 
11 #include "fs.h"
12 #include <fcntl.h>
13 #include <string.h>
14 #include <stdio.h>
15 #include <signal.h>
16 #include <assert.h>
17 #include <stdlib.h>
18 #include <sys/ioc_memory.h>
19 #include <sys/svrctl.h>
20 #include <sys/select.h>
21 #include <minix/callnr.h>
22 #include <minix/com.h>
23 #include <minix/const.h>
24 #include <minix/endpoint.h>
25 #include <minix/safecopies.h>
26 #include <minix/debug.h>
27 #include <minix/vfsif.h>
28 #include "file.h"
29 #include "vmnt.h"
30 #include "vnode.h"
31 
32 #if ENABLE_SYSCALL_STATS
33 EXTERN unsigned long calls_stats[NR_VFS_CALLS];
34 #endif
35 
36 /* Thread related prototypes */
37 static void do_reply(struct worker_thread *wp);
38 static void do_work(void);
39 static void do_init_root(void);
40 static void handle_work(void (*func)(void));
41 static void reply(message *m_out, endpoint_t whom, int result);
42 
43 static int get_work(void);
44 static void service_pm(void);
45 static int unblock(struct fproc *rfp);
46 
47 /* SEF functions and variables. */
48 static void sef_local_startup(void);
49 static int sef_cb_init_fresh(int type, sef_init_info_t *info);
50 static int sef_cb_init_lu(int type, sef_init_info_t *info);
51 
52 /*===========================================================================*
53  *				main					     *
54  *===========================================================================*/
int main(void)
{
/* This is the main program of the file system.  The main loop consists of
 * three major activities: getting new work, processing the work, and sending
 * the reply.  This loop never terminates as long as the file system runs.
 */
  int transid;
  struct worker_thread *wp;

  /* SEF local startup. */
  sef_local_startup();

  printf("Started VFS: %d worker thread(s)\n", NR_WTHREADS);

  /* This is the main loop that gets work, processes it, and sends replies. */
  while (TRUE) {
	worker_yield();	/* let other threads run */

	/* Flush any queued outgoing messages before blocking for input. */
	send_work();

	/* The get_work() function returns TRUE if we have a new message to
	 * process. It returns FALSE if it spawned other thread activities.
	 */
	if (!get_work())
		continue;

	/* A transaction ID embedded in the message type marks this message
	 * as an FS process's reply to a request a worker thread sent out
	 * earlier; route it back to that worker.
	 */
	transid = TRNS_GET_ID(m_in.m_type);
	if (IS_VFS_FS_TRANSID(transid)) {
		wp = worker_get((thread_t) transid - VFS_TRANSID);
		if (wp == NULL || wp->w_fp == NULL) {
			printf("VFS: spurious message %d from endpoint %d\n",
				m_in.m_type, m_in.m_source);
			continue;
		}
		/* Strip the transaction ID and hand the reply to the worker. */
		m_in.m_type = TRNS_DEL_ID(m_in.m_type);
		do_reply(wp);
		continue;
	} else if (who_e == PM_PROC_NR) { /* Calls from PM */
		/* Special control messages from PM */
		service_pm();
		continue;
	} else if (is_notify(call_nr)) {
		/* A task ipc_notify()ed us */
		switch (who_e) {
		case DS_PROC_NR:
			/* Start a thread to handle DS events, if no thread
			 * is pending or active for it already. DS is not
			 * supposed to issue calls to VFS or be the subject of
			 * postponed PM requests, so this should be no problem.
			 */
			if (worker_can_start(fp))
				handle_work(ds_event);
			break;
		case KERNEL:
			/* Kernel notification: dump stack traces of all
			 * threads, for debugging purposes.
			 */
			mthread_stacktraces();
			break;
		case CLOCK:
			/* Timer expired. Used only for select(). Check it. */
			expire_timers(m_in.m_notify.timestamp);
			break;
		default:
			printf("VFS: ignoring notification from %d\n", who_e);
		}
		continue;
	} else if (who_p < 0) { /* i.e., message comes from a task */
		/* We're going to ignore this message. Tasks should
		 * send ipc_notify()s only.
		 */
		 printf("VFS: ignoring message from %d (%d)\n", who_e, call_nr);
		 continue;
	}

	if (IS_BDEV_RS(call_nr)) {
		/* We've got results for a block device request. */
		bdev_reply();
	} else if (IS_CDEV_RS(call_nr)) {
		/* We've got results for a character device request. */
		cdev_reply();
	} else {
		/* Normal syscall. This spawns a new thread. */
		handle_work(do_work);
	}
  }
  return(OK);				/* shouldn't come here */
}
140 
141 /*===========================================================================*
142  *			       handle_work				     *
143  *===========================================================================*/
144 static void handle_work(void (*func)(void))
145 {
146 /* Handle asynchronous device replies and new system calls. If the originating
147  * endpoint is an FS endpoint, take extra care not to get in deadlock. */
148   struct vmnt *vmp = NULL;
149   endpoint_t proc_e;
150   int use_spare = FALSE;
151 
152   proc_e = m_in.m_source;
153 
154   if (fp->fp_flags & FP_SRV_PROC) {
155 	vmp = find_vmnt(proc_e);
156 	if (vmp != NULL) {
157 		/* A callback from an FS endpoint. Can do only one at once. */
158 		if (vmp->m_flags & VMNT_CALLBACK) {
159 			replycode(proc_e, EAGAIN);
160 			return;
161 		}
162 		/* Already trying to resolve a deadlock? Can't handle more. */
163 		if (worker_available() == 0) {
164 			replycode(proc_e, EAGAIN);
165 			return;
166 		}
167 		/* A thread is available. Set callback flag. */
168 		vmp->m_flags |= VMNT_CALLBACK;
169 		if (vmp->m_flags & VMNT_MOUNTING) {
170 			vmp->m_flags |= VMNT_FORCEROOTBSF;
171 		}
172 	}
173 
174 	/* Use the spare thread to handle this request if needed. */
175 	use_spare = TRUE;
176   }
177 
178   worker_start(fp, func, &m_in, use_spare);
179 }
180 
181 
182 /*===========================================================================*
183  *			       do_reply				             *
184  *===========================================================================*/
/*===========================================================================*
 *			       do_reply				             *
 *===========================================================================*/
static void do_reply(struct worker_thread *wp)
{
/* Deliver an FS (or VM) reply, held in the global m_in message, to the worker
 * thread 'wp' that is blocked waiting for it, and wake that thread up.
 */
  struct vmnt *vmp = NULL;

  /* Replies from VM have no vmnt; all other repliers must be mounted FSes. */
  if(who_e != VM_PROC_NR && (vmp = find_vmnt(who_e)) == NULL)
	panic("Couldn't find vmnt for endpoint %d", who_e);

  /* The reply must come from the endpoint the worker actually sent to. */
  if (wp->w_task != who_e) {
	printf("VFS: tid %d: expected %d to reply, not %d\n",
		wp->w_tid, wp->w_task, who_e);
	return;
  }
  /* It should be impossible to trigger the following case, but it is here for
   * consistency reasons: worker_stop() resets w_sendrec but not w_task.
   */
  if (wp->w_sendrec == NULL) {
	printf("VFS: tid %d: late reply from %d ignored\n", wp->w_tid, who_e);
	return;
  }
  /* Hand the reply message to the worker and clear its pending-send state. */
  *wp->w_sendrec = m_in;
  wp->w_sendrec = NULL;
  wp->w_task = NONE;
  if(vmp) vmp->m_comm.c_cur_reqs--; /* We've got our reply, make room for others */
  worker_signal(wp); /* Continue this thread */
}
210 
211 /*===========================================================================*
212  *			       do_pending_pipe				     *
213  *===========================================================================*/
/*===========================================================================*
 *			       do_pending_pipe				     *
 *===========================================================================*/
static void do_pending_pipe(void)
{
/* Resume a read or write call that was previously suspended on a pipe.
 * Runs on a worker thread started by unblock(), which reconstructed the
 * original request message into m_in (and thus job_m_in).
 */
  vir_bytes buf;
  size_t nbytes, cum_io;
  int r, op, fd;
  struct filp *f;
  tll_access_t locktype;

  /* unblock() cleared the blocked state before starting this thread. */
  assert(fp->fp_blocked_on == FP_BLOCKED_ON_NONE);

  /*
   * We take all our needed resumption state from the m_in message, which is
   * filled by unblock().  Since this is an internal resumption, there is no
   * need to perform extensive checks on the message fields.
   */
  fd = job_m_in.m_lc_vfs_readwrite.fd;
  buf = job_m_in.m_lc_vfs_readwrite.buf;
  nbytes = job_m_in.m_lc_vfs_readwrite.len;
  cum_io = job_m_in.m_lc_vfs_readwrite.cum_io;

  f = fp->fp_filp[fd];
  assert(f != NULL);

  /* Reads take a read lock on the vnode; writes take a write lock. */
  locktype = (job_call_nr == VFS_READ) ? VNODE_READ : VNODE_WRITE;
  op = (job_call_nr == VFS_READ) ? READING : WRITING;
  lock_filp(f, locktype);

  r = rw_pipe(op, who_e, f, job_call_nr, fd, buf, nbytes, cum_io);

  if (r != SUSPEND) { /* Do we have results to report? */
	/* Process is writing, but there is no reader. Send a SIGPIPE signal.
	 * This should match the corresponding code in read_write().
	 */
	if (r == EPIPE && op == WRITING) {
		if (!(f->filp_flags & O_NOSIGPIPE))
			sys_kill(fp->fp_endpoint, SIGPIPE);
	}

	replycode(fp->fp_endpoint, r);
  }

  unlock_filp(f);
}
257 
258 /*===========================================================================*
259  *			       do_work					     *
260  *===========================================================================*/
261 static void do_work(void)
262 {
263   unsigned int call_index;
264   int error;
265 
266   if (fp->fp_pid == PID_FREE) {
267 	/* Process vanished before we were able to handle request.
268 	 * Replying has no use. Just drop it.
269 	 */
270 	return;
271   }
272 
273   memset(&job_m_out, 0, sizeof(job_m_out));
274 
275   /* At this point we assume that we're dealing with a call that has been
276    * made specifically to VFS. Typically it will be a POSIX call from a
277    * normal process, but we also handle a few calls made by drivers such
278    * such as UDS and VND through here. Call the internal function that
279    * does the work.
280    */
281   if (IS_VFS_CALL(job_call_nr)) {
282 	call_index = (unsigned int) (job_call_nr - VFS_BASE);
283 
284 	if (call_index < NR_VFS_CALLS && call_vec[call_index] != NULL) {
285 #if ENABLE_SYSCALL_STATS
286 		calls_stats[call_index]++;
287 #endif
288 		error = (*call_vec[call_index])();
289 	} else
290 		error = ENOSYS;
291   } else
292 	error = ENOSYS;
293 
294   /* Copy the results back to the user and send reply. */
295   if (error != SUSPEND) reply(&job_m_out, fp->fp_endpoint, error);
296 }
297 
298 /*===========================================================================*
299  *				sef_cb_lu_prepare			     *
300  *===========================================================================*/
301 static int sef_cb_lu_prepare(int state)
302 {
303 /* This function is called to decide whether we can enter the given live
304  * update state, and to prepare for such an update. If we are requested to
305  * update to a request-free or protocol-free state, make sure there is no work
306  * pending or being processed, and shut down all worker threads.
307  */
308 
309   switch (state) {
310   case SEF_LU_STATE_REQUEST_FREE:
311   case SEF_LU_STATE_PROTOCOL_FREE:
312 	if (!worker_idle()) {
313 		printf("VFS: worker threads not idle, blocking update\n");
314 		break;
315 	}
316 
317 	worker_cleanup();
318 
319 	return OK;
320   }
321 
322   return ENOTREADY;
323 }
324 
325 /*===========================================================================*
326  *			       sef_cb_lu_state_changed			     *
327  *===========================================================================*/
328 static void sef_cb_lu_state_changed(int old_state, int state)
329 {
330 /* Worker threads (especially their stacks) pose a serious problem for state
331  * transfer during live update, and therefore, we shut down all worker threads
332  * during live update and restart them afterwards. This function is called in
333  * the old VFS instance when the state changed. We use it to restart worker
334  * threads after a failed live update.
335  */
336 
337   if (state != SEF_LU_STATE_NULL)
338 	return;
339 
340   switch (old_state) {
341   case SEF_LU_STATE_REQUEST_FREE:
342   case SEF_LU_STATE_PROTOCOL_FREE:
343 	worker_init();
344   }
345 }
346 
347 /*===========================================================================*
348  *				sef_cb_init_lu				     *
349  *===========================================================================*/
350 static int sef_cb_init_lu(int type, sef_init_info_t *info)
351 {
352 /* This function is called in the new VFS instance during a live update. */
353   int r;
354 
355   /* Perform regular state transfer. */
356   if ((r = SEF_CB_INIT_LU_DEFAULT(type, info)) != OK)
357 	return r;
358 
359   /* Recreate worker threads, if necessary. */
360   switch (info->prepare_state) {
361   case SEF_LU_STATE_REQUEST_FREE:
362   case SEF_LU_STATE_PROTOCOL_FREE:
363 	worker_init();
364   }
365 
366   return OK;
367 }
368 
369 /*===========================================================================*
370  *			       sef_local_startup			     *
371  *===========================================================================*/
372 static void sef_local_startup(void)
373 {
374   /* Register init callbacks. */
375   sef_setcb_init_fresh(sef_cb_init_fresh);
376   sef_setcb_init_restart(SEF_CB_INIT_RESTART_STATEFUL);
377 
378   /* Register live update callbacks. */
379   sef_setcb_init_lu(sef_cb_init_lu);
380   sef_setcb_lu_prepare(sef_cb_lu_prepare);
381   sef_setcb_lu_state_changed(sef_cb_lu_state_changed);
382   sef_setcb_lu_state_isvalid(sef_cb_lu_state_isvalid_standard);
383 
384   /* Let SEF perform startup. */
385   sef_startup();
386 }
387 
388 /*===========================================================================*
389  *				sef_cb_init_fresh			     *
390  *===========================================================================*/
391 static int sef_cb_init_fresh(int UNUSED(type), sef_init_info_t *info)
392 {
393 /* Initialize the virtual file server. */
394   int s, i;
395   struct fproc *rfp;
396   message mess;
397   struct rprocpub rprocpub[NR_BOOT_PROCS];
398 
399   self = NULL;
400   verbose = 0;
401 
402   /* Initialize proc endpoints to NONE */
403   for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
404 	rfp->fp_endpoint = NONE;
405 	rfp->fp_pid = PID_FREE;
406   }
407 
408   /* Initialize the process table with help of the process manager messages.
409    * Expect one message for each system process with its slot number and pid.
410    * When no more processes follow, the magic process number NONE is sent.
411    * Then, stop and synchronize with the PM.
412    */
413   do {
414 	if ((s = sef_receive(PM_PROC_NR, &mess)) != OK)
415 		panic("VFS: couldn't receive from PM: %d", s);
416 
417 	if (mess.m_type != VFS_PM_INIT)
418 		panic("unexpected message from PM: %d", mess.m_type);
419 
420 	if (NONE == mess.VFS_PM_ENDPT) break;
421 
422 	rfp = &fproc[mess.VFS_PM_SLOT];
423 	rfp->fp_flags = FP_NOFLAGS;
424 	rfp->fp_pid = mess.VFS_PM_PID;
425 	rfp->fp_endpoint = mess.VFS_PM_ENDPT;
426 	rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
427 	rfp->fp_realuid = (uid_t) SYS_UID;
428 	rfp->fp_effuid = (uid_t) SYS_UID;
429 	rfp->fp_realgid = (gid_t) SYS_GID;
430 	rfp->fp_effgid = (gid_t) SYS_GID;
431 	rfp->fp_umask = ~0;
432   } while (TRUE);			/* continue until process NONE */
433   mess.m_type = OK;			/* tell PM that we succeeded */
434   s = ipc_send(PM_PROC_NR, &mess);		/* send synchronization message */
435 
436   system_hz = sys_hz();
437 
438   /* Subscribe to block and character driver events. */
439   s = ds_subscribe("drv\\.[bc]..\\..*", DSF_INITIAL | DSF_OVERWRITE);
440   if (s != OK) panic("VFS: can't subscribe to driver events (%d)", s);
441 
442   /* Initialize worker threads */
443   worker_init();
444 
445   /* Initialize global locks */
446   if (mthread_mutex_init(&bsf_lock, NULL) != 0)
447 	panic("VFS: couldn't initialize block special file lock");
448 
449   init_dmap();			/* Initialize device table. */
450 
451   /* Map all the services in the boot image. */
452   if ((s = sys_safecopyfrom(RS_PROC_NR, info->rproctab_gid, 0,
453 			    (vir_bytes) rprocpub, sizeof(rprocpub))) != OK){
454 	panic("sys_safecopyfrom failed: %d", s);
455   }
456   for (i = 0; i < NR_BOOT_PROCS; i++) {
457 	if (rprocpub[i].in_use) {
458 		if ((s = map_service(&rprocpub[i])) != OK) {
459 			panic("VFS: unable to map service: %d", s);
460 		}
461 	}
462   }
463 
464   /* Initialize locks and initial values for all processes. */
465   for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
466 	if (mutex_init(&rfp->fp_lock, NULL) != 0)
467 		panic("unable to initialize fproc lock");
468 	rfp->fp_worker = NULL;
469 #if LOCK_DEBUG
470 	rfp->fp_vp_rdlocks = 0;
471 	rfp->fp_vmnt_rdlocks = 0;
472 #endif
473 
474 	/* Initialize process directories. mount_fs will set them to the
475 	 * correct values.
476 	 */
477 	for (i = 0; i < OPEN_MAX; i++)
478 		rfp->fp_filp[i] = NULL;
479 	rfp->fp_rd = NULL;
480 	rfp->fp_wd = NULL;
481   }
482 
483   init_vnodes();		/* init vnodes */
484   init_vmnts();			/* init vmnt structures */
485   init_select();		/* init select() structures */
486   init_filps();			/* Init filp structures */
487 
488   /* Mount PFS and initial file system root. */
489   worker_start(fproc_addr(VFS_PROC_NR), do_init_root, &mess /*unused*/,
490 	FALSE /*use_spare*/);
491 
492   return(OK);
493 }
494 
495 /*===========================================================================*
496  *			       do_init_root				     *
497  *===========================================================================*/
/*===========================================================================*
 *			       do_init_root				     *
 *===========================================================================*/
static void do_init_root(void)
{
/* Mount the pipe file server and the boot ramdisk as the root file system.
 * Runs on a worker thread started at the end of sef_cb_init_fresh().
 */
  char *mount_type, *mount_label;
  int r;

  /* Disallow requests from e.g. init(8) while doing the initial mounting. */
  worker_allow(FALSE);

  /* Mount the pipe file server. */
  mount_pfs();

  /* Mount the root file system. */
  mount_type = "mfs";       /* FIXME: use boot image process name instead */
  mount_label = "fs_imgrd"; /* FIXME: obtain this from RS */

  r = mount_fs(DEV_IMGRD, "bootramdisk", "/", MFS_PROC_NR, 0, mount_type,
	mount_label);
  if (r != OK)
	panic("Failed to initialize root");

  /* All done with mounting, allow requests now. */
  worker_allow(TRUE);
}
521 
522 /*===========================================================================*
523  *				lock_proc				     *
524  *===========================================================================*/
525 void lock_proc(struct fproc *rfp)
526 {
527   int r;
528   struct worker_thread *org_self;
529 
530   r = mutex_trylock(&rfp->fp_lock);
531   if (r == 0) return;
532 
533   org_self = worker_suspend();
534 
535   if ((r = mutex_lock(&rfp->fp_lock)) != 0)
536 	panic("unable to lock fproc lock: %d", r);
537 
538   worker_resume(org_self);
539 }
540 
541 /*===========================================================================*
542  *				unlock_proc				     *
543  *===========================================================================*/
544 void unlock_proc(struct fproc *rfp)
545 {
546   int r;
547 
548   if ((r = mutex_unlock(&rfp->fp_lock)) != 0)
549 	panic("Failed to unlock: %d", r);
550 }
551 
552 /*===========================================================================*
553  *				thread_cleanup				     *
554  *===========================================================================*/
555 void thread_cleanup(void)
556 {
557 /* Perform cleanup actions for a worker thread. */
558 
559 #if LOCK_DEBUG
560   check_filp_locks_by_me();
561   check_vnode_locks_by_me(fp);
562   check_vmnt_locks_by_me(fp);
563 #endif
564 
565   if (fp->fp_flags & FP_SRV_PROC) {
566 	struct vmnt *vmp;
567 
568 	if ((vmp = find_vmnt(fp->fp_endpoint)) != NULL) {
569 		vmp->m_flags &= ~VMNT_CALLBACK;
570 	}
571   }
572 }
573 
574 /*===========================================================================*
575  *				get_work				     *
576  *===========================================================================*/
/*===========================================================================*
 *				get_work				     *
 *===========================================================================*/
static int get_work(void)
{
  /* Normally wait for new input.  However, if 'reviving' is nonzero, a
   * suspended process must be awakened.  Return TRUE if there is a message to
   * process (usually newly received, but possibly a resumed request), or FALSE
   * if a thread for other activities has been spawned instead.
   */
  int r, proc_p;
  register struct fproc *rp;

  if (reviving != 0) {
	/* Find a suspended process. */
	for (rp = &fproc[0]; rp < &fproc[NR_PROCS]; rp++)
		if (rp->fp_pid != PID_FREE && (rp->fp_flags & FP_REVIVED))
			return unblock(rp); /* So main loop can process job */

	/* 'reviving' claimed there was someone, but nobody was found. */
	panic("VFS: get_work couldn't revive anyone");
  }

  for(;;) {
	/* Normal case.  No one to revive. Get a useful request. */
	if ((r = sef_receive(ANY, &m_in)) != OK) {
		panic("VFS: sef_receive error: %d", r);
	}

	/* Map the sender endpoint to an fproc slot; out-of-range process
	 * numbers (e.g. kernel tasks) leave fp set to NULL.
	 */
	proc_p = _ENDPOINT_P(m_in.m_source);
	if (proc_p < 0 || proc_p >= NR_PROCS) fp = NULL;
	else fp = &fproc[proc_p];

	/* Negative who_p is never used to access the fproc array. Negative
	 * numbers (kernel tasks) are treated in a special way.
	 */
	if (fp && fp->fp_endpoint == NONE) {
		printf("VFS: ignoring request from %d: NONE endpoint %d (%d)\n",
			m_in.m_source, who_p, m_in.m_type);
		continue;
	}

	/* Internal consistency check; our mental image of process numbers and
	 * endpoints must match with how the rest of the system thinks of them.
	 */
	if (fp && fp->fp_endpoint != who_e) {
		if (fproc[who_p].fp_endpoint == NONE)
			printf("slot unknown even\n");

		panic("VFS: receive endpoint inconsistent (source %d, who_p "
			"%d, stored ep %d, who_e %d).\n", m_in.m_source, who_p,
			fproc[who_p].fp_endpoint, who_e);
	}

	return TRUE;
  }
  /* NOTREACHED */
}
631 
632 /*===========================================================================*
633  *				reply					     *
634  *===========================================================================*/
635 static void reply(message *m_out, endpoint_t whom, int result)
636 {
637 /* Send a reply to a user process.  If the send fails, just ignore it. */
638   int r;
639 
640   m_out->m_type = result;
641   r = ipc_sendnb(whom, m_out);
642   if (r != OK) {
643 	printf("VFS: %d couldn't send reply %d to %d: %d\n", mthread_self(),
644 		result, whom, r);
645 	util_stacktrace();
646   }
647 }
648 
649 /*===========================================================================*
650  *				replycode				     *
651  *===========================================================================*/
652 void replycode(endpoint_t whom, int result)
653 {
654 /* Send a reply to a user process.  If the send fails, just ignore it. */
655   message m_out;
656 
657   memset(&m_out, 0, sizeof(m_out));
658 
659   reply(&m_out, whom, result);
660 }
661 
662 /*===========================================================================*
663  *				service_pm_postponed			     *
664  *===========================================================================*/
/*===========================================================================*
 *				service_pm_postponed			     *
 *===========================================================================*/
void service_pm_postponed(void)
{
/* Handle a PM request that was postponed until the target process became
 * idle. Runs on a worker thread associated with that process; the request is
 * available in job_m_in. In all cases a reply is sent back to PM.
 */
  int r, term_signal;
  vir_bytes core_path;
  vir_bytes exec_path, stack_frame, pc, newsp, ps_str;
  size_t exec_path_len, stack_frame_len;
  endpoint_t proc_e;
  message m_out;

  memset(&m_out, 0, sizeof(m_out));

  switch(job_call_nr) {
  case VFS_PM_EXEC:
	/* Perform the VFS side of an execve() call. */
	proc_e = job_m_in.VFS_PM_ENDPT;
	exec_path = (vir_bytes) job_m_in.VFS_PM_PATH;
	exec_path_len = (size_t) job_m_in.VFS_PM_PATH_LEN;
	stack_frame = (vir_bytes) job_m_in.VFS_PM_FRAME;
	stack_frame_len = (size_t) job_m_in.VFS_PM_FRAME_LEN;
	ps_str = (vir_bytes) job_m_in.VFS_PM_PS_STR;

	assert(proc_e == fp->fp_endpoint);

	r = pm_exec(exec_path, exec_path_len, stack_frame, stack_frame_len,
		&pc, &newsp, &ps_str);

	/* Reply status to PM */
	m_out.m_type = VFS_PM_EXEC_REPLY;
	m_out.VFS_PM_ENDPT = proc_e;
	m_out.VFS_PM_PC = (void *) pc;
	m_out.VFS_PM_STATUS = r;
	m_out.VFS_PM_NEWSP = (void *) newsp;
	m_out.VFS_PM_NEWPS_STR = ps_str;

	break;

  case VFS_PM_EXIT:
	/* Clean up the VFS state of an exiting process. */
	proc_e = job_m_in.VFS_PM_ENDPT;

	assert(proc_e == fp->fp_endpoint);

	pm_exit();

	/* Reply dummy status to PM for synchronization */
	m_out.m_type = VFS_PM_EXIT_REPLY;
	m_out.VFS_PM_ENDPT = proc_e;

	break;

  case VFS_PM_DUMPCORE:
	/* Generate a core dump for a process terminated by a signal. */
	proc_e = job_m_in.VFS_PM_ENDPT;
	term_signal = job_m_in.VFS_PM_TERM_SIG;
	core_path = (vir_bytes) job_m_in.VFS_PM_PATH;

	/* A zero signal used to indicate that a coredump should be generated
	 * without terminating the target process, but this was broken in so
	 * many ways that we no longer support this. Userland should implement
	 * this functionality itself, for example through ptrace(2).
	 */
	if (term_signal == 0)
		panic("no termination signal given for coredump!");

	assert(proc_e == fp->fp_endpoint);

	r = pm_dumpcore(term_signal, core_path);

	/* Reply status to PM */
	m_out.m_type = VFS_PM_CORE_REPLY;
	m_out.VFS_PM_ENDPT = proc_e;
	m_out.VFS_PM_STATUS = r;

	break;

  case VFS_PM_UNPAUSE:
	/* Abort any blocking call the process is suspended on. */
	proc_e = job_m_in.VFS_PM_ENDPT;

	assert(proc_e == fp->fp_endpoint);

	unpause();

	m_out.m_type = VFS_PM_UNPAUSE_REPLY;
	m_out.VFS_PM_ENDPT = proc_e;

	break;

  default:
	panic("Unhandled postponed PM call %d", job_m_in.m_type);
  }

  r = ipc_send(PM_PROC_NR, &m_out);
  if (r != OK)
	panic("service_pm_postponed: ipc_send failed: %d", r);
}
757 
758 /*===========================================================================*
759  *				service_pm				     *
760  *===========================================================================*/
/*===========================================================================*
 *				service_pm				     *
 *===========================================================================*/
static void service_pm(void)
{
/* Process a request from PM. This function is called from the main thread, and
 * may therefore not block. Any requests that may require blocking the calling
 * thread must be executed in a separate thread. Aside from VFS_PM_REBOOT, all
 * requests from PM involve another, target process: for example, PM tells VFS
 * that a process is performing a setuid() call. For some requests however,
 * that other process may not be idle, and in that case VFS must serialize the
 * PM request handling with any operation is it handling for that target
 * process. As it happens, the requests that may require blocking are also the
 * ones where the target process may not be idle. For both these reasons, such
 * requests are run in worker threads associated to the target process.
 */
  struct fproc *rfp;
  int r, slot;
  message m_out;

  memset(&m_out, 0, sizeof(m_out));

  switch (call_nr) {
  case VFS_PM_SETUID:
	/* Update the real/effective user ID of a process. Non-blocking. */
	{
		endpoint_t proc_e;
		uid_t euid, ruid;

		proc_e = m_in.VFS_PM_ENDPT;
		euid = m_in.VFS_PM_EID;
		ruid = m_in.VFS_PM_RID;

		pm_setuid(proc_e, euid, ruid);

		m_out.m_type = VFS_PM_SETUID_REPLY;
		m_out.VFS_PM_ENDPT = proc_e;
	}
	break;

  case VFS_PM_SETGID:
	/* Update the real/effective group ID of a process. Non-blocking. */
	{
		endpoint_t proc_e;
		gid_t egid, rgid;

		proc_e = m_in.VFS_PM_ENDPT;
		egid = m_in.VFS_PM_EID;
		rgid = m_in.VFS_PM_RID;

		pm_setgid(proc_e, egid, rgid);

		m_out.m_type = VFS_PM_SETGID_REPLY;
		m_out.VFS_PM_ENDPT = proc_e;
	}
	break;

  case VFS_PM_SETSID:
	/* A process has become a session leader. Non-blocking. */
	{
		endpoint_t proc_e;

		proc_e = m_in.VFS_PM_ENDPT;
		pm_setsid(proc_e);

		m_out.m_type = VFS_PM_SETSID_REPLY;
		m_out.VFS_PM_ENDPT = proc_e;
	}
	break;

  case VFS_PM_EXEC:
  case VFS_PM_EXIT:
  case VFS_PM_DUMPCORE:
  case VFS_PM_UNPAUSE:
	/* These requests may block and must be serialized with any ongoing
	 * call of the target process; see service_pm_postponed(), which will
	 * eventually handle them and reply to PM.
	 */
	{
		endpoint_t proc_e = m_in.VFS_PM_ENDPT;

		if(isokendpt(proc_e, &slot) != OK) {
			printf("VFS: proc ep %d not ok\n", proc_e);
			return;
		}

		rfp = &fproc[slot];

		/* PM requests on behalf of a proc are handled after the
		 * system call that might be in progress for that proc has
		 * finished. If the proc is not busy, we start a new thread.
		 */
		worker_start(rfp, NULL, &m_in, FALSE /*use_spare*/);

		return;
	}
  case VFS_PM_FORK:
  case VFS_PM_SRV_FORK:
	/* A process has forked; clone its VFS state into the child slot.
	 * For a service fork, also set the child's user and group IDs.
	 */
	{
		endpoint_t pproc_e, proc_e;
		pid_t child_pid;
		uid_t reuid;
		gid_t regid;

		pproc_e = m_in.VFS_PM_PENDPT;
		proc_e = m_in.VFS_PM_ENDPT;
		child_pid = m_in.VFS_PM_CPID;
		reuid = m_in.VFS_PM_REUID;
		regid = m_in.VFS_PM_REGID;

		pm_fork(pproc_e, proc_e, child_pid);
		m_out.m_type = VFS_PM_FORK_REPLY;

		if (call_nr == VFS_PM_SRV_FORK) {
			m_out.m_type = VFS_PM_SRV_FORK_REPLY;
			pm_setuid(proc_e, reuid, reuid);
			pm_setgid(proc_e, regid, regid);
		}

		m_out.VFS_PM_ENDPT = proc_e;
	}
	break;
  case VFS_PM_SETGROUPS:
	/* Update the supplementary group list of a process. Non-blocking. */
	{
		endpoint_t proc_e;
		int group_no;
		gid_t *group_addr;

		proc_e = m_in.VFS_PM_ENDPT;
		group_no = m_in.VFS_PM_GROUP_NO;
		group_addr = (gid_t *) m_in.VFS_PM_GROUP_ADDR;

		pm_setgroups(proc_e, group_no, group_addr);

		m_out.m_type = VFS_PM_SETGROUPS_REPLY;
		m_out.VFS_PM_ENDPT = proc_e;
	}
	break;

  case VFS_PM_REBOOT:
	/* Reboot requests are not considered postponed PM work and are instead
	 * handled from a separate worker thread that is associated with PM's
	 * process. PM makes no regular VFS calls, and thus, from VFS's
	 * perspective, PM is always idle. Therefore, we can safely do this.
	 * We do assume that PM sends us only one VFS_PM_REBOOT message at
	 * once, or ever for that matter. :)
	 */
	worker_start(fproc_addr(PM_PROC_NR), pm_reboot, &m_in,
		FALSE /*use_spare*/);

	return;

    default:
	printf("VFS: don't know how to handle PM request %d\n", call_nr);

	return;
  }

  /* For the non-blocking requests handled above, reply to PM right away. */
  r = ipc_send(PM_PROC_NR, &m_out);
  if (r != OK)
	panic("service_pm: ipc_send failed: %d", r);
}
913 
914 
915 /*===========================================================================*
916  *				unblock					     *
917  *===========================================================================*/
/*===========================================================================*
 *				unblock					     *
 *===========================================================================*/
static int
unblock(struct fproc *rfp)
{
/* Unblock a process that was previously blocked on a pipe or a lock.  This is
 * done by reconstructing the original request and continuing/repeating it.
 * This function returns TRUE when it has restored a request for execution, and
 * FALSE if the caller should continue looking for work to do.
 */
  int blocked_on;

  blocked_on = rfp->fp_blocked_on;

  /* Reconstruct the original request from the saved data. */
  memset(&m_in, 0, sizeof(m_in));
  m_in.m_source = rfp->fp_endpoint;
  switch (blocked_on) {
  case FP_BLOCKED_ON_PIPE:
	/* Rebuild the read/write request from the saved pipe state. */
	assert(rfp->fp_pipe.callnr == VFS_READ ||
	    rfp->fp_pipe.callnr == VFS_WRITE);
	m_in.m_type = rfp->fp_pipe.callnr;
	m_in.m_lc_vfs_readwrite.fd = rfp->fp_pipe.fd;
	m_in.m_lc_vfs_readwrite.buf = rfp->fp_pipe.buf;
	m_in.m_lc_vfs_readwrite.len = rfp->fp_pipe.nbytes;
	m_in.m_lc_vfs_readwrite.cum_io = rfp->fp_pipe.cum_io;
	break;
  case FP_BLOCKED_ON_FLOCK:
	/* Rebuild the blocking fcntl(F_SETLKW) request. */
	assert(rfp->fp_flock.cmd == F_SETLKW);
	m_in.m_type = VFS_FCNTL;
	m_in.m_lc_vfs_fcntl.fd = rfp->fp_flock.fd;
	m_in.m_lc_vfs_fcntl.cmd = rfp->fp_flock.cmd;
	m_in.m_lc_vfs_fcntl.arg_ptr = rfp->fp_flock.arg;
	break;
  default:
	panic("unblocking call blocked on %d ??", blocked_on);
  }

  rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;	/* no longer blocked */
  rfp->fp_flags &= ~FP_REVIVED;
  reviving--;
  assert(reviving >= 0);

  /* Pending pipe reads/writes cannot be repeated as is, and thus require a
   * special resumption procedure.
   */
  if (blocked_on == FP_BLOCKED_ON_PIPE) {
	worker_start(rfp, do_pending_pipe, &m_in, FALSE /*use_spare*/);
	return(FALSE);	/* Retrieve more work */
  }

  /* A lock request. Repeat the original request as though it just came in. */
  fp = rfp;
  return(TRUE);	/* We've unblocked a process */
}
971