xref: /minix3/minix/kernel/system.c (revision ebfedea0ce5bbe81e252ddf32d732e40fb633fae)
/* This task handles the interface between the kernel and user-level servers.
 * System services can be accessed by doing a system call. System calls are
 * transformed into request messages, which are handled by this task. By
 * convention, a sys_call() is transformed into a SYS_CALL request message
 * that is handled in a function named do_call().
 *
 * A private call vector is used to map all system calls to the functions that
 * handle them. The actual handler functions are contained in separate files
 * to keep this file clean. The call vector is used in the system task's main
 * loop to handle all incoming requests.
 *
 * In addition to the main sys_task() entry point, which starts the main loop,
 * there are several other minor entry points:
 *   get_priv:		assign privilege structure to user or system process
 *   set_sendto_bit:	allow a process to send messages to a new target
 *   unset_sendto_bit:	disallow a process from sending messages to a target
 *   fill_sendto_mask:	fill the target mask of a given process
 *   send_sig:		send a signal directly to a system process
 *   cause_sig:		take action to cause a signal to occur via a signal mgr
 *   sig_delay_done:	tell PM that a process is no longer sending
 *   send_diag_sig:	send a diagnostics signal to interested processes
 *   get_randomness:	accumulate randomness in a buffer
 *   clear_endpoint:	remove a process' ability to send and receive messages
 *   sched_proc:	schedule a process
 *
 * Changes:
 *   Nov 22, 2009   get_priv supports static priv ids  (Cristiano Giuffrida)
 *   Aug 04, 2005   check if system call is allowed  (Jorrit N. Herder)
 *   Jul 20, 2005   send signal to services with message  (Jorrit N. Herder)
 *   Jan 15, 2005   new, generalized virtual copy function  (Jorrit N. Herder)
 *   Oct 10, 2004   dispatch system calls from call vector  (Jorrit N. Herder)
 *   Sep 30, 2004   source code documentation updated  (Jorrit N. Herder)
 */

#include "kernel/kernel.h"
#include "kernel/system.h"
#include "kernel/vm.h"
#include "kernel/clock.h"
#include <stdlib.h>
#include <stddef.h>
#include <assert.h>
#include <signal.h>
#include <unistd.h>
#include <minix/endpoint.h>
#include <minix/safecopies.h>

/* Declaration of the call vector that defines the mapping of system calls
 * to handler functions. The vector is initialized in system_init() with
 * map(), which asserts at run time that each call number is within range.
 * Unmapped entries are left NULL and rejected by the dispatcher.
 */
static int (*call_vec[NR_SYS_CALLS])(struct proc * caller, message *m_ptr);

#define map(call_nr, handler)					\
    {	int call_index = call_nr-KERNEL_CALL;			\
	assert(call_index >= 0 && call_index < NR_SYS_CALLS);	\
	call_vec[call_index] = (handler); }

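/* Illustrative sketch (comment only, not compiled): expanding the macro for
 * one call shows the indexing scheme, e.g.
 *
 *	map(SYS_FORK, do_fork);
 *
 * stores do_fork at call_vec[SYS_FORK - KERNEL_CALL], after asserting that
 * the index is within the vector's bounds.
 */
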
static void kernel_call_finish(struct proc * caller, message *msg, int result)
{
  if(result == VMSUSPEND) {
	  /* Special case: message has to be saved for handling
	   * until VM tells us it's allowed. VM has been notified
	   * and we must wait for its reply to restart the call.
	   */
	  assert(RTS_ISSET(caller, RTS_VMREQUEST));
	  assert(caller->p_vmrequest.type == VMSTYPE_KERNELCALL);
	  caller->p_vmrequest.saved.reqmsg = *msg;
	  caller->p_misc_flags |= MF_KCALL_RESUME;
  } else {
	  /*
	   * The call is finished. We could have been suspended because of VM,
	   * so remove the request message.
	   */
	  caller->p_vmrequest.saved.reqmsg.m_source = NONE;
	  if (result != EDONTREPLY) {
		  /* copy the result as a message to the original user buffer */
		  msg->m_source = SYSTEM;
		  msg->m_type = result;		/* report status of call */
#if DEBUG_IPC_HOOK
		  hook_ipc_msgkresult(msg, caller);
#endif
		  if (copy_msg_to_user(msg, (message *)caller->p_delivermsg_vir)) {
			  printf("WARNING wrong user pointer 0x%08x from "
					  "process %s / %d\n",
					  caller->p_delivermsg_vir,
					  caller->p_name,
					  caller->p_endpoint);
			  cause_sig(proc_nr(caller), SIGSEGV);
		  }
	  }
  }
}

static int kernel_call_dispatch(struct proc * caller, message *msg)
{
  int result = OK;
  int call_nr;

#if DEBUG_IPC_HOOK
  hook_ipc_msgkcall(msg, caller);
#endif
  call_nr = msg->m_type - KERNEL_CALL;

  /* See if the caller made a valid request and try to handle it. */
  if (call_nr < 0 || call_nr >= NR_SYS_CALLS) {	/* check call number */
	  printf("SYSTEM: illegal request %d from %d.\n",
			  call_nr, msg->m_source);
	  result = EBADREQUEST;			/* illegal message type */
  }
  else if (!GET_BIT(priv(caller)->s_k_call_mask, call_nr)) {
	  printf("SYSTEM: denied request %d from %d.\n",
			  call_nr, msg->m_source);
	  result = ECALLDENIED;			/* call not allowed by mask */
  } else {
	  /* handle the system call */
	  if (call_vec[call_nr])
		  result = (*call_vec[call_nr])(caller, msg);
	  else {
		  printf("Unused kernel call %d from %d\n",
				  call_nr, caller->p_endpoint);
		  result = EBADREQUEST;
	  }
  }

  return result;
}

/*===========================================================================*
 *				kernel_call				     *
 *===========================================================================*/
/*
 * This function checks the basic syscall parameters and, if accepted,
 * dispatches handling to the right handler.
 */
void kernel_call(message *m_user, struct proc * caller)
{
  int result = OK;
  message msg;

  caller->p_delivermsg_vir = (vir_bytes) m_user;
  /*
   * The ldt and cr3 of the caller process are already loaded, either because
   * the process has just trapped into the kernel or because they were set in
   * switch_to_user() before we resumed execution of an interrupted kernel
   * call.
   */
  if (copy_msg_from_user(m_user, &msg) == 0) {
	  msg.m_source = caller->p_endpoint;
	  result = kernel_call_dispatch(caller, &msg);
  }
  else {
	  printf("WARNING wrong user pointer 0x%08x from process %s / %d\n",
			  m_user, caller->p_name, caller->p_endpoint);
	  cause_sig(proc_nr(caller), SIGSEGV);
	  return;
  }

  /* remember who invoked the kcall so we can bill it its time */
  kbill_kcall = caller;

  kernel_call_finish(caller, &msg, result);
}
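
/* Illustrative sketch (comment only, not compiled): a user-level server
 * reaches kernel_call() through the kernel-call trap. With libsys this
 * amounts to something like the following; SYS_TIMES is just an example
 * call and the reply layout is call-specific.
 *
 *	message m;
 *	memset(&m, 0, sizeof(m));
 *	m.m_type = SYS_TIMES;
 *	int r = _kernel_call(SYS_TIMES, &m);
 *	// on return, m holds the reply that kernel_call_finish() copied
 *	// back into the caller's buffer
 */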

/*===========================================================================*
 *				system_init				     *
 *===========================================================================*/
void system_init(void)
{
  register struct priv *sp;
  int i;

  /* Initialize IRQ handler hooks. Mark all hooks available. */
  for (i=0; i<NR_IRQ_HOOKS; i++) {
      irq_hooks[i].proc_nr_e = NONE;
  }

  /* Initialize all alarm timers for all processes. */
  for (sp=BEG_PRIV_ADDR; sp < END_PRIV_ADDR; sp++) {
    tmr_inittimer(&(sp->s_alarm_timer));
  }

  /* Initialize the call vector to empty (NULL) entries; some system calls
   * may be disabled or nonexistent. Then explicitly map known calls to their
   * handler functions. This is done with a macro that asserts that the call
   * number is within range. The ordering is not important here.
   */
  for (i=0; i<NR_SYS_CALLS; i++) {
      call_vec[i] = NULL;
  }

  /* Process management. */
  map(SYS_FORK, do_fork); 		/* a process forked a new process */
  map(SYS_EXEC, do_exec);		/* update process after execute */
  map(SYS_CLEAR, do_clear);		/* clean up after process exit */
  map(SYS_EXIT, do_exit);		/* a system process wants to exit */
  map(SYS_PRIVCTL, do_privctl);		/* system privileges control */
  map(SYS_TRACE, do_trace);		/* request a trace operation */
  map(SYS_SETGRANT, do_setgrant);	/* set the grant table location */
  map(SYS_RUNCTL, do_runctl);		/* set/clear stop flag of a process */
  map(SYS_UPDATE, do_update);		/* update a process into another */
  map(SYS_STATECTL, do_statectl);	/* let a process control its state */

  /* Signal handling. */
  map(SYS_KILL, do_kill); 		/* cause a process to be signaled */
  map(SYS_GETKSIG, do_getksig);		/* signal manager checks for signals */
  map(SYS_ENDKSIG, do_endksig);		/* signal manager finished signal */
  map(SYS_SIGSEND, do_sigsend);		/* start POSIX-style signal */
  map(SYS_SIGRETURN, do_sigreturn);	/* return from POSIX-style signal */

  /* Device I/O. */
  map(SYS_IRQCTL, do_irqctl);  		/* interrupt control operations */
#if defined(__i386__)
  map(SYS_DEVIO, do_devio);   		/* inb, inw, inl, outb, outw, outl */
  map(SYS_VDEVIO, do_vdevio);  		/* vector with devio requests */
#endif

  /* Memory management. */
  map(SYS_MEMSET, do_memset);		/* write char to memory area */
  map(SYS_VMCTL, do_vmctl);		/* various VM process settings */

  /* Copying. */
  map(SYS_UMAP, do_umap);		/* map virtual to physical address */
  map(SYS_UMAP_REMOTE, do_umap_remote);	/* do_umap for non-caller process */
  map(SYS_VUMAP, do_vumap);		/* vectored virtual to physical map */
  map(SYS_VIRCOPY, do_vircopy); 	/* use pure virtual addressing */
  map(SYS_PHYSCOPY, do_copy);	 	/* use physical addressing */
  map(SYS_SAFECOPYFROM, do_safecopy_from);/* copy with pre-granted permission */
  map(SYS_SAFECOPYTO, do_safecopy_to);	/* copy with pre-granted permission */
  map(SYS_VSAFECOPY, do_vsafecopy);	/* vectored safecopy */

  /* Safe memset. */
  map(SYS_SAFEMEMSET, do_safememset);	/* memset with pre-granted permission */

  /* Clock functionality. */
  map(SYS_TIMES, do_times);		/* get uptime and process times */
  map(SYS_SETALARM, do_setalarm);	/* schedule a synchronous alarm */
  map(SYS_STIME, do_stime);		/* set the boottime */
  map(SYS_SETTIME, do_settime);		/* set the system time (realtime) */
  map(SYS_VTIMER, do_vtimer);		/* set or retrieve a virtual timer */

  /* System control. */
  map(SYS_ABORT, do_abort);		/* abort MINIX */
  map(SYS_GETINFO, do_getinfo); 	/* request system information */
  map(SYS_DIAGCTL, do_diagctl);		/* diagnostics-related functionality */

  /* Profiling. */
  map(SYS_SPROF, do_sprofile);		/* start/stop statistical profiling */

  /* arm-specific. */
#if defined(__arm__)
  map(SYS_PADCONF, do_padconf);		/* configure pinmux */
#endif

  /* i386-specific. */
#if defined(__i386__)
  map(SYS_READBIOS, do_readbios);	/* read from BIOS locations */
  map(SYS_IOPENABLE, do_iopenable); 	/* enable I/O */
  map(SYS_SDEVIO, do_sdevio);		/* phys_insb, _insw, _outsb, _outsw */
#endif

  /* Machine state switching. */
  map(SYS_SETMCONTEXT, do_setmcontext); /* set machine context */
  map(SYS_GETMCONTEXT, do_getmcontext); /* get machine context */

  /* Scheduling. */
  map(SYS_SCHEDULE, do_schedule);	/* reschedule a process */
  map(SYS_SCHEDCTL, do_schedctl);	/* change process scheduler */
}

/*===========================================================================*
 *				get_priv				     *
 *===========================================================================*/
int get_priv(rc, priv_id)
register struct proc *rc;		/* new (child) process pointer */
int priv_id;				/* privilege id */
{
/* Allocate a new privilege structure for a system process. Privilege ids
 * can be assigned either statically or dynamically.
 */
  register struct priv *sp;                 /* privilege structure */

  if(priv_id == NULL_PRIV_ID) {             /* allocate slot dynamically */
      for (sp = BEG_DYN_PRIV_ADDR; sp < END_DYN_PRIV_ADDR; ++sp)
          if (sp->s_proc_nr == NONE) break;
      if (sp >= END_DYN_PRIV_ADDR) return(ENOSPC);
  }
  else {                                    /* allocate slot from id */
      if(!is_static_priv_id(priv_id)) {
          return EINVAL;                    /* invalid static priv id */
      }
      if(priv[priv_id].s_proc_nr != NONE) {
          return EBUSY;                     /* slot already in use */
      }
      sp = &priv[priv_id];
  }
  rc->p_priv = sp;			    /* assign new slot */
  rc->p_priv->s_proc_nr = proc_nr(rc);	    /* set association */

  return(OK);
}
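
/* Illustrative sketch (comment only, not compiled): a do_privctl()-style
 * caller would request a dynamic slot with NULL_PRIV_ID, or claim a static
 * slot reserved for a boot-image service; 'rc' is hypothetical here.
 *
 *	if ((r = get_priv(rc, NULL_PRIV_ID)) != OK)
 *		return r;	// ENOSPC: dynamic slots exhausted
 */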

/*===========================================================================*
 *				set_sendto_bit				     *
 *===========================================================================*/
void set_sendto_bit(const struct proc *rp, int id)
{
/* Allow a process to send messages to the process(es) associated with the
 * system privilege structure with the given ID.
 */

  /* Disallow the process from sending to a privilege structure with no
   * associated process, and disallow the process from sending to itself.
   */
  if (id_to_nr(id) == NONE || priv_id(rp) == id) {
	unset_sys_bit(priv(rp)->s_ipc_to, id);
	return;
  }

  set_sys_bit(priv(rp)->s_ipc_to, id);

  /* The process that this process can now send to must be able to reply (or
   * vice versa). Therefore, its send mask should be updated as well. Ignore
   * receivers that don't support traps other than RECEIVE; they can't reply
   * or send messages anyway.
   */
  if (priv_addr(id)->s_trap_mask & ~((1 << RECEIVE)))
      set_sys_bit(priv_addr(id)->s_ipc_to, priv_id(rp));
}

/*===========================================================================*
 *				unset_sendto_bit			     *
 *===========================================================================*/
void unset_sendto_bit(const struct proc *rp, int id)
{
/* Prevent a process from sending to another process. Retain the send mask
 * symmetry by also unsetting the bit for the other direction.
 */

  unset_sys_bit(priv(rp)->s_ipc_to, id);

  unset_sys_bit(priv_addr(id)->s_ipc_to, priv_id(rp));
}

/*===========================================================================*
 *			      fill_sendto_mask				     *
 *===========================================================================*/
void fill_sendto_mask(const struct proc *rp, sys_map_t *map)
{
  int i;

  for (i=0; i < NR_SYS_PROCS; i++) {
  	if (get_sys_bit(*map, i))
  		set_sendto_bit(rp, i);
  	else
  		unset_sendto_bit(rp, i);
  }
}
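
/* Illustrative sketch (comment only, not compiled): a caller grants a
 * process its IPC targets by setting one bit per privilege id in a
 * sys_map_t and applying it; USER_PRIV_ID is the privilege id shared by
 * user processes.
 *
 *	sys_map_t map;
 *	int i;
 *	for (i = 0; i < NR_SYS_PROCS; i++) unset_sys_bit(map, i);
 *	set_sys_bit(map, USER_PRIV_ID);
 *	fill_sendto_mask(rp, &map);
 */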

/*===========================================================================*
 *				send_sig				     *
 *===========================================================================*/
int send_sig(endpoint_t ep, int sig_nr)
{
/* Notify a system process about a signal. This is straightforward. Simply
 * set the signal that is to be delivered in the pending signals map and
 * send a notification with source SYSTEM.
 */
  register struct proc *rp;
  struct priv *priv;
  int proc_nr;

  if(!isokendpt(ep, &proc_nr) || isemptyn(proc_nr))
	return EINVAL;

  rp = proc_addr(proc_nr);
  priv = priv(rp);
  if(!priv) return ENOENT;
  sigaddset(&priv->s_sig_pending, sig_nr);
  mini_notify(proc_addr(SYSTEM), rp->p_endpoint);

  return OK;
}

/*===========================================================================*
 *				cause_sig				     *
 *===========================================================================*/
void cause_sig(proc_nr, sig_nr)
proc_nr_t proc_nr;		/* process to be signalled */
int sig_nr;			/* signal to be sent */
{
/* A system process wants to send a signal to a process.  Examples are:
 *  - HARDWARE wanting to cause a SIGSEGV after a CPU exception
 *  - TTY wanting to cause SIGINT upon getting a DEL
 *  - FS wanting to cause SIGPIPE for a broken pipe
 * Signals are handled by sending a message to the signal manager assigned to
 * the process. This function handles the signals and makes sure the signal
 * manager gets them by sending a notification. The process being signaled
 * remains blocked until the signal manager has finished handling all signals
 * for it. Race conditions between calls to this function and the system
 * calls that process pending kernel signals cannot exist. Signal related
 * functions are only called when a user process causes a CPU exception and
 * from the kernel process level, which runs to completion.
 */
  register struct proc *rp, *sig_mgr_rp;
  endpoint_t sig_mgr;
  int sig_mgr_proc_nr;
  int s;

  /* Lookup signal manager. */
  rp = proc_addr(proc_nr);
  sig_mgr = priv(rp)->s_sig_mgr;
  if(sig_mgr == SELF) sig_mgr = rp->p_endpoint;

  /* If the target is its own signal manager, send the signal directly. */
  if(rp->p_endpoint == sig_mgr) {
       if(SIGS_IS_LETHAL(sig_nr)) {
           /* If the signal is lethal, see if a backup signal manager exists. */
           sig_mgr = priv(rp)->s_bak_sig_mgr;
           if(sig_mgr != NONE && isokendpt(sig_mgr, &sig_mgr_proc_nr)) {
               priv(rp)->s_sig_mgr = sig_mgr;
               priv(rp)->s_bak_sig_mgr = NONE;
               sig_mgr_rp = proc_addr(sig_mgr_proc_nr);
               RTS_UNSET(sig_mgr_rp, RTS_NO_PRIV);
               cause_sig(proc_nr, sig_nr); /* try again with the new sig mgr. */
               return;
           }
           /* We are out of luck. Time to panic. */
           proc_stacktrace(rp);
           panic("cause_sig: sig manager %d gets lethal signal %d for itself",
	   	rp->p_endpoint, sig_nr);
       }
       sigaddset(&priv(rp)->s_sig_pending, sig_nr);
       if(OK != send_sig(rp->p_endpoint, SIGKSIGSM))
       	panic("send_sig failed");
       return;
  }

  if((s = sigismember(&rp->p_pending, sig_nr)) < 0)
	panic("sigismember failed");
  /* Check if the signal is already pending. Process it otherwise. */
  if (!s) {
      sigaddset(&rp->p_pending, sig_nr);
      if (! (RTS_ISSET(rp, RTS_SIGNALED))) {		/* other pending */
	  RTS_SET(rp, RTS_SIGNALED | RTS_SIG_PENDING);
          if(OK != send_sig(sig_mgr, SIGKSIG))
	  	panic("send_sig failed");
      }
  }
}
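
/* Illustrative usage (from kernel_call() above): the kernel raises a
 * segmentation violation against a caller that passed a bad message
 * pointer with
 *
 *	cause_sig(proc_nr(caller), SIGSEGV);
 *
 * after which the process's signal manager picks the signal up through
 * SYS_GETKSIG and finishes it with SYS_ENDKSIG.
 */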

/*===========================================================================*
 *				sig_delay_done				     *
 *===========================================================================*/
void sig_delay_done(struct proc *rp)
{
/* A process is now known not to be sending any direct messages.
 * Tell PM that the stop delay has ended by sending a signal to the process.
 * Used for actual signal delivery.
 */

  rp->p_misc_flags &= ~MF_SIG_DELAY;

  cause_sig(proc_nr(rp), SIGSNDELAY);
}

/*===========================================================================*
 *				send_diag_sig				     *
 *===========================================================================*/
void send_diag_sig(void)
{
/* Send a SIGKMESS signal to all processes interested in receiving updates
 * about new diagnostics messages.
 */
  struct priv *privp;
  endpoint_t ep;

  for (privp = BEG_PRIV_ADDR; privp < END_PRIV_ADDR; privp++) {
	if (privp->s_proc_nr != NONE && privp->s_diag_sig == TRUE) {
		ep = proc_addr(privp->s_proc_nr)->p_endpoint;
		send_sig(ep, SIGKMESS);
	}
  }
}

/*===========================================================================*
 *			         clear_memreq				     *
 *===========================================================================*/
static void clear_memreq(struct proc *rp)
{
  struct proc **rpp;

  if (!RTS_ISSET(rp, RTS_VMREQUEST))
	return; /* nothing to do */

  for (rpp = &vmrequest; *rpp != NULL;
     rpp = &(*rpp)->p_vmrequest.nextrequestor) {
	if (*rpp == rp) {
		*rpp = rp->p_vmrequest.nextrequestor;
		break;
	}
  }

  RTS_UNSET(rp, RTS_VMREQUEST);
}
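
/* The loop above is the usual pointer-to-pointer removal idiom for singly
 * linked lists: *rpp names whichever link currently points at the node
 * under inspection (the list head or a nextrequestor field), so unlinking
 * is a single assignment and the head needs no special case.
 */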

/*===========================================================================*
 *			         clear_ipc				     *
 *===========================================================================*/
static void clear_ipc(
  register struct proc *rc	/* slot of process to clean up */
)
{
/* Clear IPC data for a given process slot. */
  struct proc **xpp;			/* iterate over caller queue */

  if (RTS_ISSET(rc, RTS_SENDING)) {
      int target_proc;

      okendpt(rc->p_sendto_e, &target_proc);
      xpp = &proc_addr(target_proc)->p_caller_q; /* destination's queue */
      while (*xpp) {		/* check entire queue */
          if (*xpp == rc) {			/* process is on the queue */
              *xpp = (*xpp)->p_q_link;		/* replace by next process */
#if DEBUG_ENABLE_IPC_WARNINGS
	      printf("endpoint %d / %s removed from queue at %d\n",
	          rc->p_endpoint, rc->p_name, rc->p_sendto_e);
#endif
              break;				/* can only be queued once */
          }
          xpp = &(*xpp)->p_q_link;		/* proceed to next queued */
      }
      RTS_UNSET(rc, RTS_SENDING);
  }
  RTS_UNSET(rc, RTS_RECEIVING);
}

/*===========================================================================*
 *			         clear_endpoint				     *
 *===========================================================================*/
void clear_endpoint(rc)
register struct proc *rc;		/* slot of process to clean up */
{
  if(isemptyp(rc)) panic("clear_endpoint: empty process: %d", rc->p_endpoint);

#if DEBUG_IPC_HOOK
  hook_ipc_clear(rc);
#endif

  /* Make sure that the exiting process is no longer scheduled. */
  RTS_SET(rc, RTS_NO_ENDPOINT);
  if (priv(rc)->s_flags & SYS_PROC)
  {
	priv(rc)->s_asynsize = 0;
  }

  /* If the process happens to be queued trying to send a
   * message, then it must be removed from the message queues.
   */
  clear_ipc(rc);

  /* Likewise, if another process was sending or receiving a message to or
   * from the exiting process, it must be alerted that the process is no
   * longer alive. Check all processes.
   */
  clear_ipc_refs(rc, EDEADSRCDST);

  /* Finally, if the process was blocked on a VM request, remove it from the
   * queue of processes waiting to be processed by VM.
   */
  clear_memreq(rc);
}

/*===========================================================================*
 *			       clear_ipc_refs				     *
 *===========================================================================*/
void clear_ipc_refs(rc, caller_ret)
register struct proc *rc;		/* slot of process to clean up */
int caller_ret;				/* code to return on callers */
{
/* Clear IPC references for a given process slot. */
  struct proc *rp;			/* iterate over process table */
  int src_id;

  /* Tell processes that sent asynchronous messages to 'rc' that these
   * messages are not going to be delivered.
   */
  while ((src_id = has_pending_asend(rc, ANY)) != NULL_PRIV_ID)
      cancel_async(proc_addr(id_to_nr(src_id)), rc);

  for (rp = BEG_PROC_ADDR; rp < END_PROC_ADDR; rp++) {
      if(isemptyp(rp))
	continue;

      /* Unset pending notification bits. */
      unset_sys_bit(priv(rp)->s_notify_pending, priv(rc)->s_id);

      /* Unset pending asynchronous messages. */
      unset_sys_bit(priv(rp)->s_asyn_pending, priv(rc)->s_id);

      /* Check if process depends on given process. */
      if (P_BLOCKEDON(rp) == rc->p_endpoint) {
          rp->p_reg.retreg = caller_ret;	/* return requested code */
	  clear_ipc(rp);
      }
  }
}

/*===========================================================================*
 *                              kernel_call_resume                           *
 *===========================================================================*/
void kernel_call_resume(struct proc *caller)
{
	int result;

	assert(!RTS_ISSET(caller, RTS_SLOT_FREE));
	assert(!RTS_ISSET(caller, RTS_VMREQUEST));

	assert(caller->p_vmrequest.saved.reqmsg.m_source == caller->p_endpoint);

	/*
	printf("KERNEL_CALL restart from %s / %d rts 0x%08x misc 0x%08x\n",
			caller->p_name, caller->p_endpoint,
			caller->p_rts_flags, caller->p_misc_flags);
	 */

	/* Re-execute the kernel call, with MF_KCALL_RESUME still set so
	 * the call knows this is a retry.
	 */
	result = kernel_call_dispatch(caller, &caller->p_vmrequest.saved.reqmsg);
	/*
	 * We are resuming the kernel call, so remove the flag to allow it to
	 * be set again later.
	 */
	caller->p_misc_flags &= ~MF_KCALL_RESUME;
	kernel_call_finish(caller, &caller->p_vmrequest.saved.reqmsg, result);
}

/*===========================================================================*
 *                               sched_proc                                  *
 *===========================================================================*/
int sched_proc(struct proc *p,
			int priority,
			int quantum,
			int cpu)
{
	/* Make sure the values given are within the allowed range. */
	if ((priority < TASK_Q && priority != -1) || priority >= NR_SCHED_QUEUES)
		return(EINVAL);

	if (quantum < 1 && quantum != -1)
		return(EINVAL);

#ifdef CONFIG_SMP
	if ((cpu < 0 && cpu != -1) || (cpu > 0 && (unsigned) cpu >= ncpus))
		return(EINVAL);
	if (cpu != -1 && !(cpu_is_ready(cpu)))
		return EBADCPU;
#endif

	/* In some cases, we might be rescheduling a runnable process. In such
	 * a case (i.e. if we are updating the priority) we set the NO_QUANTUM
	 * flag before the generic unset to dequeue/enqueue the process.
	 */

	/* FIXME this preempts the process, do we really want to do that? */

	/* FIXME this is a problem for SMP if the process currently runs on a
	 * different CPU */
	if (proc_is_runnable(p)) {
#ifdef CONFIG_SMP
		if (p->p_cpu != cpuid && cpu != -1 && cpu != p->p_cpu) {
			smp_schedule_migrate_proc(p, cpu);
		}
#endif

		RTS_SET(p, RTS_NO_QUANTUM);
	}
	if (priority != -1)
		p->p_priority = priority;
	if (quantum != -1) {
		p->p_quantum_size_ms = quantum;
		p->p_cpu_time_left = ms_2_cpu_time(quantum);
	}
#ifdef CONFIG_SMP
	if (cpu != -1)
		p->p_cpu = cpu;
#endif

	/* Clear the scheduling bit and enqueue the process. */
	RTS_UNSET(p, RTS_NO_QUANTUM);

	return OK;
}
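
/* Illustrative sketch (comment only, not compiled): the userspace scheduler
 * reaches this through do_schedule(); a direct call with hypothetical
 * values would look like
 *
 *	// priority 7, 200 ms quantum, leave the CPU assignment unchanged
 *	if ((r = sched_proc(p, 7, 200, -1)) != OK)
 *		return r;
 *
 * where -1 means "keep the current value" for any of the three parameters.
 */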

/*===========================================================================*
 *				add_ipc_filter				     *
 *===========================================================================*/
int add_ipc_filter(struct proc *rp, int type, vir_bytes address,
	size_t length)
{
	int num_elements, r;
	ipc_filter_t *ipcf, **ipcfp;

	/* Validate arguments. */
	if (type != IPCF_BLACKLIST && type != IPCF_WHITELIST)
		return EINVAL;

	if (length % sizeof(ipc_filter_el_t) != 0)
		return EINVAL;

	num_elements = length / sizeof(ipc_filter_el_t);
	if (num_elements <= 0 || num_elements > IPCF_MAX_ELEMENTS)
		return E2BIG;

	/* Allocate a new IPC filter slot. */
	IPCF_POOL_ALLOCATE_SLOT(type, &ipcf);
	if (ipcf == NULL)
		return ENOMEM;

	/* Fill details. */
	ipcf->num_elements = num_elements;
	ipcf->next = NULL;
	r = data_copy(rp->p_endpoint, address,
		KERNEL, (vir_bytes)ipcf->elements, length);
	if (r == OK)
		r = check_ipc_filter(ipcf, TRUE /*fill_flags*/);
	if (r != OK) {
		IPCF_POOL_FREE_SLOT(ipcf);
		return r;
	}

	/* Add the new filter at the end of the IPC filter chain. */
	for (ipcfp = &priv(rp)->s_ipcf; *ipcfp != NULL;
	    ipcfp = &(*ipcfp)->next)
		;
	*ipcfp = ipcf;

	return OK;
}
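
/* Illustrative sketch (comment only, not compiled): the elements copied in
 * from 'address' are ipc_filter_el_t records. A service building a one-
 * element whitelist that accepts only notifications from SYSTEM might fill
 *
 *	ipc_filter_el_t el;
 *	el.flags = IPCF_MATCH_M_SOURCE | IPCF_MATCH_M_TYPE;
 *	el.m_source = SYSTEM;
 *	el.m_type = NOTIFY_MESSAGE;
 *
 * and pass it with type IPCF_WHITELIST and length sizeof(el).
 */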

/*===========================================================================*
 *				clear_ipc_filters			     *
 *===========================================================================*/
void clear_ipc_filters(struct proc *rp)
{
	ipc_filter_t *curr_ipcf, *ipcf;

	ipcf = priv(rp)->s_ipcf;
	while (ipcf != NULL) {
		curr_ipcf = ipcf;
		ipcf = ipcf->next;
		IPCF_POOL_FREE_SLOT(curr_ipcf);
	}

	priv(rp)->s_ipcf = NULL;

	/* VM is a special case here: since the cleared IPC filter may have
	 * blocked memory handling requests, we may now have to tell VM that
	 * there are "new" requests pending.
	 */
	if (rp->p_endpoint == VM_PROC_NR && vmrequest != NULL)
		if (send_sig(VM_PROC_NR, SIGKMEM) != OK)
			panic("send_sig failed");
}

/*===========================================================================*
 *				check_ipc_filter			     *
 *===========================================================================*/
int check_ipc_filter(ipc_filter_t *ipcf, int fill_flags)
{
	ipc_filter_el_t *ipcf_el;
	int i, num_elements, flags;

	if (ipcf == NULL)
		return OK;

	num_elements = ipcf->num_elements;
	flags = 0;
	for (i = 0; i < num_elements; i++) {
		ipcf_el = &ipcf->elements[i];
		if (!IPCF_EL_CHECK(ipcf_el))
			return EINVAL;
		flags |= ipcf_el->flags;
	}

	if (fill_flags)
		ipcf->flags = flags;
	else if (ipcf->flags != flags)
		return EINVAL;
	return OK;
}

/*===========================================================================*
 *				allow_ipc_filtered_msg			     *
 *===========================================================================*/
int allow_ipc_filtered_msg(struct proc *rp, endpoint_t src_e,
	vir_bytes m_src_v, message *m_src_p)
{
	int i, r, num_elements, get_mtype, allow;
	ipc_filter_t *ipcf;
	ipc_filter_el_t *ipcf_el;
	message m_buff;

	ipcf = priv(rp)->s_ipcf;
	if (ipcf == NULL)
		return TRUE; /* no IPC filters, always allow */

	if (m_src_p == NULL) {
		assert(m_src_v != 0);

		/* Should we copy in the message type? */
		get_mtype = FALSE;
		do {
#if DEBUG_DUMPIPCF
			if (TRUE) {
#else
			if (ipcf->flags & IPCF_MATCH_M_TYPE) {
#endif
				get_mtype = TRUE;
				break;
			}
			ipcf = ipcf->next;
		} while (ipcf);
		ipcf = priv(rp)->s_ipcf; /* reset to start */

		/* If so, copy it in from the process. */
		if (get_mtype) {
			r = data_copy(src_e,
			    m_src_v + offsetof(message, m_type), KERNEL,
			    (vir_bytes)&m_buff.m_type, sizeof(m_buff.m_type));
			if (r != OK) {
				/* allow for now, this will fail later anyway */
#if DEBUG_DUMPIPCF
				printf("KERNEL: allow_ipc_filtered_msg: data "
				    "copy error %d, allowing message...\n", r);
#endif
				return TRUE;
			}
		}
		m_src_p = &m_buff;
	}

	m_src_p->m_source = src_e;

	/* See if the message is allowed. */
	allow = (ipcf->type == IPCF_BLACKLIST);
	do {
		if (allow != (ipcf->type == IPCF_WHITELIST)) {
			num_elements = ipcf->num_elements;
			for (i = 0; i < num_elements; i++) {
				ipcf_el = &ipcf->elements[i];
				if (IPCF_EL_MATCH(ipcf_el, m_src_p)) {
					allow = (ipcf->type == IPCF_WHITELIST);
					break;
				}
			}
		}
		ipcf = ipcf->next;
	} while (ipcf);

#if DEBUG_DUMPIPCF
	printmsg(m_src_p, proc_addr(_ENDPOINT_P(src_e)), rp, allow ? '+' : '-',
	    TRUE /*printparams*/);
#endif

	return allow;
}
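
/* Worked example of the evaluation above: with a single whitelist filter
 * containing { SYSTEM, NOTIFY_MESSAGE }, 'allow' starts out FALSE and flips
 * to TRUE only for a notification from SYSTEM. With the same element in a
 * single blacklist filter, 'allow' starts out TRUE and flips to FALSE for
 * exactly that message. Chained filters are evaluated in order, and each
 * filter is consulted only while the current verdict disagrees with its
 * type, i.e. only if it could still change the outcome.
 */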

/*===========================================================================*
 *			  allow_ipc_filtered_memreq			     *
 *===========================================================================*/
int allow_ipc_filtered_memreq(struct proc *src_rp, struct proc *dst_rp)
{
	/* Determine whether VM should receive a request to handle memory
	 * that is the result of process 'src_rp' trying to access currently
	 * unavailable memory in process 'dst_rp'. Return TRUE if VM should
	 * be given the request, FALSE otherwise.
	 */

	struct proc *vmp;
	message m_buf;
	int allow_src, allow_dst;

	vmp = proc_addr(VM_PROC_NR);

	/* If VM has no filter in place, all requests should go through. */
	if (priv(vmp)->s_ipcf == NULL)
		return TRUE;

	/* VM obtains memory requests in response to a SIGKMEM signal, which
	 * is a notification sent from SYSTEM. Thus, if VM blocks such
	 * notifications, it also should not get any memory requests. Of
	 * course, VM should not be asking for requests in that case either,
	 * but the extra check doesn't hurt.
	 */
	m_buf.m_type = NOTIFY_MESSAGE;
	if (!allow_ipc_filtered_msg(vmp, SYSTEM, 0, &m_buf))
		return FALSE;

	/* A more refined policy may be implemented here, for example to
	 * ensure that both the source and the destination (if different)
	 * are in the group of processes that VM wants to talk to. Since VM
	 * is basically not able to handle any memory requests during an
	 * update, we will not get here, and none of that is needed.
	 */
	return TRUE;
}

/*===========================================================================*
 *                             priv_add_irq                                  *
 *===========================================================================*/
int priv_add_irq(struct proc *rp, int irq)
{
	struct priv *priv = priv(rp);
	int i;

	priv->s_flags |= CHECK_IRQ;	/* check IRQ */

	/* When restarting a driver, check if it already has the permission. */
	for (i = 0; i < priv->s_nr_irq; i++) {
		if (priv->s_irq_tab[i] == irq)
			return OK;
	}

	i = priv->s_nr_irq;
	if (i >= NR_IRQ) {
		printf("do_privctl: %d already has %d IRQs.\n",
			rp->p_endpoint, i);
		return ENOMEM;
	}
	priv->s_irq_tab[i] = irq;
	priv->s_nr_irq++;
	return OK;
}
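
/* Illustrative sketch (comment only, not compiled): do_privctl() calls this
 * while granting a driver an interrupt line; the IRQ number is hypothetical.
 *
 *	if ((r = priv_add_irq(rp, 14)) != OK)
 *		return r;	// table full: ENOMEM
 */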

/*===========================================================================*
 *                             priv_add_io                                   *
 *===========================================================================*/
int priv_add_io(struct proc *rp, struct io_range *ior)
{
	struct priv *priv = priv(rp);
	int i;

	priv->s_flags |= CHECK_IO_PORT;	/* check I/O accesses */

	for (i = 0; i < priv->s_nr_io_range; i++) {
		if (priv->s_io_tab[i].ior_base == ior->ior_base &&
			priv->s_io_tab[i].ior_limit == ior->ior_limit)
			return OK;
	}

	i = priv->s_nr_io_range;
	if (i >= NR_IO_RANGE) {
		printf("do_privctl: %d already has %d I/O ranges.\n",
			rp->p_endpoint, i);
		return ENOMEM;
	}

	priv->s_io_tab[i] = *ior;
	priv->s_nr_io_range++;
	return OK;
}

/*===========================================================================*
 *                             priv_add_mem                                  *
 *===========================================================================*/
int priv_add_mem(struct proc *rp, struct minix_mem_range *memr)
{
	struct priv *priv = priv(rp);
	int i;

	priv->s_flags |= CHECK_MEM;	/* check memory mappings */

	/* When restarting a driver, check if it already has the permission. */
	for (i = 0; i < priv->s_nr_mem_range; i++) {
		if (priv->s_mem_tab[i].mr_base == memr->mr_base &&
			priv->s_mem_tab[i].mr_limit == memr->mr_limit)
			return OK;
	}

	i = priv->s_nr_mem_range;
	if (i >= NR_MEM_RANGE) {
		printf("do_privctl: %d already has %d mem ranges.\n",
			rp->p_endpoint, i);
		return ENOMEM;
	}
	priv->s_mem_tab[i] = *memr;
	priv->s_nr_mem_range++;
	return OK;
}
997 
998